html-to-markdown 2.4.1__cp310-abi3-win_amd64.whl → 2.5.0__cp310-abi3-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of html-to-markdown might be problematic. Click here for more details.

@@ -49,4 +49,4 @@ __all__ = [
49
49
  "markdownify",
50
50
  ]
51
51
 
52
- __version__ = "2.4.1"
52
+ __version__ = "2.5.0"
Binary file
html_to_markdown/api.py CHANGED
@@ -82,6 +82,7 @@ def _to_rust_options(
82
82
  encoding=options.encoding,
83
83
  debug=options.debug,
84
84
  strip_tags=list(options.strip_tags) if options.strip_tags else [],
85
+ preserve_tags=list(options.preserve_tags) if options.preserve_tags else [],
85
86
  )
86
87
 
87
88
 
Binary file
@@ -95,6 +95,9 @@ class ConversionOptions:
95
95
  strip_tags: set[str] | None = None
96
96
  """HTML tags to strip from output (output only text content, no markdown conversion)."""
97
97
 
98
+ preserve_tags: set[str] | None = None
99
+ """HTML tags to preserve as-is in the output (keep original HTML). Useful for complex elements like tables."""
100
+
98
101
  convert_as_inline: bool = False
99
102
  """Treat block elements as inline during conversion."""
100
103
 
@@ -128,8 +131,8 @@ class PreprocessingOptions:
128
131
  ... )
129
132
  """
130
133
 
131
- enabled: bool = False
132
- """Whether to enable HTML preprocessing (disabled by default for minimal transformation)."""
134
+ enabled: bool = True
135
+ """Whether to enable HTML preprocessing (enabled by default for robust handling of malformed HTML)."""
133
136
 
134
137
  preset: Literal["minimal", "standard", "aggressive"] = "standard"
135
138
  """Preprocessing aggressiveness level."""
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: html-to-markdown
3
- Version: 2.4.1
3
+ Version: 2.5.0
4
4
  Classifier: Development Status :: 5 - Production/Stable
5
5
  Classifier: Environment :: Console
6
6
  Classifier: Intended Audience :: Developers
@@ -162,9 +162,12 @@ Key fields (see docstring for full matrix):
162
162
 
163
163
  ### `PreprocessingOptions`
164
164
 
165
- - `enabled`: enable HTML sanitisation
166
- - `preset`: `"minimal" | "standard" | "aggressive"`
167
- - `remove_navigation`, `remove_forms`
165
+ - `enabled`: enable HTML sanitisation (default: `True` since v2.4.2 for robust malformed HTML handling)
166
+ - `preset`: `"minimal" | "standard" | "aggressive"` (default: `"standard"`)
167
+ - `remove_navigation`: remove navigation elements (default: `True`)
168
+ - `remove_forms`: remove form elements (default: `True`)
169
+
170
+ **Note:** As of v2.4.2, preprocessing is enabled by default to ensure robust handling of malformed HTML (e.g., bare angle brackets like `1<2` in content). Set `enabled=False` to turn preprocessing off entirely, or keep it enabled with `preset="minimal"` for the least aggressive level.
168
171
 
169
172
  ### `InlineImageConfig`
170
173
 
@@ -0,0 +1,17 @@
1
+ html_to_markdown-2.5.0.data/scripts/html-to-markdown.exe,sha256=QBIiheprZwfuz5EhSqttEteTPn-w7LeqQNoGx7kO9PE,4531200
2
+ html_to_markdown-2.5.0.dist-info/METADATA,sha256=vsDigqwoGBTZ5L31L7GCePFzhQcuV2WLg2Tn5JlxbR4,9403
3
+ html_to_markdown-2.5.0.dist-info/WHEEL,sha256=4EDp_7DiFfWl1yYv5M4wSosAn5L_xgD1dyrQxQxfCx8,95
4
+ html_to_markdown-2.5.0.dist-info/licenses/LICENSE,sha256=QhKFMkQLa4mSUlOsyG9VElzC7GYbAKtiS_EwOCyH-b4,1107
5
+ html_to_markdown/__init__.py,sha256=w4c2ZeJNTRWE4kGKQEEEE0HabgfxNNpNOyXkBJmGAQ8,1410
6
+ html_to_markdown/__main__.py,sha256=5objj9lB7hhpSpZsDok5tv9o9yztVR63Ccww-pXsAyY,343
7
+ html_to_markdown/_html_to_markdown.pyd,sha256=tIxDvRmWyhcR9_PekDwYtRO0bhXww0GQ8vF-ZAttYgw,4254720
8
+ html_to_markdown/_rust.pyi,sha256=JP8tvcjYDfFJeJkbLpQ4qeK-5jl0hzIVT3Sa0daTkyo,2171
9
+ html_to_markdown/api.py,sha256=fghTHTmWkl9K0ap67uZAhiwxDGYYkknGX8le6prhz34,5088
10
+ html_to_markdown/bin/html-to-markdown.exe,sha256=QBIiheprZwfuz5EhSqttEteTPn-w7LeqQNoGx7kO9PE,4531200
11
+ html_to_markdown/cli.py,sha256=z59l8sF8wIRRzJtUd-tXgqiC0WTqkTjzl-df8Ey_oQ0,67
12
+ html_to_markdown/cli_proxy.py,sha256=JGOuINBI8OMYLxojXGz8DdzMHo8eqgdINstOZWrdw-8,3816
13
+ html_to_markdown/exceptions.py,sha256=31VqpPi4JLGv7lI2481Z4f2s5ejYmq97c3s-WFFkXVU,2443
14
+ html_to_markdown/options.py,sha256=iDEIfxxZlSHDM3V-Sr-XVxYLC1mzvuic56jSycYvQvY,5224
15
+ html_to_markdown/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
+ html_to_markdown/v1_compat.py,sha256=aVt9cVTBfYcrS8EfBsrC6HQwWc3Kz9-65-LB9foN6Jk,8227
17
+ html_to_markdown-2.5.0.dist-info/RECORD,,
@@ -1,17 +0,0 @@
1
- html_to_markdown-2.4.1.data/scripts/html-to-markdown.exe,sha256=C7lSkItGKC-rxUTJ1-s_lLM4mgsNYKAF1aA1ctQjTJE,4476928
2
- html_to_markdown-2.4.1.dist-info/METADATA,sha256=ONL8X8T_EPsCf6LmWEnNozk5IFmfkWdp-IYjMD_u2K8,9012
3
- html_to_markdown-2.4.1.dist-info/WHEEL,sha256=4EDp_7DiFfWl1yYv5M4wSosAn5L_xgD1dyrQxQxfCx8,95
4
- html_to_markdown-2.4.1.dist-info/licenses/LICENSE,sha256=QhKFMkQLa4mSUlOsyG9VElzC7GYbAKtiS_EwOCyH-b4,1107
5
- html_to_markdown/__init__.py,sha256=RMyWY2awLrpssSrYn0eh-BnlGgDxcK8dhSXAj5_Upbc,1410
6
- html_to_markdown/__main__.py,sha256=5objj9lB7hhpSpZsDok5tv9o9yztVR63Ccww-pXsAyY,343
7
- html_to_markdown/_html_to_markdown.pyd,sha256=0OYBHg29UeEQ3tHzdUL0CyinV1T3V5ed1L5Nto7oI50,4223488
8
- html_to_markdown/_rust.pyi,sha256=JP8tvcjYDfFJeJkbLpQ4qeK-5jl0hzIVT3Sa0daTkyo,2171
9
- html_to_markdown/api.py,sha256=jDb8PT1cS3KqipT4m_rKBE0R20UKOU85rH-7M6P6Owk,5003
10
- html_to_markdown/bin/html-to-markdown.exe,sha256=C7lSkItGKC-rxUTJ1-s_lLM4mgsNYKAF1aA1ctQjTJE,4476928
11
- html_to_markdown/cli.py,sha256=z59l8sF8wIRRzJtUd-tXgqiC0WTqkTjzl-df8Ey_oQ0,67
12
- html_to_markdown/cli_proxy.py,sha256=JGOuINBI8OMYLxojXGz8DdzMHo8eqgdINstOZWrdw-8,3816
13
- html_to_markdown/exceptions.py,sha256=31VqpPi4JLGv7lI2481Z4f2s5ejYmq97c3s-WFFkXVU,2443
14
- html_to_markdown/options.py,sha256=jna7fx9bHhx8N7u5IYtMXganFFzdJSVVgLZW0tYk3GA,5054
15
- html_to_markdown/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
- html_to_markdown/v1_compat.py,sha256=aVt9cVTBfYcrS8EfBsrC6HQwWc3Kz9-65-LB9foN6Jk,8227
17
- html_to_markdown-2.4.1.dist-info/RECORD,,