html-to-markdown 2.4.1__cp310-abi3-macosx_11_0_arm64.whl → 2.5.0__cp310-abi3-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of html-to-markdown might be problematic. Click here for more details.
- html_to_markdown/__init__.py +1 -1
- html_to_markdown/_html_to_markdown.abi3.so +0 -0
- html_to_markdown/api.py +1 -0
- html_to_markdown/bin/html-to-markdown +0 -0
- html_to_markdown/options.py +5 -2
- {html_to_markdown-2.4.1.data → html_to_markdown-2.5.0.data}/scripts/html-to-markdown +0 -0
- {html_to_markdown-2.4.1.dist-info → html_to_markdown-2.5.0.dist-info}/METADATA +7 -4
- html_to_markdown-2.5.0.dist-info/RECORD +17 -0
- html_to_markdown-2.4.1.dist-info/RECORD +0 -17
- {html_to_markdown-2.4.1.dist-info → html_to_markdown-2.5.0.dist-info}/WHEEL +0 -0
- {html_to_markdown-2.4.1.dist-info → html_to_markdown-2.5.0.dist-info}/licenses/LICENSE +0 -0
html_to_markdown/__init__.py
CHANGED
|
Binary file
|
html_to_markdown/api.py
CHANGED
|
Binary file
|
html_to_markdown/options.py
CHANGED
|
@@ -95,6 +95,9 @@ class ConversionOptions:
|
|
|
95
95
|
strip_tags: set[str] | None = None
|
|
96
96
|
"""HTML tags to strip from output (output only text content, no markdown conversion)."""
|
|
97
97
|
|
|
98
|
+
preserve_tags: set[str] | None = None
|
|
99
|
+
"""HTML tags to preserve as-is in the output (keep original HTML). Useful for complex elements like tables."""
|
|
100
|
+
|
|
98
101
|
convert_as_inline: bool = False
|
|
99
102
|
"""Treat block elements as inline during conversion."""
|
|
100
103
|
|
|
@@ -128,8 +131,8 @@ class PreprocessingOptions:
|
|
|
128
131
|
... )
|
|
129
132
|
"""
|
|
130
133
|
|
|
131
|
-
enabled: bool = False
|
|
132
|
-
"""Whether to enable HTML preprocessing (
|
|
134
|
+
enabled: bool = True
|
|
135
|
+
"""Whether to enable HTML preprocessing (enabled by default for robust handling of malformed HTML)."""
|
|
133
136
|
|
|
134
137
|
preset: Literal["minimal", "standard", "aggressive"] = "standard"
|
|
135
138
|
"""Preprocessing aggressiveness level."""
|
|
Binary file
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: html-to-markdown
|
|
3
|
-
Version: 2.4.1
|
|
3
|
+
Version: 2.5.0
|
|
4
4
|
Classifier: Development Status :: 5 - Production/Stable
|
|
5
5
|
Classifier: Environment :: Console
|
|
6
6
|
Classifier: Intended Audience :: Developers
|
|
@@ -162,9 +162,12 @@ Key fields (see docstring for full matrix):
|
|
|
162
162
|
|
|
163
163
|
### `PreprocessingOptions`
|
|
164
164
|
|
|
165
|
-
- `enabled`: enable HTML sanitisation
|
|
166
|
-
- `preset`: `"minimal" | "standard" | "aggressive"`
|
|
167
|
-
- `remove_navigation
|
|
165
|
+
- `enabled`: enable HTML sanitisation (default: `True` since v2.4.2 for robust malformed HTML handling)
|
|
166
|
+
- `preset`: `"minimal" | "standard" | "aggressive"` (default: `"standard"`)
|
|
167
|
+
- `remove_navigation`: remove navigation elements (default: `True`)
|
|
168
|
+
- `remove_forms`: remove form elements (default: `True`)
|
|
169
|
+
|
|
170
|
+
**Note:** As of v2.4.2, preprocessing is enabled by default to ensure robust handling of malformed HTML (e.g., bare angle brackets like `1<2` in content). Set `enabled=False` if you need minimal preprocessing.
|
|
168
171
|
|
|
169
172
|
### `InlineImageConfig`
|
|
170
173
|
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
html_to_markdown-2.5.0.dist-info/RECORD,,
|
|
2
|
+
html_to_markdown-2.5.0.dist-info/WHEEL,sha256=HtAbUhtjhH1WdiDuIy2CapdoAiKCwe6bij_Tlxr1lEg,131
|
|
3
|
+
html_to_markdown-2.5.0.dist-info/METADATA,sha256=-cJomvtoyyYuKadCinA5UZEO4Oc-0SFP4gozQZMW0bc,9199
|
|
4
|
+
html_to_markdown-2.5.0.dist-info/licenses/LICENSE,sha256=oQvPC-0UWvfg0WaeUBe11OJMtX60An-TW1ev_oaAA0k,1086
|
|
5
|
+
html_to_markdown-2.5.0.data/scripts/html-to-markdown,sha256=mTrDOF6GXv69CkGoDj8kfAZYxtxUraCrJn098XF9eWA,3884336
|
|
6
|
+
html_to_markdown/options.py,sha256=vImRfeHAeyAy0Lnt6cTPHGbj7mTdw8AEUgo19u7MAA0,5080
|
|
7
|
+
html_to_markdown/_html_to_markdown.abi3.so,sha256=uMIpbLa7N1jXzb2iYHWkD0-FMfxVP5oOwXvV-huAq0E,3701616
|
|
8
|
+
html_to_markdown/__init__.py,sha256=dyDrtQxQvh8ReI-5LVHgeR1cflevhIW838jFo5KRdpI,1358
|
|
9
|
+
html_to_markdown/api.py,sha256=uiNoieNXrcXTJI2_vV7ruDv9HKD7XFuosCAeqZL-C_Q,4944
|
|
10
|
+
html_to_markdown/_rust.pyi,sha256=pi6C_qAdB81qUlC89Dy1ZKC1JrpdnqVce-caJx3ekPA,2098
|
|
11
|
+
html_to_markdown/v1_compat.py,sha256=5DZA-fPMqZ5hYiA43rFaOAqshLS8MScbBnivDXuvQII,8034
|
|
12
|
+
html_to_markdown/cli.py,sha256=Rn-s3FZPea1jgCJtDzH_TFvOEiA_uZFVfgjhr6xyL_g,64
|
|
13
|
+
html_to_markdown/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
|
+
html_to_markdown/exceptions.py,sha256=aTASOzbywgfqOYjlw18ZkOWSxKff4EbUbmMua_73TGA,2370
|
|
15
|
+
html_to_markdown/cli_proxy.py,sha256=8Byrnok5-WkDWToaUeMcKi1xVr62PhZKygHymdrSfFE,3682
|
|
16
|
+
html_to_markdown/__main__.py,sha256=3Ic_EbOt2h6W88q084pkz5IKU6iY5z_woBygH6u9aw0,327
|
|
17
|
+
html_to_markdown/bin/html-to-markdown,sha256=mTrDOF6GXv69CkGoDj8kfAZYxtxUraCrJn098XF9eWA,3884336
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
html_to_markdown-2.4.1.dist-info/RECORD,,
|
|
2
|
-
html_to_markdown-2.4.1.dist-info/WHEEL,sha256=HtAbUhtjhH1WdiDuIy2CapdoAiKCwe6bij_Tlxr1lEg,131
|
|
3
|
-
html_to_markdown-2.4.1.dist-info/METADATA,sha256=4x7lilhuSBe0BYBt8adF-sWSDyc3ltqeozbuq2WliGQ,8811
|
|
4
|
-
html_to_markdown-2.4.1.dist-info/licenses/LICENSE,sha256=oQvPC-0UWvfg0WaeUBe11OJMtX60An-TW1ev_oaAA0k,1086
|
|
5
|
-
html_to_markdown-2.4.1.data/scripts/html-to-markdown,sha256=emkAIvTTFXT2bSBY6gbJ6MMNvI7zGnJJ-U74JPmhL1M,3834416
|
|
6
|
-
html_to_markdown/options.py,sha256=N1orEtzXiqgV88Y7eqRjw3ilxDi7N97FBkBa79hdfzo,4913
|
|
7
|
-
html_to_markdown/_html_to_markdown.abi3.so,sha256=NP1TwcmacpejjFFEF6ivrqj1ja1lWolha2UtcPrd0Ac,3668544
|
|
8
|
-
html_to_markdown/__init__.py,sha256=dE3LTqvmvxA0QzXwC-36wah17rJtmzWXKFAvraStEkQ,1358
|
|
9
|
-
html_to_markdown/api.py,sha256=sXezV2iZb42s-za0hWWHFhihKjaMo6j_tx7e-0EyQPg,4860
|
|
10
|
-
html_to_markdown/_rust.pyi,sha256=pi6C_qAdB81qUlC89Dy1ZKC1JrpdnqVce-caJx3ekPA,2098
|
|
11
|
-
html_to_markdown/v1_compat.py,sha256=5DZA-fPMqZ5hYiA43rFaOAqshLS8MScbBnivDXuvQII,8034
|
|
12
|
-
html_to_markdown/cli.py,sha256=Rn-s3FZPea1jgCJtDzH_TFvOEiA_uZFVfgjhr6xyL_g,64
|
|
13
|
-
html_to_markdown/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
|
-
html_to_markdown/exceptions.py,sha256=aTASOzbywgfqOYjlw18ZkOWSxKff4EbUbmMua_73TGA,2370
|
|
15
|
-
html_to_markdown/cli_proxy.py,sha256=8Byrnok5-WkDWToaUeMcKi1xVr62PhZKygHymdrSfFE,3682
|
|
16
|
-
html_to_markdown/__main__.py,sha256=3Ic_EbOt2h6W88q084pkz5IKU6iY5z_woBygH6u9aw0,327
|
|
17
|
-
html_to_markdown/bin/html-to-markdown,sha256=emkAIvTTFXT2bSBY6gbJ6MMNvI7zGnJJ-U74JPmhL1M,3834416
|
|
File without changes
|
|
File without changes
|