html-to-markdown 2.14.2__cp310-abi3-macosx_11_0_arm64.whl → 2.14.5__cp310-abi3-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- html_to_markdown/__init__.py +3 -6
- html_to_markdown/_html_to_markdown.abi3.so +0 -0
- html_to_markdown/api.py +45 -46
- html_to_markdown/bin/html-to-markdown +0 -0
- {html_to_markdown-2.14.2.data → html_to_markdown-2.14.5.data}/scripts/html-to-markdown +0 -0
- {html_to_markdown-2.14.2.dist-info → html_to_markdown-2.14.5.dist-info}/METADATA +2 -2
- html_to_markdown-2.14.5.dist-info/RECORD +17 -0
- html_to_markdown-2.14.2.dist-info/RECORD +0 -17
- {html_to_markdown-2.14.2.dist-info → html_to_markdown-2.14.5.dist-info}/WHEEL +0 -0
- {html_to_markdown-2.14.2.dist-info → html_to_markdown-2.14.5.dist-info}/licenses/LICENSE +0 -0
html_to_markdown/__init__.py
CHANGED
|
@@ -15,16 +15,16 @@ V1 API (backward compatibility):
|
|
|
15
15
|
markdown = convert_to_markdown(html, heading_style="atx")
|
|
16
16
|
"""
|
|
17
17
|
|
|
18
|
-
import contextlib
|
|
19
|
-
|
|
20
18
|
from html_to_markdown.api import (
|
|
21
19
|
InlineImage,
|
|
22
20
|
InlineImageConfig,
|
|
23
21
|
InlineImageWarning,
|
|
22
|
+
MetadataConfig,
|
|
24
23
|
OptionsHandle,
|
|
25
24
|
convert,
|
|
26
25
|
convert_with_handle,
|
|
27
26
|
convert_with_inline_images,
|
|
27
|
+
convert_with_metadata,
|
|
28
28
|
create_options_handle,
|
|
29
29
|
)
|
|
30
30
|
from html_to_markdown.exceptions import (
|
|
@@ -37,9 +37,6 @@ from html_to_markdown.exceptions import (
|
|
|
37
37
|
from html_to_markdown.options import ConversionOptions, PreprocessingOptions
|
|
38
38
|
from html_to_markdown.v1_compat import convert_to_markdown, markdownify
|
|
39
39
|
|
|
40
|
-
with contextlib.suppress(ImportError):
|
|
41
|
-
from html_to_markdown.api import MetadataConfig, convert_with_metadata
|
|
42
|
-
|
|
43
40
|
__all__ = [
|
|
44
41
|
"ConflictingOptionsError",
|
|
45
42
|
"ConversionOptions",
|
|
@@ -62,4 +59,4 @@ __all__ = [
|
|
|
62
59
|
"markdownify",
|
|
63
60
|
]
|
|
64
61
|
|
|
65
|
-
__version__ = "2.14.2"
|
|
62
|
+
__version__ = "2.14.5"
|
|
Binary file
|
html_to_markdown/api.py
CHANGED
|
@@ -8,20 +8,16 @@ import html_to_markdown._html_to_markdown as _rust
|
|
|
8
8
|
from html_to_markdown._html_to_markdown import (
|
|
9
9
|
ConversionOptionsHandle as OptionsHandle,
|
|
10
10
|
)
|
|
11
|
-
from html_to_markdown._html_to_markdown import InlineImageConfig
|
|
11
|
+
from html_to_markdown._html_to_markdown import (
|
|
12
|
+
InlineImageConfig,
|
|
13
|
+
MetadataConfig,
|
|
14
|
+
)
|
|
12
15
|
from html_to_markdown.options import ConversionOptions, PreprocessingOptions
|
|
13
16
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
_HAS_METADATA = True
|
|
19
|
-
except ImportError:
|
|
20
|
-
MetadataConfig = None # type: ignore[misc,assignment]
|
|
21
|
-
if TYPE_CHECKING:
|
|
22
|
-
from html_to_markdown._html_to_markdown import ExtendedMetadata # pragma: no cover
|
|
23
|
-
else:
|
|
24
|
-
ExtendedMetadata = dict[str, object] # type: ignore[assignment]
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from html_to_markdown._html_to_markdown import ExtendedMetadata # pragma: no cover
|
|
19
|
+
else:
|
|
20
|
+
ExtendedMetadata = dict[str, object] # type: ignore[assignment]
|
|
25
21
|
|
|
26
22
|
|
|
27
23
|
class InlineImage(TypedDict):
|
|
@@ -146,40 +142,43 @@ def convert_with_handle(html: str, handle: OptionsHandle) -> str:
|
|
|
146
142
|
return _rust.convert_with_options_handle(html, handle)
|
|
147
143
|
|
|
148
144
|
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
145
|
+
def convert_with_metadata(
|
|
146
|
+
html: str,
|
|
147
|
+
options: ConversionOptions | None = None,
|
|
148
|
+
preprocessing: PreprocessingOptions | None = None,
|
|
149
|
+
metadata_config: MetadataConfig | None = None,
|
|
150
|
+
) -> tuple[str, ExtendedMetadata]:
|
|
151
|
+
"""Convert HTML and extract comprehensive metadata.
|
|
152
|
+
|
|
153
|
+
Args:
|
|
154
|
+
html: HTML string to convert
|
|
155
|
+
options: Optional conversion configuration
|
|
156
|
+
preprocessing: Optional preprocessing configuration
|
|
157
|
+
metadata_config: Optional metadata extraction configuration
|
|
158
|
+
|
|
159
|
+
Returns:
|
|
160
|
+
Tuple of (markdown, metadata_dict) where metadata_dict contains:
|
|
161
|
+
- document: Document-level metadata (title, description, lang, etc.)
|
|
162
|
+
- headers: List of header elements with hierarchy
|
|
163
|
+
- links: List of extracted hyperlinks with classification
|
|
164
|
+
- images: List of extracted images with metadata
|
|
165
|
+
- structured_data: List of JSON-LD, Microdata, or RDFa blocks
|
|
166
|
+
"""
|
|
167
|
+
if not hasattr(_rust, "convert_with_metadata"):
|
|
168
|
+
raise ImportError(
|
|
169
|
+
"convert_with_metadata is missing from the native extension; this indicates a broken/partial installation."
|
|
170
|
+
)
|
|
171
|
+
|
|
172
|
+
if options is None:
|
|
173
|
+
options = ConversionOptions()
|
|
174
|
+
if preprocessing is None:
|
|
175
|
+
preprocessing = PreprocessingOptions()
|
|
176
|
+
if metadata_config is None:
|
|
177
|
+
metadata_config = MetadataConfig()
|
|
178
|
+
|
|
179
|
+
rust_options = _to_rust_options(options, preprocessing)
|
|
180
|
+
markdown, metadata = _rust.convert_with_metadata(html, rust_options, metadata_config)
|
|
181
|
+
return markdown, metadata
|
|
183
182
|
|
|
184
183
|
|
|
185
184
|
__all__ = [
|
|
Binary file
|
|
Binary file
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: html-to-markdown
|
|
3
|
-
Version: 2.14.2
|
|
3
|
+
Version: 2.14.5
|
|
4
4
|
Classifier: Development Status :: 5 - Production/Stable
|
|
5
5
|
Classifier: Environment :: Console
|
|
6
6
|
Classifier: Intended Audience :: Developers
|
|
@@ -44,7 +44,7 @@ High-performance HTML to Markdown converter with a clean Python API (powered by
|
|
|
44
44
|
[](https://hex.pm/packages/html_to_markdown)
|
|
45
45
|
[](https://www.nuget.org/packages/Goldziher.HtmlToMarkdown/)
|
|
46
46
|
[](https://central.sonatype.com/artifact/io.github.goldziher/html-to-markdown)
|
|
47
|
-
[](https://pkg.go.dev/github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown)
|
|
47
|
+
[](https://pkg.go.dev/github.com/Goldziher/html-to-markdown/packages/go/v2/htmltomarkdown)
|
|
48
48
|
[](https://github.com/Goldziher/html-to-markdown/blob/main/LICENSE)
|
|
49
49
|
[](https://discord.gg/pXxagNK2zN)
|
|
50
50
|
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
html_to_markdown-2.14.5.data/scripts/html-to-markdown,sha256=pZtJBTQolbGl_VL3mBo8mLIppGODYhfcq6yOZVldWOg,6263872
|
|
2
|
+
html_to_markdown-2.14.5.dist-info/RECORD,,
|
|
3
|
+
html_to_markdown-2.14.5.dist-info/WHEEL,sha256=WvP__evn8XoyZeDO32cKBm5BQTOFbdB1WoQ-d3AzYdw,132
|
|
4
|
+
html_to_markdown-2.14.5.dist-info/METADATA,sha256=_m5URCaMwleUAZpC712kPn5LKdqwwZesMTzlbm0F1MQ,23252
|
|
5
|
+
html_to_markdown-2.14.5.dist-info/licenses/LICENSE,sha256=oQvPC-0UWvfg0WaeUBe11OJMtX60An-TW1ev_oaAA0k,1086
|
|
6
|
+
html_to_markdown/options.py,sha256=vImRfeHAeyAy0Lnt6cTPHGbj7mTdw8AEUgo19u7MAA0,5080
|
|
7
|
+
html_to_markdown/_html_to_markdown.pyi,sha256=IPD6CegtaanBsKTmK30v4nvWZ5HUlCajS6jkiOsoVj8,5875
|
|
8
|
+
html_to_markdown/_html_to_markdown.abi3.so,sha256=hpn9BCm1Z-79e5NsXTxDjhtloCGWp1I_2aEwRy9-KTI,3503168
|
|
9
|
+
html_to_markdown/__init__.py,sha256=7FQJWsnvd_XVwzVEM7rEKI1XKfqIB81oRnV44qUTJeM,1605
|
|
10
|
+
html_to_markdown/api.py,sha256=MsTij04ij6hFhhNxdc5RXf2yobaRmB0BO1P_fjS4VvY,6806
|
|
11
|
+
html_to_markdown/v1_compat.py,sha256=kn5GYvgn3dTW_Zksu9PzWVk-5CYhvXxsqAeyTdDYZSY,8001
|
|
12
|
+
html_to_markdown/cli.py,sha256=Rn-s3FZPea1jgCJtDzH_TFvOEiA_uZFVfgjhr6xyL_g,64
|
|
13
|
+
html_to_markdown/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
|
+
html_to_markdown/exceptions.py,sha256=aTASOzbywgfqOYjlw18ZkOWSxKff4EbUbmMua_73TGA,2370
|
|
15
|
+
html_to_markdown/cli_proxy.py,sha256=HPYKH5Mf5OUvkbEQISJvAkxrbjWKxE5GokA44HoQ6z8,3858
|
|
16
|
+
html_to_markdown/__main__.py,sha256=3Ic_EbOt2h6W88q084pkz5IKU6iY5z_woBygH6u9aw0,327
|
|
17
|
+
html_to_markdown/bin/html-to-markdown,sha256=pZtJBTQolbGl_VL3mBo8mLIppGODYhfcq6yOZVldWOg,6263872
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
html_to_markdown-2.14.2.data/scripts/html-to-markdown,sha256=hniSeml124eJXvYbQsC3GLsUlS-TX93fFYgLtxogCn8,6263856
|
|
2
|
-
html_to_markdown-2.14.2.dist-info/RECORD,,
|
|
3
|
-
html_to_markdown-2.14.2.dist-info/WHEEL,sha256=WvP__evn8XoyZeDO32cKBm5BQTOFbdB1WoQ-d3AzYdw,132
|
|
4
|
-
html_to_markdown-2.14.2.dist-info/METADATA,sha256=KceoGs__CWCDYonJi8jTkXIyTSczsxeQP6IT-niDOPE,23246
|
|
5
|
-
html_to_markdown-2.14.2.dist-info/licenses/LICENSE,sha256=oQvPC-0UWvfg0WaeUBe11OJMtX60An-TW1ev_oaAA0k,1086
|
|
6
|
-
html_to_markdown/options.py,sha256=vImRfeHAeyAy0Lnt6cTPHGbj7mTdw8AEUgo19u7MAA0,5080
|
|
7
|
-
html_to_markdown/_html_to_markdown.pyi,sha256=IPD6CegtaanBsKTmK30v4nvWZ5HUlCajS6jkiOsoVj8,5875
|
|
8
|
-
html_to_markdown/_html_to_markdown.abi3.so,sha256=aL3Cy8W9rUaEyom4OTw9D1NQJ01ELgSzXSr-aDjVPc4,3503168
|
|
9
|
-
html_to_markdown/__init__.py,sha256=heUlsM_dzRMTxzDPQtvEHO-9g85GtWXyLucGfkk_wp0,1692
|
|
10
|
-
html_to_markdown/api.py,sha256=zXXoFpdDbMIQXl65NT7BjjYu_1xwEM7VNGNUK2zQNfQ,6934
|
|
11
|
-
html_to_markdown/v1_compat.py,sha256=kn5GYvgn3dTW_Zksu9PzWVk-5CYhvXxsqAeyTdDYZSY,8001
|
|
12
|
-
html_to_markdown/cli.py,sha256=Rn-s3FZPea1jgCJtDzH_TFvOEiA_uZFVfgjhr6xyL_g,64
|
|
13
|
-
html_to_markdown/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
|
-
html_to_markdown/exceptions.py,sha256=aTASOzbywgfqOYjlw18ZkOWSxKff4EbUbmMua_73TGA,2370
|
|
15
|
-
html_to_markdown/cli_proxy.py,sha256=HPYKH5Mf5OUvkbEQISJvAkxrbjWKxE5GokA44HoQ6z8,3858
|
|
16
|
-
html_to_markdown/__main__.py,sha256=3Ic_EbOt2h6W88q084pkz5IKU6iY5z_woBygH6u9aw0,327
|
|
17
|
-
html_to_markdown/bin/html-to-markdown,sha256=hniSeml124eJXvYbQsC3GLsUlS-TX93fFYgLtxogCn8,6263856
|
|
File without changes
|
|
File without changes
|