html-to-markdown 2.14.2__cp310-abi3-macosx_11_0_arm64.whl → 2.14.5__cp310-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,16 +15,16 @@ V1 API (backward compatibility):
15
15
  markdown = convert_to_markdown(html, heading_style="atx")
16
16
  """
17
17
 
18
- import contextlib
19
-
20
18
  from html_to_markdown.api import (
21
19
  InlineImage,
22
20
  InlineImageConfig,
23
21
  InlineImageWarning,
22
+ MetadataConfig,
24
23
  OptionsHandle,
25
24
  convert,
26
25
  convert_with_handle,
27
26
  convert_with_inline_images,
27
+ convert_with_metadata,
28
28
  create_options_handle,
29
29
  )
30
30
  from html_to_markdown.exceptions import (
@@ -37,9 +37,6 @@ from html_to_markdown.exceptions import (
37
37
  from html_to_markdown.options import ConversionOptions, PreprocessingOptions
38
38
  from html_to_markdown.v1_compat import convert_to_markdown, markdownify
39
39
 
40
- with contextlib.suppress(ImportError):
41
- from html_to_markdown.api import MetadataConfig, convert_with_metadata
42
-
43
40
  __all__ = [
44
41
  "ConflictingOptionsError",
45
42
  "ConversionOptions",
@@ -62,4 +59,4 @@ __all__ = [
62
59
  "markdownify",
63
60
  ]
64
61
 
65
- __version__ = "2.14.2"
62
+ __version__ = "2.14.5"
Binary file
html_to_markdown/api.py CHANGED
@@ -8,20 +8,16 @@ import html_to_markdown._html_to_markdown as _rust
8
8
  from html_to_markdown._html_to_markdown import (
9
9
  ConversionOptionsHandle as OptionsHandle,
10
10
  )
11
- from html_to_markdown._html_to_markdown import InlineImageConfig
11
+ from html_to_markdown._html_to_markdown import (
12
+ InlineImageConfig,
13
+ MetadataConfig,
14
+ )
12
15
  from html_to_markdown.options import ConversionOptions, PreprocessingOptions
13
16
 
14
- _HAS_METADATA = False
15
- try:
16
- from html_to_markdown._html_to_markdown import ExtendedMetadata, MetadataConfig
17
-
18
- _HAS_METADATA = True
19
- except ImportError:
20
- MetadataConfig = None # type: ignore[misc,assignment]
21
- if TYPE_CHECKING:
22
- from html_to_markdown._html_to_markdown import ExtendedMetadata # pragma: no cover
23
- else:
24
- ExtendedMetadata = dict[str, object] # type: ignore[assignment]
17
+ if TYPE_CHECKING:
18
+ from html_to_markdown._html_to_markdown import ExtendedMetadata # pragma: no cover
19
+ else:
20
+ ExtendedMetadata = dict[str, object] # type: ignore[assignment]
25
21
 
26
22
 
27
23
  class InlineImage(TypedDict):
@@ -146,40 +142,43 @@ def convert_with_handle(html: str, handle: OptionsHandle) -> str:
146
142
  return _rust.convert_with_options_handle(html, handle)
147
143
 
148
144
 
149
- if _HAS_METADATA:
150
-
151
- def convert_with_metadata(
152
- html: str,
153
- options: ConversionOptions | None = None,
154
- preprocessing: PreprocessingOptions | None = None,
155
- metadata_config: MetadataConfig | None = None,
156
- ) -> tuple[str, ExtendedMetadata]:
157
- """Convert HTML and extract comprehensive metadata.
158
-
159
- Args:
160
- html: HTML string to convert
161
- options: Optional conversion configuration
162
- preprocessing: Optional preprocessing configuration
163
- metadata_config: Optional metadata extraction configuration
164
-
165
- Returns:
166
- Tuple of (markdown, metadata_dict) where metadata_dict contains:
167
- - document: Document-level metadata (title, description, lang, etc.)
168
- - headers: List of header elements with hierarchy
169
- - links: List of extracted hyperlinks with classification
170
- - images: List of extracted images with metadata
171
- - structured_data: List of JSON-LD, Microdata, or RDFa blocks
172
- """
173
- if options is None:
174
- options = ConversionOptions()
175
- if preprocessing is None:
176
- preprocessing = PreprocessingOptions()
177
- if metadata_config is None:
178
- metadata_config = MetadataConfig()
179
-
180
- rust_options = _to_rust_options(options, preprocessing)
181
- markdown, metadata = _rust.convert_with_metadata(html, rust_options, metadata_config)
182
- return markdown, metadata
145
+ def convert_with_metadata(
146
+ html: str,
147
+ options: ConversionOptions | None = None,
148
+ preprocessing: PreprocessingOptions | None = None,
149
+ metadata_config: MetadataConfig | None = None,
150
+ ) -> tuple[str, ExtendedMetadata]:
151
+ """Convert HTML and extract comprehensive metadata.
152
+
153
+ Args:
154
+ html: HTML string to convert
155
+ options: Optional conversion configuration
156
+ preprocessing: Optional preprocessing configuration
157
+ metadata_config: Optional metadata extraction configuration
158
+
159
+ Returns:
160
+ Tuple of (markdown, metadata_dict) where metadata_dict contains:
161
+ - document: Document-level metadata (title, description, lang, etc.)
162
+ - headers: List of header elements with hierarchy
163
+ - links: List of extracted hyperlinks with classification
164
+ - images: List of extracted images with metadata
165
+ - structured_data: List of JSON-LD, Microdata, or RDFa blocks
166
+ """
167
+ if not hasattr(_rust, "convert_with_metadata"):
168
+ raise ImportError(
169
+ "convert_with_metadata is missing from the native extension; this indicates a broken/partial installation."
170
+ )
171
+
172
+ if options is None:
173
+ options = ConversionOptions()
174
+ if preprocessing is None:
175
+ preprocessing = PreprocessingOptions()
176
+ if metadata_config is None:
177
+ metadata_config = MetadataConfig()
178
+
179
+ rust_options = _to_rust_options(options, preprocessing)
180
+ markdown, metadata = _rust.convert_with_metadata(html, rust_options, metadata_config)
181
+ return markdown, metadata
183
182
 
184
183
 
185
184
  __all__ = [
Binary file
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: html-to-markdown
3
- Version: 2.14.2
3
+ Version: 2.14.5
4
4
  Classifier: Development Status :: 5 - Production/Stable
5
5
  Classifier: Environment :: Console
6
6
  Classifier: Intended Audience :: Developers
@@ -44,7 +44,7 @@ High-performance HTML to Markdown converter with a clean Python API (powered by
44
44
  [![Hex.pm](https://img.shields.io/hexpm/v/html_to_markdown.svg)](https://hex.pm/packages/html_to_markdown)
45
45
  [![NuGet](https://img.shields.io/nuget/v/Goldziher.HtmlToMarkdown.svg)](https://www.nuget.org/packages/Goldziher.HtmlToMarkdown/)
46
46
  [![Maven Central](https://img.shields.io/maven-central/v/io.github.goldziher/html-to-markdown.svg)](https://central.sonatype.com/artifact/io.github.goldziher/html-to-markdown)
47
- [![Go Reference](https://pkg.go.dev/badge/github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown.svg)](https://pkg.go.dev/github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown)
47
+ [![Go Reference](https://pkg.go.dev/badge/github.com/Goldziher/html-to-markdown/packages/go/v2/htmltomarkdown.svg)](https://pkg.go.dev/github.com/Goldziher/html-to-markdown/packages/go/v2/htmltomarkdown)
48
48
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/Goldziher/html-to-markdown/blob/main/LICENSE)
49
49
  [![Discord](https://img.shields.io/badge/Discord-Join%20our%20community-7289da)](https://discord.gg/pXxagNK2zN)
50
50
 
@@ -0,0 +1,17 @@
1
+ html_to_markdown-2.14.5.data/scripts/html-to-markdown,sha256=pZtJBTQolbGl_VL3mBo8mLIppGODYhfcq6yOZVldWOg,6263872
2
+ html_to_markdown-2.14.5.dist-info/RECORD,,
3
+ html_to_markdown-2.14.5.dist-info/WHEEL,sha256=WvP__evn8XoyZeDO32cKBm5BQTOFbdB1WoQ-d3AzYdw,132
4
+ html_to_markdown-2.14.5.dist-info/METADATA,sha256=_m5URCaMwleUAZpC712kPn5LKdqwwZesMTzlbm0F1MQ,23252
5
+ html_to_markdown-2.14.5.dist-info/licenses/LICENSE,sha256=oQvPC-0UWvfg0WaeUBe11OJMtX60An-TW1ev_oaAA0k,1086
6
+ html_to_markdown/options.py,sha256=vImRfeHAeyAy0Lnt6cTPHGbj7mTdw8AEUgo19u7MAA0,5080
7
+ html_to_markdown/_html_to_markdown.pyi,sha256=IPD6CegtaanBsKTmK30v4nvWZ5HUlCajS6jkiOsoVj8,5875
8
+ html_to_markdown/_html_to_markdown.abi3.so,sha256=hpn9BCm1Z-79e5NsXTxDjhtloCGWp1I_2aEwRy9-KTI,3503168
9
+ html_to_markdown/__init__.py,sha256=7FQJWsnvd_XVwzVEM7rEKI1XKfqIB81oRnV44qUTJeM,1605
10
+ html_to_markdown/api.py,sha256=MsTij04ij6hFhhNxdc5RXf2yobaRmB0BO1P_fjS4VvY,6806
11
+ html_to_markdown/v1_compat.py,sha256=kn5GYvgn3dTW_Zksu9PzWVk-5CYhvXxsqAeyTdDYZSY,8001
12
+ html_to_markdown/cli.py,sha256=Rn-s3FZPea1jgCJtDzH_TFvOEiA_uZFVfgjhr6xyL_g,64
13
+ html_to_markdown/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
+ html_to_markdown/exceptions.py,sha256=aTASOzbywgfqOYjlw18ZkOWSxKff4EbUbmMua_73TGA,2370
15
+ html_to_markdown/cli_proxy.py,sha256=HPYKH5Mf5OUvkbEQISJvAkxrbjWKxE5GokA44HoQ6z8,3858
16
+ html_to_markdown/__main__.py,sha256=3Ic_EbOt2h6W88q084pkz5IKU6iY5z_woBygH6u9aw0,327
17
+ html_to_markdown/bin/html-to-markdown,sha256=pZtJBTQolbGl_VL3mBo8mLIppGODYhfcq6yOZVldWOg,6263872
@@ -1,17 +0,0 @@
1
- html_to_markdown-2.14.2.data/scripts/html-to-markdown,sha256=hniSeml124eJXvYbQsC3GLsUlS-TX93fFYgLtxogCn8,6263856
2
- html_to_markdown-2.14.2.dist-info/RECORD,,
3
- html_to_markdown-2.14.2.dist-info/WHEEL,sha256=WvP__evn8XoyZeDO32cKBm5BQTOFbdB1WoQ-d3AzYdw,132
4
- html_to_markdown-2.14.2.dist-info/METADATA,sha256=KceoGs__CWCDYonJi8jTkXIyTSczsxeQP6IT-niDOPE,23246
5
- html_to_markdown-2.14.2.dist-info/licenses/LICENSE,sha256=oQvPC-0UWvfg0WaeUBe11OJMtX60An-TW1ev_oaAA0k,1086
6
- html_to_markdown/options.py,sha256=vImRfeHAeyAy0Lnt6cTPHGbj7mTdw8AEUgo19u7MAA0,5080
7
- html_to_markdown/_html_to_markdown.pyi,sha256=IPD6CegtaanBsKTmK30v4nvWZ5HUlCajS6jkiOsoVj8,5875
8
- html_to_markdown/_html_to_markdown.abi3.so,sha256=aL3Cy8W9rUaEyom4OTw9D1NQJ01ELgSzXSr-aDjVPc4,3503168
9
- html_to_markdown/__init__.py,sha256=heUlsM_dzRMTxzDPQtvEHO-9g85GtWXyLucGfkk_wp0,1692
10
- html_to_markdown/api.py,sha256=zXXoFpdDbMIQXl65NT7BjjYu_1xwEM7VNGNUK2zQNfQ,6934
11
- html_to_markdown/v1_compat.py,sha256=kn5GYvgn3dTW_Zksu9PzWVk-5CYhvXxsqAeyTdDYZSY,8001
12
- html_to_markdown/cli.py,sha256=Rn-s3FZPea1jgCJtDzH_TFvOEiA_uZFVfgjhr6xyL_g,64
13
- html_to_markdown/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- html_to_markdown/exceptions.py,sha256=aTASOzbywgfqOYjlw18ZkOWSxKff4EbUbmMua_73TGA,2370
15
- html_to_markdown/cli_proxy.py,sha256=HPYKH5Mf5OUvkbEQISJvAkxrbjWKxE5GokA44HoQ6z8,3858
16
- html_to_markdown/__main__.py,sha256=3Ic_EbOt2h6W88q084pkz5IKU6iY5z_woBygH6u9aw0,327
17
- html_to_markdown/bin/html-to-markdown,sha256=hniSeml124eJXvYbQsC3GLsUlS-TX93fFYgLtxogCn8,6263856