html-to-markdown 2.14.0__cp310-abi3-macosx_11_0_arm64.whl → 2.14.5__cp310-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,16 +15,16 @@ V1 API (backward compatibility):
15
15
  markdown = convert_to_markdown(html, heading_style="atx")
16
16
  """
17
17
 
18
- import contextlib
19
-
20
18
  from html_to_markdown.api import (
21
19
  InlineImage,
22
20
  InlineImageConfig,
23
21
  InlineImageWarning,
22
+ MetadataConfig,
24
23
  OptionsHandle,
25
24
  convert,
26
25
  convert_with_handle,
27
26
  convert_with_inline_images,
27
+ convert_with_metadata,
28
28
  create_options_handle,
29
29
  )
30
30
  from html_to_markdown.exceptions import (
@@ -37,10 +37,6 @@ from html_to_markdown.exceptions import (
37
37
  from html_to_markdown.options import ConversionOptions, PreprocessingOptions
38
38
  from html_to_markdown.v1_compat import convert_to_markdown, markdownify
39
39
 
40
- # Optional metadata support
41
- with contextlib.suppress(ImportError):
42
- from html_to_markdown.api import MetadataConfig, convert_with_metadata
43
-
44
40
  __all__ = [
45
41
  "ConflictingOptionsError",
46
42
  "ConversionOptions",
@@ -63,4 +59,4 @@ __all__ = [
63
59
  "markdownify",
64
60
  ]
65
61
 
66
- __version__ = "2.14.0"
62
+ __version__ = "2.14.5"
Binary file
@@ -141,13 +141,13 @@ class DocumentMetadata(TypedDict):
141
141
  canonical_url: str | None
142
142
  base_href: str | None
143
143
  language: str | None
144
- text_direction: str | None # "ltr" | "rtl" | "auto" | None
144
+ text_direction: str | None
145
145
  open_graph: dict[str, str]
146
146
  twitter_card: dict[str, str]
147
147
  meta_tags: dict[str, str]
148
148
 
149
149
  class HeaderMetadata(TypedDict):
150
- level: int # 1-6
150
+ level: int
151
151
  text: str
152
152
  id: str | None
153
153
  depth: int
@@ -157,7 +157,7 @@ class LinkMetadata(TypedDict):
157
157
  href: str
158
158
  text: str
159
159
  title: str | None
160
- link_type: str # "anchor" | "internal" | "external" | "email" | "phone" | "other"
160
+ link_type: str
161
161
  rel: list[str]
162
162
  attributes: dict[str, str]
163
163
 
@@ -165,12 +165,12 @@ class ImageMetadata(TypedDict):
165
165
  src: str
166
166
  alt: str | None
167
167
  title: str | None
168
- dimensions: tuple[int, int] | None # (width, height)
169
- image_type: str # "data_uri" | "inline_svg" | "external" | "relative"
168
+ dimensions: tuple[int, int] | None
169
+ image_type: str
170
170
  attributes: dict[str, str]
171
171
 
172
172
  class StructuredData(TypedDict):
173
- data_type: str # "json_ld" | "microdata" | "rdfa"
173
+ data_type: str
174
174
  raw_json: str
175
175
  schema_type: str | None
176
176
 
html_to_markdown/api.py CHANGED
@@ -8,20 +8,16 @@ import html_to_markdown._html_to_markdown as _rust
8
8
  from html_to_markdown._html_to_markdown import (
9
9
  ConversionOptionsHandle as OptionsHandle,
10
10
  )
11
- from html_to_markdown._html_to_markdown import InlineImageConfig
11
+ from html_to_markdown._html_to_markdown import (
12
+ InlineImageConfig,
13
+ MetadataConfig,
14
+ )
12
15
  from html_to_markdown.options import ConversionOptions, PreprocessingOptions
13
16
 
14
- _HAS_METADATA = False
15
- try:
16
- from html_to_markdown._html_to_markdown import ExtendedMetadata, MetadataConfig
17
-
18
- _HAS_METADATA = True
19
- except ImportError:
20
- MetadataConfig = None # type: ignore[misc,assignment]
21
- if TYPE_CHECKING:
22
- from html_to_markdown._html_to_markdown import ExtendedMetadata # pragma: no cover
23
- else:
24
- ExtendedMetadata = dict[str, object] # type: ignore[assignment]
17
+ if TYPE_CHECKING:
18
+ from html_to_markdown._html_to_markdown import ExtendedMetadata # pragma: no cover
19
+ else:
20
+ ExtendedMetadata = dict[str, object] # type: ignore[assignment]
25
21
 
26
22
 
27
23
  class InlineImage(TypedDict):
@@ -146,40 +142,43 @@ def convert_with_handle(html: str, handle: OptionsHandle) -> str:
146
142
  return _rust.convert_with_options_handle(html, handle)
147
143
 
148
144
 
149
- if _HAS_METADATA:
150
-
151
- def convert_with_metadata(
152
- html: str,
153
- options: ConversionOptions | None = None,
154
- preprocessing: PreprocessingOptions | None = None,
155
- metadata_config: MetadataConfig | None = None,
156
- ) -> tuple[str, ExtendedMetadata]:
157
- """Convert HTML and extract comprehensive metadata.
158
-
159
- Args:
160
- html: HTML string to convert
161
- options: Optional conversion configuration
162
- preprocessing: Optional preprocessing configuration
163
- metadata_config: Optional metadata extraction configuration
164
-
165
- Returns:
166
- Tuple of (markdown, metadata_dict) where metadata_dict contains:
167
- - document: Document-level metadata (title, description, lang, etc.)
168
- - headers: List of header elements with hierarchy
169
- - links: List of extracted hyperlinks with classification
170
- - images: List of extracted images with metadata
171
- - structured_data: List of JSON-LD, Microdata, or RDFa blocks
172
- """
173
- if options is None:
174
- options = ConversionOptions()
175
- if preprocessing is None:
176
- preprocessing = PreprocessingOptions()
177
- if metadata_config is None:
178
- metadata_config = MetadataConfig()
179
-
180
- rust_options = _to_rust_options(options, preprocessing)
181
- markdown, metadata = _rust.convert_with_metadata(html, rust_options, metadata_config)
182
- return markdown, metadata
145
+ def convert_with_metadata(
146
+ html: str,
147
+ options: ConversionOptions | None = None,
148
+ preprocessing: PreprocessingOptions | None = None,
149
+ metadata_config: MetadataConfig | None = None,
150
+ ) -> tuple[str, ExtendedMetadata]:
151
+ """Convert HTML and extract comprehensive metadata.
152
+
153
+ Args:
154
+ html: HTML string to convert
155
+ options: Optional conversion configuration
156
+ preprocessing: Optional preprocessing configuration
157
+ metadata_config: Optional metadata extraction configuration
158
+
159
+ Returns:
160
+ Tuple of (markdown, metadata_dict) where metadata_dict contains:
161
+ - document: Document-level metadata (title, description, lang, etc.)
162
+ - headers: List of header elements with hierarchy
163
+ - links: List of extracted hyperlinks with classification
164
+ - images: List of extracted images with metadata
165
+ - structured_data: List of JSON-LD, Microdata, or RDFa blocks
166
+ """
167
+ if not hasattr(_rust, "convert_with_metadata"):
168
+ raise ImportError(
169
+ "convert_with_metadata is missing from the native extension; this indicates a broken/partial installation."
170
+ )
171
+
172
+ if options is None:
173
+ options = ConversionOptions()
174
+ if preprocessing is None:
175
+ preprocessing = PreprocessingOptions()
176
+ if metadata_config is None:
177
+ metadata_config = MetadataConfig()
178
+
179
+ rust_options = _to_rust_options(options, preprocessing)
180
+ markdown, metadata = _rust.convert_with_metadata(html, rust_options, metadata_config)
181
+ return markdown, metadata
183
182
 
184
183
 
185
184
  __all__ = [
Binary file
@@ -136,7 +136,6 @@ def convert_to_markdown(
136
136
  )
137
137
 
138
138
  # ~keep: v1 used indented code blocks by default, but switched to backticks when a language was set
139
- # This maintains v1 behavior for backward compatibility
140
139
  code_block_style = "backticks" if code_language else "indented"
141
140
 
142
141
  options = ConversionOptions(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: html-to-markdown
3
- Version: 2.14.0
3
+ Version: 2.14.5
4
4
  Classifier: Development Status :: 5 - Production/Stable
5
5
  Classifier: Environment :: Console
6
6
  Classifier: Intended Audience :: Developers
@@ -44,7 +44,7 @@ High-performance HTML to Markdown converter with a clean Python API (powered by
44
44
  [![Hex.pm](https://img.shields.io/hexpm/v/html_to_markdown.svg)](https://hex.pm/packages/html_to_markdown)
45
45
  [![NuGet](https://img.shields.io/nuget/v/Goldziher.HtmlToMarkdown.svg)](https://www.nuget.org/packages/Goldziher.HtmlToMarkdown/)
46
46
  [![Maven Central](https://img.shields.io/maven-central/v/io.github.goldziher/html-to-markdown.svg)](https://central.sonatype.com/artifact/io.github.goldziher/html-to-markdown)
47
- [![Go Reference](https://pkg.go.dev/badge/github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown.svg)](https://pkg.go.dev/github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown)
47
+ [![Go Reference](https://pkg.go.dev/badge/github.com/Goldziher/html-to-markdown/packages/go/v2/htmltomarkdown.svg)](https://pkg.go.dev/github.com/Goldziher/html-to-markdown/packages/go/v2/htmltomarkdown)
48
48
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/Goldziher/html-to-markdown/blob/main/LICENSE)
49
49
  [![Discord](https://img.shields.io/badge/Discord-Join%20our%20community-7289da)](https://discord.gg/pXxagNK2zN)
50
50
 
@@ -0,0 +1,17 @@
1
+ html_to_markdown-2.14.5.data/scripts/html-to-markdown,sha256=pZtJBTQolbGl_VL3mBo8mLIppGODYhfcq6yOZVldWOg,6263872
2
+ html_to_markdown-2.14.5.dist-info/RECORD,,
3
+ html_to_markdown-2.14.5.dist-info/WHEEL,sha256=WvP__evn8XoyZeDO32cKBm5BQTOFbdB1WoQ-d3AzYdw,132
4
+ html_to_markdown-2.14.5.dist-info/METADATA,sha256=_m5URCaMwleUAZpC712kPn5LKdqwwZesMTzlbm0F1MQ,23252
5
+ html_to_markdown-2.14.5.dist-info/licenses/LICENSE,sha256=oQvPC-0UWvfg0WaeUBe11OJMtX60An-TW1ev_oaAA0k,1086
6
+ html_to_markdown/options.py,sha256=vImRfeHAeyAy0Lnt6cTPHGbj7mTdw8AEUgo19u7MAA0,5080
7
+ html_to_markdown/_html_to_markdown.pyi,sha256=IPD6CegtaanBsKTmK30v4nvWZ5HUlCajS6jkiOsoVj8,5875
8
+ html_to_markdown/_html_to_markdown.abi3.so,sha256=hpn9BCm1Z-79e5NsXTxDjhtloCGWp1I_2aEwRy9-KTI,3503168
9
+ html_to_markdown/__init__.py,sha256=7FQJWsnvd_XVwzVEM7rEKI1XKfqIB81oRnV44qUTJeM,1605
10
+ html_to_markdown/api.py,sha256=MsTij04ij6hFhhNxdc5RXf2yobaRmB0BO1P_fjS4VvY,6806
11
+ html_to_markdown/v1_compat.py,sha256=kn5GYvgn3dTW_Zksu9PzWVk-5CYhvXxsqAeyTdDYZSY,8001
12
+ html_to_markdown/cli.py,sha256=Rn-s3FZPea1jgCJtDzH_TFvOEiA_uZFVfgjhr6xyL_g,64
13
+ html_to_markdown/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
+ html_to_markdown/exceptions.py,sha256=aTASOzbywgfqOYjlw18ZkOWSxKff4EbUbmMua_73TGA,2370
15
+ html_to_markdown/cli_proxy.py,sha256=HPYKH5Mf5OUvkbEQISJvAkxrbjWKxE5GokA44HoQ6z8,3858
16
+ html_to_markdown/__main__.py,sha256=3Ic_EbOt2h6W88q084pkz5IKU6iY5z_woBygH6u9aw0,327
17
+ html_to_markdown/bin/html-to-markdown,sha256=pZtJBTQolbGl_VL3mBo8mLIppGODYhfcq6yOZVldWOg,6263872
@@ -1,17 +0,0 @@
1
- html_to_markdown-2.14.0.dist-info/RECORD,,
2
- html_to_markdown-2.14.0.dist-info/WHEEL,sha256=WvP__evn8XoyZeDO32cKBm5BQTOFbdB1WoQ-d3AzYdw,132
3
- html_to_markdown-2.14.0.dist-info/METADATA,sha256=JG4gq6AeDUutrpmm9yBndbm3rGNfg5TpQH6SpOsuTw4,23246
4
- html_to_markdown-2.14.0.dist-info/licenses/LICENSE,sha256=oQvPC-0UWvfg0WaeUBe11OJMtX60An-TW1ev_oaAA0k,1086
5
- html_to_markdown-2.14.0.data/scripts/html-to-markdown,sha256=IuASvb2-OF95pz52ODWCERrDGRIBc4bWjzETXGcP_3w,6263856
6
- html_to_markdown/options.py,sha256=vImRfeHAeyAy0Lnt6cTPHGbj7mTdw8AEUgo19u7MAA0,5080
7
- html_to_markdown/_html_to_markdown.pyi,sha256=C4Y80U97JpQhjWZxpDb2Lu2ojbFwAZLtygchVdmhYs0,6093
8
- html_to_markdown/_html_to_markdown.abi3.so,sha256=jIVAAeoTyJkoHiTwjskEbKnJxvWuDDQ2HMvzs2qZMzs,3486656
9
- html_to_markdown/__init__.py,sha256=NuQiHxunqCAfbGRYXjgbp31YBXoLlYjXU_Fko4GC5oM,1720
10
- html_to_markdown/api.py,sha256=zXXoFpdDbMIQXl65NT7BjjYu_1xwEM7VNGNUK2zQNfQ,6934
11
- html_to_markdown/v1_compat.py,sha256=nZN8hVd3u4vacbfyLsPMIGqSmJENZgx1Ya0SpqVLi-g,8061
12
- html_to_markdown/cli.py,sha256=Rn-s3FZPea1jgCJtDzH_TFvOEiA_uZFVfgjhr6xyL_g,64
13
- html_to_markdown/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- html_to_markdown/exceptions.py,sha256=aTASOzbywgfqOYjlw18ZkOWSxKff4EbUbmMua_73TGA,2370
15
- html_to_markdown/cli_proxy.py,sha256=HPYKH5Mf5OUvkbEQISJvAkxrbjWKxE5GokA44HoQ6z8,3858
16
- html_to_markdown/__main__.py,sha256=3Ic_EbOt2h6W88q084pkz5IKU6iY5z_woBygH6u9aw0,327
17
- html_to_markdown/bin/html-to-markdown,sha256=IuASvb2-OF95pz52ODWCERrDGRIBc4bWjzETXGcP_3w,6263856