html-to-markdown 2.3.0__cp310-abi3-win_amd64.whl → 2.3.4__cp310-abi3-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of html-to-markdown might be problematic. Click here for more details.

@@ -15,7 +15,13 @@ V1 API (backward compatibility):
15
15
  markdown = convert_to_markdown(html, heading_style="atx")
16
16
  """
17
17
 
18
- from html_to_markdown.api import convert
18
+ from html_to_markdown.api import (
19
+ InlineImage,
20
+ InlineImageConfig,
21
+ InlineImageWarning,
22
+ convert,
23
+ convert_with_inline_images,
24
+ )
19
25
  from html_to_markdown.exceptions import (
20
26
  ConflictingOptionsError,
21
27
  EmptyHtmlError,
@@ -31,12 +37,16 @@ __all__ = [
31
37
  "ConversionOptions",
32
38
  "EmptyHtmlError",
33
39
  "HtmlToMarkdownError",
40
+ "InlineImage",
41
+ "InlineImageConfig",
42
+ "InlineImageWarning",
34
43
  "InvalidParserError",
35
44
  "MissingDependencyError",
36
45
  "PreprocessingOptions",
37
46
  "convert",
38
47
  "convert_to_markdown",
48
+ "convert_with_inline_images",
39
49
  "markdownify",
40
50
  ]
41
51
 
42
- __version__ = "2.3.0"
52
+ __version__ = "2.3.4"
Binary file
html_to_markdown/api.py CHANGED
@@ -6,38 +6,52 @@ using the Rust backend for conversion.
6
6
 
7
7
  from __future__ import annotations
8
8
 
9
+ from typing import TYPE_CHECKING, Literal, TypedDict, cast
10
+
9
11
  import html_to_markdown._html_to_markdown as _rust # type: ignore[import-not-found]
10
12
  from html_to_markdown.options import ConversionOptions, PreprocessingOptions
11
13
 
14
+ if TYPE_CHECKING:
15
+ from html_to_markdown._html_to_markdown import InlineImageConfig
16
+ else:
17
+ InlineImageConfig = _rust.InlineImageConfig # type: ignore[misc, assignment]
12
18
 
13
- def convert(
14
- html: str,
15
- options: ConversionOptions | None = None,
16
- preprocessing: PreprocessingOptions | None = None,
17
- ) -> str:
18
- """Convert HTML to Markdown using the Rust backend.
19
19
 
20
- Args:
21
- html: HTML string to convert.
22
- options: Conversion configuration options (defaults to ConversionOptions()).
23
- preprocessing: HTML preprocessing options (defaults to PreprocessingOptions()).
20
+ class InlineImage(TypedDict):
21
+ """Inline image extracted during conversion."""
22
+
23
+ data: bytes
24
+ format: str
25
+ filename: str | None
26
+ description: str | None
27
+ dimensions: tuple[int, int] | None
28
+ source: Literal["img_data_uri", "svg_element"]
29
+ attributes: dict[str, str]
24
30
 
25
- Returns:
26
- Converted Markdown string.
27
- """
28
- if options is None:
29
- options = ConversionOptions()
30
- if preprocessing is None:
31
- preprocessing = PreprocessingOptions()
32
31
 
33
- rust_preprocessing = _rust.PreprocessingOptions(
34
- enabled=preprocessing.enabled,
35
- preset=preprocessing.preset,
36
- remove_navigation=preprocessing.remove_navigation,
37
- remove_forms=preprocessing.remove_forms,
32
+ class InlineImageWarning(TypedDict):
33
+ """Warning produced during inline image extraction."""
34
+
35
+ index: int
36
+ message: str
37
+
38
+
39
+ def _to_rust_preprocessing(options: PreprocessingOptions) -> _rust.PreprocessingOptions:
40
+ """Convert high-level preprocessing options to the Rust bindings."""
41
+ return _rust.PreprocessingOptions(
42
+ enabled=options.enabled,
43
+ preset=options.preset,
44
+ remove_navigation=options.remove_navigation,
45
+ remove_forms=options.remove_forms,
38
46
  )
39
47
 
40
- rust_options = _rust.ConversionOptions(
48
+
49
+ def _to_rust_options(
50
+ options: ConversionOptions,
51
+ preprocessing: PreprocessingOptions,
52
+ ) -> _rust.ConversionOptions:
53
+ """Convert high-level conversion options to the Rust bindings."""
54
+ return _rust.ConversionOptions(
41
55
  heading_style=options.heading_style,
42
56
  list_indent_type=options.list_indent_type,
43
57
  list_indent_width=options.list_indent_width,
@@ -64,11 +78,66 @@ def convert(
64
78
  newline_style=options.newline_style,
65
79
  code_block_style=options.code_block_style,
66
80
  keep_inline_images_in=list(options.keep_inline_images_in) if options.keep_inline_images_in else [],
67
- preprocessing=rust_preprocessing,
81
+ preprocessing=_to_rust_preprocessing(preprocessing),
68
82
  encoding=options.encoding,
69
83
  debug=options.debug,
70
84
  strip_tags=list(options.strip_tags) if options.strip_tags else [],
71
85
  )
72
86
 
73
- result: str = _rust.convert(html, rust_options)
74
- return result
87
+
88
+ def convert(
89
+ html: str,
90
+ options: ConversionOptions | None = None,
91
+ preprocessing: PreprocessingOptions | None = None,
92
+ ) -> str:
93
+ """Convert HTML to Markdown using the Rust backend.
94
+
95
+ Args:
96
+ html: HTML string to convert.
97
+ options: Conversion configuration options (defaults to ConversionOptions()).
98
+ preprocessing: HTML preprocessing options (defaults to PreprocessingOptions()).
99
+
100
+ Returns:
101
+ Converted Markdown string.
102
+ """
103
+ if options is None:
104
+ options = ConversionOptions()
105
+ if preprocessing is None:
106
+ preprocessing = PreprocessingOptions()
107
+
108
+ rust_options = _to_rust_options(options, preprocessing)
109
+ return cast("str", _rust.convert(html, rust_options))
110
+
111
+
112
+ def convert_with_inline_images(
113
+ html: str,
114
+ options: ConversionOptions | None = None,
115
+ preprocessing: PreprocessingOptions | None = None,
116
+ image_config: InlineImageConfig | None = None,
117
+ ) -> tuple[str, list[InlineImage], list[InlineImageWarning]]:
118
+ """Convert HTML and extract inline images.
119
+
120
+ Returns Markdown along with extracted inline images and any warnings.
121
+ """
122
+ if options is None:
123
+ options = ConversionOptions()
124
+ if preprocessing is None:
125
+ preprocessing = PreprocessingOptions()
126
+ if image_config is None:
127
+ image_config = InlineImageConfig()
128
+
129
+ rust_options = _to_rust_options(options, preprocessing)
130
+ markdown, images, warnings = cast(
131
+ "tuple[str, list[InlineImage], list[InlineImageWarning]]",
132
+ _rust.convert_with_inline_images(html, rust_options, image_config),
133
+ )
134
+ return markdown, list(images), list(warnings)
135
+
136
+
137
+ __all__ = [
138
+ "InlineImage",
139
+ "InlineImageConfig",
140
+ "InlineImageWarning",
141
+ "convert",
142
+ "convert_with_inline_images",
143
+ ]
Binary file
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: html-to-markdown
3
- Version: 2.3.0
3
+ Version: 2.3.4
4
4
  Classifier: Development Status :: 5 - Production/Stable
5
5
  Classifier: Environment :: Console
6
6
  Classifier: Intended Audience :: Developers
@@ -0,0 +1,17 @@
1
+ html_to_markdown-2.3.4.data/scripts/html-to-markdown.exe,sha256=_d3wckq2iLwVEUENRr5yDwmXSqRkhI2_umhtuaKPBbI,4470272
2
+ html_to_markdown-2.3.4.dist-info/METADATA,sha256=t4Tw_UzLr7ewVN7deNNtkyUh_8XGD3SoafiGxlRSJDk,9012
3
+ html_to_markdown-2.3.4.dist-info/WHEEL,sha256=4EDp_7DiFfWl1yYv5M4wSosAn5L_xgD1dyrQxQxfCx8,95
4
+ html_to_markdown-2.3.4.dist-info/licenses/LICENSE,sha256=QhKFMkQLa4mSUlOsyG9VElzC7GYbAKtiS_EwOCyH-b4,1107
5
+ html_to_markdown/__init__.py,sha256=P7YA3QFT6Le-BF1c-thxQuKzXFHaQEWhZCHPmkirYWU,1410
6
+ html_to_markdown/__main__.py,sha256=5objj9lB7hhpSpZsDok5tv9o9yztVR63Ccww-pXsAyY,343
7
+ html_to_markdown/_html_to_markdown.pyd,sha256=B6k-CqYNPzJ0rorjOl-Rcox8iDXk7LYshHY15B8g1_A,4214784
8
+ html_to_markdown/_rust.pyi,sha256=JP8tvcjYDfFJeJkbLpQ4qeK-5jl0hzIVT3Sa0daTkyo,2171
9
+ html_to_markdown/api.py,sha256=jDb8PT1cS3KqipT4m_rKBE0R20UKOU85rH-7M6P6Owk,5003
10
+ html_to_markdown/bin/html-to-markdown.exe,sha256=_d3wckq2iLwVEUENRr5yDwmXSqRkhI2_umhtuaKPBbI,4470272
11
+ html_to_markdown/cli.py,sha256=z59l8sF8wIRRzJtUd-tXgqiC0WTqkTjzl-df8Ey_oQ0,67
12
+ html_to_markdown/cli_proxy.py,sha256=JGOuINBI8OMYLxojXGz8DdzMHo8eqgdINstOZWrdw-8,3816
13
+ html_to_markdown/exceptions.py,sha256=31VqpPi4JLGv7lI2481Z4f2s5ejYmq97c3s-WFFkXVU,2443
14
+ html_to_markdown/options.py,sha256=jna7fx9bHhx8N7u5IYtMXganFFzdJSVVgLZW0tYk3GA,5054
15
+ html_to_markdown/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
+ html_to_markdown/v1_compat.py,sha256=aVt9cVTBfYcrS8EfBsrC6HQwWc3Kz9-65-LB9foN6Jk,8227
17
+ html_to_markdown-2.3.4.dist-info/RECORD,,
@@ -1,17 +0,0 @@
1
- html_to_markdown-2.3.0.data/scripts/html-to-markdown.exe,sha256=NoG5btr57ihL85-URLcreFRZTiAUfO7km_FiEUMa7xI,4469760
2
- html_to_markdown-2.3.0.dist-info/METADATA,sha256=wcmX8lAc-dZZp8ETrpbXqu58Kf19n3fE6ubBPd0OfKU,9012
3
- html_to_markdown-2.3.0.dist-info/WHEEL,sha256=4EDp_7DiFfWl1yYv5M4wSosAn5L_xgD1dyrQxQxfCx8,95
4
- html_to_markdown-2.3.0.dist-info/licenses/LICENSE,sha256=QhKFMkQLa4mSUlOsyG9VElzC7GYbAKtiS_EwOCyH-b4,1107
5
- html_to_markdown/__init__.py,sha256=KgR9V82EqdL5S7dzK_USOv6STjyhVRJubDYScHxOJS0,1191
6
- html_to_markdown/__main__.py,sha256=5objj9lB7hhpSpZsDok5tv9o9yztVR63Ccww-pXsAyY,343
7
- html_to_markdown/_html_to_markdown.pyd,sha256=Peez6o-WS_O6Cc3YqbxjJzRVw0mLY5HsbJyfeoYj7BY,4214784
8
- html_to_markdown/_rust.pyi,sha256=JP8tvcjYDfFJeJkbLpQ4qeK-5jl0hzIVT3Sa0daTkyo,2171
9
- html_to_markdown/api.py,sha256=U7-Tu8TaVa32vveCtiOhTwoEojklkDV2e-6ItAiP3d4,2858
10
- html_to_markdown/bin/html-to-markdown.exe,sha256=NoG5btr57ihL85-URLcreFRZTiAUfO7km_FiEUMa7xI,4469760
11
- html_to_markdown/cli.py,sha256=z59l8sF8wIRRzJtUd-tXgqiC0WTqkTjzl-df8Ey_oQ0,67
12
- html_to_markdown/cli_proxy.py,sha256=JGOuINBI8OMYLxojXGz8DdzMHo8eqgdINstOZWrdw-8,3816
13
- html_to_markdown/exceptions.py,sha256=31VqpPi4JLGv7lI2481Z4f2s5ejYmq97c3s-WFFkXVU,2443
14
- html_to_markdown/options.py,sha256=jna7fx9bHhx8N7u5IYtMXganFFzdJSVVgLZW0tYk3GA,5054
15
- html_to_markdown/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
- html_to_markdown/v1_compat.py,sha256=aVt9cVTBfYcrS8EfBsrC6HQwWc3Kz9-65-LB9foN6Jk,8227
17
- html_to_markdown-2.3.0.dist-info/RECORD,,