html-to-markdown 2.3.0__cp310-abi3-win_amd64.whl → 2.3.3__cp310-abi3-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of html-to-markdown might be problematic. Click here for more details.
- html_to_markdown/__init__.py +12 -2
- html_to_markdown/_html_to_markdown.pyd +0 -0
- html_to_markdown/api.py +95 -26
- html_to_markdown/bin/html-to-markdown.exe +0 -0
- {html_to_markdown-2.3.0.data → html_to_markdown-2.3.3.data}/scripts/html-to-markdown.exe +0 -0
- {html_to_markdown-2.3.0.dist-info → html_to_markdown-2.3.3.dist-info}/METADATA +1 -1
- html_to_markdown-2.3.3.dist-info/RECORD +17 -0
- html_to_markdown-2.3.0.dist-info/RECORD +0 -17
- {html_to_markdown-2.3.0.dist-info → html_to_markdown-2.3.3.dist-info}/WHEEL +0 -0
- {html_to_markdown-2.3.0.dist-info → html_to_markdown-2.3.3.dist-info}/licenses/LICENSE +0 -0
html_to_markdown/__init__.py
CHANGED
|
@@ -15,7 +15,13 @@ V1 API (backward compatibility):
|
|
|
15
15
|
markdown = convert_to_markdown(html, heading_style="atx")
|
|
16
16
|
"""
|
|
17
17
|
|
|
18
|
-
from html_to_markdown.api import convert
|
|
18
|
+
from html_to_markdown.api import (
|
|
19
|
+
InlineImage,
|
|
20
|
+
InlineImageConfig,
|
|
21
|
+
InlineImageWarning,
|
|
22
|
+
convert,
|
|
23
|
+
convert_with_inline_images,
|
|
24
|
+
)
|
|
19
25
|
from html_to_markdown.exceptions import (
|
|
20
26
|
ConflictingOptionsError,
|
|
21
27
|
EmptyHtmlError,
|
|
@@ -31,12 +37,16 @@ __all__ = [
|
|
|
31
37
|
"ConversionOptions",
|
|
32
38
|
"EmptyHtmlError",
|
|
33
39
|
"HtmlToMarkdownError",
|
|
40
|
+
"InlineImage",
|
|
41
|
+
"InlineImageConfig",
|
|
42
|
+
"InlineImageWarning",
|
|
34
43
|
"InvalidParserError",
|
|
35
44
|
"MissingDependencyError",
|
|
36
45
|
"PreprocessingOptions",
|
|
37
46
|
"convert",
|
|
38
47
|
"convert_to_markdown",
|
|
48
|
+
"convert_with_inline_images",
|
|
39
49
|
"markdownify",
|
|
40
50
|
]
|
|
41
51
|
|
|
42
|
-
__version__ = "2.3.0"
|
|
52
|
+
__version__ = "2.3.3"
|
|
Binary file
|
html_to_markdown/api.py
CHANGED
|
@@ -6,38 +6,52 @@ using the Rust backend for conversion.
|
|
|
6
6
|
|
|
7
7
|
from __future__ import annotations
|
|
8
8
|
|
|
9
|
+
from typing import TYPE_CHECKING, Literal, TypedDict, cast
|
|
10
|
+
|
|
9
11
|
import html_to_markdown._html_to_markdown as _rust # type: ignore[import-not-found]
|
|
10
12
|
from html_to_markdown.options import ConversionOptions, PreprocessingOptions
|
|
11
13
|
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from html_to_markdown._html_to_markdown import InlineImageConfig
|
|
16
|
+
else:
|
|
17
|
+
InlineImageConfig = _rust.InlineImageConfig # type: ignore[misc, assignment]
|
|
12
18
|
|
|
13
|
-
def convert(
|
|
14
|
-
html: str,
|
|
15
|
-
options: ConversionOptions | None = None,
|
|
16
|
-
preprocessing: PreprocessingOptions | None = None,
|
|
17
|
-
) -> str:
|
|
18
|
-
"""Convert HTML to Markdown using the Rust backend.
|
|
19
19
|
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
20
|
+
class InlineImage(TypedDict):
|
|
21
|
+
"""Inline image extracted during conversion."""
|
|
22
|
+
|
|
23
|
+
data: bytes
|
|
24
|
+
format: str
|
|
25
|
+
filename: str | None
|
|
26
|
+
description: str | None
|
|
27
|
+
dimensions: tuple[int, int] | None
|
|
28
|
+
source: Literal["img_data_uri", "svg_element"]
|
|
29
|
+
attributes: dict[str, str]
|
|
24
30
|
|
|
25
|
-
Returns:
|
|
26
|
-
Converted Markdown string.
|
|
27
|
-
"""
|
|
28
|
-
if options is None:
|
|
29
|
-
options = ConversionOptions()
|
|
30
|
-
if preprocessing is None:
|
|
31
|
-
preprocessing = PreprocessingOptions()
|
|
32
31
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
32
|
+
class InlineImageWarning(TypedDict):
|
|
33
|
+
"""Warning produced during inline image extraction."""
|
|
34
|
+
|
|
35
|
+
index: int
|
|
36
|
+
message: str
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _to_rust_preprocessing(options: PreprocessingOptions) -> _rust.PreprocessingOptions:
|
|
40
|
+
"""Convert high-level preprocessing options to the Rust bindings."""
|
|
41
|
+
return _rust.PreprocessingOptions(
|
|
42
|
+
enabled=options.enabled,
|
|
43
|
+
preset=options.preset,
|
|
44
|
+
remove_navigation=options.remove_navigation,
|
|
45
|
+
remove_forms=options.remove_forms,
|
|
38
46
|
)
|
|
39
47
|
|
|
40
|
-
|
|
48
|
+
|
|
49
|
+
def _to_rust_options(
|
|
50
|
+
options: ConversionOptions,
|
|
51
|
+
preprocessing: PreprocessingOptions,
|
|
52
|
+
) -> _rust.ConversionOptions:
|
|
53
|
+
"""Convert high-level conversion options to the Rust bindings."""
|
|
54
|
+
return _rust.ConversionOptions(
|
|
41
55
|
heading_style=options.heading_style,
|
|
42
56
|
list_indent_type=options.list_indent_type,
|
|
43
57
|
list_indent_width=options.list_indent_width,
|
|
@@ -64,11 +78,66 @@ def convert(
|
|
|
64
78
|
newline_style=options.newline_style,
|
|
65
79
|
code_block_style=options.code_block_style,
|
|
66
80
|
keep_inline_images_in=list(options.keep_inline_images_in) if options.keep_inline_images_in else [],
|
|
67
|
-
preprocessing=
|
|
81
|
+
preprocessing=_to_rust_preprocessing(preprocessing),
|
|
68
82
|
encoding=options.encoding,
|
|
69
83
|
debug=options.debug,
|
|
70
84
|
strip_tags=list(options.strip_tags) if options.strip_tags else [],
|
|
71
85
|
)
|
|
72
86
|
|
|
73
|
-
|
|
74
|
-
|
|
87
|
+
|
|
88
|
+
def convert(
|
|
89
|
+
html: str,
|
|
90
|
+
options: ConversionOptions | None = None,
|
|
91
|
+
preprocessing: PreprocessingOptions | None = None,
|
|
92
|
+
) -> str:
|
|
93
|
+
"""Convert HTML to Markdown using the Rust backend.
|
|
94
|
+
|
|
95
|
+
Args:
|
|
96
|
+
html: HTML string to convert.
|
|
97
|
+
options: Conversion configuration options (defaults to ConversionOptions()).
|
|
98
|
+
preprocessing: HTML preprocessing options (defaults to PreprocessingOptions()).
|
|
99
|
+
|
|
100
|
+
Returns:
|
|
101
|
+
Converted Markdown string.
|
|
102
|
+
"""
|
|
103
|
+
if options is None:
|
|
104
|
+
options = ConversionOptions()
|
|
105
|
+
if preprocessing is None:
|
|
106
|
+
preprocessing = PreprocessingOptions()
|
|
107
|
+
|
|
108
|
+
rust_options = _to_rust_options(options, preprocessing)
|
|
109
|
+
return cast("str", _rust.convert(html, rust_options))
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def convert_with_inline_images(
|
|
113
|
+
html: str,
|
|
114
|
+
options: ConversionOptions | None = None,
|
|
115
|
+
preprocessing: PreprocessingOptions | None = None,
|
|
116
|
+
image_config: InlineImageConfig | None = None,
|
|
117
|
+
) -> tuple[str, list[InlineImage], list[InlineImageWarning]]:
|
|
118
|
+
"""Convert HTML and extract inline images.
|
|
119
|
+
|
|
120
|
+
Returns Markdown along with extracted inline images and any warnings.
|
|
121
|
+
"""
|
|
122
|
+
if options is None:
|
|
123
|
+
options = ConversionOptions()
|
|
124
|
+
if preprocessing is None:
|
|
125
|
+
preprocessing = PreprocessingOptions()
|
|
126
|
+
if image_config is None:
|
|
127
|
+
image_config = InlineImageConfig()
|
|
128
|
+
|
|
129
|
+
rust_options = _to_rust_options(options, preprocessing)
|
|
130
|
+
markdown, images, warnings = cast(
|
|
131
|
+
"tuple[str, list[InlineImage], list[InlineImageWarning]]",
|
|
132
|
+
_rust.convert_with_inline_images(html, rust_options, image_config),
|
|
133
|
+
)
|
|
134
|
+
return markdown, list(images), list(warnings)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
__all__ = [
|
|
138
|
+
"InlineImage",
|
|
139
|
+
"InlineImageConfig",
|
|
140
|
+
"InlineImageWarning",
|
|
141
|
+
"convert",
|
|
142
|
+
"convert_with_inline_images",
|
|
143
|
+
]
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
html_to_markdown-2.3.3.data/scripts/html-to-markdown.exe,sha256=A6Ml_r5cPUS5RboXit05znHdztMOwNfL2eT2zdRd0NA,4470272
|
|
2
|
+
html_to_markdown-2.3.3.dist-info/METADATA,sha256=m_eiLS6e9-1ouLZXh_20dqrDt-JBDihQRkpnLCLiR3o,9012
|
|
3
|
+
html_to_markdown-2.3.3.dist-info/WHEEL,sha256=4EDp_7DiFfWl1yYv5M4wSosAn5L_xgD1dyrQxQxfCx8,95
|
|
4
|
+
html_to_markdown-2.3.3.dist-info/licenses/LICENSE,sha256=QhKFMkQLa4mSUlOsyG9VElzC7GYbAKtiS_EwOCyH-b4,1107
|
|
5
|
+
html_to_markdown/__init__.py,sha256=L2YjYJ0ZQV4KkFpXa7kGoCKu8FSlyJvoWNvl_D6J0Oo,1410
|
|
6
|
+
html_to_markdown/__main__.py,sha256=5objj9lB7hhpSpZsDok5tv9o9yztVR63Ccww-pXsAyY,343
|
|
7
|
+
html_to_markdown/_html_to_markdown.pyd,sha256=m0S4v8ET9BmqSBTyQM_3_NpCZMFEOetsaWZtBfvClw4,4214784
|
|
8
|
+
html_to_markdown/_rust.pyi,sha256=JP8tvcjYDfFJeJkbLpQ4qeK-5jl0hzIVT3Sa0daTkyo,2171
|
|
9
|
+
html_to_markdown/api.py,sha256=jDb8PT1cS3KqipT4m_rKBE0R20UKOU85rH-7M6P6Owk,5003
|
|
10
|
+
html_to_markdown/bin/html-to-markdown.exe,sha256=A6Ml_r5cPUS5RboXit05znHdztMOwNfL2eT2zdRd0NA,4470272
|
|
11
|
+
html_to_markdown/cli.py,sha256=z59l8sF8wIRRzJtUd-tXgqiC0WTqkTjzl-df8Ey_oQ0,67
|
|
12
|
+
html_to_markdown/cli_proxy.py,sha256=JGOuINBI8OMYLxojXGz8DdzMHo8eqgdINstOZWrdw-8,3816
|
|
13
|
+
html_to_markdown/exceptions.py,sha256=31VqpPi4JLGv7lI2481Z4f2s5ejYmq97c3s-WFFkXVU,2443
|
|
14
|
+
html_to_markdown/options.py,sha256=jna7fx9bHhx8N7u5IYtMXganFFzdJSVVgLZW0tYk3GA,5054
|
|
15
|
+
html_to_markdown/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
|
+
html_to_markdown/v1_compat.py,sha256=aVt9cVTBfYcrS8EfBsrC6HQwWc3Kz9-65-LB9foN6Jk,8227
|
|
17
|
+
html_to_markdown-2.3.3.dist-info/RECORD,,
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
html_to_markdown-2.3.0.data/scripts/html-to-markdown.exe,sha256=NoG5btr57ihL85-URLcreFRZTiAUfO7km_FiEUMa7xI,4469760
|
|
2
|
-
html_to_markdown-2.3.0.dist-info/METADATA,sha256=wcmX8lAc-dZZp8ETrpbXqu58Kf19n3fE6ubBPd0OfKU,9012
|
|
3
|
-
html_to_markdown-2.3.0.dist-info/WHEEL,sha256=4EDp_7DiFfWl1yYv5M4wSosAn5L_xgD1dyrQxQxfCx8,95
|
|
4
|
-
html_to_markdown-2.3.0.dist-info/licenses/LICENSE,sha256=QhKFMkQLa4mSUlOsyG9VElzC7GYbAKtiS_EwOCyH-b4,1107
|
|
5
|
-
html_to_markdown/__init__.py,sha256=KgR9V82EqdL5S7dzK_USOv6STjyhVRJubDYScHxOJS0,1191
|
|
6
|
-
html_to_markdown/__main__.py,sha256=5objj9lB7hhpSpZsDok5tv9o9yztVR63Ccww-pXsAyY,343
|
|
7
|
-
html_to_markdown/_html_to_markdown.pyd,sha256=Peez6o-WS_O6Cc3YqbxjJzRVw0mLY5HsbJyfeoYj7BY,4214784
|
|
8
|
-
html_to_markdown/_rust.pyi,sha256=JP8tvcjYDfFJeJkbLpQ4qeK-5jl0hzIVT3Sa0daTkyo,2171
|
|
9
|
-
html_to_markdown/api.py,sha256=U7-Tu8TaVa32vveCtiOhTwoEojklkDV2e-6ItAiP3d4,2858
|
|
10
|
-
html_to_markdown/bin/html-to-markdown.exe,sha256=NoG5btr57ihL85-URLcreFRZTiAUfO7km_FiEUMa7xI,4469760
|
|
11
|
-
html_to_markdown/cli.py,sha256=z59l8sF8wIRRzJtUd-tXgqiC0WTqkTjzl-df8Ey_oQ0,67
|
|
12
|
-
html_to_markdown/cli_proxy.py,sha256=JGOuINBI8OMYLxojXGz8DdzMHo8eqgdINstOZWrdw-8,3816
|
|
13
|
-
html_to_markdown/exceptions.py,sha256=31VqpPi4JLGv7lI2481Z4f2s5ejYmq97c3s-WFFkXVU,2443
|
|
14
|
-
html_to_markdown/options.py,sha256=jna7fx9bHhx8N7u5IYtMXganFFzdJSVVgLZW0tYk3GA,5054
|
|
15
|
-
html_to_markdown/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
|
-
html_to_markdown/v1_compat.py,sha256=aVt9cVTBfYcrS8EfBsrC6HQwWc3Kz9-65-LB9foN6Jk,8227
|
|
17
|
-
html_to_markdown-2.3.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|