html-to-markdown 2.1.0__cp310-abi3-win_amd64.whl → 2.2.0__cp310-abi3-win_amd64.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Potentially problematic release.
This version of html-to-markdown might be problematic. Click here for more details.
- html_to_markdown/__init__.py +1 -1
- html_to_markdown/_html_to_markdown.pyd +0 -0
- html_to_markdown/_rust.pyi +2 -0
- html_to_markdown/api.py +1 -0
- html_to_markdown/bin/html-to-markdown.exe +0 -0
- html_to_markdown/options.py +3 -0
- html_to_markdown/v1_compat.py +6 -2
- {html_to_markdown-2.1.0.data → html_to_markdown-2.2.0.data}/scripts/html-to-markdown.exe +0 -0
- {html_to_markdown-2.1.0.dist-info → html_to_markdown-2.2.0.dist-info}/METADATA +1 -1
- html_to_markdown-2.2.0.dist-info/RECORD +17 -0
- html_to_markdown-2.1.0.dist-info/RECORD +0 -17
- {html_to_markdown-2.1.0.dist-info → html_to_markdown-2.2.0.dist-info}/WHEEL +0 -0
- {html_to_markdown-2.1.0.dist-info → html_to_markdown-2.2.0.dist-info}/licenses/LICENSE +0 -0
html_to_markdown/__init__.py
CHANGED
|
Binary file
|
html_to_markdown/_rust.pyi
CHANGED
|
@@ -11,6 +11,7 @@ class ConversionOptions:
|
|
|
11
11
|
autolinks: bool
|
|
12
12
|
default_title: bool
|
|
13
13
|
br_in_tables: bool
|
|
14
|
+
hocr_spatial_tables: bool
|
|
14
15
|
highlight_style: str
|
|
15
16
|
extract_metadata: bool
|
|
16
17
|
whitespace_mode: str
|
|
@@ -39,6 +40,7 @@ class ConversionOptions:
|
|
|
39
40
|
autolinks: bool = True,
|
|
40
41
|
default_title: bool = False,
|
|
41
42
|
br_in_tables: bool = False,
|
|
43
|
+
hocr_spatial_tables: bool = True,
|
|
42
44
|
highlight_style: str = "double-equal",
|
|
43
45
|
extract_metadata: bool = True,
|
|
44
46
|
whitespace_mode: str = "normalized",
|
html_to_markdown/api.py
CHANGED
|
@@ -51,6 +51,7 @@ def convert(
|
|
|
51
51
|
autolinks=options.autolinks,
|
|
52
52
|
default_title=options.default_title,
|
|
53
53
|
br_in_tables=options.br_in_tables,
|
|
54
|
+
hocr_spatial_tables=options.hocr_spatial_tables,
|
|
54
55
|
highlight_style=options.highlight_style,
|
|
55
56
|
extract_metadata=options.extract_metadata,
|
|
56
57
|
whitespace_mode=options.whitespace_mode,
|
|
Binary file
|
html_to_markdown/options.py
CHANGED
|
@@ -71,6 +71,9 @@ class ConversionOptions:
|
|
|
71
71
|
br_in_tables: bool = False
|
|
72
72
|
"""Use <br> tags for line breaks in table cells instead of spaces."""
|
|
73
73
|
|
|
74
|
+
hocr_spatial_tables: bool = True
|
|
75
|
+
"""Reconstruct tables in hOCR documents using spatial heuristics."""
|
|
76
|
+
|
|
74
77
|
highlight_style: Literal["double-equal", "html", "bold"] = "double-equal"
|
|
75
78
|
"""Style for highlighting <mark> elements."""
|
|
76
79
|
|
html_to_markdown/v1_compat.py
CHANGED
|
@@ -120,8 +120,11 @@ def convert_to_markdown(
|
|
|
120
120
|
if custom_converters is not None:
|
|
121
121
|
raise NotImplementedError("custom_converters is not yet implemented in v2")
|
|
122
122
|
if not hocr_extract_tables:
|
|
123
|
-
|
|
124
|
-
"hocr_extract_tables
|
|
123
|
+
warnings.warn(
|
|
124
|
+
"hocr_extract_tables is deprecated and will be removed in a future release. "
|
|
125
|
+
"Use ConversionOptions(hocr_spatial_tables=False) to disable spatial table reconstruction.",
|
|
126
|
+
DeprecationWarning,
|
|
127
|
+
stacklevel=2,
|
|
125
128
|
)
|
|
126
129
|
if hocr_table_column_threshold != 50 or hocr_table_row_threshold_ratio != 0.5:
|
|
127
130
|
raise NotImplementedError(
|
|
@@ -146,6 +149,7 @@ def convert_to_markdown(
|
|
|
146
149
|
autolinks=autolinks,
|
|
147
150
|
default_title=default_title,
|
|
148
151
|
br_in_tables=br_in_tables,
|
|
152
|
+
hocr_spatial_tables=hocr_extract_tables,
|
|
149
153
|
highlight_style=highlight_style, # type: ignore[arg-type]
|
|
150
154
|
extract_metadata=extract_metadata,
|
|
151
155
|
whitespace_mode=whitespace_mode, # type: ignore[arg-type]
|
|
Binary file
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
html_to_markdown-2.2.0.data/scripts/html-to-markdown.exe,sha256=aMAtVnodlg1vMH3l9sQOHHpRJN6c9D0nEnBqjsjz0Cg,4465664
|
|
2
|
+
html_to_markdown-2.2.0.dist-info/METADATA,sha256=dpiwXABx23YyxSxDe9hiHcspTwNLtCO35TMXLt7KAYQ,7233
|
|
3
|
+
html_to_markdown-2.2.0.dist-info/WHEEL,sha256=4EDp_7DiFfWl1yYv5M4wSosAn5L_xgD1dyrQxQxfCx8,95
|
|
4
|
+
html_to_markdown-2.2.0.dist-info/licenses/LICENSE,sha256=QhKFMkQLa4mSUlOsyG9VElzC7GYbAKtiS_EwOCyH-b4,1107
|
|
5
|
+
html_to_markdown/__init__.py,sha256=qNK0cpaGcb8H2zl9BFO9sG_RQs4hMp_GrihUuIHqV10,1191
|
|
6
|
+
html_to_markdown/__main__.py,sha256=5objj9lB7hhpSpZsDok5tv9o9yztVR63Ccww-pXsAyY,343
|
|
7
|
+
html_to_markdown/_html_to_markdown.pyd,sha256=44hglf_qiVnD8-bCpsScL_qxKiWRlNjl1ai_Hpko26U,4211200
|
|
8
|
+
html_to_markdown/_rust.pyi,sha256=JP8tvcjYDfFJeJkbLpQ4qeK-5jl0hzIVT3Sa0daTkyo,2171
|
|
9
|
+
html_to_markdown/api.py,sha256=U7-Tu8TaVa32vveCtiOhTwoEojklkDV2e-6ItAiP3d4,2858
|
|
10
|
+
html_to_markdown/bin/html-to-markdown.exe,sha256=aMAtVnodlg1vMH3l9sQOHHpRJN6c9D0nEnBqjsjz0Cg,4465664
|
|
11
|
+
html_to_markdown/cli.py,sha256=z59l8sF8wIRRzJtUd-tXgqiC0WTqkTjzl-df8Ey_oQ0,67
|
|
12
|
+
html_to_markdown/cli_proxy.py,sha256=J2Qk9MnnkFKIroxc0wn79nzI0dXqXDDNEAF9o9hth9Y,3829
|
|
13
|
+
html_to_markdown/exceptions.py,sha256=31VqpPi4JLGv7lI2481Z4f2s5ejYmq97c3s-WFFkXVU,2443
|
|
14
|
+
html_to_markdown/options.py,sha256=jna7fx9bHhx8N7u5IYtMXganFFzdJSVVgLZW0tYk3GA,5054
|
|
15
|
+
html_to_markdown/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
|
+
html_to_markdown/v1_compat.py,sha256=aVt9cVTBfYcrS8EfBsrC6HQwWc3Kz9-65-LB9foN6Jk,8227
|
|
17
|
+
html_to_markdown-2.2.0.dist-info/RECORD,,
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
html_to_markdown-2.1.0.data/scripts/html-to-markdown.exe,sha256=KTLXwvysi6qTuC98zNR61AcY7fQfQnjGdjb9f3vizHE,4414976
|
|
2
|
-
html_to_markdown-2.1.0.dist-info/METADATA,sha256=fhmaYGx2LDFfgtHXsVcr2ASa-V595-drAyu3tVH-Zgo,7233
|
|
3
|
-
html_to_markdown-2.1.0.dist-info/WHEEL,sha256=4EDp_7DiFfWl1yYv5M4wSosAn5L_xgD1dyrQxQxfCx8,95
|
|
4
|
-
html_to_markdown-2.1.0.dist-info/licenses/LICENSE,sha256=QhKFMkQLa4mSUlOsyG9VElzC7GYbAKtiS_EwOCyH-b4,1107
|
|
5
|
-
html_to_markdown/__init__.py,sha256=dPrT1-0PNyk-MqMjRjRBadV_wpZUEY0Hehi4njK_sag,1191
|
|
6
|
-
html_to_markdown/__main__.py,sha256=5objj9lB7hhpSpZsDok5tv9o9yztVR63Ccww-pXsAyY,343
|
|
7
|
-
html_to_markdown/_html_to_markdown.pyd,sha256=i2GJLjEHkSfyvjtgJ94hhxiRCuRA7rDxmkRGLAHpOJs,4158976
|
|
8
|
-
html_to_markdown/_rust.pyi,sha256=SHrrT8opJd5kcRYycooR4AS9is5tr1beSGtpoUWqzNc,2097
|
|
9
|
-
html_to_markdown/api.py,sha256=YQQuJoO1OQnXpuOLk8TbdQDTARcKYFbf_zSA44BeHCM,2800
|
|
10
|
-
html_to_markdown/bin/html-to-markdown.exe,sha256=KTLXwvysi6qTuC98zNR61AcY7fQfQnjGdjb9f3vizHE,4414976
|
|
11
|
-
html_to_markdown/cli.py,sha256=z59l8sF8wIRRzJtUd-tXgqiC0WTqkTjzl-df8Ey_oQ0,67
|
|
12
|
-
html_to_markdown/cli_proxy.py,sha256=J2Qk9MnnkFKIroxc0wn79nzI0dXqXDDNEAF9o9hth9Y,3829
|
|
13
|
-
html_to_markdown/exceptions.py,sha256=31VqpPi4JLGv7lI2481Z4f2s5ejYmq97c3s-WFFkXVU,2443
|
|
14
|
-
html_to_markdown/options.py,sha256=ijjRBTwrbESbwmYTOXV_ZO1A1GAmOzzILiFoPeC-jZk,4940
|
|
15
|
-
html_to_markdown/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
|
-
html_to_markdown/v1_compat.py,sha256=Lb3pppLfVH9EyAYGbOfpcO3vYkof4SIYDMI-CBEbh-A,8045
|
|
17
|
-
html_to_markdown-2.1.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|