html-to-markdown 2.1.2__cp310-abi3-win_amd64.whl → 2.2.0__cp310-abi3-win_amd64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of html-to-markdown might be problematic. Click here for more details.

@@ -39,4 +39,4 @@ __all__ = [
39
39
  "markdownify",
40
40
  ]
41
41
 
42
- __version__ = "2.1.1"
42
+ __version__ = "2.2.0"
Binary file
@@ -11,6 +11,7 @@ class ConversionOptions:
11
11
  autolinks: bool
12
12
  default_title: bool
13
13
  br_in_tables: bool
14
+ hocr_spatial_tables: bool
14
15
  highlight_style: str
15
16
  extract_metadata: bool
16
17
  whitespace_mode: str
@@ -39,6 +40,7 @@ class ConversionOptions:
39
40
  autolinks: bool = True,
40
41
  default_title: bool = False,
41
42
  br_in_tables: bool = False,
43
+ hocr_spatial_tables: bool = True,
42
44
  highlight_style: str = "double-equal",
43
45
  extract_metadata: bool = True,
44
46
  whitespace_mode: str = "normalized",
html_to_markdown/api.py CHANGED
@@ -51,6 +51,7 @@ def convert(
51
51
  autolinks=options.autolinks,
52
52
  default_title=options.default_title,
53
53
  br_in_tables=options.br_in_tables,
54
+ hocr_spatial_tables=options.hocr_spatial_tables,
54
55
  highlight_style=options.highlight_style,
55
56
  extract_metadata=options.extract_metadata,
56
57
  whitespace_mode=options.whitespace_mode,
Binary file
@@ -71,6 +71,9 @@ class ConversionOptions:
71
71
  br_in_tables: bool = False
72
72
  """Use <br> tags for line breaks in table cells instead of spaces."""
73
73
 
74
+ hocr_spatial_tables: bool = True
75
+ """Reconstruct tables in hOCR documents using spatial heuristics."""
76
+
74
77
  highlight_style: Literal["double-equal", "html", "bold"] = "double-equal"
75
78
  """Style for highlighting <mark> elements."""
76
79
 
@@ -120,8 +120,11 @@ def convert_to_markdown(
120
120
  if custom_converters is not None:
121
121
  raise NotImplementedError("custom_converters is not yet implemented in v2")
122
122
  if not hocr_extract_tables:
123
- raise NotImplementedError(
124
- "hocr_extract_tables toggle was removed in v2. hOCR tables are always reconstructed when detected."
123
+ warnings.warn(
124
+ "hocr_extract_tables is deprecated and will be removed in a future release. "
125
+ "Use ConversionOptions(hocr_spatial_tables=False) to disable spatial table reconstruction.",
126
+ DeprecationWarning,
127
+ stacklevel=2,
125
128
  )
126
129
  if hocr_table_column_threshold != 50 or hocr_table_row_threshold_ratio != 0.5:
127
130
  raise NotImplementedError(
@@ -146,6 +149,7 @@ def convert_to_markdown(
146
149
  autolinks=autolinks,
147
150
  default_title=default_title,
148
151
  br_in_tables=br_in_tables,
152
+ hocr_spatial_tables=hocr_extract_tables,
149
153
  highlight_style=highlight_style, # type: ignore[arg-type]
150
154
  extract_metadata=extract_metadata,
151
155
  whitespace_mode=whitespace_mode, # type: ignore[arg-type]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: html-to-markdown
3
- Version: 2.1.2
3
+ Version: 2.2.0
4
4
  Classifier: Development Status :: 5 - Production/Stable
5
5
  Classifier: Environment :: Console
6
6
  Classifier: Intended Audience :: Developers
@@ -0,0 +1,17 @@
1
+ html_to_markdown-2.2.0.data/scripts/html-to-markdown.exe,sha256=aMAtVnodlg1vMH3l9sQOHHpRJN6c9D0nEnBqjsjz0Cg,4465664
2
+ html_to_markdown-2.2.0.dist-info/METADATA,sha256=dpiwXABx23YyxSxDe9hiHcspTwNLtCO35TMXLt7KAYQ,7233
3
+ html_to_markdown-2.2.0.dist-info/WHEEL,sha256=4EDp_7DiFfWl1yYv5M4wSosAn5L_xgD1dyrQxQxfCx8,95
4
+ html_to_markdown-2.2.0.dist-info/licenses/LICENSE,sha256=QhKFMkQLa4mSUlOsyG9VElzC7GYbAKtiS_EwOCyH-b4,1107
5
+ html_to_markdown/__init__.py,sha256=qNK0cpaGcb8H2zl9BFO9sG_RQs4hMp_GrihUuIHqV10,1191
6
+ html_to_markdown/__main__.py,sha256=5objj9lB7hhpSpZsDok5tv9o9yztVR63Ccww-pXsAyY,343
7
+ html_to_markdown/_html_to_markdown.pyd,sha256=44hglf_qiVnD8-bCpsScL_qxKiWRlNjl1ai_Hpko26U,4211200
8
+ html_to_markdown/_rust.pyi,sha256=JP8tvcjYDfFJeJkbLpQ4qeK-5jl0hzIVT3Sa0daTkyo,2171
9
+ html_to_markdown/api.py,sha256=U7-Tu8TaVa32vveCtiOhTwoEojklkDV2e-6ItAiP3d4,2858
10
+ html_to_markdown/bin/html-to-markdown.exe,sha256=aMAtVnodlg1vMH3l9sQOHHpRJN6c9D0nEnBqjsjz0Cg,4465664
11
+ html_to_markdown/cli.py,sha256=z59l8sF8wIRRzJtUd-tXgqiC0WTqkTjzl-df8Ey_oQ0,67
12
+ html_to_markdown/cli_proxy.py,sha256=J2Qk9MnnkFKIroxc0wn79nzI0dXqXDDNEAF9o9hth9Y,3829
13
+ html_to_markdown/exceptions.py,sha256=31VqpPi4JLGv7lI2481Z4f2s5ejYmq97c3s-WFFkXVU,2443
14
+ html_to_markdown/options.py,sha256=jna7fx9bHhx8N7u5IYtMXganFFzdJSVVgLZW0tYk3GA,5054
15
+ html_to_markdown/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
+ html_to_markdown/v1_compat.py,sha256=aVt9cVTBfYcrS8EfBsrC6HQwWc3Kz9-65-LB9foN6Jk,8227
17
+ html_to_markdown-2.2.0.dist-info/RECORD,,
@@ -1,17 +0,0 @@
1
- html_to_markdown-2.1.2.data/scripts/html-to-markdown.exe,sha256=SJCBlZp0uTo6_wzS7KEgXariZtABUVc64o5TVxOwVZo,4414976
2
- html_to_markdown-2.1.2.dist-info/METADATA,sha256=TgFH9djK4HzJF_vDFVZCm7EDXYscA4v9t31DuXCujIE,7233
3
- html_to_markdown-2.1.2.dist-info/WHEEL,sha256=4EDp_7DiFfWl1yYv5M4wSosAn5L_xgD1dyrQxQxfCx8,95
4
- html_to_markdown-2.1.2.dist-info/licenses/LICENSE,sha256=QhKFMkQLa4mSUlOsyG9VElzC7GYbAKtiS_EwOCyH-b4,1107
5
- html_to_markdown/__init__.py,sha256=3_Egcf46oNcEam7rc7zAHx8lfOj1eVNO1p0kErVf_fs,1191
6
- html_to_markdown/__main__.py,sha256=5objj9lB7hhpSpZsDok5tv9o9yztVR63Ccww-pXsAyY,343
7
- html_to_markdown/_html_to_markdown.pyd,sha256=ES7QEe9lTb2ZK3yvC2-vNHng__U7HB3CY5p2wJ0IuNQ,4159488
8
- html_to_markdown/_rust.pyi,sha256=SHrrT8opJd5kcRYycooR4AS9is5tr1beSGtpoUWqzNc,2097
9
- html_to_markdown/api.py,sha256=YQQuJoO1OQnXpuOLk8TbdQDTARcKYFbf_zSA44BeHCM,2800
10
- html_to_markdown/bin/html-to-markdown.exe,sha256=SJCBlZp0uTo6_wzS7KEgXariZtABUVc64o5TVxOwVZo,4414976
11
- html_to_markdown/cli.py,sha256=z59l8sF8wIRRzJtUd-tXgqiC0WTqkTjzl-df8Ey_oQ0,67
12
- html_to_markdown/cli_proxy.py,sha256=J2Qk9MnnkFKIroxc0wn79nzI0dXqXDDNEAF9o9hth9Y,3829
13
- html_to_markdown/exceptions.py,sha256=31VqpPi4JLGv7lI2481Z4f2s5ejYmq97c3s-WFFkXVU,2443
14
- html_to_markdown/options.py,sha256=ijjRBTwrbESbwmYTOXV_ZO1A1GAmOzzILiFoPeC-jZk,4940
15
- html_to_markdown/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
- html_to_markdown/v1_compat.py,sha256=Lb3pppLfVH9EyAYGbOfpcO3vYkof4SIYDMI-CBEbh-A,8045
17
- html_to_markdown-2.1.2.dist-info/RECORD,,