html-to-markdown 2.1.0__cp310-abi3-macosx_11_0_arm64.whl → 2.2.0__cp310-abi3-macosx_11_0_arm64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of html-to-markdown might be problematic. Click here for more details.

@@ -39,4 +39,4 @@ __all__ = [
39
39
  "markdownify",
40
40
  ]
41
41
 
42
- __version__ = "2.1.0"
42
+ __version__ = "2.2.0"
Binary file
@@ -11,6 +11,7 @@ class ConversionOptions:
11
11
  autolinks: bool
12
12
  default_title: bool
13
13
  br_in_tables: bool
14
+ hocr_spatial_tables: bool
14
15
  highlight_style: str
15
16
  extract_metadata: bool
16
17
  whitespace_mode: str
@@ -39,6 +40,7 @@ class ConversionOptions:
39
40
  autolinks: bool = True,
40
41
  default_title: bool = False,
41
42
  br_in_tables: bool = False,
43
+ hocr_spatial_tables: bool = True,
42
44
  highlight_style: str = "double-equal",
43
45
  extract_metadata: bool = True,
44
46
  whitespace_mode: str = "normalized",
html_to_markdown/api.py CHANGED
@@ -51,6 +51,7 @@ def convert(
51
51
  autolinks=options.autolinks,
52
52
  default_title=options.default_title,
53
53
  br_in_tables=options.br_in_tables,
54
+ hocr_spatial_tables=options.hocr_spatial_tables,
54
55
  highlight_style=options.highlight_style,
55
56
  extract_metadata=options.extract_metadata,
56
57
  whitespace_mode=options.whitespace_mode,
Binary file
@@ -71,6 +71,9 @@ class ConversionOptions:
71
71
  br_in_tables: bool = False
72
72
  """Use <br> tags for line breaks in table cells instead of spaces."""
73
73
 
74
+ hocr_spatial_tables: bool = True
75
+ """Reconstruct tables in hOCR documents using spatial heuristics."""
76
+
74
77
  highlight_style: Literal["double-equal", "html", "bold"] = "double-equal"
75
78
  """Style for highlighting <mark> elements."""
76
79
 
@@ -120,8 +120,11 @@ def convert_to_markdown(
120
120
  if custom_converters is not None:
121
121
  raise NotImplementedError("custom_converters is not yet implemented in v2")
122
122
  if not hocr_extract_tables:
123
- raise NotImplementedError(
124
- "hocr_extract_tables toggle was removed in v2. hOCR tables are always reconstructed when detected."
123
+ warnings.warn(
124
+ "hocr_extract_tables is deprecated and will be removed in a future release. "
125
+ "Use ConversionOptions(hocr_spatial_tables=False) to disable spatial table reconstruction.",
126
+ DeprecationWarning,
127
+ stacklevel=2,
125
128
  )
126
129
  if hocr_table_column_threshold != 50 or hocr_table_row_threshold_ratio != 0.5:
127
130
  raise NotImplementedError(
@@ -146,6 +149,7 @@ def convert_to_markdown(
146
149
  autolinks=autolinks,
147
150
  default_title=default_title,
148
151
  br_in_tables=br_in_tables,
152
+ hocr_spatial_tables=hocr_extract_tables,
149
153
  highlight_style=highlight_style, # type: ignore[arg-type]
150
154
  extract_metadata=extract_metadata,
151
155
  whitespace_mode=whitespace_mode, # type: ignore[arg-type]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: html-to-markdown
3
- Version: 2.1.0
3
+ Version: 2.2.0
4
4
  Classifier: Development Status :: 5 - Production/Stable
5
5
  Classifier: Environment :: Console
6
6
  Classifier: Intended Audience :: Developers
@@ -0,0 +1,17 @@
1
+ html_to_markdown-2.2.0.data/scripts/html-to-markdown,sha256=_jgp22NNhPsmx9MgSb11wrTWiYpwnzZ7qZtpdXrudls,3817824
2
+ html_to_markdown-2.2.0.dist-info/RECORD,,
3
+ html_to_markdown-2.2.0.dist-info/WHEEL,sha256=HtAbUhtjhH1WdiDuIy2CapdoAiKCwe6bij_Tlxr1lEg,131
4
+ html_to_markdown-2.2.0.dist-info/METADATA,sha256=v8jDHZrq6_Vuwry7Dn3e8r9QlAZDj3-8kU_mFrSA9og,7071
5
+ html_to_markdown-2.2.0.dist-info/licenses/LICENSE,sha256=oQvPC-0UWvfg0WaeUBe11OJMtX60An-TW1ev_oaAA0k,1086
6
+ html_to_markdown/options.py,sha256=N1orEtzXiqgV88Y7eqRjw3ilxDi7N97FBkBa79hdfzo,4913
7
+ html_to_markdown/_html_to_markdown.abi3.so,sha256=xI0Wr26xhNoPGzlrlngo49vWvg61X_4d0xdMd9de8Rk,3668288
8
+ html_to_markdown/__init__.py,sha256=QZ314Edod-PD-v5CvRtJ-Lw7lqXqHP6o36UtyzUQrg8,1149
9
+ html_to_markdown/api.py,sha256=f0jhD003p8Kz5jHe0BdaKN8Uh9mvGDo2Sl9eNw06VAY,2784
10
+ html_to_markdown/_rust.pyi,sha256=pi6C_qAdB81qUlC89Dy1ZKC1JrpdnqVce-caJx3ekPA,2098
11
+ html_to_markdown/v1_compat.py,sha256=5DZA-fPMqZ5hYiA43rFaOAqshLS8MScbBnivDXuvQII,8034
12
+ html_to_markdown/cli.py,sha256=Rn-s3FZPea1jgCJtDzH_TFvOEiA_uZFVfgjhr6xyL_g,64
13
+ html_to_markdown/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
+ html_to_markdown/exceptions.py,sha256=aTASOzbywgfqOYjlw18ZkOWSxKff4EbUbmMua_73TGA,2370
15
+ html_to_markdown/cli_proxy.py,sha256=MbDRZdmQMCDI9cruy1vifc__FsjNPRdvBXKFU9GaAZE,3695
16
+ html_to_markdown/__main__.py,sha256=3Ic_EbOt2h6W88q084pkz5IKU6iY5z_woBygH6u9aw0,327
17
+ html_to_markdown/bin/html-to-markdown,sha256=_jgp22NNhPsmx9MgSb11wrTWiYpwnzZ7qZtpdXrudls,3817824
@@ -1,17 +0,0 @@
1
- html_to_markdown-2.1.0.dist-info/RECORD,,
2
- html_to_markdown-2.1.0.dist-info/WHEEL,sha256=HtAbUhtjhH1WdiDuIy2CapdoAiKCwe6bij_Tlxr1lEg,131
3
- html_to_markdown-2.1.0.dist-info/METADATA,sha256=FPqk_oGX2HOKE-VW5m8w4lfuUm4oxqQTXPanby6jwVU,7071
4
- html_to_markdown-2.1.0.dist-info/licenses/LICENSE,sha256=oQvPC-0UWvfg0WaeUBe11OJMtX60An-TW1ev_oaAA0k,1086
5
- html_to_markdown-2.1.0.data/scripts/html-to-markdown,sha256=U7CnxO8PkyRby25ZWYC1jbC0LQepZMzZbidDcDp6yBM,3784640
6
- html_to_markdown/options.py,sha256=oV-_GFEKuL3RBu27RD1AhVruEh-bNuY3c8ATRbzcos0,4802
7
- html_to_markdown/_html_to_markdown.abi3.so,sha256=mGBBfwO_d5IbJW2bY0fvJIJhuLKn2Yb3zShn3y-m9eE,3618704
8
- html_to_markdown/__init__.py,sha256=ePkhgm_2JevNaqTGchhqkUVNLtxkFJiBJ9EE4gUXpBw,1149
9
- html_to_markdown/api.py,sha256=HuM6RZg064VxrTvwcY-OmraS-hsGM9Bt1tIaM0_w7F8,2727
10
- html_to_markdown/_rust.pyi,sha256=An3Wlvedlr_2XgzqmXulLi5AzMx3HTqOJWH11M5cgcY,2026
11
- html_to_markdown/v1_compat.py,sha256=VQq1wv8OedkESpCFaUpaSUBh6vJNkByylVUbY6EPIZ8,7856
12
- html_to_markdown/cli.py,sha256=Rn-s3FZPea1jgCJtDzH_TFvOEiA_uZFVfgjhr6xyL_g,64
13
- html_to_markdown/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- html_to_markdown/exceptions.py,sha256=aTASOzbywgfqOYjlw18ZkOWSxKff4EbUbmMua_73TGA,2370
15
- html_to_markdown/cli_proxy.py,sha256=MbDRZdmQMCDI9cruy1vifc__FsjNPRdvBXKFU9GaAZE,3695
16
- html_to_markdown/__main__.py,sha256=3Ic_EbOt2h6W88q084pkz5IKU6iY5z_woBygH6u9aw0,327
17
- html_to_markdown/bin/html-to-markdown,sha256=U7CnxO8PkyRby25ZWYC1jbC0LQepZMzZbidDcDp6yBM,3784640