html-to-markdown 2.3.4__tar.gz → 2.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of html-to-markdown might be problematic. Click here for more details.
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/Cargo.lock +55 -171
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/Cargo.toml +3 -3
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/PKG-INFO +1 -1
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/Cargo.toml +1 -1
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/src/hocr/converter.rs +41 -5
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/html_to_markdown/__init__.py +1 -1
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/html_to_markdown/bin/html-to-markdown +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/pyproject.toml +4 -4
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/LICENSE +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/README_PYPI.md +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/README.md +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/benches/conversion_benchmark.rs +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/benches/micro_benchmark.rs +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/benches/profiling_benchmark.rs +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/examples/basic.rs +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/examples/table.rs +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/examples/test_escape.rs +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/examples/test_inline_formatting.rs +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/examples/test_lists.rs +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/examples/test_semantic_tags.rs +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/examples/test_tables.rs +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/examples/test_task_lists.rs +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/examples/test_whitespace.rs +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/src/converter.rs +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/src/error.rs +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/src/hocr/extractor.rs +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/src/hocr/mod.rs +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/src/hocr/parser.rs +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/src/hocr/spatial.rs +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/src/hocr/types.rs +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/src/inline_images.rs +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/src/lib.rs +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/src/options.rs +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/src/sanitizer.rs +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/src/text.rs +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/src/wrapper.rs +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/tests/commonmark_compliance_test.rs +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/tests/hocr_compliance_test.rs +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/tests/integration_test.rs +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown-py/Cargo.toml +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown-py/README.md +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown-py/python/html_to_markdown/__init__.py +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown-py/python/html_to_markdown/_html_to_markdown.pyi +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown-py/src/lib.rs +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown-py/uv.lock +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/html_to_markdown/__main__.py +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/html_to_markdown/_rust.pyi +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/html_to_markdown/api.py +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/html_to_markdown/cli.py +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/html_to_markdown/cli_proxy.py +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/html_to_markdown/exceptions.py +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/html_to_markdown/options.py +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/html_to_markdown/py.typed +0 -0
- {html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/html_to_markdown/v1_compat.py +0 -0
|
@@ -116,9 +116,9 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
|
|
|
116
116
|
|
|
117
117
|
[[package]]
|
|
118
118
|
name = "bitflags"
|
|
119
|
-
version = "2.
|
|
119
|
+
version = "2.10.0"
|
|
120
120
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
121
|
-
checksum = "
|
|
121
|
+
checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3"
|
|
122
122
|
|
|
123
123
|
[[package]]
|
|
124
124
|
name = "bstr"
|
|
@@ -167,9 +167,9 @@ dependencies = [
|
|
|
167
167
|
|
|
168
168
|
[[package]]
|
|
169
169
|
name = "cfg-if"
|
|
170
|
-
version = "1.0.
|
|
170
|
+
version = "1.0.4"
|
|
171
171
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
172
|
-
checksum = "
|
|
172
|
+
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
|
|
173
173
|
|
|
174
174
|
[[package]]
|
|
175
175
|
name = "ciborium"
|
|
@@ -200,9 +200,9 @@ dependencies = [
|
|
|
200
200
|
|
|
201
201
|
[[package]]
|
|
202
202
|
name = "clap"
|
|
203
|
-
version = "4.5.
|
|
203
|
+
version = "4.5.50"
|
|
204
204
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
205
|
-
checksum = "
|
|
205
|
+
checksum = "0c2cfd7bf8a6017ddaa4e32ffe7403d547790db06bd171c1c53926faab501623"
|
|
206
206
|
dependencies = [
|
|
207
207
|
"clap_builder",
|
|
208
208
|
"clap_derive",
|
|
@@ -210,9 +210,9 @@ dependencies = [
|
|
|
210
210
|
|
|
211
211
|
[[package]]
|
|
212
212
|
name = "clap_builder"
|
|
213
|
-
version = "4.5.
|
|
213
|
+
version = "4.5.50"
|
|
214
214
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
215
|
-
checksum = "
|
|
215
|
+
checksum = "0a4c05b9e80c5ccd3a7ef080ad7b6ba7d6fc00a985b8b157197075677c82c7a0"
|
|
216
216
|
dependencies = [
|
|
217
217
|
"anstream",
|
|
218
218
|
"anstyle",
|
|
@@ -299,25 +299,22 @@ dependencies = [
|
|
|
299
299
|
|
|
300
300
|
[[package]]
|
|
301
301
|
name = "criterion"
|
|
302
|
-
version = "0.
|
|
302
|
+
version = "0.7.0"
|
|
303
303
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
304
|
-
checksum = "
|
|
304
|
+
checksum = "e1c047a62b0cc3e145fa84415a3191f628e980b194c2755aa12300a4e6cbd928"
|
|
305
305
|
dependencies = [
|
|
306
306
|
"anes",
|
|
307
307
|
"cast",
|
|
308
308
|
"ciborium",
|
|
309
309
|
"clap",
|
|
310
310
|
"criterion-plot",
|
|
311
|
-
"is-terminal",
|
|
312
311
|
"itertools",
|
|
313
312
|
"num-traits",
|
|
314
|
-
"once_cell",
|
|
315
313
|
"oorandom",
|
|
316
314
|
"plotters",
|
|
317
315
|
"rayon",
|
|
318
316
|
"regex",
|
|
319
317
|
"serde",
|
|
320
|
-
"serde_derive",
|
|
321
318
|
"serde_json",
|
|
322
319
|
"tinytemplate",
|
|
323
320
|
"walkdir",
|
|
@@ -325,9 +322,9 @@ dependencies = [
|
|
|
325
322
|
|
|
326
323
|
[[package]]
|
|
327
324
|
name = "criterion-plot"
|
|
328
|
-
version = "0.
|
|
325
|
+
version = "0.6.0"
|
|
329
326
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
330
|
-
checksum = "
|
|
327
|
+
checksum = "9b1bcc0dc7dfae599d84ad0b1a55f80cde8af3725da8313b528da95ef783e338"
|
|
331
328
|
dependencies = [
|
|
332
329
|
"cast",
|
|
333
330
|
"itertools",
|
|
@@ -548,27 +545,16 @@ dependencies = [
|
|
|
548
545
|
|
|
549
546
|
[[package]]
|
|
550
547
|
name = "getrandom"
|
|
551
|
-
version = "0.
|
|
548
|
+
version = "0.3.4"
|
|
552
549
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
553
|
-
checksum = "
|
|
550
|
+
checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
|
|
554
551
|
dependencies = [
|
|
555
552
|
"cfg-if",
|
|
556
553
|
"js-sys",
|
|
557
554
|
"libc",
|
|
558
|
-
"wasi 0.11.1+wasi-snapshot-preview1",
|
|
559
|
-
"wasm-bindgen",
|
|
560
|
-
]
|
|
561
|
-
|
|
562
|
-
[[package]]
|
|
563
|
-
name = "getrandom"
|
|
564
|
-
version = "0.3.3"
|
|
565
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
566
|
-
checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
|
|
567
|
-
dependencies = [
|
|
568
|
-
"cfg-if",
|
|
569
|
-
"libc",
|
|
570
555
|
"r-efi",
|
|
571
|
-
"
|
|
556
|
+
"wasip2",
|
|
557
|
+
"wasm-bindgen",
|
|
572
558
|
]
|
|
573
559
|
|
|
574
560
|
[[package]]
|
|
@@ -598,12 +584,6 @@ version = "0.5.0"
|
|
|
598
584
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
599
585
|
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
|
600
586
|
|
|
601
|
-
[[package]]
|
|
602
|
-
name = "hermit-abi"
|
|
603
|
-
version = "0.5.2"
|
|
604
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
605
|
-
checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
|
|
606
|
-
|
|
607
587
|
[[package]]
|
|
608
588
|
name = "html-escape"
|
|
609
589
|
version = "0.2.13"
|
|
@@ -615,7 +595,7 @@ dependencies = [
|
|
|
615
595
|
|
|
616
596
|
[[package]]
|
|
617
597
|
name = "html-to-markdown-cli"
|
|
618
|
-
version = "2.3.4"
|
|
598
|
+
version = "2.4.0"
|
|
619
599
|
dependencies = [
|
|
620
600
|
"assert_cmd",
|
|
621
601
|
"clap",
|
|
@@ -629,7 +609,7 @@ dependencies = [
|
|
|
629
609
|
|
|
630
610
|
[[package]]
|
|
631
611
|
name = "html-to-markdown-node"
|
|
632
|
-
version = "2.3.4"
|
|
612
|
+
version = "2.4.0"
|
|
633
613
|
dependencies = [
|
|
634
614
|
"html-to-markdown-rs",
|
|
635
615
|
"mimalloc-rust",
|
|
@@ -640,7 +620,7 @@ dependencies = [
|
|
|
640
620
|
|
|
641
621
|
[[package]]
|
|
642
622
|
name = "html-to-markdown-py"
|
|
643
|
-
version = "2.3.4"
|
|
623
|
+
version = "2.4.0"
|
|
644
624
|
dependencies = [
|
|
645
625
|
"base64",
|
|
646
626
|
"html-to-markdown-rs",
|
|
@@ -650,7 +630,7 @@ dependencies = [
|
|
|
650
630
|
|
|
651
631
|
[[package]]
|
|
652
632
|
name = "html-to-markdown-rs"
|
|
653
|
-
version = "2.3.4"
|
|
633
|
+
version = "2.4.0"
|
|
654
634
|
dependencies = [
|
|
655
635
|
"ammonia",
|
|
656
636
|
"base64",
|
|
@@ -667,10 +647,10 @@ dependencies = [
|
|
|
667
647
|
|
|
668
648
|
[[package]]
|
|
669
649
|
name = "html-to-markdown-wasm"
|
|
670
|
-
version = "2.3.4"
|
|
650
|
+
version = "2.4.0"
|
|
671
651
|
dependencies = [
|
|
672
652
|
"console_error_panic_hook",
|
|
673
|
-
"getrandom
|
|
653
|
+
"getrandom",
|
|
674
654
|
"html-to-markdown-rs",
|
|
675
655
|
"js-sys",
|
|
676
656
|
"serde",
|
|
@@ -827,32 +807,24 @@ dependencies = [
|
|
|
827
807
|
|
|
828
808
|
[[package]]
|
|
829
809
|
name = "indoc"
|
|
830
|
-
version = "2.0.
|
|
810
|
+
version = "2.0.7"
|
|
831
811
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
832
|
-
checksum = "
|
|
833
|
-
|
|
834
|
-
[[package]]
|
|
835
|
-
name = "is-terminal"
|
|
836
|
-
version = "0.4.16"
|
|
837
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
838
|
-
checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9"
|
|
812
|
+
checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706"
|
|
839
813
|
dependencies = [
|
|
840
|
-
"
|
|
841
|
-
"libc",
|
|
842
|
-
"windows-sys 0.59.0",
|
|
814
|
+
"rustversion",
|
|
843
815
|
]
|
|
844
816
|
|
|
845
817
|
[[package]]
|
|
846
818
|
name = "is_terminal_polyfill"
|
|
847
|
-
version = "1.70.
|
|
819
|
+
version = "1.70.2"
|
|
848
820
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
849
|
-
checksum = "
|
|
821
|
+
checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
|
|
850
822
|
|
|
851
823
|
[[package]]
|
|
852
824
|
name = "itertools"
|
|
853
|
-
version = "0.
|
|
825
|
+
version = "0.13.0"
|
|
854
826
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
855
|
-
checksum = "
|
|
827
|
+
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
|
|
856
828
|
dependencies = [
|
|
857
829
|
"either",
|
|
858
830
|
]
|
|
@@ -1106,9 +1078,9 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
|
|
|
1106
1078
|
|
|
1107
1079
|
[[package]]
|
|
1108
1080
|
name = "once_cell_polyfill"
|
|
1109
|
-
version = "1.70.
|
|
1081
|
+
version = "1.70.2"
|
|
1110
1082
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1111
|
-
checksum = "
|
|
1083
|
+
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
|
|
1112
1084
|
|
|
1113
1085
|
[[package]]
|
|
1114
1086
|
name = "oorandom"
|
|
@@ -1309,9 +1281,9 @@ dependencies = [
|
|
|
1309
1281
|
|
|
1310
1282
|
[[package]]
|
|
1311
1283
|
name = "pyo3"
|
|
1312
|
-
version = "0.
|
|
1284
|
+
version = "0.27.1"
|
|
1313
1285
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1314
|
-
checksum = "
|
|
1286
|
+
checksum = "37a6df7eab65fc7bee654a421404947e10a0f7085b6951bf2ea395f4659fb0cf"
|
|
1315
1287
|
dependencies = [
|
|
1316
1288
|
"indoc",
|
|
1317
1289
|
"libc",
|
|
@@ -1326,18 +1298,18 @@ dependencies = [
|
|
|
1326
1298
|
|
|
1327
1299
|
[[package]]
|
|
1328
1300
|
name = "pyo3-build-config"
|
|
1329
|
-
version = "0.
|
|
1301
|
+
version = "0.27.1"
|
|
1330
1302
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1331
|
-
checksum = "
|
|
1303
|
+
checksum = "f77d387774f6f6eec64a004eac0ed525aab7fa1966d94b42f743797b3e395afb"
|
|
1332
1304
|
dependencies = [
|
|
1333
1305
|
"target-lexicon",
|
|
1334
1306
|
]
|
|
1335
1307
|
|
|
1336
1308
|
[[package]]
|
|
1337
1309
|
name = "pyo3-ffi"
|
|
1338
|
-
version = "0.
|
|
1310
|
+
version = "0.27.1"
|
|
1339
1311
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1340
|
-
checksum = "
|
|
1312
|
+
checksum = "2dd13844a4242793e02df3e2ec093f540d948299a6a77ea9ce7afd8623f542be"
|
|
1341
1313
|
dependencies = [
|
|
1342
1314
|
"libc",
|
|
1343
1315
|
"pyo3-build-config",
|
|
@@ -1345,9 +1317,9 @@ dependencies = [
|
|
|
1345
1317
|
|
|
1346
1318
|
[[package]]
|
|
1347
1319
|
name = "pyo3-macros"
|
|
1348
|
-
version = "0.
|
|
1320
|
+
version = "0.27.1"
|
|
1349
1321
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1350
|
-
checksum = "
|
|
1322
|
+
checksum = "eaf8f9f1108270b90d3676b8679586385430e5c0bb78bb5f043f95499c821a71"
|
|
1351
1323
|
dependencies = [
|
|
1352
1324
|
"proc-macro2",
|
|
1353
1325
|
"pyo3-macros-backend",
|
|
@@ -1357,9 +1329,9 @@ dependencies = [
|
|
|
1357
1329
|
|
|
1358
1330
|
[[package]]
|
|
1359
1331
|
name = "pyo3-macros-backend"
|
|
1360
|
-
version = "0.
|
|
1332
|
+
version = "0.27.1"
|
|
1361
1333
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1362
|
-
checksum = "
|
|
1334
|
+
checksum = "70a3b2274450ba5288bc9b8c1b69ff569d1d61189d4bff38f8d22e03d17f932b"
|
|
1363
1335
|
dependencies = [
|
|
1364
1336
|
"heck",
|
|
1365
1337
|
"proc-macro2",
|
|
@@ -1637,9 +1609,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
|
|
|
1637
1609
|
|
|
1638
1610
|
[[package]]
|
|
1639
1611
|
name = "syn"
|
|
1640
|
-
version = "2.0.
|
|
1612
|
+
version = "2.0.107"
|
|
1641
1613
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1642
|
-
checksum = "
|
|
1614
|
+
checksum = "2a26dbd934e5451d21ef060c018dae56fc073894c5a7896f882928a76e6d081b"
|
|
1643
1615
|
dependencies = [
|
|
1644
1616
|
"proc-macro2",
|
|
1645
1617
|
"quote",
|
|
@@ -1670,7 +1642,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
|
1670
1642
|
checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16"
|
|
1671
1643
|
dependencies = [
|
|
1672
1644
|
"fastrand",
|
|
1673
|
-
"getrandom
|
|
1645
|
+
"getrandom",
|
|
1674
1646
|
"once_cell",
|
|
1675
1647
|
"rustix",
|
|
1676
1648
|
"windows-sys 0.61.2",
|
|
@@ -1741,9 +1713,9 @@ checksum = "b130bd8a58c163224b44e217b4239ca7b927d82bf6cc2fea1fc561d15056e3f7"
|
|
|
1741
1713
|
|
|
1742
1714
|
[[package]]
|
|
1743
1715
|
name = "unicode-ident"
|
|
1744
|
-
version = "1.0.
|
|
1716
|
+
version = "1.0.20"
|
|
1745
1717
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1746
|
-
checksum = "
|
|
1718
|
+
checksum = "462eeb75aeb73aea900253ce739c8e18a67423fadf006037cd3ff27e82748a06"
|
|
1747
1719
|
|
|
1748
1720
|
[[package]]
|
|
1749
1721
|
name = "unicode-segmentation"
|
|
@@ -1812,21 +1784,6 @@ dependencies = [
|
|
|
1812
1784
|
"winapi-util",
|
|
1813
1785
|
]
|
|
1814
1786
|
|
|
1815
|
-
[[package]]
|
|
1816
|
-
name = "wasi"
|
|
1817
|
-
version = "0.11.1+wasi-snapshot-preview1"
|
|
1818
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1819
|
-
checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
|
|
1820
|
-
|
|
1821
|
-
[[package]]
|
|
1822
|
-
name = "wasi"
|
|
1823
|
-
version = "0.14.7+wasi-0.2.4"
|
|
1824
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1825
|
-
checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c"
|
|
1826
|
-
dependencies = [
|
|
1827
|
-
"wasip2",
|
|
1828
|
-
]
|
|
1829
|
-
|
|
1830
1787
|
[[package]]
|
|
1831
1788
|
name = "wasip2"
|
|
1832
1789
|
version = "1.0.1+wasi-0.2.4"
|
|
@@ -1975,22 +1932,13 @@ version = "0.2.1"
|
|
|
1975
1932
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1976
1933
|
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
|
|
1977
1934
|
|
|
1978
|
-
[[package]]
|
|
1979
|
-
name = "windows-sys"
|
|
1980
|
-
version = "0.59.0"
|
|
1981
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1982
|
-
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
|
|
1983
|
-
dependencies = [
|
|
1984
|
-
"windows-targets 0.52.6",
|
|
1985
|
-
]
|
|
1986
|
-
|
|
1987
1935
|
[[package]]
|
|
1988
1936
|
name = "windows-sys"
|
|
1989
1937
|
version = "0.60.2"
|
|
1990
1938
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1991
1939
|
checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb"
|
|
1992
1940
|
dependencies = [
|
|
1993
|
-
"windows-targets
|
|
1941
|
+
"windows-targets",
|
|
1994
1942
|
]
|
|
1995
1943
|
|
|
1996
1944
|
[[package]]
|
|
@@ -2002,22 +1950,6 @@ dependencies = [
|
|
|
2002
1950
|
"windows-link",
|
|
2003
1951
|
]
|
|
2004
1952
|
|
|
2005
|
-
[[package]]
|
|
2006
|
-
name = "windows-targets"
|
|
2007
|
-
version = "0.52.6"
|
|
2008
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2009
|
-
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
|
|
2010
|
-
dependencies = [
|
|
2011
|
-
"windows_aarch64_gnullvm 0.52.6",
|
|
2012
|
-
"windows_aarch64_msvc 0.52.6",
|
|
2013
|
-
"windows_i686_gnu 0.52.6",
|
|
2014
|
-
"windows_i686_gnullvm 0.52.6",
|
|
2015
|
-
"windows_i686_msvc 0.52.6",
|
|
2016
|
-
"windows_x86_64_gnu 0.52.6",
|
|
2017
|
-
"windows_x86_64_gnullvm 0.52.6",
|
|
2018
|
-
"windows_x86_64_msvc 0.52.6",
|
|
2019
|
-
]
|
|
2020
|
-
|
|
2021
1953
|
[[package]]
|
|
2022
1954
|
name = "windows-targets"
|
|
2023
1955
|
version = "0.53.5"
|
|
@@ -2025,106 +1957,58 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
|
2025
1957
|
checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3"
|
|
2026
1958
|
dependencies = [
|
|
2027
1959
|
"windows-link",
|
|
2028
|
-
"windows_aarch64_gnullvm
|
|
2029
|
-
"windows_aarch64_msvc
|
|
2030
|
-
"windows_i686_gnu
|
|
2031
|
-
"windows_i686_gnullvm
|
|
2032
|
-
"windows_i686_msvc
|
|
2033
|
-
"windows_x86_64_gnu
|
|
2034
|
-
"windows_x86_64_gnullvm
|
|
2035
|
-
"windows_x86_64_msvc
|
|
1960
|
+
"windows_aarch64_gnullvm",
|
|
1961
|
+
"windows_aarch64_msvc",
|
|
1962
|
+
"windows_i686_gnu",
|
|
1963
|
+
"windows_i686_gnullvm",
|
|
1964
|
+
"windows_i686_msvc",
|
|
1965
|
+
"windows_x86_64_gnu",
|
|
1966
|
+
"windows_x86_64_gnullvm",
|
|
1967
|
+
"windows_x86_64_msvc",
|
|
2036
1968
|
]
|
|
2037
1969
|
|
|
2038
|
-
[[package]]
|
|
2039
|
-
name = "windows_aarch64_gnullvm"
|
|
2040
|
-
version = "0.52.6"
|
|
2041
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2042
|
-
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
|
|
2043
|
-
|
|
2044
1970
|
[[package]]
|
|
2045
1971
|
name = "windows_aarch64_gnullvm"
|
|
2046
1972
|
version = "0.53.1"
|
|
2047
1973
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2048
1974
|
checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53"
|
|
2049
1975
|
|
|
2050
|
-
[[package]]
|
|
2051
|
-
name = "windows_aarch64_msvc"
|
|
2052
|
-
version = "0.52.6"
|
|
2053
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2054
|
-
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
|
|
2055
|
-
|
|
2056
1976
|
[[package]]
|
|
2057
1977
|
name = "windows_aarch64_msvc"
|
|
2058
1978
|
version = "0.53.1"
|
|
2059
1979
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2060
1980
|
checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006"
|
|
2061
1981
|
|
|
2062
|
-
[[package]]
|
|
2063
|
-
name = "windows_i686_gnu"
|
|
2064
|
-
version = "0.52.6"
|
|
2065
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2066
|
-
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
|
|
2067
|
-
|
|
2068
1982
|
[[package]]
|
|
2069
1983
|
name = "windows_i686_gnu"
|
|
2070
1984
|
version = "0.53.1"
|
|
2071
1985
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2072
1986
|
checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3"
|
|
2073
1987
|
|
|
2074
|
-
[[package]]
|
|
2075
|
-
name = "windows_i686_gnullvm"
|
|
2076
|
-
version = "0.52.6"
|
|
2077
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2078
|
-
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
|
|
2079
|
-
|
|
2080
1988
|
[[package]]
|
|
2081
1989
|
name = "windows_i686_gnullvm"
|
|
2082
1990
|
version = "0.53.1"
|
|
2083
1991
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2084
1992
|
checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c"
|
|
2085
1993
|
|
|
2086
|
-
[[package]]
|
|
2087
|
-
name = "windows_i686_msvc"
|
|
2088
|
-
version = "0.52.6"
|
|
2089
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2090
|
-
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
|
|
2091
|
-
|
|
2092
1994
|
[[package]]
|
|
2093
1995
|
name = "windows_i686_msvc"
|
|
2094
1996
|
version = "0.53.1"
|
|
2095
1997
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2096
1998
|
checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2"
|
|
2097
1999
|
|
|
2098
|
-
[[package]]
|
|
2099
|
-
name = "windows_x86_64_gnu"
|
|
2100
|
-
version = "0.52.6"
|
|
2101
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2102
|
-
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
|
|
2103
|
-
|
|
2104
2000
|
[[package]]
|
|
2105
2001
|
name = "windows_x86_64_gnu"
|
|
2106
2002
|
version = "0.53.1"
|
|
2107
2003
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2108
2004
|
checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499"
|
|
2109
2005
|
|
|
2110
|
-
[[package]]
|
|
2111
|
-
name = "windows_x86_64_gnullvm"
|
|
2112
|
-
version = "0.52.6"
|
|
2113
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2114
|
-
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
|
|
2115
|
-
|
|
2116
2006
|
[[package]]
|
|
2117
2007
|
name = "windows_x86_64_gnullvm"
|
|
2118
2008
|
version = "0.53.1"
|
|
2119
2009
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2120
2010
|
checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1"
|
|
2121
2011
|
|
|
2122
|
-
[[package]]
|
|
2123
|
-
name = "windows_x86_64_msvc"
|
|
2124
|
-
version = "0.52.6"
|
|
2125
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2126
|
-
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|
|
2127
|
-
|
|
2128
2012
|
[[package]]
|
|
2129
2013
|
name = "windows_x86_64_msvc"
|
|
2130
2014
|
version = "0.53.1"
|
|
@@ -3,7 +3,7 @@ resolver = "2"
|
|
|
3
3
|
members = ["crates/html-to-markdown-py"]
|
|
4
4
|
|
|
5
5
|
[workspace.package]
|
|
6
|
-
version = "2.3.4"
|
|
6
|
+
version = "2.4.0"
|
|
7
7
|
edition = "2021"
|
|
8
8
|
authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
|
|
9
9
|
license = "MIT"
|
|
@@ -15,7 +15,7 @@ rust-version = "1.80"
|
|
|
15
15
|
|
|
16
16
|
[workspace.dependencies]
|
|
17
17
|
# Core library
|
|
18
|
-
html-to-markdown-rs = { version = "2.3.4", path = "crates/html-to-markdown" }
|
|
18
|
+
html-to-markdown-rs = { version = "2.4.0", path = "crates/html-to-markdown" }
|
|
19
19
|
|
|
20
20
|
# HTML parsing and sanitization
|
|
21
21
|
tl = "0.7"
|
|
@@ -34,7 +34,7 @@ clap_complete = "4.5"
|
|
|
34
34
|
clap_mangen = "0.2"
|
|
35
35
|
|
|
36
36
|
# Python bindings
|
|
37
|
-
pyo3 = { version = "0.
|
|
37
|
+
pyo3 = { version = "0.27.1", features = ["abi3-py310"] }
|
|
38
38
|
|
|
39
39
|
[profile.release]
|
|
40
40
|
lto = "thin"
|
|
@@ -33,7 +33,7 @@ image = { version = "0.25", default-features = false, features = ["gif", "jpeg",
|
|
|
33
33
|
[dev-dependencies]
|
|
34
34
|
serde = { version = "1.0", features = ["derive"] }
|
|
35
35
|
serde_json = "1.0"
|
|
36
|
-
criterion = { version = "0.
|
|
36
|
+
criterion = { version = "0.7", features = ["html_reports"] }
|
|
37
37
|
|
|
38
38
|
[[bench]]
|
|
39
39
|
name = "conversion_benchmark"
|
{html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/src/hocr/converter.rs
RENAMED
|
@@ -92,6 +92,7 @@ pub fn convert_to_markdown_with_options(
|
|
|
92
92
|
);
|
|
93
93
|
}
|
|
94
94
|
|
|
95
|
+
collapse_extra_newlines(&mut output);
|
|
95
96
|
output.trim().to_string()
|
|
96
97
|
}
|
|
97
98
|
|
|
@@ -185,7 +186,7 @@ fn convert_element(
|
|
|
185
186
|
if enable_spatial_tables {
|
|
186
187
|
if let Some(table_markdown) = try_spatial_table_reconstruction(element) {
|
|
187
188
|
output.push_str(&table_markdown);
|
|
188
|
-
output
|
|
189
|
+
ensure_trailing_blank_line(output);
|
|
189
190
|
return;
|
|
190
191
|
}
|
|
191
192
|
}
|
|
@@ -324,7 +325,7 @@ fn convert_element(
|
|
|
324
325
|
if enable_spatial_tables {
|
|
325
326
|
if let Some(table_markdown) = try_spatial_table_reconstruction(element) {
|
|
326
327
|
output.push_str(&table_markdown);
|
|
327
|
-
output
|
|
328
|
+
ensure_trailing_blank_line(output);
|
|
328
329
|
} else {
|
|
329
330
|
// Fallback: process children normally
|
|
330
331
|
let mut sorted_children: Vec<_> = element.children.iter().collect();
|
|
@@ -334,7 +335,7 @@ fn convert_element(
|
|
|
334
335
|
for child in sorted_children {
|
|
335
336
|
convert_element(child, output, depth + 1, preserve_structure, enable_spatial_tables, ctx);
|
|
336
337
|
}
|
|
337
|
-
output
|
|
338
|
+
ensure_trailing_blank_line(output);
|
|
338
339
|
}
|
|
339
340
|
} else {
|
|
340
341
|
// Fallback: process children normally
|
|
@@ -345,7 +346,7 @@ fn convert_element(
|
|
|
345
346
|
for child in sorted_children {
|
|
346
347
|
convert_element(child, output, depth + 1, preserve_structure, enable_spatial_tables, ctx);
|
|
347
348
|
}
|
|
348
|
-
output
|
|
349
|
+
ensure_trailing_blank_line(output);
|
|
349
350
|
}
|
|
350
351
|
}
|
|
351
352
|
|
|
@@ -361,7 +362,7 @@ fn convert_element(
|
|
|
361
362
|
for child in sorted_children {
|
|
362
363
|
convert_element(child, output, depth + 1, preserve_structure, enable_spatial_tables, ctx);
|
|
363
364
|
}
|
|
364
|
-
output
|
|
365
|
+
ensure_trailing_blank_line(output);
|
|
365
366
|
}
|
|
366
367
|
|
|
367
368
|
// Images - markdown image placeholder or alt text
|
|
@@ -463,6 +464,41 @@ fn convert_element(
|
|
|
463
464
|
}
|
|
464
465
|
}
|
|
465
466
|
|
|
467
|
+
fn ensure_trailing_blank_line(output: &mut String) {
|
|
468
|
+
while output.ends_with("\n\n\n") {
|
|
469
|
+
output.pop();
|
|
470
|
+
}
|
|
471
|
+
if output.ends_with("\n\n") {
|
|
472
|
+
return;
|
|
473
|
+
}
|
|
474
|
+
if output.ends_with('\n') {
|
|
475
|
+
output.push('\n');
|
|
476
|
+
} else {
|
|
477
|
+
output.push_str("\n\n");
|
|
478
|
+
}
|
|
479
|
+
}
|
|
480
|
+
|
|
481
|
+
fn collapse_extra_newlines(output: &mut String) {
|
|
482
|
+
let mut collapsed = String::with_capacity(output.len());
|
|
483
|
+
let mut newline_count = 0;
|
|
484
|
+
|
|
485
|
+
for ch in output.chars() {
|
|
486
|
+
if ch == '\n' {
|
|
487
|
+
newline_count += 1;
|
|
488
|
+
if newline_count <= 2 {
|
|
489
|
+
collapsed.push('\n');
|
|
490
|
+
}
|
|
491
|
+
} else {
|
|
492
|
+
newline_count = 0;
|
|
493
|
+
collapsed.push(ch);
|
|
494
|
+
}
|
|
495
|
+
}
|
|
496
|
+
|
|
497
|
+
if collapsed.len() != output.len() {
|
|
498
|
+
*output = collapsed;
|
|
499
|
+
}
|
|
500
|
+
}
|
|
501
|
+
|
|
466
502
|
fn append_text_and_children(
|
|
467
503
|
element: &HocrElement,
|
|
468
504
|
output: &mut String,
|
|
Binary file
|
|
@@ -7,7 +7,7 @@ requires = [
|
|
|
7
7
|
|
|
8
8
|
[project]
|
|
9
9
|
name = "html-to-markdown"
|
|
10
|
-
version = "2.3.4"
|
|
10
|
+
version = "2.4.0"
|
|
11
11
|
description = "High-performance HTML to Markdown converter powered by Rust with a clean Python API"
|
|
12
12
|
readme = "README_PYPI.md"
|
|
13
13
|
keywords = [
|
|
@@ -54,15 +54,15 @@ urls.Repository = "https://github.com/Goldziher/html-to-markdown.git"
|
|
|
54
54
|
[dependency-groups]
|
|
55
55
|
dev = [
|
|
56
56
|
"covdefaults>=2.3",
|
|
57
|
-
"memray>=1.
|
|
57
|
+
"memray>=1.19.1; sys_platform!='win32'",
|
|
58
58
|
"mypy>=1.18.2",
|
|
59
59
|
"pre-commit>=4.3",
|
|
60
|
-
"psutil>=7.1; sys_platform!='win32'",
|
|
60
|
+
"psutil>=7.1.1; sys_platform!='win32'",
|
|
61
61
|
"pytest>=8.4.2",
|
|
62
62
|
"pytest-benchmark>=5.1",
|
|
63
63
|
"pytest-cov>=7",
|
|
64
64
|
"pytest-mock>=3.15.1",
|
|
65
|
-
"ruff>=0.
|
|
65
|
+
"ruff>=0.14.1",
|
|
66
66
|
"types-psutil>=7.0.0.20251001",
|
|
67
67
|
"uv-bump",
|
|
68
68
|
]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/benches/micro_benchmark.rs
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/examples/test_escape.rs
RENAMED
|
File without changes
|
|
File without changes
|
{html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/examples/test_lists.rs
RENAMED
|
File without changes
|
|
File without changes
|
{html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/examples/test_tables.rs
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/src/hocr/extractor.rs
RENAMED
|
File without changes
|
|
File without changes
|
{html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/src/hocr/parser.rs
RENAMED
|
File without changes
|
{html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/src/hocr/spatial.rs
RENAMED
|
File without changes
|
|
File without changes
|
{html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/src/inline_images.rs
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{html_to_markdown-2.3.4 → html_to_markdown-2.4.0}/crates/html-to-markdown/tests/integration_test.rs
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|