html-to-markdown 2.1.0__tar.gz → 2.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of html-to-markdown might be problematic. Click here for more details.
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/Cargo.lock +3 -3
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/Cargo.toml +2 -2
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/PKG-INFO +1 -1
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/src/hocr/converter.rs +7 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/html_to_markdown/__init__.py +1 -1
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/pyproject.toml +1 -1
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/LICENSE +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/README_PYPI.md +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/Cargo.toml +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/README.md +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/benches/conversion_benchmark.rs +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/benches/micro_benchmark.rs +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/benches/profiling_benchmark.rs +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/examples/basic.rs +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/examples/table.rs +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/examples/test_escape.rs +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/examples/test_inline_formatting.rs +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/examples/test_lists.rs +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/examples/test_semantic_tags.rs +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/examples/test_tables.rs +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/examples/test_task_lists.rs +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/examples/test_whitespace.rs +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/src/converter.rs +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/src/error.rs +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/src/hocr/extractor.rs +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/src/hocr/mod.rs +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/src/hocr/parser.rs +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/src/hocr/spatial.rs +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/src/hocr/types.rs +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/src/inline_images.rs +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/src/lib.rs +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/src/options.rs +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/src/sanitizer.rs +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/src/text.rs +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/src/wrapper.rs +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/tests/commonmark_compliance_test.rs +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/tests/hocr_compliance_test.rs +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/tests/integration_test.rs +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown-py/Cargo.toml +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown-py/README.md +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown-py/python/html_to_markdown/__init__.py +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown-py/python/html_to_markdown/_html_to_markdown.pyi +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown-py/src/lib.rs +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown-py/uv.lock +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/html_to_markdown/__main__.py +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/html_to_markdown/_rust.pyi +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/html_to_markdown/api.py +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/html_to_markdown/cli.py +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/html_to_markdown/cli_proxy.py +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/html_to_markdown/exceptions.py +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/html_to_markdown/options.py +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/html_to_markdown/py.typed +0 -0
- {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/html_to_markdown/v1_compat.py +0 -0
|
@@ -530,7 +530,7 @@ dependencies = [
|
|
|
530
530
|
|
|
531
531
|
[[package]]
|
|
532
532
|
name = "html-to-markdown-cli"
|
|
533
|
-
version = "2.1.0"
|
|
533
|
+
version = "2.1.2"
|
|
534
534
|
dependencies = [
|
|
535
535
|
"assert_cmd",
|
|
536
536
|
"clap",
|
|
@@ -544,7 +544,7 @@ dependencies = [
|
|
|
544
544
|
|
|
545
545
|
[[package]]
|
|
546
546
|
name = "html-to-markdown-py"
|
|
547
|
-
version = "2.1.0"
|
|
547
|
+
version = "2.1.2"
|
|
548
548
|
dependencies = [
|
|
549
549
|
"base64",
|
|
550
550
|
"html-to-markdown-rs",
|
|
@@ -554,7 +554,7 @@ dependencies = [
|
|
|
554
554
|
|
|
555
555
|
[[package]]
|
|
556
556
|
name = "html-to-markdown-rs"
|
|
557
|
-
version = "2.1.0"
|
|
557
|
+
version = "2.1.2"
|
|
558
558
|
dependencies = [
|
|
559
559
|
"ammonia",
|
|
560
560
|
"base64",
|
|
@@ -3,7 +3,7 @@ resolver = "2"
|
|
|
3
3
|
members = ["crates/html-to-markdown-py"]
|
|
4
4
|
|
|
5
5
|
[workspace.package]
|
|
6
|
-
version = "2.1.0"
|
|
6
|
+
version = "2.1.2"
|
|
7
7
|
edition = "2021"
|
|
8
8
|
authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
|
|
9
9
|
license = "MIT"
|
|
@@ -15,7 +15,7 @@ rust-version = "1.80"
|
|
|
15
15
|
|
|
16
16
|
[workspace.dependencies]
|
|
17
17
|
# Core library
|
|
18
|
-
html-to-markdown-rs = { version = "2.1.0", path = "crates/html-to-markdown" }
|
|
18
|
+
html-to-markdown-rs = { version = "2.1.2", path = "crates/html-to-markdown" }
|
|
19
19
|
|
|
20
20
|
# HTML parsing and sanitization
|
|
21
21
|
tl = "0.7"
|
{html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/src/hocr/converter.rs
RENAMED
|
@@ -130,6 +130,13 @@ fn convert_element(element: &HocrElement, output: &mut String, depth: usize, pre
|
|
|
130
130
|
if !output.is_empty() && !output.ends_with("\n\n") {
|
|
131
131
|
output.push_str("\n\n");
|
|
132
132
|
}
|
|
133
|
+
|
|
134
|
+
if let Some(table_markdown) = try_spatial_table_reconstruction(element) {
|
|
135
|
+
output.push_str(&table_markdown);
|
|
136
|
+
output.push_str("\n\n");
|
|
137
|
+
return;
|
|
138
|
+
}
|
|
139
|
+
|
|
133
140
|
append_text_and_children(element, output, depth, preserve_structure);
|
|
134
141
|
// Trim trailing space
|
|
135
142
|
if output.ends_with(' ') {
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/benches/micro_benchmark.rs
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/examples/test_escape.rs
RENAMED
|
File without changes
|
|
File without changes
|
{html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/examples/test_lists.rs
RENAMED
|
File without changes
|
|
File without changes
|
{html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/examples/test_tables.rs
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/src/hocr/extractor.rs
RENAMED
|
File without changes
|
|
File without changes
|
{html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/src/hocr/parser.rs
RENAMED
|
File without changes
|
{html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/src/hocr/spatial.rs
RENAMED
|
File without changes
|
|
File without changes
|
{html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/src/inline_images.rs
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/tests/integration_test.rs
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|