html-to-markdown 2.1.0__tar.gz → 2.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of html-to-markdown might be problematic. Click here for more details.

Files changed (53)
  1. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/Cargo.lock +3 -3
  2. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/Cargo.toml +2 -2
  3. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/PKG-INFO +1 -1
  4. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/src/hocr/converter.rs +7 -0
  5. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/html_to_markdown/__init__.py +1 -1
  6. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/pyproject.toml +1 -1
  7. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/LICENSE +0 -0
  8. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/README_PYPI.md +0 -0
  9. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/Cargo.toml +0 -0
  10. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/README.md +0 -0
  11. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/benches/conversion_benchmark.rs +0 -0
  12. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/benches/micro_benchmark.rs +0 -0
  13. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/benches/profiling_benchmark.rs +0 -0
  14. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/examples/basic.rs +0 -0
  15. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/examples/table.rs +0 -0
  16. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/examples/test_escape.rs +0 -0
  17. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/examples/test_inline_formatting.rs +0 -0
  18. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/examples/test_lists.rs +0 -0
  19. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/examples/test_semantic_tags.rs +0 -0
  20. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/examples/test_tables.rs +0 -0
  21. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/examples/test_task_lists.rs +0 -0
  22. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/examples/test_whitespace.rs +0 -0
  23. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/src/converter.rs +0 -0
  24. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/src/error.rs +0 -0
  25. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/src/hocr/extractor.rs +0 -0
  26. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/src/hocr/mod.rs +0 -0
  27. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/src/hocr/parser.rs +0 -0
  28. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/src/hocr/spatial.rs +0 -0
  29. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/src/hocr/types.rs +0 -0
  30. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/src/inline_images.rs +0 -0
  31. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/src/lib.rs +0 -0
  32. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/src/options.rs +0 -0
  33. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/src/sanitizer.rs +0 -0
  34. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/src/text.rs +0 -0
  35. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/src/wrapper.rs +0 -0
  36. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/tests/commonmark_compliance_test.rs +0 -0
  37. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/tests/hocr_compliance_test.rs +0 -0
  38. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown/tests/integration_test.rs +0 -0
  39. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown-py/Cargo.toml +0 -0
  40. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown-py/README.md +0 -0
  41. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown-py/python/html_to_markdown/__init__.py +0 -0
  42. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown-py/python/html_to_markdown/_html_to_markdown.pyi +0 -0
  43. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown-py/src/lib.rs +0 -0
  44. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/crates/html-to-markdown-py/uv.lock +0 -0
  45. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/html_to_markdown/__main__.py +0 -0
  46. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/html_to_markdown/_rust.pyi +0 -0
  47. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/html_to_markdown/api.py +0 -0
  48. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/html_to_markdown/cli.py +0 -0
  49. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/html_to_markdown/cli_proxy.py +0 -0
  50. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/html_to_markdown/exceptions.py +0 -0
  51. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/html_to_markdown/options.py +0 -0
  52. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/html_to_markdown/py.typed +0 -0
  53. {html_to_markdown-2.1.0 → html_to_markdown-2.1.2}/html_to_markdown/v1_compat.py +0 -0
@@ -530,7 +530,7 @@ dependencies = [
530
530
 
531
531
  [[package]]
532
532
  name = "html-to-markdown-cli"
533
- version = "2.1.0"
533
+ version = "2.1.2"
534
534
  dependencies = [
535
535
  "assert_cmd",
536
536
  "clap",
@@ -544,7 +544,7 @@ dependencies = [
544
544
 
545
545
  [[package]]
546
546
  name = "html-to-markdown-py"
547
- version = "2.1.0"
547
+ version = "2.1.2"
548
548
  dependencies = [
549
549
  "base64",
550
550
  "html-to-markdown-rs",
@@ -554,7 +554,7 @@ dependencies = [
554
554
 
555
555
  [[package]]
556
556
  name = "html-to-markdown-rs"
557
- version = "2.1.0"
557
+ version = "2.1.2"
558
558
  dependencies = [
559
559
  "ammonia",
560
560
  "base64",
@@ -3,7 +3,7 @@ resolver = "2"
3
3
  members = ["crates/html-to-markdown-py"]
4
4
 
5
5
  [workspace.package]
6
- version = "2.1.0"
6
+ version = "2.1.2"
7
7
  edition = "2021"
8
8
  authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
9
9
  license = "MIT"
@@ -15,7 +15,7 @@ rust-version = "1.80"
15
15
 
16
16
  [workspace.dependencies]
17
17
  # Core library
18
- html-to-markdown-rs = { version = "2.1.0", path = "crates/html-to-markdown" }
18
+ html-to-markdown-rs = { version = "2.1.2", path = "crates/html-to-markdown" }
19
19
 
20
20
  # HTML parsing and sanitization
21
21
  tl = "0.7"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: html-to-markdown
3
- Version: 2.1.0
3
+ Version: 2.1.2
4
4
  Classifier: Development Status :: 5 - Production/Stable
5
5
  Classifier: Environment :: Console
6
6
  Classifier: Intended Audience :: Developers
@@ -130,6 +130,13 @@ fn convert_element(element: &HocrElement, output: &mut String, depth: usize, pre
130
130
  if !output.is_empty() && !output.ends_with("\n\n") {
131
131
  output.push_str("\n\n");
132
132
  }
133
+
134
+ if let Some(table_markdown) = try_spatial_table_reconstruction(element) {
135
+ output.push_str(&table_markdown);
136
+ output.push_str("\n\n");
137
+ return;
138
+ }
139
+
133
140
  append_text_and_children(element, output, depth, preserve_structure);
134
141
  // Trim trailing space
135
142
  if output.ends_with(' ') {
@@ -39,4 +39,4 @@ __all__ = [
39
39
  "markdownify",
40
40
  ]
41
41
 
42
- __version__ = "2.1.0"
42
+ __version__ = "2.1.1"
@@ -7,7 +7,7 @@ requires = [
7
7
 
8
8
  [project]
9
9
  name = "html-to-markdown"
10
- version = "2.1.0"
10
+ version = "2.1.2"
11
11
  description = "High-performance HTML to Markdown converter powered by Rust with a clean Python API"
12
12
  readme = "README_PYPI.md"
13
13
  keywords = [