html-to-markdown 2.1.0__tar.gz → 2.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of html-to-markdown might be problematic. Click here for more details.

Files changed (54)
  1. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/Cargo.lock +3 -3
  2. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/Cargo.toml +2 -2
  3. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/PKG-INFO +1 -1
  4. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown/benches/conversion_benchmark.rs +3 -3
  5. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown/benches/micro_benchmark.rs +1 -1
  6. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown/benches/profiling_benchmark.rs +1 -1
  7. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown/src/converter.rs +2 -2
  8. html_to_markdown-2.2.0/crates/html-to-markdown/src/hocr/converter.rs +1273 -0
  9. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown/src/hocr/mod.rs +1 -1
  10. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown/src/options.rs +4 -0
  11. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown-py/python/html_to_markdown/_html_to_markdown.pyi +2 -0
  12. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown-py/src/lib.rs +6 -0
  13. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/html_to_markdown/__init__.py +1 -1
  14. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/html_to_markdown/_rust.pyi +2 -0
  15. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/html_to_markdown/api.py +1 -0
  16. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/html_to_markdown/options.py +3 -0
  17. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/html_to_markdown/v1_compat.py +6 -2
  18. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/pyproject.toml +1 -1
  19. html_to_markdown-2.1.0/crates/html-to-markdown/src/hocr/converter.rs +0 -491
  20. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/LICENSE +0 -0
  21. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/README_PYPI.md +0 -0
  22. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown/Cargo.toml +0 -0
  23. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown/README.md +0 -0
  24. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown/examples/basic.rs +0 -0
  25. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown/examples/table.rs +0 -0
  26. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown/examples/test_escape.rs +0 -0
  27. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown/examples/test_inline_formatting.rs +0 -0
  28. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown/examples/test_lists.rs +0 -0
  29. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown/examples/test_semantic_tags.rs +0 -0
  30. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown/examples/test_tables.rs +0 -0
  31. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown/examples/test_task_lists.rs +0 -0
  32. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown/examples/test_whitespace.rs +0 -0
  33. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown/src/error.rs +0 -0
  34. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown/src/hocr/extractor.rs +0 -0
  35. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown/src/hocr/parser.rs +0 -0
  36. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown/src/hocr/spatial.rs +0 -0
  37. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown/src/hocr/types.rs +0 -0
  38. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown/src/inline_images.rs +0 -0
  39. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown/src/lib.rs +0 -0
  40. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown/src/sanitizer.rs +0 -0
  41. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown/src/text.rs +0 -0
  42. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown/src/wrapper.rs +0 -0
  43. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown/tests/commonmark_compliance_test.rs +0 -0
  44. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown/tests/hocr_compliance_test.rs +0 -0
  45. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown/tests/integration_test.rs +0 -0
  46. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown-py/Cargo.toml +0 -0
  47. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown-py/README.md +0 -0
  48. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown-py/python/html_to_markdown/__init__.py +0 -0
  49. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/crates/html-to-markdown-py/uv.lock +0 -0
  50. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/html_to_markdown/__main__.py +0 -0
  51. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/html_to_markdown/cli.py +0 -0
  52. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/html_to_markdown/cli_proxy.py +0 -0
  53. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/html_to_markdown/exceptions.py +0 -0
  54. {html_to_markdown-2.1.0 → html_to_markdown-2.2.0}/html_to_markdown/py.typed +0 -0
@@ -530,7 +530,7 @@ dependencies = [
530
530
 
531
531
  [[package]]
532
532
  name = "html-to-markdown-cli"
533
- version = "2.1.0"
533
+ version = "2.2.0"
534
534
  dependencies = [
535
535
  "assert_cmd",
536
536
  "clap",
@@ -544,7 +544,7 @@ dependencies = [
544
544
 
545
545
  [[package]]
546
546
  name = "html-to-markdown-py"
547
- version = "2.1.0"
547
+ version = "2.2.0"
548
548
  dependencies = [
549
549
  "base64",
550
550
  "html-to-markdown-rs",
@@ -554,7 +554,7 @@ dependencies = [
554
554
 
555
555
  [[package]]
556
556
  name = "html-to-markdown-rs"
557
- version = "2.1.0"
557
+ version = "2.2.0"
558
558
  dependencies = [
559
559
  "ammonia",
560
560
  "base64",
@@ -3,7 +3,7 @@ resolver = "2"
3
3
  members = ["crates/html-to-markdown-py"]
4
4
 
5
5
  [workspace.package]
6
- version = "2.1.0"
6
+ version = "2.2.0"
7
7
  edition = "2021"
8
8
  authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
9
9
  license = "MIT"
@@ -15,7 +15,7 @@ rust-version = "1.80"
15
15
 
16
16
  [workspace.dependencies]
17
17
  # Core library
18
- html-to-markdown-rs = { version = "2.1.0", path = "crates/html-to-markdown" }
18
+ html-to-markdown-rs = { version = "2.2.0", path = "crates/html-to-markdown" }
19
19
 
20
20
  # HTML parsing and sanitization
21
21
  tl = "0.7"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: html-to-markdown
3
- Version: 2.1.0
3
+ Version: 2.2.0
4
4
  Classifier: Development Status :: 5 - Production/Stable
5
5
  Classifier: Environment :: Console
6
6
  Classifier: Intended Audience :: Developers
@@ -4,7 +4,7 @@
4
4
  //! for the core Rust conversion engine.
5
5
 
6
6
  use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
7
- use html_to_markdown::{convert, ConversionOptions};
7
+ use html_to_markdown_rs::{convert, CodeBlockStyle, ConversionOptions, HeadingStyle};
8
8
  use std::time::Duration;
9
9
 
10
10
  /// Generate HTML with varying complexity
@@ -230,7 +230,7 @@ fn bench_configurations(c: &mut Criterion) {
230
230
  });
231
231
 
232
232
  let options = ConversionOptions {
233
- heading_style: html_to_markdown::HeadingStyle::Atx,
233
+ heading_style: HeadingStyle::Atx,
234
234
  ..Default::default()
235
235
  };
236
236
  group.throughput(Throughput::Bytes(size as u64));
@@ -239,7 +239,7 @@ fn bench_configurations(c: &mut Criterion) {
239
239
  });
240
240
 
241
241
  let options = ConversionOptions {
242
- code_block_style: html_to_markdown::CodeBlockStyle::Backticks,
242
+ code_block_style: CodeBlockStyle::Backticks,
243
243
  ..Default::default()
244
244
  };
245
245
  group.throughput(Throughput::Bytes(size as u64));
@@ -1,7 +1,7 @@
1
1
  //! Micro-benchmarks for specific operations
2
2
 
3
3
  use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
4
- use html_to_markdown::{convert, ConversionOptions};
4
+ use html_to_markdown_rs::{convert, ConversionOptions};
5
5
 
6
6
  /// Benchmark text-heavy documents
7
7
  fn bench_text_operations(c: &mut Criterion) {
@@ -3,7 +3,7 @@
3
3
  //! Run with: cargo bench --bench profiling_benchmark --profile-time=10
4
4
  //! Or with flamegraph: cargo flamegraph --bench profiling_benchmark
5
5
 
6
- use html_to_markdown::{convert, ConversionOptions};
6
+ use html_to_markdown_rs::convert;
7
7
  use std::hint::black_box;
8
8
 
9
9
  fn load_wikipedia_document(filename: &str) -> Option<String> {
@@ -998,7 +998,7 @@ fn convert_html_impl(
998
998
  }
999
999
 
1000
1000
  if is_hocr {
1001
- use crate::hocr::{convert_to_markdown as convert_hocr_to_markdown, extract_hocr_document};
1001
+ use crate::hocr::{convert_to_markdown_with_options as convert_hocr_to_markdown, extract_hocr_document};
1002
1002
 
1003
1003
  let (elements, metadata) = extract_hocr_document(&dom, options.debug);
1004
1004
 
@@ -1026,7 +1026,7 @@ fn convert_html_impl(
1026
1026
  }
1027
1027
  }
1028
1028
 
1029
- let mut markdown = convert_hocr_to_markdown(&elements, true);
1029
+ let mut markdown = convert_hocr_to_markdown(&elements, true, options.hocr_spatial_tables);
1030
1030
 
1031
1031
  if markdown.trim().is_empty() {
1032
1032
  return Ok(output);