html-to-markdown 3.2.4 → 3.4.0.pre.rc.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136)
  1. checksums.yaml +4 -4
  2. data/Steepfile +6 -0
  3. data/ext/html_to_markdown_rb/Cargo.toml +2 -2
  4. data/ext/html_to_markdown_rb/native/Cargo.toml +28 -0
  5. data/ext/html_to_markdown_rb/src/html-to-markdown/version.rb +10 -0
  6. data/ext/html_to_markdown_rb/src/html-to-markdown.rb +13 -0
  7. data/ext/html_to_markdown_rb/src/lib.rs +2088 -268
  8. data/lib/bin/html-to-markdown +0 -0
  9. data/lib/html_to_markdown/version.rb +1 -1
  10. data/lib/html_to_markdown.rb +5 -3
  11. data/sig/types.rbs +769 -0
  12. data/vendor/Cargo.toml +2 -2
  13. data/vendor/html-to-markdown-rs/Cargo.toml +1 -1
  14. data/vendor/html-to-markdown-rs/examples/basic.rs +1 -1
  15. data/vendor/html-to-markdown-rs/examples/table.rs +1 -1
  16. data/vendor/html-to-markdown-rs/examples/test_deser.rs +1 -1
  17. data/vendor/html-to-markdown-rs/examples/test_escape.rs +1 -1
  18. data/vendor/html-to-markdown-rs/examples/test_inline_formatting.rs +1 -1
  19. data/vendor/html-to-markdown-rs/examples/test_lists.rs +1 -1
  20. data/vendor/html-to-markdown-rs/examples/test_semantic_tags.rs +1 -1
  21. data/vendor/html-to-markdown-rs/examples/test_tables.rs +1 -1
  22. data/vendor/html-to-markdown-rs/examples/test_task_lists.rs +1 -1
  23. data/vendor/html-to-markdown-rs/examples/test_whitespace.rs +1 -1
  24. data/vendor/html-to-markdown-rs/src/convert_api.rs +15 -25
  25. data/vendor/html-to-markdown-rs/src/converter/block/blockquote.rs +1 -1
  26. data/vendor/html-to-markdown-rs/src/converter/block/container.rs +3 -3
  27. data/vendor/html-to-markdown-rs/src/converter/block/div.rs +1 -1
  28. data/vendor/html-to-markdown-rs/src/converter/block/heading.rs +6 -7
  29. data/vendor/html-to-markdown-rs/src/converter/block/horizontal_rule.rs +1 -1
  30. data/vendor/html-to-markdown-rs/src/converter/block/line_break.rs +1 -1
  31. data/vendor/html-to-markdown-rs/src/converter/block/mod.rs +0 -108
  32. data/vendor/html-to-markdown-rs/src/converter/block/paragraph.rs +1 -1
  33. data/vendor/html-to-markdown-rs/src/converter/block/preformatted.rs +1 -1
  34. data/vendor/html-to-markdown-rs/src/converter/block/table/builder.rs +1 -1
  35. data/vendor/html-to-markdown-rs/src/converter/block/table/cell.rs +1 -1
  36. data/vendor/html-to-markdown-rs/src/converter/block/table/layout.rs +1 -1
  37. data/vendor/html-to-markdown-rs/src/converter/block/table/mod.rs +2 -4
  38. data/vendor/html-to-markdown-rs/src/converter/block/unknown.rs +1 -1
  39. data/vendor/html-to-markdown-rs/src/converter/context.rs +10 -0
  40. data/vendor/html-to-markdown-rs/src/converter/dom_context.rs +1 -1
  41. data/vendor/html-to-markdown-rs/src/converter/form/elements.rs +14 -14
  42. data/vendor/html-to-markdown-rs/src/converter/form/mod.rs +1 -1
  43. data/vendor/html-to-markdown-rs/src/converter/format/mod.rs +0 -3
  44. data/vendor/html-to-markdown-rs/src/converter/inline/code.rs +1 -1
  45. data/vendor/html-to-markdown-rs/src/converter/inline/emphasis.rs +1 -1
  46. data/vendor/html-to-markdown-rs/src/converter/inline/link.rs +2 -2
  47. data/vendor/html-to-markdown-rs/src/converter/inline/mod.rs +0 -1
  48. data/vendor/html-to-markdown-rs/src/converter/inline/ruby.rs +1 -1
  49. data/vendor/html-to-markdown-rs/src/converter/inline/semantic/mod.rs +1 -1
  50. data/vendor/html-to-markdown-rs/src/converter/list/definition.rs +3 -3
  51. data/vendor/html-to-markdown-rs/src/converter/list/item.rs +1 -1
  52. data/vendor/html-to-markdown-rs/src/converter/list/mod.rs +0 -1
  53. data/vendor/html-to-markdown-rs/src/converter/list/ordered.rs +2 -2
  54. data/vendor/html-to-markdown-rs/src/converter/list/unordered.rs +2 -2
  55. data/vendor/html-to-markdown-rs/src/converter/main.rs +57 -31
  56. data/vendor/html-to-markdown-rs/src/converter/media/embedded.rs +8 -8
  57. data/vendor/html-to-markdown-rs/src/converter/media/image.rs +1 -1
  58. data/vendor/html-to-markdown-rs/src/converter/media/mod.rs +1 -1
  59. data/vendor/html-to-markdown-rs/src/converter/media/svg.rs +5 -5
  60. data/vendor/html-to-markdown-rs/src/converter/mod.rs +6 -17
  61. data/vendor/html-to-markdown-rs/src/converter/plain_text.rs +64 -11
  62. data/vendor/html-to-markdown-rs/src/converter/preprocessing_helpers.rs +80 -22
  63. data/vendor/html-to-markdown-rs/src/converter/semantic/figure.rs +1 -1
  64. data/vendor/html-to-markdown-rs/src/converter/semantic/mod.rs +1 -1
  65. data/vendor/html-to-markdown-rs/src/converter/text/mod.rs +0 -4
  66. data/vendor/html-to-markdown-rs/src/converter/utility/attributes.rs +5 -9
  67. data/vendor/html-to-markdown-rs/src/converter/utility/caching.rs +3 -3
  68. data/vendor/html-to-markdown-rs/src/converter/utility/content.rs +10 -10
  69. data/vendor/html-to-markdown-rs/src/converter/utility/preprocessing.rs +13 -13
  70. data/vendor/html-to-markdown-rs/src/converter/utility/serialization.rs +4 -4
  71. data/vendor/html-to-markdown-rs/src/converter/utility/siblings.rs +6 -14
  72. data/vendor/html-to-markdown-rs/src/inline_images.rs +6 -0
  73. data/vendor/html-to-markdown-rs/src/lib.rs +17 -18
  74. data/vendor/html-to-markdown-rs/src/options/conversion.rs +31 -0
  75. data/vendor/html-to-markdown-rs/src/prelude.rs +1 -12
  76. data/vendor/html-to-markdown-rs/src/text.rs +0 -44
  77. data/vendor/html-to-markdown-rs/src/types/warnings.rs +2 -0
  78. data/vendor/html-to-markdown-rs/src/visitor/types.rs +5 -1
  79. data/vendor/html-to-markdown-rs/src/visitor_helpers.rs +4 -1
  80. data/vendor/html-to-markdown-rs/tests/br_in_inline_test.rs +1 -1
  81. data/vendor/html-to-markdown-rs/tests/commonmark_compliance_test.rs +1 -1
  82. data/vendor/html-to-markdown-rs/tests/djot_output_test.rs +1 -1
  83. data/vendor/html-to-markdown-rs/tests/exclude_selectors_test.rs +136 -0
  84. data/vendor/html-to-markdown-rs/tests/integration_test.rs +1 -1
  85. data/vendor/html-to-markdown-rs/tests/issue_121_regressions.rs +1 -1
  86. data/vendor/html-to-markdown-rs/tests/issue_127_regressions.rs +1 -1
  87. data/vendor/html-to-markdown-rs/tests/issue_128_regressions.rs +1 -1
  88. data/vendor/html-to-markdown-rs/tests/issue_131_regressions.rs +1 -1
  89. data/vendor/html-to-markdown-rs/tests/issue_134_regressions.rs +1 -1
  90. data/vendor/html-to-markdown-rs/tests/issue_139_regressions.rs +1 -1
  91. data/vendor/html-to-markdown-rs/tests/issue_140_regressions.rs +1 -1
  92. data/vendor/html-to-markdown-rs/tests/issue_143_regressions.rs +1 -1
  93. data/vendor/html-to-markdown-rs/tests/issue_145_regressions.rs +1 -1
  94. data/vendor/html-to-markdown-rs/tests/issue_146_regressions.rs +1 -1
  95. data/vendor/html-to-markdown-rs/tests/issue_176_regressions.rs +2 -2
  96. data/vendor/html-to-markdown-rs/tests/issue_190_regressions.rs +1 -1
  97. data/vendor/html-to-markdown-rs/tests/issue_199_regressions.rs +1 -1
  98. data/vendor/html-to-markdown-rs/tests/issue_200_regressions.rs +1 -1
  99. data/vendor/html-to-markdown-rs/tests/issue_212_regressions.rs +1 -1
  100. data/vendor/html-to-markdown-rs/tests/issue_216_217_regressions.rs +1 -1
  101. data/vendor/html-to-markdown-rs/tests/json_ld_script_extraction.rs +2 -2
  102. data/vendor/html-to-markdown-rs/tests/lists_test.rs +1 -1
  103. data/vendor/html-to-markdown-rs/tests/plain_output_test.rs +1 -1
  104. data/vendor/html-to-markdown-rs/tests/preprocessing_tests.rs +1 -1
  105. data/vendor/html-to-markdown-rs/tests/reference_links_test.rs +1 -1
  106. data/vendor/html-to-markdown-rs/tests/sectioning_elements_test.rs +137 -0
  107. data/vendor/html-to-markdown-rs/tests/skip_images_test.rs +1 -1
  108. data/vendor/html-to-markdown-rs/tests/tables_test.rs +2 -2
  109. data/vendor/html-to-markdown-rs/tests/test_custom_elements.rs +1 -1
  110. data/vendor/html-to-markdown-rs/tests/test_issue_187.rs +5 -2
  111. data/vendor/html-to-markdown-rs/tests/test_issue_218.rs +4 -4
  112. data/vendor/html-to-markdown-rs/tests/test_issue_277.rs +77 -0
  113. data/vendor/html-to-markdown-rs/tests/test_max_depth.rs +82 -0
  114. data/vendor/html-to-markdown-rs/tests/test_nested_simple.rs +1 -1
  115. data/vendor/html-to-markdown-rs/tests/test_script_style_stripping.rs +4 -4
  116. data/vendor/html-to-markdown-rs/tests/test_spa_bisect.rs +1 -1
  117. data/vendor/html-to-markdown-rs/tests/visitor_code_integration_test.rs +6 -6
  118. data/vendor/html-to-markdown-rs/tests/visitor_integration_test.rs +103 -35
  119. data/vendor/html-to-markdown-rs/tests/xml_tables_test.rs +1 -1
  120. metadata +21 -43
  121. data/.bundle/config +0 -2
  122. data/.gitignore +0 -3
  123. data/.rubocop.yml +0 -59
  124. data/Gemfile +0 -18
  125. data/Gemfile.lock +0 -173
  126. data/README.md +0 -331
  127. data/Rakefile +0 -26
  128. data/exe/html-to-markdown +0 -6
  129. data/ext/html_to_markdown_rb/src/html_to_markdown_rs/version.rb +0 -6
  130. data/ext/html_to_markdown_rb/src/html_to_markdown_rs.rb +0 -9
  131. data/html-to-markdown-rb.gemspec +0 -99
  132. data/lib/html_to_markdown_rs.rb +0 -3
  133. data/sig/html_to_markdown.rbs +0 -149
  134. data/vendor/html-to-markdown-rs/src/converter/text/escaping.rs +0 -94
  135. data/vendor/html-to-markdown-rs/src/converter/text/normalization.rs +0 -86
  136. data/vendor/html-to-markdown-rs/src/safety.rs +0 -70
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5b178073d72890b5e70bc49e43ceadea99e6bcf2084fe3144cfeca4fc344f8c3
4
- data.tar.gz: fa08a0e2b76cb5b2dbcc30f9b70f3c4c94f24233b928d6f298473212dede4bdf
3
+ metadata.gz: ce0862c960079660d08fea06ebc87b2cd2d2c8b944baaad80953669df9e20604
4
+ data.tar.gz: fd2a1cf7286ef34596e5ceb8dcdefbcc416c3eb1cdbf10b264438ec8c183f345
5
5
  SHA512:
6
- metadata.gz: bd7003fb95b5ea676de569cbbbe5017a8aab149382ddbf4a917131c30bce9a849d9b679cba36c0007757dcd123f79e3f86dbb9bce37f26b2230eb120b0a71997
7
- data.tar.gz: fc44a0e8254f63b6ed00007336371bd74ce0a8bd19d151b126e75184211028a20b918088c7f83e5f91ffea7aa1f0a46affdcdc42763db6152e31ca48d17e516d
6
+ metadata.gz: 9d256ca1ae78387f3338b1adcc7a01a5628e152bc59dd7ef967fa17fe3a266f275218c404b35ef0b180e9384dbdf07fb91f978a9c3b720c0aa15656334da293e
7
+ data.tar.gz: 81a86eeb5d17e25d7bb2d559acb96f998b9c4b7b0f66d67cc02be4bdaa4b23584223d0293e3a2e9fb0750b4d3ad11492dfffe5e857c223a1c1e55c4a4206c5ae
data/Steepfile CHANGED
@@ -7,6 +7,12 @@ target :lib do
7
7
 
8
8
  check 'lib'
9
9
 
10
+ configure_code_diagnostics do |hash|
11
+ hash[Steep::Diagnostic::Ruby::UnannotatedEmptyCollection] = :hint
12
+ hash[Steep::Diagnostic::Ruby::UnknownConstant] = :hint
13
+ hash[Steep::Diagnostic::Ruby::NoMethod] = :hint
14
+ end
15
+
10
16
  # Configure libraries
11
17
  library 'pathname'
12
18
  library 'open3'
data/ext/html_to_markdown_rb/Cargo.toml CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  [package]
4
4
  name = "html-to-markdown-rb"
5
- version = "3.2.0"
5
+ version = "3.4.0-rc.14"
6
6
  edition = "2024"
7
7
  license = "MIT"
8
8
 
@@ -10,7 +10,7 @@ license = "MIT"
10
10
  crate-type = ["cdylib"]
11
11
 
12
12
  [dependencies]
13
- html-to-markdown-rs = { path = "../../../../crates/html-to-markdown", features = ["full", "metadata", "visitor", "serde", "inline-images"] }
13
+ html-to-markdown-rs = { path = "../../vendor/html-to-markdown-rs", features = ["full", "metadata", "visitor", "serde", "inline-images"] }
14
14
  magnus = "0.8"
15
15
  serde = { version = "1", features = ["derive"] }
16
16
  serde_json = "1"
data/ext/html_to_markdown_rb/native/Cargo.toml ADDED
@@ -0,0 +1,28 @@
1
+ [package]
2
+ name = "html-to-markdown-rb"
3
+ version = "3.4.0-rc.3"
4
+ edition = "2024"
5
+ license = "MIT"
6
+ description = "High-performance HTML to Markdown converter"
7
+ readme = false
8
+ keywords = ["html", "markdown", "converter"]
9
+ categories = ["text-processing"]
10
+
11
+ [package.metadata.cargo-machete]
12
+ ignored = ["async-trait", "tokio"]
13
+
14
+ [lib]
15
+ name = "html_to_markdown_rb"
16
+ path = "../src/lib.rs"
17
+ crate-type = ["cdylib"]
18
+
19
+ [dependencies]
20
+ html-to-markdown-rs = { path = "../../../vendor/html-to-markdown-rs", features = ["full", "metadata", "visitor", "serde", "inline-images"] }
21
+ magnus = "0.8"
22
+ serde = { version = "1", features = ["derive"] }
23
+ serde_json = "1"
24
+ tokio = { version = "1", features = ["rt-multi-thread"] }
25
+ async-trait = "0.1"
26
+
27
+ [lints]
28
+ workspace = true
data/ext/html_to_markdown_rb/src/html-to-markdown/version.rb ADDED
@@ -0,0 +1,10 @@
1
+ # This file is auto-generated by alef — DO NOT EDIT.
2
+ # alef:hash:93356c3b57bac64be0a438c79e2d6c509f5a7c9950897339702d8c0f0c89cdb8
3
+ # To regenerate: alef generate
4
+ # To verify freshness: alef verify --exit-code
5
+ # Issues & docs: https://github.com/kreuzberg-dev/alef
6
+ # frozen_string_literal: true
7
+
8
+ module HtmlToMarkdown
9
+ VERSION = '3.4.0.pre.rc.14'
10
+ end
data/ext/html_to_markdown_rb/src/html-to-markdown.rb ADDED
@@ -0,0 +1,13 @@
1
+ # This file is auto-generated by alef — DO NOT EDIT.
2
+ # alef:hash:de7c621ce0da78b37e21fdb1d38bbbf5c3259509f57cb0f671732eb28b2b7e56
3
+ # To regenerate: alef generate
4
+ # To verify freshness: alef verify --exit-code
5
+ # Issues & docs: https://github.com/kreuzberg-dev/alef
6
+ # frozen_string_literal: true
7
+
8
+ require_relative 'html-to-markdown/version'
9
+ require_relative 'html-to-markdown/native'
10
+
11
+ module HtmlToMarkdown
12
+ # Re-export all types and functions from native extension
13
+ end