html-to-markdown 3.4.0 → 3.6.0.pre.rc.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +21 -0
  3. data/README.md +347 -0
  4. data/Steepfile +10 -2
  5. data/ext/html_to_markdown_rb/Cargo.toml +3 -2
  6. data/ext/html_to_markdown_rb/extconf.rb +5 -5
  7. data/ext/html_to_markdown_rb/native/Cargo.lock +962 -0
  8. data/ext/html_to_markdown_rb/native/Cargo.toml +6 -11
  9. data/ext/html_to_markdown_rb/native/extconf.rb +14 -0
  10. data/ext/html_to_markdown_rb/src/lib.rs +1715 -646
  11. data/lib/html_to_markdown/native.rb +913 -37
  12. data/lib/html_to_markdown/version.rb +3 -3
  13. data/lib/html_to_markdown.rb +9 -4
  14. data/lib/html_to_markdown_rb.so +0 -0
  15. data/sig/types.rbs +59 -292
  16. metadata +32 -179
  17. data/ext/html_to_markdown_rb/Makefile +0 -592
  18. data/lib/bin/html-to-markdown +0 -0
  19. data/vendor/Cargo.toml +0 -33
  20. data/vendor/html-to-markdown-rs/Cargo.toml +0 -54
  21. data/vendor/html-to-markdown-rs/README.md +0 -278
  22. data/vendor/html-to-markdown-rs/examples/basic.rs +0 -24
  23. data/vendor/html-to-markdown-rs/examples/table.rs +0 -25
  24. data/vendor/html-to-markdown-rs/examples/test_deser.rs +0 -12
  25. data/vendor/html-to-markdown-rs/examples/test_escape.rs +0 -58
  26. data/vendor/html-to-markdown-rs/examples/test_inline_formatting.rs +0 -113
  27. data/vendor/html-to-markdown-rs/examples/test_lists.rs +0 -39
  28. data/vendor/html-to-markdown-rs/examples/test_semantic_tags.rs +0 -89
  29. data/vendor/html-to-markdown-rs/examples/test_tables.rs +0 -100
  30. data/vendor/html-to-markdown-rs/examples/test_task_lists.rs +0 -61
  31. data/vendor/html-to-markdown-rs/examples/test_whitespace.rs +0 -34
  32. data/vendor/html-to-markdown-rs/src/convert_api.rs +0 -349
  33. data/vendor/html-to-markdown-rs/src/converter/block/blockquote.rs +0 -178
  34. data/vendor/html-to-markdown-rs/src/converter/block/container.rs +0 -114
  35. data/vendor/html-to-markdown-rs/src/converter/block/div.rs +0 -149
  36. data/vendor/html-to-markdown-rs/src/converter/block/heading.rs +0 -428
  37. data/vendor/html-to-markdown-rs/src/converter/block/horizontal_rule.rs +0 -103
  38. data/vendor/html-to-markdown-rs/src/converter/block/line_break.rs +0 -89
  39. data/vendor/html-to-markdown-rs/src/converter/block/mod.rs +0 -10
  40. data/vendor/html-to-markdown-rs/src/converter/block/paragraph.rs +0 -140
  41. data/vendor/html-to-markdown-rs/src/converter/block/preformatted.rs +0 -298
  42. data/vendor/html-to-markdown-rs/src/converter/block/table/builder.rs +0 -453
  43. data/vendor/html-to-markdown-rs/src/converter/block/table/caption.rs +0 -44
  44. data/vendor/html-to-markdown-rs/src/converter/block/table/cell.rs +0 -276
  45. data/vendor/html-to-markdown-rs/src/converter/block/table/cells.rs +0 -336
  46. data/vendor/html-to-markdown-rs/src/converter/block/table/layout.rs +0 -58
  47. data/vendor/html-to-markdown-rs/src/converter/block/table/mod.rs +0 -266
  48. data/vendor/html-to-markdown-rs/src/converter/block/table/scanner.rs +0 -146
  49. data/vendor/html-to-markdown-rs/src/converter/block/table/utils.rs +0 -34
  50. data/vendor/html-to-markdown-rs/src/converter/block/unknown.rs +0 -138
  51. data/vendor/html-to-markdown-rs/src/converter/context.rs +0 -208
  52. data/vendor/html-to-markdown-rs/src/converter/dom_context.rs +0 -337
  53. data/vendor/html-to-markdown-rs/src/converter/form/elements.rs +0 -770
  54. data/vendor/html-to-markdown-rs/src/converter/form/mod.rs +0 -82
  55. data/vendor/html-to-markdown-rs/src/converter/format/djot.rs +0 -64
  56. data/vendor/html-to-markdown-rs/src/converter/format/markdown.rs +0 -59
  57. data/vendor/html-to-markdown-rs/src/converter/format/mod.rs +0 -43
  58. data/vendor/html-to-markdown-rs/src/converter/handlers/blockquote.rs +0 -173
  59. data/vendor/html-to-markdown-rs/src/converter/handlers/code_block.rs +0 -434
  60. data/vendor/html-to-markdown-rs/src/converter/handlers/graphic.rs +0 -234
  61. data/vendor/html-to-markdown-rs/src/converter/handlers/image.rs +0 -282
  62. data/vendor/html-to-markdown-rs/src/converter/handlers/link.rs +0 -316
  63. data/vendor/html-to-markdown-rs/src/converter/handlers/mod.rs +0 -26
  64. data/vendor/html-to-markdown-rs/src/converter/inline/code.rs +0 -306
  65. data/vendor/html-to-markdown-rs/src/converter/inline/emphasis.rs +0 -345
  66. data/vendor/html-to-markdown-rs/src/converter/inline/link.rs +0 -428
  67. data/vendor/html-to-markdown-rs/src/converter/inline/mod.rs +0 -237
  68. data/vendor/html-to-markdown-rs/src/converter/inline/ruby.rs +0 -337
  69. data/vendor/html-to-markdown-rs/src/converter/inline/semantic/marks.rs +0 -566
  70. data/vendor/html-to-markdown-rs/src/converter/inline/semantic/mod.rs +0 -86
  71. data/vendor/html-to-markdown-rs/src/converter/inline/semantic/typography.rs +0 -558
  72. data/vendor/html-to-markdown-rs/src/converter/list/definition.rs +0 -232
  73. data/vendor/html-to-markdown-rs/src/converter/list/item.rs +0 -332
  74. data/vendor/html-to-markdown-rs/src/converter/list/mod.rs +0 -70
  75. data/vendor/html-to-markdown-rs/src/converter/list/ordered.rs +0 -201
  76. data/vendor/html-to-markdown-rs/src/converter/list/unordered.rs +0 -195
  77. data/vendor/html-to-markdown-rs/src/converter/list/utils.rs +0 -314
  78. data/vendor/html-to-markdown-rs/src/converter/main.rs +0 -710
  79. data/vendor/html-to-markdown-rs/src/converter/main_helpers.rs +0 -452
  80. data/vendor/html-to-markdown-rs/src/converter/media/embedded.rs +0 -393
  81. data/vendor/html-to-markdown-rs/src/converter/media/graphic.rs +0 -4
  82. data/vendor/html-to-markdown-rs/src/converter/media/image.rs +0 -183
  83. data/vendor/html-to-markdown-rs/src/converter/media/mod.rs +0 -87
  84. data/vendor/html-to-markdown-rs/src/converter/media/svg.rs +0 -280
  85. data/vendor/html-to-markdown-rs/src/converter/metadata.rs +0 -220
  86. data/vendor/html-to-markdown-rs/src/converter/mod.rs +0 -156
  87. data/vendor/html-to-markdown-rs/src/converter/plain_text.rs +0 -516
  88. data/vendor/html-to-markdown-rs/src/converter/preprocessing_helpers.rs +0 -201
  89. data/vendor/html-to-markdown-rs/src/converter/reference_collector.rs +0 -69
  90. data/vendor/html-to-markdown-rs/src/converter/semantic/attributes.rs +0 -269
  91. data/vendor/html-to-markdown-rs/src/converter/semantic/definition_list.rs +0 -266
  92. data/vendor/html-to-markdown-rs/src/converter/semantic/figure.rs +0 -391
  93. data/vendor/html-to-markdown-rs/src/converter/semantic/mod.rs +0 -112
  94. data/vendor/html-to-markdown-rs/src/converter/semantic/sectioning.rs +0 -85
  95. data/vendor/html-to-markdown-rs/src/converter/semantic/summary.rs +0 -324
  96. data/vendor/html-to-markdown-rs/src/converter/text/mod.rs +0 -8
  97. data/vendor/html-to-markdown-rs/src/converter/text/processing.rs +0 -56
  98. data/vendor/html-to-markdown-rs/src/converter/text_node.rs +0 -269
  99. data/vendor/html-to-markdown-rs/src/converter/utility/attributes.rs +0 -151
  100. data/vendor/html-to-markdown-rs/src/converter/utility/caching.rs +0 -74
  101. data/vendor/html-to-markdown-rs/src/converter/utility/content.rs +0 -271
  102. data/vendor/html-to-markdown-rs/src/converter/utility/mod.rs +0 -17
  103. data/vendor/html-to-markdown-rs/src/converter/utility/preprocessing.rs +0 -1002
  104. data/vendor/html-to-markdown-rs/src/converter/utility/serialization.rs +0 -126
  105. data/vendor/html-to-markdown-rs/src/converter/utility/siblings.rs +0 -97
  106. data/vendor/html-to-markdown-rs/src/converter/visitor_hooks.rs +0 -189
  107. data/vendor/html-to-markdown-rs/src/error.rs +0 -43
  108. data/vendor/html-to-markdown-rs/src/exports.rs +0 -24
  109. data/vendor/html-to-markdown-rs/src/inline_images.rs +0 -336
  110. data/vendor/html-to-markdown-rs/src/lib.rs +0 -139
  111. data/vendor/html-to-markdown-rs/src/metadata/collector.rs +0 -457
  112. data/vendor/html-to-markdown-rs/src/metadata/config.rs +0 -394
  113. data/vendor/html-to-markdown-rs/src/metadata/extraction.rs +0 -398
  114. data/vendor/html-to-markdown-rs/src/metadata/mod.rs +0 -288
  115. data/vendor/html-to-markdown-rs/src/metadata/types.rs +0 -477
  116. data/vendor/html-to-markdown-rs/src/options/conversion.rs +0 -559
  117. data/vendor/html-to-markdown-rs/src/options/inline_image.rs +0 -111
  118. data/vendor/html-to-markdown-rs/src/options/mod.rs +0 -20
  119. data/vendor/html-to-markdown-rs/src/options/preprocessing.rs +0 -201
  120. data/vendor/html-to-markdown-rs/src/options/validation.rs +0 -416
  121. data/vendor/html-to-markdown-rs/src/prelude.rs +0 -1
  122. data/vendor/html-to-markdown-rs/src/rcdom.rs +0 -487
  123. data/vendor/html-to-markdown-rs/src/text.rs +0 -358
  124. data/vendor/html-to-markdown-rs/src/types/document.rs +0 -191
  125. data/vendor/html-to-markdown-rs/src/types/mod.rs +0 -17
  126. data/vendor/html-to-markdown-rs/src/types/result.rs +0 -54
  127. data/vendor/html-to-markdown-rs/src/types/structure_builder.rs +0 -791
  128. data/vendor/html-to-markdown-rs/src/types/structure_collector.rs +0 -483
  129. data/vendor/html-to-markdown-rs/src/types/tables.rs +0 -52
  130. data/vendor/html-to-markdown-rs/src/types/warnings.rs +0 -33
  131. data/vendor/html-to-markdown-rs/src/validation.rs +0 -158
  132. data/vendor/html-to-markdown-rs/src/visitor/default_impl.rs +0 -63
  133. data/vendor/html-to-markdown-rs/src/visitor/mod.rs +0 -41
  134. data/vendor/html-to-markdown-rs/src/visitor/traits.rs +0 -370
  135. data/vendor/html-to-markdown-rs/src/visitor/types.rs +0 -319
  136. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/mod.rs +0 -1
  137. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/content.rs +0 -126
  138. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/mod.rs +0 -27
  139. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/state.rs +0 -110
  140. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/traversal.rs +0 -250
  141. data/vendor/html-to-markdown-rs/src/visitor_helpers.rs +0 -597
  142. data/vendor/html-to-markdown-rs/src/wrapper/sync.rs +0 -413
  143. data/vendor/html-to-markdown-rs/src/wrapper/utils.rs +0 -290
  144. data/vendor/html-to-markdown-rs/src/wrapper.rs +0 -9
  145. data/vendor/html-to-markdown-rs/tests/br_in_inline_test.rs +0 -87
  146. data/vendor/html-to-markdown-rs/tests/commonmark_compliance_test.rs +0 -297
  147. data/vendor/html-to-markdown-rs/tests/djot_output_test.rs +0 -153
  148. data/vendor/html-to-markdown-rs/tests/exclude_selectors_test.rs +0 -132
  149. data/vendor/html-to-markdown-rs/tests/integration_test.rs +0 -631
  150. data/vendor/html-to-markdown-rs/tests/issue_121_regressions.rs +0 -49
  151. data/vendor/html-to-markdown-rs/tests/issue_127_regressions.rs +0 -58
  152. data/vendor/html-to-markdown-rs/tests/issue_128_regressions.rs +0 -17
  153. data/vendor/html-to-markdown-rs/tests/issue_131_regressions.rs +0 -41
  154. data/vendor/html-to-markdown-rs/tests/issue_134_regressions.rs +0 -40
  155. data/vendor/html-to-markdown-rs/tests/issue_139_regressions.rs +0 -26
  156. data/vendor/html-to-markdown-rs/tests/issue_140_regressions.rs +0 -185
  157. data/vendor/html-to-markdown-rs/tests/issue_143_regressions.rs +0 -100
  158. data/vendor/html-to-markdown-rs/tests/issue_145_regressions.rs +0 -133
  159. data/vendor/html-to-markdown-rs/tests/issue_146_regressions.rs +0 -144
  160. data/vendor/html-to-markdown-rs/tests/issue_176_regressions.rs +0 -62
  161. data/vendor/html-to-markdown-rs/tests/issue_190_regressions.rs +0 -128
  162. data/vendor/html-to-markdown-rs/tests/issue_199_regressions.rs +0 -20
  163. data/vendor/html-to-markdown-rs/tests/issue_200_regressions.rs +0 -62
  164. data/vendor/html-to-markdown-rs/tests/issue_212_regressions.rs +0 -68
  165. data/vendor/html-to-markdown-rs/tests/issue_216_217_regressions.rs +0 -87
  166. data/vendor/html-to-markdown-rs/tests/issue_336_regressions.rs +0 -74
  167. data/vendor/html-to-markdown-rs/tests/issue_339_regressions.rs +0 -92
  168. data/vendor/html-to-markdown-rs/tests/issue_347_regressions.rs +0 -154
  169. data/vendor/html-to-markdown-rs/tests/issue_348_visitor_plain.rs +0 -93
  170. data/vendor/html-to-markdown-rs/tests/json_ld_script_extraction.rs +0 -44
  171. data/vendor/html-to-markdown-rs/tests/lists_test.rs +0 -199
  172. data/vendor/html-to-markdown-rs/tests/plain_output_test.rs +0 -273
  173. data/vendor/html-to-markdown-rs/tests/preprocessing_tests.rs +0 -61
  174. data/vendor/html-to-markdown-rs/tests/reference_links_test.rs +0 -169
  175. data/vendor/html-to-markdown-rs/tests/sectioning_elements_test.rs +0 -137
  176. data/vendor/html-to-markdown-rs/tests/skip_images_test.rs +0 -522
  177. data/vendor/html-to-markdown-rs/tests/tables_test.rs +0 -743
  178. data/vendor/html-to-markdown-rs/tests/test_custom_elements.rs +0 -41
  179. data/vendor/html-to-markdown-rs/tests/test_issue_187.rs +0 -204
  180. data/vendor/html-to-markdown-rs/tests/test_issue_218.rs +0 -68
  181. data/vendor/html-to-markdown-rs/tests/test_issue_277.rs +0 -77
  182. data/vendor/html-to-markdown-rs/tests/test_max_depth.rs +0 -82
  183. data/vendor/html-to-markdown-rs/tests/test_nested_simple.rs +0 -45
  184. data/vendor/html-to-markdown-rs/tests/test_script_style_stripping.rs +0 -396
  185. data/vendor/html-to-markdown-rs/tests/test_spa_bisect.rs +0 -34
  186. data/vendor/html-to-markdown-rs/tests/visitor_code_integration_test.rs +0 -121
  187. data/vendor/html-to-markdown-rs/tests/visitor_integration_test.rs +0 -1190
  188. data/vendor/html-to-markdown-rs/tests/xml_tables_test.rs +0 -372
@@ -1,15 +1,15 @@
1
1
  [package]
2
2
  name = "html-to-markdown-rb"
3
- version = "3.4.0-rc.25"
3
+ version = "3.6.0-rc.23"
4
4
  edition = "2024"
5
5
  license = "MIT"
6
6
  description = "High-performance HTML to Markdown converter"
7
7
  readme = false
8
- keywords = ["html", "markdown", "converter"]
9
- categories = ["text-processing"]
8
+ keywords = ["converter", "html", "markdown"]
9
+ categories = []
10
10
 
11
11
  [package.metadata.cargo-machete]
12
- ignored = ["async-trait", "tokio"]
12
+ ignored = ["rb-sys"]
13
13
 
14
14
  [lib]
15
15
  name = "html_to_markdown_rb"
@@ -18,14 +18,9 @@ crate-type = ["cdylib"]
18
18
 
19
19
  [dependencies]
20
20
  async-trait = "0.1"
21
- html-to-markdown-rs = { path = "../../../vendor/html-to-markdown-rs", features = [
22
- "full",
23
- "metadata",
24
- "visitor",
25
- "serde",
26
- "inline-images",
27
- ] }
21
+ html-to-markdown-rs = { version = "3.6.0-rc.23", features = ["serde", "metadata", "visitor", "inline-images", "testkit"] }
28
22
  magnus = "0.8"
23
+ rb-sys = ">=0.9, <0.9.128"
29
24
  serde = { version = "1", features = ["derive"] }
30
25
  serde_json = "1"
31
26
  tokio = { version = "1", features = ["rt-multi-thread"] }
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "mkmf"
4
+ require "rb_sys/mkmf"
5
+
6
+ default_profile = ENV.fetch("CARGO_PROFILE", "release")
7
+
8
+ create_rust_makefile("html_to_markdown_rb") do |config|
9
+ config.profile = default_profile.to_sym
10
+ # extconf.rb and Cargo.toml are siblings under ext/html_to_markdown_rb/native/; rb_sys interprets
11
+ # ext_dir relative to extconf.rb, so "." finds the sibling Cargo.toml. "native" would
12
+ # resolve to native/native/Cargo.toml and break `gem install` on end-user machines.
13
+ config.ext_dir = "."
14
+ end