html-to-markdown 3.2.4 → 3.4.0.pre.rc.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136)
  1. checksums.yaml +4 -4
  2. data/Steepfile +6 -0
  3. data/ext/html_to_markdown_rb/Cargo.toml +2 -2
  4. data/ext/html_to_markdown_rb/native/Cargo.toml +28 -0
  5. data/ext/html_to_markdown_rb/src/html-to-markdown/version.rb +10 -0
  6. data/ext/html_to_markdown_rb/src/html-to-markdown.rb +13 -0
  7. data/ext/html_to_markdown_rb/src/lib.rs +2088 -268
  8. data/lib/bin/html-to-markdown +0 -0
  9. data/lib/html_to_markdown/version.rb +1 -1
  10. data/lib/html_to_markdown.rb +5 -3
  11. data/sig/types.rbs +769 -0
  12. data/vendor/Cargo.toml +2 -2
  13. data/vendor/html-to-markdown-rs/Cargo.toml +1 -1
  14. data/vendor/html-to-markdown-rs/examples/basic.rs +1 -1
  15. data/vendor/html-to-markdown-rs/examples/table.rs +1 -1
  16. data/vendor/html-to-markdown-rs/examples/test_deser.rs +1 -1
  17. data/vendor/html-to-markdown-rs/examples/test_escape.rs +1 -1
  18. data/vendor/html-to-markdown-rs/examples/test_inline_formatting.rs +1 -1
  19. data/vendor/html-to-markdown-rs/examples/test_lists.rs +1 -1
  20. data/vendor/html-to-markdown-rs/examples/test_semantic_tags.rs +1 -1
  21. data/vendor/html-to-markdown-rs/examples/test_tables.rs +1 -1
  22. data/vendor/html-to-markdown-rs/examples/test_task_lists.rs +1 -1
  23. data/vendor/html-to-markdown-rs/examples/test_whitespace.rs +1 -1
  24. data/vendor/html-to-markdown-rs/src/convert_api.rs +15 -25
  25. data/vendor/html-to-markdown-rs/src/converter/block/blockquote.rs +1 -1
  26. data/vendor/html-to-markdown-rs/src/converter/block/container.rs +3 -3
  27. data/vendor/html-to-markdown-rs/src/converter/block/div.rs +1 -1
  28. data/vendor/html-to-markdown-rs/src/converter/block/heading.rs +6 -7
  29. data/vendor/html-to-markdown-rs/src/converter/block/horizontal_rule.rs +1 -1
  30. data/vendor/html-to-markdown-rs/src/converter/block/line_break.rs +1 -1
  31. data/vendor/html-to-markdown-rs/src/converter/block/mod.rs +0 -108
  32. data/vendor/html-to-markdown-rs/src/converter/block/paragraph.rs +1 -1
  33. data/vendor/html-to-markdown-rs/src/converter/block/preformatted.rs +1 -1
  34. data/vendor/html-to-markdown-rs/src/converter/block/table/builder.rs +1 -1
  35. data/vendor/html-to-markdown-rs/src/converter/block/table/cell.rs +1 -1
  36. data/vendor/html-to-markdown-rs/src/converter/block/table/layout.rs +1 -1
  37. data/vendor/html-to-markdown-rs/src/converter/block/table/mod.rs +2 -4
  38. data/vendor/html-to-markdown-rs/src/converter/block/unknown.rs +1 -1
  39. data/vendor/html-to-markdown-rs/src/converter/context.rs +10 -0
  40. data/vendor/html-to-markdown-rs/src/converter/dom_context.rs +1 -1
  41. data/vendor/html-to-markdown-rs/src/converter/form/elements.rs +14 -14
  42. data/vendor/html-to-markdown-rs/src/converter/form/mod.rs +1 -1
  43. data/vendor/html-to-markdown-rs/src/converter/format/mod.rs +0 -3
  44. data/vendor/html-to-markdown-rs/src/converter/inline/code.rs +1 -1
  45. data/vendor/html-to-markdown-rs/src/converter/inline/emphasis.rs +1 -1
  46. data/vendor/html-to-markdown-rs/src/converter/inline/link.rs +2 -2
  47. data/vendor/html-to-markdown-rs/src/converter/inline/mod.rs +0 -1
  48. data/vendor/html-to-markdown-rs/src/converter/inline/ruby.rs +1 -1
  49. data/vendor/html-to-markdown-rs/src/converter/inline/semantic/mod.rs +1 -1
  50. data/vendor/html-to-markdown-rs/src/converter/list/definition.rs +3 -3
  51. data/vendor/html-to-markdown-rs/src/converter/list/item.rs +1 -1
  52. data/vendor/html-to-markdown-rs/src/converter/list/mod.rs +0 -1
  53. data/vendor/html-to-markdown-rs/src/converter/list/ordered.rs +2 -2
  54. data/vendor/html-to-markdown-rs/src/converter/list/unordered.rs +2 -2
  55. data/vendor/html-to-markdown-rs/src/converter/main.rs +57 -31
  56. data/vendor/html-to-markdown-rs/src/converter/media/embedded.rs +8 -8
  57. data/vendor/html-to-markdown-rs/src/converter/media/image.rs +1 -1
  58. data/vendor/html-to-markdown-rs/src/converter/media/mod.rs +1 -1
  59. data/vendor/html-to-markdown-rs/src/converter/media/svg.rs +5 -5
  60. data/vendor/html-to-markdown-rs/src/converter/mod.rs +6 -17
  61. data/vendor/html-to-markdown-rs/src/converter/plain_text.rs +64 -11
  62. data/vendor/html-to-markdown-rs/src/converter/preprocessing_helpers.rs +80 -22
  63. data/vendor/html-to-markdown-rs/src/converter/semantic/figure.rs +1 -1
  64. data/vendor/html-to-markdown-rs/src/converter/semantic/mod.rs +1 -1
  65. data/vendor/html-to-markdown-rs/src/converter/text/mod.rs +0 -4
  66. data/vendor/html-to-markdown-rs/src/converter/utility/attributes.rs +5 -9
  67. data/vendor/html-to-markdown-rs/src/converter/utility/caching.rs +3 -3
  68. data/vendor/html-to-markdown-rs/src/converter/utility/content.rs +10 -10
  69. data/vendor/html-to-markdown-rs/src/converter/utility/preprocessing.rs +13 -13
  70. data/vendor/html-to-markdown-rs/src/converter/utility/serialization.rs +4 -4
  71. data/vendor/html-to-markdown-rs/src/converter/utility/siblings.rs +6 -14
  72. data/vendor/html-to-markdown-rs/src/inline_images.rs +6 -0
  73. data/vendor/html-to-markdown-rs/src/lib.rs +17 -18
  74. data/vendor/html-to-markdown-rs/src/options/conversion.rs +31 -0
  75. data/vendor/html-to-markdown-rs/src/prelude.rs +1 -12
  76. data/vendor/html-to-markdown-rs/src/text.rs +0 -44
  77. data/vendor/html-to-markdown-rs/src/types/warnings.rs +2 -0
  78. data/vendor/html-to-markdown-rs/src/visitor/types.rs +5 -1
  79. data/vendor/html-to-markdown-rs/src/visitor_helpers.rs +4 -1
  80. data/vendor/html-to-markdown-rs/tests/br_in_inline_test.rs +1 -1
  81. data/vendor/html-to-markdown-rs/tests/commonmark_compliance_test.rs +1 -1
  82. data/vendor/html-to-markdown-rs/tests/djot_output_test.rs +1 -1
  83. data/vendor/html-to-markdown-rs/tests/exclude_selectors_test.rs +136 -0
  84. data/vendor/html-to-markdown-rs/tests/integration_test.rs +1 -1
  85. data/vendor/html-to-markdown-rs/tests/issue_121_regressions.rs +1 -1
  86. data/vendor/html-to-markdown-rs/tests/issue_127_regressions.rs +1 -1
  87. data/vendor/html-to-markdown-rs/tests/issue_128_regressions.rs +1 -1
  88. data/vendor/html-to-markdown-rs/tests/issue_131_regressions.rs +1 -1
  89. data/vendor/html-to-markdown-rs/tests/issue_134_regressions.rs +1 -1
  90. data/vendor/html-to-markdown-rs/tests/issue_139_regressions.rs +1 -1
  91. data/vendor/html-to-markdown-rs/tests/issue_140_regressions.rs +1 -1
  92. data/vendor/html-to-markdown-rs/tests/issue_143_regressions.rs +1 -1
  93. data/vendor/html-to-markdown-rs/tests/issue_145_regressions.rs +1 -1
  94. data/vendor/html-to-markdown-rs/tests/issue_146_regressions.rs +1 -1
  95. data/vendor/html-to-markdown-rs/tests/issue_176_regressions.rs +2 -2
  96. data/vendor/html-to-markdown-rs/tests/issue_190_regressions.rs +1 -1
  97. data/vendor/html-to-markdown-rs/tests/issue_199_regressions.rs +1 -1
  98. data/vendor/html-to-markdown-rs/tests/issue_200_regressions.rs +1 -1
  99. data/vendor/html-to-markdown-rs/tests/issue_212_regressions.rs +1 -1
  100. data/vendor/html-to-markdown-rs/tests/issue_216_217_regressions.rs +1 -1
  101. data/vendor/html-to-markdown-rs/tests/json_ld_script_extraction.rs +2 -2
  102. data/vendor/html-to-markdown-rs/tests/lists_test.rs +1 -1
  103. data/vendor/html-to-markdown-rs/tests/plain_output_test.rs +1 -1
  104. data/vendor/html-to-markdown-rs/tests/preprocessing_tests.rs +1 -1
  105. data/vendor/html-to-markdown-rs/tests/reference_links_test.rs +1 -1
  106. data/vendor/html-to-markdown-rs/tests/sectioning_elements_test.rs +137 -0
  107. data/vendor/html-to-markdown-rs/tests/skip_images_test.rs +1 -1
  108. data/vendor/html-to-markdown-rs/tests/tables_test.rs +2 -2
  109. data/vendor/html-to-markdown-rs/tests/test_custom_elements.rs +1 -1
  110. data/vendor/html-to-markdown-rs/tests/test_issue_187.rs +5 -2
  111. data/vendor/html-to-markdown-rs/tests/test_issue_218.rs +4 -4
  112. data/vendor/html-to-markdown-rs/tests/test_issue_277.rs +77 -0
  113. data/vendor/html-to-markdown-rs/tests/test_max_depth.rs +82 -0
  114. data/vendor/html-to-markdown-rs/tests/test_nested_simple.rs +1 -1
  115. data/vendor/html-to-markdown-rs/tests/test_script_style_stripping.rs +4 -4
  116. data/vendor/html-to-markdown-rs/tests/test_spa_bisect.rs +1 -1
  117. data/vendor/html-to-markdown-rs/tests/visitor_code_integration_test.rs +6 -6
  118. data/vendor/html-to-markdown-rs/tests/visitor_integration_test.rs +103 -35
  119. data/vendor/html-to-markdown-rs/tests/xml_tables_test.rs +1 -1
  120. metadata +21 -43
  121. data/.bundle/config +0 -2
  122. data/.gitignore +0 -3
  123. data/.rubocop.yml +0 -59
  124. data/Gemfile +0 -18
  125. data/Gemfile.lock +0 -173
  126. data/README.md +0 -331
  127. data/Rakefile +0 -26
  128. data/exe/html-to-markdown +0 -6
  129. data/ext/html_to_markdown_rb/src/html_to_markdown_rs/version.rb +0 -6
  130. data/ext/html_to_markdown_rb/src/html_to_markdown_rs.rb +0 -9
  131. data/html-to-markdown-rb.gemspec +0 -99
  132. data/lib/html_to_markdown_rs.rb +0 -3
  133. data/sig/html_to_markdown.rbs +0 -149
  134. data/vendor/html-to-markdown-rs/src/converter/text/escaping.rs +0 -94
  135. data/vendor/html-to-markdown-rs/src/converter/text/normalization.rs +0 -86
  136. data/vendor/html-to-markdown-rs/src/safety.rs +0 -70
@@ -8,7 +8,7 @@
8
8
  #![cfg(feature = "visitor")]
9
9
 
10
10
  use html_to_markdown_rs::visitor::{HtmlVisitor, NodeContext, NodeType, VisitResult};
11
- use html_to_markdown_rs::{ConversionOptions, convert_with_visitor};
11
+ use html_to_markdown_rs::{ConversionOptions, convert};
12
12
  use std::cell::RefCell;
13
13
  use std::rc::Rc;
14
14
 
@@ -99,7 +99,10 @@ fn test_custom_visitor_transforms_text() {
99
99
  let html = r"<p>Hello world</p>";
100
100
  let visitor = Rc::new(RefCell::new(CustomizingVisitor));
101
101
 
102
- let result = convert_with_visitor(html, None, Some(visitor)).expect("conversion failed");
102
+ let result = convert(html, None, Some(visitor))
103
+ .expect("conversion failed")
104
+ .content
105
+ .unwrap_or_default();
103
106
 
104
107
  assert!(result.contains("[TEXT:"), "Should contain custom text format");
105
108
  }
@@ -109,7 +112,10 @@ fn test_custom_visitor_transforms_links() {
109
112
  let html = r#"<a href="https://example.com">Example</a>"#;
110
113
  let visitor = Rc::new(RefCell::new(CustomizingVisitor));
111
114
 
112
- let result = convert_with_visitor(html, None, Some(visitor)).expect("conversion failed");
115
+ let result = convert(html, None, Some(visitor))
116
+ .expect("conversion failed")
117
+ .content
118
+ .unwrap_or_default();
113
119
 
114
120
  assert!(
115
121
  result.contains("[LINK:Example -> https://example.com]"),
@@ -122,7 +128,10 @@ fn test_custom_visitor_transforms_images() {
122
128
  let html = r#"<img src="/test.png" alt="Test">"#;
123
129
  let visitor = Rc::new(RefCell::new(CustomizingVisitor));
124
130
 
125
- let result = convert_with_visitor(html, None, Some(visitor)).expect("conversion failed");
131
+ let result = convert(html, None, Some(visitor))
132
+ .expect("conversion failed")
133
+ .content
134
+ .unwrap_or_default();
126
135
 
127
136
  assert!(
128
137
  result.contains("[IMAGE:Test @ /test.png]"),
@@ -135,7 +144,10 @@ fn test_custom_visitor_transforms_headings() {
135
144
  let html = r"<h2>My Heading</h2>";
136
145
  let visitor = Rc::new(RefCell::new(CustomizingVisitor));
137
146
 
138
- let result = convert_with_visitor(html, None, Some(visitor)).expect("conversion failed");
147
+ let result = convert(html, None, Some(visitor))
148
+ .expect("conversion failed")
149
+ .content
150
+ .unwrap_or_default();
139
151
 
140
152
  assert!(
141
153
  result.contains("[H2: My Heading]"),
@@ -151,7 +163,10 @@ fn test_skipping_visitor_removes_links() {
151
163
  skip_images: false,
152
164
  }));
153
165
 
154
- let result = convert_with_visitor(html, None, Some(visitor)).expect("conversion failed");
166
+ let result = convert(html, None, Some(visitor))
167
+ .expect("conversion failed")
168
+ .content
169
+ .unwrap_or_default();
155
170
 
156
171
  assert!(
157
172
  !result.contains("example.com"),
@@ -167,7 +182,10 @@ fn test_skipping_visitor_removes_images() {
167
182
  skip_images: true,
168
183
  }));
169
184
 
170
- let result = convert_with_visitor(html, None, Some(visitor)).expect("conversion failed");
185
+ let result = convert(html, None, Some(visitor))
186
+ .expect("conversion failed")
187
+ .content
188
+ .unwrap_or_default();
171
189
 
172
190
  assert!(
173
191
  !result.contains("test.png") && !result.contains("!["),
@@ -180,7 +198,10 @@ fn test_preserving_visitor_keeps_html() {
180
198
  let html = r#"<a href="https://example.com" class="special">Example</a>"#;
181
199
  let visitor = Rc::new(RefCell::new(PreservingVisitor { preserve_links: true }));
182
200
 
183
- let result = convert_with_visitor(html, None, Some(visitor)).expect("conversion failed");
201
+ let result = convert(html, None, Some(visitor))
202
+ .expect("conversion failed")
203
+ .content
204
+ .unwrap_or_default();
184
205
 
185
206
  assert!(
186
207
  result.contains("<a") && result.contains("href"),
@@ -193,7 +214,7 @@ fn test_visitor_receives_node_context() {
193
214
  let html = r#"<h1 id="title" class="main">Title</h1>"#;
194
215
  let visitor = Rc::new(RefCell::new(ContextCheckingVisitor::default()));
195
216
 
196
- let _result = convert_with_visitor(html, None, Some(visitor)).expect("conversion failed");
217
+ let _result = convert(html, None, Some(visitor)).expect("conversion failed");
197
218
  }
198
219
 
199
220
  #[test]
@@ -216,7 +237,10 @@ fn test_visitor_works_with_complex_document() {
216
237
 
217
238
  let visitor = Rc::new(RefCell::new(CustomizingVisitor));
218
239
 
219
- let result = convert_with_visitor(html, None, Some(visitor)).expect("conversion failed");
240
+ let result = convert(html, None, Some(visitor))
241
+ .expect("conversion failed")
242
+ .content
243
+ .unwrap_or_default();
220
244
 
221
245
  assert!(result.contains("[H1:"));
222
246
  assert!(result.contains("[H2:"));
@@ -243,7 +267,10 @@ fn test_visitor_with_conversion_options() {
243
267
 
244
268
  let visitor = Rc::new(RefCell::new(ContinueVisitor));
245
269
 
246
- let result = convert_with_visitor(html, Some(options), Some(visitor)).expect("conversion failed");
270
+ let result = convert(html, Some(options), Some(visitor))
271
+ .expect("conversion failed")
272
+ .content
273
+ .unwrap_or_default();
247
274
 
248
275
  assert!(
249
276
  result.contains(r"\*") || result.contains(r"\_"),
@@ -265,7 +292,10 @@ fn test_visitor_continue_result_produces_default_markdown() {
265
292
  let html = r"<h1>Title</h1>";
266
293
  let visitor = Rc::new(RefCell::new(ContinueVisitor));
267
294
 
268
- let result = convert_with_visitor(html, None, Some(visitor)).expect("conversion failed");
295
+ let result = convert(html, None, Some(visitor))
296
+ .expect("conversion failed")
297
+ .content
298
+ .unwrap_or_default();
269
299
 
270
300
  assert!(
271
301
  result.contains("# Title"),
@@ -294,7 +324,10 @@ fn test_visitor_skip_vs_continue() {
294
324
  let html = r#"<p><a href="/first">First</a> and <a href="/second">Second</a></p>"#;
295
325
  let visitor = Rc::new(RefCell::new(SelectiveSkipper { skip_first_link: true }));
296
326
 
297
- let result = convert_with_visitor(html, None, Some(visitor)).expect("conversion failed");
327
+ let result = convert(html, None, Some(visitor))
328
+ .expect("conversion failed")
329
+ .content
330
+ .unwrap_or_default();
298
331
 
299
332
  assert!(!result.contains("/first"));
300
333
  assert!(result.contains("/second"));
@@ -305,7 +338,10 @@ fn test_multiple_elements_of_same_type() {
305
338
  let html = r"<h1>First</h1><h2>Second</h2><h3>Third</h3>";
306
339
  let visitor = Rc::new(RefCell::new(CustomizingVisitor));
307
340
 
308
- let result = convert_with_visitor(html, None, Some(visitor)).expect("conversion failed");
341
+ let result = convert(html, None, Some(visitor))
342
+ .expect("conversion failed")
343
+ .content
344
+ .unwrap_or_default();
309
345
 
310
346
  assert!(result.contains("[H1: First]"));
311
347
  assert!(result.contains("[H2: Second]"));
@@ -317,7 +353,10 @@ fn test_nested_elements_invoke_visitor() {
317
353
  let html = r#"<p>Text with <a href="/url">a <strong>bold</strong> link</a></p>"#;
318
354
  let visitor = Rc::new(RefCell::new(CustomizingVisitor));
319
355
 
320
- let result = convert_with_visitor(html, None, Some(visitor)).expect("conversion failed");
356
+ let result = convert(html, None, Some(visitor))
357
+ .expect("conversion failed")
358
+ .content
359
+ .unwrap_or_default();
321
360
 
322
361
  assert!(result.contains("[TEXT:"));
323
362
  assert!(result.contains("[LINK:"));
@@ -336,7 +375,7 @@ fn test_visitor_error_stops_conversion() {
336
375
 
337
376
  let html = "<p>text</p>";
338
377
  let visitor = Rc::new(RefCell::new(ErrorVisitor));
339
- let result = convert_with_visitor(html, None, Some(visitor));
378
+ let result = convert(html, None, Some(visitor));
340
379
 
341
380
  assert!(result.is_err(), "Should return error when visitor returns Error");
342
381
  assert!(
@@ -359,7 +398,10 @@ fn test_visitor_code_block() {
359
398
 
360
399
  let html = r#"<pre><code class="language-rust">fn main() {}</code></pre>"#;
361
400
  let visitor = Rc::new(RefCell::new(CodeBlockVisitor));
362
- let result = convert_with_visitor(html, None, Some(visitor)).expect("conversion failed");
401
+ let result = convert(html, None, Some(visitor))
402
+ .expect("conversion failed")
403
+ .content
404
+ .unwrap_or_default();
363
405
 
364
406
  assert!(
365
407
  result.contains("[CODE_BLOCK:rust -> fn main() {}]"),
@@ -380,7 +422,10 @@ fn test_visitor_code_inline() {
380
422
 
381
423
  let html = r"<p>Use <code>println!</code> macro</p>";
382
424
  let visitor = Rc::new(RefCell::new(InlineCodeVisitor));
383
- let result = convert_with_visitor(html, None, Some(visitor)).expect("conversion failed");
425
+ let result = convert(html, None, Some(visitor))
426
+ .expect("conversion failed")
427
+ .content
428
+ .unwrap_or_default();
384
429
 
385
430
  assert!(
386
431
  result.contains("[CODE:println!]"),
@@ -418,7 +463,10 @@ fn test_visitor_list_callbacks() {
418
463
 
419
464
  let html = r"<ul><li>First</li><li>Second</li></ul>";
420
465
  let visitor = Rc::new(RefCell::new(ListVisitor::default()));
421
- let result = convert_with_visitor(html, None, Some(visitor)).expect("conversion failed");
466
+ let result = convert(html, None, Some(visitor))
467
+ .expect("conversion failed")
468
+ .content
469
+ .unwrap_or_default();
422
470
 
423
471
  assert!(
424
472
  result.contains("[LIST_START:UL:1]"),
@@ -462,7 +510,10 @@ fn test_visitor_table_callbacks() {
462
510
 
463
511
  let html = r"<table><tr><th>Name</th><th>Age</th></tr><tr><td>Alice</td><td>30</td></tr></table>";
464
512
  let visitor = Rc::new(RefCell::new(TableVisitor::default()));
465
- let result = convert_with_visitor(html, None, Some(visitor)).expect("conversion failed");
513
+ let result = convert(html, None, Some(visitor))
514
+ .expect("conversion failed")
515
+ .content
516
+ .unwrap_or_default();
466
517
 
467
518
  assert!(
468
519
  result.contains("[TABLE_START]"),
@@ -492,7 +543,10 @@ fn test_visitor_blockquote() {
492
543
 
493
544
  let html = r"<blockquote>This is a quote</blockquote>";
494
545
  let visitor = Rc::new(RefCell::new(BlockquoteVisitor));
495
- let result = convert_with_visitor(html, None, Some(visitor)).expect("conversion failed");
546
+ let result = convert(html, None, Some(visitor))
547
+ .expect("conversion failed")
548
+ .content
549
+ .unwrap_or_default();
496
550
 
497
551
  assert!(
498
552
  result.contains("[QUOTE:This is a quote]"),
@@ -521,7 +575,10 @@ fn test_visitor_inline_formatting() {
521
575
 
522
576
  let html = r"<p><strong>bold</strong> <em>italic</em> <del>struck</del></p>";
523
577
  let visitor = Rc::new(RefCell::new(FormattingVisitor));
524
- let result = convert_with_visitor(html, None, Some(visitor)).expect("conversion failed");
578
+ let result = convert(html, None, Some(visitor))
579
+ .expect("conversion failed")
580
+ .content
581
+ .unwrap_or_default();
525
582
 
526
583
  assert!(result.contains("[STRONG:bold]"), "Should see strong, got: {result}");
527
584
  assert!(result.contains("[EM:italic]"), "Should see emphasis, got: {result}");
@@ -551,7 +608,7 @@ fn test_no_double_visit_in_links() {
551
608
 
552
609
  let html = r#"<a href="/url">link text</a>"#;
553
610
  let visitor = Rc::new(RefCell::new(CountingVisitor::default()));
554
- let _result = convert_with_visitor(html, None, Some(visitor.clone())).expect("conversion failed");
611
+ let _result = convert(html, None, Some(visitor.clone())).expect("conversion failed");
555
612
 
556
613
  assert_eq!(
557
614
  visitor.borrow().text_visits,
@@ -581,7 +638,7 @@ fn test_no_double_visit_in_headings() {
581
638
 
582
639
  let html = r"<h1>heading text</h1>";
583
640
  let visitor = Rc::new(RefCell::new(CountingVisitor::default()));
584
- let _result = convert_with_visitor(html, None, Some(visitor.clone())).expect("conversion failed");
641
+ let _result = convert(html, None, Some(visitor.clone())).expect("conversion failed");
585
642
 
586
643
  assert_eq!(
587
644
  visitor.borrow().text_visits,
@@ -624,8 +681,10 @@ fn test_visitor_with_skip_images() {
624
681
  };
625
682
 
626
683
  let visitor = Rc::new(RefCell::new(SkipImageVisitor::default()));
627
- let result = convert_with_visitor(html, Some(options), Some(visitor))
628
- .expect("conversion with skip_images and visitor should succeed");
684
+ let result = convert(html, Some(options), Some(visitor))
685
+ .expect("conversion with skip_images and visitor should succeed")
686
+ .content
687
+ .unwrap_or_default();
629
688
 
630
689
  // When skip_images is true, images should not appear in output
631
690
  assert!(
@@ -650,7 +709,7 @@ fn test_visitor_with_skip_images() {
650
709
  /// Test that the main `convert()` function accepts optional visitor parameter
651
710
  #[test]
652
711
  fn test_convert_accepts_visitor_parameter() {
653
- use html_to_markdown_rs::convert_with_visitor;
712
+ use html_to_markdown_rs::convert;
654
713
 
655
714
  #[derive(Debug, Default)]
656
715
  struct CountingVisitor {
@@ -674,7 +733,7 @@ fn test_convert_accepts_visitor_parameter() {
674
733
  let visitor = Rc::new(RefCell::new(CountingVisitor::default()));
675
734
 
676
735
  // Test using the main convert() function with visitor parameter
677
- let _result = convert_with_visitor(html, None, Some(visitor.clone())).expect("convert with visitor should work");
736
+ let _result = convert(html, None, Some(visitor.clone())).expect("convert with visitor should work");
678
737
 
679
738
  let borrowed = visitor.borrow();
680
739
  assert!(
@@ -719,7 +778,10 @@ fn test_convert_with_inline_images_accepts_visitor() {
719
778
 
720
779
  // Verify visitor callbacks fire via convert_with_visitor
721
780
  let visitor = Rc::new(RefCell::new(ImageTrackingVisitor::default()));
722
- let markdown = convert_with_visitor(html, None, Some(visitor.clone())).expect("convert_with_visitor should work");
781
+ let markdown = convert(html, None, Some(visitor.clone()))
782
+ .expect("convert should work")
783
+ .content
784
+ .unwrap_or_default();
723
785
 
724
786
  assert_eq!(
725
787
  visitor.borrow().images_seen,
@@ -771,7 +833,10 @@ fn test_visitor_and_metadata_both_work() {
771
833
 
772
834
  // Verify visitor callbacks fire via convert_with_visitor
773
835
  let visitor = Rc::new(RefCell::new(MetadataAwareVisitor::default()));
774
- let markdown = convert_with_visitor(html, None, Some(visitor.clone())).expect("convert_with_visitor should work");
836
+ let markdown = convert(html, None, Some(visitor.clone()))
837
+ .expect("convert should work")
838
+ .content
839
+ .unwrap_or_default();
775
840
 
776
841
  let borrowed = visitor.borrow();
777
842
  assert!(
@@ -788,7 +853,7 @@ fn test_visitor_and_metadata_both_work() {
788
853
  drop(borrowed);
789
854
 
790
855
  // Verify metadata extraction via convert()
791
- let result = html_to_markdown_rs::convert(html, None).expect("convert should work");
856
+ let result = html_to_markdown_rs::convert(html, None, None).expect("convert should work");
792
857
  let metadata = result.metadata;
793
858
 
794
859
  assert_eq!(
@@ -856,7 +921,10 @@ fn test_convert_with_all_features_and_visitor() {
856
921
 
857
922
  // Verify visitor callbacks fire via convert_with_visitor
858
923
  let visitor = Rc::new(RefCell::new(ComprehensiveVisitor::default()));
859
- let markdown = convert_with_visitor(html, None, Some(visitor.clone())).expect("convert_with_visitor should work");
924
+ let markdown = convert(html, None, Some(visitor.clone()))
925
+ .expect("convert should work")
926
+ .content
927
+ .unwrap_or_default();
860
928
 
861
929
  // Verify all visitor callbacks were invoked
862
930
  let borrowed = visitor.borrow();
@@ -901,7 +969,7 @@ fn test_image_visitor_with_metadata_does_not_panic() {
901
969
  ..Default::default()
902
970
  };
903
971
 
904
- let result = convert_with_visitor(html, Some(options), Some(Rc::new(RefCell::new(ImageVisitor))));
972
+ let result = convert(html, Some(options), Some(Rc::new(RefCell::new(ImageVisitor))));
905
973
  assert!(result.is_ok(), "conversion panicked or errored: {:?}", result.err());
906
974
  }
907
975
 
@@ -927,10 +995,10 @@ fn test_element_end_replacement_with_metadata_preserves_subsequent_content() {
927
995
  ..Default::default()
928
996
  };
929
997
 
930
- let result = convert_with_visitor(html, Some(options), Some(Rc::new(RefCell::new(FigureReplacingVisitor))));
998
+ let result = convert(html, Some(options), Some(Rc::new(RefCell::new(FigureReplacingVisitor))));
931
999
  assert!(result.is_ok(), "conversion panicked or errored: {:?}", result.err());
932
1000
  assert!(
933
- result.unwrap().contains("after"),
1001
+ result.unwrap().content.unwrap_or_default().contains("after"),
934
1002
  "content after replaced element should not be lost"
935
1003
  );
936
1004
  }
@@ -4,7 +4,7 @@ fn convert(
4
4
  html: &str,
5
5
  opts: Option<html_to_markdown_rs::ConversionOptions>,
6
6
  ) -> html_to_markdown_rs::error::Result<String> {
7
- html_to_markdown_rs::convert(html, opts).map(|r| r.content.unwrap_or_default())
7
+ html_to_markdown_rs::convert(html, opts, None).map(|r| r.content.unwrap_or_default())
8
8
  }
9
9
 
10
10
  use html_to_markdown_rs::ConversionOptions;
metadata CHANGED
@@ -1,69 +1,50 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-to-markdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.4
4
+ version: 3.4.0.pre.rc.13
5
5
  platform: ruby
6
6
  authors:
7
- - Na'aman Hirschfeld
7
+ - Kreuzberg Team
8
8
  autorequire:
9
- bindir: exe
9
+ bindir: bin
10
10
  cert_chain: []
11
- date: 2026-04-17 00:00:00.000000000 Z
11
+ date: 2026-04-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
19
  version: '0.9'
20
- - - "<"
21
- - !ruby/object:Gem::Version
22
- version: '1.0'
23
20
  type: :runtime
24
21
  prerelease: false
25
22
  version_requirements: !ruby/object:Gem::Requirement
26
23
  requirements:
27
- - - ">="
24
+ - - "~>"
28
25
  - !ruby/object:Gem::Version
29
26
  version: '0.9'
30
- - - "<"
31
- - !ruby/object:Gem::Version
32
- version: '1.0'
33
- description: |-
34
- html-to-markdown is a native Ruby extension built on the shared Rust engine that powers the html-to-markdown project.
35
- It delivers identical HTML-to-Markdown output across languages, exposes inline image extraction, and ships with a CLI for automation workflows.
27
+ description: High-performance HTML to Markdown converter
36
28
  email:
37
- - nhirschfeld@gmail.com
38
- executables:
39
- - html-to-markdown
29
+ executables: []
40
30
  extensions:
41
31
  - ext/html_to_markdown_rb/extconf.rb
42
- extra_rdoc_files:
43
- - README.md
32
+ extra_rdoc_files: []
44
33
  files:
45
- - ".bundle/config"
46
- - ".gitignore"
47
- - ".rubocop.yml"
48
- - Gemfile
49
- - Gemfile.lock
50
- - README.md
51
- - Rakefile
52
34
  - Steepfile
53
- - exe/html-to-markdown
54
35
  - ext/html_to_markdown_rb/Cargo.toml
55
36
  - ext/html_to_markdown_rb/extconf.rb
56
- - ext/html_to_markdown_rb/src/html_to_markdown_rs.rb
57
- - ext/html_to_markdown_rb/src/html_to_markdown_rs/version.rb
37
+ - ext/html_to_markdown_rb/native/Cargo.toml
38
+ - ext/html_to_markdown_rb/src/html-to-markdown.rb
39
+ - ext/html_to_markdown_rb/src/html-to-markdown/version.rb
58
40
  - ext/html_to_markdown_rb/src/lib.rs
59
- - html-to-markdown-rb.gemspec
41
+ - lib/bin/html-to-markdown
60
42
  - lib/html_to_markdown.rb
61
43
  - lib/html_to_markdown/version.rb
62
- - lib/html_to_markdown_rs.rb
63
- - sig/html_to_markdown.rbs
64
44
  - sig/html_to_markdown/cli.rbs
65
45
  - sig/html_to_markdown/cli_proxy.rbs
66
46
  - sig/open3.rbs
47
+ - sig/types.rbs
67
48
  - vendor/Cargo.toml
68
49
  - vendor/html-to-markdown-rs/Cargo.toml
69
50
  - vendor/html-to-markdown-rs/README.md
@@ -141,9 +122,7 @@ files:
141
122
  - vendor/html-to-markdown-rs/src/converter/semantic/mod.rs
142
123
  - vendor/html-to-markdown-rs/src/converter/semantic/sectioning.rs
143
124
  - vendor/html-to-markdown-rs/src/converter/semantic/summary.rs
144
- - vendor/html-to-markdown-rs/src/converter/text/escaping.rs
145
125
  - vendor/html-to-markdown-rs/src/converter/text/mod.rs
146
- - vendor/html-to-markdown-rs/src/converter/text/normalization.rs
147
126
  - vendor/html-to-markdown-rs/src/converter/text/processing.rs
148
127
  - vendor/html-to-markdown-rs/src/converter/text_node.rs
149
128
  - vendor/html-to-markdown-rs/src/converter/utility/attributes.rs
@@ -170,7 +149,6 @@ files:
170
149
  - vendor/html-to-markdown-rs/src/options/validation.rs
171
150
  - vendor/html-to-markdown-rs/src/prelude.rs
172
151
  - vendor/html-to-markdown-rs/src/rcdom.rs
173
- - vendor/html-to-markdown-rs/src/safety.rs
174
152
  - vendor/html-to-markdown-rs/src/text.rs
175
153
  - vendor/html-to-markdown-rs/src/types/document.rs
176
154
  - vendor/html-to-markdown-rs/src/types/mod.rs
@@ -196,6 +174,7 @@ files:
196
174
  - vendor/html-to-markdown-rs/tests/br_in_inline_test.rs
197
175
  - vendor/html-to-markdown-rs/tests/commonmark_compliance_test.rs
198
176
  - vendor/html-to-markdown-rs/tests/djot_output_test.rs
177
+ - vendor/html-to-markdown-rs/tests/exclude_selectors_test.rs
199
178
  - vendor/html-to-markdown-rs/tests/integration_test.rs
200
179
  - vendor/html-to-markdown-rs/tests/issue_121_regressions.rs
201
180
  - vendor/html-to-markdown-rs/tests/issue_127_regressions.rs
@@ -218,11 +197,14 @@ files:
218
197
  - vendor/html-to-markdown-rs/tests/plain_output_test.rs
219
198
  - vendor/html-to-markdown-rs/tests/preprocessing_tests.rs
220
199
  - vendor/html-to-markdown-rs/tests/reference_links_test.rs
200
+ - vendor/html-to-markdown-rs/tests/sectioning_elements_test.rs
221
201
  - vendor/html-to-markdown-rs/tests/skip_images_test.rs
222
202
  - vendor/html-to-markdown-rs/tests/tables_test.rs
223
203
  - vendor/html-to-markdown-rs/tests/test_custom_elements.rs
224
204
  - vendor/html-to-markdown-rs/tests/test_issue_187.rs
225
205
  - vendor/html-to-markdown-rs/tests/test_issue_218.rs
206
+ - vendor/html-to-markdown-rs/tests/test_issue_277.rs
207
+ - vendor/html-to-markdown-rs/tests/test_max_depth.rs
226
208
  - vendor/html-to-markdown-rs/tests/test_nested_simple.rs
227
209
  - vendor/html-to-markdown-rs/tests/test_script_style_stripping.rs
228
210
  - vendor/html-to-markdown-rs/tests/test_spa_bisect.rs
@@ -233,12 +215,8 @@ homepage: https://github.com/kreuzberg-dev/html-to-markdown
233
215
  licenses:
234
216
  - MIT
235
217
  metadata:
218
+ keywords: html,markdown,converter
236
219
  rubygems_mfa_required: 'true'
237
- homepage_uri: https://github.com/kreuzberg-dev/html-to-markdown
238
- source_code_uri: https://github.com/kreuzberg-dev/html-to-markdown
239
- bug_tracker_uri: https://github.com/kreuzberg-dev/html-to-markdown/issues
240
- changelog_uri: https://github.com/kreuzberg-dev/html-to-markdown/releases
241
- documentation_uri: https://github.com/kreuzberg-dev/html-to-markdown/blob/main/packages/ruby/README.md
242
220
  post_install_message:
243
221
  rdoc_options: []
244
222
  require_paths:
@@ -247,7 +225,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
247
225
  requirements:
248
226
  - - ">="
249
227
  - !ruby/object:Gem::Version
250
- version: '3.2'
228
+ version: 3.2.0
251
229
  required_rubygems_version: !ruby/object:Gem::Requirement
252
230
  requirements:
253
231
  - - ">="
@@ -257,5 +235,5 @@ requirements: []
257
235
  rubygems_version: 3.5.22
258
236
  signing_key:
259
237
  specification_version: 4
260
- summary: Blazing-fast HTML to Markdown conversion for Ruby, powered by Rust.
238
+ summary: High-performance HTML to Markdown converter
261
239
  test_files: []
data/.bundle/config DELETED
@@ -1,2 +0,0 @@
1
- ---
2
- BUNDLE_PATH: "vendor/bundle"
data/.gitignore DELETED
@@ -1,3 +0,0 @@
1
- vendor/
2
- .cargo/
3
- rust-vendor/
data/.rubocop.yml DELETED
@@ -1,59 +0,0 @@
1
- plugins:
2
- - rubocop-performance
3
- - rubocop-rspec
4
-
5
- AllCops:
6
- TargetRubyVersion: 3.2
7
- NewCops: enable
8
- SuggestExtensions: false
9
- Exclude:
10
- - 'vendor/**/*'
11
- - 'tmp/**/*'
12
- - 'lib/**/*.bundle'
13
- - 'ext/**/*'
14
-
15
- Style/FrozenStringLiteralComment:
16
- Enabled: true
17
- EnforcedStyle: always
18
-
19
- Style/StringLiterals:
20
- Enabled: true
21
- EnforcedStyle: single_quotes
22
-
23
- Style/StringLiteralsInInterpolation:
24
- Enabled: true
25
- EnforcedStyle: single_quotes
26
-
27
- Style/Documentation:
28
- Enabled: false
29
-
30
- Layout/LineLength:
31
- Max: 120
32
- AllowedPatterns:
33
- - '\A\s*#'
34
- Exclude:
35
- - 'spec/**/*'
36
-
37
- Metrics/MethodLength:
38
- Max: 20
39
- Exclude:
40
- - 'spec/**/*'
41
-
42
- Metrics/BlockLength:
43
- Enabled: true
44
- Max: 350
45
- CountComments: false
46
-
47
- Metrics/AbcSize:
48
- Max: 20
49
- Exclude:
50
- - 'spec/**/*'
51
-
52
- RSpec/ExampleLength:
53
- Max: 50
54
-
55
- RSpec/MultipleExpectations:
56
- Max: 25
57
-
58
- RSpec/NestedGroups:
59
- Max: 6
data/Gemfile DELETED
@@ -1,18 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- source 'https://rubygems.org'
4
-
5
- ruby '>= 3.2'
6
-
7
- gemspec
8
-
9
- group :development, :test do
10
- gem 'rake-compiler'
11
- gem 'rbs', require: false
12
- gem 'rb_sys' # provides build tooling when developing locally
13
- gem 'rspec'
14
- gem 'rubocop', require: false
15
- gem 'rubocop-performance', require: false
16
- gem 'rubocop-rspec', require: false
17
- gem 'steep', require: false
18
- end