html-to-markdown 2.29.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +18 -41
  3. data/README.md +37 -50
  4. data/ext/html-to-markdown-rb/native/Cargo.lock +17 -705
  5. data/ext/html-to-markdown-rb/native/Cargo.toml +1 -4
  6. data/ext/html-to-markdown-rb/native/README.md +4 -13
  7. data/ext/html-to-markdown-rb/native/src/conversion/inline_images.rs +2 -73
  8. data/ext/html-to-markdown-rb/native/src/conversion/metadata.rs +5 -49
  9. data/ext/html-to-markdown-rb/native/src/conversion/mod.rs +0 -6
  10. data/ext/html-to-markdown-rb/native/src/lib.rs +76 -213
  11. data/ext/html-to-markdown-rb/native/src/options.rs +0 -3
  12. data/lib/html_to_markdown/version.rb +1 -1
  13. data/lib/html_to_markdown.rb +13 -194
  14. data/sig/html_to_markdown.rbs +12 -373
  15. data/vendor/Cargo.toml +7 -4
  16. data/vendor/html-to-markdown-rs/Cargo.toml +4 -10
  17. data/vendor/html-to-markdown-rs/README.md +127 -51
  18. data/vendor/html-to-markdown-rs/examples/basic.rs +6 -1
  19. data/vendor/html-to-markdown-rs/examples/table.rs +6 -1
  20. data/vendor/html-to-markdown-rs/examples/test_escape.rs +6 -1
  21. data/vendor/html-to-markdown-rs/examples/test_inline_formatting.rs +8 -2
  22. data/vendor/html-to-markdown-rs/examples/test_lists.rs +6 -1
  23. data/vendor/html-to-markdown-rs/examples/test_semantic_tags.rs +6 -1
  24. data/vendor/html-to-markdown-rs/examples/test_tables.rs +6 -1
  25. data/vendor/html-to-markdown-rs/examples/test_task_lists.rs +6 -1
  26. data/vendor/html-to-markdown-rs/examples/test_whitespace.rs +6 -1
  27. data/vendor/html-to-markdown-rs/src/convert_api.rs +151 -745
  28. data/vendor/html-to-markdown-rs/src/converter/block/blockquote.rs +3 -5
  29. data/vendor/html-to-markdown-rs/src/converter/block/div.rs +1 -7
  30. data/vendor/html-to-markdown-rs/src/converter/block/heading.rs +18 -5
  31. data/vendor/html-to-markdown-rs/src/converter/block/paragraph.rs +10 -0
  32. data/vendor/html-to-markdown-rs/src/converter/block/preformatted.rs +3 -5
  33. data/vendor/html-to-markdown-rs/src/converter/block/table/builder.rs +16 -11
  34. data/vendor/html-to-markdown-rs/src/converter/block/table/cell.rs +20 -0
  35. data/vendor/html-to-markdown-rs/src/converter/block/table/cells.rs +4 -17
  36. data/vendor/html-to-markdown-rs/src/converter/block/table/mod.rs +140 -0
  37. data/vendor/html-to-markdown-rs/src/converter/block/table/scanner.rs +4 -18
  38. data/vendor/html-to-markdown-rs/src/converter/block/table/utils.rs +2 -18
  39. data/vendor/html-to-markdown-rs/src/converter/context.rs +8 -0
  40. data/vendor/html-to-markdown-rs/src/converter/dom_context.rs +1 -6
  41. data/vendor/html-to-markdown-rs/src/converter/form/elements.rs +14 -14
  42. data/vendor/html-to-markdown-rs/src/converter/handlers/blockquote.rs +4 -5
  43. data/vendor/html-to-markdown-rs/src/converter/handlers/code_block.rs +5 -10
  44. data/vendor/html-to-markdown-rs/src/converter/handlers/graphic.rs +3 -5
  45. data/vendor/html-to-markdown-rs/src/converter/handlers/image.rs +3 -5
  46. data/vendor/html-to-markdown-rs/src/converter/handlers/link.rs +3 -5
  47. data/vendor/html-to-markdown-rs/src/converter/inline/code.rs +3 -5
  48. data/vendor/html-to-markdown-rs/src/converter/inline/emphasis.rs +4 -10
  49. data/vendor/html-to-markdown-rs/src/converter/inline/link.rs +4 -170
  50. data/vendor/html-to-markdown-rs/src/converter/inline/semantic/marks.rs +7 -19
  51. data/vendor/html-to-markdown-rs/src/converter/list/item.rs +3 -5
  52. data/vendor/html-to-markdown-rs/src/converter/list/ordered.rs +4 -10
  53. data/vendor/html-to-markdown-rs/src/converter/list/unordered.rs +6 -12
  54. data/vendor/html-to-markdown-rs/src/converter/list/utils.rs +1 -12
  55. data/vendor/html-to-markdown-rs/src/converter/main.rs +85 -56
  56. data/vendor/html-to-markdown-rs/src/converter/main_helpers.rs +4 -67
  57. data/vendor/html-to-markdown-rs/src/converter/media/embedded.rs +1 -5
  58. data/vendor/html-to-markdown-rs/src/converter/media/graphic.rs +3 -40
  59. data/vendor/html-to-markdown-rs/src/converter/media/image.rs +0 -8
  60. data/vendor/html-to-markdown-rs/src/converter/media/svg.rs +3 -13
  61. data/vendor/html-to-markdown-rs/src/converter/metadata.rs +1 -1
  62. data/vendor/html-to-markdown-rs/src/converter/mod.rs +0 -8
  63. data/vendor/html-to-markdown-rs/src/converter/plain_text.rs +37 -12
  64. data/vendor/html-to-markdown-rs/src/converter/semantic/attributes.rs +5 -30
  65. data/vendor/html-to-markdown-rs/src/converter/semantic/figure.rs +29 -0
  66. data/vendor/html-to-markdown-rs/src/converter/text/escaping.rs +1 -36
  67. data/vendor/html-to-markdown-rs/src/converter/text/mod.rs +1 -3
  68. data/vendor/html-to-markdown-rs/src/converter/text/normalization.rs +0 -53
  69. data/vendor/html-to-markdown-rs/src/converter/text_node.rs +1 -1
  70. data/vendor/html-to-markdown-rs/src/converter/utility/attributes.rs +0 -41
  71. data/vendor/html-to-markdown-rs/src/converter/utility/caching.rs +2 -1
  72. data/vendor/html-to-markdown-rs/src/converter/utility/content.rs +15 -98
  73. data/vendor/html-to-markdown-rs/src/converter/utility/preprocessing.rs +113 -4
  74. data/vendor/html-to-markdown-rs/src/converter/utility/serialization.rs +3 -0
  75. data/vendor/html-to-markdown-rs/src/converter/visitor_hooks.rs +4 -10
  76. data/vendor/html-to-markdown-rs/src/exports.rs +1 -4
  77. data/vendor/html-to-markdown-rs/src/inline_images.rs +1 -1
  78. data/vendor/html-to-markdown-rs/src/lib.rs +13 -133
  79. data/vendor/html-to-markdown-rs/src/metadata/collector.rs +4 -4
  80. data/vendor/html-to-markdown-rs/src/metadata/mod.rs +22 -22
  81. data/vendor/html-to-markdown-rs/src/metadata/types.rs +3 -3
  82. data/vendor/html-to-markdown-rs/src/options/conversion.rs +351 -319
  83. data/vendor/html-to-markdown-rs/src/options/preprocessing.rs +8 -2
  84. data/vendor/html-to-markdown-rs/src/prelude.rs +1 -15
  85. data/vendor/html-to-markdown-rs/src/rcdom.rs +7 -1
  86. data/vendor/html-to-markdown-rs/src/text.rs +25 -14
  87. data/vendor/html-to-markdown-rs/src/types/document.rs +175 -0
  88. data/vendor/html-to-markdown-rs/src/types/mod.rs +17 -0
  89. data/vendor/html-to-markdown-rs/src/types/result.rs +49 -0
  90. data/vendor/html-to-markdown-rs/src/types/structure_builder.rs +790 -0
  91. data/vendor/html-to-markdown-rs/src/types/structure_collector.rs +442 -0
  92. data/vendor/html-to-markdown-rs/src/types/tables.rs +47 -0
  93. data/vendor/html-to-markdown-rs/src/types/warnings.rs +28 -0
  94. data/vendor/html-to-markdown-rs/src/visitor/mod.rs +0 -6
  95. data/vendor/html-to-markdown-rs/src/visitor/traits.rs +0 -1
  96. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/mod.rs +1 -21
  97. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/mod.rs +0 -5
  98. data/vendor/html-to-markdown-rs/src/visitor_helpers.rs +1 -845
  99. data/vendor/html-to-markdown-rs/tests/br_in_inline_test.rs +8 -1
  100. data/vendor/html-to-markdown-rs/tests/commonmark_compliance_test.rs +8 -8
  101. data/vendor/html-to-markdown-rs/tests/djot_output_test.rs +8 -2
  102. data/vendor/html-to-markdown-rs/tests/integration_test.rs +23 -6
  103. data/vendor/html-to-markdown-rs/tests/issue_121_regressions.rs +8 -1
  104. data/vendor/html-to-markdown-rs/tests/issue_127_regressions.rs +8 -2
  105. data/vendor/html-to-markdown-rs/tests/issue_128_regressions.rs +6 -1
  106. data/vendor/html-to-markdown-rs/tests/issue_131_regressions.rs +8 -1
  107. data/vendor/html-to-markdown-rs/tests/issue_134_regressions.rs +8 -1
  108. data/vendor/html-to-markdown-rs/tests/issue_139_regressions.rs +8 -1
  109. data/vendor/html-to-markdown-rs/tests/issue_140_regressions.rs +8 -1
  110. data/vendor/html-to-markdown-rs/tests/issue_143_regressions.rs +8 -1
  111. data/vendor/html-to-markdown-rs/tests/issue_145_regressions.rs +8 -7
  112. data/vendor/html-to-markdown-rs/tests/issue_146_regressions.rs +8 -7
  113. data/vendor/html-to-markdown-rs/tests/issue_176_regressions.rs +12 -2
  114. data/vendor/html-to-markdown-rs/tests/issue_190_regressions.rs +8 -1
  115. data/vendor/html-to-markdown-rs/tests/issue_199_regressions.rs +6 -1
  116. data/vendor/html-to-markdown-rs/tests/issue_200_regressions.rs +6 -1
  117. data/vendor/html-to-markdown-rs/tests/issue_212_regressions.rs +6 -1
  118. data/vendor/html-to-markdown-rs/tests/issue_216_217_regressions.rs +6 -1
  119. data/vendor/html-to-markdown-rs/tests/json_ld_script_extraction.rs +4 -6
  120. data/vendor/html-to-markdown-rs/tests/lists_test.rs +8 -1
  121. data/vendor/html-to-markdown-rs/tests/plain_output_test.rs +8 -2
  122. data/vendor/html-to-markdown-rs/tests/preprocessing_tests.rs +8 -1
  123. data/vendor/html-to-markdown-rs/tests/skip_images_test.rs +8 -11
  124. data/vendor/html-to-markdown-rs/tests/tables_test.rs +12 -2
  125. data/vendor/html-to-markdown-rs/tests/test_custom_elements.rs +8 -1
  126. data/vendor/html-to-markdown-rs/tests/test_nested_simple.rs +8 -1
  127. data/vendor/html-to-markdown-rs/tests/test_script_style_stripping.rs +17 -28
  128. data/vendor/html-to-markdown-rs/tests/test_spa_bisect.rs +8 -1
  129. data/vendor/html-to-markdown-rs/tests/visitor_integration_test.rs +29 -33
  130. data/vendor/html-to-markdown-rs/tests/xml_tables_test.rs +8 -1
  131. metadata +9 -37
  132. data/bin/benchmark.rb +0 -232
  133. data/ext/html-to-markdown-rb/native/src/conversion/tables.rs +0 -71
  134. data/ext/html-to-markdown-rb/native/src/profiling.rs +0 -215
  135. data/ext/html-to-markdown-rb/native/src/visitor/bridge.rs +0 -252
  136. data/ext/html-to-markdown-rb/native/src/visitor/callbacks.rs +0 -640
  137. data/ext/html-to-markdown-rb/native/src/visitor/mod.rs +0 -12
  138. data/spec/convert_spec.rb +0 -77
  139. data/spec/convert_with_tables_spec.rb +0 -194
  140. data/spec/metadata_extraction_spec.rb +0 -437
  141. data/spec/visitor_issue_187_spec.rb +0 -605
  142. data/spec/visitor_spec.rb +0 -1149
  143. data/vendor/html-to-markdown-rs/src/hocr/converter/code_analysis.rs +0 -254
  144. data/vendor/html-to-markdown-rs/src/hocr/converter/core.rs +0 -249
  145. data/vendor/html-to-markdown-rs/src/hocr/converter/elements.rs +0 -382
  146. data/vendor/html-to-markdown-rs/src/hocr/converter/hierarchy.rs +0 -379
  147. data/vendor/html-to-markdown-rs/src/hocr/converter/keywords.rs +0 -55
  148. data/vendor/html-to-markdown-rs/src/hocr/converter/layout.rs +0 -313
  149. data/vendor/html-to-markdown-rs/src/hocr/converter/mod.rs +0 -26
  150. data/vendor/html-to-markdown-rs/src/hocr/converter/output.rs +0 -78
  151. data/vendor/html-to-markdown-rs/src/hocr/extractor.rs +0 -232
  152. data/vendor/html-to-markdown-rs/src/hocr/mod.rs +0 -31
  153. data/vendor/html-to-markdown-rs/src/hocr/parser.rs +0 -333
  154. data/vendor/html-to-markdown-rs/src/hocr/spatial/coords.rs +0 -129
  155. data/vendor/html-to-markdown-rs/src/hocr/spatial/grouping.rs +0 -165
  156. data/vendor/html-to-markdown-rs/src/hocr/spatial/layout.rs +0 -335
  157. data/vendor/html-to-markdown-rs/src/hocr/spatial/mod.rs +0 -15
  158. data/vendor/html-to-markdown-rs/src/hocr/spatial/output.rs +0 -63
  159. data/vendor/html-to-markdown-rs/src/hocr/types.rs +0 -269
  160. data/vendor/html-to-markdown-rs/src/visitor/async_traits.rs +0 -249
  161. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/bridge.rs +0 -189
  162. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/bridge_visitor.rs +0 -343
  163. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/macros.rs +0 -217
  164. data/vendor/html-to-markdown-rs/tests/async_visitor_test.rs +0 -57
  165. data/vendor/html-to-markdown-rs/tests/convert_with_metadata_no_frontmatter.rs +0 -100
  166. data/vendor/html-to-markdown-rs/tests/hocr_compliance_test.rs +0 -509
@@ -24,9 +24,6 @@ use std::rc::Rc;
24
24
  use crate::error::{ConversionError, Result};
25
25
  use crate::visitor::{HtmlVisitor, NodeContext, NodeType, VisitResult};
26
26
 
27
- #[cfg(feature = "async-visitor")]
28
- use crate::visitor::AsyncHtmlVisitor;
29
-
30
27
  /// Build a `NodeContext` from current parsing state.
31
28
  ///
32
29
  /// Creates a complete `NodeContext` suitable for passing to visitor callbacks.
@@ -245,92 +242,7 @@ impl VisitorDispatch {
245
242
  }
246
243
  }
247
244
 
248
- /// Type alias for an async visitor handle (Arc-wrapped `Mutex` for interior mutability).
249
- ///
250
- /// This allows async visitors to be passed around and shared while still being mutable.
251
- /// Uses Arc<Mutex<>> instead of Rc<RefCell<>> to enable Send across thread boundaries.
252
- /// The + Send + 'static bounds allow the visitor to be moved to other threads.
253
- #[cfg(feature = "async-visitor")]
254
- pub type AsyncVisitorHandle = std::sync::Arc<tokio::sync::Mutex<dyn AsyncHtmlVisitor + Send + 'static>>;
255
-
256
- /// Dispatch an async visitor callback and handle the result.
257
- ///
258
- /// This is the async version of `dispatch_visitor`, supporting async visitor implementations.
259
- /// It safely handles the optional visitor, calls the callback function, and translates the
260
- /// `VisitResult` into concrete control flow decisions.
261
- ///
262
- /// # Type Parameters
263
- ///
264
- /// - `F`: Async visitor callback function type
265
- ///
266
- /// # Parameters
267
- ///
268
- /// - `visitor`: Optional async visitor (wrapped in Rc<`RefCell`<>>)
269
- /// - `callback`: Async closure that invokes the appropriate async visitor method
270
- ///
271
- /// # Returns
272
- ///
273
- /// - `Ok(VisitorDispatch::Custom(String))`: Custom markdown output from `VisitResult::Custom`
274
- /// - `Ok(VisitorDispatch::Continue)`: Continue with default behavior (`VisitResult::Continue`)
275
- /// - `Err(ConversionError)`: Stop conversion with error (`VisitResult::Error`)
276
- ///
277
- /// # Errors
278
- ///
279
- /// - If the visitor returns `VisitResult::Error`, this is converted to `Error::Visitor`
280
- /// - `RefCell` borrow failures panic (should never happen with correct usage)
281
- ///
282
- /// # Performance
283
- ///
284
- /// - Zero-cost when visitor is None (common case)
285
- /// - Single dynamic dispatch when visitor is present
286
- /// - No allocations except for error messages
287
- ///
288
- /// # Examples
289
- ///
290
- /// ```ignore
291
- /// let result = dispatch_async_visitor(
292
- /// &visitor,
293
- /// |v| Box::pin(v.visit_heading(&ctx, level, text, id)),
294
- /// ).await?;
295
- ///
296
- /// match result {
297
- /// VisitorDispatch::Custom(output) => return Ok(output),
298
- /// VisitorDispatch::Continue => { /* proceed with default conversion */ }
299
- /// _ => {}
300
- /// }
301
- /// ```
302
- #[cfg(feature = "async-visitor")]
303
- #[allow(dead_code, clippy::future_not_send)]
304
- #[inline]
305
- pub async fn dispatch_async_visitor<F, Fut>(
306
- visitor: &Option<Rc<RefCell<dyn AsyncHtmlVisitor>>>,
307
- callback: F,
308
- ) -> Result<VisitorDispatch>
309
- where
310
- F: FnOnce(&mut dyn AsyncHtmlVisitor) -> Fut,
311
- Fut: std::future::Future<Output = VisitResult>,
312
- {
313
- let Some(visitor_rc) = visitor else {
314
- return Ok(VisitorDispatch::Continue);
315
- };
316
-
317
- let future = {
318
- let mut visitor_ref = visitor_rc.borrow_mut();
319
- callback(&mut *visitor_ref)
320
- };
321
-
322
- let result = future.await;
323
-
324
- match result {
325
- VisitResult::Continue => Ok(VisitorDispatch::Continue),
326
- VisitResult::Custom(output) => Ok(VisitorDispatch::Custom(output)),
327
- VisitResult::Skip => Ok(VisitorDispatch::Skip),
328
- VisitResult::PreserveHtml => Ok(VisitorDispatch::PreserveHtml),
329
- VisitResult::Error(msg) => Err(ConversionError::Visitor(msg)),
330
- }
331
- }
332
-
333
- /// Macro to reduce boilerplate when calling async visitor methods.
245
+ /// Macro to reduce boilerplate when calling visitor methods.
334
246
  ///
335
247
  /// This macro wraps the common pattern of:
336
248
  /// 1. Check if visitor is present
@@ -450,762 +362,6 @@ macro_rules! try_visitor_element_end {
450
362
  }};
451
363
  }
452
364
 
453
- /// Macro to reduce boilerplate when calling async visitor methods.
454
- ///
455
- /// This is the async version of `try_visitor!` macro. It wraps the common pattern of:
456
- /// 1. Check if visitor is present
457
- /// 2. Call async visitor method (awaiting the result)
458
- /// 3. Handle early return for Custom/Skip/PreserveHtml/Error
459
- /// 4. Continue with default behavior if visitor returns Continue
460
- ///
461
- /// # Syntax
462
- ///
463
- /// ```ignore
464
- /// try_async_visitor!(visitor_option, method_name, ctx, arg1, arg2, ...).await?;
465
- /// ```
466
- ///
467
- /// # Returns
468
- ///
469
- /// - Returns early with custom output if visitor returns Custom/Skip/PreserveHtml
470
- /// - Returns early with Err if visitor returns Error
471
- /// - Continues execution if visitor returns Continue or is None
472
- ///
473
- /// # Examples
474
- ///
475
- /// ```ignore
476
- /// // Before (verbose):
477
- /// let dispatch = dispatch_async_visitor(&visitor, |v| {
478
- /// Box::pin(v.visit_heading(&ctx, level, text, id))
479
- /// }).await?;
480
- /// match dispatch {
481
- /// VisitorDispatch::Custom(output) => return Ok(output),
482
- /// VisitorDispatch::Skip => return Ok(String::new()),
483
- /// VisitorDispatch::PreserveHtml => return Ok(preserve_html_output),
484
- /// VisitorDispatch::Continue => { /* proceed */ }
485
- /// }
486
- ///
487
- /// // After (concise):
488
- /// try_async_visitor!(visitor, visit_heading, &ctx, level, text, id).await?;
489
- /// // Default conversion logic continues here...
490
- /// ```
491
- #[cfg(feature = "async-visitor")]
492
- #[macro_export]
493
- macro_rules! try_async_visitor {
494
- ($visitor:expr, $method:ident, $ctx:expr $(, $arg:expr)*) => {{
495
- let dispatch = $crate::visitor_helpers::dispatch_async_visitor(
496
- $visitor,
497
- |v| Box::pin(v.$method($ctx $(, $arg)*)),
498
- ).await?;
499
-
500
- match dispatch {
501
- $crate::visitor_helpers::VisitorDispatch::Continue => {
502
- }
503
- $crate::visitor_helpers::VisitorDispatch::Custom(output) => {
504
- return Ok(output);
505
- }
506
- $crate::visitor_helpers::VisitorDispatch::Skip => {
507
- return Ok(String::new());
508
- }
509
- $crate::visitor_helpers::VisitorDispatch::PreserveHtml => {
510
- // TODO: Implement HTML preservation logic
511
- }
512
- }
513
- }};
514
- }
515
-
516
- /// Convenience macro for async `element_start` visitor calls with early return.
517
- ///
518
- /// This is the async version of `try_visitor_element_start!` macro.
519
- /// It handles the common pattern of calling `visit_element_start` at the beginning
520
- /// of element processing.
521
- ///
522
- /// # Syntax
523
- ///
524
- /// ```ignore
525
- /// try_async_visitor_element_start!(visitor_option, ctx).await?;
526
- /// ```
527
- ///
528
- /// # Examples
529
- ///
530
- /// ```ignore
531
- /// async fn process_heading(...) -> Result<String> {
532
- /// let ctx = build_node_context(...);
533
- /// try_async_visitor_element_start!(visitor, &ctx).await?;
534
- ///
535
- /// // Default heading processing continues here...
536
- /// }
537
- /// ```
538
- #[cfg(feature = "async-visitor")]
539
- #[macro_export]
540
- macro_rules! try_async_visitor_element_start {
541
- ($visitor:expr, $ctx:expr) => {{
542
- $crate::try_async_visitor!($visitor, visit_element_start, $ctx);
543
- }};
544
- }
545
-
546
- /// Convenience macro for async `element_end` visitor calls with output inspection.
547
- ///
548
- /// This is the async version of `try_visitor_element_end!` macro.
549
- /// It handles the common pattern of calling `visit_element_end` after generating
550
- /// default markdown output.
551
- ///
552
- /// # Syntax
553
- ///
554
- /// ```ignore
555
- /// try_async_visitor_element_end!(visitor_option, ctx, default_output_string).await?;
556
- /// ```
557
- ///
558
- /// # Examples
559
- ///
560
- /// ```ignore
561
- /// async fn process_heading(...) -> Result<String> {
562
- /// let ctx = build_node_context(...);
563
- /// let mut output = String::from("# Heading");
564
- ///
565
- /// try_async_visitor_element_end!(visitor, &ctx, &output).await?;
566
- /// Ok(output)
567
- /// }
568
- /// ```
569
- #[cfg(feature = "async-visitor")]
570
- #[macro_export]
571
- macro_rules! try_async_visitor_element_end {
572
- ($visitor:expr, $ctx:expr, $output:expr) => {{
573
- $crate::try_async_visitor!($visitor, visit_element_end, $ctx, $output);
574
- }};
575
- }
576
-
577
- /// Bridge that wraps an async visitor and implements the sync `HtmlVisitor` trait.
578
- ///
579
- /// This bridge uses a channel-based approach to avoid blocking:
580
- /// 1. Sync converter sends visitor call request through channel
581
- /// 2. Async runtime receives request and awaits JS callback
582
- /// 3. Result sent back through response channel
583
- /// 4. Sync converter receives result and continues
584
- ///
585
- /// This approach avoids deadlock by never blocking on async operations.
586
- /// The response_rx is wrapped in a Mutex to provide interior mutability,
587
- /// avoiding the need for external RefCell wrapping that causes borrow conflicts.
588
- #[cfg(feature = "async-visitor")]
589
- pub struct AsyncToSyncVisitorBridge {
590
- async_visitor: AsyncVisitorHandle,
591
- // Using tokio::sync::mpsc for async communication (request) and std::sync::mpsc for sync (response)
592
- request_tx: tokio::sync::mpsc::UnboundedSender<VisitorRequest>,
593
- // Wrapped in Mutex for interior mutability - allows recv() without &mut self
594
- response_rx: std::sync::Mutex<std::sync::mpsc::Receiver<crate::visitor::VisitResult>>,
595
- }
596
-
597
- #[cfg(feature = "async-visitor")]
598
- enum VisitorRequest {
599
- ElementStart(crate::visitor::NodeContext),
600
- ElementEnd(crate::visitor::NodeContext, String),
601
- Text(crate::visitor::NodeContext, String),
602
- Link(crate::visitor::NodeContext, String, String, Option<String>),
603
- Image(crate::visitor::NodeContext, String, String, Option<String>),
604
- Heading(crate::visitor::NodeContext, u32, String, Option<String>),
605
- CodeBlock(crate::visitor::NodeContext, Option<String>, String),
606
- CodeInline(crate::visitor::NodeContext, String),
607
- ListItem(crate::visitor::NodeContext, bool, String, String),
608
- ListStart(crate::visitor::NodeContext, bool),
609
- ListEnd(crate::visitor::NodeContext, bool, String),
610
- TableStart(crate::visitor::NodeContext),
611
- TableRow(crate::visitor::NodeContext, Vec<String>, bool),
612
- TableEnd(crate::visitor::NodeContext, String),
613
- Blockquote(crate::visitor::NodeContext, String, usize),
614
- Strong(crate::visitor::NodeContext, String),
615
- Emphasis(crate::visitor::NodeContext, String),
616
- Strikethrough(crate::visitor::NodeContext, String),
617
- Underline(crate::visitor::NodeContext, String),
618
- Subscript(crate::visitor::NodeContext, String),
619
- Superscript(crate::visitor::NodeContext, String),
620
- Mark(crate::visitor::NodeContext, String),
621
- LineBreak(crate::visitor::NodeContext),
622
- HorizontalRule(crate::visitor::NodeContext),
623
- CustomElement(crate::visitor::NodeContext, String, String),
624
- }
625
-
626
- #[cfg(feature = "async-visitor")]
627
- impl std::fmt::Debug for AsyncToSyncVisitorBridge {
628
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
629
- f.debug_struct("AsyncToSyncVisitorBridge")
630
- .field("async_visitor", &self.async_visitor)
631
- .finish_non_exhaustive()
632
- }
633
- }
634
-
635
- #[cfg(feature = "async-visitor")]
636
- impl AsyncToSyncVisitorBridge {
637
- /// Create a new async-to-sync visitor bridge with channel-based communication.
638
- pub fn new(async_visitor: AsyncVisitorHandle) -> Self {
639
- // Use tokio::sync::mpsc for async channels (not std::sync::mpsc which blocks)
640
- let (request_tx, mut request_rx) = tokio::sync::mpsc::unbounded_channel();
641
- let (response_tx, response_rx) = std::sync::mpsc::channel();
642
- let response_rx = std::sync::Mutex::new(response_rx);
643
-
644
- // Spawn async task to handle visitor requests
645
- let visitor_clone = async_visitor.clone();
646
- tokio::spawn(async move {
647
- while let Some(request) = request_rx.recv().await {
648
- let result = match request {
649
- VisitorRequest::ElementStart(ctx) => {
650
- let mut visitor = visitor_clone.lock().await;
651
- visitor.visit_element_start(&ctx).await
652
- }
653
- VisitorRequest::ElementEnd(ctx, output) => {
654
- let mut visitor = visitor_clone.lock().await;
655
- visitor.visit_element_end(&ctx, &output).await
656
- }
657
- VisitorRequest::Text(ctx, text) => {
658
- let mut visitor = visitor_clone.lock().await;
659
- visitor.visit_text(&ctx, &text).await
660
- }
661
- VisitorRequest::Link(ctx, href, text, title) => {
662
- let mut visitor = visitor_clone.lock().await;
663
- visitor.visit_link(&ctx, &href, &text, title.as_deref()).await
664
- }
665
- VisitorRequest::Image(ctx, src, alt, title) => {
666
- let mut visitor = visitor_clone.lock().await;
667
- visitor.visit_image(&ctx, &src, &alt, title.as_deref()).await
668
- }
669
- VisitorRequest::Heading(ctx, level, text, id) => {
670
- let mut visitor = visitor_clone.lock().await;
671
- visitor.visit_heading(&ctx, level, &text, id.as_deref()).await
672
- }
673
- VisitorRequest::CodeBlock(ctx, lang, code) => {
674
- let mut visitor = visitor_clone.lock().await;
675
- visitor.visit_code_block(&ctx, lang.as_deref(), &code).await
676
- }
677
- VisitorRequest::CodeInline(ctx, code) => {
678
- let mut visitor = visitor_clone.lock().await;
679
- visitor.visit_code_inline(&ctx, &code).await
680
- }
681
- VisitorRequest::ListItem(ctx, ordered, marker, text) => {
682
- let mut visitor = visitor_clone.lock().await;
683
- visitor.visit_list_item(&ctx, ordered, &marker, &text).await
684
- }
685
- VisitorRequest::ListStart(ctx, ordered) => {
686
- let mut visitor = visitor_clone.lock().await;
687
- visitor.visit_list_start(&ctx, ordered).await
688
- }
689
- VisitorRequest::ListEnd(ctx, ordered, output) => {
690
- let mut visitor = visitor_clone.lock().await;
691
- visitor.visit_list_end(&ctx, ordered, &output).await
692
- }
693
- VisitorRequest::TableStart(ctx) => {
694
- let mut visitor = visitor_clone.lock().await;
695
- visitor.visit_table_start(&ctx).await
696
- }
697
- VisitorRequest::TableRow(ctx, cells, is_header) => {
698
- let mut visitor = visitor_clone.lock().await;
699
- visitor.visit_table_row(&ctx, &cells, is_header).await
700
- }
701
- VisitorRequest::TableEnd(ctx, output) => {
702
- let mut visitor = visitor_clone.lock().await;
703
- visitor.visit_table_end(&ctx, &output).await
704
- }
705
- VisitorRequest::Blockquote(ctx, content, depth) => {
706
- let mut visitor = visitor_clone.lock().await;
707
- visitor.visit_blockquote(&ctx, &content, depth).await
708
- }
709
- VisitorRequest::Strong(ctx, text) => {
710
- let mut visitor = visitor_clone.lock().await;
711
- visitor.visit_strong(&ctx, &text).await
712
- }
713
- VisitorRequest::Emphasis(ctx, text) => {
714
- let mut visitor = visitor_clone.lock().await;
715
- visitor.visit_emphasis(&ctx, &text).await
716
- }
717
- VisitorRequest::Strikethrough(ctx, text) => {
718
- let mut visitor = visitor_clone.lock().await;
719
- visitor.visit_strikethrough(&ctx, &text).await
720
- }
721
- VisitorRequest::Underline(ctx, text) => {
722
- let mut visitor = visitor_clone.lock().await;
723
- visitor.visit_underline(&ctx, &text).await
724
- }
725
- VisitorRequest::Subscript(ctx, text) => {
726
- let mut visitor = visitor_clone.lock().await;
727
- visitor.visit_subscript(&ctx, &text).await
728
- }
729
- VisitorRequest::Superscript(ctx, text) => {
730
- let mut visitor = visitor_clone.lock().await;
731
- visitor.visit_superscript(&ctx, &text).await
732
- }
733
- VisitorRequest::Mark(ctx, text) => {
734
- let mut visitor = visitor_clone.lock().await;
735
- visitor.visit_mark(&ctx, &text).await
736
- }
737
- VisitorRequest::LineBreak(ctx) => {
738
- let mut visitor = visitor_clone.lock().await;
739
- visitor.visit_line_break(&ctx).await
740
- }
741
- VisitorRequest::HorizontalRule(ctx) => {
742
- let mut visitor = visitor_clone.lock().await;
743
- visitor.visit_horizontal_rule(&ctx).await
744
- }
745
- VisitorRequest::CustomElement(ctx, tag_name, html) => {
746
- let mut visitor = visitor_clone.lock().await;
747
- visitor.visit_custom_element(&ctx, &tag_name, &html).await
748
- }
749
- };
750
- let _ = response_tx.send(result);
751
- }
752
- });
753
-
754
- Self {
755
- async_visitor,
756
- request_tx,
757
- response_rx,
758
- }
759
- }
760
- }
761
-
762
- #[cfg(feature = "async-visitor")]
763
- impl crate::visitor::HtmlVisitor for AsyncToSyncVisitorBridge {
764
- fn visit_element_start(&mut self, ctx: &crate::visitor::NodeContext) -> crate::visitor::VisitResult {
765
- // Send request through channel
766
- if self.request_tx.send(VisitorRequest::ElementStart(ctx.clone())).is_err() {
767
- return crate::visitor::VisitResult::Continue;
768
- }
769
- // Wait for response
770
- self.response_rx
771
- .lock()
772
- .unwrap()
773
- .recv()
774
- .unwrap_or(crate::visitor::VisitResult::Continue)
775
- }
776
-
777
- fn visit_element_end(&mut self, ctx: &crate::visitor::NodeContext, output: &str) -> crate::visitor::VisitResult {
778
- // Send request through channel
779
- if self
780
- .request_tx
781
- .send(VisitorRequest::ElementEnd(ctx.clone(), output.to_string()))
782
- .is_err()
783
- {
784
- return crate::visitor::VisitResult::Continue;
785
- }
786
- // Wait for response
787
- self.response_rx
788
- .lock()
789
- .unwrap()
790
- .recv()
791
- .unwrap_or(crate::visitor::VisitResult::Continue)
792
- }
793
-
794
- fn visit_text(&mut self, ctx: &crate::visitor::NodeContext, text: &str) -> crate::visitor::VisitResult {
795
- if self
796
- .request_tx
797
- .send(VisitorRequest::Text(ctx.clone(), text.to_string()))
798
- .is_err()
799
- {
800
- return crate::visitor::VisitResult::Continue;
801
- }
802
- self.response_rx
803
- .lock()
804
- .unwrap()
805
- .recv()
806
- .unwrap_or(crate::visitor::VisitResult::Continue)
807
- }
808
-
809
- fn visit_link(
810
- &mut self,
811
- ctx: &crate::visitor::NodeContext,
812
- href: &str,
813
- text: &str,
814
- title: Option<&str>,
815
- ) -> crate::visitor::VisitResult {
816
- if self
817
- .request_tx
818
- .send(VisitorRequest::Link(
819
- ctx.clone(),
820
- href.to_string(),
821
- text.to_string(),
822
- title.map(std::string::ToString::to_string),
823
- ))
824
- .is_err()
825
- {
826
- return crate::visitor::VisitResult::Continue;
827
- }
828
- self.response_rx
829
- .lock()
830
- .unwrap()
831
- .recv()
832
- .unwrap_or(crate::visitor::VisitResult::Continue)
833
- }
834
-
835
- fn visit_image(
836
- &mut self,
837
- ctx: &crate::visitor::NodeContext,
838
- src: &str,
839
- alt: &str,
840
- title: Option<&str>,
841
- ) -> crate::visitor::VisitResult {
842
- if self
843
- .request_tx
844
- .send(VisitorRequest::Image(
845
- ctx.clone(),
846
- src.to_string(),
847
- alt.to_string(),
848
- title.map(std::string::ToString::to_string),
849
- ))
850
- .is_err()
851
- {
852
- return crate::visitor::VisitResult::Continue;
853
- }
854
- self.response_rx
855
- .lock()
856
- .unwrap()
857
- .recv()
858
- .unwrap_or(crate::visitor::VisitResult::Continue)
859
- }
860
-
861
- fn visit_heading(
862
- &mut self,
863
- ctx: &crate::visitor::NodeContext,
864
- level: u32,
865
- text: &str,
866
- id: Option<&str>,
867
- ) -> crate::visitor::VisitResult {
868
- if self
869
- .request_tx
870
- .send(VisitorRequest::Heading(
871
- ctx.clone(),
872
- level,
873
- text.to_string(),
874
- id.map(std::string::ToString::to_string),
875
- ))
876
- .is_err()
877
- {
878
- return crate::visitor::VisitResult::Continue;
879
- }
880
- self.response_rx
881
- .lock()
882
- .unwrap()
883
- .recv()
884
- .unwrap_or(crate::visitor::VisitResult::Continue)
885
- }
886
-
887
- fn visit_code_block(
888
- &mut self,
889
- ctx: &crate::visitor::NodeContext,
890
- language: Option<&str>,
891
- code: &str,
892
- ) -> crate::visitor::VisitResult {
893
- if self
894
- .request_tx
895
- .send(VisitorRequest::CodeBlock(
896
- ctx.clone(),
897
- language.map(std::string::ToString::to_string),
898
- code.to_string(),
899
- ))
900
- .is_err()
901
- {
902
- return crate::visitor::VisitResult::Continue;
903
- }
904
- self.response_rx
905
- .lock()
906
- .unwrap()
907
- .recv()
908
- .unwrap_or(crate::visitor::VisitResult::Continue)
909
- }
910
-
911
- fn visit_code_inline(&mut self, ctx: &crate::visitor::NodeContext, code: &str) -> crate::visitor::VisitResult {
912
- if self
913
- .request_tx
914
- .send(VisitorRequest::CodeInline(ctx.clone(), code.to_string()))
915
- .is_err()
916
- {
917
- return crate::visitor::VisitResult::Continue;
918
- }
919
- self.response_rx
920
- .lock()
921
- .unwrap()
922
- .recv()
923
- .unwrap_or(crate::visitor::VisitResult::Continue)
924
- }
925
-
926
- fn visit_list_item(
927
- &mut self,
928
- ctx: &crate::visitor::NodeContext,
929
- ordered: bool,
930
- marker: &str,
931
- text: &str,
932
- ) -> crate::visitor::VisitResult {
933
- if self
934
- .request_tx
935
- .send(VisitorRequest::ListItem(
936
- ctx.clone(),
937
- ordered,
938
- marker.to_string(),
939
- text.to_string(),
940
- ))
941
- .is_err()
942
- {
943
- return crate::visitor::VisitResult::Continue;
944
- }
945
- self.response_rx
946
- .lock()
947
- .unwrap()
948
- .recv()
949
- .unwrap_or(crate::visitor::VisitResult::Continue)
950
- }
951
-
952
- fn visit_list_start(&mut self, ctx: &crate::visitor::NodeContext, ordered: bool) -> crate::visitor::VisitResult {
953
- if self
954
- .request_tx
955
- .send(VisitorRequest::ListStart(ctx.clone(), ordered))
956
- .is_err()
957
- {
958
- return crate::visitor::VisitResult::Continue;
959
- }
960
- self.response_rx
961
- .lock()
962
- .unwrap()
963
- .recv()
964
- .unwrap_or(crate::visitor::VisitResult::Continue)
965
- }
966
-
967
- fn visit_list_end(
968
- &mut self,
969
- ctx: &crate::visitor::NodeContext,
970
- ordered: bool,
971
- output: &str,
972
- ) -> crate::visitor::VisitResult {
973
- if self
974
- .request_tx
975
- .send(VisitorRequest::ListEnd(ctx.clone(), ordered, output.to_string()))
976
- .is_err()
977
- {
978
- return crate::visitor::VisitResult::Continue;
979
- }
980
- self.response_rx
981
- .lock()
982
- .unwrap()
983
- .recv()
984
- .unwrap_or(crate::visitor::VisitResult::Continue)
985
- }
986
-
987
- fn visit_table_start(&mut self, ctx: &crate::visitor::NodeContext) -> crate::visitor::VisitResult {
988
- if self.request_tx.send(VisitorRequest::TableStart(ctx.clone())).is_err() {
989
- return crate::visitor::VisitResult::Continue;
990
- }
991
- self.response_rx
992
- .lock()
993
- .unwrap()
994
- .recv()
995
- .unwrap_or(crate::visitor::VisitResult::Continue)
996
- }
997
-
998
- fn visit_table_row(
999
- &mut self,
1000
- ctx: &crate::visitor::NodeContext,
1001
- cells: &[String],
1002
- is_header: bool,
1003
- ) -> crate::visitor::VisitResult {
1004
- if self
1005
- .request_tx
1006
- .send(VisitorRequest::TableRow(ctx.clone(), cells.to_vec(), is_header))
1007
- .is_err()
1008
- {
1009
- return crate::visitor::VisitResult::Continue;
1010
- }
1011
- self.response_rx
1012
- .lock()
1013
- .unwrap()
1014
- .recv()
1015
- .unwrap_or(crate::visitor::VisitResult::Continue)
1016
- }
1017
-
1018
- fn visit_table_end(&mut self, ctx: &crate::visitor::NodeContext, output: &str) -> crate::visitor::VisitResult {
1019
- if self
1020
- .request_tx
1021
- .send(VisitorRequest::TableEnd(ctx.clone(), output.to_string()))
1022
- .is_err()
1023
- {
1024
- return crate::visitor::VisitResult::Continue;
1025
- }
1026
- self.response_rx
1027
- .lock()
1028
- .unwrap()
1029
- .recv()
1030
- .unwrap_or(crate::visitor::VisitResult::Continue)
1031
- }
1032
-
1033
- fn visit_blockquote(
1034
- &mut self,
1035
- ctx: &crate::visitor::NodeContext,
1036
- content: &str,
1037
- depth: usize,
1038
- ) -> crate::visitor::VisitResult {
1039
- if self
1040
- .request_tx
1041
- .send(VisitorRequest::Blockquote(ctx.clone(), content.to_string(), depth))
1042
- .is_err()
1043
- {
1044
- return crate::visitor::VisitResult::Continue;
1045
- }
1046
- self.response_rx
1047
- .lock()
1048
- .unwrap()
1049
- .recv()
1050
- .unwrap_or(crate::visitor::VisitResult::Continue)
1051
- }
1052
-
1053
- fn visit_strong(&mut self, ctx: &crate::visitor::NodeContext, text: &str) -> crate::visitor::VisitResult {
1054
- if self
1055
- .request_tx
1056
- .send(VisitorRequest::Strong(ctx.clone(), text.to_string()))
1057
- .is_err()
1058
- {
1059
- return crate::visitor::VisitResult::Continue;
1060
- }
1061
- self.response_rx
1062
- .lock()
1063
- .unwrap()
1064
- .recv()
1065
- .unwrap_or(crate::visitor::VisitResult::Continue)
1066
- }
1067
-
1068
- fn visit_emphasis(&mut self, ctx: &crate::visitor::NodeContext, text: &str) -> crate::visitor::VisitResult {
1069
- if self
1070
- .request_tx
1071
- .send(VisitorRequest::Emphasis(ctx.clone(), text.to_string()))
1072
- .is_err()
1073
- {
1074
- return crate::visitor::VisitResult::Continue;
1075
- }
1076
- self.response_rx
1077
- .lock()
1078
- .unwrap()
1079
- .recv()
1080
- .unwrap_or(crate::visitor::VisitResult::Continue)
1081
- }
1082
-
1083
- fn visit_strikethrough(&mut self, ctx: &crate::visitor::NodeContext, text: &str) -> crate::visitor::VisitResult {
1084
- if self
1085
- .request_tx
1086
- .send(VisitorRequest::Strikethrough(ctx.clone(), text.to_string()))
1087
- .is_err()
1088
- {
1089
- return crate::visitor::VisitResult::Continue;
1090
- }
1091
- self.response_rx
1092
- .lock()
1093
- .unwrap()
1094
- .recv()
1095
- .unwrap_or(crate::visitor::VisitResult::Continue)
1096
- }
1097
-
1098
- fn visit_underline(&mut self, ctx: &crate::visitor::NodeContext, text: &str) -> crate::visitor::VisitResult {
1099
- if self
1100
- .request_tx
1101
- .send(VisitorRequest::Underline(ctx.clone(), text.to_string()))
1102
- .is_err()
1103
- {
1104
- return crate::visitor::VisitResult::Continue;
1105
- }
1106
- self.response_rx
1107
- .lock()
1108
- .unwrap()
1109
- .recv()
1110
- .unwrap_or(crate::visitor::VisitResult::Continue)
1111
- }
1112
-
1113
- fn visit_subscript(&mut self, ctx: &crate::visitor::NodeContext, text: &str) -> crate::visitor::VisitResult {
1114
- if self
1115
- .request_tx
1116
- .send(VisitorRequest::Subscript(ctx.clone(), text.to_string()))
1117
- .is_err()
1118
- {
1119
- return crate::visitor::VisitResult::Continue;
1120
- }
1121
- self.response_rx
1122
- .lock()
1123
- .unwrap()
1124
- .recv()
1125
- .unwrap_or(crate::visitor::VisitResult::Continue)
1126
- }
1127
-
1128
- fn visit_superscript(&mut self, ctx: &crate::visitor::NodeContext, text: &str) -> crate::visitor::VisitResult {
1129
- if self
1130
- .request_tx
1131
- .send(VisitorRequest::Superscript(ctx.clone(), text.to_string()))
1132
- .is_err()
1133
- {
1134
- return crate::visitor::VisitResult::Continue;
1135
- }
1136
- self.response_rx
1137
- .lock()
1138
- .unwrap()
1139
- .recv()
1140
- .unwrap_or(crate::visitor::VisitResult::Continue)
1141
- }
1142
-
1143
- fn visit_line_break(&mut self, ctx: &crate::visitor::NodeContext) -> crate::visitor::VisitResult {
1144
- if self.request_tx.send(VisitorRequest::LineBreak(ctx.clone())).is_err() {
1145
- return crate::visitor::VisitResult::Continue;
1146
- }
1147
- self.response_rx
1148
- .lock()
1149
- .unwrap()
1150
- .recv()
1151
- .unwrap_or(crate::visitor::VisitResult::Continue)
1152
- }
1153
-
1154
- fn visit_mark(&mut self, ctx: &crate::visitor::NodeContext, text: &str) -> crate::visitor::VisitResult {
1155
- if self
1156
- .request_tx
1157
- .send(VisitorRequest::Mark(ctx.clone(), text.to_string()))
1158
- .is_err()
1159
- {
1160
- return crate::visitor::VisitResult::Continue;
1161
- }
1162
- self.response_rx
1163
- .lock()
1164
- .unwrap()
1165
- .recv()
1166
- .unwrap_or(crate::visitor::VisitResult::Continue)
1167
- }
1168
-
1169
- fn visit_horizontal_rule(&mut self, ctx: &crate::visitor::NodeContext) -> crate::visitor::VisitResult {
1170
- if self
1171
- .request_tx
1172
- .send(VisitorRequest::HorizontalRule(ctx.clone()))
1173
- .is_err()
1174
- {
1175
- return crate::visitor::VisitResult::Continue;
1176
- }
1177
- self.response_rx
1178
- .lock()
1179
- .unwrap()
1180
- .recv()
1181
- .unwrap_or(crate::visitor::VisitResult::Continue)
1182
- }
1183
-
1184
- fn visit_custom_element(
1185
- &mut self,
1186
- ctx: &crate::visitor::NodeContext,
1187
- tag_name: &str,
1188
- html: &str,
1189
- ) -> crate::visitor::VisitResult {
1190
- if self
1191
- .request_tx
1192
- .send(VisitorRequest::CustomElement(
1193
- ctx.clone(),
1194
- tag_name.to_string(),
1195
- html.to_string(),
1196
- ))
1197
- .is_err()
1198
- {
1199
- return crate::visitor::VisitResult::Continue;
1200
- }
1201
- self.response_rx
1202
- .lock()
1203
- .unwrap()
1204
- .recv()
1205
- .unwrap_or(crate::visitor::VisitResult::Continue)
1206
- }
1207
- }
1208
-
1209
365
  #[cfg(test)]
1210
366
  mod tests {
1211
367
  use super::*;