html-to-markdown 2.30.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166)
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +4 -14
  3. data/README.md +37 -50
  4. data/ext/html-to-markdown-rb/native/Cargo.lock +13 -701
  5. data/ext/html-to-markdown-rb/native/Cargo.toml +1 -4
  6. data/ext/html-to-markdown-rb/native/README.md +4 -13
  7. data/ext/html-to-markdown-rb/native/src/conversion/inline_images.rs +2 -73
  8. data/ext/html-to-markdown-rb/native/src/conversion/metadata.rs +5 -49
  9. data/ext/html-to-markdown-rb/native/src/conversion/mod.rs +0 -6
  10. data/ext/html-to-markdown-rb/native/src/lib.rs +76 -213
  11. data/ext/html-to-markdown-rb/native/src/options.rs +0 -3
  12. data/lib/html_to_markdown/version.rb +1 -1
  13. data/lib/html_to_markdown.rb +13 -194
  14. data/sig/html_to_markdown.rbs +12 -373
  15. data/vendor/Cargo.toml +5 -2
  16. data/vendor/html-to-markdown-rs/Cargo.toml +4 -10
  17. data/vendor/html-to-markdown-rs/README.md +126 -52
  18. data/vendor/html-to-markdown-rs/examples/basic.rs +6 -1
  19. data/vendor/html-to-markdown-rs/examples/table.rs +6 -1
  20. data/vendor/html-to-markdown-rs/examples/test_escape.rs +6 -1
  21. data/vendor/html-to-markdown-rs/examples/test_inline_formatting.rs +8 -2
  22. data/vendor/html-to-markdown-rs/examples/test_lists.rs +6 -1
  23. data/vendor/html-to-markdown-rs/examples/test_semantic_tags.rs +6 -1
  24. data/vendor/html-to-markdown-rs/examples/test_tables.rs +6 -1
  25. data/vendor/html-to-markdown-rs/examples/test_task_lists.rs +6 -1
  26. data/vendor/html-to-markdown-rs/examples/test_whitespace.rs +6 -1
  27. data/vendor/html-to-markdown-rs/src/convert_api.rs +151 -745
  28. data/vendor/html-to-markdown-rs/src/converter/block/blockquote.rs +3 -5
  29. data/vendor/html-to-markdown-rs/src/converter/block/div.rs +1 -7
  30. data/vendor/html-to-markdown-rs/src/converter/block/heading.rs +18 -5
  31. data/vendor/html-to-markdown-rs/src/converter/block/paragraph.rs +10 -0
  32. data/vendor/html-to-markdown-rs/src/converter/block/preformatted.rs +3 -5
  33. data/vendor/html-to-markdown-rs/src/converter/block/table/builder.rs +16 -11
  34. data/vendor/html-to-markdown-rs/src/converter/block/table/cell.rs +20 -0
  35. data/vendor/html-to-markdown-rs/src/converter/block/table/cells.rs +4 -17
  36. data/vendor/html-to-markdown-rs/src/converter/block/table/mod.rs +140 -0
  37. data/vendor/html-to-markdown-rs/src/converter/block/table/scanner.rs +4 -18
  38. data/vendor/html-to-markdown-rs/src/converter/block/table/utils.rs +2 -18
  39. data/vendor/html-to-markdown-rs/src/converter/context.rs +8 -0
  40. data/vendor/html-to-markdown-rs/src/converter/dom_context.rs +1 -6
  41. data/vendor/html-to-markdown-rs/src/converter/form/elements.rs +14 -14
  42. data/vendor/html-to-markdown-rs/src/converter/handlers/blockquote.rs +4 -5
  43. data/vendor/html-to-markdown-rs/src/converter/handlers/code_block.rs +5 -10
  44. data/vendor/html-to-markdown-rs/src/converter/handlers/graphic.rs +3 -5
  45. data/vendor/html-to-markdown-rs/src/converter/handlers/image.rs +3 -5
  46. data/vendor/html-to-markdown-rs/src/converter/handlers/link.rs +3 -5
  47. data/vendor/html-to-markdown-rs/src/converter/inline/code.rs +3 -5
  48. data/vendor/html-to-markdown-rs/src/converter/inline/emphasis.rs +4 -10
  49. data/vendor/html-to-markdown-rs/src/converter/inline/link.rs +4 -170
  50. data/vendor/html-to-markdown-rs/src/converter/inline/semantic/marks.rs +7 -19
  51. data/vendor/html-to-markdown-rs/src/converter/list/item.rs +3 -5
  52. data/vendor/html-to-markdown-rs/src/converter/list/ordered.rs +4 -10
  53. data/vendor/html-to-markdown-rs/src/converter/list/unordered.rs +6 -12
  54. data/vendor/html-to-markdown-rs/src/converter/list/utils.rs +1 -12
  55. data/vendor/html-to-markdown-rs/src/converter/main.rs +85 -56
  56. data/vendor/html-to-markdown-rs/src/converter/main_helpers.rs +4 -68
  57. data/vendor/html-to-markdown-rs/src/converter/media/embedded.rs +1 -5
  58. data/vendor/html-to-markdown-rs/src/converter/media/graphic.rs +3 -40
  59. data/vendor/html-to-markdown-rs/src/converter/media/image.rs +0 -8
  60. data/vendor/html-to-markdown-rs/src/converter/media/svg.rs +3 -13
  61. data/vendor/html-to-markdown-rs/src/converter/metadata.rs +1 -1
  62. data/vendor/html-to-markdown-rs/src/converter/mod.rs +0 -8
  63. data/vendor/html-to-markdown-rs/src/converter/plain_text.rs +37 -12
  64. data/vendor/html-to-markdown-rs/src/converter/semantic/attributes.rs +5 -30
  65. data/vendor/html-to-markdown-rs/src/converter/semantic/figure.rs +29 -0
  66. data/vendor/html-to-markdown-rs/src/converter/text/escaping.rs +1 -36
  67. data/vendor/html-to-markdown-rs/src/converter/text/mod.rs +1 -3
  68. data/vendor/html-to-markdown-rs/src/converter/text/normalization.rs +0 -53
  69. data/vendor/html-to-markdown-rs/src/converter/text_node.rs +1 -1
  70. data/vendor/html-to-markdown-rs/src/converter/utility/attributes.rs +0 -41
  71. data/vendor/html-to-markdown-rs/src/converter/utility/caching.rs +2 -1
  72. data/vendor/html-to-markdown-rs/src/converter/utility/content.rs +15 -98
  73. data/vendor/html-to-markdown-rs/src/converter/utility/preprocessing.rs +113 -4
  74. data/vendor/html-to-markdown-rs/src/converter/utility/serialization.rs +3 -0
  75. data/vendor/html-to-markdown-rs/src/converter/visitor_hooks.rs +4 -10
  76. data/vendor/html-to-markdown-rs/src/exports.rs +1 -4
  77. data/vendor/html-to-markdown-rs/src/inline_images.rs +1 -1
  78. data/vendor/html-to-markdown-rs/src/lib.rs +13 -133
  79. data/vendor/html-to-markdown-rs/src/metadata/collector.rs +4 -4
  80. data/vendor/html-to-markdown-rs/src/metadata/mod.rs +22 -22
  81. data/vendor/html-to-markdown-rs/src/metadata/types.rs +3 -3
  82. data/vendor/html-to-markdown-rs/src/options/conversion.rs +351 -323
  83. data/vendor/html-to-markdown-rs/src/options/preprocessing.rs +8 -2
  84. data/vendor/html-to-markdown-rs/src/prelude.rs +1 -15
  85. data/vendor/html-to-markdown-rs/src/rcdom.rs +7 -1
  86. data/vendor/html-to-markdown-rs/src/text.rs +25 -14
  87. data/vendor/html-to-markdown-rs/src/types/document.rs +175 -0
  88. data/vendor/html-to-markdown-rs/src/types/mod.rs +17 -0
  89. data/vendor/html-to-markdown-rs/src/types/result.rs +49 -0
  90. data/vendor/html-to-markdown-rs/src/types/structure_builder.rs +790 -0
  91. data/vendor/html-to-markdown-rs/src/types/structure_collector.rs +442 -0
  92. data/vendor/html-to-markdown-rs/src/types/tables.rs +47 -0
  93. data/vendor/html-to-markdown-rs/src/types/warnings.rs +28 -0
  94. data/vendor/html-to-markdown-rs/src/visitor/mod.rs +0 -6
  95. data/vendor/html-to-markdown-rs/src/visitor/traits.rs +0 -1
  96. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/mod.rs +1 -21
  97. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/mod.rs +0 -5
  98. data/vendor/html-to-markdown-rs/src/visitor_helpers.rs +1 -845
  99. data/vendor/html-to-markdown-rs/tests/br_in_inline_test.rs +8 -1
  100. data/vendor/html-to-markdown-rs/tests/commonmark_compliance_test.rs +8 -8
  101. data/vendor/html-to-markdown-rs/tests/djot_output_test.rs +8 -2
  102. data/vendor/html-to-markdown-rs/tests/integration_test.rs +23 -6
  103. data/vendor/html-to-markdown-rs/tests/issue_121_regressions.rs +8 -1
  104. data/vendor/html-to-markdown-rs/tests/issue_127_regressions.rs +8 -2
  105. data/vendor/html-to-markdown-rs/tests/issue_128_regressions.rs +6 -1
  106. data/vendor/html-to-markdown-rs/tests/issue_131_regressions.rs +8 -1
  107. data/vendor/html-to-markdown-rs/tests/issue_134_regressions.rs +8 -1
  108. data/vendor/html-to-markdown-rs/tests/issue_139_regressions.rs +8 -1
  109. data/vendor/html-to-markdown-rs/tests/issue_140_regressions.rs +8 -1
  110. data/vendor/html-to-markdown-rs/tests/issue_143_regressions.rs +8 -1
  111. data/vendor/html-to-markdown-rs/tests/issue_145_regressions.rs +8 -7
  112. data/vendor/html-to-markdown-rs/tests/issue_146_regressions.rs +8 -7
  113. data/vendor/html-to-markdown-rs/tests/issue_176_regressions.rs +12 -2
  114. data/vendor/html-to-markdown-rs/tests/issue_190_regressions.rs +8 -1
  115. data/vendor/html-to-markdown-rs/tests/issue_199_regressions.rs +6 -1
  116. data/vendor/html-to-markdown-rs/tests/issue_200_regressions.rs +6 -1
  117. data/vendor/html-to-markdown-rs/tests/issue_212_regressions.rs +6 -1
  118. data/vendor/html-to-markdown-rs/tests/issue_216_217_regressions.rs +6 -1
  119. data/vendor/html-to-markdown-rs/tests/json_ld_script_extraction.rs +4 -6
  120. data/vendor/html-to-markdown-rs/tests/lists_test.rs +8 -1
  121. data/vendor/html-to-markdown-rs/tests/plain_output_test.rs +8 -2
  122. data/vendor/html-to-markdown-rs/tests/preprocessing_tests.rs +8 -1
  123. data/vendor/html-to-markdown-rs/tests/skip_images_test.rs +8 -11
  124. data/vendor/html-to-markdown-rs/tests/tables_test.rs +12 -2
  125. data/vendor/html-to-markdown-rs/tests/test_custom_elements.rs +8 -1
  126. data/vendor/html-to-markdown-rs/tests/test_nested_simple.rs +8 -1
  127. data/vendor/html-to-markdown-rs/tests/test_script_style_stripping.rs +17 -28
  128. data/vendor/html-to-markdown-rs/tests/test_spa_bisect.rs +8 -1
  129. data/vendor/html-to-markdown-rs/tests/visitor_integration_test.rs +29 -33
  130. data/vendor/html-to-markdown-rs/tests/xml_tables_test.rs +8 -1
  131. metadata +9 -37
  132. data/bin/benchmark.rb +0 -232
  133. data/ext/html-to-markdown-rb/native/src/conversion/tables.rs +0 -71
  134. data/ext/html-to-markdown-rb/native/src/profiling.rs +0 -215
  135. data/ext/html-to-markdown-rb/native/src/visitor/bridge.rs +0 -252
  136. data/ext/html-to-markdown-rb/native/src/visitor/callbacks.rs +0 -640
  137. data/ext/html-to-markdown-rb/native/src/visitor/mod.rs +0 -12
  138. data/spec/convert_spec.rb +0 -77
  139. data/spec/convert_with_tables_spec.rb +0 -194
  140. data/spec/metadata_extraction_spec.rb +0 -437
  141. data/spec/visitor_issue_187_spec.rb +0 -605
  142. data/spec/visitor_spec.rb +0 -1149
  143. data/vendor/html-to-markdown-rs/src/hocr/converter/code_analysis.rs +0 -254
  144. data/vendor/html-to-markdown-rs/src/hocr/converter/core.rs +0 -249
  145. data/vendor/html-to-markdown-rs/src/hocr/converter/elements.rs +0 -382
  146. data/vendor/html-to-markdown-rs/src/hocr/converter/hierarchy.rs +0 -379
  147. data/vendor/html-to-markdown-rs/src/hocr/converter/keywords.rs +0 -55
  148. data/vendor/html-to-markdown-rs/src/hocr/converter/layout.rs +0 -313
  149. data/vendor/html-to-markdown-rs/src/hocr/converter/mod.rs +0 -26
  150. data/vendor/html-to-markdown-rs/src/hocr/converter/output.rs +0 -78
  151. data/vendor/html-to-markdown-rs/src/hocr/extractor.rs +0 -232
  152. data/vendor/html-to-markdown-rs/src/hocr/mod.rs +0 -42
  153. data/vendor/html-to-markdown-rs/src/hocr/parser.rs +0 -333
  154. data/vendor/html-to-markdown-rs/src/hocr/spatial/coords.rs +0 -129
  155. data/vendor/html-to-markdown-rs/src/hocr/spatial/grouping.rs +0 -165
  156. data/vendor/html-to-markdown-rs/src/hocr/spatial/layout.rs +0 -335
  157. data/vendor/html-to-markdown-rs/src/hocr/spatial/mod.rs +0 -15
  158. data/vendor/html-to-markdown-rs/src/hocr/spatial/output.rs +0 -63
  159. data/vendor/html-to-markdown-rs/src/hocr/types.rs +0 -269
  160. data/vendor/html-to-markdown-rs/src/visitor/async_traits.rs +0 -249
  161. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/bridge.rs +0 -189
  162. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/bridge_visitor.rs +0 -343
  163. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/macros.rs +0 -217
  164. data/vendor/html-to-markdown-rs/tests/async_visitor_test.rs +0 -57
  165. data/vendor/html-to-markdown-rs/tests/convert_with_metadata_no_frontmatter.rs +0 -100
  166. data/vendor/html-to-markdown-rs/tests/hocr_compliance_test.rs +0 -509
@@ -1,249 +0,0 @@
1
- //! Asynchronous visitor trait for HTML to Markdown conversion.
2
- //!
3
- //! This module contains the `AsyncHtmlVisitor` trait for async/await based visitation.
4
-
5
- #[cfg(feature = "async-visitor")]
6
- use async_trait::async_trait;
7
-
8
- use super::types::{NodeContext, VisitResult};
9
-
10
- /// Async visitor trait for HTML→Markdown conversion.
11
- ///
12
- /// This trait is identical to `HtmlVisitor` but all methods are async. Use this for languages
13
- /// with native async/await support:
14
- /// - Python (with `async def` and `asyncio`)
15
- /// - TypeScript/JavaScript (with `Promise`-based callbacks)
16
- /// - Elixir (with message-passing processes)
17
- ///
18
- /// For synchronous languages (Ruby, PHP, Go, Java, C#), use the sync `HtmlVisitor` trait.
19
- ///
20
- /// # Example (Python-like)
21
- ///
22
- /// ```ignore
23
- /// use html_to_markdown_rs::visitor::{AsyncHtmlVisitor, NodeContext, VisitResult};
24
- ///
25
- /// struct CustomAsyncVisitor;
26
- ///
27
- /// #[async_trait::async_trait]
28
- /// impl AsyncHtmlVisitor for CustomAsyncVisitor {
29
- /// async fn visit_link(
30
- /// &mut self,
31
- /// ctx: &NodeContext,
32
- /// href: &str,
33
- /// text: &str,
34
- /// title: Option<&str>,
35
- /// ) -> VisitResult {
36
- /// // Can await async operations here
37
- /// VisitResult::Custom(format!("{} ({})", text, href))
38
- /// }
39
- /// }
40
- /// ```
41
- #[cfg(feature = "async-visitor")]
42
- #[async_trait]
43
- pub trait AsyncHtmlVisitor: std::fmt::Debug + Send + Sync {
44
- /// Called before entering any element (async version).
45
- async fn visit_element_start(&mut self, _ctx: &NodeContext) -> VisitResult {
46
- VisitResult::Continue
47
- }
48
-
49
- /// Called after exiting any element (async version).
50
- async fn visit_element_end(&mut self, _ctx: &NodeContext, _output: &str) -> VisitResult {
51
- VisitResult::Continue
52
- }
53
-
54
- /// Visit text nodes (async version - most frequent callback - ~100+ per document).
55
- async fn visit_text(&mut self, _ctx: &NodeContext, _text: &str) -> VisitResult {
56
- VisitResult::Continue
57
- }
58
-
59
- /// Visit anchor links `<a href="...">` (async version).
60
- async fn visit_link(&mut self, _ctx: &NodeContext, _href: &str, _text: &str, _title: Option<&str>) -> VisitResult {
61
- VisitResult::Continue
62
- }
63
-
64
- /// Visit images `<img src="...">` (async version).
65
- async fn visit_image(&mut self, _ctx: &NodeContext, _src: &str, _alt: &str, _title: Option<&str>) -> VisitResult {
66
- VisitResult::Continue
67
- }
68
-
69
- /// Visit heading elements `<h1>` through `<h6>` (async version).
70
- async fn visit_heading(&mut self, _ctx: &NodeContext, _level: u32, _text: &str, _id: Option<&str>) -> VisitResult {
71
- VisitResult::Continue
72
- }
73
-
74
- /// Visit code blocks `<pre><code>` (async version).
75
- async fn visit_code_block(&mut self, _ctx: &NodeContext, _lang: Option<&str>, _code: &str) -> VisitResult {
76
- VisitResult::Continue
77
- }
78
-
79
- /// Visit inline code `<code>` (async version).
80
- async fn visit_code_inline(&mut self, _ctx: &NodeContext, _code: &str) -> VisitResult {
81
- VisitResult::Continue
82
- }
83
-
84
- /// Visit list items `<li>` (async version).
85
- async fn visit_list_item(&mut self, _ctx: &NodeContext, _ordered: bool, _marker: &str, _text: &str) -> VisitResult {
86
- VisitResult::Continue
87
- }
88
-
89
- /// Called before processing a list `<ul>` or `<ol>` (async version).
90
- async fn visit_list_start(&mut self, _ctx: &NodeContext, _ordered: bool) -> VisitResult {
91
- VisitResult::Continue
92
- }
93
-
94
- /// Called after processing a list `</ul>` or `</ol>` (async version).
95
- async fn visit_list_end(&mut self, _ctx: &NodeContext, _ordered: bool, _output: &str) -> VisitResult {
96
- VisitResult::Continue
97
- }
98
-
99
- /// Called before processing a table `<table>` (async version).
100
- async fn visit_table_start(&mut self, _ctx: &NodeContext) -> VisitResult {
101
- VisitResult::Continue
102
- }
103
-
104
- /// Visit table rows `<tr>` (async version).
105
- async fn visit_table_row(&mut self, _ctx: &NodeContext, _cells: &[String], _is_header: bool) -> VisitResult {
106
- VisitResult::Continue
107
- }
108
-
109
- /// Called after processing a table `</table>` (async version).
110
- async fn visit_table_end(&mut self, _ctx: &NodeContext, _output: &str) -> VisitResult {
111
- VisitResult::Continue
112
- }
113
-
114
- /// Visit blockquote elements `<blockquote>` (async version).
115
- async fn visit_blockquote(&mut self, _ctx: &NodeContext, _content: &str, _depth: usize) -> VisitResult {
116
- VisitResult::Continue
117
- }
118
-
119
- /// Visit strong/bold elements `<strong>`, `<b>` (async version).
120
- async fn visit_strong(&mut self, _ctx: &NodeContext, _text: &str) -> VisitResult {
121
- VisitResult::Continue
122
- }
123
-
124
- /// Visit emphasis/italic elements `<em>`, `<i>` (async version).
125
- async fn visit_emphasis(&mut self, _ctx: &NodeContext, _text: &str) -> VisitResult {
126
- VisitResult::Continue
127
- }
128
-
129
- /// Visit strikethrough elements `<s>`, `<del>`, `<strike>` (async version).
130
- async fn visit_strikethrough(&mut self, _ctx: &NodeContext, _text: &str) -> VisitResult {
131
- VisitResult::Continue
132
- }
133
-
134
- /// Visit underline elements `<u>`, `<ins>` (async version).
135
- async fn visit_underline(&mut self, _ctx: &NodeContext, _text: &str) -> VisitResult {
136
- VisitResult::Continue
137
- }
138
-
139
- /// Visit subscript elements `<sub>` (async version).
140
- async fn visit_subscript(&mut self, _ctx: &NodeContext, _text: &str) -> VisitResult {
141
- VisitResult::Continue
142
- }
143
-
144
- /// Visit superscript elements `<sup>` (async version).
145
- async fn visit_superscript(&mut self, _ctx: &NodeContext, _text: &str) -> VisitResult {
146
- VisitResult::Continue
147
- }
148
-
149
- /// Visit mark/highlight elements `<mark>` (async version).
150
- async fn visit_mark(&mut self, _ctx: &NodeContext, _text: &str) -> VisitResult {
151
- VisitResult::Continue
152
- }
153
-
154
- /// Visit line break elements `<br>` (async version).
155
- async fn visit_line_break(&mut self, _ctx: &NodeContext) -> VisitResult {
156
- VisitResult::Continue
157
- }
158
-
159
- /// Visit horizontal rule elements `<hr>` (async version).
160
- async fn visit_horizontal_rule(&mut self, _ctx: &NodeContext) -> VisitResult {
161
- VisitResult::Continue
162
- }
163
-
164
- /// Visit custom elements (web components) or unknown tags (async version).
165
- async fn visit_custom_element(&mut self, _ctx: &NodeContext, _tag_name: &str, _html: &str) -> VisitResult {
166
- VisitResult::Continue
167
- }
168
-
169
- /// Visit definition list `<dl>` (async version).
170
- async fn visit_definition_list_start(&mut self, _ctx: &NodeContext) -> VisitResult {
171
- VisitResult::Continue
172
- }
173
-
174
- /// Visit definition term `<dt>` (async version).
175
- async fn visit_definition_term(&mut self, _ctx: &NodeContext, _text: &str) -> VisitResult {
176
- VisitResult::Continue
177
- }
178
-
179
- /// Visit definition description `<dd>` (async version).
180
- async fn visit_definition_description(&mut self, _ctx: &NodeContext, _text: &str) -> VisitResult {
181
- VisitResult::Continue
182
- }
183
-
184
- /// Called after processing a definition list `</dl>` (async version).
185
- async fn visit_definition_list_end(&mut self, _ctx: &NodeContext, _output: &str) -> VisitResult {
186
- VisitResult::Continue
187
- }
188
-
189
- /// Visit form elements `<form>` (async version).
190
- async fn visit_form(&mut self, _ctx: &NodeContext, _action: Option<&str>, _method: Option<&str>) -> VisitResult {
191
- VisitResult::Continue
192
- }
193
-
194
- /// Visit input elements `<input>` (async version).
195
- async fn visit_input(
196
- &mut self,
197
- _ctx: &NodeContext,
198
- _input_type: &str,
199
- _name: Option<&str>,
200
- _value: Option<&str>,
201
- ) -> VisitResult {
202
- VisitResult::Continue
203
- }
204
-
205
- /// Visit button elements `<button>` (async version).
206
- async fn visit_button(&mut self, _ctx: &NodeContext, _text: &str) -> VisitResult {
207
- VisitResult::Continue
208
- }
209
-
210
- /// Visit audio elements `<audio>` (async version).
211
- async fn visit_audio(&mut self, _ctx: &NodeContext, _src: Option<&str>) -> VisitResult {
212
- VisitResult::Continue
213
- }
214
-
215
- /// Visit video elements `<video>` (async version).
216
- async fn visit_video(&mut self, _ctx: &NodeContext, _src: Option<&str>) -> VisitResult {
217
- VisitResult::Continue
218
- }
219
-
220
- /// Visit iframe elements `<iframe>` (async version).
221
- async fn visit_iframe(&mut self, _ctx: &NodeContext, _src: Option<&str>) -> VisitResult {
222
- VisitResult::Continue
223
- }
224
-
225
- /// Visit details elements `<details>` (async version).
226
- async fn visit_details(&mut self, _ctx: &NodeContext, _open: bool) -> VisitResult {
227
- VisitResult::Continue
228
- }
229
-
230
- /// Visit summary elements `<summary>` (async version).
231
- async fn visit_summary(&mut self, _ctx: &NodeContext, _text: &str) -> VisitResult {
232
- VisitResult::Continue
233
- }
234
-
235
- /// Visit figure elements `<figure>` (async version).
236
- async fn visit_figure_start(&mut self, _ctx: &NodeContext) -> VisitResult {
237
- VisitResult::Continue
238
- }
239
-
240
- /// Visit figcaption elements `<figcaption>` (async version).
241
- async fn visit_figcaption(&mut self, _ctx: &NodeContext, _text: &str) -> VisitResult {
242
- VisitResult::Continue
243
- }
244
-
245
- /// Called after processing a figure `</figure>` (async version).
246
- async fn visit_figure_end(&mut self, _ctx: &NodeContext, _output: &str) -> VisitResult {
247
- VisitResult::Continue
248
- }
249
- }
@@ -1,189 +0,0 @@
1
- //! Async-to-sync visitor bridge for integrating async visitors with synchronous converters.
2
- //!
3
- //! This module provides the `AsyncToSyncVisitorBridge` struct that wraps an async visitor
4
- //! and implements the sync `HtmlVisitor` trait using channel-based communication.
5
-
6
- #[cfg(feature = "async-visitor")]
7
- use super::AsyncVisitorHandle;
8
-
9
- /// Request types for visitor method calls over the channel.
10
- #[cfg(feature = "async-visitor")]
11
- pub(super) enum VisitorRequest {
12
- ElementStart(crate::visitor::NodeContext),
13
- ElementEnd(crate::visitor::NodeContext, String),
14
- Text(crate::visitor::NodeContext, String),
15
- Link(crate::visitor::NodeContext, String, String, Option<String>),
16
- Image(crate::visitor::NodeContext, String, String, Option<String>),
17
- Heading(crate::visitor::NodeContext, u32, String, Option<String>),
18
- CodeBlock(crate::visitor::NodeContext, Option<String>, String),
19
- CodeInline(crate::visitor::NodeContext, String),
20
- ListItem(crate::visitor::NodeContext, bool, String, String),
21
- ListStart(crate::visitor::NodeContext, bool),
22
- ListEnd(crate::visitor::NodeContext, bool, String),
23
- TableStart(crate::visitor::NodeContext),
24
- TableRow(crate::visitor::NodeContext, Vec<String>, bool),
25
- TableEnd(crate::visitor::NodeContext, String),
26
- Blockquote(crate::visitor::NodeContext, String, usize),
27
- Strong(crate::visitor::NodeContext, String),
28
- Emphasis(crate::visitor::NodeContext, String),
29
- Strikethrough(crate::visitor::NodeContext, String),
30
- Underline(crate::visitor::NodeContext, String),
31
- Subscript(crate::visitor::NodeContext, String),
32
- Superscript(crate::visitor::NodeContext, String),
33
- Mark(crate::visitor::NodeContext, String),
34
- LineBreak(crate::visitor::NodeContext),
35
- HorizontalRule(crate::visitor::NodeContext),
36
- CustomElement(crate::visitor::NodeContext, String, String),
37
- }
38
-
39
- /// Bridge that wraps an async visitor and implements the sync `HtmlVisitor` trait.
40
- ///
41
- /// This bridge uses a channel-based approach to avoid blocking:
42
- /// 1. Sync converter sends visitor call request through channel
43
- /// 2. Async runtime receives request and awaits JS callback
44
- /// 3. Result sent back through response channel
45
- /// 4. Sync converter receives result and continues
46
- ///
47
- /// This approach avoids deadlock by never blocking on async operations.
48
- #[cfg(feature = "async-visitor")]
49
- pub struct AsyncToSyncVisitorBridge {
50
- #[allow(dead_code)]
51
- pub(super) async_visitor: AsyncVisitorHandle,
52
- pub(super) request_tx: tokio::sync::mpsc::UnboundedSender<VisitorRequest>,
53
- pub(super) response_rx: std::sync::mpsc::Receiver<crate::visitor::VisitResult>,
54
- }
55
-
56
- #[cfg(feature = "async-visitor")]
57
- impl std::fmt::Debug for AsyncToSyncVisitorBridge {
58
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
59
- f.debug_struct("AsyncToSyncVisitorBridge")
60
- .field("async_visitor", &self.async_visitor)
61
- .finish_non_exhaustive()
62
- }
63
- }
64
-
65
- #[cfg(feature = "async-visitor")]
66
- impl AsyncToSyncVisitorBridge {
67
- /// Create a new async-to-sync visitor bridge with channel-based communication.
68
- pub fn new(async_visitor: AsyncVisitorHandle) -> Self {
69
- // Use tokio::sync::mpsc for async channels (not std::sync::mpsc which blocks)
70
- let (request_tx, mut request_rx) = tokio::sync::mpsc::unbounded_channel();
71
- let (response_tx, response_rx) = std::sync::mpsc::channel();
72
-
73
- // Spawn async task to handle visitor requests
74
- let visitor_clone = async_visitor.clone();
75
- tokio::spawn(async move {
76
- while let Some(request) = request_rx.recv().await {
77
- let result = match request {
78
- VisitorRequest::ElementStart(ctx) => {
79
- let mut visitor = visitor_clone.lock().await;
80
- visitor.visit_element_start(&ctx).await
81
- }
82
- VisitorRequest::ElementEnd(ctx, output) => {
83
- let mut visitor = visitor_clone.lock().await;
84
- visitor.visit_element_end(&ctx, &output).await
85
- }
86
- VisitorRequest::Text(ctx, text) => {
87
- let mut visitor = visitor_clone.lock().await;
88
- visitor.visit_text(&ctx, &text).await
89
- }
90
- VisitorRequest::Link(ctx, href, text, title) => {
91
- let mut visitor = visitor_clone.lock().await;
92
- visitor.visit_link(&ctx, &href, &text, title.as_deref()).await
93
- }
94
- VisitorRequest::Image(ctx, src, alt, title) => {
95
- let mut visitor = visitor_clone.lock().await;
96
- visitor.visit_image(&ctx, &src, &alt, title.as_deref()).await
97
- }
98
- VisitorRequest::Heading(ctx, level, text, id) => {
99
- let mut visitor = visitor_clone.lock().await;
100
- visitor.visit_heading(&ctx, level, &text, id.as_deref()).await
101
- }
102
- VisitorRequest::CodeBlock(ctx, lang, code) => {
103
- let mut visitor = visitor_clone.lock().await;
104
- visitor.visit_code_block(&ctx, lang.as_deref(), &code).await
105
- }
106
- VisitorRequest::CodeInline(ctx, code) => {
107
- let mut visitor = visitor_clone.lock().await;
108
- visitor.visit_code_inline(&ctx, &code).await
109
- }
110
- VisitorRequest::ListItem(ctx, ordered, marker, text) => {
111
- let mut visitor = visitor_clone.lock().await;
112
- visitor.visit_list_item(&ctx, ordered, &marker, &text).await
113
- }
114
- VisitorRequest::ListStart(ctx, ordered) => {
115
- let mut visitor = visitor_clone.lock().await;
116
- visitor.visit_list_start(&ctx, ordered).await
117
- }
118
- VisitorRequest::ListEnd(ctx, ordered, output) => {
119
- let mut visitor = visitor_clone.lock().await;
120
- visitor.visit_list_end(&ctx, ordered, &output).await
121
- }
122
- VisitorRequest::TableStart(ctx) => {
123
- let mut visitor = visitor_clone.lock().await;
124
- visitor.visit_table_start(&ctx).await
125
- }
126
- VisitorRequest::TableRow(ctx, cells, is_header) => {
127
- let mut visitor = visitor_clone.lock().await;
128
- visitor.visit_table_row(&ctx, &cells, is_header).await
129
- }
130
- VisitorRequest::TableEnd(ctx, output) => {
131
- let mut visitor = visitor_clone.lock().await;
132
- visitor.visit_table_end(&ctx, &output).await
133
- }
134
- VisitorRequest::Blockquote(ctx, content, depth) => {
135
- let mut visitor = visitor_clone.lock().await;
136
- visitor.visit_blockquote(&ctx, &content, depth).await
137
- }
138
- VisitorRequest::Strong(ctx, text) => {
139
- let mut visitor = visitor_clone.lock().await;
140
- visitor.visit_strong(&ctx, &text).await
141
- }
142
- VisitorRequest::Emphasis(ctx, text) => {
143
- let mut visitor = visitor_clone.lock().await;
144
- visitor.visit_emphasis(&ctx, &text).await
145
- }
146
- VisitorRequest::Strikethrough(ctx, text) => {
147
- let mut visitor = visitor_clone.lock().await;
148
- visitor.visit_strikethrough(&ctx, &text).await
149
- }
150
- VisitorRequest::Underline(ctx, text) => {
151
- let mut visitor = visitor_clone.lock().await;
152
- visitor.visit_underline(&ctx, &text).await
153
- }
154
- VisitorRequest::Subscript(ctx, text) => {
155
- let mut visitor = visitor_clone.lock().await;
156
- visitor.visit_subscript(&ctx, &text).await
157
- }
158
- VisitorRequest::Superscript(ctx, text) => {
159
- let mut visitor = visitor_clone.lock().await;
160
- visitor.visit_superscript(&ctx, &text).await
161
- }
162
- VisitorRequest::Mark(ctx, text) => {
163
- let mut visitor = visitor_clone.lock().await;
164
- visitor.visit_mark(&ctx, &text).await
165
- }
166
- VisitorRequest::LineBreak(ctx) => {
167
- let mut visitor = visitor_clone.lock().await;
168
- visitor.visit_line_break(&ctx).await
169
- }
170
- VisitorRequest::HorizontalRule(ctx) => {
171
- let mut visitor = visitor_clone.lock().await;
172
- visitor.visit_horizontal_rule(&ctx).await
173
- }
174
- VisitorRequest::CustomElement(ctx, tag_name, html) => {
175
- let mut visitor = visitor_clone.lock().await;
176
- visitor.visit_custom_element(&ctx, &tag_name, &html).await
177
- }
178
- };
179
- let _ = response_tx.send(result);
180
- }
181
- });
182
-
183
- Self {
184
- async_visitor,
185
- request_tx,
186
- response_rx,
187
- }
188
- }
189
- }