html-to-markdown 2.30.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +4 -14
  3. data/README.md +37 -50
  4. data/ext/html-to-markdown-rb/native/Cargo.lock +13 -701
  5. data/ext/html-to-markdown-rb/native/Cargo.toml +1 -4
  6. data/ext/html-to-markdown-rb/native/README.md +4 -13
  7. data/ext/html-to-markdown-rb/native/src/conversion/inline_images.rs +2 -73
  8. data/ext/html-to-markdown-rb/native/src/conversion/metadata.rs +5 -49
  9. data/ext/html-to-markdown-rb/native/src/conversion/mod.rs +0 -6
  10. data/ext/html-to-markdown-rb/native/src/lib.rs +76 -213
  11. data/ext/html-to-markdown-rb/native/src/options.rs +0 -3
  12. data/lib/html_to_markdown/version.rb +1 -1
  13. data/lib/html_to_markdown.rb +13 -194
  14. data/sig/html_to_markdown.rbs +12 -373
  15. data/vendor/Cargo.toml +5 -2
  16. data/vendor/html-to-markdown-rs/Cargo.toml +4 -10
  17. data/vendor/html-to-markdown-rs/README.md +126 -52
  18. data/vendor/html-to-markdown-rs/examples/basic.rs +6 -1
  19. data/vendor/html-to-markdown-rs/examples/table.rs +6 -1
  20. data/vendor/html-to-markdown-rs/examples/test_escape.rs +6 -1
  21. data/vendor/html-to-markdown-rs/examples/test_inline_formatting.rs +8 -2
  22. data/vendor/html-to-markdown-rs/examples/test_lists.rs +6 -1
  23. data/vendor/html-to-markdown-rs/examples/test_semantic_tags.rs +6 -1
  24. data/vendor/html-to-markdown-rs/examples/test_tables.rs +6 -1
  25. data/vendor/html-to-markdown-rs/examples/test_task_lists.rs +6 -1
  26. data/vendor/html-to-markdown-rs/examples/test_whitespace.rs +6 -1
  27. data/vendor/html-to-markdown-rs/src/convert_api.rs +151 -745
  28. data/vendor/html-to-markdown-rs/src/converter/block/blockquote.rs +3 -5
  29. data/vendor/html-to-markdown-rs/src/converter/block/div.rs +1 -7
  30. data/vendor/html-to-markdown-rs/src/converter/block/heading.rs +18 -5
  31. data/vendor/html-to-markdown-rs/src/converter/block/paragraph.rs +10 -0
  32. data/vendor/html-to-markdown-rs/src/converter/block/preformatted.rs +3 -5
  33. data/vendor/html-to-markdown-rs/src/converter/block/table/builder.rs +16 -11
  34. data/vendor/html-to-markdown-rs/src/converter/block/table/cell.rs +20 -0
  35. data/vendor/html-to-markdown-rs/src/converter/block/table/cells.rs +4 -17
  36. data/vendor/html-to-markdown-rs/src/converter/block/table/mod.rs +140 -0
  37. data/vendor/html-to-markdown-rs/src/converter/block/table/scanner.rs +4 -18
  38. data/vendor/html-to-markdown-rs/src/converter/block/table/utils.rs +2 -18
  39. data/vendor/html-to-markdown-rs/src/converter/context.rs +8 -0
  40. data/vendor/html-to-markdown-rs/src/converter/dom_context.rs +1 -6
  41. data/vendor/html-to-markdown-rs/src/converter/form/elements.rs +14 -14
  42. data/vendor/html-to-markdown-rs/src/converter/handlers/blockquote.rs +4 -5
  43. data/vendor/html-to-markdown-rs/src/converter/handlers/code_block.rs +5 -10
  44. data/vendor/html-to-markdown-rs/src/converter/handlers/graphic.rs +3 -5
  45. data/vendor/html-to-markdown-rs/src/converter/handlers/image.rs +3 -5
  46. data/vendor/html-to-markdown-rs/src/converter/handlers/link.rs +3 -5
  47. data/vendor/html-to-markdown-rs/src/converter/inline/code.rs +3 -5
  48. data/vendor/html-to-markdown-rs/src/converter/inline/emphasis.rs +4 -10
  49. data/vendor/html-to-markdown-rs/src/converter/inline/link.rs +4 -170
  50. data/vendor/html-to-markdown-rs/src/converter/inline/semantic/marks.rs +7 -19
  51. data/vendor/html-to-markdown-rs/src/converter/list/item.rs +3 -5
  52. data/vendor/html-to-markdown-rs/src/converter/list/ordered.rs +4 -10
  53. data/vendor/html-to-markdown-rs/src/converter/list/unordered.rs +6 -12
  54. data/vendor/html-to-markdown-rs/src/converter/list/utils.rs +1 -12
  55. data/vendor/html-to-markdown-rs/src/converter/main.rs +85 -56
  56. data/vendor/html-to-markdown-rs/src/converter/main_helpers.rs +4 -68
  57. data/vendor/html-to-markdown-rs/src/converter/media/embedded.rs +1 -5
  58. data/vendor/html-to-markdown-rs/src/converter/media/graphic.rs +3 -40
  59. data/vendor/html-to-markdown-rs/src/converter/media/image.rs +0 -8
  60. data/vendor/html-to-markdown-rs/src/converter/media/svg.rs +3 -13
  61. data/vendor/html-to-markdown-rs/src/converter/metadata.rs +1 -1
  62. data/vendor/html-to-markdown-rs/src/converter/mod.rs +0 -8
  63. data/vendor/html-to-markdown-rs/src/converter/plain_text.rs +37 -12
  64. data/vendor/html-to-markdown-rs/src/converter/semantic/attributes.rs +5 -30
  65. data/vendor/html-to-markdown-rs/src/converter/semantic/figure.rs +29 -0
  66. data/vendor/html-to-markdown-rs/src/converter/text/escaping.rs +1 -36
  67. data/vendor/html-to-markdown-rs/src/converter/text/mod.rs +1 -3
  68. data/vendor/html-to-markdown-rs/src/converter/text/normalization.rs +0 -53
  69. data/vendor/html-to-markdown-rs/src/converter/text_node.rs +1 -1
  70. data/vendor/html-to-markdown-rs/src/converter/utility/attributes.rs +0 -41
  71. data/vendor/html-to-markdown-rs/src/converter/utility/caching.rs +2 -1
  72. data/vendor/html-to-markdown-rs/src/converter/utility/content.rs +15 -98
  73. data/vendor/html-to-markdown-rs/src/converter/utility/preprocessing.rs +113 -4
  74. data/vendor/html-to-markdown-rs/src/converter/utility/serialization.rs +3 -0
  75. data/vendor/html-to-markdown-rs/src/converter/visitor_hooks.rs +4 -10
  76. data/vendor/html-to-markdown-rs/src/exports.rs +1 -4
  77. data/vendor/html-to-markdown-rs/src/inline_images.rs +1 -1
  78. data/vendor/html-to-markdown-rs/src/lib.rs +13 -133
  79. data/vendor/html-to-markdown-rs/src/metadata/collector.rs +4 -4
  80. data/vendor/html-to-markdown-rs/src/metadata/mod.rs +22 -22
  81. data/vendor/html-to-markdown-rs/src/metadata/types.rs +3 -3
  82. data/vendor/html-to-markdown-rs/src/options/conversion.rs +351 -323
  83. data/vendor/html-to-markdown-rs/src/options/preprocessing.rs +8 -2
  84. data/vendor/html-to-markdown-rs/src/prelude.rs +1 -15
  85. data/vendor/html-to-markdown-rs/src/rcdom.rs +7 -1
  86. data/vendor/html-to-markdown-rs/src/text.rs +25 -14
  87. data/vendor/html-to-markdown-rs/src/types/document.rs +175 -0
  88. data/vendor/html-to-markdown-rs/src/types/mod.rs +17 -0
  89. data/vendor/html-to-markdown-rs/src/types/result.rs +49 -0
  90. data/vendor/html-to-markdown-rs/src/types/structure_builder.rs +790 -0
  91. data/vendor/html-to-markdown-rs/src/types/structure_collector.rs +442 -0
  92. data/vendor/html-to-markdown-rs/src/types/tables.rs +47 -0
  93. data/vendor/html-to-markdown-rs/src/types/warnings.rs +28 -0
  94. data/vendor/html-to-markdown-rs/src/visitor/mod.rs +0 -6
  95. data/vendor/html-to-markdown-rs/src/visitor/traits.rs +0 -1
  96. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/mod.rs +1 -21
  97. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/mod.rs +0 -5
  98. data/vendor/html-to-markdown-rs/src/visitor_helpers.rs +1 -845
  99. data/vendor/html-to-markdown-rs/tests/br_in_inline_test.rs +8 -1
  100. data/vendor/html-to-markdown-rs/tests/commonmark_compliance_test.rs +8 -8
  101. data/vendor/html-to-markdown-rs/tests/djot_output_test.rs +8 -2
  102. data/vendor/html-to-markdown-rs/tests/integration_test.rs +23 -6
  103. data/vendor/html-to-markdown-rs/tests/issue_121_regressions.rs +8 -1
  104. data/vendor/html-to-markdown-rs/tests/issue_127_regressions.rs +8 -2
  105. data/vendor/html-to-markdown-rs/tests/issue_128_regressions.rs +6 -1
  106. data/vendor/html-to-markdown-rs/tests/issue_131_regressions.rs +8 -1
  107. data/vendor/html-to-markdown-rs/tests/issue_134_regressions.rs +8 -1
  108. data/vendor/html-to-markdown-rs/tests/issue_139_regressions.rs +8 -1
  109. data/vendor/html-to-markdown-rs/tests/issue_140_regressions.rs +8 -1
  110. data/vendor/html-to-markdown-rs/tests/issue_143_regressions.rs +8 -1
  111. data/vendor/html-to-markdown-rs/tests/issue_145_regressions.rs +8 -7
  112. data/vendor/html-to-markdown-rs/tests/issue_146_regressions.rs +8 -7
  113. data/vendor/html-to-markdown-rs/tests/issue_176_regressions.rs +12 -2
  114. data/vendor/html-to-markdown-rs/tests/issue_190_regressions.rs +8 -1
  115. data/vendor/html-to-markdown-rs/tests/issue_199_regressions.rs +6 -1
  116. data/vendor/html-to-markdown-rs/tests/issue_200_regressions.rs +6 -1
  117. data/vendor/html-to-markdown-rs/tests/issue_212_regressions.rs +6 -1
  118. data/vendor/html-to-markdown-rs/tests/issue_216_217_regressions.rs +6 -1
  119. data/vendor/html-to-markdown-rs/tests/json_ld_script_extraction.rs +4 -6
  120. data/vendor/html-to-markdown-rs/tests/lists_test.rs +8 -1
  121. data/vendor/html-to-markdown-rs/tests/plain_output_test.rs +8 -2
  122. data/vendor/html-to-markdown-rs/tests/preprocessing_tests.rs +8 -1
  123. data/vendor/html-to-markdown-rs/tests/skip_images_test.rs +8 -11
  124. data/vendor/html-to-markdown-rs/tests/tables_test.rs +12 -2
  125. data/vendor/html-to-markdown-rs/tests/test_custom_elements.rs +8 -1
  126. data/vendor/html-to-markdown-rs/tests/test_nested_simple.rs +8 -1
  127. data/vendor/html-to-markdown-rs/tests/test_script_style_stripping.rs +17 -28
  128. data/vendor/html-to-markdown-rs/tests/test_spa_bisect.rs +8 -1
  129. data/vendor/html-to-markdown-rs/tests/visitor_integration_test.rs +29 -33
  130. data/vendor/html-to-markdown-rs/tests/xml_tables_test.rs +8 -1
  131. metadata +9 -37
  132. data/bin/benchmark.rb +0 -232
  133. data/ext/html-to-markdown-rb/native/src/conversion/tables.rs +0 -71
  134. data/ext/html-to-markdown-rb/native/src/profiling.rs +0 -215
  135. data/ext/html-to-markdown-rb/native/src/visitor/bridge.rs +0 -252
  136. data/ext/html-to-markdown-rb/native/src/visitor/callbacks.rs +0 -640
  137. data/ext/html-to-markdown-rb/native/src/visitor/mod.rs +0 -12
  138. data/spec/convert_spec.rb +0 -77
  139. data/spec/convert_with_tables_spec.rb +0 -194
  140. data/spec/metadata_extraction_spec.rb +0 -437
  141. data/spec/visitor_issue_187_spec.rb +0 -605
  142. data/spec/visitor_spec.rb +0 -1149
  143. data/vendor/html-to-markdown-rs/src/hocr/converter/code_analysis.rs +0 -254
  144. data/vendor/html-to-markdown-rs/src/hocr/converter/core.rs +0 -249
  145. data/vendor/html-to-markdown-rs/src/hocr/converter/elements.rs +0 -382
  146. data/vendor/html-to-markdown-rs/src/hocr/converter/hierarchy.rs +0 -379
  147. data/vendor/html-to-markdown-rs/src/hocr/converter/keywords.rs +0 -55
  148. data/vendor/html-to-markdown-rs/src/hocr/converter/layout.rs +0 -313
  149. data/vendor/html-to-markdown-rs/src/hocr/converter/mod.rs +0 -26
  150. data/vendor/html-to-markdown-rs/src/hocr/converter/output.rs +0 -78
  151. data/vendor/html-to-markdown-rs/src/hocr/extractor.rs +0 -232
  152. data/vendor/html-to-markdown-rs/src/hocr/mod.rs +0 -42
  153. data/vendor/html-to-markdown-rs/src/hocr/parser.rs +0 -333
  154. data/vendor/html-to-markdown-rs/src/hocr/spatial/coords.rs +0 -129
  155. data/vendor/html-to-markdown-rs/src/hocr/spatial/grouping.rs +0 -165
  156. data/vendor/html-to-markdown-rs/src/hocr/spatial/layout.rs +0 -335
  157. data/vendor/html-to-markdown-rs/src/hocr/spatial/mod.rs +0 -15
  158. data/vendor/html-to-markdown-rs/src/hocr/spatial/output.rs +0 -63
  159. data/vendor/html-to-markdown-rs/src/hocr/types.rs +0 -269
  160. data/vendor/html-to-markdown-rs/src/visitor/async_traits.rs +0 -249
  161. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/bridge.rs +0 -189
  162. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/bridge_visitor.rs +0 -343
  163. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/macros.rs +0 -217
  164. data/vendor/html-to-markdown-rs/tests/async_visitor_test.rs +0 -57
  165. data/vendor/html-to-markdown-rs/tests/convert_with_metadata_no_frontmatter.rs +0 -100
  166. data/vendor/html-to-markdown-rs/tests/hocr_compliance_test.rs +0 -509
@@ -1,343 +0,0 @@
1
- //! HtmlVisitor trait implementation for AsyncToSyncVisitorBridge.
2
- //!
3
- //! This module implements the sync `HtmlVisitor` trait for the async-to-sync bridge,
4
- //! translating each method call into a channel request/response pair.
5
-
6
- #[cfg(feature = "async-visitor")]
7
- use super::bridge::{AsyncToSyncVisitorBridge, VisitorRequest};
8
-
9
- #[cfg(feature = "async-visitor")]
10
- impl crate::visitor::HtmlVisitor for AsyncToSyncVisitorBridge {
11
- fn visit_element_start(&mut self, ctx: &crate::visitor::NodeContext) -> crate::visitor::VisitResult {
12
- if self.request_tx.send(VisitorRequest::ElementStart(ctx.clone())).is_err() {
13
- return crate::visitor::VisitResult::Continue;
14
- }
15
- self.response_rx.recv().unwrap_or(crate::visitor::VisitResult::Continue)
16
- }
17
-
18
- fn visit_element_end(&mut self, ctx: &crate::visitor::NodeContext, output: &str) -> crate::visitor::VisitResult {
19
- if self
20
- .request_tx
21
- .send(VisitorRequest::ElementEnd(ctx.clone(), output.to_string()))
22
- .is_err()
23
- {
24
- return crate::visitor::VisitResult::Continue;
25
- }
26
- self.response_rx.recv().unwrap_or(crate::visitor::VisitResult::Continue)
27
- }
28
-
29
- fn visit_text(&mut self, ctx: &crate::visitor::NodeContext, text: &str) -> crate::visitor::VisitResult {
30
- if self
31
- .request_tx
32
- .send(VisitorRequest::Text(ctx.clone(), text.to_string()))
33
- .is_err()
34
- {
35
- return crate::visitor::VisitResult::Continue;
36
- }
37
- self.response_rx.recv().unwrap_or(crate::visitor::VisitResult::Continue)
38
- }
39
-
40
- fn visit_link(
41
- &mut self,
42
- ctx: &crate::visitor::NodeContext,
43
- href: &str,
44
- text: &str,
45
- title: Option<&str>,
46
- ) -> crate::visitor::VisitResult {
47
- if self
48
- .request_tx
49
- .send(VisitorRequest::Link(
50
- ctx.clone(),
51
- href.to_string(),
52
- text.to_string(),
53
- title.map(std::string::ToString::to_string),
54
- ))
55
- .is_err()
56
- {
57
- return crate::visitor::VisitResult::Continue;
58
- }
59
- self.response_rx.recv().unwrap_or(crate::visitor::VisitResult::Continue)
60
- }
61
-
62
- fn visit_image(
63
- &mut self,
64
- ctx: &crate::visitor::NodeContext,
65
- src: &str,
66
- alt: &str,
67
- title: Option<&str>,
68
- ) -> crate::visitor::VisitResult {
69
- if self
70
- .request_tx
71
- .send(VisitorRequest::Image(
72
- ctx.clone(),
73
- src.to_string(),
74
- alt.to_string(),
75
- title.map(std::string::ToString::to_string),
76
- ))
77
- .is_err()
78
- {
79
- return crate::visitor::VisitResult::Continue;
80
- }
81
- self.response_rx.recv().unwrap_or(crate::visitor::VisitResult::Continue)
82
- }
83
-
84
- fn visit_heading(
85
- &mut self,
86
- ctx: &crate::visitor::NodeContext,
87
- level: u32,
88
- text: &str,
89
- id: Option<&str>,
90
- ) -> crate::visitor::VisitResult {
91
- if self
92
- .request_tx
93
- .send(VisitorRequest::Heading(
94
- ctx.clone(),
95
- level,
96
- text.to_string(),
97
- id.map(std::string::ToString::to_string),
98
- ))
99
- .is_err()
100
- {
101
- return crate::visitor::VisitResult::Continue;
102
- }
103
- self.response_rx.recv().unwrap_or(crate::visitor::VisitResult::Continue)
104
- }
105
-
106
- fn visit_code_block(
107
- &mut self,
108
- ctx: &crate::visitor::NodeContext,
109
- language: Option<&str>,
110
- code: &str,
111
- ) -> crate::visitor::VisitResult {
112
- if self
113
- .request_tx
114
- .send(VisitorRequest::CodeBlock(
115
- ctx.clone(),
116
- language.map(std::string::ToString::to_string),
117
- code.to_string(),
118
- ))
119
- .is_err()
120
- {
121
- return crate::visitor::VisitResult::Continue;
122
- }
123
- self.response_rx.recv().unwrap_or(crate::visitor::VisitResult::Continue)
124
- }
125
-
126
- fn visit_code_inline(&mut self, ctx: &crate::visitor::NodeContext, code: &str) -> crate::visitor::VisitResult {
127
- if self
128
- .request_tx
129
- .send(VisitorRequest::CodeInline(ctx.clone(), code.to_string()))
130
- .is_err()
131
- {
132
- return crate::visitor::VisitResult::Continue;
133
- }
134
- self.response_rx.recv().unwrap_or(crate::visitor::VisitResult::Continue)
135
- }
136
-
137
- fn visit_list_item(
138
- &mut self,
139
- ctx: &crate::visitor::NodeContext,
140
- ordered: bool,
141
- marker: &str,
142
- text: &str,
143
- ) -> crate::visitor::VisitResult {
144
- if self
145
- .request_tx
146
- .send(VisitorRequest::ListItem(
147
- ctx.clone(),
148
- ordered,
149
- marker.to_string(),
150
- text.to_string(),
151
- ))
152
- .is_err()
153
- {
154
- return crate::visitor::VisitResult::Continue;
155
- }
156
- self.response_rx.recv().unwrap_or(crate::visitor::VisitResult::Continue)
157
- }
158
-
159
- fn visit_list_start(&mut self, ctx: &crate::visitor::NodeContext, ordered: bool) -> crate::visitor::VisitResult {
160
- if self
161
- .request_tx
162
- .send(VisitorRequest::ListStart(ctx.clone(), ordered))
163
- .is_err()
164
- {
165
- return crate::visitor::VisitResult::Continue;
166
- }
167
- self.response_rx.recv().unwrap_or(crate::visitor::VisitResult::Continue)
168
- }
169
-
170
- fn visit_list_end(
171
- &mut self,
172
- ctx: &crate::visitor::NodeContext,
173
- ordered: bool,
174
- output: &str,
175
- ) -> crate::visitor::VisitResult {
176
- if self
177
- .request_tx
178
- .send(VisitorRequest::ListEnd(ctx.clone(), ordered, output.to_string()))
179
- .is_err()
180
- {
181
- return crate::visitor::VisitResult::Continue;
182
- }
183
- self.response_rx.recv().unwrap_or(crate::visitor::VisitResult::Continue)
184
- }
185
-
186
- fn visit_table_start(&mut self, ctx: &crate::visitor::NodeContext) -> crate::visitor::VisitResult {
187
- if self.request_tx.send(VisitorRequest::TableStart(ctx.clone())).is_err() {
188
- return crate::visitor::VisitResult::Continue;
189
- }
190
- self.response_rx.recv().unwrap_or(crate::visitor::VisitResult::Continue)
191
- }
192
-
193
- fn visit_table_row(
194
- &mut self,
195
- ctx: &crate::visitor::NodeContext,
196
- cells: &[String],
197
- is_header: bool,
198
- ) -> crate::visitor::VisitResult {
199
- if self
200
- .request_tx
201
- .send(VisitorRequest::TableRow(ctx.clone(), cells.to_vec(), is_header))
202
- .is_err()
203
- {
204
- return crate::visitor::VisitResult::Continue;
205
- }
206
- self.response_rx.recv().unwrap_or(crate::visitor::VisitResult::Continue)
207
- }
208
-
209
- fn visit_table_end(&mut self, ctx: &crate::visitor::NodeContext, output: &str) -> crate::visitor::VisitResult {
210
- if self
211
- .request_tx
212
- .send(VisitorRequest::TableEnd(ctx.clone(), output.to_string()))
213
- .is_err()
214
- {
215
- return crate::visitor::VisitResult::Continue;
216
- }
217
- self.response_rx.recv().unwrap_or(crate::visitor::VisitResult::Continue)
218
- }
219
-
220
- fn visit_blockquote(
221
- &mut self,
222
- ctx: &crate::visitor::NodeContext,
223
- content: &str,
224
- depth: usize,
225
- ) -> crate::visitor::VisitResult {
226
- if self
227
- .request_tx
228
- .send(VisitorRequest::Blockquote(ctx.clone(), content.to_string(), depth))
229
- .is_err()
230
- {
231
- return crate::visitor::VisitResult::Continue;
232
- }
233
- self.response_rx.recv().unwrap_or(crate::visitor::VisitResult::Continue)
234
- }
235
-
236
- fn visit_strong(&mut self, ctx: &crate::visitor::NodeContext, text: &str) -> crate::visitor::VisitResult {
237
- if self
238
- .request_tx
239
- .send(VisitorRequest::Strong(ctx.clone(), text.to_string()))
240
- .is_err()
241
- {
242
- return crate::visitor::VisitResult::Continue;
243
- }
244
- self.response_rx.recv().unwrap_or(crate::visitor::VisitResult::Continue)
245
- }
246
-
247
- fn visit_emphasis(&mut self, ctx: &crate::visitor::NodeContext, text: &str) -> crate::visitor::VisitResult {
248
- if self
249
- .request_tx
250
- .send(VisitorRequest::Emphasis(ctx.clone(), text.to_string()))
251
- .is_err()
252
- {
253
- return crate::visitor::VisitResult::Continue;
254
- }
255
- self.response_rx.recv().unwrap_or(crate::visitor::VisitResult::Continue)
256
- }
257
-
258
- fn visit_strikethrough(&mut self, ctx: &crate::visitor::NodeContext, text: &str) -> crate::visitor::VisitResult {
259
- if self
260
- .request_tx
261
- .send(VisitorRequest::Strikethrough(ctx.clone(), text.to_string()))
262
- .is_err()
263
- {
264
- return crate::visitor::VisitResult::Continue;
265
- }
266
- self.response_rx.recv().unwrap_or(crate::visitor::VisitResult::Continue)
267
- }
268
-
269
- fn visit_underline(&mut self, ctx: &crate::visitor::NodeContext, text: &str) -> crate::visitor::VisitResult {
270
- if self
271
- .request_tx
272
- .send(VisitorRequest::Underline(ctx.clone(), text.to_string()))
273
- .is_err()
274
- {
275
- return crate::visitor::VisitResult::Continue;
276
- }
277
- self.response_rx.recv().unwrap_or(crate::visitor::VisitResult::Continue)
278
- }
279
-
280
- fn visit_subscript(&mut self, ctx: &crate::visitor::NodeContext, text: &str) -> crate::visitor::VisitResult {
281
- if self
282
- .request_tx
283
- .send(VisitorRequest::Subscript(ctx.clone(), text.to_string()))
284
- .is_err()
285
- {
286
- return crate::visitor::VisitResult::Continue;
287
- }
288
- self.response_rx.recv().unwrap_or(crate::visitor::VisitResult::Continue)
289
- }
290
-
291
- fn visit_superscript(&mut self, ctx: &crate::visitor::NodeContext, text: &str) -> crate::visitor::VisitResult {
292
- if self
293
- .request_tx
294
- .send(VisitorRequest::Superscript(ctx.clone(), text.to_string()))
295
- .is_err()
296
- {
297
- return crate::visitor::VisitResult::Continue;
298
- }
299
- self.response_rx.recv().unwrap_or(crate::visitor::VisitResult::Continue)
300
- }
301
-
302
- fn visit_mark(&mut self, ctx: &crate::visitor::NodeContext, text: &str) -> crate::visitor::VisitResult {
303
- if self
304
- .request_tx
305
- .send(VisitorRequest::Mark(ctx.clone(), text.to_string()))
306
- .is_err()
307
- {
308
- return crate::visitor::VisitResult::Continue;
309
- }
310
- self.response_rx.recv().unwrap_or(crate::visitor::VisitResult::Continue)
311
- }
312
-
313
- fn visit_horizontal_rule(&mut self, ctx: &crate::visitor::NodeContext) -> crate::visitor::VisitResult {
314
- if self
315
- .request_tx
316
- .send(VisitorRequest::HorizontalRule(ctx.clone()))
317
- .is_err()
318
- {
319
- return crate::visitor::VisitResult::Continue;
320
- }
321
- self.response_rx.recv().unwrap_or(crate::visitor::VisitResult::Continue)
322
- }
323
-
324
- fn visit_custom_element(
325
- &mut self,
326
- ctx: &crate::visitor::NodeContext,
327
- tag_name: &str,
328
- html: &str,
329
- ) -> crate::visitor::VisitResult {
330
- if self
331
- .request_tx
332
- .send(VisitorRequest::CustomElement(
333
- ctx.clone(),
334
- tag_name.to_string(),
335
- html.to_string(),
336
- ))
337
- .is_err()
338
- {
339
- return crate::visitor::VisitResult::Continue;
340
- }
341
- self.response_rx.recv().unwrap_or(crate::visitor::VisitResult::Continue)
342
- }
343
- }
@@ -1,217 +0,0 @@
1
- //! Async visitor dispatch function and helper macros.
2
- //!
3
- //! This module provides:
4
- //! - `dispatch_async_visitor` function for async visitor callback dispatch
5
- //! - `try_async_visitor!` macro for common visitor patterns
6
- //! - `try_async_visitor_element_start!` macro for element start callbacks
7
- //! - `try_async_visitor_element_end!` macro for element end callbacks
8
-
9
- #[cfg(feature = "async-visitor")]
10
- use super::super::content::VisitorDispatch;
11
- #[cfg(feature = "async-visitor")]
12
- use super::AsyncVisitorHandle;
13
- #[cfg(feature = "async-visitor")]
14
- use crate::error::{ConversionError, Result};
15
- #[cfg(feature = "async-visitor")]
16
- use crate::visitor::AsyncHtmlVisitor;
17
-
18
- /// Dispatch an async visitor callback and handle the result.
19
- ///
20
- /// This is the async version of `dispatch_visitor`, supporting async visitor implementations.
21
- /// It safely handles the optional visitor, calls the callback function, and translates the
22
- /// `VisitResult` into concrete control flow decisions.
23
- ///
24
- /// # Type Parameters
25
- ///
26
- /// - `F`: Async visitor callback function type
27
- ///
28
- /// # Parameters
29
- ///
30
- /// - `visitor`: Optional async visitor handle (`AsyncVisitorHandle`), locked via `.lock().await` before the callback is invoked
31
- /// - `callback`: Async closure that invokes the appropriate async visitor method
32
- ///
33
- /// # Returns
34
- ///
35
- /// - `Ok(VisitorDispatch::Custom(String))`: Custom markdown output from `VisitResult::Custom`
36
- /// - `Ok(VisitorDispatch::Continue)`: Continue with default behavior (`VisitResult::Continue`)
37
- /// - `Err(ConversionError)`: Stop conversion with error (`VisitResult::Error`)
38
- ///
39
- /// # Errors
40
- ///
41
- /// - If the visitor returns `VisitResult::Error`, this is converted to `ConversionError::Visitor`
42
- /// - The visitor handle is acquired with `.lock().await`; this function performs no `RefCell` borrow, so there is no borrow-failure panic path here
43
- ///
44
- /// # Performance
45
- ///
46
- /// - Zero-cost when visitor is None (common case)
47
- /// - Single dynamic dispatch when visitor is present
48
- /// - No allocations except for error messages
49
- ///
50
- /// # Examples
51
- ///
52
- /// ```ignore
53
- /// let result = dispatch_async_visitor(
54
- /// &visitor,
55
- /// |v| Box::pin(v.visit_heading(&ctx, level, text, id)),
56
- /// ).await?;
57
- ///
58
- /// match result {
59
- /// VisitorDispatch::Custom(output) => return Ok(output),
60
- /// VisitorDispatch::Continue => { /* proceed with default conversion */ }
61
- /// _ => {}
62
- /// }
63
- /// ```
64
- #[cfg(feature = "async-visitor")]
65
- #[allow(dead_code, clippy::future_not_send)]
66
- #[inline]
67
- pub async fn dispatch_async_visitor<F, Fut>(
68
- visitor: &Option<AsyncVisitorHandle>,
69
- callback: F,
70
- ) -> Result<VisitorDispatch>
71
- where
72
- F: FnOnce(&mut dyn AsyncHtmlVisitor) -> Fut,
73
- Fut: std::future::Future<Output = crate::visitor::VisitResult>,
74
- {
75
- let Some(visitor_mutex) = visitor else {
76
- return Ok(VisitorDispatch::Continue);
77
- };
78
-
79
- let future = {
80
- let mut visitor_ref = visitor_mutex.lock().await;
81
- callback(&mut *visitor_ref)
82
- };
83
-
84
- let result = future.await;
85
-
86
- match result {
87
- crate::visitor::VisitResult::Continue => Ok(VisitorDispatch::Continue),
88
- crate::visitor::VisitResult::Custom(output) => Ok(VisitorDispatch::Custom(output)),
89
- crate::visitor::VisitResult::Skip => Ok(VisitorDispatch::Skip),
90
- crate::visitor::VisitResult::PreserveHtml => Ok(VisitorDispatch::PreserveHtml),
91
- crate::visitor::VisitResult::Error(msg) => Err(ConversionError::Visitor(msg)),
92
- }
93
- }
94
-
95
- /// Macro to reduce boilerplate when calling async visitor methods.
96
- ///
97
- /// This macro wraps the common pattern of:
98
- /// 1. Check if visitor is present
99
- /// 2. Call async visitor method (awaiting the result)
100
- /// 3. Handle early return for Custom/Skip/Error (PreserveHtml is not implemented yet and falls through)
101
- /// 4. Continue with default behavior if visitor returns Continue
102
- ///
103
- /// # Syntax
104
- ///
105
- /// ```ignore
106
- /// try_async_visitor!(visitor_option, method_name, ctx, arg1, arg2, ...);
107
- /// ```
108
- ///
109
- /// # Returns
110
- ///
111
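- /// - Returns early with the custom output if visitor returns Custom, or with an empty string if it returns Skip; PreserveHtml is not implemented yet and currently falls through like Continue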
112
- /// - Returns early with Err if visitor returns Error
113
- /// - Continues execution if visitor returns Continue or is None
114
- ///
115
- /// # Examples
116
- ///
117
- /// ```ignore
118
- /// // Before (verbose):
119
- /// let dispatch = dispatch_async_visitor(&visitor, |v| {
120
- /// Box::pin(v.visit_heading(&ctx, level, text, id))
121
- /// }).await?;
122
- /// match dispatch {
123
- /// VisitorDispatch::Custom(output) => return Ok(output),
124
- /// VisitorDispatch::Skip => return Ok(String::new()),
125
- /// VisitorDispatch::PreserveHtml => return Ok(preserve_html_output),
126
- /// VisitorDispatch::Continue => { /* proceed */ }
127
- /// }
128
- ///
129
- /// // After (concise):
130
- /// try_async_visitor!(visitor, visit_heading, &ctx, level, text, id);
131
- /// // Default conversion logic continues here...
132
- /// ```
133
- #[cfg(feature = "async-visitor")]
134
- #[macro_export]
135
- macro_rules! try_async_visitor {
136
- ($visitor:expr, $method:ident, $ctx:expr $(, $arg:expr)*) => {{
137
- let dispatch = $crate::visitor_helpers::dispatch_async_visitor(
138
- $visitor,
139
- |v| Box::pin(v.$method($ctx $(, $arg)*)),
140
- ).await?;
141
-
142
- match dispatch {
143
- $crate::visitor_helpers::VisitorDispatch::Continue => {
144
- }
145
- $crate::visitor_helpers::VisitorDispatch::Custom(output) => {
146
- return Ok(output);
147
- }
148
- $crate::visitor_helpers::VisitorDispatch::Skip => {
149
- return Ok(String::new());
150
- }
151
- $crate::visitor_helpers::VisitorDispatch::PreserveHtml => {
152
- // TODO: Implement HTML preservation logic
153
- }
154
- }
155
- }};
156
- }
157
-
158
- /// Convenience macro for async `element_start` visitor calls with early return.
159
- ///
160
- /// This is the async version of `try_visitor_element_start!` macro.
161
- /// It handles the common pattern of calling `visit_element_start` at the beginning
162
- /// of element processing.
163
- ///
164
- /// # Syntax
165
- ///
166
- /// ```ignore
167
- /// try_async_visitor_element_start!(visitor_option, ctx);
168
- /// ```
169
- ///
170
- /// # Examples
171
- ///
172
- /// ```ignore
173
- /// async fn process_heading(...) -> Result<String> {
174
- /// let ctx = build_node_context(...);
175
- /// try_async_visitor_element_start!(visitor, &ctx);
176
- ///
177
- /// // Default heading processing continues here...
178
- /// }
179
- /// ```
180
- #[cfg(feature = "async-visitor")]
181
- #[macro_export]
182
- macro_rules! try_async_visitor_element_start {
183
- ($visitor:expr, $ctx:expr) => {{
184
- $crate::try_async_visitor!($visitor, visit_element_start, $ctx);
185
- }};
186
- }
187
-
188
- /// Convenience macro for async `element_end` visitor calls with output inspection.
189
- ///
190
- /// This is the async version of `try_visitor_element_end!` macro.
191
- /// It handles the common pattern of calling `visit_element_end` after generating
192
- /// default markdown output.
193
- ///
194
- /// # Syntax
195
- ///
196
- /// ```ignore
197
- /// try_async_visitor_element_end!(visitor_option, ctx, default_output_string);
198
- /// ```
199
- ///
200
- /// # Examples
201
- ///
202
- /// ```ignore
203
- /// async fn process_heading(...) -> Result<String> {
204
- /// let ctx = build_node_context(...);
205
- /// let mut output = String::from("# Heading");
206
- ///
207
- /// try_async_visitor_element_end!(visitor, &ctx, &output);
208
- /// Ok(output)
209
- /// }
210
- /// ```
211
- #[cfg(feature = "async-visitor")]
212
- #[macro_export]
213
- macro_rules! try_async_visitor_element_end {
214
- ($visitor:expr, $ctx:expr, $output:expr) => {{
215
- $crate::try_async_visitor!($visitor, visit_element_end, $ctx, $output);
216
- }};
217
- }
@@ -1,57 +0,0 @@
1
- //! Integration tests for async visitor functionality
2
- //! These tests only check that `AsyncHtmlVisitor` implementations compile; no `current_thread` runtime is started
3
-
4
- #![cfg(feature = "async-visitor")]
5
-
6
- use async_trait::async_trait;
7
- use html_to_markdown_rs::visitor::{AsyncHtmlVisitor, NodeContext, VisitResult};
8
- use std::cell::RefCell;
9
- use std::rc::Rc;
10
-
11
- #[derive(Debug)]
12
- struct CustomOutputVisitor;
13
-
14
- #[async_trait]
15
- impl AsyncHtmlVisitor for CustomOutputVisitor {
16
- async fn visit_heading(&mut self, _ctx: &NodeContext, level: u32, text: &str, _id: Option<&str>) -> VisitResult {
17
- // Return custom output for headings
18
- VisitResult::Custom(format!("[HEADING-{level}] {text}\n\n"))
19
- }
20
-
21
- async fn visit_text(&mut self, _ctx: &NodeContext, _text: &str) -> VisitResult {
22
- VisitResult::Continue
23
- }
24
- }
25
-
26
- // Manual test function for current_thread runtime
27
- // Note: We don't use #[tokio::test] here because it requires the macros feature
28
- // Instead, this test demonstrates that the code compiles and can be called
29
- #[test]
30
- fn test_async_visitor_signature_compatibility() {
31
- // This test verifies that an `AsyncHtmlVisitor` implementation compiles
32
- // and can be stored behind an `Rc<RefCell<dyn AsyncHtmlVisitor>>` handle.
33
- // The actual functionality is tested in integration tests or manually.
34
-
35
- // Create a visitor
36
- let visitor = CustomOutputVisitor;
37
- let _visitor_handle: Rc<RefCell<dyn AsyncHtmlVisitor>> = Rc::new(RefCell::new(visitor));
38
-
39
- // This test just verifies compilation
40
- }
41
-
42
- #[derive(Debug)]
43
- struct SkipImagesVisitor;
44
-
45
- #[async_trait]
46
- impl AsyncHtmlVisitor for SkipImagesVisitor {
47
- async fn visit_image(&mut self, _ctx: &NodeContext, _src: &str, _alt: &str, _title: Option<&str>) -> VisitResult {
48
- VisitResult::Skip
49
- }
50
- }
51
-
52
- #[test]
53
- fn test_skip_images_visitor_compiles() {
54
- // Verify the SkipImagesVisitor compiles correctly
55
- let visitor = SkipImagesVisitor;
56
- let _visitor_handle: Rc<RefCell<dyn AsyncHtmlVisitor>> = Rc::new(RefCell::new(visitor));
57
- }