html-to-markdown 2.30.0 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +6 -19
- data/README.md +37 -50
- data/ext/html-to-markdown-rb/native/Cargo.lock +13 -701
- data/ext/html-to-markdown-rb/native/Cargo.toml +1 -4
- data/ext/html-to-markdown-rb/native/README.md +4 -13
- data/ext/html-to-markdown-rb/native/src/conversion/inline_images.rs +2 -73
- data/ext/html-to-markdown-rb/native/src/conversion/metadata.rs +5 -49
- data/ext/html-to-markdown-rb/native/src/conversion/mod.rs +0 -6
- data/ext/html-to-markdown-rb/native/src/lib.rs +76 -213
- data/ext/html-to-markdown-rb/native/src/options.rs +0 -3
- data/lib/html_to_markdown/version.rb +1 -1
- data/lib/html_to_markdown.rb +13 -194
- data/sig/html_to_markdown.rbs +12 -373
- data/vendor/Cargo.toml +6 -3
- data/vendor/html-to-markdown-rs/Cargo.toml +4 -10
- data/vendor/html-to-markdown-rs/README.md +126 -52
- data/vendor/html-to-markdown-rs/examples/basic.rs +6 -1
- data/vendor/html-to-markdown-rs/examples/table.rs +6 -1
- data/vendor/html-to-markdown-rs/examples/test_escape.rs +6 -1
- data/vendor/html-to-markdown-rs/examples/test_inline_formatting.rs +8 -2
- data/vendor/html-to-markdown-rs/examples/test_lists.rs +6 -1
- data/vendor/html-to-markdown-rs/examples/test_semantic_tags.rs +6 -1
- data/vendor/html-to-markdown-rs/examples/test_tables.rs +6 -1
- data/vendor/html-to-markdown-rs/examples/test_task_lists.rs +6 -1
- data/vendor/html-to-markdown-rs/examples/test_whitespace.rs +6 -1
- data/vendor/html-to-markdown-rs/src/convert_api.rs +151 -745
- data/vendor/html-to-markdown-rs/src/converter/block/blockquote.rs +3 -5
- data/vendor/html-to-markdown-rs/src/converter/block/div.rs +1 -7
- data/vendor/html-to-markdown-rs/src/converter/block/heading.rs +18 -5
- data/vendor/html-to-markdown-rs/src/converter/block/paragraph.rs +10 -0
- data/vendor/html-to-markdown-rs/src/converter/block/preformatted.rs +3 -5
- data/vendor/html-to-markdown-rs/src/converter/block/table/builder.rs +16 -11
- data/vendor/html-to-markdown-rs/src/converter/block/table/cell.rs +20 -0
- data/vendor/html-to-markdown-rs/src/converter/block/table/cells.rs +4 -17
- data/vendor/html-to-markdown-rs/src/converter/block/table/mod.rs +140 -0
- data/vendor/html-to-markdown-rs/src/converter/block/table/scanner.rs +4 -18
- data/vendor/html-to-markdown-rs/src/converter/block/table/utils.rs +2 -18
- data/vendor/html-to-markdown-rs/src/converter/context.rs +8 -0
- data/vendor/html-to-markdown-rs/src/converter/dom_context.rs +1 -6
- data/vendor/html-to-markdown-rs/src/converter/form/elements.rs +14 -14
- data/vendor/html-to-markdown-rs/src/converter/handlers/blockquote.rs +4 -5
- data/vendor/html-to-markdown-rs/src/converter/handlers/code_block.rs +5 -10
- data/vendor/html-to-markdown-rs/src/converter/handlers/graphic.rs +3 -5
- data/vendor/html-to-markdown-rs/src/converter/handlers/image.rs +3 -5
- data/vendor/html-to-markdown-rs/src/converter/handlers/link.rs +3 -5
- data/vendor/html-to-markdown-rs/src/converter/inline/code.rs +3 -5
- data/vendor/html-to-markdown-rs/src/converter/inline/emphasis.rs +4 -10
- data/vendor/html-to-markdown-rs/src/converter/inline/link.rs +4 -170
- data/vendor/html-to-markdown-rs/src/converter/inline/semantic/marks.rs +7 -19
- data/vendor/html-to-markdown-rs/src/converter/list/item.rs +3 -5
- data/vendor/html-to-markdown-rs/src/converter/list/ordered.rs +4 -10
- data/vendor/html-to-markdown-rs/src/converter/list/unordered.rs +6 -12
- data/vendor/html-to-markdown-rs/src/converter/list/utils.rs +1 -12
- data/vendor/html-to-markdown-rs/src/converter/main.rs +85 -56
- data/vendor/html-to-markdown-rs/src/converter/main_helpers.rs +4 -68
- data/vendor/html-to-markdown-rs/src/converter/media/embedded.rs +1 -5
- data/vendor/html-to-markdown-rs/src/converter/media/graphic.rs +3 -40
- data/vendor/html-to-markdown-rs/src/converter/media/image.rs +0 -8
- data/vendor/html-to-markdown-rs/src/converter/media/svg.rs +3 -13
- data/vendor/html-to-markdown-rs/src/converter/metadata.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/mod.rs +0 -8
- data/vendor/html-to-markdown-rs/src/converter/plain_text.rs +37 -12
- data/vendor/html-to-markdown-rs/src/converter/semantic/attributes.rs +5 -30
- data/vendor/html-to-markdown-rs/src/converter/semantic/figure.rs +29 -0
- data/vendor/html-to-markdown-rs/src/converter/text/escaping.rs +1 -36
- data/vendor/html-to-markdown-rs/src/converter/text/mod.rs +1 -3
- data/vendor/html-to-markdown-rs/src/converter/text/normalization.rs +0 -53
- data/vendor/html-to-markdown-rs/src/converter/text_node.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/utility/attributes.rs +0 -41
- data/vendor/html-to-markdown-rs/src/converter/utility/caching.rs +2 -1
- data/vendor/html-to-markdown-rs/src/converter/utility/content.rs +15 -98
- data/vendor/html-to-markdown-rs/src/converter/utility/preprocessing.rs +113 -4
- data/vendor/html-to-markdown-rs/src/converter/utility/serialization.rs +3 -0
- data/vendor/html-to-markdown-rs/src/converter/visitor_hooks.rs +4 -10
- data/vendor/html-to-markdown-rs/src/exports.rs +1 -4
- data/vendor/html-to-markdown-rs/src/inline_images.rs +1 -1
- data/vendor/html-to-markdown-rs/src/lib.rs +13 -133
- data/vendor/html-to-markdown-rs/src/metadata/collector.rs +4 -4
- data/vendor/html-to-markdown-rs/src/metadata/mod.rs +22 -22
- data/vendor/html-to-markdown-rs/src/metadata/types.rs +3 -3
- data/vendor/html-to-markdown-rs/src/options/conversion.rs +351 -323
- data/vendor/html-to-markdown-rs/src/options/preprocessing.rs +8 -2
- data/vendor/html-to-markdown-rs/src/prelude.rs +1 -15
- data/vendor/html-to-markdown-rs/src/rcdom.rs +7 -1
- data/vendor/html-to-markdown-rs/src/text.rs +25 -14
- data/vendor/html-to-markdown-rs/src/types/document.rs +175 -0
- data/vendor/html-to-markdown-rs/src/types/mod.rs +17 -0
- data/vendor/html-to-markdown-rs/src/types/result.rs +49 -0
- data/vendor/html-to-markdown-rs/src/types/structure_builder.rs +790 -0
- data/vendor/html-to-markdown-rs/src/types/structure_collector.rs +442 -0
- data/vendor/html-to-markdown-rs/src/types/tables.rs +47 -0
- data/vendor/html-to-markdown-rs/src/types/warnings.rs +28 -0
- data/vendor/html-to-markdown-rs/src/visitor/mod.rs +0 -6
- data/vendor/html-to-markdown-rs/src/visitor/traits.rs +0 -1
- data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/mod.rs +1 -21
- data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/mod.rs +0 -5
- data/vendor/html-to-markdown-rs/src/visitor_helpers.rs +1 -845
- data/vendor/html-to-markdown-rs/tests/br_in_inline_test.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/commonmark_compliance_test.rs +8 -8
- data/vendor/html-to-markdown-rs/tests/djot_output_test.rs +8 -2
- data/vendor/html-to-markdown-rs/tests/integration_test.rs +23 -6
- data/vendor/html-to-markdown-rs/tests/issue_121_regressions.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/issue_127_regressions.rs +8 -2
- data/vendor/html-to-markdown-rs/tests/issue_128_regressions.rs +6 -1
- data/vendor/html-to-markdown-rs/tests/issue_131_regressions.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/issue_134_regressions.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/issue_139_regressions.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/issue_140_regressions.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/issue_143_regressions.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/issue_145_regressions.rs +8 -7
- data/vendor/html-to-markdown-rs/tests/issue_146_regressions.rs +8 -7
- data/vendor/html-to-markdown-rs/tests/issue_176_regressions.rs +12 -2
- data/vendor/html-to-markdown-rs/tests/issue_190_regressions.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/issue_199_regressions.rs +6 -1
- data/vendor/html-to-markdown-rs/tests/issue_200_regressions.rs +6 -1
- data/vendor/html-to-markdown-rs/tests/issue_212_regressions.rs +6 -1
- data/vendor/html-to-markdown-rs/tests/issue_216_217_regressions.rs +6 -1
- data/vendor/html-to-markdown-rs/tests/json_ld_script_extraction.rs +4 -6
- data/vendor/html-to-markdown-rs/tests/lists_test.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/plain_output_test.rs +8 -2
- data/vendor/html-to-markdown-rs/tests/preprocessing_tests.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/skip_images_test.rs +8 -11
- data/vendor/html-to-markdown-rs/tests/tables_test.rs +12 -2
- data/vendor/html-to-markdown-rs/tests/test_custom_elements.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/test_nested_simple.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/test_script_style_stripping.rs +17 -28
- data/vendor/html-to-markdown-rs/tests/test_spa_bisect.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/visitor_integration_test.rs +29 -33
- data/vendor/html-to-markdown-rs/tests/xml_tables_test.rs +8 -1
- metadata +9 -37
- data/bin/benchmark.rb +0 -232
- data/ext/html-to-markdown-rb/native/src/conversion/tables.rs +0 -71
- data/ext/html-to-markdown-rb/native/src/profiling.rs +0 -215
- data/ext/html-to-markdown-rb/native/src/visitor/bridge.rs +0 -252
- data/ext/html-to-markdown-rb/native/src/visitor/callbacks.rs +0 -640
- data/ext/html-to-markdown-rb/native/src/visitor/mod.rs +0 -12
- data/spec/convert_spec.rb +0 -77
- data/spec/convert_with_tables_spec.rb +0 -194
- data/spec/metadata_extraction_spec.rb +0 -437
- data/spec/visitor_issue_187_spec.rb +0 -605
- data/spec/visitor_spec.rb +0 -1149
- data/vendor/html-to-markdown-rs/src/hocr/converter/code_analysis.rs +0 -254
- data/vendor/html-to-markdown-rs/src/hocr/converter/core.rs +0 -249
- data/vendor/html-to-markdown-rs/src/hocr/converter/elements.rs +0 -382
- data/vendor/html-to-markdown-rs/src/hocr/converter/hierarchy.rs +0 -379
- data/vendor/html-to-markdown-rs/src/hocr/converter/keywords.rs +0 -55
- data/vendor/html-to-markdown-rs/src/hocr/converter/layout.rs +0 -313
- data/vendor/html-to-markdown-rs/src/hocr/converter/mod.rs +0 -26
- data/vendor/html-to-markdown-rs/src/hocr/converter/output.rs +0 -78
- data/vendor/html-to-markdown-rs/src/hocr/extractor.rs +0 -232
- data/vendor/html-to-markdown-rs/src/hocr/mod.rs +0 -42
- data/vendor/html-to-markdown-rs/src/hocr/parser.rs +0 -333
- data/vendor/html-to-markdown-rs/src/hocr/spatial/coords.rs +0 -129
- data/vendor/html-to-markdown-rs/src/hocr/spatial/grouping.rs +0 -165
- data/vendor/html-to-markdown-rs/src/hocr/spatial/layout.rs +0 -335
- data/vendor/html-to-markdown-rs/src/hocr/spatial/mod.rs +0 -15
- data/vendor/html-to-markdown-rs/src/hocr/spatial/output.rs +0 -63
- data/vendor/html-to-markdown-rs/src/hocr/types.rs +0 -269
- data/vendor/html-to-markdown-rs/src/visitor/async_traits.rs +0 -249
- data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/bridge.rs +0 -189
- data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/bridge_visitor.rs +0 -343
- data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/macros.rs +0 -217
- data/vendor/html-to-markdown-rs/tests/async_visitor_test.rs +0 -57
- data/vendor/html-to-markdown-rs/tests/convert_with_metadata_no_frontmatter.rs +0 -100
- data/vendor/html-to-markdown-rs/tests/hocr_compliance_test.rs +0 -509
|
@@ -1,249 +0,0 @@
|
|
|
1
|
-
//! Asynchronous visitor trait for HTML to Markdown conversion.
|
|
2
|
-
//!
|
|
3
|
-
//! This module contains the `AsyncHtmlVisitor` trait for async/await based visitation.
|
|
4
|
-
|
|
5
|
-
#[cfg(feature = "async-visitor")]
|
|
6
|
-
use async_trait::async_trait;
|
|
7
|
-
|
|
8
|
-
use super::types::{NodeContext, VisitResult};
|
|
9
|
-
|
|
10
|
-
/// Async visitor trait for HTML→Markdown conversion.
|
|
11
|
-
///
|
|
12
|
-
/// This trait is identical to `HtmlVisitor` but all methods are async. Use this for languages
|
|
13
|
-
/// with native async/await support:
|
|
14
|
-
/// - Python (with `async def` and `asyncio`)
|
|
15
|
-
/// - TypeScript/JavaScript (with `Promise`-based callbacks)
|
|
16
|
-
/// - Elixir (with message-passing processes)
|
|
17
|
-
///
|
|
18
|
-
/// For synchronous languages (Ruby, PHP, Go, Java, C#), use the sync `HtmlVisitor` trait.
|
|
19
|
-
///
|
|
20
|
-
/// # Example (Python-like)
|
|
21
|
-
///
|
|
22
|
-
/// ```ignore
|
|
23
|
-
/// use html_to_markdown_rs::visitor::{AsyncHtmlVisitor, NodeContext, VisitResult};
|
|
24
|
-
///
|
|
25
|
-
/// struct CustomAsyncVisitor;
|
|
26
|
-
///
|
|
27
|
-
/// #[async_trait::async_trait]
|
|
28
|
-
/// impl AsyncHtmlVisitor for CustomAsyncVisitor {
|
|
29
|
-
/// async fn visit_link(
|
|
30
|
-
/// &mut self,
|
|
31
|
-
/// ctx: &NodeContext,
|
|
32
|
-
/// href: &str,
|
|
33
|
-
/// text: &str,
|
|
34
|
-
/// title: Option<&str>,
|
|
35
|
-
/// ) -> VisitResult {
|
|
36
|
-
/// // Can await async operations here
|
|
37
|
-
/// VisitResult::Custom(format!("{} ({})", text, href))
|
|
38
|
-
/// }
|
|
39
|
-
/// }
|
|
40
|
-
/// ```
|
|
41
|
-
#[cfg(feature = "async-visitor")]
|
|
42
|
-
#[async_trait]
|
|
43
|
-
pub trait AsyncHtmlVisitor: std::fmt::Debug + Send + Sync {
|
|
44
|
-
/// Called before entering any element (async version).
|
|
45
|
-
async fn visit_element_start(&mut self, _ctx: &NodeContext) -> VisitResult {
|
|
46
|
-
VisitResult::Continue
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
/// Called after exiting any element (async version).
|
|
50
|
-
async fn visit_element_end(&mut self, _ctx: &NodeContext, _output: &str) -> VisitResult {
|
|
51
|
-
VisitResult::Continue
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
/// Visit text nodes (async version - most frequent callback - ~100+ per document).
|
|
55
|
-
async fn visit_text(&mut self, _ctx: &NodeContext, _text: &str) -> VisitResult {
|
|
56
|
-
VisitResult::Continue
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
/// Visit anchor links `<a href="...">` (async version).
|
|
60
|
-
async fn visit_link(&mut self, _ctx: &NodeContext, _href: &str, _text: &str, _title: Option<&str>) -> VisitResult {
|
|
61
|
-
VisitResult::Continue
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
/// Visit images `<img src="...">` (async version).
|
|
65
|
-
async fn visit_image(&mut self, _ctx: &NodeContext, _src: &str, _alt: &str, _title: Option<&str>) -> VisitResult {
|
|
66
|
-
VisitResult::Continue
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
/// Visit heading elements `<h1>` through `<h6>` (async version).
|
|
70
|
-
async fn visit_heading(&mut self, _ctx: &NodeContext, _level: u32, _text: &str, _id: Option<&str>) -> VisitResult {
|
|
71
|
-
VisitResult::Continue
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
/// Visit code blocks `<pre><code>` (async version).
|
|
75
|
-
async fn visit_code_block(&mut self, _ctx: &NodeContext, _lang: Option<&str>, _code: &str) -> VisitResult {
|
|
76
|
-
VisitResult::Continue
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
/// Visit inline code `<code>` (async version).
|
|
80
|
-
async fn visit_code_inline(&mut self, _ctx: &NodeContext, _code: &str) -> VisitResult {
|
|
81
|
-
VisitResult::Continue
|
|
82
|
-
}
|
|
83
|
-
|
|
84
|
-
/// Visit list items `<li>` (async version).
|
|
85
|
-
async fn visit_list_item(&mut self, _ctx: &NodeContext, _ordered: bool, _marker: &str, _text: &str) -> VisitResult {
|
|
86
|
-
VisitResult::Continue
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
/// Called before processing a list `<ul>` or `<ol>` (async version).
|
|
90
|
-
async fn visit_list_start(&mut self, _ctx: &NodeContext, _ordered: bool) -> VisitResult {
|
|
91
|
-
VisitResult::Continue
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
/// Called after processing a list `</ul>` or `</ol>` (async version).
|
|
95
|
-
async fn visit_list_end(&mut self, _ctx: &NodeContext, _ordered: bool, _output: &str) -> VisitResult {
|
|
96
|
-
VisitResult::Continue
|
|
97
|
-
}
|
|
98
|
-
|
|
99
|
-
/// Called before processing a table `<table>` (async version).
|
|
100
|
-
async fn visit_table_start(&mut self, _ctx: &NodeContext) -> VisitResult {
|
|
101
|
-
VisitResult::Continue
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
/// Visit table rows `<tr>` (async version).
|
|
105
|
-
async fn visit_table_row(&mut self, _ctx: &NodeContext, _cells: &[String], _is_header: bool) -> VisitResult {
|
|
106
|
-
VisitResult::Continue
|
|
107
|
-
}
|
|
108
|
-
|
|
109
|
-
/// Called after processing a table `</table>` (async version).
|
|
110
|
-
async fn visit_table_end(&mut self, _ctx: &NodeContext, _output: &str) -> VisitResult {
|
|
111
|
-
VisitResult::Continue
|
|
112
|
-
}
|
|
113
|
-
|
|
114
|
-
/// Visit blockquote elements `<blockquote>` (async version).
|
|
115
|
-
async fn visit_blockquote(&mut self, _ctx: &NodeContext, _content: &str, _depth: usize) -> VisitResult {
|
|
116
|
-
VisitResult::Continue
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
/// Visit strong/bold elements `<strong>`, `<b>` (async version).
|
|
120
|
-
async fn visit_strong(&mut self, _ctx: &NodeContext, _text: &str) -> VisitResult {
|
|
121
|
-
VisitResult::Continue
|
|
122
|
-
}
|
|
123
|
-
|
|
124
|
-
/// Visit emphasis/italic elements `<em>`, `<i>` (async version).
|
|
125
|
-
async fn visit_emphasis(&mut self, _ctx: &NodeContext, _text: &str) -> VisitResult {
|
|
126
|
-
VisitResult::Continue
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
/// Visit strikethrough elements `<s>`, `<del>`, `<strike>` (async version).
|
|
130
|
-
async fn visit_strikethrough(&mut self, _ctx: &NodeContext, _text: &str) -> VisitResult {
|
|
131
|
-
VisitResult::Continue
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
/// Visit underline elements `<u>`, `<ins>` (async version).
|
|
135
|
-
async fn visit_underline(&mut self, _ctx: &NodeContext, _text: &str) -> VisitResult {
|
|
136
|
-
VisitResult::Continue
|
|
137
|
-
}
|
|
138
|
-
|
|
139
|
-
/// Visit subscript elements `<sub>` (async version).
|
|
140
|
-
async fn visit_subscript(&mut self, _ctx: &NodeContext, _text: &str) -> VisitResult {
|
|
141
|
-
VisitResult::Continue
|
|
142
|
-
}
|
|
143
|
-
|
|
144
|
-
/// Visit superscript elements `<sup>` (async version).
|
|
145
|
-
async fn visit_superscript(&mut self, _ctx: &NodeContext, _text: &str) -> VisitResult {
|
|
146
|
-
VisitResult::Continue
|
|
147
|
-
}
|
|
148
|
-
|
|
149
|
-
/// Visit mark/highlight elements `<mark>` (async version).
|
|
150
|
-
async fn visit_mark(&mut self, _ctx: &NodeContext, _text: &str) -> VisitResult {
|
|
151
|
-
VisitResult::Continue
|
|
152
|
-
}
|
|
153
|
-
|
|
154
|
-
/// Visit line break elements `<br>` (async version).
|
|
155
|
-
async fn visit_line_break(&mut self, _ctx: &NodeContext) -> VisitResult {
|
|
156
|
-
VisitResult::Continue
|
|
157
|
-
}
|
|
158
|
-
|
|
159
|
-
/// Visit horizontal rule elements `<hr>` (async version).
|
|
160
|
-
async fn visit_horizontal_rule(&mut self, _ctx: &NodeContext) -> VisitResult {
|
|
161
|
-
VisitResult::Continue
|
|
162
|
-
}
|
|
163
|
-
|
|
164
|
-
/// Visit custom elements (web components) or unknown tags (async version).
|
|
165
|
-
async fn visit_custom_element(&mut self, _ctx: &NodeContext, _tag_name: &str, _html: &str) -> VisitResult {
|
|
166
|
-
VisitResult::Continue
|
|
167
|
-
}
|
|
168
|
-
|
|
169
|
-
/// Visit definition list `<dl>` (async version).
|
|
170
|
-
async fn visit_definition_list_start(&mut self, _ctx: &NodeContext) -> VisitResult {
|
|
171
|
-
VisitResult::Continue
|
|
172
|
-
}
|
|
173
|
-
|
|
174
|
-
/// Visit definition term `<dt>` (async version).
|
|
175
|
-
async fn visit_definition_term(&mut self, _ctx: &NodeContext, _text: &str) -> VisitResult {
|
|
176
|
-
VisitResult::Continue
|
|
177
|
-
}
|
|
178
|
-
|
|
179
|
-
/// Visit definition description `<dd>` (async version).
|
|
180
|
-
async fn visit_definition_description(&mut self, _ctx: &NodeContext, _text: &str) -> VisitResult {
|
|
181
|
-
VisitResult::Continue
|
|
182
|
-
}
|
|
183
|
-
|
|
184
|
-
/// Called after processing a definition list `</dl>` (async version).
|
|
185
|
-
async fn visit_definition_list_end(&mut self, _ctx: &NodeContext, _output: &str) -> VisitResult {
|
|
186
|
-
VisitResult::Continue
|
|
187
|
-
}
|
|
188
|
-
|
|
189
|
-
/// Visit form elements `<form>` (async version).
|
|
190
|
-
async fn visit_form(&mut self, _ctx: &NodeContext, _action: Option<&str>, _method: Option<&str>) -> VisitResult {
|
|
191
|
-
VisitResult::Continue
|
|
192
|
-
}
|
|
193
|
-
|
|
194
|
-
/// Visit input elements `<input>` (async version).
|
|
195
|
-
async fn visit_input(
|
|
196
|
-
&mut self,
|
|
197
|
-
_ctx: &NodeContext,
|
|
198
|
-
_input_type: &str,
|
|
199
|
-
_name: Option<&str>,
|
|
200
|
-
_value: Option<&str>,
|
|
201
|
-
) -> VisitResult {
|
|
202
|
-
VisitResult::Continue
|
|
203
|
-
}
|
|
204
|
-
|
|
205
|
-
/// Visit button elements `<button>` (async version).
|
|
206
|
-
async fn visit_button(&mut self, _ctx: &NodeContext, _text: &str) -> VisitResult {
|
|
207
|
-
VisitResult::Continue
|
|
208
|
-
}
|
|
209
|
-
|
|
210
|
-
/// Visit audio elements `<audio>` (async version).
|
|
211
|
-
async fn visit_audio(&mut self, _ctx: &NodeContext, _src: Option<&str>) -> VisitResult {
|
|
212
|
-
VisitResult::Continue
|
|
213
|
-
}
|
|
214
|
-
|
|
215
|
-
/// Visit video elements `<video>` (async version).
|
|
216
|
-
async fn visit_video(&mut self, _ctx: &NodeContext, _src: Option<&str>) -> VisitResult {
|
|
217
|
-
VisitResult::Continue
|
|
218
|
-
}
|
|
219
|
-
|
|
220
|
-
/// Visit iframe elements `<iframe>` (async version).
|
|
221
|
-
async fn visit_iframe(&mut self, _ctx: &NodeContext, _src: Option<&str>) -> VisitResult {
|
|
222
|
-
VisitResult::Continue
|
|
223
|
-
}
|
|
224
|
-
|
|
225
|
-
/// Visit details elements `<details>` (async version).
|
|
226
|
-
async fn visit_details(&mut self, _ctx: &NodeContext, _open: bool) -> VisitResult {
|
|
227
|
-
VisitResult::Continue
|
|
228
|
-
}
|
|
229
|
-
|
|
230
|
-
/// Visit summary elements `<summary>` (async version).
|
|
231
|
-
async fn visit_summary(&mut self, _ctx: &NodeContext, _text: &str) -> VisitResult {
|
|
232
|
-
VisitResult::Continue
|
|
233
|
-
}
|
|
234
|
-
|
|
235
|
-
/// Visit figure elements `<figure>` (async version).
|
|
236
|
-
async fn visit_figure_start(&mut self, _ctx: &NodeContext) -> VisitResult {
|
|
237
|
-
VisitResult::Continue
|
|
238
|
-
}
|
|
239
|
-
|
|
240
|
-
/// Visit figcaption elements `<figcaption>` (async version).
|
|
241
|
-
async fn visit_figcaption(&mut self, _ctx: &NodeContext, _text: &str) -> VisitResult {
|
|
242
|
-
VisitResult::Continue
|
|
243
|
-
}
|
|
244
|
-
|
|
245
|
-
/// Called after processing a figure `</figure>` (async version).
|
|
246
|
-
async fn visit_figure_end(&mut self, _ctx: &NodeContext, _output: &str) -> VisitResult {
|
|
247
|
-
VisitResult::Continue
|
|
248
|
-
}
|
|
249
|
-
}
|
|
@@ -1,189 +0,0 @@
|
|
|
1
|
-
//! Async-to-sync visitor bridge for integrating async visitors with synchronous converters.
|
|
2
|
-
//!
|
|
3
|
-
//! This module provides the `AsyncToSyncVisitorBridge` struct that wraps an async visitor
|
|
4
|
-
//! and implements the sync `HtmlVisitor` trait using channel-based communication.
|
|
5
|
-
|
|
6
|
-
#[cfg(feature = "async-visitor")]
|
|
7
|
-
use super::AsyncVisitorHandle;
|
|
8
|
-
|
|
9
|
-
/// Request types for visitor method calls over the channel.
|
|
10
|
-
#[cfg(feature = "async-visitor")]
|
|
11
|
-
pub(super) enum VisitorRequest {
|
|
12
|
-
ElementStart(crate::visitor::NodeContext),
|
|
13
|
-
ElementEnd(crate::visitor::NodeContext, String),
|
|
14
|
-
Text(crate::visitor::NodeContext, String),
|
|
15
|
-
Link(crate::visitor::NodeContext, String, String, Option<String>),
|
|
16
|
-
Image(crate::visitor::NodeContext, String, String, Option<String>),
|
|
17
|
-
Heading(crate::visitor::NodeContext, u32, String, Option<String>),
|
|
18
|
-
CodeBlock(crate::visitor::NodeContext, Option<String>, String),
|
|
19
|
-
CodeInline(crate::visitor::NodeContext, String),
|
|
20
|
-
ListItem(crate::visitor::NodeContext, bool, String, String),
|
|
21
|
-
ListStart(crate::visitor::NodeContext, bool),
|
|
22
|
-
ListEnd(crate::visitor::NodeContext, bool, String),
|
|
23
|
-
TableStart(crate::visitor::NodeContext),
|
|
24
|
-
TableRow(crate::visitor::NodeContext, Vec<String>, bool),
|
|
25
|
-
TableEnd(crate::visitor::NodeContext, String),
|
|
26
|
-
Blockquote(crate::visitor::NodeContext, String, usize),
|
|
27
|
-
Strong(crate::visitor::NodeContext, String),
|
|
28
|
-
Emphasis(crate::visitor::NodeContext, String),
|
|
29
|
-
Strikethrough(crate::visitor::NodeContext, String),
|
|
30
|
-
Underline(crate::visitor::NodeContext, String),
|
|
31
|
-
Subscript(crate::visitor::NodeContext, String),
|
|
32
|
-
Superscript(crate::visitor::NodeContext, String),
|
|
33
|
-
Mark(crate::visitor::NodeContext, String),
|
|
34
|
-
LineBreak(crate::visitor::NodeContext),
|
|
35
|
-
HorizontalRule(crate::visitor::NodeContext),
|
|
36
|
-
CustomElement(crate::visitor::NodeContext, String, String),
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
/// Bridge that wraps an async visitor and implements the sync `HtmlVisitor` trait.
|
|
40
|
-
///
|
|
41
|
-
/// This bridge uses a channel-based approach to avoid blocking:
|
|
42
|
-
/// 1. Sync converter sends visitor call request through channel
|
|
43
|
-
/// 2. Async runtime receives request and awaits JS callback
|
|
44
|
-
/// 3. Result sent back through response channel
|
|
45
|
-
/// 4. Sync converter receives result and continues
|
|
46
|
-
///
|
|
47
|
-
/// This approach avoids deadlock by never blocking on async operations.
|
|
48
|
-
#[cfg(feature = "async-visitor")]
|
|
49
|
-
pub struct AsyncToSyncVisitorBridge {
|
|
50
|
-
#[allow(dead_code)]
|
|
51
|
-
pub(super) async_visitor: AsyncVisitorHandle,
|
|
52
|
-
pub(super) request_tx: tokio::sync::mpsc::UnboundedSender<VisitorRequest>,
|
|
53
|
-
pub(super) response_rx: std::sync::mpsc::Receiver<crate::visitor::VisitResult>,
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
#[cfg(feature = "async-visitor")]
|
|
57
|
-
impl std::fmt::Debug for AsyncToSyncVisitorBridge {
|
|
58
|
-
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
59
|
-
f.debug_struct("AsyncToSyncVisitorBridge")
|
|
60
|
-
.field("async_visitor", &self.async_visitor)
|
|
61
|
-
.finish_non_exhaustive()
|
|
62
|
-
}
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
#[cfg(feature = "async-visitor")]
|
|
66
|
-
impl AsyncToSyncVisitorBridge {
|
|
67
|
-
/// Create a new async-to-sync visitor bridge with channel-based communication.
|
|
68
|
-
pub fn new(async_visitor: AsyncVisitorHandle) -> Self {
|
|
69
|
-
// Use tokio::sync::mpsc for async channels (not std::sync::mpsc which blocks)
|
|
70
|
-
let (request_tx, mut request_rx) = tokio::sync::mpsc::unbounded_channel();
|
|
71
|
-
let (response_tx, response_rx) = std::sync::mpsc::channel();
|
|
72
|
-
|
|
73
|
-
// Spawn async task to handle visitor requests
|
|
74
|
-
let visitor_clone = async_visitor.clone();
|
|
75
|
-
tokio::spawn(async move {
|
|
76
|
-
while let Some(request) = request_rx.recv().await {
|
|
77
|
-
let result = match request {
|
|
78
|
-
VisitorRequest::ElementStart(ctx) => {
|
|
79
|
-
let mut visitor = visitor_clone.lock().await;
|
|
80
|
-
visitor.visit_element_start(&ctx).await
|
|
81
|
-
}
|
|
82
|
-
VisitorRequest::ElementEnd(ctx, output) => {
|
|
83
|
-
let mut visitor = visitor_clone.lock().await;
|
|
84
|
-
visitor.visit_element_end(&ctx, &output).await
|
|
85
|
-
}
|
|
86
|
-
VisitorRequest::Text(ctx, text) => {
|
|
87
|
-
let mut visitor = visitor_clone.lock().await;
|
|
88
|
-
visitor.visit_text(&ctx, &text).await
|
|
89
|
-
}
|
|
90
|
-
VisitorRequest::Link(ctx, href, text, title) => {
|
|
91
|
-
let mut visitor = visitor_clone.lock().await;
|
|
92
|
-
visitor.visit_link(&ctx, &href, &text, title.as_deref()).await
|
|
93
|
-
}
|
|
94
|
-
VisitorRequest::Image(ctx, src, alt, title) => {
|
|
95
|
-
let mut visitor = visitor_clone.lock().await;
|
|
96
|
-
visitor.visit_image(&ctx, &src, &alt, title.as_deref()).await
|
|
97
|
-
}
|
|
98
|
-
VisitorRequest::Heading(ctx, level, text, id) => {
|
|
99
|
-
let mut visitor = visitor_clone.lock().await;
|
|
100
|
-
visitor.visit_heading(&ctx, level, &text, id.as_deref()).await
|
|
101
|
-
}
|
|
102
|
-
VisitorRequest::CodeBlock(ctx, lang, code) => {
|
|
103
|
-
let mut visitor = visitor_clone.lock().await;
|
|
104
|
-
visitor.visit_code_block(&ctx, lang.as_deref(), &code).await
|
|
105
|
-
}
|
|
106
|
-
VisitorRequest::CodeInline(ctx, code) => {
|
|
107
|
-
let mut visitor = visitor_clone.lock().await;
|
|
108
|
-
visitor.visit_code_inline(&ctx, &code).await
|
|
109
|
-
}
|
|
110
|
-
VisitorRequest::ListItem(ctx, ordered, marker, text) => {
|
|
111
|
-
let mut visitor = visitor_clone.lock().await;
|
|
112
|
-
visitor.visit_list_item(&ctx, ordered, &marker, &text).await
|
|
113
|
-
}
|
|
114
|
-
VisitorRequest::ListStart(ctx, ordered) => {
|
|
115
|
-
let mut visitor = visitor_clone.lock().await;
|
|
116
|
-
visitor.visit_list_start(&ctx, ordered).await
|
|
117
|
-
}
|
|
118
|
-
VisitorRequest::ListEnd(ctx, ordered, output) => {
|
|
119
|
-
let mut visitor = visitor_clone.lock().await;
|
|
120
|
-
visitor.visit_list_end(&ctx, ordered, &output).await
|
|
121
|
-
}
|
|
122
|
-
VisitorRequest::TableStart(ctx) => {
|
|
123
|
-
let mut visitor = visitor_clone.lock().await;
|
|
124
|
-
visitor.visit_table_start(&ctx).await
|
|
125
|
-
}
|
|
126
|
-
VisitorRequest::TableRow(ctx, cells, is_header) => {
|
|
127
|
-
let mut visitor = visitor_clone.lock().await;
|
|
128
|
-
visitor.visit_table_row(&ctx, &cells, is_header).await
|
|
129
|
-
}
|
|
130
|
-
VisitorRequest::TableEnd(ctx, output) => {
|
|
131
|
-
let mut visitor = visitor_clone.lock().await;
|
|
132
|
-
visitor.visit_table_end(&ctx, &output).await
|
|
133
|
-
}
|
|
134
|
-
VisitorRequest::Blockquote(ctx, content, depth) => {
|
|
135
|
-
let mut visitor = visitor_clone.lock().await;
|
|
136
|
-
visitor.visit_blockquote(&ctx, &content, depth).await
|
|
137
|
-
}
|
|
138
|
-
VisitorRequest::Strong(ctx, text) => {
|
|
139
|
-
let mut visitor = visitor_clone.lock().await;
|
|
140
|
-
visitor.visit_strong(&ctx, &text).await
|
|
141
|
-
}
|
|
142
|
-
VisitorRequest::Emphasis(ctx, text) => {
|
|
143
|
-
let mut visitor = visitor_clone.lock().await;
|
|
144
|
-
visitor.visit_emphasis(&ctx, &text).await
|
|
145
|
-
}
|
|
146
|
-
VisitorRequest::Strikethrough(ctx, text) => {
|
|
147
|
-
let mut visitor = visitor_clone.lock().await;
|
|
148
|
-
visitor.visit_strikethrough(&ctx, &text).await
|
|
149
|
-
}
|
|
150
|
-
VisitorRequest::Underline(ctx, text) => {
|
|
151
|
-
let mut visitor = visitor_clone.lock().await;
|
|
152
|
-
visitor.visit_underline(&ctx, &text).await
|
|
153
|
-
}
|
|
154
|
-
VisitorRequest::Subscript(ctx, text) => {
|
|
155
|
-
let mut visitor = visitor_clone.lock().await;
|
|
156
|
-
visitor.visit_subscript(&ctx, &text).await
|
|
157
|
-
}
|
|
158
|
-
VisitorRequest::Superscript(ctx, text) => {
|
|
159
|
-
let mut visitor = visitor_clone.lock().await;
|
|
160
|
-
visitor.visit_superscript(&ctx, &text).await
|
|
161
|
-
}
|
|
162
|
-
VisitorRequest::Mark(ctx, text) => {
|
|
163
|
-
let mut visitor = visitor_clone.lock().await;
|
|
164
|
-
visitor.visit_mark(&ctx, &text).await
|
|
165
|
-
}
|
|
166
|
-
VisitorRequest::LineBreak(ctx) => {
|
|
167
|
-
let mut visitor = visitor_clone.lock().await;
|
|
168
|
-
visitor.visit_line_break(&ctx).await
|
|
169
|
-
}
|
|
170
|
-
VisitorRequest::HorizontalRule(ctx) => {
|
|
171
|
-
let mut visitor = visitor_clone.lock().await;
|
|
172
|
-
visitor.visit_horizontal_rule(&ctx).await
|
|
173
|
-
}
|
|
174
|
-
VisitorRequest::CustomElement(ctx, tag_name, html) => {
|
|
175
|
-
let mut visitor = visitor_clone.lock().await;
|
|
176
|
-
visitor.visit_custom_element(&ctx, &tag_name, &html).await
|
|
177
|
-
}
|
|
178
|
-
};
|
|
179
|
-
let _ = response_tx.send(result);
|
|
180
|
-
}
|
|
181
|
-
});
|
|
182
|
-
|
|
183
|
-
Self {
|
|
184
|
-
async_visitor,
|
|
185
|
-
request_tx,
|
|
186
|
-
response_rx,
|
|
187
|
-
}
|
|
188
|
-
}
|
|
189
|
-
}
|