html-to-markdown 2.29.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +18 -41
- data/README.md +37 -50
- data/ext/html-to-markdown-rb/native/Cargo.lock +17 -705
- data/ext/html-to-markdown-rb/native/Cargo.toml +1 -4
- data/ext/html-to-markdown-rb/native/README.md +4 -13
- data/ext/html-to-markdown-rb/native/src/conversion/inline_images.rs +2 -73
- data/ext/html-to-markdown-rb/native/src/conversion/metadata.rs +5 -49
- data/ext/html-to-markdown-rb/native/src/conversion/mod.rs +0 -6
- data/ext/html-to-markdown-rb/native/src/lib.rs +76 -213
- data/ext/html-to-markdown-rb/native/src/options.rs +0 -3
- data/lib/html_to_markdown/version.rb +1 -1
- data/lib/html_to_markdown.rb +13 -194
- data/sig/html_to_markdown.rbs +12 -373
- data/vendor/Cargo.toml +7 -4
- data/vendor/html-to-markdown-rs/Cargo.toml +4 -10
- data/vendor/html-to-markdown-rs/README.md +127 -51
- data/vendor/html-to-markdown-rs/examples/basic.rs +6 -1
- data/vendor/html-to-markdown-rs/examples/table.rs +6 -1
- data/vendor/html-to-markdown-rs/examples/test_escape.rs +6 -1
- data/vendor/html-to-markdown-rs/examples/test_inline_formatting.rs +8 -2
- data/vendor/html-to-markdown-rs/examples/test_lists.rs +6 -1
- data/vendor/html-to-markdown-rs/examples/test_semantic_tags.rs +6 -1
- data/vendor/html-to-markdown-rs/examples/test_tables.rs +6 -1
- data/vendor/html-to-markdown-rs/examples/test_task_lists.rs +6 -1
- data/vendor/html-to-markdown-rs/examples/test_whitespace.rs +6 -1
- data/vendor/html-to-markdown-rs/src/convert_api.rs +151 -745
- data/vendor/html-to-markdown-rs/src/converter/block/blockquote.rs +3 -5
- data/vendor/html-to-markdown-rs/src/converter/block/div.rs +1 -7
- data/vendor/html-to-markdown-rs/src/converter/block/heading.rs +18 -5
- data/vendor/html-to-markdown-rs/src/converter/block/paragraph.rs +10 -0
- data/vendor/html-to-markdown-rs/src/converter/block/preformatted.rs +3 -5
- data/vendor/html-to-markdown-rs/src/converter/block/table/builder.rs +16 -11
- data/vendor/html-to-markdown-rs/src/converter/block/table/cell.rs +20 -0
- data/vendor/html-to-markdown-rs/src/converter/block/table/cells.rs +4 -17
- data/vendor/html-to-markdown-rs/src/converter/block/table/mod.rs +140 -0
- data/vendor/html-to-markdown-rs/src/converter/block/table/scanner.rs +4 -18
- data/vendor/html-to-markdown-rs/src/converter/block/table/utils.rs +2 -18
- data/vendor/html-to-markdown-rs/src/converter/context.rs +8 -0
- data/vendor/html-to-markdown-rs/src/converter/dom_context.rs +1 -6
- data/vendor/html-to-markdown-rs/src/converter/form/elements.rs +14 -14
- data/vendor/html-to-markdown-rs/src/converter/handlers/blockquote.rs +4 -5
- data/vendor/html-to-markdown-rs/src/converter/handlers/code_block.rs +5 -10
- data/vendor/html-to-markdown-rs/src/converter/handlers/graphic.rs +3 -5
- data/vendor/html-to-markdown-rs/src/converter/handlers/image.rs +3 -5
- data/vendor/html-to-markdown-rs/src/converter/handlers/link.rs +3 -5
- data/vendor/html-to-markdown-rs/src/converter/inline/code.rs +3 -5
- data/vendor/html-to-markdown-rs/src/converter/inline/emphasis.rs +4 -10
- data/vendor/html-to-markdown-rs/src/converter/inline/link.rs +4 -170
- data/vendor/html-to-markdown-rs/src/converter/inline/semantic/marks.rs +7 -19
- data/vendor/html-to-markdown-rs/src/converter/list/item.rs +3 -5
- data/vendor/html-to-markdown-rs/src/converter/list/ordered.rs +4 -10
- data/vendor/html-to-markdown-rs/src/converter/list/unordered.rs +6 -12
- data/vendor/html-to-markdown-rs/src/converter/list/utils.rs +1 -12
- data/vendor/html-to-markdown-rs/src/converter/main.rs +85 -56
- data/vendor/html-to-markdown-rs/src/converter/main_helpers.rs +4 -67
- data/vendor/html-to-markdown-rs/src/converter/media/embedded.rs +1 -5
- data/vendor/html-to-markdown-rs/src/converter/media/graphic.rs +3 -40
- data/vendor/html-to-markdown-rs/src/converter/media/image.rs +0 -8
- data/vendor/html-to-markdown-rs/src/converter/media/svg.rs +3 -13
- data/vendor/html-to-markdown-rs/src/converter/metadata.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/mod.rs +0 -8
- data/vendor/html-to-markdown-rs/src/converter/plain_text.rs +37 -12
- data/vendor/html-to-markdown-rs/src/converter/semantic/attributes.rs +5 -30
- data/vendor/html-to-markdown-rs/src/converter/semantic/figure.rs +29 -0
- data/vendor/html-to-markdown-rs/src/converter/text/escaping.rs +1 -36
- data/vendor/html-to-markdown-rs/src/converter/text/mod.rs +1 -3
- data/vendor/html-to-markdown-rs/src/converter/text/normalization.rs +0 -53
- data/vendor/html-to-markdown-rs/src/converter/text_node.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/utility/attributes.rs +0 -41
- data/vendor/html-to-markdown-rs/src/converter/utility/caching.rs +2 -1
- data/vendor/html-to-markdown-rs/src/converter/utility/content.rs +15 -98
- data/vendor/html-to-markdown-rs/src/converter/utility/preprocessing.rs +113 -4
- data/vendor/html-to-markdown-rs/src/converter/utility/serialization.rs +3 -0
- data/vendor/html-to-markdown-rs/src/converter/visitor_hooks.rs +4 -10
- data/vendor/html-to-markdown-rs/src/exports.rs +1 -4
- data/vendor/html-to-markdown-rs/src/inline_images.rs +1 -1
- data/vendor/html-to-markdown-rs/src/lib.rs +13 -133
- data/vendor/html-to-markdown-rs/src/metadata/collector.rs +4 -4
- data/vendor/html-to-markdown-rs/src/metadata/mod.rs +22 -22
- data/vendor/html-to-markdown-rs/src/metadata/types.rs +3 -3
- data/vendor/html-to-markdown-rs/src/options/conversion.rs +351 -319
- data/vendor/html-to-markdown-rs/src/options/preprocessing.rs +8 -2
- data/vendor/html-to-markdown-rs/src/prelude.rs +1 -15
- data/vendor/html-to-markdown-rs/src/rcdom.rs +7 -1
- data/vendor/html-to-markdown-rs/src/text.rs +25 -14
- data/vendor/html-to-markdown-rs/src/types/document.rs +175 -0
- data/vendor/html-to-markdown-rs/src/types/mod.rs +17 -0
- data/vendor/html-to-markdown-rs/src/types/result.rs +49 -0
- data/vendor/html-to-markdown-rs/src/types/structure_builder.rs +790 -0
- data/vendor/html-to-markdown-rs/src/types/structure_collector.rs +442 -0
- data/vendor/html-to-markdown-rs/src/types/tables.rs +47 -0
- data/vendor/html-to-markdown-rs/src/types/warnings.rs +28 -0
- data/vendor/html-to-markdown-rs/src/visitor/mod.rs +0 -6
- data/vendor/html-to-markdown-rs/src/visitor/traits.rs +0 -1
- data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/mod.rs +1 -21
- data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/mod.rs +0 -5
- data/vendor/html-to-markdown-rs/src/visitor_helpers.rs +1 -845
- data/vendor/html-to-markdown-rs/tests/br_in_inline_test.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/commonmark_compliance_test.rs +8 -8
- data/vendor/html-to-markdown-rs/tests/djot_output_test.rs +8 -2
- data/vendor/html-to-markdown-rs/tests/integration_test.rs +23 -6
- data/vendor/html-to-markdown-rs/tests/issue_121_regressions.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/issue_127_regressions.rs +8 -2
- data/vendor/html-to-markdown-rs/tests/issue_128_regressions.rs +6 -1
- data/vendor/html-to-markdown-rs/tests/issue_131_regressions.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/issue_134_regressions.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/issue_139_regressions.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/issue_140_regressions.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/issue_143_regressions.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/issue_145_regressions.rs +8 -7
- data/vendor/html-to-markdown-rs/tests/issue_146_regressions.rs +8 -7
- data/vendor/html-to-markdown-rs/tests/issue_176_regressions.rs +12 -2
- data/vendor/html-to-markdown-rs/tests/issue_190_regressions.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/issue_199_regressions.rs +6 -1
- data/vendor/html-to-markdown-rs/tests/issue_200_regressions.rs +6 -1
- data/vendor/html-to-markdown-rs/tests/issue_212_regressions.rs +6 -1
- data/vendor/html-to-markdown-rs/tests/issue_216_217_regressions.rs +6 -1
- data/vendor/html-to-markdown-rs/tests/json_ld_script_extraction.rs +4 -6
- data/vendor/html-to-markdown-rs/tests/lists_test.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/plain_output_test.rs +8 -2
- data/vendor/html-to-markdown-rs/tests/preprocessing_tests.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/skip_images_test.rs +8 -11
- data/vendor/html-to-markdown-rs/tests/tables_test.rs +12 -2
- data/vendor/html-to-markdown-rs/tests/test_custom_elements.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/test_nested_simple.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/test_script_style_stripping.rs +17 -28
- data/vendor/html-to-markdown-rs/tests/test_spa_bisect.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/visitor_integration_test.rs +29 -33
- data/vendor/html-to-markdown-rs/tests/xml_tables_test.rs +8 -1
- metadata +9 -37
- data/bin/benchmark.rb +0 -232
- data/ext/html-to-markdown-rb/native/src/conversion/tables.rs +0 -71
- data/ext/html-to-markdown-rb/native/src/profiling.rs +0 -215
- data/ext/html-to-markdown-rb/native/src/visitor/bridge.rs +0 -252
- data/ext/html-to-markdown-rb/native/src/visitor/callbacks.rs +0 -640
- data/ext/html-to-markdown-rb/native/src/visitor/mod.rs +0 -12
- data/spec/convert_spec.rb +0 -77
- data/spec/convert_with_tables_spec.rb +0 -194
- data/spec/metadata_extraction_spec.rb +0 -437
- data/spec/visitor_issue_187_spec.rb +0 -605
- data/spec/visitor_spec.rb +0 -1149
- data/vendor/html-to-markdown-rs/src/hocr/converter/code_analysis.rs +0 -254
- data/vendor/html-to-markdown-rs/src/hocr/converter/core.rs +0 -249
- data/vendor/html-to-markdown-rs/src/hocr/converter/elements.rs +0 -382
- data/vendor/html-to-markdown-rs/src/hocr/converter/hierarchy.rs +0 -379
- data/vendor/html-to-markdown-rs/src/hocr/converter/keywords.rs +0 -55
- data/vendor/html-to-markdown-rs/src/hocr/converter/layout.rs +0 -313
- data/vendor/html-to-markdown-rs/src/hocr/converter/mod.rs +0 -26
- data/vendor/html-to-markdown-rs/src/hocr/converter/output.rs +0 -78
- data/vendor/html-to-markdown-rs/src/hocr/extractor.rs +0 -232
- data/vendor/html-to-markdown-rs/src/hocr/mod.rs +0 -31
- data/vendor/html-to-markdown-rs/src/hocr/parser.rs +0 -333
- data/vendor/html-to-markdown-rs/src/hocr/spatial/coords.rs +0 -129
- data/vendor/html-to-markdown-rs/src/hocr/spatial/grouping.rs +0 -165
- data/vendor/html-to-markdown-rs/src/hocr/spatial/layout.rs +0 -335
- data/vendor/html-to-markdown-rs/src/hocr/spatial/mod.rs +0 -15
- data/vendor/html-to-markdown-rs/src/hocr/spatial/output.rs +0 -63
- data/vendor/html-to-markdown-rs/src/hocr/types.rs +0 -269
- data/vendor/html-to-markdown-rs/src/visitor/async_traits.rs +0 -249
- data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/bridge.rs +0 -189
- data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/bridge_visitor.rs +0 -343
- data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/macros.rs +0 -217
- data/vendor/html-to-markdown-rs/tests/async_visitor_test.rs +0 -57
- data/vendor/html-to-markdown-rs/tests/convert_with_metadata_no_frontmatter.rs +0 -100
- data/vendor/html-to-markdown-rs/tests/hocr_compliance_test.rs +0 -509
|
@@ -1,343 +0,0 @@
|
|
|
1
|
-
//! HtmlVisitor trait implementation for AsyncToSyncVisitorBridge.
|
|
2
|
-
//!
|
|
3
|
-
//! This module implements the sync `HtmlVisitor` trait for the async-to-sync bridge,
|
|
4
|
-
//! translating each method call into a channel request/response pair.
|
|
5
|
-
|
|
6
|
-
#[cfg(feature = "async-visitor")]
|
|
7
|
-
use super::bridge::{AsyncToSyncVisitorBridge, VisitorRequest};
|
|
8
|
-
|
|
9
|
-
/// Sends `request` over the bridge's request channel and waits for the matching reply.
///
/// Every `HtmlVisitor` method below funnels through this helper so the fallback policy
/// lives in exactly one place: if the request cannot be sent, or no reply can be
/// received, the conversion proceeds as if the visitor had answered
/// `VisitResult::Continue`.
#[cfg(feature = "async-visitor")]
fn round_trip(bridge: &mut AsyncToSyncVisitorBridge, request: VisitorRequest) -> crate::visitor::VisitResult {
    if bridge.request_tx.send(request).is_err() {
        // Nothing is listening on the other end of the channel; fall back to default behavior.
        return crate::visitor::VisitResult::Continue;
    }
    bridge.response_rx.recv().unwrap_or(crate::visitor::VisitResult::Continue)
}

/// Sync `HtmlVisitor` facade over the async-to-sync bridge.
///
/// Each method copies its borrowed arguments into an owned `VisitorRequest` variant
/// (so the request can travel through the channel) and returns whatever `round_trip`
/// yields for it.
#[cfg(feature = "async-visitor")]
impl crate::visitor::HtmlVisitor for AsyncToSyncVisitorBridge {
    /// Forwards an element-start event.
    fn visit_element_start(&mut self, ctx: &crate::visitor::NodeContext) -> crate::visitor::VisitResult {
        round_trip(self, VisitorRequest::ElementStart(ctx.clone()))
    }

    /// Forwards an element-end event together with the output produced for the element.
    fn visit_element_end(&mut self, ctx: &crate::visitor::NodeContext, output: &str) -> crate::visitor::VisitResult {
        round_trip(self, VisitorRequest::ElementEnd(ctx.clone(), output.to_string()))
    }

    /// Forwards a text node.
    fn visit_text(&mut self, ctx: &crate::visitor::NodeContext, text: &str) -> crate::visitor::VisitResult {
        round_trip(self, VisitorRequest::Text(ctx.clone(), text.to_string()))
    }

    /// Forwards a link with its target, text and optional title.
    fn visit_link(
        &mut self,
        ctx: &crate::visitor::NodeContext,
        href: &str,
        text: &str,
        title: Option<&str>,
    ) -> crate::visitor::VisitResult {
        let request = VisitorRequest::Link(
            ctx.clone(),
            href.to_string(),
            text.to_string(),
            title.map(std::string::ToString::to_string),
        );
        round_trip(self, request)
    }

    /// Forwards an image with its source, alt text and optional title.
    fn visit_image(
        &mut self,
        ctx: &crate::visitor::NodeContext,
        src: &str,
        alt: &str,
        title: Option<&str>,
    ) -> crate::visitor::VisitResult {
        let request = VisitorRequest::Image(
            ctx.clone(),
            src.to_string(),
            alt.to_string(),
            title.map(std::string::ToString::to_string),
        );
        round_trip(self, request)
    }

    /// Forwards a heading with its level, text and optional id.
    fn visit_heading(
        &mut self,
        ctx: &crate::visitor::NodeContext,
        level: u32,
        text: &str,
        id: Option<&str>,
    ) -> crate::visitor::VisitResult {
        let request = VisitorRequest::Heading(
            ctx.clone(),
            level,
            text.to_string(),
            id.map(std::string::ToString::to_string),
        );
        round_trip(self, request)
    }

    /// Forwards a code block with its optional language and code.
    fn visit_code_block(
        &mut self,
        ctx: &crate::visitor::NodeContext,
        language: Option<&str>,
        code: &str,
    ) -> crate::visitor::VisitResult {
        let request = VisitorRequest::CodeBlock(
            ctx.clone(),
            language.map(std::string::ToString::to_string),
            code.to_string(),
        );
        round_trip(self, request)
    }

    /// Forwards an inline code span.
    fn visit_code_inline(&mut self, ctx: &crate::visitor::NodeContext, code: &str) -> crate::visitor::VisitResult {
        round_trip(self, VisitorRequest::CodeInline(ctx.clone(), code.to_string()))
    }

    /// Forwards a list item with its ordered flag, marker and text.
    fn visit_list_item(
        &mut self,
        ctx: &crate::visitor::NodeContext,
        ordered: bool,
        marker: &str,
        text: &str,
    ) -> crate::visitor::VisitResult {
        let request = VisitorRequest::ListItem(ctx.clone(), ordered, marker.to_string(), text.to_string());
        round_trip(self, request)
    }

    /// Forwards the start of a list.
    fn visit_list_start(&mut self, ctx: &crate::visitor::NodeContext, ordered: bool) -> crate::visitor::VisitResult {
        round_trip(self, VisitorRequest::ListStart(ctx.clone(), ordered))
    }

    /// Forwards the end of a list together with the output produced for it.
    fn visit_list_end(
        &mut self,
        ctx: &crate::visitor::NodeContext,
        ordered: bool,
        output: &str,
    ) -> crate::visitor::VisitResult {
        round_trip(self, VisitorRequest::ListEnd(ctx.clone(), ordered, output.to_string()))
    }

    /// Forwards the start of a table.
    fn visit_table_start(&mut self, ctx: &crate::visitor::NodeContext) -> crate::visitor::VisitResult {
        round_trip(self, VisitorRequest::TableStart(ctx.clone()))
    }

    /// Forwards a table row with its cells and header flag.
    fn visit_table_row(
        &mut self,
        ctx: &crate::visitor::NodeContext,
        cells: &[String],
        is_header: bool,
    ) -> crate::visitor::VisitResult {
        round_trip(self, VisitorRequest::TableRow(ctx.clone(), cells.to_vec(), is_header))
    }

    /// Forwards the end of a table together with the output produced for it.
    fn visit_table_end(&mut self, ctx: &crate::visitor::NodeContext, output: &str) -> crate::visitor::VisitResult {
        round_trip(self, VisitorRequest::TableEnd(ctx.clone(), output.to_string()))
    }

    /// Forwards a blockquote with its content and nesting depth.
    fn visit_blockquote(
        &mut self,
        ctx: &crate::visitor::NodeContext,
        content: &str,
        depth: usize,
    ) -> crate::visitor::VisitResult {
        round_trip(self, VisitorRequest::Blockquote(ctx.clone(), content.to_string(), depth))
    }

    /// Forwards strong (bold) text.
    fn visit_strong(&mut self, ctx: &crate::visitor::NodeContext, text: &str) -> crate::visitor::VisitResult {
        round_trip(self, VisitorRequest::Strong(ctx.clone(), text.to_string()))
    }

    /// Forwards emphasized text.
    fn visit_emphasis(&mut self, ctx: &crate::visitor::NodeContext, text: &str) -> crate::visitor::VisitResult {
        round_trip(self, VisitorRequest::Emphasis(ctx.clone(), text.to_string()))
    }

    /// Forwards struck-through text.
    fn visit_strikethrough(&mut self, ctx: &crate::visitor::NodeContext, text: &str) -> crate::visitor::VisitResult {
        round_trip(self, VisitorRequest::Strikethrough(ctx.clone(), text.to_string()))
    }

    /// Forwards underlined text.
    fn visit_underline(&mut self, ctx: &crate::visitor::NodeContext, text: &str) -> crate::visitor::VisitResult {
        round_trip(self, VisitorRequest::Underline(ctx.clone(), text.to_string()))
    }

    /// Forwards subscript text.
    fn visit_subscript(&mut self, ctx: &crate::visitor::NodeContext, text: &str) -> crate::visitor::VisitResult {
        round_trip(self, VisitorRequest::Subscript(ctx.clone(), text.to_string()))
    }

    /// Forwards superscript text.
    fn visit_superscript(&mut self, ctx: &crate::visitor::NodeContext, text: &str) -> crate::visitor::VisitResult {
        round_trip(self, VisitorRequest::Superscript(ctx.clone(), text.to_string()))
    }

    /// Forwards marked (highlighted) text.
    fn visit_mark(&mut self, ctx: &crate::visitor::NodeContext, text: &str) -> crate::visitor::VisitResult {
        round_trip(self, VisitorRequest::Mark(ctx.clone(), text.to_string()))
    }

    /// Forwards a horizontal rule.
    fn visit_horizontal_rule(&mut self, ctx: &crate::visitor::NodeContext) -> crate::visitor::VisitResult {
        round_trip(self, VisitorRequest::HorizontalRule(ctx.clone()))
    }

    /// Forwards a custom element with its tag name and raw HTML.
    fn visit_custom_element(
        &mut self,
        ctx: &crate::visitor::NodeContext,
        tag_name: &str,
        html: &str,
    ) -> crate::visitor::VisitResult {
        let request = VisitorRequest::CustomElement(ctx.clone(), tag_name.to_string(), html.to_string());
        round_trip(self, request)
    }
}
|
|
@@ -1,217 +0,0 @@
|
|
|
1
|
-
//! Async visitor dispatch function and helper macros.
|
|
2
|
-
//!
|
|
3
|
-
//! This module provides:
|
|
4
|
-
//! - `dispatch_async_visitor` function for async visitor callback dispatch
|
|
5
|
-
//! - `try_async_visitor!` macro for common visitor patterns
|
|
6
|
-
//! - `try_async_visitor_element_start!` macro for element start callbacks
|
|
7
|
-
//! - `try_async_visitor_element_end!` macro for element end callbacks
|
|
8
|
-
|
|
9
|
-
#[cfg(feature = "async-visitor")]
|
|
10
|
-
use super::super::content::VisitorDispatch;
|
|
11
|
-
#[cfg(feature = "async-visitor")]
|
|
12
|
-
use super::AsyncVisitorHandle;
|
|
13
|
-
#[cfg(feature = "async-visitor")]
|
|
14
|
-
use crate::error::{ConversionError, Result};
|
|
15
|
-
#[cfg(feature = "async-visitor")]
|
|
16
|
-
use crate::visitor::AsyncHtmlVisitor;
|
|
17
|
-
|
|
18
|
-
/// Dispatch an async visitor callback and translate its result.
///
/// This is the async version of `dispatch_visitor`. When a visitor handle is present it is
/// locked, passed to `callback` to build the visitor future, and the `VisitResult` that
/// future resolves to is mapped onto a `VisitorDispatch` value or an error.
///
/// # Type Parameters
///
/// - `F`: Closure that receives the locked visitor and returns the future to await
/// - `Fut`: Future resolving to a `VisitResult`
///
/// # Parameters
///
/// - `visitor`: Optional async visitor handle; mutable access is obtained by awaiting its `lock()`
/// - `callback`: Closure that invokes the appropriate async visitor method
///
/// # Returns
///
/// - `Ok(VisitorDispatch::Continue)`: No visitor was supplied, or it returned `VisitResult::Continue`
/// - `Ok(VisitorDispatch::Custom(output))`: Custom output from `VisitResult::Custom`
/// - `Ok(VisitorDispatch::Skip)`: The visitor returned `VisitResult::Skip`
/// - `Ok(VisitorDispatch::PreserveHtml)`: The visitor returned `VisitResult::PreserveHtml`
///
/// # Errors
///
/// Returns `ConversionError::Visitor` carrying the visitor's message when the visitor
/// returns `VisitResult::Error`.
///
/// # Locking
///
/// The lock guard only lives for the inner block that calls `callback`; it is released
/// before the returned future is awaited, so the visitor is not kept locked while the
/// visitor future runs.
///
/// # Examples
///
/// ```ignore
/// let result = dispatch_async_visitor(
///     &visitor,
///     |v| Box::pin(v.visit_heading(&ctx, level, text, id)),
/// ).await?;
///
/// match result {
///     VisitorDispatch::Custom(output) => return Ok(output),
///     VisitorDispatch::Skip => return Ok(String::new()),
///     VisitorDispatch::PreserveHtml => { /* emit the original HTML */ }
///     VisitorDispatch::Continue => { /* proceed with default conversion */ }
/// }
/// ```
#[cfg(feature = "async-visitor")]
// NOTE(review): presumably `dead_code` is allowed because nothing in the crate calls this yet,
// and `future_not_send` because the visitor handle is not `Send` — confirm against callers.
#[allow(dead_code, clippy::future_not_send)]
#[inline]
pub async fn dispatch_async_visitor<F, Fut>(
    visitor: &Option<AsyncVisitorHandle>,
    callback: F,
) -> Result<VisitorDispatch>
where
    F: FnOnce(&mut dyn AsyncHtmlVisitor) -> Fut,
    Fut: std::future::Future<Output = crate::visitor::VisitResult>,
{
    // No visitor configured: behave exactly like a visitor that always continues.
    let Some(visitor_mutex) = visitor else {
        return Ok(VisitorDispatch::Continue);
    };

    // Build the future while holding the lock; the guard is dropped at the end of this block.
    let future = {
        let mut visitor_ref = visitor_mutex.lock().await;
        callback(&mut *visitor_ref)
    };

    match future.await {
        crate::visitor::VisitResult::Continue => Ok(VisitorDispatch::Continue),
        crate::visitor::VisitResult::Custom(output) => Ok(VisitorDispatch::Custom(output)),
        crate::visitor::VisitResult::Skip => Ok(VisitorDispatch::Skip),
        crate::visitor::VisitResult::PreserveHtml => Ok(VisitorDispatch::PreserveHtml),
        crate::visitor::VisitResult::Error(msg) => Err(ConversionError::Visitor(msg)),
    }
}
|
|
94
|
-
|
|
95
|
-
/// Macro to reduce boilerplate when calling async visitor methods.
///
/// The expansion is a block that:
/// 1. Calls `dispatch_async_visitor` with `$visitor` and a closure that invokes
///    `$method($ctx, args...)` on the visitor, wrapped in `Box::pin`
/// 2. Awaits the dispatch and propagates a visitor error with `?`
/// 3. Returns early from the enclosing function on `Custom` and `Skip`
/// 4. Falls through to the code after the macro on `Continue`
///
/// Because the expansion already contains `.await`, `?` and `return`, the macro must be
/// used inside an `async` function whose return type accepts `Ok(String)`. The macro
/// itself evaluates to `()`, so nothing should be awaited after the invocation.
///
/// # Syntax
///
/// ```ignore
/// try_async_visitor!(visitor_option, method_name, ctx, arg1, arg2, ...);
/// ```
///
/// `visitor_option` is forwarded unchanged as the first argument of
/// `dispatch_async_visitor`.
///
/// # Control flow
///
/// - `Custom(output)`: the enclosing function returns `Ok(output)`
/// - `Skip`: the enclosing function returns `Ok(String::new())`
/// - Visitor error: the enclosing function returns the error (via `?`)
/// - `Continue`, or no visitor: execution continues after the macro
/// - `PreserveHtml`: not implemented yet; currently also continues after the macro
///
/// # Examples
///
/// ```ignore
/// // Before (verbose):
/// let dispatch = dispatch_async_visitor(&visitor, |v| {
///     Box::pin(v.visit_heading(&ctx, level, text, id))
/// }).await?;
/// match dispatch {
///     VisitorDispatch::Custom(output) => return Ok(output),
///     VisitorDispatch::Skip => return Ok(String::new()),
///     VisitorDispatch::PreserveHtml => { /* not handled yet */ }
///     VisitorDispatch::Continue => { /* proceed */ }
/// }
///
/// // After (concise):
/// try_async_visitor!(&visitor, visit_heading, &ctx, level, text, id);
/// // Default conversion logic continues here...
/// ```
#[cfg(feature = "async-visitor")]
#[macro_export]
macro_rules! try_async_visitor {
    ($visitor:expr, $method:ident, $ctx:expr $(, $arg:expr)*) => {{
        let dispatch = $crate::visitor_helpers::dispatch_async_visitor(
            $visitor,
            |v| Box::pin(v.$method($ctx $(, $arg)*)),
        ).await?;

        match dispatch {
            $crate::visitor_helpers::VisitorDispatch::Continue => {
            }
            $crate::visitor_helpers::VisitorDispatch::Custom(output) => {
                return Ok(output);
            }
            $crate::visitor_helpers::VisitorDispatch::Skip => {
                return Ok(String::new());
            }
            $crate::visitor_helpers::VisitorDispatch::PreserveHtml => {
                // TODO: Implement HTML preservation logic
            }
        }
    }};
}
|
|
157
|
-
|
|
158
|
-
/// Convenience macro for async `element_start` visitor calls with early return.
///
/// This is the async version of the `try_visitor_element_start!` macro. It expands to
/// `try_async_visitor!` with `visit_element_start` as the method, so the awaiting, error
/// propagation and early returns all happen inside the expansion. The macro evaluates to
/// `()` and must be invoked inside an `async` function; nothing should be awaited after it.
///
/// # Syntax
///
/// ```ignore
/// try_async_visitor_element_start!(visitor_option, ctx);
/// ```
///
/// # Examples
///
/// ```ignore
/// async fn process_heading(...) -> Result<String> {
///     let ctx = build_node_context(...);
///     try_async_visitor_element_start!(&visitor, &ctx);
///
///     // Default heading processing continues here...
/// }
/// ```
#[cfg(feature = "async-visitor")]
#[macro_export]
macro_rules! try_async_visitor_element_start {
    ($visitor:expr, $ctx:expr) => {{
        $crate::try_async_visitor!($visitor, visit_element_start, $ctx);
    }};
}
|
|
187
|
-
|
|
188
|
-
/// Convenience macro for async `element_end` visitor calls with output inspection.
|
|
189
|
-
///
|
|
190
|
-
/// This is the async version of the `try_visitor_element_end!` macro.
|
|
191
|
-
/// It handles the common pattern of calling `visit_element_end` after generating
|
|
192
|
-
/// default markdown output.
|
|
193
|
-
///
|
|
194
|
-
/// # Syntax
|
|
195
|
-
///
|
|
196
|
-
/// ```ignore
|
|
197
|
-
/// try_async_visitor_element_end!(visitor_option, ctx, default_output_string); // awaits and applies `?` internally
|
|
198
|
-
/// ```
|
|
199
|
-
///
|
|
200
|
-
/// # Examples
|
|
201
|
-
///
|
|
202
|
-
/// ```ignore
|
|
203
|
-
/// async fn process_heading(...) -> Result<String> {
|
|
204
|
-
/// let ctx = build_node_context(...);
|
|
205
|
-
/// let mut output = String::from("# Heading");
|
|
206
|
-
///
|
|
207
|
-
/// try_async_visitor_element_end!(visitor, &ctx, &output);
|
|
208
|
-
/// Ok(output)
|
|
209
|
-
/// }
|
|
210
|
-
/// ```
|
|
211
|
-
#[cfg(feature = "async-visitor")]
|
|
212
|
-
#[macro_export]
|
|
213
|
-
macro_rules! try_async_visitor_element_end {
|
|
214
|
-
($visitor:expr, $ctx:expr, $output:expr) => {{
|
|
215
|
-
$crate::try_async_visitor!($visitor, visit_element_end, $ctx, $output);
|
|
216
|
-
}};
|
|
217
|
-
}
|
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
//! Integration tests for async visitor functionality
|
|
2
|
-
//! These tests only check that async visitor implementations compile and coerce to `dyn AsyncHtmlVisitor`; no async runtime is started.
|
|
3
|
-
|
|
4
|
-
#![cfg(feature = "async-visitor")]
|
|
5
|
-
|
|
6
|
-
use async_trait::async_trait;
|
|
7
|
-
use html_to_markdown_rs::visitor::{AsyncHtmlVisitor, NodeContext, VisitResult};
|
|
8
|
-
use std::cell::RefCell;
|
|
9
|
-
use std::rc::Rc;
|
|
10
|
-
|
|
11
|
-
#[derive(Debug)]
|
|
12
|
-
struct CustomOutputVisitor;
|
|
13
|
-
|
|
14
|
-
#[async_trait]
|
|
15
|
-
impl AsyncHtmlVisitor for CustomOutputVisitor {
|
|
16
|
-
async fn visit_heading(&mut self, _ctx: &NodeContext, level: u32, text: &str, _id: Option<&str>) -> VisitResult {
|
|
17
|
-
// Return custom output for headings
|
|
18
|
-
VisitResult::Custom(format!("[HEADING-{level}] {text}\n\n"))
|
|
19
|
-
}
|
|
20
|
-
|
|
21
|
-
async fn visit_text(&mut self, _ctx: &NodeContext, _text: &str) -> VisitResult {
|
|
22
|
-
VisitResult::Continue
|
|
23
|
-
}
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
// Manual test function for current_thread runtime
|
|
27
|
-
// Note: We don't use #[tokio::test] here because it requires the macros feature
|
|
28
|
-
// Instead, this test demonstrates that the code compiles and can be called
|
|
29
|
-
#[test]
|
|
30
|
-
fn test_async_visitor_signature_compatibility() {
|
|
31
|
-
// This test verifies that a type implementing `AsyncHtmlVisitor` compiles
|
|
32
|
-
// and can be stored behind an `Rc<RefCell<dyn AsyncHtmlVisitor>>` handle.
|
|
33
|
-
// The actual functionality is tested in integration tests or manually.
|
|
34
|
-
|
|
35
|
-
// Create a visitor
|
|
36
|
-
let visitor = CustomOutputVisitor;
|
|
37
|
-
let _visitor_handle: Rc<RefCell<dyn AsyncHtmlVisitor>> = Rc::new(RefCell::new(visitor));
|
|
38
|
-
|
|
39
|
-
// This test just verifies compilation
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
#[derive(Debug)]
|
|
43
|
-
struct SkipImagesVisitor;
|
|
44
|
-
|
|
45
|
-
#[async_trait]
|
|
46
|
-
impl AsyncHtmlVisitor for SkipImagesVisitor {
|
|
47
|
-
async fn visit_image(&mut self, _ctx: &NodeContext, _src: &str, _alt: &str, _title: Option<&str>) -> VisitResult {
|
|
48
|
-
VisitResult::Skip
|
|
49
|
-
}
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
#[test]
|
|
53
|
-
fn test_skip_images_visitor_compiles() {
|
|
54
|
-
// Verify the SkipImagesVisitor compiles correctly
|
|
55
|
-
let visitor = SkipImagesVisitor;
|
|
56
|
-
let _visitor_handle: Rc<RefCell<dyn AsyncHtmlVisitor>> = Rc::new(RefCell::new(visitor));
|
|
57
|
-
}
|