html-to-markdown 2.30.0 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +6 -19
- data/README.md +37 -50
- data/ext/html-to-markdown-rb/native/Cargo.lock +13 -701
- data/ext/html-to-markdown-rb/native/Cargo.toml +1 -4
- data/ext/html-to-markdown-rb/native/README.md +4 -13
- data/ext/html-to-markdown-rb/native/src/conversion/inline_images.rs +2 -73
- data/ext/html-to-markdown-rb/native/src/conversion/metadata.rs +5 -49
- data/ext/html-to-markdown-rb/native/src/conversion/mod.rs +0 -6
- data/ext/html-to-markdown-rb/native/src/lib.rs +76 -213
- data/ext/html-to-markdown-rb/native/src/options.rs +0 -3
- data/lib/html_to_markdown/version.rb +1 -1
- data/lib/html_to_markdown.rb +13 -194
- data/sig/html_to_markdown.rbs +12 -373
- data/vendor/Cargo.toml +6 -3
- data/vendor/html-to-markdown-rs/Cargo.toml +4 -10
- data/vendor/html-to-markdown-rs/README.md +126 -52
- data/vendor/html-to-markdown-rs/examples/basic.rs +6 -1
- data/vendor/html-to-markdown-rs/examples/table.rs +6 -1
- data/vendor/html-to-markdown-rs/examples/test_escape.rs +6 -1
- data/vendor/html-to-markdown-rs/examples/test_inline_formatting.rs +8 -2
- data/vendor/html-to-markdown-rs/examples/test_lists.rs +6 -1
- data/vendor/html-to-markdown-rs/examples/test_semantic_tags.rs +6 -1
- data/vendor/html-to-markdown-rs/examples/test_tables.rs +6 -1
- data/vendor/html-to-markdown-rs/examples/test_task_lists.rs +6 -1
- data/vendor/html-to-markdown-rs/examples/test_whitespace.rs +6 -1
- data/vendor/html-to-markdown-rs/src/convert_api.rs +151 -745
- data/vendor/html-to-markdown-rs/src/converter/block/blockquote.rs +3 -5
- data/vendor/html-to-markdown-rs/src/converter/block/div.rs +1 -7
- data/vendor/html-to-markdown-rs/src/converter/block/heading.rs +18 -5
- data/vendor/html-to-markdown-rs/src/converter/block/paragraph.rs +10 -0
- data/vendor/html-to-markdown-rs/src/converter/block/preformatted.rs +3 -5
- data/vendor/html-to-markdown-rs/src/converter/block/table/builder.rs +16 -11
- data/vendor/html-to-markdown-rs/src/converter/block/table/cell.rs +20 -0
- data/vendor/html-to-markdown-rs/src/converter/block/table/cells.rs +4 -17
- data/vendor/html-to-markdown-rs/src/converter/block/table/mod.rs +140 -0
- data/vendor/html-to-markdown-rs/src/converter/block/table/scanner.rs +4 -18
- data/vendor/html-to-markdown-rs/src/converter/block/table/utils.rs +2 -18
- data/vendor/html-to-markdown-rs/src/converter/context.rs +8 -0
- data/vendor/html-to-markdown-rs/src/converter/dom_context.rs +1 -6
- data/vendor/html-to-markdown-rs/src/converter/form/elements.rs +14 -14
- data/vendor/html-to-markdown-rs/src/converter/handlers/blockquote.rs +4 -5
- data/vendor/html-to-markdown-rs/src/converter/handlers/code_block.rs +5 -10
- data/vendor/html-to-markdown-rs/src/converter/handlers/graphic.rs +3 -5
- data/vendor/html-to-markdown-rs/src/converter/handlers/image.rs +3 -5
- data/vendor/html-to-markdown-rs/src/converter/handlers/link.rs +3 -5
- data/vendor/html-to-markdown-rs/src/converter/inline/code.rs +3 -5
- data/vendor/html-to-markdown-rs/src/converter/inline/emphasis.rs +4 -10
- data/vendor/html-to-markdown-rs/src/converter/inline/link.rs +4 -170
- data/vendor/html-to-markdown-rs/src/converter/inline/semantic/marks.rs +7 -19
- data/vendor/html-to-markdown-rs/src/converter/list/item.rs +3 -5
- data/vendor/html-to-markdown-rs/src/converter/list/ordered.rs +4 -10
- data/vendor/html-to-markdown-rs/src/converter/list/unordered.rs +6 -12
- data/vendor/html-to-markdown-rs/src/converter/list/utils.rs +1 -12
- data/vendor/html-to-markdown-rs/src/converter/main.rs +85 -56
- data/vendor/html-to-markdown-rs/src/converter/main_helpers.rs +4 -68
- data/vendor/html-to-markdown-rs/src/converter/media/embedded.rs +1 -5
- data/vendor/html-to-markdown-rs/src/converter/media/graphic.rs +3 -40
- data/vendor/html-to-markdown-rs/src/converter/media/image.rs +0 -8
- data/vendor/html-to-markdown-rs/src/converter/media/svg.rs +3 -13
- data/vendor/html-to-markdown-rs/src/converter/metadata.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/mod.rs +0 -8
- data/vendor/html-to-markdown-rs/src/converter/plain_text.rs +37 -12
- data/vendor/html-to-markdown-rs/src/converter/semantic/attributes.rs +5 -30
- data/vendor/html-to-markdown-rs/src/converter/semantic/figure.rs +29 -0
- data/vendor/html-to-markdown-rs/src/converter/text/escaping.rs +1 -36
- data/vendor/html-to-markdown-rs/src/converter/text/mod.rs +1 -3
- data/vendor/html-to-markdown-rs/src/converter/text/normalization.rs +0 -53
- data/vendor/html-to-markdown-rs/src/converter/text_node.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/utility/attributes.rs +0 -41
- data/vendor/html-to-markdown-rs/src/converter/utility/caching.rs +2 -1
- data/vendor/html-to-markdown-rs/src/converter/utility/content.rs +15 -98
- data/vendor/html-to-markdown-rs/src/converter/utility/preprocessing.rs +113 -4
- data/vendor/html-to-markdown-rs/src/converter/utility/serialization.rs +3 -0
- data/vendor/html-to-markdown-rs/src/converter/visitor_hooks.rs +4 -10
- data/vendor/html-to-markdown-rs/src/exports.rs +1 -4
- data/vendor/html-to-markdown-rs/src/inline_images.rs +1 -1
- data/vendor/html-to-markdown-rs/src/lib.rs +13 -133
- data/vendor/html-to-markdown-rs/src/metadata/collector.rs +4 -4
- data/vendor/html-to-markdown-rs/src/metadata/mod.rs +22 -22
- data/vendor/html-to-markdown-rs/src/metadata/types.rs +3 -3
- data/vendor/html-to-markdown-rs/src/options/conversion.rs +351 -323
- data/vendor/html-to-markdown-rs/src/options/preprocessing.rs +8 -2
- data/vendor/html-to-markdown-rs/src/prelude.rs +1 -15
- data/vendor/html-to-markdown-rs/src/rcdom.rs +7 -1
- data/vendor/html-to-markdown-rs/src/text.rs +25 -14
- data/vendor/html-to-markdown-rs/src/types/document.rs +175 -0
- data/vendor/html-to-markdown-rs/src/types/mod.rs +17 -0
- data/vendor/html-to-markdown-rs/src/types/result.rs +49 -0
- data/vendor/html-to-markdown-rs/src/types/structure_builder.rs +790 -0
- data/vendor/html-to-markdown-rs/src/types/structure_collector.rs +442 -0
- data/vendor/html-to-markdown-rs/src/types/tables.rs +47 -0
- data/vendor/html-to-markdown-rs/src/types/warnings.rs +28 -0
- data/vendor/html-to-markdown-rs/src/visitor/mod.rs +0 -6
- data/vendor/html-to-markdown-rs/src/visitor/traits.rs +0 -1
- data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/mod.rs +1 -21
- data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/mod.rs +0 -5
- data/vendor/html-to-markdown-rs/src/visitor_helpers.rs +1 -845
- data/vendor/html-to-markdown-rs/tests/br_in_inline_test.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/commonmark_compliance_test.rs +8 -8
- data/vendor/html-to-markdown-rs/tests/djot_output_test.rs +8 -2
- data/vendor/html-to-markdown-rs/tests/integration_test.rs +23 -6
- data/vendor/html-to-markdown-rs/tests/issue_121_regressions.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/issue_127_regressions.rs +8 -2
- data/vendor/html-to-markdown-rs/tests/issue_128_regressions.rs +6 -1
- data/vendor/html-to-markdown-rs/tests/issue_131_regressions.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/issue_134_regressions.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/issue_139_regressions.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/issue_140_regressions.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/issue_143_regressions.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/issue_145_regressions.rs +8 -7
- data/vendor/html-to-markdown-rs/tests/issue_146_regressions.rs +8 -7
- data/vendor/html-to-markdown-rs/tests/issue_176_regressions.rs +12 -2
- data/vendor/html-to-markdown-rs/tests/issue_190_regressions.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/issue_199_regressions.rs +6 -1
- data/vendor/html-to-markdown-rs/tests/issue_200_regressions.rs +6 -1
- data/vendor/html-to-markdown-rs/tests/issue_212_regressions.rs +6 -1
- data/vendor/html-to-markdown-rs/tests/issue_216_217_regressions.rs +6 -1
- data/vendor/html-to-markdown-rs/tests/json_ld_script_extraction.rs +4 -6
- data/vendor/html-to-markdown-rs/tests/lists_test.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/plain_output_test.rs +8 -2
- data/vendor/html-to-markdown-rs/tests/preprocessing_tests.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/skip_images_test.rs +8 -11
- data/vendor/html-to-markdown-rs/tests/tables_test.rs +12 -2
- data/vendor/html-to-markdown-rs/tests/test_custom_elements.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/test_nested_simple.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/test_script_style_stripping.rs +17 -28
- data/vendor/html-to-markdown-rs/tests/test_spa_bisect.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/visitor_integration_test.rs +29 -33
- data/vendor/html-to-markdown-rs/tests/xml_tables_test.rs +8 -1
- metadata +9 -37
- data/bin/benchmark.rb +0 -232
- data/ext/html-to-markdown-rb/native/src/conversion/tables.rs +0 -71
- data/ext/html-to-markdown-rb/native/src/profiling.rs +0 -215
- data/ext/html-to-markdown-rb/native/src/visitor/bridge.rs +0 -252
- data/ext/html-to-markdown-rb/native/src/visitor/callbacks.rs +0 -640
- data/ext/html-to-markdown-rb/native/src/visitor/mod.rs +0 -12
- data/spec/convert_spec.rb +0 -77
- data/spec/convert_with_tables_spec.rb +0 -194
- data/spec/metadata_extraction_spec.rb +0 -437
- data/spec/visitor_issue_187_spec.rb +0 -605
- data/spec/visitor_spec.rb +0 -1149
- data/vendor/html-to-markdown-rs/src/hocr/converter/code_analysis.rs +0 -254
- data/vendor/html-to-markdown-rs/src/hocr/converter/core.rs +0 -249
- data/vendor/html-to-markdown-rs/src/hocr/converter/elements.rs +0 -382
- data/vendor/html-to-markdown-rs/src/hocr/converter/hierarchy.rs +0 -379
- data/vendor/html-to-markdown-rs/src/hocr/converter/keywords.rs +0 -55
- data/vendor/html-to-markdown-rs/src/hocr/converter/layout.rs +0 -313
- data/vendor/html-to-markdown-rs/src/hocr/converter/mod.rs +0 -26
- data/vendor/html-to-markdown-rs/src/hocr/converter/output.rs +0 -78
- data/vendor/html-to-markdown-rs/src/hocr/extractor.rs +0 -232
- data/vendor/html-to-markdown-rs/src/hocr/mod.rs +0 -42
- data/vendor/html-to-markdown-rs/src/hocr/parser.rs +0 -333
- data/vendor/html-to-markdown-rs/src/hocr/spatial/coords.rs +0 -129
- data/vendor/html-to-markdown-rs/src/hocr/spatial/grouping.rs +0 -165
- data/vendor/html-to-markdown-rs/src/hocr/spatial/layout.rs +0 -335
- data/vendor/html-to-markdown-rs/src/hocr/spatial/mod.rs +0 -15
- data/vendor/html-to-markdown-rs/src/hocr/spatial/output.rs +0 -63
- data/vendor/html-to-markdown-rs/src/hocr/types.rs +0 -269
- data/vendor/html-to-markdown-rs/src/visitor/async_traits.rs +0 -249
- data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/bridge.rs +0 -189
- data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/bridge_visitor.rs +0 -343
- data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/macros.rs +0 -217
- data/vendor/html-to-markdown-rs/tests/async_visitor_test.rs +0 -57
- data/vendor/html-to-markdown-rs/tests/convert_with_metadata_no_frontmatter.rs +0 -100
- data/vendor/html-to-markdown-rs/tests/hocr_compliance_test.rs +0 -509
|
@@ -7,6 +7,8 @@
|
|
|
7
7
|
//! - Spacing management for various contexts
|
|
8
8
|
//! - Visitor callbacks for custom blockquote processing
|
|
9
9
|
|
|
10
|
+
#[cfg(feature = "visitor")]
|
|
11
|
+
use crate::converter::utility::content::collect_tag_attributes;
|
|
10
12
|
use crate::options::ConversionOptions;
|
|
11
13
|
#[allow(unused_imports)]
|
|
12
14
|
use std::collections::BTreeMap;
|
|
@@ -88,11 +90,7 @@ pub(crate) fn handle(
|
|
|
88
90
|
|
|
89
91
|
if let Some(node) = node_handle.get(parser) {
|
|
90
92
|
if let tl::Node::Tag(tag) = node {
|
|
91
|
-
let attributes: BTreeMap<String, String> = tag
|
|
92
|
-
.attributes()
|
|
93
|
-
.iter()
|
|
94
|
-
.filter_map(|(k, v)| v.as_ref().map(|val| (k.to_string(), val.to_string())))
|
|
95
|
-
.collect();
|
|
93
|
+
let attributes: BTreeMap<String, String> = collect_tag_attributes(tag);
|
|
96
94
|
|
|
97
95
|
let node_id = node_handle.get_inner();
|
|
98
96
|
let parent_tag = dom_ctx.parent_tag_name(node_id, parser);
|
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
//! - List continuations: Uses list indentation
|
|
7
7
|
//! - Block context: Adds surrounding newlines for proper block separation
|
|
8
8
|
|
|
9
|
+
use crate::converter::main_helpers::trim_trailing_whitespace;
|
|
9
10
|
use crate::options::ConversionOptions;
|
|
10
11
|
use tl::{NodeHandle, Parser};
|
|
11
12
|
|
|
@@ -131,13 +132,6 @@ pub(crate) fn handle(
|
|
|
131
132
|
}
|
|
132
133
|
}
|
|
133
134
|
|
|
134
|
-
/// Helper function to trim trailing whitespace
|
|
135
|
-
fn trim_trailing_whitespace(output: &mut String) {
|
|
136
|
-
while output.ends_with(' ') || output.ends_with('\t') {
|
|
137
|
-
output.pop();
|
|
138
|
-
}
|
|
139
|
-
}
|
|
140
|
-
|
|
141
135
|
/// Helper function to add list continuation indentation
|
|
142
136
|
fn add_list_continuation_indent(
|
|
143
137
|
output: &mut String,
|
|
@@ -6,6 +6,8 @@
|
|
|
6
6
|
//! - Metadata collection (headers, IDs)
|
|
7
7
|
//! - Visitor callbacks for custom heading processing
|
|
8
8
|
|
|
9
|
+
#[cfg(feature = "visitor")]
|
|
10
|
+
use crate::converter::utility::content::collect_tag_attributes;
|
|
9
11
|
use crate::options::{ConversionOptions, HeadingStyle};
|
|
10
12
|
use std::borrow::Cow;
|
|
11
13
|
#[allow(unused_imports)]
|
|
@@ -124,6 +126,21 @@ pub(crate) fn handle(
|
|
|
124
126
|
}
|
|
125
127
|
}
|
|
126
128
|
}
|
|
129
|
+
|
|
130
|
+
// Notify the structure collector if present.
|
|
131
|
+
if let Some(ref sc) = ctx.structure_collector {
|
|
132
|
+
if let Some(node) = node_handle.get(parser) {
|
|
133
|
+
if let tl::Node::Tag(tag) = node {
|
|
134
|
+
let id = tag
|
|
135
|
+
.attributes()
|
|
136
|
+
.get("id")
|
|
137
|
+
.flatten()
|
|
138
|
+
.map(|v| v.as_utf8_str().to_string());
|
|
139
|
+
sc.borrow_mut()
|
|
140
|
+
.push_heading(level as u8, normalized.as_ref(), id.as_deref());
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
}
|
|
127
144
|
}
|
|
128
145
|
}
|
|
129
146
|
|
|
@@ -292,11 +309,7 @@ fn visitor_heading_output(
|
|
|
292
309
|
.flatten()
|
|
293
310
|
.map(|v| v.as_utf8_str().to_string());
|
|
294
311
|
|
|
295
|
-
let attributes: BTreeMap<String, String> = tag
|
|
296
|
-
.attributes()
|
|
297
|
-
.iter()
|
|
298
|
-
.filter_map(|(k, v)| v.as_ref().map(|val| (k.to_string(), val.to_string())))
|
|
299
|
-
.collect();
|
|
312
|
+
let attributes: BTreeMap<String, String> = collect_tag_attributes(tag);
|
|
300
313
|
|
|
301
314
|
let node_id = node_handle.get_inner();
|
|
302
315
|
let parent_tag = dom_ctx.parent_tag_name(node_id, parser);
|
|
@@ -96,6 +96,16 @@ pub(crate) fn handle(
|
|
|
96
96
|
if has_content && !ctx.convert_as_inline && !ctx.in_table_cell {
|
|
97
97
|
output.push_str("\n\n");
|
|
98
98
|
}
|
|
99
|
+
|
|
100
|
+
// Notify the structure collector if present and we produced non-empty top-level paragraph content.
|
|
101
|
+
if has_content && !ctx.in_table_cell && !ctx.in_list_item && !ctx.convert_as_inline {
|
|
102
|
+
if let Some(ref sc) = ctx.structure_collector {
|
|
103
|
+
let text = output[content_start_pos..].trim().to_string();
|
|
104
|
+
if !text.is_empty() {
|
|
105
|
+
sc.borrow_mut().push_paragraph(&text);
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
}
|
|
99
109
|
}
|
|
100
110
|
|
|
101
111
|
/// Add continuation indentation for list items.
|
|
@@ -7,6 +7,8 @@
|
|
|
7
7
|
//! - Inline code formatting with backtick management
|
|
8
8
|
//! - Visitor callbacks for custom code processing
|
|
9
9
|
|
|
10
|
+
#[cfg(feature = "visitor")]
|
|
11
|
+
use crate::converter::utility::content::collect_tag_attributes;
|
|
10
12
|
use crate::options::{CodeBlockStyle, ConversionOptions, WhitespaceMode};
|
|
11
13
|
#[allow(unused_imports)]
|
|
12
14
|
use std::collections::BTreeMap;
|
|
@@ -93,11 +95,7 @@ pub(crate) fn handle_pre(
|
|
|
93
95
|
|
|
94
96
|
if let Some(node) = node_handle.get(parser) {
|
|
95
97
|
if let tl::Node::Tag(tag) = node {
|
|
96
|
-
let attributes: BTreeMap<String, String> = tag
|
|
97
|
-
.attributes()
|
|
98
|
-
.iter()
|
|
99
|
-
.filter_map(|(k, v)| v.as_ref().map(|val| (k.to_string(), val.to_string())))
|
|
100
|
-
.collect();
|
|
98
|
+
let attributes: BTreeMap<String, String> = collect_tag_attributes(tag);
|
|
101
99
|
|
|
102
100
|
let node_id = node_handle.get_inner();
|
|
103
101
|
let parent_tag = dom_ctx.parent_tag_name(node_id, parser);
|
|
@@ -9,6 +9,8 @@ use super::cell::{collect_table_cells, get_colspan};
|
|
|
9
9
|
use super::cells::{append_layout_row, convert_table_row};
|
|
10
10
|
use super::scanner::scan_table;
|
|
11
11
|
use super::utils::{is_tag_name, normalized_tag_name};
|
|
12
|
+
#[cfg(feature = "visitor")]
|
|
13
|
+
use crate::converter::utility::content::collect_tag_attributes;
|
|
12
14
|
|
|
13
15
|
/// Maximum allowed table columns to prevent unbounded memory usage.
|
|
14
16
|
const MAX_TABLE_COLS: usize = 1000;
|
|
@@ -106,11 +108,7 @@ pub fn handle_table(
|
|
|
106
108
|
use crate::visitor::{NodeContext, NodeType, VisitResult};
|
|
107
109
|
use std::collections::BTreeMap;
|
|
108
110
|
|
|
109
|
-
let attributes: BTreeMap<String, String> = tag
|
|
110
|
-
.attributes()
|
|
111
|
-
.iter()
|
|
112
|
-
.filter_map(|(k, v)| v.as_ref().map(|val| (k.to_string(), val.to_string())))
|
|
113
|
-
.collect();
|
|
111
|
+
let attributes: BTreeMap<String, String> = collect_tag_attributes(tag);
|
|
114
112
|
|
|
115
113
|
let node_id = node_handle.get_inner();
|
|
116
114
|
let parent_tag = dom_ctx.parent_tag_name(node_id, parser);
|
|
@@ -160,7 +158,7 @@ pub fn handle_table(
|
|
|
160
158
|
.get("border")
|
|
161
159
|
.is_some_and(|v| v.as_ref().is_some_and(|b| b.as_utf8_str() == "0"));
|
|
162
160
|
let looks_like_layout =
|
|
163
|
-
table_scan.
|
|
161
|
+
table_scan.nested_table_count > 1 || distinct_counts.len() > 1 || (table_scan.has_span && has_border_zero);
|
|
164
162
|
let link_count = table_scan.link_count;
|
|
165
163
|
let is_blank_table = !table_scan.has_text;
|
|
166
164
|
|
|
@@ -343,11 +341,7 @@ pub fn handle_table(
|
|
|
343
341
|
use crate::visitor::{NodeContext, NodeType, VisitResult};
|
|
344
342
|
use std::collections::BTreeMap;
|
|
345
343
|
|
|
346
|
-
let attributes: BTreeMap<String, String> = tag
|
|
347
|
-
.attributes()
|
|
348
|
-
.iter()
|
|
349
|
-
.filter_map(|(k, v)| v.as_ref().map(|val| (k.to_string(), val.to_string())))
|
|
350
|
-
.collect();
|
|
344
|
+
let attributes: BTreeMap<String, String> = collect_tag_attributes(tag);
|
|
351
345
|
|
|
352
346
|
let node_id = node_handle.get_inner();
|
|
353
347
|
let parent_tag = dom_ctx.parent_tag_name(node_id, parser);
|
|
@@ -400,3 +394,14 @@ pub fn handle_table(
|
|
|
400
394
|
}
|
|
401
395
|
}
|
|
402
396
|
}
|
|
397
|
+
|
|
398
|
+
#[cfg(test)]
|
|
399
|
+
mod tests {
|
|
400
|
+
#[test]
|
|
401
|
+
fn single_nested_table_stays_as_table() {
|
|
402
|
+
let html = r"<table><tr><td>Label</td><td><table><tr><td>A</td><td>B</td></tr></table></td></tr></table>";
|
|
403
|
+
let result = crate::convert(html, None).unwrap();
|
|
404
|
+
let content = result.content.unwrap_or_default();
|
|
405
|
+
assert!(content.contains('|'), "should produce pipe table, not list");
|
|
406
|
+
}
|
|
407
|
+
}
|
|
@@ -179,3 +179,23 @@ pub fn convert_table_cell(
|
|
|
179
179
|
output.push_str(" |");
|
|
180
180
|
}
|
|
181
181
|
}
|
|
182
|
+
|
|
183
|
+
#[cfg(test)]
|
|
184
|
+
mod tests {
|
|
185
|
+
#[test]
|
|
186
|
+
fn rich_formatting_preserved_in_cells() {
|
|
187
|
+
let html = "<table><tr><th>H</th></tr><tr><td><strong>Bold</strong> and <em>italic</em></td></tr></table>";
|
|
188
|
+
let result = crate::convert(html, None).unwrap();
|
|
189
|
+
let content = result.content.unwrap_or_default();
|
|
190
|
+
assert!(
|
|
191
|
+
content.contains("**Bold**") || content.contains("__Bold__"),
|
|
192
|
+
"bold should be preserved: {}",
|
|
193
|
+
content
|
|
194
|
+
);
|
|
195
|
+
assert!(
|
|
196
|
+
content.contains("*italic*") || content.contains("_italic_"),
|
|
197
|
+
"italic should be preserved: {}",
|
|
198
|
+
content
|
|
199
|
+
);
|
|
200
|
+
}
|
|
201
|
+
}
|
|
@@ -5,6 +5,9 @@
|
|
|
5
5
|
//! - Cell layout handling with colspan/rowspan support
|
|
6
6
|
//! - Layout table row conversion to list items
|
|
7
7
|
|
|
8
|
+
#[cfg(feature = "visitor")]
|
|
9
|
+
use crate::converter::utility::content::collect_tag_attributes;
|
|
10
|
+
use crate::converter::utility::content::normalized_tag_name;
|
|
8
11
|
use std::borrow::Cow;
|
|
9
12
|
|
|
10
13
|
use super::cell::{collect_table_cells, convert_table_cell, get_colspan_rowspan};
|
|
@@ -84,18 +87,6 @@ pub fn append_layout_row(
|
|
|
84
87
|
}
|
|
85
88
|
}
|
|
86
89
|
|
|
87
|
-
/// Normalize HTML tag names to lowercase.
|
|
88
|
-
///
|
|
89
|
-
/// Converts tag names to a consistent lowercase form for comparison.
|
|
90
|
-
fn normalized_tag_name(raw: Cow<'_, str>) -> Cow<'_, str> {
|
|
91
|
-
let lowercased = raw.to_lowercase();
|
|
92
|
-
if lowercased.as_str() == raw.as_ref() {
|
|
93
|
-
raw
|
|
94
|
-
} else {
|
|
95
|
-
Cow::Owned(lowercased)
|
|
96
|
-
}
|
|
97
|
-
}
|
|
98
|
-
|
|
99
90
|
/// Convert a table row (tr) to Markdown format.
|
|
100
91
|
///
|
|
101
92
|
/// Processes all cells in a row, handling colspan and rowspan for proper
|
|
@@ -167,11 +158,7 @@ pub fn convert_table_row(
|
|
|
167
158
|
use std::collections::BTreeMap;
|
|
168
159
|
|
|
169
160
|
if let Some(tl::Node::Tag(tag)) = node_handle.get(parser) {
|
|
170
|
-
let attributes: BTreeMap<String, String> = tag
|
|
171
|
-
.attributes()
|
|
172
|
-
.iter()
|
|
173
|
-
.filter_map(|(k, v)| v.as_ref().map(|val| (k.to_string(), val.to_string())))
|
|
174
|
-
.collect();
|
|
161
|
+
let attributes: BTreeMap<String, String> = collect_tag_attributes(tag);
|
|
175
162
|
|
|
176
163
|
let node_ctx = NodeContext {
|
|
177
164
|
node_type: NodeType::TableRow,
|
|
@@ -96,6 +96,13 @@ pub(crate) fn handle_table_with_context(
|
|
|
96
96
|
let mut table_output = String::new();
|
|
97
97
|
builder::handle_table(node_handle, parser, &mut table_output, options, ctx, dom_ctx, depth);
|
|
98
98
|
|
|
99
|
+
// Feed the table into the structure collector when document structure extraction is enabled.
|
|
100
|
+
if let Some(ref sc) = ctx.structure_collector {
|
|
101
|
+
if let Some(grid) = collect_table_grid(node_handle, parser, options, ctx, dom_ctx) {
|
|
102
|
+
sc.borrow_mut().push_table(grid);
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
|
|
99
106
|
if ctx.in_list_item {
|
|
100
107
|
let has_caption = table_output.starts_with('*');
|
|
101
108
|
|
|
@@ -124,3 +131,136 @@ pub(crate) fn handle_table_with_context(
|
|
|
124
131
|
output.push('\n');
|
|
125
132
|
}
|
|
126
133
|
}
|
|
134
|
+
|
|
135
|
+
/// Collect a [`crate::types::TableGrid`] from the DOM for the structure collector.
|
|
136
|
+
///
|
|
137
|
+
/// Walks the table's rows and cells, extracting text content and span attributes
|
|
138
|
+
/// to build a structured grid representation.
|
|
139
|
+
fn collect_table_grid(
|
|
140
|
+
node_handle: &tl::NodeHandle,
|
|
141
|
+
parser: &tl::Parser,
|
|
142
|
+
options: &crate::options::ConversionOptions,
|
|
143
|
+
ctx: &super::super::Context,
|
|
144
|
+
dom_ctx: &super::super::DomContext,
|
|
145
|
+
) -> Option<crate::types::TableGrid> {
|
|
146
|
+
use utils::{is_tag_name, normalized_tag_name};
|
|
147
|
+
|
|
148
|
+
let tl::Node::Tag(tag) = node_handle.get(parser)? else {
|
|
149
|
+
return None;
|
|
150
|
+
};
|
|
151
|
+
|
|
152
|
+
let mut grid_cells = Vec::new();
|
|
153
|
+
let mut row_index: u32 = 0;
|
|
154
|
+
let mut max_cols: u32 = 0;
|
|
155
|
+
let mut cell_handles = Vec::new();
|
|
156
|
+
|
|
157
|
+
let children = tag.children();
|
|
158
|
+
for child_handle in children.top().iter() {
|
|
159
|
+
if let Some(tl::Node::Tag(child_tag)) = child_handle.get(parser) {
|
|
160
|
+
let tag_name = normalized_tag_name(child_tag.name().as_utf8_str());
|
|
161
|
+
match tag_name.as_ref() {
|
|
162
|
+
"thead" | "tbody" | "tfoot" => {
|
|
163
|
+
let is_header_section = tag_name.as_ref() == "thead";
|
|
164
|
+
for row_handle in child_tag.children().top().iter() {
|
|
165
|
+
if is_tag_name(row_handle, parser, dom_ctx, "tr") {
|
|
166
|
+
collect_grid_row(
|
|
167
|
+
row_handle,
|
|
168
|
+
parser,
|
|
169
|
+
options,
|
|
170
|
+
ctx,
|
|
171
|
+
dom_ctx,
|
|
172
|
+
&mut cell_handles,
|
|
173
|
+
&mut grid_cells,
|
|
174
|
+
&mut row_index,
|
|
175
|
+
&mut max_cols,
|
|
176
|
+
is_header_section,
|
|
177
|
+
);
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
"tr" | "row" => {
|
|
182
|
+
let is_first = row_index == 0;
|
|
183
|
+
collect_grid_row(
|
|
184
|
+
child_handle,
|
|
185
|
+
parser,
|
|
186
|
+
options,
|
|
187
|
+
ctx,
|
|
188
|
+
dom_ctx,
|
|
189
|
+
&mut cell_handles,
|
|
190
|
+
&mut grid_cells,
|
|
191
|
+
&mut row_index,
|
|
192
|
+
&mut max_cols,
|
|
193
|
+
is_first,
|
|
194
|
+
);
|
|
195
|
+
}
|
|
196
|
+
_ => {}
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
if row_index == 0 {
|
|
202
|
+
return None;
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
Some(crate::types::TableGrid {
|
|
206
|
+
rows: row_index,
|
|
207
|
+
cols: max_cols,
|
|
208
|
+
cells: grid_cells,
|
|
209
|
+
})
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
/// Process a single table row for grid collection.
|
|
213
|
+
#[allow(clippy::too_many_arguments)]
|
|
214
|
+
fn collect_grid_row(
|
|
215
|
+
row_handle: &tl::NodeHandle,
|
|
216
|
+
parser: &tl::Parser,
|
|
217
|
+
options: &crate::options::ConversionOptions,
|
|
218
|
+
ctx: &super::super::Context,
|
|
219
|
+
dom_ctx: &super::super::DomContext,
|
|
220
|
+
cell_handles: &mut Vec<tl::NodeHandle>,
|
|
221
|
+
grid_cells: &mut Vec<crate::types::GridCell>,
|
|
222
|
+
row_index: &mut u32,
|
|
223
|
+
max_cols: &mut u32,
|
|
224
|
+
is_header_section: bool,
|
|
225
|
+
) {
|
|
226
|
+
use cell::{collect_table_cells, get_colspan_rowspan};
|
|
227
|
+
|
|
228
|
+
collect_table_cells(row_handle, parser, dom_ctx, cell_handles);
|
|
229
|
+
|
|
230
|
+
let mut col_index: u32 = 0;
|
|
231
|
+
for cell_handle in cell_handles.iter() {
|
|
232
|
+
let is_header = is_header_section
|
|
233
|
+
|| dom_ctx
|
|
234
|
+
.tag_name_for(*cell_handle, parser)
|
|
235
|
+
.is_some_and(|name| name.as_ref() == "th");
|
|
236
|
+
|
|
237
|
+
let mut text = String::new();
|
|
238
|
+
let cell_ctx = super::super::Context {
|
|
239
|
+
in_table_cell: true,
|
|
240
|
+
..ctx.clone()
|
|
241
|
+
};
|
|
242
|
+
if let Some(tl::Node::Tag(cell_tag)) = cell_handle.get(parser) {
|
|
243
|
+
for child_handle in cell_tag.children().top().iter() {
|
|
244
|
+
super::super::walk_node(child_handle, parser, &mut text, options, &cell_ctx, 0, dom_ctx);
|
|
245
|
+
}
|
|
246
|
+
}
|
|
247
|
+
let content = crate::text::normalize_whitespace_cow(&text).trim().to_string();
|
|
248
|
+
|
|
249
|
+
let (colspan, rowspan) = get_colspan_rowspan(cell_handle, parser);
|
|
250
|
+
|
|
251
|
+
grid_cells.push(crate::types::GridCell {
|
|
252
|
+
content,
|
|
253
|
+
row: *row_index,
|
|
254
|
+
col: col_index,
|
|
255
|
+
row_span: rowspan as u32,
|
|
256
|
+
col_span: colspan as u32,
|
|
257
|
+
is_header,
|
|
258
|
+
});
|
|
259
|
+
|
|
260
|
+
col_index += colspan as u32;
|
|
261
|
+
}
|
|
262
|
+
if col_index > *max_cols {
|
|
263
|
+
*max_cols = col_index;
|
|
264
|
+
}
|
|
265
|
+
*row_index += 1;
|
|
266
|
+
}
|
|
@@ -3,11 +3,9 @@
|
|
|
3
3
|
//! Provides the TableScan struct and scanning functions for analyzing table structure
|
|
4
4
|
//! to determine if it should be rendered as a Markdown table or converted to list format.
|
|
5
5
|
|
|
6
|
+
use crate::converter::utility::content::normalized_tag_name;
|
|
6
7
|
use std::borrow::Cow;
|
|
7
8
|
|
|
8
|
-
/// Maximum allowed table columns to prevent unbounded memory usage.
|
|
9
|
-
const MAX_TABLE_COLS: usize = 1000;
|
|
10
|
-
|
|
11
9
|
/// Scan results for a table element.
|
|
12
10
|
///
|
|
13
11
|
/// Contains metadata about table structure to determine optimal rendering:
|
|
@@ -25,8 +23,8 @@ pub struct TableScan {
|
|
|
25
23
|
pub has_header: bool,
|
|
26
24
|
/// Whether the table has a caption element
|
|
27
25
|
pub has_caption: bool,
|
|
28
|
-
///
|
|
29
|
-
pub
|
|
26
|
+
/// Number of nested tables found inside this table
|
|
27
|
+
pub nested_table_count: usize,
|
|
30
28
|
/// Count of anchor elements in the table
|
|
31
29
|
pub link_count: usize,
|
|
32
30
|
/// Whether the table contains text content (not empty)
|
|
@@ -111,7 +109,7 @@ fn scan_table_node(
|
|
|
111
109
|
}
|
|
112
110
|
}
|
|
113
111
|
}
|
|
114
|
-
"table" if !is_root => scan.
|
|
112
|
+
"table" if !is_root => scan.nested_table_count += 1,
|
|
115
113
|
"tr" | "row" => {
|
|
116
114
|
let mut cell_count = 0;
|
|
117
115
|
for child in tag.children().top().iter() {
|
|
@@ -146,15 +144,3 @@ fn scan_table_node(
|
|
|
146
144
|
}
|
|
147
145
|
}
|
|
148
146
|
}
|
|
149
|
-
|
|
150
|
-
/// Normalize HTML tag names to lowercase.
|
|
151
|
-
///
|
|
152
|
-
/// Converts tag names to a consistent lowercase form for comparison.
|
|
153
|
-
fn normalized_tag_name(raw: Cow<'_, str>) -> Cow<'_, str> {
|
|
154
|
-
let lowercased = raw.to_lowercase();
|
|
155
|
-
if lowercased.as_str() == raw.as_ref() {
|
|
156
|
-
raw
|
|
157
|
-
} else {
|
|
158
|
-
Cow::Owned(lowercased)
|
|
159
|
-
}
|
|
160
|
-
}
|
|
@@ -2,24 +2,8 @@
|
|
|
2
2
|
//!
|
|
3
3
|
//! Provides helper functions for tag name normalization and comparison.
|
|
4
4
|
|
|
5
|
-
use std::borrow::Cow;
|
|
6
|
-
|
|
7
|
-
/// Normalize HTML tag names to lowercase.
|
|
8
|
-
///
|
|
9
|
-
/// Converts tag names to a consistent lowercase form for comparison.
|
|
10
|
-
pub(super) fn normalized_tag_name(raw: Cow<'_, str>) -> Cow<'_, str> {
|
|
11
|
-
let lowercased = raw.to_lowercase();
|
|
12
|
-
if lowercased.as_str() == raw.as_ref() {
|
|
13
|
-
raw
|
|
14
|
-
} else {
|
|
15
|
-
Cow::Owned(lowercased)
|
|
16
|
-
}
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
/// Check tag name equality with case-insensitive comparison.
|
|
20
|
-
pub(super) fn tag_name_eq(name: Cow<'_, str>, needle: &str) -> bool {
|
|
21
|
-
name.eq_ignore_ascii_case(needle)
|
|
22
|
-
}
|
|
5
|
+
pub(super) use crate::converter::main_helpers::tag_name_eq;
|
|
6
|
+
pub(super) use crate::converter::utility::content::normalized_tag_name;
|
|
23
7
|
|
|
24
8
|
/// Check if a node has a specific tag name.
|
|
25
9
|
///
|
|
@@ -12,6 +12,8 @@ use std::rc::Rc;
|
|
|
12
12
|
#[cfg(feature = "inline-images")]
|
|
13
13
|
use crate::inline_images::InlineImageCollector;
|
|
14
14
|
|
|
15
|
+
use crate::types::structure_collector::StructureCollectorHandle;
|
|
16
|
+
|
|
15
17
|
/// Handle type for inline image collector when feature is enabled.
|
|
16
18
|
#[cfg(feature = "inline-images")]
|
|
17
19
|
pub type InlineCollectorHandle = Rc<RefCell<InlineImageCollector>>;
|
|
@@ -99,6 +101,10 @@ pub struct Context {
|
|
|
99
101
|
#[cfg(feature = "visitor")]
|
|
100
102
|
/// Stores the first visitor error encountered during traversal.
|
|
101
103
|
pub(crate) visitor_error: Rc<RefCell<Option<String>>>,
|
|
104
|
+
/// Optional structure collector for building a [`crate::types::DocumentStructure`].
|
|
105
|
+
///
|
|
106
|
+
/// Populated when `options.include_document_structure == true`.
|
|
107
|
+
pub(crate) structure_collector: Option<StructureCollectorHandle>,
|
|
102
108
|
}
|
|
103
109
|
|
|
104
110
|
impl Context {
|
|
@@ -115,6 +121,7 @@ impl Context {
|
|
|
115
121
|
#[cfg(not(feature = "metadata"))] _metadata_collector: Option<()>,
|
|
116
122
|
#[cfg(feature = "visitor")] visitor: Option<crate::visitor::VisitorHandle>,
|
|
117
123
|
#[cfg(not(feature = "visitor"))] _visitor: Option<()>,
|
|
124
|
+
structure_collector: Option<StructureCollectorHandle>,
|
|
118
125
|
) -> Self {
|
|
119
126
|
#[cfg(feature = "metadata")]
|
|
120
127
|
let (
|
|
@@ -178,6 +185,7 @@ impl Context {
|
|
|
178
185
|
visitor: visitor.clone(),
|
|
179
186
|
#[cfg(feature = "visitor")]
|
|
180
187
|
visitor_error: Rc::new(RefCell::new(None)),
|
|
188
|
+
structure_collector,
|
|
181
189
|
}
|
|
182
190
|
}
|
|
183
191
|
}
|
|
@@ -11,8 +11,6 @@ use crate::converter::main_helpers::is_inline_element;
|
|
|
11
11
|
use crate::converter::utility::content::{is_block_level_name, normalized_tag_name};
|
|
12
12
|
use crate::text;
|
|
13
13
|
|
|
14
|
-
const TEXT_CACHE_CAPACITY: usize = 4096;
|
|
15
|
-
|
|
16
14
|
/// Cached information about an HTML tag element.
|
|
17
15
|
///
|
|
18
16
|
/// This struct stores pre-computed information about tag elements to avoid
|
|
@@ -236,11 +234,8 @@ impl DomContext {
|
|
|
236
234
|
.or_else(|| siblings.iter().position(|handle| handle.get_inner() == id))?;
|
|
237
235
|
|
|
238
236
|
for sibling in siblings.iter().skip(position + 1) {
|
|
239
|
-
if let Some(info) = self.tag_info(sibling.get_inner(), parser) {
|
|
237
|
+
if self.tag_info(sibling.get_inner(), parser).is_some() {
|
|
240
238
|
let sibling_id = sibling.get_inner();
|
|
241
|
-
if info.name == "script" || info.name == "style" {
|
|
242
|
-
return Some(sibling_id);
|
|
243
|
-
}
|
|
244
239
|
return Some(sibling_id);
|
|
245
240
|
}
|
|
246
241
|
if let Some(tl::Node::Raw(raw)) = sibling.get(parser) {
|
|
@@ -27,7 +27,7 @@ use std::borrow::Cow;
|
|
|
27
27
|
/// - **Inline mode**: Children are processed inline without block spacing
|
|
28
28
|
/// - **Block mode**: Content is collected, trimmed, and wrapped with blank lines
|
|
29
29
|
/// - **Empty content**: Skipped entirely
|
|
30
|
-
pub fn handle_form(
|
|
30
|
+
pub(crate) fn handle_form(
|
|
31
31
|
_tag_name: &str,
|
|
32
32
|
node_handle: &tl::NodeHandle,
|
|
33
33
|
parser: &tl::Parser,
|
|
@@ -82,7 +82,7 @@ pub fn handle_form(
|
|
|
82
82
|
/// - **Inline mode**: Children are processed inline without block spacing
|
|
83
83
|
/// - **Block mode**: Content is collected, trimmed, and wrapped with blank lines
|
|
84
84
|
/// - **Empty content**: Skipped entirely
|
|
85
|
-
pub fn handle_fieldset(
|
|
85
|
+
pub(crate) fn handle_fieldset(
|
|
86
86
|
_tag_name: &str,
|
|
87
87
|
node_handle: &tl::NodeHandle,
|
|
88
88
|
parser: &tl::Parser,
|
|
@@ -137,7 +137,7 @@ pub fn handle_fieldset(
|
|
|
137
137
|
/// - **Block mode**: Content is wrapped in strong markers (e.g., `**text**`)
|
|
138
138
|
/// - **Inline mode**: Content is rendered without emphasis
|
|
139
139
|
/// - Uses the configured strong/emphasis symbol from ConversionOptions
|
|
140
|
-
pub fn handle_legend(
|
|
140
|
+
pub(crate) fn handle_legend(
|
|
141
141
|
_tag_name: &str,
|
|
142
142
|
node_handle: &tl::NodeHandle,
|
|
143
143
|
parser: &tl::Parser,
|
|
@@ -198,7 +198,7 @@ pub fn handle_legend(
|
|
|
198
198
|
/// - Content is collected from children
|
|
199
199
|
/// - Non-empty content is output followed by blank lines (in block mode)
|
|
200
200
|
/// - Blank lines are suppressed in inline mode
|
|
201
|
-
pub fn handle_label(
|
|
201
|
+
pub(crate) fn handle_label(
|
|
202
202
|
_tag_name: &str,
|
|
203
203
|
node_handle: &tl::NodeHandle,
|
|
204
204
|
parser: &tl::Parser,
|
|
@@ -231,7 +231,7 @@ pub fn handle_label(
|
|
|
231
231
|
///
|
|
232
232
|
/// An input element represents a form control for user input. Since input
|
|
233
233
|
/// elements typically have no text content, this handler produces no output.
|
|
234
|
-
pub fn handle_input(
|
|
234
|
+
pub(crate) fn handle_input(
|
|
235
235
|
_tag_name: &str,
|
|
236
236
|
_node_handle: &tl::NodeHandle,
|
|
237
237
|
_parser: &tl::Parser,
|
|
@@ -253,7 +253,7 @@ pub fn handle_input(
|
|
|
253
253
|
///
|
|
254
254
|
/// - Content is collected from children
|
|
255
255
|
/// - Blank lines are added after content in block mode only
|
|
256
|
-
pub fn handle_textarea(
|
|
256
|
+
pub(crate) fn handle_textarea(
|
|
257
257
|
_tag_name: &str,
|
|
258
258
|
node_handle: &tl::NodeHandle,
|
|
259
259
|
parser: &tl::Parser,
|
|
@@ -287,7 +287,7 @@ pub fn handle_textarea(
|
|
|
287
287
|
///
|
|
288
288
|
/// - Content (options) is collected from children
|
|
289
289
|
/// - A single newline is added after the select in block mode
|
|
290
|
-
pub fn handle_select(
|
|
290
|
+
pub(crate) fn handle_select(
|
|
291
291
|
_tag_name: &str,
|
|
292
292
|
node_handle: &tl::NodeHandle,
|
|
293
293
|
parser: &tl::Parser,
|
|
@@ -322,7 +322,7 @@ pub fn handle_select(
|
|
|
322
322
|
/// - Content is collected from children
|
|
323
323
|
/// - If the option has the `selected` attribute, it's prefixed with `* ` in block mode
|
|
324
324
|
/// - A newline is added after each option in block mode
|
|
325
|
-
pub fn handle_option(
|
|
325
|
+
pub(crate) fn handle_option(
|
|
326
326
|
_tag_name: &str,
|
|
327
327
|
node_handle: &tl::NodeHandle,
|
|
328
328
|
parser: &tl::Parser,
|
|
@@ -365,7 +365,7 @@ pub fn handle_option(
|
|
|
365
365
|
///
|
|
366
366
|
/// - The `label` attribute is output as strong text (if present)
|
|
367
367
|
/// - Options within the group are rendered normally
|
|
368
|
-
pub fn handle_optgroup(
|
|
368
|
+
pub(crate) fn handle_optgroup(
|
|
369
369
|
_tag_name: &str,
|
|
370
370
|
node_handle: &tl::NodeHandle,
|
|
371
371
|
parser: &tl::Parser,
|
|
@@ -410,7 +410,7 @@ pub fn handle_optgroup(
|
|
|
410
410
|
///
|
|
411
411
|
/// - Content is collected from children
|
|
412
412
|
/// - Blank lines are added after content in block mode only
|
|
413
|
-
pub fn handle_button(
|
|
413
|
+
pub(crate) fn handle_button(
|
|
414
414
|
_tag_name: &str,
|
|
415
415
|
node_handle: &tl::NodeHandle,
|
|
416
416
|
parser: &tl::Parser,
|
|
@@ -444,7 +444,7 @@ pub fn handle_button(
|
|
|
444
444
|
///
|
|
445
445
|
/// - Content is collected from children (usually empty)
|
|
446
446
|
/// - Blank lines are added after content in block mode only
|
|
447
|
-
pub fn handle_progress(
|
|
447
|
+
pub(crate) fn handle_progress(
|
|
448
448
|
_tag_name: &str,
|
|
449
449
|
node_handle: &tl::NodeHandle,
|
|
450
450
|
parser: &tl::Parser,
|
|
@@ -478,7 +478,7 @@ pub fn handle_progress(
|
|
|
478
478
|
///
|
|
479
479
|
/// - Content is collected from children (usually empty)
|
|
480
480
|
/// - Blank lines are added after content in block mode only
|
|
481
|
-
pub fn handle_meter(
|
|
481
|
+
pub(crate) fn handle_meter(
|
|
482
482
|
_tag_name: &str,
|
|
483
483
|
node_handle: &tl::NodeHandle,
|
|
484
484
|
parser: &tl::Parser,
|
|
@@ -512,7 +512,7 @@ pub fn handle_meter(
|
|
|
512
512
|
///
|
|
513
513
|
/// - Content is collected from children
|
|
514
514
|
/// - Blank lines are added after content in block mode only
|
|
515
|
-
pub fn handle_output(
|
|
515
|
+
pub(crate) fn handle_output(
|
|
516
516
|
_tag_name: &str,
|
|
517
517
|
node_handle: &tl::NodeHandle,
|
|
518
518
|
parser: &tl::Parser,
|
|
@@ -546,7 +546,7 @@ pub fn handle_output(
|
|
|
546
546
|
///
|
|
547
547
|
/// - Content (options) is collected from children
|
|
548
548
|
/// - A single newline is added after the datalist in block mode
|
|
549
|
-
pub fn handle_datalist(
|
|
549
|
+
pub(crate) fn handle_datalist(
|
|
550
550
|
_tag_name: &str,
|
|
551
551
|
node_handle: &tl::NodeHandle,
|
|
552
552
|
parser: &tl::Parser,
|