html-to-markdown 2.29.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +18 -41
- data/README.md +37 -50
- data/ext/html-to-markdown-rb/native/Cargo.lock +17 -705
- data/ext/html-to-markdown-rb/native/Cargo.toml +1 -4
- data/ext/html-to-markdown-rb/native/README.md +4 -13
- data/ext/html-to-markdown-rb/native/src/conversion/inline_images.rs +2 -73
- data/ext/html-to-markdown-rb/native/src/conversion/metadata.rs +5 -49
- data/ext/html-to-markdown-rb/native/src/conversion/mod.rs +0 -6
- data/ext/html-to-markdown-rb/native/src/lib.rs +76 -213
- data/ext/html-to-markdown-rb/native/src/options.rs +0 -3
- data/lib/html_to_markdown/version.rb +1 -1
- data/lib/html_to_markdown.rb +13 -194
- data/sig/html_to_markdown.rbs +12 -373
- data/vendor/Cargo.toml +7 -4
- data/vendor/html-to-markdown-rs/Cargo.toml +4 -10
- data/vendor/html-to-markdown-rs/README.md +127 -51
- data/vendor/html-to-markdown-rs/examples/basic.rs +6 -1
- data/vendor/html-to-markdown-rs/examples/table.rs +6 -1
- data/vendor/html-to-markdown-rs/examples/test_escape.rs +6 -1
- data/vendor/html-to-markdown-rs/examples/test_inline_formatting.rs +8 -2
- data/vendor/html-to-markdown-rs/examples/test_lists.rs +6 -1
- data/vendor/html-to-markdown-rs/examples/test_semantic_tags.rs +6 -1
- data/vendor/html-to-markdown-rs/examples/test_tables.rs +6 -1
- data/vendor/html-to-markdown-rs/examples/test_task_lists.rs +6 -1
- data/vendor/html-to-markdown-rs/examples/test_whitespace.rs +6 -1
- data/vendor/html-to-markdown-rs/src/convert_api.rs +151 -745
- data/vendor/html-to-markdown-rs/src/converter/block/blockquote.rs +3 -5
- data/vendor/html-to-markdown-rs/src/converter/block/div.rs +1 -7
- data/vendor/html-to-markdown-rs/src/converter/block/heading.rs +18 -5
- data/vendor/html-to-markdown-rs/src/converter/block/paragraph.rs +10 -0
- data/vendor/html-to-markdown-rs/src/converter/block/preformatted.rs +3 -5
- data/vendor/html-to-markdown-rs/src/converter/block/table/builder.rs +16 -11
- data/vendor/html-to-markdown-rs/src/converter/block/table/cell.rs +20 -0
- data/vendor/html-to-markdown-rs/src/converter/block/table/cells.rs +4 -17
- data/vendor/html-to-markdown-rs/src/converter/block/table/mod.rs +140 -0
- data/vendor/html-to-markdown-rs/src/converter/block/table/scanner.rs +4 -18
- data/vendor/html-to-markdown-rs/src/converter/block/table/utils.rs +2 -18
- data/vendor/html-to-markdown-rs/src/converter/context.rs +8 -0
- data/vendor/html-to-markdown-rs/src/converter/dom_context.rs +1 -6
- data/vendor/html-to-markdown-rs/src/converter/form/elements.rs +14 -14
- data/vendor/html-to-markdown-rs/src/converter/handlers/blockquote.rs +4 -5
- data/vendor/html-to-markdown-rs/src/converter/handlers/code_block.rs +5 -10
- data/vendor/html-to-markdown-rs/src/converter/handlers/graphic.rs +3 -5
- data/vendor/html-to-markdown-rs/src/converter/handlers/image.rs +3 -5
- data/vendor/html-to-markdown-rs/src/converter/handlers/link.rs +3 -5
- data/vendor/html-to-markdown-rs/src/converter/inline/code.rs +3 -5
- data/vendor/html-to-markdown-rs/src/converter/inline/emphasis.rs +4 -10
- data/vendor/html-to-markdown-rs/src/converter/inline/link.rs +4 -170
- data/vendor/html-to-markdown-rs/src/converter/inline/semantic/marks.rs +7 -19
- data/vendor/html-to-markdown-rs/src/converter/list/item.rs +3 -5
- data/vendor/html-to-markdown-rs/src/converter/list/ordered.rs +4 -10
- data/vendor/html-to-markdown-rs/src/converter/list/unordered.rs +6 -12
- data/vendor/html-to-markdown-rs/src/converter/list/utils.rs +1 -12
- data/vendor/html-to-markdown-rs/src/converter/main.rs +85 -56
- data/vendor/html-to-markdown-rs/src/converter/main_helpers.rs +4 -67
- data/vendor/html-to-markdown-rs/src/converter/media/embedded.rs +1 -5
- data/vendor/html-to-markdown-rs/src/converter/media/graphic.rs +3 -40
- data/vendor/html-to-markdown-rs/src/converter/media/image.rs +0 -8
- data/vendor/html-to-markdown-rs/src/converter/media/svg.rs +3 -13
- data/vendor/html-to-markdown-rs/src/converter/metadata.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/mod.rs +0 -8
- data/vendor/html-to-markdown-rs/src/converter/plain_text.rs +37 -12
- data/vendor/html-to-markdown-rs/src/converter/semantic/attributes.rs +5 -30
- data/vendor/html-to-markdown-rs/src/converter/semantic/figure.rs +29 -0
- data/vendor/html-to-markdown-rs/src/converter/text/escaping.rs +1 -36
- data/vendor/html-to-markdown-rs/src/converter/text/mod.rs +1 -3
- data/vendor/html-to-markdown-rs/src/converter/text/normalization.rs +0 -53
- data/vendor/html-to-markdown-rs/src/converter/text_node.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/utility/attributes.rs +0 -41
- data/vendor/html-to-markdown-rs/src/converter/utility/caching.rs +2 -1
- data/vendor/html-to-markdown-rs/src/converter/utility/content.rs +15 -98
- data/vendor/html-to-markdown-rs/src/converter/utility/preprocessing.rs +113 -4
- data/vendor/html-to-markdown-rs/src/converter/utility/serialization.rs +3 -0
- data/vendor/html-to-markdown-rs/src/converter/visitor_hooks.rs +4 -10
- data/vendor/html-to-markdown-rs/src/exports.rs +1 -4
- data/vendor/html-to-markdown-rs/src/inline_images.rs +1 -1
- data/vendor/html-to-markdown-rs/src/lib.rs +13 -133
- data/vendor/html-to-markdown-rs/src/metadata/collector.rs +4 -4
- data/vendor/html-to-markdown-rs/src/metadata/mod.rs +22 -22
- data/vendor/html-to-markdown-rs/src/metadata/types.rs +3 -3
- data/vendor/html-to-markdown-rs/src/options/conversion.rs +351 -319
- data/vendor/html-to-markdown-rs/src/options/preprocessing.rs +8 -2
- data/vendor/html-to-markdown-rs/src/prelude.rs +1 -15
- data/vendor/html-to-markdown-rs/src/rcdom.rs +7 -1
- data/vendor/html-to-markdown-rs/src/text.rs +25 -14
- data/vendor/html-to-markdown-rs/src/types/document.rs +175 -0
- data/vendor/html-to-markdown-rs/src/types/mod.rs +17 -0
- data/vendor/html-to-markdown-rs/src/types/result.rs +49 -0
- data/vendor/html-to-markdown-rs/src/types/structure_builder.rs +790 -0
- data/vendor/html-to-markdown-rs/src/types/structure_collector.rs +442 -0
- data/vendor/html-to-markdown-rs/src/types/tables.rs +47 -0
- data/vendor/html-to-markdown-rs/src/types/warnings.rs +28 -0
- data/vendor/html-to-markdown-rs/src/visitor/mod.rs +0 -6
- data/vendor/html-to-markdown-rs/src/visitor/traits.rs +0 -1
- data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/mod.rs +1 -21
- data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/mod.rs +0 -5
- data/vendor/html-to-markdown-rs/src/visitor_helpers.rs +1 -845
- data/vendor/html-to-markdown-rs/tests/br_in_inline_test.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/commonmark_compliance_test.rs +8 -8
- data/vendor/html-to-markdown-rs/tests/djot_output_test.rs +8 -2
- data/vendor/html-to-markdown-rs/tests/integration_test.rs +23 -6
- data/vendor/html-to-markdown-rs/tests/issue_121_regressions.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/issue_127_regressions.rs +8 -2
- data/vendor/html-to-markdown-rs/tests/issue_128_regressions.rs +6 -1
- data/vendor/html-to-markdown-rs/tests/issue_131_regressions.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/issue_134_regressions.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/issue_139_regressions.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/issue_140_regressions.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/issue_143_regressions.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/issue_145_regressions.rs +8 -7
- data/vendor/html-to-markdown-rs/tests/issue_146_regressions.rs +8 -7
- data/vendor/html-to-markdown-rs/tests/issue_176_regressions.rs +12 -2
- data/vendor/html-to-markdown-rs/tests/issue_190_regressions.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/issue_199_regressions.rs +6 -1
- data/vendor/html-to-markdown-rs/tests/issue_200_regressions.rs +6 -1
- data/vendor/html-to-markdown-rs/tests/issue_212_regressions.rs +6 -1
- data/vendor/html-to-markdown-rs/tests/issue_216_217_regressions.rs +6 -1
- data/vendor/html-to-markdown-rs/tests/json_ld_script_extraction.rs +4 -6
- data/vendor/html-to-markdown-rs/tests/lists_test.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/plain_output_test.rs +8 -2
- data/vendor/html-to-markdown-rs/tests/preprocessing_tests.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/skip_images_test.rs +8 -11
- data/vendor/html-to-markdown-rs/tests/tables_test.rs +12 -2
- data/vendor/html-to-markdown-rs/tests/test_custom_elements.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/test_nested_simple.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/test_script_style_stripping.rs +17 -28
- data/vendor/html-to-markdown-rs/tests/test_spa_bisect.rs +8 -1
- data/vendor/html-to-markdown-rs/tests/visitor_integration_test.rs +29 -33
- data/vendor/html-to-markdown-rs/tests/xml_tables_test.rs +8 -1
- metadata +9 -37
- data/bin/benchmark.rb +0 -232
- data/ext/html-to-markdown-rb/native/src/conversion/tables.rs +0 -71
- data/ext/html-to-markdown-rb/native/src/profiling.rs +0 -215
- data/ext/html-to-markdown-rb/native/src/visitor/bridge.rs +0 -252
- data/ext/html-to-markdown-rb/native/src/visitor/callbacks.rs +0 -640
- data/ext/html-to-markdown-rb/native/src/visitor/mod.rs +0 -12
- data/spec/convert_spec.rb +0 -77
- data/spec/convert_with_tables_spec.rb +0 -194
- data/spec/metadata_extraction_spec.rb +0 -437
- data/spec/visitor_issue_187_spec.rb +0 -605
- data/spec/visitor_spec.rb +0 -1149
- data/vendor/html-to-markdown-rs/src/hocr/converter/code_analysis.rs +0 -254
- data/vendor/html-to-markdown-rs/src/hocr/converter/core.rs +0 -249
- data/vendor/html-to-markdown-rs/src/hocr/converter/elements.rs +0 -382
- data/vendor/html-to-markdown-rs/src/hocr/converter/hierarchy.rs +0 -379
- data/vendor/html-to-markdown-rs/src/hocr/converter/keywords.rs +0 -55
- data/vendor/html-to-markdown-rs/src/hocr/converter/layout.rs +0 -313
- data/vendor/html-to-markdown-rs/src/hocr/converter/mod.rs +0 -26
- data/vendor/html-to-markdown-rs/src/hocr/converter/output.rs +0 -78
- data/vendor/html-to-markdown-rs/src/hocr/extractor.rs +0 -232
- data/vendor/html-to-markdown-rs/src/hocr/mod.rs +0 -31
- data/vendor/html-to-markdown-rs/src/hocr/parser.rs +0 -333
- data/vendor/html-to-markdown-rs/src/hocr/spatial/coords.rs +0 -129
- data/vendor/html-to-markdown-rs/src/hocr/spatial/grouping.rs +0 -165
- data/vendor/html-to-markdown-rs/src/hocr/spatial/layout.rs +0 -335
- data/vendor/html-to-markdown-rs/src/hocr/spatial/mod.rs +0 -15
- data/vendor/html-to-markdown-rs/src/hocr/spatial/output.rs +0 -63
- data/vendor/html-to-markdown-rs/src/hocr/types.rs +0 -269
- data/vendor/html-to-markdown-rs/src/visitor/async_traits.rs +0 -249
- data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/bridge.rs +0 -189
- data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/bridge_visitor.rs +0 -343
- data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/macros.rs +0 -217
- data/vendor/html-to-markdown-rs/tests/async_visitor_test.rs +0 -57
- data/vendor/html-to-markdown-rs/tests/convert_with_metadata_no_frontmatter.rs +0 -100
- data/vendor/html-to-markdown-rs/tests/hocr_compliance_test.rs +0 -509
|
@@ -1,6 +1,13 @@
|
|
|
1
1
|
#![allow(missing_docs)]
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
fn convert(
|
|
4
|
+
html: &str,
|
|
5
|
+
opts: Option<html_to_markdown_rs::ConversionOptions>,
|
|
6
|
+
) -> html_to_markdown_rs::error::Result<String> {
|
|
7
|
+
html_to_markdown_rs::convert(html, opts).map(|r| r.content.unwrap_or_default())
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
use html_to_markdown_rs::ConversionOptions;
|
|
4
11
|
|
|
5
12
|
#[test]
|
|
6
13
|
fn test_br_inside_bold_tags() {
|
|
@@ -1,13 +1,6 @@
|
|
|
1
1
|
#![allow(missing_docs)]
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
//!
|
|
5
|
-
//! This test suite verifies that our HTML-to-Markdown converter produces
|
|
6
|
-
//! CommonMark-compliant output by testing against the official `CommonMark` spec.
|
|
7
|
-
//!
|
|
8
|
-
//! The test cases are derived from <https://spec.commonmark.org>/
|
|
9
|
-
|
|
10
|
-
use html_to_markdown_rs::{ConversionOptions, convert};
|
|
3
|
+
use html_to_markdown_rs::ConversionOptions;
|
|
11
4
|
use serde::Deserialize;
|
|
12
5
|
|
|
13
6
|
#[derive(Debug, Deserialize)]
|
|
@@ -289,3 +282,10 @@ fn test_commonmark_compliance() {
|
|
|
289
282
|
fn normalize_markdown(md: &str) -> String {
|
|
290
283
|
md.trim_end().to_string()
|
|
291
284
|
}
|
|
285
|
+
|
|
286
|
+
fn convert(
|
|
287
|
+
html: &str,
|
|
288
|
+
opts: Option<html_to_markdown_rs::ConversionOptions>,
|
|
289
|
+
) -> html_to_markdown_rs::error::Result<String> {
|
|
290
|
+
html_to_markdown_rs::convert(html, opts).map(|r| r.content.unwrap_or_default())
|
|
291
|
+
}
|
|
@@ -1,6 +1,12 @@
|
|
|
1
|
-
|
|
1
|
+
#![allow(missing_docs)]
|
|
2
|
+
fn convert(
|
|
3
|
+
html: &str,
|
|
4
|
+
opts: Option<html_to_markdown_rs::ConversionOptions>,
|
|
5
|
+
) -> html_to_markdown_rs::error::Result<String> {
|
|
6
|
+
html_to_markdown_rs::convert(html, opts).map(|r| r.content.unwrap_or_default())
|
|
7
|
+
}
|
|
2
8
|
|
|
3
|
-
use html_to_markdown_rs::{ConversionOptions, OutputFormat
|
|
9
|
+
use html_to_markdown_rs::{ConversionOptions, OutputFormat};
|
|
4
10
|
|
|
5
11
|
fn djot_options() -> ConversionOptions {
|
|
6
12
|
ConversionOptions {
|
|
@@ -1,11 +1,6 @@
|
|
|
1
1
|
#![allow(missing_docs)]
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
//!
|
|
5
|
-
//! These tests verify end-to-end conversion of various HTML elements
|
|
6
|
-
//! to ensure correct Markdown output.
|
|
7
|
-
|
|
8
|
-
use html_to_markdown_rs::{ConversionOptions, convert};
|
|
3
|
+
use html_to_markdown_rs::ConversionOptions;
|
|
9
4
|
|
|
10
5
|
#[test]
|
|
11
6
|
fn test_basic_paragraph() {
|
|
@@ -580,3 +575,25 @@ fn test_nested_bold_issue_111() {
|
|
|
580
575
|
let result = convert(html, None).unwrap();
|
|
581
576
|
assert_eq!(result, "**bolder**\n");
|
|
582
577
|
}
|
|
578
|
+
|
|
579
|
+
#[test]
|
|
580
|
+
fn hidden_elements_stripped() {
|
|
581
|
+
let html = "<p>visible</p><div hidden>secret</div><p>also visible</p>";
|
|
582
|
+
let result = convert(html, None).unwrap();
|
|
583
|
+
assert!(!result.contains("secret"));
|
|
584
|
+
assert!(result.contains("visible"));
|
|
585
|
+
}
|
|
586
|
+
|
|
587
|
+
#[test]
|
|
588
|
+
fn q_element_produces_quotes() {
|
|
589
|
+
let html = "<p>He said <q>hello</q> to me</p>";
|
|
590
|
+
let result = convert(html, None).unwrap();
|
|
591
|
+
assert!(result.contains(r#""hello""#), "q element should add quotes: {result}");
|
|
592
|
+
}
|
|
593
|
+
|
|
594
|
+
fn convert(
|
|
595
|
+
html: &str,
|
|
596
|
+
opts: Option<html_to_markdown_rs::ConversionOptions>,
|
|
597
|
+
) -> html_to_markdown_rs::error::Result<String> {
|
|
598
|
+
html_to_markdown_rs::convert(html, opts).map(|r| r.content.unwrap_or_default())
|
|
599
|
+
}
|
|
@@ -1,9 +1,16 @@
|
|
|
1
1
|
#![allow(missing_docs)]
|
|
2
2
|
|
|
3
|
+
fn convert(
|
|
4
|
+
html: &str,
|
|
5
|
+
opts: Option<html_to_markdown_rs::ConversionOptions>,
|
|
6
|
+
) -> html_to_markdown_rs::error::Result<String> {
|
|
7
|
+
html_to_markdown_rs::convert(html, opts).map(|r| r.content.unwrap_or_default())
|
|
8
|
+
}
|
|
9
|
+
|
|
3
10
|
use std::fs;
|
|
4
11
|
use std::path::PathBuf;
|
|
5
12
|
|
|
6
|
-
use html_to_markdown_rs::
|
|
13
|
+
use html_to_markdown_rs::ConversionOptions;
|
|
7
14
|
|
|
8
15
|
fn fixture_path(name: &str) -> PathBuf {
|
|
9
16
|
[env!("CARGO_MANIFEST_DIR"), "../../test_documents/html/issues", name]
|
|
@@ -1,11 +1,18 @@
|
|
|
1
1
|
#![allow(missing_docs)]
|
|
2
2
|
|
|
3
|
+
fn convert(
|
|
4
|
+
html: &str,
|
|
5
|
+
opts: Option<html_to_markdown_rs::ConversionOptions>,
|
|
6
|
+
) -> html_to_markdown_rs::error::Result<String> {
|
|
7
|
+
html_to_markdown_rs::convert(html, opts).map(|r| r.content.unwrap_or_default())
|
|
8
|
+
}
|
|
9
|
+
|
|
3
10
|
use std::fs;
|
|
4
11
|
use std::path::PathBuf;
|
|
5
12
|
|
|
6
13
|
use html_to_markdown_rs::{
|
|
7
14
|
CodeBlockStyle, ConversionOptions, HeadingStyle, HighlightStyle, ListIndentType, PreprocessingOptions,
|
|
8
|
-
PreprocessingPreset, WhitespaceMode,
|
|
15
|
+
PreprocessingPreset, WhitespaceMode,
|
|
9
16
|
};
|
|
10
17
|
|
|
11
18
|
fn fixture_path(name: &str) -> PathBuf {
|
|
@@ -27,7 +34,6 @@ fn issue_127_options() -> ConversionOptions {
|
|
|
27
34
|
code_block_style: CodeBlockStyle::Backticks,
|
|
28
35
|
strip_newlines: true,
|
|
29
36
|
extract_metadata: false,
|
|
30
|
-
hocr_spatial_tables: true,
|
|
31
37
|
preprocessing: PreprocessingOptions {
|
|
32
38
|
enabled: true,
|
|
33
39
|
preset: PreprocessingPreset::Minimal,
|
|
@@ -1,6 +1,11 @@
|
|
|
1
1
|
#![allow(missing_docs)]
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
fn convert(
|
|
4
|
+
html: &str,
|
|
5
|
+
opts: Option<html_to_markdown_rs::ConversionOptions>,
|
|
6
|
+
) -> html_to_markdown_rs::error::Result<String> {
|
|
7
|
+
html_to_markdown_rs::convert(html, opts).map(|r| r.content.unwrap_or_default())
|
|
8
|
+
}
|
|
4
9
|
|
|
5
10
|
#[test]
|
|
6
11
|
fn images_with_dimensions_render_as_markdown_links() {
|
|
@@ -1,7 +1,14 @@
|
|
|
1
1
|
#![allow(missing_docs)]
|
|
2
2
|
|
|
3
|
+
fn convert(
|
|
4
|
+
html: &str,
|
|
5
|
+
opts: Option<html_to_markdown_rs::ConversionOptions>,
|
|
6
|
+
) -> html_to_markdown_rs::error::Result<String> {
|
|
7
|
+
html_to_markdown_rs::convert(html, opts).map(|r| r.content.unwrap_or_default())
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
use html_to_markdown_rs::ConversionOptions;
|
|
3
11
|
use html_to_markdown_rs::options::WhitespaceMode;
|
|
4
|
-
use html_to_markdown_rs::{ConversionOptions, convert};
|
|
5
12
|
|
|
6
13
|
#[test]
|
|
7
14
|
fn link_flattens_block_children_issue_131() {
|
|
@@ -1,9 +1,16 @@
|
|
|
1
1
|
#![allow(missing_docs)]
|
|
2
2
|
|
|
3
|
+
fn convert(
|
|
4
|
+
html: &str,
|
|
5
|
+
opts: Option<html_to_markdown_rs::ConversionOptions>,
|
|
6
|
+
) -> html_to_markdown_rs::error::Result<String> {
|
|
7
|
+
html_to_markdown_rs::convert(html, opts).map(|r| r.content.unwrap_or_default())
|
|
8
|
+
}
|
|
9
|
+
|
|
3
10
|
use std::fs;
|
|
4
11
|
use std::path::PathBuf;
|
|
5
12
|
|
|
6
|
-
use html_to_markdown_rs::
|
|
13
|
+
use html_to_markdown_rs::ConversionOptions;
|
|
7
14
|
|
|
8
15
|
fn fixture_path(name: &str) -> PathBuf {
|
|
9
16
|
[env!("CARGO_MANIFEST_DIR"), "../../test_documents/html/issues", name]
|
|
@@ -1,6 +1,13 @@
|
|
|
1
1
|
#![allow(missing_docs)]
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
fn convert(
|
|
4
|
+
html: &str,
|
|
5
|
+
opts: Option<html_to_markdown_rs::ConversionOptions>,
|
|
6
|
+
) -> html_to_markdown_rs::error::Result<String> {
|
|
7
|
+
html_to_markdown_rs::convert(html, opts).map(|r| r.content.unwrap_or_default())
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
use html_to_markdown_rs::ConversionOptions;
|
|
4
11
|
|
|
5
12
|
#[test]
|
|
6
13
|
fn long_multibyte_link_label_does_not_panic() {
|
|
@@ -1,9 +1,16 @@
|
|
|
1
1
|
#![allow(missing_docs)]
|
|
2
2
|
|
|
3
|
+
fn convert(
|
|
4
|
+
html: &str,
|
|
5
|
+
opts: Option<html_to_markdown_rs::ConversionOptions>,
|
|
6
|
+
) -> html_to_markdown_rs::error::Result<String> {
|
|
7
|
+
html_to_markdown_rs::convert(html, opts).map(|r| r.content.unwrap_or_default())
|
|
8
|
+
}
|
|
9
|
+
|
|
3
10
|
use std::fs;
|
|
4
11
|
use std::path::PathBuf;
|
|
5
12
|
|
|
6
|
-
use html_to_markdown_rs::
|
|
13
|
+
use html_to_markdown_rs::ConversionOptions;
|
|
7
14
|
|
|
8
15
|
fn fixture_path(name: &str) -> PathBuf {
|
|
9
16
|
[env!("CARGO_MANIFEST_DIR"), "../../test_documents/html/issues", name]
|
|
@@ -1,9 +1,16 @@
|
|
|
1
1
|
#![allow(missing_docs)]
|
|
2
2
|
|
|
3
|
+
fn convert(
|
|
4
|
+
html: &str,
|
|
5
|
+
opts: Option<html_to_markdown_rs::ConversionOptions>,
|
|
6
|
+
) -> html_to_markdown_rs::error::Result<String> {
|
|
7
|
+
html_to_markdown_rs::convert(html, opts).map(|r| r.content.unwrap_or_default())
|
|
8
|
+
}
|
|
9
|
+
|
|
3
10
|
use std::fs;
|
|
4
11
|
use std::path::PathBuf;
|
|
5
12
|
|
|
6
|
-
use html_to_markdown_rs::
|
|
13
|
+
use html_to_markdown_rs::ConversionOptions;
|
|
7
14
|
|
|
8
15
|
fn fixture_path(name: &str) -> PathBuf {
|
|
9
16
|
[env!("CARGO_MANIFEST_DIR"), "../../test_documents/html/issues", name]
|
|
@@ -1,12 +1,6 @@
|
|
|
1
1
|
#![allow(missing_docs)]
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
//!
|
|
5
|
-
//! Tests for ensuring that `strip_newlines=True` doesn't cause excessive whitespace
|
|
6
|
-
//! around block elements. The root cause was that newlines were converted to spaces
|
|
7
|
-
//! BEFORE whitespace-only node detection, causing the detection to fail.
|
|
8
|
-
|
|
9
|
-
use html_to_markdown_rs::{ConversionOptions, convert};
|
|
3
|
+
use html_to_markdown_rs::ConversionOptions;
|
|
10
4
|
|
|
11
5
|
#[test]
|
|
12
6
|
fn test_strip_newlines_preserves_block_spacing() {
|
|
@@ -130,3 +124,10 @@ fn test_strip_newlines_handles_nested_blocks() {
|
|
|
130
124
|
"excessive blank lines in nested blocks: {max_consecutive_blank} consecutive blanks in:\n{result}"
|
|
131
125
|
);
|
|
132
126
|
}
|
|
127
|
+
|
|
128
|
+
fn convert(
|
|
129
|
+
html: &str,
|
|
130
|
+
opts: Option<html_to_markdown_rs::ConversionOptions>,
|
|
131
|
+
) -> html_to_markdown_rs::error::Result<String> {
|
|
132
|
+
html_to_markdown_rs::convert(html, opts).map(|r| r.content.unwrap_or_default())
|
|
133
|
+
}
|
|
@@ -1,12 +1,6 @@
|
|
|
1
1
|
#![allow(missing_docs)]
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
//!
|
|
5
|
-
//! Tests for ensuring that `strip_tags` and `preserve_tags` properly prevent
|
|
6
|
-
//! `<meta>` and `<title>` tags from appearing in YAML frontmatter when metadata
|
|
7
|
-
//! extraction is enabled.
|
|
8
|
-
|
|
9
|
-
use html_to_markdown_rs::{ConversionOptions, convert};
|
|
3
|
+
use html_to_markdown_rs::ConversionOptions;
|
|
10
4
|
|
|
11
5
|
#[test]
|
|
12
6
|
fn test_strip_tags_prevents_metadata_extraction() {
|
|
@@ -141,3 +135,10 @@ fn test_preserve_tags_prevents_metadata_extraction() {
|
|
|
141
135
|
"meta-author should NOT be in YAML frontmatter when preserve_tags=['meta']: {result}"
|
|
142
136
|
);
|
|
143
137
|
}
|
|
138
|
+
|
|
139
|
+
fn convert(
|
|
140
|
+
html: &str,
|
|
141
|
+
opts: Option<html_to_markdown_rs::ConversionOptions>,
|
|
142
|
+
) -> html_to_markdown_rs::error::Result<String> {
|
|
143
|
+
html_to_markdown_rs::convert(html, opts).map(|r| r.content.unwrap_or_default())
|
|
144
|
+
}
|
|
@@ -4,7 +4,12 @@
|
|
|
4
4
|
|
|
5
5
|
#[test]
|
|
6
6
|
fn test_strong_blockquote_strong_newlines() {
|
|
7
|
-
|
|
7
|
+
fn convert(
|
|
8
|
+
html: &str,
|
|
9
|
+
opts: Option<html_to_markdown_rs::ConversionOptions>,
|
|
10
|
+
) -> html_to_markdown_rs::error::Result<String> {
|
|
11
|
+
html_to_markdown_rs::convert(html, opts).map(|r| r.content.unwrap_or_default())
|
|
12
|
+
}
|
|
8
13
|
|
|
9
14
|
// Test case from issue #176: strong + blockquote + strong
|
|
10
15
|
let html = r"<strong>2. Point two</strong><blockquote>Option Explicit
|
|
@@ -30,7 +35,12 @@ End Function</blockquote><strong>3. Point three</strong>";
|
|
|
30
35
|
|
|
31
36
|
#[test]
|
|
32
37
|
fn test_paragraph_blockquote_paragraph_newlines() {
|
|
33
|
-
|
|
38
|
+
fn convert(
|
|
39
|
+
html: &str,
|
|
40
|
+
opts: Option<html_to_markdown_rs::ConversionOptions>,
|
|
41
|
+
) -> html_to_markdown_rs::error::Result<String> {
|
|
42
|
+
html_to_markdown_rs::convert(html, opts).map(|r| r.content.unwrap_or_default())
|
|
43
|
+
}
|
|
34
44
|
|
|
35
45
|
// Control test: p + blockquote + p should work correctly
|
|
36
46
|
let html = r"<p>First paragraph</p><blockquote>A quote</blockquote><p>Second paragraph</p>";
|
|
@@ -1,9 +1,16 @@
|
|
|
1
1
|
//! Regression coverage for issue #190.
|
|
2
2
|
|
|
3
|
+
fn convert(
|
|
4
|
+
html: &str,
|
|
5
|
+
opts: Option<html_to_markdown_rs::ConversionOptions>,
|
|
6
|
+
) -> html_to_markdown_rs::error::Result<String> {
|
|
7
|
+
html_to_markdown_rs::convert(html, opts).map(|r| r.content.unwrap_or_default())
|
|
8
|
+
}
|
|
9
|
+
|
|
3
10
|
use std::fs;
|
|
4
11
|
use std::path::PathBuf;
|
|
5
12
|
|
|
6
|
-
use html_to_markdown_rs::{CodeBlockStyle, ConversionOptions
|
|
13
|
+
use html_to_markdown_rs::{CodeBlockStyle, ConversionOptions};
|
|
7
14
|
|
|
8
15
|
fn fixture_path(name: &str) -> PathBuf {
|
|
9
16
|
[
|
|
@@ -1,6 +1,11 @@
|
|
|
1
1
|
//! Regression coverage for issue #199.
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
fn convert(
|
|
4
|
+
html: &str,
|
|
5
|
+
opts: Option<html_to_markdown_rs::ConversionOptions>,
|
|
6
|
+
) -> html_to_markdown_rs::error::Result<String> {
|
|
7
|
+
html_to_markdown_rs::convert(html, opts).map(|r| r.content.unwrap_or_default())
|
|
8
|
+
}
|
|
4
9
|
|
|
5
10
|
#[test]
|
|
6
11
|
fn test_link_label_is_not_truncated() {
|
|
@@ -1,6 +1,11 @@
|
|
|
1
1
|
//! Regression coverage for issues #200 and #214.
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
fn convert(
|
|
4
|
+
html: &str,
|
|
5
|
+
opts: Option<html_to_markdown_rs::ConversionOptions>,
|
|
6
|
+
) -> html_to_markdown_rs::error::Result<String> {
|
|
7
|
+
html_to_markdown_rs::convert(html, opts).map(|r| r.content.unwrap_or_default())
|
|
8
|
+
}
|
|
4
9
|
|
|
5
10
|
#[test]
|
|
6
11
|
fn test_definition_list_spacing_consistency() {
|
|
@@ -1,6 +1,11 @@
|
|
|
1
1
|
#![allow(missing_docs)]
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
fn convert(
|
|
4
|
+
html: &str,
|
|
5
|
+
opts: Option<html_to_markdown_rs::ConversionOptions>,
|
|
6
|
+
) -> html_to_markdown_rs::error::Result<String> {
|
|
7
|
+
html_to_markdown_rs::convert(html, opts).map(|r| r.content.unwrap_or_default())
|
|
8
|
+
}
|
|
4
9
|
|
|
5
10
|
/// Regression test for <https://github.com/kreuzberg-dev/html-to-markdown/issues/212>
|
|
6
11
|
///
|
|
@@ -7,7 +7,12 @@
|
|
|
7
7
|
//! fresh String buffer while inheriting a parent context with `block_content_start`
|
|
8
8
|
//! set by a paragraph handler, the index points into the wrong buffer.
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
fn convert(
|
|
11
|
+
html: &str,
|
|
12
|
+
opts: Option<html_to_markdown_rs::ConversionOptions>,
|
|
13
|
+
) -> html_to_markdown_rs::error::Result<String> {
|
|
14
|
+
html_to_markdown_rs::convert(html, opts).map(|r| r.content.unwrap_or_default())
|
|
15
|
+
}
|
|
11
16
|
|
|
12
17
|
/// Minimal reproducer: a <details> containing a <p> with <strong> inside.
|
|
13
18
|
/// The <details> handler collects into a fresh buffer, the <p> sets
|
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
#![allow(missing_docs)]
|
|
2
2
|
|
|
3
|
-
use html_to_markdown_rs::metadata::MetadataConfig;
|
|
4
|
-
|
|
5
3
|
#[test]
|
|
6
4
|
fn extracts_json_ld_from_head_script() {
|
|
7
5
|
let html = r#"
|
|
@@ -16,8 +14,8 @@ fn extracts_json_ld_from_head_script() {
|
|
|
16
14
|
</html>
|
|
17
15
|
"#;
|
|
18
16
|
|
|
19
|
-
let
|
|
20
|
-
|
|
17
|
+
let result = html_to_markdown_rs::convert(html, None).expect("convert failed");
|
|
18
|
+
let metadata = result.metadata;
|
|
21
19
|
|
|
22
20
|
assert_eq!(metadata.structured_data.len(), 1);
|
|
23
21
|
assert!(metadata.structured_data[0].raw_json.contains(r#""@type": "Article""#));
|
|
@@ -37,8 +35,8 @@ fn extracts_json_ld_from_body_script_and_keeps_content() {
|
|
|
37
35
|
</html>
|
|
38
36
|
"#;
|
|
39
37
|
|
|
40
|
-
let
|
|
41
|
-
|
|
38
|
+
let result = html_to_markdown_rs::convert(html, None).expect("convert failed");
|
|
39
|
+
let metadata = result.metadata;
|
|
42
40
|
|
|
43
41
|
assert_eq!(metadata.structured_data.len(), 1);
|
|
44
42
|
assert!(!metadata.structured_data[0].raw_json.trim().is_empty());
|
|
@@ -1,6 +1,13 @@
|
|
|
1
1
|
#![allow(missing_docs)]
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
fn convert(
|
|
4
|
+
html: &str,
|
|
5
|
+
opts: Option<html_to_markdown_rs::ConversionOptions>,
|
|
6
|
+
) -> html_to_markdown_rs::error::Result<String> {
|
|
7
|
+
html_to_markdown_rs::convert(html, opts).map(|r| r.content.unwrap_or_default())
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
use html_to_markdown_rs::ConversionOptions;
|
|
4
11
|
|
|
5
12
|
#[test]
|
|
6
13
|
fn test_basic_unordered_list() {
|
|
@@ -1,6 +1,12 @@
|
|
|
1
|
-
|
|
1
|
+
#![allow(missing_docs)]
|
|
2
|
+
fn convert(
|
|
3
|
+
html: &str,
|
|
4
|
+
opts: Option<html_to_markdown_rs::ConversionOptions>,
|
|
5
|
+
) -> html_to_markdown_rs::error::Result<String> {
|
|
6
|
+
html_to_markdown_rs::convert(html, opts).map(|r| r.content.unwrap_or_default())
|
|
7
|
+
}
|
|
2
8
|
|
|
3
|
-
use html_to_markdown_rs::{ConversionOptions, OutputFormat
|
|
9
|
+
use html_to_markdown_rs::{ConversionOptions, OutputFormat};
|
|
4
10
|
|
|
5
11
|
fn plain_options() -> ConversionOptions {
|
|
6
12
|
ConversionOptions {
|
|
@@ -1,6 +1,13 @@
|
|
|
1
1
|
#![allow(missing_docs)]
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
fn convert(
|
|
4
|
+
html: &str,
|
|
5
|
+
opts: Option<html_to_markdown_rs::ConversionOptions>,
|
|
6
|
+
) -> html_to_markdown_rs::error::Result<String> {
|
|
7
|
+
html_to_markdown_rs::convert(html, opts).map(|r| r.content.unwrap_or_default())
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
use html_to_markdown_rs::ConversionOptions;
|
|
4
11
|
|
|
5
12
|
#[test]
|
|
6
13
|
fn footer_without_navigation_hint_is_preserved() {
|
|
@@ -1,16 +1,6 @@
|
|
|
1
1
|
#![allow(missing_docs)]
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
//!
|
|
5
|
-
//! This test suite verifies that the `skip_images` option correctly omits all `<img>` tags
|
|
6
|
-
//! from the markdown output when enabled, while preserving all other content.
|
|
7
|
-
//!
|
|
8
|
-
//! The `skip_images` option is useful for:
|
|
9
|
-
//! - Text-only extraction from HTML documents
|
|
10
|
-
//! - Filtering out visual content for accessibility or reduced bandwidth
|
|
11
|
-
//! - Converting image-heavy documents to plain text markdown
|
|
12
|
-
|
|
13
|
-
use html_to_markdown_rs::{ConversionOptions, convert};
|
|
3
|
+
use html_to_markdown_rs::ConversionOptions;
|
|
14
4
|
|
|
15
5
|
#[test]
|
|
16
6
|
fn test_skip_images_enabled() {
|
|
@@ -523,3 +513,10 @@ fn test_skip_images_preserves_links_and_formatting() {
|
|
|
523
513
|
// Should not contain image
|
|
524
514
|
assert!(!result.contains("![Ignored]"), "Should not contain image");
|
|
525
515
|
}
|
|
516
|
+
|
|
517
|
+
fn convert(
|
|
518
|
+
html: &str,
|
|
519
|
+
opts: Option<html_to_markdown_rs::ConversionOptions>,
|
|
520
|
+
) -> html_to_markdown_rs::error::Result<String> {
|
|
521
|
+
html_to_markdown_rs::convert(html, opts).map(|r| r.content.unwrap_or_default())
|
|
522
|
+
}
|
|
@@ -1,6 +1,13 @@
|
|
|
1
1
|
#![allow(missing_docs)]
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
fn convert(
|
|
4
|
+
html: &str,
|
|
5
|
+
opts: Option<html_to_markdown_rs::ConversionOptions>,
|
|
6
|
+
) -> html_to_markdown_rs::error::Result<String> {
|
|
7
|
+
html_to_markdown_rs::convert(html, opts).map(|r| r.content.unwrap_or_default())
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
use html_to_markdown_rs::ConversionOptions;
|
|
4
11
|
|
|
5
12
|
#[test]
|
|
6
13
|
fn test_basic_table() {
|
|
@@ -711,7 +718,10 @@ fn test_table_colspan_no_header_issue_233() {
|
|
|
711
718
|
<td>Cell 2</td>
|
|
712
719
|
</tr>
|
|
713
720
|
</table>"#;
|
|
714
|
-
let result = html_to_markdown_rs::convert(html, None)
|
|
721
|
+
let result = html_to_markdown_rs::convert(html, None)
|
|
722
|
+
.unwrap()
|
|
723
|
+
.content
|
|
724
|
+
.unwrap_or_default();
|
|
715
725
|
assert!(result.contains("| Cell spanning 2 columns | |"));
|
|
716
726
|
assert!(result.contains("| Cell 1 | Cell 2 |"));
|
|
717
727
|
}
|
|
@@ -1,9 +1,16 @@
|
|
|
1
1
|
#![allow(missing_docs)]
|
|
2
2
|
|
|
3
|
+
fn convert(
|
|
4
|
+
html: &str,
|
|
5
|
+
opts: Option<html_to_markdown_rs::ConversionOptions>,
|
|
6
|
+
) -> html_to_markdown_rs::error::Result<String> {
|
|
7
|
+
html_to_markdown_rs::convert(html, opts).map(|r| r.content.unwrap_or_default())
|
|
8
|
+
}
|
|
9
|
+
|
|
3
10
|
use std::fs;
|
|
4
11
|
use std::path::PathBuf;
|
|
5
12
|
|
|
6
|
-
use html_to_markdown_rs::
|
|
13
|
+
use html_to_markdown_rs::ConversionOptions;
|
|
7
14
|
|
|
8
15
|
fn fixture_path(name: &str) -> PathBuf {
|
|
9
16
|
[env!("CARGO_MANIFEST_DIR"), "../../test_documents/html/issues", name]
|
|
@@ -1,9 +1,16 @@
|
|
|
1
1
|
#![allow(missing_docs)]
|
|
2
2
|
|
|
3
|
+
fn convert(
|
|
4
|
+
html: &str,
|
|
5
|
+
opts: Option<html_to_markdown_rs::ConversionOptions>,
|
|
6
|
+
) -> html_to_markdown_rs::error::Result<String> {
|
|
7
|
+
html_to_markdown_rs::convert(html, opts).map(|r| r.content.unwrap_or_default())
|
|
8
|
+
}
|
|
9
|
+
|
|
3
10
|
use std::fs;
|
|
4
11
|
use std::path::PathBuf;
|
|
5
12
|
|
|
6
|
-
use html_to_markdown_rs::
|
|
13
|
+
use html_to_markdown_rs::ConversionOptions;
|
|
7
14
|
|
|
8
15
|
fn fixture_path(name: &str) -> PathBuf {
|
|
9
16
|
[env!("CARGO_MANIFEST_DIR"), "../../test_documents/html/issues", name]
|
|
@@ -1,12 +1,6 @@
|
|
|
1
1
|
#![allow(missing_docs)]
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
//!
|
|
5
|
-
//! This test suite verifies that script and style tags are completely removed
|
|
6
|
-
//! from HTML before parsing, preventing the tl parser from misinterpreting
|
|
7
|
-
//! HTML-like content inside scripts as actual tags.
|
|
8
|
-
|
|
9
|
-
use html_to_markdown_rs::{ConversionOptions, MetadataConfig, convert, convert_with_metadata};
|
|
3
|
+
use html_to_markdown_rs::ConversionOptions;
|
|
10
4
|
|
|
11
5
|
#[test]
|
|
12
6
|
fn test_strip_simple_script_tag() {
|
|
@@ -117,13 +111,9 @@ fn test_preserve_json_ld_script() {
|
|
|
117
111
|
</body>
|
|
118
112
|
</html>"#;
|
|
119
113
|
|
|
120
|
-
let
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
};
|
|
124
|
-
|
|
125
|
-
let (markdown, metadata) =
|
|
126
|
-
convert_with_metadata(html, Some(options), MetadataConfig::default(), None).expect("Failed to convert");
|
|
114
|
+
let result = html_to_markdown_rs::convert(html, None).expect("Failed to convert");
|
|
115
|
+
let metadata = result.metadata;
|
|
116
|
+
let markdown = result.content.unwrap_or_default();
|
|
127
117
|
|
|
128
118
|
println!("Markdown:\n{markdown}");
|
|
129
119
|
println!("Metadata: {:?}", metadata.document.title);
|
|
@@ -174,13 +164,9 @@ fn test_multiple_script_tags() {
|
|
|
174
164
|
</body>
|
|
175
165
|
</html>"#;
|
|
176
166
|
|
|
177
|
-
let
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
};
|
|
181
|
-
|
|
182
|
-
let (markdown, metadata) =
|
|
183
|
-
convert_with_metadata(html, Some(options), MetadataConfig::default(), None).expect("Failed to convert");
|
|
167
|
+
let result = html_to_markdown_rs::convert(html, None).expect("Failed to convert");
|
|
168
|
+
let metadata = result.metadata;
|
|
169
|
+
let markdown = result.content.unwrap_or_default();
|
|
184
170
|
|
|
185
171
|
println!("Markdown:\n{markdown}");
|
|
186
172
|
|
|
@@ -235,13 +221,9 @@ fn test_reuters_like_structure() {
|
|
|
235
221
|
</body>
|
|
236
222
|
</html>"#;
|
|
237
223
|
|
|
238
|
-
let
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
};
|
|
242
|
-
|
|
243
|
-
let (markdown, metadata) =
|
|
244
|
-
convert_with_metadata(html, Some(options), MetadataConfig::default(), None).expect("Failed to convert");
|
|
224
|
+
let result = html_to_markdown_rs::convert(html, None).expect("Failed to convert");
|
|
225
|
+
let metadata = result.metadata;
|
|
226
|
+
let markdown = result.content.unwrap_or_default();
|
|
245
227
|
|
|
246
228
|
println!("Markdown output:\n{markdown}");
|
|
247
229
|
println!("Metadata title: {:?}", metadata.document.title);
|
|
@@ -405,3 +387,10 @@ fn test_inline_script_attributes_not_affected() {
|
|
|
405
387
|
"Should remove script tag content"
|
|
406
388
|
);
|
|
407
389
|
}
|
|
390
|
+
|
|
391
|
+
fn convert(
|
|
392
|
+
html: &str,
|
|
393
|
+
opts: Option<html_to_markdown_rs::ConversionOptions>,
|
|
394
|
+
) -> html_to_markdown_rs::error::Result<String> {
|
|
395
|
+
html_to_markdown_rs::convert(html, opts).map(|r| r.content.unwrap_or_default())
|
|
396
|
+
}
|