html-to-markdown 3.1.0 → 3.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +42 -12
- data/Gemfile +1 -0
- data/Gemfile.lock +27 -55
- data/README.md +9 -10
- data/Rakefile +4 -10
- data/ext/html-to-markdown_rb/Cargo.toml +14 -0
- data/ext/html_to_markdown_rb/Cargo.toml +16 -0
- data/ext/html_to_markdown_rb/extconf.rb +10 -0
- data/ext/html_to_markdown_rb/src/html_to_markdown_rs/version.rb +6 -0
- data/ext/html_to_markdown_rb/src/html_to_markdown_rs.rb +9 -0
- data/ext/html_to_markdown_rb/src/lib.rs +3941 -0
- data/html-to-markdown-rb.gemspec +1 -1
- data/lib/html_to_markdown/version.rb +1 -1
- data/lib/html_to_markdown.rb +31 -21
- data/{ext/html-to-markdown-rb/native/extconf.rb → lib/html_to_markdown_rs.rb} +1 -1
- data/sig/html_to_markdown.rbs +17 -5
- data/vendor/Cargo.toml +4 -4
- data/vendor/html-to-markdown-rs/Cargo.toml +2 -2
- data/vendor/html-to-markdown-rs/examples/test_deser.rs +12 -0
- data/vendor/html-to-markdown-rs/src/converter/block/mod.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/block/table/mod.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/form/mod.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/inline/mod.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/list/item.rs +10 -2
- data/vendor/html-to-markdown-rs/src/converter/mod.rs +2 -2
- data/vendor/html-to-markdown-rs/src/converter/semantic/mod.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/utility/content.rs +1 -1
- data/vendor/html-to-markdown-rs/src/exports.rs +3 -3
- data/vendor/html-to-markdown-rs/src/inline_images.rs +1 -1
- data/vendor/html-to-markdown-rs/src/lib.rs +1 -2
- data/vendor/html-to-markdown-rs/src/metadata/config.rs +1 -1
- data/vendor/html-to-markdown-rs/src/metadata/mod.rs +5 -5
- data/vendor/html-to-markdown-rs/src/options/conversion.rs +6 -12
- data/vendor/html-to-markdown-rs/src/options/mod.rs +1 -1
- data/vendor/html-to-markdown-rs/src/options/preprocessing.rs +3 -9
- data/vendor/html-to-markdown-rs/src/options/validation.rs +3 -3
- data/vendor/html-to-markdown-rs/src/types/document.rs +11 -0
- data/vendor/html-to-markdown-rs/src/types/result.rs +5 -2
- data/vendor/html-to-markdown-rs/src/types/tables.rs +1 -1
- data/vendor/html-to-markdown-rs/src/visitor/mod.rs +1 -1
- data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/state.rs +1 -1
- data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/traversal.rs +1 -1
- data/vendor/html-to-markdown-rs/src/visitor_helpers.rs +8 -8
- data/vendor/html-to-markdown-rs/tests/commonmark_compliance_test.rs +6 -0
- data/vendor/html-to-markdown-rs/tests/integration_test.rs +3 -3
- data/vendor/html-to-markdown-rs/tests/issue_140_regressions.rs +8 -2
- data/vendor/html-to-markdown-rs/tests/lists_test.rs +4 -4
- metadata +11 -18
- data/ext/html-to-markdown-rb/extconf.rb +0 -41
- data/ext/html-to-markdown-rb/native/Cargo.lock +0 -934
- data/ext/html-to-markdown-rb/native/Cargo.toml +0 -48
- data/ext/html-to-markdown-rb/native/README.md +0 -215
- data/ext/html-to-markdown-rb/native/src/conversion/inline_images.rs +0 -54
- data/ext/html-to-markdown-rb/native/src/conversion/metadata.rs +0 -158
- data/ext/html-to-markdown-rb/native/src/conversion/mod.rs +0 -11
- data/ext/html-to-markdown-rb/native/src/lib.rs +0 -128
- data/ext/html-to-markdown-rb/native/src/options.rs +0 -238
- data/ext/html-to-markdown-rb/native/src/types.rs +0 -24
- data/lib/html_to_markdown/cli.rb +0 -21
- data/lib/html_to_markdown/cli_proxy.rb +0 -74
- data/spec/cli_proxy_spec.rb +0 -42
- data/spec/spec_helper.rb +0 -10
|
@@ -58,7 +58,7 @@ use crate::visitor::{HtmlVisitor, NodeContext, NodeType, VisitResult};
|
|
|
58
58
|
///
|
|
59
59
|
/// # Examples
|
|
60
60
|
///
|
|
61
|
-
/// ```
|
|
61
|
+
/// ```text
|
|
62
62
|
/// let ctx = build_node_context(
|
|
63
63
|
/// NodeType::Heading,
|
|
64
64
|
/// "h1",
|
|
@@ -129,7 +129,7 @@ pub fn build_node_context(
|
|
|
129
129
|
///
|
|
130
130
|
/// # Examples
|
|
131
131
|
///
|
|
132
|
-
/// ```
|
|
132
|
+
/// ```text
|
|
133
133
|
/// let result = dispatch_visitor(
|
|
134
134
|
/// &visitor,
|
|
135
135
|
/// |v| v.visit_heading(&ctx, level, text, id),
|
|
@@ -252,7 +252,7 @@ impl VisitorDispatch {
|
|
|
252
252
|
///
|
|
253
253
|
/// # Syntax
|
|
254
254
|
///
|
|
255
|
-
/// ```
|
|
255
|
+
/// ```text
|
|
256
256
|
/// try_visitor!(visitor_option, method_name, ctx, arg1, arg2, ...);
|
|
257
257
|
/// ```
|
|
258
258
|
///
|
|
@@ -264,7 +264,7 @@ impl VisitorDispatch {
|
|
|
264
264
|
///
|
|
265
265
|
/// # Examples
|
|
266
266
|
///
|
|
267
|
-
/// ```
|
|
267
|
+
/// ```text
|
|
268
268
|
/// // Before (verbose):
|
|
269
269
|
/// let dispatch = dispatch_visitor(&visitor, |v| v.visit_heading(&ctx, level, text, id))?;
|
|
270
270
|
/// match dispatch {
|
|
@@ -311,13 +311,13 @@ macro_rules! try_visitor {
|
|
|
311
311
|
///
|
|
312
312
|
/// # Syntax
|
|
313
313
|
///
|
|
314
|
-
/// ```
|
|
314
|
+
/// ```text
|
|
315
315
|
/// try_visitor_element_start!(visitor_option, ctx);
|
|
316
316
|
/// ```
|
|
317
317
|
///
|
|
318
318
|
/// # Examples
|
|
319
319
|
///
|
|
320
|
-
/// ```
|
|
320
|
+
/// ```text
|
|
321
321
|
/// fn process_heading(...) -> Result<String> {
|
|
322
322
|
/// let ctx = build_node_context(...);
|
|
323
323
|
/// try_visitor_element_start!(visitor, &ctx)?;
|
|
@@ -340,13 +340,13 @@ macro_rules! try_visitor_element_start {
|
|
|
340
340
|
///
|
|
341
341
|
/// # Syntax
|
|
342
342
|
///
|
|
343
|
-
/// ```
|
|
343
|
+
/// ```text
|
|
344
344
|
/// try_visitor_element_end!(visitor_option, ctx, default_output_string);
|
|
345
345
|
/// ```
|
|
346
346
|
///
|
|
347
347
|
/// # Examples
|
|
348
348
|
///
|
|
349
|
-
/// ```
|
|
349
|
+
/// ```text
|
|
350
350
|
/// fn process_heading(...) -> Result<String> {
|
|
351
351
|
/// let ctx = build_node_context(...);
|
|
352
352
|
/// let mut output = String::from("# Heading");
|
|
@@ -54,6 +54,12 @@ fn test_commonmark_compliance() {
|
|
|
54
54
|
continue;
|
|
55
55
|
}
|
|
56
56
|
|
|
57
|
+
// Example 231: expects indented code blocks but we default to backtick-fenced
|
|
58
|
+
if test.example == 231 {
|
|
59
|
+
skipped += 1;
|
|
60
|
+
continue;
|
|
61
|
+
}
|
|
62
|
+
|
|
57
63
|
if test.section == "Tabs" {
|
|
58
64
|
skipped += 1;
|
|
59
65
|
continue;
|
|
@@ -55,7 +55,7 @@ fn test_inline_code() {
|
|
|
55
55
|
fn test_code_block() {
|
|
56
56
|
let html = "<pre><code>fn main() {\n println!(\"Hello\");\n}</code></pre>";
|
|
57
57
|
let result = convert(html, None).unwrap();
|
|
58
|
-
assert_eq!(result, "
|
|
58
|
+
assert_eq!(result, "```\nfn main() {\n println!(\"Hello\");\n}\n```\n");
|
|
59
59
|
}
|
|
60
60
|
|
|
61
61
|
#[test]
|
|
@@ -141,8 +141,8 @@ fn test_nested_lists() {
|
|
|
141
141
|
let html = "<ul><li>Item 1<ul><li>Nested 1</li><li>Nested 2</li></ul></li><li>Item 2</li></ul>";
|
|
142
142
|
let result = convert(html, None).unwrap();
|
|
143
143
|
assert!(result.contains("- Item 1"));
|
|
144
|
-
assert!(result.contains("
|
|
145
|
-
assert!(result.contains("
|
|
144
|
+
assert!(result.contains("Nested 1"), "Expected 'Nested 1' in result: {result:?}");
|
|
145
|
+
assert!(result.contains("Nested 2"), "Expected 'Nested 2' in result: {result:?}");
|
|
146
146
|
}
|
|
147
147
|
|
|
148
148
|
#[test]
|
|
@@ -44,10 +44,16 @@ fn converts_should_not_escape_in_pre_or_code_fixture() {
|
|
|
44
44
|
let pre_html = r"<pre>This pipe | should not be escaped.<pre/>";
|
|
45
45
|
|
|
46
46
|
let pre_markdown_without_misc = convert(pre_html, Some(default_options())).expect("conversion should succeed");
|
|
47
|
-
assert_eq!(
|
|
47
|
+
assert_eq!(
|
|
48
|
+
pre_markdown_without_misc.trim(),
|
|
49
|
+
"```\nThis pipe | should not be escaped.\n```"
|
|
50
|
+
);
|
|
48
51
|
|
|
49
52
|
let pre_markdown_with_misc = convert(pre_html, Some(escape_misc_options())).expect("conversion should succeed");
|
|
50
|
-
assert_eq!(
|
|
53
|
+
assert_eq!(
|
|
54
|
+
pre_markdown_with_misc.trim(),
|
|
55
|
+
"```\nThis pipe | should not be escaped.\n```"
|
|
56
|
+
);
|
|
51
57
|
|
|
52
58
|
let code_html = r"<code>This pipe | should not be escaped.<code/>";
|
|
53
59
|
|
|
@@ -51,8 +51,8 @@ fn test_nested_lists() {
|
|
|
51
51
|
|
|
52
52
|
let result = convert(html, None).unwrap();
|
|
53
53
|
assert!(result.contains("- Item 1"));
|
|
54
|
-
assert!(result.contains("
|
|
55
|
-
assert!(result.contains("
|
|
54
|
+
assert!(result.contains("* Nested 1"));
|
|
55
|
+
assert!(result.contains("* Nested 2"));
|
|
56
56
|
assert!(result.contains("- Item 2"));
|
|
57
57
|
}
|
|
58
58
|
|
|
@@ -129,7 +129,7 @@ fn test_list_indent_spaces() {
|
|
|
129
129
|
|
|
130
130
|
let result = convert(html, Some(options)).unwrap();
|
|
131
131
|
assert!(result.contains("- Parent"));
|
|
132
|
-
assert!(result.contains("
|
|
132
|
+
assert!(result.contains(" * Child"));
|
|
133
133
|
}
|
|
134
134
|
|
|
135
135
|
#[test]
|
|
@@ -149,7 +149,7 @@ fn test_list_indent_tabs() {
|
|
|
149
149
|
|
|
150
150
|
let result = convert(html, Some(options)).unwrap();
|
|
151
151
|
assert!(result.contains("- Parent"));
|
|
152
|
-
assert!(result.contains("\t
|
|
152
|
+
assert!(result.contains("\t* Child"));
|
|
153
153
|
}
|
|
154
154
|
|
|
155
155
|
#[test]
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: html-to-markdown
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 3.1.0
|
|
4
|
+
version: 3.2.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Na'aman Hirschfeld
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-04-
|
|
11
|
+
date: 2026-04-16 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rb_sys
|
|
@@ -38,7 +38,7 @@ email:
|
|
|
38
38
|
executables:
|
|
39
39
|
- html-to-markdown
|
|
40
40
|
extensions:
|
|
41
|
-
- ext/html-to-markdown-rb/extconf.rb
|
|
41
|
+
- ext/html_to_markdown_rb/extconf.rb
|
|
42
42
|
extra_rdoc_files:
|
|
43
43
|
- README.md
|
|
44
44
|
files:
|
|
@@ -51,33 +51,26 @@ files:
|
|
|
51
51
|
- Rakefile
|
|
52
52
|
- Steepfile
|
|
53
53
|
- exe/html-to-markdown
|
|
54
|
-
- ext/html-to-
|
|
55
|
-
- ext/
|
|
56
|
-
- ext/
|
|
57
|
-
- ext/
|
|
58
|
-
- ext/
|
|
59
|
-
- ext/
|
|
60
|
-
- ext/html-to-markdown-rb/native/src/conversion/metadata.rs
|
|
61
|
-
- ext/html-to-markdown-rb/native/src/conversion/mod.rs
|
|
62
|
-
- ext/html-to-markdown-rb/native/src/lib.rs
|
|
63
|
-
- ext/html-to-markdown-rb/native/src/options.rs
|
|
64
|
-
- ext/html-to-markdown-rb/native/src/types.rs
|
|
54
|
+
- ext/html-to-markdown_rb/Cargo.toml
|
|
55
|
+
- ext/html_to_markdown_rb/Cargo.toml
|
|
56
|
+
- ext/html_to_markdown_rb/extconf.rb
|
|
57
|
+
- ext/html_to_markdown_rb/src/html_to_markdown_rs.rb
|
|
58
|
+
- ext/html_to_markdown_rb/src/html_to_markdown_rs/version.rb
|
|
59
|
+
- ext/html_to_markdown_rb/src/lib.rs
|
|
65
60
|
- html-to-markdown-rb.gemspec
|
|
66
61
|
- lib/html_to_markdown.rb
|
|
67
|
-
- lib/html_to_markdown/cli.rb
|
|
68
|
-
- lib/html_to_markdown/cli_proxy.rb
|
|
69
62
|
- lib/html_to_markdown/version.rb
|
|
63
|
+
- lib/html_to_markdown_rs.rb
|
|
70
64
|
- sig/html_to_markdown.rbs
|
|
71
65
|
- sig/html_to_markdown/cli.rbs
|
|
72
66
|
- sig/html_to_markdown/cli_proxy.rbs
|
|
73
67
|
- sig/open3.rbs
|
|
74
|
-
- spec/cli_proxy_spec.rb
|
|
75
|
-
- spec/spec_helper.rb
|
|
76
68
|
- vendor/Cargo.toml
|
|
77
69
|
- vendor/html-to-markdown-rs/Cargo.toml
|
|
78
70
|
- vendor/html-to-markdown-rs/README.md
|
|
79
71
|
- vendor/html-to-markdown-rs/examples/basic.rs
|
|
80
72
|
- vendor/html-to-markdown-rs/examples/table.rs
|
|
73
|
+
- vendor/html-to-markdown-rs/examples/test_deser.rs
|
|
81
74
|
- vendor/html-to-markdown-rs/examples/test_escape.rs
|
|
82
75
|
- vendor/html-to-markdown-rs/examples/test_inline_formatting.rs
|
|
83
76
|
- vendor/html-to-markdown-rs/examples/test_lists.rs
|
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'mkmf'
|
|
4
|
-
require 'rb_sys/mkmf'
|
|
5
|
-
require 'rbconfig'
|
|
6
|
-
require 'pathname'
|
|
7
|
-
|
|
8
|
-
if RbConfig::CONFIG['host_os'] =~ /mswin|mingw/
|
|
9
|
-
devkit = ENV.fetch('RI_DEVKIT', nil)
|
|
10
|
-
prefix = ENV['MSYSTEM_PREFIX'] || '/ucrt64'
|
|
11
|
-
|
|
12
|
-
if devkit
|
|
13
|
-
sysroot = "#{devkit}#{prefix}".tr('\\\\', '/')
|
|
14
|
-
extra_args = [
|
|
15
|
-
'--target=x86_64-pc-windows-gnu',
|
|
16
|
-
"--sysroot=#{sysroot}"
|
|
17
|
-
]
|
|
18
|
-
|
|
19
|
-
existing = ENV['BINDGEN_EXTRA_CLANG_ARGS'].to_s.split(/\s+/)
|
|
20
|
-
ENV['BINDGEN_EXTRA_CLANG_ARGS'] = (existing + extra_args).uniq.join(' ')
|
|
21
|
-
end
|
|
22
|
-
end
|
|
23
|
-
|
|
24
|
-
default_profile = ENV.fetch('CARGO_PROFILE', 'release')
|
|
25
|
-
|
|
26
|
-
create_rust_makefile('html_to_markdown_rb') do |config|
|
|
27
|
-
config.profile = default_profile.to_sym
|
|
28
|
-
features_env = ENV.fetch('HTML_TO_MARKDOWN_CARGO_FEATURES', '')
|
|
29
|
-
features = features_env.split(',').map(&:strip).reject(&:empty?)
|
|
30
|
-
config.features = features unless features.empty?
|
|
31
|
-
|
|
32
|
-
native_dir = File.expand_path('native', __dir__)
|
|
33
|
-
relative_native =
|
|
34
|
-
begin
|
|
35
|
-
Pathname.new(native_dir).relative_path_from(Pathname.new(__dir__)).to_s
|
|
36
|
-
rescue ArgumentError
|
|
37
|
-
native_dir
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
config.ext_dir = relative_native
|
|
41
|
-
end
|