html-to-markdown 3.1.0 → 3.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63):
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +42 -12
  3. data/Gemfile +1 -0
  4. data/Gemfile.lock +27 -55
  5. data/README.md +9 -10
  6. data/Rakefile +4 -10
  7. data/ext/html-to-markdown_rb/Cargo.toml +14 -0
  8. data/ext/html_to_markdown_rb/Cargo.toml +16 -0
  9. data/ext/html_to_markdown_rb/extconf.rb +10 -0
  10. data/ext/html_to_markdown_rb/src/html_to_markdown_rs/version.rb +6 -0
  11. data/ext/html_to_markdown_rb/src/html_to_markdown_rs.rb +9 -0
  12. data/ext/html_to_markdown_rb/src/lib.rs +3941 -0
  13. data/html-to-markdown-rb.gemspec +1 -1
  14. data/lib/html_to_markdown/version.rb +1 -1
  15. data/lib/html_to_markdown.rb +31 -21
  16. data/{ext/html-to-markdown-rb/native/extconf.rb → lib/html_to_markdown_rs.rb} +1 -1
  17. data/sig/html_to_markdown.rbs +17 -5
  18. data/vendor/Cargo.toml +4 -4
  19. data/vendor/html-to-markdown-rs/Cargo.toml +2 -2
  20. data/vendor/html-to-markdown-rs/examples/test_deser.rs +12 -0
  21. data/vendor/html-to-markdown-rs/src/converter/block/mod.rs +1 -1
  22. data/vendor/html-to-markdown-rs/src/converter/block/table/mod.rs +1 -1
  23. data/vendor/html-to-markdown-rs/src/converter/form/mod.rs +1 -1
  24. data/vendor/html-to-markdown-rs/src/converter/inline/mod.rs +1 -1
  25. data/vendor/html-to-markdown-rs/src/converter/list/item.rs +10 -2
  26. data/vendor/html-to-markdown-rs/src/converter/mod.rs +2 -2
  27. data/vendor/html-to-markdown-rs/src/converter/semantic/mod.rs +1 -1
  28. data/vendor/html-to-markdown-rs/src/converter/utility/content.rs +1 -1
  29. data/vendor/html-to-markdown-rs/src/exports.rs +3 -3
  30. data/vendor/html-to-markdown-rs/src/inline_images.rs +1 -1
  31. data/vendor/html-to-markdown-rs/src/lib.rs +1 -2
  32. data/vendor/html-to-markdown-rs/src/metadata/config.rs +1 -1
  33. data/vendor/html-to-markdown-rs/src/metadata/mod.rs +5 -5
  34. data/vendor/html-to-markdown-rs/src/options/conversion.rs +6 -12
  35. data/vendor/html-to-markdown-rs/src/options/mod.rs +1 -1
  36. data/vendor/html-to-markdown-rs/src/options/preprocessing.rs +3 -9
  37. data/vendor/html-to-markdown-rs/src/options/validation.rs +3 -3
  38. data/vendor/html-to-markdown-rs/src/types/document.rs +11 -0
  39. data/vendor/html-to-markdown-rs/src/types/result.rs +5 -2
  40. data/vendor/html-to-markdown-rs/src/types/tables.rs +1 -1
  41. data/vendor/html-to-markdown-rs/src/visitor/mod.rs +1 -1
  42. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/state.rs +1 -1
  43. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/traversal.rs +1 -1
  44. data/vendor/html-to-markdown-rs/src/visitor_helpers.rs +8 -8
  45. data/vendor/html-to-markdown-rs/tests/commonmark_compliance_test.rs +6 -0
  46. data/vendor/html-to-markdown-rs/tests/integration_test.rs +3 -3
  47. data/vendor/html-to-markdown-rs/tests/issue_140_regressions.rs +8 -2
  48. data/vendor/html-to-markdown-rs/tests/lists_test.rs +4 -4
  49. metadata +11 -18
  50. data/ext/html-to-markdown-rb/extconf.rb +0 -41
  51. data/ext/html-to-markdown-rb/native/Cargo.lock +0 -934
  52. data/ext/html-to-markdown-rb/native/Cargo.toml +0 -48
  53. data/ext/html-to-markdown-rb/native/README.md +0 -215
  54. data/ext/html-to-markdown-rb/native/src/conversion/inline_images.rs +0 -54
  55. data/ext/html-to-markdown-rb/native/src/conversion/metadata.rs +0 -158
  56. data/ext/html-to-markdown-rb/native/src/conversion/mod.rs +0 -11
  57. data/ext/html-to-markdown-rb/native/src/lib.rs +0 -128
  58. data/ext/html-to-markdown-rb/native/src/options.rs +0 -238
  59. data/ext/html-to-markdown-rb/native/src/types.rs +0 -24
  60. data/lib/html_to_markdown/cli.rb +0 -21
  61. data/lib/html_to_markdown/cli_proxy.rb +0 -74
  62. data/spec/cli_proxy_spec.rb +0 -42
  63. data/spec/spec_helper.rb +0 -10
@@ -58,7 +58,7 @@ use crate::visitor::{HtmlVisitor, NodeContext, NodeType, VisitResult};
58
58
  ///
59
59
  /// # Examples
60
60
  ///
61
- /// ```ignore
61
+ /// ```text
62
62
  /// let ctx = build_node_context(
63
63
  /// NodeType::Heading,
64
64
  /// "h1",
@@ -129,7 +129,7 @@ pub fn build_node_context(
129
129
  ///
130
130
  /// # Examples
131
131
  ///
132
- /// ```ignore
132
+ /// ```text
133
133
  /// let result = dispatch_visitor(
134
134
  /// &visitor,
135
135
  /// |v| v.visit_heading(&ctx, level, text, id),
@@ -252,7 +252,7 @@ impl VisitorDispatch {
252
252
  ///
253
253
  /// # Syntax
254
254
  ///
255
- /// ```ignore
255
+ /// ```text
256
256
  /// try_visitor!(visitor_option, method_name, ctx, arg1, arg2, ...);
257
257
  /// ```
258
258
  ///
@@ -264,7 +264,7 @@ impl VisitorDispatch {
264
264
  ///
265
265
  /// # Examples
266
266
  ///
267
- /// ```ignore
267
+ /// ```text
268
268
  /// // Before (verbose):
269
269
  /// let dispatch = dispatch_visitor(&visitor, |v| v.visit_heading(&ctx, level, text, id))?;
270
270
  /// match dispatch {
@@ -311,13 +311,13 @@ macro_rules! try_visitor {
311
311
  ///
312
312
  /// # Syntax
313
313
  ///
314
- /// ```ignore
314
+ /// ```text
315
315
  /// try_visitor_element_start!(visitor_option, ctx);
316
316
  /// ```
317
317
  ///
318
318
  /// # Examples
319
319
  ///
320
- /// ```ignore
320
+ /// ```text
321
321
  /// fn process_heading(...) -> Result<String> {
322
322
  /// let ctx = build_node_context(...);
323
323
  /// try_visitor_element_start!(visitor, &ctx)?;
@@ -340,13 +340,13 @@ macro_rules! try_visitor_element_start {
340
340
  ///
341
341
  /// # Syntax
342
342
  ///
343
- /// ```ignore
343
+ /// ```text
344
344
  /// try_visitor_element_end!(visitor_option, ctx, default_output_string);
345
345
  /// ```
346
346
  ///
347
347
  /// # Examples
348
348
  ///
349
- /// ```ignore
349
+ /// ```text
350
350
  /// fn process_heading(...) -> Result<String> {
351
351
  /// let ctx = build_node_context(...);
352
352
  /// let mut output = String::from("# Heading");
@@ -54,6 +54,12 @@ fn test_commonmark_compliance() {
54
54
  continue;
55
55
  }
56
56
 
57
+ // Example 231: expects indented code blocks but we default to backtick-fenced
58
+ if test.example == 231 {
59
+ skipped += 1;
60
+ continue;
61
+ }
62
+
57
63
  if test.section == "Tabs" {
58
64
  skipped += 1;
59
65
  continue;
@@ -55,7 +55,7 @@ fn test_inline_code() {
55
55
  fn test_code_block() {
56
56
  let html = "<pre><code>fn main() {\n println!(\"Hello\");\n}</code></pre>";
57
57
  let result = convert(html, None).unwrap();
58
- assert_eq!(result, " fn main() {\n println!(\"Hello\");\n }\n");
58
+ assert_eq!(result, "```\nfn main() {\n println!(\"Hello\");\n}\n```\n");
59
59
  }
60
60
 
61
61
  #[test]
@@ -141,8 +141,8 @@ fn test_nested_lists() {
141
141
  let html = "<ul><li>Item 1<ul><li>Nested 1</li><li>Nested 2</li></ul></li><li>Item 2</li></ul>";
142
142
  let result = convert(html, None).unwrap();
143
143
  assert!(result.contains("- Item 1"));
144
- assert!(result.contains(" - Nested 1"));
145
- assert!(result.contains(" - Nested 2"));
144
+ assert!(result.contains("Nested 1"), "Expected 'Nested 1' in result: {result:?}");
145
+ assert!(result.contains("Nested 2"), "Expected 'Nested 2' in result: {result:?}");
146
146
  }
147
147
 
148
148
  #[test]
@@ -44,10 +44,16 @@ fn converts_should_not_escape_in_pre_or_code_fixture() {
44
44
  let pre_html = r"<pre>This pipe | should not be escaped.<pre/>";
45
45
 
46
46
  let pre_markdown_without_misc = convert(pre_html, Some(default_options())).expect("conversion should succeed");
47
- assert_eq!(pre_markdown_without_misc.trim(), "This pipe | should not be escaped.");
47
+ assert_eq!(
48
+ pre_markdown_without_misc.trim(),
49
+ "```\nThis pipe | should not be escaped.\n```"
50
+ );
48
51
 
49
52
  let pre_markdown_with_misc = convert(pre_html, Some(escape_misc_options())).expect("conversion should succeed");
50
- assert_eq!(pre_markdown_with_misc.trim(), "This pipe | should not be escaped.");
53
+ assert_eq!(
54
+ pre_markdown_with_misc.trim(),
55
+ "```\nThis pipe | should not be escaped.\n```"
56
+ );
51
57
 
52
58
  let code_html = r"<code>This pipe | should not be escaped.<code/>";
53
59
 
@@ -51,8 +51,8 @@ fn test_nested_lists() {
51
51
 
52
52
  let result = convert(html, None).unwrap();
53
53
  assert!(result.contains("- Item 1"));
54
- assert!(result.contains("- Nested 1"));
55
- assert!(result.contains("- Nested 2"));
54
+ assert!(result.contains("* Nested 1"));
55
+ assert!(result.contains("* Nested 2"));
56
56
  assert!(result.contains("- Item 2"));
57
57
  }
58
58
 
@@ -129,7 +129,7 @@ fn test_list_indent_spaces() {
129
129
 
130
130
  let result = convert(html, Some(options)).unwrap();
131
131
  assert!(result.contains("- Parent"));
132
- assert!(result.contains(" - Child"));
132
+ assert!(result.contains(" * Child"));
133
133
  }
134
134
 
135
135
  #[test]
@@ -149,7 +149,7 @@ fn test_list_indent_tabs() {
149
149
 
150
150
  let result = convert(html, Some(options)).unwrap();
151
151
  assert!(result.contains("- Parent"));
152
- assert!(result.contains("\t- Child"));
152
+ assert!(result.contains("\t* Child"));
153
153
  }
154
154
 
155
155
  #[test]
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-to-markdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.0
4
+ version: 3.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Na'aman Hirschfeld
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-04-01 00:00:00.000000000 Z
11
+ date: 2026-04-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -38,7 +38,7 @@ email:
38
38
  executables:
39
39
  - html-to-markdown
40
40
  extensions:
41
- - ext/html-to-markdown-rb/extconf.rb
41
+ - ext/html_to_markdown_rb/extconf.rb
42
42
  extra_rdoc_files:
43
43
  - README.md
44
44
  files:
@@ -51,33 +51,26 @@ files:
51
51
  - Rakefile
52
52
  - Steepfile
53
53
  - exe/html-to-markdown
54
- - ext/html-to-markdown-rb/extconf.rb
55
- - ext/html-to-markdown-rb/native/Cargo.lock
56
- - ext/html-to-markdown-rb/native/Cargo.toml
57
- - ext/html-to-markdown-rb/native/README.md
58
- - ext/html-to-markdown-rb/native/extconf.rb
59
- - ext/html-to-markdown-rb/native/src/conversion/inline_images.rs
60
- - ext/html-to-markdown-rb/native/src/conversion/metadata.rs
61
- - ext/html-to-markdown-rb/native/src/conversion/mod.rs
62
- - ext/html-to-markdown-rb/native/src/lib.rs
63
- - ext/html-to-markdown-rb/native/src/options.rs
64
- - ext/html-to-markdown-rb/native/src/types.rs
54
+ - ext/html-to-markdown_rb/Cargo.toml
55
+ - ext/html_to_markdown_rb/Cargo.toml
56
+ - ext/html_to_markdown_rb/extconf.rb
57
+ - ext/html_to_markdown_rb/src/html_to_markdown_rs.rb
58
+ - ext/html_to_markdown_rb/src/html_to_markdown_rs/version.rb
59
+ - ext/html_to_markdown_rb/src/lib.rs
65
60
  - html-to-markdown-rb.gemspec
66
61
  - lib/html_to_markdown.rb
67
- - lib/html_to_markdown/cli.rb
68
- - lib/html_to_markdown/cli_proxy.rb
69
62
  - lib/html_to_markdown/version.rb
63
+ - lib/html_to_markdown_rs.rb
70
64
  - sig/html_to_markdown.rbs
71
65
  - sig/html_to_markdown/cli.rbs
72
66
  - sig/html_to_markdown/cli_proxy.rbs
73
67
  - sig/open3.rbs
74
- - spec/cli_proxy_spec.rb
75
- - spec/spec_helper.rb
76
68
  - vendor/Cargo.toml
77
69
  - vendor/html-to-markdown-rs/Cargo.toml
78
70
  - vendor/html-to-markdown-rs/README.md
79
71
  - vendor/html-to-markdown-rs/examples/basic.rs
80
72
  - vendor/html-to-markdown-rs/examples/table.rs
73
+ - vendor/html-to-markdown-rs/examples/test_deser.rs
81
74
  - vendor/html-to-markdown-rs/examples/test_escape.rs
82
75
  - vendor/html-to-markdown-rs/examples/test_inline_formatting.rs
83
76
  - vendor/html-to-markdown-rs/examples/test_lists.rs
@@ -1,41 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'mkmf'
4
- require 'rb_sys/mkmf'
5
- require 'rbconfig'
6
- require 'pathname'
7
-
8
- if RbConfig::CONFIG['host_os'] =~ /mswin|mingw/
9
- devkit = ENV.fetch('RI_DEVKIT', nil)
10
- prefix = ENV['MSYSTEM_PREFIX'] || '/ucrt64'
11
-
12
- if devkit
13
- sysroot = "#{devkit}#{prefix}".tr('\\\\', '/')
14
- extra_args = [
15
- '--target=x86_64-pc-windows-gnu',
16
- "--sysroot=#{sysroot}"
17
- ]
18
-
19
- existing = ENV['BINDGEN_EXTRA_CLANG_ARGS'].to_s.split(/\s+/)
20
- ENV['BINDGEN_EXTRA_CLANG_ARGS'] = (existing + extra_args).uniq.join(' ')
21
- end
22
- end
23
-
24
- default_profile = ENV.fetch('CARGO_PROFILE', 'release')
25
-
26
- create_rust_makefile('html_to_markdown_rb') do |config|
27
- config.profile = default_profile.to_sym
28
- features_env = ENV.fetch('HTML_TO_MARKDOWN_CARGO_FEATURES', '')
29
- features = features_env.split(',').map(&:strip).reject(&:empty?)
30
- config.features = features unless features.empty?
31
-
32
- native_dir = File.expand_path('native', __dir__)
33
- relative_native =
34
- begin
35
- Pathname.new(native_dir).relative_path_from(Pathname.new(__dir__)).to_s
36
- rescue ArgumentError
37
- native_dir
38
- end
39
-
40
- config.ext_dir = relative_native
41
- end