html-to-markdown 2.26.2 → 2.26.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 12d5559bda903dfbeb563dba48c733845621b2c67cbca597fb4111c094f33fe0
4
- data.tar.gz: 3ef2fdc3b30051c1eec6a956c42465f3d31670f81f345b67d98a84935f77e02c
3
+ metadata.gz: 23d0242cd4fc575d8081e675fb8d16f09faa7fb1c6c0df9b18d21338c0391880
4
+ data.tar.gz: cf86724440a34a26e1f17c134a232b1b321edaacb95758e42f9eab59dc710f8b
5
5
  SHA512:
6
- metadata.gz: dee90b55391d5f84466c2a2d3591a7d3565ebc88357118b0725d57ad2c06cc5e9f965a93f4ad6111bab89c589552cc0f3ecfb21701ae4f0e18b9b9d55e0aa3ef
7
- data.tar.gz: 7b0927d2fa482712bdfac03152a19375e1744e3e22b0121d7567967bc2fa215396d8e292f4d2974962477b839a644e96b01d828687a8fd520cc22171b3a83908
6
+ metadata.gz: 63afe8bdf9d36f4cc225859e3a7ebb62452e97feafccc5ea1e20564a47b7e037900b6af7300508eec4313d4306608e3b00bc5f6a3115001ee250d5c560880bb6
7
+ data.tar.gz: c8fcaa6e61fea4325b08ce39ebf0a2bd92ff4e6d58702497e38cc6a081c1eec3de095d72986ccdc828ee6e219697d53c12483087ee563d39f767fe989d72ffdb
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- html-to-markdown (2.26.2)
4
+ html-to-markdown (2.26.3)
5
5
  rb_sys (>= 0.9, < 1.0)
6
6
 
7
7
  GEM
@@ -20,6 +20,8 @@ GEM
20
20
  securerandom (>= 0.3)
21
21
  tzinfo (~> 2.0, >= 2.0.5)
22
22
  uri (>= 0.13.1)
23
+ addressable (2.8.9)
24
+ public_suffix (>= 2.0.2, < 8.0)
23
25
  ast (2.4.3)
24
26
  base64 (0.3.0)
25
27
  bigdecimal (4.0.1)
@@ -37,6 +39,9 @@ GEM
37
39
  i18n (1.14.8)
38
40
  concurrent-ruby (~> 1.0)
39
41
  json (2.18.1)
42
+ json-schema (6.1.0)
43
+ addressable (~> 2.8)
44
+ bigdecimal (>= 3.1, < 5)
40
45
  language_server-protocol (3.17.0.5)
41
46
  lint_roller (1.1.0)
42
47
  listen (3.10.0)
@@ -44,14 +49,18 @@ GEM
44
49
  rb-fsevent (~> 0.10, >= 0.10.3)
45
50
  rb-inotify (~> 0.9, >= 0.9.10)
46
51
  logger (1.7.0)
47
- minitest (6.0.1)
52
+ mcp (0.7.1)
53
+ json-schema (>= 4.1)
54
+ minitest (6.0.2)
55
+ drb (~> 2.0)
48
56
  prism (~> 1.5)
49
57
  mutex_m (0.3.0)
50
58
  parallel (1.27.0)
51
- parser (3.3.10.1)
59
+ parser (3.3.10.2)
52
60
  ast (~> 2.4.1)
53
61
  racc
54
62
  prism (1.9.0)
63
+ public_suffix (7.0.2)
55
64
  racc (1.8.1)
56
65
  rainbow (3.1.1)
57
66
  rake (13.3.1)
@@ -76,14 +85,15 @@ GEM
76
85
  rspec-expectations (3.13.5)
77
86
  diff-lcs (>= 1.2.0, < 2.0)
78
87
  rspec-support (~> 3.13.0)
79
- rspec-mocks (3.13.7)
88
+ rspec-mocks (3.13.8)
80
89
  diff-lcs (>= 1.2.0, < 2.0)
81
90
  rspec-support (~> 3.13.0)
82
91
  rspec-support (3.13.7)
83
- rubocop (1.84.2)
92
+ rubocop (1.85.0)
84
93
  json (~> 2.3)
85
94
  language_server-protocol (~> 3.17.0.2)
86
95
  lint_roller (~> 1.1.0)
96
+ mcp (~> 0.6)
87
97
  parallel (~> 1.10)
88
98
  parser (>= 3.3.0.2)
89
99
  rainbow (>= 2.2.2, < 4.0)
@@ -147,6 +157,7 @@ DEPENDENCIES
147
157
 
148
158
  CHECKSUMS
149
159
  activesupport (8.1.2) sha256=88842578ccd0d40f658289b0e8c842acfe9af751afee2e0744a7873f50b6fdae
160
+ addressable (2.8.9) sha256=cc154fcbe689711808a43601dee7b980238ce54368d23e127421753e46895485
150
161
  ast (2.4.3) sha256=954615157c1d6a382bc27d690d973195e79db7f55e9765ac7c481c60bdb4d383
151
162
  base64 (0.3.0) sha256=27337aeabad6ffae05c265c450490628ef3ebd4b67be58257393227588f5a97b
152
163
  bigdecimal (4.0.1) sha256=8b07d3d065a9f921c80ceaea7c9d4ae596697295b584c296fe599dd0ad01c4a7
@@ -161,18 +172,21 @@ CHECKSUMS
161
172
  ffi (1.17.3-x86_64-darwin) sha256=1f211811eb5cfaa25998322cdd92ab104bfbd26d1c4c08471599c511f2c00bb5
162
173
  ffi (1.17.3-x86_64-linux-gnu) sha256=3746b01f677aae7b16dc1acb7cb3cc17b3e35bdae7676a3f568153fb0e2c887f
163
174
  fileutils (1.8.0) sha256=8c6b1df54e2540bdb2f39258f08af78853aa70bad52b4d394bbc6424593c6e02
164
- html-to-markdown (2.26.2)
175
+ html-to-markdown (2.26.3)
165
176
  i18n (1.14.8) sha256=285778639134865c5e0f6269e0b818256017e8cde89993fdfcbfb64d088824a5
166
177
  json (2.18.1) sha256=fe112755501b8d0466b5ada6cf50c8c3f41e897fa128ac5d263ec09eedc9f986
178
+ json-schema (6.1.0) sha256=6bf70a2cfb6dfd5a06da28093fa8190f324c88eabd36a7f47097f227321dc702
167
179
  language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
168
180
  lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87
169
181
  listen (3.10.0) sha256=c6e182db62143aeccc2e1960033bebe7445309c7272061979bb098d03760c9d2
170
182
  logger (1.7.0) sha256=196edec7cc44b66cfb40f9755ce11b392f21f7967696af15d274dde7edff0203
171
- minitest (6.0.1) sha256=7854c74f48e2e975969062833adc4013f249a4b212f5e7b9d5c040bf838d54bb
183
+ mcp (0.7.1) sha256=fa967895d6952bad0d981ea907731d8528d2c246d2079d56a9c8bae83d14f1c7
184
+ minitest (6.0.2) sha256=db6e57956f6ecc6134683b4c87467d6dd792323c7f0eea7b93f66bd284adbc3d
172
185
  mutex_m (0.3.0) sha256=cfcb04ac16b69c4813777022fdceda24e9f798e48092a2b817eb4c0a782b0751
173
186
  parallel (1.27.0) sha256=4ac151e1806b755fb4e2dc2332cbf0e54f2e24ba821ff2d3dcf86bf6dc4ae130
174
- parser (3.3.10.1) sha256=06f6a725d2cd91e5e7f2b7c32ba143631e1f7c8ae2fb918fc4cebec187e6a688
187
+ parser (3.3.10.2) sha256=6f60c84aa4bdcedb6d1a2434b738fe8a8136807b6adc8f7f53b97da9bc4e9357
175
188
  prism (1.9.0) sha256=7b530c6a9f92c24300014919c9dcbc055bf4cdf51ec30aed099b06cd6674ef85
189
+ public_suffix (7.0.2) sha256=9114090c8e4e7135c1fd0e7acfea33afaab38101884320c65aaa0ffb8e26a857
176
190
  racc (1.8.1) sha256=4a7f6929691dbec8b5209a0b373bc2614882b55fc5d2e447a21aaa691303d62f
177
191
  rainbow (3.1.1) sha256=039491aa3a89f42efa1d6dec2fc4e62ede96eb6acd95e52f1ad581182b79bc6a
178
192
  rake (13.3.1) sha256=8c9e89d09f66a26a01264e7e3480ec0607f0c497a861ef16063604b1b08eb19c
@@ -186,9 +200,9 @@ CHECKSUMS
186
200
  rspec (3.13.2) sha256=206284a08ad798e61f86d7ca3e376718d52c0bc944626b2349266f239f820587
187
201
  rspec-core (3.13.6) sha256=a8823c6411667b60a8bca135364351dda34cd55e44ff94c4be4633b37d828b2d
188
202
  rspec-expectations (3.13.5) sha256=33a4d3a1d95060aea4c94e9f237030a8f9eae5615e9bd85718fe3a09e4b58836
189
- rspec-mocks (3.13.7) sha256=0979034e64b1d7a838aaaddf12bf065ea4dc40ef3d4c39f01f93ae2c66c62b1c
203
+ rspec-mocks (3.13.8) sha256=086ad3d3d17533f4237643de0b5c42f04b66348c28bf6b9c2d3f4a3b01af1d47
190
204
  rspec-support (3.13.7) sha256=0640e5570872aafefd79867901deeeeb40b0c9875a36b983d85f54fb7381c47c
191
- rubocop (1.84.2) sha256=5692cea54168f3dc8cb79a6fe95c5424b7ea893c707ad7a4307b0585e88dbf5f
205
+ rubocop (1.85.0) sha256=317407feb681a07d54f64d2f9e1d6b6af1ce7678e51cd658e3ad8bd66da48c01
192
206
  rubocop-ast (1.49.0) sha256=49c3676d3123a0923d333e20c6c2dbaaae2d2287b475273fddee0c61da9f71fd
193
207
  rubocop-rspec (3.9.0) sha256=8fa70a3619408237d789aeecfb9beef40576acc855173e60939d63332fdb55e2
194
208
  ruby-progressbar (1.13.0) sha256=80fc9c47a9b640d6834e0dc7b3c94c9df37f08cb072b7761e4a71e22cff29b33
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "html-to-markdown-rb"
3
- version ="2.26.2"
3
+ version ="2.26.3"
4
4
  edition = "2024"
5
5
  authors = ["Na'aman Hirschfeld <naaman@kreuzberg.dev>"]
6
6
  license = "MIT"
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HtmlToMarkdown
4
- VERSION = '2.26.2'
4
+ VERSION = '2.26.3'
5
5
  end
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "html-to-markdown-rs"
3
- version = "2.26.2"
3
+ version = "2.26.3"
4
4
  edition = "2024"
5
5
  authors = ["Na'aman Hirschfeld <naaman@kreuzberg.dev>"]
6
6
  license = "MIT"
@@ -87,6 +87,8 @@ pub fn handle_subscript(
87
87
  } else {
88
88
  output.push_str(&options.sub_symbol);
89
89
  }
90
+ } else {
91
+ output.push_str(trimmed);
90
92
  }
91
93
  append_inline_suffix(output, suffix, !trimmed.is_empty(), node_handle, parser, dom_ctx);
92
94
  }
@@ -139,6 +141,8 @@ pub fn handle_superscript(
139
141
  } else {
140
142
  output.push_str(&options.sup_symbol);
141
143
  }
144
+ } else {
145
+ output.push_str(trimmed);
142
146
  }
143
147
  append_inline_suffix(output, suffix, !trimmed.is_empty(), node_handle, parser, dom_ctx);
144
148
  }
@@ -82,6 +82,12 @@ pub fn process_text_node(
82
82
  if !output.ends_with("\n\n") {
83
83
  if let Some(next_tag) = get_next_sibling_tag(node_handle, parser, dom_ctx) {
84
84
  if is_inline_element(next_tag) {
85
+ // Newlines between inline elements collapse to a single space
86
+ // in HTML rendering (per CSS white-space: normal). Preserve
87
+ // this word boundary so adjacent inline content doesn't merge.
88
+ if !output.ends_with(' ') && !output.ends_with('\n') {
89
+ output.push(' ');
90
+ }
85
91
  return;
86
92
  }
87
93
  }
@@ -373,6 +373,66 @@ fn test_superscript_leading_whitespace() {
373
373
  assert_eq!(result, "hello ^world^\n");
374
374
  }
375
375
 
376
+ #[test]
377
+ fn test_subscript_default_passthrough() {
378
+ let html = "<p>H<sub>2</sub>O</p>";
379
+ let result = convert(html, None).unwrap();
380
+ assert_eq!(result, "H2O\n");
381
+ }
382
+
383
+ #[test]
384
+ fn test_superscript_default_passthrough() {
385
+ let html = "<p>x<sup>2</sup> + y<sup>3</sup></p>";
386
+ let result = convert(html, None).unwrap();
387
+ assert_eq!(result, "x2 + y3\n");
388
+ }
389
+
390
+ #[test]
391
+ fn test_subscript_superscript_combined_default() {
392
+ let html = "<p>CO<sub>2</sub><sup>*</sup></p>";
393
+ let result = convert(html, None).unwrap();
394
+ assert_eq!(result, "CO2*\n");
395
+ }
396
+
397
+ #[test]
398
+ fn test_subscript_html_tag_symbol() {
399
+ let html = "<p>H<sub>2</sub>O</p>";
400
+ let opts = ConversionOptions {
401
+ sub_symbol: "<sub>".to_string(),
402
+ ..Default::default()
403
+ };
404
+ let result = convert(html, Some(opts)).unwrap();
405
+ assert_eq!(result, "H<sub>2</sub>O\n");
406
+ }
407
+
408
+ #[test]
409
+ fn test_adjacent_links_with_newline_separator() {
410
+ let html = "<p>\n<a href=\"/page1\">Link 1</a>\n<a href=\"/page2\">Link 2</a>\n</p>";
411
+ let result = convert(html, None).unwrap();
412
+ assert_eq!(result, "[Link 1](/page1) [Link 2](/page2)\n");
413
+ }
414
+
415
+ #[test]
416
+ fn test_adjacent_links_no_whitespace() {
417
+ let html = "<p><a href=\"/page1\">Link 1</a><a href=\"/page2\">Link 2</a></p>";
418
+ let result = convert(html, None).unwrap();
419
+ assert_eq!(result, "[Link 1](/page1)[Link 2](/page2)\n");
420
+ }
421
+
422
+ #[test]
423
+ fn test_adjacent_links_with_space() {
424
+ let html = "<p><a href=\"/page1\">Link 1</a> <a href=\"/page2\">Link 2</a></p>";
425
+ let result = convert(html, None).unwrap();
426
+ assert_eq!(result, "[Link 1](/page1) [Link 2](/page2)\n");
427
+ }
428
+
429
+ #[test]
430
+ fn test_adjacent_inline_elements_with_newline() {
431
+ let html = "<p><strong>bold</strong>\n<em>italic</em></p>";
432
+ let result = convert(html, None).unwrap();
433
+ assert_eq!(result, "**bold** *italic*\n");
434
+ }
435
+
376
436
  #[test]
377
437
  fn test_autolink() {
378
438
  let html = "<p><a href=\"https://example.com\">https://example.com</a></p>";
data/spec/visitor_spec.rb CHANGED
@@ -35,7 +35,7 @@ RSpec.describe HtmlToMarkdown do
35
35
  visit_definition_list_end visit_form visit_input visit_button visit_audio visit_video
36
36
  visit_iframe visit_details visit_summary visit_figure_start visit_figcaption
37
37
  visit_figure_end
38
- ].each_with_object({}) { |name, hash| hash[name.to_sym] = { type: :continue } }
38
+ ].to_h { |name| [name.to_sym, { type: :continue }] }
39
39
  end
40
40
 
41
41
  def create_visitor(**overrides)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-to-markdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.26.2
4
+ version: 2.26.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Na'aman Hirschfeld