html-to-markdown 2.24.4 → 2.24.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7aba0e152474e36acd5ca4a9abb8cd891a6edd671086048415eec13bb25a69af
4
- data.tar.gz: 55c7d8340aa73d115005745d8e93894c7494588a1458f55427b765ff392f1cd6
3
+ metadata.gz: 4d566a6fb7b9c675389f2f4998f64ccc15e5c10585f0ec70de610052787129d7
4
+ data.tar.gz: 8f32d24f5ff7fe7144772315f1524463199c063a1d561fc640d5077cbea2312f
5
5
  SHA512:
6
- metadata.gz: 8a591d0bbbba4cc5682cc8d51e6f851199a70065294352251b4d6b7794ce53d257ab66c873b79da9c10f15b74da7670cf77caa3ddd1eec5d93593435aa6979b7
7
- data.tar.gz: b979428207c4ebc1e7a7ea0f9d4e4859f11c50558bd11f9fae2e7e61d6901538c09594e7307bfce3910f4abfa3d9a4fe230f1a61312c592c86c5c30693ed3b5b
6
+ metadata.gz: b318d7be65796bad2aac9a4b466a76081e5db2ff86e53725ce43cb555da96224fe6a0502b523543197676ac649b9a8474fd043e59195bda9e5aef804828b742c
7
+ data.tar.gz: 322c526cb10c704bc7221885990f807a6500c3949cddefa2d0e45c92e5b89e9f08705a9cb62b71d25b768d95b613aa34d5722aea4a617da71fc71725f9f7963f
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- html-to-markdown (2.24.4)
4
+ html-to-markdown (2.24.5)
5
5
  rb_sys (>= 0.9, < 1.0)
6
6
 
7
7
  GEM
@@ -161,7 +161,7 @@ CHECKSUMS
161
161
  ffi (1.17.3-x86_64-darwin) sha256=1f211811eb5cfaa25998322cdd92ab104bfbd26d1c4c08471599c511f2c00bb5
162
162
  ffi (1.17.3-x86_64-linux-gnu) sha256=3746b01f677aae7b16dc1acb7cb3cc17b3e35bdae7676a3f568153fb0e2c887f
163
163
  fileutils (1.8.0) sha256=8c6b1df54e2540bdb2f39258f08af78853aa70bad52b4d394bbc6424593c6e02
164
- html-to-markdown (2.24.4)
164
+ html-to-markdown (2.24.5)
165
165
  i18n (1.14.8) sha256=285778639134865c5e0f6269e0b818256017e8cde89993fdfcbfb64d088824a5
166
166
  json (2.18.0) sha256=b10506aee4183f5cf49e0efc48073d7b75843ce3782c68dbeb763351c08fd505
167
167
  language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
@@ -384,7 +384,7 @@ dependencies = [
384
384
 
385
385
  [[package]]
386
386
  name = "html-to-markdown-rb"
387
- version = "2.24.4"
387
+ version = "2.24.5"
388
388
  dependencies = [
389
389
  "html-to-markdown-rs",
390
390
  "magnus",
@@ -1387,9 +1387,9 @@ dependencies = [
1387
1387
 
1388
1388
  [[package]]
1389
1389
  name = "zmij"
1390
- version = "1.0.18"
1390
+ version = "1.0.19"
1391
1391
  source = "registry+https://github.com/rust-lang/crates.io-index"
1392
- checksum = "1966f8ac2c1f76987d69a74d0e0f929241c10e78136434e3be70ff7f58f64214"
1392
+ checksum = "3ff05f8caa9038894637571ae6b9e29466c1f4f829d26c9b28f869a29cbe3445"
1393
1393
 
1394
1394
  [[package]]
1395
1395
  name = "zune-core"
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "html-to-markdown-rb"
3
- version ="2.24.4"
3
+ version ="2.24.5"
4
4
  edition = "2024"
5
5
  authors = ["Na'aman Hirschfeld <naaman@kreuzberg.dev>"]
6
6
  license = "MIT"
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HtmlToMarkdown
4
- VERSION = '2.24.4'
4
+ VERSION = '2.24.5'
5
5
  end
@@ -52,7 +52,7 @@ pub fn handle_subscript(
52
52
  depth: usize,
53
53
  dom_ctx: &DomContext,
54
54
  ) {
55
- use crate::converter::walk_node;
55
+ use crate::converter::{append_inline_suffix, chomp_inline, walk_node};
56
56
 
57
57
  let Some(node) = node_handle.get(parser) else { return };
58
58
 
@@ -61,29 +61,34 @@ pub fn handle_subscript(
61
61
  _ => return,
62
62
  };
63
63
 
64
- if !ctx.in_code {
65
- if options.output_format == OutputFormat::Djot {
66
- output.push('~');
67
- } else if !options.sub_symbol.is_empty() {
68
- output.push_str(&options.sub_symbol);
69
- }
70
- }
71
-
64
+ let mut content = String::with_capacity(32);
72
65
  let children = tag.children();
73
66
  for child_handle in children.top().iter() {
74
- walk_node(child_handle, parser, output, options, ctx, depth + 1, dom_ctx);
67
+ walk_node(child_handle, parser, &mut content, options, ctx, depth + 1, dom_ctx);
75
68
  }
76
69
 
77
- if !ctx.in_code {
70
+ if ctx.in_code {
71
+ output.push_str(&content);
72
+ return;
73
+ }
74
+
75
+ let (prefix, suffix, trimmed) = chomp_inline(&content);
76
+ if !trimmed.is_empty() {
77
+ output.push_str(prefix);
78
78
  if options.output_format == OutputFormat::Djot {
79
79
  output.push('~');
80
+ output.push_str(trimmed);
81
+ output.push('~');
80
82
  } else if !options.sub_symbol.is_empty() {
83
+ output.push_str(&options.sub_symbol);
84
+ output.push_str(trimmed);
81
85
  if options.sub_symbol.starts_with('<') && !options.sub_symbol.starts_with("</") {
82
86
  output.push_str(&options.sub_symbol.replace('<', "</"));
83
87
  } else {
84
88
  output.push_str(&options.sub_symbol);
85
89
  }
86
90
  }
91
+ append_inline_suffix(output, suffix, !trimmed.is_empty(), node_handle, parser, dom_ctx);
87
92
  }
88
93
  }
89
94
 
@@ -99,7 +104,7 @@ pub fn handle_superscript(
99
104
  depth: usize,
100
105
  dom_ctx: &DomContext,
101
106
  ) {
102
- use crate::converter::walk_node;
107
+ use crate::converter::{append_inline_suffix, chomp_inline, walk_node};
103
108
 
104
109
  let Some(node) = node_handle.get(parser) else { return };
105
110
 
@@ -108,29 +113,34 @@ pub fn handle_superscript(
108
113
  _ => return,
109
114
  };
110
115
 
111
- if !ctx.in_code {
112
- if options.output_format == OutputFormat::Djot {
113
- output.push('^');
114
- } else if !options.sup_symbol.is_empty() {
115
- output.push_str(&options.sup_symbol);
116
- }
117
- }
118
-
116
+ let mut content = String::with_capacity(32);
119
117
  let children = tag.children();
120
118
  for child_handle in children.top().iter() {
121
- walk_node(child_handle, parser, output, options, ctx, depth + 1, dom_ctx);
119
+ walk_node(child_handle, parser, &mut content, options, ctx, depth + 1, dom_ctx);
122
120
  }
123
121
 
124
- if !ctx.in_code {
122
+ if ctx.in_code {
123
+ output.push_str(&content);
124
+ return;
125
+ }
126
+
127
+ let (prefix, suffix, trimmed) = chomp_inline(&content);
128
+ if !trimmed.is_empty() {
129
+ output.push_str(prefix);
125
130
  if options.output_format == OutputFormat::Djot {
126
131
  output.push('^');
132
+ output.push_str(trimmed);
133
+ output.push('^');
127
134
  } else if !options.sup_symbol.is_empty() {
135
+ output.push_str(&options.sup_symbol);
136
+ output.push_str(trimmed);
128
137
  if options.sup_symbol.starts_with('<') && !options.sup_symbol.starts_with("</") {
129
138
  output.push_str(&options.sup_symbol.replace('<', "</"));
130
139
  } else {
131
140
  output.push_str(&options.sup_symbol);
132
141
  }
133
142
  }
143
+ append_inline_suffix(output, suffix, !trimmed.is_empty(), node_handle, parser, dom_ctx);
134
144
  }
135
145
  }
136
146
 
@@ -318,6 +318,28 @@ fn test_subscript() {
318
318
  assert_eq!(result, "H~2~O\n");
319
319
  }
320
320
 
321
+ #[test]
322
+ fn test_subscript_trailing_whitespace() {
323
+ let html = "<p><sub>hello </sub>world</p>";
324
+ let opts = ConversionOptions {
325
+ sub_symbol: "~".to_string(),
326
+ ..Default::default()
327
+ };
328
+ let result = convert(html, Some(opts)).unwrap();
329
+ assert_eq!(result, "~hello~ world\n");
330
+ }
331
+
332
+ #[test]
333
+ fn test_subscript_leading_whitespace() {
334
+ let html = "<p>hello<sub> world</sub></p>";
335
+ let opts = ConversionOptions {
336
+ sub_symbol: "~".to_string(),
337
+ ..Default::default()
338
+ };
339
+ let result = convert(html, Some(opts)).unwrap();
340
+ assert_eq!(result, "hello ~world~\n");
341
+ }
342
+
321
343
  #[test]
322
344
  fn test_superscript() {
323
345
  let html = "<p>x<sup>2</sup></p>";
@@ -329,6 +351,28 @@ fn test_superscript() {
329
351
  assert_eq!(result, "x^2^\n");
330
352
  }
331
353
 
354
+ #[test]
355
+ fn test_superscript_trailing_whitespace() {
356
+ let html = "<p><sup>hello </sup>world</p>";
357
+ let opts = ConversionOptions {
358
+ sup_symbol: "^".to_string(),
359
+ ..Default::default()
360
+ };
361
+ let result = convert(html, Some(opts)).unwrap();
362
+ assert_eq!(result, "^hello^ world\n");
363
+ }
364
+
365
+ #[test]
366
+ fn test_superscript_leading_whitespace() {
367
+ let html = "<p>hello<sup> world</sup></p>";
368
+ let opts = ConversionOptions {
369
+ sup_symbol: "^".to_string(),
370
+ ..Default::default()
371
+ };
372
+ let result = convert(html, Some(opts)).unwrap();
373
+ assert_eq!(result, "hello ^world^\n");
374
+ }
375
+
332
376
  #[test]
333
377
  fn test_autolink() {
334
378
  let html = "<p><a href=\"https://example.com\">https://example.com</a></p>";
@@ -1 +1 @@
1
- {"files":{".cargo_vcs_info.json":"fc4cfe0823e2b7f29e3cbf71460386760fe0e52e23fd2d654b588bd60b29a99d",".github/workflows/ci.yml":"c6a8fe8d53e0556a4ebaefae94a0b2ffa74288e1652093667b135ed37f4374f2","Cargo.lock":"d4964a22db25c234c84332212016fdab413d4c8cabde24bcd5d57f8f4b6da042","Cargo.toml":"206bff1b72ce6a6d5123d53a6a4930ec103a634efd03deec287f437fc0c54e44","Cargo.toml.orig":"a65a097cbb8779590a2243773290f7dddb2bcaa904255485428dd4c1be39272f","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","README.md":"cb4094e229373958e8de8b88048f7f09ce41b403b14543469354ba4049075eda","benches/bench.rs":"28d23a29046c4d50c73a19fd81ad80cc97e474af6d8c30f81dcc8c9621065cca","build.rs":"13201b550236a9ff2186b5303c77341d3b0874989fdaf39613a7452e4ce57817","src/hint.rs":"7e03e3c031b7c20127517b794a4b641b3efb6ccd4c7b1aae1b7b6f28b9968052","src/lib.rs":"15a8f3dda4a51320e1da93a052f015b2a9cc720306d3967dce854feab44f96de","src/stdarch_x86.rs":"b19b4c54f0c08c92f96d18c152f796d85cc17f9e6195f3e7d5612f05f28836fb","src/tests.rs":"04ba0d4a2b0e46e0c4f294af3bc1a1c938e72b7f9d0435b2eac0848ce5d4866a","src/traits.rs":"d289ade8d66f03096349f1cf2d4e68fad792e7b089bc956410cf700d9aad67e1","tests/exhaustive.rs":"3d5c1036dcb391bfd038fcb9feb03380b467a184ed9e0463d52f35b521684029","tests/ryu_comparison.rs":"5874eaff05486484029b333a8e7104db62b2706c8e64aecd69d8ccffad0880c3","tests/test.rs":"0f5efa4354a932d767ed561ddee411a10dd71faafa202372f3c00d81b88d1381"},"package":"1966f8ac2c1f76987d69a74d0e0f929241c10e78136434e3be70ff7f58f64214"}
1
+ {"files":{".cargo_vcs_info.json":"43d54f8649f2696fa0547ab60188ba59d1f89d875de1c168e0c36cf1841ebc75",".github/workflows/ci.yml":"c6a8fe8d53e0556a4ebaefae94a0b2ffa74288e1652093667b135ed37f4374f2","Cargo.lock":"25584c87076d00dcf3e855ab05536cc3c61bdbc8eb838582f7129fb05634389e","Cargo.toml":"51d5a637a4a00f6d6d004d7c9b68d49ad6f88082176a0d2d05b9d1c0e12f4f82","Cargo.toml.orig":"45c6ba574b101f3ab87e0a19513589c78fcb3fb45c9b4c4195a145244c251de6","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","README.md":"21305a2d248c7375059fa0e63402dd702bc18d1801fddd63df9b5f8ba39efe51","benches/bench.rs":"28d23a29046c4d50c73a19fd81ad80cc97e474af6d8c30f81dcc8c9621065cca","build.rs":"13201b550236a9ff2186b5303c77341d3b0874989fdaf39613a7452e4ce57817","src/hint.rs":"7e03e3c031b7c20127517b794a4b641b3efb6ccd4c7b1aae1b7b6f28b9968052","src/lib.rs":"69f08c4f84b63b5fc3d772fa7c0d23094fb9d0f2a90c0c1037275863a510c628","src/stdarch_x86.rs":"b19b4c54f0c08c92f96d18c152f796d85cc17f9e6195f3e7d5612f05f28836fb","src/tests.rs":"04ba0d4a2b0e46e0c4f294af3bc1a1c938e72b7f9d0435b2eac0848ce5d4866a","src/traits.rs":"d289ade8d66f03096349f1cf2d4e68fad792e7b089bc956410cf700d9aad67e1","tests/exhaustive.rs":"3d5c1036dcb391bfd038fcb9feb03380b467a184ed9e0463d52f35b521684029","tests/ryu_comparison.rs":"5874eaff05486484029b333a8e7104db62b2706c8e64aecd69d8ccffad0880c3","tests/test.rs":"0f5efa4354a932d767ed561ddee411a10dd71faafa202372f3c00d81b88d1381"},"package":"3ff05f8caa9038894637571ae6b9e29466c1f4f829d26c9b28f869a29cbe3445"}
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "git": {
3
- "sha1": "3a9446327c05de8446d7423854bbb03d0b243968"
3
+ "sha1": "990ad7ec835af798afdebc4361a346775227953b"
4
4
  },
5
5
  "path_in_vcs": ""
6
6
  }
@@ -436,7 +436,7 @@ dependencies = [
436
436
  "memchr",
437
437
  "serde",
438
438
  "serde_core",
439
- "zmij 1.0.17",
439
+ "zmij 1.0.18",
440
440
  ]
441
441
 
442
442
  [[package]]
@@ -565,13 +565,13 @@ dependencies = [
565
565
 
566
566
  [[package]]
567
567
  name = "zmij"
568
- version = "1.0.17"
568
+ version = "1.0.18"
569
569
  source = "registry+https://github.com/rust-lang/crates.io-index"
570
- checksum = "02aae0f83f69aafc94776e879363e9771d7ecbffe2c7fbb6c14c5e00dfe88439"
570
+ checksum = "1966f8ac2c1f76987d69a74d0e0f929241c10e78136434e3be70ff7f58f64214"
571
571
 
572
572
  [[package]]
573
573
  name = "zmij"
574
- version = "1.0.18"
574
+ version = "1.0.19"
575
575
  dependencies = [
576
576
  "criterion",
577
577
  "no-panic",
@@ -13,7 +13,7 @@
13
13
  edition = "2021"
14
14
  rust-version = "1.68"
15
15
  name = "zmij"
16
- version = "1.0.18"
16
+ version = "1.0.19"
17
17
  authors = ["David Tolnay <dtolnay@gmail.com>"]
18
18
  build = "build.rs"
19
19
  exclude = ["*.png"]
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "zmij"
3
- version = "1.0.18"
3
+ version = "1.0.19"
4
4
  authors = ["David Tolnay <dtolnay@gmail.com>"]
5
5
  categories = ["value-formatting", "no-std", "no-std::no-alloc"]
6
6
  description = "A double-to-string conversion algorithm based on Schubfach and yy"
@@ -11,7 +11,7 @@ numbers to decimal strings.
11
11
  This Rust implementation is a line-by-line port of Victor Zverovich's
12
12
  implementation in C++, [https://github.com/vitaut/zmij][upstream].
13
13
 
14
- [upstream]: https://github.com/vitaut/zmij/tree/fb4fafadd8ad318df51ee00140699062db8d5b3e
14
+ [upstream]: https://github.com/vitaut/zmij/tree/d8cb94516d0c480d6d70851ca677beae6ba721fc
15
15
 
16
16
  ## Example
17
17
 
@@ -41,7 +41,7 @@
41
41
  //! ![performance](https://raw.githubusercontent.com/dtolnay/zmij/master/dtoa-benchmark.png)
42
42
 
43
43
  #![no_std]
44
- #![doc(html_root_url = "https://docs.rs/zmij/1.0.18")]
44
+ #![doc(html_root_url = "https://docs.rs/zmij/1.0.19")]
45
45
  #![deny(unsafe_op_in_unsafe_fn)]
46
46
  #![allow(non_camel_case_types, non_snake_case)]
47
47
  #![allow(
@@ -806,8 +806,7 @@ where
806
806
 
807
807
  // The idea of using a single shorter candidate is by Cassio Neri.
808
808
  // It is less or equal to the upper bound by construction.
809
- let div10 = (upper >> BOUND_SHIFT) / UInt::from(10);
810
- let shorter = div10 * UInt::from(10);
809
+ let shorter = (upper >> BOUND_SHIFT) / UInt::from(10) * UInt::from(10);
811
810
  if (shorter << BOUND_SHIFT) >= lower {
812
811
  return ToDecimalResult {
813
812
  sig: shorter.into() as i64,
@@ -929,30 +928,20 @@ where
929
928
 
930
929
  // Check for boundary case when rounding down to nearest 10 and
931
930
  // near-boundary case when rounding up to nearest 10.
932
- if scaled_sig_mod10 == scaled_half_ulp
933
- // Case where upper == ten is insufficient: 1.342178e+08f.
934
- // upper == ten || upper == ten - 1
935
- || ten.wrapping_sub(upper) <= 1
931
+ // Case where upper == ten is insufficient: 1.342178e+08f.
932
+ if ten.wrapping_sub(upper) <= 1 // upper == ten || upper == ten - 1
933
+ || scaled_sig_mod10 == scaled_half_ulp
936
934
  {
937
935
  break;
938
936
  }
939
937
 
940
938
  let round_up = upper >= ten;
941
- let mut shorter = (integral.into() - digit) as i64;
939
+ let shorter = (integral.into() - digit) as i64;
942
940
  let longer = (integral.into() + u64::from(cmp >= 0)) as i64;
943
- if cfg!(target_arch = "aarch64") {
944
- // Faster version without ccmp.
945
- let dec_sig =
946
- hint::select_unpredictable(scaled_sig_mod10 < scaled_half_ulp, shorter, longer);
947
- return ToDecimalResult {
948
- sig: hint::select_unpredictable(round_up, shorter + 10, dec_sig),
949
- exp: dec_exp,
950
- };
951
- }
952
- shorter += i64::from(round_up) * 10;
953
- let use_shorter = scaled_sig_mod10 <= scaled_half_ulp || round_up;
941
+ let dec_sig =
942
+ hint::select_unpredictable(scaled_sig_mod10 < scaled_half_ulp, shorter, longer);
954
943
  return ToDecimalResult {
955
- sig: hint::select_unpredictable(use_shorter, shorter, longer),
944
+ sig: hint::select_unpredictable(round_up, shorter + 10, dec_sig),
956
945
  exp: dec_exp,
957
946
  };
958
947
  }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-to-markdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.24.4
4
+ version: 2.24.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Na'aman Hirschfeld