html-to-markdown 2.20.0 → 2.21.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cb2c495228591e141cf544c71b304a71cacdc5fa41e18a32fbad0e3272500572
4
- data.tar.gz: 95726bbff3c579df5beb07c6661ec7a01fc28a8f00bf2343b8c86a2e2cf7e794
3
+ metadata.gz: 46a7d4481ee2c94abc6e5f9a2ab0a97a237f947b380c0e5a50f7dd6a14e9db11
4
+ data.tar.gz: 9a7d16c208370cb5f135a13a3beefadb556ec2d71406081d1cb65cd8cb984ae3
5
5
  SHA512:
6
- metadata.gz: 9e36d4658492138a7aa78d5356b9880ac066ad00b87c7442c4294a55d85d76dbf498c19770ca753cb3c2223026f85a2d6316a7c083cdf193822b8750314188ee
7
- data.tar.gz: adc36f80908f378198e53668e17947bca49ddf0532fb2e03373dd5796e2556126b760b5118f90cf3f4f13b3a0c92b93ef9912d62021ac36cfc834835df098936
6
+ metadata.gz: 836773d7fb48ebac8fa9df5e3720391d1416e5f5c3dc1ff49b1ae1a5ea38aabd72e0a9c1b6bd9e9b26bee1cfbb07979bf1f986485c035bc7fb9a72c4e29bab01
7
+ data.tar.gz: fe819f21832df5712ce19459fe6e44dd659a9c11e2429c48c5010ca7c3fa6da9c7f492b72185acc77df7b6c7cb52dda7a5052c0f6589769e7df9fad174af229d
data/Gemfile.lock CHANGED
@@ -1,13 +1,13 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- html-to-markdown (2.20.0)
4
+ html-to-markdown (2.21.1)
5
5
  rb_sys (>= 0.9, < 1.0)
6
6
 
7
7
  GEM
8
8
  remote: https://rubygems.org/
9
9
  specs:
10
- activesupport (8.1.1)
10
+ activesupport (8.1.2)
11
11
  base64
12
12
  bigdecimal
13
13
  concurrent-ruby (~> 1.0, >= 1.3.1)
@@ -47,19 +47,19 @@ GEM
47
47
  parser (3.3.10.0)
48
48
  ast (~> 2.4.1)
49
49
  racc
50
- prism (1.7.0)
50
+ prism (1.8.0)
51
51
  racc (1.8.1)
52
52
  rainbow (3.1.1)
53
53
  rake (13.3.1)
54
54
  rake-compiler (1.3.1)
55
55
  rake
56
- rake-compiler-dock (1.10.0)
56
+ rake-compiler-dock (1.11.0)
57
57
  rb-fsevent (0.11.2)
58
58
  rb-inotify (0.11.1)
59
59
  ffi (~> 1.0)
60
- rb_sys (0.9.123)
61
- rake-compiler-dock (= 1.10.0)
62
- rbs (3.10.0)
60
+ rb_sys (0.9.124)
61
+ rake-compiler-dock (= 1.11.0)
62
+ rbs (3.10.2)
63
63
  logger
64
64
  regexp_parser (2.11.3)
65
65
  rspec (3.13.2)
@@ -89,7 +89,7 @@ GEM
89
89
  rubocop-ast (1.49.0)
90
90
  parser (>= 3.3.7.2)
91
91
  prism (~> 1.7)
92
- rubocop-rspec (3.8.0)
92
+ rubocop-rspec (3.9.0)
93
93
  lint_roller (~> 1.1)
94
94
  rubocop (~> 1.81)
95
95
  ruby-progressbar (1.13.0)
@@ -111,7 +111,7 @@ GEM
111
111
  strscan (>= 1.0.0)
112
112
  terminal-table (>= 2, < 5)
113
113
  uri (>= 0.12.0)
114
- strscan (3.1.6)
114
+ strscan (3.1.7)
115
115
  terminal-table (4.0.0)
116
116
  unicode-display_width (>= 1.1.1, < 4)
117
117
  tzinfo (2.0.6)
@@ -136,7 +136,7 @@ DEPENDENCIES
136
136
  steep
137
137
 
138
138
  CHECKSUMS
139
- activesupport (8.1.1) sha256=5e92534e8d0c8b8b5e6b16789c69dbea65c1d7b752269f71a39422e9546cea67
139
+ activesupport (8.1.2) sha256=88842578ccd0d40f658289b0e8c842acfe9af751afee2e0744a7873f50b6fdae
140
140
  ast (2.4.3) sha256=954615157c1d6a382bc27d690d973195e79db7f55e9765ac7c481c60bdb4d383
141
141
  base64 (0.3.0) sha256=27337aeabad6ffae05c265c450490628ef3ebd4b67be58257393227588f5a97b
142
142
  bigdecimal (4.0.1) sha256=8b07d3d065a9f921c80ceaea7c9d4ae596697295b584c296fe599dd0ad01c4a7
@@ -148,7 +148,7 @@ CHECKSUMS
148
148
  ffi (1.17.3-arm64-darwin) sha256=0c690555d4cee17a7f07c04d59df39b2fba74ec440b19da1f685c6579bb0717f
149
149
  ffi (1.17.3-x86_64-linux-gnu) sha256=3746b01f677aae7b16dc1acb7cb3cc17b3e35bdae7676a3f568153fb0e2c887f
150
150
  fileutils (1.8.0) sha256=8c6b1df54e2540bdb2f39258f08af78853aa70bad52b4d394bbc6424593c6e02
151
- html-to-markdown (2.20.0)
151
+ html-to-markdown (2.21.1)
152
152
  i18n (1.14.8) sha256=285778639134865c5e0f6269e0b818256017e8cde89993fdfcbfb64d088824a5
153
153
  json (2.18.0) sha256=b10506aee4183f5cf49e0efc48073d7b75843ce3782c68dbeb763351c08fd505
154
154
  language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
@@ -159,16 +159,16 @@ CHECKSUMS
159
159
  mutex_m (0.3.0) sha256=cfcb04ac16b69c4813777022fdceda24e9f798e48092a2b817eb4c0a782b0751
160
160
  parallel (1.27.0) sha256=4ac151e1806b755fb4e2dc2332cbf0e54f2e24ba821ff2d3dcf86bf6dc4ae130
161
161
  parser (3.3.10.0) sha256=ce3587fa5cc55a88c4ba5b2b37621b3329aadf5728f9eafa36bbd121462aabd6
162
- prism (1.7.0) sha256=10062f734bf7985c8424c44fac382ac04a58124ea3d220ec3ba9fe4f2da65103
162
+ prism (1.8.0) sha256=84453a16ef5530ea62c5f03ec16b52a459575ad4e7b9c2b360fd8ce2c39c1254
163
163
  racc (1.8.1) sha256=4a7f6929691dbec8b5209a0b373bc2614882b55fc5d2e447a21aaa691303d62f
164
164
  rainbow (3.1.1) sha256=039491aa3a89f42efa1d6dec2fc4e62ede96eb6acd95e52f1ad581182b79bc6a
165
165
  rake (13.3.1) sha256=8c9e89d09f66a26a01264e7e3480ec0607f0c497a861ef16063604b1b08eb19c
166
166
  rake-compiler (1.3.1) sha256=6b351612b6e2d73ddd5563ee799bb58685176e05363db6758504bd11573d670a
167
- rake-compiler-dock (1.10.0) sha256=dd62ee19df2a185a3315697e560cfa8cc9129901332152851e023fab0e94bf11
167
+ rake-compiler-dock (1.11.0) sha256=eab51f2cd533eb35cea6b624a75281f047123e70a64c58b607471bb49428f8c2
168
168
  rb-fsevent (0.11.2) sha256=43900b972e7301d6570f64b850a5aa67833ee7d87b458ee92805d56b7318aefe
169
169
  rb-inotify (0.11.1) sha256=a0a700441239b0ff18eb65e3866236cd78613d6b9f78fea1f9ac47a85e47be6e
170
- rb_sys (0.9.123) sha256=c22ae84d1bca3eec0f13a45ae4ca9ba6eace93d5be270a40a9c0a9a5b92a34e5
171
- rbs (3.10.0) sha256=e75b5f1313c71c9ee0fcea68bf97d3e5fe8ec7a641d4b5cd18bbc28c94ddf298
170
+ rb_sys (0.9.124) sha256=513476557b12eaf73764b3da9f8746024558fe8699bda785fb548c9aa3877ae7
171
+ rbs (3.10.2) sha256=bd8a5dc4c62f229f020146b61844a31f9c79e649449d212904a474eb79c846fc
172
172
  regexp_parser (2.11.3) sha256=ca13f381a173b7a93450e53459075c9b76a10433caadcb2f1180f2c741fc55a4
173
173
  rspec (3.13.2) sha256=206284a08ad798e61f86d7ca3e376718d52c0bc944626b2349266f239f820587
174
174
  rspec-core (3.13.6) sha256=a8823c6411667b60a8bca135364351dda34cd55e44ff94c4be4633b37d828b2d
@@ -177,11 +177,11 @@ CHECKSUMS
177
177
  rspec-support (3.13.6) sha256=2e8de3702427eab064c9352fe74488cc12a1bfae887ad8b91cba480ec9f8afb2
178
178
  rubocop (1.82.1) sha256=09f1a6a654a960eda767aebea33e47603080f8e9c9a3f019bf9b94c9cab5e273
179
179
  rubocop-ast (1.49.0) sha256=49c3676d3123a0923d333e20c6c2dbaaae2d2287b475273fddee0c61da9f71fd
180
- rubocop-rspec (3.8.0) sha256=28440dccb3f223a9938ca1f946bd3438275b8c6c156dab909e2cb8bc424cab33
180
+ rubocop-rspec (3.9.0) sha256=8fa70a3619408237d789aeecfb9beef40576acc855173e60939d63332fdb55e2
181
181
  ruby-progressbar (1.13.0) sha256=80fc9c47a9b640d6834e0dc7b3c94c9df37f08cb072b7761e4a71e22cff29b33
182
182
  securerandom (0.4.1) sha256=cc5193d414a4341b6e225f0cb4446aceca8e50d5e1888743fac16987638ea0b1
183
183
  steep (1.10.0) sha256=1b295b55f9aaff1b8d3ee42453ee55bc2a1078fda0268f288edb2dc014f4d7d1
184
- strscan (3.1.6) sha256=ebd56df0b0468b00a1f2004b4078c34df58c3506b2bba939e7531892aece81f3
184
+ strscan (3.1.7) sha256=5f76462b94a3ea50b44973225b7d75b2cb96d4e1bee9ef1319b99ca117b72c8c
185
185
  terminal-table (4.0.0) sha256=f504793203f8251b2ea7c7068333053f0beeea26093ec9962e62ea79f94301d2
186
186
  tzinfo (2.0.6) sha256=8daf828cc77bcf7d63b0e3bdb6caa47e2272dcfaf4fbfe46f8c3a9df087a829b
187
187
  unicode-display_width (3.2.0) sha256=0cdd96b5681a5949cdbc2c55e7b420facae74c4aaf9a9815eee1087cb1853c42
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "html-to-markdown-rb"
3
- version = "2.20.0"
3
+ version = "2.21.1"
4
4
  edition = "2024"
5
5
  authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
6
6
  license = "MIT"
@@ -13,12 +13,16 @@ description = "Ruby bindings (Magnus) for html-to-markdown - high-performance HT
13
13
  keywords = ["html", "markdown", "ruby", "magnus", "bindings"]
14
14
  categories = ["api-bindings"]
15
15
 
16
+
17
+ [lints]
18
+ workspace = true
19
+
16
20
  [lib]
17
21
  name = "html_to_markdown_rb"
18
22
  crate-type = ["cdylib", "rlib"]
19
23
 
20
24
  [dependencies]
21
- html-to-markdown-rs = { version = "2.20.0", features = ["inline-images", "visitor", "metadata"] }
25
+ html-to-markdown-rs = { version = "2.21.1", features = ["inline-images", "visitor", "metadata"] }
22
26
  magnus = { git = "https://github.com/matsadler/magnus", rev = "f6db11769efb517427bf7f121f9c32e18b059b38", features = ["rb-sys"] }
23
27
 
24
28
  [target.'cfg(not(target_os = "windows"))'.dependencies]
@@ -1,3 +1,4 @@
1
+ #![allow(clippy::all, clippy::pedantic, clippy::nursery, missing_docs)]
1
2
  use html_to_markdown_rs::{
2
3
  CodeBlockStyle, ConversionOptions, ConversionOptionsUpdate, DEFAULT_INLINE_IMAGE_LIMIT, HeadingStyle,
3
4
  HighlightStyle, HtmlExtraction, InlineImage, InlineImageConfig, InlineImageConfigUpdate, InlineImageWarning,
@@ -51,12 +52,11 @@ impl RubyVisitorWrapper {
51
52
  }
52
53
 
53
54
  fn utf8_str(&self, ruby: &Ruby, s: &str) -> Value {
54
- match ruby.eval::<Value>(&format!("String.new({:?}, encoding: 'UTF-8')", s)) {
55
- Ok(val) => val,
56
- Err(_) => {
57
- let str_val = ruby.str_from_slice(s.as_bytes());
58
- str_val.as_value()
59
- }
55
+ if let Ok(val) = ruby.eval::<Value>(&format!("String.new({s:?}, encoding: 'UTF-8')")) {
56
+ val
57
+ } else {
58
+ let str_val = ruby.str_from_slice(s.as_bytes());
59
+ str_val.as_value()
60
60
  }
61
61
  }
62
62
 
@@ -67,7 +67,7 @@ impl RubyVisitorWrapper {
67
67
  0 => match self.ruby_visitor.funcall::<&str, (), Value>(method_name, ()) {
68
68
  Ok(val) => val,
69
69
  Err(e) => {
70
- *self.last_error.borrow_mut() = Some(format!("Visitor error in {}: {}", method_name, e));
70
+ *self.last_error.borrow_mut() = Some(format!("Visitor error in {method_name}: {e}"));
71
71
  return Err(e);
72
72
  }
73
73
  },
@@ -77,7 +77,7 @@ impl RubyVisitorWrapper {
77
77
  {
78
78
  Ok(val) => val,
79
79
  Err(e) => {
80
- *self.last_error.borrow_mut() = Some(format!("Visitor error in {}: {}", method_name, e));
80
+ *self.last_error.borrow_mut() = Some(format!("Visitor error in {method_name}: {e}"));
81
81
  return Err(e);
82
82
  }
83
83
  },
@@ -87,7 +87,7 @@ impl RubyVisitorWrapper {
87
87
  {
88
88
  Ok(val) => val,
89
89
  Err(e) => {
90
- *self.last_error.borrow_mut() = Some(format!("Visitor error in {}: {}", method_name, e));
90
+ *self.last_error.borrow_mut() = Some(format!("Visitor error in {method_name}: {e}"));
91
91
  return Err(e);
92
92
  }
93
93
  },
@@ -97,7 +97,7 @@ impl RubyVisitorWrapper {
97
97
  {
98
98
  Ok(val) => val,
99
99
  Err(e) => {
100
- *self.last_error.borrow_mut() = Some(format!("Visitor error in {}: {}", method_name, e));
100
+ *self.last_error.borrow_mut() = Some(format!("Visitor error in {method_name}: {e}"));
101
101
  return Err(e);
102
102
  }
103
103
  },
@@ -107,7 +107,7 @@ impl RubyVisitorWrapper {
107
107
  {
108
108
  Ok(val) => val,
109
109
  Err(e) => {
110
- *self.last_error.borrow_mut() = Some(format!("Visitor error in {}: {}", method_name, e));
110
+ *self.last_error.borrow_mut() = Some(format!("Visitor error in {method_name}: {e}"));
111
111
  return Err(e);
112
112
  }
113
113
  },
@@ -120,14 +120,11 @@ impl RubyVisitorWrapper {
120
120
  };
121
121
 
122
122
  let hash = RHash::from_value(result)
123
- .ok_or_else(|| arg_error(format!("visitor method {} must return a Hash", method_name)))?;
123
+ .ok_or_else(|| arg_error(format!("visitor method {method_name} must return a Hash")))?;
124
124
 
125
- let type_value: Value = hash.get(ruby.intern("type")).ok_or_else(|| {
126
- arg_error(format!(
127
- "visitor method {} result Hash must have :type key",
128
- method_name
129
- ))
130
- })?;
125
+ let type_value: Value = hash
126
+ .get(ruby.intern("type"))
127
+ .ok_or_else(|| arg_error(format!("visitor method {method_name} result Hash must have :type key")))?;
131
128
 
132
129
  let type_str = symbol_to_string(type_value)?;
133
130
 
@@ -136,8 +133,7 @@ impl RubyVisitorWrapper {
136
133
  "custom" => {
137
134
  let output_value: Value = hash.get(ruby.intern("output")).ok_or_else(|| {
138
135
  arg_error(format!(
139
- "visitor method {} with type :custom must provide :output string",
140
- method_name
136
+ "visitor method {method_name} with type :custom must provide :output string"
141
137
  ))
142
138
  })?;
143
139
  let output = String::try_convert(output_value)?;
@@ -148,16 +144,14 @@ impl RubyVisitorWrapper {
148
144
  "error" => {
149
145
  let message_value: Value = hash.get(ruby.intern("message")).ok_or_else(|| {
150
146
  arg_error(format!(
151
- "visitor method {} with type :error must provide :message string",
152
- method_name
147
+ "visitor method {method_name} with type :error must provide :message string"
153
148
  ))
154
149
  })?;
155
150
  let message = String::try_convert(message_value)?;
156
151
  Ok(VisitResult::Error(message))
157
152
  }
158
153
  other => Err(arg_error(format!(
159
- "visitor method {} returned invalid type: {}",
160
- method_name, other
154
+ "visitor method {method_name} returned invalid type: {other}"
161
155
  ))),
162
156
  }
163
157
  }
@@ -384,7 +378,7 @@ impl HtmlVisitor for RubyVisitorWrapper {
384
378
  "visit_heading",
385
379
  &[
386
380
  node_ctx,
387
- ruby.integer_from_i64(level as i64).as_value(),
381
+ ruby.integer_from_i64(i64::from(level)).as_value(),
388
382
  ruby.str_from_slice(text.as_bytes()).as_value(),
389
383
  id_val,
390
384
  ],
@@ -1242,8 +1236,8 @@ fn inline_image_to_value(ruby: &Ruby, image: InlineImage) -> Result<Value, Error
1242
1236
 
1243
1237
  if let Some((width, height)) = dimensions {
1244
1238
  let dims = ruby.ary_new();
1245
- dims.push(width as i64)?;
1246
- dims.push(height as i64)?;
1239
+ dims.push(i64::from(width))?;
1240
+ dims.push(i64::from(height))?;
1247
1241
  hash.aset(ruby.intern("dimensions"), dims)?;
1248
1242
  } else {
1249
1243
  hash.aset(ruby.intern("dimensions"), ruby.qnil())?;
@@ -1317,8 +1311,8 @@ fn convert_with_inline_images_fn(ruby: &Ruby, args: &[Value]) -> Result<Value, E
1317
1311
  let options = build_conversion_options(ruby, parsed.optional.0)?;
1318
1312
  let config = build_inline_image_config(ruby, parsed.optional.1)?;
1319
1313
 
1320
- let extraction =
1321
- guard_panic(|| convert_with_inline_images_inner(&html, Some(options), config)).map_err(conversion_error)?;
1314
+ let extraction = guard_panic(|| convert_with_inline_images_inner(&html, Some(options), config, None))
1315
+ .map_err(conversion_error)?;
1322
1316
 
1323
1317
  extraction_to_value(ruby, extraction)
1324
1318
  }
@@ -1331,8 +1325,8 @@ fn convert_with_inline_images_handle_fn(ruby: &Ruby, args: &[Value]) -> Result<V
1331
1325
  let options = handle.0.clone();
1332
1326
  let config = build_inline_image_config(ruby, parsed.optional.0)?;
1333
1327
 
1334
- let extraction =
1335
- guard_panic(|| convert_with_inline_images_inner(&html, Some(options), config)).map_err(conversion_error)?;
1328
+ let extraction = guard_panic(|| convert_with_inline_images_inner(&html, Some(options), config, None))
1329
+ .map_err(conversion_error)?;
1336
1330
 
1337
1331
  extraction_to_value(ruby, extraction)
1338
1332
  }
@@ -1487,8 +1481,8 @@ fn images_to_ruby(ruby: &Ruby, images: Vec<RustImageMetadata>) -> Result<Value,
1487
1481
  match image.dimensions {
1488
1482
  Some((width, height)) => {
1489
1483
  let dims = ruby.ary_new();
1490
- dims.push(width as i64)?;
1491
- dims.push(height as i64)?;
1484
+ dims.push(i64::from(width))?;
1485
+ dims.push(i64::from(height))?;
1492
1486
  hash.aset(ruby.intern("dimensions"), dims)?;
1493
1487
  }
1494
1488
  None => {
@@ -1540,13 +1534,14 @@ fn extended_metadata_to_ruby(ruby: &Ruby, metadata: RustExtendedMetadata) -> Res
1540
1534
 
1541
1535
  #[cfg(feature = "metadata")]
1542
1536
  fn convert_with_metadata_fn(ruby: &Ruby, args: &[Value]) -> Result<Value, Error> {
1543
- let parsed = scan_args::<(String,), (Option<Value>, Option<Value>), (), (), (), ()>(args)?;
1537
+ let parsed = scan_args::<(String,), (Option<Value>, Option<Value>, Option<Value>), (), (), (), ()>(args)?;
1544
1538
  let html = parsed.required.0;
1545
1539
  let options = build_conversion_options(ruby, parsed.optional.0)?;
1546
1540
  let metadata_config = build_metadata_config(ruby, parsed.optional.1)?;
1541
+ let _visitor = parsed.optional.2;
1547
1542
 
1548
- let (markdown, metadata) =
1549
- guard_panic(|| convert_with_metadata_inner(&html, Some(options), metadata_config)).map_err(conversion_error)?;
1543
+ let (markdown, metadata) = guard_panic(|| convert_with_metadata_inner(&html, Some(options), metadata_config, None))
1544
+ .map_err(conversion_error)?;
1550
1545
 
1551
1546
  let array = ruby.ary_new();
1552
1547
  array.push(markdown)?;
@@ -1563,8 +1558,8 @@ fn convert_with_metadata_handle_fn(ruby: &Ruby, args: &[Value]) -> Result<Value,
1563
1558
  let options = handle.0.clone();
1564
1559
  let metadata_config = build_metadata_config(ruby, parsed.optional.0)?;
1565
1560
 
1566
- let (markdown, metadata) =
1567
- guard_panic(|| convert_with_metadata_inner(&html, Some(options), metadata_config)).map_err(conversion_error)?;
1561
+ let (markdown, metadata) = guard_panic(|| convert_with_metadata_inner(&html, Some(options), metadata_config, None))
1562
+ .map_err(conversion_error)?;
1568
1563
 
1569
1564
  let array = ruby.ary_new();
1570
1565
  array.push(markdown)?;
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HtmlToMarkdown
4
- VERSION = '2.20.0'
4
+ VERSION = '2.21.1'
5
5
  end
@@ -21,7 +21,7 @@ module HtmlToMarkdown
21
21
 
22
22
  module_function
23
23
 
24
- def convert(html, options = nil)
24
+ def convert(html, options = nil, _visitor = nil)
25
25
  native_convert(html.to_s, options)
26
26
  end
27
27
 
@@ -29,7 +29,7 @@ module HtmlToMarkdown
29
29
  native_convert_with_options(html.to_s, options_handle)
30
30
  end
31
31
 
32
- def convert_with_inline_images(html, options = nil, image_config = nil)
32
+ def convert_with_inline_images(html, options = nil, image_config = nil, _visitor = nil)
33
33
  native_convert_with_inline_images(html.to_s, options, image_config)
34
34
  end
35
35
 
@@ -163,7 +163,7 @@ module HtmlToMarkdown
163
163
  # @see #convert Simple conversion without metadata
164
164
  # @see #convert_with_inline_images Extract inline images during conversion
165
165
  # @see ConversionOptions Detailed conversion configuration
166
- def convert_with_metadata(html, options = nil, metadata_config = nil)
166
+ def convert_with_metadata(html, options = nil, metadata_config = nil, _visitor = nil)
167
167
  native_convert_with_metadata(html.to_s, options, metadata_config)
168
168
  end
169
169
 
@@ -78,7 +78,8 @@ module HtmlToMarkdown
78
78
  encoding?: String,
79
79
  debug?: bool,
80
80
  strip_tags?: Array[String],
81
- preserve_tags?: Array[String]
81
+ preserve_tags?: Array[String],
82
+ skip_images?: bool
82
83
  }
83
84
 
84
85
  type inline_image_config = {
@@ -249,8 +250,22 @@ module HtmlToMarkdown
249
250
 
250
251
  public
251
252
 
252
- # Convert HTML to Markdown with optional configuration
253
- def self.convert: (String html, ?conversion_options options) -> String
253
+ # Convert HTML to Markdown with optional configuration and visitor
254
+ #
255
+ # The optional visitor parameter allows customization of conversion behavior for specific elements.
256
+ # When both options and visitor are provided, the visitor can override default conversions.
257
+ #
258
+ # Args:
259
+ # html: HTML string to convert
260
+ # options: Optional conversion configuration
261
+ # visitor: Optional visitor object for customizing conversion
262
+ #
263
+ # Returns:
264
+ # markdown: String - Converted markdown output
265
+ #
266
+ # Example:
267
+ # markdown = HtmlToMarkdown.convert(html, { wrap: true }, my_visitor)
268
+ def self.convert: (String html, ?conversion_options options, ?visitor visitor) -> String
254
269
 
255
270
  # Create a reusable options handle for performance
256
271
  def self.options: (?conversion_options options_hash) -> Options
@@ -264,13 +279,36 @@ module HtmlToMarkdown
264
279
  ) -> html_extraction
265
280
 
266
281
  # Convert HTML with inline image extraction
282
+ #
283
+ # Optionally accepts a visitor for customizing conversion behavior.
284
+ #
285
+ # Args:
286
+ # html: HTML string to convert
287
+ # options: Optional conversion configuration
288
+ # image_config: Optional inline image extraction configuration
289
+ # visitor: Optional visitor object for customizing conversion
290
+ #
291
+ # Returns:
292
+ # html_extraction: Hash containing markdown, inline_images array, and warnings array
293
+ #
294
+ # Example:
295
+ # result = HtmlToMarkdown.convert_with_inline_images(html, { wrap: true }, image_config, my_visitor)
267
296
  def self.convert_with_inline_images: (
268
297
  String html,
269
298
  ?conversion_options options,
270
- ?inline_image_config image_config
299
+ ?inline_image_config image_config,
300
+ ?visitor visitor
271
301
  ) -> html_extraction
272
302
 
273
- # Convert HTML to Markdown with a custom visitor
303
+ # Convert HTML to Markdown with a custom visitor (deprecated)
304
+ #
305
+ # DEPRECATED: Use convert() with the optional visitor parameter instead.
306
+ # This method is maintained for backward compatibility.
307
+ #
308
+ # All convert functions now accept optional visitors:
309
+ # - convert(html, options, visitor)
310
+ # - convert_with_inline_images(html, options, image_config, visitor)
311
+ # - convert_with_metadata(html, options, metadata_config, visitor)
274
312
  #
275
313
  # The visitor object can implement any of the following methods:
276
314
  # - visit_element_start(ctx) -> visitor_result
@@ -342,11 +380,13 @@ module HtmlToMarkdown
342
380
  # Convert HTML to Markdown with metadata extraction
343
381
  #
344
382
  # Extracts comprehensive metadata (headers, links, images, structured data) during conversion.
383
+ # Optionally accepts a visitor for customizing conversion behavior.
345
384
  #
346
385
  # Args:
347
386
  # html: HTML string to convert
348
387
  # options: Optional conversion configuration
349
388
  # metadata_config: Optional metadata extraction configuration
389
+ # visitor: Optional visitor object for customizing conversion
350
390
  #
351
391
  # Returns:
352
392
  # Array containing:
@@ -365,10 +405,14 @@ module HtmlToMarkdown
365
405
  # markdown, metadata = HtmlToMarkdown.convert_with_metadata(html)
366
406
  # puts "Title: #{metadata['document']['title']}"
367
407
  # puts "Headers: #{metadata['headers'].length}"
408
+ #
409
+ # Example with visitor:
410
+ # markdown, metadata = HtmlToMarkdown.convert_with_metadata(html, options, metadata_config, my_visitor)
368
411
  def self.convert_with_metadata: (
369
412
  String html,
370
413
  ?conversion_options options,
371
- ?metadata_config metadata_config
414
+ ?metadata_config metadata_config,
415
+ ?visitor visitor
372
416
  ) -> [String, extended_metadata]
373
417
  def self.convert_with_metadata_handle: (
374
418
  String html,
@@ -377,7 +421,7 @@ module HtmlToMarkdown
377
421
  ) -> [String, extended_metadata]
378
422
 
379
423
  # Instance method versions (created by module_function)
380
- def convert: (String html, ?conversion_options options) -> String
424
+ def convert: (String html, ?conversion_options options, ?visitor visitor) -> String
381
425
  def options: (?conversion_options options_hash) -> Options
382
426
  def convert_with_options: (String html, Options options_handle) -> String
383
427
  def convert_with_inline_images_handle: (
@@ -388,13 +432,15 @@ module HtmlToMarkdown
388
432
  def convert_with_inline_images: (
389
433
  String html,
390
434
  ?conversion_options options,
391
- ?inline_image_config image_config
435
+ ?inline_image_config image_config,
436
+ ?visitor visitor
392
437
  ) -> html_extraction
393
438
  def convert_with_visitor: (String html, ?conversion_options options, visitor: visitor) -> String
394
439
  def convert_with_metadata: (
395
440
  String html,
396
441
  ?conversion_options options,
397
- ?metadata_config metadata_config
442
+ ?metadata_config metadata_config,
443
+ ?visitor visitor
398
444
  ) -> [String, extended_metadata]
399
445
  def convert_with_metadata_handle: (
400
446
  String html,
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-to-markdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.20.0
4
+ version: 2.21.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Na'aman Hirschfeld
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-01-05 00:00:00.000000000 Z
11
+ date: 2026-01-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys