html-to-markdown 2.15.0 → 2.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b63f7961f8ff85d6b1af2095e9033365d35d53d7370b9b76da970ca5202c338e
4
- data.tar.gz: ce6d5595eaa8796e11ab377e17cb722119512273f1c24c38a10d441fe8073a82
3
+ metadata.gz: 1f7bea61e577b40f11d2ca37e357c5470c39df8354940d5227c5e86e671e7f78
4
+ data.tar.gz: fd466c3d0302478de58e9c1c3aef84977e6ec5a8911ed118fed9a24d28b8d9c8
5
5
  SHA512:
6
- metadata.gz: a07e1a18462aeeed9fb13a6fe62ed4ad04620bc34208f7a1a8510f4949dc4dd7465a94e7cee72483db5317f14a297aa2004663fe4ddab86582be9c44980608c9
7
- data.tar.gz: 77b1a8aa989c37e1d52ddefbe71c36dc1a12ac7f35923003cd266ac4689a964c90774d587d9fe9392f007f6df967d675e8039c7227bc741992e67c393824a584
6
+ metadata.gz: 52d4af88f51f4a37f16d2bd9ddb3865fb391afe881cf805321aa5434b06f790089452574df88c9500fe82c29e4cb52341c150b95fce3abec9a244ef2c11aae9f
7
+ data.tar.gz: 5b52a52f003097f6b45bb101adaf01aeeb5dc7b2af6f7174d69df0e6bdb3673b49379c657b97c129e3efbc55229215df86989c505ec3a7643ec22109ce49a738
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- html-to-markdown (2.15.0)
4
+ html-to-markdown (2.16.1)
5
5
  rb_sys (>= 0.9, < 1.0)
6
6
 
7
7
  GEM
data/README.md CHANGED
@@ -69,18 +69,18 @@ Apple M4 • Real Wikipedia documents • `HtmlToMarkdown.convert` (Ruby)
69
69
 
70
70
  ### Benchmark Fixtures (Apple M4)
71
71
 
72
- Measured via `task bench:bindings -- --language ruby` with the shared Wikipedia + hOCR suite:
72
+ Measured via `task bench:harness` with the shared Wikipedia + hOCR suite:
73
73
 
74
74
  | Document | Size | ops/sec (Ruby) |
75
75
  | ---------------------- | ------ | -------------- |
76
- | Lists (Timeline) | 129 KB | 1,349 |
77
- | Tables (Countries) | 360 KB | 326 |
78
- | Medium (Python) | 657 KB | 157 |
79
- | Large (Rust) | 567 KB | 174 |
80
- | Small (Intro) | 463 KB | 214 |
81
- | hOCR German PDF | 44 KB | 2,936 |
82
- | hOCR Invoice | 4 KB | 25,740 |
83
- | hOCR Embedded Tables | 37 KB | 3,328 |
76
+ | Lists (Timeline) | 129 KB | 3,156 |
77
+ | Tables (Countries) | 360 KB | 921 |
78
+ | Medium (Python) | 657 KB | 469 |
79
+ | Large (Rust) | 567 KB | 534 |
80
+ | Small (Intro) | 463 KB | 629 |
81
+ | hOCR German PDF | 44 KB | 7,250 |
82
+ | hOCR Invoice | 4 KB | 83,883 |
83
+ | hOCR Embedded Tables | 37 KB | 7,890 |
84
84
 
85
85
  > These numbers line up with the Python/Node bindings because everything flows through the same Rust engine.
86
86
 
data/bin/benchmark.rb CHANGED
@@ -22,7 +22,8 @@ end
22
22
 
23
23
  options = {
24
24
  iterations: 50,
25
- format: 'html'
25
+ format: 'html',
26
+ scenario: 'convert-default'
26
27
  }
27
28
 
28
29
  OptionParser.new do |parser|
@@ -36,6 +37,10 @@ OptionParser.new do |parser|
36
37
  options[:iterations] = n.positive? ? n : 1
37
38
  end
38
39
 
40
+ parser.on('--scenario SCENARIO', 'Scenario to benchmark') do |scenario|
41
+ options[:scenario] = scenario
42
+ end
43
+
39
44
  parser.on('--format FORMAT', 'Fixture format (html or hocr)') do |format|
40
45
  options[:format] = format.downcase
41
46
  end
@@ -56,24 +61,68 @@ unless %w[html hocr].include?(options[:format])
56
61
  exit 1
57
62
  end
58
63
 
64
+ supported_scenarios = %w[
65
+ convert-default
66
+ convert-options
67
+ inline-images-default
68
+ inline-images-options
69
+ metadata-default
70
+ metadata-options
71
+ ]
72
+ unless supported_scenarios.include?(options[:scenario])
73
+ warn "Unsupported scenario: #{options[:scenario]}"
74
+ exit 1
75
+ end
76
+
59
77
  html = File.binread(fixture)
60
78
  html.force_encoding(Encoding::UTF_8)
61
79
  html.freeze
62
80
  iterations = options[:iterations]
63
- options_handle = HtmlToMarkdown.options(
64
- options[:format] == 'hocr' ? { hocr_spatial_tables: false } : nil
65
- )
81
+ conversion_options = options[:format] == 'hocr' ? { hocr_spatial_tables: false } : {}
82
+ options_handle = if %w[convert-options inline-images-options metadata-options].include?(options[:scenario])
83
+ HtmlToMarkdown.options(conversion_options)
84
+ end
85
+
86
+ SCENARIO_RUNNERS = {
87
+ 'convert-default' => ->(html, _options, _handle) { HtmlToMarkdown.convert(html) },
88
+ 'convert-options' => lambda do |html, _options, handle|
89
+ raise ArgumentError, 'options handle required' unless handle
90
+
91
+ HtmlToMarkdown.convert_with_options(html, handle)
92
+ end,
93
+ 'inline-images-default' => ->(html, _options, _handle) { HtmlToMarkdown.convert_with_inline_images(html, nil, nil) },
94
+ 'inline-images-options' => lambda do |html, _options, handle|
95
+ raise ArgumentError, 'options handle required' unless handle
96
+
97
+ HtmlToMarkdown.convert_with_inline_images_handle(html, handle, nil)
98
+ end,
99
+ 'metadata-default' => ->(html, _options, _handle) { HtmlToMarkdown.convert_with_metadata(html, nil, nil) },
100
+ 'metadata-options' => lambda do |html, _options, handle|
101
+ raise ArgumentError, 'options handle required' unless handle
102
+
103
+ HtmlToMarkdown.convert_with_metadata_handle(html, handle, nil)
104
+ end
105
+ }.freeze
66
106
 
67
- def convert_document(html, options_handle)
68
- HtmlToMarkdown.convert_with_options(html, options_handle)
107
+ def run_scenario(html, scenario, options, handle)
108
+ runner = SCENARIO_RUNNERS.fetch(scenario) { raise ArgumentError, "Unsupported scenario: #{scenario}" }
109
+ runner.call(html, options, handle)
69
110
  end
70
111
 
71
- convert_document(html, options_handle)
112
+ run_scenario(html, options[:scenario], conversion_options, options_handle)
113
+
114
+ profile_output = ENV.fetch('HTML_TO_MARKDOWN_PROFILE_OUTPUT', nil)
115
+ if profile_output && HtmlToMarkdown.respond_to?(:start_profiling)
116
+ freq = Integer(ENV.fetch('HTML_TO_MARKDOWN_PROFILE_FREQUENCY', '1000'), 10)
117
+ HtmlToMarkdown.start_profiling(profile_output, freq)
118
+ end
72
119
 
73
120
  start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
74
- iterations.times { convert_document(html, options_handle) }
121
+ iterations.times { run_scenario(html, options[:scenario], conversion_options, options_handle) }
75
122
  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start
76
123
 
124
+ HtmlToMarkdown.stop_profiling if profile_output && HtmlToMarkdown.respond_to?(:stop_profiling)
125
+
77
126
  payload_size_bytes = html.bytesize
78
127
  bytes_processed = payload_size_bytes * iterations
79
128
  ops_per_sec = iterations / elapsed
@@ -83,6 +132,7 @@ payload = %({
83
132
  "language":"ruby",
84
133
  "fixture":"#{json_escape(File.basename(fixture))}",
85
134
  "fixture_path":"#{json_escape(fixture)}",
135
+ "scenario":"#{json_escape(options[:scenario])}",
86
136
  "iterations":#{iterations},
87
137
  "elapsed_seconds":#{format('%.8f', elapsed)},
88
138
  "ops_per_sec":#{format('%.4f', ops_per_sec)},
@@ -25,6 +25,9 @@ default_profile = ENV.fetch('CARGO_PROFILE', 'release')
25
25
 
26
26
  create_rust_makefile('html_to_markdown_rb') do |config|
27
27
  config.profile = default_profile.to_sym
28
+ features_env = ENV.fetch('HTML_TO_MARKDOWN_CARGO_FEATURES', '')
29
+ features = features_env.split(',').map(&:strip).reject(&:empty?)
30
+ config.features = features unless features.empty?
28
31
 
29
32
  native_dir = File.expand_path('native', __dir__)
30
33
  relative_native =
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "html-to-markdown-rb"
3
- version = "2.15.0"
3
+ version = "2.16.1"
4
4
  edition = "2024"
5
5
  authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
6
6
  license = "MIT"
@@ -17,13 +17,16 @@ categories = ["api-bindings"]
17
17
  name = "html_to_markdown_rb"
18
18
  crate-type = ["cdylib", "rlib"]
19
19
 
20
- [features]
21
- default = ["metadata"]
22
- metadata = ["html-to-markdown-rs/metadata"]
23
-
24
20
  [dependencies]
25
- html-to-markdown-rs = { version = "2.15.0", features = ["inline-images"] }
21
+ html-to-markdown-rs = { version = "2.16.1", features = ["inline-images"] }
26
22
  magnus = { git = "https://github.com/matsadler/magnus", rev = "f6db11769efb517427bf7f121f9c32e18b059b38", features = ["rb-sys"] }
27
23
 
24
+ [target.'cfg(not(target_os = "windows"))'.dependencies]
25
+ pprof = { version = "0.15", features = ["flamegraph"], optional = true }
26
+
28
27
  [dev-dependencies]
29
28
  pretty_assertions = "1.4"
29
+ [features]
30
+ default = ["metadata"]
31
+ metadata = ["html-to-markdown-rs/metadata"]
32
+ profiling = ["dep:pprof"]
@@ -62,6 +62,21 @@ Apple M4 • Real Wikipedia documents • `HtmlToMarkdown.convert` (Ruby)
62
62
 
63
63
  > Same core, same benchmarks: the Ruby extension stays within single-digit % of the Rust CLI and mirrors the Python/Node numbers.
64
64
 
65
+ ### Benchmark Fixtures (Apple M4)
66
+
67
+ Measured via `task bench:harness` with the shared Wikipedia + hOCR suite:
68
+
69
+ | Document | Size | ops/sec (Ruby) |
70
+ | ---------------------- | ------ | -------------- |
71
+ | Lists (Timeline) | 129 KB | 3,156 |
72
+ | Tables (Countries) | 360 KB | 921 |
73
+ | Medium (Python) | 657 KB | 469 |
74
+ | Large (Rust) | 567 KB | 534 |
75
+ | Small (Intro) | 463 KB | 629 |
76
+ | hOCR German PDF | 44 KB | 7,250 |
77
+ | hOCR Invoice | 4 KB | 83,883 |
78
+ | hOCR Embedded Tables | 37 KB | 7,890 |
79
+
65
80
  ## Quick Start
66
81
 
67
82
  ```ruby
@@ -1,30 +1,29 @@
1
1
  use html_to_markdown_rs::{
2
- CodeBlockStyle, ConversionOptions, HeadingStyle, HighlightStyle, HtmlExtraction, InlineImage, InlineImageConfig,
3
- InlineImageFormat, InlineImageSource, InlineImageWarning, ListIndentType, NewlineStyle, PreprocessingOptions,
4
- PreprocessingPreset, WhitespaceMode, convert as convert_inner,
5
- convert_with_inline_images as convert_with_inline_images_inner, error::ConversionError, safety::guard_panic,
2
+ CodeBlockStyle, ConversionOptions, ConversionOptionsUpdate, DEFAULT_INLINE_IMAGE_LIMIT, HeadingStyle,
3
+ HighlightStyle, HtmlExtraction, InlineImage, InlineImageConfig, InlineImageConfigUpdate, InlineImageWarning,
4
+ ListIndentType, NewlineStyle, PreprocessingOptionsUpdate, PreprocessingPreset, WhitespaceMode,
5
+ convert as convert_inner, convert_with_inline_images as convert_with_inline_images_inner, error::ConversionError,
6
+ safety::guard_panic,
6
7
  };
7
8
 
8
9
  #[cfg(feature = "metadata")]
9
10
  use html_to_markdown_rs::convert_with_metadata as convert_with_metadata_inner;
11
+ mod profiling;
10
12
  #[cfg(feature = "metadata")]
11
13
  use html_to_markdown_rs::metadata::{
12
14
  DocumentMetadata as RustDocumentMetadata, ExtendedMetadata as RustExtendedMetadata,
13
- HeaderMetadata as RustHeaderMetadata, ImageMetadata as RustImageMetadata, ImageType as RustImageType,
14
- LinkMetadata as RustLinkMetadata, LinkType as RustLinkType, MetadataConfig as RustMetadataConfig,
15
- StructuredData as RustStructuredData, StructuredDataType as RustStructuredDataType,
16
- TextDirection as RustTextDirection,
15
+ HeaderMetadata as RustHeaderMetadata, ImageMetadata as RustImageMetadata, LinkMetadata as RustLinkMetadata,
16
+ MetadataConfig as RustMetadataConfig, StructuredData as RustStructuredData, TextDirection as RustTextDirection,
17
17
  };
18
18
  use magnus::prelude::*;
19
19
  use magnus::r_hash::ForEach;
20
20
  use magnus::{Error, RArray, RHash, Ruby, Symbol, TryConvert, Value, function, scan_args::scan_args};
21
+ use std::path::PathBuf;
21
22
 
22
23
  #[derive(Clone)]
23
24
  #[magnus::wrap(class = "HtmlToMarkdown::Options", free_immediately)]
24
25
  struct OptionsHandle(ConversionOptions);
25
26
 
26
- const DEFAULT_INLINE_IMAGE_LIMIT: u64 = 5 * 1024 * 1024;
27
-
28
27
  fn conversion_error(err: ConversionError) -> Error {
29
28
  match err {
30
29
  ConversionError::ConfigError(msg) => arg_error(msg),
@@ -120,43 +119,43 @@ fn parse_vec_of_strings(value: Value) -> Result<Vec<String>, Error> {
120
119
  array.to_vec::<String>()
121
120
  }
122
121
 
123
- fn parse_preprocessing_options(_ruby: &Ruby, value: Value) -> Result<PreprocessingOptions, Error> {
122
+ fn parse_preprocessing_options(_ruby: &Ruby, value: Value) -> Result<PreprocessingOptionsUpdate, Error> {
124
123
  let hash = RHash::from_value(value).ok_or_else(|| arg_error("expected preprocessing to be a Hash"))?;
125
124
 
126
- let mut opts = PreprocessingOptions::default();
125
+ let mut update = PreprocessingOptionsUpdate::default();
127
126
 
128
127
  hash.foreach(|key: Value, val: Value| {
129
128
  let key_name = symbol_to_string(key)?;
130
129
  match key_name.as_str() {
131
130
  "enabled" => {
132
- opts.enabled = bool::try_convert(val)?;
131
+ update.enabled = Some(bool::try_convert(val)?);
133
132
  }
134
133
  "preset" => {
135
- opts.preset = parse_preset(val)?;
134
+ update.preset = Some(parse_preset(val)?);
136
135
  }
137
136
  "remove_navigation" => {
138
- opts.remove_navigation = bool::try_convert(val)?;
137
+ update.remove_navigation = Some(bool::try_convert(val)?);
139
138
  }
140
139
  "remove_forms" => {
141
- opts.remove_forms = bool::try_convert(val)?;
140
+ update.remove_forms = Some(bool::try_convert(val)?);
142
141
  }
143
142
  _ => {}
144
143
  }
145
144
  Ok(ForEach::Continue)
146
145
  })?;
147
146
 
148
- Ok(opts)
147
+ Ok(update)
149
148
  }
150
149
 
151
150
  fn build_conversion_options(ruby: &Ruby, options: Option<Value>) -> Result<ConversionOptions, Error> {
152
- let mut opts = ConversionOptions::default();
151
+ let mut update = ConversionOptionsUpdate::default();
153
152
 
154
153
  let Some(options) = options else {
155
- return Ok(opts);
154
+ return Ok(ConversionOptions::default());
156
155
  };
157
156
 
158
157
  if options.is_nil() {
159
- return Ok(opts);
158
+ return Ok(ConversionOptions::default());
160
159
  }
161
160
 
162
161
  let hash = RHash::from_value(options).ok_or_else(|| arg_error("options must be provided as a Hash"))?;
@@ -165,16 +164,16 @@ fn build_conversion_options(ruby: &Ruby, options: Option<Value>) -> Result<Conve
165
164
  let key_name = symbol_to_string(key)?;
166
165
  match key_name.as_str() {
167
166
  "heading_style" => {
168
- opts.heading_style = parse_heading_style(val)?;
167
+ update.heading_style = Some(parse_heading_style(val)?);
169
168
  }
170
169
  "list_indent_type" => {
171
- opts.list_indent_type = parse_list_indent_type(val)?;
170
+ update.list_indent_type = Some(parse_list_indent_type(val)?);
172
171
  }
173
172
  "list_indent_width" => {
174
- opts.list_indent_width = usize::try_convert(val)?;
173
+ update.list_indent_width = Some(usize::try_convert(val)?);
175
174
  }
176
175
  "bullets" => {
177
- opts.bullets = String::try_convert(val)?;
176
+ update.bullets = Some(String::try_convert(val)?);
178
177
  }
179
178
  "strong_em_symbol" => {
180
179
  let value = String::try_convert(val)?;
@@ -185,103 +184,103 @@ fn build_conversion_options(ruby: &Ruby, options: Option<Value>) -> Result<Conve
185
184
  if chars.next().is_some() {
186
185
  return Err(arg_error("strong_em_symbol must be a single character"));
187
186
  }
188
- opts.strong_em_symbol = ch;
187
+ update.strong_em_symbol = Some(ch);
189
188
  }
190
189
  "escape_asterisks" => {
191
- opts.escape_asterisks = bool::try_convert(val)?;
190
+ update.escape_asterisks = Some(bool::try_convert(val)?);
192
191
  }
193
192
  "escape_underscores" => {
194
- opts.escape_underscores = bool::try_convert(val)?;
193
+ update.escape_underscores = Some(bool::try_convert(val)?);
195
194
  }
196
195
  "escape_misc" => {
197
- opts.escape_misc = bool::try_convert(val)?;
196
+ update.escape_misc = Some(bool::try_convert(val)?);
198
197
  }
199
198
  "escape_ascii" => {
200
- opts.escape_ascii = bool::try_convert(val)?;
199
+ update.escape_ascii = Some(bool::try_convert(val)?);
201
200
  }
202
201
  "code_language" => {
203
- opts.code_language = String::try_convert(val)?;
202
+ update.code_language = Some(String::try_convert(val)?);
204
203
  }
205
204
  "autolinks" => {
206
- opts.autolinks = bool::try_convert(val)?;
205
+ update.autolinks = Some(bool::try_convert(val)?);
207
206
  }
208
207
  "default_title" => {
209
- opts.default_title = bool::try_convert(val)?;
208
+ update.default_title = Some(bool::try_convert(val)?);
210
209
  }
211
210
  "br_in_tables" => {
212
- opts.br_in_tables = bool::try_convert(val)?;
211
+ update.br_in_tables = Some(bool::try_convert(val)?);
213
212
  }
214
213
  "hocr_spatial_tables" => {
215
- opts.hocr_spatial_tables = bool::try_convert(val)?;
214
+ update.hocr_spatial_tables = Some(bool::try_convert(val)?);
216
215
  }
217
216
  "highlight_style" => {
218
- opts.highlight_style = parse_highlight_style(val)?;
217
+ update.highlight_style = Some(parse_highlight_style(val)?);
219
218
  }
220
219
  "extract_metadata" => {
221
- opts.extract_metadata = bool::try_convert(val)?;
220
+ update.extract_metadata = Some(bool::try_convert(val)?);
222
221
  }
223
222
  "whitespace_mode" => {
224
- opts.whitespace_mode = parse_whitespace_mode(val)?;
223
+ update.whitespace_mode = Some(parse_whitespace_mode(val)?);
225
224
  }
226
225
  "strip_newlines" => {
227
- opts.strip_newlines = bool::try_convert(val)?;
226
+ update.strip_newlines = Some(bool::try_convert(val)?);
228
227
  }
229
228
  "wrap" => {
230
- opts.wrap = bool::try_convert(val)?;
229
+ update.wrap = Some(bool::try_convert(val)?);
231
230
  }
232
231
  "wrap_width" => {
233
- opts.wrap_width = usize::try_convert(val)?;
232
+ update.wrap_width = Some(usize::try_convert(val)?);
234
233
  }
235
234
  "convert_as_inline" => {
236
- opts.convert_as_inline = bool::try_convert(val)?;
235
+ update.convert_as_inline = Some(bool::try_convert(val)?);
237
236
  }
238
237
  "sub_symbol" => {
239
- opts.sub_symbol = String::try_convert(val)?;
238
+ update.sub_symbol = Some(String::try_convert(val)?);
240
239
  }
241
240
  "sup_symbol" => {
242
- opts.sup_symbol = String::try_convert(val)?;
241
+ update.sup_symbol = Some(String::try_convert(val)?);
243
242
  }
244
243
  "newline_style" => {
245
- opts.newline_style = parse_newline_style(val)?;
244
+ update.newline_style = Some(parse_newline_style(val)?);
246
245
  }
247
246
  "code_block_style" => {
248
- opts.code_block_style = parse_code_block_style(val)?;
247
+ update.code_block_style = Some(parse_code_block_style(val)?);
249
248
  }
250
249
  "keep_inline_images_in" => {
251
- opts.keep_inline_images_in = parse_vec_of_strings(val)?;
250
+ update.keep_inline_images_in = Some(parse_vec_of_strings(val)?);
252
251
  }
253
252
  "preprocessing" => {
254
- opts.preprocessing = parse_preprocessing_options(ruby, val)?;
253
+ update.preprocessing = Some(parse_preprocessing_options(ruby, val)?);
255
254
  }
256
255
  "encoding" => {
257
- opts.encoding = String::try_convert(val)?;
256
+ update.encoding = Some(String::try_convert(val)?);
258
257
  }
259
258
  "debug" => {
260
- opts.debug = bool::try_convert(val)?;
259
+ update.debug = Some(bool::try_convert(val)?);
261
260
  }
262
261
  "strip_tags" => {
263
- opts.strip_tags = parse_vec_of_strings(val)?;
262
+ update.strip_tags = Some(parse_vec_of_strings(val)?);
264
263
  }
265
264
  "preserve_tags" => {
266
- opts.preserve_tags = parse_vec_of_strings(val)?;
265
+ update.preserve_tags = Some(parse_vec_of_strings(val)?);
267
266
  }
268
267
  _ => {}
269
268
  }
270
269
  Ok(ForEach::Continue)
271
270
  })?;
272
271
 
273
- Ok(opts)
272
+ Ok(ConversionOptions::from(update))
274
273
  }
275
274
 
276
275
  fn build_inline_image_config(_ruby: &Ruby, config: Option<Value>) -> Result<InlineImageConfig, Error> {
277
- let mut cfg = InlineImageConfig::new(DEFAULT_INLINE_IMAGE_LIMIT);
276
+ let mut update = InlineImageConfigUpdate::default();
278
277
 
279
278
  let Some(config) = config else {
280
- return Ok(cfg);
279
+ return Ok(InlineImageConfig::new(DEFAULT_INLINE_IMAGE_LIMIT));
281
280
  };
282
281
 
283
282
  if config.is_nil() {
284
- return Ok(cfg);
283
+ return Ok(InlineImageConfig::new(DEFAULT_INLINE_IMAGE_LIMIT));
285
284
  }
286
285
 
287
286
  let hash = RHash::from_value(config).ok_or_else(|| arg_error("inline image config must be provided as a Hash"))?;
@@ -290,27 +289,27 @@ fn build_inline_image_config(_ruby: &Ruby, config: Option<Value>) -> Result<Inli
290
289
  let key_name = symbol_to_string(key)?;
291
290
  match key_name.as_str() {
292
291
  "max_decoded_size_bytes" => {
293
- cfg.max_decoded_size_bytes = u64::try_convert(val)?;
292
+ update.max_decoded_size_bytes = Some(u64::try_convert(val)?);
294
293
  }
295
294
  "filename_prefix" => {
296
- cfg.filename_prefix = if val.is_nil() {
295
+ update.filename_prefix = if val.is_nil() {
297
296
  None
298
297
  } else {
299
298
  Some(String::try_convert(val)?)
300
299
  };
301
300
  }
302
301
  "capture_svg" => {
303
- cfg.capture_svg = bool::try_convert(val)?;
302
+ update.capture_svg = Some(bool::try_convert(val)?);
304
303
  }
305
304
  "infer_dimensions" => {
306
- cfg.infer_dimensions = bool::try_convert(val)?;
305
+ update.infer_dimensions = Some(bool::try_convert(val)?);
307
306
  }
308
307
  _ => {}
309
308
  }
310
309
  Ok(ForEach::Continue)
311
310
  })?;
312
311
 
313
- Ok(cfg)
312
+ Ok(InlineImageConfig::from_update(update))
314
313
  }
315
314
 
316
315
  fn inline_image_to_value(ruby: &Ruby, image: InlineImage) -> Result<Value, Error> {
@@ -328,15 +327,7 @@ fn inline_image_to_value(ruby: &Ruby, image: InlineImage) -> Result<Value, Error
328
327
  let data_value = ruby.str_from_slice(&data);
329
328
  hash.aset(ruby.intern("data"), data_value)?;
330
329
 
331
- let format_value = match format {
332
- InlineImageFormat::Png => "png".to_string(),
333
- InlineImageFormat::Jpeg => "jpeg".to_string(),
334
- InlineImageFormat::Gif => "gif".to_string(),
335
- InlineImageFormat::Bmp => "bmp".to_string(),
336
- InlineImageFormat::Webp => "webp".to_string(),
337
- InlineImageFormat::Svg => "svg".to_string(),
338
- InlineImageFormat::Other(other) => other,
339
- };
330
+ let format_value = format.to_string();
340
331
  hash.aset(ruby.intern("format"), format_value)?;
341
332
 
342
333
  match filename {
@@ -358,10 +349,7 @@ fn inline_image_to_value(ruby: &Ruby, image: InlineImage) -> Result<Value, Error
358
349
  hash.aset(ruby.intern("dimensions"), ruby.qnil())?;
359
350
  }
360
351
 
361
- let source_value = match source {
362
- InlineImageSource::ImgDataUri => "img_data_uri",
363
- InlineImageSource::SvgElement => "svg_element",
364
- };
352
+ let source_value = source.to_string();
365
353
  hash.aset(ruby.intern("source"), source_value)?;
366
354
 
367
355
  let attrs = ruby.hash_new();
@@ -404,7 +392,7 @@ fn convert_fn(ruby: &Ruby, args: &[Value]) -> Result<String, Error> {
404
392
  let html = parsed.required.0;
405
393
  let options = build_conversion_options(ruby, parsed.optional.0)?;
406
394
 
407
- guard_panic(|| convert_inner(&html, Some(options))).map_err(conversion_error)
395
+ guard_panic(|| profiling::maybe_profile(|| convert_inner(&html, Some(options)))).map_err(conversion_error)
408
396
  }
409
397
 
410
398
  fn options_handle_fn(ruby: &Ruby, args: &[Value]) -> Result<OptionsHandle, Error> {
@@ -419,7 +407,7 @@ fn convert_with_options_handle_fn(_ruby: &Ruby, args: &[Value]) -> Result<String
419
407
  let handle = parsed.required.1;
420
408
  let options = handle.0.clone();
421
409
 
422
- guard_panic(|| convert_inner(&html, Some(options))).map_err(conversion_error)
410
+ guard_panic(|| profiling::maybe_profile(|| convert_inner(&html, Some(options)))).map_err(conversion_error)
423
411
  }
424
412
 
425
413
  fn convert_with_inline_images_fn(ruby: &Ruby, args: &[Value]) -> Result<Value, Error> {
@@ -434,6 +422,19 @@ fn convert_with_inline_images_fn(ruby: &Ruby, args: &[Value]) -> Result<Value, E
434
422
  extraction_to_value(ruby, extraction)
435
423
  }
436
424
 
425
+ fn convert_with_inline_images_handle_fn(ruby: &Ruby, args: &[Value]) -> Result<Value, Error> {
426
+ let parsed = scan_args::<(String, &OptionsHandle), (Option<Value>,), (), (), (), ()>(args)?;
427
+ let html = parsed.required.0;
428
+ let handle = parsed.required.1;
429
+ let options = handle.0.clone();
430
+ let config = build_inline_image_config(ruby, parsed.optional.0)?;
431
+
432
+ let extraction =
433
+ guard_panic(|| convert_with_inline_images_inner(&html, Some(options), config)).map_err(conversion_error)?;
434
+
435
+ extraction_to_value(ruby, extraction)
436
+ }
437
+
437
438
  #[cfg(feature = "metadata")]
438
439
  fn build_metadata_config(_ruby: &Ruby, config: Option<Value>) -> Result<RustMetadataConfig, Error> {
439
440
  let mut cfg = RustMetadataConfig::default();
@@ -451,6 +452,9 @@ fn build_metadata_config(_ruby: &Ruby, config: Option<Value>) -> Result<RustMeta
451
452
  hash.foreach(|key: Value, val: Value| {
452
453
  let key_name = symbol_to_string(key)?;
453
454
  match key_name.as_str() {
455
+ "extract_document" => {
456
+ cfg.extract_document = bool::try_convert(val)?;
457
+ }
454
458
  "extract_headers" => {
455
459
  cfg.extract_headers = bool::try_convert(val)?;
456
460
  }
@@ -492,44 +496,8 @@ fn btreemap_to_ruby_hash(ruby: &Ruby, map: std::collections::BTreeMap<String, St
492
496
  }
493
497
 
494
498
  #[cfg(feature = "metadata")]
495
- fn text_direction_to_string(text_direction: Option<RustTextDirection>) -> Option<&'static str> {
496
- match text_direction {
497
- Some(RustTextDirection::LeftToRight) => Some("ltr"),
498
- Some(RustTextDirection::RightToLeft) => Some("rtl"),
499
- Some(RustTextDirection::Auto) => Some("auto"),
500
- None => None,
501
- }
502
- }
503
-
504
- #[cfg(feature = "metadata")]
505
- fn link_type_to_string(link_type: &RustLinkType) -> &'static str {
506
- match link_type {
507
- RustLinkType::Anchor => "anchor",
508
- RustLinkType::Internal => "internal",
509
- RustLinkType::External => "external",
510
- RustLinkType::Email => "email",
511
- RustLinkType::Phone => "phone",
512
- RustLinkType::Other => "other",
513
- }
514
- }
515
-
516
- #[cfg(feature = "metadata")]
517
- fn image_type_to_string(image_type: &RustImageType) -> &'static str {
518
- match image_type {
519
- RustImageType::DataUri => "data_uri",
520
- RustImageType::InlineSvg => "inline_svg",
521
- RustImageType::External => "external",
522
- RustImageType::Relative => "relative",
523
- }
524
- }
525
-
526
- #[cfg(feature = "metadata")]
527
- fn structured_data_type_to_string(data_type: &RustStructuredDataType) -> &'static str {
528
- match data_type {
529
- RustStructuredDataType::JsonLd => "json_ld",
530
- RustStructuredDataType::Microdata => "microdata",
531
- RustStructuredDataType::RDFa => "rdfa",
532
- }
499
+ fn text_direction_to_string(text_direction: Option<RustTextDirection>) -> Option<String> {
500
+ text_direction.map(|direction| direction.to_string())
533
501
  }
534
502
 
535
503
  #[cfg(feature = "metadata")]
@@ -591,7 +559,7 @@ fn links_to_ruby(ruby: &Ruby, links: Vec<RustLinkMetadata>) -> Result<Value, Err
591
559
  hash.aset(ruby.intern("href"), link.href)?;
592
560
  hash.aset(ruby.intern("text"), link.text)?;
593
561
  hash.aset(ruby.intern("title"), opt_string_to_ruby(ruby, link.title)?)?;
594
- hash.aset(ruby.intern("link_type"), link_type_to_string(&link.link_type))?;
562
+ hash.aset(ruby.intern("link_type"), link.link_type.to_string())?;
595
563
 
596
564
  let rel_array = ruby.ary_new();
597
565
  for r in link.rel {
@@ -626,7 +594,7 @@ fn images_to_ruby(ruby: &Ruby, images: Vec<RustImageMetadata>) -> Result<Value,
626
594
  }
627
595
  }
628
596
 
629
- hash.aset(ruby.intern("image_type"), image_type_to_string(&image.image_type))?;
597
+ hash.aset(ruby.intern("image_type"), image.image_type.to_string())?;
630
598
  hash.aset(
631
599
  ruby.intern("attributes"),
632
600
  btreemap_to_ruby_hash(ruby, image.attributes)?,
@@ -641,10 +609,7 @@ fn structured_data_to_ruby(ruby: &Ruby, data: Vec<RustStructuredData>) -> Result
641
609
  let array = ruby.ary_new();
642
610
  for item in data {
643
611
  let hash = ruby.hash_new();
644
- hash.aset(
645
- ruby.intern("data_type"),
646
- structured_data_type_to_string(&item.data_type),
647
- )?;
612
+ hash.aset(ruby.intern("data_type"), item.data_type.to_string())?;
648
613
  hash.aset(ruby.intern("raw_json"), item.raw_json)?;
649
614
  hash.aset(ruby.intern("schema_type"), opt_string_to_ruby(ruby, item.schema_type)?)?;
650
615
  array.push(hash)?;
@@ -688,6 +653,41 @@ fn convert_with_metadata_fn(ruby: &Ruby, args: &[Value]) -> Result<Value, Error>
688
653
  Ok(array.as_value())
689
654
  }
690
655
 
656
+ #[cfg(feature = "metadata")]
657
+ fn convert_with_metadata_handle_fn(ruby: &Ruby, args: &[Value]) -> Result<Value, Error> {
658
+ let parsed = scan_args::<(String, &OptionsHandle), (Option<Value>,), (), (), (), ()>(args)?;
659
+ let html = parsed.required.0;
660
+ let handle = parsed.required.1;
661
+ let options = handle.0.clone();
662
+ let metadata_config = build_metadata_config(ruby, parsed.optional.0)?;
663
+
664
+ let (markdown, metadata) =
665
+ guard_panic(|| convert_with_metadata_inner(&html, Some(options), metadata_config)).map_err(conversion_error)?;
666
+
667
+ let array = ruby.ary_new();
668
+ array.push(markdown)?;
669
+ array.push(extended_metadata_to_ruby(ruby, metadata)?)?;
670
+
671
+ Ok(array.as_value())
672
+ }
673
+
674
+ fn start_profiling_fn(_ruby: &Ruby, args: &[Value]) -> Result<bool, Error> {
675
+ let output = args.first().ok_or_else(|| arg_error("output_path required"))?;
676
+ let output: String = String::try_convert(*output)?;
677
+ let freq = if let Some(value) = args.get(1) {
678
+ i32::try_convert(*value)?
679
+ } else {
680
+ 1000
681
+ };
682
+ profiling::start(PathBuf::from(output), freq).map_err(conversion_error)?;
683
+ Ok(true)
684
+ }
685
+
686
+ fn stop_profiling_fn(_ruby: &Ruby, _args: &[Value]) -> Result<bool, Error> {
687
+ profiling::stop().map_err(conversion_error)?;
688
+ Ok(true)
689
+ }
690
+
691
691
  #[magnus::init]
692
692
  fn init(ruby: &Ruby) -> Result<(), Error> {
693
693
  let module = ruby.define_module("HtmlToMarkdown")?;
@@ -698,9 +698,20 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
698
698
  "convert_with_inline_images",
699
699
  function!(convert_with_inline_images_fn, -1),
700
700
  )?;
701
+ module.define_singleton_method(
702
+ "convert_with_inline_images_handle",
703
+ function!(convert_with_inline_images_handle_fn, -1),
704
+ )?;
701
705
 
702
706
  #[cfg(feature = "metadata")]
703
707
  module.define_singleton_method("convert_with_metadata", function!(convert_with_metadata_fn, -1))?;
708
+ #[cfg(feature = "metadata")]
709
+ module.define_singleton_method(
710
+ "convert_with_metadata_handle",
711
+ function!(convert_with_metadata_handle_fn, -1),
712
+ )?;
713
+ module.define_singleton_method("start_profiling", function!(start_profiling_fn, -1))?;
714
+ module.define_singleton_method("stop_profiling", function!(stop_profiling_fn, -1))?;
704
715
 
705
716
  Ok(())
706
717
  }
@@ -0,0 +1,211 @@
1
+ use html_to_markdown_rs::{ConversionError, Result};
2
+ use std::path::PathBuf;
3
+
4
#[cfg(all(not(target_os = "windows"), feature = "profiling"))]
mod enabled {
    //! pprof-backed profiling, compiled only on non-Windows targets with the
    //! `profiling` feature enabled.
    //!
    //! Two entry styles are offered:
    //! * explicit sessions via [`start`] / [`stop`], and
    //! * environment-driven profiling of a single closure via [`maybe_profile`],
    //!   configured by the `HTML_TO_MARKDOWN_PROFILE_*` variables below.

    use super::{ConversionError, PathBuf, Result};
    use std::sync::atomic::{AtomicBool, Ordering};
    use std::sync::{Mutex, OnceLock};

    /// Environment variable holding the flamegraph output path.
    const ENV_OUTPUT: &str = "HTML_TO_MARKDOWN_PROFILE_OUTPUT";
    /// Environment variable holding the sampling frequency.
    const ENV_FREQUENCY: &str = "HTML_TO_MARKDOWN_PROFILE_FREQUENCY";
    /// Environment variable controlling whether only the first call is profiled.
    const ENV_ONCE: &str = "HTML_TO_MARKDOWN_PROFILE_ONCE";

    /// Sampling frequency used when none (or an unusable one) is configured.
    const DEFAULT_FREQUENCY: i32 = 1000;
    /// Library name fragments handed to the profiler's blocklist.
    const BLOCKLIST: &[&str] = &["libc", "libpthread", "libgcc", "libm"];

    /// Set once an environment-driven profile has been attempted.
    static PROFILED_ONCE: AtomicBool = AtomicBool::new(false);
    /// True while either an explicit session or an environment-driven profile runs.
    static PROFILE_ACTIVE: AtomicBool = AtomicBool::new(false);

    /// Profiling settings read from the environment (cached after first use).
    struct EnvProfileConfig {
        /// Flamegraph destination; `None` disables environment-driven profiling.
        output: Option<PathBuf>,
        /// When true, only the first eligible call is profiled.
        profile_once: bool,
        /// Sampling frequency; always strictly positive.
        frequency: i32,
    }

    /// Reads the `HTML_TO_MARKDOWN_PROFILE_*` variables once and caches the result.
    fn env_profile_config() -> &'static EnvProfileConfig {
        static ENV_CONFIG: OnceLock<EnvProfileConfig> = OnceLock::new();
        ENV_CONFIG.get_or_init(|| {
            let output = match std::env::var(ENV_OUTPUT) {
                Ok(value) if !value.trim().is_empty() => Some(PathBuf::from(value)),
                _ => None,
            };

            // Defaults to true; only an explicit "0" / "false" / "no" turns it off.
            // Surrounding whitespace and letter case are ignored so that values
            // such as "False" or " 0" behave the way the user meant.
            let profile_once = match std::env::var(ENV_ONCE) {
                Ok(value) => !matches!(
                    value.trim().to_ascii_lowercase().as_str(),
                    "0" | "false" | "no"
                ),
                Err(_) => true,
            };

            // Unparseable or non-positive values fall back to the default rather
            // than reaching the profiler as a nonsensical sampling rate.
            let frequency = std::env::var(ENV_FREQUENCY)
                .ok()
                .and_then(|value| value.trim().parse::<i32>().ok())
                .filter(|hz| *hz > 0)
                .unwrap_or(DEFAULT_FREQUENCY);

            EnvProfileConfig {
                output,
                profile_once,
                frequency,
            }
        })
    }

    /// State of an explicit `start`/`stop` session.
    struct ProfileState {
        guard: Option<pprof::ProfilerGuard<'static>>,
        output: Option<PathBuf>,
    }

    /// Lazily initialised, process-wide session state.
    fn state() -> &'static Mutex<ProfileState> {
        static STATE: OnceLock<Mutex<ProfileState>> = OnceLock::new();
        STATE.get_or_init(|| {
            Mutex::new(ProfileState {
                guard: None,
                output: None,
            })
        })
    }

    /// Builds a profiler guard sampling at `frequency`, rejecting non-positive rates.
    fn build_guard(frequency: i32) -> Result<pprof::ProfilerGuard<'static>> {
        if frequency <= 0 {
            return Err(ConversionError::Other(format!(
                "profiling frequency must be positive, got {frequency}"
            )));
        }

        pprof::ProfilerGuardBuilder::default()
            .frequency(frequency)
            .blocklist(BLOCKLIST)
            .build()
            .map_err(|err| ConversionError::Other(format!("Profiling init failed: {err}")))
    }

    /// Renders the samples collected by `guard` as a flamegraph at `output_path`,
    /// creating the parent directory first when the path has one.
    fn write_flamegraph(
        guard: &pprof::ProfilerGuard<'static>,
        output_path: &std::path::Path,
    ) -> Result<()> {
        if let Some(parent) = output_path.parent() {
            std::fs::create_dir_all(parent).map_err(ConversionError::IoError)?;
        }

        let report = guard
            .report()
            .build()
            .map_err(|err| ConversionError::Other(format!("Profiling report failed: {err}")))?;

        let file = std::fs::File::create(output_path).map_err(ConversionError::IoError)?;
        report
            .flamegraph(file)
            .map_err(|err| ConversionError::Other(format!("Flamegraph write failed: {err}")))
    }

    /// Starts an explicit profiling session whose flamegraph will be written to
    /// `output_path` when [`stop`] is called.
    ///
    /// # Errors
    /// Returns `ConversionError::Other` when the state lock is poisoned, a
    /// session is already active, `frequency` is not positive, or the profiler
    /// cannot be initialised.
    pub fn start(output_path: PathBuf, frequency: i32) -> Result<()> {
        let mut session = state()
            .lock()
            .map_err(|_| ConversionError::Other("profiling state lock poisoned".to_string()))?;

        if session.guard.is_some() {
            return Err(ConversionError::Other("profiling already active".to_string()));
        }

        session.guard = Some(build_guard(frequency)?);
        session.output = Some(output_path);
        PROFILE_ACTIVE.store(true, Ordering::Release);
        Ok(())
    }

    /// Ends the explicit session begun by [`start`] and writes its flamegraph.
    ///
    /// # Errors
    /// Returns `ConversionError::Other` when the state lock is poisoned, no
    /// session is active, or report generation fails; `ConversionError::IoError`
    /// when the output directory or file cannot be created.
    pub fn stop() -> Result<()> {
        let (guard, output) = {
            let mut session = state()
                .lock()
                .map_err(|_| ConversionError::Other("profiling state lock poisoned".to_string()))?;
            let guard = session.guard.take();
            let output = session.output.take();
            // Only clear the flag when an explicit session really existed, so a
            // stray `stop` cannot reset the flag owned by an environment-driven
            // profile that is still running inside `maybe_profile`.
            if guard.is_some() {
                PROFILE_ACTIVE.store(false, Ordering::Release);
            }
            (guard, output)
        };

        let Some(guard) = guard else {
            return Err(ConversionError::Other("profiling not active".to_string()));
        };
        let Some(output_path) = output else {
            return Err(ConversionError::Other("profiling output path missing".to_string()));
        };

        write_flamegraph(&guard, &output_path)
    }

    /// Clears `PROFILE_ACTIVE` when dropped, so the flag is released on every
    /// exit path of `maybe_profile` (including early `?` returns and unwinding).
    struct ActiveFlagReset;

    impl Drop for ActiveFlagReset {
        fn drop(&mut self) {
            PROFILE_ACTIVE.store(false, Ordering::Release);
        }
    }

    /// Runs `f`, profiling it when `HTML_TO_MARKDOWN_PROFILE_OUTPUT` is set and
    /// no other profile is active.
    ///
    /// `f` runs unprofiled when a profile is already active, when no output path
    /// is configured, or when profile-once mode has already been used.
    ///
    /// # Errors
    /// Propagates the error returned by `f`. When profiling was attempted, a
    /// profiler initialisation failure is returned before `f` runs, and a report
    /// or file failure is returned even though `f` itself succeeded.
    pub fn maybe_profile<T, F>(f: F) -> Result<T>
    where
        F: FnOnce() -> Result<T>,
    {
        if PROFILE_ACTIVE.load(Ordering::Acquire) {
            return f();
        }

        let config = env_profile_config();
        let Some(output_path) = config.output.as_deref() else {
            return f();
        };

        if config.profile_once && PROFILED_ONCE.swap(true, Ordering::SeqCst) {
            return f();
        }

        // Claim the flag atomically: a separate load followed by a store would
        // let two threads both believe they own the profiler.
        if PROFILE_ACTIVE
            .compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire)
            .is_err()
        {
            return f();
        }
        // Declared before `guard` so the profiler guard is dropped first and the
        // flag is released only afterwards.
        let _active = ActiveFlagReset;

        let guard = build_guard(config.frequency)?;

        let result = f();

        // A failed call produces no flamegraph; its own error is returned as is.
        if result.is_ok() {
            write_flamegraph(&guard, output_path)?;
        }

        result
    }
}
173
+
174
+ #[cfg(all(not(target_os = "windows"), feature = "profiling"))]
175
+ pub use enabled::{maybe_profile, start, stop};
176
+
177
/// Windows stand-in for `start`: both arguments are ignored and the call always
/// fails with a `ConversionError::Other` stating that profiling is unsupported.
#[cfg(target_os = "windows")]
pub fn start(_output_path: PathBuf, _frequency: i32) -> Result<()> {
    let reason = String::from("Profiling is not supported on Windows");
    Err(ConversionError::Other(reason))
}
183
+
184
+ #[cfg(all(not(target_os = "windows"), not(feature = "profiling")))]
185
+ pub fn start(_output_path: PathBuf, _frequency: i32) -> Result<()> {
186
+ Err(ConversionError::Other(
187
+ "Profiling is disabled; rebuild with the profiling feature".to_string(),
188
+ ))
189
+ }
190
+
191
/// Windows stand-in for `stop`: always fails with a `ConversionError::Other`
/// stating that profiling is unsupported.
#[cfg(target_os = "windows")]
pub fn stop() -> Result<()> {
    let reason = String::from("Profiling is not supported on Windows");
    Err(ConversionError::Other(reason))
}
197
+
198
+ #[cfg(all(not(target_os = "windows"), not(feature = "profiling")))]
199
+ pub fn stop() -> Result<()> {
200
+ Err(ConversionError::Other(
201
+ "Profiling is disabled; rebuild with the profiling feature".to_string(),
202
+ ))
203
+ }
204
+
205
+ #[cfg(any(target_os = "windows", not(feature = "profiling")))]
206
+ pub fn maybe_profile<T, F>(f: F) -> Result<T>
207
+ where
208
+ F: FnOnce() -> Result<T>,
209
+ {
210
+ f()
211
+ }
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HtmlToMarkdown
4
- VERSION = '2.15.0'
4
+ VERSION = '2.16.1'
5
5
  end
@@ -12,9 +12,11 @@ module HtmlToMarkdown
12
12
  class << self
13
13
  alias native_convert convert
14
14
  alias native_convert_with_inline_images convert_with_inline_images
15
+ alias native_convert_with_inline_images_handle convert_with_inline_images_handle
15
16
  alias native_options options
16
17
  alias native_convert_with_options convert_with_options
17
18
  alias native_convert_with_metadata convert_with_metadata
19
+ alias native_convert_with_metadata_handle convert_with_metadata_handle
18
20
  end
19
21
 
20
22
  module_function
@@ -31,6 +33,10 @@ module HtmlToMarkdown
31
33
  native_convert_with_inline_images(html.to_s, options, image_config)
32
34
  end
33
35
 
36
# Convert HTML with inline image extraction using a pre-built options handle.
#
# @param html [#to_s] markup to convert; coerced with +to_s+ before the native call
# @param options_handle [Options] handle passed through unchanged to the native method
# @param image_config [Hash, nil] inline image settings, forwarded as given
# @return whatever +native_convert_with_inline_images_handle+ returns
def convert_with_inline_images_handle(html, options_handle, image_config = nil)
  markup = html.to_s
  native_convert_with_inline_images_handle(markup, options_handle, image_config)
end
39
+
34
40
  def options(options_hash = nil)
35
41
  native_options(options_hash)
36
42
  end
@@ -160,4 +166,8 @@ module HtmlToMarkdown
160
166
  def convert_with_metadata(html, options = nil, metadata_config = nil)
161
167
  native_convert_with_metadata(html.to_s, options, metadata_config)
162
168
  end
169
+
170
# Convert HTML and extract metadata using a pre-built options handle.
#
# @param html [#to_s] markup to convert; coerced with +to_s+ before the native call
# @param options_handle [Options] handle passed through unchanged to the native method
# @param metadata_config [Hash, nil] metadata extraction settings, forwarded as given
# @return whatever +native_convert_with_metadata_handle+ returns
def convert_with_metadata_handle(html, options_handle, metadata_config = nil)
  markup = html.to_s
  native_convert_with_metadata_handle(markup, options_handle, metadata_config)
end
163
173
  end
@@ -162,11 +162,21 @@ module HtmlToMarkdown
162
162
  def self.native_convert: (String html, conversion_options? options) -> String
163
163
  def self.native_options: (conversion_options? options_hash) -> Options
164
164
  def self.native_convert_with_options: (String html, Options options_handle) -> String
165
+ def self.native_convert_with_inline_images_handle: (
166
+ String html,
167
+ Options options_handle,
168
+ inline_image_config? image_config
169
+ ) -> html_extraction
165
170
  def self.native_convert_with_inline_images: (
166
171
  String html,
167
172
  conversion_options? options,
168
173
  inline_image_config? image_config
169
174
  ) -> html_extraction
175
+ def self.native_convert_with_metadata_handle: (
176
+ String html,
177
+ Options options_handle,
178
+ metadata_config? metadata_config
179
+ ) -> [String, extended_metadata]
170
180
  def self.native_convert_with_metadata: (
171
181
  String html,
172
182
  conversion_options? options,
@@ -176,11 +186,21 @@ module HtmlToMarkdown
176
186
  def native_convert: (String html, conversion_options? options) -> String
177
187
  def native_options: (conversion_options? options_hash) -> Options
178
188
  def native_convert_with_options: (String html, Options options_handle) -> String
189
+ def native_convert_with_inline_images_handle: (
190
+ String html,
191
+ Options options_handle,
192
+ inline_image_config? image_config
193
+ ) -> html_extraction
179
194
  def native_convert_with_inline_images: (
180
195
  String html,
181
196
  conversion_options? options,
182
197
  inline_image_config? image_config
183
198
  ) -> html_extraction
199
+ def native_convert_with_metadata_handle: (
200
+ String html,
201
+ Options options_handle,
202
+ metadata_config? metadata_config
203
+ ) -> [String, extended_metadata]
184
204
  def native_convert_with_metadata: (
185
205
  String html,
186
206
  conversion_options? options,
@@ -197,6 +217,11 @@ module HtmlToMarkdown
197
217
 
198
218
  # Convert HTML using a pre-built options handle
199
219
  def self.convert_with_options: (String html, Options options_handle) -> String
220
+ def self.convert_with_inline_images_handle: (
221
+ String html,
222
+ Options options_handle,
223
+ ?inline_image_config image_config
224
+ ) -> html_extraction
200
225
 
201
226
  # Convert HTML with inline image extraction
202
227
  def self.convert_with_inline_images: (
@@ -236,11 +261,21 @@ module HtmlToMarkdown
236
261
  ?conversion_options options,
237
262
  ?metadata_config metadata_config
238
263
  ) -> [String, extended_metadata]
264
+ def self.convert_with_metadata_handle: (
265
+ String html,
266
+ Options options_handle,
267
+ ?metadata_config metadata_config
268
+ ) -> [String, extended_metadata]
239
269
 
240
270
  # Instance method versions (created by module_function)
241
271
  def convert: (String html, ?conversion_options options) -> String
242
272
  def options: (?conversion_options options_hash) -> Options
243
273
  def convert_with_options: (String html, Options options_handle) -> String
274
+ def convert_with_inline_images_handle: (
275
+ String html,
276
+ Options options_handle,
277
+ ?inline_image_config image_config
278
+ ) -> html_extraction
244
279
  def convert_with_inline_images: (
245
280
  String html,
246
281
  ?conversion_options options,
@@ -251,4 +286,9 @@ module HtmlToMarkdown
251
286
  ?conversion_options options,
252
287
  ?metadata_config metadata_config
253
288
  ) -> [String, extended_metadata]
289
+ def convert_with_metadata_handle: (
290
+ String html,
291
+ Options options_handle,
292
+ ?metadata_config metadata_config
293
+ ) -> [String, extended_metadata]
254
294
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-to-markdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.15.0
4
+ version: 2.16.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Na'aman Hirschfeld
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-12-19 00:00:00.000000000 Z
11
+ date: 2025-12-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -56,6 +56,7 @@ files:
56
56
  - ext/html-to-markdown-rb/native/README.md
57
57
  - ext/html-to-markdown-rb/native/extconf.rb
58
58
  - ext/html-to-markdown-rb/native/src/lib.rs
59
+ - ext/html-to-markdown-rb/native/src/profiling.rs
59
60
  - html-to-markdown-rb.gemspec
60
61
  - lib/html_to_markdown.rb
61
62
  - lib/html_to_markdown/cli.rb