html-to-markdown 2.9.1 → 2.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8e4b858a910ecf3f0bdbc020b6815e59e4350ff603a45480adf52bd0cc8f1af7
4
- data.tar.gz: 75c4596c2e3d7490b317e31869bfe0cf995ebc7fb5ed14a4011b1829ff2d16b2
3
+ metadata.gz: d3d074895d2d481529034534a5d788509ec845973fcfb5ed9b987218f1b526c9
4
+ data.tar.gz: f43ecf9a97013f806132fd9dbec4cc58ad644feeaef8cd8ac0f3409c9bdb4675
5
5
  SHA512:
6
- metadata.gz: 7178c3fcf3abe87a2b49350c7a328f2ce31755ad2907c6a6920e60d17700e3cbc475078f9505bf146a1b6358375e79c04421a1e69b2e3666db9a2a15bde54947
7
- data.tar.gz: 5eb86a9c629a0d4bc0f8cb0ee05562f28ad5c4c73bcde98013b5283281df0064de489ee0130911cfe24c8817fcad69a96da00c89be278f3d784e0db45fdc68b9
6
+ metadata.gz: f702eb6c46d5c8cb15d1dfcf20fc7af0fa31ca499de696c79266e9763fa1b4e6914fdba8d1c33543b8e45303474c90d0ef1840f1c2af8e2526f390126b91c6f0
7
+ data.tar.gz: 7213faa4510619678b4336affd9f913cca36414526669b79db136c3960ed9d50b1483d5c31620d41e03496dd5c92938bdb28cb91e0dc073d377b2a72d4d2ad39
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- html-to-markdown (2.9.1)
4
+ html-to-markdown (2.10.1)
5
5
  rb_sys (>= 0.9, < 1.0)
6
6
 
7
7
  GEM
@@ -24,7 +24,7 @@ GEM
24
24
  base64 (0.3.0)
25
25
  bigdecimal (3.3.1)
26
26
  concurrent-ruby (1.3.5)
27
- connection_pool (2.5.4)
27
+ connection_pool (2.5.5)
28
28
  csv (3.3.5)
29
29
  diff-lcs (1.6.2)
30
30
  drb (2.2.3)
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "html-to-markdown-rb"
3
- version = "2.9.1"
3
+ version = "2.10.1"
4
4
  edition = "2024"
5
5
  authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
6
6
  license = "MIT"
@@ -21,7 +21,7 @@ crate-type = ["cdylib", "rlib"]
21
21
  default = []
22
22
 
23
23
  [dependencies]
24
- html-to-markdown-rs = { version = "2.9.1", features = ["inline-images"] }
24
+ html-to-markdown-rs = { version = "2.10.1", features = ["inline-images"] }
25
25
  magnus = { git = "https://github.com/matsadler/magnus", rev = "f6db11769efb517427bf7f121f9c32e18b059b38", features = ["rb-sys"] }
26
26
 
27
27
  [dev-dependencies]
@@ -2,7 +2,7 @@ use html_to_markdown_rs::{
2
2
  CodeBlockStyle, ConversionOptions, HeadingStyle, HighlightStyle, HtmlExtraction, InlineImage, InlineImageConfig,
3
3
  InlineImageFormat, InlineImageSource, InlineImageWarning, ListIndentType, NewlineStyle, PreprocessingOptions,
4
4
  PreprocessingPreset, WhitespaceMode, convert as convert_inner,
5
- convert_with_inline_images as convert_with_inline_images_inner, error::ConversionError,
5
+ convert_with_inline_images as convert_with_inline_images_inner, error::ConversionError, safety::guard_panic,
6
6
  };
7
7
  use magnus::prelude::*;
8
8
  use magnus::r_hash::ForEach;
@@ -17,6 +17,9 @@ const DEFAULT_INLINE_IMAGE_LIMIT: u64 = 5 * 1024 * 1024;
17
17
  fn conversion_error(err: ConversionError) -> Error {
18
18
  match err {
19
19
  ConversionError::ConfigError(msg) => arg_error(msg),
20
+ ConversionError::Panic(message) => {
21
+ runtime_error(format!("html-to-markdown panic during conversion: {message}"))
22
+ }
20
23
  other => runtime_error(other.to_string()),
21
24
  }
22
25
  }
@@ -390,7 +393,7 @@ fn convert_fn(ruby: &Ruby, args: &[Value]) -> Result<String, Error> {
390
393
  let html = parsed.required.0;
391
394
  let options = build_conversion_options(ruby, parsed.optional.0)?;
392
395
 
393
- convert_inner(&html, Some(options)).map_err(conversion_error)
396
+ guard_panic(|| convert_inner(&html, Some(options))).map_err(conversion_error)
394
397
  }
395
398
 
396
399
  fn options_handle_fn(ruby: &Ruby, args: &[Value]) -> Result<OptionsHandle, Error> {
@@ -403,7 +406,9 @@ fn convert_with_options_handle_fn(_ruby: &Ruby, args: &[Value]) -> Result<String
403
406
  let parsed = scan_args::<(String, &OptionsHandle), (), (), (), (), ()>(args)?;
404
407
  let html = parsed.required.0;
405
408
  let handle = parsed.required.1;
406
- convert_inner(&html, Some(handle.0.clone())).map_err(conversion_error)
409
+ let options = handle.0.clone();
410
+
411
+ guard_panic(|| convert_inner(&html, Some(options))).map_err(conversion_error)
407
412
  }
408
413
 
409
414
  fn convert_with_inline_images_fn(ruby: &Ruby, args: &[Value]) -> Result<Value, Error> {
@@ -412,7 +417,8 @@ fn convert_with_inline_images_fn(ruby: &Ruby, args: &[Value]) -> Result<Value, E
412
417
  let options = build_conversion_options(ruby, parsed.optional.0)?;
413
418
  let config = build_inline_image_config(ruby, parsed.optional.1)?;
414
419
 
415
- let extraction = convert_with_inline_images_inner(&html, Some(options), config).map_err(conversion_error)?;
420
+ let extraction =
421
+ guard_panic(|| convert_with_inline_images_inner(&html, Some(options), config)).map_err(conversion_error)?;
416
422
 
417
423
  extraction_to_value(ruby, extraction)
418
424
  }
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HtmlToMarkdown
4
- VERSION = '2.9.1'
4
+ VERSION = '2.10.1'
5
5
  end
data/spec/convert_spec.rb CHANGED
@@ -35,4 +35,43 @@ RSpec.describe HtmlToMarkdown do
35
35
  expect(result).to include('# Hello #')
36
36
  end
37
37
  end
38
+
39
+ describe 'panic handling' do
40
+ context 'when a Rust panic would occur' do
41
+ it 'catches panics in convert method' do
42
+ malformed_html = "#{'<' * 100_000}div#{'>' * 100_000}"
43
+
44
+ begin
45
+ result = described_class.convert(malformed_html)
46
+ expect(result).to be_a(String)
47
+ rescue RuntimeError => e
48
+ expect(e.message).to match(/html-to-markdown panic during conversion/)
49
+ end
50
+ end
51
+
52
+ it 'catches panics in convert_with_options method' do
53
+ malformed_html = "#{'<' * 100_000}div#{'>' * 100_000}"
54
+ handle = described_class.options(heading_style: :atx)
55
+
56
+ begin
57
+ result = described_class.convert_with_options(malformed_html, handle)
58
+ expect(result).to be_a(String)
59
+ rescue RuntimeError => e
60
+ expect(e.message).to match(/html-to-markdown panic during conversion/)
61
+ end
62
+ end
63
+
64
+ it 'catches panics in convert_with_inline_images method' do
65
+ malformed_html = "#{'<' * 100_000}div#{'>' * 100_000}"
66
+
67
+ begin
68
+ result = described_class.convert_with_inline_images(malformed_html)
69
+ expect(result).to be_a(Hash)
70
+ expect(result).to include(:markdown, :inline_images, :warnings)
71
+ rescue RuntimeError => e
72
+ expect(e.message).to match(/html-to-markdown panic during conversion/)
73
+ end
74
+ end
75
+ end
76
+ end
38
77
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-to-markdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.9.1
4
+ version: 2.10.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Na'aman Hirschfeld
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-11-21 00:00:00.000000000 Z
11
+ date: 2025-12-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys