html-to-markdown 2.9.1 → 2.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/ext/html-to-markdown-rb/native/Cargo.toml +2 -2
- data/ext/html-to-markdown-rb/native/src/lib.rs +10 -4
- data/lib/html_to_markdown/version.rb +1 -1
- data/spec/convert_spec.rb +39 -0
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d3d074895d2d481529034534a5d788509ec845973fcfb5ed9b987218f1b526c9
|
|
4
|
+
data.tar.gz: f43ecf9a97013f806132fd9dbec4cc58ad644feeaef8cd8ac0f3409c9bdb4675
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f702eb6c46d5c8cb15d1dfcf20fc7af0fa31ca499de696c79266e9763fa1b4e6914fdba8d1c33543b8e45303474c90d0ef1840f1c2af8e2526f390126b91c6f0
|
|
7
|
+
data.tar.gz: 7213faa4510619678b4336affd9f913cca36414526669b79db136c3960ed9d50b1483d5c31620d41e03496dd5c92938bdb28cb91e0dc073d377b2a72d4d2ad39
|
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
html-to-markdown (2.9.1)
|
|
4
|
+
html-to-markdown (2.10.1)
|
|
5
5
|
rb_sys (>= 0.9, < 1.0)
|
|
6
6
|
|
|
7
7
|
GEM
|
|
@@ -24,7 +24,7 @@ GEM
|
|
|
24
24
|
base64 (0.3.0)
|
|
25
25
|
bigdecimal (3.3.1)
|
|
26
26
|
concurrent-ruby (1.3.5)
|
|
27
|
-
connection_pool (2.5.
|
|
27
|
+
connection_pool (2.5.5)
|
|
28
28
|
csv (3.3.5)
|
|
29
29
|
diff-lcs (1.6.2)
|
|
30
30
|
drb (2.2.3)
|
data/ext/html-to-markdown-rb/native/Cargo.toml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "html-to-markdown-rb"
|
|
3
|
-
version = "2.9.1"
|
|
3
|
+
version = "2.10.1"
|
|
4
4
|
edition = "2024"
|
|
5
5
|
authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
|
|
6
6
|
license = "MIT"
|
|
@@ -21,7 +21,7 @@ crate-type = ["cdylib", "rlib"]
|
|
|
21
21
|
default = []
|
|
22
22
|
|
|
23
23
|
[dependencies]
|
|
24
|
-
html-to-markdown-rs = { version = "2.9.1", features = ["inline-images"] }
|
|
24
|
+
html-to-markdown-rs = { version = "2.10.1", features = ["inline-images"] }
|
|
25
25
|
magnus = { git = "https://github.com/matsadler/magnus", rev = "f6db11769efb517427bf7f121f9c32e18b059b38", features = ["rb-sys"] }
|
|
26
26
|
|
|
27
27
|
[dev-dependencies]
|
data/ext/html-to-markdown-rb/native/src/lib.rs
CHANGED
|
@@ -2,7 +2,7 @@ use html_to_markdown_rs::{
|
|
|
2
2
|
CodeBlockStyle, ConversionOptions, HeadingStyle, HighlightStyle, HtmlExtraction, InlineImage, InlineImageConfig,
|
|
3
3
|
InlineImageFormat, InlineImageSource, InlineImageWarning, ListIndentType, NewlineStyle, PreprocessingOptions,
|
|
4
4
|
PreprocessingPreset, WhitespaceMode, convert as convert_inner,
|
|
5
|
-
convert_with_inline_images as convert_with_inline_images_inner, error::ConversionError,
|
|
5
|
+
convert_with_inline_images as convert_with_inline_images_inner, error::ConversionError, safety::guard_panic,
|
|
6
6
|
};
|
|
7
7
|
use magnus::prelude::*;
|
|
8
8
|
use magnus::r_hash::ForEach;
|
|
@@ -17,6 +17,9 @@ const DEFAULT_INLINE_IMAGE_LIMIT: u64 = 5 * 1024 * 1024;
|
|
|
17
17
|
fn conversion_error(err: ConversionError) -> Error {
|
|
18
18
|
match err {
|
|
19
19
|
ConversionError::ConfigError(msg) => arg_error(msg),
|
|
20
|
+
ConversionError::Panic(message) => {
|
|
21
|
+
runtime_error(format!("html-to-markdown panic during conversion: {message}"))
|
|
22
|
+
}
|
|
20
23
|
other => runtime_error(other.to_string()),
|
|
21
24
|
}
|
|
22
25
|
}
|
|
@@ -390,7 +393,7 @@ fn convert_fn(ruby: &Ruby, args: &[Value]) -> Result<String, Error> {
|
|
|
390
393
|
let html = parsed.required.0;
|
|
391
394
|
let options = build_conversion_options(ruby, parsed.optional.0)?;
|
|
392
395
|
|
|
393
|
-
convert_inner(&html, Some(options)).map_err(conversion_error)
|
|
396
|
+
guard_panic(|| convert_inner(&html, Some(options))).map_err(conversion_error)
|
|
394
397
|
}
|
|
395
398
|
|
|
396
399
|
fn options_handle_fn(ruby: &Ruby, args: &[Value]) -> Result<OptionsHandle, Error> {
|
|
@@ -403,7 +406,9 @@ fn convert_with_options_handle_fn(_ruby: &Ruby, args: &[Value]) -> Result<String
|
|
|
403
406
|
let parsed = scan_args::<(String, &OptionsHandle), (), (), (), (), ()>(args)?;
|
|
404
407
|
let html = parsed.required.0;
|
|
405
408
|
let handle = parsed.required.1;
|
|
406
|
-
|
|
409
|
+
let options = handle.0.clone();
|
|
410
|
+
|
|
411
|
+
guard_panic(|| convert_inner(&html, Some(options))).map_err(conversion_error)
|
|
407
412
|
}
|
|
408
413
|
|
|
409
414
|
fn convert_with_inline_images_fn(ruby: &Ruby, args: &[Value]) -> Result<Value, Error> {
|
|
@@ -412,7 +417,8 @@ fn convert_with_inline_images_fn(ruby: &Ruby, args: &[Value]) -> Result<Value, E
|
|
|
412
417
|
let options = build_conversion_options(ruby, parsed.optional.0)?;
|
|
413
418
|
let config = build_inline_image_config(ruby, parsed.optional.1)?;
|
|
414
419
|
|
|
415
|
-
let extraction = convert_with_inline_images_inner(&html, Some(options), config).map_err(conversion_error)?;
|
|
420
|
+
let extraction =
|
|
421
|
+
guard_panic(|| convert_with_inline_images_inner(&html, Some(options), config)).map_err(conversion_error)?;
|
|
416
422
|
|
|
417
423
|
extraction_to_value(ruby, extraction)
|
|
418
424
|
}
|
data/spec/convert_spec.rb
CHANGED
|
@@ -35,4 +35,43 @@ RSpec.describe HtmlToMarkdown do
|
|
|
35
35
|
expect(result).to include('# Hello #')
|
|
36
36
|
end
|
|
37
37
|
end
|
|
38
|
+
|
|
39
|
+
describe 'panic handling' do
|
|
40
|
+
context 'when a Rust panic would occur' do
|
|
41
|
+
it 'catches panics in convert method' do
|
|
42
|
+
malformed_html = "#{'<' * 100_000}div#{'>' * 100_000}"
|
|
43
|
+
|
|
44
|
+
begin
|
|
45
|
+
result = described_class.convert(malformed_html)
|
|
46
|
+
expect(result).to be_a(String)
|
|
47
|
+
rescue RuntimeError => e
|
|
48
|
+
expect(e.message).to match(/html-to-markdown panic during conversion/)
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
it 'catches panics in convert_with_options method' do
|
|
53
|
+
malformed_html = "#{'<' * 100_000}div#{'>' * 100_000}"
|
|
54
|
+
handle = described_class.options(heading_style: :atx)
|
|
55
|
+
|
|
56
|
+
begin
|
|
57
|
+
result = described_class.convert_with_options(malformed_html, handle)
|
|
58
|
+
expect(result).to be_a(String)
|
|
59
|
+
rescue RuntimeError => e
|
|
60
|
+
expect(e.message).to match(/html-to-markdown panic during conversion/)
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
it 'catches panics in convert_with_inline_images method' do
|
|
65
|
+
malformed_html = "#{'<' * 100_000}div#{'>' * 100_000}"
|
|
66
|
+
|
|
67
|
+
begin
|
|
68
|
+
result = described_class.convert_with_inline_images(malformed_html)
|
|
69
|
+
expect(result).to be_a(Hash)
|
|
70
|
+
expect(result).to include(:markdown, :inline_images, :warnings)
|
|
71
|
+
rescue RuntimeError => e
|
|
72
|
+
expect(e.message).to match(/html-to-markdown panic during conversion/)
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
end
|
|
38
77
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: html-to-markdown
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.9.1
|
|
4
|
+
version: 2.10.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Na'aman Hirschfeld
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2025-
|
|
11
|
+
date: 2025-12-04 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rb_sys
|