html-to-markdown 2.20.0 → 2.21.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +17 -17
- data/ext/html-to-markdown-rb/native/Cargo.toml +6 -2
- data/ext/html-to-markdown-rb/native/src/lib.rs +33 -38
- data/lib/html_to_markdown/version.rb +1 -1
- data/lib/html_to_markdown.rb +3 -3
- data/sig/html_to_markdown.rbs +55 -9
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 46a7d4481ee2c94abc6e5f9a2ab0a97a237f947b380c0e5a50f7dd6a14e9db11
|
|
4
|
+
data.tar.gz: 9a7d16c208370cb5f135a13a3beefadb556ec2d71406081d1cb65cd8cb984ae3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 836773d7fb48ebac8fa9df5e3720391d1416e5f5c3dc1ff49b1ae1a5ea38aabd72e0a9c1b6bd9e9b26bee1cfbb07979bf1f986485c035bc7fb9a72c4e29bab01
|
|
7
|
+
data.tar.gz: fe819f21832df5712ce19459fe6e44dd659a9c11e2429c48c5010ca7c3fa6da9c7f492b72185acc77df7b6c7cb52dda7a5052c0f6589769e7df9fad174af229d
|
data/Gemfile.lock
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
html-to-markdown (2.20.0)
|
|
4
|
+
html-to-markdown (2.21.1)
|
|
5
5
|
rb_sys (>= 0.9, < 1.0)
|
|
6
6
|
|
|
7
7
|
GEM
|
|
8
8
|
remote: https://rubygems.org/
|
|
9
9
|
specs:
|
|
10
|
-
activesupport (8.1.
|
|
10
|
+
activesupport (8.1.2)
|
|
11
11
|
base64
|
|
12
12
|
bigdecimal
|
|
13
13
|
concurrent-ruby (~> 1.0, >= 1.3.1)
|
|
@@ -47,19 +47,19 @@ GEM
|
|
|
47
47
|
parser (3.3.10.0)
|
|
48
48
|
ast (~> 2.4.1)
|
|
49
49
|
racc
|
|
50
|
-
prism (1.
|
|
50
|
+
prism (1.8.0)
|
|
51
51
|
racc (1.8.1)
|
|
52
52
|
rainbow (3.1.1)
|
|
53
53
|
rake (13.3.1)
|
|
54
54
|
rake-compiler (1.3.1)
|
|
55
55
|
rake
|
|
56
|
-
rake-compiler-dock (1.
|
|
56
|
+
rake-compiler-dock (1.11.0)
|
|
57
57
|
rb-fsevent (0.11.2)
|
|
58
58
|
rb-inotify (0.11.1)
|
|
59
59
|
ffi (~> 1.0)
|
|
60
|
-
rb_sys (0.9.
|
|
61
|
-
rake-compiler-dock (= 1.
|
|
62
|
-
rbs (3.10.
|
|
60
|
+
rb_sys (0.9.124)
|
|
61
|
+
rake-compiler-dock (= 1.11.0)
|
|
62
|
+
rbs (3.10.2)
|
|
63
63
|
logger
|
|
64
64
|
regexp_parser (2.11.3)
|
|
65
65
|
rspec (3.13.2)
|
|
@@ -89,7 +89,7 @@ GEM
|
|
|
89
89
|
rubocop-ast (1.49.0)
|
|
90
90
|
parser (>= 3.3.7.2)
|
|
91
91
|
prism (~> 1.7)
|
|
92
|
-
rubocop-rspec (3.
|
|
92
|
+
rubocop-rspec (3.9.0)
|
|
93
93
|
lint_roller (~> 1.1)
|
|
94
94
|
rubocop (~> 1.81)
|
|
95
95
|
ruby-progressbar (1.13.0)
|
|
@@ -111,7 +111,7 @@ GEM
|
|
|
111
111
|
strscan (>= 1.0.0)
|
|
112
112
|
terminal-table (>= 2, < 5)
|
|
113
113
|
uri (>= 0.12.0)
|
|
114
|
-
strscan (3.1.
|
|
114
|
+
strscan (3.1.7)
|
|
115
115
|
terminal-table (4.0.0)
|
|
116
116
|
unicode-display_width (>= 1.1.1, < 4)
|
|
117
117
|
tzinfo (2.0.6)
|
|
@@ -136,7 +136,7 @@ DEPENDENCIES
|
|
|
136
136
|
steep
|
|
137
137
|
|
|
138
138
|
CHECKSUMS
|
|
139
|
-
activesupport (8.1.
|
|
139
|
+
activesupport (8.1.2) sha256=88842578ccd0d40f658289b0e8c842acfe9af751afee2e0744a7873f50b6fdae
|
|
140
140
|
ast (2.4.3) sha256=954615157c1d6a382bc27d690d973195e79db7f55e9765ac7c481c60bdb4d383
|
|
141
141
|
base64 (0.3.0) sha256=27337aeabad6ffae05c265c450490628ef3ebd4b67be58257393227588f5a97b
|
|
142
142
|
bigdecimal (4.0.1) sha256=8b07d3d065a9f921c80ceaea7c9d4ae596697295b584c296fe599dd0ad01c4a7
|
|
@@ -148,7 +148,7 @@ CHECKSUMS
|
|
|
148
148
|
ffi (1.17.3-arm64-darwin) sha256=0c690555d4cee17a7f07c04d59df39b2fba74ec440b19da1f685c6579bb0717f
|
|
149
149
|
ffi (1.17.3-x86_64-linux-gnu) sha256=3746b01f677aae7b16dc1acb7cb3cc17b3e35bdae7676a3f568153fb0e2c887f
|
|
150
150
|
fileutils (1.8.0) sha256=8c6b1df54e2540bdb2f39258f08af78853aa70bad52b4d394bbc6424593c6e02
|
|
151
|
-
html-to-markdown (2.20.0)
|
|
151
|
+
html-to-markdown (2.21.1)
|
|
152
152
|
i18n (1.14.8) sha256=285778639134865c5e0f6269e0b818256017e8cde89993fdfcbfb64d088824a5
|
|
153
153
|
json (2.18.0) sha256=b10506aee4183f5cf49e0efc48073d7b75843ce3782c68dbeb763351c08fd505
|
|
154
154
|
language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
|
|
@@ -159,16 +159,16 @@ CHECKSUMS
|
|
|
159
159
|
mutex_m (0.3.0) sha256=cfcb04ac16b69c4813777022fdceda24e9f798e48092a2b817eb4c0a782b0751
|
|
160
160
|
parallel (1.27.0) sha256=4ac151e1806b755fb4e2dc2332cbf0e54f2e24ba821ff2d3dcf86bf6dc4ae130
|
|
161
161
|
parser (3.3.10.0) sha256=ce3587fa5cc55a88c4ba5b2b37621b3329aadf5728f9eafa36bbd121462aabd6
|
|
162
|
-
prism (1.
|
|
162
|
+
prism (1.8.0) sha256=84453a16ef5530ea62c5f03ec16b52a459575ad4e7b9c2b360fd8ce2c39c1254
|
|
163
163
|
racc (1.8.1) sha256=4a7f6929691dbec8b5209a0b373bc2614882b55fc5d2e447a21aaa691303d62f
|
|
164
164
|
rainbow (3.1.1) sha256=039491aa3a89f42efa1d6dec2fc4e62ede96eb6acd95e52f1ad581182b79bc6a
|
|
165
165
|
rake (13.3.1) sha256=8c9e89d09f66a26a01264e7e3480ec0607f0c497a861ef16063604b1b08eb19c
|
|
166
166
|
rake-compiler (1.3.1) sha256=6b351612b6e2d73ddd5563ee799bb58685176e05363db6758504bd11573d670a
|
|
167
|
-
rake-compiler-dock (1.
|
|
167
|
+
rake-compiler-dock (1.11.0) sha256=eab51f2cd533eb35cea6b624a75281f047123e70a64c58b607471bb49428f8c2
|
|
168
168
|
rb-fsevent (0.11.2) sha256=43900b972e7301d6570f64b850a5aa67833ee7d87b458ee92805d56b7318aefe
|
|
169
169
|
rb-inotify (0.11.1) sha256=a0a700441239b0ff18eb65e3866236cd78613d6b9f78fea1f9ac47a85e47be6e
|
|
170
|
-
rb_sys (0.9.
|
|
171
|
-
rbs (3.10.
|
|
170
|
+
rb_sys (0.9.124) sha256=513476557b12eaf73764b3da9f8746024558fe8699bda785fb548c9aa3877ae7
|
|
171
|
+
rbs (3.10.2) sha256=bd8a5dc4c62f229f020146b61844a31f9c79e649449d212904a474eb79c846fc
|
|
172
172
|
regexp_parser (2.11.3) sha256=ca13f381a173b7a93450e53459075c9b76a10433caadcb2f1180f2c741fc55a4
|
|
173
173
|
rspec (3.13.2) sha256=206284a08ad798e61f86d7ca3e376718d52c0bc944626b2349266f239f820587
|
|
174
174
|
rspec-core (3.13.6) sha256=a8823c6411667b60a8bca135364351dda34cd55e44ff94c4be4633b37d828b2d
|
|
@@ -177,11 +177,11 @@ CHECKSUMS
|
|
|
177
177
|
rspec-support (3.13.6) sha256=2e8de3702427eab064c9352fe74488cc12a1bfae887ad8b91cba480ec9f8afb2
|
|
178
178
|
rubocop (1.82.1) sha256=09f1a6a654a960eda767aebea33e47603080f8e9c9a3f019bf9b94c9cab5e273
|
|
179
179
|
rubocop-ast (1.49.0) sha256=49c3676d3123a0923d333e20c6c2dbaaae2d2287b475273fddee0c61da9f71fd
|
|
180
|
-
rubocop-rspec (3.
|
|
180
|
+
rubocop-rspec (3.9.0) sha256=8fa70a3619408237d789aeecfb9beef40576acc855173e60939d63332fdb55e2
|
|
181
181
|
ruby-progressbar (1.13.0) sha256=80fc9c47a9b640d6834e0dc7b3c94c9df37f08cb072b7761e4a71e22cff29b33
|
|
182
182
|
securerandom (0.4.1) sha256=cc5193d414a4341b6e225f0cb4446aceca8e50d5e1888743fac16987638ea0b1
|
|
183
183
|
steep (1.10.0) sha256=1b295b55f9aaff1b8d3ee42453ee55bc2a1078fda0268f288edb2dc014f4d7d1
|
|
184
|
-
strscan (3.1.
|
|
184
|
+
strscan (3.1.7) sha256=5f76462b94a3ea50b44973225b7d75b2cb96d4e1bee9ef1319b99ca117b72c8c
|
|
185
185
|
terminal-table (4.0.0) sha256=f504793203f8251b2ea7c7068333053f0beeea26093ec9962e62ea79f94301d2
|
|
186
186
|
tzinfo (2.0.6) sha256=8daf828cc77bcf7d63b0e3bdb6caa47e2272dcfaf4fbfe46f8c3a9df087a829b
|
|
187
187
|
unicode-display_width (3.2.0) sha256=0cdd96b5681a5949cdbc2c55e7b420facae74c4aaf9a9815eee1087cb1853c42
|
data/ext/html-to-markdown-rb/native/Cargo.toml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "html-to-markdown-rb"
|
|
3
|
-
version = "2.20.0"
|
|
3
|
+
version = "2.21.1"
|
|
4
4
|
edition = "2024"
|
|
5
5
|
authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
|
|
6
6
|
license = "MIT"
|
|
@@ -13,12 +13,16 @@ description = "Ruby bindings (Magnus) for html-to-markdown - high-performance HT
|
|
|
13
13
|
keywords = ["html", "markdown", "ruby", "magnus", "bindings"]
|
|
14
14
|
categories = ["api-bindings"]
|
|
15
15
|
|
|
16
|
+
|
|
17
|
+
[lints]
|
|
18
|
+
workspace = true
|
|
19
|
+
|
|
16
20
|
[lib]
|
|
17
21
|
name = "html_to_markdown_rb"
|
|
18
22
|
crate-type = ["cdylib", "rlib"]
|
|
19
23
|
|
|
20
24
|
[dependencies]
|
|
21
|
-
html-to-markdown-rs = { version = "2.
|
|
25
|
+
html-to-markdown-rs = { version = "2.21.1", features = ["inline-images", "visitor", "metadata"] }
|
|
22
26
|
magnus = { git = "https://github.com/matsadler/magnus", rev = "f6db11769efb517427bf7f121f9c32e18b059b38", features = ["rb-sys"] }
|
|
23
27
|
|
|
24
28
|
[target.'cfg(not(target_os = "windows"))'.dependencies]
|
data/ext/html-to-markdown-rb/native/src/lib.rs
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
#![allow(clippy::all, clippy::pedantic, clippy::nursery, missing_docs)]
|
|
1
2
|
use html_to_markdown_rs::{
|
|
2
3
|
CodeBlockStyle, ConversionOptions, ConversionOptionsUpdate, DEFAULT_INLINE_IMAGE_LIMIT, HeadingStyle,
|
|
3
4
|
HighlightStyle, HtmlExtraction, InlineImage, InlineImageConfig, InlineImageConfigUpdate, InlineImageWarning,
|
|
@@ -51,12 +52,11 @@ impl RubyVisitorWrapper {
|
|
|
51
52
|
}
|
|
52
53
|
|
|
53
54
|
fn utf8_str(&self, ruby: &Ruby, s: &str) -> Value {
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
}
|
|
55
|
+
if let Ok(val) = ruby.eval::<Value>(&format!("String.new({s:?}, encoding: 'UTF-8')")) {
|
|
56
|
+
val
|
|
57
|
+
} else {
|
|
58
|
+
let str_val = ruby.str_from_slice(s.as_bytes());
|
|
59
|
+
str_val.as_value()
|
|
60
60
|
}
|
|
61
61
|
}
|
|
62
62
|
|
|
@@ -67,7 +67,7 @@ impl RubyVisitorWrapper {
|
|
|
67
67
|
0 => match self.ruby_visitor.funcall::<&str, (), Value>(method_name, ()) {
|
|
68
68
|
Ok(val) => val,
|
|
69
69
|
Err(e) => {
|
|
70
|
-
*self.last_error.borrow_mut() = Some(format!("Visitor error in {}: {}"
|
|
70
|
+
*self.last_error.borrow_mut() = Some(format!("Visitor error in {method_name}: {e}"));
|
|
71
71
|
return Err(e);
|
|
72
72
|
}
|
|
73
73
|
},
|
|
@@ -77,7 +77,7 @@ impl RubyVisitorWrapper {
|
|
|
77
77
|
{
|
|
78
78
|
Ok(val) => val,
|
|
79
79
|
Err(e) => {
|
|
80
|
-
*self.last_error.borrow_mut() = Some(format!("Visitor error in {}: {}"
|
|
80
|
+
*self.last_error.borrow_mut() = Some(format!("Visitor error in {method_name}: {e}"));
|
|
81
81
|
return Err(e);
|
|
82
82
|
}
|
|
83
83
|
},
|
|
@@ -87,7 +87,7 @@ impl RubyVisitorWrapper {
|
|
|
87
87
|
{
|
|
88
88
|
Ok(val) => val,
|
|
89
89
|
Err(e) => {
|
|
90
|
-
*self.last_error.borrow_mut() = Some(format!("Visitor error in {}: {}"
|
|
90
|
+
*self.last_error.borrow_mut() = Some(format!("Visitor error in {method_name}: {e}"));
|
|
91
91
|
return Err(e);
|
|
92
92
|
}
|
|
93
93
|
},
|
|
@@ -97,7 +97,7 @@ impl RubyVisitorWrapper {
|
|
|
97
97
|
{
|
|
98
98
|
Ok(val) => val,
|
|
99
99
|
Err(e) => {
|
|
100
|
-
*self.last_error.borrow_mut() = Some(format!("Visitor error in {}: {}"
|
|
100
|
+
*self.last_error.borrow_mut() = Some(format!("Visitor error in {method_name}: {e}"));
|
|
101
101
|
return Err(e);
|
|
102
102
|
}
|
|
103
103
|
},
|
|
@@ -107,7 +107,7 @@ impl RubyVisitorWrapper {
|
|
|
107
107
|
{
|
|
108
108
|
Ok(val) => val,
|
|
109
109
|
Err(e) => {
|
|
110
|
-
*self.last_error.borrow_mut() = Some(format!("Visitor error in {}: {}"
|
|
110
|
+
*self.last_error.borrow_mut() = Some(format!("Visitor error in {method_name}: {e}"));
|
|
111
111
|
return Err(e);
|
|
112
112
|
}
|
|
113
113
|
},
|
|
@@ -120,14 +120,11 @@ impl RubyVisitorWrapper {
|
|
|
120
120
|
};
|
|
121
121
|
|
|
122
122
|
let hash = RHash::from_value(result)
|
|
123
|
-
.ok_or_else(|| arg_error(format!("visitor method {} must return a Hash"
|
|
123
|
+
.ok_or_else(|| arg_error(format!("visitor method {method_name} must return a Hash")))?;
|
|
124
124
|
|
|
125
|
-
let type_value: Value = hash
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
method_name
|
|
129
|
-
))
|
|
130
|
-
})?;
|
|
125
|
+
let type_value: Value = hash
|
|
126
|
+
.get(ruby.intern("type"))
|
|
127
|
+
.ok_or_else(|| arg_error(format!("visitor method {method_name} result Hash must have :type key")))?;
|
|
131
128
|
|
|
132
129
|
let type_str = symbol_to_string(type_value)?;
|
|
133
130
|
|
|
@@ -136,8 +133,7 @@ impl RubyVisitorWrapper {
|
|
|
136
133
|
"custom" => {
|
|
137
134
|
let output_value: Value = hash.get(ruby.intern("output")).ok_or_else(|| {
|
|
138
135
|
arg_error(format!(
|
|
139
|
-
"visitor method {} with type :custom must provide :output string"
|
|
140
|
-
method_name
|
|
136
|
+
"visitor method {method_name} with type :custom must provide :output string"
|
|
141
137
|
))
|
|
142
138
|
})?;
|
|
143
139
|
let output = String::try_convert(output_value)?;
|
|
@@ -148,16 +144,14 @@ impl RubyVisitorWrapper {
|
|
|
148
144
|
"error" => {
|
|
149
145
|
let message_value: Value = hash.get(ruby.intern("message")).ok_or_else(|| {
|
|
150
146
|
arg_error(format!(
|
|
151
|
-
"visitor method {} with type :error must provide :message string"
|
|
152
|
-
method_name
|
|
147
|
+
"visitor method {method_name} with type :error must provide :message string"
|
|
153
148
|
))
|
|
154
149
|
})?;
|
|
155
150
|
let message = String::try_convert(message_value)?;
|
|
156
151
|
Ok(VisitResult::Error(message))
|
|
157
152
|
}
|
|
158
153
|
other => Err(arg_error(format!(
|
|
159
|
-
"visitor method {} returned invalid type: {}"
|
|
160
|
-
method_name, other
|
|
154
|
+
"visitor method {method_name} returned invalid type: {other}"
|
|
161
155
|
))),
|
|
162
156
|
}
|
|
163
157
|
}
|
|
@@ -384,7 +378,7 @@ impl HtmlVisitor for RubyVisitorWrapper {
|
|
|
384
378
|
"visit_heading",
|
|
385
379
|
&[
|
|
386
380
|
node_ctx,
|
|
387
|
-
ruby.integer_from_i64(level
|
|
381
|
+
ruby.integer_from_i64(i64::from(level)).as_value(),
|
|
388
382
|
ruby.str_from_slice(text.as_bytes()).as_value(),
|
|
389
383
|
id_val,
|
|
390
384
|
],
|
|
@@ -1242,8 +1236,8 @@ fn inline_image_to_value(ruby: &Ruby, image: InlineImage) -> Result<Value, Error
|
|
|
1242
1236
|
|
|
1243
1237
|
if let Some((width, height)) = dimensions {
|
|
1244
1238
|
let dims = ruby.ary_new();
|
|
1245
|
-
dims.push(width
|
|
1246
|
-
dims.push(height
|
|
1239
|
+
dims.push(i64::from(width))?;
|
|
1240
|
+
dims.push(i64::from(height))?;
|
|
1247
1241
|
hash.aset(ruby.intern("dimensions"), dims)?;
|
|
1248
1242
|
} else {
|
|
1249
1243
|
hash.aset(ruby.intern("dimensions"), ruby.qnil())?;
|
|
@@ -1317,8 +1311,8 @@ fn convert_with_inline_images_fn(ruby: &Ruby, args: &[Value]) -> Result<Value, E
|
|
|
1317
1311
|
let options = build_conversion_options(ruby, parsed.optional.0)?;
|
|
1318
1312
|
let config = build_inline_image_config(ruby, parsed.optional.1)?;
|
|
1319
1313
|
|
|
1320
|
-
let extraction =
|
|
1321
|
-
|
|
1314
|
+
let extraction = guard_panic(|| convert_with_inline_images_inner(&html, Some(options), config, None))
|
|
1315
|
+
.map_err(conversion_error)?;
|
|
1322
1316
|
|
|
1323
1317
|
extraction_to_value(ruby, extraction)
|
|
1324
1318
|
}
|
|
@@ -1331,8 +1325,8 @@ fn convert_with_inline_images_handle_fn(ruby: &Ruby, args: &[Value]) -> Result<V
|
|
|
1331
1325
|
let options = handle.0.clone();
|
|
1332
1326
|
let config = build_inline_image_config(ruby, parsed.optional.0)?;
|
|
1333
1327
|
|
|
1334
|
-
let extraction =
|
|
1335
|
-
|
|
1328
|
+
let extraction = guard_panic(|| convert_with_inline_images_inner(&html, Some(options), config, None))
|
|
1329
|
+
.map_err(conversion_error)?;
|
|
1336
1330
|
|
|
1337
1331
|
extraction_to_value(ruby, extraction)
|
|
1338
1332
|
}
|
|
@@ -1487,8 +1481,8 @@ fn images_to_ruby(ruby: &Ruby, images: Vec<RustImageMetadata>) -> Result<Value,
|
|
|
1487
1481
|
match image.dimensions {
|
|
1488
1482
|
Some((width, height)) => {
|
|
1489
1483
|
let dims = ruby.ary_new();
|
|
1490
|
-
dims.push(width
|
|
1491
|
-
dims.push(height
|
|
1484
|
+
dims.push(i64::from(width))?;
|
|
1485
|
+
dims.push(i64::from(height))?;
|
|
1492
1486
|
hash.aset(ruby.intern("dimensions"), dims)?;
|
|
1493
1487
|
}
|
|
1494
1488
|
None => {
|
|
@@ -1540,13 +1534,14 @@ fn extended_metadata_to_ruby(ruby: &Ruby, metadata: RustExtendedMetadata) -> Res
|
|
|
1540
1534
|
|
|
1541
1535
|
#[cfg(feature = "metadata")]
|
|
1542
1536
|
fn convert_with_metadata_fn(ruby: &Ruby, args: &[Value]) -> Result<Value, Error> {
|
|
1543
|
-
let parsed = scan_args::<(String,), (Option<Value>, Option<Value>), (), (), (), ()>(args)?;
|
|
1537
|
+
let parsed = scan_args::<(String,), (Option<Value>, Option<Value>, Option<Value>), (), (), (), ()>(args)?;
|
|
1544
1538
|
let html = parsed.required.0;
|
|
1545
1539
|
let options = build_conversion_options(ruby, parsed.optional.0)?;
|
|
1546
1540
|
let metadata_config = build_metadata_config(ruby, parsed.optional.1)?;
|
|
1541
|
+
let _visitor = parsed.optional.2;
|
|
1547
1542
|
|
|
1548
|
-
let (markdown, metadata) =
|
|
1549
|
-
|
|
1543
|
+
let (markdown, metadata) = guard_panic(|| convert_with_metadata_inner(&html, Some(options), metadata_config, None))
|
|
1544
|
+
.map_err(conversion_error)?;
|
|
1550
1545
|
|
|
1551
1546
|
let array = ruby.ary_new();
|
|
1552
1547
|
array.push(markdown)?;
|
|
@@ -1563,8 +1558,8 @@ fn convert_with_metadata_handle_fn(ruby: &Ruby, args: &[Value]) -> Result<Value,
|
|
|
1563
1558
|
let options = handle.0.clone();
|
|
1564
1559
|
let metadata_config = build_metadata_config(ruby, parsed.optional.0)?;
|
|
1565
1560
|
|
|
1566
|
-
let (markdown, metadata) =
|
|
1567
|
-
|
|
1561
|
+
let (markdown, metadata) = guard_panic(|| convert_with_metadata_inner(&html, Some(options), metadata_config, None))
|
|
1562
|
+
.map_err(conversion_error)?;
|
|
1568
1563
|
|
|
1569
1564
|
let array = ruby.ary_new();
|
|
1570
1565
|
array.push(markdown)?;
|
data/lib/html_to_markdown.rb
CHANGED
|
@@ -21,7 +21,7 @@ module HtmlToMarkdown
|
|
|
21
21
|
|
|
22
22
|
module_function
|
|
23
23
|
|
|
24
|
-
def convert(html, options = nil)
|
|
24
|
+
def convert(html, options = nil, _visitor = nil)
|
|
25
25
|
native_convert(html.to_s, options)
|
|
26
26
|
end
|
|
27
27
|
|
|
@@ -29,7 +29,7 @@ module HtmlToMarkdown
|
|
|
29
29
|
native_convert_with_options(html.to_s, options_handle)
|
|
30
30
|
end
|
|
31
31
|
|
|
32
|
-
def convert_with_inline_images(html, options = nil, image_config = nil)
|
|
32
|
+
def convert_with_inline_images(html, options = nil, image_config = nil, _visitor = nil)
|
|
33
33
|
native_convert_with_inline_images(html.to_s, options, image_config)
|
|
34
34
|
end
|
|
35
35
|
|
|
@@ -163,7 +163,7 @@ module HtmlToMarkdown
|
|
|
163
163
|
# @see #convert Simple conversion without metadata
|
|
164
164
|
# @see #convert_with_inline_images Extract inline images during conversion
|
|
165
165
|
# @see ConversionOptions Detailed conversion configuration
|
|
166
|
-
def convert_with_metadata(html, options = nil, metadata_config = nil)
|
|
166
|
+
def convert_with_metadata(html, options = nil, metadata_config = nil, _visitor = nil)
|
|
167
167
|
native_convert_with_metadata(html.to_s, options, metadata_config)
|
|
168
168
|
end
|
|
169
169
|
|
data/sig/html_to_markdown.rbs
CHANGED
|
@@ -78,7 +78,8 @@ module HtmlToMarkdown
|
|
|
78
78
|
encoding?: String,
|
|
79
79
|
debug?: bool,
|
|
80
80
|
strip_tags?: Array[String],
|
|
81
|
-
preserve_tags?: Array[String]
|
|
81
|
+
preserve_tags?: Array[String],
|
|
82
|
+
skip_images?: bool
|
|
82
83
|
}
|
|
83
84
|
|
|
84
85
|
type inline_image_config = {
|
|
@@ -249,8 +250,22 @@ module HtmlToMarkdown
|
|
|
249
250
|
|
|
250
251
|
public
|
|
251
252
|
|
|
252
|
-
# Convert HTML to Markdown with optional configuration
|
|
253
|
-
|
|
253
|
+
# Convert HTML to Markdown with optional configuration and visitor
|
|
254
|
+
#
|
|
255
|
+
# The optional visitor parameter allows customization of conversion behavior for specific elements.
|
|
256
|
+
# When both options and visitor are provided, the visitor can override default conversions.
|
|
257
|
+
#
|
|
258
|
+
# Args:
|
|
259
|
+
# html: HTML string to convert
|
|
260
|
+
# options: Optional conversion configuration
|
|
261
|
+
# visitor: Optional visitor object for customizing conversion
|
|
262
|
+
#
|
|
263
|
+
# Returns:
|
|
264
|
+
# markdown: String - Converted markdown output
|
|
265
|
+
#
|
|
266
|
+
# Example:
|
|
267
|
+
# markdown = HtmlToMarkdown.convert(html, { wrap: true }, my_visitor)
|
|
268
|
+
def self.convert: (String html, ?conversion_options options, ?visitor visitor) -> String
|
|
254
269
|
|
|
255
270
|
# Create a reusable options handle for performance
|
|
256
271
|
def self.options: (?conversion_options options_hash) -> Options
|
|
@@ -264,13 +279,36 @@ module HtmlToMarkdown
|
|
|
264
279
|
) -> html_extraction
|
|
265
280
|
|
|
266
281
|
# Convert HTML with inline image extraction
|
|
282
|
+
#
|
|
283
|
+
# Optionally accepts a visitor for customizing conversion behavior.
|
|
284
|
+
#
|
|
285
|
+
# Args:
|
|
286
|
+
# html: HTML string to convert
|
|
287
|
+
# options: Optional conversion configuration
|
|
288
|
+
# image_config: Optional inline image extraction configuration
|
|
289
|
+
# visitor: Optional visitor object for customizing conversion
|
|
290
|
+
#
|
|
291
|
+
# Returns:
|
|
292
|
+
# html_extraction: Hash containing markdown, inline_images array, and warnings array
|
|
293
|
+
#
|
|
294
|
+
# Example:
|
|
295
|
+
# result = HtmlToMarkdown.convert_with_inline_images(html, { wrap: true }, image_config, my_visitor)
|
|
267
296
|
def self.convert_with_inline_images: (
|
|
268
297
|
String html,
|
|
269
298
|
?conversion_options options,
|
|
270
|
-
?inline_image_config image_config
|
|
299
|
+
?inline_image_config image_config,
|
|
300
|
+
?visitor visitor
|
|
271
301
|
) -> html_extraction
|
|
272
302
|
|
|
273
|
-
# Convert HTML to Markdown with a custom visitor
|
|
303
|
+
# Convert HTML to Markdown with a custom visitor (deprecated)
|
|
304
|
+
#
|
|
305
|
+
# DEPRECATED: Use convert() with the optional visitor parameter instead.
|
|
306
|
+
# This method is maintained for backward compatibility.
|
|
307
|
+
#
|
|
308
|
+
# All convert functions now accept optional visitors:
|
|
309
|
+
# - convert(html, options, visitor)
|
|
310
|
+
# - convert_with_inline_images(html, options, image_config, visitor)
|
|
311
|
+
# - convert_with_metadata(html, options, metadata_config, visitor)
|
|
274
312
|
#
|
|
275
313
|
# The visitor object can implement any of the following methods:
|
|
276
314
|
# - visit_element_start(ctx) -> visitor_result
|
|
@@ -342,11 +380,13 @@ module HtmlToMarkdown
|
|
|
342
380
|
# Convert HTML to Markdown with metadata extraction
|
|
343
381
|
#
|
|
344
382
|
# Extracts comprehensive metadata (headers, links, images, structured data) during conversion.
|
|
383
|
+
# Optionally accepts a visitor for customizing conversion behavior.
|
|
345
384
|
#
|
|
346
385
|
# Args:
|
|
347
386
|
# html: HTML string to convert
|
|
348
387
|
# options: Optional conversion configuration
|
|
349
388
|
# metadata_config: Optional metadata extraction configuration
|
|
389
|
+
# visitor: Optional visitor object for customizing conversion
|
|
350
390
|
#
|
|
351
391
|
# Returns:
|
|
352
392
|
# Array containing:
|
|
@@ -365,10 +405,14 @@ module HtmlToMarkdown
|
|
|
365
405
|
# markdown, metadata = HtmlToMarkdown.convert_with_metadata(html)
|
|
366
406
|
# puts "Title: #{metadata['document']['title']}"
|
|
367
407
|
# puts "Headers: #{metadata['headers'].length}"
|
|
408
|
+
#
|
|
409
|
+
# Example with visitor:
|
|
410
|
+
# markdown, metadata = HtmlToMarkdown.convert_with_metadata(html, options, metadata_config, my_visitor)
|
|
368
411
|
def self.convert_with_metadata: (
|
|
369
412
|
String html,
|
|
370
413
|
?conversion_options options,
|
|
371
|
-
?metadata_config metadata_config
|
|
414
|
+
?metadata_config metadata_config,
|
|
415
|
+
?visitor visitor
|
|
372
416
|
) -> [String, extended_metadata]
|
|
373
417
|
def self.convert_with_metadata_handle: (
|
|
374
418
|
String html,
|
|
@@ -377,7 +421,7 @@ module HtmlToMarkdown
|
|
|
377
421
|
) -> [String, extended_metadata]
|
|
378
422
|
|
|
379
423
|
# Instance method versions (created by module_function)
|
|
380
|
-
def convert: (String html, ?conversion_options options) -> String
|
|
424
|
+
def convert: (String html, ?conversion_options options, ?visitor visitor) -> String
|
|
381
425
|
def options: (?conversion_options options_hash) -> Options
|
|
382
426
|
def convert_with_options: (String html, Options options_handle) -> String
|
|
383
427
|
def convert_with_inline_images_handle: (
|
|
@@ -388,13 +432,15 @@ module HtmlToMarkdown
|
|
|
388
432
|
def convert_with_inline_images: (
|
|
389
433
|
String html,
|
|
390
434
|
?conversion_options options,
|
|
391
|
-
?inline_image_config image_config
|
|
435
|
+
?inline_image_config image_config,
|
|
436
|
+
?visitor visitor
|
|
392
437
|
) -> html_extraction
|
|
393
438
|
def convert_with_visitor: (String html, ?conversion_options options, visitor: visitor) -> String
|
|
394
439
|
def convert_with_metadata: (
|
|
395
440
|
String html,
|
|
396
441
|
?conversion_options options,
|
|
397
|
-
?metadata_config metadata_config
|
|
442
|
+
?metadata_config metadata_config,
|
|
443
|
+
?visitor visitor
|
|
398
444
|
) -> [String, extended_metadata]
|
|
399
445
|
def convert_with_metadata_handle: (
|
|
400
446
|
String html,
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: html-to-markdown
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.20.0
|
|
4
|
+
version: 2.21.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Na'aman Hirschfeld
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-01-
|
|
11
|
+
date: 2026-01-13 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rb_sys
|