html-to-markdown 3.8.0 → 3.8.1
This diff shows the changes between two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/html_to_markdown_rb/Cargo.lock +2 -2
- data/ext/html_to_markdown_rb/Cargo.toml +1 -1
- data/ext/html_to_markdown_rb/native/Cargo.lock +3 -3
- data/ext/html_to_markdown_rb/native/Cargo.toml +3 -3
- data/ext/html_to_markdown_rb/src/lib.rs +9 -135
- data/lib/html_to_markdown/native.rb +106 -49
- data/lib/html_to_markdown/version.rb +2 -2
- data/lib/html_to_markdown.rb +10 -1
- data/lib/html_to_markdown_rb.so +0 -0
- data/sig/html_to_markdown/cli.rbs +21 -21
- data/sig/html_to_markdown/cli_proxy.rbs +26 -26
- data/sig/open3.rbs +9 -9
- data/sig/types.rbs +264 -264
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d001dab2d306ee1cdc794224b24f05739fa0249fd978b401aa605596979f1d1f
|
|
4
|
+
data.tar.gz: 944a3931d94eabeb9c97eeb6cdc376032d88019b01d89d3c4ba4ff53efb731e6
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 71aa0c8bf8f700bd91dd3fe9f0a4d4ad01e2cfcf88a78b544b7ffe4fc1bbd6194a2a639210d3b5a7244a3862460acb59417cd4909efab8b3bf183f1088d20d7b
|
|
7
|
+
data.tar.gz: c85fdced163091bc61da0d7c89718570805d80afb1d545daaadaab7780d57a1d06fbb8e3312037716def7eada0ffde2b63c9378f6cbdb3466880e42c1d706d5e
|
|
@@ -252,7 +252,7 @@ dependencies = [
|
|
|
252
252
|
|
|
253
253
|
[[package]]
|
|
254
254
|
name = "html-to-markdown-rb"
|
|
255
|
-
version = "3.
|
|
255
|
+
version = "3.8.1"
|
|
256
256
|
dependencies = [
|
|
257
257
|
"html-to-markdown-rs",
|
|
258
258
|
"magnus",
|
|
@@ -262,7 +262,7 @@ dependencies = [
|
|
|
262
262
|
|
|
263
263
|
[[package]]
|
|
264
264
|
name = "html-to-markdown-rs"
|
|
265
|
-
version = "3.
|
|
265
|
+
version = "3.8.1"
|
|
266
266
|
dependencies = [
|
|
267
267
|
"ahash",
|
|
268
268
|
"astral-tl",
|
|
@@ -263,7 +263,7 @@ dependencies = [
|
|
|
263
263
|
|
|
264
264
|
[[package]]
|
|
265
265
|
name = "html-to-markdown-rb"
|
|
266
|
-
version = "3.8.0"
|
|
266
|
+
version = "3.8.1"
|
|
267
267
|
dependencies = [
|
|
268
268
|
"async-trait",
|
|
269
269
|
"html-to-markdown-rs",
|
|
@@ -276,9 +276,9 @@ dependencies = [
|
|
|
276
276
|
|
|
277
277
|
[[package]]
|
|
278
278
|
name = "html-to-markdown-rs"
|
|
279
|
-
version = "3.8.0"
|
|
279
|
+
version = "3.8.1"
|
|
280
280
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
281
|
-
checksum = "
|
|
281
|
+
checksum = "5b8defe72747832a90e54cd6b0daad64f89a970b9b3c977f40ba968d3f7f50c6"
|
|
282
282
|
dependencies = [
|
|
283
283
|
"ahash",
|
|
284
284
|
"astral-tl",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "html-to-markdown-rb"
|
|
3
|
-
version = "3.8.0"
|
|
3
|
+
version = "3.8.1"
|
|
4
4
|
edition = "2024"
|
|
5
5
|
license = "MIT"
|
|
6
6
|
description = "High-performance HTML to Markdown converter"
|
|
@@ -9,7 +9,7 @@ keywords = ["converter", "html", "markdown"]
|
|
|
9
9
|
categories = []
|
|
10
10
|
|
|
11
11
|
[package.metadata.cargo-machete]
|
|
12
|
-
ignored = ["rb-sys"]
|
|
12
|
+
ignored = ["rb-sys", "async-trait", "tokio"]
|
|
13
13
|
|
|
14
14
|
[lib]
|
|
15
15
|
name = "html_to_markdown_rb"
|
|
@@ -25,7 +25,7 @@ visitor = ["html-to-markdown-rs/visitor"]
|
|
|
25
25
|
|
|
26
26
|
[dependencies]
|
|
27
27
|
async-trait = "0.1"
|
|
28
|
-
html-to-markdown-rs = { version = "3.8.
|
|
28
|
+
html-to-markdown-rs = { version = "3.8.1", features = ["serde", "metadata", "visitor", "inline-images", "testkit"] }
|
|
29
29
|
magnus = "0.8"
|
|
30
30
|
rb-sys = ">=0.9, <0.9.128"
|
|
31
31
|
serde = { version = "1", features = ["derive"] }
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
// This file is auto-generated by alef. DO NOT EDIT.
|
|
2
|
-
// alef:hash:
|
|
2
|
+
// alef:hash:526c92c34b7201230da79912b4ec8eb9ecc5c6683951a43cd0234a619377accd
|
|
3
3
|
// Re-generate with: alef generate
|
|
4
4
|
#![allow(dead_code, unused_imports, unused_variables)]
|
|
5
5
|
#![allow(
|
|
@@ -105,19 +105,7 @@ unsafe impl TryConvertOwned for DocumentMetadata {}
|
|
|
105
105
|
|
|
106
106
|
impl Default for DocumentMetadata {
|
|
107
107
|
fn default() -> Self {
|
|
108
|
-
|
|
109
|
-
title: None,
|
|
110
|
-
description: None,
|
|
111
|
-
keywords: vec![],
|
|
112
|
-
author: None,
|
|
113
|
-
canonical_url: None,
|
|
114
|
-
base_href: None,
|
|
115
|
-
language: None,
|
|
116
|
-
text_direction: None,
|
|
117
|
-
open_graph: Default::default(),
|
|
118
|
-
twitter_card: Default::default(),
|
|
119
|
-
meta_tags: Default::default(),
|
|
120
|
-
}
|
|
108
|
+
html_to_markdown_rs::metadata::DocumentMetadata::default().into()
|
|
121
109
|
}
|
|
122
110
|
}
|
|
123
111
|
|
|
@@ -634,13 +622,7 @@ unsafe impl TryConvertOwned for HtmlMetadata {}
|
|
|
634
622
|
|
|
635
623
|
impl Default for HtmlMetadata {
|
|
636
624
|
fn default() -> Self {
|
|
637
|
-
|
|
638
|
-
document: Default::default(),
|
|
639
|
-
headers: vec![],
|
|
640
|
-
links: vec![],
|
|
641
|
-
images: vec![],
|
|
642
|
-
structured_data: vec![],
|
|
643
|
-
}
|
|
625
|
+
html_to_markdown_rs::metadata::HtmlMetadata::default().into()
|
|
644
626
|
}
|
|
645
627
|
}
|
|
646
628
|
|
|
@@ -774,51 +756,7 @@ unsafe impl TryConvertOwned for ConversionOptions {}
|
|
|
774
756
|
|
|
775
757
|
impl Default for ConversionOptions {
|
|
776
758
|
fn default() -> Self {
|
|
777
|
-
|
|
778
|
-
heading_style: Default::default(),
|
|
779
|
-
list_indent_type: Default::default(),
|
|
780
|
-
list_indent_width: 2,
|
|
781
|
-
bullets: "-*+".to_string(),
|
|
782
|
-
strong_em_symbol: "*".to_string(),
|
|
783
|
-
escape_asterisks: false,
|
|
784
|
-
escape_underscores: false,
|
|
785
|
-
escape_misc: false,
|
|
786
|
-
escape_ascii: false,
|
|
787
|
-
code_language: "".to_string(),
|
|
788
|
-
autolinks: true,
|
|
789
|
-
default_title: false,
|
|
790
|
-
br_in_tables: false,
|
|
791
|
-
compact_tables: false,
|
|
792
|
-
highlight_style: Default::default(),
|
|
793
|
-
extract_metadata: true,
|
|
794
|
-
whitespace_mode: Default::default(),
|
|
795
|
-
strip_newlines: false,
|
|
796
|
-
wrap: false,
|
|
797
|
-
wrap_width: 80,
|
|
798
|
-
convert_as_inline: false,
|
|
799
|
-
sub_symbol: "".to_string(),
|
|
800
|
-
sup_symbol: "".to_string(),
|
|
801
|
-
newline_style: NewlineStyle::Spaces,
|
|
802
|
-
code_block_style: Default::default(),
|
|
803
|
-
keep_inline_images_in: vec![],
|
|
804
|
-
preprocessing: Default::default(),
|
|
805
|
-
encoding: "utf-8".to_string(),
|
|
806
|
-
debug: false,
|
|
807
|
-
strip_tags: vec![],
|
|
808
|
-
preserve_tags: vec![],
|
|
809
|
-
skip_images: false,
|
|
810
|
-
url_escape_style: Default::default(),
|
|
811
|
-
link_style: Default::default(),
|
|
812
|
-
output_format: Default::default(),
|
|
813
|
-
include_document_structure: false,
|
|
814
|
-
extract_images: false,
|
|
815
|
-
max_image_size: 5242880,
|
|
816
|
-
capture_svg: false,
|
|
817
|
-
infer_dimensions: true,
|
|
818
|
-
max_depth: None,
|
|
819
|
-
exclude_selectors: vec![],
|
|
820
|
-
tier_strategy: TierStrategy::Auto,
|
|
821
|
-
}
|
|
759
|
+
html_to_markdown_rs::options::ConversionOptions::default().into()
|
|
822
760
|
}
|
|
823
761
|
}
|
|
824
762
|
|
|
@@ -1255,51 +1193,7 @@ unsafe impl TryConvertOwned for ConversionOptionsUpdate {}
|
|
|
1255
1193
|
|
|
1256
1194
|
impl Default for ConversionOptionsUpdate {
|
|
1257
1195
|
fn default() -> Self {
|
|
1258
|
-
|
|
1259
|
-
heading_style: None,
|
|
1260
|
-
list_indent_type: None,
|
|
1261
|
-
list_indent_width: None,
|
|
1262
|
-
bullets: None,
|
|
1263
|
-
strong_em_symbol: None,
|
|
1264
|
-
escape_asterisks: None,
|
|
1265
|
-
escape_underscores: None,
|
|
1266
|
-
escape_misc: None,
|
|
1267
|
-
escape_ascii: None,
|
|
1268
|
-
code_language: None,
|
|
1269
|
-
autolinks: None,
|
|
1270
|
-
default_title: None,
|
|
1271
|
-
br_in_tables: None,
|
|
1272
|
-
compact_tables: None,
|
|
1273
|
-
highlight_style: None,
|
|
1274
|
-
extract_metadata: None,
|
|
1275
|
-
whitespace_mode: None,
|
|
1276
|
-
strip_newlines: None,
|
|
1277
|
-
wrap: None,
|
|
1278
|
-
wrap_width: None,
|
|
1279
|
-
convert_as_inline: None,
|
|
1280
|
-
sub_symbol: None,
|
|
1281
|
-
sup_symbol: None,
|
|
1282
|
-
newline_style: None,
|
|
1283
|
-
code_block_style: None,
|
|
1284
|
-
keep_inline_images_in: None,
|
|
1285
|
-
preprocessing: None,
|
|
1286
|
-
encoding: None,
|
|
1287
|
-
debug: None,
|
|
1288
|
-
strip_tags: None,
|
|
1289
|
-
preserve_tags: None,
|
|
1290
|
-
skip_images: None,
|
|
1291
|
-
url_escape_style: None,
|
|
1292
|
-
link_style: None,
|
|
1293
|
-
output_format: None,
|
|
1294
|
-
include_document_structure: None,
|
|
1295
|
-
extract_images: None,
|
|
1296
|
-
max_image_size: None,
|
|
1297
|
-
capture_svg: None,
|
|
1298
|
-
infer_dimensions: None,
|
|
1299
|
-
max_depth: None,
|
|
1300
|
-
exclude_selectors: None,
|
|
1301
|
-
tier_strategy: None,
|
|
1302
|
-
}
|
|
1196
|
+
html_to_markdown_rs::options::ConversionOptionsUpdate::default().into()
|
|
1303
1197
|
}
|
|
1304
1198
|
}
|
|
1305
1199
|
|
|
@@ -1655,12 +1549,7 @@ unsafe impl TryConvertOwned for PreprocessingOptions {}
|
|
|
1655
1549
|
|
|
1656
1550
|
impl Default for PreprocessingOptions {
|
|
1657
1551
|
fn default() -> Self {
|
|
1658
|
-
|
|
1659
|
-
enabled: true,
|
|
1660
|
-
preset: Default::default(),
|
|
1661
|
-
remove_navigation: true,
|
|
1662
|
-
remove_forms: true,
|
|
1663
|
-
}
|
|
1552
|
+
html_to_markdown_rs::options::PreprocessingOptions::default().into()
|
|
1664
1553
|
}
|
|
1665
1554
|
}
|
|
1666
1555
|
|
|
@@ -1747,12 +1636,7 @@ unsafe impl TryConvertOwned for PreprocessingOptionsUpdate {}
|
|
|
1747
1636
|
|
|
1748
1637
|
impl Default for PreprocessingOptionsUpdate {
|
|
1749
1638
|
fn default() -> Self {
|
|
1750
|
-
|
|
1751
|
-
enabled: None,
|
|
1752
|
-
preset: None,
|
|
1753
|
-
remove_navigation: None,
|
|
1754
|
-
remove_forms: None,
|
|
1755
|
-
}
|
|
1639
|
+
html_to_markdown_rs::options::PreprocessingOptionsUpdate::default().into()
|
|
1756
1640
|
}
|
|
1757
1641
|
}
|
|
1758
1642
|
|
|
@@ -2211,13 +2095,7 @@ unsafe impl TryConvertOwned for ConversionResult {}
|
|
|
2211
2095
|
|
|
2212
2096
|
impl Default for ConversionResult {
|
|
2213
2097
|
fn default() -> Self {
|
|
2214
|
-
|
|
2215
|
-
content: None,
|
|
2216
|
-
document: None,
|
|
2217
|
-
metadata: Default::default(),
|
|
2218
|
-
tables: vec![],
|
|
2219
|
-
warnings: vec![],
|
|
2220
|
-
}
|
|
2098
|
+
html_to_markdown_rs::ConversionResult::default().into()
|
|
2221
2099
|
}
|
|
2222
2100
|
}
|
|
2223
2101
|
|
|
@@ -2314,11 +2192,7 @@ unsafe impl TryConvertOwned for TableGrid {}
|
|
|
2314
2192
|
|
|
2315
2193
|
impl Default for TableGrid {
|
|
2316
2194
|
fn default() -> Self {
|
|
2317
|
-
|
|
2318
|
-
rows: 0,
|
|
2319
|
-
cols: 0,
|
|
2320
|
-
cells: vec![],
|
|
2321
|
-
}
|
|
2195
|
+
html_to_markdown_rs::TableGrid::default().into()
|
|
2322
2196
|
}
|
|
2323
2197
|
}
|
|
2324
2198
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# This file is auto-generated by alef — DO NOT EDIT.
|
|
2
|
-
# alef:hash:
|
|
2
|
+
# alef:hash:526c92c34b7201230da79912b4ec8eb9ecc5c6683951a43cd0234a619377accd
|
|
3
3
|
# To regenerate: alef generate
|
|
4
4
|
# To verify freshness: alef verify --exit-code
|
|
5
5
|
# frozen_string_literal: true
|
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
require "json"
|
|
8
8
|
require "sorbet-runtime"
|
|
9
9
|
require "html_to_markdown_rb"
|
|
10
|
+
|
|
10
11
|
module HtmlToMarkdown
|
|
11
12
|
# Re-export public types from the native extension (curated list, excludes Update/Builder types)
|
|
12
13
|
ConversionOptions = HtmlToMarkdownRs.const_get(:ConversionOptions)
|
|
@@ -29,8 +30,11 @@ module HtmlToMarkdown
|
|
|
29
30
|
TableGrid = HtmlToMarkdownRs.const_get(:TableGrid)
|
|
30
31
|
TextAnnotation = HtmlToMarkdownRs.const_get(:TextAnnotation)
|
|
31
32
|
# Re-export public module functions from the native extension (curated list)
|
|
32
|
-
define_singleton_method(:convert) { |*args, **kwargs, &blk|
|
|
33
|
+
define_singleton_method(:convert) { |*args, **kwargs, &blk|
|
|
34
|
+
HtmlToMarkdownRs.public_send(:convert, *args, **kwargs, &blk)
|
|
35
|
+
}
|
|
33
36
|
end
|
|
37
|
+
|
|
34
38
|
module HtmlToMarkdown
|
|
35
39
|
# The semantic content type of a document node.
|
|
36
40
|
#
|
|
@@ -48,20 +52,34 @@ module HtmlToMarkdown
|
|
|
48
52
|
def self.from_hash(hash)
|
|
49
53
|
discriminator = hash[:node_type] || hash["node_type"]
|
|
50
54
|
case discriminator
|
|
51
|
-
when "heading"
|
|
52
|
-
|
|
53
|
-
when "
|
|
54
|
-
|
|
55
|
-
when "
|
|
56
|
-
|
|
57
|
-
when "
|
|
58
|
-
|
|
59
|
-
when "
|
|
60
|
-
|
|
61
|
-
when "
|
|
62
|
-
|
|
63
|
-
when "
|
|
64
|
-
|
|
55
|
+
when "heading"
|
|
56
|
+
NodeContentHeading.from_hash(hash)
|
|
57
|
+
when "paragraph"
|
|
58
|
+
NodeContentParagraph.from_hash(hash)
|
|
59
|
+
when "list"
|
|
60
|
+
NodeContentList.from_hash(hash)
|
|
61
|
+
when "list_item"
|
|
62
|
+
NodeContentListItem.from_hash(hash)
|
|
63
|
+
when "table"
|
|
64
|
+
NodeContentTable.from_hash(hash)
|
|
65
|
+
when "image"
|
|
66
|
+
NodeContentImage.from_hash(hash)
|
|
67
|
+
when "code"
|
|
68
|
+
NodeContentCode.from_hash(hash)
|
|
69
|
+
when "quote"
|
|
70
|
+
NodeContentQuote.from_hash(hash)
|
|
71
|
+
when "definition_list"
|
|
72
|
+
NodeContentDefinitionList.from_hash(hash)
|
|
73
|
+
when "definition_item"
|
|
74
|
+
NodeContentDefinitionItem.from_hash(hash)
|
|
75
|
+
when "raw_block"
|
|
76
|
+
NodeContentRawBlock.from_hash(hash)
|
|
77
|
+
when "metadata_block"
|
|
78
|
+
NodeContentMetadataBlock.from_hash(hash)
|
|
79
|
+
when "group"
|
|
80
|
+
NodeContentGroup.from_hash(hash)
|
|
81
|
+
else
|
|
82
|
+
raise "Unknown discriminator: #{discriminator}"
|
|
65
83
|
end
|
|
66
84
|
end
|
|
67
85
|
end
|
|
@@ -72,10 +90,12 @@ module HtmlToMarkdown
|
|
|
72
90
|
|
|
73
91
|
# Heading level (1-6).
|
|
74
92
|
sig { returns(Integer) }
|
|
75
|
-
|
|
93
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
94
|
+
def level = super
|
|
76
95
|
# The heading text content.
|
|
77
96
|
sig { returns(String) }
|
|
78
|
-
|
|
97
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
98
|
+
def text = super
|
|
79
99
|
sig { returns(T::Boolean) }
|
|
80
100
|
def heading? = true
|
|
81
101
|
sig { returns(T::Boolean) }
|
|
@@ -116,7 +136,8 @@ module HtmlToMarkdown
|
|
|
116
136
|
|
|
117
137
|
# The paragraph text content.
|
|
118
138
|
sig { returns(String) }
|
|
119
|
-
|
|
139
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
140
|
+
def text = super
|
|
120
141
|
sig { returns(T::Boolean) }
|
|
121
142
|
def heading? = false
|
|
122
143
|
sig { returns(T::Boolean) }
|
|
@@ -157,7 +178,8 @@ module HtmlToMarkdown
|
|
|
157
178
|
|
|
158
179
|
# Whether this is an ordered list.
|
|
159
180
|
sig { returns(T::Boolean) }
|
|
160
|
-
|
|
181
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
182
|
+
def ordered = super
|
|
161
183
|
sig { returns(T::Boolean) }
|
|
162
184
|
def heading? = false
|
|
163
185
|
sig { returns(T::Boolean) }
|
|
@@ -198,7 +220,8 @@ module HtmlToMarkdown
|
|
|
198
220
|
|
|
199
221
|
# The list item text content.
|
|
200
222
|
sig { returns(String) }
|
|
201
|
-
|
|
223
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
224
|
+
def text = super
|
|
202
225
|
sig { returns(T::Boolean) }
|
|
203
226
|
def heading? = false
|
|
204
227
|
sig { returns(T::Boolean) }
|
|
@@ -239,7 +262,8 @@ module HtmlToMarkdown
|
|
|
239
262
|
|
|
240
263
|
# The table grid structure.
|
|
241
264
|
sig { returns(TableGrid) }
|
|
242
|
-
|
|
265
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
266
|
+
def grid = super
|
|
243
267
|
sig { returns(T::Boolean) }
|
|
244
268
|
def heading? = false
|
|
245
269
|
sig { returns(T::Boolean) }
|
|
@@ -280,13 +304,16 @@ module HtmlToMarkdown
|
|
|
280
304
|
|
|
281
305
|
# Alt text or caption.
|
|
282
306
|
sig { returns(T.nilable(String)) }
|
|
283
|
-
|
|
307
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
308
|
+
def description = super
|
|
284
309
|
# Image source URL.
|
|
285
310
|
sig { returns(T.nilable(String)) }
|
|
286
|
-
|
|
311
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
312
|
+
def src = super
|
|
287
313
|
# Index into `ConversionResult.images` when image extraction is enabled.
|
|
288
314
|
sig { returns(T.nilable(Integer)) }
|
|
289
|
-
|
|
315
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
316
|
+
def image_index = super
|
|
290
317
|
sig { returns(T::Boolean) }
|
|
291
318
|
def heading? = false
|
|
292
319
|
sig { returns(T::Boolean) }
|
|
@@ -317,7 +344,11 @@ module HtmlToMarkdown
|
|
|
317
344
|
# @return [self]
|
|
318
345
|
sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
|
|
319
346
|
def self.from_hash(hash)
|
|
320
|
-
new(
|
|
347
|
+
new(
|
|
348
|
+
description: hash[:description] || hash["description"],
|
|
349
|
+
src: hash[:src] || hash["src"],
|
|
350
|
+
image_index: hash[:image_index] || hash["image_index"]
|
|
351
|
+
)
|
|
321
352
|
end
|
|
322
353
|
end
|
|
323
354
|
## A code block or inline code.
|
|
@@ -327,10 +358,12 @@ module HtmlToMarkdown
|
|
|
327
358
|
|
|
328
359
|
# The code text content.
|
|
329
360
|
sig { returns(String) }
|
|
330
|
-
|
|
361
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
362
|
+
def text = super
|
|
331
363
|
# Programming language (from class="language-*" or similar).
|
|
332
364
|
sig { returns(T.nilable(String)) }
|
|
333
|
-
|
|
365
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
366
|
+
def language = super
|
|
334
367
|
sig { returns(T::Boolean) }
|
|
335
368
|
def heading? = false
|
|
336
369
|
sig { returns(T::Boolean) }
|
|
@@ -447,10 +480,12 @@ module HtmlToMarkdown
|
|
|
447
480
|
|
|
448
481
|
# The term being defined.
|
|
449
482
|
sig { returns(String) }
|
|
450
|
-
|
|
483
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
484
|
+
def term = super
|
|
451
485
|
# The definition text.
|
|
452
486
|
sig { returns(String) }
|
|
453
|
-
|
|
487
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
488
|
+
def definition = super
|
|
454
489
|
sig { returns(T::Boolean) }
|
|
455
490
|
def heading? = false
|
|
456
491
|
sig { returns(T::Boolean) }
|
|
@@ -491,10 +526,12 @@ module HtmlToMarkdown
|
|
|
491
526
|
|
|
492
527
|
# The format of the raw content (e.g. "html", "css", "javascript").
|
|
493
528
|
sig { returns(String) }
|
|
494
|
-
|
|
529
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
530
|
+
def format = super
|
|
495
531
|
# The raw content.
|
|
496
532
|
sig { returns(String) }
|
|
497
|
-
|
|
533
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
534
|
+
def content = super
|
|
498
535
|
sig { returns(T::Boolean) }
|
|
499
536
|
def heading? = false
|
|
500
537
|
sig { returns(T::Boolean) }
|
|
@@ -535,7 +572,8 @@ module HtmlToMarkdown
|
|
|
535
572
|
|
|
536
573
|
# Key-value metadata pairs.
|
|
537
574
|
sig { returns(T::Array[MetadataEntry]) }
|
|
538
|
-
|
|
575
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
576
|
+
def entries = super
|
|
539
577
|
sig { returns(T::Boolean) }
|
|
540
578
|
def heading? = false
|
|
541
579
|
sig { returns(T::Boolean) }
|
|
@@ -576,13 +614,16 @@ module HtmlToMarkdown
|
|
|
576
614
|
|
|
577
615
|
# Optional section label.
|
|
578
616
|
sig { returns(T.nilable(String)) }
|
|
579
|
-
|
|
617
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
618
|
+
def label = super
|
|
580
619
|
# The heading level that created this group.
|
|
581
620
|
sig { returns(T.nilable(Integer)) }
|
|
582
|
-
|
|
621
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
622
|
+
def heading_level = super
|
|
583
623
|
# The heading text that created this group.
|
|
584
624
|
sig { returns(T.nilable(String)) }
|
|
585
|
-
|
|
625
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
626
|
+
def heading_text = super
|
|
586
627
|
sig { returns(T::Boolean) }
|
|
587
628
|
def heading? = false
|
|
588
629
|
sig { returns(T::Boolean) }
|
|
@@ -613,7 +654,11 @@ module HtmlToMarkdown
|
|
|
613
654
|
# @return [self]
|
|
614
655
|
sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
|
|
615
656
|
def self.from_hash(hash)
|
|
616
|
-
new(
|
|
657
|
+
new(
|
|
658
|
+
label: hash[:label] || hash["label"],
|
|
659
|
+
heading_level: hash[:heading_level] || hash["heading_level"],
|
|
660
|
+
heading_text: hash[:heading_text] || hash["heading_text"]
|
|
661
|
+
)
|
|
617
662
|
end
|
|
618
663
|
end
|
|
619
664
|
end
|
|
@@ -635,16 +680,26 @@ module HtmlToMarkdown
|
|
|
635
680
|
def self.from_hash(hash)
|
|
636
681
|
discriminator = hash[:annotation_type] || hash["annotation_type"]
|
|
637
682
|
case discriminator
|
|
638
|
-
when "bold"
|
|
639
|
-
|
|
640
|
-
when "
|
|
641
|
-
|
|
642
|
-
when "
|
|
643
|
-
|
|
644
|
-
when "
|
|
645
|
-
|
|
646
|
-
when "
|
|
647
|
-
|
|
683
|
+
when "bold"
|
|
684
|
+
AnnotationKindBold.from_hash(hash)
|
|
685
|
+
when "italic"
|
|
686
|
+
AnnotationKindItalic.from_hash(hash)
|
|
687
|
+
when "underline"
|
|
688
|
+
AnnotationKindUnderline.from_hash(hash)
|
|
689
|
+
when "strikethrough"
|
|
690
|
+
AnnotationKindStrikethrough.from_hash(hash)
|
|
691
|
+
when "code"
|
|
692
|
+
AnnotationKindCode.from_hash(hash)
|
|
693
|
+
when "subscript"
|
|
694
|
+
AnnotationKindSubscript.from_hash(hash)
|
|
695
|
+
when "superscript"
|
|
696
|
+
AnnotationKindSuperscript.from_hash(hash)
|
|
697
|
+
when "highlight"
|
|
698
|
+
AnnotationKindHighlight.from_hash(hash)
|
|
699
|
+
when "link"
|
|
700
|
+
AnnotationKindLink.from_hash(hash)
|
|
701
|
+
else
|
|
702
|
+
raise "Unknown discriminator: #{discriminator}"
|
|
648
703
|
end
|
|
649
704
|
end
|
|
650
705
|
end
|
|
@@ -900,13 +955,15 @@ module HtmlToMarkdown
|
|
|
900
955
|
# written in the source. Callers that need an absolute URL must resolve it against the
|
|
901
956
|
# document base URL themselves.
|
|
902
957
|
sig { returns(String) }
|
|
903
|
-
|
|
958
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
959
|
+
def url = super
|
|
904
960
|
# The `title` attribute of the `<a>` element, if present.
|
|
905
961
|
#
|
|
906
962
|
# `None` when the `<a>` tag has no `title="..."` attribute. When present, the value
|
|
907
963
|
# is copied verbatim — HTML entities within the title are not decoded.
|
|
908
964
|
sig { returns(T.nilable(String)) }
|
|
909
|
-
|
|
965
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
966
|
+
def title = super
|
|
910
967
|
sig { returns(T::Boolean) }
|
|
911
968
|
def bold? = false
|
|
912
969
|
sig { returns(T::Boolean) }
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
# This file is auto-generated by alef — DO NOT EDIT.
|
|
2
|
-
# alef:hash:
|
|
2
|
+
# alef:hash:526c92c34b7201230da79912b4ec8eb9ecc5c6683951a43cd0234a619377accd
|
|
3
3
|
# To regenerate: alef generate
|
|
4
4
|
# To verify freshness: alef verify --exit-code
|
|
5
5
|
# frozen_string_literal: true
|
|
6
6
|
|
|
7
7
|
module HtmlToMarkdown
|
|
8
8
|
## The version string for this package.
|
|
9
|
-
VERSION = "3.8.0"
|
|
9
|
+
VERSION = "3.8.1"
|
|
10
10
|
end
|
data/lib/html_to_markdown.rb
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# This file is auto-generated by alef — DO NOT EDIT.
|
|
2
|
-
# alef:hash:
|
|
2
|
+
# alef:hash:526c92c34b7201230da79912b4ec8eb9ecc5c6683951a43cd0234a619377accd
|
|
3
3
|
# To regenerate: alef generate
|
|
4
4
|
# To verify freshness: alef verify --exit-code
|
|
5
5
|
# frozen_string_literal: true
|
|
@@ -16,3 +16,12 @@ require_relative "html_to_markdown/native"
|
|
|
16
16
|
module HtmlToMarkdown
|
|
17
17
|
# Re-export all types and functions from native extension
|
|
18
18
|
end
|
|
19
|
+
|
|
20
|
+
# Bring top-level HtmlToMarkdown classes into the global namespace so callers
|
|
21
|
+
# (and the generated e2e suite) can reference them unqualified. The native
|
|
22
|
+
# extension has already been required above, so every type constant is defined
|
|
23
|
+
# under HtmlToMarkdown by this point.
|
|
24
|
+
HtmlToMarkdown.constants.each do |const_name|
|
|
25
|
+
value = HtmlToMarkdown.const_get(const_name)
|
|
26
|
+
::Object.const_set(const_name, value) if value.is_a?(Module) && !::Object.const_defined?(const_name)
|
|
27
|
+
end
|
data/lib/html_to_markdown_rb.so
CHANGED
|
Binary file
|
|
@@ -1,24 +1,24 @@
|
|
|
1
1
|
module HtmlToMarkdown
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
2
|
+
module CLI
|
|
3
|
+
# Module method (module_function creates both module and instance methods)
|
|
4
|
+
#
|
|
5
|
+
# Run the CLI with the given arguments
|
|
6
|
+
#
|
|
7
|
+
# @param argv Command-line arguments (defaults to ARGV)
|
|
8
|
+
# @param stdout Output stream for standard output
|
|
9
|
+
# @param stderr Output stream for standard error
|
|
10
|
+
# @return Exit code (0 for success, non-zero for failure)
|
|
11
|
+
def self.run: (
|
|
12
|
+
?Array[String] argv,
|
|
13
|
+
?stdout: IO,
|
|
14
|
+
?stderr: IO
|
|
15
|
+
) -> Integer
|
|
16
16
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
17
|
+
# Instance method version (created by module_function)
|
|
18
|
+
def run: (
|
|
19
|
+
?Array[String] argv,
|
|
20
|
+
?stdout: IO,
|
|
21
|
+
?stderr: IO
|
|
22
|
+
) -> Integer
|
|
23
|
+
end
|
|
24
24
|
end
|