html-to-markdown 3.4.0.pre.rc.24-aarch64-linux → 3.4.0.pre.rc.30-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Steepfile +0 -26
- data/lib/bin/html-to-markdown +0 -0
- data/lib/html_to_markdown/native.rb +21 -0
- data/lib/html_to_markdown/version.rb +6 -1
- data/lib/html_to_markdown.rb +7 -34
- data/lib/html_to_markdown_rb.so +0 -0
- data/sig/types.rbs +142 -302
- metadata +3 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 0dc08795dc2bd8efce4fa93d0cd32f48820e5e62006a1ec30de1db83a0756aa6
|
|
4
|
+
data.tar.gz: f69ec46c76e87bedcf33032daff71050bc687c837b184ba434d24a2b8bf8a13c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d4c0ec31bc6b61106b1203c2b3d5aaef2900e9bf930ebc8c135258f9d85a70c73397e9a2cdfb087a182d235110f49603beb6c71d8b79c3d058f02f6e8b03a7e8
|
|
7
|
+
data.tar.gz: 90f3e6a64111b2dd63c8521205ab906ddf0c95d6c718910baf37a64826d52a6103c3efa81cb111e071624439f3451dd22f9d4466621855ea2615c027b4e07abb
|
data/Steepfile
CHANGED
|
@@ -1,32 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
# Steepfile for type checking html-to-markdown Ruby gem
|
|
4
|
-
|
|
5
3
|
target :lib do
|
|
6
4
|
signature 'sig'
|
|
7
|
-
|
|
8
5
|
check 'lib'
|
|
9
|
-
|
|
10
|
-
configure_code_diagnostics do |hash|
|
|
11
|
-
hash[Steep::Diagnostic::Ruby::UnannotatedEmptyCollection] = :hint
|
|
12
|
-
hash[Steep::Diagnostic::Ruby::UnknownConstant] = :hint
|
|
13
|
-
hash[Steep::Diagnostic::Ruby::NoMethod] = :hint
|
|
14
|
-
end
|
|
15
|
-
|
|
16
|
-
# Configure libraries
|
|
17
|
-
library 'pathname'
|
|
18
|
-
library 'open3'
|
|
19
|
-
|
|
20
|
-
# Ignore vendor directory
|
|
21
|
-
ignore 'vendor'
|
|
22
|
-
|
|
23
|
-
# Ignore spec directory
|
|
24
|
-
ignore 'spec'
|
|
25
|
-
|
|
26
|
-
# Ignore bin directory
|
|
27
|
-
ignore 'bin'
|
|
28
|
-
|
|
29
|
-
# Ignore internal implementation modules (not public API)
|
|
30
|
-
ignore 'lib/html_to_markdown/cli.rb'
|
|
31
|
-
ignore 'lib/html_to_markdown/cli_proxy.rb'
|
|
32
6
|
end
|
data/lib/bin/html-to-markdown
CHANGED
|
Binary file
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# This file is auto-generated by alef — DO NOT EDIT.
|
|
2
|
+
# alef:hash:990c47eb4d87f0600f1e6da62d32bf37c84cf430faf31d78f5df7425a93a0f0c
|
|
3
|
+
# To regenerate: alef generate
|
|
4
|
+
# To verify freshness: alef verify --exit-code
|
|
5
|
+
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
|
6
|
+
# frozen_string_literal: true
|
|
7
|
+
|
|
8
|
+
require 'json'
|
|
9
|
+
require 'html_to_markdown_rb'
|
|
10
|
+
|
|
11
|
+
module HtmlToMarkdown
|
|
12
|
+
# Re-export all public module functions from the native extension
|
|
13
|
+
HtmlToMarkdownRs.methods(false).each do |m|
|
|
14
|
+
define_singleton_method(m) { |*args, **kwargs, &blk| HtmlToMarkdownRs.public_send(m, *args, **kwargs, &blk) }
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
# Re-export all constants (classes, structs, etc.) from the native extension
|
|
18
|
+
HtmlToMarkdownRs.constants.each do |c|
|
|
19
|
+
const_set(c, HtmlToMarkdownRs.const_get(c)) unless const_defined?(c)
|
|
20
|
+
end
|
|
21
|
+
end
|
|
@@ -1,5 +1,10 @@
|
|
|
1
|
+
# This file is auto-generated by alef — DO NOT EDIT.
|
|
2
|
+
# alef:hash:bf6359c7254886342acb441f675467dc2b2e926e46a52f7006642ddfd64583f8
|
|
3
|
+
# To regenerate: alef generate
|
|
4
|
+
# To verify freshness: alef verify --exit-code
|
|
5
|
+
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
|
1
6
|
# frozen_string_literal: true
|
|
2
7
|
|
|
3
8
|
module HtmlToMarkdown
|
|
4
|
-
VERSION = '3.4.0.pre.rc.
|
|
9
|
+
VERSION = '3.4.0.pre.rc.30'
|
|
5
10
|
end
|
data/lib/html_to_markdown.rb
CHANGED
|
@@ -1,40 +1,13 @@
|
|
|
1
|
+
# This file is auto-generated by alef — DO NOT EDIT.
|
|
2
|
+
# alef:hash:b671355c68864d5f935b91f875ab29144d9543baad5a955cd926ab9881762a19
|
|
3
|
+
# To regenerate: alef generate
|
|
4
|
+
# To verify freshness: alef verify --exit-code
|
|
5
|
+
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
|
1
6
|
# frozen_string_literal: true
|
|
2
7
|
|
|
3
8
|
require_relative 'html_to_markdown/version'
|
|
4
|
-
|
|
5
|
-
require 'json'
|
|
9
|
+
require_relative 'html_to_markdown/native'
|
|
6
10
|
|
|
7
|
-
# High-performance HTML to Markdown conversion.
|
|
8
|
-
#
|
|
9
|
-
# @example Simple conversion
|
|
10
|
-
# HtmlToMarkdown.convert('<h1>Hello</h1>') # => "# Hello\n\n"
|
|
11
|
-
#
|
|
12
|
-
# @example With options
|
|
13
|
-
# HtmlToMarkdown.convert('<h1>Hello</h1>', heading_style: 'atx')
|
|
14
11
|
module HtmlToMarkdown
|
|
15
|
-
#
|
|
16
|
-
#
|
|
17
|
-
# @param html [String] The HTML content to convert.
|
|
18
|
-
# @param options [Hash] Optional conversion options.
|
|
19
|
-
# Supported keys (all optional):
|
|
20
|
-
# - :heading_style - 'atx', 'atx_closed', 'setext', 'underlined'
|
|
21
|
-
# - :code_block_style - 'backticks', 'tildes', 'indented'
|
|
22
|
-
# - :escape_asterisks - Boolean
|
|
23
|
-
# - :escape_underscores - Boolean
|
|
24
|
-
# - :escape_misc - Boolean
|
|
25
|
-
# - :escape_ascii - Boolean
|
|
26
|
-
# - :strip_newlines - Boolean
|
|
27
|
-
# - :keep_inline_images_in - Array of tag names
|
|
28
|
-
# - :strip_tags - Array of tag names to strip
|
|
29
|
-
# - :preserve_tags - Array of tag names to preserve verbatim
|
|
30
|
-
# (and more, matching ConversionOptions fields)
|
|
31
|
-
# @return [String] The converted Markdown content.
|
|
32
|
-
def self.convert(html, options = {}, visitor = nil)
|
|
33
|
-
# The Rust FFI expects options as a JSON string; serialise the hash here
|
|
34
|
-
# rather than constructing a ConversionOptions object, which the generated
|
|
35
|
-
# FFI layer cannot coerce back to String (see issue #334).
|
|
36
|
-
opts_json = options.nil? || options.empty? ? nil : options.to_json
|
|
37
|
-
result = HtmlToMarkdownRs.convert(html, opts_json, visitor)
|
|
38
|
-
result.content || ''
|
|
39
|
-
end
|
|
12
|
+
# Re-export all types and functions from native extension
|
|
40
13
|
end
|
data/lib/html_to_markdown_rb.so
CHANGED
|
Binary file
|
data/sig/types.rbs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# This file is auto-generated by alef — DO NOT EDIT.
|
|
2
|
-
# alef:hash:
|
|
2
|
+
# alef:hash:da88db156d77eefe37cfd0ca53ea75c07abbc5d3ebb7ad977060f871af4c9ff3
|
|
3
3
|
# To regenerate: alef generate
|
|
4
4
|
# To verify freshness: alef verify --exit-code
|
|
5
5
|
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
|
@@ -17,7 +17,6 @@ module HtmlToMarkdown
|
|
|
17
17
|
# # Examples
|
|
18
18
|
#
|
|
19
19
|
# ```
|
|
20
|
-
# # use html_to_markdown_rs::metadata::DocumentMetadata;
|
|
21
20
|
# let doc = DocumentMetadata {
|
|
22
21
|
# title: Some("My Article".to_string()),
|
|
23
22
|
# description: Some("A great article about Rust".to_string()),
|
|
@@ -28,17 +27,17 @@ module HtmlToMarkdown
|
|
|
28
27
|
# assert_eq!(doc.title, Some("My Article".to_string()));
|
|
29
28
|
# ```
|
|
30
29
|
|
|
31
|
-
attr_accessor title: String
|
|
32
|
-
attr_accessor description: String
|
|
33
|
-
attr_accessor keywords: Array[String]
|
|
34
|
-
attr_accessor author: String
|
|
35
|
-
attr_accessor canonical_url: String
|
|
36
|
-
attr_accessor base_href: String
|
|
37
|
-
attr_accessor language: String
|
|
38
|
-
attr_accessor text_direction: TextDirection
|
|
39
|
-
attr_accessor open_graph: Hash[String, String]
|
|
40
|
-
attr_accessor twitter_card: Hash[String, String]
|
|
41
|
-
attr_accessor meta_tags: Hash[String, String]
|
|
30
|
+
attr_accessor title: String?
|
|
31
|
+
attr_accessor description: String?
|
|
32
|
+
attr_accessor keywords: Array[String]?
|
|
33
|
+
attr_accessor author: String?
|
|
34
|
+
attr_accessor canonical_url: String?
|
|
35
|
+
attr_accessor base_href: String?
|
|
36
|
+
attr_accessor language: String?
|
|
37
|
+
attr_accessor text_direction: TextDirection?
|
|
38
|
+
attr_accessor open_graph: Hash[String, String]?
|
|
39
|
+
attr_accessor twitter_card: Hash[String, String]?
|
|
40
|
+
attr_accessor meta_tags: Hash[String, String]?
|
|
42
41
|
|
|
43
42
|
def initialize: (?title: String, ?description: String, keywords: Array[String], ?author: String, ?canonical_url: String, ?base_href: String, ?language: String, ?text_direction: TextDirection, open_graph: Hash[String, String], twitter_card: Hash[String, String], meta_tags: Hash[String, String]) -> void
|
|
44
43
|
end
|
|
@@ -52,7 +51,6 @@ module HtmlToMarkdown
|
|
|
52
51
|
# # Examples
|
|
53
52
|
#
|
|
54
53
|
# ```
|
|
55
|
-
# # use html_to_markdown_rs::metadata::HeaderMetadata;
|
|
56
54
|
# let header = HeaderMetadata {
|
|
57
55
|
# level: 1,
|
|
58
56
|
# text: "Main Title".to_string(),
|
|
@@ -83,7 +81,6 @@ module HtmlToMarkdown
|
|
|
83
81
|
# # Examples
|
|
84
82
|
#
|
|
85
83
|
# ```
|
|
86
|
-
# # use html_to_markdown_rs::metadata::{LinkMetadata, LinkType};
|
|
87
84
|
# let link = LinkMetadata {
|
|
88
85
|
# href: "https://example.com".to_string(),
|
|
89
86
|
# text: "Example".to_string(),
|
|
@@ -117,7 +114,6 @@ module HtmlToMarkdown
|
|
|
117
114
|
# # Examples
|
|
118
115
|
#
|
|
119
116
|
# ```
|
|
120
|
-
# # use html_to_markdown_rs::metadata::{ImageMetadata, ImageType};
|
|
121
117
|
# let img = ImageMetadata {
|
|
122
118
|
# src: "https://example.com/image.jpg".to_string(),
|
|
123
119
|
# alt: Some("An example image".to_string()),
|
|
@@ -149,7 +145,6 @@ module HtmlToMarkdown
|
|
|
149
145
|
# # Examples
|
|
150
146
|
#
|
|
151
147
|
# ```
|
|
152
|
-
# # use html_to_markdown_rs::metadata::{StructuredData, StructuredDataType};
|
|
153
148
|
# let schema = StructuredData {
|
|
154
149
|
# data_type: StructuredDataType::JsonLd,
|
|
155
150
|
# raw_json: r#"{"@context":"https://schema.org","@type":"Article"}"#.to_string(),
|
|
@@ -175,7 +170,6 @@ module HtmlToMarkdown
|
|
|
175
170
|
# # Examples
|
|
176
171
|
#
|
|
177
172
|
# ```
|
|
178
|
-
# # use html_to_markdown_rs::metadata::HtmlMetadata;
|
|
179
173
|
# let metadata = HtmlMetadata {
|
|
180
174
|
# document: Default::default(),
|
|
181
175
|
# headers: Vec::new(),
|
|
@@ -187,11 +181,11 @@ module HtmlToMarkdown
|
|
|
187
181
|
# assert!(metadata.headers.is_empty());
|
|
188
182
|
# ```
|
|
189
183
|
|
|
190
|
-
attr_accessor document: DocumentMetadata
|
|
191
|
-
attr_accessor headers: Array[HeaderMetadata]
|
|
192
|
-
attr_accessor links: Array[LinkMetadata]
|
|
193
|
-
attr_accessor images: Array[ImageMetadata]
|
|
194
|
-
attr_accessor structured_data: Array[StructuredData]
|
|
184
|
+
attr_accessor document: DocumentMetadata?
|
|
185
|
+
attr_accessor headers: Array[HeaderMetadata]?
|
|
186
|
+
attr_accessor links: Array[LinkMetadata]?
|
|
187
|
+
attr_accessor images: Array[ImageMetadata]?
|
|
188
|
+
attr_accessor structured_data: Array[StructuredData]?
|
|
195
189
|
|
|
196
190
|
def initialize: (document: DocumentMetadata, headers: Array[HeaderMetadata], links: Array[LinkMetadata], images: Array[ImageMetadata], structured_data: Array[StructuredData]) -> void
|
|
197
191
|
end
|
|
@@ -213,48 +207,49 @@ module HtmlToMarkdown
|
|
|
213
207
|
# .build();
|
|
214
208
|
# ```
|
|
215
209
|
|
|
216
|
-
attr_accessor heading_style: HeadingStyle
|
|
217
|
-
attr_accessor list_indent_type: ListIndentType
|
|
218
|
-
attr_accessor list_indent_width: Integer
|
|
219
|
-
attr_accessor bullets: String
|
|
220
|
-
attr_accessor strong_em_symbol: String
|
|
221
|
-
attr_accessor escape_asterisks: bool
|
|
222
|
-
attr_accessor escape_underscores: bool
|
|
223
|
-
attr_accessor escape_misc: bool
|
|
224
|
-
attr_accessor escape_ascii: bool
|
|
225
|
-
attr_accessor code_language: String
|
|
226
|
-
attr_accessor autolinks: bool
|
|
227
|
-
attr_accessor default_title: bool
|
|
228
|
-
attr_accessor br_in_tables: bool
|
|
229
|
-
attr_accessor highlight_style: HighlightStyle
|
|
230
|
-
attr_accessor extract_metadata: bool
|
|
231
|
-
attr_accessor whitespace_mode: WhitespaceMode
|
|
232
|
-
attr_accessor strip_newlines: bool
|
|
233
|
-
attr_accessor wrap: bool
|
|
234
|
-
attr_accessor wrap_width: Integer
|
|
235
|
-
attr_accessor convert_as_inline: bool
|
|
236
|
-
attr_accessor sub_symbol: String
|
|
237
|
-
attr_accessor sup_symbol: String
|
|
238
|
-
attr_accessor newline_style: NewlineStyle
|
|
239
|
-
attr_accessor code_block_style: CodeBlockStyle
|
|
240
|
-
attr_accessor keep_inline_images_in: Array[String]
|
|
241
|
-
attr_accessor preprocessing: PreprocessingOptions
|
|
242
|
-
attr_accessor encoding: String
|
|
243
|
-
attr_accessor debug: bool
|
|
244
|
-
attr_accessor strip_tags: Array[String]
|
|
245
|
-
attr_accessor preserve_tags: Array[String]
|
|
246
|
-
attr_accessor skip_images: bool
|
|
247
|
-
attr_accessor link_style: LinkStyle
|
|
248
|
-
attr_accessor output_format: OutputFormat
|
|
249
|
-
attr_accessor include_document_structure: bool
|
|
250
|
-
attr_accessor extract_images: bool
|
|
251
|
-
attr_accessor max_image_size: Integer
|
|
252
|
-
attr_accessor capture_svg: bool
|
|
253
|
-
attr_accessor infer_dimensions: bool
|
|
254
|
-
attr_accessor max_depth: Integer
|
|
255
|
-
attr_accessor exclude_selectors: Array[String]
|
|
256
|
-
|
|
257
|
-
|
|
210
|
+
attr_accessor heading_style: HeadingStyle?
|
|
211
|
+
attr_accessor list_indent_type: ListIndentType?
|
|
212
|
+
attr_accessor list_indent_width: Integer?
|
|
213
|
+
attr_accessor bullets: String?
|
|
214
|
+
attr_accessor strong_em_symbol: String?
|
|
215
|
+
attr_accessor escape_asterisks: bool?
|
|
216
|
+
attr_accessor escape_underscores: bool?
|
|
217
|
+
attr_accessor escape_misc: bool?
|
|
218
|
+
attr_accessor escape_ascii: bool?
|
|
219
|
+
attr_accessor code_language: String?
|
|
220
|
+
attr_accessor autolinks: bool?
|
|
221
|
+
attr_accessor default_title: bool?
|
|
222
|
+
attr_accessor br_in_tables: bool?
|
|
223
|
+
attr_accessor highlight_style: HighlightStyle?
|
|
224
|
+
attr_accessor extract_metadata: bool?
|
|
225
|
+
attr_accessor whitespace_mode: WhitespaceMode?
|
|
226
|
+
attr_accessor strip_newlines: bool?
|
|
227
|
+
attr_accessor wrap: bool?
|
|
228
|
+
attr_accessor wrap_width: Integer?
|
|
229
|
+
attr_accessor convert_as_inline: bool?
|
|
230
|
+
attr_accessor sub_symbol: String?
|
|
231
|
+
attr_accessor sup_symbol: String?
|
|
232
|
+
attr_accessor newline_style: NewlineStyle?
|
|
233
|
+
attr_accessor code_block_style: CodeBlockStyle?
|
|
234
|
+
attr_accessor keep_inline_images_in: Array[String]?
|
|
235
|
+
attr_accessor preprocessing: PreprocessingOptions?
|
|
236
|
+
attr_accessor encoding: String?
|
|
237
|
+
attr_accessor debug: bool?
|
|
238
|
+
attr_accessor strip_tags: Array[String]?
|
|
239
|
+
attr_accessor preserve_tags: Array[String]?
|
|
240
|
+
attr_accessor skip_images: bool?
|
|
241
|
+
attr_accessor link_style: LinkStyle?
|
|
242
|
+
attr_accessor output_format: OutputFormat?
|
|
243
|
+
attr_accessor include_document_structure: bool?
|
|
244
|
+
attr_accessor extract_images: bool?
|
|
245
|
+
attr_accessor max_image_size: Integer?
|
|
246
|
+
attr_accessor capture_svg: bool?
|
|
247
|
+
attr_accessor infer_dimensions: bool?
|
|
248
|
+
attr_accessor max_depth: Integer?
|
|
249
|
+
attr_accessor exclude_selectors: Array[String]?
|
|
250
|
+
attr_accessor visitor: VisitorHandle?
|
|
251
|
+
|
|
252
|
+
def initialize: (heading_style: HeadingStyle, list_indent_type: ListIndentType, list_indent_width: Integer, bullets: String, strong_em_symbol: String, escape_asterisks: bool, escape_underscores: bool, escape_misc: bool, escape_ascii: bool, code_language: String, autolinks: bool, default_title: bool, br_in_tables: bool, highlight_style: HighlightStyle, extract_metadata: bool, whitespace_mode: WhitespaceMode, strip_newlines: bool, wrap: bool, wrap_width: Integer, convert_as_inline: bool, sub_symbol: String, sup_symbol: String, newline_style: NewlineStyle, code_block_style: CodeBlockStyle, keep_inline_images_in: Array[String], preprocessing: PreprocessingOptions, encoding: String, debug: bool, strip_tags: Array[String], preserve_tags: Array[String], skip_images: bool, link_style: LinkStyle, output_format: OutputFormat, include_document_structure: bool, extract_images: bool, max_image_size: Integer, capture_svg: bool, infer_dimensions: bool, ?max_depth: Integer, exclude_selectors: Array[String], ?visitor: VisitorHandle) -> void
|
|
258
253
|
def apply_update: (ConversionOptionsUpdate update) -> void
|
|
259
254
|
def self.default: () -> ConversionOptions
|
|
260
255
|
def self.builder: () -> ConversionOptionsBuilder
|
|
@@ -271,6 +266,7 @@ module HtmlToMarkdown
|
|
|
271
266
|
def preserve_tags: (Array[String] tags) -> ConversionOptionsBuilder
|
|
272
267
|
def keep_inline_images_in: (Array[String] tags) -> ConversionOptionsBuilder
|
|
273
268
|
def exclude_selectors: (Array[String] selectors) -> ConversionOptionsBuilder
|
|
269
|
+
def visitor: (?VisitorHandle visitor) -> ConversionOptionsBuilder
|
|
274
270
|
def preprocessing: (PreprocessingOptions preprocessing) -> ConversionOptionsBuilder
|
|
275
271
|
def build: () -> ConversionOptions
|
|
276
272
|
end
|
|
@@ -281,57 +277,58 @@ module HtmlToMarkdown
|
|
|
281
277
|
# Uses `Option<T>` fields for selective updates. Bindings use this to construct
|
|
282
278
|
# options from language-native types. Prefer [`ConversionOptionsBuilder`] for Rust code.
|
|
283
279
|
|
|
284
|
-
attr_accessor heading_style: HeadingStyle
|
|
285
|
-
attr_accessor list_indent_type: ListIndentType
|
|
286
|
-
attr_accessor list_indent_width: Integer
|
|
287
|
-
attr_accessor bullets: String
|
|
288
|
-
attr_accessor strong_em_symbol: String
|
|
289
|
-
attr_accessor escape_asterisks: bool
|
|
290
|
-
attr_accessor escape_underscores: bool
|
|
291
|
-
attr_accessor escape_misc: bool
|
|
292
|
-
attr_accessor escape_ascii: bool
|
|
293
|
-
attr_accessor code_language: String
|
|
294
|
-
attr_accessor autolinks: bool
|
|
295
|
-
attr_accessor default_title: bool
|
|
296
|
-
attr_accessor br_in_tables: bool
|
|
297
|
-
attr_accessor highlight_style: HighlightStyle
|
|
298
|
-
attr_accessor extract_metadata: bool
|
|
299
|
-
attr_accessor whitespace_mode: WhitespaceMode
|
|
300
|
-
attr_accessor strip_newlines: bool
|
|
301
|
-
attr_accessor wrap: bool
|
|
302
|
-
attr_accessor wrap_width: Integer
|
|
303
|
-
attr_accessor convert_as_inline: bool
|
|
304
|
-
attr_accessor sub_symbol: String
|
|
305
|
-
attr_accessor sup_symbol: String
|
|
306
|
-
attr_accessor newline_style: NewlineStyle
|
|
307
|
-
attr_accessor code_block_style: CodeBlockStyle
|
|
308
|
-
attr_accessor keep_inline_images_in: Array[String]
|
|
309
|
-
attr_accessor preprocessing: PreprocessingOptionsUpdate
|
|
310
|
-
attr_accessor encoding: String
|
|
311
|
-
attr_accessor debug: bool
|
|
312
|
-
attr_accessor strip_tags: Array[String]
|
|
313
|
-
attr_accessor preserve_tags: Array[String]
|
|
314
|
-
attr_accessor skip_images: bool
|
|
315
|
-
attr_accessor link_style: LinkStyle
|
|
316
|
-
attr_accessor output_format: OutputFormat
|
|
317
|
-
attr_accessor include_document_structure: bool
|
|
318
|
-
attr_accessor extract_images: bool
|
|
319
|
-
attr_accessor max_image_size: Integer
|
|
320
|
-
attr_accessor capture_svg: bool
|
|
321
|
-
attr_accessor infer_dimensions: bool
|
|
280
|
+
attr_accessor heading_style: HeadingStyle?
|
|
281
|
+
attr_accessor list_indent_type: ListIndentType?
|
|
282
|
+
attr_accessor list_indent_width: Integer?
|
|
283
|
+
attr_accessor bullets: String?
|
|
284
|
+
attr_accessor strong_em_symbol: String?
|
|
285
|
+
attr_accessor escape_asterisks: bool?
|
|
286
|
+
attr_accessor escape_underscores: bool?
|
|
287
|
+
attr_accessor escape_misc: bool?
|
|
288
|
+
attr_accessor escape_ascii: bool?
|
|
289
|
+
attr_accessor code_language: String?
|
|
290
|
+
attr_accessor autolinks: bool?
|
|
291
|
+
attr_accessor default_title: bool?
|
|
292
|
+
attr_accessor br_in_tables: bool?
|
|
293
|
+
attr_accessor highlight_style: HighlightStyle?
|
|
294
|
+
attr_accessor extract_metadata: bool?
|
|
295
|
+
attr_accessor whitespace_mode: WhitespaceMode?
|
|
296
|
+
attr_accessor strip_newlines: bool?
|
|
297
|
+
attr_accessor wrap: bool?
|
|
298
|
+
attr_accessor wrap_width: Integer?
|
|
299
|
+
attr_accessor convert_as_inline: bool?
|
|
300
|
+
attr_accessor sub_symbol: String?
|
|
301
|
+
attr_accessor sup_symbol: String?
|
|
302
|
+
attr_accessor newline_style: NewlineStyle?
|
|
303
|
+
attr_accessor code_block_style: CodeBlockStyle?
|
|
304
|
+
attr_accessor keep_inline_images_in: Array[String]?
|
|
305
|
+
attr_accessor preprocessing: PreprocessingOptionsUpdate?
|
|
306
|
+
attr_accessor encoding: String?
|
|
307
|
+
attr_accessor debug: bool?
|
|
308
|
+
attr_accessor strip_tags: Array[String]?
|
|
309
|
+
attr_accessor preserve_tags: Array[String]?
|
|
310
|
+
attr_accessor skip_images: bool?
|
|
311
|
+
attr_accessor link_style: LinkStyle?
|
|
312
|
+
attr_accessor output_format: OutputFormat?
|
|
313
|
+
attr_accessor include_document_structure: bool?
|
|
314
|
+
attr_accessor extract_images: bool?
|
|
315
|
+
attr_accessor max_image_size: Integer?
|
|
316
|
+
attr_accessor capture_svg: bool?
|
|
317
|
+
attr_accessor infer_dimensions: bool?
|
|
322
318
|
attr_accessor max_depth: Integer?
|
|
323
|
-
attr_accessor exclude_selectors: Array[String]
|
|
319
|
+
attr_accessor exclude_selectors: Array[String]?
|
|
320
|
+
attr_accessor visitor: VisitorHandle?
|
|
324
321
|
|
|
325
|
-
def initialize: (?heading_style: HeadingStyle, ?list_indent_type: ListIndentType, ?list_indent_width: Integer, ?bullets: String, ?strong_em_symbol: String, ?escape_asterisks: bool, ?escape_underscores: bool, ?escape_misc: bool, ?escape_ascii: bool, ?code_language: String, ?autolinks: bool, ?default_title: bool, ?br_in_tables: bool, ?highlight_style: HighlightStyle, ?extract_metadata: bool, ?whitespace_mode: WhitespaceMode, ?strip_newlines: bool, ?wrap: bool, ?wrap_width: Integer, ?convert_as_inline: bool, ?sub_symbol: String, ?sup_symbol: String, ?newline_style: NewlineStyle, ?code_block_style: CodeBlockStyle, ?keep_inline_images_in: Array[String], ?preprocessing: PreprocessingOptionsUpdate, ?encoding: String, ?debug: bool, ?strip_tags: Array[String], ?preserve_tags: Array[String], ?skip_images: bool, ?link_style: LinkStyle, ?output_format: OutputFormat, ?include_document_structure: bool, ?extract_images: bool, ?max_image_size: Integer, ?capture_svg: bool, ?infer_dimensions: bool, ?max_depth: Integer?, ?exclude_selectors: Array[String]) -> void
|
|
322
|
+
def initialize: (?heading_style: HeadingStyle, ?list_indent_type: ListIndentType, ?list_indent_width: Integer, ?bullets: String, ?strong_em_symbol: String, ?escape_asterisks: bool, ?escape_underscores: bool, ?escape_misc: bool, ?escape_ascii: bool, ?code_language: String, ?autolinks: bool, ?default_title: bool, ?br_in_tables: bool, ?highlight_style: HighlightStyle, ?extract_metadata: bool, ?whitespace_mode: WhitespaceMode, ?strip_newlines: bool, ?wrap: bool, ?wrap_width: Integer, ?convert_as_inline: bool, ?sub_symbol: String, ?sup_symbol: String, ?newline_style: NewlineStyle, ?code_block_style: CodeBlockStyle, ?keep_inline_images_in: Array[String], ?preprocessing: PreprocessingOptionsUpdate, ?encoding: String, ?debug: bool, ?strip_tags: Array[String], ?preserve_tags: Array[String], ?skip_images: bool, ?link_style: LinkStyle, ?output_format: OutputFormat, ?include_document_structure: bool, ?extract_images: bool, ?max_image_size: Integer, ?capture_svg: bool, ?infer_dimensions: bool, ?max_depth: Integer?, ?exclude_selectors: Array[String], ?visitor: VisitorHandle) -> void
|
|
326
323
|
end
|
|
327
324
|
|
|
328
325
|
class PreprocessingOptions
|
|
329
326
|
# HTML preprocessing options for document cleanup before conversion.
|
|
330
327
|
|
|
331
|
-
attr_accessor enabled: bool
|
|
332
|
-
attr_accessor preset: PreprocessingPreset
|
|
333
|
-
attr_accessor remove_navigation: bool
|
|
334
|
-
attr_accessor remove_forms: bool
|
|
328
|
+
attr_accessor enabled: bool?
|
|
329
|
+
attr_accessor preset: PreprocessingPreset?
|
|
330
|
+
attr_accessor remove_navigation: bool?
|
|
331
|
+
attr_accessor remove_forms: bool?
|
|
335
332
|
|
|
336
333
|
def initialize: (enabled: bool, preset: PreprocessingPreset, remove_navigation: bool, remove_forms: bool) -> void
|
|
337
334
|
def apply_update: (PreprocessingOptionsUpdate update) -> void
|
|
@@ -347,10 +344,10 @@ module HtmlToMarkdown
|
|
|
347
344
|
# Only specified fields (Some values) will override existing options; None values leave the
|
|
348
345
|
# corresponding fields unchanged when applied via [`PreprocessingOptions::apply_update`].
|
|
349
346
|
|
|
350
|
-
attr_accessor enabled: bool
|
|
351
|
-
attr_accessor preset: PreprocessingPreset
|
|
352
|
-
attr_accessor remove_navigation: bool
|
|
353
|
-
attr_accessor remove_forms: bool
|
|
347
|
+
attr_accessor enabled: bool?
|
|
348
|
+
attr_accessor preset: PreprocessingPreset?
|
|
349
|
+
attr_accessor remove_navigation: bool?
|
|
350
|
+
attr_accessor remove_forms: bool?
|
|
354
351
|
|
|
355
352
|
def initialize: (?enabled: bool, ?preset: PreprocessingPreset, ?remove_navigation: bool, ?remove_forms: bool) -> void
|
|
356
353
|
end
|
|
@@ -407,12 +404,12 @@ module HtmlToMarkdown
|
|
|
407
404
|
# assert!(result.warnings.is_empty());
|
|
408
405
|
# ```
|
|
409
406
|
|
|
410
|
-
attr_accessor content: String
|
|
411
|
-
attr_accessor document: DocumentStructure
|
|
412
|
-
attr_accessor metadata: HtmlMetadata
|
|
413
|
-
attr_accessor tables: Array[TableData]
|
|
414
|
-
attr_accessor images: Array[String]
|
|
415
|
-
attr_accessor warnings: Array[ProcessingWarning]
|
|
407
|
+
attr_accessor content: String?
|
|
408
|
+
attr_accessor document: DocumentStructure?
|
|
409
|
+
attr_accessor metadata: HtmlMetadata?
|
|
410
|
+
attr_accessor tables: Array[TableData]?
|
|
411
|
+
attr_accessor images: Array[String]?
|
|
412
|
+
attr_accessor warnings: Array[ProcessingWarning]?
|
|
416
413
|
|
|
417
414
|
def initialize: (?content: String, ?document: DocumentStructure, metadata: HtmlMetadata, tables: Array[TableData], images: Array[String], warnings: Array[ProcessingWarning]) -> void
|
|
418
415
|
end
|
|
@@ -420,9 +417,9 @@ module HtmlToMarkdown
|
|
|
420
417
|
class TableGrid
|
|
421
418
|
# A structured table grid with cell-level data including spans.
|
|
422
419
|
|
|
423
|
-
attr_accessor rows: Integer
|
|
424
|
-
attr_accessor cols: Integer
|
|
425
|
-
attr_accessor cells: Array[GridCell]
|
|
420
|
+
attr_accessor rows: Integer?
|
|
421
|
+
attr_accessor cols: Integer?
|
|
422
|
+
attr_accessor cells: Array[GridCell]?
|
|
426
423
|
|
|
427
424
|
def initialize: (rows: Integer, cols: Integer, cells: Array[GridCell]) -> void
|
|
428
425
|
end
|
|
@@ -458,6 +455,13 @@ module HtmlToMarkdown
|
|
|
458
455
|
def initialize: (message: String, kind: WarningKind) -> void
|
|
459
456
|
end
|
|
460
457
|
|
|
458
|
+
class VisitorHandle
|
|
459
|
+
# Type alias for a visitor handle (Rc-wrapped `RefCell` for interior mutability).
|
|
460
|
+
#
|
|
461
|
+
# This allows visitors to be passed around and shared while still being mutable.
|
|
462
|
+
|
|
463
|
+
end
|
|
464
|
+
|
|
461
465
|
class NodeContext
|
|
462
466
|
# Context information passed to all visitor methods.
|
|
463
467
|
#
|
|
@@ -479,112 +483,77 @@ module HtmlToMarkdown
|
|
|
479
483
|
# Text directionality of document content.
|
|
480
484
|
#
|
|
481
485
|
# Corresponds to the HTML `dir` attribute and `bdi` element directionality.
|
|
482
|
-
|
|
483
|
-
LeftToRight: Integer
|
|
484
|
-
RightToLeft: Integer
|
|
485
|
-
Auto: Integer
|
|
486
|
+
type instance = :left_to_right | :right_to_left | :auto
|
|
486
487
|
end
|
|
487
488
|
|
|
488
489
|
class LinkType
|
|
489
490
|
# Link classification based on href value and document context.
|
|
490
491
|
#
|
|
491
492
|
# Used to categorize links during extraction for filtering and analysis.
|
|
492
|
-
|
|
493
|
-
Anchor: Integer
|
|
494
|
-
Internal: Integer
|
|
495
|
-
External: Integer
|
|
496
|
-
Email: Integer
|
|
497
|
-
Phone: Integer
|
|
498
|
-
Other: Integer
|
|
493
|
+
type instance = :anchor | :internal | :external | :email | :phone | :other
|
|
499
494
|
end
|
|
500
495
|
|
|
501
496
|
class ImageType
|
|
502
497
|
# Image source classification for proper handling and processing.
|
|
503
498
|
#
|
|
504
499
|
# Determines whether an image is embedded (data URI), inline SVG, external, or relative.
|
|
505
|
-
|
|
506
|
-
DataUri: Integer
|
|
507
|
-
InlineSvg: Integer
|
|
508
|
-
External: Integer
|
|
509
|
-
Relative: Integer
|
|
500
|
+
type instance = :data_uri | :inline_svg | :external | :relative
|
|
510
501
|
end
|
|
511
502
|
|
|
512
503
|
class StructuredDataType
|
|
513
504
|
# Structured data format type.
|
|
514
505
|
#
|
|
515
506
|
# Identifies the schema/format used for structured data markup.
|
|
516
|
-
|
|
517
|
-
JsonLd: Integer
|
|
518
|
-
Microdata: Integer
|
|
519
|
-
RDFa: Integer
|
|
507
|
+
type instance = :json_ld | :microdata | :r_d_fa
|
|
520
508
|
end
|
|
521
509
|
|
|
522
510
|
class PreprocessingPreset
|
|
523
511
|
# HTML preprocessing aggressiveness level.
|
|
524
512
|
#
|
|
525
513
|
# Controls the extent of cleanup performed before conversion. Higher levels remove more elements.
|
|
526
|
-
|
|
527
|
-
Minimal: Integer
|
|
528
|
-
Standard: Integer
|
|
529
|
-
Aggressive: Integer
|
|
514
|
+
type instance = :minimal | :standard | :aggressive
|
|
530
515
|
end
|
|
531
516
|
|
|
532
517
|
class HeadingStyle
|
|
533
518
|
# Heading style options for Markdown output.
|
|
534
519
|
#
|
|
535
520
|
# Controls how headings (h1-h6) are rendered in the output Markdown.
|
|
536
|
-
|
|
537
|
-
Underlined: Integer
|
|
538
|
-
Atx: Integer
|
|
539
|
-
AtxClosed: Integer
|
|
521
|
+
type instance = :underlined | :atx | :atx_closed
|
|
540
522
|
end
|
|
541
523
|
|
|
542
524
|
class ListIndentType
|
|
543
525
|
# List indentation character type.
|
|
544
526
|
#
|
|
545
527
|
# Controls whether list items are indented with spaces or tabs.
|
|
546
|
-
|
|
547
|
-
Spaces: Integer
|
|
548
|
-
Tabs: Integer
|
|
528
|
+
type instance = :spaces | :tabs
|
|
549
529
|
end
|
|
550
530
|
|
|
551
531
|
class WhitespaceMode
|
|
552
532
|
# Whitespace handling strategy during conversion.
|
|
553
533
|
#
|
|
554
534
|
# Determines how sequences of whitespace characters (spaces, tabs, newlines) are processed.
|
|
555
|
-
|
|
556
|
-
Normalized: Integer
|
|
557
|
-
Strict: Integer
|
|
535
|
+
type instance = :normalized | :strict
|
|
558
536
|
end
|
|
559
537
|
|
|
560
538
|
class NewlineStyle
|
|
561
539
|
# Line break syntax in Markdown output.
|
|
562
540
|
#
|
|
563
541
|
# Controls how soft line breaks (from `<br>` or line breaks in source) are rendered.
|
|
564
|
-
|
|
565
|
-
Spaces: Integer
|
|
566
|
-
Backslash: Integer
|
|
542
|
+
type instance = :spaces | :backslash
|
|
567
543
|
end
|
|
568
544
|
|
|
569
545
|
class CodeBlockStyle
|
|
570
546
|
# Code block fence style in Markdown output.
|
|
571
547
|
#
|
|
572
548
|
# Determines how code blocks (`<pre><code>`) are rendered in Markdown.
|
|
573
|
-
|
|
574
|
-
Indented: Integer
|
|
575
|
-
Backticks: Integer
|
|
576
|
-
Tildes: Integer
|
|
549
|
+
type instance = :indented | :backticks | :tildes
|
|
577
550
|
end
|
|
578
551
|
|
|
579
552
|
class HighlightStyle
|
|
580
553
|
# Highlight rendering style for `<mark>` elements.
|
|
581
554
|
#
|
|
582
555
|
# Controls how highlighted text is rendered in Markdown output.
|
|
583
|
-
|
|
584
|
-
DoubleEqual: Integer
|
|
585
|
-
Html: Integer
|
|
586
|
-
Bold: Integer
|
|
587
|
-
None: Integer
|
|
556
|
+
type instance = :double_equal | :html | :bold | :none
|
|
588
557
|
end
|
|
589
558
|
|
|
590
559
|
class LinkStyle
|
|
@@ -592,66 +561,31 @@ module HtmlToMarkdown
|
|
|
592
561
|
#
|
|
593
562
|
# Controls whether links and images use inline `[text](url)` syntax or
|
|
594
563
|
# reference-style `[text][1]` syntax with definitions collected at the end.
|
|
595
|
-
|
|
596
|
-
Inline: Integer
|
|
597
|
-
Reference: Integer
|
|
564
|
+
type instance = :inline | :reference
|
|
598
565
|
end
|
|
599
566
|
|
|
600
567
|
class OutputFormat
|
|
601
568
|
# Output format for conversion.
|
|
602
569
|
#
|
|
603
570
|
# Specifies the target markup language format for the conversion output.
|
|
604
|
-
|
|
605
|
-
Markdown: Integer
|
|
606
|
-
Djot: Integer
|
|
607
|
-
Plain: Integer
|
|
571
|
+
type instance = :markdown | :djot | :plain
|
|
608
572
|
end
|
|
609
573
|
|
|
610
574
|
class NodeContent
|
|
611
575
|
# The semantic content type of a document node.
|
|
612
576
|
#
|
|
613
577
|
# Uses internally tagged representation (`"node_type": "heading"`) for JSON serialization.
|
|
614
|
-
|
|
615
|
-
Heading: Integer
|
|
616
|
-
Paragraph: Integer
|
|
617
|
-
List: Integer
|
|
618
|
-
ListItem: Integer
|
|
619
|
-
Table: Integer
|
|
620
|
-
Image: Integer
|
|
621
|
-
Code: Integer
|
|
622
|
-
Quote: Integer
|
|
623
|
-
DefinitionList: Integer
|
|
624
|
-
DefinitionItem: Integer
|
|
625
|
-
RawBlock: Integer
|
|
626
|
-
MetadataBlock: Integer
|
|
627
|
-
Group: Integer
|
|
628
578
|
end
|
|
629
579
|
|
|
630
580
|
class AnnotationKind
|
|
631
581
|
# The type of an inline text annotation.
|
|
632
582
|
#
|
|
633
583
|
# Uses internally tagged representation (`"annotation_type": "bold"`) for JSON serialization.
|
|
634
|
-
|
|
635
|
-
Bold: Integer
|
|
636
|
-
Italic: Integer
|
|
637
|
-
Underline: Integer
|
|
638
|
-
Strikethrough: Integer
|
|
639
|
-
Code: Integer
|
|
640
|
-
Subscript: Integer
|
|
641
|
-
Superscript: Integer
|
|
642
|
-
Highlight: Integer
|
|
643
|
-
Link: Integer
|
|
644
584
|
end
|
|
645
585
|
|
|
646
586
|
class WarningKind
|
|
647
587
|
# Categories of processing warnings.
|
|
648
|
-
|
|
649
|
-
ImageExtractionFailed: Integer
|
|
650
|
-
EncodingFallback: Integer
|
|
651
|
-
TruncatedInput: Integer
|
|
652
|
-
MalformedHtml: Integer
|
|
653
|
-
SanitizationApplied: Integer
|
|
654
|
-
DepthLimitExceeded: Integer
|
|
588
|
+
type instance = :image_extraction_failed | :encoding_fallback | :truncated_input | :malformed_html | :sanitization_applied | :depth_limit_exceeded
|
|
655
589
|
end
|
|
656
590
|
|
|
657
591
|
class NodeType
|
|
@@ -659,95 +593,7 @@ module HtmlToMarkdown
|
|
|
659
593
|
#
|
|
660
594
|
# This enum categorizes all HTML elements that the converter recognizes,
|
|
661
595
|
# providing a coarse-grained classification for visitor dispatch.
|
|
662
|
-
|
|
663
|
-
Text: Integer
|
|
664
|
-
Element: Integer
|
|
665
|
-
Heading: Integer
|
|
666
|
-
Paragraph: Integer
|
|
667
|
-
Div: Integer
|
|
668
|
-
Blockquote: Integer
|
|
669
|
-
Pre: Integer
|
|
670
|
-
Hr: Integer
|
|
671
|
-
List: Integer
|
|
672
|
-
ListItem: Integer
|
|
673
|
-
DefinitionList: Integer
|
|
674
|
-
DefinitionTerm: Integer
|
|
675
|
-
DefinitionDescription: Integer
|
|
676
|
-
Table: Integer
|
|
677
|
-
TableRow: Integer
|
|
678
|
-
TableCell: Integer
|
|
679
|
-
TableHeader: Integer
|
|
680
|
-
TableBody: Integer
|
|
681
|
-
TableHead: Integer
|
|
682
|
-
TableFoot: Integer
|
|
683
|
-
Link: Integer
|
|
684
|
-
Image: Integer
|
|
685
|
-
Strong: Integer
|
|
686
|
-
Em: Integer
|
|
687
|
-
Code: Integer
|
|
688
|
-
Strikethrough: Integer
|
|
689
|
-
Underline: Integer
|
|
690
|
-
Subscript: Integer
|
|
691
|
-
Superscript: Integer
|
|
692
|
-
Mark: Integer
|
|
693
|
-
Small: Integer
|
|
694
|
-
Br: Integer
|
|
695
|
-
Span: Integer
|
|
696
|
-
Article: Integer
|
|
697
|
-
Section: Integer
|
|
698
|
-
Nav: Integer
|
|
699
|
-
Aside: Integer
|
|
700
|
-
Header: Integer
|
|
701
|
-
Footer: Integer
|
|
702
|
-
Main: Integer
|
|
703
|
-
Figure: Integer
|
|
704
|
-
Figcaption: Integer
|
|
705
|
-
Time: Integer
|
|
706
|
-
Details: Integer
|
|
707
|
-
Summary: Integer
|
|
708
|
-
Form: Integer
|
|
709
|
-
Input: Integer
|
|
710
|
-
Select: Integer
|
|
711
|
-
Option: Integer
|
|
712
|
-
Button: Integer
|
|
713
|
-
Textarea: Integer
|
|
714
|
-
Label: Integer
|
|
715
|
-
Fieldset: Integer
|
|
716
|
-
Legend: Integer
|
|
717
|
-
Audio: Integer
|
|
718
|
-
Video: Integer
|
|
719
|
-
Picture: Integer
|
|
720
|
-
Source: Integer
|
|
721
|
-
Iframe: Integer
|
|
722
|
-
Svg: Integer
|
|
723
|
-
Canvas: Integer
|
|
724
|
-
Ruby: Integer
|
|
725
|
-
Rt: Integer
|
|
726
|
-
Rp: Integer
|
|
727
|
-
Abbr: Integer
|
|
728
|
-
Kbd: Integer
|
|
729
|
-
Samp: Integer
|
|
730
|
-
Var: Integer
|
|
731
|
-
Cite: Integer
|
|
732
|
-
Q: Integer
|
|
733
|
-
Del: Integer
|
|
734
|
-
Ins: Integer
|
|
735
|
-
Data: Integer
|
|
736
|
-
Meter: Integer
|
|
737
|
-
Progress: Integer
|
|
738
|
-
Output: Integer
|
|
739
|
-
Template: Integer
|
|
740
|
-
Slot: Integer
|
|
741
|
-
Html: Integer
|
|
742
|
-
Head: Integer
|
|
743
|
-
Body: Integer
|
|
744
|
-
Title: Integer
|
|
745
|
-
Meta: Integer
|
|
746
|
-
LinkTag: Integer
|
|
747
|
-
Style: Integer
|
|
748
|
-
Script: Integer
|
|
749
|
-
Base: Integer
|
|
750
|
-
Custom: Integer
|
|
596
|
+
type instance = :text | :element | :heading | :paragraph | :div | :blockquote | :pre | :hr | :list | :list_item | :definition_list | :definition_term | :definition_description | :table | :table_row | :table_cell | :table_header | :table_body | :table_head | :table_foot | :link | :image | :strong | :em | :code | :strikethrough | :underline | :subscript | :superscript | :mark | :small | :br | :span | :article | :section | :nav | :aside | :header | :footer | :main | :figure | :figcaption | :time | :details | :summary | :form | :input | :select | :option | :button | :textarea | :label | :fieldset | :legend | :audio | :video | :picture | :source | :iframe | :svg | :canvas | :ruby | :rt | :rp | :abbr | :kbd | :samp | :var | :cite | :q | :del | :ins | :data | :meter | :progress | :output | :template | :slot | :html | :head | :body | :title | :meta | :link_tag | :style | :script | :base | :custom
|
|
751
597
|
end
|
|
752
598
|
|
|
753
599
|
class VisitResult
|
|
@@ -756,14 +602,8 @@ module HtmlToMarkdown
|
|
|
756
602
|
# Allows visitors to control the conversion flow by either proceeding
|
|
757
603
|
# with default behavior, providing custom output, skipping elements,
|
|
758
604
|
# preserving HTML, or signaling errors.
|
|
759
|
-
|
|
760
|
-
Continue: Integer
|
|
761
|
-
Custom: Integer
|
|
762
|
-
Skip: Integer
|
|
763
|
-
PreserveHtml: Integer
|
|
764
|
-
Error: Integer
|
|
765
605
|
end
|
|
766
606
|
|
|
767
|
-
def self.convert: (String html, ?ConversionOptions options
|
|
607
|
+
def self.convert: (String html, ?ConversionOptions options) -> ConversionResult
|
|
768
608
|
|
|
769
609
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: html-to-markdown
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 3.4.0.pre.rc.24
|
|
4
|
+
version: 3.4.0.pre.rc.30
|
|
5
5
|
platform: aarch64-linux
|
|
6
6
|
authors:
|
|
7
7
|
- Kreuzberg Team
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-05-
|
|
11
|
+
date: 2026-05-07 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description: High-performance HTML to Markdown converter
|
|
14
14
|
email:
|
|
@@ -19,6 +19,7 @@ files:
|
|
|
19
19
|
- Steepfile
|
|
20
20
|
- lib/bin/html-to-markdown
|
|
21
21
|
- lib/html_to_markdown.rb
|
|
22
|
+
- lib/html_to_markdown/native.rb
|
|
22
23
|
- lib/html_to_markdown/version.rb
|
|
23
24
|
- lib/html_to_markdown_rb.so
|
|
24
25
|
- sig/html_to_markdown/cli.rbs
|