html-to-markdown 3.2.3 → 3.4.0.pre.rc.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Steepfile +6 -0
- data/ext/html_to_markdown_rb/Cargo.toml +2 -2
- data/ext/html_to_markdown_rb/native/Cargo.toml +28 -0
- data/ext/html_to_markdown_rb/src/html-to-markdown/version.rb +10 -0
- data/ext/html_to_markdown_rb/src/html-to-markdown.rb +13 -0
- data/ext/html_to_markdown_rb/src/lib.rs +2088 -268
- data/lib/bin/html-to-markdown +0 -0
- data/lib/html_to_markdown/version.rb +1 -1
- data/lib/html_to_markdown.rb +5 -3
- data/sig/types.rbs +769 -0
- data/vendor/Cargo.toml +2 -2
- data/vendor/html-to-markdown-rs/Cargo.toml +1 -1
- data/vendor/html-to-markdown-rs/examples/basic.rs +1 -1
- data/vendor/html-to-markdown-rs/examples/table.rs +1 -1
- data/vendor/html-to-markdown-rs/examples/test_deser.rs +1 -1
- data/vendor/html-to-markdown-rs/examples/test_escape.rs +1 -1
- data/vendor/html-to-markdown-rs/examples/test_inline_formatting.rs +1 -1
- data/vendor/html-to-markdown-rs/examples/test_lists.rs +1 -1
- data/vendor/html-to-markdown-rs/examples/test_semantic_tags.rs +1 -1
- data/vendor/html-to-markdown-rs/examples/test_tables.rs +1 -1
- data/vendor/html-to-markdown-rs/examples/test_task_lists.rs +1 -1
- data/vendor/html-to-markdown-rs/examples/test_whitespace.rs +1 -1
- data/vendor/html-to-markdown-rs/src/convert_api.rs +15 -25
- data/vendor/html-to-markdown-rs/src/converter/block/blockquote.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/block/container.rs +3 -3
- data/vendor/html-to-markdown-rs/src/converter/block/div.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/block/heading.rs +6 -7
- data/vendor/html-to-markdown-rs/src/converter/block/horizontal_rule.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/block/line_break.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/block/mod.rs +0 -108
- data/vendor/html-to-markdown-rs/src/converter/block/paragraph.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/block/preformatted.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/block/table/builder.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/block/table/cell.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/block/table/layout.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/block/table/mod.rs +2 -4
- data/vendor/html-to-markdown-rs/src/converter/block/unknown.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/context.rs +10 -0
- data/vendor/html-to-markdown-rs/src/converter/dom_context.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/form/elements.rs +14 -14
- data/vendor/html-to-markdown-rs/src/converter/form/mod.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/format/mod.rs +0 -3
- data/vendor/html-to-markdown-rs/src/converter/inline/code.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/inline/emphasis.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/inline/link.rs +2 -2
- data/vendor/html-to-markdown-rs/src/converter/inline/mod.rs +0 -1
- data/vendor/html-to-markdown-rs/src/converter/inline/ruby.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/inline/semantic/mod.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/list/definition.rs +3 -3
- data/vendor/html-to-markdown-rs/src/converter/list/item.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/list/mod.rs +0 -1
- data/vendor/html-to-markdown-rs/src/converter/list/ordered.rs +2 -2
- data/vendor/html-to-markdown-rs/src/converter/list/unordered.rs +2 -2
- data/vendor/html-to-markdown-rs/src/converter/main.rs +57 -31
- data/vendor/html-to-markdown-rs/src/converter/media/embedded.rs +8 -8
- data/vendor/html-to-markdown-rs/src/converter/media/image.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/media/mod.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/media/svg.rs +5 -5
- data/vendor/html-to-markdown-rs/src/converter/mod.rs +6 -17
- data/vendor/html-to-markdown-rs/src/converter/plain_text.rs +64 -11
- data/vendor/html-to-markdown-rs/src/converter/preprocessing_helpers.rs +80 -22
- data/vendor/html-to-markdown-rs/src/converter/semantic/figure.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/semantic/mod.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/text/mod.rs +0 -4
- data/vendor/html-to-markdown-rs/src/converter/utility/attributes.rs +5 -9
- data/vendor/html-to-markdown-rs/src/converter/utility/caching.rs +3 -3
- data/vendor/html-to-markdown-rs/src/converter/utility/content.rs +10 -10
- data/vendor/html-to-markdown-rs/src/converter/utility/preprocessing.rs +13 -13
- data/vendor/html-to-markdown-rs/src/converter/utility/serialization.rs +4 -4
- data/vendor/html-to-markdown-rs/src/converter/utility/siblings.rs +6 -14
- data/vendor/html-to-markdown-rs/src/inline_images.rs +6 -0
- data/vendor/html-to-markdown-rs/src/lib.rs +17 -18
- data/vendor/html-to-markdown-rs/src/options/conversion.rs +31 -0
- data/vendor/html-to-markdown-rs/src/prelude.rs +1 -12
- data/vendor/html-to-markdown-rs/src/text.rs +0 -44
- data/vendor/html-to-markdown-rs/src/types/warnings.rs +2 -0
- data/vendor/html-to-markdown-rs/src/visitor/types.rs +5 -1
- data/vendor/html-to-markdown-rs/src/visitor_helpers.rs +4 -1
- data/vendor/html-to-markdown-rs/tests/br_in_inline_test.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/commonmark_compliance_test.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/djot_output_test.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/exclude_selectors_test.rs +136 -0
- data/vendor/html-to-markdown-rs/tests/integration_test.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/issue_121_regressions.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/issue_127_regressions.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/issue_128_regressions.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/issue_131_regressions.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/issue_134_regressions.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/issue_139_regressions.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/issue_140_regressions.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/issue_143_regressions.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/issue_145_regressions.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/issue_146_regressions.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/issue_176_regressions.rs +2 -2
- data/vendor/html-to-markdown-rs/tests/issue_190_regressions.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/issue_199_regressions.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/issue_200_regressions.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/issue_212_regressions.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/issue_216_217_regressions.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/json_ld_script_extraction.rs +2 -2
- data/vendor/html-to-markdown-rs/tests/lists_test.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/plain_output_test.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/preprocessing_tests.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/reference_links_test.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/sectioning_elements_test.rs +137 -0
- data/vendor/html-to-markdown-rs/tests/skip_images_test.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/tables_test.rs +2 -2
- data/vendor/html-to-markdown-rs/tests/test_custom_elements.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/test_issue_187.rs +5 -2
- data/vendor/html-to-markdown-rs/tests/test_issue_218.rs +4 -4
- data/vendor/html-to-markdown-rs/tests/test_issue_277.rs +77 -0
- data/vendor/html-to-markdown-rs/tests/test_max_depth.rs +82 -0
- data/vendor/html-to-markdown-rs/tests/test_nested_simple.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/test_script_style_stripping.rs +4 -4
- data/vendor/html-to-markdown-rs/tests/test_spa_bisect.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/visitor_code_integration_test.rs +6 -6
- data/vendor/html-to-markdown-rs/tests/visitor_integration_test.rs +103 -35
- data/vendor/html-to-markdown-rs/tests/xml_tables_test.rs +1 -1
- metadata +21 -43
- data/.bundle/config +0 -2
- data/.gitignore +0 -3
- data/.rubocop.yml +0 -59
- data/Gemfile +0 -18
- data/Gemfile.lock +0 -173
- data/README.md +0 -331
- data/Rakefile +0 -26
- data/exe/html-to-markdown +0 -6
- data/ext/html_to_markdown_rb/src/html_to_markdown_rs/version.rb +0 -6
- data/ext/html_to_markdown_rb/src/html_to_markdown_rs.rb +0 -9
- data/html-to-markdown-rb.gemspec +0 -99
- data/lib/html_to_markdown_rs.rb +0 -3
- data/sig/html_to_markdown.rbs +0 -149
- data/vendor/html-to-markdown-rs/src/converter/text/escaping.rs +0 -94
- data/vendor/html-to-markdown-rs/src/converter/text/normalization.rs +0 -86
- data/vendor/html-to-markdown-rs/src/safety.rs +0 -70
data/sig/types.rbs
ADDED
|
@@ -0,0 +1,769 @@
|
|
|
1
|
+
# This file is auto-generated by alef — DO NOT EDIT.
|
|
2
|
+
# alef:hash:fa557708df795d5b42dd32042603884cf4e9e96a2609974ffb238997cf8b32b3
|
|
3
|
+
# To regenerate: alef generate
|
|
4
|
+
# To verify freshness: alef verify --exit-code
|
|
5
|
+
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
|
6
|
+
|
|
7
|
+
module HtmlToMarkdown
|
|
8
|
+
|
|
9
|
+
VERSION: String
|
|
10
|
+
|
|
11
|
+
# Document-level metadata extracted from `<head>` and top-level elements.
#
# Holds the values typically used by search engines, social media platforms
# and browsers for indexing and presentation.
#
# Every field that `initialize` accepts as an optional keyword is typed as
# nilable here: the upstream Rust example wraps such fields in `Some(...)`,
# so a reader can yield `nil` when the value is absent.
# NOTE(review): confirm that the generated writers also accept `nil`.
#
# Upstream example (Rust core crate, shown for reference only):
#
#   let doc = DocumentMetadata {
#       title: Some("My Article".to_string()),
#       description: Some("A great article about Rust".to_string()),
#       keywords: vec!["rust".to_string(), "programming".to_string()],
#       ..Default::default()
#   };
#   assert_eq!(doc.title, Some("My Article".to_string()));
class DocumentMetadata
  attr_accessor title: String?
  attr_accessor description: String?
  attr_accessor keywords: Array[String]
  attr_accessor author: String?
  attr_accessor canonical_url: String?
  attr_accessor base_href: String?
  attr_accessor language: String?
  attr_accessor text_direction: TextDirection?
  attr_accessor open_graph: Hash[String, String]
  attr_accessor twitter_card: Hash[String, String]
  attr_accessor meta_tags: Hash[String, String]

  # Keywords prefixed with `?` may be omitted; the others are required.
  def initialize: (?title: String, ?description: String, keywords: Array[String], ?author: String, ?canonical_url: String, ?base_href: String, ?language: String, ?text_direction: TextDirection, open_graph: Hash[String, String], twitter_card: Hash[String, String], meta_tags: Hash[String, String]) -> void
end
|
|
45
|
+
|
|
46
|
+
# Header element metadata with hierarchy tracking.
#
# Captures heading elements (h1-h6) with their text content, identifier and
# position in the document structure.
#
# `id` is an optional keyword in `initialize` and is wrapped in `Some(...)` in
# the upstream Rust example, so its reader is typed as nilable.
#
# Upstream example (Rust core crate, shown for reference only):
#
#   let header = HeaderMetadata {
#       level: 1,
#       text: "Main Title".to_string(),
#       id: Some("main-title".to_string()),
#       depth: 0,
#       html_offset: 145,
#   };
#   assert_eq!(header.level, 1);
#   assert!(header.is_valid());
class HeaderMetadata
  attr_reader level: Integer
  attr_reader text: String
  attr_reader id: String?
  attr_reader depth: Integer
  attr_reader html_offset: Integer

  def initialize: (level: Integer, text: String, ?id: String, depth: Integer, html_offset: Integer) -> void

  # Validity predicate; the criteria are not visible in this signature file.
  def is_valid: () -> bool
end
|
|
77
|
+
|
|
78
|
+
# Hyperlink metadata with categorization and attributes.
#
# Represents `<a>` elements with parsed href values, text content and a link
# type classification.
#
# `title` is an optional keyword in `initialize` and is wrapped in `Some(...)`
# in the upstream Rust example, so its reader is typed as nilable.
#
# Upstream example (Rust core crate, shown for reference only):
#
#   let link = LinkMetadata {
#       href: "https://example.com".to_string(),
#       text: "Example".to_string(),
#       title: Some("Visit Example".to_string()),
#       link_type: LinkType::External,
#       rel: vec!["nofollow".to_string()],
#       attributes: Default::default(),
#   };
#   assert_eq!(link.link_type, LinkType::External);
#   assert_eq!(link.text, "Example");
class LinkMetadata
  attr_reader href: String
  attr_reader text: String
  attr_reader title: String?
  attr_reader link_type: LinkType
  attr_reader rel: Array[String]
  attr_reader attributes: Hash[String, String]

  def initialize: (href: String, text: String, ?title: String, link_type: LinkType, rel: Array[String], attributes: Hash[String, String]) -> void

  # Maps an href string to its LinkType category.
  def self.classify_link: (String href) -> LinkType
end
|
|
110
|
+
|
|
111
|
+
# Image metadata with source and dimensions.
#
# Captures `<img>` elements and inline `<svg>` elements with metadata for
# image analysis and optimization.
#
# `alt`, `title` and `dimensions` are optional keywords in `initialize` and are
# wrapped in `Some(...)` in the upstream Rust example, so their readers are
# typed as nilable.
# NOTE(review): the Rust example shows `dimensions` as a pair `(800, 600)`;
# presumably the array holds width then height -- confirm before relying on it.
#
# Upstream example (Rust core crate, shown for reference only):
#
#   let img = ImageMetadata {
#       src: "https://example.com/image.jpg".to_string(),
#       alt: Some("An example image".to_string()),
#       title: Some("Example".to_string()),
#       dimensions: Some((800, 600)),
#       image_type: ImageType::External,
#       attributes: Default::default(),
#   };
#   assert_eq!(img.image_type, ImageType::External);
class ImageMetadata
  attr_reader src: String
  attr_reader alt: String?
  attr_reader title: String?
  attr_reader dimensions: Array[Integer]?
  attr_reader image_type: ImageType
  attr_reader attributes: Hash[String, String]

  def initialize: (src: String, ?alt: String, ?title: String, ?dimensions: Array[Integer], image_type: ImageType, attributes: Hash[String, String]) -> void
end
|
|
142
|
+
|
|
143
|
+
# Structured data block (JSON-LD, Microdata, or RDFa).
#
# Represents machine-readable structured data found in the document.
# JSON-LD blocks are collected as raw JSON strings for flexibility.
#
# `schema_type` is an optional keyword in `initialize` and is wrapped in
# `Some(...)` in the upstream Rust example, so its reader is typed as nilable.
#
# Upstream example (Rust core crate, shown for reference only):
#
#   let schema = StructuredData {
#       data_type: StructuredDataType::JsonLd,
#       raw_json: r#"{"@context":"https://schema.org","@type":"Article"}"#.to_string(),
#       schema_type: Some("Article".to_string()),
#   };
#   assert_eq!(schema.data_type, StructuredDataType::JsonLd);
class StructuredData
  attr_reader data_type: StructuredDataType
  attr_reader raw_json: String
  attr_reader schema_type: String?

  def initialize: (data_type: StructuredDataType, raw_json: String, ?schema_type: String) -> void
end
|
|
168
|
+
|
|
169
|
+
# Aggregate result of metadata extraction from an HTML document.
#
# Bundles every extracted metadata category in one structure, suitable for
# serialization and transmission across language boundaries.
#
# Upstream example (Rust core crate, shown for reference only):
#
#   let metadata = HtmlMetadata {
#       document: Default::default(),
#       headers: Vec::new(),
#       links: Vec::new(),
#       images: Vec::new(),
#       structured_data: Vec::new(),
#   };
#   assert!(metadata.headers.is_empty());
class HtmlMetadata
  attr_accessor document: DocumentMetadata
  attr_accessor headers: Array[HeaderMetadata]
  attr_accessor links: Array[LinkMetadata]
  attr_accessor images: Array[ImageMetadata]
  attr_accessor structured_data: Array[StructuredData]

  # All five keywords are required.
  def initialize: (
    document: DocumentMetadata,
    headers: Array[HeaderMetadata],
    links: Array[LinkMetadata],
    images: Array[ImageMetadata],
    structured_data: Array[StructuredData]
  ) -> void
end
|
|
198
|
+
|
|
199
|
+
# Main conversion options for HTML to Markdown conversion.
#
# Obtain an instance through `ConversionOptions.builder`, `ConversionOptions.default`,
# or by applying a `ConversionOptionsUpdate`.
#
# `max_depth` is the only optional keyword in `initialize`, and
# `ConversionOptionsUpdate` already types it as `Integer?`; it is typed as
# nilable here as well so both classes agree.
#
# NOTE(review): the upstream example below chains `heading_style`, `wrap` and
# `wrap_width` on the builder, but `ConversionOptionsBuilder` in this signature
# file does not declare those setters -- confirm which side is authoritative.
#
# Upstream example (Rust core crate, shown for reference only):
#
#   use html_to_markdown_rs::ConversionOptions;
#
#   let options = ConversionOptions::builder()
#       .heading_style(HeadingStyle::Atx)
#       .wrap(true)
#       .wrap_width(100)
#       .build();
class ConversionOptions
  attr_accessor heading_style: HeadingStyle
  attr_accessor list_indent_type: ListIndentType
  attr_accessor list_indent_width: Integer
  attr_accessor bullets: String
  attr_accessor strong_em_symbol: String
  attr_accessor escape_asterisks: bool
  attr_accessor escape_underscores: bool
  attr_accessor escape_misc: bool
  attr_accessor escape_ascii: bool
  attr_accessor code_language: String
  attr_accessor autolinks: bool
  attr_accessor default_title: bool
  attr_accessor br_in_tables: bool
  attr_accessor highlight_style: HighlightStyle
  attr_accessor extract_metadata: bool
  attr_accessor whitespace_mode: WhitespaceMode
  attr_accessor strip_newlines: bool
  attr_accessor wrap: bool
  attr_accessor wrap_width: Integer
  attr_accessor convert_as_inline: bool
  attr_accessor sub_symbol: String
  attr_accessor sup_symbol: String
  attr_accessor newline_style: NewlineStyle
  attr_accessor code_block_style: CodeBlockStyle
  attr_accessor keep_inline_images_in: Array[String]
  attr_accessor preprocessing: PreprocessingOptions
  attr_accessor encoding: String
  attr_accessor debug: bool
  attr_accessor strip_tags: Array[String]
  attr_accessor preserve_tags: Array[String]
  attr_accessor skip_images: bool
  attr_accessor link_style: LinkStyle
  attr_accessor output_format: OutputFormat
  attr_accessor include_document_structure: bool
  attr_accessor extract_images: bool
  attr_accessor max_image_size: Integer
  attr_accessor capture_svg: bool
  attr_accessor infer_dimensions: bool
  # Nilable: may be left out of `initialize`.
  attr_accessor max_depth: Integer?
  attr_accessor exclude_selectors: Array[String]

  # Every keyword except `max_depth` is required.
  def initialize: (heading_style: HeadingStyle, list_indent_type: ListIndentType, list_indent_width: Integer, bullets: String, strong_em_symbol: String, escape_asterisks: bool, escape_underscores: bool, escape_misc: bool, escape_ascii: bool, code_language: String, autolinks: bool, default_title: bool, br_in_tables: bool, highlight_style: HighlightStyle, extract_metadata: bool, whitespace_mode: WhitespaceMode, strip_newlines: bool, wrap: bool, wrap_width: Integer, convert_as_inline: bool, sub_symbol: String, sup_symbol: String, newline_style: NewlineStyle, code_block_style: CodeBlockStyle, keep_inline_images_in: Array[String], preprocessing: PreprocessingOptions, encoding: String, debug: bool, strip_tags: Array[String], preserve_tags: Array[String], skip_images: bool, link_style: LinkStyle, output_format: OutputFormat, include_document_structure: bool, extract_images: bool, max_image_size: Integer, capture_svg: bool, infer_dimensions: bool, ?max_depth: Integer, exclude_selectors: Array[String]) -> void

  # Applies a partial update to this instance.
  def apply_update: (ConversionOptionsUpdate update) -> void

  # Returns an options object populated with defaults.
  def self.default: () -> ConversionOptions

  # Returns a builder for constructing options.
  def self.builder: () -> ConversionOptionsBuilder

  # Both constructors below build options from a partial update.
  def self.from_update: (ConversionOptionsUpdate update) -> ConversionOptions
  def self.from: (ConversionOptionsUpdate update) -> ConversionOptions
end
|
|
264
|
+
|
|
265
|
+
# Builder for `ConversionOptions`.
#
# All fields start with default values; call `build` to produce the final
# options object. Each setter returns a `ConversionOptionsBuilder`, so calls
# can be chained.
class ConversionOptionsBuilder
  # Setters taking a list of strings.
  def strip_tags: (Array[String] tags) -> ConversionOptionsBuilder
  def preserve_tags: (Array[String] tags) -> ConversionOptionsBuilder
  def keep_inline_images_in: (Array[String] tags) -> ConversionOptionsBuilder
  def exclude_selectors: (Array[String] selectors) -> ConversionOptionsBuilder

  # Setter taking a nested options object.
  def preprocessing: (PreprocessingOptions preprocessing) -> ConversionOptionsBuilder

  # Finishes the builder and returns the resulting options.
  def build: () -> ConversionOptions
end
|
|
277
|
+
|
|
278
|
+
# Partial update for `ConversionOptions`.
#
# Upstream describes every field as `Option<T>` for selective updates, and
# every keyword of `initialize` is optional. The accessors are therefore typed
# as nilable, matching what `max_depth` already declared.
# NOTE(review): confirm that the generated writers accept `nil`.
#
# Bindings use this class to construct options from language-native types; in
# Rust code the builder is preferred.
class ConversionOptionsUpdate
  attr_accessor heading_style: HeadingStyle?
  attr_accessor list_indent_type: ListIndentType?
  attr_accessor list_indent_width: Integer?
  attr_accessor bullets: String?
  attr_accessor strong_em_symbol: String?
  attr_accessor escape_asterisks: bool?
  attr_accessor escape_underscores: bool?
  attr_accessor escape_misc: bool?
  attr_accessor escape_ascii: bool?
  attr_accessor code_language: String?
  attr_accessor autolinks: bool?
  attr_accessor default_title: bool?
  attr_accessor br_in_tables: bool?
  attr_accessor highlight_style: HighlightStyle?
  attr_accessor extract_metadata: bool?
  attr_accessor whitespace_mode: WhitespaceMode?
  attr_accessor strip_newlines: bool?
  attr_accessor wrap: bool?
  attr_accessor wrap_width: Integer?
  attr_accessor convert_as_inline: bool?
  attr_accessor sub_symbol: String?
  attr_accessor sup_symbol: String?
  attr_accessor newline_style: NewlineStyle?
  attr_accessor code_block_style: CodeBlockStyle?
  attr_accessor keep_inline_images_in: Array[String]?
  attr_accessor preprocessing: PreprocessingOptionsUpdate?
  attr_accessor encoding: String?
  attr_accessor debug: bool?
  attr_accessor strip_tags: Array[String]?
  attr_accessor preserve_tags: Array[String]?
  attr_accessor skip_images: bool?
  attr_accessor link_style: LinkStyle?
  attr_accessor output_format: OutputFormat?
  attr_accessor include_document_structure: bool?
  attr_accessor extract_images: bool?
  attr_accessor max_image_size: Integer?
  attr_accessor capture_svg: bool?
  attr_accessor infer_dimensions: bool?
  attr_accessor max_depth: Integer?
  attr_accessor exclude_selectors: Array[String]?

  # Every keyword is optional.
  def initialize: (?heading_style: HeadingStyle, ?list_indent_type: ListIndentType, ?list_indent_width: Integer, ?bullets: String, ?strong_em_symbol: String, ?escape_asterisks: bool, ?escape_underscores: bool, ?escape_misc: bool, ?escape_ascii: bool, ?code_language: String, ?autolinks: bool, ?default_title: bool, ?br_in_tables: bool, ?highlight_style: HighlightStyle, ?extract_metadata: bool, ?whitespace_mode: WhitespaceMode, ?strip_newlines: bool, ?wrap: bool, ?wrap_width: Integer, ?convert_as_inline: bool, ?sub_symbol: String, ?sup_symbol: String, ?newline_style: NewlineStyle, ?code_block_style: CodeBlockStyle, ?keep_inline_images_in: Array[String], ?preprocessing: PreprocessingOptionsUpdate, ?encoding: String, ?debug: bool, ?strip_tags: Array[String], ?preserve_tags: Array[String], ?skip_images: bool, ?link_style: LinkStyle, ?output_format: OutputFormat, ?include_document_structure: bool, ?extract_images: bool, ?max_image_size: Integer, ?capture_svg: bool, ?infer_dimensions: bool, ?max_depth: Integer?, ?exclude_selectors: Array[String]) -> void
end
|
|
327
|
+
|
|
328
|
+
# Options controlling HTML cleanup performed before conversion.
class PreprocessingOptions
  attr_accessor enabled: bool
  attr_accessor preset: PreprocessingPreset
  attr_accessor remove_navigation: bool
  attr_accessor remove_forms: bool

  # All four keywords are required.
  def initialize: (
    enabled: bool,
    preset: PreprocessingPreset,
    remove_navigation: bool,
    remove_forms: bool
  ) -> void

  # Applies a partial update to this instance.
  def apply_update: (PreprocessingOptionsUpdate update) -> void

  # Returns an options object populated with defaults.
  def self.default: () -> PreprocessingOptions

  # Both constructors below build options from a partial update.
  def self.from_update: (PreprocessingOptionsUpdate update) -> PreprocessingOptions
  def self.from: (PreprocessingOptionsUpdate update) -> PreprocessingOptions
end
|
|
342
|
+
|
|
343
|
+
# Partial update for `PreprocessingOptions`.
#
# Upstream models each field as `Option<T>`: only fields that carry a value
# override existing options, while absent values leave the corresponding
# field unchanged when applied via `PreprocessingOptions#apply_update`.
# Every keyword of `initialize` is optional, so the accessors are typed as
# nilable.
# NOTE(review): confirm that the generated writers accept `nil`.
class PreprocessingOptionsUpdate
  attr_accessor enabled: bool?
  attr_accessor preset: PreprocessingPreset?
  attr_accessor remove_navigation: bool?
  attr_accessor remove_forms: bool?

  # Every keyword is optional.
  def initialize: (?enabled: bool, ?preset: PreprocessingPreset, ?remove_navigation: bool, ?remove_forms: bool) -> void
end
|
|
357
|
+
|
|
358
|
+
# A structured document tree representing the semantic content of an HTML
# document.
#
# Uses a flat node array with index-based parent/child references for
# efficient traversal.
#
# `source_format` is an optional keyword in `initialize`, so its reader is
# typed as nilable.
class DocumentStructure
  attr_reader nodes: Array[DocumentNode]
  attr_reader source_format: String?

  def initialize: (nodes: Array[DocumentNode], ?source_format: String) -> void
end
|
|
368
|
+
|
|
369
|
+
# A single node in the document tree.
#
# `parent` and `attributes` are optional keywords in `initialize`, so their
# readers are typed as nilable.
# NOTE(review): `parent` and `children` are presumably indices into
# `DocumentStructure#nodes` -- confirm against the native implementation.
class DocumentNode
  attr_reader id: String
  attr_reader content: NodeContent
  attr_reader parent: Integer?
  attr_reader children: Array[Integer]
  attr_reader annotations: Array[TextAnnotation]
  attr_reader attributes: Hash[String, String]?

  def initialize: (id: String, content: NodeContent, ?parent: Integer, children: Array[Integer], annotations: Array[TextAnnotation], ?attributes: Hash[String, String]) -> void
end
|
|
381
|
+
|
|
382
|
+
# Inline annotation over a byte range of a node's text content.
#
# Annotations describe formatting (bold, italic, etc.) and links.
class TextAnnotation
  # Byte-range offsets of the annotated span.
  attr_reader start: Integer
  attr_reader end: Integer

  # What the annotation represents.
  attr_reader kind: AnnotationKind

  def initialize: (
    start: Integer,
    end: Integer,
    kind: AnnotationKind
  ) -> void
end
|
|
393
|
+
|
|
394
|
+
# The primary result of HTML conversion and extraction.
#
# Contains the converted text output, an optional structured document tree,
# metadata, extracted tables, images and processing warnings.
#
# `content` and `document` are optional keywords in `initialize`, and the
# upstream example checks `result.content.is_some()`, so both accessors are
# typed as nilable.
# NOTE(review): confirm that the generated writers accept `nil`.
#
# Upstream example (Rust core crate, shown for reference only):
#
#   use html_to_markdown_rs::{convert, ConversionOptions};
#
#   let result = convert("<h1>Hello</h1><p>World</p>", None)?;
#   assert!(result.content.is_some());
#   assert!(result.warnings.is_empty());
class ConversionResult
  attr_accessor content: String?
  attr_accessor document: DocumentStructure?
  attr_accessor metadata: HtmlMetadata
  attr_accessor tables: Array[TableData]
  attr_accessor images: Array[String]
  attr_accessor warnings: Array[ProcessingWarning]

  def initialize: (?content: String, ?document: DocumentStructure, metadata: HtmlMetadata, tables: Array[TableData], images: Array[String], warnings: Array[ProcessingWarning]) -> void
end
|
|
419
|
+
|
|
420
|
+
# Structured table grid carrying cell-level data, including spans.
class TableGrid
  attr_accessor rows: Integer
  attr_accessor cols: Integer
  attr_accessor cells: Array[GridCell]

  # All three keywords are required.
  def initialize: (
    rows: Integer,
    cols: Integer,
    cells: Array[GridCell]
  ) -> void
end
|
|
429
|
+
|
|
430
|
+
# One cell of a table grid.
class GridCell
  attr_reader content: String

  # Position of the cell.
  attr_reader row: Integer
  attr_reader col: Integer

  # Span of the cell.
  attr_reader row_span: Integer
  attr_reader col_span: Integer

  attr_reader is_header: bool

  # All six keywords are required.
  def initialize: (
    content: String,
    row: Integer,
    col: Integer,
    row_span: Integer,
    col_span: Integer,
    is_header: bool
  ) -> void
end
|
|
442
|
+
|
|
443
|
+
# Top-level extracted table, exposed both as structured data (`grid`) and as
# its markdown representation (`markdown`).
class TableData
  attr_reader grid: TableGrid
  attr_reader markdown: String

  # Both keywords are required.
  def initialize: (
    grid: TableGrid,
    markdown: String
  ) -> void
end
|
|
451
|
+
|
|
452
|
+
# Non-fatal warning produced while processing HTML.
class ProcessingWarning
  attr_reader message: String
  attr_reader kind: WarningKind

  # Both keywords are required.
  def initialize: (
    message: String,
    kind: WarningKind
  ) -> void
end
|
|
460
|
+
|
|
461
|
+
# Context information passed to all visitor methods.
#
# Provides metadata about the node currently being visited: its type,
# attributes, position in the DOM tree and parent context.
#
# `parent_tag` is an optional keyword in `initialize`, so its reader is typed
# as nilable.
class NodeContext
  attr_reader node_type: NodeType
  attr_reader tag_name: String
  attr_reader attributes: Hash[String, String]
  attr_reader depth: Integer
  attr_reader index_in_parent: Integer
  attr_reader parent_tag: String?
  attr_reader is_inline: bool

  def initialize: (node_type: NodeType, tag_name: String, attributes: Hash[String, String], depth: Integer, index_in_parent: Integer, ?parent_tag: String, is_inline: bool) -> void
end
|
|
477
|
+
|
|
478
|
+
class TextDirection
|
|
479
|
+
# Text directionality of document content.
|
|
480
|
+
#
|
|
481
|
+
# Corresponds to the HTML `dir` attribute and `bdi` element directionality.
|
|
482
|
+
|
|
483
|
+
LeftToRight: Integer
|
|
484
|
+
RightToLeft: Integer
|
|
485
|
+
Auto: Integer
|
|
486
|
+
end
|
|
487
|
+
|
|
488
|
+
class LinkType
|
|
489
|
+
# Link classification based on href value and document context.
|
|
490
|
+
#
|
|
491
|
+
# Used to categorize links during extraction for filtering and analysis.
|
|
492
|
+
|
|
493
|
+
Anchor: Integer
|
|
494
|
+
Internal: Integer
|
|
495
|
+
External: Integer
|
|
496
|
+
Email: Integer
|
|
497
|
+
Phone: Integer
|
|
498
|
+
Other: Integer
|
|
499
|
+
end
|
|
500
|
+
|
|
501
|
+
class ImageType
|
|
502
|
+
# Image source classification for proper handling and processing.
|
|
503
|
+
#
|
|
504
|
+
# Determines whether an image is embedded (data URI), inline SVG, external, or relative.
|
|
505
|
+
|
|
506
|
+
DataUri: Integer
|
|
507
|
+
InlineSvg: Integer
|
|
508
|
+
External: Integer
|
|
509
|
+
Relative: Integer
|
|
510
|
+
end
|
|
511
|
+
|
|
512
|
+
class StructuredDataType
|
|
513
|
+
# Structured data format type.
|
|
514
|
+
#
|
|
515
|
+
# Identifies the schema/format used for structured data markup.
|
|
516
|
+
|
|
517
|
+
JsonLd: Integer
|
|
518
|
+
Microdata: Integer
|
|
519
|
+
RDFa: Integer
|
|
520
|
+
end
|
|
521
|
+
|
|
522
|
+
class PreprocessingPreset
|
|
523
|
+
# HTML preprocessing aggressiveness level.
|
|
524
|
+
#
|
|
525
|
+
# Controls the extent of cleanup performed before conversion. Higher levels remove more elements.
|
|
526
|
+
|
|
527
|
+
Minimal: Integer
|
|
528
|
+
Standard: Integer
|
|
529
|
+
Aggressive: Integer
|
|
530
|
+
end
|
|
531
|
+
|
|
532
|
+
class HeadingStyle
|
|
533
|
+
# Heading style options for Markdown output.
|
|
534
|
+
#
|
|
535
|
+
# Controls how headings (h1-h6) are rendered in the output Markdown.
|
|
536
|
+
|
|
537
|
+
Underlined: Integer
|
|
538
|
+
Atx: Integer
|
|
539
|
+
AtxClosed: Integer
|
|
540
|
+
end
|
|
541
|
+
|
|
542
|
+
class ListIndentType
|
|
543
|
+
# List indentation character type.
|
|
544
|
+
#
|
|
545
|
+
# Controls whether list items are indented with spaces or tabs.
|
|
546
|
+
|
|
547
|
+
Spaces: Integer
|
|
548
|
+
Tabs: Integer
|
|
549
|
+
end
|
|
550
|
+
|
|
551
|
+
class WhitespaceMode
|
|
552
|
+
# Whitespace handling strategy during conversion.
|
|
553
|
+
#
|
|
554
|
+
# Determines how sequences of whitespace characters (spaces, tabs, newlines) are processed.
|
|
555
|
+
|
|
556
|
+
Normalized: Integer
|
|
557
|
+
Strict: Integer
|
|
558
|
+
end
|
|
559
|
+
|
|
560
|
+
class NewlineStyle
|
|
561
|
+
# Line break syntax in Markdown output.
|
|
562
|
+
#
|
|
563
|
+
# Controls how hard line breaks (from `<br>` or line breaks in source) are rendered.
|
|
564
|
+
|
|
565
|
+
Spaces: Integer
|
|
566
|
+
Backslash: Integer
|
|
567
|
+
end
|
|
568
|
+
|
|
569
|
+
class CodeBlockStyle
|
|
570
|
+
# Code block style in Markdown output (indented or fenced).
|
|
571
|
+
#
|
|
572
|
+
# Determines how code blocks (`<pre><code>`) are rendered in Markdown.
|
|
573
|
+
|
|
574
|
+
Indented: Integer
|
|
575
|
+
Backticks: Integer
|
|
576
|
+
Tildes: Integer
|
|
577
|
+
end
|
|
578
|
+
|
|
579
|
+
class HighlightStyle
|
|
580
|
+
# Highlight rendering style for `<mark>` elements.
|
|
581
|
+
#
|
|
582
|
+
# Controls how highlighted text is rendered in Markdown output.
|
|
583
|
+
|
|
584
|
+
DoubleEqual: Integer
|
|
585
|
+
Html: Integer
|
|
586
|
+
Bold: Integer
|
|
587
|
+
None: Integer
|
|
588
|
+
end
|
|
589
|
+
|
|
590
|
+
class LinkStyle
|
|
591
|
+
# Link rendering style in Markdown output.
|
|
592
|
+
#
|
|
593
|
+
# Controls whether links and images use inline `[text](url)` syntax or
|
|
594
|
+
# reference-style `[text][1]` syntax with definitions collected at the end.
|
|
595
|
+
|
|
596
|
+
Inline: Integer
|
|
597
|
+
Reference: Integer
|
|
598
|
+
end
|
|
599
|
+
|
|
600
|
+
class OutputFormat
|
|
601
|
+
# Output format for conversion.
|
|
602
|
+
#
|
|
603
|
+
# Specifies the target markup language format for the conversion output.
|
|
604
|
+
|
|
605
|
+
Markdown: Integer
|
|
606
|
+
Djot: Integer
|
|
607
|
+
Plain: Integer
|
|
608
|
+
end
|
|
609
|
+
|
|
610
|
+
class NodeContent
|
|
611
|
+
# The semantic content type of a document node.
|
|
612
|
+
#
|
|
613
|
+
# Uses internally tagged representation (`"node_type": "heading"`) for JSON serialization.
|
|
614
|
+
|
|
615
|
+
Heading: Integer
|
|
616
|
+
Paragraph: Integer
|
|
617
|
+
List: Integer
|
|
618
|
+
ListItem: Integer
|
|
619
|
+
Table: Integer
|
|
620
|
+
Image: Integer
|
|
621
|
+
Code: Integer
|
|
622
|
+
Quote: Integer
|
|
623
|
+
DefinitionList: Integer
|
|
624
|
+
DefinitionItem: Integer
|
|
625
|
+
RawBlock: Integer
|
|
626
|
+
MetadataBlock: Integer
|
|
627
|
+
Group: Integer
|
|
628
|
+
end
|
|
629
|
+
|
|
630
|
+
class AnnotationKind
|
|
631
|
+
# The type of an inline text annotation.
|
|
632
|
+
#
|
|
633
|
+
# Uses internally tagged representation (`"annotation_type": "bold"`) for JSON serialization.
|
|
634
|
+
|
|
635
|
+
Bold: Integer
|
|
636
|
+
Italic: Integer
|
|
637
|
+
Underline: Integer
|
|
638
|
+
Strikethrough: Integer
|
|
639
|
+
Code: Integer
|
|
640
|
+
Subscript: Integer
|
|
641
|
+
Superscript: Integer
|
|
642
|
+
Highlight: Integer
|
|
643
|
+
Link: Integer
|
|
644
|
+
end
|
|
645
|
+
|
|
646
|
+
class WarningKind
|
|
647
|
+
# Categories of processing warnings.
|
|
648
|
+
|
|
649
|
+
ImageExtractionFailed: Integer
|
|
650
|
+
EncodingFallback: Integer
|
|
651
|
+
TruncatedInput: Integer
|
|
652
|
+
MalformedHtml: Integer
|
|
653
|
+
SanitizationApplied: Integer
|
|
654
|
+
DepthLimitExceeded: Integer
|
|
655
|
+
end
|
|
656
|
+
|
|
657
|
+
class NodeType
|
|
658
|
+
# Node type enumeration covering text nodes and all recognized HTML element types.
|
|
659
|
+
#
|
|
660
|
+
# This enum categorizes all HTML elements that the converter recognizes,
|
|
661
|
+
# providing a coarse-grained classification for visitor dispatch.
|
|
662
|
+
|
|
663
|
+
Text: Integer
|
|
664
|
+
Element: Integer
|
|
665
|
+
Heading: Integer
|
|
666
|
+
Paragraph: Integer
|
|
667
|
+
Div: Integer
|
|
668
|
+
Blockquote: Integer
|
|
669
|
+
Pre: Integer
|
|
670
|
+
Hr: Integer
|
|
671
|
+
List: Integer
|
|
672
|
+
ListItem: Integer
|
|
673
|
+
DefinitionList: Integer
|
|
674
|
+
DefinitionTerm: Integer
|
|
675
|
+
DefinitionDescription: Integer
|
|
676
|
+
Table: Integer
|
|
677
|
+
TableRow: Integer
|
|
678
|
+
TableCell: Integer
|
|
679
|
+
TableHeader: Integer
|
|
680
|
+
TableBody: Integer
|
|
681
|
+
TableHead: Integer
|
|
682
|
+
TableFoot: Integer
|
|
683
|
+
Link: Integer
|
|
684
|
+
Image: Integer
|
|
685
|
+
Strong: Integer
|
|
686
|
+
Em: Integer
|
|
687
|
+
Code: Integer
|
|
688
|
+
Strikethrough: Integer
|
|
689
|
+
Underline: Integer
|
|
690
|
+
Subscript: Integer
|
|
691
|
+
Superscript: Integer
|
|
692
|
+
Mark: Integer
|
|
693
|
+
Small: Integer
|
|
694
|
+
Br: Integer
|
|
695
|
+
Span: Integer
|
|
696
|
+
Article: Integer
|
|
697
|
+
Section: Integer
|
|
698
|
+
Nav: Integer
|
|
699
|
+
Aside: Integer
|
|
700
|
+
Header: Integer
|
|
701
|
+
Footer: Integer
|
|
702
|
+
Main: Integer
|
|
703
|
+
Figure: Integer
|
|
704
|
+
Figcaption: Integer
|
|
705
|
+
Time: Integer
|
|
706
|
+
Details: Integer
|
|
707
|
+
Summary: Integer
|
|
708
|
+
Form: Integer
|
|
709
|
+
Input: Integer
|
|
710
|
+
Select: Integer
|
|
711
|
+
Option: Integer
|
|
712
|
+
Button: Integer
|
|
713
|
+
Textarea: Integer
|
|
714
|
+
Label: Integer
|
|
715
|
+
Fieldset: Integer
|
|
716
|
+
Legend: Integer
|
|
717
|
+
Audio: Integer
|
|
718
|
+
Video: Integer
|
|
719
|
+
Picture: Integer
|
|
720
|
+
Source: Integer
|
|
721
|
+
Iframe: Integer
|
|
722
|
+
Svg: Integer
|
|
723
|
+
Canvas: Integer
|
|
724
|
+
Ruby: Integer
|
|
725
|
+
Rt: Integer
|
|
726
|
+
Rp: Integer
|
|
727
|
+
Abbr: Integer
|
|
728
|
+
Kbd: Integer
|
|
729
|
+
Samp: Integer
|
|
730
|
+
Var: Integer
|
|
731
|
+
Cite: Integer
|
|
732
|
+
Q: Integer
|
|
733
|
+
Del: Integer
|
|
734
|
+
Ins: Integer
|
|
735
|
+
Data: Integer
|
|
736
|
+
Meter: Integer
|
|
737
|
+
Progress: Integer
|
|
738
|
+
Output: Integer
|
|
739
|
+
Template: Integer
|
|
740
|
+
Slot: Integer
|
|
741
|
+
Html: Integer
|
|
742
|
+
Head: Integer
|
|
743
|
+
Body: Integer
|
|
744
|
+
Title: Integer
|
|
745
|
+
Meta: Integer
|
|
746
|
+
LinkTag: Integer
|
|
747
|
+
Style: Integer
|
|
748
|
+
Script: Integer
|
|
749
|
+
Base: Integer
|
|
750
|
+
Custom: Integer
|
|
751
|
+
end
|
|
752
|
+
|
|
753
|
+
class VisitResult
|
|
754
|
+
# Result of a visitor callback.
|
|
755
|
+
#
|
|
756
|
+
# Allows visitors to control the conversion flow by either proceeding
|
|
757
|
+
# with default behavior, providing custom output, skipping elements,
|
|
758
|
+
# preserving HTML, or signaling errors.
|
|
759
|
+
|
|
760
|
+
Continue: Integer
|
|
761
|
+
Custom: Integer
|
|
762
|
+
Skip: Integer
|
|
763
|
+
PreserveHtml: Integer
|
|
764
|
+
Error: Integer
|
|
765
|
+
end
|
|
766
|
+
|
|
767
|
+
def self.convert: (String html, ?ConversionOptions options, ?String visitor) -> ConversionResult
|
|
768
|
+
|
|
769
|
+
end
|