@xberg-io/html-to-markdown-wasm 0.0.1 → 3.8.0-rc.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +250 -2
- package/package.json +34 -4
- package/pkg/bundler/LICENSE +21 -0
- package/pkg/bundler/README.md +251 -0
- package/pkg/bundler/html_to_markdown_wasm.d.ts +999 -0
- package/pkg/bundler/html_to_markdown_wasm.js +9 -0
- package/pkg/bundler/html_to_markdown_wasm_bg.js +6227 -0
- package/pkg/bundler/html_to_markdown_wasm_bg.wasm +0 -0
- package/pkg/bundler/html_to_markdown_wasm_bg.wasm.d.ts +466 -0
- package/pkg/bundler/package.json +28 -0
- package/pkg/deno/LICENSE +21 -0
- package/pkg/deno/README.md +251 -0
- package/pkg/deno/html_to_markdown_wasm.d.ts +999 -0
- package/pkg/deno/html_to_markdown_wasm.js +6219 -0
- package/pkg/deno/html_to_markdown_wasm_bg.wasm +0 -0
- package/pkg/deno/html_to_markdown_wasm_bg.wasm.d.ts +466 -0
- package/pkg/nodejs/LICENSE +21 -0
- package/pkg/nodejs/README.md +251 -0
- package/pkg/nodejs/html_to_markdown_wasm.d.ts +999 -0
- package/pkg/nodejs/html_to_markdown_wasm.js +6274 -0
- package/pkg/nodejs/html_to_markdown_wasm_bg.wasm +0 -0
- package/pkg/nodejs/html_to_markdown_wasm_bg.wasm.d.ts +466 -0
- package/pkg/nodejs/package.json +22 -0
- package/pkg/web/LICENSE +21 -0
- package/pkg/web/README.md +251 -0
- package/pkg/web/html_to_markdown_wasm.d.ts +1490 -0
- package/pkg/web/html_to_markdown_wasm.js +6327 -0
- package/pkg/web/html_to_markdown_wasm_bg.wasm +0 -0
- package/pkg/web/html_to_markdown_wasm_bg.wasm.d.ts +466 -0
- package/pkg/web/package.json +26 -0
|
@@ -0,0 +1,999 @@
|
|
|
1
|
+
/* tslint:disable */
|
|
2
|
+
/* eslint-disable */
|
|
3
|
+
|
|
4
|
+
/**
 * The type of an inline text annotation.
 *
 * Uses internally tagged representation (`"annotation_type": "bold"`) for JSON serialization.
 *
 * `title` and `url` are optional: their getters are typed `string | undefined`,
 * and their setters additionally accept `null`.
 */
export class WasmAnnotationKind {
  /** Presumably releases the WASM-side instance — confirm in the generated JS glue. */
  free(): void;
  /** Disposal hook, so an instance can be managed with a `using` declaration. */
  [Symbol.dispose](): void;
  static default(): WasmAnnotationKind;
  constructor();
  annotationType: string;
  get title(): string | undefined;
  set title(value: string | null | undefined);
  get url(): string | undefined;
  set url(value: string | null | undefined);
}
|
|
20
|
+
|
|
21
|
+
/**
 * Code block fence style in Markdown output.
 *
 * Determines how code blocks (`<pre><code>`) are rendered in Markdown.
 */
export enum WasmCodeBlockStyle {
  Indented = 0,
  Backticks = 1,
  Tildes = 2,
}
|
|
31
|
+
|
|
32
|
+
/**
 * Main conversion options for HTML to Markdown conversion.
 *
 * Binding for the Rust `ConversionOptions` type. In this binding, construct it
 * with `new WasmConversionOptions(...)` (every argument is optional and may be
 * `null`) or with `WasmConversionOptions.default()`.
 *
 * Notes on the declared shape:
 * - Enum-valued properties (`headingStyle`, `codeBlockStyle`, `linkStyle`, ...)
 *   are assigned with the enum type, but their getters are typed `string`.
 * - `visitor` is a property only; it is not a constructor parameter.
 * - The constructor is purely positional, and its last three parameters are
 *   `excludeSelectors, tierStrategy, maxDepth` — a different order from the
 *   tail of the `WasmConversionOptionsUpdate` constructor.
 */
export class WasmConversionOptions {
  /** Presumably releases the WASM-side instance — confirm in the generated JS glue. */
  free(): void;
  /** Disposal hook, so an instance can be managed with a `using` declaration. */
  [Symbol.dispose](): void;
  static default(): WasmConversionOptions;
  constructor(
    headingStyle?: WasmHeadingStyle | null,
    listIndentType?: WasmListIndentType | null,
    listIndentWidth?: number | null,
    bullets?: string | null,
    strongEmSymbol?: string | null,
    escapeAsterisks?: boolean | null,
    escapeUnderscores?: boolean | null,
    escapeMisc?: boolean | null,
    escapeAscii?: boolean | null,
    codeLanguage?: string | null,
    autolinks?: boolean | null,
    defaultTitle?: boolean | null,
    brInTables?: boolean | null,
    compactTables?: boolean | null,
    highlightStyle?: WasmHighlightStyle | null,
    extractMetadata?: boolean | null,
    whitespaceMode?: WasmWhitespaceMode | null,
    stripNewlines?: boolean | null,
    wrap?: boolean | null,
    wrapWidth?: number | null,
    convertAsInline?: boolean | null,
    subSymbol?: string | null,
    supSymbol?: string | null,
    newlineStyle?: WasmNewlineStyle | null,
    codeBlockStyle?: WasmCodeBlockStyle | null,
    keepInlineImagesIn?: string[] | null,
    preprocessing?: WasmPreprocessingOptions | null,
    encoding?: string | null,
    debug?: boolean | null,
    stripTags?: string[] | null,
    preserveTags?: string[] | null,
    skipImages?: boolean | null,
    urlEscapeStyle?: WasmUrlEscapeStyle | null,
    linkStyle?: WasmLinkStyle | null,
    outputFormat?: WasmOutputFormat | null,
    includeDocumentStructure?: boolean | null,
    extractImages?: boolean | null,
    maxImageSize?: bigint | null,
    captureSvg?: boolean | null,
    inferDimensions?: boolean | null,
    excludeSelectors?: string[] | null,
    tierStrategy?: WasmTierStrategy | null,
    maxDepth?: number | null,
  );
  autolinks: boolean;
  brInTables: boolean;
  bullets: string;
  captureSvg: boolean;
  get codeBlockStyle(): string;
  set codeBlockStyle(value: WasmCodeBlockStyle);
  codeLanguage: string;
  compactTables: boolean;
  convertAsInline: boolean;
  debug: boolean;
  defaultTitle: boolean;
  encoding: string;
  escapeAscii: boolean;
  escapeAsterisks: boolean;
  escapeMisc: boolean;
  escapeUnderscores: boolean;
  excludeSelectors: string[];
  extractImages: boolean;
  extractMetadata: boolean;
  get headingStyle(): string;
  set headingStyle(value: WasmHeadingStyle);
  get highlightStyle(): string;
  set highlightStyle(value: WasmHighlightStyle);
  includeDocumentStructure: boolean;
  inferDimensions: boolean;
  keepInlineImagesIn: string[];
  get linkStyle(): string;
  set linkStyle(value: WasmLinkStyle);
  get listIndentType(): string;
  set listIndentType(value: WasmListIndentType);
  listIndentWidth: number;
  get maxDepth(): number | undefined;
  set maxDepth(value: number | null | undefined);
  maxImageSize: bigint;
  get newlineStyle(): string;
  set newlineStyle(value: WasmNewlineStyle);
  get outputFormat(): string;
  set outputFormat(value: WasmOutputFormat);
  preprocessing: WasmPreprocessingOptions;
  preserveTags: string[];
  skipImages: boolean;
  stripNewlines: boolean;
  stripTags: string[];
  strongEmSymbol: string;
  subSymbol: string;
  supSymbol: string;
  get tierStrategy(): string;
  set tierStrategy(value: WasmTierStrategy);
  get urlEscapeStyle(): string;
  set urlEscapeStyle(value: WasmUrlEscapeStyle);
  get visitor(): WasmVisitorHandle | undefined;
  set visitor(value: WasmVisitorHandle | null | undefined);
  get whitespaceMode(): string;
  set whitespaceMode(value: WasmWhitespaceMode);
  wrap: boolean;
  wrapWidth: number;
}
|
|
101
|
+
|
|
102
|
+
/**
 * Partial update for `ConversionOptions`.
 *
 * Every field is optional (Rust `Option<T>`), which allows selective updates:
 * each getter is typed `T | undefined` and each setter also accepts `null`.
 * Bindings use this to construct options from language-native types. Prefer
 * `ConversionOptionsBuilder` for Rust code.
 *
 * Notes on the declared shape:
 * - Enum-valued properties are assigned with the enum type, but their getters
 *   are typed `string | undefined`.
 * - `visitor` is a property only; it is not a constructor parameter.
 * - The constructor is purely positional, and its last three parameters are
 *   `maxDepth, excludeSelectors, tierStrategy` — a different order from the
 *   tail of the `WasmConversionOptions` constructor.
 */
export class WasmConversionOptionsUpdate {
  /** Presumably releases the WASM-side instance — confirm in the generated JS glue. */
  free(): void;
  /** Disposal hook, so an instance can be managed with a `using` declaration. */
  [Symbol.dispose](): void;
  static default(): WasmConversionOptionsUpdate;
  constructor(
    headingStyle?: WasmHeadingStyle | null,
    listIndentType?: WasmListIndentType | null,
    listIndentWidth?: number | null,
    bullets?: string | null,
    strongEmSymbol?: string | null,
    escapeAsterisks?: boolean | null,
    escapeUnderscores?: boolean | null,
    escapeMisc?: boolean | null,
    escapeAscii?: boolean | null,
    codeLanguage?: string | null,
    autolinks?: boolean | null,
    defaultTitle?: boolean | null,
    brInTables?: boolean | null,
    compactTables?: boolean | null,
    highlightStyle?: WasmHighlightStyle | null,
    extractMetadata?: boolean | null,
    whitespaceMode?: WasmWhitespaceMode | null,
    stripNewlines?: boolean | null,
    wrap?: boolean | null,
    wrapWidth?: number | null,
    convertAsInline?: boolean | null,
    subSymbol?: string | null,
    supSymbol?: string | null,
    newlineStyle?: WasmNewlineStyle | null,
    codeBlockStyle?: WasmCodeBlockStyle | null,
    keepInlineImagesIn?: string[] | null,
    preprocessing?: WasmPreprocessingOptionsUpdate | null,
    encoding?: string | null,
    debug?: boolean | null,
    stripTags?: string[] | null,
    preserveTags?: string[] | null,
    skipImages?: boolean | null,
    urlEscapeStyle?: WasmUrlEscapeStyle | null,
    linkStyle?: WasmLinkStyle | null,
    outputFormat?: WasmOutputFormat | null,
    includeDocumentStructure?: boolean | null,
    extractImages?: boolean | null,
    maxImageSize?: bigint | null,
    captureSvg?: boolean | null,
    inferDimensions?: boolean | null,
    maxDepth?: number | null,
    excludeSelectors?: string[] | null,
    tierStrategy?: WasmTierStrategy | null,
  );
  get autolinks(): boolean | undefined;
  set autolinks(value: boolean | null | undefined);
  get brInTables(): boolean | undefined;
  set brInTables(value: boolean | null | undefined);
  get bullets(): string | undefined;
  set bullets(value: string | null | undefined);
  get captureSvg(): boolean | undefined;
  set captureSvg(value: boolean | null | undefined);
  get codeBlockStyle(): string | undefined;
  set codeBlockStyle(value: WasmCodeBlockStyle | null | undefined);
  get codeLanguage(): string | undefined;
  set codeLanguage(value: string | null | undefined);
  get compactTables(): boolean | undefined;
  set compactTables(value: boolean | null | undefined);
  get convertAsInline(): boolean | undefined;
  set convertAsInline(value: boolean | null | undefined);
  get debug(): boolean | undefined;
  set debug(value: boolean | null | undefined);
  get defaultTitle(): boolean | undefined;
  set defaultTitle(value: boolean | null | undefined);
  get encoding(): string | undefined;
  set encoding(value: string | null | undefined);
  get escapeAscii(): boolean | undefined;
  set escapeAscii(value: boolean | null | undefined);
  get escapeAsterisks(): boolean | undefined;
  set escapeAsterisks(value: boolean | null | undefined);
  get escapeMisc(): boolean | undefined;
  set escapeMisc(value: boolean | null | undefined);
  get escapeUnderscores(): boolean | undefined;
  set escapeUnderscores(value: boolean | null | undefined);
  get excludeSelectors(): string[] | undefined;
  set excludeSelectors(value: string[] | null | undefined);
  get extractImages(): boolean | undefined;
  set extractImages(value: boolean | null | undefined);
  get extractMetadata(): boolean | undefined;
  set extractMetadata(value: boolean | null | undefined);
  get headingStyle(): string | undefined;
  set headingStyle(value: WasmHeadingStyle | null | undefined);
  get highlightStyle(): string | undefined;
  set highlightStyle(value: WasmHighlightStyle | null | undefined);
  get includeDocumentStructure(): boolean | undefined;
  set includeDocumentStructure(value: boolean | null | undefined);
  get inferDimensions(): boolean | undefined;
  set inferDimensions(value: boolean | null | undefined);
  get keepInlineImagesIn(): string[] | undefined;
  set keepInlineImagesIn(value: string[] | null | undefined);
  get linkStyle(): string | undefined;
  set linkStyle(value: WasmLinkStyle | null | undefined);
  get listIndentType(): string | undefined;
  set listIndentType(value: WasmListIndentType | null | undefined);
  get listIndentWidth(): number | undefined;
  set listIndentWidth(value: number | null | undefined);
  get maxDepth(): number | undefined;
  set maxDepth(value: number | null | undefined);
  get maxImageSize(): bigint | undefined;
  set maxImageSize(value: bigint | null | undefined);
  get newlineStyle(): string | undefined;
  set newlineStyle(value: WasmNewlineStyle | null | undefined);
  get outputFormat(): string | undefined;
  set outputFormat(value: WasmOutputFormat | null | undefined);
  get preprocessing(): WasmPreprocessingOptionsUpdate | undefined;
  set preprocessing(value: WasmPreprocessingOptionsUpdate | null | undefined);
  get preserveTags(): string[] | undefined;
  set preserveTags(value: string[] | null | undefined);
  get skipImages(): boolean | undefined;
  set skipImages(value: boolean | null | undefined);
  get stripNewlines(): boolean | undefined;
  set stripNewlines(value: boolean | null | undefined);
  get stripTags(): string[] | undefined;
  set stripTags(value: string[] | null | undefined);
  get strongEmSymbol(): string | undefined;
  set strongEmSymbol(value: string | null | undefined);
  get subSymbol(): string | undefined;
  set subSymbol(value: string | null | undefined);
  get supSymbol(): string | undefined;
  set supSymbol(value: string | null | undefined);
  get tierStrategy(): string | undefined;
  set tierStrategy(value: WasmTierStrategy | null | undefined);
  get urlEscapeStyle(): string | undefined;
  set urlEscapeStyle(value: WasmUrlEscapeStyle | null | undefined);
  get visitor(): WasmVisitorHandle | undefined;
  set visitor(value: WasmVisitorHandle | null | undefined);
  get whitespaceMode(): string | undefined;
  set whitespaceMode(value: WasmWhitespaceMode | null | undefined);
  get wrap(): boolean | undefined;
  set wrap(value: boolean | null | undefined);
  get wrapWidth(): number | undefined;
  set wrapWidth(value: number | null | undefined);
}
|
|
202
|
+
|
|
203
|
+
/**
 * The primary result of HTML conversion and extraction.
 *
 * Contains the converted text output, optional structured document tree,
 * metadata, extracted tables, images, and processing warnings.
 *
 * `content` and `document` are optional (`undefined` when absent). `metadata`
 * is a property only; it is not a constructor parameter.
 *
 * # Example (Rust API of the underlying crate)
 *
 * ```text
 * use html_to_markdown_rs::{convert, ConversionOptions};
 *
 * let result = convert("<h1>Hello</h1><p>World</p>", None)?;
 * assert!(result.content.is_some());
 * assert!(result.warnings.is_empty());
 * ```
 */
export class WasmConversionResult {
  /** Presumably releases the WASM-side instance — confirm in the generated JS glue. */
  free(): void;
  /** Disposal hook, so an instance can be managed with a `using` declaration. */
  [Symbol.dispose](): void;
  static default(): WasmConversionResult;
  constructor(
    tables?: WasmTableData[] | null,
    warnings?: WasmProcessingWarning[] | null,
    content?: string | null,
    document?: WasmDocumentStructure | null,
  );
  get content(): string | undefined;
  set content(value: string | null | undefined);
  get document(): WasmDocumentStructure | undefined;
  set document(value: WasmDocumentStructure | null | undefined);
  metadata: WasmHtmlMetadata;
  tables: WasmTableData[];
  warnings: WasmProcessingWarning[];
}
|
|
232
|
+
|
|
233
|
+
/**
 * Document-level metadata extracted from `<head>` and top-level elements.
 *
 * Contains all metadata typically used by search engines, social media platforms,
 * and browsers for document indexing and presentation.
 *
 * `openGraph`, `twitterCard` and `metaTags` are declared as `any`; their shape
 * is not described by this declaration file. `textDirection` is assigned with
 * `WasmTextDirection`, but its getter is typed `string | undefined`.
 */
export class WasmDocumentMetadata {
  /** Presumably releases the WASM-side instance — confirm in the generated JS glue. */
  free(): void;
  /** Disposal hook, so an instance can be managed with a `using` declaration. */
  [Symbol.dispose](): void;
  static default(): WasmDocumentMetadata;
  constructor(
    keywords?: string[] | null,
    openGraph?: any | null,
    twitterCard?: any | null,
    metaTags?: any | null,
    title?: string | null,
    description?: string | null,
    author?: string | null,
    canonicalUrl?: string | null,
    baseHref?: string | null,
    language?: string | null,
    textDirection?: WasmTextDirection | null,
  );
  get author(): string | undefined;
  set author(value: string | null | undefined);
  get baseHref(): string | undefined;
  set baseHref(value: string | null | undefined);
  get canonicalUrl(): string | undefined;
  set canonicalUrl(value: string | null | undefined);
  get description(): string | undefined;
  set description(value: string | null | undefined);
  keywords: string[];
  get language(): string | undefined;
  set language(value: string | null | undefined);
  metaTags: any;
  openGraph: any;
  get textDirection(): string | undefined;
  set textDirection(value: WasmTextDirection | null | undefined);
  get title(): string | undefined;
  set title(value: string | null | undefined);
  twitterCard: any;
}
|
|
265
|
+
|
|
266
|
+
/**
 * A single node in the document tree.
 *
 * `children` is a `Uint32Array` and `parent` an optional number.
 * NOTE(review): these look like indices into `WasmDocumentStructure.nodes` —
 * confirm against the converter. `content` and `attributes` are declared as
 * `any`; their shape is not described by this declaration file.
 */
export class WasmDocumentNode {
  /** Presumably releases the WASM-side instance — confirm in the generated JS glue. */
  free(): void;
  /** Disposal hook, so an instance can be managed with a `using` declaration. */
  [Symbol.dispose](): void;
  static default(): WasmDocumentNode;
  constructor(
    id: string,
    content: any,
    children: Uint32Array,
    annotations: WasmTextAnnotation[],
    parent?: number | null,
    attributes?: any | null,
  );
  annotations: WasmTextAnnotation[];
  get attributes(): any | undefined;
  set attributes(value: any | null | undefined);
  children: Uint32Array;
  content: any;
  id: string;
  get parent(): number | undefined;
  set parent(value: number | null | undefined);
}
|
|
283
|
+
|
|
284
|
+
/**
 * A structured document tree representing the semantic content of an HTML document.
 *
 * Uses a flat node array with index-based parent/child references for efficient traversal.
 * `sourceFormat` is optional (`undefined` when absent).
 */
export class WasmDocumentStructure {
  /** Presumably releases the WASM-side instance — confirm in the generated JS glue. */
  free(): void;
  /** Disposal hook, so an instance can be managed with a `using` declaration. */
  [Symbol.dispose](): void;
  static default(): WasmDocumentStructure;
  constructor(nodes: WasmDocumentNode[], sourceFormat?: string | null);
  nodes: WasmDocumentNode[];
  get sourceFormat(): string | undefined;
  set sourceFormat(value: string | null | undefined);
}
|
|
298
|
+
|
|
299
|
+
/**
 * A single cell in a table grid.
 *
 * Carries the cell text, its `row`/`col` position, its `rowSpan`/`colSpan`,
 * and whether it is a header cell.
 */
export class WasmGridCell {
  /** Presumably releases the WASM-side instance — confirm in the generated JS glue. */
  free(): void;
  /** Disposal hook, so an instance can be managed with a `using` declaration. */
  [Symbol.dispose](): void;
  static default(): WasmGridCell;
  constructor(
    content: string,
    row: number,
    col: number,
    rowSpan: number,
    colSpan: number,
    isHeader: boolean,
  );
  col: number;
  colSpan: number;
  content: string;
  isHeader: boolean;
  row: number;
  rowSpan: number;
}
|
|
314
|
+
|
|
315
|
+
/**
 * Header element metadata with hierarchy tracking.
 *
 * Captures heading elements (h1-h6) with their text content, identifiers,
 * and position in the document structure.
 */
export class WasmHeaderMetadata {
  /** Presumably releases the WASM-side instance — confirm in the generated JS glue. */
  free(): void;
  /** Disposal hook, so an instance can be managed with a `using` declaration. */
  [Symbol.dispose](): void;
  static default(): WasmHeaderMetadata;
  /**
   * Validate that the header level is within valid range (1-6).
   *
   * @returns `true` if level is 1-6, `false` otherwise.
   */
  isValid(): boolean;
  constructor(level: number, text: string, depth: number, htmlOffset: number, id?: string | null);
  depth: number;
  htmlOffset: number;
  get id(): string | undefined;
  set id(value: string | null | undefined);
  level: number;
  text: string;
}
|
|
345
|
+
|
|
346
|
+
/**
 * Heading style options for Markdown output.
 *
 * Controls how headings (h1-h6) are rendered in the output Markdown.
 */
export enum WasmHeadingStyle {
  Underlined = 0,
  Atx = 1,
  AtxClosed = 2,
}
|
|
356
|
+
|
|
357
|
+
/**
 * Highlight rendering style for `<mark>` elements.
 *
 * Controls how highlighted text is rendered in Markdown output.
 */
export enum WasmHighlightStyle {
  DoubleEqual = 0,
  Html = 1,
  Bold = 2,
  None = 3,
}
|
|
368
|
+
|
|
369
|
+
/**
 * Comprehensive metadata extraction result from HTML document.
 *
 * Contains all extracted metadata types in a single structure,
 * suitable for serialization and transmission across language boundaries.
 */
export class WasmHtmlMetadata {
  /** Presumably releases the WASM-side instance — confirm in the generated JS glue. */
  free(): void;
  /** Disposal hook, so an instance can be managed with a `using` declaration. */
  [Symbol.dispose](): void;
  static default(): WasmHtmlMetadata;
  constructor(
    document?: WasmDocumentMetadata | null,
    headers?: WasmHeaderMetadata[] | null,
    links?: WasmLinkMetadata[] | null,
    images?: WasmImageMetadata[] | null,
    structuredData?: WasmStructuredData[] | null,
  );
  document: WasmDocumentMetadata;
  headers: WasmHeaderMetadata[];
  images: WasmImageMetadata[];
  links: WasmLinkMetadata[];
  structuredData: WasmStructuredData[];
}
|
|
388
|
+
|
|
389
|
+
/**
 * Image dimensions in pixels.
 *
 * Binding-safe replacement for `(u32, u32)` tuples, which degrade to
 * `Vec<Vec<String>>` when sanitized for cross-language binding generation.
 * Used by both `ImageMetadata` and `InlineImage`.
 */
export class WasmImageDimensions {
  /** Presumably releases the WASM-side instance — confirm in the generated JS glue. */
  free(): void;
  /** Disposal hook, so an instance can be managed with a `using` declaration. */
  [Symbol.dispose](): void;
  static default(): WasmImageDimensions;
  constructor(width: number, height: number);
  height: number;
  width: number;
}
|
|
405
|
+
|
|
406
|
+
/**
 * Image metadata with source and dimensions.
 *
 * Captures `<img>` elements and inline `<svg>` elements with metadata
 * for image analysis and optimization.
 *
 * `imageType` is assigned with `WasmImageType`, but its getter is typed
 * `string`. `attributes` is declared as `any`; its shape is not described by
 * this declaration file.
 */
export class WasmImageMetadata {
  /** Presumably releases the WASM-side instance — confirm in the generated JS glue. */
  free(): void;
  /** Disposal hook, so an instance can be managed with a `using` declaration. */
  [Symbol.dispose](): void;
  static default(): WasmImageMetadata;
  constructor(
    src: string,
    imageType: WasmImageType,
    attributes: any,
    alt?: string | null,
    title?: string | null,
    dimensions?: WasmImageDimensions | null,
  );
  get alt(): string | undefined;
  set alt(value: string | null | undefined);
  attributes: any;
  get dimensions(): WasmImageDimensions | undefined;
  set dimensions(value: WasmImageDimensions | null | undefined);
  get imageType(): string;
  set imageType(value: WasmImageType);
  src: string;
  get title(): string | undefined;
  set title(value: string | null | undefined);
}
|
|
430
|
+
|
|
431
|
+
/**
 * Image source classification for proper handling and processing.
 *
 * Determines whether an image is embedded (data URI), inline SVG, external, or relative.
 */
export enum WasmImageType {
  DataUri = 0,
  InlineSvg = 1,
  External = 2,
  Relative = 3,
}
|
|
442
|
+
|
|
443
|
+
/**
 * Hyperlink metadata with categorization and attributes.
 *
 * Represents `<a>` elements with parsed href values, text content, and link type classification.
 *
 * `linkType` is assigned with `WasmLinkType`, but its getter is typed `string`.
 * `attributes` is declared as `any`; its shape is not described by this
 * declaration file.
 */
export class WasmLinkMetadata {
  /** Presumably releases the WASM-side instance — confirm in the generated JS glue. */
  free(): void;
  /** Disposal hook, so an instance can be managed with a `using` declaration. */
  [Symbol.dispose](): void;
  static default(): WasmLinkMetadata;
  constructor(
    href: string,
    text: string,
    linkType: WasmLinkType,
    rel: string[],
    attributes: any,
    title?: string | null,
  );
  attributes: any;
  href: string;
  get linkType(): string;
  set linkType(value: WasmLinkType);
  rel: string[];
  text: string;
  get title(): string | undefined;
  set title(value: string | null | undefined);
}
|
|
464
|
+
|
|
465
|
+
/**
 * Link rendering style in Markdown output.
 *
 * Controls whether links and images use inline `[text](url)` syntax or
 * reference-style `[text][1]` syntax with definitions collected at the end.
 */
export enum WasmLinkStyle {
  Inline = 0,
  Reference = 1,
}
|
|
475
|
+
|
|
476
|
+
/**
 * Link classification based on href value and document context.
 *
 * Used to categorize links during extraction for filtering and analysis.
 */
export enum WasmLinkType {
  Anchor = 0,
  Internal = 1,
  External = 2,
  Email = 3,
  Phone = 4,
  Other = 5,
}
|
|
489
|
+
|
|
490
|
+
/**
 * List indentation character type.
 *
 * Controls whether list items are indented with spaces or tabs.
 */
export enum WasmListIndentType {
  Spaces = 0,
  Tabs = 1,
}
|
|
499
|
+
|
|
500
|
+
/**
 * A single key-value metadata entry from `<head>` meta tags.
 *
 * Binding-safe replacement for `(String, String)` tuples used in
 * `NodeContent.MetadataBlock`. Tuple pairs cannot be represented
 * across language boundaries without lossy degradation.
 */
export class WasmMetadataEntry {
  /** Presumably releases the WASM-side instance — confirm in the generated JS glue. */
  free(): void;
  /** Disposal hook, so an instance can be managed with a `using` declaration. */
  [Symbol.dispose](): void;
  static default(): WasmMetadataEntry;
  constructor(key: string, value: string);
  key: string;
  value: string;
}
|
|
515
|
+
|
|
516
|
+
/**
 * The semantic content type of a document node.
 *
 * Uses internally tagged representation (`"node_type": "heading"`) for JSON serialization.
 *
 * `nodeType` is the only required field. Every other field is optional: its
 * getter is typed `T | undefined` and its setter also accepts `null`.
 * NOTE(review): which optional fields are populated presumably depends on
 * `nodeType` — confirm against the Rust `NodeContent` enum.
 */
export class WasmNodeContent {
  /** Presumably releases the WASM-side instance — confirm in the generated JS glue. */
  free(): void;
  /** Disposal hook, so an instance can be managed with a `using` declaration. */
  [Symbol.dispose](): void;
  static default(): WasmNodeContent;
  constructor();
  get content(): string | undefined;
  set content(value: string | null | undefined);
  get definition(): string | undefined;
  set definition(value: string | null | undefined);
  get description(): string | undefined;
  set description(value: string | null | undefined);
  get entries(): WasmMetadataEntry[] | undefined;
  set entries(value: WasmMetadataEntry[] | null | undefined);
  get format(): string | undefined;
  set format(value: string | null | undefined);
  get grid(): WasmTableGrid | undefined;
  set grid(value: WasmTableGrid | null | undefined);
  get headingLevel(): number | undefined;
  set headingLevel(value: number | null | undefined);
  get headingText(): string | undefined;
  set headingText(value: string | null | undefined);
  get imageIndex(): number | undefined;
  set imageIndex(value: number | null | undefined);
  get label(): string | undefined;
  set label(value: string | null | undefined);
  get language(): string | undefined;
  set language(value: string | null | undefined);
  get level(): number | undefined;
  set level(value: number | null | undefined);
  nodeType: string;
  get ordered(): boolean | undefined;
  set ordered(value: boolean | null | undefined);
  get src(): string | undefined;
  set src(value: string | null | undefined);
  get term(): string | undefined;
  set term(value: string | null | undefined);
  get text(): string | undefined;
  set text(value: string | null | undefined);
}
|
|
570
|
+
|
|
571
|
+
/**
 * Context information passed to all visitor methods.
 *
 * Provides comprehensive metadata about the current node being visited,
 * including its type, tag name, position in the DOM tree, and parent context.
 *
 * The two sections below describe the underlying Rust `NodeContext` type. In
 * this binding, `attributes()` is declared to return `any`, and `nodeType` is
 * assigned with `WasmNodeType` while its getter is typed `string`.
 *
 * ## Attributes
 *
 * Access attributes via `NodeContext.attributes`, which returns
 * `&BTreeMap<String, String>`. When the context was built with
 * `NodeContext.with_lazy_attributes` (the hot path inside the converter),
 * the map is only materialized on the first call — if the visitor never reads
 * attributes, the allocation is skipped.
 *
 * ## Lifetimes
 *
 * String fields use `Cow<'_, str>` so the converter can pass slices directly
 * out of the parsed DOM without allocating. Visitor implementations that need
 * to outlive the callback should call `NodeContext.into_owned`.
 */
export class WasmNodeContext {
  /** Presumably releases the WASM-side instance — confirm in the generated JS glue. */
  free(): void;
  /** Disposal hook, so an instance can be managed with a `using` declaration. */
  [Symbol.dispose](): void;
  /**
   * Return a reference to the attribute map.
   *
   * If the context was built with `NodeContext.with_lazy_attributes`, the
   * map is materialized on the first call and cached for subsequent calls.
   * If this method is never called, no allocation occurs for attributes.
   */
  attributes(): any;
  static default(): WasmNodeContext;
  /**
   * Promote any borrowed fields into owned storage so the context can outlive `'a`.
   */
  intoOwned(): WasmNodeContext;
  constructor(
    nodeType: WasmNodeType,
    tagName: string,
    depth: number,
    indexInParent: number,
    isInline: boolean,
    parentTag?: string | null,
  );
  /**
   * Construct a `NodeContext` with an owned attribute map.
   *
   * Use this when the caller already has materialized attributes.
   *
   * Unlike the constructor, this factory takes `attributes` as its third
   * argument and `parent_tag` before `is_inline`. Its parameter names are
   * snake_case in the generated declaration.
   */
  static withOwnedAttributes(
    node_type: WasmNodeType,
    tag_name: string,
    attributes: any,
    depth: number,
    index_in_parent: number,
    parent_tag: string | null | undefined,
    is_inline: boolean,
  ): WasmNodeContext;
  depth: number;
  indexInParent: number;
  isInline: boolean;
  get nodeType(): string;
  set nodeType(value: WasmNodeType);
  get parentTag(): string | undefined;
  set parentTag(value: string | null | undefined);
  tagName: string;
}
|
|
623
|
+
|
|
624
|
+
/**
|
|
625
|
+
* Node type enumeration covering all HTML element types.
|
|
626
|
+
*
|
|
627
|
+
* This enum categorizes all HTML elements that the converter recognizes,
|
|
628
|
+
* providing a coarse-grained classification for visitor dispatch.
|
|
629
|
+
*/
|
|
630
|
+
export enum WasmNodeType {
|
|
631
|
+
/** Discriminants are explicit and contiguous, running from 0 (`Text`) through 87 (`Custom`). */
Text = 0,
|
|
632
|
+
Element = 1,
|
|
633
|
+
Heading = 2,
|
|
634
|
+
Paragraph = 3,
|
|
635
|
+
Div = 4,
|
|
636
|
+
Blockquote = 5,
|
|
637
|
+
Pre = 6,
|
|
638
|
+
Hr = 7,
|
|
639
|
+
List = 8,
|
|
640
|
+
ListItem = 9,
|
|
641
|
+
DefinitionList = 10,
|
|
642
|
+
DefinitionTerm = 11,
|
|
643
|
+
DefinitionDescription = 12,
|
|
644
|
+
Table = 13,
|
|
645
|
+
TableRow = 14,
|
|
646
|
+
TableCell = 15,
|
|
647
|
+
TableHeader = 16,
|
|
648
|
+
TableBody = 17,
|
|
649
|
+
TableHead = 18,
|
|
650
|
+
TableFoot = 19,
|
|
651
|
+
/** NOTE(review): `Link` (20) and `LinkTag` (83) are separate members — presumably `<a>` versus `<link>`; confirm. */
Link = 20,
|
|
652
|
+
Image = 21,
|
|
653
|
+
Strong = 22,
|
|
654
|
+
Em = 23,
|
|
655
|
+
Code = 24,
|
|
656
|
+
Strikethrough = 25,
|
|
657
|
+
Underline = 26,
|
|
658
|
+
Subscript = 27,
|
|
659
|
+
Superscript = 28,
|
|
660
|
+
Mark = 29,
|
|
661
|
+
Small = 30,
|
|
662
|
+
Br = 31,
|
|
663
|
+
Span = 32,
|
|
664
|
+
Article = 33,
|
|
665
|
+
Section = 34,
|
|
666
|
+
Nav = 35,
|
|
667
|
+
Aside = 36,
|
|
668
|
+
Header = 37,
|
|
669
|
+
Footer = 38,
|
|
670
|
+
Main = 39,
|
|
671
|
+
Figure = 40,
|
|
672
|
+
Figcaption = 41,
|
|
673
|
+
Time = 42,
|
|
674
|
+
Details = 43,
|
|
675
|
+
Summary = 44,
|
|
676
|
+
Form = 45,
|
|
677
|
+
Input = 46,
|
|
678
|
+
Select = 47,
|
|
679
|
+
Option = 48,
|
|
680
|
+
Button = 49,
|
|
681
|
+
Textarea = 50,
|
|
682
|
+
Label = 51,
|
|
683
|
+
Fieldset = 52,
|
|
684
|
+
Legend = 53,
|
|
685
|
+
Audio = 54,
|
|
686
|
+
Video = 55,
|
|
687
|
+
Picture = 56,
|
|
688
|
+
Source = 57,
|
|
689
|
+
Iframe = 58,
|
|
690
|
+
Svg = 59,
|
|
691
|
+
Canvas = 60,
|
|
692
|
+
Ruby = 61,
|
|
693
|
+
Rt = 62,
|
|
694
|
+
Rp = 63,
|
|
695
|
+
Abbr = 64,
|
|
696
|
+
Kbd = 65,
|
|
697
|
+
Samp = 66,
|
|
698
|
+
Var = 67,
|
|
699
|
+
Cite = 68,
|
|
700
|
+
Q = 69,
|
|
701
|
+
Del = 70,
|
|
702
|
+
Ins = 71,
|
|
703
|
+
Data = 72,
|
|
704
|
+
Meter = 73,
|
|
705
|
+
Progress = 74,
|
|
706
|
+
Output = 75,
|
|
707
|
+
Template = 76,
|
|
708
|
+
Slot = 77,
|
|
709
|
+
Html = 78,
|
|
710
|
+
Head = 79,
|
|
711
|
+
Body = 80,
|
|
712
|
+
Title = 81,
|
|
713
|
+
Meta = 82,
|
|
714
|
+
LinkTag = 83,
|
|
715
|
+
Style = 84,
|
|
716
|
+
Script = 85,
|
|
717
|
+
Base = 86,
|
|
718
|
+
/** NOTE(review): presumably the catch-all for elements without a dedicated member — confirm. */
Custom = 87,
|
|
719
|
+
}
|
|
720
|
+
|
|
721
|
+
/**
|
|
722
|
+
* Output format for conversion.
|
|
723
|
+
*
|
|
724
|
+
* Specifies the target markup language format for the conversion output.
|
|
725
|
+
*/
|
|
726
|
+
export enum WasmOutputFormat {
|
|
727
|
+
/** Three explicit discriminants, 0 through 2. */
Markdown = 0,
|
|
728
|
+
Djot = 1,
|
|
729
|
+
/** NOTE(review): presumably the "plain text" output named in the `convert` documentation — confirm. */
Plain = 2,
|
|
730
|
+
}
|
|
731
|
+
|
|
732
|
+
/**
|
|
733
|
+
* HTML preprocessing options for document cleanup before conversion.
|
|
734
|
+
*/
|
|
735
|
+
export class WasmPreprocessingOptions {
|
|
736
|
+
/** NOTE(review): presumably releases the WebAssembly-side object backing this instance (wasm-bindgen convention) — confirm. */
free(): void;
|
|
737
|
+
/** Disposal hook keyed by `Symbol.dispose`, so instances can be bound with `using`; NOTE(review): presumably equivalent to `free()` — confirm. */
[Symbol.dispose](): void;
|
|
738
|
+
/** Returns a new `WasmPreprocessingOptions`; NOTE(review): presumably populated with default settings — confirm. */
static default(): WasmPreprocessingOptions;
|
|
739
|
+
/** Every argument is optional and may be `null`. The two trailing booleans are ordered `removeNavigation`, then `removeForms` — the reverse of the property listing below. */
constructor(enabled?: boolean | null, preset?: WasmPreprocessingPreset | null, removeNavigation?: boolean | null, removeForms?: boolean | null);
|
|
740
|
+
enabled: boolean;
|
|
741
|
+
/** The getter is typed `string` while the setter accepts a `WasmPreprocessingPreset` value. */
get preset(): string;
|
|
742
|
+
set preset(value: WasmPreprocessingPreset);
|
|
743
|
+
removeForms: boolean;
|
|
744
|
+
removeNavigation: boolean;
|
|
745
|
+
}
|
|
746
|
+
|
|
747
|
+
/**
|
|
748
|
+
* Partial update for `PreprocessingOptions`.
|
|
749
|
+
*
|
|
750
|
+
* Every field is optional (Rust `Option<T>`, surfaced here as `T | undefined`) so that fields can be selectively updated.
|
|
751
|
+
* Only specified fields (values) will override existing options; undefined values leave the
|
|
752
|
+
* corresponding fields unchanged when applied via `PreprocessingOptions.apply_update`.
|
|
753
|
+
*/
|
|
754
|
+
export class WasmPreprocessingOptionsUpdate {
|
|
755
|
+
/** NOTE(review): presumably releases the WebAssembly-side object backing this instance (wasm-bindgen convention) — confirm. */
free(): void;
|
|
756
|
+
/** Disposal hook keyed by `Symbol.dispose`, so instances can be bound with `using`; NOTE(review): presumably equivalent to `free()` — confirm. */
[Symbol.dispose](): void;
|
|
757
|
+
/** Returns a new `WasmPreprocessingOptionsUpdate`; NOTE(review): presumably with every field unset — confirm. */
static default(): WasmPreprocessingOptionsUpdate;
|
|
758
|
+
/** Every argument is optional and may be `null`. The two trailing booleans are ordered `removeNavigation`, then `removeForms`. */
constructor(enabled?: boolean | null, preset?: WasmPreprocessingPreset | null, removeNavigation?: boolean | null, removeForms?: boolean | null);
|
|
759
|
+
/** For each accessor pair below, the getter yields the value or `undefined`, and the setter additionally accepts `null`. */
get enabled(): boolean | undefined;
|
|
760
|
+
set enabled(value: boolean | null | undefined);
|
|
761
|
+
/** The getter is typed `string | undefined` while the setter accepts a `WasmPreprocessingPreset` value. */
get preset(): string | undefined;
|
|
762
|
+
set preset(value: WasmPreprocessingPreset | null | undefined);
|
|
763
|
+
get removeForms(): boolean | undefined;
|
|
764
|
+
set removeForms(value: boolean | null | undefined);
|
|
765
|
+
get removeNavigation(): boolean | undefined;
|
|
766
|
+
set removeNavigation(value: boolean | null | undefined);
|
|
767
|
+
}
|
|
768
|
+
|
|
769
|
+
/**
|
|
770
|
+
* HTML preprocessing aggressiveness level.
|
|
771
|
+
*
|
|
772
|
+
* Controls the extent of cleanup performed before conversion. Higher levels remove more elements.
|
|
773
|
+
*/
|
|
774
|
+
export enum WasmPreprocessingPreset {
|
|
775
|
+
/** Members are listed in ascending discriminant order; NOTE(review): presumably also ascending aggressiveness — confirm. */
Minimal = 0,
|
|
776
|
+
Standard = 1,
|
|
777
|
+
Aggressive = 2,
|
|
778
|
+
}
|
|
779
|
+
|
|
780
|
+
/**
|
|
781
|
+
* A non-fatal diagnostic produced during HTML conversion.
|
|
782
|
+
*
|
|
783
|
+
* Warnings indicate that conversion completed but some content may have been handled
|
|
784
|
+
* differently than expected — for example, an image that could not be extracted, a truncated
|
|
785
|
+
* input, or malformed HTML that was repaired with best-effort parsing.
|
|
786
|
+
*
|
|
787
|
+
* Conversion always succeeds (returns `ConversionResult`) even when warnings are
|
|
788
|
+
* present. Callers should inspect `warnings` and decide how to
|
|
789
|
+
* handle them based on their tolerance for partial results:
|
|
790
|
+
*
|
|
791
|
+
* - **Logging pipelines**: emit each warning at `WARN` level and continue.
|
|
792
|
+
* - **Strict pipelines**: treat any warning as a hard error by checking
|
|
793
|
+
* `result.warnings.is_empty()` before using the output.
|
|
794
|
+
*
|
|
795
|
+
* See `WarningKind` for the full taxonomy of warning categories.
|
|
796
|
+
*/
|
|
797
|
+
export class WasmProcessingWarning {
|
|
798
|
+
/** NOTE(review): presumably releases the WebAssembly-side object backing this instance (wasm-bindgen convention) — confirm. */
free(): void;
|
|
799
|
+
/** Disposal hook keyed by `Symbol.dispose`, so instances can be bound with `using`; NOTE(review): presumably equivalent to `free()` — confirm. */
[Symbol.dispose](): void;
|
|
800
|
+
/** Returns a new `WasmProcessingWarning`; NOTE(review): the default `message` and `kind` are not visible here. */
static default(): WasmProcessingWarning;
|
|
801
|
+
/** Both arguments are required: the warning text first, then its `WasmWarningKind` category. */
constructor(message: string, kind: WasmWarningKind);
|
|
802
|
+
/** The getter is typed `string` while the setter accepts a `WasmWarningKind` value. */
get kind(): string;
|
|
803
|
+
set kind(value: WasmWarningKind);
|
|
804
|
+
message: string;
|
|
805
|
+
}
|
|
806
|
+
|
|
807
|
+
/**
|
|
808
|
+
* Structured data block (JSON-LD, Microdata, or `RDFa`).
|
|
809
|
+
*
|
|
810
|
+
* Represents machine-readable structured data found in the document.
|
|
811
|
+
* JSON-LD blocks are collected as raw JSON strings for flexibility.
|
|
812
|
+
*
|
|
813
|
+
* # Examples
|
|
814
|
+
*/
|
|
815
|
+
export class WasmStructuredData {
|
|
816
|
+
/** NOTE(review): presumably releases the WebAssembly-side object backing this instance (wasm-bindgen convention) — confirm. */
free(): void;
|
|
817
|
+
/** Disposal hook keyed by `Symbol.dispose`, so instances can be bound with `using`; NOTE(review): presumably equivalent to `free()` — confirm. */
[Symbol.dispose](): void;
|
|
818
|
+
/** Returns a new `WasmStructuredData`; NOTE(review): the default field values are not visible here. */
static default(): WasmStructuredData;
|
|
819
|
+
/** `dataType` and `rawJson` are required; `schemaType` may be omitted or `null`. */
constructor(dataType: WasmStructuredDataType, rawJson: string, schemaType?: string | null);
|
|
820
|
+
/** The getter is typed `string` while the setter accepts a `WasmStructuredDataType` value. */
get dataType(): string;
|
|
821
|
+
set dataType(value: WasmStructuredDataType);
|
|
822
|
+
rawJson: string;
|
|
823
|
+
/** Reads yield `string | undefined`; the setter additionally accepts `null`. */
get schemaType(): string | undefined;
|
|
824
|
+
set schemaType(value: string | null | undefined);
|
|
825
|
+
}
|
|
826
|
+
|
|
827
|
+
/**
|
|
828
|
+
* Structured data format type.
|
|
829
|
+
*
|
|
830
|
+
* Identifies the schema/format used for structured data markup.
|
|
831
|
+
*/
|
|
832
|
+
export enum WasmStructuredDataType {
|
|
833
|
+
/** Three explicit discriminants, 0 through 2; NOTE(review): presumably JSON-LD, Microdata and RDFa respectively — confirm. */
JsonLd = 0,
|
|
834
|
+
Microdata = 1,
|
|
835
|
+
RDFa = 2,
|
|
836
|
+
}
|
|
837
|
+
|
|
838
|
+
/**
|
|
839
|
+
* A top-level extracted table with both structured data and markdown representation.
|
|
840
|
+
*/
|
|
841
|
+
export class WasmTableData {
|
|
842
|
+
/** NOTE(review): presumably releases the WebAssembly-side object backing this instance (wasm-bindgen convention) — confirm. */
free(): void;
|
|
843
|
+
/** Disposal hook keyed by `Symbol.dispose`, so instances can be bound with `using`; NOTE(review): presumably equivalent to `free()` — confirm. */
[Symbol.dispose](): void;
|
|
844
|
+
/** Returns a new `WasmTableData`; NOTE(review): the default `grid` and `markdown` values are not visible here. */
static default(): WasmTableData;
|
|
845
|
+
/** Both arguments are required: the structured grid first, then its Markdown rendering. */
constructor(grid: WasmTableGrid, markdown: string);
|
|
846
|
+
grid: WasmTableGrid;
|
|
847
|
+
markdown: string;
|
|
848
|
+
}
|
|
849
|
+
|
|
850
|
+
/**
|
|
851
|
+
* A structured table grid with cell-level data including spans.
|
|
852
|
+
*/
|
|
853
|
+
export class WasmTableGrid {
|
|
854
|
+
/** NOTE(review): presumably releases the WebAssembly-side object backing this instance (wasm-bindgen convention) — confirm. */
free(): void;
|
|
855
|
+
/** Disposal hook keyed by `Symbol.dispose`, so instances can be bound with `using`; NOTE(review): presumably equivalent to `free()` — confirm. */
[Symbol.dispose](): void;
|
|
856
|
+
/** Returns a new `WasmTableGrid`; NOTE(review): the default dimensions and cells are not visible here. */
static default(): WasmTableGrid;
|
|
857
|
+
/** Every argument is optional and may be `null`, although the corresponding properties are declared non-optional. */
constructor(rows?: number | null, cols?: number | null, cells?: WasmGridCell[] | null);
|
|
858
|
+
/** NOTE(review): `WasmGridCell` is declared outside this section; the ordering of cells within the array is not visible here. */
cells: WasmGridCell[];
|
|
859
|
+
cols: number;
|
|
860
|
+
rows: number;
|
|
861
|
+
}
|
|
862
|
+
|
|
863
|
+
/**
|
|
864
|
+
* A styling or semantic annotation that applies to a byte range within a node's text.
|
|
865
|
+
*
|
|
866
|
+
* Unlike `DocumentNode`, which captures block-level structure (headings, paragraphs, etc.),
|
|
867
|
+
* a `TextAnnotation` describes inline-level markup — bold, italic, links, code spans, and
|
|
868
|
+
* similar — that spans a contiguous run of bytes inside `DocumentNode.content`'s text field.
|
|
869
|
+
*
|
|
870
|
+
* Byte offsets (`start`..`end`) are into the UTF-8 encoded text of the parent node. The range
|
|
871
|
+
* is half-open: `start` is inclusive and `end` is exclusive.
|
|
872
|
+
*
|
|
873
|
+
* Multiple annotations on the same node can overlap (e.g. bold-italic text), and they are
|
|
874
|
+
* stored in the order they are encountered during DOM traversal.
|
|
875
|
+
*
|
|
876
|
+
* See `AnnotationKind` for the full list of supported annotation types.
|
|
877
|
+
*/
|
|
878
|
+
export class WasmTextAnnotation {
|
|
879
|
+
/** NOTE(review): presumably releases the WebAssembly-side object backing this instance (wasm-bindgen convention) — confirm. */
free(): void;
|
|
880
|
+
/** Disposal hook keyed by `Symbol.dispose`, so instances can be bound with `using`; NOTE(review): presumably equivalent to `free()` — confirm. */
[Symbol.dispose](): void;
|
|
881
|
+
/** Returns a new `WasmTextAnnotation`; NOTE(review): the default range and kind are not visible here. */
static default(): WasmTextAnnotation;
|
|
882
|
+
/** All three arguments are required; `kind` is untyped (`any`) in this declaration. */
constructor(start: number, end: number, kind: any);
|
|
883
|
+
/** Exclusive end of the half-open byte range, per the class documentation. */
end: number;
|
|
884
|
+
/** Declared as `any`; NOTE(review): presumably carries an `AnnotationKind` value — its concrete shape is not visible here. */
kind: any;
|
|
885
|
+
/** Inclusive start of the half-open byte range, per the class documentation. */
start: number;
|
|
886
|
+
}
|
|
887
|
+
|
|
888
|
+
/**
|
|
889
|
+
* Text directionality of document content.
|
|
890
|
+
*
|
|
891
|
+
* Corresponds to the HTML `dir` attribute and `bdi` element directionality.
|
|
892
|
+
*/
|
|
893
|
+
export enum WasmTextDirection {
|
|
894
|
+
/** NOTE(review): the three members presumably correspond to the HTML `dir` values `ltr`, `rtl` and `auto` — confirm. */
LeftToRight = 0,
|
|
895
|
+
RightToLeft = 1,
|
|
896
|
+
Auto = 2,
|
|
897
|
+
}
|
|
898
|
+
|
|
899
|
+
/**
|
|
900
|
+
* Controls which conversion tier is used.
|
|
901
|
+
*/
|
|
902
|
+
export enum WasmTierStrategy {
|
|
903
|
+
Auto = 0,
|
|
904
|
+
/** Discriminants do not follow the tier numbers: `Tier2` is 1 and `Tier1` is 2. NOTE(review): what each tier denotes is not described here. */
Tier2 = 1,
|
|
905
|
+
Tier1 = 2,
|
|
906
|
+
}
|
|
907
|
+
|
|
908
|
+
/**
|
|
909
|
+
* URL encoding strategy for link and image destinations.
|
|
910
|
+
*
|
|
911
|
+
* Controls how special characters in URL destinations are handled when they
|
|
912
|
+
* require escaping to produce valid Markdown.
|
|
913
|
+
*
|
|
914
|
+
* The `Angle` variant (default) wraps the destination in angle brackets:
|
|
915
|
+
* `[text](<url with spaces>)`. This is the CommonMark-specified escape hatch
|
|
916
|
+
* but breaks when the URL itself contains `>`.
|
|
917
|
+
*
|
|
918
|
+
* The `Percent` variant percent-encodes every character that is not an RFC 3986
|
|
919
|
+
* unreserved character or `/`, producing a destination safe for all Markdown
|
|
920
|
+
* parsers: `[text](url%20with%20spaces)`.
|
|
921
|
+
*/
|
|
922
|
+
export enum WasmUrlEscapeStyle {
|
|
923
|
+
/** Wraps the destination in angle brackets; described as the default in the enum documentation. */
Angle = 0,
|
|
924
|
+
/** Percent-encodes the destination, e.g. `[text](url%20with%20spaces)`. */
Percent = 1,
|
|
925
|
+
}
|
|
926
|
+
|
|
927
|
+
/**
|
|
928
|
+
* Result of a visitor callback.
|
|
929
|
+
*
|
|
930
|
+
* Allows visitors to control the conversion flow by either proceeding
|
|
931
|
+
* with default behavior, providing custom output, skipping elements,
|
|
932
|
+
* preserving HTML, or signaling errors.
|
|
933
|
+
*/
|
|
934
|
+
export enum WasmVisitResult {
|
|
935
|
+
/** Five explicit discriminants, 0 through 4; the members are plain numeric values with no attached payload. */
Continue = 0,
|
|
936
|
+
/** NOTE(review): how the custom output is supplied alongside this value is not visible here. */
Custom = 1,
|
|
937
|
+
Skip = 2,
|
|
938
|
+
PreserveHtml = 3,
|
|
939
|
+
/** NOTE(review): how the error detail is supplied alongside this value is not visible here. */
Error = 4,
|
|
940
|
+
}
|
|
941
|
+
|
|
942
|
+
/**
|
|
943
|
+
* Shareable, thread-safe handle to a user-provided HTML visitor implementation.
|
|
944
|
+
*
|
|
945
|
+
* Pass an instance wrapped in this handle to `ConversionOptions` to
|
|
946
|
+
* customise how the HTML document is traversed and converted to Markdown.
|
|
947
|
+
* The handle may be cloned and shared across threads without additional
|
|
948
|
+
* synchronisation on the caller's side.
|
|
949
|
+
*/
|
|
950
|
+
export class WasmVisitorHandle {
|
|
951
|
+
/** NOTE(review): presumably releases the WebAssembly-side object backing this instance (wasm-bindgen convention) — confirm. */
free(): void;
|
|
952
|
+
/** Disposal hook keyed by `Symbol.dispose`, so instances can be bound with `using`; NOTE(review): presumably equivalent to `free()` — confirm. */
[Symbol.dispose](): void;
|
|
953
|
+
/** `visitor` is untyped (`any`); the callbacks it is expected to provide are not described by this declaration. */
constructor(visitor: any);
|
|
954
|
+
}
|
|
955
|
+
|
|
956
|
+
/**
|
|
957
|
+
* Categories of processing warnings.
|
|
958
|
+
*/
|
|
959
|
+
export enum WasmWarningKind {
|
|
960
|
+
/** Six explicit discriminants, 0 through 5. NOTE(review): the exact trigger for each category is not described in this declaration. */
ImageExtractionFailed = 0,
|
|
961
|
+
EncodingFallback = 1,
|
|
962
|
+
TruncatedInput = 2,
|
|
963
|
+
MalformedHtml = 3,
|
|
964
|
+
SanitizationApplied = 4,
|
|
965
|
+
DepthLimitExceeded = 5,
|
|
966
|
+
}
|
|
967
|
+
|
|
968
|
+
/**
|
|
969
|
+
* Whitespace handling strategy during conversion.
|
|
970
|
+
*
|
|
971
|
+
* Determines how sequences of whitespace characters (spaces, tabs, newlines) are processed.
|
|
972
|
+
*/
|
|
973
|
+
export enum WasmWhitespaceMode {
|
|
974
|
+
/** NOTE(review): presumably collapses runs of whitespace — not stated in this declaration; confirm. */
Normalized = 0,
|
|
975
|
+
/** NOTE(review): presumably preserves whitespace as written — not stated in this declaration; confirm. */
Strict = 1,
|
|
976
|
+
}
|
|
977
|
+
|
|
978
|
+
/**
|
|
979
|
+
* Convert HTML to Markdown, Djot, or plain text.
|
|
980
|
+
*
|
|
981
|
+
* Returns a `ConversionResult` with converted content plus optional metadata,
|
|
982
|
+
* document structure, table data, inline images, and warnings depending on the
|
|
983
|
+
* enabled features and conversion options.
|
|
984
|
+
*
|
|
985
|
+
* # Arguments
|
|
986
|
+
*
|
|
987
|
+
* * `html` — the HTML string to convert.
|
|
988
|
+
* * `options` — conversion options. Rust accepts bare `ConversionOptions`,
|
|
989
|
+
* `Some(options)`, or `None`. Language bindings expose the same option
|
|
990
|
+
* fields through native constructors or optional parameters.
|
|
991
|
+
*
|
|
992
|
+
* # Example
|
|
993
|
+
*
|
|
994
|
+
*
|
|
995
|
+
* # Errors
|
|
996
|
+
*
|
|
997
|
+
* Returns an error if HTML parsing fails or if the input contains invalid UTF-8.
|
|
998
|
+
*/
|
|
999
|
+
// `options` may be omitted or passed as `undefined`/`null`; the declared return type is `WasmConversionResult` directly, not a `Promise`.
export function convert(html: string, options?: WasmConversionOptions | null): WasmConversionResult;
|