@kreuzberg/html-to-markdown-wasm 3.4.0-rc.9 → 3.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1055 @@
1
+ /* tslint:disable */
2
+ /* eslint-disable */
3
+
4
+ /**
5
+ * The kind of an inline text annotation (bold, italic, link, and so on).
6
+ *
7
+ * Uses internally tagged representation (`"annotation_type": "bold"`) for JSON serialization; in this declaration each kind is a numeric enum member (0-8).
8
+ */
9
+ export enum WasmAnnotationKind {
10
+ Bold = 0,
11
+ Italic = 1,
12
+ Underline = 2,
13
+ Strikethrough = 3,
14
+ Code = 4,
15
+ Subscript = 5,
16
+ Superscript = 6,
17
+ Highlight = 7,
18
+ Link = 8,
19
+ }
20
+
21
+ /**
22
+ * Code block fence style in Markdown output.
23
+ *
24
+ * Determines how code blocks (`<pre><code>`) are rendered in Markdown. Accepted by the `codeBlockStyle` setters and the `code_block_style` constructor arguments of `WasmConversionOptions` and `WasmConversionOptionsUpdate`.
25
+ */
26
+ export enum WasmCodeBlockStyle {
27
+ Indented = 0,
28
+ Backticks = 1,
29
+ Tildes = 2,
30
+ }
31
+
32
+ /**
33
+ * Main conversion options for HTML to Markdown conversion.
34
+ *
35
+ * Use `WasmConversionOptions.builder()` to construct, or `WasmConversionOptions.default()` for defaults (Rust: `ConversionOptions::builder()` / `Default::default()`). Enum-typed options (`headingStyle`, `codeBlockStyle`, `linkStyle`, ...) are assigned with the enum but read back as `string`. All constructor arguments are optional and end with `exclude_selectors`, `max_depth` - the reverse of the `WasmConversionOptionsUpdate` constructor.
36
+ *
37
+ * # Example (Rust)
38
+ * The snippet below is the Rust API of `html_to_markdown_rs`. In this declaration `WasmConversionOptionsBuilder` has no `heading_style`/`wrap`/`wrap_width` methods; pass those through the constructor or the `headingStyle`/`wrap`/`wrapWidth` properties.
39
+ * ```text
40
+ * use html_to_markdown_rs::ConversionOptions;
41
+ *
42
+ * let options = ConversionOptions::builder()
43
+ * .heading_style(HeadingStyle::Atx)
44
+ * .wrap(true)
45
+ * .wrap_width(100)
46
+ * .build();
47
+ * ```
48
+ */
49
+ export class WasmConversionOptions {
50
+ free(): void;
51
+ [Symbol.dispose](): void;
52
+ /**
53
+ * Apply a partial update (`WasmConversionOptionsUpdate`) to these conversion options; returns nothing.
54
+ */
55
+ applyUpdate(update: WasmConversionOptionsUpdate): void;
56
+ /**
57
+ * Create a new `WasmConversionOptionsBuilder` with default values.
58
+ */
59
+ static builder(): WasmConversionOptionsBuilder;
60
+ static default(): WasmConversionOptions;
61
+ static from(update: WasmConversionOptionsUpdate): WasmConversionOptions;
62
+ /**
63
+ * Create from a partial update, applying to defaults. NOTE(review): `from` above has the same signature - presumably an alias; confirm.
64
+ */
65
+ static fromUpdate(update: WasmConversionOptionsUpdate): WasmConversionOptions;
66
+ constructor(heading_style?: WasmHeadingStyle | null, list_indent_type?: WasmListIndentType | null, list_indent_width?: number | null, bullets?: string | null, strong_em_symbol?: string | null, escape_asterisks?: boolean | null, escape_underscores?: boolean | null, escape_misc?: boolean | null, escape_ascii?: boolean | null, code_language?: string | null, autolinks?: boolean | null, default_title?: boolean | null, br_in_tables?: boolean | null, highlight_style?: WasmHighlightStyle | null, extract_metadata?: boolean | null, whitespace_mode?: WasmWhitespaceMode | null, strip_newlines?: boolean | null, wrap?: boolean | null, wrap_width?: number | null, convert_as_inline?: boolean | null, sub_symbol?: string | null, sup_symbol?: string | null, newline_style?: WasmNewlineStyle | null, code_block_style?: WasmCodeBlockStyle | null, keep_inline_images_in?: string[] | null, preprocessing?: WasmPreprocessingOptions | null, encoding?: string | null, debug?: boolean | null, strip_tags?: string[] | null, preserve_tags?: string[] | null, skip_images?: boolean | null, link_style?: WasmLinkStyle | null, output_format?: WasmOutputFormat | null, include_document_structure?: boolean | null, extract_images?: boolean | null, max_image_size?: bigint | null, capture_svg?: boolean | null, infer_dimensions?: boolean | null, exclude_selectors?: string[] | null, max_depth?: number | null);
67
+ autolinks: boolean;
68
+ brInTables: boolean;
69
+ bullets: string;
70
+ captureSvg: boolean;
71
+ get codeBlockStyle(): string;
72
+ set codeBlockStyle(value: WasmCodeBlockStyle);
73
+ codeLanguage: string;
74
+ convertAsInline: boolean;
75
+ debug: boolean;
76
+ defaultTitle: boolean;
77
+ encoding: string;
78
+ escapeAscii: boolean;
79
+ escapeAsterisks: boolean;
80
+ escapeMisc: boolean;
81
+ escapeUnderscores: boolean;
82
+ excludeSelectors: string[];
83
+ extractImages: boolean;
84
+ extractMetadata: boolean;
85
+ get headingStyle(): string;
86
+ set headingStyle(value: WasmHeadingStyle);
87
+ get highlightStyle(): string;
88
+ set highlightStyle(value: WasmHighlightStyle);
89
+ includeDocumentStructure: boolean;
90
+ inferDimensions: boolean;
91
+ keepInlineImagesIn: string[];
92
+ get linkStyle(): string;
93
+ set linkStyle(value: WasmLinkStyle);
94
+ get listIndentType(): string;
95
+ set listIndentType(value: WasmListIndentType);
96
+ listIndentWidth: number;
97
+ get maxDepth(): number | undefined;
98
+ set maxDepth(value: number | null | undefined);
99
+ maxImageSize: bigint;
100
+ get newlineStyle(): string;
101
+ set newlineStyle(value: WasmNewlineStyle);
102
+ get outputFormat(): string;
103
+ set outputFormat(value: WasmOutputFormat);
104
+ preprocessing: WasmPreprocessingOptions;
105
+ preserveTags: string[];
106
+ skipImages: boolean;
107
+ stripNewlines: boolean;
108
+ stripTags: string[];
109
+ strongEmSymbol: string;
110
+ subSymbol: string;
111
+ supSymbol: string;
112
+ get visitor(): WasmVisitorHandle | undefined;
113
+ set visitor(value: WasmVisitorHandle | null | undefined);
114
+ get whitespaceMode(): string;
115
+ set whitespaceMode(value: WasmWhitespaceMode);
116
+ wrap: boolean;
117
+ wrapWidth: number;
118
+ }
119
+
120
+ /**
121
+ * Builder for `WasmConversionOptions` (Rust: `ConversionOptions`).
122
+ *
123
+ * All fields start with default values. Call `.build()` to produce the final options. The constructor is private; obtain an instance from `WasmConversionOptions.builder()`.
124
+ */
125
+ export class WasmConversionOptionsBuilder {
126
+ private constructor();
127
+ free(): void;
128
+ [Symbol.dispose](): void;
129
+ /**
130
+ * Build the final `WasmConversionOptions`.
131
+ */
132
+ build(): WasmConversionOptions;
133
+ /**
134
+ * Set the list of CSS selectors for elements to exclude entirely from output. Returns a builder so calls can be chained.
135
+ */
136
+ excludeSelectors(selectors: string[]): WasmConversionOptionsBuilder;
137
+ /**
138
+ * Set the list of HTML tag names whose `<img>` children are kept inline. Returns a builder so calls can be chained.
139
+ */
140
+ keepInlineImagesIn(tags: string[]): WasmConversionOptionsBuilder;
141
+ /**
142
+ * Set the pre-processing options applied to the HTML before conversion. Returns a builder so calls can be chained.
143
+ */
144
+ preprocessing(preprocessing: WasmPreprocessingOptions): WasmConversionOptionsBuilder;
145
+ /**
146
+ * Set the list of HTML tag names that are preserved verbatim in output. Returns a builder so calls can be chained.
147
+ */
148
+ preserveTags(tags: string[]): WasmConversionOptionsBuilder;
149
+ /**
150
+ * Set the list of HTML tag names whose content is stripped from output. Returns a builder so calls can be chained.
151
+ */
152
+ stripTags(tags: string[]): WasmConversionOptionsBuilder;
153
+ /**
154
+ * Set the visitor used during conversion; the argument is optional and may be `null`. Returns a builder so calls can be chained.
155
+ */
156
+ visitor(visitor?: WasmVisitorHandle | null): WasmConversionOptionsBuilder;
157
+ }
158
+
159
+ /**
160
+ * Partial update for `WasmConversionOptions` (Rust: `ConversionOptions`).
161
+ *
162
+ * Every field is optional (Rust `Option<T>`): getters are typed `T | undefined` and setters also accept `null`. Bindings use this to construct
163
+ * options from language-native types; Rust code should prefer `ConversionOptionsBuilder`. Enum-typed fields read back as `string`, and the constructor ends with `max_depth`, `exclude_selectors` - the reverse of the `WasmConversionOptions` constructor.
164
+ */
165
+ export class WasmConversionOptionsUpdate {
166
+ free(): void;
167
+ [Symbol.dispose](): void;
168
+ static default(): WasmConversionOptionsUpdate;
169
+ constructor(heading_style?: WasmHeadingStyle | null, list_indent_type?: WasmListIndentType | null, list_indent_width?: number | null, bullets?: string | null, strong_em_symbol?: string | null, escape_asterisks?: boolean | null, escape_underscores?: boolean | null, escape_misc?: boolean | null, escape_ascii?: boolean | null, code_language?: string | null, autolinks?: boolean | null, default_title?: boolean | null, br_in_tables?: boolean | null, highlight_style?: WasmHighlightStyle | null, extract_metadata?: boolean | null, whitespace_mode?: WasmWhitespaceMode | null, strip_newlines?: boolean | null, wrap?: boolean | null, wrap_width?: number | null, convert_as_inline?: boolean | null, sub_symbol?: string | null, sup_symbol?: string | null, newline_style?: WasmNewlineStyle | null, code_block_style?: WasmCodeBlockStyle | null, keep_inline_images_in?: string[] | null, preprocessing?: WasmPreprocessingOptionsUpdate | null, encoding?: string | null, debug?: boolean | null, strip_tags?: string[] | null, preserve_tags?: string[] | null, skip_images?: boolean | null, link_style?: WasmLinkStyle | null, output_format?: WasmOutputFormat | null, include_document_structure?: boolean | null, extract_images?: boolean | null, max_image_size?: bigint | null, capture_svg?: boolean | null, infer_dimensions?: boolean | null, max_depth?: number | null, exclude_selectors?: string[] | null);
170
+ get autolinks(): boolean | undefined;
171
+ set autolinks(value: boolean | null | undefined);
172
+ get brInTables(): boolean | undefined;
173
+ set brInTables(value: boolean | null | undefined);
174
+ get bullets(): string | undefined;
175
+ set bullets(value: string | null | undefined);
176
+ get captureSvg(): boolean | undefined;
177
+ set captureSvg(value: boolean | null | undefined);
178
+ get codeBlockStyle(): string | undefined;
179
+ set codeBlockStyle(value: WasmCodeBlockStyle | null | undefined);
180
+ get codeLanguage(): string | undefined;
181
+ set codeLanguage(value: string | null | undefined);
182
+ get convertAsInline(): boolean | undefined;
183
+ set convertAsInline(value: boolean | null | undefined);
184
+ get debug(): boolean | undefined;
185
+ set debug(value: boolean | null | undefined);
186
+ get defaultTitle(): boolean | undefined;
187
+ set defaultTitle(value: boolean | null | undefined);
188
+ get encoding(): string | undefined;
189
+ set encoding(value: string | null | undefined);
190
+ get escapeAscii(): boolean | undefined;
191
+ set escapeAscii(value: boolean | null | undefined);
192
+ get escapeAsterisks(): boolean | undefined;
193
+ set escapeAsterisks(value: boolean | null | undefined);
194
+ get escapeMisc(): boolean | undefined;
195
+ set escapeMisc(value: boolean | null | undefined);
196
+ get escapeUnderscores(): boolean | undefined;
197
+ set escapeUnderscores(value: boolean | null | undefined);
198
+ get excludeSelectors(): string[] | undefined;
199
+ set excludeSelectors(value: string[] | null | undefined);
200
+ get extractImages(): boolean | undefined;
201
+ set extractImages(value: boolean | null | undefined);
202
+ get extractMetadata(): boolean | undefined;
203
+ set extractMetadata(value: boolean | null | undefined);
204
+ get headingStyle(): string | undefined;
205
+ set headingStyle(value: WasmHeadingStyle | null | undefined);
206
+ get highlightStyle(): string | undefined;
207
+ set highlightStyle(value: WasmHighlightStyle | null | undefined);
208
+ get includeDocumentStructure(): boolean | undefined;
209
+ set includeDocumentStructure(value: boolean | null | undefined);
210
+ get inferDimensions(): boolean | undefined;
211
+ set inferDimensions(value: boolean | null | undefined);
212
+ get keepInlineImagesIn(): string[] | undefined;
213
+ set keepInlineImagesIn(value: string[] | null | undefined);
214
+ get linkStyle(): string | undefined;
215
+ set linkStyle(value: WasmLinkStyle | null | undefined);
216
+ get listIndentType(): string | undefined;
217
+ set listIndentType(value: WasmListIndentType | null | undefined);
218
+ get listIndentWidth(): number | undefined;
219
+ set listIndentWidth(value: number | null | undefined);
220
+ get maxDepth(): number | undefined;
221
+ set maxDepth(value: number | null | undefined);
222
+ get maxImageSize(): bigint | undefined;
223
+ set maxImageSize(value: bigint | null | undefined);
224
+ get newlineStyle(): string | undefined;
225
+ set newlineStyle(value: WasmNewlineStyle | null | undefined);
226
+ get outputFormat(): string | undefined;
227
+ set outputFormat(value: WasmOutputFormat | null | undefined);
228
+ get preprocessing(): WasmPreprocessingOptionsUpdate | undefined;
229
+ set preprocessing(value: WasmPreprocessingOptionsUpdate | null | undefined);
230
+ get preserveTags(): string[] | undefined;
231
+ set preserveTags(value: string[] | null | undefined);
232
+ get skipImages(): boolean | undefined;
233
+ set skipImages(value: boolean | null | undefined);
234
+ get stripNewlines(): boolean | undefined;
235
+ set stripNewlines(value: boolean | null | undefined);
236
+ get stripTags(): string[] | undefined;
237
+ set stripTags(value: string[] | null | undefined);
238
+ get strongEmSymbol(): string | undefined;
239
+ set strongEmSymbol(value: string | null | undefined);
240
+ get subSymbol(): string | undefined;
241
+ set subSymbol(value: string | null | undefined);
242
+ get supSymbol(): string | undefined;
243
+ set supSymbol(value: string | null | undefined);
244
+ get visitor(): WasmVisitorHandle | undefined;
245
+ set visitor(value: WasmVisitorHandle | null | undefined);
246
+ get whitespaceMode(): string | undefined;
247
+ set whitespaceMode(value: WasmWhitespaceMode | null | undefined);
248
+ get wrap(): boolean | undefined;
249
+ set wrap(value: boolean | null | undefined);
250
+ get wrapWidth(): number | undefined;
251
+ set wrapWidth(value: number | null | undefined);
252
+ }
253
+
254
+ /**
255
+ * The primary result of HTML conversion and extraction.
256
+ *
257
+ * Contains the converted text output, optional structured document tree,
258
+ * metadata, extracted tables, images, and processing warnings. The constructor takes only `tables`, `warnings`, `content` and `document`; `images` and `metadata` are exposed as properties.
259
+ *
260
+ * # Example (Rust)
261
+ * The snippet below uses the Rust API of `html_to_markdown_rs`, not this TypeScript class.
262
+ * ```text
263
+ * use html_to_markdown_rs::{convert, ConversionOptions};
264
+ *
265
+ * let result = convert("<h1>Hello</h1><p>World</p>", None)?;
266
+ * assert!(result.content.is_some());
267
+ * assert!(result.warnings.is_empty());
268
+ * ```
269
+ */
270
+ export class WasmConversionResult {
271
+ free(): void;
272
+ [Symbol.dispose](): void;
273
+ static default(): WasmConversionResult;
274
+ constructor(tables?: WasmTableData[] | null, warnings?: WasmProcessingWarning[] | null, content?: string | null, document?: WasmDocumentStructure | null);
275
+ get content(): string | undefined;
276
+ set content(value: string | null | undefined);
277
+ get document(): WasmDocumentStructure | undefined;
278
+ set document(value: WasmDocumentStructure | null | undefined);
279
+ images: string[];
280
+ metadata: WasmHtmlMetadata;
281
+ tables: WasmTableData[];
282
+ warnings: WasmProcessingWarning[];
283
+ }
284
+
285
+ /**
286
+ * Document-level metadata extracted from `<head>` and top-level elements.
287
+ *
288
+ * Contains all metadata typically used by search engines, social media platforms,
289
+ * and browsers for document indexing and presentation. `openGraph`, `twitterCard` and `metaTags` are typed `any` here; `textDirection` is set with a `WasmTextDirection` but reads back as `string`.
290
+ *
291
+ * # Examples (Rust)
292
+ *
293
+ * ```rust
294
+ * let doc = DocumentMetadata {
295
+ * title: Some("My Article".to_string()),
296
+ * description: Some("A great article about Rust".to_string()),
297
+ * keywords: vec!["rust".to_string(), "programming".to_string()],
298
+ * ..Default::default()
299
+ * };
300
+ *
301
+ * assert_eq!(doc.title, Some("My Article".to_string()));
302
+ * ```
303
+ */
304
+ export class WasmDocumentMetadata {
305
+ free(): void;
306
+ [Symbol.dispose](): void;
307
+ static default(): WasmDocumentMetadata;
308
+ constructor(keywords?: string[] | null, open_graph?: any | null, twitter_card?: any | null, meta_tags?: any | null, title?: string | null, description?: string | null, author?: string | null, canonical_url?: string | null, base_href?: string | null, language?: string | null, text_direction?: WasmTextDirection | null);
309
+ get author(): string | undefined;
310
+ set author(value: string | null | undefined);
311
+ get baseHref(): string | undefined;
312
+ set baseHref(value: string | null | undefined);
313
+ get canonicalUrl(): string | undefined;
314
+ set canonicalUrl(value: string | null | undefined);
315
+ get description(): string | undefined;
316
+ set description(value: string | null | undefined);
317
+ keywords: string[];
318
+ get language(): string | undefined;
319
+ set language(value: string | null | undefined);
320
+ metaTags: any;
321
+ openGraph: any;
322
+ get textDirection(): string | undefined;
323
+ set textDirection(value: WasmTextDirection | null | undefined);
324
+ get title(): string | undefined;
325
+ set title(value: string | null | undefined);
326
+ twitterCard: any;
327
+ }
328
+
329
+ /**
330
+ * A single node in the document tree. `children` is a `Uint32Array` and `parent` an optional number (presumably indices into the owning structure's node array - confirm); `content` is set with a `WasmNodeContent` but reads back as `string`.
331
+ */
332
+ export class WasmDocumentNode {
333
+ free(): void;
334
+ [Symbol.dispose](): void;
335
+ static default(): WasmDocumentNode;
336
+ constructor(id: string, content: WasmNodeContent, children: Uint32Array, annotations: WasmTextAnnotation[], parent?: number | null, attributes?: any | null);
337
+ annotations: WasmTextAnnotation[];
338
+ get attributes(): any | undefined;
339
+ set attributes(value: any | null | undefined);
340
+ children: Uint32Array;
341
+ get content(): string;
342
+ set content(value: WasmNodeContent);
343
+ id: string;
344
+ get parent(): number | undefined;
345
+ set parent(value: number | null | undefined);
346
+ }
347
+
348
+ /**
349
+ * A structured document tree representing the semantic content of an HTML document.
350
+ *
351
+ * Uses a flat node array (`nodes`) with index-based parent/child references for efficient traversal; `sourceFormat` is optional.
352
+ */
353
+ export class WasmDocumentStructure {
354
+ free(): void;
355
+ [Symbol.dispose](): void;
356
+ static default(): WasmDocumentStructure;
357
+ constructor(nodes: WasmDocumentNode[], source_format?: string | null);
358
+ nodes: WasmDocumentNode[];
359
+ get sourceFormat(): string | undefined;
360
+ set sourceFormat(value: string | null | undefined);
361
+ }
362
+
363
+ /**
364
+ * A single cell in a table grid: its text `content`, position (`row`, `col`), spans (`rowSpan`, `colSpan`) and `isHeader` flag.
365
+ */
366
+ export class WasmGridCell {
367
+ free(): void;
368
+ [Symbol.dispose](): void;
369
+ static default(): WasmGridCell;
370
+ constructor(content: string, row: number, col: number, row_span: number, col_span: number, is_header: boolean);
371
+ col: number;
372
+ colSpan: number;
373
+ content: string;
374
+ isHeader: boolean;
375
+ row: number;
376
+ rowSpan: number;
377
+ }
378
+
379
+ /**
380
+ * Header element metadata with hierarchy tracking.
381
+ *
382
+ * Captures heading elements (h1-h6) with their text content, identifiers,
383
+ * and position in the document structure.
384
+ *
385
+ * # Examples (Rust; `is_valid()` is exposed here as `isValid()`)
386
+ *
387
+ * ```rust
388
+ * let header = HeaderMetadata {
389
+ * level: 1,
390
+ * text: "Main Title".to_string(),
391
+ * id: Some("main-title".to_string()),
392
+ * depth: 0,
393
+ * html_offset: 145,
394
+ * };
395
+ *
396
+ * assert_eq!(header.level, 1);
397
+ * assert!(header.is_valid());
398
+ * ```
399
+ */
400
+ export class WasmHeaderMetadata {
401
+ free(): void;
402
+ [Symbol.dispose](): void;
403
+ static default(): WasmHeaderMetadata;
404
+ /**
405
+ * Validate that the header level is within valid range (1-6).
406
+ *
407
+ * # Returns
408
+ *
409
+ * `true` if level is 1-6, `false` otherwise.
410
+ *
411
+ * # Examples (Rust)
412
+ *
413
+ * ```rust
414
+ * let valid = HeaderMetadata {
415
+ * level: 3,
416
+ * text: "Title".to_string(),
417
+ * id: None,
418
+ * depth: 2,
419
+ * html_offset: 100,
420
+ * };
421
+ * assert!(valid.is_valid());
422
+ *
423
+ * let invalid = HeaderMetadata {
424
+ * level: 7, // Invalid
425
+ * text: "Title".to_string(),
426
+ * id: None,
427
+ * depth: 2,
428
+ * html_offset: 100,
429
+ * };
430
+ * assert!(!invalid.is_valid());
431
+ * ```
432
+ */
433
+ isValid(): boolean;
434
+ constructor(level: number, text: string, depth: number, html_offset: number, id?: string | null);
435
+ depth: number;
436
+ htmlOffset: number;
437
+ get id(): string | undefined;
438
+ set id(value: string | null | undefined);
439
+ level: number;
440
+ text: string;
441
+ }
442
+
443
+ /**
444
+ * Heading style options for Markdown output.
445
+ *
446
+ * Controls how headings (h1-h6) are rendered in the output Markdown. Accepted by the `headingStyle` setters and the `heading_style` constructor arguments of `WasmConversionOptions` and `WasmConversionOptionsUpdate`.
447
+ */
448
+ export enum WasmHeadingStyle {
449
+ Underlined = 0,
450
+ Atx = 1,
451
+ AtxClosed = 2,
452
+ }
453
+
454
+ /**
455
+ * Highlight rendering style for `<mark>` elements.
456
+ *
457
+ * Controls how highlighted text is rendered in Markdown output. Accepted by the `highlightStyle` setters and the `highlight_style` constructor arguments of `WasmConversionOptions` and `WasmConversionOptionsUpdate`.
458
+ */
459
+ export enum WasmHighlightStyle {
460
+ DoubleEqual = 0,
461
+ Html = 1,
462
+ Bold = 2,
463
+ None = 3,
464
+ }
465
+
466
+ /**
467
+ * Comprehensive metadata extraction result from HTML document.
468
+ *
469
+ * Contains all extracted metadata types in a single structure,
470
+ * suitable for serialization and transmission across language boundaries. Every constructor argument is optional, while the corresponding properties are declared non-optional.
471
+ *
472
+ * # Examples (Rust)
473
+ *
474
+ * ```rust
475
+ * let metadata = HtmlMetadata {
476
+ * document: Default::default(),
477
+ * headers: Vec::new(),
478
+ * links: Vec::new(),
479
+ * images: Vec::new(),
480
+ * structured_data: Vec::new(),
481
+ * };
482
+ *
483
+ * assert!(metadata.headers.is_empty());
484
+ * ```
485
+ */
486
+ export class WasmHtmlMetadata {
487
+ free(): void;
488
+ [Symbol.dispose](): void;
489
+ static default(): WasmHtmlMetadata;
490
+ constructor(document?: WasmDocumentMetadata | null, headers?: WasmHeaderMetadata[] | null, links?: WasmLinkMetadata[] | null, images?: WasmImageMetadata[] | null, structured_data?: WasmStructuredData[] | null);
491
+ document: WasmDocumentMetadata;
492
+ headers: WasmHeaderMetadata[];
493
+ images: WasmImageMetadata[];
494
+ links: WasmLinkMetadata[];
495
+ structuredData: WasmStructuredData[];
496
+ }
497
+
498
+ /**
499
+ * Image metadata with source and dimensions.
500
+ *
501
+ * Captures `<img>` elements and inline `<svg>` elements with metadata
502
+ * for image analysis and optimization. `imageType` is set with a `WasmImageType` but reads back as `string`; `dimensions` is an optional `Uint32Array` (the Rust example passes `(800, 600)`, so presumably width then height - confirm).
503
+ *
504
+ * # Examples (Rust)
505
+ *
506
+ * ```rust
507
+ * let img = ImageMetadata {
508
+ * src: "https://example.com/image.jpg".to_string(),
509
+ * alt: Some("An example image".to_string()),
510
+ * title: Some("Example".to_string()),
511
+ * dimensions: Some((800, 600)),
512
+ * image_type: ImageType::External,
513
+ * attributes: Default::default(),
514
+ * };
515
+ *
516
+ * assert_eq!(img.image_type, ImageType::External);
517
+ * ```
518
+ */
519
+ export class WasmImageMetadata {
520
+ free(): void;
521
+ [Symbol.dispose](): void;
522
+ static default(): WasmImageMetadata;
523
+ constructor(src: string, image_type: WasmImageType, attributes: any, alt?: string | null, title?: string | null, dimensions?: Uint32Array | null);
524
+ get alt(): string | undefined;
525
+ set alt(value: string | null | undefined);
526
+ attributes: any;
527
+ get dimensions(): Uint32Array | undefined;
528
+ set dimensions(value: Uint32Array | null | undefined);
529
+ get imageType(): string;
530
+ set imageType(value: WasmImageType);
531
+ src: string;
532
+ get title(): string | undefined;
533
+ set title(value: string | null | undefined);
534
+ }
535
+
536
+ /**
537
+ * Image source classification for proper handling and processing.
538
+ *
539
+ * Determines whether an image is embedded (data URI), inline SVG, external, or relative. Accepted by the `imageType` setter and the `image_type` constructor argument of `WasmImageMetadata`.
540
+ */
541
+ export enum WasmImageType {
542
+ DataUri = 0,
543
+ InlineSvg = 1,
544
+ External = 2,
545
+ Relative = 3,
546
+ }
547
+
548
+ /**
549
+ * Hyperlink metadata with categorization and attributes.
550
+ *
551
+ * Represents `<a>` elements with parsed href values, text content, and link type classification. `linkType` is set with a `WasmLinkType` but reads back as `string`.
552
+ *
553
+ * # Examples (Rust)
554
+ *
555
+ * ```rust
556
+ * let link = LinkMetadata {
557
+ * href: "https://example.com".to_string(),
558
+ * text: "Example".to_string(),
559
+ * title: Some("Visit Example".to_string()),
560
+ * link_type: LinkType::External,
561
+ * rel: vec!["nofollow".to_string()],
562
+ * attributes: Default::default(),
563
+ * };
564
+ *
565
+ * assert_eq!(link.link_type, LinkType::External);
566
+ * assert_eq!(link.text, "Example");
567
+ * ```
568
+ */
569
+ export class WasmLinkMetadata {
570
+ free(): void;
571
+ [Symbol.dispose](): void;
572
+ /**
573
+ * Classify a link based on href value.
574
+ *
575
+ * # Arguments
576
+ *
577
+ * * `href` - The href attribute value
578
+ *
579
+ * # Returns
580
+ *
581
+ * Appropriate `WasmLinkType` (Rust: `LinkType`) based on protocol and content.
582
+ *
583
+ * # Examples (Rust; exposed here as the static `classifyLink`)
584
+ *
585
+ * ```rust
586
+ * assert_eq!(LinkMetadata::classify_link("#section"), LinkType::Anchor);
587
+ * assert_eq!(LinkMetadata::classify_link("mailto:test@example.com"), LinkType::Email);
588
+ * assert_eq!(LinkMetadata::classify_link("tel:+1234567890"), LinkType::Phone);
589
+ * assert_eq!(LinkMetadata::classify_link("https://example.com"), LinkType::External);
590
+ * ```
591
+ */
592
+ static classifyLink(href: string): WasmLinkType;
593
+ static default(): WasmLinkMetadata;
594
+ constructor(href: string, text: string, link_type: WasmLinkType, rel: string[], attributes: any, title?: string | null);
595
+ attributes: any;
596
+ href: string;
597
+ get linkType(): string;
598
+ set linkType(value: WasmLinkType);
599
+ rel: string[];
600
+ text: string;
601
+ get title(): string | undefined;
602
+ set title(value: string | null | undefined);
603
+ }
604
+
605
+ /**
606
+ * Link rendering style in Markdown output.
607
+ *
608
+ * Controls whether links and images use inline `[text](url)` syntax or
609
+ * reference-style `[text][1]` syntax with definitions collected at the end. Accepted by the `linkStyle` setters and the `link_style` constructor arguments of `WasmConversionOptions` and `WasmConversionOptionsUpdate`.
610
+ */
611
+ export enum WasmLinkStyle {
612
+ Inline = 0,
613
+ Reference = 1,
614
+ }
615
+
616
+ /**
617
+ * Link classification based on href value and document context.
618
+ *
619
+ * Used to categorize links during extraction for filtering and analysis. Returned by `WasmLinkMetadata.classifyLink` and accepted by the `linkType` setter of `WasmLinkMetadata`.
620
+ */
621
+ export enum WasmLinkType {
622
+ Anchor = 0,
623
+ Internal = 1,
624
+ External = 2,
625
+ Email = 3,
626
+ Phone = 4,
627
+ Other = 5,
628
+ }
629
+
630
+ /**
631
+ * List indentation character type.
632
+ *
633
+ * Controls whether list items are indented with spaces or tabs. Accepted by the `listIndentType` setters and the `list_indent_type` constructor arguments of `WasmConversionOptions` and `WasmConversionOptionsUpdate`.
634
+ */
635
+ export enum WasmListIndentType {
636
+ Spaces = 0,
637
+ Tabs = 1,
638
+ }
639
+
640
+ /**
641
+ * Line break syntax in Markdown output.
642
+ *
643
+ * Controls how soft line breaks (from `<br>` or line breaks in source) are rendered. NOTE(review): `Spaces`/`Backslash` presumably select the trailing-spaces and trailing-backslash forms, which CommonMark calls hard line breaks - confirm the "soft" wording. Accepted by the `newlineStyle` setters of `WasmConversionOptions` and `WasmConversionOptionsUpdate`.
644
+ */
645
+ export enum WasmNewlineStyle {
646
+ Spaces = 0,
647
+ Backslash = 1,
648
+ }
649
+
650
+ /**
651
+ * The semantic content type of a document node.
652
+ *
653
+ * Uses internally tagged representation (`"node_type": "heading"`) for JSON serialization; in this declaration each variant is a numeric enum member (0-12), accepted by the `content` setter and constructor argument of `WasmDocumentNode`.
654
+ */
655
+ export enum WasmNodeContent {
656
+ Heading = 0,
657
+ Paragraph = 1,
658
+ List = 2,
659
+ ListItem = 3,
660
+ Table = 4,
661
+ Image = 5,
662
+ Code = 6,
663
+ Quote = 7,
664
+ DefinitionList = 8,
665
+ DefinitionItem = 9,
666
+ RawBlock = 10,
667
+ MetadataBlock = 11,
668
+ Group = 12,
669
+ }
670
+
671
+ /**
672
+ * Context information passed to all visitor methods.
673
+ *
674
+ * Provides comprehensive metadata about the current node being visited,
675
+ * including its type, attributes, position in the DOM tree, and parent context. `nodeType` is set with a `WasmNodeType` but reads back as `string`; `parentTag` is optional and `attributes` is typed `any`.
676
+ */
677
+ export class WasmNodeContext {
678
+ free(): void;
679
+ [Symbol.dispose](): void;
680
+ static default(): WasmNodeContext;
681
+ constructor(node_type: WasmNodeType, tag_name: string, attributes: any, depth: number, index_in_parent: number, is_inline: boolean, parent_tag?: string | null);
682
+ attributes: any;
683
+ depth: number;
684
+ indexInParent: number;
685
+ isInline: boolean;
686
+ get nodeType(): string;
687
+ set nodeType(value: WasmNodeType);
688
+ get parentTag(): string | undefined;
689
+ set parentTag(value: string | null | undefined);
690
+ tagName: string;
691
+ }
692
+
693
+ /**
694
+ * Node type enumeration covering all HTML element types.
695
+ *
696
+ * This enum categorizes all HTML elements that the converter recognizes,
697
+ * providing a coarse-grained classification for visitor dispatch. Members are numbered 0-87 and are accepted by the `nodeType` setter and `node_type` constructor argument of `WasmNodeContext`.
698
+ */
699
+ export enum WasmNodeType {
700
+ Text = 0,
701
+ Element = 1,
702
+ Heading = 2,
703
+ Paragraph = 3,
704
+ Div = 4,
705
+ Blockquote = 5,
706
+ Pre = 6,
707
+ Hr = 7,
708
+ List = 8,
709
+ ListItem = 9,
710
+ DefinitionList = 10,
711
+ DefinitionTerm = 11,
712
+ DefinitionDescription = 12,
713
+ Table = 13,
714
+ TableRow = 14,
715
+ TableCell = 15,
716
+ TableHeader = 16,
717
+ TableBody = 17,
718
+ TableHead = 18,
719
+ TableFoot = 19,
720
+ Link = 20,
721
+ Image = 21,
722
+ Strong = 22,
723
+ Em = 23,
724
+ Code = 24,
725
+ Strikethrough = 25,
726
+ Underline = 26,
727
+ Subscript = 27,
728
+ Superscript = 28,
729
+ Mark = 29,
730
+ Small = 30,
731
+ Br = 31,
732
+ Span = 32,
733
+ Article = 33,
734
+ Section = 34,
735
+ Nav = 35,
736
+ Aside = 36,
737
+ Header = 37,
738
+ Footer = 38,
739
+ Main = 39,
740
+ Figure = 40,
741
+ Figcaption = 41,
742
+ Time = 42,
743
+ Details = 43,
744
+ Summary = 44,
745
+ Form = 45,
746
+ Input = 46,
747
+ Select = 47,
748
+ Option = 48,
749
+ Button = 49,
750
+ Textarea = 50,
751
+ Label = 51,
752
+ Fieldset = 52,
753
+ Legend = 53,
754
+ Audio = 54,
755
+ Video = 55,
756
+ Picture = 56,
757
+ Source = 57,
758
+ Iframe = 58,
759
+ Svg = 59,
760
+ Canvas = 60,
761
+ Ruby = 61,
762
+ Rt = 62,
763
+ Rp = 63,
764
+ Abbr = 64,
765
+ Kbd = 65,
766
+ Samp = 66,
767
+ Var = 67,
768
+ Cite = 68,
769
+ Q = 69,
770
+ Del = 70,
771
+ Ins = 71,
772
+ Data = 72,
773
+ Meter = 73,
774
+ Progress = 74,
775
+ Output = 75,
776
+ Template = 76,
777
+ Slot = 77,
778
+ Html = 78,
779
+ Head = 79,
780
+ Body = 80,
781
+ Title = 81,
782
+ Meta = 82,
783
+ LinkTag = 83,
784
+ Style = 84,
785
+ Script = 85,
786
+ Base = 86,
787
+ Custom = 87,
788
+ }
789
+
790
+ /**
791
+ * Output format for conversion.
792
+ *
793
+ * Specifies the target markup language format for the conversion output. Accepted by the `outputFormat` setters and the `output_format` constructor arguments of `WasmConversionOptions` and `WasmConversionOptionsUpdate`.
794
+ */
795
+ export enum WasmOutputFormat {
796
+ Markdown = 0,
797
+ Djot = 1,
798
+ Plain = 2,
799
+ }
800
+
801
+ /**
802
+ * HTML preprocessing options for document cleanup before conversion. `preset` is set with a `WasmPreprocessingPreset` but reads back as `string`; all constructor arguments are optional.
803
+ */
804
+ export class WasmPreprocessingOptions {
805
+ free(): void;
806
+ [Symbol.dispose](): void;
807
+ /**
808
+ * Apply a partial update to these preprocessing options.
809
+ *
810
+ * Any specified fields in the update will override the current values.
811
+ * Unspecified fields (`undefined` here, `None` in Rust) are left unchanged.
812
+ *
813
+ * # Arguments
814
+ *
815
+ * * `update` - Partial preprocessing options update (`WasmPreprocessingOptionsUpdate`)
816
+ */
817
+ applyUpdate(update: WasmPreprocessingOptionsUpdate): void;
818
+ static default(): WasmPreprocessingOptions;
819
+ static from(update: WasmPreprocessingOptionsUpdate): WasmPreprocessingOptions;
820
+ /**
821
+ * Create new preprocessing options from a partial update. NOTE(review): `from` above has the same signature - presumably an alias; confirm.
822
+ *
823
+ * Creates a new `WasmPreprocessingOptions` (Rust: `PreprocessingOptions`) with defaults, then applies the update.
824
+ * Fields not specified in the update keep their default values.
825
+ *
826
+ * # Arguments
827
+ *
828
+ * * `update` - Partial preprocessing options update
829
+ *
830
+ * # Returns
831
+ *
832
+ * New `WasmPreprocessingOptions` with specified updates applied to defaults
833
+ */
834
+ static fromUpdate(update: WasmPreprocessingOptionsUpdate): WasmPreprocessingOptions;
835
+ constructor(enabled?: boolean | null, preset?: WasmPreprocessingPreset | null, remove_navigation?: boolean | null, remove_forms?: boolean | null);
836
+ enabled: boolean;
837
+ get preset(): string;
838
+ set preset(value: WasmPreprocessingPreset);
839
+ removeForms: boolean;
840
+ removeNavigation: boolean;
841
+ }
842
+
843
+ /**
844
+ * Partial update for `WasmPreprocessingOptions` (Rust: `PreprocessingOptions`).
845
+ *
846
+ * The Rust struct uses `Option<T>` to represent optional fields that can be selectively updated; here getters are typed `T | undefined` and setters also accept `null`.
847
+ * Only specified fields (Some values) will override existing options; None values leave the
848
+ * corresponding fields unchanged when applied via `WasmPreprocessingOptions.applyUpdate` (Rust: `PreprocessingOptions::apply_update`).
849
+ */
850
+ export class WasmPreprocessingOptionsUpdate {
851
+ free(): void;
852
+ [Symbol.dispose](): void;
853
+ static default(): WasmPreprocessingOptionsUpdate;
854
+ constructor(enabled?: boolean | null, preset?: WasmPreprocessingPreset | null, remove_navigation?: boolean | null, remove_forms?: boolean | null);
855
+ get enabled(): boolean | undefined;
856
+ set enabled(value: boolean | null | undefined);
857
+ get preset(): string | undefined;
858
+ set preset(value: WasmPreprocessingPreset | null | undefined);
859
+ get removeForms(): boolean | undefined;
860
+ set removeForms(value: boolean | null | undefined);
861
+ get removeNavigation(): boolean | undefined;
862
+ set removeNavigation(value: boolean | null | undefined);
863
+ }
864
+
865
+ /**
866
+ * HTML preprocessing aggressiveness level.
867
+ *
868
+ * Controls the extent of cleanup performed before conversion. Higher levels remove more elements. Accepted as the `preset` value by `WasmPreprocessingOptionsUpdate`.
869
+ */
870
+ export enum WasmPreprocessingPreset {
871
+ Minimal = 0, // NOTE(review): presumably the least aggressive level, given the numeric order — confirm.
872
+ Standard = 1,
873
+ Aggressive = 2, // NOTE(review): presumably the most aggressive level — confirm.
874
+ }
875
+
876
+ /**
877
+ * A non-fatal warning generated during HTML processing; pairs a `message` string with a category taken from `WasmWarningKind`.
878
+ */
879
+ export class WasmProcessingWarning {
880
+ free(): void; // NOTE(review): presumably releases the WASM-side object — confirm against the generated glue code.
881
+ [Symbol.dispose](): void;
882
+ static default(): WasmProcessingWarning;
883
+ constructor(message: string, kind: WasmWarningKind);
884
+ get kind(): string; // NOTE(review): read as `string` although the setter and constructor take `WasmWarningKind` — confirm the string form returned.
885
+ set kind(value: WasmWarningKind);
886
+ message: string;
887
+ }
888
+
889
+ /**
890
+ * Structured data block (JSON-LD, Microdata, or RDFa).
891
+ *
892
+ * Represents machine-readable structured data found in the document.
893
+ * JSON-LD blocks are collected as raw JSON strings for flexibility.
894
+ *
895
+ * # Examples (Rust API; the TypeScript members are `dataType`, `rawJson` and `schemaType`)
896
+ *
897
+ * ```rust
898
+ * let schema = StructuredData {
899
+ * data_type: StructuredDataType::JsonLd,
900
+ * raw_json: r#"{"@context":"https://schema.org","@type":"Article"}"#.to_string(),
901
+ * schema_type: Some("Article".to_string()),
902
+ * };
903
+ *
904
+ * assert_eq!(schema.data_type, StructuredDataType::JsonLd);
905
+ * ```
906
+ */
907
+ export class WasmStructuredData {
908
+ free(): void; // NOTE(review): presumably releases the WASM-side object — confirm against the generated glue code.
909
+ [Symbol.dispose](): void;
910
+ static default(): WasmStructuredData;
911
+ constructor(data_type: WasmStructuredDataType, raw_json: string, schema_type?: string | null); // Only `schema_type` may be omitted or null.
912
+ get dataType(): string; // NOTE(review): read as `string` although the setter takes `WasmStructuredDataType` — confirm the string form returned.
913
+ set dataType(value: WasmStructuredDataType);
914
+ rawJson: string;
915
+ get schemaType(): string | undefined;
916
+ set schemaType(value: string | null | undefined);
917
+ }
918
+
919
+ /**
920
+ * Structured data format type.
921
+ *
922
+ * Identifies the schema/format used for structured data markup; passed as the `data_type` of `WasmStructuredData`.
923
+ */
924
+ export enum WasmStructuredDataType {
925
+ JsonLd = 0,
926
+ Microdata = 1,
927
+ RDFa = 2,
928
+ }
929
+
930
+ /**
931
+ * A top-level extracted table with both structured data (`grid`, a `WasmTableGrid`) and markdown representation (`markdown`, a string).
932
+ */
933
+ export class WasmTableData {
934
+ free(): void; // NOTE(review): presumably releases the WASM-side object — confirm against the generated glue code.
935
+ [Symbol.dispose](): void;
936
+ static default(): WasmTableData;
937
+ constructor(grid: WasmTableGrid, markdown: string);
938
+ grid: WasmTableGrid;
939
+ markdown: string;
940
+ }
941
+
942
+ /**
943
+ * A structured table grid with cell-level data including spans; exposes `rows`, `cols` and a `cells` array of `WasmGridCell`.
944
+ */
945
+ export class WasmTableGrid {
946
+ free(): void; // NOTE(review): presumably releases the WASM-side object — confirm against the generated glue code.
947
+ [Symbol.dispose](): void;
948
+ static default(): WasmTableGrid;
949
+ constructor(rows?: number | null, cols?: number | null, cells?: WasmGridCell[] | null); // Every argument may be omitted or null. NOTE(review): presumably defaults apply in that case — confirm.
950
+ cells: WasmGridCell[];
951
+ cols: number; // NOTE(review): presumably the column count — confirm.
952
+ rows: number; // NOTE(review): presumably the row count — confirm.
953
+ }
954
+
955
+ /**
956
+ * An inline text annotation with byte-range offsets (`start` and `end`) and a `kind` taken from `WasmAnnotationKind`.
957
+ *
958
+ * Annotations describe formatting (bold, italic, etc.) and links within a node's text content.
959
+ */
960
+ export class WasmTextAnnotation {
961
+ free(): void; // NOTE(review): presumably releases the WASM-side object — confirm against the generated glue code.
962
+ [Symbol.dispose](): void;
963
+ static default(): WasmTextAnnotation;
964
+ constructor(start: number, end: number, kind: WasmAnnotationKind);
965
+ end: number; // NOTE(review): whether `end` is inclusive or exclusive is not stated here — confirm.
966
+ get kind(): string; // NOTE(review): read as `string` although the setter and constructor take `WasmAnnotationKind` — confirm the string form returned.
967
+ set kind(value: WasmAnnotationKind);
968
+ start: number;
969
+ }
970
+
971
+ /**
972
+ * Text directionality of document content.
973
+ *
974
+ * Corresponds to the HTML `dir` attribute and `bdi` element directionality (presumably `ltr`, `rtl` and `auto` respectively — confirm).
975
+ */
976
+ export enum WasmTextDirection {
977
+ LeftToRight = 0,
978
+ RightToLeft = 1,
979
+ Auto = 2,
980
+ }
981
+
982
+ /**
983
+ * Result of a visitor callback.
984
+ *
985
+ * Allows visitors to control the conversion flow by either proceeding
986
+ * with default behavior (`Continue`), providing custom output (`Custom`), skipping elements (`Skip`),
987
+ * preserving HTML (`PreserveHtml`), or signaling errors (`Error`).
988
+ */
989
+ export enum WasmVisitResult {
990
+ Continue = 0,
991
+ Custom = 1, // NOTE(review): this enum carries no payload; how the custom output itself is supplied is not visible here.
992
+ Skip = 2,
993
+ PreserveHtml = 3,
994
+ Error = 4,
995
+ }
996
+
997
+ /**
998
+ * Type alias for a visitor handle (`Arc`-wrapped `Mutex` for thread-safe shared mutation). This describes the Rust side; in TypeScript it is a class constructed from a `visitor` value.
999
+ *
1000
+ * `Send + Sync` so that types embedding a `VisitorHandle` (e.g. `ConversionOptions`)
1001
+ * can be shared across threads — required by callers that stash configs inside
1002
+ * axum/rmcp/tokio Send-bound contexts.
1003
+ */
1004
+ export class WasmVisitorHandle {
1005
+ free(): void; // NOTE(review): presumably releases the WASM-side object — confirm against the generated glue code.
1006
+ [Symbol.dispose](): void;
1007
+ constructor(visitor: any); // NOTE(review): `visitor` is untyped; the callback shape it must provide is not declared here — check the package documentation.
1008
+ }
1009
+
1010
+ /**
1011
+ * Categories of processing warnings; passed as the `kind` of a `WasmProcessingWarning`.
1012
+ */
1013
+ export enum WasmWarningKind {
1014
+ ImageExtractionFailed = 0,
1015
+ EncodingFallback = 1,
1016
+ TruncatedInput = 2,
1017
+ MalformedHtml = 3,
1018
+ SanitizationApplied = 4,
1019
+ DepthLimitExceeded = 5,
1020
+ }
1021
+
1022
+ /**
1023
+ * Whitespace handling strategy during conversion.
1024
+ *
1025
+ * Determines how sequences of whitespace characters (spaces, tabs, newlines) are processed: either `Normalized` or `Strict`.
1026
+ */
1027
+ export enum WasmWhitespaceMode {
1028
+ Normalized = 0, // NOTE(review): presumably collapses whitespace runs — confirm.
1029
+ Strict = 1, // NOTE(review): presumably keeps whitespace as written — confirm.
1030
+ }
1031
+
1032
+ /**
1033
+ * Convert HTML to Markdown, returning a `WasmConversionResult` (Rust: [`ConversionResult`]) with content, metadata, images,
1034
+ * and warnings.
1035
+ *
1036
+ * # Arguments
1037
+ *
1038
+ * * `html` — the HTML string to convert.
1039
+ * * `options` — optional conversion options; may be omitted or `null`. Defaults to [`ConversionOptions::default`].
1040
+ *
1041
+ * # Example (Rust API; from TypeScript the call shape is `convert(html, options)`)
1042
+ *
1043
+ * ```rust
1044
+ * use html_to_markdown_rs::convert;
1045
+ *
1046
+ * let html = "<h1>Hello World</h1>";
1047
+ * let result = convert(html, None).unwrap();
1048
+ * assert!(result.content.as_deref().unwrap_or("").contains("Hello World"));
1049
+ * ```
1050
+ *
1051
+ * # Errors
1052
+ *
1053
+ * Returns an error if HTML parsing fails or if the input contains invalid UTF-8. NOTE(review): from JavaScript this presumably surfaces as a thrown exception — confirm.
1054
+ */
1055
+ export function convert(html: string, options?: WasmConversionOptions | null): WasmConversionResult;