@kreuzberg/html-to-markdown-wasm 3.2.4 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,11 @@
1
1
  /* tslint:disable */
2
2
  /* eslint-disable */
3
3
 
4
+ /**
5
+ * The type of an inline text annotation.
6
+ *
7
+ * Uses an internally tagged representation (`"annotation_type": "bold"`) for JSON serialization.
8
+ */
4
9
  export enum WasmAnnotationKind {
5
10
  Bold = 0,
6
11
  Italic = 1,
@@ -13,18 +18,52 @@ export enum WasmAnnotationKind {
13
18
  Link = 8,
14
19
  }
15
20
 
21
+ /**
22
+ * Code block fence style in Markdown output.
23
+ *
24
+ * Determines how code blocks (`<pre><code>`) are rendered in Markdown.
25
+ */
16
26
  export enum WasmCodeBlockStyle {
17
27
  Indented = 0,
18
28
  Backticks = 1,
19
29
  Tildes = 2,
20
30
  }
21
31
 
32
+ /**
33
+ * Main conversion options for HTML to Markdown conversion.
34
+ *
35
+ * Use [`ConversionOptions::builder()`] to construct, or [`Default::default()`] for defaults (exposed to JavaScript as `WasmConversionOptions.builder()` and `WasmConversionOptions.default()`).
36
+ *
37
+ * # Example
38
+ *
39
+ * ```text
40
+ * use html_to_markdown_rs::ConversionOptions;
41
+ *
42
+ * let options = ConversionOptions::builder()
43
+ * .heading_style(HeadingStyle::Atx)
44
+ * .wrap(true)
45
+ * .wrap_width(100)
46
+ * .build();
47
+ * ```
48
+ */
22
49
  export class WasmConversionOptions {
23
50
  free(): void;
24
51
  [Symbol.dispose](): void;
52
+ /**
53
+ * Apply a partial update to these conversion options.
54
+ */
55
+ applyUpdate(_update: WasmConversionOptionsUpdate): void;
56
+ /**
57
+ * Create a new builder with default values.
58
+ */
25
59
  static builder(): WasmConversionOptionsBuilder;
26
60
  static default(): WasmConversionOptions;
27
- constructor(heading_style?: WasmHeadingStyle | null, list_indent_type?: WasmListIndentType | null, list_indent_width?: number | null, bullets?: string | null, strong_em_symbol?: string | null, escape_asterisks?: boolean | null, escape_underscores?: boolean | null, escape_misc?: boolean | null, escape_ascii?: boolean | null, code_language?: string | null, autolinks?: boolean | null, default_title?: boolean | null, br_in_tables?: boolean | null, highlight_style?: WasmHighlightStyle | null, extract_metadata?: boolean | null, whitespace_mode?: WasmWhitespaceMode | null, strip_newlines?: boolean | null, wrap?: boolean | null, wrap_width?: number | null, convert_as_inline?: boolean | null, sub_symbol?: string | null, sup_symbol?: string | null, newline_style?: WasmNewlineStyle | null, code_block_style?: WasmCodeBlockStyle | null, keep_inline_images_in?: string[] | null, preprocessing?: WasmPreprocessingOptions | null, encoding?: string | null, debug?: boolean | null, strip_tags?: string[] | null, preserve_tags?: string[] | null, skip_images?: boolean | null, link_style?: WasmLinkStyle | null, output_format?: WasmOutputFormat | null, include_document_structure?: boolean | null, extract_images?: boolean | null, max_image_size?: bigint | null, capture_svg?: boolean | null, infer_dimensions?: boolean | null);
61
+ static from(update: WasmConversionOptionsUpdate): WasmConversionOptions;
62
+ /**
63
+ * Create options from a partial update applied on top of the defaults.
64
+ */
65
+ static fromUpdate(update: WasmConversionOptionsUpdate): WasmConversionOptions;
66
+ constructor(heading_style?: WasmHeadingStyle | null, list_indent_type?: WasmListIndentType | null, list_indent_width?: number | null, bullets?: string | null, strong_em_symbol?: string | null, escape_asterisks?: boolean | null, escape_underscores?: boolean | null, escape_misc?: boolean | null, escape_ascii?: boolean | null, code_language?: string | null, autolinks?: boolean | null, default_title?: boolean | null, br_in_tables?: boolean | null, highlight_style?: WasmHighlightStyle | null, extract_metadata?: boolean | null, whitespace_mode?: WasmWhitespaceMode | null, strip_newlines?: boolean | null, wrap?: boolean | null, wrap_width?: number | null, convert_as_inline?: boolean | null, sub_symbol?: string | null, sup_symbol?: string | null, newline_style?: WasmNewlineStyle | null, code_block_style?: WasmCodeBlockStyle | null, keep_inline_images_in?: string[] | null, preprocessing?: WasmPreprocessingOptions | null, encoding?: string | null, debug?: boolean | null, strip_tags?: string[] | null, preserve_tags?: string[] | null, skip_images?: boolean | null, link_style?: WasmLinkStyle | null, output_format?: WasmOutputFormat | null, include_document_structure?: boolean | null, extract_images?: boolean | null, max_image_size?: bigint | null, capture_svg?: boolean | null, infer_dimensions?: boolean | null, exclude_selectors?: string[] | null, max_depth?: number | null);
28
67
  autolinks: boolean;
29
68
  brInTables: boolean;
30
69
  bullets: string;
@@ -39,6 +78,7 @@ export class WasmConversionOptions {
39
78
  escapeAsterisks: boolean;
40
79
  escapeMisc: boolean;
41
80
  escapeUnderscores: boolean;
81
+ excludeSelectors: string[];
42
82
  extractImages: boolean;
43
83
  extractMetadata: boolean;
44
84
  headingStyle: WasmHeadingStyle;
@@ -49,6 +89,8 @@ export class WasmConversionOptions {
49
89
  linkStyle: WasmLinkStyle;
50
90
  listIndentType: WasmListIndentType;
51
91
  listIndentWidth: number;
92
+ get maxDepth(): number | undefined;
93
+ set maxDepth(value: number | null | undefined);
52
94
  maxImageSize: bigint;
53
95
  newlineStyle: WasmNewlineStyle;
54
96
  outputFormat: WasmOutputFormat;
@@ -65,21 +107,51 @@ export class WasmConversionOptions {
65
107
  wrapWidth: number;
66
108
  }
67
109
 
110
+ /**
111
+ * Builder for [`ConversionOptions`].
112
+ *
113
+ * All fields start with default values. Call `.build()` to produce the final options.
114
+ */
68
115
  export class WasmConversionOptionsBuilder {
69
116
  private constructor();
70
117
  free(): void;
71
118
  [Symbol.dispose](): void;
119
+ /**
120
+ * Build the final [`ConversionOptions`].
121
+ */
72
122
  build(): WasmConversionOptions;
123
+ /**
124
+ * Set the list of CSS selectors for elements to exclude entirely from output.
125
+ */
126
+ excludeSelectors(selectors: string[]): WasmConversionOptionsBuilder;
127
+ /**
128
+ * Set the list of HTML tag names whose `<img>` children are kept inline.
129
+ */
73
130
  keepInlineImagesIn(tags: string[]): WasmConversionOptionsBuilder;
131
+ /**
132
+ * Set the pre-processing options applied to the HTML before conversion.
133
+ */
74
134
  preprocessing(preprocessing: WasmPreprocessingOptions): WasmConversionOptionsBuilder;
135
+ /**
136
+ * Set the list of HTML tag names that are preserved verbatim in output.
137
+ */
75
138
  preserveTags(tags: string[]): WasmConversionOptionsBuilder;
139
+ /**
140
+ * Set the list of HTML tag names whose content is stripped from output.
141
+ */
76
142
  stripTags(tags: string[]): WasmConversionOptionsBuilder;
77
143
  }
78
144
 
145
+ /**
146
+ * Partial update for `ConversionOptions`.
147
+ *
148
+ * Uses `Option<T>` fields for selective updates. Bindings use this to construct
149
+ * options from language-native types. Prefer [`ConversionOptionsBuilder`] for Rust code.
150
+ */
79
151
  export class WasmConversionOptionsUpdate {
80
152
  free(): void;
81
153
  [Symbol.dispose](): void;
82
- constructor(heading_style?: WasmHeadingStyle | null, list_indent_type?: WasmListIndentType | null, list_indent_width?: number | null, bullets?: string | null, strong_em_symbol?: string | null, escape_asterisks?: boolean | null, escape_underscores?: boolean | null, escape_misc?: boolean | null, escape_ascii?: boolean | null, code_language?: string | null, autolinks?: boolean | null, default_title?: boolean | null, br_in_tables?: boolean | null, highlight_style?: WasmHighlightStyle | null, extract_metadata?: boolean | null, whitespace_mode?: WasmWhitespaceMode | null, strip_newlines?: boolean | null, wrap?: boolean | null, wrap_width?: number | null, convert_as_inline?: boolean | null, sub_symbol?: string | null, sup_symbol?: string | null, newline_style?: WasmNewlineStyle | null, code_block_style?: WasmCodeBlockStyle | null, keep_inline_images_in?: string[] | null, preprocessing?: WasmPreprocessingOptionsUpdate | null, encoding?: string | null, debug?: boolean | null, strip_tags?: string[] | null, preserve_tags?: string[] | null, skip_images?: boolean | null, link_style?: WasmLinkStyle | null, output_format?: WasmOutputFormat | null, include_document_structure?: boolean | null, extract_images?: boolean | null, max_image_size?: bigint | null, capture_svg?: boolean | null, infer_dimensions?: boolean | null);
154
+ constructor(heading_style?: WasmHeadingStyle | null, list_indent_type?: WasmListIndentType | null, list_indent_width?: number | null, bullets?: string | null, strong_em_symbol?: string | null, escape_asterisks?: boolean | null, escape_underscores?: boolean | null, escape_misc?: boolean | null, escape_ascii?: boolean | null, code_language?: string | null, autolinks?: boolean | null, default_title?: boolean | null, br_in_tables?: boolean | null, highlight_style?: WasmHighlightStyle | null, extract_metadata?: boolean | null, whitespace_mode?: WasmWhitespaceMode | null, strip_newlines?: boolean | null, wrap?: boolean | null, wrap_width?: number | null, convert_as_inline?: boolean | null, sub_symbol?: string | null, sup_symbol?: string | null, newline_style?: WasmNewlineStyle | null, code_block_style?: WasmCodeBlockStyle | null, keep_inline_images_in?: string[] | null, preprocessing?: WasmPreprocessingOptionsUpdate | null, encoding?: string | null, debug?: boolean | null, strip_tags?: string[] | null, preserve_tags?: string[] | null, skip_images?: boolean | null, link_style?: WasmLinkStyle | null, output_format?: WasmOutputFormat | null, include_document_structure?: boolean | null, extract_images?: boolean | null, max_image_size?: bigint | null, capture_svg?: boolean | null, infer_dimensions?: boolean | null, max_depth?: number | null, exclude_selectors?: string[] | null);
83
155
  get autolinks(): boolean | undefined;
84
156
  set autolinks(value: boolean | null | undefined);
85
157
  get brInTables(): boolean | undefined;
@@ -108,6 +180,8 @@ export class WasmConversionOptionsUpdate {
108
180
  set escapeMisc(value: boolean | null | undefined);
109
181
  get escapeUnderscores(): boolean | undefined;
110
182
  set escapeUnderscores(value: boolean | null | undefined);
183
+ get excludeSelectors(): string[] | undefined;
184
+ set excludeSelectors(value: string[] | null | undefined);
111
185
  get extractImages(): boolean | undefined;
112
186
  set extractImages(value: boolean | null | undefined);
113
187
  get extractMetadata(): boolean | undefined;
@@ -128,6 +202,8 @@ export class WasmConversionOptionsUpdate {
128
202
  set listIndentType(value: WasmListIndentType | null | undefined);
129
203
  get listIndentWidth(): number | undefined;
130
204
  set listIndentWidth(value: number | null | undefined);
205
+ get maxDepth(): number | undefined;
206
+ set maxDepth(value: number | null | undefined);
131
207
  get maxImageSize(): bigint | undefined;
132
208
  set maxImageSize(value: bigint | null | undefined);
133
209
  get newlineStyle(): WasmNewlineStyle | undefined;
@@ -158,6 +234,22 @@ export class WasmConversionOptionsUpdate {
158
234
  set wrapWidth(value: number | null | undefined);
159
235
  }
160
236
 
237
+ /**
238
+ * The primary result of HTML conversion and extraction.
239
+ *
240
+ * Contains the converted text output, optional structured document tree,
241
+ * metadata, extracted tables, images, and processing warnings.
242
+ *
243
+ * # Example
244
+ *
245
+ * ```text
246
+ * use html_to_markdown_rs::{convert, ConversionOptions};
247
+ *
248
+ * let result = convert("<h1>Hello</h1><p>World</p>", None)?;
249
+ * assert!(result.content.is_some());
250
+ * assert!(result.warnings.is_empty());
251
+ * ```
252
+ */
161
253
  export class WasmConversionResult {
162
254
  free(): void;
163
255
  [Symbol.dispose](): void;
@@ -172,6 +264,26 @@ export class WasmConversionResult {
172
264
  warnings: WasmProcessingWarning[];
173
265
  }
174
266
 
267
+ /**
268
+ * Document-level metadata extracted from `<head>` and top-level elements.
269
+ *
270
+ * Contains all metadata typically used by search engines, social media platforms,
271
+ * and browsers for document indexing and presentation.
272
+ *
273
+ * # Examples
274
+ *
275
+ * ```
276
+ * # use html_to_markdown_rs::metadata::DocumentMetadata;
277
+ * let doc = DocumentMetadata {
278
+ * title: Some("My Article".to_string()),
279
+ * description: Some("A great article about Rust".to_string()),
280
+ * keywords: vec!["rust".to_string(), "programming".to_string()],
281
+ * ..Default::default()
282
+ * };
283
+ *
284
+ * assert_eq!(doc.title, Some("My Article".to_string()));
285
+ * ```
286
+ */
175
287
  export class WasmDocumentMetadata {
176
288
  free(): void;
177
289
  [Symbol.dispose](): void;
@@ -196,6 +308,9 @@ export class WasmDocumentMetadata {
196
308
  twitterCard: any;
197
309
  }
198
310
 
311
+ /**
312
+ * A single node in the document tree.
313
+ */
199
314
  export class WasmDocumentNode {
200
315
  free(): void;
201
316
  [Symbol.dispose](): void;
@@ -210,6 +325,11 @@ export class WasmDocumentNode {
210
325
  set parent(value: number | null | undefined);
211
326
  }
212
327
 
328
+ /**
329
+ * A structured document tree representing the semantic content of an HTML document.
330
+ *
331
+ * Uses a flat node array with index-based parent/child references for efficient traversal.
332
+ */
213
333
  export class WasmDocumentStructure {
214
334
  free(): void;
215
335
  [Symbol.dispose](): void;
@@ -219,6 +339,9 @@ export class WasmDocumentStructure {
219
339
  set sourceFormat(value: string | null | undefined);
220
340
  }
221
341
 
342
+ /**
343
+ * A single cell in a table grid.
344
+ */
222
345
  export class WasmGridCell {
223
346
  free(): void;
224
347
  [Symbol.dispose](): void;
@@ -231,9 +354,61 @@ export class WasmGridCell {
231
354
  rowSpan: number;
232
355
  }
233
356
 
357
+ /**
358
+ * Header element metadata with hierarchy tracking.
359
+ *
360
+ * Captures heading elements (h1-h6) with their text content, identifiers,
361
+ * and position in the document structure.
362
+ *
363
+ * # Examples
364
+ *
365
+ * ```
366
+ * # use html_to_markdown_rs::metadata::HeaderMetadata;
367
+ * let header = HeaderMetadata {
368
+ * level: 1,
369
+ * text: "Main Title".to_string(),
370
+ * id: Some("main-title".to_string()),
371
+ * depth: 0,
372
+ * html_offset: 145,
373
+ * };
374
+ *
375
+ * assert_eq!(header.level, 1);
376
+ * assert!(header.is_valid());
377
+ * ```
378
+ */
234
379
  export class WasmHeaderMetadata {
235
380
  free(): void;
236
381
  [Symbol.dispose](): void;
382
+ /**
383
+ * Validate that the header level is within valid range (1-6).
384
+ *
385
+ * # Returns
386
+ *
387
+ * `true` if level is 1-6, `false` otherwise.
388
+ *
389
+ * # Examples
390
+ *
391
+ * ```
392
+ * # use html_to_markdown_rs::metadata::HeaderMetadata;
393
+ * let valid = HeaderMetadata {
394
+ * level: 3,
395
+ * text: "Title".to_string(),
396
+ * id: None,
397
+ * depth: 2,
398
+ * html_offset: 100,
399
+ * };
400
+ * assert!(valid.is_valid());
401
+ *
402
+ * let invalid = HeaderMetadata {
403
+ * level: 7, // Invalid
404
+ * text: "Title".to_string(),
405
+ * id: None,
406
+ * depth: 2,
407
+ * html_offset: 100,
408
+ * };
409
+ * assert!(!invalid.is_valid());
410
+ * ```
411
+ */
237
412
  isValid(): boolean;
238
413
  constructor(level: number, text: string, depth: number, html_offset: number, id?: string | null);
239
414
  depth: number;
@@ -244,12 +419,22 @@ export class WasmHeaderMetadata {
244
419
  text: string;
245
420
  }
246
421
 
422
+ /**
423
+ * Heading style options for Markdown output.
424
+ *
425
+ * Controls how headings (h1-h6) are rendered in the output Markdown.
426
+ */
247
427
  export enum WasmHeadingStyle {
248
428
  Underlined = 0,
249
429
  Atx = 1,
250
430
  AtxClosed = 2,
251
431
  }
252
432
 
433
+ /**
434
+ * Highlight rendering style for `<mark>` elements.
435
+ *
436
+ * Controls how highlighted text is rendered in Markdown output.
437
+ */
253
438
  export enum WasmHighlightStyle {
254
439
  DoubleEqual = 0,
255
440
  Html = 1,
@@ -257,6 +442,27 @@ export enum WasmHighlightStyle {
257
442
  None = 3,
258
443
  }
259
444
 
445
+ /**
446
+ * Comprehensive metadata extraction result from an HTML document.
447
+ *
448
+ * Contains all extracted metadata types in a single structure,
449
+ * suitable for serialization and transmission across language boundaries.
450
+ *
451
+ * # Examples
452
+ *
453
+ * ```
454
+ * # use html_to_markdown_rs::metadata::HtmlMetadata;
455
+ * let metadata = HtmlMetadata {
456
+ * document: Default::default(),
457
+ * headers: Vec::new(),
458
+ * links: Vec::new(),
459
+ * images: Vec::new(),
460
+ * structured_data: Vec::new(),
461
+ * };
462
+ *
463
+ * assert!(metadata.headers.is_empty());
464
+ * ```
465
+ */
260
466
  export class WasmHtmlMetadata {
261
467
  free(): void;
262
468
  [Symbol.dispose](): void;
@@ -268,21 +474,48 @@ export class WasmHtmlMetadata {
268
474
  structuredData: WasmStructuredData[];
269
475
  }
270
476
 
477
+ /**
478
+ * Image metadata with source and dimensions.
479
+ *
480
+ * Captures `<img>` elements and inline `<svg>` elements with metadata
481
+ * for image analysis and optimization.
482
+ *
483
+ * # Examples
484
+ *
485
+ * ```
486
+ * # use html_to_markdown_rs::metadata::{ImageMetadata, ImageType};
487
+ * let img = ImageMetadata {
488
+ * src: "https://example.com/image.jpg".to_string(),
489
+ * alt: Some("An example image".to_string()),
490
+ * title: Some("Example".to_string()),
491
+ * dimensions: Some((800, 600)),
492
+ * image_type: ImageType::External,
493
+ * attributes: Default::default(),
494
+ * };
495
+ *
496
+ * assert_eq!(img.image_type, ImageType::External);
497
+ * ```
498
+ */
271
499
  export class WasmImageMetadata {
272
500
  free(): void;
273
501
  [Symbol.dispose](): void;
274
- constructor(src: string, image_type: WasmImageType, attributes: any, alt?: string | null, title?: string | null, dimensions?: string | null);
502
+ constructor(src: string, image_type: WasmImageType, attributes: any, alt?: string | null, title?: string | null, dimensions?: Uint32Array | null);
275
503
  get alt(): string | undefined;
276
504
  set alt(value: string | null | undefined);
277
505
  attributes: any;
278
- get dimensions(): string | undefined;
279
- set dimensions(value: string | null | undefined);
506
+ get dimensions(): Uint32Array | undefined;
507
+ set dimensions(value: Uint32Array | null | undefined);
280
508
  imageType: WasmImageType;
281
509
  src: string;
282
510
  get title(): string | undefined;
283
511
  set title(value: string | null | undefined);
284
512
  }
285
513
 
514
+ /**
515
+ * Image source classification for proper handling and processing.
516
+ *
517
+ * Determines whether an image is embedded (data URI), inline SVG, external, or relative.
518
+ */
286
519
  export enum WasmImageType {
287
520
  DataUri = 0,
288
521
  InlineSvg = 1,
@@ -290,9 +523,52 @@ export enum WasmImageType {
290
523
  Relative = 3,
291
524
  }
292
525
 
526
+ /**
527
+ * Hyperlink metadata with categorization and attributes.
528
+ *
529
+ * Represents `<a>` elements with parsed href values, text content, and link type classification.
530
+ *
531
+ * # Examples
532
+ *
533
+ * ```
534
+ * # use html_to_markdown_rs::metadata::{LinkMetadata, LinkType};
535
+ * let link = LinkMetadata {
536
+ * href: "https://example.com".to_string(),
537
+ * text: "Example".to_string(),
538
+ * title: Some("Visit Example".to_string()),
539
+ * link_type: LinkType::External,
540
+ * rel: vec!["nofollow".to_string()],
541
+ * attributes: Default::default(),
542
+ * };
543
+ *
544
+ * assert_eq!(link.link_type, LinkType::External);
545
+ * assert_eq!(link.text, "Example");
546
+ * ```
547
+ */
293
548
  export class WasmLinkMetadata {
294
549
  free(): void;
295
550
  [Symbol.dispose](): void;
551
+ /**
552
+ * Classify a link based on href value.
553
+ *
554
+ * # Arguments
555
+ *
556
+ * * `href` - The href attribute value
557
+ *
558
+ * # Returns
559
+ *
560
+ * Appropriate [`LinkType`] based on protocol and content.
561
+ *
562
+ * # Examples
563
+ *
564
+ * ```
565
+ * # use html_to_markdown_rs::metadata::{LinkMetadata, LinkType};
566
+ * assert_eq!(LinkMetadata::classify_link("#section"), LinkType::Anchor);
567
+ * assert_eq!(LinkMetadata::classify_link("mailto:test@example.com"), LinkType::Email);
568
+ * assert_eq!(LinkMetadata::classify_link("tel:+1234567890"), LinkType::Phone);
569
+ * assert_eq!(LinkMetadata::classify_link("https://example.com"), LinkType::External);
570
+ * ```
571
+ */
296
572
  static classifyLink(href: string): WasmLinkType;
297
573
  constructor(href: string, text: string, link_type: WasmLinkType, rel: string[], attributes: any, title?: string | null);
298
574
  attributes: any;
@@ -304,11 +580,22 @@ export class WasmLinkMetadata {
304
580
  set title(value: string | null | undefined);
305
581
  }
306
582
 
583
+ /**
584
+ * Link rendering style in Markdown output.
585
+ *
586
+ * Controls whether links and images use inline `[text](url)` syntax or
587
+ * reference-style `[text][1]` syntax with definitions collected at the end.
588
+ */
307
589
  export enum WasmLinkStyle {
308
590
  Inline = 0,
309
591
  Reference = 1,
310
592
  }
311
593
 
594
+ /**
595
+ * Link classification based on href value and document context.
596
+ *
597
+ * Used to categorize links during extraction for filtering and analysis.
598
+ */
312
599
  export enum WasmLinkType {
313
600
  Anchor = 0,
314
601
  Internal = 1,
@@ -318,48 +605,31 @@ export enum WasmLinkType {
318
605
  Other = 5,
319
606
  }
320
607
 
608
+ /**
609
+ * List indentation character type.
610
+ *
611
+ * Controls whether list items are indented with spaces or tabs.
612
+ */
321
613
  export enum WasmListIndentType {
322
614
  Spaces = 0,
323
615
  Tabs = 1,
324
616
  }
325
617
 
326
- export class WasmMetadataConfig {
327
- free(): void;
328
- [Symbol.dispose](): void;
329
- anyEnabled(): boolean;
330
- static default(): WasmMetadataConfig;
331
- constructor(extract_document?: boolean | null, extract_headers?: boolean | null, extract_links?: boolean | null, extract_images?: boolean | null, extract_structured_data?: boolean | null, max_structured_data_size?: number | null);
332
- extractDocument: boolean;
333
- extractHeaders: boolean;
334
- extractImages: boolean;
335
- extractLinks: boolean;
336
- extractStructuredData: boolean;
337
- maxStructuredDataSize: number;
338
- }
339
-
340
- export class WasmMetadataConfigUpdate {
341
- free(): void;
342
- [Symbol.dispose](): void;
343
- constructor(extract_document?: boolean | null, extract_headers?: boolean | null, extract_links?: boolean | null, extract_images?: boolean | null, extract_structured_data?: boolean | null, max_structured_data_size?: number | null);
344
- get extractDocument(): boolean | undefined;
345
- set extractDocument(value: boolean | null | undefined);
346
- get extractHeaders(): boolean | undefined;
347
- set extractHeaders(value: boolean | null | undefined);
348
- get extractImages(): boolean | undefined;
349
- set extractImages(value: boolean | null | undefined);
350
- get extractLinks(): boolean | undefined;
351
- set extractLinks(value: boolean | null | undefined);
352
- get extractStructuredData(): boolean | undefined;
353
- set extractStructuredData(value: boolean | null | undefined);
354
- get maxStructuredDataSize(): number | undefined;
355
- set maxStructuredDataSize(value: number | null | undefined);
356
- }
357
-
618
+ /**
619
+ * Line break syntax in Markdown output.
620
+ *
621
+ * Controls how `<br>` elements and line breaks in the source are rendered.
622
+ */
358
623
  export enum WasmNewlineStyle {
359
624
  Spaces = 0,
360
625
  Backslash = 1,
361
626
  }
362
627
 
628
+ /**
629
+ * The semantic content type of a document node.
630
+ *
631
+ * Uses an internally tagged representation (`"node_type": "heading"`) for JSON serialization.
632
+ */
363
633
  export enum WasmNodeContent {
364
634
  Heading = 0,
365
635
  Paragraph = 1,
@@ -376,16 +646,168 @@ export enum WasmNodeContent {
376
646
  Group = 12,
377
647
  }
378
648
 
649
+ /**
650
+ * Context information passed to all visitor methods.
651
+ *
652
+ * Provides comprehensive metadata about the current node being visited,
653
+ * including its type, attributes, position in the DOM tree, and parent context.
654
+ */
655
+ export class WasmNodeContext {
656
+ free(): void;
657
+ [Symbol.dispose](): void;
658
+ constructor(node_type: WasmNodeType, tag_name: string, attributes: any, depth: number, index_in_parent: number, is_inline: boolean, parent_tag?: string | null);
659
+ attributes: any;
660
+ depth: number;
661
+ indexInParent: number;
662
+ isInline: boolean;
663
+ nodeType: WasmNodeType;
664
+ get parentTag(): string | undefined;
665
+ set parentTag(value: string | null | undefined);
666
+ tagName: string;
667
+ }
668
+
669
+ /**
670
+ * Node type enumeration covering all HTML element types.
671
+ *
672
+ * This enum categorizes all HTML elements that the converter recognizes,
673
+ * providing a coarse-grained classification for visitor dispatch.
674
+ */
675
+ export enum WasmNodeType {
676
+ Text = 0,
677
+ Element = 1,
678
+ Heading = 2,
679
+ Paragraph = 3,
680
+ Div = 4,
681
+ Blockquote = 5,
682
+ Pre = 6,
683
+ Hr = 7,
684
+ List = 8,
685
+ ListItem = 9,
686
+ DefinitionList = 10,
687
+ DefinitionTerm = 11,
688
+ DefinitionDescription = 12,
689
+ Table = 13,
690
+ TableRow = 14,
691
+ TableCell = 15,
692
+ TableHeader = 16,
693
+ TableBody = 17,
694
+ TableHead = 18,
695
+ TableFoot = 19,
696
+ Link = 20,
697
+ Image = 21,
698
+ Strong = 22,
699
+ Em = 23,
700
+ Code = 24,
701
+ Strikethrough = 25,
702
+ Underline = 26,
703
+ Subscript = 27,
704
+ Superscript = 28,
705
+ Mark = 29,
706
+ Small = 30,
707
+ Br = 31,
708
+ Span = 32,
709
+ Article = 33,
710
+ Section = 34,
711
+ Nav = 35,
712
+ Aside = 36,
713
+ Header = 37,
714
+ Footer = 38,
715
+ Main = 39,
716
+ Figure = 40,
717
+ Figcaption = 41,
718
+ Time = 42,
719
+ Details = 43,
720
+ Summary = 44,
721
+ Form = 45,
722
+ Input = 46,
723
+ Select = 47,
724
+ Option = 48,
725
+ Button = 49,
726
+ Textarea = 50,
727
+ Label = 51,
728
+ Fieldset = 52,
729
+ Legend = 53,
730
+ Audio = 54,
731
+ Video = 55,
732
+ Picture = 56,
733
+ Source = 57,
734
+ Iframe = 58,
735
+ Svg = 59,
736
+ Canvas = 60,
737
+ Ruby = 61,
738
+ Rt = 62,
739
+ Rp = 63,
740
+ Abbr = 64,
741
+ Kbd = 65,
742
+ Samp = 66,
743
+ Var = 67,
744
+ Cite = 68,
745
+ Q = 69,
746
+ Del = 70,
747
+ Ins = 71,
748
+ Data = 72,
749
+ Meter = 73,
750
+ Progress = 74,
751
+ Output = 75,
752
+ Template = 76,
753
+ Slot = 77,
754
+ Html = 78,
755
+ Head = 79,
756
+ Body = 80,
757
+ Title = 81,
758
+ Meta = 82,
759
+ LinkTag = 83,
760
+ Style = 84,
761
+ Script = 85,
762
+ Base = 86,
763
+ Custom = 87,
764
+ }
765
+
766
+ /**
767
+ * Output format for conversion.
768
+ *
769
+ * Specifies the target markup language format for the conversion output.
770
+ */
379
771
  export enum WasmOutputFormat {
380
772
  Markdown = 0,
381
773
  Djot = 1,
382
774
  Plain = 2,
383
775
  }
384
776
 
777
+ /**
778
+ * HTML preprocessing options for document cleanup before conversion.
779
+ */
385
780
  export class WasmPreprocessingOptions {
386
781
  free(): void;
387
782
  [Symbol.dispose](): void;
783
+ /**
784
+ * Apply a partial update to these preprocessing options.
785
+ *
786
+ * Any specified fields in the update will override the current values.
787
+ * Unspecified fields (None) are left unchanged.
788
+ *
789
+ * # Arguments
790
+ *
791
+ * * `update` - Partial preprocessing options update
792
+ */
793
+ applyUpdate(_update: WasmPreprocessingOptionsUpdate): void;
388
794
  static default(): WasmPreprocessingOptions;
795
+ static from(update: WasmPreprocessingOptionsUpdate): WasmPreprocessingOptions;
796
+ /**
797
+ * Create new preprocessing options from a partial update.
798
+ *
799
+ * Creates a new `PreprocessingOptions` struct with defaults, then applies the update.
800
+ * Fields not specified in the update keep their default values.
801
+ *
802
+ * # Arguments
803
+ *
804
+ * * `update` - Partial preprocessing options update
805
+ *
806
+ * # Returns
807
+ *
808
+ * New `PreprocessingOptions` with specified updates applied to defaults
809
+ */
810
+ static fromUpdate(update: WasmPreprocessingOptionsUpdate): WasmPreprocessingOptions;
389
811
  constructor(enabled?: boolean | null, preset?: WasmPreprocessingPreset | null, remove_navigation?: boolean | null, remove_forms?: boolean | null);
390
812
  enabled: boolean;
391
813
  preset: WasmPreprocessingPreset;
@@ -393,6 +815,13 @@ export class WasmPreprocessingOptions {
393
815
  removeNavigation: boolean;
394
816
  }
395
817
 
818
+ /**
819
+ * Partial update for `PreprocessingOptions`.
820
+ *
821
+ * This struct uses `Option<T>` to represent optional fields that can be selectively updated.
822
+ * Only specified fields (Some values) will override existing options; None values leave the
823
+ * corresponding fields unchanged when applied via [`PreprocessingOptions::apply_update`].
824
+ */
396
825
  export class WasmPreprocessingOptionsUpdate {
397
826
  free(): void;
398
827
  [Symbol.dispose](): void;
@@ -407,12 +836,20 @@ export class WasmPreprocessingOptionsUpdate {
407
836
  set removeNavigation(value: boolean | null | undefined);
408
837
  }
409
838
 
839
+ /**
840
+ * HTML preprocessing aggressiveness level.
841
+ *
842
+ * Controls the extent of cleanup performed before conversion. Higher levels remove more elements.
843
+ */
410
844
  export enum WasmPreprocessingPreset {
411
845
  Minimal = 0,
412
846
  Standard = 1,
413
847
  Aggressive = 2,
414
848
  }
415
849
 
850
+ /**
851
+ * A non-fatal warning generated during HTML processing.
852
+ */
416
853
  export class WasmProcessingWarning {
417
854
  free(): void;
418
855
  [Symbol.dispose](): void;
@@ -421,6 +858,25 @@ export class WasmProcessingWarning {
421
858
  message: string;
422
859
  }
423
860
 
861
+ /**
862
+ * Structured data block (JSON-LD, Microdata, or RDFa).
863
+ *
864
+ * Represents machine-readable structured data found in the document.
865
+ * JSON-LD blocks are collected as raw JSON strings for flexibility.
866
+ *
867
+ * # Examples
868
+ *
869
+ * ```
870
+ * # use html_to_markdown_rs::metadata::{StructuredData, StructuredDataType};
871
+ * let schema = StructuredData {
872
+ * data_type: StructuredDataType::JsonLd,
873
+ * raw_json: r#"{"@context":"https://schema.org","@type":"Article"}"#.to_string(),
874
+ * schema_type: Some("Article".to_string()),
875
+ * };
876
+ *
877
+ * assert_eq!(schema.data_type, StructuredDataType::JsonLd);
878
+ * ```
879
+ */
424
880
  export class WasmStructuredData {
425
881
  free(): void;
426
882
  [Symbol.dispose](): void;
@@ -431,12 +887,20 @@ export class WasmStructuredData {
431
887
  set schemaType(value: string | null | undefined);
432
888
  }
433
889
 
890
+ /**
891
+ * Structured data format type.
892
+ *
893
+ * Identifies the schema/format used for structured data markup.
894
+ */
434
895
  export enum WasmStructuredDataType {
435
896
  JsonLd = 0,
436
897
  Microdata = 1,
437
898
  RDFa = 2,
438
899
  }
439
900
 
901
+ /**
902
+ * A top-level extracted table with both structured data and a Markdown representation.
903
+ */
440
904
  export class WasmTableData {
441
905
  free(): void;
442
906
  [Symbol.dispose](): void;
@@ -445,6 +909,9 @@ export class WasmTableData {
445
909
  markdown: string;
446
910
  }
447
911
 
912
+ /**
913
+ * A structured table grid with cell-level data including spans.
914
+ */
448
915
  export class WasmTableGrid {
449
916
  free(): void;
450
917
  [Symbol.dispose](): void;
@@ -454,6 +921,11 @@ export class WasmTableGrid {
454
921
  rows: number;
455
922
  }
456
923
 
924
+ /**
925
+ * An inline text annotation with byte-range offsets.
926
+ *
927
+ * Annotations describe formatting (bold, italic, etc.) and links within a node's text content.
928
+ */
457
929
  export class WasmTextAnnotation {
458
930
  free(): void;
459
931
  [Symbol.dispose](): void;
@@ -463,26 +935,55 @@ export class WasmTextAnnotation {
463
935
  start: number;
464
936
  }
465
937
 
938
+ /**
939
+ * Text directionality of document content.
940
+ *
941
+ * Corresponds to the HTML `dir` attribute and `bdi` element directionality.
942
+ */
466
943
  export enum WasmTextDirection {
467
944
  LeftToRight = 0,
468
945
  RightToLeft = 1,
469
946
  Auto = 2,
470
947
  }
471
948
 
949
+ /**
950
+ * Result of a visitor callback.
951
+ *
952
+ * Allows visitors to control the conversion flow by either proceeding
953
+ * with default behavior, providing custom output, skipping elements,
954
+ * preserving HTML, or signaling errors.
955
+ */
956
+ export enum WasmVisitResult {
957
+ Continue = 0,
958
+ Custom = 1,
959
+ Skip = 2,
960
+ PreserveHtml = 3,
961
+ Error = 4,
962
+ }
963
+
964
+ /**
965
+ * Categories of processing warnings.
966
+ */
472
967
  export enum WasmWarningKind {
473
968
  ImageExtractionFailed = 0,
474
969
  EncodingFallback = 1,
475
970
  TruncatedInput = 2,
476
971
  MalformedHtml = 3,
477
972
  SanitizationApplied = 4,
973
+ DepthLimitExceeded = 5,
478
974
  }
479
975
 
976
+ /**
977
+ * Whitespace handling strategy during conversion.
978
+ *
979
+ * Determines how sequences of whitespace characters (spaces, tabs, newlines) are processed.
980
+ */
480
981
  export enum WasmWhitespaceMode {
481
982
  Normalized = 0,
482
983
  Strict = 1,
483
984
  }
484
985
 
485
- export function convert(html: string, options?: WasmConversionOptions | null): WasmConversionResult;
986
+ export function convert(html: string, options?: WasmConversionOptions | null, visitor?: any | null): WasmConversionResult;
486
987
 
487
988
  export declare function initWasm(): Promise<void>;
488
989
  export declare const wasmReady: Promise<void>;