@kreuzberg/html-to-markdown-wasm 3.2.6 → 3.3.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,11 @@
1
1
  /* tslint:disable */
2
2
  /* eslint-disable */
3
3
 
4
+ /**
5
+ * The type of an inline text annotation.
6
+ *
7
+ * Uses internally tagged representation (`"annotation_type": "bold"`) for JSON serialization.
8
+ */
4
9
  export enum WasmAnnotationKind {
5
10
  Bold = 0,
6
11
  Italic = 1,
@@ -13,21 +18,52 @@ export enum WasmAnnotationKind {
13
18
  Link = 8,
14
19
  }
15
20
 
21
+ /**
22
+ * Code block fence style in Markdown output.
23
+ *
24
+ * Determines how code blocks (`<pre><code>`) are rendered in Markdown.
25
+ */
16
26
  export enum WasmCodeBlockStyle {
17
27
  Indented = 0,
18
28
  Backticks = 1,
19
29
  Tildes = 2,
20
30
  }
21
31
 
32
+ /**
33
+ * Main conversion options for HTML to Markdown conversion.
34
+ *
35
+ * Use `WasmConversionOptions.builder()` to construct, or `WasmConversionOptions.default()` for defaults (in Rust: [`ConversionOptions::builder()`] and [`Default::default()`]).
36
+ *
37
+ * # Example
38
+ *
39
+ * ```text
40
+ * use html_to_markdown_rs::ConversionOptions;
41
+ *
42
+ * let options = ConversionOptions::builder()
43
+ * .heading_style(HeadingStyle::Atx)
44
+ * .wrap(true)
45
+ * .wrap_width(100)
46
+ * .build();
47
+ * ```
48
+ */
22
49
  export class WasmConversionOptions {
23
50
  free(): void;
24
51
  [Symbol.dispose](): void;
52
+ /**
53
+ * Apply a partial update to these conversion options.
54
+ */
25
55
  applyUpdate(_update: WasmConversionOptionsUpdate): void;
56
+ /**
57
+ * Create a new builder with default values.
58
+ */
26
59
  static builder(): WasmConversionOptionsBuilder;
27
60
  static default(): WasmConversionOptions;
28
61
  static from(update: WasmConversionOptionsUpdate): WasmConversionOptions;
62
+ /**
63
+ * Create from a partial update, applying to defaults.
64
+ */
29
65
  static fromUpdate(update: WasmConversionOptionsUpdate): WasmConversionOptions;
30
- constructor(heading_style?: WasmHeadingStyle | null, list_indent_type?: WasmListIndentType | null, list_indent_width?: number | null, bullets?: string | null, strong_em_symbol?: string | null, escape_asterisks?: boolean | null, escape_underscores?: boolean | null, escape_misc?: boolean | null, escape_ascii?: boolean | null, code_language?: string | null, autolinks?: boolean | null, default_title?: boolean | null, br_in_tables?: boolean | null, highlight_style?: WasmHighlightStyle | null, extract_metadata?: boolean | null, whitespace_mode?: WasmWhitespaceMode | null, strip_newlines?: boolean | null, wrap?: boolean | null, wrap_width?: number | null, convert_as_inline?: boolean | null, sub_symbol?: string | null, sup_symbol?: string | null, newline_style?: WasmNewlineStyle | null, code_block_style?: WasmCodeBlockStyle | null, keep_inline_images_in?: string[] | null, preprocessing?: WasmPreprocessingOptions | null, encoding?: string | null, debug?: boolean | null, strip_tags?: string[] | null, preserve_tags?: string[] | null, skip_images?: boolean | null, link_style?: WasmLinkStyle | null, output_format?: WasmOutputFormat | null, include_document_structure?: boolean | null, extract_images?: boolean | null, max_image_size?: bigint | null, capture_svg?: boolean | null, infer_dimensions?: boolean | null, max_depth?: number | null);
66
+ constructor(heading_style?: WasmHeadingStyle | null, list_indent_type?: WasmListIndentType | null, list_indent_width?: number | null, bullets?: string | null, strong_em_symbol?: string | null, escape_asterisks?: boolean | null, escape_underscores?: boolean | null, escape_misc?: boolean | null, escape_ascii?: boolean | null, code_language?: string | null, autolinks?: boolean | null, default_title?: boolean | null, br_in_tables?: boolean | null, highlight_style?: WasmHighlightStyle | null, extract_metadata?: boolean | null, whitespace_mode?: WasmWhitespaceMode | null, strip_newlines?: boolean | null, wrap?: boolean | null, wrap_width?: number | null, convert_as_inline?: boolean | null, sub_symbol?: string | null, sup_symbol?: string | null, newline_style?: WasmNewlineStyle | null, code_block_style?: WasmCodeBlockStyle | null, keep_inline_images_in?: string[] | null, preprocessing?: WasmPreprocessingOptions | null, encoding?: string | null, debug?: boolean | null, strip_tags?: string[] | null, preserve_tags?: string[] | null, skip_images?: boolean | null, link_style?: WasmLinkStyle | null, output_format?: WasmOutputFormat | null, include_document_structure?: boolean | null, extract_images?: boolean | null, max_image_size?: bigint | null, capture_svg?: boolean | null, infer_dimensions?: boolean | null, exclude_selectors?: string[] | null, max_depth?: number | null);
31
67
  autolinks: boolean;
32
68
  brInTables: boolean;
33
69
  bullets: string;
@@ -42,6 +78,7 @@ export class WasmConversionOptions {
42
78
  escapeAsterisks: boolean;
43
79
  escapeMisc: boolean;
44
80
  escapeUnderscores: boolean;
81
+ excludeSelectors: string[];
45
82
  extractImages: boolean;
46
83
  extractMetadata: boolean;
47
84
  headingStyle: WasmHeadingStyle;
@@ -70,21 +107,51 @@ export class WasmConversionOptions {
70
107
  wrapWidth: number;
71
108
  }
72
109
 
110
+ /**
111
+ * Builder for [`ConversionOptions`].
112
+ *
113
+ * All fields start with default values. Call `.build()` to produce the final options.
114
+ */
73
115
  export class WasmConversionOptionsBuilder {
74
116
  private constructor();
75
117
  free(): void;
76
118
  [Symbol.dispose](): void;
119
+ /**
120
+ * Build the final [`ConversionOptions`].
121
+ */
77
122
  build(): WasmConversionOptions;
123
+ /**
124
+ * Set the list of CSS selectors for elements to exclude entirely from output.
125
+ */
126
+ excludeSelectors(selectors: string[]): WasmConversionOptionsBuilder;
127
+ /**
128
+ * Set the list of HTML tag names whose `<img>` children are kept inline.
129
+ */
78
130
  keepInlineImagesIn(tags: string[]): WasmConversionOptionsBuilder;
131
+ /**
132
+ * Set the pre-processing options applied to the HTML before conversion.
133
+ */
79
134
  preprocessing(preprocessing: WasmPreprocessingOptions): WasmConversionOptionsBuilder;
135
+ /**
136
+ * Set the list of HTML tag names that are preserved verbatim in output.
137
+ */
80
138
  preserveTags(tags: string[]): WasmConversionOptionsBuilder;
139
+ /**
140
+ * Set the list of HTML tag names whose content is stripped from output.
141
+ */
81
142
  stripTags(tags: string[]): WasmConversionOptionsBuilder;
82
143
  }
83
144
 
145
+ /**
146
+ * Partial update for `ConversionOptions`.
147
+ *
148
+ * Uses `Option<T>` fields for selective updates. Bindings use this to construct
149
+ * options from language-native types. Prefer [`ConversionOptionsBuilder`] for Rust code.
150
+ */
84
151
  export class WasmConversionOptionsUpdate {
85
152
  free(): void;
86
153
  [Symbol.dispose](): void;
87
- constructor(heading_style?: WasmHeadingStyle | null, list_indent_type?: WasmListIndentType | null, list_indent_width?: number | null, bullets?: string | null, strong_em_symbol?: string | null, escape_asterisks?: boolean | null, escape_underscores?: boolean | null, escape_misc?: boolean | null, escape_ascii?: boolean | null, code_language?: string | null, autolinks?: boolean | null, default_title?: boolean | null, br_in_tables?: boolean | null, highlight_style?: WasmHighlightStyle | null, extract_metadata?: boolean | null, whitespace_mode?: WasmWhitespaceMode | null, strip_newlines?: boolean | null, wrap?: boolean | null, wrap_width?: number | null, convert_as_inline?: boolean | null, sub_symbol?: string | null, sup_symbol?: string | null, newline_style?: WasmNewlineStyle | null, code_block_style?: WasmCodeBlockStyle | null, keep_inline_images_in?: string[] | null, preprocessing?: WasmPreprocessingOptionsUpdate | null, encoding?: string | null, debug?: boolean | null, strip_tags?: string[] | null, preserve_tags?: string[] | null, skip_images?: boolean | null, link_style?: WasmLinkStyle | null, output_format?: WasmOutputFormat | null, include_document_structure?: boolean | null, extract_images?: boolean | null, max_image_size?: bigint | null, capture_svg?: boolean | null, infer_dimensions?: boolean | null, max_depth?: number | null);
154
+ constructor(heading_style?: WasmHeadingStyle | null, list_indent_type?: WasmListIndentType | null, list_indent_width?: number | null, bullets?: string | null, strong_em_symbol?: string | null, escape_asterisks?: boolean | null, escape_underscores?: boolean | null, escape_misc?: boolean | null, escape_ascii?: boolean | null, code_language?: string | null, autolinks?: boolean | null, default_title?: boolean | null, br_in_tables?: boolean | null, highlight_style?: WasmHighlightStyle | null, extract_metadata?: boolean | null, whitespace_mode?: WasmWhitespaceMode | null, strip_newlines?: boolean | null, wrap?: boolean | null, wrap_width?: number | null, convert_as_inline?: boolean | null, sub_symbol?: string | null, sup_symbol?: string | null, newline_style?: WasmNewlineStyle | null, code_block_style?: WasmCodeBlockStyle | null, keep_inline_images_in?: string[] | null, preprocessing?: WasmPreprocessingOptionsUpdate | null, encoding?: string | null, debug?: boolean | null, strip_tags?: string[] | null, preserve_tags?: string[] | null, skip_images?: boolean | null, link_style?: WasmLinkStyle | null, output_format?: WasmOutputFormat | null, include_document_structure?: boolean | null, extract_images?: boolean | null, max_image_size?: bigint | null, capture_svg?: boolean | null, infer_dimensions?: boolean | null, max_depth?: number | null, exclude_selectors?: string[] | null);
88
155
  get autolinks(): boolean | undefined;
89
156
  set autolinks(value: boolean | null | undefined);
90
157
  get brInTables(): boolean | undefined;
@@ -113,6 +180,8 @@ export class WasmConversionOptionsUpdate {
113
180
  set escapeMisc(value: boolean | null | undefined);
114
181
  get escapeUnderscores(): boolean | undefined;
115
182
  set escapeUnderscores(value: boolean | null | undefined);
183
+ get excludeSelectors(): string[] | undefined;
184
+ set excludeSelectors(value: string[] | null | undefined);
116
185
  get extractImages(): boolean | undefined;
117
186
  set extractImages(value: boolean | null | undefined);
118
187
  get extractMetadata(): boolean | undefined;
@@ -165,6 +234,22 @@ export class WasmConversionOptionsUpdate {
165
234
  set wrapWidth(value: number | null | undefined);
166
235
  }
167
236
 
237
+ /**
238
+ * The primary result of HTML conversion and extraction.
239
+ *
240
+ * Contains the converted text output, optional structured document tree,
241
+ * metadata, extracted tables, images, and processing warnings.
242
+ *
243
+ * # Example
244
+ *
245
+ * ```text
246
+ * use html_to_markdown_rs::{convert, ConversionOptions};
247
+ *
248
+ * let result = convert("<h1>Hello</h1><p>World</p>", None)?;
249
+ * assert!(result.content.is_some());
250
+ * assert!(result.warnings.is_empty());
251
+ * ```
252
+ */
168
253
  export class WasmConversionResult {
169
254
  free(): void;
170
255
  [Symbol.dispose](): void;
@@ -179,6 +264,26 @@ export class WasmConversionResult {
179
264
  warnings: WasmProcessingWarning[];
180
265
  }
181
266
 
267
+ /**
268
+ * Document-level metadata extracted from `<head>` and top-level elements.
269
+ *
270
+ * Contains all metadata typically used by search engines, social media platforms,
271
+ * and browsers for document indexing and presentation.
272
+ *
273
+ * # Examples
274
+ *
275
+ * ```
276
+ * # use html_to_markdown_rs::metadata::DocumentMetadata;
277
+ * let doc = DocumentMetadata {
278
+ * title: Some("My Article".to_string()),
279
+ * description: Some("A great article about Rust".to_string()),
280
+ * keywords: vec!["rust".to_string(), "programming".to_string()],
281
+ * ..Default::default()
282
+ * };
283
+ *
284
+ * assert_eq!(doc.title, Some("My Article".to_string()));
285
+ * ```
286
+ */
182
287
  export class WasmDocumentMetadata {
183
288
  free(): void;
184
289
  [Symbol.dispose](): void;
@@ -203,6 +308,9 @@ export class WasmDocumentMetadata {
203
308
  twitterCard: any;
204
309
  }
205
310
 
311
+ /**
312
+ * A single node in the document tree.
313
+ */
206
314
  export class WasmDocumentNode {
207
315
  free(): void;
208
316
  [Symbol.dispose](): void;
@@ -217,6 +325,11 @@ export class WasmDocumentNode {
217
325
  set parent(value: number | null | undefined);
218
326
  }
219
327
 
328
+ /**
329
+ * A structured document tree representing the semantic content of an HTML document.
330
+ *
331
+ * Uses a flat node array with index-based parent/child references for efficient traversal.
332
+ */
220
333
  export class WasmDocumentStructure {
221
334
  free(): void;
222
335
  [Symbol.dispose](): void;
@@ -226,6 +339,9 @@ export class WasmDocumentStructure {
226
339
  set sourceFormat(value: string | null | undefined);
227
340
  }
228
341
 
342
+ /**
343
+ * A single cell in a table grid.
344
+ */
229
345
  export class WasmGridCell {
230
346
  free(): void;
231
347
  [Symbol.dispose](): void;
@@ -238,9 +354,61 @@ export class WasmGridCell {
238
354
  rowSpan: number;
239
355
  }
240
356
 
357
+ /**
358
+ * Header element metadata with hierarchy tracking.
359
+ *
360
+ * Captures heading elements (h1-h6) with their text content, identifiers,
361
+ * and position in the document structure.
362
+ *
363
+ * # Examples
364
+ *
365
+ * ```
366
+ * # use html_to_markdown_rs::metadata::HeaderMetadata;
367
+ * let header = HeaderMetadata {
368
+ * level: 1,
369
+ * text: "Main Title".to_string(),
370
+ * id: Some("main-title".to_string()),
371
+ * depth: 0,
372
+ * html_offset: 145,
373
+ * };
374
+ *
375
+ * assert_eq!(header.level, 1);
376
+ * assert!(header.is_valid());
377
+ * ```
378
+ */
241
379
  export class WasmHeaderMetadata {
242
380
  free(): void;
243
381
  [Symbol.dispose](): void;
382
+ /**
383
+ * Validate that the header level is within valid range (1-6).
384
+ *
385
+ * # Returns
386
+ *
387
+ * `true` if level is 1-6, `false` otherwise.
388
+ *
389
+ * # Examples
390
+ *
391
+ * ```
392
+ * # use html_to_markdown_rs::metadata::HeaderMetadata;
393
+ * let valid = HeaderMetadata {
394
+ * level: 3,
395
+ * text: "Title".to_string(),
396
+ * id: None,
397
+ * depth: 2,
398
+ * html_offset: 100,
399
+ * };
400
+ * assert!(valid.is_valid());
401
+ *
402
+ * let invalid = HeaderMetadata {
403
+ * level: 7, // Invalid
404
+ * text: "Title".to_string(),
405
+ * id: None,
406
+ * depth: 2,
407
+ * html_offset: 100,
408
+ * };
409
+ * assert!(!invalid.is_valid());
410
+ * ```
411
+ */
244
412
  isValid(): boolean;
245
413
  constructor(level: number, text: string, depth: number, html_offset: number, id?: string | null);
246
414
  depth: number;
@@ -251,12 +419,22 @@ export class WasmHeaderMetadata {
251
419
  text: string;
252
420
  }
253
421
 
422
+ /**
423
+ * Heading style options for Markdown output.
424
+ *
425
+ * Controls how headings (h1-h6) are rendered in the output Markdown.
426
+ */
254
427
  export enum WasmHeadingStyle {
255
428
  Underlined = 0,
256
429
  Atx = 1,
257
430
  AtxClosed = 2,
258
431
  }
259
432
 
433
+ /**
434
+ * Highlight rendering style for `<mark>` elements.
435
+ *
436
+ * Controls how highlighted text is rendered in Markdown output.
437
+ */
260
438
  export enum WasmHighlightStyle {
261
439
  DoubleEqual = 0,
262
440
  Html = 1,
@@ -264,6 +442,27 @@ export enum WasmHighlightStyle {
264
442
  None = 3,
265
443
  }
266
444
 
445
+ /**
446
+ * Comprehensive metadata extraction result from an HTML document.
447
+ *
448
+ * Contains all extracted metadata types in a single structure,
449
+ * suitable for serialization and transmission across language boundaries.
450
+ *
451
+ * # Examples
452
+ *
453
+ * ```
454
+ * # use html_to_markdown_rs::metadata::HtmlMetadata;
455
+ * let metadata = HtmlMetadata {
456
+ * document: Default::default(),
457
+ * headers: Vec::new(),
458
+ * links: Vec::new(),
459
+ * images: Vec::new(),
460
+ * structured_data: Vec::new(),
461
+ * };
462
+ *
463
+ * assert!(metadata.headers.is_empty());
464
+ * ```
465
+ */
267
466
  export class WasmHtmlMetadata {
268
467
  free(): void;
269
468
  [Symbol.dispose](): void;
@@ -275,21 +474,48 @@ export class WasmHtmlMetadata {
275
474
  structuredData: WasmStructuredData[];
276
475
  }
277
476
 
477
+ /**
478
+ * Image metadata with source and dimensions.
479
+ *
480
+ * Captures `<img>` elements and inline `<svg>` elements with metadata
481
+ * for image analysis and optimization.
482
+ *
483
+ * # Examples
484
+ *
485
+ * ```
486
+ * # use html_to_markdown_rs::metadata::{ImageMetadata, ImageType};
487
+ * let img = ImageMetadata {
488
+ * src: "https://example.com/image.jpg".to_string(),
489
+ * alt: Some("An example image".to_string()),
490
+ * title: Some("Example".to_string()),
491
+ * dimensions: Some((800, 600)),
492
+ * image_type: ImageType::External,
493
+ * attributes: Default::default(),
494
+ * };
495
+ *
496
+ * assert_eq!(img.image_type, ImageType::External);
497
+ * ```
498
+ */
278
499
  export class WasmImageMetadata {
279
500
  free(): void;
280
501
  [Symbol.dispose](): void;
281
- constructor(src: string, image_type: WasmImageType, attributes: any, alt?: string | null, title?: string | null, dimensions?: string | null);
502
+ constructor(src: string, image_type: WasmImageType, attributes: any, alt?: string | null, title?: string | null, dimensions?: Uint32Array | null);
282
503
  get alt(): string | undefined;
283
504
  set alt(value: string | null | undefined);
284
505
  attributes: any;
285
- get dimensions(): string | undefined;
286
- set dimensions(value: string | null | undefined);
506
+ get dimensions(): Uint32Array | undefined;
507
+ set dimensions(value: Uint32Array | null | undefined);
287
508
  imageType: WasmImageType;
288
509
  src: string;
289
510
  get title(): string | undefined;
290
511
  set title(value: string | null | undefined);
291
512
  }
292
513
 
514
+ /**
515
+ * Image source classification for proper handling and processing.
516
+ *
517
+ * Determines whether an image is embedded (data URI), inline SVG, external, or relative.
518
+ */
293
519
  export enum WasmImageType {
294
520
  DataUri = 0,
295
521
  InlineSvg = 1,
@@ -297,9 +523,52 @@ export enum WasmImageType {
297
523
  Relative = 3,
298
524
  }
299
525
 
526
+ /**
527
+ * Hyperlink metadata with categorization and attributes.
528
+ *
529
+ * Represents `<a>` elements with parsed href values, text content, and link type classification.
530
+ *
531
+ * # Examples
532
+ *
533
+ * ```
534
+ * # use html_to_markdown_rs::metadata::{LinkMetadata, LinkType};
535
+ * let link = LinkMetadata {
536
+ * href: "https://example.com".to_string(),
537
+ * text: "Example".to_string(),
538
+ * title: Some("Visit Example".to_string()),
539
+ * link_type: LinkType::External,
540
+ * rel: vec!["nofollow".to_string()],
541
+ * attributes: Default::default(),
542
+ * };
543
+ *
544
+ * assert_eq!(link.link_type, LinkType::External);
545
+ * assert_eq!(link.text, "Example");
546
+ * ```
547
+ */
300
548
  export class WasmLinkMetadata {
301
549
  free(): void;
302
550
  [Symbol.dispose](): void;
551
+ /**
552
+ * Classify a link based on href value.
553
+ *
554
+ * # Arguments
555
+ *
556
+ * * `href` - The href attribute value
557
+ *
558
+ * # Returns
559
+ *
560
+ * Appropriate [`LinkType`] based on protocol and content.
561
+ *
562
+ * # Examples
563
+ *
564
+ * ```
565
+ * # use html_to_markdown_rs::metadata::{LinkMetadata, LinkType};
566
+ * assert_eq!(LinkMetadata::classify_link("#section"), LinkType::Anchor);
567
+ * assert_eq!(LinkMetadata::classify_link("mailto:test@example.com"), LinkType::Email);
568
+ * assert_eq!(LinkMetadata::classify_link("tel:+1234567890"), LinkType::Phone);
569
+ * assert_eq!(LinkMetadata::classify_link("https://example.com"), LinkType::External);
570
+ * ```
571
+ */
303
572
  static classifyLink(href: string): WasmLinkType;
304
573
  constructor(href: string, text: string, link_type: WasmLinkType, rel: string[], attributes: any, title?: string | null);
305
574
  attributes: any;
@@ -311,11 +580,22 @@ export class WasmLinkMetadata {
311
580
  set title(value: string | null | undefined);
312
581
  }
313
582
 
583
+ /**
584
+ * Link rendering style in Markdown output.
585
+ *
586
+ * Controls whether links and images use inline `[text](url)` syntax or
587
+ * reference-style `[text][1]` syntax with definitions collected at the end.
588
+ */
314
589
  export enum WasmLinkStyle {
315
590
  Inline = 0,
316
591
  Reference = 1,
317
592
  }
318
593
 
594
+ /**
595
+ * Link classification based on href value and document context.
596
+ *
597
+ * Used to categorize links during extraction for filtering and analysis.
598
+ */
319
599
  export enum WasmLinkType {
320
600
  Anchor = 0,
321
601
  Internal = 1,
@@ -325,51 +605,31 @@ export enum WasmLinkType {
325
605
  Other = 5,
326
606
  }
327
607
 
608
+ /**
609
+ * List indentation character type.
610
+ *
611
+ * Controls whether list items are indented with spaces or tabs.
612
+ */
328
613
  export enum WasmListIndentType {
329
614
  Spaces = 0,
330
615
  Tabs = 1,
331
616
  }
332
617
 
333
- export class WasmMetadataConfig {
334
- free(): void;
335
- [Symbol.dispose](): void;
336
- anyEnabled(): boolean;
337
- applyUpdate(_update: WasmMetadataConfigUpdate): void;
338
- static default(): WasmMetadataConfig;
339
- static from(update: WasmMetadataConfigUpdate): WasmMetadataConfig;
340
- static fromUpdate(update: WasmMetadataConfigUpdate): WasmMetadataConfig;
341
- constructor(extract_document?: boolean | null, extract_headers?: boolean | null, extract_links?: boolean | null, extract_images?: boolean | null, extract_structured_data?: boolean | null, max_structured_data_size?: number | null);
342
- extractDocument: boolean;
343
- extractHeaders: boolean;
344
- extractImages: boolean;
345
- extractLinks: boolean;
346
- extractStructuredData: boolean;
347
- maxStructuredDataSize: number;
348
- }
349
-
350
- export class WasmMetadataConfigUpdate {
351
- free(): void;
352
- [Symbol.dispose](): void;
353
- constructor(extract_document?: boolean | null, extract_headers?: boolean | null, extract_links?: boolean | null, extract_images?: boolean | null, extract_structured_data?: boolean | null, max_structured_data_size?: number | null);
354
- get extractDocument(): boolean | undefined;
355
- set extractDocument(value: boolean | null | undefined);
356
- get extractHeaders(): boolean | undefined;
357
- set extractHeaders(value: boolean | null | undefined);
358
- get extractImages(): boolean | undefined;
359
- set extractImages(value: boolean | null | undefined);
360
- get extractLinks(): boolean | undefined;
361
- set extractLinks(value: boolean | null | undefined);
362
- get extractStructuredData(): boolean | undefined;
363
- set extractStructuredData(value: boolean | null | undefined);
364
- get maxStructuredDataSize(): number | undefined;
365
- set maxStructuredDataSize(value: number | null | undefined);
366
- }
367
-
618
+ /**
619
+ * Line break syntax in Markdown output.
620
+ *
621
+ * Controls how hard line breaks (from `<br>` or line breaks in source) are rendered.
622
+ */
368
623
  export enum WasmNewlineStyle {
369
624
  Spaces = 0,
370
625
  Backslash = 1,
371
626
  }
372
627
 
628
+ /**
629
+ * The semantic content type of a document node.
630
+ *
631
+ * Uses internally tagged representation (`"node_type": "heading"`) for JSON serialization.
632
+ */
373
633
  export enum WasmNodeContent {
374
634
  Heading = 0,
375
635
  Paragraph = 1,
@@ -386,18 +646,167 @@ export enum WasmNodeContent {
386
646
  Group = 12,
387
647
  }
388
648
 
649
+ /**
650
+ * Context information passed to all visitor methods.
651
+ *
652
+ * Provides comprehensive metadata about the current node being visited,
653
+ * including its type, attributes, position in the DOM tree, and parent context.
654
+ */
655
+ export class WasmNodeContext {
656
+ free(): void;
657
+ [Symbol.dispose](): void;
658
+ constructor(node_type: WasmNodeType, tag_name: string, attributes: any, depth: number, index_in_parent: number, is_inline: boolean, parent_tag?: string | null);
659
+ attributes: any;
660
+ depth: number;
661
+ indexInParent: number;
662
+ isInline: boolean;
663
+ nodeType: WasmNodeType;
664
+ get parentTag(): string | undefined;
665
+ set parentTag(value: string | null | undefined);
666
+ tagName: string;
667
+ }
668
+
669
+ /**
670
+ * Node type enumeration covering all HTML element types.
671
+ *
672
+ * This enum categorizes all HTML elements that the converter recognizes,
673
+ * providing a coarse-grained classification for visitor dispatch.
674
+ */
675
+ export enum WasmNodeType {
676
+ Text = 0,
677
+ Element = 1,
678
+ Heading = 2,
679
+ Paragraph = 3,
680
+ Div = 4,
681
+ Blockquote = 5,
682
+ Pre = 6,
683
+ Hr = 7,
684
+ List = 8,
685
+ ListItem = 9,
686
+ DefinitionList = 10,
687
+ DefinitionTerm = 11,
688
+ DefinitionDescription = 12,
689
+ Table = 13,
690
+ TableRow = 14,
691
+ TableCell = 15,
692
+ TableHeader = 16,
693
+ TableBody = 17,
694
+ TableHead = 18,
695
+ TableFoot = 19,
696
+ Link = 20,
697
+ Image = 21,
698
+ Strong = 22,
699
+ Em = 23,
700
+ Code = 24,
701
+ Strikethrough = 25,
702
+ Underline = 26,
703
+ Subscript = 27,
704
+ Superscript = 28,
705
+ Mark = 29,
706
+ Small = 30,
707
+ Br = 31,
708
+ Span = 32,
709
+ Article = 33,
710
+ Section = 34,
711
+ Nav = 35,
712
+ Aside = 36,
713
+ Header = 37,
714
+ Footer = 38,
715
+ Main = 39,
716
+ Figure = 40,
717
+ Figcaption = 41,
718
+ Time = 42,
719
+ Details = 43,
720
+ Summary = 44,
721
+ Form = 45,
722
+ Input = 46,
723
+ Select = 47,
724
+ Option = 48,
725
+ Button = 49,
726
+ Textarea = 50,
727
+ Label = 51,
728
+ Fieldset = 52,
729
+ Legend = 53,
730
+ Audio = 54,
731
+ Video = 55,
732
+ Picture = 56,
733
+ Source = 57,
734
+ Iframe = 58,
735
+ Svg = 59,
736
+ Canvas = 60,
737
+ Ruby = 61,
738
+ Rt = 62,
739
+ Rp = 63,
740
+ Abbr = 64,
741
+ Kbd = 65,
742
+ Samp = 66,
743
+ Var = 67,
744
+ Cite = 68,
745
+ Q = 69,
746
+ Del = 70,
747
+ Ins = 71,
748
+ Data = 72,
749
+ Meter = 73,
750
+ Progress = 74,
751
+ Output = 75,
752
+ Template = 76,
753
+ Slot = 77,
754
+ Html = 78,
755
+ Head = 79,
756
+ Body = 80,
757
+ Title = 81,
758
+ Meta = 82,
759
+ LinkTag = 83,
760
+ Style = 84,
761
+ Script = 85,
762
+ Base = 86,
763
+ Custom = 87,
764
+ }
765
+
766
+ /**
767
+ * Output format for conversion.
768
+ *
769
+ * Specifies the target markup language format for the conversion output.
770
+ */
389
771
  export enum WasmOutputFormat {
390
772
  Markdown = 0,
391
773
  Djot = 1,
392
774
  Plain = 2,
393
775
  }
394
776
 
777
+ /**
778
+ * HTML preprocessing options for document cleanup before conversion.
779
+ */
395
780
  export class WasmPreprocessingOptions {
396
781
  free(): void;
397
782
  [Symbol.dispose](): void;
783
+ /**
784
+ * Apply a partial update to these preprocessing options.
785
+ *
786
+ * Any specified fields in the update will override the current values.
787
+ * Unspecified fields (None) are left unchanged.
788
+ *
789
+ * # Arguments
790
+ *
791
+ * * `update` - Partial preprocessing options update
792
+ */
398
793
  applyUpdate(_update: WasmPreprocessingOptionsUpdate): void;
399
794
  static default(): WasmPreprocessingOptions;
400
795
  static from(update: WasmPreprocessingOptionsUpdate): WasmPreprocessingOptions;
796
+ /**
797
+ * Create new preprocessing options from a partial update.
798
+ *
799
+ * Creates a new `PreprocessingOptions` struct with defaults, then applies the update.
800
+ * Fields not specified in the update keep their default values.
801
+ *
802
+ * # Arguments
803
+ *
804
+ * * `update` - Partial preprocessing options update
805
+ *
806
+ * # Returns
807
+ *
808
+ * New `PreprocessingOptions` with specified updates applied to defaults
809
+ */
401
810
  static fromUpdate(update: WasmPreprocessingOptionsUpdate): WasmPreprocessingOptions;
402
811
  constructor(enabled?: boolean | null, preset?: WasmPreprocessingPreset | null, remove_navigation?: boolean | null, remove_forms?: boolean | null);
403
812
  enabled: boolean;
@@ -406,6 +815,13 @@ export class WasmPreprocessingOptions {
406
815
  removeNavigation: boolean;
407
816
  }
408
817
 
818
+ /**
819
+ * Partial update for `PreprocessingOptions`.
820
+ *
821
+ * This struct uses `Option<T>` to represent optional fields that can be selectively updated.
822
+ * Only specified fields (Some values) will override existing options; None values leave the
823
+ * corresponding fields unchanged when applied via [`PreprocessingOptions::apply_update`].
824
+ */
409
825
  export class WasmPreprocessingOptionsUpdate {
410
826
  free(): void;
411
827
  [Symbol.dispose](): void;
@@ -420,12 +836,20 @@ export class WasmPreprocessingOptionsUpdate {
420
836
  set removeNavigation(value: boolean | null | undefined);
421
837
  }
422
838
 
839
+ /**
840
+ * HTML preprocessing aggressiveness level.
841
+ *
842
+ * Controls the extent of cleanup performed before conversion. Higher levels remove more elements.
843
+ */
423
844
  export enum WasmPreprocessingPreset {
424
845
  Minimal = 0,
425
846
  Standard = 1,
426
847
  Aggressive = 2,
427
848
  }
428
849
 
850
+ /**
851
+ * A non-fatal warning generated during HTML processing.
852
+ */
429
853
  export class WasmProcessingWarning {
430
854
  free(): void;
431
855
  [Symbol.dispose](): void;
@@ -434,6 +858,25 @@ export class WasmProcessingWarning {
434
858
  message: string;
435
859
  }
436
860
 
861
+ /**
862
+ * Structured data block (JSON-LD, Microdata, or RDFa).
863
+ *
864
+ * Represents machine-readable structured data found in the document.
865
+ * JSON-LD blocks are collected as raw JSON strings for flexibility.
866
+ *
867
+ * # Examples
868
+ *
869
+ * ```
870
+ * # use html_to_markdown_rs::metadata::{StructuredData, StructuredDataType};
871
+ * let schema = StructuredData {
872
+ * data_type: StructuredDataType::JsonLd,
873
+ * raw_json: r#"{"@context":"https://schema.org","@type":"Article"}"#.to_string(),
874
+ * schema_type: Some("Article".to_string()),
875
+ * };
876
+ *
877
+ * assert_eq!(schema.data_type, StructuredDataType::JsonLd);
878
+ * ```
879
+ */
437
880
  export class WasmStructuredData {
438
881
  free(): void;
439
882
  [Symbol.dispose](): void;
@@ -444,12 +887,20 @@ export class WasmStructuredData {
444
887
  set schemaType(value: string | null | undefined);
445
888
  }
446
889
 
890
+ /**
891
+ * Structured data format type.
892
+ *
893
+ * Identifies the schema/format used for structured data markup.
894
+ */
447
895
  export enum WasmStructuredDataType {
448
896
  JsonLd = 0,
449
897
  Microdata = 1,
450
898
  RDFa = 2,
451
899
  }
452
900
 
901
+ /**
902
+ * A top-level extracted table with both structured data and markdown representation.
903
+ */
453
904
  export class WasmTableData {
454
905
  free(): void;
455
906
  [Symbol.dispose](): void;
@@ -458,6 +909,9 @@ export class WasmTableData {
458
909
  markdown: string;
459
910
  }
460
911
 
912
+ /**
913
+ * A structured table grid with cell-level data including spans.
914
+ */
461
915
  export class WasmTableGrid {
462
916
  free(): void;
463
917
  [Symbol.dispose](): void;
@@ -467,6 +921,11 @@ export class WasmTableGrid {
467
921
  rows: number;
468
922
  }
469
923
 
924
+ /**
925
+ * An inline text annotation with byte-range offsets.
926
+ *
927
+ * Annotations describe formatting (bold, italic, etc.) and links within a node's text content.
928
+ */
470
929
  export class WasmTextAnnotation {
471
930
  free(): void;
472
931
  [Symbol.dispose](): void;
@@ -476,12 +935,35 @@ export class WasmTextAnnotation {
476
935
  start: number;
477
936
  }
478
937
 
938
+ /**
939
+ * Text directionality of document content.
940
+ *
941
+ * Corresponds to the HTML `dir` attribute and `bdi` element directionality.
942
+ */
479
943
  export enum WasmTextDirection {
480
944
  LeftToRight = 0,
481
945
  RightToLeft = 1,
482
946
  Auto = 2,
483
947
  }
484
948
 
949
+ /**
950
+ * Result of a visitor callback.
951
+ *
952
+ * Allows visitors to control the conversion flow by either proceeding
953
+ * with default behavior, providing custom output, skipping elements,
954
+ * preserving HTML, or signaling errors.
955
+ */
956
+ export enum WasmVisitResult {
957
+ Continue = 0,
958
+ Custom = 1,
959
+ Skip = 2,
960
+ PreserveHtml = 3,
961
+ Error = 4,
962
+ }
963
+
964
+ /**
965
+ * Categories of processing warnings.
966
+ */
485
967
  export enum WasmWarningKind {
486
968
  ImageExtractionFailed = 0,
487
969
  EncodingFallback = 1,
@@ -491,12 +973,17 @@ export enum WasmWarningKind {
491
973
  DepthLimitExceeded = 5,
492
974
  }
493
975
 
976
+ /**
977
+ * Whitespace handling strategy during conversion.
978
+ *
979
+ * Determines how sequences of whitespace characters (spaces, tabs, newlines) are processed.
980
+ */
494
981
  export enum WasmWhitespaceMode {
495
982
  Normalized = 0,
496
983
  Strict = 1,
497
984
  }
498
985
 
499
- export function convert(html: string, options?: WasmConversionOptions | null): WasmConversionResult;
986
+ export function convert(html: string, options?: WasmConversionOptions | null, visitor?: any | null): WasmConversionResult;
500
987
 
501
988
  export declare function initWasm(): Promise<void>;
502
989
  export declare const wasmReady: Promise<void>;