@kreuzberg/html-to-markdown-wasm 3.4.1 → 3.5.0-rc.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +233 -0
- package/package.json +1 -1
- package/pkg/bundler/LICENSE +21 -0
- package/pkg/bundler/README.md +233 -0
- package/pkg/bundler/html_to_markdown_wasm.d.ts +107 -266
- package/pkg/bundler/html_to_markdown_wasm.js +1 -1
- package/pkg/bundler/html_to_markdown_wasm_bg.js +798 -591
- package/pkg/bundler/html_to_markdown_wasm_bg.wasm +0 -0
- package/pkg/bundler/html_to_markdown_wasm_bg.wasm.d.ts +54 -20
- package/pkg/bundler/package.json +1 -1
- package/pkg/deno/LICENSE +21 -0
- package/pkg/deno/README.md +233 -0
- package/pkg/deno/html_to_markdown_wasm.d.ts +107 -266
- package/pkg/deno/html_to_markdown_wasm.js +798 -591
- package/pkg/deno/html_to_markdown_wasm_bg.wasm +0 -0
- package/pkg/deno/html_to_markdown_wasm_bg.wasm.d.ts +54 -20
- package/pkg/nodejs/LICENSE +21 -0
- package/pkg/nodejs/README.md +233 -0
- package/pkg/nodejs/html_to_markdown_wasm.d.ts +107 -266
- package/pkg/nodejs/html_to_markdown_wasm.js +798 -592
- package/pkg/nodejs/html_to_markdown_wasm_bg.wasm +0 -0
- package/pkg/nodejs/html_to_markdown_wasm_bg.wasm.d.ts +54 -20
- package/pkg/nodejs/package.json +1 -1
- package/pkg/web/LICENSE +21 -0
- package/pkg/web/README.md +233 -0
- package/pkg/web/html_to_markdown_wasm.d.ts +161 -286
- package/pkg/web/html_to_markdown_wasm.js +798 -591
- package/pkg/web/html_to_markdown_wasm_bg.wasm +0 -0
- package/pkg/web/html_to_markdown_wasm_bg.wasm.d.ts +54 -20
- package/pkg/web/package.json +1 -1
|
@@ -6,16 +6,16 @@
|
|
|
6
6
|
*
|
|
7
7
|
* Uses internally tagged representation (`"annotation_type": "bold"`) for JSON serialization.
|
|
8
8
|
*/
|
|
9
|
-
export
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
9
|
+
export class WasmAnnotationKind {
|
|
10
|
+
free(): void;
|
|
11
|
+
[Symbol.dispose](): void;
|
|
12
|
+
static default(): WasmAnnotationKind;
|
|
13
|
+
constructor();
|
|
14
|
+
annotationType: string;
|
|
15
|
+
get title(): string | undefined;
|
|
16
|
+
set title(value: string | null | undefined);
|
|
17
|
+
get url(): string | undefined;
|
|
18
|
+
set url(value: string | null | undefined);
|
|
19
19
|
}
|
|
20
20
|
|
|
21
21
|
/**
|
|
@@ -32,38 +32,15 @@ export enum WasmCodeBlockStyle {
|
|
|
32
32
|
/**
|
|
33
33
|
* Main conversion options for HTML to Markdown conversion.
|
|
34
34
|
*
|
|
35
|
-
* Use
|
|
35
|
+
* Use `ConversionOptions.builder()` to construct, or `Default.default()` for defaults.
|
|
36
36
|
*
|
|
37
37
|
* # Example
|
|
38
|
-
*
|
|
39
|
-
* ```text
|
|
40
|
-
* use html_to_markdown_rs::ConversionOptions;
|
|
41
|
-
*
|
|
42
|
-
* let options = ConversionOptions::builder()
|
|
43
|
-
* .heading_style(HeadingStyle::Atx)
|
|
44
|
-
* .wrap(true)
|
|
45
|
-
* .wrap_width(100)
|
|
46
|
-
* .build();
|
|
47
|
-
* ```
|
|
48
38
|
*/
|
|
49
39
|
export class WasmConversionOptions {
|
|
50
40
|
free(): void;
|
|
51
41
|
[Symbol.dispose](): void;
|
|
52
|
-
/**
|
|
53
|
-
* Apply a partial update to these conversion options.
|
|
54
|
-
*/
|
|
55
|
-
applyUpdate(update: WasmConversionOptionsUpdate): void;
|
|
56
|
-
/**
|
|
57
|
-
* Create a new builder with default values.
|
|
58
|
-
*/
|
|
59
|
-
static builder(): WasmConversionOptionsBuilder;
|
|
60
42
|
static default(): WasmConversionOptions;
|
|
61
|
-
|
|
62
|
-
/**
|
|
63
|
-
* Create from a partial update, applying to defaults.
|
|
64
|
-
*/
|
|
65
|
-
static fromUpdate(update: WasmConversionOptionsUpdate): WasmConversionOptions;
|
|
66
|
-
constructor(heading_style?: WasmHeadingStyle | null, list_indent_type?: WasmListIndentType | null, list_indent_width?: number | null, bullets?: string | null, strong_em_symbol?: string | null, escape_asterisks?: boolean | null, escape_underscores?: boolean | null, escape_misc?: boolean | null, escape_ascii?: boolean | null, code_language?: string | null, autolinks?: boolean | null, default_title?: boolean | null, br_in_tables?: boolean | null, highlight_style?: WasmHighlightStyle | null, extract_metadata?: boolean | null, whitespace_mode?: WasmWhitespaceMode | null, strip_newlines?: boolean | null, wrap?: boolean | null, wrap_width?: number | null, convert_as_inline?: boolean | null, sub_symbol?: string | null, sup_symbol?: string | null, newline_style?: WasmNewlineStyle | null, code_block_style?: WasmCodeBlockStyle | null, keep_inline_images_in?: string[] | null, preprocessing?: WasmPreprocessingOptions | null, encoding?: string | null, debug?: boolean | null, strip_tags?: string[] | null, preserve_tags?: string[] | null, skip_images?: boolean | null, link_style?: WasmLinkStyle | null, output_format?: WasmOutputFormat | null, include_document_structure?: boolean | null, extract_images?: boolean | null, max_image_size?: bigint | null, capture_svg?: boolean | null, infer_dimensions?: boolean | null, exclude_selectors?: string[] | null, max_depth?: number | null);
|
|
43
|
+
constructor(headingStyle?: WasmHeadingStyle | null, listIndentType?: WasmListIndentType | null, listIndentWidth?: number | null, bullets?: string | null, strongEmSymbol?: string | null, escapeAsterisks?: boolean | null, escapeUnderscores?: boolean | null, escapeMisc?: boolean | null, escapeAscii?: boolean | null, codeLanguage?: string | null, autolinks?: boolean | null, defaultTitle?: boolean | null, brInTables?: boolean | null, compactTables?: boolean | null, highlightStyle?: WasmHighlightStyle | null, extractMetadata?: boolean | null, whitespaceMode?: WasmWhitespaceMode | null, stripNewlines?: boolean | null, wrap?: boolean | null, wrapWidth?: number | null, convertAsInline?: boolean | null, subSymbol?: string | null, supSymbol?: string | null, newlineStyle?: WasmNewlineStyle | null, codeBlockStyle?: WasmCodeBlockStyle | null, keepInlineImagesIn?: string[] | null, preprocessing?: WasmPreprocessingOptions | null, encoding?: string | null, debug?: boolean | null, stripTags?: string[] | null, preserveTags?: string[] | null, skipImages?: boolean | null, linkStyle?: WasmLinkStyle | null, outputFormat?: WasmOutputFormat | null, includeDocumentStructure?: boolean | null, extractImages?: boolean | null, maxImageSize?: bigint | null, captureSvg?: boolean | null, inferDimensions?: boolean | null, excludeSelectors?: string[] | null, maxDepth?: number | null);
|
|
67
44
|
autolinks: boolean;
|
|
68
45
|
brInTables: boolean;
|
|
69
46
|
bullets: string;
|
|
@@ -71,6 +48,7 @@ export class WasmConversionOptions {
|
|
|
71
48
|
get codeBlockStyle(): string;
|
|
72
49
|
set codeBlockStyle(value: WasmCodeBlockStyle);
|
|
73
50
|
codeLanguage: string;
|
|
51
|
+
compactTables: boolean;
|
|
74
52
|
convertAsInline: boolean;
|
|
75
53
|
debug: boolean;
|
|
76
54
|
defaultTitle: boolean;
|
|
@@ -117,56 +95,17 @@ export class WasmConversionOptions {
|
|
|
117
95
|
wrapWidth: number;
|
|
118
96
|
}
|
|
119
97
|
|
|
120
|
-
/**
|
|
121
|
-
* Builder for [`ConversionOptions`].
|
|
122
|
-
*
|
|
123
|
-
* All fields start with default values. Call `.build()` to produce the final options.
|
|
124
|
-
*/
|
|
125
|
-
export class WasmConversionOptionsBuilder {
|
|
126
|
-
private constructor();
|
|
127
|
-
free(): void;
|
|
128
|
-
[Symbol.dispose](): void;
|
|
129
|
-
/**
|
|
130
|
-
* Build the final [`ConversionOptions`].
|
|
131
|
-
*/
|
|
132
|
-
build(): WasmConversionOptions;
|
|
133
|
-
/**
|
|
134
|
-
* Set the list of CSS selectors for elements to exclude entirely from output.
|
|
135
|
-
*/
|
|
136
|
-
excludeSelectors(selectors: string[]): WasmConversionOptionsBuilder;
|
|
137
|
-
/**
|
|
138
|
-
* Set the list of HTML tag names whose `<img>` children are kept inline.
|
|
139
|
-
*/
|
|
140
|
-
keepInlineImagesIn(tags: string[]): WasmConversionOptionsBuilder;
|
|
141
|
-
/**
|
|
142
|
-
* Set the pre-processing options applied to the HTML before conversion.
|
|
143
|
-
*/
|
|
144
|
-
preprocessing(preprocessing: WasmPreprocessingOptions): WasmConversionOptionsBuilder;
|
|
145
|
-
/**
|
|
146
|
-
* Set the list of HTML tag names that are preserved verbatim in output.
|
|
147
|
-
*/
|
|
148
|
-
preserveTags(tags: string[]): WasmConversionOptionsBuilder;
|
|
149
|
-
/**
|
|
150
|
-
* Set the list of HTML tag names whose content is stripped from output.
|
|
151
|
-
*/
|
|
152
|
-
stripTags(tags: string[]): WasmConversionOptionsBuilder;
|
|
153
|
-
/**
|
|
154
|
-
* Set the visitor used during conversion.
|
|
155
|
-
*/
|
|
156
|
-
visitor(visitor?: WasmVisitorHandle | null): WasmConversionOptionsBuilder;
|
|
157
|
-
}
|
|
158
|
-
|
|
159
98
|
/**
|
|
160
99
|
* Partial update for `ConversionOptions`.
|
|
161
100
|
*
|
|
162
101
|
* Uses `Option<T>` fields for selective updates. Bindings use this to construct
|
|
163
|
-
* options from language-native types. Prefer
|
|
102
|
+
* options from language-native types. Prefer `ConversionOptionsBuilder` for Rust code.
|
|
164
103
|
*/
|
|
165
104
|
export class WasmConversionOptionsUpdate {
|
|
166
105
|
free(): void;
|
|
167
106
|
[Symbol.dispose](): void;
|
|
168
107
|
static default(): WasmConversionOptionsUpdate;
|
|
169
|
-
constructor(
|
|
108
|
+
constructor(headingStyle?: WasmHeadingStyle | null, listIndentType?: WasmListIndentType | null, listIndentWidth?: number | null, bullets?: string | null, strongEmSymbol?: string | null, escapeAsterisks?: boolean | null, escapeUnderscores?: boolean | null, escapeMisc?: boolean | null, escapeAscii?: boolean | null, codeLanguage?: string | null, autolinks?: boolean | null, defaultTitle?: boolean | null, brInTables?: boolean | null, compactTables?: boolean | null, highlightStyle?: WasmHighlightStyle | null, extractMetadata?: boolean | null, whitespaceMode?: WasmWhitespaceMode | null, stripNewlines?: boolean | null, wrap?: boolean | null, wrapWidth?: number | null, convertAsInline?: boolean | null, subSymbol?: string | null, supSymbol?: string | null, newlineStyle?: WasmNewlineStyle | null, codeBlockStyle?: WasmCodeBlockStyle | null, keepInlineImagesIn?: string[] | null, preprocessing?: WasmPreprocessingOptionsUpdate | null, encoding?: string | null, debug?: boolean | null, stripTags?: string[] | null, preserveTags?: string[] | null, skipImages?: boolean | null, linkStyle?: WasmLinkStyle | null, outputFormat?: WasmOutputFormat | null, includeDocumentStructure?: boolean | null, extractImages?: boolean | null, maxImageSize?: bigint | null, captureSvg?: boolean | null, inferDimensions?: boolean | null, maxDepth?: number | null, excludeSelectors?: string[] | null);
|
|
170
109
|
get autolinks(): boolean | undefined;
|
|
171
110
|
set autolinks(value: boolean | null | undefined);
|
|
172
111
|
get brInTables(): boolean | undefined;
|
|
@@ -179,6 +118,8 @@ export class WasmConversionOptionsUpdate {
|
|
|
179
118
|
set codeBlockStyle(value: WasmCodeBlockStyle | null | undefined);
|
|
180
119
|
get codeLanguage(): string | undefined;
|
|
181
120
|
set codeLanguage(value: string | null | undefined);
|
|
121
|
+
get compactTables(): boolean | undefined;
|
|
122
|
+
set compactTables(value: boolean | null | undefined);
|
|
182
123
|
get convertAsInline(): boolean | undefined;
|
|
183
124
|
set convertAsInline(value: boolean | null | undefined);
|
|
184
125
|
get debug(): boolean | undefined;
|
|
@@ -289,23 +230,12 @@ export class WasmConversionResult {
|
|
|
289
230
|
* and browsers for document indexing and presentation.
|
|
290
231
|
*
|
|
291
232
|
* # Examples
|
|
292
|
-
*
|
|
293
|
-
* ```
|
|
294
|
-
* let doc = DocumentMetadata {
|
|
295
|
-
* title: Some("My Article".to_string()),
|
|
296
|
-
* description: Some("A great article about Rust".to_string()),
|
|
297
|
-
* keywords: vec!["rust".to_string(), "programming".to_string()],
|
|
298
|
-
* ..Default::default()
|
|
299
|
-
* };
|
|
300
|
-
*
|
|
301
|
-
* assert_eq!(doc.title, Some("My Article".to_string()));
|
|
302
|
-
* ```
|
|
303
233
|
*/
|
|
304
234
|
export class WasmDocumentMetadata {
|
|
305
235
|
free(): void;
|
|
306
236
|
[Symbol.dispose](): void;
|
|
307
237
|
static default(): WasmDocumentMetadata;
|
|
308
|
-
constructor(keywords?: string[] | null,
|
|
238
|
+
constructor(keywords?: string[] | null, openGraph?: any | null, twitterCard?: any | null, metaTags?: any | null, title?: string | null, description?: string | null, author?: string | null, canonicalUrl?: string | null, baseHref?: string | null, language?: string | null, textDirection?: WasmTextDirection | null);
|
|
309
239
|
get author(): string | undefined;
|
|
310
240
|
set author(value: string | null | undefined);
|
|
311
241
|
get baseHref(): string | undefined;
|
|
@@ -333,13 +263,12 @@ export class WasmDocumentNode {
|
|
|
333
263
|
free(): void;
|
|
334
264
|
[Symbol.dispose](): void;
|
|
335
265
|
static default(): WasmDocumentNode;
|
|
336
|
-
constructor(id: string, content:
|
|
266
|
+
constructor(id: string, content: any, children: Uint32Array, annotations: WasmTextAnnotation[], parent?: number | null, attributes?: any | null);
|
|
337
267
|
annotations: WasmTextAnnotation[];
|
|
338
268
|
get attributes(): any | undefined;
|
|
339
269
|
set attributes(value: any | null | undefined);
|
|
340
270
|
children: Uint32Array;
|
|
341
|
-
|
|
342
|
-
set content(value: WasmNodeContent);
|
|
271
|
+
content: any;
|
|
343
272
|
id: string;
|
|
344
273
|
get parent(): number | undefined;
|
|
345
274
|
set parent(value: number | null | undefined);
|
|
@@ -354,7 +283,7 @@ export class WasmDocumentStructure {
|
|
|
354
283
|
free(): void;
|
|
355
284
|
[Symbol.dispose](): void;
|
|
356
285
|
static default(): WasmDocumentStructure;
|
|
357
|
-
constructor(nodes: WasmDocumentNode[],
|
|
286
|
+
constructor(nodes: WasmDocumentNode[], sourceFormat?: string | null);
|
|
358
287
|
nodes: WasmDocumentNode[];
|
|
359
288
|
get sourceFormat(): string | undefined;
|
|
360
289
|
set sourceFormat(value: string | null | undefined);
|
|
@@ -367,7 +296,7 @@ export class WasmGridCell {
|
|
|
367
296
|
free(): void;
|
|
368
297
|
[Symbol.dispose](): void;
|
|
369
298
|
static default(): WasmGridCell;
|
|
370
|
-
constructor(content: string, row: number, col: number,
|
|
299
|
+
constructor(content: string, row: number, col: number, rowSpan: number, colSpan: number, isHeader: boolean);
|
|
371
300
|
col: number;
|
|
372
301
|
colSpan: number;
|
|
373
302
|
content: string;
|
|
@@ -383,19 +312,6 @@ export class WasmGridCell {
|
|
|
383
312
|
* and position in the document structure.
|
|
384
313
|
*
|
|
385
314
|
* # Examples
|
|
386
|
-
*
|
|
387
|
-
* ```
|
|
388
|
-
* let header = HeaderMetadata {
|
|
389
|
-
* level: 1,
|
|
390
|
-
* text: "Main Title".to_string(),
|
|
391
|
-
* id: Some("main-title".to_string()),
|
|
392
|
-
* depth: 0,
|
|
393
|
-
* html_offset: 145,
|
|
394
|
-
* };
|
|
395
|
-
*
|
|
396
|
-
* assert_eq!(header.level, 1);
|
|
397
|
-
* assert!(header.is_valid());
|
|
398
|
-
* ```
|
|
399
315
|
*/
|
|
400
316
|
export class WasmHeaderMetadata {
|
|
401
317
|
free(): void;
|
|
@@ -409,29 +325,9 @@ export class WasmHeaderMetadata {
|
|
|
409
325
|
* `true` if level is 1-6, `false` otherwise.
|
|
410
326
|
*
|
|
411
327
|
* # Examples
|
|
412
|
-
*
|
|
413
|
-
* ```
|
|
414
|
-
* let valid = HeaderMetadata {
|
|
415
|
-
* level: 3,
|
|
416
|
-
* text: "Title".to_string(),
|
|
417
|
-
* id: None,
|
|
418
|
-
* depth: 2,
|
|
419
|
-
* html_offset: 100,
|
|
420
|
-
* };
|
|
421
|
-
* assert!(valid.is_valid());
|
|
422
|
-
*
|
|
423
|
-
* let invalid = HeaderMetadata {
|
|
424
|
-
* level: 7, // Invalid
|
|
425
|
-
* text: "Title".to_string(),
|
|
426
|
-
* id: None,
|
|
427
|
-
* depth: 2,
|
|
428
|
-
* html_offset: 100,
|
|
429
|
-
* };
|
|
430
|
-
* assert!(!invalid.is_valid());
|
|
431
|
-
* ```
|
|
432
328
|
*/
|
|
433
329
|
isValid(): boolean;
|
|
434
|
-
constructor(level: number, text: string, depth: number,
|
|
330
|
+
constructor(level: number, text: string, depth: number, htmlOffset: number, id?: string | null);
|
|
435
331
|
depth: number;
|
|
436
332
|
htmlOffset: number;
|
|
437
333
|
get id(): string | undefined;
|
|
@@ -470,24 +366,12 @@ export enum WasmHighlightStyle {
|
|
|
470
366
|
* suitable for serialization and transmission across language boundaries.
|
|
471
367
|
*
|
|
472
368
|
* # Examples
|
|
473
|
-
*
|
|
474
|
-
* ```
|
|
475
|
-
* let metadata = HtmlMetadata {
|
|
476
|
-
* document: Default::default(),
|
|
477
|
-
* headers: Vec::new(),
|
|
478
|
-
* links: Vec::new(),
|
|
479
|
-
* images: Vec::new(),
|
|
480
|
-
* structured_data: Vec::new(),
|
|
481
|
-
* };
|
|
482
|
-
*
|
|
483
|
-
* assert!(metadata.headers.is_empty());
|
|
484
|
-
* ```
|
|
485
369
|
*/
|
|
486
370
|
export class WasmHtmlMetadata {
|
|
487
371
|
free(): void;
|
|
488
372
|
[Symbol.dispose](): void;
|
|
489
373
|
static default(): WasmHtmlMetadata;
|
|
490
|
-
constructor(document?: WasmDocumentMetadata | null, headers?: WasmHeaderMetadata[] | null, links?: WasmLinkMetadata[] | null, images?: WasmImageMetadata[] | null,
|
|
374
|
+
constructor(document?: WasmDocumentMetadata | null, headers?: WasmHeaderMetadata[] | null, links?: WasmLinkMetadata[] | null, images?: WasmImageMetadata[] | null, structuredData?: WasmStructuredData[] | null);
|
|
491
375
|
document: WasmDocumentMetadata;
|
|
492
376
|
headers: WasmHeaderMetadata[];
|
|
493
377
|
images: WasmImageMetadata[];
|
|
@@ -502,25 +386,12 @@ export class WasmHtmlMetadata {
|
|
|
502
386
|
* for image analysis and optimization.
|
|
503
387
|
*
|
|
504
388
|
* # Examples
|
|
505
|
-
*
|
|
506
|
-
* ```
|
|
507
|
-
* let img = ImageMetadata {
|
|
508
|
-
* src: "https://example.com/image.jpg".to_string(),
|
|
509
|
-
* alt: Some("An example image".to_string()),
|
|
510
|
-
* title: Some("Example".to_string()),
|
|
511
|
-
* dimensions: Some((800, 600)),
|
|
512
|
-
* image_type: ImageType::External,
|
|
513
|
-
* attributes: Default::default(),
|
|
514
|
-
* };
|
|
515
|
-
*
|
|
516
|
-
* assert_eq!(img.image_type, ImageType::External);
|
|
517
|
-
* ```
|
|
518
389
|
*/
|
|
519
390
|
export class WasmImageMetadata {
|
|
520
391
|
free(): void;
|
|
521
392
|
[Symbol.dispose](): void;
|
|
522
393
|
static default(): WasmImageMetadata;
|
|
523
|
-
constructor(src: string,
|
|
394
|
+
constructor(src: string, imageType: WasmImageType, attributes: any, alt?: string | null, title?: string | null, dimensions?: Uint32Array | null);
|
|
524
395
|
get alt(): string | undefined;
|
|
525
396
|
set alt(value: string | null | undefined);
|
|
526
397
|
attributes: any;
|
|
@@ -551,47 +422,12 @@ export enum WasmImageType {
|
|
|
551
422
|
* Represents `<a>` elements with parsed href values, text content, and link type classification.
|
|
552
423
|
*
|
|
553
424
|
* # Examples
|
|
554
|
-
*
|
|
555
|
-
* ```
|
|
556
|
-
* let link = LinkMetadata {
|
|
557
|
-
* href: "https://example.com".to_string(),
|
|
558
|
-
* text: "Example".to_string(),
|
|
559
|
-
* title: Some("Visit Example".to_string()),
|
|
560
|
-
* link_type: LinkType::External,
|
|
561
|
-
* rel: vec!["nofollow".to_string()],
|
|
562
|
-
* attributes: Default::default(),
|
|
563
|
-
* };
|
|
564
|
-
*
|
|
565
|
-
* assert_eq!(link.link_type, LinkType::External);
|
|
566
|
-
* assert_eq!(link.text, "Example");
|
|
567
|
-
* ```
|
|
568
425
|
*/
|
|
569
426
|
export class WasmLinkMetadata {
|
|
570
427
|
free(): void;
|
|
571
428
|
[Symbol.dispose](): void;
|
|
572
|
-
/**
|
|
573
|
-
* Classify a link based on href value.
|
|
574
|
-
*
|
|
575
|
-
* # Arguments
|
|
576
|
-
*
|
|
577
|
-
* * `href` - The href attribute value
|
|
578
|
-
*
|
|
579
|
-
* # Returns
|
|
580
|
-
*
|
|
581
|
-
* Appropriate [`LinkType`] based on protocol and content.
|
|
582
|
-
*
|
|
583
|
-
* # Examples
|
|
584
|
-
*
|
|
585
|
-
* ```
|
|
586
|
-
* assert_eq!(LinkMetadata::classify_link("#section"), LinkType::Anchor);
|
|
587
|
-
* assert_eq!(LinkMetadata::classify_link("mailto:test@example.com"), LinkType::Email);
|
|
588
|
-
* assert_eq!(LinkMetadata::classify_link("tel:+1234567890"), LinkType::Phone);
|
|
589
|
-
* assert_eq!(LinkMetadata::classify_link("https://example.com"), LinkType::External);
|
|
590
|
-
* ```
|
|
591
|
-
*/
|
|
592
|
-
static classifyLink(href: string): WasmLinkType;
|
|
593
429
|
static default(): WasmLinkMetadata;
|
|
594
|
-
constructor(href: string, text: string,
|
|
430
|
+
constructor(href: string, text: string, linkType: WasmLinkType, rel: string[], attributes: any, title?: string | null);
|
|
595
431
|
attributes: any;
|
|
596
432
|
href: string;
|
|
597
433
|
get linkType(): string;
|
|
@@ -652,20 +488,44 @@ export enum WasmNewlineStyle {
|
|
|
652
488
|
*
|
|
653
489
|
* Uses internally tagged representation (`"node_type": "heading"`) for JSON serialization.
|
|
654
490
|
*/
|
|
655
|
-
export
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
491
|
+
export class WasmNodeContent {
|
|
492
|
+
free(): void;
|
|
493
|
+
[Symbol.dispose](): void;
|
|
494
|
+
static default(): WasmNodeContent;
|
|
495
|
+
constructor();
|
|
496
|
+
get content(): string | undefined;
|
|
497
|
+
set content(value: string | null | undefined);
|
|
498
|
+
get definition(): string | undefined;
|
|
499
|
+
set definition(value: string | null | undefined);
|
|
500
|
+
get description(): string | undefined;
|
|
501
|
+
set description(value: string | null | undefined);
|
|
502
|
+
get entries(): any | undefined;
|
|
503
|
+
set entries(value: any | null | undefined);
|
|
504
|
+
get format(): string | undefined;
|
|
505
|
+
set format(value: string | null | undefined);
|
|
506
|
+
get grid(): WasmTableGrid | undefined;
|
|
507
|
+
set grid(value: WasmTableGrid | null | undefined);
|
|
508
|
+
get headingLevel(): number | undefined;
|
|
509
|
+
set headingLevel(value: number | null | undefined);
|
|
510
|
+
get headingText(): string | undefined;
|
|
511
|
+
set headingText(value: string | null | undefined);
|
|
512
|
+
get imageIndex(): number | undefined;
|
|
513
|
+
set imageIndex(value: number | null | undefined);
|
|
514
|
+
get label(): string | undefined;
|
|
515
|
+
set label(value: string | null | undefined);
|
|
516
|
+
get language(): string | undefined;
|
|
517
|
+
set language(value: string | null | undefined);
|
|
518
|
+
get level(): number | undefined;
|
|
519
|
+
set level(value: number | null | undefined);
|
|
520
|
+
nodeType: string;
|
|
521
|
+
get ordered(): boolean | undefined;
|
|
522
|
+
set ordered(value: boolean | null | undefined);
|
|
523
|
+
get src(): string | undefined;
|
|
524
|
+
set src(value: string | null | undefined);
|
|
525
|
+
get term(): string | undefined;
|
|
526
|
+
set term(value: string | null | undefined);
|
|
527
|
+
get text(): string | undefined;
|
|
528
|
+
set text(value: string | null | undefined);
|
|
669
529
|
}
|
|
670
530
|
|
|
671
531
|
/**
|
|
@@ -678,7 +538,7 @@ export class WasmNodeContext {
|
|
|
678
538
|
free(): void;
|
|
679
539
|
[Symbol.dispose](): void;
|
|
680
540
|
static default(): WasmNodeContext;
|
|
681
|
-
constructor(
|
|
541
|
+
constructor(nodeType: WasmNodeType, tagName: string, attributes: any, depth: number, indexInParent: number, isInline: boolean, parentTag?: string | null);
|
|
682
542
|
attributes: any;
|
|
683
543
|
depth: number;
|
|
684
544
|
indexInParent: number;
|
|
@@ -804,35 +664,8 @@ export enum WasmOutputFormat {
|
|
|
804
664
|
export class WasmPreprocessingOptions {
|
|
805
665
|
free(): void;
|
|
806
666
|
[Symbol.dispose](): void;
|
|
807
|
-
/**
|
|
808
|
-
* Apply a partial update to these preprocessing options.
|
|
809
|
-
*
|
|
810
|
-
* Any specified fields in the update will override the current values.
|
|
811
|
-
* Unspecified fields (None) are left unchanged.
|
|
812
|
-
*
|
|
813
|
-
* # Arguments
|
|
814
|
-
*
|
|
815
|
-
* * `update` - Partial preprocessing options update
|
|
816
|
-
*/
|
|
817
|
-
applyUpdate(update: WasmPreprocessingOptionsUpdate): void;
|
|
818
667
|
static default(): WasmPreprocessingOptions;
|
|
819
|
-
|
|
820
|
-
/**
|
|
821
|
-
* Create new preprocessing options from a partial update.
|
|
822
|
-
*
|
|
823
|
-
* Creates a new `PreprocessingOptions` struct with defaults, then applies the update.
|
|
824
|
-
* Fields not specified in the update keep their default values.
|
|
825
|
-
*
|
|
826
|
-
* # Arguments
|
|
827
|
-
*
|
|
828
|
-
* * `update` - Partial preprocessing options update
|
|
829
|
-
*
|
|
830
|
-
* # Returns
|
|
831
|
-
*
|
|
832
|
-
* New `PreprocessingOptions` with specified updates applied to defaults
|
|
833
|
-
*/
|
|
834
|
-
static fromUpdate(update: WasmPreprocessingOptionsUpdate): WasmPreprocessingOptions;
|
|
835
|
-
constructor(enabled?: boolean | null, preset?: WasmPreprocessingPreset | null, remove_navigation?: boolean | null, remove_forms?: boolean | null);
|
|
668
|
+
constructor(enabled?: boolean | null, preset?: WasmPreprocessingPreset | null, removeNavigation?: boolean | null, removeForms?: boolean | null);
|
|
836
669
|
enabled: boolean;
|
|
837
670
|
get preset(): string;
|
|
838
671
|
set preset(value: WasmPreprocessingPreset);
|
|
@@ -844,14 +677,14 @@ export class WasmPreprocessingOptions {
|
|
|
844
677
|
* Partial update for `PreprocessingOptions`.
|
|
845
678
|
*
|
|
846
679
|
* This struct uses `Option<T>` to represent optional fields that can be selectively updated.
|
|
847
|
-
* Only specified fields (
|
|
848
|
-
* corresponding fields unchanged when applied via
|
|
680
|
+
* Only specified fields (values) will override existing options; undefined values leave the
|
|
681
|
+
* corresponding fields unchanged when applied via `PreprocessingOptions.apply_update`.
|
|
849
682
|
*/
|
|
850
683
|
export class WasmPreprocessingOptionsUpdate {
|
|
851
684
|
free(): void;
|
|
852
685
|
[Symbol.dispose](): void;
|
|
853
686
|
static default(): WasmPreprocessingOptionsUpdate;
|
|
854
|
-
constructor(enabled?: boolean | null, preset?: WasmPreprocessingPreset | null,
|
|
687
|
+
constructor(enabled?: boolean | null, preset?: WasmPreprocessingPreset | null, removeNavigation?: boolean | null, removeForms?: boolean | null);
|
|
855
688
|
get enabled(): boolean | undefined;
|
|
856
689
|
set enabled(value: boolean | null | undefined);
|
|
857
690
|
get preset(): string | undefined;
|
|
@@ -874,7 +707,21 @@ export enum WasmPreprocessingPreset {
|
|
|
874
707
|
}
|
|
875
708
|
|
|
876
709
|
/**
|
|
877
|
-
* A non-fatal
|
|
710
|
+
* A non-fatal diagnostic produced during HTML conversion.
|
|
711
|
+
*
|
|
712
|
+
* Warnings indicate that conversion completed but some content may have been handled
|
|
713
|
+
* differently than expected — for example, an image that could not be extracted, a truncated
|
|
714
|
+
* input, or malformed HTML that was repaired with best-effort parsing.
|
|
715
|
+
*
|
|
716
|
+
* Conversion always succeeds (returns `ConversionResult`) even when warnings are
|
|
717
|
+
* present. Callers should inspect `warnings` and decide how to
|
|
718
|
+
* handle them based on their tolerance for partial results:
|
|
719
|
+
*
|
|
720
|
+
* - **Logging pipelines**: emit each warning at `WARN` level and continue.
|
|
721
|
+
* - **Strict pipelines**: treat any warning as a hard error by checking
|
|
722
|
+
* `result.warnings.is_empty()` before using the output.
|
|
723
|
+
*
|
|
724
|
+
* See `WarningKind` for the full taxonomy of warning categories.
|
|
878
725
|
*/
|
|
879
726
|
export class WasmProcessingWarning {
|
|
880
727
|
free(): void;
|
|
@@ -893,22 +740,12 @@ export class WasmProcessingWarning {
|
|
|
893
740
|
* JSON-LD blocks are collected as raw JSON strings for flexibility.
|
|
894
741
|
*
|
|
895
742
|
* # Examples
|
|
896
|
-
*
|
|
897
|
-
* ```
|
|
898
|
-
* let schema = StructuredData {
|
|
899
|
-
* data_type: StructuredDataType::JsonLd,
|
|
900
|
-
* raw_json: r#"{"@context":"https://schema.org","@type":"Article"}"#.to_string(),
|
|
901
|
-
* schema_type: Some("Article".to_string()),
|
|
902
|
-
* };
|
|
903
|
-
*
|
|
904
|
-
* assert_eq!(schema.data_type, StructuredDataType::JsonLd);
|
|
905
|
-
* ```
|
|
906
743
|
*/
|
|
907
744
|
export class WasmStructuredData {
|
|
908
745
|
free(): void;
|
|
909
746
|
[Symbol.dispose](): void;
|
|
910
747
|
static default(): WasmStructuredData;
|
|
911
|
-
constructor(
|
|
748
|
+
constructor(dataType: WasmStructuredDataType, rawJson: string, schemaType?: string | null);
|
|
912
749
|
get dataType(): string;
|
|
913
750
|
set dataType(value: WasmStructuredDataType);
|
|
914
751
|
rawJson: string;
|
|
@@ -953,18 +790,28 @@ export class WasmTableGrid {
|
|
|
953
790
|
}
|
|
954
791
|
|
|
955
792
|
/**
|
|
956
|
-
*
|
|
793
|
+
* A styling or semantic annotation that applies to a byte range within a node's text.
|
|
794
|
+
*
|
|
795
|
+
* Unlike `DocumentNode`, which captures block-level structure (headings, paragraphs, etc.),
|
|
796
|
+
* a `TextAnnotation` describes inline-level markup — bold, italic, links, code spans, and
|
|
797
|
+
* similar — that spans a contiguous run of bytes inside `DocumentNode.content`'s text field.
|
|
957
798
|
*
|
|
958
|
-
*
|
|
799
|
+
* Byte offsets (`start`..`end`) are into the UTF-8 encoded text of the parent node. The range
|
|
800
|
+
* follows Rust slice conventions: `start` is inclusive and `end` is exclusive, so the annotated
|
|
801
|
+
* text is `text[start as usize..end as usize]`.
|
|
802
|
+
*
|
|
803
|
+
* Multiple annotations on the same node can overlap (e.g. bold-italic text), and they are
|
|
804
|
+
* stored in the order they are encountered during DOM traversal.
|
|
805
|
+
*
|
|
806
|
+
* See `AnnotationKind` for the full list of supported annotation types.
|
|
959
807
|
*/
|
|
960
808
|
export class WasmTextAnnotation {
|
|
961
809
|
free(): void;
|
|
962
810
|
[Symbol.dispose](): void;
|
|
963
811
|
static default(): WasmTextAnnotation;
|
|
964
|
-
constructor(start: number, end: number, kind:
|
|
812
|
+
constructor(start: number, end: number, kind: any);
|
|
965
813
|
end: number;
|
|
966
|
-
|
|
967
|
-
set kind(value: WasmAnnotationKind);
|
|
814
|
+
kind: any;
|
|
968
815
|
start: number;
|
|
969
816
|
}
|
|
970
817
|
|
|
@@ -995,11 +842,12 @@ export enum WasmVisitResult {
|
|
|
995
842
|
}
|
|
996
843
|
|
|
997
844
|
/**
|
|
998
|
-
*
|
|
845
|
+
* Shareable, thread-safe handle to a user-provided HTML visitor implementation.
|
|
999
846
|
*
|
|
1000
|
-
*
|
|
1001
|
-
*
|
|
1002
|
-
*
|
|
847
|
+
* Pass an instance wrapped in this handle to `ConversionOptions` to
|
|
848
|
+
* customise how the HTML document is traversed and converted to Markdown.
|
|
849
|
+
* The handle may be cloned and shared across threads without additional
|
|
850
|
+
* synchronisation on the caller's side.
|
|
1003
851
|
*/
|
|
1004
852
|
export class WasmVisitorHandle {
|
|
1005
853
|
free(): void;
|
|
@@ -1030,23 +878,16 @@ export enum WasmWhitespaceMode {
|
|
|
1030
878
|
}
|
|
1031
879
|
|
|
1032
880
|
/**
|
|
1033
|
-
* Convert HTML to Markdown, returning a
|
|
881
|
+
* Convert HTML to Markdown, returning a `ConversionResult` with content, metadata, images,
|
|
1034
882
|
* and warnings.
|
|
1035
883
|
*
|
|
1036
884
|
* # Arguments
|
|
1037
885
|
*
|
|
1038
886
|
* * `html` — the HTML string to convert.
|
|
1039
|
-
* * `options` — optional conversion options. Defaults to
|
|
887
|
+
* * `options` — optional conversion options. Defaults to `ConversionOptions.default`.
|
|
1040
888
|
*
|
|
1041
889
|
* # Example
|
|
1042
890
|
*
|
|
1043
|
-
* ```
|
|
1044
|
-
* use html_to_markdown_rs::convert;
|
|
1045
|
-
*
|
|
1046
|
-
* let html = "<h1>Hello World</h1>";
|
|
1047
|
-
* let result = convert(html, None).unwrap();
|
|
1048
|
-
* assert!(result.content.as_deref().unwrap_or("").contains("Hello World"));
|
|
1049
|
-
* ```
|
|
1050
891
|
*
|
|
1051
892
|
* # Errors
|
|
1052
893
|
*
|