@kreuzberg/html-to-markdown-wasm 3.4.0-rc.9 → 3.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +33 -63
- package/{dist-node → pkg/bundler}/html_to_markdown_wasm.d.ts +107 -163
- package/pkg/bundler/html_to_markdown_wasm.js +9 -0
- package/{dist → pkg/bundler}/html_to_markdown_wasm_bg.js +675 -151
- package/{dist-web → pkg/bundler}/html_to_markdown_wasm_bg.wasm +0 -0
- package/{dist-web → pkg/bundler}/html_to_markdown_wasm_bg.wasm.d.ts +52 -28
- package/{dist → pkg/bundler}/package.json +6 -5
- package/{dist → pkg/deno}/html_to_markdown_wasm.d.ts +107 -166
- package/pkg/deno/html_to_markdown_wasm.js +5481 -0
- package/{dist → pkg/deno}/html_to_markdown_wasm_bg.wasm +0 -0
- package/{dist → pkg/deno}/html_to_markdown_wasm_bg.wasm.d.ts +52 -28
- package/pkg/nodejs/html_to_markdown_wasm.d.ts +1055 -0
- package/{dist-node → pkg/nodejs}/html_to_markdown_wasm.js +676 -151
- package/{dist-node → pkg/nodejs}/html_to_markdown_wasm_bg.wasm +0 -0
- package/{dist-node → pkg/nodejs}/html_to_markdown_wasm_bg.wasm.d.ts +52 -28
- package/{dist-node → pkg/nodejs}/package.json +5 -5
- package/{dist-web → pkg/web}/html_to_markdown_wasm.d.ts +159 -191
- package/{dist-web → pkg/web}/html_to_markdown_wasm.js +675 -151
- package/pkg/web/html_to_markdown_wasm_bg.wasm +0 -0
- package/pkg/web/html_to_markdown_wasm_bg.wasm.d.ts +411 -0
- package/{dist-web → pkg/web}/package.json +5 -5
- package/dist/html_to_markdown_wasm.js +0 -116
|
@@ -52,7 +52,7 @@ export class WasmConversionOptions {
|
|
|
52
52
|
/**
|
|
53
53
|
* Apply a partial update to these conversion options.
|
|
54
54
|
*/
|
|
55
|
-
applyUpdate(
|
|
55
|
+
applyUpdate(update: WasmConversionOptionsUpdate): void;
|
|
56
56
|
/**
|
|
57
57
|
* Create a new builder with default values.
|
|
58
58
|
*/
|
|
@@ -68,7 +68,8 @@ export class WasmConversionOptions {
|
|
|
68
68
|
brInTables: boolean;
|
|
69
69
|
bullets: string;
|
|
70
70
|
captureSvg: boolean;
|
|
71
|
-
codeBlockStyle:
|
|
71
|
+
get codeBlockStyle(): string;
|
|
72
|
+
set codeBlockStyle(value: WasmCodeBlockStyle);
|
|
72
73
|
codeLanguage: string;
|
|
73
74
|
convertAsInline: boolean;
|
|
74
75
|
debug: boolean;
|
|
@@ -81,19 +82,25 @@ export class WasmConversionOptions {
|
|
|
81
82
|
excludeSelectors: string[];
|
|
82
83
|
extractImages: boolean;
|
|
83
84
|
extractMetadata: boolean;
|
|
84
|
-
headingStyle:
|
|
85
|
-
|
|
85
|
+
get headingStyle(): string;
|
|
86
|
+
set headingStyle(value: WasmHeadingStyle);
|
|
87
|
+
get highlightStyle(): string;
|
|
88
|
+
set highlightStyle(value: WasmHighlightStyle);
|
|
86
89
|
includeDocumentStructure: boolean;
|
|
87
90
|
inferDimensions: boolean;
|
|
88
91
|
keepInlineImagesIn: string[];
|
|
89
|
-
linkStyle:
|
|
90
|
-
|
|
92
|
+
get linkStyle(): string;
|
|
93
|
+
set linkStyle(value: WasmLinkStyle);
|
|
94
|
+
get listIndentType(): string;
|
|
95
|
+
set listIndentType(value: WasmListIndentType);
|
|
91
96
|
listIndentWidth: number;
|
|
92
97
|
get maxDepth(): number | undefined;
|
|
93
98
|
set maxDepth(value: number | null | undefined);
|
|
94
99
|
maxImageSize: bigint;
|
|
95
|
-
newlineStyle:
|
|
96
|
-
|
|
100
|
+
get newlineStyle(): string;
|
|
101
|
+
set newlineStyle(value: WasmNewlineStyle);
|
|
102
|
+
get outputFormat(): string;
|
|
103
|
+
set outputFormat(value: WasmOutputFormat);
|
|
97
104
|
preprocessing: WasmPreprocessingOptions;
|
|
98
105
|
preserveTags: string[];
|
|
99
106
|
skipImages: boolean;
|
|
@@ -102,7 +109,10 @@ export class WasmConversionOptions {
|
|
|
102
109
|
strongEmSymbol: string;
|
|
103
110
|
subSymbol: string;
|
|
104
111
|
supSymbol: string;
|
|
105
|
-
|
|
112
|
+
get visitor(): WasmVisitorHandle | undefined;
|
|
113
|
+
set visitor(value: WasmVisitorHandle | null | undefined);
|
|
114
|
+
get whitespaceMode(): string;
|
|
115
|
+
set whitespaceMode(value: WasmWhitespaceMode);
|
|
106
116
|
wrap: boolean;
|
|
107
117
|
wrapWidth: number;
|
|
108
118
|
}
|
|
@@ -140,6 +150,10 @@ export class WasmConversionOptionsBuilder {
|
|
|
140
150
|
* Set the list of HTML tag names whose content is stripped from output.
|
|
141
151
|
*/
|
|
142
152
|
stripTags(tags: string[]): WasmConversionOptionsBuilder;
|
|
153
|
+
/**
|
|
154
|
+
* Set the visitor used during conversion.
|
|
155
|
+
*/
|
|
156
|
+
visitor(visitor?: WasmVisitorHandle | null): WasmConversionOptionsBuilder;
|
|
143
157
|
}
|
|
144
158
|
|
|
145
159
|
/**
|
|
@@ -151,6 +165,7 @@ export class WasmConversionOptionsBuilder {
|
|
|
151
165
|
export class WasmConversionOptionsUpdate {
|
|
152
166
|
free(): void;
|
|
153
167
|
[Symbol.dispose](): void;
|
|
168
|
+
static default(): WasmConversionOptionsUpdate;
|
|
154
169
|
constructor(heading_style?: WasmHeadingStyle | null, list_indent_type?: WasmListIndentType | null, list_indent_width?: number | null, bullets?: string | null, strong_em_symbol?: string | null, escape_asterisks?: boolean | null, escape_underscores?: boolean | null, escape_misc?: boolean | null, escape_ascii?: boolean | null, code_language?: string | null, autolinks?: boolean | null, default_title?: boolean | null, br_in_tables?: boolean | null, highlight_style?: WasmHighlightStyle | null, extract_metadata?: boolean | null, whitespace_mode?: WasmWhitespaceMode | null, strip_newlines?: boolean | null, wrap?: boolean | null, wrap_width?: number | null, convert_as_inline?: boolean | null, sub_symbol?: string | null, sup_symbol?: string | null, newline_style?: WasmNewlineStyle | null, code_block_style?: WasmCodeBlockStyle | null, keep_inline_images_in?: string[] | null, preprocessing?: WasmPreprocessingOptionsUpdate | null, encoding?: string | null, debug?: boolean | null, strip_tags?: string[] | null, preserve_tags?: string[] | null, skip_images?: boolean | null, link_style?: WasmLinkStyle | null, output_format?: WasmOutputFormat | null, include_document_structure?: boolean | null, extract_images?: boolean | null, max_image_size?: bigint | null, capture_svg?: boolean | null, infer_dimensions?: boolean | null, max_depth?: number | null, exclude_selectors?: string[] | null);
|
|
155
170
|
get autolinks(): boolean | undefined;
|
|
156
171
|
set autolinks(value: boolean | null | undefined);
|
|
@@ -160,7 +175,7 @@ export class WasmConversionOptionsUpdate {
|
|
|
160
175
|
set bullets(value: string | null | undefined);
|
|
161
176
|
get captureSvg(): boolean | undefined;
|
|
162
177
|
set captureSvg(value: boolean | null | undefined);
|
|
163
|
-
get codeBlockStyle():
|
|
178
|
+
get codeBlockStyle(): string | undefined;
|
|
164
179
|
set codeBlockStyle(value: WasmCodeBlockStyle | null | undefined);
|
|
165
180
|
get codeLanguage(): string | undefined;
|
|
166
181
|
set codeLanguage(value: string | null | undefined);
|
|
@@ -186,9 +201,9 @@ export class WasmConversionOptionsUpdate {
|
|
|
186
201
|
set extractImages(value: boolean | null | undefined);
|
|
187
202
|
get extractMetadata(): boolean | undefined;
|
|
188
203
|
set extractMetadata(value: boolean | null | undefined);
|
|
189
|
-
get headingStyle():
|
|
204
|
+
get headingStyle(): string | undefined;
|
|
190
205
|
set headingStyle(value: WasmHeadingStyle | null | undefined);
|
|
191
|
-
get highlightStyle():
|
|
206
|
+
get highlightStyle(): string | undefined;
|
|
192
207
|
set highlightStyle(value: WasmHighlightStyle | null | undefined);
|
|
193
208
|
get includeDocumentStructure(): boolean | undefined;
|
|
194
209
|
set includeDocumentStructure(value: boolean | null | undefined);
|
|
@@ -196,9 +211,9 @@ export class WasmConversionOptionsUpdate {
|
|
|
196
211
|
set inferDimensions(value: boolean | null | undefined);
|
|
197
212
|
get keepInlineImagesIn(): string[] | undefined;
|
|
198
213
|
set keepInlineImagesIn(value: string[] | null | undefined);
|
|
199
|
-
get linkStyle():
|
|
214
|
+
get linkStyle(): string | undefined;
|
|
200
215
|
set linkStyle(value: WasmLinkStyle | null | undefined);
|
|
201
|
-
get listIndentType():
|
|
216
|
+
get listIndentType(): string | undefined;
|
|
202
217
|
set listIndentType(value: WasmListIndentType | null | undefined);
|
|
203
218
|
get listIndentWidth(): number | undefined;
|
|
204
219
|
set listIndentWidth(value: number | null | undefined);
|
|
@@ -206,9 +221,9 @@ export class WasmConversionOptionsUpdate {
|
|
|
206
221
|
set maxDepth(value: number | null | undefined);
|
|
207
222
|
get maxImageSize(): bigint | undefined;
|
|
208
223
|
set maxImageSize(value: bigint | null | undefined);
|
|
209
|
-
get newlineStyle():
|
|
224
|
+
get newlineStyle(): string | undefined;
|
|
210
225
|
set newlineStyle(value: WasmNewlineStyle | null | undefined);
|
|
211
|
-
get outputFormat():
|
|
226
|
+
get outputFormat(): string | undefined;
|
|
212
227
|
set outputFormat(value: WasmOutputFormat | null | undefined);
|
|
213
228
|
get preprocessing(): WasmPreprocessingOptionsUpdate | undefined;
|
|
214
229
|
set preprocessing(value: WasmPreprocessingOptionsUpdate | null | undefined);
|
|
@@ -226,7 +241,9 @@ export class WasmConversionOptionsUpdate {
|
|
|
226
241
|
set subSymbol(value: string | null | undefined);
|
|
227
242
|
get supSymbol(): string | undefined;
|
|
228
243
|
set supSymbol(value: string | null | undefined);
|
|
229
|
-
get
|
|
244
|
+
get visitor(): WasmVisitorHandle | undefined;
|
|
245
|
+
set visitor(value: WasmVisitorHandle | null | undefined);
|
|
246
|
+
get whitespaceMode(): string | undefined;
|
|
230
247
|
set whitespaceMode(value: WasmWhitespaceMode | null | undefined);
|
|
231
248
|
get wrap(): boolean | undefined;
|
|
232
249
|
set wrap(value: boolean | null | undefined);
|
|
@@ -253,7 +270,8 @@ export class WasmConversionOptionsUpdate {
|
|
|
253
270
|
export class WasmConversionResult {
|
|
254
271
|
free(): void;
|
|
255
272
|
[Symbol.dispose](): void;
|
|
256
|
-
|
|
273
|
+
static default(): WasmConversionResult;
|
|
274
|
+
constructor(tables?: WasmTableData[] | null, warnings?: WasmProcessingWarning[] | null, content?: string | null, document?: WasmDocumentStructure | null);
|
|
257
275
|
get content(): string | undefined;
|
|
258
276
|
set content(value: string | null | undefined);
|
|
259
277
|
get document(): WasmDocumentStructure | undefined;
|
|
@@ -273,7 +291,6 @@ export class WasmConversionResult {
|
|
|
273
291
|
* # Examples
|
|
274
292
|
*
|
|
275
293
|
* ```
|
|
276
|
-
* # use html_to_markdown_rs::metadata::DocumentMetadata;
|
|
277
294
|
* let doc = DocumentMetadata {
|
|
278
295
|
* title: Some("My Article".to_string()),
|
|
279
296
|
* description: Some("A great article about Rust".to_string()),
|
|
@@ -287,6 +304,7 @@ export class WasmConversionResult {
|
|
|
287
304
|
export class WasmDocumentMetadata {
|
|
288
305
|
free(): void;
|
|
289
306
|
[Symbol.dispose](): void;
|
|
307
|
+
static default(): WasmDocumentMetadata;
|
|
290
308
|
constructor(keywords?: string[] | null, open_graph?: any | null, twitter_card?: any | null, meta_tags?: any | null, title?: string | null, description?: string | null, author?: string | null, canonical_url?: string | null, base_href?: string | null, language?: string | null, text_direction?: WasmTextDirection | null);
|
|
291
309
|
get author(): string | undefined;
|
|
292
310
|
set author(value: string | null | undefined);
|
|
@@ -301,7 +319,7 @@ export class WasmDocumentMetadata {
|
|
|
301
319
|
set language(value: string | null | undefined);
|
|
302
320
|
metaTags: any;
|
|
303
321
|
openGraph: any;
|
|
304
|
-
get textDirection():
|
|
322
|
+
get textDirection(): string | undefined;
|
|
305
323
|
set textDirection(value: WasmTextDirection | null | undefined);
|
|
306
324
|
get title(): string | undefined;
|
|
307
325
|
set title(value: string | null | undefined);
|
|
@@ -314,12 +332,14 @@ export class WasmDocumentMetadata {
|
|
|
314
332
|
export class WasmDocumentNode {
|
|
315
333
|
free(): void;
|
|
316
334
|
[Symbol.dispose](): void;
|
|
335
|
+
static default(): WasmDocumentNode;
|
|
317
336
|
constructor(id: string, content: WasmNodeContent, children: Uint32Array, annotations: WasmTextAnnotation[], parent?: number | null, attributes?: any | null);
|
|
318
337
|
annotations: WasmTextAnnotation[];
|
|
319
338
|
get attributes(): any | undefined;
|
|
320
339
|
set attributes(value: any | null | undefined);
|
|
321
340
|
children: Uint32Array;
|
|
322
|
-
content:
|
|
341
|
+
get content(): string;
|
|
342
|
+
set content(value: WasmNodeContent);
|
|
323
343
|
id: string;
|
|
324
344
|
get parent(): number | undefined;
|
|
325
345
|
set parent(value: number | null | undefined);
|
|
@@ -333,6 +353,7 @@ export class WasmDocumentNode {
|
|
|
333
353
|
export class WasmDocumentStructure {
|
|
334
354
|
free(): void;
|
|
335
355
|
[Symbol.dispose](): void;
|
|
356
|
+
static default(): WasmDocumentStructure;
|
|
336
357
|
constructor(nodes: WasmDocumentNode[], source_format?: string | null);
|
|
337
358
|
nodes: WasmDocumentNode[];
|
|
338
359
|
get sourceFormat(): string | undefined;
|
|
@@ -345,6 +366,7 @@ export class WasmDocumentStructure {
|
|
|
345
366
|
export class WasmGridCell {
|
|
346
367
|
free(): void;
|
|
347
368
|
[Symbol.dispose](): void;
|
|
369
|
+
static default(): WasmGridCell;
|
|
348
370
|
constructor(content: string, row: number, col: number, row_span: number, col_span: number, is_header: boolean);
|
|
349
371
|
col: number;
|
|
350
372
|
colSpan: number;
|
|
@@ -363,7 +385,6 @@ export class WasmGridCell {
|
|
|
363
385
|
* # Examples
|
|
364
386
|
*
|
|
365
387
|
* ```
|
|
366
|
-
* # use html_to_markdown_rs::metadata::HeaderMetadata;
|
|
367
388
|
* let header = HeaderMetadata {
|
|
368
389
|
* level: 1,
|
|
369
390
|
* text: "Main Title".to_string(),
|
|
@@ -379,6 +400,7 @@ export class WasmGridCell {
|
|
|
379
400
|
export class WasmHeaderMetadata {
|
|
380
401
|
free(): void;
|
|
381
402
|
[Symbol.dispose](): void;
|
|
403
|
+
static default(): WasmHeaderMetadata;
|
|
382
404
|
/**
|
|
383
405
|
* Validate that the header level is within valid range (1-6).
|
|
384
406
|
*
|
|
@@ -389,7 +411,6 @@ export class WasmHeaderMetadata {
|
|
|
389
411
|
* # Examples
|
|
390
412
|
*
|
|
391
413
|
* ```
|
|
392
|
-
* # use html_to_markdown_rs::metadata::HeaderMetadata;
|
|
393
414
|
* let valid = HeaderMetadata {
|
|
394
415
|
* level: 3,
|
|
395
416
|
* text: "Title".to_string(),
|
|
@@ -451,7 +472,6 @@ export enum WasmHighlightStyle {
|
|
|
451
472
|
* # Examples
|
|
452
473
|
*
|
|
453
474
|
* ```
|
|
454
|
-
* # use html_to_markdown_rs::metadata::HtmlMetadata;
|
|
455
475
|
* let metadata = HtmlMetadata {
|
|
456
476
|
* document: Default::default(),
|
|
457
477
|
* headers: Vec::new(),
|
|
@@ -466,6 +486,7 @@ export enum WasmHighlightStyle {
|
|
|
466
486
|
export class WasmHtmlMetadata {
|
|
467
487
|
free(): void;
|
|
468
488
|
[Symbol.dispose](): void;
|
|
489
|
+
static default(): WasmHtmlMetadata;
|
|
469
490
|
constructor(document?: WasmDocumentMetadata | null, headers?: WasmHeaderMetadata[] | null, links?: WasmLinkMetadata[] | null, images?: WasmImageMetadata[] | null, structured_data?: WasmStructuredData[] | null);
|
|
470
491
|
document: WasmDocumentMetadata;
|
|
471
492
|
headers: WasmHeaderMetadata[];
|
|
@@ -483,7 +504,6 @@ export class WasmHtmlMetadata {
|
|
|
483
504
|
* # Examples
|
|
484
505
|
*
|
|
485
506
|
* ```
|
|
486
|
-
* # use html_to_markdown_rs::metadata::{ImageMetadata, ImageType};
|
|
487
507
|
* let img = ImageMetadata {
|
|
488
508
|
* src: "https://example.com/image.jpg".to_string(),
|
|
489
509
|
* alt: Some("An example image".to_string()),
|
|
@@ -499,13 +519,15 @@ export class WasmHtmlMetadata {
|
|
|
499
519
|
export class WasmImageMetadata {
|
|
500
520
|
free(): void;
|
|
501
521
|
[Symbol.dispose](): void;
|
|
522
|
+
static default(): WasmImageMetadata;
|
|
502
523
|
constructor(src: string, image_type: WasmImageType, attributes: any, alt?: string | null, title?: string | null, dimensions?: Uint32Array | null);
|
|
503
524
|
get alt(): string | undefined;
|
|
504
525
|
set alt(value: string | null | undefined);
|
|
505
526
|
attributes: any;
|
|
506
527
|
get dimensions(): Uint32Array | undefined;
|
|
507
528
|
set dimensions(value: Uint32Array | null | undefined);
|
|
508
|
-
imageType:
|
|
529
|
+
get imageType(): string;
|
|
530
|
+
set imageType(value: WasmImageType);
|
|
509
531
|
src: string;
|
|
510
532
|
get title(): string | undefined;
|
|
511
533
|
set title(value: string | null | undefined);
|
|
@@ -531,7 +553,6 @@ export enum WasmImageType {
|
|
|
531
553
|
* # Examples
|
|
532
554
|
*
|
|
533
555
|
* ```
|
|
534
|
-
* # use html_to_markdown_rs::metadata::{LinkMetadata, LinkType};
|
|
535
556
|
* let link = LinkMetadata {
|
|
536
557
|
* href: "https://example.com".to_string(),
|
|
537
558
|
* text: "Example".to_string(),
|
|
@@ -562,7 +583,6 @@ export class WasmLinkMetadata {
|
|
|
562
583
|
* # Examples
|
|
563
584
|
*
|
|
564
585
|
* ```
|
|
565
|
-
* # use html_to_markdown_rs::metadata::{LinkMetadata, LinkType};
|
|
566
586
|
* assert_eq!(LinkMetadata::classify_link("#section"), LinkType::Anchor);
|
|
567
587
|
* assert_eq!(LinkMetadata::classify_link("mailto:test@example.com"), LinkType::Email);
|
|
568
588
|
* assert_eq!(LinkMetadata::classify_link("tel:+1234567890"), LinkType::Phone);
|
|
@@ -570,10 +590,12 @@ export class WasmLinkMetadata {
|
|
|
570
590
|
* ```
|
|
571
591
|
*/
|
|
572
592
|
static classifyLink(href: string): WasmLinkType;
|
|
593
|
+
static default(): WasmLinkMetadata;
|
|
573
594
|
constructor(href: string, text: string, link_type: WasmLinkType, rel: string[], attributes: any, title?: string | null);
|
|
574
595
|
attributes: any;
|
|
575
596
|
href: string;
|
|
576
|
-
linkType:
|
|
597
|
+
get linkType(): string;
|
|
598
|
+
set linkType(value: WasmLinkType);
|
|
577
599
|
rel: string[];
|
|
578
600
|
text: string;
|
|
579
601
|
get title(): string | undefined;
|
|
@@ -655,12 +677,14 @@ export enum WasmNodeContent {
|
|
|
655
677
|
export class WasmNodeContext {
|
|
656
678
|
free(): void;
|
|
657
679
|
[Symbol.dispose](): void;
|
|
680
|
+
static default(): WasmNodeContext;
|
|
658
681
|
constructor(node_type: WasmNodeType, tag_name: string, attributes: any, depth: number, index_in_parent: number, is_inline: boolean, parent_tag?: string | null);
|
|
659
682
|
attributes: any;
|
|
660
683
|
depth: number;
|
|
661
684
|
indexInParent: number;
|
|
662
685
|
isInline: boolean;
|
|
663
|
-
nodeType:
|
|
686
|
+
get nodeType(): string;
|
|
687
|
+
set nodeType(value: WasmNodeType);
|
|
664
688
|
get parentTag(): string | undefined;
|
|
665
689
|
set parentTag(value: string | null | undefined);
|
|
666
690
|
tagName: string;
|
|
@@ -790,7 +814,7 @@ export class WasmPreprocessingOptions {
|
|
|
790
814
|
*
|
|
791
815
|
* * `update` - Partial preprocessing options update
|
|
792
816
|
*/
|
|
793
|
-
applyUpdate(
|
|
817
|
+
applyUpdate(update: WasmPreprocessingOptionsUpdate): void;
|
|
794
818
|
static default(): WasmPreprocessingOptions;
|
|
795
819
|
static from(update: WasmPreprocessingOptionsUpdate): WasmPreprocessingOptions;
|
|
796
820
|
/**
|
|
@@ -810,7 +834,8 @@ export class WasmPreprocessingOptions {
|
|
|
810
834
|
static fromUpdate(update: WasmPreprocessingOptionsUpdate): WasmPreprocessingOptions;
|
|
811
835
|
constructor(enabled?: boolean | null, preset?: WasmPreprocessingPreset | null, remove_navigation?: boolean | null, remove_forms?: boolean | null);
|
|
812
836
|
enabled: boolean;
|
|
813
|
-
preset:
|
|
837
|
+
get preset(): string;
|
|
838
|
+
set preset(value: WasmPreprocessingPreset);
|
|
814
839
|
removeForms: boolean;
|
|
815
840
|
removeNavigation: boolean;
|
|
816
841
|
}
|
|
@@ -825,10 +850,11 @@ export class WasmPreprocessingOptions {
|
|
|
825
850
|
export class WasmPreprocessingOptionsUpdate {
|
|
826
851
|
free(): void;
|
|
827
852
|
[Symbol.dispose](): void;
|
|
853
|
+
static default(): WasmPreprocessingOptionsUpdate;
|
|
828
854
|
constructor(enabled?: boolean | null, preset?: WasmPreprocessingPreset | null, remove_navigation?: boolean | null, remove_forms?: boolean | null);
|
|
829
855
|
get enabled(): boolean | undefined;
|
|
830
856
|
set enabled(value: boolean | null | undefined);
|
|
831
|
-
get preset():
|
|
857
|
+
get preset(): string | undefined;
|
|
832
858
|
set preset(value: WasmPreprocessingPreset | null | undefined);
|
|
833
859
|
get removeForms(): boolean | undefined;
|
|
834
860
|
set removeForms(value: boolean | null | undefined);
|
|
@@ -853,8 +879,10 @@ export enum WasmPreprocessingPreset {
|
|
|
853
879
|
export class WasmProcessingWarning {
|
|
854
880
|
free(): void;
|
|
855
881
|
[Symbol.dispose](): void;
|
|
882
|
+
static default(): WasmProcessingWarning;
|
|
856
883
|
constructor(message: string, kind: WasmWarningKind);
|
|
857
|
-
kind:
|
|
884
|
+
get kind(): string;
|
|
885
|
+
set kind(value: WasmWarningKind);
|
|
858
886
|
message: string;
|
|
859
887
|
}
|
|
860
888
|
|
|
@@ -867,7 +895,6 @@ export class WasmProcessingWarning {
|
|
|
867
895
|
* # Examples
|
|
868
896
|
*
|
|
869
897
|
* ```
|
|
870
|
-
* # use html_to_markdown_rs::metadata::{StructuredData, StructuredDataType};
|
|
871
898
|
* let schema = StructuredData {
|
|
872
899
|
* data_type: StructuredDataType::JsonLd,
|
|
873
900
|
* raw_json: r#"{"@context":"https://schema.org","@type":"Article"}"#.to_string(),
|
|
@@ -880,8 +907,10 @@ export class WasmProcessingWarning {
|
|
|
880
907
|
export class WasmStructuredData {
|
|
881
908
|
free(): void;
|
|
882
909
|
[Symbol.dispose](): void;
|
|
910
|
+
static default(): WasmStructuredData;
|
|
883
911
|
constructor(data_type: WasmStructuredDataType, raw_json: string, schema_type?: string | null);
|
|
884
|
-
dataType:
|
|
912
|
+
get dataType(): string;
|
|
913
|
+
set dataType(value: WasmStructuredDataType);
|
|
885
914
|
rawJson: string;
|
|
886
915
|
get schemaType(): string | undefined;
|
|
887
916
|
set schemaType(value: string | null | undefined);
|
|
@@ -904,6 +933,7 @@ export enum WasmStructuredDataType {
|
|
|
904
933
|
export class WasmTableData {
|
|
905
934
|
free(): void;
|
|
906
935
|
[Symbol.dispose](): void;
|
|
936
|
+
static default(): WasmTableData;
|
|
907
937
|
constructor(grid: WasmTableGrid, markdown: string);
|
|
908
938
|
grid: WasmTableGrid;
|
|
909
939
|
markdown: string;
|
|
@@ -915,6 +945,7 @@ export class WasmTableData {
|
|
|
915
945
|
export class WasmTableGrid {
|
|
916
946
|
free(): void;
|
|
917
947
|
[Symbol.dispose](): void;
|
|
948
|
+
static default(): WasmTableGrid;
|
|
918
949
|
constructor(rows?: number | null, cols?: number | null, cells?: WasmGridCell[] | null);
|
|
919
950
|
cells: WasmGridCell[];
|
|
920
951
|
cols: number;
|
|
@@ -929,9 +960,11 @@ export class WasmTableGrid {
|
|
|
929
960
|
export class WasmTextAnnotation {
|
|
930
961
|
free(): void;
|
|
931
962
|
[Symbol.dispose](): void;
|
|
963
|
+
static default(): WasmTextAnnotation;
|
|
932
964
|
constructor(start: number, end: number, kind: WasmAnnotationKind);
|
|
933
965
|
end: number;
|
|
934
|
-
kind:
|
|
966
|
+
get kind(): string;
|
|
967
|
+
set kind(value: WasmAnnotationKind);
|
|
935
968
|
start: number;
|
|
936
969
|
}
|
|
937
970
|
|
|
@@ -961,6 +994,19 @@ export enum WasmVisitResult {
|
|
|
961
994
|
Error = 4,
|
|
962
995
|
}
|
|
963
996
|
|
|
997
|
+
/**
|
|
998
|
+
* Type alias for a visitor handle (`Arc`-wrapped `Mutex` for thread-safe shared mutation).
|
|
999
|
+
*
|
|
1000
|
+
* `Send + Sync` so that types embedding a `VisitorHandle` (e.g. `ConversionOptions`)
|
|
1001
|
+
* can be shared across threads — required by callers that stash configs inside
|
|
1002
|
+
* axum/rmcp/tokio Send-bound contexts.
|
|
1003
|
+
*/
|
|
1004
|
+
export class WasmVisitorHandle {
|
|
1005
|
+
free(): void;
|
|
1006
|
+
[Symbol.dispose](): void;
|
|
1007
|
+
constructor(visitor: any);
|
|
1008
|
+
}
|
|
1009
|
+
|
|
964
1010
|
/**
|
|
965
1011
|
* Categories of processing warnings.
|
|
966
1012
|
*/
|
|
@@ -983,132 +1029,27 @@ export enum WasmWhitespaceMode {
|
|
|
983
1029
|
Strict = 1,
|
|
984
1030
|
}
|
|
985
1031
|
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
listIndentType?: WasmListIndentType;
|
|
1011
|
-
listIndentWidth?: number;
|
|
1012
|
-
bullets?: string;
|
|
1013
|
-
strongEmSymbol?: string;
|
|
1014
|
-
escapeAsterisks?: boolean;
|
|
1015
|
-
escapeUnderscores?: boolean;
|
|
1016
|
-
escapeMisc?: boolean;
|
|
1017
|
-
escapeAscii?: boolean;
|
|
1018
|
-
codeLanguage?: string;
|
|
1019
|
-
autolinks?: boolean;
|
|
1020
|
-
defaultTitle?: boolean;
|
|
1021
|
-
brInTables?: boolean;
|
|
1022
|
-
hocrSpatialTables?: boolean;
|
|
1023
|
-
highlightStyle?: WasmHighlightStyle;
|
|
1024
|
-
extractMetadata?: boolean;
|
|
1025
|
-
whitespaceMode?: WasmWhitespaceMode;
|
|
1026
|
-
stripNewlines?: boolean;
|
|
1027
|
-
wrap?: boolean;
|
|
1028
|
-
wrapWidth?: number;
|
|
1029
|
-
convertAsInline?: boolean;
|
|
1030
|
-
subSymbol?: string;
|
|
1031
|
-
supSymbol?: string;
|
|
1032
|
-
newlineStyle?: WasmNewlineStyle;
|
|
1033
|
-
codeBlockStyle?: WasmCodeBlockStyle;
|
|
1034
|
-
keepInlineImagesIn?: string[];
|
|
1035
|
-
preprocessing?: WasmPreprocessingOptions | null;
|
|
1036
|
-
encoding?: string;
|
|
1037
|
-
debug?: boolean;
|
|
1038
|
-
stripTags?: string[];
|
|
1039
|
-
preserveTags?: string[];
|
|
1040
|
-
skipImages?: boolean;
|
|
1041
|
-
outputFormat?: WasmOutputFormat;
|
|
1042
|
-
includeDocumentStructure?: boolean;
|
|
1043
|
-
extractImages?: boolean;
|
|
1044
|
-
maxImageSize?: number;
|
|
1045
|
-
captureSvg?: boolean;
|
|
1046
|
-
inferDimensions?: boolean;
|
|
1047
|
-
}
|
|
1048
|
-
|
|
1049
|
-
/** A single cell in a structured table grid. */
|
|
1050
|
-
export interface WasmGridCell {
|
|
1051
|
-
content: string;
|
|
1052
|
-
row: number;
|
|
1053
|
-
col: number;
|
|
1054
|
-
rowSpan: number;
|
|
1055
|
-
colSpan: number;
|
|
1056
|
-
isHeader: boolean;
|
|
1057
|
-
}
|
|
1058
|
-
|
|
1059
|
-
/** Structured table grid with cell-level data. */
|
|
1060
|
-
export interface WasmTableGrid {
|
|
1061
|
-
rows: number;
|
|
1062
|
-
cols: number;
|
|
1063
|
-
cells: WasmGridCell[];
|
|
1064
|
-
}
|
|
1065
|
-
|
|
1066
|
-
/** A table extracted during conversion. */
|
|
1067
|
-
export interface WasmConversionTable {
|
|
1068
|
-
grid: WasmTableGrid;
|
|
1069
|
-
markdown: string;
|
|
1070
|
-
}
|
|
1071
|
-
|
|
1072
|
-
/** Non-fatal warning emitted during conversion. */
|
|
1073
|
-
export interface WasmConversionWarning {
|
|
1074
|
-
/** Human-readable warning message. */
|
|
1075
|
-
message: string;
|
|
1076
|
-
/** Warning kind identifier. */
|
|
1077
|
-
kind: string;
|
|
1078
|
-
}
|
|
1079
|
-
|
|
1080
|
-
/** An extracted inline image from the HTML document. */
|
|
1081
|
-
export interface WasmInlineImage {
|
|
1082
|
-
/** Raw image data as a Uint8Array. */
|
|
1083
|
-
data: Uint8Array;
|
|
1084
|
-
/** Image format (png, jpeg, gif, svg, etc.). */
|
|
1085
|
-
format: string;
|
|
1086
|
-
/** Generated or provided filename, or null. */
|
|
1087
|
-
filename: string | null;
|
|
1088
|
-
/** Alt text or description, or null. */
|
|
1089
|
-
description: string | null;
|
|
1090
|
-
/** Image width in pixels, or null if not available. */
|
|
1091
|
-
width: number | null;
|
|
1092
|
-
/** Image height in pixels, or null if not available. */
|
|
1093
|
-
height: number | null;
|
|
1094
|
-
/** Source type ("img_data_uri" or "svg_element"). */
|
|
1095
|
-
source: string;
|
|
1096
|
-
/** HTML attributes from the source element. */
|
|
1097
|
-
attributes: Record<string, string>;
|
|
1098
|
-
}
|
|
1099
|
-
|
|
1100
|
-
/** Result of the convert() API. */
|
|
1101
|
-
export interface WasmConversionResult {
|
|
1102
|
-
/** Converted text output (markdown, djot, or plain text), or null. */
|
|
1103
|
-
content: string | null;
|
|
1104
|
-
/** Structured document tree serialized as a JSON value, or null. */
|
|
1105
|
-
document: unknown | null;
|
|
1106
|
-
/** Extracted HTML metadata serialized as a JSON value, or null. */
|
|
1107
|
-
metadata: unknown | null;
|
|
1108
|
-
/** All tables found in the HTML, in document order. */
|
|
1109
|
-
tables: WasmConversionTable[];
|
|
1110
|
-
/** Extracted inline images (data URIs and SVGs). */
|
|
1111
|
-
images: WasmInlineImage[];
|
|
1112
|
-
/** Non-fatal processing warnings. */
|
|
1113
|
-
warnings: WasmConversionWarning[];
|
|
1114
|
-
}
|
|
1032
|
+
/**
|
|
1033
|
+
* Convert HTML to Markdown, returning a [`ConversionResult`] with content, metadata, images,
|
|
1034
|
+
* and warnings.
|
|
1035
|
+
*
|
|
1036
|
+
* # Arguments
|
|
1037
|
+
*
|
|
1038
|
+
* * `html` — the HTML string to convert.
|
|
1039
|
+
* * `options` — optional conversion options. Defaults to [`ConversionOptions::default`].
|
|
1040
|
+
*
|
|
1041
|
+
* # Example
|
|
1042
|
+
*
|
|
1043
|
+
* ```
|
|
1044
|
+
* use html_to_markdown_rs::convert;
|
|
1045
|
+
*
|
|
1046
|
+
* let html = "<h1>Hello World</h1>";
|
|
1047
|
+
* let result = convert(html, None).unwrap();
|
|
1048
|
+
* assert!(result.content.as_deref().unwrap_or("").contains("Hello World"));
|
|
1049
|
+
* ```
|
|
1050
|
+
*
|
|
1051
|
+
* # Errors
|
|
1052
|
+
*
|
|
1053
|
+
* Returns an error if HTML parsing fails or if the input contains invalid UTF-8.
|
|
1054
|
+
*/
|
|
1055
|
+
export function convert(html: string, options?: WasmConversionOptions | null): WasmConversionResult;
|