@kreuzberg/html-to-markdown-wasm 3.1.0 → 3.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,40 +1,488 @@
1
1
  /* tslint:disable */
2
2
  /* eslint-disable */
3
3
 
4
- /**
5
- * Convert HTML to Markdown, returning a JavaScript object with structured content, metadata,
6
- * images, and warnings in a single pass.
7
- *
8
- * This is the primary API entry point. Returns a JavaScript object with:
9
- * - `content`: converted text (string or null)
10
- * - `document`: structured document tree (object or null)
11
- * - `metadata`: extracted HTML metadata (object or null)
12
- * - `tables`: array of extracted table data
13
- * - `warnings`: array of non-fatal processing warnings
14
- *
15
- * # Arguments
16
- *
17
- * * `html` - The HTML string to convert
18
- * * `options` - Optional conversion options (as a JavaScript object)
19
- *
20
- * # Example
21
- *
22
- * ```javascript
23
- * import { convert } from 'html-to-markdown-wasm';
24
- *
25
- * const html = '<h1>Hello World</h1><p>Some text.</p>';
26
- * const result = convert(html, null);
27
- * console.log(result.content); // '# Hello World\n\nSome text.'
28
- * console.log(result.tables); // []
29
- * console.log(result.warnings); // []
30
- * ```
31
- */
32
- export function convert(html: string, options?: WasmConversionOptions | null): WasmConversionResult;
4
+ export enum WasmAnnotationKind {
5
+ Bold = 0,
6
+ Italic = 1,
7
+ Underline = 2,
8
+ Strikethrough = 3,
9
+ Code = 4,
10
+ Subscript = 5,
11
+ Superscript = 6,
12
+ Highlight = 7,
13
+ Link = 8,
14
+ }
15
+
16
+ export enum WasmCodeBlockStyle {
17
+ Indented = 0,
18
+ Backticks = 1,
19
+ Tildes = 2,
20
+ }
21
+
22
+ export class WasmConversionOptions {
23
+ free(): void;
24
+ [Symbol.dispose](): void;
25
+ static builder(): WasmConversionOptionsBuilder;
26
+ static default(): WasmConversionOptions;
27
+ constructor(heading_style?: WasmHeadingStyle | null, list_indent_type?: WasmListIndentType | null, list_indent_width?: number | null, bullets?: string | null, strong_em_symbol?: string | null, escape_asterisks?: boolean | null, escape_underscores?: boolean | null, escape_misc?: boolean | null, escape_ascii?: boolean | null, code_language?: string | null, autolinks?: boolean | null, default_title?: boolean | null, br_in_tables?: boolean | null, highlight_style?: WasmHighlightStyle | null, extract_metadata?: boolean | null, whitespace_mode?: WasmWhitespaceMode | null, strip_newlines?: boolean | null, wrap?: boolean | null, wrap_width?: number | null, convert_as_inline?: boolean | null, sub_symbol?: string | null, sup_symbol?: string | null, newline_style?: WasmNewlineStyle | null, code_block_style?: WasmCodeBlockStyle | null, keep_inline_images_in?: string[] | null, preprocessing?: WasmPreprocessingOptions | null, encoding?: string | null, debug?: boolean | null, strip_tags?: string[] | null, preserve_tags?: string[] | null, skip_images?: boolean | null, link_style?: WasmLinkStyle | null, output_format?: WasmOutputFormat | null, include_document_structure?: boolean | null, extract_images?: boolean | null, max_image_size?: bigint | null, capture_svg?: boolean | null, infer_dimensions?: boolean | null);
28
+ autolinks: boolean;
29
+ brInTables: boolean;
30
+ bullets: string;
31
+ captureSvg: boolean;
32
+ codeBlockStyle: WasmCodeBlockStyle;
33
+ codeLanguage: string;
34
+ convertAsInline: boolean;
35
+ debug: boolean;
36
+ defaultTitle: boolean;
37
+ encoding: string;
38
+ escapeAscii: boolean;
39
+ escapeAsterisks: boolean;
40
+ escapeMisc: boolean;
41
+ escapeUnderscores: boolean;
42
+ extractImages: boolean;
43
+ extractMetadata: boolean;
44
+ headingStyle: WasmHeadingStyle;
45
+ highlightStyle: WasmHighlightStyle;
46
+ includeDocumentStructure: boolean;
47
+ inferDimensions: boolean;
48
+ keepInlineImagesIn: string[];
49
+ linkStyle: WasmLinkStyle;
50
+ listIndentType: WasmListIndentType;
51
+ listIndentWidth: number;
52
+ maxImageSize: bigint;
53
+ newlineStyle: WasmNewlineStyle;
54
+ outputFormat: WasmOutputFormat;
55
+ preprocessing: WasmPreprocessingOptions;
56
+ preserveTags: string[];
57
+ skipImages: boolean;
58
+ stripNewlines: boolean;
59
+ stripTags: string[];
60
+ strongEmSymbol: string;
61
+ subSymbol: string;
62
+ supSymbol: string;
63
+ whitespaceMode: WasmWhitespaceMode;
64
+ wrap: boolean;
65
+ wrapWidth: number;
66
+ }
67
+
68
+ export class WasmConversionOptionsBuilder {
69
+ private constructor();
70
+ free(): void;
71
+ [Symbol.dispose](): void;
72
+ build(): WasmConversionOptions;
73
+ keepInlineImagesIn(tags: string[]): WasmConversionOptionsBuilder;
74
+ preprocessing(preprocessing: WasmPreprocessingOptions): WasmConversionOptionsBuilder;
75
+ preserveTags(tags: string[]): WasmConversionOptionsBuilder;
76
+ stripTags(tags: string[]): WasmConversionOptionsBuilder;
77
+ }
78
+
79
+ export class WasmConversionOptionsUpdate {
80
+ free(): void;
81
+ [Symbol.dispose](): void;
82
+ constructor(heading_style?: WasmHeadingStyle | null, list_indent_type?: WasmListIndentType | null, list_indent_width?: number | null, bullets?: string | null, strong_em_symbol?: string | null, escape_asterisks?: boolean | null, escape_underscores?: boolean | null, escape_misc?: boolean | null, escape_ascii?: boolean | null, code_language?: string | null, autolinks?: boolean | null, default_title?: boolean | null, br_in_tables?: boolean | null, highlight_style?: WasmHighlightStyle | null, extract_metadata?: boolean | null, whitespace_mode?: WasmWhitespaceMode | null, strip_newlines?: boolean | null, wrap?: boolean | null, wrap_width?: number | null, convert_as_inline?: boolean | null, sub_symbol?: string | null, sup_symbol?: string | null, newline_style?: WasmNewlineStyle | null, code_block_style?: WasmCodeBlockStyle | null, keep_inline_images_in?: string[] | null, preprocessing?: WasmPreprocessingOptionsUpdate | null, encoding?: string | null, debug?: boolean | null, strip_tags?: string[] | null, preserve_tags?: string[] | null, skip_images?: boolean | null, link_style?: WasmLinkStyle | null, output_format?: WasmOutputFormat | null, include_document_structure?: boolean | null, extract_images?: boolean | null, max_image_size?: bigint | null, capture_svg?: boolean | null, infer_dimensions?: boolean | null);
83
+ get autolinks(): boolean | undefined;
84
+ set autolinks(value: boolean | null | undefined);
85
+ get brInTables(): boolean | undefined;
86
+ set brInTables(value: boolean | null | undefined);
87
+ get bullets(): string | undefined;
88
+ set bullets(value: string | null | undefined);
89
+ get captureSvg(): boolean | undefined;
90
+ set captureSvg(value: boolean | null | undefined);
91
+ get codeBlockStyle(): WasmCodeBlockStyle | undefined;
92
+ set codeBlockStyle(value: WasmCodeBlockStyle | null | undefined);
93
+ get codeLanguage(): string | undefined;
94
+ set codeLanguage(value: string | null | undefined);
95
+ get convertAsInline(): boolean | undefined;
96
+ set convertAsInline(value: boolean | null | undefined);
97
+ get debug(): boolean | undefined;
98
+ set debug(value: boolean | null | undefined);
99
+ get defaultTitle(): boolean | undefined;
100
+ set defaultTitle(value: boolean | null | undefined);
101
+ get encoding(): string | undefined;
102
+ set encoding(value: string | null | undefined);
103
+ get escapeAscii(): boolean | undefined;
104
+ set escapeAscii(value: boolean | null | undefined);
105
+ get escapeAsterisks(): boolean | undefined;
106
+ set escapeAsterisks(value: boolean | null | undefined);
107
+ get escapeMisc(): boolean | undefined;
108
+ set escapeMisc(value: boolean | null | undefined);
109
+ get escapeUnderscores(): boolean | undefined;
110
+ set escapeUnderscores(value: boolean | null | undefined);
111
+ get extractImages(): boolean | undefined;
112
+ set extractImages(value: boolean | null | undefined);
113
+ get extractMetadata(): boolean | undefined;
114
+ set extractMetadata(value: boolean | null | undefined);
115
+ get headingStyle(): WasmHeadingStyle | undefined;
116
+ set headingStyle(value: WasmHeadingStyle | null | undefined);
117
+ get highlightStyle(): WasmHighlightStyle | undefined;
118
+ set highlightStyle(value: WasmHighlightStyle | null | undefined);
119
+ get includeDocumentStructure(): boolean | undefined;
120
+ set includeDocumentStructure(value: boolean | null | undefined);
121
+ get inferDimensions(): boolean | undefined;
122
+ set inferDimensions(value: boolean | null | undefined);
123
+ get keepInlineImagesIn(): string[] | undefined;
124
+ set keepInlineImagesIn(value: string[] | null | undefined);
125
+ get linkStyle(): WasmLinkStyle | undefined;
126
+ set linkStyle(value: WasmLinkStyle | null | undefined);
127
+ get listIndentType(): WasmListIndentType | undefined;
128
+ set listIndentType(value: WasmListIndentType | null | undefined);
129
+ get listIndentWidth(): number | undefined;
130
+ set listIndentWidth(value: number | null | undefined);
131
+ get maxImageSize(): bigint | undefined;
132
+ set maxImageSize(value: bigint | null | undefined);
133
+ get newlineStyle(): WasmNewlineStyle | undefined;
134
+ set newlineStyle(value: WasmNewlineStyle | null | undefined);
135
+ get outputFormat(): WasmOutputFormat | undefined;
136
+ set outputFormat(value: WasmOutputFormat | null | undefined);
137
+ get preprocessing(): WasmPreprocessingOptionsUpdate | undefined;
138
+ set preprocessing(value: WasmPreprocessingOptionsUpdate | null | undefined);
139
+ get preserveTags(): string[] | undefined;
140
+ set preserveTags(value: string[] | null | undefined);
141
+ get skipImages(): boolean | undefined;
142
+ set skipImages(value: boolean | null | undefined);
143
+ get stripNewlines(): boolean | undefined;
144
+ set stripNewlines(value: boolean | null | undefined);
145
+ get stripTags(): string[] | undefined;
146
+ set stripTags(value: string[] | null | undefined);
147
+ get strongEmSymbol(): string | undefined;
148
+ set strongEmSymbol(value: string | null | undefined);
149
+ get subSymbol(): string | undefined;
150
+ set subSymbol(value: string | null | undefined);
151
+ get supSymbol(): string | undefined;
152
+ set supSymbol(value: string | null | undefined);
153
+ get whitespaceMode(): WasmWhitespaceMode | undefined;
154
+ set whitespaceMode(value: WasmWhitespaceMode | null | undefined);
155
+ get wrap(): boolean | undefined;
156
+ set wrap(value: boolean | null | undefined);
157
+ get wrapWidth(): number | undefined;
158
+ set wrapWidth(value: number | null | undefined);
159
+ }
160
+
161
+ export class WasmConversionResult {
162
+ free(): void;
163
+ [Symbol.dispose](): void;
164
+ constructor(metadata?: WasmHtmlMetadata | null, tables?: WasmTableData[] | null, images?: string[] | null, warnings?: WasmProcessingWarning[] | null, content?: string | null, document?: WasmDocumentStructure | null);
165
+ get content(): string | undefined;
166
+ set content(value: string | null | undefined);
167
+ get document(): WasmDocumentStructure | undefined;
168
+ set document(value: WasmDocumentStructure | null | undefined);
169
+ images: string[];
170
+ metadata: WasmHtmlMetadata;
171
+ tables: WasmTableData[];
172
+ warnings: WasmProcessingWarning[];
173
+ }
174
+
175
+ export class WasmDocumentMetadata {
176
+ free(): void;
177
+ [Symbol.dispose](): void;
178
+ constructor(keywords?: string[] | null, open_graph?: any | null, twitter_card?: any | null, meta_tags?: any | null, title?: string | null, description?: string | null, author?: string | null, canonical_url?: string | null, base_href?: string | null, language?: string | null, text_direction?: WasmTextDirection | null);
179
+ get author(): string | undefined;
180
+ set author(value: string | null | undefined);
181
+ get baseHref(): string | undefined;
182
+ set baseHref(value: string | null | undefined);
183
+ get canonicalUrl(): string | undefined;
184
+ set canonicalUrl(value: string | null | undefined);
185
+ get description(): string | undefined;
186
+ set description(value: string | null | undefined);
187
+ keywords: string[];
188
+ get language(): string | undefined;
189
+ set language(value: string | null | undefined);
190
+ metaTags: any;
191
+ openGraph: any;
192
+ get textDirection(): WasmTextDirection | undefined;
193
+ set textDirection(value: WasmTextDirection | null | undefined);
194
+ get title(): string | undefined;
195
+ set title(value: string | null | undefined);
196
+ twitterCard: any;
197
+ }
198
+
199
+ export class WasmDocumentNode {
200
+ free(): void;
201
+ [Symbol.dispose](): void;
202
+ constructor(id: string, content: WasmNodeContent, children: Uint32Array, annotations: WasmTextAnnotation[], parent?: number | null, attributes?: any | null);
203
+ annotations: WasmTextAnnotation[];
204
+ get attributes(): any | undefined;
205
+ set attributes(value: any | null | undefined);
206
+ children: Uint32Array;
207
+ content: WasmNodeContent;
208
+ id: string;
209
+ get parent(): number | undefined;
210
+ set parent(value: number | null | undefined);
211
+ }
212
+
213
+ export class WasmDocumentStructure {
214
+ free(): void;
215
+ [Symbol.dispose](): void;
216
+ constructor(nodes: WasmDocumentNode[], source_format?: string | null);
217
+ nodes: WasmDocumentNode[];
218
+ get sourceFormat(): string | undefined;
219
+ set sourceFormat(value: string | null | undefined);
220
+ }
221
+
222
+ export class WasmGridCell {
223
+ free(): void;
224
+ [Symbol.dispose](): void;
225
+ constructor(content: string, row: number, col: number, row_span: number, col_span: number, is_header: boolean);
226
+ col: number;
227
+ colSpan: number;
228
+ content: string;
229
+ isHeader: boolean;
230
+ row: number;
231
+ rowSpan: number;
232
+ }
233
+
234
+ export class WasmHeaderMetadata {
235
+ free(): void;
236
+ [Symbol.dispose](): void;
237
+ isValid(): boolean;
238
+ constructor(level: number, text: string, depth: number, html_offset: number, id?: string | null);
239
+ depth: number;
240
+ htmlOffset: number;
241
+ get id(): string | undefined;
242
+ set id(value: string | null | undefined);
243
+ level: number;
244
+ text: string;
245
+ }
246
+
247
+ export enum WasmHeadingStyle {
248
+ Underlined = 0,
249
+ Atx = 1,
250
+ AtxClosed = 2,
251
+ }
252
+
253
+ export enum WasmHighlightStyle {
254
+ DoubleEqual = 0,
255
+ Html = 1,
256
+ Bold = 2,
257
+ None = 3,
258
+ }
259
+
260
+ export class WasmHtmlMetadata {
261
+ free(): void;
262
+ [Symbol.dispose](): void;
263
+ constructor(document?: WasmDocumentMetadata | null, headers?: WasmHeaderMetadata[] | null, links?: WasmLinkMetadata[] | null, images?: WasmImageMetadata[] | null, structured_data?: WasmStructuredData[] | null);
264
+ document: WasmDocumentMetadata;
265
+ headers: WasmHeaderMetadata[];
266
+ images: WasmImageMetadata[];
267
+ links: WasmLinkMetadata[];
268
+ structuredData: WasmStructuredData[];
269
+ }
270
+
271
+ export class WasmImageMetadata {
272
+ free(): void;
273
+ [Symbol.dispose](): void;
274
+ constructor(src: string, image_type: WasmImageType, attributes: any, alt?: string | null, title?: string | null, dimensions?: string | null);
275
+ get alt(): string | undefined;
276
+ set alt(value: string | null | undefined);
277
+ attributes: any;
278
+ get dimensions(): string | undefined;
279
+ set dimensions(value: string | null | undefined);
280
+ imageType: WasmImageType;
281
+ src: string;
282
+ get title(): string | undefined;
283
+ set title(value: string | null | undefined);
284
+ }
285
+
286
+ export enum WasmImageType {
287
+ DataUri = 0,
288
+ InlineSvg = 1,
289
+ External = 2,
290
+ Relative = 3,
291
+ }
292
+
293
+ export class WasmLinkMetadata {
294
+ free(): void;
295
+ [Symbol.dispose](): void;
296
+ static classifyLink(href: string): WasmLinkType;
297
+ constructor(href: string, text: string, link_type: WasmLinkType, rel: string[], attributes: any, title?: string | null);
298
+ attributes: any;
299
+ href: string;
300
+ linkType: WasmLinkType;
301
+ rel: string[];
302
+ text: string;
303
+ get title(): string | undefined;
304
+ set title(value: string | null | undefined);
305
+ }
306
+
307
+ export enum WasmLinkStyle {
308
+ Inline = 0,
309
+ Reference = 1,
310
+ }
311
+
312
+ export enum WasmLinkType {
313
+ Anchor = 0,
314
+ Internal = 1,
315
+ External = 2,
316
+ Email = 3,
317
+ Phone = 4,
318
+ Other = 5,
319
+ }
33
320
 
34
- /**
35
- * Initialize panic hook for better error messages in the browser
36
- */
37
- export function init(): void;
321
+ export enum WasmListIndentType {
322
+ Spaces = 0,
323
+ Tabs = 1,
324
+ }
325
+
326
+ export class WasmMetadataConfig {
327
+ free(): void;
328
+ [Symbol.dispose](): void;
329
+ anyEnabled(): boolean;
330
+ static default(): WasmMetadataConfig;
331
+ constructor(extract_document?: boolean | null, extract_headers?: boolean | null, extract_links?: boolean | null, extract_images?: boolean | null, extract_structured_data?: boolean | null, max_structured_data_size?: number | null);
332
+ extractDocument: boolean;
333
+ extractHeaders: boolean;
334
+ extractImages: boolean;
335
+ extractLinks: boolean;
336
+ extractStructuredData: boolean;
337
+ maxStructuredDataSize: number;
338
+ }
339
+
340
+ export class WasmMetadataConfigUpdate {
341
+ free(): void;
342
+ [Symbol.dispose](): void;
343
+ constructor(extract_document?: boolean | null, extract_headers?: boolean | null, extract_links?: boolean | null, extract_images?: boolean | null, extract_structured_data?: boolean | null, max_structured_data_size?: number | null);
344
+ get extractDocument(): boolean | undefined;
345
+ set extractDocument(value: boolean | null | undefined);
346
+ get extractHeaders(): boolean | undefined;
347
+ set extractHeaders(value: boolean | null | undefined);
348
+ get extractImages(): boolean | undefined;
349
+ set extractImages(value: boolean | null | undefined);
350
+ get extractLinks(): boolean | undefined;
351
+ set extractLinks(value: boolean | null | undefined);
352
+ get extractStructuredData(): boolean | undefined;
353
+ set extractStructuredData(value: boolean | null | undefined);
354
+ get maxStructuredDataSize(): number | undefined;
355
+ set maxStructuredDataSize(value: number | null | undefined);
356
+ }
357
+
358
+ export enum WasmNewlineStyle {
359
+ Spaces = 0,
360
+ Backslash = 1,
361
+ }
362
+
363
+ export enum WasmNodeContent {
364
+ Heading = 0,
365
+ Paragraph = 1,
366
+ List = 2,
367
+ ListItem = 3,
368
+ Table = 4,
369
+ Image = 5,
370
+ Code = 6,
371
+ Quote = 7,
372
+ DefinitionList = 8,
373
+ DefinitionItem = 9,
374
+ RawBlock = 10,
375
+ MetadataBlock = 11,
376
+ Group = 12,
377
+ }
378
+
379
+ export enum WasmOutputFormat {
380
+ Markdown = 0,
381
+ Djot = 1,
382
+ Plain = 2,
383
+ }
384
+
385
+ export class WasmPreprocessingOptions {
386
+ free(): void;
387
+ [Symbol.dispose](): void;
388
+ static default(): WasmPreprocessingOptions;
389
+ constructor(enabled?: boolean | null, preset?: WasmPreprocessingPreset | null, remove_navigation?: boolean | null, remove_forms?: boolean | null);
390
+ enabled: boolean;
391
+ preset: WasmPreprocessingPreset;
392
+ removeForms: boolean;
393
+ removeNavigation: boolean;
394
+ }
395
+
396
+ export class WasmPreprocessingOptionsUpdate {
397
+ free(): void;
398
+ [Symbol.dispose](): void;
399
+ constructor(enabled?: boolean | null, preset?: WasmPreprocessingPreset | null, remove_navigation?: boolean | null, remove_forms?: boolean | null);
400
+ get enabled(): boolean | undefined;
401
+ set enabled(value: boolean | null | undefined);
402
+ get preset(): WasmPreprocessingPreset | undefined;
403
+ set preset(value: WasmPreprocessingPreset | null | undefined);
404
+ get removeForms(): boolean | undefined;
405
+ set removeForms(value: boolean | null | undefined);
406
+ get removeNavigation(): boolean | undefined;
407
+ set removeNavigation(value: boolean | null | undefined);
408
+ }
409
+
410
+ export enum WasmPreprocessingPreset {
411
+ Minimal = 0,
412
+ Standard = 1,
413
+ Aggressive = 2,
414
+ }
415
+
416
+ export class WasmProcessingWarning {
417
+ free(): void;
418
+ [Symbol.dispose](): void;
419
+ constructor(message: string, kind: WasmWarningKind);
420
+ kind: WasmWarningKind;
421
+ message: string;
422
+ }
423
+
424
+ export class WasmStructuredData {
425
+ free(): void;
426
+ [Symbol.dispose](): void;
427
+ constructor(data_type: WasmStructuredDataType, raw_json: string, schema_type?: string | null);
428
+ dataType: WasmStructuredDataType;
429
+ rawJson: string;
430
+ get schemaType(): string | undefined;
431
+ set schemaType(value: string | null | undefined);
432
+ }
433
+
434
+ export enum WasmStructuredDataType {
435
+ JsonLd = 0,
436
+ Microdata = 1,
437
+ RDFa = 2,
438
+ }
439
+
440
+ export class WasmTableData {
441
+ free(): void;
442
+ [Symbol.dispose](): void;
443
+ constructor(grid: WasmTableGrid, markdown: string);
444
+ grid: WasmTableGrid;
445
+ markdown: string;
446
+ }
447
+
448
+ export class WasmTableGrid {
449
+ free(): void;
450
+ [Symbol.dispose](): void;
451
+ constructor(rows?: number | null, cols?: number | null, cells?: WasmGridCell[] | null);
452
+ cells: WasmGridCell[];
453
+ cols: number;
454
+ rows: number;
455
+ }
456
+
457
+ export class WasmTextAnnotation {
458
+ free(): void;
459
+ [Symbol.dispose](): void;
460
+ constructor(start: number, end: number, kind: WasmAnnotationKind);
461
+ end: number;
462
+ kind: WasmAnnotationKind;
463
+ start: number;
464
+ }
465
+
466
+ export enum WasmTextDirection {
467
+ LeftToRight = 0,
468
+ RightToLeft = 1,
469
+ Auto = 2,
470
+ }
471
+
472
+ export enum WasmWarningKind {
473
+ ImageExtractionFailed = 0,
474
+ EncodingFallback = 1,
475
+ TruncatedInput = 2,
476
+ MalformedHtml = 3,
477
+ SanitizationApplied = 4,
478
+ }
479
+
480
+ export enum WasmWhitespaceMode {
481
+ Normalized = 0,
482
+ Strict = 1,
483
+ }
484
+
485
+ export function convert(html: string, options?: WasmConversionOptions | null): WasmConversionResult;
38
486
 
39
487
  export declare function initWasm(): Promise<void>;
40
488
  export declare const wasmReady: Promise<void>;