@kreuzberg/html-to-markdown-wasm 3.1.0 → 3.2.0

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,40 +1,488 @@
1
1
  /* tslint:disable */
2
2
  /* eslint-disable */
3
3
 
4
- /**
5
- * Convert HTML to Markdown, returning a JavaScript object with structured content, metadata,
6
- * images, and warnings in a single pass.
7
- *
8
- * This is the primary API entry point. Returns a JavaScript object with:
9
- * - `content`: converted text (string or null)
10
- * - `document`: structured document tree (object or null)
11
- * - `metadata`: extracted HTML metadata (object or null)
12
- * - `tables`: array of extracted table data
13
- * - `warnings`: array of non-fatal processing warnings
14
- *
15
- * # Arguments
16
- *
17
- * * `html` - The HTML string to convert
18
- * * `options` - Optional conversion options (as a JavaScript object)
19
- *
20
- * # Example
21
- *
22
- * ```javascript
23
- * import { convert } from 'html-to-markdown-wasm';
24
- *
25
- * const html = '<h1>Hello World</h1><p>Some text.</p>';
26
- * const result = convert(html, null);
27
- * console.log(result.content); // '# Hello World\n\nSome text.'
28
- * console.log(result.tables); // []
29
- * console.log(result.warnings); // []
30
- * ```
31
- */
32
- export function convert(html: string, options?: WasmConversionOptions | null): WasmConversionResult;
33
-
34
- /**
35
- * Initialize panic hook for better error messages in the browser
36
- */
37
- export function init(): void;
4
+ export enum JsAnnotationKind {
5
+ Bold = 0,
6
+ Italic = 1,
7
+ Underline = 2,
8
+ Strikethrough = 3,
9
+ Code = 4,
10
+ Subscript = 5,
11
+ Superscript = 6,
12
+ Highlight = 7,
13
+ Link = 8,
14
+ }
15
+
16
+ export enum JsCodeBlockStyle {
17
+ Indented = 0,
18
+ Backticks = 1,
19
+ Tildes = 2,
20
+ }
21
+
22
+ export class JsConversionOptions {
23
+ free(): void;
24
+ [Symbol.dispose](): void;
25
+ static builder(): JsConversionOptionsBuilder;
26
+ static default(): JsConversionOptions;
27
+ constructor(heading_style?: JsHeadingStyle | null, list_indent_type?: JsListIndentType | null, list_indent_width?: number | null, bullets?: string | null, strong_em_symbol?: string | null, escape_asterisks?: boolean | null, escape_underscores?: boolean | null, escape_misc?: boolean | null, escape_ascii?: boolean | null, code_language?: string | null, autolinks?: boolean | null, default_title?: boolean | null, br_in_tables?: boolean | null, highlight_style?: JsHighlightStyle | null, extract_metadata?: boolean | null, whitespace_mode?: JsWhitespaceMode | null, strip_newlines?: boolean | null, wrap?: boolean | null, wrap_width?: number | null, convert_as_inline?: boolean | null, sub_symbol?: string | null, sup_symbol?: string | null, newline_style?: JsNewlineStyle | null, code_block_style?: JsCodeBlockStyle | null, keep_inline_images_in?: string[] | null, preprocessing?: JsPreprocessingOptions | null, encoding?: string | null, debug?: boolean | null, strip_tags?: string[] | null, preserve_tags?: string[] | null, skip_images?: boolean | null, link_style?: JsLinkStyle | null, output_format?: JsOutputFormat | null, include_document_structure?: boolean | null, extract_images?: boolean | null, max_image_size?: bigint | null, capture_svg?: boolean | null, infer_dimensions?: boolean | null);
28
+ autolinks: boolean;
29
+ brInTables: boolean;
30
+ bullets: string;
31
+ captureSvg: boolean;
32
+ codeBlockStyle: JsCodeBlockStyle;
33
+ codeLanguage: string;
34
+ convertAsInline: boolean;
35
+ debug: boolean;
36
+ defaultTitle: boolean;
37
+ encoding: string;
38
+ escapeAscii: boolean;
39
+ escapeAsterisks: boolean;
40
+ escapeMisc: boolean;
41
+ escapeUnderscores: boolean;
42
+ extractImages: boolean;
43
+ extractMetadata: boolean;
44
+ headingStyle: JsHeadingStyle;
45
+ highlightStyle: JsHighlightStyle;
46
+ includeDocumentStructure: boolean;
47
+ inferDimensions: boolean;
48
+ keepInlineImagesIn: string[];
49
+ linkStyle: JsLinkStyle;
50
+ listIndentType: JsListIndentType;
51
+ listIndentWidth: number;
52
+ maxImageSize: bigint;
53
+ newlineStyle: JsNewlineStyle;
54
+ outputFormat: JsOutputFormat;
55
+ preprocessing: JsPreprocessingOptions;
56
+ preserveTags: string[];
57
+ skipImages: boolean;
58
+ stripNewlines: boolean;
59
+ stripTags: string[];
60
+ strongEmSymbol: string;
61
+ subSymbol: string;
62
+ supSymbol: string;
63
+ whitespaceMode: JsWhitespaceMode;
64
+ wrap: boolean;
65
+ wrapWidth: number;
66
+ }
67
+
68
+ export class JsConversionOptionsBuilder {
69
+ private constructor();
70
+ free(): void;
71
+ [Symbol.dispose](): void;
72
+ build(): JsConversionOptions;
73
+ keepInlineImagesIn(tags: string[]): JsConversionOptionsBuilder;
74
+ preprocessing(preprocessing: JsPreprocessingOptions): JsConversionOptionsBuilder;
75
+ preserveTags(tags: string[]): JsConversionOptionsBuilder;
76
+ stripTags(tags: string[]): JsConversionOptionsBuilder;
77
+ }
78
+
79
+ export class JsConversionOptionsUpdate {
80
+ free(): void;
81
+ [Symbol.dispose](): void;
82
+ constructor(heading_style?: JsHeadingStyle | null, list_indent_type?: JsListIndentType | null, list_indent_width?: number | null, bullets?: string | null, strong_em_symbol?: string | null, escape_asterisks?: boolean | null, escape_underscores?: boolean | null, escape_misc?: boolean | null, escape_ascii?: boolean | null, code_language?: string | null, autolinks?: boolean | null, default_title?: boolean | null, br_in_tables?: boolean | null, highlight_style?: JsHighlightStyle | null, extract_metadata?: boolean | null, whitespace_mode?: JsWhitespaceMode | null, strip_newlines?: boolean | null, wrap?: boolean | null, wrap_width?: number | null, convert_as_inline?: boolean | null, sub_symbol?: string | null, sup_symbol?: string | null, newline_style?: JsNewlineStyle | null, code_block_style?: JsCodeBlockStyle | null, keep_inline_images_in?: string[] | null, preprocessing?: JsPreprocessingOptionsUpdate | null, encoding?: string | null, debug?: boolean | null, strip_tags?: string[] | null, preserve_tags?: string[] | null, skip_images?: boolean | null, link_style?: JsLinkStyle | null, output_format?: JsOutputFormat | null, include_document_structure?: boolean | null, extract_images?: boolean | null, max_image_size?: bigint | null, capture_svg?: boolean | null, infer_dimensions?: boolean | null);
83
+ get autolinks(): boolean | undefined;
84
+ set autolinks(value: boolean | null | undefined);
85
+ get brInTables(): boolean | undefined;
86
+ set brInTables(value: boolean | null | undefined);
87
+ get bullets(): string | undefined;
88
+ set bullets(value: string | null | undefined);
89
+ get captureSvg(): boolean | undefined;
90
+ set captureSvg(value: boolean | null | undefined);
91
+ get codeBlockStyle(): JsCodeBlockStyle | undefined;
92
+ set codeBlockStyle(value: JsCodeBlockStyle | null | undefined);
93
+ get codeLanguage(): string | undefined;
94
+ set codeLanguage(value: string | null | undefined);
95
+ get convertAsInline(): boolean | undefined;
96
+ set convertAsInline(value: boolean | null | undefined);
97
+ get debug(): boolean | undefined;
98
+ set debug(value: boolean | null | undefined);
99
+ get defaultTitle(): boolean | undefined;
100
+ set defaultTitle(value: boolean | null | undefined);
101
+ get encoding(): string | undefined;
102
+ set encoding(value: string | null | undefined);
103
+ get escapeAscii(): boolean | undefined;
104
+ set escapeAscii(value: boolean | null | undefined);
105
+ get escapeAsterisks(): boolean | undefined;
106
+ set escapeAsterisks(value: boolean | null | undefined);
107
+ get escapeMisc(): boolean | undefined;
108
+ set escapeMisc(value: boolean | null | undefined);
109
+ get escapeUnderscores(): boolean | undefined;
110
+ set escapeUnderscores(value: boolean | null | undefined);
111
+ get extractImages(): boolean | undefined;
112
+ set extractImages(value: boolean | null | undefined);
113
+ get extractMetadata(): boolean | undefined;
114
+ set extractMetadata(value: boolean | null | undefined);
115
+ get headingStyle(): JsHeadingStyle | undefined;
116
+ set headingStyle(value: JsHeadingStyle | null | undefined);
117
+ get highlightStyle(): JsHighlightStyle | undefined;
118
+ set highlightStyle(value: JsHighlightStyle | null | undefined);
119
+ get includeDocumentStructure(): boolean | undefined;
120
+ set includeDocumentStructure(value: boolean | null | undefined);
121
+ get inferDimensions(): boolean | undefined;
122
+ set inferDimensions(value: boolean | null | undefined);
123
+ get keepInlineImagesIn(): string[] | undefined;
124
+ set keepInlineImagesIn(value: string[] | null | undefined);
125
+ get linkStyle(): JsLinkStyle | undefined;
126
+ set linkStyle(value: JsLinkStyle | null | undefined);
127
+ get listIndentType(): JsListIndentType | undefined;
128
+ set listIndentType(value: JsListIndentType | null | undefined);
129
+ get listIndentWidth(): number | undefined;
130
+ set listIndentWidth(value: number | null | undefined);
131
+ get maxImageSize(): bigint | undefined;
132
+ set maxImageSize(value: bigint | null | undefined);
133
+ get newlineStyle(): JsNewlineStyle | undefined;
134
+ set newlineStyle(value: JsNewlineStyle | null | undefined);
135
+ get outputFormat(): JsOutputFormat | undefined;
136
+ set outputFormat(value: JsOutputFormat | null | undefined);
137
+ get preprocessing(): JsPreprocessingOptionsUpdate | undefined;
138
+ set preprocessing(value: JsPreprocessingOptionsUpdate | null | undefined);
139
+ get preserveTags(): string[] | undefined;
140
+ set preserveTags(value: string[] | null | undefined);
141
+ get skipImages(): boolean | undefined;
142
+ set skipImages(value: boolean | null | undefined);
143
+ get stripNewlines(): boolean | undefined;
144
+ set stripNewlines(value: boolean | null | undefined);
145
+ get stripTags(): string[] | undefined;
146
+ set stripTags(value: string[] | null | undefined);
147
+ get strongEmSymbol(): string | undefined;
148
+ set strongEmSymbol(value: string | null | undefined);
149
+ get subSymbol(): string | undefined;
150
+ set subSymbol(value: string | null | undefined);
151
+ get supSymbol(): string | undefined;
152
+ set supSymbol(value: string | null | undefined);
153
+ get whitespaceMode(): JsWhitespaceMode | undefined;
154
+ set whitespaceMode(value: JsWhitespaceMode | null | undefined);
155
+ get wrap(): boolean | undefined;
156
+ set wrap(value: boolean | null | undefined);
157
+ get wrapWidth(): number | undefined;
158
+ set wrapWidth(value: number | null | undefined);
159
+ }
160
+
161
+ export class JsConversionResult {
162
+ free(): void;
163
+ [Symbol.dispose](): void;
164
+ constructor(metadata?: JsHtmlMetadata | null, tables?: JsTableData[] | null, images?: string[] | null, warnings?: JsProcessingWarning[] | null, content?: string | null, document?: JsDocumentStructure | null);
165
+ get content(): string | undefined;
166
+ set content(value: string | null | undefined);
167
+ get document(): JsDocumentStructure | undefined;
168
+ set document(value: JsDocumentStructure | null | undefined);
169
+ images: string[];
170
+ metadata: JsHtmlMetadata;
171
+ tables: JsTableData[];
172
+ warnings: JsProcessingWarning[];
173
+ }
174
+
175
+ export class JsDocumentMetadata {
176
+ free(): void;
177
+ [Symbol.dispose](): void;
178
+ constructor(keywords?: string[] | null, open_graph?: any | null, twitter_card?: any | null, meta_tags?: any | null, title?: string | null, description?: string | null, author?: string | null, canonical_url?: string | null, base_href?: string | null, language?: string | null, text_direction?: JsTextDirection | null);
179
+ get author(): string | undefined;
180
+ set author(value: string | null | undefined);
181
+ get baseHref(): string | undefined;
182
+ set baseHref(value: string | null | undefined);
183
+ get canonicalUrl(): string | undefined;
184
+ set canonicalUrl(value: string | null | undefined);
185
+ get description(): string | undefined;
186
+ set description(value: string | null | undefined);
187
+ keywords: string[];
188
+ get language(): string | undefined;
189
+ set language(value: string | null | undefined);
190
+ metaTags: any;
191
+ openGraph: any;
192
+ get textDirection(): JsTextDirection | undefined;
193
+ set textDirection(value: JsTextDirection | null | undefined);
194
+ get title(): string | undefined;
195
+ set title(value: string | null | undefined);
196
+ twitterCard: any;
197
+ }
198
+
199
+ export class JsDocumentNode {
200
+ free(): void;
201
+ [Symbol.dispose](): void;
202
+ constructor(id: string, content: JsNodeContent, children: Uint32Array, annotations: JsTextAnnotation[], parent?: number | null, attributes?: any | null);
203
+ annotations: JsTextAnnotation[];
204
+ get attributes(): any | undefined;
205
+ set attributes(value: any | null | undefined);
206
+ children: Uint32Array;
207
+ content: JsNodeContent;
208
+ id: string;
209
+ get parent(): number | undefined;
210
+ set parent(value: number | null | undefined);
211
+ }
212
+
213
+ export class JsDocumentStructure {
214
+ free(): void;
215
+ [Symbol.dispose](): void;
216
+ constructor(nodes: JsDocumentNode[], source_format?: string | null);
217
+ nodes: JsDocumentNode[];
218
+ get sourceFormat(): string | undefined;
219
+ set sourceFormat(value: string | null | undefined);
220
+ }
221
+
222
+ export class JsGridCell {
223
+ free(): void;
224
+ [Symbol.dispose](): void;
225
+ constructor(content: string, row: number, col: number, row_span: number, col_span: number, is_header: boolean);
226
+ col: number;
227
+ colSpan: number;
228
+ content: string;
229
+ isHeader: boolean;
230
+ row: number;
231
+ rowSpan: number;
232
+ }
233
+
234
+ export class JsHeaderMetadata {
235
+ free(): void;
236
+ [Symbol.dispose](): void;
237
+ isValid(): boolean;
238
+ constructor(level: number, text: string, depth: number, html_offset: number, id?: string | null);
239
+ depth: number;
240
+ htmlOffset: number;
241
+ get id(): string | undefined;
242
+ set id(value: string | null | undefined);
243
+ level: number;
244
+ text: string;
245
+ }
246
+
247
+ export enum JsHeadingStyle {
248
+ Underlined = 0,
249
+ Atx = 1,
250
+ AtxClosed = 2,
251
+ }
252
+
253
+ export enum JsHighlightStyle {
254
+ DoubleEqual = 0,
255
+ Html = 1,
256
+ Bold = 2,
257
+ None = 3,
258
+ }
259
+
260
+ export class JsHtmlMetadata {
261
+ free(): void;
262
+ [Symbol.dispose](): void;
263
+ constructor(document?: JsDocumentMetadata | null, headers?: JsHeaderMetadata[] | null, links?: JsLinkMetadata[] | null, images?: JsImageMetadata[] | null, structured_data?: JsStructuredData[] | null);
264
+ document: JsDocumentMetadata;
265
+ headers: JsHeaderMetadata[];
266
+ images: JsImageMetadata[];
267
+ links: JsLinkMetadata[];
268
+ structuredData: JsStructuredData[];
269
+ }
270
+
271
+ export class JsImageMetadata {
272
+ free(): void;
273
+ [Symbol.dispose](): void;
274
+ constructor(src: string, image_type: JsImageType, attributes: any, alt?: string | null, title?: string | null, dimensions?: string | null);
275
+ get alt(): string | undefined;
276
+ set alt(value: string | null | undefined);
277
+ attributes: any;
278
+ get dimensions(): string | undefined;
279
+ set dimensions(value: string | null | undefined);
280
+ imageType: JsImageType;
281
+ src: string;
282
+ get title(): string | undefined;
283
+ set title(value: string | null | undefined);
284
+ }
285
+
286
+ export enum JsImageType {
287
+ DataUri = 0,
288
+ InlineSvg = 1,
289
+ External = 2,
290
+ Relative = 3,
291
+ }
292
+
293
+ export class JsLinkMetadata {
294
+ free(): void;
295
+ [Symbol.dispose](): void;
296
+ static classifyLink(href: string): JsLinkType;
297
+ constructor(href: string, text: string, link_type: JsLinkType, rel: string[], attributes: any, title?: string | null);
298
+ attributes: any;
299
+ href: string;
300
+ linkType: JsLinkType;
301
+ rel: string[];
302
+ text: string;
303
+ get title(): string | undefined;
304
+ set title(value: string | null | undefined);
305
+ }
306
+
307
+ export enum JsLinkStyle {
308
+ Inline = 0,
309
+ Reference = 1,
310
+ }
311
+
312
+ export enum JsLinkType {
313
+ Anchor = 0,
314
+ Internal = 1,
315
+ External = 2,
316
+ Email = 3,
317
+ Phone = 4,
318
+ Other = 5,
319
+ }
320
+
321
+ export enum JsListIndentType {
322
+ Spaces = 0,
323
+ Tabs = 1,
324
+ }
325
+
326
+ export class JsMetadataConfig {
327
+ free(): void;
328
+ [Symbol.dispose](): void;
329
+ anyEnabled(): boolean;
330
+ static default(): JsMetadataConfig;
331
+ constructor(extract_document?: boolean | null, extract_headers?: boolean | null, extract_links?: boolean | null, extract_images?: boolean | null, extract_structured_data?: boolean | null, max_structured_data_size?: number | null);
332
+ extractDocument: boolean;
333
+ extractHeaders: boolean;
334
+ extractImages: boolean;
335
+ extractLinks: boolean;
336
+ extractStructuredData: boolean;
337
+ maxStructuredDataSize: number;
338
+ }
339
+
340
+ export class JsMetadataConfigUpdate {
341
+ free(): void;
342
+ [Symbol.dispose](): void;
343
+ constructor(extract_document?: boolean | null, extract_headers?: boolean | null, extract_links?: boolean | null, extract_images?: boolean | null, extract_structured_data?: boolean | null, max_structured_data_size?: number | null);
344
+ get extractDocument(): boolean | undefined;
345
+ set extractDocument(value: boolean | null | undefined);
346
+ get extractHeaders(): boolean | undefined;
347
+ set extractHeaders(value: boolean | null | undefined);
348
+ get extractImages(): boolean | undefined;
349
+ set extractImages(value: boolean | null | undefined);
350
+ get extractLinks(): boolean | undefined;
351
+ set extractLinks(value: boolean | null | undefined);
352
+ get extractStructuredData(): boolean | undefined;
353
+ set extractStructuredData(value: boolean | null | undefined);
354
+ get maxStructuredDataSize(): number | undefined;
355
+ set maxStructuredDataSize(value: number | null | undefined);
356
+ }
357
+
358
+ export enum JsNewlineStyle {
359
+ Spaces = 0,
360
+ Backslash = 1,
361
+ }
362
+
363
+ export enum JsNodeContent {
364
+ Heading = 0,
365
+ Paragraph = 1,
366
+ List = 2,
367
+ ListItem = 3,
368
+ Table = 4,
369
+ Image = 5,
370
+ Code = 6,
371
+ Quote = 7,
372
+ DefinitionList = 8,
373
+ DefinitionItem = 9,
374
+ RawBlock = 10,
375
+ MetadataBlock = 11,
376
+ Group = 12,
377
+ }
378
+
379
+ export enum JsOutputFormat {
380
+ Markdown = 0,
381
+ Djot = 1,
382
+ Plain = 2,
383
+ }
384
+
385
+ export class JsPreprocessingOptions {
386
+ free(): void;
387
+ [Symbol.dispose](): void;
388
+ static default(): JsPreprocessingOptions;
389
+ constructor(enabled?: boolean | null, preset?: JsPreprocessingPreset | null, remove_navigation?: boolean | null, remove_forms?: boolean | null);
390
+ enabled: boolean;
391
+ preset: JsPreprocessingPreset;
392
+ removeForms: boolean;
393
+ removeNavigation: boolean;
394
+ }
395
+
396
+ export class JsPreprocessingOptionsUpdate {
397
+ free(): void;
398
+ [Symbol.dispose](): void;
399
+ constructor(enabled?: boolean | null, preset?: JsPreprocessingPreset | null, remove_navigation?: boolean | null, remove_forms?: boolean | null);
400
+ get enabled(): boolean | undefined;
401
+ set enabled(value: boolean | null | undefined);
402
+ get preset(): JsPreprocessingPreset | undefined;
403
+ set preset(value: JsPreprocessingPreset | null | undefined);
404
+ get removeForms(): boolean | undefined;
405
+ set removeForms(value: boolean | null | undefined);
406
+ get removeNavigation(): boolean | undefined;
407
+ set removeNavigation(value: boolean | null | undefined);
408
+ }
409
+
410
+ export enum JsPreprocessingPreset {
411
+ Minimal = 0,
412
+ Standard = 1,
413
+ Aggressive = 2,
414
+ }
415
+
416
+ export class JsProcessingWarning {
417
+ free(): void;
418
+ [Symbol.dispose](): void;
419
+ constructor(message: string, kind: JsWarningKind);
420
+ kind: JsWarningKind;
421
+ message: string;
422
+ }
423
+
424
+ export class JsStructuredData {
425
+ free(): void;
426
+ [Symbol.dispose](): void;
427
+ constructor(data_type: JsStructuredDataType, raw_json: string, schema_type?: string | null);
428
+ dataType: JsStructuredDataType;
429
+ rawJson: string;
430
+ get schemaType(): string | undefined;
431
+ set schemaType(value: string | null | undefined);
432
+ }
433
+
434
+ export enum JsStructuredDataType {
435
+ JsonLd = 0,
436
+ Microdata = 1,
437
+ RDFa = 2,
438
+ }
439
+
440
+ export class JsTableData {
441
+ free(): void;
442
+ [Symbol.dispose](): void;
443
+ constructor(grid: JsTableGrid, markdown: string);
444
+ grid: JsTableGrid;
445
+ markdown: string;
446
+ }
447
+
448
+ export class JsTableGrid {
449
+ free(): void;
450
+ [Symbol.dispose](): void;
451
+ constructor(rows?: number | null, cols?: number | null, cells?: JsGridCell[] | null);
452
+ cells: JsGridCell[];
453
+ cols: number;
454
+ rows: number;
455
+ }
456
+
457
+ export class JsTextAnnotation {
458
+ free(): void;
459
+ [Symbol.dispose](): void;
460
+ constructor(start: number, end: number, kind: JsAnnotationKind);
461
+ end: number;
462
+ kind: JsAnnotationKind;
463
+ start: number;
464
+ }
465
+
466
+ export enum JsTextDirection {
467
+ LeftToRight = 0,
468
+ RightToLeft = 1,
469
+ Auto = 2,
470
+ }
471
+
472
+ export enum JsWarningKind {
473
+ ImageExtractionFailed = 0,
474
+ EncodingFallback = 1,
475
+ TruncatedInput = 2,
476
+ MalformedHtml = 3,
477
+ SanitizationApplied = 4,
478
+ }
479
+
480
+ export enum JsWhitespaceMode {
481
+ Normalized = 0,
482
+ Strict = 1,
483
+ }
484
+
485
+ export function convert(html: string, options?: JsConversionOptions | null): JsConversionResult;
38
486
 
39
487
  export declare function initWasm(): Promise<void>;
40
488
  export declare const wasmReady: Promise<void>;