@kreuzberg/html-to-markdown-node 2.19.0-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.d.ts ADDED
@@ -0,0 +1,455 @@
1
+ /* auto-generated by NAPI-RS */
2
+ /* eslint-disable */
3
+ /** Generic wrapper type for the handles returned by the `create*Handle` functions and accepted by the `*Handle` conversion functions; `T` names the wrapped value's type. NOTE(review): the `RustConversionOptions` / `RustMetadataConfig` type arguments used with it are not declared anywhere in this file - confirm they resolve for consumers. */
4
+ export declare class ExternalObject<T> {
5
+ readonly '': {
6
+ readonly '': unique symbol
7
+ [K: symbol]: T
8
+ }
9
+ }
10
+ /**
11
+ * Convert an HTML string to Markdown and return the Markdown text.
12
+ * Arguments: `html` - the HTML string to convert; `options` - optional conversion options.
13
+ *
14
+ * # Example
15
+ *
16
+ * ```javascript
17
+ * const { convert } = require('@kreuzberg/html-to-markdown-node');
18
+ *
19
+ * const html = '<h1>Hello World</h1>';
20
+ * const markdown = convert(html);
21
+ * console.log(markdown); // # Hello World
22
+ * ```
23
+ */
24
+ export declare function convert(html: string, options?: JsConversionOptions | undefined | null): string
25
+
26
+ /** Convert HTML held in a Buffer/Uint8Array to Markdown without creating intermediate JS strings; `options` is optional and the Markdown is returned as a string. */
27
+ export declare function convertBuffer(html: Buffer, options?: JsConversionOptions | undefined | null): string
28
+ /** Buffer-input conversion that takes its options as a string (`optionsJson`) instead of an options object. NOTE(review): presumably a JSON-serialized JsConversionOptions - confirm the expected schema. */
29
+ export declare function convertBufferJson(html: Buffer, optionsJson?: string | undefined | null): string
30
+
31
+ /** Convert HTML Buffer data to a Markdown string using a previously-created ConversionOptions handle (the type returned by `createConversionOptionsHandle`); the handle is required. */
32
+ export declare function convertBufferWithOptionsHandle(html: Buffer, options: ExternalObject<RustConversionOptions>): string
33
+
34
+ /** Convert HTML and collect inline images from Buffer/Uint8Array input without an intermediate string allocation; `options` and `imageConfig` are both optional. Returns the Markdown, the extracted images and any warnings. */
35
+ export declare function convertInlineImagesBuffer(html: Buffer, options?: JsConversionOptions | undefined | null, imageConfig?: JsInlineImageConfig | undefined | null): JsHtmlExtraction
36
+ /** Buffer-input inline-image conversion that takes `optionsJson` / `imageConfigJson` strings in place of option objects. NOTE(review): presumably JSON-serialized JsConversionOptions / JsInlineImageConfig - confirm the expected schema. */
37
+ export declare function convertInlineImagesBufferJson(html: Buffer, optionsJson?: string | undefined | null, imageConfigJson?: string | undefined | null): JsHtmlExtraction
38
+
39
+ /** Convert HTML and collect inline images from Buffer/Uint8Array input using a pre-created options handle (required); `imageConfig` stays optional. */
40
+ export declare function convertInlineImagesBufferWithOptionsHandle(html: Buffer, options: ExternalObject<RustConversionOptions>, imageConfig?: JsInlineImageConfig | undefined | null): JsHtmlExtraction
41
+ /** String-input conversion that takes its options as a string (`optionsJson`) instead of an options object. NOTE(review): presumably a JSON-serialized JsConversionOptions - confirm the expected schema. */
42
+ export declare function convertJson(html: string, optionsJson?: string | undefined | null): string
43
+
44
+ /**
45
+ * Convert HTML to Markdown while collecting inline images; the result carries the Markdown, the images and any warnings.
46
+ *
47
+ * # Arguments
48
+ *
49
+ * * `html` - The HTML string to convert
50
+ * * `options` - Optional conversion options
51
+ * * `imageConfig` - Optional configuration for inline image extraction
52
+ */
53
+ export declare function convertWithInlineImages(html: string, options?: JsConversionOptions | undefined | null, imageConfig?: JsInlineImageConfig | undefined | null): JsHtmlExtraction
54
+
55
+ /** Convert an HTML string to Markdown while collecting inline images, using a pre-created options handle (required); `imageConfig` stays optional. */
56
+ export declare function convertWithInlineImagesHandle(html: string, options: ExternalObject<RustConversionOptions>, imageConfig?: JsInlineImageConfig | undefined | null): JsHtmlExtraction
57
+ /** String-input inline-image conversion that takes `optionsJson` / `imageConfigJson` strings in place of option objects. NOTE(review): presumably JSON-serialized JsConversionOptions / JsInlineImageConfig - confirm the expected schema. */
58
+ export declare function convertWithInlineImagesJson(html: string, optionsJson?: string | undefined | null, imageConfigJson?: string | undefined | null): JsHtmlExtraction
59
+
60
+ /**
61
+ * Convert HTML to Markdown with metadata extraction; the result carries both `markdown` and `metadata`.
62
+ *
63
+ * # Arguments
64
+ *
65
+ * * `html` - The HTML string to convert
66
+ * * `options` - Optional conversion options
67
+ * * `metadataConfig` - Optional metadata extraction configuration (keys are snake_case, see `JsMetadataConfig`)
68
+ *
69
+ * # Example
70
+ *
71
+ * ```javascript
72
+ * const { convertWithMetadata } = require('@kreuzberg/html-to-markdown-node');
73
+ *
74
+ * const html = '<html lang="en"><head><title>Test</title></head><body><h1>Hello</h1></body></html>';
75
+ * const config = { extract_headers: true, extract_links: true };
76
+ * const result = convertWithMetadata(html, undefined, config);
77
+ * console.log(result.markdown);
78
+ * console.log(result.metadata.document.title);
79
+ * ```
80
+ */
81
+ export declare function convertWithMetadata(html: string, options?: JsConversionOptions | undefined | null, metadataConfig?: JsMetadataConfig | undefined | null): JsMetadataExtraction
82
+
83
+ /** Convert HTML from Buffer/Uint8Array with metadata extraction without intermediate string allocation; `options` and `metadataConfig` are both optional. */
84
+ export declare function convertWithMetadataBuffer(html: Buffer, options?: JsConversionOptions | undefined | null, metadataConfig?: JsMetadataConfig | undefined | null): JsMetadataExtraction
85
+
86
+ /** Convert HTML from Buffer/Uint8Array with metadata extraction, taking `optionsJson` / `metadataConfigJson` strings in place of option objects. NOTE(review): presumably JSON-serialized JsConversionOptions / JsMetadataConfig - confirm the expected schema. */
87
+ export declare function convertWithMetadataBufferJson(html: Buffer, optionsJson?: string | undefined | null, metadataConfigJson?: string | undefined | null): JsMetadataExtraction
88
+
89
+ /** Convert HTML from Buffer/Uint8Array with metadata extraction using a metadata-config handle (the type returned by `createMetadataConfigHandle`); this variant takes no conversion options argument. */
90
+ export declare function convertWithMetadataBufferWithMetadataHandle(html: Buffer, metadataConfig: ExternalObject<RustMetadataConfig>): JsMetadataExtraction
91
+
92
+ /** Convert HTML from Buffer/Uint8Array with metadata extraction using both an options handle and a metadata-config handle; both handles are required. */
93
+ export declare function convertWithMetadataBufferWithOptionsAndMetadataHandle(html: Buffer, options: ExternalObject<RustConversionOptions>, metadataConfig: ExternalObject<RustMetadataConfig>): JsMetadataExtraction
94
+
95
+ /** Convert HTML from Buffer/Uint8Array with metadata extraction using a pre-created options handle (required); `metadataConfig` is an optional plain object. */
96
+ export declare function convertWithMetadataBufferWithOptionsHandle(html: Buffer, options: ExternalObject<RustConversionOptions>, metadataConfig?: JsMetadataConfig | undefined | null): JsMetadataExtraction
97
+
98
+ /** Convert an HTML string to Markdown with metadata extraction using a pre-created options handle (required); `metadataConfig` is an optional plain object. */
99
+ export declare function convertWithMetadataHandle(html: string, options: ExternalObject<RustConversionOptions>, metadataConfig?: JsMetadataConfig | undefined | null): JsMetadataExtraction
100
+ /** String-input metadata conversion that takes `optionsJson` / `metadataConfigJson` strings in place of option objects. NOTE(review): presumably JSON-serialized JsConversionOptions / JsMetadataConfig - confirm the expected schema. */
101
+ export declare function convertWithMetadataJson(html: string, optionsJson?: string | undefined | null, metadataConfigJson?: string | undefined | null): JsMetadataExtraction
102
+
103
+ /** Convert an HTML string to a Markdown string using a previously-created ConversionOptions handle (the type returned by `createConversionOptionsHandle`). */
104
+ export declare function convertWithOptionsHandle(html: string, options: ExternalObject<RustConversionOptions>): string
105
+
106
+ /**
107
+ * Convert HTML to Markdown with an async visitor object.
108
+ *
109
+ * # Async Visitor Support
110
+ *
111
+ * This function enables full async visitor pattern support for Node.js:
112
+ * - JavaScript visitor callbacks are invoked asynchronously via NAPI ThreadsafeFunction
113
+ * - All 30+ visitor methods are supported (links, images, headings, code, lists, tables, etc.)
114
+ * - Callback errors gracefully default to VisitResult::Continue
115
+ * - Powered by tokio async runtime for seamless JS-Rust cooperation
116
+ *
117
+ * # Visitor Methods
118
+ *
119
+ * Implement any combination of these optional async methods in your visitor:
120
+ * - `visitText(ctx, text) -> VisitResult` (an object of the form `{ type: string, output?: string }`)
121
+ * - `visitLink(ctx, href, text, title) -> VisitResult`
122
+ * - `visitImage(ctx, src, alt, title) -> VisitResult`
123
+ * - `visitHeading(ctx, level, text, id) -> VisitResult`
124
+ * - `visitCodeBlock(ctx, lang, code) -> VisitResult`
125
+ * - `visitCodeInline(ctx, code) -> VisitResult`
126
+ * - `visitListItem(ctx, ordered, marker, text) -> VisitResult`
127
+ * - `visitTableRow(ctx, cells, isHeader) -> VisitResult`
128
+ * - `visitBlockquote(ctx, content, depth) -> VisitResult`
129
+ * - And 20+ more semantic and inline element callbacks
130
+ *
131
+ * # VisitResult Types
132
+ *
133
+ * Each callback should return an object (see the `JsVisitResult` interface) with:
134
+ * - `type: 'continue' | 'skip' | 'custom' | 'preservehtml' | 'error'`
135
+ * - `output?: string` (required for 'custom' and 'error' types)
136
+ *
137
+ * # Arguments
138
+ *
139
+ * * `html` - The HTML string to convert
140
+ * * `options` - Conversion options; the parameter is positional and not optional, so pass `undefined` or `null` explicitly when there are none (as in the example)
141
+ * * `visitor` - Visitor object with optional async callback methods (typed only as `object`, so method names are not checked by the compiler)
142
+ *
143
+ * # Example
144
+ *
145
+ * ```javascript
146
+ * const { convertWithVisitor } = require('@kreuzberg/html-to-markdown-node');
147
+ *
148
+ * const html = '<a href="https://example.com">Click me</a>';
149
+ * const visitor = {
150
+ * visitLink: async (ctx, href, text, title) => {
151
+ * console.log(`Found link: ${href}`);
152
+ * return { type: 'continue' }; // Use default markdown conversion
153
+ * }
154
+ * };
155
+ *
156
+ * const markdown = await convertWithVisitor(html, undefined, visitor); // NOTE(review): the declared return type is `string`, not a Promise - confirm whether this await is needed
157
+ * console.log(markdown); // [Click me](https://example.com)
158
+ * ```
159
+ */
160
+ export declare function convertWithVisitor(html: string, options: JsConversionOptions | undefined | null, visitor: object): string
161
+
162
+ /** Create a reusable ConversionOptions handle from an optional options object; the returned handle is the type accepted by the `*OptionsHandle` / `*Handle` conversion functions. */
163
+ export declare function createConversionOptionsHandle(options?: JsConversionOptions | undefined | null): ExternalObject<RustConversionOptions>
164
+ /** Create a ConversionOptions handle from an optional options string (`optionsJson`). NOTE(review): presumably a JSON-serialized JsConversionOptions - confirm the expected schema. */
165
+ export declare function createConversionOptionsHandleJson(optionsJson?: string | undefined | null): ExternalObject<RustConversionOptions>
166
+
167
+ /** Create a reusable MetadataConfig handle from an optional config object; the returned handle is the type accepted by the `*MetadataHandle` conversion functions. */
168
+ export declare function createMetadataConfigHandle(metadataConfig?: JsMetadataConfig | undefined | null): ExternalObject<RustMetadataConfig>
169
+ /** Create a MetadataConfig handle from an optional config string (`metadataConfigJson`). NOTE(review): presumably a JSON-serialized JsMetadataConfig - confirm the expected schema. */
170
+ export declare function createMetadataConfigHandleJson(metadataConfigJson?: string | undefined | null): ExternalObject<RustMetadataConfig>
171
+
172
+ /** Code block style (type of `JsConversionOptions.codeBlockStyle`); string-valued members. NOTE(review): ambient `const enum` - confirm it is usable by consumers compiling with `isolatedModules`. */
173
+ export declare const enum JsCodeBlockStyle {
174
+ /** Indented code blocks (4 spaces) - CommonMark default */
175
+ Indented = 'Indented',
176
+ /** Fenced code blocks with backticks (```) */
177
+ Backticks = 'Backticks',
178
+ /** Fenced code blocks with tildes (~~~) */
179
+ Tildes = 'Tildes'
180
+ }
181
+
182
+ /** Main conversion options, accepted by the `convert*` functions and `createConversionOptionsHandle`; every field is optional. */
183
+ export interface JsConversionOptions {
184
+ /** Heading style */
185
+ headingStyle?: JsHeadingStyle
186
+ /** List indentation type */
187
+ listIndentType?: JsListIndentType
188
+ /** List indentation width (spaces) */
189
+ listIndentWidth?: number
190
+ /** Bullet characters for unordered lists */
191
+ bullets?: string
192
+ /** Symbol for strong/emphasis (* or _) */
193
+ strongEmSymbol?: string
194
+ /** Escape asterisks in text */
195
+ escapeAsterisks?: boolean
196
+ /** Escape underscores in text */
197
+ escapeUnderscores?: boolean
198
+ /** Escape misc markdown characters */
199
+ escapeMisc?: boolean
200
+ /** Escape all ASCII punctuation */
201
+ escapeAscii?: boolean
202
+ /** Default code language */
203
+ codeLanguage?: string
204
+ /** Use autolinks for bare URLs */
205
+ autolinks?: boolean
206
+ /** Add default title if none exists */
207
+ defaultTitle?: boolean
208
+ /** Use <br> in tables instead of spaces */
209
+ brInTables?: boolean
210
+ /** Enable spatial table reconstruction in hOCR documents */
211
+ hocrSpatialTables?: boolean
212
+ /** Highlight style for <mark> elements */
213
+ highlightStyle?: JsHighlightStyle
214
+ /** Extract metadata from HTML */
215
+ extractMetadata?: boolean
216
+ /** Whitespace handling mode */
217
+ whitespaceMode?: JsWhitespaceMode
218
+ /** Strip newlines from HTML before processing */
219
+ stripNewlines?: boolean
220
+ /** Enable text wrapping */
221
+ wrap?: boolean
222
+ /** Text wrap width */
223
+ wrapWidth?: number
224
+ /** Treat block elements as inline */
225
+ convertAsInline?: boolean
226
+ /** Subscript symbol */
227
+ subSymbol?: string
228
+ /** Superscript symbol */
229
+ supSymbol?: string
230
+ /** Newline style */
231
+ newlineStyle?: JsNewlineStyle
232
+ /** Code block style */
233
+ codeBlockStyle?: JsCodeBlockStyle
234
+ /** Elements where images should remain as markdown */
235
+ keepInlineImagesIn?: Array<string>
236
+ /** Preprocessing options */
237
+ preprocessing?: JsPreprocessingOptions
238
+ /** Source encoding (informational) */
239
+ encoding?: string
240
+ /** Enable debug mode with diagnostic warnings */
241
+ debug?: boolean
242
+ /** List of HTML tags to strip */
243
+ stripTags?: Array<string>
244
+ /** List of HTML tags to preserve as-is in the output */
245
+ preserveTags?: Array<string>
246
+ }
247
+
248
+ /** Document-level metadata (type of `JsExtendedMetadata.document`). Keys are snake_case, unlike the camelCase option interfaces; `keywords`, `open_graph`, `twitter_card` and `meta_tags` are always present, the remaining fields are optional. */
249
+ export interface JsDocumentMetadata {
250
+ title?: string
251
+ description?: string
252
+ keywords: Array<string>
253
+ author?: string
254
+ canonical_url?: string
255
+ base_href?: string
256
+ language?: string
257
+ text_direction?: string
258
+ open_graph: Record<string, string>
259
+ twitter_card: Record<string, string>
260
+ meta_tags: Record<string, string>
261
+ }
262
+
263
+ /** Complete extracted metadata (type of `JsMetadataExtraction.metadata`); all five fields are required. Note that `structuredData` is camelCase while the nested metadata interfaces use snake_case keys. */
264
+ export interface JsExtendedMetadata {
265
+ document: JsDocumentMetadata
266
+ headers: Array<JsHeaderMetadata>
267
+ links: Array<JsLinkMetadata>
268
+ images: Array<JsImageMetadata>
269
+ structuredData: Array<JsStructuredData>
270
+ }
271
+
272
+ /** Header element metadata (element type of `JsExtendedMetadata.headers`); only `id` is optional. NOTE(review): the units of `html_offset` and the difference between `level` and `depth` are not visible here - confirm. */
273
+ export interface JsHeaderMetadata {
274
+ level: number
275
+ text: string
276
+ id?: string
277
+ depth: number
278
+ html_offset: number
279
+ }
280
+
281
+ /** Heading style options (type of `JsConversionOptions.headingStyle`); string-valued members. NOTE(review): ambient `const enum` - confirm it is usable by consumers compiling with `isolatedModules`. */
282
+ export declare const enum JsHeadingStyle {
283
+ /** Underlined style (=== for h1, --- for h2) */
284
+ Underlined = 'Underlined',
285
+ /** ATX style (# for h1, ## for h2, etc.) */
286
+ Atx = 'Atx',
287
+ /** ATX closed style (# title #) */
288
+ AtxClosed = 'AtxClosed'
289
+ }
290
+
291
+ /** Highlight style for `<mark>` elements (type of `JsConversionOptions.highlightStyle`); string-valued members. NOTE(review): ambient `const enum` - confirm it is usable by consumers compiling with `isolatedModules`. */
292
+ export declare const enum JsHighlightStyle {
293
+ /** ==text== */
294
+ DoubleEqual = 'DoubleEqual',
295
+ /** <mark>text</mark> */
296
+ Html = 'Html',
297
+ /** **text** */
298
+ Bold = 'Bold',
299
+ /** Plain text (no formatting) */
300
+ None = 'None'
301
+ }
302
+
303
+ /** Result of HTML extraction with inline images, returned by the `convertWithInlineImages*` and `convertInlineImagesBuffer*` functions; all three fields are required. */
304
+ export interface JsHtmlExtraction {
305
+ /** Converted markdown */
306
+ markdown: string
307
+ /** Extracted inline images */
308
+ inlineImages: Array<JsInlineImage>
309
+ /** Warnings encountered during extraction */
310
+ warnings: Array<JsInlineImageWarning>
311
+ }
312
+
313
+ /** Image metadata (element type of `JsExtendedMetadata.images`); keys are snake_case. NOTE(review): `dimensions` is typed as a plain number array - presumably (width, height) as documented on `JsInlineImage.dimensions`; confirm. */
314
+ export interface JsImageMetadata {
315
+ src: string
316
+ alt?: string
317
+ title?: string
318
+ dimensions?: Array<number>
319
+ image_type: string
320
+ attributes: Record<string, string>
321
+ }
322
+
323
+ /** Inline image data (element type of `JsHtmlExtraction.inlineImages`); `filename`, `description` and `dimensions` are optional. */
324
+ export interface JsInlineImage {
325
+ /** Raw image data */
326
+ data: Buffer
327
+ /** Image format (png, jpeg, gif, etc.) */
328
+ format: string
329
+ /** Generated or provided filename */
330
+ filename?: string
331
+ /** Alt text / description */
332
+ description?: string
333
+ /** Image dimensions (width, height) if available; typed as a plain number array rather than a 2-tuple */
334
+ dimensions?: Array<number>
335
+ /** Source type (img_data_uri or svg_element) */
336
+ source: string
337
+ /** HTML attributes from the source element */
338
+ attributes: Record<string, string>
339
+ }
340
+
341
+ /** Inline image configuration, accepted as `imageConfig` by the inline-image conversion functions; every field is optional. */
342
+ export interface JsInlineImageConfig {
343
+ /** Maximum decoded size in bytes (default: 5MB); note the type is `bigint`, not `number` */
344
+ maxDecodedSizeBytes?: bigint
345
+ /** Filename prefix for generated filenames */
346
+ filenamePrefix?: string
347
+ /** Capture inline SVG elements (default: true) */
348
+ captureSvg?: boolean
349
+ /** Infer image dimensions (default: false) */
350
+ inferDimensions?: boolean
351
+ }
352
+
353
+ /** Warning about inline image processing (element type of `JsHtmlExtraction.warnings`). */
354
+ export interface JsInlineImageWarning {
355
+ /** Index of the image that caused the warning */
356
+ index: number
357
+ /** Warning message */
358
+ message: string
359
+ }
360
+
361
+ /** Hyperlink metadata (element type of `JsExtendedMetadata.links`); keys are snake_case and only `title` is optional. NOTE(review): the set of possible `link_type` values is not visible here - confirm. */
362
+ export interface JsLinkMetadata {
363
+ href: string
364
+ text: string
365
+ title?: string
366
+ link_type: string
367
+ rel: Array<string>
368
+ attributes: Record<string, string>
369
+ }
370
+
371
+ /** List indentation type (type of `JsConversionOptions.listIndentType`); string-valued members. NOTE(review): ambient `const enum` - confirm it is usable by consumers compiling with `isolatedModules`. */
372
+ export declare const enum JsListIndentType {
373
+ Spaces = 'Spaces',
374
+ Tabs = 'Tabs'
375
+ }
376
+
377
+ /** Metadata extraction configuration, accepted by the `convertWithMetadata*` functions and `createMetadataConfigHandle`; every field is optional and keys are snake_case (e.g. `extract_headers`, not `extractHeaders`). NOTE(review): defaults and the units of `max_structured_data_size` are not visible here - confirm. */
378
+ export interface JsMetadataConfig {
379
+ extract_document?: boolean
380
+ extract_headers?: boolean
381
+ extract_links?: boolean
382
+ extract_images?: boolean
383
+ extract_structured_data?: boolean
384
+ max_structured_data_size?: number
385
+ }
386
+
387
+ /** Result of conversion with metadata extraction, returned by the `convertWithMetadata*` functions: the converted `markdown` plus the extracted `metadata`. */
388
+ export interface JsMetadataExtraction {
389
+ markdown: string
390
+ metadata: JsExtendedMetadata
391
+ }
392
+
393
+ /** Newline style (type of `JsConversionOptions.newlineStyle`); string-valued members. NOTE(review): ambient `const enum` - confirm it is usable by consumers compiling with `isolatedModules`. */
394
+ export declare const enum JsNewlineStyle {
395
+ /** Two spaces at end of line */
396
+ Spaces = 'Spaces',
397
+ /** Backslash at end of line */
398
+ Backslash = 'Backslash'
399
+ }
400
+
401
+ /**
402
+ * Describes one HTML node: its type, tag name, attributes, depth, index within its parent, parent tag (optional) and inline flag.
403
+ *
404
+ * NOTE(review): presumably the `ctx` argument handed to `convertWithVisitor` callbacks; no signature in this file references it - confirm.
405
+ */
406
+ export interface JsNodeContext {
407
+ nodeType: string
408
+ tagName: string
409
+ attributes: Record<string, string>
410
+ depth: number
411
+ indexInParent: number
412
+ parentTag?: string
413
+ isInline: boolean
414
+ }
415
+
416
+ /** HTML preprocessing options (type of `JsConversionOptions.preprocessing`); every field is optional. */
417
+ export interface JsPreprocessingOptions {
418
+ /** Enable preprocessing */
419
+ enabled?: boolean
420
+ /** Preprocessing preset */
421
+ preset?: JsPreprocessingPreset
422
+ /** Remove navigation elements */
423
+ removeNavigation?: boolean
424
+ /** Remove form elements */
425
+ removeForms?: boolean
426
+ }
427
+
428
+ /** Preprocessing preset levels (type of `JsPreprocessingOptions.preset`); string-valued members. NOTE(review): ambient `const enum` - confirm it is usable by consumers compiling with `isolatedModules`. */
429
+ export declare const enum JsPreprocessingPreset {
430
+ Minimal = 'Minimal',
431
+ Standard = 'Standard',
432
+ Aggressive = 'Aggressive'
433
+ }
434
+
435
+ /** Structured data (JSON-LD, Microdata, RDFa); element type of `JsExtendedMetadata.structuredData`. Keys are snake_case, `raw_json` is carried as a string and only `schema_type` is optional. */
436
+ export interface JsStructuredData {
437
+ data_type: string
438
+ raw_json: string
439
+ schema_type?: string
440
+ }
441
+ /** Result object shape for visitor callbacks. Per the `convertWithVisitor` documentation, `type` is one of 'continue' | 'skip' | 'custom' | 'preservehtml' | 'error' (declared here only as `string`) and `output` is required for 'custom' and 'error'. */
442
+ export interface JsVisitResult {
443
+ type: string
444
+ output?: string
445
+ }
446
+
447
+ /** Whitespace handling mode (type of `JsConversionOptions.whitespaceMode`); string-valued members. NOTE(review): ambient `const enum` - confirm it is usable by consumers compiling with `isolatedModules`. */
448
+ export declare const enum JsWhitespaceMode {
449
+ Normalized = 'Normalized',
450
+ Strict = 'Strict'
451
+ }
452
+ /** Start profiling; takes a required `outputPath` and an optional `frequency`. NOTE(review): presumably results are written to `outputPath` - the output format and the units of `frequency` are not visible here; confirm. */
453
+ export declare function startProfiling(outputPath: string, frequency?: number | undefined | null): void
454
+ /** Stop profiling; takes no arguments and returns nothing. NOTE(review): presumably ends a session begun with `startProfiling` - behavior when none is active is not visible here; confirm. */
455
+ export declare function stopProfiling(): void