@kreuzberg/wasm 4.0.0-rc.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/README.md +982 -0
  2. package/dist/adapters/wasm-adapter.d.mts +121 -0
  3. package/dist/adapters/wasm-adapter.d.ts +121 -0
  4. package/dist/adapters/wasm-adapter.js +241 -0
  5. package/dist/adapters/wasm-adapter.js.map +1 -0
  6. package/dist/adapters/wasm-adapter.mjs +221 -0
  7. package/dist/adapters/wasm-adapter.mjs.map +1 -0
  8. package/dist/index.d.mts +466 -0
  9. package/dist/index.d.ts +466 -0
  10. package/dist/index.js +383 -0
  11. package/dist/index.js.map +1 -0
  12. package/dist/index.mjs +384 -0
  13. package/dist/index.mjs.map +1 -0
  14. package/dist/kreuzberg_wasm.d.mts +758 -0
  15. package/dist/kreuzberg_wasm.d.ts +758 -0
  16. package/dist/kreuzberg_wasm.js +1913 -0
  17. package/dist/kreuzberg_wasm.mjs +48 -0
  18. package/dist/kreuzberg_wasm_bg.wasm +0 -0
  19. package/dist/kreuzberg_wasm_bg.wasm.d.ts +54 -0
  20. package/dist/ocr/registry.d.mts +102 -0
  21. package/dist/ocr/registry.d.ts +102 -0
  22. package/dist/ocr/registry.js +90 -0
  23. package/dist/ocr/registry.js.map +1 -0
  24. package/dist/ocr/registry.mjs +70 -0
  25. package/dist/ocr/registry.mjs.map +1 -0
  26. package/dist/ocr/tesseract-wasm-backend.d.mts +257 -0
  27. package/dist/ocr/tesseract-wasm-backend.d.ts +257 -0
  28. package/dist/ocr/tesseract-wasm-backend.js +454 -0
  29. package/dist/ocr/tesseract-wasm-backend.js.map +1 -0
  30. package/dist/ocr/tesseract-wasm-backend.mjs +424 -0
  31. package/dist/ocr/tesseract-wasm-backend.mjs.map +1 -0
  32. package/dist/runtime.d.mts +256 -0
  33. package/dist/runtime.d.ts +256 -0
  34. package/dist/runtime.js +172 -0
  35. package/dist/runtime.js.map +1 -0
  36. package/dist/runtime.mjs +152 -0
  37. package/dist/runtime.mjs.map +1 -0
  38. package/dist/snippets/wasm-bindgen-rayon-38edf6e439f6d70d/src/workerHelpers.js +107 -0
  39. package/dist/types-GJVIvbPy.d.mts +221 -0
  40. package/dist/types-GJVIvbPy.d.ts +221 -0
  41. package/package.json +138 -0
@@ -0,0 +1,758 @@
1
+ /* tslint:disable */
2
+ /* eslint-disable */
3
+
4
+ export class ModuleInfo {
5
+ private constructor();
6
+ free(): void;
7
+ [Symbol.dispose](): void;
8
+ /**
9
+ * Get the module name
10
+ */
11
+ name(): string;
12
+ /**
13
+ * Get the module version
14
+ */
15
+ version(): string;
16
+ }
17
+
18
+ /**
19
+ * Batch extract from multiple byte arrays (asynchronous).
20
+ *
21
+ * Asynchronously processes multiple document byte arrays in parallel.
22
+ * Non-blocking alternative to `batchExtractBytesSync`.
23
+ *
24
+ * # JavaScript Parameters
25
+ *
26
+ * * `dataList: Uint8Array[]` - Array of document bytes
27
+ * * `mimeTypes: string[]` - Array of MIME types (must match dataList length)
28
+ * * `config?: object` - Optional extraction configuration (applied to all)
29
+ *
30
+ * # Returns
31
+ *
32
+ * `Promise<object[]>` - Promise resolving to array of ExtractionResults
33
+ *
34
+ * # Throws
35
+ *
36
+ * Rejects if dataList and mimeTypes lengths don't match.
37
+ *
38
+ * # Example
39
+ *
40
+ * ```javascript
41
+ * import { batchExtractBytes } from '@kreuzberg/wasm';
42
+ *
43
+ * const responses = await Promise.all([
44
+ * fetch('doc1.pdf'),
45
+ * fetch('doc2.docx')
46
+ * ]);
47
+ *
48
+ * const buffers = await Promise.all(
49
+ * responses.map(r => r.arrayBuffer().then(b => new Uint8Array(b)))
50
+ * );
51
+ *
52
+ * const results = await batchExtractBytes(
53
+ * buffers,
54
+ * ['application/pdf', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
55
+ * null
56
+ * );
57
+ * ```
58
+ */
59
+ export function batchExtractBytes(data_list: Uint8Array[], mime_types: string[], config?: any | null): Promise<any>;
60
+
61
+ /**
62
+ * Batch extract from multiple byte arrays (synchronous).
63
+ *
64
+ * Processes multiple document byte arrays in parallel. All documents use the
65
+ * same extraction configuration.
66
+ *
67
+ * # JavaScript Parameters
68
+ *
69
+ * * `dataList: Uint8Array[]` - Array of document bytes
70
+ * * `mimeTypes: string[]` - Array of MIME types (must match dataList length)
71
+ * * `config?: object` - Optional extraction configuration (applied to all)
72
+ *
73
+ * # Returns
74
+ *
75
+ * `object[]` - Array of ExtractionResults in the same order as inputs
76
+ *
77
+ * # Throws
78
+ *
79
+ * Throws if dataList and mimeTypes lengths don't match.
80
+ *
81
+ * # Example
82
+ *
83
+ * ```javascript
84
+ * import { batchExtractBytesSync } from '@kreuzberg/wasm';
85
+ *
86
+ * const buffers = [buffer1, buffer2, buffer3];
87
+ * const mimeTypes = ['application/pdf', 'text/plain', 'image/png'];
88
+ * const results = batchExtractBytesSync(buffers, mimeTypes, null);
89
+ *
90
+ * results.forEach((result, i) => {
91
+ * console.log(`Document ${i}: ${result.content.substring(0, 50)}...`);
92
+ * });
93
+ * ```
94
+ */
95
+ export function batchExtractBytesSync(data_list: Uint8Array[], mime_types: string[], config?: any | null): any;
96
+
97
+ /**
98
+ * Batch extract from multiple Files or Blobs (asynchronous).
99
+ *
100
+ * Processes multiple web File or Blob objects in parallel using the FileReader API.
101
+ * Only available in browser environments.
102
+ *
103
+ * # JavaScript Parameters
104
+ *
105
+ * * `files: (File | Blob)[]` - Array of files or blobs to extract
106
+ * * `config?: object` - Optional extraction configuration (applied to all)
107
+ *
108
+ * # Returns
109
+ *
110
+ * `Promise<object[]>` - Promise resolving to array of ExtractionResults
111
+ *
112
+ * # Example
113
+ *
114
+ * ```javascript
115
+ * import { batchExtractFiles } from '@kreuzberg/wasm';
116
+ *
117
+ * // From file input with multiple files
118
+ * const fileInput = document.getElementById('file-input');
119
+ * const files = Array.from(fileInput.files);
120
+ *
121
+ * const results = await batchExtractFiles(files, null);
122
+ * console.log(`Processed ${results.length} files`);
123
+ * ```
124
+ */
125
+ export function batchExtractFiles(files: File[], config?: any | null): Promise<any>;
126
+
127
+ /**
128
+ * Batch extract from multiple files (synchronous) - NOT AVAILABLE IN WASM.
129
+ *
130
+ * File system operations are not available in WebAssembly environments.
131
+ * Use `batchExtractBytesSync` or `batchExtractBytes` instead.
132
+ *
133
+ * # Throws
134
+ *
135
+ * Always throws: "File operations are not available in WASM. Use batchExtractBytesSync or batchExtractBytes instead."
136
+ */
137
+ export function batchExtractFilesSync(): any;
138
+
139
+ /**
140
+ * Clear all registered OCR backends.
141
+ *
142
+ * # Returns
143
+ *
144
+ * Nothing (`void`) if clearing succeeds; throws an error if clearing fails.
145
+ *
146
+ * # Example
147
+ *
148
+ * ```javascript
149
+ * clear_ocr_backends();
150
+ * ```
151
+ */
152
+ export function clear_ocr_backends(): void;
153
+
154
+ /**
155
+ * Clear all registered post-processors.
156
+ *
157
+ * # Returns
158
+ *
159
+ * Nothing (`void`) if clearing succeeds; throws an error if clearing fails.
160
+ *
161
+ * # Example
162
+ *
163
+ * ```javascript
164
+ * clear_post_processors();
165
+ * ```
166
+ */
167
+ export function clear_post_processors(): void;
168
+
169
+ /**
170
+ * Clear all registered validators.
171
+ *
172
+ * # Returns
173
+ *
174
+ * Nothing (`void`) if clearing succeeds; throws an error if clearing fails.
175
+ *
176
+ * # Example
177
+ *
178
+ * ```javascript
179
+ * clear_validators();
180
+ * ```
181
+ */
182
+ export function clear_validators(): void;
183
+
184
+ /**
185
+ * Detect MIME type from raw file bytes.
186
+ *
187
+ * Uses magic byte signatures and content analysis to detect the MIME type of
188
+ * a document from its binary content. Falls back to text detection if binary
189
+ * detection fails.
190
+ *
191
+ * # JavaScript Parameters
192
+ *
193
+ * * `data: Uint8Array` - The raw file bytes
194
+ *
195
+ * # Returns
196
+ *
197
+ * `string` - The detected MIME type (e.g., "application/pdf", "image/png")
198
+ *
199
+ * # Throws
200
+ *
201
+ * Throws an error if MIME type cannot be determined from the content.
202
+ *
203
+ * # Example
204
+ *
205
+ * ```javascript
206
+ * import { detectMimeFromBytes } from '@kreuzberg/wasm';
207
+ * import { readFileSync } from 'fs';
208
+ *
209
+ * const pdfBytes = readFileSync('document.pdf');
210
+ * const mimeType = detectMimeFromBytes(new Uint8Array(pdfBytes));
211
+ * console.log(mimeType); // "application/pdf"
212
+ * ```
213
+ */
214
+ export function detectMimeFromBytes(data: Uint8Array): string;
215
+
216
+ /**
217
+ * Discover configuration file in the project hierarchy.
218
+ *
219
+ * In WebAssembly environments, configuration discovery is not available because
220
+ * there is no file system access. This function always returns an error with a
221
+ * descriptive message directing users to use `loadConfigFromString()` instead.
222
+ *
223
+ * # JavaScript Parameters
224
+ *
225
+ * None
226
+ *
227
+ * # Returns
228
+ *
229
+ * Never returns successfully.
230
+ *
231
+ * # Throws
232
+ *
233
+ * Always throws an error with message:
234
+ * "discoverConfig is not available in WebAssembly (no file system access). Use loadConfigFromString() instead."
235
+ *
236
+ * # Example
237
+ *
238
+ * ```javascript
239
+ * import { discoverConfig } from '@kreuzberg/wasm';
240
+ *
241
+ * try {
242
+ * const config = discoverConfig();
243
+ * } catch (e) {
244
+ * console.error(e.message);
245
+ * // "discoverConfig is not available in WebAssembly (no file system access).
246
+ * // Use loadConfigFromString() instead."
247
+ * }
248
+ * ```
249
+ */
250
+ export function discoverConfig(): any;
251
+
252
+ /**
253
+ * Extract content from a byte array (asynchronous).
254
+ *
255
+ * Asynchronously extracts text, tables, images, and metadata from a document.
256
+ * Non-blocking alternative to `extractBytesSync` suitable for large documents
257
+ * or browser environments.
258
+ *
259
+ * # JavaScript Parameters
260
+ *
261
+ * * `data: Uint8Array` - The document bytes to extract
262
+ * * `mimeType: string` - MIME type of the data (e.g., "application/pdf")
263
+ * * `config?: object` - Optional extraction configuration
264
+ *
265
+ * # Returns
266
+ *
267
+ * `Promise<object>` - Promise resolving to ExtractionResult
268
+ *
269
+ * # Throws
270
+ *
271
+ * Rejects if data is malformed or MIME type is unsupported.
272
+ *
273
+ * # Example
274
+ *
275
+ * ```javascript
276
+ * import { extractBytes } from '@kreuzberg/wasm';
277
+ *
278
+ * // Fetch from URL
279
+ * const response = await fetch('document.pdf');
280
+ * const arrayBuffer = await response.arrayBuffer();
281
+ * const data = new Uint8Array(arrayBuffer);
282
+ *
283
+ * const result = await extractBytes(data, 'application/pdf', null);
284
+ * console.log(result.content.substring(0, 100));
285
+ * ```
286
+ */
287
+ export function extractBytes(data: Uint8Array, mime_type: string, config?: any | null): Promise<any>;
288
+
289
+ /**
290
+ * Extract content from a byte array (synchronous).
291
+ *
292
+ * Extracts text, tables, images, and metadata from a document represented as bytes.
293
+ * This is a synchronous, blocking operation suitable for smaller documents or when
294
+ * async execution is not available.
295
+ *
296
+ * # JavaScript Parameters
297
+ *
298
+ * * `data: Uint8Array` - The document bytes to extract
299
+ * * `mimeType: string` - MIME type of the data (e.g., "application/pdf", "image/png")
300
+ * * `config?: object` - Optional extraction configuration
301
+ *
302
+ * # Returns
303
+ *
304
+ * `object` - ExtractionResult with extracted content and metadata
305
+ *
306
+ * # Throws
307
+ *
308
+ * Throws an error if data is malformed or MIME type is unsupported.
309
+ *
310
+ * # Example
311
+ *
312
+ * ```javascript
313
+ * import { extractBytesSync } from '@kreuzberg/wasm';
314
+ * import { readFileSync } from 'fs';
315
+ *
316
+ * const buffer = readFileSync('document.pdf');
317
+ * const data = new Uint8Array(buffer);
318
+ * const result = extractBytesSync(data, 'application/pdf', null);
319
+ * console.log(result.content);
320
+ * ```
321
+ */
322
+ export function extractBytesSync(data: Uint8Array, mime_type: string, config?: any | null): any;
323
+
324
+ /**
325
+ * Extract content from a web File or Blob (asynchronous).
326
+ *
327
+ * Extracts content from a web File (from `<input type="file">`) or Blob object
328
+ * using the FileReader API. Only available in browser environments.
329
+ *
330
+ * # JavaScript Parameters
331
+ *
332
+ * * `file: File | Blob` - The file or blob to extract
333
+ * * `mimeType?: string` - Optional MIME type hint (auto-detected if omitted)
334
+ * * `config?: object` - Optional extraction configuration
335
+ *
336
+ * # Returns
337
+ *
338
+ * `Promise<object>` - Promise resolving to ExtractionResult
339
+ *
340
+ * # Throws
341
+ *
342
+ * Rejects if file cannot be read or is malformed.
343
+ *
344
+ * # Example
345
+ *
346
+ * ```javascript
347
+ * import { extractFile } from '@kreuzberg/wasm';
348
+ *
349
+ * // From file input
350
+ * const fileInput = document.getElementById('file-input');
351
+ * const file = fileInput.files[0];
352
+ *
353
+ * const result = await extractFile(file, null, null);
354
+ * console.log(`Extracted ${result.content.length} characters`);
355
+ * ```
356
+ */
357
+ export function extractFile(file: File, mime_type?: string | null, config?: any | null): Promise<any>;
358
+
359
+ /**
360
+ * Extract content from a file (synchronous) - NOT AVAILABLE IN WASM.
361
+ *
362
+ * File system operations are not available in WebAssembly environments.
363
+ * Use `extractBytesSync` or `extractBytes` instead.
364
+ *
365
+ * # Throws
366
+ *
367
+ * Always throws: "File operations are not available in WASM. Use extractBytesSync or extractBytes instead."
368
+ */
369
+ export function extractFileSync(): any;
370
+
371
+ /**
372
+ * Get file extensions for a given MIME type.
373
+ *
374
+ * Looks up all known file extensions that correspond to the specified MIME type.
375
+ * Returns a JavaScript Array of extension strings (without leading dots).
376
+ *
377
+ * # JavaScript Parameters
378
+ *
379
+ * * `mimeType: string` - The MIME type to look up (e.g., "application/pdf")
380
+ *
381
+ * # Returns
382
+ *
383
+ * `string[]` - Array of file extensions for the MIME type
384
+ *
385
+ * # Throws
386
+ *
387
+ * Throws an error if the MIME type is not recognized.
388
+ *
389
+ * # Example
390
+ *
391
+ * ```javascript
392
+ * import { getExtensionsForMime } from '@kreuzberg/wasm';
393
+ *
394
+ * const pdfExts = getExtensionsForMime('application/pdf');
395
+ * console.log(pdfExts); // ["pdf"]
396
+ *
397
+ * const jpegExts = getExtensionsForMime('image/jpeg');
398
+ * console.log(jpegExts); // ["jpg", "jpeg"]
399
+ * ```
400
+ */
401
+ export function getExtensionsForMime(mime_type: string): Array<any>;
402
+
403
+ /**
404
+ * Get MIME type from file extension.
405
+ *
406
+ * Looks up the MIME type associated with a given file extension.
407
+ * Returns `undefined` if the extension is not recognized.
408
+ *
409
+ * # JavaScript Parameters
410
+ *
411
+ * * `extension: string` - The file extension (with or without leading dot)
412
+ *
413
+ * # Returns
414
+ *
415
+ * `string | undefined` - The MIME type if found, `undefined` otherwise
416
+ *
417
+ * # Example
418
+ *
419
+ * ```javascript
420
+ * import { getMimeFromExtension } from '@kreuzberg/wasm';
421
+ *
422
+ * const pdfMime = getMimeFromExtension('pdf');
423
+ * console.log(pdfMime); // "application/pdf"
424
+ *
425
+ * const docMime = getMimeFromExtension('docx');
426
+ * console.log(docMime); // "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
427
+ *
428
+ * const unknownMime = getMimeFromExtension('unknown');
429
+ * console.log(unknownMime); // undefined
430
+ * ```
431
+ */
432
+ export function getMimeFromExtension(extension: string): string | undefined;
433
+
434
+ /**
435
+ * Get module information
436
+ */
437
+ export function get_module_info(): ModuleInfo;
438
+
439
+ /**
440
+ * Initialize the WASM module
441
+ * This function should be called once at application startup
442
+ */
443
+ export function init(): void;
444
+
445
+ export function initThreadPool(num_threads: number): Promise<any>;
446
+
447
+ /**
448
+ * Helper function to initialize the thread pool with error handling
449
+ * Accepts the number of threads to use for the thread pool.
450
+ * Returns true if initialization succeeded, false for graceful degradation.
451
+ *
452
+ * This function wraps `initThreadPool` with panic handling to ensure graceful
453
+ * degradation if thread pool initialization fails. The application will continue
454
+ * to work in single-threaded mode if the thread pool cannot be initialized.
455
+ */
456
+ export function init_thread_pool_safe(num_threads: number): boolean;
457
+
458
+ /**
459
+ * List all registered OCR backend names.
460
+ *
461
+ * # Returns
462
+ *
463
+ * Array of OCR backend names; throws an error if listing fails.
464
+ *
465
+ * # Example
466
+ *
467
+ * ```javascript
468
+ * const backends = list_ocr_backends();
469
+ * console.log(backends); // ["tesseract", "custom-ocr", ...]
470
+ * ```
471
+ */
472
+ export function list_ocr_backends(): Array<any>;
473
+
474
+ /**
475
+ * List all registered post-processor names.
476
+ *
477
+ * # Returns
478
+ *
479
+ * Array of post-processor names; throws an error if listing fails.
480
+ *
481
+ * # Example
482
+ *
483
+ * ```javascript
484
+ * const processors = list_post_processors();
485
+ * console.log(processors); // ["my-post-processor", ...]
486
+ * ```
487
+ */
488
+ export function list_post_processors(): Array<any>;
489
+
490
+ /**
491
+ * List all registered validator names.
492
+ *
493
+ * # Returns
494
+ *
495
+ * Array of validator names; throws an error if listing fails.
496
+ *
497
+ * # Example
498
+ *
499
+ * ```javascript
500
+ * const validators = list_validators();
501
+ * console.log(validators); // ["min-content-length", ...]
502
+ * ```
503
+ */
504
+ export function list_validators(): Array<any>;
505
+
506
+ /**
507
+ * Load configuration from a string in the specified format.
508
+ *
509
+ * Parses configuration content from TOML, YAML, or JSON formats and returns
510
+ * a JavaScript object representing the ExtractionConfig. This is the primary
511
+ * way to load configuration in WebAssembly environments since file system
512
+ * access is not available.
513
+ *
514
+ * # JavaScript Parameters
515
+ *
516
+ * * `content: string` - The configuration content as a string
517
+ * * `format: string` - The format of the content: "toml", "yaml", or "json"
518
+ *
519
+ * # Returns
520
+ *
521
+ * `object` - JavaScript object representing the ExtractionConfig
522
+ *
523
+ * # Throws
524
+ *
525
+ * Throws an error if:
526
+ * - The content is invalid for the specified format
527
+ * - The format is not one of "toml", "yaml", or "json"
528
+ * - Required configuration fields are missing or invalid
529
+ *
530
+ * # Example
531
+ *
532
+ * ```javascript
533
+ * import { loadConfigFromString } from '@kreuzberg/wasm';
534
+ *
535
+ * // Load from TOML string
536
+ * const tomlConfig = `
537
+ * use_cache = true
538
+ * enable_quality_processing = true
539
+ * `;
540
+ * const config1 = loadConfigFromString(tomlConfig, 'toml');
541
+ * console.log(config1.use_cache); // true
542
+ *
543
+ * // Load from YAML string
544
+ * const yamlConfig = `
545
+ * use_cache: true
546
+ * enable_quality_processing: true
547
+ * `;
548
+ * const config2 = loadConfigFromString(yamlConfig, 'yaml');
549
+ *
550
+ * // Load from JSON string
551
+ * const jsonConfig = `{"use_cache": true, "enable_quality_processing": true}`;
552
+ * const config3 = loadConfigFromString(jsonConfig, 'json');
553
+ * ```
554
+ */
555
+ export function loadConfigFromString(content: string, format: string): any;
556
+
557
+ /**
558
+ * Normalize a MIME type string.
559
+ *
560
+ * Normalizes a MIME type by converting to lowercase and removing parameters
561
+ * (e.g., "application/json; charset=utf-8" becomes "application/json").
562
+ * This is useful for consistent MIME type comparison.
563
+ *
564
+ * # JavaScript Parameters
565
+ *
566
+ * * `mimeType: string` - The MIME type string to normalize
567
+ *
568
+ * # Returns
569
+ *
570
+ * `string` - The normalized MIME type
571
+ *
572
+ * # Example
573
+ *
574
+ * ```javascript
575
+ * import { normalizeMimeType } from '@kreuzberg/wasm';
576
+ *
577
+ * const normalized1 = normalizeMimeType('Application/JSON');
578
+ * console.log(normalized1); // "application/json"
579
+ *
580
+ * const normalized2 = normalizeMimeType('text/html; charset=utf-8');
581
+ * console.log(normalized2); // "text/html"
582
+ *
583
+ * const normalized3 = normalizeMimeType('Text/Plain; charset=ISO-8859-1');
584
+ * console.log(normalized3); // "text/plain"
585
+ * ```
586
+ */
587
+ export function normalizeMimeType(mime_type: string): string;
588
+
589
+ /**
590
+ * Register a custom OCR backend.
591
+ *
592
+ * # Arguments
593
+ *
594
+ * * `backend` - JavaScript object implementing the OcrBackendProtocol interface:
595
+ * - `name(): string` - Unique backend name
596
+ * - `supportedLanguages(): string[]` - Array of language codes the backend supports
597
+ * - `processImage(imageBase64: string, language: string): Promise<string>` - Process image and return JSON result
598
+ *
599
+ * # Returns
600
+ *
601
+ * Nothing (`void`) if registration succeeds; throws an error with a description if it fails.
602
+ *
603
+ * # Example
604
+ *
605
+ * ```javascript
606
+ * register_ocr_backend({
607
+ * name: () => "custom-ocr",
608
+ * supportedLanguages: () => ["en", "es", "fr"],
609
+ * processImage: async (imageBase64, language) => {
610
+ * const buffer = Buffer.from(imageBase64, "base64");
611
+ * // Process image with custom OCR engine
612
+ * const text = await customOcrEngine.recognize(buffer, language);
613
+ * return JSON.stringify({
614
+ * content: text,
615
+ * mime_type: "text/plain",
616
+ * metadata: {}
617
+ * });
618
+ * }
619
+ * });
620
+ * ```
621
+ */
622
+ export function register_ocr_backend(backend: any): void;
623
+
624
+ /**
625
+ * Register a custom post-processor.
626
+ *
627
+ * # Arguments
628
+ *
629
+ * * `processor` - JavaScript object implementing the PostProcessorProtocol interface:
630
+ * - `name(): string` - Unique processor name
631
+ * - `process(jsonString: string): Promise<string>` - Process function that takes JSON input
632
+ * - `processingStage(): "early" | "middle" | "late"` - Optional processing stage (defaults to "middle")
633
+ *
634
+ * # Returns
635
+ *
636
+ * Nothing (`void`) if registration succeeds; throws an error with a description if it fails.
637
+ *
638
+ * # Example
639
+ *
640
+ * ```javascript
641
+ * register_post_processor({
642
+ * name: () => "my-post-processor",
643
+ * processingStage: () => "middle",
644
+ * process: async (jsonString) => {
645
+ * const result = JSON.parse(jsonString);
646
+ * // Process the extraction result
647
+ * result.metadata.processed_by = "my-post-processor";
648
+ * return JSON.stringify(result);
649
+ * }
650
+ * });
651
+ * ```
652
+ */
653
+ export function register_post_processor(processor: any): void;
654
+
655
+ /**
656
+ * Register a custom validator.
657
+ *
658
+ * # Arguments
659
+ *
660
+ * * `validator` - JavaScript object implementing the ValidatorProtocol interface:
661
+ * - `name(): string` - Unique validator name
662
+ * - `validate(jsonString: string): Promise<string>` - Validation function returning empty string on success, error message on failure
663
+ * - `priority(): number` - Optional priority (defaults to 50, higher runs first)
664
+ *
665
+ * # Returns
666
+ *
667
+ * Nothing (`void`) if registration succeeds; throws an error with a description if it fails.
668
+ *
669
+ * # Example
670
+ *
671
+ * ```javascript
672
+ * register_validator({
673
+ * name: () => "min-content-length",
674
+ * priority: () => 100,
675
+ * validate: async (jsonString) => {
676
+ * const result = JSON.parse(jsonString);
677
+ * if (result.content.length < 100) {
678
+ * return "Content too short"; // Validation failure
679
+ * }
680
+ * return ""; // Success
681
+ * }
682
+ * });
683
+ * ```
684
+ */
685
+ export function register_validator(validator: any): void;
686
+
687
+ /**
688
+ * Unregister an OCR backend by name.
689
+ *
690
+ * # Arguments
691
+ *
692
+ * * `name` - Name of the OCR backend to unregister
693
+ *
694
+ * # Returns
695
+ *
696
+ * Nothing (`void`) if unregistration succeeds; throws an error if the backend is not found or another error occurs.
697
+ *
698
+ * # Example
699
+ *
700
+ * ```javascript
701
+ * unregister_ocr_backend("custom-ocr");
702
+ * ```
703
+ */
704
+ export function unregister_ocr_backend(name: string): void;
705
+
706
+ /**
707
+ * Unregister a post-processor by name.
708
+ *
709
+ * # Arguments
710
+ *
711
+ * * `name` - Name of the post-processor to unregister
712
+ *
713
+ * # Returns
714
+ *
715
+ * Nothing (`void`) if unregistration succeeds; throws an error if the processor is not found or another error occurs.
716
+ *
717
+ * # Example
718
+ *
719
+ * ```javascript
720
+ * unregister_post_processor("my-post-processor");
721
+ * ```
722
+ */
723
+ export function unregister_post_processor(name: string): void;
724
+
725
+ /**
726
+ * Unregister a validator by name.
727
+ *
728
+ * # Arguments
729
+ *
730
+ * * `name` - Name of the validator to unregister
731
+ *
732
+ * # Returns
733
+ *
734
+ * Nothing (`void`) if unregistration succeeds; throws an error if the validator is not found or another error occurs.
735
+ *
736
+ * # Example
737
+ *
738
+ * ```javascript
739
+ * unregister_validator("min-content-length");
740
+ * ```
741
+ */
742
+ export function unregister_validator(name: string): void;
743
+
744
+ /**
745
+ * Version of the kreuzberg-wasm binding
746
+ */
747
+ export function version(): string;
748
+
749
+ export class wbg_rayon_PoolBuilder {
750
+ private constructor();
751
+ free(): void;
752
+ [Symbol.dispose](): void;
753
+ numThreads(): number;
754
+ build(): void;
755
+ receiver(): number;
756
+ }
757
+
758
+ export function wbg_rayon_start_worker(receiver: number): void;