@kreuzberg/wasm 4.0.0-rc.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +982 -0
- package/dist/adapters/wasm-adapter.d.mts +121 -0
- package/dist/adapters/wasm-adapter.d.ts +121 -0
- package/dist/adapters/wasm-adapter.js +241 -0
- package/dist/adapters/wasm-adapter.js.map +1 -0
- package/dist/adapters/wasm-adapter.mjs +221 -0
- package/dist/adapters/wasm-adapter.mjs.map +1 -0
- package/dist/index.d.mts +466 -0
- package/dist/index.d.ts +466 -0
- package/dist/index.js +383 -0
- package/dist/index.js.map +1 -0
- package/dist/index.mjs +384 -0
- package/dist/index.mjs.map +1 -0
- package/dist/kreuzberg_wasm.d.mts +758 -0
- package/dist/kreuzberg_wasm.d.ts +758 -0
- package/dist/kreuzberg_wasm.js +1913 -0
- package/dist/kreuzberg_wasm.mjs +48 -0
- package/dist/kreuzberg_wasm_bg.wasm +0 -0
- package/dist/kreuzberg_wasm_bg.wasm.d.ts +54 -0
- package/dist/ocr/registry.d.mts +102 -0
- package/dist/ocr/registry.d.ts +102 -0
- package/dist/ocr/registry.js +90 -0
- package/dist/ocr/registry.js.map +1 -0
- package/dist/ocr/registry.mjs +70 -0
- package/dist/ocr/registry.mjs.map +1 -0
- package/dist/ocr/tesseract-wasm-backend.d.mts +257 -0
- package/dist/ocr/tesseract-wasm-backend.d.ts +257 -0
- package/dist/ocr/tesseract-wasm-backend.js +454 -0
- package/dist/ocr/tesseract-wasm-backend.js.map +1 -0
- package/dist/ocr/tesseract-wasm-backend.mjs +424 -0
- package/dist/ocr/tesseract-wasm-backend.mjs.map +1 -0
- package/dist/runtime.d.mts +256 -0
- package/dist/runtime.d.ts +256 -0
- package/dist/runtime.js +172 -0
- package/dist/runtime.js.map +1 -0
- package/dist/runtime.mjs +152 -0
- package/dist/runtime.mjs.map +1 -0
- package/dist/snippets/wasm-bindgen-rayon-38edf6e439f6d70d/src/workerHelpers.js +107 -0
- package/dist/types-GJVIvbPy.d.mts +221 -0
- package/dist/types-GJVIvbPy.d.ts +221 -0
- package/package.json +138 -0
|
@@ -0,0 +1,758 @@
|
|
|
1
|
+
/* tslint:disable */
|
|
2
|
+
/* eslint-disable */
|
|
3
|
+
|
|
4
|
+
/**
 * Name and version metadata for the WASM module.
 *
 * The constructor is private, so instances cannot be created with `new`;
 * `get_module_info()` is the declared function that returns one.
 */
export class ModuleInfo {
|
|
5
|
+
private constructor();
|
|
6
|
+
/**
 * NOTE(review): presumably releases the WASM-side object backing this handle
 * (wasm-bindgen convention) — confirm.
 */
free(): void;
|
|
7
|
+
/**
 * Disposal hook for explicit resource management (`using` declarations).
 * NOTE(review): presumably equivalent to `free()` — confirm.
 */
[Symbol.dispose](): void;
|
|
8
|
+
/**
|
|
9
|
+
* Get the module name
|
|
10
|
+
*/
|
|
11
|
+
name(): string;
|
|
12
|
+
/**
|
|
13
|
+
* Get the module version
|
|
14
|
+
*/
|
|
15
|
+
version(): string;
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
/**
|
|
19
|
+
* Batch extract from multiple byte arrays (asynchronous).
|
|
20
|
+
*
|
|
21
|
+
* Asynchronously processes multiple document byte arrays in parallel.
|
|
22
|
+
* Non-blocking alternative to `batchExtractBytesSync`.
|
|
23
|
+
*
|
|
24
|
+
* # JavaScript Parameters
|
|
25
|
+
*
|
|
26
|
+
* * `dataList: Uint8Array[]` - Array of document bytes
|
|
27
|
+
* * `mimeTypes: string[]` - Array of MIME types (must match dataList length)
|
|
28
|
+
* * `config?: object` - Optional extraction configuration (applied to all)
|
|
29
|
+
*
|
|
30
|
+
* # Returns
|
|
31
|
+
*
|
|
32
|
+
* `Promise<object[]>` - Promise resolving to array of ExtractionResults
|
|
33
|
+
*
|
|
34
|
+
* # Throws
|
|
35
|
+
*
|
|
36
|
+
* Rejects if dataList and mimeTypes lengths don't match.
|
|
37
|
+
*
|
|
38
|
+
* # Example
|
|
39
|
+
*
|
|
40
|
+
* ```javascript
|
|
41
|
+
* import { batchExtractBytes } from '@kreuzberg/wasm';
|
|
42
|
+
*
|
|
43
|
+
* const responses = await Promise.all([
|
|
44
|
+
* fetch('doc1.pdf'),
|
|
45
|
+
* fetch('doc2.docx')
|
|
46
|
+
* ]);
|
|
47
|
+
*
|
|
48
|
+
* const buffers = await Promise.all(
|
|
49
|
+
* responses.map(r => r.arrayBuffer().then(b => new Uint8Array(b)))
|
|
50
|
+
* );
|
|
51
|
+
*
|
|
52
|
+
* const results = await batchExtractBytes(
|
|
53
|
+
* buffers,
|
|
54
|
+
* ['application/pdf', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
|
|
55
|
+
* null
|
|
56
|
+
* );
|
|
57
|
+
* ```
|
|
58
|
+
*/
|
|
59
|
+
export function batchExtractBytes(data_list: Uint8Array[], mime_types: string[], config?: any | null): Promise<any>;
|
|
60
|
+
|
|
61
|
+
/**
|
|
62
|
+
* Batch extract from multiple byte arrays (synchronous).
|
|
63
|
+
*
|
|
64
|
+
* Processes multiple document byte arrays in parallel. All documents use the
|
|
65
|
+
* same extraction configuration.
|
|
66
|
+
*
|
|
67
|
+
* # JavaScript Parameters
|
|
68
|
+
*
|
|
69
|
+
* * `dataList: Uint8Array[]` - Array of document bytes
|
|
70
|
+
* * `mimeTypes: string[]` - Array of MIME types (must match dataList length)
|
|
71
|
+
* * `config?: object` - Optional extraction configuration (applied to all)
|
|
72
|
+
*
|
|
73
|
+
* # Returns
|
|
74
|
+
*
|
|
75
|
+
* `object[]` - Array of ExtractionResults in the same order as inputs
|
|
76
|
+
*
|
|
77
|
+
* # Throws
|
|
78
|
+
*
|
|
79
|
+
* Throws if dataList and mimeTypes lengths don't match.
|
|
80
|
+
*
|
|
81
|
+
* # Example
|
|
82
|
+
*
|
|
83
|
+
* ```javascript
|
|
84
|
+
* import { batchExtractBytesSync } from '@kreuzberg/wasm';
|
|
85
|
+
*
|
|
86
|
+
* const buffers = [buffer1, buffer2, buffer3];
|
|
87
|
+
* const mimeTypes = ['application/pdf', 'text/plain', 'image/png'];
|
|
88
|
+
* const results = batchExtractBytesSync(buffers, mimeTypes, null);
|
|
89
|
+
*
|
|
90
|
+
* results.forEach((result, i) => {
|
|
91
|
+
* console.log(`Document ${i}: ${result.content.substring(0, 50)}...`);
|
|
92
|
+
* });
|
|
93
|
+
* ```
|
|
94
|
+
*/
|
|
95
|
+
export function batchExtractBytesSync(data_list: Uint8Array[], mime_types: string[], config?: any | null): any;
|
|
96
|
+
|
|
97
|
+
/**
|
|
98
|
+
* Batch extract from multiple Files or Blobs (asynchronous).
|
|
99
|
+
*
|
|
100
|
+
* Processes multiple web File or Blob objects in parallel using the FileReader API.
|
|
101
|
+
* Only available in browser environments.
|
|
102
|
+
*
|
|
103
|
+
* # JavaScript Parameters
|
|
104
|
+
*
|
|
105
|
+
* * `files: (File | Blob)[]` - Array of files or blobs to extract (note: the TypeScript signature declares `File[]`)
|
|
106
|
+
* * `config?: object` - Optional extraction configuration (applied to all)
|
|
107
|
+
*
|
|
108
|
+
* # Returns
|
|
109
|
+
*
|
|
110
|
+
* `Promise<object[]>` - Promise resolving to array of ExtractionResults
|
|
111
|
+
*
|
|
112
|
+
* # Example
|
|
113
|
+
*
|
|
114
|
+
* ```javascript
|
|
115
|
+
* import { batchExtractFiles } from '@kreuzberg/wasm';
|
|
116
|
+
*
|
|
117
|
+
* // From file input with multiple files
|
|
118
|
+
* const fileInput = document.getElementById('file-input');
|
|
119
|
+
* const files = Array.from(fileInput.files);
|
|
120
|
+
*
|
|
121
|
+
* const results = await batchExtractFiles(files, null);
|
|
122
|
+
* console.log(`Processed ${results.length} files`);
|
|
123
|
+
* ```
|
|
124
|
+
*/
|
|
125
|
+
export function batchExtractFiles(files: File[], config?: any | null): Promise<any>;
|
|
126
|
+
|
|
127
|
+
/**
|
|
128
|
+
* Batch extract from multiple files (synchronous) - NOT AVAILABLE IN WASM.
|
|
129
|
+
*
|
|
130
|
+
* File system operations are not available in WebAssembly environments.
|
|
131
|
+
* Use `batchExtractBytesSync` or `batchExtractBytes` instead.
|
|
132
|
+
*
|
|
133
|
+
* # Throws
|
|
134
|
+
*
|
|
135
|
+
* Always throws: "File operations are not available in WASM. Use batchExtractBytesSync or batchExtractBytes instead."
|
|
136
|
+
*/
|
|
137
|
+
export function batchExtractFilesSync(): any;
|
|
138
|
+
|
|
139
|
+
/**
|
|
140
|
+
* Clear all registered OCR backends.
|
|
141
|
+
*
|
|
142
|
+
* # Returns
|
|
143
|
+
*
|
|
144
|
+
* `void` - Nothing on success. A Rust-side `Err` is presumably surfaced as a thrown exception.
|
|
145
|
+
*
|
|
146
|
+
* # Example
|
|
147
|
+
*
|
|
148
|
+
* ```javascript
|
|
149
|
+
* clear_ocr_backends();
|
|
150
|
+
* ```
|
|
151
|
+
*/
|
|
152
|
+
export function clear_ocr_backends(): void;
|
|
153
|
+
|
|
154
|
+
/**
|
|
155
|
+
* Clear all registered post-processors.
|
|
156
|
+
*
|
|
157
|
+
* # Returns
|
|
158
|
+
*
|
|
159
|
+
* `void` - Nothing on success. A Rust-side `Err` is presumably surfaced as a thrown exception.
|
|
160
|
+
*
|
|
161
|
+
* # Example
|
|
162
|
+
*
|
|
163
|
+
* ```javascript
|
|
164
|
+
* clear_post_processors();
|
|
165
|
+
* ```
|
|
166
|
+
*/
|
|
167
|
+
export function clear_post_processors(): void;
|
|
168
|
+
|
|
169
|
+
/**
|
|
170
|
+
* Clear all registered validators.
|
|
171
|
+
*
|
|
172
|
+
* # Returns
|
|
173
|
+
*
|
|
174
|
+
* `void` - Nothing on success. A Rust-side `Err` is presumably surfaced as a thrown exception.
|
|
175
|
+
*
|
|
176
|
+
* # Example
|
|
177
|
+
*
|
|
178
|
+
* ```javascript
|
|
179
|
+
* clear_validators();
|
|
180
|
+
* ```
|
|
181
|
+
*/
|
|
182
|
+
export function clear_validators(): void;
|
|
183
|
+
|
|
184
|
+
/**
|
|
185
|
+
* Detect MIME type from raw file bytes.
|
|
186
|
+
*
|
|
187
|
+
* Uses magic byte signatures and content analysis to detect the MIME type of
|
|
188
|
+
* a document from its binary content. Falls back to text detection if binary
|
|
189
|
+
* detection fails.
|
|
190
|
+
*
|
|
191
|
+
* # JavaScript Parameters
|
|
192
|
+
*
|
|
193
|
+
* * `data: Uint8Array` - The raw file bytes
|
|
194
|
+
*
|
|
195
|
+
* # Returns
|
|
196
|
+
*
|
|
197
|
+
* `string` - The detected MIME type (e.g., "application/pdf", "image/png")
|
|
198
|
+
*
|
|
199
|
+
* # Throws
|
|
200
|
+
*
|
|
201
|
+
* Throws an error if MIME type cannot be determined from the content.
|
|
202
|
+
*
|
|
203
|
+
* # Example
|
|
204
|
+
*
|
|
205
|
+
* ```javascript
|
|
206
|
+
* import { detectMimeFromBytes } from '@kreuzberg/wasm';
|
|
207
|
+
* import { readFileSync } from 'fs';
|
|
208
|
+
*
|
|
209
|
+
* const pdfBytes = readFileSync('document.pdf');
|
|
210
|
+
* const mimeType = detectMimeFromBytes(new Uint8Array(pdfBytes));
|
|
211
|
+
* console.log(mimeType); // "application/pdf"
|
|
212
|
+
* ```
|
|
213
|
+
*/
|
|
214
|
+
export function detectMimeFromBytes(data: Uint8Array): string;
|
|
215
|
+
|
|
216
|
+
/**
|
|
217
|
+
* Discover configuration file in the project hierarchy.
|
|
218
|
+
*
|
|
219
|
+
* In WebAssembly environments, configuration discovery is not available because
|
|
220
|
+
* there is no file system access. This function always throws an error with a
|
|
221
|
+
* descriptive message directing users to use `loadConfigFromString()` instead.
|
|
222
|
+
*
|
|
223
|
+
* # JavaScript Parameters
|
|
224
|
+
*
|
|
225
|
+
* None
|
|
226
|
+
*
|
|
227
|
+
* # Returns
|
|
228
|
+
*
|
|
229
|
+
* Never returns successfully.
|
|
230
|
+
*
|
|
231
|
+
* # Throws
|
|
232
|
+
*
|
|
233
|
+
* Always throws an error with message:
|
|
234
|
+
* "discoverConfig is not available in WebAssembly (no file system access). Use loadConfigFromString() instead."
|
|
235
|
+
*
|
|
236
|
+
* # Example
|
|
237
|
+
*
|
|
238
|
+
* ```javascript
|
|
239
|
+
* import { discoverConfig } from '@kreuzberg/wasm';
|
|
240
|
+
*
|
|
241
|
+
* try {
|
|
242
|
+
* const config = discoverConfig();
|
|
243
|
+
* } catch (e) {
|
|
244
|
+
* console.error(e.message);
|
|
245
|
+
* // "discoverConfig is not available in WebAssembly (no file system access).
|
|
246
|
+
* // Use loadConfigFromString() instead."
|
|
247
|
+
* }
|
|
248
|
+
* ```
|
|
249
|
+
*/
|
|
250
|
+
export function discoverConfig(): any;
|
|
251
|
+
|
|
252
|
+
/**
|
|
253
|
+
* Extract content from a byte array (asynchronous).
|
|
254
|
+
*
|
|
255
|
+
* Asynchronously extracts text, tables, images, and metadata from a document.
|
|
256
|
+
* Non-blocking alternative to `extractBytesSync` suitable for large documents
|
|
257
|
+
* or browser environments.
|
|
258
|
+
*
|
|
259
|
+
* # JavaScript Parameters
|
|
260
|
+
*
|
|
261
|
+
* * `data: Uint8Array` - The document bytes to extract
|
|
262
|
+
* * `mimeType: string` - MIME type of the data (e.g., "application/pdf")
|
|
263
|
+
* * `config?: object` - Optional extraction configuration
|
|
264
|
+
*
|
|
265
|
+
* # Returns
|
|
266
|
+
*
|
|
267
|
+
* `Promise<object>` - Promise resolving to ExtractionResult
|
|
268
|
+
*
|
|
269
|
+
* # Throws
|
|
270
|
+
*
|
|
271
|
+
* Rejects if data is malformed or MIME type is unsupported.
|
|
272
|
+
*
|
|
273
|
+
* # Example
|
|
274
|
+
*
|
|
275
|
+
* ```javascript
|
|
276
|
+
* import { extractBytes } from '@kreuzberg/wasm';
|
|
277
|
+
*
|
|
278
|
+
* // Fetch from URL
|
|
279
|
+
* const response = await fetch('document.pdf');
|
|
280
|
+
* const arrayBuffer = await response.arrayBuffer();
|
|
281
|
+
* const data = new Uint8Array(arrayBuffer);
|
|
282
|
+
*
|
|
283
|
+
* const result = await extractBytes(data, 'application/pdf', null);
|
|
284
|
+
* console.log(result.content.substring(0, 100));
|
|
285
|
+
* ```
|
|
286
|
+
*/
|
|
287
|
+
export function extractBytes(data: Uint8Array, mime_type: string, config?: any | null): Promise<any>;
|
|
288
|
+
|
|
289
|
+
/**
|
|
290
|
+
* Extract content from a byte array (synchronous).
|
|
291
|
+
*
|
|
292
|
+
* Extracts text, tables, images, and metadata from a document represented as bytes.
|
|
293
|
+
* This is a synchronous, blocking operation suitable for smaller documents or when
|
|
294
|
+
* async execution is not available.
|
|
295
|
+
*
|
|
296
|
+
* # JavaScript Parameters
|
|
297
|
+
*
|
|
298
|
+
* * `data: Uint8Array` - The document bytes to extract
|
|
299
|
+
* * `mimeType: string` - MIME type of the data (e.g., "application/pdf", "image/png")
|
|
300
|
+
* * `config?: object` - Optional extraction configuration
|
|
301
|
+
*
|
|
302
|
+
* # Returns
|
|
303
|
+
*
|
|
304
|
+
* `object` - ExtractionResult with extracted content and metadata
|
|
305
|
+
*
|
|
306
|
+
* # Throws
|
|
307
|
+
*
|
|
308
|
+
* Throws an error if data is malformed or MIME type is unsupported.
|
|
309
|
+
*
|
|
310
|
+
* # Example
|
|
311
|
+
*
|
|
312
|
+
* ```javascript
|
|
313
|
+
* import { extractBytesSync } from '@kreuzberg/wasm';
|
|
314
|
+
* import { readFileSync } from 'fs';
|
|
315
|
+
*
|
|
316
|
+
* const buffer = readFileSync('document.pdf');
|
|
317
|
+
* const data = new Uint8Array(buffer);
|
|
318
|
+
* const result = extractBytesSync(data, 'application/pdf', null);
|
|
319
|
+
* console.log(result.content);
|
|
320
|
+
* ```
|
|
321
|
+
*/
|
|
322
|
+
export function extractBytesSync(data: Uint8Array, mime_type: string, config?: any | null): any;
|
|
323
|
+
|
|
324
|
+
/**
|
|
325
|
+
* Extract content from a web File or Blob (asynchronous).
|
|
326
|
+
*
|
|
327
|
+
* Extracts content from a web File (from `<input type="file">`) or Blob object
|
|
328
|
+
* using the FileReader API. Only available in browser environments.
|
|
329
|
+
*
|
|
330
|
+
* # JavaScript Parameters
|
|
331
|
+
*
|
|
332
|
+
* * `file: File | Blob` - The file or blob to extract (note: the TypeScript signature declares `File`)
|
|
333
|
+
* * `mimeType?: string` - Optional MIME type hint (auto-detected if omitted)
|
|
334
|
+
* * `config?: object` - Optional extraction configuration
|
|
335
|
+
*
|
|
336
|
+
* # Returns
|
|
337
|
+
*
|
|
338
|
+
* `Promise<object>` - Promise resolving to ExtractionResult
|
|
339
|
+
*
|
|
340
|
+
* # Throws
|
|
341
|
+
*
|
|
342
|
+
* Rejects if file cannot be read or is malformed.
|
|
343
|
+
*
|
|
344
|
+
* # Example
|
|
345
|
+
*
|
|
346
|
+
* ```javascript
|
|
347
|
+
* import { extractFile } from '@kreuzberg/wasm';
|
|
348
|
+
*
|
|
349
|
+
* // From file input
|
|
350
|
+
* const fileInput = document.getElementById('file-input');
|
|
351
|
+
* const file = fileInput.files[0];
|
|
352
|
+
*
|
|
353
|
+
* const result = await extractFile(file, null, null);
|
|
354
|
+
* console.log(`Extracted ${result.content.length} characters`);
|
|
355
|
+
* ```
|
|
356
|
+
*/
|
|
357
|
+
export function extractFile(file: File, mime_type?: string | null, config?: any | null): Promise<any>;
|
|
358
|
+
|
|
359
|
+
/**
|
|
360
|
+
* Extract content from a file (synchronous) - NOT AVAILABLE IN WASM.
|
|
361
|
+
*
|
|
362
|
+
* File system operations are not available in WebAssembly environments.
|
|
363
|
+
* Use `extractBytesSync` or `extractBytes` instead.
|
|
364
|
+
*
|
|
365
|
+
* # Throws
|
|
366
|
+
*
|
|
367
|
+
* Always throws: "File operations are not available in WASM. Use extractBytesSync or extractBytes instead."
|
|
368
|
+
*/
|
|
369
|
+
export function extractFileSync(): any;
|
|
370
|
+
|
|
371
|
+
/**
|
|
372
|
+
* Get file extensions for a given MIME type.
|
|
373
|
+
*
|
|
374
|
+
* Looks up all known file extensions that correspond to the specified MIME type.
|
|
375
|
+
* Returns a JavaScript Array of extension strings (without leading dots).
|
|
376
|
+
*
|
|
377
|
+
* # JavaScript Parameters
|
|
378
|
+
*
|
|
379
|
+
* * `mimeType: string` - The MIME type to look up (e.g., "application/pdf")
|
|
380
|
+
*
|
|
381
|
+
* # Returns
|
|
382
|
+
*
|
|
383
|
+
* `string[]` - Array of file extensions for the MIME type
|
|
384
|
+
*
|
|
385
|
+
* # Throws
|
|
386
|
+
*
|
|
387
|
+
* Throws an error if the MIME type is not recognized.
|
|
388
|
+
*
|
|
389
|
+
* # Example
|
|
390
|
+
*
|
|
391
|
+
* ```javascript
|
|
392
|
+
* import { getExtensionsForMime } from '@kreuzberg/wasm';
|
|
393
|
+
*
|
|
394
|
+
* const pdfExts = getExtensionsForMime('application/pdf');
|
|
395
|
+
* console.log(pdfExts); // ["pdf"]
|
|
396
|
+
*
|
|
397
|
+
* const jpegExts = getExtensionsForMime('image/jpeg');
|
|
398
|
+
* console.log(jpegExts); // ["jpg", "jpeg"]
|
|
399
|
+
* ```
|
|
400
|
+
*/
|
|
401
|
+
export function getExtensionsForMime(mime_type: string): Array<any>;
|
|
402
|
+
|
|
403
|
+
/**
|
|
404
|
+
* Get MIME type from file extension.
|
|
405
|
+
*
|
|
406
|
+
* Looks up the MIME type associated with a given file extension.
|
|
407
|
+
* Returns `undefined` if the extension is not recognized.
|
|
408
|
+
*
|
|
409
|
+
* # JavaScript Parameters
|
|
410
|
+
*
|
|
411
|
+
* * `extension: string` - The file extension (with or without leading dot)
|
|
412
|
+
*
|
|
413
|
+
* # Returns
|
|
414
|
+
*
|
|
415
|
+
* `string | undefined` - The MIME type if found, `undefined` otherwise
|
|
416
|
+
*
|
|
417
|
+
* # Example
|
|
418
|
+
*
|
|
419
|
+
* ```javascript
|
|
420
|
+
* import { getMimeFromExtension } from '@kreuzberg/wasm';
|
|
421
|
+
*
|
|
422
|
+
* const pdfMime = getMimeFromExtension('pdf');
|
|
423
|
+
* console.log(pdfMime); // "application/pdf"
|
|
424
|
+
*
|
|
425
|
+
* const docMime = getMimeFromExtension('docx');
|
|
426
|
+
* console.log(docMime); // "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
|
427
|
+
*
|
|
428
|
+
* const unknownMime = getMimeFromExtension('unknown');
|
|
429
|
+
* console.log(unknownMime); // undefined
|
|
430
|
+
* ```
|
|
431
|
+
*/
|
|
432
|
+
export function getMimeFromExtension(extension: string): string | undefined;
|
|
433
|
+
|
|
434
|
+
/**
|
|
435
|
+
* Get module information
|
|
436
|
+
*/
|
|
437
|
+
export function get_module_info(): ModuleInfo;
|
|
438
|
+
|
|
439
|
+
/**
|
|
440
|
+
* Initialize the WASM module
|
|
441
|
+
* This function should be called once at application startup
|
|
442
|
+
*/
|
|
443
|
+
export function init(): void;
|
|
444
|
+
|
|
445
|
+
/**
 * NOTE(review): presumably initializes the worker thread pool with
 * `num_threads` threads (likely the raw wasm-bindgen-rayon entry point) —
 * confirm. The resolved value is typed `any` and is not described anywhere in
 * this file. `init_thread_pool_safe` is the documented alternative that
 * reports failure through its boolean return value.
 */
export function initThreadPool(num_threads: number): Promise<any>;
|
|
446
|
+
|
|
447
|
+
/**
|
|
448
|
+
* Helper function to initialize the thread pool with error handling
|
|
449
|
+
* Accepts the number of threads to use for the thread pool.
|
|
450
|
+
* Returns true if initialization succeeded, false for graceful degradation.
|
|
451
|
+
*
|
|
452
|
+
* This function wraps init_thread_pool with panic handling to ensure graceful
|
|
453
|
+
* degradation if thread pool initialization fails. The application will continue
|
|
454
|
+
* to work in single-threaded mode if the thread pool cannot be initialized.
|
|
455
|
+
*/
|
|
456
|
+
export function init_thread_pool_safe(num_threads: number): boolean;
|
|
457
|
+
|
|
458
|
+
/**
|
|
459
|
+
* List all registered OCR backend names.
|
|
460
|
+
*
|
|
461
|
+
* # Returns
|
|
462
|
+
*
|
|
463
|
+
* Array of OCR backend names. A Rust-side `Err` is presumably surfaced as a thrown exception.
|
|
464
|
+
*
|
|
465
|
+
* # Example
|
|
466
|
+
*
|
|
467
|
+
* ```javascript
|
|
468
|
+
* const backends = list_ocr_backends();
|
|
469
|
+
* console.log(backends); // ["tesseract", "custom-ocr", ...]
|
|
470
|
+
* ```
|
|
471
|
+
*/
|
|
472
|
+
export function list_ocr_backends(): Array<any>;
|
|
473
|
+
|
|
474
|
+
/**
|
|
475
|
+
* List all registered post-processor names.
|
|
476
|
+
*
|
|
477
|
+
* # Returns
|
|
478
|
+
*
|
|
479
|
+
* Array of post-processor names. A Rust-side `Err` is presumably surfaced as a thrown exception.
|
|
480
|
+
*
|
|
481
|
+
* # Example
|
|
482
|
+
*
|
|
483
|
+
* ```javascript
|
|
484
|
+
* const processors = list_post_processors();
|
|
485
|
+
* console.log(processors); // ["my-post-processor", ...]
|
|
486
|
+
* ```
|
|
487
|
+
*/
|
|
488
|
+
export function list_post_processors(): Array<any>;
|
|
489
|
+
|
|
490
|
+
/**
|
|
491
|
+
* List all registered validator names.
|
|
492
|
+
*
|
|
493
|
+
* # Returns
|
|
494
|
+
*
|
|
495
|
+
* Array of validator names. A Rust-side `Err` is presumably surfaced as a thrown exception.
|
|
496
|
+
*
|
|
497
|
+
* # Example
|
|
498
|
+
*
|
|
499
|
+
* ```javascript
|
|
500
|
+
* const validators = list_validators();
|
|
501
|
+
* console.log(validators); // ["min-content-length", ...]
|
|
502
|
+
* ```
|
|
503
|
+
*/
|
|
504
|
+
export function list_validators(): Array<any>;
|
|
505
|
+
|
|
506
|
+
/**
|
|
507
|
+
* Load configuration from a string in the specified format.
|
|
508
|
+
*
|
|
509
|
+
* Parses configuration content from TOML, YAML, or JSON formats and returns
|
|
510
|
+
* a JavaScript object representing the ExtractionConfig. This is the primary
|
|
511
|
+
* way to load configuration in WebAssembly environments since file system
|
|
512
|
+
* access is not available.
|
|
513
|
+
*
|
|
514
|
+
* # JavaScript Parameters
|
|
515
|
+
*
|
|
516
|
+
* * `content: string` - The configuration content as a string
|
|
517
|
+
* * `format: string` - The format of the content: "toml", "yaml", or "json"
|
|
518
|
+
*
|
|
519
|
+
* # Returns
|
|
520
|
+
*
|
|
521
|
+
* `object` - JavaScript object representing the ExtractionConfig
|
|
522
|
+
*
|
|
523
|
+
* # Throws
|
|
524
|
+
*
|
|
525
|
+
* Throws an error if:
|
|
526
|
+
* - The content is invalid for the specified format
|
|
527
|
+
* - The format is not one of "toml", "yaml", or "json"
|
|
528
|
+
* - Required configuration fields are missing or invalid
|
|
529
|
+
*
|
|
530
|
+
* # Example
|
|
531
|
+
*
|
|
532
|
+
* ```javascript
|
|
533
|
+
* import { loadConfigFromString } from '@kreuzberg/wasm';
|
|
534
|
+
*
|
|
535
|
+
* // Load from TOML string
|
|
536
|
+
* const tomlConfig = `
|
|
537
|
+
* use_cache = true
|
|
538
|
+
* enable_quality_processing = true
|
|
539
|
+
* `;
|
|
540
|
+
* const config1 = loadConfigFromString(tomlConfig, 'toml');
|
|
541
|
+
* console.log(config1.use_cache); // true
|
|
542
|
+
*
|
|
543
|
+
* // Load from YAML string
|
|
544
|
+
* const yamlConfig = `
|
|
545
|
+
* use_cache: true
|
|
546
|
+
* enable_quality_processing: true
|
|
547
|
+
* `;
|
|
548
|
+
* const config2 = loadConfigFromString(yamlConfig, 'yaml');
|
|
549
|
+
*
|
|
550
|
+
* // Load from JSON string
|
|
551
|
+
* const jsonConfig = `{"use_cache": true, "enable_quality_processing": true}`;
|
|
552
|
+
* const config3 = loadConfigFromString(jsonConfig, 'json');
|
|
553
|
+
* ```
|
|
554
|
+
*/
|
|
555
|
+
export function loadConfigFromString(content: string, format: string): any;
|
|
556
|
+
|
|
557
|
+
/**
|
|
558
|
+
* Normalize a MIME type string.
|
|
559
|
+
*
|
|
560
|
+
* Normalizes a MIME type by converting to lowercase and removing parameters
|
|
561
|
+
* (e.g., "application/json; charset=utf-8" becomes "application/json").
|
|
562
|
+
* This is useful for consistent MIME type comparison.
|
|
563
|
+
*
|
|
564
|
+
* # JavaScript Parameters
|
|
565
|
+
*
|
|
566
|
+
* * `mimeType: string` - The MIME type string to normalize
|
|
567
|
+
*
|
|
568
|
+
* # Returns
|
|
569
|
+
*
|
|
570
|
+
* `string` - The normalized MIME type
|
|
571
|
+
*
|
|
572
|
+
* # Example
|
|
573
|
+
*
|
|
574
|
+
* ```javascript
|
|
575
|
+
* import { normalizeMimeType } from '@kreuzberg/wasm';
|
|
576
|
+
*
|
|
577
|
+
* const normalized1 = normalizeMimeType('Application/JSON');
|
|
578
|
+
* console.log(normalized1); // "application/json"
|
|
579
|
+
*
|
|
580
|
+
* const normalized2 = normalizeMimeType('text/html; charset=utf-8');
|
|
581
|
+
* console.log(normalized2); // "text/html"
|
|
582
|
+
*
|
|
583
|
+
* const normalized3 = normalizeMimeType('Text/Plain; charset=ISO-8859-1');
|
|
584
|
+
* console.log(normalized3); // "text/plain"
|
|
585
|
+
* ```
|
|
586
|
+
*/
|
|
587
|
+
export function normalizeMimeType(mime_type: string): string;
|
|
588
|
+
|
|
589
|
+
/**
|
|
590
|
+
* Register a custom OCR backend.
|
|
591
|
+
*
|
|
592
|
+
* # Arguments
|
|
593
|
+
*
|
|
594
|
+
* * `backend` - JavaScript object implementing the OcrBackendProtocol interface:
|
|
595
|
+
* - `name(): string` - Unique backend name
|
|
596
|
+
* - `supportedLanguages(): string[]` - Array of language codes the backend supports
|
|
597
|
+
* - `processImage(imageBase64: string, language: string): Promise<string>` - Process image and return JSON result
|
|
598
|
+
*
|
|
599
|
+
* # Returns
|
|
600
|
+
*
|
|
601
|
+
* `void` - Nothing on success. A Rust-side `Err` (with a description) is presumably surfaced as a thrown exception.
|
|
602
|
+
*
|
|
603
|
+
* # Example
|
|
604
|
+
*
|
|
605
|
+
* ```javascript
|
|
606
|
+
* register_ocr_backend({
|
|
607
|
+
* name: () => "custom-ocr",
|
|
608
|
+
* supportedLanguages: () => ["en", "es", "fr"],
|
|
609
|
+
* processImage: async (imageBase64, language) => {
|
|
610
|
+
* const buffer = Buffer.from(imageBase64, "base64");
|
|
611
|
+
* // Process image with custom OCR engine
|
|
612
|
+
* const text = await customOcrEngine.recognize(buffer, language);
|
|
613
|
+
* return JSON.stringify({
|
|
614
|
+
* content: text,
|
|
615
|
+
* mime_type: "text/plain",
|
|
616
|
+
* metadata: {}
|
|
617
|
+
* });
|
|
618
|
+
* }
|
|
619
|
+
* });
|
|
620
|
+
* ```
|
|
621
|
+
*/
|
|
622
|
+
export function register_ocr_backend(backend: any): void;
|
|
623
|
+
|
|
624
|
+
/**
|
|
625
|
+
* Register a custom post-processor.
|
|
626
|
+
*
|
|
627
|
+
* # Arguments
|
|
628
|
+
*
|
|
629
|
+
* * `processor` - JavaScript object implementing the PostProcessorProtocol interface:
|
|
630
|
+
* - `name(): string` - Unique processor name
|
|
631
|
+
* - `process(jsonString: string): Promise<string>` - Process function that takes JSON input
|
|
632
|
+
* - `processingStage(): "early" | "middle" | "late"` - Optional processing stage (defaults to "middle")
|
|
633
|
+
*
|
|
634
|
+
* # Returns
|
|
635
|
+
*
|
|
636
|
+
* `void` - Nothing on success. A Rust-side `Err` (with a description) is presumably surfaced as a thrown exception.
|
|
637
|
+
*
|
|
638
|
+
* # Example
|
|
639
|
+
*
|
|
640
|
+
* ```javascript
|
|
641
|
+
* register_post_processor({
|
|
642
|
+
* name: () => "my-post-processor",
|
|
643
|
+
* processingStage: () => "middle",
|
|
644
|
+
* process: async (jsonString) => {
|
|
645
|
+
* const result = JSON.parse(jsonString);
|
|
646
|
+
* // Process the extraction result
|
|
647
|
+
* result.metadata.processed_by = "my-post-processor";
|
|
648
|
+
* return JSON.stringify(result);
|
|
649
|
+
* }
|
|
650
|
+
* });
|
|
651
|
+
* ```
|
|
652
|
+
*/
|
|
653
|
+
export function register_post_processor(processor: any): void;
|
|
654
|
+
|
|
655
|
+
/**
|
|
656
|
+
* Register a custom validator.
|
|
657
|
+
*
|
|
658
|
+
* # Arguments
|
|
659
|
+
*
|
|
660
|
+
* * `validator` - JavaScript object implementing the ValidatorProtocol interface:
|
|
661
|
+
* - `name(): string` - Unique validator name
|
|
662
|
+
* - `validate(jsonString: string): Promise<string>` - Validation function returning empty string on success, error message on failure
|
|
663
|
+
* - `priority(): number` - Optional priority (defaults to 50, higher runs first)
|
|
664
|
+
*
|
|
665
|
+
* # Returns
|
|
666
|
+
*
|
|
667
|
+
* `void` - Nothing on success. A Rust-side `Err` (with a description) is presumably surfaced as a thrown exception.
|
|
668
|
+
*
|
|
669
|
+
* # Example
|
|
670
|
+
*
|
|
671
|
+
* ```javascript
|
|
672
|
+
* register_validator({
|
|
673
|
+
* name: () => "min-content-length",
|
|
674
|
+
* priority: () => 100,
|
|
675
|
+
* validate: async (jsonString) => {
|
|
676
|
+
* const result = JSON.parse(jsonString);
|
|
677
|
+
* if (result.content.length < 100) {
|
|
678
|
+
* return "Content too short"; // Validation failure
|
|
679
|
+
* }
|
|
680
|
+
* return ""; // Success
|
|
681
|
+
* }
|
|
682
|
+
* });
|
|
683
|
+
* ```
|
|
684
|
+
*/
|
|
685
|
+
export function register_validator(validator: any): void;
|
|
686
|
+
|
|
687
|
+
/**
|
|
688
|
+
* Unregister an OCR backend by name.
|
|
689
|
+
*
|
|
690
|
+
* # Arguments
|
|
691
|
+
*
|
|
692
|
+
* * `name` - Name of the OCR backend to unregister
|
|
693
|
+
*
|
|
694
|
+
* # Returns
|
|
695
|
+
*
|
|
696
|
+
* `void` - Nothing on success. If the backend is not found or another error occurs, the Rust-side `Err` is presumably surfaced as a thrown exception.
|
|
697
|
+
*
|
|
698
|
+
* # Example
|
|
699
|
+
*
|
|
700
|
+
* ```javascript
|
|
701
|
+
* unregister_ocr_backend("custom-ocr");
|
|
702
|
+
* ```
|
|
703
|
+
*/
|
|
704
|
+
export function unregister_ocr_backend(name: string): void;
|
|
705
|
+
|
|
706
|
+
/**
|
|
707
|
+
* Unregister a post-processor by name.
|
|
708
|
+
*
|
|
709
|
+
* # Arguments
|
|
710
|
+
*
|
|
711
|
+
* * `name` - Name of the post-processor to unregister
|
|
712
|
+
*
|
|
713
|
+
* # Returns
|
|
714
|
+
*
|
|
715
|
+
* `void` - Nothing on success. If the processor is not found or another error occurs, the Rust-side `Err` is presumably surfaced as a thrown exception.
|
|
716
|
+
*
|
|
717
|
+
* # Example
|
|
718
|
+
*
|
|
719
|
+
* ```javascript
|
|
720
|
+
* unregister_post_processor("my-post-processor");
|
|
721
|
+
* ```
|
|
722
|
+
*/
|
|
723
|
+
export function unregister_post_processor(name: string): void;
|
|
724
|
+
|
|
725
|
+
/**
|
|
726
|
+
* Unregister a validator by name.
|
|
727
|
+
*
|
|
728
|
+
* # Arguments
|
|
729
|
+
*
|
|
730
|
+
* * `name` - Name of the validator to unregister
|
|
731
|
+
*
|
|
732
|
+
* # Returns
|
|
733
|
+
*
|
|
734
|
+
* `void` - Nothing on success. If the validator is not found or another error occurs, the Rust-side `Err` is presumably surfaced as a thrown exception.
|
|
735
|
+
*
|
|
736
|
+
* # Example
|
|
737
|
+
*
|
|
738
|
+
* ```javascript
|
|
739
|
+
* unregister_validator("min-content-length");
|
|
740
|
+
* ```
|
|
741
|
+
*/
|
|
742
|
+
export function unregister_validator(name: string): void;
|
|
743
|
+
|
|
744
|
+
/**
|
|
745
|
+
* Version of the kreuzberg-wasm binding
|
|
746
|
+
*/
|
|
747
|
+
export function version(): string;
|
|
748
|
+
|
|
749
|
+
/**
 * NOTE(review): presumably internal plumbing emitted for wasm-bindgen-rayon's
 * thread-pool setup rather than application-facing API — confirm.
 *
 * The constructor is private, so instances cannot be created with `new`.
 */
export class wbg_rayon_PoolBuilder {
|
|
750
|
+
private constructor();
|
|
751
|
+
/**
 * NOTE(review): presumably releases the WASM-side object backing this handle
 * (wasm-bindgen convention) — confirm.
 */
free(): void;
|
|
752
|
+
/**
 * Disposal hook for explicit resource management (`using` declarations).
 * NOTE(review): presumably equivalent to `free()` — confirm.
 */
[Symbol.dispose](): void;
|
|
753
|
+
/** NOTE(review): presumably the number of threads the pool is configured with — confirm. */
numThreads(): number;
|
|
754
|
+
/** NOTE(review): presumably finalizes construction of the thread pool — confirm. */
build(): void;
|
|
755
|
+
/**
 * Returns a numeric value; `wbg_rayon_start_worker` takes a `receiver: number`
 * parameter, so this is presumably the value handed to workers — confirm.
 */
receiver(): number;
|
|
756
|
+
}
|
|
757
|
+
|
|
758
|
+
/**
 * NOTE(review): presumably invoked inside worker threads by the
 * wasm-bindgen-rayon helper, with the value obtained from
 * `wbg_rayon_PoolBuilder.receiver()` — confirm. Likely not intended to be
 * called directly by application code.
 */
export function wbg_rayon_start_worker(receiver: number): void;
|