@kreuzberg/wasm 4.0.0-rc.21 → 4.0.0-rc.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/README.md +520 -837
  2. package/dist/adapters/wasm-adapter.d.ts +7 -10
  3. package/dist/adapters/wasm-adapter.d.ts.map +1 -0
  4. package/dist/adapters/wasm-adapter.js +41 -19
  5. package/dist/adapters/wasm-adapter.js.map +1 -1
  6. package/dist/index.d.ts +23 -24
  7. package/dist/index.d.ts.map +1 -0
  8. package/dist/index.js +240 -67
  9. package/dist/index.js.map +1 -1
  10. package/dist/ocr/registry.d.ts +7 -10
  11. package/dist/ocr/registry.d.ts.map +1 -0
  12. package/dist/ocr/registry.js.map +1 -1
  13. package/dist/ocr/tesseract-wasm-backend.d.ts +3 -6
  14. package/dist/ocr/tesseract-wasm-backend.d.ts.map +1 -0
  15. package/dist/ocr/tesseract-wasm-backend.js +0 -46
  16. package/dist/ocr/tesseract-wasm-backend.js.map +1 -1
  17. package/dist/pdfium.js +0 -5
  18. package/dist/plugin-registry.d.ts +246 -0
  19. package/dist/plugin-registry.d.ts.map +1 -0
  20. package/dist/runtime.d.ts +21 -22
  21. package/dist/runtime.d.ts.map +1 -0
  22. package/dist/runtime.js +0 -1
  23. package/dist/runtime.js.map +1 -1
  24. package/dist/{types-CKjcIYcX.d.ts → types.d.ts} +91 -22
  25. package/dist/types.d.ts.map +1 -0
  26. package/package.json +119 -162
  27. package/dist/adapters/wasm-adapter.cjs +0 -245
  28. package/dist/adapters/wasm-adapter.cjs.map +0 -1
  29. package/dist/adapters/wasm-adapter.d.cts +0 -121
  30. package/dist/index.cjs +0 -1245
  31. package/dist/index.cjs.map +0 -1
  32. package/dist/index.d.cts +0 -423
  33. package/dist/ocr/registry.cjs +0 -92
  34. package/dist/ocr/registry.cjs.map +0 -1
  35. package/dist/ocr/registry.d.cts +0 -102
  36. package/dist/ocr/tesseract-wasm-backend.cjs +0 -456
  37. package/dist/ocr/tesseract-wasm-backend.cjs.map +0 -1
  38. package/dist/ocr/tesseract-wasm-backend.d.cts +0 -257
  39. package/dist/runtime.cjs +0 -174
  40. package/dist/runtime.cjs.map +0 -1
  41. package/dist/runtime.d.cts +0 -256
  42. package/dist/types-CKjcIYcX.d.cts +0 -294
package/dist/index.d.cts DELETED
@@ -1,423 +0,0 @@
1
- import { E as ExtractionConfig, a as ExtractionResult } from './types-CKjcIYcX.cjs.js';
2
- export { C as Chunk, b as ChunkingConfig, c as ChunkMetadata, d as ExtractedImage, I as ImageExtractionConfig, L as LanguageDetectionConfig, M as Metadata, O as OcrBackendProtocol, e as OcrConfig, P as PageContent, f as PageExtractionConfig, g as PdfConfig, h as PostProcessorConfig, T as Table, i as TesseractConfig, j as TokenReductionConfig, E as ExtractionConfig, a as ExtractionResult } from './types-CKjcIYcX.cjs.js';
3
- export { configToJS, fileToUint8Array, isValidExtractionResult, jsToExtractionResult, wrapWasmError } from './adapters/wasm-adapter.cjs';
4
- export { clearOcrBackends, getOcrBackend, listOcrBackends, registerOcrBackend, unregisterOcrBackend } from './ocr/registry.cjs';
5
- export { TesseractWasmBackend } from './ocr/tesseract-wasm-backend.cjs';
6
- export { type RuntimeType, type WasmCapabilities, detectRuntime, getRuntimeInfo, getRuntimeVersion, getWasmCapabilities, hasBigInt, hasBlob, hasFileApi, hasModuleWorkers, hasSharedArrayBuffer, hasWasm, hasWasmStreaming, hasWorkers, isBrowser, isBun, isDeno, isNode, isServerEnvironment, isWebEnvironment } from './runtime.d.cts';
7
-
8
- /**
9
- * Kreuzberg - WebAssembly Bindings for Browser and Runtime Environments
10
- *
11
- * This module provides WebAssembly bindings for Kreuzberg document intelligence,
12
- * enabling high-performance document extraction in browser and JavaScript runtime environments.
13
- *
14
- * ## Features
15
- *
16
- * - Extract text, metadata, and tables from documents
17
- * - Support for multiple document formats (PDF, Office, images, etc.)
18
- * - Browser and runtime-compatible WASM bindings
19
- * - Type-safe TypeScript interfaces
20
- * - Runtime detection and feature capability checking
21
- * - Automatic type conversion and error handling
22
- *
23
- * ## Installation
24
- *
25
- * ```bash
26
- * npm install @kreuzberg/wasm
27
- * ```
28
- *
29
- * ## Basic Usage
30
- *
31
- * ```typescript
32
- * import { extractBytes, initWasm } from '@kreuzberg/wasm';
33
- *
34
- * // Initialize WASM module once at app startup
35
- * await initWasm();
36
- *
37
- * // Extract from bytes
38
- * const bytes = new Uint8Array(buffer);
39
- * const result = await extractBytes(bytes, 'application/pdf');
40
- * console.log(result.content);
41
- * ```
42
- *
43
- * ## Browser Usage with File Input
44
- *
45
- * ```typescript
46
- * import { extractBytes, initWasm } from '@kreuzberg/wasm';
47
- * import { fileToUint8Array } from '@kreuzberg/wasm/adapters/wasm-adapter';
48
- *
49
- * // Initialize once at app startup
50
- * await initWasm();
51
- *
52
- * // Handle file input
53
- * const fileInput = document.getElementById('file');
54
- * fileInput.addEventListener('change', async (e) => {
55
- * const file = e.target.files?.[0];
56
- * if (file) {
57
- * const bytes = await fileToUint8Array(file);
58
- * const result = await extractBytes(bytes, file.type);
59
- * console.log(result.content);
60
- * }
61
- * });
62
- * ```
63
- *
64
- * ## Runtime Detection
65
- *
66
- * ```typescript
67
- * import { detectRuntime, getWasmCapabilities } from '@kreuzberg/wasm/runtime';
68
- *
69
- * const runtime = detectRuntime();
70
- * const caps = getWasmCapabilities();
71
- *
72
- * if (caps.hasWorkers) {
73
- * // Can use Web Workers for parallel processing
74
- * }
75
- * ```
76
- *
77
- * ## Configuration
78
- *
79
- * ```typescript
80
- * import { extractBytes, initWasm } from '@kreuzberg/wasm';
81
- * import type { ExtractionConfig } from '@kreuzberg/wasm';
82
- *
83
- * await initWasm();
84
- *
85
- * const config: ExtractionConfig = {
86
- * ocr: {
87
- * backend: 'tesseract',
88
- * language: 'eng'
89
- * },
90
- * chunking: {
91
- * maxChars: 1000,
92
- * chunkOverlap: 100
93
- * },
94
- * images: {
95
- * extractImages: true,
96
- * targetDpi: 150
97
- * }
98
- * };
99
- *
100
- * const result = await extractBytes(bytes, 'application/pdf', config);
101
- * ```
102
- */
103
-
104
- declare function initWasm(): Promise<void>;
105
- /**
106
- * Check if WASM module is initialized
107
- *
108
- * @returns True if WASM module is initialized, false otherwise
109
- *
110
- * @example
111
- * ```typescript
112
- * if (!isInitialized()) {
113
- * await initWasm();
114
- * }
115
- * ```
116
- */
117
- declare function isInitialized(): boolean;
118
- /**
119
- * Get WASM module version
120
- *
121
- * @throws {Error} If WASM module is not initialized
122
- * @returns The version string of the WASM module
123
- *
124
- * @example
125
- * ```typescript
126
- * const version = getVersion();
127
- * console.log(`Using Kreuzberg ${version}`);
128
- * ```
129
- */
130
- declare function getVersion(): string;
131
- /**
132
- * Get initialization error if module failed to load
133
- *
134
- * @returns The error that occurred during initialization, or null if no error
135
- *
136
- * @internal
137
- */
138
- declare function getInitializationError(): Error | null;
139
- /**
140
- * Extract content from bytes (document data)
141
- *
142
- * Extracts text, metadata, tables, images, and other content from document bytes.
143
- * Automatically detects document type from MIME type and applies appropriate extraction logic.
144
- *
145
- * @param data - The document bytes to extract from
146
- * @param mimeType - MIME type of the document (e.g., 'application/pdf', 'image/jpeg')
147
- * @param config - Optional extraction configuration
148
- * @returns Promise resolving to the extraction result
149
- * @throws {Error} If WASM module is not initialized or extraction fails
150
- *
151
- * @example Extract PDF
152
- * ```typescript
153
- * const bytes = new Uint8Array(buffer);
154
- * const result = await extractBytes(bytes, 'application/pdf');
155
- * console.log(result.content);
156
- * console.log(result.tables);
157
- * ```
158
- *
159
- * @example Extract with Configuration
160
- * ```typescript
161
- * const result = await extractBytes(bytes, 'application/pdf', {
162
- * ocr: {
163
- * backend: 'tesseract',
164
- * language: 'deu' // German
165
- * },
166
- * images: {
167
- * extractImages: true,
168
- * targetDpi: 200
169
- * }
170
- * });
171
- * ```
172
- *
173
- * @example Extract from File
174
- * ```typescript
175
- * const file = inputEvent.target.files[0];
176
- * const bytes = await fileToUint8Array(file);
177
- * const result = await extractBytes(bytes, file.type);
178
- * ```
179
- */
180
- declare function extractBytes(data: Uint8Array, mimeType: string, config?: ExtractionConfig | null): Promise<ExtractionResult>;
181
- /**
182
- * Extract content from a file on the file system
183
- *
184
- * Node.js and Deno specific function that reads a file from the file system
185
- * and extracts content from it. Automatically detects MIME type if not provided.
186
- *
187
- * @param path - Path to the file to extract from
188
- * @param mimeType - Optional MIME type of the file. If not provided, will attempt to detect
189
- * @param config - Optional extraction configuration
190
- * @returns Promise resolving to the extraction result
191
- * @throws {Error} If WASM module is not initialized, file doesn't exist, or extraction fails
192
- *
193
- * @example Extract with auto-detection
194
- * ```typescript
195
- * const result = await extractFile('./document.pdf');
196
- * console.log(result.content);
197
- * ```
198
- *
199
- * @example Extract with explicit MIME type
200
- * ```typescript
201
- * const result = await extractFile('./document.docx', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document');
202
- * ```
203
- *
204
- * @example Extract from Node.js with config
205
- * ```typescript
206
- * import { extractFile } from '@kreuzberg/wasm';
207
- * import { readFile } from 'fs/promises';
208
- *
209
- * const result = await extractFile('./report.xlsx', null, {
210
- * chunking: {
211
- * maxChars: 1000
212
- * }
213
- * });
214
- * ```
215
- */
216
- declare function extractFile(path: string, mimeType?: string | null, config?: ExtractionConfig | null): Promise<ExtractionResult>;
217
- /**
218
- * Extract content from a File or Blob (browser-friendly wrapper)
219
- *
220
- * Convenience function that wraps fileToUint8Array and extractBytes,
221
- * providing a streamlined API for browser applications handling file inputs.
222
- *
223
- * @param file - The File or Blob to extract from
224
- * @param mimeType - Optional MIME type. If not provided, uses file.type if available
225
- * @param config - Optional extraction configuration
226
- * @returns Promise resolving to the extraction result
227
- * @throws {Error} If WASM module is not initialized or extraction fails
228
- *
229
- * @example Simple file extraction
230
- * ```typescript
231
- * const fileInput = document.getElementById('file');
232
- * fileInput.addEventListener('change', async (e) => {
233
- * const file = e.target.files?.[0];
234
- * if (file) {
235
- * const result = await extractFromFile(file);
236
- * console.log(result.content);
237
- * }
238
- * });
239
- * ```
240
- *
241
- * @example With configuration
242
- * ```typescript
243
- * const result = await extractFromFile(file, file.type, {
244
- * chunking: { maxChars: 1000 },
245
- * images: { extractImages: true }
246
- * });
247
- * ```
248
- */
249
- declare function extractFromFile(file: File | Blob, mimeType?: string | null, config?: ExtractionConfig | null): Promise<ExtractionResult>;
250
- /**
251
- * Extract content from bytes synchronously
252
- *
253
- * Synchronous version of extractBytes. Performs extraction without async operations.
254
- * Note: Some extraction features may still be async internally, but the wrapper is synchronous.
255
- *
256
- * @param data - The document bytes to extract from
257
- * @param mimeType - MIME type of the document
258
- * @param config - Optional extraction configuration
259
- * @returns The extraction result
260
- * @throws {Error} If WASM module is not initialized or extraction fails
261
- *
262
- * @example
263
- * ```typescript
264
- * const bytes = new Uint8Array(buffer);
265
- * const result = extractBytesSync(bytes, 'application/pdf');
266
- * console.log(result.content);
267
- * ```
268
- */
269
- declare function extractBytesSync(data: Uint8Array, mimeType: string, config?: ExtractionConfig | null): ExtractionResult;
270
- /**
271
- * Batch extract content from multiple byte arrays asynchronously
272
- *
273
- * Extracts content from multiple documents in a single batch operation,
274
- * allowing for more efficient processing of multiple files.
275
- *
276
- * @param files - Array of objects containing data (Uint8Array) and mimeType (string)
277
- * @param config - Optional extraction configuration applied to all files
278
- * @returns Promise resolving to array of extraction results
279
- * @throws {Error} If WASM module is not initialized or extraction fails
280
- *
281
- * @example
282
- * ```typescript
283
- * const files = [
284
- * { data: pdfBytes, mimeType: 'application/pdf' },
285
- * { data: docxBytes, mimeType: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' }
286
- * ];
287
- * const results = await batchExtractBytes(files);
288
- * results.forEach((result) => console.log(result.content));
289
- * ```
290
- */
291
- declare function batchExtractBytes(files: Array<{
292
- data: Uint8Array;
293
- mimeType: string;
294
- }>, config?: ExtractionConfig | null): Promise<ExtractionResult[]>;
295
- /**
296
- * Batch extract content from multiple byte arrays synchronously
297
- *
298
- * Synchronous version of batchExtractBytes. Extracts content from multiple documents
299
- * in a single batch operation without async operations.
300
- *
301
- * @param files - Array of objects containing data (Uint8Array) and mimeType (string)
302
- * @param config - Optional extraction configuration applied to all files
303
- * @returns Array of extraction results
304
- * @throws {Error} If WASM module is not initialized or extraction fails
305
- *
306
- * @example
307
- * ```typescript
308
- * const files = [
309
- * { data: pdfBytes, mimeType: 'application/pdf' },
310
- * { data: docxBytes, mimeType: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' }
311
- * ];
312
- * const results = batchExtractBytesSync(files);
313
- * results.forEach((result) => console.log(result.content));
314
- * ```
315
- */
316
- declare function batchExtractBytesSync(files: Array<{
317
- data: Uint8Array;
318
- mimeType: string;
319
- }>, config?: ExtractionConfig | null): ExtractionResult[];
320
- /**
321
- * Batch extract content from multiple File objects asynchronously
322
- *
323
- * Convenience function that converts File objects to Uint8Array and calls batchExtractBytes.
324
- * Automatically uses the file.type as MIME type if available.
325
- *
326
- * @param files - Array of File objects to extract from
327
- * @param config - Optional extraction configuration applied to all files
328
- * @returns Promise resolving to array of extraction results
329
- * @throws {Error} If WASM module is not initialized, files cannot be read, or extraction fails
330
- *
331
- * @example
332
- * ```typescript
333
- * const fileInput = document.getElementById('files');
334
- * const files = Array.from(fileInput.files ?? []);
335
- * const results = await batchExtractFiles(files);
336
- * results.forEach((result, index) => {
337
- * console.log(`File ${index}: ${result.content.substring(0, 50)}...`);
338
- * });
339
- * ```
340
- */
341
- declare function batchExtractFiles(files: File[], config?: ExtractionConfig | null): Promise<ExtractionResult[]>;
342
- /**
343
- * Enable OCR functionality with tesseract-wasm backend
344
- *
345
- * Convenience function that automatically initializes and registers the Tesseract WASM backend.
346
- * This is the recommended approach for enabling OCR in WASM-based applications.
347
- *
348
- * ## Browser Requirement
349
- *
350
- * This function requires a browser environment with support for:
351
- * - WebWorkers (for Tesseract processing)
352
- * - createImageBitmap (for image conversion)
353
- * - Blob API
354
- *
355
- * ## Network Requirement
356
- *
357
- * Training data will be loaded from jsDelivr CDN on first use of each language.
358
- * Ensure network access to cdn.jsdelivr.net is available.
359
- *
360
- * @throws {Error} If not in browser environment or tesseract-wasm is not available
361
- *
362
- * @example Basic Usage
363
- * ```typescript
364
- * import { enableOcr, extractBytes, initWasm } from '@kreuzberg/wasm';
365
- *
366
- * async function main() {
367
- * // Initialize WASM module
368
- * await initWasm();
369
- *
370
- * // Enable OCR with tesseract-wasm
371
- * await enableOcr();
372
- *
373
- * // Now you can use OCR in extraction
374
- * const imageBytes = new Uint8Array(buffer);
375
- * const result = await extractBytes(imageBytes, 'image/png', {
376
- * ocr: { backend: 'tesseract-wasm', language: 'eng' }
377
- * });
378
- *
379
- * console.log(result.content); // Extracted text
380
- * }
381
- *
382
- * main().catch(console.error);
383
- * ```
384
- *
385
- * @example With Progress Tracking
386
- * ```typescript
387
- * import { registerOcrBackend, TesseractWasmBackend } from '@kreuzberg/wasm';
388
- *
389
- * async function setupOcrWithProgress() {
390
- * const backend = new TesseractWasmBackend();
391
- * backend.setProgressCallback((progress) => {
392
- * console.log(`OCR Progress: ${progress}%`);
393
- * updateProgressBar(progress);
394
- * });
395
- *
396
- * await backend.initialize();
397
- * registerOcrBackend(backend);
398
- * }
399
- *
400
- * setupOcrWithProgress().catch(console.error);
401
- * ```
402
- *
403
- * @example Multiple Languages
404
- * ```typescript
405
- * import { enableOcr, extractBytes, initWasm } from '@kreuzberg/wasm';
406
- *
407
- * await initWasm();
408
- * await enableOcr();
409
- *
410
- * // Extract English text
411
- * const englishResult = await extractBytes(engImageBytes, 'image/png', {
412
- * ocr: { backend: 'tesseract-wasm', language: 'eng' }
413
- * });
414
- *
415
- * // Extract German text - model is cached after first use
416
- * const germanResult = await extractBytes(deImageBytes, 'image/png', {
417
- * ocr: { backend: 'tesseract-wasm', language: 'deu' }
418
- * });
419
- * ```
420
- */
421
- declare function enableOcr(): Promise<void>;
422
-
423
- export { batchExtractBytes, batchExtractBytesSync, batchExtractFiles, enableOcr, extractBytes, extractBytesSync, extractFile, extractFromFile, getInitializationError, getVersion, initWasm, isInitialized };
package/dist/ocr/registry.cjs DELETED
@@ -1,92 +0,0 @@
1
- "use strict";
2
- var __defProp = Object.defineProperty;
3
- var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
- var __getOwnPropNames = Object.getOwnPropertyNames;
5
- var __hasOwnProp = Object.prototype.hasOwnProperty;
6
- var __export = (target, all) => {
7
- for (var name in all)
8
- __defProp(target, name, { get: all[name], enumerable: true });
9
- };
10
- var __copyProps = (to, from, except, desc) => {
11
- if (from && typeof from === "object" || typeof from === "function") {
12
- for (let key of __getOwnPropNames(from))
13
- if (!__hasOwnProp.call(to, key) && key !== except)
14
- __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
- }
16
- return to;
17
- };
18
- var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
-
20
- // typescript/ocr/registry.ts
21
- var registry_exports = {};
22
- __export(registry_exports, {
23
- clearOcrBackends: () => clearOcrBackends,
24
- getOcrBackend: () => getOcrBackend,
25
- listOcrBackends: () => listOcrBackends,
26
- registerOcrBackend: () => registerOcrBackend,
27
- unregisterOcrBackend: () => unregisterOcrBackend
28
- });
29
- module.exports = __toCommonJS(registry_exports);
30
- var ocrBackendRegistry = /* @__PURE__ */ new Map();
31
- function registerOcrBackend(backend) {
32
- if (!backend) {
33
- throw new Error("Backend cannot be null or undefined");
34
- }
35
- if (typeof backend.name !== "function") {
36
- throw new Error("Backend must implement name() method");
37
- }
38
- if (typeof backend.supportedLanguages !== "function") {
39
- throw new Error("Backend must implement supportedLanguages() method");
40
- }
41
- if (typeof backend.processImage !== "function") {
42
- throw new Error("Backend must implement processImage() method");
43
- }
44
- const backendName = backend.name();
45
- if (!backendName || typeof backendName !== "string") {
46
- throw new Error("Backend name must be a non-empty string");
47
- }
48
- if (ocrBackendRegistry.has(backendName)) {
49
- console.warn(`OCR backend "${backendName}" is already registered and will be replaced`);
50
- }
51
- ocrBackendRegistry.set(backendName, backend);
52
- }
53
- function getOcrBackend(name) {
54
- return ocrBackendRegistry.get(name);
55
- }
56
- function listOcrBackends() {
57
- return Array.from(ocrBackendRegistry.keys());
58
- }
59
- async function unregisterOcrBackend(name) {
60
- const backend = ocrBackendRegistry.get(name);
61
- if (!backend) {
62
- throw new Error(
63
- `OCR backend "${name}" is not registered. Available backends: ${Array.from(ocrBackendRegistry.keys()).join(", ")}`
64
- );
65
- }
66
- if (typeof backend.shutdown === "function") {
67
- try {
68
- await backend.shutdown();
69
- } catch (error) {
70
- console.warn(
71
- `Error shutting down OCR backend "${name}": ${error instanceof Error ? error.message : String(error)}`
72
- );
73
- }
74
- }
75
- ocrBackendRegistry.delete(name);
76
- }
77
- async function clearOcrBackends() {
78
- const backends = Array.from(ocrBackendRegistry.entries());
79
- for (const [name, backend] of backends) {
80
- if (typeof backend.shutdown === "function") {
81
- try {
82
- await backend.shutdown();
83
- } catch (error) {
84
- console.warn(
85
- `Error shutting down OCR backend "${name}": ${error instanceof Error ? error.message : String(error)}`
86
- );
87
- }
88
- }
89
- }
90
- ocrBackendRegistry.clear();
91
- }
92
- //# sourceMappingURL=registry.cjs.map
package/dist/ocr/registry.cjs.map DELETED
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../../typescript/ocr/registry.ts"],"sourcesContent":["/**\n * OCR Backend Registry\n *\n * Provides a registry for OCR backends in the WASM environment.\n * This enables auto-registration and management of OCR backends.\n *\n * Note: The WASM package provides a lightweight registry in the browser.\n * For more advanced features like Rust integration, use @kreuzberg/node or @kreuzberg/deno.\n *\n * @example\n * ```typescript\n * import { TesseractWasmBackend } from '@kreuzberg/wasm/ocr/tesseract-wasm-backend';\n * import { enableOcr } from '@kreuzberg/wasm';\n *\n * // Simple auto-registration\n * await enableOcr();\n * ```\n */\n\nimport type { OcrBackendProtocol } from \"../types.js\";\n\n/** Global registry of OCR backends */\nconst ocrBackendRegistry = new Map<string, OcrBackendProtocol>();\n\n/**\n * Register an OCR backend\n *\n * Registers an OCR backend with the WASM extraction pipeline.\n * If a backend with the same name is already registered, it will be replaced.\n *\n * @param backend - OCR backend implementing OcrBackendProtocol\n * @throws {Error} If backend validation fails\n *\n * @example\n * ```typescript\n * import { TesseractWasmBackend } from '@kreuzberg/wasm/ocr/tesseract-wasm-backend';\n * import { registerOcrBackend } from '@kreuzberg/wasm/ocr/registry';\n *\n * const backend = new TesseractWasmBackend();\n * await backend.initialize();\n * registerOcrBackend(backend);\n * ```\n */\nexport function registerOcrBackend(backend: OcrBackendProtocol): void {\n\t// Validate backend\n\tif (!backend) {\n\t\tthrow new Error(\"Backend cannot be null or undefined\");\n\t}\n\n\tif (typeof backend.name !== \"function\") {\n\t\tthrow new Error(\"Backend must implement name() method\");\n\t}\n\n\tif (typeof backend.supportedLanguages !== \"function\") {\n\t\tthrow new Error(\"Backend must implement supportedLanguages() method\");\n\t}\n\n\tif (typeof backend.processImage !== \"function\") {\n\t\tthrow new Error(\"Backend must 
implement processImage() method\");\n\t}\n\n\tconst backendName = backend.name();\n\n\tif (!backendName || typeof backendName !== \"string\") {\n\t\tthrow new Error(\"Backend name must be a non-empty string\");\n\t}\n\n\t// Check for duplicate registration (allow overwriting with warning)\n\tif (ocrBackendRegistry.has(backendName)) {\n\t\tconsole.warn(`OCR backend \"${backendName}\" is already registered and will be replaced`);\n\t}\n\n\t// Register the backend\n\tocrBackendRegistry.set(backendName, backend);\n}\n\n/**\n * Get a registered OCR backend by name\n *\n * @param name - Backend name\n * @returns The OCR backend or undefined if not found\n *\n * @example\n * ```typescript\n * import { getOcrBackend } from '@kreuzberg/wasm/ocr/registry';\n *\n * const backend = getOcrBackend('tesseract-wasm');\n * if (backend) {\n * console.log('Available languages:', backend.supportedLanguages());\n * }\n * ```\n */\nexport function getOcrBackend(name: string): OcrBackendProtocol | undefined {\n\treturn ocrBackendRegistry.get(name);\n}\n\n/**\n * List all registered OCR backends\n *\n * @returns Array of registered backend names\n *\n * @example\n * ```typescript\n * import { listOcrBackends } from '@kreuzberg/wasm/ocr/registry';\n *\n * const backends = listOcrBackends();\n * console.log('Available OCR backends:', backends);\n * ```\n */\nexport function listOcrBackends(): string[] {\n\treturn Array.from(ocrBackendRegistry.keys());\n}\n\n/**\n * Unregister an OCR backend\n *\n * @param name - Backend name to unregister\n * @throws {Error} If backend is not found\n *\n * @example\n * ```typescript\n * import { unregisterOcrBackend } from '@kreuzberg/wasm/ocr/registry';\n *\n * unregisterOcrBackend('tesseract-wasm');\n * ```\n */\nexport async function unregisterOcrBackend(name: string): Promise<void> {\n\tconst backend = ocrBackendRegistry.get(name);\n\n\tif (!backend) {\n\t\tthrow new Error(\n\t\t\t`OCR backend \"${name}\" is not registered. 
Available backends: ${Array.from(ocrBackendRegistry.keys()).join(\", \")}`,\n\t\t);\n\t}\n\n\t// Call shutdown if available\n\tif (typeof backend.shutdown === \"function\") {\n\t\ttry {\n\t\t\tawait backend.shutdown();\n\t\t} catch (error) {\n\t\t\tconsole.warn(\n\t\t\t\t`Error shutting down OCR backend \"${name}\": ${error instanceof Error ? error.message : String(error)}`,\n\t\t\t);\n\t\t}\n\t}\n\n\tocrBackendRegistry.delete(name);\n}\n\n/**\n * Clear all registered OCR backends\n *\n * Unregisters all OCR backends and calls their shutdown methods.\n *\n * @example\n * ```typescript\n * import { clearOcrBackends } from '@kreuzberg/wasm/ocr/registry';\n *\n * // Clean up all backends when shutting down\n * await clearOcrBackends();\n * ```\n */\nexport async function clearOcrBackends(): Promise<void> {\n\tconst backends = Array.from(ocrBackendRegistry.entries());\n\n\tfor (const [name, backend] of backends) {\n\t\tif (typeof backend.shutdown === \"function\") {\n\t\t\ttry {\n\t\t\t\tawait backend.shutdown();\n\t\t\t} catch (error) {\n\t\t\t\tconsole.warn(\n\t\t\t\t\t`Error shutting down OCR backend \"${name}\": ${error instanceof Error ? 
error.message : String(error)}`,\n\t\t\t\t);\n\t\t\t}\n\t\t}\n\t}\n\n\tocrBackendRegistry.clear();\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAsBA,IAAM,qBAAqB,oBAAI,IAAgC;AAqBxD,SAAS,mBAAmB,SAAmC;AAErE,MAAI,CAAC,SAAS;AACb,UAAM,IAAI,MAAM,qCAAqC;AAAA,EACtD;AAEA,MAAI,OAAO,QAAQ,SAAS,YAAY;AACvC,UAAM,IAAI,MAAM,sCAAsC;AAAA,EACvD;AAEA,MAAI,OAAO,QAAQ,uBAAuB,YAAY;AACrD,UAAM,IAAI,MAAM,oDAAoD;AAAA,EACrE;AAEA,MAAI,OAAO,QAAQ,iBAAiB,YAAY;AAC/C,UAAM,IAAI,MAAM,8CAA8C;AAAA,EAC/D;AAEA,QAAM,cAAc,QAAQ,KAAK;AAEjC,MAAI,CAAC,eAAe,OAAO,gBAAgB,UAAU;AACpD,UAAM,IAAI,MAAM,yCAAyC;AAAA,EAC1D;AAGA,MAAI,mBAAmB,IAAI,WAAW,GAAG;AACxC,YAAQ,KAAK,gBAAgB,WAAW,8CAA8C;AAAA,EACvF;AAGA,qBAAmB,IAAI,aAAa,OAAO;AAC5C;AAkBO,SAAS,cAAc,MAA8C;AAC3E,SAAO,mBAAmB,IAAI,IAAI;AACnC;AAeO,SAAS,kBAA4B;AAC3C,SAAO,MAAM,KAAK,mBAAmB,KAAK,CAAC;AAC5C;AAeA,eAAsB,qBAAqB,MAA6B;AACvE,QAAM,UAAU,mBAAmB,IAAI,IAAI;AAE3C,MAAI,CAAC,SAAS;AACb,UAAM,IAAI;AAAA,MACT,gBAAgB,IAAI,4CAA4C,MAAM,KAAK,mBAAmB,KAAK,CAAC,EAAE,KAAK,IAAI,CAAC;AAAA,IACjH;AAAA,EACD;AAGA,MAAI,OAAO,QAAQ,aAAa,YAAY;AAC3C,QAAI;AACH,YAAM,QAAQ,SAAS;AAAA,IACxB,SAAS,OAAO;AACf,cAAQ;AAAA,QACP,oCAAoC,IAAI,MAAM,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,CAAC;AAAA,MACrG;AAAA,IACD;AAAA,EACD;AAEA,qBAAmB,OAAO,IAAI;AAC/B;AAeA,eAAsB,mBAAkC;AACvD,QAAM,WAAW,MAAM,KAAK,mBAAmB,QAAQ,CAAC;AAExD,aAAW,CAAC,MAAM,OAAO,KAAK,UAAU;AACvC,QAAI,OAAO,QAAQ,aAAa,YAAY;AAC3C,UAAI;AACH,cAAM,QAAQ,SAAS;AAAA,MACxB,SAAS,OAAO;AACf,gBAAQ;AAAA,UACP,oCAAoC,IAAI,MAAM,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,CAAC;AAAA,QACrG;AAAA,MACD;AAAA,IACD;AAAA,EACD;AAEA,qBAAmB,MAAM;AAC1B;","names":[]}
package/dist/ocr/registry.d.cts DELETED
@@ -1,102 +0,0 @@
1
- import { O as OcrBackendProtocol } from '../types-CKjcIYcX.cjs';
2
-
3
- /**
4
- * OCR Backend Registry
5
- *
6
- * Provides a registry for OCR backends in the WASM environment.
7
- * This enables auto-registration and management of OCR backends.
8
- *
9
- * Note: The WASM package provides a lightweight registry in the browser.
10
- * For more advanced features like Rust integration, use @kreuzberg/node or @kreuzberg/deno.
11
- *
12
- * @example
13
- * ```typescript
14
- * import { TesseractWasmBackend } from '@kreuzberg/wasm/ocr/tesseract-wasm-backend';
15
- * import { enableOcr } from '@kreuzberg/wasm';
16
- *
17
- * // Simple auto-registration
18
- * await enableOcr();
19
- * ```
20
- */
21
-
22
- /**
23
- * Register an OCR backend
24
- *
25
- * Registers an OCR backend with the WASM extraction pipeline.
26
- * If a backend with the same name is already registered, it will be replaced.
27
- *
28
- * @param backend - OCR backend implementing OcrBackendProtocol
29
- * @throws {Error} If backend validation fails
30
- *
31
- * @example
32
- * ```typescript
33
- * import { TesseractWasmBackend } from '@kreuzberg/wasm/ocr/tesseract-wasm-backend';
34
- * import { registerOcrBackend } from '@kreuzberg/wasm/ocr/registry';
35
- *
36
- * const backend = new TesseractWasmBackend();
37
- * await backend.initialize();
38
- * registerOcrBackend(backend);
39
- * ```
40
- */
41
- declare function registerOcrBackend(backend: OcrBackendProtocol): void;
42
- /**
43
- * Get a registered OCR backend by name
44
- *
45
- * @param name - Backend name
46
- * @returns The OCR backend or undefined if not found
47
- *
48
- * @example
49
- * ```typescript
50
- * import { getOcrBackend } from '@kreuzberg/wasm/ocr/registry';
51
- *
52
- * const backend = getOcrBackend('tesseract-wasm');
53
- * if (backend) {
54
- * console.log('Available languages:', backend.supportedLanguages());
55
- * }
56
- * ```
57
- */
58
- declare function getOcrBackend(name: string): OcrBackendProtocol | undefined;
59
- /**
60
- * List all registered OCR backends
61
- *
62
- * @returns Array of registered backend names
63
- *
64
- * @example
65
- * ```typescript
66
- * import { listOcrBackends } from '@kreuzberg/wasm/ocr/registry';
67
- *
68
- * const backends = listOcrBackends();
69
- * console.log('Available OCR backends:', backends);
70
- * ```
71
- */
72
- declare function listOcrBackends(): string[];
73
- /**
74
- * Unregister an OCR backend
75
- *
76
- * @param name - Backend name to unregister
77
- * @throws {Error} If backend is not found
78
- *
79
- * @example
80
- * ```typescript
81
- * import { unregisterOcrBackend } from '@kreuzberg/wasm/ocr/registry';
82
- *
83
- * await unregisterOcrBackend('tesseract-wasm');
84
- * ```
85
- */
86
- declare function unregisterOcrBackend(name: string): Promise<void>;
87
- /**
88
- * Clear all registered OCR backends
89
- *
90
- * Unregisters all OCR backends and calls their shutdown methods.
91
- *
92
- * @example
93
- * ```typescript
94
- * import { clearOcrBackends } from '@kreuzberg/wasm/ocr/registry';
95
- *
96
- * // Clean up all backends when shutting down
97
- * await clearOcrBackends();
98
- * ```
99
- */
100
- declare function clearOcrBackends(): Promise<void>;
101
-
102
- export { clearOcrBackends, getOcrBackend, listOcrBackends, registerOcrBackend, unregisterOcrBackend };