@kreuzberg/wasm 4.0.0-rc.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +982 -0
- package/dist/adapters/wasm-adapter.d.mts +121 -0
- package/dist/adapters/wasm-adapter.d.ts +121 -0
- package/dist/adapters/wasm-adapter.js +241 -0
- package/dist/adapters/wasm-adapter.js.map +1 -0
- package/dist/adapters/wasm-adapter.mjs +221 -0
- package/dist/adapters/wasm-adapter.mjs.map +1 -0
- package/dist/index.d.mts +466 -0
- package/dist/index.d.ts +466 -0
- package/dist/index.js +383 -0
- package/dist/index.js.map +1 -0
- package/dist/index.mjs +384 -0
- package/dist/index.mjs.map +1 -0
- package/dist/kreuzberg_wasm.d.mts +758 -0
- package/dist/kreuzberg_wasm.d.ts +758 -0
- package/dist/kreuzberg_wasm.js +1913 -0
- package/dist/kreuzberg_wasm.mjs +48 -0
- package/dist/kreuzberg_wasm_bg.wasm +0 -0
- package/dist/kreuzberg_wasm_bg.wasm.d.ts +54 -0
- package/dist/ocr/registry.d.mts +102 -0
- package/dist/ocr/registry.d.ts +102 -0
- package/dist/ocr/registry.js +90 -0
- package/dist/ocr/registry.js.map +1 -0
- package/dist/ocr/registry.mjs +70 -0
- package/dist/ocr/registry.mjs.map +1 -0
- package/dist/ocr/tesseract-wasm-backend.d.mts +257 -0
- package/dist/ocr/tesseract-wasm-backend.d.ts +257 -0
- package/dist/ocr/tesseract-wasm-backend.js +454 -0
- package/dist/ocr/tesseract-wasm-backend.js.map +1 -0
- package/dist/ocr/tesseract-wasm-backend.mjs +424 -0
- package/dist/ocr/tesseract-wasm-backend.mjs.map +1 -0
- package/dist/runtime.d.mts +256 -0
- package/dist/runtime.d.ts +256 -0
- package/dist/runtime.js +172 -0
- package/dist/runtime.js.map +1 -0
- package/dist/runtime.mjs +152 -0
- package/dist/runtime.mjs.map +1 -0
- package/dist/snippets/wasm-bindgen-rayon-38edf6e439f6d70d/src/workerHelpers.js +107 -0
- package/dist/types-GJVIvbPy.d.mts +221 -0
- package/dist/types-GJVIvbPy.d.ts +221 -0
- package/package.json +138 -0
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import { createRequire } from 'module';
|
|
2
|
+
import { fileURLToPath } from 'url';
|
|
3
|
+
import { dirname, join } from 'path';
|
|
4
|
+
|
|
5
|
+
// CommonJS-style `require`, resolved relative to this module's URL, so the
// CommonJS build can be loaded from an ES module.
const require = createRequire(import.meta.url);

// Import the CommonJS module
const wasmModule = require('./kreuzberg_wasm.js');

// Re-export everything from the CommonJS module as ESM.
// Each name below becomes a named export bound to the property of the same
// name on the CommonJS module object at load time.
export const {
	memory,
	extractBytes,
	extractBytesSync,
	batchExtractBytes,
	batchExtractBytesSync,
	extractFile,
	batchExtractFiles,
	detectMimeFromBytes,
	normalizeMimeType,
	getMimeFromExtension,
	getExtensionsForMime,
	loadConfigFromString,
	discoverConfig,
	version,
	get_module_info,
	register_ocr_backend,
	unregister_ocr_backend,
	list_ocr_backends,
	clear_ocr_backends,
	register_post_processor,
	unregister_post_processor,
	list_post_processors,
	clear_post_processors,
	register_validator,
	unregister_validator,
	list_validators,
	clear_validators,
	initialize_pdfium_render,
	read_block_from_callback_wasm,
	write_block_from_callback_wasm,
	// The CommonJS module's own `default` property (if any) is exposed under
	// the named export `wasmDefault`, because `default` cannot be a binding name.
	default: wasmDefault
} = wasmModule;

// Support default export pattern: the default export is the whole CommonJS
// module object, not its `default` property.
export default wasmModule;
|
|
Binary file
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/* tslint:disable */
|
|
2
|
+
/* eslint-disable */
|
|
3
|
+
export const __wbg_moduleinfo_free: (a: number, b: number) => void;
|
|
4
|
+
export const batchExtractBytes: (a: number, b: number, c: number, d: number, e: number) => any;
|
|
5
|
+
export const batchExtractBytesSync: (a: number, b: number, c: number, d: number, e: number) => [number, number, number];
|
|
6
|
+
export const batchExtractFiles: (a: number, b: number, c: number) => any;
|
|
7
|
+
export const batchExtractFilesSync: () => [number, number, number];
|
|
8
|
+
export const clear_ocr_backends: () => [number, number];
|
|
9
|
+
export const clear_post_processors: () => [number, number];
|
|
10
|
+
export const clear_validators: () => [number, number];
|
|
11
|
+
export const detectMimeFromBytes: (a: any) => [number, number, number, number];
|
|
12
|
+
export const discoverConfig: () => [number, number, number];
|
|
13
|
+
export const extractBytes: (a: any, b: number, c: number, d: number) => any;
|
|
14
|
+
export const extractBytesSync: (a: any, b: number, c: number, d: number) => [number, number, number];
|
|
15
|
+
export const extractFile: (a: any, b: number, c: number, d: number) => any;
|
|
16
|
+
export const extractFileSync: () => [number, number, number];
|
|
17
|
+
export const getExtensionsForMime: (a: number, b: number) => [number, number, number];
|
|
18
|
+
export const getMimeFromExtension: (a: number, b: number) => [number, number];
|
|
19
|
+
export const get_module_info: () => number;
|
|
20
|
+
export const init_thread_pool_safe: (a: number) => number;
|
|
21
|
+
export const list_ocr_backends: () => [number, number, number];
|
|
22
|
+
export const list_post_processors: () => [number, number, number];
|
|
23
|
+
export const list_validators: () => [number, number, number];
|
|
24
|
+
export const loadConfigFromString: (a: number, b: number, c: number, d: number) => [number, number, number];
|
|
25
|
+
export const moduleinfo_name: (a: number) => [number, number];
|
|
26
|
+
export const moduleinfo_version: (a: number) => [number, number];
|
|
27
|
+
export const normalizeMimeType: (a: number, b: number) => [number, number];
|
|
28
|
+
export const register_ocr_backend: (a: any) => [number, number];
|
|
29
|
+
export const register_post_processor: (a: any) => [number, number];
|
|
30
|
+
export const register_validator: (a: any) => [number, number];
|
|
31
|
+
export const unregister_ocr_backend: (a: number, b: number) => [number, number];
|
|
32
|
+
export const unregister_post_processor: (a: number, b: number) => [number, number];
|
|
33
|
+
export const unregister_validator: (a: number, b: number) => [number, number];
|
|
34
|
+
export const version: () => [number, number];
|
|
35
|
+
export const init: () => void;
|
|
36
|
+
export const __wbg_wbg_rayon_poolbuilder_free: (a: number, b: number) => void;
|
|
37
|
+
export const initThreadPool: (a: number) => any;
|
|
38
|
+
export const wbg_rayon_poolbuilder_build: (a: number) => void;
|
|
39
|
+
export const wbg_rayon_poolbuilder_numThreads: (a: number) => number;
|
|
40
|
+
export const wbg_rayon_poolbuilder_receiver: (a: number) => number;
|
|
41
|
+
export const wbg_rayon_start_worker: (a: number) => void;
|
|
42
|
+
export const wasm_bindgen_3a2dd18e2c0b33f8___convert__closures_____invoke___wasm_bindgen_3a2dd18e2c0b33f8___JsValue_____: (a: number, b: number, c: any) => void;
|
|
43
|
+
export const wasm_bindgen_3a2dd18e2c0b33f8___closure__destroy___dyn_core_f96ffdd67f65b3d8___ops__function__FnMut__wasm_bindgen_3a2dd18e2c0b33f8___JsValue____Output_______: (a: number, b: number) => void;
|
|
44
|
+
export const wasm_bindgen_3a2dd18e2c0b33f8___convert__closures_____invoke___wasm_bindgen_3a2dd18e2c0b33f8___JsValue__wasm_bindgen_3a2dd18e2c0b33f8___JsValue_____: (a: number, b: number, c: any, d: any) => void;
|
|
45
|
+
export const memory: WebAssembly.Memory;
|
|
46
|
+
export const __wbindgen_malloc: (a: number, b: number) => number;
|
|
47
|
+
export const __wbindgen_realloc: (a: number, b: number, c: number, d: number) => number;
|
|
48
|
+
export const __wbindgen_exn_store: (a: number) => void;
|
|
49
|
+
export const __externref_table_alloc: () => number;
|
|
50
|
+
export const __wbindgen_externrefs: WebAssembly.Table;
|
|
51
|
+
export const __wbindgen_free: (a: number, b: number, c: number) => void;
|
|
52
|
+
export const __externref_table_dealloc: (a: number) => void;
|
|
53
|
+
export const __wbindgen_thread_destroy: (a?: number, b?: number, c?: number) => void;
|
|
54
|
+
export const __wbindgen_start: (a: number) => void;
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import { f as OcrBackendProtocol } from '../types-GJVIvbPy.mjs';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* OCR Backend Registry
|
|
5
|
+
*
|
|
6
|
+
* Provides a registry for OCR backends in the WASM environment.
|
|
7
|
+
* This enables auto-registration and management of OCR backends.
|
|
8
|
+
*
|
|
9
|
+
* Note: The WASM package provides a lightweight registry in the browser.
|
|
10
|
+
* For more advanced features like Rust integration, use @kreuzberg/node or @kreuzberg/deno.
|
|
11
|
+
*
|
|
12
|
+
* @example
|
|
13
|
+
* ```typescript
|
|
14
|
+
* import { TesseractWasmBackend } from '@kreuzberg/wasm/ocr/tesseract-wasm-backend';
|
|
15
|
+
* import { enableOcr } from '@kreuzberg/wasm';
|
|
16
|
+
*
|
|
17
|
+
* // Simple auto-registration
|
|
18
|
+
* await enableOcr();
|
|
19
|
+
* ```
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
/**
|
|
23
|
+
* Register an OCR backend
|
|
24
|
+
*
|
|
25
|
+
* Registers an OCR backend with the WASM extraction pipeline.
|
|
26
|
+
* If a backend with the same name is already registered, it will be replaced.
|
|
27
|
+
*
|
|
28
|
+
* @param backend - OCR backend implementing OcrBackendProtocol
|
|
29
|
+
* @throws {Error} If backend validation fails
|
|
30
|
+
*
|
|
31
|
+
* @example
|
|
32
|
+
* ```typescript
|
|
33
|
+
* import { TesseractWasmBackend } from '@kreuzberg/wasm/ocr/tesseract-wasm-backend';
|
|
34
|
+
* import { registerOcrBackend } from '@kreuzberg/wasm/ocr/registry';
|
|
35
|
+
*
|
|
36
|
+
* const backend = new TesseractWasmBackend();
|
|
37
|
+
* await backend.initialize();
|
|
38
|
+
* registerOcrBackend(backend);
|
|
39
|
+
* ```
|
|
40
|
+
*/
|
|
41
|
+
declare function registerOcrBackend(backend: OcrBackendProtocol): void;
|
|
42
|
+
/**
|
|
43
|
+
* Get a registered OCR backend by name
|
|
44
|
+
*
|
|
45
|
+
* @param name - Backend name
|
|
46
|
+
* @returns The OCR backend or undefined if not found
|
|
47
|
+
*
|
|
48
|
+
* @example
|
|
49
|
+
* ```typescript
|
|
50
|
+
* import { getOcrBackend } from '@kreuzberg/wasm/ocr/registry';
|
|
51
|
+
*
|
|
52
|
+
* const backend = getOcrBackend('tesseract-wasm');
|
|
53
|
+
* if (backend) {
|
|
54
|
+
* console.log('Available languages:', backend.supportedLanguages());
|
|
55
|
+
* }
|
|
56
|
+
* ```
|
|
57
|
+
*/
|
|
58
|
+
declare function getOcrBackend(name: string): OcrBackendProtocol | undefined;
|
|
59
|
+
/**
|
|
60
|
+
* List all registered OCR backends
|
|
61
|
+
*
|
|
62
|
+
* @returns Array of registered backend names
|
|
63
|
+
*
|
|
64
|
+
* @example
|
|
65
|
+
* ```typescript
|
|
66
|
+
* import { listOcrBackends } from '@kreuzberg/wasm/ocr/registry';
|
|
67
|
+
*
|
|
68
|
+
* const backends = listOcrBackends();
|
|
69
|
+
* console.log('Available OCR backends:', backends);
|
|
70
|
+
* ```
|
|
71
|
+
*/
|
|
72
|
+
declare function listOcrBackends(): string[];
|
|
73
|
+
/**
|
|
74
|
+
* Unregister an OCR backend
|
|
75
|
+
*
|
|
76
|
+
* @param name - Backend name to unregister
|
|
77
|
+
* @throws {Error} If backend is not found (the function is async, so the error is delivered as a rejection of the returned promise)
|
|
78
|
+
*
|
|
79
|
+
* @example
|
|
80
|
+
* ```typescript
|
|
81
|
+
* import { unregisterOcrBackend } from '@kreuzberg/wasm/ocr/registry';
|
|
82
|
+
*
|
|
83
|
+
* await unregisterOcrBackend('tesseract-wasm');
|
|
84
|
+
* ```
|
|
85
|
+
*/
|
|
86
|
+
declare function unregisterOcrBackend(name: string): Promise<void>;
|
|
87
|
+
/**
|
|
88
|
+
* Clear all registered OCR backends
|
|
89
|
+
*
|
|
90
|
+
* Unregisters all OCR backends and calls their shutdown methods.
|
|
91
|
+
*
|
|
92
|
+
* @example
|
|
93
|
+
* ```typescript
|
|
94
|
+
* import { clearOcrBackends } from '@kreuzberg/wasm/ocr/registry';
|
|
95
|
+
*
|
|
96
|
+
* // Clean up all backends when shutting down
|
|
97
|
+
* await clearOcrBackends();
|
|
98
|
+
* ```
|
|
99
|
+
*/
|
|
100
|
+
declare function clearOcrBackends(): Promise<void>;
|
|
101
|
+
|
|
102
|
+
export { clearOcrBackends, getOcrBackend, listOcrBackends, registerOcrBackend, unregisterOcrBackend };
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import { f as OcrBackendProtocol } from '../types-GJVIvbPy.js';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* OCR Backend Registry
|
|
5
|
+
*
|
|
6
|
+
* Provides a registry for OCR backends in the WASM environment.
|
|
7
|
+
* This enables auto-registration and management of OCR backends.
|
|
8
|
+
*
|
|
9
|
+
* Note: The WASM package provides a lightweight registry in the browser.
|
|
10
|
+
* For more advanced features like Rust integration, use @kreuzberg/node or @kreuzberg/deno.
|
|
11
|
+
*
|
|
12
|
+
* @example
|
|
13
|
+
* ```typescript
|
|
14
|
+
* import { TesseractWasmBackend } from '@kreuzberg/wasm/ocr/tesseract-wasm-backend';
|
|
15
|
+
* import { enableOcr } from '@kreuzberg/wasm';
|
|
16
|
+
*
|
|
17
|
+
* // Simple auto-registration
|
|
18
|
+
* await enableOcr();
|
|
19
|
+
* ```
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
/**
|
|
23
|
+
* Register an OCR backend
|
|
24
|
+
*
|
|
25
|
+
* Registers an OCR backend with the WASM extraction pipeline.
|
|
26
|
+
* If a backend with the same name is already registered, it will be replaced.
|
|
27
|
+
*
|
|
28
|
+
* @param backend - OCR backend implementing OcrBackendProtocol
|
|
29
|
+
* @throws {Error} If backend validation fails
|
|
30
|
+
*
|
|
31
|
+
* @example
|
|
32
|
+
* ```typescript
|
|
33
|
+
* import { TesseractWasmBackend } from '@kreuzberg/wasm/ocr/tesseract-wasm-backend';
|
|
34
|
+
* import { registerOcrBackend } from '@kreuzberg/wasm/ocr/registry';
|
|
35
|
+
*
|
|
36
|
+
* const backend = new TesseractWasmBackend();
|
|
37
|
+
* await backend.initialize();
|
|
38
|
+
* registerOcrBackend(backend);
|
|
39
|
+
* ```
|
|
40
|
+
*/
|
|
41
|
+
declare function registerOcrBackend(backend: OcrBackendProtocol): void;
|
|
42
|
+
/**
|
|
43
|
+
* Get a registered OCR backend by name
|
|
44
|
+
*
|
|
45
|
+
* @param name - Backend name
|
|
46
|
+
* @returns The OCR backend or undefined if not found
|
|
47
|
+
*
|
|
48
|
+
* @example
|
|
49
|
+
* ```typescript
|
|
50
|
+
* import { getOcrBackend } from '@kreuzberg/wasm/ocr/registry';
|
|
51
|
+
*
|
|
52
|
+
* const backend = getOcrBackend('tesseract-wasm');
|
|
53
|
+
* if (backend) {
|
|
54
|
+
* console.log('Available languages:', backend.supportedLanguages());
|
|
55
|
+
* }
|
|
56
|
+
* ```
|
|
57
|
+
*/
|
|
58
|
+
declare function getOcrBackend(name: string): OcrBackendProtocol | undefined;
|
|
59
|
+
/**
|
|
60
|
+
* List all registered OCR backends
|
|
61
|
+
*
|
|
62
|
+
* @returns Array of registered backend names
|
|
63
|
+
*
|
|
64
|
+
* @example
|
|
65
|
+
* ```typescript
|
|
66
|
+
* import { listOcrBackends } from '@kreuzberg/wasm/ocr/registry';
|
|
67
|
+
*
|
|
68
|
+
* const backends = listOcrBackends();
|
|
69
|
+
* console.log('Available OCR backends:', backends);
|
|
70
|
+
* ```
|
|
71
|
+
*/
|
|
72
|
+
declare function listOcrBackends(): string[];
|
|
73
|
+
/**
|
|
74
|
+
* Unregister an OCR backend
|
|
75
|
+
*
|
|
76
|
+
* @param name - Backend name to unregister
|
|
77
|
+
* @throws {Error} If backend is not found (the function is async, so the error is delivered as a rejection of the returned promise)
|
|
78
|
+
*
|
|
79
|
+
* @example
|
|
80
|
+
* ```typescript
|
|
81
|
+
* import { unregisterOcrBackend } from '@kreuzberg/wasm/ocr/registry';
|
|
82
|
+
*
|
|
83
|
+
* await unregisterOcrBackend('tesseract-wasm');
|
|
84
|
+
* ```
|
|
85
|
+
*/
|
|
86
|
+
declare function unregisterOcrBackend(name: string): Promise<void>;
|
|
87
|
+
/**
|
|
88
|
+
* Clear all registered OCR backends
|
|
89
|
+
*
|
|
90
|
+
* Unregisters all OCR backends and calls their shutdown methods.
|
|
91
|
+
*
|
|
92
|
+
* @example
|
|
93
|
+
* ```typescript
|
|
94
|
+
* import { clearOcrBackends } from '@kreuzberg/wasm/ocr/registry';
|
|
95
|
+
*
|
|
96
|
+
* // Clean up all backends when shutting down
|
|
97
|
+
* await clearOcrBackends();
|
|
98
|
+
* ```
|
|
99
|
+
*/
|
|
100
|
+
declare function clearOcrBackends(): Promise<void>;
|
|
101
|
+
|
|
102
|
+
export { clearOcrBackends, getOcrBackend, listOcrBackends, registerOcrBackend, unregisterOcrBackend };
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
+
var __export = (target, all) => {
|
|
7
|
+
for (var name in all)
|
|
8
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
9
|
+
};
|
|
10
|
+
var __copyProps = (to, from, except, desc) => {
|
|
11
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
12
|
+
for (let key of __getOwnPropNames(from))
|
|
13
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
14
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
15
|
+
}
|
|
16
|
+
return to;
|
|
17
|
+
};
|
|
18
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
19
|
+
var registry_exports = {};
|
|
20
|
+
__export(registry_exports, {
|
|
21
|
+
clearOcrBackends: () => clearOcrBackends,
|
|
22
|
+
getOcrBackend: () => getOcrBackend,
|
|
23
|
+
listOcrBackends: () => listOcrBackends,
|
|
24
|
+
registerOcrBackend: () => registerOcrBackend,
|
|
25
|
+
unregisterOcrBackend: () => unregisterOcrBackend
|
|
26
|
+
});
|
|
27
|
+
module.exports = __toCommonJS(registry_exports);
|
|
28
|
+
const ocrBackendRegistry = /* @__PURE__ */ new Map();
|
|
29
|
+
function registerOcrBackend(backend) {
|
|
30
|
+
if (!backend) {
|
|
31
|
+
throw new Error("Backend cannot be null or undefined");
|
|
32
|
+
}
|
|
33
|
+
if (typeof backend.name !== "function") {
|
|
34
|
+
throw new Error("Backend must implement name() method");
|
|
35
|
+
}
|
|
36
|
+
if (typeof backend.supportedLanguages !== "function") {
|
|
37
|
+
throw new Error("Backend must implement supportedLanguages() method");
|
|
38
|
+
}
|
|
39
|
+
if (typeof backend.processImage !== "function") {
|
|
40
|
+
throw new Error("Backend must implement processImage() method");
|
|
41
|
+
}
|
|
42
|
+
const backendName = backend.name();
|
|
43
|
+
if (!backendName || typeof backendName !== "string") {
|
|
44
|
+
throw new Error("Backend name must be a non-empty string");
|
|
45
|
+
}
|
|
46
|
+
if (ocrBackendRegistry.has(backendName)) {
|
|
47
|
+
console.warn(`OCR backend "${backendName}" is already registered and will be replaced`);
|
|
48
|
+
}
|
|
49
|
+
ocrBackendRegistry.set(backendName, backend);
|
|
50
|
+
}
|
|
51
|
+
function getOcrBackend(name) {
|
|
52
|
+
return ocrBackendRegistry.get(name);
|
|
53
|
+
}
|
|
54
|
+
function listOcrBackends() {
|
|
55
|
+
return Array.from(ocrBackendRegistry.keys());
|
|
56
|
+
}
|
|
57
|
+
async function unregisterOcrBackend(name) {
|
|
58
|
+
const backend = ocrBackendRegistry.get(name);
|
|
59
|
+
if (!backend) {
|
|
60
|
+
throw new Error(
|
|
61
|
+
`OCR backend "${name}" is not registered. Available backends: ${Array.from(ocrBackendRegistry.keys()).join(", ")}`
|
|
62
|
+
);
|
|
63
|
+
}
|
|
64
|
+
if (typeof backend.shutdown === "function") {
|
|
65
|
+
try {
|
|
66
|
+
await backend.shutdown();
|
|
67
|
+
} catch (error) {
|
|
68
|
+
console.warn(
|
|
69
|
+
`Error shutting down OCR backend "${name}": ${error instanceof Error ? error.message : String(error)}`
|
|
70
|
+
);
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
ocrBackendRegistry.delete(name);
|
|
74
|
+
}
|
|
75
|
+
async function clearOcrBackends() {
|
|
76
|
+
const backends = Array.from(ocrBackendRegistry.entries());
|
|
77
|
+
for (const [name, backend] of backends) {
|
|
78
|
+
if (typeof backend.shutdown === "function") {
|
|
79
|
+
try {
|
|
80
|
+
await backend.shutdown();
|
|
81
|
+
} catch (error) {
|
|
82
|
+
console.warn(
|
|
83
|
+
`Error shutting down OCR backend "${name}": ${error instanceof Error ? error.message : String(error)}`
|
|
84
|
+
);
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
ocrBackendRegistry.clear();
|
|
89
|
+
}
|
|
90
|
+
//# sourceMappingURL=registry.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../typescript/ocr/registry.ts"],"sourcesContent":["/**\n * OCR Backend Registry\n *\n * Provides a registry for OCR backends in the WASM environment.\n * This enables auto-registration and management of OCR backends.\n *\n * Note: The WASM package provides a lightweight registry in the browser.\n * For more advanced features like Rust integration, use @kreuzberg/node or @kreuzberg/deno.\n *\n * @example\n * ```typescript\n * import { TesseractWasmBackend } from '@kreuzberg/wasm/ocr/tesseract-wasm-backend';\n * import { enableOcr } from '@kreuzberg/wasm';\n *\n * // Simple auto-registration\n * await enableOcr();\n * ```\n */\n\nimport type { OcrBackendProtocol } from \"../types.js\";\n\n/** Global registry of OCR backends */\nconst ocrBackendRegistry = new Map<string, OcrBackendProtocol>();\n\n/**\n * Register an OCR backend\n *\n * Registers an OCR backend with the WASM extraction pipeline.\n * If a backend with the same name is already registered, it will be replaced.\n *\n * @param backend - OCR backend implementing OcrBackendProtocol\n * @throws {Error} If backend validation fails\n *\n * @example\n * ```typescript\n * import { TesseractWasmBackend } from '@kreuzberg/wasm/ocr/tesseract-wasm-backend';\n * import { registerOcrBackend } from '@kreuzberg/wasm/ocr/registry';\n *\n * const backend = new TesseractWasmBackend();\n * await backend.initialize();\n * registerOcrBackend(backend);\n * ```\n */\nexport function registerOcrBackend(backend: OcrBackendProtocol): void {\n\t// Validate backend\n\tif (!backend) {\n\t\tthrow new Error(\"Backend cannot be null or undefined\");\n\t}\n\n\tif (typeof backend.name !== \"function\") {\n\t\tthrow new Error(\"Backend must implement name() method\");\n\t}\n\n\tif (typeof backend.supportedLanguages !== \"function\") {\n\t\tthrow new Error(\"Backend must implement supportedLanguages() method\");\n\t}\n\n\tif (typeof backend.processImage !== \"function\") {\n\t\tthrow new Error(\"Backend must 
implement processImage() method\");\n\t}\n\n\tconst backendName = backend.name();\n\n\tif (!backendName || typeof backendName !== \"string\") {\n\t\tthrow new Error(\"Backend name must be a non-empty string\");\n\t}\n\n\t// Check for duplicate registration (allow overwriting with warning)\n\tif (ocrBackendRegistry.has(backendName)) {\n\t\tconsole.warn(`OCR backend \"${backendName}\" is already registered and will be replaced`);\n\t}\n\n\t// Register the backend\n\tocrBackendRegistry.set(backendName, backend);\n}\n\n/**\n * Get a registered OCR backend by name\n *\n * @param name - Backend name\n * @returns The OCR backend or undefined if not found\n *\n * @example\n * ```typescript\n * import { getOcrBackend } from '@kreuzberg/wasm/ocr/registry';\n *\n * const backend = getOcrBackend('tesseract-wasm');\n * if (backend) {\n * console.log('Available languages:', backend.supportedLanguages());\n * }\n * ```\n */\nexport function getOcrBackend(name: string): OcrBackendProtocol | undefined {\n\treturn ocrBackendRegistry.get(name);\n}\n\n/**\n * List all registered OCR backends\n *\n * @returns Array of registered backend names\n *\n * @example\n * ```typescript\n * import { listOcrBackends } from '@kreuzberg/wasm/ocr/registry';\n *\n * const backends = listOcrBackends();\n * console.log('Available OCR backends:', backends);\n * ```\n */\nexport function listOcrBackends(): string[] {\n\treturn Array.from(ocrBackendRegistry.keys());\n}\n\n/**\n * Unregister an OCR backend\n *\n * @param name - Backend name to unregister\n * @throws {Error} If backend is not found\n *\n * @example\n * ```typescript\n * import { unregisterOcrBackend } from '@kreuzberg/wasm/ocr/registry';\n *\n * unregisterOcrBackend('tesseract-wasm');\n * ```\n */\nexport async function unregisterOcrBackend(name: string): Promise<void> {\n\tconst backend = ocrBackendRegistry.get(name);\n\n\tif (!backend) {\n\t\tthrow new Error(\n\t\t\t`OCR backend \"${name}\" is not registered. 
Available backends: ${Array.from(ocrBackendRegistry.keys()).join(\", \")}`,\n\t\t);\n\t}\n\n\t// Call shutdown if available\n\tif (typeof backend.shutdown === \"function\") {\n\t\ttry {\n\t\t\tawait backend.shutdown();\n\t\t} catch (error) {\n\t\t\tconsole.warn(\n\t\t\t\t`Error shutting down OCR backend \"${name}\": ${error instanceof Error ? error.message : String(error)}`,\n\t\t\t);\n\t\t}\n\t}\n\n\tocrBackendRegistry.delete(name);\n}\n\n/**\n * Clear all registered OCR backends\n *\n * Unregisters all OCR backends and calls their shutdown methods.\n *\n * @example\n * ```typescript\n * import { clearOcrBackends } from '@kreuzberg/wasm/ocr/registry';\n *\n * // Clean up all backends when shutting down\n * await clearOcrBackends();\n * ```\n */\nexport async function clearOcrBackends(): Promise<void> {\n\tconst backends = Array.from(ocrBackendRegistry.entries());\n\n\tfor (const [name, backend] of backends) {\n\t\tif (typeof backend.shutdown === \"function\") {\n\t\t\ttry {\n\t\t\t\tawait backend.shutdown();\n\t\t\t} catch (error) {\n\t\t\t\tconsole.warn(\n\t\t\t\t\t`Error shutting down OCR backend \"${name}\": ${error instanceof Error ? 
error.message : String(error)}`,\n\t\t\t\t);\n\t\t\t}\n\t\t}\n\t}\n\n\tocrBackendRegistry.clear();\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAsBA,MAAM,qBAAqB,oBAAI,IAAgC;AAqBxD,SAAS,mBAAmB,SAAmC;AAErE,MAAI,CAAC,SAAS;AACb,UAAM,IAAI,MAAM,qCAAqC;AAAA,EACtD;AAEA,MAAI,OAAO,QAAQ,SAAS,YAAY;AACvC,UAAM,IAAI,MAAM,sCAAsC;AAAA,EACvD;AAEA,MAAI,OAAO,QAAQ,uBAAuB,YAAY;AACrD,UAAM,IAAI,MAAM,oDAAoD;AAAA,EACrE;AAEA,MAAI,OAAO,QAAQ,iBAAiB,YAAY;AAC/C,UAAM,IAAI,MAAM,8CAA8C;AAAA,EAC/D;AAEA,QAAM,cAAc,QAAQ,KAAK;AAEjC,MAAI,CAAC,eAAe,OAAO,gBAAgB,UAAU;AACpD,UAAM,IAAI,MAAM,yCAAyC;AAAA,EAC1D;AAGA,MAAI,mBAAmB,IAAI,WAAW,GAAG;AACxC,YAAQ,KAAK,gBAAgB,WAAW,8CAA8C;AAAA,EACvF;AAGA,qBAAmB,IAAI,aAAa,OAAO;AAC5C;AAkBO,SAAS,cAAc,MAA8C;AAC3E,SAAO,mBAAmB,IAAI,IAAI;AACnC;AAeO,SAAS,kBAA4B;AAC3C,SAAO,MAAM,KAAK,mBAAmB,KAAK,CAAC;AAC5C;AAeA,eAAsB,qBAAqB,MAA6B;AACvE,QAAM,UAAU,mBAAmB,IAAI,IAAI;AAE3C,MAAI,CAAC,SAAS;AACb,UAAM,IAAI;AAAA,MACT,gBAAgB,IAAI,4CAA4C,MAAM,KAAK,mBAAmB,KAAK,CAAC,EAAE,KAAK,IAAI,CAAC;AAAA,IACjH;AAAA,EACD;AAGA,MAAI,OAAO,QAAQ,aAAa,YAAY;AAC3C,QAAI;AACH,YAAM,QAAQ,SAAS;AAAA,IACxB,SAAS,OAAO;AACf,cAAQ;AAAA,QACP,oCAAoC,IAAI,MAAM,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,CAAC;AAAA,MACrG;AAAA,IACD;AAAA,EACD;AAEA,qBAAmB,OAAO,IAAI;AAC/B;AAeA,eAAsB,mBAAkC;AACvD,QAAM,WAAW,MAAM,KAAK,mBAAmB,QAAQ,CAAC;AAExD,aAAW,CAAC,MAAM,OAAO,KAAK,UAAU;AACvC,QAAI,OAAO,QAAQ,aAAa,YAAY;AAC3C,UAAI;AACH,cAAM,QAAQ,SAAS;AAAA,MACxB,SAAS,OAAO;AACf,gBAAQ;AAAA,UACP,oCAAoC,IAAI,MAAM,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,CAAC;AAAA,QACrG;AAAA,MACD;AAAA,IACD;AAAA,EACD;AAEA,qBAAmB,MAAM;AAC1B;","names":[]}
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
const ocrBackendRegistry = /* @__PURE__ */ new Map();
|
|
2
|
+
function registerOcrBackend(backend) {
|
|
3
|
+
if (!backend) {
|
|
4
|
+
throw new Error("Backend cannot be null or undefined");
|
|
5
|
+
}
|
|
6
|
+
if (typeof backend.name !== "function") {
|
|
7
|
+
throw new Error("Backend must implement name() method");
|
|
8
|
+
}
|
|
9
|
+
if (typeof backend.supportedLanguages !== "function") {
|
|
10
|
+
throw new Error("Backend must implement supportedLanguages() method");
|
|
11
|
+
}
|
|
12
|
+
if (typeof backend.processImage !== "function") {
|
|
13
|
+
throw new Error("Backend must implement processImage() method");
|
|
14
|
+
}
|
|
15
|
+
const backendName = backend.name();
|
|
16
|
+
if (!backendName || typeof backendName !== "string") {
|
|
17
|
+
throw new Error("Backend name must be a non-empty string");
|
|
18
|
+
}
|
|
19
|
+
if (ocrBackendRegistry.has(backendName)) {
|
|
20
|
+
console.warn(`OCR backend "${backendName}" is already registered and will be replaced`);
|
|
21
|
+
}
|
|
22
|
+
ocrBackendRegistry.set(backendName, backend);
|
|
23
|
+
}
|
|
24
|
+
function getOcrBackend(name) {
|
|
25
|
+
return ocrBackendRegistry.get(name);
|
|
26
|
+
}
|
|
27
|
+
function listOcrBackends() {
|
|
28
|
+
return Array.from(ocrBackendRegistry.keys());
|
|
29
|
+
}
|
|
30
|
+
async function unregisterOcrBackend(name) {
|
|
31
|
+
const backend = ocrBackendRegistry.get(name);
|
|
32
|
+
if (!backend) {
|
|
33
|
+
throw new Error(
|
|
34
|
+
`OCR backend "${name}" is not registered. Available backends: ${Array.from(ocrBackendRegistry.keys()).join(", ")}`
|
|
35
|
+
);
|
|
36
|
+
}
|
|
37
|
+
if (typeof backend.shutdown === "function") {
|
|
38
|
+
try {
|
|
39
|
+
await backend.shutdown();
|
|
40
|
+
} catch (error) {
|
|
41
|
+
console.warn(
|
|
42
|
+
`Error shutting down OCR backend "${name}": ${error instanceof Error ? error.message : String(error)}`
|
|
43
|
+
);
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
ocrBackendRegistry.delete(name);
|
|
47
|
+
}
|
|
48
|
+
async function clearOcrBackends() {
|
|
49
|
+
const backends = Array.from(ocrBackendRegistry.entries());
|
|
50
|
+
for (const [name, backend] of backends) {
|
|
51
|
+
if (typeof backend.shutdown === "function") {
|
|
52
|
+
try {
|
|
53
|
+
await backend.shutdown();
|
|
54
|
+
} catch (error) {
|
|
55
|
+
console.warn(
|
|
56
|
+
`Error shutting down OCR backend "${name}": ${error instanceof Error ? error.message : String(error)}`
|
|
57
|
+
);
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
ocrBackendRegistry.clear();
|
|
62
|
+
}
|
|
63
|
+
export {
|
|
64
|
+
clearOcrBackends,
|
|
65
|
+
getOcrBackend,
|
|
66
|
+
listOcrBackends,
|
|
67
|
+
registerOcrBackend,
|
|
68
|
+
unregisterOcrBackend
|
|
69
|
+
};
|
|
70
|
+
//# sourceMappingURL=registry.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../typescript/ocr/registry.ts"],"sourcesContent":["/**\n * OCR Backend Registry\n *\n * Provides a registry for OCR backends in the WASM environment.\n * This enables auto-registration and management of OCR backends.\n *\n * Note: The WASM package provides a lightweight registry in the browser.\n * For more advanced features like Rust integration, use @kreuzberg/node or @kreuzberg/deno.\n *\n * @example\n * ```typescript\n * import { TesseractWasmBackend } from '@kreuzberg/wasm/ocr/tesseract-wasm-backend';\n * import { enableOcr } from '@kreuzberg/wasm';\n *\n * // Simple auto-registration\n * await enableOcr();\n * ```\n */\n\nimport type { OcrBackendProtocol } from \"../types.js\";\n\n/** Global registry of OCR backends */\nconst ocrBackendRegistry = new Map<string, OcrBackendProtocol>();\n\n/**\n * Register an OCR backend\n *\n * Registers an OCR backend with the WASM extraction pipeline.\n * If a backend with the same name is already registered, it will be replaced.\n *\n * @param backend - OCR backend implementing OcrBackendProtocol\n * @throws {Error} If backend validation fails\n *\n * @example\n * ```typescript\n * import { TesseractWasmBackend } from '@kreuzberg/wasm/ocr/tesseract-wasm-backend';\n * import { registerOcrBackend } from '@kreuzberg/wasm/ocr/registry';\n *\n * const backend = new TesseractWasmBackend();\n * await backend.initialize();\n * registerOcrBackend(backend);\n * ```\n */\nexport function registerOcrBackend(backend: OcrBackendProtocol): void {\n\t// Validate backend\n\tif (!backend) {\n\t\tthrow new Error(\"Backend cannot be null or undefined\");\n\t}\n\n\tif (typeof backend.name !== \"function\") {\n\t\tthrow new Error(\"Backend must implement name() method\");\n\t}\n\n\tif (typeof backend.supportedLanguages !== \"function\") {\n\t\tthrow new Error(\"Backend must implement supportedLanguages() method\");\n\t}\n\n\tif (typeof backend.processImage !== \"function\") {\n\t\tthrow new Error(\"Backend must 
implement processImage() method\");\n\t}\n\n\tconst backendName = backend.name();\n\n\tif (!backendName || typeof backendName !== \"string\") {\n\t\tthrow new Error(\"Backend name must be a non-empty string\");\n\t}\n\n\t// Check for duplicate registration (allow overwriting with warning)\n\tif (ocrBackendRegistry.has(backendName)) {\n\t\tconsole.warn(`OCR backend \"${backendName}\" is already registered and will be replaced`);\n\t}\n\n\t// Register the backend\n\tocrBackendRegistry.set(backendName, backend);\n}\n\n/**\n * Get a registered OCR backend by name\n *\n * @param name - Backend name\n * @returns The OCR backend or undefined if not found\n *\n * @example\n * ```typescript\n * import { getOcrBackend } from '@kreuzberg/wasm/ocr/registry';\n *\n * const backend = getOcrBackend('tesseract-wasm');\n * if (backend) {\n * console.log('Available languages:', backend.supportedLanguages());\n * }\n * ```\n */\nexport function getOcrBackend(name: string): OcrBackendProtocol | undefined {\n\treturn ocrBackendRegistry.get(name);\n}\n\n/**\n * List all registered OCR backends\n *\n * @returns Array of registered backend names\n *\n * @example\n * ```typescript\n * import { listOcrBackends } from '@kreuzberg/wasm/ocr/registry';\n *\n * const backends = listOcrBackends();\n * console.log('Available OCR backends:', backends);\n * ```\n */\nexport function listOcrBackends(): string[] {\n\treturn Array.from(ocrBackendRegistry.keys());\n}\n\n/**\n * Unregister an OCR backend\n *\n * @param name - Backend name to unregister\n * @throws {Error} If backend is not found\n *\n * @example\n * ```typescript\n * import { unregisterOcrBackend } from '@kreuzberg/wasm/ocr/registry';\n *\n * unregisterOcrBackend('tesseract-wasm');\n * ```\n */\nexport async function unregisterOcrBackend(name: string): Promise<void> {\n\tconst backend = ocrBackendRegistry.get(name);\n\n\tif (!backend) {\n\t\tthrow new Error(\n\t\t\t`OCR backend \"${name}\" is not registered. 
Available backends: ${Array.from(ocrBackendRegistry.keys()).join(\", \")}`,\n\t\t);\n\t}\n\n\t// Call shutdown if available\n\tif (typeof backend.shutdown === \"function\") {\n\t\ttry {\n\t\t\tawait backend.shutdown();\n\t\t} catch (error) {\n\t\t\tconsole.warn(\n\t\t\t\t`Error shutting down OCR backend \"${name}\": ${error instanceof Error ? error.message : String(error)}`,\n\t\t\t);\n\t\t}\n\t}\n\n\tocrBackendRegistry.delete(name);\n}\n\n/**\n * Clear all registered OCR backends\n *\n * Unregisters all OCR backends and calls their shutdown methods.\n *\n * @example\n * ```typescript\n * import { clearOcrBackends } from '@kreuzberg/wasm/ocr/registry';\n *\n * // Clean up all backends when shutting down\n * await clearOcrBackends();\n * ```\n */\nexport async function clearOcrBackends(): Promise<void> {\n\tconst backends = Array.from(ocrBackendRegistry.entries());\n\n\tfor (const [name, backend] of backends) {\n\t\tif (typeof backend.shutdown === \"function\") {\n\t\t\ttry {\n\t\t\t\tawait backend.shutdown();\n\t\t\t} catch (error) {\n\t\t\t\tconsole.warn(\n\t\t\t\t\t`Error shutting down OCR backend \"${name}\": ${error instanceof Error ? 
error.message : String(error)}`,\n\t\t\t\t);\n\t\t\t}\n\t\t}\n\t}\n\n\tocrBackendRegistry.clear();\n}\n"],"mappings":"AAsBA,MAAM,qBAAqB,oBAAI,IAAgC;AAqBxD,SAAS,mBAAmB,SAAmC;AAErE,MAAI,CAAC,SAAS;AACb,UAAM,IAAI,MAAM,qCAAqC;AAAA,EACtD;AAEA,MAAI,OAAO,QAAQ,SAAS,YAAY;AACvC,UAAM,IAAI,MAAM,sCAAsC;AAAA,EACvD;AAEA,MAAI,OAAO,QAAQ,uBAAuB,YAAY;AACrD,UAAM,IAAI,MAAM,oDAAoD;AAAA,EACrE;AAEA,MAAI,OAAO,QAAQ,iBAAiB,YAAY;AAC/C,UAAM,IAAI,MAAM,8CAA8C;AAAA,EAC/D;AAEA,QAAM,cAAc,QAAQ,KAAK;AAEjC,MAAI,CAAC,eAAe,OAAO,gBAAgB,UAAU;AACpD,UAAM,IAAI,MAAM,yCAAyC;AAAA,EAC1D;AAGA,MAAI,mBAAmB,IAAI,WAAW,GAAG;AACxC,YAAQ,KAAK,gBAAgB,WAAW,8CAA8C;AAAA,EACvF;AAGA,qBAAmB,IAAI,aAAa,OAAO;AAC5C;AAkBO,SAAS,cAAc,MAA8C;AAC3E,SAAO,mBAAmB,IAAI,IAAI;AACnC;AAeO,SAAS,kBAA4B;AAC3C,SAAO,MAAM,KAAK,mBAAmB,KAAK,CAAC;AAC5C;AAeA,eAAsB,qBAAqB,MAA6B;AACvE,QAAM,UAAU,mBAAmB,IAAI,IAAI;AAE3C,MAAI,CAAC,SAAS;AACb,UAAM,IAAI;AAAA,MACT,gBAAgB,IAAI,4CAA4C,MAAM,KAAK,mBAAmB,KAAK,CAAC,EAAE,KAAK,IAAI,CAAC;AAAA,IACjH;AAAA,EACD;AAGA,MAAI,OAAO,QAAQ,aAAa,YAAY;AAC3C,QAAI;AACH,YAAM,QAAQ,SAAS;AAAA,IACxB,SAAS,OAAO;AACf,cAAQ;AAAA,QACP,oCAAoC,IAAI,MAAM,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,CAAC;AAAA,MACrG;AAAA,IACD;AAAA,EACD;AAEA,qBAAmB,OAAO,IAAI;AAC/B;AAeA,eAAsB,mBAAkC;AACvD,QAAM,WAAW,MAAM,KAAK,mBAAmB,QAAQ,CAAC;AAExD,aAAW,CAAC,MAAM,OAAO,KAAK,UAAU;AACvC,QAAI,OAAO,QAAQ,aAAa,YAAY;AAC3C,UAAI;AACH,cAAM,QAAQ,SAAS;AAAA,MACxB,SAAS,OAAO;AACf,gBAAQ;AAAA,UACP,oCAAoC,IAAI,MAAM,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,CAAC;AAAA,QACrG;AAAA,MACD;AAAA,IACD;AAAA,EACD;AAEA,qBAAmB,MAAM;AAC1B;","names":[]}
|