@kreuzberg/wasm 4.0.0-rc.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +982 -0
- package/dist/adapters/wasm-adapter.cjs +245 -0
- package/dist/adapters/wasm-adapter.cjs.map +1 -0
- package/dist/adapters/wasm-adapter.d.cts +121 -0
- package/dist/adapters/wasm-adapter.d.ts +121 -0
- package/dist/adapters/wasm-adapter.js +224 -0
- package/dist/adapters/wasm-adapter.js.map +1 -0
- package/dist/index.cjs +4335 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +466 -0
- package/dist/index.d.ts +466 -0
- package/dist/index.js +4308 -0
- package/dist/index.js.map +1 -0
- package/dist/ocr/registry.cjs +92 -0
- package/dist/ocr/registry.cjs.map +1 -0
- package/dist/ocr/registry.d.cts +102 -0
- package/dist/ocr/registry.d.ts +102 -0
- package/dist/ocr/registry.js +71 -0
- package/dist/ocr/registry.js.map +1 -0
- package/dist/ocr/tesseract-wasm-backend.cjs +3566 -0
- package/dist/ocr/tesseract-wasm-backend.cjs.map +1 -0
- package/dist/ocr/tesseract-wasm-backend.d.cts +257 -0
- package/dist/ocr/tesseract-wasm-backend.d.ts +257 -0
- package/dist/ocr/tesseract-wasm-backend.js +3551 -0
- package/dist/ocr/tesseract-wasm-backend.js.map +1 -0
- package/dist/runtime.cjs +174 -0
- package/dist/runtime.cjs.map +1 -0
- package/dist/runtime.d.cts +256 -0
- package/dist/runtime.d.ts +256 -0
- package/dist/runtime.js +153 -0
- package/dist/runtime.js.map +1 -0
- package/dist/types-CKjcIYcX.d.cts +294 -0
- package/dist/types-CKjcIYcX.d.ts +294 -0
- package/package.json +140 -0
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Short aliases for the Object built-ins that the interop helpers below rely on.
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
+
// __export(target, all): for every key in `all`, defines an enumerable accessor on
// `target` whose getter is the function stored at all[name].
var __export = (target, all) => {
|
|
7
|
+
for (var name in all)
|
|
8
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
9
|
+
};
|
|
10
|
+
// __copyProps(to, from, except, desc): when `from` is a non-null object or a function,
// re-exposes each of its own property names on `to` through a getter that reads
// from[key] at access time, then returns `to`. `desc` is only used as scratch space
// for the descriptor lookup inside the loop.
var __copyProps = (to, from, except, desc) => {
|
|
11
|
+
// `&&` binds tighter than `||`, so the truthiness check guards only the "object"
// case (typeof null === "object"); functions pass through the second operand.
if (from && typeof from === "object" || typeof from === "function") {
|
|
12
|
+
for (let key of __getOwnPropNames(from))
|
|
13
|
+
// Skip names that `to` already owns and the single name passed as `except`.
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
14
|
+
// Mirror the source property's enumerability; treat it as enumerable when no descriptor is found.
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
15
|
+
}
|
|
16
|
+
return to;
|
|
17
|
+
};
|
|
18
|
+
// __toCommonJS(mod): builds a fresh object carrying an `__esModule: true` marker
// and copies mod's properties onto it via __copyProps.
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
19
|
+
|
|
20
|
+
// typescript/ocr/registry.ts
|
|
21
|
+
// Namespace object that collects this module's public API.
var registry_exports = {};
|
|
22
|
+
// Each export is registered as an arrow-function getter, so the binding is looked up
// when the property is read rather than when this statement runs.
__export(registry_exports, {
|
|
23
|
+
clearOcrBackends: () => clearOcrBackends,
|
|
24
|
+
getOcrBackend: () => getOcrBackend,
|
|
25
|
+
listOcrBackends: () => listOcrBackends,
|
|
26
|
+
registerOcrBackend: () => registerOcrBackend,
|
|
27
|
+
unregisterOcrBackend: () => unregisterOcrBackend
|
|
28
|
+
});
|
|
29
|
+
// Publish the namespace as the CommonJS export object; __toCommonJS adds the __esModule marker.
module.exports = __toCommonJS(registry_exports);
|
|
30
|
+
/** Module-level registry mapping the string returned by `backend.name()` to the backend instance. */
const ocrBackendRegistry = /* @__PURE__ */ new Map();

/**
 * Registers an OCR backend under the name reported by `backend.name()`.
 *
 * If a backend with the same name is already registered, a warning is logged and
 * the existing entry is replaced. The replaced backend is NOT shut down here.
 *
 * @param {object} backend - Backend exposing `name()`, `supportedLanguages()` and
 *   `processImage()` methods; `shutdown()` is optional and only used on removal.
 * @returns {void}
 * @throws {Error} If `backend` is falsy, lacks one of the required methods, or
 *   `name()` does not return a non-empty string.
 */
function registerOcrBackend(backend) {
  if (!backend) {
    throw new Error("Backend cannot be null or undefined");
  }
  if (typeof backend.name !== "function") {
    throw new Error("Backend must implement name() method");
  }
  if (typeof backend.supportedLanguages !== "function") {
    throw new Error("Backend must implement supportedLanguages() method");
  }
  if (typeof backend.processImage !== "function") {
    throw new Error("Backend must implement processImage() method");
  }
  const backendName = backend.name();
  if (!backendName || typeof backendName !== "string") {
    throw new Error("Backend name must be a non-empty string");
  }
  // Duplicate names are allowed on purpose: warn, then overwrite.
  if (ocrBackendRegistry.has(backendName)) {
    console.warn(`OCR backend "${backendName}" is already registered and will be replaced`);
  }
  ocrBackendRegistry.set(backendName, backend);
}

/**
 * Looks up a registered OCR backend.
 *
 * @param {string} name - Name the backend was registered under.
 * @returns {object | undefined} The backend instance, or `undefined` when no
 *   backend is registered under `name`.
 */
function getOcrBackend(name) {
  return ocrBackendRegistry.get(name);
}

/**
 * Lists the names of all registered OCR backends.
 *
 * @returns {string[]} A new array of names in registration (Map insertion) order.
 */
function listOcrBackends() {
  return Array.from(ocrBackendRegistry.keys());
}

/**
 * Awaits `backend.shutdown()` and turns any failure into a console warning, so a
 * misbehaving backend cannot abort the removal that triggered the shutdown.
 * Callers must check that `backend.shutdown` is a function before calling.
 *
 * @param {string} name - Registry name, used only in the warning text.
 * @param {object} backend - Backend whose `shutdown()` is invoked.
 * @returns {Promise<void>}
 */
async function shutdownOcrBackendSafely(name, backend) {
  try {
    await backend.shutdown();
  } catch (error) {
    console.warn(
      `Error shutting down OCR backend "${name}": ${error instanceof Error ? error.message : String(error)}`
    );
  }
}

/**
 * Unregisters an OCR backend, calling its optional `shutdown()` first.
 *
 * Shutdown errors are logged as warnings and do not prevent removal.
 *
 * @param {string} name - Name of the backend to remove.
 * @returns {Promise<void>} Resolves once shutdown has settled and the entry is removed.
 * @throws {Error} (as a rejection) If no backend is registered under `name`.
 */
async function unregisterOcrBackend(name) {
  const backend = ocrBackendRegistry.get(name);
  if (!backend) {
    throw new Error(
      `OCR backend "${name}" is not registered. Available backends: ${Array.from(ocrBackendRegistry.keys()).join(", ")}`
    );
  }
  if (typeof backend.shutdown === "function") {
    await shutdownOcrBackendSafely(name, backend);
  }
  // The registry may have changed while shutdown() was pending. Remove the entry
  // only if it still refers to the instance we shut down, so a replacement that
  // was registered under the same name in the meantime is not silently dropped.
  if (ocrBackendRegistry.get(name) === backend) {
    ocrBackendRegistry.delete(name);
  }
}

/**
 * Shuts down and unregisters every OCR backend.
 *
 * Backends are shut down one at a time in registration order; shutdown errors are
 * logged as warnings. Backends that get registered while earlier shutdowns are
 * still pending are shut down as well before the registry is emptied, so no
 * backend is removed without its `shutdown()` having been called.
 *
 * @returns {Promise<void>} Resolves once the registry is empty.
 */
async function clearOcrBackends() {
  // Instances already handled, so each one is shut down at most once per call.
  const handled = new Set();
  for (;;) {
    const pending = Array.from(ocrBackendRegistry.entries()).filter(
      ([, backend]) => !handled.has(backend)
    );
    if (pending.length === 0) {
      break;
    }
    for (const [name, backend] of pending) {
      handled.add(backend);
      if (typeof backend.shutdown === "function") {
        await shutdownOcrBackendSafely(name, backend);
      }
    }
  }
  // No await separates the final emptiness check from this call, so nothing
  // that has not been shut down can slip in before the registry is cleared.
  ocrBackendRegistry.clear();
}
|
|
92
|
+
//# sourceMappingURL=registry.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../typescript/ocr/registry.ts"],"sourcesContent":["/**\n * OCR Backend Registry\n *\n * Provides a registry for OCR backends in the WASM environment.\n * This enables auto-registration and management of OCR backends.\n *\n * Note: The WASM package provides a lightweight registry in the browser.\n * For more advanced features like Rust integration, use @kreuzberg/node or @kreuzberg/deno.\n *\n * @example\n * ```typescript\n * import { TesseractWasmBackend } from '@kreuzberg/wasm/ocr/tesseract-wasm-backend';\n * import { enableOcr } from '@kreuzberg/wasm';\n *\n * // Simple auto-registration\n * await enableOcr();\n * ```\n */\n\nimport type { OcrBackendProtocol } from \"../types.js\";\n\n/** Global registry of OCR backends */\nconst ocrBackendRegistry = new Map<string, OcrBackendProtocol>();\n\n/**\n * Register an OCR backend\n *\n * Registers an OCR backend with the WASM extraction pipeline.\n * If a backend with the same name is already registered, it will be replaced.\n *\n * @param backend - OCR backend implementing OcrBackendProtocol\n * @throws {Error} If backend validation fails\n *\n * @example\n * ```typescript\n * import { TesseractWasmBackend } from '@kreuzberg/wasm/ocr/tesseract-wasm-backend';\n * import { registerOcrBackend } from '@kreuzberg/wasm/ocr/registry';\n *\n * const backend = new TesseractWasmBackend();\n * await backend.initialize();\n * registerOcrBackend(backend);\n * ```\n */\nexport function registerOcrBackend(backend: OcrBackendProtocol): void {\n\t// Validate backend\n\tif (!backend) {\n\t\tthrow new Error(\"Backend cannot be null or undefined\");\n\t}\n\n\tif (typeof backend.name !== \"function\") {\n\t\tthrow new Error(\"Backend must implement name() method\");\n\t}\n\n\tif (typeof backend.supportedLanguages !== \"function\") {\n\t\tthrow new Error(\"Backend must implement supportedLanguages() method\");\n\t}\n\n\tif (typeof backend.processImage !== \"function\") {\n\t\tthrow new Error(\"Backend must 
implement processImage() method\");\n\t}\n\n\tconst backendName = backend.name();\n\n\tif (!backendName || typeof backendName !== \"string\") {\n\t\tthrow new Error(\"Backend name must be a non-empty string\");\n\t}\n\n\t// Check for duplicate registration (allow overwriting with warning)\n\tif (ocrBackendRegistry.has(backendName)) {\n\t\tconsole.warn(`OCR backend \"${backendName}\" is already registered and will be replaced`);\n\t}\n\n\t// Register the backend\n\tocrBackendRegistry.set(backendName, backend);\n}\n\n/**\n * Get a registered OCR backend by name\n *\n * @param name - Backend name\n * @returns The OCR backend or undefined if not found\n *\n * @example\n * ```typescript\n * import { getOcrBackend } from '@kreuzberg/wasm/ocr/registry';\n *\n * const backend = getOcrBackend('tesseract-wasm');\n * if (backend) {\n * console.log('Available languages:', backend.supportedLanguages());\n * }\n * ```\n */\nexport function getOcrBackend(name: string): OcrBackendProtocol | undefined {\n\treturn ocrBackendRegistry.get(name);\n}\n\n/**\n * List all registered OCR backends\n *\n * @returns Array of registered backend names\n *\n * @example\n * ```typescript\n * import { listOcrBackends } from '@kreuzberg/wasm/ocr/registry';\n *\n * const backends = listOcrBackends();\n * console.log('Available OCR backends:', backends);\n * ```\n */\nexport function listOcrBackends(): string[] {\n\treturn Array.from(ocrBackendRegistry.keys());\n}\n\n/**\n * Unregister an OCR backend\n *\n * @param name - Backend name to unregister\n * @throws {Error} If backend is not found\n *\n * @example\n * ```typescript\n * import { unregisterOcrBackend } from '@kreuzberg/wasm/ocr/registry';\n *\n * unregisterOcrBackend('tesseract-wasm');\n * ```\n */\nexport async function unregisterOcrBackend(name: string): Promise<void> {\n\tconst backend = ocrBackendRegistry.get(name);\n\n\tif (!backend) {\n\t\tthrow new Error(\n\t\t\t`OCR backend \"${name}\" is not registered. 
Available backends: ${Array.from(ocrBackendRegistry.keys()).join(\", \")}`,\n\t\t);\n\t}\n\n\t// Call shutdown if available\n\tif (typeof backend.shutdown === \"function\") {\n\t\ttry {\n\t\t\tawait backend.shutdown();\n\t\t} catch (error) {\n\t\t\tconsole.warn(\n\t\t\t\t`Error shutting down OCR backend \"${name}\": ${error instanceof Error ? error.message : String(error)}`,\n\t\t\t);\n\t\t}\n\t}\n\n\tocrBackendRegistry.delete(name);\n}\n\n/**\n * Clear all registered OCR backends\n *\n * Unregisters all OCR backends and calls their shutdown methods.\n *\n * @example\n * ```typescript\n * import { clearOcrBackends } from '@kreuzberg/wasm/ocr/registry';\n *\n * // Clean up all backends when shutting down\n * await clearOcrBackends();\n * ```\n */\nexport async function clearOcrBackends(): Promise<void> {\n\tconst backends = Array.from(ocrBackendRegistry.entries());\n\n\tfor (const [name, backend] of backends) {\n\t\tif (typeof backend.shutdown === \"function\") {\n\t\t\ttry {\n\t\t\t\tawait backend.shutdown();\n\t\t\t} catch (error) {\n\t\t\t\tconsole.warn(\n\t\t\t\t\t`Error shutting down OCR backend \"${name}\": ${error instanceof Error ? 
error.message : String(error)}`,\n\t\t\t\t);\n\t\t\t}\n\t\t}\n\t}\n\n\tocrBackendRegistry.clear();\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAsBA,IAAM,qBAAqB,oBAAI,IAAgC;AAqBxD,SAAS,mBAAmB,SAAmC;AAErE,MAAI,CAAC,SAAS;AACb,UAAM,IAAI,MAAM,qCAAqC;AAAA,EACtD;AAEA,MAAI,OAAO,QAAQ,SAAS,YAAY;AACvC,UAAM,IAAI,MAAM,sCAAsC;AAAA,EACvD;AAEA,MAAI,OAAO,QAAQ,uBAAuB,YAAY;AACrD,UAAM,IAAI,MAAM,oDAAoD;AAAA,EACrE;AAEA,MAAI,OAAO,QAAQ,iBAAiB,YAAY;AAC/C,UAAM,IAAI,MAAM,8CAA8C;AAAA,EAC/D;AAEA,QAAM,cAAc,QAAQ,KAAK;AAEjC,MAAI,CAAC,eAAe,OAAO,gBAAgB,UAAU;AACpD,UAAM,IAAI,MAAM,yCAAyC;AAAA,EAC1D;AAGA,MAAI,mBAAmB,IAAI,WAAW,GAAG;AACxC,YAAQ,KAAK,gBAAgB,WAAW,8CAA8C;AAAA,EACvF;AAGA,qBAAmB,IAAI,aAAa,OAAO;AAC5C;AAkBO,SAAS,cAAc,MAA8C;AAC3E,SAAO,mBAAmB,IAAI,IAAI;AACnC;AAeO,SAAS,kBAA4B;AAC3C,SAAO,MAAM,KAAK,mBAAmB,KAAK,CAAC;AAC5C;AAeA,eAAsB,qBAAqB,MAA6B;AACvE,QAAM,UAAU,mBAAmB,IAAI,IAAI;AAE3C,MAAI,CAAC,SAAS;AACb,UAAM,IAAI;AAAA,MACT,gBAAgB,IAAI,4CAA4C,MAAM,KAAK,mBAAmB,KAAK,CAAC,EAAE,KAAK,IAAI,CAAC;AAAA,IACjH;AAAA,EACD;AAGA,MAAI,OAAO,QAAQ,aAAa,YAAY;AAC3C,QAAI;AACH,YAAM,QAAQ,SAAS;AAAA,IACxB,SAAS,OAAO;AACf,cAAQ;AAAA,QACP,oCAAoC,IAAI,MAAM,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,CAAC;AAAA,MACrG;AAAA,IACD;AAAA,EACD;AAEA,qBAAmB,OAAO,IAAI;AAC/B;AAeA,eAAsB,mBAAkC;AACvD,QAAM,WAAW,MAAM,KAAK,mBAAmB,QAAQ,CAAC;AAExD,aAAW,CAAC,MAAM,OAAO,KAAK,UAAU;AACvC,QAAI,OAAO,QAAQ,aAAa,YAAY;AAC3C,UAAI;AACH,cAAM,QAAQ,SAAS;AAAA,MACxB,SAAS,OAAO;AACf,gBAAQ;AAAA,UACP,oCAAoC,IAAI,MAAM,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,CAAC;AAAA,QACrG;AAAA,MACD;AAAA,IACD;AAAA,EACD;AAEA,qBAAmB,MAAM;AAC1B;","names":[]}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import { O as OcrBackendProtocol } from '../types-CKjcIYcX.cjs';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* OCR Backend Registry
|
|
5
|
+
*
|
|
6
|
+
* Provides a registry for OCR backends in the WASM environment.
|
|
7
|
+
* This enables auto-registration and management of OCR backends.
|
|
8
|
+
*
|
|
9
|
+
* Note: The WASM package provides a lightweight registry in the browser.
|
|
10
|
+
* For more advanced features like Rust integration, use @kreuzberg/node or @kreuzberg/deno.
|
|
11
|
+
*
|
|
12
|
+
* @example
|
|
13
|
+
* ```typescript
|
|
14
|
+
* import { TesseractWasmBackend } from '@kreuzberg/wasm/ocr/tesseract-wasm-backend';
|
|
15
|
+
* import { enableOcr } from '@kreuzberg/wasm';
|
|
16
|
+
*
|
|
17
|
+
* // Simple auto-registration
|
|
18
|
+
* await enableOcr();
|
|
19
|
+
* ```
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
/**
|
|
23
|
+
* Register an OCR backend
|
|
24
|
+
*
|
|
25
|
+
* Registers an OCR backend with the WASM extraction pipeline.
|
|
26
|
+
* If a backend with the same name is already registered, it will be replaced.
|
|
27
|
+
*
|
|
28
|
+
* @param backend - OCR backend implementing OcrBackendProtocol
|
|
29
|
+
* @throws {Error} If backend validation fails
|
|
30
|
+
*
|
|
31
|
+
* @example
|
|
32
|
+
* ```typescript
|
|
33
|
+
* import { TesseractWasmBackend } from '@kreuzberg/wasm/ocr/tesseract-wasm-backend';
|
|
34
|
+
* import { registerOcrBackend } from '@kreuzberg/wasm/ocr/registry';
|
|
35
|
+
*
|
|
36
|
+
* const backend = new TesseractWasmBackend();
|
|
37
|
+
* await backend.initialize();
|
|
38
|
+
* registerOcrBackend(backend);
|
|
39
|
+
* ```
|
|
40
|
+
*/
|
|
41
|
+
declare function registerOcrBackend(backend: OcrBackendProtocol): void;
|
|
42
|
+
/**
|
|
43
|
+
* Get a registered OCR backend by name
|
|
44
|
+
*
|
|
45
|
+
* @param name - Backend name
|
|
46
|
+
* @returns The OCR backend or undefined if not found
|
|
47
|
+
*
|
|
48
|
+
* @example
|
|
49
|
+
* ```typescript
|
|
50
|
+
* import { getOcrBackend } from '@kreuzberg/wasm/ocr/registry';
|
|
51
|
+
*
|
|
52
|
+
* const backend = getOcrBackend('tesseract-wasm');
|
|
53
|
+
* if (backend) {
|
|
54
|
+
* console.log('Available languages:', backend.supportedLanguages());
|
|
55
|
+
* }
|
|
56
|
+
* ```
|
|
57
|
+
*/
|
|
58
|
+
declare function getOcrBackend(name: string): OcrBackendProtocol | undefined;
|
|
59
|
+
/**
|
|
60
|
+
* List all registered OCR backends
|
|
61
|
+
*
|
|
62
|
+
* @returns Array of registered backend names
|
|
63
|
+
*
|
|
64
|
+
* @example
|
|
65
|
+
* ```typescript
|
|
66
|
+
* import { listOcrBackends } from '@kreuzberg/wasm/ocr/registry';
|
|
67
|
+
*
|
|
68
|
+
* const backends = listOcrBackends();
|
|
69
|
+
* console.log('Available OCR backends:', backends);
|
|
70
|
+
* ```
|
|
71
|
+
*/
|
|
72
|
+
declare function listOcrBackends(): string[];
|
|
73
|
+
/**
|
|
74
|
+
* Unregister an OCR backend
|
|
75
|
+
*
|
|
76
|
+
* @param name - Backend name to unregister
|
|
77
|
+
* @throws {Error} If backend is not found
|
|
78
|
+
*
|
|
79
|
+
* @example
|
|
80
|
+
* ```typescript
|
|
81
|
+
* import { unregisterOcrBackend } from '@kreuzberg/wasm/ocr/registry';
|
|
82
|
+
*
|
|
83
|
+
* await unregisterOcrBackend('tesseract-wasm');
|
|
84
|
+
* ```
|
|
85
|
+
*/
|
|
86
|
+
declare function unregisterOcrBackend(name: string): Promise<void>;
|
|
87
|
+
/**
|
|
88
|
+
* Clear all registered OCR backends
|
|
89
|
+
*
|
|
90
|
+
* Unregisters all OCR backends and calls their shutdown methods.
|
|
91
|
+
*
|
|
92
|
+
* @example
|
|
93
|
+
* ```typescript
|
|
94
|
+
* import { clearOcrBackends } from '@kreuzberg/wasm/ocr/registry';
|
|
95
|
+
*
|
|
96
|
+
* // Clean up all backends when shutting down
|
|
97
|
+
* await clearOcrBackends();
|
|
98
|
+
* ```
|
|
99
|
+
*/
|
|
100
|
+
declare function clearOcrBackends(): Promise<void>;
|
|
101
|
+
|
|
102
|
+
export { clearOcrBackends, getOcrBackend, listOcrBackends, registerOcrBackend, unregisterOcrBackend };
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import { O as OcrBackendProtocol } from '../types-CKjcIYcX.js';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* OCR Backend Registry
|
|
5
|
+
*
|
|
6
|
+
* Provides a registry for OCR backends in the WASM environment.
|
|
7
|
+
* This enables auto-registration and management of OCR backends.
|
|
8
|
+
*
|
|
9
|
+
* Note: The WASM package provides a lightweight registry in the browser.
|
|
10
|
+
* For more advanced features like Rust integration, use @kreuzberg/node or @kreuzberg/deno.
|
|
11
|
+
*
|
|
12
|
+
* @example
|
|
13
|
+
* ```typescript
|
|
14
|
+
* import { TesseractWasmBackend } from '@kreuzberg/wasm/ocr/tesseract-wasm-backend';
|
|
15
|
+
* import { enableOcr } from '@kreuzberg/wasm';
|
|
16
|
+
*
|
|
17
|
+
* // Simple auto-registration
|
|
18
|
+
* await enableOcr();
|
|
19
|
+
* ```
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
/**
|
|
23
|
+
* Register an OCR backend
|
|
24
|
+
*
|
|
25
|
+
* Registers an OCR backend with the WASM extraction pipeline.
|
|
26
|
+
* If a backend with the same name is already registered, it will be replaced.
|
|
27
|
+
*
|
|
28
|
+
* @param backend - OCR backend implementing OcrBackendProtocol
|
|
29
|
+
* @throws {Error} If backend validation fails
|
|
30
|
+
*
|
|
31
|
+
* @example
|
|
32
|
+
* ```typescript
|
|
33
|
+
* import { TesseractWasmBackend } from '@kreuzberg/wasm/ocr/tesseract-wasm-backend';
|
|
34
|
+
* import { registerOcrBackend } from '@kreuzberg/wasm/ocr/registry';
|
|
35
|
+
*
|
|
36
|
+
* const backend = new TesseractWasmBackend();
|
|
37
|
+
* await backend.initialize();
|
|
38
|
+
* registerOcrBackend(backend);
|
|
39
|
+
* ```
|
|
40
|
+
*/
|
|
41
|
+
declare function registerOcrBackend(backend: OcrBackendProtocol): void;
|
|
42
|
+
/**
|
|
43
|
+
* Get a registered OCR backend by name
|
|
44
|
+
*
|
|
45
|
+
* @param name - Backend name
|
|
46
|
+
* @returns The OCR backend or undefined if not found
|
|
47
|
+
*
|
|
48
|
+
* @example
|
|
49
|
+
* ```typescript
|
|
50
|
+
* import { getOcrBackend } from '@kreuzberg/wasm/ocr/registry';
|
|
51
|
+
*
|
|
52
|
+
* const backend = getOcrBackend('tesseract-wasm');
|
|
53
|
+
* if (backend) {
|
|
54
|
+
* console.log('Available languages:', backend.supportedLanguages());
|
|
55
|
+
* }
|
|
56
|
+
* ```
|
|
57
|
+
*/
|
|
58
|
+
declare function getOcrBackend(name: string): OcrBackendProtocol | undefined;
|
|
59
|
+
/**
|
|
60
|
+
* List all registered OCR backends
|
|
61
|
+
*
|
|
62
|
+
* @returns Array of registered backend names
|
|
63
|
+
*
|
|
64
|
+
* @example
|
|
65
|
+
* ```typescript
|
|
66
|
+
* import { listOcrBackends } from '@kreuzberg/wasm/ocr/registry';
|
|
67
|
+
*
|
|
68
|
+
* const backends = listOcrBackends();
|
|
69
|
+
* console.log('Available OCR backends:', backends);
|
|
70
|
+
* ```
|
|
71
|
+
*/
|
|
72
|
+
declare function listOcrBackends(): string[];
|
|
73
|
+
/**
|
|
74
|
+
* Unregister an OCR backend
|
|
75
|
+
*
|
|
76
|
+
* @param name - Backend name to unregister
|
|
77
|
+
* @throws {Error} If backend is not found
|
|
78
|
+
*
|
|
79
|
+
* @example
|
|
80
|
+
* ```typescript
|
|
81
|
+
* import { unregisterOcrBackend } from '@kreuzberg/wasm/ocr/registry';
|
|
82
|
+
*
|
|
83
|
+
* await unregisterOcrBackend('tesseract-wasm');
|
|
84
|
+
* ```
|
|
85
|
+
*/
|
|
86
|
+
declare function unregisterOcrBackend(name: string): Promise<void>;
|
|
87
|
+
/**
|
|
88
|
+
* Clear all registered OCR backends
|
|
89
|
+
*
|
|
90
|
+
* Unregisters all OCR backends and calls their shutdown methods.
|
|
91
|
+
*
|
|
92
|
+
* @example
|
|
93
|
+
* ```typescript
|
|
94
|
+
* import { clearOcrBackends } from '@kreuzberg/wasm/ocr/registry';
|
|
95
|
+
*
|
|
96
|
+
* // Clean up all backends when shutting down
|
|
97
|
+
* await clearOcrBackends();
|
|
98
|
+
* ```
|
|
99
|
+
*/
|
|
100
|
+
declare function clearOcrBackends(): Promise<void>;
|
|
101
|
+
|
|
102
|
+
export { clearOcrBackends, getOcrBackend, listOcrBackends, registerOcrBackend, unregisterOcrBackend };
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
// typescript/ocr/registry.ts
|
|
2
|
+
/** Module-level registry mapping the string returned by `backend.name()` to the backend instance. */
const ocrBackendRegistry = /* @__PURE__ */ new Map();

/**
 * Registers an OCR backend under the name reported by `backend.name()`.
 *
 * If a backend with the same name is already registered, a warning is logged and
 * the existing entry is replaced. The replaced backend is NOT shut down here.
 *
 * @param {object} backend - Backend exposing `name()`, `supportedLanguages()` and
 *   `processImage()` methods; `shutdown()` is optional and only used on removal.
 * @returns {void}
 * @throws {Error} If `backend` is falsy, lacks one of the required methods, or
 *   `name()` does not return a non-empty string.
 */
function registerOcrBackend(backend) {
  if (!backend) {
    throw new Error("Backend cannot be null or undefined");
  }
  if (typeof backend.name !== "function") {
    throw new Error("Backend must implement name() method");
  }
  if (typeof backend.supportedLanguages !== "function") {
    throw new Error("Backend must implement supportedLanguages() method");
  }
  if (typeof backend.processImage !== "function") {
    throw new Error("Backend must implement processImage() method");
  }
  const backendName = backend.name();
  if (!backendName || typeof backendName !== "string") {
    throw new Error("Backend name must be a non-empty string");
  }
  // Duplicate names are allowed on purpose: warn, then overwrite.
  if (ocrBackendRegistry.has(backendName)) {
    console.warn(`OCR backend "${backendName}" is already registered and will be replaced`);
  }
  ocrBackendRegistry.set(backendName, backend);
}

/**
 * Looks up a registered OCR backend.
 *
 * @param {string} name - Name the backend was registered under.
 * @returns {object | undefined} The backend instance, or `undefined` when no
 *   backend is registered under `name`.
 */
function getOcrBackend(name) {
  return ocrBackendRegistry.get(name);
}

/**
 * Lists the names of all registered OCR backends.
 *
 * @returns {string[]} A new array of names in registration (Map insertion) order.
 */
function listOcrBackends() {
  return Array.from(ocrBackendRegistry.keys());
}

/**
 * Awaits `backend.shutdown()` and turns any failure into a console warning, so a
 * misbehaving backend cannot abort the removal that triggered the shutdown.
 * Callers must check that `backend.shutdown` is a function before calling.
 *
 * @param {string} name - Registry name, used only in the warning text.
 * @param {object} backend - Backend whose `shutdown()` is invoked.
 * @returns {Promise<void>}
 */
async function shutdownOcrBackendSafely(name, backend) {
  try {
    await backend.shutdown();
  } catch (error) {
    console.warn(
      `Error shutting down OCR backend "${name}": ${error instanceof Error ? error.message : String(error)}`
    );
  }
}

/**
 * Unregisters an OCR backend, calling its optional `shutdown()` first.
 *
 * Shutdown errors are logged as warnings and do not prevent removal.
 *
 * @param {string} name - Name of the backend to remove.
 * @returns {Promise<void>} Resolves once shutdown has settled and the entry is removed.
 * @throws {Error} (as a rejection) If no backend is registered under `name`.
 */
async function unregisterOcrBackend(name) {
  const backend = ocrBackendRegistry.get(name);
  if (!backend) {
    throw new Error(
      `OCR backend "${name}" is not registered. Available backends: ${Array.from(ocrBackendRegistry.keys()).join(", ")}`
    );
  }
  if (typeof backend.shutdown === "function") {
    await shutdownOcrBackendSafely(name, backend);
  }
  // The registry may have changed while shutdown() was pending. Remove the entry
  // only if it still refers to the instance we shut down, so a replacement that
  // was registered under the same name in the meantime is not silently dropped.
  if (ocrBackendRegistry.get(name) === backend) {
    ocrBackendRegistry.delete(name);
  }
}

/**
 * Shuts down and unregisters every OCR backend.
 *
 * Backends are shut down one at a time in registration order; shutdown errors are
 * logged as warnings. Backends that get registered while earlier shutdowns are
 * still pending are shut down as well before the registry is emptied, so no
 * backend is removed without its `shutdown()` having been called.
 *
 * @returns {Promise<void>} Resolves once the registry is empty.
 */
async function clearOcrBackends() {
  // Instances already handled, so each one is shut down at most once per call.
  const handled = new Set();
  for (;;) {
    const pending = Array.from(ocrBackendRegistry.entries()).filter(
      ([, backend]) => !handled.has(backend)
    );
    if (pending.length === 0) {
      break;
    }
    for (const [name, backend] of pending) {
      handled.add(backend);
      if (typeof backend.shutdown === "function") {
        await shutdownOcrBackendSafely(name, backend);
      }
    }
  }
  // No await separates the final emptiness check from this call, so nothing
  // that has not been shut down can slip in before the registry is cleared.
  ocrBackendRegistry.clear();
}
|
|
64
|
+
export {
|
|
65
|
+
clearOcrBackends,
|
|
66
|
+
getOcrBackend,
|
|
67
|
+
listOcrBackends,
|
|
68
|
+
registerOcrBackend,
|
|
69
|
+
unregisterOcrBackend
|
|
70
|
+
};
|
|
71
|
+
//# sourceMappingURL=registry.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../typescript/ocr/registry.ts"],"sourcesContent":["/**\n * OCR Backend Registry\n *\n * Provides a registry for OCR backends in the WASM environment.\n * This enables auto-registration and management of OCR backends.\n *\n * Note: The WASM package provides a lightweight registry in the browser.\n * For more advanced features like Rust integration, use @kreuzberg/node or @kreuzberg/deno.\n *\n * @example\n * ```typescript\n * import { TesseractWasmBackend } from '@kreuzberg/wasm/ocr/tesseract-wasm-backend';\n * import { enableOcr } from '@kreuzberg/wasm';\n *\n * // Simple auto-registration\n * await enableOcr();\n * ```\n */\n\nimport type { OcrBackendProtocol } from \"../types.js\";\n\n/** Global registry of OCR backends */\nconst ocrBackendRegistry = new Map<string, OcrBackendProtocol>();\n\n/**\n * Register an OCR backend\n *\n * Registers an OCR backend with the WASM extraction pipeline.\n * If a backend with the same name is already registered, it will be replaced.\n *\n * @param backend - OCR backend implementing OcrBackendProtocol\n * @throws {Error} If backend validation fails\n *\n * @example\n * ```typescript\n * import { TesseractWasmBackend } from '@kreuzberg/wasm/ocr/tesseract-wasm-backend';\n * import { registerOcrBackend } from '@kreuzberg/wasm/ocr/registry';\n *\n * const backend = new TesseractWasmBackend();\n * await backend.initialize();\n * registerOcrBackend(backend);\n * ```\n */\nexport function registerOcrBackend(backend: OcrBackendProtocol): void {\n\t// Validate backend\n\tif (!backend) {\n\t\tthrow new Error(\"Backend cannot be null or undefined\");\n\t}\n\n\tif (typeof backend.name !== \"function\") {\n\t\tthrow new Error(\"Backend must implement name() method\");\n\t}\n\n\tif (typeof backend.supportedLanguages !== \"function\") {\n\t\tthrow new Error(\"Backend must implement supportedLanguages() method\");\n\t}\n\n\tif (typeof backend.processImage !== \"function\") {\n\t\tthrow new Error(\"Backend must 
implement processImage() method\");\n\t}\n\n\tconst backendName = backend.name();\n\n\tif (!backendName || typeof backendName !== \"string\") {\n\t\tthrow new Error(\"Backend name must be a non-empty string\");\n\t}\n\n\t// Check for duplicate registration (allow overwriting with warning)\n\tif (ocrBackendRegistry.has(backendName)) {\n\t\tconsole.warn(`OCR backend \"${backendName}\" is already registered and will be replaced`);\n\t}\n\n\t// Register the backend\n\tocrBackendRegistry.set(backendName, backend);\n}\n\n/**\n * Get a registered OCR backend by name\n *\n * @param name - Backend name\n * @returns The OCR backend or undefined if not found\n *\n * @example\n * ```typescript\n * import { getOcrBackend } from '@kreuzberg/wasm/ocr/registry';\n *\n * const backend = getOcrBackend('tesseract-wasm');\n * if (backend) {\n * console.log('Available languages:', backend.supportedLanguages());\n * }\n * ```\n */\nexport function getOcrBackend(name: string): OcrBackendProtocol | undefined {\n\treturn ocrBackendRegistry.get(name);\n}\n\n/**\n * List all registered OCR backends\n *\n * @returns Array of registered backend names\n *\n * @example\n * ```typescript\n * import { listOcrBackends } from '@kreuzberg/wasm/ocr/registry';\n *\n * const backends = listOcrBackends();\n * console.log('Available OCR backends:', backends);\n * ```\n */\nexport function listOcrBackends(): string[] {\n\treturn Array.from(ocrBackendRegistry.keys());\n}\n\n/**\n * Unregister an OCR backend\n *\n * @param name - Backend name to unregister\n * @throws {Error} If backend is not found\n *\n * @example\n * ```typescript\n * import { unregisterOcrBackend } from '@kreuzberg/wasm/ocr/registry';\n *\n * unregisterOcrBackend('tesseract-wasm');\n * ```\n */\nexport async function unregisterOcrBackend(name: string): Promise<void> {\n\tconst backend = ocrBackendRegistry.get(name);\n\n\tif (!backend) {\n\t\tthrow new Error(\n\t\t\t`OCR backend \"${name}\" is not registered. 
Available backends: ${Array.from(ocrBackendRegistry.keys()).join(\", \")}`,\n\t\t);\n\t}\n\n\t// Call shutdown if available\n\tif (typeof backend.shutdown === \"function\") {\n\t\ttry {\n\t\t\tawait backend.shutdown();\n\t\t} catch (error) {\n\t\t\tconsole.warn(\n\t\t\t\t`Error shutting down OCR backend \"${name}\": ${error instanceof Error ? error.message : String(error)}`,\n\t\t\t);\n\t\t}\n\t}\n\n\tocrBackendRegistry.delete(name);\n}\n\n/**\n * Clear all registered OCR backends\n *\n * Unregisters all OCR backends and calls their shutdown methods.\n *\n * @example\n * ```typescript\n * import { clearOcrBackends } from '@kreuzberg/wasm/ocr/registry';\n *\n * // Clean up all backends when shutting down\n * await clearOcrBackends();\n * ```\n */\nexport async function clearOcrBackends(): Promise<void> {\n\tconst backends = Array.from(ocrBackendRegistry.entries());\n\n\tfor (const [name, backend] of backends) {\n\t\tif (typeof backend.shutdown === \"function\") {\n\t\t\ttry {\n\t\t\t\tawait backend.shutdown();\n\t\t\t} catch (error) {\n\t\t\t\tconsole.warn(\n\t\t\t\t\t`Error shutting down OCR backend \"${name}\": ${error instanceof Error ? 
error.message : String(error)}`,\n\t\t\t\t);\n\t\t\t}\n\t\t}\n\t}\n\n\tocrBackendRegistry.clear();\n}\n"],"mappings":";AAsBA,IAAM,qBAAqB,oBAAI,IAAgC;AAqBxD,SAAS,mBAAmB,SAAmC;AAErE,MAAI,CAAC,SAAS;AACb,UAAM,IAAI,MAAM,qCAAqC;AAAA,EACtD;AAEA,MAAI,OAAO,QAAQ,SAAS,YAAY;AACvC,UAAM,IAAI,MAAM,sCAAsC;AAAA,EACvD;AAEA,MAAI,OAAO,QAAQ,uBAAuB,YAAY;AACrD,UAAM,IAAI,MAAM,oDAAoD;AAAA,EACrE;AAEA,MAAI,OAAO,QAAQ,iBAAiB,YAAY;AAC/C,UAAM,IAAI,MAAM,8CAA8C;AAAA,EAC/D;AAEA,QAAM,cAAc,QAAQ,KAAK;AAEjC,MAAI,CAAC,eAAe,OAAO,gBAAgB,UAAU;AACpD,UAAM,IAAI,MAAM,yCAAyC;AAAA,EAC1D;AAGA,MAAI,mBAAmB,IAAI,WAAW,GAAG;AACxC,YAAQ,KAAK,gBAAgB,WAAW,8CAA8C;AAAA,EACvF;AAGA,qBAAmB,IAAI,aAAa,OAAO;AAC5C;AAkBO,SAAS,cAAc,MAA8C;AAC3E,SAAO,mBAAmB,IAAI,IAAI;AACnC;AAeO,SAAS,kBAA4B;AAC3C,SAAO,MAAM,KAAK,mBAAmB,KAAK,CAAC;AAC5C;AAeA,eAAsB,qBAAqB,MAA6B;AACvE,QAAM,UAAU,mBAAmB,IAAI,IAAI;AAE3C,MAAI,CAAC,SAAS;AACb,UAAM,IAAI;AAAA,MACT,gBAAgB,IAAI,4CAA4C,MAAM,KAAK,mBAAmB,KAAK,CAAC,EAAE,KAAK,IAAI,CAAC;AAAA,IACjH;AAAA,EACD;AAGA,MAAI,OAAO,QAAQ,aAAa,YAAY;AAC3C,QAAI;AACH,YAAM,QAAQ,SAAS;AAAA,IACxB,SAAS,OAAO;AACf,cAAQ;AAAA,QACP,oCAAoC,IAAI,MAAM,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,CAAC;AAAA,MACrG;AAAA,IACD;AAAA,EACD;AAEA,qBAAmB,OAAO,IAAI;AAC/B;AAeA,eAAsB,mBAAkC;AACvD,QAAM,WAAW,MAAM,KAAK,mBAAmB,QAAQ,CAAC;AAExD,aAAW,CAAC,MAAM,OAAO,KAAK,UAAU;AACvC,QAAI,OAAO,QAAQ,aAAa,YAAY;AAC3C,UAAI;AACH,cAAM,QAAQ,SAAS;AAAA,MACxB,SAAS,OAAO;AACf,gBAAQ;AAAA,UACP,oCAAoC,IAAI,MAAM,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,CAAC;AAAA,QACrG;AAAA,MACD;AAAA,IACD;AAAA,EACD;AAEA,qBAAmB,MAAM;AAC1B;","names":[]}
|