@kreuzberg/wasm 4.0.8 → 4.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/extraction/batch.d.ts +80 -0
- package/dist/extraction/batch.d.ts.map +1 -0
- package/dist/extraction/bytes.d.ts +69 -0
- package/dist/extraction/bytes.d.ts.map +1 -0
- package/dist/extraction/files.d.ts +77 -0
- package/dist/extraction/files.d.ts.map +1 -0
- package/dist/extraction/index.d.ts +11 -0
- package/dist/extraction/index.d.ts.map +1 -0
- package/dist/extraction/internal.d.ts +21 -0
- package/dist/extraction/internal.d.ts.map +1 -0
- package/dist/index.d.ts +9 -323
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +677 -591
- package/dist/index.js.map +1 -1
- package/dist/initialization/pdfium-loader.d.ts +30 -0
- package/dist/initialization/pdfium-loader.d.ts.map +1 -0
- package/dist/initialization/state.d.ts +100 -0
- package/dist/initialization/state.d.ts.map +1 -0
- package/dist/initialization/wasm-loader.d.ts +81 -0
- package/dist/initialization/wasm-loader.d.ts.map +1 -0
- package/dist/ocr/enabler.d.ts +86 -0
- package/dist/ocr/enabler.d.ts.map +1 -0
- package/dist/pkg/README.md +1 -1
- package/dist/pkg/kreuzberg_wasm.d.ts +76 -0
- package/dist/pkg/kreuzberg_wasm.js +142 -82
- package/dist/pkg/kreuzberg_wasm_bg.js +7 -7
- package/dist/pkg/kreuzberg_wasm_bg.wasm +0 -0
- package/dist/pkg/kreuzberg_wasm_bg.wasm.d.ts +3 -3
- package/dist/pkg/package.json +5 -1
- package/dist/runtime.d.ts +22 -2
- package/dist/runtime.d.ts.map +1 -1
- package/dist/runtime.js +21 -1
- package/dist/runtime.js.map +1 -1
- package/dist/types.d.ts +75 -0
- package/dist/types.d.ts.map +1 -1
- package/package.json +6 -6
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PDFium WASM Loader
|
|
3
|
+
*
|
|
4
|
+
* Handles PDFium-specific WASM module loading and initialization.
|
|
5
|
+
* Provides asynchronous loading of the PDFium WASM module with
|
|
6
|
+
* proper error handling and browser environment detection.
|
|
7
|
+
*/
|
|
8
|
+
import type { WasmModule } from "./state.d.ts";
|
|
9
|
+
/**
|
|
10
|
+
* Initialize PDFium WASM module asynchronously
|
|
11
|
+
*
|
|
12
|
+
* Loads and binds the PDFium WASM module for PDF extraction.
|
|
13
|
+
* This function is designed for internal use and is called automatically
|
|
14
|
+
* during WASM initialization in browser environments.
|
|
15
|
+
*
|
|
16
|
+
* PDFium provides high-performance PDF parsing and extraction capabilities,
|
|
17
|
+
* enabling reliable text and metadata extraction from PDF documents.
|
|
18
|
+
*
|
|
19
|
+
* @param wasmModule - The loaded Kreuzberg WASM module
|
|
20
|
+
*
|
|
21
|
+
* @internal
|
|
22
|
+
*
|
|
23
|
+
* @example
|
|
24
|
+
* ```typescript
|
|
25
|
+
* // Called automatically during initWasm() in browser environments
|
|
26
|
+
* // See wasm-loader.ts for integration
|
|
27
|
+
* ```
|
|
28
|
+
*/
|
|
29
|
+
export declare function initializePdfiumAsync(wasmModule: WasmModule): Promise<void>;
|
|
30
|
+
//# sourceMappingURL=pdfium-loader.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pdfium-loader.d.ts","sourceRoot":"","sources":["../../typescript/initialization/pdfium-loader.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAGH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAE7C;;;;;;;;;;;;;;;;;;;GAmBG;AACH,wBAAsB,qBAAqB,CAAC,UAAU,EAAE,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC,CAsBjF"}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WASM Initialization State
|
|
3
|
+
*
|
|
4
|
+
* Centralized state management for WASM module initialization.
|
|
5
|
+
* Provides access to the loaded WASM module and initialization status.
|
|
6
|
+
* This module has no dependencies to avoid circular imports.
|
|
7
|
+
*/
|
|
8
|
+
export type WasmModule = {
|
|
9
|
+
extractBytes: (data: Uint8Array, mimeType: string, config: Record<string, unknown> | null) => Promise<unknown>;
|
|
10
|
+
extractBytesSync: (data: Uint8Array, mimeType: string, config: Record<string, unknown> | null) => unknown;
|
|
11
|
+
batchExtractBytes: (dataList: Uint8Array[], mimeTypes: string[], config: Record<string, unknown> | null) => Promise<unknown>;
|
|
12
|
+
batchExtractBytesSync: (dataList: Uint8Array[], mimeTypes: string[], config: Record<string, unknown> | null) => unknown;
|
|
13
|
+
extractFile: (file: File, mimeType: string | null, config: Record<string, unknown> | null) => Promise<unknown>;
|
|
14
|
+
batchExtractFiles: (files: File[], config: Record<string, unknown> | null) => Promise<unknown>;
|
|
15
|
+
detectMimeFromBytes: (data: Uint8Array) => string;
|
|
16
|
+
normalizeMimeType: (mimeType: string) => string;
|
|
17
|
+
getMimeFromExtension: (extension: string) => string | null;
|
|
18
|
+
getExtensionsForMime: (mimeType: string) => string[];
|
|
19
|
+
loadConfigFromString: (content: string, format: string) => Record<string, unknown>;
|
|
20
|
+
discoverConfig: () => Record<string, unknown>;
|
|
21
|
+
version: () => string;
|
|
22
|
+
get_module_info: () => ModuleInfo;
|
|
23
|
+
register_ocr_backend: (backend: unknown) => void;
|
|
24
|
+
unregister_ocr_backend: (name: string) => void;
|
|
25
|
+
list_ocr_backends: () => string[];
|
|
26
|
+
clear_ocr_backends: () => void;
|
|
27
|
+
register_post_processor: (processor: unknown) => void;
|
|
28
|
+
unregister_post_processor: (name: string) => void;
|
|
29
|
+
list_post_processors: () => string[];
|
|
30
|
+
clear_post_processors: () => void;
|
|
31
|
+
register_validator: (validator: unknown) => void;
|
|
32
|
+
unregister_validator: (name: string) => void;
|
|
33
|
+
list_validators: () => string[];
|
|
34
|
+
clear_validators: () => void;
|
|
35
|
+
initialize_pdfium_render: (pdfiumWasmModule: unknown, localWasmModule: unknown, debug: boolean) => boolean;
|
|
36
|
+
read_block_from_callback_wasm: (param: number, position: number, pBuf: number, size: number) => number;
|
|
37
|
+
write_block_from_callback_wasm: (param: number, buf: number, size: number) => number;
|
|
38
|
+
default?: (moduleOrPath?: BufferSource | WebAssembly.Module | string | URL | Response | Request) => Promise<void>;
|
|
39
|
+
};
|
|
40
|
+
export type ModuleInfo = {
|
|
41
|
+
name: () => string;
|
|
42
|
+
version: () => string;
|
|
43
|
+
free: () => void;
|
|
44
|
+
};
|
|
45
|
+
/**
|
|
46
|
+
* Get the loaded WASM module
|
|
47
|
+
*
|
|
48
|
+
* @returns The WASM module instance or null if not loaded
|
|
49
|
+
* @internal
|
|
50
|
+
*/
|
|
51
|
+
export declare function getWasmModule(): WasmModule | null;
|
|
52
|
+
/**
|
|
53
|
+
* Set the WASM module instance
|
|
54
|
+
*
|
|
55
|
+
* @param module The WASM module instance
|
|
56
|
+
* @internal
|
|
57
|
+
*/
|
|
58
|
+
export declare function setWasmModule(module: WasmModule): void;
|
|
59
|
+
/**
|
|
60
|
+
* Check if WASM module is initialized
|
|
61
|
+
*
|
|
62
|
+
* @returns True if WASM module is initialized, false otherwise
|
|
63
|
+
*/
|
|
64
|
+
export declare function isInitialized(): boolean;
|
|
65
|
+
/**
|
|
66
|
+
* Set the initialized flag
|
|
67
|
+
*
|
|
68
|
+
* @param value The initialized state
|
|
69
|
+
* @internal
|
|
70
|
+
*/
|
|
71
|
+
export declare function setInitialized(value: boolean): void;
|
|
72
|
+
/**
|
|
73
|
+
* Get initialization error if module failed to load
|
|
74
|
+
*
|
|
75
|
+
* @returns The error that occurred during initialization, or null if no error
|
|
76
|
+
* @internal
|
|
77
|
+
*/
|
|
78
|
+
export declare function getInitializationError(): Error | null;
|
|
79
|
+
/**
|
|
80
|
+
* Set the initialization error
|
|
81
|
+
*
|
|
82
|
+
* @param error The error that occurred during initialization
|
|
83
|
+
* @internal
|
|
84
|
+
*/
|
|
85
|
+
export declare function setInitializationError(error: Error | null): void;
|
|
86
|
+
/**
|
|
87
|
+
* Get the current initialization promise
|
|
88
|
+
*
|
|
89
|
+
* @returns The initialization promise or null if not initializing
|
|
90
|
+
* @internal
|
|
91
|
+
*/
|
|
92
|
+
export declare function getInitializationPromise(): Promise<void> | null;
|
|
93
|
+
/**
|
|
94
|
+
* Set the initialization promise
|
|
95
|
+
*
|
|
96
|
+
* @param promise The initialization promise
|
|
97
|
+
* @internal
|
|
98
|
+
*/
|
|
99
|
+
export declare function setInitializationPromise(promise: Promise<void> | null): void;
|
|
100
|
+
//# sourceMappingURL=state.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"state.d.ts","sourceRoot":"","sources":["../../typescript/initialization/state.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,MAAM,MAAM,UAAU,GAAG;IACxB,YAAY,EAAE,CAAC,IAAI,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;IAC/G,gBAAgB,EAAE,CAAC,IAAI,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,KAAK,OAAO,CAAC;IAC1G,iBAAiB,EAAE,CAClB,QAAQ,EAAE,UAAU,EAAE,EACtB,SAAS,EAAE,MAAM,EAAE,EACnB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,KAClC,OAAO,CAAC,OAAO,CAAC,CAAC;IACtB,qBAAqB,EAAE,CACtB,QAAQ,EAAE,UAAU,EAAE,EACtB,SAAS,EAAE,MAAM,EAAE,EACnB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,KAClC,OAAO,CAAC;IACb,WAAW,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;IAC/G,iBAAiB,EAAE,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;IAE/F,mBAAmB,EAAE,CAAC,IAAI,EAAE,UAAU,KAAK,MAAM,CAAC;IAClD,iBAAiB,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,MAAM,CAAC;IAChD,oBAAoB,EAAE,CAAC,SAAS,EAAE,MAAM,KAAK,MAAM,GAAG,IAAI,CAAC;IAC3D,oBAAoB,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,MAAM,EAAE,CAAC;IAErD,oBAAoB,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,KAAK,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACnF,cAAc,EAAE,MAAM,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAE9C,OAAO,EAAE,MAAM,MAAM,CAAC;IACtB,eAAe,EAAE,MAAM,UAAU,CAAC;IAElC,oBAAoB,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,IAAI,CAAC;IACjD,sBAAsB,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,IAAI,CAAC;IAC/C,iBAAiB,EAAE,MAAM,MAAM,EAAE,CAAC;IAClC,kBAAkB,EAAE,MAAM,IAAI,CAAC;IAE/B,uBAAuB,EAAE,CAAC,SAAS,EAAE,OAAO,KAAK,IAAI,CAAC;IACtD,yBAAyB,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,IAAI,CAAC;IAClD,oBAAoB,EAAE,MAAM,MAAM,EAAE,CAAC;IACrC,qBAAqB,EAAE,MAAM,IAAI,CAAC;IAElC,kBAAkB,EAAE,CAAC,SAAS,EAAE,OAAO,KAAK,IAAI,CAAC;IACjD,oBAAoB,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,IAAI,CAAC;IAC7C,eAAe,EAAE,MAAM,MAAM,EAAE,CAAC;IAChC,gBAAgB,EAAE,MAAM,IAAI,CAAC;IAE7B,wBAAwB,EA
AE,CAAC,gBAAgB,EAAE,OAAO,EAAE,eAAe,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,KAAK,OAAO,CAAC;IAC3G,6BAA6B,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;IACvG,8BAA8B,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;IAErF,OAAO,CAAC,EAAE,CAAC,YAAY,CAAC,EAAE,YAAY,GAAG,WAAW,CAAC,MAAM,GAAG,MAAM,GAAG,GAAG,GAAG,QAAQ,GAAG,OAAO,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;CAClH,CAAC;AAEF,MAAM,MAAM,UAAU,GAAG;IACxB,IAAI,EAAE,MAAM,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,MAAM,CAAC;IACtB,IAAI,EAAE,MAAM,IAAI,CAAC;CACjB,CAAC;AAcF;;;;;GAKG;AACH,wBAAgB,aAAa,IAAI,UAAU,GAAG,IAAI,CAEjD;AAED;;;;;GAKG;AACH,wBAAgB,aAAa,CAAC,MAAM,EAAE,UAAU,GAAG,IAAI,CAEtD;AAED;;;;GAIG;AACH,wBAAgB,aAAa,IAAI,OAAO,CAEvC;AAED;;;;;GAKG;AACH,wBAAgB,cAAc,CAAC,KAAK,EAAE,OAAO,GAAG,IAAI,CAEnD;AAED;;;;;GAKG;AACH,wBAAgB,sBAAsB,IAAI,KAAK,GAAG,IAAI,CAErD;AAED;;;;;GAKG;AACH,wBAAgB,sBAAsB,CAAC,KAAK,EAAE,KAAK,GAAG,IAAI,GAAG,IAAI,CAEhE;AAED;;;;;GAKG;AACH,wBAAgB,wBAAwB,IAAI,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAE/D;AAED;;;;;GAKG;AACH,wBAAgB,wBAAwB,CAAC,OAAO,EAAE,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,GAAG,IAAI,CAE5E"}
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WASM Module Loader
|
|
3
|
+
*
|
|
4
|
+
* Handles WASM module loading, initialization, and state management.
|
|
5
|
+
* Provides a clean interface for loading the Kreuzberg WASM module
|
|
6
|
+
* with support for concurrent initialization calls.
|
|
7
|
+
*/
|
|
8
|
+
import { getInitializationError, getWasmModule, isInitialized, type ModuleInfo, type WasmModule } from "./state.d.ts";
|
|
9
|
+
export type { WasmModule, ModuleInfo };
|
|
10
|
+
/**
|
|
11
|
+
* Get the loaded WASM module
|
|
12
|
+
*
|
|
13
|
+
* @returns The WASM module instance or null if not loaded
|
|
14
|
+
* @internal
|
|
15
|
+
*/
|
|
16
|
+
export { getWasmModule };
|
|
17
|
+
/**
|
|
18
|
+
* Check if WASM module is initialized
|
|
19
|
+
*
|
|
20
|
+
* @returns True if WASM module is initialized, false otherwise
|
|
21
|
+
*/
|
|
22
|
+
export { isInitialized };
|
|
23
|
+
/**
|
|
24
|
+
* Get initialization error if module failed to load
|
|
25
|
+
*
|
|
26
|
+
* @returns The error that occurred during initialization, or null if no error
|
|
27
|
+
* @internal
|
|
28
|
+
*/
|
|
29
|
+
export { getInitializationError };
|
|
30
|
+
/**
|
|
31
|
+
* Get WASM module version
|
|
32
|
+
*
|
|
33
|
+
* @throws {Error} If WASM module is not initialized
|
|
34
|
+
* @returns The version string of the WASM module
|
|
35
|
+
*/
|
|
36
|
+
export declare function getVersion(): string;
|
|
37
|
+
/**
|
|
38
|
+
* Initialize the WASM module
|
|
39
|
+
*
|
|
40
|
+
* This function must be called once before using any extraction functions.
|
|
41
|
+
* It loads and initializes the WASM module in the current runtime environment,
|
|
42
|
+
* automatically selecting the appropriate WASM variant for the detected runtime.
|
|
43
|
+
*
|
|
44
|
+
* Multiple calls to initWasm() are safe and will return immediately if already initialized.
|
|
45
|
+
*
|
|
46
|
+
* @throws {Error} If WASM module fails to load or is not supported in the current environment
|
|
47
|
+
*
|
|
48
|
+
* @example Basic Usage
|
|
49
|
+
* ```typescript
|
|
50
|
+
* import { initWasm } from '@kreuzberg/wasm';
|
|
51
|
+
*
|
|
52
|
+
* async function main() {
|
|
53
|
+
* await initWasm();
|
|
54
|
+
* // Now you can use extraction functions
|
|
55
|
+
* }
|
|
56
|
+
*
|
|
57
|
+
* main().catch(console.error);
|
|
58
|
+
* ```
|
|
59
|
+
*
|
|
60
|
+
* @example With Error Handling
|
|
61
|
+
* ```typescript
|
|
62
|
+
* import { initWasm, getWasmCapabilities } from '@kreuzberg/wasm';
|
|
63
|
+
*
|
|
64
|
+
* async function initializeKreuzberg() {
|
|
65
|
+
* const caps = getWasmCapabilities();
|
|
66
|
+
* if (!caps.hasWasm) {
|
|
67
|
+
* throw new Error('WebAssembly is not supported in this environment');
|
|
68
|
+
* }
|
|
69
|
+
*
|
|
70
|
+
* try {
|
|
71
|
+
* await initWasm();
|
|
72
|
+
* console.log('Kreuzberg initialized successfully');
|
|
73
|
+
* } catch (error) {
|
|
74
|
+
* console.error('Failed to initialize Kreuzberg:', error);
|
|
75
|
+
* throw error;
|
|
76
|
+
* }
|
|
77
|
+
* }
|
|
78
|
+
* ```
|
|
79
|
+
*/
|
|
80
|
+
export declare function initWasm(): Promise<void>;
|
|
81
|
+
//# sourceMappingURL=wasm-loader.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"wasm-loader.d.ts","sourceRoot":"","sources":["../../typescript/initialization/wasm-loader.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAmCH,OAAO,EACN,sBAAsB,EAEtB,aAAa,EACb,aAAa,EACb,KAAK,UAAU,EAKf,KAAK,UAAU,EACf,MAAM,YAAY,CAAC;AAEpB,YAAY,EAAE,UAAU,EAAE,UAAU,EAAE,CAAC;AAEvC;;;;;GAKG;AACH,OAAO,EAAE,aAAa,EAAE,CAAC;AAEzB;;;;GAIG;AACH,OAAO,EAAE,aAAa,EAAE,CAAC;AAEzB;;;;;GAKG;AACH,OAAO,EAAE,sBAAsB,EAAE,CAAC;AAElC;;;;;GAKG;AACH,wBAAgB,UAAU,IAAI,MAAM,CAWnC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA0CG;AACH,wBAAsB,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC,CAwD9C"}
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OCR enabler module
|
|
3
|
+
*
|
|
4
|
+
* Provides convenient functions for enabling and setting up OCR backends.
|
|
5
|
+
*/
|
|
6
|
+
/**
|
|
7
|
+
* Enable OCR functionality with tesseract-wasm backend
|
|
8
|
+
*
|
|
9
|
+
* Convenience function that automatically initializes and registers the Tesseract WASM backend.
|
|
10
|
+
* This is the recommended approach for enabling OCR in WASM-based applications.
|
|
11
|
+
*
|
|
12
|
+
* ## Browser Requirement
|
|
13
|
+
*
|
|
14
|
+
* This function requires a browser environment with support for:
|
|
15
|
+
* - WebWorkers (for Tesseract processing)
|
|
16
|
+
* - createImageBitmap (for image conversion)
|
|
17
|
+
* - Blob API
|
|
18
|
+
*
|
|
19
|
+
* ## Network Requirement
|
|
20
|
+
*
|
|
21
|
+
* Training data will be loaded from jsDelivr CDN on first use of each language.
|
|
22
|
+
* Ensure network access to cdn.jsdelivr.net is available.
|
|
23
|
+
*
|
|
24
|
+
* @throws {Error} If not in browser environment or tesseract-wasm is not available
|
|
25
|
+
*
|
|
26
|
+
* @example Basic Usage
|
|
27
|
+
* ```typescript
|
|
28
|
+
* import { enableOcr, extractBytes, initWasm } from '@kreuzberg/wasm';
|
|
29
|
+
*
|
|
30
|
+
* async function main() {
|
|
31
|
+
* // Initialize WASM module
|
|
32
|
+
* await initWasm();
|
|
33
|
+
*
|
|
34
|
+
* // Enable OCR with tesseract-wasm
|
|
35
|
+
* await enableOcr();
|
|
36
|
+
*
|
|
37
|
+
* // Now you can use OCR in extraction
|
|
38
|
+
* const imageBytes = new Uint8Array(buffer);
|
|
39
|
+
* const result = await extractBytes(imageBytes, 'image/png', {
|
|
40
|
+
* ocr: { backend: 'tesseract-wasm', language: 'eng' }
|
|
41
|
+
* });
|
|
42
|
+
*
|
|
43
|
+
* console.log(result.content); // Extracted text
|
|
44
|
+
* }
|
|
45
|
+
*
|
|
46
|
+
* main().catch(console.error);
|
|
47
|
+
* ```
|
|
48
|
+
*
|
|
49
|
+
* @example With Progress Tracking
|
|
50
|
+
* ```typescript
|
|
51
|
+
* import { registerOcrBackend, TesseractWasmBackend } from '@kreuzberg/wasm';
|
|
52
|
+
*
|
|
53
|
+
* async function setupOcrWithProgress() {
|
|
54
|
+
* const backend = new TesseractWasmBackend();
|
|
55
|
+
* backend.setProgressCallback((progress) => {
|
|
56
|
+
* console.log(`OCR Progress: ${progress}%`);
|
|
57
|
+
* updateProgressBar(progress);
|
|
58
|
+
* });
|
|
59
|
+
*
|
|
60
|
+
* await backend.initialize();
|
|
61
|
+
* registerOcrBackend(backend);
|
|
62
|
+
* }
|
|
63
|
+
*
|
|
64
|
+
* setupOcrWithProgress().catch(console.error);
|
|
65
|
+
* ```
|
|
66
|
+
*
|
|
67
|
+
* @example Multiple Languages
|
|
68
|
+
* ```typescript
|
|
69
|
+
* import { enableOcr, extractBytes, initWasm } from '@kreuzberg/wasm';
|
|
70
|
+
*
|
|
71
|
+
* await initWasm();
|
|
72
|
+
* await enableOcr();
|
|
73
|
+
*
|
|
74
|
+
* // Extract English text
|
|
75
|
+
* const englishResult = await extractBytes(engImageBytes, 'image/png', {
|
|
76
|
+
* ocr: { backend: 'tesseract-wasm', language: 'eng' }
|
|
77
|
+
* });
|
|
78
|
+
*
|
|
79
|
+
* // Extract German text - model is cached after first use
|
|
80
|
+
* const germanResult = await extractBytes(deImageBytes, 'image/png', {
|
|
81
|
+
* ocr: { backend: 'tesseract-wasm', language: 'deu' }
|
|
82
|
+
* });
|
|
83
|
+
* ```
|
|
84
|
+
*/
|
|
85
|
+
export declare function enableOcr(): Promise<void>;
|
|
86
|
+
//# sourceMappingURL=enabler.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"enabler.d.ts","sourceRoot":"","sources":["../../typescript/ocr/enabler.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAOH;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8EG;AACH,wBAAsB,SAAS,IAAI,OAAO,CAAC,IAAI,CAAC,CAoB/C"}
|
package/dist/pkg/README.md
CHANGED
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
<img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
|
|
23
23
|
</a>
|
|
24
24
|
<a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
|
|
25
|
-
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.0.8" alt="Go">
|
|
25
|
+
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.1.1" alt="Go">
|
|
26
26
|
</a>
|
|
27
27
|
<a href="https://www.nuget.org/packages/Kreuzberg/">
|
|
28
28
|
<img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
|
|
@@ -771,3 +771,79 @@ export function version(): string;
|
|
|
771
771
|
* from Pdfium's WASM memory heap to our WASM memory heap as they are written.
|
|
772
772
|
*/
|
|
773
773
|
export function write_block_from_callback_wasm(param: number, buf: number, size: number): number;
|
|
774
|
+
|
|
775
|
+
export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembly.Module;
|
|
776
|
+
|
|
777
|
+
export interface InitOutput {
|
|
778
|
+
readonly memory: WebAssembly.Memory;
|
|
779
|
+
readonly __wbg_moduleinfo_free: (a: number, b: number) => void;
|
|
780
|
+
readonly batchExtractBytes: (a: number, b: number, c: number, d: number, e: number) => any;
|
|
781
|
+
readonly batchExtractBytesSync: (a: number, b: number, c: number, d: number, e: number) => [number, number, number];
|
|
782
|
+
readonly batchExtractFiles: (a: number, b: number, c: number) => any;
|
|
783
|
+
readonly batchExtractFilesSync: () => [number, number, number];
|
|
784
|
+
readonly clear_ocr_backends: () => [number, number];
|
|
785
|
+
readonly clear_post_processors: () => [number, number];
|
|
786
|
+
readonly clear_validators: () => [number, number];
|
|
787
|
+
readonly detectMimeFromBytes: (a: any) => [number, number, number, number];
|
|
788
|
+
readonly discoverConfig: () => [number, number, number];
|
|
789
|
+
readonly extractBytes: (a: any, b: number, c: number, d: number) => any;
|
|
790
|
+
readonly extractBytesSync: (a: any, b: number, c: number, d: number) => [number, number, number];
|
|
791
|
+
readonly extractFile: (a: any, b: number, c: number, d: number) => any;
|
|
792
|
+
readonly extractFileSync: () => [number, number, number];
|
|
793
|
+
readonly getExtensionsForMime: (a: number, b: number) => [number, number, number];
|
|
794
|
+
readonly getMimeFromExtension: (a: number, b: number) => [number, number];
|
|
795
|
+
readonly get_module_info: () => number;
|
|
796
|
+
readonly initThreadPool: (a: number) => any;
|
|
797
|
+
readonly init_thread_pool_safe: (a: number) => number;
|
|
798
|
+
readonly list_ocr_backends: () => [number, number, number];
|
|
799
|
+
readonly list_post_processors: () => [number, number, number];
|
|
800
|
+
readonly list_validators: () => [number, number, number];
|
|
801
|
+
readonly loadConfigFromString: (a: number, b: number, c: number, d: number) => [number, number, number];
|
|
802
|
+
readonly moduleinfo_name: (a: number) => [number, number];
|
|
803
|
+
readonly moduleinfo_version: (a: number) => [number, number];
|
|
804
|
+
readonly normalizeMimeType: (a: number, b: number) => [number, number];
|
|
805
|
+
readonly register_ocr_backend: (a: any) => [number, number];
|
|
806
|
+
readonly register_post_processor: (a: any) => [number, number];
|
|
807
|
+
readonly register_validator: (a: any) => [number, number];
|
|
808
|
+
readonly unregister_ocr_backend: (a: number, b: number) => [number, number];
|
|
809
|
+
readonly unregister_post_processor: (a: number, b: number) => [number, number];
|
|
810
|
+
readonly unregister_validator: (a: number, b: number) => [number, number];
|
|
811
|
+
readonly version: () => [number, number];
|
|
812
|
+
readonly init: () => void;
|
|
813
|
+
readonly initialize_pdfium_render: (a: any, b: any, c: number) => number;
|
|
814
|
+
readonly read_block_from_callback_wasm: (a: number, b: number, c: number, d: number) => number;
|
|
815
|
+
readonly write_block_from_callback_wasm: (a: number, b: number, c: number) => number;
|
|
816
|
+
readonly wasm_bindgen_37c4960e3490cba2___closure__destroy___dyn_core_67558b4ca73dc0a8___ops__function__FnMut__wasm_bindgen_37c4960e3490cba2___JsValue____Output_______: (a: number, b: number) => void;
|
|
817
|
+
readonly wasm_bindgen_37c4960e3490cba2___convert__closures_____invoke___wasm_bindgen_37c4960e3490cba2___JsValue__wasm_bindgen_37c4960e3490cba2___JsValue_____: (a: number, b: number, c: any, d: any) => void;
|
|
818
|
+
readonly wasm_bindgen_37c4960e3490cba2___convert__closures_____invoke___wasm_bindgen_37c4960e3490cba2___JsValue_____: (a: number, b: number, c: any) => void;
|
|
819
|
+
readonly __wbindgen_externrefs: WebAssembly.Table;
|
|
820
|
+
readonly __wbindgen_malloc: (a: number, b: number) => number;
|
|
821
|
+
readonly __wbindgen_realloc: (a: number, b: number, c: number, d: number) => number;
|
|
822
|
+
readonly __wbindgen_exn_store: (a: number) => void;
|
|
823
|
+
readonly __externref_table_alloc: () => number;
|
|
824
|
+
readonly __wbindgen_free: (a: number, b: number, c: number) => void;
|
|
825
|
+
readonly __externref_table_dealloc: (a: number) => void;
|
|
826
|
+
readonly __wbindgen_start: () => void;
|
|
827
|
+
}
|
|
828
|
+
|
|
829
|
+
export type SyncInitInput = BufferSource | WebAssembly.Module;
|
|
830
|
+
|
|
831
|
+
/**
|
|
832
|
+
* Instantiates the given `module`, which can either be bytes or
|
|
833
|
+
* a precompiled `WebAssembly.Module`.
|
|
834
|
+
*
|
|
835
|
+
* @param {{ module: SyncInitInput }} module - Passing `SyncInitInput` directly is deprecated.
|
|
836
|
+
*
|
|
837
|
+
* @returns {InitOutput}
|
|
838
|
+
*/
|
|
839
|
+
export function initSync(module: { module: SyncInitInput } | SyncInitInput): InitOutput;
|
|
840
|
+
|
|
841
|
+
/**
|
|
842
|
+
* If `module_or_path` is {RequestInfo} or {URL}, makes a request and
|
|
843
|
+
* for everything else, calls `WebAssembly.instantiate` directly.
|
|
844
|
+
*
|
|
845
|
+
* @param {{ module_or_path: InitInput | Promise<InitInput> }} module_or_path - Passing `InitInput` directly is deprecated.
|
|
846
|
+
*
|
|
847
|
+
* @returns {Promise<InitOutput>}
|
|
848
|
+
*/
|
|
849
|
+
export default function __wbg_init (module_or_path?: { module_or_path: InitInput | Promise<InitInput> } | InitInput | Promise<InitInput>): Promise<InitOutput>;
|