@kreuzberg/wasm 4.0.0-rc.18 → 4.0.0-rc.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +9 -3123
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +9 -3129
- package/dist/index.js.map +1 -1
- package/dist/ocr/tesseract-wasm-backend.cjs +11 -3121
- package/dist/ocr/tesseract-wasm-backend.cjs.map +1 -1
- package/dist/ocr/tesseract-wasm-backend.js +1 -3127
- package/dist/ocr/tesseract-wasm-backend.js.map +1 -1
- package/dist/pdfium.js +82 -0
- package/package.json +22 -1
package/dist/pdfium.js
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PDFium WASM Initialization Helper
|
|
3
|
+
*
|
|
4
|
+
* This module provides a helper function to initialize PDFium for use with kreuzberg-wasm.
|
|
5
|
+
*
|
|
6
|
+
* IMPORTANT: PDFium must be initialized before any PDF extraction operations.
|
|
7
|
+
*
|
|
8
|
+
* @module pdfium-init
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
/**
 * Initialize PDFium WASM module for use with kreuzberg.
 *
 * This function MUST be called after loading both the kreuzberg WASM module
 * and the PDFium WASM module, but before performing any PDF operations.
 *
 * Lookup order for the initializer:
 *   1. `wasmModule.initialize_pdfium_render`
 *   2. a global `initialize_pdfium_render` function (legacy fallback)
 *
 * Any exception thrown by the initializer is logged with `console.error` and
 * reported as `false`. A missing (`null`/`undefined`) `wasmModule` does not
 * throw; the function falls through to the global fallback instead.
 *
 * @param {Object} pdfiumModule - The loaded PDFium WASM module (from pdfium.js)
 * @param {Object} wasmModule - The kreuzberg WASM instance
 * @param {boolean} [debug=false] - Enable debug logging
 * @returns {boolean} The initializer's result when one was found and ran
 *   without throwing; `false` when it threw or no initializer was found
 *
 * @example
 * import init from './kreuzberg_wasm.js';
 * import pdfiumModule from './pdfium.js';
 * import { initializePdfiumWasm } from './pdfium_init.js';
 *
 * // Load kreuzberg WASM
 * const wasm = await init();
 *
 * // Load PDFium WASM
 * const pdfium = await pdfiumModule();
 *
 * // Initialize PDFium (required before PDF operations)
 * if (!initializePdfiumWasm(pdfium, wasm, false)) {
 *   throw new Error('Failed to initialize PDFium');
 * }
 *
 * // Now you can use PDF extraction
 * const result = await wasm.extract_from_bytes(pdfBytes, config);
 */
export function initializePdfiumWasm(pdfiumModule, wasmModule, debug = false) {
  // pdfium-render exports initialize_pdfium_render as a function on the
  // module when compiled as part of our WASM module.
  // Optional chaining keeps a null/undefined wasmModule from throwing here,
  // so the documented "return false on failure" contract holds.
  if (typeof wasmModule?.initialize_pdfium_render === "function") {
    try {
      return wasmModule.initialize_pdfium_render(pdfiumModule, wasmModule, debug);
    } catch (error) {
      console.error("Failed to initialize PDFium:", error);
      return false;
    }
  }

  // Fallback: Try to find it in global scope (legacy behavior).
  // `typeof` on an undeclared identifier is safe and yields "undefined".
  if (typeof initialize_pdfium_render === "function") {
    try {
      return initialize_pdfium_render(pdfiumModule, wasmModule, debug);
    } catch (error) {
      console.error("Failed to initialize PDFium (global):", error);
      return false;
    }
  }

  console.error("initialize_pdfium_render function not found. This may indicate a build issue.");
  console.error("PDFium initialization requires pdfium-render WASM bindings to be present.");
  return false;
}
|
|
67
|
+
|
|
68
|
+
/**
 * Dynamically import a PDFium loader script and instantiate the module.
 *
 * The script at `pdfiumJsUrl` is expected to default-export a factory
 * function (Emscripten module pattern); that factory is invoked with no
 * arguments and its awaited result is returned.
 *
 * @param {string} pdfiumJsUrl - URL to pdfium.js file
 * @returns {Promise<Object>} Loaded PDFium module
 */
export async function loadPdfiumModule(pdfiumJsUrl) {
  // Resolve the ES module namespace for the loader script.
  const loaderNamespace = await import(pdfiumJsUrl);

  // Call the default-exported factory through the namespace object and
  // hand back whatever it resolves to.
  return await loaderNamespace.default();
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@kreuzberg/wasm",
|
|
3
|
-
"version": "4.0.0-rc.18",
|
|
3
|
+
"version": "4.0.0-rc.19",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"packageManager": "pnpm@10.17.0",
|
|
6
6
|
"description": "Kreuzberg document intelligence - WebAssembly bindings",
|
|
@@ -18,6 +18,26 @@
|
|
|
18
18
|
"types": "dist/index.d.ts",
|
|
19
19
|
"exports": {
|
|
20
20
|
".": {
|
|
21
|
+
"browser": {
|
|
22
|
+
"import": {
|
|
23
|
+
"types": "./dist/index.d.ts",
|
|
24
|
+
"default": "./dist/index.js"
|
|
25
|
+
},
|
|
26
|
+
"require": {
|
|
27
|
+
"types": "./dist/index.d.cts",
|
|
28
|
+
"default": "./dist/index.cjs"
|
|
29
|
+
}
|
|
30
|
+
},
|
|
31
|
+
"node": {
|
|
32
|
+
"import": {
|
|
33
|
+
"types": "./dist/index.d.ts",
|
|
34
|
+
"default": "./dist/index.js"
|
|
35
|
+
},
|
|
36
|
+
"require": {
|
|
37
|
+
"types": "./dist/index.d.cts",
|
|
38
|
+
"default": "./dist/index.cjs"
|
|
39
|
+
}
|
|
40
|
+
},
|
|
21
41
|
"import": {
|
|
22
42
|
"types": "./dist/index.d.ts",
|
|
23
43
|
"default": "./dist/index.js"
|
|
@@ -97,6 +117,7 @@
|
|
|
97
117
|
"pkg",
|
|
98
118
|
"*.wasm",
|
|
99
119
|
"*.d.ts",
|
|
120
|
+
"pdfium.js",
|
|
100
121
|
"README.md"
|
|
101
122
|
],
|
|
102
123
|
"engines": {
|