@kreuzberg/wasm 4.0.0-rc.10 → 4.0.0-rc.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +49 -0
- package/dist/index.cjs +25 -1
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +0 -43
- package/dist/index.d.ts +0 -43
- package/dist/index.js +25 -1
- package/dist/index.js.map +1 -1
- package/package.json +139 -138
package/README.md
CHANGED
|
@@ -88,6 +88,55 @@ yarn add @kreuzberg/wasm
|
|
|
88
88
|
import { extractBytes } from "npm:@kreuzberg/wasm@^4.0.0";
|
|
89
89
|
```
|
|
90
90
|
|
|
91
|
+
## PDF Support and PDFium Initialization
|
|
92
|
+
|
|
93
|
+
**IMPORTANT**: PDF extraction requires a one-time initialization step to load the PDFium WASM module.
|
|
94
|
+
|
|
95
|
+
### Why PDFium Initialization is Needed
|
|
96
|
+
|
|
97
|
+
Kreuzberg uses the high-performance PDFium library (from Google Chrome) for PDF processing. In WASM environments, PDFium runs as a separate WASM module that must be loaded and bound to the main Kreuzberg module before PDF extraction can work.
|
|
98
|
+
|
|
99
|
+
### How to Initialize PDFium
|
|
100
|
+
|
|
101
|
+
```javascript
|
|
102
|
+
import init, { initialize_pdfium_render, extractBytes } from '@kreuzberg/wasm';
|
|
103
|
+
import pdfiumModule from '@kreuzberg/wasm/pdfium.js';
|
|
104
|
+
|
|
105
|
+
// Step 1: Initialize kreuzberg WASM
|
|
106
|
+
const wasm = await init(); // `wasm` is passed to initialize_pdfium_render in Step 3
|
|
107
|
+
|
|
108
|
+
// Step 2: Load PDFium WASM module
|
|
109
|
+
const pdfium = await pdfiumModule();
|
|
110
|
+
|
|
111
|
+
// Step 3: Bind kreuzberg to PDFium (required before any PDF operations)
|
|
112
|
+
const success = initialize_pdfium_render(pdfium, wasm, false);
|
|
113
|
+
if (!success) {
|
|
114
|
+
throw new Error('Failed to initialize PDFium');
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
// Step 4: Now PDF extraction works
|
|
118
|
+
const pdfBytes = new Uint8Array(await pdfFile.arrayBuffer());
|
|
119
|
+
const result = await extractBytes(pdfBytes);
|
|
120
|
+
console.log(result.text);
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### Error: "PdfiumWASMModuleNotConfigured"
|
|
124
|
+
|
|
125
|
+
If you see this error, it means `initialize_pdfium_render()` was not called before attempting PDF extraction. Make sure to follow the initialization sequence above.
|
|
126
|
+
|
|
127
|
+
### PDFium Files Location
|
|
128
|
+
|
|
129
|
+
The PDFium WASM files (`pdfium.js`, `pdfium.wasm`) should be included in the `@kreuzberg/wasm` package. If they're missing:
|
|
130
|
+
|
|
131
|
+
1. Check your `node_modules/@kreuzberg/wasm/` directory
|
|
132
|
+
2. Ensure both `pdfium.js` and `pdfium.wasm` are present
|
|
133
|
+
3. If missing, reinstall the package
|
|
134
|
+
|
|
135
|
+
For self-hosted builds, copy the files from:
|
|
136
|
+
```bash
|
|
137
|
+
target/wasm32-unknown-unknown/release/build/kreuzberg-*/out/pdfium/release/node/
|
|
138
|
+
```
|
|
139
|
+
|
|
91
140
|
## Quick Start
|
|
92
141
|
|
|
93
142
|
### Browser (ESM)
|
package/dist/index.cjs
CHANGED
|
@@ -4026,6 +4026,25 @@ var wasm = null;
|
|
|
4026
4026
|
var initialized = false;
|
|
4027
4027
|
var initializationError = null;
|
|
4028
4028
|
var initializationPromise = null;
|
|
4029
|
+
/**
 * Best-effort loader that binds the PDFium module to the given WASM module.
 *
 * NOTE: in this rendered diff the function body is interleaved with
 * diff-viewer markup (`|`, line numbers, `+`); only the code lines belong
 * to the function.
 *
 * Behaviour, in order:
 *  1. Returns immediately when `wasmModule` is falsy or does not expose an
 *     `initialize_pdfium_render` function.
 *  2. Returns after a `console.debug` message when `isBrowser()` is false.
 *  3. Dynamically imports "./pdfium.js". If its `default` export is a
 *     function, that function is called and awaited to obtain the PDFium
 *     instance; otherwise the imported module namespace itself is used.
 *  4. Calls `wasmModule.initialize_pdfium_render(pdfium, wasmModule, false)`
 *     and emits a `console.warn` when the result is falsy.
 *     NOTE(review): the meaning of the third argument (`false`) is not
 *     visible here — confirm against the binding's documentation.
 *
 * Errors thrown while importing or binding (steps 3-4) are caught and
 * reported through `console.debug`; they are not re-thrown to the caller.
 *
 * @param {object} wasmModule - Module object expected to expose
 *   `initialize_pdfium_render`.
 * @returns {Promise<void>} Resolves with no value once the attempt finishes.
 */
async function initializePdfiumAsync(wasmModule) {
|
|
4030
|
+
if (!wasmModule || typeof wasmModule.initialize_pdfium_render !== "function") {
|
|
4031
|
+
return;
|
|
4032
|
+
}
|
|
4033
|
+
if (!isBrowser()) {
|
|
4034
|
+
console.debug("PDFium initialization skipped (non-browser environment)");
|
|
4035
|
+
return;
|
|
4036
|
+
}
|
|
4037
|
+
try {
|
|
4038
|
+
const pdfiumModule = await import("./pdfium.js");
|
|
4039
|
+
const pdfium = typeof pdfiumModule.default === "function" ? await pdfiumModule.default() : pdfiumModule;
|
|
4040
|
+
const success = wasmModule.initialize_pdfium_render(pdfium, wasmModule, false);
|
|
4041
|
+
if (!success) {
|
|
4042
|
+
console.warn("PDFium initialization returned false");
|
|
4043
|
+
}
|
|
4044
|
+
} catch (error) {
|
|
4045
|
+
console.debug("PDFium initialization error:", error);
|
|
4046
|
+
}
|
|
4047
|
+
}
|
|
4029
4048
|
async function initWasm() {
|
|
4030
4049
|
if (initialized) {
|
|
4031
4050
|
return;
|
|
@@ -4040,7 +4059,7 @@ async function initWasm() {
|
|
|
4040
4059
|
}
|
|
4041
4060
|
let wasmModule;
|
|
4042
4061
|
try {
|
|
4043
|
-
wasmModule = await import("
|
|
4062
|
+
wasmModule = await import("./pkg/kreuzberg_wasm.js");
|
|
4044
4063
|
} catch {
|
|
4045
4064
|
wasmModule = await import("./kreuzberg_wasm.js");
|
|
4046
4065
|
}
|
|
@@ -4048,6 +4067,11 @@ async function initWasm() {
|
|
|
4048
4067
|
if (wasm && typeof wasm.default === "function") {
|
|
4049
4068
|
await wasm.default();
|
|
4050
4069
|
}
|
|
4070
|
+
if (isBrowser() && wasm && typeof wasm.initialize_pdfium_render === "function") {
|
|
4071
|
+
initializePdfiumAsync(wasm).catch((error) => {
|
|
4072
|
+
console.warn("PDFium auto-initialization failed (PDF extraction disabled):", error);
|
|
4073
|
+
});
|
|
4074
|
+
}
|
|
4051
4075
|
initialized = true;
|
|
4052
4076
|
initializationError = null;
|
|
4053
4077
|
} catch (error) {
|