@kreuzberg/wasm 4.0.0-rc.10 → 4.0.0-rc.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -88,6 +88,55 @@ yarn add @kreuzberg/wasm
88
88
  import { extractBytes } from "npm:@kreuzberg/wasm@^4.0.0";
89
89
  ```
90
90
 
91
+ ## PDF Support and PDFium Initialization
92
+
93
+ **IMPORTANT**: PDF extraction requires a one-time initialization step to load the PDFium WASM module.
94
+
95
+ ### Why PDFium Initialization is Needed
96
+
97
+ Kreuzberg uses the high-performance PDFium library (from Google Chrome) for PDF processing. In WASM environments, PDFium runs as a separate WASM module that must be loaded and bound to the main kreuzberg module before PDF extraction can work.
98
+
99
+ ### How to Initialize PDFium
100
+
101
+ ```javascript
102
+ import init, { initialize_pdfium_render, extractBytes } from '@kreuzberg/wasm';
103
+ import pdfiumModule from '@kreuzberg/wasm/pdfium.js';
104
+
105
+ // Step 1: Initialize kreuzberg WASM and keep the returned module for Step 3
106
+ const wasm = await init();
107
+
108
+ // Step 2: Load PDFium WASM module
109
+ const pdfium = await pdfiumModule();
110
+
111
+ // Step 3: Bind kreuzberg to PDFium (required before any PDF operations)
112
+ const success = initialize_pdfium_render(pdfium, wasm, false);
113
+ if (!success) {
114
+ throw new Error('Failed to initialize PDFium');
115
+ }
116
+
117
+ // Step 4: Now PDF extraction works (pdfFile is a File/Blob, e.g. from an <input type="file">)
118
+ const pdfBytes = new Uint8Array(await pdfFile.arrayBuffer());
119
+ const result = await extractBytes(pdfBytes);
120
+ console.log(result.text);
121
+ ```
122
+
123
+ ### Error: "PdfiumWASMModuleNotConfigured"
124
+
125
+ If you see this error, it means `initialize_pdfium_render()` was not called before attempting PDF extraction. Make sure to follow the initialization sequence above.
126
+
127
+ ### PDFium Files Location
128
+
129
+ The PDFium WASM files (`pdfium.js`, `pdfium.wasm`) should be included in the `@kreuzberg/wasm` package. If they're missing:
130
+
131
+ 1. Check your `node_modules/@kreuzberg/wasm/` directory
132
+ 2. Ensure both `pdfium.js` and `pdfium.wasm` are present
133
+ 3. If missing, reinstall the package
134
+
135
+ For self-hosted builds, copy the files from:
136
+ ```bash
137
+ target/wasm32-unknown-unknown/release/build/kreuzberg-*/out/pdfium/release/node/
138
+ ```
139
+
91
140
  ## Quick Start
92
141
 
93
142
  ### Browser (ESM)
package/dist/index.cjs CHANGED
@@ -4026,6 +4026,25 @@ var wasm = null;
4026
4026
  var initialized = false;
4027
4027
  var initializationError = null;
4028
4028
  var initializationPromise = null;
4029
+ async function initializePdfiumAsync(wasmModule) {
4030
+ if (!wasmModule || typeof wasmModule.initialize_pdfium_render !== "function") {
4031
+ return;
4032
+ }
4033
+ if (!isBrowser()) {
4034
+ console.debug("PDFium initialization skipped (non-browser environment)");
4035
+ return;
4036
+ }
4037
+ try {
4038
+ const pdfiumModule = await import("./pdfium.js");
4039
+ const pdfium = typeof pdfiumModule.default === "function" ? await pdfiumModule.default() : pdfiumModule;
4040
+ const success = wasmModule.initialize_pdfium_render(pdfium, wasmModule, false);
4041
+ if (!success) {
4042
+ console.warn("PDFium initialization returned false");
4043
+ }
4044
+ } catch (error) {
4045
+ console.debug("PDFium initialization error:", error);
4046
+ }
4047
+ }
4029
4048
  async function initWasm() {
4030
4049
  if (initialized) {
4031
4050
  return;
@@ -4040,7 +4059,7 @@ async function initWasm() {
4040
4059
  }
4041
4060
  let wasmModule;
4042
4061
  try {
4043
- wasmModule = await import("../pkg/kreuzberg_wasm.js");
4062
+ wasmModule = await import("./pkg/kreuzberg_wasm.js");
4044
4063
  } catch {
4045
4064
  wasmModule = await import("./kreuzberg_wasm.js");
4046
4065
  }
@@ -4048,6 +4067,11 @@ async function initWasm() {
4048
4067
  if (wasm && typeof wasm.default === "function") {
4049
4068
  await wasm.default();
4050
4069
  }
4070
+ if (isBrowser() && wasm && typeof wasm.initialize_pdfium_render === "function") {
4071
+ initializePdfiumAsync(wasm).catch((error) => {
4072
+ console.warn("PDFium auto-initialization failed (PDF extraction disabled):", error);
4073
+ });
4074
+ }
4051
4075
  initialized = true;
4052
4076
  initializationError = null;
4053
4077
  } catch (error) {