@kreuzberg/wasm 4.5.1 → 4.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -5
- package/dist/index.js +36 -23
- package/dist/index.js.map +1 -1
- package/dist/initialization/wasm-loader.d.ts +14 -3
- package/dist/initialization/wasm-loader.d.ts.map +1 -1
- package/dist/pkg/README.md +5 -5
- package/dist/pkg/kreuzberg_wasm.js +4 -4
- package/dist/pkg/kreuzberg_wasm_bg.js +4 -4
- package/dist/pkg/kreuzberg_wasm_bg.wasm +0 -0
- package/dist/pkg/kreuzberg_wasm_bg.wasm.d.ts +2 -2
- package/package.json +122 -120
package/README.md
CHANGED
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
<img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
|
|
23
23
|
</a>
|
|
24
24
|
<a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
|
|
25
|
-
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.5.
|
|
25
|
+
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.5.4" alt="Go">
|
|
26
26
|
</a>
|
|
27
27
|
<a href="https://www.nuget.org/packages/Kreuzberg/">
|
|
28
28
|
<img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
|
|
@@ -61,7 +61,7 @@
|
|
|
61
61
|
</div>
|
|
62
62
|
|
|
63
63
|
|
|
64
|
-
Extract text, tables, images, and metadata from
|
|
64
|
+
Extract text, tables, images, and metadata from 91+ file formats including PDF, Office documents, and images. WebAssembly bindings for browsers, Deno, and Cloudflare Workers with portable deployment and multi-threading support.
|
|
65
65
|
|
|
66
66
|
|
|
67
67
|
## Installation
|
|
@@ -271,9 +271,9 @@ extractDocuments(fileBytes, mimes)
|
|
|
271
271
|
|
|
272
272
|
## Features
|
|
273
273
|
|
|
274
|
-
### Supported File Formats (
|
|
274
|
+
### Supported File Formats (91+)
|
|
275
275
|
|
|
276
|
-
|
|
276
|
+
91+ file formats across 8 major categories with intelligent format detection and comprehensive metadata extraction.
|
|
277
277
|
|
|
278
278
|
#### Office Documents
|
|
279
279
|
|
|
@@ -510,7 +510,7 @@ WASM runs in single-threaded environments without access to ONNX Runtime, which
|
|
|
510
510
|
- **Language Detection** – Identify document language
|
|
511
511
|
- **Image Extraction** – Embedded images from documents
|
|
512
512
|
|
|
513
|
-
All
|
|
513
|
+
All 91+ file formats supported by Kreuzberg are available in WASM, with the exception that features requiring ONNX Runtime (layout detection) will fail gracefully with an unsupported error.
|
|
514
514
|
|
|
515
515
|
## Documentation
|
|
516
516
|
|
package/dist/index.js
CHANGED
|
@@ -550,29 +550,42 @@ function setInitializationPromise(promise) {
|
|
|
550
550
|
|
|
551
551
|
// typescript/initialization/wasm-loader.ts
|
|
552
552
|
async function loadWasmBinaryForNode() {
|
|
553
|
-
if (
|
|
554
|
-
|
|
553
|
+
if (isNode()) {
|
|
554
|
+
try {
|
|
555
|
+
const fs = await import(
|
|
556
|
+
/* @vite-ignore */
|
|
557
|
+
"fs/promises"
|
|
558
|
+
);
|
|
559
|
+
const path = await import(
|
|
560
|
+
/* @vite-ignore */
|
|
561
|
+
"path"
|
|
562
|
+
);
|
|
563
|
+
const url = await import(
|
|
564
|
+
/* @vite-ignore */
|
|
565
|
+
"url"
|
|
566
|
+
);
|
|
567
|
+
const __dirname = path.dirname(url.fileURLToPath(import.meta.url));
|
|
568
|
+
const wasmPath = path.join(__dirname, "..", "pkg", "kreuzberg_wasm_bg.wasm");
|
|
569
|
+
const wasmBuffer = await fs.readFile(wasmPath);
|
|
570
|
+
return new Uint8Array(wasmBuffer);
|
|
571
|
+
} catch {
|
|
572
|
+
return void 0;
|
|
573
|
+
}
|
|
555
574
|
}
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
);
|
|
569
|
-
const __dirname = path.dirname(url.fileURLToPath(import.meta.url));
|
|
570
|
-
const wasmPath = path.join(__dirname, "..", "pkg", "kreuzberg_wasm_bg.wasm");
|
|
571
|
-
const wasmBuffer = await fs.readFile(wasmPath);
|
|
572
|
-
return new Uint8Array(wasmBuffer);
|
|
573
|
-
} catch {
|
|
574
|
-
return void 0;
|
|
575
|
+
if (isDeno()) {
|
|
576
|
+
try {
|
|
577
|
+
const denoGlobal = globalThis;
|
|
578
|
+
const DenoNs = denoGlobal.Deno;
|
|
579
|
+
const readFile = DenoNs.readFile;
|
|
580
|
+
const moduleUrl = new URL(import.meta.url);
|
|
581
|
+
const wasmUrl = new URL("../pkg/kreuzberg_wasm_bg.wasm", moduleUrl);
|
|
582
|
+
const wasmBuffer = await readFile(wasmUrl);
|
|
583
|
+
return wasmBuffer;
|
|
584
|
+
} catch {
|
|
585
|
+
return void 0;
|
|
586
|
+
}
|
|
575
587
|
}
|
|
588
|
+
return void 0;
|
|
576
589
|
}
|
|
577
590
|
function getVersion() {
|
|
578
591
|
if (!isInitialized()) {
|
|
@@ -629,9 +642,9 @@ async function initWasm(options) {
|
|
|
629
642
|
const wasmBinary = await loadWasmBinaryForNode();
|
|
630
643
|
if (wasmBinary) {
|
|
631
644
|
await loadedModule.default(wasmBinary);
|
|
632
|
-
} else if (isEdgeEnvironment()) {
|
|
645
|
+
} else if (isEdgeEnvironment() || isDeno()) {
|
|
633
646
|
throw new Error(
|
|
634
|
-
"Edge environment detected (Cloudflare Workers / Vercel Edge). Cannot automatically load .wasm file because fetch() does not support file:// URLs. Pass the WASM module explicitly:\n\n import wasmModule from '@kreuzberg/wasm/kreuzberg_wasm_bg.wasm';\n await initWasm({ wasmModule });\n"
|
|
647
|
+
"Edge/restricted environment detected (Cloudflare Workers / Vercel Edge / Supabase). Cannot automatically load .wasm file because fetch() does not support file:// URLs. Pass the WASM module explicitly:\n\n import wasmModule from '@kreuzberg/wasm/kreuzberg_wasm_bg.wasm';\n await initWasm({ wasmModule });\n"
|
|
635
648
|
);
|
|
636
649
|
} else {
|
|
637
650
|
await loadedModule.default();
|