@kreuzberg/wasm 4.0.0-rc.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +982 -0
- package/dist/adapters/wasm-adapter.d.mts +121 -0
- package/dist/adapters/wasm-adapter.d.ts +121 -0
- package/dist/adapters/wasm-adapter.js +241 -0
- package/dist/adapters/wasm-adapter.js.map +1 -0
- package/dist/adapters/wasm-adapter.mjs +221 -0
- package/dist/adapters/wasm-adapter.mjs.map +1 -0
- package/dist/index.d.mts +466 -0
- package/dist/index.d.ts +466 -0
- package/dist/index.js +383 -0
- package/dist/index.js.map +1 -0
- package/dist/index.mjs +384 -0
- package/dist/index.mjs.map +1 -0
- package/dist/kreuzberg_wasm.d.mts +758 -0
- package/dist/kreuzberg_wasm.d.ts +758 -0
- package/dist/kreuzberg_wasm.js +1913 -0
- package/dist/kreuzberg_wasm.mjs +48 -0
- package/dist/kreuzberg_wasm_bg.wasm +0 -0
- package/dist/kreuzberg_wasm_bg.wasm.d.ts +54 -0
- package/dist/ocr/registry.d.mts +102 -0
- package/dist/ocr/registry.d.ts +102 -0
- package/dist/ocr/registry.js +90 -0
- package/dist/ocr/registry.js.map +1 -0
- package/dist/ocr/registry.mjs +70 -0
- package/dist/ocr/registry.mjs.map +1 -0
- package/dist/ocr/tesseract-wasm-backend.d.mts +257 -0
- package/dist/ocr/tesseract-wasm-backend.d.ts +257 -0
- package/dist/ocr/tesseract-wasm-backend.js +454 -0
- package/dist/ocr/tesseract-wasm-backend.js.map +1 -0
- package/dist/ocr/tesseract-wasm-backend.mjs +424 -0
- package/dist/ocr/tesseract-wasm-backend.mjs.map +1 -0
- package/dist/runtime.d.mts +256 -0
- package/dist/runtime.d.ts +256 -0
- package/dist/runtime.js +172 -0
- package/dist/runtime.js.map +1 -0
- package/dist/runtime.mjs +152 -0
- package/dist/runtime.mjs.map +1 -0
- package/dist/snippets/wasm-bindgen-rayon-38edf6e439f6d70d/src/workerHelpers.js +107 -0
- package/dist/types-GJVIvbPy.d.mts +221 -0
- package/dist/types-GJVIvbPy.d.ts +221 -0
- package/package.json +138 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../typescript/runtime.ts"],"sourcesContent":["/**\n * Runtime detection and environment-specific utilities\n *\n * This module provides utilities for detecting the JavaScript runtime environment,\n * checking for feature availability, and enabling environment-specific WASM loading strategies.\n *\n * @example Basic Runtime Detection\n * ```typescript\n * import { detectRuntime, isBrowser, isNode } from '@kreuzberg/wasm/runtime';\n *\n * if (isBrowser()) {\n * console.log('Running in browser');\n * } else if (isNode()) {\n * console.log('Running in Node.js');\n * }\n * ```\n *\n * @example Feature Detection\n * ```typescript\n * import { hasFileApi, hasWorkers } from '@kreuzberg/wasm/runtime';\n *\n * if (hasFileApi()) {\n * // Can use File API for browser file uploads\n * }\n *\n * if (hasWorkers()) {\n * // Can use Web Workers for parallel processing\n * }\n * ```\n */\n\nexport type RuntimeType = \"browser\" | \"node\" | \"deno\" | \"bun\" | \"unknown\";\n\n/**\n * WebAssembly capabilities available in the runtime\n */\nexport interface WasmCapabilities {\n\t/** Runtime environment type */\n\truntime: RuntimeType;\n\t/** WebAssembly support available */\n\thasWasm: boolean;\n\t/** Streaming WebAssembly instantiation available */\n\thasWasmStreaming: boolean;\n\t/** File API available (browser) */\n\thasFileApi: boolean;\n\t/** Blob API available */\n\thasBlob: boolean;\n\t/** Worker support available */\n\thasWorkers: boolean;\n\t/** SharedArrayBuffer available (may be restricted) */\n\thasSharedArrayBuffer: boolean;\n\t/** Module Workers available */\n\thasModuleWorkers: boolean;\n\t/** BigInt support */\n\thasBigInt: boolean;\n\t/** Specific runtime version if available */\n\truntimeVersion?: string;\n}\n\n/**\n * Detect the current JavaScript runtime\n *\n * Checks for various global objects and properties to determine\n * which JavaScript runtime environment is currently executing.\n *\n * @returns The detected runtime type\n *\n * 
@example\n * ```typescript\n * import { detectRuntime } from '@kreuzberg/wasm/runtime';\n *\n * const runtime = detectRuntime();\n * switch (runtime) {\n * case 'browser':\n * console.log('Running in browser');\n * break;\n * case 'node':\n * console.log('Running in Node.js');\n * break;\n * case 'deno':\n * console.log('Running in Deno');\n * break;\n * case 'bun':\n * console.log('Running in Bun');\n * break;\n * }\n * ```\n */\nexport function detectRuntime(): RuntimeType {\n\t// Check for Deno\n\tif (typeof (globalThis as unknown as Record<string, unknown>).Deno !== \"undefined\") {\n\t\treturn \"deno\";\n\t}\n\n\t// Check for Bun\n\tif (typeof (globalThis as unknown as Record<string, unknown>).Bun !== \"undefined\") {\n\t\treturn \"bun\";\n\t}\n\n\t// Check for Node.js\n\tif (typeof process !== \"undefined\" && process.versions && process.versions.node) {\n\t\treturn \"node\";\n\t}\n\n\t// Check for browser\n\tif (typeof window !== \"undefined\" && typeof document !== \"undefined\") {\n\t\treturn \"browser\";\n\t}\n\n\treturn \"unknown\";\n}\n\n/**\n * Check if running in a browser environment\n *\n * @returns True if running in a browser, false otherwise\n */\nexport function isBrowser(): boolean {\n\treturn detectRuntime() === \"browser\";\n}\n\n/**\n * Check if running in Node.js\n *\n * @returns True if running in Node.js, false otherwise\n */\nexport function isNode(): boolean {\n\treturn detectRuntime() === \"node\";\n}\n\n/**\n * Check if running in Deno\n *\n * @returns True if running in Deno, false otherwise\n */\nexport function isDeno(): boolean {\n\treturn detectRuntime() === \"deno\";\n}\n\n/**\n * Check if running in Bun\n *\n * @returns True if running in Bun, false otherwise\n */\nexport function isBun(): boolean {\n\treturn detectRuntime() === \"bun\";\n}\n\n/**\n * Check if running in a web environment (browser or similar)\n *\n * @returns True if running in a web browser, false otherwise\n */\nexport function isWebEnvironment(): boolean 
{\n\tconst runtime = detectRuntime();\n\treturn runtime === \"browser\";\n}\n\n/**\n * Check if running in a server-like environment (Node.js, Deno, Bun)\n *\n * @returns True if running on a server runtime, false otherwise\n */\nexport function isServerEnvironment(): boolean {\n\tconst runtime = detectRuntime();\n\treturn runtime === \"node\" || runtime === \"deno\" || runtime === \"bun\";\n}\n\n/**\n * Check if File API is available\n *\n * The File API is required for handling browser file uploads.\n *\n * @returns True if File API is available, false otherwise\n *\n * @example\n * ```typescript\n * if (hasFileApi()) {\n * const fileInput = document.getElementById('file');\n * fileInput.addEventListener('change', (e) => {\n * const file = e.target.files?.[0];\n * // Handle file\n * });\n * }\n * ```\n */\nexport function hasFileApi(): boolean {\n\treturn typeof window !== \"undefined\" && typeof File !== \"undefined\" && typeof Blob !== \"undefined\";\n}\n\n/**\n * Check if Blob API is available\n *\n * @returns True if Blob API is available, false otherwise\n */\nexport function hasBlob(): boolean {\n\treturn typeof Blob !== \"undefined\";\n}\n\n/**\n * Check if Web Workers are available\n *\n * @returns True if Web Workers can be created, false otherwise\n */\nexport function hasWorkers(): boolean {\n\treturn typeof Worker !== \"undefined\";\n}\n\n/**\n * Check if SharedArrayBuffer is available\n *\n * Note: SharedArrayBuffer is restricted in some browser contexts\n * due to security considerations (Spectre/Meltdown mitigations).\n *\n * @returns True if SharedArrayBuffer is available, false otherwise\n */\nexport function hasSharedArrayBuffer(): boolean {\n\treturn typeof SharedArrayBuffer !== \"undefined\";\n}\n\n/**\n * Check if module workers are available\n *\n * Module workers allow importing ES modules in worker threads.\n *\n * @returns True if module workers are supported, false otherwise\n */\nexport function hasModuleWorkers(): boolean {\n\tif 
(!hasWorkers()) {\n\t\treturn false;\n\t}\n\n\ttry {\n\t\t// Try to detect module worker support\n\t\tconst blob = new Blob(['console.log(\"test\")'], {\n\t\t\ttype: \"application/javascript\",\n\t\t});\n\t\tconst workerUrl = URL.createObjectURL(blob);\n\t\ttry {\n\t\t\t// Module workers require type: 'module' option\n\t\t\t// We can't actually instantiate without issues, so we check the API exists\n\t\t\treturn true;\n\t\t} finally {\n\t\t\tURL.revokeObjectURL(workerUrl);\n\t\t}\n\t} catch {\n\t\treturn false;\n\t}\n}\n\n/**\n * Check if WebAssembly is available\n *\n * @returns True if WebAssembly is supported, false otherwise\n */\nexport function hasWasm(): boolean {\n\treturn typeof WebAssembly !== \"undefined\" && WebAssembly.instantiate !== undefined;\n}\n\n/**\n * Check if WebAssembly.instantiateStreaming is available\n *\n * Streaming instantiation is more efficient than buffering the entire WASM module.\n *\n * @returns True if streaming WebAssembly is supported, false otherwise\n */\nexport function hasWasmStreaming(): boolean {\n\treturn typeof WebAssembly !== \"undefined\" && WebAssembly.instantiateStreaming !== undefined;\n}\n\n/**\n * Check if BigInt is available\n *\n * @returns True if BigInt type is supported, false otherwise\n */\nexport function hasBigInt(): boolean {\n\ttry {\n\t\tconst test = BigInt(\"1\");\n\t\treturn typeof test === \"bigint\";\n\t} catch {\n\t\treturn false;\n\t}\n}\n\n/**\n * Get runtime version information\n *\n * @returns Version string if available, undefined otherwise\n *\n * @example\n * ```typescript\n * const version = getRuntimeVersion();\n * console.log(`Running on Node ${version}`); // \"Running on Node 18.12.0\"\n * ```\n */\nexport function getRuntimeVersion(): string | undefined {\n\tconst runtime = detectRuntime();\n\n\tswitch (runtime) {\n\t\tcase \"node\":\n\t\t\treturn process.version?.substring(1); // Remove 'v' prefix\n\t\tcase \"deno\": {\n\t\t\tconst deno = (globalThis as unknown as Record<string, 
unknown>).Deno as Record<string, unknown> | undefined;\n\t\t\tconst version = deno?.version as Record<string, unknown> | undefined;\n\t\t\treturn version?.deno as string | undefined;\n\t\t}\n\t\tcase \"bun\": {\n\t\t\tconst bun = (globalThis as unknown as Record<string, unknown>).Bun as Record<string, unknown> | undefined;\n\t\t\treturn bun?.version as string | undefined;\n\t\t}\n\t\tdefault:\n\t\t\treturn undefined;\n\t}\n}\n\n/**\n * Get comprehensive WebAssembly capabilities for current runtime\n *\n * Returns detailed information about WASM and related APIs available\n * in the current runtime environment.\n *\n * @returns Object describing available WASM capabilities\n *\n * @example\n * ```typescript\n * import { getWasmCapabilities } from '@kreuzberg/wasm/runtime';\n *\n * const caps = getWasmCapabilities();\n * console.log(`WASM available: ${caps.hasWasm}`);\n * console.log(`Streaming WASM: ${caps.hasWasmStreaming}`);\n * console.log(`Workers available: ${caps.hasWorkers}`);\n *\n * if (caps.hasWasm && caps.hasWorkers) {\n * // Can offload WASM processing to workers\n * }\n * ```\n */\nexport function getWasmCapabilities(): WasmCapabilities {\n\tconst runtime = detectRuntime();\n\tconst version = getRuntimeVersion();\n\tconst capabilities: WasmCapabilities = {\n\t\truntime,\n\t\thasWasm: hasWasm(),\n\t\thasWasmStreaming: hasWasmStreaming(),\n\t\thasFileApi: hasFileApi(),\n\t\thasBlob: hasBlob(),\n\t\thasWorkers: hasWorkers(),\n\t\thasSharedArrayBuffer: hasSharedArrayBuffer(),\n\t\thasModuleWorkers: hasModuleWorkers(),\n\t\thasBigInt: hasBigInt(),\n\t\t...(version !== undefined ? 
{ runtimeVersion: version } : {}),\n\t};\n\treturn capabilities;\n}\n\n/**\n * Get comprehensive runtime information\n *\n * Returns detailed information about the current runtime environment,\n * capabilities, and identifying information.\n *\n * @returns Object with runtime details and capabilities\n *\n * @example\n * ```typescript\n * const info = getRuntimeInfo();\n * console.log(info.runtime); // 'browser' | 'node' | 'deno' | 'bun'\n * console.log(info.isBrowser); // true/false\n * console.log(info.userAgent); // Browser user agent string\n * console.log(info.capabilities); // Detailed capability information\n * ```\n */\nexport function getRuntimeInfo() {\n\tconst runtime = detectRuntime();\n\tconst capabilities = getWasmCapabilities();\n\n\treturn {\n\t\truntime,\n\t\tisBrowser: isBrowser(),\n\t\tisNode: isNode(),\n\t\tisDeno: isDeno(),\n\t\tisBun: isBun(),\n\t\tisWeb: isWebEnvironment(),\n\t\tisServer: isServerEnvironment(),\n\t\truntimeVersion: getRuntimeVersion(),\n\t\tuserAgent: typeof navigator !== \"undefined\" ? 
navigator.userAgent : \"N/A\",\n\t\tcapabilities,\n\t};\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAwFO,SAAS,gBAA6B;AAE5C,MAAI,OAAQ,WAAkD,SAAS,aAAa;AACnF,WAAO;AAAA,EACR;AAGA,MAAI,OAAQ,WAAkD,QAAQ,aAAa;AAClF,WAAO;AAAA,EACR;AAGA,MAAI,OAAO,YAAY,eAAe,QAAQ,YAAY,QAAQ,SAAS,MAAM;AAChF,WAAO;AAAA,EACR;AAGA,MAAI,OAAO,WAAW,eAAe,OAAO,aAAa,aAAa;AACrE,WAAO;AAAA,EACR;AAEA,SAAO;AACR;AAOO,SAAS,YAAqB;AACpC,SAAO,cAAc,MAAM;AAC5B;AAOO,SAAS,SAAkB;AACjC,SAAO,cAAc,MAAM;AAC5B;AAOO,SAAS,SAAkB;AACjC,SAAO,cAAc,MAAM;AAC5B;AAOO,SAAS,QAAiB;AAChC,SAAO,cAAc,MAAM;AAC5B;AAOO,SAAS,mBAA4B;AAC3C,QAAM,UAAU,cAAc;AAC9B,SAAO,YAAY;AACpB;AAOO,SAAS,sBAA+B;AAC9C,QAAM,UAAU,cAAc;AAC9B,SAAO,YAAY,UAAU,YAAY,UAAU,YAAY;AAChE;AAoBO,SAAS,aAAsB;AACrC,SAAO,OAAO,WAAW,eAAe,OAAO,SAAS,eAAe,OAAO,SAAS;AACxF;AAOO,SAAS,UAAmB;AAClC,SAAO,OAAO,SAAS;AACxB;AAOO,SAAS,aAAsB;AACrC,SAAO,OAAO,WAAW;AAC1B;AAUO,SAAS,uBAAgC;AAC/C,SAAO,OAAO,sBAAsB;AACrC;AASO,SAAS,mBAA4B;AAC3C,MAAI,CAAC,WAAW,GAAG;AAClB,WAAO;AAAA,EACR;AAEA,MAAI;AAEH,UAAM,OAAO,IAAI,KAAK,CAAC,qBAAqB,GAAG;AAAA,MAC9C,MAAM;AAAA,IACP,CAAC;AACD,UAAM,YAAY,IAAI,gBAAgB,IAAI;AAC1C,QAAI;AAGH,aAAO;AAAA,IACR,UAAE;AACD,UAAI,gBAAgB,SAAS;AAAA,IAC9B;AAAA,EACD,QAAQ;AACP,WAAO;AAAA,EACR;AACD;AAOO,SAAS,UAAmB;AAClC,SAAO,OAAO,gBAAgB,eAAe,YAAY,gBAAgB;AAC1E;AASO,SAAS,mBAA4B;AAC3C,SAAO,OAAO,gBAAgB,eAAe,YAAY,yBAAyB;AACnF;AAOO,SAAS,YAAqB;AACpC,MAAI;AACH,UAAM,OAAO,OAAO,GAAG;AACvB,WAAO,OAAO,SAAS;AAAA,EACxB,QAAQ;AACP,WAAO;AAAA,EACR;AACD;AAaO,SAAS,oBAAwC;AACvD,QAAM,UAAU,cAAc;AAE9B,UAAQ,SAAS;AAAA,IAChB,KAAK;AACJ,aAAO,QAAQ,SAAS,UAAU,CAAC;AAAA;AAAA,IACpC,KAAK,QAAQ;AACZ,YAAM,OAAQ,WAAkD;AAChE,YAAM,UAAU,MAAM;AACtB,aAAO,SAAS;AAAA,IACjB;AAAA,IACA,KAAK,OAAO;AACX,YAAM,MAAO,WAAkD;AAC/D,aAAO,KAAK;AAAA,IACb;AAAA,IACA;AACC,aAAO;AAAA,EACT;AACD;AAwBO,SAAS,sBAAwC;AACvD,QAAM,UAAU,cAAc;AAC9B,QAAM,UAAU,kBAAkB;AAClC,QAAM,eAAiC;AAAA,IACtC;AAAA,IACA,SAAS,QAAQ;AAAA,IACjB,kBAAkB,iBAAiB;AAAA,IACnC,YAAY,WAAW;AAAA,IACvB,SAAS,QAAQ;AAAA,IAC
jB,YAAY,WAAW;AAAA,IACvB,sBAAsB,qBAAqB;AAAA,IAC3C,kBAAkB,iBAAiB;AAAA,IACnC,WAAW,UAAU;AAAA,IACrB,GAAI,YAAY,SAAY,EAAE,gBAAgB,QAAQ,IAAI,CAAC;AAAA,EAC5D;AACA,SAAO;AACR;AAmBO,SAAS,iBAAiB;AAChC,QAAM,UAAU,cAAc;AAC9B,QAAM,eAAe,oBAAoB;AAEzC,SAAO;AAAA,IACN;AAAA,IACA,WAAW,UAAU;AAAA,IACrB,QAAQ,OAAO;AAAA,IACf,QAAQ,OAAO;AAAA,IACf,OAAO,MAAM;AAAA,IACb,OAAO,iBAAiB;AAAA,IACxB,UAAU,oBAAoB;AAAA,IAC9B,gBAAgB,kBAAkB;AAAA,IAClC,WAAW,OAAO,cAAc,cAAc,UAAU,YAAY;AAAA,IACpE;AAAA,EACD;AACD;","names":[]}
|
package/dist/runtime.mjs
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
function detectRuntime() {
|
|
2
|
+
if (typeof globalThis.Deno !== "undefined") {
|
|
3
|
+
return "deno";
|
|
4
|
+
}
|
|
5
|
+
if (typeof globalThis.Bun !== "undefined") {
|
|
6
|
+
return "bun";
|
|
7
|
+
}
|
|
8
|
+
if (typeof process !== "undefined" && process.versions && process.versions.node) {
|
|
9
|
+
return "node";
|
|
10
|
+
}
|
|
11
|
+
if (typeof window !== "undefined" && typeof document !== "undefined") {
|
|
12
|
+
return "browser";
|
|
13
|
+
}
|
|
14
|
+
return "unknown";
|
|
15
|
+
}
|
|
16
|
+
/**
 * Report whether the detected runtime is a browser.
 *
 * @returns true when `detectRuntime()` yields "browser", false otherwise
 */
function isBrowser() {
  const current = detectRuntime();
  return current === "browser";
}
|
|
19
|
+
/**
 * Report whether the detected runtime is Node.js.
 *
 * @returns true when `detectRuntime()` yields "node", false otherwise
 */
function isNode() {
  const current = detectRuntime();
  return current === "node";
}
|
|
22
|
+
/**
 * Report whether the detected runtime is Deno.
 *
 * @returns true when `detectRuntime()` yields "deno", false otherwise
 */
function isDeno() {
  const current = detectRuntime();
  return current === "deno";
}
|
|
25
|
+
/**
 * Report whether the detected runtime is Bun.
 *
 * @returns true when `detectRuntime()` yields "bun", false otherwise
 */
function isBun() {
  const current = detectRuntime();
  return current === "bun";
}
|
|
28
|
+
/**
 * Report whether the code is running in a web environment.
 *
 * Only the "browser" runtime counts as a web environment here.
 *
 * @returns true when `detectRuntime()` yields "browser", false otherwise
 */
function isWebEnvironment() {
  return detectRuntime() === "browser";
}
|
|
32
|
+
/**
 * Report whether the code is running on a server-style runtime.
 *
 * @returns true when the detected runtime is "node", "deno" or "bun"
 */
function isServerEnvironment() {
  const serverRuntimes = ["node", "deno", "bun"];
  return serverRuntimes.includes(detectRuntime());
}
|
|
36
|
+
function hasFileApi() {
|
|
37
|
+
return typeof window !== "undefined" && typeof File !== "undefined" && typeof Blob !== "undefined";
|
|
38
|
+
}
|
|
39
|
+
function hasBlob() {
|
|
40
|
+
return typeof Blob !== "undefined";
|
|
41
|
+
}
|
|
42
|
+
function hasWorkers() {
|
|
43
|
+
return typeof Worker !== "undefined";
|
|
44
|
+
}
|
|
45
|
+
function hasSharedArrayBuffer() {
|
|
46
|
+
return typeof SharedArrayBuffer !== "undefined";
|
|
47
|
+
}
|
|
48
|
+
function hasModuleWorkers() {
|
|
49
|
+
if (!hasWorkers()) {
|
|
50
|
+
return false;
|
|
51
|
+
}
|
|
52
|
+
try {
|
|
53
|
+
const blob = new Blob(['console.log("test")'], {
|
|
54
|
+
type: "application/javascript"
|
|
55
|
+
});
|
|
56
|
+
const workerUrl = URL.createObjectURL(blob);
|
|
57
|
+
try {
|
|
58
|
+
return true;
|
|
59
|
+
} finally {
|
|
60
|
+
URL.revokeObjectURL(workerUrl);
|
|
61
|
+
}
|
|
62
|
+
} catch {
|
|
63
|
+
return false;
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
/**
 * Report whether WebAssembly can be instantiated in this runtime.
 *
 * @returns true when the `WebAssembly` global exists and exposes
 *   `instantiate`, false otherwise
 */
function hasWasm() {
  if (typeof WebAssembly === "undefined") {
    return false;
  }
  return WebAssembly.instantiate !== undefined;
}
|
|
69
|
+
function hasWasmStreaming() {
|
|
70
|
+
return typeof WebAssembly !== "undefined" && WebAssembly.instantiateStreaming !== void 0;
|
|
71
|
+
}
|
|
72
|
+
/**
 * Report whether `BigInt` is usable.
 *
 * Converts the string "1" with `BigInt` and checks that the result has type
 * "bigint". Any error during the conversion counts as "unsupported".
 *
 * @returns true when the conversion yields a bigint, false otherwise
 */
function hasBigInt() {
  let probe;
  try {
    probe = BigInt("1");
  } catch {
    return false;
  }
  return typeof probe === "bigint";
}
|
|
80
|
+
/**
 * Return the version string of the detected runtime.
 *
 * - "node": `process.version` without its first character (the "v" prefix)
 * - "deno": `Deno.version.deno`
 * - "bun": `Bun.version`
 *
 * @returns the version string, or undefined for any other runtime or when
 *   the version field is missing
 */
function getRuntimeVersion() {
  const runtime = detectRuntime();
  if (runtime === "node") {
    // Drop the leading "v" from values such as "v18.12.0".
    return process.version?.substring(1);
  }
  if (runtime === "deno") {
    return globalThis.Deno?.version?.deno;
  }
  if (runtime === "bun") {
    return globalThis.Bun?.version;
  }
  return undefined;
}
|
|
99
|
+
/**
 * Collect the WebAssembly-related capabilities of the current runtime.
 *
 * The result combines the detected runtime with the outcome of every
 * `has*` feature probe. `runtimeVersion` is added only when a version
 * string could be determined; otherwise the key is absent.
 *
 * @returns an object describing the runtime and its feature flags
 */
function getWasmCapabilities() {
  const runtime = detectRuntime();
  const detectedVersion = getRuntimeVersion();
  const report = {
    runtime,
    hasWasm: hasWasm(),
    hasWasmStreaming: hasWasmStreaming(),
    hasFileApi: hasFileApi(),
    hasBlob: hasBlob(),
    hasWorkers: hasWorkers(),
    hasSharedArrayBuffer: hasSharedArrayBuffer(),
    hasModuleWorkers: hasModuleWorkers(),
    hasBigInt: hasBigInt()
  };
  if (detectedVersion !== undefined) {
    // Appended last so the key order matches a literal with a trailing spread.
    report.runtimeVersion = detectedVersion;
  }
  return report;
}
|
|
116
|
+
/**
 * Build a full description of the current runtime environment.
 *
 * The result bundles the detected runtime, the boolean `is*` classification
 * helpers, the runtime version, the user agent and the capability report
 * from `getWasmCapabilities()`.
 *
 * @returns an object with runtime identity, classification flags,
 *   `userAgent` (`navigator.userAgent`, or the string "N/A" when no
 *   `navigator` global exists) and `capabilities`
 */
function getRuntimeInfo() {
  let userAgent = "N/A";
  if (typeof navigator !== "undefined") {
    userAgent = navigator.userAgent;
  }
  const runtime = detectRuntime();
  const capabilities = getWasmCapabilities();
  const info = {
    runtime,
    isBrowser: isBrowser(),
    isNode: isNode(),
    isDeno: isDeno(),
    isBun: isBun(),
    isWeb: isWebEnvironment(),
    isServer: isServerEnvironment(),
    runtimeVersion: getRuntimeVersion(),
    userAgent,
    capabilities
  };
  return info;
}
|
|
132
|
+
export {
|
|
133
|
+
detectRuntime,
|
|
134
|
+
getRuntimeInfo,
|
|
135
|
+
getRuntimeVersion,
|
|
136
|
+
getWasmCapabilities,
|
|
137
|
+
hasBigInt,
|
|
138
|
+
hasBlob,
|
|
139
|
+
hasFileApi,
|
|
140
|
+
hasModuleWorkers,
|
|
141
|
+
hasSharedArrayBuffer,
|
|
142
|
+
hasWasm,
|
|
143
|
+
hasWasmStreaming,
|
|
144
|
+
hasWorkers,
|
|
145
|
+
isBrowser,
|
|
146
|
+
isBun,
|
|
147
|
+
isDeno,
|
|
148
|
+
isNode,
|
|
149
|
+
isServerEnvironment,
|
|
150
|
+
isWebEnvironment
|
|
151
|
+
};
|
|
152
|
+
//# sourceMappingURL=runtime.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../typescript/runtime.ts"],"sourcesContent":["/**\n * Runtime detection and environment-specific utilities\n *\n * This module provides utilities for detecting the JavaScript runtime environment,\n * checking for feature availability, and enabling environment-specific WASM loading strategies.\n *\n * @example Basic Runtime Detection\n * ```typescript\n * import { detectRuntime, isBrowser, isNode } from '@kreuzberg/wasm/runtime';\n *\n * if (isBrowser()) {\n * console.log('Running in browser');\n * } else if (isNode()) {\n * console.log('Running in Node.js');\n * }\n * ```\n *\n * @example Feature Detection\n * ```typescript\n * import { hasFileApi, hasWorkers } from '@kreuzberg/wasm/runtime';\n *\n * if (hasFileApi()) {\n * // Can use File API for browser file uploads\n * }\n *\n * if (hasWorkers()) {\n * // Can use Web Workers for parallel processing\n * }\n * ```\n */\n\nexport type RuntimeType = \"browser\" | \"node\" | \"deno\" | \"bun\" | \"unknown\";\n\n/**\n * WebAssembly capabilities available in the runtime\n */\nexport interface WasmCapabilities {\n\t/** Runtime environment type */\n\truntime: RuntimeType;\n\t/** WebAssembly support available */\n\thasWasm: boolean;\n\t/** Streaming WebAssembly instantiation available */\n\thasWasmStreaming: boolean;\n\t/** File API available (browser) */\n\thasFileApi: boolean;\n\t/** Blob API available */\n\thasBlob: boolean;\n\t/** Worker support available */\n\thasWorkers: boolean;\n\t/** SharedArrayBuffer available (may be restricted) */\n\thasSharedArrayBuffer: boolean;\n\t/** Module Workers available */\n\thasModuleWorkers: boolean;\n\t/** BigInt support */\n\thasBigInt: boolean;\n\t/** Specific runtime version if available */\n\truntimeVersion?: string;\n}\n\n/**\n * Detect the current JavaScript runtime\n *\n * Checks for various global objects and properties to determine\n * which JavaScript runtime environment is currently executing.\n *\n * @returns The detected runtime type\n *\n * 
@example\n * ```typescript\n * import { detectRuntime } from '@kreuzberg/wasm/runtime';\n *\n * const runtime = detectRuntime();\n * switch (runtime) {\n * case 'browser':\n * console.log('Running in browser');\n * break;\n * case 'node':\n * console.log('Running in Node.js');\n * break;\n * case 'deno':\n * console.log('Running in Deno');\n * break;\n * case 'bun':\n * console.log('Running in Bun');\n * break;\n * }\n * ```\n */\nexport function detectRuntime(): RuntimeType {\n\t// Check for Deno\n\tif (typeof (globalThis as unknown as Record<string, unknown>).Deno !== \"undefined\") {\n\t\treturn \"deno\";\n\t}\n\n\t// Check for Bun\n\tif (typeof (globalThis as unknown as Record<string, unknown>).Bun !== \"undefined\") {\n\t\treturn \"bun\";\n\t}\n\n\t// Check for Node.js\n\tif (typeof process !== \"undefined\" && process.versions && process.versions.node) {\n\t\treturn \"node\";\n\t}\n\n\t// Check for browser\n\tif (typeof window !== \"undefined\" && typeof document !== \"undefined\") {\n\t\treturn \"browser\";\n\t}\n\n\treturn \"unknown\";\n}\n\n/**\n * Check if running in a browser environment\n *\n * @returns True if running in a browser, false otherwise\n */\nexport function isBrowser(): boolean {\n\treturn detectRuntime() === \"browser\";\n}\n\n/**\n * Check if running in Node.js\n *\n * @returns True if running in Node.js, false otherwise\n */\nexport function isNode(): boolean {\n\treturn detectRuntime() === \"node\";\n}\n\n/**\n * Check if running in Deno\n *\n * @returns True if running in Deno, false otherwise\n */\nexport function isDeno(): boolean {\n\treturn detectRuntime() === \"deno\";\n}\n\n/**\n * Check if running in Bun\n *\n * @returns True if running in Bun, false otherwise\n */\nexport function isBun(): boolean {\n\treturn detectRuntime() === \"bun\";\n}\n\n/**\n * Check if running in a web environment (browser or similar)\n *\n * @returns True if running in a web browser, false otherwise\n */\nexport function isWebEnvironment(): boolean 
{\n\tconst runtime = detectRuntime();\n\treturn runtime === \"browser\";\n}\n\n/**\n * Check if running in a server-like environment (Node.js, Deno, Bun)\n *\n * @returns True if running on a server runtime, false otherwise\n */\nexport function isServerEnvironment(): boolean {\n\tconst runtime = detectRuntime();\n\treturn runtime === \"node\" || runtime === \"deno\" || runtime === \"bun\";\n}\n\n/**\n * Check if File API is available\n *\n * The File API is required for handling browser file uploads.\n *\n * @returns True if File API is available, false otherwise\n *\n * @example\n * ```typescript\n * if (hasFileApi()) {\n * const fileInput = document.getElementById('file');\n * fileInput.addEventListener('change', (e) => {\n * const file = e.target.files?.[0];\n * // Handle file\n * });\n * }\n * ```\n */\nexport function hasFileApi(): boolean {\n\treturn typeof window !== \"undefined\" && typeof File !== \"undefined\" && typeof Blob !== \"undefined\";\n}\n\n/**\n * Check if Blob API is available\n *\n * @returns True if Blob API is available, false otherwise\n */\nexport function hasBlob(): boolean {\n\treturn typeof Blob !== \"undefined\";\n}\n\n/**\n * Check if Web Workers are available\n *\n * @returns True if Web Workers can be created, false otherwise\n */\nexport function hasWorkers(): boolean {\n\treturn typeof Worker !== \"undefined\";\n}\n\n/**\n * Check if SharedArrayBuffer is available\n *\n * Note: SharedArrayBuffer is restricted in some browser contexts\n * due to security considerations (Spectre/Meltdown mitigations).\n *\n * @returns True if SharedArrayBuffer is available, false otherwise\n */\nexport function hasSharedArrayBuffer(): boolean {\n\treturn typeof SharedArrayBuffer !== \"undefined\";\n}\n\n/**\n * Check if module workers are available\n *\n * Module workers allow importing ES modules in worker threads.\n *\n * @returns True if module workers are supported, false otherwise\n */\nexport function hasModuleWorkers(): boolean {\n\tif 
(!hasWorkers()) {\n\t\treturn false;\n\t}\n\n\ttry {\n\t\t// Try to detect module worker support\n\t\tconst blob = new Blob(['console.log(\"test\")'], {\n\t\t\ttype: \"application/javascript\",\n\t\t});\n\t\tconst workerUrl = URL.createObjectURL(blob);\n\t\ttry {\n\t\t\t// Module workers require type: 'module' option\n\t\t\t// We can't actually instantiate without issues, so we check the API exists\n\t\t\treturn true;\n\t\t} finally {\n\t\t\tURL.revokeObjectURL(workerUrl);\n\t\t}\n\t} catch {\n\t\treturn false;\n\t}\n}\n\n/**\n * Check if WebAssembly is available\n *\n * @returns True if WebAssembly is supported, false otherwise\n */\nexport function hasWasm(): boolean {\n\treturn typeof WebAssembly !== \"undefined\" && WebAssembly.instantiate !== undefined;\n}\n\n/**\n * Check if WebAssembly.instantiateStreaming is available\n *\n * Streaming instantiation is more efficient than buffering the entire WASM module.\n *\n * @returns True if streaming WebAssembly is supported, false otherwise\n */\nexport function hasWasmStreaming(): boolean {\n\treturn typeof WebAssembly !== \"undefined\" && WebAssembly.instantiateStreaming !== undefined;\n}\n\n/**\n * Check if BigInt is available\n *\n * @returns True if BigInt type is supported, false otherwise\n */\nexport function hasBigInt(): boolean {\n\ttry {\n\t\tconst test = BigInt(\"1\");\n\t\treturn typeof test === \"bigint\";\n\t} catch {\n\t\treturn false;\n\t}\n}\n\n/**\n * Get runtime version information\n *\n * @returns Version string if available, undefined otherwise\n *\n * @example\n * ```typescript\n * const version = getRuntimeVersion();\n * console.log(`Running on Node ${version}`); // \"Running on Node 18.12.0\"\n * ```\n */\nexport function getRuntimeVersion(): string | undefined {\n\tconst runtime = detectRuntime();\n\n\tswitch (runtime) {\n\t\tcase \"node\":\n\t\t\treturn process.version?.substring(1); // Remove 'v' prefix\n\t\tcase \"deno\": {\n\t\t\tconst deno = (globalThis as unknown as Record<string, 
unknown>).Deno as Record<string, unknown> | undefined;\n\t\t\tconst version = deno?.version as Record<string, unknown> | undefined;\n\t\t\treturn version?.deno as string | undefined;\n\t\t}\n\t\tcase \"bun\": {\n\t\t\tconst bun = (globalThis as unknown as Record<string, unknown>).Bun as Record<string, unknown> | undefined;\n\t\t\treturn bun?.version as string | undefined;\n\t\t}\n\t\tdefault:\n\t\t\treturn undefined;\n\t}\n}\n\n/**\n * Get comprehensive WebAssembly capabilities for current runtime\n *\n * Returns detailed information about WASM and related APIs available\n * in the current runtime environment.\n *\n * @returns Object describing available WASM capabilities\n *\n * @example\n * ```typescript\n * import { getWasmCapabilities } from '@kreuzberg/wasm/runtime';\n *\n * const caps = getWasmCapabilities();\n * console.log(`WASM available: ${caps.hasWasm}`);\n * console.log(`Streaming WASM: ${caps.hasWasmStreaming}`);\n * console.log(`Workers available: ${caps.hasWorkers}`);\n *\n * if (caps.hasWasm && caps.hasWorkers) {\n * // Can offload WASM processing to workers\n * }\n * ```\n */\nexport function getWasmCapabilities(): WasmCapabilities {\n\tconst runtime = detectRuntime();\n\tconst version = getRuntimeVersion();\n\tconst capabilities: WasmCapabilities = {\n\t\truntime,\n\t\thasWasm: hasWasm(),\n\t\thasWasmStreaming: hasWasmStreaming(),\n\t\thasFileApi: hasFileApi(),\n\t\thasBlob: hasBlob(),\n\t\thasWorkers: hasWorkers(),\n\t\thasSharedArrayBuffer: hasSharedArrayBuffer(),\n\t\thasModuleWorkers: hasModuleWorkers(),\n\t\thasBigInt: hasBigInt(),\n\t\t...(version !== undefined ? 
{ runtimeVersion: version } : {}),\n\t};\n\treturn capabilities;\n}\n\n/**\n * Get comprehensive runtime information\n *\n * Returns detailed information about the current runtime environment,\n * capabilities, and identifying information.\n *\n * @returns Object with runtime details and capabilities\n *\n * @example\n * ```typescript\n * const info = getRuntimeInfo();\n * console.log(info.runtime); // 'browser' | 'node' | 'deno' | 'bun'\n * console.log(info.isBrowser); // true/false\n * console.log(info.userAgent); // Browser user agent string\n * console.log(info.capabilities); // Detailed capability information\n * ```\n */\nexport function getRuntimeInfo() {\n\tconst runtime = detectRuntime();\n\tconst capabilities = getWasmCapabilities();\n\n\treturn {\n\t\truntime,\n\t\tisBrowser: isBrowser(),\n\t\tisNode: isNode(),\n\t\tisDeno: isDeno(),\n\t\tisBun: isBun(),\n\t\tisWeb: isWebEnvironment(),\n\t\tisServer: isServerEnvironment(),\n\t\truntimeVersion: getRuntimeVersion(),\n\t\tuserAgent: typeof navigator !== \"undefined\" ? 
navigator.userAgent : \"N/A\",\n\t\tcapabilities,\n\t};\n}\n"],"mappings":"AAwFO,SAAS,gBAA6B;AAE5C,MAAI,OAAQ,WAAkD,SAAS,aAAa;AACnF,WAAO;AAAA,EACR;AAGA,MAAI,OAAQ,WAAkD,QAAQ,aAAa;AAClF,WAAO;AAAA,EACR;AAGA,MAAI,OAAO,YAAY,eAAe,QAAQ,YAAY,QAAQ,SAAS,MAAM;AAChF,WAAO;AAAA,EACR;AAGA,MAAI,OAAO,WAAW,eAAe,OAAO,aAAa,aAAa;AACrE,WAAO;AAAA,EACR;AAEA,SAAO;AACR;AAOO,SAAS,YAAqB;AACpC,SAAO,cAAc,MAAM;AAC5B;AAOO,SAAS,SAAkB;AACjC,SAAO,cAAc,MAAM;AAC5B;AAOO,SAAS,SAAkB;AACjC,SAAO,cAAc,MAAM;AAC5B;AAOO,SAAS,QAAiB;AAChC,SAAO,cAAc,MAAM;AAC5B;AAOO,SAAS,mBAA4B;AAC3C,QAAM,UAAU,cAAc;AAC9B,SAAO,YAAY;AACpB;AAOO,SAAS,sBAA+B;AAC9C,QAAM,UAAU,cAAc;AAC9B,SAAO,YAAY,UAAU,YAAY,UAAU,YAAY;AAChE;AAoBO,SAAS,aAAsB;AACrC,SAAO,OAAO,WAAW,eAAe,OAAO,SAAS,eAAe,OAAO,SAAS;AACxF;AAOO,SAAS,UAAmB;AAClC,SAAO,OAAO,SAAS;AACxB;AAOO,SAAS,aAAsB;AACrC,SAAO,OAAO,WAAW;AAC1B;AAUO,SAAS,uBAAgC;AAC/C,SAAO,OAAO,sBAAsB;AACrC;AASO,SAAS,mBAA4B;AAC3C,MAAI,CAAC,WAAW,GAAG;AAClB,WAAO;AAAA,EACR;AAEA,MAAI;AAEH,UAAM,OAAO,IAAI,KAAK,CAAC,qBAAqB,GAAG;AAAA,MAC9C,MAAM;AAAA,IACP,CAAC;AACD,UAAM,YAAY,IAAI,gBAAgB,IAAI;AAC1C,QAAI;AAGH,aAAO;AAAA,IACR,UAAE;AACD,UAAI,gBAAgB,SAAS;AAAA,IAC9B;AAAA,EACD,QAAQ;AACP,WAAO;AAAA,EACR;AACD;AAOO,SAAS,UAAmB;AAClC,SAAO,OAAO,gBAAgB,eAAe,YAAY,gBAAgB;AAC1E;AASO,SAAS,mBAA4B;AAC3C,SAAO,OAAO,gBAAgB,eAAe,YAAY,yBAAyB;AACnF;AAOO,SAAS,YAAqB;AACpC,MAAI;AACH,UAAM,OAAO,OAAO,GAAG;AACvB,WAAO,OAAO,SAAS;AAAA,EACxB,QAAQ;AACP,WAAO;AAAA,EACR;AACD;AAaO,SAAS,oBAAwC;AACvD,QAAM,UAAU,cAAc;AAE9B,UAAQ,SAAS;AAAA,IAChB,KAAK;AACJ,aAAO,QAAQ,SAAS,UAAU,CAAC;AAAA;AAAA,IACpC,KAAK,QAAQ;AACZ,YAAM,OAAQ,WAAkD;AAChE,YAAM,UAAU,MAAM;AACtB,aAAO,SAAS;AAAA,IACjB;AAAA,IACA,KAAK,OAAO;AACX,YAAM,MAAO,WAAkD;AAC/D,aAAO,KAAK;AAAA,IACb;AAAA,IACA;AACC,aAAO;AAAA,EACT;AACD;AAwBO,SAAS,sBAAwC;AACvD,QAAM,UAAU,cAAc;AAC9B,QAAM,UAAU,kBAAkB;AAClC,QAAM,eAAiC;AAAA,IACtC;AAAA,IACA,SAAS,QAAQ;AAAA,IACjB,kBAAkB,iBAAiB;AAAA,IACnC,YAAY,WAAW;AAAA,IACvB,SAAS,QAAQ;AAAA,IACjB,YAAY,WAAW;AAAA,IACvB,sBAAsB,qBAAqB;AAAA,IAC3C,kBAAkB,iBAAiB;AAAA,IACnC,WAAW,UAAU;AAAA,IACrB,GAAI,YAAY,SAAY,EAAE,gBAAgB,QAAQ,I
AAI,CAAC;AAAA,EAC5D;AACA,SAAO;AACR;AAmBO,SAAS,iBAAiB;AAChC,QAAM,UAAU,cAAc;AAC9B,QAAM,eAAe,oBAAoB;AAEzC,SAAO;AAAA,IACN;AAAA,IACA,WAAW,UAAU;AAAA,IACrB,QAAQ,OAAO;AAAA,IACf,QAAQ,OAAO;AAAA,IACf,OAAO,MAAM;AAAA,IACb,OAAO,iBAAiB;AAAA,IACxB,UAAU,oBAAoB;AAAA,IAC9B,gBAAgB,kBAAkB;AAAA,IAClC,WAAW,OAAO,cAAc,cAAc,UAAU,YAAY;AAAA,IACpE;AAAA,EACD;AACD;","names":[]}
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2022 Google Inc. All Rights Reserved.
|
|
3
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License.
|
|
5
|
+
* You may obtain a copy of the License at
|
|
6
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
* See the License for the specific language governing permissions and
|
|
11
|
+
* limitations under the License.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
// Note: we use `wasm_bindgen_worker_`-prefixed message types to make sure
|
|
15
|
+
// we can handle bundling into other files, which might happen to have their
|
|
16
|
+
// own `postMessage`/`onmessage` communication channels.
|
|
17
|
+
//
|
|
18
|
+
// If we didn't take that into account, we could send much simpler signals
|
|
19
|
+
// like just `0` or whatever, but the code would be less resilient.
|
|
20
|
+
|
|
21
|
+
function waitForMsgType(target, type) {
|
|
22
|
+
return new Promise(resolve => {
|
|
23
|
+
target.addEventListener('message', function onMsg({ data }) {
|
|
24
|
+
if (data?.type !== type) return;
|
|
25
|
+
target.removeEventListener('message', onMsg);
|
|
26
|
+
resolve(data);
|
|
27
|
+
});
|
|
28
|
+
});
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
waitForMsgType(self, 'wasm_bindgen_worker_init').then(async ({ init, receiver }) => {
|
|
32
|
+
// # Note 1
|
|
33
|
+
// Our JS should have been generated in
|
|
34
|
+
// `[out-dir]/snippets/wasm-bindgen-rayon-[hash]/workerHelpers.js`,
|
|
35
|
+
// resolve the main module via `../../..`.
|
|
36
|
+
//
|
|
37
|
+
// This might need updating if the generated structure changes on wasm-bindgen
|
|
38
|
+
// side ever in the future, but works well with bundlers today. The whole
|
|
39
|
+
// point of this crate, after all, is to abstract away unstable features
|
|
40
|
+
// and temporary bugs so that you don't need to deal with them in your code.
|
|
41
|
+
//
|
|
42
|
+
// # Note 2
|
|
43
|
+
// This could be a regular import, but then some bundlers complain about
|
|
44
|
+
// circular deps.
|
|
45
|
+
//
|
|
46
|
+
// Dynamic import could be cheap if this file was inlined into the parent,
|
|
47
|
+
// which would require us just using `../../..` in `new Worker` below,
|
|
48
|
+
// but that doesn't work because wasm-pack unconditionally adds
|
|
49
|
+
// "sideEffects":false (see below).
|
|
50
|
+
//
|
|
51
|
+
// OTOH, even though it can't be inlined, it should be still reasonably
|
|
52
|
+
// cheap since the requested file is already in cache (it was loaded by
|
|
53
|
+
// the main thread).
|
|
54
|
+
const pkg = await import('../../..');
|
|
55
|
+
await pkg.default(init);
|
|
56
|
+
postMessage({ type: 'wasm_bindgen_worker_ready' });
|
|
57
|
+
pkg.wbg_rayon_start_worker(receiver);
|
|
58
|
+
});
|
|
59
|
+
|
|
60
|
+
// Note: this is never used, but necessary to prevent a bug in Firefox
|
|
61
|
+
// (https://bugzilla.mozilla.org/show_bug.cgi?id=1702191) where it collects
|
|
62
|
+
// Web Workers that have a shared WebAssembly memory with the main thread,
|
|
63
|
+
// but are not explicitly rooted via a `Worker` instance.
|
|
64
|
+
//
|
|
65
|
+
// By storing them in a variable, we can keep `Worker` objects around and
|
|
66
|
+
// prevent them from getting GC-d.
|
|
67
|
+
let _workers;

/**
 * Spawn `builder.numThreads()` module Workers from this same file, hand each
 * one the init payload, wait until all of them report ready, and then call
 * `builder.build()`.
 *
 * The spawned `Worker` objects are stored in the module-level `_workers`
 * variable so that they stay referenced.
 *
 * If a worker raises an `error` event before it has reported ready, the
 * returned promise rejects and every worker spawned by this call is
 * terminated, instead of waiting forever for a ready message that can no
 * longer arrive.
 *
 * @param module Value forwarded to the workers as `init.module_or_path`.
 * @param memory Value forwarded to the workers as `init.memory`.
 * @param builder Object providing `numThreads()`, `receiver()` and `build()`.
 * @returns Promise that resolves once all workers are ready and `build()` has
 *   been called.
 * @throws {Error} If `builder.numThreads()` is 0, or if a worker fails during
 *   startup.
 */
export async function startWorkers(module, memory, builder) {
  const numThreads = builder.numThreads();
  if (numThreads === 0) {
    throw new Error(`num_threads must be > 0.`);
  }

  const workerInit = {
    type: 'wasm_bindgen_worker_init',
    init: { module_or_path: module, memory },
    receiver: builder.receiver()
  };

  // Every worker created by this call, so they can be cleaned up on failure.
  const spawned = [];

  try {
    _workers = await Promise.all(
      Array.from({ length: numThreads }, async () => {
        // Self-spawn into a new Worker.
        //
        // TODO: while `new URL('...', import.meta.url)` becomes a semi-standard
        // way to get asset URLs relative to the module across various bundlers
        // and browsers, ideally we should switch to `import.meta.resolve`
        // once it becomes a standard.
        //
        // Note: we could use `../../..` as the URL here to inline workerHelpers.js
        // into the parent entry instead of creating another split point -
        // this would be preferable from optimization perspective -
        // however, Webpack then eliminates all message handler code
        // because wasm-pack produces "sideEffects":false in package.json
        // unconditionally.
        //
        // The only way to work around that is to have side effect code
        // in an entry point such as Worker file itself.
        const worker = new Worker(new URL('./workerHelpers.js', import.meta.url), {
          type: 'module'
        });
        spawned.push(worker);

        await new Promise((resolve, reject) => {
          // Only startup failures are of interest here, so the error listener
          // is detached again once the ready message has arrived.
          const onError = event => {
            reject(
              new Error(
                `wasm-bindgen worker failed to start: ${event?.message || 'unknown error'}`
              )
            );
          };
          worker.addEventListener('error', onError);
          waitForMsgType(worker, 'wasm_bindgen_worker_ready').then(() => {
            worker.removeEventListener('error', onError);
            resolve();
          });
          worker.postMessage(workerInit);
        });

        return worker;
      })
    );
  } catch (err) {
    // Do not leave half-initialized workers running behind a failed start.
    for (const worker of spawned) {
      worker.terminate();
    }
    throw err;
  }

  builder.build();
}
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Type definitions for Kreuzberg WASM bindings
|
|
3
|
+
*
|
|
4
|
+
* These types are generated from the Rust core library and define
|
|
5
|
+
* the interface for extraction, configuration, and results.
|
|
6
|
+
*/
|
|
7
|
+
/**
 * Options for a document extraction run.
 *
 * Each section is optional and groups the settings of one feature.
 */
interface ExtractionConfig {
    /** Settings for OCR. */
    ocr?: OcrConfig;

    /** Settings for chunking. */
    chunking?: ChunkingConfig;

    /** Settings for image extraction. */
    images?: ImageExtractionConfig;

    /** Settings for page extraction. */
    pages?: PageExtractionConfig;

    /** Settings for language detection. */
    languageDetection?: LanguageDetectionConfig;
}
|
|
22
|
+
/**
 * Settings for OCR.
 */
interface OcrConfig {
    /** Name of the OCR backend to use. */
    backend?: string;

    /** ISO 639 language codes. */
    languages?: Array<string>;

    /** Whether OCR should be performed. */
    enabled?: boolean;
}
|
|
33
|
+
/**
 * Settings for chunking.
 */
interface ChunkingConfig {
    /** Upper bound on the number of characters in one chunk. */
    maxChars?: number;

    /** Overlap between chunks. */
    maxOverlap?: number;
}
|
|
42
|
+
/**
 * Settings for image extraction.
 */
interface ImageExtractionConfig {
    /** Whether images should be extracted. */
    enabled?: boolean;
}
|
|
49
|
+
/**
 * Settings for page extraction.
 */
interface PageExtractionConfig {
    /** Whether per-page content should be extracted. */
    enabled?: boolean;
}
|
|
56
|
+
/**
 * Settings for language detection.
 */
interface LanguageDetectionConfig {
    /** Whether languages should be detected. */
    enabled?: boolean;
}
|
|
63
|
+
/**
 * Outcome of extracting a document.
 *
 * `content`, `mimeType`, `metadata` and `tables` are always present; the
 * remaining fields may be omitted or `null`.
 */
interface ExtractionResult {
    /** The extracted text. */
    content: string;

    /** MIME type of the document. */
    mimeType: string;

    /** Metadata of the document. */
    metadata: Metadata;

    /** Tables that were extracted. */
    tables: Array<Table>;

    /** ISO 639 codes of the detected languages. */
    detectedLanguages?: Array<string> | null;

    /** Text chunks, when chunking is enabled. */
    chunks?: Array<Chunk> | null;

    /** Images that were extracted. */
    images?: Array<ExtractedImage> | null;

    /** Content broken down per page. */
    pages?: Array<PageContent> | null;
}
|
|
84
|
+
/**
 * Metadata describing a document. All fields are optional.
 */
interface Metadata {
    /** Title of the document. */
    title?: string;

    /** Subject or description of the document. */
    subject?: string;

    /** Author(s) of the document. */
    authors?: Array<string>;

    /** Keywords / tags. */
    keywords?: Array<string>;

    /** Primary language as an ISO 639 code. */
    language?: string;

    /** Creation timestamp in ISO 8601 format. */
    createdAt?: string;

    /** Timestamp of the last modification in ISO 8601 format. */
    modifiedAt?: string;

    /** User who created the document. */
    creator?: string;

    /** User who last modified the document. */
    lastModifiedBy?: string;

    /** Number of pages or slides. */
    pageCount?: number;

    /** Metadata specific to the document format. */
    formatMetadata?: unknown;

    /** Additional custom fields. */
    additional?: Record<string, unknown>;
}
|
|
113
|
+
/**
 * A table extracted from a document. All fields are optional.
 */
interface Table {
    /** Cells / rows of the table. */
    cells?: Array<string[]>;

    /** The table rendered as markdown. */
    markdown?: string;

    /** Page number, if available. */
    pageNumber?: number;

    /** Header cells of the table. */
    headers?: Array<string>;

    /** Rows of the table. */
    rows?: Array<string[]>;
}
|
|
128
|
+
/**
 * Metadata attached to a chunk. Every field is required; `tokenCount` may be
 * `null`.
 */
interface ChunkMetadata {
    /** Start position, in characters, within the original content. */
    charStart: number;

    /** End position, in characters, within the original content. */
    charEnd: number;

    /** Token count, or `null` if not available. */
    tokenCount: null | number;

    /** Index of this chunk. */
    chunkIndex: number;

    /** Total number of chunks. */
    totalChunks: number;
}
|
|
143
|
+
/**
 * One text chunk of chunked content.
 */
interface Chunk {
    /** Text of the chunk. */
    content: string;

    /** Metadata of the chunk. */
    metadata?: ChunkMetadata;

    /** Legacy field: character position within the original content. */
    charIndex?: number;

    /** Legacy field: token count, if available. */
    tokenCount?: number;

    /** Embedding vector, if one was computed. */
    embedding?: Array<number> | null;
}
|
|
158
|
+
/**
 * An image extracted from a document. Only `data` is required.
 */
interface ExtractedImage {
    /** The image data, either as a `Uint8Array` or as a base64 string. */
    data: string | Uint8Array;

    /** Image format / MIME type. */
    format?: string;

    /** MIME type of the image. */
    mimeType?: string;

    /** Index of the image within the document. */
    imageIndex?: number;

    /** Page number, if available. */
    pageNumber?: number | null;

    /** Width of the image in pixels. */
    width?: number | null;

    /** Height of the image in pixels. */
    height?: number | null;

    /** Color space of the image. */
    colorspace?: string | null;

    /** Number of bits per color component. */
    bitsPerComponent?: number | null;

    /** Whether the image is a mask image. */
    isMask?: boolean;

    /** Description of the image. */
    description?: string | null;

    /** OCR result obtained from the image, if any. */
    ocrResult?: ExtractionResult | string | null;
}
|
|
187
|
+
/**
 * Content of a single page.
 */
interface PageContent {
    /** 1-indexed page number. */
    pageNumber: number;

    /** Text of the page. */
    content: string;

    /** Tables found on the page. */
    tables?: Array<Table>;

    /** Images found on the page. */
    images?: Array<ExtractedImage>;
}
|
|
200
|
+
/**
 * Protocol (interface) that an OCR backend has to implement.
 *
 * `name`, `initialize` and `processImage` are required; `supportedLanguages`
 * and `shutdown` are optional.
 */
interface OcrBackendProtocol {
    /** Returns the name of the backend. */
    name(): string;

    /** Returns the language codes the backend supports. */
    supportedLanguages?(): Array<string>;

    /** Initializes the backend; may complete synchronously or via a promise. */
    initialize(options?: Record<string, unknown>): Promise<void> | void;

    /** Shuts the backend down; may complete synchronously or via a promise. */
    shutdown?(): Promise<void> | void;

    /**
     * Runs OCR on an image.
     *
     * Resolves with either a plain string or a result object carrying
     * `content` and `mime_type` plus optional `metadata` and `tables`.
     */
    processImage(
        imageData: string | Uint8Array,
        language?: string,
    ): Promise<
        | string
        | {
              content: string;
              mime_type: string;
              metadata?: Record<string, unknown>;
              tables?: Array<unknown>;
          }
    >;
}
|
|
220
|
+
|
|
221
|
+
export type { Chunk as C, ExtractionConfig as E, ImageExtractionConfig as I, LanguageDetectionConfig as L, Metadata as M, OcrConfig as O, PageExtractionConfig as P, Table as T, ExtractionResult as a, ChunkingConfig as b, ExtractedImage as c, ChunkMetadata as d, PageContent as e, OcrBackendProtocol as f };
|