@kreuzberg/wasm 4.0.0-rc.21 → 4.0.0-rc.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +520 -837
- package/dist/adapters/wasm-adapter.cjs.map +1 -1
- package/dist/adapters/wasm-adapter.d.cts +1 -1
- package/dist/adapters/wasm-adapter.d.ts +1 -1
- package/dist/adapters/wasm-adapter.js.map +1 -1
- package/dist/index.cjs +192 -48
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +219 -3
- package/dist/index.d.ts +219 -3
- package/dist/index.js +199 -48
- package/dist/index.js.map +1 -1
- package/dist/ocr/registry.cjs.map +1 -1
- package/dist/ocr/registry.d.cts +1 -1
- package/dist/ocr/registry.d.ts +1 -1
- package/dist/ocr/registry.js.map +1 -1
- package/dist/ocr/tesseract-wasm-backend.cjs +0 -46
- package/dist/ocr/tesseract-wasm-backend.cjs.map +1 -1
- package/dist/ocr/tesseract-wasm-backend.d.cts +1 -1
- package/dist/ocr/tesseract-wasm-backend.d.ts +1 -1
- package/dist/ocr/tesseract-wasm-backend.js +0 -46
- package/dist/ocr/tesseract-wasm-backend.js.map +1 -1
- package/dist/pdfium.js +0 -5
- package/dist/runtime.cjs +0 -1
- package/dist/runtime.cjs.map +1 -1
- package/dist/runtime.js +0 -1
- package/dist/runtime.js.map +1 -1
- package/dist/{types-CKjcIYcX.d.cts → types-wVLLDHkl.d.cts} +73 -3
- package/dist/{types-CKjcIYcX.d.ts → types-wVLLDHkl.d.ts} +73 -3
- package/package.json +162 -162
package/dist/runtime.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../typescript/runtime.ts"],"sourcesContent":["/**\n * Runtime detection and environment-specific utilities\n *\n * This module provides utilities for detecting the JavaScript runtime environment,\n * checking for feature availability, and enabling environment-specific WASM loading strategies.\n *\n * @example Basic Runtime Detection\n * ```typescript\n * import { detectRuntime, isBrowser, isNode } from '@kreuzberg/wasm/runtime';\n *\n * if (isBrowser()) {\n * console.log('Running in browser');\n * } else if (isNode()) {\n * console.log('Running in Node.js');\n * }\n * ```\n *\n * @example Feature Detection\n * ```typescript\n * import { hasFileApi, hasWorkers } from '@kreuzberg/wasm/runtime';\n *\n * if (hasFileApi()) {\n * // Can use File API for browser file uploads\n * }\n *\n * if (hasWorkers()) {\n * // Can use Web Workers for parallel processing\n * }\n * ```\n */\n\nexport type RuntimeType = \"browser\" | \"node\" | \"deno\" | \"bun\" | \"unknown\";\n\n/**\n * WebAssembly capabilities available in the runtime\n */\nexport interface WasmCapabilities {\n\t/** Runtime environment type */\n\truntime: RuntimeType;\n\t/** WebAssembly support available */\n\thasWasm: boolean;\n\t/** Streaming WebAssembly instantiation available */\n\thasWasmStreaming: boolean;\n\t/** File API available (browser) */\n\thasFileApi: boolean;\n\t/** Blob API available */\n\thasBlob: boolean;\n\t/** Worker support available */\n\thasWorkers: boolean;\n\t/** SharedArrayBuffer available (may be restricted) */\n\thasSharedArrayBuffer: boolean;\n\t/** Module Workers available */\n\thasModuleWorkers: boolean;\n\t/** BigInt support */\n\thasBigInt: boolean;\n\t/** Specific runtime version if available */\n\truntimeVersion?: string;\n}\n\n/**\n * Detect the current JavaScript runtime\n *\n * Checks for various global objects and properties to determine\n * which JavaScript runtime environment is currently executing.\n *\n * @returns The detected runtime type\n *\n * @example\n * ```typescript\n * import { detectRuntime } from '@kreuzberg/wasm/runtime';\n *\n * const runtime = detectRuntime();\n * switch (runtime) {\n * case 'browser':\n * console.log('Running in browser');\n * break;\n * case 'node':\n * console.log('Running in Node.js');\n * break;\n * case 'deno':\n * console.log('Running in Deno');\n * break;\n * case 'bun':\n * console.log('Running in Bun');\n * break;\n * }\n * ```\n */\nexport function detectRuntime(): RuntimeType {\n\t// Check for Deno\n\tif (typeof (globalThis as unknown as Record<string, unknown>).Deno !== \"undefined\") {\n\t\treturn \"deno\";\n\t}\n\n\t// Check for Bun\n\tif (typeof (globalThis as unknown as Record<string, unknown>).Bun !== \"undefined\") {\n\t\treturn \"bun\";\n\t}\n\n\t// Check for Node.js\n\tif (typeof process !== \"undefined\" && process.versions && process.versions.node) {\n\t\treturn \"node\";\n\t}\n\n\t// Check for browser\n\tif (typeof window !== \"undefined\" && typeof document !== \"undefined\") {\n\t\treturn \"browser\";\n\t}\n\n\treturn \"unknown\";\n}\n\n/**\n * Check if running in a browser environment\n *\n * @returns True if running in a browser, false otherwise\n */\nexport function isBrowser(): boolean {\n\treturn detectRuntime() === \"browser\";\n}\n\n/**\n * Check if running in Node.js\n *\n * @returns True if running in Node.js, false otherwise\n */\nexport function isNode(): boolean {\n\treturn detectRuntime() === \"node\";\n}\n\n/**\n * Check if running in Deno\n *\n * @returns True if running in Deno, false otherwise\n */\nexport function isDeno(): boolean {\n\treturn detectRuntime() === \"deno\";\n}\n\n/**\n * Check if running in Bun\n *\n * @returns True if running in Bun, false otherwise\n */\nexport function isBun(): boolean {\n\treturn detectRuntime() === \"bun\";\n}\n\n/**\n * Check if running in a web environment (browser or similar)\n *\n * @returns True if running in a web browser, false otherwise\n */\nexport function isWebEnvironment(): boolean {\n\tconst runtime = detectRuntime();\n\treturn runtime === \"browser\";\n}\n\n/**\n * Check if running in a server-like environment (Node.js, Deno, Bun)\n *\n * @returns True if running on a server runtime, false otherwise\n */\nexport function isServerEnvironment(): boolean {\n\tconst runtime = detectRuntime();\n\treturn runtime === \"node\" || runtime === \"deno\" || runtime === \"bun\";\n}\n\n/**\n * Check if File API is available\n *\n * The File API is required for handling browser file uploads.\n *\n * @returns True if File API is available, false otherwise\n *\n * @example\n * ```typescript\n * if (hasFileApi()) {\n * const fileInput = document.getElementById('file');\n * fileInput.addEventListener('change', (e) => {\n * const file = e.target.files?.[0];\n * // Handle file\n * });\n * }\n * ```\n */\nexport function hasFileApi(): boolean {\n\treturn typeof window !== \"undefined\" && typeof File !== \"undefined\" && typeof Blob !== \"undefined\";\n}\n\n/**\n * Check if Blob API is available\n *\n * @returns True if Blob API is available, false otherwise\n */\nexport function hasBlob(): boolean {\n\treturn typeof Blob !== \"undefined\";\n}\n\n/**\n * Check if Web Workers are available\n *\n * @returns True if Web Workers can be created, false otherwise\n */\nexport function hasWorkers(): boolean {\n\treturn typeof Worker !== \"undefined\";\n}\n\n/**\n * Check if SharedArrayBuffer is available\n *\n * Note: SharedArrayBuffer is restricted in some browser contexts\n * due to security considerations (Spectre/Meltdown mitigations).\n *\n * @returns True if SharedArrayBuffer is available, false otherwise\n */\nexport function hasSharedArrayBuffer(): boolean {\n\treturn typeof SharedArrayBuffer !== \"undefined\";\n}\n\n/**\n * Check if module workers are available\n *\n * Module workers allow importing ES modules in worker threads.\n *\n * @returns True if module workers are supported, false otherwise\n */\nexport function hasModuleWorkers(): boolean {\n\tif (!hasWorkers()) {\n\t\treturn false;\n\t}\n\n\ttry {\n\t\t// Try to detect module worker support\n\t\tconst blob = new Blob(['console.log(\"test\")'], {\n\t\t\ttype: \"application/javascript\",\n\t\t});\n\t\tconst workerUrl = URL.createObjectURL(blob);\n\t\ttry {\n\t\t\t// Module workers require type: 'module' option\n\t\t\t// We can't actually instantiate without issues, so we check the API exists\n\t\t\treturn true;\n\t\t} finally {\n\t\t\tURL.revokeObjectURL(workerUrl);\n\t\t}\n\t} catch {\n\t\treturn false;\n\t}\n}\n\n/**\n * Check if WebAssembly is available\n *\n * @returns True if WebAssembly is supported, false otherwise\n */\nexport function hasWasm(): boolean {\n\treturn typeof WebAssembly !== \"undefined\" && WebAssembly.instantiate !== undefined;\n}\n\n/**\n * Check if WebAssembly.instantiateStreaming is available\n *\n * Streaming instantiation is more efficient than buffering the entire WASM module.\n *\n * @returns True if streaming WebAssembly is supported, false otherwise\n */\nexport function hasWasmStreaming(): boolean {\n\treturn typeof WebAssembly !== \"undefined\" && WebAssembly.instantiateStreaming !== undefined;\n}\n\n/**\n * Check if BigInt is available\n *\n * @returns True if BigInt type is supported, false otherwise\n */\nexport function hasBigInt(): boolean {\n\ttry {\n\t\tconst test = BigInt(\"1\");\n\t\treturn typeof test === \"bigint\";\n\t} catch {\n\t\treturn false;\n\t}\n}\n\n/**\n * Get runtime version information\n *\n * @returns Version string if available, undefined otherwise\n *\n * @example\n * ```typescript\n * const version = getRuntimeVersion();\n * console.log(`Running on Node ${version}`); // \"Running on Node 18.12.0\"\n * ```\n */\nexport function getRuntimeVersion(): string | undefined {\n\tconst runtime = detectRuntime();\n\n\tswitch (runtime) {\n\t\tcase \"node\":\n\t\t\treturn process.version?.substring(1); // Remove 'v' prefix\n\t\tcase \"deno\": {\n\t\t\tconst deno = (globalThis as unknown as Record<string, unknown>).Deno as Record<string, unknown> | undefined;\n\t\t\tconst version = deno?.version as Record<string, unknown> | undefined;\n\t\t\treturn version?.deno as string | undefined;\n\t\t}\n\t\tcase \"bun\": {\n\t\t\tconst bun = (globalThis as unknown as Record<string, unknown>).Bun as Record<string, unknown> | undefined;\n\t\t\treturn bun?.version as string | undefined;\n\t\t}\n\t\tdefault:\n\t\t\treturn undefined;\n\t}\n}\n\n/**\n * Get comprehensive WebAssembly capabilities for current runtime\n *\n * Returns detailed information about WASM and related APIs available\n * in the current runtime environment.\n *\n * @returns Object describing available WASM capabilities\n *\n * @example\n * ```typescript\n * import { getWasmCapabilities } from '@kreuzberg/wasm/runtime';\n *\n * const caps = getWasmCapabilities();\n * console.log(`WASM available: ${caps.hasWasm}`);\n * console.log(`Streaming WASM: ${caps.hasWasmStreaming}`);\n * console.log(`Workers available: ${caps.hasWorkers}`);\n *\n * if (caps.hasWasm && caps.hasWorkers) {\n * // Can offload WASM processing to workers\n * }\n * ```\n */\nexport function getWasmCapabilities(): WasmCapabilities {\n\tconst runtime = detectRuntime();\n\tconst version = getRuntimeVersion();\n\tconst capabilities: WasmCapabilities = {\n\t\truntime,\n\t\thasWasm: hasWasm(),\n\t\thasWasmStreaming: hasWasmStreaming(),\n\t\thasFileApi: hasFileApi(),\n\t\thasBlob: hasBlob(),\n\t\thasWorkers: hasWorkers(),\n\t\thasSharedArrayBuffer: hasSharedArrayBuffer(),\n\t\thasModuleWorkers: hasModuleWorkers(),\n\t\thasBigInt: hasBigInt(),\n\t\t...(version !== undefined ? { runtimeVersion: version } : {}),\n\t};\n\treturn capabilities;\n}\n\n/**\n * Get comprehensive runtime information\n *\n * Returns detailed information about the current runtime environment,\n * capabilities, and identifying information.\n *\n * @returns Object with runtime details and capabilities\n *\n * @example\n * ```typescript\n * const info = getRuntimeInfo();\n * console.log(info.runtime); // 'browser' | 'node' | 'deno' | 'bun'\n * console.log(info.isBrowser); // true/false\n * console.log(info.userAgent); // Browser user agent string\n * console.log(info.capabilities); // Detailed capability information\n * ```\n */\nexport function getRuntimeInfo() {\n\tconst runtime = detectRuntime();\n\tconst capabilities = getWasmCapabilities();\n\n\treturn {\n\t\truntime,\n\t\tisBrowser: isBrowser(),\n\t\tisNode: isNode(),\n\t\tisDeno: isDeno(),\n\t\tisBun: isBun(),\n\t\tisWeb: isWebEnvironment(),\n\t\tisServer: isServerEnvironment(),\n\t\truntimeVersion: getRuntimeVersion(),\n\t\tuserAgent: typeof navigator !== \"undefined\" ? navigator.userAgent : \"N/A\",\n\t\tcapabilities,\n\t};\n}\n"],"mappings":";AAwFO,SAAS,gBAA6B;AAE5C,MAAI,OAAQ,WAAkD,SAAS,aAAa;AACnF,WAAO;AAAA,EACR;AAGA,MAAI,OAAQ,WAAkD,QAAQ,aAAa;AAClF,WAAO;AAAA,EACR;AAGA,MAAI,OAAO,YAAY,eAAe,QAAQ,YAAY,QAAQ,SAAS,MAAM;AAChF,WAAO;AAAA,EACR;AAGA,MAAI,OAAO,WAAW,eAAe,OAAO,aAAa,aAAa;AACrE,WAAO;AAAA,EACR;AAEA,SAAO;AACR;AAOO,SAAS,YAAqB;AACpC,SAAO,cAAc,MAAM;AAC5B;AAOO,SAAS,SAAkB;AACjC,SAAO,cAAc,MAAM;AAC5B;AAOO,SAAS,SAAkB;AACjC,SAAO,cAAc,MAAM;AAC5B;AAOO,SAAS,QAAiB;AAChC,SAAO,cAAc,MAAM;AAC5B;AAOO,SAAS,mBAA4B;AAC3C,QAAM,UAAU,cAAc;AAC9B,SAAO,YAAY;AACpB;AAOO,SAAS,sBAA+B;AAC9C,QAAM,UAAU,cAAc;AAC9B,SAAO,YAAY,UAAU,YAAY,UAAU,YAAY;AAChE;AAoBO,SAAS,aAAsB;AACrC,SAAO,OAAO,WAAW,eAAe,OAAO,SAAS,eAAe,OAAO,SAAS;AACxF;AAOO,SAAS,UAAmB;AAClC,SAAO,OAAO,SAAS;AACxB;AAOO,SAAS,aAAsB;AACrC,SAAO,OAAO,WAAW;AAC1B;AAUO,SAAS,uBAAgC;AAC/C,SAAO,OAAO,sBAAsB;AACrC;AASO,SAAS,mBAA4B;AAC3C,MAAI,CAAC,WAAW,GAAG;AAClB,WAAO;AAAA,EACR;AAEA,MAAI;AAEH,UAAM,OAAO,IAAI,KAAK,CAAC,qBAAqB,GAAG;AAAA,MAC9C,MAAM;AAAA,IACP,CAAC;AACD,UAAM,YAAY,IAAI,gBAAgB,IAAI;AAC1C,QAAI;AAGH,aAAO;AAAA,IACR,UAAE;AACD,UAAI,gBAAgB,SAAS;AAAA,IAC9B;AAAA,EACD,QAAQ;AACP,WAAO;AAAA,EACR;AACD;AAOO,SAAS,UAAmB;AAClC,SAAO,OAAO,gBAAgB,eAAe,YAAY,gBAAgB;AAC1E;AASO,SAAS,mBAA4B;AAC3C,SAAO,OAAO,gBAAgB,eAAe,YAAY,yBAAyB;AACnF;AAOO,SAAS,YAAqB;AACpC,MAAI;AACH,UAAM,OAAO,OAAO,GAAG;AACvB,WAAO,OAAO,SAAS;AAAA,EACxB,QAAQ;AACP,WAAO;AAAA,EACR;AACD;AAaO,SAAS,oBAAwC;AACvD,QAAM,UAAU,cAAc;AAE9B,UAAQ,SAAS;AAAA,IAChB,KAAK;AACJ,aAAO,QAAQ,SAAS,UAAU,CAAC;AAAA;AAAA,IACpC,KAAK,QAAQ;AACZ,YAAM,OAAQ,WAAkD;AAChE,YAAM,UAAU,MAAM;AACtB,aAAO,SAAS;AAAA,IACjB;AAAA,IACA,KAAK,OAAO;AACX,YAAM,MAAO,WAAkD;AAC/D,aAAO,KAAK;AAAA,IACb;AAAA,IACA;AACC,aAAO;AAAA,EACT;AACD;AAwBO,SAAS,sBAAwC;AACvD,QAAM,UAAU,cAAc;AAC9B,QAAM,UAAU,kBAAkB;AAClC,QAAM,eAAiC;AAAA,IACtC;AAAA,IACA,SAAS,QAAQ;AAAA,IACjB,kBAAkB,iBAAiB;AAAA,IACnC,YAAY,WAAW;AAAA,IACvB,SAAS,QAAQ;AAAA,IACjB,YAAY,WAAW;AAAA,IACvB,sBAAsB,qBAAqB;AAAA,IAC3C,kBAAkB,iBAAiB;AAAA,IACnC,WAAW,UAAU;AAAA,IACrB,GAAI,YAAY,SAAY,EAAE,gBAAgB,QAAQ,IAAI,CAAC;AAAA,EAC5D;AACA,SAAO;AACR;AAmBO,SAAS,iBAAiB;AAChC,QAAM,UAAU,cAAc;AAC9B,QAAM,eAAe,oBAAoB;AAEzC,SAAO;AAAA,IACN;AAAA,IACA,WAAW,UAAU;AAAA,IACrB,QAAQ,OAAO;AAAA,IACf,QAAQ,OAAO;AAAA,IACf,OAAO,MAAM;AAAA,IACb,OAAO,iBAAiB;AAAA,IACxB,UAAU,oBAAoB;AAAA,IAC9B,gBAAgB,kBAAkB;AAAA,IAClC,WAAW,OAAO,cAAc,cAAc,UAAU,YAAY;AAAA,IACpE;AAAA,EACD;AACD;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../typescript/runtime.ts"],"sourcesContent":["/**\n * Runtime detection and environment-specific utilities\n *\n * This module provides utilities for detecting the JavaScript runtime environment,\n * checking for feature availability, and enabling environment-specific WASM loading strategies.\n *\n * @example Basic Runtime Detection\n * ```typescript\n * import { detectRuntime, isBrowser, isNode } from '@kreuzberg/wasm/runtime';\n *\n * if (isBrowser()) {\n * console.log('Running in browser');\n * } else if (isNode()) {\n * console.log('Running in Node.js');\n * }\n * ```\n *\n * @example Feature Detection\n * ```typescript\n * import { hasFileApi, hasWorkers } from '@kreuzberg/wasm/runtime';\n *\n * if (hasFileApi()) {\n * // Can use File API for browser file uploads\n * }\n *\n * if (hasWorkers()) {\n * // Can use Web Workers for parallel processing\n * }\n * ```\n */\n\nexport type RuntimeType = \"browser\" | \"node\" | \"deno\" | \"bun\" | \"unknown\";\n\n/**\n * WebAssembly capabilities available in the runtime\n */\nexport interface WasmCapabilities {\n\t/** Runtime environment type */\n\truntime: RuntimeType;\n\t/** WebAssembly support available */\n\thasWasm: boolean;\n\t/** Streaming WebAssembly instantiation available */\n\thasWasmStreaming: boolean;\n\t/** File API available (browser) */\n\thasFileApi: boolean;\n\t/** Blob API available */\n\thasBlob: boolean;\n\t/** Worker support available */\n\thasWorkers: boolean;\n\t/** SharedArrayBuffer available (may be restricted) */\n\thasSharedArrayBuffer: boolean;\n\t/** Module Workers available */\n\thasModuleWorkers: boolean;\n\t/** BigInt support */\n\thasBigInt: boolean;\n\t/** Specific runtime version if available */\n\truntimeVersion?: string;\n}\n\n/**\n * Detect the current JavaScript runtime\n *\n * Checks for various global objects and properties to determine\n * which JavaScript runtime environment is currently executing.\n *\n * @returns The detected runtime type\n *\n * @example\n * ```typescript\n * import { detectRuntime } from '@kreuzberg/wasm/runtime';\n *\n * const runtime = detectRuntime();\n * switch (runtime) {\n * case 'browser':\n * console.log('Running in browser');\n * break;\n * case 'node':\n * console.log('Running in Node.js');\n * break;\n * case 'deno':\n * console.log('Running in Deno');\n * break;\n * case 'bun':\n * console.log('Running in Bun');\n * break;\n * }\n * ```\n */\nexport function detectRuntime(): RuntimeType {\n\tif (typeof (globalThis as unknown as Record<string, unknown>).Deno !== \"undefined\") {\n\t\treturn \"deno\";\n\t}\n\n\tif (typeof (globalThis as unknown as Record<string, unknown>).Bun !== \"undefined\") {\n\t\treturn \"bun\";\n\t}\n\n\tif (typeof process !== \"undefined\" && process.versions && process.versions.node) {\n\t\treturn \"node\";\n\t}\n\n\tif (typeof window !== \"undefined\" && typeof document !== \"undefined\") {\n\t\treturn \"browser\";\n\t}\n\n\treturn \"unknown\";\n}\n\n/**\n * Check if running in a browser environment\n *\n * @returns True if running in a browser, false otherwise\n */\nexport function isBrowser(): boolean {\n\treturn detectRuntime() === \"browser\";\n}\n\n/**\n * Check if running in Node.js\n *\n * @returns True if running in Node.js, false otherwise\n */\nexport function isNode(): boolean {\n\treturn detectRuntime() === \"node\";\n}\n\n/**\n * Check if running in Deno\n *\n * @returns True if running in Deno, false otherwise\n */\nexport function isDeno(): boolean {\n\treturn detectRuntime() === \"deno\";\n}\n\n/**\n * Check if running in Bun\n *\n * @returns True if running in Bun, false otherwise\n */\nexport function isBun(): boolean {\n\treturn detectRuntime() === \"bun\";\n}\n\n/**\n * Check if running in a web environment (browser or similar)\n *\n * @returns True if running in a web browser, false otherwise\n */\nexport function isWebEnvironment(): boolean {\n\tconst runtime = detectRuntime();\n\treturn runtime === \"browser\";\n}\n\n/**\n * Check if running in a server-like environment (Node.js, Deno, Bun)\n *\n * @returns True if running on a server runtime, false otherwise\n */\nexport function isServerEnvironment(): boolean {\n\tconst runtime = detectRuntime();\n\treturn runtime === \"node\" || runtime === \"deno\" || runtime === \"bun\";\n}\n\n/**\n * Check if File API is available\n *\n * The File API is required for handling browser file uploads.\n *\n * @returns True if File API is available, false otherwise\n *\n * @example\n * ```typescript\n * if (hasFileApi()) {\n * const fileInput = document.getElementById('file');\n * fileInput.addEventListener('change', (e) => {\n * const file = e.target.files?.[0];\n * // Handle file\n * });\n * }\n * ```\n */\nexport function hasFileApi(): boolean {\n\treturn typeof window !== \"undefined\" && typeof File !== \"undefined\" && typeof Blob !== \"undefined\";\n}\n\n/**\n * Check if Blob API is available\n *\n * @returns True if Blob API is available, false otherwise\n */\nexport function hasBlob(): boolean {\n\treturn typeof Blob !== \"undefined\";\n}\n\n/**\n * Check if Web Workers are available\n *\n * @returns True if Web Workers can be created, false otherwise\n */\nexport function hasWorkers(): boolean {\n\treturn typeof Worker !== \"undefined\";\n}\n\n/**\n * Check if SharedArrayBuffer is available\n *\n * Note: SharedArrayBuffer is restricted in some browser contexts\n * due to security considerations (Spectre/Meltdown mitigations).\n *\n * @returns True if SharedArrayBuffer is available, false otherwise\n */\nexport function hasSharedArrayBuffer(): boolean {\n\treturn typeof SharedArrayBuffer !== \"undefined\";\n}\n\n/**\n * Check if module workers are available\n *\n * Module workers allow importing ES modules in worker threads.\n *\n * @returns True if module workers are supported, false otherwise\n */\nexport function hasModuleWorkers(): boolean {\n\tif (!hasWorkers()) {\n\t\treturn false;\n\t}\n\n\ttry {\n\t\tconst blob = new Blob(['console.log(\"test\")'], {\n\t\t\ttype: \"application/javascript\",\n\t\t});\n\t\tconst workerUrl = URL.createObjectURL(blob);\n\t\ttry {\n\t\t\treturn true;\n\t\t} finally {\n\t\t\tURL.revokeObjectURL(workerUrl);\n\t\t}\n\t} catch {\n\t\treturn false;\n\t}\n}\n\n/**\n * Check if WebAssembly is available\n *\n * @returns True if WebAssembly is supported, false otherwise\n */\nexport function hasWasm(): boolean {\n\treturn typeof WebAssembly !== \"undefined\" && WebAssembly.instantiate !== undefined;\n}\n\n/**\n * Check if WebAssembly.instantiateStreaming is available\n *\n * Streaming instantiation is more efficient than buffering the entire WASM module.\n *\n * @returns True if streaming WebAssembly is supported, false otherwise\n */\nexport function hasWasmStreaming(): boolean {\n\treturn typeof WebAssembly !== \"undefined\" && WebAssembly.instantiateStreaming !== undefined;\n}\n\n/**\n * Check if BigInt is available\n *\n * @returns True if BigInt type is supported, false otherwise\n */\nexport function hasBigInt(): boolean {\n\ttry {\n\t\tconst test = BigInt(\"1\");\n\t\treturn typeof test === \"bigint\";\n\t} catch {\n\t\treturn false;\n\t}\n}\n\n/**\n * Get runtime version information\n *\n * @returns Version string if available, undefined otherwise\n *\n * @example\n * ```typescript\n * const version = getRuntimeVersion();\n * console.log(`Running on Node ${version}`); // \"Running on Node 18.12.0\"\n * ```\n */\nexport function getRuntimeVersion(): string | undefined {\n\tconst runtime = detectRuntime();\n\n\tswitch (runtime) {\n\t\tcase \"node\":\n\t\t\treturn process.version?.substring(1);\n\t\tcase \"deno\": {\n\t\t\tconst deno = (globalThis as unknown as Record<string, unknown>).Deno as Record<string, unknown> | undefined;\n\t\t\tconst version = deno?.version as Record<string, unknown> | undefined;\n\t\t\treturn version?.deno as string | undefined;\n\t\t}\n\t\tcase \"bun\": {\n\t\t\tconst bun = (globalThis as unknown as Record<string, unknown>).Bun as Record<string, unknown> | undefined;\n\t\t\treturn bun?.version as string | undefined;\n\t\t}\n\t\tdefault:\n\t\t\treturn undefined;\n\t}\n}\n\n/**\n * Get comprehensive WebAssembly capabilities for current runtime\n *\n * Returns detailed information about WASM and related APIs available\n * in the current runtime environment.\n *\n * @returns Object describing available WASM capabilities\n *\n * @example\n * ```typescript\n * import { getWasmCapabilities } from '@kreuzberg/wasm/runtime';\n *\n * const caps = getWasmCapabilities();\n * console.log(`WASM available: ${caps.hasWasm}`);\n * console.log(`Streaming WASM: ${caps.hasWasmStreaming}`);\n * console.log(`Workers available: ${caps.hasWorkers}`);\n *\n * if (caps.hasWasm && caps.hasWorkers) {\n * // Can offload WASM processing to workers\n * }\n * ```\n */\nexport function getWasmCapabilities(): WasmCapabilities {\n\tconst runtime = detectRuntime();\n\tconst version = getRuntimeVersion();\n\tconst capabilities: WasmCapabilities = {\n\t\truntime,\n\t\thasWasm: hasWasm(),\n\t\thasWasmStreaming: hasWasmStreaming(),\n\t\thasFileApi: hasFileApi(),\n\t\thasBlob: hasBlob(),\n\t\thasWorkers: hasWorkers(),\n\t\thasSharedArrayBuffer: hasSharedArrayBuffer(),\n\t\thasModuleWorkers: hasModuleWorkers(),\n\t\thasBigInt: hasBigInt(),\n\t\t...(version !== undefined ? { runtimeVersion: version } : {}),\n\t};\n\treturn capabilities;\n}\n\n/**\n * Get comprehensive runtime information\n *\n * Returns detailed information about the current runtime environment,\n * capabilities, and identifying information.\n *\n * @returns Object with runtime details and capabilities\n *\n * @example\n * ```typescript\n * const info = getRuntimeInfo();\n * console.log(info.runtime); // 'browser' | 'node' | 'deno' | 'bun'\n * console.log(info.isBrowser); // true/false\n * console.log(info.userAgent); // Browser user agent string\n * console.log(info.capabilities); // Detailed capability information\n * ```\n */\nexport function getRuntimeInfo() {\n\tconst runtime = detectRuntime();\n\tconst capabilities = getWasmCapabilities();\n\n\treturn {\n\t\truntime,\n\t\tisBrowser: isBrowser(),\n\t\tisNode: isNode(),\n\t\tisDeno: isDeno(),\n\t\tisBun: isBun(),\n\t\tisWeb: isWebEnvironment(),\n\t\tisServer: isServerEnvironment(),\n\t\truntimeVersion: getRuntimeVersion(),\n\t\tuserAgent: typeof navigator !== \"undefined\" ? navigator.userAgent : \"N/A\",\n\t\tcapabilities,\n\t};\n}\n"],"mappings":";AAwFO,SAAS,gBAA6B;AAC5C,MAAI,OAAQ,WAAkD,SAAS,aAAa;AACnF,WAAO;AAAA,EACR;AAEA,MAAI,OAAQ,WAAkD,QAAQ,aAAa;AAClF,WAAO;AAAA,EACR;AAEA,MAAI,OAAO,YAAY,eAAe,QAAQ,YAAY,QAAQ,SAAS,MAAM;AAChF,WAAO;AAAA,EACR;AAEA,MAAI,OAAO,WAAW,eAAe,OAAO,aAAa,aAAa;AACrE,WAAO;AAAA,EACR;AAEA,SAAO;AACR;AAOO,SAAS,YAAqB;AACpC,SAAO,cAAc,MAAM;AAC5B;AAOO,SAAS,SAAkB;AACjC,SAAO,cAAc,MAAM;AAC5B;AAOO,SAAS,SAAkB;AACjC,SAAO,cAAc,MAAM;AAC5B;AAOO,SAAS,QAAiB;AAChC,SAAO,cAAc,MAAM;AAC5B;AAOO,SAAS,mBAA4B;AAC3C,QAAM,UAAU,cAAc;AAC9B,SAAO,YAAY;AACpB;AAOO,SAAS,sBAA+B;AAC9C,QAAM,UAAU,cAAc;AAC9B,SAAO,YAAY,UAAU,YAAY,UAAU,YAAY;AAChE;AAoBO,SAAS,aAAsB;AACrC,SAAO,OAAO,WAAW,eAAe,OAAO,SAAS,eAAe,OAAO,SAAS;AACxF;AAOO,SAAS,UAAmB;AAClC,SAAO,OAAO,SAAS;AACxB;AAOO,SAAS,aAAsB;AACrC,SAAO,OAAO,WAAW;AAC1B;AAUO,SAAS,uBAAgC;AAC/C,SAAO,OAAO,sBAAsB;AACrC;AASO,SAAS,mBAA4B;AAC3C,MAAI,CAAC,WAAW,GAAG;AAClB,WAAO;AAAA,EACR;AAEA,MAAI;AACH,UAAM,OAAO,IAAI,KAAK,CAAC,qBAAqB,GAAG;AAAA,MAC9C,MAAM;AAAA,IACP,CAAC;AACD,UAAM,YAAY,IAAI,gBAAgB,IAAI;AAC1C,QAAI;AACH,aAAO;AAAA,IACR,UAAE;AACD,UAAI,gBAAgB,SAAS;AAAA,IAC9B;AAAA,EACD,QAAQ;AACP,WAAO;AAAA,EACR;AACD;AAOO,SAAS,UAAmB;AAClC,SAAO,OAAO,gBAAgB,eAAe,YAAY,gBAAgB;AAC1E;AASO,SAAS,mBAA4B;AAC3C,SAAO,OAAO,gBAAgB,eAAe,YAAY,yBAAyB;AACnF;AAOO,SAAS,YAAqB;AACpC,MAAI;AACH,UAAM,OAAO,OAAO,GAAG;AACvB,WAAO,OAAO,SAAS;AAAA,EACxB,QAAQ;AACP,WAAO;AAAA,EACR;AACD;AAaO,SAAS,oBAAwC;AACvD,QAAM,UAAU,cAAc;AAE9B,UAAQ,SAAS;AAAA,IAChB,KAAK;AACJ,aAAO,QAAQ,SAAS,UAAU,CAAC;AAAA,IACpC,KAAK,QAAQ;AACZ,YAAM,OAAQ,WAAkD;AAChE,YAAM,UAAU,MAAM;AACtB,aAAO,SAAS;AAAA,IACjB;AAAA,IACA,KAAK,OAAO;AACX,YAAM,MAAO,WAAkD;AAC/D,aAAO,KAAK;AAAA,IACb;AAAA,IACA;AACC,aAAO;AAAA,EACT;AACD;AAwBO,SAAS,sBAAwC;AACvD,QAAM,UAAU,cAAc;AAC9B,QAAM,UAAU,kBAAkB;AAClC,QAAM,eAAiC;AAAA,IACtC;AAAA,IACA,SAAS,QAAQ;AAAA,IACjB,kBAAkB,iBAAiB;AAAA,IACnC,YAAY,WAAW;AAAA,IACvB,SAAS,QAAQ;AAAA,IACjB,YAAY,WAAW;AAAA,IACvB,sBAAsB,qBAAqB;AAAA,IAC3C,kBAAkB,iBAAiB;AAAA,IACnC,WAAW,UAAU;AAAA,IACrB,GAAI,YAAY,SAAY,EAAE,gBAAgB,QAAQ,IAAI,CAAC;AAAA,EAC5D;AACA,SAAO;AACR;AAmBO,SAAS,iBAAiB;AAChC,QAAM,UAAU,cAAc;AAC9B,QAAM,eAAe,oBAAoB;AAEzC,SAAO;AAAA,IACN;AAAA,IACA,WAAW,UAAU;AAAA,IACrB,QAAQ,OAAO;AAAA,IACf,QAAQ,OAAO;AAAA,IACf,OAAO,MAAM;AAAA,IACb,OAAO,iBAAiB;AAAA,IACxB,UAAU,oBAAoB;AAAA,IAC9B,gBAAgB,kBAAkB;AAAA,IAClC,WAAW,OAAO,cAAc,cAAc,UAAU,YAAY;AAAA,IACpE;AAAA,EACD;AACD;","names":[]}
|
|
@@ -24,6 +24,68 @@ interface PostProcessorConfig {
|
|
|
24
24
|
/** List of disabled processors */
|
|
25
25
|
disabledProcessors?: string[];
|
|
26
26
|
}
|
|
27
|
+
/**
|
|
28
|
+
* Keyword extraction algorithm type
|
|
29
|
+
*
|
|
30
|
+
* Supported algorithms:
|
|
31
|
+
* - "yake": YAKE (Yet Another Keyword Extractor) - statistical approach
|
|
32
|
+
* - "rake": RAKE (Rapid Automatic Keyword Extraction) - co-occurrence based
|
|
33
|
+
*/
|
|
34
|
+
type KeywordAlgorithm = "yake" | "rake";
|
|
35
|
+
/**
|
|
36
|
+
* YAKE algorithm-specific parameters
|
|
37
|
+
*/
|
|
38
|
+
interface YakeParams {
|
|
39
|
+
/** Window size for co-occurrence analysis (default: 2) */
|
|
40
|
+
windowSize?: number;
|
|
41
|
+
}
|
|
42
|
+
/**
|
|
43
|
+
* RAKE algorithm-specific parameters
|
|
44
|
+
*/
|
|
45
|
+
interface RakeParams {
|
|
46
|
+
/** Minimum word length to consider (default: 1) */
|
|
47
|
+
minWordLength?: number;
|
|
48
|
+
/** Maximum words in a keyword phrase (default: 3) */
|
|
49
|
+
maxWordsPerPhrase?: number;
|
|
50
|
+
}
|
|
51
|
+
/**
|
|
52
|
+
* Keyword extraction configuration
|
|
53
|
+
*
|
|
54
|
+
* Controls how keywords are extracted from text, including algorithm selection,
|
|
55
|
+
* scoring thresholds, n-gram ranges, and language-specific settings.
|
|
56
|
+
*/
|
|
57
|
+
interface KeywordConfig {
|
|
58
|
+
/** Algorithm to use for extraction (default: "yake") */
|
|
59
|
+
algorithm?: KeywordAlgorithm;
|
|
60
|
+
/** Maximum number of keywords to extract (default: 10) */
|
|
61
|
+
maxKeywords?: number;
|
|
62
|
+
/** Minimum score threshold 0.0-1.0 (default: 0.0) */
|
|
63
|
+
minScore?: number;
|
|
64
|
+
/** N-gram range [min, max] for keyword extraction (default: [1, 3]) */
|
|
65
|
+
ngramRange?: [number, number];
|
|
66
|
+
/** Language code for stopword filtering (e.g., "en", "de", "fr") */
|
|
67
|
+
language?: string;
|
|
68
|
+
/** YAKE-specific tuning parameters */
|
|
69
|
+
yakeParams?: YakeParams;
|
|
70
|
+
/** RAKE-specific tuning parameters */
|
|
71
|
+
rakeParams?: RakeParams;
|
|
72
|
+
}
|
|
73
|
+
/**
|
|
74
|
+
* Extracted keyword with relevance metadata
|
|
75
|
+
*
|
|
76
|
+
* Represents a single keyword extracted from text along with its relevance score,
|
|
77
|
+
* the algorithm that extracted it, and optional position information.
|
|
78
|
+
*/
|
|
79
|
+
interface ExtractedKeyword {
|
|
80
|
+
/** The keyword text */
|
|
81
|
+
text: string;
|
|
82
|
+
/** Relevance score (higher is better, algorithm-specific range) */
|
|
83
|
+
score: number;
|
|
84
|
+
/** Algorithm that extracted this keyword */
|
|
85
|
+
algorithm: KeywordAlgorithm;
|
|
86
|
+
/** Optional positions where keyword appears in text (character offsets) */
|
|
87
|
+
positions?: number[];
|
|
88
|
+
}
|
|
27
89
|
/**
|
|
28
90
|
* Configuration for document extraction
|
|
29
91
|
*/
|
|
@@ -44,6 +106,8 @@ interface ExtractionConfig {
|
|
|
44
106
|
tokenReduction?: TokenReductionConfig;
|
|
45
107
|
/** Post-processor configuration */
|
|
46
108
|
postprocessor?: PostProcessorConfig;
|
|
109
|
+
/** Keyword extraction configuration */
|
|
110
|
+
keywords?: KeywordConfig;
|
|
47
111
|
/** Whether to use caching */
|
|
48
112
|
useCache?: boolean;
|
|
49
113
|
/** Enable quality processing */
|
|
@@ -120,8 +184,12 @@ interface PdfConfig {
|
|
|
120
184
|
* Page extraction configuration
|
|
121
185
|
*/
|
|
122
186
|
interface PageExtractionConfig {
|
|
123
|
-
/**
|
|
124
|
-
|
|
187
|
+
/** Extract pages as separate array (ExtractionResult.pages) */
|
|
188
|
+
extractPages?: boolean;
|
|
189
|
+
/** Insert page markers in main content string */
|
|
190
|
+
insertPageMarkers?: boolean;
|
|
191
|
+
/** Page marker format (use {page_num} placeholder) */
|
|
192
|
+
markerFormat?: string;
|
|
125
193
|
}
|
|
126
194
|
/**
|
|
127
195
|
* Language detection configuration
|
|
@@ -150,6 +218,8 @@ interface ExtractionResult {
|
|
|
150
218
|
images?: ExtractedImage[] | null;
|
|
151
219
|
/** Per-page content */
|
|
152
220
|
pages?: PageContent[] | null;
|
|
221
|
+
/** Extracted keywords when keyword extraction is enabled */
|
|
222
|
+
keywords?: ExtractedKeyword[] | null;
|
|
153
223
|
}
|
|
154
224
|
/**
|
|
155
225
|
* Document metadata
|
|
@@ -291,4 +361,4 @@ interface OcrBackendProtocol {
|
|
|
291
361
|
} | string>;
|
|
292
362
|
}
|
|
293
363
|
|
|
294
|
-
export type { Chunk as C,
|
|
364
|
+
export type { Chunk as C, ExtractionResult as E, ImageExtractionConfig as I, KeywordAlgorithm as K, LanguageDetectionConfig as L, Metadata as M, OcrBackendProtocol as O, PageContent as P, RakeParams as R, Table as T, YakeParams as Y, ExtractionConfig as a, ChunkingConfig as b, ChunkMetadata as c, ExtractedImage as d, OcrConfig as e, PageExtractionConfig as f, PdfConfig as g, PostProcessorConfig as h, TesseractConfig as i, TokenReductionConfig as j, KeywordConfig as k, ExtractedKeyword as l };
|
|
@@ -24,6 +24,68 @@ interface PostProcessorConfig {
|
|
|
24
24
|
/** List of disabled processors */
|
|
25
25
|
disabledProcessors?: string[];
|
|
26
26
|
}
|
|
27
|
+
/**
|
|
28
|
+
* Keyword extraction algorithm type
|
|
29
|
+
*
|
|
30
|
+
* Supported algorithms:
|
|
31
|
+
* - "yake": YAKE (Yet Another Keyword Extractor) - statistical approach
|
|
32
|
+
* - "rake": RAKE (Rapid Automatic Keyword Extraction) - co-occurrence based
|
|
33
|
+
*/
|
|
34
|
+
type KeywordAlgorithm = "yake" | "rake";
|
|
35
|
+
/**
|
|
36
|
+
* YAKE algorithm-specific parameters
|
|
37
|
+
*/
|
|
38
|
+
interface YakeParams {
|
|
39
|
+
/** Window size for co-occurrence analysis (default: 2) */
|
|
40
|
+
windowSize?: number;
|
|
41
|
+
}
|
|
42
|
+
/**
|
|
43
|
+
* RAKE algorithm-specific parameters
|
|
44
|
+
*/
|
|
45
|
+
interface RakeParams {
|
|
46
|
+
/** Minimum word length to consider (default: 1) */
|
|
47
|
+
minWordLength?: number;
|
|
48
|
+
/** Maximum words in a keyword phrase (default: 3) */
|
|
49
|
+
maxWordsPerPhrase?: number;
|
|
50
|
+
}
|
|
51
|
+
/**
|
|
52
|
+
* Keyword extraction configuration
|
|
53
|
+
*
|
|
54
|
+
* Controls how keywords are extracted from text, including algorithm selection,
|
|
55
|
+
* scoring thresholds, n-gram ranges, and language-specific settings.
|
|
56
|
+
*/
|
|
57
|
+
interface KeywordConfig {
|
|
58
|
+
/** Algorithm to use for extraction (default: "yake") */
|
|
59
|
+
algorithm?: KeywordAlgorithm;
|
|
60
|
+
/** Maximum number of keywords to extract (default: 10) */
|
|
61
|
+
maxKeywords?: number;
|
|
62
|
+
/** Minimum score threshold 0.0-1.0 (default: 0.0) */
|
|
63
|
+
minScore?: number;
|
|
64
|
+
/** N-gram range [min, max] for keyword extraction (default: [1, 3]) */
|
|
65
|
+
ngramRange?: [number, number];
|
|
66
|
+
/** Language code for stopword filtering (e.g., "en", "de", "fr") */
|
|
67
|
+
language?: string;
|
|
68
|
+
/** YAKE-specific tuning parameters */
|
|
69
|
+
yakeParams?: YakeParams;
|
|
70
|
+
/** RAKE-specific tuning parameters */
|
|
71
|
+
rakeParams?: RakeParams;
|
|
72
|
+
}
|
|
73
|
+
/**
|
|
74
|
+
* Extracted keyword with relevance metadata
|
|
75
|
+
*
|
|
76
|
+
* Represents a single keyword extracted from text along with its relevance score,
|
|
77
|
+
* the algorithm that extracted it, and optional position information.
|
|
78
|
+
*/
|
|
79
|
+
interface ExtractedKeyword {
|
|
80
|
+
/** The keyword text */
|
|
81
|
+
text: string;
|
|
82
|
+
/** Relevance score (higher is better, algorithm-specific range) */
|
|
83
|
+
score: number;
|
|
84
|
+
/** Algorithm that extracted this keyword */
|
|
85
|
+
algorithm: KeywordAlgorithm;
|
|
86
|
+
/** Optional positions where keyword appears in text (character offsets) */
|
|
87
|
+
positions?: number[];
|
|
88
|
+
}
|
|
27
89
|
/**
|
|
28
90
|
* Configuration for document extraction
|
|
29
91
|
*/
|
|
@@ -44,6 +106,8 @@ interface ExtractionConfig {
|
|
|
44
106
|
tokenReduction?: TokenReductionConfig;
|
|
45
107
|
/** Post-processor configuration */
|
|
46
108
|
postprocessor?: PostProcessorConfig;
|
|
109
|
+
/** Keyword extraction configuration */
|
|
110
|
+
keywords?: KeywordConfig;
|
|
47
111
|
/** Whether to use caching */
|
|
48
112
|
useCache?: boolean;
|
|
49
113
|
/** Enable quality processing */
|
|
@@ -120,8 +184,12 @@ interface PdfConfig {
|
|
|
120
184
|
* Page extraction configuration
|
|
121
185
|
*/
|
|
122
186
|
interface PageExtractionConfig {
|
|
123
|
-
/**
|
|
124
|
-
|
|
187
|
+
/** Extract pages as separate array (ExtractionResult.pages) */
|
|
188
|
+
extractPages?: boolean;
|
|
189
|
+
/** Insert page markers in main content string */
|
|
190
|
+
insertPageMarkers?: boolean;
|
|
191
|
+
/** Page marker format (use {page_num} placeholder) */
|
|
192
|
+
markerFormat?: string;
|
|
125
193
|
}
|
|
126
194
|
/**
|
|
127
195
|
* Language detection configuration
|
|
@@ -150,6 +218,8 @@ interface ExtractionResult {
|
|
|
150
218
|
images?: ExtractedImage[] | null;
|
|
151
219
|
/** Per-page content */
|
|
152
220
|
pages?: PageContent[] | null;
|
|
221
|
+
/** Extracted keywords when keyword extraction is enabled */
|
|
222
|
+
keywords?: ExtractedKeyword[] | null;
|
|
153
223
|
}
|
|
154
224
|
/**
|
|
155
225
|
* Document metadata
|
|
@@ -291,4 +361,4 @@ interface OcrBackendProtocol {
|
|
|
291
361
|
} | string>;
|
|
292
362
|
}
|
|
293
363
|
|
|
294
|
-
export type { Chunk as C,
|
|
364
|
+
export type { Chunk as C, ExtractionResult as E, ImageExtractionConfig as I, KeywordAlgorithm as K, LanguageDetectionConfig as L, Metadata as M, OcrBackendProtocol as O, PageContent as P, RakeParams as R, Table as T, YakeParams as Y, ExtractionConfig as a, ChunkingConfig as b, ChunkMetadata as c, ExtractedImage as d, OcrConfig as e, PageExtractionConfig as f, PdfConfig as g, PostProcessorConfig as h, TesseractConfig as i, TokenReductionConfig as j, KeywordConfig as k, ExtractedKeyword as l };
|
package/package.json
CHANGED
|
@@ -1,164 +1,164 @@
|
|
|
1
1
|
{
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
2
|
+
"name": "@kreuzberg/wasm",
|
|
3
|
+
"version": "4.0.0-rc.23",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"packageManager": "pnpm@10.17.0",
|
|
6
|
+
"description": "Kreuzberg document intelligence - WebAssembly bindings",
|
|
7
|
+
"author": {
|
|
8
|
+
"name": "Na'aman Hirschfeld",
|
|
9
|
+
"email": "nhirschfeld@gmail.com",
|
|
10
|
+
"url": "https://kreuzberg.dev"
|
|
11
|
+
},
|
|
12
|
+
"homepage": "https://kreuzberg.dev",
|
|
13
|
+
"bugs": {
|
|
14
|
+
"url": "https://github.com/kreuzberg-dev/kreuzberg/issues"
|
|
15
|
+
},
|
|
16
|
+
"main": "dist/index.cjs",
|
|
17
|
+
"module": "dist/index.js",
|
|
18
|
+
"types": "dist/index.d.ts",
|
|
19
|
+
"exports": {
|
|
20
|
+
".": {
|
|
21
|
+
"browser": {
|
|
22
|
+
"import": {
|
|
23
|
+
"types": "./dist/index.d.ts",
|
|
24
|
+
"default": "./dist/index.js"
|
|
25
|
+
},
|
|
26
|
+
"require": {
|
|
27
|
+
"types": "./dist/index.d.cts",
|
|
28
|
+
"default": "./dist/index.cjs"
|
|
29
|
+
}
|
|
30
|
+
},
|
|
31
|
+
"node": {
|
|
32
|
+
"import": {
|
|
33
|
+
"types": "./dist/index.d.ts",
|
|
34
|
+
"default": "./dist/index.js"
|
|
35
|
+
},
|
|
36
|
+
"require": {
|
|
37
|
+
"types": "./dist/index.d.cts",
|
|
38
|
+
"default": "./dist/index.cjs"
|
|
39
|
+
}
|
|
40
|
+
},
|
|
41
|
+
"import": {
|
|
42
|
+
"types": "./dist/index.d.ts",
|
|
43
|
+
"default": "./dist/index.js"
|
|
44
|
+
},
|
|
45
|
+
"require": {
|
|
46
|
+
"types": "./dist/index.d.cts",
|
|
47
|
+
"default": "./dist/index.cjs"
|
|
48
|
+
}
|
|
49
|
+
},
|
|
50
|
+
"./dist/pkg/*": "./dist/pkg/*",
|
|
51
|
+
"./dist/kreuzberg_wasm.js": "./dist/kreuzberg_wasm.js",
|
|
52
|
+
"./runtime": {
|
|
53
|
+
"import": {
|
|
54
|
+
"types": "./dist/runtime.d.ts",
|
|
55
|
+
"default": "./dist/runtime.js"
|
|
56
|
+
},
|
|
57
|
+
"require": {
|
|
58
|
+
"types": "./dist/runtime.d.cts",
|
|
59
|
+
"default": "./dist/runtime.cjs"
|
|
60
|
+
}
|
|
61
|
+
},
|
|
62
|
+
"./adapters/wasm-adapter": {
|
|
63
|
+
"import": {
|
|
64
|
+
"types": "./dist/adapters/wasm-adapter.d.ts",
|
|
65
|
+
"default": "./dist/adapters/wasm-adapter.js"
|
|
66
|
+
},
|
|
67
|
+
"require": {
|
|
68
|
+
"types": "./dist/adapters/wasm-adapter.d.cts",
|
|
69
|
+
"default": "./dist/adapters/wasm-adapter.cjs"
|
|
70
|
+
}
|
|
71
|
+
},
|
|
72
|
+
"./ocr/registry": {
|
|
73
|
+
"import": {
|
|
74
|
+
"types": "./dist/ocr/registry.d.ts",
|
|
75
|
+
"default": "./dist/ocr/registry.js"
|
|
76
|
+
},
|
|
77
|
+
"require": {
|
|
78
|
+
"types": "./dist/ocr/registry.d.cts",
|
|
79
|
+
"default": "./dist/ocr/registry.cjs"
|
|
80
|
+
}
|
|
81
|
+
},
|
|
82
|
+
"./ocr/tesseract-wasm-backend": {
|
|
83
|
+
"import": {
|
|
84
|
+
"types": "./dist/ocr/tesseract-wasm-backend.d.ts",
|
|
85
|
+
"default": "./dist/ocr/tesseract-wasm-backend.js"
|
|
86
|
+
},
|
|
87
|
+
"require": {
|
|
88
|
+
"types": "./dist/ocr/tesseract-wasm-backend.d.cts",
|
|
89
|
+
"default": "./dist/ocr/tesseract-wasm-backend.cjs"
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
},
|
|
93
|
+
"repository": {
|
|
94
|
+
"type": "git",
|
|
95
|
+
"url": "https://github.com/kreuzberg-dev/kreuzberg.git"
|
|
96
|
+
},
|
|
97
|
+
"license": "MIT",
|
|
98
|
+
"keywords": [
|
|
99
|
+
"wasm",
|
|
100
|
+
"webassembly",
|
|
101
|
+
"document-intelligence",
|
|
102
|
+
"document-extraction",
|
|
103
|
+
"text-extraction",
|
|
104
|
+
"pdf-extraction",
|
|
105
|
+
"ocr",
|
|
106
|
+
"pdf",
|
|
107
|
+
"docx",
|
|
108
|
+
"xlsx",
|
|
109
|
+
"pptx",
|
|
110
|
+
"office-documents",
|
|
111
|
+
"table-extraction",
|
|
112
|
+
"metadata-extraction",
|
|
113
|
+
"rust",
|
|
114
|
+
"binding",
|
|
115
|
+
"typescript"
|
|
116
|
+
],
|
|
117
|
+
"files": [
|
|
118
|
+
"dist",
|
|
119
|
+
"pkg",
|
|
120
|
+
"*.wasm",
|
|
121
|
+
"*.d.ts",
|
|
122
|
+
"pdfium.js",
|
|
123
|
+
"README.md"
|
|
124
|
+
],
|
|
125
|
+
"engines": {
|
|
126
|
+
"node": ">= 16"
|
|
127
|
+
},
|
|
128
|
+
"publishConfig": {
|
|
129
|
+
"registry": "https://registry.npmjs.org/",
|
|
130
|
+
"access": "public"
|
|
131
|
+
},
|
|
132
|
+
"scripts": {
|
|
133
|
+
"build:wasm:web": "wasm-pack build --target web --out-dir pkg --release",
|
|
134
|
+
"build:wasm:bundler": "wasm-pack build --target bundler --out-dir pkg --release",
|
|
135
|
+
"build:wasm:nodejs": "wasm-pack build --target nodejs --out-dir pkg --release",
|
|
136
|
+
"build:wasm:deno": "wasm-pack build --target deno --out-dir pkg --release",
|
|
137
|
+
"build:ts": "tsup && node scripts/fix-type-exports.js",
|
|
138
|
+
"copy:pkg": "node scripts/copy-pkg.js",
|
|
139
|
+
"build": "npm run build:wasm:nodejs && npm run build:ts && npm run copy:pkg",
|
|
140
|
+
"build:all": "npm run build:wasm:web && npm run build:wasm:bundler && npm run build:wasm:nodejs && npm run build:wasm:deno && npm run build:ts && npm run copy:pkg",
|
|
141
|
+
"typecheck": "tsc --noEmit",
|
|
142
|
+
"lint": "biome check typescript && oxlint typescript",
|
|
143
|
+
"lint:fix": "biome check --write typescript",
|
|
144
|
+
"format": "biome format --write typescript",
|
|
145
|
+
"test": "vitest run",
|
|
146
|
+
"test:watch": "vitest",
|
|
147
|
+
"test:coverage": "vitest run --coverage",
|
|
148
|
+
"test:ui": "vitest --ui",
|
|
149
|
+
"prepublishOnly": "npm run build"
|
|
150
|
+
},
|
|
151
|
+
"devDependencies": {
|
|
152
|
+
"@types/node": "^25.0.3",
|
|
153
|
+
"@vitest/coverage-v8": "^4.0.16",
|
|
154
|
+
"@vitest/ui": "^4.0.16",
|
|
155
|
+
"jsdom": "^27.4.0",
|
|
156
|
+
"oxlint": "^1.36.0",
|
|
157
|
+
"tsup": "^8.5.1",
|
|
158
|
+
"typescript": "^5.9.3",
|
|
159
|
+
"vitest": "^4.0.16"
|
|
160
|
+
},
|
|
161
|
+
"optionalDependencies": {
|
|
162
|
+
"tesseract-wasm": "^0.11.0"
|
|
163
|
+
}
|
|
164
164
|
}
|