@kreuzberg/wasm 4.0.0-rc.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/README.md +982 -0
  2. package/dist/adapters/wasm-adapter.d.mts +121 -0
  3. package/dist/adapters/wasm-adapter.d.ts +121 -0
  4. package/dist/adapters/wasm-adapter.js +241 -0
  5. package/dist/adapters/wasm-adapter.js.map +1 -0
  6. package/dist/adapters/wasm-adapter.mjs +221 -0
  7. package/dist/adapters/wasm-adapter.mjs.map +1 -0
  8. package/dist/index.d.mts +466 -0
  9. package/dist/index.d.ts +466 -0
  10. package/dist/index.js +383 -0
  11. package/dist/index.js.map +1 -0
  12. package/dist/index.mjs +384 -0
  13. package/dist/index.mjs.map +1 -0
  14. package/dist/kreuzberg_wasm.d.mts +758 -0
  15. package/dist/kreuzberg_wasm.d.ts +758 -0
  16. package/dist/kreuzberg_wasm.js +1913 -0
  17. package/dist/kreuzberg_wasm.mjs +48 -0
  18. package/dist/kreuzberg_wasm_bg.wasm +0 -0
  19. package/dist/kreuzberg_wasm_bg.wasm.d.ts +54 -0
  20. package/dist/ocr/registry.d.mts +102 -0
  21. package/dist/ocr/registry.d.ts +102 -0
  22. package/dist/ocr/registry.js +90 -0
  23. package/dist/ocr/registry.js.map +1 -0
  24. package/dist/ocr/registry.mjs +70 -0
  25. package/dist/ocr/registry.mjs.map +1 -0
  26. package/dist/ocr/tesseract-wasm-backend.d.mts +257 -0
  27. package/dist/ocr/tesseract-wasm-backend.d.ts +257 -0
  28. package/dist/ocr/tesseract-wasm-backend.js +454 -0
  29. package/dist/ocr/tesseract-wasm-backend.js.map +1 -0
  30. package/dist/ocr/tesseract-wasm-backend.mjs +424 -0
  31. package/dist/ocr/tesseract-wasm-backend.mjs.map +1 -0
  32. package/dist/runtime.d.mts +256 -0
  33. package/dist/runtime.d.ts +256 -0
  34. package/dist/runtime.js +172 -0
  35. package/dist/runtime.js.map +1 -0
  36. package/dist/runtime.mjs +152 -0
  37. package/dist/runtime.mjs.map +1 -0
  38. package/dist/snippets/wasm-bindgen-rayon-38edf6e439f6d70d/src/workerHelpers.js +107 -0
  39. package/dist/types-GJVIvbPy.d.mts +221 -0
  40. package/dist/types-GJVIvbPy.d.ts +221 -0
  41. package/package.json +138 -0
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../typescript/runtime.ts"],"sourcesContent":["/**\n * Runtime detection and environment-specific utilities\n *\n * This module provides utilities for detecting the JavaScript runtime environment,\n * checking for feature availability, and enabling environment-specific WASM loading strategies.\n *\n * @example Basic Runtime Detection\n * ```typescript\n * import { detectRuntime, isBrowser, isNode } from '@kreuzberg/wasm/runtime';\n *\n * if (isBrowser()) {\n * console.log('Running in browser');\n * } else if (isNode()) {\n * console.log('Running in Node.js');\n * }\n * ```\n *\n * @example Feature Detection\n * ```typescript\n * import { hasFileApi, hasWorkers } from '@kreuzberg/wasm/runtime';\n *\n * if (hasFileApi()) {\n * // Can use File API for browser file uploads\n * }\n *\n * if (hasWorkers()) {\n * // Can use Web Workers for parallel processing\n * }\n * ```\n */\n\nexport type RuntimeType = \"browser\" | \"node\" | \"deno\" | \"bun\" | \"unknown\";\n\n/**\n * WebAssembly capabilities available in the runtime\n */\nexport interface WasmCapabilities {\n\t/** Runtime environment type */\n\truntime: RuntimeType;\n\t/** WebAssembly support available */\n\thasWasm: boolean;\n\t/** Streaming WebAssembly instantiation available */\n\thasWasmStreaming: boolean;\n\t/** File API available (browser) */\n\thasFileApi: boolean;\n\t/** Blob API available */\n\thasBlob: boolean;\n\t/** Worker support available */\n\thasWorkers: boolean;\n\t/** SharedArrayBuffer available (may be restricted) */\n\thasSharedArrayBuffer: boolean;\n\t/** Module Workers available */\n\thasModuleWorkers: boolean;\n\t/** BigInt support */\n\thasBigInt: boolean;\n\t/** Specific runtime version if available */\n\truntimeVersion?: string;\n}\n\n/**\n * Detect the current JavaScript runtime\n *\n * Checks for various global objects and properties to determine\n * which JavaScript runtime environment is currently executing.\n *\n * @returns The detected runtime type\n *\n * 
@example\n * ```typescript\n * import { detectRuntime } from '@kreuzberg/wasm/runtime';\n *\n * const runtime = detectRuntime();\n * switch (runtime) {\n * case 'browser':\n * console.log('Running in browser');\n * break;\n * case 'node':\n * console.log('Running in Node.js');\n * break;\n * case 'deno':\n * console.log('Running in Deno');\n * break;\n * case 'bun':\n * console.log('Running in Bun');\n * break;\n * }\n * ```\n */\nexport function detectRuntime(): RuntimeType {\n\t// Check for Deno\n\tif (typeof (globalThis as unknown as Record<string, unknown>).Deno !== \"undefined\") {\n\t\treturn \"deno\";\n\t}\n\n\t// Check for Bun\n\tif (typeof (globalThis as unknown as Record<string, unknown>).Bun !== \"undefined\") {\n\t\treturn \"bun\";\n\t}\n\n\t// Check for Node.js\n\tif (typeof process !== \"undefined\" && process.versions && process.versions.node) {\n\t\treturn \"node\";\n\t}\n\n\t// Check for browser\n\tif (typeof window !== \"undefined\" && typeof document !== \"undefined\") {\n\t\treturn \"browser\";\n\t}\n\n\treturn \"unknown\";\n}\n\n/**\n * Check if running in a browser environment\n *\n * @returns True if running in a browser, false otherwise\n */\nexport function isBrowser(): boolean {\n\treturn detectRuntime() === \"browser\";\n}\n\n/**\n * Check if running in Node.js\n *\n * @returns True if running in Node.js, false otherwise\n */\nexport function isNode(): boolean {\n\treturn detectRuntime() === \"node\";\n}\n\n/**\n * Check if running in Deno\n *\n * @returns True if running in Deno, false otherwise\n */\nexport function isDeno(): boolean {\n\treturn detectRuntime() === \"deno\";\n}\n\n/**\n * Check if running in Bun\n *\n * @returns True if running in Bun, false otherwise\n */\nexport function isBun(): boolean {\n\treturn detectRuntime() === \"bun\";\n}\n\n/**\n * Check if running in a web environment (browser or similar)\n *\n * @returns True if running in a web browser, false otherwise\n */\nexport function isWebEnvironment(): boolean 
{\n\tconst runtime = detectRuntime();\n\treturn runtime === \"browser\";\n}\n\n/**\n * Check if running in a server-like environment (Node.js, Deno, Bun)\n *\n * @returns True if running on a server runtime, false otherwise\n */\nexport function isServerEnvironment(): boolean {\n\tconst runtime = detectRuntime();\n\treturn runtime === \"node\" || runtime === \"deno\" || runtime === \"bun\";\n}\n\n/**\n * Check if File API is available\n *\n * The File API is required for handling browser file uploads.\n *\n * @returns True if File API is available, false otherwise\n *\n * @example\n * ```typescript\n * if (hasFileApi()) {\n * const fileInput = document.getElementById('file');\n * fileInput.addEventListener('change', (e) => {\n * const file = e.target.files?.[0];\n * // Handle file\n * });\n * }\n * ```\n */\nexport function hasFileApi(): boolean {\n\treturn typeof window !== \"undefined\" && typeof File !== \"undefined\" && typeof Blob !== \"undefined\";\n}\n\n/**\n * Check if Blob API is available\n *\n * @returns True if Blob API is available, false otherwise\n */\nexport function hasBlob(): boolean {\n\treturn typeof Blob !== \"undefined\";\n}\n\n/**\n * Check if Web Workers are available\n *\n * @returns True if Web Workers can be created, false otherwise\n */\nexport function hasWorkers(): boolean {\n\treturn typeof Worker !== \"undefined\";\n}\n\n/**\n * Check if SharedArrayBuffer is available\n *\n * Note: SharedArrayBuffer is restricted in some browser contexts\n * due to security considerations (Spectre/Meltdown mitigations).\n *\n * @returns True if SharedArrayBuffer is available, false otherwise\n */\nexport function hasSharedArrayBuffer(): boolean {\n\treturn typeof SharedArrayBuffer !== \"undefined\";\n}\n\n/**\n * Check if module workers are available\n *\n * Module workers allow importing ES modules in worker threads.\n *\n * @returns True if module workers are supported, false otherwise\n */\nexport function hasModuleWorkers(): boolean {\n\tif 
(!hasWorkers()) {\n\t\treturn false;\n\t}\n\n\ttry {\n\t\t// Try to detect module worker support\n\t\tconst blob = new Blob(['console.log(\"test\")'], {\n\t\t\ttype: \"application/javascript\",\n\t\t});\n\t\tconst workerUrl = URL.createObjectURL(blob);\n\t\ttry {\n\t\t\t// Module workers require type: 'module' option\n\t\t\t// We can't actually instantiate without issues, so we check the API exists\n\t\t\treturn true;\n\t\t} finally {\n\t\t\tURL.revokeObjectURL(workerUrl);\n\t\t}\n\t} catch {\n\t\treturn false;\n\t}\n}\n\n/**\n * Check if WebAssembly is available\n *\n * @returns True if WebAssembly is supported, false otherwise\n */\nexport function hasWasm(): boolean {\n\treturn typeof WebAssembly !== \"undefined\" && WebAssembly.instantiate !== undefined;\n}\n\n/**\n * Check if WebAssembly.instantiateStreaming is available\n *\n * Streaming instantiation is more efficient than buffering the entire WASM module.\n *\n * @returns True if streaming WebAssembly is supported, false otherwise\n */\nexport function hasWasmStreaming(): boolean {\n\treturn typeof WebAssembly !== \"undefined\" && WebAssembly.instantiateStreaming !== undefined;\n}\n\n/**\n * Check if BigInt is available\n *\n * @returns True if BigInt type is supported, false otherwise\n */\nexport function hasBigInt(): boolean {\n\ttry {\n\t\tconst test = BigInt(\"1\");\n\t\treturn typeof test === \"bigint\";\n\t} catch {\n\t\treturn false;\n\t}\n}\n\n/**\n * Get runtime version information\n *\n * @returns Version string if available, undefined otherwise\n *\n * @example\n * ```typescript\n * const version = getRuntimeVersion();\n * console.log(`Running on Node ${version}`); // \"Running on Node 18.12.0\"\n * ```\n */\nexport function getRuntimeVersion(): string | undefined {\n\tconst runtime = detectRuntime();\n\n\tswitch (runtime) {\n\t\tcase \"node\":\n\t\t\treturn process.version?.substring(1); // Remove 'v' prefix\n\t\tcase \"deno\": {\n\t\t\tconst deno = (globalThis as unknown as Record<string, 
unknown>).Deno as Record<string, unknown> | undefined;\n\t\t\tconst version = deno?.version as Record<string, unknown> | undefined;\n\t\t\treturn version?.deno as string | undefined;\n\t\t}\n\t\tcase \"bun\": {\n\t\t\tconst bun = (globalThis as unknown as Record<string, unknown>).Bun as Record<string, unknown> | undefined;\n\t\t\treturn bun?.version as string | undefined;\n\t\t}\n\t\tdefault:\n\t\t\treturn undefined;\n\t}\n}\n\n/**\n * Get comprehensive WebAssembly capabilities for current runtime\n *\n * Returns detailed information about WASM and related APIs available\n * in the current runtime environment.\n *\n * @returns Object describing available WASM capabilities\n *\n * @example\n * ```typescript\n * import { getWasmCapabilities } from '@kreuzberg/wasm/runtime';\n *\n * const caps = getWasmCapabilities();\n * console.log(`WASM available: ${caps.hasWasm}`);\n * console.log(`Streaming WASM: ${caps.hasWasmStreaming}`);\n * console.log(`Workers available: ${caps.hasWorkers}`);\n *\n * if (caps.hasWasm && caps.hasWorkers) {\n * // Can offload WASM processing to workers\n * }\n * ```\n */\nexport function getWasmCapabilities(): WasmCapabilities {\n\tconst runtime = detectRuntime();\n\tconst version = getRuntimeVersion();\n\tconst capabilities: WasmCapabilities = {\n\t\truntime,\n\t\thasWasm: hasWasm(),\n\t\thasWasmStreaming: hasWasmStreaming(),\n\t\thasFileApi: hasFileApi(),\n\t\thasBlob: hasBlob(),\n\t\thasWorkers: hasWorkers(),\n\t\thasSharedArrayBuffer: hasSharedArrayBuffer(),\n\t\thasModuleWorkers: hasModuleWorkers(),\n\t\thasBigInt: hasBigInt(),\n\t\t...(version !== undefined ? 
{ runtimeVersion: version } : {}),\n\t};\n\treturn capabilities;\n}\n\n/**\n * Get comprehensive runtime information\n *\n * Returns detailed information about the current runtime environment,\n * capabilities, and identifying information.\n *\n * @returns Object with runtime details and capabilities\n *\n * @example\n * ```typescript\n * const info = getRuntimeInfo();\n * console.log(info.runtime); // 'browser' | 'node' | 'deno' | 'bun'\n * console.log(info.isBrowser); // true/false\n * console.log(info.userAgent); // Browser user agent string\n * console.log(info.capabilities); // Detailed capability information\n * ```\n */\nexport function getRuntimeInfo() {\n\tconst runtime = detectRuntime();\n\tconst capabilities = getWasmCapabilities();\n\n\treturn {\n\t\truntime,\n\t\tisBrowser: isBrowser(),\n\t\tisNode: isNode(),\n\t\tisDeno: isDeno(),\n\t\tisBun: isBun(),\n\t\tisWeb: isWebEnvironment(),\n\t\tisServer: isServerEnvironment(),\n\t\truntimeVersion: getRuntimeVersion(),\n\t\tuserAgent: typeof navigator !== \"undefined\" ? 
navigator.userAgent : \"N/A\",\n\t\tcapabilities,\n\t};\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAwFO,SAAS,gBAA6B;AAE5C,MAAI,OAAQ,WAAkD,SAAS,aAAa;AACnF,WAAO;AAAA,EACR;AAGA,MAAI,OAAQ,WAAkD,QAAQ,aAAa;AAClF,WAAO;AAAA,EACR;AAGA,MAAI,OAAO,YAAY,eAAe,QAAQ,YAAY,QAAQ,SAAS,MAAM;AAChF,WAAO;AAAA,EACR;AAGA,MAAI,OAAO,WAAW,eAAe,OAAO,aAAa,aAAa;AACrE,WAAO;AAAA,EACR;AAEA,SAAO;AACR;AAOO,SAAS,YAAqB;AACpC,SAAO,cAAc,MAAM;AAC5B;AAOO,SAAS,SAAkB;AACjC,SAAO,cAAc,MAAM;AAC5B;AAOO,SAAS,SAAkB;AACjC,SAAO,cAAc,MAAM;AAC5B;AAOO,SAAS,QAAiB;AAChC,SAAO,cAAc,MAAM;AAC5B;AAOO,SAAS,mBAA4B;AAC3C,QAAM,UAAU,cAAc;AAC9B,SAAO,YAAY;AACpB;AAOO,SAAS,sBAA+B;AAC9C,QAAM,UAAU,cAAc;AAC9B,SAAO,YAAY,UAAU,YAAY,UAAU,YAAY;AAChE;AAoBO,SAAS,aAAsB;AACrC,SAAO,OAAO,WAAW,eAAe,OAAO,SAAS,eAAe,OAAO,SAAS;AACxF;AAOO,SAAS,UAAmB;AAClC,SAAO,OAAO,SAAS;AACxB;AAOO,SAAS,aAAsB;AACrC,SAAO,OAAO,WAAW;AAC1B;AAUO,SAAS,uBAAgC;AAC/C,SAAO,OAAO,sBAAsB;AACrC;AASO,SAAS,mBAA4B;AAC3C,MAAI,CAAC,WAAW,GAAG;AAClB,WAAO;AAAA,EACR;AAEA,MAAI;AAEH,UAAM,OAAO,IAAI,KAAK,CAAC,qBAAqB,GAAG;AAAA,MAC9C,MAAM;AAAA,IACP,CAAC;AACD,UAAM,YAAY,IAAI,gBAAgB,IAAI;AAC1C,QAAI;AAGH,aAAO;AAAA,IACR,UAAE;AACD,UAAI,gBAAgB,SAAS;AAAA,IAC9B;AAAA,EACD,QAAQ;AACP,WAAO;AAAA,EACR;AACD;AAOO,SAAS,UAAmB;AAClC,SAAO,OAAO,gBAAgB,eAAe,YAAY,gBAAgB;AAC1E;AASO,SAAS,mBAA4B;AAC3C,SAAO,OAAO,gBAAgB,eAAe,YAAY,yBAAyB;AACnF;AAOO,SAAS,YAAqB;AACpC,MAAI;AACH,UAAM,OAAO,OAAO,GAAG;AACvB,WAAO,OAAO,SAAS;AAAA,EACxB,QAAQ;AACP,WAAO;AAAA,EACR;AACD;AAaO,SAAS,oBAAwC;AACvD,QAAM,UAAU,cAAc;AAE9B,UAAQ,SAAS;AAAA,IAChB,KAAK;AACJ,aAAO,QAAQ,SAAS,UAAU,CAAC;AAAA;AAAA,IACpC,KAAK,QAAQ;AACZ,YAAM,OAAQ,WAAkD;AAChE,YAAM,UAAU,MAAM;AACtB,aAAO,SAAS;AAAA,IACjB;AAAA,IACA,KAAK,OAAO;AACX,YAAM,MAAO,WAAkD;AAC/D,aAAO,KAAK;AAAA,IACb;AAAA,IACA;AACC,aAAO;AAAA,EACT;AACD;AAwBO,SAAS,sBAAwC;AACvD,QAAM,UAAU,cAAc;AAC9B,QAAM,UAAU,kBAAkB;AAClC,QAAM,eAAiC;AAAA,IACtC;AAAA,IACA,SAAS,QAAQ;AAAA,IACjB,kBAAkB,iBAAiB;AAAA,IACnC,YAAY,WAAW;AAAA,IACvB,SAAS,QAAQ;AAAA,IAC
jB,YAAY,WAAW;AAAA,IACvB,sBAAsB,qBAAqB;AAAA,IAC3C,kBAAkB,iBAAiB;AAAA,IACnC,WAAW,UAAU;AAAA,IACrB,GAAI,YAAY,SAAY,EAAE,gBAAgB,QAAQ,IAAI,CAAC;AAAA,EAC5D;AACA,SAAO;AACR;AAmBO,SAAS,iBAAiB;AAChC,QAAM,UAAU,cAAc;AAC9B,QAAM,eAAe,oBAAoB;AAEzC,SAAO;AAAA,IACN;AAAA,IACA,WAAW,UAAU;AAAA,IACrB,QAAQ,OAAO;AAAA,IACf,QAAQ,OAAO;AAAA,IACf,OAAO,MAAM;AAAA,IACb,OAAO,iBAAiB;AAAA,IACxB,UAAU,oBAAoB;AAAA,IAC9B,gBAAgB,kBAAkB;AAAA,IAClC,WAAW,OAAO,cAAc,cAAc,UAAU,YAAY;AAAA,IACpE;AAAA,EACD;AACD;","names":[]}
@@ -0,0 +1,152 @@
/**
 * Detect the current JavaScript runtime.
 *
 * Probes are evaluated in a fixed order and the first one that matches wins:
 * Deno, then Bun, then Node.js, then browser. The order matters because the
 * Deno and Bun checks run before the `process.versions.node` check.
 *
 * @returns {"browser" | "node" | "deno" | "bun" | "unknown"} The detected runtime.
 */
function detectRuntime() {
  const probes = [
    ["deno", () => typeof globalThis.Deno !== "undefined"],
    ["bun", () => typeof globalThis.Bun !== "undefined"],
    ["node", () => typeof process !== "undefined" && Boolean(process.versions?.node)],
    ["browser", () => typeof window !== "undefined" && typeof document !== "undefined"]
  ];
  // `find` stops at the first matching probe, so later probes are never run.
  const match = probes.find(([, matches]) => matches());
  return match ? match[0] : "unknown";
}
/**
 * Check if running in a browser environment.
 *
 * @returns {boolean} True when `detectRuntime()` reports "browser".
 */
function isBrowser() {
  const runtime = detectRuntime();
  return runtime === "browser";
}
/**
 * Check if running in Node.js.
 *
 * @returns {boolean} True when `detectRuntime()` reports "node".
 */
function isNode() {
  const runtime = detectRuntime();
  return runtime === "node";
}
/**
 * Check if running in Deno.
 *
 * @returns {boolean} True when `detectRuntime()` reports "deno".
 */
function isDeno() {
  const runtime = detectRuntime();
  return runtime === "deno";
}
/**
 * Check if running in Bun.
 *
 * @returns {boolean} True when `detectRuntime()` reports "bun".
 */
function isBun() {
  const runtime = detectRuntime();
  return runtime === "bun";
}
/**
 * Check if running in a web environment.
 *
 * Currently equivalent to `isBrowser()`: only the "browser" runtime counts.
 *
 * @returns {boolean} True when the detected runtime is "browser".
 */
function isWebEnvironment() {
  return detectRuntime() === "browser";
}
/**
 * Check if running in a server-like environment (Node.js, Deno, or Bun).
 *
 * @returns {boolean} True when the detected runtime is "node", "deno" or "bun".
 */
function isServerEnvironment() {
  const serverRuntimes = ["node", "deno", "bun"];
  return serverRuntimes.includes(detectRuntime());
}
/**
 * Check if the File API is available.
 *
 * All three globals must exist: `window`, `File` and `Blob`. A runtime that
 * exposes `File`/`Blob` without a `window` global therefore reports false.
 *
 * @returns {boolean} True if `window`, `File` and `Blob` are all defined.
 */
function hasFileApi() {
  if (typeof window === "undefined") {
    return false;
  }
  if (typeof File === "undefined") {
    return false;
  }
  return typeof Blob !== "undefined";
}
/**
 * Check if the Blob API is available.
 *
 * @returns {boolean} True if a global `Blob` is defined.
 */
function hasBlob() {
  const blobKind = typeof Blob;
  return blobKind !== "undefined";
}
/**
 * Check if Web Workers are available.
 *
 * @returns {boolean} True if a global `Worker` is defined.
 */
function hasWorkers() {
  const workerKind = typeof Worker;
  return workerKind !== "undefined";
}
/**
 * Check if SharedArrayBuffer is available.
 *
 * Only the presence of the global is tested; this does not establish that
 * shared memory can actually be used in the current context.
 *
 * @returns {boolean} True if a global `SharedArrayBuffer` is defined.
 */
function hasSharedArrayBuffer() {
  const sabKind = typeof SharedArrayBuffer;
  return sabKind !== "undefined";
}
/**
 * Check if module workers (`new Worker(url, { type: "module" })`) are supported.
 *
 * The previous implementation created a Blob and an object URL that it never
 * used and then returned `true` unconditionally, so it only reported whether
 * a `Worker` constructor existed. This version performs a real probe: it
 * passes an options object whose `type` property is a getter and records
 * whether the constructor reads it. Engines without module-worker support
 * never look at `type`.
 *
 * @returns {boolean} True if the Worker constructor reads the `type` option,
 *   false if there is no `Worker` global or the option is ignored.
 */
function hasModuleWorkers() {
  // Same condition as hasWorkers(); inlined so the probe is self-contained.
  if (typeof Worker === "undefined") {
    return false;
  }
  let typeOptionRead = false;
  const probeOptions = {
    get type() {
      typeOptionRead = true;
      // Returning undefined keeps the probe worker a classic worker.
      return undefined;
    }
  };
  try {
    // "blob://" is a deliberately unloadable URL: no script is ever fetched
    // or executed. The worker is terminated immediately in case it was created.
    const probe = new Worker("blob://", probeOptions);
    if (probe && typeof probe.terminate === "function") {
      probe.terminate();
    }
  } catch {
    // Construction may throw (invalid URL, security policy). That is fine:
    // the only signal needed is whether `type` was read before the failure.
  }
  return typeOptionRead;
}
/**
 * Check if WebAssembly is available.
 *
 * @returns {boolean} True if the `WebAssembly` global exists and exposes `instantiate`.
 */
function hasWasm() {
  if (typeof WebAssembly === "undefined") {
    return false;
  }
  return WebAssembly.instantiate !== undefined;
}
/**
 * Check if `WebAssembly.instantiateStreaming` is available.
 *
 * @returns {boolean} True if the `WebAssembly` global exists and exposes
 *   `instantiateStreaming`.
 */
function hasWasmStreaming() {
  if (typeof WebAssembly === "undefined") {
    return false;
  }
  return WebAssembly.instantiateStreaming !== undefined;
}
/**
 * Check if BigInt is available.
 *
 * Attempts to build a BigInt from the string "1"; any exception (for example
 * a missing `BigInt` global) is treated as "not supported".
 *
 * @returns {boolean} True if `BigInt("1")` yields a value of type "bigint".
 */
function hasBigInt() {
  let probe;
  try {
    probe = BigInt("1");
  } catch {
    return false;
  }
  return typeof probe === "bigint";
}
/**
 * Get runtime version information.
 *
 * - Node.js: `process.version` with its first character (the "v") removed.
 * - Deno: `Deno.version.deno`.
 * - Bun: `Bun.version`.
 * - Anything else: `undefined`.
 *
 * @returns {string | undefined} Version string if available, undefined otherwise.
 */
function getRuntimeVersion() {
  const runtime = detectRuntime();
  if (runtime === "node") {
    // Remove 'v' prefix
    return process.version?.substring(1);
  }
  if (runtime === "deno") {
    return globalThis.Deno?.version?.deno;
  }
  if (runtime === "bun") {
    return globalThis.Bun?.version;
  }
  return undefined;
}
/**
 * Get comprehensive WebAssembly capabilities for the current runtime.
 *
 * Collects the result of every `has*` feature check together with the
 * detected runtime. `runtimeVersion` is only present on the result when a
 * version could be determined; it is never set to `undefined`.
 *
 * @returns {object} Capability flags plus `runtime` and optional `runtimeVersion`.
 */
function getWasmCapabilities() {
  const runtime = detectRuntime();
  const version = getRuntimeVersion();
  const capabilities = {
    runtime,
    hasWasm: hasWasm(),
    hasWasmStreaming: hasWasmStreaming(),
    hasFileApi: hasFileApi(),
    hasBlob: hasBlob(),
    hasWorkers: hasWorkers(),
    hasSharedArrayBuffer: hasSharedArrayBuffer(),
    hasModuleWorkers: hasModuleWorkers(),
    hasBigInt: hasBigInt()
  };
  // Added last so the key is absent rather than undefined when unknown.
  if (version !== undefined) {
    capabilities.runtimeVersion = version;
  }
  return capabilities;
}
/**
 * Get comprehensive runtime information.
 *
 * Combines the detected runtime, the boolean `is*` helpers, the runtime
 * version, the user agent and the full capability report in one object.
 * `userAgent` is the string "N/A" when no `navigator` global exists.
 *
 * @returns {object} Runtime details and capabilities.
 */
function getRuntimeInfo() {
  const runtime = detectRuntime();
  const capabilities = getWasmCapabilities();

  let userAgent = "N/A";
  if (typeof navigator !== "undefined") {
    userAgent = navigator.userAgent;
  }

  const info = {
    runtime,
    isBrowser: isBrowser(),
    isNode: isNode(),
    isDeno: isDeno(),
    isBun: isBun(),
    isWeb: isWebEnvironment(),
    isServer: isServerEnvironment(),
    runtimeVersion: getRuntimeVersion(),
    userAgent,
    capabilities
  };
  return info;
}
132
+ export {
133
+ detectRuntime,
134
+ getRuntimeInfo,
135
+ getRuntimeVersion,
136
+ getWasmCapabilities,
137
+ hasBigInt,
138
+ hasBlob,
139
+ hasFileApi,
140
+ hasModuleWorkers,
141
+ hasSharedArrayBuffer,
142
+ hasWasm,
143
+ hasWasmStreaming,
144
+ hasWorkers,
145
+ isBrowser,
146
+ isBun,
147
+ isDeno,
148
+ isNode,
149
+ isServerEnvironment,
150
+ isWebEnvironment
151
+ };
152
+ //# sourceMappingURL=runtime.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../typescript/runtime.ts"],"sourcesContent":["/**\n * Runtime detection and environment-specific utilities\n *\n * This module provides utilities for detecting the JavaScript runtime environment,\n * checking for feature availability, and enabling environment-specific WASM loading strategies.\n *\n * @example Basic Runtime Detection\n * ```typescript\n * import { detectRuntime, isBrowser, isNode } from '@kreuzberg/wasm/runtime';\n *\n * if (isBrowser()) {\n * console.log('Running in browser');\n * } else if (isNode()) {\n * console.log('Running in Node.js');\n * }\n * ```\n *\n * @example Feature Detection\n * ```typescript\n * import { hasFileApi, hasWorkers } from '@kreuzberg/wasm/runtime';\n *\n * if (hasFileApi()) {\n * // Can use File API for browser file uploads\n * }\n *\n * if (hasWorkers()) {\n * // Can use Web Workers for parallel processing\n * }\n * ```\n */\n\nexport type RuntimeType = \"browser\" | \"node\" | \"deno\" | \"bun\" | \"unknown\";\n\n/**\n * WebAssembly capabilities available in the runtime\n */\nexport interface WasmCapabilities {\n\t/** Runtime environment type */\n\truntime: RuntimeType;\n\t/** WebAssembly support available */\n\thasWasm: boolean;\n\t/** Streaming WebAssembly instantiation available */\n\thasWasmStreaming: boolean;\n\t/** File API available (browser) */\n\thasFileApi: boolean;\n\t/** Blob API available */\n\thasBlob: boolean;\n\t/** Worker support available */\n\thasWorkers: boolean;\n\t/** SharedArrayBuffer available (may be restricted) */\n\thasSharedArrayBuffer: boolean;\n\t/** Module Workers available */\n\thasModuleWorkers: boolean;\n\t/** BigInt support */\n\thasBigInt: boolean;\n\t/** Specific runtime version if available */\n\truntimeVersion?: string;\n}\n\n/**\n * Detect the current JavaScript runtime\n *\n * Checks for various global objects and properties to determine\n * which JavaScript runtime environment is currently executing.\n *\n * @returns The detected runtime type\n *\n * 
@example\n * ```typescript\n * import { detectRuntime } from '@kreuzberg/wasm/runtime';\n *\n * const runtime = detectRuntime();\n * switch (runtime) {\n * case 'browser':\n * console.log('Running in browser');\n * break;\n * case 'node':\n * console.log('Running in Node.js');\n * break;\n * case 'deno':\n * console.log('Running in Deno');\n * break;\n * case 'bun':\n * console.log('Running in Bun');\n * break;\n * }\n * ```\n */\nexport function detectRuntime(): RuntimeType {\n\t// Check for Deno\n\tif (typeof (globalThis as unknown as Record<string, unknown>).Deno !== \"undefined\") {\n\t\treturn \"deno\";\n\t}\n\n\t// Check for Bun\n\tif (typeof (globalThis as unknown as Record<string, unknown>).Bun !== \"undefined\") {\n\t\treturn \"bun\";\n\t}\n\n\t// Check for Node.js\n\tif (typeof process !== \"undefined\" && process.versions && process.versions.node) {\n\t\treturn \"node\";\n\t}\n\n\t// Check for browser\n\tif (typeof window !== \"undefined\" && typeof document !== \"undefined\") {\n\t\treturn \"browser\";\n\t}\n\n\treturn \"unknown\";\n}\n\n/**\n * Check if running in a browser environment\n *\n * @returns True if running in a browser, false otherwise\n */\nexport function isBrowser(): boolean {\n\treturn detectRuntime() === \"browser\";\n}\n\n/**\n * Check if running in Node.js\n *\n * @returns True if running in Node.js, false otherwise\n */\nexport function isNode(): boolean {\n\treturn detectRuntime() === \"node\";\n}\n\n/**\n * Check if running in Deno\n *\n * @returns True if running in Deno, false otherwise\n */\nexport function isDeno(): boolean {\n\treturn detectRuntime() === \"deno\";\n}\n\n/**\n * Check if running in Bun\n *\n * @returns True if running in Bun, false otherwise\n */\nexport function isBun(): boolean {\n\treturn detectRuntime() === \"bun\";\n}\n\n/**\n * Check if running in a web environment (browser or similar)\n *\n * @returns True if running in a web browser, false otherwise\n */\nexport function isWebEnvironment(): boolean 
{\n\tconst runtime = detectRuntime();\n\treturn runtime === \"browser\";\n}\n\n/**\n * Check if running in a server-like environment (Node.js, Deno, Bun)\n *\n * @returns True if running on a server runtime, false otherwise\n */\nexport function isServerEnvironment(): boolean {\n\tconst runtime = detectRuntime();\n\treturn runtime === \"node\" || runtime === \"deno\" || runtime === \"bun\";\n}\n\n/**\n * Check if File API is available\n *\n * The File API is required for handling browser file uploads.\n *\n * @returns True if File API is available, false otherwise\n *\n * @example\n * ```typescript\n * if (hasFileApi()) {\n * const fileInput = document.getElementById('file');\n * fileInput.addEventListener('change', (e) => {\n * const file = e.target.files?.[0];\n * // Handle file\n * });\n * }\n * ```\n */\nexport function hasFileApi(): boolean {\n\treturn typeof window !== \"undefined\" && typeof File !== \"undefined\" && typeof Blob !== \"undefined\";\n}\n\n/**\n * Check if Blob API is available\n *\n * @returns True if Blob API is available, false otherwise\n */\nexport function hasBlob(): boolean {\n\treturn typeof Blob !== \"undefined\";\n}\n\n/**\n * Check if Web Workers are available\n *\n * @returns True if Web Workers can be created, false otherwise\n */\nexport function hasWorkers(): boolean {\n\treturn typeof Worker !== \"undefined\";\n}\n\n/**\n * Check if SharedArrayBuffer is available\n *\n * Note: SharedArrayBuffer is restricted in some browser contexts\n * due to security considerations (Spectre/Meltdown mitigations).\n *\n * @returns True if SharedArrayBuffer is available, false otherwise\n */\nexport function hasSharedArrayBuffer(): boolean {\n\treturn typeof SharedArrayBuffer !== \"undefined\";\n}\n\n/**\n * Check if module workers are available\n *\n * Module workers allow importing ES modules in worker threads.\n *\n * @returns True if module workers are supported, false otherwise\n */\nexport function hasModuleWorkers(): boolean {\n\tif 
(!hasWorkers()) {\n\t\treturn false;\n\t}\n\n\ttry {\n\t\t// Try to detect module worker support\n\t\tconst blob = new Blob(['console.log(\"test\")'], {\n\t\t\ttype: \"application/javascript\",\n\t\t});\n\t\tconst workerUrl = URL.createObjectURL(blob);\n\t\ttry {\n\t\t\t// Module workers require type: 'module' option\n\t\t\t// We can't actually instantiate without issues, so we check the API exists\n\t\t\treturn true;\n\t\t} finally {\n\t\t\tURL.revokeObjectURL(workerUrl);\n\t\t}\n\t} catch {\n\t\treturn false;\n\t}\n}\n\n/**\n * Check if WebAssembly is available\n *\n * @returns True if WebAssembly is supported, false otherwise\n */\nexport function hasWasm(): boolean {\n\treturn typeof WebAssembly !== \"undefined\" && WebAssembly.instantiate !== undefined;\n}\n\n/**\n * Check if WebAssembly.instantiateStreaming is available\n *\n * Streaming instantiation is more efficient than buffering the entire WASM module.\n *\n * @returns True if streaming WebAssembly is supported, false otherwise\n */\nexport function hasWasmStreaming(): boolean {\n\treturn typeof WebAssembly !== \"undefined\" && WebAssembly.instantiateStreaming !== undefined;\n}\n\n/**\n * Check if BigInt is available\n *\n * @returns True if BigInt type is supported, false otherwise\n */\nexport function hasBigInt(): boolean {\n\ttry {\n\t\tconst test = BigInt(\"1\");\n\t\treturn typeof test === \"bigint\";\n\t} catch {\n\t\treturn false;\n\t}\n}\n\n/**\n * Get runtime version information\n *\n * @returns Version string if available, undefined otherwise\n *\n * @example\n * ```typescript\n * const version = getRuntimeVersion();\n * console.log(`Running on Node ${version}`); // \"Running on Node 18.12.0\"\n * ```\n */\nexport function getRuntimeVersion(): string | undefined {\n\tconst runtime = detectRuntime();\n\n\tswitch (runtime) {\n\t\tcase \"node\":\n\t\t\treturn process.version?.substring(1); // Remove 'v' prefix\n\t\tcase \"deno\": {\n\t\t\tconst deno = (globalThis as unknown as Record<string, 
unknown>).Deno as Record<string, unknown> | undefined;\n\t\t\tconst version = deno?.version as Record<string, unknown> | undefined;\n\t\t\treturn version?.deno as string | undefined;\n\t\t}\n\t\tcase \"bun\": {\n\t\t\tconst bun = (globalThis as unknown as Record<string, unknown>).Bun as Record<string, unknown> | undefined;\n\t\t\treturn bun?.version as string | undefined;\n\t\t}\n\t\tdefault:\n\t\t\treturn undefined;\n\t}\n}\n\n/**\n * Get comprehensive WebAssembly capabilities for current runtime\n *\n * Returns detailed information about WASM and related APIs available\n * in the current runtime environment.\n *\n * @returns Object describing available WASM capabilities\n *\n * @example\n * ```typescript\n * import { getWasmCapabilities } from '@kreuzberg/wasm/runtime';\n *\n * const caps = getWasmCapabilities();\n * console.log(`WASM available: ${caps.hasWasm}`);\n * console.log(`Streaming WASM: ${caps.hasWasmStreaming}`);\n * console.log(`Workers available: ${caps.hasWorkers}`);\n *\n * if (caps.hasWasm && caps.hasWorkers) {\n * // Can offload WASM processing to workers\n * }\n * ```\n */\nexport function getWasmCapabilities(): WasmCapabilities {\n\tconst runtime = detectRuntime();\n\tconst version = getRuntimeVersion();\n\tconst capabilities: WasmCapabilities = {\n\t\truntime,\n\t\thasWasm: hasWasm(),\n\t\thasWasmStreaming: hasWasmStreaming(),\n\t\thasFileApi: hasFileApi(),\n\t\thasBlob: hasBlob(),\n\t\thasWorkers: hasWorkers(),\n\t\thasSharedArrayBuffer: hasSharedArrayBuffer(),\n\t\thasModuleWorkers: hasModuleWorkers(),\n\t\thasBigInt: hasBigInt(),\n\t\t...(version !== undefined ? 
{ runtimeVersion: version } : {}),\n\t};\n\treturn capabilities;\n}\n\n/**\n * Get comprehensive runtime information\n *\n * Returns detailed information about the current runtime environment,\n * capabilities, and identifying information.\n *\n * @returns Object with runtime details and capabilities\n *\n * @example\n * ```typescript\n * const info = getRuntimeInfo();\n * console.log(info.runtime); // 'browser' | 'node' | 'deno' | 'bun'\n * console.log(info.isBrowser); // true/false\n * console.log(info.userAgent); // Browser user agent string\n * console.log(info.capabilities); // Detailed capability information\n * ```\n */\nexport function getRuntimeInfo() {\n\tconst runtime = detectRuntime();\n\tconst capabilities = getWasmCapabilities();\n\n\treturn {\n\t\truntime,\n\t\tisBrowser: isBrowser(),\n\t\tisNode: isNode(),\n\t\tisDeno: isDeno(),\n\t\tisBun: isBun(),\n\t\tisWeb: isWebEnvironment(),\n\t\tisServer: isServerEnvironment(),\n\t\truntimeVersion: getRuntimeVersion(),\n\t\tuserAgent: typeof navigator !== \"undefined\" ? 
navigator.userAgent : \"N/A\",\n\t\tcapabilities,\n\t};\n}\n"],"mappings":"AAwFO,SAAS,gBAA6B;AAE5C,MAAI,OAAQ,WAAkD,SAAS,aAAa;AACnF,WAAO;AAAA,EACR;AAGA,MAAI,OAAQ,WAAkD,QAAQ,aAAa;AAClF,WAAO;AAAA,EACR;AAGA,MAAI,OAAO,YAAY,eAAe,QAAQ,YAAY,QAAQ,SAAS,MAAM;AAChF,WAAO;AAAA,EACR;AAGA,MAAI,OAAO,WAAW,eAAe,OAAO,aAAa,aAAa;AACrE,WAAO;AAAA,EACR;AAEA,SAAO;AACR;AAOO,SAAS,YAAqB;AACpC,SAAO,cAAc,MAAM;AAC5B;AAOO,SAAS,SAAkB;AACjC,SAAO,cAAc,MAAM;AAC5B;AAOO,SAAS,SAAkB;AACjC,SAAO,cAAc,MAAM;AAC5B;AAOO,SAAS,QAAiB;AAChC,SAAO,cAAc,MAAM;AAC5B;AAOO,SAAS,mBAA4B;AAC3C,QAAM,UAAU,cAAc;AAC9B,SAAO,YAAY;AACpB;AAOO,SAAS,sBAA+B;AAC9C,QAAM,UAAU,cAAc;AAC9B,SAAO,YAAY,UAAU,YAAY,UAAU,YAAY;AAChE;AAoBO,SAAS,aAAsB;AACrC,SAAO,OAAO,WAAW,eAAe,OAAO,SAAS,eAAe,OAAO,SAAS;AACxF;AAOO,SAAS,UAAmB;AAClC,SAAO,OAAO,SAAS;AACxB;AAOO,SAAS,aAAsB;AACrC,SAAO,OAAO,WAAW;AAC1B;AAUO,SAAS,uBAAgC;AAC/C,SAAO,OAAO,sBAAsB;AACrC;AASO,SAAS,mBAA4B;AAC3C,MAAI,CAAC,WAAW,GAAG;AAClB,WAAO;AAAA,EACR;AAEA,MAAI;AAEH,UAAM,OAAO,IAAI,KAAK,CAAC,qBAAqB,GAAG;AAAA,MAC9C,MAAM;AAAA,IACP,CAAC;AACD,UAAM,YAAY,IAAI,gBAAgB,IAAI;AAC1C,QAAI;AAGH,aAAO;AAAA,IACR,UAAE;AACD,UAAI,gBAAgB,SAAS;AAAA,IAC9B;AAAA,EACD,QAAQ;AACP,WAAO;AAAA,EACR;AACD;AAOO,SAAS,UAAmB;AAClC,SAAO,OAAO,gBAAgB,eAAe,YAAY,gBAAgB;AAC1E;AASO,SAAS,mBAA4B;AAC3C,SAAO,OAAO,gBAAgB,eAAe,YAAY,yBAAyB;AACnF;AAOO,SAAS,YAAqB;AACpC,MAAI;AACH,UAAM,OAAO,OAAO,GAAG;AACvB,WAAO,OAAO,SAAS;AAAA,EACxB,QAAQ;AACP,WAAO;AAAA,EACR;AACD;AAaO,SAAS,oBAAwC;AACvD,QAAM,UAAU,cAAc;AAE9B,UAAQ,SAAS;AAAA,IAChB,KAAK;AACJ,aAAO,QAAQ,SAAS,UAAU,CAAC;AAAA;AAAA,IACpC,KAAK,QAAQ;AACZ,YAAM,OAAQ,WAAkD;AAChE,YAAM,UAAU,MAAM;AACtB,aAAO,SAAS;AAAA,IACjB;AAAA,IACA,KAAK,OAAO;AACX,YAAM,MAAO,WAAkD;AAC/D,aAAO,KAAK;AAAA,IACb;AAAA,IACA;AACC,aAAO;AAAA,EACT;AACD;AAwBO,SAAS,sBAAwC;AACvD,QAAM,UAAU,cAAc;AAC9B,QAAM,UAAU,kBAAkB;AAClC,QAAM,eAAiC;AAAA,IACtC;AAAA,IACA,SAAS,QAAQ;AAAA,IACjB,kBAAkB,iBAAiB;AAAA,IACnC,YAAY,WAAW;AAAA,IACvB,SAAS,QAAQ;AAAA,IACjB,YAAY,WAAW;AAAA,IACvB,sBAAsB,qBAAqB;AAAA,IAC3C,kBAAkB,iBAAiB;AAAA,IACnC,WAAW,UAAU;AAAA,IACrB,GAAI,YAAY,SAAY,EAAE,gBAAgB,QAAQ,I
AAI,CAAC;AAAA,EAC5D;AACA,SAAO;AACR;AAmBO,SAAS,iBAAiB;AAChC,QAAM,UAAU,cAAc;AAC9B,QAAM,eAAe,oBAAoB;AAEzC,SAAO;AAAA,IACN;AAAA,IACA,WAAW,UAAU;AAAA,IACrB,QAAQ,OAAO;AAAA,IACf,QAAQ,OAAO;AAAA,IACf,OAAO,MAAM;AAAA,IACb,OAAO,iBAAiB;AAAA,IACxB,UAAU,oBAAoB;AAAA,IAC9B,gBAAgB,kBAAkB;AAAA,IAClC,WAAW,OAAO,cAAc,cAAc,UAAU,YAAY;AAAA,IACpE;AAAA,EACD;AACD;","names":[]}
@@ -0,0 +1,107 @@
1
+ /*
2
+ * Copyright 2022 Google Inc. All Rights Reserved.
3
+ * Licensed under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License.
5
+ * You may obtain a copy of the License at
6
+ * http://www.apache.org/licenses/LICENSE-2.0
7
+ * Unless required by applicable law or agreed to in writing, software
8
+ * distributed under the License is distributed on an "AS IS" BASIS,
9
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ * See the License for the specific language governing permissions and
11
+ * limitations under the License.
12
+ */
13
+
14
+ // Note: we use `wasm_bindgen_worker_`-prefixed message types to make sure
15
+ // we can handle bundling into other files, which might happen to have their
16
+ // own `postMessage`/`onmessage` communication channels.
17
+ //
18
+ // If we didn't take that into account, we could send much simpler signals
19
+ // like just `0` or whatever, but the code would be less resilient.
20
+
21
+ function waitForMsgType(target, type) { // Resolves with the `data` of the first 'message' event on `target` whose `data.type` equals `type`.
22
+ return new Promise(resolve => { // Never rejects: no reject callback is taken.
23
+ target.addEventListener('message', function onMsg({ data }) {
24
+ if (data?.type !== type) return; // Ignore unrelated messages (including null/undefined data) and keep listening.
25
+ target.removeEventListener('message', onMsg); // One-shot: unsubscribe once the expected message arrives.
26
+ resolve(data);
27
+ });
28
+ });
29
+ }
30
+
31
+ waitForMsgType(self, 'wasm_bindgen_worker_init').then(async ({ init, receiver }) => { // Top-level side effect: runs once an init message arrives on `self`.
32
+ // # Note 1
33
+ // Our JS should have been generated in
34
+ // `[out-dir]/snippets/wasm-bindgen-rayon-[hash]/workerHelpers.js`,
35
+ // resolve the main module via `../../..`.
36
+ //
37
+ // This might need updating if the generated structure changes on the
38
+ // wasm-bindgen side in the future, but works well with bundlers today. The whole
39
+ // point of this crate, after all, is to abstract away unstable features
40
+ // and temporary bugs so that you don't need to deal with them in your code.
41
+ //
42
+ // # Note 2
43
+ // This could be a regular import, but then some bundlers complain about
44
+ // circular deps.
45
+ //
46
+ // Dynamic import could be cheap if this file were inlined into the parent,
47
+ // which would require us just using `../../..` in `new Worker` below,
48
+ // but that doesn't work because wasm-pack unconditionally adds
49
+ // "sideEffects":false (see below).
50
+ //
51
+ // OTOH, even though it can't be inlined, it should still be reasonably
52
+ // cheap since the requested file is already in cache (it was loaded by
53
+ // the main thread).
54
+ const pkg = await import('../../..'); // Dynamic import of the main module (see Notes 1 and 2).
55
+ await pkg.default(init); // Call the module's default export with the `init` payload from the message.
56
+ postMessage({ type: 'wasm_bindgen_worker_ready' }); // Posted before the worker is started; `startWorkers` awaits this message type.
57
+ pkg.wbg_rayon_start_worker(receiver); // Pass the `receiver` from the init message to the module.
58
+ });
59
+
60
+ // Note: this variable is never read, but it is necessary to prevent a bug in Firefox
61
+ // (https://bugzilla.mozilla.org/show_bug.cgi?id=1702191) where it collects
62
+ // Web Workers that have a shared WebAssembly memory with the main thread,
63
+ // but are not explicitly rooted via a `Worker` instance.
64
+ //
65
+ // By storing them in a variable, we can keep `Worker` objects around and
66
+ // prevent them from being garbage-collected.
67
+ let _workers; // Assigned in `startWorkers`; holds the array of spawned `Worker` objects.
68
+
69
+ export async function startWorkers(module, memory, builder) { // Spawns `builder.numThreads()` module workers, waits for each to report ready, then calls `builder.build()`.
70
+ if (builder.numThreads() === 0) {
71
+ throw new Error(`num_threads must be > 0.`);
72
+ }
73
+
74
+ const workerInit = { // Message posted to every worker; the 'wasm_bindgen_worker_init' handler in this file destructures `init` and `receiver`.
75
+ type: 'wasm_bindgen_worker_init',
76
+ init: { module_or_path: module, memory },
77
+ receiver: builder.receiver()
78
+ };
79
+
80
+ _workers = await Promise.all( // All workers are created concurrently; the result is kept in `_workers` so the Worker objects stay referenced.
81
+ Array.from({ length: builder.numThreads() }, async () => {
82
+ // Self-spawn into a new Worker.
83
+ //
84
+ // TODO: while `new URL('...', import.meta.url)` has become a semi-standard
85
+ // way to get asset URLs relative to the module across various bundlers
86
+ // and browsers, ideally we should switch to `import.meta.resolve`
87
+ // once it becomes a standard.
88
+ //
89
+ // Note: we could use `../../..` as the URL here to inline workerHelpers.js
90
+ // into the parent entry instead of creating another split point -
91
+ // this would be preferable from an optimization perspective -
92
+ // however, Webpack then eliminates all message handler code
93
+ // because wasm-pack produces "sideEffects":false in package.json
94
+ // unconditionally.
95
+ //
96
+ // The only way to work around that is to have side effect code
97
+ // in an entry point such as the Worker file itself.
98
+ const worker = new Worker(new URL('./workerHelpers.js', import.meta.url), {
99
+ type: 'module'
100
+ });
101
+ worker.postMessage(workerInit);
102
+ await waitForMsgType(worker, 'wasm_bindgen_worker_ready'); // NOTE(review): waitForMsgType never rejects, so a worker that never posts this message leaves the await pending.
103
+ return worker;
104
+ })
105
+ );
106
+ builder.build(); // Reached only after every worker has reported ready.
107
+ }
@@ -0,0 +1,221 @@
1
+ /**
2
+ * Type definitions for Kreuzberg WASM bindings
3
+ *
4
+ * These types are generated from the Rust core library and define
5
+ * the interface for extraction, configuration, and results.
6
+ */
7
+ /**
8
+ * Configuration for document extraction. Every section is optional.
9
+ */
10
+ interface ExtractionConfig {
11
+ /** OCR configuration (see `OcrConfig`) */
12
+ ocr?: OcrConfig;
13
+ /** Chunking configuration (see `ChunkingConfig`) */
14
+ chunking?: ChunkingConfig;
15
+ /** Image extraction configuration (see `ImageExtractionConfig`) */
16
+ images?: ImageExtractionConfig;
17
+ /** Page extraction configuration (see `PageExtractionConfig`) */
18
+ pages?: PageExtractionConfig;
19
+ /** Language detection configuration (see `LanguageDetectionConfig`) */
20
+ languageDetection?: LanguageDetectionConfig;
21
+ }
22
+ /**
23
+ * OCR configuration. All fields are optional.
24
+ */
25
+ interface OcrConfig {
26
+ /** OCR backend to use (presumably a registered backend's `name()` value; TODO confirm) */
27
+ backend?: string;
28
+ /** Language codes (ISO 639) */
29
+ languages?: string[];
30
+ /** Whether to perform OCR (the default is not specified in this declaration) */
31
+ enabled?: boolean;
32
+ }
33
+ /**
34
+ * Chunking configuration. All fields are optional.
35
+ */
36
+ interface ChunkingConfig {
37
+ /** Maximum characters per chunk */
38
+ maxChars?: number;
39
+ /** Maximum overlap between chunks (unit presumably characters, like `maxChars`; TODO confirm) */
40
+ maxOverlap?: number;
41
+ }
42
+ /**
43
+ * Image extraction configuration (used by `ExtractionConfig.images`)
44
+ */
45
+ interface ImageExtractionConfig {
46
+ /** Whether to extract images (the default is not specified in this declaration) */
47
+ enabled?: boolean;
48
+ }
49
+ /**
50
+ * Page extraction configuration (used by `ExtractionConfig.pages`)
51
+ */
52
+ interface PageExtractionConfig {
53
+ /** Whether to extract per-page content (the default is not specified in this declaration) */
54
+ enabled?: boolean;
55
+ }
56
+ /**
57
+ * Language detection configuration (used by `ExtractionConfig.languageDetection`)
58
+ */
59
+ interface LanguageDetectionConfig {
60
+ /** Whether to detect languages (the default is not specified in this declaration) */
61
+ enabled?: boolean;
62
+ }
63
+ /**
64
+ * Result of document extraction. `content`, `mimeType`, `metadata` and `tables` are always present; the remaining fields may be absent or null.
65
+ */
66
+ interface ExtractionResult {
67
+ /** Extracted text content */
68
+ content: string;
69
+ /** MIME type of the document */
70
+ mimeType: string;
71
+ /** Document metadata (see `Metadata`) */
72
+ metadata: Metadata;
73
+ /** Extracted tables (required array of `Table`) */
74
+ tables: Table[];
75
+ /** Detected languages (ISO 639 codes); may be absent or null */
76
+ detectedLanguages?: string[] | null;
77
+ /** Text chunks when chunking is enabled; may be absent or null */
78
+ chunks?: Chunk[] | null;
79
+ /** Extracted images; may be absent or null */
80
+ images?: ExtractedImage[] | null;
81
+ /** Per-page content; may be absent or null */
82
+ pages?: PageContent[] | null;
83
+ }
84
+ /**
85
+ * Document metadata. Every field is optional.
86
+ */
87
+ interface Metadata {
88
+ /** Document title */
89
+ title?: string;
90
+ /** Document subject or description */
91
+ subject?: string;
92
+ /** Document author(s) */
93
+ authors?: string[];
94
+ /** Keywords/tags */
95
+ keywords?: string[];
96
+ /** Primary language (ISO 639 code) */
97
+ language?: string;
98
+ /** Creation timestamp (ISO 8601 format), carried as a string */
99
+ createdAt?: string;
100
+ /** Last modification timestamp (ISO 8601 format), carried as a string */
101
+ modifiedAt?: string;
102
+ /** User who created the document */
103
+ creator?: string;
104
+ /** User who last modified the document */
105
+ lastModifiedBy?: string;
106
+ /** Number of pages/slides */
107
+ pageCount?: number;
108
+ /** Format-specific metadata; typed `unknown`, so it must be narrowed before use */
109
+ formatMetadata?: unknown;
110
+ /** Custom additional fields, keyed by string */
111
+ additional?: Record<string, unknown>;
112
+ }
113
+ /**
114
+ * Extracted table. Every field is optional.
115
+ */
116
+ interface Table {
117
+ /** Table cells/rows. NOTE(review): same `string[][]` type as `rows`; how the two relate is not shown here */
118
+ cells?: string[][];
119
+ /** Table markdown representation */
120
+ markdown?: string;
121
+ /** Page number if available */
122
+ pageNumber?: number;
123
+ /** Table headers */
124
+ headers?: string[];
125
+ /** Table rows (see the note on `cells`) */
126
+ rows?: string[][];
127
+ }
128
+ /**
129
+ * Chunk metadata. All fields are required; only `tokenCount` may be null.
130
+ */
131
+ interface ChunkMetadata {
132
+ /** Character start position in original content */
133
+ charStart: number;
134
+ /** Character end position in original content (whether inclusive or exclusive is not stated here) */
135
+ charEnd: number;
136
+ /** Token count if available, otherwise null */
137
+ tokenCount: number | null;
138
+ /** Index of this chunk */
139
+ chunkIndex: number;
140
+ /** Total number of chunks */
141
+ totalChunks: number;
142
+ }
143
+ /**
144
+ * Text chunk from chunked content. Only `content` is required.
145
+ */
146
+ interface Chunk {
147
+ /** Chunk text content */
148
+ content: string;
149
+ /** Chunk metadata (see `ChunkMetadata`) */
150
+ metadata?: ChunkMetadata;
151
+ /** Character position in original content (legacy) */
152
+ charIndex?: number;
153
+ /** Token count if available (legacy); `ChunkMetadata.tokenCount` carries the same kind of value */
154
+ tokenCount?: number;
155
+ /** Embedding vector if computed; may be absent or null */
156
+ embedding?: number[] | null;
157
+ }
158
+ /**
159
+ * Extracted image from document. Only `data` is required.
160
+ */
161
+ interface ExtractedImage {
162
+ /** Image data as Uint8Array or base64 string */
163
+ data: Uint8Array | string;
164
+ /** Image format/MIME type. NOTE(review): overlaps with `mimeType`; which field producers populate is not shown here */
165
+ format?: string;
166
+ /** MIME type of the image */
167
+ mimeType?: string;
168
+ /** Image index in document (whether 0- or 1-based is not stated here) */
169
+ imageIndex?: number;
170
+ /** Page number if available; may be absent or null */
171
+ pageNumber?: number | null;
172
+ /** Image width in pixels; may be absent or null */
173
+ width?: number | null;
174
+ /** Image height in pixels; may be absent or null */
175
+ height?: number | null;
176
+ /** Color space of the image; may be absent or null */
177
+ colorspace?: string | null;
178
+ /** Bits per color component; may be absent or null */
179
+ bitsPerComponent?: number | null;
180
+ /** Whether this is a mask image */
181
+ isMask?: boolean;
182
+ /** Image description; may be absent or null */
183
+ description?: string | null;
184
+ /** Optional OCR result from the image: a full `ExtractionResult`, a plain string, or null */
185
+ ocrResult?: ExtractionResult | string | null;
186
+ }
187
+ /**
188
+ * Per-page content. `pageNumber` and `content` are required.
189
+ */
190
+ interface PageContent {
191
+ /** Page number (1-indexed) */
192
+ pageNumber: number;
193
+ /** Text content of the page */
194
+ content: string;
195
+ /** Tables on this page (optional) */
196
+ tables?: Table[];
197
+ /** Images on this page (optional) */
198
+ images?: ExtractedImage[];
199
+ }
200
+ /**
201
+ * OCR backend protocol/interface. `name`, `initialize` and `processImage` are required; `supportedLanguages` and `shutdown` are optional methods.
202
+ */
203
+ interface OcrBackendProtocol {
204
+ /** Get the backend name */
205
+ name(): string;
206
+ /** Get supported language codes (optional method) */
207
+ supportedLanguages?(): string[];
208
+ /** Initialize the backend; may complete synchronously or return a Promise */
209
+ initialize(options?: Record<string, unknown>): void | Promise<void>;
210
+ /** Shutdown the backend (optional method); may complete synchronously or return a Promise */
211
+ shutdown?(): void | Promise<void>;
212
+ /** Process an image with OCR; resolves to a result object or a plain string. The result object uses snake_case `mime_type`, unlike `ExtractionResult.mimeType`. */
213
+ processImage(imageData: Uint8Array | string, language?: string): Promise<{
214
+ content: string;
215
+ mime_type: string;
216
+ metadata?: Record<string, unknown>;
217
+ tables?: unknown[];
218
+ } | string>;
219
+ }
220
+
221
+ export type { Chunk as C, ExtractionConfig as E, ImageExtractionConfig as I, LanguageDetectionConfig as L, Metadata as M, OcrConfig as O, PageExtractionConfig as P, Table as T, ExtractionResult as a, ChunkingConfig as b, ExtractedImage as c, ChunkMetadata as d, PageContent as e, OcrBackendProtocol as f };