@kreuzberg/wasm 4.0.0-rc.21 → 4.0.0-rc.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/README.md +520 -837
  2. package/dist/adapters/wasm-adapter.d.ts +7 -10
  3. package/dist/adapters/wasm-adapter.d.ts.map +1 -0
  4. package/dist/adapters/wasm-adapter.js +41 -19
  5. package/dist/adapters/wasm-adapter.js.map +1 -1
  6. package/dist/index.d.ts +23 -24
  7. package/dist/index.d.ts.map +1 -0
  8. package/dist/index.js +240 -67
  9. package/dist/index.js.map +1 -1
  10. package/dist/ocr/registry.d.ts +7 -10
  11. package/dist/ocr/registry.d.ts.map +1 -0
  12. package/dist/ocr/registry.js.map +1 -1
  13. package/dist/ocr/tesseract-wasm-backend.d.ts +3 -6
  14. package/dist/ocr/tesseract-wasm-backend.d.ts.map +1 -0
  15. package/dist/ocr/tesseract-wasm-backend.js +0 -46
  16. package/dist/ocr/tesseract-wasm-backend.js.map +1 -1
  17. package/dist/pdfium.js +0 -5
  18. package/dist/plugin-registry.d.ts +246 -0
  19. package/dist/plugin-registry.d.ts.map +1 -0
  20. package/dist/runtime.d.ts +21 -22
  21. package/dist/runtime.d.ts.map +1 -0
  22. package/dist/runtime.js +0 -1
  23. package/dist/runtime.js.map +1 -1
  24. package/dist/{types-CKjcIYcX.d.ts → types.d.ts} +91 -22
  25. package/dist/types.d.ts.map +1 -0
  26. package/package.json +119 -162
  27. package/dist/adapters/wasm-adapter.cjs +0 -245
  28. package/dist/adapters/wasm-adapter.cjs.map +0 -1
  29. package/dist/adapters/wasm-adapter.d.cts +0 -121
  30. package/dist/index.cjs +0 -1245
  31. package/dist/index.cjs.map +0 -1
  32. package/dist/index.d.cts +0 -423
  33. package/dist/ocr/registry.cjs +0 -92
  34. package/dist/ocr/registry.cjs.map +0 -1
  35. package/dist/ocr/registry.d.cts +0 -102
  36. package/dist/ocr/tesseract-wasm-backend.cjs +0 -456
  37. package/dist/ocr/tesseract-wasm-backend.cjs.map +0 -1
  38. package/dist/ocr/tesseract-wasm-backend.d.cts +0 -257
  39. package/dist/runtime.cjs +0 -174
  40. package/dist/runtime.cjs.map +0 -1
  41. package/dist/runtime.d.cts +0 -256
  42. package/dist/types-CKjcIYcX.d.cts +0 -294
package/dist/runtime.d.ts CHANGED
@@ -28,11 +28,11 @@
28
28
  * }
29
29
  * ```
30
30
  */
31
- type RuntimeType = "browser" | "node" | "deno" | "bun" | "unknown";
31
+ export type RuntimeType = "browser" | "node" | "deno" | "bun" | "unknown";
32
32
  /**
33
33
  * WebAssembly capabilities available in the runtime
34
34
  */
35
- interface WasmCapabilities {
35
+ export interface WasmCapabilities {
36
36
  /** Runtime environment type */
37
37
  runtime: RuntimeType;
38
38
  /** WebAssembly support available */
@@ -83,43 +83,43 @@ interface WasmCapabilities {
83
83
  * }
84
84
  * ```
85
85
  */
86
- declare function detectRuntime(): RuntimeType;
86
+ export declare function detectRuntime(): RuntimeType;
87
87
  /**
88
88
  * Check if running in a browser environment
89
89
  *
90
90
  * @returns True if running in a browser, false otherwise
91
91
  */
92
- declare function isBrowser(): boolean;
92
+ export declare function isBrowser(): boolean;
93
93
  /**
94
94
  * Check if running in Node.js
95
95
  *
96
96
  * @returns True if running in Node.js, false otherwise
97
97
  */
98
- declare function isNode(): boolean;
98
+ export declare function isNode(): boolean;
99
99
  /**
100
100
  * Check if running in Deno
101
101
  *
102
102
  * @returns True if running in Deno, false otherwise
103
103
  */
104
- declare function isDeno(): boolean;
104
+ export declare function isDeno(): boolean;
105
105
  /**
106
106
  * Check if running in Bun
107
107
  *
108
108
  * @returns True if running in Bun, false otherwise
109
109
  */
110
- declare function isBun(): boolean;
110
+ export declare function isBun(): boolean;
111
111
  /**
112
112
  * Check if running in a web environment (browser or similar)
113
113
  *
114
114
  * @returns True if running in a web browser, false otherwise
115
115
  */
116
- declare function isWebEnvironment(): boolean;
116
+ export declare function isWebEnvironment(): boolean;
117
117
  /**
118
118
  * Check if running in a server-like environment (Node.js, Deno, Bun)
119
119
  *
120
120
  * @returns True if running on a server runtime, false otherwise
121
121
  */
122
- declare function isServerEnvironment(): boolean;
122
+ export declare function isServerEnvironment(): boolean;
123
123
  /**
124
124
  * Check if File API is available
125
125
  *
@@ -138,19 +138,19 @@ declare function isServerEnvironment(): boolean;
138
138
  * }
139
139
  * ```
140
140
  */
141
- declare function hasFileApi(): boolean;
141
+ export declare function hasFileApi(): boolean;
142
142
  /**
143
143
  * Check if Blob API is available
144
144
  *
145
145
  * @returns True if Blob API is available, false otherwise
146
146
  */
147
- declare function hasBlob(): boolean;
147
+ export declare function hasBlob(): boolean;
148
148
  /**
149
149
  * Check if Web Workers are available
150
150
  *
151
151
  * @returns True if Web Workers can be created, false otherwise
152
152
  */
153
- declare function hasWorkers(): boolean;
153
+ export declare function hasWorkers(): boolean;
154
154
  /**
155
155
  * Check if SharedArrayBuffer is available
156
156
  *
@@ -159,7 +159,7 @@ declare function hasWorkers(): boolean;
159
159
  *
160
160
  * @returns True if SharedArrayBuffer is available, false otherwise
161
161
  */
162
- declare function hasSharedArrayBuffer(): boolean;
162
+ export declare function hasSharedArrayBuffer(): boolean;
163
163
  /**
164
164
  * Check if module workers are available
165
165
  *
@@ -167,13 +167,13 @@ declare function hasSharedArrayBuffer(): boolean;
167
167
  *
168
168
  * @returns True if module workers are supported, false otherwise
169
169
  */
170
- declare function hasModuleWorkers(): boolean;
170
+ export declare function hasModuleWorkers(): boolean;
171
171
  /**
172
172
  * Check if WebAssembly is available
173
173
  *
174
174
  * @returns True if WebAssembly is supported, false otherwise
175
175
  */
176
- declare function hasWasm(): boolean;
176
+ export declare function hasWasm(): boolean;
177
177
  /**
178
178
  * Check if WebAssembly.instantiateStreaming is available
179
179
  *
@@ -181,13 +181,13 @@ declare function hasWasm(): boolean;
181
181
  *
182
182
  * @returns True if streaming WebAssembly is supported, false otherwise
183
183
  */
184
- declare function hasWasmStreaming(): boolean;
184
+ export declare function hasWasmStreaming(): boolean;
185
185
  /**
186
186
  * Check if BigInt is available
187
187
  *
188
188
  * @returns True if BigInt type is supported, false otherwise
189
189
  */
190
- declare function hasBigInt(): boolean;
190
+ export declare function hasBigInt(): boolean;
191
191
  /**
192
192
  * Get runtime version information
193
193
  *
@@ -199,7 +199,7 @@ declare function hasBigInt(): boolean;
199
199
  * console.log(`Running on Node ${version}`); // "Running on Node 18.12.0"
200
200
  * ```
201
201
  */
202
- declare function getRuntimeVersion(): string | undefined;
202
+ export declare function getRuntimeVersion(): string | undefined;
203
203
  /**
204
204
  * Get comprehensive WebAssembly capabilities for current runtime
205
205
  *
@@ -222,7 +222,7 @@ declare function getRuntimeVersion(): string | undefined;
222
222
  * }
223
223
  * ```
224
224
  */
225
- declare function getWasmCapabilities(): WasmCapabilities;
225
+ export declare function getWasmCapabilities(): WasmCapabilities;
226
226
  /**
227
227
  * Get comprehensive runtime information
228
228
  *
@@ -240,7 +240,7 @@ declare function getWasmCapabilities(): WasmCapabilities;
240
240
  * console.log(info.capabilities); // Detailed capability information
241
241
  * ```
242
242
  */
243
- declare function getRuntimeInfo(): {
243
+ export declare function getRuntimeInfo(): {
244
244
  runtime: RuntimeType;
245
245
  isBrowser: boolean;
246
246
  isNode: boolean;
@@ -252,5 +252,4 @@ declare function getRuntimeInfo(): {
252
252
  userAgent: string;
253
253
  capabilities: WasmCapabilities;
254
254
  };
255
-
256
- export { type RuntimeType, type WasmCapabilities, detectRuntime, getRuntimeInfo, getRuntimeVersion, getWasmCapabilities, hasBigInt, hasBlob, hasFileApi, hasModuleWorkers, hasSharedArrayBuffer, hasWasm, hasWasmStreaming, hasWorkers, isBrowser, isBun, isDeno, isNode, isServerEnvironment, isWebEnvironment };
255
+ //# sourceMappingURL=runtime.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"runtime.d.ts","sourceRoot":"","sources":["../typescript/runtime.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AAEH,MAAM,MAAM,WAAW,GAAG,SAAS,GAAG,MAAM,GAAG,MAAM,GAAG,KAAK,GAAG,SAAS,CAAC;AAE1E;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC,+BAA+B;IAC/B,OAAO,EAAE,WAAW,CAAC;IACrB,oCAAoC;IACpC,OAAO,EAAE,OAAO,CAAC;IACjB,oDAAoD;IACpD,gBAAgB,EAAE,OAAO,CAAC;IAC1B,mCAAmC;IACnC,UAAU,EAAE,OAAO,CAAC;IACpB,yBAAyB;IACzB,OAAO,EAAE,OAAO,CAAC;IACjB,+BAA+B;IAC/B,UAAU,EAAE,OAAO,CAAC;IACpB,sDAAsD;IACtD,oBAAoB,EAAE,OAAO,CAAC;IAC9B,+BAA+B;IAC/B,gBAAgB,EAAE,OAAO,CAAC;IAC1B,qBAAqB;IACrB,SAAS,EAAE,OAAO,CAAC;IACnB,4CAA4C;IAC5C,cAAc,CAAC,EAAE,MAAM,CAAC;CACxB;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,wBAAgB,aAAa,IAAI,WAAW,CAkB3C;AAED;;;;GAIG;AACH,wBAAgB,SAAS,IAAI,OAAO,CAEnC;AAED;;;;GAIG;AACH,wBAAgB,MAAM,IAAI,OAAO,CAEhC;AAED;;;;GAIG;AACH,wBAAgB,MAAM,IAAI,OAAO,CAEhC;AAED;;;;GAIG;AACH,wBAAgB,KAAK,IAAI,OAAO,CAE/B;AAED;;;;GAIG;AACH,wBAAgB,gBAAgB,IAAI,OAAO,CAG1C;AAED;;;;GAIG;AACH,wBAAgB,mBAAmB,IAAI,OAAO,CAG7C;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,wBAAgB,UAAU,IAAI,OAAO,CAEpC;AAED;;;;GAIG;AACH,wBAAgB,OAAO,IAAI,OAAO,CAEjC;AAED;;;;GAIG;AACH,wBAAgB,UAAU,IAAI,OAAO,CAEpC;AAED;;;;;;;GAOG;AACH,wBAAgB,oBAAoB,IAAI,OAAO,CAE9C;AAED;;;;;;GAMG;AACH,wBAAgB,gBAAgB,IAAI,OAAO,CAkB1C;AAED;;;;GAIG;AACH,wBAAgB,OAAO,IAAI,OAAO,CAEjC;AAED;;;;;;GAMG;AACH,wBAAgB,gBAAgB,IAAI,OAAO,CAE1C;AAED;;;;GAIG;AACH,wBAAgB,SAAS,IAAI,OAAO,CAOnC;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,iBAAiB,IAAI,MAAM,GAAG,SAAS,CAkBtD;AAED;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,wBAAgB,mBAAmB,IAAI,gBAAgB,CAgBtD;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAgB,cAAc;;;;;;;;;;;EAgB7B"}
package/dist/runtime.js CHANGED
@@ -83,7 +83,6 @@ function getRuntimeVersion() {
83
83
  switch (runtime) {
84
84
  case "node":
85
85
  return process.version?.substring(1);
86
- // Remove 'v' prefix
87
86
  case "deno": {
88
87
  const deno = globalThis.Deno;
89
88
  const version = deno?.version;
@@ -1 +1 @@
1
- {"version":3,"sources":["../typescript/runtime.ts"],"sourcesContent":["/**\n * Runtime detection and environment-specific utilities\n *\n * This module provides utilities for detecting the JavaScript runtime environment,\n * checking for feature availability, and enabling environment-specific WASM loading strategies.\n *\n * @example Basic Runtime Detection\n * ```typescript\n * import { detectRuntime, isBrowser, isNode } from '@kreuzberg/wasm/runtime';\n *\n * if (isBrowser()) {\n * console.log('Running in browser');\n * } else if (isNode()) {\n * console.log('Running in Node.js');\n * }\n * ```\n *\n * @example Feature Detection\n * ```typescript\n * import { hasFileApi, hasWorkers } from '@kreuzberg/wasm/runtime';\n *\n * if (hasFileApi()) {\n * // Can use File API for browser file uploads\n * }\n *\n * if (hasWorkers()) {\n * // Can use Web Workers for parallel processing\n * }\n * ```\n */\n\nexport type RuntimeType = \"browser\" | \"node\" | \"deno\" | \"bun\" | \"unknown\";\n\n/**\n * WebAssembly capabilities available in the runtime\n */\nexport interface WasmCapabilities {\n\t/** Runtime environment type */\n\truntime: RuntimeType;\n\t/** WebAssembly support available */\n\thasWasm: boolean;\n\t/** Streaming WebAssembly instantiation available */\n\thasWasmStreaming: boolean;\n\t/** File API available (browser) */\n\thasFileApi: boolean;\n\t/** Blob API available */\n\thasBlob: boolean;\n\t/** Worker support available */\n\thasWorkers: boolean;\n\t/** SharedArrayBuffer available (may be restricted) */\n\thasSharedArrayBuffer: boolean;\n\t/** Module Workers available */\n\thasModuleWorkers: boolean;\n\t/** BigInt support */\n\thasBigInt: boolean;\n\t/** Specific runtime version if available */\n\truntimeVersion?: string;\n}\n\n/**\n * Detect the current JavaScript runtime\n *\n * Checks for various global objects and properties to determine\n * which JavaScript runtime environment is currently executing.\n *\n * @returns The detected runtime type\n *\n * @example\n * ```typescript\n * import { detectRuntime } from '@kreuzberg/wasm/runtime';\n *\n * const runtime = detectRuntime();\n * switch (runtime) {\n * case 'browser':\n * console.log('Running in browser');\n * break;\n * case 'node':\n * console.log('Running in Node.js');\n * break;\n * case 'deno':\n * console.log('Running in Deno');\n * break;\n * case 'bun':\n * console.log('Running in Bun');\n * break;\n * }\n * ```\n */\nexport function detectRuntime(): RuntimeType {\n\t// Check for Deno\n\tif (typeof (globalThis as unknown as Record<string, unknown>).Deno !== \"undefined\") {\n\t\treturn \"deno\";\n\t}\n\n\t// Check for Bun\n\tif (typeof (globalThis as unknown as Record<string, unknown>).Bun !== \"undefined\") {\n\t\treturn \"bun\";\n\t}\n\n\t// Check for Node.js\n\tif (typeof process !== \"undefined\" && process.versions && process.versions.node) {\n\t\treturn \"node\";\n\t}\n\n\t// Check for browser\n\tif (typeof window !== \"undefined\" && typeof document !== \"undefined\") {\n\t\treturn \"browser\";\n\t}\n\n\treturn \"unknown\";\n}\n\n/**\n * Check if running in a browser environment\n *\n * @returns True if running in a browser, false otherwise\n */\nexport function isBrowser(): boolean {\n\treturn detectRuntime() === \"browser\";\n}\n\n/**\n * Check if running in Node.js\n *\n * @returns True if running in Node.js, false otherwise\n */\nexport function isNode(): boolean {\n\treturn detectRuntime() === \"node\";\n}\n\n/**\n * Check if running in Deno\n *\n * @returns True if running in Deno, false otherwise\n */\nexport function isDeno(): boolean {\n\treturn detectRuntime() === \"deno\";\n}\n\n/**\n * Check if running in Bun\n *\n * @returns True if running in Bun, false otherwise\n */\nexport function isBun(): boolean {\n\treturn detectRuntime() === \"bun\";\n}\n\n/**\n * Check if running in a web environment (browser or similar)\n *\n * @returns True if running in a web browser, false otherwise\n */\nexport function isWebEnvironment(): boolean {\n\tconst runtime = detectRuntime();\n\treturn runtime === \"browser\";\n}\n\n/**\n * Check if running in a server-like environment (Node.js, Deno, Bun)\n *\n * @returns True if running on a server runtime, false otherwise\n */\nexport function isServerEnvironment(): boolean {\n\tconst runtime = detectRuntime();\n\treturn runtime === \"node\" || runtime === \"deno\" || runtime === \"bun\";\n}\n\n/**\n * Check if File API is available\n *\n * The File API is required for handling browser file uploads.\n *\n * @returns True if File API is available, false otherwise\n *\n * @example\n * ```typescript\n * if (hasFileApi()) {\n * const fileInput = document.getElementById('file');\n * fileInput.addEventListener('change', (e) => {\n * const file = e.target.files?.[0];\n * // Handle file\n * });\n * }\n * ```\n */\nexport function hasFileApi(): boolean {\n\treturn typeof window !== \"undefined\" && typeof File !== \"undefined\" && typeof Blob !== \"undefined\";\n}\n\n/**\n * Check if Blob API is available\n *\n * @returns True if Blob API is available, false otherwise\n */\nexport function hasBlob(): boolean {\n\treturn typeof Blob !== \"undefined\";\n}\n\n/**\n * Check if Web Workers are available\n *\n * @returns True if Web Workers can be created, false otherwise\n */\nexport function hasWorkers(): boolean {\n\treturn typeof Worker !== \"undefined\";\n}\n\n/**\n * Check if SharedArrayBuffer is available\n *\n * Note: SharedArrayBuffer is restricted in some browser contexts\n * due to security considerations (Spectre/Meltdown mitigations).\n *\n * @returns True if SharedArrayBuffer is available, false otherwise\n */\nexport function hasSharedArrayBuffer(): boolean {\n\treturn typeof SharedArrayBuffer !== \"undefined\";\n}\n\n/**\n * Check if module workers are available\n *\n * Module workers allow importing ES modules in worker threads.\n *\n * @returns True if module workers are supported, false otherwise\n */\nexport function hasModuleWorkers(): boolean {\n\tif (!hasWorkers()) {\n\t\treturn false;\n\t}\n\n\ttry {\n\t\t// Try to detect module worker support\n\t\tconst blob = new Blob(['console.log(\"test\")'], {\n\t\t\ttype: \"application/javascript\",\n\t\t});\n\t\tconst workerUrl = URL.createObjectURL(blob);\n\t\ttry {\n\t\t\t// Module workers require type: 'module' option\n\t\t\t// We can't actually instantiate without issues, so we check the API exists\n\t\t\treturn true;\n\t\t} finally {\n\t\t\tURL.revokeObjectURL(workerUrl);\n\t\t}\n\t} catch {\n\t\treturn false;\n\t}\n}\n\n/**\n * Check if WebAssembly is available\n *\n * @returns True if WebAssembly is supported, false otherwise\n */\nexport function hasWasm(): boolean {\n\treturn typeof WebAssembly !== \"undefined\" && WebAssembly.instantiate !== undefined;\n}\n\n/**\n * Check if WebAssembly.instantiateStreaming is available\n *\n * Streaming instantiation is more efficient than buffering the entire WASM module.\n *\n * @returns True if streaming WebAssembly is supported, false otherwise\n */\nexport function hasWasmStreaming(): boolean {\n\treturn typeof WebAssembly !== \"undefined\" && WebAssembly.instantiateStreaming !== undefined;\n}\n\n/**\n * Check if BigInt is available\n *\n * @returns True if BigInt type is supported, false otherwise\n */\nexport function hasBigInt(): boolean {\n\ttry {\n\t\tconst test = BigInt(\"1\");\n\t\treturn typeof test === \"bigint\";\n\t} catch {\n\t\treturn false;\n\t}\n}\n\n/**\n * Get runtime version information\n *\n * @returns Version string if available, undefined otherwise\n *\n * @example\n * ```typescript\n * const version = getRuntimeVersion();\n * console.log(`Running on Node ${version}`); // \"Running on Node 18.12.0\"\n * ```\n */\nexport function getRuntimeVersion(): string | undefined {\n\tconst runtime = detectRuntime();\n\n\tswitch (runtime) {\n\t\tcase \"node\":\n\t\t\treturn process.version?.substring(1); // Remove 'v' prefix\n\t\tcase \"deno\": {\n\t\t\tconst deno = (globalThis as unknown as Record<string, unknown>).Deno as Record<string, unknown> | undefined;\n\t\t\tconst version = deno?.version as Record<string, unknown> | undefined;\n\t\t\treturn version?.deno as string | undefined;\n\t\t}\n\t\tcase \"bun\": {\n\t\t\tconst bun = (globalThis as unknown as Record<string, unknown>).Bun as Record<string, unknown> | undefined;\n\t\t\treturn bun?.version as string | undefined;\n\t\t}\n\t\tdefault:\n\t\t\treturn undefined;\n\t}\n}\n\n/**\n * Get comprehensive WebAssembly capabilities for current runtime\n *\n * Returns detailed information about WASM and related APIs available\n * in the current runtime environment.\n *\n * @returns Object describing available WASM capabilities\n *\n * @example\n * ```typescript\n * import { getWasmCapabilities } from '@kreuzberg/wasm/runtime';\n *\n * const caps = getWasmCapabilities();\n * console.log(`WASM available: ${caps.hasWasm}`);\n * console.log(`Streaming WASM: ${caps.hasWasmStreaming}`);\n * console.log(`Workers available: ${caps.hasWorkers}`);\n *\n * if (caps.hasWasm && caps.hasWorkers) {\n * // Can offload WASM processing to workers\n * }\n * ```\n */\nexport function getWasmCapabilities(): WasmCapabilities {\n\tconst runtime = detectRuntime();\n\tconst version = getRuntimeVersion();\n\tconst capabilities: WasmCapabilities = {\n\t\truntime,\n\t\thasWasm: hasWasm(),\n\t\thasWasmStreaming: hasWasmStreaming(),\n\t\thasFileApi: hasFileApi(),\n\t\thasBlob: hasBlob(),\n\t\thasWorkers: hasWorkers(),\n\t\thasSharedArrayBuffer: hasSharedArrayBuffer(),\n\t\thasModuleWorkers: hasModuleWorkers(),\n\t\thasBigInt: hasBigInt(),\n\t\t...(version !== undefined ? { runtimeVersion: version } : {}),\n\t};\n\treturn capabilities;\n}\n\n/**\n * Get comprehensive runtime information\n *\n * Returns detailed information about the current runtime environment,\n * capabilities, and identifying information.\n *\n * @returns Object with runtime details and capabilities\n *\n * @example\n * ```typescript\n * const info = getRuntimeInfo();\n * console.log(info.runtime); // 'browser' | 'node' | 'deno' | 'bun'\n * console.log(info.isBrowser); // true/false\n * console.log(info.userAgent); // Browser user agent string\n * console.log(info.capabilities); // Detailed capability information\n * ```\n */\nexport function getRuntimeInfo() {\n\tconst runtime = detectRuntime();\n\tconst capabilities = getWasmCapabilities();\n\n\treturn {\n\t\truntime,\n\t\tisBrowser: isBrowser(),\n\t\tisNode: isNode(),\n\t\tisDeno: isDeno(),\n\t\tisBun: isBun(),\n\t\tisWeb: isWebEnvironment(),\n\t\tisServer: isServerEnvironment(),\n\t\truntimeVersion: getRuntimeVersion(),\n\t\tuserAgent: typeof navigator !== \"undefined\" ? navigator.userAgent : \"N/A\",\n\t\tcapabilities,\n\t};\n}\n"],"mappings":";AAwFO,SAAS,gBAA6B;AAE5C,MAAI,OAAQ,WAAkD,SAAS,aAAa;AACnF,WAAO;AAAA,EACR;AAGA,MAAI,OAAQ,WAAkD,QAAQ,aAAa;AAClF,WAAO;AAAA,EACR;AAGA,MAAI,OAAO,YAAY,eAAe,QAAQ,YAAY,QAAQ,SAAS,MAAM;AAChF,WAAO;AAAA,EACR;AAGA,MAAI,OAAO,WAAW,eAAe,OAAO,aAAa,aAAa;AACrE,WAAO;AAAA,EACR;AAEA,SAAO;AACR;AAOO,SAAS,YAAqB;AACpC,SAAO,cAAc,MAAM;AAC5B;AAOO,SAAS,SAAkB;AACjC,SAAO,cAAc,MAAM;AAC5B;AAOO,SAAS,SAAkB;AACjC,SAAO,cAAc,MAAM;AAC5B;AAOO,SAAS,QAAiB;AAChC,SAAO,cAAc,MAAM;AAC5B;AAOO,SAAS,mBAA4B;AAC3C,QAAM,UAAU,cAAc;AAC9B,SAAO,YAAY;AACpB;AAOO,SAAS,sBAA+B;AAC9C,QAAM,UAAU,cAAc;AAC9B,SAAO,YAAY,UAAU,YAAY,UAAU,YAAY;AAChE;AAoBO,SAAS,aAAsB;AACrC,SAAO,OAAO,WAAW,eAAe,OAAO,SAAS,eAAe,OAAO,SAAS;AACxF;AAOO,SAAS,UAAmB;AAClC,SAAO,OAAO,SAAS;AACxB;AAOO,SAAS,aAAsB;AACrC,SAAO,OAAO,WAAW;AAC1B;AAUO,SAAS,uBAAgC;AAC/C,SAAO,OAAO,sBAAsB;AACrC;AASO,SAAS,mBAA4B;AAC3C,MAAI,CAAC,WAAW,GAAG;AAClB,WAAO;AAAA,EACR;AAEA,MAAI;AAEH,UAAM,OAAO,IAAI,KAAK,CAAC,qBAAqB,GAAG;AAAA,MAC9C,MAAM;AAAA,IACP,CAAC;AACD,UAAM,YAAY,IAAI,gBAAgB,IAAI;AAC1C,QAAI;AAGH,aAAO;AAAA,IACR,UAAE;AACD,UAAI,gBAAgB,SAAS;AAAA,IAC9B;AAAA,EACD,QAAQ;AACP,WAAO;AAAA,EACR;AACD;AAOO,SAAS,UAAmB;AAClC,SAAO,OAAO,gBAAgB,eAAe,YAAY,gBAAgB;AAC1E;AASO,SAAS,mBAA4B;AAC3C,SAAO,OAAO,gBAAgB,eAAe,YAAY,yBAAyB;AACnF;AAOO,SAAS,YAAqB;AACpC,MAAI;AACH,UAAM,OAAO,OAAO,GAAG;AACvB,WAAO,OAAO,SAAS;AAAA,EACxB,QAAQ;AACP,WAAO;AAAA,EACR;AACD;AAaO,SAAS,oBAAwC;AACvD,QAAM,UAAU,cAAc;AAE9B,UAAQ,SAAS;AAAA,IAChB,KAAK;AACJ,aAAO,QAAQ,SAAS,UAAU,CAAC;AAAA;AAAA,IACpC,KAAK,QAAQ;AACZ,YAAM,OAAQ,WAAkD;AAChE,YAAM,UAAU,MAAM;AACtB,aAAO,SAAS;AAAA,IACjB;AAAA,IACA,KAAK,OAAO;AACX,YAAM,MAAO,WAAkD;AAC/D,aAAO,KAAK;AAAA,IACb;AAAA,IACA;AACC,aAAO;AAAA,EACT;AACD;AAwBO,SAAS,sBAAwC;AACvD,QAAM,UAAU,cAAc;AAC9B,QAAM,UAAU,kBAAkB;AAClC,QAAM,eAAiC;AAAA,IACtC;AAAA,IACA,SAAS,QAAQ;AAAA,IACjB,kBAAkB,iBAAiB;AAAA,IACnC,YAAY,WAAW;AAAA,IACvB,SAAS,QAAQ;AAAA,IACjB,YAAY,WAAW;AAAA,IACvB,sBAAsB,qBAAqB;AAAA,IAC3C,kBAAkB,iBAAiB;AAAA,IACnC,WAAW,UAAU;AAAA,IACrB,GAAI,YAAY,SAAY,EAAE,gBAAgB,QAAQ,IAAI,CAAC;AAAA,EAC5D;AACA,SAAO;AACR;AAmBO,SAAS,iBAAiB;AAChC,QAAM,UAAU,cAAc;AAC9B,QAAM,eAAe,oBAAoB;AAEzC,SAAO;AAAA,IACN;AAAA,IACA,WAAW,UAAU;AAAA,IACrB,QAAQ,OAAO;AAAA,IACf,QAAQ,OAAO;AAAA,IACf,OAAO,MAAM;AAAA,IACb,OAAO,iBAAiB;AAAA,IACxB,UAAU,oBAAoB;AAAA,IAC9B,gBAAgB,kBAAkB;AAAA,IAClC,WAAW,OAAO,cAAc,cAAc,UAAU,YAAY;AAAA,IACpE;AAAA,EACD;AACD;","names":[]}
1
+ {"version":3,"sources":["../typescript/runtime.ts"],"sourcesContent":["/**\n * Runtime detection and environment-specific utilities\n *\n * This module provides utilities for detecting the JavaScript runtime environment,\n * checking for feature availability, and enabling environment-specific WASM loading strategies.\n *\n * @example Basic Runtime Detection\n * ```typescript\n * import { detectRuntime, isBrowser, isNode } from '@kreuzberg/wasm/runtime';\n *\n * if (isBrowser()) {\n * console.log('Running in browser');\n * } else if (isNode()) {\n * console.log('Running in Node.js');\n * }\n * ```\n *\n * @example Feature Detection\n * ```typescript\n * import { hasFileApi, hasWorkers } from '@kreuzberg/wasm/runtime';\n *\n * if (hasFileApi()) {\n * // Can use File API for browser file uploads\n * }\n *\n * if (hasWorkers()) {\n * // Can use Web Workers for parallel processing\n * }\n * ```\n */\n\nexport type RuntimeType = \"browser\" | \"node\" | \"deno\" | \"bun\" | \"unknown\";\n\n/**\n * WebAssembly capabilities available in the runtime\n */\nexport interface WasmCapabilities {\n\t/** Runtime environment type */\n\truntime: RuntimeType;\n\t/** WebAssembly support available */\n\thasWasm: boolean;\n\t/** Streaming WebAssembly instantiation available */\n\thasWasmStreaming: boolean;\n\t/** File API available (browser) */\n\thasFileApi: boolean;\n\t/** Blob API available */\n\thasBlob: boolean;\n\t/** Worker support available */\n\thasWorkers: boolean;\n\t/** SharedArrayBuffer available (may be restricted) */\n\thasSharedArrayBuffer: boolean;\n\t/** Module Workers available */\n\thasModuleWorkers: boolean;\n\t/** BigInt support */\n\thasBigInt: boolean;\n\t/** Specific runtime version if available */\n\truntimeVersion?: string;\n}\n\n/**\n * Detect the current JavaScript runtime\n *\n * Checks for various global objects and properties to determine\n * which JavaScript runtime environment is currently executing.\n *\n * @returns The detected runtime type\n *\n * @example\n * ```typescript\n * import { detectRuntime } from '@kreuzberg/wasm/runtime';\n *\n * const runtime = detectRuntime();\n * switch (runtime) {\n * case 'browser':\n * console.log('Running in browser');\n * break;\n * case 'node':\n * console.log('Running in Node.js');\n * break;\n * case 'deno':\n * console.log('Running in Deno');\n * break;\n * case 'bun':\n * console.log('Running in Bun');\n * break;\n * }\n * ```\n */\nexport function detectRuntime(): RuntimeType {\n\tif (typeof (globalThis as unknown as Record<string, unknown>).Deno !== \"undefined\") {\n\t\treturn \"deno\";\n\t}\n\n\tif (typeof (globalThis as unknown as Record<string, unknown>).Bun !== \"undefined\") {\n\t\treturn \"bun\";\n\t}\n\n\tif (typeof process !== \"undefined\" && process.versions && process.versions.node) {\n\t\treturn \"node\";\n\t}\n\n\tif (typeof window !== \"undefined\" && typeof document !== \"undefined\") {\n\t\treturn \"browser\";\n\t}\n\n\treturn \"unknown\";\n}\n\n/**\n * Check if running in a browser environment\n *\n * @returns True if running in a browser, false otherwise\n */\nexport function isBrowser(): boolean {\n\treturn detectRuntime() === \"browser\";\n}\n\n/**\n * Check if running in Node.js\n *\n * @returns True if running in Node.js, false otherwise\n */\nexport function isNode(): boolean {\n\treturn detectRuntime() === \"node\";\n}\n\n/**\n * Check if running in Deno\n *\n * @returns True if running in Deno, false otherwise\n */\nexport function isDeno(): boolean {\n\treturn detectRuntime() === \"deno\";\n}\n\n/**\n * Check if running in Bun\n *\n * @returns True if running in Bun, false otherwise\n */\nexport function isBun(): boolean {\n\treturn detectRuntime() === \"bun\";\n}\n\n/**\n * Check if running in a web environment (browser or similar)\n *\n * @returns True if running in a web browser, false otherwise\n */\nexport function isWebEnvironment(): boolean {\n\tconst runtime = detectRuntime();\n\treturn runtime === \"browser\";\n}\n\n/**\n * Check if running in a server-like environment (Node.js, Deno, Bun)\n *\n * @returns True if running on a server runtime, false otherwise\n */\nexport function isServerEnvironment(): boolean {\n\tconst runtime = detectRuntime();\n\treturn runtime === \"node\" || runtime === \"deno\" || runtime === \"bun\";\n}\n\n/**\n * Check if File API is available\n *\n * The File API is required for handling browser file uploads.\n *\n * @returns True if File API is available, false otherwise\n *\n * @example\n * ```typescript\n * if (hasFileApi()) {\n * const fileInput = document.getElementById('file');\n * fileInput.addEventListener('change', (e) => {\n * const file = e.target.files?.[0];\n * // Handle file\n * });\n * }\n * ```\n */\nexport function hasFileApi(): boolean {\n\treturn typeof window !== \"undefined\" && typeof File !== \"undefined\" && typeof Blob !== \"undefined\";\n}\n\n/**\n * Check if Blob API is available\n *\n * @returns True if Blob API is available, false otherwise\n */\nexport function hasBlob(): boolean {\n\treturn typeof Blob !== \"undefined\";\n}\n\n/**\n * Check if Web Workers are available\n *\n * @returns True if Web Workers can be created, false otherwise\n */\nexport function hasWorkers(): boolean {\n\treturn typeof Worker !== \"undefined\";\n}\n\n/**\n * Check if SharedArrayBuffer is available\n *\n * Note: SharedArrayBuffer is restricted in some browser contexts\n * due to security considerations (Spectre/Meltdown mitigations).\n *\n * @returns True if SharedArrayBuffer is available, false otherwise\n */\nexport function hasSharedArrayBuffer(): boolean {\n\treturn typeof SharedArrayBuffer !== \"undefined\";\n}\n\n/**\n * Check if module workers are available\n *\n * Module workers allow importing ES modules in worker threads.\n *\n * @returns True if module workers are supported, false otherwise\n */\nexport function hasModuleWorkers(): boolean {\n\tif (!hasWorkers()) {\n\t\treturn false;\n\t}\n\n\ttry {\n\t\tconst blob = new Blob(['console.log(\"test\")'], {\n\t\t\ttype: \"application/javascript\",\n\t\t});\n\t\tconst workerUrl = URL.createObjectURL(blob);\n\t\ttry {\n\t\t\treturn true;\n\t\t} finally {\n\t\t\tURL.revokeObjectURL(workerUrl);\n\t\t}\n\t} catch {\n\t\treturn false;\n\t}\n}\n\n/**\n * Check if WebAssembly is available\n *\n * @returns True if WebAssembly is supported, false otherwise\n */\nexport function hasWasm(): boolean {\n\treturn typeof WebAssembly !== \"undefined\" && WebAssembly.instantiate !== undefined;\n}\n\n/**\n * Check if WebAssembly.instantiateStreaming is available\n *\n * Streaming instantiation is more efficient than buffering the entire WASM module.\n *\n * @returns True if streaming WebAssembly is supported, false otherwise\n */\nexport function hasWasmStreaming(): boolean {\n\treturn typeof WebAssembly !== \"undefined\" && WebAssembly.instantiateStreaming !== undefined;\n}\n\n/**\n * Check if BigInt is available\n *\n * @returns True if BigInt type is supported, false otherwise\n */\nexport function hasBigInt(): boolean {\n\ttry {\n\t\tconst test = BigInt(\"1\");\n\t\treturn typeof test === \"bigint\";\n\t} catch {\n\t\treturn false;\n\t}\n}\n\n/**\n * Get runtime version information\n *\n * @returns Version string if available, undefined otherwise\n *\n * @example\n * ```typescript\n * const version = getRuntimeVersion();\n * console.log(`Running on Node ${version}`); // \"Running on Node 18.12.0\"\n * ```\n */\nexport function getRuntimeVersion(): string | undefined {\n\tconst runtime = detectRuntime();\n\n\tswitch (runtime) {\n\t\tcase \"node\":\n\t\t\treturn process.version?.substring(1);\n\t\tcase \"deno\": {\n\t\t\tconst deno = (globalThis as unknown as Record<string, unknown>).Deno as Record<string, unknown> | undefined;\n\t\t\tconst version = deno?.version as Record<string, unknown> | undefined;\n\t\t\treturn version?.deno as string | undefined;\n\t\t}\n\t\tcase \"bun\": {\n\t\t\tconst bun = (globalThis as unknown as Record<string, unknown>).Bun as Record<string, unknown> | undefined;\n\t\t\treturn bun?.version as string | undefined;\n\t\t}\n\t\tdefault:\n\t\t\treturn undefined;\n\t}\n}\n\n/**\n * Get comprehensive WebAssembly capabilities for current runtime\n *\n * Returns detailed information about WASM and related APIs available\n * in the current runtime environment.\n *\n * @returns Object describing available WASM capabilities\n *\n * @example\n * ```typescript\n * import { getWasmCapabilities } from '@kreuzberg/wasm/runtime';\n *\n * const caps = getWasmCapabilities();\n * console.log(`WASM available: ${caps.hasWasm}`);\n * console.log(`Streaming WASM: ${caps.hasWasmStreaming}`);\n * console.log(`Workers available: ${caps.hasWorkers}`);\n *\n * if (caps.hasWasm && caps.hasWorkers) {\n * // Can offload WASM processing to workers\n * }\n * ```\n */\nexport function getWasmCapabilities(): WasmCapabilities {\n\tconst runtime = detectRuntime();\n\tconst version = getRuntimeVersion();\n\tconst capabilities: WasmCapabilities = {\n\t\truntime,\n\t\thasWasm: hasWasm(),\n\t\thasWasmStreaming: hasWasmStreaming(),\n\t\thasFileApi: hasFileApi(),\n\t\thasBlob: hasBlob(),\n\t\thasWorkers: hasWorkers(),\n\t\thasSharedArrayBuffer: hasSharedArrayBuffer(),\n\t\thasModuleWorkers: hasModuleWorkers(),\n\t\thasBigInt: hasBigInt(),\n\t\t...(version !== undefined ? { runtimeVersion: version } : {}),\n\t};\n\treturn capabilities;\n}\n\n/**\n * Get comprehensive runtime information\n *\n * Returns detailed information about the current runtime environment,\n * capabilities, and identifying information.\n *\n * @returns Object with runtime details and capabilities\n *\n * @example\n * ```typescript\n * const info = getRuntimeInfo();\n * console.log(info.runtime); // 'browser' | 'node' | 'deno' | 'bun'\n * console.log(info.isBrowser); // true/false\n * console.log(info.userAgent); // Browser user agent string\n * console.log(info.capabilities); // Detailed capability information\n * ```\n */\nexport function getRuntimeInfo() {\n\tconst runtime = detectRuntime();\n\tconst capabilities = getWasmCapabilities();\n\n\treturn {\n\t\truntime,\n\t\tisBrowser: isBrowser(),\n\t\tisNode: isNode(),\n\t\tisDeno: isDeno(),\n\t\tisBun: isBun(),\n\t\tisWeb: isWebEnvironment(),\n\t\tisServer: isServerEnvironment(),\n\t\truntimeVersion: getRuntimeVersion(),\n\t\tuserAgent: typeof navigator !== \"undefined\" ? navigator.userAgent : \"N/A\",\n\t\tcapabilities,\n\t};\n}\n"],"mappings":";AAwFO,SAAS,gBAA6B;AAC5C,MAAI,OAAQ,WAAkD,SAAS,aAAa;AACnF,WAAO;AAAA,EACR;AAEA,MAAI,OAAQ,WAAkD,QAAQ,aAAa;AAClF,WAAO;AAAA,EACR;AAEA,MAAI,OAAO,YAAY,eAAe,QAAQ,YAAY,QAAQ,SAAS,MAAM;AAChF,WAAO;AAAA,EACR;AAEA,MAAI,OAAO,WAAW,eAAe,OAAO,aAAa,aAAa;AACrE,WAAO;AAAA,EACR;AAEA,SAAO;AACR;AAOO,SAAS,YAAqB;AACpC,SAAO,cAAc,MAAM;AAC5B;AAOO,SAAS,SAAkB;AACjC,SAAO,cAAc,MAAM;AAC5B;AAOO,SAAS,SAAkB;AACjC,SAAO,cAAc,MAAM;AAC5B;AAOO,SAAS,QAAiB;AAChC,SAAO,cAAc,MAAM;AAC5B;AAOO,SAAS,mBAA4B;AAC3C,QAAM,UAAU,cAAc;AAC9B,SAAO,YAAY;AACpB;AAOO,SAAS,sBAA+B;AAC9C,QAAM,UAAU,cAAc;AAC9B,SAAO,YAAY,UAAU,YAAY,UAAU,YAAY;AAChE;AAoBO,SAAS,aAAsB;AACrC,SAAO,OAAO,WAAW,eAAe,OAAO,SAAS,eAAe,OAAO,SAAS;AACxF;AAOO,SAAS,UAAmB;AAClC,SAAO,OAAO,SAAS;AACxB;AAOO,SAAS,aAAsB;AACrC,SAAO,OAAO,WAAW;AAC1B;AAUO,SAAS,uBAAgC;AAC/C,SAAO,OAAO,sBAAsB;AACrC;AASO,SAAS,mBAA4B;AAC3C,MAAI,CAAC,WAAW,GAAG;AAClB,WAAO;AAAA,EACR;AAEA,MAAI;AACH,UAAM,OAAO,IAAI,KAAK,CAAC,qBAAqB,GAAG;AAAA,MAC9C,MAAM;AAAA,IACP,CAAC;AACD,UAAM,YAAY,IAAI,gBAAgB,IAAI;AAC1C,QAAI;AACH,aAAO;AAAA,IACR,UAAE;AACD,UAAI,gBAAgB,SAAS;AAAA,IAC9B;AAAA,EACD,QAAQ;AACP,WAAO;AAAA,EACR;AACD;AAOO,SAAS,UAAmB;AAClC,SAAO,OAAO,gBAAgB,eAAe,YAAY,gBAAgB;AAC1E;AASO,SAAS,mBAA4B;AAC3C,SAAO,OAAO,gBAAgB,eAAe,YAAY,yBAAyB;AACnF;AAOO,SAAS,YAAqB;AACpC,MAAI;AACH,UAAM,OAAO,OAAO,GAAG;AACvB,WAAO,OAAO,SAAS;AAAA,EACxB,QAAQ;AACP,WAAO;AAAA,EACR;AACD;AAaO,SAAS,oBAAwC;AACvD,QAAM,UAAU,cAAc;AAE9B,UAAQ,SAAS;AAAA,IAChB,KAAK;AACJ,aAAO,QAAQ,SAAS,UAAU,CAAC;AAAA,IACpC,KAAK,QAAQ;AACZ,YAAM,OAAQ,WAAkD;AAChE,YAAM,UAAU,MAAM;AACtB,aAAO,SAAS;AAAA,IACjB;AAAA,IACA,KAAK,OAAO;AACX,YAAM,MAAO,WAAkD;AAC/D,aAAO,KAAK;AAAA,IACb;AAAA,IACA;AACC,aAAO;AAAA,EACT;AACD;AAwBO,SAAS,sBAAwC;AACvD,QAAM,UAAU,cAAc;AAC9B,QAAM,UAAU,kBAAkB;AAClC,QAAM,eAAiC;AAAA,IACtC;AAAA,IACA,SAAS,QAAQ;AAAA,IACjB,kBAAkB,iBAAiB;AAAA,IACnC,YAAY,WAAW;AAAA,IACvB,SAAS,QAAQ;AAAA,IACjB,YAAY,WAAW;AAAA,IACvB,sBAAsB,qBAAqB;AAAA,IAC3C,kBAAkB,iBAAiB;AAAA,IACnC,WAAW,UAAU;AAAA,IACrB,GAAI,YAAY,SAAY,EAAE,gBAAgB,QAAQ,IAAI,CAAC;AAAA,EAC5D;AACA,SAAO;AACR;AAmBO,SAAS,iBAAiB;AAChC,QAAM,UAAU,cAAc;AAC9B,QAAM,eAAe,oBAAoB;AAEzC,SAAO;AAAA,IACN;AAAA,IACA,WAAW,UAAU;AAAA,IACrB,QAAQ,OAAO;AAAA,IACf,QAAQ,OAAO;AAAA,IACf,OAAO,MAAM;AAAA,IACb,OAAO,iBAAiB;AAAA,IACxB,UAAU,oBAAoB;AAAA,IAC9B,gBAAgB,kBAAkB;AAAA,IAClC,WAAW,OAAO,cAAc,cAAc,UAAU,YAAY;AAAA,IACpE;AAAA,EACD;AACD;","names":[]}
@@ -7,7 +7,7 @@
7
7
  /**
8
8
  * Token reduction configuration
9
9
  */
10
- interface TokenReductionConfig {
10
+ export interface TokenReductionConfig {
11
11
  /** Token reduction mode */
12
12
  mode?: string;
13
13
  /** Preserve important words during reduction */
@@ -16,7 +16,7 @@ interface TokenReductionConfig {
16
16
  /**
17
17
  * Post-processor configuration
18
18
  */
19
- interface PostProcessorConfig {
19
+ export interface PostProcessorConfig {
20
20
  /** Whether post-processing is enabled */
21
21
  enabled?: boolean;
22
22
  /** List of enabled processors */
@@ -24,10 +24,72 @@ interface PostProcessorConfig {
24
24
  /** List of disabled processors */
25
25
  disabledProcessors?: string[];
26
26
  }
27
+ /**
28
+ * Keyword extraction algorithm type
29
+ *
30
+ * Supported algorithms:
31
+ * - "yake": YAKE (Yet Another Keyword Extractor) - statistical approach
32
+ * - "rake": RAKE (Rapid Automatic Keyword Extraction) - co-occurrence based
33
+ */
34
+ export type KeywordAlgorithm = "yake" | "rake";
35
+ /**
36
+ * YAKE algorithm-specific parameters
37
+ */
38
+ export interface YakeParams {
39
+ /** Window size for co-occurrence analysis (default: 2) */
40
+ windowSize?: number;
41
+ }
42
+ /**
43
+ * RAKE algorithm-specific parameters
44
+ */
45
+ export interface RakeParams {
46
+ /** Minimum word length to consider (default: 1) */
47
+ minWordLength?: number;
48
+ /** Maximum words in a keyword phrase (default: 3) */
49
+ maxWordsPerPhrase?: number;
50
+ }
51
+ /**
52
+ * Keyword extraction configuration
53
+ *
54
+ * Controls how keywords are extracted from text, including algorithm selection,
55
+ * scoring thresholds, n-gram ranges, and language-specific settings.
56
+ */
57
+ export interface KeywordConfig {
58
+ /** Algorithm to use for extraction (default: "yake") */
59
+ algorithm?: KeywordAlgorithm;
60
+ /** Maximum number of keywords to extract (default: 10) */
61
+ maxKeywords?: number;
62
+ /** Minimum score threshold 0.0-1.0 (default: 0.0) */
63
+ minScore?: number;
64
+ /** N-gram range [min, max] for keyword extraction (default: [1, 3]) */
65
+ ngramRange?: [number, number];
66
+ /** Language code for stopword filtering (e.g., "en", "de", "fr") */
67
+ language?: string;
68
+ /** YAKE-specific tuning parameters */
69
+ yakeParams?: YakeParams;
70
+ /** RAKE-specific tuning parameters */
71
+ rakeParams?: RakeParams;
72
+ }
73
+ /**
74
+ * Extracted keyword with relevance metadata
75
+ *
76
+ * Represents a single keyword extracted from text along with its relevance score,
77
+ * the algorithm that extracted it, and optional position information.
78
+ */
79
+ export interface ExtractedKeyword {
80
+ /** The keyword text */
81
+ text: string;
82
+ /** Relevance score (higher is better, algorithm-specific range) */
83
+ score: number;
84
+ /** Algorithm that extracted this keyword */
85
+ algorithm: KeywordAlgorithm;
86
+ /** Optional positions where keyword appears in text (character offsets) */
87
+ positions?: number[];
88
+ }
27
89
  /**
28
90
  * Configuration for document extraction
29
91
  */
30
- interface ExtractionConfig {
92
+ export interface ExtractionConfig {
31
93
  /** OCR configuration */
32
94
  ocr?: OcrConfig;
33
95
  /** Chunking configuration */
@@ -44,6 +106,8 @@ interface ExtractionConfig {
44
106
  tokenReduction?: TokenReductionConfig;
45
107
  /** Post-processor configuration */
46
108
  postprocessor?: PostProcessorConfig;
109
+ /** Keyword extraction configuration */
110
+ keywords?: KeywordConfig;
47
111
  /** Whether to use caching */
48
112
  useCache?: boolean;
49
113
  /** Enable quality processing */
@@ -56,7 +120,7 @@ interface ExtractionConfig {
56
120
  /**
57
121
  * Tesseract OCR configuration
58
122
  */
59
- interface TesseractConfig {
123
+ export interface TesseractConfig {
60
124
  /** Tesseract page segmentation mode */
61
125
  psm?: number;
62
126
  /** Enable table detection */
@@ -67,7 +131,7 @@ interface TesseractConfig {
67
131
  /**
68
132
  * OCR configuration
69
133
  */
70
- interface OcrConfig {
134
+ export interface OcrConfig {
71
135
  /** OCR backend to use */
72
136
  backend?: string;
73
137
  /** Language codes (ISO 639) */
@@ -82,7 +146,7 @@ interface OcrConfig {
82
146
  /**
83
147
  * Chunking configuration
84
148
  */
85
- interface ChunkingConfig {
149
+ export interface ChunkingConfig {
86
150
  /** Maximum characters per chunk */
87
151
  maxChars?: number;
88
152
  /** Overlap between chunks */
@@ -91,7 +155,7 @@ interface ChunkingConfig {
91
155
  /**
92
156
  * Image extraction configuration
93
157
  */
94
- interface ImageExtractionConfig {
158
+ export interface ImageExtractionConfig {
95
159
  /** Whether to extract images */
96
160
  enabled?: boolean;
97
161
  /** Target DPI for image extraction */
@@ -108,7 +172,7 @@ interface ImageExtractionConfig {
108
172
  /**
109
173
  * PDF extraction configuration
110
174
  */
111
- interface PdfConfig {
175
+ export interface PdfConfig {
112
176
  /** Whether to extract images from PDF */
113
177
  extractImages?: boolean;
114
178
  /** Passwords for encrypted PDFs */
@@ -119,21 +183,25 @@ interface PdfConfig {
119
183
  /**
120
184
  * Page extraction configuration
121
185
  */
122
- interface PageExtractionConfig {
123
- /** Whether to extract per-page content */
124
- enabled?: boolean;
186
+ export interface PageExtractionConfig {
187
+ /** Extract pages as separate array (ExtractionResult.pages) */
188
+ extractPages?: boolean;
189
+ /** Insert page markers in main content string */
190
+ insertPageMarkers?: boolean;
191
+ /** Page marker format (use {page_num} placeholder) */
192
+ markerFormat?: string;
125
193
  }
126
194
  /**
127
195
  * Language detection configuration
128
196
  */
129
- interface LanguageDetectionConfig {
197
+ export interface LanguageDetectionConfig {
130
198
  /** Whether to detect languages */
131
199
  enabled?: boolean;
132
200
  }
133
201
  /**
134
202
  * Result of document extraction
135
203
  */
136
- interface ExtractionResult {
204
+ export interface ExtractionResult {
137
205
  /** Extracted text content */
138
206
  content: string;
139
207
  /** MIME type of the document */
@@ -150,11 +218,13 @@ interface ExtractionResult {
150
218
  images?: ExtractedImage[] | null;
151
219
  /** Per-page content */
152
220
  pages?: PageContent[] | null;
221
+ /** Extracted keywords when keyword extraction is enabled */
222
+ keywords?: ExtractedKeyword[] | null;
153
223
  }
154
224
  /**
155
225
  * Document metadata
156
226
  */
157
- interface Metadata {
227
+ export interface Metadata {
158
228
  /** Document title */
159
229
  title?: string;
160
230
  /** Document subject or description */
@@ -186,7 +256,7 @@ interface Metadata {
186
256
  /**
187
257
  * Extracted table
188
258
  */
189
- interface Table {
259
+ export interface Table {
190
260
  /** Table cells/rows */
191
261
  cells?: string[][];
192
262
  /** Table markdown representation */
@@ -201,7 +271,7 @@ interface Table {
201
271
  /**
202
272
  * Chunk metadata
203
273
  */
204
- interface ChunkMetadata {
274
+ export interface ChunkMetadata {
205
275
  /** Character start position in original content */
206
276
  charStart: number;
207
277
  /** Character end position in original content */
@@ -216,7 +286,7 @@ interface ChunkMetadata {
216
286
  /**
217
287
  * Text chunk from chunked content
218
288
  */
219
- interface Chunk {
289
+ export interface Chunk {
220
290
  /** Chunk text content */
221
291
  content: string;
222
292
  /** Chunk metadata */
@@ -231,7 +301,7 @@ interface Chunk {
231
301
  /**
232
302
  * Extracted image from document
233
303
  */
234
- interface ExtractedImage {
304
+ export interface ExtractedImage {
235
305
  /** Image data as Uint8Array or base64 string */
236
306
  data: Uint8Array | string;
237
307
  /** Image format/MIME type */
@@ -260,7 +330,7 @@ interface ExtractedImage {
260
330
  /**
261
331
  * Per-page content
262
332
  */
263
- interface PageContent {
333
+ export interface PageContent {
264
334
  /** Page number (1-indexed) */
265
335
  pageNumber: number;
266
336
  /** Text content of the page */
@@ -273,7 +343,7 @@ interface PageContent {
273
343
  /**
274
344
  * OCR backend protocol/interface
275
345
  */
276
- interface OcrBackendProtocol {
346
+ export interface OcrBackendProtocol {
277
347
  /** Get the backend name */
278
348
  name(): string;
279
349
  /** Get supported language codes */
@@ -290,5 +360,4 @@ interface OcrBackendProtocol {
290
360
  tables?: unknown[];
291
361
  } | string>;
292
362
  }
293
-
294
- export type { Chunk as C, ExtractionConfig as E, ImageExtractionConfig as I, LanguageDetectionConfig as L, Metadata as M, OcrBackendProtocol as O, PageContent as P, Table as T, ExtractionResult as a, ChunkingConfig as b, ChunkMetadata as c, ExtractedImage as d, OcrConfig as e, PageExtractionConfig as f, PdfConfig as g, PostProcessorConfig as h, TesseractConfig as i, TokenReductionConfig as j };
363
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../typescript/types.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACpC,2BAA2B;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,gDAAgD;IAChD,sBAAsB,CAAC,EAAE,OAAO,CAAC;CACjC;AAED;;GAEG;AACH,MAAM,WAAW,mBAAmB;IACnC,yCAAyC;IACzC,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,iCAAiC;IACjC,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC7B,kCAAkC;IAClC,kBAAkB,CAAC,EAAE,MAAM,EAAE,CAAC;CAC9B;AAED;;;;;;GAMG;AACH,MAAM,MAAM,gBAAgB,GAAG,MAAM,GAAG,MAAM,CAAC;AAE/C;;GAEG;AACH,MAAM,WAAW,UAAU;IAC1B,0DAA0D;IAC1D,UAAU,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IAC1B,mDAAmD;IACnD,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,qDAAqD;IACrD,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED;;;;;GAKG;AACH,MAAM,WAAW,aAAa;IAC7B,wDAAwD;IACxD,SAAS,CAAC,EAAE,gBAAgB,CAAC;IAC7B,0DAA0D;IAC1D,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,qDAAqD;IACrD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,uEAAuE;IACvE,UAAU,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC9B,oEAAoE;IACpE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,sCAAsC;IACtC,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,sCAAsC;IACtC,UAAU,CAAC,EAAE,UAAU,CAAC;CACxB;AAED;;;;;GAKG;AACH,MAAM,WAAW,gBAAgB;IAChC,uBAAuB;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,mEAAmE;IACnE,KAAK,EAAE,MAAM,CAAC;IACd,4CAA4C;IAC5C,SAAS,EAAE,gBAAgB,CAAC;IAC5B,2EAA2E;IAC3E,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC,wBAAwB;IACxB,GAAG,CAAC,EAAE,SAAS,CAAC;IAChB,6BAA6B;IAC7B,QAAQ,CAAC,EAAE,cAAc,CAAC;IAC1B,qCAAqC;IACrC,MAAM,CAAC,EAAE,qBAAqB,CAAC;IAC/B,oCAAoC;IACpC,KAAK,CAAC,EAAE,oBAAoB,CAAC;IAC7B,uCAAuC;IACvC,iBAAiB,CAAC,EAAE,uBAAuB,CAAC;IAC5C,6BAA6B;IAC7B,UAAU,CAAC,EAAE,SAAS,CAAC;IACvB,oCAAoC;IACpC,cAAc,CAAC,EAAE,oBAAoB,CAAC;IACtC,mCAAmC;IACnC,aAAa,CAAC,EAAE,mBAAmB,CAAC;IACpC,uCAAuC;IACvC,QAAQ,CAAC,EAAE,aAAa,CAAC;IACzB,6BAA6B;IAC7B,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,gCAAgC;IAChC,uBAAuB,CAAC,EAAE,OAAO,CAAC;IAClC,0CAA0C;IAC1C,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,qCAAqC;IACrC,wBAAwB,CAAC,EAAE,MAAM,CAAC;CAClC;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC/B,uCAAuC;IACvC,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,6BAA6B;IAC7B,oBAAoB,CAAC,EAAE,OAAO,CAAC;IAC/B,0CAA0C;IAC1C,qBAAqB,CAAC,EAAE,MAAM,CAAC;CAC/B;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACzB,yBAAyB;IACzB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,+BAA+B;IAC/B,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,6BAA6B;IAC7B,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,uCAAuC;IACvC,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC,4BAA4B;IAC5B,QAAQ,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC9B,mCAAmC;IACnC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,6BAA6B;IAC7B,UAAU,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,qBAAqB;IACrC,gCAAgC;IAChC,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,sCAAsC;IACtC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,wCAAwC;IACxC,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,+BAA+B;IAC/B,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,4BAA4B;IAC5B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,4BAA4B;IAC5B,MAAM,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACzB,yCAAyC;IACzC,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,mCAAmC;IACnC,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,kCAAkC;IAClC,eAAe,CAAC,EAAE,OAAO,CAAC;CAC1B;AAED;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACpC,+DAA+D;IAC/D,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,iDAAiD;IACjD,iBAAiB,CAAC,EAAE,OAAO,CAAC;IAC5B,sDAAsD;IACtD,YAAY,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,uBAAuB;IACvC,kCAAkC;IAClC,OAAO,CAAC,EAAE,OAAO,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC,6BAA6B;IAC7B,OAAO,EAAE,MAAM,CAAC;IAChB,gCAAgC;IAChC,QAAQ,EAAE,MAAM,CAAC;IACjB,wBAAwB;IACxB,QAAQ,EAAE,QAAQ,CAAC;IACnB,uBAAuB;IACvB,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,yCAAyC;IACzC,iBAAiB,CAAC,EAAE,MAAM,EAAE,GAAG,IAAI,CAAC;IACpC,2CAA2C;IAC3C,MAAM,CAAC,EAAE,KAAK,EAAE,GAAG,IAAI,CAAC;IACxB,uBAAuB;IACvB,MAAM,CAAC,EAAE,cAAc,EAAE,GAAG,IAAI,CAAC;IACjC,uBAAuB;IACvB,KAAK,CAAC,EAAE,WAAW,EAAE,GAAG,IAAI,CAAC;IAC7B,4DAA4D;IAC5D,QAAQ,CAAC,EAAE,gBAAgB,EAAE,GAAG,IAAI,CAAC;CACrC;AAED;;GAEG;AACH,MAAM,WAAW,QAAQ;IACxB,qBAAqB;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,sCAAsC;IACtC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,yBAAyB;IACzB,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,oBAAoB;IACpB,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;IACpB,sCAAsC;IACtC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,2CAA2C;IAC3C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,oDAAoD;IACpD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,oCAAoC;IACpC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,0CAA0C;IAC1C,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,6BAA6B;IAC7B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,+BAA+B;IAC/B,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB;;;OAGG;IACH,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACvB;AAED;;GAEG;AACH,MAAM,WAAW,KAAK;IACrB,uBAAuB;IACvB,KAAK,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC;IACnB,oCAAoC;IACpC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,+BAA+B;IAC/B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,oBAAoB;IACpB,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,iBAAiB;IACjB,IAAI,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC7B,mDAAmD;IACnD,SAAS,EAAE,MAAM,CAAC;IAClB,iDAAiD;IACjD,OAAO,EAAE,MAAM,CAAC;IAChB,+BAA+B;IAC/B,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,0BAA0B;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,6BAA6B;IAC7B,WAAW,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,KAAK;IACrB,yBAAyB;IACzB,OAAO,EAAE,MAAM,CAAC;IAChB,qBAAqB;IACrB,QAAQ,CAAC,EAAE,aAAa,CAAC;IACzB,sDAAsD;IACtD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,wCAAwC;IACxC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,mCAAmC;IACnC,SAAS,CAAC,EAAE,MAAM,EAAE,GAAG,IAAI,CAAC;CAC5B;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC9B,gDAAgD;IAChD,IAAI,EAAE,UAAU,GAAG,MAAM,CAAC;IAC1B,6BAA6B;IAC7B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,6BAA6B;IAC7B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,8BAA8B;IAC9B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,+BAA+B;IAC/B,UAAU,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,4BAA4B;IAC5B,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,6BAA6B;IAC7B,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,+BAA+B;IAC/B,UAAU,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,+BAA+B;IAC/B,gBAAgB,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,mCAAmC;IACnC,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,wBAAwB;IACxB,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,yCAAyC;IACzC,SAAS,CAAC,EAAE,gBAAgB,GAAG,MAAM,GAAG,IAAI,CAAC;CAC7C;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC3B,8BAA8B;IAC9B,UAAU,EAAE,MAAM,CAAC;IACnB,+BAA+B;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,0BAA0B;IAC1B,MAAM,CAAC,EAAE,KAAK,EAAE,CAAC;IACjB,0BAA0B;IAC1B,MAAM,CAAC,EAAE,cAAc,EAAE,CAAC;CAC1B;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IAClC,2BAA2B;IAC3B,IAAI,IAAI,MAAM,CAAC;IACf,mCAAmC;IACnC,kBAAkB,CAAC,IAAI,MAAM,EAAE,CAAC;IAChC,6BAA6B;IAC7B,UAAU,CAAC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACpE,2BAA2B;IAC3B,QAAQ,CAAC,IAAI,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAClC,gCAAgC;IAChC,YAAY,CACX,SAAS,EAAE,UAAU,GAAG,MAAM,EAC9B,QAAQ,CAAC,EAAE,MAAM,GACf,OAAO,CACP;QACA,OAAO,EAAE,MAAM,CAAC;QAChB,SAAS,EAAE,MAAM,CAAC;QAClB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QACnC,MAAM,CAAC,EAAE,OAAO,EAAE,CAAC;KAClB,GACD,MAAM,CACR,CAAC;CACF"}