@kreuzberg/wasm 4.0.0-rc.6 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/LICENSE +7 -0
  2. package/README.md +317 -801
  3. package/dist/adapters/wasm-adapter.d.ts +7 -10
  4. package/dist/adapters/wasm-adapter.d.ts.map +1 -0
  5. package/dist/adapters/wasm-adapter.js +53 -54
  6. package/dist/adapters/wasm-adapter.js.map +1 -1
  7. package/dist/index.d.ts +23 -67
  8. package/dist/index.d.ts.map +1 -0
  9. package/dist/index.js +1102 -104
  10. package/dist/index.js.map +1 -1
  11. package/dist/ocr/registry.d.ts +7 -10
  12. package/dist/ocr/registry.d.ts.map +1 -0
  13. package/dist/ocr/registry.js +9 -28
  14. package/dist/ocr/registry.js.map +1 -1
  15. package/dist/ocr/tesseract-wasm-backend.d.ts +3 -6
  16. package/dist/ocr/tesseract-wasm-backend.d.ts.map +1 -0
  17. package/dist/ocr/tesseract-wasm-backend.js +8 -83
  18. package/dist/ocr/tesseract-wasm-backend.js.map +1 -1
  19. package/dist/pdfium.js +77 -0
  20. package/dist/pkg/LICENSE +7 -0
  21. package/dist/pkg/README.md +498 -0
  22. package/dist/{kreuzberg_wasm.d.ts → pkg/kreuzberg_wasm.d.ts} +24 -12
  23. package/dist/{kreuzberg_wasm.js → pkg/kreuzberg_wasm.js} +224 -233
  24. package/dist/pkg/kreuzberg_wasm_bg.js +1871 -0
  25. package/dist/{kreuzberg_wasm_bg.wasm → pkg/kreuzberg_wasm_bg.wasm} +0 -0
  26. package/dist/{kreuzberg_wasm_bg.wasm.d.ts → pkg/kreuzberg_wasm_bg.wasm.d.ts} +10 -13
  27. package/dist/pkg/package.json +27 -0
  28. package/dist/plugin-registry.d.ts +246 -0
  29. package/dist/plugin-registry.d.ts.map +1 -0
  30. package/dist/runtime.d.ts +21 -22
  31. package/dist/runtime.d.ts.map +1 -0
  32. package/dist/runtime.js +21 -41
  33. package/dist/runtime.js.map +1 -1
  34. package/dist/types.d.ts +363 -0
  35. package/dist/types.d.ts.map +1 -0
  36. package/package.json +34 -51
  37. package/dist/adapters/wasm-adapter.d.mts +0 -121
  38. package/dist/adapters/wasm-adapter.mjs +0 -221
  39. package/dist/adapters/wasm-adapter.mjs.map +0 -1
  40. package/dist/index.d.mts +0 -466
  41. package/dist/index.mjs +0 -384
  42. package/dist/index.mjs.map +0 -1
  43. package/dist/kreuzberg_wasm.d.mts +0 -758
  44. package/dist/kreuzberg_wasm.mjs +0 -48
  45. package/dist/ocr/registry.d.mts +0 -102
  46. package/dist/ocr/registry.mjs +0 -70
  47. package/dist/ocr/registry.mjs.map +0 -1
  48. package/dist/ocr/tesseract-wasm-backend.d.mts +0 -257
  49. package/dist/ocr/tesseract-wasm-backend.mjs +0 -424
  50. package/dist/ocr/tesseract-wasm-backend.mjs.map +0 -1
  51. package/dist/runtime.d.mts +0 -256
  52. package/dist/runtime.mjs +0 -152
  53. package/dist/runtime.mjs.map +0 -1
  54. package/dist/snippets/wasm-bindgen-rayon-38edf6e439f6d70d/src/workerHelpers.js +0 -107
  55. package/dist/types-GJVIvbPy.d.mts +0 -221
  56. package/dist/types-GJVIvbPy.d.ts +0 -221
@@ -1,5 +1,3 @@
1
- import { E as ExtractionConfig, a as ExtractionResult } from '../types-GJVIvbPy.js';
2
-
3
1
  /**
4
2
  * WASM Type Adapter
5
3
  *
@@ -27,7 +25,7 @@ import { E as ExtractionConfig, a as ExtractionResult } from '../types-GJVIvbPy.
27
25
  * const normalized = configToJS(config);
28
26
  * ```
29
27
  */
30
-
28
+ import type { ExtractionConfig, ExtractionResult } from "../types.d.ts";
31
29
  /**
32
30
  * Convert a File or Blob to Uint8Array
33
31
  *
@@ -45,7 +43,7 @@ import { E as ExtractionConfig, a as ExtractionResult } from '../types-GJVIvbPy.
45
43
  * const result = await extractBytes(bytes, 'application/pdf');
46
44
  * ```
47
45
  */
48
- declare function fileToUint8Array(file: File | Blob): Promise<Uint8Array>;
46
+ export declare function fileToUint8Array(file: File | Blob): Promise<Uint8Array>;
49
47
  /**
50
48
  * Normalize ExtractionConfig for WASM processing
51
49
  *
@@ -64,7 +62,7 @@ declare function fileToUint8Array(file: File | Blob): Promise<Uint8Array>;
64
62
  * const wasmConfig = configToJS(config);
65
63
  * ```
66
64
  */
67
- declare function configToJS(config: ExtractionConfig | null): Record<string, unknown>;
65
+ export declare function configToJS(config: ExtractionConfig | null): Record<string, unknown>;
68
66
  /**
69
67
  * Parse WASM extraction result and convert to TypeScript type
70
68
  *
@@ -82,7 +80,7 @@ declare function configToJS(config: ExtractionConfig | null): Record<string, unk
82
80
  * console.log(result.content);
83
81
  * ```
84
82
  */
85
- declare function jsToExtractionResult(jsValue: unknown): ExtractionResult;
83
+ export declare function jsToExtractionResult(jsValue: unknown): ExtractionResult;
86
84
  /**
87
85
  * Wrap and format WASM errors with context
88
86
  *
@@ -104,7 +102,7 @@ declare function jsToExtractionResult(jsValue: unknown): ExtractionResult;
104
102
  * }
105
103
  * ```
106
104
  */
107
- declare function wrapWasmError(error: unknown, context: string): Error;
105
+ export declare function wrapWasmError(error: unknown, context: string): Error;
108
106
  /**
109
107
  * Validate that a WASM-returned value conforms to ExtractionResult structure
110
108
  *
@@ -116,6 +114,5 @@ declare function wrapWasmError(error: unknown, context: string): Error;
116
114
  *
117
115
  * @internal
118
116
  */
119
- declare function isValidExtractionResult(value: unknown): value is ExtractionResult;
120
-
121
- export { configToJS, fileToUint8Array, isValidExtractionResult, jsToExtractionResult, wrapWasmError };
117
+ export declare function isValidExtractionResult(value: unknown): value is ExtractionResult;
118
+ //# sourceMappingURL=wasm-adapter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"wasm-adapter.d.ts","sourceRoot":"","sources":["../../typescript/adapters/wasm-adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAEH,OAAO,KAAK,EAAyB,gBAAgB,EAAE,gBAAgB,EAAmB,MAAM,aAAa,CAAC;AAoC9G;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAsB,gBAAgB,CAAC,IAAI,EAAE,IAAI,GAAG,IAAI,GAAG,OAAO,CAAC,UAAU,CAAC,CAa7E;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,wBAAgB,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,IAAI,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAoCnF;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAgB,oBAAoB,CAAC,OAAO,EAAE,OAAO,GAAG,gBAAgB,CAkMvE;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAgB,aAAa,CAAC,KAAK,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,GAAG,KAAK,CASpE;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,uBAAuB,CAAC,KAAK,EAAE,OAAO,GAAG,KAAK,IAAI,gBAAgB,CAajF"}
@@ -1,31 +1,5 @@
1
- "use strict";
2
- var __defProp = Object.defineProperty;
3
- var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
- var __getOwnPropNames = Object.getOwnPropertyNames;
5
- var __hasOwnProp = Object.prototype.hasOwnProperty;
6
- var __export = (target, all) => {
7
- for (var name in all)
8
- __defProp(target, name, { get: all[name], enumerable: true });
9
- };
10
- var __copyProps = (to, from, except, desc) => {
11
- if (from && typeof from === "object" || typeof from === "function") {
12
- for (let key of __getOwnPropNames(from))
13
- if (!__hasOwnProp.call(to, key) && key !== except)
14
- __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
- }
16
- return to;
17
- };
18
- var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
- var wasm_adapter_exports = {};
20
- __export(wasm_adapter_exports, {
21
- configToJS: () => configToJS,
22
- fileToUint8Array: () => fileToUint8Array,
23
- isValidExtractionResult: () => isValidExtractionResult,
24
- jsToExtractionResult: () => jsToExtractionResult,
25
- wrapWasmError: () => wrapWasmError
26
- });
27
- module.exports = __toCommonJS(wasm_adapter_exports);
28
- const MAX_FILE_SIZE = 512 * 1024 * 1024;
1
+ // typescript/adapters/wasm-adapter.ts
2
+ var MAX_FILE_SIZE = 512 * 1024 * 1024;
29
3
  function isNumberOrNull(value) {
30
4
  return typeof value === "number" || value === null;
31
5
  }
@@ -86,10 +60,11 @@ function jsToExtractionResult(jsValue) {
86
60
  throw new Error("Invalid extraction result: value is not an object");
87
61
  }
88
62
  const result = jsValue;
63
+ const mimeType = typeof result.mimeType === "string" ? result.mimeType : typeof result.mime_type === "string" ? result.mime_type : null;
89
64
  if (typeof result.content !== "string") {
90
65
  throw new Error("Invalid extraction result: missing or invalid content");
91
66
  }
92
- if (typeof result.mimeType !== "string") {
67
+ if (typeof mimeType !== "string") {
93
68
  throw new Error("Invalid extraction result: missing or invalid mimeType");
94
69
  }
95
70
  if (!result.metadata || typeof result.metadata !== "object") {
@@ -129,30 +104,46 @@ function jsToExtractionResult(jsValue) {
129
104
  }
130
105
  embedding = c.embedding;
131
106
  }
132
- if (typeof metadata.charStart !== "number") {
133
- throw new Error("Invalid chunk metadata: charStart must be a number");
134
- }
135
- if (typeof metadata.charEnd !== "number") {
136
- throw new Error("Invalid chunk metadata: charEnd must be a number");
137
- }
138
- if (!isNumberOrNull(metadata.tokenCount)) {
139
- throw new Error("Invalid chunk metadata: tokenCount must be a number or null");
140
- }
141
- if (typeof metadata.chunkIndex !== "number") {
142
- throw new Error("Invalid chunk metadata: chunkIndex must be a number");
143
- }
144
- if (typeof metadata.totalChunks !== "number") {
145
- throw new Error("Invalid chunk metadata: totalChunks must be a number");
107
+ const coerceToNumber = (value, fieldName) => {
108
+ if (typeof value === "number") {
109
+ return value;
110
+ }
111
+ if (typeof value === "bigint") {
112
+ return Number(value);
113
+ }
114
+ if (typeof value === "string") {
115
+ const parsed = parseInt(value, 10);
116
+ if (Number.isNaN(parsed)) {
117
+ throw new Error(`Invalid chunk metadata: ${fieldName} must be a valid number, got "${value}"`);
118
+ }
119
+ return parsed;
120
+ }
121
+ throw new Error(`Invalid chunk metadata: ${fieldName} must be a number, got ${typeof value}`);
122
+ };
123
+ const charStart = coerceToNumber(
124
+ metadata.charStart ?? metadata.char_start ?? metadata.byteStart ?? metadata.byte_start,
125
+ "charStart"
126
+ );
127
+ const charEnd = coerceToNumber(
128
+ metadata.charEnd ?? metadata.char_end ?? metadata.byteEnd ?? metadata.byte_end,
129
+ "charEnd"
130
+ );
131
+ const chunkIndex = coerceToNumber(metadata.chunkIndex ?? metadata.chunk_index, "chunkIndex");
132
+ const totalChunks = coerceToNumber(metadata.totalChunks ?? metadata.total_chunks, "totalChunks");
133
+ let tokenCount = null;
134
+ const tokenCountValue = metadata.tokenCount ?? metadata.token_count;
135
+ if (tokenCountValue !== null && tokenCountValue !== void 0) {
136
+ tokenCount = coerceToNumber(tokenCountValue, "tokenCount");
146
137
  }
147
138
  return {
148
139
  content: c.content,
149
140
  embedding,
150
141
  metadata: {
151
- charStart: metadata.charStart,
152
- charEnd: metadata.charEnd,
153
- tokenCount: metadata.tokenCount,
154
- chunkIndex: metadata.chunkIndex,
155
- totalChunks: metadata.totalChunks
142
+ charStart,
143
+ charEnd,
144
+ tokenCount,
145
+ chunkIndex,
146
+ totalChunks
156
147
  }
157
148
  };
158
149
  }) : null;
@@ -206,16 +197,17 @@ function jsToExtractionResult(jsValue) {
206
197
  };
207
198
  }) : null;
208
199
  let detectedLanguages = null;
209
- if (Array.isArray(result.detectedLanguages)) {
210
- if (!result.detectedLanguages.every((lang) => typeof lang === "string")) {
200
+ const detectedLanguagesRaw = Array.isArray(result.detectedLanguages) ? result.detectedLanguages : result.detected_languages;
201
+ if (Array.isArray(detectedLanguagesRaw)) {
202
+ if (!detectedLanguagesRaw.every((lang) => typeof lang === "string")) {
211
203
  throw new Error("Invalid result: detectedLanguages must contain only strings");
212
204
  }
213
- detectedLanguages = result.detectedLanguages;
205
+ detectedLanguages = detectedLanguagesRaw;
214
206
  }
215
207
  return {
216
208
  content: result.content,
217
- mimeType: result.mimeType,
218
- metadata: result.metadata,
209
+ mimeType,
210
+ metadata: result.metadata ?? {},
219
211
  tables,
220
212
  detectedLanguages,
221
213
  chunks,
@@ -236,6 +228,13 @@ function isValidExtractionResult(value) {
236
228
  return false;
237
229
  }
238
230
  const obj = value;
239
- return typeof obj.content === "string" && typeof obj.mimeType === "string" && obj.metadata !== null && typeof obj.metadata === "object" && Array.isArray(obj.tables);
231
+ return typeof obj.content === "string" && (typeof obj.mimeType === "string" || typeof obj.mime_type === "string") && obj.metadata !== null && typeof obj.metadata === "object" && Array.isArray(obj.tables);
240
232
  }
233
+ export {
234
+ configToJS,
235
+ fileToUint8Array,
236
+ isValidExtractionResult,
237
+ jsToExtractionResult,
238
+ wrapWasmError
239
+ };
241
240
  //# sourceMappingURL=wasm-adapter.js.map
@@ -1 +1 @@
1
- {"version":3,"sources":["../../typescript/adapters/wasm-adapter.ts"],"sourcesContent":["/**\n * WASM Type Adapter\n *\n * This module provides type adapters for converting between JavaScript/TypeScript\n * types and WASM-compatible types, handling File/Blob conversions, config normalization,\n * and result parsing.\n *\n * @example File Conversion\n * ```typescript\n * import { fileToUint8Array } from '@kreuzberg/wasm/adapters/wasm-adapter';\n *\n * const file = event.target.files[0];\n * const bytes = await fileToUint8Array(file);\n * const result = await extractBytes(bytes, file.type);\n * ```\n *\n * @example Config Normalization\n * ```typescript\n * import { configToJS } from '@kreuzberg/wasm/adapters/wasm-adapter';\n *\n * const config = {\n * ocr: { backend: 'tesseract', language: 'eng' },\n * chunking: { maxChars: 1000 }\n * };\n * const normalized = configToJS(config);\n * ```\n */\n\nimport type { Chunk, ExtractedImage, ExtractionConfig, ExtractionResult, Table } from \"../types.js\";\n\n/**\n * Maximum file size for processing (512 MB)\n *\n * @internal\n */\nconst MAX_FILE_SIZE = 512 * 1024 * 1024;\n\n/**\n * Type predicate to validate numeric value or null\n *\n * @internal\n */\nfunction isNumberOrNull(value: unknown): value is number | null {\n\treturn typeof value === \"number\" || value === null;\n}\n\n/**\n * Type predicate to validate string value or null\n *\n * @internal\n */\nfunction isStringOrNull(value: unknown): value is string | null {\n\treturn typeof value === \"string\" || value === null;\n}\n\n/**\n * Type predicate to validate boolean value\n *\n * @internal\n */\nfunction isBoolean(value: unknown): value is boolean {\n\treturn typeof value === \"boolean\";\n}\n\n/**\n * Convert a File or Blob to Uint8Array\n *\n * Handles both browser File API and server-side Blob-like objects,\n * providing a unified interface for reading binary data.\n *\n * @param file - The File or Blob to convert\n * @returns Promise resolving to the byte array\n * @throws {Error} If the file cannot be read or exceeds size limit\n *\n * @example\n * ```typescript\n * const file = document.getElementById('input').files[0];\n * const bytes = await fileToUint8Array(file);\n * const result = await extractBytes(bytes, 'application/pdf');\n * ```\n */\nexport async function fileToUint8Array(file: File | Blob): Promise<Uint8Array> {\n\ttry {\n\t\tif (file.size > MAX_FILE_SIZE) {\n\t\t\tthrow new Error(\n\t\t\t\t`File size (${file.size} bytes) exceeds maximum (${MAX_FILE_SIZE} bytes). Maximum file size is 512 MB.`,\n\t\t\t);\n\t\t}\n\n\t\tconst arrayBuffer = await file.arrayBuffer();\n\t\treturn new Uint8Array(arrayBuffer);\n\t} catch (error) {\n\t\tthrow new Error(`Failed to read file: ${error instanceof Error ? error.message : String(error)}`);\n\t}\n}\n\n/**\n * Normalize ExtractionConfig for WASM processing\n *\n * Converts TypeScript configuration objects to a WASM-compatible format,\n * handling null values, undefined properties, and nested structures.\n *\n * @param config - The extraction configuration or null\n * @returns Normalized configuration object suitable for WASM\n *\n * @example\n * ```typescript\n * const config: ExtractionConfig = {\n * ocr: { backend: 'tesseract' },\n * chunking: { maxChars: 1000 }\n * };\n * const wasmConfig = configToJS(config);\n * ```\n */\nexport function configToJS(config: ExtractionConfig | null): Record<string, unknown> {\n\tif (!config) {\n\t\treturn {};\n\t}\n\n\tconst normalized: Record<string, unknown> = {};\n\n\t// Recursively normalize nested objects\n\tconst normalizeValue = (value: unknown): unknown => {\n\t\tif (value === null || value === undefined) {\n\t\t\treturn null;\n\t\t}\n\t\tif (typeof value === \"object\") {\n\t\t\tif (Array.isArray(value)) {\n\t\t\t\treturn value.map(normalizeValue);\n\t\t\t}\n\t\t\tconst obj = value as Record<string, unknown>;\n\t\t\tconst normalized: Record<string, unknown> = {};\n\t\t\tfor (const [key, val] of Object.entries(obj)) {\n\t\t\t\tconst normalizedVal = normalizeValue(val);\n\t\t\t\tif (normalizedVal !== null && normalizedVal !== undefined) {\n\t\t\t\t\tnormalized[key] = normalizedVal;\n\t\t\t\t}\n\t\t\t}\n\t\t\treturn Object.keys(normalized).length > 0 ? normalized : null;\n\t\t}\n\t\treturn value;\n\t};\n\n\tfor (const [key, value] of Object.entries(config)) {\n\t\tconst normalizedValue = normalizeValue(value);\n\t\tif (normalizedValue !== null && normalizedValue !== undefined) {\n\t\t\tnormalized[key] = normalizedValue;\n\t\t}\n\t}\n\n\treturn normalized;\n}\n\n/**\n * Parse WASM extraction result and convert to TypeScript type\n *\n * Handles conversion of WASM-returned objects to proper ExtractionResult types,\n * including proper array conversions and type assertions for tables, chunks, and images.\n *\n * @param jsValue - The raw WASM result value\n * @returns Properly typed ExtractionResult\n * @throws {Error} If the result structure is invalid\n *\n * @example\n * ```typescript\n * const wasmResult = await wasmExtract(bytes, mimeType, config);\n * const result = jsToExtractionResult(wasmResult);\n * console.log(result.content);\n * ```\n */\nexport function jsToExtractionResult(jsValue: unknown): ExtractionResult {\n\tif (!jsValue || typeof jsValue !== \"object\") {\n\t\tthrow new Error(\"Invalid extraction result: value is not an object\");\n\t}\n\n\tconst result = jsValue as Record<string, unknown>;\n\n\t// Validate required fields\n\tif (typeof result.content !== \"string\") {\n\t\tthrow new Error(\"Invalid extraction result: missing or invalid content\");\n\t}\n\tif (typeof result.mimeType !== \"string\") {\n\t\tthrow new Error(\"Invalid extraction result: missing or invalid mimeType\");\n\t}\n\tif (!result.metadata || typeof result.metadata !== \"object\") {\n\t\tthrow new Error(\"Invalid extraction result: missing or invalid metadata\");\n\t}\n\n\t// Parse tables\n\tconst tables: Table[] = [];\n\tif (Array.isArray(result.tables)) {\n\t\tfor (const table of result.tables) {\n\t\t\tif (table && typeof table === \"object\") {\n\t\t\t\tconst t = table as Record<string, unknown>;\n\t\t\t\t// Validate table structure before type casting\n\t\t\t\tif (\n\t\t\t\t\tArray.isArray(t.cells) &&\n\t\t\t\t\tt.cells.every((row) => Array.isArray(row) && row.every((cell) => typeof cell === \"string\")) &&\n\t\t\t\t\ttypeof t.markdown === \"string\" &&\n\t\t\t\t\ttypeof t.pageNumber === \"number\"\n\t\t\t\t) {\n\t\t\t\t\ttables.push({\n\t\t\t\t\t\tcells: t.cells as string[][],\n\t\t\t\t\t\tmarkdown: t.markdown,\n\t\t\t\t\t\tpageNumber: t.pageNumber,\n\t\t\t\t\t});\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n\n\t// Parse chunks\n\tconst chunks: Chunk[] | null = Array.isArray(result.chunks)\n\t\t? result.chunks.map((chunk) => {\n\t\t\t\tif (!chunk || typeof chunk !== \"object\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk structure\");\n\t\t\t\t}\n\t\t\t\tconst c = chunk as Record<string, unknown>;\n\t\t\t\tif (typeof c.content !== \"string\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk: missing content\");\n\t\t\t\t}\n\t\t\t\tif (!c.metadata || typeof c.metadata !== \"object\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk: missing metadata\");\n\t\t\t\t}\n\t\t\t\tconst metadata = c.metadata as Record<string, unknown>;\n\n\t\t\t\t// Validate embedding array contains only numbers\n\t\t\t\tlet embedding: number[] | null = null;\n\t\t\t\tif (Array.isArray(c.embedding)) {\n\t\t\t\t\tif (!c.embedding.every((item) => typeof item === \"number\")) {\n\t\t\t\t\t\tthrow new Error(\"Invalid chunk: embedding must contain only numbers\");\n\t\t\t\t\t}\n\t\t\t\t\tembedding = c.embedding;\n\t\t\t\t}\n\n\t\t\t\t// Validate metadata fields\n\t\t\t\tif (typeof metadata.charStart !== \"number\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk metadata: charStart must be a number\");\n\t\t\t\t}\n\t\t\t\tif (typeof metadata.charEnd !== \"number\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk metadata: charEnd must be a number\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(metadata.tokenCount)) {\n\t\t\t\t\tthrow new Error(\"Invalid chunk metadata: tokenCount must be a number or null\");\n\t\t\t\t}\n\t\t\t\tif (typeof metadata.chunkIndex !== \"number\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk metadata: chunkIndex must be a number\");\n\t\t\t\t}\n\t\t\t\tif (typeof metadata.totalChunks !== \"number\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk metadata: totalChunks must be a number\");\n\t\t\t\t}\n\n\t\t\t\treturn {\n\t\t\t\t\tcontent: c.content,\n\t\t\t\t\tembedding,\n\t\t\t\t\tmetadata: {\n\t\t\t\t\t\tcharStart: metadata.charStart,\n\t\t\t\t\t\tcharEnd: metadata.charEnd,\n\t\t\t\t\t\ttokenCount: metadata.tokenCount,\n\t\t\t\t\t\tchunkIndex: metadata.chunkIndex,\n\t\t\t\t\t\ttotalChunks: metadata.totalChunks,\n\t\t\t\t\t},\n\t\t\t\t};\n\t\t\t})\n\t\t: null;\n\n\t// Parse images\n\tconst images: ExtractedImage[] | null = Array.isArray(result.images)\n\t\t? result.images.map((image) => {\n\t\t\t\tif (!image || typeof image !== \"object\") {\n\t\t\t\t\tthrow new Error(\"Invalid image structure\");\n\t\t\t\t}\n\t\t\t\tconst img = image as Record<string, unknown>;\n\t\t\t\tif (!(img.data instanceof Uint8Array)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: data must be Uint8Array\");\n\t\t\t\t}\n\t\t\t\tif (typeof img.format !== \"string\") {\n\t\t\t\t\tthrow new Error(\"Invalid image: missing format\");\n\t\t\t\t}\n\n\t\t\t\t// Validate numeric fields\n\t\t\t\tif (typeof img.imageIndex !== \"number\") {\n\t\t\t\t\tthrow new Error(\"Invalid image: imageIndex must be a number\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(img.pageNumber)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: pageNumber must be a number or null\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(img.width)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: width must be a number or null\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(img.height)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: height must be a number or null\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(img.bitsPerComponent)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: bitsPerComponent must be a number or null\");\n\t\t\t\t}\n\n\t\t\t\t// Validate boolean field\n\t\t\t\tif (!isBoolean(img.isMask)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: isMask must be a boolean\");\n\t\t\t\t}\n\n\t\t\t\t// Validate string fields\n\t\t\t\tif (!isStringOrNull(img.colorspace)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: colorspace must be a string or null\");\n\t\t\t\t}\n\t\t\t\tif (!isStringOrNull(img.description)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: description must be a string or null\");\n\t\t\t\t}\n\n\t\t\t\treturn {\n\t\t\t\t\tdata: img.data,\n\t\t\t\t\tformat: img.format,\n\t\t\t\t\timageIndex: img.imageIndex,\n\t\t\t\t\tpageNumber: img.pageNumber,\n\t\t\t\t\twidth: img.width,\n\t\t\t\t\theight: img.height,\n\t\t\t\t\tcolorspace: img.colorspace,\n\t\t\t\t\tbitsPerComponent: img.bitsPerComponent,\n\t\t\t\t\tisMask: img.isMask,\n\t\t\t\t\tdescription: img.description,\n\t\t\t\t\tocrResult: img.ocrResult ? jsToExtractionResult(img.ocrResult) : null,\n\t\t\t\t};\n\t\t\t})\n\t\t: null;\n\n\t// Validate detectedLanguages array\n\tlet detectedLanguages: string[] | null = null;\n\tif (Array.isArray(result.detectedLanguages)) {\n\t\tif (!result.detectedLanguages.every((lang) => typeof lang === \"string\")) {\n\t\t\tthrow new Error(\"Invalid result: detectedLanguages must contain only strings\");\n\t\t}\n\t\tdetectedLanguages = result.detectedLanguages;\n\t}\n\n\treturn {\n\t\tcontent: result.content,\n\t\tmimeType: result.mimeType,\n\t\tmetadata: result.metadata,\n\t\ttables,\n\t\tdetectedLanguages,\n\t\tchunks,\n\t\timages,\n\t};\n}\n\n/**\n * Wrap and format WASM errors with context\n *\n * Converts WASM error messages to JavaScript Error objects with proper context\n * and stack trace information when available.\n *\n * @param error - The error from WASM\n * @param context - Additional context about what operation failed\n * @returns A formatted Error object\n *\n * @internal\n *\n * @example\n * ```typescript\n * try {\n * await wasmExtract(bytes, mimeType);\n * } catch (error) {\n * throw wrapWasmError(error, 'extracting document');\n * }\n * ```\n */\nexport function wrapWasmError(error: unknown, context: string): Error {\n\tif (error instanceof Error) {\n\t\treturn new Error(`Error ${context}: ${error.message}`, {\n\t\t\tcause: error,\n\t\t});\n\t}\n\n\tconst message = String(error);\n\treturn new Error(`Error ${context}: ${message}`);\n}\n\n/**\n * Validate that a WASM-returned value conforms to ExtractionResult structure\n *\n * Performs structural validation without full type checking,\n * useful for runtime validation of WASM output.\n *\n * @param value - The value to validate\n * @returns True if value appears to be a valid ExtractionResult\n *\n * @internal\n */\nexport function isValidExtractionResult(value: unknown): value is ExtractionResult {\n\tif (!value || typeof value !== \"object\") {\n\t\treturn false;\n\t}\n\n\tconst obj = value as Record<string, unknown>;\n\treturn (\n\t\ttypeof obj.content === \"string\" &&\n\t\ttypeof obj.mimeType === \"string\" &&\n\t\tobj.metadata !== null &&\n\t\ttypeof obj.metadata === \"object\" &&\n\t\tArray.isArray(obj.tables)\n\t);\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAmCA,MAAM,gBAAgB,MAAM,OAAO;AAOnC,SAAS,eAAe,OAAwC;AAC/D,SAAO,OAAO,UAAU,YAAY,UAAU;AAC/C;AAOA,SAAS,eAAe,OAAwC;AAC/D,SAAO,OAAO,UAAU,YAAY,UAAU;AAC/C;AAOA,SAAS,UAAU,OAAkC;AACpD,SAAO,OAAO,UAAU;AACzB;AAmBA,eAAsB,iBAAiB,MAAwC;AAC9E,MAAI;AACH,QAAI,KAAK,OAAO,eAAe;AAC9B,YAAM,IAAI;AAAA,QACT,cAAc,KAAK,IAAI,4BAA4B,aAAa;AAAA,MACjE;AAAA,IACD;AAEA,UAAM,cAAc,MAAM,KAAK,YAAY;AAC3C,WAAO,IAAI,WAAW,WAAW;AAAA,EAClC,SAAS,OAAO;AACf,UAAM,IAAI,MAAM,wBAAwB,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,CAAC,EAAE;AAAA,EACjG;AACD;AAoBO,SAAS,WAAW,QAA0D;AACpF,MAAI,CAAC,QAAQ;AACZ,WAAO,CAAC;AAAA,EACT;AAEA,QAAM,aAAsC,CAAC;AAG7C,QAAM,iBAAiB,CAAC,UAA4B;AACnD,QAAI,UAAU,QAAQ,UAAU,QAAW;AAC1C,aAAO;AAAA,IACR;AACA,QAAI,OAAO,UAAU,UAAU;AAC9B,UAAI,MAAM,QAAQ,KAAK,GAAG;AACzB,eAAO,MAAM,IAAI,cAAc;AAAA,MAChC;AACA,YAAM,MAAM;AACZ,YAAMA,cAAsC,CAAC;AAC7C,iBAAW,CAAC,KAAK,GAAG,KAAK,OAAO,QAAQ,GAAG,GAAG;AAC7C,cAAM,gBAAgB,eAAe,GAAG;AACxC,YAAI,kBAAkB,QAAQ,kBAAkB,QAAW;AAC1D,UAAAA,YAAW,GAAG,IAAI;AAAA,QACnB;AAAA,MACD;AACA,aAAO,OAAO,KAAKA,WAAU,EAAE,SAAS,IAAIA,cAAa;AAAA,IAC1D;AACA,WAAO;AAAA,EACR;AAEA,aAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,MAAM,GAAG;AAClD,UAAM,kBAAkB,eAAe,KAAK;AAC5C,QAAI,oBAAoB,QAAQ,oBAAoB,QAAW;AAC9D,iBAAW,GAAG,IAAI;AAAA,IACnB;AAAA,EACD;AAEA,SAAO;AACR;AAmBO,SAAS,qBAAqB,SAAoC;AACxE,MAAI,CAAC,WAAW,OAAO,YAAY,UAAU;AAC5C,UAAM,IAAI,MAAM,mDAAmD;AAAA,EACpE;AAEA,QAAM,SAAS;AAGf,MAAI,OAAO,OAAO,YAAY,UAAU;AACvC,UAAM,IAAI,MAAM,uDAAuD;AAAA,EACxE;AACA,MAAI,OAAO,OAAO,aAAa,UAAU;AACxC,UAAM,IAAI,MAAM,wDAAwD;AAAA,EACzE;AACA,MAAI,CAAC,OAAO,YAAY,OAAO,OAAO,aAAa,UAAU;AAC5D,UAAM,IAAI,MAAM,wDAAwD;AAAA,EACzE;AAGA,QAAM,SAAkB,CAAC;AACzB,MAAI,MAAM,QAAQ,OAAO,MAAM,GAAG;AACjC,eAAW,SAAS,OAAO,QAAQ;AAClC,UAAI,SAAS,OAAO,UAAU,UAAU;AACvC,cAAM,IAAI;AAEV,YACC,MAAM,QAAQ,EAAE,KAAK,KACrB,EAAE,MAAM,MAAM,CAAC,QAAQ,MAAM,QAAQ,GAAG,KAAK,IAAI,MAAM,CAAC,SAAS,OAAO,SAAS,QAAQ,CAAC,KAC1F,OAAO,EAAE,aAAa,YACtB,OAAO,EAAE,eAAe,UACvB;AACD,iBAAO,KAAK;AAAA,YACX,OAAO,EAAE;AAAA,YACT,UAAU,EAAE;AAAA,YACZ,YAAY,EAAE;AAAA,UACf,CAAC;AAAA,QACF;AAAA,MACD;AAAA,IACD;AAAA,EACD;AAGA,QAAM,SAAyB,MAAM,QAAQ,OAAO,MAAM,IACvD,OAAO,OAAO,IAAI,CAAC,UAAU;AAC7B,QAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACxC,YAAM,IAAI,MAAM,yBAAyB;AAAA,IAC1C;AACA,UAAM,IAAI;AACV,QAAI,OAAO,EAAE,YAAY,UAAU;AAClC,YAAM,IAAI,MAAM,gCAAgC;AAAA,IACjD;AACA,QAAI,CAAC,EAAE,YAAY,OAAO,EAAE,aAAa,UAAU;AAClD,YAAM,IAAI,MAAM,iCAAiC;AAAA,IAClD;AACA,UAAM,WAAW,EAAE;AAGnB,QAAI,YAA6B;AACjC,QAAI,MAAM,QAAQ,EAAE,SAAS,GAAG;AAC/B,UAAI,CAAC,EAAE,UAAU,MAAM,CAAC,SAAS,OAAO,SAAS,QAAQ,GAAG;AAC3D,cAAM,IAAI,MAAM,oDAAoD;AAAA,MACrE;AACA,kBAAY,EAAE;AAAA,IACf;AAGA,QAAI,OAAO,SAAS,cAAc,UAAU;AAC3C,YAAM,IAAI,MAAM,oDAAoD;AAAA,IACrE;AACA,QAAI,OAAO,SAAS,YAAY,UAAU;AACzC,YAAM,IAAI,MAAM,kDAAkD;AAAA,IACnE;AACA,QAAI,CAAC,eAAe,SAAS,UAAU,GAAG;AACzC,YAAM,IAAI,MAAM,6DAA6D;AAAA,IAC9E;AACA,QAAI,OAAO,SAAS,eAAe,UAAU;AAC5C,YAAM,IAAI,MAAM,qDAAqD;AAAA,IACtE;AACA,QAAI,OAAO,SAAS,gBAAgB,UAAU;AAC7C,YAAM,IAAI,MAAM,sDAAsD;AAAA,IACvE;AAEA,WAAO;AAAA,MACN,SAAS,EAAE;AAAA,MACX;AAAA,MACA,UAAU;AAAA,QACT,WAAW,SAAS;AAAA,QACpB,SAAS,SAAS;AAAA,QAClB,YAAY,SAAS;AAAA,QACrB,YAAY,SAAS;AAAA,QACrB,aAAa,SAAS;AAAA,MACvB;AAAA,IACD;AAAA,EACD,CAAC,IACA;AAGH,QAAM,SAAkC,MAAM,QAAQ,OAAO,MAAM,IAChE,OAAO,OAAO,IAAI,CAAC,UAAU;AAC7B,QAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACxC,YAAM,IAAI,MAAM,yBAAyB;AAAA,IAC1C;AACA,UAAM,MAAM;AACZ,QAAI,EAAE,IAAI,gBAAgB,aAAa;AACtC,YAAM,IAAI,MAAM,wCAAwC;AAAA,IACzD;AACA,QAAI,OAAO,IAAI,WAAW,UAAU;AACnC,YAAM,IAAI,MAAM,+BAA+B;AAAA,IAChD;AAGA,QAAI,OAAO,IAAI,eAAe,UAAU;AACvC,YAAM,IAAI,MAAM,4CAA4C;AAAA,IAC7D;AACA,QAAI,CAAC,eAAe,IAAI,UAAU,GAAG;AACpC,YAAM,IAAI,MAAM,oDAAoD;AAAA,IACrE;AACA,QAAI,CAAC,eAAe,IAAI,KAAK,GAAG;AAC/B,YAAM,IAAI,MAAM,+CAA+C;AAAA,IAChE;AACA,QAAI,CAAC,eAAe,IAAI,MAAM,GAAG;AAChC,YAAM,IAAI,MAAM,gDAAgD;AAAA,IACjE;AACA,QAAI,CAAC,eAAe,IAAI,gBAAgB,GAAG;AAC1C,YAAM,IAAI,MAAM,0DAA0D;AAAA,IAC3E;AAGA,QAAI,CAAC,UAAU,IAAI,MAAM,GAAG;AAC3B,YAAM,IAAI,MAAM,yCAAyC;AAAA,IAC1D;AAGA,QAAI,CAAC,eAAe,IAAI,UAAU,GAAG;AACpC,YAAM,IAAI,MAAM,oDAAoD;AAAA,IACrE;AACA,QAAI,CAAC,eAAe,IAAI,WAAW,GAAG;AACrC,YAAM,IAAI,MAAM,qDAAqD;AAAA,IACtE;AAEA,WAAO;AAAA,MACN,MAAM,IAAI;AAAA,MACV,QAAQ,IAAI;AAAA,MACZ,YAAY,IAAI;AAAA,MAChB,YAAY,IAAI;AAAA,MAChB,OAAO,IAAI;AAAA,MACX,QAAQ,IAAI;AAAA,MACZ,YAAY,IAAI;AAAA,MAChB,kBAAkB,IAAI;AAAA,MACtB,QAAQ,IAAI;AAAA,MACZ,aAAa,IAAI;AAAA,MACjB,WAAW,IAAI,YAAY,qBAAqB,IAAI,SAAS,IAAI;AAAA,IAClE;AAAA,EACD,CAAC,IACA;AAGH,MAAI,oBAAqC;AACzC,MAAI,MAAM,QAAQ,OAAO,iBAAiB,GAAG;AAC5C,QAAI,CAAC,OAAO,kBAAkB,MAAM,CAAC,SAAS,OAAO,SAAS,QAAQ,GAAG;AACxE,YAAM,IAAI,MAAM,6DAA6D;AAAA,IAC9E;AACA,wBAAoB,OAAO;AAAA,EAC5B;AAEA,SAAO;AAAA,IACN,SAAS,OAAO;AAAA,IAChB,UAAU,OAAO;AAAA,IACjB,UAAU,OAAO;AAAA,IACjB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACD;AACD;AAuBO,SAAS,cAAc,OAAgB,SAAwB;AACrE,MAAI,iBAAiB,OAAO;AAC3B,WAAO,IAAI,MAAM,SAAS,OAAO,KAAK,MAAM,OAAO,IAAI;AAAA,MACtD,OAAO;AAAA,IACR,CAAC;AAAA,EACF;AAEA,QAAM,UAAU,OAAO,KAAK;AAC5B,SAAO,IAAI,MAAM,SAAS,OAAO,KAAK,OAAO,EAAE;AAChD;AAaO,SAAS,wBAAwB,OAA2C;AAClF,MAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACxC,WAAO;AAAA,EACR;AAEA,QAAM,MAAM;AACZ,SACC,OAAO,IAAI,YAAY,YACvB,OAAO,IAAI,aAAa,YACxB,IAAI,aAAa,QACjB,OAAO,IAAI,aAAa,YACxB,MAAM,QAAQ,IAAI,MAAM;AAE1B;","names":["normalized"]}
1
+ {"version":3,"sources":["../../typescript/adapters/wasm-adapter.ts"],"sourcesContent":["/**\n * WASM Type Adapter\n *\n * This module provides type adapters for converting between JavaScript/TypeScript\n * types and WASM-compatible types, handling File/Blob conversions, config normalization,\n * and result parsing.\n *\n * @example File Conversion\n * ```typescript\n * import { fileToUint8Array } from '@kreuzberg/wasm/adapters/wasm-adapter';\n *\n * const file = event.target.files[0];\n * const bytes = await fileToUint8Array(file);\n * const result = await extractBytes(bytes, file.type);\n * ```\n *\n * @example Config Normalization\n * ```typescript\n * import { configToJS } from '@kreuzberg/wasm/adapters/wasm-adapter';\n *\n * const config = {\n * ocr: { backend: 'tesseract', language: 'eng' },\n * chunking: { maxChars: 1000 }\n * };\n * const normalized = configToJS(config);\n * ```\n */\n\nimport type { Chunk, ExtractedImage, ExtractionConfig, ExtractionResult, Metadata, Table } from \"../types.js\";\n\n/**\n * Maximum file size for processing (512 MB)\n *\n * @internal\n */\nconst MAX_FILE_SIZE = 512 * 1024 * 1024;\n\n/**\n * Type predicate to validate numeric value or null\n *\n * @internal\n */\nfunction isNumberOrNull(value: unknown): value is number | null {\n\treturn typeof value === \"number\" || value === null;\n}\n\n/**\n * Type predicate to validate string value or null\n *\n * @internal\n */\nfunction isStringOrNull(value: unknown): value is string | null {\n\treturn typeof value === \"string\" || value === null;\n}\n\n/**\n * Type predicate to validate boolean value\n *\n * @internal\n */\nfunction isBoolean(value: unknown): value is boolean {\n\treturn typeof value === \"boolean\";\n}\n\n/**\n * Convert a File or Blob to Uint8Array\n *\n * Handles both browser File API and server-side Blob-like objects,\n * providing a unified interface for reading binary data.\n *\n * @param file - The File or Blob to convert\n * @returns Promise resolving to the byte array\n * @throws {Error} If the file cannot be read or exceeds size limit\n *\n * @example\n * ```typescript\n * const file = document.getElementById('input').files[0];\n * const bytes = await fileToUint8Array(file);\n * const result = await extractBytes(bytes, 'application/pdf');\n * ```\n */\nexport async function fileToUint8Array(file: File | Blob): Promise<Uint8Array> {\n\ttry {\n\t\tif (file.size > MAX_FILE_SIZE) {\n\t\t\tthrow new Error(\n\t\t\t\t`File size (${file.size} bytes) exceeds maximum (${MAX_FILE_SIZE} bytes). Maximum file size is 512 MB.`,\n\t\t\t);\n\t\t}\n\n\t\tconst arrayBuffer = await file.arrayBuffer();\n\t\treturn new Uint8Array(arrayBuffer);\n\t} catch (error) {\n\t\tthrow new Error(`Failed to read file: ${error instanceof Error ? error.message : String(error)}`);\n\t}\n}\n\n/**\n * Normalize ExtractionConfig for WASM processing\n *\n * Converts TypeScript configuration objects to a WASM-compatible format,\n * handling null values, undefined properties, and nested structures.\n *\n * @param config - The extraction configuration or null\n * @returns Normalized configuration object suitable for WASM\n *\n * @example\n * ```typescript\n * const config: ExtractionConfig = {\n * ocr: { backend: 'tesseract' },\n * chunking: { maxChars: 1000 }\n * };\n * const wasmConfig = configToJS(config);\n * ```\n */\nexport function configToJS(config: ExtractionConfig | null): Record<string, unknown> {\n\tif (!config) {\n\t\treturn {};\n\t}\n\n\tconst normalized: Record<string, unknown> = {};\n\n\tconst normalizeValue = (value: unknown): unknown => {\n\t\tif (value === null || value === undefined) {\n\t\t\treturn null;\n\t\t}\n\t\tif (typeof value === \"object\") {\n\t\t\tif (Array.isArray(value)) {\n\t\t\t\treturn value.map(normalizeValue);\n\t\t\t}\n\t\t\tconst obj = value as Record<string, unknown>;\n\t\t\tconst normalized: Record<string, unknown> = {};\n\t\t\tfor (const [key, val] of Object.entries(obj)) {\n\t\t\t\tconst normalizedVal = normalizeValue(val);\n\t\t\t\tif (normalizedVal !== null && normalizedVal !== undefined) {\n\t\t\t\t\tnormalized[key] = normalizedVal;\n\t\t\t\t}\n\t\t\t}\n\t\t\treturn Object.keys(normalized).length > 0 ? normalized : null;\n\t\t}\n\t\treturn value;\n\t};\n\n\tfor (const [key, value] of Object.entries(config)) {\n\t\tconst normalizedValue = normalizeValue(value);\n\t\tif (normalizedValue !== null && normalizedValue !== undefined) {\n\t\t\tnormalized[key] = normalizedValue;\n\t\t}\n\t}\n\n\treturn normalized;\n}\n\n/**\n * Parse WASM extraction result and convert to TypeScript type\n *\n * Handles conversion of WASM-returned objects to proper ExtractionResult types,\n * including proper array conversions and type assertions for tables, chunks, and images.\n *\n * @param jsValue - The raw WASM result value\n * @returns Properly typed ExtractionResult\n * @throws {Error} If the result structure is invalid\n *\n * @example\n * ```typescript\n * const wasmResult = await wasmExtract(bytes, mimeType, config);\n * const result = jsToExtractionResult(wasmResult);\n * console.log(result.content);\n * ```\n */\nexport function jsToExtractionResult(jsValue: unknown): ExtractionResult {\n\tif (!jsValue || typeof jsValue !== \"object\") {\n\t\tthrow new Error(\"Invalid extraction result: value is not an object\");\n\t}\n\n\tconst result = jsValue as Record<string, unknown>;\n\tconst mimeType =\n\t\ttypeof result.mimeType === \"string\"\n\t\t\t? result.mimeType\n\t\t\t: typeof result.mime_type === \"string\"\n\t\t\t\t? result.mime_type\n\t\t\t\t: null;\n\n\tif (typeof result.content !== \"string\") {\n\t\tthrow new Error(\"Invalid extraction result: missing or invalid content\");\n\t}\n\tif (typeof mimeType !== \"string\") {\n\t\tthrow new Error(\"Invalid extraction result: missing or invalid mimeType\");\n\t}\n\tif (!result.metadata || typeof result.metadata !== \"object\") {\n\t\tthrow new Error(\"Invalid extraction result: missing or invalid metadata\");\n\t}\n\n\tconst tables: Table[] = [];\n\tif (Array.isArray(result.tables)) {\n\t\tfor (const table of result.tables) {\n\t\t\tif (table && typeof table === \"object\") {\n\t\t\t\tconst t = table as Record<string, unknown>;\n\t\t\t\tif (\n\t\t\t\t\tArray.isArray(t.cells) &&\n\t\t\t\t\tt.cells.every((row) => Array.isArray(row) && row.every((cell) => typeof cell === \"string\")) &&\n\t\t\t\t\ttypeof t.markdown === \"string\" &&\n\t\t\t\t\ttypeof t.pageNumber === \"number\"\n\t\t\t\t) {\n\t\t\t\t\ttables.push({\n\t\t\t\t\t\tcells: t.cells as string[][],\n\t\t\t\t\t\tmarkdown: t.markdown,\n\t\t\t\t\t\tpageNumber: t.pageNumber,\n\t\t\t\t\t});\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n\n\tconst chunks: Chunk[] | null = Array.isArray(result.chunks)\n\t\t? result.chunks.map((chunk) => {\n\t\t\t\tif (!chunk || typeof chunk !== \"object\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk structure\");\n\t\t\t\t}\n\t\t\t\tconst c = chunk as Record<string, unknown>;\n\t\t\t\tif (typeof c.content !== \"string\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk: missing content\");\n\t\t\t\t}\n\t\t\t\tif (!c.metadata || typeof c.metadata !== \"object\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk: missing metadata\");\n\t\t\t\t}\n\t\t\t\tconst metadata = c.metadata as Record<string, unknown>;\n\n\t\t\t\tlet embedding: number[] | null = null;\n\t\t\t\tif (Array.isArray(c.embedding)) {\n\t\t\t\t\tif (!c.embedding.every((item) => typeof item === \"number\")) {\n\t\t\t\t\t\tthrow new Error(\"Invalid chunk: embedding must contain only numbers\");\n\t\t\t\t\t}\n\t\t\t\t\tembedding = c.embedding;\n\t\t\t\t}\n\n\t\t\t\t// Coerce numeric values - handle BigInt, strings, and numbers\n\t\t\t\tconst coerceToNumber = (value: unknown, fieldName: string): number => {\n\t\t\t\t\tif (typeof value === \"number\") {\n\t\t\t\t\t\treturn value;\n\t\t\t\t\t}\n\t\t\t\t\tif (typeof value === \"bigint\") {\n\t\t\t\t\t\treturn Number(value);\n\t\t\t\t\t}\n\t\t\t\t\tif (typeof value === \"string\") {\n\t\t\t\t\t\tconst parsed = parseInt(value, 10);\n\t\t\t\t\t\tif (Number.isNaN(parsed)) {\n\t\t\t\t\t\t\tthrow new Error(`Invalid chunk metadata: ${fieldName} must be a valid number, got \"${value}\"`);\n\t\t\t\t\t\t}\n\t\t\t\t\t\treturn parsed;\n\t\t\t\t\t}\n\t\t\t\t\tthrow new Error(`Invalid chunk metadata: ${fieldName} must be a number, got ${typeof value}`);\n\t\t\t\t};\n\n\t\t\t\t// The Rust code uses snake_case field names (byte_start, byte_end, etc)\n\t\t\t\t// but TypeScript expects camelCase (charStart, charEnd, etc)\n\t\t\t\t// For now, treat byte offsets as character offsets since the content is UTF-8\n\t\t\t\tconst charStart = coerceToNumber(\n\t\t\t\t\tmetadata.charStart ?? metadata.char_start ?? metadata.byteStart ?? metadata.byte_start,\n\t\t\t\t\t\"charStart\",\n\t\t\t\t);\n\t\t\t\tconst charEnd = coerceToNumber(\n\t\t\t\t\tmetadata.charEnd ?? metadata.char_end ?? metadata.byteEnd ?? metadata.byte_end,\n\t\t\t\t\t\"charEnd\",\n\t\t\t\t);\n\t\t\t\tconst chunkIndex = coerceToNumber(metadata.chunkIndex ?? metadata.chunk_index, \"chunkIndex\");\n\t\t\t\tconst totalChunks = coerceToNumber(metadata.totalChunks ?? metadata.total_chunks, \"totalChunks\");\n\n\t\t\t\tlet tokenCount: number | null = null;\n\t\t\t\tconst tokenCountValue = metadata.tokenCount ?? metadata.token_count;\n\t\t\t\tif (tokenCountValue !== null && tokenCountValue !== undefined) {\n\t\t\t\t\ttokenCount = coerceToNumber(tokenCountValue, \"tokenCount\");\n\t\t\t\t}\n\n\t\t\t\treturn {\n\t\t\t\t\tcontent: c.content,\n\t\t\t\t\tembedding,\n\t\t\t\t\tmetadata: {\n\t\t\t\t\t\tcharStart,\n\t\t\t\t\t\tcharEnd,\n\t\t\t\t\t\ttokenCount,\n\t\t\t\t\t\tchunkIndex,\n\t\t\t\t\t\ttotalChunks,\n\t\t\t\t\t},\n\t\t\t\t};\n\t\t\t})\n\t\t: null;\n\n\tconst images: ExtractedImage[] | null = Array.isArray(result.images)\n\t\t? result.images.map((image) => {\n\t\t\t\tif (!image || typeof image !== \"object\") {\n\t\t\t\t\tthrow new Error(\"Invalid image structure\");\n\t\t\t\t}\n\t\t\t\tconst img = image as Record<string, unknown>;\n\t\t\t\tif (!(img.data instanceof Uint8Array)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: data must be Uint8Array\");\n\t\t\t\t}\n\t\t\t\tif (typeof img.format !== \"string\") {\n\t\t\t\t\tthrow new Error(\"Invalid image: missing format\");\n\t\t\t\t}\n\n\t\t\t\tif (typeof img.imageIndex !== \"number\") {\n\t\t\t\t\tthrow new Error(\"Invalid image: imageIndex must be a number\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(img.pageNumber)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: pageNumber must be a number or null\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(img.width)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: width must be a number or null\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(img.height)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: height must be a number or null\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(img.bitsPerComponent)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: bitsPerComponent must be a number or null\");\n\t\t\t\t}\n\n\t\t\t\tif (!isBoolean(img.isMask)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: isMask must be a boolean\");\n\t\t\t\t}\n\n\t\t\t\tif (!isStringOrNull(img.colorspace)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: colorspace must be a string or null\");\n\t\t\t\t}\n\t\t\t\tif (!isStringOrNull(img.description)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: description must be a string or null\");\n\t\t\t\t}\n\n\t\t\t\treturn {\n\t\t\t\t\tdata: img.data,\n\t\t\t\t\tformat: img.format,\n\t\t\t\t\timageIndex: img.imageIndex,\n\t\t\t\t\tpageNumber: img.pageNumber,\n\t\t\t\t\twidth: img.width,\n\t\t\t\t\theight: img.height,\n\t\t\t\t\tcolorspace: img.colorspace,\n\t\t\t\t\tbitsPerComponent: img.bitsPerComponent,\n\t\t\t\t\tisMask: img.isMask,\n\t\t\t\t\tdescription: img.description,\n\t\t\t\t\tocrResult: img.ocrResult ? jsToExtractionResult(img.ocrResult) : null,\n\t\t\t\t};\n\t\t\t})\n\t\t: null;\n\n\tlet detectedLanguages: string[] | null = null;\n\tconst detectedLanguagesRaw = Array.isArray(result.detectedLanguages)\n\t\t? result.detectedLanguages\n\t\t: result.detected_languages;\n\tif (Array.isArray(detectedLanguagesRaw)) {\n\t\tif (!detectedLanguagesRaw.every((lang) => typeof lang === \"string\")) {\n\t\t\tthrow new Error(\"Invalid result: detectedLanguages must contain only strings\");\n\t\t}\n\t\tdetectedLanguages = detectedLanguagesRaw;\n\t}\n\n\treturn {\n\t\tcontent: result.content,\n\t\tmimeType,\n\t\tmetadata: (result.metadata ?? {}) as Metadata,\n\t\ttables,\n\t\tdetectedLanguages,\n\t\tchunks,\n\t\timages,\n\t};\n}\n\n/**\n * Wrap and format WASM errors with context\n *\n * Converts WASM error messages to JavaScript Error objects with proper context\n * and stack trace information when available.\n *\n * @param error - The error from WASM\n * @param context - Additional context about what operation failed\n * @returns A formatted Error object\n *\n * @internal\n *\n * @example\n * ```typescript\n * try {\n * await wasmExtract(bytes, mimeType);\n * } catch (error) {\n * throw wrapWasmError(error, 'extracting document');\n * }\n * ```\n */\nexport function wrapWasmError(error: unknown, context: string): Error {\n\tif (error instanceof Error) {\n\t\treturn new Error(`Error ${context}: ${error.message}`, {\n\t\t\tcause: error,\n\t\t});\n\t}\n\n\tconst message = String(error);\n\treturn new Error(`Error ${context}: ${message}`);\n}\n\n/**\n * Validate that a WASM-returned value conforms to ExtractionResult structure\n *\n * Performs structural validation without full type checking,\n * useful for runtime validation of WASM output.\n *\n * @param value - The value to validate\n * @returns True if value appears to be a valid ExtractionResult\n *\n * @internal\n */\nexport function isValidExtractionResult(value: unknown): value is ExtractionResult {\n\tif (!value || typeof value !== \"object\") {\n\t\treturn false;\n\t}\n\n\tconst obj = value as Record<string, unknown>;\n\treturn (\n\t\ttypeof obj.content === \"string\" &&\n\t\t(typeof obj.mimeType === \"string\" || typeof obj.mime_type === \"string\") &&\n\t\tobj.metadata !== null &&\n\t\ttypeof obj.metadata === \"object\" &&\n\t\tArray.isArray(obj.tables)\n\t);\n}\n"],"mappings":";AAmCA,IAAM,gBAAgB,MAAM,OAAO;AAOnC,SAAS,eAAe,OAAwC;AAC/D,SAAO,OAAO,UAAU,YAAY,UAAU;AAC/C;AAOA,SAAS,eAAe,OAAwC;AAC/D,SAAO,OAAO,UAAU,YAAY,UAAU;AAC/C;AAOA,SAAS,UAAU,OAAkC;AACpD,SAAO,OAAO,UAAU;AACzB;AAmBA,eAAsB,iBAAiB,MAAwC;AAC9E,MAAI;AACH,QAAI,KAAK,OAAO,eAAe;AAC9B,YAAM,IAAI;AAAA,QACT,cAAc,KAAK,IAAI,4BAA4B,aAAa;AAAA,MACjE;AAAA,IACD;AAEA,UAAM,cAAc,MAAM,KAAK,YAAY;AAC3C,WAAO,IAAI,WAAW,WAAW;AAAA,EAClC,SAAS,OAAO;AACf,UAAM,IAAI,MAAM,wBAAwB,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,CAAC,EAAE;AAAA,EACjG;AACD;AAoBO,SAAS,WAAW,QAA0D;AACpF,MAAI,CAAC,QAAQ;AACZ,WAAO,CAAC;AAAA,EACT;AAEA,QAAM,aAAsC,CAAC;AAE7C,QAAM,iBAAiB,CAAC,UAA4B;AACnD,QAAI,UAAU,QAAQ,UAAU,QAAW;AAC1C,aAAO;AAAA,IACR;AACA,QAAI,OAAO,UAAU,UAAU;AAC9B,UAAI,MAAM,QAAQ,KAAK,GAAG;AACzB,eAAO,MAAM,IAAI,cAAc;AAAA,MAChC;AACA,YAAM,MAAM;AACZ,YAAMA,cAAsC,CAAC;AAC7C,iBAAW,CAAC,KAAK,GAAG,KAAK,OAAO,QAAQ,GAAG,GAAG;AAC7C,cAAM,gBAAgB,eAAe,GAAG;AACxC,YAAI,kBAAkB,QAAQ,kBAAkB,QAAW;AAC1D,UAAAA,YAAW,GAAG,IAAI;AAAA,QACnB;AAAA,MACD;AACA,aAAO,OAAO,KAAKA,WAAU,EAAE,SAAS,IAAIA,cAAa;AAAA,IAC1D;AACA,WAAO;AAAA,EACR;AAEA,aAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,MAAM,GAAG;AAClD,UAAM,kBAAkB,eAAe,KAAK;AAC5C,QAAI,oBAAoB,QAAQ,oBAAoB,QAAW;AAC9D,iBAAW,GAAG,IAAI;AAAA,IACnB;AAAA,EACD;AAEA,SAAO;AACR;AAmBO,SAAS,qBAAqB,SAAoC;AACxE,MAAI,CAAC,WAAW,OAAO,YAAY,UAAU;AAC5C,UAAM,IAAI,MAAM,mDAAmD;AAAA,EACpE;AAEA,QAAM,SAAS;AACf,QAAM,WACL,OAAO,OAAO,aAAa,WACxB,OAAO,WACP,OAAO,OAAO,cAAc,WAC3B,OAAO,YACP;AAEL,MAAI,OAAO,OAAO,YAAY,UAAU;AACvC,UAAM,IAAI,MAAM,uDAAuD;AAAA,EACxE;AACA,MAAI,OAAO,aAAa,UAAU;AACjC,UAAM,IAAI,MAAM,wDAAwD;AAAA,EACzE;AACA,MAAI,CAAC,OAAO,YAAY,OAAO,OAAO,aAAa,UAAU;AAC5D,UAAM,IAAI,MAAM,wDAAwD;AAAA,EACzE;AAEA,QAAM,SAAkB,CAAC;AACzB,MAAI,MAAM,QAAQ,OAAO,MAAM,GAAG;AACjC,eAAW,SAAS,OAAO,QAAQ;AAClC,UAAI,SAAS,OAAO,UAAU,UAAU;AACvC,cAAM,IAAI;AACV,YACC,MAAM,QAAQ,EAAE,KAAK,KACrB,EAAE,MAAM,MAAM,CAAC,QAAQ,MAAM,QAAQ,GAAG,KAAK,IAAI,MAAM,CAAC,SAAS,OAAO,SAAS,QAAQ,CAAC,KAC1F,OAAO,EAAE,aAAa,YACtB,OAAO,EAAE,eAAe,UACvB;AACD,iBAAO,KAAK;AAAA,YACX,OAAO,EAAE;AAAA,YACT,UAAU,EAAE;AAAA,YACZ,YAAY,EAAE;AAAA,UACf,CAAC;AAAA,QACF;AAAA,MACD;AAAA,IACD;AAAA,EACD;AAEA,QAAM,SAAyB,MAAM,QAAQ,OAAO,MAAM,IACvD,OAAO,OAAO,IAAI,CAAC,UAAU;AAC7B,QAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACxC,YAAM,IAAI,MAAM,yBAAyB;AAAA,IAC1C;AACA,UAAM,IAAI;AACV,QAAI,OAAO,EAAE,YAAY,UAAU;AAClC,YAAM,IAAI,MAAM,gCAAgC;AAAA,IACjD;AACA,QAAI,CAAC,EAAE,YAAY,OAAO,EAAE,aAAa,UAAU;AAClD,YAAM,IAAI,MAAM,iCAAiC;AAAA,IAClD;AACA,UAAM,WAAW,EAAE;AAEnB,QAAI,YAA6B;AACjC,QAAI,MAAM,QAAQ,EAAE,SAAS,GAAG;AAC/B,UAAI,CAAC,EAAE,UAAU,MAAM,CAAC,SAAS,OAAO,SAAS,QAAQ,GAAG;AAC3D,cAAM,IAAI,MAAM,oDAAoD;AAAA,MACrE;AACA,kBAAY,EAAE;AAAA,IACf;AAGA,UAAM,iBAAiB,CAAC,OAAgB,cAA8B;AACrE,UAAI,OAAO,UAAU,UAAU;AAC9B,eAAO;AAAA,MACR;AACA,UAAI,OAAO,UAAU,UAAU;AAC9B,eAAO,OAAO,KAAK;AAAA,MACpB;AACA,UAAI,OAAO,UAAU,UAAU;AAC9B,cAAM,SAAS,SAAS,OAAO,EAAE;AACjC,YAAI,OAAO,MAAM,MAAM,GAAG;AACzB,gBAAM,IAAI,MAAM,2BAA2B,SAAS,iCAAiC,KAAK,GAAG;AAAA,QAC9F;AACA,eAAO;AAAA,MACR;AACA,YAAM,IAAI,MAAM,2BAA2B,SAAS,0BAA0B,OAAO,KAAK,EAAE;AAAA,IAC7F;AAKA,UAAM,YAAY;AAAA,MACjB,SAAS,aAAa,SAAS,cAAc,SAAS,aAAa,SAAS;AAAA,MAC5E;AAAA,IACD;AACA,UAAM,UAAU;AAAA,MACf,SAAS,WAAW,SAAS,YAAY,SAAS,WAAW,SAAS;AAAA,MACtE;AAAA,IACD;AACA,UAAM,aAAa,eAAe,SAAS,cAAc,SAAS,aAAa,YAAY;AAC3F,UAAM,cAAc,eAAe,SAAS,eAAe,SAAS,cAAc,aAAa;AAE/F,QAAI,aAA4B;AAChC,UAAM,kBAAkB,SAAS,cAAc,SAAS;AACxD,QAAI,oBAAoB,QAAQ,oBAAoB,QAAW;AAC9D,mBAAa,eAAe,iBAAiB,YAAY;AAAA,IAC1D;AAEA,WAAO;AAAA,MACN,SAAS,EAAE;AAAA,MACX;AAAA,MACA,UAAU;AAAA,QACT;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,MACD;AAAA,IACD;AAAA,EACD,CAAC,IACA;AAEH,QAAM,SAAkC,MAAM,QAAQ,OAAO,MAAM,IAChE,OAAO,OAAO,IAAI,CAAC,UAAU;AAC7B,QAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACxC,YAAM,IAAI,MAAM,yBAAyB;AAAA,IAC1C;AACA,UAAM,MAAM;AACZ,QAAI,EAAE,IAAI,gBAAgB,aAAa;AACtC,YAAM,IAAI,MAAM,wCAAwC;AAAA,IACzD;AACA,QAAI,OAAO,IAAI,WAAW,UAAU;AACnC,YAAM,IAAI,MAAM,+BAA+B;AAAA,IAChD;AAEA,QAAI,OAAO,IAAI,eAAe,UAAU;AACvC,YAAM,IAAI,MAAM,4CAA4C;AAAA,IAC7D;AACA,QAAI,CAAC,eAAe,IAAI,UAAU,GAAG;AACpC,YAAM,IAAI,MAAM,oDAAoD;AAAA,IACrE;AACA,QAAI,CAAC,eAAe,IAAI,KAAK,GAAG;AAC/B,YAAM,IAAI,MAAM,+CAA+C;AAAA,IAChE;AACA,QAAI,CAAC,eAAe,IAAI,MAAM,GAAG;AAChC,YAAM,IAAI,MAAM,gDAAgD;AAAA,IACjE;AACA,QAAI,CAAC,eAAe,IAAI,gBAAgB,GAAG;AAC1C,YAAM,IAAI,MAAM,0DAA0D;AAAA,IAC3E;AAEA,QAAI,CAAC,UAAU,IAAI,MAAM,GAAG;AAC3B,YAAM,IAAI,MAAM,yCAAyC;AAAA,IAC1D;AAEA,QAAI,CAAC,eAAe,IAAI,UAAU,GAAG;AACpC,YAAM,IAAI,MAAM,oDAAoD;AAAA,IACrE;AACA,QAAI,CAAC,eAAe,IAAI,WAAW,GAAG;AACrC,YAAM,IAAI,MAAM,qDAAqD;AAAA,IACtE;AAEA,WAAO;AAAA,MACN,MAAM,IAAI;AAAA,MACV,QAAQ,IAAI;AAAA,MACZ,YAAY,IAAI;AAAA,MAChB,YAAY,IAAI;AAAA,MAChB,OAAO,IAAI;AAAA,MACX,QAAQ,IAAI;AAAA,MACZ,YAAY,IAAI;AAAA,MAChB,kBAAkB,IAAI;AAAA,MACtB,QAAQ,IAAI;AAAA,MACZ,aAAa,IAAI;AAAA,MACjB,WAAW,IAAI,YAAY,qBAAqB,IAAI,SAAS,IAAI;AAAA,IAClE;AAAA,EACD,CAAC,IACA;AAEH,MAAI,oBAAqC;AACzC,QAAM,uBAAuB,MAAM,QAAQ,OAAO,iBAAiB,IAChE,OAAO,oBACP,OAAO;AACV,MAAI,MAAM,QAAQ,oBAAoB,GAAG;AACxC,QAAI,CAAC,qBAAqB,MAAM,CAAC,SAAS,OAAO,SAAS,QAAQ,GAAG;AACpE,YAAM,IAAI,MAAM,6DAA6D;AAAA,IAC9E;AACA,wBAAoB;AAAA,EACrB;AAEA,SAAO;AAAA,IACN,SAAS,OAAO;AAAA,IAChB;AAAA,IACA,UAAW,OAAO,YAAY,CAAC;AAAA,IAC/B;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACD;AACD;AAuBO,SAAS,cAAc,OAAgB,SAAwB;AACrE,MAAI,iBAAiB,OAAO;AAC3B,WAAO,IAAI,MAAM,SAAS,OAAO,KAAK,MAAM,OAAO,IAAI;AAAA,MACtD,OAAO;AAAA,IACR,CAAC;AAAA,EACF;AAEA,QAAM,UAAU,OAAO,KAAK;AAC5B,SAAO,IAAI,MAAM,SAAS,OAAO,KAAK,OAAO,EAAE;AAChD;AAaO,SAAS,wBAAwB,OAA2C;AAClF,MAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACxC,WAAO;AAAA,EACR;AAEA,QAAM,MAAM;AACZ,SACC,OAAO,IAAI,YAAY,aACtB,OAAO,IAAI,aAAa,YAAY,OAAO,IAAI,cAAc,aAC9D,IAAI,aAAa,QACjB,OAAO,IAAI,aAAa,YACxB,MAAM,QAAQ,IAAI,MAAM;AAE1B;","names":["normalized"]}
package/dist/index.d.ts CHANGED
@@ -1,10 +1,3 @@
1
- import { E as ExtractionConfig, a as ExtractionResult } from './types-GJVIvbPy.js';
2
- export { C as Chunk, d as ChunkMetadata, b as ChunkingConfig, c as ExtractedImage, I as ImageExtractionConfig, L as LanguageDetectionConfig, M as Metadata, f as OcrBackendProtocol, O as OcrConfig, e as PageContent, P as PageExtractionConfig, T as Table } from './types-GJVIvbPy.js';
3
- export { configToJS, fileToUint8Array, isValidExtractionResult, jsToExtractionResult, wrapWasmError } from './adapters/wasm-adapter.js';
4
- export { clearOcrBackends, getOcrBackend, listOcrBackends, registerOcrBackend, unregisterOcrBackend } from './ocr/registry.js';
5
- export { TesseractWasmBackend } from './ocr/tesseract-wasm-backend.js';
6
- export { RuntimeType, WasmCapabilities, detectRuntime, getRuntimeInfo, getRuntimeVersion, getWasmCapabilities, hasBigInt, hasBlob, hasFileApi, hasModuleWorkers, hasSharedArrayBuffer, hasWasm, hasWasmStreaming, hasWorkers, isBrowser, isBun, isDeno, isNode, isServerEnvironment, isWebEnvironment } from './runtime.js';
7
-
8
1
  /**
9
2
  * Kreuzberg - WebAssembly Bindings for Browser and Runtime Environments
10
3
  *
@@ -100,51 +93,15 @@ export { RuntimeType, WasmCapabilities, detectRuntime, getRuntimeInfo, getRuntim
100
93
  * const result = await extractBytes(bytes, 'application/pdf', config);
101
94
  * ```
102
95
  */
103
-
104
- /**
105
- * Initialize the WASM module
106
- *
107
- * This function must be called once before using any extraction functions.
108
- * It loads and initializes the WASM module in the current runtime environment,
109
- * automatically selecting the appropriate WASM variant for the detected runtime.
110
- *
111
- * Multiple calls to initWasm() are safe and will return immediately if already initialized.
112
- *
113
- * @throws {Error} If WASM module fails to load or is not supported in the current environment
114
- *
115
- * @example Basic Usage
116
- * ```typescript
117
- * import { initWasm } from '@kreuzberg/wasm';
118
- *
119
- * async function main() {
120
- * await initWasm();
121
- * // Now you can use extraction functions
122
- * }
123
- *
124
- * main().catch(console.error);
125
- * ```
126
- *
127
- * @example With Error Handling
128
- * ```typescript
129
- * import { initWasm, getWasmCapabilities } from '@kreuzberg/wasm';
130
- *
131
- * async function initializeKreuzberg() {
132
- * const caps = getWasmCapabilities();
133
- * if (!caps.hasWasm) {
134
- * throw new Error('WebAssembly is not supported in this environment');
135
- * }
136
- *
137
- * try {
138
- * await initWasm();
139
- * console.log('Kreuzberg initialized successfully');
140
- * } catch (error) {
141
- * console.error('Failed to initialize Kreuzberg:', error);
142
- * throw error;
143
- * }
144
- * }
145
- * ```
146
- */
147
- declare function initWasm(): Promise<void>;
96
+ import type { ExtractionConfig as ExtractionConfigType, ExtractionResult } from "./types.d.ts";
97
+ export { configToJS, fileToUint8Array, isValidExtractionResult, jsToExtractionResult, wrapWasmError, } from "./adapters/wasm-adapter.d.ts";
98
+ export { clearOcrBackends, getOcrBackend, listOcrBackends, registerOcrBackend, unregisterOcrBackend, } from "./ocr/registry.d.ts";
99
+ export { TesseractWasmBackend } from "./ocr/tesseract-wasm-backend.d.ts";
100
+ export { clearPostProcessors, clearValidators, getPostProcessor, getValidator, listPostProcessors, listValidators, type PostProcessor, registerPostProcessor, registerValidator, unregisterPostProcessor, unregisterValidator, type Validator, } from "./plugin-registry.d.ts";
101
+ export { detectRuntime, getRuntimeInfo, getRuntimeVersion, getWasmCapabilities, hasBigInt, hasBlob, hasFileApi, hasModuleWorkers, hasSharedArrayBuffer, hasWasm, hasWasmStreaming, hasWorkers, isBrowser, isBun, isDeno, isNode, isServerEnvironment, isWebEnvironment, type RuntimeType, type WasmCapabilities, } from "./runtime.d.ts";
102
+ export type * from "./types.d.ts";
103
+ export type { Chunk, ChunkingConfig, ChunkMetadata, ExtractedImage, ExtractionConfig, ExtractionResult, ImageExtractionConfig, LanguageDetectionConfig, Metadata, OcrBackendProtocol, OcrConfig, PageContent, PageExtractionConfig, PdfConfig, PostProcessorConfig, Table, TesseractConfig, TokenReductionConfig, } from "./types.d.ts";
104
+ export declare function initWasm(): Promise<void>;
148
105
  /**
149
106
  * Check if WASM module is initialized
150
107
  *
@@ -157,7 +114,7 @@ declare function initWasm(): Promise<void>;
157
114
  * }
158
115
  * ```
159
116
  */
160
- declare function isInitialized(): boolean;
117
+ export declare function isInitialized(): boolean;
161
118
  /**
162
119
  * Get WASM module version
163
120
  *
@@ -170,7 +127,7 @@ declare function isInitialized(): boolean;
170
127
  * console.log(`Using Kreuzberg ${version}`);
171
128
  * ```
172
129
  */
173
- declare function getVersion(): string;
130
+ export declare function getVersion(): string;
174
131
  /**
175
132
  * Get initialization error if module failed to load
176
133
  *
@@ -178,7 +135,7 @@ declare function getVersion(): string;
178
135
  *
179
136
  * @internal
180
137
  */
181
- declare function getInitializationError(): Error | null;
138
+ export declare function getInitializationError(): Error | null;
182
139
  /**
183
140
  * Extract content from bytes (document data)
184
141
  *
@@ -220,7 +177,7 @@ declare function getInitializationError(): Error | null;
220
177
  * const result = await extractBytes(bytes, file.type);
221
178
  * ```
222
179
  */
223
- declare function extractBytes(data: Uint8Array, mimeType: string, config?: ExtractionConfig | null): Promise<ExtractionResult>;
180
+ export declare function extractBytes(data: Uint8Array, mimeType: string, config?: ExtractionConfigType | null): Promise<ExtractionResult>;
224
181
  /**
225
182
  * Extract content from a file on the file system
226
183
  *
@@ -256,7 +213,7 @@ declare function extractBytes(data: Uint8Array, mimeType: string, config?: Extra
256
213
  * });
257
214
  * ```
258
215
  */
259
- declare function extractFile(path: string, mimeType?: string | null, config?: ExtractionConfig | null): Promise<ExtractionResult>;
216
+ export declare function extractFile(path: string, mimeType?: string | null, config?: ExtractionConfigType | null): Promise<ExtractionResult>;
260
217
  /**
261
218
  * Extract content from a File or Blob (browser-friendly wrapper)
262
219
  *
@@ -289,7 +246,7 @@ declare function extractFile(path: string, mimeType?: string | null, config?: Ex
289
246
  * });
290
247
  * ```
291
248
  */
292
- declare function extractFromFile(file: File | Blob, mimeType?: string | null, config?: ExtractionConfig | null): Promise<ExtractionResult>;
249
+ export declare function extractFromFile(file: File | Blob, mimeType?: string | null, config?: ExtractionConfigType | null): Promise<ExtractionResult>;
293
250
  /**
294
251
  * Extract content from bytes synchronously
295
252
  *
@@ -309,7 +266,7 @@ declare function extractFromFile(file: File | Blob, mimeType?: string | null, co
309
266
  * console.log(result.content);
310
267
  * ```
311
268
  */
312
- declare function extractBytesSync(data: Uint8Array, mimeType: string, config?: ExtractionConfig | null): ExtractionResult;
269
+ export declare function extractBytesSync(data: Uint8Array, mimeType: string, config?: ExtractionConfigType | null): ExtractionResult;
313
270
  /**
314
271
  * Batch extract content from multiple byte arrays asynchronously
315
272
  *
@@ -331,10 +288,10 @@ declare function extractBytesSync(data: Uint8Array, mimeType: string, config?: E
331
288
  * results.forEach((result) => console.log(result.content));
332
289
  * ```
333
290
  */
334
- declare function batchExtractBytes(files: Array<{
291
+ export declare function batchExtractBytes(files: Array<{
335
292
  data: Uint8Array;
336
293
  mimeType: string;
337
- }>, config?: ExtractionConfig | null): Promise<ExtractionResult[]>;
294
+ }>, config?: ExtractionConfigType | null): Promise<ExtractionResult[]>;
338
295
  /**
339
296
  * Batch extract content from multiple byte arrays synchronously
340
297
  *
@@ -356,10 +313,10 @@ declare function batchExtractBytes(files: Array<{
356
313
  * results.forEach((result) => console.log(result.content));
357
314
  * ```
358
315
  */
359
- declare function batchExtractBytesSync(files: Array<{
316
+ export declare function batchExtractBytesSync(files: Array<{
360
317
  data: Uint8Array;
361
318
  mimeType: string;
362
- }>, config?: ExtractionConfig | null): ExtractionResult[];
319
+ }>, config?: ExtractionConfigType | null): ExtractionResult[];
363
320
  /**
364
321
  * Batch extract content from multiple File objects asynchronously
365
322
  *
@@ -381,7 +338,7 @@ declare function batchExtractBytesSync(files: Array<{
381
338
  * });
382
339
  * ```
383
340
  */
384
- declare function batchExtractFiles(files: File[], config?: ExtractionConfig | null): Promise<ExtractionResult[]>;
341
+ export declare function batchExtractFiles(files: File[], config?: ExtractionConfigType | null): Promise<ExtractionResult[]>;
385
342
  /**
386
343
  * Enable OCR functionality with tesseract-wasm backend
387
344
  *
@@ -461,6 +418,5 @@ declare function batchExtractFiles(files: File[], config?: ExtractionConfig | nu
461
418
  * });
462
419
  * ```
463
420
  */
464
- declare function enableOcr(): Promise<void>;
465
-
466
- export { ExtractionConfig, ExtractionResult, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, enableOcr, extractBytes, extractBytesSync, extractFile, extractFromFile, getInitializationError, getVersion, initWasm, isInitialized };
421
+ export declare function enableOcr(): Promise<void>;
422
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../typescript/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8FG;AAMH,OAAO,KAAK,EAAE,gBAAgB,IAAI,oBAAoB,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAE7F,OAAO,EACN,UAAU,EACV,gBAAgB,EAChB,uBAAuB,EACvB,oBAAoB,EACpB,aAAa,GACb,MAAM,4BAA4B,CAAC;AACpC,OAAO,EACN,gBAAgB,EAChB,aAAa,EACb,eAAe,EACf,kBAAkB,EAClB,oBAAoB,GACpB,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EAAE,oBAAoB,EAAE,MAAM,iCAAiC,CAAC;AACvE,OAAO,EACN,mBAAmB,EACnB,eAAe,EACf,gBAAgB,EAChB,YAAY,EACZ,kBAAkB,EAClB,cAAc,EACd,KAAK,aAAa,EAClB,qBAAqB,EACrB,iBAAiB,EACjB,uBAAuB,EACvB,mBAAmB,EACnB,KAAK,SAAS,GACd,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EACN,aAAa,EACb,cAAc,EACd,iBAAiB,EACjB,mBAAmB,EACnB,SAAS,EACT,OAAO,EACP,UAAU,EACV,gBAAgB,EAChB,oBAAoB,EACpB,OAAO,EACP,gBAAgB,EAChB,UAAU,EACV,SAAS,EACT,KAAK,EACL,MAAM,EACN,MAAM,EACN,mBAAmB,EACnB,gBAAgB,EAChB,KAAK,WAAW,EAChB,KAAK,gBAAgB,GACrB,MAAM,cAAc,CAAC;AACtB,mBAAmB,YAAY,CAAC;AAChC,YAAY,EACX,KAAK,EACL,cAAc,EACd,aAAa,EACb,cAAc,EACd,gBAAgB,EAChB,gBAAgB,EAChB,qBAAqB,EACrB,uBAAuB,EACvB,QAAQ,EACR,kBAAkB,EAClB,SAAS,EACT,WAAW,EACX,oBAAoB,EACpB,SAAS,EACT,mBAAmB,EACnB,KAAK,EACL,eAAe,EACf,oBAAoB,GACpB,MAAM,YAAY,CAAC;AA4IpB,wBAAsB,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC,CA8C9C;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,aAAa,IAAI,OAAO,CAEvC;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,UAAU,IAAI,MAAM,CAUnC;AAED;;;;;;GAMG;AACH,wBAAgB,sBAAsB,IAAI,KAAK,GAAG,IAAI,CAErD;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAwCG;AACH,wBAAsB,YAAY,CACjC,IAAI,EAAE,UAAU,EAChB,QAAQ,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,oBAAoB,GAAG,IAAI,GAClC,OAAO,CAAC,gBAAgB,CAAC,CA8B3B;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkCG;AACH,wBAAsB,WAAW,CAChC,IAAI,EAAE,MAAM,EACZ,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,EACxB,MAAM,CAAC,EAAE,oBAAoB,GAAG,IAAI,GAClC,OAAO,CAAC,gBAAgB,CAAC,CAqD3B;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AACH,wBAAsB,eAAe,CACpC,IAAI,EAAE,IAAI,GAAG,IAAI,EACjB,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,EACxB,MAAM,CAAC,EAAE,oBAAoB,GAAG,IAAI,GAClC,OAAO,CAAC,gBAAgB,CAAC,CAmB3B;AAED;;;;;;;;;;;;;;;;;;GAkBG;AACH,wBAAgB,gBAAgB,CAC/B,IAAI,EAAE,UAAU,EAChB,QAAQ,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,oBAAoB,GAAG,IAAI,GAClC,gBAAgB,CA8BlB;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAsB,iBAAiB,CACtC,KAAK,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,UAAU,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAA;CAAE,CAAC,EACpD,MAAM,CAAC,EAAE,oBAAoB,GAAG,IAAI,GAClC,OAAO,CAAC,gBAAgB,EAAE,CAAC,CA+D7B;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAgB,qBAAqB,CACpC,KAAK,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,UAAU,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAA;CAAE,CAAC,EACpD,MAAM,CAAC,EAAE,oBAAoB,GAAG,IAAI,GAClC,gBAAgB,EAAE,CA+DpB;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAsB,iBAAiB,CACtC,KAAK,EAAE,IAAI,EAAE,EACb,MAAM,CAAC,EAAE,oBAAoB,GAAG,IAAI,GAClC,OAAO,CAAC,gBAAgB,EAAE,CAAC,CAiC7B;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8EG;AACH,wBAAsB,SAAS,IAAI,OAAO,CAAC,IAAI,CAAC,CAoB/C"}