@kreuzberg/wasm 4.0.0-rc.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +982 -0
- package/dist/adapters/wasm-adapter.cjs +245 -0
- package/dist/adapters/wasm-adapter.cjs.map +1 -0
- package/dist/adapters/wasm-adapter.d.cts +121 -0
- package/dist/adapters/wasm-adapter.d.ts +121 -0
- package/dist/adapters/wasm-adapter.js +224 -0
- package/dist/adapters/wasm-adapter.js.map +1 -0
- package/dist/index.cjs +4335 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +466 -0
- package/dist/index.d.ts +466 -0
- package/dist/index.js +4308 -0
- package/dist/index.js.map +1 -0
- package/dist/ocr/registry.cjs +92 -0
- package/dist/ocr/registry.cjs.map +1 -0
- package/dist/ocr/registry.d.cts +102 -0
- package/dist/ocr/registry.d.ts +102 -0
- package/dist/ocr/registry.js +71 -0
- package/dist/ocr/registry.js.map +1 -0
- package/dist/ocr/tesseract-wasm-backend.cjs +3566 -0
- package/dist/ocr/tesseract-wasm-backend.cjs.map +1 -0
- package/dist/ocr/tesseract-wasm-backend.d.cts +257 -0
- package/dist/ocr/tesseract-wasm-backend.d.ts +257 -0
- package/dist/ocr/tesseract-wasm-backend.js +3551 -0
- package/dist/ocr/tesseract-wasm-backend.js.map +1 -0
- package/dist/runtime.cjs +174 -0
- package/dist/runtime.cjs.map +1 -0
- package/dist/runtime.d.cts +256 -0
- package/dist/runtime.d.ts +256 -0
- package/dist/runtime.js +153 -0
- package/dist/runtime.js.map +1 -0
- package/dist/types-CKjcIYcX.d.cts +294 -0
- package/dist/types-CKjcIYcX.d.ts +294 -0
- package/package.json +140 -0
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
// typescript/adapters/wasm-adapter.ts
|
|
2
|
+
// Upper bound, in bytes, on the size of a File/Blob accepted by fileToUint8Array:
// 512 * 2^20 = 536,870,912 bytes.
var MAX_FILE_SIZE = 512 * 2 ** 20;
|
|
3
|
+
/**
 * Check whether a value is a primitive number or the literal `null`.
 *
 * Any primitive number passes (the check is `typeof`, so NaN and Infinity
 * are included); `undefined` and boxed `Number` objects do not.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} `true` for a primitive number or `null`, otherwise `false`.
 */
function isNumberOrNull(value) {
  if (value === null) {
    return true;
  }
  return typeof value === "number";
}
|
|
6
|
+
/**
 * Check whether a value is a primitive string or the literal `null`.
 *
 * The empty string passes; `undefined` and boxed `String` objects do not.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} `true` for a primitive string or `null`, otherwise `false`.
 */
function isStringOrNull(value) {
  if (value === null) {
    return true;
  }
  return typeof value === "string";
}
|
|
9
|
+
/**
 * Check whether a value is one of the two primitive booleans.
 *
 * Truthy/falsy values such as `0`, `1`, `""` or `null` are not booleans and
 * are rejected, as are boxed `Boolean` objects.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} `true` only for the primitives `true` and `false`.
 */
function isBoolean(value) {
  return value === true || value === false;
}
|
|
12
|
+
/**
 * Read a File/Blob-like object fully into a Uint8Array.
 *
 * The object only needs a numeric `size` and an `arrayBuffer()` method that
 * resolves to an ArrayBuffer. Inputs whose `size` exceeds MAX_FILE_SIZE are
 * rejected before any bytes are read.
 *
 * Every failure (size limit exceeded, `arrayBuffer()` rejecting, or `file`
 * not being a usable object) is reported as an Error whose message starts with
 * "Failed to read file: ". The underlying error is attached as `cause` so the
 * original type and stack trace stay available to callers, matching what
 * wrapWasmError does for WASM failures.
 *
 * @param {File | Blob} file - The file or blob to read.
 * @returns {Promise<Uint8Array>} The file contents as bytes.
 * @throws {Error} If the file is too large or cannot be read.
 */
async function fileToUint8Array(file) {
  try {
    if (file.size > MAX_FILE_SIZE) {
      throw new Error(
        `File size (${file.size} bytes) exceeds maximum (${MAX_FILE_SIZE} bytes). Maximum file size is 512 MB.`
      );
    }
    const arrayBuffer = await file.arrayBuffer();
    return new Uint8Array(arrayBuffer);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    // Keep the original error reachable instead of flattening it to a string.
    throw new Error(`Failed to read file: ${reason}`, { cause: error });
  }
}
|
|
25
|
+
/**
 * Produce a pruned copy of an extraction config.
 *
 * Rules applied, recursively:
 * - `null` / `undefined` property values are omitted from objects;
 * - nested objects that end up with no properties are omitted as well;
 * - arrays keep their length: each element is normalized in place, so a
 *   nullish or emptied element becomes `null` rather than being removed;
 * - every other value (numbers, strings, booleans, ...) is passed through.
 *
 * @param {object | null | undefined} config - Extraction configuration, or a falsy value.
 * @returns {Record<string, unknown>} A new object; `{}` when `config` is falsy.
 */
function configToJS(config) {
  if (!config) {
    return {};
  }

  // Copies the own enumerable entries of `source`, skipping any whose
  // normalized value is nullish.
  const copyDefinedEntries = (source) => {
    const target = {};
    for (const [name, raw] of Object.entries(source)) {
      const cleaned = normalizeValue(raw);
      if (cleaned !== null && cleaned !== undefined) {
        target[name] = cleaned;
      }
    }
    return target;
  };

  // Normalizes a single value; returns `null` for "nothing worth keeping".
  const normalizeValue = (input) => {
    if (input === null || input === undefined) {
      return null;
    }
    if (typeof input !== "object") {
      return input;
    }
    if (Array.isArray(input)) {
      return input.map(normalizeValue);
    }
    const kept = copyDefinedEntries(input);
    return Object.keys(kept).length > 0 ? kept : null;
  };

  return copyDefinedEntries(config);
}
|
|
58
|
+
/**
 * Collect the well-formed entries of a raw `tables` value.
 *
 * A table is kept only when `cells` is an array of arrays of strings,
 * `markdown` is a string and `pageNumber` is a number. Anything else
 * (including a non-array `rawTables`) is skipped silently rather than
 * raising, so the result is always an array.
 *
 * @param {unknown} rawTables - The `tables` property of the raw result.
 * @returns {Array<{cells: string[][], markdown: string, pageNumber: number}>}
 */
function collectValidTables(rawTables) {
  const tables = [];
  if (!Array.isArray(rawTables)) {
    return tables;
  }
  for (const table of rawTables) {
    if (!table || typeof table !== "object") {
      continue;
    }
    const hasStringGrid =
      Array.isArray(table.cells) &&
      table.cells.every((row) => Array.isArray(row) && row.every((cell) => typeof cell === "string"));
    if (hasStringGrid && typeof table.markdown === "string" && typeof table.pageNumber === "number") {
      tables.push({
        cells: table.cells,
        markdown: table.markdown,
        pageNumber: table.pageNumber
      });
    }
  }
  return tables;
}

/**
 * Validate one raw chunk and copy its known fields into a fresh object.
 *
 * @param {unknown} chunk - One element of the raw `chunks` array.
 * @returns {{content: string, embedding: number[] | null, metadata: object}}
 * @throws {Error} If the chunk, its content, its embedding or any metadata
 *   field has the wrong type.
 */
function parseChunk(chunk) {
  if (!chunk || typeof chunk !== "object") {
    throw new Error("Invalid chunk structure");
  }
  if (typeof chunk.content !== "string") {
    throw new Error("Invalid chunk: missing content");
  }
  if (!chunk.metadata || typeof chunk.metadata !== "object") {
    throw new Error("Invalid chunk: missing metadata");
  }
  const metadata = chunk.metadata;

  // A non-array embedding (missing, null, ...) is normalized to null.
  let embedding = null;
  if (Array.isArray(chunk.embedding)) {
    if (!chunk.embedding.every((item) => typeof item === "number")) {
      throw new Error("Invalid chunk: embedding must contain only numbers");
    }
    embedding = chunk.embedding;
  }

  if (typeof metadata.charStart !== "number") {
    throw new Error("Invalid chunk metadata: charStart must be a number");
  }
  if (typeof metadata.charEnd !== "number") {
    throw new Error("Invalid chunk metadata: charEnd must be a number");
  }
  if (!isNumberOrNull(metadata.tokenCount)) {
    throw new Error("Invalid chunk metadata: tokenCount must be a number or null");
  }
  if (typeof metadata.chunkIndex !== "number") {
    throw new Error("Invalid chunk metadata: chunkIndex must be a number");
  }
  if (typeof metadata.totalChunks !== "number") {
    throw new Error("Invalid chunk metadata: totalChunks must be a number");
  }

  return {
    content: chunk.content,
    embedding,
    metadata: {
      charStart: metadata.charStart,
      charEnd: metadata.charEnd,
      tokenCount: metadata.tokenCount,
      chunkIndex: metadata.chunkIndex,
      totalChunks: metadata.totalChunks
    }
  };
}

/**
 * Validate one raw image and copy its known fields into a fresh object.
 *
 * A truthy `ocrResult` is parsed recursively with jsToExtractionResult;
 * a falsy one becomes `null`.
 *
 * @param {unknown} image - One element of the raw `images` array.
 * @returns {object} The validated image record.
 * @throws {Error} If any field has the wrong type, or the nested OCR result
 *   is itself invalid.
 */
function parseImage(image) {
  if (!image || typeof image !== "object") {
    throw new Error("Invalid image structure");
  }
  if (!(image.data instanceof Uint8Array)) {
    throw new Error("Invalid image: data must be Uint8Array");
  }
  if (typeof image.format !== "string") {
    throw new Error("Invalid image: missing format");
  }
  if (typeof image.imageIndex !== "number") {
    throw new Error("Invalid image: imageIndex must be a number");
  }
  if (!isNumberOrNull(image.pageNumber)) {
    throw new Error("Invalid image: pageNumber must be a number or null");
  }
  if (!isNumberOrNull(image.width)) {
    throw new Error("Invalid image: width must be a number or null");
  }
  if (!isNumberOrNull(image.height)) {
    throw new Error("Invalid image: height must be a number or null");
  }
  if (!isNumberOrNull(image.bitsPerComponent)) {
    throw new Error("Invalid image: bitsPerComponent must be a number or null");
  }
  if (!isBoolean(image.isMask)) {
    throw new Error("Invalid image: isMask must be a boolean");
  }
  if (!isStringOrNull(image.colorspace)) {
    throw new Error("Invalid image: colorspace must be a string or null");
  }
  if (!isStringOrNull(image.description)) {
    throw new Error("Invalid image: description must be a string or null");
  }

  return {
    data: image.data,
    format: image.format,
    imageIndex: image.imageIndex,
    pageNumber: image.pageNumber,
    width: image.width,
    height: image.height,
    colorspace: image.colorspace,
    bitsPerComponent: image.bitsPerComponent,
    isMask: image.isMask,
    description: image.description,
    ocrResult: image.ocrResult ? jsToExtractionResult(image.ocrResult) : null
  };
}

/**
 * Read the detected-languages list from a raw result.
 *
 * `detectedLanguages` is used when it is an array; otherwise the snake_case
 * `detected_languages` property is consulted.
 *
 * @param {Record<string, unknown>} result - The raw result object.
 * @returns {string[] | null} The list, or `null` when neither property is an array.
 * @throws {Error} If the chosen array contains a non-string element.
 */
function parseDetectedLanguages(result) {
  const raw = Array.isArray(result.detectedLanguages) ? result.detectedLanguages : result.detected_languages;
  if (!Array.isArray(raw)) {
    return null;
  }
  if (!raw.every((lang) => typeof lang === "string")) {
    throw new Error("Invalid result: detectedLanguages must contain only strings");
  }
  return raw;
}

/**
 * Validate a raw extraction result and convert it to the normalized shape
 * `{ content, mimeType, metadata, tables, detectedLanguages, chunks, images }`.
 *
 * - `mimeType` is taken from `mimeType`, falling back to `mime_type`.
 * - `tables` is always an array; malformed tables are dropped, not reported.
 * - `chunks`, `images` and `detectedLanguages` are `null` when the input does
 *   not carry an array for them; when it does, every element is validated
 *   strictly and the first invalid one raises.
 *
 * @param {unknown} jsValue - The raw value to parse.
 * @returns {object} The normalized extraction result.
 * @throws {Error} If `jsValue` is not an object, if `content`, the MIME type
 *   or `metadata` is missing or mistyped, or if a chunk, image or language
 *   entry is invalid.
 */
function jsToExtractionResult(jsValue) {
  if (!jsValue || typeof jsValue !== "object") {
    throw new Error("Invalid extraction result: value is not an object");
  }
  const result = jsValue;

  let mimeType = null;
  if (typeof result.mimeType === "string") {
    mimeType = result.mimeType;
  } else if (typeof result.mime_type === "string") {
    mimeType = result.mime_type;
  }

  if (typeof result.content !== "string") {
    throw new Error("Invalid extraction result: missing or invalid content");
  }
  if (typeof mimeType !== "string") {
    throw new Error("Invalid extraction result: missing or invalid mimeType");
  }
  if (!result.metadata || typeof result.metadata !== "object") {
    throw new Error("Invalid extraction result: missing or invalid metadata");
  }

  // Order matters for which error surfaces first: tables, chunks, images, languages.
  const tables = collectValidTables(result.tables);
  const chunks = Array.isArray(result.chunks) ? result.chunks.map((chunk) => parseChunk(chunk)) : null;
  const images = Array.isArray(result.images) ? result.images.map((image) => parseImage(image)) : null;
  const detectedLanguages = parseDetectedLanguages(result);

  return {
    content: result.content,
    mimeType,
    // metadata was verified to be a non-null object above, so no fallback is needed.
    metadata: result.metadata,
    tables,
    detectedLanguages,
    chunks,
    images
  };
}
|
|
201
|
+
/**
 * Build a new Error that prefixes a failure with the operation that failed.
 *
 * The message has the form `Error <context>: <detail>`. When `error` is an
 * Error instance, `<detail>` is its message and the original is attached as
 * `cause`; any other thrown value is stringified and no `cause` is set.
 *
 * @param {unknown} error - The value that was thrown.
 * @param {string} context - Short description of the operation, e.g. "extracting document".
 * @returns {Error} The new Error; it is returned, not thrown.
 */
function wrapWasmError(error, context) {
  if (!(error instanceof Error)) {
    return new Error(`Error ${context}: ${String(error)}`);
  }
  return new Error(`Error ${context}: ${error.message}`, { cause: error });
}
|
|
210
|
+
/**
 * Shallow structural check for an extraction result.
 *
 * Passes when the value is a non-null object with a string `content`, a
 * string `mimeType` or `mime_type`, a non-null object `metadata` and an
 * array `tables`. The contents of `metadata` and `tables`, and any other
 * properties, are not inspected.
 *
 * @param {unknown} value - The value to check.
 * @returns {boolean} `true` when all of the checks above pass.
 */
function isValidExtractionResult(value) {
  if (!value || typeof value !== "object") {
    return false;
  }
  const candidate = value;
  if (typeof candidate.content !== "string") {
    return false;
  }
  if (typeof candidate.mimeType !== "string" && typeof candidate.mime_type !== "string") {
    return false;
  }
  if (candidate.metadata === null || typeof candidate.metadata !== "object") {
    return false;
  }
  return Array.isArray(candidate.tables);
}
|
|
217
|
+
export {
|
|
218
|
+
configToJS,
|
|
219
|
+
fileToUint8Array,
|
|
220
|
+
isValidExtractionResult,
|
|
221
|
+
jsToExtractionResult,
|
|
222
|
+
wrapWasmError
|
|
223
|
+
};
|
|
224
|
+
//# sourceMappingURL=wasm-adapter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../typescript/adapters/wasm-adapter.ts"],"sourcesContent":["/**\n * WASM Type Adapter\n *\n * This module provides type adapters for converting between JavaScript/TypeScript\n * types and WASM-compatible types, handling File/Blob conversions, config normalization,\n * and result parsing.\n *\n * @example File Conversion\n * ```typescript\n * import { fileToUint8Array } from '@kreuzberg/wasm/adapters/wasm-adapter';\n *\n * const file = event.target.files[0];\n * const bytes = await fileToUint8Array(file);\n * const result = await extractBytes(bytes, file.type);\n * ```\n *\n * @example Config Normalization\n * ```typescript\n * import { configToJS } from '@kreuzberg/wasm/adapters/wasm-adapter';\n *\n * const config = {\n * ocr: { backend: 'tesseract', language: 'eng' },\n * chunking: { maxChars: 1000 }\n * };\n * const normalized = configToJS(config);\n * ```\n */\n\nimport type { Chunk, ExtractedImage, ExtractionConfig, ExtractionResult, Metadata, Table } from \"../types.js\";\n\n/**\n * Maximum file size for processing (512 MB)\n *\n * @internal\n */\nconst MAX_FILE_SIZE = 512 * 1024 * 1024;\n\n/**\n * Type predicate to validate numeric value or null\n *\n * @internal\n */\nfunction isNumberOrNull(value: unknown): value is number | null {\n\treturn typeof value === \"number\" || value === null;\n}\n\n/**\n * Type predicate to validate string value or null\n *\n * @internal\n */\nfunction isStringOrNull(value: unknown): value is string | null {\n\treturn typeof value === \"string\" || value === null;\n}\n\n/**\n * Type predicate to validate boolean value\n *\n * @internal\n */\nfunction isBoolean(value: unknown): value is boolean {\n\treturn typeof value === \"boolean\";\n}\n\n/**\n * Convert a File or Blob to Uint8Array\n *\n * Handles both browser File API and server-side Blob-like objects,\n * providing a unified interface for reading binary data.\n *\n * @param file - The File or Blob to convert\n * @returns Promise resolving to the 
byte array\n * @throws {Error} If the file cannot be read or exceeds size limit\n *\n * @example\n * ```typescript\n * const file = document.getElementById('input').files[0];\n * const bytes = await fileToUint8Array(file);\n * const result = await extractBytes(bytes, 'application/pdf');\n * ```\n */\nexport async function fileToUint8Array(file: File | Blob): Promise<Uint8Array> {\n\ttry {\n\t\tif (file.size > MAX_FILE_SIZE) {\n\t\t\tthrow new Error(\n\t\t\t\t`File size (${file.size} bytes) exceeds maximum (${MAX_FILE_SIZE} bytes). Maximum file size is 512 MB.`,\n\t\t\t);\n\t\t}\n\n\t\tconst arrayBuffer = await file.arrayBuffer();\n\t\treturn new Uint8Array(arrayBuffer);\n\t} catch (error) {\n\t\tthrow new Error(`Failed to read file: ${error instanceof Error ? error.message : String(error)}`);\n\t}\n}\n\n/**\n * Normalize ExtractionConfig for WASM processing\n *\n * Converts TypeScript configuration objects to a WASM-compatible format,\n * handling null values, undefined properties, and nested structures.\n *\n * @param config - The extraction configuration or null\n * @returns Normalized configuration object suitable for WASM\n *\n * @example\n * ```typescript\n * const config: ExtractionConfig = {\n * ocr: { backend: 'tesseract' },\n * chunking: { maxChars: 1000 }\n * };\n * const wasmConfig = configToJS(config);\n * ```\n */\nexport function configToJS(config: ExtractionConfig | null): Record<string, unknown> {\n\tif (!config) {\n\t\treturn {};\n\t}\n\n\tconst normalized: Record<string, unknown> = {};\n\n\t// Recursively normalize nested objects\n\tconst normalizeValue = (value: unknown): unknown => {\n\t\tif (value === null || value === undefined) {\n\t\t\treturn null;\n\t\t}\n\t\tif (typeof value === \"object\") {\n\t\t\tif (Array.isArray(value)) {\n\t\t\t\treturn value.map(normalizeValue);\n\t\t\t}\n\t\t\tconst obj = value as Record<string, unknown>;\n\t\t\tconst normalized: Record<string, unknown> = {};\n\t\t\tfor (const [key, val] of Object.entries(obj)) 
{\n\t\t\t\tconst normalizedVal = normalizeValue(val);\n\t\t\t\tif (normalizedVal !== null && normalizedVal !== undefined) {\n\t\t\t\t\tnormalized[key] = normalizedVal;\n\t\t\t\t}\n\t\t\t}\n\t\t\treturn Object.keys(normalized).length > 0 ? normalized : null;\n\t\t}\n\t\treturn value;\n\t};\n\n\tfor (const [key, value] of Object.entries(config)) {\n\t\tconst normalizedValue = normalizeValue(value);\n\t\tif (normalizedValue !== null && normalizedValue !== undefined) {\n\t\t\tnormalized[key] = normalizedValue;\n\t\t}\n\t}\n\n\treturn normalized;\n}\n\n/**\n * Parse WASM extraction result and convert to TypeScript type\n *\n * Handles conversion of WASM-returned objects to proper ExtractionResult types,\n * including proper array conversions and type assertions for tables, chunks, and images.\n *\n * @param jsValue - The raw WASM result value\n * @returns Properly typed ExtractionResult\n * @throws {Error} If the result structure is invalid\n *\n * @example\n * ```typescript\n * const wasmResult = await wasmExtract(bytes, mimeType, config);\n * const result = jsToExtractionResult(wasmResult);\n * console.log(result.content);\n * ```\n */\nexport function jsToExtractionResult(jsValue: unknown): ExtractionResult {\n\tif (!jsValue || typeof jsValue !== \"object\") {\n\t\tthrow new Error(\"Invalid extraction result: value is not an object\");\n\t}\n\n\tconst result = jsValue as Record<string, unknown>;\n\tconst mimeType =\n\t\ttypeof result.mimeType === \"string\"\n\t\t\t? result.mimeType\n\t\t\t: typeof result.mime_type === \"string\"\n\t\t\t\t? 
result.mime_type\n\t\t\t\t: null;\n\n\t// Validate required fields\n\tif (typeof result.content !== \"string\") {\n\t\tthrow new Error(\"Invalid extraction result: missing or invalid content\");\n\t}\n\tif (typeof mimeType !== \"string\") {\n\t\tthrow new Error(\"Invalid extraction result: missing or invalid mimeType\");\n\t}\n\tif (!result.metadata || typeof result.metadata !== \"object\") {\n\t\tthrow new Error(\"Invalid extraction result: missing or invalid metadata\");\n\t}\n\n\t// Parse tables\n\tconst tables: Table[] = [];\n\tif (Array.isArray(result.tables)) {\n\t\tfor (const table of result.tables) {\n\t\t\tif (table && typeof table === \"object\") {\n\t\t\t\tconst t = table as Record<string, unknown>;\n\t\t\t\t// Validate table structure before type casting\n\t\t\t\tif (\n\t\t\t\t\tArray.isArray(t.cells) &&\n\t\t\t\t\tt.cells.every((row) => Array.isArray(row) && row.every((cell) => typeof cell === \"string\")) &&\n\t\t\t\t\ttypeof t.markdown === \"string\" &&\n\t\t\t\t\ttypeof t.pageNumber === \"number\"\n\t\t\t\t) {\n\t\t\t\t\ttables.push({\n\t\t\t\t\t\tcells: t.cells as string[][],\n\t\t\t\t\t\tmarkdown: t.markdown,\n\t\t\t\t\t\tpageNumber: t.pageNumber,\n\t\t\t\t\t});\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n\n\t// Parse chunks\n\tconst chunks: Chunk[] | null = Array.isArray(result.chunks)\n\t\t? 
result.chunks.map((chunk) => {\n\t\t\t\tif (!chunk || typeof chunk !== \"object\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk structure\");\n\t\t\t\t}\n\t\t\t\tconst c = chunk as Record<string, unknown>;\n\t\t\t\tif (typeof c.content !== \"string\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk: missing content\");\n\t\t\t\t}\n\t\t\t\tif (!c.metadata || typeof c.metadata !== \"object\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk: missing metadata\");\n\t\t\t\t}\n\t\t\t\tconst metadata = c.metadata as Record<string, unknown>;\n\n\t\t\t\t// Validate embedding array contains only numbers\n\t\t\t\tlet embedding: number[] | null = null;\n\t\t\t\tif (Array.isArray(c.embedding)) {\n\t\t\t\t\tif (!c.embedding.every((item) => typeof item === \"number\")) {\n\t\t\t\t\t\tthrow new Error(\"Invalid chunk: embedding must contain only numbers\");\n\t\t\t\t\t}\n\t\t\t\t\tembedding = c.embedding;\n\t\t\t\t}\n\n\t\t\t\t// Validate metadata fields\n\t\t\t\tif (typeof metadata.charStart !== \"number\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk metadata: charStart must be a number\");\n\t\t\t\t}\n\t\t\t\tif (typeof metadata.charEnd !== \"number\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk metadata: charEnd must be a number\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(metadata.tokenCount)) {\n\t\t\t\t\tthrow new Error(\"Invalid chunk metadata: tokenCount must be a number or null\");\n\t\t\t\t}\n\t\t\t\tif (typeof metadata.chunkIndex !== \"number\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk metadata: chunkIndex must be a number\");\n\t\t\t\t}\n\t\t\t\tif (typeof metadata.totalChunks !== \"number\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk metadata: totalChunks must be a number\");\n\t\t\t\t}\n\n\t\t\t\treturn {\n\t\t\t\t\tcontent: c.content,\n\t\t\t\t\tembedding,\n\t\t\t\t\tmetadata: {\n\t\t\t\t\t\tcharStart: metadata.charStart,\n\t\t\t\t\t\tcharEnd: metadata.charEnd,\n\t\t\t\t\t\ttokenCount: metadata.tokenCount,\n\t\t\t\t\t\tchunkIndex: 
metadata.chunkIndex,\n\t\t\t\t\t\ttotalChunks: metadata.totalChunks,\n\t\t\t\t\t},\n\t\t\t\t};\n\t\t\t})\n\t\t: null;\n\n\t// Parse images\n\tconst images: ExtractedImage[] | null = Array.isArray(result.images)\n\t\t? result.images.map((image) => {\n\t\t\t\tif (!image || typeof image !== \"object\") {\n\t\t\t\t\tthrow new Error(\"Invalid image structure\");\n\t\t\t\t}\n\t\t\t\tconst img = image as Record<string, unknown>;\n\t\t\t\tif (!(img.data instanceof Uint8Array)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: data must be Uint8Array\");\n\t\t\t\t}\n\t\t\t\tif (typeof img.format !== \"string\") {\n\t\t\t\t\tthrow new Error(\"Invalid image: missing format\");\n\t\t\t\t}\n\n\t\t\t\t// Validate numeric fields\n\t\t\t\tif (typeof img.imageIndex !== \"number\") {\n\t\t\t\t\tthrow new Error(\"Invalid image: imageIndex must be a number\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(img.pageNumber)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: pageNumber must be a number or null\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(img.width)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: width must be a number or null\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(img.height)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: height must be a number or null\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(img.bitsPerComponent)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: bitsPerComponent must be a number or null\");\n\t\t\t\t}\n\n\t\t\t\t// Validate boolean field\n\t\t\t\tif (!isBoolean(img.isMask)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: isMask must be a boolean\");\n\t\t\t\t}\n\n\t\t\t\t// Validate string fields\n\t\t\t\tif (!isStringOrNull(img.colorspace)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: colorspace must be a string or null\");\n\t\t\t\t}\n\t\t\t\tif (!isStringOrNull(img.description)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: description must be a string or null\");\n\t\t\t\t}\n\n\t\t\t\treturn {\n\t\t\t\t\tdata: img.data,\n\t\t\t\t\tformat: 
img.format,\n\t\t\t\t\timageIndex: img.imageIndex,\n\t\t\t\t\tpageNumber: img.pageNumber,\n\t\t\t\t\twidth: img.width,\n\t\t\t\t\theight: img.height,\n\t\t\t\t\tcolorspace: img.colorspace,\n\t\t\t\t\tbitsPerComponent: img.bitsPerComponent,\n\t\t\t\t\tisMask: img.isMask,\n\t\t\t\t\tdescription: img.description,\n\t\t\t\t\tocrResult: img.ocrResult ? jsToExtractionResult(img.ocrResult) : null,\n\t\t\t\t};\n\t\t\t})\n\t\t: null;\n\n\t// Validate detectedLanguages array\n\tlet detectedLanguages: string[] | null = null;\n\tconst detectedLanguagesRaw = Array.isArray(result.detectedLanguages)\n\t\t? result.detectedLanguages\n\t\t: result.detected_languages;\n\tif (Array.isArray(detectedLanguagesRaw)) {\n\t\tif (!detectedLanguagesRaw.every((lang) => typeof lang === \"string\")) {\n\t\t\tthrow new Error(\"Invalid result: detectedLanguages must contain only strings\");\n\t\t}\n\t\tdetectedLanguages = detectedLanguagesRaw;\n\t}\n\n\treturn {\n\t\tcontent: result.content,\n\t\tmimeType,\n\t\tmetadata: (result.metadata ?? 
{}) as Metadata,\n\t\ttables,\n\t\tdetectedLanguages,\n\t\tchunks,\n\t\timages,\n\t};\n}\n\n/**\n * Wrap and format WASM errors with context\n *\n * Converts WASM error messages to JavaScript Error objects with proper context\n * and stack trace information when available.\n *\n * @param error - The error from WASM\n * @param context - Additional context about what operation failed\n * @returns A formatted Error object\n *\n * @internal\n *\n * @example\n * ```typescript\n * try {\n * await wasmExtract(bytes, mimeType);\n * } catch (error) {\n * throw wrapWasmError(error, 'extracting document');\n * }\n * ```\n */\nexport function wrapWasmError(error: unknown, context: string): Error {\n\tif (error instanceof Error) {\n\t\treturn new Error(`Error ${context}: ${error.message}`, {\n\t\t\tcause: error,\n\t\t});\n\t}\n\n\tconst message = String(error);\n\treturn new Error(`Error ${context}: ${message}`);\n}\n\n/**\n * Validate that a WASM-returned value conforms to ExtractionResult structure\n *\n * Performs structural validation without full type checking,\n * useful for runtime validation of WASM output.\n *\n * @param value - The value to validate\n * @returns True if value appears to be a valid ExtractionResult\n *\n * @internal\n */\nexport function isValidExtractionResult(value: unknown): value is ExtractionResult {\n\tif (!value || typeof value !== \"object\") {\n\t\treturn false;\n\t}\n\n\tconst obj = value as Record<string, unknown>;\n\treturn (\n\t\ttypeof obj.content === \"string\" &&\n\t\t(typeof obj.mimeType === \"string\" || typeof obj.mime_type === \"string\") &&\n\t\tobj.metadata !== null &&\n\t\ttypeof obj.metadata === \"object\" 
&&\n\t\tArray.isArray(obj.tables)\n\t);\n}\n"],"mappings":";AAmCA,IAAM,gBAAgB,MAAM,OAAO;AAOnC,SAAS,eAAe,OAAwC;AAC/D,SAAO,OAAO,UAAU,YAAY,UAAU;AAC/C;AAOA,SAAS,eAAe,OAAwC;AAC/D,SAAO,OAAO,UAAU,YAAY,UAAU;AAC/C;AAOA,SAAS,UAAU,OAAkC;AACpD,SAAO,OAAO,UAAU;AACzB;AAmBA,eAAsB,iBAAiB,MAAwC;AAC9E,MAAI;AACH,QAAI,KAAK,OAAO,eAAe;AAC9B,YAAM,IAAI;AAAA,QACT,cAAc,KAAK,IAAI,4BAA4B,aAAa;AAAA,MACjE;AAAA,IACD;AAEA,UAAM,cAAc,MAAM,KAAK,YAAY;AAC3C,WAAO,IAAI,WAAW,WAAW;AAAA,EAClC,SAAS,OAAO;AACf,UAAM,IAAI,MAAM,wBAAwB,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,CAAC,EAAE;AAAA,EACjG;AACD;AAoBO,SAAS,WAAW,QAA0D;AACpF,MAAI,CAAC,QAAQ;AACZ,WAAO,CAAC;AAAA,EACT;AAEA,QAAM,aAAsC,CAAC;AAG7C,QAAM,iBAAiB,CAAC,UAA4B;AACnD,QAAI,UAAU,QAAQ,UAAU,QAAW;AAC1C,aAAO;AAAA,IACR;AACA,QAAI,OAAO,UAAU,UAAU;AAC9B,UAAI,MAAM,QAAQ,KAAK,GAAG;AACzB,eAAO,MAAM,IAAI,cAAc;AAAA,MAChC;AACA,YAAM,MAAM;AACZ,YAAMA,cAAsC,CAAC;AAC7C,iBAAW,CAAC,KAAK,GAAG,KAAK,OAAO,QAAQ,GAAG,GAAG;AAC7C,cAAM,gBAAgB,eAAe,GAAG;AACxC,YAAI,kBAAkB,QAAQ,kBAAkB,QAAW;AAC1D,UAAAA,YAAW,GAAG,IAAI;AAAA,QACnB;AAAA,MACD;AACA,aAAO,OAAO,KAAKA,WAAU,EAAE,SAAS,IAAIA,cAAa;AAAA,IAC1D;AACA,WAAO;AAAA,EACR;AAEA,aAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,MAAM,GAAG;AAClD,UAAM,kBAAkB,eAAe,KAAK;AAC5C,QAAI,oBAAoB,QAAQ,oBAAoB,QAAW;AAC9D,iBAAW,GAAG,IAAI;AAAA,IACnB;AAAA,EACD;AAEA,SAAO;AACR;AAmBO,SAAS,qBAAqB,SAAoC;AACxE,MAAI,CAAC,WAAW,OAAO,YAAY,UAAU;AAC5C,UAAM,IAAI,MAAM,mDAAmD;AAAA,EACpE;AAEA,QAAM,SAAS;AACf,QAAM,WACL,OAAO,OAAO,aAAa,WACxB,OAAO,WACP,OAAO,OAAO,cAAc,WAC3B,OAAO,YACP;AAGL,MAAI,OAAO,OAAO,YAAY,UAAU;AACvC,UAAM,IAAI,MAAM,uDAAuD;AAAA,EACxE;AACA,MAAI,OAAO,aAAa,UAAU;AACjC,UAAM,IAAI,MAAM,wDAAwD;AAAA,EACzE;AACA,MAAI,CAAC,OAAO,YAAY,OAAO,OAAO,aAAa,UAAU;AAC5D,UAAM,IAAI,MAAM,wDAAwD;AAAA,EACzE;AAGA,QAAM,SAAkB,CAAC;AACzB,MAAI,MAAM,QAAQ,OAAO,MAAM,GAAG;AACjC,eAAW,SAAS,OAAO,QAAQ;AAClC,UAAI,SAAS,OAAO,UAAU,UAAU;AACvC,cAAM,IAAI;AAEV,YACC,MAAM,QAAQ,EAAE,KAAK,KACrB,EAAE,MAAM,MAAM,CAAC,QAAQ,MAAM,QAAQ,GAAG,KAAK,IAAI,MAAM,CAAC,SAAS,OAAO,SAAS,QAAQ,CAAC,KAC1F,OAAO,EAAE,aAAa,YACtB,OAAO,EAAE,eAAe,UACvB;AACD,iBAAO,KAAK;AAAA,YACX,OAAO,
EAAE;AAAA,YACT,UAAU,EAAE;AAAA,YACZ,YAAY,EAAE;AAAA,UACf,CAAC;AAAA,QACF;AAAA,MACD;AAAA,IACD;AAAA,EACD;AAGA,QAAM,SAAyB,MAAM,QAAQ,OAAO,MAAM,IACvD,OAAO,OAAO,IAAI,CAAC,UAAU;AAC7B,QAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACxC,YAAM,IAAI,MAAM,yBAAyB;AAAA,IAC1C;AACA,UAAM,IAAI;AACV,QAAI,OAAO,EAAE,YAAY,UAAU;AAClC,YAAM,IAAI,MAAM,gCAAgC;AAAA,IACjD;AACA,QAAI,CAAC,EAAE,YAAY,OAAO,EAAE,aAAa,UAAU;AAClD,YAAM,IAAI,MAAM,iCAAiC;AAAA,IAClD;AACA,UAAM,WAAW,EAAE;AAGnB,QAAI,YAA6B;AACjC,QAAI,MAAM,QAAQ,EAAE,SAAS,GAAG;AAC/B,UAAI,CAAC,EAAE,UAAU,MAAM,CAAC,SAAS,OAAO,SAAS,QAAQ,GAAG;AAC3D,cAAM,IAAI,MAAM,oDAAoD;AAAA,MACrE;AACA,kBAAY,EAAE;AAAA,IACf;AAGA,QAAI,OAAO,SAAS,cAAc,UAAU;AAC3C,YAAM,IAAI,MAAM,oDAAoD;AAAA,IACrE;AACA,QAAI,OAAO,SAAS,YAAY,UAAU;AACzC,YAAM,IAAI,MAAM,kDAAkD;AAAA,IACnE;AACA,QAAI,CAAC,eAAe,SAAS,UAAU,GAAG;AACzC,YAAM,IAAI,MAAM,6DAA6D;AAAA,IAC9E;AACA,QAAI,OAAO,SAAS,eAAe,UAAU;AAC5C,YAAM,IAAI,MAAM,qDAAqD;AAAA,IACtE;AACA,QAAI,OAAO,SAAS,gBAAgB,UAAU;AAC7C,YAAM,IAAI,MAAM,sDAAsD;AAAA,IACvE;AAEA,WAAO;AAAA,MACN,SAAS,EAAE;AAAA,MACX;AAAA,MACA,UAAU;AAAA,QACT,WAAW,SAAS;AAAA,QACpB,SAAS,SAAS;AAAA,QAClB,YAAY,SAAS;AAAA,QACrB,YAAY,SAAS;AAAA,QACrB,aAAa,SAAS;AAAA,MACvB;AAAA,IACD;AAAA,EACD,CAAC,IACA;AAGH,QAAM,SAAkC,MAAM,QAAQ,OAAO,MAAM,IAChE,OAAO,OAAO,IAAI,CAAC,UAAU;AAC7B,QAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACxC,YAAM,IAAI,MAAM,yBAAyB;AAAA,IAC1C;AACA,UAAM,MAAM;AACZ,QAAI,EAAE,IAAI,gBAAgB,aAAa;AACtC,YAAM,IAAI,MAAM,wCAAwC;AAAA,IACzD;AACA,QAAI,OAAO,IAAI,WAAW,UAAU;AACnC,YAAM,IAAI,MAAM,+BAA+B;AAAA,IAChD;AAGA,QAAI,OAAO,IAAI,eAAe,UAAU;AACvC,YAAM,IAAI,MAAM,4CAA4C;AAAA,IAC7D;AACA,QAAI,CAAC,eAAe,IAAI,UAAU,GAAG;AACpC,YAAM,IAAI,MAAM,oDAAoD;AAAA,IACrE;AACA,QAAI,CAAC,eAAe,IAAI,KAAK,GAAG;AAC/B,YAAM,IAAI,MAAM,+CAA+C;AAAA,IAChE;AACA,QAAI,CAAC,eAAe,IAAI,MAAM,GAAG;AAChC,YAAM,IAAI,MAAM,gDAAgD;AAAA,IACjE;AACA,QAAI,CAAC,eAAe,IAAI,gBAAgB,GAAG;AAC1C,YAAM,IAAI,MAAM,0DAA0D;AAAA,IAC3E;AAGA,QAAI,CAAC,UAAU,IAAI,MAAM,GAAG;AAC3B,YAAM,IAAI,MAAM,yCAAyC;AAAA,IAC1D;AAGA,QAAI,CAAC,eAAe,IAAI,UAAU,GAAG;AACpC,YAAM,IAAI,MAAM,oDAAoD;AAAA,IACrE;AACA,QAAI,CAAC,eAAe,IAAI,WA
AW,GAAG;AACrC,YAAM,IAAI,MAAM,qDAAqD;AAAA,IACtE;AAEA,WAAO;AAAA,MACN,MAAM,IAAI;AAAA,MACV,QAAQ,IAAI;AAAA,MACZ,YAAY,IAAI;AAAA,MAChB,YAAY,IAAI;AAAA,MAChB,OAAO,IAAI;AAAA,MACX,QAAQ,IAAI;AAAA,MACZ,YAAY,IAAI;AAAA,MAChB,kBAAkB,IAAI;AAAA,MACtB,QAAQ,IAAI;AAAA,MACZ,aAAa,IAAI;AAAA,MACjB,WAAW,IAAI,YAAY,qBAAqB,IAAI,SAAS,IAAI;AAAA,IAClE;AAAA,EACD,CAAC,IACA;AAGH,MAAI,oBAAqC;AACzC,QAAM,uBAAuB,MAAM,QAAQ,OAAO,iBAAiB,IAChE,OAAO,oBACP,OAAO;AACV,MAAI,MAAM,QAAQ,oBAAoB,GAAG;AACxC,QAAI,CAAC,qBAAqB,MAAM,CAAC,SAAS,OAAO,SAAS,QAAQ,GAAG;AACpE,YAAM,IAAI,MAAM,6DAA6D;AAAA,IAC9E;AACA,wBAAoB;AAAA,EACrB;AAEA,SAAO;AAAA,IACN,SAAS,OAAO;AAAA,IAChB;AAAA,IACA,UAAW,OAAO,YAAY,CAAC;AAAA,IAC/B;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACD;AACD;AAuBO,SAAS,cAAc,OAAgB,SAAwB;AACrE,MAAI,iBAAiB,OAAO;AAC3B,WAAO,IAAI,MAAM,SAAS,OAAO,KAAK,MAAM,OAAO,IAAI;AAAA,MACtD,OAAO;AAAA,IACR,CAAC;AAAA,EACF;AAEA,QAAM,UAAU,OAAO,KAAK;AAC5B,SAAO,IAAI,MAAM,SAAS,OAAO,KAAK,OAAO,EAAE;AAChD;AAaO,SAAS,wBAAwB,OAA2C;AAClF,MAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACxC,WAAO;AAAA,EACR;AAEA,QAAM,MAAM;AACZ,SACC,OAAO,IAAI,YAAY,aACtB,OAAO,IAAI,aAAa,YAAY,OAAO,IAAI,cAAc,aAC9D,IAAI,aAAa,QACjB,OAAO,IAAI,aAAa,YACxB,MAAM,QAAQ,IAAI,MAAM;AAE1B;","names":["normalized"]}
|