@kreuzberg/wasm 4.0.0-rc.23 → 4.0.0-rc.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/wasm-adapter.d.ts +7 -10
- package/dist/adapters/wasm-adapter.d.ts.map +1 -0
- package/dist/adapters/wasm-adapter.js +41 -19
- package/dist/adapters/wasm-adapter.js.map +1 -1
- package/dist/index.d.ts +23 -240
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +41 -19
- package/dist/index.js.map +1 -1
- package/dist/ocr/registry.d.ts +7 -10
- package/dist/ocr/registry.d.ts.map +1 -0
- package/dist/ocr/tesseract-wasm-backend.d.ts +3 -6
- package/dist/ocr/tesseract-wasm-backend.d.ts.map +1 -0
- package/dist/plugin-registry.d.ts +246 -0
- package/dist/plugin-registry.d.ts.map +1 -0
- package/dist/runtime.d.ts +21 -22
- package/dist/runtime.d.ts.map +1 -0
- package/dist/{types-wVLLDHkl.d.cts → types.d.ts} +24 -25
- package/dist/types.d.ts.map +1 -0
- package/package.json +20 -63
- package/dist/adapters/wasm-adapter.cjs +0 -245
- package/dist/adapters/wasm-adapter.cjs.map +0 -1
- package/dist/adapters/wasm-adapter.d.cts +0 -121
- package/dist/index.cjs +0 -1389
- package/dist/index.cjs.map +0 -1
- package/dist/index.d.cts +0 -639
- package/dist/ocr/registry.cjs +0 -92
- package/dist/ocr/registry.cjs.map +0 -1
- package/dist/ocr/registry.d.cts +0 -102
- package/dist/ocr/tesseract-wasm-backend.cjs +0 -410
- package/dist/ocr/tesseract-wasm-backend.cjs.map +0 -1
- package/dist/ocr/tesseract-wasm-backend.d.cts +0 -257
- package/dist/runtime.cjs +0 -173
- package/dist/runtime.cjs.map +0 -1
- package/dist/runtime.d.cts +0 -256
- package/dist/types-wVLLDHkl.d.ts +0 -364
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
import { a as ExtractionConfig, E as ExtractionResult } from '../types-wVLLDHkl.js';
|
|
2
|
-
|
|
3
1
|
/**
|
|
4
2
|
* WASM Type Adapter
|
|
5
3
|
*
|
|
@@ -27,7 +25,7 @@ import { a as ExtractionConfig, E as ExtractionResult } from '../types-wVLLDHkl.
|
|
|
27
25
|
* const normalized = configToJS(config);
|
|
28
26
|
* ```
|
|
29
27
|
*/
|
|
30
|
-
|
|
28
|
+
import type { ExtractionConfig, ExtractionResult } from "../types.d.ts";
|
|
31
29
|
/**
|
|
32
30
|
* Convert a File or Blob to Uint8Array
|
|
33
31
|
*
|
|
@@ -45,7 +43,7 @@ import { a as ExtractionConfig, E as ExtractionResult } from '../types-wVLLDHkl.
|
|
|
45
43
|
* const result = await extractBytes(bytes, 'application/pdf');
|
|
46
44
|
* ```
|
|
47
45
|
*/
|
|
48
|
-
declare function fileToUint8Array(file: File | Blob): Promise<Uint8Array>;
|
|
46
|
+
export declare function fileToUint8Array(file: File | Blob): Promise<Uint8Array>;
|
|
49
47
|
/**
|
|
50
48
|
* Normalize ExtractionConfig for WASM processing
|
|
51
49
|
*
|
|
@@ -64,7 +62,7 @@ declare function fileToUint8Array(file: File | Blob): Promise<Uint8Array>;
|
|
|
64
62
|
* const wasmConfig = configToJS(config);
|
|
65
63
|
* ```
|
|
66
64
|
*/
|
|
67
|
-
declare function configToJS(config: ExtractionConfig | null): Record<string, unknown>;
|
|
65
|
+
export declare function configToJS(config: ExtractionConfig | null): Record<string, unknown>;
|
|
68
66
|
/**
|
|
69
67
|
* Parse WASM extraction result and convert to TypeScript type
|
|
70
68
|
*
|
|
@@ -82,7 +80,7 @@ declare function configToJS(config: ExtractionConfig | null): Record<string, unk
|
|
|
82
80
|
* console.log(result.content);
|
|
83
81
|
* ```
|
|
84
82
|
*/
|
|
85
|
-
declare function jsToExtractionResult(jsValue: unknown): ExtractionResult;
|
|
83
|
+
export declare function jsToExtractionResult(jsValue: unknown): ExtractionResult;
|
|
86
84
|
/**
|
|
87
85
|
* Wrap and format WASM errors with context
|
|
88
86
|
*
|
|
@@ -104,7 +102,7 @@ declare function jsToExtractionResult(jsValue: unknown): ExtractionResult;
|
|
|
104
102
|
* }
|
|
105
103
|
* ```
|
|
106
104
|
*/
|
|
107
|
-
declare function wrapWasmError(error: unknown, context: string): Error;
|
|
105
|
+
export declare function wrapWasmError(error: unknown, context: string): Error;
|
|
108
106
|
/**
|
|
109
107
|
* Validate that a WASM-returned value conforms to ExtractionResult structure
|
|
110
108
|
*
|
|
@@ -116,6 +114,5 @@ declare function wrapWasmError(error: unknown, context: string): Error;
|
|
|
116
114
|
*
|
|
117
115
|
* @internal
|
|
118
116
|
*/
|
|
119
|
-
declare function isValidExtractionResult(value: unknown): value is ExtractionResult;
|
|
120
|
-
|
|
121
|
-
export { configToJS, fileToUint8Array, isValidExtractionResult, jsToExtractionResult, wrapWasmError };
|
|
117
|
+
export declare function isValidExtractionResult(value: unknown): value is ExtractionResult;
|
|
118
|
+
//# sourceMappingURL=wasm-adapter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"wasm-adapter.d.ts","sourceRoot":"","sources":["../../typescript/adapters/wasm-adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAEH,OAAO,KAAK,EAAyB,gBAAgB,EAAE,gBAAgB,EAAmB,MAAM,aAAa,CAAC;AAoC9G;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAsB,gBAAgB,CAAC,IAAI,EAAE,IAAI,GAAG,IAAI,GAAG,OAAO,CAAC,UAAU,CAAC,CAa7E;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,wBAAgB,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,IAAI,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAoCnF;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAgB,oBAAoB,CAAC,OAAO,EAAE,OAAO,GAAG,gBAAgB,CAwMvE;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAgB,aAAa,CAAC,KAAK,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,GAAG,KAAK,CASpE;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,uBAAuB,CAAC,KAAK,EAAE,OAAO,GAAG,KAAK,IAAI,gBAAgB,CAajF"}
|
|
@@ -104,30 +104,52 @@ function jsToExtractionResult(jsValue) {
|
|
|
104
104
|
}
|
|
105
105
|
embedding = c.embedding;
|
|
106
106
|
}
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
107
|
+
const coerceToNumber = (value, fieldName) => {
|
|
108
|
+
if (typeof value === "number") {
|
|
109
|
+
return value;
|
|
110
|
+
}
|
|
111
|
+
if (typeof value === "bigint") {
|
|
112
|
+
return Number(value);
|
|
113
|
+
}
|
|
114
|
+
if (typeof value === "string") {
|
|
115
|
+
const parsed = parseInt(value, 10);
|
|
116
|
+
if (isNaN(parsed)) {
|
|
117
|
+
throw new Error(`Invalid chunk metadata: ${fieldName} must be a valid number, got "${value}"`);
|
|
118
|
+
}
|
|
119
|
+
return parsed;
|
|
120
|
+
}
|
|
121
|
+
throw new Error(`Invalid chunk metadata: ${fieldName} must be a number, got ${typeof value}`);
|
|
122
|
+
};
|
|
123
|
+
const charStart = coerceToNumber(
|
|
124
|
+
metadata.charStart ?? metadata.char_start ?? metadata.byteStart ?? metadata.byte_start,
|
|
125
|
+
"charStart"
|
|
126
|
+
);
|
|
127
|
+
const charEnd = coerceToNumber(
|
|
128
|
+
metadata.charEnd ?? metadata.char_end ?? metadata.byteEnd ?? metadata.byte_end,
|
|
129
|
+
"charEnd"
|
|
130
|
+
);
|
|
131
|
+
const chunkIndex = coerceToNumber(
|
|
132
|
+
metadata.chunkIndex ?? metadata.chunk_index,
|
|
133
|
+
"chunkIndex"
|
|
134
|
+
);
|
|
135
|
+
const totalChunks = coerceToNumber(
|
|
136
|
+
metadata.totalChunks ?? metadata.total_chunks,
|
|
137
|
+
"totalChunks"
|
|
138
|
+
);
|
|
139
|
+
let tokenCount = null;
|
|
140
|
+
const tokenCountValue = metadata.tokenCount ?? metadata.token_count;
|
|
141
|
+
if (tokenCountValue !== null && tokenCountValue !== void 0) {
|
|
142
|
+
tokenCount = coerceToNumber(tokenCountValue, "tokenCount");
|
|
121
143
|
}
|
|
122
144
|
return {
|
|
123
145
|
content: c.content,
|
|
124
146
|
embedding,
|
|
125
147
|
metadata: {
|
|
126
|
-
charStart
|
|
127
|
-
charEnd
|
|
128
|
-
tokenCount
|
|
129
|
-
chunkIndex
|
|
130
|
-
totalChunks
|
|
148
|
+
charStart,
|
|
149
|
+
charEnd,
|
|
150
|
+
tokenCount,
|
|
151
|
+
chunkIndex,
|
|
152
|
+
totalChunks
|
|
131
153
|
}
|
|
132
154
|
};
|
|
133
155
|
}) : null;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../typescript/adapters/wasm-adapter.ts"],"sourcesContent":["/**\n * WASM Type Adapter\n *\n * This module provides type adapters for converting between JavaScript/TypeScript\n * types and WASM-compatible types, handling File/Blob conversions, config normalization,\n * and result parsing.\n *\n * @example File Conversion\n * ```typescript\n * import { fileToUint8Array } from '@kreuzberg/wasm/adapters/wasm-adapter';\n *\n * const file = event.target.files[0];\n * const bytes = await fileToUint8Array(file);\n * const result = await extractBytes(bytes, file.type);\n * ```\n *\n * @example Config Normalization\n * ```typescript\n * import { configToJS } from '@kreuzberg/wasm/adapters/wasm-adapter';\n *\n * const config = {\n * ocr: { backend: 'tesseract', language: 'eng' },\n * chunking: { maxChars: 1000 }\n * };\n * const normalized = configToJS(config);\n * ```\n */\n\nimport type { Chunk, ExtractedImage, ExtractionConfig, ExtractionResult, Metadata, Table } from \"../types.js\";\n\n/**\n * Maximum file size for processing (512 MB)\n *\n * @internal\n */\nconst MAX_FILE_SIZE = 512 * 1024 * 1024;\n\n/**\n * Type predicate to validate numeric value or null\n *\n * @internal\n */\nfunction isNumberOrNull(value: unknown): value is number | null {\n\treturn typeof value === \"number\" || value === null;\n}\n\n/**\n * Type predicate to validate string value or null\n *\n * @internal\n */\nfunction isStringOrNull(value: unknown): value is string | null {\n\treturn typeof value === \"string\" || value === null;\n}\n\n/**\n * Type predicate to validate boolean value\n *\n * @internal\n */\nfunction isBoolean(value: unknown): value is boolean {\n\treturn typeof value === \"boolean\";\n}\n\n/**\n * Convert a File or Blob to Uint8Array\n *\n * Handles both browser File API and server-side Blob-like objects,\n * providing a unified interface for reading binary data.\n *\n * @param file - The File or Blob to convert\n * @returns Promise resolving to the byte array\n * @throws {Error} If the file cannot be read or exceeds size limit\n *\n * @example\n * ```typescript\n * const file = document.getElementById('input').files[0];\n * const bytes = await fileToUint8Array(file);\n * const result = await extractBytes(bytes, 'application/pdf');\n * ```\n */\nexport async function fileToUint8Array(file: File | Blob): Promise<Uint8Array> {\n\ttry {\n\t\tif (file.size > MAX_FILE_SIZE) {\n\t\t\tthrow new Error(\n\t\t\t\t`File size (${file.size} bytes) exceeds maximum (${MAX_FILE_SIZE} bytes). Maximum file size is 512 MB.`,\n\t\t\t);\n\t\t}\n\n\t\tconst arrayBuffer = await file.arrayBuffer();\n\t\treturn new Uint8Array(arrayBuffer);\n\t} catch (error) {\n\t\tthrow new Error(`Failed to read file: ${error instanceof Error ? error.message : String(error)}`);\n\t}\n}\n\n/**\n * Normalize ExtractionConfig for WASM processing\n *\n * Converts TypeScript configuration objects to a WASM-compatible format,\n * handling null values, undefined properties, and nested structures.\n *\n * @param config - The extraction configuration or null\n * @returns Normalized configuration object suitable for WASM\n *\n * @example\n * ```typescript\n * const config: ExtractionConfig = {\n * ocr: { backend: 'tesseract' },\n * chunking: { maxChars: 1000 }\n * };\n * const wasmConfig = configToJS(config);\n * ```\n */\nexport function configToJS(config: ExtractionConfig | null): Record<string, unknown> {\n\tif (!config) {\n\t\treturn {};\n\t}\n\n\tconst normalized: Record<string, unknown> = {};\n\n\tconst normalizeValue = (value: unknown): unknown => {\n\t\tif (value === null || value === undefined) {\n\t\t\treturn null;\n\t\t}\n\t\tif (typeof value === \"object\") {\n\t\t\tif (Array.isArray(value)) {\n\t\t\t\treturn value.map(normalizeValue);\n\t\t\t}\n\t\t\tconst obj = value as Record<string, unknown>;\n\t\t\tconst normalized: Record<string, unknown> = {};\n\t\t\tfor (const [key, val] of Object.entries(obj)) {\n\t\t\t\tconst normalizedVal = normalizeValue(val);\n\t\t\t\tif (normalizedVal !== null && normalizedVal !== undefined) {\n\t\t\t\t\tnormalized[key] = normalizedVal;\n\t\t\t\t}\n\t\t\t}\n\t\t\treturn Object.keys(normalized).length > 0 ? normalized : null;\n\t\t}\n\t\treturn value;\n\t};\n\n\tfor (const [key, value] of Object.entries(config)) {\n\t\tconst normalizedValue = normalizeValue(value);\n\t\tif (normalizedValue !== null && normalizedValue !== undefined) {\n\t\t\tnormalized[key] = normalizedValue;\n\t\t}\n\t}\n\n\treturn normalized;\n}\n\n/**\n * Parse WASM extraction result and convert to TypeScript type\n *\n * Handles conversion of WASM-returned objects to proper ExtractionResult types,\n * including proper array conversions and type assertions for tables, chunks, and images.\n *\n * @param jsValue - The raw WASM result value\n * @returns Properly typed ExtractionResult\n * @throws {Error} If the result structure is invalid\n *\n * @example\n * ```typescript\n * const wasmResult = await wasmExtract(bytes, mimeType, config);\n * const result = jsToExtractionResult(wasmResult);\n * console.log(result.content);\n * ```\n */\nexport function jsToExtractionResult(jsValue: unknown): ExtractionResult {\n\tif (!jsValue || typeof jsValue !== \"object\") {\n\t\tthrow new Error(\"Invalid extraction result: value is not an object\");\n\t}\n\n\tconst result = jsValue as Record<string, unknown>;\n\tconst mimeType =\n\t\ttypeof result.mimeType === \"string\"\n\t\t\t? result.mimeType\n\t\t\t: typeof result.mime_type === \"string\"\n\t\t\t\t? result.mime_type\n\t\t\t\t: null;\n\n\tif (typeof result.content !== \"string\") {\n\t\tthrow new Error(\"Invalid extraction result: missing or invalid content\");\n\t}\n\tif (typeof mimeType !== \"string\") {\n\t\tthrow new Error(\"Invalid extraction result: missing or invalid mimeType\");\n\t}\n\tif (!result.metadata || typeof result.metadata !== \"object\") {\n\t\tthrow new Error(\"Invalid extraction result: missing or invalid metadata\");\n\t}\n\n\tconst tables: Table[] = [];\n\tif (Array.isArray(result.tables)) {\n\t\tfor (const table of result.tables) {\n\t\t\tif (table && typeof table === \"object\") {\n\t\t\t\tconst t = table as Record<string, unknown>;\n\t\t\t\tif (\n\t\t\t\t\tArray.isArray(t.cells) &&\n\t\t\t\t\tt.cells.every((row) => Array.isArray(row) && row.every((cell) => typeof cell === \"string\")) &&\n\t\t\t\t\ttypeof t.markdown === \"string\" &&\n\t\t\t\t\ttypeof t.pageNumber === \"number\"\n\t\t\t\t) {\n\t\t\t\t\ttables.push({\n\t\t\t\t\t\tcells: t.cells as string[][],\n\t\t\t\t\t\tmarkdown: t.markdown,\n\t\t\t\t\t\tpageNumber: t.pageNumber,\n\t\t\t\t\t});\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n\n\tconst chunks: Chunk[] | null = Array.isArray(result.chunks)\n\t\t? result.chunks.map((chunk) => {\n\t\t\t\tif (!chunk || typeof chunk !== \"object\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk structure\");\n\t\t\t\t}\n\t\t\t\tconst c = chunk as Record<string, unknown>;\n\t\t\t\tif (typeof c.content !== \"string\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk: missing content\");\n\t\t\t\t}\n\t\t\t\tif (!c.metadata || typeof c.metadata !== \"object\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk: missing metadata\");\n\t\t\t\t}\n\t\t\t\tconst metadata = c.metadata as Record<string, unknown>;\n\n\t\t\t\tlet embedding: number[] | null = null;\n\t\t\t\tif (Array.isArray(c.embedding)) {\n\t\t\t\t\tif (!c.embedding.every((item) => typeof item === \"number\")) {\n\t\t\t\t\t\tthrow new Error(\"Invalid chunk: embedding must contain only numbers\");\n\t\t\t\t\t}\n\t\t\t\t\tembedding = c.embedding;\n\t\t\t\t}\n\n\t\t\t\tif (typeof metadata.charStart !== \"number\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk metadata: charStart must be a number\");\n\t\t\t\t}\n\t\t\t\tif (typeof metadata.charEnd !== \"number\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk metadata: charEnd must be a number\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(metadata.tokenCount)) {\n\t\t\t\t\tthrow new Error(\"Invalid chunk metadata: tokenCount must be a number or null\");\n\t\t\t\t}\n\t\t\t\tif (typeof metadata.chunkIndex !== \"number\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk metadata: chunkIndex must be a number\");\n\t\t\t\t}\n\t\t\t\tif (typeof metadata.totalChunks !== \"number\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk metadata: totalChunks must be a number\");\n\t\t\t\t}\n\n\t\t\t\treturn {\n\t\t\t\t\tcontent: c.content,\n\t\t\t\t\tembedding,\n\t\t\t\t\tmetadata: {\n\t\t\t\t\t\tcharStart: metadata.charStart,\n\t\t\t\t\t\tcharEnd: metadata.charEnd,\n\t\t\t\t\t\ttokenCount: metadata.tokenCount,\n\t\t\t\t\t\tchunkIndex: metadata.chunkIndex,\n\t\t\t\t\t\ttotalChunks: metadata.totalChunks,\n\t\t\t\t\t},\n\t\t\t\t};\n\t\t\t})\n\t\t: null;\n\n\tconst images: ExtractedImage[] | null = Array.isArray(result.images)\n\t\t? result.images.map((image) => {\n\t\t\t\tif (!image || typeof image !== \"object\") {\n\t\t\t\t\tthrow new Error(\"Invalid image structure\");\n\t\t\t\t}\n\t\t\t\tconst img = image as Record<string, unknown>;\n\t\t\t\tif (!(img.data instanceof Uint8Array)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: data must be Uint8Array\");\n\t\t\t\t}\n\t\t\t\tif (typeof img.format !== \"string\") {\n\t\t\t\t\tthrow new Error(\"Invalid image: missing format\");\n\t\t\t\t}\n\n\t\t\t\tif (typeof img.imageIndex !== \"number\") {\n\t\t\t\t\tthrow new Error(\"Invalid image: imageIndex must be a number\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(img.pageNumber)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: pageNumber must be a number or null\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(img.width)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: width must be a number or null\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(img.height)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: height must be a number or null\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(img.bitsPerComponent)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: bitsPerComponent must be a number or null\");\n\t\t\t\t}\n\n\t\t\t\tif (!isBoolean(img.isMask)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: isMask must be a boolean\");\n\t\t\t\t}\n\n\t\t\t\tif (!isStringOrNull(img.colorspace)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: colorspace must be a string or null\");\n\t\t\t\t}\n\t\t\t\tif (!isStringOrNull(img.description)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: description must be a string or null\");\n\t\t\t\t}\n\n\t\t\t\treturn {\n\t\t\t\t\tdata: img.data,\n\t\t\t\t\tformat: img.format,\n\t\t\t\t\timageIndex: img.imageIndex,\n\t\t\t\t\tpageNumber: img.pageNumber,\n\t\t\t\t\twidth: img.width,\n\t\t\t\t\theight: img.height,\n\t\t\t\t\tcolorspace: img.colorspace,\n\t\t\t\t\tbitsPerComponent: img.bitsPerComponent,\n\t\t\t\t\tisMask: img.isMask,\n\t\t\t\t\tdescription: img.description,\n\t\t\t\t\tocrResult: img.ocrResult ? jsToExtractionResult(img.ocrResult) : null,\n\t\t\t\t};\n\t\t\t})\n\t\t: null;\n\n\tlet detectedLanguages: string[] | null = null;\n\tconst detectedLanguagesRaw = Array.isArray(result.detectedLanguages)\n\t\t? result.detectedLanguages\n\t\t: result.detected_languages;\n\tif (Array.isArray(detectedLanguagesRaw)) {\n\t\tif (!detectedLanguagesRaw.every((lang) => typeof lang === \"string\")) {\n\t\t\tthrow new Error(\"Invalid result: detectedLanguages must contain only strings\");\n\t\t}\n\t\tdetectedLanguages = detectedLanguagesRaw;\n\t}\n\n\treturn {\n\t\tcontent: result.content,\n\t\tmimeType,\n\t\tmetadata: (result.metadata ?? {}) as Metadata,\n\t\ttables,\n\t\tdetectedLanguages,\n\t\tchunks,\n\t\timages,\n\t};\n}\n\n/**\n * Wrap and format WASM errors with context\n *\n * Converts WASM error messages to JavaScript Error objects with proper context\n * and stack trace information when available.\n *\n * @param error - The error from WASM\n * @param context - Additional context about what operation failed\n * @returns A formatted Error object\n *\n * @internal\n *\n * @example\n * ```typescript\n * try {\n * await wasmExtract(bytes, mimeType);\n * } catch (error) {\n * throw wrapWasmError(error, 'extracting document');\n * }\n * ```\n */\nexport function wrapWasmError(error: unknown, context: string): Error {\n\tif (error instanceof Error) {\n\t\treturn new Error(`Error ${context}: ${error.message}`, {\n\t\t\tcause: error,\n\t\t});\n\t}\n\n\tconst message = String(error);\n\treturn new Error(`Error ${context}: ${message}`);\n}\n\n/**\n * Validate that a WASM-returned value conforms to ExtractionResult structure\n *\n * Performs structural validation without full type checking,\n * useful for runtime validation of WASM output.\n *\n * @param value - The value to validate\n * @returns True if value appears to be a valid ExtractionResult\n *\n * @internal\n */\nexport function isValidExtractionResult(value: unknown): value is ExtractionResult {\n\tif (!value || typeof value !== \"object\") {\n\t\treturn false;\n\t}\n\n\tconst obj = value as Record<string, unknown>;\n\treturn (\n\t\ttypeof obj.content === \"string\" &&\n\t\t(typeof obj.mimeType === \"string\" || typeof obj.mime_type === \"string\") &&\n\t\tobj.metadata !== null &&\n\t\ttypeof obj.metadata === \"object\" &&\n\t\tArray.isArray(obj.tables)\n\t);\n}\n"],"mappings":";AAmCA,IAAM,gBAAgB,MAAM,OAAO;AAOnC,SAAS,eAAe,OAAwC;AAC/D,SAAO,OAAO,UAAU,YAAY,UAAU;AAC/C;AAOA,SAAS,eAAe,OAAwC;AAC/D,SAAO,OAAO,UAAU,YAAY,UAAU;AAC/C;AAOA,SAAS,UAAU,OAAkC;AACpD,SAAO,OAAO,UAAU;AACzB;AAmBA,eAAsB,iBAAiB,MAAwC;AAC9E,MAAI;AACH,QAAI,KAAK,OAAO,eAAe;AAC9B,YAAM,IAAI;AAAA,QACT,cAAc,KAAK,IAAI,4BAA4B,aAAa;AAAA,MACjE;AAAA,IACD;AAEA,UAAM,cAAc,MAAM,KAAK,YAAY;AAC3C,WAAO,IAAI,WAAW,WAAW;AAAA,EAClC,SAAS,OAAO;AACf,UAAM,IAAI,MAAM,wBAAwB,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,CAAC,EAAE;AAAA,EACjG;AACD;AAoBO,SAAS,WAAW,QAA0D;AACpF,MAAI,CAAC,QAAQ;AACZ,WAAO,CAAC;AAAA,EACT;AAEA,QAAM,aAAsC,CAAC;AAE7C,QAAM,iBAAiB,CAAC,UAA4B;AACnD,QAAI,UAAU,QAAQ,UAAU,QAAW;AAC1C,aAAO;AAAA,IACR;AACA,QAAI,OAAO,UAAU,UAAU;AAC9B,UAAI,MAAM,QAAQ,KAAK,GAAG;AACzB,eAAO,MAAM,IAAI,cAAc;AAAA,MAChC;AACA,YAAM,MAAM;AACZ,YAAMA,cAAsC,CAAC;AAC7C,iBAAW,CAAC,KAAK,GAAG,KAAK,OAAO,QAAQ,GAAG,GAAG;AAC7C,cAAM,gBAAgB,eAAe,GAAG;AACxC,YAAI,kBAAkB,QAAQ,kBAAkB,QAAW;AAC1D,UAAAA,YAAW,GAAG,IAAI;AAAA,QACnB;AAAA,MACD;AACA,aAAO,OAAO,KAAKA,WAAU,EAAE,SAAS,IAAIA,cAAa;AAAA,IAC1D;AACA,WAAO;AAAA,EACR;AAEA,aAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,MAAM,GAAG;AAClD,UAAM,kBAAkB,eAAe,KAAK;AAC5C,QAAI,oBAAoB,QAAQ,oBAAoB,QAAW;AAC9D,iBAAW,GAAG,IAAI;AAAA,IACnB;AAAA,EACD;AAEA,SAAO;AACR;AAmBO,SAAS,qBAAqB,SAAoC;AACxE,MAAI,CAAC,WAAW,OAAO,YAAY,UAAU;AAC5C,UAAM,IAAI,MAAM,mDAAmD;AAAA,EACpE;AAEA,QAAM,SAAS;AACf,QAAM,WACL,OAAO,OAAO,aAAa,WACxB,OAAO,WACP,OAAO,OAAO,cAAc,WAC3B,OAAO,YACP;AAEL,MAAI,OAAO,OAAO,YAAY,UAAU;AACvC,UAAM,IAAI,MAAM,uDAAuD;AAAA,EACxE;AACA,MAAI,OAAO,aAAa,UAAU;AACjC,UAAM,IAAI,MAAM,wDAAwD;AAAA,EACzE;AACA,MAAI,CAAC,OAAO,YAAY,OAAO,OAAO,aAAa,UAAU;AAC5D,UAAM,IAAI,MAAM,wDAAwD;AAAA,EACzE;AAEA,QAAM,SAAkB,CAAC;AACzB,MAAI,MAAM,QAAQ,OAAO,MAAM,GAAG;AACjC,eAAW,SAAS,OAAO,QAAQ;AAClC,UAAI,SAAS,OAAO,UAAU,UAAU;AACvC,cAAM,IAAI;AACV,YACC,MAAM,QAAQ,EAAE,KAAK,KACrB,EAAE,MAAM,MAAM,CAAC,QAAQ,MAAM,QAAQ,GAAG,KAAK,IAAI,MAAM,CAAC,SAAS,OAAO,SAAS,QAAQ,CAAC,KAC1F,OAAO,EAAE,aAAa,YACtB,OAAO,EAAE,eAAe,UACvB;AACD,iBAAO,KAAK;AAAA,YACX,OAAO,EAAE;AAAA,YACT,UAAU,EAAE;AAAA,YACZ,YAAY,EAAE;AAAA,UACf,CAAC;AAAA,QACF;AAAA,MACD;AAAA,IACD;AAAA,EACD;AAEA,QAAM,SAAyB,MAAM,QAAQ,OAAO,MAAM,IACvD,OAAO,OAAO,IAAI,CAAC,UAAU;AAC7B,QAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACxC,YAAM,IAAI,MAAM,yBAAyB;AAAA,IAC1C;AACA,UAAM,IAAI;AACV,QAAI,OAAO,EAAE,YAAY,UAAU;AAClC,YAAM,IAAI,MAAM,gCAAgC;AAAA,IACjD;AACA,QAAI,CAAC,EAAE,YAAY,OAAO,EAAE,aAAa,UAAU;AAClD,YAAM,IAAI,MAAM,iCAAiC;AAAA,IAClD;AACA,UAAM,WAAW,EAAE;AAEnB,QAAI,YAA6B;AACjC,QAAI,MAAM,QAAQ,EAAE,SAAS,GAAG;AAC/B,UAAI,CAAC,EAAE,UAAU,MAAM,CAAC,SAAS,OAAO,SAAS,QAAQ,GAAG;AAC3D,cAAM,IAAI,MAAM,oDAAoD;AAAA,MACrE;AACA,kBAAY,EAAE;AAAA,IACf;AAEA,QAAI,OAAO,SAAS,cAAc,UAAU;AAC3C,YAAM,IAAI,MAAM,oDAAoD;AAAA,IACrE;AACA,QAAI,OAAO,SAAS,YAAY,UAAU;AACzC,YAAM,IAAI,MAAM,kDAAkD;AAAA,IACnE;AACA,QAAI,CAAC,eAAe,SAAS,UAAU,GAAG;AACzC,YAAM,IAAI,MAAM,6DAA6D;AAAA,IAC9E;AACA,QAAI,OAAO,SAAS,eAAe,UAAU;AAC5C,YAAM,IAAI,MAAM,qDAAqD;AAAA,IACtE;AACA,QAAI,OAAO,SAAS,gBAAgB,UAAU;AAC7C,YAAM,IAAI,MAAM,sDAAsD;AAAA,IACvE;AAEA,WAAO;AAAA,MACN,SAAS,EAAE;AAAA,MACX;AAAA,MACA,UAAU;AAAA,QACT,WAAW,SAAS;AAAA,QACpB,SAAS,SAAS;AAAA,QAClB,YAAY,SAAS;AAAA,QACrB,YAAY,SAAS;AAAA,QACrB,aAAa,SAAS;AAAA,MACvB;AAAA,IACD;AAAA,EACD,CAAC,IACA;AAEH,QAAM,SAAkC,MAAM,QAAQ,OAAO,MAAM,IAChE,OAAO,OAAO,IAAI,CAAC,UAAU;AAC7B,QAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACxC,YAAM,IAAI,MAAM,yBAAyB;AAAA,IAC1C;AACA,UAAM,MAAM;AACZ,QAAI,EAAE,IAAI,gBAAgB,aAAa;AACtC,YAAM,IAAI,MAAM,wCAAwC;AAAA,IACzD;AACA,QAAI,OAAO,IAAI,WAAW,UAAU;AACnC,YAAM,IAAI,MAAM,+BAA+B;AAAA,IAChD;AAEA,QAAI,OAAO,IAAI,eAAe,UAAU;AACvC,YAAM,IAAI,MAAM,4CAA4C;AAAA,IAC7D;AACA,QAAI,CAAC,eAAe,IAAI,UAAU,GAAG;AACpC,YAAM,IAAI,MAAM,oDAAoD;AAAA,IACrE;AACA,QAAI,CAAC,eAAe,IAAI,KAAK,GAAG;AAC/B,YAAM,IAAI,MAAM,+CAA+C;AAAA,IAChE;AACA,QAAI,CAAC,eAAe,IAAI,MAAM,GAAG;AAChC,YAAM,IAAI,MAAM,gDAAgD;AAAA,IACjE;AACA,QAAI,CAAC,eAAe,IAAI,gBAAgB,GAAG;AAC1C,YAAM,IAAI,MAAM,0DAA0D;AAAA,IAC3E;AAEA,QAAI,CAAC,UAAU,IAAI,MAAM,GAAG;AAC3B,YAAM,IAAI,MAAM,yCAAyC;AAAA,IAC1D;AAEA,QAAI,CAAC,eAAe,IAAI,UAAU,GAAG;AACpC,YAAM,IAAI,MAAM,oDAAoD;AAAA,IACrE;AACA,QAAI,CAAC,eAAe,IAAI,WAAW,GAAG;AACrC,YAAM,IAAI,MAAM,qDAAqD;AAAA,IACtE;AAEA,WAAO;AAAA,MACN,MAAM,IAAI;AAAA,MACV,QAAQ,IAAI;AAAA,MACZ,YAAY,IAAI;AAAA,MAChB,YAAY,IAAI;AAAA,MAChB,OAAO,IAAI;AAAA,MACX,QAAQ,IAAI;AAAA,MACZ,YAAY,IAAI;AAAA,MAChB,kBAAkB,IAAI;AAAA,MACtB,QAAQ,IAAI;AAAA,MACZ,aAAa,IAAI;AAAA,MACjB,WAAW,IAAI,YAAY,qBAAqB,IAAI,SAAS,IAAI;AAAA,IAClE;AAAA,EACD,CAAC,IACA;AAEH,MAAI,oBAAqC;AACzC,QAAM,uBAAuB,MAAM,QAAQ,OAAO,iBAAiB,IAChE,OAAO,oBACP,OAAO;AACV,MAAI,MAAM,QAAQ,oBAAoB,GAAG;AACxC,QAAI,CAAC,qBAAqB,MAAM,CAAC,SAAS,OAAO,SAAS,QAAQ,GAAG;AACpE,YAAM,IAAI,MAAM,6DAA6D;AAAA,IAC9E;AACA,wBAAoB;AAAA,EACrB;AAEA,SAAO;AAAA,IACN,SAAS,OAAO;AAAA,IAChB;AAAA,IACA,UAAW,OAAO,YAAY,CAAC;AAAA,IAC/B;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACD;AACD;AAuBO,SAAS,cAAc,OAAgB,SAAwB;AACrE,MAAI,iBAAiB,OAAO;AAC3B,WAAO,IAAI,MAAM,SAAS,OAAO,KAAK,MAAM,OAAO,IAAI;AAAA,MACtD,OAAO;AAAA,IACR,CAAC;AAAA,EACF;AAEA,QAAM,UAAU,OAAO,KAAK;AAC5B,SAAO,IAAI,MAAM,SAAS,OAAO,KAAK,OAAO,EAAE;AAChD;AAaO,SAAS,wBAAwB,OAA2C;AAClF,MAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACxC,WAAO;AAAA,EACR;AAEA,QAAM,MAAM;AACZ,SACC,OAAO,IAAI,YAAY,aACtB,OAAO,IAAI,aAAa,YAAY,OAAO,IAAI,cAAc,aAC9D,IAAI,aAAa,QACjB,OAAO,IAAI,aAAa,YACxB,MAAM,QAAQ,IAAI,MAAM;AAE1B;","names":["normalized"]}
|
|
1
|
+
{"version":3,"sources":["../../typescript/adapters/wasm-adapter.ts"],"sourcesContent":["/**\n * WASM Type Adapter\n *\n * This module provides type adapters for converting between JavaScript/TypeScript\n * types and WASM-compatible types, handling File/Blob conversions, config normalization,\n * and result parsing.\n *\n * @example File Conversion\n * ```typescript\n * import { fileToUint8Array } from '@kreuzberg/wasm/adapters/wasm-adapter';\n *\n * const file = event.target.files[0];\n * const bytes = await fileToUint8Array(file);\n * const result = await extractBytes(bytes, file.type);\n * ```\n *\n * @example Config Normalization\n * ```typescript\n * import { configToJS } from '@kreuzberg/wasm/adapters/wasm-adapter';\n *\n * const config = {\n * ocr: { backend: 'tesseract', language: 'eng' },\n * chunking: { maxChars: 1000 }\n * };\n * const normalized = configToJS(config);\n * ```\n */\n\nimport type { Chunk, ExtractedImage, ExtractionConfig, ExtractionResult, Metadata, Table } from \"../types.js\";\n\n/**\n * Maximum file size for processing (512 MB)\n *\n * @internal\n */\nconst MAX_FILE_SIZE = 512 * 1024 * 1024;\n\n/**\n * Type predicate to validate numeric value or null\n *\n * @internal\n */\nfunction isNumberOrNull(value: unknown): value is number | null {\n\treturn typeof value === \"number\" || value === null;\n}\n\n/**\n * Type predicate to validate string value or null\n *\n * @internal\n */\nfunction isStringOrNull(value: unknown): value is string | null {\n\treturn typeof value === \"string\" || value === null;\n}\n\n/**\n * Type predicate to validate boolean value\n *\n * @internal\n */\nfunction isBoolean(value: unknown): value is boolean {\n\treturn typeof value === \"boolean\";\n}\n\n/**\n * Convert a File or Blob to Uint8Array\n *\n * Handles both browser File API and server-side Blob-like objects,\n * providing a unified interface for reading binary data.\n *\n * @param file - The File or Blob to convert\n * @returns Promise resolving to the byte array\n * @throws {Error} If the file cannot be read or exceeds size limit\n *\n * @example\n * ```typescript\n * const file = document.getElementById('input').files[0];\n * const bytes = await fileToUint8Array(file);\n * const result = await extractBytes(bytes, 'application/pdf');\n * ```\n */\nexport async function fileToUint8Array(file: File | Blob): Promise<Uint8Array> {\n\ttry {\n\t\tif (file.size > MAX_FILE_SIZE) {\n\t\t\tthrow new Error(\n\t\t\t\t`File size (${file.size} bytes) exceeds maximum (${MAX_FILE_SIZE} bytes). Maximum file size is 512 MB.`,\n\t\t\t);\n\t\t}\n\n\t\tconst arrayBuffer = await file.arrayBuffer();\n\t\treturn new Uint8Array(arrayBuffer);\n\t} catch (error) {\n\t\tthrow new Error(`Failed to read file: ${error instanceof Error ? error.message : String(error)}`);\n\t}\n}\n\n/**\n * Normalize ExtractionConfig for WASM processing\n *\n * Converts TypeScript configuration objects to a WASM-compatible format,\n * handling null values, undefined properties, and nested structures.\n *\n * @param config - The extraction configuration or null\n * @returns Normalized configuration object suitable for WASM\n *\n * @example\n * ```typescript\n * const config: ExtractionConfig = {\n * ocr: { backend: 'tesseract' },\n * chunking: { maxChars: 1000 }\n * };\n * const wasmConfig = configToJS(config);\n * ```\n */\nexport function configToJS(config: ExtractionConfig | null): Record<string, unknown> {\n\tif (!config) {\n\t\treturn {};\n\t}\n\n\tconst normalized: Record<string, unknown> = {};\n\n\tconst normalizeValue = (value: unknown): unknown => {\n\t\tif (value === null || value === undefined) {\n\t\t\treturn null;\n\t\t}\n\t\tif (typeof value === \"object\") {\n\t\t\tif (Array.isArray(value)) {\n\t\t\t\treturn value.map(normalizeValue);\n\t\t\t}\n\t\t\tconst obj = value as Record<string, unknown>;\n\t\t\tconst normalized: Record<string, unknown> = {};\n\t\t\tfor (const [key, val] of Object.entries(obj)) {\n\t\t\t\tconst normalizedVal = normalizeValue(val);\n\t\t\t\tif (normalizedVal !== null && normalizedVal !== undefined) {\n\t\t\t\t\tnormalized[key] = normalizedVal;\n\t\t\t\t}\n\t\t\t}\n\t\t\treturn Object.keys(normalized).length > 0 ? normalized : null;\n\t\t}\n\t\treturn value;\n\t};\n\n\tfor (const [key, value] of Object.entries(config)) {\n\t\tconst normalizedValue = normalizeValue(value);\n\t\tif (normalizedValue !== null && normalizedValue !== undefined) {\n\t\t\tnormalized[key] = normalizedValue;\n\t\t}\n\t}\n\n\treturn normalized;\n}\n\n/**\n * Parse WASM extraction result and convert to TypeScript type\n *\n * Handles conversion of WASM-returned objects to proper ExtractionResult types,\n * including proper array conversions and type assertions for tables, chunks, and images.\n *\n * @param jsValue - The raw WASM result value\n * @returns Properly typed ExtractionResult\n * @throws {Error} If the result structure is invalid\n *\n * @example\n * ```typescript\n * const wasmResult = await wasmExtract(bytes, mimeType, config);\n * const result = jsToExtractionResult(wasmResult);\n * console.log(result.content);\n * ```\n */\nexport function jsToExtractionResult(jsValue: unknown): ExtractionResult {\n\tif (!jsValue || typeof jsValue !== \"object\") {\n\t\tthrow new Error(\"Invalid extraction result: value is not an object\");\n\t}\n\n\tconst result = jsValue as Record<string, unknown>;\n\tconst mimeType =\n\t\ttypeof result.mimeType === \"string\"\n\t\t\t? result.mimeType\n\t\t\t: typeof result.mime_type === \"string\"\n\t\t\t\t? result.mime_type\n\t\t\t\t: null;\n\n\tif (typeof result.content !== \"string\") {\n\t\tthrow new Error(\"Invalid extraction result: missing or invalid content\");\n\t}\n\tif (typeof mimeType !== \"string\") {\n\t\tthrow new Error(\"Invalid extraction result: missing or invalid mimeType\");\n\t}\n\tif (!result.metadata || typeof result.metadata !== \"object\") {\n\t\tthrow new Error(\"Invalid extraction result: missing or invalid metadata\");\n\t}\n\n\tconst tables: Table[] = [];\n\tif (Array.isArray(result.tables)) {\n\t\tfor (const table of result.tables) {\n\t\t\tif (table && typeof table === \"object\") {\n\t\t\t\tconst t = table as Record<string, unknown>;\n\t\t\t\tif (\n\t\t\t\t\tArray.isArray(t.cells) &&\n\t\t\t\t\tt.cells.every((row) => Array.isArray(row) && row.every((cell) => typeof cell === \"string\")) &&\n\t\t\t\t\ttypeof t.markdown === \"string\" &&\n\t\t\t\t\ttypeof t.pageNumber === \"number\"\n\t\t\t\t) {\n\t\t\t\t\ttables.push({\n\t\t\t\t\t\tcells: t.cells as string[][],\n\t\t\t\t\t\tmarkdown: t.markdown,\n\t\t\t\t\t\tpageNumber: t.pageNumber,\n\t\t\t\t\t});\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n\n\tconst chunks: Chunk[] | null = Array.isArray(result.chunks)\n\t\t? result.chunks.map((chunk) => {\n\t\t\t\tif (!chunk || typeof chunk !== \"object\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk structure\");\n\t\t\t\t}\n\t\t\t\tconst c = chunk as Record<string, unknown>;\n\t\t\t\tif (typeof c.content !== \"string\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk: missing content\");\n\t\t\t\t}\n\t\t\t\tif (!c.metadata || typeof c.metadata !== \"object\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk: missing metadata\");\n\t\t\t\t}\n\t\t\t\tconst metadata = c.metadata as Record<string, unknown>;\n\n\t\t\t\tlet embedding: number[] | null = null;\n\t\t\t\tif (Array.isArray(c.embedding)) {\n\t\t\t\t\tif (!c.embedding.every((item) => typeof item === \"number\")) {\n\t\t\t\t\t\tthrow new Error(\"Invalid chunk: embedding must contain only numbers\");\n\t\t\t\t\t}\n\t\t\t\t\tembedding = c.embedding;\n\t\t\t\t}\n\n\t\t\t\t// Coerce numeric values - handle BigInt, strings, and numbers\n\t\t\t\tconst coerceToNumber = (value: unknown, fieldName: string): number => {\n\t\t\t\t\tif (typeof value === \"number\") {\n\t\t\t\t\t\treturn value;\n\t\t\t\t\t}\n\t\t\t\t\tif (typeof value === \"bigint\") {\n\t\t\t\t\t\treturn Number(value);\n\t\t\t\t\t}\n\t\t\t\t\tif (typeof value === \"string\") {\n\t\t\t\t\t\tconst parsed = parseInt(value, 10);\n\t\t\t\t\t\tif (isNaN(parsed)) {\n\t\t\t\t\t\t\tthrow new Error(`Invalid chunk metadata: ${fieldName} must be a valid number, got \"${value}\"`);\n\t\t\t\t\t\t}\n\t\t\t\t\t\treturn parsed;\n\t\t\t\t\t}\n\t\t\t\t\tthrow new Error(`Invalid chunk metadata: ${fieldName} must be a number, got ${typeof value}`);\n\t\t\t\t};\n\n\t\t\t\t// The Rust code uses snake_case field names (byte_start, byte_end, etc)\n\t\t\t\t// but TypeScript expects camelCase (charStart, charEnd, etc)\n\t\t\t\t// For now, treat byte offsets as character offsets since the content is UTF-8\n\t\t\t\tconst charStart = coerceToNumber(\n\t\t\t\t\tmetadata.charStart ?? metadata.char_start ?? metadata.byteStart ?? metadata.byte_start,\n\t\t\t\t\t\"charStart\"\n\t\t\t\t);\n\t\t\t\tconst charEnd = coerceToNumber(\n\t\t\t\t\tmetadata.charEnd ?? metadata.char_end ?? metadata.byteEnd ?? metadata.byte_end,\n\t\t\t\t\t\"charEnd\"\n\t\t\t\t);\n\t\t\t\tconst chunkIndex = coerceToNumber(\n\t\t\t\t\tmetadata.chunkIndex ?? metadata.chunk_index,\n\t\t\t\t\t\"chunkIndex\"\n\t\t\t\t);\n\t\t\t\tconst totalChunks = coerceToNumber(\n\t\t\t\t\tmetadata.totalChunks ?? metadata.total_chunks,\n\t\t\t\t\t\"totalChunks\"\n\t\t\t\t);\n\n\t\t\t\tlet tokenCount: number | null = null;\n\t\t\t\tconst tokenCountValue = metadata.tokenCount ?? metadata.token_count;\n\t\t\t\tif (tokenCountValue !== null && tokenCountValue !== undefined) {\n\t\t\t\t\ttokenCount = coerceToNumber(tokenCountValue, \"tokenCount\");\n\t\t\t\t}\n\n\t\t\t\treturn {\n\t\t\t\t\tcontent: c.content,\n\t\t\t\t\tembedding,\n\t\t\t\t\tmetadata: {\n\t\t\t\t\t\tcharStart,\n\t\t\t\t\t\tcharEnd,\n\t\t\t\t\t\ttokenCount,\n\t\t\t\t\t\tchunkIndex,\n\t\t\t\t\t\ttotalChunks,\n\t\t\t\t\t},\n\t\t\t\t};\n\t\t\t})\n\t\t: null;\n\n\tconst images: ExtractedImage[] | null = Array.isArray(result.images)\n\t\t? result.images.map((image) => {\n\t\t\t\tif (!image || typeof image !== \"object\") {\n\t\t\t\t\tthrow new Error(\"Invalid image structure\");\n\t\t\t\t}\n\t\t\t\tconst img = image as Record<string, unknown>;\n\t\t\t\tif (!(img.data instanceof Uint8Array)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: data must be Uint8Array\");\n\t\t\t\t}\n\t\t\t\tif (typeof img.format !== \"string\") {\n\t\t\t\t\tthrow new Error(\"Invalid image: missing format\");\n\t\t\t\t}\n\n\t\t\t\tif (typeof img.imageIndex !== \"number\") {\n\t\t\t\t\tthrow new Error(\"Invalid image: imageIndex must be a number\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(img.pageNumber)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: pageNumber must be a number or null\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(img.width)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: width must be a number or null\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(img.height)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: height must be a number or null\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(img.bitsPerComponent)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: bitsPerComponent must be a number or null\");\n\t\t\t\t}\n\n\t\t\t\tif (!isBoolean(img.isMask)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: isMask must be a boolean\");\n\t\t\t\t}\n\n\t\t\t\tif (!isStringOrNull(img.colorspace)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: colorspace must be a string or null\");\n\t\t\t\t}\n\t\t\t\tif (!isStringOrNull(img.description)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: description must be a string or null\");\n\t\t\t\t}\n\n\t\t\t\treturn {\n\t\t\t\t\tdata: img.data,\n\t\t\t\t\tformat: img.format,\n\t\t\t\t\timageIndex: img.imageIndex,\n\t\t\t\t\tpageNumber: img.pageNumber,\n\t\t\t\t\twidth: img.width,\n\t\t\t\t\theight: img.height,\n\t\t\t\t\tcolorspace: img.colorspace,\n\t\t\t\t\tbitsPerComponent: img.bitsPerComponent,\n\t\t\t\t\tisMask: img.isMask,\n\t\t\t\t\tdescription: img.description,\n\t\t\t\t\tocrResult: img.ocrResult ? jsToExtractionResult(img.ocrResult) : null,\n\t\t\t\t};\n\t\t\t})\n\t\t: null;\n\n\tlet detectedLanguages: string[] | null = null;\n\tconst detectedLanguagesRaw = Array.isArray(result.detectedLanguages)\n\t\t? result.detectedLanguages\n\t\t: result.detected_languages;\n\tif (Array.isArray(detectedLanguagesRaw)) {\n\t\tif (!detectedLanguagesRaw.every((lang) => typeof lang === \"string\")) {\n\t\t\tthrow new Error(\"Invalid result: detectedLanguages must contain only strings\");\n\t\t}\n\t\tdetectedLanguages = detectedLanguagesRaw;\n\t}\n\n\treturn {\n\t\tcontent: result.content,\n\t\tmimeType,\n\t\tmetadata: (result.metadata ?? {}) as Metadata,\n\t\ttables,\n\t\tdetectedLanguages,\n\t\tchunks,\n\t\timages,\n\t};\n}\n\n/**\n * Wrap and format WASM errors with context\n *\n * Converts WASM error messages to JavaScript Error objects with proper context\n * and stack trace information when available.\n *\n * @param error - The error from WASM\n * @param context - Additional context about what operation failed\n * @returns A formatted Error object\n *\n * @internal\n *\n * @example\n * ```typescript\n * try {\n * await wasmExtract(bytes, mimeType);\n * } catch (error) {\n * throw wrapWasmError(error, 'extracting document');\n * }\n * ```\n */\nexport function wrapWasmError(error: unknown, context: string): Error {\n\tif (error instanceof Error) {\n\t\treturn new Error(`Error ${context}: ${error.message}`, {\n\t\t\tcause: error,\n\t\t});\n\t}\n\n\tconst message = String(error);\n\treturn new Error(`Error ${context}: ${message}`);\n}\n\n/**\n * Validate that a WASM-returned value conforms to ExtractionResult structure\n *\n * Performs structural validation without full type checking,\n * useful for runtime validation of WASM output.\n *\n * @param value - The value to validate\n * @returns True if value appears to be a valid ExtractionResult\n *\n * @internal\n */\nexport function isValidExtractionResult(value: unknown): value is ExtractionResult {\n\tif (!value || typeof value !== \"object\") {\n\t\treturn false;\n\t}\n\n\tconst obj = value as Record<string, unknown>;\n\treturn (\n\t\ttypeof obj.content === \"string\" &&\n\t\t(typeof obj.mimeType === \"string\" || typeof obj.mime_type === \"string\") &&\n\t\tobj.metadata !== null &&\n\t\ttypeof obj.metadata === \"object\" &&\n\t\tArray.isArray(obj.tables)\n\t);\n}\n"],"mappings":";AAmCA,IAAM,gBAAgB,MAAM,OAAO;AAOnC,SAAS,eAAe,OAAwC;AAC/D,SAAO,OAAO,UAAU,YAAY,UAAU;AAC/C;AAOA,SAAS,eAAe,OAAwC;AAC/D,SAAO,OAAO,UAAU,YAAY,UAAU;AAC/C;AAOA,SAAS,UAAU,OAAkC;AACpD,SAAO,OAAO,UAAU;AACzB;AAmBA,eAAsB,iBAAiB,MAAwC;AAC9E,MAAI;AACH,QAAI,KAAK,OAAO,eAAe;AAC9B,YAAM,IAAI;AAAA,QACT,cAAc,KAAK,IAAI,4BAA4B,aAAa;AAAA,MACjE;AAAA,IACD;AAEA,UAAM,cAAc,MAAM,KAAK,YAAY;AAC3C,WAAO,IAAI,WAAW,WAAW;AAAA,EAClC,SAAS,OAAO;AACf,UAAM,IAAI,MAAM,wBAAwB,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,CAAC,EAAE;AAAA,EACjG;AACD;AAoBO,SAAS,WAAW,QAA0D;AACpF,MAAI,CAAC,QAAQ;AACZ,WAAO,CAAC;AAAA,EACT;AAEA,QAAM,aAAsC,CAAC;AAE7C,QAAM,iBAAiB,CAAC,UAA4B;AACnD,QAAI,UAAU,QAAQ,UAAU,QAAW;AAC1C,aAAO;AAAA,IACR;AACA,QAAI,OAAO,UAAU,UAAU;AAC9B,UAAI,MAAM,QAAQ,KAAK,GAAG;AACzB,eAAO,MAAM,IAAI,cAAc;AAAA,MAChC;AACA,YAAM,MAAM;AACZ,YAAMA,cAAsC,CAAC;AAC7C,iBAAW,CAAC,KAAK,GAAG,KAAK,OAAO,QAAQ,GAAG,GAAG;AAC7C,cAAM,gBAAgB,eAAe,GAAG;AACxC,YAAI,kBAAkB,QAAQ,kBAAkB,QAAW;AAC1D,UAAAA,YAAW,GAAG,IAAI;AAAA,QACnB;AAAA,MACD;AACA,aAAO,OAAO,KAAKA,WAAU,EAAE,SAAS,IAAIA,cAAa;AAAA,IAC1D;AACA,WAAO;AAAA,EACR;AAEA,aAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,MAAM,GAAG;AAClD,UAAM,kBAAkB,eAAe,KAAK;AAC5C,QAAI,oBAAoB,QAAQ,oBAAoB,QAAW;AAC9D,iBAAW,GAAG,IAAI;AAAA,IACnB;AAAA,EACD;AAEA,SAAO;AACR;AAmBO,SAAS,qBAAqB,SAAoC;AACxE,MAAI,CAAC,WAAW,OAAO,YAAY,UAAU;AAC5C,UAAM,IAAI,MAAM,mDAAmD;AAAA,EACpE;AAEA,QAAM,SAAS;AACf,QAAM,WACL,OAAO,OAAO,aAAa,WACxB,OAAO,WACP,OAAO,OAAO,cAAc,WAC3B,OAAO,YACP;AAEL,MAAI,OAAO,OAAO,YAAY,UAAU;AACvC,UAAM,IAAI,MAAM,uDAAuD;AAAA,EACxE;AACA,MAAI,OAAO,aAAa,UAAU;AACjC,UAAM,IAAI,MAAM,wDAAwD;AAAA,EACzE;AACA,MAAI,CAAC,OAAO,YAAY,OAAO,OAAO,aAAa,UAAU;AAC5D,UAAM,IAAI,MAAM,wDAAwD;AAAA,EACzE;AAEA,QAAM,SAAkB,CAAC;AACzB,MAAI,MAAM,QAAQ,OAAO,MAAM,GAAG;AACjC,eAAW,SAAS,OAAO,QAAQ;AAClC,UAAI,SAAS,OAAO,UAAU,UAAU;AACvC,cAAM,IAAI;AACV,YACC,MAAM,QAAQ,EAAE,KAAK,KACrB,EAAE,MAAM,MAAM,CAAC,QAAQ,MAAM,QAAQ,GAAG,KAAK,IAAI,MAAM,CAAC,SAAS,OAAO,SAAS,QAAQ,CAAC,KAC1F,OAAO,EAAE,aAAa,YACtB,OAAO,EAAE,eAAe,UACvB;AACD,iBAAO,KAAK;AAAA,YACX,OAAO,EAAE;AAAA,YACT,UAAU,EAAE;AAAA,YACZ,YAAY,EAAE;AAAA,UACf,CAAC;AAAA,QACF;AAAA,MACD;AAAA,IACD;AAAA,EACD;AAEA,QAAM,SAAyB,MAAM,QAAQ,OAAO,MAAM,IACvD,OAAO,OAAO,IAAI,CAAC,UAAU;AAC7B,QAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACxC,YAAM,IAAI,MAAM,yBAAyB;AAAA,IAC1C;AACA,UAAM,IAAI;AACV,QAAI,OAAO,EAAE,YAAY,UAAU;AAClC,YAAM,IAAI,MAAM,gCAAgC;AAAA,IACjD;AACA,QAAI,CAAC,EAAE,YAAY,OAAO,EAAE,aAAa,UAAU;AAClD,YAAM,IAAI,MAAM,iCAAiC;AAAA,IAClD;AACA,UAAM,WAAW,EAAE;AAEnB,QAAI,YAA6B;AACjC,QAAI,MAAM,QAAQ,EAAE,SAAS,GAAG;AAC/B,UAAI,CAAC,EAAE,UAAU,MAAM,CAAC,SAAS,OAAO,SAAS,QAAQ,GAAG;AAC3D,cAAM,IAAI,MAAM,oDAAoD;AAAA,MACrE;AACA,kBAAY,EAAE;AAAA,IACf;AAGA,UAAM,iBAAiB,CAAC,OAAgB,cAA8B;AACrE,UAAI,OAAO,UAAU,UAAU;AAC9B,eAAO;AAAA,MACR;AACA,UAAI,OAAO,UAAU,UAAU;AAC9B,eAAO,OAAO,KAAK;AAAA,MACpB;AACA,UAAI,OAAO,UAAU,UAAU;AAC9B,cAAM,SAAS,SAAS,OAAO,EAAE;AACjC,YAAI,MAAM,MAAM,GAAG;AAClB,gBAAM,IAAI,MAAM,2BAA2B,SAAS,iCAAiC,KAAK,GAAG;AAAA,QAC9F;AACA,eAAO;AAAA,MACR;AACA,YAAM,IAAI,MAAM,2BAA2B,SAAS,0BAA0B,OAAO,KAAK,EAAE;AAAA,IAC7F;AAKA,UAAM,YAAY;AAAA,MACjB,SAAS,aAAa,SAAS,cAAc,SAAS,aAAa,SAAS;AAAA,MAC5E;AAAA,IACD;AACA,UAAM,UAAU;AAAA,MACf,SAAS,WAAW,SAAS,YAAY,SAAS,WAAW,SAAS;AAAA,MACtE;AAAA,IACD;AACA,UAAM,aAAa;AAAA,MAClB,SAAS,cAAc,SAAS;AAAA,MAChC;AAAA,IACD;AACA,UAAM,cAAc;AAAA,MACnB,SAAS,eAAe,SAAS;AAAA,MACjC;AAAA,IACD;AAEA,QAAI,aAA4B;AAChC,UAAM,kBAAkB,SAAS,cAAc,SAAS;AACxD,QAAI,oBAAoB,QAAQ,oBAAoB,QAAW;AAC9D,mBAAa,eAAe,iBAAiB,YAAY;AAAA,IAC1D;AAEA,WAAO;AAAA,MACN,SAAS,EAAE;AAAA,MACX;AAAA,MACA,UAAU;AAAA,QACT;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,MACD;AAAA,IACD;AAAA,EACD,CAAC,IACA;AAEH,QAAM,SAAkC,MAAM,QAAQ,OAAO,MAAM,IAChE,OAAO,OAAO,IAAI,CAAC,UAAU;AAC7B,QAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACxC,YAAM,IAAI,MAAM,yBAAyB;AAAA,IAC1C;AACA,UAAM,MAAM;AACZ,QAAI,EAAE,IAAI,gBAAgB,aAAa;AACtC,YAAM,IAAI,MAAM,wCAAwC;AAAA,IACzD;AACA,QAAI,OAAO,IAAI,WAAW,UAAU;AACnC,YAAM,IAAI,MAAM,+BAA+B;AAAA,IAChD;AAEA,QAAI,OAAO,IAAI,eAAe,UAAU;AACvC,YAAM,IAAI,MAAM,4CAA4C;AAAA,IAC7D;AACA,QAAI,CAAC,eAAe,IAAI,UAAU,GAAG;AACpC,YAAM,IAAI,MAAM,oDAAoD;AAAA,IACrE;AACA,QAAI,CAAC,eAAe,IAAI,KAAK,GAAG;AAC/B,YAAM,IAAI,MAAM,+CAA+C;AAAA,IAChE;AACA,QAAI,CAAC,eAAe,IAAI,MAAM,GAAG;AAChC,YAAM,IAAI,MAAM,gDAAgD;AAAA,IACjE;AACA,QAAI,CAAC,eAAe,IAAI,gBAAgB,GAAG;AAC1C,YAAM,IAAI,MAAM,0DAA0D;AAAA,IAC3E;AAEA,QAAI,CAAC,UAAU,IAAI,MAAM,GAAG;AAC3B,YAAM,IAAI,MAAM,yCAAyC;AAAA,IAC1D;AAEA,QAAI,CAAC,eAAe,IAAI,UAAU,GAAG;AACpC,YAAM,IAAI,MAAM,oDAAoD;AAAA,IACrE;AACA,QAAI,CAAC,eAAe,IAAI,WAAW,GAAG;AACrC,YAAM,IAAI,MAAM,qDAAqD;AAAA,IACtE;AAEA,WAAO;AAAA,MACN,MAAM,IAAI;AAAA,MACV,QAAQ,IAAI;AAAA,MACZ,YAAY,IAAI;AAAA,MAChB,YAAY,IAAI;AAAA,MAChB,OAAO,IAAI;AAAA,MACX,QAAQ,IAAI;AAAA,MACZ,YAAY,IAAI;AAAA,MAChB,kBAAkB,IAAI;AAAA,MACtB,QAAQ,IAAI;AAAA,MACZ,aAAa,IAAI;AAAA,MACjB,WAAW,IAAI,YAAY,qBAAqB,IAAI,SAAS,IAAI;AAAA,IAClE;AAAA,EACD,CAAC,IACA;AAEH,MAAI,oBAAqC;AACzC,QAAM,uBAAuB,MAAM,QAAQ,OAAO,iBAAiB,IAChE,OAAO,oBACP,OAAO;AACV,MAAI,MAAM,QAAQ,oBAAoB,GAAG;AACxC,QAAI,CAAC,qBAAqB,MAAM,CAAC,SAAS,OAAO,SAAS,QAAQ,GAAG;AACpE,YAAM,IAAI,MAAM,6DAA6D;AAAA,IAC9E;AACA,wBAAoB;AAAA,EACrB;AAEA,SAAO;AAAA,IACN,SAAS,OAAO;AAAA,IAChB;AAAA,IACA,UAAW,OAAO,YAAY,CAAC;AAAA,IAC/B;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACD;AACD;AAuBO,SAAS,cAAc,OAAgB,SAAwB;AACrE,MAAI,iBAAiB,OAAO;AAC3B,WAAO,IAAI,MAAM,SAAS,OAAO,KAAK,MAAM,OAAO,IAAI;AAAA,MACtD,OAAO;AAAA,IACR,CAAC;AAAA,EACF;AAEA,QAAM,UAAU,OAAO,KAAK;AAC5B,SAAO,IAAI,MAAM,SAAS,OAAO,KAAK,OAAO,EAAE;AAChD;AAaO,SAAS,wBAAwB,OAA2C;AAClF,MAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACxC,WAAO;AAAA,EACR;AAEA,QAAM,MAAM;AACZ,SACC,OAAO,IAAI,YAAY,aACtB,OAAO,IAAI,aAAa,YAAY,OAAO,IAAI,cAAc,aAC9D,IAAI,aAAa,QACjB,OAAO,IAAI,aAAa,YACxB,MAAM,QAAQ,IAAI,MAAM;AAE1B;","names":["normalized"]}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,226 +1,3 @@
|
|
|
1
|
-
import { E as ExtractionConfig, a as ExtractionResult } from './types-wVLLDHkl.d.ts';
|
|
2
|
-
export { C as Chunk, b as ChunkingConfig, c as ChunkMetadata, d as ExtractedImage, I as ImageExtractionConfig, L as LanguageDetectionConfig, M as Metadata, O as OcrBackendProtocol, e as OcrConfig, P as PageContent, f as PageExtractionConfig, g as PdfConfig, h as PostProcessorConfig, T as Table, i as TesseractConfig, j as TokenReductionConfig, E as ExtractionConfig, a as ExtractionResult } from './types-wVLLDHkl.d.ts';
|
|
3
|
-
export { configToJS, fileToUint8Array, isValidExtractionResult, jsToExtractionResult, wrapWasmError } from './adapters/wasm-adapter.js';
|
|
4
|
-
export { clearOcrBackends, getOcrBackend, listOcrBackends, registerOcrBackend, unregisterOcrBackend } from './ocr/registry.js';
|
|
5
|
-
export { TesseractWasmBackend } from './ocr/tesseract-wasm-backend.js';
|
|
6
|
-
export { type RuntimeType, type WasmCapabilities, detectRuntime, getRuntimeInfo, getRuntimeVersion, getWasmCapabilities, hasBigInt, hasBlob, hasFileApi, hasModuleWorkers, hasSharedArrayBuffer, hasWasm, hasWasmStreaming, hasWorkers, isBrowser, isBun, isDeno, isNode, isServerEnvironment, isWebEnvironment } from './runtime.d.ts';
|
|
7
|
-
|
|
8
|
-
/**
|
|
9
|
-
* Plugin Registry Module
|
|
10
|
-
*
|
|
11
|
-
* This module manages registrations and execution of post-processors and validators
|
|
12
|
-
* for document extraction pipelines.
|
|
13
|
-
*
|
|
14
|
-
* # Thread Safety
|
|
15
|
-
* All registrations are stored in Maps and are single-threaded safe for WASM environments.
|
|
16
|
-
*
|
|
17
|
-
* # Global Callback Functions
|
|
18
|
-
* The WASM module can invoke processing via global callback functions:
|
|
19
|
-
* - `__kreuzberg_execute_post_processor`: Execute a registered post-processor
|
|
20
|
-
* - `__kreuzberg_execute_validator`: Execute a registered validator
|
|
21
|
-
*/
|
|
22
|
-
|
|
23
|
-
/**
|
|
24
|
-
* Post-processor plugin interface
|
|
25
|
-
*
|
|
26
|
-
* A post-processor modifies extraction results after extraction completes.
|
|
27
|
-
*/
|
|
28
|
-
interface PostProcessor {
|
|
29
|
-
/**
|
|
30
|
-
* Get the processor name (must be non-empty string)
|
|
31
|
-
*/
|
|
32
|
-
name(): string;
|
|
33
|
-
/**
|
|
34
|
-
* Get the processing stage (optional, defaults to "middle")
|
|
35
|
-
* - "early": Process early in the pipeline
|
|
36
|
-
* - "middle": Process in the middle of the pipeline
|
|
37
|
-
* - "late": Process late in the pipeline
|
|
38
|
-
*/
|
|
39
|
-
stage?(): "early" | "middle" | "late";
|
|
40
|
-
/**
|
|
41
|
-
* Process an extraction result
|
|
42
|
-
* Can be sync or async
|
|
43
|
-
*/
|
|
44
|
-
process(result: ExtractionResult): ExtractionResult | Promise<ExtractionResult>;
|
|
45
|
-
/**
|
|
46
|
-
* Shutdown the processor (optional)
|
|
47
|
-
*/
|
|
48
|
-
shutdown?(): void | Promise<void>;
|
|
49
|
-
}
|
|
50
|
-
/**
|
|
51
|
-
* Validator plugin interface
|
|
52
|
-
*
|
|
53
|
-
* A validator checks extraction results for correctness
|
|
54
|
-
*/
|
|
55
|
-
interface Validator {
|
|
56
|
-
/**
|
|
57
|
-
* Get the validator name (must be non-empty string)
|
|
58
|
-
*/
|
|
59
|
-
name(): string;
|
|
60
|
-
/**
|
|
61
|
-
* Get the validation priority (optional, defaults to 50)
|
|
62
|
-
* Higher numbers = higher priority (execute first)
|
|
63
|
-
*/
|
|
64
|
-
priority?(): number;
|
|
65
|
-
/**
|
|
66
|
-
* Validate an extraction result
|
|
67
|
-
* Can be sync or async
|
|
68
|
-
*/
|
|
69
|
-
validate(result: ExtractionResult): {
|
|
70
|
-
valid: boolean;
|
|
71
|
-
errors: string[];
|
|
72
|
-
} | Promise<{
|
|
73
|
-
valid: boolean;
|
|
74
|
-
errors: string[];
|
|
75
|
-
}>;
|
|
76
|
-
/**
|
|
77
|
-
* Shutdown the validator (optional)
|
|
78
|
-
*/
|
|
79
|
-
shutdown?(): void | Promise<void>;
|
|
80
|
-
}
|
|
81
|
-
/**
|
|
82
|
-
* Register a post-processor plugin
|
|
83
|
-
*
|
|
84
|
-
* @param processor - The post-processor to register
|
|
85
|
-
* @throws {Error} If the processor is invalid or missing required methods
|
|
86
|
-
*
|
|
87
|
-
* @example
|
|
88
|
-
* ```typescript
|
|
89
|
-
* const processor = {
|
|
90
|
-
* name: () => "my-processor",
|
|
91
|
-
* stage: () => "middle",
|
|
92
|
-
* process: async (result) => {
|
|
93
|
-
* result.content = result.content.toUpperCase();
|
|
94
|
-
* return result;
|
|
95
|
-
* }
|
|
96
|
-
* };
|
|
97
|
-
* registerPostProcessor(processor);
|
|
98
|
-
* ```
|
|
99
|
-
*/
|
|
100
|
-
declare function registerPostProcessor(processor: PostProcessor): void;
|
|
101
|
-
/**
|
|
102
|
-
* Get a registered post-processor by name
|
|
103
|
-
*
|
|
104
|
-
* @param name - The processor name
|
|
105
|
-
* @returns The processor, or undefined if not found
|
|
106
|
-
*
|
|
107
|
-
* @example
|
|
108
|
-
* ```typescript
|
|
109
|
-
* const processor = getPostProcessor("my-processor");
|
|
110
|
-
* if (processor) {
|
|
111
|
-
* console.log("Found processor:", processor.name());
|
|
112
|
-
* }
|
|
113
|
-
* ```
|
|
114
|
-
*/
|
|
115
|
-
declare function getPostProcessor(name: string): PostProcessor | undefined;
|
|
116
|
-
/**
|
|
117
|
-
* List all registered post-processor names
|
|
118
|
-
*
|
|
119
|
-
* @returns Array of processor names
|
|
120
|
-
*
|
|
121
|
-
* @example
|
|
122
|
-
* ```typescript
|
|
123
|
-
* const names = listPostProcessors();
|
|
124
|
-
* console.log("Registered processors:", names);
|
|
125
|
-
* ```
|
|
126
|
-
*/
|
|
127
|
-
declare function listPostProcessors(): string[];
|
|
128
|
-
/**
|
|
129
|
-
* Unregister a post-processor and call its shutdown method
|
|
130
|
-
*
|
|
131
|
-
* @param name - The processor name
|
|
132
|
-
* @throws {Error} If the processor is not registered
|
|
133
|
-
*
|
|
134
|
-
* @example
|
|
135
|
-
* ```typescript
|
|
136
|
-
* await unregisterPostProcessor("my-processor");
|
|
137
|
-
* ```
|
|
138
|
-
*/
|
|
139
|
-
declare function unregisterPostProcessor(name: string): Promise<void>;
|
|
140
|
-
/**
|
|
141
|
-
* Clear all registered post-processors
|
|
142
|
-
*
|
|
143
|
-
* Calls shutdown on all processors before clearing.
|
|
144
|
-
*
|
|
145
|
-
* @example
|
|
146
|
-
* ```typescript
|
|
147
|
-
* await clearPostProcessors();
|
|
148
|
-
* ```
|
|
149
|
-
*/
|
|
150
|
-
declare function clearPostProcessors(): Promise<void>;
|
|
151
|
-
/**
|
|
152
|
-
* Register a validator plugin
|
|
153
|
-
*
|
|
154
|
-
* @param validator - The validator to register
|
|
155
|
-
* @throws {Error} If the validator is invalid or missing required methods
|
|
156
|
-
*
|
|
157
|
-
* @example
|
|
158
|
-
* ```typescript
|
|
159
|
-
* const validator = {
|
|
160
|
-
* name: () => "my-validator",
|
|
161
|
-
* priority: () => 50,
|
|
162
|
-
* validate: async (result) => {
|
|
163
|
-
* if (!result.content) {
|
|
164
|
-
* return { valid: false, errors: ["Content is empty"] };
|
|
165
|
-
* }
|
|
166
|
-
* return { valid: true, errors: [] };
|
|
167
|
-
* }
|
|
168
|
-
* };
|
|
169
|
-
* registerValidator(validator);
|
|
170
|
-
* ```
|
|
171
|
-
*/
|
|
172
|
-
declare function registerValidator(validator: Validator): void;
|
|
173
|
-
/**
|
|
174
|
-
* Get a registered validator by name
|
|
175
|
-
*
|
|
176
|
-
* @param name - The validator name
|
|
177
|
-
* @returns The validator, or undefined if not found
|
|
178
|
-
*
|
|
179
|
-
* @example
|
|
180
|
-
* ```typescript
|
|
181
|
-
* const validator = getValidator("my-validator");
|
|
182
|
-
* if (validator) {
|
|
183
|
-
* console.log("Found validator:", validator.name());
|
|
184
|
-
* }
|
|
185
|
-
* ```
|
|
186
|
-
*/
|
|
187
|
-
declare function getValidator(name: string): Validator | undefined;
|
|
188
|
-
/**
|
|
189
|
-
* List all registered validator names
|
|
190
|
-
*
|
|
191
|
-
* @returns Array of validator names
|
|
192
|
-
*
|
|
193
|
-
* @example
|
|
194
|
-
* ```typescript
|
|
195
|
-
* const names = listValidators();
|
|
196
|
-
* console.log("Registered validators:", names);
|
|
197
|
-
* ```
|
|
198
|
-
*/
|
|
199
|
-
declare function listValidators(): string[];
|
|
200
|
-
/**
|
|
201
|
-
* Unregister a validator and call its shutdown method
|
|
202
|
-
*
|
|
203
|
-
* @param name - The validator name
|
|
204
|
-
* @throws {Error} If the validator is not registered
|
|
205
|
-
*
|
|
206
|
-
* @example
|
|
207
|
-
* ```typescript
|
|
208
|
-
* await unregisterValidator("my-validator");
|
|
209
|
-
* ```
|
|
210
|
-
*/
|
|
211
|
-
declare function unregisterValidator(name: string): Promise<void>;
|
|
212
|
-
/**
|
|
213
|
-
* Clear all registered validators
|
|
214
|
-
*
|
|
215
|
-
* Calls shutdown on all validators before clearing.
|
|
216
|
-
*
|
|
217
|
-
* @example
|
|
218
|
-
* ```typescript
|
|
219
|
-
* await clearValidators();
|
|
220
|
-
* ```
|
|
221
|
-
*/
|
|
222
|
-
declare function clearValidators(): Promise<void>;
|
|
223
|
-
|
|
224
1
|
/**
|
|
225
2
|
* Kreuzberg - WebAssembly Bindings for Browser and Runtime Environments
|
|
226
3
|
*
|
|
@@ -316,8 +93,15 @@ declare function clearValidators(): Promise<void>;
|
|
|
316
93
|
* const result = await extractBytes(bytes, 'application/pdf', config);
|
|
317
94
|
* ```
|
|
318
95
|
*/
|
|
319
|
-
|
|
320
|
-
|
|
96
|
+
import type { ExtractionConfig as ExtractionConfigType, ExtractionResult } from "./types.d.ts";
|
|
97
|
+
export { configToJS, fileToUint8Array, isValidExtractionResult, jsToExtractionResult, wrapWasmError, } from "./adapters/wasm-adapter.d.ts";
|
|
98
|
+
export { clearOcrBackends, getOcrBackend, listOcrBackends, registerOcrBackend, unregisterOcrBackend, } from "./ocr/registry.d.ts";
|
|
99
|
+
export { TesseractWasmBackend } from "./ocr/tesseract-wasm-backend.d.ts";
|
|
100
|
+
export { clearPostProcessors, clearValidators, getPostProcessor, getValidator, listPostProcessors, listValidators, type PostProcessor, registerPostProcessor, registerValidator, unregisterPostProcessor, unregisterValidator, type Validator, } from "./plugin-registry.d.ts";
|
|
101
|
+
export { detectRuntime, getRuntimeInfo, getRuntimeVersion, getWasmCapabilities, hasBigInt, hasBlob, hasFileApi, hasModuleWorkers, hasSharedArrayBuffer, hasWasm, hasWasmStreaming, hasWorkers, isBrowser, isBun, isDeno, isNode, isServerEnvironment, isWebEnvironment, type RuntimeType, type WasmCapabilities, } from "./runtime.d.ts";
|
|
102
|
+
export type * from "./types.d.ts";
|
|
103
|
+
export type { Chunk, ChunkingConfig, ChunkMetadata, ExtractedImage, ExtractionConfig, ExtractionResult, ImageExtractionConfig, LanguageDetectionConfig, Metadata, OcrBackendProtocol, OcrConfig, PageContent, PageExtractionConfig, PdfConfig, PostProcessorConfig, Table, TesseractConfig, TokenReductionConfig, } from "./types.d.ts";
|
|
104
|
+
export declare function initWasm(): Promise<void>;
|
|
321
105
|
/**
|
|
322
106
|
* Check if WASM module is initialized
|
|
323
107
|
*
|
|
@@ -330,7 +114,7 @@ declare function initWasm(): Promise<void>;
|
|
|
330
114
|
* }
|
|
331
115
|
* ```
|
|
332
116
|
*/
|
|
333
|
-
declare function isInitialized(): boolean;
|
|
117
|
+
export declare function isInitialized(): boolean;
|
|
334
118
|
/**
|
|
335
119
|
* Get WASM module version
|
|
336
120
|
*
|
|
@@ -343,7 +127,7 @@ declare function isInitialized(): boolean;
|
|
|
343
127
|
* console.log(`Using Kreuzberg ${version}`);
|
|
344
128
|
* ```
|
|
345
129
|
*/
|
|
346
|
-
declare function getVersion(): string;
|
|
130
|
+
export declare function getVersion(): string;
|
|
347
131
|
/**
|
|
348
132
|
* Get initialization error if module failed to load
|
|
349
133
|
*
|
|
@@ -351,7 +135,7 @@ declare function getVersion(): string;
|
|
|
351
135
|
*
|
|
352
136
|
* @internal
|
|
353
137
|
*/
|
|
354
|
-
declare function getInitializationError(): Error | null;
|
|
138
|
+
export declare function getInitializationError(): Error | null;
|
|
355
139
|
/**
|
|
356
140
|
* Extract content from bytes (document data)
|
|
357
141
|
*
|
|
@@ -393,7 +177,7 @@ declare function getInitializationError(): Error | null;
|
|
|
393
177
|
* const result = await extractBytes(bytes, file.type);
|
|
394
178
|
* ```
|
|
395
179
|
*/
|
|
396
|
-
declare function extractBytes(data: Uint8Array, mimeType: string, config?:
|
|
180
|
+
export declare function extractBytes(data: Uint8Array, mimeType: string, config?: ExtractionConfigType | null): Promise<ExtractionResult>;
|
|
397
181
|
/**
|
|
398
182
|
* Extract content from a file on the file system
|
|
399
183
|
*
|
|
@@ -429,7 +213,7 @@ declare function extractBytes(data: Uint8Array, mimeType: string, config?: Extra
|
|
|
429
213
|
* });
|
|
430
214
|
* ```
|
|
431
215
|
*/
|
|
432
|
-
declare function extractFile(path: string, mimeType?: string | null, config?:
|
|
216
|
+
export declare function extractFile(path: string, mimeType?: string | null, config?: ExtractionConfigType | null): Promise<ExtractionResult>;
|
|
433
217
|
/**
|
|
434
218
|
* Extract content from a File or Blob (browser-friendly wrapper)
|
|
435
219
|
*
|
|
@@ -462,7 +246,7 @@ declare function extractFile(path: string, mimeType?: string | null, config?: Ex
|
|
|
462
246
|
* });
|
|
463
247
|
* ```
|
|
464
248
|
*/
|
|
465
|
-
declare function extractFromFile(file: File | Blob, mimeType?: string | null, config?:
|
|
249
|
+
export declare function extractFromFile(file: File | Blob, mimeType?: string | null, config?: ExtractionConfigType | null): Promise<ExtractionResult>;
|
|
466
250
|
/**
|
|
467
251
|
* Extract content from bytes synchronously
|
|
468
252
|
*
|
|
@@ -482,7 +266,7 @@ declare function extractFromFile(file: File | Blob, mimeType?: string | null, co
|
|
|
482
266
|
* console.log(result.content);
|
|
483
267
|
* ```
|
|
484
268
|
*/
|
|
485
|
-
declare function extractBytesSync(data: Uint8Array, mimeType: string, config?:
|
|
269
|
+
export declare function extractBytesSync(data: Uint8Array, mimeType: string, config?: ExtractionConfigType | null): ExtractionResult;
|
|
486
270
|
/**
|
|
487
271
|
* Batch extract content from multiple byte arrays asynchronously
|
|
488
272
|
*
|
|
@@ -504,10 +288,10 @@ declare function extractBytesSync(data: Uint8Array, mimeType: string, config?: E
|
|
|
504
288
|
* results.forEach((result) => console.log(result.content));
|
|
505
289
|
* ```
|
|
506
290
|
*/
|
|
507
|
-
declare function batchExtractBytes(files: Array<{
|
|
291
|
+
export declare function batchExtractBytes(files: Array<{
|
|
508
292
|
data: Uint8Array;
|
|
509
293
|
mimeType: string;
|
|
510
|
-
}>, config?:
|
|
294
|
+
}>, config?: ExtractionConfigType | null): Promise<ExtractionResult[]>;
|
|
511
295
|
/**
|
|
512
296
|
* Batch extract content from multiple byte arrays synchronously
|
|
513
297
|
*
|
|
@@ -529,10 +313,10 @@ declare function batchExtractBytes(files: Array<{
|
|
|
529
313
|
* results.forEach((result) => console.log(result.content));
|
|
530
314
|
* ```
|
|
531
315
|
*/
|
|
532
|
-
declare function batchExtractBytesSync(files: Array<{
|
|
316
|
+
export declare function batchExtractBytesSync(files: Array<{
|
|
533
317
|
data: Uint8Array;
|
|
534
318
|
mimeType: string;
|
|
535
|
-
}>, config?:
|
|
319
|
+
}>, config?: ExtractionConfigType | null): ExtractionResult[];
|
|
536
320
|
/**
|
|
537
321
|
* Batch extract content from multiple File objects asynchronously
|
|
538
322
|
*
|
|
@@ -554,7 +338,7 @@ declare function batchExtractBytesSync(files: Array<{
|
|
|
554
338
|
* });
|
|
555
339
|
* ```
|
|
556
340
|
*/
|
|
557
|
-
declare function batchExtractFiles(files: File[], config?:
|
|
341
|
+
export declare function batchExtractFiles(files: File[], config?: ExtractionConfigType | null): Promise<ExtractionResult[]>;
|
|
558
342
|
/**
|
|
559
343
|
* Enable OCR functionality with tesseract-wasm backend
|
|
560
344
|
*
|
|
@@ -634,6 +418,5 @@ declare function batchExtractFiles(files: File[], config?: ExtractionConfig | nu
|
|
|
634
418
|
* });
|
|
635
419
|
* ```
|
|
636
420
|
*/
|
|
637
|
-
declare function enableOcr(): Promise<void>;
|
|
638
|
-
|
|
639
|
-
export { type PostProcessor, type Validator, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, clearPostProcessors, clearValidators, enableOcr, extractBytes, extractBytesSync, extractFile, extractFromFile, getInitializationError, getPostProcessor, getValidator, getVersion, initWasm, isInitialized, listPostProcessors, listValidators, registerPostProcessor, registerValidator, unregisterPostProcessor, unregisterValidator };
|
|
421
|
+
export declare function enableOcr(): Promise<void>;
|
|
422
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../typescript/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8FG;AAMH,OAAO,KAAK,EAAE,gBAAgB,IAAI,oBAAoB,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAE7F,OAAO,EACN,UAAU,EACV,gBAAgB,EAChB,uBAAuB,EACvB,oBAAoB,EACpB,aAAa,GACb,MAAM,4BAA4B,CAAC;AACpC,OAAO,EACN,gBAAgB,EAChB,aAAa,EACb,eAAe,EACf,kBAAkB,EAClB,oBAAoB,GACpB,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EAAE,oBAAoB,EAAE,MAAM,iCAAiC,CAAC;AACvE,OAAO,EACN,mBAAmB,EACnB,eAAe,EACf,gBAAgB,EAChB,YAAY,EACZ,kBAAkB,EAClB,cAAc,EACd,KAAK,aAAa,EAClB,qBAAqB,EACrB,iBAAiB,EACjB,uBAAuB,EACvB,mBAAmB,EACnB,KAAK,SAAS,GACd,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EACN,aAAa,EACb,cAAc,EACd,iBAAiB,EACjB,mBAAmB,EACnB,SAAS,EACT,OAAO,EACP,UAAU,EACV,gBAAgB,EAChB,oBAAoB,EACpB,OAAO,EACP,gBAAgB,EAChB,UAAU,EACV,SAAS,EACT,KAAK,EACL,MAAM,EACN,MAAM,EACN,mBAAmB,EACnB,gBAAgB,EAChB,KAAK,WAAW,EAChB,KAAK,gBAAgB,GACrB,MAAM,cAAc,CAAC;AACtB,mBAAmB,YAAY,CAAC;AAChC,YAAY,EACX,KAAK,EACL,cAAc,EACd,aAAa,EACb,cAAc,EACd,gBAAgB,EAChB,gBAAgB,EAChB,qBAAqB,EACrB,uBAAuB,EACvB,QAAQ,EACR,kBAAkB,EAClB,SAAS,EACT,WAAW,EACX,oBAAoB,EACpB,SAAS,EACT,mBAAmB,EACnB,KAAK,EACL,eAAe,EACf,oBAAoB,GACpB,MAAM,YAAY,CAAC;AA4IpB,wBAAsB,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC,CA6C9C;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,aAAa,IAAI,OAAO,CAEvC;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,UAAU,IAAI,MAAM,CAUnC;AAED;;;;;;GAMG;AACH,wBAAgB,sBAAsB,IAAI,KAAK,GAAG,IAAI,CAErD;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAwCG;AACH,wBAAsB,YAAY,CACjC,IAAI,EAAE,UAAU,EAChB,QAAQ,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,oBAAoB,GAAG,IAAI,GAClC,OAAO,CAAC,gBAAgB,CAAC,CA8B3B;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkCG;AACH,wBAAsB,WAAW,CAChC,IAAI,EAAE,MAAM,EACZ,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,EACxB,MAAM,CAAC,EAAE,oBAAoB,GAAG,IAAI,GAClC,OAAO,CAAC,gBAAgB,CAAC,CAqD3B;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AACH,wBAAsB,eAAe,CACpC,IAAI,EAAE,IAAI,GAAG,IAAI,EACjB,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,EACxB,MAAM,CAAC,EAAE,oBAAoB,GAAG,IAAI,GAClC,OAAO,CAAC,gBAAgB,CAAC,CAmB3B;AAED;;;;;;;;;;;;;;;;;;GAkBG;AACH,wBAAgB,gBAAgB,CAC/B,IAAI,EAAE,UAAU,EAChB,QAAQ,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,oBAAoB,GAAG,IAAI,GAClC,gBAAgB,CA8BlB;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAsB,iBAAiB,CACtC,KAAK,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,UAAU,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAA;CAAE,CAAC,EACpD,MAAM,CAAC,EAAE,oBAAoB,GAAG,IAAI,GAClC,OAAO,CAAC,gBAAgB,EAAE,CAAC,CA+D7B;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAgB,qBAAqB,CACpC,KAAK,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,UAAU,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAA;CAAE,CAAC,EACpD,MAAM,CAAC,EAAE,oBAAoB,GAAG,IAAI,GAClC,gBAAgB,EAAE,CA+DpB;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAsB,iBAAiB,CACtC,KAAK,EAAE,IAAI,EAAE,EACb,MAAM,CAAC,EAAE,oBAAoB,GAAG,IAAI,GAClC,OAAO,CAAC,gBAAgB,EAAE,CAAC,CAiC7B;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8EG;AACH,wBAAsB,SAAS,IAAI,OAAO,CAAC,IAAI,CAAC,CAoB/C"}
|
package/dist/index.js
CHANGED
|
@@ -130,30 +130,52 @@ function jsToExtractionResult(jsValue) {
|
|
|
130
130
|
}
|
|
131
131
|
embedding = c.embedding;
|
|
132
132
|
}
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
133
|
+
const coerceToNumber = (value, fieldName) => {
|
|
134
|
+
if (typeof value === "number") {
|
|
135
|
+
return value;
|
|
136
|
+
}
|
|
137
|
+
if (typeof value === "bigint") {
|
|
138
|
+
return Number(value);
|
|
139
|
+
}
|
|
140
|
+
if (typeof value === "string") {
|
|
141
|
+
const parsed = parseInt(value, 10);
|
|
142
|
+
if (isNaN(parsed)) {
|
|
143
|
+
throw new Error(`Invalid chunk metadata: ${fieldName} must be a valid number, got "${value}"`);
|
|
144
|
+
}
|
|
145
|
+
return parsed;
|
|
146
|
+
}
|
|
147
|
+
throw new Error(`Invalid chunk metadata: ${fieldName} must be a number, got ${typeof value}`);
|
|
148
|
+
};
|
|
149
|
+
const charStart = coerceToNumber(
|
|
150
|
+
metadata.charStart ?? metadata.char_start ?? metadata.byteStart ?? metadata.byte_start,
|
|
151
|
+
"charStart"
|
|
152
|
+
);
|
|
153
|
+
const charEnd = coerceToNumber(
|
|
154
|
+
metadata.charEnd ?? metadata.char_end ?? metadata.byteEnd ?? metadata.byte_end,
|
|
155
|
+
"charEnd"
|
|
156
|
+
);
|
|
157
|
+
const chunkIndex = coerceToNumber(
|
|
158
|
+
metadata.chunkIndex ?? metadata.chunk_index,
|
|
159
|
+
"chunkIndex"
|
|
160
|
+
);
|
|
161
|
+
const totalChunks = coerceToNumber(
|
|
162
|
+
metadata.totalChunks ?? metadata.total_chunks,
|
|
163
|
+
"totalChunks"
|
|
164
|
+
);
|
|
165
|
+
let tokenCount = null;
|
|
166
|
+
const tokenCountValue = metadata.tokenCount ?? metadata.token_count;
|
|
167
|
+
if (tokenCountValue !== null && tokenCountValue !== void 0) {
|
|
168
|
+
tokenCount = coerceToNumber(tokenCountValue, "tokenCount");
|
|
147
169
|
}
|
|
148
170
|
return {
|
|
149
171
|
content: c.content,
|
|
150
172
|
embedding,
|
|
151
173
|
metadata: {
|
|
152
|
-
charStart
|
|
153
|
-
charEnd
|
|
154
|
-
tokenCount
|
|
155
|
-
chunkIndex
|
|
156
|
-
totalChunks
|
|
174
|
+
charStart,
|
|
175
|
+
charEnd,
|
|
176
|
+
tokenCount,
|
|
177
|
+
chunkIndex,
|
|
178
|
+
totalChunks
|
|
157
179
|
}
|
|
158
180
|
};
|
|
159
181
|
}) : null;
|