@kreuzberg/node 4.0.0-rc.14 → 4.0.0-rc.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +72 -3
- package/dist/index.d.ts +72 -3
- package/dist/index.js +20 -1
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +17 -1
- package/dist/index.mjs.map +1 -1
- package/dist/types.d.mts +37 -1
- package/dist/types.d.ts +37 -1
- package/dist/types.js.map +1 -1
- package/index.d.ts +478 -0
- package/index.js +72 -52
- package/package.json +4 -3
package/dist/index.mjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../typescript/index.ts"],"sourcesContent":["/**\n * Kreuzberg - Multi-language document intelligence framework.\n *\n * This is a TypeScript SDK around a high-performance Rust core.\n * All extraction logic, chunking, quality processing, and language detection\n * are implemented in Rust for maximum performance.\n *\n * ## API Usage Recommendations\n *\n * **For processing multiple documents**, prefer batch APIs:\n * - Use `batchExtractFiles()` / `batchExtractFilesSync()` for multiple files\n * - Use `batchExtractBytes()` / `batchExtractBytesSync()` for multiple byte arrays\n *\n * **Batch APIs provide**:\n * - Better performance (parallel processing in Rust)\n * - More reliable memory management\n * - Recommended for all multi-document workflows\n *\n * **Single extraction APIs** (`extractFile`, `extractBytes`) are suitable for:\n * - One-off document processing\n * - Interactive applications processing documents on-demand\n * - Avoid calling these in tight loops - use batch APIs instead\n *\n * ## Supported Formats\n *\n * - **Documents**: PDF, DOCX, PPTX, XLSX, DOC, PPT (with LibreOffice)\n * - **Text**: Markdown, Plain Text, XML\n * - **Web**: HTML (converted to Markdown)\n * - **Data**: JSON, YAML, TOML\n * - **Email**: EML, MSG\n * - **Images**: PNG, JPEG, TIFF (with OCR support)\n *\n * @example\n * ```typescript\n * import { extractFile, batchExtractFiles } from '@kreuzberg/node';\n *\n * // Single file extraction\n * const result = await extractFile('document.pdf');\n * console.log(result.content);\n *\n * // Multiple files (recommended approach)\n * const files = ['doc1.pdf', 'doc2.docx', 'doc3.xlsx'];\n * const results = await batchExtractFiles(files);\n * results.forEach(r => console.log(r.content));\n * ```\n */\n\nimport { createRequire } from \"node:module\";\nimport type { PanicContext } from \"./errors.js\";\nimport type {\n\tChunk,\n\tChunkingConfig,\n\tExtractedImage,\n\tExtractionConfig as ExtractionConfigType,\n\tExtractionResult,\n\tHtmlConversionOptions,\n\tHtmlPreprocessingOptions,\n\tImageExtractionConfig,\n\tKeywordConfig,\n\tLanguageDetectionConfig,\n\tOcrBackendProtocol,\n\tOcrConfig,\n\tPageConfig,\n\tPageContent,\n\tPdfConfig,\n\tPostProcessorConfig,\n\tPostProcessorProtocol,\n\tTable,\n\tTesseractConfig,\n\tTokenReductionConfig,\n\tValidatorProtocol,\n} from \"./types.js\";\n\n/**\n * @internal Native NAPI binding interface for the Kreuzberg native module.\n * This interface defines the shape of methods available in the compiled native addon.\n */\ninterface NativeBinding {\n\textractFileSync(\n\t\tfilePath: string,\n\t\tmimeType: string | null,\n\t\tconfig: Record<string, unknown> | null,\n\t): Record<string, unknown>;\n\textractFile(\n\t\tfilePath: string,\n\t\tmimeType: string | null,\n\t\tconfig: Record<string, unknown> | null,\n\t): Promise<Record<string, unknown>>;\n\textractBytesSync(data: Buffer, mimeType: string, config: Record<string, unknown> | null): Record<string, unknown>;\n\textractBytes(\n\t\tdata: Buffer,\n\t\tmimeType: string,\n\t\tconfig: Record<string, unknown> | null,\n\t): Promise<Record<string, unknown>>;\n\tbatchExtractFilesSync(paths: string[], config: Record<string, unknown> | null): Record<string, unknown>[];\n\tbatchExtractFiles(paths: string[], config: Record<string, unknown> | null): Promise<Record<string, unknown>[]>;\n\tbatchExtractBytesSync(\n\t\tdataArray: Buffer[],\n\t\tmimeTypes: string[],\n\t\tconfig: Record<string, unknown> | null,\n\t): Record<string, unknown>[];\n\tbatchExtractBytes(\n\t\tdataArray: Buffer[],\n\t\tmimeTypes: string[],\n\t\tconfig: Record<string, unknown> | null,\n\t): Promise<Record<string, unknown>[]>;\n\tregisterPostProcessor(processor: Record<string, unknown>): void;\n\tunregisterPostProcessor(name: string): void;\n\tclearPostProcessors(): void;\n\tlistPostProcessors(): string[];\n\tregisterValidator(validator: Record<string, unknown>): void;\n\tunregisterValidator(name: string): void;\n\tclearValidators(): void;\n\tlistValidators(): string[];\n\tregisterOcrBackend(backend: Record<string, unknown>): void;\n\tunregisterOcrBackend(name: string): void;\n\tclearOcrBackends(): void;\n\tlistOcrBackends(): string[];\n\tregisterDocumentExtractor(extractor: Record<string, unknown>): void;\n\tunregisterDocumentExtractor(name: string): void;\n\tclearDocumentExtractors(): void;\n\tlistDocumentExtractors(): string[];\n\tdetectMimeType(filePath: string): string;\n\tdetectMimeTypeFromBytes(data: Buffer): string;\n\tdetectMimeTypeFromPath(filePath: string): string;\n\tvalidateMimeType(mimeType: string): string;\n\tgetExtensionsForMime(mimeType: string): string[];\n\tlistEmbeddingPresets(): string[];\n\tgetEmbeddingPreset(name: string): Record<string, unknown> | null;\n\tgetLastErrorCode(): number;\n\tgetLastPanicContext(): Record<string, unknown> | null;\n\tloadExtractionConfigFromFile(filePath: string): Record<string, unknown>;\n\tdiscoverExtractionConfig(): Record<string, unknown> | null;\n}\n\nexport {\n\tCacheError,\n\tErrorCode,\n\tImageProcessingError,\n\tKreuzbergError,\n\tMissingDependencyError,\n\tOcrError,\n\ttype PanicContext,\n\tParsingError,\n\tPluginError,\n\tValidationError,\n} from \"./errors.js\";\nexport { GutenOcrBackend } from \"./ocr/guten-ocr.js\";\nexport * from \"./types.js\";\n\nlet binding: NativeBinding | null = null;\nlet bindingInitialized = false;\n\nfunction createNativeBindingError(error: unknown): Error {\n\tconst hintParts: string[] = [];\n\tlet detail = \"Unknown error while requiring native module.\";\n\n\tif (error instanceof Error) {\n\t\tdetail = error.message || error.toString();\n\t\tif (/pdfium/i.test(detail)) {\n\t\t\thintParts.push(\n\t\t\t\t\"Pdfium runtime library was not found. Ensure the bundled libpdfium (dll/dylib/so) is present next to the native module.\",\n\t\t\t);\n\t\t}\n\t\treturn new Error(\n\t\t\t[\n\t\t\t\t\"Failed to load Kreuzberg native bindings.\",\n\t\t\t\thintParts.length ? hintParts.join(\" \") : \"\",\n\t\t\t\t\"Report this error and attach the logs/stack trace for investigation.\",\n\t\t\t\t`Underlying error: ${detail}`,\n\t\t\t]\n\t\t\t\t.filter(Boolean)\n\t\t\t\t.join(\" \"),\n\t\t\t{ cause: error },\n\t\t);\n\t}\n\n\treturn new Error(\n\t\t[\n\t\t\t\"Failed to load Kreuzberg native bindings.\",\n\t\t\t\"Report this error and attach the logs/stack trace for investigation.\",\n\t\t\t`Underlying error: ${String(error)}`,\n\t\t].join(\" \"),\n\t);\n}\n\nfunction assertUint8Array(value: unknown, name: string): Uint8Array {\n\tif (!(value instanceof Uint8Array)) {\n\t\tthrow new TypeError(`${name} must be a Uint8Array`);\n\t}\n\treturn value;\n}\n\nfunction assertUint8ArrayList(values: unknown, name: string): Uint8Array[] {\n\tif (!Array.isArray(values)) {\n\t\tthrow new TypeError(`${name} must be an array of Uint8Array`);\n\t}\n\n\tconst array = values as unknown[];\n\treturn array.map((value, index) => {\n\t\ttry {\n\t\t\treturn assertUint8Array(value, `${name}[${index}]`);\n\t\t} catch {\n\t\t\tthrow new TypeError(`${name}[${index}] must be a Uint8Array`);\n\t\t}\n\t});\n}\n\n/**\n * @internal Allows tests to provide a mocked native binding.\n */\nexport function __setBindingForTests(mock: unknown): void {\n\tbinding = mock as NativeBinding;\n\tbindingInitialized = true;\n}\n\n/**\n * @internal Resets the cached native binding for tests.\n */\nexport function __resetBindingForTests(): void {\n\tbinding = null;\n\tbindingInitialized = false;\n}\n\nfunction loadNativeBinding(): NativeBinding {\n\tconst localRequire: ((path: string) => unknown) | undefined =\n\t\ttypeof require !== \"undefined\" ? (require as (path: string) => unknown) : createRequire(import.meta.url);\n\n\tif (!localRequire) {\n\t\tthrow new Error(\"Unable to resolve native binding loader (require not available).\");\n\t}\n\n\tconst loadedModule = localRequire(\"../index.js\") as unknown;\n\n\t// Validate that the loaded module is an object\n\tif (typeof loadedModule !== \"object\" || loadedModule === null) {\n\t\tthrow new Error(\n\t\t\t\"Native binding is not a valid object. \" + \"Ensure the native module is properly built and compatible.\",\n\t\t);\n\t}\n\n\tconst module = loadedModule as Record<string, unknown>;\n\n\t// Validate that the loaded module has the expected methods\n\tconst requiredMethods = [\n\t\t\"extractFileSync\",\n\t\t\"extractFile\",\n\t\t\"extractBytesSync\",\n\t\t\"extractBytes\",\n\t\t\"batchExtractFilesSync\",\n\t\t\"batchExtractFiles\",\n\t\t\"batchExtractBytesSync\",\n\t\t\"batchExtractBytes\",\n\t];\n\n\tfor (const method of requiredMethods) {\n\t\tif (typeof module[method] !== \"function\") {\n\t\t\tthrow new Error(\n\t\t\t\t`Native binding is missing required method: ${method}. ` +\n\t\t\t\t\t\"Ensure the native module is properly built and compatible.\",\n\t\t\t);\n\t\t}\n\t}\n\n\treturn module as unknown as NativeBinding;\n}\n\nfunction getBinding(): NativeBinding {\n\tif (bindingInitialized) {\n\t\tif (binding === null) {\n\t\t\tthrow new Error(\"Native binding was previously failed to load.\");\n\t\t}\n\t\treturn binding;\n\t}\n\n\ttry {\n\t\tif (typeof process !== \"undefined\" && process.versions && process.versions.node) {\n\t\t\tbinding = loadNativeBinding();\n\t\t\tbindingInitialized = true;\n\t\t\treturn binding;\n\t\t}\n\t} catch (error) {\n\t\tbindingInitialized = true; // Mark as attempted even on failure\n\t\tthrow createNativeBindingError(error);\n\t}\n\n\tthrow new Error(\n\t\t\"Failed to load Kreuzberg bindings. Neither NAPI (Node.js) nor WASM (browsers/Deno) bindings are available. \" +\n\t\t\t\"Make sure you have installed the @kreuzberg/node package for Node.js/Bun.\",\n\t);\n}\n\nfunction parseMetadata(metadataStr: string): Record<string, unknown> {\n\ttry {\n\t\tconst parsed = JSON.parse(metadataStr) as unknown;\n\t\tif (typeof parsed === \"object\" && parsed !== null) {\n\t\t\treturn parsed as Record<string, unknown>;\n\t\t}\n\t\treturn {};\n\t} catch {\n\t\treturn {};\n\t}\n}\n\nfunction ensureUint8Array(value: unknown): Uint8Array {\n\tif (value instanceof Uint8Array) {\n\t\treturn value;\n\t}\n\tif (typeof Buffer !== \"undefined\" && value instanceof Buffer) {\n\t\treturn new Uint8Array(value);\n\t}\n\tif (Array.isArray(value)) {\n\t\treturn new Uint8Array(value);\n\t}\n\treturn new Uint8Array();\n}\n\nfunction convertChunk(rawChunk: unknown): Chunk {\n\tif (!rawChunk || typeof rawChunk !== \"object\") {\n\t\treturn {\n\t\t\tcontent: \"\",\n\t\t\tmetadata: {\n\t\t\t\tbyteStart: 0,\n\t\t\t\tbyteEnd: 0,\n\t\t\t\ttokenCount: null,\n\t\t\t\tchunkIndex: 0,\n\t\t\t\ttotalChunks: 0,\n\t\t\t},\n\t\t\tembedding: null,\n\t\t};\n\t}\n\n\tconst chunk = rawChunk as Record<string, unknown>;\n\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\tconst metadata = (chunk[\"metadata\"] as Record<string, unknown>) ?? {};\n\treturn {\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\tcontent: (chunk[\"content\"] as string) ?? \"\",\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\tembedding: (chunk[\"embedding\"] as number[] | null) ?? null,\n\t\tmetadata: {\n\t\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\t\tbyteStart: ((metadata[\"byte_start\"] ?? metadata[\"charStart\"]) as number) ?? 0,\n\t\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\t\tbyteEnd: ((metadata[\"byte_end\"] ?? metadata[\"charEnd\"]) as number) ?? 0,\n\t\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\t\ttokenCount: ((metadata[\"token_count\"] ?? metadata[\"tokenCount\"]) as number | null) ?? null,\n\t\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\t\tchunkIndex: ((metadata[\"chunk_index\"] ?? metadata[\"chunkIndex\"]) as number) ?? 0,\n\t\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\t\ttotalChunks: ((metadata[\"total_chunks\"] ?? metadata[\"totalChunks\"]) as number) ?? 0,\n\t\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\t\tfirstPage: ((metadata[\"first_page\"] ?? metadata[\"firstPage\"]) as number | null) ?? null,\n\t\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\t\tlastPage: ((metadata[\"last_page\"] ?? metadata[\"lastPage\"]) as number | null) ?? null,\n\t\t},\n\t};\n}\n\nfunction convertImage(rawImage: unknown): ExtractedImage {\n\tif (!rawImage || typeof rawImage !== \"object\") {\n\t\treturn {\n\t\t\tdata: new Uint8Array(),\n\t\t\tformat: \"unknown\",\n\t\t\timageIndex: 0,\n\t\t\tpageNumber: null,\n\t\t\twidth: null,\n\t\t\theight: null,\n\t\t\tcolorspace: null,\n\t\t\tbitsPerComponent: null,\n\t\t\tisMask: false,\n\t\t\tdescription: null,\n\t\t\tocrResult: null,\n\t\t};\n\t}\n\n\tconst image = rawImage as Record<string, unknown>;\n\treturn {\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\tdata: ensureUint8Array(image[\"data\"]),\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\tformat: (image[\"format\"] as string) ?? \"unknown\",\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\timageIndex: (image[\"imageIndex\"] as number) ?? 0,\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\tpageNumber: (image[\"pageNumber\"] as number | null) ?? null,\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\twidth: (image[\"width\"] as number | null) ?? null,\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\theight: (image[\"height\"] as number | null) ?? null,\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\tcolorspace: (image[\"colorspace\"] as string | null) ?? null,\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\tbitsPerComponent: (image[\"bitsPerComponent\"] as number | null) ?? null,\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\tisMask: (image[\"isMask\"] as boolean) ?? false,\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\tdescription: (image[\"description\"] as string | null) ?? null,\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\tocrResult: image[\"ocrResult\"] ? convertResult(image[\"ocrResult\"]) : null,\n\t};\n}\n\nfunction convertPageContent(rawPage: unknown): PageContent {\n\tif (!rawPage || typeof rawPage !== \"object\") {\n\t\treturn {\n\t\t\tpageNumber: 0,\n\t\t\tcontent: \"\",\n\t\t\ttables: [],\n\t\t\timages: [],\n\t\t};\n\t}\n\n\tconst page = rawPage as Record<string, unknown>;\n\treturn {\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\tpageNumber: (page[\"pageNumber\"] as number) ?? 0,\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\tcontent: (page[\"content\"] as string) ?? \"\",\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\ttables: Array.isArray(page[\"tables\"]) ? (page[\"tables\"] as Table[]) : [],\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\timages: Array.isArray(page[\"images\"]) ? (page[\"images\"] as unknown[]).map((image) => convertImage(image)) : [],\n\t};\n}\n\nfunction convertResult(rawResult: unknown): ExtractionResult {\n\tif (!rawResult || typeof rawResult !== \"object\") {\n\t\treturn {\n\t\t\tcontent: \"\",\n\t\t\tmimeType: \"application/octet-stream\",\n\t\t\tmetadata: {},\n\t\t\ttables: [],\n\t\t\tdetectedLanguages: null,\n\t\t\tchunks: null,\n\t\t\timages: null,\n\t\t\tpages: null,\n\t\t};\n\t}\n\n\tconst result = rawResult as Record<string, unknown>;\n\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\tconst metadata = result[\"metadata\"];\n\tconst metadataValue =\n\t\ttypeof metadata === \"string\" ? parseMetadata(metadata) : ((metadata as Record<string, unknown>) ?? {});\n\n\treturn {\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\tcontent: (result[\"content\"] as string) ?? \"\",\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\tmimeType: (result[\"mimeType\"] as string) ?? \"application/octet-stream\",\n\t\tmetadata: metadataValue,\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\ttables: Array.isArray(result[\"tables\"]) ? (result[\"tables\"] as Table[]) : [],\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\tdetectedLanguages: Array.isArray(result[\"detectedLanguages\"]) ? (result[\"detectedLanguages\"] as string[]) : null,\n\t\tchunks: (() => {\n\t\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\t\tconst chunksData = result[\"chunks\"];\n\t\t\treturn Array.isArray(chunksData) ? (chunksData as unknown[]).map((chunk) => convertChunk(chunk)) : null;\n\t\t})(),\n\t\timages: (() => {\n\t\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\t\tconst imagesData = result[\"images\"];\n\t\t\treturn Array.isArray(imagesData) ? (imagesData as unknown[]).map((image) => convertImage(image)) : null;\n\t\t})(),\n\t\tpages: (() => {\n\t\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\t\tconst pagesData = result[\"pages\"];\n\t\t\treturn Array.isArray(pagesData) ? (pagesData as unknown[]).map((page) => convertPageContent(page)) : null;\n\t\t})(),\n\t};\n}\n\ntype NativeExtractionConfig = Record<string, unknown>;\n\nfunction setIfDefined<T>(target: NativeExtractionConfig, key: string, value: T | undefined): void {\n\tif (value !== undefined) {\n\t\ttarget[key] = value;\n\t}\n}\n\nfunction normalizeTesseractConfig(config?: TesseractConfig) {\n\tif (!config) {\n\t\treturn undefined;\n\t}\n\n\tconst normalized: NativeExtractionConfig = {};\n\tsetIfDefined(normalized, \"psm\", config.psm);\n\tsetIfDefined(normalized, \"enableTableDetection\", config.enableTableDetection);\n\tsetIfDefined(normalized, \"tesseditCharWhitelist\", config.tesseditCharWhitelist);\n\treturn normalized;\n}\n\nfunction normalizeOcrConfig(ocr?: OcrConfig): NativeExtractionConfig | undefined {\n\tif (!ocr) {\n\t\treturn undefined;\n\t}\n\n\tconst normalized: NativeExtractionConfig = {\n\t\tbackend: ocr.backend,\n\t};\n\tsetIfDefined(normalized, \"language\", ocr.language);\n\n\tconst tesseract = normalizeTesseractConfig(ocr.tesseractConfig);\n\tif (tesseract) {\n\t\tsetIfDefined(normalized, \"tesseractConfig\", tesseract);\n\t}\n\n\treturn normalized;\n}\n\nfunction normalizeChunkingConfig(chunking?: ChunkingConfig): NativeExtractionConfig | undefined {\n\tif (!chunking) {\n\t\treturn undefined;\n\t}\n\n\tconst normalized: NativeExtractionConfig = {};\n\tsetIfDefined(normalized, \"maxChars\", chunking.maxChars);\n\tsetIfDefined(normalized, \"maxOverlap\", chunking.maxOverlap);\n\tsetIfDefined(normalized, \"preset\", chunking.preset);\n\tsetIfDefined(normalized, \"embedding\", chunking.embedding);\n\tsetIfDefined(normalized, \"enabled\", chunking.enabled);\n\treturn normalized;\n}\n\nfunction normalizeImageExtractionConfig(images?: ImageExtractionConfig): NativeExtractionConfig | undefined {\n\tif (!images) {\n\t\treturn undefined;\n\t}\n\n\tconst normalized: NativeExtractionConfig = {};\n\tsetIfDefined(normalized, \"extractImages\", images.extractImages);\n\tsetIfDefined(normalized, \"targetDpi\", images.targetDpi);\n\tsetIfDefined(normalized, \"maxImageDimension\", images.maxImageDimension);\n\tsetIfDefined(normalized, \"autoAdjustDpi\", images.autoAdjustDpi);\n\tsetIfDefined(normalized, \"minDpi\", images.minDpi);\n\tsetIfDefined(normalized, \"maxDpi\", images.maxDpi);\n\treturn normalized;\n}\n\nfunction normalizePdfConfig(pdf?: PdfConfig): NativeExtractionConfig | undefined {\n\tif (!pdf) {\n\t\treturn undefined;\n\t}\n\n\tconst normalized: NativeExtractionConfig = {};\n\tsetIfDefined(normalized, \"extractImages\", pdf.extractImages);\n\tsetIfDefined(normalized, \"passwords\", pdf.passwords);\n\tsetIfDefined(normalized, \"extractMetadata\", pdf.extractMetadata);\n\treturn normalized;\n}\n\nfunction normalizeTokenReductionConfig(tokenReduction?: TokenReductionConfig): NativeExtractionConfig | undefined {\n\tif (!tokenReduction) {\n\t\treturn undefined;\n\t}\n\n\tconst normalized: NativeExtractionConfig = {};\n\tsetIfDefined(normalized, \"mode\", tokenReduction.mode);\n\tsetIfDefined(normalized, \"preserveImportantWords\", tokenReduction.preserveImportantWords);\n\treturn normalized;\n}\n\nfunction normalizeLanguageDetectionConfig(\n\tlanguageDetection?: LanguageDetectionConfig,\n): NativeExtractionConfig | undefined {\n\tif (!languageDetection) {\n\t\treturn undefined;\n\t}\n\n\tconst normalized: NativeExtractionConfig = {};\n\tsetIfDefined(normalized, \"enabled\", languageDetection.enabled);\n\tsetIfDefined(normalized, \"minConfidence\", languageDetection.minConfidence);\n\tsetIfDefined(normalized, \"detectMultiple\", languageDetection.detectMultiple);\n\treturn normalized;\n}\n\nfunction normalizePostProcessorConfig(postprocessor?: PostProcessorConfig): NativeExtractionConfig | undefined {\n\tif (!postprocessor) {\n\t\treturn undefined;\n\t}\n\n\tconst normalized: NativeExtractionConfig = {};\n\tsetIfDefined(normalized, \"enabled\", postprocessor.enabled);\n\tsetIfDefined(normalized, \"enabledProcessors\", postprocessor.enabledProcessors);\n\tsetIfDefined(normalized, \"disabledProcessors\", postprocessor.disabledProcessors);\n\treturn normalized;\n}\n\nfunction normalizeHtmlPreprocessing(options?: HtmlPreprocessingOptions): NativeExtractionConfig | undefined {\n\tif (!options) {\n\t\treturn undefined;\n\t}\n\n\tconst normalized: NativeExtractionConfig = {};\n\tsetIfDefined(normalized, \"enabled\", options.enabled);\n\tsetIfDefined(normalized, \"preset\", options.preset);\n\tsetIfDefined(normalized, \"removeNavigation\", options.removeNavigation);\n\tsetIfDefined(normalized, \"removeForms\", options.removeForms);\n\treturn normalized;\n}\n\nfunction normalizeHtmlOptions(options?: HtmlConversionOptions): NativeExtractionConfig | undefined {\n\tif (!options) {\n\t\treturn undefined;\n\t}\n\n\tconst normalized: NativeExtractionConfig = {};\n\tsetIfDefined(normalized, \"headingStyle\", options.headingStyle);\n\tsetIfDefined(normalized, \"listIndentType\", options.listIndentType);\n\tsetIfDefined(normalized, \"listIndentWidth\", options.listIndentWidth);\n\tsetIfDefined(normalized, \"bullets\", options.bullets);\n\tsetIfDefined(normalized, \"strongEmSymbol\", options.strongEmSymbol);\n\tsetIfDefined(normalized, \"escapeAsterisks\", options.escapeAsterisks);\n\tsetIfDefined(normalized, \"escapeUnderscores\", options.escapeUnderscores);\n\tsetIfDefined(normalized, \"escapeMisc\", options.escapeMisc);\n\tsetIfDefined(normalized, \"escapeAscii\", options.escapeAscii);\n\tsetIfDefined(normalized, \"codeLanguage\", options.codeLanguage);\n\tsetIfDefined(normalized, \"autolinks\", options.autolinks);\n\tsetIfDefined(normalized, \"defaultTitle\", options.defaultTitle);\n\tsetIfDefined(normalized, \"brInTables\", options.brInTables);\n\tsetIfDefined(normalized, \"hocrSpatialTables\", options.hocrSpatialTables);\n\tsetIfDefined(normalized, \"highlightStyle\", options.highlightStyle);\n\tsetIfDefined(normalized, \"extractMetadata\", options.extractMetadata);\n\tsetIfDefined(normalized, \"whitespaceMode\", options.whitespaceMode);\n\tsetIfDefined(normalized, \"stripNewlines\", options.stripNewlines);\n\tsetIfDefined(normalized, \"wrap\", options.wrap);\n\tsetIfDefined(normalized, \"wrapWidth\", options.wrapWidth);\n\tsetIfDefined(normalized, \"convertAsInline\", options.convertAsInline);\n\tsetIfDefined(normalized, \"subSymbol\", options.subSymbol);\n\tsetIfDefined(normalized, \"supSymbol\", options.supSymbol);\n\tsetIfDefined(normalized, \"newlineStyle\", options.newlineStyle);\n\tsetIfDefined(normalized, \"codeBlockStyle\", options.codeBlockStyle);\n\tsetIfDefined(normalized, \"keepInlineImagesIn\", options.keepInlineImagesIn);\n\tsetIfDefined(normalized, \"encoding\", options.encoding);\n\tsetIfDefined(normalized, \"debug\", options.debug);\n\tsetIfDefined(normalized, \"stripTags\", options.stripTags);\n\tsetIfDefined(normalized, \"preserveTags\", options.preserveTags);\n\n\tconst preprocessing = normalizeHtmlPreprocessing(options.preprocessing);\n\tsetIfDefined(normalized, \"preprocessing\", preprocessing);\n\n\treturn normalized;\n}\n\nfunction normalizeKeywordConfig(config?: KeywordConfig): NativeExtractionConfig | undefined {\n\tif (!config) {\n\t\treturn undefined;\n\t}\n\n\tconst normalized: NativeExtractionConfig = {};\n\tsetIfDefined(normalized, \"algorithm\", config.algorithm);\n\tsetIfDefined(normalized, \"maxKeywords\", config.maxKeywords);\n\tsetIfDefined(normalized, \"minScore\", config.minScore);\n\tsetIfDefined(normalized, \"ngramRange\", config.ngramRange);\n\tsetIfDefined(normalized, \"language\", config.language);\n\tsetIfDefined(normalized, \"yakeParams\", config.yakeParams);\n\tsetIfDefined(normalized, \"rakeParams\", config.rakeParams);\n\treturn normalized;\n}\n\nfunction normalizePageConfig(pages?: PageConfig): NativeExtractionConfig | undefined {\n\tif (!pages) {\n\t\treturn undefined;\n\t}\n\n\tconst normalized: NativeExtractionConfig = {};\n\tsetIfDefined(normalized, \"extract_pages\", pages.extractPages);\n\tsetIfDefined(normalized, \"insert_page_markers\", pages.insertPageMarkers);\n\tsetIfDefined(normalized, \"marker_format\", pages.markerFormat);\n\treturn normalized;\n}\n\nfunction normalizeExtractionConfig(config: ExtractionConfigType | null): NativeExtractionConfig | null {\n\tif (!config) {\n\t\treturn null;\n\t}\n\n\tconst normalized: NativeExtractionConfig = {};\n\tsetIfDefined(normalized, \"useCache\", config.useCache);\n\tsetIfDefined(normalized, \"enableQualityProcessing\", config.enableQualityProcessing);\n\tsetIfDefined(normalized, \"forceOcr\", config.forceOcr);\n\tsetIfDefined(normalized, \"maxConcurrentExtractions\", config.maxConcurrentExtractions);\n\n\tconst ocr = normalizeOcrConfig(config.ocr);\n\tsetIfDefined(normalized, \"ocr\", ocr);\n\n\tconst chunking = normalizeChunkingConfig(config.chunking);\n\tsetIfDefined(normalized, \"chunking\", chunking);\n\n\tconst images = normalizeImageExtractionConfig(config.images);\n\tsetIfDefined(normalized, \"images\", images);\n\n\tconst pdf = normalizePdfConfig(config.pdfOptions);\n\tsetIfDefined(normalized, \"pdfOptions\", pdf);\n\n\tconst tokenReduction = normalizeTokenReductionConfig(config.tokenReduction);\n\tsetIfDefined(normalized, \"tokenReduction\", tokenReduction);\n\n\tconst languageDetection = normalizeLanguageDetectionConfig(config.languageDetection);\n\tsetIfDefined(normalized, \"languageDetection\", languageDetection);\n\n\tconst postprocessor = normalizePostProcessorConfig(config.postprocessor);\n\tsetIfDefined(normalized, \"postprocessor\", postprocessor);\n\n\tconst keywords = normalizeKeywordConfig(config.keywords);\n\tsetIfDefined(normalized, \"keywords\", keywords);\n\n\tconst pages = normalizePageConfig(config.pages);\n\tsetIfDefined(normalized, \"pages\", pages);\n\n\tconst htmlOptions = normalizeHtmlOptions(config.htmlOptions);\n\tsetIfDefined(normalized, \"htmlOptions\", htmlOptions);\n\n\treturn normalized;\n}\n\n/**\n * Extract content from a single file (synchronous).\n *\n * **Usage Note**: For processing multiple files, prefer `batchExtractFilesSync()` which\n * provides better performance and memory management.\n *\n * @param filePath - Path to the file to extract (string). Can be absolute or relative.\n * @param mimeType - Optional MIME type hint for format detection. If null, MIME type is auto-detected from file extension or content.\n * @param config - Extraction configuration object. If null, uses default extraction settings.\n * @returns ExtractionResult containing extracted content, metadata, tables, and optional chunks/images\n * @throws {Error} If file doesn't exist, cannot be accessed, or cannot be read\n * @throws {ParsingError} When document format is invalid or corrupted\n * @throws {OcrError} When OCR processing fails (if OCR is enabled)\n * @throws {ValidationError} When extraction result fails validation (if validators registered)\n * @throws {KreuzbergError} For other extraction-related failures\n *\n * @example\n * ```typescript\n * import { extractFileSync } from '@kreuzberg/node';\n *\n * // Basic usage\n * const result = extractFileSync('document.pdf');\n * console.log(result.content);\n *\n * // With OCR configuration\n * const config = {\n * ocr: {\n * backend: 'tesseract',\n * language: 'eng',\n * tesseractConfig: {\n * psm: 6,\n * enableTableDetection: true,\n * },\n * },\n * };\n * const result2 = extractFileSync('scanned.pdf', null, config);\n * ```\n */\nexport function extractFileSync(\n\tfilePath: string,\n\tmimeType: string | null = null,\n\tconfig: ExtractionConfigType | null = null,\n): ExtractionResult {\n\tconst normalizedConfig = normalizeExtractionConfig(config);\n\tconst rawResult = getBinding().extractFileSync(filePath, mimeType, normalizedConfig);\n\treturn convertResult(rawResult);\n}\n\n/**\n * Extract content from a single file (asynchronous).\n *\n * **Usage Note**: For processing multiple files, prefer `batchExtractFiles()` which\n * provides better performance and memory management.\n *\n * @param filePath - Path to the file to extract (string). Can be absolute or relative.\n * @param mimeType - Optional MIME type hint for format detection. If null, MIME type is auto-detected from file extension or content.\n * @param config - Extraction configuration object. If null, uses default extraction settings.\n * @returns Promise<ExtractionResult> containing extracted content, metadata, tables, and optional chunks/images\n * @throws {Error} If file doesn't exist, cannot be accessed, or cannot be read\n * @throws {ParsingError} When document format is invalid or corrupted\n * @throws {OcrError} When OCR processing fails (if OCR is enabled)\n * @throws {ValidationError} When extraction result fails validation (if validators registered)\n * @throws {KreuzbergError} For other extraction-related failures\n *\n * @example\n * ```typescript\n * import { extractFile } from '@kreuzberg/node';\n *\n * // Basic usage\n * const result = await extractFile('document.pdf');\n * console.log(result.content);\n *\n * // With chunking enabled\n * const config = {\n * chunking: {\n * maxChars: 1000,\n * maxOverlap: 200,\n * },\n * };\n * const result2 = await extractFile('long_document.pdf', null, config);\n * console.log(result2.chunks); // Array of text chunks\n * ```\n */\nexport async function extractFile(\n\tfilePath: string,\n\tmimeType: string | null = null,\n\tconfig: ExtractionConfigType | null = null,\n): Promise<ExtractionResult> {\n\tconst normalizedConfig = normalizeExtractionConfig(config);\n\tconst rawResult = await getBinding().extractFile(filePath, mimeType, normalizedConfig);\n\treturn convertResult(rawResult);\n}\n\n/**\n * Extract content from raw bytes (synchronous).\n *\n * **Usage Note**: For processing multiple byte arrays, prefer `batchExtractBytesSync()`\n * which provides better performance and memory management.\n *\n * @param data - File content as Uint8Array (Buffer will be converted)\n * @param mimeType - MIME type of the data (required for accurate format detection). Must be a valid MIME type string.\n * @param config - Extraction configuration object. If null, uses default extraction settings.\n * @returns ExtractionResult containing extracted content, metadata, tables, and optional chunks/images\n * @throws {TypeError} When data is not a valid Uint8Array\n * @throws {Error} When file cannot be read or parsed\n * @throws {ParsingError} When document format is invalid or corrupted\n * @throws {OcrError} When OCR processing fails (if OCR is enabled)\n * @throws {ValidationError} When extraction result fails validation (if validators registered)\n * @throws {KreuzbergError} For other extraction-related failures\n *\n * @example\n * ```typescript\n * import { extractBytesSync } from '@kreuzberg/node';\n * import { readFileSync } from 'fs';\n *\n * const data = readFileSync('document.pdf');\n * const result = extractBytesSync(data, 'application/pdf');\n * console.log(result.content);\n * ```\n */\nexport function extractBytesSync(\n\tdata: Uint8Array,\n\tmimeType: string,\n\tconfig: ExtractionConfigType | null = null,\n): ExtractionResult {\n\tconst validated = assertUint8Array(data, \"data\");\n\tconst normalizedConfig = normalizeExtractionConfig(config);\n\tconst rawResult = getBinding().extractBytesSync(Buffer.from(validated), mimeType, normalizedConfig);\n\treturn convertResult(rawResult);\n}\n\n/**\n * Extract content from raw bytes (asynchronous).\n *\n * **Usage Note**: For processing multiple byte arrays, prefer `batchExtractBytes()`\n * which provides better performance and memory management.\n *\n * @param data - File content as Uint8Array (Buffer will be converted)\n * @param mimeType - MIME type of the data (required for accurate format detection). Must be a valid MIME type string.\n * @param config - Extraction configuration object. If null, uses default extraction settings.\n * @returns Promise<ExtractionResult> containing extracted content, metadata, tables, and optional chunks/images\n * @throws {TypeError} When data is not a valid Uint8Array\n * @throws {Error} When file cannot be read or parsed\n * @throws {ParsingError} When document format is invalid or corrupted\n * @throws {OcrError} When OCR processing fails (if OCR is enabled)\n * @throws {ValidationError} When extraction result fails validation (if validators registered)\n * @throws {KreuzbergError} For other extraction-related failures\n *\n * @example\n * ```typescript\n * import { extractBytes } from '@kreuzberg/node';\n * import { readFile } from 'fs/promises';\n *\n * const data = await readFile('document.pdf');\n * const result = await extractBytes(data, 'application/pdf');\n * console.log(result.content);\n * ```\n */\nexport async function extractBytes(\n\tdata: Uint8Array,\n\tmimeType: string,\n\tconfig: ExtractionConfigType | null = null,\n): Promise<ExtractionResult> {\n\tconst validated = assertUint8Array(data, \"data\");\n\t// biome-ignore lint/complexity/useLiteralKeys: required for environment variable access\n\tif (process.env[\"KREUZBERG_DEBUG_GUTEN\"] === \"1\") {\n\t\tconsole.log(\"[TypeScript] Debug input header:\", Array.from(validated.slice(0, 8)));\n\t}\n\tconst normalizedConfig = normalizeExtractionConfig(config);\n\tconst rawResult = await getBinding().extractBytes(Buffer.from(validated), mimeType, normalizedConfig);\n\treturn convertResult(rawResult);\n}\n\n/**\n * Extract content from multiple files in parallel (synchronous).\n *\n * **Recommended for**: Processing multiple documents efficiently with better\n * performance and memory management compared to individual `extractFileSync()` calls.\n *\n * **Benefits**:\n * - Parallel processing in Rust for maximum performance\n * - Optimized memory usage across all extractions\n * - More reliable for batch document processing\n *\n * @param paths - List of file paths to extract (absolute or relative paths)\n * @param config - Extraction configuration object. If null, uses default extraction settings.\n * @returns Array of ExtractionResults (one per file, in same order as input)\n * @throws {Error} If any file cannot be read or parsed\n * @throws {ParsingError} When any document format is invalid or corrupted\n * @throws {OcrError} When OCR processing fails (if OCR is enabled)\n * @throws {ValidationError} When any extraction result fails validation (if validators registered)\n * @throws {KreuzbergError} For other extraction-related failures\n *\n * @example\n * ```typescript\n * import { batchExtractFilesSync } from '@kreuzberg/node';\n *\n * const files = ['doc1.pdf', 'doc2.docx', 'doc3.xlsx'];\n * const results = batchExtractFilesSync(files);\n *\n * results.forEach((result, i) => {\n * console.log(`File ${files[i]}: ${result.content.substring(0, 100)}...`);\n * });\n * ```\n */\nexport function batchExtractFilesSync(paths: string[], config: ExtractionConfigType | null = null): ExtractionResult[] {\n\tconst normalizedConfig = normalizeExtractionConfig(config);\n\tconst rawResults = getBinding().batchExtractFilesSync(paths, normalizedConfig);\n\treturn rawResults.map(convertResult);\n}\n\n/**\n * Extract content from multiple files in parallel (asynchronous).\n *\n * **Recommended for**: Processing multiple documents efficiently with better\n * performance and memory management compared to individual `extractFile()` calls.\n *\n * **Benefits**:\n * - Parallel processing in Rust for maximum performance\n * - Optimized memory usage across all extractions\n * - More reliable for batch document processing\n *\n * @param paths - List of file paths to extract (absolute or relative paths)\n * @param config - Extraction configuration object. If null, uses default extraction settings.\n * @returns Promise resolving to array of ExtractionResults (one per file, in same order as input)\n * @throws {Error} If any file cannot be read or parsed\n * @throws {ParsingError} When any document format is invalid or corrupted\n * @throws {OcrError} When OCR processing fails (if OCR is enabled)\n * @throws {ValidationError} When any extraction result fails validation (if validators registered)\n * @throws {KreuzbergError} For other extraction-related failures\n *\n * @example\n * ```typescript\n * import { batchExtractFiles } from '@kreuzberg/node';\n *\n * const files = ['invoice1.pdf', 'invoice2.pdf', 'invoice3.pdf'];\n * const results = await batchExtractFiles(files, {\n * ocr: { backend: 'tesseract', language: 'eng' }\n * });\n *\n * // Process all results\n * const totalAmount = results\n * .map(r => extractAmount(r.content))\n * .reduce((a, b) => a + b, 0);\n * ```\n */\nexport async function batchExtractFiles(\n\tpaths: string[],\n\tconfig: ExtractionConfigType | null = null,\n): Promise<ExtractionResult[]> {\n\tconst normalizedConfig = normalizeExtractionConfig(config);\n\tconst rawResults = await getBinding().batchExtractFiles(paths, normalizedConfig);\n\treturn rawResults.map(convertResult);\n}\n\n/**\n * Extract content from multiple byte arrays in parallel (synchronous).\n *\n * **Recommended for**: Processing multiple documents from memory efficiently with better\n * performance and memory management compared to individual `extractBytesSync()` calls.\n *\n * **Benefits**:\n * - Parallel processing in Rust for maximum performance\n * - Optimized memory usage across all extractions\n * - More reliable for batch document processing\n *\n * @param dataList - List of file contents as Uint8Arrays (must be same length as mimeTypes)\n * @param mimeTypes - List of MIME types (one per data item, required for accurate format detection)\n * @param config - Extraction configuration object. If null, uses default extraction settings.\n * @returns Array of ExtractionResults (one per data item, in same order as input)\n * @throws {TypeError} When dataList contains non-Uint8Array items or length mismatch with mimeTypes\n * @throws {Error} If any data cannot be read or parsed\n * @throws {ParsingError} When any document format is invalid or corrupted\n * @throws {OcrError} When OCR processing fails (if OCR is enabled)\n * @throws {ValidationError} When any extraction result fails validation (if validators registered)\n * @throws {KreuzbergError} For other extraction-related failures\n *\n * @example\n * ```typescript\n * import { batchExtractBytesSync } from '@kreuzberg/node';\n * import { readFileSync } from 'fs';\n *\n * const files = ['doc1.pdf', 'doc2.docx', 'doc3.xlsx'];\n * const dataList = files.map(f => readFileSync(f));\n * const mimeTypes = ['application/pdf', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'];\n *\n * const results = batchExtractBytesSync(dataList, mimeTypes);\n * results.forEach((result, i) => {\n * console.log(`File ${files[i]}: ${result.content.substring(0, 100)}...`);\n * });\n * ```\n */\nexport function batchExtractBytesSync(\n\tdataList: Uint8Array[],\n\tmimeTypes: string[],\n\tconfig: ExtractionConfigType | null = null,\n): ExtractionResult[] {\n\tconst buffers = assertUint8ArrayList(dataList, \"dataList\").map((data) => Buffer.from(data));\n\n\tif (buffers.length !== mimeTypes.length) {\n\t\tthrow new TypeError(\"dataList and mimeTypes must have the same length\");\n\t}\n\n\tconst normalizedConfig = normalizeExtractionConfig(config);\n\tconst rawResults = getBinding().batchExtractBytesSync(buffers, mimeTypes, normalizedConfig);\n\treturn rawResults.map(convertResult);\n}\n\n/**\n * Extract content from multiple byte arrays in parallel (asynchronous).\n *\n * **Recommended for**: Processing multiple documents from memory efficiently with better\n * performance and memory management compared to individual `extractBytes()` calls.\n *\n * **Benefits**:\n * - Parallel processing in Rust for maximum performance\n * - Optimized memory usage across all extractions\n * - More reliable for batch document processing\n *\n * @param dataList - List of file contents as Uint8Arrays (must be same length as mimeTypes)\n * @param mimeTypes - List of MIME types (one per data item, required for accurate format detection)\n * @param config - Extraction configuration object. If null, uses default extraction settings.\n * @returns Promise resolving to array of ExtractionResults (one per data item, in same order as input)\n * @throws {TypeError} When dataList contains non-Uint8Array items or length mismatch with mimeTypes\n * @throws {Error} If any data cannot be read or parsed\n * @throws {ParsingError} When any document format is invalid or corrupted\n * @throws {OcrError} When OCR processing fails (if OCR is enabled)\n * @throws {ValidationError} When any extraction result fails validation (if validators registered)\n * @throws {KreuzbergError} For other extraction-related failures\n *\n * @example\n * ```typescript\n * import { batchExtractBytes } from '@kreuzberg/node';\n * import { readFile } from 'fs/promises';\n *\n * const files = ['invoice1.pdf', 'invoice2.pdf', 'invoice3.pdf'];\n * const dataList = await Promise.all(files.map(f => readFile(f)));\n * const mimeTypes = files.map(() => 'application/pdf');\n *\n * const results = await batchExtractBytes(dataList, mimeTypes, {\n * ocr: { backend: 'tesseract', language: 'eng' }\n * });\n *\n * // Process all results\n * const totalAmount = results\n * .map(r => extractAmount(r.content))\n * .reduce((a, b) => a + b, 0);\n * ```\n */\nexport async function batchExtractBytes(\n\tdataList: Uint8Array[],\n\tmimeTypes: string[],\n\tconfig: ExtractionConfigType | null = null,\n): Promise<ExtractionResult[]> {\n\tconst buffers = assertUint8ArrayList(dataList, \"dataList\").map((data) => Buffer.from(data));\n\n\tif (buffers.length !== mimeTypes.length) {\n\t\tthrow new TypeError(\"dataList and mimeTypes must have the same length\");\n\t}\n\n\tconst normalizedConfig = normalizeExtractionConfig(config);\n\tconst rawResults = await getBinding().batchExtractBytes(buffers, mimeTypes, normalizedConfig);\n\treturn rawResults.map(convertResult);\n}\n\n/**\n * Register a custom postprocessor.\n *\n * **IMPORTANT**: Custom processors only work with **async extraction functions**:\n * - ✅ `extractFile()`, `extractBytes()`, `batchExtractFiles()`, `batchExtractBytes()`\n * - ❌ `extractFileSync()`, `extractBytesSync()`, etc. (will skip custom processors)\n *\n * This limitation exists because sync extraction blocks the Node.js event loop,\n * preventing JavaScript callbacks from executing. For v4.0, use async extraction\n * when you need custom processors.\n *\n * @param processor - PostProcessorProtocol implementation with name(), process(), and optional processingStage()\n * @throws {Error} If processor is missing required methods (name or process)\n * @throws {Error} If processor name is empty string\n * @throws {Error} If a processor with the same name is already registered\n *\n * @example\n * ```typescript\n * import { registerPostProcessor, extractFile, ExtractionResult } from '@kreuzberg/node';\n *\n * class MyProcessor implements PostProcessorProtocol {\n * name(): string {\n * return 'my_processor';\n * }\n *\n * process(result: ExtractionResult): ExtractionResult {\n * result.metadata.customField = 'custom_value';\n * return result;\n * }\n *\n * processingStage(): 'early' | 'middle' | 'late' {\n * return 'middle';\n * }\n * }\n *\n * registerPostProcessor(new MyProcessor());\n *\n * // Use async extraction (required for custom processors)\n * const result = await extractFile('document.pdf');\n * console.log(result.metadata.customField); // 'custom_value'\n * ```\n */\nexport function registerPostProcessor(processor: PostProcessorProtocol): void {\n\tconst binding = getBinding();\n\n\tconst wrappedProcessor = {\n\t\tname: processor.name.bind(processor),\n\t\tprocessingStage: processor.processingStage?.bind(processor),\n\t\tasync process(...args: unknown[]): Promise<string> {\n\t\t\tconst wrappedValue = args[0] as unknown[];\n\t\t\tconst jsonString = wrappedValue[0] as string;\n\n\t\t\tconst wireResult = JSON.parse(jsonString) as {\n\t\t\t\tcontent: string;\n\t\t\t\tmime_type: string;\n\t\t\t\tmetadata: string | Record<string, unknown>;\n\t\t\t\ttables?: unknown[];\n\t\t\t\tdetected_languages?: string[];\n\t\t\t\tchunks?: unknown[];\n\t\t\t\timages?: unknown[];\n\t\t\t};\n\n\t\t\tconst result: ExtractionResult = {\n\t\t\t\tcontent: wireResult.content,\n\t\t\t\tmimeType: wireResult.mime_type,\n\t\t\t\tmetadata: typeof wireResult.metadata === \"string\" ? JSON.parse(wireResult.metadata) : wireResult.metadata,\n\t\t\t\ttables: (wireResult.tables || []) as Table[],\n\t\t\t\tdetectedLanguages: wireResult.detected_languages ?? null,\n\t\t\t\tchunks: (wireResult.chunks as Chunk[] | null | undefined) ?? null,\n\t\t\t\timages: (wireResult.images as ExtractedImage[] | null | undefined) ?? null,\n\t\t\t};\n\n\t\t\tconst updated = await processor.process(result);\n\n\t\t\tconst wireUpdated = {\n\t\t\t\tcontent: updated.content,\n\t\t\t\tmime_type: updated.mimeType,\n\t\t\t\tmetadata: updated.metadata,\n\t\t\t\ttables: updated.tables,\n\t\t\t\tdetected_languages: updated.detectedLanguages,\n\t\t\t\tchunks: updated.chunks,\n\t\t\t\timages: updated.images,\n\t\t\t};\n\n\t\t\treturn JSON.stringify(wireUpdated);\n\t\t},\n\t};\n\n\tObject.defineProperty(wrappedProcessor, \"__original\", {\n\t\tvalue: processor,\n\t\tenumerable: false,\n\t});\n\n\tconst stage = processor.processingStage?.() ?? \"middle\";\n\tObject.defineProperty(wrappedProcessor, \"__stage\", {\n\t\tvalue: stage,\n\t\tenumerable: false,\n\t});\n\n\tbinding.registerPostProcessor(wrappedProcessor);\n}\n\n/**\n * Unregister a postprocessor by name.\n *\n * Removes a previously registered postprocessor from the registry.\n * If the processor doesn't exist, this is a no-op (does not throw).\n *\n * @param name - Name of the processor to unregister (case-sensitive)\n *\n * @example\n * ```typescript\n * import { unregisterPostProcessor } from '@kreuzberg/node';\n *\n * unregisterPostProcessor('my_processor');\n * ```\n */\nexport function unregisterPostProcessor(name: string): void {\n\tconst binding = getBinding();\n\tbinding.unregisterPostProcessor(name);\n}\n\n/**\n * Clear all registered postprocessors.\n *\n * Removes all postprocessors from the registry. Useful for test cleanup or resetting state.\n * If no postprocessors are registered, this is a no-op.\n *\n * @example\n * ```typescript\n * import { clearPostProcessors } from '@kreuzberg/node';\n *\n * clearPostProcessors();\n * ```\n */\nexport function clearPostProcessors(): void {\n\tconst binding = getBinding();\n\tbinding.clearPostProcessors();\n}\n\n/**\n * List all registered post-processors.\n *\n * Returns the names of all currently registered post-processors (both built-in and custom).\n *\n * @returns Array of post-processor names (empty array if none registered)\n *\n * @example\n * ```typescript\n * import { listPostProcessors } from '@kreuzberg/node';\n *\n * const names = listPostProcessors();\n * console.log('Registered post-processors:', names);\n * ```\n */\nexport function listPostProcessors(): string[] {\n\tconst binding = getBinding();\n\treturn binding.listPostProcessors();\n}\n\n/**\n * Register a custom validator.\n *\n * Validators check extraction results for quality, completeness, or correctness.\n * Unlike post-processors, validator errors **fail fast** - if a validator throws an error,\n * the extraction fails immediately.\n *\n * @param validator - ValidatorProtocol implementation with name(), validate(), and optional priority()/shouldValidate()\n * @throws {Error} If validator is missing required methods (name or validate)\n * @throws {Error} If validator name is empty string\n * @throws {Error} If a validator with the same name is already registered\n *\n * @example\n * ```typescript\n * import { registerValidator } from '@kreuzberg/node';\n *\n * class MinLengthValidator implements ValidatorProtocol {\n * name(): string {\n * return 'min_length_validator';\n * }\n *\n * priority(): number {\n * return 100; // Run early\n * }\n *\n * validate(result: ExtractionResult): void {\n * if (result.content.length < 100) {\n * throw new Error('Content too short: minimum 100 characters required');\n * }\n * }\n * }\n *\n * registerValidator(new MinLengthValidator());\n * ```\n */\nexport function registerValidator(validator: ValidatorProtocol): void {\n\tconst binding = getBinding();\n\n\tconst wrappedValidator = {\n\t\tname: validator.name.bind(validator),\n\t\tpriority: validator.priority?.bind(validator),\n\t\tasync validate(...args: unknown[]): Promise<string> {\n\t\t\tconst jsonString = args[0] as string;\n\n\t\t\tif (!jsonString || jsonString === \"undefined\") {\n\t\t\t\tthrow new Error(\"Validator received invalid JSON string\");\n\t\t\t}\n\n\t\t\tconst wireResult = JSON.parse(jsonString);\n\t\t\tconst result: ExtractionResult = {\n\t\t\t\tcontent: wireResult.content,\n\t\t\t\tmimeType: wireResult.mime_type,\n\t\t\t\tmetadata: typeof wireResult.metadata === \"string\" ? JSON.parse(wireResult.metadata) : wireResult.metadata,\n\t\t\t\ttables: wireResult.tables || [],\n\t\t\t\tdetectedLanguages: wireResult.detected_languages,\n\t\t\t\tchunks: wireResult.chunks,\n\t\t\t\timages: wireResult.images ?? null,\n\t\t\t};\n\n\t\t\tawait Promise.resolve(validator.validate(result));\n\t\t\treturn \"\";\n\t\t},\n\t};\n\n\tbinding.registerValidator(wrappedValidator);\n}\n\n/**\n * Unregister a validator by name.\n *\n * Removes a previously registered validator from the global registry.\n * If the validator doesn't exist, this is a no-op (does not throw).\n *\n * @param name - Validator name to unregister (case-sensitive)\n *\n * @example\n * ```typescript\n * import { unregisterValidator } from '@kreuzberg/node';\n *\n * unregisterValidator('min_length_validator');\n * ```\n */\nexport function unregisterValidator(name: string): void {\n\tconst binding = getBinding();\n\tbinding.unregisterValidator(name);\n}\n\n/**\n * Clear all registered validators.\n *\n * Removes all validators from the global registry. Useful for test cleanup\n * or resetting state.\n *\n * @example\n * ```typescript\n * import { clearValidators } from '@kreuzberg/node';\n *\n * clearValidators();\n * ```\n */\nexport function clearValidators(): void {\n\tconst binding = getBinding();\n\tbinding.clearValidators();\n}\n\n/**\n * List all registered validators.\n *\n * Returns the names of all currently registered validators (both built-in and custom).\n *\n * @returns Array of validator names (empty array if none registered)\n *\n * @example\n * ```typescript\n * import { listValidators } from '@kreuzberg/node';\n *\n * const names = listValidators();\n * console.log('Registered validators:', names);\n * ```\n */\nexport function listValidators(): string[] {\n\tconst binding = getBinding();\n\treturn binding.listValidators();\n}\n\n/**\n * Register a custom OCR backend.\n *\n * This function registers a JavaScript OCR backend that will be used by Kreuzberg's\n * extraction pipeline when OCR is enabled. The backend must implement the\n * {@link OcrBackendProtocol} interface.\n *\n * ## Usage\n *\n * 1. Create a class implementing {@link OcrBackendProtocol}\n * 2. Call `initialize()` on your backend instance (if needed)\n * 3. Register the backend with `registerOcrBackend()`\n * 4. Use the backend name in extraction config\n *\n * ## Thread Safety\n *\n * The registered backend must be thread-safe as it may be called concurrently\n * from multiple Rust async tasks. Ensure your implementation handles concurrent\n * calls properly.\n *\n * @param backend - OcrBackendProtocol implementation with name(), supportedLanguages(), and processImage()\n * @throws {Error} If backend is missing required methods (name, supportedLanguages, or processImage)\n * @throws {Error} If backend name is empty string or contains invalid characters\n * @throws {Error} If a backend with the same name is already registered\n * @throws {Error} If registration fails due to FFI issues\n *\n * @example\n * ```typescript\n * import { GutenOcrBackend } from '@kreuzberg/node/ocr/guten-ocr';\n * import { registerOcrBackend, extractFile } from '@kreuzberg/node';\n *\n * // Create and initialize backend\n * const backend = new GutenOcrBackend();\n * await backend.initialize();\n *\n * // Register with Kreuzberg\n * registerOcrBackend(backend);\n *\n * // Use in extraction\n * const result = await extractFile('scanned.pdf', null, {\n * ocr: { backend: 'guten-ocr', language: 'en' }\n * });\n * console.log(result.content);\n * ```\n *\n * @example\n * ```typescript\n * // Custom OCR backend implementation\n * class MyOcrBackend implements OcrBackendProtocol {\n * name(): string {\n * return 'my-ocr';\n * }\n *\n * supportedLanguages(): string[] {\n * return ['en', 'de', 'fr'];\n * }\n *\n * async processImage(imageBytes: Uint8Array, language: string) {\n * const text = await myCustomOcrEngine(imageBytes, language);\n * return {\n * content: text,\n * mime_type: 'text/plain',\n * metadata: { confidence: 0.95, language },\n * tables: []\n * };\n * }\n * }\n *\n * registerOcrBackend(new MyOcrBackend());\n * ```\n */\ntype OcrProcessPayload = Buffer | string;\ntype OcrProcessTuple = [OcrProcessPayload, string];\ntype NestedOcrProcessTuple = [OcrProcessTuple];\n\nfunction isOcrProcessTuple(value: unknown): value is OcrProcessTuple {\n\treturn (\n\t\tArray.isArray(value) &&\n\t\tvalue.length === 2 &&\n\t\ttypeof value[1] === \"string\" &&\n\t\t(typeof value[0] === \"string\" || Buffer.isBuffer(value[0]) || value[0] instanceof Uint8Array)\n\t);\n}\n\nfunction isNestedOcrProcessTuple(value: unknown): value is NestedOcrProcessTuple {\n\treturn Array.isArray(value) && value.length === 1 && isOcrProcessTuple(value[0]);\n}\n\nfunction describePayload(value: OcrProcessPayload) {\n\tif (typeof value === \"string\") {\n\t\treturn { ctor: \"String\", length: value.length };\n\t}\n\n\treturn { ctor: value.constructor?.name ?? \"Buffer\", length: value.length };\n}\n\nexport function registerOcrBackend(backend: OcrBackendProtocol): void {\n\tconst binding = getBinding();\n\n\tconst wrappedBackend = {\n\t\tname: backend.name.bind(backend),\n\t\tsupportedLanguages: backend.supportedLanguages.bind(backend),\n\t\tasync processImage(\n\t\t\t...processArgs: [OcrProcessPayload | OcrProcessTuple | NestedOcrProcessTuple, string?]\n\t\t): Promise<string> {\n\t\t\tconst [imagePayload, maybeLanguage] = processArgs;\n\t\t\t// biome-ignore lint/complexity/useLiteralKeys: required for environment variable access\n\t\t\tif (process.env[\"KREUZBERG_DEBUG_GUTEN\"] === \"1\") {\n\t\t\t\tconsole.log(\"[registerOcrBackend] JS arguments\", { length: processArgs.length });\n\t\t\t\tconsole.log(\"[registerOcrBackend] Raw args\", {\n\t\t\t\t\timagePayloadType: Array.isArray(imagePayload) ? \"tuple\" : typeof imagePayload,\n\t\t\t\t\tmaybeLanguageType: typeof maybeLanguage,\n\t\t\t\t\tmetadata: Array.isArray(imagePayload) ? { tupleLength: imagePayload.length } : describePayload(imagePayload),\n\t\t\t\t});\n\t\t\t}\n\n\t\t\tlet rawBytes: OcrProcessPayload;\n\t\t\tlet language = maybeLanguage;\n\n\t\t\tif (isNestedOcrProcessTuple(imagePayload)) {\n\t\t\t\t[rawBytes, language] = imagePayload[0];\n\t\t\t} else if (isOcrProcessTuple(imagePayload)) {\n\t\t\t\t[rawBytes, language] = imagePayload;\n\t\t\t} else {\n\t\t\t\trawBytes = imagePayload;\n\t\t\t}\n\n\t\t\tif (typeof language !== \"string\") {\n\t\t\t\tthrow new Error(\"OCR backend did not receive a language parameter\");\n\t\t\t}\n\n\t\t\t// biome-ignore lint/complexity/useLiteralKeys: required for environment variable access\n\t\t\tif (process.env[\"KREUZBERG_DEBUG_GUTEN\"] === \"1\") {\n\t\t\t\tconst length = typeof rawBytes === \"string\" ? rawBytes.length : rawBytes.length;\n\t\t\t\tconsole.log(\n\t\t\t\t\t\"[registerOcrBackend] Received payload\",\n\t\t\t\t\tArray.isArray(imagePayload) ? \"tuple\" : typeof rawBytes,\n\t\t\t\t\t\"ctor\",\n\t\t\t\t\tdescribePayload(rawBytes).ctor,\n\t\t\t\t\t\"length\",\n\t\t\t\t\tlength,\n\t\t\t\t);\n\t\t\t}\n\n\t\t\tconst buffer = typeof rawBytes === \"string\" ? Buffer.from(rawBytes, \"base64\") : Buffer.from(rawBytes);\n\t\t\tconst result = await backend.processImage(new Uint8Array(buffer), language);\n\n\t\t\treturn JSON.stringify(result);\n\t\t},\n\t};\n\n\tbinding.registerOcrBackend(wrappedBackend);\n}\n\n/**\n * List all registered OCR backends.\n *\n * Returns an array of names of all currently registered OCR backends,\n * including built-in backends like \"tesseract\".\n *\n * @returns Array of OCR backend names (empty array if none registered)\n *\n * @example\n * ```typescript\n * import { listOcrBackends } from '@kreuzberg/node';\n *\n * const backends = listOcrBackends();\n * console.log(backends); // ['tesseract', 'my-custom-backend', ...]\n * ```\n */\nexport function listOcrBackends(): string[] {\n\tconst binding = getBinding();\n\treturn binding.listOcrBackends();\n}\n\n/**\n * Unregister an OCR backend by name.\n *\n * Removes the specified OCR backend from the registry. If the backend doesn't exist,\n * this operation is a no-op (does not throw an error).\n *\n * @param name - Name of the OCR backend to unregister\n *\n * @example\n * ```typescript\n * import { unregisterOcrBackend } from '@kreuzberg/node';\n *\n * // Unregister a custom backend\n * unregisterOcrBackend('my-custom-ocr');\n * ```\n */\nexport function unregisterOcrBackend(name: string): void {\n\tconst binding = getBinding();\n\tbinding.unregisterOcrBackend(name);\n}\n\n/**\n * Clear all registered OCR backends.\n *\n * Removes all OCR backends from the registry, including built-in backends.\n * Use with caution as this will make OCR functionality unavailable until\n * backends are re-registered. If no backends are registered, this is a no-op.\n *\n * @example\n * ```typescript\n * import { clearOcrBackends } from '@kreuzberg/node';\n *\n * clearOcrBackends();\n * ```\n */\nexport function clearOcrBackends(): void {\n\tconst binding = getBinding();\n\tbinding.clearOcrBackends();\n}\n\n/**\n * List all registered document extractors.\n *\n * Returns an array of names of all currently registered document extractors,\n * including built-in extractors for PDF, Office documents, images, etc.\n *\n * @returns Array of document extractor names (empty array if none registered)\n *\n * @example\n * ```typescript\n * import { listDocumentExtractors } from '@kreuzberg/node';\n *\n * const extractors = listDocumentExtractors();\n * console.log(extractors); // ['PDFExtractor', 'ImageExtractor', ...]\n * ```\n */\nexport function listDocumentExtractors(): string[] {\n\tconst binding = getBinding();\n\treturn binding.listDocumentExtractors();\n}\n\n/**\n * Unregister a document extractor by name.\n *\n * Removes the specified document extractor from the registry. If the extractor\n * doesn't exist, this operation is a no-op (does not throw an error).\n *\n * @param name - Name of the document extractor to unregister\n *\n * @example\n * ```typescript\n * import { unregisterDocumentExtractor } from '@kreuzberg/node';\n *\n * // Unregister a custom extractor\n * unregisterDocumentExtractor('MyCustomExtractor');\n * ```\n */\nexport function unregisterDocumentExtractor(name: string): void {\n\tconst binding = getBinding();\n\tbinding.unregisterDocumentExtractor(name);\n}\n\n/**\n * Clear all registered document extractors.\n *\n * Removes all document extractors from the registry, including built-in extractors.\n * Use with caution as this will make document extraction unavailable until\n * extractors are re-registered.\n *\n * @example\n * ```typescript\n * import { clearDocumentExtractors } from '@kreuzberg/node';\n *\n * clearDocumentExtractors();\n * ```\n */\nexport function clearDocumentExtractors(): void {\n\tconst binding = getBinding();\n\tbinding.clearDocumentExtractors();\n}\n\n/**\n * ExtractionConfig namespace with static methods for loading configuration from files.\n *\n * Provides a factory method to load extraction configuration from TOML, YAML, or JSON files.\n * The file format is automatically detected based on the file extension.\n *\n * @example\n * ```typescript\n * import { ExtractionConfig, extractFile } from '@kreuzberg/node';\n *\n * // Load configuration from file\n * const config = ExtractionConfig.fromFile('config.toml');\n *\n * // Use with extraction\n * const result = await extractFile('document.pdf', null, config);\n * ```\n */\nexport const ExtractionConfig = {\n\t/**\n\t * Load extraction configuration from a file.\n\t *\n\t * Automatically detects the file format based on extension:\n\t * - `.toml` - TOML format\n\t * - `.yaml` - YAML format\n\t * - `.json` - JSON format\n\t *\n\t * @param filePath - Path to the configuration file (absolute or relative)\n\t * @returns ExtractionConfig object loaded from the file\n\t *\n\t * @throws {Error} If file does not exist or is not accessible\n\t * @throws {Error} If file content is not valid TOML/YAML/JSON\n\t * @throws {Error} If configuration structure is invalid\n\t * @throws {Error} If file extension is not supported\n\t *\n\t * @example\n\t * ```typescript\n\t * import { ExtractionConfig } from '@kreuzberg/node';\n\t *\n\t * // Load from TOML file\n\t * const config1 = ExtractionConfig.fromFile('kreuzberg.toml');\n\t *\n\t * // Load from YAML file\n\t * const config2 = ExtractionConfig.fromFile('./config.yaml');\n\t *\n\t * // Load from JSON file\n\t * const config3 = ExtractionConfig.fromFile('./config.json');\n\t * ```\n\t */\n\tfromFile(filePath: string): ExtractionConfigType {\n\t\tconst binding = getBinding();\n\t\treturn binding.loadExtractionConfigFromFile(filePath);\n\t},\n\n\t/**\n\t * Discover and load configuration from current or parent directories.\n\t *\n\t * Searches for a `kreuzberg.toml` file starting from the current working directory\n\t * and traversing up the directory tree. Returns the first configuration file found.\n\t *\n\t * @returns ExtractionConfig object if found, or null if no configuration file exists\n\t *\n\t * @example\n\t * ```typescript\n\t * import { ExtractionConfig } from '@kreuzberg/node';\n\t *\n\t * // Try to find config in current or parent directories\n\t * const config = ExtractionConfig.discover();\n\t * if (config) {\n\t * console.log('Found configuration');\n\t * // Use config for extraction\n\t * } else {\n\t * console.log('No configuration file found, using defaults');\n\t * }\n\t * ```\n\t */\n\tdiscover(): ExtractionConfigType | null {\n\t\tconst binding = getBinding();\n\t\treturn binding.discoverExtractionConfig();\n\t},\n};\n\n/**\n * Detect MIME type from raw bytes.\n *\n * Uses content inspection (magic bytes) to determine MIME type.\n * This is more accurate than extension-based detection but requires\n * reading the file content.\n *\n * @param bytes - Raw file content as Buffer\n * @returns The detected MIME type string\n *\n * @throws {Error} If MIME type cannot be determined from content\n *\n * @example\n * ```typescript\n * import { detectMimeType } from '@kreuzberg/node';\n * import * as fs from 'fs';\n *\n * // Read file content\n * const content = fs.readFileSync('document.pdf');\n *\n * // Detect MIME type from bytes\n * const mimeType = detectMimeType(content);\n * console.log(mimeType); // 'application/pdf'\n * ```\n */\nexport function detectMimeType(bytes: Buffer): string {\n\tconst binding = getBinding();\n\treturn binding.detectMimeTypeFromBytes(bytes);\n}\n\n/**\n * Detect MIME type from a file path.\n *\n * Uses file extension to determine MIME type. Falls back to `mime_guess` crate\n * if extension-based detection fails.\n *\n * @param path - Path to the file (string)\n * @returns The detected MIME type string\n *\n * @throws {Error} If MIME type cannot be determined from path/extension\n * @throws {Error} If extension is unknown\n *\n * @example\n * ```typescript\n * import { detectMimeTypeFromPath } from '@kreuzberg/node';\n *\n * // Detect from existing file\n * const mimeType = detectMimeTypeFromPath('document.pdf');\n * console.log(mimeType); // 'application/pdf'\n *\n * const mimeType2 = detectMimeTypeFromPath('document.docx');\n * console.log(mimeType2); // 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'\n * ```\n */\nexport function detectMimeTypeFromPath(path: string): string {\n\tconst binding = getBinding();\n\treturn binding.detectMimeTypeFromPath(path);\n}\n\n/**\n * Validate that a MIME type is supported by Kreuzberg.\n *\n * Checks if a MIME type is in the list of supported formats. Note that any\n * `image/*` MIME type is automatically considered valid.\n *\n * @param mimeType - The MIME type to validate (string)\n * @returns The validated MIME type (may be normalized)\n *\n * @throws {Error} If the MIME type is not supported\n *\n * @example\n * ```typescript\n * import { validateMimeType } from '@kreuzberg/node';\n *\n * // Validate supported type\n * const validated = validateMimeType('application/pdf');\n * console.log(validated); // 'application/pdf'\n *\n * // Validate custom image type\n * const validated2 = validateMimeType('image/custom-format');\n * console.log(validated2); // 'image/custom-format' (any image/* is valid)\n *\n * // Validate unsupported type (throws error)\n * try {\n * validateMimeType('video/mp4');\n * } catch (err) {\n * console.error(err); // Error: Unsupported format: video/mp4\n * }\n * ```\n */\nexport function validateMimeType(mimeType: string): string {\n\tconst binding = getBinding();\n\treturn binding.validateMimeType(mimeType);\n}\n\n/**\n * Get file extensions for a given MIME type.\n *\n * Returns an array of file extensions commonly associated with the specified\n * MIME type. For example, 'application/pdf' returns ['pdf'].\n *\n * @param mimeType - The MIME type to look up (e.g., 'application/pdf', 'image/jpeg')\n * @returns Array of file extensions (without leading dots)\n *\n * @throws {Error} If the MIME type is not recognized or supported\n *\n * @example\n * ```typescript\n * import { getExtensionsForMime } from '@kreuzberg/node';\n *\n * // Get extensions for PDF\n * const pdfExts = getExtensionsForMime('application/pdf');\n * console.log(pdfExts); // ['pdf']\n *\n * // Get extensions for JPEG\n * const jpegExts = getExtensionsForMime('image/jpeg');\n * console.log(jpegExts); // ['jpg', 'jpeg']\n * ```\n */\nexport function getExtensionsForMime(mimeType: string): string[] {\n\tconst binding = getBinding();\n\treturn binding.getExtensionsForMime(mimeType);\n}\n\n/**\n * Embedding preset configuration.\n *\n * Contains all settings for a specific embedding model preset.\n */\nexport interface EmbeddingPreset {\n\t/** Name of the preset (e.g., \"fast\", \"balanced\", \"quality\", \"multilingual\") */\n\tname: string;\n\t/** Recommended chunk size in characters */\n\tchunkSize: number;\n\t/** Recommended overlap in characters */\n\toverlap: number;\n\t/** Model identifier (e.g., \"AllMiniLML6V2Q\", \"BGEBaseENV15\") */\n\tmodelName: string;\n\t/** Embedding vector dimensions */\n\tdimensions: number;\n\t/** Human-readable description of the preset */\n\tdescription: string;\n}\n\n/**\n * List all available embedding preset names.\n *\n * Returns an array of preset names that can be used with `getEmbeddingPreset`.\n *\n * @returns Array of 4 preset names: [\"fast\", \"balanced\", \"quality\", \"multilingual\"]\n *\n * @example\n * ```typescript\n * import { listEmbeddingPresets } from '@kreuzberg/node';\n *\n * const presets = listEmbeddingPresets();\n * console.log(presets); // ['fast', 'balanced', 'quality', 'multilingual']\n * ```\n */\nexport function listEmbeddingPresets(): string[] {\n\tconst binding = getBinding();\n\treturn binding.listEmbeddingPresets();\n}\n\n/**\n * Get a specific embedding preset by name.\n *\n * Returns a preset configuration object, or null if the preset name is not found.\n *\n * @param name - The preset name (case-sensitive)\n * @returns An `EmbeddingPreset` object or `null` if not found\n *\n * @example\n * ```typescript\n * import { getEmbeddingPreset } from '@kreuzberg/node';\n *\n * const preset = getEmbeddingPreset('balanced');\n * if (preset) {\n * console.log(`Model: ${preset.modelName}, Dims: ${preset.dimensions}`);\n * // Model: BGEBaseENV15, Dims: 768\n * }\n * ```\n */\nexport function getEmbeddingPreset(name: string): EmbeddingPreset | null {\n\tconst binding = getBinding();\n\tconst result = binding.getEmbeddingPreset(name);\n\treturn result as unknown as EmbeddingPreset | null;\n}\n\n/**\n * Get the error code for the last FFI error.\n *\n * Returns the FFI error code as an integer. This is useful for programmatic error handling\n * and distinguishing between different types of failures in native code.\n *\n * Error codes:\n * - 0: Success (no error)\n * - 1: GenericError\n * - 2: Panic\n * - 3: InvalidArgument\n * - 4: IoError\n * - 5: ParsingError\n * - 6: OcrError\n * - 7: MissingDependency\n *\n * @returns The integer error code\n *\n * @example\n * ```typescript\n * import { extractFile, getLastErrorCode, ErrorCode } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * const code = getLastErrorCode();\n * if (code === ErrorCode.Panic) {\n * console.error('Native code panic detected');\n * }\n * }\n * ```\n */\nexport function getLastErrorCode(): number {\n\tconst binding = getBinding();\n\treturn binding.getLastErrorCode();\n}\n\n/**\n * Get panic context information if the last error was a panic.\n *\n * Returns detailed information about a panic in native code, or null if the last error was not a panic.\n * This provides debugging information when native code panics.\n *\n * @returns A `PanicContext` object with file, line, function, message, and timestamp_secs, or null if no panic context is available\n *\n * @example\n * ```typescript\n * import { extractFile, getLastPanicContext } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * const context = getLastPanicContext();\n * if (context) {\n * console.error(`Panic at ${context.file}:${context.line}`);\n * console.error(`In function: ${context.function}`);\n * console.error(`Message: ${context.message}`);\n * }\n * }\n * ```\n */\nexport function getLastPanicContext(): PanicContext | null {\n\tconst binding = getBinding();\n\tconst result = binding.getLastPanicContext();\n\treturn result as unknown as PanicContext | null;\n}\n\nexport const __version__ = \"4.0.0-rc.14\";\n"],"mappings":"AA+CA,SAAS,qBAAqB;AAwF9B;AAAA,EACC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAEA;AAAA,EACA;AAAA,EACA;AAAA,OACM;AACP,SAAS,uBAAuB;AAChC,cAAc;AAEd,IAAI,UAAgC;AACpC,IAAI,qBAAqB;AAEzB,SAAS,yBAAyB,OAAuB;AACxD,QAAM,YAAsB,CAAC;AAC7B,MAAI,SAAS;AAEb,MAAI,iBAAiB,OAAO;AAC3B,aAAS,MAAM,WAAW,MAAM,SAAS;AACzC,QAAI,UAAU,KAAK,MAAM,GAAG;AAC3B,gBAAU;AAAA,QACT;AAAA,MACD;AAAA,IACD;AACA,WAAO,IAAI;AAAA,MACV;AAAA,QACC;AAAA,QACA,UAAU,SAAS,UAAU,KAAK,GAAG,IAAI;AAAA,QACzC;AAAA,QACA,qBAAqB,MAAM;AAAA,MAC5B,EACE,OAAO,OAAO,EACd,KAAK,GAAG;AAAA,MACV,EAAE,OAAO,MAAM;AAAA,IAChB;AAAA,EACD;AAEA,SAAO,IAAI;AAAA,IACV;AAAA,MACC;AAAA,MACA;AAAA,MACA,qBAAqB,OAAO,KAAK,CAAC;AAAA,IACnC,EAAE,KAAK,GAAG;AAAA,EACX;AACD;AAEA,SAAS,iBAAiB,OAAgB,MAA0B;AACnE,MAAI,EAAE,iBAAiB,aAAa;AACnC,UAAM,IAAI,UAAU,GAAG,IAAI,uBAAuB;AAAA,EACnD;AACA,SAAO;AACR;AAEA,SAAS,qBAAqB,QAAiB,MAA4B;AAC1E,MAAI,CAAC,MAAM,QAAQ,MAAM,GAAG;AAC3B,UAAM,IAAI,UAAU,GAAG,IAAI,iCAAiC;AAAA,EAC7D;AAEA,QAAM,QAAQ;AACd,SAAO,MAAM,IAAI,CAAC,OAAO,UAAU;AAClC,QAAI;AACH,aAAO,iBAAiB,OAAO,GAAG,IAAI,IAAI,KAAK,GAAG;AAAA,IACnD,QAAQ;AACP,YAAM,IAAI,UAAU,GAAG,IAAI,IAAI,KAAK,wBAAwB;AAAA,IAC7D;AAAA,EACD,CAAC;AACF;AAKO,SAAS,qBAAqB,MAAqB;AACzD,YAAU;AACV,uBAAqB;AACtB;AAKO,SAAS,yBAA+B;AAC9C,YAAU;AACV,uBAAqB;AACtB;AAEA,SAAS,oBAAmC;AAC3C,QAAM,eACL,OAAO,YAAY,cAAe,UAAwC,cAAc,YAAY,GAAG;AAExG,MAAI,CAAC,cAAc;AAClB,UAAM,IAAI,MAAM,kEAAkE;AAAA,EACnF;AAEA,QAAM,eAAe,aAAa,aAAa;AAG/C,MAAI,OAAO,iBAAiB,YAAY,iBAAiB,MAAM;AAC9D,UAAM,IAAI;AAAA,MACT;AAAA,IACD;AAAA,EACD;AAEA,QAAM,SAAS;AAGf,QAAM,kBAAkB;AAAA,IACvB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACD;AAEA,aAAW,UAAU,iBAAiB;AACrC,QAAI,OAAO,OAAO,MAAM,MAAM,YAAY;AACzC,YAAM,IAAI;AAAA,QACT,8CAA8C,MAAM;AAAA,MAErD;AAAA,IACD;AAAA,EACD;AAEA,SAAO;AACR;AAEA,SAAS,aAA4B;AACpC,MAAI,oBAAoB;AACvB,QAAI,YAAY,MAAM;AACrB,YAAM,IAAI,MAAM,+CAA+C;AAAA,IAChE;AACA,WAAO;AAAA,EACR;AAEA,MAAI;AACH,QAAI,OAAO,YAAY,eAAe,QAAQ,YAAY,QAAQ,SAAS,MAAM;AAChF,gBAAU,kBAAkB;AAC5B,2BAAqB;AACrB,aAAO;AAAA,IACR;AAAA,EACD,SAAS,OAAO;AACf,yBAAqB;AACrB,UAAM,yBAAyB,KAAK;AAAA,EACrC;AAEA,QAAM,IAAI;AAAA,IACT;AAAA,EAED;AACD;AAEA,SAAS,cAAc,aAA8C;AACpE,MAAI;AACH,UAAM,SAAS,KAAK,MAAM,WAAW;AACrC,QAAI,OAAO,WAAW,YAAY,WAAW,MAAM;AAClD,aAAO;AAAA,IACR;AACA,WAAO,CAAC;AAAA,EACT,QAAQ;AACP,WAAO,CAAC;AAAA,EACT;AACD;AAEA,SAAS,iBAAiB,OAA4B;AACrD,MAAI,iBAAiB,YAAY;AAChC,WAAO;AAAA,EACR;AACA,MAAI,OAAO,WAAW,eAAe,iBAAiB,QAAQ;AAC7D,WAAO,IAAI,WAAW,KAAK;AAAA,EAC5B;AACA,MAAI,MAAM,QAAQ,KAAK,GAAG;AACzB,WAAO,IAAI,WAAW,KAAK;AAAA,EAC5B;AACA,SAAO,IAAI,WAAW;AACvB;AAEA,SAAS,aAAa,UAA0B;AAC/C,MAAI,CAAC,YAAY,OAAO,aAAa,UAAU;AAC9C,WAAO;AAAA,MACN,SAAS;AAAA,MACT,UAAU;AAAA,QACT,WAAW;AAAA,QACX,SAAS;AAAA,QACT,YAAY;AAAA,QACZ,YAAY;AAAA,QACZ,aAAa;AAAA,MACd;AAAA,MACA,WAAW;AAAA,IACZ;AAAA,EACD;AAEA,QAAM,QAAQ;AAEd,QAAM,WAAY,MAAM,UAAU,KAAiC,CAAC;AACpE,SAAO;AAAA;AAAA,IAEN,SAAU,MAAM,SAAS,KAAgB;AAAA;AAAA,IAEzC,WAAY,MAAM,WAAW,KAAyB;AAAA,IACtD,UAAU;AAAA;AAAA,MAET,WAAa,SAAS,YAAY,KAAK,SAAS,WAAW,KAAiB;AAAA;AAAA,MAE5E,SAAW,SAAS,UAAU,KAAK,SAAS,SAAS,KAAiB;AAAA;AAAA,MAEtE,YAAc,SAAS,aAAa,KAAK,SAAS,YAAY,KAAwB;AAAA;AAAA,MAEtF,YAAc,SAAS,aAAa,KAAK,SAAS,YAAY,KAAiB;AAAA;AAAA,MAE/E,aAAe,SAAS,cAAc,KAAK,SAAS,aAAa,KAAiB;AAAA;AAAA,MAElF,WAAa,SAAS,YAAY,KAAK,SAAS,WAAW,KAAwB;AAAA;AAAA,MAEnF,UAAY,SAAS,WAAW,KAAK,SAAS,UAAU,KAAwB;AAAA,IACjF;AAAA,EACD;AACD;AAEA,SAAS,aAAa,UAAmC;AACxD,MAAI,CAAC,YAAY,OAAO,aAAa,UAAU;AAC9C,WAAO;AAAA,MACN,MAAM,IAAI,WAAW;AAAA,MACrB,QAAQ;AAAA,MACR,YAAY;AAAA,MACZ,YAAY;AAAA,MACZ,OAAO;AAAA,MACP,QAAQ;AAAA,MACR,YAAY;AAAA,MACZ,kBAAkB;AAAA,MAClB,QAAQ;AAAA,MACR,aAAa;AAAA,MACb,WAAW;AAAA,IACZ;AAAA,EACD;AAEA,QAAM,QAAQ;AACd,SAAO;AAAA;AAAA,IAEN,MAAM,iBAAiB,MAAM,MAAM,CAAC;AAAA;AAAA,IAEpC,QAAS,MAAM,QAAQ,KAAgB;AAAA;AAAA,IAEvC,YAAa,MAAM,YAAY,KAAgB;AAAA;AAAA,IAE/C,YAAa,MAAM,YAAY,KAAuB;AAAA;AAAA,IAEtD,OAAQ,MAAM,OAAO,KAAuB;AAAA;AAAA,IAE5C,QAAS,MAAM,QAAQ,KAAuB;AAAA;AAAA,IAE9C,YAAa,MAAM,YAAY,KAAuB;AAAA;AAAA,IAEtD,kBAAmB,MAAM,kBAAkB,KAAuB;AAAA;AAAA,IAElE,QAAS,MAAM,QAAQ,KAAiB;AAAA;AAAA,IAExC,aAAc,MAAM,aAAa,KAAuB;AAAA;AAAA,IAExD,WAAW,MAAM,WAAW,IAAI,cAAc,MAAM,WAAW,CAAC,IAAI;AAAA,EACrE;AACD;AAEA,SAAS,mBAAmB,SAA+B;AAC1D,MAAI,CAAC,WAAW,OAAO,YAAY,UAAU;AAC5C,WAAO;AAAA,MACN,YAAY;AAAA,MACZ,SAAS;AAAA,MACT,QAAQ,CAAC;AAAA,MACT,QAAQ,CAAC;AAAA,IACV;AAAA,EACD;AAEA,QAAM,OAAO;AACb,SAAO;AAAA;AAAA,IAEN,YAAa,KAAK,YAAY,KAAgB;AAAA;AAAA,IAE9C,SAAU,KAAK,SAAS,KAAgB;AAAA;AAAA,IAExC,QAAQ,MAAM,QAAQ,KAAK,QAAQ,CAAC,IAAK,KAAK,QAAQ,IAAgB,CAAC;AAAA;AAAA,IAEvE,QAAQ,MAAM,QAAQ,KAAK,QAAQ,CAAC,IAAK,KAAK,QAAQ,EAAgB,IAAI,CAAC,UAAU,aAAa,KAAK,CAAC,IAAI,CAAC;AAAA,EAC9G;AACD;AAEA,SAAS,cAAc,WAAsC;AAC5D,MAAI,CAAC,aAAa,OAAO,cAAc,UAAU;AAChD,WAAO;AAAA,MACN,SAAS;AAAA,MACT,UAAU;AAAA,MACV,UAAU,CAAC;AAAA,MACX,QAAQ,CAAC;AAAA,MACT,mBAAmB;AAAA,MACnB,QAAQ;AAAA,MACR,QAAQ;AAAA,MACR,OAAO;AAAA,IACR;AAAA,EACD;AAEA,QAAM,SAAS;AAEf,QAAM,WAAW,OAAO,UAAU;AAClC,QAAM,gBACL,OAAO,aAAa,WAAW,cAAc,QAAQ,IAAM,YAAwC,CAAC;AAErG,SAAO;AAAA;AAAA,IAEN,SAAU,OAAO,SAAS,KAAgB;AAAA;AAAA,IAE1C,UAAW,OAAO,UAAU,KAAgB;AAAA,IAC5C,UAAU;AAAA;AAAA,IAEV,QAAQ,MAAM,QAAQ,OAAO,QAAQ,CAAC,IAAK,OAAO,QAAQ,IAAgB,CAAC;AAAA;AAAA,IAE3E,mBAAmB,MAAM,QAAQ,OAAO,mBAAmB,CAAC,IAAK,OAAO,mBAAmB,IAAiB;AAAA,IAC5G,SAAS,MAAM;AAEd,YAAM,aAAa,OAAO,QAAQ;AAClC,aAAO,MAAM,QAAQ,UAAU,IAAK,WAAyB,IAAI,CAAC,UAAU,aAAa,KAAK,CAAC,IAAI;AAAA,IACpG,GAAG;AAAA,IACH,SAAS,MAAM;AAEd,YAAM,aAAa,OAAO,QAAQ;AAClC,aAAO,MAAM,QAAQ,UAAU,IAAK,WAAyB,IAAI,CAAC,UAAU,aAAa,KAAK,CAAC,IAAI;AAAA,IACpG,GAAG;AAAA,IACH,QAAQ,MAAM;AAEb,YAAM,YAAY,OAAO,OAAO;AAChC,aAAO,MAAM,QAAQ,SAAS,IAAK,UAAwB,IAAI,CAAC,SAAS,mBAAmB,IAAI,CAAC,IAAI;AAAA,IACtG,GAAG;AAAA,EACJ;AACD;AAIA,SAAS,aAAgB,QAAgC,KAAa,OAA4B;AACjG,MAAI,UAAU,QAAW;AACxB,WAAO,GAAG,IAAI;AAAA,EACf;AACD;AAEA,SAAS,yBAAyB,QAA0B;AAC3D,MAAI,CAAC,QAAQ;AACZ,WAAO;AAAA,EACR;AAEA,QAAM,aAAqC,CAAC;AAC5C,eAAa,YAAY,OAAO,OAAO,GAAG;AAC1C,eAAa,YAAY,wBAAwB,OAAO,oBAAoB;AAC5E,eAAa,YAAY,yBAAyB,OAAO,qBAAqB;AAC9E,SAAO;AACR;AAEA,SAAS,mBAAmB,KAAqD;AAChF,MAAI,CAAC,KAAK;AACT,WAAO;AAAA,EACR;AAEA,QAAM,aAAqC;AAAA,IAC1C,SAAS,IAAI;AAAA,EACd;AACA,eAAa,YAAY,YAAY,IAAI,QAAQ;AAEjD,QAAM,YAAY,yBAAyB,IAAI,eAAe;AAC9D,MAAI,WAAW;AACd,iBAAa,YAAY,mBAAmB,SAAS;AAAA,EACtD;AAEA,SAAO;AACR;AAEA,SAAS,wBAAwB,UAA+D;AAC/F,MAAI,CAAC,UAAU;AACd,WAAO;AAAA,EACR;AAEA,QAAM,aAAqC,CAAC;AAC5C,eAAa,YAAY,YAAY,SAAS,QAAQ;AACtD,eAAa,YAAY,cAAc,SAAS,UAAU;AAC1D,eAAa,YAAY,UAAU,SAAS,MAAM;AAClD,eAAa,YAAY,aAAa,SAAS,SAAS;AACxD,eAAa,YAAY,WAAW,SAAS,OAAO;AACpD,SAAO;AACR;AAEA,SAAS,+BAA+B,QAAoE;AAC3G,MAAI,CAAC,QAAQ;AACZ,WAAO;AAAA,EACR;AAEA,QAAM,aAAqC,CAAC;AAC5C,eAAa,YAAY,iBAAiB,OAAO,aAAa;AAC9D,eAAa,YAAY,aAAa,OAAO,SAAS;AACtD,eAAa,YAAY,qBAAqB,OAAO,iBAAiB;AACtE,eAAa,YAAY,iBAAiB,OAAO,aAAa;AAC9D,eAAa,YAAY,UAAU,OAAO,MAAM;AAChD,eAAa,YAAY,UAAU,OAAO,MAAM;AAChD,SAAO;AACR;AAEA,SAAS,mBAAmB,KAAqD;AAChF,MAAI,CAAC,KAAK;AACT,WAAO;AAAA,EACR;AAEA,QAAM,aAAqC,CAAC;AAC5C,eAAa,YAAY,iBAAiB,IAAI,aAAa;AAC3D,eAAa,YAAY,aAAa,IAAI,SAAS;AACnD,eAAa,YAAY,mBAAmB,IAAI,eAAe;AAC/D,SAAO;AACR;AAEA,SAAS,8BAA8B,gBAA2E;AACjH,MAAI,CAAC,gBAAgB;AACpB,WAAO;AAAA,EACR;AAEA,QAAM,aAAqC,CAAC;AAC5C,eAAa,YAAY,QAAQ,eAAe,IAAI;AACpD,eAAa,YAAY,0BAA0B,eAAe,sBAAsB;AACxF,SAAO;AACR;AAEA,SAAS,iCACR,mBACqC;AACrC,MAAI,CAAC,mBAAmB;AACvB,WAAO;AAAA,EACR;AAEA,QAAM,aAAqC,CAAC;AAC5C,eAAa,YAAY,WAAW,kBAAkB,OAAO;AAC7D,eAAa,YAAY,iBAAiB,kBAAkB,aAAa;AACzE,eAAa,YAAY,kBAAkB,kBAAkB,cAAc;AAC3E,SAAO;AACR;AAEA,SAAS,6BAA6B,eAAyE;AAC9G,MAAI,CAAC,eAAe;AACnB,WAAO;AAAA,EACR;AAEA,QAAM,aAAqC,CAAC;AAC5C,eAAa,YAAY,WAAW,cAAc,OAAO;AACzD,eAAa,YAAY,qBAAqB,cAAc,iBAAiB;AAC7E,eAAa,YAAY,sBAAsB,cAAc,kBAAkB;AAC/E,SAAO;AACR;AAEA,SAAS,2BAA2B,SAAwE;AAC3G,MAAI,CAAC,SAAS;AACb,WAAO;AAAA,EACR;AAEA,QAAM,aAAqC,CAAC;AAC5C,eAAa,YAAY,WAAW,QAAQ,OAAO;AACnD,eAAa,YAAY,UAAU,QAAQ,MAAM;AACjD,eAAa,YAAY,oBAAoB,QAAQ,gBAAgB;AACrE,eAAa,YAAY,eAAe,QAAQ,WAAW;AAC3D,SAAO;AACR;AAEA,SAAS,qBAAqB,SAAqE;AAClG,MAAI,CAAC,SAAS;AACb,WAAO;AAAA,EACR;AAEA,QAAM,aAAqC,CAAC;AAC5C,eAAa,YAAY,gBAAgB,QAAQ,YAAY;AAC7D,eAAa,YAAY,kBAAkB,QAAQ,cAAc;AACjE,eAAa,YAAY,mBAAmB,QAAQ,eAAe;AACnE,eAAa,YAAY,WAAW,QAAQ,OAAO;AACnD,eAAa,YAAY,kBAAkB,QAAQ,cAAc;AACjE,eAAa,YAAY,mBAAmB,QAAQ,eAAe;AACnE,eAAa,YAAY,qBAAqB,QAAQ,iBAAiB;AACvE,eAAa,YAAY,cAAc,QAAQ,UAAU;AACzD,eAAa,YAAY,eAAe,QAAQ,WAAW;AAC3D,eAAa,YAAY,gBAAgB,QAAQ,YAAY;AAC7D,eAAa,YAAY,aAAa,QAAQ,SAAS;AACvD,eAAa,YAAY,gBAAgB,QAAQ,YAAY;AAC7D,eAAa,YAAY,cAAc,QAAQ,UAAU;AACzD,eAAa,YAAY,qBAAqB,QAAQ,iBAAiB;AACvE,eAAa,YAAY,kBAAkB,QAAQ,cAAc;AACjE,eAAa,YAAY,mBAAmB,QAAQ,eAAe;AACnE,eAAa,YAAY,kBAAkB,QAAQ,cAAc;AACjE,eAAa,YAAY,iBAAiB,QAAQ,aAAa;AAC/D,eAAa,YAAY,QAAQ,QAAQ,IAAI;AAC7C,eAAa,YAAY,aAAa,QAAQ,SAAS;AACvD,eAAa,YAAY,mBAAmB,QAAQ,eAAe;AACnE,eAAa,YAAY,aAAa,QAAQ,SAAS;AACvD,eAAa,YAAY,aAAa,QAAQ,SAAS;AACvD,eAAa,YAAY,gBAAgB,QAAQ,YAAY;AAC7D,eAAa,YAAY,kBAAkB,QAAQ,cAAc;AACjE,eAAa,YAAY,sBAAsB,QAAQ,kBAAkB;AACzE,eAAa,YAAY,YAAY,QAAQ,QAAQ;AACrD,eAAa,YAAY,SAAS,QAAQ,KAAK;AAC/C,eAAa,YAAY,aAAa,QAAQ,SAAS;AACvD,eAAa,YAAY,gBAAgB,QAAQ,YAAY;AAE7D,QAAM,gBAAgB,2BAA2B,QAAQ,aAAa;AACtE,eAAa,YAAY,iBAAiB,aAAa;AAEvD,SAAO;AACR;AAEA,SAAS,uBAAuB,QAA4D;AAC3F,MAAI,CAAC,QAAQ;AACZ,WAAO;AAAA,EACR;AAEA,QAAM,aAAqC,CAAC;AAC5C,eAAa,YAAY,aAAa,OAAO,SAAS;AACtD,eAAa,YAAY,eAAe,OAAO,WAAW;AAC1D,eAAa,YAAY,YAAY,OAAO,QAAQ;AACpD,eAAa,YAAY,cAAc,OAAO,UAAU;AACxD,eAAa,YAAY,YAAY,OAAO,QAAQ;AACpD,eAAa,YAAY,cAAc,OAAO,UAAU;AACxD,eAAa,YAAY,cAAc,OAAO,UAAU;AACxD,SAAO;AACR;AAEA,SAAS,oBAAoB,OAAwD;AACpF,MAAI,CAAC,OAAO;AACX,WAAO;AAAA,EACR;AAEA,QAAM,aAAqC,CAAC;AAC5C,eAAa,YAAY,iBAAiB,MAAM,YAAY;AAC5D,eAAa,YAAY,uBAAuB,MAAM,iBAAiB;AACvE,eAAa,YAAY,iBAAiB,MAAM,YAAY;AAC5D,SAAO;AACR;AAEA,SAAS,0BAA0B,QAAoE;AACtG,MAAI,CAAC,QAAQ;AACZ,WAAO;AAAA,EACR;AAEA,QAAM,aAAqC,CAAC;AAC5C,eAAa,YAAY,YAAY,OAAO,QAAQ;AACpD,eAAa,YAAY,2BAA2B,OAAO,uBAAuB;AAClF,eAAa,YAAY,YAAY,OAAO,QAAQ;AACpD,eAAa,YAAY,4BAA4B,OAAO,wBAAwB;AAEpF,QAAM,MAAM,mBAAmB,OAAO,GAAG;AACzC,eAAa,YAAY,OAAO,GAAG;AAEnC,QAAM,WAAW,wBAAwB,OAAO,QAAQ;AACxD,eAAa,YAAY,YAAY,QAAQ;AAE7C,QAAM,SAAS,+BAA+B,OAAO,MAAM;AAC3D,eAAa,YAAY,UAAU,MAAM;AAEzC,QAAM,MAAM,mBAAmB,OAAO,UAAU;AAChD,eAAa,YAAY,cAAc,GAAG;AAE1C,QAAM,iBAAiB,8BAA8B,OAAO,cAAc;AAC1E,eAAa,YAAY,kBAAkB,cAAc;AAEzD,QAAM,oBAAoB,iCAAiC,OAAO,iBAAiB;AACnF,eAAa,YAAY,qBAAqB,iBAAiB;AAE/D,QAAM,gBAAgB,6BAA6B,OAAO,aAAa;AACvE,eAAa,YAAY,iBAAiB,aAAa;AAEvD,QAAM,WAAW,uBAAuB,OAAO,QAAQ;AACvD,eAAa,YAAY,YAAY,QAAQ;AAE7C,QAAM,QAAQ,oBAAoB,OAAO,KAAK;AAC9C,eAAa,YAAY,SAAS,KAAK;AAEvC,QAAM,cAAc,qBAAqB,OAAO,WAAW;AAC3D,eAAa,YAAY,eAAe,WAAW;AAEnD,SAAO;AACR;AAwCO,SAAS,gBACf,UACA,WAA0B,MAC1B,SAAsC,MACnB;AACnB,QAAM,mBAAmB,0BAA0B,MAAM;AACzD,QAAM,YAAY,WAAW,EAAE,gBAAgB,UAAU,UAAU,gBAAgB;AACnF,SAAO,cAAc,SAAS;AAC/B;AAqCA,eAAsB,YACrB,UACA,WAA0B,MAC1B,SAAsC,MACV;AAC5B,QAAM,mBAAmB,0BAA0B,MAAM;AACzD,QAAM,YAAY,MAAM,WAAW,EAAE,YAAY,UAAU,UAAU,gBAAgB;AACrF,SAAO,cAAc,SAAS;AAC/B;AA6BO,SAAS,iBACf,MACA,UACA,SAAsC,MACnB;AACnB,QAAM,YAAY,iBAAiB,MAAM,MAAM;AAC/C,QAAM,mBAAmB,0BAA0B,MAAM;AACzD,QAAM,YAAY,WAAW,EAAE,iBAAiB,OAAO,KAAK,SAAS,GAAG,UAAU,gBAAgB;AAClG,SAAO,cAAc,SAAS;AAC/B;AA6BA,eAAsB,aACrB,MACA,UACA,SAAsC,MACV;AAC5B,QAAM,YAAY,iBAAiB,MAAM,MAAM;AAE/C,MAAI,QAAQ,IAAI,uBAAuB,MAAM,KAAK;AACjD,YAAQ,IAAI,oCAAoC,MAAM,KAAK,UAAU,MAAM,GAAG,CAAC,CAAC,CAAC;AAAA,EAClF;AACA,QAAM,mBAAmB,0BAA0B,MAAM;AACzD,QAAM,YAAY,MAAM,WAAW,EAAE,aAAa,OAAO,KAAK,SAAS,GAAG,UAAU,gBAAgB;AACpG,SAAO,cAAc,SAAS;AAC/B;AAkCO,SAAS,sBAAsB,OAAiB,SAAsC,MAA0B;AACtH,QAAM,mBAAmB,0BAA0B,MAAM;AACzD,QAAM,aAAa,WAAW,EAAE,sBAAsB,OAAO,gBAAgB;AAC7E,SAAO,WAAW,IAAI,aAAa;AACpC;AAqCA,eAAsB,kBACrB,OACA,SAAsC,MACR;AAC9B,QAAM,mBAAmB,0BAA0B,MAAM;AACzD,QAAM,aAAa,MAAM,WAAW,EAAE,kBAAkB,OAAO,gBAAgB;AAC/E,SAAO,WAAW,IAAI,aAAa;AACpC;AAuCO,SAAS,sBACf,UACA,WACA,SAAsC,MACjB;AACrB,QAAM,UAAU,qBAAqB,UAAU,UAAU,EAAE,IAAI,CAAC,SAAS,OAAO,KAAK,IAAI,CAAC;AAE1F,MAAI,QAAQ,WAAW,UAAU,QAAQ;AACxC,UAAM,IAAI,UAAU,kDAAkD;AAAA,EACvE;AAEA,QAAM,mBAAmB,0BAA0B,MAAM;AACzD,QAAM,aAAa,WAAW,EAAE,sBAAsB,SAAS,WAAW,gBAAgB;AAC1F,SAAO,WAAW,IAAI,aAAa;AACpC;AA2CA,eAAsB,kBACrB,UACA,WACA,SAAsC,MACR;AAC9B,QAAM,UAAU,qBAAqB,UAAU,UAAU,EAAE,IAAI,CAAC,SAAS,OAAO,KAAK,IAAI,CAAC;AAE1F,MAAI,QAAQ,WAAW,UAAU,QAAQ;AACxC,UAAM,IAAI,UAAU,kDAAkD;AAAA,EACvE;AAEA,QAAM,mBAAmB,0BAA0B,MAAM;AACzD,QAAM,aAAa,MAAM,WAAW,EAAE,kBAAkB,SAAS,WAAW,gBAAgB;AAC5F,SAAO,WAAW,IAAI,aAAa;AACpC;AA4CO,SAAS,sBAAsB,WAAwC;AAC7E,QAAMA,WAAU,WAAW;AAE3B,QAAM,mBAAmB;AAAA,IACxB,MAAM,UAAU,KAAK,KAAK,SAAS;AAAA,IACnC,iBAAiB,UAAU,iBAAiB,KAAK,SAAS;AAAA,IAC1D,MAAM,WAAW,MAAkC;AAClD,YAAM,eAAe,KAAK,CAAC;AAC3B,YAAM,aAAa,aAAa,CAAC;AAEjC,YAAM,aAAa,KAAK,MAAM,UAAU;AAUxC,YAAM,SAA2B;AAAA,QAChC,SAAS,WAAW;AAAA,QACpB,UAAU,WAAW;AAAA,QACrB,UAAU,OAAO,WAAW,aAAa,WAAW,KAAK,MAAM,WAAW,QAAQ,IAAI,WAAW;AAAA,QACjG,QAAS,WAAW,UAAU,CAAC;AAAA,QAC/B,mBAAmB,WAAW,sBAAsB;AAAA,QACpD,QAAS,WAAW,UAAyC;AAAA,QAC7D,QAAS,WAAW,UAAkD;AAAA,MACvE;AAEA,YAAM,UAAU,MAAM,UAAU,QAAQ,MAAM;AAE9C,YAAM,cAAc;AAAA,QACnB,SAAS,QAAQ;AAAA,QACjB,WAAW,QAAQ;AAAA,QACnB,UAAU,QAAQ;AAAA,QAClB,QAAQ,QAAQ;AAAA,QAChB,oBAAoB,QAAQ;AAAA,QAC5B,QAAQ,QAAQ;AAAA,QAChB,QAAQ,QAAQ;AAAA,MACjB;AAEA,aAAO,KAAK,UAAU,WAAW;AAAA,IAClC;AAAA,EACD;AAEA,SAAO,eAAe,kBAAkB,cAAc;AAAA,IACrD,OAAO;AAAA,IACP,YAAY;AAAA,EACb,CAAC;AAED,QAAM,QAAQ,UAAU,kBAAkB,KAAK;AAC/C,SAAO,eAAe,kBAAkB,WAAW;AAAA,IAClD,OAAO;AAAA,IACP,YAAY;AAAA,EACb,CAAC;AAED,EAAAA,SAAQ,sBAAsB,gBAAgB;AAC/C;AAiBO,SAAS,wBAAwB,MAAoB;AAC3D,QAAMA,WAAU,WAAW;AAC3B,EAAAA,SAAQ,wBAAwB,IAAI;AACrC;AAeO,SAAS,sBAA4B;AAC3C,QAAMA,WAAU,WAAW;AAC3B,EAAAA,SAAQ,oBAAoB;AAC7B;AAiBO,SAAS,qBAA+B;AAC9C,QAAMA,WAAU,WAAW;AAC3B,SAAOA,SAAQ,mBAAmB;AACnC;AAqCO,SAAS,kBAAkB,WAAoC;AACrE,QAAMA,WAAU,WAAW;AAE3B,QAAM,mBAAmB;AAAA,IACxB,MAAM,UAAU,KAAK,KAAK,SAAS;AAAA,IACnC,UAAU,UAAU,UAAU,KAAK,SAAS;AAAA,IAC5C,MAAM,YAAY,MAAkC;AACnD,YAAM,aAAa,KAAK,CAAC;AAEzB,UAAI,CAAC,cAAc,eAAe,aAAa;AAC9C,cAAM,IAAI,MAAM,wCAAwC;AAAA,MACzD;AAEA,YAAM,aAAa,KAAK,MAAM,UAAU;AACxC,YAAM,SAA2B;AAAA,QAChC,SAAS,WAAW;AAAA,QACpB,UAAU,WAAW;AAAA,QACrB,UAAU,OAAO,WAAW,aAAa,WAAW,KAAK,MAAM,WAAW,QAAQ,IAAI,WAAW;AAAA,QACjG,QAAQ,WAAW,UAAU,CAAC;AAAA,QAC9B,mBAAmB,WAAW;AAAA,QAC9B,QAAQ,WAAW;AAAA,QACnB,QAAQ,WAAW,UAAU;AAAA,MAC9B;AAEA,YAAM,QAAQ,QAAQ,UAAU,SAAS,MAAM,CAAC;AAChD,aAAO;AAAA,IACR;AAAA,EACD;AAEA,EAAAA,SAAQ,kBAAkB,gBAAgB;AAC3C;AAiBO,SAAS,oBAAoB,MAAoB;AACvD,QAAMA,WAAU,WAAW;AAC3B,EAAAA,SAAQ,oBAAoB,IAAI;AACjC;AAeO,SAAS,kBAAwB;AACvC,QAAMA,WAAU,WAAW;AAC3B,EAAAA,SAAQ,gBAAgB;AACzB;AAiBO,SAAS,iBAA2B;AAC1C,QAAMA,WAAU,WAAW;AAC3B,SAAOA,SAAQ,eAAe;AAC/B;AA6EA,SAAS,kBAAkB,OAA0C;AACpE,SACC,MAAM,QAAQ,KAAK,KACnB,MAAM,WAAW,KACjB,OAAO,MAAM,CAAC,MAAM,aACnB,OAAO,MAAM,CAAC,MAAM,YAAY,OAAO,SAAS,MAAM,CAAC,CAAC,KAAK,MAAM,CAAC,aAAa;AAEpF;AAEA,SAAS,wBAAwB,OAAgD;AAChF,SAAO,MAAM,QAAQ,KAAK,KAAK,MAAM,WAAW,KAAK,kBAAkB,MAAM,CAAC,CAAC;AAChF;AAEA,SAAS,gBAAgB,OAA0B;AAClD,MAAI,OAAO,UAAU,UAAU;AAC9B,WAAO,EAAE,MAAM,UAAU,QAAQ,MAAM,OAAO;AAAA,EAC/C;AAEA,SAAO,EAAE,MAAM,MAAM,aAAa,QAAQ,UAAU,QAAQ,MAAM,OAAO;AAC1E;AAEO,SAAS,mBAAmB,SAAmC;AACrE,QAAMA,WAAU,WAAW;AAE3B,QAAM,iBAAiB;AAAA,IACtB,MAAM,QAAQ,KAAK,KAAK,OAAO;AAAA,IAC/B,oBAAoB,QAAQ,mBAAmB,KAAK,OAAO;AAAA,IAC3D,MAAM,gBACF,aACe;AAClB,YAAM,CAAC,cAAc,aAAa,IAAI;AAEtC,UAAI,QAAQ,IAAI,uBAAuB,MAAM,KAAK;AACjD,gBAAQ,IAAI,qCAAqC,EAAE,QAAQ,YAAY,OAAO,CAAC;AAC/E,gBAAQ,IAAI,iCAAiC;AAAA,UAC5C,kBAAkB,MAAM,QAAQ,YAAY,IAAI,UAAU,OAAO;AAAA,UACjE,mBAAmB,OAAO;AAAA,UAC1B,UAAU,MAAM,QAAQ,YAAY,IAAI,EAAE,aAAa,aAAa,OAAO,IAAI,gBAAgB,YAAY;AAAA,QAC5G,CAAC;AAAA,MACF;AAEA,UAAI;AACJ,UAAI,WAAW;AAEf,UAAI,wBAAwB,YAAY,GAAG;AAC1C,SAAC,UAAU,QAAQ,IAAI,aAAa,CAAC;AAAA,MACtC,WAAW,kBAAkB,YAAY,GAAG;AAC3C,SAAC,UAAU,QAAQ,IAAI;AAAA,MACxB,OAAO;AACN,mBAAW;AAAA,MACZ;AAEA,UAAI,OAAO,aAAa,UAAU;AACjC,cAAM,IAAI,MAAM,kDAAkD;AAAA,MACnE;AAGA,UAAI,QAAQ,IAAI,uBAAuB,MAAM,KAAK;AACjD,cAAM,SAAS,OAAO,aAAa,WAAW,SAAS,SAAS,SAAS;AACzE,gBAAQ;AAAA,UACP;AAAA,UACA,MAAM,QAAQ,YAAY,IAAI,UAAU,OAAO;AAAA,UAC/C;AAAA,UACA,gBAAgB,QAAQ,EAAE;AAAA,UAC1B;AAAA,UACA;AAAA,QACD;AAAA,MACD;AAEA,YAAM,SAAS,OAAO,aAAa,WAAW,OAAO,KAAK,UAAU,QAAQ,IAAI,OAAO,KAAK,QAAQ;AACpG,YAAM,SAAS,MAAM,QAAQ,aAAa,IAAI,WAAW,MAAM,GAAG,QAAQ;AAE1E,aAAO,KAAK,UAAU,MAAM;AAAA,IAC7B;AAAA,EACD;AAEA,EAAAA,SAAQ,mBAAmB,cAAc;AAC1C;AAkBO,SAAS,kBAA4B;AAC3C,QAAMA,WAAU,WAAW;AAC3B,SAAOA,SAAQ,gBAAgB;AAChC;AAkBO,SAAS,qBAAqB,MAAoB;AACxD,QAAMA,WAAU,WAAW;AAC3B,EAAAA,SAAQ,qBAAqB,IAAI;AAClC;AAgBO,SAAS,mBAAyB;AACxC,QAAMA,WAAU,WAAW;AAC3B,EAAAA,SAAQ,iBAAiB;AAC1B;AAkBO,SAAS,yBAAmC;AAClD,QAAMA,WAAU,WAAW;AAC3B,SAAOA,SAAQ,uBAAuB;AACvC;AAkBO,SAAS,4BAA4B,MAAoB;AAC/D,QAAMA,WAAU,WAAW;AAC3B,EAAAA,SAAQ,4BAA4B,IAAI;AACzC;AAgBO,SAAS,0BAAgC;AAC/C,QAAMA,WAAU,WAAW;AAC3B,EAAAA,SAAQ,wBAAwB;AACjC;AAmBO,MAAM,mBAAmB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EA+B/B,SAAS,UAAwC;AAChD,UAAMA,WAAU,WAAW;AAC3B,WAAOA,SAAQ,6BAA6B,QAAQ;AAAA,EACrD;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAwBA,WAAwC;AACvC,UAAMA,WAAU,WAAW;AAC3B,WAAOA,SAAQ,yBAAyB;AAAA,EACzC;AACD;AA2BO,SAAS,eAAe,OAAuB;AACrD,QAAMA,WAAU,WAAW;AAC3B,SAAOA,SAAQ,wBAAwB,KAAK;AAC7C;AA0BO,SAAS,uBAAuB,MAAsB;AAC5D,QAAMA,WAAU,WAAW;AAC3B,SAAOA,SAAQ,uBAAuB,IAAI;AAC3C;AAiCO,SAAS,iBAAiB,UAA0B;AAC1D,QAAMA,WAAU,WAAW;AAC3B,SAAOA,SAAQ,iBAAiB,QAAQ;AACzC;AA0BO,SAAS,qBAAqB,UAA4B;AAChE,QAAMA,WAAU,WAAW;AAC3B,SAAOA,SAAQ,qBAAqB,QAAQ;AAC7C;AAqCO,SAAS,uBAAiC;AAChD,QAAMA,WAAU,WAAW;AAC3B,SAAOA,SAAQ,qBAAqB;AACrC;AAqBO,SAAS,mBAAmB,MAAsC;AACxE,QAAMA,WAAU,WAAW;AAC3B,QAAM,SAASA,SAAQ,mBAAmB,IAAI;AAC9C,SAAO;AACR;AAkCO,SAAS,mBAA2B;AAC1C,QAAMA,WAAU,WAAW;AAC3B,SAAOA,SAAQ,iBAAiB;AACjC;AA0BO,SAAS,sBAA2C;AAC1D,QAAMA,WAAU,WAAW;AAC3B,QAAM,SAASA,SAAQ,oBAAoB;AAC3C,SAAO;AACR;AAEO,MAAM,cAAc;","names":["binding"]}
|
|
1
|
+
{"version":3,"sources":["../typescript/index.ts"],"sourcesContent":["/**\n * Kreuzberg - Multi-language document intelligence framework.\n *\n * This is a TypeScript SDK around a high-performance Rust core.\n * All extraction logic, chunking, quality processing, and language detection\n * are implemented in Rust for maximum performance.\n *\n * ## API Usage Recommendations\n *\n * **For processing multiple documents**, prefer batch APIs:\n * - Use `batchExtractFiles()` / `batchExtractFilesSync()` for multiple files\n * - Use `batchExtractBytes()` / `batchExtractBytesSync()` for multiple byte arrays\n *\n * **Batch APIs provide**:\n * - Better performance (parallel processing in Rust)\n * - More reliable memory management\n * - Recommended for all multi-document workflows\n *\n * **Single extraction APIs** (`extractFile`, `extractBytes`) are suitable for:\n * - One-off document processing\n * - Interactive applications processing documents on-demand\n * - Avoid calling these in tight loops - use batch APIs instead\n *\n * ## Supported Formats\n *\n * - **Documents**: PDF, DOCX, PPTX, XLSX, DOC, PPT (with LibreOffice)\n * - **Text**: Markdown, Plain Text, XML\n * - **Web**: HTML (converted to Markdown)\n * - **Data**: JSON, YAML, TOML\n * - **Email**: EML, MSG\n * - **Images**: PNG, JPEG, TIFF (with OCR support)\n *\n * @example\n * ```typescript\n * import { extractFile, batchExtractFiles } from '@kreuzberg/node';\n *\n * // Single file extraction\n * const result = await extractFile('document.pdf');\n * console.log(result.content);\n *\n * // Multiple files (recommended approach)\n * const files = ['doc1.pdf', 'doc2.docx', 'doc3.xlsx'];\n * const results = await batchExtractFiles(files);\n * results.forEach(r => console.log(r.content));\n * ```\n */\n\nimport { createRequire } from \"node:module\";\nimport type { PanicContext } from \"./errors.js\";\nimport type {\n\tChunk,\n\tChunkingConfig,\n\tErrorClassification,\n\tExtractedImage,\n\tExtractionConfig as ExtractionConfigType,\n\tExtractionResult,\n\tHtmlConversionOptions,\n\tHtmlPreprocessingOptions,\n\tImageExtractionConfig,\n\tKeywordConfig,\n\tLanguageDetectionConfig,\n\tOcrBackendProtocol,\n\tOcrConfig,\n\tPageConfig,\n\tPageContent,\n\tPdfConfig,\n\tPostProcessorConfig,\n\tPostProcessorProtocol,\n\tTable,\n\tTesseractConfig,\n\tTokenReductionConfig,\n\tValidatorProtocol,\n} from \"./types.js\";\n\n/**\n * @internal Native NAPI binding interface for the Kreuzberg native module.\n * This interface defines the shape of methods available in the compiled native addon.\n */\ninterface NativeBinding {\n\textractFileSync(\n\t\tfilePath: string,\n\t\tmimeType: string | null,\n\t\tconfig: Record<string, unknown> | null,\n\t): Record<string, unknown>;\n\textractFile(\n\t\tfilePath: string,\n\t\tmimeType: string | null,\n\t\tconfig: Record<string, unknown> | null,\n\t): Promise<Record<string, unknown>>;\n\textractBytesSync(data: Buffer, mimeType: string, config: Record<string, unknown> | null): Record<string, unknown>;\n\textractBytes(\n\t\tdata: Buffer,\n\t\tmimeType: string,\n\t\tconfig: Record<string, unknown> | null,\n\t): Promise<Record<string, unknown>>;\n\tbatchExtractFilesSync(paths: string[], config: Record<string, unknown> | null): Record<string, unknown>[];\n\tbatchExtractFiles(paths: string[], config: Record<string, unknown> | null): Promise<Record<string, unknown>[]>;\n\tbatchExtractBytesSync(\n\t\tdataArray: Buffer[],\n\t\tmimeTypes: string[],\n\t\tconfig: Record<string, unknown> | null,\n\t): Record<string, unknown>[];\n\tbatchExtractBytes(\n\t\tdataArray: Buffer[],\n\t\tmimeTypes: string[],\n\t\tconfig: Record<string, unknown> | null,\n\t): Promise<Record<string, unknown>[]>;\n\tregisterPostProcessor(processor: Record<string, unknown>): void;\n\tunregisterPostProcessor(name: string): void;\n\tclearPostProcessors(): void;\n\tlistPostProcessors(): string[];\n\tregisterValidator(validator: Record<string, unknown>): void;\n\tunregisterValidator(name: string): void;\n\tclearValidators(): void;\n\tlistValidators(): string[];\n\tregisterOcrBackend(backend: Record<string, unknown>): void;\n\tunregisterOcrBackend(name: string): void;\n\tclearOcrBackends(): void;\n\tlistOcrBackends(): string[];\n\tregisterDocumentExtractor(extractor: Record<string, unknown>): void;\n\tunregisterDocumentExtractor(name: string): void;\n\tclearDocumentExtractors(): void;\n\tlistDocumentExtractors(): string[];\n\tdetectMimeType(filePath: string): string;\n\tdetectMimeTypeFromBytes(data: Buffer): string;\n\tdetectMimeTypeFromPath(filePath: string): string;\n\tvalidateMimeType(mimeType: string): string;\n\tgetExtensionsForMime(mimeType: string): string[];\n\tlistEmbeddingPresets(): string[];\n\tgetEmbeddingPreset(name: string): Record<string, unknown> | null;\n\tgetErrorCodeName(code: number): string;\n\tgetErrorCodeDescription(code: number): string;\n\tclassifyError(errorMessage: string): Record<string, unknown>;\n\tgetLastErrorCode(): number;\n\tgetLastPanicContext(): Record<string, unknown> | null;\n\tloadExtractionConfigFromFile(filePath: string): Record<string, unknown>;\n\tdiscoverExtractionConfig(): Record<string, unknown> | null;\n}\n\nexport {\n\tCacheError,\n\tErrorCode,\n\tImageProcessingError,\n\tKreuzbergError,\n\tMissingDependencyError,\n\tOcrError,\n\ttype PanicContext,\n\tParsingError,\n\tPluginError,\n\tValidationError,\n} from \"./errors.js\";\nexport { GutenOcrBackend } from \"./ocr/guten-ocr.js\";\nexport * from \"./types.js\";\n\nlet binding: NativeBinding | null = null;\nlet bindingInitialized = false;\n\nfunction createNativeBindingError(error: unknown): Error {\n\tconst hintParts: string[] = [];\n\tlet detail = \"Unknown error while requiring native module.\";\n\n\tif (error instanceof Error) {\n\t\tdetail = error.message || error.toString();\n\t\tif (/pdfium/i.test(detail)) {\n\t\t\thintParts.push(\n\t\t\t\t\"Pdfium runtime library was not found. Ensure the bundled libpdfium (dll/dylib/so) is present next to the native module.\",\n\t\t\t);\n\t\t}\n\t\treturn new Error(\n\t\t\t[\n\t\t\t\t\"Failed to load Kreuzberg native bindings.\",\n\t\t\t\thintParts.length ? hintParts.join(\" \") : \"\",\n\t\t\t\t\"Report this error and attach the logs/stack trace for investigation.\",\n\t\t\t\t`Underlying error: ${detail}`,\n\t\t\t]\n\t\t\t\t.filter(Boolean)\n\t\t\t\t.join(\" \"),\n\t\t\t{ cause: error },\n\t\t);\n\t}\n\n\treturn new Error(\n\t\t[\n\t\t\t\"Failed to load Kreuzberg native bindings.\",\n\t\t\t\"Report this error and attach the logs/stack trace for investigation.\",\n\t\t\t`Underlying error: ${String(error)}`,\n\t\t].join(\" \"),\n\t);\n}\n\nfunction assertUint8Array(value: unknown, name: string): Uint8Array {\n\tif (!(value instanceof Uint8Array)) {\n\t\tthrow new TypeError(`${name} must be a Uint8Array`);\n\t}\n\treturn value;\n}\n\nfunction assertUint8ArrayList(values: unknown, name: string): Uint8Array[] {\n\tif (!Array.isArray(values)) {\n\t\tthrow new TypeError(`${name} must be an array of Uint8Array`);\n\t}\n\n\tconst array = values as unknown[];\n\treturn array.map((value, index) => {\n\t\ttry {\n\t\t\treturn assertUint8Array(value, `${name}[${index}]`);\n\t\t} catch {\n\t\t\tthrow new TypeError(`${name}[${index}] must be a Uint8Array`);\n\t\t}\n\t});\n}\n\n/**\n * @internal Allows tests to provide a mocked native binding.\n */\nexport function __setBindingForTests(mock: unknown): void {\n\tbinding = mock as NativeBinding;\n\tbindingInitialized = true;\n}\n\n/**\n * @internal Resets the cached native binding for tests.\n */\nexport function __resetBindingForTests(): void {\n\tbinding = null;\n\tbindingInitialized = false;\n}\n\nfunction loadNativeBinding(): NativeBinding {\n\tconst localRequire: ((path: string) => unknown) | undefined =\n\t\ttypeof require !== \"undefined\" ? (require as (path: string) => unknown) : createRequire(import.meta.url);\n\n\tif (!localRequire) {\n\t\tthrow new Error(\"Unable to resolve native binding loader (require not available).\");\n\t}\n\n\tconst loadedModule = localRequire(\"../index.js\") as unknown;\n\n\t// Validate that the loaded module is an object\n\tif (typeof loadedModule !== \"object\" || loadedModule === null) {\n\t\tthrow new Error(\n\t\t\t\"Native binding is not a valid object. \" + \"Ensure the native module is properly built and compatible.\",\n\t\t);\n\t}\n\n\tconst module = loadedModule as Record<string, unknown>;\n\n\t// Validate that the loaded module has the expected methods\n\tconst requiredMethods = [\n\t\t\"extractFileSync\",\n\t\t\"extractFile\",\n\t\t\"extractBytesSync\",\n\t\t\"extractBytes\",\n\t\t\"batchExtractFilesSync\",\n\t\t\"batchExtractFiles\",\n\t\t\"batchExtractBytesSync\",\n\t\t\"batchExtractBytes\",\n\t];\n\n\tfor (const method of requiredMethods) {\n\t\tif (typeof module[method] !== \"function\") {\n\t\t\tthrow new Error(\n\t\t\t\t`Native binding is missing required method: ${method}. ` +\n\t\t\t\t\t\"Ensure the native module is properly built and compatible.\",\n\t\t\t);\n\t\t}\n\t}\n\n\treturn module as unknown as NativeBinding;\n}\n\nfunction getBinding(): NativeBinding {\n\tif (bindingInitialized) {\n\t\tif (binding === null) {\n\t\t\tthrow new Error(\"Native binding was previously failed to load.\");\n\t\t}\n\t\treturn binding;\n\t}\n\n\ttry {\n\t\tif (typeof process !== \"undefined\" && process.versions && process.versions.node) {\n\t\t\tbinding = loadNativeBinding();\n\t\t\tbindingInitialized = true;\n\t\t\treturn binding;\n\t\t}\n\t} catch (error) {\n\t\tbindingInitialized = true; // Mark as attempted even on failure\n\t\tthrow createNativeBindingError(error);\n\t}\n\n\tthrow new Error(\n\t\t\"Failed to load Kreuzberg bindings. Neither NAPI (Node.js) nor WASM (browsers/Deno) bindings are available. \" +\n\t\t\t\"Make sure you have installed the @kreuzberg/node package for Node.js/Bun.\",\n\t);\n}\n\nfunction parseMetadata(metadataStr: string): Record<string, unknown> {\n\ttry {\n\t\tconst parsed = JSON.parse(metadataStr) as unknown;\n\t\tif (typeof parsed === \"object\" && parsed !== null) {\n\t\t\treturn parsed as Record<string, unknown>;\n\t\t}\n\t\treturn {};\n\t} catch {\n\t\treturn {};\n\t}\n}\n\nfunction ensureUint8Array(value: unknown): Uint8Array {\n\tif (value instanceof Uint8Array) {\n\t\treturn value;\n\t}\n\tif (typeof Buffer !== \"undefined\" && value instanceof Buffer) {\n\t\treturn new Uint8Array(value);\n\t}\n\tif (Array.isArray(value)) {\n\t\treturn new Uint8Array(value);\n\t}\n\treturn new Uint8Array();\n}\n\nfunction convertChunk(rawChunk: unknown): Chunk {\n\tif (!rawChunk || typeof rawChunk !== \"object\") {\n\t\treturn {\n\t\t\tcontent: \"\",\n\t\t\tmetadata: {\n\t\t\t\tbyteStart: 0,\n\t\t\t\tbyteEnd: 0,\n\t\t\t\ttokenCount: null,\n\t\t\t\tchunkIndex: 0,\n\t\t\t\ttotalChunks: 0,\n\t\t\t},\n\t\t\tembedding: null,\n\t\t};\n\t}\n\n\tconst chunk = rawChunk as Record<string, unknown>;\n\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\tconst metadata = (chunk[\"metadata\"] as Record<string, unknown>) ?? {};\n\treturn {\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\tcontent: (chunk[\"content\"] as string) ?? \"\",\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\tembedding: (chunk[\"embedding\"] as number[] | null) ?? null,\n\t\tmetadata: {\n\t\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\t\tbyteStart: ((metadata[\"byte_start\"] ?? metadata[\"charStart\"]) as number) ?? 0,\n\t\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\t\tbyteEnd: ((metadata[\"byte_end\"] ?? metadata[\"charEnd\"]) as number) ?? 0,\n\t\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\t\ttokenCount: ((metadata[\"token_count\"] ?? metadata[\"tokenCount\"]) as number | null) ?? null,\n\t\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\t\tchunkIndex: ((metadata[\"chunk_index\"] ?? metadata[\"chunkIndex\"]) as number) ?? 0,\n\t\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\t\ttotalChunks: ((metadata[\"total_chunks\"] ?? metadata[\"totalChunks\"]) as number) ?? 0,\n\t\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\t\tfirstPage: ((metadata[\"first_page\"] ?? metadata[\"firstPage\"]) as number | null) ?? null,\n\t\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\t\tlastPage: ((metadata[\"last_page\"] ?? metadata[\"lastPage\"]) as number | null) ?? null,\n\t\t},\n\t};\n}\n\nfunction convertImage(rawImage: unknown): ExtractedImage {\n\tif (!rawImage || typeof rawImage !== \"object\") {\n\t\treturn {\n\t\t\tdata: new Uint8Array(),\n\t\t\tformat: \"unknown\",\n\t\t\timageIndex: 0,\n\t\t\tpageNumber: null,\n\t\t\twidth: null,\n\t\t\theight: null,\n\t\t\tcolorspace: null,\n\t\t\tbitsPerComponent: null,\n\t\t\tisMask: false,\n\t\t\tdescription: null,\n\t\t\tocrResult: null,\n\t\t};\n\t}\n\n\tconst image = rawImage as Record<string, unknown>;\n\treturn {\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\tdata: ensureUint8Array(image[\"data\"]),\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\tformat: (image[\"format\"] as string) ?? \"unknown\",\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\timageIndex: (image[\"imageIndex\"] as number) ?? 0,\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\tpageNumber: (image[\"pageNumber\"] as number | null) ?? null,\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\twidth: (image[\"width\"] as number | null) ?? null,\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\theight: (image[\"height\"] as number | null) ?? null,\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\tcolorspace: (image[\"colorspace\"] as string | null) ?? null,\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\tbitsPerComponent: (image[\"bitsPerComponent\"] as number | null) ?? null,\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\tisMask: (image[\"isMask\"] as boolean) ?? false,\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\tdescription: (image[\"description\"] as string | null) ?? null,\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\tocrResult: image[\"ocrResult\"] ? convertResult(image[\"ocrResult\"]) : null,\n\t};\n}\n\nfunction convertPageContent(rawPage: unknown): PageContent {\n\tif (!rawPage || typeof rawPage !== \"object\") {\n\t\treturn {\n\t\t\tpageNumber: 0,\n\t\t\tcontent: \"\",\n\t\t\ttables: [],\n\t\t\timages: [],\n\t\t};\n\t}\n\n\tconst page = rawPage as Record<string, unknown>;\n\treturn {\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\tpageNumber: (page[\"pageNumber\"] as number) ?? 0,\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\tcontent: (page[\"content\"] as string) ?? \"\",\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\ttables: Array.isArray(page[\"tables\"]) ? (page[\"tables\"] as Table[]) : [],\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\timages: Array.isArray(page[\"images\"]) ? (page[\"images\"] as unknown[]).map((image) => convertImage(image)) : [],\n\t};\n}\n\nfunction convertResult(rawResult: unknown): ExtractionResult {\n\tif (!rawResult || typeof rawResult !== \"object\") {\n\t\treturn {\n\t\t\tcontent: \"\",\n\t\t\tmimeType: \"application/octet-stream\",\n\t\t\tmetadata: {},\n\t\t\ttables: [],\n\t\t\tdetectedLanguages: null,\n\t\t\tchunks: null,\n\t\t\timages: null,\n\t\t\tpages: null,\n\t\t};\n\t}\n\n\tconst result = rawResult as Record<string, unknown>;\n\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\tconst metadata = result[\"metadata\"];\n\tconst metadataValue =\n\t\ttypeof metadata === \"string\" ? parseMetadata(metadata) : ((metadata as Record<string, unknown>) ?? {});\n\n\treturn {\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\tcontent: (result[\"content\"] as string) ?? \"\",\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\tmimeType: (result[\"mimeType\"] as string) ?? \"application/octet-stream\",\n\t\tmetadata: metadataValue,\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\ttables: Array.isArray(result[\"tables\"]) ? (result[\"tables\"] as Table[]) : [],\n\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\tdetectedLanguages: Array.isArray(result[\"detectedLanguages\"]) ? (result[\"detectedLanguages\"] as string[]) : null,\n\t\tchunks: (() => {\n\t\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\t\tconst chunksData = result[\"chunks\"];\n\t\t\treturn Array.isArray(chunksData) ? (chunksData as unknown[]).map((chunk) => convertChunk(chunk)) : null;\n\t\t})(),\n\t\timages: (() => {\n\t\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\t\tconst imagesData = result[\"images\"];\n\t\t\treturn Array.isArray(imagesData) ? (imagesData as unknown[]).map((image) => convertImage(image)) : null;\n\t\t})(),\n\t\tpages: (() => {\n\t\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature\n\t\t\tconst pagesData = result[\"pages\"];\n\t\t\treturn Array.isArray(pagesData) ? (pagesData as unknown[]).map((page) => convertPageContent(page)) : null;\n\t\t})(),\n\t};\n}\n\ntype NativeExtractionConfig = Record<string, unknown>;\n\nfunction setIfDefined<T>(target: NativeExtractionConfig, key: string, value: T | undefined): void {\n\tif (value !== undefined) {\n\t\ttarget[key] = value;\n\t}\n}\n\nfunction normalizeTesseractConfig(config?: TesseractConfig) {\n\tif (!config) {\n\t\treturn undefined;\n\t}\n\n\tconst normalized: NativeExtractionConfig = {};\n\tsetIfDefined(normalized, \"psm\", config.psm);\n\tsetIfDefined(normalized, \"enableTableDetection\", config.enableTableDetection);\n\tsetIfDefined(normalized, \"tesseditCharWhitelist\", config.tesseditCharWhitelist);\n\treturn normalized;\n}\n\nfunction normalizeOcrConfig(ocr?: OcrConfig): NativeExtractionConfig | undefined {\n\tif (!ocr) {\n\t\treturn undefined;\n\t}\n\n\tconst normalized: NativeExtractionConfig = {\n\t\tbackend: ocr.backend,\n\t};\n\tsetIfDefined(normalized, \"language\", ocr.language);\n\n\tconst tesseract = normalizeTesseractConfig(ocr.tesseractConfig);\n\tif (tesseract) {\n\t\tsetIfDefined(normalized, \"tesseractConfig\", tesseract);\n\t}\n\n\treturn normalized;\n}\n\nfunction normalizeChunkingConfig(chunking?: ChunkingConfig): NativeExtractionConfig | undefined {\n\tif (!chunking) {\n\t\treturn undefined;\n\t}\n\n\tconst normalized: NativeExtractionConfig = {};\n\tsetIfDefined(normalized, \"maxChars\", chunking.maxChars);\n\tsetIfDefined(normalized, \"maxOverlap\", chunking.maxOverlap);\n\tsetIfDefined(normalized, \"preset\", chunking.preset);\n\tsetIfDefined(normalized, \"embedding\", chunking.embedding);\n\tsetIfDefined(normalized, \"enabled\", chunking.enabled);\n\treturn normalized;\n}\n\nfunction normalizeImageExtractionConfig(images?: ImageExtractionConfig): NativeExtractionConfig | undefined {\n\tif (!images) {\n\t\treturn undefined;\n\t}\n\n\tconst normalized: NativeExtractionConfig = {};\n\tsetIfDefined(normalized, \"extractImages\", images.extractImages);\n\tsetIfDefined(normalized, \"targetDpi\", images.targetDpi);\n\tsetIfDefined(normalized, \"maxImageDimension\", images.maxImageDimension);\n\tsetIfDefined(normalized, \"autoAdjustDpi\", images.autoAdjustDpi);\n\tsetIfDefined(normalized, \"minDpi\", images.minDpi);\n\tsetIfDefined(normalized, \"maxDpi\", images.maxDpi);\n\treturn normalized;\n}\n\nfunction normalizePdfConfig(pdf?: PdfConfig): NativeExtractionConfig | undefined {\n\tif (!pdf) {\n\t\treturn undefined;\n\t}\n\n\tconst normalized: NativeExtractionConfig = {};\n\tsetIfDefined(normalized, \"extractImages\", pdf.extractImages);\n\tsetIfDefined(normalized, \"passwords\", pdf.passwords);\n\tsetIfDefined(normalized, \"extractMetadata\", pdf.extractMetadata);\n\treturn normalized;\n}\n\nfunction normalizeTokenReductionConfig(tokenReduction?: TokenReductionConfig): NativeExtractionConfig | undefined {\n\tif (!tokenReduction) {\n\t\treturn undefined;\n\t}\n\n\tconst normalized: NativeExtractionConfig = {};\n\tsetIfDefined(normalized, \"mode\", tokenReduction.mode);\n\tsetIfDefined(normalized, \"preserveImportantWords\", tokenReduction.preserveImportantWords);\n\treturn normalized;\n}\n\nfunction normalizeLanguageDetectionConfig(\n\tlanguageDetection?: LanguageDetectionConfig,\n): NativeExtractionConfig | undefined {\n\tif (!languageDetection) {\n\t\treturn undefined;\n\t}\n\n\tconst normalized: NativeExtractionConfig = {};\n\tsetIfDefined(normalized, \"enabled\", languageDetection.enabled);\n\tsetIfDefined(normalized, \"minConfidence\", languageDetection.minConfidence);\n\tsetIfDefined(normalized, \"detectMultiple\", languageDetection.detectMultiple);\n\treturn normalized;\n}\n\nfunction normalizePostProcessorConfig(postprocessor?: PostProcessorConfig): NativeExtractionConfig | undefined {\n\tif (!postprocessor) {\n\t\treturn undefined;\n\t}\n\n\tconst normalized: NativeExtractionConfig = {};\n\tsetIfDefined(normalized, \"enabled\", postprocessor.enabled);\n\tsetIfDefined(normalized, \"enabledProcessors\", postprocessor.enabledProcessors);\n\tsetIfDefined(normalized, \"disabledProcessors\", postprocessor.disabledProcessors);\n\treturn normalized;\n}\n\nfunction normalizeHtmlPreprocessing(options?: HtmlPreprocessingOptions): NativeExtractionConfig | undefined {\n\tif (!options) {\n\t\treturn undefined;\n\t}\n\n\tconst normalized: NativeExtractionConfig = {};\n\tsetIfDefined(normalized, \"enabled\", options.enabled);\n\tsetIfDefined(normalized, \"preset\", options.preset);\n\tsetIfDefined(normalized, \"removeNavigation\", options.removeNavigation);\n\tsetIfDefined(normalized, \"removeForms\", options.removeForms);\n\treturn normalized;\n}\n\nfunction normalizeHtmlOptions(options?: HtmlConversionOptions): NativeExtractionConfig | undefined {\n\tif (!options) {\n\t\treturn undefined;\n\t}\n\n\tconst normalized: NativeExtractionConfig = {};\n\tsetIfDefined(normalized, \"headingStyle\", options.headingStyle);\n\tsetIfDefined(normalized, \"listIndentType\", options.listIndentType);\n\tsetIfDefined(normalized, \"listIndentWidth\", options.listIndentWidth);\n\tsetIfDefined(normalized, \"bullets\", options.bullets);\n\tsetIfDefined(normalized, \"strongEmSymbol\", options.strongEmSymbol);\n\tsetIfDefined(normalized, \"escapeAsterisks\", options.escapeAsterisks);\n\tsetIfDefined(normalized, \"escapeUnderscores\", options.escapeUnderscores);\n\tsetIfDefined(normalized, \"escapeMisc\", options.escapeMisc);\n\tsetIfDefined(normalized, \"escapeAscii\", options.escapeAscii);\n\tsetIfDefined(normalized, \"codeLanguage\", options.codeLanguage);\n\tsetIfDefined(normalized, \"autolinks\", options.autolinks);\n\tsetIfDefined(normalized, \"defaultTitle\", options.defaultTitle);\n\tsetIfDefined(normalized, \"brInTables\", options.brInTables);\n\tsetIfDefined(normalized, \"hocrSpatialTables\", options.hocrSpatialTables);\n\tsetIfDefined(normalized, \"highlightStyle\", options.highlightStyle);\n\tsetIfDefined(normalized, \"extractMetadata\", options.extractMetadata);\n\tsetIfDefined(normalized, \"whitespaceMode\", options.whitespaceMode);\n\tsetIfDefined(normalized, \"stripNewlines\", options.stripNewlines);\n\tsetIfDefined(normalized, \"wrap\", options.wrap);\n\tsetIfDefined(normalized, \"wrapWidth\", options.wrapWidth);\n\tsetIfDefined(normalized, \"convertAsInline\", options.convertAsInline);\n\tsetIfDefined(normalized, \"subSymbol\", options.subSymbol);\n\tsetIfDefined(normalized, \"supSymbol\", options.supSymbol);\n\tsetIfDefined(normalized, \"newlineStyle\", options.newlineStyle);\n\tsetIfDefined(normalized, \"codeBlockStyle\", options.codeBlockStyle);\n\tsetIfDefined(normalized, \"keepInlineImagesIn\", options.keepInlineImagesIn);\n\tsetIfDefined(normalized, \"encoding\", options.encoding);\n\tsetIfDefined(normalized, \"debug\", options.debug);\n\tsetIfDefined(normalized, \"stripTags\", options.stripTags);\n\tsetIfDefined(normalized, \"preserveTags\", options.preserveTags);\n\n\tconst preprocessing = normalizeHtmlPreprocessing(options.preprocessing);\n\tsetIfDefined(normalized, \"preprocessing\", preprocessing);\n\n\treturn normalized;\n}\n\nfunction normalizeKeywordConfig(config?: KeywordConfig): NativeExtractionConfig | undefined {\n\tif (!config) {\n\t\treturn undefined;\n\t}\n\n\tconst normalized: NativeExtractionConfig = {};\n\tsetIfDefined(normalized, \"algorithm\", config.algorithm);\n\tsetIfDefined(normalized, \"maxKeywords\", config.maxKeywords);\n\tsetIfDefined(normalized, \"minScore\", config.minScore);\n\tsetIfDefined(normalized, \"ngramRange\", config.ngramRange);\n\tsetIfDefined(normalized, \"language\", config.language);\n\tsetIfDefined(normalized, \"yakeParams\", config.yakeParams);\n\tsetIfDefined(normalized, \"rakeParams\", config.rakeParams);\n\treturn normalized;\n}\n\nfunction normalizePageConfig(pages?: PageConfig): NativeExtractionConfig | undefined {\n\tif (!pages) {\n\t\treturn undefined;\n\t}\n\n\tconst normalized: NativeExtractionConfig = {};\n\tsetIfDefined(normalized, \"extract_pages\", pages.extractPages);\n\tsetIfDefined(normalized, \"insert_page_markers\", pages.insertPageMarkers);\n\tsetIfDefined(normalized, \"marker_format\", pages.markerFormat);\n\treturn normalized;\n}\n\nfunction normalizeExtractionConfig(config: ExtractionConfigType | null): NativeExtractionConfig | null {\n\tif (!config) {\n\t\treturn null;\n\t}\n\n\tconst normalized: NativeExtractionConfig = {};\n\tsetIfDefined(normalized, \"useCache\", config.useCache);\n\tsetIfDefined(normalized, \"enableQualityProcessing\", config.enableQualityProcessing);\n\tsetIfDefined(normalized, \"forceOcr\", config.forceOcr);\n\tsetIfDefined(normalized, \"maxConcurrentExtractions\", config.maxConcurrentExtractions);\n\n\tconst ocr = normalizeOcrConfig(config.ocr);\n\tsetIfDefined(normalized, \"ocr\", ocr);\n\n\tconst chunking = normalizeChunkingConfig(config.chunking);\n\tsetIfDefined(normalized, \"chunking\", chunking);\n\n\tconst images = normalizeImageExtractionConfig(config.images);\n\tsetIfDefined(normalized, \"images\", images);\n\n\tconst pdf = normalizePdfConfig(config.pdfOptions);\n\tsetIfDefined(normalized, \"pdfOptions\", pdf);\n\n\tconst tokenReduction = normalizeTokenReductionConfig(config.tokenReduction);\n\tsetIfDefined(normalized, \"tokenReduction\", tokenReduction);\n\n\tconst languageDetection = normalizeLanguageDetectionConfig(config.languageDetection);\n\tsetIfDefined(normalized, \"languageDetection\", languageDetection);\n\n\tconst postprocessor = normalizePostProcessorConfig(config.postprocessor);\n\tsetIfDefined(normalized, \"postprocessor\", postprocessor);\n\n\tconst keywords = normalizeKeywordConfig(config.keywords);\n\tsetIfDefined(normalized, \"keywords\", keywords);\n\n\tconst pages = normalizePageConfig(config.pages);\n\tsetIfDefined(normalized, \"pages\", pages);\n\n\tconst htmlOptions = normalizeHtmlOptions(config.htmlOptions);\n\tsetIfDefined(normalized, \"htmlOptions\", htmlOptions);\n\n\treturn normalized;\n}\n\n/**\n * Extract content from a single file (synchronous).\n *\n * **Usage Note**: For processing multiple files, prefer `batchExtractFilesSync()` which\n * provides better performance and memory management.\n *\n * @param filePath - Path to the file to extract (string). Can be absolute or relative.\n * @param mimeType - Optional MIME type hint for format detection. If null, MIME type is auto-detected from file extension or content.\n * @param config - Extraction configuration object. If null, uses default extraction settings.\n * @returns ExtractionResult containing extracted content, metadata, tables, and optional chunks/images\n * @throws {Error} If file doesn't exist, cannot be accessed, or cannot be read\n * @throws {ParsingError} When document format is invalid or corrupted\n * @throws {OcrError} When OCR processing fails (if OCR is enabled)\n * @throws {ValidationError} When extraction result fails validation (if validators registered)\n * @throws {KreuzbergError} For other extraction-related failures\n *\n * @example\n * ```typescript\n * import { extractFileSync } from '@kreuzberg/node';\n *\n * // Basic usage\n * const result = extractFileSync('document.pdf');\n * console.log(result.content);\n *\n * // With OCR configuration\n * const config = {\n * ocr: {\n * backend: 'tesseract',\n * language: 'eng',\n * tesseractConfig: {\n * psm: 6,\n * enableTableDetection: true,\n * },\n * },\n * };\n * const result2 = extractFileSync('scanned.pdf', null, config);\n * ```\n */\nexport function extractFileSync(\n\tfilePath: string,\n\tmimeType: string | null = null,\n\tconfig: ExtractionConfigType | null = null,\n): ExtractionResult {\n\tconst normalizedConfig = normalizeExtractionConfig(config);\n\tconst rawResult = getBinding().extractFileSync(filePath, mimeType, normalizedConfig);\n\treturn convertResult(rawResult);\n}\n\n/**\n * Extract content from a single file (asynchronous).\n *\n * **Usage Note**: For processing multiple files, prefer `batchExtractFiles()` which\n * provides better performance and memory management.\n *\n * @param filePath - Path to the file to extract (string). Can be absolute or relative.\n * @param mimeType - Optional MIME type hint for format detection. If null, MIME type is auto-detected from file extension or content.\n * @param config - Extraction configuration object. If null, uses default extraction settings.\n * @returns Promise<ExtractionResult> containing extracted content, metadata, tables, and optional chunks/images\n * @throws {Error} If file doesn't exist, cannot be accessed, or cannot be read\n * @throws {ParsingError} When document format is invalid or corrupted\n * @throws {OcrError} When OCR processing fails (if OCR is enabled)\n * @throws {ValidationError} When extraction result fails validation (if validators registered)\n * @throws {KreuzbergError} For other extraction-related failures\n *\n * @example\n * ```typescript\n * import { extractFile } from '@kreuzberg/node';\n *\n * // Basic usage\n * const result = await extractFile('document.pdf');\n * console.log(result.content);\n *\n * // With chunking enabled\n * const config = {\n * chunking: {\n * maxChars: 1000,\n * maxOverlap: 200,\n * },\n * };\n * const result2 = await extractFile('long_document.pdf', null, config);\n * console.log(result2.chunks); // Array of text chunks\n * ```\n */\nexport async function extractFile(\n\tfilePath: string,\n\tmimeType: string | null = null,\n\tconfig: ExtractionConfigType | null = null,\n): Promise<ExtractionResult> {\n\tconst normalizedConfig = normalizeExtractionConfig(config);\n\tconst rawResult = await getBinding().extractFile(filePath, mimeType, normalizedConfig);\n\treturn convertResult(rawResult);\n}\n\n/**\n * Extract content from raw bytes (synchronous).\n *\n * **Usage Note**: For processing multiple byte arrays, prefer `batchExtractBytesSync()`\n * which provides better performance and memory management.\n *\n * @param data - File content as Uint8Array (Buffer will be converted)\n * @param mimeType - MIME type of the data (required for accurate format detection). Must be a valid MIME type string.\n * @param config - Extraction configuration object. If null, uses default extraction settings.\n * @returns ExtractionResult containing extracted content, metadata, tables, and optional chunks/images\n * @throws {TypeError} When data is not a valid Uint8Array\n * @throws {Error} When file cannot be read or parsed\n * @throws {ParsingError} When document format is invalid or corrupted\n * @throws {OcrError} When OCR processing fails (if OCR is enabled)\n * @throws {ValidationError} When extraction result fails validation (if validators registered)\n * @throws {KreuzbergError} For other extraction-related failures\n *\n * @example\n * ```typescript\n * import { extractBytesSync } from '@kreuzberg/node';\n * import { readFileSync } from 'fs';\n *\n * const data = readFileSync('document.pdf');\n * const result = extractBytesSync(data, 'application/pdf');\n * console.log(result.content);\n * ```\n */\nexport function extractBytesSync(\n\tdata: Uint8Array,\n\tmimeType: string,\n\tconfig: ExtractionConfigType | null = null,\n): ExtractionResult {\n\tconst validated = assertUint8Array(data, \"data\");\n\tconst normalizedConfig = normalizeExtractionConfig(config);\n\tconst rawResult = getBinding().extractBytesSync(Buffer.from(validated), mimeType, normalizedConfig);\n\treturn convertResult(rawResult);\n}\n\n/**\n * Extract content from raw bytes (asynchronous).\n *\n * **Usage Note**: For processing multiple byte arrays, prefer `batchExtractBytes()`\n * which provides better performance and memory management.\n *\n * @param data - File content as Uint8Array (Buffer will be converted)\n * @param mimeType - MIME type of the data (required for accurate format detection). Must be a valid MIME type string.\n * @param config - Extraction configuration object. If null, uses default extraction settings.\n * @returns Promise<ExtractionResult> containing extracted content, metadata, tables, and optional chunks/images\n * @throws {TypeError} When data is not a valid Uint8Array\n * @throws {Error} When file cannot be read or parsed\n * @throws {ParsingError} When document format is invalid or corrupted\n * @throws {OcrError} When OCR processing fails (if OCR is enabled)\n * @throws {ValidationError} When extraction result fails validation (if validators registered)\n * @throws {KreuzbergError} For other extraction-related failures\n *\n * @example\n * ```typescript\n * import { extractBytes } from '@kreuzberg/node';\n * import { readFile } from 'fs/promises';\n *\n * const data = await readFile('document.pdf');\n * const result = await extractBytes(data, 'application/pdf');\n * console.log(result.content);\n * ```\n */\nexport async function extractBytes(\n\tdata: Uint8Array,\n\tmimeType: string,\n\tconfig: ExtractionConfigType | null = null,\n): Promise<ExtractionResult> {\n\tconst validated = assertUint8Array(data, \"data\");\n\t// biome-ignore lint/complexity/useLiteralKeys: required for environment variable access\n\tif (process.env[\"KREUZBERG_DEBUG_GUTEN\"] === \"1\") {\n\t\tconsole.log(\"[TypeScript] Debug input header:\", Array.from(validated.slice(0, 8)));\n\t}\n\tconst normalizedConfig = normalizeExtractionConfig(config);\n\tconst rawResult = await getBinding().extractBytes(Buffer.from(validated), mimeType, normalizedConfig);\n\treturn convertResult(rawResult);\n}\n\n/**\n * Extract content from multiple files in parallel (synchronous).\n *\n * **Recommended for**: Processing multiple documents efficiently with better\n * performance and memory management compared to individual `extractFileSync()` calls.\n *\n * **Benefits**:\n * - Parallel processing in Rust for maximum performance\n * - Optimized memory usage across all extractions\n * - More reliable for batch document processing\n *\n * @param paths - List of file paths to extract (absolute or relative paths)\n * @param config - Extraction configuration object. If null, uses default extraction settings.\n * @returns Array of ExtractionResults (one per file, in same order as input)\n * @throws {Error} If any file cannot be read or parsed\n * @throws {ParsingError} When any document format is invalid or corrupted\n * @throws {OcrError} When OCR processing fails (if OCR is enabled)\n * @throws {ValidationError} When any extraction result fails validation (if validators registered)\n * @throws {KreuzbergError} For other extraction-related failures\n *\n * @example\n * ```typescript\n * import { batchExtractFilesSync } from '@kreuzberg/node';\n *\n * const files = ['doc1.pdf', 'doc2.docx', 'doc3.xlsx'];\n * const results = batchExtractFilesSync(files);\n *\n * results.forEach((result, i) => {\n * console.log(`File ${files[i]}: ${result.content.substring(0, 100)}...`);\n * });\n * ```\n */\nexport function batchExtractFilesSync(paths: string[], config: ExtractionConfigType | null = null): ExtractionResult[] {\n\tconst normalizedConfig = normalizeExtractionConfig(config);\n\tconst rawResults = getBinding().batchExtractFilesSync(paths, normalizedConfig);\n\treturn rawResults.map(convertResult);\n}\n\n/**\n * Extract content from multiple files in parallel (asynchronous).\n *\n * **Recommended for**: Processing multiple documents efficiently with better\n * performance and memory management compared to individual `extractFile()` calls.\n *\n * **Benefits**:\n * - Parallel processing in Rust for maximum performance\n * - Optimized memory usage across all extractions\n * - More reliable for batch document processing\n *\n * @param paths - List of file paths to extract (absolute or relative paths)\n * @param config - Extraction configuration object. If null, uses default extraction settings.\n * @returns Promise resolving to array of ExtractionResults (one per file, in same order as input)\n * @throws {Error} If any file cannot be read or parsed\n * @throws {ParsingError} When any document format is invalid or corrupted\n * @throws {OcrError} When OCR processing fails (if OCR is enabled)\n * @throws {ValidationError} When any extraction result fails validation (if validators registered)\n * @throws {KreuzbergError} For other extraction-related failures\n *\n * @example\n * ```typescript\n * import { batchExtractFiles } from '@kreuzberg/node';\n *\n * const files = ['invoice1.pdf', 'invoice2.pdf', 'invoice3.pdf'];\n * const results = await batchExtractFiles(files, {\n * ocr: { backend: 'tesseract', language: 'eng' }\n * });\n *\n * // Process all results\n * const totalAmount = results\n * .map(r => extractAmount(r.content))\n * .reduce((a, b) => a + b, 0);\n * ```\n */\nexport async function batchExtractFiles(\n\tpaths: string[],\n\tconfig: ExtractionConfigType | null = null,\n): Promise<ExtractionResult[]> {\n\tconst normalizedConfig = normalizeExtractionConfig(config);\n\tconst rawResults = await getBinding().batchExtractFiles(paths, normalizedConfig);\n\treturn rawResults.map(convertResult);\n}\n\n/**\n * Extract content from multiple byte arrays in parallel (synchronous).\n *\n * **Recommended for**: Processing multiple documents from memory efficiently with better\n * performance and memory management compared to individual `extractBytesSync()` calls.\n *\n * **Benefits**:\n * - Parallel processing in Rust for maximum performance\n * - Optimized memory usage across all extractions\n * - More reliable for batch document processing\n *\n * @param dataList - List of file contents as Uint8Arrays (must be same length as mimeTypes)\n * @param mimeTypes - List of MIME types (one per data item, required for accurate format detection)\n * @param config - Extraction configuration object. If null, uses default extraction settings.\n * @returns Array of ExtractionResults (one per data item, in same order as input)\n * @throws {TypeError} When dataList contains non-Uint8Array items or length mismatch with mimeTypes\n * @throws {Error} If any data cannot be read or parsed\n * @throws {ParsingError} When any document format is invalid or corrupted\n * @throws {OcrError} When OCR processing fails (if OCR is enabled)\n * @throws {ValidationError} When any extraction result fails validation (if validators registered)\n * @throws {KreuzbergError} For other extraction-related failures\n *\n * @example\n * ```typescript\n * import { batchExtractBytesSync } from '@kreuzberg/node';\n * import { readFileSync } from 'fs';\n *\n * const files = ['doc1.pdf', 'doc2.docx', 'doc3.xlsx'];\n * const dataList = files.map(f => readFileSync(f));\n * const mimeTypes = ['application/pdf', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'];\n *\n * const results = batchExtractBytesSync(dataList, mimeTypes);\n * results.forEach((result, i) => {\n * console.log(`File ${files[i]}: ${result.content.substring(0, 100)}...`);\n * });\n * ```\n */\nexport function batchExtractBytesSync(\n\tdataList: Uint8Array[],\n\tmimeTypes: string[],\n\tconfig: ExtractionConfigType | null = null,\n): ExtractionResult[] {\n\tconst buffers = assertUint8ArrayList(dataList, \"dataList\").map((data) => Buffer.from(data));\n\n\tif (buffers.length !== mimeTypes.length) {\n\t\tthrow new TypeError(\"dataList and mimeTypes must have the same length\");\n\t}\n\n\tconst normalizedConfig = normalizeExtractionConfig(config);\n\tconst rawResults = getBinding().batchExtractBytesSync(buffers, mimeTypes, normalizedConfig);\n\treturn rawResults.map(convertResult);\n}\n\n/**\n * Extract content from multiple byte arrays in parallel (asynchronous).\n *\n * **Recommended for**: Processing multiple documents from memory efficiently with better\n * performance and memory management compared to individual `extractBytes()` calls.\n *\n * **Benefits**:\n * - Parallel processing in Rust for maximum performance\n * - Optimized memory usage across all extractions\n * - More reliable for batch document processing\n *\n * @param dataList - List of file contents as Uint8Arrays (must be same length as mimeTypes)\n * @param mimeTypes - List of MIME types (one per data item, required for accurate format detection)\n * @param config - Extraction configuration object. If null, uses default extraction settings.\n * @returns Promise resolving to array of ExtractionResults (one per data item, in same order as input)\n * @throws {TypeError} When dataList contains non-Uint8Array items or length mismatch with mimeTypes\n * @throws {Error} If any data cannot be read or parsed\n * @throws {ParsingError} When any document format is invalid or corrupted\n * @throws {OcrError} When OCR processing fails (if OCR is enabled)\n * @throws {ValidationError} When any extraction result fails validation (if validators registered)\n * @throws {KreuzbergError} For other extraction-related failures\n *\n * @example\n * ```typescript\n * import { batchExtractBytes } from '@kreuzberg/node';\n * import { readFile } from 'fs/promises';\n *\n * const files = ['invoice1.pdf', 'invoice2.pdf', 'invoice3.pdf'];\n * const dataList = await Promise.all(files.map(f => readFile(f)));\n * const mimeTypes = files.map(() => 'application/pdf');\n *\n * const results = await batchExtractBytes(dataList, mimeTypes, {\n * ocr: { backend: 'tesseract', language: 'eng' }\n * });\n *\n * // Process all results\n * const totalAmount = results\n * .map(r => extractAmount(r.content))\n * .reduce((a, b) => a + b, 0);\n * ```\n */\nexport async function batchExtractBytes(\n\tdataList: Uint8Array[],\n\tmimeTypes: string[],\n\tconfig: ExtractionConfigType | null = null,\n): Promise<ExtractionResult[]> {\n\tconst buffers = assertUint8ArrayList(dataList, \"dataList\").map((data) => Buffer.from(data));\n\n\tif (buffers.length !== mimeTypes.length) {\n\t\tthrow new TypeError(\"dataList and mimeTypes must have the same length\");\n\t}\n\n\tconst normalizedConfig = normalizeExtractionConfig(config);\n\tconst rawResults = await getBinding().batchExtractBytes(buffers, mimeTypes, normalizedConfig);\n\treturn rawResults.map(convertResult);\n}\n\n/**\n * Register a custom postprocessor.\n *\n * **IMPORTANT**: Custom processors only work with **async extraction functions**:\n * - ✅ `extractFile()`, `extractBytes()`, `batchExtractFiles()`, `batchExtractBytes()`\n * - ❌ `extractFileSync()`, `extractBytesSync()`, etc. (will skip custom processors)\n *\n * This limitation exists because sync extraction blocks the Node.js event loop,\n * preventing JavaScript callbacks from executing. For v4.0, use async extraction\n * when you need custom processors.\n *\n * @param processor - PostProcessorProtocol implementation with name(), process(), and optional processingStage()\n * @throws {Error} If processor is missing required methods (name or process)\n * @throws {Error} If processor name is empty string\n * @throws {Error} If a processor with the same name is already registered\n *\n * @example\n * ```typescript\n * import { registerPostProcessor, extractFile, ExtractionResult } from '@kreuzberg/node';\n *\n * class MyProcessor implements PostProcessorProtocol {\n * name(): string {\n * return 'my_processor';\n * }\n *\n * process(result: ExtractionResult): ExtractionResult {\n * result.metadata.customField = 'custom_value';\n * return result;\n * }\n *\n * processingStage(): 'early' | 'middle' | 'late' {\n * return 'middle';\n * }\n * }\n *\n * registerPostProcessor(new MyProcessor());\n *\n * // Use async extraction (required for custom processors)\n * const result = await extractFile('document.pdf');\n * console.log(result.metadata.customField); // 'custom_value'\n * ```\n */\nexport function registerPostProcessor(processor: PostProcessorProtocol): void {\n\tconst binding = getBinding();\n\n\tconst wrappedProcessor = {\n\t\tname: processor.name.bind(processor),\n\t\tprocessingStage: processor.processingStage?.bind(processor),\n\t\tasync process(...args: unknown[]): Promise<string> {\n\t\t\tconst wrappedValue = args[0] as unknown[];\n\t\t\tconst jsonString = wrappedValue[0] as string;\n\n\t\t\tconst wireResult = JSON.parse(jsonString) as {\n\t\t\t\tcontent: string;\n\t\t\t\tmime_type: string;\n\t\t\t\tmetadata: string | Record<string, unknown>;\n\t\t\t\ttables?: unknown[];\n\t\t\t\tdetected_languages?: string[];\n\t\t\t\tchunks?: unknown[];\n\t\t\t\timages?: unknown[];\n\t\t\t};\n\n\t\t\tconst result: ExtractionResult = {\n\t\t\t\tcontent: wireResult.content,\n\t\t\t\tmimeType: wireResult.mime_type,\n\t\t\t\tmetadata: typeof wireResult.metadata === \"string\" ? JSON.parse(wireResult.metadata) : wireResult.metadata,\n\t\t\t\ttables: (wireResult.tables || []) as Table[],\n\t\t\t\tdetectedLanguages: wireResult.detected_languages ?? null,\n\t\t\t\tchunks: (wireResult.chunks as Chunk[] | null | undefined) ?? null,\n\t\t\t\timages: (wireResult.images as ExtractedImage[] | null | undefined) ?? null,\n\t\t\t};\n\n\t\t\tconst updated = await processor.process(result);\n\n\t\t\tconst wireUpdated = {\n\t\t\t\tcontent: updated.content,\n\t\t\t\tmime_type: updated.mimeType,\n\t\t\t\tmetadata: updated.metadata,\n\t\t\t\ttables: updated.tables,\n\t\t\t\tdetected_languages: updated.detectedLanguages,\n\t\t\t\tchunks: updated.chunks,\n\t\t\t\timages: updated.images,\n\t\t\t};\n\n\t\t\treturn JSON.stringify(wireUpdated);\n\t\t},\n\t};\n\n\tObject.defineProperty(wrappedProcessor, \"__original\", {\n\t\tvalue: processor,\n\t\tenumerable: false,\n\t});\n\n\tconst stage = processor.processingStage?.() ?? \"middle\";\n\tObject.defineProperty(wrappedProcessor, \"__stage\", {\n\t\tvalue: stage,\n\t\tenumerable: false,\n\t});\n\n\tbinding.registerPostProcessor(wrappedProcessor);\n}\n\n/**\n * Unregister a postprocessor by name.\n *\n * Removes a previously registered postprocessor from the registry.\n * If the processor doesn't exist, this is a no-op (does not throw).\n *\n * @param name - Name of the processor to unregister (case-sensitive)\n *\n * @example\n * ```typescript\n * import { unregisterPostProcessor } from '@kreuzberg/node';\n *\n * unregisterPostProcessor('my_processor');\n * ```\n */\nexport function unregisterPostProcessor(name: string): void {\n\tconst binding = getBinding();\n\tbinding.unregisterPostProcessor(name);\n}\n\n/**\n * Clear all registered postprocessors.\n *\n * Removes all postprocessors from the registry. Useful for test cleanup or resetting state.\n * If no postprocessors are registered, this is a no-op.\n *\n * @example\n * ```typescript\n * import { clearPostProcessors } from '@kreuzberg/node';\n *\n * clearPostProcessors();\n * ```\n */\nexport function clearPostProcessors(): void {\n\tconst binding = getBinding();\n\tbinding.clearPostProcessors();\n}\n\n/**\n * List all registered post-processors.\n *\n * Returns the names of all currently registered post-processors (both built-in and custom).\n *\n * @returns Array of post-processor names (empty array if none registered)\n *\n * @example\n * ```typescript\n * import { listPostProcessors } from '@kreuzberg/node';\n *\n * const names = listPostProcessors();\n * console.log('Registered post-processors:', names);\n * ```\n */\nexport function listPostProcessors(): string[] {\n\tconst binding = getBinding();\n\treturn binding.listPostProcessors();\n}\n\n/**\n * Register a custom validator.\n *\n * Validators check extraction results for quality, completeness, or correctness.\n * Unlike post-processors, validator errors **fail fast** - if a validator throws an error,\n * the extraction fails immediately.\n *\n * @param validator - ValidatorProtocol implementation with name(), validate(), and optional priority()/shouldValidate()\n * @throws {Error} If validator is missing required methods (name or validate)\n * @throws {Error} If validator name is empty string\n * @throws {Error} If a validator with the same name is already registered\n *\n * @example\n * ```typescript\n * import { registerValidator } from '@kreuzberg/node';\n *\n * class MinLengthValidator implements ValidatorProtocol {\n * name(): string {\n * return 'min_length_validator';\n * }\n *\n * priority(): number {\n * return 100; // Run early\n * }\n *\n * validate(result: ExtractionResult): void {\n * if (result.content.length < 100) {\n * throw new Error('Content too short: minimum 100 characters required');\n * }\n * }\n * }\n *\n * registerValidator(new MinLengthValidator());\n * ```\n */\nexport function registerValidator(validator: ValidatorProtocol): void {\n\tconst binding = getBinding();\n\n\tconst wrappedValidator = {\n\t\tname: validator.name.bind(validator),\n\t\tpriority: validator.priority?.bind(validator),\n\t\tasync validate(...args: unknown[]): Promise<string> {\n\t\t\tconst jsonString = args[0] as string;\n\n\t\t\tif (!jsonString || jsonString === \"undefined\") {\n\t\t\t\tthrow new Error(\"Validator received invalid JSON string\");\n\t\t\t}\n\n\t\t\tconst wireResult = JSON.parse(jsonString);\n\t\t\tconst result: ExtractionResult = {\n\t\t\t\tcontent: wireResult.content,\n\t\t\t\tmimeType: wireResult.mime_type,\n\t\t\t\tmetadata: typeof wireResult.metadata === \"string\" ? JSON.parse(wireResult.metadata) : wireResult.metadata,\n\t\t\t\ttables: wireResult.tables || [],\n\t\t\t\tdetectedLanguages: wireResult.detected_languages,\n\t\t\t\tchunks: wireResult.chunks,\n\t\t\t\timages: wireResult.images ?? null,\n\t\t\t};\n\n\t\t\tawait Promise.resolve(validator.validate(result));\n\t\t\treturn \"\";\n\t\t},\n\t};\n\n\tbinding.registerValidator(wrappedValidator);\n}\n\n/**\n * Unregister a validator by name.\n *\n * Removes a previously registered validator from the global registry.\n * If the validator doesn't exist, this is a no-op (does not throw).\n *\n * @param name - Validator name to unregister (case-sensitive)\n *\n * @example\n * ```typescript\n * import { unregisterValidator } from '@kreuzberg/node';\n *\n * unregisterValidator('min_length_validator');\n * ```\n */\nexport function unregisterValidator(name: string): void {\n\tconst binding = getBinding();\n\tbinding.unregisterValidator(name);\n}\n\n/**\n * Clear all registered validators.\n *\n * Removes all validators from the global registry. Useful for test cleanup\n * or resetting state.\n *\n * @example\n * ```typescript\n * import { clearValidators } from '@kreuzberg/node';\n *\n * clearValidators();\n * ```\n */\nexport function clearValidators(): void {\n\tconst binding = getBinding();\n\tbinding.clearValidators();\n}\n\n/**\n * List all registered validators.\n *\n * Returns the names of all currently registered validators (both built-in and custom).\n *\n * @returns Array of validator names (empty array if none registered)\n *\n * @example\n * ```typescript\n * import { listValidators } from '@kreuzberg/node';\n *\n * const names = listValidators();\n * console.log('Registered validators:', names);\n * ```\n */\nexport function listValidators(): string[] {\n\tconst binding = getBinding();\n\treturn binding.listValidators();\n}\n\n/**\n * Register a custom OCR backend.\n *\n * This function registers a JavaScript OCR backend that will be used by Kreuzberg's\n * extraction pipeline when OCR is enabled. The backend must implement the\n * {@link OcrBackendProtocol} interface.\n *\n * ## Usage\n *\n * 1. Create a class implementing {@link OcrBackendProtocol}\n * 2. Call `initialize()` on your backend instance (if needed)\n * 3. Register the backend with `registerOcrBackend()`\n * 4. Use the backend name in extraction config\n *\n * ## Thread Safety\n *\n * The registered backend must be thread-safe as it may be called concurrently\n * from multiple Rust async tasks. Ensure your implementation handles concurrent\n * calls properly.\n *\n * @param backend - OcrBackendProtocol implementation with name(), supportedLanguages(), and processImage()\n * @throws {Error} If backend is missing required methods (name, supportedLanguages, or processImage)\n * @throws {Error} If backend name is empty string or contains invalid characters\n * @throws {Error} If a backend with the same name is already registered\n * @throws {Error} If registration fails due to FFI issues\n *\n * @example\n * ```typescript\n * import { GutenOcrBackend } from '@kreuzberg/node/ocr/guten-ocr';\n * import { registerOcrBackend, extractFile } from '@kreuzberg/node';\n *\n * // Create and initialize backend\n * const backend = new GutenOcrBackend();\n * await backend.initialize();\n *\n * // Register with Kreuzberg\n * registerOcrBackend(backend);\n *\n * // Use in extraction\n * const result = await extractFile('scanned.pdf', null, {\n * ocr: { backend: 'guten-ocr', language: 'en' }\n * });\n * console.log(result.content);\n * ```\n *\n * @example\n * ```typescript\n * // Custom OCR backend implementation\n * class MyOcrBackend implements OcrBackendProtocol {\n * name(): string {\n * return 'my-ocr';\n * }\n *\n * supportedLanguages(): string[] {\n * return ['en', 'de', 'fr'];\n * }\n *\n * async processImage(imageBytes: Uint8Array, language: string) {\n * const text = await myCustomOcrEngine(imageBytes, language);\n * return {\n * content: text,\n * mime_type: 'text/plain',\n * metadata: { confidence: 0.95, language },\n * tables: []\n * };\n * }\n * }\n *\n * registerOcrBackend(new MyOcrBackend());\n * ```\n */\ntype OcrProcessPayload = Buffer | string;\ntype OcrProcessTuple = [OcrProcessPayload, string];\ntype NestedOcrProcessTuple = [OcrProcessTuple];\n\nfunction isOcrProcessTuple(value: unknown): value is OcrProcessTuple {\n\treturn (\n\t\tArray.isArray(value) &&\n\t\tvalue.length === 2 &&\n\t\ttypeof value[1] === \"string\" &&\n\t\t(typeof value[0] === \"string\" || Buffer.isBuffer(value[0]) || value[0] instanceof Uint8Array)\n\t);\n}\n\nfunction isNestedOcrProcessTuple(value: unknown): value is NestedOcrProcessTuple {\n\treturn Array.isArray(value) && value.length === 1 && isOcrProcessTuple(value[0]);\n}\n\nfunction describePayload(value: OcrProcessPayload) {\n\tif (typeof value === \"string\") {\n\t\treturn { ctor: \"String\", length: value.length };\n\t}\n\n\treturn { ctor: value.constructor?.name ?? \"Buffer\", length: value.length };\n}\n\nexport function registerOcrBackend(backend: OcrBackendProtocol): void {\n\tconst binding = getBinding();\n\n\tconst wrappedBackend = {\n\t\tname: backend.name.bind(backend),\n\t\tsupportedLanguages: backend.supportedLanguages.bind(backend),\n\t\tasync processImage(\n\t\t\t...processArgs: [OcrProcessPayload | OcrProcessTuple | NestedOcrProcessTuple, string?]\n\t\t): Promise<string> {\n\t\t\tconst [imagePayload, maybeLanguage] = processArgs;\n\t\t\t// biome-ignore lint/complexity/useLiteralKeys: required for environment variable access\n\t\t\tif (process.env[\"KREUZBERG_DEBUG_GUTEN\"] === \"1\") {\n\t\t\t\tconsole.log(\"[registerOcrBackend] JS arguments\", { length: processArgs.length });\n\t\t\t\tconsole.log(\"[registerOcrBackend] Raw args\", {\n\t\t\t\t\timagePayloadType: Array.isArray(imagePayload) ? \"tuple\" : typeof imagePayload,\n\t\t\t\t\tmaybeLanguageType: typeof maybeLanguage,\n\t\t\t\t\tmetadata: Array.isArray(imagePayload) ? { tupleLength: imagePayload.length } : describePayload(imagePayload),\n\t\t\t\t});\n\t\t\t}\n\n\t\t\tlet rawBytes: OcrProcessPayload;\n\t\t\tlet language = maybeLanguage;\n\n\t\t\tif (isNestedOcrProcessTuple(imagePayload)) {\n\t\t\t\t[rawBytes, language] = imagePayload[0];\n\t\t\t} else if (isOcrProcessTuple(imagePayload)) {\n\t\t\t\t[rawBytes, language] = imagePayload;\n\t\t\t} else {\n\t\t\t\trawBytes = imagePayload;\n\t\t\t}\n\n\t\t\tif (typeof language !== \"string\") {\n\t\t\t\tthrow new Error(\"OCR backend did not receive a language parameter\");\n\t\t\t}\n\n\t\t\t// biome-ignore lint/complexity/useLiteralKeys: required for environment variable access\n\t\t\tif (process.env[\"KREUZBERG_DEBUG_GUTEN\"] === \"1\") {\n\t\t\t\tconst length = typeof rawBytes === \"string\" ? rawBytes.length : rawBytes.length;\n\t\t\t\tconsole.log(\n\t\t\t\t\t\"[registerOcrBackend] Received payload\",\n\t\t\t\t\tArray.isArray(imagePayload) ? \"tuple\" : typeof rawBytes,\n\t\t\t\t\t\"ctor\",\n\t\t\t\t\tdescribePayload(rawBytes).ctor,\n\t\t\t\t\t\"length\",\n\t\t\t\t\tlength,\n\t\t\t\t);\n\t\t\t}\n\n\t\t\tconst buffer = typeof rawBytes === \"string\" ? Buffer.from(rawBytes, \"base64\") : Buffer.from(rawBytes);\n\t\t\tconst result = await backend.processImage(new Uint8Array(buffer), language);\n\n\t\t\treturn JSON.stringify(result);\n\t\t},\n\t};\n\n\tbinding.registerOcrBackend(wrappedBackend);\n}\n\n/**\n * List all registered OCR backends.\n *\n * Returns an array of names of all currently registered OCR backends,\n * including built-in backends like \"tesseract\".\n *\n * @returns Array of OCR backend names (empty array if none registered)\n *\n * @example\n * ```typescript\n * import { listOcrBackends } from '@kreuzberg/node';\n *\n * const backends = listOcrBackends();\n * console.log(backends); // ['tesseract', 'my-custom-backend', ...]\n * ```\n */\nexport function listOcrBackends(): string[] {\n\tconst binding = getBinding();\n\treturn binding.listOcrBackends();\n}\n\n/**\n * Unregister an OCR backend by name.\n *\n * Removes the specified OCR backend from the registry. If the backend doesn't exist,\n * this operation is a no-op (does not throw an error).\n *\n * @param name - Name of the OCR backend to unregister\n *\n * @example\n * ```typescript\n * import { unregisterOcrBackend } from '@kreuzberg/node';\n *\n * // Unregister a custom backend\n * unregisterOcrBackend('my-custom-ocr');\n * ```\n */\nexport function unregisterOcrBackend(name: string): void {\n\tconst binding = getBinding();\n\tbinding.unregisterOcrBackend(name);\n}\n\n/**\n * Clear all registered OCR backends.\n *\n * Removes all OCR backends from the registry, including built-in backends.\n * Use with caution as this will make OCR functionality unavailable until\n * backends are re-registered. If no backends are registered, this is a no-op.\n *\n * @example\n * ```typescript\n * import { clearOcrBackends } from '@kreuzberg/node';\n *\n * clearOcrBackends();\n * ```\n */\nexport function clearOcrBackends(): void {\n\tconst binding = getBinding();\n\tbinding.clearOcrBackends();\n}\n\n/**\n * List all registered document extractors.\n *\n * Returns an array of names of all currently registered document extractors,\n * including built-in extractors for PDF, Office documents, images, etc.\n *\n * @returns Array of document extractor names (empty array if none registered)\n *\n * @example\n * ```typescript\n * import { listDocumentExtractors } from '@kreuzberg/node';\n *\n * const extractors = listDocumentExtractors();\n * console.log(extractors); // ['PDFExtractor', 'ImageExtractor', ...]\n * ```\n */\nexport function listDocumentExtractors(): string[] {\n\tconst binding = getBinding();\n\treturn binding.listDocumentExtractors();\n}\n\n/**\n * Unregister a document extractor by name.\n *\n * Removes the specified document extractor from the registry. If the extractor\n * doesn't exist, this operation is a no-op (does not throw an error).\n *\n * @param name - Name of the document extractor to unregister\n *\n * @example\n * ```typescript\n * import { unregisterDocumentExtractor } from '@kreuzberg/node';\n *\n * // Unregister a custom extractor\n * unregisterDocumentExtractor('MyCustomExtractor');\n * ```\n */\nexport function unregisterDocumentExtractor(name: string): void {\n\tconst binding = getBinding();\n\tbinding.unregisterDocumentExtractor(name);\n}\n\n/**\n * Clear all registered document extractors.\n *\n * Removes all document extractors from the registry, including built-in extractors.\n * Use with caution as this will make document extraction unavailable until\n * extractors are re-registered.\n *\n * @example\n * ```typescript\n * import { clearDocumentExtractors } from '@kreuzberg/node';\n *\n * clearDocumentExtractors();\n * ```\n */\nexport function clearDocumentExtractors(): void {\n\tconst binding = getBinding();\n\tbinding.clearDocumentExtractors();\n}\n\n/**\n * ExtractionConfig namespace with static methods for loading configuration from files.\n *\n * Provides a factory method to load extraction configuration from TOML, YAML, or JSON files.\n * The file format is automatically detected based on the file extension.\n *\n * @example\n * ```typescript\n * import { ExtractionConfig, extractFile } from '@kreuzberg/node';\n *\n * // Load configuration from file\n * const config = ExtractionConfig.fromFile('config.toml');\n *\n * // Use with extraction\n * const result = await extractFile('document.pdf', null, config);\n * ```\n */\nexport const ExtractionConfig = {\n\t/**\n\t * Load extraction configuration from a file.\n\t *\n\t * Automatically detects the file format based on extension:\n\t * - `.toml` - TOML format\n\t * - `.yaml` - YAML format\n\t * - `.json` - JSON format\n\t *\n\t * @param filePath - Path to the configuration file (absolute or relative)\n\t * @returns ExtractionConfig object loaded from the file\n\t *\n\t * @throws {Error} If file does not exist or is not accessible\n\t * @throws {Error} If file content is not valid TOML/YAML/JSON\n\t * @throws {Error} If configuration structure is invalid\n\t * @throws {Error} If file extension is not supported\n\t *\n\t * @example\n\t * ```typescript\n\t * import { ExtractionConfig } from '@kreuzberg/node';\n\t *\n\t * // Load from TOML file\n\t * const config1 = ExtractionConfig.fromFile('kreuzberg.toml');\n\t *\n\t * // Load from YAML file\n\t * const config2 = ExtractionConfig.fromFile('./config.yaml');\n\t *\n\t * // Load from JSON file\n\t * const config3 = ExtractionConfig.fromFile('./config.json');\n\t * ```\n\t */\n\tfromFile(filePath: string): ExtractionConfigType {\n\t\tconst binding = getBinding();\n\t\treturn binding.loadExtractionConfigFromFile(filePath);\n\t},\n\n\t/**\n\t * Discover and load configuration from current or parent directories.\n\t *\n\t * Searches for a `kreuzberg.toml` file starting from the current working directory\n\t * and traversing up the directory tree. Returns the first configuration file found.\n\t *\n\t * @returns ExtractionConfig object if found, or null if no configuration file exists\n\t *\n\t * @example\n\t * ```typescript\n\t * import { ExtractionConfig } from '@kreuzberg/node';\n\t *\n\t * // Try to find config in current or parent directories\n\t * const config = ExtractionConfig.discover();\n\t * if (config) {\n\t * console.log('Found configuration');\n\t * // Use config for extraction\n\t * } else {\n\t * console.log('No configuration file found, using defaults');\n\t * }\n\t * ```\n\t */\n\tdiscover(): ExtractionConfigType | null {\n\t\tconst binding = getBinding();\n\t\treturn binding.discoverExtractionConfig();\n\t},\n};\n\n/**\n * Detect MIME type from raw bytes.\n *\n * Uses content inspection (magic bytes) to determine MIME type.\n * This is more accurate than extension-based detection but requires\n * reading the file content.\n *\n * @param bytes - Raw file content as Buffer\n * @returns The detected MIME type string\n *\n * @throws {Error} If MIME type cannot be determined from content\n *\n * @example\n * ```typescript\n * import { detectMimeType } from '@kreuzberg/node';\n * import * as fs from 'fs';\n *\n * // Read file content\n * const content = fs.readFileSync('document.pdf');\n *\n * // Detect MIME type from bytes\n * const mimeType = detectMimeType(content);\n * console.log(mimeType); // 'application/pdf'\n * ```\n */\nexport function detectMimeType(bytes: Buffer): string {\n\tconst binding = getBinding();\n\treturn binding.detectMimeTypeFromBytes(bytes);\n}\n\n/**\n * Detect MIME type from a file path.\n *\n * Uses file extension to determine MIME type. Falls back to `mime_guess` crate\n * if extension-based detection fails.\n *\n * @param path - Path to the file (string)\n * @returns The detected MIME type string\n *\n * @throws {Error} If MIME type cannot be determined from path/extension\n * @throws {Error} If extension is unknown\n *\n * @example\n * ```typescript\n * import { detectMimeTypeFromPath } from '@kreuzberg/node';\n *\n * // Detect from existing file\n * const mimeType = detectMimeTypeFromPath('document.pdf');\n * console.log(mimeType); // 'application/pdf'\n *\n * const mimeType2 = detectMimeTypeFromPath('document.docx');\n * console.log(mimeType2); // 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'\n * ```\n */\nexport function detectMimeTypeFromPath(path: string): string {\n\tconst binding = getBinding();\n\treturn binding.detectMimeTypeFromPath(path);\n}\n\n/**\n * Validate that a MIME type is supported by Kreuzberg.\n *\n * Checks if a MIME type is in the list of supported formats. Note that any\n * `image/*` MIME type is automatically considered valid.\n *\n * @param mimeType - The MIME type to validate (string)\n * @returns The validated MIME type (may be normalized)\n *\n * @throws {Error} If the MIME type is not supported\n *\n * @example\n * ```typescript\n * import { validateMimeType } from '@kreuzberg/node';\n *\n * // Validate supported type\n * const validated = validateMimeType('application/pdf');\n * console.log(validated); // 'application/pdf'\n *\n * // Validate custom image type\n * const validated2 = validateMimeType('image/custom-format');\n * console.log(validated2); // 'image/custom-format' (any image/* is valid)\n *\n * // Validate unsupported type (throws error)\n * try {\n * validateMimeType('video/mp4');\n * } catch (err) {\n * console.error(err); // Error: Unsupported format: video/mp4\n * }\n * ```\n */\nexport function validateMimeType(mimeType: string): string {\n\tconst binding = getBinding();\n\treturn binding.validateMimeType(mimeType);\n}\n\n/**\n * Get file extensions for a given MIME type.\n *\n * Returns an array of file extensions commonly associated with the specified\n * MIME type. For example, 'application/pdf' returns ['pdf'].\n *\n * @param mimeType - The MIME type to look up (e.g., 'application/pdf', 'image/jpeg')\n * @returns Array of file extensions (without leading dots)\n *\n * @throws {Error} If the MIME type is not recognized or supported\n *\n * @example\n * ```typescript\n * import { getExtensionsForMime } from '@kreuzberg/node';\n *\n * // Get extensions for PDF\n * const pdfExts = getExtensionsForMime('application/pdf');\n * console.log(pdfExts); // ['pdf']\n *\n * // Get extensions for JPEG\n * const jpegExts = getExtensionsForMime('image/jpeg');\n * console.log(jpegExts); // ['jpg', 'jpeg']\n * ```\n */\nexport function getExtensionsForMime(mimeType: string): string[] {\n\tconst binding = getBinding();\n\treturn binding.getExtensionsForMime(mimeType);\n}\n\n/**\n * Embedding preset configuration.\n *\n * Contains all settings for a specific embedding model preset.\n */\nexport interface EmbeddingPreset {\n\t/** Name of the preset (e.g., \"fast\", \"balanced\", \"quality\", \"multilingual\") */\n\tname: string;\n\t/** Recommended chunk size in characters */\n\tchunkSize: number;\n\t/** Recommended overlap in characters */\n\toverlap: number;\n\t/** Model identifier (e.g., \"AllMiniLML6V2Q\", \"BGEBaseENV15\") */\n\tmodelName: string;\n\t/** Embedding vector dimensions */\n\tdimensions: number;\n\t/** Human-readable description of the preset */\n\tdescription: string;\n}\n\n/**\n * List all available embedding preset names.\n *\n * Returns an array of preset names that can be used with `getEmbeddingPreset`.\n *\n * @returns Array of 4 preset names: [\"fast\", \"balanced\", \"quality\", \"multilingual\"]\n *\n * @example\n * ```typescript\n * import { listEmbeddingPresets } from '@kreuzberg/node';\n *\n * const presets = listEmbeddingPresets();\n * console.log(presets); // ['fast', 'balanced', 'quality', 'multilingual']\n * ```\n */\nexport function listEmbeddingPresets(): string[] {\n\tconst binding = getBinding();\n\treturn binding.listEmbeddingPresets();\n}\n\n/**\n * Get a specific embedding preset by name.\n *\n * Returns a preset configuration object, or null if the preset name is not found.\n *\n * @param name - The preset name (case-sensitive)\n * @returns An `EmbeddingPreset` object or `null` if not found\n *\n * @example\n * ```typescript\n * import { getEmbeddingPreset } from '@kreuzberg/node';\n *\n * const preset = getEmbeddingPreset('balanced');\n * if (preset) {\n * console.log(`Model: ${preset.modelName}, Dims: ${preset.dimensions}`);\n * // Model: BGEBaseENV15, Dims: 768\n * }\n * ```\n */\nexport function getEmbeddingPreset(name: string): EmbeddingPreset | null {\n\tconst binding = getBinding();\n\tconst result = binding.getEmbeddingPreset(name);\n\treturn result as unknown as EmbeddingPreset | null;\n}\n\n/**\n * Get the error code for the last FFI error.\n *\n * Returns the FFI error code as an integer. This is useful for programmatic error handling\n * and distinguishing between different types of failures in native code.\n *\n * Error codes:\n * - 0: Success (no error)\n * - 1: GenericError\n * - 2: Panic\n * - 3: InvalidArgument\n * - 4: IoError\n * - 5: ParsingError\n * - 6: OcrError\n * - 7: MissingDependency\n *\n * @returns The integer error code\n *\n * @example\n * ```typescript\n * import { extractFile, getLastErrorCode, ErrorCode } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * const code = getLastErrorCode();\n * if (code === ErrorCode.Panic) {\n * console.error('Native code panic detected');\n * }\n * }\n * ```\n */\nexport function getLastErrorCode(): number {\n\tconst binding = getBinding();\n\treturn binding.getLastErrorCode();\n}\n\n/**\n * Get panic context information if the last error was a panic.\n *\n * Returns detailed information about a panic in native code, or null if the last error was not a panic.\n * This provides debugging information when native code panics.\n *\n * @returns A `PanicContext` object with file, line, function, message, and timestamp_secs, or null if no panic context is available\n *\n * @example\n * ```typescript\n * import { extractFile, getLastPanicContext } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * const context = getLastPanicContext();\n * if (context) {\n * console.error(`Panic at ${context.file}:${context.line}`);\n * console.error(`In function: ${context.function}`);\n * console.error(`Message: ${context.message}`);\n * }\n * }\n * ```\n */\nexport function getLastPanicContext(): PanicContext | null {\n\tconst binding = getBinding();\n\tconst result = binding.getLastPanicContext();\n\treturn result as unknown as PanicContext | null;\n}\n\n/**\n * Returns the human-readable name for an error code.\n *\n * Maps numeric error codes to their string names, providing a consistent way\n * to get error code names across all platforms.\n *\n * @param code - The numeric error code (0-7)\n * @returns The error code name as a string (e.g., \"validation\", \"ocr\", \"unknown\")\n *\n * @example\n * ```typescript\n * import { getErrorCodeName } from '@kreuzberg/node';\n *\n * const name = getErrorCodeName(0); // returns \"validation\"\n * const name = getErrorCodeName(2); // returns \"ocr\"\n * const name = getErrorCodeName(99); // returns \"unknown\"\n * ```\n */\nexport function getErrorCodeName(code: number): string {\n\tconst binding = getBinding();\n\treturn binding.getErrorCodeName(code);\n}\n\n/**\n * Returns the description for an error code.\n *\n * Retrieves user-friendly descriptions of error types from the FFI layer.\n *\n * @param code - The numeric error code (0-7)\n * @returns A brief description of the error type\n *\n * @example\n * ```typescript\n * import { getErrorCodeDescription } from '@kreuzberg/node';\n *\n * const desc = getErrorCodeDescription(0); // returns \"Input validation error\"\n * const desc = getErrorCodeDescription(4); // returns \"File system I/O error\"\n * const desc = getErrorCodeDescription(99); // returns \"Unknown error code\"\n * ```\n */\nexport function getErrorCodeDescription(code: number): string {\n\tconst binding = getBinding();\n\treturn binding.getErrorCodeDescription(code);\n}\n\n/**\n * Classifies an error message string into an error code category.\n *\n * This function analyzes the error message content and returns the most likely\n * error code (0-7) based on keyword patterns. Used to programmatically classify\n * errors for handling purposes.\n *\n * The classification is based on keyword matching:\n * - **Validation (0)**: Keywords like \"invalid\", \"validation\", \"schema\", \"required\"\n * - **Parsing (1)**: Keywords like \"parsing\", \"corrupted\", \"malformed\"\n * - **Ocr (2)**: Keywords like \"ocr\", \"tesseract\", \"language\", \"model\"\n * - **MissingDependency (3)**: Keywords like \"not found\", \"missing\", \"dependency\"\n * - **Io (4)**: Keywords like \"file\", \"disk\", \"read\", \"write\", \"permission\"\n * - **Plugin (5)**: Keywords like \"plugin\", \"register\", \"extension\"\n * - **UnsupportedFormat (6)**: Keywords like \"unsupported\", \"format\", \"mime\"\n * - **Internal (7)**: Keywords like \"internal\", \"bug\", \"panic\"\n *\n * @param errorMessage - The error message string to classify\n * @returns An object with the classification details\n *\n * @example\n * ```typescript\n * import { classifyError } from '@kreuzberg/node';\n *\n * const result = classifyError(\"PDF file is corrupted\");\n * // Returns: { code: 1, name: \"parsing\", confidence: 0.95 }\n *\n * const result = classifyError(\"Tesseract not found\");\n * // Returns: { code: 3, name: \"missing_dependency\", confidence: 0.9 }\n * ```\n */\nexport function classifyError(errorMessage: string): ErrorClassification {\n\tconst binding = getBinding();\n\tconst result = binding.classifyError(errorMessage);\n\treturn result as unknown as ErrorClassification;\n}\n\nexport const __version__ = \"4.0.0-rc.16\";\n"],"mappings":"AA+CA,SAAS,qBAAqB;AA4F9B;AAAA,EACC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAEA;AAAA,EACA;AAAA,EACA;AAAA,OACM;AACP,SAAS,uBAAuB;AAChC,cAAc;AAEd,IAAI,UAAgC;AACpC,IAAI,qBAAqB;AAEzB,SAAS,yBAAyB,OAAuB;AACxD,QAAM,YAAsB,CAAC;AAC7B,MAAI,SAAS;AAEb,MAAI,iBAAiB,OAAO;AAC3B,aAAS,MAAM,WAAW,MAAM,SAAS;AACzC,QAAI,UAAU,KAAK,MAAM,GAAG;AAC3B,gBAAU;AAAA,QACT;AAAA,MACD;AAAA,IACD;AACA,WAAO,IAAI;AAAA,MACV;AAAA,QACC;AAAA,QACA,UAAU,SAAS,UAAU,KAAK,GAAG,IAAI;AAAA,QACzC;AAAA,QACA,qBAAqB,MAAM;AAAA,MAC5B,EACE,OAAO,OAAO,EACd,KAAK,GAAG;AAAA,MACV,EAAE,OAAO,MAAM;AAAA,IAChB;AAAA,EACD;AAEA,SAAO,IAAI;AAAA,IACV;AAAA,MACC;AAAA,MACA;AAAA,MACA,qBAAqB,OAAO,KAAK,CAAC;AAAA,IACnC,EAAE,KAAK,GAAG;AAAA,EACX;AACD;AAEA,SAAS,iBAAiB,OAAgB,MAA0B;AACnE,MAAI,EAAE,iBAAiB,aAAa;AACnC,UAAM,IAAI,UAAU,GAAG,IAAI,uBAAuB;AAAA,EACnD;AACA,SAAO;AACR;AAEA,SAAS,qBAAqB,QAAiB,MAA4B;AAC1E,MAAI,CAAC,MAAM,QAAQ,MAAM,GAAG;AAC3B,UAAM,IAAI,UAAU,GAAG,IAAI,iCAAiC;AAAA,EAC7D;AAEA,QAAM,QAAQ;AACd,SAAO,MAAM,IAAI,CAAC,OAAO,UAAU;AAClC,QAAI;AACH,aAAO,iBAAiB,OAAO,GAAG,IAAI,IAAI,KAAK,GAAG;AAAA,IACnD,QAAQ;AACP,YAAM,IAAI,UAAU,GAAG,IAAI,IAAI,KAAK,wBAAwB;AAAA,IAC7D;AAAA,EACD,CAAC;AACF;AAKO,SAAS,qBAAqB,MAAqB;AACzD,YAAU;AACV,uBAAqB;AACtB;AAKO,SAAS,yBAA+B;AAC9C,YAAU;AACV,uBAAqB;AACtB;AAEA,SAAS,oBAAmC;AAC3C,QAAM,eACL,OAAO,YAAY,cAAe,UAAwC,cAAc,YAAY,GAAG;AAExG,MAAI,CAAC,cAAc;AAClB,UAAM,IAAI,MAAM,kEAAkE;AAAA,EACnF;AAEA,QAAM,eAAe,aAAa,aAAa;AAG/C,MAAI,OAAO,iBAAiB,YAAY,iBAAiB,MAAM;AAC9D,UAAM,IAAI;AAAA,MACT;AAAA,IACD;AAAA,EACD;AAEA,QAAM,SAAS;AAGf,QAAM,kBAAkB;AAAA,IACvB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACD;AAEA,aAAW,UAAU,iBAAiB;AACrC,QAAI,OAAO,OAAO,MAAM,MAAM,YAAY;AACzC,YAAM,IAAI;AAAA,QACT,8CAA8C,MAAM;AAAA,MAErD;AAAA,IACD;AAAA,EACD;AAEA,SAAO;AACR;AAEA,SAAS,aAA4B;AACpC,MAAI,oBAAoB;AACvB,QAAI,YAAY,MAAM;AACrB,YAAM,IAAI,MAAM,+CAA+C;AAAA,IAChE;AACA,WAAO;AAAA,EACR;AAEA,MAAI;AACH,QAAI,OAAO,YAAY,eAAe,QAAQ,YAAY,QAAQ,SAAS,MAAM;AAChF,gBAAU,kBAAkB;AAC5B,2BAAqB;AACrB,aAAO;AAAA,IACR;AAAA,EACD,SAAS,OAAO;AACf,yBAAqB;AACrB,UAAM,yBAAyB,KAAK;AAAA,EACrC;AAEA,QAAM,IAAI;AAAA,IACT;AAAA,EAED;AACD;AAEA,SAAS,cAAc,aAA8C;AACpE,MAAI;AACH,UAAM,SAAS,KAAK,MAAM,WAAW;AACrC,QAAI,OAAO,WAAW,YAAY,WAAW,MAAM;AAClD,aAAO;AAAA,IACR;AACA,WAAO,CAAC;AAAA,EACT,QAAQ;AACP,WAAO,CAAC;AAAA,EACT;AACD;AAEA,SAAS,iBAAiB,OAA4B;AACrD,MAAI,iBAAiB,YAAY;AAChC,WAAO;AAAA,EACR;AACA,MAAI,OAAO,WAAW,eAAe,iBAAiB,QAAQ;AAC7D,WAAO,IAAI,WAAW,KAAK;AAAA,EAC5B;AACA,MAAI,MAAM,QAAQ,KAAK,GAAG;AACzB,WAAO,IAAI,WAAW,KAAK;AAAA,EAC5B;AACA,SAAO,IAAI,WAAW;AACvB;AAEA,SAAS,aAAa,UAA0B;AAC/C,MAAI,CAAC,YAAY,OAAO,aAAa,UAAU;AAC9C,WAAO;AAAA,MACN,SAAS;AAAA,MACT,UAAU;AAAA,QACT,WAAW;AAAA,QACX,SAAS;AAAA,QACT,YAAY;AAAA,QACZ,YAAY;AAAA,QACZ,aAAa;AAAA,MACd;AAAA,MACA,WAAW;AAAA,IACZ;AAAA,EACD;AAEA,QAAM,QAAQ;AAEd,QAAM,WAAY,MAAM,UAAU,KAAiC,CAAC;AACpE,SAAO;AAAA;AAAA,IAEN,SAAU,MAAM,SAAS,KAAgB;AAAA;AAAA,IAEzC,WAAY,MAAM,WAAW,KAAyB;AAAA,IACtD,UAAU;AAAA;AAAA,MAET,WAAa,SAAS,YAAY,KAAK,SAAS,WAAW,KAAiB;AAAA;AAAA,MAE5E,SAAW,SAAS,UAAU,KAAK,SAAS,SAAS,KAAiB;AAAA;AAAA,MAEtE,YAAc,SAAS,aAAa,KAAK,SAAS,YAAY,KAAwB;AAAA;AAAA,MAEtF,YAAc,SAAS,aAAa,KAAK,SAAS,YAAY,KAAiB;AAAA;AAAA,MAE/E,aAAe,SAAS,cAAc,KAAK,SAAS,aAAa,KAAiB;AAAA;AAAA,MAElF,WAAa,SAAS,YAAY,KAAK,SAAS,WAAW,KAAwB;AAAA;AAAA,MAEnF,UAAY,SAAS,WAAW,KAAK,SAAS,UAAU,KAAwB;AAAA,IACjF;AAAA,EACD;AACD;AAEA,SAAS,aAAa,UAAmC;AACxD,MAAI,CAAC,YAAY,OAAO,aAAa,UAAU;AAC9C,WAAO;AAAA,MACN,MAAM,IAAI,WAAW;AAAA,MACrB,QAAQ;AAAA,MACR,YAAY;AAAA,MACZ,YAAY;AAAA,MACZ,OAAO;AAAA,MACP,QAAQ;AAAA,MACR,YAAY;AAAA,MACZ,kBAAkB;AAAA,MAClB,QAAQ;AAAA,MACR,aAAa;AAAA,MACb,WAAW;AAAA,IACZ;AAAA,EACD;AAEA,QAAM,QAAQ;AACd,SAAO;AAAA;AAAA,IAEN,MAAM,iBAAiB,MAAM,MAAM,CAAC;AAAA;AAAA,IAEpC,QAAS,MAAM,QAAQ,KAAgB;AAAA;AAAA,IAEvC,YAAa,MAAM,YAAY,KAAgB;AAAA;AAAA,IAE/C,YAAa,MAAM,YAAY,KAAuB;AAAA;AAAA,IAEtD,OAAQ,MAAM,OAAO,KAAuB;AAAA;AAAA,IAE5C,QAAS,MAAM,QAAQ,KAAuB;AAAA;AAAA,IAE9C,YAAa,MAAM,YAAY,KAAuB;AAAA;AAAA,IAEtD,kBAAmB,MAAM,kBAAkB,KAAuB;AAAA;AAAA,IAElE,QAAS,MAAM,QAAQ,KAAiB;AAAA;AAAA,IAExC,aAAc,MAAM,aAAa,KAAuB;AAAA;AAAA,IAExD,WAAW,MAAM,WAAW,IAAI,cAAc,MAAM,WAAW,CAAC,IAAI;AAAA,EACrE;AACD;AAEA,SAAS,mBAAmB,SAA+B;AAC1D,MAAI,CAAC,WAAW,OAAO,YAAY,UAAU;AAC5C,WAAO;AAAA,MACN,YAAY;AAAA,MACZ,SAAS;AAAA,MACT,QAAQ,CAAC;AAAA,MACT,QAAQ,CAAC;AAAA,IACV;AAAA,EACD;AAEA,QAAM,OAAO;AACb,SAAO;AAAA;AAAA,IAEN,YAAa,KAAK,YAAY,KAAgB;AAAA;AAAA,IAE9C,SAAU,KAAK,SAAS,KAAgB;AAAA;AAAA,IAExC,QAAQ,MAAM,QAAQ,KAAK,QAAQ,CAAC,IAAK,KAAK,QAAQ,IAAgB,CAAC;AAAA;AAAA,IAEvE,QAAQ,MAAM,QAAQ,KAAK,QAAQ,CAAC,IAAK,KAAK,QAAQ,EAAgB,IAAI,CAAC,UAAU,aAAa,KAAK,CAAC,IAAI,CAAC;AAAA,EAC9G;AACD;AAEA,SAAS,cAAc,WAAsC;AAC5D,MAAI,CAAC,aAAa,OAAO,cAAc,UAAU;AAChD,WAAO;AAAA,MACN,SAAS;AAAA,MACT,UAAU;AAAA,MACV,UAAU,CAAC;AAAA,MACX,QAAQ,CAAC;AAAA,MACT,mBAAmB;AAAA,MACnB,QAAQ;AAAA,MACR,QAAQ;AAAA,MACR,OAAO;AAAA,IACR;AAAA,EACD;AAEA,QAAM,SAAS;AAEf,QAAM,WAAW,OAAO,UAAU;AAClC,QAAM,gBACL,OAAO,aAAa,WAAW,cAAc,QAAQ,IAAM,YAAwC,CAAC;AAErG,SAAO;AAAA;AAAA,IAEN,SAAU,OAAO,SAAS,KAAgB;AAAA;AAAA,IAE1C,UAAW,OAAO,UAAU,KAAgB;AAAA,IAC5C,UAAU;AAAA;AAAA,IAEV,QAAQ,MAAM,QAAQ,OAAO,QAAQ,CAAC,IAAK,OAAO,QAAQ,IAAgB,CAAC;AAAA;AAAA,IAE3E,mBAAmB,MAAM,QAAQ,OAAO,mBAAmB,CAAC,IAAK,OAAO,mBAAmB,IAAiB;AAAA,IAC5G,SAAS,MAAM;AAEd,YAAM,aAAa,OAAO,QAAQ;AAClC,aAAO,MAAM,QAAQ,UAAU,IAAK,WAAyB,IAAI,CAAC,UAAU,aAAa,KAAK,CAAC,IAAI;AAAA,IACpG,GAAG;AAAA,IACH,SAAS,MAAM;AAEd,YAAM,aAAa,OAAO,QAAQ;AAClC,aAAO,MAAM,QAAQ,UAAU,IAAK,WAAyB,IAAI,CAAC,UAAU,aAAa,KAAK,CAAC,IAAI;AAAA,IACpG,GAAG;AAAA,IACH,QAAQ,MAAM;AAEb,YAAM,YAAY,OAAO,OAAO;AAChC,aAAO,MAAM,QAAQ,SAAS,IAAK,UAAwB,IAAI,CAAC,SAAS,mBAAmB,IAAI,CAAC,IAAI;AAAA,IACtG,GAAG;AAAA,EACJ;AACD;AAIA,SAAS,aAAgB,QAAgC,KAAa,OAA4B;AACjG,MAAI,UAAU,QAAW;AACxB,WAAO,GAAG,IAAI;AAAA,EACf;AACD;AAEA,SAAS,yBAAyB,QAA0B;AAC3D,MAAI,CAAC,QAAQ;AACZ,WAAO;AAAA,EACR;AAEA,QAAM,aAAqC,CAAC;AAC5C,eAAa,YAAY,OAAO,OAAO,GAAG;AAC1C,eAAa,YAAY,wBAAwB,OAAO,oBAAoB;AAC5E,eAAa,YAAY,yBAAyB,OAAO,qBAAqB;AAC9E,SAAO;AACR;AAEA,SAAS,mBAAmB,KAAqD;AAChF,MAAI,CAAC,KAAK;AACT,WAAO;AAAA,EACR;AAEA,QAAM,aAAqC;AAAA,IAC1C,SAAS,IAAI;AAAA,EACd;AACA,eAAa,YAAY,YAAY,IAAI,QAAQ;AAEjD,QAAM,YAAY,yBAAyB,IAAI,eAAe;AAC9D,MAAI,WAAW;AACd,iBAAa,YAAY,mBAAmB,SAAS;AAAA,EACtD;AAEA,SAAO;AACR;AAEA,SAAS,wBAAwB,UAA+D;AAC/F,MAAI,CAAC,UAAU;AACd,WAAO;AAAA,EACR;AAEA,QAAM,aAAqC,CAAC;AAC5C,eAAa,YAAY,YAAY,SAAS,QAAQ;AACtD,eAAa,YAAY,cAAc,SAAS,UAAU;AAC1D,eAAa,YAAY,UAAU,SAAS,MAAM;AAClD,eAAa,YAAY,aAAa,SAAS,SAAS;AACxD,eAAa,YAAY,WAAW,SAAS,OAAO;AACpD,SAAO;AACR;AAEA,SAAS,+BAA+B,QAAoE;AAC3G,MAAI,CAAC,QAAQ;AACZ,WAAO;AAAA,EACR;AAEA,QAAM,aAAqC,CAAC;AAC5C,eAAa,YAAY,iBAAiB,OAAO,aAAa;AAC9D,eAAa,YAAY,aAAa,OAAO,SAAS;AACtD,eAAa,YAAY,qBAAqB,OAAO,iBAAiB;AACtE,eAAa,YAAY,iBAAiB,OAAO,aAAa;AAC9D,eAAa,YAAY,UAAU,OAAO,MAAM;AAChD,eAAa,YAAY,UAAU,OAAO,MAAM;AAChD,SAAO;AACR;AAEA,SAAS,mBAAmB,KAAqD;AAChF,MAAI,CAAC,KAAK;AACT,WAAO;AAAA,EACR;AAEA,QAAM,aAAqC,CAAC;AAC5C,eAAa,YAAY,iBAAiB,IAAI,aAAa;AAC3D,eAAa,YAAY,aAAa,IAAI,SAAS;AACnD,eAAa,YAAY,mBAAmB,IAAI,eAAe;AAC/D,SAAO;AACR;AAEA,SAAS,8BAA8B,gBAA2E;AACjH,MAAI,CAAC,gBAAgB;AACpB,WAAO;AAAA,EACR;AAEA,QAAM,aAAqC,CAAC;AAC5C,eAAa,YAAY,QAAQ,eAAe,IAAI;AACpD,eAAa,YAAY,0BAA0B,eAAe,sBAAsB;AACxF,SAAO;AACR;AAEA,SAAS,iCACR,mBACqC;AACrC,MAAI,CAAC,mBAAmB;AACvB,WAAO;AAAA,EACR;AAEA,QAAM,aAAqC,CAAC;AAC5C,eAAa,YAAY,WAAW,kBAAkB,OAAO;AAC7D,eAAa,YAAY,iBAAiB,kBAAkB,aAAa;AACzE,eAAa,YAAY,kBAAkB,kBAAkB,cAAc;AAC3E,SAAO;AACR;AAEA,SAAS,6BAA6B,eAAyE;AAC9G,MAAI,CAAC,eAAe;AACnB,WAAO;AAAA,EACR;AAEA,QAAM,aAAqC,CAAC;AAC5C,eAAa,YAAY,WAAW,cAAc,OAAO;AACzD,eAAa,YAAY,qBAAqB,cAAc,iBAAiB;AAC7E,eAAa,YAAY,sBAAsB,cAAc,kBAAkB;AAC/E,SAAO;AACR;AAEA,SAAS,2BAA2B,SAAwE;AAC3G,MAAI,CAAC,SAAS;AACb,WAAO;AAAA,EACR;AAEA,QAAM,aAAqC,CAAC;AAC5C,eAAa,YAAY,WAAW,QAAQ,OAAO;AACnD,eAAa,YAAY,UAAU,QAAQ,MAAM;AACjD,eAAa,YAAY,oBAAoB,QAAQ,gBAAgB;AACrE,eAAa,YAAY,eAAe,QAAQ,WAAW;AAC3D,SAAO;AACR;AAEA,SAAS,qBAAqB,SAAqE;AAClG,MAAI,CAAC,SAAS;AACb,WAAO;AAAA,EACR;AAEA,QAAM,aAAqC,CAAC;AAC5C,eAAa,YAAY,gBAAgB,QAAQ,YAAY;AAC7D,eAAa,YAAY,kBAAkB,QAAQ,cAAc;AACjE,eAAa,YAAY,mBAAmB,QAAQ,eAAe;AACnE,eAAa,YAAY,WAAW,QAAQ,OAAO;AACnD,eAAa,YAAY,kBAAkB,QAAQ,cAAc;AACjE,eAAa,YAAY,mBAAmB,QAAQ,eAAe;AACnE,eAAa,YAAY,qBAAqB,QAAQ,iBAAiB;AACvE,eAAa,YAAY,cAAc,QAAQ,UAAU;AACzD,eAAa,YAAY,eAAe,QAAQ,WAAW;AAC3D,eAAa,YAAY,gBAAgB,QAAQ,YAAY;AAC7D,eAAa,YAAY,aAAa,QAAQ,SAAS;AACvD,eAAa,YAAY,gBAAgB,QAAQ,YAAY;AAC7D,eAAa,YAAY,cAAc,QAAQ,UAAU;AACzD,eAAa,YAAY,qBAAqB,QAAQ,iBAAiB;AACvE,eAAa,YAAY,kBAAkB,QAAQ,cAAc;AACjE,eAAa,YAAY,mBAAmB,QAAQ,eAAe;AACnE,eAAa,YAAY,kBAAkB,QAAQ,cAAc;AACjE,eAAa,YAAY,iBAAiB,QAAQ,aAAa;AAC/D,eAAa,YAAY,QAAQ,QAAQ,IAAI;AAC7C,eAAa,YAAY,aAAa,QAAQ,SAAS;AACvD,eAAa,YAAY,mBAAmB,QAAQ,eAAe;AACnE,eAAa,YAAY,aAAa,QAAQ,SAAS;AACvD,eAAa,YAAY,aAAa,QAAQ,SAAS;AACvD,eAAa,YAAY,gBAAgB,QAAQ,YAAY;AAC7D,eAAa,YAAY,kBAAkB,QAAQ,cAAc;AACjE,eAAa,YAAY,sBAAsB,QAAQ,kBAAkB;AACzE,eAAa,YAAY,YAAY,QAAQ,QAAQ;AACrD,eAAa,YAAY,SAAS,QAAQ,KAAK;AAC/C,eAAa,YAAY,aAAa,QAAQ,SAAS;AACvD,eAAa,YAAY,gBAAgB,QAAQ,YAAY;AAE7D,QAAM,gBAAgB,2BAA2B,QAAQ,aAAa;AACtE,eAAa,YAAY,iBAAiB,aAAa;AAEvD,SAAO;AACR;AAEA,SAAS,uBAAuB,QAA4D;AAC3F,MAAI,CAAC,QAAQ;AACZ,WAAO;AAAA,EACR;AAEA,QAAM,aAAqC,CAAC;AAC5C,eAAa,YAAY,aAAa,OAAO,SAAS;AACtD,eAAa,YAAY,eAAe,OAAO,WAAW;AAC1D,eAAa,YAAY,YAAY,OAAO,QAAQ;AACpD,eAAa,YAAY,cAAc,OAAO,UAAU;AACxD,eAAa,YAAY,YAAY,OAAO,QAAQ;AACpD,eAAa,YAAY,cAAc,OAAO,UAAU;AACxD,eAAa,YAAY,cAAc,OAAO,UAAU;AACxD,SAAO;AACR;AAEA,SAAS,oBAAoB,OAAwD;AACpF,MAAI,CAAC,OAAO;AACX,WAAO;AAAA,EACR;AAEA,QAAM,aAAqC,CAAC;AAC5C,eAAa,YAAY,iBAAiB,MAAM,YAAY;AAC5D,eAAa,YAAY,uBAAuB,MAAM,iBAAiB;AACvE,eAAa,YAAY,iBAAiB,MAAM,YAAY;AAC5D,SAAO;AACR;AAEA,SAAS,0BAA0B,QAAoE;AACtG,MAAI,CAAC,QAAQ;AACZ,WAAO;AAAA,EACR;AAEA,QAAM,aAAqC,CAAC;AAC5C,eAAa,YAAY,YAAY,OAAO,QAAQ;AACpD,eAAa,YAAY,2BAA2B,OAAO,uBAAuB;AAClF,eAAa,YAAY,YAAY,OAAO,QAAQ;AACpD,eAAa,YAAY,4BAA4B,OAAO,wBAAwB;AAEpF,QAAM,MAAM,mBAAmB,OAAO,GAAG;AACzC,eAAa,YAAY,OAAO,GAAG;AAEnC,QAAM,WAAW,wBAAwB,OAAO,QAAQ;AACxD,eAAa,YAAY,YAAY,QAAQ;AAE7C,QAAM,SAAS,+BAA+B,OAAO,MAAM;AAC3D,eAAa,YAAY,UAAU,MAAM;AAEzC,QAAM,MAAM,mBAAmB,OAAO,UAAU;AAChD,eAAa,YAAY,cAAc,GAAG;AAE1C,QAAM,iBAAiB,8BAA8B,OAAO,cAAc;AAC1E,eAAa,YAAY,kBAAkB,cAAc;AAEzD,QAAM,oBAAoB,iCAAiC,OAAO,iBAAiB;AACnF,eAAa,YAAY,qBAAqB,iBAAiB;AAE/D,QAAM,gBAAgB,6BAA6B,OAAO,aAAa;AACvE,eAAa,YAAY,iBAAiB,aAAa;AAEvD,QAAM,WAAW,uBAAuB,OAAO,QAAQ;AACvD,eAAa,YAAY,YAAY,QAAQ;AAE7C,QAAM,QAAQ,oBAAoB,OAAO,KAAK;AAC9C,eAAa,YAAY,SAAS,KAAK;AAEvC,QAAM,cAAc,qBAAqB,OAAO,WAAW;AAC3D,eAAa,YAAY,eAAe,WAAW;AAEnD,SAAO;AACR;AAwCO,SAAS,gBACf,UACA,WAA0B,MAC1B,SAAsC,MACnB;AACnB,QAAM,mBAAmB,0BAA0B,MAAM;AACzD,QAAM,YAAY,WAAW,EAAE,gBAAgB,UAAU,UAAU,gBAAgB;AACnF,SAAO,cAAc,SAAS;AAC/B;AAqCA,eAAsB,YACrB,UACA,WAA0B,MAC1B,SAAsC,MACV;AAC5B,QAAM,mBAAmB,0BAA0B,MAAM;AACzD,QAAM,YAAY,MAAM,WAAW,EAAE,YAAY,UAAU,UAAU,gBAAgB;AACrF,SAAO,cAAc,SAAS;AAC/B;AA6BO,SAAS,iBACf,MACA,UACA,SAAsC,MACnB;AACnB,QAAM,YAAY,iBAAiB,MAAM,MAAM;AAC/C,QAAM,mBAAmB,0BAA0B,MAAM;AACzD,QAAM,YAAY,WAAW,EAAE,iBAAiB,OAAO,KAAK,SAAS,GAAG,UAAU,gBAAgB;AAClG,SAAO,cAAc,SAAS;AAC/B;AA6BA,eAAsB,aACrB,MACA,UACA,SAAsC,MACV;AAC5B,QAAM,YAAY,iBAAiB,MAAM,MAAM;AAE/C,MAAI,QAAQ,IAAI,uBAAuB,MAAM,KAAK;AACjD,YAAQ,IAAI,oCAAoC,MAAM,KAAK,UAAU,MAAM,GAAG,CAAC,CAAC,CAAC;AAAA,EAClF;AACA,QAAM,mBAAmB,0BAA0B,MAAM;AACzD,QAAM,YAAY,MAAM,WAAW,EAAE,aAAa,OAAO,KAAK,SAAS,GAAG,UAAU,gBAAgB;AACpG,SAAO,cAAc,SAAS;AAC/B;AAkCO,SAAS,sBAAsB,OAAiB,SAAsC,MAA0B;AACtH,QAAM,mBAAmB,0BAA0B,MAAM;AACzD,QAAM,aAAa,WAAW,EAAE,sBAAsB,OAAO,gBAAgB;AAC7E,SAAO,WAAW,IAAI,aAAa;AACpC;AAqCA,eAAsB,kBACrB,OACA,SAAsC,MACR;AAC9B,QAAM,mBAAmB,0BAA0B,MAAM;AACzD,QAAM,aAAa,MAAM,WAAW,EAAE,kBAAkB,OAAO,gBAAgB;AAC/E,SAAO,WAAW,IAAI,aAAa;AACpC;AAuCO,SAAS,sBACf,UACA,WACA,SAAsC,MACjB;AACrB,QAAM,UAAU,qBAAqB,UAAU,UAAU,EAAE,IAAI,CAAC,SAAS,OAAO,KAAK,IAAI,CAAC;AAE1F,MAAI,QAAQ,WAAW,UAAU,QAAQ;AACxC,UAAM,IAAI,UAAU,kDAAkD;AAAA,EACvE;AAEA,QAAM,mBAAmB,0BAA0B,MAAM;AACzD,QAAM,aAAa,WAAW,EAAE,sBAAsB,SAAS,WAAW,gBAAgB;AAC1F,SAAO,WAAW,IAAI,aAAa;AACpC;AA2CA,eAAsB,kBACrB,UACA,WACA,SAAsC,MACR;AAC9B,QAAM,UAAU,qBAAqB,UAAU,UAAU,EAAE,IAAI,CAAC,SAAS,OAAO,KAAK,IAAI,CAAC;AAE1F,MAAI,QAAQ,WAAW,UAAU,QAAQ;AACxC,UAAM,IAAI,UAAU,kDAAkD;AAAA,EACvE;AAEA,QAAM,mBAAmB,0BAA0B,MAAM;AACzD,QAAM,aAAa,MAAM,WAAW,EAAE,kBAAkB,SAAS,WAAW,gBAAgB;AAC5F,SAAO,WAAW,IAAI,aAAa;AACpC;AA4CO,SAAS,sBAAsB,WAAwC;AAC7E,QAAMA,WAAU,WAAW;AAE3B,QAAM,mBAAmB;AAAA,IACxB,MAAM,UAAU,KAAK,KAAK,SAAS;AAAA,IACnC,iBAAiB,UAAU,iBAAiB,KAAK,SAAS;AAAA,IAC1D,MAAM,WAAW,MAAkC;AAClD,YAAM,eAAe,KAAK,CAAC;AAC3B,YAAM,aAAa,aAAa,CAAC;AAEjC,YAAM,aAAa,KAAK,MAAM,UAAU;AAUxC,YAAM,SAA2B;AAAA,QAChC,SAAS,WAAW;AAAA,QACpB,UAAU,WAAW;AAAA,QACrB,UAAU,OAAO,WAAW,aAAa,WAAW,KAAK,MAAM,WAAW,QAAQ,IAAI,WAAW;AAAA,QACjG,QAAS,WAAW,UAAU,CAAC;AAAA,QAC/B,mBAAmB,WAAW,sBAAsB;AAAA,QACpD,QAAS,WAAW,UAAyC;AAAA,QAC7D,QAAS,WAAW,UAAkD;AAAA,MACvE;AAEA,YAAM,UAAU,MAAM,UAAU,QAAQ,MAAM;AAE9C,YAAM,cAAc;AAAA,QACnB,SAAS,QAAQ;AAAA,QACjB,WAAW,QAAQ;AAAA,QACnB,UAAU,QAAQ;AAAA,QAClB,QAAQ,QAAQ;AAAA,QAChB,oBAAoB,QAAQ;AAAA,QAC5B,QAAQ,QAAQ;AAAA,QAChB,QAAQ,QAAQ;AAAA,MACjB;AAEA,aAAO,KAAK,UAAU,WAAW;AAAA,IAClC;AAAA,EACD;AAEA,SAAO,eAAe,kBAAkB,cAAc;AAAA,IACrD,OAAO;AAAA,IACP,YAAY;AAAA,EACb,CAAC;AAED,QAAM,QAAQ,UAAU,kBAAkB,KAAK;AAC/C,SAAO,eAAe,kBAAkB,WAAW;AAAA,IAClD,OAAO;AAAA,IACP,YAAY;AAAA,EACb,CAAC;AAED,EAAAA,SAAQ,sBAAsB,gBAAgB;AAC/C;AAiBO,SAAS,wBAAwB,MAAoB;AAC3D,QAAMA,WAAU,WAAW;AAC3B,EAAAA,SAAQ,wBAAwB,IAAI;AACrC;AAeO,SAAS,sBAA4B;AAC3C,QAAMA,WAAU,WAAW;AAC3B,EAAAA,SAAQ,oBAAoB;AAC7B;AAiBO,SAAS,qBAA+B;AAC9C,QAAMA,WAAU,WAAW;AAC3B,SAAOA,SAAQ,mBAAmB;AACnC;AAqCO,SAAS,kBAAkB,WAAoC;AACrE,QAAMA,WAAU,WAAW;AAE3B,QAAM,mBAAmB;AAAA,IACxB,MAAM,UAAU,KAAK,KAAK,SAAS;AAAA,IACnC,UAAU,UAAU,UAAU,KAAK,SAAS;AAAA,IAC5C,MAAM,YAAY,MAAkC;AACnD,YAAM,aAAa,KAAK,CAAC;AAEzB,UAAI,CAAC,cAAc,eAAe,aAAa;AAC9C,cAAM,IAAI,MAAM,wCAAwC;AAAA,MACzD;AAEA,YAAM,aAAa,KAAK,MAAM,UAAU;AACxC,YAAM,SAA2B;AAAA,QAChC,SAAS,WAAW;AAAA,QACpB,UAAU,WAAW;AAAA,QACrB,UAAU,OAAO,WAAW,aAAa,WAAW,KAAK,MAAM,WAAW,QAAQ,IAAI,WAAW;AAAA,QACjG,QAAQ,WAAW,UAAU,CAAC;AAAA,QAC9B,mBAAmB,WAAW;AAAA,QAC9B,QAAQ,WAAW;AAAA,QACnB,QAAQ,WAAW,UAAU;AAAA,MAC9B;AAEA,YAAM,QAAQ,QAAQ,UAAU,SAAS,MAAM,CAAC;AAChD,aAAO;AAAA,IACR;AAAA,EACD;AAEA,EAAAA,SAAQ,kBAAkB,gBAAgB;AAC3C;AAiBO,SAAS,oBAAoB,MAAoB;AACvD,QAAMA,WAAU,WAAW;AAC3B,EAAAA,SAAQ,oBAAoB,IAAI;AACjC;AAeO,SAAS,kBAAwB;AACvC,QAAMA,WAAU,WAAW;AAC3B,EAAAA,SAAQ,gBAAgB;AACzB;AAiBO,SAAS,iBAA2B;AAC1C,QAAMA,WAAU,WAAW;AAC3B,SAAOA,SAAQ,eAAe;AAC/B;AA6EA,SAAS,kBAAkB,OAA0C;AACpE,SACC,MAAM,QAAQ,KAAK,KACnB,MAAM,WAAW,KACjB,OAAO,MAAM,CAAC,MAAM,aACnB,OAAO,MAAM,CAAC,MAAM,YAAY,OAAO,SAAS,MAAM,CAAC,CAAC,KAAK,MAAM,CAAC,aAAa;AAEpF;AAEA,SAAS,wBAAwB,OAAgD;AAChF,SAAO,MAAM,QAAQ,KAAK,KAAK,MAAM,WAAW,KAAK,kBAAkB,MAAM,CAAC,CAAC;AAChF;AAEA,SAAS,gBAAgB,OAA0B;AAClD,MAAI,OAAO,UAAU,UAAU;AAC9B,WAAO,EAAE,MAAM,UAAU,QAAQ,MAAM,OAAO;AAAA,EAC/C;AAEA,SAAO,EAAE,MAAM,MAAM,aAAa,QAAQ,UAAU,QAAQ,MAAM,OAAO;AAC1E;AAEO,SAAS,mBAAmB,SAAmC;AACrE,QAAMA,WAAU,WAAW;AAE3B,QAAM,iBAAiB;AAAA,IACtB,MAAM,QAAQ,KAAK,KAAK,OAAO;AAAA,IAC/B,oBAAoB,QAAQ,mBAAmB,KAAK,OAAO;AAAA,IAC3D,MAAM,gBACF,aACe;AAClB,YAAM,CAAC,cAAc,aAAa,IAAI;AAEtC,UAAI,QAAQ,IAAI,uBAAuB,MAAM,KAAK;AACjD,gBAAQ,IAAI,qCAAqC,EAAE,QAAQ,YAAY,OAAO,CAAC;AAC/E,gBAAQ,IAAI,iCAAiC;AAAA,UAC5C,kBAAkB,MAAM,QAAQ,YAAY,IAAI,UAAU,OAAO;AAAA,UACjE,mBAAmB,OAAO;AAAA,UAC1B,UAAU,MAAM,QAAQ,YAAY,IAAI,EAAE,aAAa,aAAa,OAAO,IAAI,gBAAgB,YAAY;AAAA,QAC5G,CAAC;AAAA,MACF;AAEA,UAAI;AACJ,UAAI,WAAW;AAEf,UAAI,wBAAwB,YAAY,GAAG;AAC1C,SAAC,UAAU,QAAQ,IAAI,aAAa,CAAC;AAAA,MACtC,WAAW,kBAAkB,YAAY,GAAG;AAC3C,SAAC,UAAU,QAAQ,IAAI;AAAA,MACxB,OAAO;AACN,mBAAW;AAAA,MACZ;AAEA,UAAI,OAAO,aAAa,UAAU;AACjC,cAAM,IAAI,MAAM,kDAAkD;AAAA,MACnE;AAGA,UAAI,QAAQ,IAAI,uBAAuB,MAAM,KAAK;AACjD,cAAM,SAAS,OAAO,aAAa,WAAW,SAAS,SAAS,SAAS;AACzE,gBAAQ;AAAA,UACP;AAAA,UACA,MAAM,QAAQ,YAAY,IAAI,UAAU,OAAO;AAAA,UAC/C;AAAA,UACA,gBAAgB,QAAQ,EAAE;AAAA,UAC1B;AAAA,UACA;AAAA,QACD;AAAA,MACD;AAEA,YAAM,SAAS,OAAO,aAAa,WAAW,OAAO,KAAK,UAAU,QAAQ,IAAI,OAAO,KAAK,QAAQ;AACpG,YAAM,SAAS,MAAM,QAAQ,aAAa,IAAI,WAAW,MAAM,GAAG,QAAQ;AAE1E,aAAO,KAAK,UAAU,MAAM;AAAA,IAC7B;AAAA,EACD;AAEA,EAAAA,SAAQ,mBAAmB,cAAc;AAC1C;AAkBO,SAAS,kBAA4B;AAC3C,QAAMA,WAAU,WAAW;AAC3B,SAAOA,SAAQ,gBAAgB;AAChC;AAkBO,SAAS,qBAAqB,MAAoB;AACxD,QAAMA,WAAU,WAAW;AAC3B,EAAAA,SAAQ,qBAAqB,IAAI;AAClC;AAgBO,SAAS,mBAAyB;AACxC,QAAMA,WAAU,WAAW;AAC3B,EAAAA,SAAQ,iBAAiB;AAC1B;AAkBO,SAAS,yBAAmC;AAClD,QAAMA,WAAU,WAAW;AAC3B,SAAOA,SAAQ,uBAAuB;AACvC;AAkBO,SAAS,4BAA4B,MAAoB;AAC/D,QAAMA,WAAU,WAAW;AAC3B,EAAAA,SAAQ,4BAA4B,IAAI;AACzC;AAgBO,SAAS,0BAAgC;AAC/C,QAAMA,WAAU,WAAW;AAC3B,EAAAA,SAAQ,wBAAwB;AACjC;AAmBO,MAAM,mBAAmB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EA+B/B,SAAS,UAAwC;AAChD,UAAMA,WAAU,WAAW;AAC3B,WAAOA,SAAQ,6BAA6B,QAAQ;AAAA,EACrD;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAwBA,WAAwC;AACvC,UAAMA,WAAU,WAAW;AAC3B,WAAOA,SAAQ,yBAAyB;AAAA,EACzC;AACD;AA2BO,SAAS,eAAe,OAAuB;AACrD,QAAMA,WAAU,WAAW;AAC3B,SAAOA,SAAQ,wBAAwB,KAAK;AAC7C;AA0BO,SAAS,uBAAuB,MAAsB;AAC5D,QAAMA,WAAU,WAAW;AAC3B,SAAOA,SAAQ,uBAAuB,IAAI;AAC3C;AAiCO,SAAS,iBAAiB,UAA0B;AAC1D,QAAMA,WAAU,WAAW;AAC3B,SAAOA,SAAQ,iBAAiB,QAAQ;AACzC;AA0BO,SAAS,qBAAqB,UAA4B;AAChE,QAAMA,WAAU,WAAW;AAC3B,SAAOA,SAAQ,qBAAqB,QAAQ;AAC7C;AAqCO,SAAS,uBAAiC;AAChD,QAAMA,WAAU,WAAW;AAC3B,SAAOA,SAAQ,qBAAqB;AACrC;AAqBO,SAAS,mBAAmB,MAAsC;AACxE,QAAMA,WAAU,WAAW;AAC3B,QAAM,SAASA,SAAQ,mBAAmB,IAAI;AAC9C,SAAO;AACR;AAkCO,SAAS,mBAA2B;AAC1C,QAAMA,WAAU,WAAW;AAC3B,SAAOA,SAAQ,iBAAiB;AACjC;AA0BO,SAAS,sBAA2C;AAC1D,QAAMA,WAAU,WAAW;AAC3B,QAAM,SAASA,SAAQ,oBAAoB;AAC3C,SAAO;AACR;AAoBO,SAAS,iBAAiB,MAAsB;AACtD,QAAMA,WAAU,WAAW;AAC3B,SAAOA,SAAQ,iBAAiB,IAAI;AACrC;AAmBO,SAAS,wBAAwB,MAAsB;AAC7D,QAAMA,WAAU,WAAW;AAC3B,SAAOA,SAAQ,wBAAwB,IAAI;AAC5C;AAiCO,SAAS,cAAc,cAA2C;AACxE,QAAMA,WAAU,WAAW;AAC3B,QAAM,SAASA,SAAQ,cAAc,YAAY;AACjD,SAAO;AACR;AAEO,MAAM,cAAc;","names":["binding"]}
|
package/dist/types.d.mts
CHANGED
|
@@ -936,5 +936,41 @@ interface OcrBackendProtocol {
|
|
|
936
936
|
*/
|
|
937
937
|
shutdown?(): void | Promise<void>;
|
|
938
938
|
}
|
|
939
|
+
/**
|
|
940
|
+
* Result of error message classification into error codes.
|
|
941
|
+
*
|
|
942
|
+
* Provides classification details including the error code, name,
|
|
943
|
+
* description, and confidence score for the classification.
|
|
944
|
+
*
|
|
945
|
+
* @example
|
|
946
|
+
* ```typescript
|
|
947
|
+
* import { classifyError, ErrorCode } from '@kreuzberg/node';
|
|
948
|
+
*
|
|
949
|
+
* const result = classifyError("File not found in read operation");
|
|
950
|
+
* if (result.code === ErrorCode.IoError) {
|
|
951
|
+
* console.error(`I/O Error: ${result.description}`);
|
|
952
|
+
* console.log(`Confidence: ${result.confidence}`);
|
|
953
|
+
* }
|
|
954
|
+
* ```
|
|
955
|
+
*/
|
|
956
|
+
interface ErrorClassification {
|
|
957
|
+
/**
|
|
958
|
+
* The numeric error code (0-7) representing the error type.
|
|
959
|
+
*/
|
|
960
|
+
code: number;
|
|
961
|
+
/**
|
|
962
|
+
* The human-readable name of the error code (e.g., "validation", "ocr").
|
|
963
|
+
*/
|
|
964
|
+
name: string;
|
|
965
|
+
/**
|
|
966
|
+
* A brief description of the error type.
|
|
967
|
+
*/
|
|
968
|
+
description: string;
|
|
969
|
+
/**
|
|
970
|
+
* Confidence score (0.0-1.0) indicating how certain the classification is.
|
|
971
|
+
* Higher values indicate higher confidence in the classification.
|
|
972
|
+
*/
|
|
973
|
+
confidence: number;
|
|
974
|
+
}
|
|
939
975
|
|
|
940
|
-
export type { ArchiveMetadata, Chunk, ChunkMetadata, ChunkingConfig, EmailMetadata, ErrorMetadata, ExcelMetadata, ExtractedImage, ExtractionConfig, ExtractionResult, HtmlConversionOptions, HtmlMetadata, HtmlPreprocessingOptions, ImageExtractionConfig, ImageMetadata, ImagePreprocessingMetadata, KeywordAlgorithm, KeywordConfig, LanguageDetectionConfig, Metadata, OcrBackendProtocol, OcrConfig, OcrMetadata, PageBoundary, PageConfig, PageContent, PageInfo, PageStructure, PageUnitType, PdfConfig, PdfMetadata, PostProcessorConfig, PostProcessorProtocol, PptxMetadata, ProcessingStage, RakeParams, Table, TesseractConfig, TextMetadata, TokenReductionConfig, ValidatorProtocol, XmlMetadata, YakeParams };
|
|
976
|
+
export type { ArchiveMetadata, Chunk, ChunkMetadata, ChunkingConfig, EmailMetadata, ErrorClassification, ErrorMetadata, ExcelMetadata, ExtractedImage, ExtractionConfig, ExtractionResult, HtmlConversionOptions, HtmlMetadata, HtmlPreprocessingOptions, ImageExtractionConfig, ImageMetadata, ImagePreprocessingMetadata, KeywordAlgorithm, KeywordConfig, LanguageDetectionConfig, Metadata, OcrBackendProtocol, OcrConfig, OcrMetadata, PageBoundary, PageConfig, PageContent, PageInfo, PageStructure, PageUnitType, PdfConfig, PdfMetadata, PostProcessorConfig, PostProcessorProtocol, PptxMetadata, ProcessingStage, RakeParams, Table, TesseractConfig, TextMetadata, TokenReductionConfig, ValidatorProtocol, XmlMetadata, YakeParams };
|
package/dist/types.d.ts
CHANGED
|
@@ -936,5 +936,41 @@ interface OcrBackendProtocol {
|
|
|
936
936
|
*/
|
|
937
937
|
shutdown?(): void | Promise<void>;
|
|
938
938
|
}
|
|
939
|
+
/**
|
|
940
|
+
* Result of error message classification into error codes.
|
|
941
|
+
*
|
|
942
|
+
* Provides classification details including the error code, name,
|
|
943
|
+
* description, and confidence score for the classification.
|
|
944
|
+
*
|
|
945
|
+
* @example
|
|
946
|
+
* ```typescript
|
|
947
|
+
* import { classifyError, ErrorCode } from '@kreuzberg/node';
|
|
948
|
+
*
|
|
949
|
+
* const result = classifyError("File not found in read operation");
|
|
950
|
+
* if (result.code === ErrorCode.IoError) {
|
|
951
|
+
* console.error(`I/O Error: ${result.description}`);
|
|
952
|
+
* console.log(`Confidence: ${result.confidence}`);
|
|
953
|
+
* }
|
|
954
|
+
* ```
|
|
955
|
+
*/
|
|
956
|
+
interface ErrorClassification {
|
|
957
|
+
/**
|
|
958
|
+
* The numeric error code (0-7) representing the error type.
|
|
959
|
+
*/
|
|
960
|
+
code: number;
|
|
961
|
+
/**
|
|
962
|
+
* The human-readable name of the error code (e.g., "validation", "ocr").
|
|
963
|
+
*/
|
|
964
|
+
name: string;
|
|
965
|
+
/**
|
|
966
|
+
* A brief description of the error type.
|
|
967
|
+
*/
|
|
968
|
+
description: string;
|
|
969
|
+
/**
|
|
970
|
+
* Confidence score (0.0-1.0) indicating how certain the classification is.
|
|
971
|
+
* Higher values indicate higher confidence in the classification.
|
|
972
|
+
*/
|
|
973
|
+
confidence: number;
|
|
974
|
+
}
|
|
939
975
|
|
|
940
|
-
export type { ArchiveMetadata, Chunk, ChunkMetadata, ChunkingConfig, EmailMetadata, ErrorMetadata, ExcelMetadata, ExtractedImage, ExtractionConfig, ExtractionResult, HtmlConversionOptions, HtmlMetadata, HtmlPreprocessingOptions, ImageExtractionConfig, ImageMetadata, ImagePreprocessingMetadata, KeywordAlgorithm, KeywordConfig, LanguageDetectionConfig, Metadata, OcrBackendProtocol, OcrConfig, OcrMetadata, PageBoundary, PageConfig, PageContent, PageInfo, PageStructure, PageUnitType, PdfConfig, PdfMetadata, PostProcessorConfig, PostProcessorProtocol, PptxMetadata, ProcessingStage, RakeParams, Table, TesseractConfig, TextMetadata, TokenReductionConfig, ValidatorProtocol, XmlMetadata, YakeParams };
|
|
976
|
+
export type { ArchiveMetadata, Chunk, ChunkMetadata, ChunkingConfig, EmailMetadata, ErrorClassification, ErrorMetadata, ExcelMetadata, ExtractedImage, ExtractionConfig, ExtractionResult, HtmlConversionOptions, HtmlMetadata, HtmlPreprocessingOptions, ImageExtractionConfig, ImageMetadata, ImagePreprocessingMetadata, KeywordAlgorithm, KeywordConfig, LanguageDetectionConfig, Metadata, OcrBackendProtocol, OcrConfig, OcrMetadata, PageBoundary, PageConfig, PageContent, PageInfo, PageStructure, PageUnitType, PdfConfig, PdfMetadata, PostProcessorConfig, PostProcessorProtocol, PptxMetadata, ProcessingStage, RakeParams, Table, TesseractConfig, TextMetadata, TokenReductionConfig, ValidatorProtocol, XmlMetadata, YakeParams };
|