@kreuzberg/node 4.2.15 → 4.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -22,7 +22,7 @@
22
22
  <img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
23
23
  </a>
24
24
  <a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
25
- <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.2.15" alt="Go">
25
+ <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.3.0" alt="Go">
26
26
  </a>
27
27
  <a href="https://www.nuget.org/packages/Kreuzberg/">
28
28
  <img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
@@ -55,7 +55,7 @@
55
55
  </div>
56
56
 
57
57
 
58
- Extract text, tables, images, and metadata from 62+ file formats including PDF, Office documents, and images. Native NAPI-RS bindings for Node.js with superior performance, async/await support, and TypeScript type definitions.
58
+ Extract text, tables, images, and metadata from 75+ file formats including PDF, Office documents, and images. Native NAPI-RS bindings for Node.js with superior performance, async/await support, and TypeScript type definitions.
59
59
 
60
60
 
61
61
  ## Installation
@@ -95,15 +95,13 @@ yarn add @kreuzberg/node
95
95
  ### System Requirements
96
96
 
97
97
  - **Node.js 22+** required (NAPI-RS native bindings)
98
- - Optional: [ONNX Runtime](https://github.com/microsoft/onnxruntime/releases) version 1.22.x for embeddings support
98
+ - Optional: [ONNX Runtime](https://github.com/microsoft/onnxruntime/releases) version 1.24+ for embeddings support
99
99
  - Optional: [Tesseract OCR](https://github.com/tesseract-ocr/tesseract) for OCR functionality
100
100
 
101
- - Optional: [LibreOffice](https://www.libreoffice.org/download/download/) for legacy Office formats (DOC, XLS, PPT, RTF, ODT, ODS, ODP)
102
-
103
101
  **Format Support Notes:**
104
- - Modern Office formats (DOCX, XLSX, PPTX) work without LibreOffice
105
- - Legacy formats (DOC, XLS, PPT) require LibreOffice installation
106
- - WASM binding supports DOCX, XLSX, PPTX, and ODT (no LibreOffice required)
102
+ - Legacy formats (DOC, XLS, PPT) are now extracted natively without external tools
103
+ - Modern Office formats (DOCX, XLSX, PPTX) are fully supported
104
+ - WASM binding supports all document formats via in-memory parsing
107
105
 
108
106
 
109
107
 
@@ -322,9 +320,9 @@ This binding uses NAPI-RS to provide native Node.js bindings with:
322
320
 
323
321
  ## Features
324
322
 
325
- ### Supported File Formats (62+)
323
+ ### Supported File Formats (75+)
326
324
 
327
- 62+ file formats across 8 major categories with intelligent format detection and comprehensive metadata extraction.
325
+ 75+ file formats across 8 major categories with intelligent format detection and comprehensive metadata extraction.
328
326
 
329
327
  #### Office Documents
330
328
 
@@ -409,7 +407,7 @@ Kreuzberg supports multiple OCR backends for extracting text from scanned docume
409
407
 
410
408
  - **Tesseract**
411
409
 
412
- - **Guten**
410
+ - **Paddleocr**
413
411
 
414
412
 
415
413
  ### OCR Configuration Example
package/dist/errors.d.mts CHANGED
@@ -332,7 +332,6 @@ declare class PluginError extends KreuzbergError {
332
332
  * Error thrown when a required system dependency is missing.
333
333
  *
334
334
  * Missing dependency errors occur when external tools or libraries are not available, such as:
335
- * - LibreOffice (for DOC/PPT/XLS files)
336
335
  * - Tesseract OCR (for OCR processing)
337
336
  * - ImageMagick (for image processing)
338
337
  * - Poppler (for PDF rendering)
@@ -342,11 +341,11 @@ declare class PluginError extends KreuzbergError {
342
341
  * import { extractFile, MissingDependencyError } from '@kreuzberg/node';
343
342
  *
344
343
  * try {
345
- * const result = await extractFile('document.doc');
344
+ * const result = await extractFile('document.pdf');
346
345
  * } catch (error) {
347
346
  * if (error instanceof MissingDependencyError) {
348
347
  * console.error('Missing dependency:', error.message);
349
- * console.log('Please install LibreOffice to process DOC files');
348
+ * console.log('Please install Tesseract OCR for image processing');
350
349
  * }
351
350
  * }
352
351
  * ```
package/dist/errors.d.ts CHANGED
@@ -332,7 +332,6 @@ declare class PluginError extends KreuzbergError {
332
332
  * Error thrown when a required system dependency is missing.
333
333
  *
334
334
  * Missing dependency errors occur when external tools or libraries are not available, such as:
335
- * - LibreOffice (for DOC/PPT/XLS files)
336
335
  * - Tesseract OCR (for OCR processing)
337
336
  * - ImageMagick (for image processing)
338
337
  * - Poppler (for PDF rendering)
@@ -342,11 +341,11 @@ declare class PluginError extends KreuzbergError {
342
341
  * import { extractFile, MissingDependencyError } from '@kreuzberg/node';
343
342
  *
344
343
  * try {
345
- * const result = await extractFile('document.doc');
344
+ * const result = await extractFile('document.pdf');
346
345
  * } catch (error) {
347
346
  * if (error instanceof MissingDependencyError) {
348
347
  * console.error('Missing dependency:', error.message);
349
- * console.log('Please install LibreOffice to process DOC files');
348
+ * console.log('Please install Tesseract OCR for image processing');
350
349
  * }
351
350
  * }
352
351
  * ```
@@ -1 +1 @@
1
- {"version":3,"sources":["../typescript/errors.ts"],"sourcesContent":["/**\n * Error types for Kreuzberg document intelligence framework.\n *\n * These error classes mirror the Rust core error types and provide\n * type-safe error handling for TypeScript consumers.\n *\n * ## Error Hierarchy\n *\n * ```\n * Error (JavaScript built-in)\n * └── KreuzbergError (base class)\n * ├── ValidationError\n * ├── ParsingError\n * ├── OcrError\n * ├── CacheError\n * ├── ImageProcessingError\n * ├── PluginError\n * ├── MissingDependencyError\n * └── ... (other error types)\n * ```\n *\n * @module errors\n */\n\n/**\n * FFI error codes matching kreuzberg-ffi C library error types.\n *\n * @example\n * ```typescript\n * import { ErrorCode, getLastErrorCode } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * const code = getLastErrorCode();\n * if (code === ErrorCode.Panic) {\n * console.error('A panic occurred in the native library');\n * }\n * }\n * ```\n */\nexport enum ErrorCode {\n\t/**\n\t * No error (success)\n\t */\n\tSuccess = 0,\n\t/**\n\t * Generic error\n\t */\n\tGenericError = 1,\n\t/**\n\t * Panic occurred in native code\n\t */\n\tPanic = 2,\n\t/**\n\t * Invalid argument provided\n\t */\n\tInvalidArgument = 3,\n\t/**\n\t * I/O error (file system, network, etc.)\n\t */\n\tIoError = 4,\n\t/**\n\t * Error parsing document content\n\t */\n\tParsingError = 5,\n\t/**\n\t * Error in OCR processing\n\t */\n\tOcrError = 6,\n\t/**\n\t * Required system dependency is missing\n\t */\n\tMissingDependency = 7,\n}\n\n/**\n * Context information for panics in native code.\n *\n * Contains file location, line number, function name, panic message,\n * and timestamp for debugging native library issues.\n *\n * @example\n * ```typescript\n * import { KreuzbergError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof KreuzbergError && error.panicContext) {\n * console.error('Panic occurred:');\n * console.error(`File: ${error.panicContext.file}`);\n * console.error(`Line: ${error.panicContext.line}`);\n * console.error(`Function: ${error.panicContext.function}`);\n * console.error(`Message: ${error.panicContext.message}`);\n * }\n * }\n * ```\n */\nexport interface PanicContext {\n\t/**\n\t * Source file where panic occurred\n\t */\n\tfile: string;\n\t/**\n\t * Line number in source file\n\t */\n\tline: number;\n\t/**\n\t * Function name where panic occurred\n\t */\n\tfunction: string;\n\t/**\n\t * Panic message\n\t */\n\tmessage: string;\n\t/**\n\t * Unix timestamp (seconds since epoch)\n\t */\n\ttimestamp_secs: number;\n}\n\n/**\n * Base error class for all Kreuzberg errors.\n *\n * All error types thrown by Kreuzberg extend this class, allowing\n * consumers to catch all Kreuzberg-specific errors with a single catch block.\n *\n * @example\n * ```typescript\n * import { extractFile, KreuzbergError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof KreuzbergError) {\n * console.error('Kreuzberg error:', error.message);\n * if (error.panicContext) {\n * console.error('Panic at:', error.panicContext.file + ':' + error.panicContext.line);\n * }\n * } else {\n * throw error; // Re-throw non-Kreuzberg errors\n * }\n * }\n * ```\n */\nexport class KreuzbergError extends Error {\n\t/**\n\t * Panic context if error was caused by a panic in native code.\n\t * Will be null for non-panic errors.\n\t */\n\tpublic readonly panicContext: PanicContext | null;\n\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message);\n\t\tthis.name = \"KreuzbergError\";\n\t\tthis.panicContext = panicContext ?? null;\n\t\tObject.setPrototypeOf(this, KreuzbergError.prototype);\n\t}\n\n\ttoJSON() {\n\t\treturn {\n\t\t\tname: this.name,\n\t\t\tmessage: this.message,\n\t\t\tpanicContext: this.panicContext,\n\t\t\tstack: this.stack,\n\t\t};\n\t}\n}\n\n/**\n * Error thrown when document validation fails.\n *\n * Validation errors occur when a document doesn't meet specified criteria,\n * such as minimum content length, required metadata fields, or quality thresholds.\n *\n * @example\n * ```typescript\n * import { extractFile, ValidationError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof ValidationError) {\n * console.error('Document validation failed:', error.message);\n * }\n * }\n * ```\n */\nexport class ValidationError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"ValidationError\";\n\t\tObject.setPrototypeOf(this, ValidationError.prototype);\n\t}\n}\n\n/**\n * Error thrown when document parsing fails.\n *\n * Parsing errors occur when a document is corrupted, malformed, or cannot\n * be processed by the extraction engine. This includes issues like:\n * - Corrupted PDF files\n * - Invalid XML/JSON syntax\n * - Unsupported file format versions\n * - Encrypted documents without valid passwords\n *\n * @example\n * ```typescript\n * import { extractFile, ParsingError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('corrupted.pdf');\n * } catch (error) {\n * if (error instanceof ParsingError) {\n * console.error('Failed to parse document:', error.message);\n * }\n * }\n * ```\n */\nexport class ParsingError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"ParsingError\";\n\t\tObject.setPrototypeOf(this, ParsingError.prototype);\n\t}\n}\n\n/**\n * Error thrown when OCR processing fails.\n *\n * OCR errors occur during optical character recognition, such as:\n * - OCR backend initialization failures\n * - Image preprocessing errors\n * - Language model loading issues\n * - OCR engine crashes\n *\n * @example\n * ```typescript\n * import { extractFile, OcrError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('scanned.pdf', null, {\n * ocr: { backend: 'tesseract', language: 'eng' }\n * });\n * } catch (error) {\n * if (error instanceof OcrError) {\n * console.error('OCR processing failed:', error.message);\n * }\n * }\n * ```\n */\nexport class OcrError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"OcrError\";\n\t\tObject.setPrototypeOf(this, OcrError.prototype);\n\t}\n}\n\n/**\n * Error thrown when cache operations fail.\n *\n * Cache errors are typically non-fatal and occur during caching operations, such as:\n * - Cache directory creation failures\n * - Disk write errors\n * - Cache entry corruption\n * - Insufficient disk space\n *\n * These errors are usually logged but don't prevent extraction from completing.\n *\n * @example\n * ```typescript\n * import { extractFile, CacheError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf', null, {\n * useCache: true\n * });\n * } catch (error) {\n * if (error instanceof CacheError) {\n * console.warn('Cache operation failed, continuing without cache:', error.message);\n * }\n * }\n * ```\n */\nexport class CacheError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"CacheError\";\n\t\tObject.setPrototypeOf(this, CacheError.prototype);\n\t}\n}\n\n/**\n * Error thrown when image processing operations fail.\n *\n * Image processing errors occur during image manipulation, such as:\n * - Image decoding failures\n * - Unsupported image formats\n * - Image resizing/scaling errors\n * - DPI adjustment failures\n * - Color space conversion issues\n *\n * @example\n * ```typescript\n * import { extractFile, ImageProcessingError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf', null, {\n * images: {\n * extractImages: true,\n * targetDpi: 300\n * }\n * });\n * } catch (error) {\n * if (error instanceof ImageProcessingError) {\n * console.error('Image processing failed:', error.message);\n * }\n * }\n * ```\n */\nexport class ImageProcessingError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"ImageProcessingError\";\n\t\tObject.setPrototypeOf(this, ImageProcessingError.prototype);\n\t}\n}\n\n/**\n * Error thrown when a plugin operation fails.\n *\n * Plugin errors occur in custom plugins (postprocessors, validators, OCR backends), such as:\n * - Plugin initialization failures\n * - Plugin processing errors\n * - Plugin crashes or timeouts\n * - Invalid plugin configuration\n *\n * The error message includes the plugin name to help identify which plugin failed.\n *\n * @example\n * ```typescript\n * import { extractFile, PluginError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof PluginError) {\n * console.error(`Plugin '${error.pluginName}' failed:`, error.message);\n * }\n * }\n * ```\n */\nexport class PluginError extends KreuzbergError {\n\t/**\n\t * Name of the plugin that threw the error.\n\t */\n\tpublic readonly pluginName: string;\n\n\tconstructor(message: string, pluginName: string, panicContext?: PanicContext | null) {\n\t\tsuper(`Plugin error in '${pluginName}': ${message}`, panicContext);\n\t\tthis.name = \"PluginError\";\n\t\tthis.pluginName = pluginName;\n\t\tObject.setPrototypeOf(this, PluginError.prototype);\n\t}\n\n\toverride toJSON() {\n\t\treturn {\n\t\t\tname: this.name,\n\t\t\tmessage: this.message,\n\t\t\tpluginName: this.pluginName,\n\t\t\tpanicContext: this.panicContext,\n\t\t\tstack: this.stack,\n\t\t};\n\t}\n}\n\n/**\n * Error thrown when a required system dependency is missing.\n *\n * Missing dependency errors occur when external tools or libraries are not available, such as:\n * - LibreOffice (for DOC/PPT/XLS files)\n * - Tesseract OCR (for OCR processing)\n * - ImageMagick (for image processing)\n * - Poppler (for PDF rendering)\n *\n * @example\n * ```typescript\n * import { extractFile, MissingDependencyError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.doc');\n * } catch (error) {\n * if (error instanceof MissingDependencyError) {\n * console.error('Missing dependency:', error.message);\n * console.log('Please install LibreOffice to process DOC files');\n * }\n * }\n * ```\n */\nexport class MissingDependencyError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"MissingDependencyError\";\n\t\tObject.setPrototypeOf(this, MissingDependencyError.prototype);\n\t}\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAyCO,IAAK,YAAL,kBAAKA,eAAL;AAIN,EAAAA,sBAAA,aAAU,KAAV;AAIA,EAAAA,sBAAA,kBAAe,KAAf;AAIA,EAAAA,sBAAA,WAAQ,KAAR;AAIA,EAAAA,sBAAA,qBAAkB,KAAlB;AAIA,EAAAA,sBAAA,aAAU,KAAV;AAIA,EAAAA,sBAAA,kBAAe,KAAf;AAIA,EAAAA,sBAAA,cAAW,KAAX;AAIA,EAAAA,sBAAA,uBAAoB,KAApB;AAhCW,SAAAA;AAAA,GAAA;AAyGL,IAAM,iBAAN,MAAM,wBAAuB,MAAM;AAAA;AAAA;AAAA;AAAA;AAAA,EAKzB;AAAA,EAEhB,YAAY,SAAiB,cAAoC;AAChE,UAAM,OAAO;AACb,SAAK,OAAO;AACZ,SAAK,eAAe,gBAAgB;AACpC,WAAO,eAAe,MAAM,gBAAe,SAAS;AAAA,EACrD;AAAA,EAEA,SAAS;AACR,WAAO;AAAA,MACN,MAAM,KAAK;AAAA,MACX,SAAS,KAAK;AAAA,MACd,cAAc,KAAK;AAAA,MACnB,OAAO,KAAK;AAAA,IACb;AAAA,EACD;AACD;AAqBO,IAAM,kBAAN,MAAM,yBAAwB,eAAe;AAAA,EACnD,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,iBAAgB,SAAS;AAAA,EACtD;AACD;AAyBO,IAAM,eAAN,MAAM,sBAAqB,eAAe;AAAA,EAChD,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,cAAa,SAAS;AAAA,EACnD;AACD;AA0BO,IAAM,WAAN,MAAM,kBAAiB,eAAe;AAAA,EAC5C,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,UAAS,SAAS;AAAA,EAC/C;AACD;AA4BO,IAAM,aAAN,MAAM,oBAAmB,eAAe;AAAA,EAC9C,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,YAAW,SAAS;AAAA,EACjD;AACD;AA8BO,IAAM,uBAAN,MAAM,8BAA6B,eAAe;AAAA,EACxD,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,sBAAqB,SAAS;AAAA,EAC3D;AACD;AA0BO,IAAM,cAAN,MAAM,qBAAoB,eAAe;AAAA;AAAA;AAAA;AAAA,EAI/B;AAAA,EAEhB,YAAY,SAAiB,YAAoB,cAAoC;AACpF,UAAM,oBAAoB,UAAU,MAAM,OAAO,IAAI,YAAY;AACjE,SAAK,OAAO;AACZ,SAAK,aAAa;AAClB,WAAO,eAAe,MAAM,aAAY,SAAS;AAAA,EAClD;AAAA,EAES,SAAS;AACjB,WAAO;AAAA,MACN,MAAM,KAAK;AAAA,MACX,SAAS,KAAK;AAAA,MACd,YAAY,KAAK;AAAA,MACjB,cAAc,KAAK;AAAA,MACnB,OAAO,KAAK;AAAA,IACb;AAAA,EACD;AACD;AAyBO,IAAM,yBAAN,MAAM,gCAA+B,eAAe;AAAA,EAC1D,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,wBAAuB,SAAS;AAAA,EAC7D;AACD;","names":["ErrorCode"]}
1
+ {"version":3,"sources":["../typescript/errors.ts"],"sourcesContent":["/**\n * Error types for Kreuzberg document intelligence framework.\n *\n * These error classes mirror the Rust core error types and provide\n * type-safe error handling for TypeScript consumers.\n *\n * ## Error Hierarchy\n *\n * ```\n * Error (JavaScript built-in)\n * └── KreuzbergError (base class)\n * ├── ValidationError\n * ├── ParsingError\n * ├── OcrError\n * ├── CacheError\n * ├── ImageProcessingError\n * ├── PluginError\n * ├── MissingDependencyError\n * └── ... (other error types)\n * ```\n *\n * @module errors\n */\n\n/**\n * FFI error codes matching kreuzberg-ffi C library error types.\n *\n * @example\n * ```typescript\n * import { ErrorCode, getLastErrorCode } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * const code = getLastErrorCode();\n * if (code === ErrorCode.Panic) {\n * console.error('A panic occurred in the native library');\n * }\n * }\n * ```\n */\nexport enum ErrorCode {\n\t/**\n\t * No error (success)\n\t */\n\tSuccess = 0,\n\t/**\n\t * Generic error\n\t */\n\tGenericError = 1,\n\t/**\n\t * Panic occurred in native code\n\t */\n\tPanic = 2,\n\t/**\n\t * Invalid argument provided\n\t */\n\tInvalidArgument = 3,\n\t/**\n\t * I/O error (file system, network, etc.)\n\t */\n\tIoError = 4,\n\t/**\n\t * Error parsing document content\n\t */\n\tParsingError = 5,\n\t/**\n\t * Error in OCR processing\n\t */\n\tOcrError = 6,\n\t/**\n\t * Required system dependency is missing\n\t */\n\tMissingDependency = 7,\n}\n\n/**\n * Context information for panics in native code.\n *\n * Contains file location, line number, function name, panic message,\n * and timestamp for debugging native library issues.\n *\n * @example\n * ```typescript\n * import { KreuzbergError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof KreuzbergError && error.panicContext) {\n * console.error('Panic occurred:');\n * console.error(`File: ${error.panicContext.file}`);\n * console.error(`Line: ${error.panicContext.line}`);\n * console.error(`Function: ${error.panicContext.function}`);\n * console.error(`Message: ${error.panicContext.message}`);\n * }\n * }\n * ```\n */\nexport interface PanicContext {\n\t/**\n\t * Source file where panic occurred\n\t */\n\tfile: string;\n\t/**\n\t * Line number in source file\n\t */\n\tline: number;\n\t/**\n\t * Function name where panic occurred\n\t */\n\tfunction: string;\n\t/**\n\t * Panic message\n\t */\n\tmessage: string;\n\t/**\n\t * Unix timestamp (seconds since epoch)\n\t */\n\ttimestamp_secs: number;\n}\n\n/**\n * Base error class for all Kreuzberg errors.\n *\n * All error types thrown by Kreuzberg extend this class, allowing\n * consumers to catch all Kreuzberg-specific errors with a single catch block.\n *\n * @example\n * ```typescript\n * import { extractFile, KreuzbergError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof KreuzbergError) {\n * console.error('Kreuzberg error:', error.message);\n * if (error.panicContext) {\n * console.error('Panic at:', error.panicContext.file + ':' + error.panicContext.line);\n * }\n * } else {\n * throw error; // Re-throw non-Kreuzberg errors\n * }\n * }\n * ```\n */\nexport class KreuzbergError extends Error {\n\t/**\n\t * Panic context if error was caused by a panic in native code.\n\t * Will be null for non-panic errors.\n\t */\n\tpublic readonly panicContext: PanicContext | null;\n\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message);\n\t\tthis.name = \"KreuzbergError\";\n\t\tthis.panicContext = panicContext ?? null;\n\t\tObject.setPrototypeOf(this, KreuzbergError.prototype);\n\t}\n\n\ttoJSON() {\n\t\treturn {\n\t\t\tname: this.name,\n\t\t\tmessage: this.message,\n\t\t\tpanicContext: this.panicContext,\n\t\t\tstack: this.stack,\n\t\t};\n\t}\n}\n\n/**\n * Error thrown when document validation fails.\n *\n * Validation errors occur when a document doesn't meet specified criteria,\n * such as minimum content length, required metadata fields, or quality thresholds.\n *\n * @example\n * ```typescript\n * import { extractFile, ValidationError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof ValidationError) {\n * console.error('Document validation failed:', error.message);\n * }\n * }\n * ```\n */\nexport class ValidationError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"ValidationError\";\n\t\tObject.setPrototypeOf(this, ValidationError.prototype);\n\t}\n}\n\n/**\n * Error thrown when document parsing fails.\n *\n * Parsing errors occur when a document is corrupted, malformed, or cannot\n * be processed by the extraction engine. This includes issues like:\n * - Corrupted PDF files\n * - Invalid XML/JSON syntax\n * - Unsupported file format versions\n * - Encrypted documents without valid passwords\n *\n * @example\n * ```typescript\n * import { extractFile, ParsingError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('corrupted.pdf');\n * } catch (error) {\n * if (error instanceof ParsingError) {\n * console.error('Failed to parse document:', error.message);\n * }\n * }\n * ```\n */\nexport class ParsingError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"ParsingError\";\n\t\tObject.setPrototypeOf(this, ParsingError.prototype);\n\t}\n}\n\n/**\n * Error thrown when OCR processing fails.\n *\n * OCR errors occur during optical character recognition, such as:\n * - OCR backend initialization failures\n * - Image preprocessing errors\n * - Language model loading issues\n * - OCR engine crashes\n *\n * @example\n * ```typescript\n * import { extractFile, OcrError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('scanned.pdf', null, {\n * ocr: { backend: 'tesseract', language: 'eng' }\n * });\n * } catch (error) {\n * if (error instanceof OcrError) {\n * console.error('OCR processing failed:', error.message);\n * }\n * }\n * ```\n */\nexport class OcrError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"OcrError\";\n\t\tObject.setPrototypeOf(this, OcrError.prototype);\n\t}\n}\n\n/**\n * Error thrown when cache operations fail.\n *\n * Cache errors are typically non-fatal and occur during caching operations, such as:\n * - Cache directory creation failures\n * - Disk write errors\n * - Cache entry corruption\n * - Insufficient disk space\n *\n * These errors are usually logged but don't prevent extraction from completing.\n *\n * @example\n * ```typescript\n * import { extractFile, CacheError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf', null, {\n * useCache: true\n * });\n * } catch (error) {\n * if (error instanceof CacheError) {\n * console.warn('Cache operation failed, continuing without cache:', error.message);\n * }\n * }\n * ```\n */\nexport class CacheError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"CacheError\";\n\t\tObject.setPrototypeOf(this, CacheError.prototype);\n\t}\n}\n\n/**\n * Error thrown when image processing operations fail.\n *\n * Image processing errors occur during image manipulation, such as:\n * - Image decoding failures\n * - Unsupported image formats\n * - Image resizing/scaling errors\n * - DPI adjustment failures\n * - Color space conversion issues\n *\n * @example\n * ```typescript\n * import { extractFile, ImageProcessingError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf', null, {\n * images: {\n * extractImages: true,\n * targetDpi: 300\n * }\n * });\n * } catch (error) {\n * if (error instanceof ImageProcessingError) {\n * console.error('Image processing failed:', error.message);\n * }\n * }\n * ```\n */\nexport class ImageProcessingError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"ImageProcessingError\";\n\t\tObject.setPrototypeOf(this, ImageProcessingError.prototype);\n\t}\n}\n\n/**\n * Error thrown when a plugin operation fails.\n *\n * Plugin errors occur in custom plugins (postprocessors, validators, OCR backends), such as:\n * - Plugin initialization failures\n * - Plugin processing errors\n * - Plugin crashes or timeouts\n * - Invalid plugin configuration\n *\n * The error message includes the plugin name to help identify which plugin failed.\n *\n * @example\n * ```typescript\n * import { extractFile, PluginError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof PluginError) {\n * console.error(`Plugin '${error.pluginName}' failed:`, error.message);\n * }\n * }\n * ```\n */\nexport class PluginError extends KreuzbergError {\n\t/**\n\t * Name of the plugin that threw the error.\n\t */\n\tpublic readonly pluginName: string;\n\n\tconstructor(message: string, pluginName: string, panicContext?: PanicContext | null) {\n\t\tsuper(`Plugin error in '${pluginName}': ${message}`, panicContext);\n\t\tthis.name = \"PluginError\";\n\t\tthis.pluginName = pluginName;\n\t\tObject.setPrototypeOf(this, PluginError.prototype);\n\t}\n\n\toverride toJSON() {\n\t\treturn {\n\t\t\tname: this.name,\n\t\t\tmessage: this.message,\n\t\t\tpluginName: this.pluginName,\n\t\t\tpanicContext: this.panicContext,\n\t\t\tstack: this.stack,\n\t\t};\n\t}\n}\n\n/**\n * Error thrown when a required system dependency is missing.\n *\n * Missing dependency errors occur when external tools or libraries are not available, such as:\n * - Tesseract OCR (for OCR processing)\n * - ImageMagick (for image processing)\n * - Poppler (for PDF rendering)\n *\n * @example\n * ```typescript\n * import { extractFile, MissingDependencyError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof MissingDependencyError) {\n * console.error('Missing dependency:', error.message);\n * console.log('Please install Tesseract OCR for image processing');\n * }\n * }\n * ```\n */\nexport class MissingDependencyError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"MissingDependencyError\";\n\t\tObject.setPrototypeOf(this, MissingDependencyError.prototype);\n\t}\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAyCO,IAAK,YAAL,kBAAKA,eAAL;AAIN,EAAAA,sBAAA,aAAU,KAAV;AAIA,EAAAA,sBAAA,kBAAe,KAAf;AAIA,EAAAA,sBAAA,WAAQ,KAAR;AAIA,EAAAA,sBAAA,qBAAkB,KAAlB;AAIA,EAAAA,sBAAA,aAAU,KAAV;AAIA,EAAAA,sBAAA,kBAAe,KAAf;AAIA,EAAAA,sBAAA,cAAW,KAAX;AAIA,EAAAA,sBAAA,uBAAoB,KAApB;AAhCW,SAAAA;AAAA,GAAA;AAyGL,IAAM,iBAAN,MAAM,wBAAuB,MAAM;AAAA;AAAA;AAAA;AAAA;AAAA,EAKzB;AAAA,EAEhB,YAAY,SAAiB,cAAoC;AAChE,UAAM,OAAO;AACb,SAAK,OAAO;AACZ,SAAK,eAAe,gBAAgB;AACpC,WAAO,eAAe,MAAM,gBAAe,SAAS;AAAA,EACrD;AAAA,EAEA,SAAS;AACR,WAAO;AAAA,MACN,MAAM,KAAK;AAAA,MACX,SAAS,KAAK;AAAA,MACd,cAAc,KAAK;AAAA,MACnB,OAAO,KAAK;AAAA,IACb;AAAA,EACD;AACD;AAqBO,IAAM,kBAAN,MAAM,yBAAwB,eAAe;AAAA,EACnD,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,iBAAgB,SAAS;AAAA,EACtD;AACD;AAyBO,IAAM,eAAN,MAAM,sBAAqB,eAAe;AAAA,EAChD,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,cAAa,SAAS;AAAA,EACnD;AACD;AA0BO,IAAM,WAAN,MAAM,kBAAiB,eAAe;AAAA,EAC5C,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,UAAS,SAAS;AAAA,EAC/C;AACD;AA4BO,IAAM,aAAN,MAAM,oBAAmB,eAAe;AAAA,EAC9C,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,YAAW,SAAS;AAAA,EACjD;AACD;AA8BO,IAAM,uBAAN,MAAM,8BAA6B,eAAe;AAAA,EACxD,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,sBAAqB,SAAS;AAAA,EAC3D;AACD;AA0BO,IAAM,cAAN,MAAM,qBAAoB,eAAe;AAAA;AAAA;AAAA;AAAA,EAI/B;AAAA,EAEhB,YAAY,SAAiB,YAAoB,cAAoC;AACpF,UAAM,oBAAoB,UAAU,MAAM,OAAO,IAAI,YAAY;AACjE,SAAK,OAAO;AACZ,SAAK,aAAa;AAClB,WAAO,eAAe,MAAM,aAAY,SAAS;AAAA,EAClD;AAAA,EAES,SAAS;AACjB,WAAO;AAAA,MACN,MAAM,KAAK;AAAA,MACX,SAAS,KAAK;AAAA,MACd,YAAY,KAAK;AAAA,MACjB,cAAc,KAAK;AAAA,MACnB,OAAO,KAAK;AAAA,IACb;AAAA,EACD;AACD;AAwBO,IAAM,yBAAN,MAAM,gCAA+B,eAAe;AAAA,EAC1D,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,wBAAuB,SAAS;AAAA,EAC7D;AACD;","names":["ErrorCode"]}
@@ -1 +1 @@
1
- {"version":3,"sources":["../typescript/errors.ts"],"sourcesContent":["/**\n * Error types for Kreuzberg document intelligence framework.\n *\n * These error classes mirror the Rust core error types and provide\n * type-safe error handling for TypeScript consumers.\n *\n * ## Error Hierarchy\n *\n * ```\n * Error (JavaScript built-in)\n * └── KreuzbergError (base class)\n * ├── ValidationError\n * ├── ParsingError\n * ├── OcrError\n * ├── CacheError\n * ├── ImageProcessingError\n * ├── PluginError\n * ├── MissingDependencyError\n * └── ... (other error types)\n * ```\n *\n * @module errors\n */\n\n/**\n * FFI error codes matching kreuzberg-ffi C library error types.\n *\n * @example\n * ```typescript\n * import { ErrorCode, getLastErrorCode } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * const code = getLastErrorCode();\n * if (code === ErrorCode.Panic) {\n * console.error('A panic occurred in the native library');\n * }\n * }\n * ```\n */\nexport enum ErrorCode {\n\t/**\n\t * No error (success)\n\t */\n\tSuccess = 0,\n\t/**\n\t * Generic error\n\t */\n\tGenericError = 1,\n\t/**\n\t * Panic occurred in native code\n\t */\n\tPanic = 2,\n\t/**\n\t * Invalid argument provided\n\t */\n\tInvalidArgument = 3,\n\t/**\n\t * I/O error (file system, network, etc.)\n\t */\n\tIoError = 4,\n\t/**\n\t * Error parsing document content\n\t */\n\tParsingError = 5,\n\t/**\n\t * Error in OCR processing\n\t */\n\tOcrError = 6,\n\t/**\n\t * Required system dependency is missing\n\t */\n\tMissingDependency = 7,\n}\n\n/**\n * Context information for panics in native code.\n *\n * Contains file location, line number, function name, panic message,\n * and timestamp for debugging native library issues.\n *\n * @example\n * ```typescript\n * import { KreuzbergError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof KreuzbergError && error.panicContext) {\n * console.error('Panic occurred:');\n * console.error(`File: ${error.panicContext.file}`);\n * console.error(`Line: ${error.panicContext.line}`);\n * console.error(`Function: ${error.panicContext.function}`);\n * console.error(`Message: ${error.panicContext.message}`);\n * }\n * }\n * ```\n */\nexport interface PanicContext {\n\t/**\n\t * Source file where panic occurred\n\t */\n\tfile: string;\n\t/**\n\t * Line number in source file\n\t */\n\tline: number;\n\t/**\n\t * Function name where panic occurred\n\t */\n\tfunction: string;\n\t/**\n\t * Panic message\n\t */\n\tmessage: string;\n\t/**\n\t * Unix timestamp (seconds since epoch)\n\t */\n\ttimestamp_secs: number;\n}\n\n/**\n * Base error class for all Kreuzberg errors.\n *\n * All error types thrown by Kreuzberg extend this class, allowing\n * consumers to catch all Kreuzberg-specific errors with a single catch block.\n *\n * @example\n * ```typescript\n * import { extractFile, KreuzbergError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof KreuzbergError) {\n * console.error('Kreuzberg error:', error.message);\n * if (error.panicContext) {\n * console.error('Panic at:', error.panicContext.file + ':' + error.panicContext.line);\n * }\n * } else {\n * throw error; // Re-throw non-Kreuzberg errors\n * }\n * }\n * ```\n */\nexport class KreuzbergError extends Error {\n\t/**\n\t * Panic context if error was caused by a panic in native code.\n\t * Will be null for non-panic errors.\n\t */\n\tpublic readonly panicContext: PanicContext | null;\n\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message);\n\t\tthis.name = \"KreuzbergError\";\n\t\tthis.panicContext = panicContext ?? null;\n\t\tObject.setPrototypeOf(this, KreuzbergError.prototype);\n\t}\n\n\ttoJSON() {\n\t\treturn {\n\t\t\tname: this.name,\n\t\t\tmessage: this.message,\n\t\t\tpanicContext: this.panicContext,\n\t\t\tstack: this.stack,\n\t\t};\n\t}\n}\n\n/**\n * Error thrown when document validation fails.\n *\n * Validation errors occur when a document doesn't meet specified criteria,\n * such as minimum content length, required metadata fields, or quality thresholds.\n *\n * @example\n * ```typescript\n * import { extractFile, ValidationError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof ValidationError) {\n * console.error('Document validation failed:', error.message);\n * }\n * }\n * ```\n */\nexport class ValidationError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"ValidationError\";\n\t\tObject.setPrototypeOf(this, ValidationError.prototype);\n\t}\n}\n\n/**\n * Error thrown when document parsing fails.\n *\n * Parsing errors occur when a document is corrupted, malformed, or cannot\n * be processed by the extraction engine. This includes issues like:\n * - Corrupted PDF files\n * - Invalid XML/JSON syntax\n * - Unsupported file format versions\n * - Encrypted documents without valid passwords\n *\n * @example\n * ```typescript\n * import { extractFile, ParsingError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('corrupted.pdf');\n * } catch (error) {\n * if (error instanceof ParsingError) {\n * console.error('Failed to parse document:', error.message);\n * }\n * }\n * ```\n */\nexport class ParsingError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"ParsingError\";\n\t\tObject.setPrototypeOf(this, ParsingError.prototype);\n\t}\n}\n\n/**\n * Error thrown when OCR processing fails.\n *\n * OCR errors occur during optical character recognition, such as:\n * - OCR backend initialization failures\n * - Image preprocessing errors\n * - Language model loading issues\n * - OCR engine crashes\n *\n * @example\n * ```typescript\n * import { extractFile, OcrError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('scanned.pdf', null, {\n * ocr: { backend: 'tesseract', language: 'eng' }\n * });\n * } catch (error) {\n * if (error instanceof OcrError) {\n * console.error('OCR processing failed:', error.message);\n * }\n * }\n * ```\n */\nexport class OcrError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"OcrError\";\n\t\tObject.setPrototypeOf(this, OcrError.prototype);\n\t}\n}\n\n/**\n * Error thrown when cache operations fail.\n *\n * Cache errors are typically non-fatal and occur during caching operations, such as:\n * - Cache directory creation failures\n * - Disk write errors\n * - Cache entry corruption\n * - Insufficient disk space\n *\n * These errors are usually logged but don't prevent extraction from completing.\n *\n * @example\n * ```typescript\n * import { extractFile, CacheError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf', null, {\n * useCache: true\n * });\n * } catch (error) {\n * if (error instanceof CacheError) {\n * console.warn('Cache operation failed, continuing without cache:', error.message);\n * }\n * }\n * ```\n */\nexport class CacheError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"CacheError\";\n\t\tObject.setPrototypeOf(this, CacheError.prototype);\n\t}\n}\n\n/**\n * Error thrown when image processing operations fail.\n *\n * Image processing errors occur during image manipulation, such as:\n * - Image decoding failures\n * - Unsupported image formats\n * - Image resizing/scaling errors\n * - DPI adjustment failures\n * - Color space conversion issues\n *\n * @example\n * ```typescript\n * import { extractFile, ImageProcessingError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf', null, {\n * images: {\n * extractImages: true,\n * targetDpi: 300\n * }\n * });\n * } catch (error) {\n * if (error instanceof ImageProcessingError) {\n * console.error('Image processing failed:', error.message);\n * }\n * }\n * ```\n */\nexport class ImageProcessingError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"ImageProcessingError\";\n\t\tObject.setPrototypeOf(this, ImageProcessingError.prototype);\n\t}\n}\n\n/**\n * Error thrown when a plugin operation fails.\n *\n * Plugin errors occur in custom plugins (postprocessors, validators, OCR backends), such as:\n * - Plugin initialization failures\n * - Plugin processing errors\n * - Plugin crashes or timeouts\n * - Invalid plugin configuration\n *\n * The error message includes the plugin name to help identify which plugin failed.\n *\n * @example\n * ```typescript\n * import { extractFile, PluginError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof PluginError) {\n * console.error(`Plugin '${error.pluginName}' failed:`, error.message);\n * }\n * }\n * ```\n */\nexport class PluginError extends KreuzbergError {\n\t/**\n\t * Name of the plugin that threw the error.\n\t */\n\tpublic readonly pluginName: string;\n\n\tconstructor(message: string, pluginName: string, panicContext?: PanicContext | null) {\n\t\tsuper(`Plugin error in '${pluginName}': ${message}`, panicContext);\n\t\tthis.name = \"PluginError\";\n\t\tthis.pluginName = pluginName;\n\t\tObject.setPrototypeOf(this, PluginError.prototype);\n\t}\n\n\toverride toJSON() {\n\t\treturn {\n\t\t\tname: this.name,\n\t\t\tmessage: this.message,\n\t\t\tpluginName: this.pluginName,\n\t\t\tpanicContext: this.panicContext,\n\t\t\tstack: this.stack,\n\t\t};\n\t}\n}\n\n/**\n * Error thrown when a required system dependency is missing.\n *\n * Missing dependency errors occur when external tools or libraries are not available, such as:\n * - LibreOffice (for DOC/PPT/XLS files)\n * - Tesseract OCR (for OCR processing)\n * - ImageMagick (for image processing)\n * - Poppler (for PDF rendering)\n *\n * @example\n * ```typescript\n * import { extractFile, MissingDependencyError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.doc');\n * } catch (error) {\n * if (error instanceof MissingDependencyError) {\n * console.error('Missing dependency:', error.message);\n * console.log('Please install LibreOffice to process DOC files');\n * }\n * }\n * ```\n */\nexport class MissingDependencyError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"MissingDependencyError\";\n\t\tObject.setPrototypeOf(this, MissingDependencyError.prototype);\n\t}\n}\n"],"mappings":";AAyCO,IAAK,YAAL,kBAAKA,eAAL;AAIN,EAAAA,sBAAA,aAAU,KAAV;AAIA,EAAAA,sBAAA,kBAAe,KAAf;AAIA,EAAAA,sBAAA,WAAQ,KAAR;AAIA,EAAAA,sBAAA,qBAAkB,KAAlB;AAIA,EAAAA,sBAAA,aAAU,KAAV;AAIA,EAAAA,sBAAA,kBAAe,KAAf;AAIA,EAAAA,sBAAA,cAAW,KAAX;AAIA,EAAAA,sBAAA,uBAAoB,KAApB;AAhCW,SAAAA;AAAA,GAAA;AAyGL,IAAM,iBAAN,MAAM,wBAAuB,MAAM;AAAA;AAAA;AAAA;AAAA;AAAA,EAKzB;AAAA,EAEhB,YAAY,SAAiB,cAAoC;AAChE,UAAM,OAAO;AACb,SAAK,OAAO;AACZ,SAAK,eAAe,gBAAgB;AACpC,WAAO,eAAe,MAAM,gBAAe,SAAS;AAAA,EACrD;AAAA,EAEA,SAAS;AACR,WAAO;AAAA,MACN,MAAM,KAAK;AAAA,MACX,SAAS,KAAK;AAAA,MACd,cAAc,KAAK;AAAA,MACnB,OAAO,KAAK;AAAA,IACb;AAAA,EACD;AACD;AAqBO,IAAM,kBAAN,MAAM,yBAAwB,eAAe;AAAA,EACnD,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,iBAAgB,SAAS;AAAA,EACtD;AACD;AAyBO,IAAM,eAAN,MAAM,sBAAqB,eAAe;AAAA,EAChD,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,cAAa,SAAS;AAAA,EACnD;AACD;AA0BO,IAAM,WAAN,MAAM,kBAAiB,eAAe;AAAA,EAC5C,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,UAAS,SAAS;AAAA,EAC/C;AACD;AA4BO,IAAM,aAAN,MAAM,oBAAmB,eAAe;AAAA,EAC9C,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,YAAW,SAAS;AAAA,EACjD;AACD;AA8BO,IAAM,uBAAN,MAAM,8BAA6B,eAAe;AAAA,EACxD,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,sBAAqB,SAAS;AAAA,EAC3D;AACD;AA0BO,IAAM,cAAN,MAAM,qBAAoB,eAAe;AAAA;AAAA;AAAA;AAAA,EAI/B;AAAA,EAEhB,YAAY,SAAiB,YAAoB,cAAoC;AACpF,UAAM,oBAAoB,UAAU,MAAM,OAAO,IAAI,YAAY;AACjE,SAAK,OAAO;AACZ,SAAK,aAAa;AAClB,WAAO,eAAe,MAAM,aAAY,SAAS;AAAA,EAClD;AAAA,EAES,SAAS;AACjB,WAAO;AAAA,MACN,MAAM,KAAK;AAAA,MACX,SAAS,KAAK;AAAA,MACd,YAAY,KAAK;AAAA,MACjB,cAAc,KAAK;AAAA,MACnB,OAAO,KAAK;AAAA,IACb;AAAA,EACD;AACD;AAyBO,IAAM,yBAAN,MAAM,gCAA+B,eAAe;AAAA,EAC1D,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,wBAAuB,SAAS;AAAA,EAC7D;AACD;","names":["ErrorCode"]}
1
+ {"version":3,"sources":["../typescript/errors.ts"],"sourcesContent":["/**\n * Error types for Kreuzberg document intelligence framework.\n *\n * These error classes mirror the Rust core error types and provide\n * type-safe error handling for TypeScript consumers.\n *\n * ## Error Hierarchy\n *\n * ```\n * Error (JavaScript built-in)\n * └── KreuzbergError (base class)\n * ├── ValidationError\n * ├── ParsingError\n * ├── OcrError\n * ├── CacheError\n * ├── ImageProcessingError\n * ├── PluginError\n * ├── MissingDependencyError\n * └── ... (other error types)\n * ```\n *\n * @module errors\n */\n\n/**\n * FFI error codes matching kreuzberg-ffi C library error types.\n *\n * @example\n * ```typescript\n * import { ErrorCode, getLastErrorCode } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * const code = getLastErrorCode();\n * if (code === ErrorCode.Panic) {\n * console.error('A panic occurred in the native library');\n * }\n * }\n * ```\n */\nexport enum ErrorCode {\n\t/**\n\t * No error (success)\n\t */\n\tSuccess = 0,\n\t/**\n\t * Generic error\n\t */\n\tGenericError = 1,\n\t/**\n\t * Panic occurred in native code\n\t */\n\tPanic = 2,\n\t/**\n\t * Invalid argument provided\n\t */\n\tInvalidArgument = 3,\n\t/**\n\t * I/O error (file system, network, etc.)\n\t */\n\tIoError = 4,\n\t/**\n\t * Error parsing document content\n\t */\n\tParsingError = 5,\n\t/**\n\t * Error in OCR processing\n\t */\n\tOcrError = 6,\n\t/**\n\t * Required system dependency is missing\n\t */\n\tMissingDependency = 7,\n}\n\n/**\n * Context information for panics in native code.\n *\n * Contains file location, line number, function name, panic message,\n * and timestamp for debugging native library issues.\n *\n * @example\n * ```typescript\n * import { KreuzbergError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof KreuzbergError && error.panicContext) {\n * console.error('Panic occurred:');\n * console.error(`File: ${error.panicContext.file}`);\n * console.error(`Line: ${error.panicContext.line}`);\n * console.error(`Function: ${error.panicContext.function}`);\n * console.error(`Message: ${error.panicContext.message}`);\n * }\n * }\n * ```\n */\nexport interface PanicContext {\n\t/**\n\t * Source file where panic occurred\n\t */\n\tfile: string;\n\t/**\n\t * Line number in source file\n\t */\n\tline: number;\n\t/**\n\t * Function name where panic occurred\n\t */\n\tfunction: string;\n\t/**\n\t * Panic message\n\t */\n\tmessage: string;\n\t/**\n\t * Unix timestamp (seconds since epoch)\n\t */\n\ttimestamp_secs: number;\n}\n\n/**\n * Base error class for all Kreuzberg errors.\n *\n * All error types thrown by Kreuzberg extend this class, allowing\n * consumers to catch all Kreuzberg-specific errors with a single catch block.\n *\n * @example\n * ```typescript\n * import { extractFile, KreuzbergError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof KreuzbergError) {\n * console.error('Kreuzberg error:', error.message);\n * if (error.panicContext) {\n * console.error('Panic at:', error.panicContext.file + ':' + error.panicContext.line);\n * }\n * } else {\n * throw error; // Re-throw non-Kreuzberg errors\n * }\n * }\n * ```\n */\nexport class KreuzbergError extends Error {\n\t/**\n\t * Panic context if error was caused by a panic in native code.\n\t * Will be null for non-panic errors.\n\t */\n\tpublic readonly panicContext: PanicContext | null;\n\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message);\n\t\tthis.name = \"KreuzbergError\";\n\t\tthis.panicContext = panicContext ?? null;\n\t\tObject.setPrototypeOf(this, KreuzbergError.prototype);\n\t}\n\n\ttoJSON() {\n\t\treturn {\n\t\t\tname: this.name,\n\t\t\tmessage: this.message,\n\t\t\tpanicContext: this.panicContext,\n\t\t\tstack: this.stack,\n\t\t};\n\t}\n}\n\n/**\n * Error thrown when document validation fails.\n *\n * Validation errors occur when a document doesn't meet specified criteria,\n * such as minimum content length, required metadata fields, or quality thresholds.\n *\n * @example\n * ```typescript\n * import { extractFile, ValidationError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof ValidationError) {\n * console.error('Document validation failed:', error.message);\n * }\n * }\n * ```\n */\nexport class ValidationError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"ValidationError\";\n\t\tObject.setPrototypeOf(this, ValidationError.prototype);\n\t}\n}\n\n/**\n * Error thrown when document parsing fails.\n *\n * Parsing errors occur when a document is corrupted, malformed, or cannot\n * be processed by the extraction engine. This includes issues like:\n * - Corrupted PDF files\n * - Invalid XML/JSON syntax\n * - Unsupported file format versions\n * - Encrypted documents without valid passwords\n *\n * @example\n * ```typescript\n * import { extractFile, ParsingError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('corrupted.pdf');\n * } catch (error) {\n * if (error instanceof ParsingError) {\n * console.error('Failed to parse document:', error.message);\n * }\n * }\n * ```\n */\nexport class ParsingError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"ParsingError\";\n\t\tObject.setPrototypeOf(this, ParsingError.prototype);\n\t}\n}\n\n/**\n * Error thrown when OCR processing fails.\n *\n * OCR errors occur during optical character recognition, such as:\n * - OCR backend initialization failures\n * - Image preprocessing errors\n * - Language model loading issues\n * - OCR engine crashes\n *\n * @example\n * ```typescript\n * import { extractFile, OcrError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('scanned.pdf', null, {\n * ocr: { backend: 'tesseract', language: 'eng' }\n * });\n * } catch (error) {\n * if (error instanceof OcrError) {\n * console.error('OCR processing failed:', error.message);\n * }\n * }\n * ```\n */\nexport class OcrError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"OcrError\";\n\t\tObject.setPrototypeOf(this, OcrError.prototype);\n\t}\n}\n\n/**\n * Error thrown when cache operations fail.\n *\n * Cache errors are typically non-fatal and occur during caching operations, such as:\n * - Cache directory creation failures\n * - Disk write errors\n * - Cache entry corruption\n * - Insufficient disk space\n *\n * These errors are usually logged but don't prevent extraction from completing.\n *\n * @example\n * ```typescript\n * import { extractFile, CacheError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf', null, {\n * useCache: true\n * });\n * } catch (error) {\n * if (error instanceof CacheError) {\n * console.warn('Cache operation failed, continuing without cache:', error.message);\n * }\n * }\n * ```\n */\nexport class CacheError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"CacheError\";\n\t\tObject.setPrototypeOf(this, CacheError.prototype);\n\t}\n}\n\n/**\n * Error thrown when image processing operations fail.\n *\n * Image processing errors occur during image manipulation, such as:\n * - Image decoding failures\n * - Unsupported image formats\n * - Image resizing/scaling errors\n * - DPI adjustment failures\n * - Color space conversion issues\n *\n * @example\n * ```typescript\n * import { extractFile, ImageProcessingError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf', null, {\n * images: {\n * extractImages: true,\n * targetDpi: 300\n * }\n * });\n * } catch (error) {\n * if (error instanceof ImageProcessingError) {\n * console.error('Image processing failed:', error.message);\n * }\n * }\n * ```\n */\nexport class ImageProcessingError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"ImageProcessingError\";\n\t\tObject.setPrototypeOf(this, ImageProcessingError.prototype);\n\t}\n}\n\n/**\n * Error thrown when a plugin operation fails.\n *\n * Plugin errors occur in custom plugins (postprocessors, validators, OCR backends), such as:\n * - Plugin initialization failures\n * - Plugin processing errors\n * - Plugin crashes or timeouts\n * - Invalid plugin configuration\n *\n * The error message includes the plugin name to help identify which plugin failed.\n *\n * @example\n * ```typescript\n * import { extractFile, PluginError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof PluginError) {\n * console.error(`Plugin '${error.pluginName}' failed:`, error.message);\n * }\n * }\n * ```\n */\nexport class PluginError extends KreuzbergError {\n\t/**\n\t * Name of the plugin that threw the error.\n\t */\n\tpublic readonly pluginName: string;\n\n\tconstructor(message: string, pluginName: string, panicContext?: PanicContext | null) {\n\t\tsuper(`Plugin error in '${pluginName}': ${message}`, panicContext);\n\t\tthis.name = \"PluginError\";\n\t\tthis.pluginName = pluginName;\n\t\tObject.setPrototypeOf(this, PluginError.prototype);\n\t}\n\n\toverride toJSON() {\n\t\treturn {\n\t\t\tname: this.name,\n\t\t\tmessage: this.message,\n\t\t\tpluginName: this.pluginName,\n\t\t\tpanicContext: this.panicContext,\n\t\t\tstack: this.stack,\n\t\t};\n\t}\n}\n\n/**\n * Error thrown when a required system dependency is missing.\n *\n * Missing dependency errors occur when external tools or libraries are not available, such as:\n * - Tesseract OCR (for OCR processing)\n * - ImageMagick (for image processing)\n * - Poppler (for PDF rendering)\n *\n * @example\n * ```typescript\n * import { extractFile, MissingDependencyError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof MissingDependencyError) {\n * console.error('Missing dependency:', error.message);\n * console.log('Please install Tesseract OCR for image processing');\n * }\n * }\n * ```\n */\nexport class MissingDependencyError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"MissingDependencyError\";\n\t\tObject.setPrototypeOf(this, MissingDependencyError.prototype);\n\t}\n}\n"],"mappings":";AAyCO,IAAK,YAAL,kBAAKA,eAAL;AAIN,EAAAA,sBAAA,aAAU,KAAV;AAIA,EAAAA,sBAAA,kBAAe,KAAf;AAIA,EAAAA,sBAAA,WAAQ,KAAR;AAIA,EAAAA,sBAAA,qBAAkB,KAAlB;AAIA,EAAAA,sBAAA,aAAU,KAAV;AAIA,EAAAA,sBAAA,kBAAe,KAAf;AAIA,EAAAA,sBAAA,cAAW,KAAX;AAIA,EAAAA,sBAAA,uBAAoB,KAApB;AAhCW,SAAAA;AAAA,GAAA;AAyGL,IAAM,iBAAN,MAAM,wBAAuB,MAAM;AAAA;AAAA;AAAA;AAAA;AAAA,EAKzB;AAAA,EAEhB,YAAY,SAAiB,cAAoC;AAChE,UAAM,OAAO;AACb,SAAK,OAAO;AACZ,SAAK,eAAe,gBAAgB;AACpC,WAAO,eAAe,MAAM,gBAAe,SAAS;AAAA,EACrD;AAAA,EAEA,SAAS;AACR,WAAO;AAAA,MACN,MAAM,KAAK;AAAA,MACX,SAAS,KAAK;AAAA,MACd,cAAc,KAAK;AAAA,MACnB,OAAO,KAAK;AAAA,IACb;AAAA,EACD;AACD;AAqBO,IAAM,kBAAN,MAAM,yBAAwB,eAAe;AAAA,EACnD,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,iBAAgB,SAAS;AAAA,EACtD;AACD;AAyBO,IAAM,eAAN,MAAM,sBAAqB,eAAe;AAAA,EAChD,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,cAAa,SAAS;AAAA,EACnD;AACD;AA0BO,IAAM,WAAN,MAAM,kBAAiB,eAAe;AAAA,EAC5C,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,UAAS,SAAS;AAAA,EAC/C;AACD;AA4BO,IAAM,aAAN,MAAM,oBAAmB,eAAe;AAAA,EAC9C,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,YAAW,SAAS;AAAA,EACjD;AACD;AA8BO,IAAM,uBAAN,MAAM,8BAA6B,eAAe;AAAA,EACxD,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,sBAAqB,SAAS;AAAA,EAC3D;AACD;AA0BO,IAAM,cAAN,MAAM,qBAAoB,eAAe;AAAA;AAAA;AAAA;AAAA,EAI/B;AAAA,EAEhB,YAAY,SAAiB,YAAoB,cAAoC;AACpF,UAAM,oBAAoB,UAAU,MAAM,OAAO,IAAI,YAAY;AACjE,SAAK,OAAO;AACZ,SAAK,aAAa;AAClB,WAAO,eAAe,MAAM,aAAY,SAAS;AAAA,EAClD;AAAA,EAES,SAAS;AACjB,WAAO;AAAA,MACN,MAAM,KAAK;AAAA,MACX,SAAS,KAAK;AAAA,MACd,YAAY,KAAK;AAAA,MACjB,cAAc,KAAK;AAAA,MACnB,OAAO,KAAK;AAAA,IACb;AAAA,EACD;AACD;AAwBO,IAAM,yBAAN,MAAM,gCAA+B,eAAe;AAAA,EAC1D,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,wBAAuB,SAAS;AAAA,EAC7D;AACD;","names":["ErrorCode"]}
package/dist/index.d.mts CHANGED
@@ -2,7 +2,6 @@ import { ErrorClassification, ExtractionConfig, ExtractionResult, WorkerPool, Wo
2
2
  export { Chunk, ChunkingConfig, ExtractedImage, HtmlConversionOptions, HtmlPreprocessingOptions, ImageExtractionConfig, KeywordConfig, LanguageDetectionConfig, OcrConfig, PageContent, PageExtractionConfig, PdfConfig, PostProcessorConfig, Table, TesseractConfig, TokenReductionConfig } from './types.mjs';
3
3
  import { PanicContext } from './errors.mjs';
4
4
  export { CacheError, ErrorCode, ImageProcessingError, KreuzbergError, MissingDependencyError, OcrError, ParsingError, PluginError, ValidationError } from './errors.mjs';
5
- export { GutenOcrBackend } from './ocr/guten-ocr.mjs';
6
5
 
7
6
  /**
8
7
  * Get the error code for the last FFI error.
@@ -797,19 +796,11 @@ declare function listValidators(): string[];
797
796
  *
798
797
  * @example
799
798
  * ```typescript
800
- * import { GutenOcrBackend } from '@kreuzberg/node/ocr/guten-ocr';
801
- * import { registerOcrBackend, extractFile } from '@kreuzberg/node';
802
- *
803
- * // Create and initialize backend
804
- * const backend = new GutenOcrBackend();
805
- * await backend.initialize();
806
- *
807
- * // Register with Kreuzberg
808
- * registerOcrBackend(backend);
799
+ * import { extractFile } from '@kreuzberg/node';
809
800
  *
810
- * // Use in extraction
801
+ * // PaddleOCR is built into the native Rust core - just use the backend name
811
802
  * const result = await extractFile('scanned.pdf', null, {
812
- * ocr: { backend: 'guten-ocr', language: 'en' }
803
+ * ocr: { backend: 'paddle-ocr', language: 'en' }
813
804
  * });
814
805
  * console.log(result.content);
815
806
  * ```
@@ -1176,7 +1167,7 @@ declare function __resetBindingForTests(): void;
1176
1167
  *
1177
1168
  * ## Supported Formats
1178
1169
  *
1179
- * - **Documents**: PDF, DOCX, PPTX, XLSX, DOC, PPT (with LibreOffice)
1170
+ * - **Documents**: PDF, DOCX, PPTX, XLSX, DOC, PPT
1180
1171
  * - **Text**: Markdown, Plain Text, XML
1181
1172
  * - **Web**: HTML (converted to Markdown)
1182
1173
  * - **Data**: JSON, YAML, TOML
@@ -1200,6 +1191,6 @@ declare function __resetBindingForTests(): void;
1200
1191
  * @module @kreuzberg/node
1201
1192
  */
1202
1193
 
1203
- declare const __version__ = "4.2.15";
1194
+ declare const __version__ = "4.3.0";
1204
1195
 
1205
1196
  export { type EmbeddingPreset, ErrorClassification, ExtractionConfig, ExtractionResult, OcrBackendProtocol, PanicContext, PostProcessorProtocol, ValidatorProtocol, WorkerPool, WorkerPoolStats, __resetBindingForTests, __setBindingForTests, __version__, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, batchExtractFilesInWorker, batchExtractFilesSync, classifyError, clearDocumentExtractors, clearOcrBackends, clearPostProcessors, clearValidators, closeWorkerPool, createWorkerPool, detectMimeType, detectMimeTypeFromPath, extractBytes, extractBytesSync, extractFile, extractFileInWorker, extractFileSync, getEmbeddingPreset, getErrorCodeDescription, getErrorCodeName, getExtensionsForMime, getLastErrorCode, getLastPanicContext, getWorkerPoolStats, listDocumentExtractors, listEmbeddingPresets, listOcrBackends, listPostProcessors, listValidators, loadConfigFile, loadConfigFromPath, registerOcrBackend, registerPostProcessor, registerValidator, unregisterDocumentExtractor, unregisterOcrBackend, unregisterPostProcessor, unregisterValidator, validateMimeType };
package/dist/index.d.ts CHANGED
@@ -2,7 +2,6 @@ import { ErrorClassification, ExtractionConfig, ExtractionResult, WorkerPool, Wo
2
2
  export { Chunk, ChunkingConfig, ExtractedImage, HtmlConversionOptions, HtmlPreprocessingOptions, ImageExtractionConfig, KeywordConfig, LanguageDetectionConfig, OcrConfig, PageContent, PageExtractionConfig, PdfConfig, PostProcessorConfig, Table, TesseractConfig, TokenReductionConfig } from './types.js';
3
3
  import { PanicContext } from './errors.js';
4
4
  export { CacheError, ErrorCode, ImageProcessingError, KreuzbergError, MissingDependencyError, OcrError, ParsingError, PluginError, ValidationError } from './errors.js';
5
- export { GutenOcrBackend } from './ocr/guten-ocr.js';
6
5
 
7
6
  /**
8
7
  * Get the error code for the last FFI error.
@@ -797,19 +796,11 @@ declare function listValidators(): string[];
797
796
  *
798
797
  * @example
799
798
  * ```typescript
800
- * import { GutenOcrBackend } from '@kreuzberg/node/ocr/guten-ocr';
801
- * import { registerOcrBackend, extractFile } from '@kreuzberg/node';
802
- *
803
- * // Create and initialize backend
804
- * const backend = new GutenOcrBackend();
805
- * await backend.initialize();
806
- *
807
- * // Register with Kreuzberg
808
- * registerOcrBackend(backend);
799
+ * import { extractFile } from '@kreuzberg/node';
809
800
  *
810
- * // Use in extraction
801
+ * // PaddleOCR is built into the native Rust core - just use the backend name
811
802
  * const result = await extractFile('scanned.pdf', null, {
812
- * ocr: { backend: 'guten-ocr', language: 'en' }
803
+ * ocr: { backend: 'paddle-ocr', language: 'en' }
813
804
  * });
814
805
  * console.log(result.content);
815
806
  * ```
@@ -1176,7 +1167,7 @@ declare function __resetBindingForTests(): void;
1176
1167
  *
1177
1168
  * ## Supported Formats
1178
1169
  *
1179
- * - **Documents**: PDF, DOCX, PPTX, XLSX, DOC, PPT (with LibreOffice)
1170
+ * - **Documents**: PDF, DOCX, PPTX, XLSX, DOC, PPT
1180
1171
  * - **Text**: Markdown, Plain Text, XML
1181
1172
  * - **Web**: HTML (converted to Markdown)
1182
1173
  * - **Data**: JSON, YAML, TOML
@@ -1200,6 +1191,6 @@ declare function __resetBindingForTests(): void;
1200
1191
  * @module @kreuzberg/node
1201
1192
  */
1202
1193
 
1203
- declare const __version__ = "4.2.15";
1194
+ declare const __version__ = "4.3.0";
1204
1195
 
1205
1196
  export { type EmbeddingPreset, ErrorClassification, ExtractionConfig, ExtractionResult, OcrBackendProtocol, PanicContext, PostProcessorProtocol, ValidatorProtocol, WorkerPool, WorkerPoolStats, __resetBindingForTests, __setBindingForTests, __version__, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, batchExtractFilesInWorker, batchExtractFilesSync, classifyError, clearDocumentExtractors, clearOcrBackends, clearPostProcessors, clearValidators, closeWorkerPool, createWorkerPool, detectMimeType, detectMimeTypeFromPath, extractBytes, extractBytesSync, extractFile, extractFileInWorker, extractFileSync, getEmbeddingPreset, getErrorCodeDescription, getErrorCodeName, getExtensionsForMime, getLastErrorCode, getLastPanicContext, getWorkerPoolStats, listDocumentExtractors, listEmbeddingPresets, listOcrBackends, listPostProcessors, listValidators, loadConfigFile, loadConfigFromPath, registerOcrBackend, registerPostProcessor, registerValidator, unregisterDocumentExtractor, unregisterOcrBackend, unregisterPostProcessor, unregisterValidator, validateMimeType };