@kreuzberg/node 4.2.15 → 4.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -11
- package/dist/errors.d.mts +2 -3
- package/dist/errors.d.ts +2 -3
- package/dist/errors.js.map +1 -1
- package/dist/errors.mjs.map +1 -1
- package/dist/index.d.mts +5 -14
- package/dist/index.d.ts +5 -14
- package/dist/index.js +19 -215
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +19 -204
- package/dist/index.mjs.map +1 -1
- package/dist/types.d.mts +137 -11
- package/dist/types.d.ts +137 -11
- package/dist/types.js.map +1 -1
- package/index.d.ts +27 -0
- package/index.js +52 -52
- package/package.json +11 -9
- package/dist/ocr/guten-ocr.d.mts +0 -193
- package/dist/ocr/guten-ocr.d.ts +0 -193
- package/dist/ocr/guten-ocr.js +0 -234
- package/dist/ocr/guten-ocr.js.map +0 -1
- package/dist/ocr/guten-ocr.mjs +0 -199
- package/dist/ocr/guten-ocr.mjs.map +0 -1
package/README.md
CHANGED
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
<img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
|
|
23
23
|
</a>
|
|
24
24
|
<a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
|
|
25
|
-
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.
|
|
25
|
+
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.3.1" alt="Go">
|
|
26
26
|
</a>
|
|
27
27
|
<a href="https://www.nuget.org/packages/Kreuzberg/">
|
|
28
28
|
<img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
|
|
@@ -55,7 +55,7 @@
|
|
|
55
55
|
</div>
|
|
56
56
|
|
|
57
57
|
|
|
58
|
-
Extract text, tables, images, and metadata from
|
|
58
|
+
Extract text, tables, images, and metadata from 75+ file formats including PDF, Office documents, and images. Native NAPI-RS bindings for Node.js with superior performance, async/await support, and TypeScript type definitions.
|
|
59
59
|
|
|
60
60
|
|
|
61
61
|
## Installation
|
|
@@ -95,15 +95,13 @@ yarn add @kreuzberg/node
|
|
|
95
95
|
### System Requirements
|
|
96
96
|
|
|
97
97
|
- **Node.js 22+** required (NAPI-RS native bindings)
|
|
98
|
-
- Optional: [ONNX Runtime](https://github.com/microsoft/onnxruntime/releases) version 1.
|
|
98
|
+
- Optional: [ONNX Runtime](https://github.com/microsoft/onnxruntime/releases) version 1.24+ for embeddings support
|
|
99
99
|
- Optional: [Tesseract OCR](https://github.com/tesseract-ocr/tesseract) for OCR functionality
|
|
100
100
|
|
|
101
|
-
- Optional: [LibreOffice](https://www.libreoffice.org/download/download/) for legacy Office formats (DOC, XLS, PPT, RTF, ODT, ODS, ODP)
|
|
102
|
-
|
|
103
101
|
**Format Support Notes:**
|
|
104
|
-
-
|
|
105
|
-
-
|
|
106
|
-
- WASM binding supports
|
|
102
|
+
- Legacy formats (DOC, XLS, PPT) are now extracted natively without external tools
|
|
103
|
+
- Modern Office formats (DOCX, XLSX, PPTX) are fully supported
|
|
104
|
+
- WASM binding supports all document formats via in-memory parsing
|
|
107
105
|
|
|
108
106
|
|
|
109
107
|
|
|
@@ -322,9 +320,9 @@ This binding uses NAPI-RS to provide native Node.js bindings with:
|
|
|
322
320
|
|
|
323
321
|
## Features
|
|
324
322
|
|
|
325
|
-
### Supported File Formats (
|
|
323
|
+
### Supported File Formats (75+)
|
|
326
324
|
|
|
327
|
-
|
|
325
|
+
75+ file formats across 8 major categories with intelligent format detection and comprehensive metadata extraction.
|
|
328
326
|
|
|
329
327
|
#### Office Documents
|
|
330
328
|
|
|
@@ -409,7 +407,7 @@ Kreuzberg supports multiple OCR backends for extracting text from scanned docume
|
|
|
409
407
|
|
|
410
408
|
- **Tesseract**
|
|
411
409
|
|
|
412
|
-
- **
|
|
410
|
+
- **Paddleocr**
|
|
413
411
|
|
|
414
412
|
|
|
415
413
|
### OCR Configuration Example
|
package/dist/errors.d.mts
CHANGED
|
@@ -332,7 +332,6 @@ declare class PluginError extends KreuzbergError {
|
|
|
332
332
|
* Error thrown when a required system dependency is missing.
|
|
333
333
|
*
|
|
334
334
|
* Missing dependency errors occur when external tools or libraries are not available, such as:
|
|
335
|
-
* - LibreOffice (for DOC/PPT/XLS files)
|
|
336
335
|
* - Tesseract OCR (for OCR processing)
|
|
337
336
|
* - ImageMagick (for image processing)
|
|
338
337
|
* - Poppler (for PDF rendering)
|
|
@@ -342,11 +341,11 @@ declare class PluginError extends KreuzbergError {
|
|
|
342
341
|
* import { extractFile, MissingDependencyError } from '@kreuzberg/node';
|
|
343
342
|
*
|
|
344
343
|
* try {
|
|
345
|
-
* const result = await extractFile('document.
|
|
344
|
+
* const result = await extractFile('document.pdf');
|
|
346
345
|
* } catch (error) {
|
|
347
346
|
* if (error instanceof MissingDependencyError) {
|
|
348
347
|
* console.error('Missing dependency:', error.message);
|
|
349
|
-
* console.log('Please install
|
|
348
|
+
* console.log('Please install Tesseract OCR for image processing');
|
|
350
349
|
* }
|
|
351
350
|
* }
|
|
352
351
|
* ```
|
package/dist/errors.d.ts
CHANGED
|
@@ -332,7 +332,6 @@ declare class PluginError extends KreuzbergError {
|
|
|
332
332
|
* Error thrown when a required system dependency is missing.
|
|
333
333
|
*
|
|
334
334
|
* Missing dependency errors occur when external tools or libraries are not available, such as:
|
|
335
|
-
* - LibreOffice (for DOC/PPT/XLS files)
|
|
336
335
|
* - Tesseract OCR (for OCR processing)
|
|
337
336
|
* - ImageMagick (for image processing)
|
|
338
337
|
* - Poppler (for PDF rendering)
|
|
@@ -342,11 +341,11 @@ declare class PluginError extends KreuzbergError {
|
|
|
342
341
|
* import { extractFile, MissingDependencyError } from '@kreuzberg/node';
|
|
343
342
|
*
|
|
344
343
|
* try {
|
|
345
|
-
* const result = await extractFile('document.
|
|
344
|
+
* const result = await extractFile('document.pdf');
|
|
346
345
|
* } catch (error) {
|
|
347
346
|
* if (error instanceof MissingDependencyError) {
|
|
348
347
|
* console.error('Missing dependency:', error.message);
|
|
349
|
-
* console.log('Please install
|
|
348
|
+
* console.log('Please install Tesseract OCR for image processing');
|
|
350
349
|
* }
|
|
351
350
|
* }
|
|
352
351
|
* ```
|
package/dist/errors.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../typescript/errors.ts"],"sourcesContent":["/**\n * Error types for Kreuzberg document intelligence framework.\n *\n * These error classes mirror the Rust core error types and provide\n * type-safe error handling for TypeScript consumers.\n *\n * ## Error Hierarchy\n *\n * ```\n * Error (JavaScript built-in)\n * └── KreuzbergError (base class)\n * ├── ValidationError\n * ├── ParsingError\n * ├── OcrError\n * ├── CacheError\n * ├── ImageProcessingError\n * ├── PluginError\n * ├── MissingDependencyError\n * └── ... (other error types)\n * ```\n *\n * @module errors\n */\n\n/**\n * FFI error codes matching kreuzberg-ffi C library error types.\n *\n * @example\n * ```typescript\n * import { ErrorCode, getLastErrorCode } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * const code = getLastErrorCode();\n * if (code === ErrorCode.Panic) {\n * console.error('A panic occurred in the native library');\n * }\n * }\n * ```\n */\nexport enum ErrorCode {\n\t/**\n\t * No error (success)\n\t */\n\tSuccess = 0,\n\t/**\n\t * Generic error\n\t */\n\tGenericError = 1,\n\t/**\n\t * Panic occurred in native code\n\t */\n\tPanic = 2,\n\t/**\n\t * Invalid argument provided\n\t */\n\tInvalidArgument = 3,\n\t/**\n\t * I/O error (file system, network, etc.)\n\t */\n\tIoError = 4,\n\t/**\n\t * Error parsing document content\n\t */\n\tParsingError = 5,\n\t/**\n\t * Error in OCR processing\n\t */\n\tOcrError = 6,\n\t/**\n\t * Required system dependency is missing\n\t */\n\tMissingDependency = 7,\n}\n\n/**\n * Context information for panics in native code.\n *\n * Contains file location, line number, function name, panic message,\n * and timestamp for debugging native library issues.\n *\n * @example\n * ```typescript\n * import { KreuzbergError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof KreuzbergError && error.panicContext) {\n * console.error('Panic occurred:');\n * console.error(`File: ${error.panicContext.file}`);\n * console.error(`Line: ${error.panicContext.line}`);\n * console.error(`Function: ${error.panicContext.function}`);\n * console.error(`Message: ${error.panicContext.message}`);\n * }\n * }\n * ```\n */\nexport interface PanicContext {\n\t/**\n\t * Source file where panic occurred\n\t */\n\tfile: string;\n\t/**\n\t * Line number in source file\n\t */\n\tline: number;\n\t/**\n\t * Function name where panic occurred\n\t */\n\tfunction: string;\n\t/**\n\t * Panic message\n\t */\n\tmessage: string;\n\t/**\n\t * Unix timestamp (seconds since epoch)\n\t */\n\ttimestamp_secs: number;\n}\n\n/**\n * Base error class for all Kreuzberg errors.\n *\n * All error types thrown by Kreuzberg extend this class, allowing\n * consumers to catch all Kreuzberg-specific errors with a single catch block.\n *\n * @example\n * ```typescript\n * import { extractFile, KreuzbergError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof KreuzbergError) {\n * console.error('Kreuzberg error:', error.message);\n * if (error.panicContext) {\n * console.error('Panic at:', error.panicContext.file + ':' + error.panicContext.line);\n * }\n * } else {\n * throw error; // Re-throw non-Kreuzberg errors\n * }\n * }\n * ```\n */\nexport class KreuzbergError extends Error {\n\t/**\n\t * Panic context if error was caused by a panic in native code.\n\t * Will be null for non-panic errors.\n\t */\n\tpublic readonly panicContext: PanicContext | null;\n\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message);\n\t\tthis.name = \"KreuzbergError\";\n\t\tthis.panicContext = panicContext ?? null;\n\t\tObject.setPrototypeOf(this, KreuzbergError.prototype);\n\t}\n\n\ttoJSON() {\n\t\treturn {\n\t\t\tname: this.name,\n\t\t\tmessage: this.message,\n\t\t\tpanicContext: this.panicContext,\n\t\t\tstack: this.stack,\n\t\t};\n\t}\n}\n\n/**\n * Error thrown when document validation fails.\n *\n * Validation errors occur when a document doesn't meet specified criteria,\n * such as minimum content length, required metadata fields, or quality thresholds.\n *\n * @example\n * ```typescript\n * import { extractFile, ValidationError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof ValidationError) {\n * console.error('Document validation failed:', error.message);\n * }\n * }\n * ```\n */\nexport class ValidationError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"ValidationError\";\n\t\tObject.setPrototypeOf(this, ValidationError.prototype);\n\t}\n}\n\n/**\n * Error thrown when document parsing fails.\n *\n * Parsing errors occur when a document is corrupted, malformed, or cannot\n * be processed by the extraction engine. This includes issues like:\n * - Corrupted PDF files\n * - Invalid XML/JSON syntax\n * - Unsupported file format versions\n * - Encrypted documents without valid passwords\n *\n * @example\n * ```typescript\n * import { extractFile, ParsingError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('corrupted.pdf');\n * } catch (error) {\n * if (error instanceof ParsingError) {\n * console.error('Failed to parse document:', error.message);\n * }\n * }\n * ```\n */\nexport class ParsingError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"ParsingError\";\n\t\tObject.setPrototypeOf(this, ParsingError.prototype);\n\t}\n}\n\n/**\n * Error thrown when OCR processing fails.\n *\n * OCR errors occur during optical character recognition, such as:\n * - OCR backend initialization failures\n * - Image preprocessing errors\n * - Language model loading issues\n * - OCR engine crashes\n *\n * @example\n * ```typescript\n * import { extractFile, OcrError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('scanned.pdf', null, {\n * ocr: { backend: 'tesseract', language: 'eng' }\n * });\n * } catch (error) {\n * if (error instanceof OcrError) {\n * console.error('OCR processing failed:', error.message);\n * }\n * }\n * ```\n */\nexport class OcrError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"OcrError\";\n\t\tObject.setPrototypeOf(this, OcrError.prototype);\n\t}\n}\n\n/**\n * Error thrown when cache operations fail.\n *\n * Cache errors are typically non-fatal and occur during caching operations, such as:\n * - Cache directory creation failures\n * - Disk write errors\n * - Cache entry corruption\n * - Insufficient disk space\n *\n * These errors are usually logged but don't prevent extraction from completing.\n *\n * @example\n * ```typescript\n * import { extractFile, CacheError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf', null, {\n * useCache: true\n * });\n * } catch (error) {\n * if (error instanceof CacheError) {\n * console.warn('Cache operation failed, continuing without cache:', error.message);\n * }\n * }\n * ```\n */\nexport class CacheError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"CacheError\";\n\t\tObject.setPrototypeOf(this, CacheError.prototype);\n\t}\n}\n\n/**\n * Error thrown when image processing operations fail.\n *\n * Image processing errors occur during image manipulation, such as:\n * - Image decoding failures\n * - Unsupported image formats\n * - Image resizing/scaling errors\n * - DPI adjustment failures\n * - Color space conversion issues\n *\n * @example\n * ```typescript\n * import { extractFile, ImageProcessingError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf', null, {\n * images: {\n * extractImages: true,\n * targetDpi: 300\n * }\n * });\n * } catch (error) {\n * if (error instanceof ImageProcessingError) {\n * console.error('Image processing failed:', error.message);\n * }\n * }\n * ```\n */\nexport class ImageProcessingError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"ImageProcessingError\";\n\t\tObject.setPrototypeOf(this, ImageProcessingError.prototype);\n\t}\n}\n\n/**\n * Error thrown when a plugin operation fails.\n *\n * Plugin errors occur in custom plugins (postprocessors, validators, OCR backends), such as:\n * - Plugin initialization failures\n * - Plugin processing errors\n * - Plugin crashes or timeouts\n * - Invalid plugin configuration\n *\n * The error message includes the plugin name to help identify which plugin failed.\n *\n * @example\n * ```typescript\n * import { extractFile, PluginError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof PluginError) {\n * console.error(`Plugin '${error.pluginName}' failed:`, error.message);\n * }\n * }\n * ```\n */\nexport class PluginError extends KreuzbergError {\n\t/**\n\t * Name of the plugin that threw the error.\n\t */\n\tpublic readonly pluginName: string;\n\n\tconstructor(message: string, pluginName: string, panicContext?: PanicContext | null) {\n\t\tsuper(`Plugin error in '${pluginName}': ${message}`, panicContext);\n\t\tthis.name = \"PluginError\";\n\t\tthis.pluginName = pluginName;\n\t\tObject.setPrototypeOf(this, PluginError.prototype);\n\t}\n\n\toverride toJSON() {\n\t\treturn {\n\t\t\tname: this.name,\n\t\t\tmessage: this.message,\n\t\t\tpluginName: this.pluginName,\n\t\t\tpanicContext: this.panicContext,\n\t\t\tstack: this.stack,\n\t\t};\n\t}\n}\n\n/**\n * Error thrown when a required system dependency is missing.\n *\n * Missing dependency errors occur when external tools or libraries are not available, such as:\n * - LibreOffice (for DOC/PPT/XLS files)\n * - Tesseract OCR (for OCR processing)\n * - ImageMagick (for image processing)\n * - Poppler (for PDF rendering)\n *\n * @example\n * ```typescript\n * import { extractFile, MissingDependencyError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.doc');\n * } catch (error) {\n * if (error instanceof MissingDependencyError) {\n * console.error('Missing dependency:', error.message);\n * console.log('Please install LibreOffice to process DOC files');\n * }\n * }\n * ```\n */\nexport class MissingDependencyError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"MissingDependencyError\";\n\t\tObject.setPrototypeOf(this, MissingDependencyError.prototype);\n\t}\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAyCO,IAAK,YAAL,kBAAKA,eAAL;AAIN,EAAAA,sBAAA,aAAU,KAAV;AAIA,EAAAA,sBAAA,kBAAe,KAAf;AAIA,EAAAA,sBAAA,WAAQ,KAAR;AAIA,EAAAA,sBAAA,qBAAkB,KAAlB;AAIA,EAAAA,sBAAA,aAAU,KAAV;AAIA,EAAAA,sBAAA,kBAAe,KAAf;AAIA,EAAAA,sBAAA,cAAW,KAAX;AAIA,EAAAA,sBAAA,uBAAoB,KAApB;AAhCW,SAAAA;AAAA,GAAA;AAyGL,IAAM,iBAAN,MAAM,wBAAuB,MAAM;AAAA;AAAA;AAAA;AAAA;AAAA,EAKzB;AAAA,EAEhB,YAAY,SAAiB,cAAoC;AAChE,UAAM,OAAO;AACb,SAAK,OAAO;AACZ,SAAK,eAAe,gBAAgB;AACpC,WAAO,eAAe,MAAM,gBAAe,SAAS;AAAA,EACrD;AAAA,EAEA,SAAS;AACR,WAAO;AAAA,MACN,MAAM,KAAK;AAAA,MACX,SAAS,KAAK;AAAA,MACd,cAAc,KAAK;AAAA,MACnB,OAAO,KAAK;AAAA,IACb;AAAA,EACD;AACD;AAqBO,IAAM,kBAAN,MAAM,yBAAwB,eAAe;AAAA,EACnD,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,iBAAgB,SAAS;AAAA,EACtD;AACD;AAyBO,IAAM,eAAN,MAAM,sBAAqB,eAAe;AAAA,EAChD,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,cAAa,SAAS;AAAA,EACnD;AACD;AA0BO,IAAM,WAAN,MAAM,kBAAiB,eAAe;AAAA,EAC5C,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,UAAS,SAAS;AAAA,EAC/C;AACD;AA4BO,IAAM,aAAN,MAAM,oBAAmB,eAAe;AAAA,EAC9C,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,YAAW,SAAS;AAAA,EACjD;AACD;AA8BO,IAAM,uBAAN,MAAM,8BAA6B,eAAe;AAAA,EACxD,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,sBAAqB,SAAS;AAAA,EAC3D;AACD;AA0BO,IAAM,cAAN,MAAM,qBAAoB,eAAe;AAAA;AAAA;AAAA;AAAA,EAI/B;AAAA,EAEhB,YAAY,SAAiB,YAAoB,cAAoC;AACpF,UAAM,oBAAoB,UAAU,MAAM,OAAO,IAAI,YAAY;AACjE,SAAK,OAAO;AACZ,SAAK,aAAa;AAClB,WAAO,eAAe,MAAM,aAAY,SAAS;AAAA,EAClD;AAAA,EAES,SAAS;AACjB,WAAO;AAAA,MACN,MAAM,KAAK;AAAA,MACX,SAAS,KAAK;AAAA,MACd,YAAY,KAAK;AAAA,MACjB,cAAc,KAAK;AAAA,MACnB,OAAO,KAAK;AAAA,IACb;AAAA,EACD;AACD;AAyBO,IAAM,yBAAN,MAAM,gCAA+B,eAAe;AAAA,EAC1D,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,wBAAuB,SAAS;AAAA,EAC7D;AACD;","names":["ErrorCode"]}
|
|
1
|
+
{"version":3,"sources":["../typescript/errors.ts"],"sourcesContent":["/**\n * Error types for Kreuzberg document intelligence framework.\n *\n * These error classes mirror the Rust core error types and provide\n * type-safe error handling for TypeScript consumers.\n *\n * ## Error Hierarchy\n *\n * ```\n * Error (JavaScript built-in)\n * └── KreuzbergError (base class)\n * ├── ValidationError\n * ├── ParsingError\n * ├── OcrError\n * ├── CacheError\n * ├── ImageProcessingError\n * ├── PluginError\n * ├── MissingDependencyError\n * └── ... (other error types)\n * ```\n *\n * @module errors\n */\n\n/**\n * FFI error codes matching kreuzberg-ffi C library error types.\n *\n * @example\n * ```typescript\n * import { ErrorCode, getLastErrorCode } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * const code = getLastErrorCode();\n * if (code === ErrorCode.Panic) {\n * console.error('A panic occurred in the native library');\n * }\n * }\n * ```\n */\nexport enum ErrorCode {\n\t/**\n\t * No error (success)\n\t */\n\tSuccess = 0,\n\t/**\n\t * Generic error\n\t */\n\tGenericError = 1,\n\t/**\n\t * Panic occurred in native code\n\t */\n\tPanic = 2,\n\t/**\n\t * Invalid argument provided\n\t */\n\tInvalidArgument = 3,\n\t/**\n\t * I/O error (file system, network, etc.)\n\t */\n\tIoError = 4,\n\t/**\n\t * Error parsing document content\n\t */\n\tParsingError = 5,\n\t/**\n\t * Error in OCR processing\n\t */\n\tOcrError = 6,\n\t/**\n\t * Required system dependency is missing\n\t */\n\tMissingDependency = 7,\n}\n\n/**\n * Context information for panics in native code.\n *\n * Contains file location, line number, function name, panic message,\n * and timestamp for debugging native library issues.\n *\n * @example\n * ```typescript\n * import { KreuzbergError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof KreuzbergError && error.panicContext) {\n * console.error('Panic occurred:');\n * console.error(`File: ${error.panicContext.file}`);\n * console.error(`Line: ${error.panicContext.line}`);\n * console.error(`Function: ${error.panicContext.function}`);\n * console.error(`Message: ${error.panicContext.message}`);\n * }\n * }\n * ```\n */\nexport interface PanicContext {\n\t/**\n\t * Source file where panic occurred\n\t */\n\tfile: string;\n\t/**\n\t * Line number in source file\n\t */\n\tline: number;\n\t/**\n\t * Function name where panic occurred\n\t */\n\tfunction: string;\n\t/**\n\t * Panic message\n\t */\n\tmessage: string;\n\t/**\n\t * Unix timestamp (seconds since epoch)\n\t */\n\ttimestamp_secs: number;\n}\n\n/**\n * Base error class for all Kreuzberg errors.\n *\n * All error types thrown by Kreuzberg extend this class, allowing\n * consumers to catch all Kreuzberg-specific errors with a single catch block.\n *\n * @example\n * ```typescript\n * import { extractFile, KreuzbergError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof KreuzbergError) {\n * console.error('Kreuzberg error:', error.message);\n * if (error.panicContext) {\n * console.error('Panic at:', error.panicContext.file + ':' + error.panicContext.line);\n * }\n * } else {\n * throw error; // Re-throw non-Kreuzberg errors\n * }\n * }\n * ```\n */\nexport class KreuzbergError extends Error {\n\t/**\n\t * Panic context if error was caused by a panic in native code.\n\t * Will be null for non-panic errors.\n\t */\n\tpublic readonly panicContext: PanicContext | null;\n\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message);\n\t\tthis.name = \"KreuzbergError\";\n\t\tthis.panicContext = panicContext ?? null;\n\t\tObject.setPrototypeOf(this, KreuzbergError.prototype);\n\t}\n\n\ttoJSON() {\n\t\treturn {\n\t\t\tname: this.name,\n\t\t\tmessage: this.message,\n\t\t\tpanicContext: this.panicContext,\n\t\t\tstack: this.stack,\n\t\t};\n\t}\n}\n\n/**\n * Error thrown when document validation fails.\n *\n * Validation errors occur when a document doesn't meet specified criteria,\n * such as minimum content length, required metadata fields, or quality thresholds.\n *\n * @example\n * ```typescript\n * import { extractFile, ValidationError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof ValidationError) {\n * console.error('Document validation failed:', error.message);\n * }\n * }\n * ```\n */\nexport class ValidationError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"ValidationError\";\n\t\tObject.setPrototypeOf(this, ValidationError.prototype);\n\t}\n}\n\n/**\n * Error thrown when document parsing fails.\n *\n * Parsing errors occur when a document is corrupted, malformed, or cannot\n * be processed by the extraction engine. This includes issues like:\n * - Corrupted PDF files\n * - Invalid XML/JSON syntax\n * - Unsupported file format versions\n * - Encrypted documents without valid passwords\n *\n * @example\n * ```typescript\n * import { extractFile, ParsingError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('corrupted.pdf');\n * } catch (error) {\n * if (error instanceof ParsingError) {\n * console.error('Failed to parse document:', error.message);\n * }\n * }\n * ```\n */\nexport class ParsingError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"ParsingError\";\n\t\tObject.setPrototypeOf(this, ParsingError.prototype);\n\t}\n}\n\n/**\n * Error thrown when OCR processing fails.\n *\n * OCR errors occur during optical character recognition, such as:\n * - OCR backend initialization failures\n * - Image preprocessing errors\n * - Language model loading issues\n * - OCR engine crashes\n *\n * @example\n * ```typescript\n * import { extractFile, OcrError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('scanned.pdf', null, {\n * ocr: { backend: 'tesseract', language: 'eng' }\n * });\n * } catch (error) {\n * if (error instanceof OcrError) {\n * console.error('OCR processing failed:', error.message);\n * }\n * }\n * ```\n */\nexport class OcrError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"OcrError\";\n\t\tObject.setPrototypeOf(this, OcrError.prototype);\n\t}\n}\n\n/**\n * Error thrown when cache operations fail.\n *\n * Cache errors are typically non-fatal and occur during caching operations, such as:\n * - Cache directory creation failures\n * - Disk write errors\n * - Cache entry corruption\n * - Insufficient disk space\n *\n * These errors are usually logged but don't prevent extraction from completing.\n *\n * @example\n * ```typescript\n * import { extractFile, CacheError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf', null, {\n * useCache: true\n * });\n * } catch (error) {\n * if (error instanceof CacheError) {\n * console.warn('Cache operation failed, continuing without cache:', error.message);\n * }\n * }\n * ```\n */\nexport class CacheError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"CacheError\";\n\t\tObject.setPrototypeOf(this, CacheError.prototype);\n\t}\n}\n\n/**\n * Error thrown when image processing operations fail.\n *\n * Image processing errors occur during image manipulation, such as:\n * - Image decoding failures\n * - Unsupported image formats\n * - Image resizing/scaling errors\n * - DPI adjustment failures\n * - Color space conversion issues\n *\n * @example\n * ```typescript\n * import { extractFile, ImageProcessingError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf', null, {\n * images: {\n * extractImages: true,\n * targetDpi: 300\n * }\n * });\n * } catch (error) {\n * if (error instanceof ImageProcessingError) {\n * console.error('Image processing failed:', error.message);\n * }\n * }\n * ```\n */\nexport class ImageProcessingError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"ImageProcessingError\";\n\t\tObject.setPrototypeOf(this, ImageProcessingError.prototype);\n\t}\n}\n\n/**\n * Error thrown when a plugin operation fails.\n *\n * Plugin errors occur in custom plugins (postprocessors, validators, OCR backends), such as:\n * - Plugin initialization failures\n * - Plugin processing errors\n * - Plugin crashes or timeouts\n * - Invalid plugin configuration\n *\n * The error message includes the plugin name to help identify which plugin failed.\n *\n * @example\n * ```typescript\n * import { extractFile, PluginError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof PluginError) {\n * console.error(`Plugin '${error.pluginName}' failed:`, error.message);\n * }\n * }\n * ```\n */\nexport class PluginError extends KreuzbergError {\n\t/**\n\t * Name of the plugin that threw the error.\n\t */\n\tpublic readonly pluginName: string;\n\n\tconstructor(message: string, pluginName: string, panicContext?: PanicContext | null) {\n\t\tsuper(`Plugin error in '${pluginName}': ${message}`, panicContext);\n\t\tthis.name = \"PluginError\";\n\t\tthis.pluginName = pluginName;\n\t\tObject.setPrototypeOf(this, PluginError.prototype);\n\t}\n\n\toverride toJSON() {\n\t\treturn {\n\t\t\tname: this.name,\n\t\t\tmessage: this.message,\n\t\t\tpluginName: this.pluginName,\n\t\t\tpanicContext: this.panicContext,\n\t\t\tstack: this.stack,\n\t\t};\n\t}\n}\n\n/**\n * Error thrown when a required system dependency is missing.\n *\n * Missing dependency errors occur when external tools or libraries are not available, such as:\n * - Tesseract OCR (for OCR processing)\n * - ImageMagick (for image processing)\n * - Poppler (for PDF rendering)\n *\n * @example\n * ```typescript\n * import { extractFile, MissingDependencyError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof MissingDependencyError) {\n * console.error('Missing dependency:', error.message);\n * console.log('Please install Tesseract OCR for image processing');\n * }\n * }\n * ```\n */\nexport class MissingDependencyError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"MissingDependencyError\";\n\t\tObject.setPrototypeOf(this, MissingDependencyError.prototype);\n\t}\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAyCO,IAAK,YAAL,kBAAKA,eAAL;AAIN,EAAAA,sBAAA,aAAU,KAAV;AAIA,EAAAA,sBAAA,kBAAe,KAAf;AAIA,EAAAA,sBAAA,WAAQ,KAAR;AAIA,EAAAA,sBAAA,qBAAkB,KAAlB;AAIA,EAAAA,sBAAA,aAAU,KAAV;AAIA,EAAAA,sBAAA,kBAAe,KAAf;AAIA,EAAAA,sBAAA,cAAW,KAAX;AAIA,EAAAA,sBAAA,uBAAoB,KAApB;AAhCW,SAAAA;AAAA,GAAA;AAyGL,IAAM,iBAAN,MAAM,wBAAuB,MAAM;AAAA;AAAA;AAAA;AAAA;AAAA,EAKzB;AAAA,EAEhB,YAAY,SAAiB,cAAoC;AAChE,UAAM,OAAO;AACb,SAAK,OAAO;AACZ,SAAK,eAAe,gBAAgB;AACpC,WAAO,eAAe,MAAM,gBAAe,SAAS;AAAA,EACrD;AAAA,EAEA,SAAS;AACR,WAAO;AAAA,MACN,MAAM,KAAK;AAAA,MACX,SAAS,KAAK;AAAA,MACd,cAAc,KAAK;AAAA,MACnB,OAAO,KAAK;AAAA,IACb;AAAA,EACD;AACD;AAqBO,IAAM,kBAAN,MAAM,yBAAwB,eAAe;AAAA,EACnD,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,iBAAgB,SAAS;AAAA,EACtD;AACD;AAyBO,IAAM,eAAN,MAAM,sBAAqB,eAAe;AAAA,EAChD,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,cAAa,SAAS;AAAA,EACnD;AACD;AA0BO,IAAM,WAAN,MAAM,kBAAiB,eAAe;AAAA,EAC5C,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,UAAS,SAAS;AAAA,EAC/C;AACD;AA4BO,IAAM,aAAN,MAAM,oBAAmB,eAAe;AAAA,EAC9C,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,YAAW,SAAS;AAAA,EACjD;AACD;AA8BO,IAAM,uBAAN,MAAM,8BAA6B,eAAe;AAAA,EACxD,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,sBAAqB,SAAS;AAAA,EAC3D;AACD;AA0BO,IAAM,cAAN,MAAM,qBAAoB,eAAe;AAAA;AAAA;AAAA;AAAA,EAI/B;AAAA,EAEhB,YAAY,SAAiB,YAAoB,cAAoC;AACpF,UAAM,oBAAoB,UAAU,MAAM,OAAO,IAAI,YAAY;AACjE,SAAK,OAAO;AACZ,SAAK,aAAa;AAClB,WAAO,eAAe,MAAM,aAAY,SAAS;AAAA,EAClD;AAAA,EAES,SAAS;AACjB,WAAO;AAAA,MACN,MAAM,KAAK;AAAA,MACX,SAAS,KAAK;AAAA,MACd,YAAY,KAAK;AAAA,MACjB,cAAc,KAAK;AAAA,MACnB,OAAO,KAAK;AAAA,IACb;AAAA,EACD;AACD;AAwBO,IAAM,yBAAN,MAAM,gCAA+B,eAAe;AAAA,EAC1D,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,wBAAuB,SAAS;AAAA,EAC7D;AACD;","names":["ErrorCode"]}
|
package/dist/errors.mjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../typescript/errors.ts"],"sourcesContent":["/**\n * Error types for Kreuzberg document intelligence framework.\n *\n * These error classes mirror the Rust core error types and provide\n * type-safe error handling for TypeScript consumers.\n *\n * ## Error Hierarchy\n *\n * ```\n * Error (JavaScript built-in)\n * └── KreuzbergError (base class)\n * ├── ValidationError\n * ├── ParsingError\n * ├── OcrError\n * ├── CacheError\n * ├── ImageProcessingError\n * ├── PluginError\n * ├── MissingDependencyError\n * └── ... (other error types)\n * ```\n *\n * @module errors\n */\n\n/**\n * FFI error codes matching kreuzberg-ffi C library error types.\n *\n * @example\n * ```typescript\n * import { ErrorCode, getLastErrorCode } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * const code = getLastErrorCode();\n * if (code === ErrorCode.Panic) {\n * console.error('A panic occurred in the native library');\n * }\n * }\n * ```\n */\nexport enum ErrorCode {\n\t/**\n\t * No error (success)\n\t */\n\tSuccess = 0,\n\t/**\n\t * Generic error\n\t */\n\tGenericError = 1,\n\t/**\n\t * Panic occurred in native code\n\t */\n\tPanic = 2,\n\t/**\n\t * Invalid argument provided\n\t */\n\tInvalidArgument = 3,\n\t/**\n\t * I/O error (file system, network, etc.)\n\t */\n\tIoError = 4,\n\t/**\n\t * Error parsing document content\n\t */\n\tParsingError = 5,\n\t/**\n\t * Error in OCR processing\n\t */\n\tOcrError = 6,\n\t/**\n\t * Required system dependency is missing\n\t */\n\tMissingDependency = 7,\n}\n\n/**\n * Context information for panics in native code.\n *\n * Contains file location, line number, function name, panic message,\n * and timestamp for debugging native library issues.\n *\n * @example\n * ```typescript\n * import { KreuzbergError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof KreuzbergError && error.panicContext) {\n * console.error('Panic occurred:');\n * console.error(`File: ${error.panicContext.file}`);\n * console.error(`Line: ${error.panicContext.line}`);\n * console.error(`Function: ${error.panicContext.function}`);\n * console.error(`Message: ${error.panicContext.message}`);\n * }\n * }\n * ```\n */\nexport interface PanicContext {\n\t/**\n\t * Source file where panic occurred\n\t */\n\tfile: string;\n\t/**\n\t * Line number in source file\n\t */\n\tline: number;\n\t/**\n\t * Function name where panic occurred\n\t */\n\tfunction: string;\n\t/**\n\t * Panic message\n\t */\n\tmessage: string;\n\t/**\n\t * Unix timestamp (seconds since epoch)\n\t */\n\ttimestamp_secs: number;\n}\n\n/**\n * Base error class for all Kreuzberg errors.\n *\n * All error types thrown by Kreuzberg extend this class, allowing\n * consumers to catch all Kreuzberg-specific errors with a single catch block.\n *\n * @example\n * ```typescript\n * import { extractFile, KreuzbergError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof KreuzbergError) {\n * console.error('Kreuzberg error:', error.message);\n * if (error.panicContext) {\n * console.error('Panic at:', error.panicContext.file + ':' + error.panicContext.line);\n * }\n * } else {\n * throw error; // Re-throw non-Kreuzberg errors\n * }\n * }\n * ```\n */\nexport class KreuzbergError extends Error {\n\t/**\n\t * Panic context if error was caused by a panic in native code.\n\t * Will be null for non-panic errors.\n\t */\n\tpublic readonly panicContext: PanicContext | null;\n\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message);\n\t\tthis.name = \"KreuzbergError\";\n\t\tthis.panicContext = panicContext ?? null;\n\t\tObject.setPrototypeOf(this, KreuzbergError.prototype);\n\t}\n\n\ttoJSON() {\n\t\treturn {\n\t\t\tname: this.name,\n\t\t\tmessage: this.message,\n\t\t\tpanicContext: this.panicContext,\n\t\t\tstack: this.stack,\n\t\t};\n\t}\n}\n\n/**\n * Error thrown when document validation fails.\n *\n * Validation errors occur when a document doesn't meet specified criteria,\n * such as minimum content length, required metadata fields, or quality thresholds.\n *\n * @example\n * ```typescript\n * import { extractFile, ValidationError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof ValidationError) {\n * console.error('Document validation failed:', error.message);\n * }\n * }\n * ```\n */\nexport class ValidationError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"ValidationError\";\n\t\tObject.setPrototypeOf(this, ValidationError.prototype);\n\t}\n}\n\n/**\n * Error thrown when document parsing fails.\n *\n * Parsing errors occur when a document is corrupted, malformed, or cannot\n * be processed by the extraction engine. This includes issues like:\n * - Corrupted PDF files\n * - Invalid XML/JSON syntax\n * - Unsupported file format versions\n * - Encrypted documents without valid passwords\n *\n * @example\n * ```typescript\n * import { extractFile, ParsingError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('corrupted.pdf');\n * } catch (error) {\n * if (error instanceof ParsingError) {\n * console.error('Failed to parse document:', error.message);\n * }\n * }\n * ```\n */\nexport class ParsingError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"ParsingError\";\n\t\tObject.setPrototypeOf(this, ParsingError.prototype);\n\t}\n}\n\n/**\n * Error thrown when OCR processing fails.\n *\n * OCR errors occur during optical character recognition, such as:\n * - OCR backend initialization failures\n * - Image preprocessing errors\n * - Language model loading issues\n * - OCR engine crashes\n *\n * @example\n * ```typescript\n * import { extractFile, OcrError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('scanned.pdf', null, {\n * ocr: { backend: 'tesseract', language: 'eng' }\n * });\n * } catch (error) {\n * if (error instanceof OcrError) {\n * console.error('OCR processing failed:', error.message);\n * }\n * }\n * ```\n */\nexport class OcrError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"OcrError\";\n\t\tObject.setPrototypeOf(this, OcrError.prototype);\n\t}\n}\n\n/**\n * Error thrown when cache operations fail.\n *\n * Cache errors are typically non-fatal and occur during caching operations, such as:\n * - Cache directory creation failures\n * - Disk write errors\n * - Cache entry corruption\n * - Insufficient disk space\n *\n * These errors are usually logged but don't prevent extraction from completing.\n *\n * @example\n * ```typescript\n * import { extractFile, CacheError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf', null, {\n * useCache: true\n * });\n * } catch (error) {\n * if (error instanceof CacheError) {\n * console.warn('Cache operation failed, continuing without cache:', error.message);\n * }\n * }\n * ```\n */\nexport class CacheError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"CacheError\";\n\t\tObject.setPrototypeOf(this, CacheError.prototype);\n\t}\n}\n\n/**\n * Error thrown when image processing operations fail.\n *\n * Image processing errors occur during image manipulation, such as:\n * - Image decoding failures\n * - Unsupported image formats\n * - Image resizing/scaling errors\n * - DPI adjustment failures\n * - Color space conversion issues\n *\n * @example\n * ```typescript\n * import { extractFile, ImageProcessingError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf', null, {\n * images: {\n * extractImages: true,\n * targetDpi: 300\n * }\n * });\n * } catch (error) {\n * if (error instanceof ImageProcessingError) {\n * console.error('Image processing failed:', error.message);\n * }\n * }\n * ```\n */\nexport class ImageProcessingError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"ImageProcessingError\";\n\t\tObject.setPrototypeOf(this, ImageProcessingError.prototype);\n\t}\n}\n\n/**\n * Error thrown when a plugin operation fails.\n *\n * Plugin errors occur in custom plugins (postprocessors, validators, OCR backends), such as:\n * - Plugin initialization failures\n * - Plugin processing errors\n * - Plugin crashes or timeouts\n * - Invalid plugin configuration\n *\n * The error message includes the plugin name to help identify which plugin failed.\n *\n * @example\n * ```typescript\n * import { extractFile, PluginError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof PluginError) {\n * console.error(`Plugin '${error.pluginName}' failed:`, error.message);\n * }\n * }\n * ```\n */\nexport class PluginError extends KreuzbergError {\n\t/**\n\t * Name of the plugin that threw the error.\n\t */\n\tpublic readonly pluginName: string;\n\n\tconstructor(message: string, pluginName: string, panicContext?: PanicContext | null) {\n\t\tsuper(`Plugin error in '${pluginName}': ${message}`, panicContext);\n\t\tthis.name = \"PluginError\";\n\t\tthis.pluginName = pluginName;\n\t\tObject.setPrototypeOf(this, PluginError.prototype);\n\t}\n\n\toverride toJSON() {\n\t\treturn {\n\t\t\tname: this.name,\n\t\t\tmessage: this.message,\n\t\t\tpluginName: this.pluginName,\n\t\t\tpanicContext: this.panicContext,\n\t\t\tstack: this.stack,\n\t\t};\n\t}\n}\n\n/**\n * Error thrown when a required system dependency is missing.\n *\n * Missing dependency errors occur when external tools or libraries are not available, such as:\n * - LibreOffice (for DOC/PPT/XLS files)\n * - Tesseract OCR (for OCR processing)\n * - ImageMagick (for image processing)\n * - Poppler (for PDF rendering)\n *\n * @example\n * ```typescript\n * import { extractFile, MissingDependencyError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.doc');\n * } catch (error) {\n * if (error instanceof MissingDependencyError) {\n * console.error('Missing dependency:', error.message);\n * console.log('Please install LibreOffice to process DOC files');\n * }\n * }\n * ```\n */\nexport class MissingDependencyError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"MissingDependencyError\";\n\t\tObject.setPrototypeOf(this, MissingDependencyError.prototype);\n\t}\n}\n"],"mappings":";AAyCO,IAAK,YAAL,kBAAKA,eAAL;AAIN,EAAAA,sBAAA,aAAU,KAAV;AAIA,EAAAA,sBAAA,kBAAe,KAAf;AAIA,EAAAA,sBAAA,WAAQ,KAAR;AAIA,EAAAA,sBAAA,qBAAkB,KAAlB;AAIA,EAAAA,sBAAA,aAAU,KAAV;AAIA,EAAAA,sBAAA,kBAAe,KAAf;AAIA,EAAAA,sBAAA,cAAW,KAAX;AAIA,EAAAA,sBAAA,uBAAoB,KAApB;AAhCW,SAAAA;AAAA,GAAA;AAyGL,IAAM,iBAAN,MAAM,wBAAuB,MAAM;AAAA;AAAA;AAAA;AAAA;AAAA,EAKzB;AAAA,EAEhB,YAAY,SAAiB,cAAoC;AAChE,UAAM,OAAO;AACb,SAAK,OAAO;AACZ,SAAK,eAAe,gBAAgB;AACpC,WAAO,eAAe,MAAM,gBAAe,SAAS;AAAA,EACrD;AAAA,EAEA,SAAS;AACR,WAAO;AAAA,MACN,MAAM,KAAK;AAAA,MACX,SAAS,KAAK;AAAA,MACd,cAAc,KAAK;AAAA,MACnB,OAAO,KAAK;AAAA,IACb;AAAA,EACD;AACD;AAqBO,IAAM,kBAAN,MAAM,yBAAwB,eAAe;AAAA,EACnD,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,iBAAgB,SAAS;AAAA,EACtD;AACD;AAyBO,IAAM,eAAN,MAAM,sBAAqB,eAAe;AAAA,EAChD,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,cAAa,SAAS;AAAA,EACnD;AACD;AA0BO,IAAM,WAAN,MAAM,kBAAiB,eAAe;AAAA,EAC5C,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,UAAS,SAAS;AAAA,EAC/C;AACD;AA4BO,IAAM,aAAN,MAAM,oBAAmB,eAAe;AAAA,EAC9C,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,YAAW,SAAS;AAAA,EACjD;AACD;AA8BO,IAAM,uBAAN,MAAM,8BAA6B,eAAe;AAAA,EACxD,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,sBAAqB,SAAS;AAAA,EAC3D;AACD;AA0BO,IAAM,cAAN,MAAM,qBAAoB,eAAe;AAAA;AAAA;AAAA;AAAA,EAI/B;AAAA,EAEhB,YAAY,SAAiB,YAAoB,cAAoC;AACpF,UAAM,oBAAoB,UAAU,MAAM,OAAO,IAAI,YAAY;AACjE,SAAK,OAAO;AACZ,SAAK,aAAa;AAClB,WAAO,eAAe,MAAM,aAAY,SAAS;AAAA,EAClD;AAAA,EAES,SAAS;AACjB,WAAO;AAAA,MACN,MAAM,KAAK;AAAA,MACX,SAAS,KAAK;AAAA,MACd,YAAY,KAAK;AAAA,MACjB,cAAc,KAAK;AAAA,MACnB,OAAO,KAAK;AAAA,IACb;AAAA,EACD;AACD;AAyBO,IAAM,yBAAN,MAAM,gCAA+B,eAAe;AAAA,EAC1D,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,wBAAuB,SAAS;AAAA,EAC7D;AACD;","names":["ErrorCode"]}
|
|
1
|
+
{"version":3,"sources":["../typescript/errors.ts"],"sourcesContent":["/**\n * Error types for Kreuzberg document intelligence framework.\n *\n * These error classes mirror the Rust core error types and provide\n * type-safe error handling for TypeScript consumers.\n *\n * ## Error Hierarchy\n *\n * ```\n * Error (JavaScript built-in)\n * └── KreuzbergError (base class)\n * ├── ValidationError\n * ├── ParsingError\n * ├── OcrError\n * ├── CacheError\n * ├── ImageProcessingError\n * ├── PluginError\n * ├── MissingDependencyError\n * └── ... (other error types)\n * ```\n *\n * @module errors\n */\n\n/**\n * FFI error codes matching kreuzberg-ffi C library error types.\n *\n * @example\n * ```typescript\n * import { ErrorCode, getLastErrorCode } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * const code = getLastErrorCode();\n * if (code === ErrorCode.Panic) {\n * console.error('A panic occurred in the native library');\n * }\n * }\n * ```\n */\nexport enum ErrorCode {\n\t/**\n\t * No error (success)\n\t */\n\tSuccess = 0,\n\t/**\n\t * Generic error\n\t */\n\tGenericError = 1,\n\t/**\n\t * Panic occurred in native code\n\t */\n\tPanic = 2,\n\t/**\n\t * Invalid argument provided\n\t */\n\tInvalidArgument = 3,\n\t/**\n\t * I/O error (file system, network, etc.)\n\t */\n\tIoError = 4,\n\t/**\n\t * Error parsing document content\n\t */\n\tParsingError = 5,\n\t/**\n\t * Error in OCR processing\n\t */\n\tOcrError = 6,\n\t/**\n\t * Required system dependency is missing\n\t */\n\tMissingDependency = 7,\n}\n\n/**\n * Context information for panics in native code.\n *\n * Contains file location, line number, function name, panic message,\n * and timestamp for debugging native library issues.\n *\n * @example\n * ```typescript\n * import { KreuzbergError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof KreuzbergError && error.panicContext) {\n * console.error('Panic occurred:');\n * console.error(`File: ${error.panicContext.file}`);\n * console.error(`Line: ${error.panicContext.line}`);\n * console.error(`Function: ${error.panicContext.function}`);\n * console.error(`Message: ${error.panicContext.message}`);\n * }\n * }\n * ```\n */\nexport interface PanicContext {\n\t/**\n\t * Source file where panic occurred\n\t */\n\tfile: string;\n\t/**\n\t * Line number in source file\n\t */\n\tline: number;\n\t/**\n\t * Function name where panic occurred\n\t */\n\tfunction: string;\n\t/**\n\t * Panic message\n\t */\n\tmessage: string;\n\t/**\n\t * Unix timestamp (seconds since epoch)\n\t */\n\ttimestamp_secs: number;\n}\n\n/**\n * Base error class for all Kreuzberg errors.\n *\n * All error types thrown by Kreuzberg extend this class, allowing\n * consumers to catch all Kreuzberg-specific errors with a single catch block.\n *\n * @example\n * ```typescript\n * import { extractFile, KreuzbergError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof KreuzbergError) {\n * console.error('Kreuzberg error:', error.message);\n * if (error.panicContext) {\n * console.error('Panic at:', error.panicContext.file + ':' + error.panicContext.line);\n * }\n * } else {\n * throw error; // Re-throw non-Kreuzberg errors\n * }\n * }\n * ```\n */\nexport class KreuzbergError extends Error {\n\t/**\n\t * Panic context if error was caused by a panic in native code.\n\t * Will be null for non-panic errors.\n\t */\n\tpublic readonly panicContext: PanicContext | null;\n\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message);\n\t\tthis.name = \"KreuzbergError\";\n\t\tthis.panicContext = panicContext ?? null;\n\t\tObject.setPrototypeOf(this, KreuzbergError.prototype);\n\t}\n\n\ttoJSON() {\n\t\treturn {\n\t\t\tname: this.name,\n\t\t\tmessage: this.message,\n\t\t\tpanicContext: this.panicContext,\n\t\t\tstack: this.stack,\n\t\t};\n\t}\n}\n\n/**\n * Error thrown when document validation fails.\n *\n * Validation errors occur when a document doesn't meet specified criteria,\n * such as minimum content length, required metadata fields, or quality thresholds.\n *\n * @example\n * ```typescript\n * import { extractFile, ValidationError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof ValidationError) {\n * console.error('Document validation failed:', error.message);\n * }\n * }\n * ```\n */\nexport class ValidationError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"ValidationError\";\n\t\tObject.setPrototypeOf(this, ValidationError.prototype);\n\t}\n}\n\n/**\n * Error thrown when document parsing fails.\n *\n * Parsing errors occur when a document is corrupted, malformed, or cannot\n * be processed by the extraction engine. This includes issues like:\n * - Corrupted PDF files\n * - Invalid XML/JSON syntax\n * - Unsupported file format versions\n * - Encrypted documents without valid passwords\n *\n * @example\n * ```typescript\n * import { extractFile, ParsingError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('corrupted.pdf');\n * } catch (error) {\n * if (error instanceof ParsingError) {\n * console.error('Failed to parse document:', error.message);\n * }\n * }\n * ```\n */\nexport class ParsingError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"ParsingError\";\n\t\tObject.setPrototypeOf(this, ParsingError.prototype);\n\t}\n}\n\n/**\n * Error thrown when OCR processing fails.\n *\n * OCR errors occur during optical character recognition, such as:\n * - OCR backend initialization failures\n * - Image preprocessing errors\n * - Language model loading issues\n * - OCR engine crashes\n *\n * @example\n * ```typescript\n * import { extractFile, OcrError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('scanned.pdf', null, {\n * ocr: { backend: 'tesseract', language: 'eng' }\n * });\n * } catch (error) {\n * if (error instanceof OcrError) {\n * console.error('OCR processing failed:', error.message);\n * }\n * }\n * ```\n */\nexport class OcrError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"OcrError\";\n\t\tObject.setPrototypeOf(this, OcrError.prototype);\n\t}\n}\n\n/**\n * Error thrown when cache operations fail.\n *\n * Cache errors are typically non-fatal and occur during caching operations, such as:\n * - Cache directory creation failures\n * - Disk write errors\n * - Cache entry corruption\n * - Insufficient disk space\n *\n * These errors are usually logged but don't prevent extraction from completing.\n *\n * @example\n * ```typescript\n * import { extractFile, CacheError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf', null, {\n * useCache: true\n * });\n * } catch (error) {\n * if (error instanceof CacheError) {\n * console.warn('Cache operation failed, continuing without cache:', error.message);\n * }\n * }\n * ```\n */\nexport class CacheError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"CacheError\";\n\t\tObject.setPrototypeOf(this, CacheError.prototype);\n\t}\n}\n\n/**\n * Error thrown when image processing operations fail.\n *\n * Image processing errors occur during image manipulation, such as:\n * - Image decoding failures\n * - Unsupported image formats\n * - Image resizing/scaling errors\n * - DPI adjustment failures\n * - Color space conversion issues\n *\n * @example\n * ```typescript\n * import { extractFile, ImageProcessingError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf', null, {\n * images: {\n * extractImages: true,\n * targetDpi: 300\n * }\n * });\n * } catch (error) {\n * if (error instanceof ImageProcessingError) {\n * console.error('Image processing failed:', error.message);\n * }\n * }\n * ```\n */\nexport class ImageProcessingError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"ImageProcessingError\";\n\t\tObject.setPrototypeOf(this, ImageProcessingError.prototype);\n\t}\n}\n\n/**\n * Error thrown when a plugin operation fails.\n *\n * Plugin errors occur in custom plugins (postprocessors, validators, OCR backends), such as:\n * - Plugin initialization failures\n * - Plugin processing errors\n * - Plugin crashes or timeouts\n * - Invalid plugin configuration\n *\n * The error message includes the plugin name to help identify which plugin failed.\n *\n * @example\n * ```typescript\n * import { extractFile, PluginError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof PluginError) {\n * console.error(`Plugin '${error.pluginName}' failed:`, error.message);\n * }\n * }\n * ```\n */\nexport class PluginError extends KreuzbergError {\n\t/**\n\t * Name of the plugin that threw the error.\n\t */\n\tpublic readonly pluginName: string;\n\n\tconstructor(message: string, pluginName: string, panicContext?: PanicContext | null) {\n\t\tsuper(`Plugin error in '${pluginName}': ${message}`, panicContext);\n\t\tthis.name = \"PluginError\";\n\t\tthis.pluginName = pluginName;\n\t\tObject.setPrototypeOf(this, PluginError.prototype);\n\t}\n\n\toverride toJSON() {\n\t\treturn {\n\t\t\tname: this.name,\n\t\t\tmessage: this.message,\n\t\t\tpluginName: this.pluginName,\n\t\t\tpanicContext: this.panicContext,\n\t\t\tstack: this.stack,\n\t\t};\n\t}\n}\n\n/**\n * Error thrown when a required system dependency is missing.\n *\n * Missing dependency errors occur when external tools or libraries are not available, such as:\n * - Tesseract OCR (for OCR processing)\n * - ImageMagick (for image processing)\n * - Poppler (for PDF rendering)\n *\n * @example\n * ```typescript\n * import { extractFile, MissingDependencyError } from '@kreuzberg/node';\n *\n * try {\n * const result = await extractFile('document.pdf');\n * } catch (error) {\n * if (error instanceof MissingDependencyError) {\n * console.error('Missing dependency:', error.message);\n * console.log('Please install Tesseract OCR for image processing');\n * }\n * }\n * ```\n */\nexport class MissingDependencyError extends KreuzbergError {\n\tconstructor(message: string, panicContext?: PanicContext | null) {\n\t\tsuper(message, panicContext);\n\t\tthis.name = \"MissingDependencyError\";\n\t\tObject.setPrototypeOf(this, MissingDependencyError.prototype);\n\t}\n}\n"],"mappings":";AAyCO,IAAK,YAAL,kBAAKA,eAAL;AAIN,EAAAA,sBAAA,aAAU,KAAV;AAIA,EAAAA,sBAAA,kBAAe,KAAf;AAIA,EAAAA,sBAAA,WAAQ,KAAR;AAIA,EAAAA,sBAAA,qBAAkB,KAAlB;AAIA,EAAAA,sBAAA,aAAU,KAAV;AAIA,EAAAA,sBAAA,kBAAe,KAAf;AAIA,EAAAA,sBAAA,cAAW,KAAX;AAIA,EAAAA,sBAAA,uBAAoB,KAApB;AAhCW,SAAAA;AAAA,GAAA;AAyGL,IAAM,iBAAN,MAAM,wBAAuB,MAAM;AAAA;AAAA;AAAA;AAAA;AAAA,EAKzB;AAAA,EAEhB,YAAY,SAAiB,cAAoC;AAChE,UAAM,OAAO;AACb,SAAK,OAAO;AACZ,SAAK,eAAe,gBAAgB;AACpC,WAAO,eAAe,MAAM,gBAAe,SAAS;AAAA,EACrD;AAAA,EAEA,SAAS;AACR,WAAO;AAAA,MACN,MAAM,KAAK;AAAA,MACX,SAAS,KAAK;AAAA,MACd,cAAc,KAAK;AAAA,MACnB,OAAO,KAAK;AAAA,IACb;AAAA,EACD;AACD;AAqBO,IAAM,kBAAN,MAAM,yBAAwB,eAAe;AAAA,EACnD,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,iBAAgB,SAAS;AAAA,EACtD;AACD;AAyBO,IAAM,eAAN,MAAM,sBAAqB,eAAe;AAAA,EAChD,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,cAAa,SAAS;AAAA,EACnD;AACD;AA0BO,IAAM,WAAN,MAAM,kBAAiB,eAAe;AAAA,EAC5C,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,UAAS,SAAS;AAAA,EAC/C;AACD;AA4BO,IAAM,aAAN,MAAM,oBAAmB,eAAe;AAAA,EAC9C,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,YAAW,SAAS;AAAA,EACjD;AACD;AA8BO,IAAM,uBAAN,MAAM,8BAA6B,eAAe;AAAA,EACxD,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,sBAAqB,SAAS;AAAA,EAC3D;AACD;AA0BO,IAAM,cAAN,MAAM,qBAAoB,eAAe;AAAA;AAAA;AAAA;AAAA,EAI/B;AAAA,EAEhB,YAAY,SAAiB,YAAoB,cAAoC;AACpF,UAAM,oBAAoB,UAAU,MAAM,OAAO,IAAI,YAAY;AACjE,SAAK,OAAO;AACZ,SAAK,aAAa;AAClB,WAAO,eAAe,MAAM,aAAY,SAAS;AAAA,EAClD;AAAA,EAES,SAAS;AACjB,WAAO;AAAA,MACN,MAAM,KAAK;AAAA,MACX,SAAS,KAAK;AAAA,MACd,YAAY,KAAK;AAAA,MACjB,cAAc,KAAK;AAAA,MACnB,OAAO,KAAK;AAAA,IACb;AAAA,EACD;AACD;AAwBO,IAAM,yBAAN,MAAM,gCAA+B,eAAe;AAAA,EAC1D,YAAY,SAAiB,cAAoC;AAChE,UAAM,SAAS,YAAY;AAC3B,SAAK,OAAO;AACZ,WAAO,eAAe,MAAM,wBAAuB,SAAS;AAAA,EAC7D;AACD;","names":["ErrorCode"]}
|
package/dist/index.d.mts
CHANGED
|
@@ -2,7 +2,6 @@ import { ErrorClassification, ExtractionConfig, ExtractionResult, WorkerPool, Wo
|
|
|
2
2
|
export { Chunk, ChunkingConfig, ExtractedImage, HtmlConversionOptions, HtmlPreprocessingOptions, ImageExtractionConfig, KeywordConfig, LanguageDetectionConfig, OcrConfig, PageContent, PageExtractionConfig, PdfConfig, PostProcessorConfig, Table, TesseractConfig, TokenReductionConfig } from './types.mjs';
|
|
3
3
|
import { PanicContext } from './errors.mjs';
|
|
4
4
|
export { CacheError, ErrorCode, ImageProcessingError, KreuzbergError, MissingDependencyError, OcrError, ParsingError, PluginError, ValidationError } from './errors.mjs';
|
|
5
|
-
export { GutenOcrBackend } from './ocr/guten-ocr.mjs';
|
|
6
5
|
|
|
7
6
|
/**
|
|
8
7
|
* Get the error code for the last FFI error.
|
|
@@ -797,19 +796,11 @@ declare function listValidators(): string[];
|
|
|
797
796
|
*
|
|
798
797
|
* @example
|
|
799
798
|
* ```typescript
|
|
800
|
-
* import {
|
|
801
|
-
* import { registerOcrBackend, extractFile } from '@kreuzberg/node';
|
|
802
|
-
*
|
|
803
|
-
* // Create and initialize backend
|
|
804
|
-
* const backend = new GutenOcrBackend();
|
|
805
|
-
* await backend.initialize();
|
|
806
|
-
*
|
|
807
|
-
* // Register with Kreuzberg
|
|
808
|
-
* registerOcrBackend(backend);
|
|
799
|
+
* import { extractFile } from '@kreuzberg/node';
|
|
809
800
|
*
|
|
810
|
-
* //
|
|
801
|
+
* // PaddleOCR is built into the native Rust core - just use the backend name
|
|
811
802
|
* const result = await extractFile('scanned.pdf', null, {
|
|
812
|
-
* ocr: { backend: '
|
|
803
|
+
* ocr: { backend: 'paddle-ocr', language: 'en' }
|
|
813
804
|
* });
|
|
814
805
|
* console.log(result.content);
|
|
815
806
|
* ```
|
|
@@ -1176,7 +1167,7 @@ declare function __resetBindingForTests(): void;
|
|
|
1176
1167
|
*
|
|
1177
1168
|
* ## Supported Formats
|
|
1178
1169
|
*
|
|
1179
|
-
* - **Documents**: PDF, DOCX, PPTX, XLSX, DOC, PPT
|
|
1170
|
+
* - **Documents**: PDF, DOCX, PPTX, XLSX, DOC, PPT
|
|
1180
1171
|
* - **Text**: Markdown, Plain Text, XML
|
|
1181
1172
|
* - **Web**: HTML (converted to Markdown)
|
|
1182
1173
|
* - **Data**: JSON, YAML, TOML
|
|
@@ -1200,6 +1191,6 @@ declare function __resetBindingForTests(): void;
|
|
|
1200
1191
|
* @module @kreuzberg/node
|
|
1201
1192
|
*/
|
|
1202
1193
|
|
|
1203
|
-
declare const __version__ = "4.
|
|
1194
|
+
declare const __version__ = "4.3.1";
|
|
1204
1195
|
|
|
1205
1196
|
export { type EmbeddingPreset, ErrorClassification, ExtractionConfig, ExtractionResult, OcrBackendProtocol, PanicContext, PostProcessorProtocol, ValidatorProtocol, WorkerPool, WorkerPoolStats, __resetBindingForTests, __setBindingForTests, __version__, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, batchExtractFilesInWorker, batchExtractFilesSync, classifyError, clearDocumentExtractors, clearOcrBackends, clearPostProcessors, clearValidators, closeWorkerPool, createWorkerPool, detectMimeType, detectMimeTypeFromPath, extractBytes, extractBytesSync, extractFile, extractFileInWorker, extractFileSync, getEmbeddingPreset, getErrorCodeDescription, getErrorCodeName, getExtensionsForMime, getLastErrorCode, getLastPanicContext, getWorkerPoolStats, listDocumentExtractors, listEmbeddingPresets, listOcrBackends, listPostProcessors, listValidators, loadConfigFile, loadConfigFromPath, registerOcrBackend, registerPostProcessor, registerValidator, unregisterDocumentExtractor, unregisterOcrBackend, unregisterPostProcessor, unregisterValidator, validateMimeType };
|
package/dist/index.d.ts
CHANGED
|
@@ -2,7 +2,6 @@ import { ErrorClassification, ExtractionConfig, ExtractionResult, WorkerPool, Wo
|
|
|
2
2
|
export { Chunk, ChunkingConfig, ExtractedImage, HtmlConversionOptions, HtmlPreprocessingOptions, ImageExtractionConfig, KeywordConfig, LanguageDetectionConfig, OcrConfig, PageContent, PageExtractionConfig, PdfConfig, PostProcessorConfig, Table, TesseractConfig, TokenReductionConfig } from './types.js';
|
|
3
3
|
import { PanicContext } from './errors.js';
|
|
4
4
|
export { CacheError, ErrorCode, ImageProcessingError, KreuzbergError, MissingDependencyError, OcrError, ParsingError, PluginError, ValidationError } from './errors.js';
|
|
5
|
-
export { GutenOcrBackend } from './ocr/guten-ocr.js';
|
|
6
5
|
|
|
7
6
|
/**
|
|
8
7
|
* Get the error code for the last FFI error.
|
|
@@ -797,19 +796,11 @@ declare function listValidators(): string[];
|
|
|
797
796
|
*
|
|
798
797
|
* @example
|
|
799
798
|
* ```typescript
|
|
800
|
-
* import {
|
|
801
|
-
* import { registerOcrBackend, extractFile } from '@kreuzberg/node';
|
|
802
|
-
*
|
|
803
|
-
* // Create and initialize backend
|
|
804
|
-
* const backend = new GutenOcrBackend();
|
|
805
|
-
* await backend.initialize();
|
|
806
|
-
*
|
|
807
|
-
* // Register with Kreuzberg
|
|
808
|
-
* registerOcrBackend(backend);
|
|
799
|
+
* import { extractFile } from '@kreuzberg/node';
|
|
809
800
|
*
|
|
810
|
-
* //
|
|
801
|
+
* // PaddleOCR is built into the native Rust core - just use the backend name
|
|
811
802
|
* const result = await extractFile('scanned.pdf', null, {
|
|
812
|
-
* ocr: { backend: '
|
|
803
|
+
* ocr: { backend: 'paddle-ocr', language: 'en' }
|
|
813
804
|
* });
|
|
814
805
|
* console.log(result.content);
|
|
815
806
|
* ```
|
|
@@ -1176,7 +1167,7 @@ declare function __resetBindingForTests(): void;
|
|
|
1176
1167
|
*
|
|
1177
1168
|
* ## Supported Formats
|
|
1178
1169
|
*
|
|
1179
|
-
* - **Documents**: PDF, DOCX, PPTX, XLSX, DOC, PPT
|
|
1170
|
+
* - **Documents**: PDF, DOCX, PPTX, XLSX, DOC, PPT
|
|
1180
1171
|
* - **Text**: Markdown, Plain Text, XML
|
|
1181
1172
|
* - **Web**: HTML (converted to Markdown)
|
|
1182
1173
|
* - **Data**: JSON, YAML, TOML
|
|
@@ -1200,6 +1191,6 @@ declare function __resetBindingForTests(): void;
|
|
|
1200
1191
|
* @module @kreuzberg/node
|
|
1201
1192
|
*/
|
|
1202
1193
|
|
|
1203
|
-
declare const __version__ = "4.
|
|
1194
|
+
declare const __version__ = "4.3.1";
|
|
1204
1195
|
|
|
1205
1196
|
export { type EmbeddingPreset, ErrorClassification, ExtractionConfig, ExtractionResult, OcrBackendProtocol, PanicContext, PostProcessorProtocol, ValidatorProtocol, WorkerPool, WorkerPoolStats, __resetBindingForTests, __setBindingForTests, __version__, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, batchExtractFilesInWorker, batchExtractFilesSync, classifyError, clearDocumentExtractors, clearOcrBackends, clearPostProcessors, clearValidators, closeWorkerPool, createWorkerPool, detectMimeType, detectMimeTypeFromPath, extractBytes, extractBytesSync, extractFile, extractFileInWorker, extractFileSync, getEmbeddingPreset, getErrorCodeDescription, getErrorCodeName, getExtensionsForMime, getLastErrorCode, getLastPanicContext, getWorkerPoolStats, listDocumentExtractors, listEmbeddingPresets, listOcrBackends, listPostProcessors, listValidators, loadConfigFile, loadConfigFromPath, registerOcrBackend, registerPostProcessor, registerValidator, unregisterDocumentExtractor, unregisterOcrBackend, unregisterPostProcessor, unregisterValidator, validateMimeType };
|