@kreuzberg/node 4.0.0-rc.15 → 4.0.0-rc.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +72 -3
- package/dist/index.d.ts +72 -3
- package/dist/index.js +20 -1
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +17 -1
- package/dist/index.mjs.map +1 -1
- package/dist/types.d.mts +37 -1
- package/dist/types.d.ts +37 -1
- package/dist/types.js.map +1 -1
- package/index.d.ts +478 -0
- package/index.js +72 -52
- package/package.json +4 -4
package/dist/index.d.mts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { PanicContext } from './errors.mjs';
|
|
2
2
|
export { CacheError, ErrorCode, ImageProcessingError, KreuzbergError, MissingDependencyError, OcrError, ParsingError, PluginError, ValidationError } from './errors.mjs';
|
|
3
|
-
import { ExtractionConfig as ExtractionConfig$1, ExtractionResult, PostProcessorProtocol, ValidatorProtocol, OcrBackendProtocol } from './types.mjs';
|
|
3
|
+
import { ExtractionConfig as ExtractionConfig$1, ExtractionResult, PostProcessorProtocol, ValidatorProtocol, OcrBackendProtocol, ErrorClassification } from './types.mjs';
|
|
4
4
|
export { ArchiveMetadata, Chunk, ChunkMetadata, ChunkingConfig, EmailMetadata, ErrorMetadata, ExcelMetadata, ExtractedImage, HtmlConversionOptions, HtmlMetadata, HtmlPreprocessingOptions, ImageExtractionConfig, ImageMetadata, ImagePreprocessingMetadata, KeywordAlgorithm, KeywordConfig, LanguageDetectionConfig, Metadata, OcrConfig, OcrMetadata, PageBoundary, PageConfig, PageContent, PageInfo, PageStructure, PageUnitType, PdfConfig, PdfMetadata, PostProcessorConfig, PptxMetadata, ProcessingStage, RakeParams, Table, TesseractConfig, TextMetadata, TokenReductionConfig, XmlMetadata, YakeParams } from './types.mjs';
|
|
5
5
|
export { GutenOcrBackend } from './ocr/guten-ocr.mjs';
|
|
6
6
|
|
|
@@ -903,6 +903,75 @@ declare function getLastErrorCode(): number;
|
|
|
903
903
|
* ```
|
|
904
904
|
*/
|
|
905
905
|
declare function getLastPanicContext(): PanicContext | null;
|
|
906
|
-
|
|
906
|
+
/**
|
|
907
|
+
* Returns the human-readable name for an error code.
|
|
908
|
+
*
|
|
909
|
+
* Maps numeric error codes to their string names, providing a consistent way
|
|
910
|
+
* to get error code names across all platforms.
|
|
911
|
+
*
|
|
912
|
+
* @param code - The numeric error code (0-7)
|
|
913
|
+
* @returns The error code name as a string (e.g., "validation", "ocr", "unknown")
|
|
914
|
+
*
|
|
915
|
+
* @example
|
|
916
|
+
* ```typescript
|
|
917
|
+
* import { getErrorCodeName } from '@kreuzberg/node';
|
|
918
|
+
*
|
|
919
|
+
* const name = getErrorCodeName(0); // returns "validation"
|
|
920
|
+
* const name = getErrorCodeName(2); // returns "ocr"
|
|
921
|
+
* const name = getErrorCodeName(99); // returns "unknown"
|
|
922
|
+
* ```
|
|
923
|
+
*/
|
|
924
|
+
declare function getErrorCodeName(code: number): string;
|
|
925
|
+
/**
|
|
926
|
+
* Returns the description for an error code.
|
|
927
|
+
*
|
|
928
|
+
* Retrieves user-friendly descriptions of error types from the FFI layer.
|
|
929
|
+
*
|
|
930
|
+
* @param code - The numeric error code (0-7)
|
|
931
|
+
* @returns A brief description of the error type
|
|
932
|
+
*
|
|
933
|
+
* @example
|
|
934
|
+
* ```typescript
|
|
935
|
+
* import { getErrorCodeDescription } from '@kreuzberg/node';
|
|
936
|
+
*
|
|
937
|
+
* const desc = getErrorCodeDescription(0); // returns "Input validation error"
|
|
938
|
+
* const desc = getErrorCodeDescription(4); // returns "File system I/O error"
|
|
939
|
+
* const desc = getErrorCodeDescription(99); // returns "Unknown error code"
|
|
940
|
+
* ```
|
|
941
|
+
*/
|
|
942
|
+
declare function getErrorCodeDescription(code: number): string;
|
|
943
|
+
/**
|
|
944
|
+
* Classifies an error message string into an error code category.
|
|
945
|
+
*
|
|
946
|
+
* This function analyzes the error message content and returns the most likely
|
|
947
|
+
* error code (0-7) based on keyword patterns. Used to programmatically classify
|
|
948
|
+
* errors for handling purposes.
|
|
949
|
+
*
|
|
950
|
+
* The classification is based on keyword matching:
|
|
951
|
+
* - **Validation (0)**: Keywords like "invalid", "validation", "schema", "required"
|
|
952
|
+
* - **Parsing (1)**: Keywords like "parsing", "corrupted", "malformed"
|
|
953
|
+
* - **Ocr (2)**: Keywords like "ocr", "tesseract", "language", "model"
|
|
954
|
+
* - **MissingDependency (3)**: Keywords like "not found", "missing", "dependency"
|
|
955
|
+
* - **Io (4)**: Keywords like "file", "disk", "read", "write", "permission"
|
|
956
|
+
* - **Plugin (5)**: Keywords like "plugin", "register", "extension"
|
|
957
|
+
* - **UnsupportedFormat (6)**: Keywords like "unsupported", "format", "mime"
|
|
958
|
+
* - **Internal (7)**: Keywords like "internal", "bug", "panic"
|
|
959
|
+
*
|
|
960
|
+
* @param errorMessage - The error message string to classify
|
|
961
|
+
* @returns An object with the classification details
|
|
962
|
+
*
|
|
963
|
+
* @example
|
|
964
|
+
* ```typescript
|
|
965
|
+
* import { classifyError } from '@kreuzberg/node';
|
|
966
|
+
*
|
|
967
|
+
* const result = classifyError("PDF file is corrupted");
|
|
968
|
+
* // Returns: { code: 1, name: "parsing", confidence: 0.95 }
|
|
969
|
+
*
|
|
970
|
+
* const result = classifyError("Tesseract not found");
|
|
971
|
+
* // Returns: { code: 3, name: "missing_dependency", confidence: 0.9 }
|
|
972
|
+
* ```
|
|
973
|
+
*/
|
|
974
|
+
declare function classifyError(errorMessage: string): ErrorClassification;
|
|
975
|
+
declare const __version__ = "4.0.0-rc.16";
|
|
907
976
|
|
|
908
|
-
export { type EmbeddingPreset, ExtractionConfig, ExtractionResult, OcrBackendProtocol, PanicContext, PostProcessorProtocol, ValidatorProtocol, __resetBindingForTests, __setBindingForTests, __version__, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, batchExtractFilesSync, clearDocumentExtractors, clearOcrBackends, clearPostProcessors, clearValidators, detectMimeType, detectMimeTypeFromPath, extractBytes, extractBytesSync, extractFile, extractFileSync, getEmbeddingPreset, getExtensionsForMime, getLastErrorCode, getLastPanicContext, listDocumentExtractors, listEmbeddingPresets, listOcrBackends, listPostProcessors, listValidators, registerOcrBackend, registerPostProcessor, registerValidator, unregisterDocumentExtractor, unregisterOcrBackend, unregisterPostProcessor, unregisterValidator, validateMimeType };
|
|
977
|
+
export { type EmbeddingPreset, ErrorClassification, ExtractionConfig, ExtractionResult, OcrBackendProtocol, PanicContext, PostProcessorProtocol, ValidatorProtocol, __resetBindingForTests, __setBindingForTests, __version__, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, batchExtractFilesSync, classifyError, clearDocumentExtractors, clearOcrBackends, clearPostProcessors, clearValidators, detectMimeType, detectMimeTypeFromPath, extractBytes, extractBytesSync, extractFile, extractFileSync, getEmbeddingPreset, getErrorCodeDescription, getErrorCodeName, getExtensionsForMime, getLastErrorCode, getLastPanicContext, listDocumentExtractors, listEmbeddingPresets, listOcrBackends, listPostProcessors, listValidators, registerOcrBackend, registerPostProcessor, registerValidator, unregisterDocumentExtractor, unregisterOcrBackend, unregisterPostProcessor, unregisterValidator, validateMimeType };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { PanicContext } from './errors.js';
|
|
2
2
|
export { CacheError, ErrorCode, ImageProcessingError, KreuzbergError, MissingDependencyError, OcrError, ParsingError, PluginError, ValidationError } from './errors.js';
|
|
3
|
-
import { ExtractionConfig as ExtractionConfig$1, ExtractionResult, PostProcessorProtocol, ValidatorProtocol, OcrBackendProtocol } from './types.js';
|
|
3
|
+
import { ExtractionConfig as ExtractionConfig$1, ExtractionResult, PostProcessorProtocol, ValidatorProtocol, OcrBackendProtocol, ErrorClassification } from './types.js';
|
|
4
4
|
export { ArchiveMetadata, Chunk, ChunkMetadata, ChunkingConfig, EmailMetadata, ErrorMetadata, ExcelMetadata, ExtractedImage, HtmlConversionOptions, HtmlMetadata, HtmlPreprocessingOptions, ImageExtractionConfig, ImageMetadata, ImagePreprocessingMetadata, KeywordAlgorithm, KeywordConfig, LanguageDetectionConfig, Metadata, OcrConfig, OcrMetadata, PageBoundary, PageConfig, PageContent, PageInfo, PageStructure, PageUnitType, PdfConfig, PdfMetadata, PostProcessorConfig, PptxMetadata, ProcessingStage, RakeParams, Table, TesseractConfig, TextMetadata, TokenReductionConfig, XmlMetadata, YakeParams } from './types.js';
|
|
5
5
|
export { GutenOcrBackend } from './ocr/guten-ocr.js';
|
|
6
6
|
|
|
@@ -903,6 +903,75 @@ declare function getLastErrorCode(): number;
|
|
|
903
903
|
* ```
|
|
904
904
|
*/
|
|
905
905
|
declare function getLastPanicContext(): PanicContext | null;
|
|
906
|
-
|
|
906
|
+
/**
|
|
907
|
+
* Returns the human-readable name for an error code.
|
|
908
|
+
*
|
|
909
|
+
* Maps numeric error codes to their string names, providing a consistent way
|
|
910
|
+
* to get error code names across all platforms.
|
|
911
|
+
*
|
|
912
|
+
* @param code - The numeric error code (0-7)
|
|
913
|
+
* @returns The error code name as a string (e.g., "validation", "ocr", "unknown")
|
|
914
|
+
*
|
|
915
|
+
* @example
|
|
916
|
+
* ```typescript
|
|
917
|
+
* import { getErrorCodeName } from '@kreuzberg/node';
|
|
918
|
+
*
|
|
919
|
+
* const name = getErrorCodeName(0); // returns "validation"
|
|
920
|
+
* const name = getErrorCodeName(2); // returns "ocr"
|
|
921
|
+
* const name = getErrorCodeName(99); // returns "unknown"
|
|
922
|
+
* ```
|
|
923
|
+
*/
|
|
924
|
+
declare function getErrorCodeName(code: number): string;
|
|
925
|
+
/**
|
|
926
|
+
* Returns the description for an error code.
|
|
927
|
+
*
|
|
928
|
+
* Retrieves user-friendly descriptions of error types from the FFI layer.
|
|
929
|
+
*
|
|
930
|
+
* @param code - The numeric error code (0-7)
|
|
931
|
+
* @returns A brief description of the error type
|
|
932
|
+
*
|
|
933
|
+
* @example
|
|
934
|
+
* ```typescript
|
|
935
|
+
* import { getErrorCodeDescription } from '@kreuzberg/node';
|
|
936
|
+
*
|
|
937
|
+
* const desc = getErrorCodeDescription(0); // returns "Input validation error"
|
|
938
|
+
* const desc = getErrorCodeDescription(4); // returns "File system I/O error"
|
|
939
|
+
* const desc = getErrorCodeDescription(99); // returns "Unknown error code"
|
|
940
|
+
* ```
|
|
941
|
+
*/
|
|
942
|
+
declare function getErrorCodeDescription(code: number): string;
|
|
943
|
+
/**
|
|
944
|
+
* Classifies an error message string into an error code category.
|
|
945
|
+
*
|
|
946
|
+
* This function analyzes the error message content and returns the most likely
|
|
947
|
+
* error code (0-7) based on keyword patterns. Used to programmatically classify
|
|
948
|
+
* errors for handling purposes.
|
|
949
|
+
*
|
|
950
|
+
* The classification is based on keyword matching:
|
|
951
|
+
* - **Validation (0)**: Keywords like "invalid", "validation", "schema", "required"
|
|
952
|
+
* - **Parsing (1)**: Keywords like "parsing", "corrupted", "malformed"
|
|
953
|
+
* - **Ocr (2)**: Keywords like "ocr", "tesseract", "language", "model"
|
|
954
|
+
* - **MissingDependency (3)**: Keywords like "not found", "missing", "dependency"
|
|
955
|
+
* - **Io (4)**: Keywords like "file", "disk", "read", "write", "permission"
|
|
956
|
+
* - **Plugin (5)**: Keywords like "plugin", "register", "extension"
|
|
957
|
+
* - **UnsupportedFormat (6)**: Keywords like "unsupported", "format", "mime"
|
|
958
|
+
* - **Internal (7)**: Keywords like "internal", "bug", "panic"
|
|
959
|
+
*
|
|
960
|
+
* @param errorMessage - The error message string to classify
|
|
961
|
+
* @returns An object with the classification details
|
|
962
|
+
*
|
|
963
|
+
* @example
|
|
964
|
+
* ```typescript
|
|
965
|
+
* import { classifyError } from '@kreuzberg/node';
|
|
966
|
+
*
|
|
967
|
+
* const result = classifyError("PDF file is corrupted");
|
|
968
|
+
* // Returns: { code: 1, name: "parsing", confidence: 0.95 }
|
|
969
|
+
*
|
|
970
|
+
* const result = classifyError("Tesseract not found");
|
|
971
|
+
* // Returns: { code: 3, name: "missing_dependency", confidence: 0.9 }
|
|
972
|
+
* ```
|
|
973
|
+
*/
|
|
974
|
+
declare function classifyError(errorMessage: string): ErrorClassification;
|
|
975
|
+
declare const __version__ = "4.0.0-rc.16";
|
|
907
976
|
|
|
908
|
-
export { type EmbeddingPreset, ExtractionConfig, ExtractionResult, OcrBackendProtocol, PanicContext, PostProcessorProtocol, ValidatorProtocol, __resetBindingForTests, __setBindingForTests, __version__, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, batchExtractFilesSync, clearDocumentExtractors, clearOcrBackends, clearPostProcessors, clearValidators, detectMimeType, detectMimeTypeFromPath, extractBytes, extractBytesSync, extractFile, extractFileSync, getEmbeddingPreset, getExtensionsForMime, getLastErrorCode, getLastPanicContext, listDocumentExtractors, listEmbeddingPresets, listOcrBackends, listPostProcessors, listValidators, registerOcrBackend, registerPostProcessor, registerValidator, unregisterDocumentExtractor, unregisterOcrBackend, unregisterPostProcessor, unregisterValidator, validateMimeType };
|
|
977
|
+
export { type EmbeddingPreset, ErrorClassification, ExtractionConfig, ExtractionResult, OcrBackendProtocol, PanicContext, PostProcessorProtocol, ValidatorProtocol, __resetBindingForTests, __setBindingForTests, __version__, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, batchExtractFilesSync, classifyError, clearDocumentExtractors, clearOcrBackends, clearPostProcessors, clearValidators, detectMimeType, detectMimeTypeFromPath, extractBytes, extractBytesSync, extractFile, extractFileSync, getEmbeddingPreset, getErrorCodeDescription, getErrorCodeName, getExtensionsForMime, getLastErrorCode, getLastPanicContext, listDocumentExtractors, listEmbeddingPresets, listOcrBackends, listPostProcessors, listValidators, registerOcrBackend, registerPostProcessor, registerValidator, unregisterDocumentExtractor, unregisterOcrBackend, unregisterPostProcessor, unregisterValidator, validateMimeType };
|
package/dist/index.js
CHANGED
|
@@ -37,6 +37,7 @@ __export(index_exports, {
|
|
|
37
37
|
batchExtractBytesSync: () => batchExtractBytesSync,
|
|
38
38
|
batchExtractFiles: () => batchExtractFiles,
|
|
39
39
|
batchExtractFilesSync: () => batchExtractFilesSync,
|
|
40
|
+
classifyError: () => classifyError,
|
|
40
41
|
clearDocumentExtractors: () => clearDocumentExtractors,
|
|
41
42
|
clearOcrBackends: () => clearOcrBackends,
|
|
42
43
|
clearPostProcessors: () => clearPostProcessors,
|
|
@@ -48,6 +49,8 @@ __export(index_exports, {
|
|
|
48
49
|
extractFile: () => extractFile,
|
|
49
50
|
extractFileSync: () => extractFileSync,
|
|
50
51
|
getEmbeddingPreset: () => getEmbeddingPreset,
|
|
52
|
+
getErrorCodeDescription: () => getErrorCodeDescription,
|
|
53
|
+
getErrorCodeName: () => getErrorCodeName,
|
|
51
54
|
getExtensionsForMime: () => getExtensionsForMime,
|
|
52
55
|
getLastErrorCode: () => getLastErrorCode,
|
|
53
56
|
getLastPanicContext: () => getLastPanicContext,
|
|
@@ -865,7 +868,20 @@ function getLastPanicContext() {
|
|
|
865
868
|
const result = binding2.getLastPanicContext();
|
|
866
869
|
return result;
|
|
867
870
|
}
|
|
868
|
-
|
|
871
|
+
function getErrorCodeName(code) {
|
|
872
|
+
const binding2 = getBinding();
|
|
873
|
+
return binding2.getErrorCodeName(code);
|
|
874
|
+
}
|
|
875
|
+
function getErrorCodeDescription(code) {
|
|
876
|
+
const binding2 = getBinding();
|
|
877
|
+
return binding2.getErrorCodeDescription(code);
|
|
878
|
+
}
|
|
879
|
+
function classifyError(errorMessage) {
|
|
880
|
+
const binding2 = getBinding();
|
|
881
|
+
const result = binding2.classifyError(errorMessage);
|
|
882
|
+
return result;
|
|
883
|
+
}
|
|
884
|
+
const __version__ = "4.0.0-rc.16";
|
|
869
885
|
// Annotate the CommonJS export names for ESM import in node:
|
|
870
886
|
0 && (module.exports = {
|
|
871
887
|
CacheError,
|
|
@@ -886,6 +902,7 @@ const __version__ = "4.0.0-rc.15";
|
|
|
886
902
|
batchExtractBytesSync,
|
|
887
903
|
batchExtractFiles,
|
|
888
904
|
batchExtractFilesSync,
|
|
905
|
+
classifyError,
|
|
889
906
|
clearDocumentExtractors,
|
|
890
907
|
clearOcrBackends,
|
|
891
908
|
clearPostProcessors,
|
|
@@ -897,6 +914,8 @@ const __version__ = "4.0.0-rc.15";
|
|
|
897
914
|
extractFile,
|
|
898
915
|
extractFileSync,
|
|
899
916
|
getEmbeddingPreset,
|
|
917
|
+
getErrorCodeDescription,
|
|
918
|
+
getErrorCodeName,
|
|
900
919
|
getExtensionsForMime,
|
|
901
920
|
getLastErrorCode,
|
|
902
921
|
getLastPanicContext,
|