@kreuzberg/node 4.0.0-rc.16 → 4.0.0-rc.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -1,7 +1,7 @@
1
1
  import { PanicContext } from './errors.mjs';
2
2
  export { CacheError, ErrorCode, ImageProcessingError, KreuzbergError, MissingDependencyError, OcrError, ParsingError, PluginError, ValidationError } from './errors.mjs';
3
- import { ExtractionConfig as ExtractionConfig$1, ExtractionResult, PostProcessorProtocol, ValidatorProtocol, OcrBackendProtocol, ErrorClassification } from './types.mjs';
4
- export { ArchiveMetadata, Chunk, ChunkMetadata, ChunkingConfig, EmailMetadata, ErrorMetadata, ExcelMetadata, ExtractedImage, HtmlConversionOptions, HtmlMetadata, HtmlPreprocessingOptions, ImageExtractionConfig, ImageMetadata, ImagePreprocessingMetadata, KeywordAlgorithm, KeywordConfig, LanguageDetectionConfig, Metadata, OcrConfig, OcrMetadata, PageBoundary, PageConfig, PageContent, PageInfo, PageStructure, PageUnitType, PdfConfig, PdfMetadata, PostProcessorConfig, PptxMetadata, ProcessingStage, RakeParams, Table, TesseractConfig, TextMetadata, TokenReductionConfig, XmlMetadata, YakeParams } from './types.mjs';
3
+ import { ExtractionConfig as ExtractionConfig$1, ExtractionResult, PostProcessorProtocol, ValidatorProtocol, OcrBackendProtocol, OcrConfig, ChunkingConfig, ImageExtractionConfig, PdfConfig, KeywordConfig, LanguageDetectionConfig, ErrorClassification } from './types.mjs';
4
+ export { ArchiveMetadata, Chunk, ChunkMetadata, EmailMetadata, ErrorMetadata, ExcelMetadata, ExtractedImage, HtmlConversionOptions, HtmlMetadata, HtmlPreprocessingOptions, ImageMetadata, ImagePreprocessingMetadata, KeywordAlgorithm, Metadata, OcrMetadata, PageBoundary, PageConfig, PageContent, PageInfo, PageStructure, PageUnitType, PdfMetadata, PostProcessorConfig, PptxMetadata, ProcessingStage, RakeParams, Table, TesseractConfig, TextMetadata, TokenReductionConfig, XmlMetadata, YakeParams } from './types.mjs';
5
5
  export { GutenOcrBackend } from './ocr/guten-ocr.mjs';
6
6
 
7
7
  /**
@@ -97,7 +97,7 @@ declare function __resetBindingForTests(): void;
97
97
  * const result2 = extractFileSync('scanned.pdf', null, config);
98
98
  * ```
99
99
  */
100
- declare function extractFileSync(filePath: string, mimeType?: string | null, config?: ExtractionConfig$1 | null): ExtractionResult;
100
+ declare function extractFileSync(filePath: string, mimeTypeOrConfig?: string | null | ExtractionConfig$1, maybeConfig?: ExtractionConfig$1 | null): ExtractionResult;
101
101
  /**
102
102
  * Extract content from a single file (asynchronous).
103
103
  *
@@ -133,7 +133,7 @@ declare function extractFileSync(filePath: string, mimeType?: string | null, con
133
133
  * console.log(result2.chunks); // Array of text chunks
134
134
  * ```
135
135
  */
136
- declare function extractFile(filePath: string, mimeType?: string | null, config?: ExtractionConfig$1 | null): Promise<ExtractionResult>;
136
+ declare function extractFile(filePath: string, mimeTypeOrConfig?: string | null | ExtractionConfig$1, maybeConfig?: ExtractionConfig$1 | null): Promise<ExtractionResult>;
137
137
  /**
138
138
  * Extract content from raw bytes (synchronous).
139
139
  *
@@ -161,7 +161,7 @@ declare function extractFile(filePath: string, mimeType?: string | null, config?
161
161
  * console.log(result.content);
162
162
  * ```
163
163
  */
164
- declare function extractBytesSync(data: Uint8Array, mimeType: string, config?: ExtractionConfig$1 | null): ExtractionResult;
164
+ declare function extractBytesSync(dataOrPath: Uint8Array | string, mimeType: string, config?: ExtractionConfig$1 | null): ExtractionResult;
165
165
  /**
166
166
  * Extract content from raw bytes (asynchronous).
167
167
  *
@@ -189,7 +189,7 @@ declare function extractBytesSync(data: Uint8Array, mimeType: string, config?: E
189
189
  * console.log(result.content);
190
190
  * ```
191
191
  */
192
- declare function extractBytes(data: Uint8Array, mimeType: string, config?: ExtractionConfig$1 | null): Promise<ExtractionResult>;
192
+ declare function extractBytes(dataOrPath: Uint8Array | string, mimeType: string, config?: ExtractionConfig$1 | null): Promise<ExtractionResult>;
193
193
  /**
194
194
  * Extract content from multiple files in parallel (synchronous).
195
195
  *
@@ -610,23 +610,109 @@ declare function unregisterDocumentExtractor(name: string): void;
610
610
  */
611
611
  declare function clearDocumentExtractors(): void;
612
612
  /**
613
- * ExtractionConfig namespace with static methods for loading configuration from files.
613
+ * Builder class for creating ExtractionConfig objects with a fluent API.
614
614
  *
615
- * Provides a factory method to load extraction configuration from TOML, YAML, or JSON files.
616
- * The file format is automatically detected based on the file extension.
615
+ * Provides a convenient way to build extraction configurations using method chaining.
616
+ *
617
+ * @example
618
+ * ```typescript
619
+ * import { ExtractionConfig, extractFile } from '@kreuzberg/node';
620
+ *
621
+ * // Create with builder pattern
622
+ * const config = ExtractionConfig.default()
623
+ * .withChunking({ maxChars: 2048 })
624
+ * .withOcr({ backend: 'tesseract', language: 'eng' })
625
+ * .build();
626
+ *
627
+ * const result = await extractFile('document.pdf', null, config);
628
+ * ```
629
+ */
630
+ declare class ExtractionConfigBuilder {
631
+ private config;
632
+ /**
633
+ * Create a new builder with default configuration.
634
+ */
635
+ static default(): ExtractionConfigBuilder;
636
+ /**
637
+ * Set OCR configuration.
638
+ */
639
+ withOcr(ocr: OcrConfig): ExtractionConfigBuilder;
640
+ /**
641
+ * Set chunking configuration.
642
+ */
643
+ withChunking(chunking: ChunkingConfig): ExtractionConfigBuilder;
644
+ /**
645
+ * Set image extraction configuration.
646
+ */
647
+ withImageExtraction(images: ImageExtractionConfig): ExtractionConfigBuilder;
648
+ /**
649
+ * Set PDF configuration.
650
+ */
651
+ withPdf(pdf: PdfConfig): ExtractionConfigBuilder;
652
+ /**
653
+ * Set keyword extraction configuration.
654
+ */
655
+ withKeywords(keywords: KeywordConfig): ExtractionConfigBuilder;
656
+ /**
657
+ * Set language detection configuration.
658
+ */
659
+ withLanguageDetection(languageDetection: LanguageDetectionConfig): ExtractionConfigBuilder;
660
+ /**
661
+ * Set whether to enable metadata extraction.
662
+ */
663
+ withMetadataExtraction(enabled: boolean): ExtractionConfigBuilder;
664
+ /**
665
+ * Set whether to enable quality mode.
666
+ */
667
+ withQualityMode(enabled: boolean): ExtractionConfigBuilder;
668
+ /**
669
+ * Build and return the final ExtractionConfig object.
670
+ */
671
+ build(): ExtractionConfig$1;
672
+ }
673
+ /**
674
+ * ExtractionConfig namespace with static methods for loading configuration from files
675
+ * and creating new configurations with the builder pattern.
676
+ *
677
+ * Provides factory methods to load extraction configuration from TOML, YAML, or JSON files,
678
+ * or to create configurations using a fluent builder API.
617
679
  *
618
680
  * @example
619
681
  * ```typescript
620
682
  * import { ExtractionConfig, extractFile } from '@kreuzberg/node';
621
683
  *
622
684
  * // Load configuration from file
623
- * const config = ExtractionConfig.fromFile('config.toml');
685
+ * const config1 = ExtractionConfig.fromFile('config.toml');
686
+ *
687
+ * // Create with builder pattern
688
+ * const config2 = ExtractionConfig.default()
689
+ * .withChunking({ maxChars: 2048 })
690
+ * .build();
624
691
  *
625
692
  * // Use with extraction
626
- * const result = await extractFile('document.pdf', null, config);
693
+ * const result = await extractFile('document.pdf', null, config2);
627
694
  * ```
628
695
  */
629
696
  declare const ExtractionConfig: {
697
+ /**
698
+ * Create a default extraction configuration using the builder pattern.
699
+ *
700
+ * Returns a builder object that allows you to configure extraction settings
701
+ * using method chaining.
702
+ *
703
+ * @returns ExtractionConfigBuilder for chaining configuration calls
704
+ *
705
+ * @example
706
+ * ```typescript
707
+ * import { ExtractionConfig } from '@kreuzberg/node';
708
+ *
709
+ * const config = ExtractionConfig.default()
710
+ * .withChunking({ maxChars: 2048 })
711
+ * .withOcr({ backend: 'tesseract', language: 'eng' })
712
+ * .build();
713
+ * ```
714
+ */
715
+ default(): ExtractionConfigBuilder;
630
716
  /**
631
717
  * Load extraction configuration from a file.
632
718
  *
@@ -711,28 +797,30 @@ declare function detectMimeType(bytes: Buffer): string;
711
797
  /**
712
798
  * Detect MIME type from a file path.
713
799
  *
714
- * Uses file extension to determine MIME type. Falls back to `mime_guess` crate
715
- * if extension-based detection fails.
800
+ * Determines the MIME type based on the file extension in the provided path.
801
+ * By default, checks if the file exists; can be disabled with checkExists parameter.
716
802
  *
717
- * @param path - Path to the file (string)
718
- * @returns The detected MIME type string
803
+ * @param filePath - The file path to detect MIME type from (e.g., 'document.pdf')
804
+ * @param checkExists - Whether to verify the file exists (default: true)
805
+ * @returns The detected MIME type as a string (e.g., 'application/pdf')
719
806
  *
720
- * @throws {Error} If MIME type cannot be determined from path/extension
721
- * @throws {Error} If extension is unknown
807
+ * @throws {Error} If MIME type cannot be determined from the file extension,
808
+ * or if checkExists is true and the file does not exist
722
809
  *
723
810
  * @example
724
811
  * ```typescript
725
812
  * import { detectMimeTypeFromPath } from '@kreuzberg/node';
726
813
  *
727
- * // Detect from existing file
728
- * const mimeType = detectMimeTypeFromPath('document.pdf');
814
+ * // Detect MIME type from existing file
815
+ * const mimeType = detectMimeTypeFromPath('/path/to/document.pdf');
729
816
  * console.log(mimeType); // 'application/pdf'
730
817
  *
731
- * const mimeType2 = detectMimeTypeFromPath('document.docx');
818
+ * // Detect without checking file existence
819
+ * const mimeType2 = detectMimeTypeFromPath('document.docx', false);
732
820
  * console.log(mimeType2); // 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
733
821
  * ```
734
822
  */
735
- declare function detectMimeTypeFromPath(path: string): string;
823
+ declare function detectMimeTypeFromPath(filePath: string, checkExists?: boolean): string;
736
824
  /**
737
825
  * Validate that a MIME type is supported by Kreuzberg.
738
826
  *
@@ -972,6 +1060,6 @@ declare function getErrorCodeDescription(code: number): string;
972
1060
  * ```
973
1061
  */
974
1062
  declare function classifyError(errorMessage: string): ErrorClassification;
975
- declare const __version__ = "4.0.0-rc.16";
1063
+ declare const __version__ = "4.0.0-rc.18";
976
1064
 
977
- export { type EmbeddingPreset, ErrorClassification, ExtractionConfig, ExtractionResult, OcrBackendProtocol, PanicContext, PostProcessorProtocol, ValidatorProtocol, __resetBindingForTests, __setBindingForTests, __version__, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, batchExtractFilesSync, classifyError, clearDocumentExtractors, clearOcrBackends, clearPostProcessors, clearValidators, detectMimeType, detectMimeTypeFromPath, extractBytes, extractBytesSync, extractFile, extractFileSync, getEmbeddingPreset, getErrorCodeDescription, getErrorCodeName, getExtensionsForMime, getLastErrorCode, getLastPanicContext, listDocumentExtractors, listEmbeddingPresets, listOcrBackends, listPostProcessors, listValidators, registerOcrBackend, registerPostProcessor, registerValidator, unregisterDocumentExtractor, unregisterOcrBackend, unregisterPostProcessor, unregisterValidator, validateMimeType };
1065
+ export { ChunkingConfig, type EmbeddingPreset, ErrorClassification, ExtractionConfig, ExtractionResult, ImageExtractionConfig, KeywordConfig, LanguageDetectionConfig, OcrBackendProtocol, OcrConfig, PanicContext, PdfConfig, PostProcessorProtocol, ValidatorProtocol, __resetBindingForTests, __setBindingForTests, __version__, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, batchExtractFilesSync, classifyError, clearDocumentExtractors, clearOcrBackends, clearPostProcessors, clearValidators, detectMimeType, detectMimeTypeFromPath, extractBytes, extractBytesSync, extractFile, extractFileSync, getEmbeddingPreset, getErrorCodeDescription, getErrorCodeName, getExtensionsForMime, getLastErrorCode, getLastPanicContext, listDocumentExtractors, listEmbeddingPresets, listOcrBackends, listPostProcessors, listValidators, registerOcrBackend, registerPostProcessor, registerValidator, unregisterDocumentExtractor, unregisterOcrBackend, unregisterPostProcessor, unregisterValidator, validateMimeType };
package/dist/index.d.ts CHANGED
@@ -1,7 +1,7 @@
1
1
  import { PanicContext } from './errors.js';
2
2
  export { CacheError, ErrorCode, ImageProcessingError, KreuzbergError, MissingDependencyError, OcrError, ParsingError, PluginError, ValidationError } from './errors.js';
3
- import { ExtractionConfig as ExtractionConfig$1, ExtractionResult, PostProcessorProtocol, ValidatorProtocol, OcrBackendProtocol, ErrorClassification } from './types.js';
4
- export { ArchiveMetadata, Chunk, ChunkMetadata, ChunkingConfig, EmailMetadata, ErrorMetadata, ExcelMetadata, ExtractedImage, HtmlConversionOptions, HtmlMetadata, HtmlPreprocessingOptions, ImageExtractionConfig, ImageMetadata, ImagePreprocessingMetadata, KeywordAlgorithm, KeywordConfig, LanguageDetectionConfig, Metadata, OcrConfig, OcrMetadata, PageBoundary, PageConfig, PageContent, PageInfo, PageStructure, PageUnitType, PdfConfig, PdfMetadata, PostProcessorConfig, PptxMetadata, ProcessingStage, RakeParams, Table, TesseractConfig, TextMetadata, TokenReductionConfig, XmlMetadata, YakeParams } from './types.js';
3
+ import { ExtractionConfig as ExtractionConfig$1, ExtractionResult, PostProcessorProtocol, ValidatorProtocol, OcrBackendProtocol, OcrConfig, ChunkingConfig, ImageExtractionConfig, PdfConfig, KeywordConfig, LanguageDetectionConfig, ErrorClassification } from './types.js';
4
+ export { ArchiveMetadata, Chunk, ChunkMetadata, EmailMetadata, ErrorMetadata, ExcelMetadata, ExtractedImage, HtmlConversionOptions, HtmlMetadata, HtmlPreprocessingOptions, ImageMetadata, ImagePreprocessingMetadata, KeywordAlgorithm, Metadata, OcrMetadata, PageBoundary, PageConfig, PageContent, PageInfo, PageStructure, PageUnitType, PdfMetadata, PostProcessorConfig, PptxMetadata, ProcessingStage, RakeParams, Table, TesseractConfig, TextMetadata, TokenReductionConfig, XmlMetadata, YakeParams } from './types.js';
5
5
  export { GutenOcrBackend } from './ocr/guten-ocr.js';
6
6
 
7
7
  /**
@@ -97,7 +97,7 @@ declare function __resetBindingForTests(): void;
97
97
  * const result2 = extractFileSync('scanned.pdf', null, config);
98
98
  * ```
99
99
  */
100
- declare function extractFileSync(filePath: string, mimeType?: string | null, config?: ExtractionConfig$1 | null): ExtractionResult;
100
+ declare function extractFileSync(filePath: string, mimeTypeOrConfig?: string | null | ExtractionConfig$1, maybeConfig?: ExtractionConfig$1 | null): ExtractionResult;
101
101
  /**
102
102
  * Extract content from a single file (asynchronous).
103
103
  *
@@ -133,7 +133,7 @@ declare function extractFileSync(filePath: string, mimeType?: string | null, con
133
133
  * console.log(result2.chunks); // Array of text chunks
134
134
  * ```
135
135
  */
136
- declare function extractFile(filePath: string, mimeType?: string | null, config?: ExtractionConfig$1 | null): Promise<ExtractionResult>;
136
+ declare function extractFile(filePath: string, mimeTypeOrConfig?: string | null | ExtractionConfig$1, maybeConfig?: ExtractionConfig$1 | null): Promise<ExtractionResult>;
137
137
  /**
138
138
  * Extract content from raw bytes (synchronous).
139
139
  *
@@ -161,7 +161,7 @@ declare function extractFile(filePath: string, mimeType?: string | null, config?
161
161
  * console.log(result.content);
162
162
  * ```
163
163
  */
164
- declare function extractBytesSync(data: Uint8Array, mimeType: string, config?: ExtractionConfig$1 | null): ExtractionResult;
164
+ declare function extractBytesSync(dataOrPath: Uint8Array | string, mimeType: string, config?: ExtractionConfig$1 | null): ExtractionResult;
165
165
  /**
166
166
  * Extract content from raw bytes (asynchronous).
167
167
  *
@@ -189,7 +189,7 @@ declare function extractBytesSync(data: Uint8Array, mimeType: string, config?: E
189
189
  * console.log(result.content);
190
190
  * ```
191
191
  */
192
- declare function extractBytes(data: Uint8Array, mimeType: string, config?: ExtractionConfig$1 | null): Promise<ExtractionResult>;
192
+ declare function extractBytes(dataOrPath: Uint8Array | string, mimeType: string, config?: ExtractionConfig$1 | null): Promise<ExtractionResult>;
193
193
  /**
194
194
  * Extract content from multiple files in parallel (synchronous).
195
195
  *
@@ -610,23 +610,109 @@ declare function unregisterDocumentExtractor(name: string): void;
610
610
  */
611
611
  declare function clearDocumentExtractors(): void;
612
612
  /**
613
- * ExtractionConfig namespace with static methods for loading configuration from files.
613
+ * Builder class for creating ExtractionConfig objects with a fluent API.
614
614
  *
615
- * Provides a factory method to load extraction configuration from TOML, YAML, or JSON files.
616
- * The file format is automatically detected based on the file extension.
615
+ * Provides a convenient way to build extraction configurations using method chaining.
616
+ *
617
+ * @example
618
+ * ```typescript
619
+ * import { ExtractionConfig, extractFile } from '@kreuzberg/node';
620
+ *
621
+ * // Create with builder pattern
622
+ * const config = ExtractionConfig.default()
623
+ * .withChunking({ maxChars: 2048 })
624
+ * .withOcr({ backend: 'tesseract', language: 'eng' })
625
+ * .build();
626
+ *
627
+ * const result = await extractFile('document.pdf', null, config);
628
+ * ```
629
+ */
630
+ declare class ExtractionConfigBuilder {
631
+ private config;
632
+ /**
633
+ * Create a new builder with default configuration.
634
+ */
635
+ static default(): ExtractionConfigBuilder;
636
+ /**
637
+ * Set OCR configuration.
638
+ */
639
+ withOcr(ocr: OcrConfig): ExtractionConfigBuilder;
640
+ /**
641
+ * Set chunking configuration.
642
+ */
643
+ withChunking(chunking: ChunkingConfig): ExtractionConfigBuilder;
644
+ /**
645
+ * Set image extraction configuration.
646
+ */
647
+ withImageExtraction(images: ImageExtractionConfig): ExtractionConfigBuilder;
648
+ /**
649
+ * Set PDF configuration.
650
+ */
651
+ withPdf(pdf: PdfConfig): ExtractionConfigBuilder;
652
+ /**
653
+ * Set keyword extraction configuration.
654
+ */
655
+ withKeywords(keywords: KeywordConfig): ExtractionConfigBuilder;
656
+ /**
657
+ * Set language detection configuration.
658
+ */
659
+ withLanguageDetection(languageDetection: LanguageDetectionConfig): ExtractionConfigBuilder;
660
+ /**
661
+ * Set whether to enable metadata extraction.
662
+ */
663
+ withMetadataExtraction(enabled: boolean): ExtractionConfigBuilder;
664
+ /**
665
+ * Set whether to enable quality mode.
666
+ */
667
+ withQualityMode(enabled: boolean): ExtractionConfigBuilder;
668
+ /**
669
+ * Build and return the final ExtractionConfig object.
670
+ */
671
+ build(): ExtractionConfig$1;
672
+ }
673
+ /**
674
+ * ExtractionConfig namespace with static methods for loading configuration from files
675
+ * and creating new configurations with the builder pattern.
676
+ *
677
+ * Provides factory methods to load extraction configuration from TOML, YAML, or JSON files,
678
+ * or to create configurations using a fluent builder API.
617
679
  *
618
680
  * @example
619
681
  * ```typescript
620
682
  * import { ExtractionConfig, extractFile } from '@kreuzberg/node';
621
683
  *
622
684
  * // Load configuration from file
623
- * const config = ExtractionConfig.fromFile('config.toml');
685
+ * const config1 = ExtractionConfig.fromFile('config.toml');
686
+ *
687
+ * // Create with builder pattern
688
+ * const config2 = ExtractionConfig.default()
689
+ * .withChunking({ maxChars: 2048 })
690
+ * .build();
624
691
  *
625
692
  * // Use with extraction
626
- * const result = await extractFile('document.pdf', null, config);
693
+ * const result = await extractFile('document.pdf', null, config2);
627
694
  * ```
628
695
  */
629
696
  declare const ExtractionConfig: {
697
+ /**
698
+ * Create a default extraction configuration using the builder pattern.
699
+ *
700
+ * Returns a builder object that allows you to configure extraction settings
701
+ * using method chaining.
702
+ *
703
+ * @returns ExtractionConfigBuilder for chaining configuration calls
704
+ *
705
+ * @example
706
+ * ```typescript
707
+ * import { ExtractionConfig } from '@kreuzberg/node';
708
+ *
709
+ * const config = ExtractionConfig.default()
710
+ * .withChunking({ maxChars: 2048 })
711
+ * .withOcr({ backend: 'tesseract', language: 'eng' })
712
+ * .build();
713
+ * ```
714
+ */
715
+ default(): ExtractionConfigBuilder;
630
716
  /**
631
717
  * Load extraction configuration from a file.
632
718
  *
@@ -711,28 +797,30 @@ declare function detectMimeType(bytes: Buffer): string;
711
797
  /**
712
798
  * Detect MIME type from a file path.
713
799
  *
714
- * Uses file extension to determine MIME type. Falls back to `mime_guess` crate
715
- * if extension-based detection fails.
800
+ * Determines the MIME type based on the file extension in the provided path.
801
+ * By default, checks if the file exists; can be disabled with checkExists parameter.
716
802
  *
717
- * @param path - Path to the file (string)
718
- * @returns The detected MIME type string
803
+ * @param filePath - The file path to detect MIME type from (e.g., 'document.pdf')
804
+ * @param checkExists - Whether to verify the file exists (default: true)
805
+ * @returns The detected MIME type as a string (e.g., 'application/pdf')
719
806
  *
720
- * @throws {Error} If MIME type cannot be determined from path/extension
721
- * @throws {Error} If extension is unknown
807
+ * @throws {Error} If MIME type cannot be determined from the file extension,
808
+ * or if checkExists is true and the file does not exist
722
809
  *
723
810
  * @example
724
811
  * ```typescript
725
812
  * import { detectMimeTypeFromPath } from '@kreuzberg/node';
726
813
  *
727
- * // Detect from existing file
728
- * const mimeType = detectMimeTypeFromPath('document.pdf');
814
+ * // Detect MIME type from existing file
815
+ * const mimeType = detectMimeTypeFromPath('/path/to/document.pdf');
729
816
  * console.log(mimeType); // 'application/pdf'
730
817
  *
731
- * const mimeType2 = detectMimeTypeFromPath('document.docx');
818
+ * // Detect without checking file existence
819
+ * const mimeType2 = detectMimeTypeFromPath('document.docx', false);
732
820
  * console.log(mimeType2); // 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
733
821
  * ```
734
822
  */
735
- declare function detectMimeTypeFromPath(path: string): string;
823
+ declare function detectMimeTypeFromPath(filePath: string, checkExists?: boolean): string;
736
824
  /**
737
825
  * Validate that a MIME type is supported by Kreuzberg.
738
826
  *
@@ -972,6 +1060,6 @@ declare function getErrorCodeDescription(code: number): string;
972
1060
  * ```
973
1061
  */
974
1062
  declare function classifyError(errorMessage: string): ErrorClassification;
975
- declare const __version__ = "4.0.0-rc.16";
1063
+ declare const __version__ = "4.0.0-rc.18";
976
1064
 
977
- export { type EmbeddingPreset, ErrorClassification, ExtractionConfig, ExtractionResult, OcrBackendProtocol, PanicContext, PostProcessorProtocol, ValidatorProtocol, __resetBindingForTests, __setBindingForTests, __version__, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, batchExtractFilesSync, classifyError, clearDocumentExtractors, clearOcrBackends, clearPostProcessors, clearValidators, detectMimeType, detectMimeTypeFromPath, extractBytes, extractBytesSync, extractFile, extractFileSync, getEmbeddingPreset, getErrorCodeDescription, getErrorCodeName, getExtensionsForMime, getLastErrorCode, getLastPanicContext, listDocumentExtractors, listEmbeddingPresets, listOcrBackends, listPostProcessors, listValidators, registerOcrBackend, registerPostProcessor, registerValidator, unregisterDocumentExtractor, unregisterOcrBackend, unregisterPostProcessor, unregisterValidator, validateMimeType };
1065
+ export { ChunkingConfig, type EmbeddingPreset, ErrorClassification, ExtractionConfig, ExtractionResult, ImageExtractionConfig, KeywordConfig, LanguageDetectionConfig, OcrBackendProtocol, OcrConfig, PanicContext, PdfConfig, PostProcessorProtocol, ValidatorProtocol, __resetBindingForTests, __setBindingForTests, __version__, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, batchExtractFilesSync, classifyError, clearDocumentExtractors, clearOcrBackends, clearPostProcessors, clearValidators, detectMimeType, detectMimeTypeFromPath, extractBytes, extractBytesSync, extractFile, extractFileSync, getEmbeddingPreset, getErrorCodeDescription, getErrorCodeName, getExtensionsForMime, getLastErrorCode, getLastPanicContext, listDocumentExtractors, listEmbeddingPresets, listOcrBackends, listPostProcessors, listValidators, registerOcrBackend, registerPostProcessor, registerValidator, unregisterDocumentExtractor, unregisterOcrBackend, unregisterPostProcessor, unregisterValidator, validateMimeType };