@kreuzberg/node 4.0.0-rc.21 → 4.0.0-rc.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,7 +1,7 @@
1
1
  import { PanicContext } from './errors.js';
2
2
  export { CacheError, ErrorCode, ImageProcessingError, KreuzbergError, MissingDependencyError, OcrError, ParsingError, PluginError, ValidationError } from './errors.js';
3
- import { ExtractionConfig as ExtractionConfig$1, ExtractionResult, PostProcessorProtocol, ValidatorProtocol, OcrBackendProtocol, OcrConfig, ChunkingConfig, ImageExtractionConfig, PdfConfig, KeywordConfig, LanguageDetectionConfig, ErrorClassification } from './types.js';
4
- export { ArchiveMetadata, Chunk, ChunkMetadata, EmailMetadata, ErrorMetadata, ExcelMetadata, ExtractedImage, HtmlConversionOptions, HtmlMetadata, HtmlPreprocessingOptions, ImageMetadata, ImagePreprocessingMetadata, KeywordAlgorithm, Metadata, OcrMetadata, PageBoundary, PageConfig, PageContent, PageInfo, PageStructure, PageUnitType, PdfMetadata, PostProcessorConfig, PptxMetadata, ProcessingStage, RakeParams, Table, TesseractConfig, TextMetadata, TokenReductionConfig, XmlMetadata, YakeParams } from './types.js';
3
+ import { ExtractionConfig as ExtractionConfig$1, ExtractionResult, PostProcessorProtocol, ValidatorProtocol, OcrBackendProtocol, ErrorClassification, WorkerPool, WorkerPoolStats } from './types.js';
4
+ export { ArchiveMetadata, Chunk, ChunkMetadata, ChunkingConfig, EmailMetadata, ErrorMetadata, ExcelMetadata, ExtractedImage, ExtractedKeyword, HeaderMetadata, HierarchyConfig, HtmlConversionOptions, HtmlImageMetadata, HtmlMetadata, HtmlPreprocessingOptions, ImageExtractionConfig, ImageMetadata, ImagePreprocessingMetadata, KeywordAlgorithm, KeywordConfig, LanguageDetectionConfig, LinkMetadata, Metadata, OcrConfig, OcrMetadata, PageBoundary, PageContent, PageExtractionConfig, PageInfo, PageStructure, PageUnitType, PdfConfig, PdfMetadata, PostProcessorConfig, PptxMetadata, ProcessingStage, RakeParams, StructuredData, Table, TesseractConfig, TextMetadata, TokenReductionConfig, XmlMetadata, YakeParams } from './types.js';
5
5
  export { GutenOcrBackend } from './ocr/guten-ocr.js';
6
6
 
7
7
  /**
@@ -610,72 +610,12 @@ declare function unregisterDocumentExtractor(name: string): void;
610
610
  */
611
611
  declare function clearDocumentExtractors(): void;
612
612
  /**
613
- * Builder class for creating ExtractionConfig objects with a fluent API.
614
- *
615
- * Provides a convenient way to build extraction configurations using method chaining.
616
- *
617
- * @example
618
- * ```typescript
619
- * import { ExtractionConfig, extractFile } from '@kreuzberg/node';
620
- *
621
- * // Create with builder pattern
622
- * const config = ExtractionConfig.default()
623
- * .withChunking({ maxChars: 2048 })
624
- * .withOcr({ backend: 'tesseract', language: 'eng' })
625
- * .build();
626
- *
627
- * const result = await extractFile('document.pdf', null, config);
628
- * ```
629
- */
630
- declare class ExtractionConfigBuilder {
631
- private config;
632
- /**
633
- * Create a new builder with default configuration.
634
- */
635
- static default(): ExtractionConfigBuilder;
636
- /**
637
- * Set OCR configuration.
638
- */
639
- withOcr(ocr: OcrConfig): ExtractionConfigBuilder;
640
- /**
641
- * Set chunking configuration.
642
- */
643
- withChunking(chunking: ChunkingConfig): ExtractionConfigBuilder;
644
- /**
645
- * Set image extraction configuration.
646
- */
647
- withImageExtraction(images: ImageExtractionConfig): ExtractionConfigBuilder;
648
- /**
649
- * Set PDF configuration.
650
- */
651
- withPdf(pdf: PdfConfig): ExtractionConfigBuilder;
652
- /**
653
- * Set keyword extraction configuration.
654
- */
655
- withKeywords(keywords: KeywordConfig): ExtractionConfigBuilder;
656
- /**
657
- * Set language detection configuration.
658
- */
659
- withLanguageDetection(languageDetection: LanguageDetectionConfig): ExtractionConfigBuilder;
660
- /**
661
- * Set whether to enable metadata extraction.
662
- */
663
- withMetadataExtraction(enabled: boolean): ExtractionConfigBuilder;
664
- /**
665
- * Set whether to enable quality mode.
666
- */
667
- withQualityMode(enabled: boolean): ExtractionConfigBuilder;
668
- /**
669
- * Build and return the final ExtractionConfig object.
670
- */
671
- build(): ExtractionConfig$1;
672
- }
673
- /**
674
- * ExtractionConfig namespace with static methods for loading configuration from files
675
- * and creating new configurations with the builder pattern.
613
+ * ExtractionConfig namespace with static methods for loading configuration from files.
676
614
  *
677
615
  * Provides factory methods to load extraction configuration from TOML, YAML, or JSON files,
678
- * or to create configurations using a fluent builder API.
616
+ * or to discover configuration files in the current directory tree.
617
+ *
618
+ * For creating configurations programmatically, use plain TypeScript objects instead:
679
619
  *
680
620
  * @example
681
621
  * ```typescript
@@ -684,35 +624,17 @@ declare class ExtractionConfigBuilder {
684
624
  * // Load configuration from file
685
625
  * const config1 = ExtractionConfig.fromFile('config.toml');
686
626
  *
687
- * // Create with builder pattern
688
- * const config2 = ExtractionConfig.default()
689
- * .withChunking({ maxChars: 2048 })
690
- * .build();
627
+ * // Or create with plain object
628
+ * const config2 = {
629
+ * chunking: { maxChars: 2048 },
630
+ * ocr: { backend: 'tesseract', language: 'eng' }
631
+ * };
691
632
  *
692
633
  * // Use with extraction
693
634
  * const result = await extractFile('document.pdf', null, config2);
694
635
  * ```
695
636
  */
696
637
  declare const ExtractionConfig: {
697
- /**
698
- * Create a default extraction configuration using the builder pattern.
699
- *
700
- * Returns a builder object that allows you to configure extraction settings
701
- * using method chaining.
702
- *
703
- * @returns ExtractionConfigBuilder for chaining configuration calls
704
- *
705
- * @example
706
- * ```typescript
707
- * import { ExtractionConfig } from '@kreuzberg/node';
708
- *
709
- * const config = ExtractionConfig.default()
710
- * .withChunking({ maxChars: 2048 })
711
- * .withOcr({ backend: 'tesseract', language: 'eng' })
712
- * .build();
713
- * ```
714
- */
715
- default(): ExtractionConfigBuilder;
716
638
  /**
717
639
  * Load extraction configuration from a file.
718
640
  *
@@ -1060,6 +982,151 @@ declare function getErrorCodeDescription(code: number): string;
1060
982
  * ```
1061
983
  */
1062
984
  declare function classifyError(errorMessage: string): ErrorClassification;
1063
- declare const __version__ = "4.0.0-rc.21";
985
+ /**
986
+ * Create a worker pool for concurrent file extraction.
987
+ *
988
+ * The worker pool manages a set of background worker threads that can process
989
+ * extraction requests concurrently, improving throughput when handling multiple files.
990
+ *
991
+ * @param size - Optional number of worker threads (defaults to CPU count). Must be > 0
992
+ * @returns A WorkerPool instance to use with extraction functions
993
+ *
994
+ * @throws {Error} If size is invalid or pool creation fails
995
+ *
996
+ * @example
997
+ * ```typescript
998
+ * import { createWorkerPool, extractFileInWorker, closeWorkerPool } from '@kreuzberg/node';
999
+ *
1000
+ * // Create pool with 4 workers
1001
+ * const pool = createWorkerPool(4);
1002
+ *
1003
+ * try {
1004
+ * const result = await extractFileInWorker(pool, 'document.pdf');
1005
+ * console.log(result.content);
1006
+ * } finally {
1007
+ * // Always close the pool when done
1008
+ * await closeWorkerPool(pool);
1009
+ * }
1010
+ * ```
1011
+ */
1012
+ declare function createWorkerPool(size?: number): WorkerPool;
1013
+ /**
1014
+ * Get statistics about a worker pool.
1015
+ *
1016
+ * Returns information about the pool's current state, including the number of active workers,
1017
+ * queued tasks, and total processed tasks.
1018
+ *
1019
+ * @param pool - The worker pool instance
1020
+ * @returns WorkerPoolStats with pool information
1021
+ *
1022
+ * @example
1023
+ * ```typescript
1024
+ * import { createWorkerPool, getWorkerPoolStats } from '@kreuzberg/node';
1025
+ *
1026
+ * const pool = createWorkerPool(4);
1027
+ * const stats = getWorkerPoolStats(pool);
1028
+ *
1029
+ * console.log(`Pool size: ${stats.size}`);
1030
+ * console.log(`Active workers: ${stats.activeWorkers}`);
1031
+ * console.log(`Queued tasks: ${stats.queuedTasks}`);
1032
+ * ```
1033
+ */
1034
+ declare function getWorkerPoolStats(pool: WorkerPool): WorkerPoolStats;
1035
+ /**
1036
+ * Extract content from a single file using a worker pool (asynchronous).
1037
+ *
1038
+ * Submits an extraction task to the worker pool. The task is executed by one of the
1039
+ * available workers in the background, allowing other tasks to be processed concurrently.
1040
+ *
1041
+ * @param pool - The worker pool instance
1042
+ * @param filePath - Path to the file to extract
1043
+ * @param mimeTypeOrConfig - Optional MIME type or extraction configuration
1044
+ * @param maybeConfig - Optional extraction configuration (if second param is MIME type)
1045
+ * @returns Promise<ExtractionResult> containing extracted content and metadata
1046
+ *
1047
+ * @throws {Error} If the file cannot be read or extraction fails
1048
+ *
1049
+ * @example
1050
+ * ```typescript
1051
+ * import { createWorkerPool, extractFileInWorker, closeWorkerPool } from '@kreuzberg/node';
1052
+ *
1053
+ * const pool = createWorkerPool(4);
1054
+ *
1055
+ * try {
1056
+ * const files = ['doc1.pdf', 'doc2.docx', 'doc3.xlsx'];
1057
+ * const results = await Promise.all(
1058
+ * files.map(f => extractFileInWorker(pool, f))
1059
+ * );
1060
+ *
1061
+ * results.forEach((r, i) => {
1062
+ * console.log(`${files[i]}: ${r.content.substring(0, 100)}...`);
1063
+ * });
1064
+ * } finally {
1065
+ * await closeWorkerPool(pool);
1066
+ * }
1067
+ * ```
1068
+ */
1069
+ declare function extractFileInWorker(pool: WorkerPool, filePath: string, mimeTypeOrConfig?: string | null | ExtractionConfig$1, maybeConfig?: ExtractionConfig$1 | null): Promise<ExtractionResult>;
1070
+ /**
1071
+ * Extract content from multiple files in parallel using a worker pool (asynchronous).
1072
+ *
1073
+ * Submits multiple extraction tasks to the worker pool for concurrent processing.
1074
+ * This is more efficient than using `extractFileInWorker` multiple times sequentially.
1075
+ *
1076
+ * @param pool - The worker pool instance
1077
+ * @param paths - Array of file paths to extract
1078
+ * @param config - Extraction configuration object (applies to all files)
1079
+ * @returns Promise<ExtractionResult[]> array of results (one per file, in same order)
1080
+ *
1081
+ * @throws {Error} If any file cannot be read or extraction fails
1082
+ *
1083
+ * @example
1084
+ * ```typescript
1085
+ * import { createWorkerPool, batchExtractFilesInWorker, closeWorkerPool } from '@kreuzberg/node';
1086
+ *
1087
+ * const pool = createWorkerPool(4);
1088
+ *
1089
+ * try {
1090
+ * const files = ['invoice1.pdf', 'invoice2.pdf', 'invoice3.pdf'];
1091
+ * const results = await batchExtractFilesInWorker(pool, files, {
1092
+ * ocr: { backend: 'tesseract', language: 'eng' }
1093
+ * });
1094
+ *
1095
+ * const total = results.reduce((sum, r) => sum + extractAmount(r.content), 0);
1096
+ * console.log(`Total: $${total}`);
1097
+ * } finally {
1098
+ * await closeWorkerPool(pool);
1099
+ * }
1100
+ * ```
1101
+ */
1102
+ declare function batchExtractFilesInWorker(pool: WorkerPool, paths: string[], config?: ExtractionConfig$1 | null): Promise<ExtractionResult[]>;
1103
+ /**
1104
+ * Close a worker pool and shut down all worker threads.
1105
+ *
1106
+ * Should be called when the pool is no longer needed to clean up resources
1107
+ * and gracefully shut down worker threads. Any pending tasks will be cancelled.
1108
+ *
1109
+ * @param pool - The worker pool instance to close
1110
+ * @returns Promise that resolves when the pool is fully closed
1111
+ *
1112
+ * @throws {Error} If pool shutdown fails
1113
+ *
1114
+ * @example
1115
+ * ```typescript
1116
+ * import { createWorkerPool, extractFileInWorker, closeWorkerPool } from '@kreuzberg/node';
1117
+ *
1118
+ * const pool = createWorkerPool(4);
1119
+ *
1120
+ * try {
1121
+ * const result = await extractFileInWorker(pool, 'document.pdf');
1122
+ * console.log(result.content);
1123
+ * } finally {
1124
+ * // Clean up the pool
1125
+ * await closeWorkerPool(pool);
1126
+ * }
1127
+ * ```
1128
+ */
1129
+ declare function closeWorkerPool(pool: WorkerPool): Promise<void>;
1130
+ declare const __version__ = "4.0.0-rc.24";
1064
1131
 
1065
- export { ChunkingConfig, type EmbeddingPreset, ErrorClassification, ExtractionConfig, ExtractionResult, ImageExtractionConfig, KeywordConfig, LanguageDetectionConfig, OcrBackendProtocol, OcrConfig, PanicContext, PdfConfig, PostProcessorProtocol, ValidatorProtocol, __resetBindingForTests, __setBindingForTests, __version__, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, batchExtractFilesSync, classifyError, clearDocumentExtractors, clearOcrBackends, clearPostProcessors, clearValidators, detectMimeType, detectMimeTypeFromPath, extractBytes, extractBytesSync, extractFile, extractFileSync, getEmbeddingPreset, getErrorCodeDescription, getErrorCodeName, getExtensionsForMime, getLastErrorCode, getLastPanicContext, listDocumentExtractors, listEmbeddingPresets, listOcrBackends, listPostProcessors, listValidators, registerOcrBackend, registerPostProcessor, registerValidator, unregisterDocumentExtractor, unregisterOcrBackend, unregisterPostProcessor, unregisterValidator, validateMimeType };
1132
+ export { type EmbeddingPreset, ErrorClassification, ExtractionConfig, ExtractionResult, OcrBackendProtocol, PanicContext, PostProcessorProtocol, ValidatorProtocol, WorkerPool, WorkerPoolStats, __resetBindingForTests, __setBindingForTests, __version__, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, batchExtractFilesInWorker, batchExtractFilesSync, classifyError, clearDocumentExtractors, clearOcrBackends, clearPostProcessors, clearValidators, closeWorkerPool, createWorkerPool, detectMimeType, detectMimeTypeFromPath, extractBytes, extractBytesSync, extractFile, extractFileInWorker, extractFileSync, getEmbeddingPreset, getErrorCodeDescription, getErrorCodeName, getExtensionsForMime, getLastErrorCode, getLastPanicContext, getWorkerPoolStats, listDocumentExtractors, listEmbeddingPresets, listOcrBackends, listPostProcessors, listValidators, registerOcrBackend, registerPostProcessor, registerValidator, unregisterDocumentExtractor, unregisterOcrBackend, unregisterPostProcessor, unregisterValidator, validateMimeType };
package/dist/index.js CHANGED
@@ -36,17 +36,21 @@ __export(index_exports, {
36
36
  batchExtractBytes: () => batchExtractBytes,
37
37
  batchExtractBytesSync: () => batchExtractBytesSync,
38
38
  batchExtractFiles: () => batchExtractFiles,
39
+ batchExtractFilesInWorker: () => batchExtractFilesInWorker,
39
40
  batchExtractFilesSync: () => batchExtractFilesSync,
40
41
  classifyError: () => classifyError,
41
42
  clearDocumentExtractors: () => clearDocumentExtractors,
42
43
  clearOcrBackends: () => clearOcrBackends,
43
44
  clearPostProcessors: () => clearPostProcessors,
44
45
  clearValidators: () => clearValidators,
46
+ closeWorkerPool: () => closeWorkerPool,
47
+ createWorkerPool: () => createWorkerPool,
45
48
  detectMimeType: () => detectMimeType,
46
49
  detectMimeTypeFromPath: () => detectMimeTypeFromPath,
47
50
  extractBytes: () => extractBytes,
48
51
  extractBytesSync: () => extractBytesSync,
49
52
  extractFile: () => extractFile,
53
+ extractFileInWorker: () => extractFileInWorker,
50
54
  extractFileSync: () => extractFileSync,
51
55
  getEmbeddingPreset: () => getEmbeddingPreset,
52
56
  getErrorCodeDescription: () => getErrorCodeDescription,
@@ -54,6 +58,7 @@ __export(index_exports, {
54
58
  getExtensionsForMime: () => getExtensionsForMime,
55
59
  getLastErrorCode: () => getLastErrorCode,
56
60
  getLastPanicContext: () => getLastPanicContext,
61
+ getWorkerPoolStats: () => getWorkerPoolStats,
57
62
  listDocumentExtractors: () => listDocumentExtractors,
58
63
  listEmbeddingPresets: () => listEmbeddingPresets,
59
64
  listOcrBackends: () => listOcrBackends,
@@ -133,7 +138,16 @@ function __resetBindingForTests() {
133
138
  bindingInitialized = false;
134
139
  }
135
140
  function loadNativeBinding() {
136
- const localRequire = typeof require !== "undefined" ? require : (0, import_node_module.createRequire)(import_meta.url);
141
+ let localRequire;
142
+ if (typeof require !== "undefined") {
143
+ localRequire = require;
144
+ } else {
145
+ try {
146
+ localRequire = (0, import_node_module.createRequire)(import_meta.url);
147
+ } catch {
148
+ localRequire = void 0;
149
+ }
150
+ }
137
151
  if (!localRequire) {
138
152
  throw new Error("Unable to resolve native binding loader (require not available).");
139
153
  }
@@ -317,9 +331,9 @@ function convertResult(rawResult) {
317
331
  metadata: {},
318
332
  tables: [],
319
333
  detectedLanguages: null,
320
- chunks: void 0,
321
- images: void 0,
322
- pages: void 0
334
+ chunks: null,
335
+ images: null,
336
+ pages: null
323
337
  };
324
338
  }
325
339
  const result = rawResult;
@@ -335,9 +349,9 @@ function convertResult(rawResult) {
335
349
  tables: Array.isArray(result["tables"]) ? result["tables"] : [],
336
350
  // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
337
351
  detectedLanguages: Array.isArray(result["detectedLanguages"]) ? result["detectedLanguages"] : null,
338
- chunks: void 0,
339
- images: void 0,
340
- pages: void 0
352
+ chunks: null,
353
+ images: null,
354
+ pages: null
341
355
  };
342
356
  const chunksData = result["chunks"];
343
357
  if (Array.isArray(chunksData)) {
@@ -515,9 +529,9 @@ function normalizePageConfig(pages) {
515
529
  return void 0;
516
530
  }
517
531
  const normalized = {};
518
- setIfDefined(normalized, "extract_pages", pages.extractPages);
519
- setIfDefined(normalized, "insert_page_markers", pages.insertPageMarkers);
520
- setIfDefined(normalized, "marker_format", pages.markerFormat);
532
+ setIfDefined(normalized, "extractPages", pages.extractPages);
533
+ setIfDefined(normalized, "insertPageMarkers", pages.insertPageMarkers);
534
+ setIfDefined(normalized, "markerFormat", pages.markerFormat);
521
535
  return normalized;
522
536
  }
523
537
  function normalizeExtractionConfig(config) {
@@ -813,99 +827,7 @@ function clearDocumentExtractors() {
813
827
  const binding2 = getBinding();
814
828
  binding2.clearDocumentExtractors();
815
829
  }
816
- class ExtractionConfigBuilder {
817
- config = {};
818
- /**
819
- * Create a new builder with default configuration.
820
- */
821
- static default() {
822
- return new ExtractionConfigBuilder();
823
- }
824
- /**
825
- * Set OCR configuration.
826
- */
827
- withOcr(ocr) {
828
- this.config["ocr"] = ocr;
829
- return this;
830
- }
831
- /**
832
- * Set chunking configuration.
833
- */
834
- withChunking(chunking) {
835
- this.config["chunking"] = chunking;
836
- return this;
837
- }
838
- /**
839
- * Set image extraction configuration.
840
- */
841
- withImageExtraction(images) {
842
- this.config["imageExtraction"] = images;
843
- return this;
844
- }
845
- /**
846
- * Set PDF configuration.
847
- */
848
- withPdf(pdf) {
849
- this.config["pdf"] = pdf;
850
- return this;
851
- }
852
- /**
853
- * Set keyword extraction configuration.
854
- */
855
- withKeywords(keywords) {
856
- this.config["keywords"] = keywords;
857
- return this;
858
- }
859
- /**
860
- * Set language detection configuration.
861
- */
862
- withLanguageDetection(languageDetection) {
863
- this.config["languageDetection"] = languageDetection;
864
- return this;
865
- }
866
- /**
867
- * Set whether to enable metadata extraction.
868
- */
869
- withMetadataExtraction(enabled) {
870
- this.config["metadataExtraction"] = enabled;
871
- return this;
872
- }
873
- /**
874
- * Set whether to enable quality mode.
875
- */
876
- withQualityMode(enabled) {
877
- this.config["qualityMode"] = enabled;
878
- return this;
879
- }
880
- /**
881
- * Build and return the final ExtractionConfig object.
882
- */
883
- build() {
884
- return this.config;
885
- }
886
- }
887
830
  const ExtractionConfig = {
888
- /**
889
- * Create a default extraction configuration using the builder pattern.
890
- *
891
- * Returns a builder object that allows you to configure extraction settings
892
- * using method chaining.
893
- *
894
- * @returns ExtractionConfigBuilder for chaining configuration calls
895
- *
896
- * @example
897
- * ```typescript
898
- * import { ExtractionConfig } from '@kreuzberg/node';
899
- *
900
- * const config = ExtractionConfig.default()
901
- * .withChunking({ maxChars: 2048 })
902
- * .withOcr({ backend: 'tesseract', language: 'eng' })
903
- * .build();
904
- * ```
905
- */
906
- default() {
907
- return ExtractionConfigBuilder.default();
908
- },
909
831
  /**
910
832
  * Load extraction configuration from a file.
911
833
  *
@@ -1014,7 +936,54 @@ function classifyError(errorMessage) {
1014
936
  const result = binding2.classifyError(errorMessage);
1015
937
  return result;
1016
938
  }
1017
- const __version__ = "4.0.0-rc.21";
939
+ function createWorkerPool(size) {
940
+ const binding2 = getBinding();
941
+ const rawPool = binding2.createWorkerPool(size);
942
+ return rawPool;
943
+ }
944
+ function getWorkerPoolStats(pool) {
945
+ const binding2 = getBinding();
946
+ const rawStats = binding2.getWorkerPoolStats(pool);
947
+ return rawStats;
948
+ }
949
+ async function extractFileInWorker(pool, filePath, mimeTypeOrConfig, maybeConfig) {
950
+ let mimeType = null;
951
+ let config = null;
952
+ if (typeof mimeTypeOrConfig === "string") {
953
+ mimeType = mimeTypeOrConfig;
954
+ config = maybeConfig ?? null;
955
+ } else if (mimeTypeOrConfig !== null && typeof mimeTypeOrConfig === "object") {
956
+ config = mimeTypeOrConfig;
957
+ mimeType = null;
958
+ } else {
959
+ config = maybeConfig ?? null;
960
+ mimeType = null;
961
+ }
962
+ const normalizedConfig = normalizeExtractionConfig(config);
963
+ const binding2 = getBinding();
964
+ const rawResult = await binding2.extractFileInWorker(
965
+ pool,
966
+ filePath,
967
+ mimeType,
968
+ normalizedConfig
969
+ );
970
+ return convertResult(rawResult);
971
+ }
972
+ async function batchExtractFilesInWorker(pool, paths, config = null) {
973
+ const normalizedConfig = normalizeExtractionConfig(config);
974
+ const binding2 = getBinding();
975
+ const rawResults = await binding2.batchExtractFilesInWorker(
976
+ pool,
977
+ paths,
978
+ normalizedConfig
979
+ );
980
+ return rawResults.map(convertResult);
981
+ }
982
+ async function closeWorkerPool(pool) {
983
+ const binding2 = getBinding();
984
+ await binding2.closeWorkerPool(pool);
985
+ }
986
+ const __version__ = "4.0.0-rc.24";
1018
987
  // Annotate the CommonJS export names for ESM import in node:
1019
988
  0 && (module.exports = {
1020
989
  CacheError,
@@ -1034,17 +1003,21 @@ const __version__ = "4.0.0-rc.21";
1034
1003
  batchExtractBytes,
1035
1004
  batchExtractBytesSync,
1036
1005
  batchExtractFiles,
1006
+ batchExtractFilesInWorker,
1037
1007
  batchExtractFilesSync,
1038
1008
  classifyError,
1039
1009
  clearDocumentExtractors,
1040
1010
  clearOcrBackends,
1041
1011
  clearPostProcessors,
1042
1012
  clearValidators,
1013
+ closeWorkerPool,
1014
+ createWorkerPool,
1043
1015
  detectMimeType,
1044
1016
  detectMimeTypeFromPath,
1045
1017
  extractBytes,
1046
1018
  extractBytesSync,
1047
1019
  extractFile,
1020
+ extractFileInWorker,
1048
1021
  extractFileSync,
1049
1022
  getEmbeddingPreset,
1050
1023
  getErrorCodeDescription,
@@ -1052,6 +1025,7 @@ const __version__ = "4.0.0-rc.21";
1052
1025
  getExtensionsForMime,
1053
1026
  getLastErrorCode,
1054
1027
  getLastPanicContext,
1028
+ getWorkerPoolStats,
1055
1029
  listDocumentExtractors,
1056
1030
  listEmbeddingPresets,
1057
1031
  listOcrBackends,