@kreuzberg/node 4.0.0-rc.21 → 4.0.0-rc.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -71,7 +71,16 @@ function __resetBindingForTests() {
71
71
  bindingInitialized = false;
72
72
  }
73
73
  function loadNativeBinding() {
74
- const localRequire = typeof require !== "undefined" ? require : createRequire(import.meta.url);
74
+ let localRequire;
75
+ if (typeof require !== "undefined") {
76
+ localRequire = require;
77
+ } else {
78
+ try {
79
+ localRequire = createRequire(import.meta.url);
80
+ } catch {
81
+ localRequire = void 0;
82
+ }
83
+ }
75
84
  if (!localRequire) {
76
85
  throw new Error("Unable to resolve native binding loader (require not available).");
77
86
  }
@@ -255,9 +264,9 @@ function convertResult(rawResult) {
255
264
  metadata: {},
256
265
  tables: [],
257
266
  detectedLanguages: null,
258
- chunks: void 0,
259
- images: void 0,
260
- pages: void 0
267
+ chunks: null,
268
+ images: null,
269
+ pages: null
261
270
  };
262
271
  }
263
272
  const result = rawResult;
@@ -273,9 +282,9 @@ function convertResult(rawResult) {
273
282
  tables: Array.isArray(result["tables"]) ? result["tables"] : [],
274
283
  // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
275
284
  detectedLanguages: Array.isArray(result["detectedLanguages"]) ? result["detectedLanguages"] : null,
276
- chunks: void 0,
277
- images: void 0,
278
- pages: void 0
285
+ chunks: null,
286
+ images: null,
287
+ pages: null
279
288
  };
280
289
  const chunksData = result["chunks"];
281
290
  if (Array.isArray(chunksData)) {
@@ -453,9 +462,9 @@ function normalizePageConfig(pages) {
453
462
  return void 0;
454
463
  }
455
464
  const normalized = {};
456
- setIfDefined(normalized, "extract_pages", pages.extractPages);
457
- setIfDefined(normalized, "insert_page_markers", pages.insertPageMarkers);
458
- setIfDefined(normalized, "marker_format", pages.markerFormat);
465
+ setIfDefined(normalized, "extractPages", pages.extractPages);
466
+ setIfDefined(normalized, "insertPageMarkers", pages.insertPageMarkers);
467
+ setIfDefined(normalized, "markerFormat", pages.markerFormat);
459
468
  return normalized;
460
469
  }
461
470
  function normalizeExtractionConfig(config) {
@@ -751,99 +760,7 @@ function clearDocumentExtractors() {
751
760
  const binding2 = getBinding();
752
761
  binding2.clearDocumentExtractors();
753
762
  }
754
- class ExtractionConfigBuilder {
755
- config = {};
756
- /**
757
- * Create a new builder with default configuration.
758
- */
759
- static default() {
760
- return new ExtractionConfigBuilder();
761
- }
762
- /**
763
- * Set OCR configuration.
764
- */
765
- withOcr(ocr) {
766
- this.config["ocr"] = ocr;
767
- return this;
768
- }
769
- /**
770
- * Set chunking configuration.
771
- */
772
- withChunking(chunking) {
773
- this.config["chunking"] = chunking;
774
- return this;
775
- }
776
- /**
777
- * Set image extraction configuration.
778
- */
779
- withImageExtraction(images) {
780
- this.config["imageExtraction"] = images;
781
- return this;
782
- }
783
- /**
784
- * Set PDF configuration.
785
- */
786
- withPdf(pdf) {
787
- this.config["pdf"] = pdf;
788
- return this;
789
- }
790
- /**
791
- * Set keyword extraction configuration.
792
- */
793
- withKeywords(keywords) {
794
- this.config["keywords"] = keywords;
795
- return this;
796
- }
797
- /**
798
- * Set language detection configuration.
799
- */
800
- withLanguageDetection(languageDetection) {
801
- this.config["languageDetection"] = languageDetection;
802
- return this;
803
- }
804
- /**
805
- * Set whether to enable metadata extraction.
806
- */
807
- withMetadataExtraction(enabled) {
808
- this.config["metadataExtraction"] = enabled;
809
- return this;
810
- }
811
- /**
812
- * Set whether to enable quality mode.
813
- */
814
- withQualityMode(enabled) {
815
- this.config["qualityMode"] = enabled;
816
- return this;
817
- }
818
- /**
819
- * Build and return the final ExtractionConfig object.
820
- */
821
- build() {
822
- return this.config;
823
- }
824
- }
825
763
  const ExtractionConfig = {
826
- /**
827
- * Create a default extraction configuration using the builder pattern.
828
- *
829
- * Returns a builder object that allows you to configure extraction settings
830
- * using method chaining.
831
- *
832
- * @returns ExtractionConfigBuilder for chaining configuration calls
833
- *
834
- * @example
835
- * ```typescript
836
- * import { ExtractionConfig } from '@kreuzberg/node';
837
- *
838
- * const config = ExtractionConfig.default()
839
- * .withChunking({ maxChars: 2048 })
840
- * .withOcr({ backend: 'tesseract', language: 'eng' })
841
- * .build();
842
- * ```
843
- */
844
- default() {
845
- return ExtractionConfigBuilder.default();
846
- },
847
764
  /**
848
765
  * Load extraction configuration from a file.
849
766
  *
@@ -952,7 +869,54 @@ function classifyError(errorMessage) {
952
869
  const result = binding2.classifyError(errorMessage);
953
870
  return result;
954
871
  }
955
- const __version__ = "4.0.0-rc.21";
872
+ function createWorkerPool(size) {
873
+ const binding2 = getBinding();
874
+ const rawPool = binding2.createWorkerPool(size);
875
+ return rawPool;
876
+ }
877
+ function getWorkerPoolStats(pool) {
878
+ const binding2 = getBinding();
879
+ const rawStats = binding2.getWorkerPoolStats(pool);
880
+ return rawStats;
881
+ }
882
+ async function extractFileInWorker(pool, filePath, mimeTypeOrConfig, maybeConfig) {
883
+ let mimeType = null;
884
+ let config = null;
885
+ if (typeof mimeTypeOrConfig === "string") {
886
+ mimeType = mimeTypeOrConfig;
887
+ config = maybeConfig ?? null;
888
+ } else if (mimeTypeOrConfig !== null && typeof mimeTypeOrConfig === "object") {
889
+ config = mimeTypeOrConfig;
890
+ mimeType = null;
891
+ } else {
892
+ config = maybeConfig ?? null;
893
+ mimeType = null;
894
+ }
895
+ const normalizedConfig = normalizeExtractionConfig(config);
896
+ const binding2 = getBinding();
897
+ const rawResult = await binding2.extractFileInWorker(
898
+ pool,
899
+ filePath,
900
+ mimeType,
901
+ normalizedConfig
902
+ );
903
+ return convertResult(rawResult);
904
+ }
905
+ async function batchExtractFilesInWorker(pool, paths, config = null) {
906
+ const normalizedConfig = normalizeExtractionConfig(config);
907
+ const binding2 = getBinding();
908
+ const rawResults = await binding2.batchExtractFilesInWorker(
909
+ pool,
910
+ paths,
911
+ normalizedConfig
912
+ );
913
+ return rawResults.map(convertResult);
914
+ }
915
+ async function closeWorkerPool(pool) {
916
+ const binding2 = getBinding();
917
+ await binding2.closeWorkerPool(pool);
918
+ }
919
+ const __version__ = "4.0.0-rc.24";
956
920
  export {
957
921
  CacheError,
958
922
  ErrorCode,
@@ -971,17 +935,21 @@ export {
971
935
  batchExtractBytes,
972
936
  batchExtractBytesSync,
973
937
  batchExtractFiles,
938
+ batchExtractFilesInWorker,
974
939
  batchExtractFilesSync,
975
940
  classifyError,
976
941
  clearDocumentExtractors,
977
942
  clearOcrBackends,
978
943
  clearPostProcessors,
979
944
  clearValidators,
945
+ closeWorkerPool,
946
+ createWorkerPool,
980
947
  detectMimeType,
981
948
  detectMimeTypeFromPath,
982
949
  extractBytes,
983
950
  extractBytesSync,
984
951
  extractFile,
952
+ extractFileInWorker,
985
953
  extractFileSync,
986
954
  getEmbeddingPreset,
987
955
  getErrorCodeDescription,
@@ -989,6 +957,7 @@ export {
989
957
  getExtensionsForMime,
990
958
  getLastErrorCode,
991
959
  getLastPanicContext,
960
+ getWorkerPoolStats,
992
961
  listDocumentExtractors,
993
962
  listEmbeddingPresets,
994
963
  listOcrBackends,