@kreuzberg/node 4.2.3 → 4.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -22,7 +22,7 @@
22
22
  <img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
23
23
  </a>
24
24
  <a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
25
- <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.2.3" alt="Go">
25
+ <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.2.5" alt="Go">
26
26
  </a>
27
27
  <a href="https://www.nuget.org/packages/Kreuzberg/">
28
28
  <img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
package/dist/index.d.mts CHANGED
@@ -1200,6 +1200,6 @@ declare function __resetBindingForTests(): void;
1200
1200
  * @module @kreuzberg/node
1201
1201
  */
1202
1202
 
1203
- declare const __version__ = "4.2.3";
1203
+ declare const __version__ = "4.2.5";
1204
1204
 
1205
1205
  export { type EmbeddingPreset, ErrorClassification, ExtractionConfig, ExtractionResult, OcrBackendProtocol, PanicContext, PostProcessorProtocol, ValidatorProtocol, WorkerPool, WorkerPoolStats, __resetBindingForTests, __setBindingForTests, __version__, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, batchExtractFilesInWorker, batchExtractFilesSync, classifyError, clearDocumentExtractors, clearOcrBackends, clearPostProcessors, clearValidators, closeWorkerPool, createWorkerPool, detectMimeType, detectMimeTypeFromPath, extractBytes, extractBytesSync, extractFile, extractFileInWorker, extractFileSync, getEmbeddingPreset, getErrorCodeDescription, getErrorCodeName, getExtensionsForMime, getLastErrorCode, getLastPanicContext, getWorkerPoolStats, listDocumentExtractors, listEmbeddingPresets, listOcrBackends, listPostProcessors, listValidators, loadConfigFile, loadConfigFromPath, registerOcrBackend, registerPostProcessor, registerValidator, unregisterDocumentExtractor, unregisterOcrBackend, unregisterPostProcessor, unregisterValidator, validateMimeType };
package/dist/index.d.ts CHANGED
@@ -1200,6 +1200,6 @@ declare function __resetBindingForTests(): void;
1200
1200
  * @module @kreuzberg/node
1201
1201
  */
1202
1202
 
1203
- declare const __version__ = "4.2.3";
1203
+ declare const __version__ = "4.2.5";
1204
1204
 
1205
1205
  export { type EmbeddingPreset, ErrorClassification, ExtractionConfig, ExtractionResult, OcrBackendProtocol, PanicContext, PostProcessorProtocol, ValidatorProtocol, WorkerPool, WorkerPoolStats, __resetBindingForTests, __setBindingForTests, __version__, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, batchExtractFilesInWorker, batchExtractFilesSync, classifyError, clearDocumentExtractors, clearOcrBackends, clearPostProcessors, clearValidators, closeWorkerPool, createWorkerPool, detectMimeType, detectMimeTypeFromPath, extractBytes, extractBytesSync, extractFile, extractFileInWorker, extractFileSync, getEmbeddingPreset, getErrorCodeDescription, getErrorCodeName, getExtensionsForMime, getLastErrorCode, getLastPanicContext, getWorkerPoolStats, listDocumentExtractors, listEmbeddingPresets, listOcrBackends, listPostProcessors, listValidators, loadConfigFile, loadConfigFromPath, registerOcrBackend, registerPostProcessor, registerValidator, unregisterDocumentExtractor, unregisterOcrBackend, unregisterPostProcessor, unregisterValidator, validateMimeType };
package/dist/index.js CHANGED
@@ -91,6 +91,12 @@ module.exports = __toCommonJS(index_exports);
91
91
  // typescript/core/binding.ts
92
92
  var import_node_module = require("module");
93
93
  var import_meta = {};
94
+ function isNapiRuntime() {
95
+ if (typeof process === "undefined") return false;
96
+ if (process.versions?.["bun"]) return true;
97
+ if (process.versions?.node) return true;
98
+ return false;
99
+ }
94
100
  var binding = null;
95
101
  var bindingInitialized = false;
96
102
  function createNativeBindingError(error) {
@@ -167,7 +173,7 @@ function getBinding() {
167
173
  return binding;
168
174
  }
169
175
  try {
170
- if (typeof process !== "undefined" && process.versions && process.versions.node) {
176
+ if (isNapiRuntime()) {
171
177
  binding = loadNativeBinding();
172
178
  bindingInitialized = true;
173
179
  return binding;
@@ -528,6 +534,8 @@ function normalizeExtractionConfig(config) {
528
534
  setIfDefined(normalized, "pages", pages);
529
535
  const htmlOptions = normalizeHtmlOptions(config.htmlOptions);
530
536
  setIfDefined(normalized, "htmlOptions", htmlOptions);
537
+ setIfDefined(normalized, "outputFormat", config.outputFormat);
538
+ setIfDefined(normalized, "resultFormat", config.resultFormat);
531
539
  return normalized;
532
540
  }
533
541
 
@@ -594,6 +602,38 @@ function convertChunk(rawChunk) {
594
602
  }
595
603
  };
596
604
  }
605
+ function convertElement(rawElement) {
606
+ if (!rawElement || typeof rawElement !== "object") {
607
+ return {
608
+ elementId: "",
609
+ elementType: "narrative_text",
610
+ text: "",
611
+ metadata: {}
612
+ };
613
+ }
614
+ const element = rawElement;
615
+ const elementMetadata = element["metadata"] ?? {};
616
+ return {
617
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
618
+ elementId: element["element_id"] ?? element["elementId"] ?? "",
619
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
620
+ elementType: element["element_type"] ?? element["elementType"] ?? "narrative_text",
621
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
622
+ text: element["text"] ?? "",
623
+ metadata: {
624
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
625
+ pageNumber: elementMetadata["page_number"] ?? elementMetadata["pageNumber"] ?? null,
626
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
627
+ filename: elementMetadata["filename"] ?? null,
628
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
629
+ coordinates: elementMetadata["coordinates"] ? elementMetadata["coordinates"] : null,
630
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
631
+ elementIndex: elementMetadata["element_index"] ?? elementMetadata["elementIndex"] ?? null,
632
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
633
+ additional: elementMetadata["additional"] ?? {}
634
+ }
635
+ };
636
+ }
597
637
  function convertImage(rawImage) {
598
638
  if (!rawImage || typeof rawImage !== "object") {
599
639
  return {
@@ -667,6 +707,7 @@ function convertResult(rawResult) {
667
707
  detectedLanguages: null,
668
708
  chunks: null,
669
709
  images: null,
710
+ elements: null,
670
711
  pages: null
671
712
  };
672
713
  }
@@ -685,6 +726,7 @@ function convertResult(rawResult) {
685
726
  detectedLanguages: Array.isArray(result["detectedLanguages"]) ? result["detectedLanguages"] : null,
686
727
  chunks: null,
687
728
  images: null,
729
+ elements: null,
688
730
  pages: null
689
731
  };
690
732
  const chunksData = result["chunks"];
@@ -695,6 +737,10 @@ function convertResult(rawResult) {
695
737
  if (Array.isArray(imagesData)) {
696
738
  returnObj.images = imagesData.map((image) => convertImage(image));
697
739
  }
740
+ const elementsData = result["elements"];
741
+ if (Array.isArray(elementsData)) {
742
+ returnObj.elements = elementsData.map((element) => convertElement(element));
743
+ }
698
744
  const pagesData = result["pages"];
699
745
  if (Array.isArray(pagesData)) {
700
746
  returnObj.pages = pagesData.map((page) => convertPageContent(page));
@@ -1326,7 +1372,7 @@ function getEmbeddingPreset(name) {
1326
1372
  }
1327
1373
 
1328
1374
  // typescript/index.ts
1329
- var __version__ = "4.2.3";
1375
+ var __version__ = "4.2.5";
1330
1376
  // Annotate the CommonJS export names for ESM import in node:
1331
1377
  0 && (module.exports = {
1332
1378
  CacheError,