@kreuzberg/node 4.2.2 → 4.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -22,7 +22,7 @@
22
22
  <img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
23
23
  </a>
24
24
  <a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
25
- <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.2.2" alt="Go">
25
+ <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.2.4" alt="Go">
26
26
  </a>
27
27
  <a href="https://www.nuget.org/packages/Kreuzberg/">
28
28
  <img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
package/dist/index.d.mts CHANGED
@@ -1200,6 +1200,6 @@ declare function __resetBindingForTests(): void;
1200
1200
  * @module @kreuzberg/node
1201
1201
  */
1202
1202
 
1203
- declare const __version__ = "4.2.2";
1203
+ declare const __version__ = "4.2.4";
1204
1204
 
1205
1205
  export { type EmbeddingPreset, ErrorClassification, ExtractionConfig, ExtractionResult, OcrBackendProtocol, PanicContext, PostProcessorProtocol, ValidatorProtocol, WorkerPool, WorkerPoolStats, __resetBindingForTests, __setBindingForTests, __version__, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, batchExtractFilesInWorker, batchExtractFilesSync, classifyError, clearDocumentExtractors, clearOcrBackends, clearPostProcessors, clearValidators, closeWorkerPool, createWorkerPool, detectMimeType, detectMimeTypeFromPath, extractBytes, extractBytesSync, extractFile, extractFileInWorker, extractFileSync, getEmbeddingPreset, getErrorCodeDescription, getErrorCodeName, getExtensionsForMime, getLastErrorCode, getLastPanicContext, getWorkerPoolStats, listDocumentExtractors, listEmbeddingPresets, listOcrBackends, listPostProcessors, listValidators, loadConfigFile, loadConfigFromPath, registerOcrBackend, registerPostProcessor, registerValidator, unregisterDocumentExtractor, unregisterOcrBackend, unregisterPostProcessor, unregisterValidator, validateMimeType };
package/dist/index.d.ts CHANGED
@@ -1200,6 +1200,6 @@ declare function __resetBindingForTests(): void;
1200
1200
  * @module @kreuzberg/node
1201
1201
  */
1202
1202
 
1203
- declare const __version__ = "4.2.2";
1203
+ declare const __version__ = "4.2.4";
1204
1204
 
1205
1205
  export { type EmbeddingPreset, ErrorClassification, ExtractionConfig, ExtractionResult, OcrBackendProtocol, PanicContext, PostProcessorProtocol, ValidatorProtocol, WorkerPool, WorkerPoolStats, __resetBindingForTests, __setBindingForTests, __version__, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, batchExtractFilesInWorker, batchExtractFilesSync, classifyError, clearDocumentExtractors, clearOcrBackends, clearPostProcessors, clearValidators, closeWorkerPool, createWorkerPool, detectMimeType, detectMimeTypeFromPath, extractBytes, extractBytesSync, extractFile, extractFileInWorker, extractFileSync, getEmbeddingPreset, getErrorCodeDescription, getErrorCodeName, getExtensionsForMime, getLastErrorCode, getLastPanicContext, getWorkerPoolStats, listDocumentExtractors, listEmbeddingPresets, listOcrBackends, listPostProcessors, listValidators, loadConfigFile, loadConfigFromPath, registerOcrBackend, registerPostProcessor, registerValidator, unregisterDocumentExtractor, unregisterOcrBackend, unregisterPostProcessor, unregisterValidator, validateMimeType };
package/dist/index.js CHANGED
@@ -594,6 +594,38 @@ function convertChunk(rawChunk) {
594
594
  }
595
595
  };
596
596
  }
597
+ function convertElement(rawElement) {
598
+ if (!rawElement || typeof rawElement !== "object") {
599
+ return {
600
+ elementId: "",
601
+ elementType: "narrative_text",
602
+ text: "",
603
+ metadata: {}
604
+ };
605
+ }
606
+ const element = rawElement;
607
+ const elementMetadata = element["metadata"] ?? {};
608
+ return {
609
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
610
+ elementId: element["element_id"] ?? element["elementId"] ?? "",
611
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
612
+ elementType: element["element_type"] ?? element["elementType"] ?? "narrative_text",
613
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
614
+ text: element["text"] ?? "",
615
+ metadata: {
616
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
617
+ pageNumber: elementMetadata["page_number"] ?? elementMetadata["pageNumber"] ?? null,
618
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
619
+ filename: elementMetadata["filename"] ?? null,
620
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
621
+ coordinates: elementMetadata["coordinates"] ? elementMetadata["coordinates"] : null,
622
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
623
+ elementIndex: elementMetadata["element_index"] ?? elementMetadata["elementIndex"] ?? null,
624
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
625
+ additional: elementMetadata["additional"] ?? {}
626
+ }
627
+ };
628
+ }
597
629
  function convertImage(rawImage) {
598
630
  if (!rawImage || typeof rawImage !== "object") {
599
631
  return {
@@ -667,6 +699,7 @@ function convertResult(rawResult) {
667
699
  detectedLanguages: null,
668
700
  chunks: null,
669
701
  images: null,
702
+ elements: null,
670
703
  pages: null
671
704
  };
672
705
  }
@@ -685,6 +718,7 @@ function convertResult(rawResult) {
685
718
  detectedLanguages: Array.isArray(result["detectedLanguages"]) ? result["detectedLanguages"] : null,
686
719
  chunks: null,
687
720
  images: null,
721
+ elements: null,
688
722
  pages: null
689
723
  };
690
724
  const chunksData = result["chunks"];
@@ -695,6 +729,10 @@ function convertResult(rawResult) {
695
729
  if (Array.isArray(imagesData)) {
696
730
  returnObj.images = imagesData.map((image) => convertImage(image));
697
731
  }
732
+ const elementsData = result["elements"];
733
+ if (Array.isArray(elementsData)) {
734
+ returnObj.elements = elementsData.map((element) => convertElement(element));
735
+ }
698
736
  const pagesData = result["pages"];
699
737
  if (Array.isArray(pagesData)) {
700
738
  returnObj.pages = pagesData.map((page) => convertPageContent(page));
@@ -1326,7 +1364,7 @@ function getEmbeddingPreset(name) {
1326
1364
  }
1327
1365
 
1328
1366
  // typescript/index.ts
1329
- var __version__ = "4.2.2";
1367
+ var __version__ = "4.2.4";
1330
1368
  // Annotate the CommonJS export names for ESM import in node:
1331
1369
  0 && (module.exports = {
1332
1370
  CacheError,