@kreuzberg/node 4.2.2 → 4.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/index.d.mts +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +39 -1
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +39 -1
- package/dist/index.mjs.map +1 -1
- package/dist/types.d.mts +58 -1
- package/dist/types.d.ts +58 -1
- package/dist/types.js.map +1 -1
- package/index.js +52 -52
- package/package.json +6 -6
package/README.md
CHANGED
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
<img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
|
|
23
23
|
</a>
|
|
24
24
|
<a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
|
|
25
|
-
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.2.2" alt="Go">
|
|
25
|
+
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.2.4" alt="Go">
|
|
26
26
|
</a>
|
|
27
27
|
<a href="https://www.nuget.org/packages/Kreuzberg/">
|
|
28
28
|
<img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
|
package/dist/index.d.mts
CHANGED
|
@@ -1200,6 +1200,6 @@ declare function __resetBindingForTests(): void;
|
|
|
1200
1200
|
* @module @kreuzberg/node
|
|
1201
1201
|
*/
|
|
1202
1202
|
|
|
1203
|
-
declare const __version__ = "4.2.2";
|
|
1203
|
+
declare const __version__ = "4.2.4";
|
|
1204
1204
|
|
|
1205
1205
|
export { type EmbeddingPreset, ErrorClassification, ExtractionConfig, ExtractionResult, OcrBackendProtocol, PanicContext, PostProcessorProtocol, ValidatorProtocol, WorkerPool, WorkerPoolStats, __resetBindingForTests, __setBindingForTests, __version__, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, batchExtractFilesInWorker, batchExtractFilesSync, classifyError, clearDocumentExtractors, clearOcrBackends, clearPostProcessors, clearValidators, closeWorkerPool, createWorkerPool, detectMimeType, detectMimeTypeFromPath, extractBytes, extractBytesSync, extractFile, extractFileInWorker, extractFileSync, getEmbeddingPreset, getErrorCodeDescription, getErrorCodeName, getExtensionsForMime, getLastErrorCode, getLastPanicContext, getWorkerPoolStats, listDocumentExtractors, listEmbeddingPresets, listOcrBackends, listPostProcessors, listValidators, loadConfigFile, loadConfigFromPath, registerOcrBackend, registerPostProcessor, registerValidator, unregisterDocumentExtractor, unregisterOcrBackend, unregisterPostProcessor, unregisterValidator, validateMimeType };
|
package/dist/index.d.ts
CHANGED
|
@@ -1200,6 +1200,6 @@ declare function __resetBindingForTests(): void;
|
|
|
1200
1200
|
* @module @kreuzberg/node
|
|
1201
1201
|
*/
|
|
1202
1202
|
|
|
1203
|
-
declare const __version__ = "4.2.2";
|
|
1203
|
+
declare const __version__ = "4.2.4";
|
|
1204
1204
|
|
|
1205
1205
|
export { type EmbeddingPreset, ErrorClassification, ExtractionConfig, ExtractionResult, OcrBackendProtocol, PanicContext, PostProcessorProtocol, ValidatorProtocol, WorkerPool, WorkerPoolStats, __resetBindingForTests, __setBindingForTests, __version__, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, batchExtractFilesInWorker, batchExtractFilesSync, classifyError, clearDocumentExtractors, clearOcrBackends, clearPostProcessors, clearValidators, closeWorkerPool, createWorkerPool, detectMimeType, detectMimeTypeFromPath, extractBytes, extractBytesSync, extractFile, extractFileInWorker, extractFileSync, getEmbeddingPreset, getErrorCodeDescription, getErrorCodeName, getExtensionsForMime, getLastErrorCode, getLastPanicContext, getWorkerPoolStats, listDocumentExtractors, listEmbeddingPresets, listOcrBackends, listPostProcessors, listValidators, loadConfigFile, loadConfigFromPath, registerOcrBackend, registerPostProcessor, registerValidator, unregisterDocumentExtractor, unregisterOcrBackend, unregisterPostProcessor, unregisterValidator, validateMimeType };
|
package/dist/index.js
CHANGED
|
@@ -594,6 +594,38 @@ function convertChunk(rawChunk) {
|
|
|
594
594
|
}
|
|
595
595
|
};
|
|
596
596
|
}
|
|
597
|
+
function convertElement(rawElement) {
|
|
598
|
+
if (!rawElement || typeof rawElement !== "object") {
|
|
599
|
+
return {
|
|
600
|
+
elementId: "",
|
|
601
|
+
elementType: "narrative_text",
|
|
602
|
+
text: "",
|
|
603
|
+
metadata: {}
|
|
604
|
+
};
|
|
605
|
+
}
|
|
606
|
+
const element = rawElement;
|
|
607
|
+
const elementMetadata = element["metadata"] ?? {};
|
|
608
|
+
return {
|
|
609
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
610
|
+
elementId: element["element_id"] ?? element["elementId"] ?? "",
|
|
611
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
612
|
+
elementType: element["element_type"] ?? element["elementType"] ?? "narrative_text",
|
|
613
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
614
|
+
text: element["text"] ?? "",
|
|
615
|
+
metadata: {
|
|
616
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
617
|
+
pageNumber: elementMetadata["page_number"] ?? elementMetadata["pageNumber"] ?? null,
|
|
618
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
619
|
+
filename: elementMetadata["filename"] ?? null,
|
|
620
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
621
|
+
coordinates: elementMetadata["coordinates"] ? elementMetadata["coordinates"] : null,
|
|
622
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
623
|
+
elementIndex: elementMetadata["element_index"] ?? elementMetadata["elementIndex"] ?? null,
|
|
624
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
625
|
+
additional: elementMetadata["additional"] ?? {}
|
|
626
|
+
}
|
|
627
|
+
};
|
|
628
|
+
}
|
|
597
629
|
function convertImage(rawImage) {
|
|
598
630
|
if (!rawImage || typeof rawImage !== "object") {
|
|
599
631
|
return {
|
|
@@ -667,6 +699,7 @@ function convertResult(rawResult) {
|
|
|
667
699
|
detectedLanguages: null,
|
|
668
700
|
chunks: null,
|
|
669
701
|
images: null,
|
|
702
|
+
elements: null,
|
|
670
703
|
pages: null
|
|
671
704
|
};
|
|
672
705
|
}
|
|
@@ -685,6 +718,7 @@ function convertResult(rawResult) {
|
|
|
685
718
|
detectedLanguages: Array.isArray(result["detectedLanguages"]) ? result["detectedLanguages"] : null,
|
|
686
719
|
chunks: null,
|
|
687
720
|
images: null,
|
|
721
|
+
elements: null,
|
|
688
722
|
pages: null
|
|
689
723
|
};
|
|
690
724
|
const chunksData = result["chunks"];
|
|
@@ -695,6 +729,10 @@ function convertResult(rawResult) {
|
|
|
695
729
|
if (Array.isArray(imagesData)) {
|
|
696
730
|
returnObj.images = imagesData.map((image) => convertImage(image));
|
|
697
731
|
}
|
|
732
|
+
const elementsData = result["elements"];
|
|
733
|
+
if (Array.isArray(elementsData)) {
|
|
734
|
+
returnObj.elements = elementsData.map((element) => convertElement(element));
|
|
735
|
+
}
|
|
698
736
|
const pagesData = result["pages"];
|
|
699
737
|
if (Array.isArray(pagesData)) {
|
|
700
738
|
returnObj.pages = pagesData.map((page) => convertPageContent(page));
|
|
@@ -1326,7 +1364,7 @@ function getEmbeddingPreset(name) {
|
|
|
1326
1364
|
}
|
|
1327
1365
|
|
|
1328
1366
|
// typescript/index.ts
|
|
1329
|
-
var __version__ = "4.2.2";
|
|
1367
|
+
var __version__ = "4.2.4";
|
|
1330
1368
|
// Annotate the CommonJS export names for ESM import in node:
|
|
1331
1369
|
0 && (module.exports = {
|
|
1332
1370
|
CacheError,
|