@kreuzberg/node 4.2.3 → 4.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/index.d.mts +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +48 -2
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +48 -2
- package/dist/index.mjs.map +1 -1
- package/dist/types.d.mts +58 -1
- package/dist/types.d.ts +58 -1
- package/dist/types.js.map +1 -1
- package/index.d.ts +23 -0
- package/index.js +52 -52
- package/package.json +8 -7
package/README.md
CHANGED
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
<img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
|
|
23
23
|
</a>
|
|
24
24
|
<a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
|
|
25
|
-
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.2.3" alt="Go">
|
|
25
|
+
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.2.5" alt="Go">
|
|
26
26
|
</a>
|
|
27
27
|
<a href="https://www.nuget.org/packages/Kreuzberg/">
|
|
28
28
|
<img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
|
package/dist/index.d.mts
CHANGED
|
@@ -1200,6 +1200,6 @@ declare function __resetBindingForTests(): void;
|
|
|
1200
1200
|
* @module @kreuzberg/node
|
|
1201
1201
|
*/
|
|
1202
1202
|
|
|
1203
|
-
declare const __version__ = "4.2.3";
|
|
1203
|
+
declare const __version__ = "4.2.5";
|
|
1204
1204
|
|
|
1205
1205
|
export { type EmbeddingPreset, ErrorClassification, ExtractionConfig, ExtractionResult, OcrBackendProtocol, PanicContext, PostProcessorProtocol, ValidatorProtocol, WorkerPool, WorkerPoolStats, __resetBindingForTests, __setBindingForTests, __version__, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, batchExtractFilesInWorker, batchExtractFilesSync, classifyError, clearDocumentExtractors, clearOcrBackends, clearPostProcessors, clearValidators, closeWorkerPool, createWorkerPool, detectMimeType, detectMimeTypeFromPath, extractBytes, extractBytesSync, extractFile, extractFileInWorker, extractFileSync, getEmbeddingPreset, getErrorCodeDescription, getErrorCodeName, getExtensionsForMime, getLastErrorCode, getLastPanicContext, getWorkerPoolStats, listDocumentExtractors, listEmbeddingPresets, listOcrBackends, listPostProcessors, listValidators, loadConfigFile, loadConfigFromPath, registerOcrBackend, registerPostProcessor, registerValidator, unregisterDocumentExtractor, unregisterOcrBackend, unregisterPostProcessor, unregisterValidator, validateMimeType };
|
package/dist/index.d.ts
CHANGED
|
@@ -1200,6 +1200,6 @@ declare function __resetBindingForTests(): void;
|
|
|
1200
1200
|
* @module @kreuzberg/node
|
|
1201
1201
|
*/
|
|
1202
1202
|
|
|
1203
|
-
declare const __version__ = "4.2.3";
|
|
1203
|
+
declare const __version__ = "4.2.5";
|
|
1204
1204
|
|
|
1205
1205
|
export { type EmbeddingPreset, ErrorClassification, ExtractionConfig, ExtractionResult, OcrBackendProtocol, PanicContext, PostProcessorProtocol, ValidatorProtocol, WorkerPool, WorkerPoolStats, __resetBindingForTests, __setBindingForTests, __version__, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, batchExtractFilesInWorker, batchExtractFilesSync, classifyError, clearDocumentExtractors, clearOcrBackends, clearPostProcessors, clearValidators, closeWorkerPool, createWorkerPool, detectMimeType, detectMimeTypeFromPath, extractBytes, extractBytesSync, extractFile, extractFileInWorker, extractFileSync, getEmbeddingPreset, getErrorCodeDescription, getErrorCodeName, getExtensionsForMime, getLastErrorCode, getLastPanicContext, getWorkerPoolStats, listDocumentExtractors, listEmbeddingPresets, listOcrBackends, listPostProcessors, listValidators, loadConfigFile, loadConfigFromPath, registerOcrBackend, registerPostProcessor, registerValidator, unregisterDocumentExtractor, unregisterOcrBackend, unregisterPostProcessor, unregisterValidator, validateMimeType };
|
package/dist/index.js
CHANGED
|
@@ -91,6 +91,12 @@ module.exports = __toCommonJS(index_exports);
|
|
|
91
91
|
// typescript/core/binding.ts
|
|
92
92
|
var import_node_module = require("module");
|
|
93
93
|
var import_meta = {};
|
|
94
|
+
function isNapiRuntime() {
|
|
95
|
+
if (typeof process === "undefined") return false;
|
|
96
|
+
if (process.versions?.["bun"]) return true;
|
|
97
|
+
if (process.versions?.node) return true;
|
|
98
|
+
return false;
|
|
99
|
+
}
|
|
94
100
|
var binding = null;
|
|
95
101
|
var bindingInitialized = false;
|
|
96
102
|
function createNativeBindingError(error) {
|
|
@@ -167,7 +173,7 @@ function getBinding() {
|
|
|
167
173
|
return binding;
|
|
168
174
|
}
|
|
169
175
|
try {
|
|
170
|
-
if (
|
|
176
|
+
if (isNapiRuntime()) {
|
|
171
177
|
binding = loadNativeBinding();
|
|
172
178
|
bindingInitialized = true;
|
|
173
179
|
return binding;
|
|
@@ -528,6 +534,8 @@ function normalizeExtractionConfig(config) {
|
|
|
528
534
|
setIfDefined(normalized, "pages", pages);
|
|
529
535
|
const htmlOptions = normalizeHtmlOptions(config.htmlOptions);
|
|
530
536
|
setIfDefined(normalized, "htmlOptions", htmlOptions);
|
|
537
|
+
setIfDefined(normalized, "outputFormat", config.outputFormat);
|
|
538
|
+
setIfDefined(normalized, "resultFormat", config.resultFormat);
|
|
531
539
|
return normalized;
|
|
532
540
|
}
|
|
533
541
|
|
|
@@ -594,6 +602,38 @@ function convertChunk(rawChunk) {
|
|
|
594
602
|
}
|
|
595
603
|
};
|
|
596
604
|
}
|
|
605
|
+
function convertElement(rawElement) {
|
|
606
|
+
if (!rawElement || typeof rawElement !== "object") {
|
|
607
|
+
return {
|
|
608
|
+
elementId: "",
|
|
609
|
+
elementType: "narrative_text",
|
|
610
|
+
text: "",
|
|
611
|
+
metadata: {}
|
|
612
|
+
};
|
|
613
|
+
}
|
|
614
|
+
const element = rawElement;
|
|
615
|
+
const elementMetadata = element["metadata"] ?? {};
|
|
616
|
+
return {
|
|
617
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
618
|
+
elementId: element["element_id"] ?? element["elementId"] ?? "",
|
|
619
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
620
|
+
elementType: element["element_type"] ?? element["elementType"] ?? "narrative_text",
|
|
621
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
622
|
+
text: element["text"] ?? "",
|
|
623
|
+
metadata: {
|
|
624
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
625
|
+
pageNumber: elementMetadata["page_number"] ?? elementMetadata["pageNumber"] ?? null,
|
|
626
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
627
|
+
filename: elementMetadata["filename"] ?? null,
|
|
628
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
629
|
+
coordinates: elementMetadata["coordinates"] ? elementMetadata["coordinates"] : null,
|
|
630
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
631
|
+
elementIndex: elementMetadata["element_index"] ?? elementMetadata["elementIndex"] ?? null,
|
|
632
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
633
|
+
additional: elementMetadata["additional"] ?? {}
|
|
634
|
+
}
|
|
635
|
+
};
|
|
636
|
+
}
|
|
597
637
|
function convertImage(rawImage) {
|
|
598
638
|
if (!rawImage || typeof rawImage !== "object") {
|
|
599
639
|
return {
|
|
@@ -667,6 +707,7 @@ function convertResult(rawResult) {
|
|
|
667
707
|
detectedLanguages: null,
|
|
668
708
|
chunks: null,
|
|
669
709
|
images: null,
|
|
710
|
+
elements: null,
|
|
670
711
|
pages: null
|
|
671
712
|
};
|
|
672
713
|
}
|
|
@@ -685,6 +726,7 @@ function convertResult(rawResult) {
|
|
|
685
726
|
detectedLanguages: Array.isArray(result["detectedLanguages"]) ? result["detectedLanguages"] : null,
|
|
686
727
|
chunks: null,
|
|
687
728
|
images: null,
|
|
729
|
+
elements: null,
|
|
688
730
|
pages: null
|
|
689
731
|
};
|
|
690
732
|
const chunksData = result["chunks"];
|
|
@@ -695,6 +737,10 @@ function convertResult(rawResult) {
|
|
|
695
737
|
if (Array.isArray(imagesData)) {
|
|
696
738
|
returnObj.images = imagesData.map((image) => convertImage(image));
|
|
697
739
|
}
|
|
740
|
+
const elementsData = result["elements"];
|
|
741
|
+
if (Array.isArray(elementsData)) {
|
|
742
|
+
returnObj.elements = elementsData.map((element) => convertElement(element));
|
|
743
|
+
}
|
|
698
744
|
const pagesData = result["pages"];
|
|
699
745
|
if (Array.isArray(pagesData)) {
|
|
700
746
|
returnObj.pages = pagesData.map((page) => convertPageContent(page));
|
|
@@ -1326,7 +1372,7 @@ function getEmbeddingPreset(name) {
|
|
|
1326
1372
|
}
|
|
1327
1373
|
|
|
1328
1374
|
// typescript/index.ts
|
|
1329
|
-
var __version__ = "4.2.3";
|
|
1375
|
+
var __version__ = "4.2.5";
|
|
1330
1376
|
// Annotate the CommonJS export names for ESM import in node:
|
|
1331
1377
|
0 && (module.exports = {
|
|
1332
1378
|
CacheError,
|