@kreuzberg/node 4.0.0-rc.8 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +342 -530
- package/dist/cli.d.mts +4 -0
- package/dist/cli.d.ts +4 -0
- package/dist/cli.js +12 -2
- package/dist/cli.js.map +1 -1
- package/dist/cli.mjs +12 -1
- package/dist/cli.mjs.map +1 -1
- package/dist/index.d.mts +337 -62
- package/dist/index.d.ts +337 -62
- package/dist/index.js +285 -56
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +277 -56
- package/dist/index.mjs.map +1 -1
- package/dist/types.d.mts +469 -54
- package/dist/types.d.ts +469 -54
- package/dist/types.js.map +1 -1
- package/index.d.ts +662 -1
- package/index.js +85 -55
- package/metadata.d.ts +53 -33
- package/package.json +17 -19
package/dist/index.mjs
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { readFileSync } from "node:fs";
|
|
1
2
|
import { createRequire } from "node:module";
|
|
2
3
|
import {
|
|
3
4
|
CacheError,
|
|
@@ -70,17 +71,50 @@ function __resetBindingForTests() {
|
|
|
70
71
|
bindingInitialized = false;
|
|
71
72
|
}
|
|
72
73
|
/**
 * Load the native addon from "../index.js" and check that it exposes the
 * extraction entry points this wrapper relies on.
 *
 * @returns {object} The loaded native binding module.
 * @throws {Error} When no `require` function can be obtained, when the loaded
 *   value is not a non-null object, or when a required method is missing.
 */
function loadNativeBinding() {
  // Use an ambient `require` when one exists; otherwise try to create one
  // anchored at this module's URL.
  let nodeRequire;
  if (typeof require === "undefined") {
    try {
      nodeRequire = createRequire(import.meta.url);
    } catch {
      nodeRequire = void 0;
    }
  } else {
    nodeRequire = require;
  }
  if (!nodeRequire) {
    throw new Error("Unable to resolve native binding loader (require not available).");
  }

  const candidate = nodeRequire("../index.js");
  if (candidate === null || typeof candidate !== "object") {
    throw new Error(
      "Native binding is not a valid object. Ensure the native module is properly built and compatible."
    );
  }

  // Report the first expected entry point that is not callable.
  const expectedMethods = [
    "extractFileSync",
    "extractFile",
    "extractBytesSync",
    "extractBytes",
    "batchExtractFilesSync",
    "batchExtractFiles",
    "batchExtractBytesSync",
    "batchExtractBytes"
  ];
  const method = expectedMethods.find((name) => typeof candidate[name] !== "function");
  if (method !== void 0) {
    throw new Error(
      `Native binding is missing required method: ${method}. Ensure the native module is properly built and compatible.`
    );
  }
  return candidate;
}
|
|
82
113
|
function getBinding() {
|
|
83
114
|
if (bindingInitialized) {
|
|
115
|
+
if (binding === null) {
|
|
116
|
+
throw new Error("Native binding was previously failed to load.");
|
|
117
|
+
}
|
|
84
118
|
return binding;
|
|
85
119
|
}
|
|
86
120
|
try {
|
|
@@ -90,6 +124,7 @@ function getBinding() {
|
|
|
90
124
|
return binding;
|
|
91
125
|
}
|
|
92
126
|
} catch (error) {
|
|
127
|
+
bindingInitialized = true;
|
|
93
128
|
throw createNativeBindingError(error);
|
|
94
129
|
}
|
|
95
130
|
throw new Error(
|
|
@@ -98,7 +133,11 @@ function getBinding() {
|
|
|
98
133
|
}
|
|
99
134
|
function parseMetadata(metadataStr) {
|
|
100
135
|
try {
|
|
101
|
-
|
|
136
|
+
const parsed = JSON.parse(metadataStr);
|
|
137
|
+
if (typeof parsed === "object" && parsed !== null) {
|
|
138
|
+
return parsed;
|
|
139
|
+
}
|
|
140
|
+
return {};
|
|
102
141
|
} catch {
|
|
103
142
|
return {};
|
|
104
143
|
}
|
|
@@ -116,7 +155,7 @@ function ensureUint8Array(value) {
|
|
|
116
155
|
return new Uint8Array();
|
|
117
156
|
}
|
|
118
157
|
function convertChunk(rawChunk) {
|
|
119
|
-
if (!rawChunk) {
|
|
158
|
+
if (!rawChunk || typeof rawChunk !== "object") {
|
|
120
159
|
return {
|
|
121
160
|
content: "",
|
|
122
161
|
metadata: {
|
|
@@ -129,23 +168,33 @@ function convertChunk(rawChunk) {
|
|
|
129
168
|
embedding: null
|
|
130
169
|
};
|
|
131
170
|
}
|
|
132
|
-
const
|
|
171
|
+
const chunk = rawChunk;
|
|
172
|
+
const metadata = chunk["metadata"] ?? {};
|
|
133
173
|
return {
|
|
134
|
-
|
|
135
|
-
|
|
174
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
175
|
+
content: chunk["content"] ?? "",
|
|
176
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
177
|
+
embedding: chunk["embedding"] ?? null,
|
|
136
178
|
metadata: {
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
179
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
180
|
+
byteStart: metadata["byte_start"] ?? metadata["charStart"] ?? 0,
|
|
181
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
182
|
+
byteEnd: metadata["byte_end"] ?? metadata["charEnd"] ?? 0,
|
|
183
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
184
|
+
tokenCount: metadata["token_count"] ?? metadata["tokenCount"] ?? null,
|
|
185
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
186
|
+
chunkIndex: metadata["chunk_index"] ?? metadata["chunkIndex"] ?? 0,
|
|
187
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
188
|
+
totalChunks: metadata["total_chunks"] ?? metadata["totalChunks"] ?? 0,
|
|
189
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
190
|
+
firstPage: metadata["first_page"] ?? metadata["firstPage"] ?? null,
|
|
191
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
192
|
+
lastPage: metadata["last_page"] ?? metadata["lastPage"] ?? null
|
|
144
193
|
}
|
|
145
194
|
};
|
|
146
195
|
}
|
|
147
196
|
function convertImage(rawImage) {
|
|
148
|
-
if (!rawImage) {
|
|
197
|
+
if (!rawImage || typeof rawImage !== "object") {
|
|
149
198
|
return {
|
|
150
199
|
data: new Uint8Array(),
|
|
151
200
|
format: "unknown",
|
|
@@ -160,31 +209,97 @@ function convertImage(rawImage) {
|
|
|
160
209
|
ocrResult: null
|
|
161
210
|
};
|
|
162
211
|
}
|
|
212
|
+
const image = rawImage;
|
|
163
213
|
return {
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
214
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
215
|
+
data: ensureUint8Array(image["data"]),
|
|
216
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
217
|
+
format: image["format"] ?? "unknown",
|
|
218
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
219
|
+
imageIndex: image["imageIndex"] ?? 0,
|
|
220
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
221
|
+
pageNumber: image["pageNumber"] ?? null,
|
|
222
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
223
|
+
width: image["width"] ?? null,
|
|
224
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
225
|
+
height: image["height"] ?? null,
|
|
226
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
227
|
+
colorspace: image["colorspace"] ?? null,
|
|
228
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
229
|
+
bitsPerComponent: image["bitsPerComponent"] ?? null,
|
|
230
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
231
|
+
isMask: image["isMask"] ?? false,
|
|
232
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
233
|
+
description: image["description"] ?? null,
|
|
234
|
+
// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
|
|
235
|
+
ocrResult: image["ocrResult"] ? convertResult(image["ocrResult"]) : null
|
|
175
236
|
};
|
|
176
237
|
}
|
|
177
|
-
function
|
|
238
|
+
/**
 * Normalize one raw page entry into a page-content object.
 *
 * Input that is null or not an object yields an empty page (page number 0,
 * empty content, no tables, no images). Otherwise missing `pageNumber` and
 * `content` fall back to those defaults, `tables` is passed through when it
 * is an array, and every entry of an `images` array goes through
 * `convertImage`.
 *
 * @param {unknown} rawPage - Raw page value to normalize.
 * @returns {{pageNumber: *, content: *, tables: Array, images: Array}}
 */
function convertPageContent(rawPage) {
  if (rawPage === null || typeof rawPage !== "object") {
    return { pageNumber: 0, content: "", tables: [], images: [] };
  }
  const { pageNumber, content, tables, images } = rawPage;
  return {
    pageNumber: pageNumber ?? 0,
    content: content ?? "",
    tables: Array.isArray(tables) ? tables : [],
    images: Array.isArray(images) ? images.map((entry) => convertImage(entry)) : []
  };
}
|
|
259
|
+
/**
 * Normalize a raw extraction result into the wrapper's result shape.
 *
 * Input that is null or not an object yields an empty result. Otherwise:
 * - `metadata` given as a string is decoded with `parseMetadata`; any other
 *   non-nullish value is passed through, and nullish becomes `{}`;
 * - `tables` is kept only when it is an array (else `[]`);
 * - `detectedLanguages` is kept only when it is an array (else `null`);
 * - `chunks`, `images` and `pages` are converted element by element when they
 *   are arrays, and are `null` otherwise.
 *
 * @param {unknown} rawResult - Raw result value to normalize.
 * @returns {object} The normalized result.
 */
function convertResult(rawResult) {
  if (rawResult === null || typeof rawResult !== "object") {
    return {
      content: "",
      mimeType: "application/octet-stream",
      metadata: {},
      tables: [],
      detectedLanguages: null,
      chunks: null,
      images: null,
      pages: null
    };
  }

  const {
    content,
    mimeType,
    metadata: rawMetadata,
    tables,
    detectedLanguages,
    chunks,
    images,
    pages
  } = rawResult;

  return {
    content: content ?? "",
    mimeType: mimeType ?? "application/octet-stream",
    metadata: typeof rawMetadata === "string" ? parseMetadata(rawMetadata) : rawMetadata ?? {},
    tables: Array.isArray(tables) ? tables : [],
    detectedLanguages: Array.isArray(detectedLanguages) ? detectedLanguages : null,
    chunks: Array.isArray(chunks) ? chunks.map((entry) => convertChunk(entry)) : null,
    images: Array.isArray(images) ? images.map((entry) => convertImage(entry)) : null,
    pages: Array.isArray(pages) ? pages.map((entry) => convertPageContent(entry)) : null
  };
}
|
|
188
303
|
function setIfDefined(target, key, value) {
|
|
189
304
|
if (value !== void 0) {
|
|
190
305
|
target[key] = value;
|
|
@@ -347,9 +462,9 @@ function normalizePageConfig(pages) {
|
|
|
347
462
|
return void 0;
|
|
348
463
|
}
|
|
349
464
|
const normalized = {};
|
|
350
|
-
setIfDefined(normalized, "
|
|
351
|
-
setIfDefined(normalized, "
|
|
352
|
-
setIfDefined(normalized, "
|
|
465
|
+
setIfDefined(normalized, "extractPages", pages.extractPages);
|
|
466
|
+
setIfDefined(normalized, "insertPageMarkers", pages.insertPageMarkers);
|
|
467
|
+
setIfDefined(normalized, "markerFormat", pages.markerFormat);
|
|
353
468
|
return normalized;
|
|
354
469
|
}
|
|
355
470
|
function normalizeExtractionConfig(config) {
|
|
@@ -383,23 +498,59 @@ function normalizeExtractionConfig(config) {
|
|
|
383
498
|
setIfDefined(normalized, "htmlOptions", htmlOptions);
|
|
384
499
|
return normalized;
|
|
385
500
|
}
|
|
386
|
-
function extractFileSync(filePath,
|
|
501
|
+
/**
 * Synchronously extract content from a file through the native binding.
 *
 * The second argument is overloaded: a string is taken as the MIME type (the
 * config then comes from `maybeConfig`), a non-null object is taken as the
 * config itself, and anything else means "no MIME type" with the config read
 * from `maybeConfig`.
 *
 * @param filePath - Path handed to the binding's `extractFileSync`.
 * @param mimeTypeOrConfig - MIME type string, config object, or nullish.
 * @param maybeConfig - Config used when the second argument is not an object.
 * @returns The binding's result after `convertResult`.
 */
function extractFileSync(filePath, mimeTypeOrConfig, maybeConfig) {
  const secondIsMimeType = typeof mimeTypeOrConfig === "string";
  const secondIsConfig =
    !secondIsMimeType && mimeTypeOrConfig !== null && typeof mimeTypeOrConfig === "object";

  const mimeType = secondIsMimeType ? mimeTypeOrConfig : null;
  const config = secondIsConfig ? mimeTypeOrConfig : maybeConfig ?? null;

  // Normalize before touching the binding, as the call order matters for which error surfaces first.
  const nativeConfig = normalizeExtractionConfig(config);
  const nativeResult = getBinding().extractFileSync(filePath, mimeType, nativeConfig);
  return convertResult(nativeResult);
}
|
|
391
|
-
async function extractFile(filePath,
|
|
518
|
+
/**
 * Asynchronously extract content from a file through the native binding.
 *
 * The second argument is overloaded: a string is taken as the MIME type (the
 * config then comes from `maybeConfig`), a non-null object is taken as the
 * config itself, and anything else means "no MIME type" with the config read
 * from `maybeConfig`.
 *
 * @param filePath - Path handed to the binding's `extractFile`.
 * @param mimeTypeOrConfig - MIME type string, config object, or nullish.
 * @param maybeConfig - Config used when the second argument is not an object.
 * @returns {Promise} Resolves to the binding's result after `convertResult`.
 */
async function extractFile(filePath, mimeTypeOrConfig, maybeConfig) {
  let mimeType = null;
  let config;
  switch (typeof mimeTypeOrConfig) {
    case "string":
      mimeType = mimeTypeOrConfig;
      config = maybeConfig ?? null;
      break;
    case "object":
      // `typeof null` is "object", so null falls back to the third argument.
      config = mimeTypeOrConfig !== null ? mimeTypeOrConfig : maybeConfig ?? null;
      break;
    default:
      config = maybeConfig ?? null;
  }

  const nativeConfig = normalizeExtractionConfig(config);
  const nativeResult = await getBinding().extractFile(filePath, mimeType, nativeConfig);
  return convertResult(nativeResult);
}
|
|
396
|
-
function extractBytesSync(
|
|
535
|
+
/**
 * Synchronously extract content from in-memory bytes through the native binding.
 *
 * A string first argument is treated as a file path and read with
 * `readFileSync`; any other value is used as the data directly. The data is
 * checked by `assertUint8Array` and copied into a `Buffer` for the binding.
 *
 * @param dataOrPath - Byte data, or a path string whose contents are read.
 * @param mimeType - MIME type forwarded to the binding.
 * @param config - Extraction config; defaults to `null`.
 * @returns The binding's result after `convertResult`.
 */
function extractBytesSync(dataOrPath, mimeType, config = null) {
  const data = typeof dataOrPath === "string" ? readFileSync(dataOrPath) : dataOrPath;
  const bytes = assertUint8Array(data, "data");
  const nativeConfig = normalizeExtractionConfig(config);
  const nativeResult = getBinding().extractBytesSync(Buffer.from(bytes), mimeType, nativeConfig);
  return convertResult(nativeResult);
}
|
|
402
|
-
async function extractBytes(
|
|
547
|
+
async function extractBytes(dataOrPath, mimeType, config = null) {
|
|
548
|
+
let data;
|
|
549
|
+
if (typeof dataOrPath === "string") {
|
|
550
|
+
data = readFileSync(dataOrPath);
|
|
551
|
+
} else {
|
|
552
|
+
data = dataOrPath;
|
|
553
|
+
}
|
|
403
554
|
const validated = assertUint8Array(data, "data");
|
|
404
555
|
if (process.env["KREUZBERG_DEBUG_GUTEN"] === "1") {
|
|
405
556
|
console.log("[TypeScript] Debug input header:", Array.from(validated.slice(0, 8)));
|
|
@@ -439,8 +590,8 @@ async function batchExtractBytes(dataList, mimeTypes, config = null) {
|
|
|
439
590
|
function registerPostProcessor(processor) {
|
|
440
591
|
const binding2 = getBinding();
|
|
441
592
|
const wrappedProcessor = {
|
|
442
|
-
name: processor.name.
|
|
443
|
-
processingStage: processor.processingStage
|
|
593
|
+
name: typeof processor.name === "function" ? processor.name() : processor.name,
|
|
594
|
+
processingStage: typeof processor.processingStage === "function" ? processor.processingStage() : processor.processingStage,
|
|
444
595
|
async process(...args) {
|
|
445
596
|
const wrappedValue = args[0];
|
|
446
597
|
const jsonString = wrappedValue[0];
|
|
@@ -493,8 +644,8 @@ function listPostProcessors() {
|
|
|
493
644
|
function registerValidator(validator) {
|
|
494
645
|
const binding2 = getBinding();
|
|
495
646
|
const wrappedValidator = {
|
|
496
|
-
name: validator.name.
|
|
497
|
-
priority: validator.priority
|
|
647
|
+
name: typeof validator.name === "function" ? validator.name() : validator.name,
|
|
648
|
+
priority: typeof validator.priority === "function" ? validator.priority() : validator.priority,
|
|
498
649
|
async validate(...args) {
|
|
499
650
|
const jsonString = args[0];
|
|
500
651
|
if (!jsonString || jsonString === "undefined") {
|
|
@@ -543,8 +694,8 @@ function describePayload(value) {
|
|
|
543
694
|
function registerOcrBackend(backend) {
|
|
544
695
|
const binding2 = getBinding();
|
|
545
696
|
const wrappedBackend = {
|
|
546
|
-
name: backend.name.
|
|
547
|
-
supportedLanguages: backend.supportedLanguages.
|
|
697
|
+
name: typeof backend.name === "function" ? backend.name() : backend.name,
|
|
698
|
+
supportedLanguages: typeof backend.supportedLanguages === "function" ? backend.supportedLanguages() : backend.supportedLanguages ?? ["en"],
|
|
548
699
|
async processImage(...processArgs) {
|
|
549
700
|
const [imagePayload, maybeLanguage] = processArgs;
|
|
550
701
|
if (process.env["KREUZBERG_DEBUG_GUTEN"] === "1") {
|
|
@@ -673,11 +824,11 @@ const ExtractionConfig = {
|
|
|
673
824
|
};
|
|
674
825
|
/**
 * Detect a MIME type from raw bytes.
 *
 * @param bytes - Data forwarded to the binding's `detectMimeTypeFromBytes`.
 * @returns Whatever the native binding returns for these bytes.
 */
function detectMimeType(bytes) {
  return getBinding().detectMimeTypeFromBytes(bytes);
}
|
|
678
|
-
function detectMimeTypeFromPath(
|
|
829
|
+
/**
 * Detect a MIME type from a file path.
 *
 * @param filePath - Path forwarded to the binding's `detectMimeTypeFromPath`.
 * @param checkExists - Forwarded unchanged as the binding's second argument.
 * @returns Whatever the native binding returns for this path.
 */
function detectMimeTypeFromPath(filePath, checkExists) {
  return getBinding().detectMimeTypeFromPath(filePath, checkExists);
}
|
|
682
833
|
function validateMimeType(mimeType) {
|
|
683
834
|
const binding2 = getBinding();
|
|
@@ -693,7 +844,8 @@ function listEmbeddingPresets() {
|
|
|
693
844
|
}
|
|
694
845
|
/**
 * Look up an embedding preset by name.
 *
 * @param name - Preset name forwarded to the binding's `getEmbeddingPreset`.
 * @returns The binding's return value, unmodified.
 */
function getEmbeddingPreset(name) {
  return getBinding().getEmbeddingPreset(name);
}
|
|
698
850
|
function getLastErrorCode() {
|
|
699
851
|
const binding2 = getBinding();
|
|
@@ -701,9 +853,70 @@ function getLastErrorCode() {
|
|
|
701
853
|
}
|
|
702
854
|
/**
 * Fetch the last panic context from the native binding.
 *
 * @returns The return value of the binding's `getLastPanicContext`, unmodified.
 */
function getLastPanicContext() {
  return getBinding().getLastPanicContext();
}
|
|
859
|
+
/**
 * Get the name associated with an error code.
 *
 * @param code - Error code forwarded to the binding's `getErrorCodeName`.
 * @returns The binding's return value for this code.
 */
function getErrorCodeName(code) {
  const native = getBinding();
  const codeName = native.getErrorCodeName(code);
  return codeName;
}
|
|
863
|
+
/**
 * Get the description associated with an error code.
 *
 * @param code - Error code forwarded to the binding's `getErrorCodeDescription`.
 * @returns The binding's return value for this code.
 */
function getErrorCodeDescription(code) {
  const native = getBinding();
  const description = native.getErrorCodeDescription(code);
  return description;
}
|
|
867
|
+
/**
 * Classify an error message using the native binding.
 *
 * @param errorMessage - Message forwarded to the binding's `classifyError`.
 * @returns The binding's classification result, unmodified.
 */
function classifyError(errorMessage) {
  return getBinding().classifyError(errorMessage);
}
|
|
872
|
+
/**
 * Create a worker pool through the native binding.
 *
 * @param size - Forwarded to the binding's `createWorkerPool`.
 * @returns The pool value returned by the binding, unmodified.
 */
function createWorkerPool(size) {
  return getBinding().createWorkerPool(size);
}
|
|
877
|
+
/**
 * Read statistics for a worker pool.
 *
 * @param pool - Pool value forwarded to the binding's `getWorkerPoolStats`.
 * @returns The stats value returned by the binding, unmodified.
 */
function getWorkerPoolStats(pool) {
  return getBinding().getWorkerPoolStats(pool);
}
|
|
882
|
+
/**
 * Extract content from a file using a worker pool.
 *
 * The third argument is overloaded: a string is taken as the MIME type (the
 * config then comes from `maybeConfig`), a non-null object is taken as the
 * config itself, and anything else means "no MIME type" with the config read
 * from `maybeConfig`.
 *
 * @param pool - Pool value forwarded to the binding's `extractFileInWorker`.
 * @param filePath - Path of the file to extract.
 * @param mimeTypeOrConfig - MIME type string, config object, or nullish.
 * @param maybeConfig - Config used when the third argument is not an object.
 * @returns {Promise} Resolves to the binding's result after `convertResult`.
 */
async function extractFileInWorker(pool, filePath, mimeTypeOrConfig, maybeConfig) {
  const thirdIsMimeType = typeof mimeTypeOrConfig === "string";
  const thirdIsConfig =
    !thirdIsMimeType && mimeTypeOrConfig !== null && typeof mimeTypeOrConfig === "object";

  const mimeType = thirdIsMimeType ? mimeTypeOrConfig : null;
  const config = thirdIsConfig ? mimeTypeOrConfig : maybeConfig ?? null;

  // Config is normalized before the binding is resolved, as in the sibling extractors.
  const nativeConfig = normalizeExtractionConfig(config);
  const native = getBinding();
  const nativeResult = await native.extractFileInWorker(pool, filePath, mimeType, nativeConfig);
  return convertResult(nativeResult);
}
|
|
905
|
+
/**
 * Extract content from several files using a worker pool.
 *
 * @param pool - Pool value forwarded to the binding's `batchExtractFilesInWorker`.
 * @param paths - File paths forwarded to the binding.
 * @param config - Extraction config; defaults to `null`.
 * @returns {Promise<Array>} Each raw result passed through `convertResult`.
 */
async function batchExtractFilesInWorker(pool, paths, config = null) {
  const nativeConfig = normalizeExtractionConfig(config);
  const native = getBinding();
  const nativeResults = await native.batchExtractFilesInWorker(pool, paths, nativeConfig);
  return nativeResults.map((entry) => convertResult(entry));
}
|
|
915
|
+
/**
 * Close a worker pool.
 *
 * @param pool - Pool value forwarded to the binding's `closeWorkerPool`.
 * @returns {Promise<void>} Settles once the binding's call has been awaited.
 */
async function closeWorkerPool(pool) {
  const native = getBinding();
  await native.closeWorkerPool(pool);
}
|
|
706
|
-
const __version__ = "4.0.0
|
|
919
|
+
// Version identifier string for this build of the package.
const __version__ = "4.0.0";
|
|
707
920
|
export {
|
|
708
921
|
CacheError,
|
|
709
922
|
ErrorCode,
|
|
@@ -722,21 +935,29 @@ export {
|
|
|
722
935
|
batchExtractBytes,
|
|
723
936
|
batchExtractBytesSync,
|
|
724
937
|
batchExtractFiles,
|
|
938
|
+
batchExtractFilesInWorker,
|
|
725
939
|
batchExtractFilesSync,
|
|
940
|
+
classifyError,
|
|
726
941
|
clearDocumentExtractors,
|
|
727
942
|
clearOcrBackends,
|
|
728
943
|
clearPostProcessors,
|
|
729
944
|
clearValidators,
|
|
945
|
+
closeWorkerPool,
|
|
946
|
+
createWorkerPool,
|
|
730
947
|
detectMimeType,
|
|
731
948
|
detectMimeTypeFromPath,
|
|
732
949
|
extractBytes,
|
|
733
950
|
extractBytesSync,
|
|
734
951
|
extractFile,
|
|
952
|
+
extractFileInWorker,
|
|
735
953
|
extractFileSync,
|
|
736
954
|
getEmbeddingPreset,
|
|
955
|
+
getErrorCodeDescription,
|
|
956
|
+
getErrorCodeName,
|
|
737
957
|
getExtensionsForMime,
|
|
738
958
|
getLastErrorCode,
|
|
739
959
|
getLastPanicContext,
|
|
960
|
+
getWorkerPoolStats,
|
|
740
961
|
listDocumentExtractors,
|
|
741
962
|
listEmbeddingPresets,
|
|
742
963
|
listOcrBackends,
|