@kreuzberg/node 4.0.8 → 4.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,20 +1,14 @@
1
- import { readFileSync } from "node:fs";
2
- import { createRequire } from "node:module";
3
- import {
4
- CacheError,
5
- ErrorCode,
6
- ImageProcessingError,
7
- KreuzbergError,
8
- MissingDependencyError,
9
- OcrError,
10
- ParsingError,
11
- PluginError,
12
- ValidationError
13
- } from "./errors.js";
14
- import { GutenOcrBackend } from "./ocr/guten-ocr.js";
15
- export * from "./types.js";
16
- let binding = null;
17
- let bindingInitialized = false;
1
+ var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
2
+ get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
3
+ }) : x)(function(x) {
4
+ if (typeof require !== "undefined") return require.apply(this, arguments);
5
+ throw Error('Dynamic require of "' + x + '" is not supported');
6
+ });
7
+
8
+ // typescript/core/binding.ts
9
+ import { createRequire } from "module";
10
+ var binding = null;
11
+ var bindingInitialized = false;
18
12
  function createNativeBindingError(error) {
19
13
  const hintParts = [];
20
14
  let detail = "Unknown error while requiring native module.";
@@ -43,42 +37,13 @@ function createNativeBindingError(error) {
43
37
  ].join(" ")
44
38
  );
45
39
  }
46
- function assertUint8Array(value, name) {
47
- if (!(value instanceof Uint8Array)) {
48
- throw new TypeError(`${name} must be a Uint8Array`);
49
- }
50
- return value;
51
- }
52
- function assertUint8ArrayList(values, name) {
53
- if (!Array.isArray(values)) {
54
- throw new TypeError(`${name} must be an array of Uint8Array`);
55
- }
56
- const array = values;
57
- return array.map((value, index) => {
58
- try {
59
- return assertUint8Array(value, `${name}[${index}]`);
60
- } catch {
61
- throw new TypeError(`${name}[${index}] must be a Uint8Array`);
62
- }
63
- });
64
- }
65
- function __setBindingForTests(mock) {
66
- binding = mock;
67
- bindingInitialized = true;
68
- }
69
- function __resetBindingForTests() {
70
- binding = null;
71
- bindingInitialized = false;
72
- }
73
40
  function loadNativeBinding() {
74
41
  let localRequire;
75
- if (typeof require !== "undefined") {
76
- localRequire = require;
77
- } else {
78
- try {
79
- localRequire = createRequire(import.meta.url);
80
- } catch {
81
- localRequire = void 0;
42
+ try {
43
+ localRequire = createRequire(import.meta.url);
44
+ } catch {
45
+ if (typeof __require !== "undefined") {
46
+ localRequire = __require;
82
47
  }
83
48
  }
84
49
  if (!localRequire) {
@@ -131,175 +96,158 @@ function getBinding() {
131
96
  "Failed to load Kreuzberg bindings. Neither NAPI (Node.js) nor WASM (browsers/Deno) bindings are available. Make sure you have installed the @kreuzberg/node package for Node.js/Bun."
132
97
  );
133
98
  }
134
- function parseMetadata(metadataStr) {
135
- try {
136
- const parsed = JSON.parse(metadataStr);
137
- if (typeof parsed === "object" && parsed !== null) {
138
- return parsed;
139
- }
140
- return {};
141
- } catch {
142
- return {};
143
- }
99
+ function __setBindingForTests(mock) {
100
+ binding = mock;
101
+ bindingInitialized = true;
144
102
  }
145
- function ensureUint8Array(value) {
146
- if (value instanceof Uint8Array) {
147
- return value;
148
- }
149
- if (typeof Buffer !== "undefined" && value instanceof Buffer) {
150
- return new Uint8Array(value);
151
- }
152
- if (Array.isArray(value)) {
153
- return new Uint8Array(value);
154
- }
155
- return new Uint8Array();
103
+ function __resetBindingForTests() {
104
+ binding = null;
105
+ bindingInitialized = false;
156
106
  }
157
- function convertChunk(rawChunk) {
158
- if (!rawChunk || typeof rawChunk !== "object") {
159
- return {
160
- content: "",
161
- metadata: {
162
- byteStart: 0,
163
- byteEnd: 0,
164
- tokenCount: null,
165
- chunkIndex: 0,
166
- totalChunks: 0
167
- },
168
- embedding: null
169
- };
170
- }
171
- const chunk = rawChunk;
172
- const metadata = chunk["metadata"] ?? {};
173
- return {
174
- // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
175
- content: chunk["content"] ?? "",
176
- // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
177
- embedding: chunk["embedding"] ?? null,
178
- metadata: {
179
- // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
180
- byteStart: metadata["byte_start"] ?? metadata["charStart"] ?? 0,
181
- // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
182
- byteEnd: metadata["byte_end"] ?? metadata["charEnd"] ?? 0,
183
- // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
184
- tokenCount: metadata["token_count"] ?? metadata["tokenCount"] ?? null,
185
- // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
186
- chunkIndex: metadata["chunk_index"] ?? metadata["chunkIndex"] ?? 0,
187
- // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
188
- totalChunks: metadata["total_chunks"] ?? metadata["totalChunks"] ?? 0,
189
- // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
190
- firstPage: metadata["first_page"] ?? metadata["firstPage"] ?? null,
191
- // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
192
- lastPage: metadata["last_page"] ?? metadata["lastPage"] ?? null
193
- }
194
- };
107
+
108
+ // typescript/errors/diagnostics.ts
109
+ function getLastErrorCode() {
110
+ const binding2 = getBinding();
111
+ return binding2.getLastErrorCode();
195
112
  }
196
- function convertImage(rawImage) {
197
- if (!rawImage || typeof rawImage !== "object") {
198
- return {
199
- data: new Uint8Array(),
200
- format: "unknown",
201
- imageIndex: 0,
202
- pageNumber: null,
203
- width: null,
204
- height: null,
205
- colorspace: null,
206
- bitsPerComponent: null,
207
- isMask: false,
208
- description: null,
209
- ocrResult: null
210
- };
211
- }
212
- const image = rawImage;
213
- return {
214
- // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
215
- data: ensureUint8Array(image["data"]),
216
- // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
217
- format: image["format"] ?? "unknown",
218
- // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
219
- imageIndex: image["imageIndex"] ?? 0,
220
- // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
221
- pageNumber: image["pageNumber"] ?? null,
222
- // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
223
- width: image["width"] ?? null,
224
- // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
225
- height: image["height"] ?? null,
226
- // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
227
- colorspace: image["colorspace"] ?? null,
228
- // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
229
- bitsPerComponent: image["bitsPerComponent"] ?? null,
230
- // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
231
- isMask: image["isMask"] ?? false,
232
- // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
233
- description: image["description"] ?? null,
234
- // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
235
- ocrResult: image["ocrResult"] ? convertResult(image["ocrResult"]) : null
236
- };
113
+ function getLastPanicContext() {
114
+ const binding2 = getBinding();
115
+ const result = binding2.getLastPanicContext();
116
+ return result;
237
117
  }
238
- function convertPageContent(rawPage) {
239
- if (!rawPage || typeof rawPage !== "object") {
118
+ function getErrorCodeName(code) {
119
+ const binding2 = getBinding();
120
+ return binding2.getErrorCodeName(code);
121
+ }
122
+ function getErrorCodeDescription(code) {
123
+ const binding2 = getBinding();
124
+ return binding2.getErrorCodeDescription(code);
125
+ }
126
+ function classifyError(errorMessage) {
127
+ const binding2 = getBinding();
128
+ const result = binding2.classifyError(errorMessage);
129
+ return result;
130
+ }
131
+
132
+ // typescript/errors.ts
133
+ var ErrorCode = /* @__PURE__ */ ((ErrorCode2) => {
134
+ ErrorCode2[ErrorCode2["Success"] = 0] = "Success";
135
+ ErrorCode2[ErrorCode2["GenericError"] = 1] = "GenericError";
136
+ ErrorCode2[ErrorCode2["Panic"] = 2] = "Panic";
137
+ ErrorCode2[ErrorCode2["InvalidArgument"] = 3] = "InvalidArgument";
138
+ ErrorCode2[ErrorCode2["IoError"] = 4] = "IoError";
139
+ ErrorCode2[ErrorCode2["ParsingError"] = 5] = "ParsingError";
140
+ ErrorCode2[ErrorCode2["OcrError"] = 6] = "OcrError";
141
+ ErrorCode2[ErrorCode2["MissingDependency"] = 7] = "MissingDependency";
142
+ return ErrorCode2;
143
+ })(ErrorCode || {});
144
+ var KreuzbergError = class _KreuzbergError extends Error {
145
+ /**
146
+ * Panic context if error was caused by a panic in native code.
147
+ * Will be null for non-panic errors.
148
+ */
149
+ panicContext;
150
+ constructor(message, panicContext) {
151
+ super(message);
152
+ this.name = "KreuzbergError";
153
+ this.panicContext = panicContext ?? null;
154
+ Object.setPrototypeOf(this, _KreuzbergError.prototype);
155
+ }
156
+ toJSON() {
240
157
  return {
241
- pageNumber: 0,
242
- content: "",
243
- tables: [],
244
- images: []
158
+ name: this.name,
159
+ message: this.message,
160
+ panicContext: this.panicContext,
161
+ stack: this.stack
245
162
  };
246
163
  }
247
- const page = rawPage;
248
- return {
249
- // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
250
- pageNumber: page["pageNumber"] ?? 0,
251
- // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
252
- content: page["content"] ?? "",
253
- // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
254
- tables: Array.isArray(page["tables"]) ? page["tables"] : [],
255
- // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
256
- images: Array.isArray(page["images"]) ? page["images"].map((image) => convertImage(image)) : []
257
- };
258
- }
259
- function convertResult(rawResult) {
260
- if (!rawResult || typeof rawResult !== "object") {
164
+ };
165
+ var ValidationError = class _ValidationError extends KreuzbergError {
166
+ constructor(message, panicContext) {
167
+ super(message, panicContext);
168
+ this.name = "ValidationError";
169
+ Object.setPrototypeOf(this, _ValidationError.prototype);
170
+ }
171
+ };
172
+ var ParsingError = class _ParsingError extends KreuzbergError {
173
+ constructor(message, panicContext) {
174
+ super(message, panicContext);
175
+ this.name = "ParsingError";
176
+ Object.setPrototypeOf(this, _ParsingError.prototype);
177
+ }
178
+ };
179
+ var OcrError = class _OcrError extends KreuzbergError {
180
+ constructor(message, panicContext) {
181
+ super(message, panicContext);
182
+ this.name = "OcrError";
183
+ Object.setPrototypeOf(this, _OcrError.prototype);
184
+ }
185
+ };
186
+ var CacheError = class _CacheError extends KreuzbergError {
187
+ constructor(message, panicContext) {
188
+ super(message, panicContext);
189
+ this.name = "CacheError";
190
+ Object.setPrototypeOf(this, _CacheError.prototype);
191
+ }
192
+ };
193
+ var ImageProcessingError = class _ImageProcessingError extends KreuzbergError {
194
+ constructor(message, panicContext) {
195
+ super(message, panicContext);
196
+ this.name = "ImageProcessingError";
197
+ Object.setPrototypeOf(this, _ImageProcessingError.prototype);
198
+ }
199
+ };
200
+ var PluginError = class _PluginError extends KreuzbergError {
201
+ /**
202
+ * Name of the plugin that threw the error.
203
+ */
204
+ pluginName;
205
+ constructor(message, pluginName, panicContext) {
206
+ super(`Plugin error in '${pluginName}': ${message}`, panicContext);
207
+ this.name = "PluginError";
208
+ this.pluginName = pluginName;
209
+ Object.setPrototypeOf(this, _PluginError.prototype);
210
+ }
211
+ toJSON() {
261
212
  return {
262
- content: "",
263
- mimeType: "application/octet-stream",
264
- metadata: {},
265
- tables: [],
266
- detectedLanguages: null,
267
- chunks: null,
268
- images: null,
269
- pages: null
213
+ name: this.name,
214
+ message: this.message,
215
+ pluginName: this.pluginName,
216
+ panicContext: this.panicContext,
217
+ stack: this.stack
270
218
  };
271
219
  }
272
- const result = rawResult;
273
- const metadata = result["metadata"];
274
- const metadataValue = typeof metadata === "string" ? parseMetadata(metadata) : metadata ?? {};
275
- const returnObj = {
276
- // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
277
- content: result["content"] ?? "",
278
- // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
279
- mimeType: result["mimeType"] ?? "application/octet-stream",
280
- metadata: metadataValue,
281
- // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
282
- tables: Array.isArray(result["tables"]) ? result["tables"] : [],
283
- // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
284
- detectedLanguages: Array.isArray(result["detectedLanguages"]) ? result["detectedLanguages"] : null,
285
- chunks: null,
286
- images: null,
287
- pages: null
288
- };
289
- const chunksData = result["chunks"];
290
- if (Array.isArray(chunksData)) {
291
- returnObj.chunks = chunksData.map((chunk) => convertChunk(chunk));
220
+ };
221
+ var MissingDependencyError = class _MissingDependencyError extends KreuzbergError {
222
+ constructor(message, panicContext) {
223
+ super(message, panicContext);
224
+ this.name = "MissingDependencyError";
225
+ Object.setPrototypeOf(this, _MissingDependencyError.prototype);
292
226
  }
293
- const imagesData = result["images"];
294
- if (Array.isArray(imagesData)) {
295
- returnObj.images = imagesData.map((image) => convertImage(image));
227
+ };
228
+
229
+ // typescript/core/assertions.ts
230
+ function assertUint8Array(value, name) {
231
+ if (!(value instanceof Uint8Array)) {
232
+ throw new TypeError(`${name} must be a Uint8Array`);
296
233
  }
297
- const pagesData = result["pages"];
298
- if (Array.isArray(pagesData)) {
299
- returnObj.pages = pagesData.map((page) => convertPageContent(page));
234
+ return value;
235
+ }
236
+ function assertUint8ArrayList(values, name) {
237
+ if (!Array.isArray(values)) {
238
+ throw new TypeError(`${name} must be an array of Uint8Array`);
300
239
  }
301
- return returnObj;
240
+ const array = values;
241
+ return array.map((value, index) => {
242
+ try {
243
+ return assertUint8Array(value, `${name}[${index}]`);
244
+ } catch {
245
+ throw new TypeError(`${name}[${index}] must be a Uint8Array`);
246
+ }
247
+ });
302
248
  }
249
+
250
+ // typescript/core/config-normalizer.ts
303
251
  function setIfDefined(target, key, value) {
304
252
  if (value !== void 0) {
305
253
  target[key] = value;
@@ -457,47 +405,251 @@ function normalizeKeywordConfig(config) {
457
405
  setIfDefined(normalized, "rakeParams", config.rakeParams);
458
406
  return normalized;
459
407
  }
460
- function normalizePageConfig(pages) {
461
- if (!pages) {
462
- return void 0;
408
+ function normalizePageConfig(pages) {
409
+ if (!pages) {
410
+ return void 0;
411
+ }
412
+ const normalized = {};
413
+ setIfDefined(normalized, "extractPages", pages.extractPages);
414
+ setIfDefined(normalized, "insertPageMarkers", pages.insertPageMarkers);
415
+ setIfDefined(normalized, "markerFormat", pages.markerFormat);
416
+ return normalized;
417
+ }
418
+ function normalizeExtractionConfig(config) {
419
+ if (!config) {
420
+ return null;
421
+ }
422
+ const normalized = {};
423
+ setIfDefined(normalized, "useCache", config.useCache);
424
+ setIfDefined(normalized, "enableQualityProcessing", config.enableQualityProcessing);
425
+ setIfDefined(normalized, "forceOcr", config.forceOcr);
426
+ setIfDefined(normalized, "maxConcurrentExtractions", config.maxConcurrentExtractions);
427
+ const ocr = normalizeOcrConfig(config.ocr);
428
+ setIfDefined(normalized, "ocr", ocr);
429
+ const chunking = normalizeChunkingConfig(config.chunking);
430
+ setIfDefined(normalized, "chunking", chunking);
431
+ const images = normalizeImageExtractionConfig(config.images);
432
+ setIfDefined(normalized, "images", images);
433
+ const pdf = normalizePdfConfig(config.pdfOptions);
434
+ setIfDefined(normalized, "pdfOptions", pdf);
435
+ const tokenReduction = normalizeTokenReductionConfig(config.tokenReduction);
436
+ setIfDefined(normalized, "tokenReduction", tokenReduction);
437
+ const languageDetection = normalizeLanguageDetectionConfig(config.languageDetection);
438
+ setIfDefined(normalized, "languageDetection", languageDetection);
439
+ const postprocessor = normalizePostProcessorConfig(config.postprocessor);
440
+ setIfDefined(normalized, "postprocessor", postprocessor);
441
+ const keywords = normalizeKeywordConfig(config.keywords);
442
+ setIfDefined(normalized, "keywords", keywords);
443
+ const pages = normalizePageConfig(config.pages);
444
+ setIfDefined(normalized, "pages", pages);
445
+ const htmlOptions = normalizeHtmlOptions(config.htmlOptions);
446
+ setIfDefined(normalized, "htmlOptions", htmlOptions);
447
+ return normalized;
448
+ }
449
+
450
+ // typescript/core/type-converters.ts
451
+ function parseMetadata(metadataStr) {
452
+ try {
453
+ const parsed = JSON.parse(metadataStr);
454
+ if (typeof parsed === "object" && parsed !== null) {
455
+ return parsed;
456
+ }
457
+ return {};
458
+ } catch {
459
+ return {};
460
+ }
461
+ }
462
+ function ensureUint8Array(value) {
463
+ if (value instanceof Uint8Array) {
464
+ return value;
465
+ }
466
+ if (typeof Buffer !== "undefined" && value instanceof Buffer) {
467
+ return new Uint8Array(value);
468
+ }
469
+ if (Array.isArray(value)) {
470
+ return new Uint8Array(value);
471
+ }
472
+ return new Uint8Array();
473
+ }
474
+ function convertChunk(rawChunk) {
475
+ if (!rawChunk || typeof rawChunk !== "object") {
476
+ return {
477
+ content: "",
478
+ metadata: {
479
+ byteStart: 0,
480
+ byteEnd: 0,
481
+ tokenCount: null,
482
+ chunkIndex: 0,
483
+ totalChunks: 0
484
+ },
485
+ embedding: null
486
+ };
487
+ }
488
+ const chunk = rawChunk;
489
+ const metadata = chunk["metadata"] ?? {};
490
+ return {
491
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
492
+ content: chunk["content"] ?? "",
493
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
494
+ embedding: chunk["embedding"] ?? null,
495
+ metadata: {
496
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
497
+ byteStart: metadata["byte_start"] ?? metadata["charStart"] ?? 0,
498
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
499
+ byteEnd: metadata["byte_end"] ?? metadata["charEnd"] ?? 0,
500
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
501
+ tokenCount: metadata["token_count"] ?? metadata["tokenCount"] ?? null,
502
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
503
+ chunkIndex: metadata["chunk_index"] ?? metadata["chunkIndex"] ?? 0,
504
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
505
+ totalChunks: metadata["total_chunks"] ?? metadata["totalChunks"] ?? 0,
506
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
507
+ firstPage: metadata["first_page"] ?? metadata["firstPage"] ?? null,
508
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
509
+ lastPage: metadata["last_page"] ?? metadata["lastPage"] ?? null
510
+ }
511
+ };
512
+ }
513
+ function convertImage(rawImage) {
514
+ if (!rawImage || typeof rawImage !== "object") {
515
+ return {
516
+ data: new Uint8Array(),
517
+ format: "unknown",
518
+ imageIndex: 0,
519
+ pageNumber: null,
520
+ width: null,
521
+ height: null,
522
+ colorspace: null,
523
+ bitsPerComponent: null,
524
+ isMask: false,
525
+ description: null,
526
+ ocrResult: null
527
+ };
528
+ }
529
+ const image = rawImage;
530
+ return {
531
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
532
+ data: ensureUint8Array(image["data"]),
533
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
534
+ format: image["format"] ?? "unknown",
535
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
536
+ imageIndex: image["imageIndex"] ?? 0,
537
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
538
+ pageNumber: image["pageNumber"] ?? null,
539
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
540
+ width: image["width"] ?? null,
541
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
542
+ height: image["height"] ?? null,
543
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
544
+ colorspace: image["colorspace"] ?? null,
545
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
546
+ bitsPerComponent: image["bitsPerComponent"] ?? null,
547
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
548
+ isMask: image["isMask"] ?? false,
549
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
550
+ description: image["description"] ?? null,
551
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
552
+ ocrResult: image["ocrResult"] ? convertResult(image["ocrResult"]) : null
553
+ };
554
+ }
555
+ function convertPageContent(rawPage) {
556
+ if (!rawPage || typeof rawPage !== "object") {
557
+ return {
558
+ pageNumber: 0,
559
+ content: "",
560
+ tables: [],
561
+ images: []
562
+ };
563
+ }
564
+ const page = rawPage;
565
+ return {
566
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
567
+ pageNumber: page["pageNumber"] ?? 0,
568
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
569
+ content: page["content"] ?? "",
570
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
571
+ tables: Array.isArray(page["tables"]) ? page["tables"] : [],
572
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
573
+ images: Array.isArray(page["images"]) ? page["images"].map((image) => convertImage(image)) : []
574
+ };
575
+ }
576
+ function convertResult(rawResult) {
577
+ if (!rawResult || typeof rawResult !== "object") {
578
+ return {
579
+ content: "",
580
+ mimeType: "application/octet-stream",
581
+ metadata: {},
582
+ tables: [],
583
+ detectedLanguages: null,
584
+ chunks: null,
585
+ images: null,
586
+ pages: null
587
+ };
588
+ }
589
+ const result = rawResult;
590
+ const metadata = result["metadata"];
591
+ const metadataValue = typeof metadata === "string" ? parseMetadata(metadata) : metadata ?? {};
592
+ const returnObj = {
593
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
594
+ content: result["content"] ?? "",
595
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
596
+ mimeType: result["mimeType"] ?? "application/octet-stream",
597
+ metadata: metadataValue,
598
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
599
+ tables: Array.isArray(result["tables"]) ? result["tables"] : [],
600
+ // biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noPropertyAccessFromIndexSignature
601
+ detectedLanguages: Array.isArray(result["detectedLanguages"]) ? result["detectedLanguages"] : null,
602
+ chunks: null,
603
+ images: null,
604
+ pages: null
605
+ };
606
+ const chunksData = result["chunks"];
607
+ if (Array.isArray(chunksData)) {
608
+ returnObj.chunks = chunksData.map((chunk) => convertChunk(chunk));
609
+ }
610
+ const imagesData = result["images"];
611
+ if (Array.isArray(imagesData)) {
612
+ returnObj.images = imagesData.map((image) => convertImage(image));
613
+ }
614
+ const pagesData = result["pages"];
615
+ if (Array.isArray(pagesData)) {
616
+ returnObj.pages = pagesData.map((page) => convertPageContent(page));
617
+ }
618
+ return returnObj;
619
+ }
620
+
621
+ // typescript/extraction/batch.ts
622
+ function batchExtractFilesSync(paths, config = null) {
623
+ const normalizedConfig = normalizeExtractionConfig(config);
624
+ const rawResults = getBinding().batchExtractFilesSync(paths, normalizedConfig);
625
+ return rawResults.map(convertResult);
626
+ }
627
+ async function batchExtractFiles(paths, config = null) {
628
+ const normalizedConfig = normalizeExtractionConfig(config);
629
+ const rawResults = await getBinding().batchExtractFiles(paths, normalizedConfig);
630
+ return rawResults.map(convertResult);
631
+ }
632
+ function batchExtractBytesSync(dataList, mimeTypes, config = null) {
633
+ const buffers = assertUint8ArrayList(dataList, "dataList").map((data) => Buffer.from(data));
634
+ if (buffers.length !== mimeTypes.length) {
635
+ throw new TypeError("dataList and mimeTypes must have the same length");
463
636
  }
464
- const normalized = {};
465
- setIfDefined(normalized, "extractPages", pages.extractPages);
466
- setIfDefined(normalized, "insertPageMarkers", pages.insertPageMarkers);
467
- setIfDefined(normalized, "markerFormat", pages.markerFormat);
468
- return normalized;
637
+ const normalizedConfig = normalizeExtractionConfig(config);
638
+ const rawResults = getBinding().batchExtractBytesSync(buffers, mimeTypes, normalizedConfig);
639
+ return rawResults.map(convertResult);
469
640
  }
470
- function normalizeExtractionConfig(config) {
471
- if (!config) {
472
- return null;
641
+ async function batchExtractBytes(dataList, mimeTypes, config = null) {
642
+ const buffers = assertUint8ArrayList(dataList, "dataList").map((data) => Buffer.from(data));
643
+ if (buffers.length !== mimeTypes.length) {
644
+ throw new TypeError("dataList and mimeTypes must have the same length");
473
645
  }
474
- const normalized = {};
475
- setIfDefined(normalized, "useCache", config.useCache);
476
- setIfDefined(normalized, "enableQualityProcessing", config.enableQualityProcessing);
477
- setIfDefined(normalized, "forceOcr", config.forceOcr);
478
- setIfDefined(normalized, "maxConcurrentExtractions", config.maxConcurrentExtractions);
479
- const ocr = normalizeOcrConfig(config.ocr);
480
- setIfDefined(normalized, "ocr", ocr);
481
- const chunking = normalizeChunkingConfig(config.chunking);
482
- setIfDefined(normalized, "chunking", chunking);
483
- const images = normalizeImageExtractionConfig(config.images);
484
- setIfDefined(normalized, "images", images);
485
- const pdf = normalizePdfConfig(config.pdfOptions);
486
- setIfDefined(normalized, "pdfOptions", pdf);
487
- const tokenReduction = normalizeTokenReductionConfig(config.tokenReduction);
488
- setIfDefined(normalized, "tokenReduction", tokenReduction);
489
- const languageDetection = normalizeLanguageDetectionConfig(config.languageDetection);
490
- setIfDefined(normalized, "languageDetection", languageDetection);
491
- const postprocessor = normalizePostProcessorConfig(config.postprocessor);
492
- setIfDefined(normalized, "postprocessor", postprocessor);
493
- const keywords = normalizeKeywordConfig(config.keywords);
494
- setIfDefined(normalized, "keywords", keywords);
495
- const pages = normalizePageConfig(config.pages);
496
- setIfDefined(normalized, "pages", pages);
497
- const htmlOptions = normalizeHtmlOptions(config.htmlOptions);
498
- setIfDefined(normalized, "htmlOptions", htmlOptions);
499
- return normalized;
646
+ const normalizedConfig = normalizeExtractionConfig(config);
647
+ const rawResults = await getBinding().batchExtractBytes(buffers, mimeTypes, normalizedConfig);
648
+ return rawResults.map(convertResult);
500
649
  }
650
+
651
+ // typescript/extraction/single.ts
652
+ import { readFileSync } from "fs";
501
653
  function extractFileSync(filePath, mimeTypeOrConfig, maybeConfig) {
502
654
  let mimeType = null;
503
655
  let config = null;
@@ -559,34 +711,57 @@ async function extractBytes(dataOrPath, mimeType, config = null) {
559
711
  const rawResult = await getBinding().extractBytes(Buffer.from(validated), mimeType, normalizedConfig);
560
712
  return convertResult(rawResult);
561
713
  }
562
- function batchExtractFilesSync(paths, config = null) {
563
- const normalizedConfig = normalizeExtractionConfig(config);
564
- const rawResults = getBinding().batchExtractFilesSync(paths, normalizedConfig);
565
- return rawResults.map(convertResult);
714
+
715
+ // typescript/extraction/worker-pool.ts
716
/**
 * Create a worker pool through the native binding.
 *
 * @param size - Requested pool size; forwarded unchanged to the binding.
 * @returns The pool handle exactly as the native binding returns it.
 */
function createWorkerPool(size) {
  return getBinding().createWorkerPool(size);
}
567
- async function batchExtractFiles(paths, config = null) {
568
- const normalizedConfig = normalizeExtractionConfig(config);
569
- const rawResults = await getBinding().batchExtractFiles(paths, normalizedConfig);
570
- return rawResults.map(convertResult);
721
/**
 * Query the native binding for statistics about a worker pool.
 *
 * @param pool - Pool handle to inspect; forwarded unchanged to the binding.
 * @returns The statistics value exactly as the native binding returns it.
 */
function getWorkerPoolStats(pool) {
  return getBinding().getWorkerPoolStats(pool);
}
572
- function batchExtractBytesSync(dataList, mimeTypes, config = null) {
573
- const buffers = assertUint8ArrayList(dataList, "dataList").map((data) => Buffer.from(data));
574
- if (buffers.length !== mimeTypes.length) {
575
- throw new TypeError("dataList and mimeTypes must have the same length");
726
/**
 * Extract a single file on a worker pool.
 *
 * The third argument is overloaded: a string is treated as the MIME type
 * (with `maybeConfig` as the config), a non-null object is treated as the
 * config itself (`maybeConfig` is then ignored), and anything else means
 * "no MIME type" with `maybeConfig` as the config.
 *
 * @param pool - Worker pool handle passed to the native binding.
 * @param filePath - Path of the file to extract.
 * @param mimeTypeOrConfig - MIME type string, extraction config object, or null/undefined.
 * @param maybeConfig - Extraction config used when the third argument is not an object.
 * @returns Promise resolving to the converted extraction result.
 */
async function extractFileInWorker(pool, filePath, mimeTypeOrConfig, maybeConfig) {
  const thirdIsMimeType = typeof mimeTypeOrConfig === "string";
  const thirdIsConfig = !thirdIsMimeType && mimeTypeOrConfig !== null && typeof mimeTypeOrConfig === "object";

  const mimeType = thirdIsMimeType ? mimeTypeOrConfig : null;
  const config = thirdIsConfig ? mimeTypeOrConfig : (maybeConfig ?? null);

  // Normalize before touching the binding, matching the established call order.
  const normalizedConfig = normalizeExtractionConfig(config);
  const rawResult = await getBinding().extractFileInWorker(pool, filePath, mimeType, normalizedConfig);
  return convertResult(rawResult);
}
581
- async function batchExtractBytes(dataList, mimeTypes, config = null) {
582
- const buffers = assertUint8ArrayList(dataList, "dataList").map((data) => Buffer.from(data));
583
- if (buffers.length !== mimeTypes.length) {
584
- throw new TypeError("dataList and mimeTypes must have the same length");
585
- }
749
/**
 * Extract several files on a worker pool in one native call.
 *
 * @param pool - Worker pool handle passed to the native binding.
 * @param paths - File paths to extract; forwarded unchanged to the binding.
 * @param config - Extraction config (or null); normalized before the call.
 * @returns Promise resolving to the converted results, one per raw result.
 */
async function batchExtractFilesInWorker(pool, paths, config = null) {
  // Normalize before touching the binding, matching the established call order.
  const normalizedConfig = normalizeExtractionConfig(config);
  const rawResults = await getBinding().batchExtractFilesInWorker(pool, paths, normalizedConfig);
  return rawResults.map(convertResult);
}
759
/**
 * Close a worker pool via the native binding and wait for it to finish.
 *
 * @param pool - Pool handle to close; forwarded unchanged to the binding.
 * @returns Promise that resolves (with no value) once the binding call settles.
 */
async function closeWorkerPool(pool) {
  await getBinding().closeWorkerPool(pool);
}
763
+
764
+ // typescript/plugins/post-processors.ts
590
765
  function registerPostProcessor(processor) {
591
766
  const binding2 = getBinding();
592
767
  const wrappedProcessor = {
@@ -641,6 +816,8 @@ function listPostProcessors() {
641
816
  const binding2 = getBinding();
642
817
  return binding2.listPostProcessors();
643
818
  }
819
+
820
+ // typescript/plugins/validators.ts
644
821
  function registerValidator(validator) {
645
822
  const binding2 = getBinding();
646
823
  const wrappedValidator = {
@@ -679,6 +856,204 @@ function listValidators() {
679
856
  const binding2 = getBinding();
680
857
  return binding2.listValidators();
681
858
  }
859
+
860
+ // typescript/ocr/guten-ocr.ts
861
var GutenOcrBackend = class {
  ocr = null;
  ocrModule = null;
  options;
  /**
   * Create a new Guten OCR backend.
   *
   * The options object is stored as-is and later handed to the `create()`
   * function of `@gutenye/ocr-node` during `initialize()`.
   *
   * @param options - Optional configuration for Guten OCR
   * @param options.models - Custom model paths (default: uses bundled models)
   * @param options.isDebug - Enable debug mode (default: false)
   * @param options.debugOutputDir - Directory for debug output (default: undefined)
   * @param options.onnxOptions - Custom ONNX Runtime options (default: undefined)
   *
   * @example
   * ```typescript
   * // Default configuration
   * const backend = new GutenOcrBackend();
   *
   * // With debug enabled
   * const debugBackend = new GutenOcrBackend({
   *   isDebug: true,
   *   debugOutputDir: './ocr_debug'
   * });
   * ```
   */
  constructor(options) {
    if (options !== void 0) {
      this.options = options;
    }
  }
  /**
   * Get the backend name.
   *
   * @returns Backend name ("guten-ocr")
   */
  name() {
    return "guten-ocr";
  }
  /**
   * Get the list of language identifiers this backend advertises.
   *
   * The list mixes ISO-style codes ("en", "eng") with backend-specific
   * identifiers ("ch_sim", "ch_tra", "chinese"); it is a fixed list and does
   * not depend on the configured models.
   *
   * @returns Array of language identifiers
   */
  supportedLanguages() {
    return ["en", "eng", "ch_sim", "ch_tra", "chinese"];
  }
  /**
   * Initialize the OCR backend.
   *
   * Dynamically imports `@gutenye/ocr-node` and creates an OCR instance with
   * the options given to the constructor. Calling it again once an instance
   * exists is a no-op. `processImage()` calls this automatically when needed.
   *
   * @throws {Error} If @gutenye/ocr-node cannot be imported (original error in `cause`)
   * @throws {Error} If OCR initialization fails (original error in `cause`)
   *
   * @example
   * ```typescript
   * const backend = new GutenOcrBackend();
   * await backend.initialize();
   * ```
   */
  async initialize() {
    if (this.ocr !== null) {
      return;
    }
    try {
      this.ocrModule = await import("@gutenye/ocr-node").then((m) => m.default || m);
    } catch (e) {
      throw new Error(
        `Guten OCR support requires the '@gutenye/ocr-node' package. Install with: npm install @gutenye/ocr-node. Error: ${describeGutenOcrFailure(e)}`,
        { cause: e }
      );
    }
    try {
      // A nullish module leaves `ocr` as null; processImage() reports that case.
      this.ocr = await this.ocrModule?.create(this.options) ?? null;
    } catch (e) {
      throw new Error(`Failed to initialize Guten OCR: ${describeGutenOcrFailure(e)}`, { cause: e });
    }
  }
  /**
   * Shutdown the backend and release resources.
   *
   * Drops the references to the OCR instance and the loaded module so the
   * backend returns to its uninitialized state.
   *
   * @example
   * ```typescript
   * const backend = new GutenOcrBackend();
   * await backend.initialize();
   * // ... use backend ...
   * await backend.shutdown();
   * ```
   */
  async shutdown() {
    this.ocr = null;
    this.ocrModule = null;
  }
  /**
   * Process image bytes and extract text using Guten OCR.
   *
   * This method:
   * 1. Initializes the backend on first use
   * 2. Copies the input into a Buffer (strings are decoded as base64)
   * 3. Tries to read width/height via `sharp`; on failure logs a warning and reports 0
   * 4. Calls `detect()` on the OCR instance with the buffer
   * 5. Joins the detected lines with newlines and averages their `mean` scores
   *
   * @param imageBytes - Raw image data (Buffer/Uint8Array) or a base64 string
   * @param language - Language code; echoed back in the result metadata, not validated here
   * @returns Promise resolving to OCR result with content and metadata
   *
   * @throws {Error} If the backend could not be initialized
   * @throws {Error} If OCR processing fails (original error in `cause`)
   *
   * @example
   * ```typescript
   * import { readFile } from 'fs/promises';
   *
   * const backend = new GutenOcrBackend();
   * await backend.initialize();
   *
   * const imageBytes = await readFile('scanned.png');
   * const result = await backend.processImage(imageBytes, 'en');
   * console.log(result.content);
   * console.log(result.metadata.confidence);
   * ```
   */
  async processImage(imageBytes, language) {
    if (this.ocr === null) {
      await this.initialize();
    }
    if (this.ocr === null) {
      throw new Error("Guten OCR backend failed to initialize");
    }
    try {
      const buffer = typeof imageBytes === "string" ? Buffer.from(imageBytes, "base64") : Buffer.from(imageBytes);
      if (process.env["KREUZBERG_DEBUG_GUTEN"] === "1") {
        const header = Array.from(buffer.subarray(0, 8));
        console.log("[Guten OCR] Debug input header:", header);
        console.log(
          "[Guten OCR] Buffer?",
          Buffer.isBuffer(buffer),
          "constructor",
          imageBytes?.constructor?.name,
          "length",
          buffer.length,
          "type",
          typeof imageBytes
        );
      }
      let width = 0;
      let height = 0;
      try {
        // Dimensions are best-effort: a missing `sharp` or an unreadable image
        // must not prevent OCR from running.
        const sharpModule = await import("sharp");
        const sharp = sharpModule.default || sharpModule;
        const metadata = await sharp(buffer).metadata();
        width = metadata["width"] ?? 0;
        height = metadata["height"] ?? 0;
      } catch (metadataError) {
        console.warn(`[Guten OCR] Unable to read image metadata via sharp: ${describeGutenOcrFailure(metadataError)}`);
      }
      const result = await this.ocr.detect(buffer);
      const content = result.map((line) => line.text).join("\n");
      const avgConfidence = result.length > 0 ? result.reduce((sum, line) => sum + line.mean, 0) / result.length : 0;
      return {
        content,
        mime_type: "text/plain",
        metadata: {
          width,
          height,
          confidence: avgConfidence,
          text_regions: result.length,
          language
        },
        tables: []
      };
    } catch (e) {
      throw new Error(`Guten OCR processing failed: ${describeGutenOcrFailure(e)}`, { cause: e });
    }
  }
};

// Produce a printable message for any thrown value, including non-Error throws.
function describeGutenOcrFailure(thrown) {
  return thrown instanceof Error ? thrown.message : String(thrown);
}
1055
+
1056
+ // typescript/plugins/ocr-backends.ts
682
1057
  function isOcrProcessTuple(value) {
683
1058
  return Array.isArray(value) && value.length === 2 && typeof value[1] === "string" && (typeof value[0] === "string" || Buffer.isBuffer(value[0]) || value[0] instanceof Uint8Array);
684
1059
  }
@@ -748,6 +1123,8 @@ function clearOcrBackends() {
748
1123
  const binding2 = getBinding();
749
1124
  binding2.clearOcrBackends();
750
1125
  }
1126
+
1127
+ // typescript/registry/document-extractors.ts
751
1128
  function listDocumentExtractors() {
752
1129
  const binding2 = getBinding();
753
1130
  return binding2.listDocumentExtractors();
@@ -760,7 +1137,9 @@ function clearDocumentExtractors() {
760
1137
  const binding2 = getBinding();
761
1138
  binding2.clearDocumentExtractors();
762
1139
  }
763
- const ExtractionConfig = {
1140
+
1141
+ // typescript/config/loader.ts
1142
+ var ExtractionConfig = {
764
1143
  /**
765
1144
  * Load extraction configuration from a file.
766
1145
  *
@@ -822,6 +1201,18 @@ const ExtractionConfig = {
822
1201
  return binding2.discoverExtractionConfig();
823
1202
  }
824
1203
  };
1204
/**
 * Load an extraction configuration from a file.
 *
 * Thin alias for `ExtractionConfig.fromFile`; any error it throws propagates.
 *
 * @param filePath - Path of the configuration file.
 * @returns Whatever `ExtractionConfig.fromFile(filePath)` returns.
 */
function loadConfigFile(filePath) {
  const loaded = ExtractionConfig.fromFile(filePath);
  return loaded;
}
1207
/**
 * Load an extraction configuration from a path, falling back to discovery.
 *
 * If `ExtractionConfig.fromFile(path)` throws synchronously, the error is
 * intentionally discarded and `ExtractionConfig.discover()` is used instead.
 * Errors thrown by `discover()` itself propagate to the caller.
 *
 * @param path - Path of the configuration file to try first.
 * @returns The result of `fromFile(path)`, or of `discover()` on failure.
 */
function loadConfigFromPath(path) {
  let loaded;
  try {
    loaded = ExtractionConfig.fromFile(path);
  } catch {
    // Best-effort: any load failure falls back to config discovery.
    return ExtractionConfig.discover();
  }
  return loaded;
}
1213
+ }
1214
+
1215
+ // typescript/mime/utilities.ts
825
1216
  function detectMimeType(bytes) {
826
1217
  const binding2 = getBinding();
827
1218
  return binding2.detectMimeTypeFromBytes(bytes);
@@ -838,6 +1229,8 @@ function getExtensionsForMime(mimeType) {
838
1229
  const binding2 = getBinding();
839
1230
  return binding2.getExtensionsForMime(mimeType);
840
1231
  }
1232
+
1233
+ // typescript/embeddings/presets.ts
841
1234
  function listEmbeddingPresets() {
842
1235
  const binding2 = getBinding();
843
1236
  return binding2.listEmbeddingPresets();
@@ -847,76 +1240,9 @@ function getEmbeddingPreset(name) {
847
1240
  const result = binding2.getEmbeddingPreset(name);
848
1241
  return result;
849
1242
  }
850
- function getLastErrorCode() {
851
- const binding2 = getBinding();
852
- return binding2.getLastErrorCode();
853
- }
854
- function getLastPanicContext() {
855
- const binding2 = getBinding();
856
- const result = binding2.getLastPanicContext();
857
- return result;
858
- }
859
- function getErrorCodeName(code) {
860
- const binding2 = getBinding();
861
- return binding2.getErrorCodeName(code);
862
- }
863
- function getErrorCodeDescription(code) {
864
- const binding2 = getBinding();
865
- return binding2.getErrorCodeDescription(code);
866
- }
867
- function classifyError(errorMessage) {
868
- const binding2 = getBinding();
869
- const result = binding2.classifyError(errorMessage);
870
- return result;
871
- }
872
- function createWorkerPool(size) {
873
- const binding2 = getBinding();
874
- const rawPool = binding2.createWorkerPool(size);
875
- return rawPool;
876
- }
877
- function getWorkerPoolStats(pool) {
878
- const binding2 = getBinding();
879
- const rawStats = binding2.getWorkerPoolStats(pool);
880
- return rawStats;
881
- }
882
- async function extractFileInWorker(pool, filePath, mimeTypeOrConfig, maybeConfig) {
883
- let mimeType = null;
884
- let config = null;
885
- if (typeof mimeTypeOrConfig === "string") {
886
- mimeType = mimeTypeOrConfig;
887
- config = maybeConfig ?? null;
888
- } else if (mimeTypeOrConfig !== null && typeof mimeTypeOrConfig === "object") {
889
- config = mimeTypeOrConfig;
890
- mimeType = null;
891
- } else {
892
- config = maybeConfig ?? null;
893
- mimeType = null;
894
- }
895
- const normalizedConfig = normalizeExtractionConfig(config);
896
- const binding2 = getBinding();
897
- const rawResult = await binding2.extractFileInWorker(
898
- pool,
899
- filePath,
900
- mimeType,
901
- normalizedConfig
902
- );
903
- return convertResult(rawResult);
904
- }
905
- async function batchExtractFilesInWorker(pool, paths, config = null) {
906
- const normalizedConfig = normalizeExtractionConfig(config);
907
- const binding2 = getBinding();
908
- const rawResults = await binding2.batchExtractFilesInWorker(
909
- pool,
910
- paths,
911
- normalizedConfig
912
- );
913
- return rawResults.map(convertResult);
914
- }
915
- async function closeWorkerPool(pool) {
916
- const binding2 = getBinding();
917
- await binding2.closeWorkerPool(pool);
918
- }
919
- const __version__ = "4.0.8";
1243
+
1244
+ // typescript/index.ts
1245
+ var __version__ = "4.1.1";
920
1246
  export {
921
1247
  CacheError,
922
1248
  ErrorCode,
@@ -963,6 +1289,8 @@ export {
963
1289
  listOcrBackends,
964
1290
  listPostProcessors,
965
1291
  listValidators,
1292
+ loadConfigFile,
1293
+ loadConfigFromPath,
966
1294
  registerOcrBackend,
967
1295
  registerPostProcessor,
968
1296
  registerValidator,