@kreuzberg/wasm 4.4.5 → 4.5.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -22,7 +22,7 @@
22
22
  <img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
23
23
  </a>
24
24
  <a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
25
- <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.4.5" alt="Go">
25
+ <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.5.0" alt="Go">
26
26
  </a>
27
27
  <a href="https://www.nuget.org/packages/Kreuzberg/">
28
28
  <img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
@@ -33,6 +33,9 @@
33
33
  <a href="https://rubygems.org/gems/kreuzberg">
34
34
  <img src="https://img.shields.io/gem/v/kreuzberg?label=Ruby&color=007ec6" alt="Ruby">
35
35
  </a>
36
+ <a href="https://kreuzberg-dev.r-universe.dev/kreuzberg">
37
+ <img src="https://img.shields.io/badge/R-kreuzberg-007ec6" alt="R">
38
+ </a>
36
39
  <a href="https://github.com/kreuzberg-dev/kreuzberg/pkgs/container/kreuzberg">
37
40
  <img src="https://img.shields.io/badge/Docker-007ec6?logo=docker&logoColor=white" alt="Docker">
38
41
  </a>
@@ -44,6 +47,9 @@
44
47
  <a href="https://docs.kreuzberg.dev">
45
48
  <img src="https://img.shields.io/badge/docs-kreuzberg.dev-blue" alt="Documentation">
46
49
  </a>
50
+ <a href="https://huggingface.co/Kreuzberg">
51
+ <img src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Models-yellow" alt="Hugging Face">
52
+ </a>
47
53
  </div>
48
54
 
49
55
  <img width="1128" height="191" alt="Banner2" src="https://github.com/user-attachments/assets/419fc06c-8313-4324-b159-4b4d3cfce5c0" />
@@ -55,9 +61,7 @@
55
61
  </div>
56
62
 
57
63
 
58
- Extract text, tables, images, and metadata from 75+ file formats including PDF, Office documents, and images. WebAssembly bindings for browsers, Deno, and Cloudflare Workers with portable deployment and multi-threading support.
59
-
60
- > **Full Feature Parity** — The WASM package supports all extraction capabilities at full parity with native bindings: PDF (via PDFium), Excel/spreadsheets (via Calamine), archives (ZIP, TAR, 7z, GZIP), and OCR (via built-in Tesseract-WASM). No external dependencies required.
64
+ Extract text, tables, images, and metadata from 88+ file formats including PDF, Office documents, and images. WebAssembly bindings for browsers, Deno, and Cloudflare Workers with portable deployment and multi-threading support.
61
65
 
62
66
 
63
67
  ## Installation
@@ -97,7 +101,7 @@ yarn add @kreuzberg/wasm
97
101
  ### System Requirements
98
102
 
99
103
  - Modern browser with WebAssembly support, or Deno 1.0+, or Cloudflare Workers
100
- - OCR is built-in via Tesseract-WASM (enable at runtime with `enableOcr()`)
104
+ - Optional: [Tesseract WASM](https://github.com/naptha/tesseract.js) for OCR functionality
101
105
 
102
106
 
103
107
 
@@ -176,40 +180,6 @@ extractWithOcr().catch(console.error);
176
180
  See [Table Extraction Guide](https://kreuzberg.dev/features/table-extraction/) for detailed examples.
177
181
 
178
182
 
179
- #### Excel/Spreadsheet Extraction
180
-
181
- Extract structured data from Excel files directly in the browser or server-side runtimes:
182
-
183
- ```ts
184
- import { extractBytes, initWasm } from "@kreuzberg/wasm";
185
-
186
- async function extractSpreadsheet() {
187
- await initWasm();
188
-
189
- const bytes = new Uint8Array(
190
- await fetch("report.xlsx").then((r) => r.arrayBuffer()),
191
- );
192
-
193
- const result = await extractBytes(
194
- bytes,
195
- "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
196
- );
197
-
198
- console.log("Spreadsheet content:");
199
- console.log(result.content);
200
-
201
- if (result.tables && result.tables.length > 0) {
202
- result.tables.forEach((table, index) => {
203
- console.log(`\nSheet ${index + 1}:`);
204
- console.log(table.markdown);
205
- });
206
- }
207
- }
208
-
209
- extractSpreadsheet().catch(console.error);
210
- ```
211
-
212
-
213
183
 
214
184
  #### Processing Multiple Files
215
185
 
@@ -301,19 +271,21 @@ extractDocuments(fileBytes, mimes)
301
271
 
302
272
  ## Features
303
273
 
304
- ### Supported File Formats (75+)
274
+ ### Supported File Formats (88+)
305
275
 
306
- 75+ file formats across 8 major categories with intelligent format detection and comprehensive metadata extraction.
276
+ 88+ file formats across 8 major categories with intelligent format detection and comprehensive metadata extraction.
307
277
 
308
278
  #### Office Documents
309
279
 
310
280
  | Category | Formats | Capabilities |
311
281
  |----------|---------|--------------|
312
- | **Word Processing** | `.docx`, `.odt` | Full text, tables, images, metadata, styles |
313
- | **Spreadsheets** | `.xlsx`, `.xlsm`, `.xlsb`, `.xls`, `.xla`, `.xlam`, `.xltm`, `.ods` | Sheet data, formulas, cell metadata, charts |
314
- | **Presentations** | `.pptx`, `.ppt`, `.ppsx` | Slides, speaker notes, images, metadata |
282
+ | **Word Processing** | `.docx`, `.docm`, `.dotx`, `.dotm`, `.dot`, `.odt` | Full text, tables, images, metadata, styles |
283
+ | **Spreadsheets** | `.xlsx`, `.xlsm`, `.xlsb`, `.xls`, `.xla`, `.xlam`, `.xltm`, `.xltx`, `.xlt`, `.ods` | Sheet data, formulas, cell metadata, charts |
284
+ | **Presentations** | `.pptx`, `.pptm`, `.ppsx`, `.potx`, `.potm`, `.pot`, `.ppt` | Slides, speaker notes, images, metadata |
315
285
  | **PDF** | `.pdf` | Text, tables, images, metadata, OCR support |
316
286
  | **eBooks** | `.epub`, `.fb2` | Chapters, metadata, embedded resources |
287
+ | **Database** | `.dbf` | Table data extraction, field type support |
288
+ | **Hangul** | `.hwp`, `.hwpx` | Korean document format, text extraction |
317
289
 
318
290
  #### Images (OCR-Enabled)
319
291
 
@@ -354,10 +326,14 @@ extractDocuments(fileBytes, mimes)
354
326
  - **Metadata Extraction** - Retrieve document properties, creation date, author, etc.
355
327
  - **Table Extraction** - Parse tables with structure and cell content preservation
356
328
  - **Image Extraction** - Extract embedded images and render page previews
357
- - **OCR Support** - Built-in Tesseract-WASM for scanned documents and images
358
- - **Full Feature Parity** - All extraction capabilities at parity with native bindings: PDF, Excel, archives, OCR, and 75+ formats
329
+ - **OCR Support** - Integrate multiple OCR backends for scanned documents
330
+
359
331
  - **Async/Await** - Non-blocking document processing with concurrent operations
332
+
333
+
360
334
  - **Plugin System** - Extensible post-processing for custom text transformation
335
+
336
+
361
337
  - **Batch Processing** - Efficiently process multiple documents in parallel
362
338
  - **Memory Efficient** - Stream large files without loading entirely into memory
363
339
  - **Language Detection** - Detect and support multiple languages in documents
@@ -29,6 +29,7 @@ export class ModuleInfo {
29
29
  * * `dataList: Uint8Array[]` - Array of document bytes
30
30
  * * `mimeTypes: string[]` - Array of MIME types (must match dataList length)
31
31
  * * `config?: object` - Optional extraction configuration (applied to every document unless overridden by its `fileConfigs` entry)
32
+ * * `fileConfigs?: (object | null)[]` - Optional per-file config overrides (must match dataList length if provided)
32
33
  *
33
34
  * # Returns
34
35
  *
@@ -36,7 +37,8 @@ export class ModuleInfo {
36
37
  *
37
38
  * # Throws
38
39
  *
39
- * Rejects if dataList and mimeTypes lengths don't match.
40
+ * Rejects if dataList and mimeTypes lengths don't match, or if fileConfigs
41
+ * is provided and its length doesn't match dataList.
40
42
  *
41
43
  * # Example
42
44
  *
@@ -57,21 +59,26 @@ export class ModuleInfo {
57
59
  * ['application/pdf', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
58
60
  * null
59
61
  * );
62
+ *
63
+ * // With per-file configs:
64
+ * const fileConfigs = [{ ocrConfig: { language: 'eng' } }, null];
65
+ * const results2 = await batchExtractBytes(buffers, mimeTypes, null, fileConfigs);
60
66
  * ```
61
67
  */
62
- export function batchExtractBytes(data_list: Uint8Array[], mime_types: string[], config?: any | null): Promise<any>;
68
+ export function batchExtractBytes(data_list: Uint8Array[], mime_types: string[], config?: any | null, file_configs?: any[] | null): Promise<any>;
63
69
 
64
70
  /**
65
71
  * Batch extract from multiple byte arrays (synchronous).
66
72
  *
67
73
  * Processes multiple document byte arrays in parallel. All documents use the
68
- * same extraction configuration.
74
+ * same extraction configuration unless per-file configs are provided.
69
75
  *
70
76
  * # JavaScript Parameters
71
77
  *
72
78
  * * `dataList: Uint8Array[]` - Array of document bytes
73
79
  * * `mimeTypes: string[]` - Array of MIME types (must match dataList length)
74
80
  * * `config?: object` - Optional extraction configuration (applied to every document unless overridden by its `fileConfigs` entry)
81
+ * * `fileConfigs?: (object | null)[]` - Optional per-file config overrides (must match dataList length if provided)
75
82
  *
76
83
  * # Returns
77
84
  *
@@ -79,7 +86,8 @@ export function batchExtractBytes(data_list: Uint8Array[], mime_types: string[],
79
86
  *
80
87
  * # Throws
81
88
  *
82
- * Throws if dataList and mimeTypes lengths don't match.
89
+ * Throws if dataList and mimeTypes lengths don't match, or if fileConfigs
90
+ * is provided and its length doesn't match dataList.
83
91
  *
84
92
  * # Example
85
93
  *
@@ -93,9 +101,13 @@ export function batchExtractBytes(data_list: Uint8Array[], mime_types: string[],
93
101
  * results.forEach((result, i) => {
94
102
  * console.log(`Document ${i}: ${result.content.substring(0, 50)}...`);
95
103
  * });
104
+ *
105
+ * // With per-file configs:
106
+ * const fileConfigs = [{ ocrConfig: { language: 'eng' } }, null, null];
107
+ * const results2 = batchExtractBytesSync(buffers, mimeTypes, null, fileConfigs);
96
108
  * ```
97
109
  */
98
- export function batchExtractBytesSync(data_list: Uint8Array[], mime_types: string[], config?: any | null): any;
110
+ export function batchExtractBytesSync(data_list: Uint8Array[], mime_types: string[], config?: any | null, file_configs?: any[] | null): any;
99
111
 
100
112
  /**
101
113
  * Batch extract from multiple Files or Blobs (asynchronous).
@@ -55,6 +55,7 @@ if (Symbol.dispose) ModuleInfo.prototype[Symbol.dispose] = ModuleInfo.prototype.
55
55
  * * `dataList: Uint8Array[]` - Array of document bytes
56
56
  * * `mimeTypes: string[]` - Array of MIME types (must match dataList length)
57
57
  * * `config?: object` - Optional extraction configuration (applied to every document unless overridden by its `fileConfigs` entry)
58
+ * * `fileConfigs?: (object | null)[]` - Optional per-file config overrides (must match dataList length if provided)
58
59
  *
59
60
  * # Returns
60
61
  *
@@ -62,7 +63,8 @@ if (Symbol.dispose) ModuleInfo.prototype[Symbol.dispose] = ModuleInfo.prototype.
62
63
  *
63
64
  * # Throws
64
65
  *
65
- * Rejects if dataList and mimeTypes lengths don't match.
66
+ * Rejects if dataList and mimeTypes lengths don't match, or if fileConfigs
67
+ * is provided and its length doesn't match dataList.
66
68
  *
67
69
  * # Example
68
70
  *
@@ -83,18 +85,25 @@ if (Symbol.dispose) ModuleInfo.prototype[Symbol.dispose] = ModuleInfo.prototype.
83
85
  * ['application/pdf', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
84
86
  * null
85
87
  * );
88
+ *
89
+ * // With per-file configs:
90
+ * const fileConfigs = [{ ocrConfig: { language: 'eng' } }, null];
91
+ * const results2 = await batchExtractBytes(buffers, mimeTypes, null, fileConfigs);
86
92
  * ```
87
93
  * @param {Uint8Array[]} data_list
88
94
  * @param {string[]} mime_types
89
95
  * @param {any | null} [config]
96
+ * @param {any[] | null} [file_configs]
90
97
  * @returns {Promise<any>}
91
98
  */
92
- export function batchExtractBytes(data_list, mime_types, config) {
99
+ export function batchExtractBytes(data_list, mime_types, config, file_configs) {
93
100
  const ptr0 = passArrayJsValueToWasm0(data_list, wasm.__wbindgen_malloc_command_export);
94
101
  const len0 = WASM_VECTOR_LEN;
95
102
  const ptr1 = passArrayJsValueToWasm0(mime_types, wasm.__wbindgen_malloc_command_export);
96
103
  const len1 = WASM_VECTOR_LEN;
97
- const ret = wasm.batchExtractBytes(ptr0, len0, ptr1, len1, isLikeNone(config) ? 0 : addToExternrefTable0(config));
104
+ var ptr2 = isLikeNone(file_configs) ? 0 : passArrayJsValueToWasm0(file_configs, wasm.__wbindgen_malloc_command_export);
105
+ var len2 = WASM_VECTOR_LEN;
106
+ const ret = wasm.batchExtractBytes(ptr0, len0, ptr1, len1, isLikeNone(config) ? 0 : addToExternrefTable0(config), ptr2, len2);
98
107
  return ret;
99
108
  }
100
109
 
@@ -102,13 +111,14 @@ export function batchExtractBytes(data_list, mime_types, config) {
102
111
  * Batch extract from multiple byte arrays (synchronous).
103
112
  *
104
113
  * Processes multiple document byte arrays in parallel. All documents use the
105
- * same extraction configuration.
114
+ * same extraction configuration unless per-file configs are provided.
106
115
  *
107
116
  * # JavaScript Parameters
108
117
  *
109
118
  * * `dataList: Uint8Array[]` - Array of document bytes
110
119
  * * `mimeTypes: string[]` - Array of MIME types (must match dataList length)
111
120
  * * `config?: object` - Optional extraction configuration (applied to every document unless overridden by its `fileConfigs` entry)
121
+ * * `fileConfigs?: (object | null)[]` - Optional per-file config overrides (must match dataList length if provided)
112
122
  *
113
123
  * # Returns
114
124
  *
@@ -116,7 +126,8 @@ export function batchExtractBytes(data_list, mime_types, config) {
116
126
  *
117
127
  * # Throws
118
128
  *
119
- * Throws if dataList and mimeTypes lengths don't match.
129
+ * Throws if dataList and mimeTypes lengths don't match, or if fileConfigs
130
+ * is provided and its length doesn't match dataList.
120
131
  *
121
132
  * # Example
122
133
  *
@@ -130,18 +141,25 @@ export function batchExtractBytes(data_list, mime_types, config) {
130
141
  * results.forEach((result, i) => {
131
142
  * console.log(`Document ${i}: ${result.content.substring(0, 50)}...`);
132
143
  * });
144
+ *
145
+ * // With per-file configs:
146
+ * const fileConfigs = [{ ocrConfig: { language: 'eng' } }, null, null];
147
+ * const results2 = batchExtractBytesSync(buffers, mimeTypes, null, fileConfigs);
133
148
  * ```
134
149
  * @param {Uint8Array[]} data_list
135
150
  * @param {string[]} mime_types
136
151
  * @param {any | null} [config]
152
+ * @param {any[] | null} [file_configs]
137
153
  * @returns {any}
138
154
  */
139
- export function batchExtractBytesSync(data_list, mime_types, config) {
155
+ export function batchExtractBytesSync(data_list, mime_types, config, file_configs) {
140
156
  const ptr0 = passArrayJsValueToWasm0(data_list, wasm.__wbindgen_malloc_command_export);
141
157
  const len0 = WASM_VECTOR_LEN;
142
158
  const ptr1 = passArrayJsValueToWasm0(mime_types, wasm.__wbindgen_malloc_command_export);
143
159
  const len1 = WASM_VECTOR_LEN;
144
- const ret = wasm.batchExtractBytesSync(ptr0, len0, ptr1, len1, isLikeNone(config) ? 0 : addToExternrefTable0(config));
160
+ var ptr2 = isLikeNone(file_configs) ? 0 : passArrayJsValueToWasm0(file_configs, wasm.__wbindgen_malloc_command_export);
161
+ var len2 = WASM_VECTOR_LEN;
162
+ const ret = wasm.batchExtractBytesSync(ptr0, len0, ptr1, len1, isLikeNone(config) ? 0 : addToExternrefTable0(config), ptr2, len2);
145
163
  if (ret[2]) {
146
164
  throw takeFromExternrefTable0(ret[1]);
147
165
  }
@@ -1345,6 +1363,10 @@ function __wbg_get_imports() {
1345
1363
  const ret = typeof(arg0) === 'function';
1346
1364
  return ret;
1347
1365
  },
1366
+ __wbg___wbindgen_is_null_0b605fc6b167c56f: function(arg0) {
1367
+ const ret = arg0 === null;
1368
+ return ret;
1369
+ },
1348
1370
  __wbg___wbindgen_is_object_781bc9f159099513: function(arg0) {
1349
1371
  const val = arg0;
1350
1372
  const ret = typeof(val) === 'object' && val !== null;
@@ -1448,6 +1470,9 @@ function __wbg_get_imports() {
1448
1470
  __wbg_getRandomValues_3f44b700395062e5: function() { return handleError(function (arg0, arg1) {
1449
1471
  globalThis.crypto.getRandomValues(getArrayU8FromWasm0(arg0, arg1));
1450
1472
  }, arguments); },
1473
+ __wbg_getRandomValues_76dfc69825c9c552: function() { return handleError(function (arg0, arg1) {
1474
+ globalThis.crypto.getRandomValues(getArrayU8FromWasm0(arg0, arg1));
1475
+ }, arguments); },
1451
1476
  __wbg_getTime_1dad7b5386ddd2d9: function(arg0) {
1452
1477
  const ret = arg0.getTime();
1453
1478
  return ret;
@@ -1575,7 +1600,7 @@ function __wbg_get_imports() {
1575
1600
  const a = state0.a;
1576
1601
  state0.a = 0;
1577
1602
  try {
1578
- return wasm_bindgen_179aebea8749b817___convert__closures_____invoke___js_sys_3900621f03eac4bc___Function_fn_wasm_bindgen_179aebea8749b817___JsValue_____wasm_bindgen_179aebea8749b817___sys__Undefined___js_sys_3900621f03eac4bc___Function_fn_wasm_bindgen_179aebea8749b817___JsValue_____wasm_bindgen_179aebea8749b817___sys__Undefined_______true_(a, state0.b, arg0, arg1);
1603
+ return wasm_bindgen__convert__closures_____invoke__h8e9fc28c4e841be2(a, state0.b, arg0, arg1);
1579
1604
  } finally {
1580
1605
  state0.a = a;
1581
1606
  }
@@ -1597,7 +1622,7 @@ function __wbg_get_imports() {
1597
1622
  const a = state0.a;
1598
1623
  state0.a = 0;
1599
1624
  try {
1600
- return wasm_bindgen_179aebea8749b817___convert__closures_____invoke___js_sys_3900621f03eac4bc___Function_fn_wasm_bindgen_179aebea8749b817___JsValue_____wasm_bindgen_179aebea8749b817___sys__Undefined___js_sys_3900621f03eac4bc___Function_fn_wasm_bindgen_179aebea8749b817___JsValue_____wasm_bindgen_179aebea8749b817___sys__Undefined_______true_(a, state0.b, arg0, arg1);
1625
+ return wasm_bindgen__convert__closures_____invoke__h8e9fc28c4e841be2(a, state0.b, arg0, arg1);
1601
1626
  } finally {
1602
1627
  state0.a = a;
1603
1628
  }
@@ -1760,13 +1785,13 @@ function __wbg_get_imports() {
1760
1785
  console.warn(arg0);
1761
1786
  },
1762
1787
  __wbindgen_cast_0000000000000001: function(arg0, arg1) {
1763
- // Cast intrinsic for `Closure(Closure { dtor_idx: 21, function: Function { arguments: [Externref], shim_idx: 23, ret: Unit, inner_ret: Some(Unit) }, mutable: true }) -> Externref`.
1764
- const ret = makeMutClosure(arg0, arg1, wasm.wasm_bindgen_179aebea8749b817___closure__destroy___dyn_core_dde6c4b55a98adc4___ops__function__FnMut__wasm_bindgen_179aebea8749b817___JsValue____Output_______, wasm_bindgen_179aebea8749b817___convert__closures_____invoke___wasm_bindgen_179aebea8749b817___JsValue______true_);
1788
+ // Cast intrinsic for `Closure(Closure { dtor_idx: 3637, function: Function { arguments: [Externref], shim_idx: 3638, ret: Result(Unit), inner_ret: Some(Result(Unit)) }, mutable: true }) -> Externref`.
1789
+ const ret = makeMutClosure(arg0, arg1, wasm.wasm_bindgen__closure__destroy__hcbea9f36b367d3d8, wasm_bindgen__convert__closures_____invoke__h240eec0b57535315);
1765
1790
  return ret;
1766
1791
  },
1767
1792
  __wbindgen_cast_0000000000000002: function(arg0, arg1) {
1768
- // Cast intrinsic for `Closure(Closure { dtor_idx: 3543, function: Function { arguments: [Externref], shim_idx: 3544, ret: Result(Unit), inner_ret: Some(Result(Unit)) }, mutable: true }) -> Externref`.
1769
- const ret = makeMutClosure(arg0, arg1, wasm.wasm_bindgen_179aebea8749b817___closure__destroy___dyn_core_dde6c4b55a98adc4___ops__function__FnMut__wasm_bindgen_179aebea8749b817___JsValue____Output___core_dde6c4b55a98adc4___result__Result_____wasm_bindgen_179aebea8749b817___JsError___, wasm_bindgen_179aebea8749b817___convert__closures_____invoke___wasm_bindgen_179aebea8749b817___JsValue__core_dde6c4b55a98adc4___result__Result_____wasm_bindgen_179aebea8749b817___JsError___true_);
1793
+ // Cast intrinsic for `Closure(Closure { dtor_idx: 4, function: Function { arguments: [Externref], shim_idx: 5, ret: Unit, inner_ret: Some(Unit) }, mutable: true }) -> Externref`.
1794
+ const ret = makeMutClosure(arg0, arg1, wasm.wasm_bindgen__closure__destroy__h60b03db66f836c8f, wasm_bindgen__convert__closures_____invoke__h4c966767d2820559);
1770
1795
  return ret;
1771
1796
  },
1772
1797
  __wbindgen_cast_0000000000000003: function(arg0) {
@@ -1808,19 +1833,19 @@ function __wbg_get_imports() {
1808
1833
  };
1809
1834
  }
1810
1835
 
1811
- function wasm_bindgen_179aebea8749b817___convert__closures_____invoke___wasm_bindgen_179aebea8749b817___JsValue______true_(arg0, arg1, arg2) {
1812
- wasm.wasm_bindgen_179aebea8749b817___convert__closures_____invoke___wasm_bindgen_179aebea8749b817___JsValue______true_(arg0, arg1, arg2);
1836
+ function wasm_bindgen__convert__closures_____invoke__h4c966767d2820559(arg0, arg1, arg2) {
1837
+ wasm.wasm_bindgen__convert__closures_____invoke__h4c966767d2820559(arg0, arg1, arg2);
1813
1838
  }
1814
1839
 
1815
- function wasm_bindgen_179aebea8749b817___convert__closures_____invoke___wasm_bindgen_179aebea8749b817___JsValue__core_dde6c4b55a98adc4___result__Result_____wasm_bindgen_179aebea8749b817___JsError___true_(arg0, arg1, arg2) {
1816
- const ret = wasm.wasm_bindgen_179aebea8749b817___convert__closures_____invoke___wasm_bindgen_179aebea8749b817___JsValue__core_dde6c4b55a98adc4___result__Result_____wasm_bindgen_179aebea8749b817___JsError___true_(arg0, arg1, arg2);
1840
+ function wasm_bindgen__convert__closures_____invoke__h240eec0b57535315(arg0, arg1, arg2) {
1841
+ const ret = wasm.wasm_bindgen__convert__closures_____invoke__h240eec0b57535315(arg0, arg1, arg2);
1817
1842
  if (ret[1]) {
1818
1843
  throw takeFromExternrefTable0(ret[0]);
1819
1844
  }
1820
1845
  }
1821
1846
 
1822
- function wasm_bindgen_179aebea8749b817___convert__closures_____invoke___js_sys_3900621f03eac4bc___Function_fn_wasm_bindgen_179aebea8749b817___JsValue_____wasm_bindgen_179aebea8749b817___sys__Undefined___js_sys_3900621f03eac4bc___Function_fn_wasm_bindgen_179aebea8749b817___JsValue_____wasm_bindgen_179aebea8749b817___sys__Undefined_______true_(arg0, arg1, arg2, arg3) {
1823
- wasm.wasm_bindgen_179aebea8749b817___convert__closures_____invoke___js_sys_3900621f03eac4bc___Function_fn_wasm_bindgen_179aebea8749b817___JsValue_____wasm_bindgen_179aebea8749b817___sys__Undefined___js_sys_3900621f03eac4bc___Function_fn_wasm_bindgen_179aebea8749b817___JsValue_____wasm_bindgen_179aebea8749b817___sys__Undefined_______true_(arg0, arg1, arg2, arg3);
1847
+ function wasm_bindgen__convert__closures_____invoke__h8e9fc28c4e841be2(arg0, arg1, arg2, arg3) {
1848
+ wasm.wasm_bindgen__convert__closures_____invoke__h8e9fc28c4e841be2(arg0, arg1, arg2, arg3);
1824
1849
  }
1825
1850
 
1826
1851
  const ModuleInfoFinalization = (typeof FinalizationRegistry === 'undefined')
@@ -53,6 +53,7 @@ if (Symbol.dispose) ModuleInfo.prototype[Symbol.dispose] = ModuleInfo.prototype.
53
53
  * * `dataList: Uint8Array[]` - Array of document bytes
54
54
  * * `mimeTypes: string[]` - Array of MIME types (must match dataList length)
55
55
  * * `config?: object` - Optional extraction configuration (applied to every document unless overridden by its `fileConfigs` entry)
56
+ * * `fileConfigs?: (object | null)[]` - Optional per-file config overrides (must match dataList length if provided)
56
57
  *
57
58
  * # Returns
58
59
  *
@@ -60,7 +61,8 @@ if (Symbol.dispose) ModuleInfo.prototype[Symbol.dispose] = ModuleInfo.prototype.
60
61
  *
61
62
  * # Throws
62
63
  *
63
- * Rejects if dataList and mimeTypes lengths don't match.
64
+ * Rejects if dataList and mimeTypes lengths don't match, or if fileConfigs
65
+ * is provided and its length doesn't match dataList.
64
66
  *
65
67
  * # Example
66
68
  *
@@ -81,18 +83,25 @@ if (Symbol.dispose) ModuleInfo.prototype[Symbol.dispose] = ModuleInfo.prototype.
81
83
  * ['application/pdf', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
82
84
  * null
83
85
  * );
86
+ *
87
+ * // With per-file configs:
88
+ * const fileConfigs = [{ ocrConfig: { language: 'eng' } }, null];
89
+ * const results2 = await batchExtractBytes(buffers, mimeTypes, null, fileConfigs);
84
90
  * ```
85
91
  * @param {Uint8Array[]} data_list
86
92
  * @param {string[]} mime_types
87
93
  * @param {any | null} [config]
94
+ * @param {any[] | null} [file_configs]
88
95
  * @returns {Promise<any>}
89
96
  */
90
- export function batchExtractBytes(data_list, mime_types, config) {
97
+ export function batchExtractBytes(data_list, mime_types, config, file_configs) {
91
98
  const ptr0 = passArrayJsValueToWasm0(data_list, wasm.__wbindgen_malloc_command_export);
92
99
  const len0 = WASM_VECTOR_LEN;
93
100
  const ptr1 = passArrayJsValueToWasm0(mime_types, wasm.__wbindgen_malloc_command_export);
94
101
  const len1 = WASM_VECTOR_LEN;
95
- const ret = wasm.batchExtractBytes(ptr0, len0, ptr1, len1, isLikeNone(config) ? 0 : addToExternrefTable0(config));
102
+ var ptr2 = isLikeNone(file_configs) ? 0 : passArrayJsValueToWasm0(file_configs, wasm.__wbindgen_malloc_command_export);
103
+ var len2 = WASM_VECTOR_LEN;
104
+ const ret = wasm.batchExtractBytes(ptr0, len0, ptr1, len1, isLikeNone(config) ? 0 : addToExternrefTable0(config), ptr2, len2);
96
105
  return ret;
97
106
  }
98
107
 
@@ -100,13 +109,14 @@ export function batchExtractBytes(data_list, mime_types, config) {
100
109
  * Batch extract from multiple byte arrays (synchronous).
101
110
  *
102
111
  * Processes multiple document byte arrays in parallel. All documents use the
103
- * same extraction configuration.
112
+ * same extraction configuration unless per-file configs are provided.
104
113
  *
105
114
  * # JavaScript Parameters
106
115
  *
107
116
  * * `dataList: Uint8Array[]` - Array of document bytes
108
117
  * * `mimeTypes: string[]` - Array of MIME types (must match dataList length)
109
118
  * * `config?: object` - Optional extraction configuration (applied to every document unless overridden by its `fileConfigs` entry)
119
+ * * `fileConfigs?: (object | null)[]` - Optional per-file config overrides (must match dataList length if provided)
110
120
  *
111
121
  * # Returns
112
122
  *
@@ -114,7 +124,8 @@ export function batchExtractBytes(data_list, mime_types, config) {
114
124
  *
115
125
  * # Throws
116
126
  *
117
- * Throws if dataList and mimeTypes lengths don't match.
127
+ * Throws if dataList and mimeTypes lengths don't match, or if fileConfigs
128
+ * is provided and its length doesn't match dataList.
118
129
  *
119
130
  * # Example
120
131
  *
@@ -128,18 +139,25 @@ export function batchExtractBytes(data_list, mime_types, config) {
128
139
  * results.forEach((result, i) => {
129
140
  * console.log(`Document ${i}: ${result.content.substring(0, 50)}...`);
130
141
  * });
142
+ *
143
+ * // With per-file configs:
144
+ * const fileConfigs = [{ ocrConfig: { language: 'eng' } }, null, null];
145
+ * const results2 = batchExtractBytesSync(buffers, mimeTypes, null, fileConfigs);
131
146
  * ```
132
147
  * @param {Uint8Array[]} data_list
133
148
  * @param {string[]} mime_types
134
149
  * @param {any | null} [config]
150
+ * @param {any[] | null} [file_configs]
135
151
  * @returns {any}
136
152
  */
137
- export function batchExtractBytesSync(data_list, mime_types, config) {
153
+ export function batchExtractBytesSync(data_list, mime_types, config, file_configs) {
138
154
  const ptr0 = passArrayJsValueToWasm0(data_list, wasm.__wbindgen_malloc_command_export);
139
155
  const len0 = WASM_VECTOR_LEN;
140
156
  const ptr1 = passArrayJsValueToWasm0(mime_types, wasm.__wbindgen_malloc_command_export);
141
157
  const len1 = WASM_VECTOR_LEN;
142
- const ret = wasm.batchExtractBytesSync(ptr0, len0, ptr1, len1, isLikeNone(config) ? 0 : addToExternrefTable0(config));
158
+ var ptr2 = isLikeNone(file_configs) ? 0 : passArrayJsValueToWasm0(file_configs, wasm.__wbindgen_malloc_command_export);
159
+ var len2 = WASM_VECTOR_LEN;
160
+ const ret = wasm.batchExtractBytesSync(ptr0, len0, ptr1, len1, isLikeNone(config) ? 0 : addToExternrefTable0(config), ptr2, len2);
143
161
  if (ret[2]) {
144
162
  throw takeFromExternrefTable0(ret[1]);
145
163
  }
@@ -1251,6 +1269,10 @@ export function __wbg___wbindgen_is_function_3c846841762788c1(arg0) {
1251
1269
  const ret = typeof(arg0) === 'function';
1252
1270
  return ret;
1253
1271
  }
1272
+ export function __wbg___wbindgen_is_null_0b605fc6b167c56f(arg0) {
1273
+ const ret = arg0 === null;
1274
+ return ret;
1275
+ }
1254
1276
  export function __wbg___wbindgen_is_object_781bc9f159099513(arg0) {
1255
1277
  const val = arg0;
1256
1278
  const ret = typeof(val) === 'object' && val !== null;
@@ -1354,6 +1376,9 @@ export function __wbg_from_4bdf88943703fd48(arg0) {
1354
1376
  export function __wbg_getRandomValues_3f44b700395062e5() { return handleError(function (arg0, arg1) {
1355
1377
  globalThis.crypto.getRandomValues(getArrayU8FromWasm0(arg0, arg1));
1356
1378
  }, arguments); }
1379
+ export function __wbg_getRandomValues_76dfc69825c9c552() { return handleError(function (arg0, arg1) {
1380
+ globalThis.crypto.getRandomValues(getArrayU8FromWasm0(arg0, arg1));
1381
+ }, arguments); }
1357
1382
  export function __wbg_getTime_1dad7b5386ddd2d9(arg0) {
1358
1383
  const ret = arg0.getTime();
1359
1384
  return ret;
@@ -1481,7 +1506,7 @@ export function __wbg_new_d098e265629cd10f(arg0, arg1) {
1481
1506
  const a = state0.a;
1482
1507
  state0.a = 0;
1483
1508
  try {
1484
- return wasm_bindgen_179aebea8749b817___convert__closures_____invoke___js_sys_3900621f03eac4bc___Function_fn_wasm_bindgen_179aebea8749b817___JsValue_____wasm_bindgen_179aebea8749b817___sys__Undefined___js_sys_3900621f03eac4bc___Function_fn_wasm_bindgen_179aebea8749b817___JsValue_____wasm_bindgen_179aebea8749b817___sys__Undefined_______true_(a, state0.b, arg0, arg1);
1509
+ return wasm_bindgen__convert__closures_____invoke__h8e9fc28c4e841be2(a, state0.b, arg0, arg1);
1485
1510
  } finally {
1486
1511
  state0.a = a;
1487
1512
  }
@@ -1503,7 +1528,7 @@ export function __wbg_new_typed_aaaeaf29cf802876(arg0, arg1) {
1503
1528
  const a = state0.a;
1504
1529
  state0.a = 0;
1505
1530
  try {
1506
- return wasm_bindgen_179aebea8749b817___convert__closures_____invoke___js_sys_3900621f03eac4bc___Function_fn_wasm_bindgen_179aebea8749b817___JsValue_____wasm_bindgen_179aebea8749b817___sys__Undefined___js_sys_3900621f03eac4bc___Function_fn_wasm_bindgen_179aebea8749b817___JsValue_____wasm_bindgen_179aebea8749b817___sys__Undefined_______true_(a, state0.b, arg0, arg1);
1531
+ return wasm_bindgen__convert__closures_____invoke__h8e9fc28c4e841be2(a, state0.b, arg0, arg1);
1507
1532
  } finally {
1508
1533
  state0.a = a;
1509
1534
  }
@@ -1666,13 +1691,13 @@ export function __wbg_warn_69424c2d92a2fa73(arg0) {
1666
1691
  console.warn(arg0);
1667
1692
  }
1668
1693
  export function __wbindgen_cast_0000000000000001(arg0, arg1) {
1669
- // Cast intrinsic for `Closure(Closure { dtor_idx: 21, function: Function { arguments: [Externref], shim_idx: 23, ret: Unit, inner_ret: Some(Unit) }, mutable: true }) -> Externref`.
1670
- const ret = makeMutClosure(arg0, arg1, wasm.wasm_bindgen_179aebea8749b817___closure__destroy___dyn_core_dde6c4b55a98adc4___ops__function__FnMut__wasm_bindgen_179aebea8749b817___JsValue____Output_______, wasm_bindgen_179aebea8749b817___convert__closures_____invoke___wasm_bindgen_179aebea8749b817___JsValue______true_);
1694
+ // Cast intrinsic for `Closure(Closure { dtor_idx: 3637, function: Function { arguments: [Externref], shim_idx: 3638, ret: Result(Unit), inner_ret: Some(Result(Unit)) }, mutable: true }) -> Externref`.
1695
+ const ret = makeMutClosure(arg0, arg1, wasm.wasm_bindgen__closure__destroy__hcbea9f36b367d3d8, wasm_bindgen__convert__closures_____invoke__h240eec0b57535315);
1671
1696
  return ret;
1672
1697
  }
1673
1698
  export function __wbindgen_cast_0000000000000002(arg0, arg1) {
1674
- // Cast intrinsic for `Closure(Closure { dtor_idx: 3543, function: Function { arguments: [Externref], shim_idx: 3544, ret: Result(Unit), inner_ret: Some(Result(Unit)) }, mutable: true }) -> Externref`.
1675
- const ret = makeMutClosure(arg0, arg1, wasm.wasm_bindgen_179aebea8749b817___closure__destroy___dyn_core_dde6c4b55a98adc4___ops__function__FnMut__wasm_bindgen_179aebea8749b817___JsValue____Output___core_dde6c4b55a98adc4___result__Result_____wasm_bindgen_179aebea8749b817___JsError___, wasm_bindgen_179aebea8749b817___convert__closures_____invoke___wasm_bindgen_179aebea8749b817___JsValue__core_dde6c4b55a98adc4___result__Result_____wasm_bindgen_179aebea8749b817___JsError___true_);
1699
+ // Cast intrinsic for `Closure(Closure { dtor_idx: 4, function: Function { arguments: [Externref], shim_idx: 5, ret: Unit, inner_ret: Some(Unit) }, mutable: true }) -> Externref`.
1700
+ const ret = makeMutClosure(arg0, arg1, wasm.wasm_bindgen__closure__destroy__h60b03db66f836c8f, wasm_bindgen__convert__closures_____invoke__h4c966767d2820559);
1676
1701
  return ret;
1677
1702
  }
1678
1703
  export function __wbindgen_cast_0000000000000003(arg0) {
@@ -1705,19 +1730,19 @@ export function __wbindgen_init_externref_table() {
1705
1730
  table.set(offset + 2, true);
1706
1731
  table.set(offset + 3, false);
1707
1732
  }
1708
- function wasm_bindgen_179aebea8749b817___convert__closures_____invoke___wasm_bindgen_179aebea8749b817___JsValue______true_(arg0, arg1, arg2) {
1709
- wasm.wasm_bindgen_179aebea8749b817___convert__closures_____invoke___wasm_bindgen_179aebea8749b817___JsValue______true_(arg0, arg1, arg2);
1733
+ function wasm_bindgen__convert__closures_____invoke__h4c966767d2820559(arg0, arg1, arg2) {
1734
+ wasm.wasm_bindgen__convert__closures_____invoke__h4c966767d2820559(arg0, arg1, arg2);
1710
1735
  }
1711
1736
 
1712
- function wasm_bindgen_179aebea8749b817___convert__closures_____invoke___wasm_bindgen_179aebea8749b817___JsValue__core_dde6c4b55a98adc4___result__Result_____wasm_bindgen_179aebea8749b817___JsError___true_(arg0, arg1, arg2) {
1713
- const ret = wasm.wasm_bindgen_179aebea8749b817___convert__closures_____invoke___wasm_bindgen_179aebea8749b817___JsValue__core_dde6c4b55a98adc4___result__Result_____wasm_bindgen_179aebea8749b817___JsError___true_(arg0, arg1, arg2);
1737
+ function wasm_bindgen__convert__closures_____invoke__h240eec0b57535315(arg0, arg1, arg2) {
1738
+ const ret = wasm.wasm_bindgen__convert__closures_____invoke__h240eec0b57535315(arg0, arg1, arg2);
1714
1739
  if (ret[1]) {
1715
1740
  throw takeFromExternrefTable0(ret[0]);
1716
1741
  }
1717
1742
  }
1718
1743
 
1719
- function wasm_bindgen_179aebea8749b817___convert__closures_____invoke___js_sys_3900621f03eac4bc___Function_fn_wasm_bindgen_179aebea8749b817___JsValue_____wasm_bindgen_179aebea8749b817___sys__Undefined___js_sys_3900621f03eac4bc___Function_fn_wasm_bindgen_179aebea8749b817___JsValue_____wasm_bindgen_179aebea8749b817___sys__Undefined_______true_(arg0, arg1, arg2, arg3) {
1720
- wasm.wasm_bindgen_179aebea8749b817___convert__closures_____invoke___js_sys_3900621f03eac4bc___Function_fn_wasm_bindgen_179aebea8749b817___JsValue_____wasm_bindgen_179aebea8749b817___sys__Undefined___js_sys_3900621f03eac4bc___Function_fn_wasm_bindgen_179aebea8749b817___JsValue_____wasm_bindgen_179aebea8749b817___sys__Undefined_______true_(arg0, arg1, arg2, arg3);
1744
+ function wasm_bindgen__convert__closures_____invoke__h8e9fc28c4e841be2(arg0, arg1, arg2, arg3) {
1745
+ wasm.wasm_bindgen__convert__closures_____invoke__h8e9fc28c4e841be2(arg0, arg1, arg2, arg3);
1721
1746
  }
1722
1747
 
1723
1748
  const ModuleInfoFinalization = (typeof FinalizationRegistry === 'undefined')
Binary file
@@ -2,8 +2,8 @@
2
2
  /* eslint-disable */
3
3
  export const memory: WebAssembly.Memory;
4
4
  export const __wbg_moduleinfo_free: (a: number, b: number) => void;
5
- export const batchExtractBytes: (a: number, b: number, c: number, d: number, e: number) => any;
6
- export const batchExtractBytesSync: (a: number, b: number, c: number, d: number, e: number) => [number, number, number];
5
+ export const batchExtractBytes: (a: number, b: number, c: number, d: number, e: number, f: number, g: number) => any;
6
+ export const batchExtractBytesSync: (a: number, b: number, c: number, d: number, e: number, f: number, g: number) => [number, number, number];
7
7
  export const batchExtractFiles: (a: number, b: number, c: number) => any;
8
8
  export const batchExtractFilesSync: () => [number, number, number];
9
9
  export const clear_ocr_backends: () => [number, number];
@@ -45,11 +45,11 @@ export const read_block_from_callback_wasm: (a: number, b: number, c: number, d:
45
45
  export const write_block_from_callback_wasm: (a: number, b: number, c: number) => number;
46
46
  export const compress: (a: number, b: number, c: number, d: number) => [number, number, number];
47
47
  export const decompress: (a: any, b: number, c: number, d: any) => [number, number];
48
- export const wasm_bindgen_179aebea8749b817___closure__destroy___dyn_core_dde6c4b55a98adc4___ops__function__FnMut__wasm_bindgen_179aebea8749b817___JsValue____Output_______: (a: number, b: number) => void;
49
- export const wasm_bindgen_179aebea8749b817___closure__destroy___dyn_core_dde6c4b55a98adc4___ops__function__FnMut__wasm_bindgen_179aebea8749b817___JsValue____Output___core_dde6c4b55a98adc4___result__Result_____wasm_bindgen_179aebea8749b817___JsError___: (a: number, b: number) => void;
50
- export const wasm_bindgen_179aebea8749b817___convert__closures_____invoke___wasm_bindgen_179aebea8749b817___JsValue__core_dde6c4b55a98adc4___result__Result_____wasm_bindgen_179aebea8749b817___JsError___true_: (a: number, b: number, c: any) => [number, number];
51
- export const wasm_bindgen_179aebea8749b817___convert__closures_____invoke___js_sys_3900621f03eac4bc___Function_fn_wasm_bindgen_179aebea8749b817___JsValue_____wasm_bindgen_179aebea8749b817___sys__Undefined___js_sys_3900621f03eac4bc___Function_fn_wasm_bindgen_179aebea8749b817___JsValue_____wasm_bindgen_179aebea8749b817___sys__Undefined_______true_: (a: number, b: number, c: any, d: any) => void;
52
- export const wasm_bindgen_179aebea8749b817___convert__closures_____invoke___wasm_bindgen_179aebea8749b817___JsValue______true_: (a: number, b: number, c: any) => void;
48
+ export const wasm_bindgen__closure__destroy__hcbea9f36b367d3d8: (a: number, b: number) => void;
49
+ export const wasm_bindgen__closure__destroy__h60b03db66f836c8f: (a: number, b: number) => void;
50
+ export const wasm_bindgen__convert__closures_____invoke__h240eec0b57535315: (a: number, b: number, c: any) => [number, number];
51
+ export const wasm_bindgen__convert__closures_____invoke__h8e9fc28c4e841be2: (a: number, b: number, c: any, d: any) => void;
52
+ export const wasm_bindgen__convert__closures_____invoke__h4c966767d2820559: (a: number, b: number, c: any) => void;
53
53
  export const __wbindgen_externrefs: WebAssembly.Table;
54
54
  export const __wbindgen_malloc_command_export: (a: number, b: number) => number;
55
55
  export const __wbindgen_realloc_command_export: (a: number, b: number, c: number, d: number) => number;