@kreuzberg/wasm 4.4.6 → 4.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -22,7 +22,7 @@
22
22
  <img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
23
23
  </a>
24
24
  <a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
25
- <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.4.6" alt="Go">
25
+ <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.5.1" alt="Go">
26
26
  </a>
27
27
  <a href="https://www.nuget.org/packages/Kreuzberg/">
28
28
  <img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
@@ -47,6 +47,9 @@
47
47
  <a href="https://docs.kreuzberg.dev">
48
48
  <img src="https://img.shields.io/badge/docs-kreuzberg.dev-blue" alt="Documentation">
49
49
  </a>
50
+ <a href="https://huggingface.co/Kreuzberg">
51
+ <img src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Models-yellow" alt="Hugging Face">
52
+ </a>
50
53
  </div>
51
54
 
52
55
  <img width="1128" height="191" alt="Banner2" src="https://github.com/user-attachments/assets/419fc06c-8313-4324-b159-4b4d3cfce5c0" />
@@ -485,6 +488,30 @@ For advanced configuration options including language detection, table extractio
485
488
 
486
489
  **[Configuration Guide](https://kreuzberg.dev/guides/configuration/)**
487
490
 
491
+ ## Platform Limitations
492
+
493
+ WASM runs in single-threaded environments without access to ONNX Runtime, which constrains some features:
494
+
495
+ ### Unsupported Features
496
+
497
+ - **Layout Detection** – Requires RT-DETR model inference via ONNX Runtime, which is unavailable in WebAssembly
498
+ - **Hardware Acceleration** – No GPU support (AccelerationConfig is not applicable)
499
+ - **Concurrency Configuration** – Single-threaded WASM environment (ConcurrencyConfig does not apply)
500
+ - **Email Codepage Configuration** – EmailConfig is not supported in WASM
501
+
502
+ ### Supported Features
503
+
504
+ - **Text Extraction** – Full text content from all supported formats
505
+ - **OCR via Tesseract WASM** – Scanned document and image OCR using browser-native Tesseract
506
+ - **Embeddings** – FastEmbed-based vector generation
507
+ - **Chunking** – Text segmentation for RAG pipelines
508
+ - **Metadata Extraction** – Document properties, creation dates, page counts
509
+ - **Table Extraction** – Structured table data from PDFs and spreadsheets
510
+ - **Language Detection** – Identify document language
511
+ - **Image Extraction** – Embedded images from documents
512
+
513
+ All 88+ file formats supported by Kreuzberg are available in WASM, with the exception that features requiring ONNX Runtime (layout detection) will fail gracefully with an unsupported error.
514
+
488
515
  ## Documentation
489
516
 
490
517
  - **[Official Documentation](https://kreuzberg.dev/)**
@@ -22,7 +22,7 @@
22
22
  <img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
23
23
  </a>
24
24
  <a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
25
- <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.4.6" alt="Go">
25
+ <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.5.1" alt="Go">
26
26
  </a>
27
27
  <a href="https://www.nuget.org/packages/Kreuzberg/">
28
28
  <img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
@@ -47,6 +47,9 @@
47
47
  <a href="https://docs.kreuzberg.dev">
48
48
  <img src="https://img.shields.io/badge/docs-kreuzberg.dev-blue" alt="Documentation">
49
49
  </a>
50
+ <a href="https://huggingface.co/Kreuzberg">
51
+ <img src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Models-yellow" alt="Hugging Face">
52
+ </a>
50
53
  </div>
51
54
 
52
55
  <img width="1128" height="191" alt="Banner2" src="https://github.com/user-attachments/assets/419fc06c-8313-4324-b159-4b4d3cfce5c0" />
@@ -485,6 +488,30 @@ For advanced configuration options including language detection, table extractio
485
488
 
486
489
  **[Configuration Guide](https://kreuzberg.dev/guides/configuration/)**
487
490
 
491
+ ## Platform Limitations
492
+
493
+ WASM runs in single-threaded environments without access to ONNX Runtime, which constrains some features:
494
+
495
+ ### Unsupported Features
496
+
497
+ - **Layout Detection** – Requires RT-DETR model inference via ONNX Runtime, which is unavailable in WebAssembly
498
+ - **Hardware Acceleration** – No GPU support (AccelerationConfig is not applicable)
499
+ - **Concurrency Configuration** – Single-threaded WASM environment (ConcurrencyConfig does not apply)
500
+ - **Email Codepage Configuration** – EmailConfig is not supported in WASM
501
+
502
+ ### Supported Features
503
+
504
+ - **Text Extraction** – Full text content from all supported formats
505
+ - **OCR via Tesseract WASM** – Scanned document and image OCR using browser-native Tesseract
506
+ - **Embeddings** – FastEmbed-based vector generation
507
+ - **Chunking** – Text segmentation for RAG pipelines
508
+ - **Metadata Extraction** – Document properties, creation dates, page counts
509
+ - **Table Extraction** – Structured table data from PDFs and spreadsheets
510
+ - **Language Detection** – Identify document language
511
+ - **Image Extraction** – Embedded images from documents
512
+
513
+ All 88+ file formats supported by Kreuzberg are available in WASM, with the exception that features requiring ONNX Runtime (layout detection) will fail gracefully with an unsupported error.
514
+
488
515
  ## Documentation
489
516
 
490
517
  - **[Official Documentation](https://kreuzberg.dev/)**
@@ -29,6 +29,7 @@ export class ModuleInfo {
29
29
  * * `dataList: Uint8Array[]` - Array of document bytes
30
30
  * * `mimeTypes: string[]` - Array of MIME types (must match dataList length)
31
31
  * * `config?: object` - Optional extraction configuration (applied to all)
32
+ * * `fileConfigs?: (object | null)[]` - Optional per-file config overrides (must match dataList length if provided)
32
33
  *
33
34
  * # Returns
34
35
  *
@@ -36,7 +37,8 @@ export class ModuleInfo {
36
37
  *
37
38
  * # Throws
38
39
  *
39
- * Rejects if dataList and mimeTypes lengths don't match.
40
+ * Rejects if dataList and mimeTypes lengths don't match, or if fileConfigs
41
+ * is provided and its length doesn't match dataList.
40
42
  *
41
43
  * # Example
42
44
  *
@@ -57,21 +59,26 @@ export class ModuleInfo {
57
59
  * ['application/pdf', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
58
60
  * null
59
61
  * );
62
+ *
63
+ * // With per-file configs:
64
+ * const fileConfigs = [{ ocrConfig: { language: 'eng' } }, null];
65
+ * const results2 = await batchExtractBytes(buffers, mimeTypes, null, fileConfigs);
60
66
  * ```
61
67
  */
62
- export function batchExtractBytes(data_list: Uint8Array[], mime_types: string[], config?: any | null): Promise<any>;
68
+ export function batchExtractBytes(data_list: Uint8Array[], mime_types: string[], config?: any | null, file_configs?: any[] | null): Promise<any>;
63
69
 
64
70
  /**
65
71
  * Batch extract from multiple byte arrays (synchronous).
66
72
  *
67
73
  * Processes multiple document byte arrays in parallel. All documents use the
68
- * same extraction configuration.
74
+ * same extraction configuration unless per-file configs are provided.
69
75
  *
70
76
  * # JavaScript Parameters
71
77
  *
72
78
  * * `dataList: Uint8Array[]` - Array of document bytes
73
79
  * * `mimeTypes: string[]` - Array of MIME types (must match dataList length)
74
80
  * * `config?: object` - Optional extraction configuration (applied to all)
81
+ * * `fileConfigs?: (object | null)[]` - Optional per-file config overrides (must match dataList length if provided)
75
82
  *
76
83
  * # Returns
77
84
  *
@@ -79,7 +86,8 @@ export function batchExtractBytes(data_list: Uint8Array[], mime_types: string[],
79
86
  *
80
87
  * # Throws
81
88
  *
82
- * Throws if dataList and mimeTypes lengths don't match.
89
+ * Throws if dataList and mimeTypes lengths don't match, or if fileConfigs
90
+ * is provided and its length doesn't match dataList.
83
91
  *
84
92
  * # Example
85
93
  *
@@ -93,9 +101,13 @@ export function batchExtractBytes(data_list: Uint8Array[], mime_types: string[],
93
101
  * results.forEach((result, i) => {
94
102
  * console.log(`Document ${i}: ${result.content.substring(0, 50)}...`);
95
103
  * });
104
+ *
105
+ * // With per-file configs:
106
+ * const fileConfigs = [{ ocrConfig: { language: 'eng' } }, null, null];
107
+ * const results2 = batchExtractBytesSync(buffers, mimeTypes, null, fileConfigs);
96
108
  * ```
97
109
  */
98
- export function batchExtractBytesSync(data_list: Uint8Array[], mime_types: string[], config?: any | null): any;
110
+ export function batchExtractBytesSync(data_list: Uint8Array[], mime_types: string[], config?: any | null, file_configs?: any[] | null): any;
99
111
 
100
112
  /**
101
113
  * Batch extract from multiple Files or Blobs (asynchronous).
@@ -55,6 +55,7 @@ if (Symbol.dispose) ModuleInfo.prototype[Symbol.dispose] = ModuleInfo.prototype.
55
55
  * * `dataList: Uint8Array[]` - Array of document bytes
56
56
  * * `mimeTypes: string[]` - Array of MIME types (must match dataList length)
57
57
  * * `config?: object` - Optional extraction configuration (applied to all)
58
+ * * `fileConfigs?: (object | null)[]` - Optional per-file config overrides (must match dataList length if provided)
58
59
  *
59
60
  * # Returns
60
61
  *
@@ -62,7 +63,8 @@ if (Symbol.dispose) ModuleInfo.prototype[Symbol.dispose] = ModuleInfo.prototype.
62
63
  *
63
64
  * # Throws
64
65
  *
65
- * Rejects if dataList and mimeTypes lengths don't match.
66
+ * Rejects if dataList and mimeTypes lengths don't match, or if fileConfigs
67
+ * is provided and its length doesn't match dataList.
66
68
  *
67
69
  * # Example
68
70
  *
@@ -83,18 +85,25 @@ if (Symbol.dispose) ModuleInfo.prototype[Symbol.dispose] = ModuleInfo.prototype.
83
85
  * ['application/pdf', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
84
86
  * null
85
87
  * );
88
+ *
89
+ * // With per-file configs:
90
+ * const fileConfigs = [{ ocrConfig: { language: 'eng' } }, null];
91
+ * const results2 = await batchExtractBytes(buffers, mimeTypes, null, fileConfigs);
86
92
  * ```
87
93
  * @param {Uint8Array[]} data_list
88
94
  * @param {string[]} mime_types
89
95
  * @param {any | null} [config]
96
+ * @param {any[] | null} [file_configs]
90
97
  * @returns {Promise<any>}
91
98
  */
92
- export function batchExtractBytes(data_list, mime_types, config) {
99
+ export function batchExtractBytes(data_list, mime_types, config, file_configs) {
93
100
  const ptr0 = passArrayJsValueToWasm0(data_list, wasm.__wbindgen_malloc_command_export);
94
101
  const len0 = WASM_VECTOR_LEN;
95
102
  const ptr1 = passArrayJsValueToWasm0(mime_types, wasm.__wbindgen_malloc_command_export);
96
103
  const len1 = WASM_VECTOR_LEN;
97
- const ret = wasm.batchExtractBytes(ptr0, len0, ptr1, len1, isLikeNone(config) ? 0 : addToExternrefTable0(config));
104
+ var ptr2 = isLikeNone(file_configs) ? 0 : passArrayJsValueToWasm0(file_configs, wasm.__wbindgen_malloc_command_export);
105
+ var len2 = WASM_VECTOR_LEN;
106
+ const ret = wasm.batchExtractBytes(ptr0, len0, ptr1, len1, isLikeNone(config) ? 0 : addToExternrefTable0(config), ptr2, len2);
98
107
  return ret;
99
108
  }
100
109
 
@@ -102,13 +111,14 @@ export function batchExtractBytes(data_list, mime_types, config) {
102
111
  * Batch extract from multiple byte arrays (synchronous).
103
112
  *
104
113
  * Processes multiple document byte arrays in parallel. All documents use the
105
- * same extraction configuration.
114
+ * same extraction configuration unless per-file configs are provided.
106
115
  *
107
116
  * # JavaScript Parameters
108
117
  *
109
118
  * * `dataList: Uint8Array[]` - Array of document bytes
110
119
  * * `mimeTypes: string[]` - Array of MIME types (must match dataList length)
111
120
  * * `config?: object` - Optional extraction configuration (applied to all)
121
+ * * `fileConfigs?: (object | null)[]` - Optional per-file config overrides (must match dataList length if provided)
112
122
  *
113
123
  * # Returns
114
124
  *
@@ -116,7 +126,8 @@ export function batchExtractBytes(data_list, mime_types, config) {
116
126
  *
117
127
  * # Throws
118
128
  *
119
- * Throws if dataList and mimeTypes lengths don't match.
129
+ * Throws if dataList and mimeTypes lengths don't match, or if fileConfigs
130
+ * is provided and its length doesn't match dataList.
120
131
  *
121
132
  * # Example
122
133
  *
@@ -130,18 +141,25 @@ export function batchExtractBytes(data_list, mime_types, config) {
130
141
  * results.forEach((result, i) => {
131
142
  * console.log(`Document ${i}: ${result.content.substring(0, 50)}...`);
132
143
  * });
144
+ *
145
+ * // With per-file configs:
146
+ * const fileConfigs = [{ ocrConfig: { language: 'eng' } }, null, null];
147
+ * const results2 = batchExtractBytesSync(buffers, mimeTypes, null, fileConfigs);
133
148
  * ```
134
149
  * @param {Uint8Array[]} data_list
135
150
  * @param {string[]} mime_types
136
151
  * @param {any | null} [config]
152
+ * @param {any[] | null} [file_configs]
137
153
  * @returns {any}
138
154
  */
139
- export function batchExtractBytesSync(data_list, mime_types, config) {
155
+ export function batchExtractBytesSync(data_list, mime_types, config, file_configs) {
140
156
  const ptr0 = passArrayJsValueToWasm0(data_list, wasm.__wbindgen_malloc_command_export);
141
157
  const len0 = WASM_VECTOR_LEN;
142
158
  const ptr1 = passArrayJsValueToWasm0(mime_types, wasm.__wbindgen_malloc_command_export);
143
159
  const len1 = WASM_VECTOR_LEN;
144
- const ret = wasm.batchExtractBytesSync(ptr0, len0, ptr1, len1, isLikeNone(config) ? 0 : addToExternrefTable0(config));
160
+ var ptr2 = isLikeNone(file_configs) ? 0 : passArrayJsValueToWasm0(file_configs, wasm.__wbindgen_malloc_command_export);
161
+ var len2 = WASM_VECTOR_LEN;
162
+ const ret = wasm.batchExtractBytesSync(ptr0, len0, ptr1, len1, isLikeNone(config) ? 0 : addToExternrefTable0(config), ptr2, len2);
145
163
  if (ret[2]) {
146
164
  throw takeFromExternrefTable0(ret[1]);
147
165
  }
@@ -1345,6 +1363,10 @@ function __wbg_get_imports() {
1345
1363
  const ret = typeof(arg0) === 'function';
1346
1364
  return ret;
1347
1365
  },
1366
+ __wbg___wbindgen_is_null_0b605fc6b167c56f: function(arg0) {
1367
+ const ret = arg0 === null;
1368
+ return ret;
1369
+ },
1348
1370
  __wbg___wbindgen_is_object_781bc9f159099513: function(arg0) {
1349
1371
  const val = arg0;
1350
1372
  const ret = typeof(val) === 'object' && val !== null;
@@ -1448,6 +1470,9 @@ function __wbg_get_imports() {
1448
1470
  __wbg_getRandomValues_3f44b700395062e5: function() { return handleError(function (arg0, arg1) {
1449
1471
  globalThis.crypto.getRandomValues(getArrayU8FromWasm0(arg0, arg1));
1450
1472
  }, arguments); },
1473
+ __wbg_getRandomValues_76dfc69825c9c552: function() { return handleError(function (arg0, arg1) {
1474
+ globalThis.crypto.getRandomValues(getArrayU8FromWasm0(arg0, arg1));
1475
+ }, arguments); },
1451
1476
  __wbg_getTime_1dad7b5386ddd2d9: function(arg0) {
1452
1477
  const ret = arg0.getTime();
1453
1478
  return ret;
@@ -1760,13 +1785,13 @@ function __wbg_get_imports() {
1760
1785
  console.warn(arg0);
1761
1786
  },
1762
1787
  __wbindgen_cast_0000000000000001: function(arg0, arg1) {
1763
- // Cast intrinsic for `Closure(Closure { dtor_idx: 2, function: Function { arguments: [Externref], shim_idx: 3, ret: Unit, inner_ret: Some(Unit) }, mutable: true }) -> Externref`.
1764
- const ret = makeMutClosure(arg0, arg1, wasm.wasm_bindgen__closure__destroy__hb1815f7351a8f99b, wasm_bindgen__convert__closures_____invoke__h10e2de02a95760ed);
1788
+ // Cast intrinsic for `Closure(Closure { dtor_idx: 3637, function: Function { arguments: [Externref], shim_idx: 3638, ret: Result(Unit), inner_ret: Some(Result(Unit)) }, mutable: true }) -> Externref`.
1789
+ const ret = makeMutClosure(arg0, arg1, wasm.wasm_bindgen__closure__destroy__hcbea9f36b367d3d8, wasm_bindgen__convert__closures_____invoke__h240eec0b57535315);
1765
1790
  return ret;
1766
1791
  },
1767
1792
  __wbindgen_cast_0000000000000002: function(arg0, arg1) {
1768
- // Cast intrinsic for `Closure(Closure { dtor_idx: 3600, function: Function { arguments: [Externref], shim_idx: 3601, ret: Result(Unit), inner_ret: Some(Result(Unit)) }, mutable: true }) -> Externref`.
1769
- const ret = makeMutClosure(arg0, arg1, wasm.wasm_bindgen__closure__destroy__hcbea9f36b367d3d8, wasm_bindgen__convert__closures_____invoke__h240eec0b57535315);
1793
+ // Cast intrinsic for `Closure(Closure { dtor_idx: 4, function: Function { arguments: [Externref], shim_idx: 5, ret: Unit, inner_ret: Some(Unit) }, mutable: true }) -> Externref`.
1794
+ const ret = makeMutClosure(arg0, arg1, wasm.wasm_bindgen__closure__destroy__h594455dbca0a1257, wasm_bindgen__convert__closures_____invoke__h56e58b9b796ad689);
1770
1795
  return ret;
1771
1796
  },
1772
1797
  __wbindgen_cast_0000000000000003: function(arg0) {
@@ -1808,8 +1833,8 @@ function __wbg_get_imports() {
1808
1833
  };
1809
1834
  }
1810
1835
 
1811
- function wasm_bindgen__convert__closures_____invoke__h10e2de02a95760ed(arg0, arg1, arg2) {
1812
- wasm.wasm_bindgen__convert__closures_____invoke__h10e2de02a95760ed(arg0, arg1, arg2);
1836
+ function wasm_bindgen__convert__closures_____invoke__h56e58b9b796ad689(arg0, arg1, arg2) {
1837
+ wasm.wasm_bindgen__convert__closures_____invoke__h56e58b9b796ad689(arg0, arg1, arg2);
1813
1838
  }
1814
1839
 
1815
1840
  function wasm_bindgen__convert__closures_____invoke__h240eec0b57535315(arg0, arg1, arg2) {
@@ -53,6 +53,7 @@ if (Symbol.dispose) ModuleInfo.prototype[Symbol.dispose] = ModuleInfo.prototype.
53
53
  * * `dataList: Uint8Array[]` - Array of document bytes
54
54
  * * `mimeTypes: string[]` - Array of MIME types (must match dataList length)
55
55
  * * `config?: object` - Optional extraction configuration (applied to all)
56
+ * * `fileConfigs?: (object | null)[]` - Optional per-file config overrides (must match dataList length if provided)
56
57
  *
57
58
  * # Returns
58
59
  *
@@ -60,7 +61,8 @@ if (Symbol.dispose) ModuleInfo.prototype[Symbol.dispose] = ModuleInfo.prototype.
60
61
  *
61
62
  * # Throws
62
63
  *
63
- * Rejects if dataList and mimeTypes lengths don't match.
64
+ * Rejects if dataList and mimeTypes lengths don't match, or if fileConfigs
65
+ * is provided and its length doesn't match dataList.
64
66
  *
65
67
  * # Example
66
68
  *
@@ -81,18 +83,25 @@ if (Symbol.dispose) ModuleInfo.prototype[Symbol.dispose] = ModuleInfo.prototype.
81
83
  * ['application/pdf', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
82
84
  * null
83
85
  * );
86
+ *
87
+ * // With per-file configs:
88
+ * const fileConfigs = [{ ocrConfig: { language: 'eng' } }, null];
89
+ * const results2 = await batchExtractBytes(buffers, mimeTypes, null, fileConfigs);
84
90
  * ```
85
91
  * @param {Uint8Array[]} data_list
86
92
  * @param {string[]} mime_types
87
93
  * @param {any | null} [config]
94
+ * @param {any[] | null} [file_configs]
88
95
  * @returns {Promise<any>}
89
96
  */
90
- export function batchExtractBytes(data_list, mime_types, config) {
97
+ export function batchExtractBytes(data_list, mime_types, config, file_configs) {
91
98
  const ptr0 = passArrayJsValueToWasm0(data_list, wasm.__wbindgen_malloc_command_export);
92
99
  const len0 = WASM_VECTOR_LEN;
93
100
  const ptr1 = passArrayJsValueToWasm0(mime_types, wasm.__wbindgen_malloc_command_export);
94
101
  const len1 = WASM_VECTOR_LEN;
95
- const ret = wasm.batchExtractBytes(ptr0, len0, ptr1, len1, isLikeNone(config) ? 0 : addToExternrefTable0(config));
102
+ var ptr2 = isLikeNone(file_configs) ? 0 : passArrayJsValueToWasm0(file_configs, wasm.__wbindgen_malloc_command_export);
103
+ var len2 = WASM_VECTOR_LEN;
104
+ const ret = wasm.batchExtractBytes(ptr0, len0, ptr1, len1, isLikeNone(config) ? 0 : addToExternrefTable0(config), ptr2, len2);
96
105
  return ret;
97
106
  }
98
107
 
@@ -100,13 +109,14 @@ export function batchExtractBytes(data_list, mime_types, config) {
100
109
  * Batch extract from multiple byte arrays (synchronous).
101
110
  *
102
111
  * Processes multiple document byte arrays in parallel. All documents use the
103
- * same extraction configuration.
112
+ * same extraction configuration unless per-file configs are provided.
104
113
  *
105
114
  * # JavaScript Parameters
106
115
  *
107
116
  * * `dataList: Uint8Array[]` - Array of document bytes
108
117
  * * `mimeTypes: string[]` - Array of MIME types (must match dataList length)
109
118
  * * `config?: object` - Optional extraction configuration (applied to all)
119
+ * * `fileConfigs?: (object | null)[]` - Optional per-file config overrides (must match dataList length if provided)
110
120
  *
111
121
  * # Returns
112
122
  *
@@ -114,7 +124,8 @@ export function batchExtractBytes(data_list, mime_types, config) {
114
124
  *
115
125
  * # Throws
116
126
  *
117
- * Throws if dataList and mimeTypes lengths don't match.
127
+ * Throws if dataList and mimeTypes lengths don't match, or if fileConfigs
128
+ * is provided and its length doesn't match dataList.
118
129
  *
119
130
  * # Example
120
131
  *
@@ -128,18 +139,25 @@ export function batchExtractBytes(data_list, mime_types, config) {
128
139
  * results.forEach((result, i) => {
129
140
  * console.log(`Document ${i}: ${result.content.substring(0, 50)}...`);
130
141
  * });
142
+ *
143
+ * // With per-file configs:
144
+ * const fileConfigs = [{ ocrConfig: { language: 'eng' } }, null, null];
145
+ * const results2 = batchExtractBytesSync(buffers, mimeTypes, null, fileConfigs);
131
146
  * ```
132
147
  * @param {Uint8Array[]} data_list
133
148
  * @param {string[]} mime_types
134
149
  * @param {any | null} [config]
150
+ * @param {any[] | null} [file_configs]
135
151
  * @returns {any}
136
152
  */
137
- export function batchExtractBytesSync(data_list, mime_types, config) {
153
+ export function batchExtractBytesSync(data_list, mime_types, config, file_configs) {
138
154
  const ptr0 = passArrayJsValueToWasm0(data_list, wasm.__wbindgen_malloc_command_export);
139
155
  const len0 = WASM_VECTOR_LEN;
140
156
  const ptr1 = passArrayJsValueToWasm0(mime_types, wasm.__wbindgen_malloc_command_export);
141
157
  const len1 = WASM_VECTOR_LEN;
142
- const ret = wasm.batchExtractBytesSync(ptr0, len0, ptr1, len1, isLikeNone(config) ? 0 : addToExternrefTable0(config));
158
+ var ptr2 = isLikeNone(file_configs) ? 0 : passArrayJsValueToWasm0(file_configs, wasm.__wbindgen_malloc_command_export);
159
+ var len2 = WASM_VECTOR_LEN;
160
+ const ret = wasm.batchExtractBytesSync(ptr0, len0, ptr1, len1, isLikeNone(config) ? 0 : addToExternrefTable0(config), ptr2, len2);
143
161
  if (ret[2]) {
144
162
  throw takeFromExternrefTable0(ret[1]);
145
163
  }
@@ -1251,6 +1269,10 @@ export function __wbg___wbindgen_is_function_3c846841762788c1(arg0) {
1251
1269
  const ret = typeof(arg0) === 'function';
1252
1270
  return ret;
1253
1271
  }
1272
+ export function __wbg___wbindgen_is_null_0b605fc6b167c56f(arg0) {
1273
+ const ret = arg0 === null;
1274
+ return ret;
1275
+ }
1254
1276
  export function __wbg___wbindgen_is_object_781bc9f159099513(arg0) {
1255
1277
  const val = arg0;
1256
1278
  const ret = typeof(val) === 'object' && val !== null;
@@ -1354,6 +1376,9 @@ export function __wbg_from_4bdf88943703fd48(arg0) {
1354
1376
  export function __wbg_getRandomValues_3f44b700395062e5() { return handleError(function (arg0, arg1) {
1355
1377
  globalThis.crypto.getRandomValues(getArrayU8FromWasm0(arg0, arg1));
1356
1378
  }, arguments); }
1379
+ export function __wbg_getRandomValues_76dfc69825c9c552() { return handleError(function (arg0, arg1) {
1380
+ globalThis.crypto.getRandomValues(getArrayU8FromWasm0(arg0, arg1));
1381
+ }, arguments); }
1357
1382
  export function __wbg_getTime_1dad7b5386ddd2d9(arg0) {
1358
1383
  const ret = arg0.getTime();
1359
1384
  return ret;
@@ -1666,13 +1691,13 @@ export function __wbg_warn_69424c2d92a2fa73(arg0) {
1666
1691
  console.warn(arg0);
1667
1692
  }
1668
1693
  export function __wbindgen_cast_0000000000000001(arg0, arg1) {
1669
- // Cast intrinsic for `Closure(Closure { dtor_idx: 2, function: Function { arguments: [Externref], shim_idx: 3, ret: Unit, inner_ret: Some(Unit) }, mutable: true }) -> Externref`.
1670
- const ret = makeMutClosure(arg0, arg1, wasm.wasm_bindgen__closure__destroy__hb1815f7351a8f99b, wasm_bindgen__convert__closures_____invoke__h10e2de02a95760ed);
1694
+ // Cast intrinsic for `Closure(Closure { dtor_idx: 3637, function: Function { arguments: [Externref], shim_idx: 3638, ret: Result(Unit), inner_ret: Some(Result(Unit)) }, mutable: true }) -> Externref`.
1695
+ const ret = makeMutClosure(arg0, arg1, wasm.wasm_bindgen__closure__destroy__hcbea9f36b367d3d8, wasm_bindgen__convert__closures_____invoke__h240eec0b57535315);
1671
1696
  return ret;
1672
1697
  }
1673
1698
  export function __wbindgen_cast_0000000000000002(arg0, arg1) {
1674
- // Cast intrinsic for `Closure(Closure { dtor_idx: 3600, function: Function { arguments: [Externref], shim_idx: 3601, ret: Result(Unit), inner_ret: Some(Result(Unit)) }, mutable: true }) -> Externref`.
1675
- const ret = makeMutClosure(arg0, arg1, wasm.wasm_bindgen__closure__destroy__hcbea9f36b367d3d8, wasm_bindgen__convert__closures_____invoke__h240eec0b57535315);
1699
+ // Cast intrinsic for `Closure(Closure { dtor_idx: 4, function: Function { arguments: [Externref], shim_idx: 5, ret: Unit, inner_ret: Some(Unit) }, mutable: true }) -> Externref`.
1700
+ const ret = makeMutClosure(arg0, arg1, wasm.wasm_bindgen__closure__destroy__h594455dbca0a1257, wasm_bindgen__convert__closures_____invoke__h56e58b9b796ad689);
1676
1701
  return ret;
1677
1702
  }
1678
1703
  export function __wbindgen_cast_0000000000000003(arg0) {
@@ -1705,8 +1730,8 @@ export function __wbindgen_init_externref_table() {
1705
1730
  table.set(offset + 2, true);
1706
1731
  table.set(offset + 3, false);
1707
1732
  }
1708
- function wasm_bindgen__convert__closures_____invoke__h10e2de02a95760ed(arg0, arg1, arg2) {
1709
- wasm.wasm_bindgen__convert__closures_____invoke__h10e2de02a95760ed(arg0, arg1, arg2);
1733
+ function wasm_bindgen__convert__closures_____invoke__h56e58b9b796ad689(arg0, arg1, arg2) {
1734
+ wasm.wasm_bindgen__convert__closures_____invoke__h56e58b9b796ad689(arg0, arg1, arg2);
1710
1735
  }
1711
1736
 
1712
1737
  function wasm_bindgen__convert__closures_____invoke__h240eec0b57535315(arg0, arg1, arg2) {
Binary file
@@ -2,8 +2,8 @@
2
2
  /* eslint-disable */
3
3
  export const memory: WebAssembly.Memory;
4
4
  export const __wbg_moduleinfo_free: (a: number, b: number) => void;
5
- export const batchExtractBytes: (a: number, b: number, c: number, d: number, e: number) => any;
6
- export const batchExtractBytesSync: (a: number, b: number, c: number, d: number, e: number) => [number, number, number];
5
+ export const batchExtractBytes: (a: number, b: number, c: number, d: number, e: number, f: number, g: number) => any;
6
+ export const batchExtractBytesSync: (a: number, b: number, c: number, d: number, e: number, f: number, g: number) => [number, number, number];
7
7
  export const batchExtractFiles: (a: number, b: number, c: number) => any;
8
8
  export const batchExtractFilesSync: () => [number, number, number];
9
9
  export const clear_ocr_backends: () => [number, number];
@@ -45,11 +45,11 @@ export const read_block_from_callback_wasm: (a: number, b: number, c: number, d:
45
45
  export const write_block_from_callback_wasm: (a: number, b: number, c: number) => number;
46
46
  export const compress: (a: number, b: number, c: number, d: number) => [number, number, number];
47
47
  export const decompress: (a: any, b: number, c: number, d: any) => [number, number];
48
- export const wasm_bindgen__closure__destroy__hb1815f7351a8f99b: (a: number, b: number) => void;
49
48
  export const wasm_bindgen__closure__destroy__hcbea9f36b367d3d8: (a: number, b: number) => void;
49
+ export const wasm_bindgen__closure__destroy__h594455dbca0a1257: (a: number, b: number) => void;
50
50
  export const wasm_bindgen__convert__closures_____invoke__h240eec0b57535315: (a: number, b: number, c: any) => [number, number];
51
51
  export const wasm_bindgen__convert__closures_____invoke__h8e9fc28c4e841be2: (a: number, b: number, c: any, d: any) => void;
52
- export const wasm_bindgen__convert__closures_____invoke__h10e2de02a95760ed: (a: number, b: number, c: any) => void;
52
+ export const wasm_bindgen__convert__closures_____invoke__h56e58b9b796ad689: (a: number, b: number, c: any) => void;
53
53
  export const __wbindgen_externrefs: WebAssembly.Table;
54
54
  export const __wbindgen_malloc_command_export: (a: number, b: number) => number;
55
55
  export const __wbindgen_realloc_command_export: (a: number, b: number, c: number, d: number) => number;
package/package.json CHANGED
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "@kreuzberg/wasm",
3
- "version": "4.4.6",
3
+ "version": "4.5.1",
4
4
  "type": "module",
5
5
  "packageManager": "pnpm@10.17.0",
6
6
  "description": "Kreuzberg document intelligence - WebAssembly bindings",
7
7
  "author": {
8
8
  "name": "Na'aman Hirschfeld",
9
- "email": "nhirschfeld@gmail.com",
9
+ "email": "naaman@kreuzberg.dev",
10
10
  "url": "https://kreuzberg.dev"
11
11
  },
12
12
  "homepage": "https://kreuzberg.dev",
@@ -110,8 +110,8 @@
110
110
  "@types/node": "^25.5.0",
111
111
  "@vitest/coverage-v8": "^4.1.0",
112
112
  "@vitest/ui": "^4.1.0",
113
- "jsdom": "^28.1.0",
114
- "oxlint": "^1.55.0",
113
+ "jsdom": "^29.0.1",
114
+ "oxlint": "^1.56.0",
115
115
  "tsup": "^8.5.1",
116
116
  "typescript": "^5.9.3",
117
117
  "vitest": "^4.1.0"