@kreuzberg/wasm 4.4.6 → 4.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -1
- package/dist/pkg/README.md +28 -1
- package/dist/pkg/kreuzberg_wasm.d.ts +17 -5
- package/dist/pkg/kreuzberg_wasm.js +38 -13
- package/dist/pkg/kreuzberg_wasm_bg.js +38 -13
- package/dist/pkg/kreuzberg_wasm_bg.wasm +0 -0
- package/dist/pkg/kreuzberg_wasm_bg.wasm.d.ts +4 -4
- package/package.json +4 -4
package/README.md
CHANGED
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
<img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
|
|
23
23
|
</a>
|
|
24
24
|
<a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
|
|
25
|
-
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.4.6" alt="Go">
|
|
25
|
+
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.5.1" alt="Go">
|
|
26
26
|
</a>
|
|
27
27
|
<a href="https://www.nuget.org/packages/Kreuzberg/">
|
|
28
28
|
<img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
|
|
@@ -47,6 +47,9 @@
|
|
|
47
47
|
<a href="https://docs.kreuzberg.dev">
|
|
48
48
|
<img src="https://img.shields.io/badge/docs-kreuzberg.dev-blue" alt="Documentation">
|
|
49
49
|
</a>
|
|
50
|
+
<a href="https://huggingface.co/Kreuzberg">
|
|
51
|
+
<img src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Models-yellow" alt="Hugging Face">
|
|
52
|
+
</a>
|
|
50
53
|
</div>
|
|
51
54
|
|
|
52
55
|
<img width="1128" height="191" alt="Banner2" src="https://github.com/user-attachments/assets/419fc06c-8313-4324-b159-4b4d3cfce5c0" />
|
|
@@ -485,6 +488,30 @@ For advanced configuration options including language detection, table extractio
|
|
|
485
488
|
|
|
486
489
|
**[Configuration Guide](https://kreuzberg.dev/guides/configuration/)**
|
|
487
490
|
|
|
491
|
+
## Platform Limitations
|
|
492
|
+
|
|
493
|
+
WASM runs in single-threaded environments without access to ONNX Runtime, which constrains some features:
|
|
494
|
+
|
|
495
|
+
### Unsupported Features
|
|
496
|
+
|
|
497
|
+
- **Layout Detection** – Requires RT-DETR model inference via ONNX Runtime, which is unavailable in WebAssembly
|
|
498
|
+
- **Hardware Acceleration** – No GPU support (AccelerationConfig is not applicable)
|
|
499
|
+
- **Concurrency Configuration** – Single-threaded WASM environment (ConcurrencyConfig does not apply)
|
|
500
|
+
- **Email Codepage Configuration** – EmailConfig is not supported in WASM
|
|
501
|
+
|
|
502
|
+
### Supported Features
|
|
503
|
+
|
|
504
|
+
- **Text Extraction** – Full text content from all supported formats
|
|
505
|
+
- **OCR via Tesseract WASM** – Scanned document and image OCR using browser-native Tesseract
|
|
506
|
+
- **Embeddings** – FastEmbed-based vector generation
|
|
507
|
+
- **Chunking** – Text segmentation for RAG pipelines
|
|
508
|
+
- **Metadata Extraction** – Document properties, creation dates, page counts
|
|
509
|
+
- **Table Extraction** – Structured table data from PDFs and spreadsheets
|
|
510
|
+
- **Language Detection** – Identify document language
|
|
511
|
+
- **Image Extraction** – Embedded images from documents
|
|
512
|
+
|
|
513
|
+
All 88+ file formats supported by Kreuzberg are available in WASM, with the exception that features requiring ONNX Runtime (layout detection) will fail gracefully with an unsupported error.
|
|
514
|
+
|
|
488
515
|
## Documentation
|
|
489
516
|
|
|
490
517
|
- **[Official Documentation](https://kreuzberg.dev/)**
|
package/dist/pkg/README.md
CHANGED
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
<img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
|
|
23
23
|
</a>
|
|
24
24
|
<a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
|
|
25
|
-
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.4.6" alt="Go">
|
|
25
|
+
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.5.1" alt="Go">
|
|
26
26
|
</a>
|
|
27
27
|
<a href="https://www.nuget.org/packages/Kreuzberg/">
|
|
28
28
|
<img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
|
|
@@ -47,6 +47,9 @@
|
|
|
47
47
|
<a href="https://docs.kreuzberg.dev">
|
|
48
48
|
<img src="https://img.shields.io/badge/docs-kreuzberg.dev-blue" alt="Documentation">
|
|
49
49
|
</a>
|
|
50
|
+
<a href="https://huggingface.co/Kreuzberg">
|
|
51
|
+
<img src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Models-yellow" alt="Hugging Face">
|
|
52
|
+
</a>
|
|
50
53
|
</div>
|
|
51
54
|
|
|
52
55
|
<img width="1128" height="191" alt="Banner2" src="https://github.com/user-attachments/assets/419fc06c-8313-4324-b159-4b4d3cfce5c0" />
|
|
@@ -485,6 +488,30 @@ For advanced configuration options including language detection, table extractio
|
|
|
485
488
|
|
|
486
489
|
**[Configuration Guide](https://kreuzberg.dev/guides/configuration/)**
|
|
487
490
|
|
|
491
|
+
## Platform Limitations
|
|
492
|
+
|
|
493
|
+
WASM runs in single-threaded environments without access to ONNX Runtime, which constrains some features:
|
|
494
|
+
|
|
495
|
+
### Unsupported Features
|
|
496
|
+
|
|
497
|
+
- **Layout Detection** – Requires RT-DETR model inference via ONNX Runtime, which is unavailable in WebAssembly
|
|
498
|
+
- **Hardware Acceleration** – No GPU support (AccelerationConfig is not applicable)
|
|
499
|
+
- **Concurrency Configuration** – Single-threaded WASM environment (ConcurrencyConfig does not apply)
|
|
500
|
+
- **Email Codepage Configuration** – EmailConfig is not supported in WASM
|
|
501
|
+
|
|
502
|
+
### Supported Features
|
|
503
|
+
|
|
504
|
+
- **Text Extraction** – Full text content from all supported formats
|
|
505
|
+
- **OCR via Tesseract WASM** – Scanned document and image OCR using browser-native Tesseract
|
|
506
|
+
- **Embeddings** – FastEmbed-based vector generation
|
|
507
|
+
- **Chunking** – Text segmentation for RAG pipelines
|
|
508
|
+
- **Metadata Extraction** – Document properties, creation dates, page counts
|
|
509
|
+
- **Table Extraction** – Structured table data from PDFs and spreadsheets
|
|
510
|
+
- **Language Detection** – Identify document language
|
|
511
|
+
- **Image Extraction** – Embedded images from documents
|
|
512
|
+
|
|
513
|
+
All 88+ file formats supported by Kreuzberg are available in WASM, with the exception that features requiring ONNX Runtime (layout detection) will fail gracefully with an unsupported error.
|
|
514
|
+
|
|
488
515
|
## Documentation
|
|
489
516
|
|
|
490
517
|
- **[Official Documentation](https://kreuzberg.dev/)**
|
|
@@ -29,6 +29,7 @@ export class ModuleInfo {
|
|
|
29
29
|
* * `dataList: Uint8Array[]` - Array of document bytes
|
|
30
30
|
* * `mimeTypes: string[]` - Array of MIME types (must match dataList length)
|
|
31
31
|
* * `config?: object` - Optional extraction configuration (applied to all)
|
|
32
|
+
* * `fileConfigs?: (object | null)[]` - Optional per-file config overrides (must match dataList length if provided)
|
|
32
33
|
*
|
|
33
34
|
* # Returns
|
|
34
35
|
*
|
|
@@ -36,7 +37,8 @@ export class ModuleInfo {
|
|
|
36
37
|
*
|
|
37
38
|
* # Throws
|
|
38
39
|
*
|
|
39
|
-
* Rejects if dataList and mimeTypes lengths don't match
|
|
40
|
+
* Rejects if dataList and mimeTypes lengths don't match, or if fileConfigs
|
|
41
|
+
* is provided and its length doesn't match dataList.
|
|
40
42
|
*
|
|
41
43
|
* # Example
|
|
42
44
|
*
|
|
@@ -57,21 +59,26 @@ export class ModuleInfo {
|
|
|
57
59
|
* ['application/pdf', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
|
|
58
60
|
* null
|
|
59
61
|
* );
|
|
62
|
+
*
|
|
63
|
+
* // With per-file configs:
|
|
64
|
+
* const fileConfigs = [{ ocrConfig: { language: 'eng' } }, null];
|
|
65
|
+
* const results2 = await batchExtractBytes(buffers, mimeTypes, null, fileConfigs);
|
|
60
66
|
* ```
|
|
61
67
|
*/
|
|
62
|
-
export function batchExtractBytes(data_list: Uint8Array[], mime_types: string[], config?: any | null): Promise<any>;
|
|
68
|
+
export function batchExtractBytes(data_list: Uint8Array[], mime_types: string[], config?: any | null, file_configs?: any[] | null): Promise<any>;
|
|
63
69
|
|
|
64
70
|
/**
|
|
65
71
|
* Batch extract from multiple byte arrays (synchronous).
|
|
66
72
|
*
|
|
67
73
|
* Processes multiple document byte arrays in parallel. All documents use the
|
|
68
|
-
* same extraction configuration.
|
|
74
|
+
* same extraction configuration unless per-file configs are provided.
|
|
69
75
|
*
|
|
70
76
|
* # JavaScript Parameters
|
|
71
77
|
*
|
|
72
78
|
* * `dataList: Uint8Array[]` - Array of document bytes
|
|
73
79
|
* * `mimeTypes: string[]` - Array of MIME types (must match dataList length)
|
|
74
80
|
* * `config?: object` - Optional extraction configuration (applied to all)
|
|
81
|
+
* * `fileConfigs?: (object | null)[]` - Optional per-file config overrides (must match dataList length if provided)
|
|
75
82
|
*
|
|
76
83
|
* # Returns
|
|
77
84
|
*
|
|
@@ -79,7 +86,8 @@ export function batchExtractBytes(data_list: Uint8Array[], mime_types: string[],
|
|
|
79
86
|
*
|
|
80
87
|
* # Throws
|
|
81
88
|
*
|
|
82
|
-
* Throws if dataList and mimeTypes lengths don't match
|
|
89
|
+
* Throws if dataList and mimeTypes lengths don't match, or if fileConfigs
|
|
90
|
+
* is provided and its length doesn't match dataList.
|
|
83
91
|
*
|
|
84
92
|
* # Example
|
|
85
93
|
*
|
|
@@ -93,9 +101,13 @@ export function batchExtractBytes(data_list: Uint8Array[], mime_types: string[],
|
|
|
93
101
|
* results.forEach((result, i) => {
|
|
94
102
|
* console.log(`Document ${i}: ${result.content.substring(0, 50)}...`);
|
|
95
103
|
* });
|
|
104
|
+
*
|
|
105
|
+
* // With per-file configs:
|
|
106
|
+
* const fileConfigs = [{ ocrConfig: { language: 'eng' } }, null, null];
|
|
107
|
+
* const results2 = batchExtractBytesSync(buffers, mimeTypes, null, fileConfigs);
|
|
96
108
|
* ```
|
|
97
109
|
*/
|
|
98
|
-
export function batchExtractBytesSync(data_list: Uint8Array[], mime_types: string[], config?: any | null): any;
|
|
110
|
+
export function batchExtractBytesSync(data_list: Uint8Array[], mime_types: string[], config?: any | null, file_configs?: any[] | null): any;
|
|
99
111
|
|
|
100
112
|
/**
|
|
101
113
|
* Batch extract from multiple Files or Blobs (asynchronous).
|
|
@@ -55,6 +55,7 @@ if (Symbol.dispose) ModuleInfo.prototype[Symbol.dispose] = ModuleInfo.prototype.
|
|
|
55
55
|
* * `dataList: Uint8Array[]` - Array of document bytes
|
|
56
56
|
* * `mimeTypes: string[]` - Array of MIME types (must match dataList length)
|
|
57
57
|
* * `config?: object` - Optional extraction configuration (applied to all)
|
|
58
|
+
* * `fileConfigs?: (object | null)[]` - Optional per-file config overrides (must match dataList length if provided)
|
|
58
59
|
*
|
|
59
60
|
* # Returns
|
|
60
61
|
*
|
|
@@ -62,7 +63,8 @@ if (Symbol.dispose) ModuleInfo.prototype[Symbol.dispose] = ModuleInfo.prototype.
|
|
|
62
63
|
*
|
|
63
64
|
* # Throws
|
|
64
65
|
*
|
|
65
|
-
* Rejects if dataList and mimeTypes lengths don't match
|
|
66
|
+
* Rejects if dataList and mimeTypes lengths don't match, or if fileConfigs
|
|
67
|
+
* is provided and its length doesn't match dataList.
|
|
66
68
|
*
|
|
67
69
|
* # Example
|
|
68
70
|
*
|
|
@@ -83,18 +85,25 @@ if (Symbol.dispose) ModuleInfo.prototype[Symbol.dispose] = ModuleInfo.prototype.
|
|
|
83
85
|
* ['application/pdf', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
|
|
84
86
|
* null
|
|
85
87
|
* );
|
|
88
|
+
*
|
|
89
|
+
* // With per-file configs:
|
|
90
|
+
* const fileConfigs = [{ ocrConfig: { language: 'eng' } }, null];
|
|
91
|
+
* const results2 = await batchExtractBytes(buffers, mimeTypes, null, fileConfigs);
|
|
86
92
|
* ```
|
|
87
93
|
* @param {Uint8Array[]} data_list
|
|
88
94
|
* @param {string[]} mime_types
|
|
89
95
|
* @param {any | null} [config]
|
|
96
|
+
* @param {any[] | null} [file_configs]
|
|
90
97
|
* @returns {Promise<any>}
|
|
91
98
|
*/
|
|
92
|
-
export function batchExtractBytes(data_list, mime_types, config) {
|
|
99
|
+
export function batchExtractBytes(data_list, mime_types, config, file_configs) {
|
|
93
100
|
const ptr0 = passArrayJsValueToWasm0(data_list, wasm.__wbindgen_malloc_command_export);
|
|
94
101
|
const len0 = WASM_VECTOR_LEN;
|
|
95
102
|
const ptr1 = passArrayJsValueToWasm0(mime_types, wasm.__wbindgen_malloc_command_export);
|
|
96
103
|
const len1 = WASM_VECTOR_LEN;
|
|
97
|
-
|
|
104
|
+
var ptr2 = isLikeNone(file_configs) ? 0 : passArrayJsValueToWasm0(file_configs, wasm.__wbindgen_malloc_command_export);
|
|
105
|
+
var len2 = WASM_VECTOR_LEN;
|
|
106
|
+
const ret = wasm.batchExtractBytes(ptr0, len0, ptr1, len1, isLikeNone(config) ? 0 : addToExternrefTable0(config), ptr2, len2);
|
|
98
107
|
return ret;
|
|
99
108
|
}
|
|
100
109
|
|
|
@@ -102,13 +111,14 @@ export function batchExtractBytes(data_list, mime_types, config) {
|
|
|
102
111
|
* Batch extract from multiple byte arrays (synchronous).
|
|
103
112
|
*
|
|
104
113
|
* Processes multiple document byte arrays in parallel. All documents use the
|
|
105
|
-
* same extraction configuration.
|
|
114
|
+
* same extraction configuration unless per-file configs are provided.
|
|
106
115
|
*
|
|
107
116
|
* # JavaScript Parameters
|
|
108
117
|
*
|
|
109
118
|
* * `dataList: Uint8Array[]` - Array of document bytes
|
|
110
119
|
* * `mimeTypes: string[]` - Array of MIME types (must match dataList length)
|
|
111
120
|
* * `config?: object` - Optional extraction configuration (applied to all)
|
|
121
|
+
* * `fileConfigs?: (object | null)[]` - Optional per-file config overrides (must match dataList length if provided)
|
|
112
122
|
*
|
|
113
123
|
* # Returns
|
|
114
124
|
*
|
|
@@ -116,7 +126,8 @@ export function batchExtractBytes(data_list, mime_types, config) {
|
|
|
116
126
|
*
|
|
117
127
|
* # Throws
|
|
118
128
|
*
|
|
119
|
-
* Throws if dataList and mimeTypes lengths don't match
|
|
129
|
+
* Throws if dataList and mimeTypes lengths don't match, or if fileConfigs
|
|
130
|
+
* is provided and its length doesn't match dataList.
|
|
120
131
|
*
|
|
121
132
|
* # Example
|
|
122
133
|
*
|
|
@@ -130,18 +141,25 @@ export function batchExtractBytes(data_list, mime_types, config) {
|
|
|
130
141
|
* results.forEach((result, i) => {
|
|
131
142
|
* console.log(`Document ${i}: ${result.content.substring(0, 50)}...`);
|
|
132
143
|
* });
|
|
144
|
+
*
|
|
145
|
+
* // With per-file configs:
|
|
146
|
+
* const fileConfigs = [{ ocrConfig: { language: 'eng' } }, null, null];
|
|
147
|
+
* const results2 = batchExtractBytesSync(buffers, mimeTypes, null, fileConfigs);
|
|
133
148
|
* ```
|
|
134
149
|
* @param {Uint8Array[]} data_list
|
|
135
150
|
* @param {string[]} mime_types
|
|
136
151
|
* @param {any | null} [config]
|
|
152
|
+
* @param {any[] | null} [file_configs]
|
|
137
153
|
* @returns {any}
|
|
138
154
|
*/
|
|
139
|
-
export function batchExtractBytesSync(data_list, mime_types, config) {
|
|
155
|
+
export function batchExtractBytesSync(data_list, mime_types, config, file_configs) {
|
|
140
156
|
const ptr0 = passArrayJsValueToWasm0(data_list, wasm.__wbindgen_malloc_command_export);
|
|
141
157
|
const len0 = WASM_VECTOR_LEN;
|
|
142
158
|
const ptr1 = passArrayJsValueToWasm0(mime_types, wasm.__wbindgen_malloc_command_export);
|
|
143
159
|
const len1 = WASM_VECTOR_LEN;
|
|
144
|
-
|
|
160
|
+
var ptr2 = isLikeNone(file_configs) ? 0 : passArrayJsValueToWasm0(file_configs, wasm.__wbindgen_malloc_command_export);
|
|
161
|
+
var len2 = WASM_VECTOR_LEN;
|
|
162
|
+
const ret = wasm.batchExtractBytesSync(ptr0, len0, ptr1, len1, isLikeNone(config) ? 0 : addToExternrefTable0(config), ptr2, len2);
|
|
145
163
|
if (ret[2]) {
|
|
146
164
|
throw takeFromExternrefTable0(ret[1]);
|
|
147
165
|
}
|
|
@@ -1345,6 +1363,10 @@ function __wbg_get_imports() {
|
|
|
1345
1363
|
const ret = typeof(arg0) === 'function';
|
|
1346
1364
|
return ret;
|
|
1347
1365
|
},
|
|
1366
|
+
__wbg___wbindgen_is_null_0b605fc6b167c56f: function(arg0) {
|
|
1367
|
+
const ret = arg0 === null;
|
|
1368
|
+
return ret;
|
|
1369
|
+
},
|
|
1348
1370
|
__wbg___wbindgen_is_object_781bc9f159099513: function(arg0) {
|
|
1349
1371
|
const val = arg0;
|
|
1350
1372
|
const ret = typeof(val) === 'object' && val !== null;
|
|
@@ -1448,6 +1470,9 @@ function __wbg_get_imports() {
|
|
|
1448
1470
|
__wbg_getRandomValues_3f44b700395062e5: function() { return handleError(function (arg0, arg1) {
|
|
1449
1471
|
globalThis.crypto.getRandomValues(getArrayU8FromWasm0(arg0, arg1));
|
|
1450
1472
|
}, arguments); },
|
|
1473
|
+
__wbg_getRandomValues_76dfc69825c9c552: function() { return handleError(function (arg0, arg1) {
|
|
1474
|
+
globalThis.crypto.getRandomValues(getArrayU8FromWasm0(arg0, arg1));
|
|
1475
|
+
}, arguments); },
|
|
1451
1476
|
__wbg_getTime_1dad7b5386ddd2d9: function(arg0) {
|
|
1452
1477
|
const ret = arg0.getTime();
|
|
1453
1478
|
return ret;
|
|
@@ -1760,13 +1785,13 @@ function __wbg_get_imports() {
|
|
|
1760
1785
|
console.warn(arg0);
|
|
1761
1786
|
},
|
|
1762
1787
|
__wbindgen_cast_0000000000000001: function(arg0, arg1) {
|
|
1763
|
-
// Cast intrinsic for `Closure(Closure { dtor_idx:
|
|
1764
|
-
const ret = makeMutClosure(arg0, arg1, wasm.
|
|
1788
|
+
// Cast intrinsic for `Closure(Closure { dtor_idx: 3637, function: Function { arguments: [Externref], shim_idx: 3638, ret: Result(Unit), inner_ret: Some(Result(Unit)) }, mutable: true }) -> Externref`.
|
|
1789
|
+
const ret = makeMutClosure(arg0, arg1, wasm.wasm_bindgen__closure__destroy__hcbea9f36b367d3d8, wasm_bindgen__convert__closures_____invoke__h240eec0b57535315);
|
|
1765
1790
|
return ret;
|
|
1766
1791
|
},
|
|
1767
1792
|
__wbindgen_cast_0000000000000002: function(arg0, arg1) {
|
|
1768
|
-
// Cast intrinsic for `Closure(Closure { dtor_idx:
|
|
1769
|
-
const ret = makeMutClosure(arg0, arg1, wasm.
|
|
1793
|
+
// Cast intrinsic for `Closure(Closure { dtor_idx: 4, function: Function { arguments: [Externref], shim_idx: 5, ret: Unit, inner_ret: Some(Unit) }, mutable: true }) -> Externref`.
|
|
1794
|
+
const ret = makeMutClosure(arg0, arg1, wasm.wasm_bindgen__closure__destroy__h594455dbca0a1257, wasm_bindgen__convert__closures_____invoke__h56e58b9b796ad689);
|
|
1770
1795
|
return ret;
|
|
1771
1796
|
},
|
|
1772
1797
|
__wbindgen_cast_0000000000000003: function(arg0) {
|
|
@@ -1808,8 +1833,8 @@ function __wbg_get_imports() {
|
|
|
1808
1833
|
};
|
|
1809
1834
|
}
|
|
1810
1835
|
|
|
1811
|
-
function
|
|
1812
|
-
wasm.
|
|
1836
|
+
function wasm_bindgen__convert__closures_____invoke__h56e58b9b796ad689(arg0, arg1, arg2) {
|
|
1837
|
+
wasm.wasm_bindgen__convert__closures_____invoke__h56e58b9b796ad689(arg0, arg1, arg2);
|
|
1813
1838
|
}
|
|
1814
1839
|
|
|
1815
1840
|
function wasm_bindgen__convert__closures_____invoke__h240eec0b57535315(arg0, arg1, arg2) {
|
|
@@ -53,6 +53,7 @@ if (Symbol.dispose) ModuleInfo.prototype[Symbol.dispose] = ModuleInfo.prototype.
|
|
|
53
53
|
* * `dataList: Uint8Array[]` - Array of document bytes
|
|
54
54
|
* * `mimeTypes: string[]` - Array of MIME types (must match dataList length)
|
|
55
55
|
* * `config?: object` - Optional extraction configuration (applied to all)
|
|
56
|
+
* * `fileConfigs?: (object | null)[]` - Optional per-file config overrides (must match dataList length if provided)
|
|
56
57
|
*
|
|
57
58
|
* # Returns
|
|
58
59
|
*
|
|
@@ -60,7 +61,8 @@ if (Symbol.dispose) ModuleInfo.prototype[Symbol.dispose] = ModuleInfo.prototype.
|
|
|
60
61
|
*
|
|
61
62
|
* # Throws
|
|
62
63
|
*
|
|
63
|
-
* Rejects if dataList and mimeTypes lengths don't match
|
|
64
|
+
* Rejects if dataList and mimeTypes lengths don't match, or if fileConfigs
|
|
65
|
+
* is provided and its length doesn't match dataList.
|
|
64
66
|
*
|
|
65
67
|
* # Example
|
|
66
68
|
*
|
|
@@ -81,18 +83,25 @@ if (Symbol.dispose) ModuleInfo.prototype[Symbol.dispose] = ModuleInfo.prototype.
|
|
|
81
83
|
* ['application/pdf', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
|
|
82
84
|
* null
|
|
83
85
|
* );
|
|
86
|
+
*
|
|
87
|
+
* // With per-file configs:
|
|
88
|
+
* const fileConfigs = [{ ocrConfig: { language: 'eng' } }, null];
|
|
89
|
+
* const results2 = await batchExtractBytes(buffers, mimeTypes, null, fileConfigs);
|
|
84
90
|
* ```
|
|
85
91
|
* @param {Uint8Array[]} data_list
|
|
86
92
|
* @param {string[]} mime_types
|
|
87
93
|
* @param {any | null} [config]
|
|
94
|
+
* @param {any[] | null} [file_configs]
|
|
88
95
|
* @returns {Promise<any>}
|
|
89
96
|
*/
|
|
90
|
-
export function batchExtractBytes(data_list, mime_types, config) {
|
|
97
|
+
export function batchExtractBytes(data_list, mime_types, config, file_configs) {
|
|
91
98
|
const ptr0 = passArrayJsValueToWasm0(data_list, wasm.__wbindgen_malloc_command_export);
|
|
92
99
|
const len0 = WASM_VECTOR_LEN;
|
|
93
100
|
const ptr1 = passArrayJsValueToWasm0(mime_types, wasm.__wbindgen_malloc_command_export);
|
|
94
101
|
const len1 = WASM_VECTOR_LEN;
|
|
95
|
-
|
|
102
|
+
var ptr2 = isLikeNone(file_configs) ? 0 : passArrayJsValueToWasm0(file_configs, wasm.__wbindgen_malloc_command_export);
|
|
103
|
+
var len2 = WASM_VECTOR_LEN;
|
|
104
|
+
const ret = wasm.batchExtractBytes(ptr0, len0, ptr1, len1, isLikeNone(config) ? 0 : addToExternrefTable0(config), ptr2, len2);
|
|
96
105
|
return ret;
|
|
97
106
|
}
|
|
98
107
|
|
|
@@ -100,13 +109,14 @@ export function batchExtractBytes(data_list, mime_types, config) {
|
|
|
100
109
|
* Batch extract from multiple byte arrays (synchronous).
|
|
101
110
|
*
|
|
102
111
|
* Processes multiple document byte arrays in parallel. All documents use the
|
|
103
|
-
* same extraction configuration.
|
|
112
|
+
* same extraction configuration unless per-file configs are provided.
|
|
104
113
|
*
|
|
105
114
|
* # JavaScript Parameters
|
|
106
115
|
*
|
|
107
116
|
* * `dataList: Uint8Array[]` - Array of document bytes
|
|
108
117
|
* * `mimeTypes: string[]` - Array of MIME types (must match dataList length)
|
|
109
118
|
* * `config?: object` - Optional extraction configuration (applied to all)
|
|
119
|
+
* * `fileConfigs?: (object | null)[]` - Optional per-file config overrides (must match dataList length if provided)
|
|
110
120
|
*
|
|
111
121
|
* # Returns
|
|
112
122
|
*
|
|
@@ -114,7 +124,8 @@ export function batchExtractBytes(data_list, mime_types, config) {
|
|
|
114
124
|
*
|
|
115
125
|
* # Throws
|
|
116
126
|
*
|
|
117
|
-
* Throws if dataList and mimeTypes lengths don't match
|
|
127
|
+
* Throws if dataList and mimeTypes lengths don't match, or if fileConfigs
|
|
128
|
+
* is provided and its length doesn't match dataList.
|
|
118
129
|
*
|
|
119
130
|
* # Example
|
|
120
131
|
*
|
|
@@ -128,18 +139,25 @@ export function batchExtractBytes(data_list, mime_types, config) {
|
|
|
128
139
|
* results.forEach((result, i) => {
|
|
129
140
|
* console.log(`Document ${i}: ${result.content.substring(0, 50)}...`);
|
|
130
141
|
* });
|
|
142
|
+
*
|
|
143
|
+
* // With per-file configs:
|
|
144
|
+
* const fileConfigs = [{ ocrConfig: { language: 'eng' } }, null, null];
|
|
145
|
+
* const results2 = batchExtractBytesSync(buffers, mimeTypes, null, fileConfigs);
|
|
131
146
|
* ```
|
|
132
147
|
* @param {Uint8Array[]} data_list
|
|
133
148
|
* @param {string[]} mime_types
|
|
134
149
|
* @param {any | null} [config]
|
|
150
|
+
* @param {any[] | null} [file_configs]
|
|
135
151
|
* @returns {any}
|
|
136
152
|
*/
|
|
137
|
-
export function batchExtractBytesSync(data_list, mime_types, config) {
|
|
153
|
+
export function batchExtractBytesSync(data_list, mime_types, config, file_configs) {
|
|
138
154
|
const ptr0 = passArrayJsValueToWasm0(data_list, wasm.__wbindgen_malloc_command_export);
|
|
139
155
|
const len0 = WASM_VECTOR_LEN;
|
|
140
156
|
const ptr1 = passArrayJsValueToWasm0(mime_types, wasm.__wbindgen_malloc_command_export);
|
|
141
157
|
const len1 = WASM_VECTOR_LEN;
|
|
142
|
-
|
|
158
|
+
var ptr2 = isLikeNone(file_configs) ? 0 : passArrayJsValueToWasm0(file_configs, wasm.__wbindgen_malloc_command_export);
|
|
159
|
+
var len2 = WASM_VECTOR_LEN;
|
|
160
|
+
const ret = wasm.batchExtractBytesSync(ptr0, len0, ptr1, len1, isLikeNone(config) ? 0 : addToExternrefTable0(config), ptr2, len2);
|
|
143
161
|
if (ret[2]) {
|
|
144
162
|
throw takeFromExternrefTable0(ret[1]);
|
|
145
163
|
}
|
|
@@ -1251,6 +1269,10 @@ export function __wbg___wbindgen_is_function_3c846841762788c1(arg0) {
|
|
|
1251
1269
|
const ret = typeof(arg0) === 'function';
|
|
1252
1270
|
return ret;
|
|
1253
1271
|
}
|
|
1272
|
+
export function __wbg___wbindgen_is_null_0b605fc6b167c56f(arg0) {
|
|
1273
|
+
const ret = arg0 === null;
|
|
1274
|
+
return ret;
|
|
1275
|
+
}
|
|
1254
1276
|
export function __wbg___wbindgen_is_object_781bc9f159099513(arg0) {
|
|
1255
1277
|
const val = arg0;
|
|
1256
1278
|
const ret = typeof(val) === 'object' && val !== null;
|
|
@@ -1354,6 +1376,9 @@ export function __wbg_from_4bdf88943703fd48(arg0) {
|
|
|
1354
1376
|
export function __wbg_getRandomValues_3f44b700395062e5() { return handleError(function (arg0, arg1) {
|
|
1355
1377
|
globalThis.crypto.getRandomValues(getArrayU8FromWasm0(arg0, arg1));
|
|
1356
1378
|
}, arguments); }
|
|
1379
|
+
export function __wbg_getRandomValues_76dfc69825c9c552() { return handleError(function (arg0, arg1) {
|
|
1380
|
+
globalThis.crypto.getRandomValues(getArrayU8FromWasm0(arg0, arg1));
|
|
1381
|
+
}, arguments); }
|
|
1357
1382
|
export function __wbg_getTime_1dad7b5386ddd2d9(arg0) {
|
|
1358
1383
|
const ret = arg0.getTime();
|
|
1359
1384
|
return ret;
|
|
@@ -1666,13 +1691,13 @@ export function __wbg_warn_69424c2d92a2fa73(arg0) {
|
|
|
1666
1691
|
console.warn(arg0);
|
|
1667
1692
|
}
|
|
1668
1693
|
export function __wbindgen_cast_0000000000000001(arg0, arg1) {
|
|
1669
|
-
// Cast intrinsic for `Closure(Closure { dtor_idx:
|
|
1670
|
-
const ret = makeMutClosure(arg0, arg1, wasm.
|
|
1694
|
+
// Cast intrinsic for `Closure(Closure { dtor_idx: 3637, function: Function { arguments: [Externref], shim_idx: 3638, ret: Result(Unit), inner_ret: Some(Result(Unit)) }, mutable: true }) -> Externref`.
|
|
1695
|
+
const ret = makeMutClosure(arg0, arg1, wasm.wasm_bindgen__closure__destroy__hcbea9f36b367d3d8, wasm_bindgen__convert__closures_____invoke__h240eec0b57535315);
|
|
1671
1696
|
return ret;
|
|
1672
1697
|
}
|
|
1673
1698
|
export function __wbindgen_cast_0000000000000002(arg0, arg1) {
|
|
1674
|
-
// Cast intrinsic for `Closure(Closure { dtor_idx:
|
|
1675
|
-
const ret = makeMutClosure(arg0, arg1, wasm.
|
|
1699
|
+
// Cast intrinsic for `Closure(Closure { dtor_idx: 4, function: Function { arguments: [Externref], shim_idx: 5, ret: Unit, inner_ret: Some(Unit) }, mutable: true }) -> Externref`.
|
|
1700
|
+
const ret = makeMutClosure(arg0, arg1, wasm.wasm_bindgen__closure__destroy__h594455dbca0a1257, wasm_bindgen__convert__closures_____invoke__h56e58b9b796ad689);
|
|
1676
1701
|
return ret;
|
|
1677
1702
|
}
|
|
1678
1703
|
export function __wbindgen_cast_0000000000000003(arg0) {
|
|
@@ -1705,8 +1730,8 @@ export function __wbindgen_init_externref_table() {
|
|
|
1705
1730
|
table.set(offset + 2, true);
|
|
1706
1731
|
table.set(offset + 3, false);
|
|
1707
1732
|
}
|
|
1708
|
-
function
|
|
1709
|
-
wasm.
|
|
1733
|
+
function wasm_bindgen__convert__closures_____invoke__h56e58b9b796ad689(arg0, arg1, arg2) {
|
|
1734
|
+
wasm.wasm_bindgen__convert__closures_____invoke__h56e58b9b796ad689(arg0, arg1, arg2);
|
|
1710
1735
|
}
|
|
1711
1736
|
|
|
1712
1737
|
function wasm_bindgen__convert__closures_____invoke__h240eec0b57535315(arg0, arg1, arg2) {
|
|
Binary file
|
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
/* eslint-disable */
|
|
3
3
|
export const memory: WebAssembly.Memory;
|
|
4
4
|
export const __wbg_moduleinfo_free: (a: number, b: number) => void;
|
|
5
|
-
export const batchExtractBytes: (a: number, b: number, c: number, d: number, e: number) => any;
|
|
6
|
-
export const batchExtractBytesSync: (a: number, b: number, c: number, d: number, e: number) => [number, number, number];
|
|
5
|
+
export const batchExtractBytes: (a: number, b: number, c: number, d: number, e: number, f: number, g: number) => any;
|
|
6
|
+
export const batchExtractBytesSync: (a: number, b: number, c: number, d: number, e: number, f: number, g: number) => [number, number, number];
|
|
7
7
|
export const batchExtractFiles: (a: number, b: number, c: number) => any;
|
|
8
8
|
export const batchExtractFilesSync: () => [number, number, number];
|
|
9
9
|
export const clear_ocr_backends: () => [number, number];
|
|
@@ -45,11 +45,11 @@ export const read_block_from_callback_wasm: (a: number, b: number, c: number, d:
|
|
|
45
45
|
export const write_block_from_callback_wasm: (a: number, b: number, c: number) => number;
|
|
46
46
|
export const compress: (a: number, b: number, c: number, d: number) => [number, number, number];
|
|
47
47
|
export const decompress: (a: any, b: number, c: number, d: any) => [number, number];
|
|
48
|
-
export const wasm_bindgen__closure__destroy__hb1815f7351a8f99b: (a: number, b: number) => void;
|
|
49
48
|
export const wasm_bindgen__closure__destroy__hcbea9f36b367d3d8: (a: number, b: number) => void;
|
|
49
|
+
export const wasm_bindgen__closure__destroy__h594455dbca0a1257: (a: number, b: number) => void;
|
|
50
50
|
export const wasm_bindgen__convert__closures_____invoke__h240eec0b57535315: (a: number, b: number, c: any) => [number, number];
|
|
51
51
|
export const wasm_bindgen__convert__closures_____invoke__h8e9fc28c4e841be2: (a: number, b: number, c: any, d: any) => void;
|
|
52
|
-
export const
|
|
52
|
+
export const wasm_bindgen__convert__closures_____invoke__h56e58b9b796ad689: (a: number, b: number, c: any) => void;
|
|
53
53
|
export const __wbindgen_externrefs: WebAssembly.Table;
|
|
54
54
|
export const __wbindgen_malloc_command_export: (a: number, b: number) => number;
|
|
55
55
|
export const __wbindgen_realloc_command_export: (a: number, b: number, c: number, d: number) => number;
|
package/package.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@kreuzberg/wasm",
|
|
3
|
-
"version": "4.4.6",
|
|
3
|
+
"version": "4.5.1",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"packageManager": "pnpm@10.17.0",
|
|
6
6
|
"description": "Kreuzberg document intelligence - WebAssembly bindings",
|
|
7
7
|
"author": {
|
|
8
8
|
"name": "Na'aman Hirschfeld",
|
|
9
|
-
"email": "
|
|
9
|
+
"email": "naaman@kreuzberg.dev",
|
|
10
10
|
"url": "https://kreuzberg.dev"
|
|
11
11
|
},
|
|
12
12
|
"homepage": "https://kreuzberg.dev",
|
|
@@ -110,8 +110,8 @@
|
|
|
110
110
|
"@types/node": "^25.5.0",
|
|
111
111
|
"@vitest/coverage-v8": "^4.1.0",
|
|
112
112
|
"@vitest/ui": "^4.1.0",
|
|
113
|
-
"jsdom": "^
|
|
114
|
-
"oxlint": "^1.
|
|
113
|
+
"jsdom": "^29.0.1",
|
|
114
|
+
"oxlint": "^1.56.0",
|
|
115
115
|
"tsup": "^8.5.1",
|
|
116
116
|
"typescript": "^5.9.3",
|
|
117
117
|
"vitest": "^4.1.0"
|