@kreuzberg/wasm 4.0.0-rc.6 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/LICENSE +7 -0
  2. package/README.md +321 -800
  3. package/dist/adapters/wasm-adapter.d.ts +7 -10
  4. package/dist/adapters/wasm-adapter.d.ts.map +1 -0
  5. package/dist/adapters/wasm-adapter.js +53 -54
  6. package/dist/adapters/wasm-adapter.js.map +1 -1
  7. package/dist/index.d.ts +23 -67
  8. package/dist/index.d.ts.map +1 -0
  9. package/dist/index.js +1102 -104
  10. package/dist/index.js.map +1 -1
  11. package/dist/ocr/registry.d.ts +7 -10
  12. package/dist/ocr/registry.d.ts.map +1 -0
  13. package/dist/ocr/registry.js +9 -28
  14. package/dist/ocr/registry.js.map +1 -1
  15. package/dist/ocr/tesseract-wasm-backend.d.ts +3 -6
  16. package/dist/ocr/tesseract-wasm-backend.d.ts.map +1 -0
  17. package/dist/ocr/tesseract-wasm-backend.js +8 -83
  18. package/dist/ocr/tesseract-wasm-backend.js.map +1 -1
  19. package/dist/pdfium.js +77 -0
  20. package/dist/pkg/LICENSE +7 -0
  21. package/dist/pkg/README.md +503 -0
  22. package/dist/{kreuzberg_wasm.d.ts → pkg/kreuzberg_wasm.d.ts} +24 -12
  23. package/dist/{kreuzberg_wasm.js → pkg/kreuzberg_wasm.js} +224 -233
  24. package/dist/pkg/kreuzberg_wasm_bg.js +1871 -0
  25. package/dist/{kreuzberg_wasm_bg.wasm → pkg/kreuzberg_wasm_bg.wasm} +0 -0
  26. package/dist/{kreuzberg_wasm_bg.wasm.d.ts → pkg/kreuzberg_wasm_bg.wasm.d.ts} +10 -13
  27. package/dist/pkg/package.json +27 -0
  28. package/dist/plugin-registry.d.ts +246 -0
  29. package/dist/plugin-registry.d.ts.map +1 -0
  30. package/dist/runtime.d.ts +21 -22
  31. package/dist/runtime.d.ts.map +1 -0
  32. package/dist/runtime.js +21 -41
  33. package/dist/runtime.js.map +1 -1
  34. package/dist/types.d.ts +363 -0
  35. package/dist/types.d.ts.map +1 -0
  36. package/package.json +34 -51
  37. package/dist/adapters/wasm-adapter.d.mts +0 -121
  38. package/dist/adapters/wasm-adapter.mjs +0 -221
  39. package/dist/adapters/wasm-adapter.mjs.map +0 -1
  40. package/dist/index.d.mts +0 -466
  41. package/dist/index.mjs +0 -384
  42. package/dist/index.mjs.map +0 -1
  43. package/dist/kreuzberg_wasm.d.mts +0 -758
  44. package/dist/kreuzberg_wasm.mjs +0 -48
  45. package/dist/ocr/registry.d.mts +0 -102
  46. package/dist/ocr/registry.mjs +0 -70
  47. package/dist/ocr/registry.mjs.map +0 -1
  48. package/dist/ocr/tesseract-wasm-backend.d.mts +0 -257
  49. package/dist/ocr/tesseract-wasm-backend.mjs +0 -424
  50. package/dist/ocr/tesseract-wasm-backend.mjs.map +0 -1
  51. package/dist/runtime.d.mts +0 -256
  52. package/dist/runtime.mjs +0 -152
  53. package/dist/runtime.mjs.map +0 -1
  54. package/dist/snippets/wasm-bindgen-rayon-38edf6e439f6d70d/src/workerHelpers.js +0 -107
  55. package/dist/types-GJVIvbPy.d.mts +0 -221
  56. package/dist/types-GJVIvbPy.d.ts +0 -221
@@ -0,0 +1,503 @@
1
+ # WebAssembly
2
+
3
+ <div align="center" style="display: flex; flex-wrap: wrap; gap: 8px; justify-content: center; margin: 20px 0;">
4
+ <!-- Language Bindings -->
5
+ <a href="https://crates.io/crates/kreuzberg">
6
+ <img src="https://img.shields.io/crates/v/kreuzberg?label=Rust&color=007ec6" alt="Rust">
7
+ </a>
8
+ <a href="https://hex.pm/packages/kreuzberg">
9
+ <img src="https://img.shields.io/hexpm/v/kreuzberg?label=Elixir&color=007ec6" alt="Elixir">
10
+ </a>
11
+ <a href="https://pypi.org/project/kreuzberg/">
12
+ <img src="https://img.shields.io/pypi/v/kreuzberg?label=Python&color=007ec6" alt="Python">
13
+ </a>
14
+ <a href="https://www.npmjs.com/package/@kreuzberg/node">
15
+ <img src="https://img.shields.io/npm/v/@kreuzberg/node?label=Node.js&color=007ec6" alt="Node.js">
16
+ </a>
17
+ <a href="https://www.npmjs.com/package/@kreuzberg/wasm">
18
+ <img src="https://img.shields.io/npm/v/@kreuzberg/wasm?label=WASM&color=007ec6" alt="WASM">
19
+ </a>
20
+
21
+ <a href="https://central.sonatype.com/artifact/dev.kreuzberg/kreuzberg">
22
+ <img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
23
+ </a>
24
+ <a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
25
+ <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.0.0" alt="Go">
26
+ </a>
27
+ <a href="https://www.nuget.org/packages/Kreuzberg/">
28
+ <img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
29
+ </a>
30
+ <a href="https://packagist.org/packages/kreuzberg/kreuzberg">
31
+ <img src="https://img.shields.io/packagist/v/kreuzberg/kreuzberg?label=PHP&color=007ec6" alt="PHP">
32
+ </a>
33
+ <a href="https://rubygems.org/gems/kreuzberg">
34
+ <img src="https://img.shields.io/gem/v/kreuzberg?label=Ruby&color=007ec6" alt="Ruby">
35
+ </a>
36
+
37
+ <!-- Project Info -->
38
+ <a href="https://github.com/kreuzberg-dev/kreuzberg/blob/main/LICENSE">
39
+ <img src="https://img.shields.io/badge/License-MIT-blue.svg" alt="License">
40
+ </a>
41
+ <a href="https://docs.kreuzberg.dev">
42
+ <img src="https://img.shields.io/badge/docs-kreuzberg.dev-blue" alt="Documentation">
43
+ </a>
44
+ </div>
45
+
46
+ <img width="1128" height="191" alt="Banner2" src="https://github.com/user-attachments/assets/419fc06c-8313-4324-b159-4b4d3cfce5c0" />
47
+
48
+ <div align="center" style="margin-top: 20px;">
49
+ <a href="https://discord.gg/pXxagNK2zN">
50
+ <img height="22" src="https://img.shields.io/badge/Discord-Join%20our%20community-7289da?logo=discord&logoColor=white" alt="Discord">
51
+ </a>
52
+ </div>
53
+
54
+
55
+ Extract text, tables, images, and metadata from 56 file formats including PDF, Office documents, and images. WebAssembly bindings for browsers, Deno, and Cloudflare Workers with portable deployment and multi-threading support.
56
+
57
+
58
+ ## Installation
59
+
60
+ ### Package Installation
61
+
62
+
63
+ Install via one of the supported package managers:
64
+
65
+
66
+
67
+ **npm:**
68
+ ```bash
69
+ npm install @kreuzberg/wasm
70
+ ```
71
+
72
+
73
+
74
+
75
+ **pnpm:**
76
+ ```bash
77
+ pnpm add @kreuzberg/wasm
78
+ ```
79
+
80
+
81
+
82
+
83
+ **yarn:**
84
+ ```bash
85
+ yarn add @kreuzberg/wasm
86
+ ```
87
+
88
+
89
+
90
+
91
+
92
+ ### System Requirements
93
+
94
+ - Modern browser with WebAssembly support, or Deno 1.0+, or Cloudflare Workers
95
+ - Optional: [Tesseract WASM](https://github.com/naptha/tesseract.js) for OCR functionality
96
+
97
+
98
+
99
+ ## Quick Start
100
+
101
+ ### Basic Extraction
102
+
103
+ Extract text, metadata, and structure from any supported document format:
104
+
105
+ ```ts
106
+ import { extractBytes, initWasm } from "@kreuzberg/wasm";
107
+
108
+ async function main() {
109
+ await initWasm();
110
+
111
+ const buffer = await fetch("document.pdf").then((r) => r.arrayBuffer());
112
+ const bytes = new Uint8Array(buffer);
113
+
114
+ const result = await extractBytes(bytes, "application/pdf");
115
+
116
+ console.log("Extracted content:");
117
+ console.log(result.content);
118
+ console.log("MIME type:", result.mimeType);
119
+ console.log("Metadata:", result.metadata);
120
+ }
121
+
122
+ main().catch(console.error);
123
+ ```
124
+
125
+
126
+ ### Common Use Cases
127
+
128
+ #### Extract with Custom Configuration
129
+
130
+ Most use cases benefit from configuration to control extraction behavior:
131
+
132
+
133
+ **With OCR (for scanned documents):**
134
+
135
+ ```ts
136
+ import { enableOcr, extractBytes, initWasm } from "@kreuzberg/wasm";
137
+
138
+ async function extractWithOcr() {
139
+ await initWasm();
140
+
141
+ try {
142
+ await enableOcr();
143
+ console.log("OCR enabled successfully");
144
+ } catch (error) {
145
+ console.error("Failed to enable OCR:", error);
146
+ return;
147
+ }
148
+
149
+ const bytes = new Uint8Array(await fetch("scanned-page.png").then((r) => r.arrayBuffer()));
150
+
151
+ const result = await extractBytes(bytes, "image/png", {
152
+ ocr: {
153
+ backend: "tesseract-wasm",
154
+ language: "eng",
155
+ },
156
+ });
157
+
158
+ console.log("Extracted text:");
159
+ console.log(result.content);
160
+ }
161
+
162
+ extractWithOcr().catch(console.error);
163
+ ```
164
+
165
+
166
+
167
+
168
+ #### Table Extraction
169
+
170
+
171
+ See the [Table Extraction Guide](https://kreuzberg.dev/features/table-extraction/) for detailed examples.
172
+
173
+
174
+
175
+ #### Processing Multiple Files
176
+
177
+
178
+ ```ts
179
+ import { extractBytes, initWasm } from "@kreuzberg/wasm";
180
+
181
+ interface DocumentJob {
182
+ name: string;
183
+ bytes: Uint8Array;
184
+ mimeType: string;
185
+ }
186
+
187
+ async function _processBatch(documents: DocumentJob[], concurrency: number = 3) {
188
+ await initWasm();
189
+
190
+ const results: Record<string, string> = {};
191
+ const queue = [...documents];
192
+
193
+ const workers = Array(concurrency)
194
+ .fill(null)
195
+ .map(async () => {
196
+ while (queue.length > 0) {
197
+ const doc = queue.shift();
198
+ if (!doc) break;
199
+
200
+ try {
201
+ const result = await extractBytes(doc.bytes, doc.mimeType);
202
+ results[doc.name] = result.content;
203
+ } catch (error) {
204
+ console.error(`Failed to process ${doc.name}:`, error);
205
+ }
206
+ }
207
+ });
208
+
209
+ await Promise.all(workers);
210
+ return results;
211
+ }
212
+ ```
213
+
214
+
215
+
216
+
217
+
218
+ #### Async Processing
219
+
220
+ For non-blocking document processing:
221
+
222
+ ```ts
223
+ import { extractBytes, getWasmCapabilities, initWasm } from "@kreuzberg/wasm";
224
+
225
+ async function extractDocuments(files: Uint8Array[], mimeTypes: string[]) {
226
+ const caps = getWasmCapabilities();
227
+ if (!caps.hasWasm) {
228
+ throw new Error("WebAssembly not supported");
229
+ }
230
+
231
+ await initWasm();
232
+
233
+ const results = await Promise.all(files.map((bytes, index) => extractBytes(bytes, mimeTypes[index])));
234
+
235
+ return results.map((r) => ({
236
+ content: r.content,
237
+ pageCount: r.metadata?.pageCount,
238
+ }));
239
+ }
240
+
241
+ const fileBytes = [new Uint8Array([1, 2, 3])];
242
+ const mimes = ["application/pdf"];
243
+
244
+ extractDocuments(fileBytes, mimes)
245
+ .then((results) => console.log(results))
246
+ .catch(console.error);
247
+ ```
248
+
249
+
250
+
251
+
252
+
253
+
254
+ ### Next Steps
255
+
256
+ - **[Installation Guide](https://kreuzberg.dev/getting-started/installation/)** - Platform-specific setup
257
+ - **[API Documentation](https://kreuzberg.dev/api/)** - Complete API reference
258
+ - **[Examples & Guides](https://kreuzberg.dev/guides/)** - Full code examples and usage guides
259
+ - **[Configuration Guide](https://kreuzberg.dev/configuration/)** - Advanced configuration options
260
+ - **[Troubleshooting](https://kreuzberg.dev/troubleshooting/)** - Common issues and solutions
261
+
262
+
263
+
264
+ ## Features
265
+
266
+ ### Supported File Formats (56+)
267
+
268
+ 56 file formats across 8 major categories with intelligent format detection and comprehensive metadata extraction.
269
+
270
+ #### Office Documents
271
+
272
+ | Category | Formats | Capabilities |
273
+ |----------|---------|--------------|
274
+ | **Word Processing** | `.docx`, `.odt` | Full text, tables, images, metadata, styles |
275
+ | **Spreadsheets** | `.xlsx`, `.xlsm`, `.xlsb`, `.xls`, `.xla`, `.xlam`, `.xltm`, `.ods` | Sheet data, formulas, cell metadata, charts |
276
+ | **Presentations** | `.pptx`, `.ppt`, `.ppsx` | Slides, speaker notes, images, metadata |
277
+ | **PDF** | `.pdf` | Text, tables, images, metadata, OCR support |
278
+ | **eBooks** | `.epub`, `.fb2` | Chapters, metadata, embedded resources |
279
+
280
+ #### Images (OCR-Enabled)
281
+
282
+ | Category | Formats | Features |
283
+ |----------|---------|----------|
284
+ | **Raster** | `.png`, `.jpg`, `.jpeg`, `.gif`, `.webp`, `.bmp`, `.tiff`, `.tif` | OCR, table detection, EXIF metadata, dimensions, color space |
285
+ | **Advanced** | `.jp2`, `.jpx`, `.jpm`, `.mj2`, `.pnm`, `.pbm`, `.pgm`, `.ppm` | OCR, table detection, format-specific metadata |
286
+ | **Vector** | `.svg` | DOM parsing, embedded text, graphics metadata |
287
+
288
+ #### Web & Data
289
+
290
+ | Category | Formats | Features |
291
+ |----------|---------|----------|
292
+ | **Markup** | `.html`, `.htm`, `.xhtml`, `.xml`, `.svg` | DOM parsing, metadata (Open Graph, Twitter Card), link extraction |
293
+ | **Structured Data** | `.json`, `.yaml`, `.yml`, `.toml`, `.csv`, `.tsv` | Schema detection, nested structures, validation |
294
+ | **Text & Markdown** | `.txt`, `.md`, `.markdown`, `.rst`, `.org`, `.rtf` | CommonMark, GFM, reStructuredText, Org Mode |
295
+
296
+ #### Email & Archives
297
+
298
+ | Category | Formats | Features |
299
+ |----------|---------|----------|
300
+ | **Email** | `.eml`, `.msg` | Headers, body (HTML/plain), attachments, threading |
301
+ | **Archives** | `.zip`, `.tar`, `.tgz`, `.gz`, `.7z` | File listing, nested archives, metadata |
302
+
303
+ #### Academic & Scientific
304
+
305
+ | Category | Formats | Features |
306
+ |----------|---------|----------|
307
+ | **Citations** | `.bib`, `.biblatex`, `.ris`, `.enw`, `.csl` | Bibliography parsing, citation extraction |
308
+ | **Scientific** | `.tex`, `.latex`, `.typst`, `.jats`, `.ipynb`, `.docbook` | LaTeX, Jupyter notebooks, PubMed JATS |
309
+ | **Documentation** | `.opml`, `.pod`, `.mdoc`, `.troff` | Technical documentation formats |
310
+
311
+ **[Complete Format Reference](https://kreuzberg.dev/reference/formats/)**
312
+
313
+ ### Key Capabilities
314
+
315
+ - **Text Extraction** - Extract all text content with position and formatting information
316
+ - **Metadata Extraction** - Retrieve document properties, creation date, author, etc.
317
+ - **Table Extraction** - Parse tables with structure and cell content preservation
318
+ - **Image Extraction** - Extract embedded images and render page previews
319
+ - **OCR Support** - Integrate multiple OCR backends for scanned documents
320
+
321
+ - **Async/Await** - Non-blocking document processing with concurrent operations
322
+
323
+
324
+ - **Plugin System** - Extensible post-processing for custom text transformation
325
+
326
+
327
+ - **Batch Processing** - Efficiently process multiple documents in parallel
328
+ - **Memory Efficient** - Stream large files without loading them entirely into memory
329
+ - **Language Detection** - Detect and support multiple languages in documents
330
+ - **Configuration** - Fine-grained control over extraction behavior
331
+
332
+ ### Performance Characteristics
333
+
334
+ | Format | Speed | Memory | Notes |
335
+ |--------|-------|--------|-------|
336
+ | **PDF (text)** | 10-100 MB/s | ~50MB per doc | Fastest extraction |
337
+ | **Office docs** | 20-200 MB/s | ~100MB per doc | DOCX, XLSX, PPTX |
338
+ | **Images (OCR)** | 1-5 MB/s | Variable | Depends on OCR backend |
339
+ | **Archives** | 5-50 MB/s | ~200MB per doc | ZIP, TAR, etc. |
340
+ | **Web formats** | 50-200 MB/s | Streaming | HTML, XML, JSON |
341
+
342
+
343
+
344
+ ## OCR Support
345
+
346
+ Kreuzberg supports pluggable OCR backends for extracting text from scanned documents and images. The following backend is available in this binding:
347
+
348
+
349
+ - **Tesseract-Wasm**
350
+
351
+
352
+ ### OCR Configuration Example
353
+
354
+ ```ts
355
+ import { enableOcr, extractBytes, initWasm } from "@kreuzberg/wasm";
356
+
357
+ async function extractWithOcr() {
358
+ await initWasm();
359
+
360
+ try {
361
+ await enableOcr();
362
+ console.log("OCR enabled successfully");
363
+ } catch (error) {
364
+ console.error("Failed to enable OCR:", error);
365
+ return;
366
+ }
367
+
368
+ const bytes = new Uint8Array(await fetch("scanned-page.png").then((r) => r.arrayBuffer()));
369
+
370
+ const result = await extractBytes(bytes, "image/png", {
371
+ ocr: {
372
+ backend: "tesseract-wasm",
373
+ language: "eng",
374
+ },
375
+ });
376
+
377
+ console.log("Extracted text:");
378
+ console.log(result.content);
379
+ }
380
+
381
+ extractWithOcr().catch(console.error);
382
+ ```
383
+
384
+
385
+
386
+
387
+ ## Async Support
388
+
389
+ This binding provides full async/await support for non-blocking document processing:
390
+
391
+ ```ts
392
+ import { extractBytes, getWasmCapabilities, initWasm } from "@kreuzberg/wasm";
393
+
394
+ async function extractDocuments(files: Uint8Array[], mimeTypes: string[]) {
395
+ const caps = getWasmCapabilities();
396
+ if (!caps.hasWasm) {
397
+ throw new Error("WebAssembly not supported");
398
+ }
399
+
400
+ await initWasm();
401
+
402
+ const results = await Promise.all(files.map((bytes, index) => extractBytes(bytes, mimeTypes[index])));
403
+
404
+ return results.map((r) => ({
405
+ content: r.content,
406
+ pageCount: r.metadata?.pageCount,
407
+ }));
408
+ }
409
+
410
+ const fileBytes = [new Uint8Array([1, 2, 3])];
411
+ const mimes = ["application/pdf"];
412
+
413
+ extractDocuments(fileBytes, mimes)
414
+ .then((results) => console.log(results))
415
+ .catch(console.error);
416
+ ```
417
+
418
+
419
+
420
+
421
+ ## Plugin System
422
+
423
+ Kreuzberg supports extensible post-processing plugins for custom text transformation and filtering.
424
+
425
+ For detailed plugin documentation, visit the [Plugin System Guide](https://kreuzberg.dev/plugins/).
426
+
427
+
428
+
429
+
430
+
431
+
432
+ ## Batch Processing
433
+
434
+ Process multiple documents efficiently:
435
+
436
+ ```ts
437
+ import { extractBytes, initWasm } from "@kreuzberg/wasm";
438
+
439
+ interface DocumentJob {
440
+ name: string;
441
+ bytes: Uint8Array;
442
+ mimeType: string;
443
+ }
444
+
445
+ async function _processBatch(documents: DocumentJob[], concurrency: number = 3) {
446
+ await initWasm();
447
+
448
+ const results: Record<string, string> = {};
449
+ const queue = [...documents];
450
+
451
+ const workers = Array(concurrency)
452
+ .fill(null)
453
+ .map(async () => {
454
+ while (queue.length > 0) {
455
+ const doc = queue.shift();
456
+ if (!doc) break;
457
+
458
+ try {
459
+ const result = await extractBytes(doc.bytes, doc.mimeType);
460
+ results[doc.name] = result.content;
461
+ } catch (error) {
462
+ console.error(`Failed to process ${doc.name}:`, error);
463
+ }
464
+ }
465
+ });
466
+
467
+ await Promise.all(workers);
468
+ return results;
469
+ }
470
+ ```
471
+
472
+
473
+
474
+
475
+ ## Configuration
476
+
477
+ For advanced configuration options including language detection, table extraction, OCR settings, and more:
478
+
479
+ **[Configuration Guide](https://kreuzberg.dev/configuration/)**
480
+
481
+ ## Documentation
482
+
483
+ - **[Official Documentation](https://kreuzberg.dev/)**
484
+ - **[API Reference](https://kreuzberg.dev/reference/api-wasm/)**
485
+ - **[Examples & Guides](https://kreuzberg.dev/guides/)**
486
+
487
+ ## Troubleshooting
488
+
489
+ For common issues and solutions, visit the [Troubleshooting Guide](https://kreuzberg.dev/troubleshooting/).
490
+
491
+ ## Contributing
492
+
493
+ Contributions are welcome! See the [Contributing Guide](https://github.com/kreuzberg-dev/kreuzberg/blob/main/CONTRIBUTING.md).
494
+
495
+ ## License
496
+
497
+ MIT License - see LICENSE file for details.
498
+
499
+ ## Support
500
+
501
+ - **Discord Community**: [Join our Discord](https://discord.gg/pXxagNK2zN)
502
+ - **GitHub Issues**: [Report bugs](https://github.com/kreuzberg-dev/kreuzberg/issues)
503
+ - **Discussions**: [Ask questions](https://github.com/kreuzberg-dev/kreuzberg/discussions)
@@ -429,7 +429,7 @@ export function getExtensionsForMime(mime_type: string): Array<any>;
429
429
  * console.log(unknownMime); // null
430
430
  * ```
431
431
  */
432
- export function getMimeFromExtension(extension: string): string | undefined;
432
+ export function getMimeFromExtension(extension: string): string;
433
433
 
434
434
  /**
435
435
  * Get module information
@@ -442,7 +442,7 @@ export function get_module_info(): ModuleInfo;
442
442
  */
443
443
  export function init(): void;
444
444
 
445
- export function initThreadPool(num_threads: number): Promise<any>;
445
+ export function initThreadPool(_num_threads: number): Promise<any>;
446
446
 
447
447
  /**
448
448
  * Helper function to initialize the thread pool with error handling
@@ -455,6 +455,15 @@ export function initThreadPool(num_threads: number): Promise<any>;
455
455
  */
456
456
  export function init_thread_pool_safe(num_threads: number): boolean;
457
457
 
458
+ /**
459
+ * Establishes a binding between an external Pdfium WASM module and `pdfium-render`'s WASM module.
460
+ * This function should be called from JavaScript once the external Pdfium WASM module has been loaded
461
+ * into the browser. It is essential that this function is called _before_ initializing
462
+ * `pdfium-render` from within Rust code. For an example, see:
463
+ * <https://github.com/ajrcarey/pdfium-render/blob/master/examples/index.html>
464
+ */
465
+ export function initialize_pdfium_render(pdfium_wasm_module: any, local_wasm_module: any, debug: boolean): boolean;
466
+
458
467
  /**
459
468
  * List all registered OCR backend names.
460
469
  *
@@ -586,6 +595,13 @@ export function loadConfigFromString(content: string, format: string): any;
586
595
  */
587
596
  export function normalizeMimeType(mime_type: string): string;
588
597
 
598
+ /**
599
+ * A callback function that can be invoked by Pdfium's `FPDF_LoadCustomDocument()` function,
600
+ * wrapping around `crate::utils::files::read_block_from_callback()` to shuffle data buffers
601
+ * from our WASM memory heap to Pdfium's WASM memory heap as they are loaded.
602
+ */
603
+ export function read_block_from_callback_wasm(param: number, position: number, pBuf: number, size: number): number;
604
+
589
605
  /**
590
606
  * Register a custom OCR backend.
591
607
  *
@@ -746,13 +762,9 @@ export function unregister_validator(name: string): void;
746
762
  */
747
763
  export function version(): string;
748
764
 
749
- export class wbg_rayon_PoolBuilder {
750
- private constructor();
751
- free(): void;
752
- [Symbol.dispose](): void;
753
- numThreads(): number;
754
- build(): void;
755
- receiver(): number;
756
- }
757
-
758
- export function wbg_rayon_start_worker(receiver: number): void;
765
+ /**
766
+ * A callback function that can be invoked by Pdfium's `FPDF_SaveAsCopy()` and `FPDF_SaveWithVersion()`
767
+ * functions, wrapping around `crate::utils::files::write_block_from_callback()` to shuffle data buffers
768
+ * from Pdfium's WASM memory heap to our WASM memory heap as they are written.
769
+ */
770
+ export function write_block_from_callback_wasm(param: number, buf: number, size: number): number;