@kreuzberg/wasm 4.0.7 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/README.md +6 -3
  2. package/dist/extraction/batch.d.ts +80 -0
  3. package/dist/extraction/batch.d.ts.map +1 -0
  4. package/dist/extraction/bytes.d.ts +69 -0
  5. package/dist/extraction/bytes.d.ts.map +1 -0
  6. package/dist/extraction/files.d.ts +77 -0
  7. package/dist/extraction/files.d.ts.map +1 -0
  8. package/dist/extraction/index.d.ts +11 -0
  9. package/dist/extraction/index.d.ts.map +1 -0
  10. package/dist/extraction/internal.d.ts +21 -0
  11. package/dist/extraction/internal.d.ts.map +1 -0
  12. package/dist/index.d.ts +9 -323
  13. package/dist/index.d.ts.map +1 -1
  14. package/dist/index.js +677 -591
  15. package/dist/index.js.map +1 -1
  16. package/dist/initialization/pdfium-loader.d.ts +30 -0
  17. package/dist/initialization/pdfium-loader.d.ts.map +1 -0
  18. package/dist/initialization/state.d.ts +100 -0
  19. package/dist/initialization/state.d.ts.map +1 -0
  20. package/dist/initialization/wasm-loader.d.ts +81 -0
  21. package/dist/initialization/wasm-loader.d.ts.map +1 -0
  22. package/dist/ocr/enabler.d.ts +86 -0
  23. package/dist/ocr/enabler.d.ts.map +1 -0
  24. package/dist/pkg/README.md +6 -3
  25. package/dist/pkg/kreuzberg_wasm.d.ts +76 -0
  26. package/dist/pkg/kreuzberg_wasm.js +142 -82
  27. package/dist/pkg/kreuzberg_wasm_bg.js +7 -7
  28. package/dist/pkg/kreuzberg_wasm_bg.wasm +0 -0
  29. package/dist/pkg/kreuzberg_wasm_bg.wasm.d.ts +3 -3
  30. package/dist/pkg/package.json +5 -1
  31. package/dist/runtime.d.ts +22 -2
  32. package/dist/runtime.d.ts.map +1 -1
  33. package/dist/runtime.js +21 -1
  34. package/dist/runtime.js.map +1 -1
  35. package/dist/types.d.ts +75 -0
  36. package/dist/types.d.ts.map +1 -1
  37. package/package.json +6 -6
package/README.md CHANGED
@@ -22,7 +22,7 @@
22
22
  <img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
23
23
  </a>
24
24
  <a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
25
- <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.0.0" alt="Go">
25
+ <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.1.0" alt="Go">
26
26
  </a>
27
27
  <a href="https://www.nuget.org/packages/Kreuzberg/">
28
28
  <img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
@@ -33,6 +33,9 @@
33
33
  <a href="https://rubygems.org/gems/kreuzberg">
34
34
  <img src="https://img.shields.io/gem/v/kreuzberg?label=Ruby&color=007ec6" alt="Ruby">
35
35
  </a>
36
+ <a href="https://github.com/kreuzberg-dev/kreuzberg/pkgs/container/kreuzberg">
37
+ <img src="https://img.shields.io/badge/Docker-007ec6?logo=docker&logoColor=white" alt="Docker">
38
+ </a>
36
39
 
37
40
  <!-- Project Info -->
38
41
  <a href="https://github.com/kreuzberg-dev/kreuzberg/blob/main/LICENSE">
@@ -184,7 +187,7 @@ interface DocumentJob {
184
187
  mimeType: string;
185
188
  }
186
189
 
187
- async function processBatch(documents: DocumentJob[], concurrency: number = 3) {
190
+ async function _processBatch(documents: DocumentJob[], concurrency: number = 3) {
188
191
  await initWasm();
189
192
 
190
193
  const results: Record<string, string> = {};
@@ -441,7 +444,7 @@ interface DocumentJob {
441
444
  mimeType: string;
442
445
  }
443
446
 
444
- async function processBatch(documents: DocumentJob[], concurrency: number = 3) {
447
+ async function _processBatch(documents: DocumentJob[], concurrency: number = 3) {
445
448
  await initWasm();
446
449
 
447
450
  const results: Record<string, string> = {};
@@ -0,0 +1,80 @@
1
+ /**
2
+ * Batch extraction functions
3
+ *
4
+ * Provides batch processing capabilities for extracting from multiple documents
5
+ * in a single operation for improved efficiency.
6
+ */
7
+ import type { ExtractionConfig as ExtractionConfigType, ExtractionResult } from "../types.d.ts";
8
+ /**
9
+ * Batch extract content from multiple byte arrays asynchronously
10
+ *
11
+ * Extracts content from multiple documents in a single batch operation,
12
+ * allowing for more efficient processing of multiple files.
13
+ *
14
+ * @param files - Array of objects containing data (Uint8Array) and mimeType (string)
15
+ * @param config - Optional extraction configuration applied to all files
16
+ * @returns Promise resolving to array of extraction results
17
+ * @throws {Error} If WASM module is not initialized or extraction fails
18
+ *
19
+ * @example
20
+ * ```typescript
21
+ * const files = [
22
+ * { data: pdfBytes, mimeType: 'application/pdf' },
23
+ * { data: docxBytes, mimeType: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' }
24
+ * ];
25
+ * const results = await batchExtractBytes(files);
26
+ * results.forEach((result) => console.log(result.content));
27
+ * ```
28
+ */
29
+ export declare function batchExtractBytes(files: Array<{
30
+ data: Uint8Array;
31
+ mimeType: string;
32
+ }>, config?: ExtractionConfigType | null): Promise<ExtractionResult[]>;
33
+ /**
34
+ * Batch extract content from multiple byte arrays synchronously
35
+ *
36
+ * Synchronous version of batchExtractBytes. Extracts content from multiple documents
37
+ * in a single batch operation without async operations.
38
+ *
39
+ * @param files - Array of objects containing data (Uint8Array) and mimeType (string)
40
+ * @param config - Optional extraction configuration applied to all files
41
+ * @returns Array of extraction results
42
+ * @throws {Error} If WASM module is not initialized or extraction fails
43
+ *
44
+ * @example
45
+ * ```typescript
46
+ * const files = [
47
+ * { data: pdfBytes, mimeType: 'application/pdf' },
48
+ * { data: docxBytes, mimeType: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' }
49
+ * ];
50
+ * const results = batchExtractBytesSync(files);
51
+ * results.forEach((result) => console.log(result.content));
52
+ * ```
53
+ */
54
+ export declare function batchExtractBytesSync(files: Array<{
55
+ data: Uint8Array;
56
+ mimeType: string;
57
+ }>, config?: ExtractionConfigType | null): ExtractionResult[];
58
+ /**
59
+ * Batch extract content from multiple File objects asynchronously
60
+ *
61
+ * Convenience function that converts File objects to Uint8Array and calls batchExtractBytes.
62
+ * Automatically uses file.type as the MIME type if available.
63
+ *
64
+ * @param files - Array of File objects to extract from
65
+ * @param config - Optional extraction configuration applied to all files
66
+ * @returns Promise resolving to array of extraction results
67
+ * @throws {Error} If WASM module is not initialized, files cannot be read, or extraction fails
68
+ *
69
+ * @example
70
+ * ```typescript
71
+ * const fileInput = document.getElementById('files');
72
+ * const files = Array.from(fileInput.files ?? []);
73
+ * const results = await batchExtractFiles(files);
74
+ * results.forEach((result, index) => {
75
+ * console.log(`File ${index}: ${result.content.substring(0, 50)}...`);
76
+ * });
77
+ * ```
78
+ */
79
+ export declare function batchExtractFiles(files: File[], config?: ExtractionConfigType | null): Promise<ExtractionResult[]>;
80
+ //# sourceMappingURL=batch.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"batch.d.ts","sourceRoot":"","sources":["../../typescript/extraction/batch.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,OAAO,KAAK,EAAE,gBAAgB,IAAI,oBAAoB,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAG9F;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAsB,iBAAiB,CACtC,KAAK,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,UAAU,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAA;CAAE,CAAC,EACpD,MAAM,CAAC,EAAE,oBAAoB,GAAG,IAAI,GAClC,OAAO,CAAC,gBAAgB,EAAE,CAAC,CA6D7B;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAgB,qBAAqB,CACpC,KAAK,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,UAAU,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAA;CAAE,CAAC,EACpD,MAAM,CAAC,EAAE,oBAAoB,GAAG,IAAI,GAClC,gBAAgB,EAAE,CA6DpB;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAsB,iBAAiB,CACtC,KAAK,EAAE,IAAI,EAAE,EACb,MAAM,CAAC,EAAE,oBAAoB,GAAG,IAAI,GAClC,OAAO,CAAC,gBAAgB,EAAE,CAAC,CAiC7B"}
@@ -0,0 +1,69 @@
1
+ /**
2
+ * Byte-based extraction functions
3
+ *
4
+ * Provides synchronous and asynchronous extraction functions for document bytes.
5
+ */
6
+ import type { ExtractionConfig as ExtractionConfigType, ExtractionResult } from "../types.d.ts";
7
+ /**
8
+ * Extract content from bytes (document data)
9
+ *
10
+ * Extracts text, metadata, tables, images, and other content from document bytes.
11
+ * Automatically detects document type from MIME type and applies appropriate extraction logic.
12
+ *
13
+ * @param data - The document bytes to extract from
14
+ * @param mimeType - MIME type of the document (e.g., 'application/pdf', 'image/jpeg')
15
+ * @param config - Optional extraction configuration
16
+ * @returns Promise resolving to the extraction result
17
+ * @throws {Error} If WASM module is not initialized or extraction fails
18
+ *
19
+ * @example Extract PDF
20
+ * ```typescript
21
+ * const bytes = new Uint8Array(buffer);
22
+ * const result = await extractBytes(bytes, 'application/pdf');
23
+ * console.log(result.content);
24
+ * console.log(result.tables);
25
+ * ```
26
+ *
27
+ * @example Extract with Configuration
28
+ * ```typescript
29
+ * const result = await extractBytes(bytes, 'application/pdf', {
30
+ * ocr: {
31
+ * backend: 'tesseract',
32
+ * language: 'deu' // German
33
+ * },
34
+ * images: {
35
+ * extractImages: true,
36
+ * targetDpi: 200
37
+ * }
38
+ * });
39
+ * ```
40
+ *
41
+ * @example Extract from File
42
+ * ```typescript
43
+ * const file = inputEvent.target.files[0];
44
+ * const bytes = await fileToUint8Array(file);
45
+ * const result = await extractBytes(bytes, file.type);
46
+ * ```
47
+ */
48
+ export declare function extractBytes(data: Uint8Array, mimeType: string, config?: ExtractionConfigType | null): Promise<ExtractionResult>;
49
+ /**
50
+ * Extract content from bytes synchronously
51
+ *
52
+ * Synchronous version of extractBytes. Performs extraction without async operations.
53
+ * Note: Some extraction features may still be async internally, but the wrapper is synchronous.
54
+ *
55
+ * @param data - The document bytes to extract from
56
+ * @param mimeType - MIME type of the document
57
+ * @param config - Optional extraction configuration
58
+ * @returns The extraction result
59
+ * @throws {Error} If WASM module is not initialized or extraction fails
60
+ *
61
+ * @example
62
+ * ```typescript
63
+ * const bytes = new Uint8Array(buffer);
64
+ * const result = extractBytesSync(bytes, 'application/pdf');
65
+ * console.log(result.content);
66
+ * ```
67
+ */
68
+ export declare function extractBytesSync(data: Uint8Array, mimeType: string, config?: ExtractionConfigType | null): ExtractionResult;
69
+ //# sourceMappingURL=bytes.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"bytes.d.ts","sourceRoot":"","sources":["../../typescript/extraction/bytes.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,OAAO,KAAK,EAAE,gBAAgB,IAAI,oBAAoB,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAG9F;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAwCG;AACH,wBAAsB,YAAY,CACjC,IAAI,EAAE,UAAU,EAChB,QAAQ,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,oBAAoB,GAAG,IAAI,GAClC,OAAO,CAAC,gBAAgB,CAAC,CA4B3B;AAED;;;;;;;;;;;;;;;;;;GAkBG;AACH,wBAAgB,gBAAgB,CAC/B,IAAI,EAAE,UAAU,EAChB,QAAQ,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,oBAAoB,GAAG,IAAI,GAClC,gBAAgB,CA4BlB"}
@@ -0,0 +1,77 @@
1
+ /**
2
+ * File-based extraction functions
3
+ *
4
+ * Provides extraction functions for files in filesystem-based environments (Node.js, Deno, Bun)
5
+ * and browser File/Blob objects.
6
+ */
7
+ import type { ExtractionConfig as ExtractionConfigType, ExtractionResult } from "../types.d.ts";
8
+ /**
9
+ * Extract content from a file on the file system
10
+ *
11
+ * Node.js and Deno specific function that reads a file from the file system
12
+ * and extracts content from it. Automatically detects MIME type if not provided.
13
+ *
14
+ * @param path - Path to the file to extract from
15
+ * @param mimeType - Optional MIME type of the file. If not provided, the function will attempt to detect it
16
+ * @param config - Optional extraction configuration
17
+ * @returns Promise resolving to the extraction result
18
+ * @throws {Error} If WASM module is not initialized, file doesn't exist, or extraction fails
19
+ *
20
+ * @example Extract with auto-detection
21
+ * ```typescript
22
+ * const result = await extractFile('./document.pdf');
23
+ * console.log(result.content);
24
+ * ```
25
+ *
26
+ * @example Extract with explicit MIME type
27
+ * ```typescript
28
+ * const result = await extractFile('./document.docx', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document');
29
+ * ```
30
+ *
31
+ * @example Extract from Node.js with config
32
+ * ```typescript
33
+ * import { extractFile } from '@kreuzberg/wasm';
34
+ * import { readFile } from 'fs/promises';
35
+ *
36
+ * const result = await extractFile('./report.xlsx', null, {
37
+ * chunking: {
38
+ * maxChars: 1000
39
+ * }
40
+ * });
41
+ * ```
42
+ */
43
+ export declare function extractFile(path: string, mimeType?: string | null, config?: ExtractionConfigType | null): Promise<ExtractionResult>;
44
+ /**
45
+ * Extract content from a File or Blob (browser-friendly wrapper)
46
+ *
47
+ * Convenience function that wraps fileToUint8Array and extractBytes,
48
+ * providing a streamlined API for browser applications handling file inputs.
49
+ *
50
+ * @param file - The File or Blob to extract from
51
+ * @param mimeType - Optional MIME type. If not provided, uses file.type if available
52
+ * @param config - Optional extraction configuration
53
+ * @returns Promise resolving to the extraction result
54
+ * @throws {Error} If WASM module is not initialized or extraction fails
55
+ *
56
+ * @example Simple file extraction
57
+ * ```typescript
58
+ * const fileInput = document.getElementById('file');
59
+ * fileInput.addEventListener('change', async (e) => {
60
+ * const file = e.target.files?.[0];
61
+ * if (file) {
62
+ * const result = await extractFromFile(file);
63
+ * console.log(result.content);
64
+ * }
65
+ * });
66
+ * ```
67
+ *
68
+ * @example With configuration
69
+ * ```typescript
70
+ * const result = await extractFromFile(file, file.type, {
71
+ * chunking: { maxChars: 1000 },
72
+ * images: { extractImages: true }
73
+ * });
74
+ * ```
75
+ */
76
+ export declare function extractFromFile(file: File | Blob, mimeType?: string | null, config?: ExtractionConfigType | null): Promise<ExtractionResult>;
77
+ //# sourceMappingURL=files.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"files.d.ts","sourceRoot":"","sources":["../../typescript/extraction/files.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH,OAAO,KAAK,EAAE,gBAAgB,IAAI,oBAAoB,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAI9F;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkCG;AACH,wBAAsB,WAAW,CAChC,IAAI,EAAE,MAAM,EACZ,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,EACxB,MAAM,CAAC,EAAE,oBAAoB,GAAG,IAAI,GAClC,OAAO,CAAC,gBAAgB,CAAC,CAmD3B;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AACH,wBAAsB,eAAe,CACpC,IAAI,EAAE,IAAI,GAAG,IAAI,EACjB,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,EACxB,MAAM,CAAC,EAAE,oBAAoB,GAAG,IAAI,GAClC,OAAO,CAAC,gBAAgB,CAAC,CAiB3B"}
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Extraction module
3
+ *
4
+ * Provides comprehensive extraction functionality for various document formats.
5
+ * Includes byte-based, file-based, and batch processing capabilities.
6
+ */
7
+ export type { ExtractionConfig, ExtractionResult } from "../types.d.ts";
8
+ export { batchExtractBytes, batchExtractBytesSync, batchExtractFiles } from "./batch.d.ts";
9
+ export { extractBytes, extractBytesSync } from "./bytes.d.ts";
10
+ export { extractFile, extractFromFile } from "./files.d.ts";
11
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../typescript/extraction/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,YAAY,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AACtE,OAAO,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,iBAAiB,EAAE,MAAM,YAAY,CAAC;AACzF,OAAO,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAC5D,OAAO,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC"}
@@ -0,0 +1,21 @@
1
+ /**
2
+ * Internal extraction module helpers
3
+ *
4
+ * Provides internal utilities and access to the WASM module state.
5
+ * Re-exports state management from the centralized state module.
6
+ */
7
+ import { type WasmModule } from "../initialization/state.d.ts";
8
+ /**
9
+ * Get the WASM module
10
+ *
11
+ * @returns The WASM module
12
+ * @throws {Error} If WASM module is not loaded
13
+ */
14
+ export declare function getWasmModule(): WasmModule;
15
+ /**
16
+ * Check if WASM module is initialized
17
+ *
18
+ * @returns True if WASM module is initialized
19
+ */
20
+ export declare function isInitialized(): boolean;
21
+ //# sourceMappingURL=internal.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"internal.d.ts","sourceRoot":"","sources":["../../typescript/extraction/internal.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAGN,KAAK,UAAU,EACf,MAAM,4BAA4B,CAAC;AAEpC;;;;;GAKG;AACH,wBAAgB,aAAa,IAAI,UAAU,CAO1C;AAED;;;;GAIG;AACH,wBAAgB,aAAa,IAAI,OAAO,CAEvC"}
package/dist/index.d.ts CHANGED
@@ -57,7 +57,7 @@
57
57
  * ## Runtime Detection
58
58
  *
59
59
  * ```typescript
60
- * import { detectRuntime, getWasmCapabilities } from '@kreuzberg/wasm/runtime';
60
+ * import { detectRuntime, getWasmCapabilities } from '@kreuzberg/wasm';
61
61
  *
62
62
  * const runtime = detectRuntime();
63
63
  * const caps = getWasmCapabilities();
@@ -93,330 +93,16 @@
93
93
  * const result = await extractBytes(bytes, 'application/pdf', config);
94
94
  * ```
95
95
  */
96
- import type { ExtractionConfig as ExtractionConfigType, ExtractionResult } from "./types.d.ts";
96
+ export type * from "./types.d.ts";
97
+ export { initializePdfiumAsync } from "./initialization/pdfium-loader.d.ts";
98
+ export { getInitializationError, getVersion, getWasmModule, initWasm, isInitialized, type ModuleInfo, type WasmModule, } from "./initialization/wasm-loader.d.ts";
99
+ export { extractBytes, extractBytesSync } from "./extraction/bytes.d.ts";
100
+ export { extractFile, extractFromFile } from "./extraction/files.d.ts";
101
+ export { batchExtractBytes, batchExtractBytesSync, batchExtractFiles, } from "./extraction/batch.d.ts";
102
+ export { enableOcr } from "./ocr/enabler.d.ts";
97
103
  export { configToJS, fileToUint8Array, isValidExtractionResult, jsToExtractionResult, wrapWasmError, } from "./adapters/wasm-adapter.d.ts";
98
104
  export { clearOcrBackends, getOcrBackend, listOcrBackends, registerOcrBackend, unregisterOcrBackend, } from "./ocr/registry.d.ts";
99
105
  export { TesseractWasmBackend } from "./ocr/tesseract-wasm-backend.d.ts";
100
106
  export { clearPostProcessors, clearValidators, getPostProcessor, getValidator, listPostProcessors, listValidators, type PostProcessor, registerPostProcessor, registerValidator, unregisterPostProcessor, unregisterValidator, type Validator, } from "./plugin-registry.d.ts";
101
- export { detectRuntime, getRuntimeInfo, getRuntimeVersion, getWasmCapabilities, hasBigInt, hasBlob, hasFileApi, hasModuleWorkers, hasSharedArrayBuffer, hasWasm, hasWasmStreaming, hasWorkers, isBrowser, isBun, isDeno, isNode, isServerEnvironment, isWebEnvironment, type RuntimeType, type WasmCapabilities, } from "./runtime.d.ts";
102
- export type * from "./types.d.ts";
103
- export type { Chunk, ChunkingConfig, ChunkMetadata, ExtractedImage, ExtractionConfig, ExtractionResult, ImageExtractionConfig, LanguageDetectionConfig, Metadata, OcrBackendProtocol, OcrConfig, PageContent, PageExtractionConfig, PdfConfig, PostProcessorConfig, Table, TesseractConfig, TokenReductionConfig, } from "./types.d.ts";
104
- export declare function initWasm(): Promise<void>;
105
- /**
106
- * Check if WASM module is initialized
107
- *
108
- * @returns True if WASM module is initialized, false otherwise
109
- *
110
- * @example
111
- * ```typescript
112
- * if (!isInitialized()) {
113
- * await initWasm();
114
- * }
115
- * ```
116
- */
117
- export declare function isInitialized(): boolean;
118
- /**
119
- * Get WASM module version
120
- *
121
- * @throws {Error} If WASM module is not initialized
122
- * @returns The version string of the WASM module
123
- *
124
- * @example
125
- * ```typescript
126
- * const version = getVersion();
127
- * console.log(`Using Kreuzberg ${version}`);
128
- * ```
129
- */
130
- export declare function getVersion(): string;
131
- /**
132
- * Get initialization error if module failed to load
133
- *
134
- * @returns The error that occurred during initialization, or null if no error
135
- *
136
- * @internal
137
- */
138
- export declare function getInitializationError(): Error | null;
139
- /**
140
- * Extract content from bytes (document data)
141
- *
142
- * Extracts text, metadata, tables, images, and other content from document bytes.
143
- * Automatically detects document type from MIME type and applies appropriate extraction logic.
144
- *
145
- * @param data - The document bytes to extract from
146
- * @param mimeType - MIME type of the document (e.g., 'application/pdf', 'image/jpeg')
147
- * @param config - Optional extraction configuration
148
- * @returns Promise resolving to the extraction result
149
- * @throws {Error} If WASM module is not initialized or extraction fails
150
- *
151
- * @example Extract PDF
152
- * ```typescript
153
- * const bytes = new Uint8Array(buffer);
154
- * const result = await extractBytes(bytes, 'application/pdf');
155
- * console.log(result.content);
156
- * console.log(result.tables);
157
- * ```
158
- *
159
- * @example Extract with Configuration
160
- * ```typescript
161
- * const result = await extractBytes(bytes, 'application/pdf', {
162
- * ocr: {
163
- * backend: 'tesseract',
164
- * language: 'deu' // German
165
- * },
166
- * images: {
167
- * extractImages: true,
168
- * targetDpi: 200
169
- * }
170
- * });
171
- * ```
172
- *
173
- * @example Extract from File
174
- * ```typescript
175
- * const file = inputEvent.target.files[0];
176
- * const bytes = await fileToUint8Array(file);
177
- * const result = await extractBytes(bytes, file.type);
178
- * ```
179
- */
180
- export declare function extractBytes(data: Uint8Array, mimeType: string, config?: ExtractionConfigType | null): Promise<ExtractionResult>;
181
- /**
182
- * Extract content from a file on the file system
183
- *
184
- * Node.js and Deno specific function that reads a file from the file system
185
- * and extracts content from it. Automatically detects MIME type if not provided.
186
- *
187
- * @param path - Path to the file to extract from
188
- * @param mimeType - Optional MIME type of the file. If not provided, will attempt to detect
189
- * @param config - Optional extraction configuration
190
- * @returns Promise resolving to the extraction result
191
- * @throws {Error} If WASM module is not initialized, file doesn't exist, or extraction fails
192
- *
193
- * @example Extract with auto-detection
194
- * ```typescript
195
- * const result = await extractFile('./document.pdf');
196
- * console.log(result.content);
197
- * ```
198
- *
199
- * @example Extract with explicit MIME type
200
- * ```typescript
201
- * const result = await extractFile('./document.docx', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document');
202
- * ```
203
- *
204
- * @example Extract from Node.js with config
205
- * ```typescript
206
- * import { extractFile } from '@kreuzberg/wasm';
207
- * import { readFile } from 'fs/promises';
208
- *
209
- * const result = await extractFile('./report.xlsx', null, {
210
- * chunking: {
211
- * maxChars: 1000
212
- * }
213
- * });
214
- * ```
215
- */
216
- export declare function extractFile(path: string, mimeType?: string | null, config?: ExtractionConfigType | null): Promise<ExtractionResult>;
217
- /**
218
- * Extract content from a File or Blob (browser-friendly wrapper)
219
- *
220
- * Convenience function that wraps fileToUint8Array and extractBytes,
221
- * providing a streamlined API for browser applications handling file inputs.
222
- *
223
- * @param file - The File or Blob to extract from
224
- * @param mimeType - Optional MIME type. If not provided, uses file.type if available
225
- * @param config - Optional extraction configuration
226
- * @returns Promise resolving to the extraction result
227
- * @throws {Error} If WASM module is not initialized or extraction fails
228
- *
229
- * @example Simple file extraction
230
- * ```typescript
231
- * const fileInput = document.getElementById('file');
232
- * fileInput.addEventListener('change', async (e) => {
233
- * const file = e.target.files?.[0];
234
- * if (file) {
235
- * const result = await extractFromFile(file);
236
- * console.log(result.content);
237
- * }
238
- * });
239
- * ```
240
- *
241
- * @example With configuration
242
- * ```typescript
243
- * const result = await extractFromFile(file, file.type, {
244
- * chunking: { maxChars: 1000 },
245
- * images: { extractImages: true }
246
- * });
247
- * ```
248
- */
249
- export declare function extractFromFile(file: File | Blob, mimeType?: string | null, config?: ExtractionConfigType | null): Promise<ExtractionResult>;
250
- /**
251
- * Extract content from bytes synchronously
252
- *
253
- * Synchronous version of extractBytes. Performs extraction without async operations.
254
- * Note: Some extraction features may still be async internally, but the wrapper is synchronous.
255
- *
256
- * @param data - The document bytes to extract from
257
- * @param mimeType - MIME type of the document
258
- * @param config - Optional extraction configuration
259
- * @returns The extraction result
260
- * @throws {Error} If WASM module is not initialized or extraction fails
261
- *
262
- * @example
263
- * ```typescript
264
- * const bytes = new Uint8Array(buffer);
265
- * const result = extractBytesSync(bytes, 'application/pdf');
266
- * console.log(result.content);
267
- * ```
268
- */
269
- export declare function extractBytesSync(data: Uint8Array, mimeType: string, config?: ExtractionConfigType | null): ExtractionResult;
270
- /**
271
- * Batch extract content from multiple byte arrays asynchronously
272
- *
273
- * Extracts content from multiple documents in a single batch operation,
274
- * allowing for more efficient processing of multiple files.
275
- *
276
- * @param files - Array of objects containing data (Uint8Array) and mimeType (string)
277
- * @param config - Optional extraction configuration applied to all files
278
- * @returns Promise resolving to array of extraction results
279
- * @throws {Error} If WASM module is not initialized or extraction fails
280
- *
281
- * @example
282
- * ```typescript
283
- * const files = [
284
- * { data: pdfBytes, mimeType: 'application/pdf' },
285
- * { data: docxBytes, mimeType: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' }
286
- * ];
287
- * const results = await batchExtractBytes(files);
288
- * results.forEach((result) => console.log(result.content));
289
- * ```
290
- */
291
- export declare function batchExtractBytes(files: Array<{
292
- data: Uint8Array;
293
- mimeType: string;
294
- }>, config?: ExtractionConfigType | null): Promise<ExtractionResult[]>;
295
- /**
296
- * Batch extract content from multiple byte arrays synchronously
297
- *
298
- * Synchronous version of batchExtractBytes. Extracts content from multiple documents
299
- * in a single batch operation without async operations.
300
- *
301
- * @param files - Array of objects containing data (Uint8Array) and mimeType (string)
302
- * @param config - Optional extraction configuration applied to all files
303
- * @returns Array of extraction results
304
- * @throws {Error} If WASM module is not initialized or extraction fails
305
- *
306
- * @example
307
- * ```typescript
308
- * const files = [
309
- * { data: pdfBytes, mimeType: 'application/pdf' },
310
- * { data: docxBytes, mimeType: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' }
311
- * ];
312
- * const results = batchExtractBytesSync(files);
313
- * results.forEach((result) => console.log(result.content));
314
- * ```
315
- */
316
- export declare function batchExtractBytesSync(files: Array<{
317
- data: Uint8Array;
318
- mimeType: string;
319
- }>, config?: ExtractionConfigType | null): ExtractionResult[];
320
- /**
321
- * Batch extract content from multiple File objects asynchronously
322
- *
323
- * Convenience function that converts File objects to Uint8Array and calls batchExtractBytes.
324
- * Automatically uses the file.type as MIME type if available.
325
- *
326
- * @param files - Array of File objects to extract from
327
- * @param config - Optional extraction configuration applied to all files
328
- * @returns Promise resolving to array of extraction results
329
- * @throws {Error} If WASM module is not initialized, files cannot be read, or extraction fails
330
- *
331
- * @example
332
- * ```typescript
333
- * const fileInput = document.getElementById('files');
334
- * const files = Array.from(fileInput.files ?? []);
335
- * const results = await batchExtractFiles(files);
336
- * results.forEach((result, index) => {
337
- * console.log(`File ${index}: ${result.content.substring(0, 50)}...`);
338
- * });
339
- * ```
340
- */
341
- export declare function batchExtractFiles(files: File[], config?: ExtractionConfigType | null): Promise<ExtractionResult[]>;
342
- /**
343
- * Enable OCR functionality with tesseract-wasm backend
344
- *
345
- * Convenience function that automatically initializes and registers the Tesseract WASM backend.
346
- * This is the recommended approach for enabling OCR in WASM-based applications.
347
- *
348
- * ## Browser Requirement
349
- *
350
- * This function requires a browser environment with support for:
351
- * - WebWorkers (for Tesseract processing)
352
- * - createImageBitmap (for image conversion)
353
- * - Blob API
354
- *
355
- * ## Network Requirement
356
- *
357
- * Training data will be loaded from jsDelivr CDN on first use of each language.
358
- * Ensure network access to cdn.jsdelivr.net is available.
359
- *
360
- * @throws {Error} If not in browser environment or tesseract-wasm is not available
361
- *
362
- * @example Basic Usage
363
- * ```typescript
364
- * import { enableOcr, extractBytes, initWasm } from '@kreuzberg/wasm';
365
- *
366
- * async function main() {
367
- * // Initialize WASM module
368
- * await initWasm();
369
- *
370
- * // Enable OCR with tesseract-wasm
371
- * await enableOcr();
372
- *
373
- * // Now you can use OCR in extraction
374
- * const imageBytes = new Uint8Array(buffer);
375
- * const result = await extractBytes(imageBytes, 'image/png', {
376
- * ocr: { backend: 'tesseract-wasm', language: 'eng' }
377
- * });
378
- *
379
- * console.log(result.content); // Extracted text
380
- * }
381
- *
382
- * main().catch(console.error);
383
- * ```
384
- *
385
- * @example With Progress Tracking
386
- * ```typescript
387
- * import { enableOcr, TesseractWasmBackend } from '@kreuzberg/wasm';
388
- *
389
- * async function setupOcrWithProgress() {
390
- * const backend = new TesseractWasmBackend();
391
- * backend.setProgressCallback((progress) => {
392
- * console.log(`OCR Progress: ${progress}%`);
393
- * updateProgressBar(progress);
394
- * });
395
- *
396
- * await backend.initialize();
397
- * registerOcrBackend(backend);
398
- * }
399
- *
400
- * setupOcrWithProgress().catch(console.error);
401
- * ```
402
- *
403
- * @example Multiple Languages
404
- * ```typescript
405
- * import { enableOcr, extractBytes, initWasm } from '@kreuzberg/wasm';
406
- *
407
- * await initWasm();
408
- * await enableOcr();
409
- *
410
- * // Extract English text
411
- * const englishResult = await extractBytes(engImageBytes, 'image/png', {
412
- * ocr: { backend: 'tesseract-wasm', language: 'eng' }
413
- * });
414
- *
415
- * // Extract German text - model is cached after first use
416
- * const germanResult = await extractBytes(deImageBytes, 'image/png', {
417
- * ocr: { backend: 'tesseract-wasm', language: 'deu' }
418
- * });
419
- * ```
420
- */
421
- export declare function enableOcr(): Promise<void>;
107
+ export { detectRuntime, getRuntimeInfo, getRuntimeVersion, getWasmCapabilities, hasBigInt, hasBlob, hasFileApi, hasModuleWorkers, hasSharedArrayBuffer, hasWasm, hasWasmStreaming, hasWorkers, isBrowser, isBun, isCloudflareWorkers, isDeno, isEdgeEnvironment, isEdgeRuntime, isNode, isServerEnvironment, isWebEnvironment, type RuntimeType, type WasmCapabilities, } from "./runtime.d.ts";
422
108
  //# sourceMappingURL=index.d.ts.map