@kreuzberg/wasm 4.0.0-rc.6 → 4.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +7 -0
- package/README.md +317 -801
- package/dist/adapters/wasm-adapter.d.ts +7 -10
- package/dist/adapters/wasm-adapter.d.ts.map +1 -0
- package/dist/adapters/wasm-adapter.js +53 -54
- package/dist/adapters/wasm-adapter.js.map +1 -1
- package/dist/index.d.ts +23 -67
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +1102 -104
- package/dist/index.js.map +1 -1
- package/dist/ocr/registry.d.ts +7 -10
- package/dist/ocr/registry.d.ts.map +1 -0
- package/dist/ocr/registry.js +9 -28
- package/dist/ocr/registry.js.map +1 -1
- package/dist/ocr/tesseract-wasm-backend.d.ts +3 -6
- package/dist/ocr/tesseract-wasm-backend.d.ts.map +1 -0
- package/dist/ocr/tesseract-wasm-backend.js +8 -83
- package/dist/ocr/tesseract-wasm-backend.js.map +1 -1
- package/dist/pdfium.js +77 -0
- package/dist/pkg/LICENSE +7 -0
- package/dist/pkg/README.md +498 -0
- package/dist/{kreuzberg_wasm.d.ts → pkg/kreuzberg_wasm.d.ts} +24 -12
- package/dist/{kreuzberg_wasm.js → pkg/kreuzberg_wasm.js} +224 -233
- package/dist/pkg/kreuzberg_wasm_bg.js +1871 -0
- package/dist/{kreuzberg_wasm_bg.wasm → pkg/kreuzberg_wasm_bg.wasm} +0 -0
- package/dist/{kreuzberg_wasm_bg.wasm.d.ts → pkg/kreuzberg_wasm_bg.wasm.d.ts} +10 -13
- package/dist/pkg/package.json +27 -0
- package/dist/plugin-registry.d.ts +246 -0
- package/dist/plugin-registry.d.ts.map +1 -0
- package/dist/runtime.d.ts +21 -22
- package/dist/runtime.d.ts.map +1 -0
- package/dist/runtime.js +21 -41
- package/dist/runtime.js.map +1 -1
- package/dist/types.d.ts +363 -0
- package/dist/types.d.ts.map +1 -0
- package/package.json +34 -51
- package/dist/adapters/wasm-adapter.d.mts +0 -121
- package/dist/adapters/wasm-adapter.mjs +0 -221
- package/dist/adapters/wasm-adapter.mjs.map +0 -1
- package/dist/index.d.mts +0 -466
- package/dist/index.mjs +0 -384
- package/dist/index.mjs.map +0 -1
- package/dist/kreuzberg_wasm.d.mts +0 -758
- package/dist/kreuzberg_wasm.mjs +0 -48
- package/dist/ocr/registry.d.mts +0 -102
- package/dist/ocr/registry.mjs +0 -70
- package/dist/ocr/registry.mjs.map +0 -1
- package/dist/ocr/tesseract-wasm-backend.d.mts +0 -257
- package/dist/ocr/tesseract-wasm-backend.mjs +0 -424
- package/dist/ocr/tesseract-wasm-backend.mjs.map +0 -1
- package/dist/runtime.d.mts +0 -256
- package/dist/runtime.mjs +0 -152
- package/dist/runtime.mjs.map +0 -1
- package/dist/snippets/wasm-bindgen-rayon-38edf6e439f6d70d/src/workerHelpers.js +0 -107
- package/dist/types-GJVIvbPy.d.mts +0 -221
- package/dist/types-GJVIvbPy.d.ts +0 -221
package/dist/index.js
CHANGED
|
@@ -1,88 +1,1020 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __create = Object.create;
|
|
3
1
|
var __defProp = Object.defineProperty;
|
|
4
|
-
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
5
2
|
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
6
|
-
var
|
|
7
|
-
|
|
3
|
+
// Lazy one-time initializer. `fn` is an object whose first own property holds
// the function to run. The first call to the returned `__init` invokes that
// function (passing 0, and resetting `fn` to 0 in the same step so it cannot
// run again) and stores its return value in `res`; every later call skips the
// invocation because `fn` is now falsy and simply returns the cached `res`.
var __esm = (fn, res) => function __init() {
|
|
4
|
+
return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
|
|
5
|
+
};
|
|
8
6
|
// Define one enumerable, getter-backed property on `target` for every
// enumerable key of `all`. Each value in `all` is used directly as the getter,
// so reading `target[key]` always reflects the getter's current result.
var __export = (target, all) => {
  for (const exportName in all) {
    const getter = all[exportName];
    __defProp(target, exportName, { get: getter, enumerable: true });
  }
};
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
10
|
+
|
|
11
|
+
// typescript/pdfium.js
|
|
12
|
+
var pdfium_exports = {};
|
|
13
|
+
__export(pdfium_exports, {
|
|
14
|
+
default: () => initPdfium
|
|
15
|
+
});
|
|
16
|
+
/**
 * Placeholder PDFium initializer.
 *
 * @returns {Promise<object>} Resolves to a fresh, empty object on every call.
 */
async function initPdfium() {
  // Dummy implementation for testing
  const stubModule = {};
  return stubModule;
}
|
|
21
|
+
var init_pdfium = __esm({
|
|
22
|
+
"typescript/pdfium.js"() {
|
|
23
|
+
"use strict";
|
|
17
24
|
}
|
|
18
|
-
return to;
|
|
19
|
-
};
|
|
20
|
-
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
21
|
-
// If the importer is in node compatibility mode or this is not an ESM
|
|
22
|
-
// file that has been converted to a CommonJS file using a Babel-
|
|
23
|
-
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
24
|
-
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
25
|
-
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
26
|
-
mod
|
|
27
|
-
));
|
|
28
|
-
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
29
|
-
var index_exports = {};
|
|
30
|
-
__export(index_exports, {
|
|
31
|
-
TesseractWasmBackend: () => import_tesseract_wasm_backend2.TesseractWasmBackend,
|
|
32
|
-
batchExtractBytes: () => batchExtractBytes,
|
|
33
|
-
batchExtractBytesSync: () => batchExtractBytesSync,
|
|
34
|
-
batchExtractFiles: () => batchExtractFiles,
|
|
35
|
-
clearOcrBackends: () => import_registry2.clearOcrBackends,
|
|
36
|
-
configToJS: () => import_wasm_adapter2.configToJS,
|
|
37
|
-
detectRuntime: () => import_runtime2.detectRuntime,
|
|
38
|
-
enableOcr: () => enableOcr,
|
|
39
|
-
extractBytes: () => extractBytes,
|
|
40
|
-
extractBytesSync: () => extractBytesSync,
|
|
41
|
-
extractFile: () => extractFile,
|
|
42
|
-
extractFromFile: () => extractFromFile,
|
|
43
|
-
fileToUint8Array: () => import_wasm_adapter2.fileToUint8Array,
|
|
44
|
-
getInitializationError: () => getInitializationError,
|
|
45
|
-
getOcrBackend: () => import_registry2.getOcrBackend,
|
|
46
|
-
getRuntimeInfo: () => import_runtime2.getRuntimeInfo,
|
|
47
|
-
getRuntimeVersion: () => import_runtime2.getRuntimeVersion,
|
|
48
|
-
getVersion: () => getVersion,
|
|
49
|
-
getWasmCapabilities: () => import_runtime2.getWasmCapabilities,
|
|
50
|
-
hasBigInt: () => import_runtime2.hasBigInt,
|
|
51
|
-
hasBlob: () => import_runtime2.hasBlob,
|
|
52
|
-
hasFileApi: () => import_runtime2.hasFileApi,
|
|
53
|
-
hasModuleWorkers: () => import_runtime2.hasModuleWorkers,
|
|
54
|
-
hasSharedArrayBuffer: () => import_runtime2.hasSharedArrayBuffer,
|
|
55
|
-
hasWasm: () => import_runtime2.hasWasm,
|
|
56
|
-
hasWasmStreaming: () => import_runtime2.hasWasmStreaming,
|
|
57
|
-
hasWorkers: () => import_runtime2.hasWorkers,
|
|
58
|
-
initWasm: () => initWasm,
|
|
59
|
-
isBrowser: () => import_runtime2.isBrowser,
|
|
60
|
-
isBun: () => import_runtime2.isBun,
|
|
61
|
-
isDeno: () => import_runtime2.isDeno,
|
|
62
|
-
isInitialized: () => isInitialized,
|
|
63
|
-
isNode: () => import_runtime2.isNode,
|
|
64
|
-
isServerEnvironment: () => import_runtime2.isServerEnvironment,
|
|
65
|
-
isValidExtractionResult: () => import_wasm_adapter2.isValidExtractionResult,
|
|
66
|
-
isWebEnvironment: () => import_runtime2.isWebEnvironment,
|
|
67
|
-
jsToExtractionResult: () => import_wasm_adapter2.jsToExtractionResult,
|
|
68
|
-
listOcrBackends: () => import_registry2.listOcrBackends,
|
|
69
|
-
registerOcrBackend: () => import_registry2.registerOcrBackend,
|
|
70
|
-
unregisterOcrBackend: () => import_registry2.unregisterOcrBackend,
|
|
71
|
-
wrapWasmError: () => import_wasm_adapter2.wrapWasmError
|
|
72
25
|
});
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
var
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
26
|
+
|
|
27
|
+
// typescript/adapters/wasm-adapter.ts
|
|
28
|
+
var MAX_FILE_SIZE = 512 * 1024 * 1024;
|
|
29
|
+
/**
 * Check whether a value is a number primitive (NaN included) or exactly null.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} True for numbers and null; false for everything else,
 *   including undefined.
 */
function isNumberOrNull(value) {
  if (value === null) {
    return true;
  }
  return typeof value === "number";
}
|
|
32
|
+
/**
 * Check whether a value is a string primitive or exactly null.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} True for strings (including "") and null; false
 *   otherwise, including undefined.
 */
function isStringOrNull(value) {
  if (value === null) {
    return true;
  }
  return typeof value === "string";
}
|
|
35
|
+
/**
 * Check whether a value is a boolean primitive.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} True only for `true` or `false`.
 */
function isBoolean(value) {
  const kind = typeof value;
  return kind === "boolean";
}
|
|
38
|
+
/**
 * Read a File/Blob-like object fully into memory as a Uint8Array.
 *
 * @param {{ size: number, arrayBuffer: () => Promise<ArrayBuffer> }} file -
 *   Object exposing `size` (bytes) and an async `arrayBuffer()` reader.
 * @returns {Promise<Uint8Array>} The file contents.
 * @throws {Error} "Failed to read file: ..." when the file is larger than
 *   MAX_FILE_SIZE or when reading fails. The underlying error is attached as
 *   `cause` so callers can still inspect the original failure.
 */
async function fileToUint8Array(file) {
  try {
    // The size guard lives inside the try on purpose: its error is reported
    // through the same "Failed to read file" wrapper as read failures.
    if (file.size > MAX_FILE_SIZE) {
      throw new Error(
        `File size (${file.size} bytes) exceeds maximum (${MAX_FILE_SIZE} bytes). Maximum file size is 512 MB.`
      );
    }
    const arrayBuffer = await file.arrayBuffer();
    return new Uint8Array(arrayBuffer);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`Failed to read file: ${reason}`, { cause: error });
  }
}
|
|
51
|
+
/**
 * Produce a compacted copy of a configuration object.
 *
 * Properties whose value is null or undefined are removed at every nesting
 * level, and nested objects that end up with no properties are removed too.
 * Arrays keep their length: each element is compacted in place, and elements
 * that compact to nothing become null.
 *
 * @param {object|null|undefined} config - Configuration to compact.
 * @returns {object} A new object; `{}` when `config` is falsy or fully empty.
 */
function configToJS(config) {
  if (!config) {
    return {};
  }
  const isPresent = (candidate) => candidate !== null && candidate !== void 0;
  // Copy the entries of `source`, keeping only those that survive compaction.
  const compactEntries = (source) => {
    const kept = {};
    Object.entries(source).forEach(([field, raw]) => {
      const cleaned = compact(raw);
      if (isPresent(cleaned)) {
        kept[field] = cleaned;
      }
    });
    return kept;
  };
  // Compact a single value; null signals "nothing left to keep".
  function compact(raw) {
    if (!isPresent(raw)) {
      return null;
    }
    if (typeof raw !== "object") {
      return raw;
    }
    if (Array.isArray(raw)) {
      return raw.map(compact);
    }
    const kept = compactEntries(raw);
    return Object.keys(kept).length > 0 ? kept : null;
  }
  return compactEntries(config);
}
|
|
84
|
+
/**
 * Validate a raw value returned by the WASM layer and convert it into a
 * normalized extraction result with camelCase field names.
 *
 * Both camelCase and snake_case spellings are accepted for fields that have
 * two-word names (e.g. `mimeType`/`mime_type`, `pageNumber`/`page_number`);
 * when both are present the camelCase one wins.
 *
 * @param {unknown} jsValue - Raw result object.
 * @returns {{content: string, mimeType: string, metadata: object,
 *   tables: Array<object>, detectedLanguages: string[]|null,
 *   chunks: Array<object>|null, images: Array<object>|null}}
 *   `tables` contains only well-formed tables (malformed ones are skipped);
 *   `chunks`, `images` and `detectedLanguages` are null when the input does
 *   not provide an array for them.
 * @throws {Error} If the value is not an object, if content, mimeType or
 *   metadata are missing/invalid, or if any chunk, image or detected language
 *   entry is malformed.
 */
function jsToExtractionResult(jsValue) {
  if (!jsValue || typeof jsValue !== "object") {
    throw new Error("Invalid extraction result: value is not an object");
  }
  const result = jsValue;
  const mimeType = typeof result.mimeType === "string" ? result.mimeType : typeof result.mime_type === "string" ? result.mime_type : null;
  if (typeof result.content !== "string") {
    throw new Error("Invalid extraction result: missing or invalid content");
  }
  if (typeof mimeType !== "string") {
    throw new Error("Invalid extraction result: missing or invalid mimeType");
  }
  if (!result.metadata || typeof result.metadata !== "object") {
    throw new Error("Invalid extraction result: missing or invalid metadata");
  }

  // Read `camelKey`, falling back to `snakeKey` only when the camelCase
  // property is absent, so inputs that already used camelCase are unaffected.
  const pick = (source, camelKey, snakeKey) => source[camelKey] !== void 0 ? source[camelKey] : source[snakeKey];

  // Convert a chunk-metadata value to a number. Strings must be plain base-10
  // integers: a bare parseInt would silently accept "12abc" as 12 and
  // truncate "1.5" to 1.
  const coerceToNumber = (value, fieldName) => {
    if (typeof value === "number") {
      return value;
    }
    if (typeof value === "bigint") {
      return Number(value);
    }
    if (typeof value === "string") {
      const text = value.trim();
      if (!/^[+-]?\d+$/.test(text)) {
        throw new Error(`Invalid chunk metadata: ${fieldName} must be a valid number, got "${value}"`);
      }
      return Number.parseInt(text, 10);
    }
    throw new Error(`Invalid chunk metadata: ${fieldName} must be a number, got ${typeof value}`);
  };

  // Tables are best-effort: entries that are not well-formed are skipped
  // rather than failing the whole result.
  const tables = [];
  if (Array.isArray(result.tables)) {
    for (const table of result.tables) {
      if (!table || typeof table !== "object") {
        continue;
      }
      const pageNumber = pick(table, "pageNumber", "page_number");
      const hasStringGrid = Array.isArray(table.cells) && table.cells.every((row) => Array.isArray(row) && row.every((cell) => typeof cell === "string"));
      if (hasStringGrid && typeof table.markdown === "string" && typeof pageNumber === "number") {
        tables.push({
          cells: table.cells,
          markdown: table.markdown,
          pageNumber
        });
      }
    }
  }

  const chunks = Array.isArray(result.chunks) ? result.chunks.map((chunk) => {
    if (!chunk || typeof chunk !== "object") {
      throw new Error("Invalid chunk structure");
    }
    if (typeof chunk.content !== "string") {
      throw new Error("Invalid chunk: missing content");
    }
    if (!chunk.metadata || typeof chunk.metadata !== "object") {
      throw new Error("Invalid chunk: missing metadata");
    }
    const metadata = chunk.metadata;
    let embedding = null;
    if (Array.isArray(chunk.embedding)) {
      if (!chunk.embedding.every((item) => typeof item === "number")) {
        throw new Error("Invalid chunk: embedding must contain only numbers");
      }
      embedding = chunk.embedding;
    }
    // Offsets may arrive under char* or byte* names in either casing.
    const charStart = coerceToNumber(
      metadata.charStart ?? metadata.char_start ?? metadata.byteStart ?? metadata.byte_start,
      "charStart"
    );
    const charEnd = coerceToNumber(
      metadata.charEnd ?? metadata.char_end ?? metadata.byteEnd ?? metadata.byte_end,
      "charEnd"
    );
    const chunkIndex = coerceToNumber(metadata.chunkIndex ?? metadata.chunk_index, "chunkIndex");
    const totalChunks = coerceToNumber(metadata.totalChunks ?? metadata.total_chunks, "totalChunks");
    // tokenCount is optional; only validate it when it is actually present.
    let tokenCount = null;
    const tokenCountValue = metadata.tokenCount ?? metadata.token_count;
    if (tokenCountValue !== null && tokenCountValue !== void 0) {
      tokenCount = coerceToNumber(tokenCountValue, "tokenCount");
    }
    return {
      content: chunk.content,
      embedding,
      metadata: {
        charStart,
        charEnd,
        tokenCount,
        chunkIndex,
        totalChunks
      }
    };
  }) : null;

  const images = Array.isArray(result.images) ? result.images.map((image) => {
    if (!image || typeof image !== "object") {
      throw new Error("Invalid image structure");
    }
    const imageIndex = pick(image, "imageIndex", "image_index");
    const pageNumber = pick(image, "pageNumber", "page_number");
    const bitsPerComponent = pick(image, "bitsPerComponent", "bits_per_component");
    const isMask = pick(image, "isMask", "is_mask");
    const ocrResult = pick(image, "ocrResult", "ocr_result");
    if (!(image.data instanceof Uint8Array)) {
      throw new Error("Invalid image: data must be Uint8Array");
    }
    if (typeof image.format !== "string") {
      throw new Error("Invalid image: missing format");
    }
    if (typeof imageIndex !== "number") {
      throw new Error("Invalid image: imageIndex must be a number");
    }
    if (!isNumberOrNull(pageNumber)) {
      throw new Error("Invalid image: pageNumber must be a number or null");
    }
    if (!isNumberOrNull(image.width)) {
      throw new Error("Invalid image: width must be a number or null");
    }
    if (!isNumberOrNull(image.height)) {
      throw new Error("Invalid image: height must be a number or null");
    }
    if (!isNumberOrNull(bitsPerComponent)) {
      throw new Error("Invalid image: bitsPerComponent must be a number or null");
    }
    if (!isBoolean(isMask)) {
      throw new Error("Invalid image: isMask must be a boolean");
    }
    if (!isStringOrNull(image.colorspace)) {
      throw new Error("Invalid image: colorspace must be a string or null");
    }
    if (!isStringOrNull(image.description)) {
      throw new Error("Invalid image: description must be a string or null");
    }
    return {
      data: image.data,
      format: image.format,
      imageIndex,
      pageNumber,
      width: image.width,
      height: image.height,
      colorspace: image.colorspace,
      bitsPerComponent,
      isMask,
      description: image.description,
      // A nested OCR result is validated with the same rules, recursively.
      ocrResult: ocrResult ? jsToExtractionResult(ocrResult) : null
    };
  }) : null;

  let detectedLanguages = null;
  const detectedLanguagesRaw = Array.isArray(result.detectedLanguages) ? result.detectedLanguages : result.detected_languages;
  if (Array.isArray(detectedLanguagesRaw)) {
    if (!detectedLanguagesRaw.every((lang) => typeof lang === "string")) {
      throw new Error("Invalid result: detectedLanguages must contain only strings");
    }
    detectedLanguages = detectedLanguagesRaw;
  }

  return {
    content: result.content,
    mimeType,
    metadata: result.metadata,
    tables,
    detectedLanguages,
    chunks,
    images
  };
}
|
|
243
|
+
/**
 * Build a new Error whose message is prefixed with the operation context.
 *
 * @param {unknown} error - Original failure (an Error or any thrown value).
 * @param {string} context - Description of what was being done.
 * @returns {Error} `Error <context>: <detail>`. When `error` is an Error it
 *   is attached as `cause`; other values are stringified and no cause is set.
 */
function wrapWasmError(error, context) {
  const isErrorObject = error instanceof Error;
  const detail = isErrorObject ? error.message : String(error);
  const text = `Error ${context}: ${detail}`;
  if (!isErrorObject) {
    return new Error(text);
  }
  return new Error(text, { cause: error });
}
|
|
252
|
+
/**
 * Shallow structural check for an extraction result.
 *
 * @param {unknown} value - Candidate value.
 * @returns {boolean} True when `value` is an object with a string `content`,
 *   a string `mimeType` or `mime_type`, a non-null object `metadata`, and an
 *   array `tables`. Nested contents are not inspected.
 */
function isValidExtractionResult(value) {
  if (!value || typeof value !== "object") {
    return false;
  }
  const candidate = value;
  if (typeof candidate.content !== "string") {
    return false;
  }
  const hasMimeType = typeof candidate.mimeType === "string" || typeof candidate.mime_type === "string";
  if (!hasMimeType) {
    return false;
  }
  if (candidate.metadata === null || typeof candidate.metadata !== "object") {
    return false;
  }
  return Array.isArray(candidate.tables);
}
|
|
259
|
+
|
|
260
|
+
// typescript/ocr/registry.ts
|
|
261
|
+
var ocrBackendRegistry = /* @__PURE__ */ new Map();
|
|
262
|
+
/**
 * Add an OCR backend to the registry under the name returned by its
 * `name()` method, replacing (with a console warning) any backend already
 * registered under that name.
 *
 * @param {object} backend - Must provide `name()`, `supportedLanguages()`
 *   and `processImage()` methods.
 * @throws {Error} If the backend is missing, lacks a required method, or
 *   `name()` does not return a non-empty string.
 */
function registerOcrBackend(backend) {
  if (!backend) {
    throw new Error("Backend cannot be null or undefined");
  }
  // Checked in this order so the first missing method decides the message.
  const requiredMethods = [
    ["name", "Backend must implement name() method"],
    ["supportedLanguages", "Backend must implement supportedLanguages() method"],
    ["processImage", "Backend must implement processImage() method"]
  ];
  for (const [method, complaint] of requiredMethods) {
    if (typeof backend[method] !== "function") {
      throw new Error(complaint);
    }
  }
  const id = backend.name();
  const isUsableName = typeof id === "string" && id.length > 0;
  if (!isUsableName) {
    throw new Error("Backend name must be a non-empty string");
  }
  const alreadyPresent = ocrBackendRegistry.has(id);
  if (alreadyPresent) {
    console.warn(`OCR backend "${id}" is already registered and will be replaced`);
  }
  ocrBackendRegistry.set(id, backend);
}
|
|
284
|
+
/**
 * Look up a registered OCR backend by name.
 *
 * @param {string} name - Registry key of the backend.
 * @returns {object|undefined} The backend, or undefined when none is
 *   registered under `name`.
 */
function getOcrBackend(name) {
  const registered = ocrBackendRegistry.get(name);
  return registered;
}
|
|
287
|
+
/**
 * List the names of all registered OCR backends.
 *
 * @returns {string[]} A new array of registry keys in registration order.
 */
function listOcrBackends() {
  return [...ocrBackendRegistry.keys()];
}
|
|
290
|
+
/**
 * Remove an OCR backend from the registry, giving it a chance to clean up.
 *
 * If the backend has a `shutdown()` method it is awaited first; a failing
 * shutdown is reported with console.warn and the backend is removed anyway.
 *
 * @param {string} name - Registry key of the backend to remove.
 * @returns {Promise<void>}
 * @throws {Error} If no backend is registered under `name`.
 */
async function unregisterOcrBackend(name) {
  const target = ocrBackendRegistry.get(name);
  if (!target) {
    throw new Error(
      `OCR backend "${name}" is not registered. Available backends: ${Array.from(ocrBackendRegistry.keys()).join(", ")}`
    );
  }
  const canShutDown = typeof target.shutdown === "function";
  if (canShutDown) {
    try {
      await target.shutdown();
    } catch (failure) {
      // Best effort: a broken shutdown must not block removal.
      console.warn(
        `Error shutting down OCR backend "${name}": ${failure instanceof Error ? failure.message : String(failure)}`
      );
    }
  }
  ocrBackendRegistry.delete(name);
}
|
|
308
|
+
/**
 * Shut down and remove every registered OCR backend.
 *
 * Backends are processed one at a time in registration order. A failing
 * `shutdown()` is reported with console.warn and does not stop the others.
 * The registry is emptied once all shutdowns have been attempted.
 *
 * @returns {Promise<void>}
 */
async function clearOcrBackends() {
  // Iterate over a snapshot so the loop is unaffected by registry changes
  // made while a shutdown is being awaited.
  const snapshot = [...ocrBackendRegistry];
  for (const [backendName, instance] of snapshot) {
    if (typeof instance.shutdown !== "function") {
      continue;
    }
    try {
      await instance.shutdown();
    } catch (failure) {
      console.warn(
        `Error shutting down OCR backend "${backendName}": ${failure instanceof Error ? failure.message : String(failure)}`
      );
    }
  }
  ocrBackendRegistry.clear();
}
|
|
323
|
+
|
|
324
|
+
// typescript/ocr/tesseract-wasm-backend.ts
|
|
325
|
+
var TesseractWasmBackend = class {
|
|
326
|
+
/** Tesseract WASM client instance */
|
|
327
|
+
client = null;
|
|
328
|
+
/** Track which models are currently loaded to avoid redundant loads */
|
|
329
|
+
loadedLanguages = /* @__PURE__ */ new Set();
|
|
330
|
+
/** Cache for language availability validation */
|
|
331
|
+
supportedLangsCache = null;
|
|
332
|
+
/** Progress callback for UI updates */
|
|
333
|
+
progressCallback = null;
|
|
334
|
+
/** Base URL for training data CDN */
|
|
335
|
+
CDN_BASE_URL = "https://cdn.jsdelivr.net/npm/tesseract-wasm@0.11.0/dist";
|
|
336
|
+
/**
|
|
337
|
+
* Return the unique name of this OCR backend
|
|
338
|
+
*
|
|
339
|
+
* @returns Backend identifier "tesseract-wasm"
|
|
340
|
+
*/
|
|
341
|
+
name() {
|
|
342
|
+
return "tesseract-wasm";
|
|
343
|
+
}
|
|
344
|
+
/**
|
|
345
|
+
* Return list of supported language codes
|
|
346
|
+
*
|
|
347
|
+
* Returns a curated list of commonly available Tesseract language models.
|
|
348
|
+
* Tesseract supports many more languages through custom models.
|
|
349
|
+
*
|
|
350
|
+
* @returns Array of ISO 639-1/2/3 language codes
|
|
351
|
+
*/
|
|
352
|
+
supportedLanguages() {
|
|
353
|
+
if (this.supportedLangsCache) {
|
|
354
|
+
return this.supportedLangsCache;
|
|
355
|
+
}
|
|
356
|
+
this.supportedLangsCache = [
|
|
357
|
+
"eng",
|
|
358
|
+
"deu",
|
|
359
|
+
"fra",
|
|
360
|
+
"spa",
|
|
361
|
+
"ita",
|
|
362
|
+
"por",
|
|
363
|
+
"nld",
|
|
364
|
+
"rus",
|
|
365
|
+
"jpn",
|
|
366
|
+
"kor",
|
|
367
|
+
"chi_sim",
|
|
368
|
+
"chi_tra",
|
|
369
|
+
"pol",
|
|
370
|
+
"tur",
|
|
371
|
+
"swe",
|
|
372
|
+
"dan",
|
|
373
|
+
"fin",
|
|
374
|
+
"nor",
|
|
375
|
+
"ces",
|
|
376
|
+
"slk",
|
|
377
|
+
"ron",
|
|
378
|
+
"hun",
|
|
379
|
+
"hrv",
|
|
380
|
+
"srp",
|
|
381
|
+
"bul",
|
|
382
|
+
"ukr",
|
|
383
|
+
"ell",
|
|
384
|
+
"ara",
|
|
385
|
+
"heb",
|
|
386
|
+
"hin",
|
|
387
|
+
"tha",
|
|
388
|
+
"vie",
|
|
389
|
+
"mkd",
|
|
390
|
+
"ben",
|
|
391
|
+
"tam",
|
|
392
|
+
"tel",
|
|
393
|
+
"kan",
|
|
394
|
+
"mal",
|
|
395
|
+
"mya",
|
|
396
|
+
"khm",
|
|
397
|
+
"lao",
|
|
398
|
+
"sin"
|
|
399
|
+
];
|
|
400
|
+
return this.supportedLangsCache;
|
|
401
|
+
}
|
|
402
|
+
/**
|
|
403
|
+
* Initialize the OCR backend
|
|
404
|
+
*
|
|
405
|
+
* Creates the Tesseract WASM client instance. This is called once when
|
|
406
|
+
* the backend is registered with the extraction pipeline.
|
|
407
|
+
*
|
|
408
|
+
* The actual model loading happens in processImage() on-demand to avoid
|
|
409
|
+
* loading all models upfront.
|
|
410
|
+
*
|
|
411
|
+
* @throws {Error} If tesseract-wasm is not available or initialization fails
|
|
412
|
+
*
|
|
413
|
+
* @example
|
|
414
|
+
* ```typescript
|
|
415
|
+
* const backend = new TesseractWasmBackend();
|
|
416
|
+
* try {
|
|
417
|
+
* await backend.initialize();
|
|
418
|
+
* } catch (error) {
|
|
419
|
+
* console.error('Failed to initialize OCR:', error);
|
|
420
|
+
* }
|
|
421
|
+
* ```
|
|
422
|
+
*/
|
|
423
|
+
async initialize() {
|
|
424
|
+
if (this.client) {
|
|
425
|
+
return;
|
|
426
|
+
}
|
|
427
|
+
try {
|
|
428
|
+
const tesseractModule = await this.loadTesseractWasm();
|
|
429
|
+
if (!tesseractModule || typeof tesseractModule.OCRClient !== "function") {
|
|
430
|
+
throw new Error("tesseract-wasm OCRClient not found. Ensure tesseract-wasm is installed and available.");
|
|
431
|
+
}
|
|
432
|
+
this.client = new tesseractModule.OCRClient();
|
|
433
|
+
this.loadedLanguages.clear();
|
|
434
|
+
} catch (error) {
|
|
435
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
436
|
+
throw new Error(`Failed to initialize TesseractWasmBackend: ${message}`);
|
|
437
|
+
}
|
|
438
|
+
}
|
|
439
|
+
/**
|
|
440
|
+
* Process image bytes and extract text via OCR
|
|
441
|
+
*
|
|
442
|
+
* Handles image loading, model loading, OCR processing, and result formatting.
|
|
443
|
+
* Automatically loads the language model on first use and caches it for subsequent calls.
|
|
444
|
+
*
|
|
445
|
+
* @param imageBytes - Raw image data (Uint8Array) or Base64-encoded string
|
|
446
|
+
* @param language - ISO 639-2/3 language code (e.g., "eng", "deu")
|
|
447
|
+
* @returns Promise resolving to OCR result with content and metadata
|
|
448
|
+
* @throws {Error} If image processing fails, model loading fails, or language is unsupported
|
|
449
|
+
*
|
|
450
|
+
* @example
|
|
451
|
+
* ```typescript
|
|
452
|
+
* const backend = new TesseractWasmBackend();
|
|
453
|
+
* await backend.initialize();
|
|
454
|
+
*
|
|
455
|
+
* const imageBuffer = fs.readFileSync('scanned.png');
|
|
456
|
+
* const result = await backend.processImage(
|
|
457
|
+
* new Uint8Array(imageBuffer),
|
|
458
|
+
* 'eng'
|
|
459
|
+
* );
|
|
460
|
+
*
|
|
461
|
+
* console.log(result.content); // Extracted text
|
|
462
|
+
* console.log(result.metadata.confidence); // OCR confidence score
|
|
463
|
+
* ```
|
|
464
|
+
*/
|
|
465
|
+
async processImage(imageBytes, language) {
|
|
466
|
+
if (!this.client) {
|
|
467
|
+
throw new Error("TesseractWasmBackend not initialized. Call initialize() first.");
|
|
468
|
+
}
|
|
469
|
+
const supported = this.supportedLanguages();
|
|
470
|
+
const normalizedLang = language.toLowerCase();
|
|
471
|
+
const isSupported = supported.some((lang) => lang.toLowerCase() === normalizedLang);
|
|
472
|
+
if (!isSupported) {
|
|
473
|
+
throw new Error(`Language "${language}" is not supported. Supported languages: ${supported.join(", ")}`);
|
|
474
|
+
}
|
|
475
|
+
try {
|
|
476
|
+
if (!this.loadedLanguages.has(normalizedLang)) {
|
|
477
|
+
this.reportProgress(10);
|
|
478
|
+
await this.loadLanguageModel(normalizedLang);
|
|
479
|
+
this.loadedLanguages.add(normalizedLang);
|
|
480
|
+
this.reportProgress(30);
|
|
481
|
+
}
|
|
482
|
+
this.reportProgress(40);
|
|
483
|
+
const imageBitmap = await this.convertToImageBitmap(imageBytes);
|
|
484
|
+
this.reportProgress(50);
|
|
485
|
+
await this.client.loadImage(imageBitmap);
|
|
486
|
+
this.reportProgress(70);
|
|
487
|
+
const text = await this.client.getText();
|
|
488
|
+
const confidence = await this.getConfidenceScore();
|
|
489
|
+
const pageMetadata = await this.getPageMetadata();
|
|
490
|
+
this.reportProgress(90);
|
|
491
|
+
return {
|
|
492
|
+
content: text,
|
|
493
|
+
mime_type: "text/plain",
|
|
494
|
+
metadata: {
|
|
495
|
+
language: normalizedLang,
|
|
496
|
+
confidence,
|
|
497
|
+
...pageMetadata
|
|
498
|
+
},
|
|
499
|
+
tables: []
|
|
500
|
+
};
|
|
501
|
+
} catch (error) {
|
|
502
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
503
|
+
throw new Error(`OCR processing failed for language "${language}": ${message}`);
|
|
504
|
+
} finally {
|
|
505
|
+
this.reportProgress(100);
|
|
506
|
+
}
|
|
507
|
+
}
|
|
508
|
+
/**
|
|
509
|
+
* Shutdown the OCR backend and release resources
|
|
510
|
+
*
|
|
511
|
+
* Properly cleans up the Tesseract WASM client, freeing memory and Web Workers.
|
|
512
|
+
* Called when the backend is unregistered or the application shuts down.
|
|
513
|
+
*
|
|
514
|
+
* @throws {Error} If cleanup fails (errors are logged but not critical)
|
|
515
|
+
*
|
|
516
|
+
* @example
|
|
517
|
+
* ```typescript
|
|
518
|
+
* const backend = new TesseractWasmBackend();
|
|
519
|
+
* await backend.initialize();
|
|
520
|
+
* // ... use backend ...
|
|
521
|
+
* await backend.shutdown(); // Clean up resources
|
|
522
|
+
* ```
|
|
523
|
+
*/
|
|
524
|
+
async shutdown() {
|
|
525
|
+
try {
|
|
526
|
+
if (this.client) {
|
|
527
|
+
if (typeof this.client.destroy === "function") {
|
|
528
|
+
this.client.destroy();
|
|
529
|
+
}
|
|
530
|
+
if (typeof this.client.terminate === "function") {
|
|
531
|
+
this.client.terminate();
|
|
532
|
+
}
|
|
533
|
+
this.client = null;
|
|
534
|
+
}
|
|
535
|
+
this.loadedLanguages.clear();
|
|
536
|
+
this.supportedLangsCache = null;
|
|
537
|
+
this.progressCallback = null;
|
|
538
|
+
} catch (error) {
|
|
539
|
+
console.warn(
|
|
540
|
+
`Warning during TesseractWasmBackend shutdown: ${error instanceof Error ? error.message : String(error)}`
|
|
541
|
+
);
|
|
542
|
+
}
|
|
543
|
+
}
|
|
544
|
+
/**
|
|
545
|
+
* Set a progress callback for UI updates
|
|
546
|
+
*
|
|
547
|
+
* Allows the UI to display progress during OCR processing.
|
|
548
|
+
* The callback will be called with values from 0 to 100.
|
|
549
|
+
*
|
|
550
|
+
* @param callback - Function to call with progress percentage
|
|
551
|
+
*
|
|
552
|
+
* @example
|
|
553
|
+
* ```typescript
|
|
554
|
+
* const backend = new TesseractWasmBackend();
|
|
555
|
+
* backend.setProgressCallback((progress) => {
|
|
556
|
+
* console.log(`OCR Progress: ${progress}%`);
|
|
557
|
+
* document.getElementById('progress-bar').style.width = `${progress}%`;
|
|
558
|
+
* });
|
|
559
|
+
* ```
|
|
560
|
+
*/
|
|
561
|
+
setProgressCallback(callback) {
|
|
562
|
+
this.progressCallback = callback;
|
|
563
|
+
}
|
|
564
|
+
/**
|
|
565
|
+
* Load language model from CDN
|
|
566
|
+
*
|
|
567
|
+
* Fetches the training data for a specific language from jsDelivr CDN.
|
|
568
|
+
* This is an MVP approach - models are cached by the browser.
|
|
569
|
+
*
|
|
570
|
+
* @param language - ISO 639-2/3 language code
|
|
571
|
+
* @throws {Error} If model download fails or language is not available
|
|
572
|
+
*
|
|
573
|
+
* @internal
|
|
574
|
+
*/
|
|
575
|
+
async loadLanguageModel(language) {
|
|
576
|
+
if (!this.client) {
|
|
577
|
+
throw new Error("Client not initialized");
|
|
578
|
+
}
|
|
579
|
+
const modelFilename = `${language}.traineddata`;
|
|
580
|
+
const modelUrl = `${this.CDN_BASE_URL}/${modelFilename}`;
|
|
581
|
+
try {
|
|
582
|
+
await this.client.loadModel(modelUrl);
|
|
583
|
+
} catch (error) {
|
|
584
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
585
|
+
throw new Error(`Failed to load model for language "${language}" from ${modelUrl}: ${message}`);
|
|
586
|
+
}
|
|
587
|
+
}
|
|
588
|
+
/**
|
|
589
|
+
* Convert image bytes or Base64 string to ImageBitmap
|
|
590
|
+
*
|
|
591
|
+
* Handles both Uint8Array and Base64-encoded image data, converting to
|
|
592
|
+
* ImageBitmap format required by Tesseract WASM.
|
|
593
|
+
*
|
|
594
|
+
* @param imageBytes - Image data as Uint8Array or Base64 string
|
|
595
|
+
* @returns Promise resolving to ImageBitmap
|
|
596
|
+
* @throws {Error} If conversion fails (browser API not available or invalid image data)
|
|
597
|
+
*
|
|
598
|
+
* @internal
|
|
599
|
+
*/
|
|
600
|
+
async convertToImageBitmap(imageBytes) {
|
|
601
|
+
if (typeof createImageBitmap === "undefined") {
|
|
602
|
+
throw new Error("createImageBitmap is not available. TesseractWasmBackend requires a browser environment.");
|
|
603
|
+
}
|
|
604
|
+
try {
|
|
605
|
+
let bytes = imageBytes;
|
|
606
|
+
if (typeof imageBytes === "string") {
|
|
607
|
+
const binaryString = atob(imageBytes);
|
|
608
|
+
bytes = new Uint8Array(binaryString.length);
|
|
609
|
+
for (let i = 0; i < binaryString.length; i++) {
|
|
610
|
+
bytes[i] = binaryString.charCodeAt(i);
|
|
611
|
+
}
|
|
612
|
+
}
|
|
613
|
+
const blob = new Blob([bytes]);
|
|
614
|
+
const imageBitmap = await createImageBitmap(blob);
|
|
615
|
+
return imageBitmap;
|
|
616
|
+
} catch (error) {
|
|
617
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
618
|
+
throw new Error(`Failed to convert image bytes to ImageBitmap: ${message}`);
|
|
619
|
+
}
|
|
620
|
+
}
|
|
621
|
+
/**
|
|
622
|
+
* Get confidence score from OCR result
|
|
623
|
+
*
|
|
624
|
+
* Attempts to retrieve confidence score from Tesseract.
|
|
625
|
+
* Returns a safe default if unavailable.
|
|
626
|
+
*
|
|
627
|
+
* @returns Confidence score between 0 and 1
|
|
628
|
+
*
|
|
629
|
+
* @internal
|
|
630
|
+
*/
|
|
631
|
+
async getConfidenceScore() {
|
|
632
|
+
try {
|
|
633
|
+
if (this.client && typeof this.client.getConfidence === "function") {
|
|
634
|
+
const confidence = await this.client.getConfidence();
|
|
635
|
+
return confidence > 1 ? confidence / 100 : confidence;
|
|
636
|
+
}
|
|
637
|
+
} catch {
|
|
638
|
+
}
|
|
639
|
+
return 0.9;
|
|
640
|
+
}
|
|
641
|
+
/**
|
|
642
|
+
* Get page metadata from OCR result
|
|
643
|
+
*
|
|
644
|
+
* Retrieves additional metadata like image dimensions and processing info.
|
|
645
|
+
*
|
|
646
|
+
* @returns Metadata object (may be empty if unavailable)
|
|
647
|
+
*
|
|
648
|
+
* @internal
|
|
649
|
+
*/
|
|
650
|
+
async getPageMetadata() {
|
|
651
|
+
try {
|
|
652
|
+
if (this.client && typeof this.client.getPageMetadata === "function") {
|
|
653
|
+
return await this.client.getPageMetadata();
|
|
654
|
+
}
|
|
655
|
+
} catch {
|
|
656
|
+
}
|
|
657
|
+
return {};
|
|
658
|
+
}
|
|
659
|
+
/**
|
|
660
|
+
* Dynamically load tesseract-wasm module
|
|
661
|
+
*
|
|
662
|
+
* Uses dynamic import to load tesseract-wasm only when needed,
|
|
663
|
+
* avoiding hard dependency in browser environments where it may not be bundled.
|
|
664
|
+
*
|
|
665
|
+
* @returns tesseract-wasm module object
|
|
666
|
+
* @throws {Error} If module cannot be imported
|
|
667
|
+
*
|
|
668
|
+
* @internal
|
|
669
|
+
*/
|
|
670
|
+
async loadTesseractWasm() {
|
|
671
|
+
try {
|
|
672
|
+
const module = await import("tesseract-wasm");
|
|
673
|
+
return module;
|
|
674
|
+
} catch (error) {
|
|
675
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
676
|
+
throw new Error(
|
|
677
|
+
`Failed to import tesseract-wasm. Ensure it is installed via: npm install tesseract-wasm. Error: ${message}`
|
|
678
|
+
);
|
|
679
|
+
}
|
|
680
|
+
}
|
|
681
|
+
/**
|
|
682
|
+
* Report progress to progress callback
|
|
683
|
+
*
|
|
684
|
+
* Internal helper for notifying progress updates during OCR processing.
|
|
685
|
+
*
|
|
686
|
+
* @param progress - Progress percentage (0-100)
|
|
687
|
+
*
|
|
688
|
+
* @internal
|
|
689
|
+
*/
|
|
690
|
+
reportProgress(progress) {
|
|
691
|
+
if (this.progressCallback) {
|
|
692
|
+
try {
|
|
693
|
+
this.progressCallback(Math.min(100, Math.max(0, progress)));
|
|
694
|
+
} catch {
|
|
695
|
+
}
|
|
696
|
+
}
|
|
697
|
+
}
|
|
698
|
+
};
|
|
699
|
+
|
|
700
|
+
// typescript/runtime.ts
|
|
701
|
+
/**
 * Identify the JavaScript runtime the code is executing in.
 *
 * Checks are ordered: Deno global, Bun global, Node's `process.versions.node`,
 * then the presence of both `window` and `document`.
 *
 * @returns "deno", "bun", "node", "browser", or "unknown"
 */
function detectRuntime() {
  const isDefined = (value) => typeof value !== "undefined";
  if (isDefined(globalThis.Deno)) {
    return "deno";
  }
  if (isDefined(globalThis.Bun)) {
    return "bun";
  }
  const nodeVersion = typeof process !== "undefined" && process.versions && process.versions.node;
  if (nodeVersion) {
    return "node";
  }
  const lacksDom = typeof window === "undefined" || typeof document === "undefined";
  return lacksDom ? "unknown" : "browser";
}
|
|
716
|
+
/**
 * @returns true when detectRuntime() reports "browser"
 */
function isBrowser() {
  const current = detectRuntime();
  return current === "browser";
}
|
|
719
|
+
/**
 * @returns true when detectRuntime() reports "node"
 */
function isNode() {
  const current = detectRuntime();
  return current === "node";
}
|
|
722
|
+
/**
 * @returns true when detectRuntime() reports "deno"
 */
function isDeno() {
  const current = detectRuntime();
  return current === "deno";
}
|
|
725
|
+
/**
 * @returns true when detectRuntime() reports "bun"
 */
function isBun() {
  const current = detectRuntime();
  return current === "bun";
}
|
|
728
|
+
/**
 * @returns true when the detected runtime is "browser"
 */
function isWebEnvironment() {
  return detectRuntime() === "browser";
}
|
|
732
|
+
/**
 * @returns true when the detected runtime is "node", "deno", or "bun"
 */
function isServerEnvironment() {
  const serverRuntimes = ["node", "deno", "bun"];
  return serverRuntimes.includes(detectRuntime());
}
|
|
736
|
+
/**
 * @returns true only when `window`, `File`, and `Blob` are all defined
 */
function hasFileApi() {
  if (typeof window === "undefined") {
    return false;
  }
  if (typeof File === "undefined") {
    return false;
  }
  return typeof Blob !== "undefined";
}
|
|
739
|
+
/**
 * @returns true when a global `Blob` is defined
 */
function hasBlob() {
  const blobKind = typeof Blob;
  return blobKind !== "undefined";
}
|
|
742
|
+
/**
 * @returns true when a global `Worker` is defined
 */
function hasWorkers() {
  const workerKind = typeof Worker;
  return workerKind !== "undefined";
}
|
|
745
|
+
/**
 * @returns true when a global `SharedArrayBuffer` is defined
 */
function hasSharedArrayBuffer() {
  const sabKind = typeof SharedArrayBuffer;
  return sabKind !== "undefined";
}
|
|
748
|
+
/**
 * Detect support for module workers (`new Worker(url, { type: "module" })`).
 *
 * The previous implementation only created and revoked a blob URL, so it
 * reported true whenever `Worker`, `Blob`, and `URL.createObjectURL` existed.
 * This version passes the Worker constructor an options object whose `type`
 * property is a getter and reports whether the constructor consulted it.
 * The probe worker runs an empty script and is terminated immediately.
 *
 * @returns true when the Worker constructor read the `type` option, false
 * when `Worker` is unavailable, the probe could not be set up, or the option
 * was never read
 */
function hasModuleWorkers() {
  if (typeof Worker === "undefined") {
    return false;
  }
  let typeOptionRead = false;
  const probeOptions = {
    get type() {
      typeOptionRead = true;
      return "module";
    }
  };
  let workerUrl;
  let probe;
  try {
    const blob = new Blob([""], {
      type: "application/javascript"
    });
    workerUrl = URL.createObjectURL(blob);
    probe = new Worker(workerUrl, probeOptions);
  } catch {
    // Construction may still fail (for example when blob URLs are blocked);
    // whether `type` was read before the failure is what decides the result.
  } finally {
    probe?.terminate?.();
    if (workerUrl !== undefined) {
      URL.revokeObjectURL(workerUrl);
    }
  }
  return typeOptionRead;
}
|
|
766
|
+
/**
 * @returns true when `WebAssembly` exists and exposes `instantiate`
 */
function hasWasm() {
  if (typeof WebAssembly === "undefined") {
    return false;
  }
  return WebAssembly.instantiate !== undefined;
}
|
|
769
|
+
/**
 * @returns true when `WebAssembly` exists and exposes `instantiateStreaming`
 */
function hasWasmStreaming() {
  if (typeof WebAssembly === "undefined") {
    return false;
  }
  return WebAssembly.instantiateStreaming !== undefined;
}
|
|
772
|
+
/**
 * Probe BigInt support by constructing one.
 *
 * @returns true when `BigInt("1")` produces a value of type "bigint",
 * false when it does not or when the call throws
 */
function hasBigInt() {
  try {
    return typeof BigInt("1") === "bigint";
  } catch {
    return false;
  }
}
|
|
780
|
+
/**
 * Look up the version string of the detected runtime.
 *
 * @returns For "node": `process.version` without its first character;
 * for "deno": `Deno.version.deno`; for "bun": `Bun.version`;
 * otherwise undefined
 */
function getRuntimeVersion() {
  const runtime = detectRuntime();
  if (runtime === "node") {
    return process.version?.substring(1);
  }
  if (runtime === "deno") {
    return globalThis.Deno?.version?.deno;
  }
  if (runtime === "bun") {
    return globalThis.Bun?.version;
  }
  return undefined;
}
|
|
798
|
+
/**
 * Collect the results of every capability probe into one object.
 *
 * @returns Object with `runtime` plus one boolean per `has*` probe;
 * `runtimeVersion` is included only when a version could be determined
 */
function getWasmCapabilities() {
  const runtime = detectRuntime();
  const version = getRuntimeVersion();
  const report = {
    runtime,
    hasWasm: hasWasm(),
    hasWasmStreaming: hasWasmStreaming(),
    hasFileApi: hasFileApi(),
    hasBlob: hasBlob(),
    hasWorkers: hasWorkers(),
    hasSharedArrayBuffer: hasSharedArrayBuffer(),
    hasModuleWorkers: hasModuleWorkers(),
    hasBigInt: hasBigInt()
  };
  if (version !== undefined) {
    report.runtimeVersion = version;
  }
  return report;
}
|
|
815
|
+
/**
 * Build a full description of the current runtime.
 *
 * @returns Object with the runtime name, the per-runtime boolean flags,
 * the runtime version, the user agent (or "N/A" when `navigator` is not
 * defined), and the capability report from getWasmCapabilities()
 */
function getRuntimeInfo() {
  const runtime = detectRuntime();
  const capabilities = getWasmCapabilities();
  const info = { runtime };
  info.isBrowser = isBrowser();
  info.isNode = isNode();
  info.isDeno = isDeno();
  info.isBun = isBun();
  info.isWeb = isWebEnvironment();
  info.isServer = isServerEnvironment();
  info.runtimeVersion = getRuntimeVersion();
  info.userAgent = typeof navigator === "undefined" ? "N/A" : navigator.userAgent;
  info.capabilities = capabilities;
  return info;
}
|
|
831
|
+
|
|
832
|
+
// typescript/plugin-registry.ts
|
|
833
|
+
// Registered post-processors, keyed by the string returned from their name() method.
var postProcessors = /* @__PURE__ */ new Map();
|
|
834
|
+
// Registered validators, keyed by the string returned from their name() method.
var validators = /* @__PURE__ */ new Map();
|
|
835
|
+
/**
 * Check that a value has the shape of a post-processor.
 *
 * @param processor - Candidate object; must expose name() and process()
 * @returns true when every check passes
 * @throws {Error} If the value is null/undefined, lacks name() or process(),
 * or name() does not return a non-blank string
 */
function validatePostProcessor(processor) {
  if (processor == null) {
    throw new Error("Post-processor cannot be null or undefined");
  }
  const hasMethod = (key) => typeof processor[key] === "function";
  if (!hasMethod("name")) {
    throw new Error("Post-processor must implement name() method");
  }
  if (!hasMethod("process")) {
    throw new Error("Post-processor must implement process() method");
  }
  const label = processor.name();
  const labelIsUsable = typeof label === "string" && label.trim() !== "";
  if (!labelIsUsable) {
    throw new Error("Post-processor name must be a non-empty string");
  }
  return true;
}
|
|
852
|
+
/**
 * Validate a post-processor and store it under its name.
 *
 * An existing entry with the same name is replaced after a console warning.
 *
 * @param processor - Post-processor to register
 * @throws {Error} If validatePostProcessor rejects the value
 */
function registerPostProcessor(processor) {
  validatePostProcessor(processor);
  const key = processor.name();
  const isDuplicate = postProcessors.has(key);
  if (isDuplicate) {
    console.warn(`Post-processor "${key}" already registered, overwriting with new implementation`);
  }
  postProcessors.set(key, processor);
}
|
|
860
|
+
/**
 * @param name - Registered post-processor name
 * @returns The stored post-processor, or undefined when none has that name
 */
function getPostProcessor(name) {
  const match = postProcessors.get(name);
  return match;
}
|
|
863
|
+
/**
 * @returns A new array holding the names of all registered post-processors
 */
function listPostProcessors() {
  return [...postProcessors.keys()];
}
|
|
866
|
+
/**
 * Remove a post-processor, giving it a chance to shut down first.
 *
 * If the processor has a shutdown() member it is awaited; a failure there
 * is logged as a warning and does not prevent removal.
 *
 * @param name - Name of the post-processor to remove
 * @throws {Error} If no post-processor is registered under that name
 */
async function unregisterPostProcessor(name) {
  const target = postProcessors.get(name);
  if (!target) {
    const known = [...postProcessors.keys()];
    const hint = known.length > 0 ? ` Available: ${known.join(", ")}` : "";
    throw new Error(`Post-processor "${name}" is not registered.${hint}`);
  }
  try {
    if (target.shutdown) {
      await target.shutdown();
    }
  } catch (shutdownError) {
    console.warn(`Error during shutdown of post-processor "${name}":`, shutdownError);
  }
  postProcessors.delete(name);
}
|
|
882
|
+
/**
 * Shut down and remove every registered post-processor.
 *
 * Processors are shut down one after another over a snapshot of the
 * registry; a failing shutdown() is logged as a warning and the loop
 * continues. The registry is emptied at the end.
 */
async function clearPostProcessors() {
  const snapshot = [...postProcessors.entries()];
  for (const [label, plugin] of snapshot) {
    try {
      if (plugin.shutdown) {
        await plugin.shutdown();
      }
    } catch (shutdownError) {
      console.warn(`Error during shutdown of post-processor "${label}":`, shutdownError);
    }
  }
  postProcessors.clear();
}
|
|
895
|
+
/**
 * Check that a value has the shape of a validator.
 *
 * @param validator - Candidate object; must expose name() and validate()
 * @returns true when every check passes
 * @throws {Error} If the value is null/undefined, lacks name() or validate(),
 * or name() does not return a non-blank string
 */
function validateValidator(validator) {
  if (validator == null) {
    throw new Error("Validator cannot be null or undefined");
  }
  const hasMethod = (key) => typeof validator[key] === "function";
  if (!hasMethod("name")) {
    throw new Error("Validator must implement name() method");
  }
  if (!hasMethod("validate")) {
    throw new Error("Validator must implement validate() method");
  }
  const label = validator.name();
  const labelIsUsable = typeof label === "string" && label.trim() !== "";
  if (!labelIsUsable) {
    throw new Error("Validator name must be a non-empty string");
  }
  return true;
}
|
|
912
|
+
/**
 * Validate a validator object and store it under its name.
 *
 * An existing entry with the same name is replaced after a console warning.
 *
 * @param validator - Validator to register
 * @throws {Error} If validateValidator rejects the value
 */
function registerValidator(validator) {
  validateValidator(validator);
  const key = validator.name();
  const isDuplicate = validators.has(key);
  if (isDuplicate) {
    console.warn(`Validator "${key}" already registered, overwriting with new implementation`);
  }
  validators.set(key, validator);
}
|
|
920
|
+
/**
 * @param name - Registered validator name
 * @returns The stored validator, or undefined when none has that name
 */
function getValidator(name) {
  const match = validators.get(name);
  return match;
}
|
|
923
|
+
/**
 * @returns A new array holding the names of all registered validators
 */
function listValidators() {
  return [...validators.keys()];
}
|
|
926
|
+
/**
 * Remove a validator, giving it a chance to shut down first.
 *
 * If the validator has a shutdown() member it is awaited; a failure there
 * is logged as a warning and does not prevent removal.
 *
 * @param name - Name of the validator to remove
 * @throws {Error} If no validator is registered under that name
 */
async function unregisterValidator(name) {
  const target = validators.get(name);
  if (!target) {
    const known = [...validators.keys()];
    const hint = known.length > 0 ? ` Available: ${known.join(", ")}` : "";
    throw new Error(`Validator "${name}" is not registered.${hint}`);
  }
  try {
    if (target.shutdown) {
      await target.shutdown();
    }
  } catch (shutdownError) {
    console.warn(`Error during shutdown of validator "${name}":`, shutdownError);
  }
  validators.delete(name);
}
|
|
942
|
+
/**
 * Shut down and remove every registered validator.
 *
 * Validators are shut down one after another over a snapshot of the
 * registry; a failing shutdown() is logged as a warning and the loop
 * continues. The registry is emptied at the end.
 */
async function clearValidators() {
  const snapshot = [...validators.entries()];
  for (const [label, plugin] of snapshot) {
    try {
      if (plugin.shutdown) {
        await plugin.shutdown();
      }
    } catch (shutdownError) {
      console.warn(`Error during shutdown of validator "${label}":`, shutdownError);
    }
  }
  validators.clear();
}
|
|
955
|
+
/**
 * Run a registered post-processor and always hand back a promise.
 *
 * @param name - Name of the post-processor to run
 * @param result - Value passed to the processor's process() method
 * @returns The promise returned by process(), or a promise resolved with its
 * synchronous return value; a rejected promise when the name is unknown or
 * process() throws synchronously
 */
function executePostProcessor(name, result) {
  const plugin = postProcessors.get(name);
  if (!plugin) {
    return Promise.reject(new Error(`Post-processor "${name}" is not registered`));
  }
  try {
    const outcome = plugin.process(result);
    return outcome instanceof Promise ? outcome : Promise.resolve(outcome);
  } catch (failure) {
    return Promise.reject(new Error(`Error executing post-processor "${name}": ${String(failure)}`));
  }
}
|
|
970
|
+
/**
 * Run a registered validator and always hand back a promise.
 *
 * @param name - Name of the validator to run
 * @param result - Value passed to the validator's validate() method
 * @returns The promise returned by validate(), or a promise resolved with its
 * synchronous return value; a rejected promise when the name is unknown or
 * validate() throws synchronously
 */
function executeValidator(name, result) {
  const plugin = validators.get(name);
  if (!plugin) {
    return Promise.reject(new Error(`Validator "${name}" is not registered`));
  }
  try {
    const outcome = plugin.validate(result);
    return outcome instanceof Promise ? outcome : Promise.resolve(outcome);
  } catch (failure) {
    return Promise.reject(new Error(`Error executing validator "${name}": ${String(failure)}`));
  }
}
|
|
985
|
+
/**
 * Publish the plugin execution entry points on `globalThis`.
 *
 * Assigns executePostProcessor and executeValidator to the
 * `__kreuzberg_execute_post_processor` and `__kreuzberg_execute_validator`
 * globals. Does nothing when `globalThis` is not defined.
 */
function setupGlobalCallbacks() {
  if (typeof globalThis === "undefined") {
    return;
  }
  const target = globalThis;
  target.__kreuzberg_execute_post_processor = executePostProcessor;
  target.__kreuzberg_execute_validator = executeValidator;
}
// Install the callbacks as soon as this module is evaluated.
setupGlobalCallbacks();
|
|
993
|
+
|
|
994
|
+
// typescript/index.ts
|
|
995
|
+
// The loaded WASM module; null until initWasm() assigns the imported module.
var wasm = null;
|
|
996
|
+
// Set to true by initWasm() once module loading has completed.
var initialized = false;
|
|
997
|
+
// Error captured by initWasm() when initialization fails; reset to null on success.
var initializationError = null;
|
|
998
|
+
// Promise for the initialization started by initWasm(); null before the first call.
var initializationPromise = null;
|
|
999
|
+
/**
 * Wire the bundled PDFium module into the WASM module (browser only).
 *
 * Returns without doing anything when the WASM module is missing, has no
 * `initialize_pdfium_render` function, or the runtime is not a browser.
 * Failures are logged through the console and never thrown.
 *
 * @param wasmModule - Loaded WASM module exposing `initialize_pdfium_render`
 * @returns Promise that resolves once the attempt has finished
 */
async function initializePdfiumAsync(wasmModule) {
  const canInitialize = wasmModule && typeof wasmModule.initialize_pdfium_render === "function";
  if (!canInitialize) {
    return;
  }
  if (!isBrowser()) {
    console.debug("PDFium initialization skipped (non-browser environment)");
    return;
  }
  try {
    // Deferred so the pdfium chunk is only initialized on first use.
    const pdfiumModule = await Promise.resolve().then(() => {
      init_pdfium();
      return pdfium_exports;
    });
    let pdfium = pdfiumModule;
    if (typeof pdfiumModule.default === "function") {
      pdfium = await pdfiumModule.default();
    }
    const succeeded = wasmModule.initialize_pdfium_render(pdfium, wasmModule, false);
    if (!succeeded) {
      console.warn("PDFium initialization returned false");
    }
  } catch (failure) {
    console.debug("PDFium initialization error:", failure);
  }
}
|
|
86
1018
|
async function initWasm() {
|
|
87
1019
|
if (initialized) {
|
|
88
1020
|
return;
|
|
@@ -92,24 +1024,37 @@ async function initWasm() {
|
|
|
92
1024
|
}
|
|
93
1025
|
initializationPromise = (async () => {
|
|
94
1026
|
try {
|
|
95
|
-
if (!
|
|
1027
|
+
if (!hasWasm()) {
|
|
96
1028
|
throw new Error("WebAssembly is not supported in this environment");
|
|
97
1029
|
}
|
|
98
1030
|
let wasmModule;
|
|
1031
|
+
const pkgPath = "./pkg/kreuzberg_wasm.js";
|
|
1032
|
+
const fallbackPath = "./kreuzberg_wasm.js";
|
|
99
1033
|
try {
|
|
100
|
-
wasmModule = await import(
|
|
1034
|
+
wasmModule = await import(
|
|
1035
|
+
/* @vite-ignore */
|
|
1036
|
+
pkgPath
|
|
1037
|
+
);
|
|
101
1038
|
} catch {
|
|
102
|
-
wasmModule = await import(
|
|
1039
|
+
wasmModule = await import(
|
|
1040
|
+
/* @vite-ignore */
|
|
1041
|
+
fallbackPath
|
|
1042
|
+
);
|
|
103
1043
|
}
|
|
104
1044
|
wasm = wasmModule;
|
|
105
1045
|
if (wasm && typeof wasm.default === "function") {
|
|
106
1046
|
await wasm.default();
|
|
107
1047
|
}
|
|
1048
|
+
if (isBrowser() && wasm && typeof wasm.initialize_pdfium_render === "function") {
|
|
1049
|
+
initializePdfiumAsync(wasm).catch((error) => {
|
|
1050
|
+
console.warn("PDFium auto-initialization failed (PDF extraction disabled):", error);
|
|
1051
|
+
});
|
|
1052
|
+
}
|
|
108
1053
|
initialized = true;
|
|
109
1054
|
initializationError = null;
|
|
110
1055
|
} catch (error) {
|
|
111
1056
|
initializationError = error instanceof Error ? error : new Error(String(error));
|
|
112
|
-
throw
|
|
1057
|
+
throw wrapWasmError(error, "initializing Kreuzberg WASM module");
|
|
113
1058
|
}
|
|
114
1059
|
})();
|
|
115
1060
|
return initializationPromise;
|
|
@@ -143,14 +1088,14 @@ async function extractBytes(data, mimeType, config) {
|
|
|
143
1088
|
if (!mimeType) {
|
|
144
1089
|
throw new Error("MIME type is required");
|
|
145
1090
|
}
|
|
146
|
-
const normalizedConfig =
|
|
1091
|
+
const normalizedConfig = configToJS(config ?? null);
|
|
147
1092
|
const result = await wasm.extractBytes(data, mimeType, normalizedConfig);
|
|
148
1093
|
if (!result) {
|
|
149
1094
|
throw new Error("Invalid extraction result: no result from WASM module");
|
|
150
1095
|
}
|
|
151
|
-
return
|
|
1096
|
+
return jsToExtractionResult(result);
|
|
152
1097
|
} catch (error) {
|
|
153
|
-
throw
|
|
1098
|
+
throw wrapWasmError(error, "extracting from bytes");
|
|
154
1099
|
}
|
|
155
1100
|
}
|
|
156
1101
|
async function extractFile(path, mimeType, config) {
|
|
@@ -164,20 +1109,20 @@ async function extractFile(path, mimeType, config) {
|
|
|
164
1109
|
if (!path) {
|
|
165
1110
|
throw new Error("File path is required");
|
|
166
1111
|
}
|
|
167
|
-
const runtime =
|
|
1112
|
+
const runtime = detectRuntime();
|
|
168
1113
|
if (runtime === "browser") {
|
|
169
1114
|
throw new Error("Use extractBytes with fileToUint8Array for browser environments");
|
|
170
1115
|
}
|
|
171
1116
|
let fileData;
|
|
172
1117
|
if (runtime === "node") {
|
|
173
|
-
const { readFile } = await import("
|
|
1118
|
+
const { readFile } = await import("fs/promises");
|
|
174
1119
|
const buffer = await readFile(path);
|
|
175
1120
|
fileData = new Uint8Array(buffer);
|
|
176
1121
|
} else if (runtime === "deno") {
|
|
177
1122
|
const deno = globalThis.Deno;
|
|
178
1123
|
fileData = await deno.readFile(path);
|
|
179
1124
|
} else if (runtime === "bun") {
|
|
180
|
-
const { readFile } = await import("
|
|
1125
|
+
const { readFile } = await import("fs/promises");
|
|
181
1126
|
const buffer = await readFile(path);
|
|
182
1127
|
fileData = new Uint8Array(buffer);
|
|
183
1128
|
} else {
|
|
@@ -193,7 +1138,7 @@ async function extractFile(path, mimeType, config) {
|
|
|
193
1138
|
detectedMimeType = wasm.normalizeMimeType(detectedMimeType);
|
|
194
1139
|
return await extractBytes(fileData, detectedMimeType, config);
|
|
195
1140
|
} catch (error) {
|
|
196
|
-
throw
|
|
1141
|
+
throw wrapWasmError(error, `extracting from file: ${path}`);
|
|
197
1142
|
}
|
|
198
1143
|
}
|
|
199
1144
|
async function extractFromFile(file, mimeType, config) {
|
|
@@ -204,12 +1149,12 @@ async function extractFromFile(file, mimeType, config) {
|
|
|
204
1149
|
throw new Error("WASM module not loaded. Call initWasm() first.");
|
|
205
1150
|
}
|
|
206
1151
|
try {
|
|
207
|
-
const bytes = await
|
|
1152
|
+
const bytes = await fileToUint8Array(file);
|
|
208
1153
|
let type = mimeType ?? (file instanceof File ? file.type : "application/octet-stream");
|
|
209
1154
|
type = wasm.normalizeMimeType(type);
|
|
210
1155
|
return await extractBytes(bytes, type, config);
|
|
211
1156
|
} catch (error) {
|
|
212
|
-
throw
|
|
1157
|
+
throw wrapWasmError(error, `extracting from ${file instanceof File ? "file" : "blob"}`);
|
|
213
1158
|
}
|
|
214
1159
|
}
|
|
215
1160
|
function extractBytesSync(data, mimeType, config) {
|
|
@@ -226,14 +1171,14 @@ function extractBytesSync(data, mimeType, config) {
|
|
|
226
1171
|
if (!mimeType) {
|
|
227
1172
|
throw new Error("MIME type is required");
|
|
228
1173
|
}
|
|
229
|
-
const normalizedConfig =
|
|
1174
|
+
const normalizedConfig = configToJS(config ?? null);
|
|
230
1175
|
const result = wasm.extractBytesSync(data, mimeType, normalizedConfig);
|
|
231
1176
|
if (!result) {
|
|
232
1177
|
throw new Error("Invalid extraction result: no result from WASM module");
|
|
233
1178
|
}
|
|
234
|
-
return
|
|
1179
|
+
return jsToExtractionResult(result);
|
|
235
1180
|
} catch (error) {
|
|
236
|
-
throw
|
|
1181
|
+
throw wrapWasmError(error, "extracting from bytes (sync)");
|
|
237
1182
|
}
|
|
238
1183
|
}
|
|
239
1184
|
async function batchExtractBytes(files, config) {
|
|
@@ -270,7 +1215,7 @@ async function batchExtractBytes(files, config) {
|
|
|
270
1215
|
dataList.push(f.data);
|
|
271
1216
|
mimeTypes.push(f.mimeType);
|
|
272
1217
|
}
|
|
273
|
-
const normalizedConfig =
|
|
1218
|
+
const normalizedConfig = configToJS(config ?? null);
|
|
274
1219
|
const results = await wasm.batchExtractBytes(dataList, mimeTypes, normalizedConfig);
|
|
275
1220
|
if (!Array.isArray(results)) {
|
|
276
1221
|
throw new Error("Invalid batch extraction result: expected array");
|
|
@@ -279,10 +1224,10 @@ async function batchExtractBytes(files, config) {
|
|
|
279
1224
|
if (!result) {
|
|
280
1225
|
throw new Error(`Invalid extraction result at index ${index}: no result from WASM module`);
|
|
281
1226
|
}
|
|
282
|
-
return
|
|
1227
|
+
return jsToExtractionResult(result);
|
|
283
1228
|
});
|
|
284
1229
|
} catch (error) {
|
|
285
|
-
throw
|
|
1230
|
+
throw wrapWasmError(error, "batch extracting from bytes");
|
|
286
1231
|
}
|
|
287
1232
|
}
|
|
288
1233
|
function batchExtractBytesSync(files, config) {
|
|
@@ -319,7 +1264,7 @@ function batchExtractBytesSync(files, config) {
|
|
|
319
1264
|
dataList.push(f.data);
|
|
320
1265
|
mimeTypes.push(f.mimeType);
|
|
321
1266
|
}
|
|
322
|
-
const normalizedConfig =
|
|
1267
|
+
const normalizedConfig = configToJS(config ?? null);
|
|
323
1268
|
const results = wasm.batchExtractBytesSync(dataList, mimeTypes, normalizedConfig);
|
|
324
1269
|
if (!Array.isArray(results)) {
|
|
325
1270
|
throw new Error("Invalid batch extraction result: expected array");
|
|
@@ -328,10 +1273,10 @@ function batchExtractBytesSync(files, config) {
|
|
|
328
1273
|
if (!result) {
|
|
329
1274
|
throw new Error(`Invalid extraction result at index ${index}: no result from WASM module`);
|
|
330
1275
|
}
|
|
331
|
-
return
|
|
1276
|
+
return jsToExtractionResult(result);
|
|
332
1277
|
});
|
|
333
1278
|
} catch (error) {
|
|
334
|
-
throw
|
|
1279
|
+
throw wrapWasmError(error, "batch extracting from bytes (sync)");
|
|
335
1280
|
}
|
|
336
1281
|
}
|
|
337
1282
|
async function batchExtractFiles(files, config) {
|
|
@@ -351,7 +1296,7 @@ async function batchExtractFiles(files, config) {
|
|
|
351
1296
|
if (!(file instanceof File)) {
|
|
352
1297
|
throw new Error(`Invalid file at index ${i}: must be a File object`);
|
|
353
1298
|
}
|
|
354
|
-
const bytes = await
|
|
1299
|
+
const bytes = await fileToUint8Array(file);
|
|
355
1300
|
byteFiles.push({
|
|
356
1301
|
data: bytes,
|
|
357
1302
|
mimeType: file.type || "application/octet-stream"
|
|
@@ -359,25 +1304,78 @@ async function batchExtractFiles(files, config) {
|
|
|
359
1304
|
}
|
|
360
1305
|
return await batchExtractBytes(byteFiles, config);
|
|
361
1306
|
} catch (error) {
|
|
362
|
-
throw
|
|
1307
|
+
throw wrapWasmError(error, "batch extracting from files");
|
|
363
1308
|
}
|
|
364
1309
|
}
|
|
365
1310
|
async function enableOcr() {
|
|
366
1311
|
if (!initialized) {
|
|
367
1312
|
throw new Error("WASM module not initialized. Call initWasm() first.");
|
|
368
1313
|
}
|
|
369
|
-
if (!
|
|
1314
|
+
if (!isBrowser()) {
|
|
370
1315
|
throw new Error(
|
|
371
1316
|
"OCR is only available in browser environments. TesseractWasmBackend requires Web Workers and createImageBitmap."
|
|
372
1317
|
);
|
|
373
1318
|
}
|
|
374
1319
|
try {
|
|
375
|
-
const backend = new
|
|
1320
|
+
const backend = new TesseractWasmBackend();
|
|
376
1321
|
await backend.initialize();
|
|
377
|
-
|
|
1322
|
+
registerOcrBackend(backend);
|
|
378
1323
|
} catch (error) {
|
|
379
1324
|
const message = error instanceof Error ? error.message : String(error);
|
|
380
1325
|
throw new Error(`Failed to enable OCR: ${message}`);
|
|
381
1326
|
}
|
|
382
1327
|
}
|
|
1328
|
+
export {
|
|
1329
|
+
TesseractWasmBackend,
|
|
1330
|
+
batchExtractBytes,
|
|
1331
|
+
batchExtractBytesSync,
|
|
1332
|
+
batchExtractFiles,
|
|
1333
|
+
clearOcrBackends,
|
|
1334
|
+
clearPostProcessors,
|
|
1335
|
+
clearValidators,
|
|
1336
|
+
configToJS,
|
|
1337
|
+
detectRuntime,
|
|
1338
|
+
enableOcr,
|
|
1339
|
+
extractBytes,
|
|
1340
|
+
extractBytesSync,
|
|
1341
|
+
extractFile,
|
|
1342
|
+
extractFromFile,
|
|
1343
|
+
fileToUint8Array,
|
|
1344
|
+
getInitializationError,
|
|
1345
|
+
getOcrBackend,
|
|
1346
|
+
getPostProcessor,
|
|
1347
|
+
getRuntimeInfo,
|
|
1348
|
+
getRuntimeVersion,
|
|
1349
|
+
getValidator,
|
|
1350
|
+
getVersion,
|
|
1351
|
+
getWasmCapabilities,
|
|
1352
|
+
hasBigInt,
|
|
1353
|
+
hasBlob,
|
|
1354
|
+
hasFileApi,
|
|
1355
|
+
hasModuleWorkers,
|
|
1356
|
+
hasSharedArrayBuffer,
|
|
1357
|
+
hasWasm,
|
|
1358
|
+
hasWasmStreaming,
|
|
1359
|
+
hasWorkers,
|
|
1360
|
+
initWasm,
|
|
1361
|
+
isBrowser,
|
|
1362
|
+
isBun,
|
|
1363
|
+
isDeno,
|
|
1364
|
+
isInitialized,
|
|
1365
|
+
isNode,
|
|
1366
|
+
isServerEnvironment,
|
|
1367
|
+
isValidExtractionResult,
|
|
1368
|
+
isWebEnvironment,
|
|
1369
|
+
jsToExtractionResult,
|
|
1370
|
+
listOcrBackends,
|
|
1371
|
+
listPostProcessors,
|
|
1372
|
+
listValidators,
|
|
1373
|
+
registerOcrBackend,
|
|
1374
|
+
registerPostProcessor,
|
|
1375
|
+
registerValidator,
|
|
1376
|
+
unregisterOcrBackend,
|
|
1377
|
+
unregisterPostProcessor,
|
|
1378
|
+
unregisterValidator,
|
|
1379
|
+
wrapWasmError
|
|
1380
|
+
};
|
|
383
1381
|
//# sourceMappingURL=index.js.map
|