@kreuzberg/node 4.0.0-rc.6 → 4.0.0-rc.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +45 -14
- package/dist/cli.d.mts +9 -0
- package/dist/cli.d.ts +9 -0
- package/dist/cli.js +78 -0
- package/dist/cli.js.map +1 -0
- package/dist/cli.mjs +43 -0
- package/dist/cli.mjs.map +1 -0
- package/dist/errors.d.mts +358 -0
- package/dist/errors.d.ts +358 -0
- package/dist/errors.js +139 -0
- package/dist/errors.js.map +1 -0
- package/dist/errors.mjs +107 -0
- package/dist/errors.mjs.map +1 -0
- package/dist/index.d.mts +857 -0
- package/dist/index.d.ts +857 -0
- package/dist/index.js +815 -0
- package/dist/index.js.map +1 -0
- package/dist/index.mjs +754 -0
- package/dist/index.mjs.map +1 -0
- package/dist/ocr/guten-ocr.d.mts +193 -0
- package/dist/ocr/guten-ocr.d.ts +193 -0
- package/dist/ocr/guten-ocr.js +232 -0
- package/dist/ocr/guten-ocr.js.map +1 -0
- package/dist/ocr/guten-ocr.mjs +198 -0
- package/dist/ocr/guten-ocr.mjs.map +1 -0
- package/dist/types.d.mts +666 -0
- package/dist/types.d.ts +666 -0
- package/dist/types.js +17 -0
- package/dist/types.js.map +1 -0
- package/dist/types.mjs +1 -0
- package/dist/types.mjs.map +1 -0
- package/index.d.ts +11 -2
- package/index.js +52 -52
- package/package.json +30 -29
- package/LICENSE +0 -7
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __create = Object.create;
|
|
3
|
+
var __defProp = Object.defineProperty;
|
|
4
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
5
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
6
|
+
var __getProtoOf = Object.getPrototypeOf;
|
|
7
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
8
|
+
var __export = (target, all) => {
|
|
9
|
+
for (var name in all)
|
|
10
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
11
|
+
};
|
|
12
|
+
var __copyProps = (to, from, except, desc) => {
|
|
13
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
14
|
+
for (let key of __getOwnPropNames(from))
|
|
15
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
16
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
17
|
+
}
|
|
18
|
+
return to;
|
|
19
|
+
};
|
|
20
|
+
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
21
|
+
// If the importer is in node compatibility mode or this is not an ESM
|
|
22
|
+
// file that has been converted to a CommonJS file using a Babel-
|
|
23
|
+
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
24
|
+
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
25
|
+
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
26
|
+
mod
|
|
27
|
+
));
|
|
28
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
29
|
+
var guten_ocr_exports = {};
|
|
30
|
+
__export(guten_ocr_exports, {
|
|
31
|
+
GutenOcrBackend: () => GutenOcrBackend
|
|
32
|
+
});
|
|
33
|
+
module.exports = __toCommonJS(guten_ocr_exports);
|
|
34
|
+
/**
 * OCR backend that delegates text extraction to the optional
 * `@gutenye/ocr-node` package.
 *
 * The package is loaded lazily with a dynamic `import()`, so it only has to be
 * installed when this backend is actually used.
 */
class GutenOcrBackend {
  /** OCR engine returned by the module's `create()`, or null while uninitialized. */
  ocr = null;
  /** Resolved `@gutenye/ocr-node` module (its default export when present), or null. */
  ocrModule = null;
  /** Internal: in-flight initialization shared by concurrent `initialize()` callers, or null. */
  pendingInit = null;
  /** Options forwarded verbatim to `create()`; stays undefined when none were given. */
  options;
  /**
   * Create a new Guten OCR backend. Nothing is loaded until `initialize()`
   * (or the first `processImage()`) runs.
   *
   * @param options - Optional configuration, passed unchanged to the OCR
   *   module's `create()` call (e.g. `models`, `isDebug`, `debugOutputDir`,
   *   `onnxOptions`).
   *
   * @example
   * ```typescript
   * const backend = new GutenOcrBackend();
   * const debugBackend = new GutenOcrBackend({
   *   isDebug: true,
   *   debugOutputDir: './ocr_debug'
   * });
   * ```
   */
  constructor(options) {
    this.options = options;
  }
  /**
   * Get the backend name.
   *
   * @returns Backend name ("guten-ocr")
   */
  name() {
    return "guten-ocr";
  }
  /**
   * Get the language identifiers this backend advertises.
   *
   * @returns A fresh array of the fixed identifiers
   *   ("en", "eng", "ch_sim", "ch_tra", "chinese")
   */
  supportedLanguages() {
    return ["en", "eng", "ch_sim", "ch_tra", "chinese"];
  }
  /**
   * Load `@gutenye/ocr-node` and create the OCR engine.
   *
   * Returns immediately when an engine already exists. Concurrent callers
   * share a single in-flight initialization, so `create()` runs only once.
   * After a failure the next call retries from scratch.
   *
   * @throws {Error} If `@gutenye/ocr-node` cannot be imported
   * @throws {Error} If the module's `create()` call rejects
   *   (the original error is attached as `cause` in both cases)
   *
   * @example
   * ```typescript
   * const backend = new GutenOcrBackend();
   * await backend.initialize();
   * ```
   */
  async initialize() {
    if (this.ocr !== null) {
      return;
    }
    if (this.pendingInit === null) {
      const attempt = (async () => {
        if (this.ocrModule === null) {
          try {
            const loaded = await import("@gutenye/ocr-node");
            this.ocrModule = loaded.default || loaded;
          } catch (e) {
            // Thrown values are not guaranteed to be Error instances.
            const reason = e?.message ?? String(e);
            throw new Error(
              `Guten OCR support requires the '@gutenye/ocr-node' package. Install with: npm install @gutenye/ocr-node. Error: ${reason}`,
              { cause: e }
            );
          }
        }
        try {
          this.ocr = (await this.ocrModule?.create(this.options)) ?? null;
        } catch (e) {
          const reason = e?.message ?? String(e);
          throw new Error(`Failed to initialize Guten OCR: ${reason}`, { cause: e });
        }
      })();
      // Clear the slot once settled so a failed attempt can be retried.
      this.pendingInit = attempt.finally(() => {
        this.pendingInit = null;
      });
    }
    await this.pendingInit;
  }
  /**
   * Drop the references to the OCR engine and the loaded module.
   * A later `initialize()` or `processImage()` call loads them again.
   *
   * @example
   * ```typescript
   * await backend.shutdown();
   * ```
   */
  async shutdown() {
    this.ocr = null;
    this.ocrModule = null;
  }
  /**
   * Run OCR on an encoded image and return the recognized text.
   *
   * Steps:
   * 1. Initializes the backend on demand if no engine exists yet.
   * 2. Converts the input to a Buffer (strings are decoded as base64).
   * 3. Best-effort: reads width/height through `sharp`; on any failure a
   *    warning is logged and both stay 0.
   * 4. Calls the engine's `detect()` and joins the returned lines with "\n".
   *
   * Setting the environment variable `KREUZBERG_DEBUG_GUTEN=1` logs the first
   * bytes and type information of the input.
   *
   * @param imageBytes - Encoded image as bytes, or as a base64 string
   * @param language - Language code; it is only echoed into the result
   *   metadata and is not passed to the OCR engine
   * @returns Promise resolving to `{ content, mime_type, metadata, tables }`
   *   where `metadata.confidence` is the mean of the lines' `mean` scores
   *   (0 when no lines were detected)
   *
   * @throws {Error} If initialization fails or yields no engine
   * @throws {Error} If conversion or detection fails (original error in `cause`)
   *
   * @example
   * ```typescript
   * import { readFile } from 'fs/promises';
   *
   * const backend = new GutenOcrBackend();
   * const imageBytes = await readFile('scanned.png');
   * const result = await backend.processImage(imageBytes, 'en');
   * console.log(result.content, result.metadata.confidence);
   * ```
   */
  async processImage(imageBytes, language) {
    if (this.ocr === null) {
      await this.initialize();
    }
    if (this.ocr === null) {
      throw new Error("Guten OCR backend failed to initialize");
    }
    try {
      const buffer = typeof imageBytes === "string" ? Buffer.from(imageBytes, "base64") : Buffer.from(imageBytes);
      if (process.env["KREUZBERG_DEBUG_GUTEN"] === "1") {
        const header = Array.from(buffer.subarray(0, 8));
        console.log("[Guten OCR] Debug input header:", header);
        console.log(
          "[Guten OCR] Buffer?",
          Buffer.isBuffer(buffer),
          "constructor",
          imageBytes?.constructor?.name,
          "length",
          buffer.length,
          "type",
          typeof imageBytes
        );
      }
      let width = 0;
      let height = 0;
      try {
        const sharpModule = await import("sharp");
        const sharp = sharpModule.default || sharpModule;
        const metadata = await sharp(buffer).metadata();
        width = metadata["width"] ?? 0;
        height = metadata["height"] ?? 0;
      } catch (metadataError) {
        // Dimensions are optional: OCR still proceeds without them.
        const reason = metadataError?.message ?? String(metadataError);
        console.warn(`[Guten OCR] Unable to read image metadata via sharp: ${reason}`);
      }
      const lines = await this.ocr.detect(buffer);
      const content = lines.map((line) => line.text).join("\n");
      const confidence = lines.length > 0 ? lines.reduce((sum, line) => sum + line.mean, 0) / lines.length : 0;
      return {
        content,
        mime_type: "text/plain",
        metadata: {
          width,
          height,
          confidence,
          text_regions: lines.length,
          language
        },
        tables: []
      };
    } catch (e) {
      const reason = e?.message ?? String(e);
      throw new Error(`Guten OCR processing failed: ${reason}`, { cause: e });
    }
  }
}
|
|
228
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
229
|
+
0 && (module.exports = {
|
|
230
|
+
GutenOcrBackend
|
|
231
|
+
});
|
|
232
|
+
//# sourceMappingURL=guten-ocr.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../typescript/ocr/guten-ocr.ts"],"sourcesContent":["/**\n * Guten OCR backend for document OCR processing.\n *\n * This module provides integration with @gutenye/ocr-node for optical character recognition.\n * Guten OCR uses PaddleOCR models via ONNX Runtime for high-performance text extraction.\n *\n * @module ocr/guten-ocr\n */\n\nimport type { OcrBackendProtocol } from \"../types.js\";\n\n/**\n * Text line detected by Guten OCR.\n */\ninterface TextLine {\n\ttext: string;\n\tmean: number;\n\tbox: number[][];\n}\n\n/**\n * Guten OCR instance interface.\n */\ninterface GutenOcr {\n\tdetect(imagePath: string | Buffer, options?: { onnxOptions?: unknown }): Promise<TextLine[]>;\n}\n\n/**\n * Guten OCR module interface.\n */\ninterface GutenOcrModule {\n\tcreate(options?: {\n\t\tmodels?: {\n\t\t\tdetectionPath: string;\n\t\t\trecognitionPath: string;\n\t\t\tdictionaryPath: string;\n\t\t};\n\t\tisDebug?: boolean;\n\t\tdebugOutputDir?: string;\n\t\tonnxOptions?: unknown;\n\t}): Promise<GutenOcr>;\n}\n\n/**\n * Guten OCR backend for OCR processing.\n *\n * This backend uses @gutenye/ocr-node for text extraction from images.\n * It uses PaddleOCR models via ONNX Runtime for efficient processing.\n *\n * ## Installation\n *\n * Install the optional dependency:\n * ```bash\n * npm install @gutenye/ocr-node\n * # or\n * pnpm add @gutenye/ocr-node\n * # or\n * bun add @gutenye/ocr-node\n * ```\n *\n * ## Usage\n *\n * ```typescript\n * import { GutenOcrBackend } from '@kreuzberg/node/ocr/guten-ocr';\n * import { registerOcrBackend, extractFile } from '@kreuzberg/node';\n *\n * // Create and register the backend\n * const backend = new GutenOcrBackend();\n * await backend.initialize();\n * registerOcrBackend(backend);\n *\n * // Extract with OCR enabled\n * const result = await extractFile('scanned.pdf', null, {\n * ocr: { backend: 'guten-ocr', language: 'en' },\n * });\n * console.log(result.content);\n * ```\n *\n * ## Supported Languages\n *\n * 
Guten OCR supports multiple languages via different model configurations.\n * The default models support English (\"en\") and Chinese (\"ch_sim\", \"ch_tra\").\n *\n * @example\n * ```typescript\n * // Basic usage with default settings\n * const backend = new GutenOcrBackend();\n * await backend.initialize();\n *\n * // Custom model configuration\n * const customBackend = new GutenOcrBackend({\n * models: {\n * detectionPath: './models/detection.onnx',\n * recognitionPath: './models/recognition.onnx',\n * dictionaryPath: './models/dict.txt'\n * }\n * });\n * await customBackend.initialize();\n * ```\n */\nexport class GutenOcrBackend implements OcrBackendProtocol {\n\tprivate ocr: GutenOcr | null = null;\n\tprivate ocrModule: GutenOcrModule | null = null;\n\tprivate options?: {\n\t\tmodels?: {\n\t\t\tdetectionPath: string;\n\t\t\trecognitionPath: string;\n\t\t\tdictionaryPath: string;\n\t\t};\n\t\tisDebug?: boolean;\n\t\tdebugOutputDir?: string;\n\t\tonnxOptions?: unknown;\n\t};\n\n\t/**\n\t * Create a new Guten OCR backend.\n\t *\n\t * @param options - Optional configuration for Guten OCR\n\t * @param options.models - Custom model paths (default: uses bundled models)\n\t * @param options.isDebug - Enable debug mode (default: false)\n\t * @param options.debugOutputDir - Directory for debug output (default: undefined)\n\t * @param options.onnxOptions - Custom ONNX Runtime options (default: undefined)\n\t *\n\t * @example\n\t * ```typescript\n\t * // Default configuration\n\t * const backend = new GutenOcrBackend();\n\t *\n\t * // With debug enabled\n\t * const debugBackend = new GutenOcrBackend({\n\t * isDebug: true,\n\t * debugOutputDir: './ocr_debug'\n\t * });\n\t * ```\n\t */\n\tconstructor(options?: {\n\t\tmodels?: {\n\t\t\tdetectionPath: string;\n\t\t\trecognitionPath: string;\n\t\t\tdictionaryPath: string;\n\t\t};\n\t\tisDebug?: boolean;\n\t\tdebugOutputDir?: string;\n\t\tonnxOptions?: unknown;\n\t}) {\n\t\tif (options !== undefined) {\n\t\t\tthis.options = 
options;\n\t\t}\n\t}\n\n\t/**\n\t * Get the backend name.\n\t *\n\t * @returns Backend name (\"guten-ocr\")\n\t */\n\tname(): string {\n\t\treturn \"guten-ocr\";\n\t}\n\n\t/**\n\t * Get list of supported language codes.\n\t *\n\t * Guten OCR supports multiple languages depending on the model configuration.\n\t * The default models support English and Chinese.\n\t *\n\t * @returns Array of ISO 639-1/2 language codes\n\t */\n\tsupportedLanguages(): string[] {\n\t\treturn [\"en\", \"eng\", \"ch_sim\", \"ch_tra\", \"chinese\"];\n\t}\n\n\t/**\n\t * Initialize the OCR backend.\n\t *\n\t * This method loads the Guten OCR module and creates an OCR instance.\n\t * Call this before using processImage().\n\t *\n\t * @throws {Error} If @gutenye/ocr-node is not installed\n\t * @throws {Error} If OCR initialization fails\n\t *\n\t * @example\n\t * ```typescript\n\t * const backend = new GutenOcrBackend();\n\t * await backend.initialize();\n\t * ```\n\t */\n\tasync initialize(): Promise<void> {\n\t\tif (this.ocr !== null) {\n\t\t\treturn;\n\t\t}\n\n\t\ttry {\n\t\t\tthis.ocrModule = await import(\"@gutenye/ocr-node\").then((m) => (m.default || m) as GutenOcrModule);\n\t\t} catch (e) {\n\t\t\tconst error = e as Error;\n\t\t\tthrow new Error(\n\t\t\t\t`Guten OCR support requires the '@gutenye/ocr-node' package. ` +\n\t\t\t\t\t`Install with: npm install @gutenye/ocr-node. ` +\n\t\t\t\t\t`Error: ${error.message}`,\n\t\t\t);\n\t\t}\n\n\t\ttry {\n\t\t\tthis.ocr = (await this.ocrModule?.create(this.options)) ?? null;\n\t\t} catch (e) {\n\t\t\tconst error = e as Error;\n\t\t\tthrow new Error(`Failed to initialize Guten OCR: ${error.message}`);\n\t\t}\n\t}\n\n\t/**\n\t * Shutdown the backend and release resources.\n\t *\n\t * This method cleans up all resources associated with the backend,\n\t * including the GutenOCR instance and module references.\n\t *\n\t * @example\n\t * ```typescript\n\t * const backend = new GutenOcrBackend();\n\t * await backend.initialize();\n\t * // ... 
use backend ...\n\t * await backend.shutdown();\n\t * ```\n\t */\n\tasync shutdown(): Promise<void> {\n\t\tif (this.ocr !== null) {\n\t\t\tthis.ocr = null;\n\t\t}\n\n\t\tif (this.ocrModule !== null) {\n\t\t\tthis.ocrModule = null;\n\t\t}\n\t}\n\n\t/**\n\t * Process image bytes and extract text using Guten OCR.\n\t *\n\t * This method:\n\t * 1. Decodes the image using sharp (if pixel data is needed) or passes bytes directly\n\t * 2. Runs OCR detection to find text regions\n\t * 3. Runs OCR recognition on each text region\n\t * 4. Returns extracted text with metadata\n\t *\n\t * @param imageBytes - Raw image data (PNG, JPEG, TIFF, etc.)\n\t * @param language - Language code (must be in supportedLanguages())\n\t * @returns Promise resolving to OCR result with content and metadata\n\t *\n\t * @throws {Error} If backend is not initialized\n\t * @throws {Error} If OCR processing fails\n\t *\n\t * @example\n\t * ```typescript\n\t * import { readFile } from 'fs/promises';\n\t *\n\t * const backend = new GutenOcrBackend();\n\t * await backend.initialize();\n\t *\n\t * const imageBytes = await readFile('scanned.png');\n\t * const result = await backend.processImage(imageBytes, 'en');\n\t * console.log(result.content);\n\t * console.log(result.metadata.confidence);\n\t * ```\n\t */\n\tasync processImage(\n\t\timageBytes: Uint8Array | string,\n\t\tlanguage: string,\n\t): Promise<{\n\t\tcontent: string;\n\t\tmime_type: string;\n\t\tmetadata: {\n\t\t\twidth: number;\n\t\t\theight: number;\n\t\t\tconfidence: number;\n\t\t\ttext_regions: number;\n\t\t\tlanguage: string;\n\t\t};\n\t\ttables: never[];\n\t}> {\n\t\tif (this.ocr === null) {\n\t\t\tawait this.initialize();\n\t\t}\n\n\t\tif (this.ocr === null) {\n\t\t\tthrow new Error(\"Guten OCR backend failed to initialize\");\n\t\t}\n\n\t\ttry {\n\t\t\tconst buffer = typeof imageBytes === \"string\" ? 
Buffer.from(imageBytes, \"base64\") : Buffer.from(imageBytes);\n\n\t\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noUncheckedIndexedAccess\n\t\t\tconst debugEnv = process.env[\"KREUZBERG_DEBUG_GUTEN\"];\n\t\t\tif (debugEnv === \"1\") {\n\t\t\t\tconst header = Array.from(buffer.subarray(0, 8));\n\t\t\t\tconsole.log(\"[Guten OCR] Debug input header:\", header);\n\t\t\t\tconsole.log(\n\t\t\t\t\t\"[Guten OCR] Buffer?\",\n\t\t\t\t\tBuffer.isBuffer(buffer),\n\t\t\t\t\t\"constructor\",\n\t\t\t\t\timageBytes?.constructor?.name,\n\t\t\t\t\t\"length\",\n\t\t\t\t\tbuffer.length,\n\t\t\t\t\t\"type\",\n\t\t\t\t\ttypeof imageBytes,\n\t\t\t\t);\n\t\t\t}\n\n\t\t\tlet width = 0;\n\t\t\tlet height = 0;\n\n\t\t\ttry {\n\t\t\t\tconst sharpModule = await import(\"sharp\");\n\t\t\t\tconst sharp = (sharpModule as unknown as { default?: unknown }).default || sharpModule;\n\t\t\t\tconst image = (sharp as (buffer: Buffer) => { metadata: () => Promise<Record<string, unknown>> })(buffer);\n\t\t\t\tconst metadata = await image.metadata();\n\t\t\t\tconst metadataRecord = metadata as Record<string, unknown>;\n\t\t\t\t// biome-ignore lint/complexity/useLiteralKeys: TypeScript TS4111 requires bracket notation for index signature properties\n\t\t\t\twidth = (metadataRecord[\"width\"] as number | undefined) ?? 0;\n\t\t\t\t// biome-ignore lint/complexity/useLiteralKeys: TypeScript TS4111 requires bracket notation for index signature properties\n\t\t\t\theight = (metadataRecord[\"height\"] as number | undefined) ?? 0;\n\t\t\t} catch (metadataError) {\n\t\t\t\tconst error = metadataError as Error;\n\t\t\t\tconsole.warn(`[Guten OCR] Unable to read image metadata via sharp: ${error.message}`);\n\t\t\t}\n\n\t\t\tconst result = await this.ocr.detect(buffer);\n\n\t\t\tconst textLines = result.map((line) => line.text);\n\t\t\tconst content = textLines.join(\"\\n\");\n\n\t\t\tconst avgConfidence = result.length > 0 ? 
result.reduce((sum, line) => sum + line.mean, 0) / result.length : 0;\n\n\t\t\treturn {\n\t\t\t\tcontent,\n\t\t\t\tmime_type: \"text/plain\",\n\t\t\t\tmetadata: {\n\t\t\t\t\twidth,\n\t\t\t\t\theight,\n\t\t\t\t\tconfidence: avgConfidence,\n\t\t\t\t\ttext_regions: result.length,\n\t\t\t\t\tlanguage,\n\t\t\t\t},\n\t\t\t\ttables: [],\n\t\t\t};\n\t\t} catch (e) {\n\t\t\tconst error = e as Error;\n\t\t\tthrow new Error(`Guten OCR processing failed: ${error.message}`);\n\t\t}\n\t}\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAoGO,MAAM,gBAA8C;AAAA,EAClD,MAAuB;AAAA,EACvB,YAAmC;AAAA,EACnC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAgCR,YAAY,SAST;AACF,QAAI,YAAY,QAAW;AAC1B,WAAK,UAAU;AAAA,IAChB;AAAA,EACD;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,OAAe;AACd,WAAO;AAAA,EACR;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUA,qBAA+B;AAC9B,WAAO,CAAC,MAAM,OAAO,UAAU,UAAU,SAAS;AAAA,EACnD;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAiBA,MAAM,aAA4B;AACjC,QAAI,KAAK,QAAQ,MAAM;AACtB;AAAA,IACD;AAEA,QAAI;AACH,WAAK,YAAY,MAAM,OAAO,mBAAmB,EAAE,KAAK,CAAC,MAAO,EAAE,WAAW,CAAoB;AAAA,IAClG,SAAS,GAAG;AACX,YAAM,QAAQ;AACd,YAAM,IAAI;AAAA,QACT,mHAEW,MAAM,OAAO;AAAA,MACzB;AAAA,IACD;AAEA,QAAI;AACH,WAAK,MAAO,MAAM,KAAK,WAAW,OAAO,KAAK,OAAO,KAAM;AAAA,IAC5D,SAAS,GAAG;AACX,YAAM,QAAQ;AACd,YAAM,IAAI,MAAM,mCAAmC,MAAM,OAAO,EAAE;AAAA,IACnE;AAAA,EACD;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAgBA,MAAM,WAA0B;AAC/B,QAAI,KAAK,QAAQ,MAAM;AACtB,WAAK,MAAM;AAAA,IACZ;AAEA,QAAI,KAAK,cAAc,MAAM;AAC5B,WAAK,YAAY;AAAA,IAClB;AAAA,EACD;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EA+BA,MAAM,aACL,YACA,UAYE;AACF,QAAI,KAAK,QAAQ,MAAM;AACtB,YAAM,KAAK,WAAW;AAAA,IACvB;AAEA,QAAI,KAAK,QAAQ,MAAM;AACtB,YAAM,IAAI,MAAM,wCAAwC;AAAA,IACzD;AAEA,QAAI;AACH,YAAM,SAAS,OAAO,eAAe,WAAW,OAAO,KAAK,YAAY,QA
AQ,IAAI,OAAO,KAAK,UAAU;AAG1G,YAAM,WAAW,QAAQ,IAAI,uBAAuB;AACpD,UAAI,aAAa,KAAK;AACrB,cAAM,SAAS,MAAM,KAAK,OAAO,SAAS,GAAG,CAAC,CAAC;AAC/C,gBAAQ,IAAI,mCAAmC,MAAM;AACrD,gBAAQ;AAAA,UACP;AAAA,UACA,OAAO,SAAS,MAAM;AAAA,UACtB;AAAA,UACA,YAAY,aAAa;AAAA,UACzB;AAAA,UACA,OAAO;AAAA,UACP;AAAA,UACA,OAAO;AAAA,QACR;AAAA,MACD;AAEA,UAAI,QAAQ;AACZ,UAAI,SAAS;AAEb,UAAI;AACH,cAAM,cAAc,MAAM,OAAO,OAAO;AACxC,cAAM,QAAS,YAAiD,WAAW;AAC3E,cAAM,QAAS,MAAmF,MAAM;AACxG,cAAM,WAAW,MAAM,MAAM,SAAS;AACtC,cAAM,iBAAiB;AAEvB,gBAAS,eAAe,OAAO,KAA4B;AAE3D,iBAAU,eAAe,QAAQ,KAA4B;AAAA,MAC9D,SAAS,eAAe;AACvB,cAAM,QAAQ;AACd,gBAAQ,KAAK,wDAAwD,MAAM,OAAO,EAAE;AAAA,MACrF;AAEA,YAAM,SAAS,MAAM,KAAK,IAAI,OAAO,MAAM;AAE3C,YAAM,YAAY,OAAO,IAAI,CAAC,SAAS,KAAK,IAAI;AAChD,YAAM,UAAU,UAAU,KAAK,IAAI;AAEnC,YAAM,gBAAgB,OAAO,SAAS,IAAI,OAAO,OAAO,CAAC,KAAK,SAAS,MAAM,KAAK,MAAM,CAAC,IAAI,OAAO,SAAS;AAE7G,aAAO;AAAA,QACN;AAAA,QACA,WAAW;AAAA,QACX,UAAU;AAAA,UACT;AAAA,UACA;AAAA,UACA,YAAY;AAAA,UACZ,cAAc,OAAO;AAAA,UACrB;AAAA,QACD;AAAA,QACA,QAAQ,CAAC;AAAA,MACV;AAAA,IACD,SAAS,GAAG;AACX,YAAM,QAAQ;AACd,YAAM,IAAI,MAAM,gCAAgC,MAAM,OAAO,EAAE;AAAA,IAChE;AAAA,EACD;AACD;","names":[]}
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
/**
 * OCR backend that delegates text extraction to the optional
 * `@gutenye/ocr-node` package.
 *
 * The package is loaded lazily with a dynamic `import()`, so it only has to be
 * installed when this backend is actually used.
 */
class GutenOcrBackend {
  /** OCR engine returned by the module's `create()`, or null while uninitialized. */
  ocr = null;
  /** Resolved `@gutenye/ocr-node` module (its default export when present), or null. */
  ocrModule = null;
  /** Internal: in-flight initialization shared by concurrent `initialize()` callers, or null. */
  pendingInit = null;
  /** Options forwarded verbatim to `create()`; stays undefined when none were given. */
  options;
  /**
   * Create a new Guten OCR backend. Nothing is loaded until `initialize()`
   * (or the first `processImage()`) runs.
   *
   * @param options - Optional configuration, passed unchanged to the OCR
   *   module's `create()` call (e.g. `models`, `isDebug`, `debugOutputDir`,
   *   `onnxOptions`).
   *
   * @example
   * ```typescript
   * const backend = new GutenOcrBackend();
   * const debugBackend = new GutenOcrBackend({
   *   isDebug: true,
   *   debugOutputDir: './ocr_debug'
   * });
   * ```
   */
  constructor(options) {
    this.options = options;
  }
  /**
   * Get the backend name.
   *
   * @returns Backend name ("guten-ocr")
   */
  name() {
    return "guten-ocr";
  }
  /**
   * Get the language identifiers this backend advertises.
   *
   * @returns A fresh array of the fixed identifiers
   *   ("en", "eng", "ch_sim", "ch_tra", "chinese")
   */
  supportedLanguages() {
    return ["en", "eng", "ch_sim", "ch_tra", "chinese"];
  }
  /**
   * Load `@gutenye/ocr-node` and create the OCR engine.
   *
   * Returns immediately when an engine already exists. Concurrent callers
   * share a single in-flight initialization, so `create()` runs only once.
   * After a failure the next call retries from scratch.
   *
   * @throws {Error} If `@gutenye/ocr-node` cannot be imported
   * @throws {Error} If the module's `create()` call rejects
   *   (the original error is attached as `cause` in both cases)
   *
   * @example
   * ```typescript
   * const backend = new GutenOcrBackend();
   * await backend.initialize();
   * ```
   */
  async initialize() {
    if (this.ocr !== null) {
      return;
    }
    if (this.pendingInit === null) {
      const attempt = (async () => {
        if (this.ocrModule === null) {
          try {
            const loaded = await import("@gutenye/ocr-node");
            this.ocrModule = loaded.default || loaded;
          } catch (e) {
            // Thrown values are not guaranteed to be Error instances.
            const reason = e?.message ?? String(e);
            throw new Error(
              `Guten OCR support requires the '@gutenye/ocr-node' package. Install with: npm install @gutenye/ocr-node. Error: ${reason}`,
              { cause: e }
            );
          }
        }
        try {
          this.ocr = (await this.ocrModule?.create(this.options)) ?? null;
        } catch (e) {
          const reason = e?.message ?? String(e);
          throw new Error(`Failed to initialize Guten OCR: ${reason}`, { cause: e });
        }
      })();
      // Clear the slot once settled so a failed attempt can be retried.
      this.pendingInit = attempt.finally(() => {
        this.pendingInit = null;
      });
    }
    await this.pendingInit;
  }
  /**
   * Drop the references to the OCR engine and the loaded module.
   * A later `initialize()` or `processImage()` call loads them again.
   *
   * @example
   * ```typescript
   * await backend.shutdown();
   * ```
   */
  async shutdown() {
    this.ocr = null;
    this.ocrModule = null;
  }
  /**
   * Run OCR on an encoded image and return the recognized text.
   *
   * Steps:
   * 1. Initializes the backend on demand if no engine exists yet.
   * 2. Converts the input to a Buffer (strings are decoded as base64).
   * 3. Best-effort: reads width/height through `sharp`; on any failure a
   *    warning is logged and both stay 0.
   * 4. Calls the engine's `detect()` and joins the returned lines with "\n".
   *
   * Setting the environment variable `KREUZBERG_DEBUG_GUTEN=1` logs the first
   * bytes and type information of the input.
   *
   * @param imageBytes - Encoded image as bytes, or as a base64 string
   * @param language - Language code; it is only echoed into the result
   *   metadata and is not passed to the OCR engine
   * @returns Promise resolving to `{ content, mime_type, metadata, tables }`
   *   where `metadata.confidence` is the mean of the lines' `mean` scores
   *   (0 when no lines were detected)
   *
   * @throws {Error} If initialization fails or yields no engine
   * @throws {Error} If conversion or detection fails (original error in `cause`)
   *
   * @example
   * ```typescript
   * import { readFile } from 'fs/promises';
   *
   * const backend = new GutenOcrBackend();
   * const imageBytes = await readFile('scanned.png');
   * const result = await backend.processImage(imageBytes, 'en');
   * console.log(result.content, result.metadata.confidence);
   * ```
   */
  async processImage(imageBytes, language) {
    if (this.ocr === null) {
      await this.initialize();
    }
    if (this.ocr === null) {
      throw new Error("Guten OCR backend failed to initialize");
    }
    try {
      const buffer = typeof imageBytes === "string" ? Buffer.from(imageBytes, "base64") : Buffer.from(imageBytes);
      if (process.env["KREUZBERG_DEBUG_GUTEN"] === "1") {
        const header = Array.from(buffer.subarray(0, 8));
        console.log("[Guten OCR] Debug input header:", header);
        console.log(
          "[Guten OCR] Buffer?",
          Buffer.isBuffer(buffer),
          "constructor",
          imageBytes?.constructor?.name,
          "length",
          buffer.length,
          "type",
          typeof imageBytes
        );
      }
      let width = 0;
      let height = 0;
      try {
        const sharpModule = await import("sharp");
        const sharp = sharpModule.default || sharpModule;
        const metadata = await sharp(buffer).metadata();
        width = metadata["width"] ?? 0;
        height = metadata["height"] ?? 0;
      } catch (metadataError) {
        // Dimensions are optional: OCR still proceeds without them.
        const reason = metadataError?.message ?? String(metadataError);
        console.warn(`[Guten OCR] Unable to read image metadata via sharp: ${reason}`);
      }
      const lines = await this.ocr.detect(buffer);
      const content = lines.map((line) => line.text).join("\n");
      const confidence = lines.length > 0 ? lines.reduce((sum, line) => sum + line.mean, 0) / lines.length : 0;
      return {
        content,
        mime_type: "text/plain",
        metadata: {
          width,
          height,
          confidence,
          text_regions: lines.length,
          language
        },
        tables: []
      };
    } catch (e) {
      const reason = e?.message ?? String(e);
      throw new Error(`Guten OCR processing failed: ${reason}`, { cause: e });
    }
  }
}
|
|
195
|
+
export {
|
|
196
|
+
GutenOcrBackend
|
|
197
|
+
};
|
|
198
|
+
//# sourceMappingURL=guten-ocr.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../typescript/ocr/guten-ocr.ts"],"sourcesContent":["/**\n * Guten OCR backend for document OCR processing.\n *\n * This module provides integration with @gutenye/ocr-node for optical character recognition.\n * Guten OCR uses PaddleOCR models via ONNX Runtime for high-performance text extraction.\n *\n * @module ocr/guten-ocr\n */\n\nimport type { OcrBackendProtocol } from \"../types.js\";\n\n/**\n * Text line detected by Guten OCR.\n */\ninterface TextLine {\n\ttext: string;\n\tmean: number;\n\tbox: number[][];\n}\n\n/**\n * Guten OCR instance interface.\n */\ninterface GutenOcr {\n\tdetect(imagePath: string | Buffer, options?: { onnxOptions?: unknown }): Promise<TextLine[]>;\n}\n\n/**\n * Guten OCR module interface.\n */\ninterface GutenOcrModule {\n\tcreate(options?: {\n\t\tmodels?: {\n\t\t\tdetectionPath: string;\n\t\t\trecognitionPath: string;\n\t\t\tdictionaryPath: string;\n\t\t};\n\t\tisDebug?: boolean;\n\t\tdebugOutputDir?: string;\n\t\tonnxOptions?: unknown;\n\t}): Promise<GutenOcr>;\n}\n\n/**\n * Guten OCR backend for OCR processing.\n *\n * This backend uses @gutenye/ocr-node for text extraction from images.\n * It uses PaddleOCR models via ONNX Runtime for efficient processing.\n *\n * ## Installation\n *\n * Install the optional dependency:\n * ```bash\n * npm install @gutenye/ocr-node\n * # or\n * pnpm add @gutenye/ocr-node\n * # or\n * bun add @gutenye/ocr-node\n * ```\n *\n * ## Usage\n *\n * ```typescript\n * import { GutenOcrBackend } from '@kreuzberg/node/ocr/guten-ocr';\n * import { registerOcrBackend, extractFile } from '@kreuzberg/node';\n *\n * // Create and register the backend\n * const backend = new GutenOcrBackend();\n * await backend.initialize();\n * registerOcrBackend(backend);\n *\n * // Extract with OCR enabled\n * const result = await extractFile('scanned.pdf', null, {\n * ocr: { backend: 'guten-ocr', language: 'en' },\n * });\n * console.log(result.content);\n * ```\n *\n * ## Supported Languages\n *\n * 
Guten OCR supports multiple languages via different model configurations.\n * The default models support English (\"en\") and Chinese (\"ch_sim\", \"ch_tra\").\n *\n * @example\n * ```typescript\n * // Basic usage with default settings\n * const backend = new GutenOcrBackend();\n * await backend.initialize();\n *\n * // Custom model configuration\n * const customBackend = new GutenOcrBackend({\n * models: {\n * detectionPath: './models/detection.onnx',\n * recognitionPath: './models/recognition.onnx',\n * dictionaryPath: './models/dict.txt'\n * }\n * });\n * await customBackend.initialize();\n * ```\n */\nexport class GutenOcrBackend implements OcrBackendProtocol {\n\tprivate ocr: GutenOcr | null = null;\n\tprivate ocrModule: GutenOcrModule | null = null;\n\tprivate options?: {\n\t\tmodels?: {\n\t\t\tdetectionPath: string;\n\t\t\trecognitionPath: string;\n\t\t\tdictionaryPath: string;\n\t\t};\n\t\tisDebug?: boolean;\n\t\tdebugOutputDir?: string;\n\t\tonnxOptions?: unknown;\n\t};\n\n\t/**\n\t * Create a new Guten OCR backend.\n\t *\n\t * @param options - Optional configuration for Guten OCR\n\t * @param options.models - Custom model paths (default: uses bundled models)\n\t * @param options.isDebug - Enable debug mode (default: false)\n\t * @param options.debugOutputDir - Directory for debug output (default: undefined)\n\t * @param options.onnxOptions - Custom ONNX Runtime options (default: undefined)\n\t *\n\t * @example\n\t * ```typescript\n\t * // Default configuration\n\t * const backend = new GutenOcrBackend();\n\t *\n\t * // With debug enabled\n\t * const debugBackend = new GutenOcrBackend({\n\t * isDebug: true,\n\t * debugOutputDir: './ocr_debug'\n\t * });\n\t * ```\n\t */\n\tconstructor(options?: {\n\t\tmodels?: {\n\t\t\tdetectionPath: string;\n\t\t\trecognitionPath: string;\n\t\t\tdictionaryPath: string;\n\t\t};\n\t\tisDebug?: boolean;\n\t\tdebugOutputDir?: string;\n\t\tonnxOptions?: unknown;\n\t}) {\n\t\tif (options !== undefined) {\n\t\t\tthis.options = 
options;\n\t\t}\n\t}\n\n\t/**\n\t * Get the backend name.\n\t *\n\t * @returns Backend name (\"guten-ocr\")\n\t */\n\tname(): string {\n\t\treturn \"guten-ocr\";\n\t}\n\n\t/**\n\t * Get list of supported language codes.\n\t *\n\t * Guten OCR supports multiple languages depending on the model configuration.\n\t * The default models support English and Chinese.\n\t *\n\t * @returns Array of ISO 639-1/2 language codes\n\t */\n\tsupportedLanguages(): string[] {\n\t\treturn [\"en\", \"eng\", \"ch_sim\", \"ch_tra\", \"chinese\"];\n\t}\n\n\t/**\n\t * Initialize the OCR backend.\n\t *\n\t * This method loads the Guten OCR module and creates an OCR instance.\n\t * Call this before using processImage().\n\t *\n\t * @throws {Error} If @gutenye/ocr-node is not installed\n\t * @throws {Error} If OCR initialization fails\n\t *\n\t * @example\n\t * ```typescript\n\t * const backend = new GutenOcrBackend();\n\t * await backend.initialize();\n\t * ```\n\t */\n\tasync initialize(): Promise<void> {\n\t\tif (this.ocr !== null) {\n\t\t\treturn;\n\t\t}\n\n\t\ttry {\n\t\t\tthis.ocrModule = await import(\"@gutenye/ocr-node\").then((m) => (m.default || m) as GutenOcrModule);\n\t\t} catch (e) {\n\t\t\tconst error = e as Error;\n\t\t\tthrow new Error(\n\t\t\t\t`Guten OCR support requires the '@gutenye/ocr-node' package. ` +\n\t\t\t\t\t`Install with: npm install @gutenye/ocr-node. ` +\n\t\t\t\t\t`Error: ${error.message}`,\n\t\t\t);\n\t\t}\n\n\t\ttry {\n\t\t\tthis.ocr = (await this.ocrModule?.create(this.options)) ?? null;\n\t\t} catch (e) {\n\t\t\tconst error = e as Error;\n\t\t\tthrow new Error(`Failed to initialize Guten OCR: ${error.message}`);\n\t\t}\n\t}\n\n\t/**\n\t * Shutdown the backend and release resources.\n\t *\n\t * This method cleans up all resources associated with the backend,\n\t * including the GutenOCR instance and module references.\n\t *\n\t * @example\n\t * ```typescript\n\t * const backend = new GutenOcrBackend();\n\t * await backend.initialize();\n\t * // ... 
use backend ...\n\t * await backend.shutdown();\n\t * ```\n\t */\n\tasync shutdown(): Promise<void> {\n\t\tif (this.ocr !== null) {\n\t\t\tthis.ocr = null;\n\t\t}\n\n\t\tif (this.ocrModule !== null) {\n\t\t\tthis.ocrModule = null;\n\t\t}\n\t}\n\n\t/**\n\t * Process image bytes and extract text using Guten OCR.\n\t *\n\t * This method:\n\t * 1. Decodes the image using sharp (if pixel data is needed) or passes bytes directly\n\t * 2. Runs OCR detection to find text regions\n\t * 3. Runs OCR recognition on each text region\n\t * 4. Returns extracted text with metadata\n\t *\n\t * @param imageBytes - Raw image data (PNG, JPEG, TIFF, etc.)\n\t * @param language - Language code (must be in supportedLanguages())\n\t * @returns Promise resolving to OCR result with content and metadata\n\t *\n\t * @throws {Error} If backend is not initialized\n\t * @throws {Error} If OCR processing fails\n\t *\n\t * @example\n\t * ```typescript\n\t * import { readFile } from 'fs/promises';\n\t *\n\t * const backend = new GutenOcrBackend();\n\t * await backend.initialize();\n\t *\n\t * const imageBytes = await readFile('scanned.png');\n\t * const result = await backend.processImage(imageBytes, 'en');\n\t * console.log(result.content);\n\t * console.log(result.metadata.confidence);\n\t * ```\n\t */\n\tasync processImage(\n\t\timageBytes: Uint8Array | string,\n\t\tlanguage: string,\n\t): Promise<{\n\t\tcontent: string;\n\t\tmime_type: string;\n\t\tmetadata: {\n\t\t\twidth: number;\n\t\t\theight: number;\n\t\t\tconfidence: number;\n\t\t\ttext_regions: number;\n\t\t\tlanguage: string;\n\t\t};\n\t\ttables: never[];\n\t}> {\n\t\tif (this.ocr === null) {\n\t\t\tawait this.initialize();\n\t\t}\n\n\t\tif (this.ocr === null) {\n\t\t\tthrow new Error(\"Guten OCR backend failed to initialize\");\n\t\t}\n\n\t\ttry {\n\t\t\tconst buffer = typeof imageBytes === \"string\" ? 
Buffer.from(imageBytes, \"base64\") : Buffer.from(imageBytes);\n\n\t\t\t// biome-ignore lint/complexity/useLiteralKeys: required for strict TypeScript noUncheckedIndexedAccess\n\t\t\tconst debugEnv = process.env[\"KREUZBERG_DEBUG_GUTEN\"];\n\t\t\tif (debugEnv === \"1\") {\n\t\t\t\tconst header = Array.from(buffer.subarray(0, 8));\n\t\t\t\tconsole.log(\"[Guten OCR] Debug input header:\", header);\n\t\t\t\tconsole.log(\n\t\t\t\t\t\"[Guten OCR] Buffer?\",\n\t\t\t\t\tBuffer.isBuffer(buffer),\n\t\t\t\t\t\"constructor\",\n\t\t\t\t\timageBytes?.constructor?.name,\n\t\t\t\t\t\"length\",\n\t\t\t\t\tbuffer.length,\n\t\t\t\t\t\"type\",\n\t\t\t\t\ttypeof imageBytes,\n\t\t\t\t);\n\t\t\t}\n\n\t\t\tlet width = 0;\n\t\t\tlet height = 0;\n\n\t\t\ttry {\n\t\t\t\tconst sharpModule = await import(\"sharp\");\n\t\t\t\tconst sharp = (sharpModule as unknown as { default?: unknown }).default || sharpModule;\n\t\t\t\tconst image = (sharp as (buffer: Buffer) => { metadata: () => Promise<Record<string, unknown>> })(buffer);\n\t\t\t\tconst metadata = await image.metadata();\n\t\t\t\tconst metadataRecord = metadata as Record<string, unknown>;\n\t\t\t\t// biome-ignore lint/complexity/useLiteralKeys: TypeScript TS4111 requires bracket notation for index signature properties\n\t\t\t\twidth = (metadataRecord[\"width\"] as number | undefined) ?? 0;\n\t\t\t\t// biome-ignore lint/complexity/useLiteralKeys: TypeScript TS4111 requires bracket notation for index signature properties\n\t\t\t\theight = (metadataRecord[\"height\"] as number | undefined) ?? 0;\n\t\t\t} catch (metadataError) {\n\t\t\t\tconst error = metadataError as Error;\n\t\t\t\tconsole.warn(`[Guten OCR] Unable to read image metadata via sharp: ${error.message}`);\n\t\t\t}\n\n\t\t\tconst result = await this.ocr.detect(buffer);\n\n\t\t\tconst textLines = result.map((line) => line.text);\n\t\t\tconst content = textLines.join(\"\\n\");\n\n\t\t\tconst avgConfidence = result.length > 0 ? 
result.reduce((sum, line) => sum + line.mean, 0) / result.length : 0;\n\n\t\t\treturn {\n\t\t\t\tcontent,\n\t\t\t\tmime_type: \"text/plain\",\n\t\t\t\tmetadata: {\n\t\t\t\t\twidth,\n\t\t\t\t\theight,\n\t\t\t\t\tconfidence: avgConfidence,\n\t\t\t\t\ttext_regions: result.length,\n\t\t\t\t\tlanguage,\n\t\t\t\t},\n\t\t\t\ttables: [],\n\t\t\t};\n\t\t} catch (e) {\n\t\t\tconst error = e as Error;\n\t\t\tthrow new Error(`Guten OCR processing failed: ${error.message}`);\n\t\t}\n\t}\n}\n"],"mappings":"AAoGO,MAAM,gBAA8C;AAAA,EAClD,MAAuB;AAAA,EACvB,YAAmC;AAAA,EACnC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAgCR,YAAY,SAST;AACF,QAAI,YAAY,QAAW;AAC1B,WAAK,UAAU;AAAA,IAChB;AAAA,EACD;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,OAAe;AACd,WAAO;AAAA,EACR;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUA,qBAA+B;AAC9B,WAAO,CAAC,MAAM,OAAO,UAAU,UAAU,SAAS;AAAA,EACnD;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAiBA,MAAM,aAA4B;AACjC,QAAI,KAAK,QAAQ,MAAM;AACtB;AAAA,IACD;AAEA,QAAI;AACH,WAAK,YAAY,MAAM,OAAO,mBAAmB,EAAE,KAAK,CAAC,MAAO,EAAE,WAAW,CAAoB;AAAA,IAClG,SAAS,GAAG;AACX,YAAM,QAAQ;AACd,YAAM,IAAI;AAAA,QACT,mHAEW,MAAM,OAAO;AAAA,MACzB;AAAA,IACD;AAEA,QAAI;AACH,WAAK,MAAO,MAAM,KAAK,WAAW,OAAO,KAAK,OAAO,KAAM;AAAA,IAC5D,SAAS,GAAG;AACX,YAAM,QAAQ;AACd,YAAM,IAAI,MAAM,mCAAmC,MAAM,OAAO,EAAE;AAAA,IACnE;AAAA,EACD;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAgBA,MAAM,WAA0B;AAC/B,QAAI,KAAK,QAAQ,MAAM;AACtB,WAAK,MAAM;AAAA,IACZ;AAEA,QAAI,KAAK,cAAc,MAAM;AAC5B,WAAK,YAAY;AAAA,IAClB;AAAA,EACD;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EA+BA,MAAM,aACL,YACA,UAYE;AACF,QAAI,KAAK,QAAQ,MAAM;AACtB,YAAM,KAAK,WAAW;AAAA,IACvB;AAEA,QAAI,KAAK,QAAQ,MAAM;AACtB,YAAM,IAAI,MAAM,wCAAwC;AAAA,IACzD;AAEA,QAAI;AACH,YAAM,SAAS,OAAO,eAAe,WAAW,OAAO,KAAK,YAAY,QAAQ,IAAI,OAAO,KAAK,UAAU;AAG1G,YAAM,WAAW,QAAQ,IAAI,uBAA
uB;AACpD,UAAI,aAAa,KAAK;AACrB,cAAM,SAAS,MAAM,KAAK,OAAO,SAAS,GAAG,CAAC,CAAC;AAC/C,gBAAQ,IAAI,mCAAmC,MAAM;AACrD,gBAAQ;AAAA,UACP;AAAA,UACA,OAAO,SAAS,MAAM;AAAA,UACtB;AAAA,UACA,YAAY,aAAa;AAAA,UACzB;AAAA,UACA,OAAO;AAAA,UACP;AAAA,UACA,OAAO;AAAA,QACR;AAAA,MACD;AAEA,UAAI,QAAQ;AACZ,UAAI,SAAS;AAEb,UAAI;AACH,cAAM,cAAc,MAAM,OAAO,OAAO;AACxC,cAAM,QAAS,YAAiD,WAAW;AAC3E,cAAM,QAAS,MAAmF,MAAM;AACxG,cAAM,WAAW,MAAM,MAAM,SAAS;AACtC,cAAM,iBAAiB;AAEvB,gBAAS,eAAe,OAAO,KAA4B;AAE3D,iBAAU,eAAe,QAAQ,KAA4B;AAAA,MAC9D,SAAS,eAAe;AACvB,cAAM,QAAQ;AACd,gBAAQ,KAAK,wDAAwD,MAAM,OAAO,EAAE;AAAA,MACrF;AAEA,YAAM,SAAS,MAAM,KAAK,IAAI,OAAO,MAAM;AAE3C,YAAM,YAAY,OAAO,IAAI,CAAC,SAAS,KAAK,IAAI;AAChD,YAAM,UAAU,UAAU,KAAK,IAAI;AAEnC,YAAM,gBAAgB,OAAO,SAAS,IAAI,OAAO,OAAO,CAAC,KAAK,SAAS,MAAM,KAAK,MAAM,CAAC,IAAI,OAAO,SAAS;AAE7G,aAAO;AAAA,QACN;AAAA,QACA,WAAW;AAAA,QACX,UAAU;AAAA,UACT;AAAA,UACA;AAAA,UACA,YAAY;AAAA,UACZ,cAAc,OAAO;AAAA,UACrB;AAAA,QACD;AAAA,QACA,QAAQ,CAAC;AAAA,MACV;AAAA,IACD,SAAS,GAAG;AACX,YAAM,QAAQ;AACd,YAAM,IAAI,MAAM,gCAAgC,MAAM,OAAO,EAAE;AAAA,IAChE;AAAA,EACD;AACD;","names":[]}
|