@luii/node-tesseract-ocr 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +86 -49
- package/dist/cjs/index.cjs +132 -18
- package/dist/cjs/index.d.ts +72 -3
- package/dist/cjs/utils.d.ts +1 -0
- package/dist/cjs/utils.js +23 -0
- package/dist/esm/index.d.ts +72 -3
- package/dist/esm/index.mjs +122 -14
- package/dist/esm/utils.d.ts +1 -0
- package/dist/esm/utils.js +10 -0
- package/package.json +10 -8
- package/prebuilds/node-tesseract-ocr-darwin-arm64/node-napi-v10.node +0 -0
- package/prebuilds/node-tesseract-ocr-linux-x64/node-napi-v10.node +0 -0
- package/src/commands.hpp +0 -1
- package/src/tesseract_wrapper.cpp +18 -5
- package/src/tesseract_wrapper.hpp +0 -2
package/README.md
CHANGED
|
@@ -16,7 +16,6 @@ Native C++ addon for Node.js that exposes Tesseract OCR (`libtesseract-dev`) to
|
|
|
16
16
|
- [Enums](#enums)
|
|
17
17
|
- [Types](#types)
|
|
18
18
|
- [Tesseract API](#tesseract-api)
|
|
19
|
-
- [Example](#example)
|
|
20
19
|
- [License](#license)
|
|
21
20
|
- [Special Thanks](#special-thanks)
|
|
22
21
|
|
|
@@ -25,6 +24,7 @@ Native C++ addon for Node.js that exposes Tesseract OCR (`libtesseract-dev`) to
|
|
|
25
24
|
- Native bindings to Tesseract (prebuilds via `pkg-prebuilds`)
|
|
26
25
|
- Access to Tesseract enums and configuration from TypeScript
|
|
27
26
|
- Progress callback and multiple output formats
|
|
27
|
+
- Lazy download of missing traineddata (configurable)
|
|
28
28
|
|
|
29
29
|
## Prerequisites
|
|
30
30
|
|
|
@@ -33,7 +33,7 @@ Native C++ addon for Node.js that exposes Tesseract OCR (`libtesseract-dev`) to
|
|
|
33
33
|
- c++ build toolchain (e.g. build-essentials)
|
|
34
34
|
- libtesseract-dev
|
|
35
35
|
- libleptonica-dev
|
|
36
|
-
- Tesseract training data (eng, deu, ...)
|
|
36
|
+
- Tesseract training data (eng, deu, ...) or let the library handle that
|
|
37
37
|
|
|
38
38
|
> See [Install](#install)
|
|
39
39
|
|
|
@@ -59,7 +59,9 @@ Install additional languages as needed, for example:
|
|
|
59
59
|
sudo apt install -y tesseract-ocr-deu tesseract-ocr-eng tesseract-ocr-jpn
|
|
60
60
|
```
|
|
61
61
|
|
|
62
|
-
If you install traineddata files manually, make sure `
|
|
62
|
+
If you install traineddata files manually, make sure `TESSDATA_PREFIX` points to the directory that contains them (for example `/usr/share/tessdata`).
|
|
63
|
+
|
|
64
|
+
If traineddata is missing, this package will download it lazily during `init` by default. You can control this behavior via `ensureTraineddata`, `cachePath`, and `dataPath`.
|
|
63
65
|
|
|
64
66
|
## Build
|
|
65
67
|
|
|
@@ -73,12 +75,14 @@ npm run build:release
|
|
|
73
75
|
|
|
74
76
|
## Start
|
|
75
77
|
|
|
76
|
-
Set `
|
|
78
|
+
Set `TESSDATA_PREFIX` to your traineddata directory (usually `/usr/share/tesseract-ocr/5/tessdata` or `/usr/share/tessdata`).
|
|
77
79
|
|
|
78
80
|
```sh
|
|
79
|
-
env
|
|
81
|
+
env TESSDATA_PREFIX=/usr/share/tessdata node path/to/your/app.js
|
|
80
82
|
```
|
|
81
83
|
|
|
84
|
+
If you prefer automatic downloads, you can skip setting `TESSDATA_PREFIX` and let the default cache directory handle traineddata on first use.
|
|
85
|
+
|
|
82
86
|
## Scripts
|
|
83
87
|
|
|
84
88
|
```bash
|
|
@@ -86,9 +90,6 @@ env NODE_TESSERACT_DATAPATH=/usr/share/tesseract-ocr/5/tessdata node path/to/you
|
|
|
86
90
|
npm run build:debug
|
|
87
91
|
npm run build:release
|
|
88
92
|
|
|
89
|
-
# Build precompiled binaries for distribution
|
|
90
|
-
npm run prebuild
|
|
91
|
-
|
|
92
93
|
# Run the JS example (builds debug first)
|
|
93
94
|
npm run example:recognize
|
|
94
95
|
|
|
@@ -100,8 +101,73 @@ npm run test:js:watch
|
|
|
100
101
|
|
|
101
102
|
## Examples
|
|
102
103
|
|
|
104
|
+
### Run Included Example
|
|
105
|
+
|
|
103
106
|
```sh
|
|
104
|
-
env
|
|
107
|
+
env TESSDATA_PREFIX=/usr/share/tessdata npm run example:recognize
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### Basic OCR (Local Traineddata)
|
|
111
|
+
|
|
112
|
+
You can find a similar example in the `examples/` folder of the project.
|
|
113
|
+
|
|
114
|
+
```ts
|
|
115
|
+
import fs from "node:fs";
|
|
116
|
+
import Tesseract, { OcrEngineModes } from "node-tesseract-ocr";
|
|
117
|
+
|
|
118
|
+
process.env.TESSDATA_PREFIX = "/usr/share/tessdata/";
|
|
119
|
+
|
|
120
|
+
async function main() {
|
|
121
|
+
const tesseract = new Tesseract();
|
|
122
|
+
await tesseract.init({
|
|
123
|
+
langs: ["eng"],
|
|
124
|
+
});
|
|
125
|
+
|
|
126
|
+
const buffer = fs.readFileSync("example1.png");
|
|
127
|
+
await tesseract.setImage(buffer);
|
|
128
|
+
await tesseract.recognize((info) => {
|
|
129
|
+
console.log(`Progress: ${info.percent}%`);
|
|
130
|
+
});
|
|
131
|
+
|
|
132
|
+
const text = await tesseract.getUTF8Text();
|
|
133
|
+
console.log(text);
|
|
134
|
+
|
|
135
|
+
await tesseract.end();
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
main().catch((err) => {
|
|
139
|
+
console.error(err);
|
|
140
|
+
process.exit(1);
|
|
141
|
+
});
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
### Lazy Traineddata Download (Default)
|
|
145
|
+
|
|
146
|
+
```ts
|
|
147
|
+
import fs from "node:fs";
|
|
148
|
+
import Tesseract from "node-tesseract-ocr";
|
|
149
|
+
|
|
150
|
+
async function main() {
|
|
151
|
+
const tesseract = new Tesseract();
|
|
152
|
+
await tesseract.init({
|
|
153
|
+
langs: ["eng"],
|
|
154
|
+
ensureTraineddata: true,
|
|
155
|
+
dataPath: "./tessdata-local",
|
|
156
|
+
});
|
|
157
|
+
|
|
158
|
+
const buffer = fs.readFileSync("example1.png");
|
|
159
|
+
await tesseract.setImage(buffer);
|
|
160
|
+
await tesseract.recognize();
|
|
161
|
+
const text = await tesseract.getUTF8Text();
|
|
162
|
+
console.log(text);
|
|
163
|
+
|
|
164
|
+
await tesseract.end();
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
main().catch((err) => {
|
|
168
|
+
console.error(err);
|
|
169
|
+
process.exit(1);
|
|
170
|
+
});
|
|
105
171
|
```
|
|
106
172
|
|
|
107
173
|
## Public API
|
|
@@ -151,13 +217,17 @@ Full list of page segmentation modes from Tesseract.
|
|
|
151
217
|
|
|
152
218
|
#### `TesseractInitOptions`
|
|
153
219
|
|
|
154
|
-
| Field | Type | Optional | Default
|
|
155
|
-
| ----------------------- | ----------------------------------------------------------------------------------------------------- | -------- |
|
|
156
|
-
| `
|
|
157
|
-
| `oem` | [`OcrEngineMode`](#ocrenginemode) | Yes | `undefined`
|
|
158
|
-
| `vars` | `Partial<Record<keyof ConfigurationVariables, ConfigurationVariables[keyof ConfigurationVariables]>>` | Yes | `undefined`
|
|
159
|
-
| `configs` | `Array<string>` | Yes | `undefined`
|
|
160
|
-
| `setOnlyNonDebugParams` | `boolean` | Yes | `undefined`
|
|
220
|
+
| Field | Type | Optional | Default | Description |
|
|
221
|
+
| ----------------------- | ----------------------------------------------------------------------------------------------------- | -------- | -------------------------------------- | --------------------------------------- |
|
|
222
|
+
| `langs` | [`Language[]`](#availablelanguages) | Yes | `undefined` | Languages to load as an array. |
|
|
223
|
+
| `oem` | [`OcrEngineMode`](#ocrenginemode) | Yes | `undefined` | OCR engine mode. |
|
|
224
|
+
| `vars` | `Partial<Record<keyof ConfigurationVariables, ConfigurationVariables[keyof ConfigurationVariables]>>` | Yes | `undefined` | Variables to set. |
|
|
225
|
+
| `configs` | `Array<string>` | Yes | `undefined` | Tesseract config files to apply. |
|
|
226
|
+
| `setOnlyNonDebugParams` | `boolean` | Yes | `undefined` | If true, only non-debug params are set. |
|
|
227
|
+
| `ensureTraineddata` | `boolean` | Yes | `true` | Download missing traineddata lazily. |
|
|
228
|
+
| `cachePath` | `string` | Yes | `~/.cache/node-tesseract-ocr/tessdata` | Cache directory for downloads. |
|
|
229
|
+
| `dataPath` | `string` | Yes | `TESSDATA_PREFIX` or `cachePath` | Directory used by Tesseract for data. |
|
|
230
|
+
| `progressCallback` | `(info: TrainingDataDownloadProgress) => void` | Yes | `undefined` | Download progress callback. |
|
|
161
231
|
|
|
162
232
|
#### `TesseractSetRectangleOptions`
|
|
163
233
|
|
|
@@ -464,39 +534,6 @@ Ends the instance.
|
|
|
464
534
|
end(): Promise<void>
|
|
465
535
|
```
|
|
466
536
|
|
|
467
|
-
## Example
|
|
468
|
-
|
|
469
|
-
You can find a similar example in the `examples/` folder of the project
|
|
470
|
-
|
|
471
|
-
```ts
|
|
472
|
-
import fs from "node:fs";
|
|
473
|
-
import Tesseract, { OcrEngineModes } from "node-tesseract-ocr";
|
|
474
|
-
|
|
475
|
-
async function main() {
|
|
476
|
-
const tesseract = new Tesseract();
|
|
477
|
-
await tesseract.init({
|
|
478
|
-
lang: ["eng"],
|
|
479
|
-
oem: OcrEngineModes.OEM_LSTM_ONLY,
|
|
480
|
-
});
|
|
481
|
-
|
|
482
|
-
const buffer = fs.readFileSync("example1.png");
|
|
483
|
-
await tesseract.setImage(buffer);
|
|
484
|
-
await tesseract.recognize((info) => {
|
|
485
|
-
console.log(`Progress: ${info.percent}%`);
|
|
486
|
-
});
|
|
487
|
-
|
|
488
|
-
const text = await tesseract.getUTF8Text();
|
|
489
|
-
console.log(text);
|
|
490
|
-
|
|
491
|
-
await tesseract.end();
|
|
492
|
-
}
|
|
493
|
-
|
|
494
|
-
main().catch((err) => {
|
|
495
|
-
console.error(err);
|
|
496
|
-
process.exit(1);
|
|
497
|
-
});
|
|
498
|
-
```
|
|
499
|
-
|
|
500
537
|
## License
|
|
501
538
|
|
|
502
539
|
Apache-2.0. See [`LICENSE.md`](/LICENSE.md) for full terms.
|
package/dist/cjs/index.cjs
CHANGED
|
@@ -23,6 +23,9 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
|
23
23
|
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
24
24
|
});
|
|
25
25
|
};
|
|
26
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
27
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
28
|
+
};
|
|
26
29
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
27
30
|
exports.NativeTesseract = exports.Tesseract = exports.LogLevels = exports.PageSegmentationModes = exports.OcrEngineModes = exports.Language = void 0;
|
|
28
31
|
/**
|
|
@@ -239,14 +242,24 @@ exports.LogLevels = {
|
|
|
239
242
|
FATAL: "50000",
|
|
240
243
|
OFF: "2147483647",
|
|
241
244
|
};
|
|
242
|
-
const
|
|
243
|
-
const
|
|
244
|
-
const
|
|
245
|
-
const
|
|
246
|
-
const
|
|
245
|
+
const node_fs_1 = require("node:fs");
|
|
246
|
+
const promises_1 = require("node:fs/promises");
|
|
247
|
+
const node_os_1 = __importDefault(require("node:os"));
|
|
248
|
+
const node_path_1 = __importDefault(require("node:path"));
|
|
249
|
+
const node_stream_1 = require("node:stream");
|
|
250
|
+
const promises_2 = require("node:stream/promises");
|
|
251
|
+
const node_zlib_1 = require("node:zlib");
|
|
252
|
+
const proper_lockfile_1 = require("proper-lockfile");
|
|
253
|
+
const utils_1 = require("./utils");
|
|
254
|
+
const TESSDATA4_BEST = (lang) => `https://cdn.jsdelivr.net/npm/@tesseract.js-data/${lang}/4.0.0_best_int/`;
|
|
255
|
+
const TESSDATA4 = (lang) => `https://cdn.jsdelivr.net/npm/@tesseract.js-data/${lang}/4.0.0/`;
|
|
256
|
+
const DEFAULT_CACHE_DIR = node_path_1.default.join(node_os_1.default.homedir(), ".cache", "node-tesseract-ocr", "tessdata");
|
|
257
|
+
const rootFromSource = node_path_1.default.resolve(__dirname, "../../");
|
|
258
|
+
const bindingOptionsFromSource = node_path_1.default.resolve(rootFromSource, "binding-options.js");
|
|
259
|
+
const bindingOptionsPath = (0, node_fs_1.existsSync)(bindingOptionsFromSource)
|
|
247
260
|
? bindingOptionsFromSource
|
|
248
|
-
:
|
|
249
|
-
const prebuildRoot =
|
|
261
|
+
: node_path_1.default.resolve(process.cwd(), "binding-options.js");
|
|
262
|
+
const prebuildRoot = (0, node_fs_1.existsSync)(bindingOptionsFromSource)
|
|
250
263
|
? rootFromSource
|
|
251
264
|
: process.cwd();
|
|
252
265
|
const { Tesseract: NativeTesseract } = require("pkg-prebuilds")(prebuildRoot, require(bindingOptionsPath));
|
|
@@ -255,23 +268,124 @@ class Tesseract extends NativeTesseract {
|
|
|
255
268
|
constructor() {
|
|
256
269
|
super();
|
|
257
270
|
}
|
|
258
|
-
init(
|
|
271
|
+
init() {
|
|
259
272
|
const _super = Object.create(null, {
|
|
260
273
|
init: { get: () => super.init }
|
|
261
274
|
});
|
|
262
|
-
return __awaiter(this,
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
275
|
+
return __awaiter(this, arguments, void 0, function* (options = {}) {
|
|
276
|
+
var _a, _b, _c, _d, _e, _f;
|
|
277
|
+
(_a = options.langs) !== null && _a !== void 0 ? _a : (options.langs = []);
|
|
278
|
+
(_b = options.ensureTraineddata) !== null && _b !== void 0 ? _b : (options.ensureTraineddata = true);
|
|
279
|
+
(_c = options.cachePath) !== null && _c !== void 0 ? _c : (options.cachePath = DEFAULT_CACHE_DIR);
|
|
280
|
+
(_d = options.dataPath) !== null && _d !== void 0 ? _d : (options.dataPath = (_e = process.env.TESSDATA_PREFIX) !== null && _e !== void 0 ? _e : options.cachePath);
|
|
281
|
+
(_f = options.progressCallback) !== null && _f !== void 0 ? _f : (options.progressCallback = undefined);
|
|
282
|
+
const cachePath = node_path_1.default.resolve(options.cachePath);
|
|
283
|
+
const dataPath = node_path_1.default.resolve(options.dataPath);
|
|
284
|
+
if (options.ensureTraineddata) {
|
|
285
|
+
for (const lang of [...options.langs, exports.Language.osd]) {
|
|
286
|
+
const downloadBaseUrl = options.oem === exports.OcrEngineModes.OEM_LSTM_ONLY
|
|
287
|
+
? TESSDATA4_BEST(lang)
|
|
288
|
+
: TESSDATA4(lang);
|
|
289
|
+
lang &&
|
|
290
|
+
(yield this.ensureTrainingData({ lang, dataPath, cachePath, downloadBaseUrl }, options.progressCallback));
|
|
291
|
+
}
|
|
292
|
+
}
|
|
272
293
|
return _super.init.call(this, options);
|
|
273
294
|
});
|
|
274
295
|
}
|
|
296
|
+
ensureTrainingData(_a, progressCallback_1) {
|
|
297
|
+
return __awaiter(this, arguments, void 0, function* ({ lang, dataPath, cachePath, downloadBaseUrl }, progressCallback) {
|
|
298
|
+
const traineddataPath = node_path_1.default.join(dataPath, `${lang}.traineddata`);
|
|
299
|
+
const cacheTraineddataPath = node_path_1.default.join(cachePath, `${lang}.traineddata`);
|
|
300
|
+
if (yield (0, utils_1.isValidTraineddata)(cacheTraineddataPath)) {
|
|
301
|
+
if (traineddataPath !== cacheTraineddataPath) {
|
|
302
|
+
yield (0, promises_1.mkdir)(dataPath, { recursive: true });
|
|
303
|
+
yield (0, promises_1.copyFile)(cacheTraineddataPath, traineddataPath);
|
|
304
|
+
}
|
|
305
|
+
return traineddataPath;
|
|
306
|
+
}
|
|
307
|
+
if (yield (0, utils_1.isValidTraineddata)(traineddataPath)) {
|
|
308
|
+
return traineddataPath;
|
|
309
|
+
}
|
|
310
|
+
yield (0, promises_1.mkdir)(dataPath, { recursive: true });
|
|
311
|
+
const release = yield (0, proper_lockfile_1.lock)(traineddataPath, {
|
|
312
|
+
lockfilePath: `${traineddataPath}.lock`,
|
|
313
|
+
stale: 10 * 60 * 1000,
|
|
314
|
+
update: 30 * 1000,
|
|
315
|
+
realpath: false,
|
|
316
|
+
retries: { retries: 50, minTimeout: 200, maxTimeout: 2000 },
|
|
317
|
+
});
|
|
318
|
+
try {
|
|
319
|
+
if (yield (0, utils_1.isValidTraineddata)(traineddataPath)) {
|
|
320
|
+
return traineddataPath;
|
|
321
|
+
}
|
|
322
|
+
if (traineddataPath !== cacheTraineddataPath &&
|
|
323
|
+
(yield (0, utils_1.isValidTraineddata)(cacheTraineddataPath))) {
|
|
324
|
+
yield (0, promises_1.copyFile)(cacheTraineddataPath, traineddataPath);
|
|
325
|
+
return traineddataPath;
|
|
326
|
+
}
|
|
327
|
+
const url = new URL(`${lang}.traineddata.gz`, downloadBaseUrl).toString();
|
|
328
|
+
const response = yield fetch(url);
|
|
329
|
+
if (!response.ok || !response.body) {
|
|
330
|
+
throw new Error(`Failed to download traineddata for ${lang}: ${response.status} ${response.statusText}`);
|
|
331
|
+
}
|
|
332
|
+
const tmpPath = node_path_1.default.join(node_os_1.default.tmpdir(), [
|
|
333
|
+
"node-tesseract-ocr",
|
|
334
|
+
lang,
|
|
335
|
+
"traineddata",
|
|
336
|
+
process.pid,
|
|
337
|
+
Date.now(),
|
|
338
|
+
Math.random().toString(36).slice(2),
|
|
339
|
+
].join("-"));
|
|
340
|
+
const totalBytesHeader = response.headers.get("content-length");
|
|
341
|
+
const totalBytes = totalBytesHeader
|
|
342
|
+
? Number(totalBytesHeader)
|
|
343
|
+
: undefined;
|
|
344
|
+
let downloadedBytes = 0;
|
|
345
|
+
const progressStream = new node_stream_1.Transform({
|
|
346
|
+
transform(chunk, _, callback) {
|
|
347
|
+
if (progressCallback) {
|
|
348
|
+
downloadedBytes += chunk.length;
|
|
349
|
+
const percent = typeof totalBytes === "number" && Number.isFinite(totalBytes)
|
|
350
|
+
? (downloadedBytes / totalBytes) * 100
|
|
351
|
+
: undefined;
|
|
352
|
+
progressCallback({
|
|
353
|
+
lang,
|
|
354
|
+
url,
|
|
355
|
+
downloadedBytes,
|
|
356
|
+
totalBytes: Number.isFinite(totalBytes) ? totalBytes : undefined,
|
|
357
|
+
percent,
|
|
358
|
+
});
|
|
359
|
+
}
|
|
360
|
+
callback(null, chunk);
|
|
361
|
+
},
|
|
362
|
+
});
|
|
363
|
+
try {
|
|
364
|
+
yield (0, promises_2.pipeline)(node_stream_1.Readable.fromWeb(response.body), progressStream, (0, node_zlib_1.createGunzip)(), (0, node_fs_1.createWriteStream)(tmpPath));
|
|
365
|
+
try {
|
|
366
|
+
yield (0, promises_1.rename)(tmpPath, traineddataPath);
|
|
367
|
+
}
|
|
368
|
+
catch (error) {
|
|
369
|
+
if (error.code === "EXDEV") {
|
|
370
|
+
yield (0, promises_1.copyFile)(tmpPath, traineddataPath);
|
|
371
|
+
yield (0, promises_1.rm)(tmpPath, { force: true });
|
|
372
|
+
}
|
|
373
|
+
else {
|
|
374
|
+
throw error;
|
|
375
|
+
}
|
|
376
|
+
}
|
|
377
|
+
}
|
|
378
|
+
catch (error) {
|
|
379
|
+
yield (0, promises_1.rm)(tmpPath, { force: true });
|
|
380
|
+
throw error;
|
|
381
|
+
}
|
|
382
|
+
return traineddataPath;
|
|
383
|
+
}
|
|
384
|
+
finally {
|
|
385
|
+
yield release();
|
|
386
|
+
}
|
|
387
|
+
});
|
|
388
|
+
}
|
|
275
389
|
}
|
|
276
390
|
exports.Tesseract = Tesseract;
|
|
277
391
|
exports.default = Tesseract;
|
package/dist/cjs/index.d.ts
CHANGED
|
@@ -839,20 +839,81 @@ export interface TesseractInitOptions {
|
|
|
839
839
|
/**
|
|
840
840
|
* Its generally safer to use as few languages as possible.
|
|
841
841
|
* The more languages Tesseract needs to load the longer it takes to recognize a image.
|
|
842
|
-
*
|
|
842
|
+
* The OSD Language will always be loaded to support orientation and script detection
|
|
843
|
+
* IMPORTANT: if you specify more than one language here (e.g.: `deu, eng` for example)
|
|
844
|
+
* tesseract will try to recognize german and english in the same image.
|
|
845
|
+
* Originally tesseract itself accepts it as `deu+eng`, but since this
|
|
846
|
+
* makes typing very hard to near impossible, it's safer to just accept an
|
|
847
|
+
* array with the languages it should look for.
|
|
848
|
+
* When talking about "hard typing/impossible typing" it's because TypeScript
|
|
849
|
+
* itself cannot create recursive types, and chaining template types
|
|
850
|
+
* (e.g.: `${Language}+${Language}+...`) stretches out the compilation time
|
|
851
|
+
* to an unacceptable amount
|
|
852
|
+
*
|
|
853
|
+
* @default [Language.osd]
|
|
843
854
|
*/
|
|
844
|
-
|
|
855
|
+
langs?: Language[];
|
|
856
|
+
/**
|
|
857
|
+
* Specify where the trainingdata is located
|
|
858
|
+
* Besides the datapath in general it is versioned to the
|
|
859
|
+
* version of tesseract
|
|
860
|
+
* @default '~/.cache/node-tesseract-ocr/tessdata'
|
|
861
|
+
*/
|
|
862
|
+
cachePath?: string;
|
|
863
|
+
/**
|
|
864
|
+
* Explicit datapath for traineddata. Takes precedence over
|
|
865
|
+
* the `TESSDATA_PREFIX` environment variable.
|
|
866
|
+
*/
|
|
867
|
+
dataPath?: string;
|
|
868
|
+
/**
|
|
869
|
+
* This will be called for every language that was specified in `langs`,
|
|
870
|
+
* it allows the user to be flexible about the training data's location
|
|
871
|
+
* Or if they need to specify their own location for certain languages/custom languages
|
|
872
|
+
* IMPORTANT: Ensures that trainingdata will be downloaded from the following cdn
|
|
873
|
+
* in case they don't exist
|
|
874
|
+
* OEM_LSTM_ONLY => https://cdn.jsdelivr.net/npm/@tesseract.js-data/${lang}/4.0.0_best_int
|
|
875
|
+
* NON OEM_LSTM_ONLY => https://cdn.jsdelivr.net/npm/@tesseract.js-data/${lang}/4.0.0
|
|
876
|
+
* NOTE: Tesseract 5.x.x still uses the 4.x.x trainingdata
|
|
877
|
+
*
|
|
878
|
+
* @default true
|
|
879
|
+
*/
|
|
880
|
+
ensureTraineddata?: boolean;
|
|
881
|
+
/**
|
|
882
|
+
* Optional progress callback for traineddata downloads.
|
|
883
|
+
*/
|
|
884
|
+
progressCallback?: (info: TrainingDataDownloadProgress) => void;
|
|
845
885
|
/**
|
|
846
886
|
* OCR Engine Modes
|
|
847
887
|
* The engine mode cannot be changed after creating the instance
|
|
848
888
|
* If another mode is needed, its advised to create a new instance.
|
|
889
|
+
* @default OEM_DEFAULT
|
|
849
890
|
* @throws {Error} Will throw an error when oem mode is below 0 or over 3
|
|
850
891
|
*/
|
|
851
892
|
oem?: OcrEngineMode;
|
|
893
|
+
/**
|
|
894
|
+
* Controls if only non debug parameters will be set upon initialization
|
|
895
|
+
* @default false
|
|
896
|
+
*/
|
|
852
897
|
setOnlyNonDebugParams?: boolean;
|
|
898
|
+
/**
|
|
899
|
+
* Array of paths that point to their corresponding config files
|
|
900
|
+
* usually located in the `dataPath` location alongside the training data
|
|
901
|
+
*/
|
|
853
902
|
configs?: Array<string>;
|
|
903
|
+
/**
|
|
904
|
+
* Record of parameters that should be set upon initialization
|
|
905
|
+
* Consult the original documentation of tesseract on which variables
|
|
906
|
+
* can actually be set
|
|
907
|
+
*/
|
|
854
908
|
vars?: Partial<Record<keyof ConfigurationVariables, ConfigurationVariables[keyof ConfigurationVariables]>>;
|
|
855
909
|
}
|
|
910
|
+
export interface TrainingDataDownloadProgress {
|
|
911
|
+
lang: Language;
|
|
912
|
+
url: string;
|
|
913
|
+
downloadedBytes: number;
|
|
914
|
+
totalBytes?: number;
|
|
915
|
+
percent?: number;
|
|
916
|
+
}
|
|
856
917
|
export interface TesseractSetRectangleOptions {
|
|
857
918
|
top: number;
|
|
858
919
|
left: number;
|
|
@@ -913,6 +974,13 @@ export interface DetectOrientationScriptResult {
|
|
|
913
974
|
*/
|
|
914
975
|
scriptConfidence: number;
|
|
915
976
|
}
|
|
977
|
+
export type EnsureTrainedDataOptions = {
|
|
978
|
+
lang: Language;
|
|
979
|
+
cachePath: string;
|
|
980
|
+
dataPath: string;
|
|
981
|
+
downloadBaseUrl: string;
|
|
982
|
+
progressCallback?: (info: TrainingDataDownloadProgress) => void;
|
|
983
|
+
};
|
|
916
984
|
export interface TesseractInstance {
|
|
917
985
|
/**
|
|
918
986
|
* Initialize the engine with the given options.
|
|
@@ -1063,7 +1131,8 @@ export type TesseractConstructor = new () => TesseractInstance;
|
|
|
1063
1131
|
declare const NativeTesseract: TesseractConstructor;
|
|
1064
1132
|
declare class Tesseract extends NativeTesseract {
|
|
1065
1133
|
constructor();
|
|
1066
|
-
init(options
|
|
1134
|
+
init(options?: TesseractInitOptions): Promise<void>;
|
|
1135
|
+
ensureTrainingData({ lang, dataPath, cachePath, downloadBaseUrl }: EnsureTrainedDataOptions, progressCallback?: (info: TrainingDataDownloadProgress) => void): Promise<string>;
|
|
1067
1136
|
}
|
|
1068
1137
|
export { Tesseract, NativeTesseract };
|
|
1069
1138
|
export default Tesseract;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare const isValidTraineddata: (filePath: string) => Promise<boolean>;
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
3
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
4
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
5
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
6
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
7
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
8
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
9
|
+
});
|
|
10
|
+
};
|
|
11
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
|
+
exports.isValidTraineddata = void 0;
|
|
13
|
+
const promises_1 = require("node:fs/promises");
|
|
14
|
+
const isValidTraineddata = (filePath) => __awaiter(void 0, void 0, void 0, function* () {
|
|
15
|
+
try {
|
|
16
|
+
const info = yield (0, promises_1.stat)(filePath);
|
|
17
|
+
return info.isFile() && info.size > 0;
|
|
18
|
+
}
|
|
19
|
+
catch (_a) {
|
|
20
|
+
return false;
|
|
21
|
+
}
|
|
22
|
+
});
|
|
23
|
+
exports.isValidTraineddata = isValidTraineddata;
|
package/dist/esm/index.d.ts
CHANGED
|
@@ -839,20 +839,81 @@ export interface TesseractInitOptions {
|
|
|
839
839
|
/**
|
|
840
840
|
* Its generally safer to use as few languages as possible.
|
|
841
841
|
* The more languages Tesseract needs to load the longer it takes to recognize a image.
|
|
842
|
-
*
|
|
842
|
+
* The OSD Language will always be loaded to support orientation and script detection
|
|
843
|
+
* IMPORTANT: if you specify more than one language here (e.g.: `deu, eng` for example)
|
|
844
|
+
* tesseract will try to recognize german and english in the same image.
|
|
845
|
+
* Originally tesseract itself accepts it as `deu+eng`, but since this
|
|
846
|
+
* makes typing very hard to near impossible, it's safer to just accept an
|
|
847
|
+
* array with the languages it should look for.
|
|
848
|
+
* When talking about "hard typing/impossible typing" it's because TypeScript
|
|
849
|
+
* itself cannot create recursive types, and chaining template types
|
|
850
|
+
* (e.g.: `${Language}+${Language}+...`) stretches out the compilation time
|
|
851
|
+
* to an unacceptable amount
|
|
852
|
+
*
|
|
853
|
+
* @default [Language.osd]
|
|
843
854
|
*/
|
|
844
|
-
|
|
855
|
+
langs?: Language[];
|
|
856
|
+
/**
|
|
857
|
+
* Specify where the trainingdata is located
|
|
858
|
+
* Besides the datapath in general it is versioned to the
|
|
859
|
+
* version of tesseract
|
|
860
|
+
* @default '~/.cache/node-tesseract-ocr/tessdata'
|
|
861
|
+
*/
|
|
862
|
+
cachePath?: string;
|
|
863
|
+
/**
|
|
864
|
+
* Explicit datapath for traineddata. Takes precedence over
|
|
865
|
+
* the `TESSDATA_PREFIX` environment variable.
|
|
866
|
+
*/
|
|
867
|
+
dataPath?: string;
|
|
868
|
+
/**
|
|
869
|
+
* This will be called for every language that was specified in `langs`,
|
|
870
|
+
* it allows the user to be flexible about the training data's location
|
|
871
|
+
* Or if they need to specify their own location for certain languages/custom languages
|
|
872
|
+
* IMPORTANT: Ensures that trainingdata will be downloaded from the following cdn
|
|
873
|
+
* in case they don't exist
|
|
874
|
+
* OEM_LSTM_ONLY => https://cdn.jsdelivr.net/npm/@tesseract.js-data/${lang}/4.0.0_best_int
|
|
875
|
+
* NON OEM_LSTM_ONLY => https://cdn.jsdelivr.net/npm/@tesseract.js-data/${lang}/4.0.0
|
|
876
|
+
* NOTE: Tesseract 5.x.x still uses the 4.x.x trainingdata
|
|
877
|
+
*
|
|
878
|
+
* @default true
|
|
879
|
+
*/
|
|
880
|
+
ensureTraineddata?: boolean;
|
|
881
|
+
/**
|
|
882
|
+
* Optional progress callback for traineddata downloads.
|
|
883
|
+
*/
|
|
884
|
+
progressCallback?: (info: TrainingDataDownloadProgress) => void;
|
|
845
885
|
/**
|
|
846
886
|
* OCR Engine Modes
|
|
847
887
|
* The engine mode cannot be changed after creating the instance
|
|
848
888
|
* If another mode is needed, its advised to create a new instance.
|
|
889
|
+
* @default OEM_DEFAULT
|
|
849
890
|
* @throws {Error} Will throw an error when oem mode is below 0 or over 3
|
|
850
891
|
*/
|
|
851
892
|
oem?: OcrEngineMode;
|
|
893
|
+
/**
|
|
894
|
+
* Controls if only non debug parameters will be set upon initialization
|
|
895
|
+
* @default false
|
|
896
|
+
*/
|
|
852
897
|
setOnlyNonDebugParams?: boolean;
|
|
898
|
+
/**
|
|
899
|
+
* Array of paths that point to their corresponding config files
|
|
900
|
+
* usually located in the `dataPath` location alongside the training data
|
|
901
|
+
*/
|
|
853
902
|
configs?: Array<string>;
|
|
903
|
+
/**
|
|
904
|
+
* Record of parameters that should be set upon initialization
|
|
905
|
+
* Consult the original documentation of tesseract on which variables
|
|
906
|
+
* can actually be set
|
|
907
|
+
*/
|
|
854
908
|
vars?: Partial<Record<keyof ConfigurationVariables, ConfigurationVariables[keyof ConfigurationVariables]>>;
|
|
855
909
|
}
|
|
910
|
+
export interface TrainingDataDownloadProgress {
|
|
911
|
+
lang: Language;
|
|
912
|
+
url: string;
|
|
913
|
+
downloadedBytes: number;
|
|
914
|
+
totalBytes?: number;
|
|
915
|
+
percent?: number;
|
|
916
|
+
}
|
|
856
917
|
export interface TesseractSetRectangleOptions {
|
|
857
918
|
top: number;
|
|
858
919
|
left: number;
|
|
@@ -913,6 +974,13 @@ export interface DetectOrientationScriptResult {
|
|
|
913
974
|
*/
|
|
914
975
|
scriptConfidence: number;
|
|
915
976
|
}
|
|
977
|
+
export type EnsureTrainedDataOptions = {
|
|
978
|
+
lang: Language;
|
|
979
|
+
cachePath: string;
|
|
980
|
+
dataPath: string;
|
|
981
|
+
downloadBaseUrl: string;
|
|
982
|
+
progressCallback?: (info: TrainingDataDownloadProgress) => void;
|
|
983
|
+
};
|
|
916
984
|
export interface TesseractInstance {
|
|
917
985
|
/**
|
|
918
986
|
* Initialize the engine with the given options.
|
|
@@ -1063,7 +1131,8 @@ export type TesseractConstructor = new () => TesseractInstance;
|
|
|
1063
1131
|
declare const NativeTesseract: TesseractConstructor;
|
|
1064
1132
|
declare class Tesseract extends NativeTesseract {
|
|
1065
1133
|
constructor();
|
|
1066
|
-
init(options
|
|
1134
|
+
init(options?: TesseractInitOptions): Promise<void>;
|
|
1135
|
+
ensureTrainingData({ lang, dataPath, cachePath, downloadBaseUrl }: EnsureTrainedDataOptions, progressCallback?: (info: TrainingDataDownloadProgress) => void): Promise<string>;
|
|
1067
1136
|
}
|
|
1068
1137
|
export { Tesseract, NativeTesseract };
|
|
1069
1138
|
export default Tesseract;
|
package/dist/esm/index.mjs
CHANGED
|
@@ -227,14 +227,24 @@ export const LogLevels = {
|
|
|
227
227
|
FATAL: "50000",
|
|
228
228
|
OFF: "2147483647",
|
|
229
229
|
};
|
|
230
|
-
|
|
231
|
-
|
|
230
|
+
import { existsSync, createWriteStream } from "node:fs";
|
|
231
|
+
import { mkdir, rename, rm, copyFile } from "node:fs/promises";
|
|
232
|
+
import os from "node:os";
|
|
233
|
+
import path from "node:path";
|
|
234
|
+
import { Readable, Transform } from "node:stream";
|
|
235
|
+
import { pipeline } from "node:stream/promises";
|
|
236
|
+
import { createGunzip } from "node:zlib";
|
|
237
|
+
import { lock } from "proper-lockfile";
|
|
238
|
+
import { isValidTraineddata } from "./utils";
|
|
239
|
+
const TESSDATA4_BEST = (lang) => `https://cdn.jsdelivr.net/npm/@tesseract.js-data/${lang}/4.0.0_best_int/`;
|
|
240
|
+
const TESSDATA4 = (lang) => `https://cdn.jsdelivr.net/npm/@tesseract.js-data/${lang}/4.0.0/`;
|
|
241
|
+
const DEFAULT_CACHE_DIR = path.join(os.homedir(), ".cache", "node-tesseract-ocr", "tessdata");
|
|
232
242
|
const rootFromSource = path.resolve(__dirname, "../../");
|
|
233
243
|
const bindingOptionsFromSource = path.resolve(rootFromSource, "binding-options.js");
|
|
234
|
-
const bindingOptionsPath =
|
|
244
|
+
const bindingOptionsPath = existsSync(bindingOptionsFromSource)
|
|
235
245
|
? bindingOptionsFromSource
|
|
236
246
|
: path.resolve(process.cwd(), "binding-options.js");
|
|
237
|
-
const prebuildRoot =
|
|
247
|
+
const prebuildRoot = existsSync(bindingOptionsFromSource)
|
|
238
248
|
? rootFromSource
|
|
239
249
|
: process.cwd();
|
|
240
250
|
const { Tesseract: NativeTesseract } = require("pkg-prebuilds")(prebuildRoot, require(bindingOptionsPath));
|
|
@@ -242,18 +252,116 @@ class Tesseract extends NativeTesseract {
|
|
|
242
252
|
// Creates the wrapper instance; it only forwards to the NativeTesseract constructor.
constructor() {
|
|
243
253
|
super();
|
|
244
254
|
}
|
|
245
|
-
async init(options) {
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
+
async init(options = {}) {
|
|
256
|
+
options.langs ??= [];
|
|
257
|
+
options.ensureTraineddata ??= true;
|
|
258
|
+
options.cachePath ??= DEFAULT_CACHE_DIR;
|
|
259
|
+
options.dataPath ??= process.env.TESSDATA_PREFIX ?? options.cachePath;
|
|
260
|
+
options.progressCallback ??= undefined;
|
|
261
|
+
const cachePath = path.resolve(options.cachePath);
|
|
262
|
+
const dataPath = path.resolve(options.dataPath);
|
|
263
|
+
if (options.ensureTraineddata) {
|
|
264
|
+
for (const lang of [...options.langs, Language.osd]) {
|
|
265
|
+
const downloadBaseUrl = options.oem === OcrEngineModes.OEM_LSTM_ONLY
|
|
266
|
+
? TESSDATA4_BEST(lang)
|
|
267
|
+
: TESSDATA4(lang);
|
|
268
|
+
lang &&
|
|
269
|
+
(await this.ensureTrainingData({ lang, dataPath, cachePath, downloadBaseUrl }, options.progressCallback));
|
|
270
|
+
}
|
|
271
|
+
}
|
|
255
272
|
return super.init(options);
|
|
256
273
|
}
|
|
274
|
+
async ensureTrainingData({ lang, dataPath, cachePath, downloadBaseUrl }, progressCallback) {
|
|
275
|
+
const traineddataPath = path.join(dataPath, `${lang}.traineddata`);
|
|
276
|
+
const cacheTraineddataPath = path.join(cachePath, `${lang}.traineddata`);
|
|
277
|
+
if (await isValidTraineddata(cacheTraineddataPath)) {
|
|
278
|
+
if (traineddataPath !== cacheTraineddataPath) {
|
|
279
|
+
await mkdir(dataPath, { recursive: true });
|
|
280
|
+
await copyFile(cacheTraineddataPath, traineddataPath);
|
|
281
|
+
}
|
|
282
|
+
return traineddataPath;
|
|
283
|
+
}
|
|
284
|
+
if (await isValidTraineddata(traineddataPath)) {
|
|
285
|
+
return traineddataPath;
|
|
286
|
+
}
|
|
287
|
+
await mkdir(dataPath, { recursive: true });
|
|
288
|
+
const release = await lock(traineddataPath, {
|
|
289
|
+
lockfilePath: `${traineddataPath}.lock`,
|
|
290
|
+
stale: 10 * 60 * 1000,
|
|
291
|
+
update: 30 * 1000,
|
|
292
|
+
realpath: false,
|
|
293
|
+
retries: { retries: 50, minTimeout: 200, maxTimeout: 2000 },
|
|
294
|
+
});
|
|
295
|
+
try {
|
|
296
|
+
if (await isValidTraineddata(traineddataPath)) {
|
|
297
|
+
return traineddataPath;
|
|
298
|
+
}
|
|
299
|
+
if (traineddataPath !== cacheTraineddataPath &&
|
|
300
|
+
(await isValidTraineddata(cacheTraineddataPath))) {
|
|
301
|
+
await copyFile(cacheTraineddataPath, traineddataPath);
|
|
302
|
+
return traineddataPath;
|
|
303
|
+
}
|
|
304
|
+
const url = new URL(`${lang}.traineddata.gz`, downloadBaseUrl).toString();
|
|
305
|
+
const response = await fetch(url);
|
|
306
|
+
if (!response.ok || !response.body) {
|
|
307
|
+
throw new Error(`Failed to download traineddata for ${lang}: ${response.status} ${response.statusText}`);
|
|
308
|
+
}
|
|
309
|
+
const tmpPath = path.join(os.tmpdir(), [
|
|
310
|
+
"node-tesseract-ocr",
|
|
311
|
+
lang,
|
|
312
|
+
"traineddata",
|
|
313
|
+
process.pid,
|
|
314
|
+
Date.now(),
|
|
315
|
+
Math.random().toString(36).slice(2),
|
|
316
|
+
].join("-"));
|
|
317
|
+
const totalBytesHeader = response.headers.get("content-length");
|
|
318
|
+
const totalBytes = totalBytesHeader
|
|
319
|
+
? Number(totalBytesHeader)
|
|
320
|
+
: undefined;
|
|
321
|
+
let downloadedBytes = 0;
|
|
322
|
+
const progressStream = new Transform({
|
|
323
|
+
transform(chunk, _, callback) {
|
|
324
|
+
if (progressCallback) {
|
|
325
|
+
downloadedBytes += chunk.length;
|
|
326
|
+
const percent = typeof totalBytes === "number" && Number.isFinite(totalBytes)
|
|
327
|
+
? (downloadedBytes / totalBytes) * 100
|
|
328
|
+
: undefined;
|
|
329
|
+
progressCallback({
|
|
330
|
+
lang,
|
|
331
|
+
url,
|
|
332
|
+
downloadedBytes,
|
|
333
|
+
totalBytes: Number.isFinite(totalBytes) ? totalBytes : undefined,
|
|
334
|
+
percent,
|
|
335
|
+
});
|
|
336
|
+
}
|
|
337
|
+
callback(null, chunk);
|
|
338
|
+
},
|
|
339
|
+
});
|
|
340
|
+
try {
|
|
341
|
+
await pipeline(Readable.fromWeb(response.body), progressStream, createGunzip(), createWriteStream(tmpPath));
|
|
342
|
+
try {
|
|
343
|
+
await rename(tmpPath, traineddataPath);
|
|
344
|
+
}
|
|
345
|
+
catch (error) {
|
|
346
|
+
if (error.code === "EXDEV") {
|
|
347
|
+
await copyFile(tmpPath, traineddataPath);
|
|
348
|
+
await rm(tmpPath, { force: true });
|
|
349
|
+
}
|
|
350
|
+
else {
|
|
351
|
+
throw error;
|
|
352
|
+
}
|
|
353
|
+
}
|
|
354
|
+
}
|
|
355
|
+
catch (error) {
|
|
356
|
+
await rm(tmpPath, { force: true });
|
|
357
|
+
throw error;
|
|
358
|
+
}
|
|
359
|
+
return traineddataPath;
|
|
360
|
+
}
|
|
361
|
+
finally {
|
|
362
|
+
await release();
|
|
363
|
+
}
|
|
364
|
+
}
|
|
257
365
|
}
|
|
258
366
|
export { Tesseract, NativeTesseract };
|
|
259
367
|
export default Tesseract;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/**
 * Resolves to whether the file at `filePath` is accepted as traineddata.
 * NOTE(review): the exact validity criteria live in the utils implementation,
 * which is not visible from this declaration — confirm before relying on them.
 */
export declare const isValidTraineddata: (filePath: string) => Promise<boolean>;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@luii/node-tesseract-ocr",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.1.0",
|
|
4
4
|
"private": false,
|
|
5
5
|
"binary": {
|
|
6
6
|
"napi_versions": [
|
|
@@ -63,15 +63,17 @@
|
|
|
63
63
|
"LICENSE.md"
|
|
64
64
|
],
|
|
65
65
|
"devDependencies": {
|
|
66
|
-
"
|
|
67
|
-
"@types/node": "^22.0.0",
|
|
68
|
-
"typescript": "^5.6.0"
|
|
69
|
-
},
|
|
70
|
-
"dependencies": {
|
|
71
|
-
"cmake-js": "^7.4.0",
|
|
66
|
+
"cmake-js": "^8.0.0",
|
|
72
67
|
"node-addon-api": "^8.5.0",
|
|
68
|
+
"@types/node": "^25.1.0",
|
|
69
|
+
"@types/proper-lockfile": "^4.1.4",
|
|
73
70
|
"dotenv": "^16.4.5",
|
|
74
|
-
"
|
|
71
|
+
"typescript": "^5.6.0",
|
|
72
|
+
"vitest": "^4.0.18"
|
|
73
|
+
},
|
|
74
|
+
"dependencies": {
|
|
75
|
+
"pkg-prebuilds": "^1.0.0",
|
|
76
|
+
"proper-lockfile": "^4.1.2"
|
|
75
77
|
},
|
|
76
78
|
"exports": {
|
|
77
79
|
"require": {
|
|
Binary file
|
|
Binary file
|
package/src/commands.hpp
CHANGED
|
@@ -94,16 +94,29 @@ Napi::Value TesseractWrapper::Init(const Napi::CallbackInfo &info) {
|
|
|
94
94
|
auto options = info[0].As<Napi::Object>();
|
|
95
95
|
CommandInit command{};
|
|
96
96
|
|
|
97
|
-
const Napi::Value
|
|
98
|
-
if (!
|
|
99
|
-
if (!
|
|
97
|
+
const Napi::Value dataPathOption = options.Get("dataPath");
|
|
98
|
+
if (!dataPathOption.IsUndefined()) {
|
|
99
|
+
if (!dataPathOption.IsString()) {
|
|
100
100
|
deferred.Reject(
|
|
101
|
-
Napi::TypeError::New(env, "Option '
|
|
101
|
+
Napi::TypeError::New(env, "Option 'dataPath' must be a string")
|
|
102
102
|
.Value());
|
|
103
103
|
return deferred.Promise();
|
|
104
104
|
}
|
|
105
105
|
|
|
106
|
-
Napi::
|
|
106
|
+
Napi::String dataPath = dataPathOption.As<Napi::String>();
|
|
107
|
+
command.data_path = dataPath.Utf8Value();
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
const Napi::Value langsOption = options.Get("langs");
|
|
111
|
+
if (!langsOption.IsUndefined()) {
|
|
112
|
+
if (!langsOption.IsArray()) {
|
|
113
|
+
deferred.Reject(
|
|
114
|
+
Napi::TypeError::New(env, "Option 'langs' must be a array of strings")
|
|
115
|
+
.Value());
|
|
116
|
+
return deferred.Promise();
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
Napi::Array languages = langsOption.As<Napi::Array>();
|
|
107
120
|
std::string language;
|
|
108
121
|
|
|
109
122
|
for (uint32_t i = 0; i < languages.Length(); ++i) {
|