@luii/node-tesseract-ocr 1.0.15 → 1.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/binding.gyp ADDED
@@ -0,0 +1,60 @@
1
+ {
2
+ "targets": [
3
+ {
4
+ "target_name": "node-tesseract-ocr",
5
+ "sources": [
6
+ "src/addon.cpp",
7
+ "src/ocr_result.cpp",
8
+ "src/ocr_worker.cpp",
9
+ "src/handle.cpp"
10
+ ],
11
+ "include_dirs": [
12
+ "<!(node -p \"require('node-addon-api').include\")"
13
+ ],
14
+ "dependencies": [
15
+ "<!(node -p \"require('node-addon-api').targets\"):node_addon_api",
16
+ ],
17
+ "cflags_cc": [
18
+ "-std=c++23",
19
+ "-Wall",
20
+ "-Wextra",
21
+ "-Wpedantic",
22
+ "-Wconversion",
23
+ "-Werror",
24
+ "-fexceptions"
25
+ ],
26
+ "defines": [
27
+ "NODE_ADDON_API_DISABLE_DEPRECATED",
28
+ ],
29
+ "conditions": [
30
+ [
31
+ "OS=='linux'",
32
+ {
33
+ "include_dirs": [
34
+ "<!(node -p \"require('node-addon-api').include\")",
35
+ "<!@(pkg-config --cflags-only-I tesseract lept | sed -e 's/-I//g')"
36
+ ],
37
+ "libraries": [
38
+ "<!@(pkg-config --libs tesseract lept)"
39
+ ]
40
+ }
41
+ ],
42
+ [
43
+ "OS=='mac'",
44
+ {
45
+ "include_dirs": [
46
+ "<!(node -p \"require('node-addon-api').include\")",
47
+ "/opt/homebrew/include",
48
+ "/usr/local/include"
49
+ ],
50
+ "libraries": [
51
+ "-L/opt/homebrew/lib",
52
+ "-L/usr/local/lib",
53
+ "-ltesseract"
54
+ ]
55
+ }
56
+ ],
57
+ ]
58
+ }
59
+ ]
60
+ }
@@ -0,0 +1,351 @@
1
+ /*
2
+ * node-tesseract-ocr
3
+ * Copyright (C) 2025 Philipp Czarnetzki
4
+ *
5
+ * This program is free software: you can redistribute it and/or modify
6
+ * it under the terms of the GNU Affero General Public License as published by
7
+ * the Free Software Foundation, either version 3 of the License, or
8
+ * (at your option) any later version.
9
+ *
10
+ * This program is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ * GNU Affero General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Affero General Public License
16
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ */
18
+
19
+ declare module 'node-tesseract-ocr' {
20
+
21
+ /**
22
+ * All available languages for tesseract
23
+ * @readonly
24
+ * @enum {string}
25
+ */
26
+ export enum AvailableLanguages {
27
+ afr = 'afr',
28
+ amh = 'amh',
29
+ ara = 'ara',
30
+ asm = 'asm',
31
+ aze = 'aze',
32
+ aze_cyrl = 'aze_cyrl',
33
+ bel = 'bel',
34
+ ben = 'ben',
35
+ bod = 'bod',
36
+ bos = 'bos',
37
+ bre = 'bre',
38
+ bul = 'bul',
39
+ cat = 'cat',
40
+ ceb = 'ceb',
41
+ ces = 'ces',
42
+ chi_sim = 'chi_sim',
43
+ chi_tra = 'chi_tra',
44
+ chr = 'chr',
45
+ cos = 'cos',
46
+ cym = 'cym',
47
+ dan = 'dan',
48
+ deu = 'deu',
49
+ deu_latf = 'deu_latf',
50
+ div = 'div',
51
+ dzo = 'dzo',
52
+ ell = 'ell',
53
+ eng = 'eng',
54
+ enm = 'enm',
55
+ epo = 'epo',
56
+ equ = 'equ',
57
+ est = 'est',
58
+ eus = 'eus',
59
+ fao = 'fao',
60
+ fas = 'fas',
61
+ fil = 'fil',
62
+ fin = 'fin',
63
+ fra = 'fra',
64
+ frm = 'frm',
65
+ fry = 'fry',
66
+ gla = 'gla',
67
+ gle = 'gle',
68
+ glg = 'glg',
69
+ grc = 'grc',
70
+ guj = 'guj',
71
+ hat = 'hat',
72
+ heb = 'heb',
73
+ hin = 'hin',
74
+ hrv = 'hrv',
75
+ hun = 'hun',
76
+ hye = 'hye',
77
+ iku = 'iku',
78
+ ind = 'ind',
79
+ isl = 'isl',
80
+ ita = 'ita',
81
+ ita_old = 'ita_old',
82
+ jav = 'jav',
83
+ jpn = 'jpn',
84
+ kan = 'kan',
85
+ kat = 'kat',
86
+ kat_old = 'kat_old',
87
+ kaz = 'kaz',
88
+ khm = 'khm',
89
+ kir = 'kir',
90
+ kmr = 'kmr',
91
+ kor = 'kor',
92
+ kor_vert = 'kor_vert',
93
+ kur = 'kur',
94
+ lao = 'lao',
95
+ lat = 'lat',
96
+ lav = 'lav',
97
+ lit = 'lit',
98
+ ltz = 'ltz',
99
+ mal = 'mal',
100
+ mar = 'mar',
101
+ mkd = 'mkd',
102
+ mlt = 'mlt',
103
+ mon = 'mon',
104
+ mri = 'mri',
105
+ msa = 'msa',
106
+ mya = 'mya',
107
+ nep = 'nep',
108
+ nld = 'nld',
109
+ nor = 'nor',
110
+ oci = 'oci',
111
+ ori = 'ori',
112
+ osd = 'osd',
113
+ pan = 'pan',
114
+ pol = 'pol',
115
+ por = 'por',
116
+ pus = 'pus',
117
+ que = 'que',
118
+ ron = 'ron',
119
+ rus = 'rus',
120
+ san = 'san',
121
+ sin = 'sin',
122
+ slk = 'slk',
123
+ slv = 'slv',
124
+ snd = 'snd',
125
+ spa = 'spa',
126
+ spa_old = 'spa_old',
127
+ sqi = 'sqi',
128
+ srp = 'srp',
129
+ srp_latn = 'srp_latn',
130
+ sun = 'sun',
131
+ swa = 'swa',
132
+ swe = 'swe',
133
+ syr = 'syr',
134
+ tam = 'tam',
135
+ tat = 'tat',
136
+ tel = 'tel',
137
+ tgk = 'tgk',
138
+ tha = 'tha',
139
+ tir = 'tir',
140
+ ton = 'ton',
141
+ tur = 'tur',
142
+ uig = 'uig',
143
+ ukr = 'ukr',
144
+ urd = 'urd',
145
+ uzb = 'uzb',
146
+ uzb_cyrl = 'uzb_cyrl',
147
+ vie = 'vie',
148
+ yid = 'yid',
149
+ yor = 'yor'
150
+ }
151
+
152
+ /**
153
+ * When Tesseract/Cube is initialized we can choose to instantiate/load/run
154
+ * only the Tesseract part, only the Cube part or both along with the combiner.
155
+ * The preference of which engine to use is stored in tessedit_ocr_engine_mode.
156
+ * @readonly
157
+ * @enum {number}
158
+ */
159
+ export enum OcrEngineMode {
160
+ // Run Tesseract only - fastest; deprecated
161
+ OEM_TESSERACT_ONLY = 0,
162
+ // Run just the LSTM line recognizer.
163
+ OEM_LSTM_ONLY = 1,
164
+ // Run the LSTM recognizer, but allow fallback
165
+ // to Tesseract when things get difficult.
166
+ // deprecated
167
+ OEM_TESSERACT_LSTM_COMBINED = 2,
168
+ // Specify this mode when calling init_*(),
169
+ // to indicate that any of the above modes
170
+ // should be automatically inferred from the
171
+ // variables in the language-specific config,
172
+ // command-line configs, or if not specified
173
+ // in any of the above should be set to the
174
+ // default OEM_TESSERACT_ONLY.
175
+ OEM_DEFAULT = 3,
176
+ }
177
+
178
+
179
+ /**
180
+ * Possible modes for page layout analysis.
181
+ * @readonly
182
+ * @enum {number}
183
+ */
184
+ export enum PageSegmentationMode {
185
+ // Orientation and script detection only.
186
+ PSM_OSD_ONLY = 0,
187
+ // Automatic page segmentation with orientation and script detection. (OSD)
188
+ PSM_AUTO_OSD = 1,
189
+ // Automatic page segmentation, but no OSD, or OCR.
190
+ PSM_AUTO_ONLY = 2,
191
+ // Fully automatic page segmentation, but no OSD.
192
+ PSM_AUTO = 3,
193
+ // Assume a single column of text of variable sizes.
194
+ PSM_SINGLE_COLUMN = 4,
195
+ // Assume a single uniform block of vertically aligned text.
196
+ PSM_SINGLE_BLOCK_VERT_TEXT = 5,
197
+ // Assume a single uniform block of text. (Default.)
198
+ PSM_SINGLE_BLOCK = 6,
199
+ // Treat the image as a single text line.
200
+ PSM_SINGLE_LINE = 7,
201
+ // Treat the image as a single word.
202
+ PSM_SINGLE_WORD = 8,
203
+ // Treat the image as a single word in a circle.
204
+ PSM_CIRCLE_WORD = 9,
205
+ // Treat the image as a single character.
206
+ PSM_SINGLE_CHAR = 10,
207
+ // Find as much text as possible in no particular order.
208
+ PSM_SPARSE_TEXT = 11,
209
+ // Sparse text with orientation and script det.
210
+ PSM_SPARSE_TEXT_OSD = 12,
211
+ // Treat the image as a single text line, bypassing hacks that are Tesseract-specific.
212
+ PSM_RAW_LINE = 13,
213
+ }
214
+
215
+ /**
216
+ * Tesseract constructor options
217
+ */
218
+ export interface TesseractOptions {
219
+ /**
220
+ * It's generally safer to use as few languages as possible.
221
+ * The more languages Tesseract needs to load, the longer it takes to recognize an image.
222
+ * @public
223
+ */
224
+ lang: Array<keyof typeof AvailableLanguages>;
225
+
226
+ /**
227
+ * Skip Ocr for when you only want to (for example) analyze the layout
228
+ * @property {boolean} [skipOcr]
229
+ */
230
+ skipOcr?: boolean;
231
+
232
+ /**
233
+ * OCR Engine Modes
234
+ * The engine mode cannot be changed after creating the instance
235
+ * If another mode is needed, it's advised to create a new instance.
236
+ * @throws {Error} Will throw an error when oem mode is below 0 or over 3
237
+ */
238
+ oemMode?: OcrEngineMode;
239
+
240
+ /**
241
+ * Page Segmentation Modes
242
+ * The page segmentation mode cannot be changed after creating the instance
243
+ * If another mode is needed, it's advised to create a new instance.
244
+ * @throws {Error} Will throw an error when psm mode is below 0 or over 13
245
+ */
246
+ psm?: PageSegmentationMode;
247
+ }
248
+
249
+ export interface ProgressChangedInfo {
250
+
251
+ /**
252
+ * Chars in this buffer
253
+ */
254
+ progress: number;
255
+
256
+ /**
257
+ * Percent complete increasing (0-100)
258
+ */
259
+ percent: number;
260
+
261
+ /**
262
+ * States if the worker is still alive
263
+ */
264
+ ocrAlive: number;
265
+
266
+ /**
267
+ * top coordinate of the bbox of the current element that tesseract is processing
268
+ */
269
+ top: number;
270
+
271
+ /**
272
+ * right coordinate of the bbox of the current element that tesseract is processing
273
+ */
274
+ right: number;
275
+
276
+ /**
277
+ * bottom coordinate of the bbox of the current element that tesseract is processing
278
+ */
279
+ bottom: number;
280
+
281
+ /**
282
+ * left coordinate of the bbox of the current element that tesseract is processing
283
+ */
284
+ left: number;
285
+ }
286
+
287
+ /**
288
+ * Progress callback that's called during `recognize`
289
+ */
290
+ export type RecognizeProgressChangedCallback = (info: ProgressChangedInfo) => void;
291
+
292
+ export interface RecognizeOptions {
293
+
294
+ /**
295
+ * @property {RecognizeProgressChangedCallback} [progressChanged]
296
+ */
297
+ progressChanged?: RecognizeProgressChangedCallback;
298
+ }
299
+
300
+ export interface RecognizeResult {
301
+
302
+ /**
303
+ * Returns the recognized text as utf8
304
+ * @returns {string}
305
+ */
306
+ getText: () => string;
307
+
308
+ /**
309
+ * Returns the recognized text in TSV format (tab-separated values)
310
+ * @returns {string}
311
+ */
312
+ getTSV: () => string;
313
+
314
+ /**
315
+ * Returns the recognized results in an hOCR format.
316
+ * hOCR is a derivative of the XML format and can be parsed as such.
317
+ * The hOCR format contains layout information, so that
318
+ * it could be overlaid on the source material, for example.
319
+ * @see https://en.wikipedia.org/wiki/HOCR
320
+ * @returns {string}
321
+ */
322
+ getHOCR: () => string;
323
+
324
+ /**
325
+ * Returns the recognized results in an ALTO format.
326
+ * ALTO is a derivative of the XML format and can be parsed as such.
327
+ * The ALTO format contains layout information, so that
328
+ * it could be overlaid on the source material, for example.
329
+ * @see https://en.wikipedia.org/wiki/Analyzed_Layout_and_Text_Object
330
+ * @returns {string}
331
+ */
332
+ getALTO: () => string;
333
+ }
334
+
335
+ export class Tesseract {
336
+ constructor(options: TesseractOptions);
337
+
338
+ /**
339
+ * @throws {Error} Will throw an error if no available underlying api is available
340
+ * @throws {Error} Will throw an error if skipOcr is on and this function is called
341
+ * @throws {Error} Will throw an error if the parameter at index 0 is not a buffer
342
+ * @param {Buffer<ArrayBuffer>} imageBuffer The image that should be recognized
343
+ * @param {RecognizeOptions} [options] Optional options,
344
+ * @returns {Promise<RecognizeResult>} Returns a result object containing
345
+ * various functions to retrieve the results
346
+ */
347
+ recognize: (imageBuffer: Buffer<ArrayBuffer>, options?: RecognizeOptions) => Promise<RecognizeResult>;
348
+ }
349
+
350
+ export default Tesseract;
351
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@luii/node-tesseract-ocr",
3
- "version": "1.0.15",
3
+ "version": "1.0.18",
4
4
  "private": false,
5
5
  "gypfile": true,
6
6
  "main": "dist/cjs/index.cjs",
@@ -41,7 +41,8 @@
41
41
  "build": "npm run build:release",
42
42
  "build:addon:debug": "node-gyp rebuild --debug",
43
43
  "build:addon:release": "node-gyp rebuild --release",
44
- "build:ts": "npm run build:cjs && npm run build:esm",
44
+ "build:types": "mkdir -p dist && cp lib/index.d.ts dist/index.d.ts",
45
+ "build:ts": "npm run build:cjs && npm run build:esm && npm run build:types",
45
46
  "build:cjs": "tsc -p tsconfig.cjs.json && mv dist/cjs/index.js dist/cjs/index.cjs",
46
47
  "build:esm": "tsc -p tsconfig.esm.json && mv dist/esm/index.js dist/esm/index.mjs",
47
48
  "build:debug": "rm -rf dist && mkdir dist && npm run build:addon:debug && npm run build:cjs && npm run build:esm",
@@ -52,9 +53,12 @@
52
53
  "files": [
53
54
  "dist/**",
54
55
  "prebuilds/**",
56
+ "src/**",
55
57
  "build/Release/*.node",
56
58
  "package.json",
57
- "README.md"
59
+ "binding.gyp",
60
+ "README.md",
61
+ "LICENSE.md"
58
62
  ],
59
63
  "devDependencies": {
60
64
  "@types/node": "^22.0.0",
@@ -70,7 +74,7 @@
70
74
  ".": {
71
75
  "import": "./dist/esm/index.mjs",
72
76
  "require": "./dist/cjs/index.cjs",
73
- "types": "./lib/index.d.ts"
77
+ "types": "./dist/index.d.ts"
74
78
  }
75
79
  }
76
80
  }
package/src/addon.cpp ADDED
@@ -0,0 +1,26 @@
1
+ /*
2
+ * node-tesseract-ocr
3
+ * Copyright (C) 2025 Philipp Czarnetzki
4
+ *
5
+ * This program is free software: you can redistribute it and/or modify
6
+ * it under the terms of the GNU Affero General Public License as published by
7
+ * the Free Software Foundation, either version 3 of the License, or
8
+ * (at your option) any later version.
9
+ *
10
+ * This program is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ * GNU Affero General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Affero General Public License
16
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ */
18
+
19
+ #include "handle.h"
20
+ #include <napi.h>
21
+
22
+ Napi::Object InitAll(Napi::Env env, Napi::Object exports) {
23
+ return Handle::GetClass(env, exports);
24
+ }
25
+
26
+ NODE_API_MODULE(hello, InitAll)
package/src/handle.cpp ADDED
@@ -0,0 +1,172 @@
1
+ /*
2
+ * node-tesseract-ocr
3
+ * Copyright (C) 2025 Philipp Czarnetzki
4
+ *
5
+ * This program is free software: you can redistribute it and/or modify
6
+ * it under the terms of the GNU Affero General Public License as published by
7
+ * the Free Software Foundation, either version 3 of the License, or
8
+ * (at your option) any later version.
9
+ *
10
+ * This program is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ * GNU Affero General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Affero General Public License
16
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ */
18
+
19
+ #include "handle.h"
20
+ #include "napi.h"
21
+ #include "ocr_worker.h"
22
+ #include <tesseract/pageiterator.h>
23
+ #include <tesseract/publictypes.h>
24
+
25
+ Napi::Object Handle::GetClass(Napi::Env env, Napi::Object exports) {
26
+ Napi::Function funcs = DefineClass(
27
+ env, "Tesseract", {InstanceMethod("recognize", &Handle::Recognize)});
28
+
29
+ Napi::FunctionReference *constructor = new Napi::FunctionReference();
30
+
31
+ *constructor = Napi::Persistent(funcs);
32
+ exports.Set("Tesseract", funcs);
33
+ env.SetInstanceData<Napi::FunctionReference>(constructor);
34
+
35
+ return exports;
36
+ };
37
+
38
+ Handle::Handle(const Napi::CallbackInfo &info)
39
+ : Napi::ObjectWrap<Handle>(info) {
40
+
41
+ Napi::Env env = info.Env();
42
+
43
+ if (info.Length() == 1 && info[0].IsObject()) {
44
+ auto ctorOptions = info[0].As<Napi::Object>();
45
+
46
+ const Napi::Value skipOcrOption = ctorOptions.Get("skipOcr");
47
+ if (!skipOcrOption.IsUndefined() && skipOcrOption.IsBoolean()) {
48
+ skipOcr_ = skipOcrOption.As<Napi::Boolean>().Value();
49
+ }
50
+
51
+ const Napi::Value dataPathOption = ctorOptions.Get("dataPath");
52
+ if (!dataPathOption.IsUndefined() && dataPathOption.IsString()) {
53
+ dataPath_ = dataPathOption.As<Napi::String>().Utf8Value();
54
+ }
55
+
56
+ const Napi::Value langOption = ctorOptions.Get("lang");
57
+ if (!langOption.IsUndefined() && langOption.IsString()) {
58
+ lang_ = langOption.As<Napi::String>().Utf8Value();
59
+ }
60
+
61
+ const Napi::Value engineModeOption = ctorOptions.Get("engineMode");
62
+ if (!engineModeOption.IsUndefined() && engineModeOption.IsNumber()) {
63
+ oemMode_ = static_cast<tesseract::OcrEngineMode>(
64
+ engineModeOption.As<Napi::Number>().Int32Value());
65
+ }
66
+
67
+ const Napi::Value psmOption = ctorOptions.Get("psm");
68
+ if (!psmOption.IsUndefined() && psmOption.IsNumber()) {
69
+ psm_ = static_cast<tesseract::PageSegMode>(
70
+ psmOption.As<Napi::Number>().Int32Value());
71
+ }
72
+
73
+ if (oemMode_ < 0 || oemMode_ >= tesseract::OEM_COUNT) {
74
+ Napi::TypeError::New(env, "Unsupported OCR Engine Mode")
75
+ .ThrowAsJavaScriptException();
76
+ return;
77
+ }
78
+
79
+ if (psm_ < 0 || psm_ >= tesseract::PSM_COUNT) {
80
+ Napi::TypeError::New(env, "Unsupported Page Segmentation Mode")
81
+ .ThrowAsJavaScriptException();
82
+ return;
83
+ }
84
+ }
85
+ }
86
+
87
+ Handle::~Handle() {}
88
+
89
+ std::unique_ptr<tesseract::TessBaseAPI> Handle::CreateApi() {
90
+ auto api = std::make_unique<tesseract::TessBaseAPI>();
91
+ if (skipOcr_) {
92
+ api->InitForAnalysePage();
93
+ } else {
94
+ if (api->Init(dataPath_.c_str(), lang_.c_str(), oemMode_) == -1) {
95
+ api->End();
96
+ return nullptr;
97
+ }
98
+ }
99
+
100
+ api->SetPageSegMode(static_cast<tesseract::PageSegMode>(psm_));
101
+ return api;
102
+ }
103
+
104
+ Napi::Value Handle::Recognize(const Napi::CallbackInfo &info) {
105
+ const Napi::Env env = info.Env();
106
+ const Napi::Promise::Deferred deffered = Napi::Promise::Deferred::New(env);
107
+
108
+ if (skipOcr_) {
109
+ deffered.Reject(Napi::Error::New(env, "OCR not available when handle was "
110
+ "created with `skipOcr` turned on")
111
+ .Value());
112
+ return deffered.Promise();
113
+ }
114
+
115
+ if (info.Length() <= 0 || !info[0].IsBuffer()) {
116
+ deffered.Reject(
117
+ Napi::TypeError::New(env, "Expected argument at index 0 to be a Buffer")
118
+ .Value());
119
+ return deffered.Promise();
120
+ }
121
+
122
+ Napi::Function progressCallback = Napi::Function();
123
+ if (info.Length() == 2 && info[1].IsObject()) {
124
+ const Napi::Object recognizeOptions = info[1].As<Napi::Object>();
125
+ const Napi::Value progressChangedOption =
126
+ recognizeOptions.Get("progressChanged");
127
+ if (!progressChangedOption.IsUndefined() &&
128
+ progressChangedOption.IsFunction()) {
129
+ progressCallback = progressChangedOption.As<Napi::Function>();
130
+ }
131
+ }
132
+
133
+ auto imageBuffer = info[0].As<Napi::Buffer<uint8_t>>();
134
+ auto *pWorker = new OCRWorker(this, info.This().As<Napi::Object>(),
135
+ imageBuffer, deffered, progressCallback);
136
+
137
+ pWorker->Queue();
138
+
139
+ return deffered.Promise();
140
+ }
141
+
142
+ // Napi::Value Handle::AnalyzeLayout(const Napi::CallbackInfo &info) {
143
+ // const Napi::Env env = info.Env();
144
+ // ArgParser args(info);
145
+ //
146
+ // const Napi::Promise::Deferred deffered = Napi::Promise::Deferred::New(env);
147
+ //
148
+ // if (!skipOcr_) {
149
+ // deffered.Reject(
150
+ // Napi::Error::New(
151
+ // env, "Page analysis not available unless `skipOcr` is turned on")
152
+ // .Value());
153
+ //
154
+ // return deffered.Promise();
155
+ // }
156
+ //
157
+ // if (info.Length() < 1 || !info[0].IsBoolean()) {
158
+ // deffered.Reject(Napi::TypeError::New(
159
+ // info.Env(), "Expected first argument to be a
160
+ // boolean") .Value());
161
+ // return deffered.Promise();
162
+ // }
163
+ //
164
+ // bool merge_similar_words = info[0].As<Napi::Boolean>().Value();
165
+ //
166
+ // api_->SetImage();
167
+ //
168
+ // tesseract::PageIterator *iterator =
169
+ // api_->AnalyseLayout(merge_similar_words); return;
170
+ // }
171
+
172
+ std::mutex &Handle::Mutex() { return mutex_; }
package/src/handle.h ADDED
@@ -0,0 +1,59 @@
1
+ /*
2
+ * node-tesseract-ocr
3
+ * Copyright (C) 2025 Philipp Czarnetzki
4
+ *
5
+ * This program is free software: you can redistribute it and/or modify
6
+ * it under the terms of the GNU Affero General Public License as published by
7
+ * the Free Software Foundation, either version 3 of the License, or
8
+ * (at your option) any later version.
9
+ *
10
+ * This program is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ * GNU Affero General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Affero General Public License
16
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ */
18
+
19
+ #ifndef HANDLE_H
20
+ #define HANDLE_H
21
+
22
+ #include "napi.h"
23
+ #include <cstdint>
24
+ #include <cstdlib>
25
+ #include <memory>
26
+ #include <mutex>
27
+ #include <string>
28
+ #include <tesseract/baseapi.h>
29
+ #include <tesseract/ocrclass.h>
30
+ #include <tesseract/publictypes.h>
31
+
32
+ using Napi::CallbackInfo;
33
+
34
+ class Handle : public Napi::ObjectWrap<Handle> {
35
+ public:
36
+ static Napi::Object GetClass(Napi::Env env, Napi::Object exports);
37
+ Handle(const Napi::CallbackInfo &info);
38
+ ~Handle();
39
+
40
+ std::unique_ptr<tesseract::TessBaseAPI> CreateApi();
41
+ std::mutex &Mutex();
42
+ tesseract::ETEXT_DESC *Monitor();
43
+
44
+ private:
45
+ bool skipOcr_ = false;
46
+
47
+ std::string dataPath_ = std::getenv("NODE_TESSERACT_DATAPATH");
48
+ std::string lang_ = "eng";
49
+ tesseract::OcrEngineMode oemMode_ = tesseract::OEM_DEFAULT;
50
+ tesseract::PageSegMode psm_ = tesseract::PSM_SINGLE_BLOCK;
51
+
52
+ std::unique_ptr<tesseract::TessBaseAPI> api_;
53
+ std::mutex mutex_;
54
+
55
+ Napi::Value Recognize(const CallbackInfo &info);
56
+ Napi::Value AnalyzeLayout(const CallbackInfo &info);
57
+ };
58
+
59
+ #endif // HANDLE_H
@@ -0,0 +1,101 @@
1
+ /*
2
+ * node-tesseract-ocr
3
+ * Copyright (C) 2025 Philipp Czarnetzki
4
+ *
5
+ * This program is free software: you can redistribute it and/or modify
6
+ * it under the terms of the GNU Affero General Public License as published by
7
+ * the Free Software Foundation, either version 3 of the License, or
8
+ * (at your option) any later version.
9
+ *
10
+ * This program is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ * GNU Affero General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Affero General Public License
16
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ */
18
+
19
+ #include "ocr_result.h"
20
+ #include "napi.h"
21
+ #include <mutex>
22
+ #include <tesseract/baseapi.h>
23
+ #include <tesseract/ocrclass.h>
24
+
25
+ Napi::Function OCRResult::GetClass(Napi::Env env) {
26
+ return DefineClass(env, "OCRResult",
27
+ {
28
+ InstanceMethod("getText", &OCRResult::GetText),
29
+ InstanceMethod("getHOCR", &OCRResult::GetHOCR),
30
+ InstanceMethod("getTSV", &OCRResult::GetTSV),
31
+ InstanceMethod("getALTO", &OCRResult::GetALTO),
32
+ });
33
+ }
34
+
35
+ OCRResult::OCRResult(const Napi::CallbackInfo &info)
36
+ : Napi::ObjectWrap<OCRResult>(info), handle_(nullptr) {
37
+ // Expect constructor args: text, hocr, tsv (all strings)
38
+ if (info.Length() >= 1 && info[0].IsString()) {
39
+ text_ = info[0].As<Napi::String>().Utf8Value();
40
+ }
41
+ if (info.Length() >= 2 && info[1].IsString()) {
42
+ hocr_ = info[1].As<Napi::String>().Utf8Value();
43
+ }
44
+ if (info.Length() >= 3 && info[2].IsString()) {
45
+ tsv_ = info[2].As<Napi::String>().Utf8Value();
46
+ }
47
+ if (info.Length() >= 4 && info[3].IsString()) {
48
+ alto_ = info[3].As<Napi::String>().Utf8Value();
49
+ }
50
+ }
51
+
52
+ Napi::Object OCRResult::NewInstance(Napi::Env env, const std::string &text,
53
+ const std::string &hocr,
54
+ const std::string &tsv,
55
+ const std::string &alto) {
56
+ Napi::EscapableHandleScope scope(env);
57
+
58
+ Napi::Function ctor = OCRResult::GetClass(env);
59
+ Napi::Object obj =
60
+ ctor.New({Napi::String::New(env, text), Napi::String::New(env, hocr),
61
+ Napi::String::New(env, tsv), Napi::String::New(env, alto)});
62
+
63
+ return scope.Escape(obj).As<Napi::Object>();
64
+ }
65
+
66
+ void OCRResult::Cancel(const Napi::CallbackInfo &info) {
67
+ // Napi::Env env = info.Env();
68
+
69
+ {
70
+ std::lock_guard<std::mutex> lock(handle_->Mutex());
71
+ tesseract::ETEXT_DESC *monitor = handle_->Monitor();
72
+
73
+ monitor->cancel = [](void *should_cancel, int wordcount) -> bool {
74
+ return true;
75
+ };
76
+ monitor->cancel_this = (void *)true;
77
+ monitor->cancel(monitor->cancel_this, monitor->count);
78
+ }
79
+
80
+ return;
81
+ }
82
+
83
+ Napi::Value OCRResult::GetText(const Napi::CallbackInfo &info) {
84
+ Napi::Env env = info.Env();
85
+ return Napi::String::New(env, text_);
86
+ }
87
+
88
+ Napi::Value OCRResult::GetHOCR(const Napi::CallbackInfo &info) {
89
+ Napi::Env env = info.Env();
90
+ return Napi::String::New(env, hocr_);
91
+ }
92
+
93
+ Napi::Value OCRResult::GetTSV(const Napi::CallbackInfo &info) {
94
+ Napi::Env env = info.Env();
95
+ return Napi::String::New(env, tsv_);
96
+ }
97
+
98
+ Napi::Value OCRResult::GetALTO(const Napi::CallbackInfo &info) {
99
+ Napi::Env env = info.Env();
100
+ return Napi::String::New(env, alto_);
101
+ }
@@ -0,0 +1,49 @@
1
+ /*
2
+ * node-tesseract-ocr
3
+ * Copyright (C) 2025 Philipp Czarnetzki
4
+ *
5
+ * This program is free software: you can redistribute it and/or modify
6
+ * it under the terms of the GNU Affero General Public License as published by
7
+ * the Free Software Foundation, either version 3 of the License, or
8
+ * (at your option) any later version.
9
+ *
10
+ * This program is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ * GNU Affero General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Affero General Public License
16
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ */
18
+
19
+ #ifndef OCRRESULT_H
20
+ #define OCRRESULT_H
21
+
22
+ #include "handle.h"
23
+ #include <napi.h>
24
+
25
+ class OCRResult : public Napi::ObjectWrap<OCRResult> {
26
+
27
+ public:
28
+ OCRResult(const Napi::CallbackInfo &info);
29
+ static Napi::Function GetClass(Napi::Env env);
30
+ static Napi::Object NewInstance(Napi::Env env, const std::string &text,
31
+ const std::string &hocr,
32
+ const std::string &tsv,
33
+ const std::string &alto);
34
+
35
+ private:
36
+ void Cancel(const CallbackInfo &info);
37
+ Napi::Value GetText(const CallbackInfo &info);
38
+ Napi::Value GetHOCR(const CallbackInfo &info);
39
+ Napi::Value GetTSV(const CallbackInfo &info);
40
+ Napi::Value GetALTO(const CallbackInfo &info);
41
+
42
+ Handle *handle_;
43
+ std::string text_;
44
+ std::string hocr_;
45
+ std::string tsv_;
46
+ std::string alto_;
47
+ };
48
+
49
+ #endif // OCRRESULT_H
@@ -0,0 +1,193 @@
1
+ /*
2
+ * node-tesseract-ocr
3
+ * Copyright (C) 2025 Philipp Czarnetzki
4
+ *
5
+ * This program is free software: you can redistribute it and/or modify
6
+ * it under the terms of the GNU Affero General Public License as published by
7
+ * the Free Software Foundation, either version 3 of the License, or
8
+ * (at your option) any later version.
9
+ *
10
+ * This program is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ * GNU Affero General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Affero General Public License
16
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ */
18
+
19
+ #include "ocr_worker.h"
20
+ #include "napi.h"
21
+ #include "ocr_result.h"
22
+ #include <cstddef>
23
+ #include <cstdio>
24
+ #include <tesseract/baseapi.h>
25
+ #include <tesseract/ocrclass.h>
26
+
27
+ OCRWorker::OCRWorker(Handle *handle, Napi::Object handleObject,
28
+ Napi::Buffer<uint8_t> buffer,
29
+ Napi::Promise::Deferred deffered,
30
+ Napi::Function &progressCallback)
31
+ : Napi::AsyncProgressWorker<ProgressPayload>{handle->Env(), "OCRWorker"},
32
+ handle_(handle), data_(buffer.Data()), length_(buffer.Length()),
33
+ deffered_{deffered} {
34
+
35
+ this->progressCallback_.Reset(progressCallback, 1);
36
+
37
+ // Hold a persistent reference to the JS Buffer.
38
+ // So its memory isn't freed while the async worker is running.
39
+ this->bufferRef_.Reset(buffer, 1);
40
+ // Hold a persistent reference to the JS Handle object.
41
+ // So it isn't GC'd while this worker is active.
42
+ this->handleRef_.Reset(handleObject, 1);
43
+
44
+ // allocate and zero-init the monitor to avoid uninitialized fields
45
+ monitor_ = new tesseract::ETEXT_DESC();
46
+
47
+ monitor_->progress_callback2 = [](tesseract::ETEXT_DESC *monitor, int left,
48
+ int right, int top, int bottom) -> bool {
49
+ ProgressPayload payload{monitor->more_to_come,
50
+ monitor->progress,
51
+ monitor->ocr_alive,
52
+ top,
53
+ right,
54
+ bottom,
55
+ left};
56
+ auto *executionProgress =
57
+ static_cast<OCRWorker::ExecutionProgress *>(monitor->cancel_this);
58
+ if (executionProgress == nullptr) {
59
+ return false;
60
+ }
61
+ executionProgress->Send(&payload, 1);
62
+ return false;
63
+ };
64
+ monitor_->ocr_alive = 1;
65
+ };
66
+
67
+ OCRWorker::~OCRWorker() {
68
+ // release the persistent buffer reference
69
+ if (!this->bufferRef_.IsEmpty()) {
70
+ this->bufferRef_.Reset();
71
+ }
72
+
73
+ // free monitor
74
+ if (monitor_) {
75
+ delete monitor_;
76
+ monitor_ = nullptr;
77
+ }
78
+ if (!this->handleRef_.IsEmpty()) {
79
+ this->handleRef_.Reset();
80
+ }
81
+ }
82
+
83
+ void OCRWorker::Execute(const ExecutionProgress &executionProgress) {
84
+
85
+ Pix *pPix = pixReadMem(data_, length_);
86
+
87
+ if (!pPix) {
88
+ SetError("Could not read image from buffer");
89
+ return;
90
+ }
91
+
92
+ // determine image depth and normalize to 8-bit grayscale (if needed)
93
+ int depth = pixGetDepth(pPix);
94
+ if (depth != 8) {
95
+ Pix *pGray = pixConvertTo8(pPix, 0); /* 0 = no colormap */
96
+ if (pGray) {
97
+ pixDestroy(&pPix);
98
+ pPix = pGray;
99
+ }
100
+ }
101
+
102
+ std::unique_ptr<tesseract::TessBaseAPI> localApi;
103
+ {
104
+ std::lock_guard<std::mutex> lock(handle_->Mutex());
105
+
106
+ // Create a thread-local TessBaseAPI to avoid sharing across threads;
107
+ localApi = handle_->CreateApi();
108
+
109
+ if (!localApi) {
110
+ pixDestroy(&pPix);
111
+ SetError("Could not initialize thread-local Tesseract API");
112
+ return;
113
+ }
114
+
115
+ localApi->SetImage(pPix);
116
+ monitor_->cancel_this = (void *)&executionProgress;
117
+
118
+ if (localApi->Recognize(monitor_) != 0) {
119
+ pixDestroy(&pPix);
120
+ monitor_->cancel_this = nullptr;
121
+ SetError("Recognize failed");
122
+ return;
123
+ }
124
+ }
125
+
126
+ // retrieve results from the local API
127
+ {
128
+ char *t = nullptr;
129
+ t = localApi->GetUTF8Text();
130
+ if (t) {
131
+ resultText_.assign(t);
132
+ delete[] t;
133
+ }
134
+
135
+ t = localApi->GetHOCRText(0);
136
+ if (t) {
137
+ resultHOCR_.assign(t);
138
+ delete[] t;
139
+ }
140
+
141
+ t = localApi->GetTSVText(0);
142
+ if (t) {
143
+ resultTSV_.assign(t);
144
+ delete[] t;
145
+ }
146
+
147
+ t = localApi->GetAltoText(0);
148
+ if (t) {
149
+ resultALTO_.assign(t);
150
+ delete[] t;
151
+ }
152
+ }
153
+
154
+ monitor_->cancel_this = nullptr;
155
+ pixDestroy(&pPix);
156
+ }
157
+
158
+ void OCRWorker::OnOK() {
159
+ Napi::Env env = Env();
160
+ Napi::HandleScope scope(env);
161
+
162
+ Napi::Object resultObj = OCRResult::NewInstance(env, resultText_, resultHOCR_,
163
+ resultTSV_, resultALTO_);
164
+ // Attach a reference to the Handle JS object on the result.
165
+ // So the Handle stays alive as long as the OCRResult is reachable from JS.
166
+ if (!handleRef_.IsEmpty()) {
167
+ resultObj.Set("_handle", handleRef_.Value());
168
+ }
169
+
170
+ this->deffered_.Resolve(resultObj);
171
+ }
172
+
173
+ void OCRWorker::OnError(const Napi::Error &error) {
174
+ Napi::HandleScope scope(Env());
175
+ this->deffered_.Reject(error.Value());
176
+ }
177
+
178
+ void OCRWorker::OnProgress(const ProgressPayload *payload, size_t count) {
179
+ Napi::HandleScope scope(Env());
180
+ Napi::Object progress = Napi::Object::New(Env());
181
+
182
+ progress.Set("percent", payload->percent);
183
+ progress.Set("progress", payload->progress);
184
+ progress.Set("ocr_alive", payload->ocr_alive);
185
+ progress.Set("top", payload->top);
186
+ progress.Set("right", payload->right);
187
+ progress.Set("bottom", payload->bottom);
188
+ progress.Set("left", payload->left);
189
+
190
+ if (!this->progressCallback_.IsEmpty()) {
191
+ progressCallback_.Call(Env().Undefined(), {progress});
192
+ }
193
+ }
@@ -0,0 +1,69 @@
1
+ /*
2
+ * node-tesseract-ocr
3
+ * Copyright (C) 2025 Philipp Czarnetzki
4
+ *
5
+ * This program is free software: you can redistribute it and/or modify
6
+ * it under the terms of the GNU Affero General Public License as published by
7
+ * the Free Software Foundation, either version 3 of the License, or
8
+ * (at your option) any later version.
9
+ *
10
+ * This program is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ * GNU Affero General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Affero General Public License
16
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ */
18
+
19
+ #ifndef OCRWORKER_H
20
+ #define OCRWORKER_H
21
+
22
+ #include "handle.h"
23
+ #include "leptonica/allheaders.h"
24
+ #include "napi.h"
25
+ #include <cstddef>
26
+ #include <memory>
27
+ #include <tesseract/baseapi.h>
28
+ #include <tesseract/ocrclass.h>
29
+
30
/**
 * Plain snapshot of recognition progress, sent from the worker thread to
 * OnProgress. The last four members are the edges of a bounding box.
 *
 * Every member is zero-initialised so a default-constructed payload never
 * carries indeterminate values; the type remains an aggregate and can still
 * be brace-initialised member by member.
 */
struct ProgressPayload {
  int percent = 0;   // completion percentage
  int progress = 0;  // progress counter
  int ocr_alive = 0; // liveness flag copied from the monitor
  int top = 0;       // bounding box: top edge
  int right = 0;     // bounding box: right edge
  int bottom = 0;    // bounding box: bottom edge
  int left = 0;      // bounding box: left edge
};
39
+
40
+ class OCRWorker : public Napi::AsyncProgressWorker<ProgressPayload> {
41
+ public:
42
+ OCRWorker(Handle *handle, Napi::Object handleObject,
43
+ Napi::Buffer<uint8_t> buffer, Napi::Promise::Deferred deffered,
44
+ Napi::Function &progressCallback);
45
+ ~OCRWorker();
46
+
47
+ protected:
48
+ void Execute(const ExecutionProgress &executionProgress) override;
49
+
50
+ void OnOK() override;
51
+ void OnError(const Napi::Error &error) override;
52
+ void OnProgress(const ProgressPayload *payload, size_t count) override;
53
+
54
+ private:
55
+ Handle *handle_;
56
+ Napi::Reference<Napi::Object> handleRef_;
57
+ uint8_t *data_;
58
+ size_t length_;
59
+ Napi::Reference<Napi::Buffer<uint8_t>> bufferRef_;
60
+ Napi::Promise::Deferred deffered_;
61
+ tesseract::ETEXT_DESC *monitor_ = nullptr;
62
+ Napi::FunctionReference progressCallback_;
63
+ std::string resultText_;
64
+ std::string resultHOCR_;
65
+ std::string resultTSV_;
66
+ std::string resultALTO_;
67
+ };
68
+
69
+ #endif // OCRWORKER_H