@luii/node-tesseract-ocr 1.0.19 → 2.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,18 +1,259 @@
1
1
  /*
2
- * Copyright 2025 Philipp Czarnetzki
3
- *
4
- * Licensed under the Apache License, Version 2.0 (the "License");
5
- * you may not use this file except in compliance with the License.
6
- * You may obtain a copy of the License at
7
- *
8
- * http://www.apache.org/licenses/LICENSE-2.0
9
- *
10
- * Unless required by applicable law or agreed to in writing, software
11
- * distributed under the License is distributed on an "AS IS" BASIS,
12
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
13
- * or implied. See the License for the specific language governing
14
- * permissions and limitations under the License.
15
- */
16
- const { Tesseract } = require('node-gyp-build')(__dirname);
17
- export { Tesseract };
2
+ * Copyright 2025 Philipp Czarnetzki
3
+ *
4
+ * Licensed under the Apache License, Version 2.0 (the "License");
5
+ * you may not use this file except in compliance with the License.
6
+ * You may obtain a copy of the License at
7
+ *
8
+ * http://www.apache.org/licenses/LICENSE-2.0
9
+ *
10
+ * Unless required by applicable law or agreed to in writing, software
11
+ * distributed under the License is distributed on an "AS IS" BASIS,
12
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
13
+ * or implied. See the License for the specific language governing
14
+ * permissions and limitations under the License.
15
+ */
16
+ /**
17
+ * All available languages for tesseract
18
+ * @readonly
19
+ * @enum {string}
20
+ */
21
+ export const Language = {
22
+ afr: "afr",
23
+ amh: "amh",
24
+ ara: "ara",
25
+ asm: "asm",
26
+ aze: "aze",
27
+ aze_cyrl: "aze_cyrl",
28
+ bel: "bel",
29
+ ben: "ben",
30
+ bod: "bod",
31
+ bos: "bos",
32
+ bre: "bre",
33
+ bul: "bul",
34
+ cat: "cat",
35
+ ceb: "ceb",
36
+ ces: "ces",
37
+ chi_sim: "chi_sim",
38
+ chi_tra: "chi_tra",
39
+ chr: "chr",
40
+ cos: "cos",
41
+ cym: "cym",
42
+ dan: "dan",
43
+ deu: "deu",
44
+ deu_latf: "deu_latf",
45
+ div: "div",
46
+ dzo: "dzo",
47
+ ell: "ell",
48
+ eng: "eng",
49
+ enm: "enm",
50
+ epo: "epo",
51
+ equ: "equ",
52
+ est: "est",
53
+ eus: "eus",
54
+ fao: "fao",
55
+ fas: "fas",
56
+ fil: "fil",
57
+ fin: "fin",
58
+ fra: "fra",
59
+ frm: "frm",
60
+ fry: "fry",
61
+ gla: "gla",
62
+ gle: "gle",
63
+ glg: "glg",
64
+ grc: "grc",
65
+ guj: "guj",
66
+ hat: "hat",
67
+ heb: "heb",
68
+ hin: "hin",
69
+ hrv: "hrv",
70
+ hun: "hun",
71
+ hye: "hye",
72
+ iku: "iku",
73
+ ind: "ind",
74
+ isl: "isl",
75
+ ita: "ita",
76
+ ita_old: "ita_old",
77
+ jav: "jav",
78
+ jpn: "jpn",
79
+ kan: "kan",
80
+ kat: "kat",
81
+ kat_old: "kat_old",
82
+ kaz: "kaz",
83
+ khm: "khm",
84
+ kir: "kir",
85
+ kmr: "kmr",
86
+ kor: "kor",
87
+ kor_vert: "kor_vert",
88
+ kur: "kur",
89
+ lao: "lao",
90
+ lat: "lat",
91
+ lav: "lav",
92
+ lit: "lit",
93
+ ltz: "ltz",
94
+ mal: "mal",
95
+ mar: "mar",
96
+ mkd: "mkd",
97
+ mlt: "mlt",
98
+ mon: "mon",
99
+ mri: "mri",
100
+ msa: "msa",
101
+ mya: "mya",
102
+ nep: "nep",
103
+ nld: "nld",
104
+ nor: "nor",
105
+ oci: "oci",
106
+ ori: "ori",
107
+ osd: "osd",
108
+ pan: "pan",
109
+ pol: "pol",
110
+ por: "por",
111
+ pus: "pus",
112
+ que: "que",
113
+ ron: "ron",
114
+ rus: "rus",
115
+ san: "san",
116
+ sin: "sin",
117
+ slk: "slk",
118
+ slv: "slv",
119
+ snd: "snd",
120
+ spa: "spa",
121
+ spa_old: "spa_old",
122
+ sqi: "sqi",
123
+ srp: "srp",
124
+ srp_latn: "srp_latn",
125
+ sun: "sun",
126
+ swa: "swa",
127
+ swe: "swe",
128
+ syr: "syr",
129
+ tam: "tam",
130
+ tat: "tat",
131
+ tel: "tel",
132
+ tgk: "tgk",
133
+ tha: "tha",
134
+ tir: "tir",
135
+ ton: "ton",
136
+ tur: "tur",
137
+ uig: "uig",
138
+ ukr: "ukr",
139
+ urd: "urd",
140
+ uzb: "uzb",
141
+ uzb_cyrl: "uzb_cyrl",
142
+ vie: "vie",
143
+ yid: "yid",
144
+ yor: "yor",
145
+ };
146
+ /**
147
+ * When Tesseract/Cube is initialized we can choose to instantiate/load/run
148
+ * only the Tesseract part, only the Cube part or both along with the combiner.
149
+ * The preference of which engine to use is stored in tessedit_ocr_engine_mode.
150
+ * @readonly
151
+ * @enum {number}
152
+ */
153
+ export const OcrEngineModes = {
154
+ /**
155
+ * Run Tesseract only - fastest
156
+ * @deprecated
157
+ * @type {number}
158
+ */
159
+ OEM_TESSERACT_ONLY: 0,
160
+ /**
161
+ * Run just the LSTM line recognizer.
162
+ * @type {number}
163
+ */
164
+ OEM_LSTM_ONLY: 1,
165
+ /**
166
+ * Run the LSTM recognizer, but allow fallback
167
+ * to Tesseract when things get difficult.
168
+ * @deprecated
169
+ * @type {number}
170
+ */
171
+ OEM_TESSERACT_LSTM_COMBINED: 2,
172
+ /**
173
+ * Specify this mode when calling init(),
174
+ * to indicate that any of the above modes
175
+ * should be automatically inferred from the
176
+ * variables in the language-specific config,
177
+ * command-line configs, or if not specified
178
+ * in any of the above should be set to the
179
+ * default OEM_TESSERACT_ONLY.
180
+ * @type {number}
181
+ * @default
182
+ */
183
+ OEM_DEFAULT: 3,
184
+ };
185
+ /**
186
+ * Possible modes for page layout analysis.
187
+ * @readonly
188
+ * @enum {number}
189
+ */
190
+ export const PageSegmentationModes = {
191
+ // Orientation and script detection only.
192
+ PSM_OSD_ONLY: 0,
193
+ // Automatic page segmentation with orientation and script detection. (OSD)
194
+ PSM_AUTO_OSD: 1,
195
+ // Automatic page segmentation, but no OSD, or OCR.
196
+ PSM_AUTO_ONLY: 2,
197
+ // Fully automatic page segmentation, but no OSD.
198
+ PSM_AUTO: 3,
199
+ // Assume a single column of text of variable sizes.
200
+ PSM_SINGLE_COLUMN: 4,
201
+ // Assume a single uniform block of vertically aligned text.
202
+ PSM_SINGLE_BLOCK_VERT_TEXT: 5,
203
+ // Assume a single uniform block of text. (Default.)
204
+ PSM_SINGLE_BLOCK: 6,
205
+ // Treat the image as a single text line.
206
+ PSM_SINGLE_LINE: 7,
207
+ // Treat the image as a single word.
208
+ PSM_SINGLE_WORD: 8,
209
+ // Treat the image as a single word in a circle.
210
+ PSM_CIRCLE_WORD: 9,
211
+ // Treat the image as a single character.
212
+ PSM_SINGLE_CHAR: 10,
213
+ // Find as much text as possible in no particular order.
214
+ PSM_SPARSE_TEXT: 11,
215
+ // Sparse text with orientation and script detection.
216
+ PSM_SPARSE_TEXT_OSD: 12,
217
+ // Treat the image as a single text line, bypassing hacks that are Tesseract-specific.
218
+ PSM_RAW_LINE: 13,
219
+ };
220
+ export const LogLevels = {
221
+ ALL: "-2147483648",
222
+ TRACE: "5000",
223
+ DEBUG: "10000",
224
+ INFO: "20000",
225
+ WARN: "30000",
226
+ ERROR: "40000",
227
+ FATAL: "50000",
228
+ OFF: "2147483647",
229
+ };
230
+ const fs = require("node:fs");
231
+ const path = require("node:path");
232
+ const rootFromSource = path.resolve(__dirname, "../../");
233
+ const bindingOptionsFromSource = path.resolve(rootFromSource, "binding-options.js");
234
+ const bindingOptionsPath = fs.existsSync(bindingOptionsFromSource)
235
+ ? bindingOptionsFromSource
236
+ : path.resolve(process.cwd(), "binding-options.js");
237
+ const prebuildRoot = fs.existsSync(bindingOptionsFromSource)
238
+ ? rootFromSource
239
+ : process.cwd();
240
+ const { Tesseract: NativeTesseract } = require("pkg-prebuilds")(prebuildRoot, require(bindingOptionsPath));
241
+ class Tesseract extends NativeTesseract {
242
+ constructor() {
243
+ super();
244
+ }
245
+ async init(options) {
246
+ // scan train data for any files
247
+ // check whether the requested langs are available/cached
248
+ // if not
249
+ // fetch traineddata from cdn
250
+ // - add .lock file to downloaded file (while downloading, so other instances
251
+ // can wait on it and don't have to download again)
252
+ // - place into tesseract standard folder
253
+ // if available
254
+ // just go on with the init function of the native addon
255
+ return super.init(options);
256
+ }
257
+ }
258
+ export { Tesseract, NativeTesseract };
18
259
  export default Tesseract;
package/package.json CHANGED
@@ -1,10 +1,17 @@
1
1
  {
2
2
  "name": "@luii/node-tesseract-ocr",
3
- "version": "1.0.19",
3
+ "version": "2.0.13",
4
4
  "private": false,
5
- "gypfile": true,
5
+ "binary": {
6
+ "napi_versions": [
7
+ 10
8
+ ]
9
+ },
6
10
  "main": "dist/cjs/index.cjs",
7
11
  "module": "dist/esm/index.mjs",
12
+ "engines": {
13
+ "node": ">=22.14.0"
14
+ },
8
15
  "types": "dist/index.d.ts",
9
16
  "homepage": "https://github.com/luii/node-tesseract-ocr",
10
17
  "repository": {
@@ -29,52 +36,51 @@
29
36
  "ocr",
30
37
  "tesseract",
31
38
  "leptonica",
32
- "node-gyp",
39
+ "cmake-js",
33
40
  "node-addon-api"
34
41
  ],
35
42
  "publishConfig": {
36
43
  "access": "public"
37
44
  },
38
45
  "scripts": {
39
- "install": "node-gyp-build",
40
- "prebuild": "prebuildify --napi --strip",
41
- "build": "npm run build:release",
42
- "build:addon:debug": "node-gyp rebuild --debug",
43
- "build:addon:release": "node-gyp rebuild --release",
44
- "build:types": "mkdir -p dist && cp lib/index.d.ts dist/index.d.ts",
45
- "build:ts": "npm run build:cjs && npm run build:esm && npm run build:types",
46
- "build:cjs": "tsc -p tsconfig.cjs.json && mv dist/cjs/index.js dist/cjs/index.cjs",
47
- "build:esm": "tsc -p tsconfig.esm.json && mv dist/esm/index.js dist/esm/index.mjs",
48
- "build:debug": "rm -rf dist && mkdir dist && npm run build:addon:debug && npm run build:cjs && npm run build:esm",
49
- "build:release": "rm -rf dist && mkdir dist && npm run build:addon:release && npm run build:cjs && npm run build:esm",
50
- "build:examples": "npm run build && npx tsc -p tsconfig.examples.json",
51
- "example:recognize": "npm run build:examples && node dist/examples/recognize.js"
46
+ "install": "cmake-js compile",
47
+ "build:ts": "tsc -p tsconfig.cjs.json && tsc -p tsconfig.esm.json && mkdir -p dist && mv dist/cjs/index.js dist/cjs/index.cjs && mv dist/esm/index.js dist/esm/index.mjs",
48
+ "build:debug": "cmake-js compile --debug && npm run build:ts",
49
+ "build:release": "cmake-js compile --release && npm run build:ts",
50
+ "example:recognize": "npm run build:debug && tsc -p tsconfig.examples.json && node -r dotenv/config dist/examples/recognize.js dotenv_config_path=.env.local",
51
+ "test:cpp": "cmake-js compile --release && ./build/release/node-tesseract-ocr-tests",
52
+ "test:js": "vitest run",
53
+ "test:js:watch": "vitest"
52
54
  },
53
55
  "files": [
54
56
  "dist/**",
55
57
  "prebuilds/**",
56
58
  "src/**",
57
- "build/Release/*.node",
59
+ "build/release/*.node",
58
60
  "package.json",
59
- "binding.gyp",
61
+ "CMakeLists.txt",
60
62
  "README.md",
61
63
  "LICENSE.md"
62
64
  ],
63
65
  "devDependencies": {
66
+ "vitest": "^2.1.9",
64
67
  "@types/node": "^22.0.0",
65
- "node-addon-api": "^8.5.0",
66
- "prebuildify": "^5.0.0",
67
- "node-gyp": "^10.0.0",
68
68
  "typescript": "^5.6.0"
69
69
  },
70
70
  "dependencies": {
71
- "node-gyp-build": "^4.0.0"
71
+ "cmake-js": "^7.4.0",
72
+ "node-addon-api": "^8.5.0",
73
+ "dotenv": "^16.4.5",
74
+ "pkg-prebuilds": "^1.0.0"
72
75
  },
73
76
  "exports": {
74
- ".": {
75
- "import": "./dist/esm/index.mjs",
76
- "require": "./dist/cjs/index.cjs",
77
- "types": "./dist/index.d.ts"
77
+ "require": {
78
+ "types": "./dist/cjs/index.d.ts",
79
+ "default": "./dist/cjs/index.cjs"
80
+ },
81
+ "import": {
82
+ "types": "./dist/esm/index.d.ts",
83
+ "default": "./dist/esm/index.mjs"
78
84
  }
79
85
  }
80
86
  }
package/src/addon.cpp CHANGED
@@ -1,24 +1,9 @@
1
- /*
2
- * Copyright 2025 Philipp Czarnetzki
3
- *
4
- * Licensed under the Apache License, Version 2.0 (the "License");
5
- * you may not use this file except in compliance with the License.
6
- * You may obtain a copy of the License at
7
- *
8
- * http://www.apache.org/licenses/LICENSE-2.0
9
- *
10
- * Unless required by applicable law or agreed to in writing, software
11
- * distributed under the License is distributed on an "AS IS" BASIS,
12
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
13
- * or implied. See the License for the specific language governing
14
- * permissions and limitations under the License.
15
- */
16
-
17
- #include "handle.h"
18
- #include <napi.h>
19
-
20
- Napi::Object InitAll(Napi::Env env, Napi::Object exports) {
21
- return Handle::GetClass(env, exports);
22
- }
23
-
24
- NODE_API_MODULE(hello, InitAll)
1
+
2
+ #include "tesseract_wrapper.hpp"
3
+ #include <napi.h>
4
+
5
+ Napi::Object Init(Napi::Env env, Napi::Object exports) {
6
+ return TesseractWrapper::InitAddon(env, exports);
7
+ }
8
+
9
+ NODE_API_MODULE(NODE_GYP_MODULE_NAME, Init)