@luii/node-tesseract-ocr 1.0.19 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CMakeLists.txt ADDED
@@ -0,0 +1,45 @@
# Build script for the node-tesseract-ocr native Node.js addon.
#
# The CMAKE_JS_* variables referenced below are not defined in this file;
# presumably they are injected by cmake-js when it drives the configure step
# (TODO confirm against the package's build scripts).
#
# CMake 3.12 is the real floor for this script: C++20 support
# (CMAKE_CXX_STANDARD 20 / cxx_std_20) and file(GLOB ... CONFIGURE_DEPENDS)
# were all introduced in 3.12, so declaring 3.10 only postponed the failure
# to an obscure error later in the configure step.
cmake_minimum_required(VERSION 3.12...4.2.1)
project(node-tesseract-ocr)

# Emit compile_commands.json for editors and other tooling.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Project-wide C++ defaults: strict ISO C++20, no compiler extensions.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

if(APPLE)
  # FORCE overwrites whatever value is already in the cache, so the addon is
  # always built for macOS 14.0 regardless of -D or environment overrides.
  set(CMAKE_OSX_DEPLOYMENT_TARGET "14.0" CACHE STRING "" FORCE)
endif()

# Collect the addon sources. CONFIGURE_DEPENDS makes the build re-check the
# glob so that added or removed files trigger a re-configure automatically.
file(GLOB SOURCE_FILES CONFIGURE_DEPENDS "src/*.cpp" "src/*.hpp")

# A Node.js addon is a shared library without the "lib" prefix and with a
# ".node" suffix.
add_library(${PROJECT_NAME} SHARED ${SOURCE_FILES} ${CMAKE_JS_SRC})
set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" SUFFIX ".node")

# Target-scoped instead of directory-scoped, so the definition cannot leak
# into any target added to this directory later.
target_compile_definitions(${PROJECT_NAME} PRIVATE NAPI_VERSION=10)

# Locate Tesseract and Leptonica through pkg-config; IMPORTED_TARGET creates
# the PkgConfig::TESS and PkgConfig::LEPT targets linked below.
find_package(PkgConfig REQUIRED)
pkg_check_modules(TESS REQUIRED IMPORTED_TARGET "tesseract>=5")
pkg_check_modules(LEPT REQUIRED IMPORTED_TARGET "lept>=1.74")

target_include_directories(${PROJECT_NAME} PRIVATE
  ${CMAKE_JS_INC}
  ${TESS_INCLUDE_DIRS}
  ${LEPT_INCLUDE_DIRS}
)

if(APPLE)
  # Also expose the parent of every Leptonica include directory.
  # NOTE(review): presumably needed so that includes of the form
  # <leptonica/...> resolve when pkg-config reports the leptonica
  # subdirectory itself - confirm against the sources.
  foreach(dir IN LISTS LEPT_INCLUDE_DIRS)
    target_include_directories(${PROJECT_NAME} PRIVATE "${dir}/..")
  endforeach()
endif()

target_link_libraries(${PROJECT_NAME} PRIVATE
  ${CMAKE_JS_LIB}
  PkgConfig::TESS
  PkgConfig::LEPT
)
target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_20)

if(MSVC AND CMAKE_JS_NODELIB_DEF AND CMAKE_JS_NODELIB_TARGET)
  # Generate node.lib from the .def file at configure time.
  execute_process(
    COMMAND ${CMAKE_AR} /def:${CMAKE_JS_NODELIB_DEF} /out:${CMAKE_JS_NODELIB_TARGET} ${CMAKE_STATIC_LINKER_FLAGS}
    RESULT_VARIABLE nodelib_result
  )
  # Surface a failure here instead of leaving it to show up as a link error.
  if(NOT "${nodelib_result}" STREQUAL "0")
    message(WARNING
      "Generating ${CMAKE_JS_NODELIB_TARGET} failed (result: ${nodelib_result}); "
      "linking the addon will likely fail.")
  endif()
endif()
package/README.md CHANGED
@@ -1,134 +1,506 @@
1
1
  # node-tesseract-ocr
2
2
 
3
- C++ Addon for Node.js, that uses Tesseract OCR (`libtesseract-dev`) in JavaScript/TypeScript.
3
+ Native C++ addon for Node.js that exposes Tesseract OCR (`libtesseract-dev`) to JavaScript/TypeScript.
4
+
5
+ ## Table of Contents
6
+
7
+ - [Features](#features)
8
+ - [Prerequisites](#prerequisites)
9
+ - [Install](#install)
10
+ - [Install additional training data](#install-additional-training-data)
11
+ - [Build](#build)
12
+ - [Start](#start)
13
+ - [Scripts](#scripts)
14
+ - [Examples](#examples)
15
+ - [Public API](#public-api)
16
+ - [Enums](#enums)
17
+ - [Types](#types)
18
+ - [Tesseract API](#tesseract-api)
19
+ - [Example](#example)
20
+ - [License](#license)
21
+ - [Special Thanks](#special-thanks)
22
+
23
+ ## Features
24
+
25
+ - Native bindings to Tesseract (prebuilds via `pkg-prebuilds`)
26
+ - Access to Tesseract enums and configuration from TypeScript
27
+ - Progress callback and multiple output formats
28
+
29
+ ## Prerequisites
4
30
 
5
- Status: **WIP**
31
+ - nodejs
32
+ - node-addon-api
33
+ - c++ build toolchain (e.g. build-essential)
34
+ - libtesseract-dev
35
+ - libleptonica-dev
36
+ - Tesseract training data (eng, deu, ...)
37
+
38
+ > See [Install](#install)
39
+
40
+ ## Install
41
+
42
+ ```bash
43
+ sudo apt update
44
+ sudo apt install -y nodejs npm build-essential pkg-config libtesseract-dev libleptonica-dev tesseract-ocr-eng
45
+ ```
46
+
47
+ ```bash
48
+ git clone git@github.com:luii/node-tesseract-ocr.git
49
+ cd node-tesseract-ocr
50
+ npm install
51
+ ```
52
+
53
+ ### Install additional training data
54
+
55
+ On Debian/Ubuntu, language data is provided as packages named `tesseract-ocr-<lang>`.
56
+ Install additional languages as needed, for example:
57
+
58
+ ```bash
59
+ sudo apt install -y tesseract-ocr-deu tesseract-ocr-eng tesseract-ocr-jpn
60
+ ```
61
+
62
+ If you install traineddata files manually, make sure `NODE_TESSERACT_DATAPATH` points to the directory that contains them (for example `/usr/share/tesseract-ocr/5/tessdata`).
63
+
64
+ ## Build
65
+
66
+ ```bash
67
+ # Debug build (native addon + TS outputs)
68
+ npm run build:debug
69
+
70
+ # Release build
71
+ npm run build:release
72
+ ```
73
+
74
+ ## Start
75
+
76
+ Set `NODE_TESSERACT_DATAPATH` to your traineddata directory (usually `/usr/share/tesseract-ocr/5/tessdata`).
77
+
78
+ ```sh
79
+ env NODE_TESSERACT_DATAPATH=/usr/share/tesseract-ocr/5/tessdata node path/to/your/app.js
80
+ ```
81
+
82
+ ## Scripts
83
+
84
+ ```bash
85
+ # Build native addon + TS outputs (debug / release)
86
+ npm run build:debug
87
+ npm run build:release
6
88
 
7
- Lizenz: **Apache-2.0**
89
+ # Build precompiled binaries for distribution
90
+ npm run prebuild
91
+
92
+ # Run the JS example (builds debug first)
93
+ npm run example:recognize
94
+
95
+ # Tests
96
+ npm run test:cpp
97
+ npm run test:js
98
+ npm run test:js:watch
99
+ ```
100
+
101
+ ## Examples
102
+
103
+ ```sh
104
+ env NODE_TESSERACT_DATAPATH=/usr/share/tesseract-ocr/5/tessdata npm run example:recognize
105
+ ```
8
106
 
9
107
  ## Public API
10
108
 
11
- ### Class: `Tesseract`
109
+ ### Enums
110
+
111
+ #### `Language`
112
+
113
+ Mapping of available Tesseract language codes. Most are [_3-letter ISO 639-2/T style_](https://en.wikipedia.org/wiki/List_of_ISO_639-2_codes) (e.g. `eng`, `deu`, `jpn`), with Tesseract-specific variants such as `chi_sim`, `deu_latf`, or `osd`. The value must match the installed traineddata filename (without the `.traineddata` suffix). You can pass a single code via `TesseractInitOptions.lang`.
114
+
115
+ > [!IMPORTANT]
116
+ > **If you join codes with a plus sign (e.g. `deu+eng`), Tesseract will look for multiple languages in the same image (here: German and English).**
117
+
118
+ #### `OcrEngineMode`
119
+
120
+ Full list of OCR engine modes from Tesseract.
12
121
 
13
- #### TesseractOptions
122
+ | Name | Value | Deprecated | Description |
123
+ | ----------------------------- | ----- | ---------- | ---------------------------------------------------------- |
124
+ | `OEM_TESSERACT_ONLY` | 0 | Yes | Run Tesseract only (fastest). |
125
+ | `OEM_LSTM_ONLY` | 1 | No | Run only the LSTM line recognizer. |
126
+ | `OEM_TESSERACT_LSTM_COMBINED` | 2 | Yes | Run LSTM with fallback to Tesseract. |
127
+ | `OEM_DEFAULT` | 3 | No | Infer engine mode from configs; default is Tesseract-only. |
128
+
129
+ #### `PageSegmentationMode`
130
+
131
+ Full list of page segmentation modes from Tesseract.
132
+
133
+ | Name | Value | Deprecated | Description |
134
+ | ---------------------------- | ----- | ---------- | --------------------------------------------------------- |
135
+ | `PSM_OSD_ONLY` | 0 | No | Orientation and script detection only. |
136
+ | `PSM_AUTO_OSD` | 1 | No | Automatic page segmentation with OSD. |
137
+ | `PSM_AUTO_ONLY` | 2 | No | Automatic page segmentation, no OSD or OCR. |
138
+ | `PSM_AUTO` | 3 | No | Fully automatic page segmentation, no OSD. |
139
+ | `PSM_SINGLE_COLUMN` | 4 | No | Assume a single column of text of variable sizes. |
140
+ | `PSM_SINGLE_BLOCK_VERT_TEXT` | 5 | No | Assume a single uniform block of vertically aligned text. |
141
+ | `PSM_SINGLE_BLOCK` | 6 | No | Assume a single uniform block of text (default). |
142
+ | `PSM_SINGLE_LINE` | 7 | No | Treat the image as a single text line. |
143
+ | `PSM_SINGLE_WORD` | 8 | No | Treat the image as a single word. |
144
+ | `PSM_CIRCLE_WORD` | 9 | No | Treat the image as a single word in a circle. |
145
+ | `PSM_SINGLE_CHAR` | 10 | No | Treat the image as a single character. |
146
+ | `PSM_SPARSE_TEXT` | 11 | No | Find as much text as possible in no particular order. |
147
+ | `PSM_SPARSE_TEXT_OSD` | 12 | No | Sparse text with orientation and script detection. |
148
+ | `PSM_RAW_LINE` | 13 | No | Single text line, bypassing Tesseract-specific hacks. |
149
+
150
+ ### Types
151
+
152
+ #### `TesseractInitOptions`
153
+
154
+ | Field | Type | Optional | Default | Description |
155
+ | ----------------------- | ----------------------------------------------------------------------------------------------------- | -------- | ----------- | --------------------------------------- |
156
+ | `lang` | [`Language[]`](#language) | Yes | `undefined` | Languages to load as an array. |
157
+ | `oem` | [`OcrEngineMode`](#ocrenginemode) | Yes | `undefined` | OCR engine mode. |
158
+ | `vars` | `Partial<Record<keyof ConfigurationVariables, ConfigurationVariables[keyof ConfigurationVariables]>>` | Yes | `undefined` | Variables to set. |
159
+ | `configs` | `Array<string>` | Yes | `undefined` | Tesseract config files to apply. |
160
+ | `setOnlyNonDebugParams` | `boolean` | Yes | `undefined` | If true, only non-debug params are set. |
161
+
162
+ #### `TesseractSetRectangleOptions`
163
+
164
+ | Field | Type | Optional | Default | Description |
165
+ | -------- | -------- | -------- | ------- | ----------------- |
166
+ | `top` | `number` | No | n/a | Top coordinate. |
167
+ | `left` | `number` | No | n/a | Left coordinate. |
168
+ | `width` | `number` | No | n/a | Rectangle width. |
169
+ | `height` | `number` | No | n/a | Rectangle height. |
170
+
171
+ #### `ProgressChangedInfo`
172
+
173
+ | Field | Type | Optional | Default | Description |
174
+ | ---------- | -------- | -------- | ------- | ------------------------------------------ |
175
+ | `progress` | `number` | No | n/a | Chars in the current buffer. |
176
+ | `percent` | `number` | No | n/a | Percent complete (0-100). |
177
+ | `ocrAlive` | `number` | No | n/a | Non-zero if worker is alive. |
178
+ | `top` | `number` | No | n/a | Top coordinate of current element bbox. |
179
+ | `right` | `number` | No | n/a | Right coordinate of current element bbox. |
180
+ | `bottom` | `number` | No | n/a | Bottom coordinate of current element bbox. |
181
+ | `left` | `number` | No | n/a | Left coordinate of current element bbox. |
182
+
183
+ #### `DetectOrientationScriptResult`
184
+
185
+ | Field | Type | Optional | Default | Description |
186
+ | ----------------------- | -------- | -------- | ------- | -------------------------------------------------- |
187
+ | `orientationDegrees` | `number` | No | n/a | Orientation of the source image (0, 90, 180, 270). |
188
+ | `orientationConfidence` | `number` | No | n/a | Confidence for the orientation. |
189
+ | `scriptName` | `string` | No | n/a | Detected script name. |
190
+ | `scriptConfidence` | `number` | No | n/a | Confidence for the script. |
191
+
192
+ ### Tesseract API
193
+
194
+ #### Constructor
14
195
 
15
196
  ```ts
16
- {
17
- /**
18
- * Its generally safer to use as few languages as possible.
19
- * The more languages Tesseract needs to load the longer
20
- * it takes to recognize a image.
21
- * @public
22
- */
23
- lang: Array<keyof typeof AvailableLanguages>;
197
+ new Tesseract();
198
+ ```
199
+
200
+ Creates a new Tesseract instance.
24
201
 
25
- /**
26
- * Skip Ocr for when you only want to (for example) analyze the layout
27
- * @property {boolean} [skipOcr]
28
- */
29
- skipOcr?: boolean;
202
+ #### init
30
203
 
31
- /**
32
- * OCR Engine Modes
33
- * The engine mode cannot be changed after creating the instance
34
- * If another mode is needed, its advised to create a new instance.
35
- * @throws {Error} Will throw an error when oem mode is below 0 or over 3
36
- */
37
- oemMode?: OcrEngineMode;
204
+ Initializes Tesseract with language, engine mode, configs, and variables.
38
205
 
39
- /**
40
- * Page Segmentation Modes
41
- * The page segmentation mode cannot be changed after creating the instance
42
- * If another mode is needed, its advised to create a new instance.
43
- * @throws {Error} Will throw a error when psm mode is below 0 or over 13
44
- */
45
- psm?: PageSegmentationMode;
46
- }
206
+ | Name | Type | Optional | Default | Description |
207
+ | ------- | ----------------------------------------------- | -------- | ------- | ----------------------- |
208
+ | options | [`TesseractInitOptions`](#tesseractinitoptions) | No | n/a | Initialization options. |
209
+
210
+ ```ts
211
+ init(options: TesseractInitOptions): Promise<void>
47
212
  ```
48
213
 
49
- #### constructor(options: TesseractOptions)
214
+ #### initForAnalysePage
215
+
216
+ Initializes for layout analysis only.
50
217
 
51
218
  ```ts
52
- new Tesseract({
53
- lang: string,
54
- skipOcr: boolean
55
- })
219
+ initForAnalysePage(): Promise<void>
56
220
  ```
57
221
 
58
- ##### recognize
222
+ #### analysePage
223
+
224
+ Runs the layout analysis.
225
+
226
+ | Name | Type | Optional | Default | Description |
227
+ | ----------------- | ------- | -------- | ------- | ------------------------------- |
228
+ | mergeSimilarWords | boolean | No | n/a | Whether to merge similar words. |
59
229
 
60
230
  ```ts
61
- recognize(buffer: Buffer, RecognizeOptions) => Promise<{
62
- getText() => string;
63
- getHOCR() => string;
64
- getTSV() => string;
65
- getALTO() => string;
66
- }>
231
+ analysePage(mergeSimilarWords: boolean): Promise<void>
232
+ ```
233
+
234
+ #### setPageMode
235
+
236
+ Sets the page segmentation mode.
237
+
238
+ | Name | Type | Optional | Default | Description |
239
+ | ---- | ------------------------------------------------ | -------- | ------- | ----------------------- |
240
+ | psm | [`PageSegmentationMode`](#pagesegmentationmode) | No | n/a | Page segmentation mode. |
67
241
 
242
+ ```ts
243
+ setPageMode(psm: PageSegmentationMode): Promise<void>
68
244
  ```
69
245
 
70
- ###### Recognize Options
246
+ #### setVariable
247
+
248
+ Sets a Tesseract variable. Returns `false` if the lookup failed.
249
+
250
+ | Name | Type | Optional | Default | Description |
251
+ | ----- | -------------------------------------------------------------- | -------- | ------- | --------------- |
252
+ | name | keyof SetVariableConfigVariables | No | n/a | Variable name. |
253
+ | value | SetVariableConfigVariables\[keyof SetVariableConfigVariables\] | No | n/a | Variable value. |
71
254
 
72
255
  ```ts
73
- {
74
- progressChanged?: ({
75
- progress: number;
76
- ocrAlive: number;
77
- top: number;
78
- right: number;
79
- bottom: number;
80
- left: number;
81
- }) => void,
82
-
83
- }
256
+ setVariable(name: keyof SetVariableConfigVariables, value: SetVariableConfigVariables[keyof SetVariableConfigVariables]): Promise<boolean>
84
257
  ```
85
258
 
86
- ## Prerequisities
259
+ #### getIntVariable
87
260
 
88
- - nodejs
89
- - python3 (for `node-gyp`)
90
- - node-addon-api
91
- - c++ build-toolchain (e.g. build-essentials)
92
- - libtesseract-dev
93
- - libleptonica-dev
94
- - Tesseract Training-data (eng, deu, ...)
261
+ Reads an integer variable from Tesseract.
95
262
 
96
- > See [Install](#install)
263
+ | Name | Type | Optional | Default | Description |
264
+ | ---- | -------------------------------- | -------- | ------- | -------------- |
265
+ | name | keyof SetVariableConfigVariables | No | n/a | Variable name. |
97
266
 
98
- ## Install
267
+ ```ts
268
+ getIntVariable(name: keyof SetVariableConfigVariables): Promise<number>
269
+ ```
99
270
 
100
- ```bash
101
- sudo apt update
102
- sudo apt install -y nodejs npm build-essential python3 pkg-config libtesseract-dev libleptonica-dev tesseract-ocr-eng
271
+ #### getBoolVariable
272
+
273
+ Reads a boolean variable from Tesseract. Returns `0` or `1`.
274
+
275
+ | Name | Type | Optional | Default | Description |
276
+ | ---- | -------------------------------- | -------- | ------- | -------------- |
277
+ | name | keyof SetVariableConfigVariables | No | n/a | Variable name. |
278
+
279
+ ```ts
280
+ getBoolVariable(name: keyof SetVariableConfigVariables): Promise<number>
103
281
  ```
104
282
 
105
- ```bash
106
- git clone git@github.com:luii/node-tesseract-ocr.git
107
- cd node-tesseract-ocr
108
- npm install
283
+ #### getDoubleVariable
284
+
285
+ Reads a double variable from Tesseract.
286
+
287
+ | Name | Type | Optional | Default | Description |
288
+ | ---- | -------------------------------- | -------- | ------- | -------------- |
289
+ | name | keyof SetVariableConfigVariables | No | n/a | Variable name. |
290
+
291
+ ```ts
292
+ getDoubleVariable(name: keyof SetVariableConfigVariables): Promise<number>
109
293
  ```
110
294
 
111
- ## Build
295
+ #### getStringVariable
112
296
 
113
- ```bash
114
- npm run build
297
+ Reads a string variable from Tesseract.
298
+
299
+ | Name | Type | Optional | Default | Description |
300
+ | ---- | -------------------------------- | -------- | ------- | -------------- |
301
+ | name | keyof SetVariableConfigVariables | No | n/a | Variable name. |
302
+
303
+ ```ts
304
+ getStringVariable(name: keyof SetVariableConfigVariables): Promise<string>
115
305
  ```
116
306
 
117
- ## Start
307
+ #### setImage
118
308
 
119
- Either set the `NODE_TESSERACT_DATAPATH` beforehand or do it in one go, it needs to point to where the training data is located.
120
- On a standard install this is usually `/usr/share/tesseract-ocr/5/tessdata`
309
+ Sets the image from a Buffer.
121
310
 
122
- ```bash
123
- NODE_TESSERACT_DATAPATH=/usr/share/tesseract-ocr/5/tessdata npm run dev
311
+ | Name | Type | Optional | Default | Description |
312
+ | ------ | ------ | -------- | ------- | ----------- |
313
+ | buffer | Buffer | No | n/a | Image data. |
314
+
315
+ ```ts
316
+ setImage(buffer: Buffer): Promise<void>
124
317
  ```
125
318
 
126
- ## Examples
319
+ #### setRectangle
127
320
 
128
- ```bash
129
- NODE_TESSERACT_DATAPATH=/usr/share/tesseract-ocr/5/tessdata npm run examples:recognize
321
+ Sets the image region using coordinates and size.
322
+
323
+ | Name | Type | Optional | Default | Description |
324
+ | ------- | --------------------------------------------------------------- | -------- | ------- | ------------------ |
325
+ | options | [`TesseractSetRectangleOptions`](#tesseractsetrectangleoptions) | No | n/a | Region definition. |
326
+
327
+ ```ts
328
+ setRectangle(options: TesseractSetRectangleOptions): Promise<void>
329
+ ```
330
+
331
+ #### setSourceResolution
332
+
333
+ Sets the source resolution in PPI.
334
+
335
+ | Name | Type | Optional | Default | Description |
336
+ | ---- | ------ | -------- | ------- | ---------------- |
337
+ | ppi | number | No | n/a | Pixels per inch. |
338
+
339
+ ```ts
340
+ setSourceResolution(ppi: number): Promise<void>
341
+ ```
342
+
343
+ #### recognize
344
+
345
+ Starts OCR and calls the callback with progress info.
346
+
347
+ | Name | Type | Optional | Default | Description |
348
+ | ---------------- | ------------------------------------------------------------- | -------- | ------- | ------------------ |
349
+ | progressCallback | (info: [`ProgressChangedInfo`](#progresschangedinfo)) => void | No | n/a | Progress callback. |
350
+
351
+ ```ts
352
+ recognize(progressCallback: (info: ProgressChangedInfo) => void): Promise<void>
130
353
  ```
131
354
 
355
+ #### getUTF8Text
356
+
357
+ Returns recognized text as UTF-8.
358
+
359
+ ```ts
360
+ getUTF8Text(): Promise<string>
361
+ ```
362
+
363
+ #### getHOCRText
364
+
365
+ Returns HOCR output. Optional progress callback and page number.
366
+
367
+ | Name | Type | Optional | Default | Description |
368
+ | ---------------- | ------------------------------------------------------------- | -------- | --------- | ---------------------- |
369
+ | progressCallback | (info: [`ProgressChangedInfo`](#progresschangedinfo)) => void | Yes | undefined | Progress callback. |
370
+ | pageNumber | number | Yes | undefined | Page number (0-based). |
371
+
372
+ ```ts
373
+ getHOCRText(
374
+ progressCallback?: (info: ProgressChangedInfo) => void,
375
+ pageNumber?: number,
376
+ ): Promise<string>
377
+ ```
378
+
379
+ #### getTSVText
380
+
381
+ Returns TSV output.
382
+
383
+ ```ts
384
+ getTSVText(): Promise<string>
385
+ ```
386
+
387
+ #### getUNLVText
388
+
389
+ Returns UNLV output.
390
+
391
+ ```ts
392
+ getUNLVText(): Promise<string>
393
+ ```
394
+
395
+ #### getALTOText
396
+
397
+ Returns ALTO output. Optional progress callback and page number.
398
+
399
+ | Name | Type | Optional | Default | Description |
400
+ | ---------------- | ------------------------------------------------------------- | -------- | --------- | ---------------------- |
401
+ | progressCallback | (info: [`ProgressChangedInfo`](#progresschangedinfo)) => void | Yes | undefined | Progress callback. |
402
+ | pageNumber | number | Yes | undefined | Page number (0-based). |
403
+
404
+ ```ts
405
+ getALTOText(
406
+ progressCallback?: (info: ProgressChangedInfo) => void,
407
+ pageNumber?: number,
408
+ ): Promise<string>
409
+ ```
410
+
411
+ #### detectOrientationScript
412
+
413
+ Detects orientation and script with confidences. Returns [`DetectOrientationScriptResult`](#detectorientationscriptresult).
414
+
415
+ ```ts
416
+ detectOrientationScript(): Promise<DetectOrientationScriptResult>
417
+ ```
418
+
419
+ #### meanTextConf
420
+
421
+ Mean text confidence (0-100).
422
+
423
+ ```ts
424
+ meanTextConf(): Promise<number>
425
+ ```
426
+
427
+ #### getInitLanguages
428
+
429
+ Returns [`Language`](#language) in raw Tesseract format (e.g. "deu+eng").
430
+
431
+ ```ts
432
+ getInitLanguages(): Promise<string>
433
+ ```
434
+
435
+ #### getLoadedLanguages
436
+
437
+ Returns [`Language[]`](#language) in raw Tesseract format.
438
+
439
+ ```ts
440
+ getLoadedLanguages(): Promise<Language[]>
441
+ ```
442
+
443
+ #### getAvailableLanguages
444
+
445
+ Returns [`Language[]`](#language) in raw Tesseract format.
446
+
447
+ ```ts
448
+ getAvailableLanguages(): Promise<Language[]>
449
+ ```
450
+
451
+ #### clear
452
+
453
+ Clears internal state.
454
+
455
+ ```ts
456
+ clear(): Promise<void>
457
+ ```
458
+
459
+ #### end
460
+
461
+ Ends the instance.
462
+
463
+ ```ts
464
+ end(): Promise<void>
465
+ ```
466
+
467
+ ## Example
468
+
469
+ You can find a similar example in the `examples/` folder of the project.
470
+
471
+ ```ts
472
+ import fs from "node:fs";
473
+ import Tesseract, { OcrEngineModes } from "@luii/node-tesseract-ocr";
474
+
475
+ async function main() {
476
+ const tesseract = new Tesseract();
477
+ await tesseract.init({
478
+ lang: ["eng"],
479
+ oem: OcrEngineModes.OEM_LSTM_ONLY,
480
+ });
481
+
482
+ const buffer = fs.readFileSync("example1.png");
483
+ await tesseract.setImage(buffer);
484
+ await tesseract.recognize((info) => {
485
+ console.log(`Progress: ${info.percent}%`);
486
+ });
487
+
488
+ const text = await tesseract.getUTF8Text();
489
+ console.log(text);
490
+
491
+ await tesseract.end();
492
+ }
493
+
494
+ main().catch((err) => {
495
+ console.error(err);
496
+ process.exit(1);
497
+ });
498
+ ```
499
+
500
+ ## License
501
+
502
+ Apache-2.0. See [`LICENSE.md`](/LICENSE.md) for full terms.
503
+
132
504
  ## Special Thanks
133
505
 
134
506
  - **Stunt3000**