@luii/node-tesseract-ocr 1.0.18 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +45 -0
- package/LICENSE.md +201 -661
- package/README.md +457 -85
- package/dist/cjs/index.cjs +272 -18
- package/dist/cjs/index.d.ts +1069 -0
- package/dist/esm/index.d.ts +1069 -0
- package/dist/esm/index.mjs +257 -18
- package/package.json +33 -27
- package/prebuilds/node-tesseract-ocr-darwin-arm64/node-napi-v10.node +0 -0
- package/prebuilds/node-tesseract-ocr-linux-x64/node-napi-v10.node +0 -0
- package/src/addon.cpp +9 -26
- package/src/commands.hpp +489 -0
- package/src/monitor.hpp +81 -0
- package/src/tesseract_wrapper.cpp +714 -0
- package/src/tesseract_wrapper.hpp +70 -0
- package/src/utils.hpp +8 -0
- package/src/worker_thread.cpp +141 -0
- package/src/worker_thread.hpp +79 -0
- package/binding.gyp +0 -60
- package/dist/index.d.ts +0 -351
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/prebuilds/linux-x64/node.napi.node +0 -0
- package/src/handle.cpp +0 -172
- package/src/handle.h +0 -59
- package/src/ocr_result.cpp +0 -101
- package/src/ocr_result.h +0 -49
- package/src/ocr_worker.cpp +0 -193
- package/src/ocr_worker.h +0 -69
|
@@ -0,0 +1,1069 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* All available languages for tesseract
|
|
3
|
+
* @readonly
|
|
4
|
+
* @enum {string}
|
|
5
|
+
*/
|
|
6
|
+
export declare const Language: {
|
|
7
|
+
readonly afr: "afr";
|
|
8
|
+
readonly amh: "amh";
|
|
9
|
+
readonly ara: "ara";
|
|
10
|
+
readonly asm: "asm";
|
|
11
|
+
readonly aze: "aze";
|
|
12
|
+
readonly aze_cyrl: "aze_cyrl";
|
|
13
|
+
readonly bel: "bel";
|
|
14
|
+
readonly ben: "ben";
|
|
15
|
+
readonly bod: "bod";
|
|
16
|
+
readonly bos: "bos";
|
|
17
|
+
readonly bre: "bre";
|
|
18
|
+
readonly bul: "bul";
|
|
19
|
+
readonly cat: "cat";
|
|
20
|
+
readonly ceb: "ceb";
|
|
21
|
+
readonly ces: "ces";
|
|
22
|
+
readonly chi_sim: "chi_sim";
|
|
23
|
+
readonly chi_tra: "chi_tra";
|
|
24
|
+
readonly chr: "chr";
|
|
25
|
+
readonly cos: "cos";
|
|
26
|
+
readonly cym: "cym";
|
|
27
|
+
readonly dan: "dan";
|
|
28
|
+
readonly deu: "deu";
|
|
29
|
+
readonly deu_latf: "deu_latf";
|
|
30
|
+
readonly div: "div";
|
|
31
|
+
readonly dzo: "dzo";
|
|
32
|
+
readonly ell: "ell";
|
|
33
|
+
readonly eng: "eng";
|
|
34
|
+
readonly enm: "enm";
|
|
35
|
+
readonly epo: "epo";
|
|
36
|
+
readonly equ: "equ";
|
|
37
|
+
readonly est: "est";
|
|
38
|
+
readonly eus: "eus";
|
|
39
|
+
readonly fao: "fao";
|
|
40
|
+
readonly fas: "fas";
|
|
41
|
+
readonly fil: "fil";
|
|
42
|
+
readonly fin: "fin";
|
|
43
|
+
readonly fra: "fra";
|
|
44
|
+
readonly frm: "frm";
|
|
45
|
+
readonly fry: "fry";
|
|
46
|
+
readonly gla: "gla";
|
|
47
|
+
readonly gle: "gle";
|
|
48
|
+
readonly glg: "glg";
|
|
49
|
+
readonly grc: "grc";
|
|
50
|
+
readonly guj: "guj";
|
|
51
|
+
readonly hat: "hat";
|
|
52
|
+
readonly heb: "heb";
|
|
53
|
+
readonly hin: "hin";
|
|
54
|
+
readonly hrv: "hrv";
|
|
55
|
+
readonly hun: "hun";
|
|
56
|
+
readonly hye: "hye";
|
|
57
|
+
readonly iku: "iku";
|
|
58
|
+
readonly ind: "ind";
|
|
59
|
+
readonly isl: "isl";
|
|
60
|
+
readonly ita: "ita";
|
|
61
|
+
readonly ita_old: "ita_old";
|
|
62
|
+
readonly jav: "jav";
|
|
63
|
+
readonly jpn: "jpn";
|
|
64
|
+
readonly kan: "kan";
|
|
65
|
+
readonly kat: "kat";
|
|
66
|
+
readonly kat_old: "kat_old";
|
|
67
|
+
readonly kaz: "kaz";
|
|
68
|
+
readonly khm: "khm";
|
|
69
|
+
readonly kir: "kir";
|
|
70
|
+
readonly kmr: "kmr";
|
|
71
|
+
readonly kor: "kor";
|
|
72
|
+
readonly kor_vert: "kor_vert";
|
|
73
|
+
readonly kur: "kur";
|
|
74
|
+
readonly lao: "lao";
|
|
75
|
+
readonly lat: "lat";
|
|
76
|
+
readonly lav: "lav";
|
|
77
|
+
readonly lit: "lit";
|
|
78
|
+
readonly ltz: "ltz";
|
|
79
|
+
readonly mal: "mal";
|
|
80
|
+
readonly mar: "mar";
|
|
81
|
+
readonly mkd: "mkd";
|
|
82
|
+
readonly mlt: "mlt";
|
|
83
|
+
readonly mon: "mon";
|
|
84
|
+
readonly mri: "mri";
|
|
85
|
+
readonly msa: "msa";
|
|
86
|
+
readonly mya: "mya";
|
|
87
|
+
readonly nep: "nep";
|
|
88
|
+
readonly nld: "nld";
|
|
89
|
+
readonly nor: "nor";
|
|
90
|
+
readonly oci: "oci";
|
|
91
|
+
readonly ori: "ori";
|
|
92
|
+
readonly osd: "osd";
|
|
93
|
+
readonly pan: "pan";
|
|
94
|
+
readonly pol: "pol";
|
|
95
|
+
readonly por: "por";
|
|
96
|
+
readonly pus: "pus";
|
|
97
|
+
readonly que: "que";
|
|
98
|
+
readonly ron: "ron";
|
|
99
|
+
readonly rus: "rus";
|
|
100
|
+
readonly san: "san";
|
|
101
|
+
readonly sin: "sin";
|
|
102
|
+
readonly slk: "slk";
|
|
103
|
+
readonly slv: "slv";
|
|
104
|
+
readonly snd: "snd";
|
|
105
|
+
readonly spa: "spa";
|
|
106
|
+
readonly spa_old: "spa_old";
|
|
107
|
+
readonly sqi: "sqi";
|
|
108
|
+
readonly srp: "srp";
|
|
109
|
+
readonly srp_latn: "srp_latn";
|
|
110
|
+
readonly sun: "sun";
|
|
111
|
+
readonly swa: "swa";
|
|
112
|
+
readonly swe: "swe";
|
|
113
|
+
readonly syr: "syr";
|
|
114
|
+
readonly tam: "tam";
|
|
115
|
+
readonly tat: "tat";
|
|
116
|
+
readonly tel: "tel";
|
|
117
|
+
readonly tgk: "tgk";
|
|
118
|
+
readonly tha: "tha";
|
|
119
|
+
readonly tir: "tir";
|
|
120
|
+
readonly ton: "ton";
|
|
121
|
+
readonly tur: "tur";
|
|
122
|
+
readonly uig: "uig";
|
|
123
|
+
readonly ukr: "ukr";
|
|
124
|
+
readonly urd: "urd";
|
|
125
|
+
readonly uzb: "uzb";
|
|
126
|
+
readonly uzb_cyrl: "uzb_cyrl";
|
|
127
|
+
readonly vie: "vie";
|
|
128
|
+
readonly yid: "yid";
|
|
129
|
+
readonly yor: "yor";
|
|
130
|
+
};
|
|
131
|
+
export type Language = (typeof Language)[keyof typeof Language];
|
|
132
|
+
/**
|
|
133
|
+
* When Tesseract/Cube is initialized we can choose to instantiate/load/run
|
|
134
|
+
* only the Tesseract part, only the Cube part or both along with the combiner.
|
|
135
|
+
* The preference of which engine to use is stored in tessedit_ocr_engine_mode.
|
|
136
|
+
* @readonly
|
|
137
|
+
* @enum {number}
|
|
138
|
+
*/
|
|
139
|
+
export declare const OcrEngineModes: {
|
|
140
|
+
/**
|
|
141
|
+
* Run Tesseract only - fastest
|
|
142
|
+
* @deprecated
|
|
143
|
+
* @type {number}
|
|
144
|
+
*/
|
|
145
|
+
readonly OEM_TESSERACT_ONLY: 0;
|
|
146
|
+
/**
|
|
147
|
+
* Run just the LSTM line recognizer.
|
|
148
|
+
* @type {nmumber}
|
|
149
|
+
*/
|
|
150
|
+
readonly OEM_LSTM_ONLY: 1;
|
|
151
|
+
/**
|
|
152
|
+
* Run the LSTM recognizer, but allow fallback
|
|
153
|
+
* to Tesseract when things get difficult.
|
|
154
|
+
* @deprecated
|
|
155
|
+
* @type {number}
|
|
156
|
+
*/
|
|
157
|
+
readonly OEM_TESSERACT_LSTM_COMBINED: 2;
|
|
158
|
+
/**
|
|
159
|
+
* Specify this mode when calling init(),
|
|
160
|
+
* to indicate that any of the above modes
|
|
161
|
+
* should be automatically inferred from the
|
|
162
|
+
* variables in the language-specific config,
|
|
163
|
+
* command-line configs, or if not specified
|
|
164
|
+
* in any of the above should be set to the
|
|
165
|
+
* default OEM_TESSERACT_ONLY.
|
|
166
|
+
* @type {number}
|
|
167
|
+
* @default
|
|
168
|
+
*/
|
|
169
|
+
readonly OEM_DEFAULT: 3;
|
|
170
|
+
};
|
|
171
|
+
export type OcrEngineMode = (typeof OcrEngineModes)[keyof typeof OcrEngineModes];
|
|
172
|
+
/**
|
|
173
|
+
* Possible modes for page layout analysis.
|
|
174
|
+
* @readonly
|
|
175
|
+
* @enum {number}
|
|
176
|
+
*/
|
|
177
|
+
export declare const PageSegmentationModes: {
|
|
178
|
+
readonly PSM_OSD_ONLY: 0;
|
|
179
|
+
readonly PSM_AUTO_OSD: 1;
|
|
180
|
+
readonly PSM_AUTO_ONLY: 2;
|
|
181
|
+
readonly PSM_AUTO: 3;
|
|
182
|
+
readonly PSM_SINGLE_COLUMN: 4;
|
|
183
|
+
readonly PSM_SINGLE_BLOCK_VERT_TEXT: 5;
|
|
184
|
+
readonly PSM_SINGLE_BLOCK: 6;
|
|
185
|
+
readonly PSM_SINGLE_LINE: 7;
|
|
186
|
+
readonly PSM_SINGLE_WORD: 8;
|
|
187
|
+
readonly PSM_CIRCLE_WORD: 9;
|
|
188
|
+
readonly PSM_SINGLE_CHAR: 10;
|
|
189
|
+
readonly PSM_SPARSE_TEXT: 11;
|
|
190
|
+
readonly PSM_SPARSE_TEXT_OSD: 12;
|
|
191
|
+
readonly PSM_RAW_LINE: 13;
|
|
192
|
+
};
|
|
193
|
+
export type PageSegmentationMode = (typeof PageSegmentationModes)[keyof typeof PageSegmentationModes];
|
|
194
|
+
export declare const LogLevels: {
|
|
195
|
+
readonly ALL: "-2147483648";
|
|
196
|
+
readonly TRACE: "5000";
|
|
197
|
+
readonly DEBUG: "10000";
|
|
198
|
+
readonly INFO: "20000";
|
|
199
|
+
readonly WARN: "30000";
|
|
200
|
+
readonly ERROR: "40000";
|
|
201
|
+
readonly FATAL: "50000";
|
|
202
|
+
readonly OFF: "2147483647";
|
|
203
|
+
};
|
|
204
|
+
export type LogLevel = (typeof LogLevels)[keyof typeof LogLevels];
|
|
205
|
+
export type ConfigurationVariables = {
|
|
206
|
+
log_level: `${LogLevel}`;
|
|
207
|
+
textord_dotmatrix_gap: `${number}`;
|
|
208
|
+
textord_debug_block: `${0 | 1}`;
|
|
209
|
+
textord_pitch_range: `${number}`;
|
|
210
|
+
textord_words_veto_power: `${number}`;
|
|
211
|
+
textord_tabfind_show_strokewidths: `${0 | 1}`;
|
|
212
|
+
pitsync_linear_version: `${number}`;
|
|
213
|
+
oldbl_holed_losscount: `${number}`;
|
|
214
|
+
textord_skewsmooth_offset: `${number}`;
|
|
215
|
+
textord_skewsmooth_offset2: `${0 | 1}`;
|
|
216
|
+
textord_test_x: `${number}`;
|
|
217
|
+
textord_test_y: `${number}`;
|
|
218
|
+
textord_min_blobs_in_row: `${number}`;
|
|
219
|
+
textord_spline_minblobs: `${number}`;
|
|
220
|
+
textord_spline_medianwin: `${number}`;
|
|
221
|
+
textord_max_blob_overlaps: `${number}`;
|
|
222
|
+
textord_min_xheight: `${number}`;
|
|
223
|
+
textord_lms_line_trials: `${number}`;
|
|
224
|
+
textord_tabfind_show_images: `${0 | 1}`;
|
|
225
|
+
textord_fp_chop_error: `${number}`;
|
|
226
|
+
edges_max_children_per_outline: `${number}`;
|
|
227
|
+
edges_max_children_layers: `${number}`;
|
|
228
|
+
edges_children_per_grandchild: `${number}`;
|
|
229
|
+
edges_children_count_limit: `${number}`;
|
|
230
|
+
edges_min_nonhole: `${number}`;
|
|
231
|
+
edges_patharea_ratio: `${number}`;
|
|
232
|
+
devanagari_split_debuglevel: `${0 | 1}`;
|
|
233
|
+
textord_tabfind_show_partitions: `${0 | 1}`;
|
|
234
|
+
textord_debug_tabfind: `${0 | 1}`;
|
|
235
|
+
textord_debug_bugs: `${0 | 1}`;
|
|
236
|
+
textord_testregion_left: `${number}`;
|
|
237
|
+
textord_testregion_top: `${number}`;
|
|
238
|
+
textord_testregion_right: `${number}`;
|
|
239
|
+
textord_testregion_bottom: `${number}`;
|
|
240
|
+
classify_num_cp_levels: `${number}`;
|
|
241
|
+
editor_image_xpos: `${number}`;
|
|
242
|
+
editor_image_ypos: `${number}`;
|
|
243
|
+
editor_image_menuheight: `${number}`;
|
|
244
|
+
editor_image_blob_bb_color: `${number}`;
|
|
245
|
+
editor_word_ypos: `${number}`;
|
|
246
|
+
editor_word_width: `${number}`;
|
|
247
|
+
curl_timeout: `${0 | 1}`;
|
|
248
|
+
wordrec_display_all_blobs: `${0 | 1}`;
|
|
249
|
+
wordrec_blob_pause: `${0 | 1}`;
|
|
250
|
+
textord_force_make_prop_words: `${0 | 1}`;
|
|
251
|
+
textord_chopper_test: `${0 | 1}`;
|
|
252
|
+
textord_restore_underlines: `${0 | 1}`;
|
|
253
|
+
textord_show_initial_words: `${0 | 1}`;
|
|
254
|
+
textord_blocksall_fixed: `${0 | 1}`;
|
|
255
|
+
textord_blocksall_prop: `${0 | 1}`;
|
|
256
|
+
textord_pitch_scalebigwords: `${0 | 1}`;
|
|
257
|
+
textord_debug_pitch_test: `${0 | 1}`;
|
|
258
|
+
textord_disable_pitch_test: `${0 | 1}`;
|
|
259
|
+
textord_fast_pitch_test: `${0 | 1}`;
|
|
260
|
+
textord_debug_pitch_metric: `${0 | 1}`;
|
|
261
|
+
textord_show_row_cuts: `${0 | 1}`;
|
|
262
|
+
textord_show_page_cuts: `${0 | 1}`;
|
|
263
|
+
textord_blockndoc_fixed: `${0 | 1}`;
|
|
264
|
+
textord_show_tables: `${0 | 1}`;
|
|
265
|
+
textord_tablefind_show_mark: `${0 | 1}`;
|
|
266
|
+
textord_tablefind_show_stats: `${0 | 1}`;
|
|
267
|
+
textord_tablefind_recognize_tables: `${0 | 1}`;
|
|
268
|
+
textord_tabfind_show_initialtabs: `${0 | 1}`;
|
|
269
|
+
textord_tabfind_show_finaltabs: `${0 | 1}`;
|
|
270
|
+
textord_tabfind_only_strokewidths: `${0 | 1}`;
|
|
271
|
+
textord_really_old_xheight: `${0 | 1}`;
|
|
272
|
+
textord_oldbl_debug: `${0 | 1}`;
|
|
273
|
+
textord_debug_baselines: `${0 | 1}`;
|
|
274
|
+
textord_oldbl_paradef: `${0 | 1}`;
|
|
275
|
+
textord_oldbl_split_splines: `${0 | 1}`;
|
|
276
|
+
textord_oldbl_merge_parts: `${0 | 1}`;
|
|
277
|
+
oldbl_corrfix: `${0 | 1}`;
|
|
278
|
+
oldbl_xhfix: `${0 | 1}`;
|
|
279
|
+
textord_ocropus_mode: `${0 | 1}`;
|
|
280
|
+
textord_heavy_nr: `${0 | 1}`;
|
|
281
|
+
textord_show_initial_rows: `${0 | 1}`;
|
|
282
|
+
textord_show_parallel_rows: `${0 | 1}`;
|
|
283
|
+
textord_show_expanded_rows: `${0 | 1}`;
|
|
284
|
+
textord_show_final_rows: `${0 | 1}`;
|
|
285
|
+
textord_show_final_blobs: `${0 | 1}`;
|
|
286
|
+
textord_test_landscape: `${0 | 1}`;
|
|
287
|
+
textord_parallel_baselines: `${0 | 1}`;
|
|
288
|
+
textord_straight_baselines: `${0 | 1}`;
|
|
289
|
+
textord_old_baselines: `${0 | 1}`;
|
|
290
|
+
textord_old_xheight: `${0 | 1}`;
|
|
291
|
+
textord_fix_xheight_bug: `${0 | 1}`;
|
|
292
|
+
textord_fix_makerow_bug: `${0 | 1}`;
|
|
293
|
+
textord_debug_xheights: `${0 | 1}`;
|
|
294
|
+
textord_biased_skewcalc: `${0 | 1}`;
|
|
295
|
+
textord_interpolating_skew: `${0 | 1}`;
|
|
296
|
+
textord_new_initial_xheight: `${0 | 1}`;
|
|
297
|
+
textord_debug_blob: `${0 | 1}`;
|
|
298
|
+
gapmap_debug: `${0 | 1}`;
|
|
299
|
+
gapmap_use_ends: `${0 | 1}`;
|
|
300
|
+
gapmap_no_isolated_quanta: `${0 | 1}`;
|
|
301
|
+
edges_use_new_outline_complexity: `${0 | 1}`;
|
|
302
|
+
edges_debug: `${0 | 1}`;
|
|
303
|
+
edges_children_fix: `${0 | 1}`;
|
|
304
|
+
textord_show_fixed_cuts: `${0 | 1}`;
|
|
305
|
+
devanagari_split_debugimage: `${0 | 1}`;
|
|
306
|
+
textord_tabfind_show_initial_partitions: `${0 | 1}`;
|
|
307
|
+
textord_tabfind_show_reject_blobs: `${0 | 1}`;
|
|
308
|
+
textord_tabfind_show_columns: `${0 | 1}`;
|
|
309
|
+
textord_tabfind_show_blocks: `${0 | 1}`;
|
|
310
|
+
textord_tabfind_find_tables: `${0 | 1}`;
|
|
311
|
+
textord_space_size_is_variable: `${0 | 1}`;
|
|
312
|
+
textord_debug_printable: `${0 | 1}`;
|
|
313
|
+
wordrec_display_splits: `${0 | 1}`;
|
|
314
|
+
poly_debug: `${0 | 1}`;
|
|
315
|
+
poly_wide_objects_better: `${0 | 1}`;
|
|
316
|
+
equationdetect_save_bi_image: `${0 | 1}`;
|
|
317
|
+
equationdetect_save_spt_image: `${0 | 1}`;
|
|
318
|
+
equationdetect_save_seed_image: `${0 | 1}`;
|
|
319
|
+
equationdetect_save_merged_image: `${0 | 1}`;
|
|
320
|
+
debug_file: `${string}`;
|
|
321
|
+
editor_word_name: `${string}`;
|
|
322
|
+
dotproduct: `${string}`;
|
|
323
|
+
document_title: `${string}`;
|
|
324
|
+
curl_cookiefile: `${string}`;
|
|
325
|
+
classify_font_name: `${string}`;
|
|
326
|
+
textord_underline_offset: `${number}`;
|
|
327
|
+
textord_wordstats_smooth_factor: `${number}`;
|
|
328
|
+
textord_words_maxspace: `${number}`;
|
|
329
|
+
textord_words_default_maxspace: `${number}`;
|
|
330
|
+
textord_words_default_minspace: `${number}`;
|
|
331
|
+
textord_words_min_minspace: `${number}`;
|
|
332
|
+
textord_words_default_nonspace: `${number}`;
|
|
333
|
+
textord_words_initial_lower: `${number}`;
|
|
334
|
+
textord_words_initial_upper: `${number}`;
|
|
335
|
+
textord_words_minlarge: `${number}`;
|
|
336
|
+
textord_words_pitchsd_threshold: `${number}`;
|
|
337
|
+
textord_words_def_fixed: `${number}`;
|
|
338
|
+
textord_words_def_prop: `${number}`;
|
|
339
|
+
textord_pitch_rowsimilarity: `${number}`;
|
|
340
|
+
words_initial_lower: `${number}`;
|
|
341
|
+
words_initial_upper: `${number}`;
|
|
342
|
+
words_default_prop_nonspace: `${number}`;
|
|
343
|
+
words_default_fixed_space: `${number}`;
|
|
344
|
+
words_default_fixed_limit: `${number}`;
|
|
345
|
+
textord_words_definite_spread: `${number}`;
|
|
346
|
+
textord_spacesize_ratioprop: `${number}`;
|
|
347
|
+
textord_fpiqr_ratio: `${number}`;
|
|
348
|
+
textord_max_pitch_iqr: `${number}`;
|
|
349
|
+
textord_projection_scale: `${number}`;
|
|
350
|
+
textord_balance_factor: `${0 | 1}`;
|
|
351
|
+
textord_tabvector_vertical_gap_fraction: `${number}`;
|
|
352
|
+
textord_tabvector_vertical_box_ratio: `${number}`;
|
|
353
|
+
pitsync_joined_edge: `${number}`;
|
|
354
|
+
pitsync_offset_freecut_fraction: `${number}`;
|
|
355
|
+
oldbl_xhfract: `${number}`;
|
|
356
|
+
oldbl_dot_error_size: `${number}`;
|
|
357
|
+
textord_oldbl_jumplimit: `${number}`;
|
|
358
|
+
textord_spline_shift_fraction: `${number}`;
|
|
359
|
+
textord_skew_ile: `${number}`;
|
|
360
|
+
textord_skew_lag: `${number}`;
|
|
361
|
+
textord_linespace_iqrlimit: `${number}`;
|
|
362
|
+
textord_width_limit: `${number}`;
|
|
363
|
+
textord_chop_width: `${number}`;
|
|
364
|
+
textord_expansion_factor: `${0 | 1}`;
|
|
365
|
+
textord_overlap_x: `${number}`;
|
|
366
|
+
textord_minxh: `${number}`;
|
|
367
|
+
textord_min_linesize: `${number}`;
|
|
368
|
+
textord_excess_blobsize: `${number}`;
|
|
369
|
+
textord_occupancy_threshold: `${number}`;
|
|
370
|
+
textord_underline_width: `${number}`;
|
|
371
|
+
textord_min_blob_height_fraction: `${number}`;
|
|
372
|
+
textord_xheight_mode_fraction: `${number}`;
|
|
373
|
+
textord_ascheight_mode_fraction: `${number}`;
|
|
374
|
+
textord_descheight_mode_fraction: `${number}`;
|
|
375
|
+
textord_ascx_ratio_min: `${number}`;
|
|
376
|
+
textord_ascx_ratio_max: `${number}`;
|
|
377
|
+
textord_descx_ratio_min: `${number}`;
|
|
378
|
+
textord_descx_ratio_max: `${number}`;
|
|
379
|
+
textord_xheight_error_margin: `${number}`;
|
|
380
|
+
gapmap_big_gaps: `${number}`;
|
|
381
|
+
edges_childarea: `${number}`;
|
|
382
|
+
edges_boxarea: `${number}`;
|
|
383
|
+
textord_underline_threshold: `${number}`;
|
|
384
|
+
classify_pico_feature_length: `${number}`;
|
|
385
|
+
classify_norm_adj_midpoint: `${number}`;
|
|
386
|
+
classify_norm_adj_curl: `${number}`;
|
|
387
|
+
classify_min_slope: `${number}`;
|
|
388
|
+
classify_max_slope: `${number}`;
|
|
389
|
+
classify_cp_angle_pad_loose: `${number}`;
|
|
390
|
+
classify_cp_angle_pad_medium: `${number}`;
|
|
391
|
+
classify_cp_angle_pad_tight: `${number}`;
|
|
392
|
+
classify_cp_end_pad_loose: `${number}`;
|
|
393
|
+
classify_cp_end_pad_medium: `${number}`;
|
|
394
|
+
classify_cp_end_pad_tight: `${number}`;
|
|
395
|
+
classify_cp_side_pad_loose: `${number}`;
|
|
396
|
+
classify_cp_side_pad_medium: `${number}`;
|
|
397
|
+
classify_cp_side_pad_tight: `${number}`;
|
|
398
|
+
classify_pp_angle_pad: `${number}`;
|
|
399
|
+
classify_pp_end_pad: `${number}`;
|
|
400
|
+
classify_pp_side_pad: `${number}`;
|
|
401
|
+
ambigs_debug_level: `${0 | 1}`;
|
|
402
|
+
classify_debug_level: `${0 | 1}`;
|
|
403
|
+
classify_norm_method: `${0 | 1}`;
|
|
404
|
+
matcher_debug_level: `${0 | 1}`;
|
|
405
|
+
matcher_debug_flags: `${0 | 1}`;
|
|
406
|
+
classify_learning_debug_level: `${0 | 1}`;
|
|
407
|
+
matcher_permanent_classes_min: `${0 | 1}`;
|
|
408
|
+
matcher_min_examples_for_prototyping: `${number}`;
|
|
409
|
+
matcher_sufficient_examples_for_prototyping: `${number}`;
|
|
410
|
+
classify_adapt_proto_threshold: `${number}`;
|
|
411
|
+
classify_adapt_feature_threshold: `${number}`;
|
|
412
|
+
classify_class_pruner_threshold: `${number}`;
|
|
413
|
+
classify_class_pruner_multiplier: `${number}`;
|
|
414
|
+
classify_cp_cutoff_strength: `${number}`;
|
|
415
|
+
classify_integer_matcher_multiplier: `${number}`;
|
|
416
|
+
dawg_debug_level: `${0 | 1}`;
|
|
417
|
+
hyphen_debug_level: `${0 | 1}`;
|
|
418
|
+
stopper_smallword_size: `${number}`;
|
|
419
|
+
stopper_debug_level: `${0 | 1}`;
|
|
420
|
+
tessedit_truncate_wordchoice_log: `${number}`;
|
|
421
|
+
max_permuter_attempts: `${number}`;
|
|
422
|
+
repair_unchopped_blobs: `${0 | 1}`;
|
|
423
|
+
chop_debug: `${0 | 1}`;
|
|
424
|
+
chop_split_length: `${number}`;
|
|
425
|
+
chop_same_distance: `${number}`;
|
|
426
|
+
chop_min_outline_points: `${number}`;
|
|
427
|
+
chop_seam_pile_size: `${number}`;
|
|
428
|
+
chop_inside_angle: `${number}`;
|
|
429
|
+
chop_min_outline_area: `${number}`;
|
|
430
|
+
chop_centered_maxwidth: `${number}`;
|
|
431
|
+
chop_x_y_weight: `${number}`;
|
|
432
|
+
wordrec_debug_level: `${0 | 1}`;
|
|
433
|
+
wordrec_max_join_chunks: `${number}`;
|
|
434
|
+
segsearch_debug_level: `${0 | 1}`;
|
|
435
|
+
segsearch_max_pain_points: `${number}`;
|
|
436
|
+
segsearch_max_futile_classifications: `${number}`;
|
|
437
|
+
language_model_debug_level: `${0 | 1}`;
|
|
438
|
+
language_model_ngram_order: `${number}`;
|
|
439
|
+
language_model_viterbi_list_max_num_prunable: `${number}`;
|
|
440
|
+
language_model_viterbi_list_max_size: `${number}`;
|
|
441
|
+
language_model_min_compound_length: `${number}`;
|
|
442
|
+
wordrec_display_segmentations: `${0 | 1}`;
|
|
443
|
+
tessedit_pageseg_mode: `${number}`;
|
|
444
|
+
thresholding_method: `${0 | 1}`;
|
|
445
|
+
tessedit_ocr_engine_mode: `${number}`;
|
|
446
|
+
pageseg_devanagari_split_strategy: `${0 | 1}`;
|
|
447
|
+
ocr_devanagari_split_strategy: `${0 | 1}`;
|
|
448
|
+
bidi_debug: `${0 | 1}`;
|
|
449
|
+
applybox_debug: `${0 | 1}`;
|
|
450
|
+
applybox_page: `${0 | 1}`;
|
|
451
|
+
tessedit_font_id: `${0 | 1}`;
|
|
452
|
+
tessedit_bigram_debug: `${0 | 1}`;
|
|
453
|
+
debug_noise_removal: `${0 | 1}`;
|
|
454
|
+
noise_maxperblob: `${number}`;
|
|
455
|
+
noise_maxperword: `${number}`;
|
|
456
|
+
debug_x_ht_level: `${0 | 1}`;
|
|
457
|
+
quality_min_initial_alphas_reqd: `${number}`;
|
|
458
|
+
tessedit_tess_adaption_mode: `${number}`;
|
|
459
|
+
multilang_debug_level: `${0 | 1}`;
|
|
460
|
+
paragraph_debug_level: `${0 | 1}`;
|
|
461
|
+
tessedit_preserve_min_wd_len: `${number}`;
|
|
462
|
+
crunch_rating_max: `${number}`;
|
|
463
|
+
crunch_pot_indicators: `${0 | 1}`;
|
|
464
|
+
crunch_leave_lc_strings: `${number}`;
|
|
465
|
+
crunch_leave_uc_strings: `${number}`;
|
|
466
|
+
crunch_long_repetitions: `${number}`;
|
|
467
|
+
crunch_debug: `${0 | 1}`;
|
|
468
|
+
fixsp_non_noise_limit: `${0 | 1}`;
|
|
469
|
+
fixsp_done_mode: `${0 | 1}`;
|
|
470
|
+
debug_fix_space_level: `${0 | 1}`;
|
|
471
|
+
x_ht_acceptance_tolerance: `${number}`;
|
|
472
|
+
x_ht_min_change: `${number}`;
|
|
473
|
+
superscript_debug: `${0 | 1}`;
|
|
474
|
+
page_xml_level: `${0 | 1}`;
|
|
475
|
+
jpg_quality: `${number}`;
|
|
476
|
+
user_defined_dpi: `${0 | 1}`;
|
|
477
|
+
min_characters_to_try: `${number}`;
|
|
478
|
+
suspect_level: `${number}`;
|
|
479
|
+
suspect_short_words: `${number}`;
|
|
480
|
+
tessedit_reject_mode: `${0 | 1}`;
|
|
481
|
+
tessedit_image_border: `${number}`;
|
|
482
|
+
min_sane_x_ht_pixels: `${number}`;
|
|
483
|
+
tessedit_page_number: `${number}`;
|
|
484
|
+
tessedit_parallelize: `${0 | 1}`;
|
|
485
|
+
lstm_choice_mode: `${0 | 1}`;
|
|
486
|
+
lstm_choice_iterations: `${number}`;
|
|
487
|
+
tosp_debug_level: `${0 | 1}`;
|
|
488
|
+
tosp_enough_space_samples_for_median: `${number}`;
|
|
489
|
+
tosp_redo_kern_limit: `${number}`;
|
|
490
|
+
tosp_few_samples: `${number}`;
|
|
491
|
+
tosp_short_row: `${number}`;
|
|
492
|
+
tosp_sanity_method: `${0 | 1}`;
|
|
493
|
+
textord_max_noise_size: `${number}`;
|
|
494
|
+
textord_baseline_debug: `${0 | 1}`;
|
|
495
|
+
textord_noise_sizefraction: `${number}`;
|
|
496
|
+
textord_noise_translimit: `${number}`;
|
|
497
|
+
textord_noise_sncount: `${0 | 1}`;
|
|
498
|
+
use_ambigs_for_adaption: `${0 | 1}`;
|
|
499
|
+
allow_blob_division: `${0 | 1}`;
|
|
500
|
+
prioritize_division: `${0 | 1}`;
|
|
501
|
+
classify_enable_learning: `${0 | 1}`;
|
|
502
|
+
tess_cn_matching: `${0 | 1}`;
|
|
503
|
+
tess_bn_matching: `${0 | 1}`;
|
|
504
|
+
classify_enable_adaptive_matcher: `${0 | 1}`;
|
|
505
|
+
classify_use_pre_adapted_templates: `${0 | 1}`;
|
|
506
|
+
classify_save_adapted_templates: `${0 | 1}`;
|
|
507
|
+
classify_enable_adaptive_debugger: `${0 | 1}`;
|
|
508
|
+
classify_nonlinear_norm: `${0 | 1}`;
|
|
509
|
+
disable_character_fragments: `${0 | 1}`;
|
|
510
|
+
classify_debug_character_fragments: `${0 | 1}`;
|
|
511
|
+
matcher_debug_separate_windows: `${0 | 1}`;
|
|
512
|
+
classify_bln_numeric_mode: `${0 | 1}`;
|
|
513
|
+
load_system_dawg: `${0 | 1}`;
|
|
514
|
+
load_freq_dawg: `${0 | 1}`;
|
|
515
|
+
load_unambig_dawg: `${0 | 1}`;
|
|
516
|
+
load_punc_dawg: `${0 | 1}`;
|
|
517
|
+
load_number_dawg: `${0 | 1}`;
|
|
518
|
+
load_bigram_dawg: `${0 | 1}`;
|
|
519
|
+
use_only_first_uft8_step: `${0 | 1}`;
|
|
520
|
+
stopper_no_acceptable_choices: `${0 | 1}`;
|
|
521
|
+
segment_nonalphabetic_script: `${0 | 1}`;
|
|
522
|
+
save_doc_words: `${0 | 1}`;
|
|
523
|
+
merge_fragments_in_matrix: `${0 | 1}`;
|
|
524
|
+
wordrec_enable_assoc: `${0 | 1}`;
|
|
525
|
+
force_word_assoc: `${0 | 1}`;
|
|
526
|
+
chop_enable: `${0 | 1}`;
|
|
527
|
+
chop_vertical_creep: `${0 | 1}`;
|
|
528
|
+
chop_new_seam_pile: `${0 | 1}`;
|
|
529
|
+
assume_fixed_pitch_char_segment: `${0 | 1}`;
|
|
530
|
+
wordrec_skip_no_truth_words: `${0 | 1}`;
|
|
531
|
+
wordrec_debug_blamer: `${0 | 1}`;
|
|
532
|
+
wordrec_run_blamer: `${0 | 1}`;
|
|
533
|
+
save_alt_choices: `${0 | 1}`;
|
|
534
|
+
language_model_ngram_on: `${0 | 1}`;
|
|
535
|
+
language_model_ngram_use_only_first_uft8_step: `${0 | 1}`;
|
|
536
|
+
language_model_ngram_space_delimited_language: `${0 | 1}`;
|
|
537
|
+
language_model_use_sigmoidal_certainty: `${0 | 1}`;
|
|
538
|
+
tessedit_resegment_from_boxes: `${0 | 1}`;
|
|
539
|
+
tessedit_resegment_from_line_boxes: `${0 | 1}`;
|
|
540
|
+
tessedit_train_from_boxes: `${0 | 1}`;
|
|
541
|
+
tessedit_make_boxes_from_boxes: `${0 | 1}`;
|
|
542
|
+
tessedit_train_line_recognizer: `${0 | 1}`;
|
|
543
|
+
tessedit_dump_pageseg_images: `${0 | 1}`;
|
|
544
|
+
tessedit_do_invert: `${0 | 1}`;
|
|
545
|
+
thresholding_debug: `${0 | 1}`;
|
|
546
|
+
tessedit_ambigs_training: `${0 | 1}`;
|
|
547
|
+
tessedit_adaption_debug: `${0 | 1}`;
|
|
548
|
+
applybox_learn_chars_and_char_frags_mode: `${0 | 1}`;
|
|
549
|
+
applybox_learn_ngrams_mode: `${0 | 1}`;
|
|
550
|
+
tessedit_display_outwords: `${0 | 1}`;
|
|
551
|
+
tessedit_dump_choices: `${0 | 1}`;
|
|
552
|
+
tessedit_timing_debug: `${0 | 1}`;
|
|
553
|
+
tessedit_fix_fuzzy_spaces: `${0 | 1}`;
|
|
554
|
+
tessedit_unrej_any_wd: `${0 | 1}`;
|
|
555
|
+
tessedit_fix_hyphens: `${0 | 1}`;
|
|
556
|
+
tessedit_enable_doc_dict: `${0 | 1}`;
|
|
557
|
+
tessedit_debug_fonts: `${0 | 1}`;
|
|
558
|
+
tessedit_debug_block_rejection: `${0 | 1}`;
|
|
559
|
+
tessedit_enable_bigram_correction: `${0 | 1}`;
|
|
560
|
+
tessedit_enable_dict_correction: `${0 | 1}`;
|
|
561
|
+
enable_noise_removal: `${0 | 1}`;
|
|
562
|
+
tessedit_minimal_rej_pass1: `${0 | 1}`;
|
|
563
|
+
tessedit_test_adaption: `${0 | 1}`;
|
|
564
|
+
test_pt: `${0 | 1}`;
|
|
565
|
+
paragraph_text_based: `${0 | 1}`;
|
|
566
|
+
lstm_use_matrix: `${0 | 1}`;
|
|
567
|
+
tessedit_good_quality_unrej: `${0 | 1}`;
|
|
568
|
+
tessedit_use_reject_spaces: `${0 | 1}`;
|
|
569
|
+
tessedit_preserve_blk_rej_perfect_wds: `${0 | 1}`;
|
|
570
|
+
tessedit_preserve_row_rej_perfect_wds: `${0 | 1}`;
|
|
571
|
+
tessedit_dont_blkrej_good_wds: `${0 | 1}`;
|
|
572
|
+
tessedit_dont_rowrej_good_wds: `${0 | 1}`;
|
|
573
|
+
tessedit_row_rej_good_docs: `${0 | 1}`;
|
|
574
|
+
tessedit_reject_bad_qual_wds: `${0 | 1}`;
|
|
575
|
+
tessedit_debug_doc_rejection: `${0 | 1}`;
|
|
576
|
+
tessedit_debug_quality_metrics: `${0 | 1}`;
|
|
577
|
+
bland_unrej: `${0 | 1}`;
|
|
578
|
+
unlv_tilde_crunching: `${0 | 1}`;
|
|
579
|
+
hocr_font_info: `${0 | 1}`;
|
|
580
|
+
hocr_char_boxes: `${0 | 1}`;
|
|
581
|
+
crunch_early_merge_tess_fails: `${0 | 1}`;
|
|
582
|
+
crunch_early_convert_bad_unlv_chs: `${0 | 1}`;
|
|
583
|
+
crunch_terrible_garbage: `${0 | 1}`;
|
|
584
|
+
crunch_leave_ok_strings: `${0 | 1}`;
|
|
585
|
+
crunch_accept_ok: `${0 | 1}`;
|
|
586
|
+
crunch_leave_accept_strings: `${0 | 1}`;
|
|
587
|
+
crunch_include_numerals: `${0 | 1}`;
|
|
588
|
+
tessedit_prefer_joined_punct: `${0 | 1}`;
|
|
589
|
+
tessedit_write_block_separators: `${0 | 1}`;
|
|
590
|
+
tessedit_write_rep_codes: `${0 | 1}`;
|
|
591
|
+
tessedit_write_unlv: `${0 | 1}`;
|
|
592
|
+
tessedit_create_txt: `${0 | 1}`;
|
|
593
|
+
tessedit_create_hocr: `${0 | 1}`;
|
|
594
|
+
tessedit_create_alto: `${0 | 1}`;
|
|
595
|
+
tessedit_create_page_xml: `${0 | 1}`;
|
|
596
|
+
page_xml_polygon: `${0 | 1}`;
|
|
597
|
+
tessedit_create_lstmbox: `${0 | 1}`;
|
|
598
|
+
tessedit_create_tsv: `${0 | 1}`;
|
|
599
|
+
tessedit_create_wordstrbox: `${0 | 1}`;
|
|
600
|
+
tessedit_create_pdf: `${0 | 1}`;
|
|
601
|
+
textonly_pdf: `${0 | 1}`;
|
|
602
|
+
suspect_constrain_1Il: `${0 | 1}`;
|
|
603
|
+
tessedit_minimal_rejection: `${0 | 1}`;
|
|
604
|
+
tessedit_zero_rejection: `${0 | 1}`;
|
|
605
|
+
tessedit_word_for_word: `${0 | 1}`;
|
|
606
|
+
tessedit_zero_kelvin_rejection: `${0 | 1}`;
|
|
607
|
+
tessedit_rejection_debug: `${0 | 1}`;
|
|
608
|
+
tessedit_flip_0O: `${0 | 1}`;
|
|
609
|
+
rej_trust_doc_dawg: `${0 | 1}`;
|
|
610
|
+
rej_1Il_use_dict_word: `${0 | 1}`;
|
|
611
|
+
rej_1Il_trust_permuter_type: `${0 | 1}`;
|
|
612
|
+
rej_use_tess_accepted: `${0 | 1}`;
|
|
613
|
+
rej_use_tess_blanks: `${0 | 1}`;
|
|
614
|
+
rej_use_good_perm: `${0 | 1}`;
|
|
615
|
+
rej_use_sensible_wd: `${0 | 1}`;
|
|
616
|
+
rej_alphas_in_number_perm: `${0 | 1}`;
|
|
617
|
+
tessedit_create_boxfile: `${0 | 1}`;
|
|
618
|
+
tessedit_write_images: `${0 | 1}`;
|
|
619
|
+
interactive_display_mode: `${0 | 1}`;
|
|
620
|
+
tessedit_override_permuter: `${0 | 1}`;
|
|
621
|
+
tessedit_use_primary_params_model: `${0 | 1}`;
|
|
622
|
+
textord_tabfind_show_vlines: `${0 | 1}`;
|
|
623
|
+
textord_use_cjk_fp_model: `${0 | 1}`;
|
|
624
|
+
poly_allow_detailed_fx: `${0 | 1}`;
|
|
625
|
+
tessedit_init_config_only: `${0 | 1}`;
|
|
626
|
+
textord_equation_detect: `${0 | 1}`;
|
|
627
|
+
textord_tabfind_vertical_text: `${0 | 1}`;
|
|
628
|
+
textord_tabfind_force_vertical_text: `${0 | 1}`;
|
|
629
|
+
preserve_interword_spaces: `${0 | 1}`;
|
|
630
|
+
pageseg_apply_music_mask: `${0 | 1}`;
|
|
631
|
+
textord_single_height_mode: `${0 | 1}`;
|
|
632
|
+
tosp_old_to_method: `${0 | 1}`;
|
|
633
|
+
tosp_old_to_constrain_sp_kn: `${0 | 1}`;
|
|
634
|
+
tosp_only_use_prop_rows: `${0 | 1}`;
|
|
635
|
+
tosp_force_wordbreak_on_punct: `${0 | 1}`;
|
|
636
|
+
tosp_use_pre_chopping: `${0 | 1}`;
|
|
637
|
+
tosp_old_to_bug_fix: `${0 | 1}`;
|
|
638
|
+
tosp_block_use_cert_spaces: `${0 | 1}`;
|
|
639
|
+
tosp_row_use_cert_spaces: `${0 | 1}`;
|
|
640
|
+
tosp_narrow_blobs_not_cert: `${0 | 1}`;
|
|
641
|
+
tosp_row_use_cert_spaces1: `${0 | 1}`;
|
|
642
|
+
tosp_recovery_isolated_row_stats: `${0 | 1}`;
|
|
643
|
+
tosp_only_small_gaps_for_kern: `${0 | 1}`;
|
|
644
|
+
tosp_all_flips_fuzzy: `${0 | 1}`;
|
|
645
|
+
tosp_fuzzy_limit_all: `${0 | 1}`;
|
|
646
|
+
tosp_stats_use_xht_gaps: `${0 | 1}`;
|
|
647
|
+
tosp_use_xht_gaps: `${0 | 1}`;
|
|
648
|
+
tosp_only_use_xht_gaps: `${0 | 1}`;
|
|
649
|
+
tosp_rule_9_test_punct: `${0 | 1}`;
|
|
650
|
+
tosp_flip_fuzz_kn_to_sp: `${0 | 1}`;
|
|
651
|
+
tosp_flip_fuzz_sp_to_kn: `${0 | 1}`;
|
|
652
|
+
tosp_improve_thresh: `${0 | 1}`;
|
|
653
|
+
textord_no_rejects: `${0 | 1}`;
|
|
654
|
+
textord_show_blobs: `${0 | 1}`;
|
|
655
|
+
textord_show_boxes: `${0 | 1}`;
|
|
656
|
+
textord_noise_rejwords: `${0 | 1}`;
|
|
657
|
+
textord_noise_rejrows: `${0 | 1}`;
|
|
658
|
+
textord_noise_debug: `${0 | 1}`;
|
|
659
|
+
classify_learn_debug_str: `${string}`;
|
|
660
|
+
user_words_file: `${string}`;
|
|
661
|
+
user_words_suffix: `${string}`;
|
|
662
|
+
user_patterns_file: `${string}`;
|
|
663
|
+
user_patterns_suffix: `${string}`;
|
|
664
|
+
output_ambig_words_file: `${string}`;
|
|
665
|
+
word_to_debug: `${string}`;
|
|
666
|
+
tessedit_char_blacklist: `${string}`;
|
|
667
|
+
tessedit_char_whitelist: `${string}`;
|
|
668
|
+
tessedit_char_unblacklist: `${string}`;
|
|
669
|
+
tessedit_write_params_to_file: `${string}`;
|
|
670
|
+
applybox_exposure_pattern: `${string}`;
|
|
671
|
+
chs_leading_punct: `${string}`;
|
|
672
|
+
chs_trailing_punct1: `${string}`;
|
|
673
|
+
chs_trailing_punct2: `${string}`;
|
|
674
|
+
outlines_odd: `${string}`;
|
|
675
|
+
outlines_2: `${string}`;
|
|
676
|
+
numeric_punctuation: `${string}`;
|
|
677
|
+
unrecognised_char: `${string}`;
|
|
678
|
+
ok_repeated_ch_non_alphanum_wds: `${string}`;
|
|
679
|
+
conflict_set_I_l_1: `${string}`;
|
|
680
|
+
file_type: `${string}`;
|
|
681
|
+
tessedit_load_sublangs: `${string}`;
|
|
682
|
+
page_separator: `${string}`;
|
|
683
|
+
classify_char_norm_range: `${number}`;
|
|
684
|
+
classify_max_rating_ratio: `${number}`;
|
|
685
|
+
classify_max_certainty_margin: `${number}`;
|
|
686
|
+
matcher_good_threshold: `${number}`;
|
|
687
|
+
matcher_reliable_adaptive_result: `${0 | 1}`;
|
|
688
|
+
matcher_perfect_threshold: `${number}`;
|
|
689
|
+
matcher_bad_match_pad: `${number}`;
|
|
690
|
+
matcher_rating_margin: `${number}`;
|
|
691
|
+
matcher_avg_noise_size: `${number}`;
|
|
692
|
+
matcher_clustering_max_angle_delta: `${number}`;
|
|
693
|
+
classify_misfit_junk_penalty: `${0 | 1}`;
|
|
694
|
+
rating_scale: `${number}`;
|
|
695
|
+
tessedit_class_miss_scale: `${number}`;
|
|
696
|
+
classify_adapted_pruning_factor: `${number}`;
|
|
697
|
+
classify_adapted_pruning_threshold: `${number}`;
|
|
698
|
+
classify_character_fragments_garbage_certainty_threshold: `${number}`;
|
|
699
|
+
speckle_large_max_size: `${number}`;
|
|
700
|
+
speckle_rating_penalty: `${number}`;
|
|
701
|
+
xheight_penalty_subscripts: `${number}`;
|
|
702
|
+
xheight_penalty_inconsistent: `${number}`;
|
|
703
|
+
segment_penalty_dict_frequent_word: `${0 | 1}`;
|
|
704
|
+
segment_penalty_dict_case_ok: `${number}`;
|
|
705
|
+
segment_penalty_dict_case_bad: `${number}`;
|
|
706
|
+
segment_penalty_dict_nonword: `${number}`;
|
|
707
|
+
segment_penalty_garbage: `${number}`;
|
|
708
|
+
certainty_scale: `${number}`;
|
|
709
|
+
stopper_nondict_certainty_base: `${number}`;
|
|
710
|
+
stopper_phase2_certainty_rejection_offset: `${0 | 1}`;
|
|
711
|
+
stopper_certainty_per_char: `${number}`;
|
|
712
|
+
stopper_allowable_character_badness: `${number}`;
|
|
713
|
+
doc_dict_pending_threshold: `${0 | 1}`;
|
|
714
|
+
doc_dict_certainty_threshold: `${number}`;
|
|
715
|
+
tessedit_certainty_threshold: `${number}`;
|
|
716
|
+
chop_split_dist_knob: `${number}`;
|
|
717
|
+
chop_overlap_knob: `${number}`;
|
|
718
|
+
chop_center_knob: `${number}`;
|
|
719
|
+
chop_sharpness_knob: `${number}`;
|
|
720
|
+
chop_width_change_knob: `${number}`;
|
|
721
|
+
chop_ok_split: `${number}`;
|
|
722
|
+
chop_good_split: `${number}`;
|
|
723
|
+
segsearch_max_char_wh_ratio: `${number}`;
|
|
724
|
+
language_model_ngram_small_prob: `${number}`;
|
|
725
|
+
language_model_ngram_nonmatch_score: `${number}`;
|
|
726
|
+
language_model_ngram_scale_factor: `${number}`;
|
|
727
|
+
language_model_ngram_rating_factor: `${number}`;
|
|
728
|
+
language_model_penalty_non_freq_dict_word: `${number}`;
|
|
729
|
+
language_model_penalty_non_dict_word: `${number}`;
|
|
730
|
+
language_model_penalty_punc: `${number}`;
|
|
731
|
+
language_model_penalty_case: `${number}`;
|
|
732
|
+
language_model_penalty_script: `${number}`;
|
|
733
|
+
language_model_penalty_chartype: `${number}`;
|
|
734
|
+
language_model_penalty_font: `${0 | 1}`;
|
|
735
|
+
language_model_penalty_spacing: `${number}`;
|
|
736
|
+
language_model_penalty_increment: `${number}`;
|
|
737
|
+
invert_threshold: `${number}`;
|
|
738
|
+
thresholding_window_size: `${number}`;
|
|
739
|
+
thresholding_kfactor: `${number}`;
|
|
740
|
+
thresholding_tile_size: `${number}`;
|
|
741
|
+
thresholding_smooth_kernel_size: `${0 | 1}`;
|
|
742
|
+
thresholding_score_fraction: `${number}`;
|
|
743
|
+
noise_cert_basechar: `${number}`;
|
|
744
|
+
noise_cert_disjoint: `${number}`;
|
|
745
|
+
noise_cert_punc: `${number}`;
|
|
746
|
+
noise_cert_factor: `${number}`;
|
|
747
|
+
quality_rej_pc: `${number}`;
|
|
748
|
+
quality_blob_pc: `${0 | 1}`;
|
|
749
|
+
quality_outline_pc: `${0 | 1}`;
|
|
750
|
+
quality_char_pc: `${number}`;
|
|
751
|
+
test_pt_x: `${number}`;
|
|
752
|
+
test_pt_y: `${number}`;
|
|
753
|
+
tessedit_reject_doc_percent: `${number}`;
|
|
754
|
+
tessedit_reject_block_percent: `${number}`;
|
|
755
|
+
tessedit_reject_row_percent: `${number}`;
|
|
756
|
+
tessedit_whole_wd_rej_row_percent: `${number}`;
|
|
757
|
+
tessedit_good_doc_still_rowrej_wd: `${number}`;
|
|
758
|
+
quality_rowrej_pc: `${number}`;
|
|
759
|
+
crunch_terrible_rating: `${number}`;
|
|
760
|
+
crunch_poor_garbage_cert: `${number}`;
|
|
761
|
+
crunch_poor_garbage_rate: `${number}`;
|
|
762
|
+
crunch_pot_poor_rate: `${number}`;
|
|
763
|
+
crunch_pot_poor_cert: `${number}`;
|
|
764
|
+
crunch_del_rating: `${number}`;
|
|
765
|
+
crunch_del_cert: `${number}`;
|
|
766
|
+
crunch_del_min_ht: `${number}`;
|
|
767
|
+
crunch_del_max_ht: `${number}`;
|
|
768
|
+
crunch_del_min_width: `${number}`;
|
|
769
|
+
crunch_del_high_word: `${number}`;
|
|
770
|
+
crunch_del_low_word: `${number}`;
|
|
771
|
+
crunch_small_outlines_size: `${number}`;
|
|
772
|
+
fixsp_small_outlines_size: `${number}`;
|
|
773
|
+
superscript_worse_certainty: `${number}`;
|
|
774
|
+
superscript_bettered_certainty: `${number}`;
|
|
775
|
+
superscript_scaledown_ratio: `${number}`;
|
|
776
|
+
subscript_max_y_top: `${number}`;
|
|
777
|
+
superscript_min_y_bottom: `${number}`;
|
|
778
|
+
suspect_rating_per_ch: `${number}`;
|
|
779
|
+
suspect_accept_rating: `${number}`;
|
|
780
|
+
tessedit_lower_flip_hyphen: `${number}`;
|
|
781
|
+
tessedit_upper_flip_hyphen: `${number}`;
|
|
782
|
+
rej_whole_of_mostly_reject_word_fract: `${number}`;
|
|
783
|
+
min_orientation_margin: `${number}`;
|
|
784
|
+
textord_tabfind_vertical_text_ratio: `${number}`;
|
|
785
|
+
textord_tabfind_aligned_gap_fraction: `${number}`;
|
|
786
|
+
lstm_rating_coefficient: `${number}`;
|
|
787
|
+
tosp_old_sp_kn_th_factor: `${number}`;
|
|
788
|
+
tosp_threshold_bias1: `${0 | 1}`;
|
|
789
|
+
tosp_threshold_bias2: `${0 | 1}`;
|
|
790
|
+
tosp_narrow_fraction: `${number}`;
|
|
791
|
+
tosp_narrow_aspect_ratio: `${number}`;
|
|
792
|
+
tosp_wide_fraction: `${number}`;
|
|
793
|
+
tosp_wide_aspect_ratio: `${0 | 1}`;
|
|
794
|
+
tosp_fuzzy_space_factor: `${number}`;
|
|
795
|
+
tosp_fuzzy_space_factor1: `${number}`;
|
|
796
|
+
tosp_fuzzy_space_factor2: `${number}`;
|
|
797
|
+
tosp_gap_factor: `${number}`;
|
|
798
|
+
tosp_kern_gap_factor1: `${number}`;
|
|
799
|
+
tosp_kern_gap_factor2: `${number}`;
|
|
800
|
+
tosp_kern_gap_factor3: `${number}`;
|
|
801
|
+
tosp_ignore_big_gaps: `${number}`;
|
|
802
|
+
tosp_ignore_very_big_gaps: `${number}`;
|
|
803
|
+
tosp_rep_space: `${number}`;
|
|
804
|
+
tosp_enough_small_gaps: `${number}`;
|
|
805
|
+
tosp_table_kn_sp_ratio: `${number}`;
|
|
806
|
+
tosp_table_xht_sp_ratio: `${number}`;
|
|
807
|
+
tosp_table_fuzzy_kn_sp_ratio: `${number}`;
|
|
808
|
+
tosp_fuzzy_kn_fraction: `${number}`;
|
|
809
|
+
tosp_fuzzy_sp_fraction: `${number}`;
|
|
810
|
+
tosp_min_sane_kn_sp: `${number}`;
|
|
811
|
+
tosp_init_guess_kn_mult: `${number}`;
|
|
812
|
+
tosp_init_guess_xht_mult: `${number}`;
|
|
813
|
+
tosp_max_sane_kn_thresh: `${number}`;
|
|
814
|
+
tosp_flip_caution: `${0 | 1}`;
|
|
815
|
+
tosp_large_kerning: `${number}`;
|
|
816
|
+
tosp_dont_fool_with_small_kerns: `${number}`;
|
|
817
|
+
tosp_near_lh_edge: `${0 | 1}`;
|
|
818
|
+
tosp_silly_kn_sp_gap: `${number}`;
|
|
819
|
+
tosp_pass_wide_fuzz_sp_to_context: `${number}`;
|
|
820
|
+
textord_noise_area_ratio: `${number}`;
|
|
821
|
+
textord_initialx_ile: `${number}`;
|
|
822
|
+
textord_initialasc_ile: `${number}`;
|
|
823
|
+
textord_noise_sizelimit: `${number}`;
|
|
824
|
+
textord_noise_normratio: `${number}`;
|
|
825
|
+
textord_noise_syfract: `${number}`;
|
|
826
|
+
textord_noise_sxfract: `${number}`;
|
|
827
|
+
textord_noise_hfract: `${number}`;
|
|
828
|
+
textord_noise_rowratio: `${number}`;
|
|
829
|
+
textord_blshift_maxshift: `${0 | 1}`;
|
|
830
|
+
textord_blshift_xfraction: `${number}`;
|
|
831
|
+
};
|
|
832
|
+
type InitOnlyConfigurationVariableNames = "ambigs_debug_level" | "language_model_ngram_on" | "language_model_use_sigmoidal_certainty" | "load_bigram_dawg" | "load_freq_dawg" | "load_number_dawg" | "load_punc_dawg" | "load_system_dawg" | "load_unambig_dawg" | "tessedit_init_config_only" | "tessedit_ocr_engine_mode" | "user_patterns_suffix" | "user_words_suffix";
|
|
833
|
+
export type InitOnlyConfigurationVariables = Pick<ConfigurationVariables, InitOnlyConfigurationVariableNames>;
|
|
834
|
+
export type SetVariableConfigVariables = Omit<ConfigurationVariables, InitOnlyConfigurationVariableNames>;
|
|
835
|
+
/**
|
|
836
|
+
* Tesseract init options
|
|
837
|
+
*/
|
|
838
|
+
export interface TesseractInitOptions {
|
|
839
|
+
/**
|
|
840
|
+
* Its generally safer to use as few languages as possible.
|
|
841
|
+
* The more languages Tesseract needs to load the longer it takes to recognize a image.
|
|
842
|
+
* @public
|
|
843
|
+
*/
|
|
844
|
+
lang?: Language[];
|
|
845
|
+
/**
|
|
846
|
+
* OCR Engine Modes
|
|
847
|
+
* The engine mode cannot be changed after creating the instance
|
|
848
|
+
* If another mode is needed, its advised to create a new instance.
|
|
849
|
+
* @throws {Error} Will throw an error when oem mode is below 0 or over 3
|
|
850
|
+
*/
|
|
851
|
+
oem?: OcrEngineMode;
|
|
852
|
+
setOnlyNonDebugParams?: boolean;
|
|
853
|
+
configs?: Array<string>;
|
|
854
|
+
vars?: Partial<Record<keyof ConfigurationVariables, ConfigurationVariables[keyof ConfigurationVariables]>>;
|
|
855
|
+
}
|
|
856
|
+
export interface TesseractSetRectangleOptions {
|
|
857
|
+
top: number;
|
|
858
|
+
left: number;
|
|
859
|
+
width: number;
|
|
860
|
+
height: number;
|
|
861
|
+
}
|
|
862
|
+
export interface ProgressChangedInfo {
|
|
863
|
+
/**
|
|
864
|
+
* Chars in this buffer
|
|
865
|
+
*/
|
|
866
|
+
progress: number;
|
|
867
|
+
/**
|
|
868
|
+
* Percent complete increasing (0-100)
|
|
869
|
+
*/
|
|
870
|
+
percent: number;
|
|
871
|
+
/**
|
|
872
|
+
* States if the worker is still alive
|
|
873
|
+
*/
|
|
874
|
+
ocrAlive: number;
|
|
875
|
+
/**
|
|
876
|
+
* top coordinate of the bbox of the current element that tesseract is processing
|
|
877
|
+
*/
|
|
878
|
+
top: number;
|
|
879
|
+
/**
|
|
880
|
+
* right coordinate of the bbox of the current element that tesseract is processing
|
|
881
|
+
*/
|
|
882
|
+
right: number;
|
|
883
|
+
/**
|
|
884
|
+
* bottom coordinate of the bbox of the current element that tesseract is processing
|
|
885
|
+
*/
|
|
886
|
+
bottom: number;
|
|
887
|
+
/**
|
|
888
|
+
* left coordinate of the bbox of the current element that tesseract is processing
|
|
889
|
+
*/
|
|
890
|
+
left: number;
|
|
891
|
+
}
|
|
892
|
+
export interface DetectOrientationScriptResult {
|
|
893
|
+
/**
|
|
894
|
+
* Orientation of the source image in degrees
|
|
895
|
+
* Orientation refers to the way the source is rotated, **not** how the text is
|
|
896
|
+
* aligned. It ranges from 0° to 360° degrees.
|
|
897
|
+
* @type {number}
|
|
898
|
+
*/
|
|
899
|
+
orientationDegrees: number;
|
|
900
|
+
/**
|
|
901
|
+
* The confidence of tesseract for the orientation
|
|
902
|
+
* @type {number}
|
|
903
|
+
*/
|
|
904
|
+
orientationConfidence: number;
|
|
905
|
+
/**
|
|
906
|
+
* The name of the script that is used in the source image
|
|
907
|
+
* @type {string}
|
|
908
|
+
*/
|
|
909
|
+
scriptName: string;
|
|
910
|
+
/**
|
|
911
|
+
* The confidence of tesseract about the detected script of the source image
|
|
912
|
+
* @type {number}
|
|
913
|
+
*/
|
|
914
|
+
scriptConfidence: number;
|
|
915
|
+
}
|
|
916
|
+
export interface TesseractInstance {
|
|
917
|
+
/**
|
|
918
|
+
* Initialize the engine with the given options.
|
|
919
|
+
* @param {TesseractInitOptions} options Initialization options (languages, datapath, engine mode, etc.).
|
|
920
|
+
* @returns {Promise<void>}
|
|
921
|
+
*/
|
|
922
|
+
init(options: TesseractInitOptions): Promise<void>;
|
|
923
|
+
/**
|
|
924
|
+
* Initialize the engine for page analysis only.
|
|
925
|
+
* @returns {Promise<void>}
|
|
926
|
+
*/
|
|
927
|
+
initForAnalysePage(): Promise<void>;
|
|
928
|
+
/**
|
|
929
|
+
* Run page layout analysis.
|
|
930
|
+
* @param {boolean} mergeSimilarWords Whether to merge similar words during analysis.
|
|
931
|
+
* @returns {Promise<void>}
|
|
932
|
+
*/
|
|
933
|
+
analysePage(mergeSimilarWords: boolean): Promise<void>;
|
|
934
|
+
/**
|
|
935
|
+
* Set the page segmentation mode (PSM).
|
|
936
|
+
* @param {PageSegmentationMode} psm Page segmentation mode.
|
|
937
|
+
* @returns {Promise<void>}
|
|
938
|
+
*/
|
|
939
|
+
setPageMode(psm: PageSegmentationMode): Promise<void>;
|
|
940
|
+
/**
|
|
941
|
+
* Set a configuration variable.
|
|
942
|
+
* @param {keyof SetVariableConfigVariables} name Variable name.
|
|
943
|
+
* @param {SetVariableConfigVariables[keyof SetVariableConfigVariables]} value Variable value.
|
|
944
|
+
* @returns Returns `false` if the lookup failed.
|
|
945
|
+
*/
|
|
946
|
+
setVariable(name: keyof SetVariableConfigVariables, value: SetVariableConfigVariables[keyof SetVariableConfigVariables]): Promise<boolean>;
|
|
947
|
+
/**
|
|
948
|
+
* Get a configuration variable as integer.
|
|
949
|
+
* @param {keyof SetVariableConfigVariables} name Variable name.
|
|
950
|
+
* @returns {Promise<number>} Returns the value of the variable.
|
|
951
|
+
*/
|
|
952
|
+
getIntVariable(name: keyof SetVariableConfigVariables): Promise<number>;
|
|
953
|
+
/**
|
|
954
|
+
* Get a configuration variable as boolean (0/1).
|
|
955
|
+
* @param {keyof SetVariableConfigVariables} name Variable name.
|
|
956
|
+
* @returns {Promise<number>} Returns the value of the variable.
|
|
957
|
+
*/
|
|
958
|
+
getBoolVariable(name: keyof SetVariableConfigVariables): Promise<number>;
|
|
959
|
+
/**
|
|
960
|
+
* Get a configuration variable as double.
|
|
961
|
+
* @param {keyof SetVariableConfigVariables} name Variable name.
|
|
962
|
+
* @returns {Promise<number>} Returns the value of the variable.
|
|
963
|
+
*/
|
|
964
|
+
getDoubleVariable(name: keyof SetVariableConfigVariables): Promise<number>;
|
|
965
|
+
/**
|
|
966
|
+
* Get a configuration variable as string.
|
|
967
|
+
* @param {keyof SetVariableConfigVariables} name Variable name.
|
|
968
|
+
* @returns {Promise<string>} Returns the value of the variable.
|
|
969
|
+
*/
|
|
970
|
+
getStringVariable(name: keyof SetVariableConfigVariables): Promise<string>;
|
|
971
|
+
/**
|
|
972
|
+
* Set the image to be recognized.
|
|
973
|
+
* @param {Buffer<ArrayBuffer>} buffer Image data buffer.
|
|
974
|
+
* @returns {Promise<void>}
|
|
975
|
+
*/
|
|
976
|
+
setImage(buffer: Buffer<ArrayBuffer>): Promise<void>;
|
|
977
|
+
/**
|
|
978
|
+
* Restrict recognition to a rectangle.
|
|
979
|
+
* @param {TesseractSetRectangleOptions} options Rectangle options.
|
|
980
|
+
* @returns {Promise<void>}
|
|
981
|
+
*/
|
|
982
|
+
setRectangle(options: TesseractSetRectangleOptions): Promise<void>;
|
|
983
|
+
/**
|
|
984
|
+
* Set the source resolution in PPI.
|
|
985
|
+
* @param {number} ppi Source resolution in PPI.
|
|
986
|
+
* @returns {Promise<void>}
|
|
987
|
+
*/
|
|
988
|
+
setSourceResolution(ppi: number): Promise<void>;
|
|
989
|
+
/**
|
|
990
|
+
* @throws {Error} Will throw an error if the parameter at index 0 is not a function
|
|
991
|
+
* @param {(info: ProgressChangedInfo) => void} progressCallback Callback will be called to inform the user about progress changes
|
|
992
|
+
* @returns {Promise<void>}
|
|
993
|
+
*/
|
|
994
|
+
recognize(progressCallback: (info: ProgressChangedInfo) => void): Promise<void>;
|
|
995
|
+
/**
|
|
996
|
+
* Detect orientation and script (OSD).
|
|
997
|
+
* @returns {Promise<DetectOrientationScriptResult>}
|
|
998
|
+
*/
|
|
999
|
+
detectOrientationScript(): Promise<DetectOrientationScriptResult>;
|
|
1000
|
+
/**
|
|
1001
|
+
* Get mean text confidence.
|
|
1002
|
+
* @returns {Promise<number>} Returns the mean text confidence on resolve
|
|
1003
|
+
*/
|
|
1004
|
+
meanTextConf(): Promise<number>;
|
|
1005
|
+
/**
|
|
1006
|
+
* Get recognized text as UTF-8.
|
|
1007
|
+
* @returns {Promise<string>} Returns the recognized test as utf-8 on resolve
|
|
1008
|
+
*/
|
|
1009
|
+
getUTF8Text(): Promise<string>;
|
|
1010
|
+
/**
|
|
1011
|
+
* Get hOCR output.
|
|
1012
|
+
* @param {Function} progressCallback Optional progress callback.
|
|
1013
|
+
* @param {number} pageNumber Optional page number (0-based).
|
|
1014
|
+
* @returns {Promise<string>} Returns the `hOCR` upon resolve
|
|
1015
|
+
*/
|
|
1016
|
+
getHOCRText(progressCallback?: (info: ProgressChangedInfo) => void, pageNumber?: number): Promise<string>;
|
|
1017
|
+
/**
|
|
1018
|
+
* Get TSV output.
|
|
1019
|
+
* @returns {Promise<string>} Returns the `tsv` upon resolve
|
|
1020
|
+
*/
|
|
1021
|
+
getTSVText(): Promise<string>;
|
|
1022
|
+
/**
|
|
1023
|
+
* Get UNLV output.
|
|
1024
|
+
* @returns {Promise<string>} Returns the `unlv` upon resolve
|
|
1025
|
+
*/
|
|
1026
|
+
getUNLVText(): Promise<string>;
|
|
1027
|
+
/**
|
|
1028
|
+
* Get ALTO XML output.
|
|
1029
|
+
* @param {Function} progressCallback Optional progress callback.
|
|
1030
|
+
* @param {number} pageNumber Optional page number (0-based).
|
|
1031
|
+
* @returns {Promise<string>} Returns the `alto` upon resolve
|
|
1032
|
+
*/
|
|
1033
|
+
getALTOText(progressCallback?: (info: ProgressChangedInfo) => void, pageNumber?: number): Promise<string>;
|
|
1034
|
+
/**
|
|
1035
|
+
* Get languages used at initialization.
|
|
1036
|
+
* @returns {Promise<Language>} Returns the languages used when init was called
|
|
1037
|
+
*/
|
|
1038
|
+
getInitLanguages(): Promise<Language>;
|
|
1039
|
+
/**
|
|
1040
|
+
* Get languages currently loaded.
|
|
1041
|
+
* @returns {Promise<Language[]>} Returns the languages that were actually loaded by `init`
|
|
1042
|
+
*/
|
|
1043
|
+
getLoadedLanguages(): Promise<Language[]>;
|
|
1044
|
+
/**
|
|
1045
|
+
* Get available languages from tessdata.
|
|
1046
|
+
* NOTE: this only will return anything after `init` was called before with a valid selection of languages
|
|
1047
|
+
* @returns {Promise<Language[]>} Returns the languages that are available to tesseract.
|
|
1048
|
+
*/
|
|
1049
|
+
getAvailableLanguages(): Promise<Language[]>;
|
|
1050
|
+
/**
|
|
1051
|
+
* Clear internal recognition results/state.
|
|
1052
|
+
* @returns {Promise<void>}
|
|
1053
|
+
*/
|
|
1054
|
+
clear(): Promise<void>;
|
|
1055
|
+
/**
|
|
1056
|
+
* Release native resources and destroy the instance.
|
|
1057
|
+
* @returns {Promise<void>}
|
|
1058
|
+
*/
|
|
1059
|
+
end(): Promise<void>;
|
|
1060
|
+
}
|
|
1061
|
+
export type NativeTesseract = TesseractInstance;
|
|
1062
|
+
export type TesseractConstructor = new () => TesseractInstance;
|
|
1063
|
+
declare const NativeTesseract: TesseractConstructor;
|
|
1064
|
+
declare class Tesseract extends NativeTesseract {
|
|
1065
|
+
constructor();
|
|
1066
|
+
init(options: TesseractInitOptions): Promise<void>;
|
|
1067
|
+
}
|
|
1068
|
+
export { Tesseract, NativeTesseract };
|
|
1069
|
+
export default Tesseract;
|