@luii/node-tesseract-ocr 2.1.0 → 2.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +3 -3
- package/README.md +461 -104
- package/binding-options.js +4 -0
- package/dist/cjs/index.cjs +21 -9
- package/dist/cjs/index.d.ts +4 -926
- package/dist/cjs/types.d.ts +1272 -0
- package/dist/cjs/types.js +17 -0
- package/dist/cjs/utils.js +15 -0
- package/dist/esm/index.d.ts +4 -926
- package/dist/esm/index.mjs +16 -9
- package/dist/esm/types.d.ts +1272 -0
- package/dist/esm/types.js +16 -0
- package/dist/esm/utils.js +15 -0
- package/package.json +6 -3
- package/prebuilds/node-tesseract-ocr-darwin-arm64/node-napi-v10.node +0 -0
- package/prebuilds/node-tesseract-ocr-linux-x64/node-napi-v10.node +0 -0
- package/src/commands.hpp +657 -88
- package/src/tesseract_wrapper.cpp +630 -187
- package/src/tesseract_wrapper.hpp +27 -2
- package/src/worker_thread.cpp +146 -2
- package/src/worker_thread.hpp +4 -1
|
@@ -0,0 +1,1272 @@
|
|
|
1
|
+
import type { Language, LogLevel, OcrEngineMode, PageSegmentationMode } from "./index";
|
|
2
|
+
/**
 * String-typed map of Tesseract configuration variables.
 *
 * Every value is a string template type:
 * - `${number}` accepts any numeric string (e.g. `"300"`, `"-1"`, `"0.5"`).
 * - `${0 | 1}` accepts only `"0"` or `"1"` and is reserved for boolean switches.
 * - `${string}` accepts any string.
 *
 * Integer and floating-point parameters are typed `${number}` even when their
 * usual value is 0 or 1, so that values such as `user_defined_dpi: "300"` or
 * `thresholding_method: "2"` type-check.
 *
 * NOTE(review): the group headings below are inferred from the member order,
 * which appears to follow Tesseract's parameter listing (int, bool, string,
 * double; first global, then per-instance) — confirm against the generator.
 */
export type ConfigurationVariables = {
  // Integer parameters (global).
  log_level: `${LogLevel}`;
  textord_dotmatrix_gap: `${number}`;
  textord_debug_block: `${number}`;
  textord_pitch_range: `${number}`;
  textord_words_veto_power: `${number}`;
  textord_tabfind_show_strokewidths: `${number}`;
  pitsync_linear_version: `${number}`;
  oldbl_holed_losscount: `${number}`;
  textord_skewsmooth_offset: `${number}`;
  textord_skewsmooth_offset2: `${number}`;
  textord_test_x: `${number}`;
  textord_test_y: `${number}`;
  textord_min_blobs_in_row: `${number}`;
  textord_spline_minblobs: `${number}`;
  textord_spline_medianwin: `${number}`;
  textord_max_blob_overlaps: `${number}`;
  textord_min_xheight: `${number}`;
  textord_lms_line_trials: `${number}`;
  textord_tabfind_show_images: `${number}`;
  textord_fp_chop_error: `${number}`;
  edges_max_children_per_outline: `${number}`;
  edges_max_children_layers: `${number}`;
  edges_children_per_grandchild: `${number}`;
  edges_children_count_limit: `${number}`;
  edges_min_nonhole: `${number}`;
  edges_patharea_ratio: `${number}`;
  devanagari_split_debuglevel: `${number}`;
  textord_tabfind_show_partitions: `${number}`;
  textord_debug_tabfind: `${number}`;
  textord_debug_bugs: `${number}`;
  textord_testregion_left: `${number}`;
  textord_testregion_top: `${number}`;
  textord_testregion_right: `${number}`;
  textord_testregion_bottom: `${number}`;
  classify_num_cp_levels: `${number}`;
  editor_image_xpos: `${number}`;
  editor_image_ypos: `${number}`;
  editor_image_menuheight: `${number}`;
  editor_image_blob_bb_color: `${number}`;
  editor_word_ypos: `${number}`;
  editor_word_width: `${number}`;
  curl_timeout: `${number}`;

  // Boolean switches (global).
  wordrec_display_all_blobs: `${0 | 1}`;
  wordrec_blob_pause: `${0 | 1}`;
  textord_force_make_prop_words: `${0 | 1}`;
  textord_chopper_test: `${0 | 1}`;
  textord_restore_underlines: `${0 | 1}`;
  textord_show_initial_words: `${0 | 1}`;
  textord_blocksall_fixed: `${0 | 1}`;
  textord_blocksall_prop: `${0 | 1}`;
  textord_pitch_scalebigwords: `${0 | 1}`;
  textord_debug_pitch_test: `${0 | 1}`;
  textord_disable_pitch_test: `${0 | 1}`;
  textord_fast_pitch_test: `${0 | 1}`;
  textord_debug_pitch_metric: `${0 | 1}`;
  textord_show_row_cuts: `${0 | 1}`;
  textord_show_page_cuts: `${0 | 1}`;
  textord_blockndoc_fixed: `${0 | 1}`;
  textord_show_tables: `${0 | 1}`;
  textord_tablefind_show_mark: `${0 | 1}`;
  textord_tablefind_show_stats: `${0 | 1}`;
  textord_tablefind_recognize_tables: `${0 | 1}`;
  textord_tabfind_show_initialtabs: `${0 | 1}`;
  textord_tabfind_show_finaltabs: `${0 | 1}`;
  textord_tabfind_only_strokewidths: `${0 | 1}`;
  textord_really_old_xheight: `${0 | 1}`;
  textord_oldbl_debug: `${0 | 1}`;
  textord_debug_baselines: `${0 | 1}`;
  textord_oldbl_paradef: `${0 | 1}`;
  textord_oldbl_split_splines: `${0 | 1}`;
  textord_oldbl_merge_parts: `${0 | 1}`;
  oldbl_corrfix: `${0 | 1}`;
  oldbl_xhfix: `${0 | 1}`;
  textord_ocropus_mode: `${0 | 1}`;
  textord_heavy_nr: `${0 | 1}`;
  textord_show_initial_rows: `${0 | 1}`;
  textord_show_parallel_rows: `${0 | 1}`;
  textord_show_expanded_rows: `${0 | 1}`;
  textord_show_final_rows: `${0 | 1}`;
  textord_show_final_blobs: `${0 | 1}`;
  textord_test_landscape: `${0 | 1}`;
  textord_parallel_baselines: `${0 | 1}`;
  textord_straight_baselines: `${0 | 1}`;
  textord_old_baselines: `${0 | 1}`;
  textord_old_xheight: `${0 | 1}`;
  textord_fix_xheight_bug: `${0 | 1}`;
  textord_fix_makerow_bug: `${0 | 1}`;
  textord_debug_xheights: `${0 | 1}`;
  textord_biased_skewcalc: `${0 | 1}`;
  textord_interpolating_skew: `${0 | 1}`;
  textord_new_initial_xheight: `${0 | 1}`;
  textord_debug_blob: `${0 | 1}`;
  gapmap_debug: `${0 | 1}`;
  gapmap_use_ends: `${0 | 1}`;
  gapmap_no_isolated_quanta: `${0 | 1}`;
  edges_use_new_outline_complexity: `${0 | 1}`;
  edges_debug: `${0 | 1}`;
  edges_children_fix: `${0 | 1}`;
  textord_show_fixed_cuts: `${0 | 1}`;
  devanagari_split_debugimage: `${0 | 1}`;
  textord_tabfind_show_initial_partitions: `${0 | 1}`;
  textord_tabfind_show_reject_blobs: `${0 | 1}`;
  textord_tabfind_show_columns: `${0 | 1}`;
  textord_tabfind_show_blocks: `${0 | 1}`;
  textord_tabfind_find_tables: `${0 | 1}`;
  textord_space_size_is_variable: `${0 | 1}`;
  textord_debug_printable: `${0 | 1}`;
  wordrec_display_splits: `${0 | 1}`;
  poly_debug: `${0 | 1}`;
  poly_wide_objects_better: `${0 | 1}`;
  equationdetect_save_bi_image: `${0 | 1}`;
  equationdetect_save_spt_image: `${0 | 1}`;
  equationdetect_save_seed_image: `${0 | 1}`;
  equationdetect_save_merged_image: `${0 | 1}`;

  // String parameters (global).
  debug_file: `${string}`;
  editor_word_name: `${string}`;
  dotproduct: `${string}`;
  document_title: `${string}`;
  curl_cookiefile: `${string}`;
  classify_font_name: `${string}`;

  // Floating-point parameters (global).
  textord_underline_offset: `${number}`;
  textord_wordstats_smooth_factor: `${number}`;
  textord_words_maxspace: `${number}`;
  textord_words_default_maxspace: `${number}`;
  textord_words_default_minspace: `${number}`;
  textord_words_min_minspace: `${number}`;
  textord_words_default_nonspace: `${number}`;
  textord_words_initial_lower: `${number}`;
  textord_words_initial_upper: `${number}`;
  textord_words_minlarge: `${number}`;
  textord_words_pitchsd_threshold: `${number}`;
  textord_words_def_fixed: `${number}`;
  textord_words_def_prop: `${number}`;
  textord_pitch_rowsimilarity: `${number}`;
  words_initial_lower: `${number}`;
  words_initial_upper: `${number}`;
  words_default_prop_nonspace: `${number}`;
  words_default_fixed_space: `${number}`;
  words_default_fixed_limit: `${number}`;
  textord_words_definite_spread: `${number}`;
  textord_spacesize_ratioprop: `${number}`;
  textord_fpiqr_ratio: `${number}`;
  textord_max_pitch_iqr: `${number}`;
  textord_projection_scale: `${number}`;
  textord_balance_factor: `${number}`;
  textord_tabvector_vertical_gap_fraction: `${number}`;
  textord_tabvector_vertical_box_ratio: `${number}`;
  pitsync_joined_edge: `${number}`;
  pitsync_offset_freecut_fraction: `${number}`;
  oldbl_xhfract: `${number}`;
  oldbl_dot_error_size: `${number}`;
  textord_oldbl_jumplimit: `${number}`;
  textord_spline_shift_fraction: `${number}`;
  textord_skew_ile: `${number}`;
  textord_skew_lag: `${number}`;
  textord_linespace_iqrlimit: `${number}`;
  textord_width_limit: `${number}`;
  textord_chop_width: `${number}`;
  textord_expansion_factor: `${number}`;
  textord_overlap_x: `${number}`;
  textord_minxh: `${number}`;
  textord_min_linesize: `${number}`;
  textord_excess_blobsize: `${number}`;
  textord_occupancy_threshold: `${number}`;
  textord_underline_width: `${number}`;
  textord_min_blob_height_fraction: `${number}`;
  textord_xheight_mode_fraction: `${number}`;
  textord_ascheight_mode_fraction: `${number}`;
  textord_descheight_mode_fraction: `${number}`;
  textord_ascx_ratio_min: `${number}`;
  textord_ascx_ratio_max: `${number}`;
  textord_descx_ratio_min: `${number}`;
  textord_descx_ratio_max: `${number}`;
  textord_xheight_error_margin: `${number}`;
  gapmap_big_gaps: `${number}`;
  edges_childarea: `${number}`;
  edges_boxarea: `${number}`;
  textord_underline_threshold: `${number}`;
  classify_pico_feature_length: `${number}`;
  classify_norm_adj_midpoint: `${number}`;
  classify_norm_adj_curl: `${number}`;
  classify_min_slope: `${number}`;
  classify_max_slope: `${number}`;
  classify_cp_angle_pad_loose: `${number}`;
  classify_cp_angle_pad_medium: `${number}`;
  classify_cp_angle_pad_tight: `${number}`;
  classify_cp_end_pad_loose: `${number}`;
  classify_cp_end_pad_medium: `${number}`;
  classify_cp_end_pad_tight: `${number}`;
  classify_cp_side_pad_loose: `${number}`;
  classify_cp_side_pad_medium: `${number}`;
  classify_cp_side_pad_tight: `${number}`;
  classify_pp_angle_pad: `${number}`;
  classify_pp_end_pad: `${number}`;
  classify_pp_side_pad: `${number}`;

  // Integer parameters (per-instance).
  ambigs_debug_level: `${number}`;
  classify_debug_level: `${number}`;
  classify_norm_method: `${number}`;
  matcher_debug_level: `${number}`;
  matcher_debug_flags: `${number}`;
  classify_learning_debug_level: `${number}`;
  matcher_permanent_classes_min: `${number}`;
  matcher_min_examples_for_prototyping: `${number}`;
  matcher_sufficient_examples_for_prototyping: `${number}`;
  classify_adapt_proto_threshold: `${number}`;
  classify_adapt_feature_threshold: `${number}`;
  classify_class_pruner_threshold: `${number}`;
  classify_class_pruner_multiplier: `${number}`;
  classify_cp_cutoff_strength: `${number}`;
  classify_integer_matcher_multiplier: `${number}`;
  dawg_debug_level: `${number}`;
  hyphen_debug_level: `${number}`;
  stopper_smallword_size: `${number}`;
  stopper_debug_level: `${number}`;
  tessedit_truncate_wordchoice_log: `${number}`;
  max_permuter_attempts: `${number}`;
  repair_unchopped_blobs: `${number}`;
  chop_debug: `${number}`;
  chop_split_length: `${number}`;
  chop_same_distance: `${number}`;
  chop_min_outline_points: `${number}`;
  chop_seam_pile_size: `${number}`;
  chop_inside_angle: `${number}`;
  chop_min_outline_area: `${number}`;
  chop_centered_maxwidth: `${number}`;
  chop_x_y_weight: `${number}`;
  wordrec_debug_level: `${number}`;
  wordrec_max_join_chunks: `${number}`;
  segsearch_debug_level: `${number}`;
  segsearch_max_pain_points: `${number}`;
  segsearch_max_futile_classifications: `${number}`;
  language_model_debug_level: `${number}`;
  language_model_ngram_order: `${number}`;
  language_model_viterbi_list_max_num_prunable: `${number}`;
  language_model_viterbi_list_max_size: `${number}`;
  language_model_min_compound_length: `${number}`;
  wordrec_display_segmentations: `${number}`;
  tessedit_pageseg_mode: `${number}`;
  thresholding_method: `${number}`;
  tessedit_ocr_engine_mode: `${number}`;
  pageseg_devanagari_split_strategy: `${number}`;
  ocr_devanagari_split_strategy: `${number}`;
  bidi_debug: `${number}`;
  applybox_debug: `${number}`;
  applybox_page: `${number}`;
  tessedit_font_id: `${number}`;
  tessedit_bigram_debug: `${number}`;
  debug_noise_removal: `${number}`;
  noise_maxperblob: `${number}`;
  noise_maxperword: `${number}`;
  debug_x_ht_level: `${number}`;
  quality_min_initial_alphas_reqd: `${number}`;
  tessedit_tess_adaption_mode: `${number}`;
  multilang_debug_level: `${number}`;
  paragraph_debug_level: `${number}`;
  tessedit_preserve_min_wd_len: `${number}`;
  crunch_rating_max: `${number}`;
  crunch_pot_indicators: `${number}`;
  crunch_leave_lc_strings: `${number}`;
  crunch_leave_uc_strings: `${number}`;
  crunch_long_repetitions: `${number}`;
  crunch_debug: `${number}`;
  fixsp_non_noise_limit: `${number}`;
  fixsp_done_mode: `${number}`;
  debug_fix_space_level: `${number}`;
  x_ht_acceptance_tolerance: `${number}`;
  x_ht_min_change: `${number}`;
  superscript_debug: `${number}`;
  page_xml_level: `${number}`;
  jpg_quality: `${number}`;
  user_defined_dpi: `${number}`;
  min_characters_to_try: `${number}`;
  suspect_level: `${number}`;
  suspect_short_words: `${number}`;
  tessedit_reject_mode: `${number}`;
  tessedit_image_border: `${number}`;
  min_sane_x_ht_pixels: `${number}`;
  tessedit_page_number: `${number}`;
  tessedit_parallelize: `${number}`;
  lstm_choice_mode: `${number}`;
  lstm_choice_iterations: `${number}`;
  tosp_debug_level: `${number}`;
  tosp_enough_space_samples_for_median: `${number}`;
  tosp_redo_kern_limit: `${number}`;
  tosp_few_samples: `${number}`;
  tosp_short_row: `${number}`;
  tosp_sanity_method: `${number}`;
  textord_max_noise_size: `${number}`;
  textord_baseline_debug: `${number}`;
  textord_noise_sizefraction: `${number}`;
  textord_noise_translimit: `${number}`;
  textord_noise_sncount: `${number}`;

  // Boolean switches (per-instance).
  use_ambigs_for_adaption: `${0 | 1}`;
  allow_blob_division: `${0 | 1}`;
  prioritize_division: `${0 | 1}`;
  classify_enable_learning: `${0 | 1}`;
  tess_cn_matching: `${0 | 1}`;
  tess_bn_matching: `${0 | 1}`;
  classify_enable_adaptive_matcher: `${0 | 1}`;
  classify_use_pre_adapted_templates: `${0 | 1}`;
  classify_save_adapted_templates: `${0 | 1}`;
  classify_enable_adaptive_debugger: `${0 | 1}`;
  classify_nonlinear_norm: `${0 | 1}`;
  disable_character_fragments: `${0 | 1}`;
  classify_debug_character_fragments: `${0 | 1}`;
  matcher_debug_separate_windows: `${0 | 1}`;
  classify_bln_numeric_mode: `${0 | 1}`;
  load_system_dawg: `${0 | 1}`;
  load_freq_dawg: `${0 | 1}`;
  load_unambig_dawg: `${0 | 1}`;
  load_punc_dawg: `${0 | 1}`;
  load_number_dawg: `${0 | 1}`;
  load_bigram_dawg: `${0 | 1}`;
  use_only_first_uft8_step: `${0 | 1}`;
  stopper_no_acceptable_choices: `${0 | 1}`;
  segment_nonalphabetic_script: `${0 | 1}`;
  save_doc_words: `${0 | 1}`;
  merge_fragments_in_matrix: `${0 | 1}`;
  wordrec_enable_assoc: `${0 | 1}`;
  force_word_assoc: `${0 | 1}`;
  chop_enable: `${0 | 1}`;
  chop_vertical_creep: `${0 | 1}`;
  chop_new_seam_pile: `${0 | 1}`;
  assume_fixed_pitch_char_segment: `${0 | 1}`;
  wordrec_skip_no_truth_words: `${0 | 1}`;
  wordrec_debug_blamer: `${0 | 1}`;
  wordrec_run_blamer: `${0 | 1}`;
  save_alt_choices: `${0 | 1}`;
  language_model_ngram_on: `${0 | 1}`;
  language_model_ngram_use_only_first_uft8_step: `${0 | 1}`;
  language_model_ngram_space_delimited_language: `${0 | 1}`;
  language_model_use_sigmoidal_certainty: `${0 | 1}`;
  tessedit_resegment_from_boxes: `${0 | 1}`;
  tessedit_resegment_from_line_boxes: `${0 | 1}`;
  tessedit_train_from_boxes: `${0 | 1}`;
  tessedit_make_boxes_from_boxes: `${0 | 1}`;
  tessedit_train_line_recognizer: `${0 | 1}`;
  tessedit_dump_pageseg_images: `${0 | 1}`;
  tessedit_do_invert: `${0 | 1}`;
  thresholding_debug: `${0 | 1}`;
  tessedit_ambigs_training: `${0 | 1}`;
  tessedit_adaption_debug: `${0 | 1}`;
  applybox_learn_chars_and_char_frags_mode: `${0 | 1}`;
  applybox_learn_ngrams_mode: `${0 | 1}`;
  tessedit_display_outwords: `${0 | 1}`;
  tessedit_dump_choices: `${0 | 1}`;
  tessedit_timing_debug: `${0 | 1}`;
  tessedit_fix_fuzzy_spaces: `${0 | 1}`;
  tessedit_unrej_any_wd: `${0 | 1}`;
  tessedit_fix_hyphens: `${0 | 1}`;
  tessedit_enable_doc_dict: `${0 | 1}`;
  tessedit_debug_fonts: `${0 | 1}`;
  tessedit_debug_block_rejection: `${0 | 1}`;
  tessedit_enable_bigram_correction: `${0 | 1}`;
  tessedit_enable_dict_correction: `${0 | 1}`;
  enable_noise_removal: `${0 | 1}`;
  tessedit_minimal_rej_pass1: `${0 | 1}`;
  tessedit_test_adaption: `${0 | 1}`;
  test_pt: `${0 | 1}`;
  paragraph_text_based: `${0 | 1}`;
  lstm_use_matrix: `${0 | 1}`;
  tessedit_good_quality_unrej: `${0 | 1}`;
  tessedit_use_reject_spaces: `${0 | 1}`;
  tessedit_preserve_blk_rej_perfect_wds: `${0 | 1}`;
  tessedit_preserve_row_rej_perfect_wds: `${0 | 1}`;
  tessedit_dont_blkrej_good_wds: `${0 | 1}`;
  tessedit_dont_rowrej_good_wds: `${0 | 1}`;
  tessedit_row_rej_good_docs: `${0 | 1}`;
  tessedit_reject_bad_qual_wds: `${0 | 1}`;
  tessedit_debug_doc_rejection: `${0 | 1}`;
  tessedit_debug_quality_metrics: `${0 | 1}`;
  bland_unrej: `${0 | 1}`;
  unlv_tilde_crunching: `${0 | 1}`;
  hocr_font_info: `${0 | 1}`;
  hocr_char_boxes: `${0 | 1}`;
  crunch_early_merge_tess_fails: `${0 | 1}`;
  crunch_early_convert_bad_unlv_chs: `${0 | 1}`;
  crunch_terrible_garbage: `${0 | 1}`;
  crunch_leave_ok_strings: `${0 | 1}`;
  crunch_accept_ok: `${0 | 1}`;
  crunch_leave_accept_strings: `${0 | 1}`;
  crunch_include_numerals: `${0 | 1}`;
  tessedit_prefer_joined_punct: `${0 | 1}`;
  tessedit_write_block_separators: `${0 | 1}`;
  tessedit_write_rep_codes: `${0 | 1}`;
  tessedit_write_unlv: `${0 | 1}`;
  tessedit_create_txt: `${0 | 1}`;
  tessedit_create_hocr: `${0 | 1}`;
  tessedit_create_alto: `${0 | 1}`;
  tessedit_create_page_xml: `${0 | 1}`;
  page_xml_polygon: `${0 | 1}`;
  tessedit_create_lstmbox: `${0 | 1}`;
  tessedit_create_tsv: `${0 | 1}`;
  tessedit_create_wordstrbox: `${0 | 1}`;
  tessedit_create_pdf: `${0 | 1}`;
  textonly_pdf: `${0 | 1}`;
  suspect_constrain_1Il: `${0 | 1}`;
  tessedit_minimal_rejection: `${0 | 1}`;
  tessedit_zero_rejection: `${0 | 1}`;
  tessedit_word_for_word: `${0 | 1}`;
  tessedit_zero_kelvin_rejection: `${0 | 1}`;
  tessedit_rejection_debug: `${0 | 1}`;
  tessedit_flip_0O: `${0 | 1}`;
  rej_trust_doc_dawg: `${0 | 1}`;
  rej_1Il_use_dict_word: `${0 | 1}`;
  rej_1Il_trust_permuter_type: `${0 | 1}`;
  rej_use_tess_accepted: `${0 | 1}`;
  rej_use_tess_blanks: `${0 | 1}`;
  rej_use_good_perm: `${0 | 1}`;
  rej_use_sensible_wd: `${0 | 1}`;
  rej_alphas_in_number_perm: `${0 | 1}`;
  tessedit_create_boxfile: `${0 | 1}`;
  tessedit_write_images: `${0 | 1}`;
  interactive_display_mode: `${0 | 1}`;
  tessedit_override_permuter: `${0 | 1}`;
  tessedit_use_primary_params_model: `${0 | 1}`;
  textord_tabfind_show_vlines: `${0 | 1}`;
  textord_use_cjk_fp_model: `${0 | 1}`;
  poly_allow_detailed_fx: `${0 | 1}`;
  tessedit_init_config_only: `${0 | 1}`;
  textord_equation_detect: `${0 | 1}`;
  textord_tabfind_vertical_text: `${0 | 1}`;
  textord_tabfind_force_vertical_text: `${0 | 1}`;
  preserve_interword_spaces: `${0 | 1}`;
  pageseg_apply_music_mask: `${0 | 1}`;
  textord_single_height_mode: `${0 | 1}`;
  tosp_old_to_method: `${0 | 1}`;
  tosp_old_to_constrain_sp_kn: `${0 | 1}`;
  tosp_only_use_prop_rows: `${0 | 1}`;
  tosp_force_wordbreak_on_punct: `${0 | 1}`;
  tosp_use_pre_chopping: `${0 | 1}`;
  tosp_old_to_bug_fix: `${0 | 1}`;
  tosp_block_use_cert_spaces: `${0 | 1}`;
  tosp_row_use_cert_spaces: `${0 | 1}`;
  tosp_narrow_blobs_not_cert: `${0 | 1}`;
  tosp_row_use_cert_spaces1: `${0 | 1}`;
  tosp_recovery_isolated_row_stats: `${0 | 1}`;
  tosp_only_small_gaps_for_kern: `${0 | 1}`;
  tosp_all_flips_fuzzy: `${0 | 1}`;
  tosp_fuzzy_limit_all: `${0 | 1}`;
  tosp_stats_use_xht_gaps: `${0 | 1}`;
  tosp_use_xht_gaps: `${0 | 1}`;
  tosp_only_use_xht_gaps: `${0 | 1}`;
  tosp_rule_9_test_punct: `${0 | 1}`;
  tosp_flip_fuzz_kn_to_sp: `${0 | 1}`;
  tosp_flip_fuzz_sp_to_kn: `${0 | 1}`;
  tosp_improve_thresh: `${0 | 1}`;
  textord_no_rejects: `${0 | 1}`;
  textord_show_blobs: `${0 | 1}`;
  textord_show_boxes: `${0 | 1}`;
  textord_noise_rejwords: `${0 | 1}`;
  textord_noise_rejrows: `${0 | 1}`;
  textord_noise_debug: `${0 | 1}`;

  // String parameters (per-instance).
  classify_learn_debug_str: `${string}`;
  user_words_file: `${string}`;
  user_words_suffix: `${string}`;
  user_patterns_file: `${string}`;
  user_patterns_suffix: `${string}`;
  output_ambig_words_file: `${string}`;
  word_to_debug: `${string}`;
  tessedit_char_blacklist: `${string}`;
  tessedit_char_whitelist: `${string}`;
  tessedit_char_unblacklist: `${string}`;
  tessedit_write_params_to_file: `${string}`;
  applybox_exposure_pattern: `${string}`;
  chs_leading_punct: `${string}`;
  chs_trailing_punct1: `${string}`;
  chs_trailing_punct2: `${string}`;
  outlines_odd: `${string}`;
  outlines_2: `${string}`;
  numeric_punctuation: `${string}`;
  unrecognised_char: `${string}`;
  ok_repeated_ch_non_alphanum_wds: `${string}`;
  conflict_set_I_l_1: `${string}`;
  file_type: `${string}`;
  tessedit_load_sublangs: `${string}`;
  page_separator: `${string}`;

  // Floating-point parameters (per-instance).
  classify_char_norm_range: `${number}`;
  classify_max_rating_ratio: `${number}`;
  classify_max_certainty_margin: `${number}`;
  matcher_good_threshold: `${number}`;
  matcher_reliable_adaptive_result: `${number}`;
  matcher_perfect_threshold: `${number}`;
  matcher_bad_match_pad: `${number}`;
  matcher_rating_margin: `${number}`;
  matcher_avg_noise_size: `${number}`;
  matcher_clustering_max_angle_delta: `${number}`;
  classify_misfit_junk_penalty: `${number}`;
  rating_scale: `${number}`;
  tessedit_class_miss_scale: `${number}`;
  classify_adapted_pruning_factor: `${number}`;
  classify_adapted_pruning_threshold: `${number}`;
  classify_character_fragments_garbage_certainty_threshold: `${number}`;
  speckle_large_max_size: `${number}`;
  speckle_rating_penalty: `${number}`;
  xheight_penalty_subscripts: `${number}`;
  xheight_penalty_inconsistent: `${number}`;
  segment_penalty_dict_frequent_word: `${number}`;
  segment_penalty_dict_case_ok: `${number}`;
  segment_penalty_dict_case_bad: `${number}`;
  segment_penalty_dict_nonword: `${number}`;
  segment_penalty_garbage: `${number}`;
  certainty_scale: `${number}`;
  stopper_nondict_certainty_base: `${number}`;
  stopper_phase2_certainty_rejection_offset: `${number}`;
  stopper_certainty_per_char: `${number}`;
  stopper_allowable_character_badness: `${number}`;
  doc_dict_pending_threshold: `${number}`;
  doc_dict_certainty_threshold: `${number}`;
  tessedit_certainty_threshold: `${number}`;
  chop_split_dist_knob: `${number}`;
  chop_overlap_knob: `${number}`;
  chop_center_knob: `${number}`;
  chop_sharpness_knob: `${number}`;
  chop_width_change_knob: `${number}`;
  chop_ok_split: `${number}`;
  chop_good_split: `${number}`;
  segsearch_max_char_wh_ratio: `${number}`;
  language_model_ngram_small_prob: `${number}`;
  language_model_ngram_nonmatch_score: `${number}`;
  language_model_ngram_scale_factor: `${number}`;
  language_model_ngram_rating_factor: `${number}`;
  language_model_penalty_non_freq_dict_word: `${number}`;
  language_model_penalty_non_dict_word: `${number}`;
  language_model_penalty_punc: `${number}`;
  language_model_penalty_case: `${number}`;
  language_model_penalty_script: `${number}`;
  language_model_penalty_chartype: `${number}`;
  language_model_penalty_font: `${number}`;
  language_model_penalty_spacing: `${number}`;
  language_model_penalty_increment: `${number}`;
  invert_threshold: `${number}`;
  thresholding_window_size: `${number}`;
  thresholding_kfactor: `${number}`;
  thresholding_tile_size: `${number}`;
  thresholding_smooth_kernel_size: `${number}`;
  thresholding_score_fraction: `${number}`;
  noise_cert_basechar: `${number}`;
  noise_cert_disjoint: `${number}`;
  noise_cert_punc: `${number}`;
  noise_cert_factor: `${number}`;
  quality_rej_pc: `${number}`;
  quality_blob_pc: `${number}`;
  quality_outline_pc: `${number}`;
  quality_char_pc: `${number}`;
  test_pt_x: `${number}`;
  test_pt_y: `${number}`;
  tessedit_reject_doc_percent: `${number}`;
  tessedit_reject_block_percent: `${number}`;
  tessedit_reject_row_percent: `${number}`;
  tessedit_whole_wd_rej_row_percent: `${number}`;
  tessedit_good_doc_still_rowrej_wd: `${number}`;
  quality_rowrej_pc: `${number}`;
  crunch_terrible_rating: `${number}`;
  crunch_poor_garbage_cert: `${number}`;
  crunch_poor_garbage_rate: `${number}`;
  crunch_pot_poor_rate: `${number}`;
  crunch_pot_poor_cert: `${number}`;
  crunch_del_rating: `${number}`;
  crunch_del_cert: `${number}`;
  crunch_del_min_ht: `${number}`;
  crunch_del_max_ht: `${number}`;
  crunch_del_min_width: `${number}`;
  crunch_del_high_word: `${number}`;
  crunch_del_low_word: `${number}`;
  crunch_small_outlines_size: `${number}`;
  fixsp_small_outlines_size: `${number}`;
  superscript_worse_certainty: `${number}`;
  superscript_bettered_certainty: `${number}`;
  superscript_scaledown_ratio: `${number}`;
  subscript_max_y_top: `${number}`;
  superscript_min_y_bottom: `${number}`;
  suspect_rating_per_ch: `${number}`;
  suspect_accept_rating: `${number}`;
  tessedit_lower_flip_hyphen: `${number}`;
  tessedit_upper_flip_hyphen: `${number}`;
  rej_whole_of_mostly_reject_word_fract: `${number}`;
  min_orientation_margin: `${number}`;
  textord_tabfind_vertical_text_ratio: `${number}`;
  textord_tabfind_aligned_gap_fraction: `${number}`;
  lstm_rating_coefficient: `${number}`;
  tosp_old_sp_kn_th_factor: `${number}`;
  tosp_threshold_bias1: `${number}`;
  tosp_threshold_bias2: `${number}`;
  tosp_narrow_fraction: `${number}`;
  tosp_narrow_aspect_ratio: `${number}`;
  tosp_wide_fraction: `${number}`;
  tosp_wide_aspect_ratio: `${number}`;
  tosp_fuzzy_space_factor: `${number}`;
  tosp_fuzzy_space_factor1: `${number}`;
  tosp_fuzzy_space_factor2: `${number}`;
  tosp_gap_factor: `${number}`;
  tosp_kern_gap_factor1: `${number}`;
  tosp_kern_gap_factor2: `${number}`;
  tosp_kern_gap_factor3: `${number}`;
  tosp_ignore_big_gaps: `${number}`;
  tosp_ignore_very_big_gaps: `${number}`;
  tosp_rep_space: `${number}`;
  tosp_enough_small_gaps: `${number}`;
  tosp_table_kn_sp_ratio: `${number}`;
  tosp_table_xht_sp_ratio: `${number}`;
  tosp_table_fuzzy_kn_sp_ratio: `${number}`;
  tosp_fuzzy_kn_fraction: `${number}`;
  tosp_fuzzy_sp_fraction: `${number}`;
  tosp_min_sane_kn_sp: `${number}`;
  tosp_init_guess_kn_mult: `${number}`;
  tosp_init_guess_xht_mult: `${number}`;
  tosp_max_sane_kn_thresh: `${number}`;
  tosp_flip_caution: `${number}`;
  tosp_large_kerning: `${number}`;
  tosp_dont_fool_with_small_kerns: `${number}`;
  tosp_near_lh_edge: `${number}`;
  tosp_silly_kn_sp_gap: `${number}`;
  tosp_pass_wide_fuzz_sp_to_context: `${number}`;
  textord_noise_area_ratio: `${number}`;
  textord_initialx_ile: `${number}`;
  textord_initialasc_ile: `${number}`;
  textord_noise_sizelimit: `${number}`;
  textord_noise_normratio: `${number}`;
  textord_noise_syfract: `${number}`;
  textord_noise_sxfract: `${number}`;
  textord_noise_hfract: `${number}`;
  textord_noise_rowratio: `${number}`;
  textord_blshift_maxshift: `${number}`;
  textord_blshift_xfraction: `${number}`;
};
|
|
629
|
+
/**
 * Names of the configuration variables grouped as "init only".
 *
 * These keys are removed from the set accepted by `setVariable`
 * (see `SetVariableConfigVariables`).
 */
export type InitOnlyConfigurationVariableNames =
  | "ambigs_debug_level"
  | "language_model_ngram_on"
  | "language_model_use_sigmoidal_certainty"
  | "load_bigram_dawg"
  | "load_freq_dawg"
  | "load_number_dawg"
  | "load_punc_dawg"
  | "load_system_dawg"
  | "load_unambig_dawg"
  | "tessedit_init_config_only"
  | "tessedit_ocr_engine_mode"
  | "user_patterns_suffix"
  | "user_words_suffix";
|
|
630
|
+
/**
 * Names of the configuration variables grouped as debug variables.
 *
 * This union constrains the `name` parameter of `setDebugVariable` and is
 * removed from the set accepted by `setVariable`.
 */
export type DebugConfigurationVariableNames =
  | "textord_debug_block"
  | "devanagari_split_debuglevel"
  | "textord_debug_tabfind"
  | "textord_debug_bugs"
  | "textord_debug_pitch_test"
  | "textord_debug_pitch_metric"
  | "textord_oldbl_debug"
  | "textord_debug_baselines"
  | "textord_debug_xheights"
  | "textord_debug_blob"
  | "gapmap_debug"
  | "edges_debug"
  | "devanagari_split_debugimage"
  | "textord_debug_printable"
  | "poly_debug"
  | "debug_file"
  | "ambigs_debug_level"
  | "classify_debug_level"
  | "matcher_debug_level"
  | "matcher_debug_flags"
  | "classify_learning_debug_level"
  | "dawg_debug_level"
  | "hyphen_debug_level"
  | "stopper_debug_level"
  | "chop_debug"
  | "wordrec_debug_level"
  | "segsearch_debug_level"
  | "language_model_debug_level"
  | "bidi_debug"
  | "applybox_debug"
  | "tessedit_bigram_debug"
  | "debug_noise_removal"
  | "debug_x_ht_level"
  | "multilang_debug_level"
  | "paragraph_debug_level"
  | "crunch_debug"
  | "debug_fix_space_level"
  | "superscript_debug"
  | "tosp_debug_level"
  | "textord_baseline_debug"
  | "classify_enable_adaptive_debugger"
  | "classify_debug_character_fragments"
  | "matcher_debug_separate_windows"
  | "wordrec_debug_blamer"
  | "thresholding_debug"
  | "tessedit_adaption_debug"
  | "tessedit_timing_debug"
  | "tessedit_debug_fonts"
  | "tessedit_debug_block_rejection"
  | "tessedit_debug_doc_rejection"
  | "tessedit_debug_quality_metrics"
  | "tessedit_rejection_debug"
  | "textord_noise_debug"
  | "classify_learn_debug_str"
  | "word_to_debug";
|
|
631
|
+
/**
 * Subset of `ConfigurationVariables` restricted to the keys listed in
 * `InitOnlyConfigurationVariableNames`.
 */
export type InitOnlyConfigurationVariables = Pick<
  ConfigurationVariables,
  InitOnlyConfigurationVariableNames
>;
|
|
632
|
+
/**
 * Subset of `ConfigurationVariables` restricted to the keys listed in
 * `DebugConfigurationVariableNames`; supplies the value types accepted by
 * `setDebugVariable`.
 */
export type DebugOnlyConfigurationVariables = Pick<
  ConfigurationVariables,
  DebugConfigurationVariableNames
>;
|
|
633
|
+
/**
 * `ConfigurationVariables` with the init-only and debug keys removed;
 * supplies the name/value pairs accepted by `setVariable`.
 */
export type SetVariableConfigVariables = Omit<
  ConfigurationVariables,
  InitOnlyConfigurationVariableNames | DebugConfigurationVariableNames
>;
|
|
634
|
+
/**
 * Union of every key of `SetVariableConfigVariables`, i.e. the variable names
 * that the `setVariable` signature accepts.
 */
export type SetConfigurationVariableNames = keyof SetVariableConfigVariables;
|
|
635
|
+
/**
 * Names of settable variables whose value type is a numeric template string
 * (`${number}`) but not the narrower 0/1 flag form (`${0 | 1}`).
 *
 * Built by mapping every settable name to itself or to `never` and then
 * indexing the mapped type, which collapses the `never` entries away.
 */
export type SetNumberConfigurationVariableNames = {
  [Name in SetConfigurationVariableNames]: SetVariableConfigVariables[Name] extends `${number}`
    ? SetVariableConfigVariables[Name] extends `${0 | 1}`
      ? never
      : Name
    : never;
}[SetConfigurationVariableNames];
|
|
638
|
+
/**
 * Names of settable variables whose value type is the 0/1 flag form
 * (`${0 | 1}`).
 *
 * Every settable name maps to itself when its value type is assignable to
 * `${0 | 1}` and to `never` otherwise; indexing the mapped type yields the
 * union of the surviving names.
 */
export type SetBoolConfigurationVariableNames = {
  [Name in SetConfigurationVariableNames]: SetVariableConfigVariables[Name] extends `${0 | 1}`
    ? Name
    : never;
}[SetConfigurationVariableNames];
|
|
641
|
+
/**
 * Names of settable variables whose value type is NOT a numeric template
 * string (`${number}`).
 *
 * Since `${0 | 1}` is itself assignable to `${number}`, the 0/1 flag
 * variables are excluded here as well.
 */
export type SetStringConfigurationVariableNames = {
  [Name in SetConfigurationVariableNames]: SetVariableConfigVariables[Name] extends `${number}`
    ? never
    : Name;
}[SetConfigurationVariableNames];
|
|
644
|
+
/**
 * Options accepted by `init(...)` when initializing the Tesseract engine.
 */
export interface TesseractInitOptions {
  /**
   * Languages to load, given as an array instead of Tesseract's native
   * `deu+eng` string form.
   *
   * It is generally safer to use as few languages as possible: the more
   * languages Tesseract needs to load, the longer it takes to recognize an
   * image. The OSD language is always loaded to support orientation and
   * script detection.
   *
   * IMPORTANT: if more than one language is specified (e.g. `deu` and `eng`),
   * Tesseract tries to recognize German and English in the same image.
   *
   * An array is used because typing the `+`-joined form as chained template
   * literal types (e.g. `${Language}+${Language}+...`) stretches the
   * compilation time to an unacceptable amount.
   *
   * @default [Language.osd]
   */
  langs?: Language[];
  /**
   * Location where the traineddata is stored.
   * In addition to the data path in general, it is versioned by the
   * Tesseract version.
   *
   * @default '~/.cache/node-tesseract-ocr/'
   */
  cachePath?: string;
  /**
   * Explicit data path for traineddata. Takes precedence over the
   * `TESSDATA_PREFIX` environment variable.
   */
  dataPath?: string;
  /**
   * Ensures that traineddata for the languages given in `langs` is
   * downloaded from the following CDN in case it does not exist yet:
   *
   * - OEM_LSTM_ONLY => https://cdn.jsdelivr.net/npm/@tesseract.js-data/${lang}/4.0.0_best_int
   * - NON OEM_LSTM_ONLY => https://cdn.jsdelivr.net/npm/@tesseract.js-data/${lang}/4.0.0
   *
   * NOTE: Tesseract 5.x.x still uses the 4.x.x traineddata.
   *
   * @default true
   */
  ensureTraineddata?: boolean;
  /**
   * Optional progress callback for traineddata downloads.
   */
  progressCallback?: (info: TrainingDataDownloadProgress) => void;
  /**
   * OCR engine mode.
   * The engine mode cannot be changed after creating the instance; if
   * another mode is needed, create a new instance instead.
   *
   * @default OEM_DEFAULT
   * @throws {Error} When the OEM mode is below 0 or over 3.
   */
  oem?: OcrEngineMode;
  /**
   * Controls whether only non-debug parameters are set upon initialization.
   *
   * @default false
   */
  setOnlyNonDebugParams?: boolean;
  /**
   * Paths pointing to config files, usually located in the `dataPath`
   * location alongside the traineddata.
   */
  configs?: string[];
  /**
   * Parameters that should be set upon initialization.
   * Consult the original Tesseract documentation on which variables can
   * actually be set.
   *
   * NOTE(review): the value type is the union of ALL variable value types, so
   * a value is not checked against the specific key it is assigned to.
   * `Partial<ConfigurationVariables>` would be stricter, but it narrows the
   * accepted input and is therefore left unchanged here.
   */
  vars?: Partial<Record<keyof ConfigurationVariables, ConfigurationVariables[keyof ConfigurationVariables]>>;
}
|
|
719
|
+
/**
 * Payload handed to the traineddata download `progressCallback`.
 */
export interface TrainingDataDownloadProgress {
  /** Language the reported download belongs to. */
  lang: Language;
  /** URL associated with the download. */
  url: string;
  /** Number of bytes downloaded so far. */
  downloadedBytes: number;
  /** Total size in bytes; optional, so it may be absent. */
  totalBytes?: number;
  /** Completion percentage; optional, so it may be absent. */
  percent?: number;
}
|
|
726
|
+
/**
 * Rectangle passed to `setRectangle(...)` to restrict recognition to a
 * region of the image.
 */
export interface TesseractSetRectangleOptions {
  /** Top coordinate of the rectangle. */
  top: number;
  /** Left coordinate of the rectangle. */
  left: number;
  /** Width of the rectangle. */
  width: number;
  /** Height of the rectangle. */
  height: number;
}
|
|
732
|
+
/**
 * Options for starting a multipage processing session
 * (`document.begin(...)` / `beginProcessPages(...)`).
 */
export interface TesseractBeginProcessPagesOptions {
  /** Output base name for the session. */
  outputBase: string;
  /** Title for the produced document. */
  title: string;
  /**
   * Timeout for the session.
   * NOTE(review): presumably milliseconds, since the status object reports
   * `timeoutMillisec` — confirm against the native implementation.
   */
  timeout: number;
  /** Text-only flag for the session. */
  textonly: boolean;
}
|
|
738
|
+
/**
 * Snapshot of the multipage session state, as returned by
 * `document.status()` / `getProcessPagesStatus()`.
 */
export interface TesseractProcessPagesStatus {
  /** Whether a multipage session is currently active. */
  active: boolean;
  /** Health flag of the session. */
  healthy: boolean;
  /** Number of pages processed so far. */
  processedPages: number;
  /** Index of the next page. */
  nextPageIndex: number;
  /** Output base name of the session. */
  outputBase: string;
  /** Session timeout in milliseconds. */
  timeoutMillisec: number;
  /** Text-only flag of the session. */
  textonly: boolean;
}
|
|
747
|
+
/**
 * Progress information passed to the progress callbacks of recognition and
 * text-output methods (e.g. `recognize`, `getHOCRText`).
 */
export interface ProgressChangedInfo {
  /**
   * Chars in this buffer.
   */
  progress: number;
  /**
   * Percent complete, increasing (0-100).
   */
  percent: number;
  /**
   * States whether the worker is still alive.
   */
  ocrAlive: number;
  /**
   * Top coordinate of the bounding box of the element Tesseract is
   * currently processing.
   */
  top: number;
  /**
   * Right coordinate of the bounding box of the element Tesseract is
   * currently processing.
   */
  right: number;
  /**
   * Bottom coordinate of the bounding box of the element Tesseract is
   * currently processing.
   */
  bottom: number;
  /**
   * Left coordinate of the bounding box of the element Tesseract is
   * currently processing.
   */
  left: number;
}
|
|
777
|
+
/**
 * Result of orientation and script detection (`detectOrientationScript()`).
 */
export interface DetectOrientationScriptResult {
  /**
   * Orientation of the source image in degrees.
   * Orientation refers to the way the source is rotated, **not** how the
   * text is aligned. It ranges from 0° to 360°.
   */
  orientationDegrees: number;
  /**
   * Tesseract's confidence in the detected orientation.
   */
  orientationConfidence: number;
  /**
   * Name of the script used in the source image.
   */
  scriptName: string;
  /**
   * Tesseract's confidence in the detected script of the source image.
   */
  scriptConfidence: number;
}
|
|
801
|
+
/**
 * Options describing a single language whose traineddata should be ensured.
 */
export type EnsureTrainedDataOptions = {
  /** Language whose traineddata is requested. */
  lang: Language;
  /** Cache location for traineddata. */
  cachePath: string;
  /** Data path for traineddata. */
  dataPath: string;
  /** Base URL used for downloads. */
  downloadBaseUrl: string;
  /** Optional progress callback for the download. */
  progressCallback?: (info: TrainingDataDownloadProgress) => void;
};
|
|
808
|
+
/**
 * Stable native error codes emitted by addon-backed OCR methods.
 */
export type TesseractErrorCode =
  | "ERR_INVALID_ARGUMENT"
  | "ERR_OUT_OF_RANGE"
  | "ERR_TESSERACT_RUNTIME"
  | "ERR_WORKER_CLOSED"
  | "ERR_WORKER_STOPPED";
|
|
812
|
+
/**
 * Base shape for errors rejected by native OCR methods: a regular `Error`
 * that may additionally carry native metadata.
 */
export type TesseractNativeError = Error & {
  /** Native error code, when provided. */
  code?: TesseractErrorCode;
  /** Name of the method associated with the error, when provided. */
  method?: string;
};
|
|
819
|
+
/**
 * Argument validation error (`TypeError` + native metadata).
 */
export type TesseractArgumentError = TypeError & TesseractNativeError;
|
|
823
|
+
/**
 * Range/domain error (`RangeError` + native metadata).
 */
export type TesseractRangeError = RangeError & TesseractNativeError;
|
|
827
|
+
/**
 * Runtime/native engine error (`Error` + native metadata).
 */
export type TesseractRuntimeError = Error & TesseractNativeError;
|
|
831
|
+
/**
 * Worker lifecycle error (worker is closing/stopped).
 */
export type TesseractWorkerError = Error & TesseractNativeError;
|
|
835
|
+
/**
 * Multipage document processing API: begin a session, add pages, then
 * finish (or abort) it.
 */
export interface TesseractDocumentApi {
  /**
   * Starts a multipage processing session.
   * @param options Session options.
   * @throws {TesseractRuntimeError} If called before `init(...)`.
   * @throws {TesseractArgumentError} If options are missing/invalid.
   * @throws {TesseractRuntimeError} If a session already exists or renderer setup fails.
   * @throws {TesseractWorkerError} If the worker is closing/stopped.
   */
  begin(options: TesseractBeginProcessPagesOptions): Promise<void>;
  /**
   * Adds one encoded page to the active multipage session.
   * @param buffer Encoded page image; must be a non-empty Buffer.
   * @param filename Optional file name for the page.
   * @throws {TesseractRuntimeError} If called before `init(...)`.
   * @throws {TesseractArgumentError} If `buffer` is not a non-empty Buffer.
   * @throws {TesseractArgumentError} If `filename` is provided but is not a string.
   * @throws {TesseractRuntimeError} If no session is active, decode fails, or page processing fails.
   * @throws {TesseractWorkerError} If the worker is closing/stopped.
   */
  addPage(buffer: Buffer<ArrayBuffer>, filename?: string): Promise<void>;
  /**
   * Finalizes the active multipage session.
   * @returns The output PDF path.
   * @throws {TesseractRuntimeError} If called before `init(...)`.
   * @throws {TesseractRuntimeError} If no session is active or finalization fails.
   * @throws {TesseractWorkerError} If the worker is closing/stopped.
   */
  finish(): Promise<string>;
  /**
   * Aborts the active multipage session.
   * @throws {TesseractWorkerError} If the worker is closing/stopped.
   */
  abort(): Promise<void>;
  /**
   * Returns the current multipage session status.
   * @throws {TesseractWorkerError} If the worker is closing/stopped.
   */
  status(): Promise<TesseractProcessPagesStatus>;
}
|
|
871
|
+
export interface TesseractInstance {
|
|
872
|
+
/**
|
|
873
|
+
* Multipage document processing facade.
|
|
874
|
+
*/
|
|
875
|
+
document: TesseractDocumentApi;
|
|
876
|
+
/**
|
|
877
|
+
* Gets the currently loaded libtesseract version string.
|
|
878
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
879
|
+
*/
|
|
880
|
+
version(): Promise<string>;
|
|
881
|
+
/**
|
|
882
|
+
* Returns whether `init(...)` was completed and not reset via `end()`.
|
|
883
|
+
* @throws {TesseractArgumentError} If called with unexpected arguments.
|
|
884
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
885
|
+
*/
|
|
886
|
+
isInitialized(): Promise<boolean>;
|
|
887
|
+
/**
|
|
888
|
+
* Set the name of the input file.
|
|
889
|
+
* This is used for training/zone files and searchable PDF metadata.
|
|
890
|
+
* @param {string} inputName The name of the input file
|
|
891
|
+
* @throws {TesseractArgumentError} If `inputName` is not a string.
|
|
892
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
893
|
+
*/
|
|
894
|
+
setInputName(inputName: string): Promise<void>;
|
|
895
|
+
/**
|
|
896
|
+
* Returns the current input name from Tesseract state.
|
|
897
|
+
* @throws {TesseractRuntimeError} If no input name is currently available.
|
|
898
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
899
|
+
*/
|
|
900
|
+
getInputName(): Promise<string>;
|
|
901
|
+
/**
|
|
902
|
+
* Sets the encoded source image buffer used by Tesseract.
|
|
903
|
+
* @param {Buffer<ArrayBuffer>} buffer
|
|
904
|
+
* @throws {TesseractArgumentError} If `buffer` is not a non-empty Buffer.
|
|
905
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
906
|
+
* @throws {TesseractRuntimeError} If leptonica cannot decode `buffer`.
|
|
907
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
908
|
+
*/
|
|
909
|
+
setInputImage(buffer: Buffer<ArrayBuffer>): Promise<void>;
|
|
910
|
+
/**
|
|
911
|
+
* Returns the current input image bytes.
|
|
912
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
913
|
+
* @throws {TesseractRuntimeError} If no input image is available.
|
|
914
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
915
|
+
*/
|
|
916
|
+
getInputImage(): Promise<Buffer<ArrayBuffer>>;
|
|
917
|
+
/**
|
|
918
|
+
* Returns source image Y resolution.
|
|
919
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
920
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
921
|
+
*/
|
|
922
|
+
getSourceYResolution(): Promise<number>;
|
|
923
|
+
/**
|
|
924
|
+
* Returns the tessdata path used by the engine.
|
|
925
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
926
|
+
* @throws {TesseractRuntimeError} If datapath is unavailable.
|
|
927
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
928
|
+
*/
|
|
929
|
+
getDataPath(): Promise<string>;
|
|
930
|
+
/**
|
|
931
|
+
* Sets output base name used by renderer-based outputs.
|
|
932
|
+
* @param {string} outputName The output base name.
|
|
933
|
+
* @throws {TesseractArgumentError} If `outputName` is not a string.
|
|
934
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
935
|
+
* @throws {TesseractRuntimeError} If `outputName` is empty.
|
|
936
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
937
|
+
*/
|
|
938
|
+
setOutputName(outputName: string): Promise<void>;
|
|
939
|
+
/**
|
|
940
|
+
* Clears global library-level caches (for example language dictionaries).
|
|
941
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
942
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
943
|
+
*/
|
|
944
|
+
clearPersistentCache(): Promise<void>;
|
|
945
|
+
/**
|
|
946
|
+
* Clears adaptive classifier state between pages/documents.
|
|
947
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
948
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
949
|
+
*/
|
|
950
|
+
clearAdaptiveClassifier(): Promise<void>;
|
|
951
|
+
/**
|
|
952
|
+
* Get a copy of the internal thresholded image from Tesseract.
|
|
953
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
954
|
+
* @throws {TesseractRuntimeError} If no thresholded image is available.
|
|
955
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
956
|
+
*/
|
|
957
|
+
getThresholdedImage(): Promise<Buffer<ArrayBuffer>>;
|
|
958
|
+
/**
|
|
959
|
+
* Returns the scale factor for thresholded/component images.
|
|
960
|
+
* May return `0` if no thresholder is active.
|
|
961
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
962
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
963
|
+
*/
|
|
964
|
+
getThresholdedImageScaleFactor(): Promise<number>;
|
|
965
|
+
/**
|
|
966
|
+
* Initialize the engine with the given options.
|
|
967
|
+
* @param {TesseractInitOptions} options Initialization options.
|
|
968
|
+
* @throws {TesseractArgumentError} If option types are invalid.
|
|
969
|
+
* @throws {TesseractRangeError} If `options.oem` is out of range.
|
|
970
|
+
* @throws {TesseractRuntimeError} If native init fails.
|
|
971
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
972
|
+
*/
|
|
973
|
+
init(options: TesseractInitOptions): Promise<void>;
|
|
974
|
+
/**
|
|
975
|
+
* Initialize the engine for page analysis only.
|
|
976
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
977
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
978
|
+
*/
|
|
979
|
+
initForAnalysePage(): Promise<void>;
|
|
980
|
+
/**
|
|
981
|
+
* Run page layout analysis.
|
|
982
|
+
* @param {boolean} mergeSimilarWords Whether to merge similar words during analysis.
|
|
983
|
+
* @throws {TesseractArgumentError} If `mergeSimilarWords` is not a boolean.
|
|
984
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
985
|
+
* @throws {TesseractRuntimeError} If analysis fails or returns null.
|
|
986
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
987
|
+
*/
|
|
988
|
+
analyseLayout(mergeSimilarWords: boolean): Promise<void>;
|
|
989
|
+
/**
|
|
990
|
+
* Starts a multipage processing session.
|
|
991
|
+
* @deprecated use `document.begin()`
|
|
992
|
+
* @throws {TesseractArgumentError} If options are missing/invalid.
|
|
993
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
994
|
+
* @throws {TesseractRuntimeError} If session already exists or renderer setup fails.
|
|
995
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
996
|
+
*/
|
|
997
|
+
beginProcessPages(options: TesseractBeginProcessPagesOptions): Promise<void>;
|
|
998
|
+
/**
|
|
999
|
+
* Adds one encoded page to the current multipage session.
|
|
1000
|
+
* @deprecated use `document.addPage()`
|
|
1001
|
+
* @throws {TesseractArgumentError} If `buffer` is not a non-empty Buffer.
|
|
1002
|
+
* @throws {TesseractArgumentError} If `filename` is provided but is not a string.
|
|
1003
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
1004
|
+
* @throws {TesseractRuntimeError} If no session is active, decode fails, or page processing fails.
|
|
1005
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1006
|
+
*/
|
|
1007
|
+
addProcessPage(buffer: Buffer<ArrayBuffer>, filename?: string): Promise<void>;
|
|
1008
|
+
/**
|
|
1009
|
+
* Finalizes the current multipage session and returns the output PDF path.
|
|
1010
|
+
* @deprecated use `document.finish()`
|
|
1011
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
1012
|
+
* @throws {TesseractRuntimeError} If no session is active or finalization fails.
|
|
1013
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1014
|
+
*/
|
|
1015
|
+
finishProcessPages(): Promise<string>;
|
|
1016
|
+
/**
|
|
1017
|
+
* Aborts the active multipage session and resets related state.
|
|
1018
|
+
* @deprecated use `document.abort()`
|
|
1019
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1020
|
+
*/
|
|
1021
|
+
abortProcessPages(): Promise<void>;
|
|
1022
|
+
/**
|
|
1023
|
+
* Returns the current multipage processing status.
|
|
1024
|
+
* @deprecated use `document.status()`
|
|
1025
|
+
* @throws {TesseractArgumentError} If called with unexpected arguments.
|
|
1026
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1027
|
+
*/
|
|
1028
|
+
getProcessPagesStatus(): Promise<TesseractProcessPagesStatus>;
|
|
1029
|
+
/**
|
|
1030
|
+
* Sets a debug configuration variable.
|
|
1031
|
+
* @param {DebugConfigurationVariableNames} name Debug variable name.
|
|
1032
|
+
* @param {DebugOnlyConfigurationVariables[DebugConfigurationVariableNames]} value Debug variable value.
|
|
1033
|
+
* @returns `false` if lookup/set failed.
|
|
1034
|
+
* @throws {TesseractArgumentError} If `name`/`value` are invalid types.
|
|
1035
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
1036
|
+
* @throws {TesseractRuntimeError} If `name`/`value` are empty.
|
|
1037
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1038
|
+
*/
|
|
1039
|
+
setDebugVariable<Name extends DebugConfigurationVariableNames>(name: Name, value: DebugOnlyConfigurationVariables[Name]): Promise<boolean>;
|
|
1040
|
+
/**
|
|
1041
|
+
* Set a configuration variable.
|
|
1042
|
+
* @param {SetConfigurationVariableNames} name Variable name.
|
|
1043
|
+
* @param {SetVariableConfigVariables[SetConfigurationVariableNames]} value Variable value.
|
|
1044
|
+
* @returns `false` if lookup/set failed.
|
|
1045
|
+
* @throws {TesseractArgumentError} If `name`/`value` are invalid types.
|
|
1046
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
1047
|
+
* @throws {TesseractRuntimeError} If `name`/`value` are empty.
|
|
1048
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1049
|
+
*/
|
|
1050
|
+
setVariable<Name extends SetConfigurationVariableNames>(name: Name, value: SetVariableConfigVariables[Name]): Promise<boolean>;
|
|
1051
|
+
/**
|
|
1052
|
+
* Get a configuration variable as integer.
|
|
1053
|
+
* @param {SetNumberConfigurationVariableNames} name Numeric variable name.
|
|
1054
|
+
* @throws {TesseractArgumentError} If `name` has an invalid type.
|
|
1055
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
1056
|
+
* @throws {TesseractRuntimeError} If variable was not found.
|
|
1057
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1058
|
+
*/
|
|
1059
|
+
getIntVariable(name: SetNumberConfigurationVariableNames): Promise<number>;
|
|
1060
|
+
/**
|
|
1061
|
+
* Get a configuration variable as boolean (0/1).
|
|
1062
|
+
* @param {SetBoolConfigurationVariableNames} name Boolean variable name.
|
|
1063
|
+
* @throws {TesseractArgumentError} If `name` has an invalid type.
|
|
1064
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
1065
|
+
* @throws {TesseractRuntimeError} If variable was not found.
|
|
1066
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1067
|
+
*/
|
|
1068
|
+
getBoolVariable(name: SetBoolConfigurationVariableNames): Promise<number>;
|
|
1069
|
+
/**
|
|
1070
|
+
* Get a configuration variable as double.
|
|
1071
|
+
* @param {SetNumberConfigurationVariableNames} name Numeric variable name.
|
|
1072
|
+
* @throws {TesseractArgumentError} If `name` has an invalid type.
|
|
1073
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
1074
|
+
* @throws {TesseractRuntimeError} If variable was not found.
|
|
1075
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1076
|
+
*/
|
|
1077
|
+
getDoubleVariable(name: SetNumberConfigurationVariableNames): Promise<number>;
|
|
1078
|
+
/**
|
|
1079
|
+
* Get a configuration variable as string.
|
|
1080
|
+
* @param {SetStringConfigurationVariableNames} name String variable name.
|
|
1081
|
+
* @throws {TesseractArgumentError} If `name` has an invalid type.
|
|
1082
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
1083
|
+
* @throws {TesseractRuntimeError} If variable was not found.
|
|
1084
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1085
|
+
*/
|
|
1086
|
+
getStringVariable(name: SetStringConfigurationVariableNames): Promise<string>;
|
|
1087
|
+
/**
|
|
1088
|
+
* Set the image to be recognized.
|
|
1089
|
+
* @param {Buffer<ArrayBuffer>} buffer Image data buffer.
|
|
1090
|
+
* @throws {TesseractArgumentError} If `buffer` is not a non-empty Buffer.
|
|
1091
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
1092
|
+
* @throws {TesseractRuntimeError} If decoding fails or decoded data is invalid.
|
|
1093
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1094
|
+
*/
|
|
1095
|
+
setImage(buffer: Buffer<ArrayBuffer>): Promise<void>;
|
|
1096
|
+
/**
|
|
1097
|
+
* Set the page segmentation mode (PSM).
|
|
1098
|
+
* @param {PageSegmentationMode} psm Page segmentation mode.
|
|
1099
|
+
* @throws {TesseractArgumentError} If `psm` is not a number.
|
|
1100
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
1101
|
+
* @throws {TesseractRangeError} If `psm` is outside valid mode range.
|
|
1102
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1103
|
+
*/
|
|
1104
|
+
setPageMode(psm: PageSegmentationMode): Promise<void>;
|
|
1105
|
+
/**
|
|
1106
|
+
* Restrict recognition to a rectangle.
|
|
1107
|
+
* @param {TesseractSetRectangleOptions} options Rectangle options.
|
|
1108
|
+
* @throws {TesseractArgumentError} If rectangle options are missing/invalid.
|
|
1109
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
1110
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1111
|
+
*/
|
|
1112
|
+
setRectangle(options: TesseractSetRectangleOptions): Promise<void>;
|
|
1113
|
+
/**
|
|
1114
|
+
* Set the source resolution in PPI.
|
|
1115
|
+
* @param {number} ppi Source resolution in PPI.
|
|
1116
|
+
* @throws {TesseractArgumentError} If `ppi` is missing or not a number.
|
|
1117
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
1118
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1119
|
+
*/
|
|
1120
|
+
setSourceResolution(ppi: number): Promise<void>;
|
|
1121
|
+
/**
|
|
1122
|
+
* Runs OCR recognition.
|
|
1123
|
+
* @param {(info: ProgressChangedInfo) => void} progressCallback Optional progress callback.
|
|
1124
|
+
* @throws {TesseractArgumentError} If `progressCallback` is provided but not a function.
|
|
1125
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
1126
|
+
* @throws {TesseractRuntimeError} If native recognition fails.
|
|
1127
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1128
|
+
*/
|
|
1129
|
+
recognize(progressCallback?: (info: ProgressChangedInfo) => void): Promise<void>;
|
|
1130
|
+
/**
|
|
1131
|
+
* Detect orientation and script (OSD).
|
|
1132
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
1133
|
+
* @throws {TesseractRuntimeError} If OSD detection fails.
|
|
1134
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1135
|
+
*/
|
|
1136
|
+
detectOrientationScript(): Promise<DetectOrientationScriptResult>;
|
|
1137
|
+
/**
|
|
1138
|
+
* Returns mean text confidence.
|
|
1139
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
1140
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1141
|
+
*/
|
|
1142
|
+
meanTextConf(): Promise<number>;
|
|
1143
|
+
/**
|
|
1144
|
+
* Returns all word confidences.
|
|
1145
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
1146
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1147
|
+
*/
|
|
1148
|
+
allWordConfidences(): Promise<number[]>;
|
|
1149
|
+
/**
|
|
1150
|
+
* Make an XML-formatted string with PAGE markup from the internal data structures.
|
|
1151
|
+
* @param {(info: ProgressChangedInfo) => void} progressCallback callback to monitor the progress
|
|
1152
|
+
* @param {number} pageNumber pageNumber is a 0-based page index
|
|
1153
|
+
* @throws {TesseractArgumentError} If callback/page number types are invalid.
|
|
1154
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
1155
|
+
* @throws {TesseractRuntimeError} If PAGE generation fails or returns null.
|
|
1156
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1157
|
+
*/
|
|
1158
|
+
getPAGEText(progressCallback?: (info: ProgressChangedInfo) => void, pageNumber?: number): Promise<string>;
|
|
1159
|
+
/**
|
|
1160
|
+
* Make a box file for LSTM training from the internal data structures.
|
|
1161
|
+
* Constructs coordinates in the original image - not just the rectangle.
|
|
1162
|
+
* @param {number} pageNumber pageNumber is a 0-based page index that will appear in the box file.
|
|
1163
|
+
* @throws {TesseractArgumentError} If `pageNumber` has invalid type.
|
|
1164
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
1165
|
+
* @throws {TesseractRuntimeError} If LSTM box text generation returns null.
|
|
1166
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1167
|
+
*/
|
|
1168
|
+
getLSTMBoxText(pageNumber?: number): Promise<string>;
|
|
1169
|
+
/**
|
|
1170
|
+
* The recognized text is returned as a string which is coded in the same format as a box file used in training.
|
|
1171
|
+
* Constructs coordinates in the original image - not just the rectangle.
|
|
1172
|
+
* @param {number} pageNumber page_number is a 0-based page index that will appear in the box file.
|
|
1173
|
+
* @throws {TesseractArgumentError} If `pageNumber` has invalid type.
|
|
1174
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
1175
|
+
* @throws {TesseractRuntimeError} If box text generation returns null.
|
|
1176
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1177
|
+
*/
|
|
1178
|
+
getBoxText(pageNumber?: number): Promise<string>;
|
|
1179
|
+
/**
|
|
1180
|
+
* The recognized text is returned as a string which is coded in the same format as a WordStr box file used in training.
|
|
1181
|
+
* @param {number} pageNumber pageNumber is a 0-based page index that will appear in the box file.
|
|
1182
|
+
* @throws {TesseractArgumentError} If `pageNumber` has invalid type.
|
|
1183
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
1184
|
+
* @throws {TesseractRuntimeError} If WordStr box generation returns null.
|
|
1185
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1186
|
+
*/
|
|
1187
|
+
getWordStrBoxText(pageNumber?: number): Promise<string>;
|
|
1188
|
+
/**
|
|
1189
|
+
* The recognized text is returned as a UTF-8 encoded string.
|
|
1190
|
+
* @param {number} pageNumber pageNumber is a 0-based page index that will appear in the osd file.
|
|
1191
|
+
* @throws {TesseractArgumentError} If `pageNumber` has an invalid type.
|
|
1192
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
1193
|
+
* @throws {TesseractRuntimeError} If OSD text generation returns null.
|
|
1194
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1195
|
+
*/
|
|
1196
|
+
getOSDText(pageNumber?: number): Promise<string>;
|
|
1197
|
+
/**
|
|
1198
|
+
* Get recognized text as UTF-8.
|
|
1199
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
1200
|
+
* @throws {TesseractRuntimeError} If UTF-8 extraction returns null.
|
|
1201
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1202
|
+
*/
|
|
1203
|
+
getUTF8Text(): Promise<string>;
|
|
1204
|
+
/**
|
|
1205
|
+
* Get hOCR output.
|
|
1206
|
+
* @param {Function} progressCallback Optional progress callback.
|
|
1207
|
+
* @param {number} pageNumber Optional page number (0-based).
|
|
1208
|
+
* @throws {TesseractArgumentError} If callback/page number types are invalid.
|
|
1209
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
1210
|
+
* @throws {TesseractRuntimeError} If hOCR generation returns null.
|
|
1211
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1212
|
+
*/
|
|
1213
|
+
getHOCRText(progressCallback?: (info: ProgressChangedInfo) => void, pageNumber?: number): Promise<string>;
|
|
1214
|
+
/**
|
|
1215
|
+
* Get TSV output.
|
|
1216
|
+
* @param {number} pageNumber Optional page number (0-based).
|
|
1217
|
+
* @throws {TesseractArgumentError} If `pageNumber` has an invalid type.
|
|
1218
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
1219
|
+
* @throws {TesseractRuntimeError} If TSV generation returns null.
|
|
1220
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1221
|
+
*/
|
|
1222
|
+
getTSVText(pageNumber?: number): Promise<string>;
|
|
1223
|
+
/**
|
|
1224
|
+
* Get UNLV output.
|
|
1225
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
1226
|
+
* @throws {TesseractRuntimeError} If UNLV generation returns null.
|
|
1227
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1228
|
+
*/
|
|
1229
|
+
getUNLVText(): Promise<string>;
|
|
1230
|
+
/**
|
|
1231
|
+
* Get ALTO XML output.
|
|
1232
|
+
* @param {number} pageNumber Optional page number (0-based).
|
|
1233
|
+
* @throws {TesseractArgumentError} If `pageNumber` has an invalid type.
|
|
1234
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
1235
|
+
* @throws {TesseractRuntimeError} If ALTO generation returns null.
|
|
1236
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1237
|
+
*/
|
|
1238
|
+
getALTOText(pageNumber?: number): Promise<string>;
|
|
1239
|
+
/**
|
|
1240
|
+
* Get languages used at initialization.
|
|
1241
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
1242
|
+
* @throws {TesseractRuntimeError} If initialization languages are unavailable.
|
|
1243
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1244
|
+
*/
|
|
1245
|
+
getInitLanguages(): Promise<Language>;
|
|
1246
|
+
/**
|
|
1247
|
+
* Get languages currently loaded.
|
|
1248
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
1249
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1250
|
+
*/
|
|
1251
|
+
getLoadedLanguages(): Promise<Language[]>;
|
|
1252
|
+
/**
|
|
1253
|
+
* Get available languages from tessdata.
|
|
1254
|
+
* NOTE: this only returns results after `init` has been called with a valid selection of languages.
|
|
1255
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
1256
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1257
|
+
*/
|
|
1258
|
+
getAvailableLanguages(): Promise<Language[]>;
|
|
1259
|
+
/**
|
|
1260
|
+
* Clear internal recognition results/state.
|
|
1261
|
+
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
1262
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1263
|
+
*/
|
|
1264
|
+
clear(): Promise<void>;
|
|
1265
|
+
/**
|
|
1266
|
+
* Release native resources and destroy the instance.
|
|
1267
|
+
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1268
|
+
*/
|
|
1269
|
+
end(): Promise<void>;
|
|
1270
|
+
}
|
|
1271
|
+
/** Alias for {@link TesseractInstance}. */
export type NativeTesseract = TesseractInstance;
|
|
1272
|
+
/** Constructor signature: called with `new` and no arguments, it yields a {@link TesseractInstance}. */
export type TesseractConstructor = new () => TesseractInstance;
|