@luii/node-tesseract-ocr 2.1.0 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +3 -3
- package/README.md +461 -104
- package/binding-options.js +4 -0
- package/dist/cjs/index.cjs +21 -9
- package/dist/cjs/index.d.ts +4 -926
- package/dist/cjs/types.d.ts +1283 -0
- package/dist/cjs/types.js +17 -0
- package/dist/cjs/utils.js +15 -0
- package/dist/esm/index.d.ts +4 -926
- package/dist/esm/index.mjs +16 -9
- package/dist/esm/types.d.ts +1283 -0
- package/dist/esm/types.js +16 -0
- package/dist/esm/utils.js +15 -0
- package/package.json +6 -3
- package/prebuilds/node-tesseract-ocr-darwin-arm64/node-napi-v10.node +0 -0
- package/prebuilds/node-tesseract-ocr-linux-x64/node-napi-v10.node +0 -0
- package/src/commands.hpp +688 -88
- package/src/tesseract_wrapper.cpp +652 -187
- package/src/tesseract_wrapper.hpp +27 -2
- package/src/worker_thread.cpp +146 -2
- package/src/worker_thread.hpp +4 -1
package/dist/esm/index.d.ts
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
import type { EnsureTrainedDataOptions, TesseractDocumentApi, TesseractConstructor, TesseractInitOptions, TrainingDataDownloadProgress } from "./types";
|
|
2
|
+
export type { ConfigurationVariables, DebugConfigurationVariableNames, DebugOnlyConfigurationVariables, DetectOrientationScriptResult, EnsureTrainedDataOptions, InitOnlyConfigurationVariables, ProgressChangedInfo, SetBoolConfigurationVariableNames, SetConfigurationVariableNames, SetNumberConfigurationVariableNames, SetStringConfigurationVariableNames, SetVariableConfigVariables, TesseractBeginProcessPagesOptions, TesseractConstructor, TesseractDocumentApi, TesseractInitOptions, TesseractInstance, TesseractProcessPagesStatus, TesseractSetRectangleOptions, TrainingDataDownloadProgress, } from "./types";
|
|
3
|
+
export type NativeTesseract = import("./types").TesseractInstance;
|
|
1
4
|
/**
|
|
2
5
|
* All available languages for tesseract
|
|
3
6
|
* @readonly
|
|
@@ -202,934 +205,9 @@ export declare const LogLevels: {
|
|
|
202
205
|
readonly OFF: "2147483647";
|
|
203
206
|
};
|
|
204
207
|
export type LogLevel = (typeof LogLevels)[keyof typeof LogLevels];
|
|
205
|
-
export type ConfigurationVariables = {
|
|
206
|
-
log_level: `${LogLevel}`;
|
|
207
|
-
textord_dotmatrix_gap: `${number}`;
|
|
208
|
-
textord_debug_block: `${0 | 1}`;
|
|
209
|
-
textord_pitch_range: `${number}`;
|
|
210
|
-
textord_words_veto_power: `${number}`;
|
|
211
|
-
textord_tabfind_show_strokewidths: `${0 | 1}`;
|
|
212
|
-
pitsync_linear_version: `${number}`;
|
|
213
|
-
oldbl_holed_losscount: `${number}`;
|
|
214
|
-
textord_skewsmooth_offset: `${number}`;
|
|
215
|
-
textord_skewsmooth_offset2: `${0 | 1}`;
|
|
216
|
-
textord_test_x: `${number}`;
|
|
217
|
-
textord_test_y: `${number}`;
|
|
218
|
-
textord_min_blobs_in_row: `${number}`;
|
|
219
|
-
textord_spline_minblobs: `${number}`;
|
|
220
|
-
textord_spline_medianwin: `${number}`;
|
|
221
|
-
textord_max_blob_overlaps: `${number}`;
|
|
222
|
-
textord_min_xheight: `${number}`;
|
|
223
|
-
textord_lms_line_trials: `${number}`;
|
|
224
|
-
textord_tabfind_show_images: `${0 | 1}`;
|
|
225
|
-
textord_fp_chop_error: `${number}`;
|
|
226
|
-
edges_max_children_per_outline: `${number}`;
|
|
227
|
-
edges_max_children_layers: `${number}`;
|
|
228
|
-
edges_children_per_grandchild: `${number}`;
|
|
229
|
-
edges_children_count_limit: `${number}`;
|
|
230
|
-
edges_min_nonhole: `${number}`;
|
|
231
|
-
edges_patharea_ratio: `${number}`;
|
|
232
|
-
devanagari_split_debuglevel: `${0 | 1}`;
|
|
233
|
-
textord_tabfind_show_partitions: `${0 | 1}`;
|
|
234
|
-
textord_debug_tabfind: `${0 | 1}`;
|
|
235
|
-
textord_debug_bugs: `${0 | 1}`;
|
|
236
|
-
textord_testregion_left: `${number}`;
|
|
237
|
-
textord_testregion_top: `${number}`;
|
|
238
|
-
textord_testregion_right: `${number}`;
|
|
239
|
-
textord_testregion_bottom: `${number}`;
|
|
240
|
-
classify_num_cp_levels: `${number}`;
|
|
241
|
-
editor_image_xpos: `${number}`;
|
|
242
|
-
editor_image_ypos: `${number}`;
|
|
243
|
-
editor_image_menuheight: `${number}`;
|
|
244
|
-
editor_image_blob_bb_color: `${number}`;
|
|
245
|
-
editor_word_ypos: `${number}`;
|
|
246
|
-
editor_word_width: `${number}`;
|
|
247
|
-
curl_timeout: `${0 | 1}`;
|
|
248
|
-
wordrec_display_all_blobs: `${0 | 1}`;
|
|
249
|
-
wordrec_blob_pause: `${0 | 1}`;
|
|
250
|
-
textord_force_make_prop_words: `${0 | 1}`;
|
|
251
|
-
textord_chopper_test: `${0 | 1}`;
|
|
252
|
-
textord_restore_underlines: `${0 | 1}`;
|
|
253
|
-
textord_show_initial_words: `${0 | 1}`;
|
|
254
|
-
textord_blocksall_fixed: `${0 | 1}`;
|
|
255
|
-
textord_blocksall_prop: `${0 | 1}`;
|
|
256
|
-
textord_pitch_scalebigwords: `${0 | 1}`;
|
|
257
|
-
textord_debug_pitch_test: `${0 | 1}`;
|
|
258
|
-
textord_disable_pitch_test: `${0 | 1}`;
|
|
259
|
-
textord_fast_pitch_test: `${0 | 1}`;
|
|
260
|
-
textord_debug_pitch_metric: `${0 | 1}`;
|
|
261
|
-
textord_show_row_cuts: `${0 | 1}`;
|
|
262
|
-
textord_show_page_cuts: `${0 | 1}`;
|
|
263
|
-
textord_blockndoc_fixed: `${0 | 1}`;
|
|
264
|
-
textord_show_tables: `${0 | 1}`;
|
|
265
|
-
textord_tablefind_show_mark: `${0 | 1}`;
|
|
266
|
-
textord_tablefind_show_stats: `${0 | 1}`;
|
|
267
|
-
textord_tablefind_recognize_tables: `${0 | 1}`;
|
|
268
|
-
textord_tabfind_show_initialtabs: `${0 | 1}`;
|
|
269
|
-
textord_tabfind_show_finaltabs: `${0 | 1}`;
|
|
270
|
-
textord_tabfind_only_strokewidths: `${0 | 1}`;
|
|
271
|
-
textord_really_old_xheight: `${0 | 1}`;
|
|
272
|
-
textord_oldbl_debug: `${0 | 1}`;
|
|
273
|
-
textord_debug_baselines: `${0 | 1}`;
|
|
274
|
-
textord_oldbl_paradef: `${0 | 1}`;
|
|
275
|
-
textord_oldbl_split_splines: `${0 | 1}`;
|
|
276
|
-
textord_oldbl_merge_parts: `${0 | 1}`;
|
|
277
|
-
oldbl_corrfix: `${0 | 1}`;
|
|
278
|
-
oldbl_xhfix: `${0 | 1}`;
|
|
279
|
-
textord_ocropus_mode: `${0 | 1}`;
|
|
280
|
-
textord_heavy_nr: `${0 | 1}`;
|
|
281
|
-
textord_show_initial_rows: `${0 | 1}`;
|
|
282
|
-
textord_show_parallel_rows: `${0 | 1}`;
|
|
283
|
-
textord_show_expanded_rows: `${0 | 1}`;
|
|
284
|
-
textord_show_final_rows: `${0 | 1}`;
|
|
285
|
-
textord_show_final_blobs: `${0 | 1}`;
|
|
286
|
-
textord_test_landscape: `${0 | 1}`;
|
|
287
|
-
textord_parallel_baselines: `${0 | 1}`;
|
|
288
|
-
textord_straight_baselines: `${0 | 1}`;
|
|
289
|
-
textord_old_baselines: `${0 | 1}`;
|
|
290
|
-
textord_old_xheight: `${0 | 1}`;
|
|
291
|
-
textord_fix_xheight_bug: `${0 | 1}`;
|
|
292
|
-
textord_fix_makerow_bug: `${0 | 1}`;
|
|
293
|
-
textord_debug_xheights: `${0 | 1}`;
|
|
294
|
-
textord_biased_skewcalc: `${0 | 1}`;
|
|
295
|
-
textord_interpolating_skew: `${0 | 1}`;
|
|
296
|
-
textord_new_initial_xheight: `${0 | 1}`;
|
|
297
|
-
textord_debug_blob: `${0 | 1}`;
|
|
298
|
-
gapmap_debug: `${0 | 1}`;
|
|
299
|
-
gapmap_use_ends: `${0 | 1}`;
|
|
300
|
-
gapmap_no_isolated_quanta: `${0 | 1}`;
|
|
301
|
-
edges_use_new_outline_complexity: `${0 | 1}`;
|
|
302
|
-
edges_debug: `${0 | 1}`;
|
|
303
|
-
edges_children_fix: `${0 | 1}`;
|
|
304
|
-
textord_show_fixed_cuts: `${0 | 1}`;
|
|
305
|
-
devanagari_split_debugimage: `${0 | 1}`;
|
|
306
|
-
textord_tabfind_show_initial_partitions: `${0 | 1}`;
|
|
307
|
-
textord_tabfind_show_reject_blobs: `${0 | 1}`;
|
|
308
|
-
textord_tabfind_show_columns: `${0 | 1}`;
|
|
309
|
-
textord_tabfind_show_blocks: `${0 | 1}`;
|
|
310
|
-
textord_tabfind_find_tables: `${0 | 1}`;
|
|
311
|
-
textord_space_size_is_variable: `${0 | 1}`;
|
|
312
|
-
textord_debug_printable: `${0 | 1}`;
|
|
313
|
-
wordrec_display_splits: `${0 | 1}`;
|
|
314
|
-
poly_debug: `${0 | 1}`;
|
|
315
|
-
poly_wide_objects_better: `${0 | 1}`;
|
|
316
|
-
equationdetect_save_bi_image: `${0 | 1}`;
|
|
317
|
-
equationdetect_save_spt_image: `${0 | 1}`;
|
|
318
|
-
equationdetect_save_seed_image: `${0 | 1}`;
|
|
319
|
-
equationdetect_save_merged_image: `${0 | 1}`;
|
|
320
|
-
debug_file: `${string}`;
|
|
321
|
-
editor_word_name: `${string}`;
|
|
322
|
-
dotproduct: `${string}`;
|
|
323
|
-
document_title: `${string}`;
|
|
324
|
-
curl_cookiefile: `${string}`;
|
|
325
|
-
classify_font_name: `${string}`;
|
|
326
|
-
textord_underline_offset: `${number}`;
|
|
327
|
-
textord_wordstats_smooth_factor: `${number}`;
|
|
328
|
-
textord_words_maxspace: `${number}`;
|
|
329
|
-
textord_words_default_maxspace: `${number}`;
|
|
330
|
-
textord_words_default_minspace: `${number}`;
|
|
331
|
-
textord_words_min_minspace: `${number}`;
|
|
332
|
-
textord_words_default_nonspace: `${number}`;
|
|
333
|
-
textord_words_initial_lower: `${number}`;
|
|
334
|
-
textord_words_initial_upper: `${number}`;
|
|
335
|
-
textord_words_minlarge: `${number}`;
|
|
336
|
-
textord_words_pitchsd_threshold: `${number}`;
|
|
337
|
-
textord_words_def_fixed: `${number}`;
|
|
338
|
-
textord_words_def_prop: `${number}`;
|
|
339
|
-
textord_pitch_rowsimilarity: `${number}`;
|
|
340
|
-
words_initial_lower: `${number}`;
|
|
341
|
-
words_initial_upper: `${number}`;
|
|
342
|
-
words_default_prop_nonspace: `${number}`;
|
|
343
|
-
words_default_fixed_space: `${number}`;
|
|
344
|
-
words_default_fixed_limit: `${number}`;
|
|
345
|
-
textord_words_definite_spread: `${number}`;
|
|
346
|
-
textord_spacesize_ratioprop: `${number}`;
|
|
347
|
-
textord_fpiqr_ratio: `${number}`;
|
|
348
|
-
textord_max_pitch_iqr: `${number}`;
|
|
349
|
-
textord_projection_scale: `${number}`;
|
|
350
|
-
textord_balance_factor: `${0 | 1}`;
|
|
351
|
-
textord_tabvector_vertical_gap_fraction: `${number}`;
|
|
352
|
-
textord_tabvector_vertical_box_ratio: `${number}`;
|
|
353
|
-
pitsync_joined_edge: `${number}`;
|
|
354
|
-
pitsync_offset_freecut_fraction: `${number}`;
|
|
355
|
-
oldbl_xhfract: `${number}`;
|
|
356
|
-
oldbl_dot_error_size: `${number}`;
|
|
357
|
-
textord_oldbl_jumplimit: `${number}`;
|
|
358
|
-
textord_spline_shift_fraction: `${number}`;
|
|
359
|
-
textord_skew_ile: `${number}`;
|
|
360
|
-
textord_skew_lag: `${number}`;
|
|
361
|
-
textord_linespace_iqrlimit: `${number}`;
|
|
362
|
-
textord_width_limit: `${number}`;
|
|
363
|
-
textord_chop_width: `${number}`;
|
|
364
|
-
textord_expansion_factor: `${0 | 1}`;
|
|
365
|
-
textord_overlap_x: `${number}`;
|
|
366
|
-
textord_minxh: `${number}`;
|
|
367
|
-
textord_min_linesize: `${number}`;
|
|
368
|
-
textord_excess_blobsize: `${number}`;
|
|
369
|
-
textord_occupancy_threshold: `${number}`;
|
|
370
|
-
textord_underline_width: `${number}`;
|
|
371
|
-
textord_min_blob_height_fraction: `${number}`;
|
|
372
|
-
textord_xheight_mode_fraction: `${number}`;
|
|
373
|
-
textord_ascheight_mode_fraction: `${number}`;
|
|
374
|
-
textord_descheight_mode_fraction: `${number}`;
|
|
375
|
-
textord_ascx_ratio_min: `${number}`;
|
|
376
|
-
textord_ascx_ratio_max: `${number}`;
|
|
377
|
-
textord_descx_ratio_min: `${number}`;
|
|
378
|
-
textord_descx_ratio_max: `${number}`;
|
|
379
|
-
textord_xheight_error_margin: `${number}`;
|
|
380
|
-
gapmap_big_gaps: `${number}`;
|
|
381
|
-
edges_childarea: `${number}`;
|
|
382
|
-
edges_boxarea: `${number}`;
|
|
383
|
-
textord_underline_threshold: `${number}`;
|
|
384
|
-
classify_pico_feature_length: `${number}`;
|
|
385
|
-
classify_norm_adj_midpoint: `${number}`;
|
|
386
|
-
classify_norm_adj_curl: `${number}`;
|
|
387
|
-
classify_min_slope: `${number}`;
|
|
388
|
-
classify_max_slope: `${number}`;
|
|
389
|
-
classify_cp_angle_pad_loose: `${number}`;
|
|
390
|
-
classify_cp_angle_pad_medium: `${number}`;
|
|
391
|
-
classify_cp_angle_pad_tight: `${number}`;
|
|
392
|
-
classify_cp_end_pad_loose: `${number}`;
|
|
393
|
-
classify_cp_end_pad_medium: `${number}`;
|
|
394
|
-
classify_cp_end_pad_tight: `${number}`;
|
|
395
|
-
classify_cp_side_pad_loose: `${number}`;
|
|
396
|
-
classify_cp_side_pad_medium: `${number}`;
|
|
397
|
-
classify_cp_side_pad_tight: `${number}`;
|
|
398
|
-
classify_pp_angle_pad: `${number}`;
|
|
399
|
-
classify_pp_end_pad: `${number}`;
|
|
400
|
-
classify_pp_side_pad: `${number}`;
|
|
401
|
-
ambigs_debug_level: `${0 | 1}`;
|
|
402
|
-
classify_debug_level: `${0 | 1}`;
|
|
403
|
-
classify_norm_method: `${0 | 1}`;
|
|
404
|
-
matcher_debug_level: `${0 | 1}`;
|
|
405
|
-
matcher_debug_flags: `${0 | 1}`;
|
|
406
|
-
classify_learning_debug_level: `${0 | 1}`;
|
|
407
|
-
matcher_permanent_classes_min: `${0 | 1}`;
|
|
408
|
-
matcher_min_examples_for_prototyping: `${number}`;
|
|
409
|
-
matcher_sufficient_examples_for_prototyping: `${number}`;
|
|
410
|
-
classify_adapt_proto_threshold: `${number}`;
|
|
411
|
-
classify_adapt_feature_threshold: `${number}`;
|
|
412
|
-
classify_class_pruner_threshold: `${number}`;
|
|
413
|
-
classify_class_pruner_multiplier: `${number}`;
|
|
414
|
-
classify_cp_cutoff_strength: `${number}`;
|
|
415
|
-
classify_integer_matcher_multiplier: `${number}`;
|
|
416
|
-
dawg_debug_level: `${0 | 1}`;
|
|
417
|
-
hyphen_debug_level: `${0 | 1}`;
|
|
418
|
-
stopper_smallword_size: `${number}`;
|
|
419
|
-
stopper_debug_level: `${0 | 1}`;
|
|
420
|
-
tessedit_truncate_wordchoice_log: `${number}`;
|
|
421
|
-
max_permuter_attempts: `${number}`;
|
|
422
|
-
repair_unchopped_blobs: `${0 | 1}`;
|
|
423
|
-
chop_debug: `${0 | 1}`;
|
|
424
|
-
chop_split_length: `${number}`;
|
|
425
|
-
chop_same_distance: `${number}`;
|
|
426
|
-
chop_min_outline_points: `${number}`;
|
|
427
|
-
chop_seam_pile_size: `${number}`;
|
|
428
|
-
chop_inside_angle: `${number}`;
|
|
429
|
-
chop_min_outline_area: `${number}`;
|
|
430
|
-
chop_centered_maxwidth: `${number}`;
|
|
431
|
-
chop_x_y_weight: `${number}`;
|
|
432
|
-
wordrec_debug_level: `${0 | 1}`;
|
|
433
|
-
wordrec_max_join_chunks: `${number}`;
|
|
434
|
-
segsearch_debug_level: `${0 | 1}`;
|
|
435
|
-
segsearch_max_pain_points: `${number}`;
|
|
436
|
-
segsearch_max_futile_classifications: `${number}`;
|
|
437
|
-
language_model_debug_level: `${0 | 1}`;
|
|
438
|
-
language_model_ngram_order: `${number}`;
|
|
439
|
-
language_model_viterbi_list_max_num_prunable: `${number}`;
|
|
440
|
-
language_model_viterbi_list_max_size: `${number}`;
|
|
441
|
-
language_model_min_compound_length: `${number}`;
|
|
442
|
-
wordrec_display_segmentations: `${0 | 1}`;
|
|
443
|
-
tessedit_pageseg_mode: `${number}`;
|
|
444
|
-
thresholding_method: `${0 | 1}`;
|
|
445
|
-
tessedit_ocr_engine_mode: `${number}`;
|
|
446
|
-
pageseg_devanagari_split_strategy: `${0 | 1}`;
|
|
447
|
-
ocr_devanagari_split_strategy: `${0 | 1}`;
|
|
448
|
-
bidi_debug: `${0 | 1}`;
|
|
449
|
-
applybox_debug: `${0 | 1}`;
|
|
450
|
-
applybox_page: `${0 | 1}`;
|
|
451
|
-
tessedit_font_id: `${0 | 1}`;
|
|
452
|
-
tessedit_bigram_debug: `${0 | 1}`;
|
|
453
|
-
debug_noise_removal: `${0 | 1}`;
|
|
454
|
-
noise_maxperblob: `${number}`;
|
|
455
|
-
noise_maxperword: `${number}`;
|
|
456
|
-
debug_x_ht_level: `${0 | 1}`;
|
|
457
|
-
quality_min_initial_alphas_reqd: `${number}`;
|
|
458
|
-
tessedit_tess_adaption_mode: `${number}`;
|
|
459
|
-
multilang_debug_level: `${0 | 1}`;
|
|
460
|
-
paragraph_debug_level: `${0 | 1}`;
|
|
461
|
-
tessedit_preserve_min_wd_len: `${number}`;
|
|
462
|
-
crunch_rating_max: `${number}`;
|
|
463
|
-
crunch_pot_indicators: `${0 | 1}`;
|
|
464
|
-
crunch_leave_lc_strings: `${number}`;
|
|
465
|
-
crunch_leave_uc_strings: `${number}`;
|
|
466
|
-
crunch_long_repetitions: `${number}`;
|
|
467
|
-
crunch_debug: `${0 | 1}`;
|
|
468
|
-
fixsp_non_noise_limit: `${0 | 1}`;
|
|
469
|
-
fixsp_done_mode: `${0 | 1}`;
|
|
470
|
-
debug_fix_space_level: `${0 | 1}`;
|
|
471
|
-
x_ht_acceptance_tolerance: `${number}`;
|
|
472
|
-
x_ht_min_change: `${number}`;
|
|
473
|
-
superscript_debug: `${0 | 1}`;
|
|
474
|
-
page_xml_level: `${0 | 1}`;
|
|
475
|
-
jpg_quality: `${number}`;
|
|
476
|
-
user_defined_dpi: `${0 | 1}`;
|
|
477
|
-
min_characters_to_try: `${number}`;
|
|
478
|
-
suspect_level: `${number}`;
|
|
479
|
-
suspect_short_words: `${number}`;
|
|
480
|
-
tessedit_reject_mode: `${0 | 1}`;
|
|
481
|
-
tessedit_image_border: `${number}`;
|
|
482
|
-
min_sane_x_ht_pixels: `${number}`;
|
|
483
|
-
tessedit_page_number: `${number}`;
|
|
484
|
-
tessedit_parallelize: `${0 | 1}`;
|
|
485
|
-
lstm_choice_mode: `${0 | 1}`;
|
|
486
|
-
lstm_choice_iterations: `${number}`;
|
|
487
|
-
tosp_debug_level: `${0 | 1}`;
|
|
488
|
-
tosp_enough_space_samples_for_median: `${number}`;
|
|
489
|
-
tosp_redo_kern_limit: `${number}`;
|
|
490
|
-
tosp_few_samples: `${number}`;
|
|
491
|
-
tosp_short_row: `${number}`;
|
|
492
|
-
tosp_sanity_method: `${0 | 1}`;
|
|
493
|
-
textord_max_noise_size: `${number}`;
|
|
494
|
-
textord_baseline_debug: `${0 | 1}`;
|
|
495
|
-
textord_noise_sizefraction: `${number}`;
|
|
496
|
-
textord_noise_translimit: `${number}`;
|
|
497
|
-
textord_noise_sncount: `${0 | 1}`;
|
|
498
|
-
use_ambigs_for_adaption: `${0 | 1}`;
|
|
499
|
-
allow_blob_division: `${0 | 1}`;
|
|
500
|
-
prioritize_division: `${0 | 1}`;
|
|
501
|
-
classify_enable_learning: `${0 | 1}`;
|
|
502
|
-
tess_cn_matching: `${0 | 1}`;
|
|
503
|
-
tess_bn_matching: `${0 | 1}`;
|
|
504
|
-
classify_enable_adaptive_matcher: `${0 | 1}`;
|
|
505
|
-
classify_use_pre_adapted_templates: `${0 | 1}`;
|
|
506
|
-
classify_save_adapted_templates: `${0 | 1}`;
|
|
507
|
-
classify_enable_adaptive_debugger: `${0 | 1}`;
|
|
508
|
-
classify_nonlinear_norm: `${0 | 1}`;
|
|
509
|
-
disable_character_fragments: `${0 | 1}`;
|
|
510
|
-
classify_debug_character_fragments: `${0 | 1}`;
|
|
511
|
-
matcher_debug_separate_windows: `${0 | 1}`;
|
|
512
|
-
classify_bln_numeric_mode: `${0 | 1}`;
|
|
513
|
-
load_system_dawg: `${0 | 1}`;
|
|
514
|
-
load_freq_dawg: `${0 | 1}`;
|
|
515
|
-
load_unambig_dawg: `${0 | 1}`;
|
|
516
|
-
load_punc_dawg: `${0 | 1}`;
|
|
517
|
-
load_number_dawg: `${0 | 1}`;
|
|
518
|
-
load_bigram_dawg: `${0 | 1}`;
|
|
519
|
-
use_only_first_uft8_step: `${0 | 1}`;
|
|
520
|
-
stopper_no_acceptable_choices: `${0 | 1}`;
|
|
521
|
-
segment_nonalphabetic_script: `${0 | 1}`;
|
|
522
|
-
save_doc_words: `${0 | 1}`;
|
|
523
|
-
merge_fragments_in_matrix: `${0 | 1}`;
|
|
524
|
-
wordrec_enable_assoc: `${0 | 1}`;
|
|
525
|
-
force_word_assoc: `${0 | 1}`;
|
|
526
|
-
chop_enable: `${0 | 1}`;
|
|
527
|
-
chop_vertical_creep: `${0 | 1}`;
|
|
528
|
-
chop_new_seam_pile: `${0 | 1}`;
|
|
529
|
-
assume_fixed_pitch_char_segment: `${0 | 1}`;
|
|
530
|
-
wordrec_skip_no_truth_words: `${0 | 1}`;
|
|
531
|
-
wordrec_debug_blamer: `${0 | 1}`;
|
|
532
|
-
wordrec_run_blamer: `${0 | 1}`;
|
|
533
|
-
save_alt_choices: `${0 | 1}`;
|
|
534
|
-
language_model_ngram_on: `${0 | 1}`;
|
|
535
|
-
language_model_ngram_use_only_first_uft8_step: `${0 | 1}`;
|
|
536
|
-
language_model_ngram_space_delimited_language: `${0 | 1}`;
|
|
537
|
-
language_model_use_sigmoidal_certainty: `${0 | 1}`;
|
|
538
|
-
tessedit_resegment_from_boxes: `${0 | 1}`;
|
|
539
|
-
tessedit_resegment_from_line_boxes: `${0 | 1}`;
|
|
540
|
-
tessedit_train_from_boxes: `${0 | 1}`;
|
|
541
|
-
tessedit_make_boxes_from_boxes: `${0 | 1}`;
|
|
542
|
-
tessedit_train_line_recognizer: `${0 | 1}`;
|
|
543
|
-
tessedit_dump_pageseg_images: `${0 | 1}`;
|
|
544
|
-
tessedit_do_invert: `${0 | 1}`;
|
|
545
|
-
thresholding_debug: `${0 | 1}`;
|
|
546
|
-
tessedit_ambigs_training: `${0 | 1}`;
|
|
547
|
-
tessedit_adaption_debug: `${0 | 1}`;
|
|
548
|
-
applybox_learn_chars_and_char_frags_mode: `${0 | 1}`;
|
|
549
|
-
applybox_learn_ngrams_mode: `${0 | 1}`;
|
|
550
|
-
tessedit_display_outwords: `${0 | 1}`;
|
|
551
|
-
tessedit_dump_choices: `${0 | 1}`;
|
|
552
|
-
tessedit_timing_debug: `${0 | 1}`;
|
|
553
|
-
tessedit_fix_fuzzy_spaces: `${0 | 1}`;
|
|
554
|
-
tessedit_unrej_any_wd: `${0 | 1}`;
|
|
555
|
-
tessedit_fix_hyphens: `${0 | 1}`;
|
|
556
|
-
tessedit_enable_doc_dict: `${0 | 1}`;
|
|
557
|
-
tessedit_debug_fonts: `${0 | 1}`;
|
|
558
|
-
tessedit_debug_block_rejection: `${0 | 1}`;
|
|
559
|
-
tessedit_enable_bigram_correction: `${0 | 1}`;
|
|
560
|
-
tessedit_enable_dict_correction: `${0 | 1}`;
|
|
561
|
-
enable_noise_removal: `${0 | 1}`;
|
|
562
|
-
tessedit_minimal_rej_pass1: `${0 | 1}`;
|
|
563
|
-
tessedit_test_adaption: `${0 | 1}`;
|
|
564
|
-
test_pt: `${0 | 1}`;
|
|
565
|
-
paragraph_text_based: `${0 | 1}`;
|
|
566
|
-
lstm_use_matrix: `${0 | 1}`;
|
|
567
|
-
tessedit_good_quality_unrej: `${0 | 1}`;
|
|
568
|
-
tessedit_use_reject_spaces: `${0 | 1}`;
|
|
569
|
-
tessedit_preserve_blk_rej_perfect_wds: `${0 | 1}`;
|
|
570
|
-
tessedit_preserve_row_rej_perfect_wds: `${0 | 1}`;
|
|
571
|
-
tessedit_dont_blkrej_good_wds: `${0 | 1}`;
|
|
572
|
-
tessedit_dont_rowrej_good_wds: `${0 | 1}`;
|
|
573
|
-
tessedit_row_rej_good_docs: `${0 | 1}`;
|
|
574
|
-
tessedit_reject_bad_qual_wds: `${0 | 1}`;
|
|
575
|
-
tessedit_debug_doc_rejection: `${0 | 1}`;
|
|
576
|
-
tessedit_debug_quality_metrics: `${0 | 1}`;
|
|
577
|
-
bland_unrej: `${0 | 1}`;
|
|
578
|
-
unlv_tilde_crunching: `${0 | 1}`;
|
|
579
|
-
hocr_font_info: `${0 | 1}`;
|
|
580
|
-
hocr_char_boxes: `${0 | 1}`;
|
|
581
|
-
crunch_early_merge_tess_fails: `${0 | 1}`;
|
|
582
|
-
crunch_early_convert_bad_unlv_chs: `${0 | 1}`;
|
|
583
|
-
crunch_terrible_garbage: `${0 | 1}`;
|
|
584
|
-
crunch_leave_ok_strings: `${0 | 1}`;
|
|
585
|
-
crunch_accept_ok: `${0 | 1}`;
|
|
586
|
-
crunch_leave_accept_strings: `${0 | 1}`;
|
|
587
|
-
crunch_include_numerals: `${0 | 1}`;
|
|
588
|
-
tessedit_prefer_joined_punct: `${0 | 1}`;
|
|
589
|
-
tessedit_write_block_separators: `${0 | 1}`;
|
|
590
|
-
tessedit_write_rep_codes: `${0 | 1}`;
|
|
591
|
-
tessedit_write_unlv: `${0 | 1}`;
|
|
592
|
-
tessedit_create_txt: `${0 | 1}`;
|
|
593
|
-
tessedit_create_hocr: `${0 | 1}`;
|
|
594
|
-
tessedit_create_alto: `${0 | 1}`;
|
|
595
|
-
tessedit_create_page_xml: `${0 | 1}`;
|
|
596
|
-
page_xml_polygon: `${0 | 1}`;
|
|
597
|
-
tessedit_create_lstmbox: `${0 | 1}`;
|
|
598
|
-
tessedit_create_tsv: `${0 | 1}`;
|
|
599
|
-
tessedit_create_wordstrbox: `${0 | 1}`;
|
|
600
|
-
tessedit_create_pdf: `${0 | 1}`;
|
|
601
|
-
textonly_pdf: `${0 | 1}`;
|
|
602
|
-
suspect_constrain_1Il: `${0 | 1}`;
|
|
603
|
-
tessedit_minimal_rejection: `${0 | 1}`;
|
|
604
|
-
tessedit_zero_rejection: `${0 | 1}`;
|
|
605
|
-
tessedit_word_for_word: `${0 | 1}`;
|
|
606
|
-
tessedit_zero_kelvin_rejection: `${0 | 1}`;
|
|
607
|
-
tessedit_rejection_debug: `${0 | 1}`;
|
|
608
|
-
tessedit_flip_0O: `${0 | 1}`;
|
|
609
|
-
rej_trust_doc_dawg: `${0 | 1}`;
|
|
610
|
-
rej_1Il_use_dict_word: `${0 | 1}`;
|
|
611
|
-
rej_1Il_trust_permuter_type: `${0 | 1}`;
|
|
612
|
-
rej_use_tess_accepted: `${0 | 1}`;
|
|
613
|
-
rej_use_tess_blanks: `${0 | 1}`;
|
|
614
|
-
rej_use_good_perm: `${0 | 1}`;
|
|
615
|
-
rej_use_sensible_wd: `${0 | 1}`;
|
|
616
|
-
rej_alphas_in_number_perm: `${0 | 1}`;
|
|
617
|
-
tessedit_create_boxfile: `${0 | 1}`;
|
|
618
|
-
tessedit_write_images: `${0 | 1}`;
|
|
619
|
-
interactive_display_mode: `${0 | 1}`;
|
|
620
|
-
tessedit_override_permuter: `${0 | 1}`;
|
|
621
|
-
tessedit_use_primary_params_model: `${0 | 1}`;
|
|
622
|
-
textord_tabfind_show_vlines: `${0 | 1}`;
|
|
623
|
-
textord_use_cjk_fp_model: `${0 | 1}`;
|
|
624
|
-
poly_allow_detailed_fx: `${0 | 1}`;
|
|
625
|
-
tessedit_init_config_only: `${0 | 1}`;
|
|
626
|
-
textord_equation_detect: `${0 | 1}`;
|
|
627
|
-
textord_tabfind_vertical_text: `${0 | 1}`;
|
|
628
|
-
textord_tabfind_force_vertical_text: `${0 | 1}`;
|
|
629
|
-
preserve_interword_spaces: `${0 | 1}`;
|
|
630
|
-
pageseg_apply_music_mask: `${0 | 1}`;
|
|
631
|
-
textord_single_height_mode: `${0 | 1}`;
|
|
632
|
-
tosp_old_to_method: `${0 | 1}`;
|
|
633
|
-
tosp_old_to_constrain_sp_kn: `${0 | 1}`;
|
|
634
|
-
tosp_only_use_prop_rows: `${0 | 1}`;
|
|
635
|
-
tosp_force_wordbreak_on_punct: `${0 | 1}`;
|
|
636
|
-
tosp_use_pre_chopping: `${0 | 1}`;
|
|
637
|
-
tosp_old_to_bug_fix: `${0 | 1}`;
|
|
638
|
-
tosp_block_use_cert_spaces: `${0 | 1}`;
|
|
639
|
-
tosp_row_use_cert_spaces: `${0 | 1}`;
|
|
640
|
-
tosp_narrow_blobs_not_cert: `${0 | 1}`;
|
|
641
|
-
tosp_row_use_cert_spaces1: `${0 | 1}`;
|
|
642
|
-
tosp_recovery_isolated_row_stats: `${0 | 1}`;
|
|
643
|
-
tosp_only_small_gaps_for_kern: `${0 | 1}`;
|
|
644
|
-
tosp_all_flips_fuzzy: `${0 | 1}`;
|
|
645
|
-
tosp_fuzzy_limit_all: `${0 | 1}`;
|
|
646
|
-
tosp_stats_use_xht_gaps: `${0 | 1}`;
|
|
647
|
-
tosp_use_xht_gaps: `${0 | 1}`;
|
|
648
|
-
tosp_only_use_xht_gaps: `${0 | 1}`;
|
|
649
|
-
tosp_rule_9_test_punct: `${0 | 1}`;
|
|
650
|
-
tosp_flip_fuzz_kn_to_sp: `${0 | 1}`;
|
|
651
|
-
tosp_flip_fuzz_sp_to_kn: `${0 | 1}`;
|
|
652
|
-
tosp_improve_thresh: `${0 | 1}`;
|
|
653
|
-
textord_no_rejects: `${0 | 1}`;
|
|
654
|
-
textord_show_blobs: `${0 | 1}`;
|
|
655
|
-
textord_show_boxes: `${0 | 1}`;
|
|
656
|
-
textord_noise_rejwords: `${0 | 1}`;
|
|
657
|
-
textord_noise_rejrows: `${0 | 1}`;
|
|
658
|
-
textord_noise_debug: `${0 | 1}`;
|
|
659
|
-
classify_learn_debug_str: `${string}`;
|
|
660
|
-
user_words_file: `${string}`;
|
|
661
|
-
user_words_suffix: `${string}`;
|
|
662
|
-
user_patterns_file: `${string}`;
|
|
663
|
-
user_patterns_suffix: `${string}`;
|
|
664
|
-
output_ambig_words_file: `${string}`;
|
|
665
|
-
word_to_debug: `${string}`;
|
|
666
|
-
tessedit_char_blacklist: `${string}`;
|
|
667
|
-
tessedit_char_whitelist: `${string}`;
|
|
668
|
-
tessedit_char_unblacklist: `${string}`;
|
|
669
|
-
tessedit_write_params_to_file: `${string}`;
|
|
670
|
-
applybox_exposure_pattern: `${string}`;
|
|
671
|
-
chs_leading_punct: `${string}`;
|
|
672
|
-
chs_trailing_punct1: `${string}`;
|
|
673
|
-
chs_trailing_punct2: `${string}`;
|
|
674
|
-
outlines_odd: `${string}`;
|
|
675
|
-
outlines_2: `${string}`;
|
|
676
|
-
numeric_punctuation: `${string}`;
|
|
677
|
-
unrecognised_char: `${string}`;
|
|
678
|
-
ok_repeated_ch_non_alphanum_wds: `${string}`;
|
|
679
|
-
conflict_set_I_l_1: `${string}`;
|
|
680
|
-
file_type: `${string}`;
|
|
681
|
-
tessedit_load_sublangs: `${string}`;
|
|
682
|
-
page_separator: `${string}`;
|
|
683
|
-
classify_char_norm_range: `${number}`;
|
|
684
|
-
classify_max_rating_ratio: `${number}`;
|
|
685
|
-
classify_max_certainty_margin: `${number}`;
|
|
686
|
-
matcher_good_threshold: `${number}`;
|
|
687
|
-
matcher_reliable_adaptive_result: `${0 | 1}`;
|
|
688
|
-
matcher_perfect_threshold: `${number}`;
|
|
689
|
-
matcher_bad_match_pad: `${number}`;
|
|
690
|
-
matcher_rating_margin: `${number}`;
|
|
691
|
-
matcher_avg_noise_size: `${number}`;
|
|
692
|
-
matcher_clustering_max_angle_delta: `${number}`;
|
|
693
|
-
classify_misfit_junk_penalty: `${0 | 1}`;
|
|
694
|
-
rating_scale: `${number}`;
|
|
695
|
-
tessedit_class_miss_scale: `${number}`;
|
|
696
|
-
classify_adapted_pruning_factor: `${number}`;
|
|
697
|
-
classify_adapted_pruning_threshold: `${number}`;
|
|
698
|
-
classify_character_fragments_garbage_certainty_threshold: `${number}`;
|
|
699
|
-
speckle_large_max_size: `${number}`;
|
|
700
|
-
speckle_rating_penalty: `${number}`;
|
|
701
|
-
xheight_penalty_subscripts: `${number}`;
|
|
702
|
-
xheight_penalty_inconsistent: `${number}`;
|
|
703
|
-
segment_penalty_dict_frequent_word: `${0 | 1}`;
|
|
704
|
-
segment_penalty_dict_case_ok: `${number}`;
|
|
705
|
-
segment_penalty_dict_case_bad: `${number}`;
|
|
706
|
-
segment_penalty_dict_nonword: `${number}`;
|
|
707
|
-
segment_penalty_garbage: `${number}`;
|
|
708
|
-
certainty_scale: `${number}`;
|
|
709
|
-
stopper_nondict_certainty_base: `${number}`;
|
|
710
|
-
stopper_phase2_certainty_rejection_offset: `${0 | 1}`;
|
|
711
|
-
stopper_certainty_per_char: `${number}`;
|
|
712
|
-
stopper_allowable_character_badness: `${number}`;
|
|
713
|
-
doc_dict_pending_threshold: `${0 | 1}`;
|
|
714
|
-
doc_dict_certainty_threshold: `${number}`;
|
|
715
|
-
tessedit_certainty_threshold: `${number}`;
|
|
716
|
-
chop_split_dist_knob: `${number}`;
|
|
717
|
-
chop_overlap_knob: `${number}`;
|
|
718
|
-
chop_center_knob: `${number}`;
|
|
719
|
-
chop_sharpness_knob: `${number}`;
|
|
720
|
-
chop_width_change_knob: `${number}`;
|
|
721
|
-
chop_ok_split: `${number}`;
|
|
722
|
-
chop_good_split: `${number}`;
|
|
723
|
-
segsearch_max_char_wh_ratio: `${number}`;
|
|
724
|
-
language_model_ngram_small_prob: `${number}`;
|
|
725
|
-
language_model_ngram_nonmatch_score: `${number}`;
|
|
726
|
-
language_model_ngram_scale_factor: `${number}`;
|
|
727
|
-
language_model_ngram_rating_factor: `${number}`;
|
|
728
|
-
language_model_penalty_non_freq_dict_word: `${number}`;
|
|
729
|
-
language_model_penalty_non_dict_word: `${number}`;
|
|
730
|
-
language_model_penalty_punc: `${number}`;
|
|
731
|
-
language_model_penalty_case: `${number}`;
|
|
732
|
-
language_model_penalty_script: `${number}`;
|
|
733
|
-
language_model_penalty_chartype: `${number}`;
|
|
734
|
-
language_model_penalty_font: `${0 | 1}`;
|
|
735
|
-
language_model_penalty_spacing: `${number}`;
|
|
736
|
-
language_model_penalty_increment: `${number}`;
|
|
737
|
-
invert_threshold: `${number}`;
|
|
738
|
-
thresholding_window_size: `${number}`;
|
|
739
|
-
thresholding_kfactor: `${number}`;
|
|
740
|
-
thresholding_tile_size: `${number}`;
|
|
741
|
-
thresholding_smooth_kernel_size: `${0 | 1}`;
|
|
742
|
-
thresholding_score_fraction: `${number}`;
|
|
743
|
-
noise_cert_basechar: `${number}`;
|
|
744
|
-
noise_cert_disjoint: `${number}`;
|
|
745
|
-
noise_cert_punc: `${number}`;
|
|
746
|
-
noise_cert_factor: `${number}`;
|
|
747
|
-
quality_rej_pc: `${number}`;
|
|
748
|
-
quality_blob_pc: `${0 | 1}`;
|
|
749
|
-
quality_outline_pc: `${0 | 1}`;
|
|
750
|
-
quality_char_pc: `${number}`;
|
|
751
|
-
test_pt_x: `${number}`;
|
|
752
|
-
test_pt_y: `${number}`;
|
|
753
|
-
tessedit_reject_doc_percent: `${number}`;
|
|
754
|
-
tessedit_reject_block_percent: `${number}`;
|
|
755
|
-
tessedit_reject_row_percent: `${number}`;
|
|
756
|
-
tessedit_whole_wd_rej_row_percent: `${number}`;
|
|
757
|
-
tessedit_good_doc_still_rowrej_wd: `${number}`;
|
|
758
|
-
quality_rowrej_pc: `${number}`;
|
|
759
|
-
crunch_terrible_rating: `${number}`;
|
|
760
|
-
crunch_poor_garbage_cert: `${number}`;
|
|
761
|
-
crunch_poor_garbage_rate: `${number}`;
|
|
762
|
-
crunch_pot_poor_rate: `${number}`;
|
|
763
|
-
crunch_pot_poor_cert: `${number}`;
|
|
764
|
-
crunch_del_rating: `${number}`;
|
|
765
|
-
crunch_del_cert: `${number}`;
|
|
766
|
-
crunch_del_min_ht: `${number}`;
|
|
767
|
-
crunch_del_max_ht: `${number}`;
|
|
768
|
-
crunch_del_min_width: `${number}`;
|
|
769
|
-
crunch_del_high_word: `${number}`;
|
|
770
|
-
crunch_del_low_word: `${number}`;
|
|
771
|
-
crunch_small_outlines_size: `${number}`;
|
|
772
|
-
fixsp_small_outlines_size: `${number}`;
|
|
773
|
-
superscript_worse_certainty: `${number}`;
|
|
774
|
-
superscript_bettered_certainty: `${number}`;
|
|
775
|
-
superscript_scaledown_ratio: `${number}`;
|
|
776
|
-
subscript_max_y_top: `${number}`;
|
|
777
|
-
superscript_min_y_bottom: `${number}`;
|
|
778
|
-
suspect_rating_per_ch: `${number}`;
|
|
779
|
-
suspect_accept_rating: `${number}`;
|
|
780
|
-
tessedit_lower_flip_hyphen: `${number}`;
|
|
781
|
-
tessedit_upper_flip_hyphen: `${number}`;
|
|
782
|
-
rej_whole_of_mostly_reject_word_fract: `${number}`;
|
|
783
|
-
min_orientation_margin: `${number}`;
|
|
784
|
-
textord_tabfind_vertical_text_ratio: `${number}`;
|
|
785
|
-
textord_tabfind_aligned_gap_fraction: `${number}`;
|
|
786
|
-
lstm_rating_coefficient: `${number}`;
|
|
787
|
-
tosp_old_sp_kn_th_factor: `${number}`;
|
|
788
|
-
tosp_threshold_bias1: `${0 | 1}`;
|
|
789
|
-
tosp_threshold_bias2: `${0 | 1}`;
|
|
790
|
-
tosp_narrow_fraction: `${number}`;
|
|
791
|
-
tosp_narrow_aspect_ratio: `${number}`;
|
|
792
|
-
tosp_wide_fraction: `${number}`;
|
|
793
|
-
tosp_wide_aspect_ratio: `${0 | 1}`;
|
|
794
|
-
tosp_fuzzy_space_factor: `${number}`;
|
|
795
|
-
tosp_fuzzy_space_factor1: `${number}`;
|
|
796
|
-
tosp_fuzzy_space_factor2: `${number}`;
|
|
797
|
-
tosp_gap_factor: `${number}`;
|
|
798
|
-
tosp_kern_gap_factor1: `${number}`;
|
|
799
|
-
tosp_kern_gap_factor2: `${number}`;
|
|
800
|
-
tosp_kern_gap_factor3: `${number}`;
|
|
801
|
-
tosp_ignore_big_gaps: `${number}`;
|
|
802
|
-
tosp_ignore_very_big_gaps: `${number}`;
|
|
803
|
-
tosp_rep_space: `${number}`;
|
|
804
|
-
tosp_enough_small_gaps: `${number}`;
|
|
805
|
-
tosp_table_kn_sp_ratio: `${number}`;
|
|
806
|
-
tosp_table_xht_sp_ratio: `${number}`;
|
|
807
|
-
tosp_table_fuzzy_kn_sp_ratio: `${number}`;
|
|
808
|
-
tosp_fuzzy_kn_fraction: `${number}`;
|
|
809
|
-
tosp_fuzzy_sp_fraction: `${number}`;
|
|
810
|
-
tosp_min_sane_kn_sp: `${number}`;
|
|
811
|
-
tosp_init_guess_kn_mult: `${number}`;
|
|
812
|
-
tosp_init_guess_xht_mult: `${number}`;
|
|
813
|
-
tosp_max_sane_kn_thresh: `${number}`;
|
|
814
|
-
tosp_flip_caution: `${0 | 1}`;
|
|
815
|
-
tosp_large_kerning: `${number}`;
|
|
816
|
-
tosp_dont_fool_with_small_kerns: `${number}`;
|
|
817
|
-
tosp_near_lh_edge: `${0 | 1}`;
|
|
818
|
-
tosp_silly_kn_sp_gap: `${number}`;
|
|
819
|
-
tosp_pass_wide_fuzz_sp_to_context: `${number}`;
|
|
820
|
-
textord_noise_area_ratio: `${number}`;
|
|
821
|
-
textord_initialx_ile: `${number}`;
|
|
822
|
-
textord_initialasc_ile: `${number}`;
|
|
823
|
-
textord_noise_sizelimit: `${number}`;
|
|
824
|
-
textord_noise_normratio: `${number}`;
|
|
825
|
-
textord_noise_syfract: `${number}`;
|
|
826
|
-
textord_noise_sxfract: `${number}`;
|
|
827
|
-
textord_noise_hfract: `${number}`;
|
|
828
|
-
textord_noise_rowratio: `${number}`;
|
|
829
|
-
textord_blshift_maxshift: `${0 | 1}`;
|
|
830
|
-
textord_blshift_xfraction: `${number}`;
|
|
831
|
-
};
|
|
832
|
-
type InitOnlyConfigurationVariableNames = "ambigs_debug_level" | "language_model_ngram_on" | "language_model_use_sigmoidal_certainty" | "load_bigram_dawg" | "load_freq_dawg" | "load_number_dawg" | "load_punc_dawg" | "load_system_dawg" | "load_unambig_dawg" | "tessedit_init_config_only" | "tessedit_ocr_engine_mode" | "user_patterns_suffix" | "user_words_suffix";
|
|
833
|
-
export type InitOnlyConfigurationVariables = Pick<ConfigurationVariables, InitOnlyConfigurationVariableNames>;
|
|
834
|
-
export type SetVariableConfigVariables = Omit<ConfigurationVariables, InitOnlyConfigurationVariableNames>;
|
|
835
|
-
/**
|
|
836
|
-
* Tesseract init options
|
|
837
|
-
*/
|
|
838
|
-
export interface TesseractInitOptions {
|
|
839
|
-
/**
|
|
840
|
-
* It's generally safer to use as few languages as possible.
|
|
841
|
-
* The more languages Tesseract needs to load, the longer it takes to recognize an image.
|
|
842
|
-
* The OSD Language will always be loaded to support orientation and script detection
|
|
843
|
-
* IMPORTANT: if you specify more than one language here (e.g.: `deu, eng` for example)
|
|
844
|
-
* tesseract will try to recognize german and english in the same image.
|
|
845
|
-
* Originally tesseract itself accepts it as `deu+eng`, but since this
|
|
846
|
-
* makes typing very hard to near impossible, it's safer to just accept an
|
|
847
|
-
* array with the languages it should look for.
|
|
848
|
-
* When talking about "hard typing/impossible typing" it's because TypeScript
|
|
849
|
-
* itself cannot create recursive types, and chaining template types
|
|
850
|
-
* (e.g.: `${Language}+${Language}+...`) stretches out the compilation time
|
|
851
|
-
* to an unacceptable amount
|
|
852
|
-
*
|
|
853
|
-
* @default [Language.osd]
|
|
854
|
-
*/
|
|
855
|
-
langs?: Language[];
|
|
856
|
-
/**
|
|
857
|
-
* Specify where the trainingdata is located
|
|
858
|
-
* Besides the datapath in general it is versioned to the
|
|
859
|
-
* version of tesseract
|
|
860
|
-
* @default '~/.cache/node-tesseract-ocr/'
|
|
861
|
-
*/
|
|
862
|
-
cachePath?: string;
|
|
863
|
-
/**
|
|
864
|
-
* Explicit datapath for traineddata. Takes precedence over
|
|
865
|
-
* the `TESSDATA_PREFIX` environment variable.
|
|
866
|
-
*/
|
|
867
|
-
dataPath?: string;
|
|
868
|
-
/**
|
|
869
|
-
* This will be called for every language that was specified in `langs`,
|
|
870
|
-
* it allows the user to be flexible about the training data's location
|
|
871
|
-
* or if they need to specify their own location for certain languages/custom languages
|
|
872
|
-
* IMPORTANT: Ensures that trainingdata will be downloaded from the following cdn
|
|
873
|
-
* in case they don't exist
|
|
874
|
-
* OEM_LSTM_ONLY => https://cdn.jsdelivr.net/npm/@tesseract.js-data/${lang}/4.0.0_best_int
|
|
875
|
-
* NON OEM_LSTM_ONLY => https://cdn.jsdelivr.net/npm/@tesseract.js-data/${lang}/4.0.0
|
|
876
|
-
* NOTE: Tesseract 5.x.x still uses the 4.x.x trainingdata
|
|
877
|
-
*
|
|
878
|
-
* @default true
|
|
879
|
-
*/
|
|
880
|
-
ensureTraineddata?: boolean;
|
|
881
|
-
/**
|
|
882
|
-
* Optional progress callback for traineddata downloads.
|
|
883
|
-
*/
|
|
884
|
-
progressCallback?: (info: TrainingDataDownloadProgress) => void;
|
|
885
|
-
/**
|
|
886
|
-
* OCR Engine Modes
|
|
887
|
-
* The engine mode cannot be changed after creating the instance
|
|
888
|
-
* If another mode is needed, it's advised to create a new instance.
|
|
889
|
-
* @default OEM_DEFAULT
|
|
890
|
-
* @throws {Error} Will throw an error when oem mode is below 0 or over 3
|
|
891
|
-
*/
|
|
892
|
-
oem?: OcrEngineMode;
|
|
893
|
-
/**
|
|
894
|
-
* Controls if only non debug parameters will be set upon initialization
|
|
895
|
-
* @default false
|
|
896
|
-
*/
|
|
897
|
-
setOnlyNonDebugParams?: boolean;
|
|
898
|
-
/**
|
|
899
|
-
* Array of paths that point to their corresponding config files
|
|
900
|
-
* usually located in the `dataPath` location alongside the training data
|
|
901
|
-
*/
|
|
902
|
-
configs?: Array<string>;
|
|
903
|
-
/**
|
|
904
|
-
* Record of parameters that should be set upon initialization
|
|
905
|
-
* Consult the original documentation of tesseract on which variables
|
|
906
|
-
* can actually be set
|
|
907
|
-
*/
|
|
908
|
-
vars?: Partial<Record<keyof ConfigurationVariables, ConfigurationVariables[keyof ConfigurationVariables]>>;
|
|
909
|
-
}
|
|
910
|
-
export interface TrainingDataDownloadProgress {
|
|
911
|
-
lang: Language;
|
|
912
|
-
url: string;
|
|
913
|
-
downloadedBytes: number;
|
|
914
|
-
totalBytes?: number;
|
|
915
|
-
percent?: number;
|
|
916
|
-
}
|
|
917
|
-
export interface TesseractSetRectangleOptions {
|
|
918
|
-
top: number;
|
|
919
|
-
left: number;
|
|
920
|
-
width: number;
|
|
921
|
-
height: number;
|
|
922
|
-
}
|
|
923
|
-
export interface ProgressChangedInfo {
|
|
924
|
-
/**
|
|
925
|
-
* Chars in this buffer
|
|
926
|
-
*/
|
|
927
|
-
progress: number;
|
|
928
|
-
/**
|
|
929
|
-
* Percent complete increasing (0-100)
|
|
930
|
-
*/
|
|
931
|
-
percent: number;
|
|
932
|
-
/**
|
|
933
|
-
* States if the worker is still alive
|
|
934
|
-
*/
|
|
935
|
-
ocrAlive: number;
|
|
936
|
-
/**
|
|
937
|
-
* top coordinate of the bbox of the current element that tesseract is processing
|
|
938
|
-
*/
|
|
939
|
-
top: number;
|
|
940
|
-
/**
|
|
941
|
-
* right coordinate of the bbox of the current element that tesseract is processing
|
|
942
|
-
*/
|
|
943
|
-
right: number;
|
|
944
|
-
/**
|
|
945
|
-
* bottom coordinate of the bbox of the current element that tesseract is processing
|
|
946
|
-
*/
|
|
947
|
-
bottom: number;
|
|
948
|
-
/**
|
|
949
|
-
* left coordinate of the bbox of the current element that tesseract is processing
|
|
950
|
-
*/
|
|
951
|
-
left: number;
|
|
952
|
-
}
|
|
953
|
-
export interface DetectOrientationScriptResult {
|
|
954
|
-
/**
|
|
955
|
-
* Orientation of the source image in degrees
|
|
956
|
-
* Orientation refers to the way the source is rotated, **not** how the text is
|
|
957
|
-
* aligned. It ranges from 0° to 360°.
|
|
958
|
-
* @type {number}
|
|
959
|
-
*/
|
|
960
|
-
orientationDegrees: number;
|
|
961
|
-
/**
|
|
962
|
-
* The confidence of tesseract for the orientation
|
|
963
|
-
* @type {number}
|
|
964
|
-
*/
|
|
965
|
-
orientationConfidence: number;
|
|
966
|
-
/**
|
|
967
|
-
* The name of the script that is used in the source image
|
|
968
|
-
* @type {string}
|
|
969
|
-
*/
|
|
970
|
-
scriptName: string;
|
|
971
|
-
/**
|
|
972
|
-
* The confidence of tesseract about the detected script of the source image
|
|
973
|
-
* @type {number}
|
|
974
|
-
*/
|
|
975
|
-
scriptConfidence: number;
|
|
976
|
-
}
|
|
977
|
-
export type EnsureTrainedDataOptions = {
|
|
978
|
-
lang: Language;
|
|
979
|
-
cachePath: string;
|
|
980
|
-
dataPath: string;
|
|
981
|
-
downloadBaseUrl: string;
|
|
982
|
-
progressCallback?: (info: TrainingDataDownloadProgress) => void;
|
|
983
|
-
};
|
|
984
|
-
export interface TesseractInstance {
|
|
985
|
-
/**
|
|
986
|
-
* Initialize the engine with the given options.
|
|
987
|
-
* @param {TesseractInitOptions} options Initialization options (languages, datapath, engine mode, etc.).
|
|
988
|
-
* @returns {Promise<void>}
|
|
989
|
-
*/
|
|
990
|
-
init(options: TesseractInitOptions): Promise<void>;
|
|
991
|
-
/**
|
|
992
|
-
* Initialize the engine for page analysis only.
|
|
993
|
-
* @returns {Promise<void>}
|
|
994
|
-
*/
|
|
995
|
-
initForAnalysePage(): Promise<void>;
|
|
996
|
-
/**
|
|
997
|
-
* Run page layout analysis.
|
|
998
|
-
* @param {boolean} mergeSimilarWords Whether to merge similar words during analysis.
|
|
999
|
-
* @returns {Promise<void>}
|
|
1000
|
-
*/
|
|
1001
|
-
analysePage(mergeSimilarWords: boolean): Promise<void>;
|
|
1002
|
-
/**
|
|
1003
|
-
* Set the page segmentation mode (PSM).
|
|
1004
|
-
* @param {PageSegmentationMode} psm Page segmentation mode.
|
|
1005
|
-
* @returns {Promise<void>}
|
|
1006
|
-
*/
|
|
1007
|
-
setPageMode(psm: PageSegmentationMode): Promise<void>;
|
|
1008
|
-
/**
|
|
1009
|
-
* Set a configuration variable.
|
|
1010
|
-
* @param {keyof SetVariableConfigVariables} name Variable name.
|
|
1011
|
-
* @param {SetVariableConfigVariables[keyof SetVariableConfigVariables]} value Variable value.
|
|
1012
|
-
* @returns Returns `false` if the lookup failed.
|
|
1013
|
-
*/
|
|
1014
|
-
setVariable(name: keyof SetVariableConfigVariables, value: SetVariableConfigVariables[keyof SetVariableConfigVariables]): Promise<boolean>;
|
|
1015
|
-
/**
|
|
1016
|
-
* Get a configuration variable as integer.
|
|
1017
|
-
* @param {keyof SetVariableConfigVariables} name Variable name.
|
|
1018
|
-
* @returns {Promise<number>} Returns the value of the variable.
|
|
1019
|
-
*/
|
|
1020
|
-
getIntVariable(name: keyof SetVariableConfigVariables): Promise<number>;
|
|
1021
|
-
/**
|
|
1022
|
-
* Get a configuration variable as boolean (0/1).
|
|
1023
|
-
* @param {keyof SetVariableConfigVariables} name Variable name.
|
|
1024
|
-
* @returns {Promise<number>} Returns the value of the variable.
|
|
1025
|
-
*/
|
|
1026
|
-
getBoolVariable(name: keyof SetVariableConfigVariables): Promise<number>;
|
|
1027
|
-
/**
|
|
1028
|
-
* Get a configuration variable as double.
|
|
1029
|
-
* @param {keyof SetVariableConfigVariables} name Variable name.
|
|
1030
|
-
* @returns {Promise<number>} Returns the value of the variable.
|
|
1031
|
-
*/
|
|
1032
|
-
getDoubleVariable(name: keyof SetVariableConfigVariables): Promise<number>;
|
|
1033
|
-
/**
|
|
1034
|
-
* Get a configuration variable as string.
|
|
1035
|
-
* @param {keyof SetVariableConfigVariables} name Variable name.
|
|
1036
|
-
* @returns {Promise<string>} Returns the value of the variable.
|
|
1037
|
-
*/
|
|
1038
|
-
getStringVariable(name: keyof SetVariableConfigVariables): Promise<string>;
|
|
1039
|
-
/**
|
|
1040
|
-
* Set the image to be recognized.
|
|
1041
|
-
* @param {Buffer<ArrayBuffer>} buffer Image data buffer.
|
|
1042
|
-
* @returns {Promise<void>}
|
|
1043
|
-
*/
|
|
1044
|
-
setImage(buffer: Buffer<ArrayBuffer>): Promise<void>;
|
|
1045
|
-
/**
|
|
1046
|
-
* Restrict recognition to a rectangle.
|
|
1047
|
-
* @param {TesseractSetRectangleOptions} options Rectangle options.
|
|
1048
|
-
* @returns {Promise<void>}
|
|
1049
|
-
*/
|
|
1050
|
-
setRectangle(options: TesseractSetRectangleOptions): Promise<void>;
|
|
1051
|
-
/**
|
|
1052
|
-
* Set the source resolution in PPI.
|
|
1053
|
-
* @param {number} ppi Source resolution in PPI.
|
|
1054
|
-
* @returns {Promise<void>}
|
|
1055
|
-
*/
|
|
1056
|
-
setSourceResolution(ppi: number): Promise<void>;
|
|
1057
|
-
/**
|
|
1058
|
-
* @throws {Error} Will throw an error if the parameter at index 0 is not a function
|
|
1059
|
-
* @param {(info: ProgressChangedInfo) => void} progressCallback Callback will be called to inform the user about progress changes
|
|
1060
|
-
* @returns {Promise<void>}
|
|
1061
|
-
*/
|
|
1062
|
-
recognize(progressCallback: (info: ProgressChangedInfo) => void): Promise<void>;
|
|
1063
|
-
/**
|
|
1064
|
-
* Detect orientation and script (OSD).
|
|
1065
|
-
* @returns {Promise<DetectOrientationScriptResult>}
|
|
1066
|
-
*/
|
|
1067
|
-
detectOrientationScript(): Promise<DetectOrientationScriptResult>;
|
|
1068
|
-
/**
|
|
1069
|
-
* Get mean text confidence.
|
|
1070
|
-
* @returns {Promise<number>} Returns the mean text confidence on resolve
|
|
1071
|
-
*/
|
|
1072
|
-
meanTextConf(): Promise<number>;
|
|
1073
|
-
/**
|
|
1074
|
-
* Get recognized text as UTF-8.
|
|
1075
|
-
* @returns {Promise<string>} Returns the recognized text as UTF-8 on resolve
|
|
1076
|
-
*/
|
|
1077
|
-
getUTF8Text(): Promise<string>;
|
|
1078
|
-
/**
|
|
1079
|
-
* Get hOCR output.
|
|
1080
|
-
* @param {Function} progressCallback Optional progress callback.
|
|
1081
|
-
* @param {number} pageNumber Optional page number (0-based).
|
|
1082
|
-
* @returns {Promise<string>} Returns the `hOCR` upon resolve
|
|
1083
|
-
*/
|
|
1084
|
-
getHOCRText(progressCallback?: (info: ProgressChangedInfo) => void, pageNumber?: number): Promise<string>;
|
|
1085
|
-
/**
|
|
1086
|
-
* Get TSV output.
|
|
1087
|
-
* @returns {Promise<string>} Returns the `tsv` upon resolve
|
|
1088
|
-
*/
|
|
1089
|
-
getTSVText(): Promise<string>;
|
|
1090
|
-
/**
|
|
1091
|
-
* Get UNLV output.
|
|
1092
|
-
* @returns {Promise<string>} Returns the `unlv` upon resolve
|
|
1093
|
-
*/
|
|
1094
|
-
getUNLVText(): Promise<string>;
|
|
1095
|
-
/**
|
|
1096
|
-
* Get ALTO XML output.
|
|
1097
|
-
* @param {Function} progressCallback Optional progress callback.
|
|
1098
|
-
* @param {number} pageNumber Optional page number (0-based).
|
|
1099
|
-
* @returns {Promise<string>} Returns the `alto` upon resolve
|
|
1100
|
-
*/
|
|
1101
|
-
getALTOText(progressCallback?: (info: ProgressChangedInfo) => void, pageNumber?: number): Promise<string>;
|
|
1102
|
-
/**
|
|
1103
|
-
* Get languages used at initialization.
|
|
1104
|
-
* @returns {Promise<Language>} Returns the languages used when init was called
|
|
1105
|
-
*/
|
|
1106
|
-
getInitLanguages(): Promise<Language>;
|
|
1107
|
-
/**
|
|
1108
|
-
* Get languages currently loaded.
|
|
1109
|
-
* @returns {Promise<Language[]>} Returns the languages that were actually loaded by `init`
|
|
1110
|
-
*/
|
|
1111
|
-
getLoadedLanguages(): Promise<Language[]>;
|
|
1112
|
-
/**
|
|
1113
|
-
* Get available languages from tessdata.
|
|
1114
|
-
* NOTE: this will only return anything after `init` has been called with a valid selection of languages
|
|
1115
|
-
* @returns {Promise<Language[]>} Returns the languages that are available to tesseract.
|
|
1116
|
-
*/
|
|
1117
|
-
getAvailableLanguages(): Promise<Language[]>;
|
|
1118
|
-
/**
|
|
1119
|
-
* Clear internal recognition results/state.
|
|
1120
|
-
* @returns {Promise<void>}
|
|
1121
|
-
*/
|
|
1122
|
-
clear(): Promise<void>;
|
|
1123
|
-
/**
|
|
1124
|
-
* Release native resources and destroy the instance.
|
|
1125
|
-
* @returns {Promise<void>}
|
|
1126
|
-
*/
|
|
1127
|
-
end(): Promise<void>;
|
|
1128
|
-
}
|
|
1129
|
-
export type NativeTesseract = TesseractInstance;
|
|
1130
|
-
export type TesseractConstructor = new () => TesseractInstance;
|
|
1131
208
|
declare const NativeTesseract: TesseractConstructor;
|
|
1132
209
|
declare class Tesseract extends NativeTesseract {
|
|
210
|
+
document: TesseractDocumentApi;
|
|
1133
211
|
constructor();
|
|
1134
212
|
init(options?: TesseractInitOptions): Promise<void>;
|
|
1135
213
|
ensureTrainingData({ lang, dataPath, cachePath, downloadBaseUrl }: EnsureTrainedDataOptions, progressCallback?: (info: TrainingDataDownloadProgress) => void): Promise<string>;
|