@luii/node-tesseract-ocr 2.0.13 → 2.3.2

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
@@ -1,3 +1,6 @@
1
+ import type { EnsureTrainedDataOptions, TesseractDocumentApi, TesseractConstructor, TesseractInitOptions, TrainingDataDownloadProgress } from "./types";
2
+ export type { ConfigurationVariables, DebugConfigurationVariableNames, DebugOnlyConfigurationVariables, DetectOrientationScriptResult, EnsureTrainedDataOptions, InitOnlyConfigurationVariables, ProgressChangedInfo, SetBoolConfigurationVariableNames, SetConfigurationVariableNames, SetNumberConfigurationVariableNames, SetStringConfigurationVariableNames, SetVariableConfigVariables, TesseractBeginProcessPagesOptions, TesseractConstructor, TesseractDocumentApi, TesseractInitOptions, TesseractInstance, TesseractProcessPagesStatus, TesseractSetRectangleOptions, TrainingDataDownloadProgress, } from "./types";
3
+ export type NativeTesseract = import("./types").TesseractInstance;
1
4
  /**
2
5
  * All available languages for tesseract
3
6
  * @readonly
@@ -202,868 +205,12 @@ export declare const LogLevels: {
202
205
  readonly OFF: "2147483647";
203
206
  };
204
207
  export type LogLevel = (typeof LogLevels)[keyof typeof LogLevels];
205
- export type ConfigurationVariables = {
206
- log_level: `${LogLevel}`;
207
- textord_dotmatrix_gap: `${number}`;
208
- textord_debug_block: `${0 | 1}`;
209
- textord_pitch_range: `${number}`;
210
- textord_words_veto_power: `${number}`;
211
- textord_tabfind_show_strokewidths: `${0 | 1}`;
212
- pitsync_linear_version: `${number}`;
213
- oldbl_holed_losscount: `${number}`;
214
- textord_skewsmooth_offset: `${number}`;
215
- textord_skewsmooth_offset2: `${0 | 1}`;
216
- textord_test_x: `${number}`;
217
- textord_test_y: `${number}`;
218
- textord_min_blobs_in_row: `${number}`;
219
- textord_spline_minblobs: `${number}`;
220
- textord_spline_medianwin: `${number}`;
221
- textord_max_blob_overlaps: `${number}`;
222
- textord_min_xheight: `${number}`;
223
- textord_lms_line_trials: `${number}`;
224
- textord_tabfind_show_images: `${0 | 1}`;
225
- textord_fp_chop_error: `${number}`;
226
- edges_max_children_per_outline: `${number}`;
227
- edges_max_children_layers: `${number}`;
228
- edges_children_per_grandchild: `${number}`;
229
- edges_children_count_limit: `${number}`;
230
- edges_min_nonhole: `${number}`;
231
- edges_patharea_ratio: `${number}`;
232
- devanagari_split_debuglevel: `${0 | 1}`;
233
- textord_tabfind_show_partitions: `${0 | 1}`;
234
- textord_debug_tabfind: `${0 | 1}`;
235
- textord_debug_bugs: `${0 | 1}`;
236
- textord_testregion_left: `${number}`;
237
- textord_testregion_top: `${number}`;
238
- textord_testregion_right: `${number}`;
239
- textord_testregion_bottom: `${number}`;
240
- classify_num_cp_levels: `${number}`;
241
- editor_image_xpos: `${number}`;
242
- editor_image_ypos: `${number}`;
243
- editor_image_menuheight: `${number}`;
244
- editor_image_blob_bb_color: `${number}`;
245
- editor_word_ypos: `${number}`;
246
- editor_word_width: `${number}`;
247
- curl_timeout: `${0 | 1}`;
248
- wordrec_display_all_blobs: `${0 | 1}`;
249
- wordrec_blob_pause: `${0 | 1}`;
250
- textord_force_make_prop_words: `${0 | 1}`;
251
- textord_chopper_test: `${0 | 1}`;
252
- textord_restore_underlines: `${0 | 1}`;
253
- textord_show_initial_words: `${0 | 1}`;
254
- textord_blocksall_fixed: `${0 | 1}`;
255
- textord_blocksall_prop: `${0 | 1}`;
256
- textord_pitch_scalebigwords: `${0 | 1}`;
257
- textord_debug_pitch_test: `${0 | 1}`;
258
- textord_disable_pitch_test: `${0 | 1}`;
259
- textord_fast_pitch_test: `${0 | 1}`;
260
- textord_debug_pitch_metric: `${0 | 1}`;
261
- textord_show_row_cuts: `${0 | 1}`;
262
- textord_show_page_cuts: `${0 | 1}`;
263
- textord_blockndoc_fixed: `${0 | 1}`;
264
- textord_show_tables: `${0 | 1}`;
265
- textord_tablefind_show_mark: `${0 | 1}`;
266
- textord_tablefind_show_stats: `${0 | 1}`;
267
- textord_tablefind_recognize_tables: `${0 | 1}`;
268
- textord_tabfind_show_initialtabs: `${0 | 1}`;
269
- textord_tabfind_show_finaltabs: `${0 | 1}`;
270
- textord_tabfind_only_strokewidths: `${0 | 1}`;
271
- textord_really_old_xheight: `${0 | 1}`;
272
- textord_oldbl_debug: `${0 | 1}`;
273
- textord_debug_baselines: `${0 | 1}`;
274
- textord_oldbl_paradef: `${0 | 1}`;
275
- textord_oldbl_split_splines: `${0 | 1}`;
276
- textord_oldbl_merge_parts: `${0 | 1}`;
277
- oldbl_corrfix: `${0 | 1}`;
278
- oldbl_xhfix: `${0 | 1}`;
279
- textord_ocropus_mode: `${0 | 1}`;
280
- textord_heavy_nr: `${0 | 1}`;
281
- textord_show_initial_rows: `${0 | 1}`;
282
- textord_show_parallel_rows: `${0 | 1}`;
283
- textord_show_expanded_rows: `${0 | 1}`;
284
- textord_show_final_rows: `${0 | 1}`;
285
- textord_show_final_blobs: `${0 | 1}`;
286
- textord_test_landscape: `${0 | 1}`;
287
- textord_parallel_baselines: `${0 | 1}`;
288
- textord_straight_baselines: `${0 | 1}`;
289
- textord_old_baselines: `${0 | 1}`;
290
- textord_old_xheight: `${0 | 1}`;
291
- textord_fix_xheight_bug: `${0 | 1}`;
292
- textord_fix_makerow_bug: `${0 | 1}`;
293
- textord_debug_xheights: `${0 | 1}`;
294
- textord_biased_skewcalc: `${0 | 1}`;
295
- textord_interpolating_skew: `${0 | 1}`;
296
- textord_new_initial_xheight: `${0 | 1}`;
297
- textord_debug_blob: `${0 | 1}`;
298
- gapmap_debug: `${0 | 1}`;
299
- gapmap_use_ends: `${0 | 1}`;
300
- gapmap_no_isolated_quanta: `${0 | 1}`;
301
- edges_use_new_outline_complexity: `${0 | 1}`;
302
- edges_debug: `${0 | 1}`;
303
- edges_children_fix: `${0 | 1}`;
304
- textord_show_fixed_cuts: `${0 | 1}`;
305
- devanagari_split_debugimage: `${0 | 1}`;
306
- textord_tabfind_show_initial_partitions: `${0 | 1}`;
307
- textord_tabfind_show_reject_blobs: `${0 | 1}`;
308
- textord_tabfind_show_columns: `${0 | 1}`;
309
- textord_tabfind_show_blocks: `${0 | 1}`;
310
- textord_tabfind_find_tables: `${0 | 1}`;
311
- textord_space_size_is_variable: `${0 | 1}`;
312
- textord_debug_printable: `${0 | 1}`;
313
- wordrec_display_splits: `${0 | 1}`;
314
- poly_debug: `${0 | 1}`;
315
- poly_wide_objects_better: `${0 | 1}`;
316
- equationdetect_save_bi_image: `${0 | 1}`;
317
- equationdetect_save_spt_image: `${0 | 1}`;
318
- equationdetect_save_seed_image: `${0 | 1}`;
319
- equationdetect_save_merged_image: `${0 | 1}`;
320
- debug_file: `${string}`;
321
- editor_word_name: `${string}`;
322
- dotproduct: `${string}`;
323
- document_title: `${string}`;
324
- curl_cookiefile: `${string}`;
325
- classify_font_name: `${string}`;
326
- textord_underline_offset: `${number}`;
327
- textord_wordstats_smooth_factor: `${number}`;
328
- textord_words_maxspace: `${number}`;
329
- textord_words_default_maxspace: `${number}`;
330
- textord_words_default_minspace: `${number}`;
331
- textord_words_min_minspace: `${number}`;
332
- textord_words_default_nonspace: `${number}`;
333
- textord_words_initial_lower: `${number}`;
334
- textord_words_initial_upper: `${number}`;
335
- textord_words_minlarge: `${number}`;
336
- textord_words_pitchsd_threshold: `${number}`;
337
- textord_words_def_fixed: `${number}`;
338
- textord_words_def_prop: `${number}`;
339
- textord_pitch_rowsimilarity: `${number}`;
340
- words_initial_lower: `${number}`;
341
- words_initial_upper: `${number}`;
342
- words_default_prop_nonspace: `${number}`;
343
- words_default_fixed_space: `${number}`;
344
- words_default_fixed_limit: `${number}`;
345
- textord_words_definite_spread: `${number}`;
346
- textord_spacesize_ratioprop: `${number}`;
347
- textord_fpiqr_ratio: `${number}`;
348
- textord_max_pitch_iqr: `${number}`;
349
- textord_projection_scale: `${number}`;
350
- textord_balance_factor: `${0 | 1}`;
351
- textord_tabvector_vertical_gap_fraction: `${number}`;
352
- textord_tabvector_vertical_box_ratio: `${number}`;
353
- pitsync_joined_edge: `${number}`;
354
- pitsync_offset_freecut_fraction: `${number}`;
355
- oldbl_xhfract: `${number}`;
356
- oldbl_dot_error_size: `${number}`;
357
- textord_oldbl_jumplimit: `${number}`;
358
- textord_spline_shift_fraction: `${number}`;
359
- textord_skew_ile: `${number}`;
360
- textord_skew_lag: `${number}`;
361
- textord_linespace_iqrlimit: `${number}`;
362
- textord_width_limit: `${number}`;
363
- textord_chop_width: `${number}`;
364
- textord_expansion_factor: `${0 | 1}`;
365
- textord_overlap_x: `${number}`;
366
- textord_minxh: `${number}`;
367
- textord_min_linesize: `${number}`;
368
- textord_excess_blobsize: `${number}`;
369
- textord_occupancy_threshold: `${number}`;
370
- textord_underline_width: `${number}`;
371
- textord_min_blob_height_fraction: `${number}`;
372
- textord_xheight_mode_fraction: `${number}`;
373
- textord_ascheight_mode_fraction: `${number}`;
374
- textord_descheight_mode_fraction: `${number}`;
375
- textord_ascx_ratio_min: `${number}`;
376
- textord_ascx_ratio_max: `${number}`;
377
- textord_descx_ratio_min: `${number}`;
378
- textord_descx_ratio_max: `${number}`;
379
- textord_xheight_error_margin: `${number}`;
380
- gapmap_big_gaps: `${number}`;
381
- edges_childarea: `${number}`;
382
- edges_boxarea: `${number}`;
383
- textord_underline_threshold: `${number}`;
384
- classify_pico_feature_length: `${number}`;
385
- classify_norm_adj_midpoint: `${number}`;
386
- classify_norm_adj_curl: `${number}`;
387
- classify_min_slope: `${number}`;
388
- classify_max_slope: `${number}`;
389
- classify_cp_angle_pad_loose: `${number}`;
390
- classify_cp_angle_pad_medium: `${number}`;
391
- classify_cp_angle_pad_tight: `${number}`;
392
- classify_cp_end_pad_loose: `${number}`;
393
- classify_cp_end_pad_medium: `${number}`;
394
- classify_cp_end_pad_tight: `${number}`;
395
- classify_cp_side_pad_loose: `${number}`;
396
- classify_cp_side_pad_medium: `${number}`;
397
- classify_cp_side_pad_tight: `${number}`;
398
- classify_pp_angle_pad: `${number}`;
399
- classify_pp_end_pad: `${number}`;
400
- classify_pp_side_pad: `${number}`;
401
- ambigs_debug_level: `${0 | 1}`;
402
- classify_debug_level: `${0 | 1}`;
403
- classify_norm_method: `${0 | 1}`;
404
- matcher_debug_level: `${0 | 1}`;
405
- matcher_debug_flags: `${0 | 1}`;
406
- classify_learning_debug_level: `${0 | 1}`;
407
- matcher_permanent_classes_min: `${0 | 1}`;
408
- matcher_min_examples_for_prototyping: `${number}`;
409
- matcher_sufficient_examples_for_prototyping: `${number}`;
410
- classify_adapt_proto_threshold: `${number}`;
411
- classify_adapt_feature_threshold: `${number}`;
412
- classify_class_pruner_threshold: `${number}`;
413
- classify_class_pruner_multiplier: `${number}`;
414
- classify_cp_cutoff_strength: `${number}`;
415
- classify_integer_matcher_multiplier: `${number}`;
416
- dawg_debug_level: `${0 | 1}`;
417
- hyphen_debug_level: `${0 | 1}`;
418
- stopper_smallword_size: `${number}`;
419
- stopper_debug_level: `${0 | 1}`;
420
- tessedit_truncate_wordchoice_log: `${number}`;
421
- max_permuter_attempts: `${number}`;
422
- repair_unchopped_blobs: `${0 | 1}`;
423
- chop_debug: `${0 | 1}`;
424
- chop_split_length: `${number}`;
425
- chop_same_distance: `${number}`;
426
- chop_min_outline_points: `${number}`;
427
- chop_seam_pile_size: `${number}`;
428
- chop_inside_angle: `${number}`;
429
- chop_min_outline_area: `${number}`;
430
- chop_centered_maxwidth: `${number}`;
431
- chop_x_y_weight: `${number}`;
432
- wordrec_debug_level: `${0 | 1}`;
433
- wordrec_max_join_chunks: `${number}`;
434
- segsearch_debug_level: `${0 | 1}`;
435
- segsearch_max_pain_points: `${number}`;
436
- segsearch_max_futile_classifications: `${number}`;
437
- language_model_debug_level: `${0 | 1}`;
438
- language_model_ngram_order: `${number}`;
439
- language_model_viterbi_list_max_num_prunable: `${number}`;
440
- language_model_viterbi_list_max_size: `${number}`;
441
- language_model_min_compound_length: `${number}`;
442
- wordrec_display_segmentations: `${0 | 1}`;
443
- tessedit_pageseg_mode: `${number}`;
444
- thresholding_method: `${0 | 1}`;
445
- tessedit_ocr_engine_mode: `${number}`;
446
- pageseg_devanagari_split_strategy: `${0 | 1}`;
447
- ocr_devanagari_split_strategy: `${0 | 1}`;
448
- bidi_debug: `${0 | 1}`;
449
- applybox_debug: `${0 | 1}`;
450
- applybox_page: `${0 | 1}`;
451
- tessedit_font_id: `${0 | 1}`;
452
- tessedit_bigram_debug: `${0 | 1}`;
453
- debug_noise_removal: `${0 | 1}`;
454
- noise_maxperblob: `${number}`;
455
- noise_maxperword: `${number}`;
456
- debug_x_ht_level: `${0 | 1}`;
457
- quality_min_initial_alphas_reqd: `${number}`;
458
- tessedit_tess_adaption_mode: `${number}`;
459
- multilang_debug_level: `${0 | 1}`;
460
- paragraph_debug_level: `${0 | 1}`;
461
- tessedit_preserve_min_wd_len: `${number}`;
462
- crunch_rating_max: `${number}`;
463
- crunch_pot_indicators: `${0 | 1}`;
464
- crunch_leave_lc_strings: `${number}`;
465
- crunch_leave_uc_strings: `${number}`;
466
- crunch_long_repetitions: `${number}`;
467
- crunch_debug: `${0 | 1}`;
468
- fixsp_non_noise_limit: `${0 | 1}`;
469
- fixsp_done_mode: `${0 | 1}`;
470
- debug_fix_space_level: `${0 | 1}`;
471
- x_ht_acceptance_tolerance: `${number}`;
472
- x_ht_min_change: `${number}`;
473
- superscript_debug: `${0 | 1}`;
474
- page_xml_level: `${0 | 1}`;
475
- jpg_quality: `${number}`;
476
- user_defined_dpi: `${0 | 1}`;
477
- min_characters_to_try: `${number}`;
478
- suspect_level: `${number}`;
479
- suspect_short_words: `${number}`;
480
- tessedit_reject_mode: `${0 | 1}`;
481
- tessedit_image_border: `${number}`;
482
- min_sane_x_ht_pixels: `${number}`;
483
- tessedit_page_number: `${number}`;
484
- tessedit_parallelize: `${0 | 1}`;
485
- lstm_choice_mode: `${0 | 1}`;
486
- lstm_choice_iterations: `${number}`;
487
- tosp_debug_level: `${0 | 1}`;
488
- tosp_enough_space_samples_for_median: `${number}`;
489
- tosp_redo_kern_limit: `${number}`;
490
- tosp_few_samples: `${number}`;
491
- tosp_short_row: `${number}`;
492
- tosp_sanity_method: `${0 | 1}`;
493
- textord_max_noise_size: `${number}`;
494
- textord_baseline_debug: `${0 | 1}`;
495
- textord_noise_sizefraction: `${number}`;
496
- textord_noise_translimit: `${number}`;
497
- textord_noise_sncount: `${0 | 1}`;
498
- use_ambigs_for_adaption: `${0 | 1}`;
499
- allow_blob_division: `${0 | 1}`;
500
- prioritize_division: `${0 | 1}`;
501
- classify_enable_learning: `${0 | 1}`;
502
- tess_cn_matching: `${0 | 1}`;
503
- tess_bn_matching: `${0 | 1}`;
504
- classify_enable_adaptive_matcher: `${0 | 1}`;
505
- classify_use_pre_adapted_templates: `${0 | 1}`;
506
- classify_save_adapted_templates: `${0 | 1}`;
507
- classify_enable_adaptive_debugger: `${0 | 1}`;
508
- classify_nonlinear_norm: `${0 | 1}`;
509
- disable_character_fragments: `${0 | 1}`;
510
- classify_debug_character_fragments: `${0 | 1}`;
511
- matcher_debug_separate_windows: `${0 | 1}`;
512
- classify_bln_numeric_mode: `${0 | 1}`;
513
- load_system_dawg: `${0 | 1}`;
514
- load_freq_dawg: `${0 | 1}`;
515
- load_unambig_dawg: `${0 | 1}`;
516
- load_punc_dawg: `${0 | 1}`;
517
- load_number_dawg: `${0 | 1}`;
518
- load_bigram_dawg: `${0 | 1}`;
519
- use_only_first_uft8_step: `${0 | 1}`;
520
- stopper_no_acceptable_choices: `${0 | 1}`;
521
- segment_nonalphabetic_script: `${0 | 1}`;
522
- save_doc_words: `${0 | 1}`;
523
- merge_fragments_in_matrix: `${0 | 1}`;
524
- wordrec_enable_assoc: `${0 | 1}`;
525
- force_word_assoc: `${0 | 1}`;
526
- chop_enable: `${0 | 1}`;
527
- chop_vertical_creep: `${0 | 1}`;
528
- chop_new_seam_pile: `${0 | 1}`;
529
- assume_fixed_pitch_char_segment: `${0 | 1}`;
530
- wordrec_skip_no_truth_words: `${0 | 1}`;
531
- wordrec_debug_blamer: `${0 | 1}`;
532
- wordrec_run_blamer: `${0 | 1}`;
533
- save_alt_choices: `${0 | 1}`;
534
- language_model_ngram_on: `${0 | 1}`;
535
- language_model_ngram_use_only_first_uft8_step: `${0 | 1}`;
536
- language_model_ngram_space_delimited_language: `${0 | 1}`;
537
- language_model_use_sigmoidal_certainty: `${0 | 1}`;
538
- tessedit_resegment_from_boxes: `${0 | 1}`;
539
- tessedit_resegment_from_line_boxes: `${0 | 1}`;
540
- tessedit_train_from_boxes: `${0 | 1}`;
541
- tessedit_make_boxes_from_boxes: `${0 | 1}`;
542
- tessedit_train_line_recognizer: `${0 | 1}`;
543
- tessedit_dump_pageseg_images: `${0 | 1}`;
544
- tessedit_do_invert: `${0 | 1}`;
545
- thresholding_debug: `${0 | 1}`;
546
- tessedit_ambigs_training: `${0 | 1}`;
547
- tessedit_adaption_debug: `${0 | 1}`;
548
- applybox_learn_chars_and_char_frags_mode: `${0 | 1}`;
549
- applybox_learn_ngrams_mode: `${0 | 1}`;
550
- tessedit_display_outwords: `${0 | 1}`;
551
- tessedit_dump_choices: `${0 | 1}`;
552
- tessedit_timing_debug: `${0 | 1}`;
553
- tessedit_fix_fuzzy_spaces: `${0 | 1}`;
554
- tessedit_unrej_any_wd: `${0 | 1}`;
555
- tessedit_fix_hyphens: `${0 | 1}`;
556
- tessedit_enable_doc_dict: `${0 | 1}`;
557
- tessedit_debug_fonts: `${0 | 1}`;
558
- tessedit_debug_block_rejection: `${0 | 1}`;
559
- tessedit_enable_bigram_correction: `${0 | 1}`;
560
- tessedit_enable_dict_correction: `${0 | 1}`;
561
- enable_noise_removal: `${0 | 1}`;
562
- tessedit_minimal_rej_pass1: `${0 | 1}`;
563
- tessedit_test_adaption: `${0 | 1}`;
564
- test_pt: `${0 | 1}`;
565
- paragraph_text_based: `${0 | 1}`;
566
- lstm_use_matrix: `${0 | 1}`;
567
- tessedit_good_quality_unrej: `${0 | 1}`;
568
- tessedit_use_reject_spaces: `${0 | 1}`;
569
- tessedit_preserve_blk_rej_perfect_wds: `${0 | 1}`;
570
- tessedit_preserve_row_rej_perfect_wds: `${0 | 1}`;
571
- tessedit_dont_blkrej_good_wds: `${0 | 1}`;
572
- tessedit_dont_rowrej_good_wds: `${0 | 1}`;
573
- tessedit_row_rej_good_docs: `${0 | 1}`;
574
- tessedit_reject_bad_qual_wds: `${0 | 1}`;
575
- tessedit_debug_doc_rejection: `${0 | 1}`;
576
- tessedit_debug_quality_metrics: `${0 | 1}`;
577
- bland_unrej: `${0 | 1}`;
578
- unlv_tilde_crunching: `${0 | 1}`;
579
- hocr_font_info: `${0 | 1}`;
580
- hocr_char_boxes: `${0 | 1}`;
581
- crunch_early_merge_tess_fails: `${0 | 1}`;
582
- crunch_early_convert_bad_unlv_chs: `${0 | 1}`;
583
- crunch_terrible_garbage: `${0 | 1}`;
584
- crunch_leave_ok_strings: `${0 | 1}`;
585
- crunch_accept_ok: `${0 | 1}`;
586
- crunch_leave_accept_strings: `${0 | 1}`;
587
- crunch_include_numerals: `${0 | 1}`;
588
- tessedit_prefer_joined_punct: `${0 | 1}`;
589
- tessedit_write_block_separators: `${0 | 1}`;
590
- tessedit_write_rep_codes: `${0 | 1}`;
591
- tessedit_write_unlv: `${0 | 1}`;
592
- tessedit_create_txt: `${0 | 1}`;
593
- tessedit_create_hocr: `${0 | 1}`;
594
- tessedit_create_alto: `${0 | 1}`;
595
- tessedit_create_page_xml: `${0 | 1}`;
596
- page_xml_polygon: `${0 | 1}`;
597
- tessedit_create_lstmbox: `${0 | 1}`;
598
- tessedit_create_tsv: `${0 | 1}`;
599
- tessedit_create_wordstrbox: `${0 | 1}`;
600
- tessedit_create_pdf: `${0 | 1}`;
601
- textonly_pdf: `${0 | 1}`;
602
- suspect_constrain_1Il: `${0 | 1}`;
603
- tessedit_minimal_rejection: `${0 | 1}`;
604
- tessedit_zero_rejection: `${0 | 1}`;
605
- tessedit_word_for_word: `${0 | 1}`;
606
- tessedit_zero_kelvin_rejection: `${0 | 1}`;
607
- tessedit_rejection_debug: `${0 | 1}`;
608
- tessedit_flip_0O: `${0 | 1}`;
609
- rej_trust_doc_dawg: `${0 | 1}`;
610
- rej_1Il_use_dict_word: `${0 | 1}`;
611
- rej_1Il_trust_permuter_type: `${0 | 1}`;
612
- rej_use_tess_accepted: `${0 | 1}`;
613
- rej_use_tess_blanks: `${0 | 1}`;
614
- rej_use_good_perm: `${0 | 1}`;
615
- rej_use_sensible_wd: `${0 | 1}`;
616
- rej_alphas_in_number_perm: `${0 | 1}`;
617
- tessedit_create_boxfile: `${0 | 1}`;
618
- tessedit_write_images: `${0 | 1}`;
619
- interactive_display_mode: `${0 | 1}`;
620
- tessedit_override_permuter: `${0 | 1}`;
621
- tessedit_use_primary_params_model: `${0 | 1}`;
622
- textord_tabfind_show_vlines: `${0 | 1}`;
623
- textord_use_cjk_fp_model: `${0 | 1}`;
624
- poly_allow_detailed_fx: `${0 | 1}`;
625
- tessedit_init_config_only: `${0 | 1}`;
626
- textord_equation_detect: `${0 | 1}`;
627
- textord_tabfind_vertical_text: `${0 | 1}`;
628
- textord_tabfind_force_vertical_text: `${0 | 1}`;
629
- preserve_interword_spaces: `${0 | 1}`;
630
- pageseg_apply_music_mask: `${0 | 1}`;
631
- textord_single_height_mode: `${0 | 1}`;
632
- tosp_old_to_method: `${0 | 1}`;
633
- tosp_old_to_constrain_sp_kn: `${0 | 1}`;
634
- tosp_only_use_prop_rows: `${0 | 1}`;
635
- tosp_force_wordbreak_on_punct: `${0 | 1}`;
636
- tosp_use_pre_chopping: `${0 | 1}`;
637
- tosp_old_to_bug_fix: `${0 | 1}`;
638
- tosp_block_use_cert_spaces: `${0 | 1}`;
639
- tosp_row_use_cert_spaces: `${0 | 1}`;
640
- tosp_narrow_blobs_not_cert: `${0 | 1}`;
641
- tosp_row_use_cert_spaces1: `${0 | 1}`;
642
- tosp_recovery_isolated_row_stats: `${0 | 1}`;
643
- tosp_only_small_gaps_for_kern: `${0 | 1}`;
644
- tosp_all_flips_fuzzy: `${0 | 1}`;
645
- tosp_fuzzy_limit_all: `${0 | 1}`;
646
- tosp_stats_use_xht_gaps: `${0 | 1}`;
647
- tosp_use_xht_gaps: `${0 | 1}`;
648
- tosp_only_use_xht_gaps: `${0 | 1}`;
649
- tosp_rule_9_test_punct: `${0 | 1}`;
650
- tosp_flip_fuzz_kn_to_sp: `${0 | 1}`;
651
- tosp_flip_fuzz_sp_to_kn: `${0 | 1}`;
652
- tosp_improve_thresh: `${0 | 1}`;
653
- textord_no_rejects: `${0 | 1}`;
654
- textord_show_blobs: `${0 | 1}`;
655
- textord_show_boxes: `${0 | 1}`;
656
- textord_noise_rejwords: `${0 | 1}`;
657
- textord_noise_rejrows: `${0 | 1}`;
658
- textord_noise_debug: `${0 | 1}`;
659
- classify_learn_debug_str: `${string}`;
660
- user_words_file: `${string}`;
661
- user_words_suffix: `${string}`;
662
- user_patterns_file: `${string}`;
663
- user_patterns_suffix: `${string}`;
664
- output_ambig_words_file: `${string}`;
665
- word_to_debug: `${string}`;
666
- tessedit_char_blacklist: `${string}`;
667
- tessedit_char_whitelist: `${string}`;
668
- tessedit_char_unblacklist: `${string}`;
669
- tessedit_write_params_to_file: `${string}`;
670
- applybox_exposure_pattern: `${string}`;
671
- chs_leading_punct: `${string}`;
672
- chs_trailing_punct1: `${string}`;
673
- chs_trailing_punct2: `${string}`;
674
- outlines_odd: `${string}`;
675
- outlines_2: `${string}`;
676
- numeric_punctuation: `${string}`;
677
- unrecognised_char: `${string}`;
678
- ok_repeated_ch_non_alphanum_wds: `${string}`;
679
- conflict_set_I_l_1: `${string}`;
680
- file_type: `${string}`;
681
- tessedit_load_sublangs: `${string}`;
682
- page_separator: `${string}`;
683
- classify_char_norm_range: `${number}`;
684
- classify_max_rating_ratio: `${number}`;
685
- classify_max_certainty_margin: `${number}`;
686
- matcher_good_threshold: `${number}`;
687
- matcher_reliable_adaptive_result: `${0 | 1}`;
688
- matcher_perfect_threshold: `${number}`;
689
- matcher_bad_match_pad: `${number}`;
690
- matcher_rating_margin: `${number}`;
691
- matcher_avg_noise_size: `${number}`;
692
- matcher_clustering_max_angle_delta: `${number}`;
693
- classify_misfit_junk_penalty: `${0 | 1}`;
694
- rating_scale: `${number}`;
695
- tessedit_class_miss_scale: `${number}`;
696
- classify_adapted_pruning_factor: `${number}`;
697
- classify_adapted_pruning_threshold: `${number}`;
698
- classify_character_fragments_garbage_certainty_threshold: `${number}`;
699
- speckle_large_max_size: `${number}`;
700
- speckle_rating_penalty: `${number}`;
701
- xheight_penalty_subscripts: `${number}`;
702
- xheight_penalty_inconsistent: `${number}`;
703
- segment_penalty_dict_frequent_word: `${0 | 1}`;
704
- segment_penalty_dict_case_ok: `${number}`;
705
- segment_penalty_dict_case_bad: `${number}`;
706
- segment_penalty_dict_nonword: `${number}`;
707
- segment_penalty_garbage: `${number}`;
708
- certainty_scale: `${number}`;
709
- stopper_nondict_certainty_base: `${number}`;
710
- stopper_phase2_certainty_rejection_offset: `${0 | 1}`;
711
- stopper_certainty_per_char: `${number}`;
712
- stopper_allowable_character_badness: `${number}`;
713
- doc_dict_pending_threshold: `${0 | 1}`;
714
- doc_dict_certainty_threshold: `${number}`;
715
- tessedit_certainty_threshold: `${number}`;
716
- chop_split_dist_knob: `${number}`;
717
- chop_overlap_knob: `${number}`;
718
- chop_center_knob: `${number}`;
719
- chop_sharpness_knob: `${number}`;
720
- chop_width_change_knob: `${number}`;
721
- chop_ok_split: `${number}`;
722
- chop_good_split: `${number}`;
723
- segsearch_max_char_wh_ratio: `${number}`;
724
- language_model_ngram_small_prob: `${number}`;
725
- language_model_ngram_nonmatch_score: `${number}`;
726
- language_model_ngram_scale_factor: `${number}`;
727
- language_model_ngram_rating_factor: `${number}`;
728
- language_model_penalty_non_freq_dict_word: `${number}`;
729
- language_model_penalty_non_dict_word: `${number}`;
730
- language_model_penalty_punc: `${number}`;
731
- language_model_penalty_case: `${number}`;
732
- language_model_penalty_script: `${number}`;
733
- language_model_penalty_chartype: `${number}`;
734
- language_model_penalty_font: `${0 | 1}`;
735
- language_model_penalty_spacing: `${number}`;
736
- language_model_penalty_increment: `${number}`;
737
- invert_threshold: `${number}`;
738
- thresholding_window_size: `${number}`;
739
- thresholding_kfactor: `${number}`;
740
- thresholding_tile_size: `${number}`;
741
- thresholding_smooth_kernel_size: `${0 | 1}`;
742
- thresholding_score_fraction: `${number}`;
743
- noise_cert_basechar: `${number}`;
744
- noise_cert_disjoint: `${number}`;
745
- noise_cert_punc: `${number}`;
746
- noise_cert_factor: `${number}`;
747
- quality_rej_pc: `${number}`;
748
- quality_blob_pc: `${0 | 1}`;
749
- quality_outline_pc: `${0 | 1}`;
750
- quality_char_pc: `${number}`;
751
- test_pt_x: `${number}`;
752
- test_pt_y: `${number}`;
753
- tessedit_reject_doc_percent: `${number}`;
754
- tessedit_reject_block_percent: `${number}`;
755
- tessedit_reject_row_percent: `${number}`;
756
- tessedit_whole_wd_rej_row_percent: `${number}`;
757
- tessedit_good_doc_still_rowrej_wd: `${number}`;
758
- quality_rowrej_pc: `${number}`;
759
- crunch_terrible_rating: `${number}`;
760
- crunch_poor_garbage_cert: `${number}`;
761
- crunch_poor_garbage_rate: `${number}`;
762
- crunch_pot_poor_rate: `${number}`;
763
- crunch_pot_poor_cert: `${number}`;
764
- crunch_del_rating: `${number}`;
765
- crunch_del_cert: `${number}`;
766
- crunch_del_min_ht: `${number}`;
767
- crunch_del_max_ht: `${number}`;
768
- crunch_del_min_width: `${number}`;
769
- crunch_del_high_word: `${number}`;
770
- crunch_del_low_word: `${number}`;
771
- crunch_small_outlines_size: `${number}`;
772
- fixsp_small_outlines_size: `${number}`;
773
- superscript_worse_certainty: `${number}`;
774
- superscript_bettered_certainty: `${number}`;
775
- superscript_scaledown_ratio: `${number}`;
776
- subscript_max_y_top: `${number}`;
777
- superscript_min_y_bottom: `${number}`;
778
- suspect_rating_per_ch: `${number}`;
779
- suspect_accept_rating: `${number}`;
780
- tessedit_lower_flip_hyphen: `${number}`;
781
- tessedit_upper_flip_hyphen: `${number}`;
782
- rej_whole_of_mostly_reject_word_fract: `${number}`;
783
- min_orientation_margin: `${number}`;
784
- textord_tabfind_vertical_text_ratio: `${number}`;
785
- textord_tabfind_aligned_gap_fraction: `${number}`;
786
- lstm_rating_coefficient: `${number}`;
787
- tosp_old_sp_kn_th_factor: `${number}`;
788
- tosp_threshold_bias1: `${0 | 1}`;
789
- tosp_threshold_bias2: `${0 | 1}`;
790
- tosp_narrow_fraction: `${number}`;
791
- tosp_narrow_aspect_ratio: `${number}`;
792
- tosp_wide_fraction: `${number}`;
793
- tosp_wide_aspect_ratio: `${0 | 1}`;
794
- tosp_fuzzy_space_factor: `${number}`;
795
- tosp_fuzzy_space_factor1: `${number}`;
796
- tosp_fuzzy_space_factor2: `${number}`;
797
- tosp_gap_factor: `${number}`;
798
- tosp_kern_gap_factor1: `${number}`;
799
- tosp_kern_gap_factor2: `${number}`;
800
- tosp_kern_gap_factor3: `${number}`;
801
- tosp_ignore_big_gaps: `${number}`;
802
- tosp_ignore_very_big_gaps: `${number}`;
803
- tosp_rep_space: `${number}`;
804
- tosp_enough_small_gaps: `${number}`;
805
- tosp_table_kn_sp_ratio: `${number}`;
806
- tosp_table_xht_sp_ratio: `${number}`;
807
- tosp_table_fuzzy_kn_sp_ratio: `${number}`;
808
- tosp_fuzzy_kn_fraction: `${number}`;
809
- tosp_fuzzy_sp_fraction: `${number}`;
810
- tosp_min_sane_kn_sp: `${number}`;
811
- tosp_init_guess_kn_mult: `${number}`;
812
- tosp_init_guess_xht_mult: `${number}`;
813
- tosp_max_sane_kn_thresh: `${number}`;
814
- tosp_flip_caution: `${0 | 1}`;
815
- tosp_large_kerning: `${number}`;
816
- tosp_dont_fool_with_small_kerns: `${number}`;
817
- tosp_near_lh_edge: `${0 | 1}`;
818
- tosp_silly_kn_sp_gap: `${number}`;
819
- tosp_pass_wide_fuzz_sp_to_context: `${number}`;
820
- textord_noise_area_ratio: `${number}`;
821
- textord_initialx_ile: `${number}`;
822
- textord_initialasc_ile: `${number}`;
823
- textord_noise_sizelimit: `${number}`;
824
- textord_noise_normratio: `${number}`;
825
- textord_noise_syfract: `${number}`;
826
- textord_noise_sxfract: `${number}`;
827
- textord_noise_hfract: `${number}`;
828
- textord_noise_rowratio: `${number}`;
829
- textord_blshift_maxshift: `${0 | 1}`;
830
- textord_blshift_xfraction: `${number}`;
831
- };
832
- type InitOnlyConfigurationVariableNames = "ambigs_debug_level" | "language_model_ngram_on" | "language_model_use_sigmoidal_certainty" | "load_bigram_dawg" | "load_freq_dawg" | "load_number_dawg" | "load_punc_dawg" | "load_system_dawg" | "load_unambig_dawg" | "tessedit_init_config_only" | "tessedit_ocr_engine_mode" | "user_patterns_suffix" | "user_words_suffix";
833
- export type InitOnlyConfigurationVariables = Pick<ConfigurationVariables, InitOnlyConfigurationVariableNames>;
834
- export type SetVariableConfigVariables = Omit<ConfigurationVariables, InitOnlyConfigurationVariableNames>;
835
- /**
836
- * Tesseract init options
837
- */
838
- export interface TesseractInitOptions {
839
- /**
840
- * Its generally safer to use as few languages as possible.
841
- * The more languages Tesseract needs to load the longer it takes to recognize a image.
842
- * @public
843
- */
844
- lang?: Language[];
845
- /**
846
- * OCR Engine Modes
847
- * The engine mode cannot be changed after creating the instance
848
- * If another mode is needed, its advised to create a new instance.
849
- * @throws {Error} Will throw an error when oem mode is below 0 or over 3
850
- */
851
- oem?: OcrEngineMode;
852
- setOnlyNonDebugParams?: boolean;
853
- configs?: Array<string>;
854
- vars?: Partial<Record<keyof ConfigurationVariables, ConfigurationVariables[keyof ConfigurationVariables]>>;
855
- }
856
- export interface TesseractSetRectangleOptions {
857
- top: number;
858
- left: number;
859
- width: number;
860
- height: number;
861
- }
862
- export interface ProgressChangedInfo {
863
- /**
864
- * Chars in this buffer
865
- */
866
- progress: number;
867
- /**
868
- * Percent complete increasing (0-100)
869
- */
870
- percent: number;
871
- /**
872
- * States if the worker is still alive
873
- */
874
- ocrAlive: number;
875
- /**
876
- * top coordinate of the bbox of the current element that tesseract is processing
877
- */
878
- top: number;
879
- /**
880
- * right coordinate of the bbox of the current element that tesseract is processing
881
- */
882
- right: number;
883
- /**
884
- * bottom coordinate of the bbox of the current element that tesseract is processing
885
- */
886
- bottom: number;
887
- /**
888
- * left coordinate of the bbox of the current element that tesseract is processing
889
- */
890
- left: number;
891
- }
892
- export interface DetectOrientationScriptResult {
893
- /**
894
- * Orientation of the source image in degrees
895
- * Orientation refers to the way the source is rotated, **not** how the text is
896
- * aligned. It ranges from 0° to 360° degrees.
897
- * @type {number}
898
- */
899
- orientationDegrees: number;
900
- /**
901
- * The confidence of tesseract for the orientation
902
- * @type {number}
903
- */
904
- orientationConfidence: number;
905
- /**
906
- * The name of the script that is used in the source image
907
- * @type {string}
908
- */
909
- scriptName: string;
910
- /**
911
- * The confidence of tesseract about the detected script of the source image
912
- * @type {number}
913
- */
914
- scriptConfidence: number;
915
- }
916
- export interface TesseractInstance {
917
- /**
918
- * Initialize the engine with the given options.
919
- * @param {TesseractInitOptions} options Initialization options (languages, datapath, engine mode, etc.).
920
- * @returns {Promise<void>}
921
- */
922
- init(options: TesseractInitOptions): Promise<void>;
923
- /**
924
- * Initialize the engine for page analysis only.
925
- * @returns {Promise<void>}
926
- */
927
- initForAnalysePage(): Promise<void>;
928
- /**
929
- * Run page layout analysis.
930
- * @param {boolean} mergeSimilarWords Whether to merge similar words during analysis.
931
- * @returns {Promise<void>}
932
- */
933
- analysePage(mergeSimilarWords: boolean): Promise<void>;
934
- /**
935
- * Set the page segmentation mode (PSM).
936
- * @param {PageSegmentationMode} psm Page segmentation mode.
937
- * @returns {Promise<void>}
938
- */
939
- setPageMode(psm: PageSegmentationMode): Promise<void>;
940
- /**
941
- * Set a configuration variable.
942
- * @param {keyof SetVariableConfigVariables} name Variable name.
943
- * @param {SetVariableConfigVariables[keyof SetVariableConfigVariables]} value Variable value.
944
- * @returns Returns `false` if the lookup failed.
945
- */
946
- setVariable(name: keyof SetVariableConfigVariables, value: SetVariableConfigVariables[keyof SetVariableConfigVariables]): Promise<boolean>;
947
- /**
948
- * Get a configuration variable as integer.
949
- * @param {keyof SetVariableConfigVariables} name Variable name.
950
- * @returns {Promise<number>} Returns the value of the variable.
951
- */
952
- getIntVariable(name: keyof SetVariableConfigVariables): Promise<number>;
953
- /**
954
- * Get a configuration variable as boolean (0/1).
955
- * @param {keyof SetVariableConfigVariables} name Variable name.
956
- * @returns {Promise<number>} Returns the value of the variable.
957
- */
958
- getBoolVariable(name: keyof SetVariableConfigVariables): Promise<number>;
959
- /**
960
- * Get a configuration variable as double.
961
- * @param {keyof SetVariableConfigVariables} name Variable name.
962
- * @returns {Promise<number>} Returns the value of the variable.
963
- */
964
- getDoubleVariable(name: keyof SetVariableConfigVariables): Promise<number>;
965
- /**
966
- * Get a configuration variable as string.
967
- * @param {keyof SetVariableConfigVariables} name Variable name.
968
- * @returns {Promise<string>} Returns the value of the variable.
969
- */
970
- getStringVariable(name: keyof SetVariableConfigVariables): Promise<string>;
971
- /**
972
- * Set the image to be recognized.
973
- * @param {Buffer<ArrayBuffer>} buffer Image data buffer.
974
- * @returns {Promise<void>}
975
- */
976
- setImage(buffer: Buffer<ArrayBuffer>): Promise<void>;
977
- /**
978
- * Restrict recognition to a rectangle.
979
- * @param {TesseractSetRectangleOptions} options Rectangle options.
980
- * @returns {Promise<void>}
981
- */
982
- setRectangle(options: TesseractSetRectangleOptions): Promise<void>;
983
- /**
984
- * Set the source resolution in PPI.
985
- * @param {number} ppi Source resolution in PPI.
986
- * @returns {Promise<void>}
987
- */
988
- setSourceResolution(ppi: number): Promise<void>;
989
- /**
990
- * @throws {Error} Will throw an error if the parameter at index 0 is not a function
991
- * @param {(info: ProgressChangedInfo) => void} progressCallback Callback will be called to inform the user about progress changes
992
- * @returns {Promise<void>}
993
- */
994
- recognize(progressCallback: (info: ProgressChangedInfo) => void): Promise<void>;
995
- /**
996
- * Detect orientation and script (OSD).
997
- * @returns {Promise<DetectOrientationScriptResult>}
998
- */
999
- detectOrientationScript(): Promise<DetectOrientationScriptResult>;
1000
- /**
1001
- * Get mean text confidence.
1002
- * @returns {Promise<number>} Returns the mean text confidence on resolve
1003
- */
1004
- meanTextConf(): Promise<number>;
1005
- /**
1006
- * Get recognized text as UTF-8.
1007
- * @returns {Promise<string>} Returns the recognized text as UTF-8 on resolve
1008
- */
1009
- getUTF8Text(): Promise<string>;
1010
- /**
1011
- * Get hOCR output.
1012
- * @param {Function} progressCallback Optional progress callback.
1013
- * @param {number} pageNumber Optional page number (0-based).
1014
- * @returns {Promise<string>} Returns the `hOCR` upon resolve
1015
- */
1016
- getHOCRText(progressCallback?: (info: ProgressChangedInfo) => void, pageNumber?: number): Promise<string>;
1017
- /**
1018
- * Get TSV output.
1019
- * @returns {Promise<string>} Returns the `tsv` upon resolve
1020
- */
1021
- getTSVText(): Promise<string>;
1022
- /**
1023
- * Get UNLV output.
1024
- * @returns {Promise<string>} Returns the `unlv` upon resolve
1025
- */
1026
- getUNLVText(): Promise<string>;
1027
- /**
1028
- * Get ALTO XML output.
1029
- * @param {Function} progressCallback Optional progress callback.
1030
- * @param {number} pageNumber Optional page number (0-based).
1031
- * @returns {Promise<string>} Returns the `alto` upon resolve
1032
- */
1033
- getALTOText(progressCallback?: (info: ProgressChangedInfo) => void, pageNumber?: number): Promise<string>;
1034
- /**
1035
- * Get languages used at initialization.
1036
- * @returns {Promise<Language>} Returns the languages used when init was called
1037
- */
1038
- getInitLanguages(): Promise<Language>;
1039
- /**
1040
- * Get languages currently loaded.
1041
- * @returns {Promise<Language[]>} Returns the languages that were actually loaded by `init`
1042
- */
1043
- getLoadedLanguages(): Promise<Language[]>;
1044
- /**
1045
- * Get available languages from tessdata.
1046
- * NOTE: this only returns results after `init` has been called with a valid selection of languages
1047
- * @returns {Promise<Language[]>} Returns the languages that are available to tesseract.
1048
- */
1049
- getAvailableLanguages(): Promise<Language[]>;
1050
- /**
1051
- * Clear internal recognition results/state.
1052
- * @returns {Promise<void>}
1053
- */
1054
- clear(): Promise<void>;
1055
- /**
1056
- * Release native resources and destroy the instance.
1057
- * @returns {Promise<void>}
1058
- */
1059
- end(): Promise<void>;
1060
- }
1061
- export type NativeTesseract = TesseractInstance;
1062
- export type TesseractConstructor = new () => TesseractInstance;
1063
208
  declare const NativeTesseract: TesseractConstructor;
1064
209
  declare class Tesseract extends NativeTesseract {
210
+ document: TesseractDocumentApi;
1065
211
  constructor();
1066
- init(options: TesseractInitOptions): Promise<void>;
212
+ init(options?: TesseractInitOptions): Promise<void>;
213
+ ensureTrainingData({ lang, dataPath, cachePath, downloadBaseUrl }: EnsureTrainedDataOptions, progressCallback?: (info: TrainingDataDownloadProgress) => void): Promise<string>;
1067
214
  }
1068
215
  export { Tesseract, NativeTesseract };
1069
216
  export default Tesseract;