@luii/node-tesseract-ocr 2.1.0 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,6 @@
1
+ import type { EnsureTrainedDataOptions, TesseractDocumentApi, TesseractConstructor, TesseractInitOptions, TrainingDataDownloadProgress } from "./types";
2
+ export type { ConfigurationVariables, DebugConfigurationVariableNames, DebugOnlyConfigurationVariables, DetectOrientationScriptResult, EnsureTrainedDataOptions, InitOnlyConfigurationVariables, ProgressChangedInfo, SetBoolConfigurationVariableNames, SetConfigurationVariableNames, SetNumberConfigurationVariableNames, SetStringConfigurationVariableNames, SetVariableConfigVariables, TesseractBeginProcessPagesOptions, TesseractConstructor, TesseractDocumentApi, TesseractInitOptions, TesseractInstance, TesseractProcessPagesStatus, TesseractSetRectangleOptions, TrainingDataDownloadProgress, } from "./types";
3
+ export type NativeTesseract = import("./types").TesseractInstance;
1
4
  /**
2
5
  * All available languages for tesseract
3
6
  * @readonly
@@ -202,934 +205,9 @@ export declare const LogLevels: {
202
205
  readonly OFF: "2147483647";
203
206
  };
204
207
  export type LogLevel = (typeof LogLevels)[keyof typeof LogLevels];
205
- export type ConfigurationVariables = {
206
- log_level: `${LogLevel}`;
207
- textord_dotmatrix_gap: `${number}`;
208
- textord_debug_block: `${0 | 1}`;
209
- textord_pitch_range: `${number}`;
210
- textord_words_veto_power: `${number}`;
211
- textord_tabfind_show_strokewidths: `${0 | 1}`;
212
- pitsync_linear_version: `${number}`;
213
- oldbl_holed_losscount: `${number}`;
214
- textord_skewsmooth_offset: `${number}`;
215
- textord_skewsmooth_offset2: `${0 | 1}`;
216
- textord_test_x: `${number}`;
217
- textord_test_y: `${number}`;
218
- textord_min_blobs_in_row: `${number}`;
219
- textord_spline_minblobs: `${number}`;
220
- textord_spline_medianwin: `${number}`;
221
- textord_max_blob_overlaps: `${number}`;
222
- textord_min_xheight: `${number}`;
223
- textord_lms_line_trials: `${number}`;
224
- textord_tabfind_show_images: `${0 | 1}`;
225
- textord_fp_chop_error: `${number}`;
226
- edges_max_children_per_outline: `${number}`;
227
- edges_max_children_layers: `${number}`;
228
- edges_children_per_grandchild: `${number}`;
229
- edges_children_count_limit: `${number}`;
230
- edges_min_nonhole: `${number}`;
231
- edges_patharea_ratio: `${number}`;
232
- devanagari_split_debuglevel: `${0 | 1}`;
233
- textord_tabfind_show_partitions: `${0 | 1}`;
234
- textord_debug_tabfind: `${0 | 1}`;
235
- textord_debug_bugs: `${0 | 1}`;
236
- textord_testregion_left: `${number}`;
237
- textord_testregion_top: `${number}`;
238
- textord_testregion_right: `${number}`;
239
- textord_testregion_bottom: `${number}`;
240
- classify_num_cp_levels: `${number}`;
241
- editor_image_xpos: `${number}`;
242
- editor_image_ypos: `${number}`;
243
- editor_image_menuheight: `${number}`;
244
- editor_image_blob_bb_color: `${number}`;
245
- editor_word_ypos: `${number}`;
246
- editor_word_width: `${number}`;
247
- curl_timeout: `${0 | 1}`;
248
- wordrec_display_all_blobs: `${0 | 1}`;
249
- wordrec_blob_pause: `${0 | 1}`;
250
- textord_force_make_prop_words: `${0 | 1}`;
251
- textord_chopper_test: `${0 | 1}`;
252
- textord_restore_underlines: `${0 | 1}`;
253
- textord_show_initial_words: `${0 | 1}`;
254
- textord_blocksall_fixed: `${0 | 1}`;
255
- textord_blocksall_prop: `${0 | 1}`;
256
- textord_pitch_scalebigwords: `${0 | 1}`;
257
- textord_debug_pitch_test: `${0 | 1}`;
258
- textord_disable_pitch_test: `${0 | 1}`;
259
- textord_fast_pitch_test: `${0 | 1}`;
260
- textord_debug_pitch_metric: `${0 | 1}`;
261
- textord_show_row_cuts: `${0 | 1}`;
262
- textord_show_page_cuts: `${0 | 1}`;
263
- textord_blockndoc_fixed: `${0 | 1}`;
264
- textord_show_tables: `${0 | 1}`;
265
- textord_tablefind_show_mark: `${0 | 1}`;
266
- textord_tablefind_show_stats: `${0 | 1}`;
267
- textord_tablefind_recognize_tables: `${0 | 1}`;
268
- textord_tabfind_show_initialtabs: `${0 | 1}`;
269
- textord_tabfind_show_finaltabs: `${0 | 1}`;
270
- textord_tabfind_only_strokewidths: `${0 | 1}`;
271
- textord_really_old_xheight: `${0 | 1}`;
272
- textord_oldbl_debug: `${0 | 1}`;
273
- textord_debug_baselines: `${0 | 1}`;
274
- textord_oldbl_paradef: `${0 | 1}`;
275
- textord_oldbl_split_splines: `${0 | 1}`;
276
- textord_oldbl_merge_parts: `${0 | 1}`;
277
- oldbl_corrfix: `${0 | 1}`;
278
- oldbl_xhfix: `${0 | 1}`;
279
- textord_ocropus_mode: `${0 | 1}`;
280
- textord_heavy_nr: `${0 | 1}`;
281
- textord_show_initial_rows: `${0 | 1}`;
282
- textord_show_parallel_rows: `${0 | 1}`;
283
- textord_show_expanded_rows: `${0 | 1}`;
284
- textord_show_final_rows: `${0 | 1}`;
285
- textord_show_final_blobs: `${0 | 1}`;
286
- textord_test_landscape: `${0 | 1}`;
287
- textord_parallel_baselines: `${0 | 1}`;
288
- textord_straight_baselines: `${0 | 1}`;
289
- textord_old_baselines: `${0 | 1}`;
290
- textord_old_xheight: `${0 | 1}`;
291
- textord_fix_xheight_bug: `${0 | 1}`;
292
- textord_fix_makerow_bug: `${0 | 1}`;
293
- textord_debug_xheights: `${0 | 1}`;
294
- textord_biased_skewcalc: `${0 | 1}`;
295
- textord_interpolating_skew: `${0 | 1}`;
296
- textord_new_initial_xheight: `${0 | 1}`;
297
- textord_debug_blob: `${0 | 1}`;
298
- gapmap_debug: `${0 | 1}`;
299
- gapmap_use_ends: `${0 | 1}`;
300
- gapmap_no_isolated_quanta: `${0 | 1}`;
301
- edges_use_new_outline_complexity: `${0 | 1}`;
302
- edges_debug: `${0 | 1}`;
303
- edges_children_fix: `${0 | 1}`;
304
- textord_show_fixed_cuts: `${0 | 1}`;
305
- devanagari_split_debugimage: `${0 | 1}`;
306
- textord_tabfind_show_initial_partitions: `${0 | 1}`;
307
- textord_tabfind_show_reject_blobs: `${0 | 1}`;
308
- textord_tabfind_show_columns: `${0 | 1}`;
309
- textord_tabfind_show_blocks: `${0 | 1}`;
310
- textord_tabfind_find_tables: `${0 | 1}`;
311
- textord_space_size_is_variable: `${0 | 1}`;
312
- textord_debug_printable: `${0 | 1}`;
313
- wordrec_display_splits: `${0 | 1}`;
314
- poly_debug: `${0 | 1}`;
315
- poly_wide_objects_better: `${0 | 1}`;
316
- equationdetect_save_bi_image: `${0 | 1}`;
317
- equationdetect_save_spt_image: `${0 | 1}`;
318
- equationdetect_save_seed_image: `${0 | 1}`;
319
- equationdetect_save_merged_image: `${0 | 1}`;
320
- debug_file: `${string}`;
321
- editor_word_name: `${string}`;
322
- dotproduct: `${string}`;
323
- document_title: `${string}`;
324
- curl_cookiefile: `${string}`;
325
- classify_font_name: `${string}`;
326
- textord_underline_offset: `${number}`;
327
- textord_wordstats_smooth_factor: `${number}`;
328
- textord_words_maxspace: `${number}`;
329
- textord_words_default_maxspace: `${number}`;
330
- textord_words_default_minspace: `${number}`;
331
- textord_words_min_minspace: `${number}`;
332
- textord_words_default_nonspace: `${number}`;
333
- textord_words_initial_lower: `${number}`;
334
- textord_words_initial_upper: `${number}`;
335
- textord_words_minlarge: `${number}`;
336
- textord_words_pitchsd_threshold: `${number}`;
337
- textord_words_def_fixed: `${number}`;
338
- textord_words_def_prop: `${number}`;
339
- textord_pitch_rowsimilarity: `${number}`;
340
- words_initial_lower: `${number}`;
341
- words_initial_upper: `${number}`;
342
- words_default_prop_nonspace: `${number}`;
343
- words_default_fixed_space: `${number}`;
344
- words_default_fixed_limit: `${number}`;
345
- textord_words_definite_spread: `${number}`;
346
- textord_spacesize_ratioprop: `${number}`;
347
- textord_fpiqr_ratio: `${number}`;
348
- textord_max_pitch_iqr: `${number}`;
349
- textord_projection_scale: `${number}`;
350
- textord_balance_factor: `${0 | 1}`;
351
- textord_tabvector_vertical_gap_fraction: `${number}`;
352
- textord_tabvector_vertical_box_ratio: `${number}`;
353
- pitsync_joined_edge: `${number}`;
354
- pitsync_offset_freecut_fraction: `${number}`;
355
- oldbl_xhfract: `${number}`;
356
- oldbl_dot_error_size: `${number}`;
357
- textord_oldbl_jumplimit: `${number}`;
358
- textord_spline_shift_fraction: `${number}`;
359
- textord_skew_ile: `${number}`;
360
- textord_skew_lag: `${number}`;
361
- textord_linespace_iqrlimit: `${number}`;
362
- textord_width_limit: `${number}`;
363
- textord_chop_width: `${number}`;
364
- textord_expansion_factor: `${0 | 1}`;
365
- textord_overlap_x: `${number}`;
366
- textord_minxh: `${number}`;
367
- textord_min_linesize: `${number}`;
368
- textord_excess_blobsize: `${number}`;
369
- textord_occupancy_threshold: `${number}`;
370
- textord_underline_width: `${number}`;
371
- textord_min_blob_height_fraction: `${number}`;
372
- textord_xheight_mode_fraction: `${number}`;
373
- textord_ascheight_mode_fraction: `${number}`;
374
- textord_descheight_mode_fraction: `${number}`;
375
- textord_ascx_ratio_min: `${number}`;
376
- textord_ascx_ratio_max: `${number}`;
377
- textord_descx_ratio_min: `${number}`;
378
- textord_descx_ratio_max: `${number}`;
379
- textord_xheight_error_margin: `${number}`;
380
- gapmap_big_gaps: `${number}`;
381
- edges_childarea: `${number}`;
382
- edges_boxarea: `${number}`;
383
- textord_underline_threshold: `${number}`;
384
- classify_pico_feature_length: `${number}`;
385
- classify_norm_adj_midpoint: `${number}`;
386
- classify_norm_adj_curl: `${number}`;
387
- classify_min_slope: `${number}`;
388
- classify_max_slope: `${number}`;
389
- classify_cp_angle_pad_loose: `${number}`;
390
- classify_cp_angle_pad_medium: `${number}`;
391
- classify_cp_angle_pad_tight: `${number}`;
392
- classify_cp_end_pad_loose: `${number}`;
393
- classify_cp_end_pad_medium: `${number}`;
394
- classify_cp_end_pad_tight: `${number}`;
395
- classify_cp_side_pad_loose: `${number}`;
396
- classify_cp_side_pad_medium: `${number}`;
397
- classify_cp_side_pad_tight: `${number}`;
398
- classify_pp_angle_pad: `${number}`;
399
- classify_pp_end_pad: `${number}`;
400
- classify_pp_side_pad: `${number}`;
401
- ambigs_debug_level: `${0 | 1}`;
402
- classify_debug_level: `${0 | 1}`;
403
- classify_norm_method: `${0 | 1}`;
404
- matcher_debug_level: `${0 | 1}`;
405
- matcher_debug_flags: `${0 | 1}`;
406
- classify_learning_debug_level: `${0 | 1}`;
407
- matcher_permanent_classes_min: `${0 | 1}`;
408
- matcher_min_examples_for_prototyping: `${number}`;
409
- matcher_sufficient_examples_for_prototyping: `${number}`;
410
- classify_adapt_proto_threshold: `${number}`;
411
- classify_adapt_feature_threshold: `${number}`;
412
- classify_class_pruner_threshold: `${number}`;
413
- classify_class_pruner_multiplier: `${number}`;
414
- classify_cp_cutoff_strength: `${number}`;
415
- classify_integer_matcher_multiplier: `${number}`;
416
- dawg_debug_level: `${0 | 1}`;
417
- hyphen_debug_level: `${0 | 1}`;
418
- stopper_smallword_size: `${number}`;
419
- stopper_debug_level: `${0 | 1}`;
420
- tessedit_truncate_wordchoice_log: `${number}`;
421
- max_permuter_attempts: `${number}`;
422
- repair_unchopped_blobs: `${0 | 1}`;
423
- chop_debug: `${0 | 1}`;
424
- chop_split_length: `${number}`;
425
- chop_same_distance: `${number}`;
426
- chop_min_outline_points: `${number}`;
427
- chop_seam_pile_size: `${number}`;
428
- chop_inside_angle: `${number}`;
429
- chop_min_outline_area: `${number}`;
430
- chop_centered_maxwidth: `${number}`;
431
- chop_x_y_weight: `${number}`;
432
- wordrec_debug_level: `${0 | 1}`;
433
- wordrec_max_join_chunks: `${number}`;
434
- segsearch_debug_level: `${0 | 1}`;
435
- segsearch_max_pain_points: `${number}`;
436
- segsearch_max_futile_classifications: `${number}`;
437
- language_model_debug_level: `${0 | 1}`;
438
- language_model_ngram_order: `${number}`;
439
- language_model_viterbi_list_max_num_prunable: `${number}`;
440
- language_model_viterbi_list_max_size: `${number}`;
441
- language_model_min_compound_length: `${number}`;
442
- wordrec_display_segmentations: `${0 | 1}`;
443
- tessedit_pageseg_mode: `${number}`;
444
- thresholding_method: `${0 | 1}`;
445
- tessedit_ocr_engine_mode: `${number}`;
446
- pageseg_devanagari_split_strategy: `${0 | 1}`;
447
- ocr_devanagari_split_strategy: `${0 | 1}`;
448
- bidi_debug: `${0 | 1}`;
449
- applybox_debug: `${0 | 1}`;
450
- applybox_page: `${0 | 1}`;
451
- tessedit_font_id: `${0 | 1}`;
452
- tessedit_bigram_debug: `${0 | 1}`;
453
- debug_noise_removal: `${0 | 1}`;
454
- noise_maxperblob: `${number}`;
455
- noise_maxperword: `${number}`;
456
- debug_x_ht_level: `${0 | 1}`;
457
- quality_min_initial_alphas_reqd: `${number}`;
458
- tessedit_tess_adaption_mode: `${number}`;
459
- multilang_debug_level: `${0 | 1}`;
460
- paragraph_debug_level: `${0 | 1}`;
461
- tessedit_preserve_min_wd_len: `${number}`;
462
- crunch_rating_max: `${number}`;
463
- crunch_pot_indicators: `${0 | 1}`;
464
- crunch_leave_lc_strings: `${number}`;
465
- crunch_leave_uc_strings: `${number}`;
466
- crunch_long_repetitions: `${number}`;
467
- crunch_debug: `${0 | 1}`;
468
- fixsp_non_noise_limit: `${0 | 1}`;
469
- fixsp_done_mode: `${0 | 1}`;
470
- debug_fix_space_level: `${0 | 1}`;
471
- x_ht_acceptance_tolerance: `${number}`;
472
- x_ht_min_change: `${number}`;
473
- superscript_debug: `${0 | 1}`;
474
- page_xml_level: `${0 | 1}`;
475
- jpg_quality: `${number}`;
476
- user_defined_dpi: `${0 | 1}`;
477
- min_characters_to_try: `${number}`;
478
- suspect_level: `${number}`;
479
- suspect_short_words: `${number}`;
480
- tessedit_reject_mode: `${0 | 1}`;
481
- tessedit_image_border: `${number}`;
482
- min_sane_x_ht_pixels: `${number}`;
483
- tessedit_page_number: `${number}`;
484
- tessedit_parallelize: `${0 | 1}`;
485
- lstm_choice_mode: `${0 | 1}`;
486
- lstm_choice_iterations: `${number}`;
487
- tosp_debug_level: `${0 | 1}`;
488
- tosp_enough_space_samples_for_median: `${number}`;
489
- tosp_redo_kern_limit: `${number}`;
490
- tosp_few_samples: `${number}`;
491
- tosp_short_row: `${number}`;
492
- tosp_sanity_method: `${0 | 1}`;
493
- textord_max_noise_size: `${number}`;
494
- textord_baseline_debug: `${0 | 1}`;
495
- textord_noise_sizefraction: `${number}`;
496
- textord_noise_translimit: `${number}`;
497
- textord_noise_sncount: `${0 | 1}`;
498
- use_ambigs_for_adaption: `${0 | 1}`;
499
- allow_blob_division: `${0 | 1}`;
500
- prioritize_division: `${0 | 1}`;
501
- classify_enable_learning: `${0 | 1}`;
502
- tess_cn_matching: `${0 | 1}`;
503
- tess_bn_matching: `${0 | 1}`;
504
- classify_enable_adaptive_matcher: `${0 | 1}`;
505
- classify_use_pre_adapted_templates: `${0 | 1}`;
506
- classify_save_adapted_templates: `${0 | 1}`;
507
- classify_enable_adaptive_debugger: `${0 | 1}`;
508
- classify_nonlinear_norm: `${0 | 1}`;
509
- disable_character_fragments: `${0 | 1}`;
510
- classify_debug_character_fragments: `${0 | 1}`;
511
- matcher_debug_separate_windows: `${0 | 1}`;
512
- classify_bln_numeric_mode: `${0 | 1}`;
513
- load_system_dawg: `${0 | 1}`;
514
- load_freq_dawg: `${0 | 1}`;
515
- load_unambig_dawg: `${0 | 1}`;
516
- load_punc_dawg: `${0 | 1}`;
517
- load_number_dawg: `${0 | 1}`;
518
- load_bigram_dawg: `${0 | 1}`;
519
- use_only_first_uft8_step: `${0 | 1}`;
520
- stopper_no_acceptable_choices: `${0 | 1}`;
521
- segment_nonalphabetic_script: `${0 | 1}`;
522
- save_doc_words: `${0 | 1}`;
523
- merge_fragments_in_matrix: `${0 | 1}`;
524
- wordrec_enable_assoc: `${0 | 1}`;
525
- force_word_assoc: `${0 | 1}`;
526
- chop_enable: `${0 | 1}`;
527
- chop_vertical_creep: `${0 | 1}`;
528
- chop_new_seam_pile: `${0 | 1}`;
529
- assume_fixed_pitch_char_segment: `${0 | 1}`;
530
- wordrec_skip_no_truth_words: `${0 | 1}`;
531
- wordrec_debug_blamer: `${0 | 1}`;
532
- wordrec_run_blamer: `${0 | 1}`;
533
- save_alt_choices: `${0 | 1}`;
534
- language_model_ngram_on: `${0 | 1}`;
535
- language_model_ngram_use_only_first_uft8_step: `${0 | 1}`;
536
- language_model_ngram_space_delimited_language: `${0 | 1}`;
537
- language_model_use_sigmoidal_certainty: `${0 | 1}`;
538
- tessedit_resegment_from_boxes: `${0 | 1}`;
539
- tessedit_resegment_from_line_boxes: `${0 | 1}`;
540
- tessedit_train_from_boxes: `${0 | 1}`;
541
- tessedit_make_boxes_from_boxes: `${0 | 1}`;
542
- tessedit_train_line_recognizer: `${0 | 1}`;
543
- tessedit_dump_pageseg_images: `${0 | 1}`;
544
- tessedit_do_invert: `${0 | 1}`;
545
- thresholding_debug: `${0 | 1}`;
546
- tessedit_ambigs_training: `${0 | 1}`;
547
- tessedit_adaption_debug: `${0 | 1}`;
548
- applybox_learn_chars_and_char_frags_mode: `${0 | 1}`;
549
- applybox_learn_ngrams_mode: `${0 | 1}`;
550
- tessedit_display_outwords: `${0 | 1}`;
551
- tessedit_dump_choices: `${0 | 1}`;
552
- tessedit_timing_debug: `${0 | 1}`;
553
- tessedit_fix_fuzzy_spaces: `${0 | 1}`;
554
- tessedit_unrej_any_wd: `${0 | 1}`;
555
- tessedit_fix_hyphens: `${0 | 1}`;
556
- tessedit_enable_doc_dict: `${0 | 1}`;
557
- tessedit_debug_fonts: `${0 | 1}`;
558
- tessedit_debug_block_rejection: `${0 | 1}`;
559
- tessedit_enable_bigram_correction: `${0 | 1}`;
560
- tessedit_enable_dict_correction: `${0 | 1}`;
561
- enable_noise_removal: `${0 | 1}`;
562
- tessedit_minimal_rej_pass1: `${0 | 1}`;
563
- tessedit_test_adaption: `${0 | 1}`;
564
- test_pt: `${0 | 1}`;
565
- paragraph_text_based: `${0 | 1}`;
566
- lstm_use_matrix: `${0 | 1}`;
567
- tessedit_good_quality_unrej: `${0 | 1}`;
568
- tessedit_use_reject_spaces: `${0 | 1}`;
569
- tessedit_preserve_blk_rej_perfect_wds: `${0 | 1}`;
570
- tessedit_preserve_row_rej_perfect_wds: `${0 | 1}`;
571
- tessedit_dont_blkrej_good_wds: `${0 | 1}`;
572
- tessedit_dont_rowrej_good_wds: `${0 | 1}`;
573
- tessedit_row_rej_good_docs: `${0 | 1}`;
574
- tessedit_reject_bad_qual_wds: `${0 | 1}`;
575
- tessedit_debug_doc_rejection: `${0 | 1}`;
576
- tessedit_debug_quality_metrics: `${0 | 1}`;
577
- bland_unrej: `${0 | 1}`;
578
- unlv_tilde_crunching: `${0 | 1}`;
579
- hocr_font_info: `${0 | 1}`;
580
- hocr_char_boxes: `${0 | 1}`;
581
- crunch_early_merge_tess_fails: `${0 | 1}`;
582
- crunch_early_convert_bad_unlv_chs: `${0 | 1}`;
583
- crunch_terrible_garbage: `${0 | 1}`;
584
- crunch_leave_ok_strings: `${0 | 1}`;
585
- crunch_accept_ok: `${0 | 1}`;
586
- crunch_leave_accept_strings: `${0 | 1}`;
587
- crunch_include_numerals: `${0 | 1}`;
588
- tessedit_prefer_joined_punct: `${0 | 1}`;
589
- tessedit_write_block_separators: `${0 | 1}`;
590
- tessedit_write_rep_codes: `${0 | 1}`;
591
- tessedit_write_unlv: `${0 | 1}`;
592
- tessedit_create_txt: `${0 | 1}`;
593
- tessedit_create_hocr: `${0 | 1}`;
594
- tessedit_create_alto: `${0 | 1}`;
595
- tessedit_create_page_xml: `${0 | 1}`;
596
- page_xml_polygon: `${0 | 1}`;
597
- tessedit_create_lstmbox: `${0 | 1}`;
598
- tessedit_create_tsv: `${0 | 1}`;
599
- tessedit_create_wordstrbox: `${0 | 1}`;
600
- tessedit_create_pdf: `${0 | 1}`;
601
- textonly_pdf: `${0 | 1}`;
602
- suspect_constrain_1Il: `${0 | 1}`;
603
- tessedit_minimal_rejection: `${0 | 1}`;
604
- tessedit_zero_rejection: `${0 | 1}`;
605
- tessedit_word_for_word: `${0 | 1}`;
606
- tessedit_zero_kelvin_rejection: `${0 | 1}`;
607
- tessedit_rejection_debug: `${0 | 1}`;
608
- tessedit_flip_0O: `${0 | 1}`;
609
- rej_trust_doc_dawg: `${0 | 1}`;
610
- rej_1Il_use_dict_word: `${0 | 1}`;
611
- rej_1Il_trust_permuter_type: `${0 | 1}`;
612
- rej_use_tess_accepted: `${0 | 1}`;
613
- rej_use_tess_blanks: `${0 | 1}`;
614
- rej_use_good_perm: `${0 | 1}`;
615
- rej_use_sensible_wd: `${0 | 1}`;
616
- rej_alphas_in_number_perm: `${0 | 1}`;
617
- tessedit_create_boxfile: `${0 | 1}`;
618
- tessedit_write_images: `${0 | 1}`;
619
- interactive_display_mode: `${0 | 1}`;
620
- tessedit_override_permuter: `${0 | 1}`;
621
- tessedit_use_primary_params_model: `${0 | 1}`;
622
- textord_tabfind_show_vlines: `${0 | 1}`;
623
- textord_use_cjk_fp_model: `${0 | 1}`;
624
- poly_allow_detailed_fx: `${0 | 1}`;
625
- tessedit_init_config_only: `${0 | 1}`;
626
- textord_equation_detect: `${0 | 1}`;
627
- textord_tabfind_vertical_text: `${0 | 1}`;
628
- textord_tabfind_force_vertical_text: `${0 | 1}`;
629
- preserve_interword_spaces: `${0 | 1}`;
630
- pageseg_apply_music_mask: `${0 | 1}`;
631
- textord_single_height_mode: `${0 | 1}`;
632
- tosp_old_to_method: `${0 | 1}`;
633
- tosp_old_to_constrain_sp_kn: `${0 | 1}`;
634
- tosp_only_use_prop_rows: `${0 | 1}`;
635
- tosp_force_wordbreak_on_punct: `${0 | 1}`;
636
- tosp_use_pre_chopping: `${0 | 1}`;
637
- tosp_old_to_bug_fix: `${0 | 1}`;
638
- tosp_block_use_cert_spaces: `${0 | 1}`;
639
- tosp_row_use_cert_spaces: `${0 | 1}`;
640
- tosp_narrow_blobs_not_cert: `${0 | 1}`;
641
- tosp_row_use_cert_spaces1: `${0 | 1}`;
642
- tosp_recovery_isolated_row_stats: `${0 | 1}`;
643
- tosp_only_small_gaps_for_kern: `${0 | 1}`;
644
- tosp_all_flips_fuzzy: `${0 | 1}`;
645
- tosp_fuzzy_limit_all: `${0 | 1}`;
646
- tosp_stats_use_xht_gaps: `${0 | 1}`;
647
- tosp_use_xht_gaps: `${0 | 1}`;
648
- tosp_only_use_xht_gaps: `${0 | 1}`;
649
- tosp_rule_9_test_punct: `${0 | 1}`;
650
- tosp_flip_fuzz_kn_to_sp: `${0 | 1}`;
651
- tosp_flip_fuzz_sp_to_kn: `${0 | 1}`;
652
- tosp_improve_thresh: `${0 | 1}`;
653
- textord_no_rejects: `${0 | 1}`;
654
- textord_show_blobs: `${0 | 1}`;
655
- textord_show_boxes: `${0 | 1}`;
656
- textord_noise_rejwords: `${0 | 1}`;
657
- textord_noise_rejrows: `${0 | 1}`;
658
- textord_noise_debug: `${0 | 1}`;
659
- classify_learn_debug_str: `${string}`;
660
- user_words_file: `${string}`;
661
- user_words_suffix: `${string}`;
662
- user_patterns_file: `${string}`;
663
- user_patterns_suffix: `${string}`;
664
- output_ambig_words_file: `${string}`;
665
- word_to_debug: `${string}`;
666
- tessedit_char_blacklist: `${string}`;
667
- tessedit_char_whitelist: `${string}`;
668
- tessedit_char_unblacklist: `${string}`;
669
- tessedit_write_params_to_file: `${string}`;
670
- applybox_exposure_pattern: `${string}`;
671
- chs_leading_punct: `${string}`;
672
- chs_trailing_punct1: `${string}`;
673
- chs_trailing_punct2: `${string}`;
674
- outlines_odd: `${string}`;
675
- outlines_2: `${string}`;
676
- numeric_punctuation: `${string}`;
677
- unrecognised_char: `${string}`;
678
- ok_repeated_ch_non_alphanum_wds: `${string}`;
679
- conflict_set_I_l_1: `${string}`;
680
- file_type: `${string}`;
681
- tessedit_load_sublangs: `${string}`;
682
- page_separator: `${string}`;
683
- classify_char_norm_range: `${number}`;
684
- classify_max_rating_ratio: `${number}`;
685
- classify_max_certainty_margin: `${number}`;
686
- matcher_good_threshold: `${number}`;
687
- matcher_reliable_adaptive_result: `${0 | 1}`;
688
- matcher_perfect_threshold: `${number}`;
689
- matcher_bad_match_pad: `${number}`;
690
- matcher_rating_margin: `${number}`;
691
- matcher_avg_noise_size: `${number}`;
692
- matcher_clustering_max_angle_delta: `${number}`;
693
- classify_misfit_junk_penalty: `${0 | 1}`;
694
- rating_scale: `${number}`;
695
- tessedit_class_miss_scale: `${number}`;
696
- classify_adapted_pruning_factor: `${number}`;
697
- classify_adapted_pruning_threshold: `${number}`;
698
- classify_character_fragments_garbage_certainty_threshold: `${number}`;
699
- speckle_large_max_size: `${number}`;
700
- speckle_rating_penalty: `${number}`;
701
- xheight_penalty_subscripts: `${number}`;
702
- xheight_penalty_inconsistent: `${number}`;
703
- segment_penalty_dict_frequent_word: `${0 | 1}`;
704
- segment_penalty_dict_case_ok: `${number}`;
705
- segment_penalty_dict_case_bad: `${number}`;
706
- segment_penalty_dict_nonword: `${number}`;
707
- segment_penalty_garbage: `${number}`;
708
- certainty_scale: `${number}`;
709
- stopper_nondict_certainty_base: `${number}`;
710
- stopper_phase2_certainty_rejection_offset: `${0 | 1}`;
711
- stopper_certainty_per_char: `${number}`;
712
- stopper_allowable_character_badness: `${number}`;
713
- doc_dict_pending_threshold: `${0 | 1}`;
714
- doc_dict_certainty_threshold: `${number}`;
715
- tessedit_certainty_threshold: `${number}`;
716
- chop_split_dist_knob: `${number}`;
717
- chop_overlap_knob: `${number}`;
718
- chop_center_knob: `${number}`;
719
- chop_sharpness_knob: `${number}`;
720
- chop_width_change_knob: `${number}`;
721
- chop_ok_split: `${number}`;
722
- chop_good_split: `${number}`;
723
- segsearch_max_char_wh_ratio: `${number}`;
724
- language_model_ngram_small_prob: `${number}`;
725
- language_model_ngram_nonmatch_score: `${number}`;
726
- language_model_ngram_scale_factor: `${number}`;
727
- language_model_ngram_rating_factor: `${number}`;
728
- language_model_penalty_non_freq_dict_word: `${number}`;
729
- language_model_penalty_non_dict_word: `${number}`;
730
- language_model_penalty_punc: `${number}`;
731
- language_model_penalty_case: `${number}`;
732
- language_model_penalty_script: `${number}`;
733
- language_model_penalty_chartype: `${number}`;
734
- language_model_penalty_font: `${0 | 1}`;
735
- language_model_penalty_spacing: `${number}`;
736
- language_model_penalty_increment: `${number}`;
737
- invert_threshold: `${number}`;
738
- thresholding_window_size: `${number}`;
739
- thresholding_kfactor: `${number}`;
740
- thresholding_tile_size: `${number}`;
741
- thresholding_smooth_kernel_size: `${0 | 1}`;
742
- thresholding_score_fraction: `${number}`;
743
- noise_cert_basechar: `${number}`;
744
- noise_cert_disjoint: `${number}`;
745
- noise_cert_punc: `${number}`;
746
- noise_cert_factor: `${number}`;
747
- quality_rej_pc: `${number}`;
748
- quality_blob_pc: `${0 | 1}`;
749
- quality_outline_pc: `${0 | 1}`;
750
- quality_char_pc: `${number}`;
751
- test_pt_x: `${number}`;
752
- test_pt_y: `${number}`;
753
- tessedit_reject_doc_percent: `${number}`;
754
- tessedit_reject_block_percent: `${number}`;
755
- tessedit_reject_row_percent: `${number}`;
756
- tessedit_whole_wd_rej_row_percent: `${number}`;
757
- tessedit_good_doc_still_rowrej_wd: `${number}`;
758
- quality_rowrej_pc: `${number}`;
759
- crunch_terrible_rating: `${number}`;
760
- crunch_poor_garbage_cert: `${number}`;
761
- crunch_poor_garbage_rate: `${number}`;
762
- crunch_pot_poor_rate: `${number}`;
763
- crunch_pot_poor_cert: `${number}`;
764
- crunch_del_rating: `${number}`;
765
- crunch_del_cert: `${number}`;
766
- crunch_del_min_ht: `${number}`;
767
- crunch_del_max_ht: `${number}`;
768
- crunch_del_min_width: `${number}`;
769
- crunch_del_high_word: `${number}`;
770
- crunch_del_low_word: `${number}`;
771
- crunch_small_outlines_size: `${number}`;
772
- fixsp_small_outlines_size: `${number}`;
773
- superscript_worse_certainty: `${number}`;
774
- superscript_bettered_certainty: `${number}`;
775
- superscript_scaledown_ratio: `${number}`;
776
- subscript_max_y_top: `${number}`;
777
- superscript_min_y_bottom: `${number}`;
778
- suspect_rating_per_ch: `${number}`;
779
- suspect_accept_rating: `${number}`;
780
- tessedit_lower_flip_hyphen: `${number}`;
781
- tessedit_upper_flip_hyphen: `${number}`;
782
- rej_whole_of_mostly_reject_word_fract: `${number}`;
783
- min_orientation_margin: `${number}`;
784
- textord_tabfind_vertical_text_ratio: `${number}`;
785
- textord_tabfind_aligned_gap_fraction: `${number}`;
786
- lstm_rating_coefficient: `${number}`;
787
- tosp_old_sp_kn_th_factor: `${number}`;
788
- tosp_threshold_bias1: `${0 | 1}`;
789
- tosp_threshold_bias2: `${0 | 1}`;
790
- tosp_narrow_fraction: `${number}`;
791
- tosp_narrow_aspect_ratio: `${number}`;
792
- tosp_wide_fraction: `${number}`;
793
- tosp_wide_aspect_ratio: `${0 | 1}`;
794
- tosp_fuzzy_space_factor: `${number}`;
795
- tosp_fuzzy_space_factor1: `${number}`;
796
- tosp_fuzzy_space_factor2: `${number}`;
797
- tosp_gap_factor: `${number}`;
798
- tosp_kern_gap_factor1: `${number}`;
799
- tosp_kern_gap_factor2: `${number}`;
800
- tosp_kern_gap_factor3: `${number}`;
801
- tosp_ignore_big_gaps: `${number}`;
802
- tosp_ignore_very_big_gaps: `${number}`;
803
- tosp_rep_space: `${number}`;
804
- tosp_enough_small_gaps: `${number}`;
805
- tosp_table_kn_sp_ratio: `${number}`;
806
- tosp_table_xht_sp_ratio: `${number}`;
807
- tosp_table_fuzzy_kn_sp_ratio: `${number}`;
808
- tosp_fuzzy_kn_fraction: `${number}`;
809
- tosp_fuzzy_sp_fraction: `${number}`;
810
- tosp_min_sane_kn_sp: `${number}`;
811
- tosp_init_guess_kn_mult: `${number}`;
812
- tosp_init_guess_xht_mult: `${number}`;
813
- tosp_max_sane_kn_thresh: `${number}`;
814
- tosp_flip_caution: `${0 | 1}`;
815
- tosp_large_kerning: `${number}`;
816
- tosp_dont_fool_with_small_kerns: `${number}`;
817
- tosp_near_lh_edge: `${0 | 1}`;
818
- tosp_silly_kn_sp_gap: `${number}`;
819
- tosp_pass_wide_fuzz_sp_to_context: `${number}`;
820
- textord_noise_area_ratio: `${number}`;
821
- textord_initialx_ile: `${number}`;
822
- textord_initialasc_ile: `${number}`;
823
- textord_noise_sizelimit: `${number}`;
824
- textord_noise_normratio: `${number}`;
825
- textord_noise_syfract: `${number}`;
826
- textord_noise_sxfract: `${number}`;
827
- textord_noise_hfract: `${number}`;
828
- textord_noise_rowratio: `${number}`;
829
- textord_blshift_maxshift: `${0 | 1}`;
830
- textord_blshift_xfraction: `${number}`;
831
- };
832
- type InitOnlyConfigurationVariableNames = "ambigs_debug_level" | "language_model_ngram_on" | "language_model_use_sigmoidal_certainty" | "load_bigram_dawg" | "load_freq_dawg" | "load_number_dawg" | "load_punc_dawg" | "load_system_dawg" | "load_unambig_dawg" | "tessedit_init_config_only" | "tessedit_ocr_engine_mode" | "user_patterns_suffix" | "user_words_suffix";
833
- export type InitOnlyConfigurationVariables = Pick<ConfigurationVariables, InitOnlyConfigurationVariableNames>;
834
- export type SetVariableConfigVariables = Omit<ConfigurationVariables, InitOnlyConfigurationVariableNames>;
835
- /**
836
- * Tesseract init options
837
- */
838
- export interface TesseractInitOptions {
839
- /**
840
- * Its generally safer to use as few languages as possible.
841
- * The more languages Tesseract needs to load the longer it takes to recognize a image.
842
- * The OSD Language will always be loaded to support orientation and script detection
843
- * IMPORTANT: if you specify more than one language here (e.g.: `deu, eng` for example)
844
- * tesseract will try to recognize german and english in the same image.
845
- * Originally tesseract itself accepts it as `deu+eng`, but since this
846
- * makes typing very hard to near impossible its safer to just accept a
847
- * array with the languages it should look for.
848
- * When talking about "hard typing/impossible typing" its because typescript
849
- * itself cannot create recursive types, and chaining template types
850
- * (e.g.: `${Language}+${Language}+...`) stretches out the compilation time
851
- * to a unacceptable amount
852
- *
853
- * @default [Language.osd]
854
- */
855
- langs?: Language[];
856
- /**
857
- * Specify where the trainingdata is located
858
- * Besides the datapath in general it is versioned to the
859
- * version of tesseract
860
- * @default '~/.cache/node-tesseract-ocr/'
861
- */
862
- cachePath?: string;
863
- /**
864
- * Explicit datapath for traineddata. Takes precedence over
865
- * the `TESSDATA_PREFIX` environment variable.
866
- */
867
- dataPath?: string;
868
- /**
869
- * This will be called for every language that was specified in `lang`,
870
- * it allows the user to be flexible about the training data's location
871
- * Or if he needs to specify his own location for certain languages/custom languages
872
- * IMPORTANT: Ensures that trainingdata will be downloaded from the following cdn
873
- * in case they dont exist
874
- * OEM_LSTM_ONLY => https://cdn.jsdelivr.net/npm/@tesseract.js-data/${lang}/4.0.0_best_int
875
- * NON OEM_LSTM_ONLY => https://cdn.jsdelivr.net/npm/@tesseract.js-data/${lang}/4.0.0
876
- * NOTE: Tesseract 5.x.x still uses the 4.x.x trainingdata
877
- *
878
- * @default true
879
- */
880
- ensureTraineddata?: boolean;
881
- /**
882
- * Optional progress callback for traineddata downloads.
883
- */
884
- progressCallback?: (info: TrainingDataDownloadProgress) => void;
885
- /**
886
- * OCR Engine Modes
887
- * The engine mode cannot be changed after creating the instance
888
- * If another mode is needed, its advised to create a new instance.
889
- * @default OEM_DEFAULT
890
- * @throws {Error} Will throw an error when oem mode is below 0 or over 3
891
- */
892
- oem?: OcrEngineMode;
893
- /**
894
- * Controls if only non-debug parameters will be set upon initialization
895
- * @default false
896
- */
897
- setOnlyNonDebugParams?: boolean;
898
- /**
899
- * Array of paths that point to their corresponding config files
900
- * usually located in the `dataPath` location alongside the training data
901
- */
902
- configs?: Array<string>;
903
- /**
904
- * Record of parameters that should be set upon initialization
905
- * Consult the original documentation of tesseract on which variables
906
- * can actually be set
907
- */
908
- vars?: Partial<Record<keyof ConfigurationVariables, ConfigurationVariables[keyof ConfigurationVariables]>>;
909
- }
910
- export interface TrainingDataDownloadProgress {
911
- lang: Language;
912
- url: string;
913
- downloadedBytes: number;
914
- totalBytes?: number;
915
- percent?: number;
916
- }
917
- export interface TesseractSetRectangleOptions {
918
- top: number;
919
- left: number;
920
- width: number;
921
- height: number;
922
- }
923
- export interface ProgressChangedInfo {
924
- /**
925
- * Chars in this buffer
926
- */
927
- progress: number;
928
- /**
929
- * Percent complete increasing (0-100)
930
- */
931
- percent: number;
932
- /**
933
- * States if the worker is still alive
934
- */
935
- ocrAlive: number;
936
- /**
937
- * top coordinate of the bbox of the current element that tesseract is processing
938
- */
939
- top: number;
940
- /**
941
- * right coordinate of the bbox of the current element that tesseract is processing
942
- */
943
- right: number;
944
- /**
945
- * bottom coordinate of the bbox of the current element that tesseract is processing
946
- */
947
- bottom: number;
948
- /**
949
- * left coordinate of the bbox of the current element that tesseract is processing
950
- */
951
- left: number;
952
- }
953
- export interface DetectOrientationScriptResult {
954
- /**
955
- * Orientation of the source image in degrees
956
- * Orientation refers to the way the source is rotated, **not** how the text is
957
- * aligned. It ranges from 0° to 360°.
958
- * @type {number}
959
- */
960
- orientationDegrees: number;
961
- /**
962
- * The confidence of tesseract for the orientation
963
- * @type {number}
964
- */
965
- orientationConfidence: number;
966
- /**
967
- * The name of the script that is used in the source image
968
- * @type {string}
969
- */
970
- scriptName: string;
971
- /**
972
- * The confidence of tesseract about the detected script of the source image
973
- * @type {number}
974
- */
975
- scriptConfidence: number;
976
- }
977
- export type EnsureTrainedDataOptions = {
978
- lang: Language;
979
- cachePath: string;
980
- dataPath: string;
981
- downloadBaseUrl: string;
982
- progressCallback?: (info: TrainingDataDownloadProgress) => void;
983
- };
984
- export interface TesseractInstance {
985
- /**
986
- * Initialize the engine with the given options.
987
- * @param {TesseractInitOptions} options Initialization options (languages, datapath, engine mode, etc.).
988
- * @returns {Promise<void>}
989
- */
990
- init(options: TesseractInitOptions): Promise<void>;
991
- /**
992
- * Initialize the engine for page analysis only.
993
- * @returns {Promise<void>}
994
- */
995
- initForAnalysePage(): Promise<void>;
996
- /**
997
- * Run page layout analysis.
998
- * @param {boolean} mergeSimilarWords Whether to merge similar words during analysis.
999
- * @returns {Promise<void>}
1000
- */
1001
- analysePage(mergeSimilarWords: boolean): Promise<void>;
1002
- /**
1003
- * Set the page segmentation mode (PSM).
1004
- * @param {PageSegmentationMode} psm Page segmentation mode.
1005
- * @returns {Promise<void>}
1006
- */
1007
- setPageMode(psm: PageSegmentationMode): Promise<void>;
1008
- /**
1009
- * Set a configuration variable.
1010
- * @param {keyof SetVariableConfigVariables} name Variable name.
1011
- * @param {SetVariableConfigVariables[keyof SetVariableConfigVariables]} value Variable value.
1012
- * @returns Returns `false` if the lookup failed.
1013
- */
1014
- setVariable(name: keyof SetVariableConfigVariables, value: SetVariableConfigVariables[keyof SetVariableConfigVariables]): Promise<boolean>;
1015
- /**
1016
- * Get a configuration variable as integer.
1017
- * @param {keyof SetVariableConfigVariables} name Variable name.
1018
- * @returns {Promise<number>} Returns the value of the variable.
1019
- */
1020
- getIntVariable(name: keyof SetVariableConfigVariables): Promise<number>;
1021
- /**
1022
- * Get a configuration variable as boolean (0/1).
1023
- * @param {keyof SetVariableConfigVariables} name Variable name.
1024
- * @returns {Promise<number>} Returns the value of the variable.
1025
- */
1026
- getBoolVariable(name: keyof SetVariableConfigVariables): Promise<number>;
1027
- /**
1028
- * Get a configuration variable as double.
1029
- * @param {keyof SetVariableConfigVariables} name Variable name.
1030
- * @returns {Promise<number>} Returns the value of the variable.
1031
- */
1032
- getDoubleVariable(name: keyof SetVariableConfigVariables): Promise<number>;
1033
- /**
1034
- * Get a configuration variable as string.
1035
- * @param {keyof SetVariableConfigVariables} name Variable name.
1036
- * @returns {Promise<string>} Returns the value of the variable.
1037
- */
1038
- getStringVariable(name: keyof SetVariableConfigVariables): Promise<string>;
1039
- /**
1040
- * Set the image to be recognized.
1041
- * @param {Buffer<ArrayBuffer>} buffer Image data buffer.
1042
- * @returns {Promise<void>}
1043
- */
1044
- setImage(buffer: Buffer<ArrayBuffer>): Promise<void>;
1045
- /**
1046
- * Restrict recognition to a rectangle.
1047
- * @param {TesseractSetRectangleOptions} options Rectangle options.
1048
- * @returns {Promise<void>}
1049
- */
1050
- setRectangle(options: TesseractSetRectangleOptions): Promise<void>;
1051
- /**
1052
- * Set the source resolution in PPI.
1053
- * @param {number} ppi Source resolution in PPI.
1054
- * @returns {Promise<void>}
1055
- */
1056
- setSourceResolution(ppi: number): Promise<void>;
1057
- /**
1058
- * @throws {Error} Will throw an error if the parameter at index 0 is not a function
1059
- * @param {(info: ProgressChangedInfo) => void} progressCallback Callback will be called to inform the user about progress changes
1060
- * @returns {Promise<void>}
1061
- */
1062
- recognize(progressCallback: (info: ProgressChangedInfo) => void): Promise<void>;
1063
- /**
1064
- * Detect orientation and script (OSD).
1065
- * @returns {Promise<DetectOrientationScriptResult>}
1066
- */
1067
- detectOrientationScript(): Promise<DetectOrientationScriptResult>;
1068
- /**
1069
- * Get mean text confidence.
1070
- * @returns {Promise<number>} Returns the mean text confidence on resolve
1071
- */
1072
- meanTextConf(): Promise<number>;
1073
- /**
1074
- * Get recognized text as UTF-8.
1075
- * @returns {Promise<string>} Returns the recognized text as UTF-8 on resolve
1076
- */
1077
- getUTF8Text(): Promise<string>;
1078
- /**
1079
- * Get hOCR output.
1080
- * @param {Function} progressCallback Optional progress callback.
1081
- * @param {number} pageNumber Optional page number (0-based).
1082
- * @returns {Promise<string>} Returns the `hOCR` upon resolve
1083
- */
1084
- getHOCRText(progressCallback?: (info: ProgressChangedInfo) => void, pageNumber?: number): Promise<string>;
1085
- /**
1086
- * Get TSV output.
1087
- * @returns {Promise<string>} Returns the `tsv` upon resolve
1088
- */
1089
- getTSVText(): Promise<string>;
1090
- /**
1091
- * Get UNLV output.
1092
- * @returns {Promise<string>} Returns the `unlv` upon resolve
1093
- */
1094
- getUNLVText(): Promise<string>;
1095
- /**
1096
- * Get ALTO XML output.
1097
- * @param {Function} progressCallback Optional progress callback.
1098
- * @param {number} pageNumber Optional page number (0-based).
1099
- * @returns {Promise<string>} Returns the `alto` upon resolve
1100
- */
1101
- getALTOText(progressCallback?: (info: ProgressChangedInfo) => void, pageNumber?: number): Promise<string>;
1102
- /**
1103
- * Get languages used at initialization.
1104
- * @returns {Promise<Language>} Returns the languages used when init was called
1105
- */
1106
- getInitLanguages(): Promise<Language>;
1107
- /**
1108
- * Get languages currently loaded.
1109
- * @returns {Promise<Language[]>} Returns the languages that were actually loaded by `init`
1110
- */
1111
- getLoadedLanguages(): Promise<Language[]>;
1112
- /**
1113
- * Get available languages from tessdata.
1114
- * NOTE: this will only return results after `init` has been called with a valid selection of languages
1115
- * @returns {Promise<Language[]>} Returns the languages that are available to tesseract.
1116
- */
1117
- getAvailableLanguages(): Promise<Language[]>;
1118
- /**
1119
- * Clear internal recognition results/state.
1120
- * @returns {Promise<void>}
1121
- */
1122
- clear(): Promise<void>;
1123
- /**
1124
- * Release native resources and destroy the instance.
1125
- * @returns {Promise<void>}
1126
- */
1127
- end(): Promise<void>;
1128
- }
1129
- export type NativeTesseract = TesseractInstance;
1130
- export type TesseractConstructor = new () => TesseractInstance;
1131
208
  declare const NativeTesseract: TesseractConstructor;
1132
209
  declare class Tesseract extends NativeTesseract {
210
+ document: TesseractDocumentApi;
1133
211
  constructor();
1134
212
  init(options?: TesseractInitOptions): Promise<void>;
1135
213
  ensureTrainingData({ lang, dataPath, cachePath, downloadBaseUrl }: EnsureTrainedDataOptions, progressCallback?: (info: TrainingDataDownloadProgress) => void): Promise<string>;