@luii/node-tesseract-ocr 2.0.13 → 2.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +3 -3
- package/README.md +547 -153
- package/binding-options.js +4 -0
- package/dist/cjs/index.cjs +144 -18
- package/dist/cjs/index.d.ts +6 -859
- package/dist/cjs/types.d.ts +1272 -0
- package/dist/cjs/types.js +17 -0
- package/dist/cjs/utils.d.ts +1 -0
- package/dist/cjs/utils.js +38 -0
- package/dist/esm/index.d.ts +6 -859
- package/dist/esm/index.mjs +129 -14
- package/dist/esm/types.d.ts +1272 -0
- package/dist/esm/types.js +16 -0
- package/dist/esm/utils.d.ts +1 -0
- package/dist/esm/utils.js +25 -0
- package/package.json +15 -10
- package/prebuilds/node-tesseract-ocr-darwin-arm64/node-napi-v10.node +0 -0
- package/prebuilds/node-tesseract-ocr-linux-x64/node-napi-v10.node +0 -0
- package/src/commands.hpp +656 -88
- package/src/tesseract_wrapper.cpp +643 -187
- package/src/tesseract_wrapper.hpp +27 -4
- package/src/worker_thread.cpp +146 -2
- package/src/worker_thread.hpp +4 -1
package/dist/esm/index.d.ts
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
import type { EnsureTrainedDataOptions, TesseractDocumentApi, TesseractConstructor, TesseractInitOptions, TrainingDataDownloadProgress } from "./types";
|
|
2
|
+
export type { ConfigurationVariables, DebugConfigurationVariableNames, DebugOnlyConfigurationVariables, DetectOrientationScriptResult, EnsureTrainedDataOptions, InitOnlyConfigurationVariables, ProgressChangedInfo, SetBoolConfigurationVariableNames, SetConfigurationVariableNames, SetNumberConfigurationVariableNames, SetStringConfigurationVariableNames, SetVariableConfigVariables, TesseractBeginProcessPagesOptions, TesseractConstructor, TesseractDocumentApi, TesseractInitOptions, TesseractInstance, TesseractProcessPagesStatus, TesseractSetRectangleOptions, TrainingDataDownloadProgress, } from "./types";
|
|
3
|
+
export type NativeTesseract = import("./types").TesseractInstance;
|
|
1
4
|
/**
|
|
2
5
|
* All available languages for tesseract
|
|
3
6
|
* @readonly
|
|
@@ -202,868 +205,12 @@ export declare const LogLevels: {
|
|
|
202
205
|
readonly OFF: "2147483647";
|
|
203
206
|
};
|
|
204
207
|
export type LogLevel = (typeof LogLevels)[keyof typeof LogLevels];
|
|
205
|
-
export type ConfigurationVariables = {
|
|
206
|
-
log_level: `${LogLevel}`;
|
|
207
|
-
textord_dotmatrix_gap: `${number}`;
|
|
208
|
-
textord_debug_block: `${0 | 1}`;
|
|
209
|
-
textord_pitch_range: `${number}`;
|
|
210
|
-
textord_words_veto_power: `${number}`;
|
|
211
|
-
textord_tabfind_show_strokewidths: `${0 | 1}`;
|
|
212
|
-
pitsync_linear_version: `${number}`;
|
|
213
|
-
oldbl_holed_losscount: `${number}`;
|
|
214
|
-
textord_skewsmooth_offset: `${number}`;
|
|
215
|
-
textord_skewsmooth_offset2: `${0 | 1}`;
|
|
216
|
-
textord_test_x: `${number}`;
|
|
217
|
-
textord_test_y: `${number}`;
|
|
218
|
-
textord_min_blobs_in_row: `${number}`;
|
|
219
|
-
textord_spline_minblobs: `${number}`;
|
|
220
|
-
textord_spline_medianwin: `${number}`;
|
|
221
|
-
textord_max_blob_overlaps: `${number}`;
|
|
222
|
-
textord_min_xheight: `${number}`;
|
|
223
|
-
textord_lms_line_trials: `${number}`;
|
|
224
|
-
textord_tabfind_show_images: `${0 | 1}`;
|
|
225
|
-
textord_fp_chop_error: `${number}`;
|
|
226
|
-
edges_max_children_per_outline: `${number}`;
|
|
227
|
-
edges_max_children_layers: `${number}`;
|
|
228
|
-
edges_children_per_grandchild: `${number}`;
|
|
229
|
-
edges_children_count_limit: `${number}`;
|
|
230
|
-
edges_min_nonhole: `${number}`;
|
|
231
|
-
edges_patharea_ratio: `${number}`;
|
|
232
|
-
devanagari_split_debuglevel: `${0 | 1}`;
|
|
233
|
-
textord_tabfind_show_partitions: `${0 | 1}`;
|
|
234
|
-
textord_debug_tabfind: `${0 | 1}`;
|
|
235
|
-
textord_debug_bugs: `${0 | 1}`;
|
|
236
|
-
textord_testregion_left: `${number}`;
|
|
237
|
-
textord_testregion_top: `${number}`;
|
|
238
|
-
textord_testregion_right: `${number}`;
|
|
239
|
-
textord_testregion_bottom: `${number}`;
|
|
240
|
-
classify_num_cp_levels: `${number}`;
|
|
241
|
-
editor_image_xpos: `${number}`;
|
|
242
|
-
editor_image_ypos: `${number}`;
|
|
243
|
-
editor_image_menuheight: `${number}`;
|
|
244
|
-
editor_image_blob_bb_color: `${number}`;
|
|
245
|
-
editor_word_ypos: `${number}`;
|
|
246
|
-
editor_word_width: `${number}`;
|
|
247
|
-
curl_timeout: `${0 | 1}`;
|
|
248
|
-
wordrec_display_all_blobs: `${0 | 1}`;
|
|
249
|
-
wordrec_blob_pause: `${0 | 1}`;
|
|
250
|
-
textord_force_make_prop_words: `${0 | 1}`;
|
|
251
|
-
textord_chopper_test: `${0 | 1}`;
|
|
252
|
-
textord_restore_underlines: `${0 | 1}`;
|
|
253
|
-
textord_show_initial_words: `${0 | 1}`;
|
|
254
|
-
textord_blocksall_fixed: `${0 | 1}`;
|
|
255
|
-
textord_blocksall_prop: `${0 | 1}`;
|
|
256
|
-
textord_pitch_scalebigwords: `${0 | 1}`;
|
|
257
|
-
textord_debug_pitch_test: `${0 | 1}`;
|
|
258
|
-
textord_disable_pitch_test: `${0 | 1}`;
|
|
259
|
-
textord_fast_pitch_test: `${0 | 1}`;
|
|
260
|
-
textord_debug_pitch_metric: `${0 | 1}`;
|
|
261
|
-
textord_show_row_cuts: `${0 | 1}`;
|
|
262
|
-
textord_show_page_cuts: `${0 | 1}`;
|
|
263
|
-
textord_blockndoc_fixed: `${0 | 1}`;
|
|
264
|
-
textord_show_tables: `${0 | 1}`;
|
|
265
|
-
textord_tablefind_show_mark: `${0 | 1}`;
|
|
266
|
-
textord_tablefind_show_stats: `${0 | 1}`;
|
|
267
|
-
textord_tablefind_recognize_tables: `${0 | 1}`;
|
|
268
|
-
textord_tabfind_show_initialtabs: `${0 | 1}`;
|
|
269
|
-
textord_tabfind_show_finaltabs: `${0 | 1}`;
|
|
270
|
-
textord_tabfind_only_strokewidths: `${0 | 1}`;
|
|
271
|
-
textord_really_old_xheight: `${0 | 1}`;
|
|
272
|
-
textord_oldbl_debug: `${0 | 1}`;
|
|
273
|
-
textord_debug_baselines: `${0 | 1}`;
|
|
274
|
-
textord_oldbl_paradef: `${0 | 1}`;
|
|
275
|
-
textord_oldbl_split_splines: `${0 | 1}`;
|
|
276
|
-
textord_oldbl_merge_parts: `${0 | 1}`;
|
|
277
|
-
oldbl_corrfix: `${0 | 1}`;
|
|
278
|
-
oldbl_xhfix: `${0 | 1}`;
|
|
279
|
-
textord_ocropus_mode: `${0 | 1}`;
|
|
280
|
-
textord_heavy_nr: `${0 | 1}`;
|
|
281
|
-
textord_show_initial_rows: `${0 | 1}`;
|
|
282
|
-
textord_show_parallel_rows: `${0 | 1}`;
|
|
283
|
-
textord_show_expanded_rows: `${0 | 1}`;
|
|
284
|
-
textord_show_final_rows: `${0 | 1}`;
|
|
285
|
-
textord_show_final_blobs: `${0 | 1}`;
|
|
286
|
-
textord_test_landscape: `${0 | 1}`;
|
|
287
|
-
textord_parallel_baselines: `${0 | 1}`;
|
|
288
|
-
textord_straight_baselines: `${0 | 1}`;
|
|
289
|
-
textord_old_baselines: `${0 | 1}`;
|
|
290
|
-
textord_old_xheight: `${0 | 1}`;
|
|
291
|
-
textord_fix_xheight_bug: `${0 | 1}`;
|
|
292
|
-
textord_fix_makerow_bug: `${0 | 1}`;
|
|
293
|
-
textord_debug_xheights: `${0 | 1}`;
|
|
294
|
-
textord_biased_skewcalc: `${0 | 1}`;
|
|
295
|
-
textord_interpolating_skew: `${0 | 1}`;
|
|
296
|
-
textord_new_initial_xheight: `${0 | 1}`;
|
|
297
|
-
textord_debug_blob: `${0 | 1}`;
|
|
298
|
-
gapmap_debug: `${0 | 1}`;
|
|
299
|
-
gapmap_use_ends: `${0 | 1}`;
|
|
300
|
-
gapmap_no_isolated_quanta: `${0 | 1}`;
|
|
301
|
-
edges_use_new_outline_complexity: `${0 | 1}`;
|
|
302
|
-
edges_debug: `${0 | 1}`;
|
|
303
|
-
edges_children_fix: `${0 | 1}`;
|
|
304
|
-
textord_show_fixed_cuts: `${0 | 1}`;
|
|
305
|
-
devanagari_split_debugimage: `${0 | 1}`;
|
|
306
|
-
textord_tabfind_show_initial_partitions: `${0 | 1}`;
|
|
307
|
-
textord_tabfind_show_reject_blobs: `${0 | 1}`;
|
|
308
|
-
textord_tabfind_show_columns: `${0 | 1}`;
|
|
309
|
-
textord_tabfind_show_blocks: `${0 | 1}`;
|
|
310
|
-
textord_tabfind_find_tables: `${0 | 1}`;
|
|
311
|
-
textord_space_size_is_variable: `${0 | 1}`;
|
|
312
|
-
textord_debug_printable: `${0 | 1}`;
|
|
313
|
-
wordrec_display_splits: `${0 | 1}`;
|
|
314
|
-
poly_debug: `${0 | 1}`;
|
|
315
|
-
poly_wide_objects_better: `${0 | 1}`;
|
|
316
|
-
equationdetect_save_bi_image: `${0 | 1}`;
|
|
317
|
-
equationdetect_save_spt_image: `${0 | 1}`;
|
|
318
|
-
equationdetect_save_seed_image: `${0 | 1}`;
|
|
319
|
-
equationdetect_save_merged_image: `${0 | 1}`;
|
|
320
|
-
debug_file: `${string}`;
|
|
321
|
-
editor_word_name: `${string}`;
|
|
322
|
-
dotproduct: `${string}`;
|
|
323
|
-
document_title: `${string}`;
|
|
324
|
-
curl_cookiefile: `${string}`;
|
|
325
|
-
classify_font_name: `${string}`;
|
|
326
|
-
textord_underline_offset: `${number}`;
|
|
327
|
-
textord_wordstats_smooth_factor: `${number}`;
|
|
328
|
-
textord_words_maxspace: `${number}`;
|
|
329
|
-
textord_words_default_maxspace: `${number}`;
|
|
330
|
-
textord_words_default_minspace: `${number}`;
|
|
331
|
-
textord_words_min_minspace: `${number}`;
|
|
332
|
-
textord_words_default_nonspace: `${number}`;
|
|
333
|
-
textord_words_initial_lower: `${number}`;
|
|
334
|
-
textord_words_initial_upper: `${number}`;
|
|
335
|
-
textord_words_minlarge: `${number}`;
|
|
336
|
-
textord_words_pitchsd_threshold: `${number}`;
|
|
337
|
-
textord_words_def_fixed: `${number}`;
|
|
338
|
-
textord_words_def_prop: `${number}`;
|
|
339
|
-
textord_pitch_rowsimilarity: `${number}`;
|
|
340
|
-
words_initial_lower: `${number}`;
|
|
341
|
-
words_initial_upper: `${number}`;
|
|
342
|
-
words_default_prop_nonspace: `${number}`;
|
|
343
|
-
words_default_fixed_space: `${number}`;
|
|
344
|
-
words_default_fixed_limit: `${number}`;
|
|
345
|
-
textord_words_definite_spread: `${number}`;
|
|
346
|
-
textord_spacesize_ratioprop: `${number}`;
|
|
347
|
-
textord_fpiqr_ratio: `${number}`;
|
|
348
|
-
textord_max_pitch_iqr: `${number}`;
|
|
349
|
-
textord_projection_scale: `${number}`;
|
|
350
|
-
textord_balance_factor: `${0 | 1}`;
|
|
351
|
-
textord_tabvector_vertical_gap_fraction: `${number}`;
|
|
352
|
-
textord_tabvector_vertical_box_ratio: `${number}`;
|
|
353
|
-
pitsync_joined_edge: `${number}`;
|
|
354
|
-
pitsync_offset_freecut_fraction: `${number}`;
|
|
355
|
-
oldbl_xhfract: `${number}`;
|
|
356
|
-
oldbl_dot_error_size: `${number}`;
|
|
357
|
-
textord_oldbl_jumplimit: `${number}`;
|
|
358
|
-
textord_spline_shift_fraction: `${number}`;
|
|
359
|
-
textord_skew_ile: `${number}`;
|
|
360
|
-
textord_skew_lag: `${number}`;
|
|
361
|
-
textord_linespace_iqrlimit: `${number}`;
|
|
362
|
-
textord_width_limit: `${number}`;
|
|
363
|
-
textord_chop_width: `${number}`;
|
|
364
|
-
textord_expansion_factor: `${0 | 1}`;
|
|
365
|
-
textord_overlap_x: `${number}`;
|
|
366
|
-
textord_minxh: `${number}`;
|
|
367
|
-
textord_min_linesize: `${number}`;
|
|
368
|
-
textord_excess_blobsize: `${number}`;
|
|
369
|
-
textord_occupancy_threshold: `${number}`;
|
|
370
|
-
textord_underline_width: `${number}`;
|
|
371
|
-
textord_min_blob_height_fraction: `${number}`;
|
|
372
|
-
textord_xheight_mode_fraction: `${number}`;
|
|
373
|
-
textord_ascheight_mode_fraction: `${number}`;
|
|
374
|
-
textord_descheight_mode_fraction: `${number}`;
|
|
375
|
-
textord_ascx_ratio_min: `${number}`;
|
|
376
|
-
textord_ascx_ratio_max: `${number}`;
|
|
377
|
-
textord_descx_ratio_min: `${number}`;
|
|
378
|
-
textord_descx_ratio_max: `${number}`;
|
|
379
|
-
textord_xheight_error_margin: `${number}`;
|
|
380
|
-
gapmap_big_gaps: `${number}`;
|
|
381
|
-
edges_childarea: `${number}`;
|
|
382
|
-
edges_boxarea: `${number}`;
|
|
383
|
-
textord_underline_threshold: `${number}`;
|
|
384
|
-
classify_pico_feature_length: `${number}`;
|
|
385
|
-
classify_norm_adj_midpoint: `${number}`;
|
|
386
|
-
classify_norm_adj_curl: `${number}`;
|
|
387
|
-
classify_min_slope: `${number}`;
|
|
388
|
-
classify_max_slope: `${number}`;
|
|
389
|
-
classify_cp_angle_pad_loose: `${number}`;
|
|
390
|
-
classify_cp_angle_pad_medium: `${number}`;
|
|
391
|
-
classify_cp_angle_pad_tight: `${number}`;
|
|
392
|
-
classify_cp_end_pad_loose: `${number}`;
|
|
393
|
-
classify_cp_end_pad_medium: `${number}`;
|
|
394
|
-
classify_cp_end_pad_tight: `${number}`;
|
|
395
|
-
classify_cp_side_pad_loose: `${number}`;
|
|
396
|
-
classify_cp_side_pad_medium: `${number}`;
|
|
397
|
-
classify_cp_side_pad_tight: `${number}`;
|
|
398
|
-
classify_pp_angle_pad: `${number}`;
|
|
399
|
-
classify_pp_end_pad: `${number}`;
|
|
400
|
-
classify_pp_side_pad: `${number}`;
|
|
401
|
-
ambigs_debug_level: `${0 | 1}`;
|
|
402
|
-
classify_debug_level: `${0 | 1}`;
|
|
403
|
-
classify_norm_method: `${0 | 1}`;
|
|
404
|
-
matcher_debug_level: `${0 | 1}`;
|
|
405
|
-
matcher_debug_flags: `${0 | 1}`;
|
|
406
|
-
classify_learning_debug_level: `${0 | 1}`;
|
|
407
|
-
matcher_permanent_classes_min: `${0 | 1}`;
|
|
408
|
-
matcher_min_examples_for_prototyping: `${number}`;
|
|
409
|
-
matcher_sufficient_examples_for_prototyping: `${number}`;
|
|
410
|
-
classify_adapt_proto_threshold: `${number}`;
|
|
411
|
-
classify_adapt_feature_threshold: `${number}`;
|
|
412
|
-
classify_class_pruner_threshold: `${number}`;
|
|
413
|
-
classify_class_pruner_multiplier: `${number}`;
|
|
414
|
-
classify_cp_cutoff_strength: `${number}`;
|
|
415
|
-
classify_integer_matcher_multiplier: `${number}`;
|
|
416
|
-
dawg_debug_level: `${0 | 1}`;
|
|
417
|
-
hyphen_debug_level: `${0 | 1}`;
|
|
418
|
-
stopper_smallword_size: `${number}`;
|
|
419
|
-
stopper_debug_level: `${0 | 1}`;
|
|
420
|
-
tessedit_truncate_wordchoice_log: `${number}`;
|
|
421
|
-
max_permuter_attempts: `${number}`;
|
|
422
|
-
repair_unchopped_blobs: `${0 | 1}`;
|
|
423
|
-
chop_debug: `${0 | 1}`;
|
|
424
|
-
chop_split_length: `${number}`;
|
|
425
|
-
chop_same_distance: `${number}`;
|
|
426
|
-
chop_min_outline_points: `${number}`;
|
|
427
|
-
chop_seam_pile_size: `${number}`;
|
|
428
|
-
chop_inside_angle: `${number}`;
|
|
429
|
-
chop_min_outline_area: `${number}`;
|
|
430
|
-
chop_centered_maxwidth: `${number}`;
|
|
431
|
-
chop_x_y_weight: `${number}`;
|
|
432
|
-
wordrec_debug_level: `${0 | 1}`;
|
|
433
|
-
wordrec_max_join_chunks: `${number}`;
|
|
434
|
-
segsearch_debug_level: `${0 | 1}`;
|
|
435
|
-
segsearch_max_pain_points: `${number}`;
|
|
436
|
-
segsearch_max_futile_classifications: `${number}`;
|
|
437
|
-
language_model_debug_level: `${0 | 1}`;
|
|
438
|
-
language_model_ngram_order: `${number}`;
|
|
439
|
-
language_model_viterbi_list_max_num_prunable: `${number}`;
|
|
440
|
-
language_model_viterbi_list_max_size: `${number}`;
|
|
441
|
-
language_model_min_compound_length: `${number}`;
|
|
442
|
-
wordrec_display_segmentations: `${0 | 1}`;
|
|
443
|
-
tessedit_pageseg_mode: `${number}`;
|
|
444
|
-
thresholding_method: `${0 | 1}`;
|
|
445
|
-
tessedit_ocr_engine_mode: `${number}`;
|
|
446
|
-
pageseg_devanagari_split_strategy: `${0 | 1}`;
|
|
447
|
-
ocr_devanagari_split_strategy: `${0 | 1}`;
|
|
448
|
-
bidi_debug: `${0 | 1}`;
|
|
449
|
-
applybox_debug: `${0 | 1}`;
|
|
450
|
-
applybox_page: `${0 | 1}`;
|
|
451
|
-
tessedit_font_id: `${0 | 1}`;
|
|
452
|
-
tessedit_bigram_debug: `${0 | 1}`;
|
|
453
|
-
debug_noise_removal: `${0 | 1}`;
|
|
454
|
-
noise_maxperblob: `${number}`;
|
|
455
|
-
noise_maxperword: `${number}`;
|
|
456
|
-
debug_x_ht_level: `${0 | 1}`;
|
|
457
|
-
quality_min_initial_alphas_reqd: `${number}`;
|
|
458
|
-
tessedit_tess_adaption_mode: `${number}`;
|
|
459
|
-
multilang_debug_level: `${0 | 1}`;
|
|
460
|
-
paragraph_debug_level: `${0 | 1}`;
|
|
461
|
-
tessedit_preserve_min_wd_len: `${number}`;
|
|
462
|
-
crunch_rating_max: `${number}`;
|
|
463
|
-
crunch_pot_indicators: `${0 | 1}`;
|
|
464
|
-
crunch_leave_lc_strings: `${number}`;
|
|
465
|
-
crunch_leave_uc_strings: `${number}`;
|
|
466
|
-
crunch_long_repetitions: `${number}`;
|
|
467
|
-
crunch_debug: `${0 | 1}`;
|
|
468
|
-
fixsp_non_noise_limit: `${0 | 1}`;
|
|
469
|
-
fixsp_done_mode: `${0 | 1}`;
|
|
470
|
-
debug_fix_space_level: `${0 | 1}`;
|
|
471
|
-
x_ht_acceptance_tolerance: `${number}`;
|
|
472
|
-
x_ht_min_change: `${number}`;
|
|
473
|
-
superscript_debug: `${0 | 1}`;
|
|
474
|
-
page_xml_level: `${0 | 1}`;
|
|
475
|
-
jpg_quality: `${number}`;
|
|
476
|
-
user_defined_dpi: `${0 | 1}`;
|
|
477
|
-
min_characters_to_try: `${number}`;
|
|
478
|
-
suspect_level: `${number}`;
|
|
479
|
-
suspect_short_words: `${number}`;
|
|
480
|
-
tessedit_reject_mode: `${0 | 1}`;
|
|
481
|
-
tessedit_image_border: `${number}`;
|
|
482
|
-
min_sane_x_ht_pixels: `${number}`;
|
|
483
|
-
tessedit_page_number: `${number}`;
|
|
484
|
-
tessedit_parallelize: `${0 | 1}`;
|
|
485
|
-
lstm_choice_mode: `${0 | 1}`;
|
|
486
|
-
lstm_choice_iterations: `${number}`;
|
|
487
|
-
tosp_debug_level: `${0 | 1}`;
|
|
488
|
-
tosp_enough_space_samples_for_median: `${number}`;
|
|
489
|
-
tosp_redo_kern_limit: `${number}`;
|
|
490
|
-
tosp_few_samples: `${number}`;
|
|
491
|
-
tosp_short_row: `${number}`;
|
|
492
|
-
tosp_sanity_method: `${0 | 1}`;
|
|
493
|
-
textord_max_noise_size: `${number}`;
|
|
494
|
-
textord_baseline_debug: `${0 | 1}`;
|
|
495
|
-
textord_noise_sizefraction: `${number}`;
|
|
496
|
-
textord_noise_translimit: `${number}`;
|
|
497
|
-
textord_noise_sncount: `${0 | 1}`;
|
|
498
|
-
use_ambigs_for_adaption: `${0 | 1}`;
|
|
499
|
-
allow_blob_division: `${0 | 1}`;
|
|
500
|
-
prioritize_division: `${0 | 1}`;
|
|
501
|
-
classify_enable_learning: `${0 | 1}`;
|
|
502
|
-
tess_cn_matching: `${0 | 1}`;
|
|
503
|
-
tess_bn_matching: `${0 | 1}`;
|
|
504
|
-
classify_enable_adaptive_matcher: `${0 | 1}`;
|
|
505
|
-
classify_use_pre_adapted_templates: `${0 | 1}`;
|
|
506
|
-
classify_save_adapted_templates: `${0 | 1}`;
|
|
507
|
-
classify_enable_adaptive_debugger: `${0 | 1}`;
|
|
508
|
-
classify_nonlinear_norm: `${0 | 1}`;
|
|
509
|
-
disable_character_fragments: `${0 | 1}`;
|
|
510
|
-
classify_debug_character_fragments: `${0 | 1}`;
|
|
511
|
-
matcher_debug_separate_windows: `${0 | 1}`;
|
|
512
|
-
classify_bln_numeric_mode: `${0 | 1}`;
|
|
513
|
-
load_system_dawg: `${0 | 1}`;
|
|
514
|
-
load_freq_dawg: `${0 | 1}`;
|
|
515
|
-
load_unambig_dawg: `${0 | 1}`;
|
|
516
|
-
load_punc_dawg: `${0 | 1}`;
|
|
517
|
-
load_number_dawg: `${0 | 1}`;
|
|
518
|
-
load_bigram_dawg: `${0 | 1}`;
|
|
519
|
-
use_only_first_uft8_step: `${0 | 1}`;
|
|
520
|
-
stopper_no_acceptable_choices: `${0 | 1}`;
|
|
521
|
-
segment_nonalphabetic_script: `${0 | 1}`;
|
|
522
|
-
save_doc_words: `${0 | 1}`;
|
|
523
|
-
merge_fragments_in_matrix: `${0 | 1}`;
|
|
524
|
-
wordrec_enable_assoc: `${0 | 1}`;
|
|
525
|
-
force_word_assoc: `${0 | 1}`;
|
|
526
|
-
chop_enable: `${0 | 1}`;
|
|
527
|
-
chop_vertical_creep: `${0 | 1}`;
|
|
528
|
-
chop_new_seam_pile: `${0 | 1}`;
|
|
529
|
-
assume_fixed_pitch_char_segment: `${0 | 1}`;
|
|
530
|
-
wordrec_skip_no_truth_words: `${0 | 1}`;
|
|
531
|
-
wordrec_debug_blamer: `${0 | 1}`;
|
|
532
|
-
wordrec_run_blamer: `${0 | 1}`;
|
|
533
|
-
save_alt_choices: `${0 | 1}`;
|
|
534
|
-
language_model_ngram_on: `${0 | 1}`;
|
|
535
|
-
language_model_ngram_use_only_first_uft8_step: `${0 | 1}`;
|
|
536
|
-
language_model_ngram_space_delimited_language: `${0 | 1}`;
|
|
537
|
-
language_model_use_sigmoidal_certainty: `${0 | 1}`;
|
|
538
|
-
tessedit_resegment_from_boxes: `${0 | 1}`;
|
|
539
|
-
tessedit_resegment_from_line_boxes: `${0 | 1}`;
|
|
540
|
-
tessedit_train_from_boxes: `${0 | 1}`;
|
|
541
|
-
tessedit_make_boxes_from_boxes: `${0 | 1}`;
|
|
542
|
-
tessedit_train_line_recognizer: `${0 | 1}`;
|
|
543
|
-
tessedit_dump_pageseg_images: `${0 | 1}`;
|
|
544
|
-
tessedit_do_invert: `${0 | 1}`;
|
|
545
|
-
thresholding_debug: `${0 | 1}`;
|
|
546
|
-
tessedit_ambigs_training: `${0 | 1}`;
|
|
547
|
-
tessedit_adaption_debug: `${0 | 1}`;
|
|
548
|
-
applybox_learn_chars_and_char_frags_mode: `${0 | 1}`;
|
|
549
|
-
applybox_learn_ngrams_mode: `${0 | 1}`;
|
|
550
|
-
tessedit_display_outwords: `${0 | 1}`;
|
|
551
|
-
tessedit_dump_choices: `${0 | 1}`;
|
|
552
|
-
tessedit_timing_debug: `${0 | 1}`;
|
|
553
|
-
tessedit_fix_fuzzy_spaces: `${0 | 1}`;
|
|
554
|
-
tessedit_unrej_any_wd: `${0 | 1}`;
|
|
555
|
-
tessedit_fix_hyphens: `${0 | 1}`;
|
|
556
|
-
tessedit_enable_doc_dict: `${0 | 1}`;
|
|
557
|
-
tessedit_debug_fonts: `${0 | 1}`;
|
|
558
|
-
tessedit_debug_block_rejection: `${0 | 1}`;
|
|
559
|
-
tessedit_enable_bigram_correction: `${0 | 1}`;
|
|
560
|
-
tessedit_enable_dict_correction: `${0 | 1}`;
|
|
561
|
-
enable_noise_removal: `${0 | 1}`;
|
|
562
|
-
tessedit_minimal_rej_pass1: `${0 | 1}`;
|
|
563
|
-
tessedit_test_adaption: `${0 | 1}`;
|
|
564
|
-
test_pt: `${0 | 1}`;
|
|
565
|
-
paragraph_text_based: `${0 | 1}`;
|
|
566
|
-
lstm_use_matrix: `${0 | 1}`;
|
|
567
|
-
tessedit_good_quality_unrej: `${0 | 1}`;
|
|
568
|
-
tessedit_use_reject_spaces: `${0 | 1}`;
|
|
569
|
-
tessedit_preserve_blk_rej_perfect_wds: `${0 | 1}`;
|
|
570
|
-
tessedit_preserve_row_rej_perfect_wds: `${0 | 1}`;
|
|
571
|
-
tessedit_dont_blkrej_good_wds: `${0 | 1}`;
|
|
572
|
-
tessedit_dont_rowrej_good_wds: `${0 | 1}`;
|
|
573
|
-
tessedit_row_rej_good_docs: `${0 | 1}`;
|
|
574
|
-
tessedit_reject_bad_qual_wds: `${0 | 1}`;
|
|
575
|
-
tessedit_debug_doc_rejection: `${0 | 1}`;
|
|
576
|
-
tessedit_debug_quality_metrics: `${0 | 1}`;
|
|
577
|
-
bland_unrej: `${0 | 1}`;
|
|
578
|
-
unlv_tilde_crunching: `${0 | 1}`;
|
|
579
|
-
hocr_font_info: `${0 | 1}`;
|
|
580
|
-
hocr_char_boxes: `${0 | 1}`;
|
|
581
|
-
crunch_early_merge_tess_fails: `${0 | 1}`;
|
|
582
|
-
crunch_early_convert_bad_unlv_chs: `${0 | 1}`;
|
|
583
|
-
crunch_terrible_garbage: `${0 | 1}`;
|
|
584
|
-
crunch_leave_ok_strings: `${0 | 1}`;
|
|
585
|
-
crunch_accept_ok: `${0 | 1}`;
|
|
586
|
-
crunch_leave_accept_strings: `${0 | 1}`;
|
|
587
|
-
crunch_include_numerals: `${0 | 1}`;
|
|
588
|
-
tessedit_prefer_joined_punct: `${0 | 1}`;
|
|
589
|
-
tessedit_write_block_separators: `${0 | 1}`;
|
|
590
|
-
tessedit_write_rep_codes: `${0 | 1}`;
|
|
591
|
-
tessedit_write_unlv: `${0 | 1}`;
|
|
592
|
-
tessedit_create_txt: `${0 | 1}`;
|
|
593
|
-
tessedit_create_hocr: `${0 | 1}`;
|
|
594
|
-
tessedit_create_alto: `${0 | 1}`;
|
|
595
|
-
tessedit_create_page_xml: `${0 | 1}`;
|
|
596
|
-
page_xml_polygon: `${0 | 1}`;
|
|
597
|
-
tessedit_create_lstmbox: `${0 | 1}`;
|
|
598
|
-
tessedit_create_tsv: `${0 | 1}`;
|
|
599
|
-
tessedit_create_wordstrbox: `${0 | 1}`;
|
|
600
|
-
tessedit_create_pdf: `${0 | 1}`;
|
|
601
|
-
textonly_pdf: `${0 | 1}`;
|
|
602
|
-
suspect_constrain_1Il: `${0 | 1}`;
|
|
603
|
-
tessedit_minimal_rejection: `${0 | 1}`;
|
|
604
|
-
tessedit_zero_rejection: `${0 | 1}`;
|
|
605
|
-
tessedit_word_for_word: `${0 | 1}`;
|
|
606
|
-
tessedit_zero_kelvin_rejection: `${0 | 1}`;
|
|
607
|
-
tessedit_rejection_debug: `${0 | 1}`;
|
|
608
|
-
tessedit_flip_0O: `${0 | 1}`;
|
|
609
|
-
rej_trust_doc_dawg: `${0 | 1}`;
|
|
610
|
-
rej_1Il_use_dict_word: `${0 | 1}`;
|
|
611
|
-
rej_1Il_trust_permuter_type: `${0 | 1}`;
|
|
612
|
-
rej_use_tess_accepted: `${0 | 1}`;
|
|
613
|
-
rej_use_tess_blanks: `${0 | 1}`;
|
|
614
|
-
rej_use_good_perm: `${0 | 1}`;
|
|
615
|
-
rej_use_sensible_wd: `${0 | 1}`;
|
|
616
|
-
rej_alphas_in_number_perm: `${0 | 1}`;
|
|
617
|
-
tessedit_create_boxfile: `${0 | 1}`;
|
|
618
|
-
tessedit_write_images: `${0 | 1}`;
|
|
619
|
-
interactive_display_mode: `${0 | 1}`;
|
|
620
|
-
tessedit_override_permuter: `${0 | 1}`;
|
|
621
|
-
tessedit_use_primary_params_model: `${0 | 1}`;
|
|
622
|
-
textord_tabfind_show_vlines: `${0 | 1}`;
|
|
623
|
-
textord_use_cjk_fp_model: `${0 | 1}`;
|
|
624
|
-
poly_allow_detailed_fx: `${0 | 1}`;
|
|
625
|
-
tessedit_init_config_only: `${0 | 1}`;
|
|
626
|
-
textord_equation_detect: `${0 | 1}`;
|
|
627
|
-
textord_tabfind_vertical_text: `${0 | 1}`;
|
|
628
|
-
textord_tabfind_force_vertical_text: `${0 | 1}`;
|
|
629
|
-
preserve_interword_spaces: `${0 | 1}`;
|
|
630
|
-
pageseg_apply_music_mask: `${0 | 1}`;
|
|
631
|
-
textord_single_height_mode: `${0 | 1}`;
|
|
632
|
-
tosp_old_to_method: `${0 | 1}`;
|
|
633
|
-
tosp_old_to_constrain_sp_kn: `${0 | 1}`;
|
|
634
|
-
tosp_only_use_prop_rows: `${0 | 1}`;
|
|
635
|
-
tosp_force_wordbreak_on_punct: `${0 | 1}`;
|
|
636
|
-
tosp_use_pre_chopping: `${0 | 1}`;
|
|
637
|
-
tosp_old_to_bug_fix: `${0 | 1}`;
|
|
638
|
-
tosp_block_use_cert_spaces: `${0 | 1}`;
|
|
639
|
-
tosp_row_use_cert_spaces: `${0 | 1}`;
|
|
640
|
-
tosp_narrow_blobs_not_cert: `${0 | 1}`;
|
|
641
|
-
tosp_row_use_cert_spaces1: `${0 | 1}`;
|
|
642
|
-
tosp_recovery_isolated_row_stats: `${0 | 1}`;
|
|
643
|
-
tosp_only_small_gaps_for_kern: `${0 | 1}`;
|
|
644
|
-
tosp_all_flips_fuzzy: `${0 | 1}`;
|
|
645
|
-
tosp_fuzzy_limit_all: `${0 | 1}`;
|
|
646
|
-
tosp_stats_use_xht_gaps: `${0 | 1}`;
|
|
647
|
-
tosp_use_xht_gaps: `${0 | 1}`;
|
|
648
|
-
tosp_only_use_xht_gaps: `${0 | 1}`;
|
|
649
|
-
tosp_rule_9_test_punct: `${0 | 1}`;
|
|
650
|
-
tosp_flip_fuzz_kn_to_sp: `${0 | 1}`;
|
|
651
|
-
tosp_flip_fuzz_sp_to_kn: `${0 | 1}`;
|
|
652
|
-
tosp_improve_thresh: `${0 | 1}`;
|
|
653
|
-
textord_no_rejects: `${0 | 1}`;
|
|
654
|
-
textord_show_blobs: `${0 | 1}`;
|
|
655
|
-
textord_show_boxes: `${0 | 1}`;
|
|
656
|
-
textord_noise_rejwords: `${0 | 1}`;
|
|
657
|
-
textord_noise_rejrows: `${0 | 1}`;
|
|
658
|
-
textord_noise_debug: `${0 | 1}`;
|
|
659
|
-
classify_learn_debug_str: `${string}`;
|
|
660
|
-
user_words_file: `${string}`;
|
|
661
|
-
user_words_suffix: `${string}`;
|
|
662
|
-
user_patterns_file: `${string}`;
|
|
663
|
-
user_patterns_suffix: `${string}`;
|
|
664
|
-
output_ambig_words_file: `${string}`;
|
|
665
|
-
word_to_debug: `${string}`;
|
|
666
|
-
tessedit_char_blacklist: `${string}`;
|
|
667
|
-
tessedit_char_whitelist: `${string}`;
|
|
668
|
-
tessedit_char_unblacklist: `${string}`;
|
|
669
|
-
tessedit_write_params_to_file: `${string}`;
|
|
670
|
-
applybox_exposure_pattern: `${string}`;
|
|
671
|
-
chs_leading_punct: `${string}`;
|
|
672
|
-
chs_trailing_punct1: `${string}`;
|
|
673
|
-
chs_trailing_punct2: `${string}`;
|
|
674
|
-
outlines_odd: `${string}`;
|
|
675
|
-
outlines_2: `${string}`;
|
|
676
|
-
numeric_punctuation: `${string}`;
|
|
677
|
-
unrecognised_char: `${string}`;
|
|
678
|
-
ok_repeated_ch_non_alphanum_wds: `${string}`;
|
|
679
|
-
conflict_set_I_l_1: `${string}`;
|
|
680
|
-
file_type: `${string}`;
|
|
681
|
-
tessedit_load_sublangs: `${string}`;
|
|
682
|
-
page_separator: `${string}`;
|
|
683
|
-
classify_char_norm_range: `${number}`;
|
|
684
|
-
classify_max_rating_ratio: `${number}`;
|
|
685
|
-
classify_max_certainty_margin: `${number}`;
|
|
686
|
-
matcher_good_threshold: `${number}`;
|
|
687
|
-
matcher_reliable_adaptive_result: `${0 | 1}`;
|
|
688
|
-
matcher_perfect_threshold: `${number}`;
|
|
689
|
-
matcher_bad_match_pad: `${number}`;
|
|
690
|
-
matcher_rating_margin: `${number}`;
|
|
691
|
-
matcher_avg_noise_size: `${number}`;
|
|
692
|
-
matcher_clustering_max_angle_delta: `${number}`;
|
|
693
|
-
classify_misfit_junk_penalty: `${0 | 1}`;
|
|
694
|
-
rating_scale: `${number}`;
|
|
695
|
-
tessedit_class_miss_scale: `${number}`;
|
|
696
|
-
classify_adapted_pruning_factor: `${number}`;
|
|
697
|
-
classify_adapted_pruning_threshold: `${number}`;
|
|
698
|
-
classify_character_fragments_garbage_certainty_threshold: `${number}`;
|
|
699
|
-
speckle_large_max_size: `${number}`;
|
|
700
|
-
speckle_rating_penalty: `${number}`;
|
|
701
|
-
xheight_penalty_subscripts: `${number}`;
|
|
702
|
-
xheight_penalty_inconsistent: `${number}`;
|
|
703
|
-
segment_penalty_dict_frequent_word: `${0 | 1}`;
|
|
704
|
-
segment_penalty_dict_case_ok: `${number}`;
|
|
705
|
-
segment_penalty_dict_case_bad: `${number}`;
|
|
706
|
-
segment_penalty_dict_nonword: `${number}`;
|
|
707
|
-
segment_penalty_garbage: `${number}`;
|
|
708
|
-
certainty_scale: `${number}`;
|
|
709
|
-
stopper_nondict_certainty_base: `${number}`;
|
|
710
|
-
stopper_phase2_certainty_rejection_offset: `${0 | 1}`;
|
|
711
|
-
stopper_certainty_per_char: `${number}`;
|
|
712
|
-
stopper_allowable_character_badness: `${number}`;
|
|
713
|
-
doc_dict_pending_threshold: `${0 | 1}`;
|
|
714
|
-
doc_dict_certainty_threshold: `${number}`;
|
|
715
|
-
tessedit_certainty_threshold: `${number}`;
|
|
716
|
-
chop_split_dist_knob: `${number}`;
|
|
717
|
-
chop_overlap_knob: `${number}`;
|
|
718
|
-
chop_center_knob: `${number}`;
|
|
719
|
-
chop_sharpness_knob: `${number}`;
|
|
720
|
-
chop_width_change_knob: `${number}`;
|
|
721
|
-
chop_ok_split: `${number}`;
|
|
722
|
-
chop_good_split: `${number}`;
|
|
723
|
-
segsearch_max_char_wh_ratio: `${number}`;
|
|
724
|
-
language_model_ngram_small_prob: `${number}`;
|
|
725
|
-
language_model_ngram_nonmatch_score: `${number}`;
|
|
726
|
-
language_model_ngram_scale_factor: `${number}`;
|
|
727
|
-
language_model_ngram_rating_factor: `${number}`;
|
|
728
|
-
language_model_penalty_non_freq_dict_word: `${number}`;
|
|
729
|
-
language_model_penalty_non_dict_word: `${number}`;
|
|
730
|
-
language_model_penalty_punc: `${number}`;
|
|
731
|
-
language_model_penalty_case: `${number}`;
|
|
732
|
-
language_model_penalty_script: `${number}`;
|
|
733
|
-
language_model_penalty_chartype: `${number}`;
|
|
734
|
-
language_model_penalty_font: `${0 | 1}`;
|
|
735
|
-
language_model_penalty_spacing: `${number}`;
|
|
736
|
-
language_model_penalty_increment: `${number}`;
|
|
737
|
-
invert_threshold: `${number}`;
|
|
738
|
-
thresholding_window_size: `${number}`;
|
|
739
|
-
thresholding_kfactor: `${number}`;
|
|
740
|
-
thresholding_tile_size: `${number}`;
|
|
741
|
-
thresholding_smooth_kernel_size: `${0 | 1}`;
|
|
742
|
-
thresholding_score_fraction: `${number}`;
|
|
743
|
-
noise_cert_basechar: `${number}`;
|
|
744
|
-
noise_cert_disjoint: `${number}`;
|
|
745
|
-
noise_cert_punc: `${number}`;
|
|
746
|
-
noise_cert_factor: `${number}`;
|
|
747
|
-
quality_rej_pc: `${number}`;
|
|
748
|
-
quality_blob_pc: `${0 | 1}`;
|
|
749
|
-
quality_outline_pc: `${0 | 1}`;
|
|
750
|
-
quality_char_pc: `${number}`;
|
|
751
|
-
test_pt_x: `${number}`;
|
|
752
|
-
test_pt_y: `${number}`;
|
|
753
|
-
tessedit_reject_doc_percent: `${number}`;
|
|
754
|
-
tessedit_reject_block_percent: `${number}`;
|
|
755
|
-
tessedit_reject_row_percent: `${number}`;
|
|
756
|
-
tessedit_whole_wd_rej_row_percent: `${number}`;
|
|
757
|
-
tessedit_good_doc_still_rowrej_wd: `${number}`;
|
|
758
|
-
quality_rowrej_pc: `${number}`;
|
|
759
|
-
crunch_terrible_rating: `${number}`;
|
|
760
|
-
crunch_poor_garbage_cert: `${number}`;
|
|
761
|
-
crunch_poor_garbage_rate: `${number}`;
|
|
762
|
-
crunch_pot_poor_rate: `${number}`;
|
|
763
|
-
crunch_pot_poor_cert: `${number}`;
|
|
764
|
-
crunch_del_rating: `${number}`;
|
|
765
|
-
crunch_del_cert: `${number}`;
|
|
766
|
-
crunch_del_min_ht: `${number}`;
|
|
767
|
-
crunch_del_max_ht: `${number}`;
|
|
768
|
-
crunch_del_min_width: `${number}`;
|
|
769
|
-
crunch_del_high_word: `${number}`;
|
|
770
|
-
crunch_del_low_word: `${number}`;
|
|
771
|
-
crunch_small_outlines_size: `${number}`;
|
|
772
|
-
fixsp_small_outlines_size: `${number}`;
|
|
773
|
-
superscript_worse_certainty: `${number}`;
|
|
774
|
-
superscript_bettered_certainty: `${number}`;
|
|
775
|
-
superscript_scaledown_ratio: `${number}`;
|
|
776
|
-
subscript_max_y_top: `${number}`;
|
|
777
|
-
superscript_min_y_bottom: `${number}`;
|
|
778
|
-
suspect_rating_per_ch: `${number}`;
|
|
779
|
-
suspect_accept_rating: `${number}`;
|
|
780
|
-
tessedit_lower_flip_hyphen: `${number}`;
|
|
781
|
-
tessedit_upper_flip_hyphen: `${number}`;
|
|
782
|
-
rej_whole_of_mostly_reject_word_fract: `${number}`;
|
|
783
|
-
min_orientation_margin: `${number}`;
|
|
784
|
-
textord_tabfind_vertical_text_ratio: `${number}`;
|
|
785
|
-
textord_tabfind_aligned_gap_fraction: `${number}`;
|
|
786
|
-
lstm_rating_coefficient: `${number}`;
|
|
787
|
-
tosp_old_sp_kn_th_factor: `${number}`;
|
|
788
|
-
tosp_threshold_bias1: `${0 | 1}`;
|
|
789
|
-
tosp_threshold_bias2: `${0 | 1}`;
|
|
790
|
-
tosp_narrow_fraction: `${number}`;
|
|
791
|
-
tosp_narrow_aspect_ratio: `${number}`;
|
|
792
|
-
tosp_wide_fraction: `${number}`;
|
|
793
|
-
tosp_wide_aspect_ratio: `${0 | 1}`;
|
|
794
|
-
tosp_fuzzy_space_factor: `${number}`;
|
|
795
|
-
tosp_fuzzy_space_factor1: `${number}`;
|
|
796
|
-
tosp_fuzzy_space_factor2: `${number}`;
|
|
797
|
-
tosp_gap_factor: `${number}`;
|
|
798
|
-
tosp_kern_gap_factor1: `${number}`;
|
|
799
|
-
tosp_kern_gap_factor2: `${number}`;
|
|
800
|
-
tosp_kern_gap_factor3: `${number}`;
|
|
801
|
-
tosp_ignore_big_gaps: `${number}`;
|
|
802
|
-
tosp_ignore_very_big_gaps: `${number}`;
|
|
803
|
-
tosp_rep_space: `${number}`;
|
|
804
|
-
tosp_enough_small_gaps: `${number}`;
|
|
805
|
-
tosp_table_kn_sp_ratio: `${number}`;
|
|
806
|
-
tosp_table_xht_sp_ratio: `${number}`;
|
|
807
|
-
tosp_table_fuzzy_kn_sp_ratio: `${number}`;
|
|
808
|
-
tosp_fuzzy_kn_fraction: `${number}`;
|
|
809
|
-
tosp_fuzzy_sp_fraction: `${number}`;
|
|
810
|
-
tosp_min_sane_kn_sp: `${number}`;
|
|
811
|
-
tosp_init_guess_kn_mult: `${number}`;
|
|
812
|
-
tosp_init_guess_xht_mult: `${number}`;
|
|
813
|
-
tosp_max_sane_kn_thresh: `${number}`;
|
|
814
|
-
tosp_flip_caution: `${0 | 1}`;
|
|
815
|
-
tosp_large_kerning: `${number}`;
|
|
816
|
-
tosp_dont_fool_with_small_kerns: `${number}`;
|
|
817
|
-
tosp_near_lh_edge: `${0 | 1}`;
|
|
818
|
-
tosp_silly_kn_sp_gap: `${number}`;
|
|
819
|
-
tosp_pass_wide_fuzz_sp_to_context: `${number}`;
|
|
820
|
-
textord_noise_area_ratio: `${number}`;
|
|
821
|
-
textord_initialx_ile: `${number}`;
|
|
822
|
-
textord_initialasc_ile: `${number}`;
|
|
823
|
-
textord_noise_sizelimit: `${number}`;
|
|
824
|
-
textord_noise_normratio: `${number}`;
|
|
825
|
-
textord_noise_syfract: `${number}`;
|
|
826
|
-
textord_noise_sxfract: `${number}`;
|
|
827
|
-
textord_noise_hfract: `${number}`;
|
|
828
|
-
textord_noise_rowratio: `${number}`;
|
|
829
|
-
textord_blshift_maxshift: `${0 | 1}`;
|
|
830
|
-
textord_blshift_xfraction: `${number}`;
|
|
831
|
-
};
|
|
832
|
-
/**
 * Names of the configuration variables that are split out of `ConfigurationVariables`:
 * they are picked into `InitOnlyConfigurationVariables` and omitted from
 * `SetVariableConfigVariables`.
 */
type InitOnlyConfigurationVariableNames = "ambigs_debug_level" | "language_model_ngram_on" | "language_model_use_sigmoidal_certainty" | "load_bigram_dawg" | "load_freq_dawg" | "load_number_dawg" | "load_punc_dawg" | "load_system_dawg" | "load_unambig_dawg" | "tessedit_init_config_only" | "tessedit_ocr_engine_mode" | "user_patterns_suffix" | "user_words_suffix";
|
|
833
|
-
/**
 * The subset of `ConfigurationVariables` whose keys are listed in
 * `InitOnlyConfigurationVariableNames`.
 */
export type InitOnlyConfigurationVariables = Pick<ConfigurationVariables, InitOnlyConfigurationVariableNames>;
|
|
834
|
-
/**
 * `ConfigurationVariables` with the `InitOnlyConfigurationVariableNames` keys removed.
 * Its keys are the names accepted by `setVariable` and the `get*Variable` methods
 * of `TesseractInstance`.
 */
export type SetVariableConfigVariables = Omit<ConfigurationVariables, InitOnlyConfigurationVariableNames>;
|
|
835
|
-
/**
|
|
836
|
-
* Tesseract init options
|
|
837
|
-
*/
|
|
838
|
-
export interface TesseractInitOptions {
|
|
839
|
-
/**
|
|
840
|
-
* It's generally safer to use as few languages as possible.
|
|
841
|
-
* The more languages Tesseract needs to load, the longer it takes to recognize an image.
|
|
842
|
-
* @public
|
|
843
|
-
*/
|
|
844
|
-
lang?: Language[];
|
|
845
|
-
/**
|
|
846
|
-
* OCR Engine Modes
|
|
847
|
-
* The engine mode cannot be changed after creating the instance
|
|
848
|
-
* If another mode is needed, it is advised to create a new instance.
|
|
849
|
-
* @throws {Error} Will throw an error when oem mode is below 0 or over 3
|
|
850
|
-
*/
|
|
851
|
-
oem?: OcrEngineMode;
|
|
852
|
-
setOnlyNonDebugParams?: boolean;
|
|
853
|
-
configs?: Array<string>;
|
|
854
|
-
vars?: Partial<Record<keyof ConfigurationVariables, ConfigurationVariables[keyof ConfigurationVariables]>>;
|
|
855
|
-
}
|
|
856
|
-
export interface TesseractSetRectangleOptions {
|
|
857
|
-
top: number;
|
|
858
|
-
left: number;
|
|
859
|
-
width: number;
|
|
860
|
-
height: number;
|
|
861
|
-
}
|
|
862
|
-
export interface ProgressChangedInfo {
|
|
863
|
-
/**
|
|
864
|
-
* Chars in this buffer
|
|
865
|
-
*/
|
|
866
|
-
progress: number;
|
|
867
|
-
/**
|
|
868
|
-
* Percent complete increasing (0-100)
|
|
869
|
-
*/
|
|
870
|
-
percent: number;
|
|
871
|
-
/**
|
|
872
|
-
* States if the worker is still alive
|
|
873
|
-
*/
|
|
874
|
-
ocrAlive: number;
|
|
875
|
-
/**
|
|
876
|
-
* top coordinate of the bbox of the current element that tesseract is processing
|
|
877
|
-
*/
|
|
878
|
-
top: number;
|
|
879
|
-
/**
|
|
880
|
-
* right coordinate of the bbox of the current element that tesseract is processing
|
|
881
|
-
*/
|
|
882
|
-
right: number;
|
|
883
|
-
/**
|
|
884
|
-
* bottom coordinate of the bbox of the current element that tesseract is processing
|
|
885
|
-
*/
|
|
886
|
-
bottom: number;
|
|
887
|
-
/**
|
|
888
|
-
* left coordinate of the bbox of the current element that tesseract is processing
|
|
889
|
-
*/
|
|
890
|
-
left: number;
|
|
891
|
-
}
|
|
892
|
-
export interface DetectOrientationScriptResult {
|
|
893
|
-
/**
|
|
894
|
-
* Orientation of the source image in degrees
|
|
895
|
-
* Orientation refers to the way the source is rotated, **not** how the text is
|
|
896
|
-
* aligned. It ranges from 0° to 360°.
|
|
897
|
-
* @type {number}
|
|
898
|
-
*/
|
|
899
|
-
orientationDegrees: number;
|
|
900
|
-
/**
|
|
901
|
-
* The confidence of tesseract for the orientation
|
|
902
|
-
* @type {number}
|
|
903
|
-
*/
|
|
904
|
-
orientationConfidence: number;
|
|
905
|
-
/**
|
|
906
|
-
* The name of the script that is used in the source image
|
|
907
|
-
* @type {string}
|
|
908
|
-
*/
|
|
909
|
-
scriptName: string;
|
|
910
|
-
/**
|
|
911
|
-
* The confidence of tesseract about the detected script of the source image
|
|
912
|
-
* @type {number}
|
|
913
|
-
*/
|
|
914
|
-
scriptConfidence: number;
|
|
915
|
-
}
|
|
916
|
-
export interface TesseractInstance {
|
|
917
|
-
/**
|
|
918
|
-
* Initialize the engine with the given options.
|
|
919
|
-
* @param {TesseractInitOptions} options Initialization options (languages, engine mode, config files, variables).
|
|
920
|
-
* @returns {Promise<void>}
|
|
921
|
-
*/
|
|
922
|
-
init(options: TesseractInitOptions): Promise<void>;
|
|
923
|
-
/**
|
|
924
|
-
* Initialize the engine for page analysis only.
|
|
925
|
-
* @returns {Promise<void>}
|
|
926
|
-
*/
|
|
927
|
-
initForAnalysePage(): Promise<void>;
|
|
928
|
-
/**
|
|
929
|
-
* Run page layout analysis.
|
|
930
|
-
* @param {boolean} mergeSimilarWords Whether to merge similar words during analysis.
|
|
931
|
-
* @returns {Promise<void>}
|
|
932
|
-
*/
|
|
933
|
-
analysePage(mergeSimilarWords: boolean): Promise<void>;
|
|
934
|
-
/**
|
|
935
|
-
* Set the page segmentation mode (PSM).
|
|
936
|
-
* @param {PageSegmentationMode} psm Page segmentation mode.
|
|
937
|
-
* @returns {Promise<void>}
|
|
938
|
-
*/
|
|
939
|
-
setPageMode(psm: PageSegmentationMode): Promise<void>;
|
|
940
|
-
/**
|
|
941
|
-
* Set a configuration variable.
|
|
942
|
-
* @param {keyof SetVariableConfigVariables} name Variable name.
|
|
943
|
-
* @param {SetVariableConfigVariables[keyof SetVariableConfigVariables]} value Variable value.
|
|
944
|
-
* @returns Returns `false` if the lookup failed.
|
|
945
|
-
*/
|
|
946
|
-
setVariable(name: keyof SetVariableConfigVariables, value: SetVariableConfigVariables[keyof SetVariableConfigVariables]): Promise<boolean>;
|
|
947
|
-
/**
|
|
948
|
-
* Get a configuration variable as integer.
|
|
949
|
-
* @param {keyof SetVariableConfigVariables} name Variable name.
|
|
950
|
-
* @returns {Promise<number>} Returns the value of the variable.
|
|
951
|
-
*/
|
|
952
|
-
getIntVariable(name: keyof SetVariableConfigVariables): Promise<number>;
|
|
953
|
-
/**
|
|
954
|
-
* Get a configuration variable as boolean (0/1).
|
|
955
|
-
* @param {keyof SetVariableConfigVariables} name Variable name.
|
|
956
|
-
* @returns {Promise<number>} Returns the value of the variable.
|
|
957
|
-
*/
|
|
958
|
-
getBoolVariable(name: keyof SetVariableConfigVariables): Promise<number>;
|
|
959
|
-
/**
|
|
960
|
-
* Get a configuration variable as double.
|
|
961
|
-
* @param {keyof SetVariableConfigVariables} name Variable name.
|
|
962
|
-
* @returns {Promise<number>} Returns the value of the variable.
|
|
963
|
-
*/
|
|
964
|
-
getDoubleVariable(name: keyof SetVariableConfigVariables): Promise<number>;
|
|
965
|
-
/**
|
|
966
|
-
* Get a configuration variable as string.
|
|
967
|
-
* @param {keyof SetVariableConfigVariables} name Variable name.
|
|
968
|
-
* @returns {Promise<string>} Returns the value of the variable.
|
|
969
|
-
*/
|
|
970
|
-
getStringVariable(name: keyof SetVariableConfigVariables): Promise<string>;
|
|
971
|
-
/**
|
|
972
|
-
* Set the image to be recognized.
|
|
973
|
-
* @param {Buffer<ArrayBuffer>} buffer Image data buffer.
|
|
974
|
-
* @returns {Promise<void>}
|
|
975
|
-
*/
|
|
976
|
-
setImage(buffer: Buffer<ArrayBuffer>): Promise<void>;
|
|
977
|
-
/**
|
|
978
|
-
* Restrict recognition to a rectangle.
|
|
979
|
-
* @param {TesseractSetRectangleOptions} options Rectangle options.
|
|
980
|
-
* @returns {Promise<void>}
|
|
981
|
-
*/
|
|
982
|
-
setRectangle(options: TesseractSetRectangleOptions): Promise<void>;
|
|
983
|
-
/**
|
|
984
|
-
* Set the source resolution in PPI.
|
|
985
|
-
* @param {number} ppi Source resolution in PPI.
|
|
986
|
-
* @returns {Promise<void>}
|
|
987
|
-
*/
|
|
988
|
-
setSourceResolution(ppi: number): Promise<void>;
|
|
989
|
-
/**
|
|
990
|
-
* @throws {Error} Will throw an error if the parameter at index 0 is not a function
|
|
991
|
-
* @param {(info: ProgressChangedInfo) => void} progressCallback Callback will be called to inform the user about progress changes
|
|
992
|
-
* @returns {Promise<void>}
|
|
993
|
-
*/
|
|
994
|
-
recognize(progressCallback: (info: ProgressChangedInfo) => void): Promise<void>;
|
|
995
|
-
/**
|
|
996
|
-
* Detect orientation and script (OSD).
|
|
997
|
-
* @returns {Promise<DetectOrientationScriptResult>}
|
|
998
|
-
*/
|
|
999
|
-
detectOrientationScript(): Promise<DetectOrientationScriptResult>;
|
|
1000
|
-
/**
|
|
1001
|
-
* Get mean text confidence.
|
|
1002
|
-
* @returns {Promise<number>} Returns the mean text confidence on resolve
|
|
1003
|
-
*/
|
|
1004
|
-
meanTextConf(): Promise<number>;
|
|
1005
|
-
/**
|
|
1006
|
-
* Get recognized text as UTF-8.
|
|
1007
|
-
* @returns {Promise<string>} Returns the recognized text as UTF-8 on resolve
|
|
1008
|
-
*/
|
|
1009
|
-
getUTF8Text(): Promise<string>;
|
|
1010
|
-
/**
|
|
1011
|
-
* Get hOCR output.
|
|
1012
|
-
* @param {Function} progressCallback Optional progress callback.
|
|
1013
|
-
* @param {number} pageNumber Optional page number (0-based).
|
|
1014
|
-
* @returns {Promise<string>} Returns the `hOCR` upon resolve
|
|
1015
|
-
*/
|
|
1016
|
-
getHOCRText(progressCallback?: (info: ProgressChangedInfo) => void, pageNumber?: number): Promise<string>;
|
|
1017
|
-
/**
|
|
1018
|
-
* Get TSV output.
|
|
1019
|
-
* @returns {Promise<string>} Returns the `tsv` upon resolve
|
|
1020
|
-
*/
|
|
1021
|
-
getTSVText(): Promise<string>;
|
|
1022
|
-
/**
|
|
1023
|
-
* Get UNLV output.
|
|
1024
|
-
* @returns {Promise<string>} Returns the `unlv` upon resolve
|
|
1025
|
-
*/
|
|
1026
|
-
getUNLVText(): Promise<string>;
|
|
1027
|
-
/**
|
|
1028
|
-
* Get ALTO XML output.
|
|
1029
|
-
* @param {Function} progressCallback Optional progress callback.
|
|
1030
|
-
* @param {number} pageNumber Optional page number (0-based).
|
|
1031
|
-
* @returns {Promise<string>} Returns the `alto` upon resolve
|
|
1032
|
-
*/
|
|
1033
|
-
getALTOText(progressCallback?: (info: ProgressChangedInfo) => void, pageNumber?: number): Promise<string>;
|
|
1034
|
-
/**
|
|
1035
|
-
* Get languages used at initialization.
|
|
1036
|
-
* @returns {Promise<Language>} Returns the languages used when init was called
|
|
1037
|
-
*/
|
|
1038
|
-
getInitLanguages(): Promise<Language>;
|
|
1039
|
-
/**
|
|
1040
|
-
* Get languages currently loaded.
|
|
1041
|
-
* @returns {Promise<Language[]>} Returns the languages that were actually loaded by `init`
|
|
1042
|
-
*/
|
|
1043
|
-
getLoadedLanguages(): Promise<Language[]>;
|
|
1044
|
-
/**
|
|
1045
|
-
* Get available languages from tessdata.
|
|
1046
|
-
* NOTE: this only returns results after `init` has been called with a valid selection of languages
|
|
1047
|
-
* @returns {Promise<Language[]>} Returns the languages that are available to tesseract.
|
|
1048
|
-
*/
|
|
1049
|
-
getAvailableLanguages(): Promise<Language[]>;
|
|
1050
|
-
/**
|
|
1051
|
-
* Clear internal recognition results/state.
|
|
1052
|
-
* @returns {Promise<void>}
|
|
1053
|
-
*/
|
|
1054
|
-
clear(): Promise<void>;
|
|
1055
|
-
/**
|
|
1056
|
-
* Release native resources and destroy the instance.
|
|
1057
|
-
* @returns {Promise<void>}
|
|
1058
|
-
*/
|
|
1059
|
-
end(): Promise<void>;
|
|
1060
|
-
}
|
|
1061
|
-
export type NativeTesseract = TesseractInstance;
|
|
1062
|
-
export type TesseractConstructor = new () => TesseractInstance;
|
|
1063
208
|
declare const NativeTesseract: TesseractConstructor;
|
|
1064
209
|
declare class Tesseract extends NativeTesseract {
|
|
210
|
+
document: TesseractDocumentApi;
|
|
1065
211
|
constructor();
|
|
1066
|
-
init(options
|
|
212
|
+
init(options?: TesseractInitOptions): Promise<void>;
|
|
213
|
+
ensureTrainingData({ lang, dataPath, cachePath, downloadBaseUrl }: EnsureTrainedDataOptions, progressCallback?: (info: TrainingDataDownloadProgress) => void): Promise<string>;
|
|
1067
214
|
}
|
|
1068
215
|
export { Tesseract, NativeTesseract };
|
|
1069
216
|
export default Tesseract;
|