PyPI - openocr-python - Versions diffs - 0.0.2__py3-none-any.whl - Mend

openocr-python 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (323) hide show

openocr/__init__.py +11 -0
openocr/configs/det/dbnet/repvit_db.yml +173 -0
openocr/configs/rec/abinet/resnet45_trans_abinet_lang.yml +94 -0
openocr/configs/rec/abinet/resnet45_trans_abinet_wo_lang.yml +93 -0
openocr/configs/rec/abinet/svtrv2_abinet_lang.yml +130 -0
openocr/configs/rec/abinet/svtrv2_abinet_wo_lang.yml +128 -0
openocr/configs/rec/aster/resnet31_lstm_aster_tps_on.yml +93 -0
openocr/configs/rec/aster/svtrv2_aster.yml +127 -0
openocr/configs/rec/aster/svtrv2_aster_tps_on.yml +102 -0
openocr/configs/rec/autostr/autostr_lstm_aster_tps_on.yml +95 -0
openocr/configs/rec/busnet/svtrv2_busnet.yml +135 -0
openocr/configs/rec/busnet/svtrv2_busnet_pretraining.yml +134 -0
openocr/configs/rec/busnet/vit_busnet.yml +104 -0
openocr/configs/rec/busnet/vit_busnet_pretraining.yml +104 -0
openocr/configs/rec/cam/convnextv2_cam_tps_on.yml +118 -0
openocr/configs/rec/cam/convnextv2_tiny_cam_tps_on.yml +118 -0
openocr/configs/rec/cam/svtrv2_cam_tps_on.yml +123 -0
openocr/configs/rec/cdistnet/resnet45_trans_cdistnet.yml +93 -0
openocr/configs/rec/cdistnet/svtrv2_cdistnet.yml +139 -0
openocr/configs/rec/cppd/svtr_base_cppd.yml +123 -0
openocr/configs/rec/cppd/svtr_base_cppd_ch.yml +126 -0
openocr/configs/rec/cppd/svtr_base_cppd_h8.yml +123 -0
openocr/configs/rec/cppd/svtr_base_cppd_syn.yml +124 -0
openocr/configs/rec/cppd/svtrv2_cppd.yml +150 -0
openocr/configs/rec/dan/resnet45_fpn_dan.yml +98 -0
openocr/configs/rec/dan/svtrv2_dan.yml +130 -0
openocr/configs/rec/focalsvtr/focalsvtr_ctc.yml +137 -0
openocr/configs/rec/gtc/svtrv2_lnconv_nrtr_gtc.yml +168 -0
openocr/configs/rec/gtc/svtrv2_lnconv_smtr_gtc_long_infer.yml +151 -0
openocr/configs/rec/gtc/svtrv2_lnconv_smtr_gtc_smtr_long.yml +150 -0
openocr/configs/rec/gtc/svtrv2_lnconv_smtr_gtc_stream.yml +152 -0
openocr/configs/rec/igtr/svtr_base_ds_igtr.yml +157 -0
openocr/configs/rec/lister/focalsvtr_lister_wo_fem_maxratio12.yml +133 -0
openocr/configs/rec/lister/svtrv2_lister_wo_fem_maxratio12.yml +138 -0
openocr/configs/rec/lpv/svtr_base_lpv.yml +124 -0
openocr/configs/rec/lpv/svtr_base_lpv_wo_glrm.yml +123 -0
openocr/configs/rec/lpv/svtrv2_lpv.yml +147 -0
openocr/configs/rec/lpv/svtrv2_lpv_wo_glrm.yml +146 -0
openocr/configs/rec/maerec/vit_nrtr.yml +116 -0
openocr/configs/rec/matrn/resnet45_trans_matrn.yml +95 -0
openocr/configs/rec/matrn/svtrv2_matrn.yml +130 -0
openocr/configs/rec/mgpstr/svtrv2_mgpstr_only_char.yml +140 -0
openocr/configs/rec/mgpstr/vit_base_mgpstr_only_char.yml +111 -0
openocr/configs/rec/mgpstr/vit_large_mgpstr_only_char.yml +110 -0
openocr/configs/rec/mgpstr/vit_mgpstr.yml +110 -0
openocr/configs/rec/mgpstr/vit_mgpstr_only_char.yml +110 -0
openocr/configs/rec/moran/resnet31_lstm_moran.yml +92 -0
openocr/configs/rec/nrtr/focalsvtr_nrtr_maxraio12.yml +145 -0
openocr/configs/rec/nrtr/nrtr.yml +107 -0
openocr/configs/rec/nrtr/svtr_base_nrtr.yml +118 -0
openocr/configs/rec/nrtr/svtr_base_nrtr_syn.yml +119 -0
openocr/configs/rec/nrtr/svtrv2_nrtr.yml +146 -0
openocr/configs/rec/ote/svtr_base_h8_ote.yml +117 -0
openocr/configs/rec/ote/svtr_base_ote.yml +116 -0
openocr/configs/rec/parseq/focalsvtr_parseq_maxratio12.yml +140 -0
openocr/configs/rec/parseq/svrtv2_parseq.yml +136 -0
openocr/configs/rec/parseq/vit_parseq.yml +100 -0
openocr/configs/rec/robustscanner/resnet31_robustscanner.yml +102 -0
openocr/configs/rec/robustscanner/svtrv2_robustscanner.yml +134 -0
openocr/configs/rec/sar/resnet31_lstm_sar.yml +94 -0
openocr/configs/rec/sar/svtrv2_sar.yml +128 -0
openocr/configs/rec/seed/resnet31_lstm_seed_tps_on.yml +96 -0
openocr/configs/rec/smtr/focalsvtr_smtr.yml +150 -0
openocr/configs/rec/smtr/focalsvtr_smtr_long.yml +133 -0
openocr/configs/rec/smtr/svtrv2_smtr.yml +150 -0
openocr/configs/rec/smtr/svtrv2_smtr_bi.yml +136 -0
openocr/configs/rec/srn/resnet50_fpn_srn.yml +97 -0
openocr/configs/rec/srn/svtrv2_srn.yml +131 -0
openocr/configs/rec/svtrs/convnextv2_ctc.yml +105 -0
openocr/configs/rec/svtrs/convnextv2_h8_ctc.yml +105 -0
openocr/configs/rec/svtrs/convnextv2_h8_rctc.yml +106 -0
openocr/configs/rec/svtrs/convnextv2_rctc.yml +106 -0
openocr/configs/rec/svtrs/convnextv2_tiny_h8_ctc.yml +105 -0
openocr/configs/rec/svtrs/convnextv2_tiny_h8_rctc.yml +106 -0
openocr/configs/rec/svtrs/crnn_ctc.yml +99 -0
openocr/configs/rec/svtrs/crnn_ctc_long.yml +116 -0
openocr/configs/rec/svtrs/focalnet_base_ctc.yml +108 -0
openocr/configs/rec/svtrs/focalnet_base_rctc.yml +109 -0
openocr/configs/rec/svtrs/focalsvtr_ctc.yml +106 -0
openocr/configs/rec/svtrs/focalsvtr_rctc.yml +107 -0
openocr/configs/rec/svtrs/resnet45_trans_ctc.yml +103 -0
openocr/configs/rec/svtrs/resnet45_trans_rctc.yml +104 -0
openocr/configs/rec/svtrs/svtr_base_ctc.yml +110 -0
openocr/configs/rec/svtrs/svtr_base_rctc.yml +111 -0
openocr/configs/rec/svtrs/svtrnet_ctc_syn.yml +111 -0
openocr/configs/rec/svtrs/vit_ctc.yml +103 -0
openocr/configs/rec/svtrs/vit_rctc.yml +103 -0
openocr/configs/rec/svtrv2/repsvtr_ch.yml +121 -0
openocr/configs/rec/svtrv2/svtrv2_ch.yml +133 -0
openocr/configs/rec/svtrv2/svtrv2_ctc.yml +136 -0
openocr/configs/rec/svtrv2/svtrv2_rctc.yml +135 -0
openocr/configs/rec/svtrv2/svtrv2_small_rctc.yml +135 -0
openocr/configs/rec/svtrv2/svtrv2_smtr_gtc_rctc.yml +162 -0
openocr/configs/rec/svtrv2/svtrv2_smtr_gtc_rctc_ch.yml +153 -0
openocr/configs/rec/svtrv2/svtrv2_tiny_rctc.yml +135 -0
openocr/configs/rec/visionlan/resnet45_trans_visionlan_LA.yml +103 -0
openocr/configs/rec/visionlan/resnet45_trans_visionlan_LF_1.yml +102 -0
openocr/configs/rec/visionlan/resnet45_trans_visionlan_LF_2.yml +103 -0
openocr/configs/rec/visionlan/svtrv2_visionlan_LA.yml +112 -0
openocr/configs/rec/visionlan/svtrv2_visionlan_LF_1.yml +111 -0
openocr/configs/rec/visionlan/svtrv2_visionlan_LF_2.yml +112 -0
openocr/demo_gradio.py +128 -0
openocr/opendet/modeling/__init__.py +11 -0
openocr/opendet/modeling/backbones/__init__.py +14 -0
openocr/opendet/modeling/backbones/repvit.py +340 -0
openocr/opendet/modeling/base_detector.py +69 -0
openocr/opendet/modeling/heads/__init__.py +14 -0
openocr/opendet/modeling/heads/db_head.py +73 -0
openocr/opendet/modeling/necks/__init__.py +14 -0
openocr/opendet/modeling/necks/db_fpn.py +609 -0
openocr/opendet/postprocess/__init__.py +18 -0
openocr/opendet/postprocess/db_postprocess.py +273 -0
openocr/opendet/preprocess/__init__.py +154 -0
openocr/opendet/preprocess/crop_resize.py +121 -0
openocr/opendet/preprocess/db_resize_for_test.py +135 -0
openocr/openrec/losses/__init__.py +62 -0
openocr/openrec/losses/abinet_loss.py +42 -0
openocr/openrec/losses/ar_loss.py +23 -0
openocr/openrec/losses/cam_loss.py +48 -0
openocr/openrec/losses/cdistnet_loss.py +34 -0
openocr/openrec/losses/ce_loss.py +68 -0
openocr/openrec/losses/cppd_loss.py +77 -0
openocr/openrec/losses/ctc_loss.py +33 -0
openocr/openrec/losses/igtr_loss.py +12 -0
openocr/openrec/losses/lister_loss.py +14 -0
openocr/openrec/losses/lpv_loss.py +30 -0
openocr/openrec/losses/mgp_loss.py +34 -0
openocr/openrec/losses/parseq_loss.py +12 -0
openocr/openrec/losses/robustscanner_loss.py +20 -0
openocr/openrec/losses/seed_loss.py +46 -0
openocr/openrec/losses/smtr_loss.py +12 -0
openocr/openrec/losses/srn_loss.py +40 -0
openocr/openrec/losses/visionlan_loss.py +58 -0
openocr/openrec/metrics/__init__.py +19 -0
openocr/openrec/metrics/rec_metric.py +270 -0
openocr/openrec/metrics/rec_metric_gtc.py +58 -0
openocr/openrec/metrics/rec_metric_long.py +142 -0
openocr/openrec/metrics/rec_metric_mgp.py +93 -0
openocr/openrec/modeling/__init__.py +11 -0
openocr/openrec/modeling/base_recognizer.py +69 -0
openocr/openrec/modeling/common.py +238 -0
openocr/openrec/modeling/decoders/__init__.py +109 -0
openocr/openrec/modeling/decoders/abinet_decoder.py +283 -0
openocr/openrec/modeling/decoders/aster_decoder.py +170 -0
openocr/openrec/modeling/decoders/bus_decoder.py +133 -0
openocr/openrec/modeling/decoders/cam_decoder.py +43 -0
openocr/openrec/modeling/decoders/cdistnet_decoder.py +334 -0
openocr/openrec/modeling/decoders/cppd_decoder.py +393 -0
openocr/openrec/modeling/decoders/ctc_decoder.py +203 -0
openocr/openrec/modeling/decoders/dan_decoder.py +203 -0
openocr/openrec/modeling/decoders/igtr_decoder.py +815 -0
openocr/openrec/modeling/decoders/lister_decoder.py +535 -0
openocr/openrec/modeling/decoders/lpv_decoder.py +119 -0
openocr/openrec/modeling/decoders/matrn_decoder.py +236 -0
openocr/openrec/modeling/decoders/mgp_decoder.py +99 -0
openocr/openrec/modeling/decoders/nrtr_decoder.py +439 -0
openocr/openrec/modeling/decoders/ote_decoder.py +205 -0
openocr/openrec/modeling/decoders/parseq_decoder.py +504 -0
openocr/openrec/modeling/decoders/rctc_decoder.py +70 -0
openocr/openrec/modeling/decoders/robustscanner_decoder.py +749 -0
openocr/openrec/modeling/decoders/sar_decoder.py +236 -0
openocr/openrec/modeling/decoders/smtr_decoder.py +621 -0
openocr/openrec/modeling/decoders/smtr_decoder_nattn.py +521 -0
openocr/openrec/modeling/decoders/srn_decoder.py +283 -0
openocr/openrec/modeling/decoders/visionlan_decoder.py +321 -0
openocr/openrec/modeling/encoders/__init__.py +39 -0
openocr/openrec/modeling/encoders/autostr_encoder.py +327 -0
openocr/openrec/modeling/encoders/cam_encoder.py +760 -0
openocr/openrec/modeling/encoders/convnextv2.py +213 -0
openocr/openrec/modeling/encoders/focalsvtr.py +631 -0
openocr/openrec/modeling/encoders/nrtr_encoder.py +28 -0
openocr/openrec/modeling/encoders/rec_hgnet.py +346 -0
openocr/openrec/modeling/encoders/rec_lcnetv3.py +488 -0
openocr/openrec/modeling/encoders/rec_mobilenet_v3.py +132 -0
openocr/openrec/modeling/encoders/rec_mv1_enhance.py +254 -0
openocr/openrec/modeling/encoders/rec_nrtr_mtb.py +37 -0
openocr/openrec/modeling/encoders/rec_resnet_31.py +213 -0
openocr/openrec/modeling/encoders/rec_resnet_45.py +183 -0
openocr/openrec/modeling/encoders/rec_resnet_fpn.py +216 -0
openocr/openrec/modeling/encoders/rec_resnet_vd.py +252 -0
openocr/openrec/modeling/encoders/repvit.py +338 -0
openocr/openrec/modeling/encoders/resnet31_rnn.py +123 -0
openocr/openrec/modeling/encoders/svtrnet.py +574 -0
openocr/openrec/modeling/encoders/svtrnet2dpos.py +616 -0
openocr/openrec/modeling/encoders/svtrv2.py +470 -0
openocr/openrec/modeling/encoders/svtrv2_lnconv.py +503 -0
openocr/openrec/modeling/encoders/svtrv2_lnconv_two33.py +517 -0
openocr/openrec/modeling/encoders/vit.py +120 -0
openocr/openrec/modeling/transforms/__init__.py +15 -0
openocr/openrec/modeling/transforms/aster_tps.py +262 -0
openocr/openrec/modeling/transforms/moran.py +136 -0
openocr/openrec/modeling/transforms/tps.py +246 -0
openocr/openrec/optimizer/__init__.py +73 -0
openocr/openrec/optimizer/lr.py +227 -0
openocr/openrec/postprocess/__init__.py +72 -0
openocr/openrec/postprocess/abinet_postprocess.py +37 -0
openocr/openrec/postprocess/ar_postprocess.py +63 -0
openocr/openrec/postprocess/ce_postprocess.py +43 -0
openocr/openrec/postprocess/char_postprocess.py +108 -0
openocr/openrec/postprocess/cppd_postprocess.py +42 -0
openocr/openrec/postprocess/ctc_postprocess.py +119 -0
openocr/openrec/postprocess/igtr_postprocess.py +100 -0
openocr/openrec/postprocess/lister_postprocess.py +59 -0
openocr/openrec/postprocess/mgp_postprocess.py +143 -0
openocr/openrec/postprocess/nrtr_postprocess.py +75 -0
openocr/openrec/postprocess/smtr_postprocess.py +73 -0
openocr/openrec/postprocess/srn_postprocess.py +80 -0
openocr/openrec/postprocess/visionlan_postprocess.py +81 -0
openocr/openrec/preprocess/__init__.py +173 -0
openocr/openrec/preprocess/abinet_aug.py +473 -0
openocr/openrec/preprocess/abinet_label_encode.py +36 -0
openocr/openrec/preprocess/ar_label_encode.py +36 -0
openocr/openrec/preprocess/auto_augment.py +1012 -0
openocr/openrec/preprocess/cam_label_encode.py +141 -0
openocr/openrec/preprocess/ce_label_encode.py +116 -0
openocr/openrec/preprocess/char_label_encode.py +36 -0
openocr/openrec/preprocess/cppd_label_encode.py +173 -0
openocr/openrec/preprocess/ctc_label_encode.py +124 -0
openocr/openrec/preprocess/ep_label_encode.py +38 -0
openocr/openrec/preprocess/igtr_label_encode.py +360 -0
openocr/openrec/preprocess/mgp_label_encode.py +95 -0
openocr/openrec/preprocess/parseq_aug.py +150 -0
openocr/openrec/preprocess/rec_aug.py +211 -0
openocr/openrec/preprocess/resize.py +534 -0
openocr/openrec/preprocess/smtr_label_encode.py +125 -0
openocr/openrec/preprocess/srn_label_encode.py +37 -0
openocr/openrec/preprocess/visionlan_label_encode.py +67 -0
openocr/tools/create_lmdb_dataset.py +118 -0
openocr/tools/data/__init__.py +94 -0
openocr/tools/data/collate_fn.py +100 -0
openocr/tools/data/lmdb_dataset.py +142 -0
openocr/tools/data/lmdb_dataset_test.py +166 -0
openocr/tools/data/multi_scale_sampler.py +177 -0
openocr/tools/data/ratio_dataset.py +217 -0
openocr/tools/data/ratio_dataset_test.py +273 -0
openocr/tools/data/ratio_dataset_tvresize.py +213 -0
openocr/tools/data/ratio_dataset_tvresize_test.py +276 -0
openocr/tools/data/ratio_sampler.py +190 -0
openocr/tools/data/simple_dataset.py +263 -0
openocr/tools/data/strlmdb_dataset.py +143 -0
openocr/tools/engine/__init__.py +5 -0
openocr/tools/engine/config.py +158 -0
openocr/tools/engine/trainer.py +621 -0
openocr/tools/eval_rec.py +41 -0
openocr/tools/eval_rec_all_ch.py +184 -0
openocr/tools/eval_rec_all_en.py +206 -0
openocr/tools/eval_rec_all_long.py +119 -0
openocr/tools/eval_rec_all_long_simple.py +122 -0
openocr/tools/export_rec.py +118 -0
openocr/tools/infer/onnx_engine.py +65 -0
openocr/tools/infer/predict_rec.py +140 -0
openocr/tools/infer/utility.py +234 -0
openocr/tools/infer_det.py +449 -0
openocr/tools/infer_e2e.py +462 -0
openocr/tools/infer_e2e_parallel.py +184 -0
openocr/tools/infer_rec.py +371 -0
openocr/tools/train_rec.py +37 -0
openocr/tools/utility.py +45 -0
openocr/tools/utils/EN_symbol_dict.txt +94 -0
openocr/tools/utils/__init__.py +0 -0
openocr/tools/utils/ckpt.py +87 -0
openocr/tools/utils/dict/ar_dict.txt +117 -0
openocr/tools/utils/dict/arabic_dict.txt +161 -0
openocr/tools/utils/dict/be_dict.txt +145 -0
openocr/tools/utils/dict/bg_dict.txt +140 -0
openocr/tools/utils/dict/chinese_cht_dict.txt +8421 -0
openocr/tools/utils/dict/cyrillic_dict.txt +163 -0
openocr/tools/utils/dict/devanagari_dict.txt +167 -0
openocr/tools/utils/dict/en_dict.txt +63 -0
openocr/tools/utils/dict/fa_dict.txt +136 -0
openocr/tools/utils/dict/french_dict.txt +136 -0
openocr/tools/utils/dict/german_dict.txt +143 -0
openocr/tools/utils/dict/hi_dict.txt +162 -0
openocr/tools/utils/dict/it_dict.txt +118 -0
openocr/tools/utils/dict/japan_dict.txt +4399 -0
openocr/tools/utils/dict/ka_dict.txt +153 -0
openocr/tools/utils/dict/kie_dict/xfund_class_list.txt +4 -0
openocr/tools/utils/dict/korean_dict.txt +3688 -0
openocr/tools/utils/dict/latex_symbol_dict.txt +111 -0
openocr/tools/utils/dict/latin_dict.txt +185 -0
openocr/tools/utils/dict/layout_dict/layout_cdla_dict.txt +10 -0
openocr/tools/utils/dict/layout_dict/layout_publaynet_dict.txt +5 -0
openocr/tools/utils/dict/layout_dict/layout_table_dict.txt +1 -0
openocr/tools/utils/dict/mr_dict.txt +153 -0
openocr/tools/utils/dict/ne_dict.txt +153 -0
openocr/tools/utils/dict/oc_dict.txt +96 -0
openocr/tools/utils/dict/pu_dict.txt +130 -0
openocr/tools/utils/dict/rs_dict.txt +91 -0
openocr/tools/utils/dict/rsc_dict.txt +134 -0
openocr/tools/utils/dict/ru_dict.txt +125 -0
openocr/tools/utils/dict/spin_dict.txt +68 -0
openocr/tools/utils/dict/ta_dict.txt +128 -0
openocr/tools/utils/dict/table_dict.txt +277 -0
openocr/tools/utils/dict/table_master_structure_dict.txt +39 -0
openocr/tools/utils/dict/table_structure_dict.txt +28 -0
openocr/tools/utils/dict/table_structure_dict_ch.txt +48 -0
openocr/tools/utils/dict/te_dict.txt +151 -0
openocr/tools/utils/dict/ug_dict.txt +114 -0
openocr/tools/utils/dict/uk_dict.txt +142 -0
openocr/tools/utils/dict/ur_dict.txt +137 -0
openocr/tools/utils/dict/xi_dict.txt +110 -0
openocr/tools/utils/dict90.txt +90 -0
openocr/tools/utils/e2e_metric/Deteval.py +802 -0
openocr/tools/utils/e2e_metric/polygon_fast.py +70 -0
openocr/tools/utils/e2e_utils/extract_batchsize.py +86 -0
openocr/tools/utils/e2e_utils/extract_textpoint_fast.py +479 -0
openocr/tools/utils/e2e_utils/extract_textpoint_slow.py +582 -0
openocr/tools/utils/e2e_utils/pgnet_pp_utils.py +159 -0
openocr/tools/utils/e2e_utils/visual.py +152 -0
openocr/tools/utils/en_dict.txt +95 -0
openocr/tools/utils/gen_label.py +68 -0
openocr/tools/utils/ic15_dict.txt +36 -0
openocr/tools/utils/logging.py +56 -0
openocr/tools/utils/poly_nms.py +132 -0
openocr/tools/utils/ppocr_keys_v1.txt +6623 -0
openocr/tools/utils/stats.py +58 -0
openocr/tools/utils/utility.py +165 -0
openocr/tools/utils/visual.py +117 -0
openocr_python-0.0.2.dist-info/LICENCE +201 -0
openocr_python-0.0.2.dist-info/METADATA +98 -0
openocr_python-0.0.2.dist-info/RECORD +323 -0
openocr_python-0.0.2.dist-info/WHEEL +5 -0
openocr_python-0.0.2.dist-info/top_level.txt +1 -0

openocr/tools/infer_det.py ADDED Viewed

@@ -0,0 +1,449 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from pathlib import Path
+import time
+import numpy as np
+import os
+import sys
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(__dir__)
+sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..')))
+os.environ['FLAGS_allocator_strategy'] = 'auto_growth'
+import cv2
+import json
+import torch
+from tools.engine import Config
+from tools.utility import ArgsParser
+from tools.utils.ckpt import load_ckpt
+from tools.utils.logging import get_logger
+from tools.utils.utility import get_image_file_list
+# Module-wide logger obtained from the project's logging helper.
+logger = get_logger()
+# Directory containing this script; used to locate the bundled default config.
+root_dir = Path(__file__).resolve().parent
+# Default detector config, addressed relative to this script's directory.
+DEFAULT_CFG_PATH_DET = str(root_dir / '../configs/det/dbnet/repvit_db.yml')
+MODEL_NAME_DET = './openocr_det_repvit_ch.pth'  # model file name
+DOWNLOAD_URL_DET = 'https://github.com/Topdu/OpenOCR/releases/download/develop0.0.1/openocr_det_repvit_ch.pth'  # model file URL
+def check_and_download_model(model_name: str, url: str):
+    """
+    Return a local path to a pretrained model, downloading it if necessary.
+    The lookup order is: ``model_name`` exactly as given, then
+    ``~/.cache/openocr/<model_name>``. If neither exists, the file is fetched
+    from ``url`` into that cache location.
+    Args:
+        model_name (str): Model file name or path, e.g. "model.pt".
+        url (str): Download URL used when no local copy is found.
+    Returns:
+        str: Path to the model file (``model_name`` itself when it already
+            exists, otherwise the path inside the cache directory).
+    Raises:
+        Exception: Any error raised while downloading or writing the file is
+            logged and re-raised unchanged.
+    """
+    if os.path.exists(model_name):
+        return model_name
+    # Fixed cache location: ".cache/openocr" under the user's home directory.
+    cache_dir = Path.home() / '.cache' / 'openocr'
+    model_path = cache_dir / model_name
+    # A cached copy already exists: return it without downloading.
+    if model_path.exists():
+        logger.info(f'Model already exists at: {model_path}')
+        return str(model_path)
+    logger.info(f'Model not found. Downloading from {url}...')
+    # Create the directory that will hold the file (covers model names that
+    # contain sub-directories as well as the cache root itself).
+    model_path.parent.mkdir(parents=True, exist_ok=True)
+    # Download into a sibling ".part" file and rename it only once complete,
+    # so an interrupted download can never be mistaken for a valid model by
+    # the existence checks above on a later call.
+    tmp_path = model_path.with_name(model_path.name + '.part')
+    try:
+        import shutil
+        import urllib.request
+        with urllib.request.urlopen(url) as response, open(tmp_path,
+                                                           'wb') as out_file:
+            # Stream in chunks instead of holding the whole model in memory.
+            shutil.copyfileobj(response, out_file)
+        os.replace(tmp_path, model_path)
+        logger.info(f'Model downloaded and saved at: {model_path}')
+        return str(model_path)
+    except Exception as e:
+        logger.info(f'Error downloading the model: {e}')
+        # Best-effort removal of the partial file; the original error is the
+        # one that matters, so a failed cleanup must not mask it.
+        try:
+            if tmp_path.exists():
+                tmp_path.unlink()
+        except OSError:
+            pass
+        raise
+def replace_batchnorm(net):
+    """
+    Recursively fuse sub-modules of ``net`` in place and drop bare BatchNorm2d.
+    For every direct child: a child exposing a ``fuse`` attribute is replaced
+    by the result of ``child.fuse()`` and the walk continues inside that
+    result; a ``torch.nn.BatchNorm2d`` child is replaced by
+    ``torch.nn.Identity``; any other child is walked recursively.
+    Args:
+        net (torch.nn.Module): Module whose children are rewritten in place.
+    """
+    for name, module in net.named_children():
+        if hasattr(module, 'fuse'):
+            replacement = module.fuse()
+            setattr(net, name, replacement)
+            replace_batchnorm(replacement)
+            continue
+        if isinstance(module, torch.nn.BatchNorm2d):
+            setattr(net, name, torch.nn.Identity())
+            continue
+        replace_batchnorm(module)
+def padding_image(img, size=(640, 640)):
+    """
+    Pad an image on its bottom and right edges up to ``size`` using OpenCV.
+    The input pixels stay anchored at the top-left corner; the added border
+    is a constant fill of ``[0, 0, 0]``. No check is made that the image
+    fits inside ``size``; the padding amounts are simply the differences
+    between the target and the image dimensions.
+    :param img: Image array whose first two dimensions are (height, width).
+    :param size: Target (width, height) of the padded image (default 640x640).
+    :return: The padded image returned by ``cv2.copyMakeBorder``.
+    """
+    height, width = img.shape[:2]
+    target_width, target_height = size
+    # Arguments after the image are top, bottom, left, right padding.
+    return cv2.copyMakeBorder(img,
+                              0,
+                              target_height - height,
+                              0,
+                              target_width - width,
+                              cv2.BORDER_CONSTANT,
+                              value=[0, 0, 0])
+def resize_image(img, size=(640, 640), over_lap=64):
+    """
+    Pad or tile an image so that every returned piece has the target size.
+    - If the image fits inside ``size`` it is returned as a single piece,
+      padded at the bottom/right by ``padding_image`` when it is smaller.
+    - Otherwise it is cut into tiles of ``size`` whose origins advance by
+      ``size - over_lap``. A tile that would run past the right/bottom edge
+      is shifted back inside the image, and a tile that is still smaller
+      than ``size`` (image narrower or shorter than one tile) is padded.
+    :param img: Image array whose first two dimensions are (height, width).
+    :param size: Target (width, height) of each piece (default 640x640).
+    :param over_lap: Overlap in pixels between neighbouring tiles.
+    :return: Tuple ``(pieces, positions)`` where ``positions[i]`` is
+        ``[left, top, right, bottom]`` of ``pieces[i]`` inside ``img``.
+    :raises ValueError: If tiling is needed and ``over_lap`` is not smaller
+        than both target dimensions.
+    """
+    img_height, img_width = img.shape[:2]
+    target_width, target_height = size
+    # The image fits in one piece: return it, padded only if it is smaller.
+    if img_width <= target_width and img_height <= target_height:
+        if img_width != target_width or img_height != target_height:
+            img = padding_image(img, size)
+        return [img], [[0, 0, img_width, img_height]]
+    step_x = target_width - over_lap
+    step_y = target_height - over_lap
+    if step_x <= 0 or step_y <= 0:
+        # A non-positive stride would either fail inside range() or silently
+        # produce no tiles at all.
+        raise ValueError(
+            'over_lap must be smaller than both target dimensions')
+    # max(..., 1): a side no longer than over_lap must still produce one
+    # row/column of tiles; otherwise very narrow or very short images would
+    # yield no tiles.
+    tops = range(0, max(img_height - over_lap, 1), step_y)
+    lefts = range(0, max(img_width - over_lap, 1), step_x)
+    cropped_img_list = []
+    crop_positions = []
+    for top in tops:
+        bottom = min(top + target_height, img_height)
+        if bottom >= img_height:
+            # Last row: slide the tile up so it ends at the image border.
+            top = max(0, img_height - target_height)
+        for left in lefts:
+            right = min(left + target_width, img_width)
+            if right >= img_width:
+                # Last column: slide the tile left to end at the border.
+                left = max(0, img_width - target_width)
+            cropped_img = img[top:bottom, left:right]
+            if bottom - top < target_height or right - left < target_width:
+                cropped_img = padding_image(cropped_img, size)
+            cropped_img_list.append(cropped_img)
+            # Record where this tile sits in the input image.
+            crop_positions.append([left, top, right, bottom])
+    return cropped_img_list, crop_positions
+def restore_preds(preds, crop_positions, original_size, threshold=0.3):
+    """
+    Merge per-tile prediction maps onto one canvas of the full image size.
+    Each tile map is binarised with ``threshold`` and added at its crop
+    position, so a canvas value counts how many tiles marked that pixel as
+    above the threshold (pixels covered by overlapping tiles can exceed 1).
+    Args:
+        preds (torch.Tensor): Tile maps, iterated along the first dimension;
+            each item is indexed as ``[:, :height, :width]``.
+        crop_positions (list): ``[left, top, right, bottom]`` of each tile on
+            the canvas, in the same order as ``preds``.
+        original_size (tuple): ``(height, width)`` of the canvas.
+        threshold (float, optional): Values strictly greater than this count
+            as positive. Defaults to 0.3.
+    Returns:
+        torch.Tensor: Canvas of shape ``(1, 1, height, width)`` with the dtype
+            and device of ``preds``.
+    """
+    restored_pred = torch.zeros((1, 1, original_size[0], original_size[1]),
+                                dtype=preds.dtype,
+                                device=preds.device)
+    for cropped_pred, (left, top, right, bottom) in zip(preds, crop_positions):
+        crop_height = bottom - top
+        crop_width = right - left
+        # Drop any padded area: only the top-left crop_height x crop_width
+        # region of the tile is copied back.
+        mask = cropped_pred[:, :crop_height, :crop_width] > threshold
+        restored_pred[:, :, top:bottom, left:right] += mask.to(preds.dtype)
+    return restored_pred
+def draw_det_res(dt_boxes, img, img_name, save_path):
+    """
+    Draw detected boxes on an image and write the result into ``save_path``.
+    The polygons are drawn directly on ``img``, so the array passed in is
+    modified. The output file keeps the base name of ``img_name``.
+    Args:
+        dt_boxes: Iterable of boxes; each is converted to an int32 array and
+            reshaped to ``(-1, 1, 2)`` point pairs before drawing.
+        img: Image array to draw on.
+        img_name (str): Source image path; only its base name is used.
+        save_path (str): Output directory, created when missing.
+    """
+    for box in dt_boxes:
+        polygon = np.array(box).astype(np.int32).reshape((-1, 1, 2))
+        cv2.polylines(img, [polygon], True, color=(255, 255, 0), thickness=2)
+    # exist_ok avoids the check-then-create race when several processes
+    # write into the same output directory.
+    os.makedirs(save_path, exist_ok=True)
+    cv2.imwrite(os.path.join(save_path, os.path.basename(img_name)), img)
+def set_device(device, numId=0):
+    """
+    Translate a device name into a ``torch.device``.
+    Args:
+        device (str): ``'gpu'`` requests CUDA; any other value selects CPU.
+        numId (int, optional): CUDA device index. Defaults to 0.
+    Returns:
+        torch.device: ``cuda:<numId>`` when ``'gpu'`` is requested and CUDA
+            is available, otherwise ``cpu``.
+    """
+    use_cuda = device == 'gpu' and torch.cuda.is_available()
+    return torch.device(f'cuda:{numId}' if use_cuda else 'cpu')
+class OpenDetector(object):
+    """
+    Text detector wrapper: builds the model, the preprocessing operators and
+    the post-processor from a config, and runs them on image files or arrays.
+    """
+    def __init__(self, config=None, numId=0):
+        """
+        Build the detection model and its pre/post-processing pipeline.
+        Args:
+            config (dict, optional): Configuration. When None, the config at
+                DEFAULT_CFG_PATH_DET is loaded and the pretrained weights are
+                located (or downloaded) via check_and_download_model.
+            numId (int, optional): CUDA device index, used when
+                config['Global']['device'] is 'gpu' and CUDA is available.
+                Defaults to 0.
+        """
+        if config is None:
+            config = Config(DEFAULT_CFG_PATH_DET).cfg
+            config['Global']['pretrained_model'] = check_and_download_model(
+                MODEL_NAME_DET, DOWNLOAD_URL_DET)
+        from opendet.modeling import build_model as build_det_model
+        from opendet.postprocess import build_post_process
+        from opendet.preprocess import create_operators, transform
+        self.transform = transform
+        global_config = config['Global']
+        # build model
+        self.model = build_det_model(config['Architecture'])
+        self.model.eval()
+        load_ckpt(self.model, config)
+        # Fuse/strip batch-norm layers of the backbone after the weights
+        # have been loaded.
+        replace_batchnorm(self.model.backbone)
+        self.device = set_device(config['Global']['device'], numId=numId)
+        self.model.to(device=self.device)
+        # create data ops: label-related transforms are skipped, and KeepKeys
+        # is narrowed to the two entries that inference consumes.
+        transforms = []
+        for op in config['Eval']['dataset']['transforms']:
+            op_name = list(op)[0]
+            if 'Label' in op_name:
+                continue
+            elif op_name == 'KeepKeys':
+                op[op_name]['keep_keys'] = ['image', 'shape']
+            transforms.append(op)
+        self.ops = create_operators(transforms, global_config)
+        # build post process
+        self.post_process_class = build_post_process(config['PostProcess'],
+                                                     global_config)
+    def _iter_inputs(self, img_path, img_numpy_list, img_numpy, expand_path):
+        """
+        Yield one ``{'image': ...}`` dict per input image.
+        ``img_numpy`` takes precedence; otherwise the number of images comes
+        from ``img_path`` when given, else from ``img_numpy_list``. Arrays
+        are yielded as they are. Files are read as bytes and passed through
+        the first operator only (presumably the image decoder; confirm
+        against the config).
+        Args:
+            expand_path (bool): When True, ``img_path`` is first expanded
+                with get_image_file_list; when False it is indexed directly
+                and must already be a sequence of file paths.
+        Raises:
+            Exception: If no input was provided.
+        """
+        if img_numpy is not None:
+            img_numpy_list = [img_numpy]
+            num_img = 1
+        elif img_path is not None:
+            if expand_path:
+                img_path = get_image_file_list(img_path)
+            num_img = len(img_path)
+        elif img_numpy_list is not None:
+            num_img = len(img_numpy_list)
+        else:
+            raise Exception('No input image path or numpy array.')
+        for img_idx in range(num_img):
+            if img_numpy_list is not None:
+                yield {'image': img_numpy_list[img_idx]}
+            else:
+                with open(img_path[img_idx], 'rb') as f:
+                    data = {'image': f.read()}
+                yield self.transform(data, self.ops[:1])
+    def crop_infer(
+        self,
+        img_path=None,
+        img_numpy_list=None,
+        img_numpy=None,
+    ):
+        """
+        Detect text by tiling a rescaled copy of each image.
+        Each image is resized so that its longer side is 2 * 640 - 64 pixels,
+        split into overlapping 640x640 tiles by resize_image, run through the
+        model as one batch, and the per-tile maps are merged by restore_preds
+        before post-processing.
+        Args:
+            img_path (sequence, optional): File paths, indexed per image.
+                Unlike __call__, no get_image_file_list expansion is done.
+            img_numpy_list (list, optional): Images as numpy arrays.
+            img_numpy (numpy.ndarray, optional): A single image array.
+        Returns:
+            list: One dict per image with 'boxes' (detected point sets) and
+                'elapse' (model time in seconds).
+        Raises:
+            Exception: If no image path or numpy array is provided.
+        """
+        target_size = 640
+        over_lap = 64
+        long_side = target_size * 2 - over_lap
+        results = []
+        for data in self._iter_inputs(img_path,
+                                      img_numpy_list,
+                                      img_numpy,
+                                      expand_path=False):
+            src_img_ori = data['image']
+            img_height, img_width = src_img_ori.shape[:2]
+            # Scale the longer side to long_side. max(1, ...) keeps the
+            # shorter side from rounding down to 0 on extreme aspect ratios.
+            if img_height > img_width:
+                r_h = long_side
+                r_w = max(1, img_width * long_side // img_height)
+            else:
+                r_w = long_side
+                r_h = max(1, img_height * long_side // img_width)
+            src_img = cv2.resize(src_img_ori, (r_w, r_h))
+            # Original size plus the applied scale factors, handed to the
+            # post-processor together with the merged map.
+            shape_list_ori = np.array([[
+                img_height, img_width,
+                float(r_h) / img_height,
+                float(r_w) / img_width
+            ]])
+            img_height, img_width = src_img.shape[:2]
+            cropped_img_list, crop_positions = resize_image(src_img,
+                                                            size=(target_size,
+                                                                  target_size),
+                                                            over_lap=over_lap)
+            # Only the operators at positions -3 and -2 are applied to the
+            # tiles, which are already decoded and sized.
+            image_list = [
+                self.transform({'image': tile}, self.ops[-3:-1])['image']
+                for tile in cropped_img_list
+            ]
+            images = torch.from_numpy(
+                np.array(image_list)).to(device=self.device)
+            # Inference only: no autograd graph is needed, as in __call__.
+            with torch.no_grad():
+                t_start = time.time()
+                preds = self.model(images)
+                # Wait for queued GPU work so the timing is meaningful.
+                # Skipped on CPU, where no CUDA context may exist.
+                if self.device.type == 'cuda':
+                    torch.cuda.synchronize()
+                t_cost = time.time() - t_start
+            preds['maps'] = restore_preds(preds['maps'], crop_positions,
+                                          (img_height, img_width))
+            post_result = self.post_process_class(preds, shape_list_ori)
+            info = {'boxes': post_result[0]['points'], 'elapse': t_cost}
+            results.append(info)
+        return results
+    def __call__(self,
+                 img_path=None,
+                 img_numpy_list=None,
+                 img_numpy=None,
+                 return_mask=False):
+        """
+        Run detection on the given image(s) and return the results.
+        Args:
+            img_path (str, optional): Image file path; expanded with
+                get_image_file_list. Defaults to None.
+            img_numpy_list (list, optional): Images as numpy arrays.
+                Defaults to None.
+            img_numpy (numpy.ndarray, optional): A single image array.
+                Defaults to None.
+            return_mask (bool, optional): When True, each result also carries
+                the model's 'maps' output under 'mask' (converted to a numpy
+                array when it is a tensor). Defaults to False.
+        Returns:
+            list: One dict per image with 'boxes' (detected point sets) and
+                'elapse' (model time in seconds), plus 'mask' on request.
+        Raises:
+            Exception: If no image path or numpy array is provided.
+        """
+        results = []
+        for data in self._iter_inputs(img_path,
+                                      img_numpy_list,
+                                      img_numpy,
+                                      expand_path=True):
+            # The remaining operators return the kept entries in order:
+            # image first, shape second.
+            batch = self.transform(data, self.ops[1:])
+            images = np.expand_dims(batch[0], axis=0)
+            shape_list = np.expand_dims(batch[1], axis=0)
+            images = torch.from_numpy(images).to(device=self.device)
+            with torch.no_grad():
+                t_start = time.time()
+                preds = self.model(images)
+                t_cost = time.time() - t_start
+            post_result = self.post_process_class(preds, shape_list)
+            info = {'boxes': post_result[0]['points'], 'elapse': t_cost}
+            if return_mask:
+                if isinstance(preds['maps'], torch.Tensor):
+                    mask = preds['maps'].detach().cpu().numpy()
+                else:
+                    mask = preds['maps']
+                info['mask'] = mask
+            results.append(info)
+        return results
+@torch.no_grad()
+def main(cfg):
+    """
+    Run detection on every image named by cfg['Global']['infer_img'].
+    For each image, one line "<path>\t<json boxes>" is appended to
+    ``det_results.txt`` inside cfg['Global']['output_dir']. When
+    cfg['Global']['is_visualize'] is set, the image with the boxes drawn on
+    it is also written to the ``det_results`` sub-directory.
+    Args:
+        cfg (dict): Full configuration; also used to build the detector.
+    """
+    is_visualize = cfg['Global'].get('is_visualize', False)
+    model = OpenDetector(cfg)
+    save_res_path = cfg['Global']['output_dir']
+    if not os.path.exists(save_res_path):
+        os.makedirs(save_res_path)
+    with open(save_res_path + '/det_results.txt', 'wb') as fout:
+        image_files = get_image_file_list(cfg['Global']['infer_img'])
+        for sample_num, file in enumerate(image_files):
+            preds_result = model(img_path=file)[0]
+            logger.info('{} infer_img: {}, time cost: {}'.format(
+                sample_num, file, preds_result['elapse']))
+            boxes = preds_result['boxes']
+            # One {'points': [...]} entry per detected box.
+            dt_boxes_json = [{
+                'points': np.array(box).tolist()
+            } for box in boxes]
+            if is_visualize:
+                src_img = cv2.imread(file)
+                save_det_path = save_res_path + '/det_results/'
+                draw_det_res(boxes, src_img, file, save_det_path)
+                logger.info('The detected Image saved in {}'.format(
+                    os.path.join(save_det_path, os.path.basename(file))))
+            boxes_str = json.dumps(dt_boxes_json)
+            logger.info('results: {}'.format(boxes_str))
+            fout.write((file + '\t' + boxes_str + '\n').encode())
+    logger.info('success!')
+if __name__ == '__main__':
+    # Script entry point: parse the command line, load the config file it
+    # names, overlay the command-line values on it, then run detection.
+    FLAGS = ArgsParser().parse_args()
+    cfg = Config(FLAGS.config)
+    # View the parsed arguments as a dict; 'opt' is taken out so that it is
+    # merged separately, after the remaining arguments.
+    FLAGS = vars(FLAGS)
+    opt = FLAGS.pop('opt')
+    cfg.merge_dict(FLAGS)
+    cfg.merge_dict(opt)
+    main(cfg.cfg)