openocr-python 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openocr/__init__.py +11 -0
- openocr/configs/det/dbnet/repvit_db.yml +173 -0
- openocr/configs/rec/abinet/resnet45_trans_abinet_lang.yml +94 -0
- openocr/configs/rec/abinet/resnet45_trans_abinet_wo_lang.yml +93 -0
- openocr/configs/rec/abinet/svtrv2_abinet_lang.yml +130 -0
- openocr/configs/rec/abinet/svtrv2_abinet_wo_lang.yml +128 -0
- openocr/configs/rec/aster/resnet31_lstm_aster_tps_on.yml +93 -0
- openocr/configs/rec/aster/svtrv2_aster.yml +127 -0
- openocr/configs/rec/aster/svtrv2_aster_tps_on.yml +102 -0
- openocr/configs/rec/autostr/autostr_lstm_aster_tps_on.yml +95 -0
- openocr/configs/rec/busnet/svtrv2_busnet.yml +135 -0
- openocr/configs/rec/busnet/svtrv2_busnet_pretraining.yml +134 -0
- openocr/configs/rec/busnet/vit_busnet.yml +104 -0
- openocr/configs/rec/busnet/vit_busnet_pretraining.yml +104 -0
- openocr/configs/rec/cam/convnextv2_cam_tps_on.yml +118 -0
- openocr/configs/rec/cam/convnextv2_tiny_cam_tps_on.yml +118 -0
- openocr/configs/rec/cam/svtrv2_cam_tps_on.yml +123 -0
- openocr/configs/rec/cdistnet/resnet45_trans_cdistnet.yml +93 -0
- openocr/configs/rec/cdistnet/svtrv2_cdistnet.yml +139 -0
- openocr/configs/rec/cppd/svtr_base_cppd.yml +123 -0
- openocr/configs/rec/cppd/svtr_base_cppd_ch.yml +126 -0
- openocr/configs/rec/cppd/svtr_base_cppd_h8.yml +123 -0
- openocr/configs/rec/cppd/svtr_base_cppd_syn.yml +124 -0
- openocr/configs/rec/cppd/svtrv2_cppd.yml +150 -0
- openocr/configs/rec/dan/resnet45_fpn_dan.yml +98 -0
- openocr/configs/rec/dan/svtrv2_dan.yml +130 -0
- openocr/configs/rec/focalsvtr/focalsvtr_ctc.yml +137 -0
- openocr/configs/rec/gtc/svtrv2_lnconv_nrtr_gtc.yml +168 -0
- openocr/configs/rec/gtc/svtrv2_lnconv_smtr_gtc_long_infer.yml +151 -0
- openocr/configs/rec/gtc/svtrv2_lnconv_smtr_gtc_smtr_long.yml +150 -0
- openocr/configs/rec/gtc/svtrv2_lnconv_smtr_gtc_stream.yml +152 -0
- openocr/configs/rec/igtr/svtr_base_ds_igtr.yml +157 -0
- openocr/configs/rec/lister/focalsvtr_lister_wo_fem_maxratio12.yml +133 -0
- openocr/configs/rec/lister/svtrv2_lister_wo_fem_maxratio12.yml +138 -0
- openocr/configs/rec/lpv/svtr_base_lpv.yml +124 -0
- openocr/configs/rec/lpv/svtr_base_lpv_wo_glrm.yml +123 -0
- openocr/configs/rec/lpv/svtrv2_lpv.yml +147 -0
- openocr/configs/rec/lpv/svtrv2_lpv_wo_glrm.yml +146 -0
- openocr/configs/rec/maerec/vit_nrtr.yml +116 -0
- openocr/configs/rec/matrn/resnet45_trans_matrn.yml +95 -0
- openocr/configs/rec/matrn/svtrv2_matrn.yml +130 -0
- openocr/configs/rec/mgpstr/svtrv2_mgpstr_only_char.yml +140 -0
- openocr/configs/rec/mgpstr/vit_base_mgpstr_only_char.yml +111 -0
- openocr/configs/rec/mgpstr/vit_large_mgpstr_only_char.yml +110 -0
- openocr/configs/rec/mgpstr/vit_mgpstr.yml +110 -0
- openocr/configs/rec/mgpstr/vit_mgpstr_only_char.yml +110 -0
- openocr/configs/rec/moran/resnet31_lstm_moran.yml +92 -0
- openocr/configs/rec/nrtr/focalsvtr_nrtr_maxraio12.yml +145 -0
- openocr/configs/rec/nrtr/nrtr.yml +107 -0
- openocr/configs/rec/nrtr/svtr_base_nrtr.yml +118 -0
- openocr/configs/rec/nrtr/svtr_base_nrtr_syn.yml +119 -0
- openocr/configs/rec/nrtr/svtrv2_nrtr.yml +146 -0
- openocr/configs/rec/ote/svtr_base_h8_ote.yml +117 -0
- openocr/configs/rec/ote/svtr_base_ote.yml +116 -0
- openocr/configs/rec/parseq/focalsvtr_parseq_maxratio12.yml +140 -0
- openocr/configs/rec/parseq/svrtv2_parseq.yml +136 -0
- openocr/configs/rec/parseq/vit_parseq.yml +100 -0
- openocr/configs/rec/robustscanner/resnet31_robustscanner.yml +102 -0
- openocr/configs/rec/robustscanner/svtrv2_robustscanner.yml +134 -0
- openocr/configs/rec/sar/resnet31_lstm_sar.yml +94 -0
- openocr/configs/rec/sar/svtrv2_sar.yml +128 -0
- openocr/configs/rec/seed/resnet31_lstm_seed_tps_on.yml +96 -0
- openocr/configs/rec/smtr/focalsvtr_smtr.yml +150 -0
- openocr/configs/rec/smtr/focalsvtr_smtr_long.yml +133 -0
- openocr/configs/rec/smtr/svtrv2_smtr.yml +150 -0
- openocr/configs/rec/smtr/svtrv2_smtr_bi.yml +136 -0
- openocr/configs/rec/srn/resnet50_fpn_srn.yml +97 -0
- openocr/configs/rec/srn/svtrv2_srn.yml +131 -0
- openocr/configs/rec/svtrs/convnextv2_ctc.yml +105 -0
- openocr/configs/rec/svtrs/convnextv2_h8_ctc.yml +105 -0
- openocr/configs/rec/svtrs/convnextv2_h8_rctc.yml +106 -0
- openocr/configs/rec/svtrs/convnextv2_rctc.yml +106 -0
- openocr/configs/rec/svtrs/convnextv2_tiny_h8_ctc.yml +105 -0
- openocr/configs/rec/svtrs/convnextv2_tiny_h8_rctc.yml +106 -0
- openocr/configs/rec/svtrs/crnn_ctc.yml +99 -0
- openocr/configs/rec/svtrs/crnn_ctc_long.yml +116 -0
- openocr/configs/rec/svtrs/focalnet_base_ctc.yml +108 -0
- openocr/configs/rec/svtrs/focalnet_base_rctc.yml +109 -0
- openocr/configs/rec/svtrs/focalsvtr_ctc.yml +106 -0
- openocr/configs/rec/svtrs/focalsvtr_rctc.yml +107 -0
- openocr/configs/rec/svtrs/resnet45_trans_ctc.yml +103 -0
- openocr/configs/rec/svtrs/resnet45_trans_rctc.yml +104 -0
- openocr/configs/rec/svtrs/svtr_base_ctc.yml +110 -0
- openocr/configs/rec/svtrs/svtr_base_rctc.yml +111 -0
- openocr/configs/rec/svtrs/svtrnet_ctc_syn.yml +111 -0
- openocr/configs/rec/svtrs/vit_ctc.yml +103 -0
- openocr/configs/rec/svtrs/vit_rctc.yml +103 -0
- openocr/configs/rec/svtrv2/repsvtr_ch.yml +121 -0
- openocr/configs/rec/svtrv2/svtrv2_ch.yml +133 -0
- openocr/configs/rec/svtrv2/svtrv2_ctc.yml +136 -0
- openocr/configs/rec/svtrv2/svtrv2_rctc.yml +135 -0
- openocr/configs/rec/svtrv2/svtrv2_small_rctc.yml +135 -0
- openocr/configs/rec/svtrv2/svtrv2_smtr_gtc_rctc.yml +162 -0
- openocr/configs/rec/svtrv2/svtrv2_smtr_gtc_rctc_ch.yml +153 -0
- openocr/configs/rec/svtrv2/svtrv2_tiny_rctc.yml +135 -0
- openocr/configs/rec/visionlan/resnet45_trans_visionlan_LA.yml +103 -0
- openocr/configs/rec/visionlan/resnet45_trans_visionlan_LF_1.yml +102 -0
- openocr/configs/rec/visionlan/resnet45_trans_visionlan_LF_2.yml +103 -0
- openocr/configs/rec/visionlan/svtrv2_visionlan_LA.yml +112 -0
- openocr/configs/rec/visionlan/svtrv2_visionlan_LF_1.yml +111 -0
- openocr/configs/rec/visionlan/svtrv2_visionlan_LF_2.yml +112 -0
- openocr/demo_gradio.py +128 -0
- openocr/opendet/modeling/__init__.py +11 -0
- openocr/opendet/modeling/backbones/__init__.py +14 -0
- openocr/opendet/modeling/backbones/repvit.py +340 -0
- openocr/opendet/modeling/base_detector.py +69 -0
- openocr/opendet/modeling/heads/__init__.py +14 -0
- openocr/opendet/modeling/heads/db_head.py +73 -0
- openocr/opendet/modeling/necks/__init__.py +14 -0
- openocr/opendet/modeling/necks/db_fpn.py +609 -0
- openocr/opendet/postprocess/__init__.py +18 -0
- openocr/opendet/postprocess/db_postprocess.py +273 -0
- openocr/opendet/preprocess/__init__.py +154 -0
- openocr/opendet/preprocess/crop_resize.py +121 -0
- openocr/opendet/preprocess/db_resize_for_test.py +135 -0
- openocr/openrec/losses/__init__.py +62 -0
- openocr/openrec/losses/abinet_loss.py +42 -0
- openocr/openrec/losses/ar_loss.py +23 -0
- openocr/openrec/losses/cam_loss.py +48 -0
- openocr/openrec/losses/cdistnet_loss.py +34 -0
- openocr/openrec/losses/ce_loss.py +68 -0
- openocr/openrec/losses/cppd_loss.py +77 -0
- openocr/openrec/losses/ctc_loss.py +33 -0
- openocr/openrec/losses/igtr_loss.py +12 -0
- openocr/openrec/losses/lister_loss.py +14 -0
- openocr/openrec/losses/lpv_loss.py +30 -0
- openocr/openrec/losses/mgp_loss.py +34 -0
- openocr/openrec/losses/parseq_loss.py +12 -0
- openocr/openrec/losses/robustscanner_loss.py +20 -0
- openocr/openrec/losses/seed_loss.py +46 -0
- openocr/openrec/losses/smtr_loss.py +12 -0
- openocr/openrec/losses/srn_loss.py +40 -0
- openocr/openrec/losses/visionlan_loss.py +58 -0
- openocr/openrec/metrics/__init__.py +19 -0
- openocr/openrec/metrics/rec_metric.py +270 -0
- openocr/openrec/metrics/rec_metric_gtc.py +58 -0
- openocr/openrec/metrics/rec_metric_long.py +142 -0
- openocr/openrec/metrics/rec_metric_mgp.py +93 -0
- openocr/openrec/modeling/__init__.py +11 -0
- openocr/openrec/modeling/base_recognizer.py +69 -0
- openocr/openrec/modeling/common.py +238 -0
- openocr/openrec/modeling/decoders/__init__.py +109 -0
- openocr/openrec/modeling/decoders/abinet_decoder.py +283 -0
- openocr/openrec/modeling/decoders/aster_decoder.py +170 -0
- openocr/openrec/modeling/decoders/bus_decoder.py +133 -0
- openocr/openrec/modeling/decoders/cam_decoder.py +43 -0
- openocr/openrec/modeling/decoders/cdistnet_decoder.py +334 -0
- openocr/openrec/modeling/decoders/cppd_decoder.py +393 -0
- openocr/openrec/modeling/decoders/ctc_decoder.py +203 -0
- openocr/openrec/modeling/decoders/dan_decoder.py +203 -0
- openocr/openrec/modeling/decoders/igtr_decoder.py +815 -0
- openocr/openrec/modeling/decoders/lister_decoder.py +535 -0
- openocr/openrec/modeling/decoders/lpv_decoder.py +119 -0
- openocr/openrec/modeling/decoders/matrn_decoder.py +236 -0
- openocr/openrec/modeling/decoders/mgp_decoder.py +99 -0
- openocr/openrec/modeling/decoders/nrtr_decoder.py +439 -0
- openocr/openrec/modeling/decoders/ote_decoder.py +205 -0
- openocr/openrec/modeling/decoders/parseq_decoder.py +504 -0
- openocr/openrec/modeling/decoders/rctc_decoder.py +70 -0
- openocr/openrec/modeling/decoders/robustscanner_decoder.py +749 -0
- openocr/openrec/modeling/decoders/sar_decoder.py +236 -0
- openocr/openrec/modeling/decoders/smtr_decoder.py +621 -0
- openocr/openrec/modeling/decoders/smtr_decoder_nattn.py +521 -0
- openocr/openrec/modeling/decoders/srn_decoder.py +283 -0
- openocr/openrec/modeling/decoders/visionlan_decoder.py +321 -0
- openocr/openrec/modeling/encoders/__init__.py +39 -0
- openocr/openrec/modeling/encoders/autostr_encoder.py +327 -0
- openocr/openrec/modeling/encoders/cam_encoder.py +760 -0
- openocr/openrec/modeling/encoders/convnextv2.py +213 -0
- openocr/openrec/modeling/encoders/focalsvtr.py +631 -0
- openocr/openrec/modeling/encoders/nrtr_encoder.py +28 -0
- openocr/openrec/modeling/encoders/rec_hgnet.py +346 -0
- openocr/openrec/modeling/encoders/rec_lcnetv3.py +488 -0
- openocr/openrec/modeling/encoders/rec_mobilenet_v3.py +132 -0
- openocr/openrec/modeling/encoders/rec_mv1_enhance.py +254 -0
- openocr/openrec/modeling/encoders/rec_nrtr_mtb.py +37 -0
- openocr/openrec/modeling/encoders/rec_resnet_31.py +213 -0
- openocr/openrec/modeling/encoders/rec_resnet_45.py +183 -0
- openocr/openrec/modeling/encoders/rec_resnet_fpn.py +216 -0
- openocr/openrec/modeling/encoders/rec_resnet_vd.py +252 -0
- openocr/openrec/modeling/encoders/repvit.py +338 -0
- openocr/openrec/modeling/encoders/resnet31_rnn.py +123 -0
- openocr/openrec/modeling/encoders/svtrnet.py +574 -0
- openocr/openrec/modeling/encoders/svtrnet2dpos.py +616 -0
- openocr/openrec/modeling/encoders/svtrv2.py +470 -0
- openocr/openrec/modeling/encoders/svtrv2_lnconv.py +503 -0
- openocr/openrec/modeling/encoders/svtrv2_lnconv_two33.py +517 -0
- openocr/openrec/modeling/encoders/vit.py +120 -0
- openocr/openrec/modeling/transforms/__init__.py +15 -0
- openocr/openrec/modeling/transforms/aster_tps.py +262 -0
- openocr/openrec/modeling/transforms/moran.py +136 -0
- openocr/openrec/modeling/transforms/tps.py +246 -0
- openocr/openrec/optimizer/__init__.py +73 -0
- openocr/openrec/optimizer/lr.py +227 -0
- openocr/openrec/postprocess/__init__.py +72 -0
- openocr/openrec/postprocess/abinet_postprocess.py +37 -0
- openocr/openrec/postprocess/ar_postprocess.py +63 -0
- openocr/openrec/postprocess/ce_postprocess.py +43 -0
- openocr/openrec/postprocess/char_postprocess.py +108 -0
- openocr/openrec/postprocess/cppd_postprocess.py +42 -0
- openocr/openrec/postprocess/ctc_postprocess.py +119 -0
- openocr/openrec/postprocess/igtr_postprocess.py +100 -0
- openocr/openrec/postprocess/lister_postprocess.py +59 -0
- openocr/openrec/postprocess/mgp_postprocess.py +143 -0
- openocr/openrec/postprocess/nrtr_postprocess.py +75 -0
- openocr/openrec/postprocess/smtr_postprocess.py +73 -0
- openocr/openrec/postprocess/srn_postprocess.py +80 -0
- openocr/openrec/postprocess/visionlan_postprocess.py +81 -0
- openocr/openrec/preprocess/__init__.py +173 -0
- openocr/openrec/preprocess/abinet_aug.py +473 -0
- openocr/openrec/preprocess/abinet_label_encode.py +36 -0
- openocr/openrec/preprocess/ar_label_encode.py +36 -0
- openocr/openrec/preprocess/auto_augment.py +1012 -0
- openocr/openrec/preprocess/cam_label_encode.py +141 -0
- openocr/openrec/preprocess/ce_label_encode.py +116 -0
- openocr/openrec/preprocess/char_label_encode.py +36 -0
- openocr/openrec/preprocess/cppd_label_encode.py +173 -0
- openocr/openrec/preprocess/ctc_label_encode.py +124 -0
- openocr/openrec/preprocess/ep_label_encode.py +38 -0
- openocr/openrec/preprocess/igtr_label_encode.py +360 -0
- openocr/openrec/preprocess/mgp_label_encode.py +95 -0
- openocr/openrec/preprocess/parseq_aug.py +150 -0
- openocr/openrec/preprocess/rec_aug.py +211 -0
- openocr/openrec/preprocess/resize.py +534 -0
- openocr/openrec/preprocess/smtr_label_encode.py +125 -0
- openocr/openrec/preprocess/srn_label_encode.py +37 -0
- openocr/openrec/preprocess/visionlan_label_encode.py +67 -0
- openocr/tools/create_lmdb_dataset.py +118 -0
- openocr/tools/data/__init__.py +94 -0
- openocr/tools/data/collate_fn.py +100 -0
- openocr/tools/data/lmdb_dataset.py +142 -0
- openocr/tools/data/lmdb_dataset_test.py +166 -0
- openocr/tools/data/multi_scale_sampler.py +177 -0
- openocr/tools/data/ratio_dataset.py +217 -0
- openocr/tools/data/ratio_dataset_test.py +273 -0
- openocr/tools/data/ratio_dataset_tvresize.py +213 -0
- openocr/tools/data/ratio_dataset_tvresize_test.py +276 -0
- openocr/tools/data/ratio_sampler.py +190 -0
- openocr/tools/data/simple_dataset.py +263 -0
- openocr/tools/data/strlmdb_dataset.py +143 -0
- openocr/tools/engine/__init__.py +5 -0
- openocr/tools/engine/config.py +158 -0
- openocr/tools/engine/trainer.py +621 -0
- openocr/tools/eval_rec.py +41 -0
- openocr/tools/eval_rec_all_ch.py +184 -0
- openocr/tools/eval_rec_all_en.py +206 -0
- openocr/tools/eval_rec_all_long.py +119 -0
- openocr/tools/eval_rec_all_long_simple.py +122 -0
- openocr/tools/export_rec.py +118 -0
- openocr/tools/infer/onnx_engine.py +65 -0
- openocr/tools/infer/predict_rec.py +140 -0
- openocr/tools/infer/utility.py +234 -0
- openocr/tools/infer_det.py +449 -0
- openocr/tools/infer_e2e.py +462 -0
- openocr/tools/infer_e2e_parallel.py +184 -0
- openocr/tools/infer_rec.py +371 -0
- openocr/tools/train_rec.py +37 -0
- openocr/tools/utility.py +45 -0
- openocr/tools/utils/EN_symbol_dict.txt +94 -0
- openocr/tools/utils/__init__.py +0 -0
- openocr/tools/utils/ckpt.py +87 -0
- openocr/tools/utils/dict/ar_dict.txt +117 -0
- openocr/tools/utils/dict/arabic_dict.txt +161 -0
- openocr/tools/utils/dict/be_dict.txt +145 -0
- openocr/tools/utils/dict/bg_dict.txt +140 -0
- openocr/tools/utils/dict/chinese_cht_dict.txt +8421 -0
- openocr/tools/utils/dict/cyrillic_dict.txt +163 -0
- openocr/tools/utils/dict/devanagari_dict.txt +167 -0
- openocr/tools/utils/dict/en_dict.txt +63 -0
- openocr/tools/utils/dict/fa_dict.txt +136 -0
- openocr/tools/utils/dict/french_dict.txt +136 -0
- openocr/tools/utils/dict/german_dict.txt +143 -0
- openocr/tools/utils/dict/hi_dict.txt +162 -0
- openocr/tools/utils/dict/it_dict.txt +118 -0
- openocr/tools/utils/dict/japan_dict.txt +4399 -0
- openocr/tools/utils/dict/ka_dict.txt +153 -0
- openocr/tools/utils/dict/kie_dict/xfund_class_list.txt +4 -0
- openocr/tools/utils/dict/korean_dict.txt +3688 -0
- openocr/tools/utils/dict/latex_symbol_dict.txt +111 -0
- openocr/tools/utils/dict/latin_dict.txt +185 -0
- openocr/tools/utils/dict/layout_dict/layout_cdla_dict.txt +10 -0
- openocr/tools/utils/dict/layout_dict/layout_publaynet_dict.txt +5 -0
- openocr/tools/utils/dict/layout_dict/layout_table_dict.txt +1 -0
- openocr/tools/utils/dict/mr_dict.txt +153 -0
- openocr/tools/utils/dict/ne_dict.txt +153 -0
- openocr/tools/utils/dict/oc_dict.txt +96 -0
- openocr/tools/utils/dict/pu_dict.txt +130 -0
- openocr/tools/utils/dict/rs_dict.txt +91 -0
- openocr/tools/utils/dict/rsc_dict.txt +134 -0
- openocr/tools/utils/dict/ru_dict.txt +125 -0
- openocr/tools/utils/dict/spin_dict.txt +68 -0
- openocr/tools/utils/dict/ta_dict.txt +128 -0
- openocr/tools/utils/dict/table_dict.txt +277 -0
- openocr/tools/utils/dict/table_master_structure_dict.txt +39 -0
- openocr/tools/utils/dict/table_structure_dict.txt +28 -0
- openocr/tools/utils/dict/table_structure_dict_ch.txt +48 -0
- openocr/tools/utils/dict/te_dict.txt +151 -0
- openocr/tools/utils/dict/ug_dict.txt +114 -0
- openocr/tools/utils/dict/uk_dict.txt +142 -0
- openocr/tools/utils/dict/ur_dict.txt +137 -0
- openocr/tools/utils/dict/xi_dict.txt +110 -0
- openocr/tools/utils/dict90.txt +90 -0
- openocr/tools/utils/e2e_metric/Deteval.py +802 -0
- openocr/tools/utils/e2e_metric/polygon_fast.py +70 -0
- openocr/tools/utils/e2e_utils/extract_batchsize.py +86 -0
- openocr/tools/utils/e2e_utils/extract_textpoint_fast.py +479 -0
- openocr/tools/utils/e2e_utils/extract_textpoint_slow.py +582 -0
- openocr/tools/utils/e2e_utils/pgnet_pp_utils.py +159 -0
- openocr/tools/utils/e2e_utils/visual.py +152 -0
- openocr/tools/utils/en_dict.txt +95 -0
- openocr/tools/utils/gen_label.py +68 -0
- openocr/tools/utils/ic15_dict.txt +36 -0
- openocr/tools/utils/logging.py +56 -0
- openocr/tools/utils/poly_nms.py +132 -0
- openocr/tools/utils/ppocr_keys_v1.txt +6623 -0
- openocr/tools/utils/stats.py +58 -0
- openocr/tools/utils/utility.py +165 -0
- openocr/tools/utils/visual.py +117 -0
- openocr_python-0.0.2.dist-info/LICENCE +201 -0
- openocr_python-0.0.2.dist-info/METADATA +98 -0
- openocr_python-0.0.2.dist-info/RECORD +323 -0
- openocr_python-0.0.2.dist-info/WHEEL +5 -0
- openocr_python-0.0.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import cv2
|
|
3
|
+
import torch
|
|
4
|
+
from shapely.geometry import Polygon
|
|
5
|
+
import pyclipper
|
|
6
|
+
"""
|
|
7
|
+
This code is referred from:
|
|
8
|
+
https://github.com/WenmuZhou/DBNet.pytorch/blob/master/post_processing/seg_detector_representer.py
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class DBPostProcess(object):
    """
    The post process for Differentiable Binarization (DB).

    Converts the probability map produced by a DB detection head into text
    regions — min-area quadrilaterals (box_type='quad') or polygons
    (box_type='poly') — each with a mean-probability confidence score.
    """

    def __init__(
        self,
        thresh=0.3,
        box_thresh=0.7,
        max_candidates=1000,
        unclip_ratio=2.0,
        use_dilation=False,
        score_mode='fast',
        box_type='quad',
        **kwargs,
    ):
        # thresh: binarization threshold applied to the probability map.
        # box_thresh: minimum mean score for a candidate region to be kept.
        # max_candidates: cap on the number of contours considered per image.
        # unclip_ratio: expansion ratio used to un-shrink detected regions
        #   (DB is trained on shrunk text masks, so boxes must be dilated back).
        # use_dilation: if True, dilate the binary mask with a 2x2 kernel
        #   before contour extraction.
        # score_mode: 'fast' scores inside the box's bounding rectangle,
        #   'slow' scores inside the exact contour polygon.
        # box_type: 'quad' for 4-point boxes, 'poly' for polygons.
        self.thresh = thresh
        self.box_thresh = box_thresh
        self.max_candidates = max_candidates
        self.unclip_ratio = unclip_ratio
        self.min_size = 3  # regions with a shorter side below this are dropped
        self.score_mode = score_mode
        self.box_type = box_type
        assert score_mode in [
            'slow',
            'fast',
        ], 'Score mode must be in [slow, fast] but got: {}'.format(score_mode)

        self.dilation_kernel = None if not use_dilation else np.array([[1, 1],
                                                                       [1, 1]])

    def polygons_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
        """
        _bitmap: single map with shape (1, H, W),
            whose values are binarized as {0, 1}

        Extract polygon outlines from the binary mask, score them against
        ``pred``, and rescale them to (dest_width, dest_height).
        Returns (boxes, scores) with boxes as a list of point lists.
        """

        bitmap = _bitmap
        height, width = bitmap.shape

        boxes = []
        scores = []

        contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8),
                                       cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

        for contour in contours[:self.max_candidates]:
            # Approximate the contour with a coarser polygon; epsilon is
            # proportional to the contour perimeter.
            epsilon = 0.002 * cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, epsilon, True)
            points = approx.reshape((-1, 2))
            if points.shape[0] < 4:
                continue

            score = self.box_score_fast(pred, points.reshape(-1, 2))
            if self.box_thresh > score:
                continue

            if points.shape[0] > 2:
                box = self.unclip(points, self.unclip_ratio)
                # unclip may split the polygon into several pieces; such
                # candidates are discarded.
                if len(box) > 1:
                    continue
            else:
                continue
            box = np.array(box).reshape(-1, 2)
            if len(box) == 0:
                continue

            # Drop regions whose min-area rectangle is too small.
            _, sside = self.get_mini_boxes(box.reshape((-1, 1, 2)))
            if sside < self.min_size + 2:
                continue

            box = np.array(box)
            # Rescale from bitmap coordinates to the destination image size.
            box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0,
                                dest_width)
            box[:, 1] = np.clip(np.round(box[:, 1] / height * dest_height), 0,
                                dest_height)
            boxes.append(box.tolist())
            scores.append(score)
        return boxes, scores

    def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
        """
        _bitmap: single map with shape (1, H, W),
            whose values are binarized as {0, 1}

        Extract min-area quadrilaterals from the binary mask.
        Returns (boxes, scores) with boxes as an int32 array of shape (N, 4, 2).
        """

        bitmap = _bitmap
        height, width = bitmap.shape

        # cv2.findContours returns 3 values on OpenCV 3.x and 2 values on
        # OpenCV 4.x — handle both layouts.
        outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST,
                                cv2.CHAIN_APPROX_SIMPLE)
        if len(outs) == 3:
            img, contours, _ = outs[0], outs[1], outs[2]
        elif len(outs) == 2:
            contours, _ = outs[0], outs[1]

        num_contours = min(len(contours), self.max_candidates)

        boxes = []
        scores = []
        for index in range(num_contours):
            contour = contours[index]
            points, sside = self.get_mini_boxes(contour)
            if sside < self.min_size:
                continue
            points = np.array(points)
            if self.score_mode == 'fast':
                score = self.box_score_fast(pred, points.reshape(-1, 2))
            else:
                score = self.box_score_slow(pred, contour)
            if self.box_thresh > score:
                continue

            # Expand the shrunk box back to full text extent, then re-fit a
            # min-area rectangle on the expanded outline.
            box = self.unclip(points, self.unclip_ratio)
            if len(box) > 1:
                continue
            box = np.array(box).reshape(-1, 1, 2)
            box, sside = self.get_mini_boxes(box)
            if sside < self.min_size + 2:
                continue
            box = np.array(box)

            # Rescale from bitmap coordinates to the destination image size.
            box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0,
                                dest_width)
            box[:, 1] = np.clip(np.round(box[:, 1] / height * dest_height), 0,
                                dest_height)
            boxes.append(box.astype('int32'))
            scores.append(score)
        return np.array(boxes, dtype='int32'), scores

    def unclip(self, box, unclip_ratio):
        # Dilate the polygon outward by a distance derived from its area and
        # perimeter (inverse of the shrinking used when building DB labels).
        poly = Polygon(box)
        distance = poly.area * unclip_ratio / poly.length
        offset = pyclipper.PyclipperOffset()
        offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
        expanded = offset.Execute(distance)
        return expanded

    def get_mini_boxes(self, contour):
        # Fit a rotated min-area rectangle and order its four corners
        # clockwise starting from the top-left; also return the rectangle's
        # shorter side length.
        bounding_box = cv2.minAreaRect(contour)
        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])

        index_1, index_2, index_3, index_4 = 0, 1, 2, 3
        if points[1][1] > points[0][1]:
            index_1 = 0
            index_4 = 1
        else:
            index_1 = 1
            index_4 = 0
        if points[3][1] > points[2][1]:
            index_2 = 2
            index_3 = 3
        else:
            index_2 = 3
            index_3 = 2

        box = [
            points[index_1], points[index_2], points[index_3], points[index_4]
        ]
        return box, min(bounding_box[1])

    def box_score_fast(self, bitmap, _box):
        """
        box_score_fast: use bbox mean score as the mean score

        Mean of ``bitmap`` over the box polygon, rasterized inside the box's
        axis-aligned bounding rectangle for speed.
        """
        h, w = bitmap.shape[:2]
        box = _box.copy()
        xmin = np.clip(np.floor(box[:, 0].min()).astype('int32'), 0, w - 1)
        xmax = np.clip(np.ceil(box[:, 0].max()).astype('int32'), 0, w - 1)
        ymin = np.clip(np.floor(box[:, 1].min()).astype('int32'), 0, h - 1)
        ymax = np.clip(np.ceil(box[:, 1].max()).astype('int32'), 0, h - 1)

        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
        # Shift the polygon into the cropped window's coordinate frame.
        box[:, 0] = box[:, 0] - xmin
        box[:, 1] = box[:, 1] - ymin
        cv2.fillPoly(mask, box.reshape(1, -1, 2).astype('int32'), 1)
        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]

    def box_score_slow(self, bitmap, contour):
        """
        box_score_slow: use polygon mean score as the mean score

        Like box_score_fast, but rasterizes the exact contour polygon
        instead of its approximated box.
        """
        h, w = bitmap.shape[:2]
        contour = contour.copy()
        contour = np.reshape(contour, (-1, 2))

        xmin = np.clip(np.min(contour[:, 0]), 0, w - 1)
        xmax = np.clip(np.max(contour[:, 0]), 0, w - 1)
        ymin = np.clip(np.min(contour[:, 1]), 0, h - 1)
        ymax = np.clip(np.max(contour[:, 1]), 0, h - 1)

        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)

        contour[:, 0] = contour[:, 0] - xmin
        contour[:, 1] = contour[:, 1] - ymin

        cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype('int32'), 1)
        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]

    def __call__(self, outs_dict, shape_list):
        # outs_dict['maps']: probability maps; indexed as (N, 1, H, W) below.
        # shape_list[i]: (src_h, src_w, ratio_h, ratio_w) for input image i.
        pred = outs_dict['maps']
        if isinstance(pred, torch.Tensor):
            pred = pred.detach().cpu().numpy()
        pred = pred[:, 0, :, :]
        segmentation = pred > self.thresh

        boxes_batch = []
        for batch_index in range(pred.shape[0]):
            src_h, src_w, ratio_h, ratio_w = shape_list[batch_index]
            if self.dilation_kernel is not None:
                # Optional dilation joins nearby text fragments before
                # contour extraction.
                mask = cv2.dilate(
                    np.array(segmentation[batch_index]).astype(np.uint8),
                    self.dilation_kernel,
                )
            else:
                mask = segmentation[batch_index]
            if self.box_type == 'poly':
                boxes, scores = self.polygons_from_bitmap(
                    pred[batch_index], mask, src_w, src_h)
            elif self.box_type == 'quad':
                boxes, scores = self.boxes_from_bitmap(pred[batch_index], mask,
                                                       src_w, src_h)
            else:
                raise ValueError(
                    "box_type can only be one of ['quad', 'poly']")

            boxes_batch.append({'points': boxes})
        return boxes_batch
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
class DistillationDBPostProcess(object):
    """Run DB post-processing on each named sub-model of a distillation model.

    Wraps a single :class:`DBPostProcess` instance and applies it to the
    prediction of every sub-model listed in ``model_name``, returning a dict
    keyed by sub-model name.
    """

    def __init__(
        self,
        model_name=['student'],
        key=None,
        thresh=0.3,
        box_thresh=0.6,
        max_candidates=1000,
        unclip_ratio=1.5,
        use_dilation=False,
        score_mode='fast',
        box_type='quad',
        **kwargs,
    ):
        self.model_name = model_name
        self.key = key
        # All DB-specific parameters are forwarded to one shared post-processor.
        db_params = dict(
            thresh=thresh,
            box_thresh=box_thresh,
            max_candidates=max_candidates,
            unclip_ratio=unclip_ratio,
            use_dilation=use_dilation,
            score_mode=score_mode,
            box_type=box_type,
        )
        self.post_process = DBPostProcess(**db_params)

    def __call__(self, predicts, shape_list):
        # One result entry per configured sub-model.
        return {
            name: self.post_process(predicts[name], shape_list=shape_list)
            for name in self.model_name
        }
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
import io
|
|
2
|
+
|
|
3
|
+
import cv2
|
|
4
|
+
import numpy as np
|
|
5
|
+
from PIL import Image
|
|
6
|
+
|
|
7
|
+
from .db_resize_for_test import DetResizeForTest
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class NormalizeImage(object):
    """Normalize an image: ``(img * scale - mean) / std``.

    Args:
        scale: multiplicative rescale factor applied before mean/std
            normalization; may be a string expression coming from a YAML
            config (e.g. ``'1./255.'``). Defaults to ``1/255``.
        mean: per-channel mean; defaults to the ImageNet means.
        std: per-channel std; defaults to the ImageNet stds.
        order: 'chw' or 'hwc' — layout of the incoming image, used only to
            shape mean/std for broadcasting.
    """

    def __init__(self, scale=None, mean=None, std=None, order='chw', **kwargs):
        if isinstance(scale, str):
            # NOTE(review): eval on a config-supplied string — assumed to come
            # from a trusted YAML config, never from untrusted input.
            scale = eval(scale)
        self.scale = np.float32(scale if scale is not None else 1.0 / 255.0)
        mean = mean if mean is not None else [0.485, 0.456, 0.406]
        std = std if std is not None else [0.229, 0.224, 0.225]

        shape = (3, 1, 1) if order == 'chw' else (1, 1, 3)
        self.mean = np.array(mean).reshape(shape).astype('float32')
        self.std = np.array(std).reshape(shape).astype('float32')

    def __call__(self, data):
        img = data['image']
        # Fix: import PIL lazily and only when the input is not already an
        # ndarray — the original re-imported PIL on every call.
        if not isinstance(img, np.ndarray):
            from PIL import Image

            if isinstance(img, Image.Image):
                img = np.array(img)
        assert isinstance(img,
                          np.ndarray), "invalid input 'img' in NormalizeImage"
        data['image'] = (img.astype('float32') * self.scale -
                         self.mean) / self.std
        return data
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class ToCHWImage(object):
    """Convert an HWC image to CHW layout via a transpose."""

    def __init__(self, **kwargs):
        # No configuration; **kwargs accepted for config-driven construction.
        pass

    def __call__(self, data):
        img = data['image']
        # Fix: import PIL lazily and only when the input is not already an
        # ndarray — the original re-imported PIL on every call.
        if not isinstance(img, np.ndarray):
            from PIL import Image

            if isinstance(img, Image.Image):
                img = np.array(img)
        data['image'] = img.transpose((2, 0, 1))
        return data
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class KeepKeys(object):
    """Project the data dict onto a fixed, ordered subset of its fields."""

    def __init__(self, keep_keys, **kwargs):
        self.keep_keys = keep_keys

    def __call__(self, data):
        # Return the selected values as a list, in the configured order.
        return [data[key] for key in self.keep_keys]
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def transform(data, ops=None):
    """Pass *data* through the operator pipeline *ops*.

    Each operator is called in order with the running result; if any operator
    returns None the pipeline aborts and None is returned.
    """
    for op in (ops or []):
        data = op(data)
        if data is None:
            return None
    return data
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class DecodeImage(object):
    """Decode raw image bytes into an ndarray with OpenCV.

    img_mode: 'GRAY' converts the decoded image to 3-channel BGR from gray;
        'RGB' reverses the channel order of the BGR decode.
    channel_first: if True, transpose the result from HWC to CHW.
    ignore_orientation: if True, decode ignoring the EXIF orientation tag.
    """

    def __init__(self,
                 img_mode='RGB',
                 channel_first=False,
                 ignore_orientation=False,
                 **kwargs):
        self.img_mode = img_mode
        self.channel_first = channel_first
        self.ignore_orientation = ignore_orientation

    def __call__(self, data):
        raw = data['image']

        assert type(raw) is bytes and len(
            raw) > 0, "invalid input 'img' in DecodeImage"
        buf = np.frombuffer(raw, dtype='uint8')
        # Select decode flags up front instead of branching on the call.
        if self.ignore_orientation:
            flags = cv2.IMREAD_IGNORE_ORIENTATION | cv2.IMREAD_COLOR
        else:
            flags = 1
        img = cv2.imdecode(buf, flags)
        if img is None:
            # Undecodable input drops the sample from the pipeline.
            return None
        if self.img_mode == 'GRAY':
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        elif self.img_mode == 'RGB':
            assert img.shape[2] == 3, 'invalid shape of image[%s]' % (
                img.shape)
            img = img[:, :, ::-1]

        if self.channel_first:
            img = img.transpose((2, 0, 1))

        data['image'] = img
        return data
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class DecodeImagePIL(object):
    """Decode raw image bytes into a PIL image.

    The bytes are first decoded as RGB; ``img_mode`` then optionally converts
    the result to grayscale ('Gray') or reverses the channel order ('BGR').
    """

    def __init__(self, img_mode='RGB', **kwargs):
        self.img_mode = img_mode

    def __call__(self, data):
        img = data['image']
        # Fix: the original read data['image'] twice and its assert message
        # named the wrong operator ('DecodeImage').
        assert type(img) is bytes and len(
            img) > 0, "invalid input 'img' in DecodeImagePIL"
        buf = io.BytesIO(img)
        img = Image.open(buf).convert('RGB')
        if self.img_mode == 'Gray':
            img = img.convert('L')
        elif self.img_mode == 'BGR':
            # Convert to numpy, reverse the last axis (RGB -> BGR), then
            # rebuild a PIL image.
            img = np.array(img)[:, :, ::-1]
            img = Image.fromarray(np.uint8(img))
        data['image'] = img
        return data
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def create_operators(op_param_list, global_config=None):
    """Create operators based on the config.

    Args:
        op_param_list (list): a list of single-key dicts, each mapping an
            operator class name to its keyword arguments (or ``None``).
        global_config (dict): optional kwargs merged into every operator's
            parameters (overriding per-operator values of the same key).

    Returns:
        list: the instantiated operator objects, in config order.
    """
    assert isinstance(op_param_list, list), 'operator config should be a list'
    ops = []
    for operator in op_param_list:
        assert isinstance(operator,
                          dict) and len(operator) == 1, 'yaml format error'
        op_name = list(operator)[0]
        # Copy the per-operator kwargs so that merging global_config below
        # does not mutate the caller's config structure in place.
        param = {} if operator[op_name] is None else dict(operator[op_name])
        if global_config is not None:
            param.update(global_config)
        # SECURITY NOTE: eval() resolves the class named by op_name from this
        # module's namespace — op_name must come from trusted config only.
        op = eval(op_name)(**param)
        ops.append(op)
    return ops
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import cv2
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def padding_image(img, size=(640, 640)):
    """Pad *img* with black pixels on the bottom/right up to *size*.

    The original content stays anchored at the top-left corner.  A dimension
    that already meets or exceeds the target is left unchanged (the pad
    amount is clamped to zero, where the previous code would have passed a
    negative border width and made cv2 raise).

    :param img: input image as an HxWxC numpy array.
    :param size: target (width, height); default (640, 640).
    :return: the padded image.
    """
    img_height, img_width = img.shape[:2]
    target_width, target_height = size

    # Pad only on the bottom and right so tile coordinates recorded by the
    # caller remain valid; clamp to zero for oversized dimensions.
    pad_bottom = max(0, target_height - img_height)
    pad_right = max(0, target_width - img_width)

    # Constant black ([0, 0, 0]) border padding.
    padded_img = cv2.copyMakeBorder(img,
                                    0,
                                    pad_bottom,
                                    0,
                                    pad_right,
                                    cv2.BORDER_CONSTANT,
                                    value=[0, 0, 0])
    return padded_img
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class CropResize(object):
    """Prepare an image for fixed-size inference by padding or tiling.

    Small images are padded up to ``size``; large images are split into
    overlapping ``size`` tiles (stride = half the tile size) and the tile
    positions in the original image are returned alongside the crops.
    """

    def __init__(self, size=(640, 640), interpolation=cv2.INTER_LINEAR):
        # size: target (width, height) of each output tile.
        self.size = size
        # NOTE(review): interpolation is stored but never used in __call__ —
        # no resize is actually performed; confirm whether it is dead config.
        self.interpolation = interpolation

    def __call__(self, data):
        """Pad or tile ``data['image']`` to the target size.

        :param data: dict with an 'image' entry (HxWxC numpy array).
        :return: tuple ``(images, positions)`` where *images* is a list of
            target-sized arrays and *positions* is a list of
            ``[left, top, right, bottom]`` boxes locating each crop in the
            (possibly padded) source image.
        """
        img = data['image']
        img_height, img_width = img.shape[:2]
        target_width, target_height = self.size

        # Case 1: image fits entirely inside one tile — pad (or pass through
        # when it is exactly the target size) and return a single crop.
        if img_width <= target_width and img_height <= target_height:
            if img_width == target_width and img_height == target_height:
                return [img], [[0, 0, img_width, img_height]]
            padded_img = padding_image(img, self.size)

            # The position box covers only the original (unpadded) content.
            return [padded_img], [[0, 0, img_width, img_height]]

        # Case 2: one dimension is smaller than the target — pad that
        # dimension (black, bottom/right) so tiling below works uniformly.
        if img_width < target_width:
            img = cv2.copyMakeBorder(img,
                                     0,
                                     0,
                                     0,
                                     target_width - img_width,
                                     cv2.BORDER_CONSTANT,
                                     value=[0, 0, 0])

        if img_height < target_height:
            img = cv2.copyMakeBorder(img,
                                     0,
                                     target_height - img_height,
                                     0,
                                     0,
                                     cv2.BORDER_CONSTANT,
                                     value=[0, 0, 0])
        # raise ValueError("Image dimensions must be greater than or equal to target size")

        img_height, img_width = img.shape[:2]
        # Slide a target-sized window with 50% overlap and collect each tile
        # plus its source-coordinate box.
        crop_positions = []
        count = 0
        cropped_img_list = []
        # NOTE(review): the horizontal range bound uses target_height // 2
        # instead of target_width // 2 — harmless for square sizes but
        # suspicious for non-square ones; confirm intent.
        for top in range(0, img_height - target_height // 2,
                         target_height // 2):
            for left in range(0, img_width - target_height // 2,
                              target_width // 2):
                # Clamp the window to the image boundary.
                right = min(left + target_width, img_width)
                bottom = min(top + target_height, img_height)
                # NOTE(review): after the min() clamps above, these two
                # branches can never fire (right/bottom are already bounded);
                # note also that rebinding `top` here would leak into later
                # iterations of the inner loop.
                if right > img_width:
                    right = img_width
                    left = max(0, right - target_width)
                if bottom > img_height:
                    bottom = img_height
                    top = max(0, bottom - target_height)
                # Crop the tile; pad edge tiles back up to the target size.
                cropped_img = img[top:bottom, left:right]
                if bottom - top < target_height or right - left < target_width:
                    cropped_img = padding_image(cropped_img, self.size)

                count += 1
                cropped_img_list.append(cropped_img)

                # Record where this tile came from in the padded image.
                crop_positions.append([left, top, right, bottom])

        # print(f"Images cropped and saved at {output_dir}.")

        return cropped_img_list, crop_positions
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
import math
|
|
2
|
+
import sys
|
|
3
|
+
import cv2
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class DetResizeForTest(object):
    """Resize an image for detection inference, selecting a strategy from kwargs.

    Strategies (checked in this order at construction time):
      - 'image_shape' in kwargs  -> type 1: resize to a fixed (h, w), with
        optional 'keep_ratio' (width snapped to a multiple of 32);
      - 'limit_side_len'         -> type 0: scale so a side respects the limit,
        then snap both sides to multiples of 32;
      - 'resize_long'            -> type 2: scale the long side, snap to
        multiples of 128;
      - otherwise                -> type 0 with limit_side_len=736, limit_type='min'.
    """

    def __init__(self, **kwargs):
        super(DetResizeForTest, self).__init__()
        # Defaults: type-0 resize, no aspect-ratio keeping.
        self.resize_type = 0
        self.keep_ratio = False
        if 'image_shape' in kwargs:
            # Fixed output shape (h, w).
            self.image_shape = kwargs['image_shape']
            self.resize_type = 1
            if 'keep_ratio' in kwargs:
                self.keep_ratio = kwargs['keep_ratio']
        elif 'limit_side_len' in kwargs:
            self.limit_side_len = kwargs['limit_side_len']
            self.limit_type = kwargs.get('limit_type', 'min')
        elif 'resize_long' in kwargs:
            self.resize_type = 2
            self.resize_long = kwargs.get('resize_long', 960)
        else:
            self.limit_side_len = 736
            self.limit_type = 'min'

    def __call__(self, data):
        """Resize ``data['image']`` and record the original shape and ratios.

        Sets ``data['shape']`` to ``np.array([src_h, src_w, ratio_h, ratio_w])``
        so downstream post-processing can map boxes back to source coordinates.
        """
        img = data['image']
        src_h, src_w, _ = img.shape
        # Guard tiny inputs: pad up to at least 32x32 before resizing.
        # NOTE(review): src_h/src_w keep their pre-padding values, so the
        # recorded ratios describe the original image, not the padded one.
        if sum([src_h, src_w]) < 64:
            img = self.image_padding(img)

        if self.resize_type == 0:
            # img, shape = self.resize_image_type0(img)
            img, [ratio_h, ratio_w] = self.resize_image_type0(img)
        elif self.resize_type == 2:
            img, [ratio_h, ratio_w] = self.resize_image_type2(img)
        else:
            # img, shape = self.resize_image_type1(img)
            img, [ratio_h, ratio_w] = self.resize_image_type1(img)
        data['image'] = img
        data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w])
        return data

    def image_padding(self, im, value=0):
        """Pad *im* with *value* so both sides are at least 32 pixels."""
        h, w, c = im.shape
        im_pad = np.zeros((max(32, h), max(32, w), c), np.uint8) + value
        im_pad[:h, :w, :] = im
        return im_pad

    def resize_image_type1(self, img):
        """Resize to the fixed ``self.image_shape`` (h, w).

        With ``keep_ratio`` the width is derived from the aspect ratio and
        rounded up to a multiple of 32.  Returns the image and
        ``[ratio_h, ratio_w]``.
        """
        resize_h, resize_w = self.image_shape
        ori_h, ori_w = img.shape[:2]  # (h, w, c)
        if self.keep_ratio is True:
            # Preserve aspect ratio; snap width up to a multiple of 32.
            resize_w = ori_w * resize_h / ori_h
            N = math.ceil(resize_w / 32)
            resize_w = N * 32
        ratio_h = float(resize_h) / ori_h
        ratio_w = float(resize_w) / ori_w
        img = cv2.resize(img, (int(resize_w), int(resize_h)))
        # return img, np.array([ori_h, ori_w])
        return img, [ratio_h, ratio_w]

    def resize_image_type0(self, img):
        """
        resize image to a size multiple of 32 which is required by the network

        args:
            img(array): array with shape [h, w, c]
        return(tuple):
            img, (ratio_h, ratio_w)
        """
        limit_side_len = self.limit_side_len
        h, w, c = img.shape

        # limit the max side
        if self.limit_type == 'max':
            # Shrink so the longer side does not exceed the limit.
            if max(h, w) > limit_side_len:
                if h > w:
                    ratio = float(limit_side_len) / h
                else:
                    ratio = float(limit_side_len) / w
            else:
                ratio = 1.0
        elif self.limit_type == 'min':
            # Grow so the shorter side reaches at least the limit.
            if min(h, w) < limit_side_len:
                if h < w:
                    ratio = float(limit_side_len) / h
                else:
                    ratio = float(limit_side_len) / w
            else:
                ratio = 1.0
        elif self.limit_type == 'resize_long':
            ratio = float(limit_side_len) / max(h, w)
        else:
            raise Exception('not support limit type, image ')
        resize_h = int(h * ratio)
        resize_w = int(w * ratio)

        # Snap both sides to multiples of 32 (network stride), minimum 32.
        resize_h = max(int(round(resize_h / 32) * 32), 32)
        resize_w = max(int(round(resize_w / 32) * 32), 32)

        # NOTE(review): the bare except + sys.exit(0) aborts the whole
        # process on any resize failure — consider narrowing to cv2.error
        # and raising instead.
        try:
            if int(resize_w) <= 0 or int(resize_h) <= 0:
                return None, (None, None)
            img = cv2.resize(img, (int(resize_w), int(resize_h)))
        except:
            print(img.shape, resize_w, resize_h)
            sys.exit(0)
        ratio_h = resize_h / float(h)
        ratio_w = resize_w / float(w)
        return img, [ratio_h, ratio_w]

    def resize_image_type2(self, img):
        """Scale so the long side equals ``self.resize_long``, then snap both
        sides up to multiples of 128.  Returns the image and [ratio_h, ratio_w]."""
        h, w, _ = img.shape

        resize_w = w
        resize_h = h

        # Scale factor from the longer side.
        if resize_h > resize_w:
            ratio = float(self.resize_long) / resize_h
        else:
            ratio = float(self.resize_long) / resize_w

        resize_h = int(resize_h * ratio)
        resize_w = int(resize_w * ratio)

        # Round both sides up to the network's max stride.
        max_stride = 128
        resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
        resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
        img = cv2.resize(img, (int(resize_w), int(resize_h)))
        ratio_h = resize_h / float(h)
        ratio_w = resize_w / float(w)

        return img, [ratio_h, ratio_w]
|