openocr-python 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (323) hide show
  1. openocr/__init__.py +11 -0
  2. openocr/configs/det/dbnet/repvit_db.yml +173 -0
  3. openocr/configs/rec/abinet/resnet45_trans_abinet_lang.yml +94 -0
  4. openocr/configs/rec/abinet/resnet45_trans_abinet_wo_lang.yml +93 -0
  5. openocr/configs/rec/abinet/svtrv2_abinet_lang.yml +130 -0
  6. openocr/configs/rec/abinet/svtrv2_abinet_wo_lang.yml +128 -0
  7. openocr/configs/rec/aster/resnet31_lstm_aster_tps_on.yml +93 -0
  8. openocr/configs/rec/aster/svtrv2_aster.yml +127 -0
  9. openocr/configs/rec/aster/svtrv2_aster_tps_on.yml +102 -0
  10. openocr/configs/rec/autostr/autostr_lstm_aster_tps_on.yml +95 -0
  11. openocr/configs/rec/busnet/svtrv2_busnet.yml +135 -0
  12. openocr/configs/rec/busnet/svtrv2_busnet_pretraining.yml +134 -0
  13. openocr/configs/rec/busnet/vit_busnet.yml +104 -0
  14. openocr/configs/rec/busnet/vit_busnet_pretraining.yml +104 -0
  15. openocr/configs/rec/cam/convnextv2_cam_tps_on.yml +118 -0
  16. openocr/configs/rec/cam/convnextv2_tiny_cam_tps_on.yml +118 -0
  17. openocr/configs/rec/cam/svtrv2_cam_tps_on.yml +123 -0
  18. openocr/configs/rec/cdistnet/resnet45_trans_cdistnet.yml +93 -0
  19. openocr/configs/rec/cdistnet/svtrv2_cdistnet.yml +139 -0
  20. openocr/configs/rec/cppd/svtr_base_cppd.yml +123 -0
  21. openocr/configs/rec/cppd/svtr_base_cppd_ch.yml +126 -0
  22. openocr/configs/rec/cppd/svtr_base_cppd_h8.yml +123 -0
  23. openocr/configs/rec/cppd/svtr_base_cppd_syn.yml +124 -0
  24. openocr/configs/rec/cppd/svtrv2_cppd.yml +150 -0
  25. openocr/configs/rec/dan/resnet45_fpn_dan.yml +98 -0
  26. openocr/configs/rec/dan/svtrv2_dan.yml +130 -0
  27. openocr/configs/rec/focalsvtr/focalsvtr_ctc.yml +137 -0
  28. openocr/configs/rec/gtc/svtrv2_lnconv_nrtr_gtc.yml +168 -0
  29. openocr/configs/rec/gtc/svtrv2_lnconv_smtr_gtc_long_infer.yml +151 -0
  30. openocr/configs/rec/gtc/svtrv2_lnconv_smtr_gtc_smtr_long.yml +150 -0
  31. openocr/configs/rec/gtc/svtrv2_lnconv_smtr_gtc_stream.yml +152 -0
  32. openocr/configs/rec/igtr/svtr_base_ds_igtr.yml +157 -0
  33. openocr/configs/rec/lister/focalsvtr_lister_wo_fem_maxratio12.yml +133 -0
  34. openocr/configs/rec/lister/svtrv2_lister_wo_fem_maxratio12.yml +138 -0
  35. openocr/configs/rec/lpv/svtr_base_lpv.yml +124 -0
  36. openocr/configs/rec/lpv/svtr_base_lpv_wo_glrm.yml +123 -0
  37. openocr/configs/rec/lpv/svtrv2_lpv.yml +147 -0
  38. openocr/configs/rec/lpv/svtrv2_lpv_wo_glrm.yml +146 -0
  39. openocr/configs/rec/maerec/vit_nrtr.yml +116 -0
  40. openocr/configs/rec/matrn/resnet45_trans_matrn.yml +95 -0
  41. openocr/configs/rec/matrn/svtrv2_matrn.yml +130 -0
  42. openocr/configs/rec/mgpstr/svtrv2_mgpstr_only_char.yml +140 -0
  43. openocr/configs/rec/mgpstr/vit_base_mgpstr_only_char.yml +111 -0
  44. openocr/configs/rec/mgpstr/vit_large_mgpstr_only_char.yml +110 -0
  45. openocr/configs/rec/mgpstr/vit_mgpstr.yml +110 -0
  46. openocr/configs/rec/mgpstr/vit_mgpstr_only_char.yml +110 -0
  47. openocr/configs/rec/moran/resnet31_lstm_moran.yml +92 -0
  48. openocr/configs/rec/nrtr/focalsvtr_nrtr_maxraio12.yml +145 -0
  49. openocr/configs/rec/nrtr/nrtr.yml +107 -0
  50. openocr/configs/rec/nrtr/svtr_base_nrtr.yml +118 -0
  51. openocr/configs/rec/nrtr/svtr_base_nrtr_syn.yml +119 -0
  52. openocr/configs/rec/nrtr/svtrv2_nrtr.yml +146 -0
  53. openocr/configs/rec/ote/svtr_base_h8_ote.yml +117 -0
  54. openocr/configs/rec/ote/svtr_base_ote.yml +116 -0
  55. openocr/configs/rec/parseq/focalsvtr_parseq_maxratio12.yml +140 -0
  56. openocr/configs/rec/parseq/svrtv2_parseq.yml +136 -0
  57. openocr/configs/rec/parseq/vit_parseq.yml +100 -0
  58. openocr/configs/rec/robustscanner/resnet31_robustscanner.yml +102 -0
  59. openocr/configs/rec/robustscanner/svtrv2_robustscanner.yml +134 -0
  60. openocr/configs/rec/sar/resnet31_lstm_sar.yml +94 -0
  61. openocr/configs/rec/sar/svtrv2_sar.yml +128 -0
  62. openocr/configs/rec/seed/resnet31_lstm_seed_tps_on.yml +96 -0
  63. openocr/configs/rec/smtr/focalsvtr_smtr.yml +150 -0
  64. openocr/configs/rec/smtr/focalsvtr_smtr_long.yml +133 -0
  65. openocr/configs/rec/smtr/svtrv2_smtr.yml +150 -0
  66. openocr/configs/rec/smtr/svtrv2_smtr_bi.yml +136 -0
  67. openocr/configs/rec/srn/resnet50_fpn_srn.yml +97 -0
  68. openocr/configs/rec/srn/svtrv2_srn.yml +131 -0
  69. openocr/configs/rec/svtrs/convnextv2_ctc.yml +105 -0
  70. openocr/configs/rec/svtrs/convnextv2_h8_ctc.yml +105 -0
  71. openocr/configs/rec/svtrs/convnextv2_h8_rctc.yml +106 -0
  72. openocr/configs/rec/svtrs/convnextv2_rctc.yml +106 -0
  73. openocr/configs/rec/svtrs/convnextv2_tiny_h8_ctc.yml +105 -0
  74. openocr/configs/rec/svtrs/convnextv2_tiny_h8_rctc.yml +106 -0
  75. openocr/configs/rec/svtrs/crnn_ctc.yml +99 -0
  76. openocr/configs/rec/svtrs/crnn_ctc_long.yml +116 -0
  77. openocr/configs/rec/svtrs/focalnet_base_ctc.yml +108 -0
  78. openocr/configs/rec/svtrs/focalnet_base_rctc.yml +109 -0
  79. openocr/configs/rec/svtrs/focalsvtr_ctc.yml +106 -0
  80. openocr/configs/rec/svtrs/focalsvtr_rctc.yml +107 -0
  81. openocr/configs/rec/svtrs/resnet45_trans_ctc.yml +103 -0
  82. openocr/configs/rec/svtrs/resnet45_trans_rctc.yml +104 -0
  83. openocr/configs/rec/svtrs/svtr_base_ctc.yml +110 -0
  84. openocr/configs/rec/svtrs/svtr_base_rctc.yml +111 -0
  85. openocr/configs/rec/svtrs/svtrnet_ctc_syn.yml +111 -0
  86. openocr/configs/rec/svtrs/vit_ctc.yml +103 -0
  87. openocr/configs/rec/svtrs/vit_rctc.yml +103 -0
  88. openocr/configs/rec/svtrv2/repsvtr_ch.yml +121 -0
  89. openocr/configs/rec/svtrv2/svtrv2_ch.yml +133 -0
  90. openocr/configs/rec/svtrv2/svtrv2_ctc.yml +136 -0
  91. openocr/configs/rec/svtrv2/svtrv2_rctc.yml +135 -0
  92. openocr/configs/rec/svtrv2/svtrv2_small_rctc.yml +135 -0
  93. openocr/configs/rec/svtrv2/svtrv2_smtr_gtc_rctc.yml +162 -0
  94. openocr/configs/rec/svtrv2/svtrv2_smtr_gtc_rctc_ch.yml +153 -0
  95. openocr/configs/rec/svtrv2/svtrv2_tiny_rctc.yml +135 -0
  96. openocr/configs/rec/visionlan/resnet45_trans_visionlan_LA.yml +103 -0
  97. openocr/configs/rec/visionlan/resnet45_trans_visionlan_LF_1.yml +102 -0
  98. openocr/configs/rec/visionlan/resnet45_trans_visionlan_LF_2.yml +103 -0
  99. openocr/configs/rec/visionlan/svtrv2_visionlan_LA.yml +112 -0
  100. openocr/configs/rec/visionlan/svtrv2_visionlan_LF_1.yml +111 -0
  101. openocr/configs/rec/visionlan/svtrv2_visionlan_LF_2.yml +112 -0
  102. openocr/demo_gradio.py +128 -0
  103. openocr/opendet/modeling/__init__.py +11 -0
  104. openocr/opendet/modeling/backbones/__init__.py +14 -0
  105. openocr/opendet/modeling/backbones/repvit.py +340 -0
  106. openocr/opendet/modeling/base_detector.py +69 -0
  107. openocr/opendet/modeling/heads/__init__.py +14 -0
  108. openocr/opendet/modeling/heads/db_head.py +73 -0
  109. openocr/opendet/modeling/necks/__init__.py +14 -0
  110. openocr/opendet/modeling/necks/db_fpn.py +609 -0
  111. openocr/opendet/postprocess/__init__.py +18 -0
  112. openocr/opendet/postprocess/db_postprocess.py +273 -0
  113. openocr/opendet/preprocess/__init__.py +154 -0
  114. openocr/opendet/preprocess/crop_resize.py +121 -0
  115. openocr/opendet/preprocess/db_resize_for_test.py +135 -0
  116. openocr/openrec/losses/__init__.py +62 -0
  117. openocr/openrec/losses/abinet_loss.py +42 -0
  118. openocr/openrec/losses/ar_loss.py +23 -0
  119. openocr/openrec/losses/cam_loss.py +48 -0
  120. openocr/openrec/losses/cdistnet_loss.py +34 -0
  121. openocr/openrec/losses/ce_loss.py +68 -0
  122. openocr/openrec/losses/cppd_loss.py +77 -0
  123. openocr/openrec/losses/ctc_loss.py +33 -0
  124. openocr/openrec/losses/igtr_loss.py +12 -0
  125. openocr/openrec/losses/lister_loss.py +14 -0
  126. openocr/openrec/losses/lpv_loss.py +30 -0
  127. openocr/openrec/losses/mgp_loss.py +34 -0
  128. openocr/openrec/losses/parseq_loss.py +12 -0
  129. openocr/openrec/losses/robustscanner_loss.py +20 -0
  130. openocr/openrec/losses/seed_loss.py +46 -0
  131. openocr/openrec/losses/smtr_loss.py +12 -0
  132. openocr/openrec/losses/srn_loss.py +40 -0
  133. openocr/openrec/losses/visionlan_loss.py +58 -0
  134. openocr/openrec/metrics/__init__.py +19 -0
  135. openocr/openrec/metrics/rec_metric.py +270 -0
  136. openocr/openrec/metrics/rec_metric_gtc.py +58 -0
  137. openocr/openrec/metrics/rec_metric_long.py +142 -0
  138. openocr/openrec/metrics/rec_metric_mgp.py +93 -0
  139. openocr/openrec/modeling/__init__.py +11 -0
  140. openocr/openrec/modeling/base_recognizer.py +69 -0
  141. openocr/openrec/modeling/common.py +238 -0
  142. openocr/openrec/modeling/decoders/__init__.py +109 -0
  143. openocr/openrec/modeling/decoders/abinet_decoder.py +283 -0
  144. openocr/openrec/modeling/decoders/aster_decoder.py +170 -0
  145. openocr/openrec/modeling/decoders/bus_decoder.py +133 -0
  146. openocr/openrec/modeling/decoders/cam_decoder.py +43 -0
  147. openocr/openrec/modeling/decoders/cdistnet_decoder.py +334 -0
  148. openocr/openrec/modeling/decoders/cppd_decoder.py +393 -0
  149. openocr/openrec/modeling/decoders/ctc_decoder.py +203 -0
  150. openocr/openrec/modeling/decoders/dan_decoder.py +203 -0
  151. openocr/openrec/modeling/decoders/igtr_decoder.py +815 -0
  152. openocr/openrec/modeling/decoders/lister_decoder.py +535 -0
  153. openocr/openrec/modeling/decoders/lpv_decoder.py +119 -0
  154. openocr/openrec/modeling/decoders/matrn_decoder.py +236 -0
  155. openocr/openrec/modeling/decoders/mgp_decoder.py +99 -0
  156. openocr/openrec/modeling/decoders/nrtr_decoder.py +439 -0
  157. openocr/openrec/modeling/decoders/ote_decoder.py +205 -0
  158. openocr/openrec/modeling/decoders/parseq_decoder.py +504 -0
  159. openocr/openrec/modeling/decoders/rctc_decoder.py +70 -0
  160. openocr/openrec/modeling/decoders/robustscanner_decoder.py +749 -0
  161. openocr/openrec/modeling/decoders/sar_decoder.py +236 -0
  162. openocr/openrec/modeling/decoders/smtr_decoder.py +621 -0
  163. openocr/openrec/modeling/decoders/smtr_decoder_nattn.py +521 -0
  164. openocr/openrec/modeling/decoders/srn_decoder.py +283 -0
  165. openocr/openrec/modeling/decoders/visionlan_decoder.py +321 -0
  166. openocr/openrec/modeling/encoders/__init__.py +39 -0
  167. openocr/openrec/modeling/encoders/autostr_encoder.py +327 -0
  168. openocr/openrec/modeling/encoders/cam_encoder.py +760 -0
  169. openocr/openrec/modeling/encoders/convnextv2.py +213 -0
  170. openocr/openrec/modeling/encoders/focalsvtr.py +631 -0
  171. openocr/openrec/modeling/encoders/nrtr_encoder.py +28 -0
  172. openocr/openrec/modeling/encoders/rec_hgnet.py +346 -0
  173. openocr/openrec/modeling/encoders/rec_lcnetv3.py +488 -0
  174. openocr/openrec/modeling/encoders/rec_mobilenet_v3.py +132 -0
  175. openocr/openrec/modeling/encoders/rec_mv1_enhance.py +254 -0
  176. openocr/openrec/modeling/encoders/rec_nrtr_mtb.py +37 -0
  177. openocr/openrec/modeling/encoders/rec_resnet_31.py +213 -0
  178. openocr/openrec/modeling/encoders/rec_resnet_45.py +183 -0
  179. openocr/openrec/modeling/encoders/rec_resnet_fpn.py +216 -0
  180. openocr/openrec/modeling/encoders/rec_resnet_vd.py +252 -0
  181. openocr/openrec/modeling/encoders/repvit.py +338 -0
  182. openocr/openrec/modeling/encoders/resnet31_rnn.py +123 -0
  183. openocr/openrec/modeling/encoders/svtrnet.py +574 -0
  184. openocr/openrec/modeling/encoders/svtrnet2dpos.py +616 -0
  185. openocr/openrec/modeling/encoders/svtrv2.py +470 -0
  186. openocr/openrec/modeling/encoders/svtrv2_lnconv.py +503 -0
  187. openocr/openrec/modeling/encoders/svtrv2_lnconv_two33.py +517 -0
  188. openocr/openrec/modeling/encoders/vit.py +120 -0
  189. openocr/openrec/modeling/transforms/__init__.py +15 -0
  190. openocr/openrec/modeling/transforms/aster_tps.py +262 -0
  191. openocr/openrec/modeling/transforms/moran.py +136 -0
  192. openocr/openrec/modeling/transforms/tps.py +246 -0
  193. openocr/openrec/optimizer/__init__.py +73 -0
  194. openocr/openrec/optimizer/lr.py +227 -0
  195. openocr/openrec/postprocess/__init__.py +72 -0
  196. openocr/openrec/postprocess/abinet_postprocess.py +37 -0
  197. openocr/openrec/postprocess/ar_postprocess.py +63 -0
  198. openocr/openrec/postprocess/ce_postprocess.py +43 -0
  199. openocr/openrec/postprocess/char_postprocess.py +108 -0
  200. openocr/openrec/postprocess/cppd_postprocess.py +42 -0
  201. openocr/openrec/postprocess/ctc_postprocess.py +119 -0
  202. openocr/openrec/postprocess/igtr_postprocess.py +100 -0
  203. openocr/openrec/postprocess/lister_postprocess.py +59 -0
  204. openocr/openrec/postprocess/mgp_postprocess.py +143 -0
  205. openocr/openrec/postprocess/nrtr_postprocess.py +75 -0
  206. openocr/openrec/postprocess/smtr_postprocess.py +73 -0
  207. openocr/openrec/postprocess/srn_postprocess.py +80 -0
  208. openocr/openrec/postprocess/visionlan_postprocess.py +81 -0
  209. openocr/openrec/preprocess/__init__.py +173 -0
  210. openocr/openrec/preprocess/abinet_aug.py +473 -0
  211. openocr/openrec/preprocess/abinet_label_encode.py +36 -0
  212. openocr/openrec/preprocess/ar_label_encode.py +36 -0
  213. openocr/openrec/preprocess/auto_augment.py +1012 -0
  214. openocr/openrec/preprocess/cam_label_encode.py +141 -0
  215. openocr/openrec/preprocess/ce_label_encode.py +116 -0
  216. openocr/openrec/preprocess/char_label_encode.py +36 -0
  217. openocr/openrec/preprocess/cppd_label_encode.py +173 -0
  218. openocr/openrec/preprocess/ctc_label_encode.py +124 -0
  219. openocr/openrec/preprocess/ep_label_encode.py +38 -0
  220. openocr/openrec/preprocess/igtr_label_encode.py +360 -0
  221. openocr/openrec/preprocess/mgp_label_encode.py +95 -0
  222. openocr/openrec/preprocess/parseq_aug.py +150 -0
  223. openocr/openrec/preprocess/rec_aug.py +211 -0
  224. openocr/openrec/preprocess/resize.py +534 -0
  225. openocr/openrec/preprocess/smtr_label_encode.py +125 -0
  226. openocr/openrec/preprocess/srn_label_encode.py +37 -0
  227. openocr/openrec/preprocess/visionlan_label_encode.py +67 -0
  228. openocr/tools/create_lmdb_dataset.py +118 -0
  229. openocr/tools/data/__init__.py +94 -0
  230. openocr/tools/data/collate_fn.py +100 -0
  231. openocr/tools/data/lmdb_dataset.py +142 -0
  232. openocr/tools/data/lmdb_dataset_test.py +166 -0
  233. openocr/tools/data/multi_scale_sampler.py +177 -0
  234. openocr/tools/data/ratio_dataset.py +217 -0
  235. openocr/tools/data/ratio_dataset_test.py +273 -0
  236. openocr/tools/data/ratio_dataset_tvresize.py +213 -0
  237. openocr/tools/data/ratio_dataset_tvresize_test.py +276 -0
  238. openocr/tools/data/ratio_sampler.py +190 -0
  239. openocr/tools/data/simple_dataset.py +263 -0
  240. openocr/tools/data/strlmdb_dataset.py +143 -0
  241. openocr/tools/engine/__init__.py +5 -0
  242. openocr/tools/engine/config.py +158 -0
  243. openocr/tools/engine/trainer.py +621 -0
  244. openocr/tools/eval_rec.py +41 -0
  245. openocr/tools/eval_rec_all_ch.py +184 -0
  246. openocr/tools/eval_rec_all_en.py +206 -0
  247. openocr/tools/eval_rec_all_long.py +119 -0
  248. openocr/tools/eval_rec_all_long_simple.py +122 -0
  249. openocr/tools/export_rec.py +118 -0
  250. openocr/tools/infer/onnx_engine.py +65 -0
  251. openocr/tools/infer/predict_rec.py +140 -0
  252. openocr/tools/infer/utility.py +234 -0
  253. openocr/tools/infer_det.py +449 -0
  254. openocr/tools/infer_e2e.py +462 -0
  255. openocr/tools/infer_e2e_parallel.py +184 -0
  256. openocr/tools/infer_rec.py +371 -0
  257. openocr/tools/train_rec.py +37 -0
  258. openocr/tools/utility.py +45 -0
  259. openocr/tools/utils/EN_symbol_dict.txt +94 -0
  260. openocr/tools/utils/__init__.py +0 -0
  261. openocr/tools/utils/ckpt.py +87 -0
  262. openocr/tools/utils/dict/ar_dict.txt +117 -0
  263. openocr/tools/utils/dict/arabic_dict.txt +161 -0
  264. openocr/tools/utils/dict/be_dict.txt +145 -0
  265. openocr/tools/utils/dict/bg_dict.txt +140 -0
  266. openocr/tools/utils/dict/chinese_cht_dict.txt +8421 -0
  267. openocr/tools/utils/dict/cyrillic_dict.txt +163 -0
  268. openocr/tools/utils/dict/devanagari_dict.txt +167 -0
  269. openocr/tools/utils/dict/en_dict.txt +63 -0
  270. openocr/tools/utils/dict/fa_dict.txt +136 -0
  271. openocr/tools/utils/dict/french_dict.txt +136 -0
  272. openocr/tools/utils/dict/german_dict.txt +143 -0
  273. openocr/tools/utils/dict/hi_dict.txt +162 -0
  274. openocr/tools/utils/dict/it_dict.txt +118 -0
  275. openocr/tools/utils/dict/japan_dict.txt +4399 -0
  276. openocr/tools/utils/dict/ka_dict.txt +153 -0
  277. openocr/tools/utils/dict/kie_dict/xfund_class_list.txt +4 -0
  278. openocr/tools/utils/dict/korean_dict.txt +3688 -0
  279. openocr/tools/utils/dict/latex_symbol_dict.txt +111 -0
  280. openocr/tools/utils/dict/latin_dict.txt +185 -0
  281. openocr/tools/utils/dict/layout_dict/layout_cdla_dict.txt +10 -0
  282. openocr/tools/utils/dict/layout_dict/layout_publaynet_dict.txt +5 -0
  283. openocr/tools/utils/dict/layout_dict/layout_table_dict.txt +1 -0
  284. openocr/tools/utils/dict/mr_dict.txt +153 -0
  285. openocr/tools/utils/dict/ne_dict.txt +153 -0
  286. openocr/tools/utils/dict/oc_dict.txt +96 -0
  287. openocr/tools/utils/dict/pu_dict.txt +130 -0
  288. openocr/tools/utils/dict/rs_dict.txt +91 -0
  289. openocr/tools/utils/dict/rsc_dict.txt +134 -0
  290. openocr/tools/utils/dict/ru_dict.txt +125 -0
  291. openocr/tools/utils/dict/spin_dict.txt +68 -0
  292. openocr/tools/utils/dict/ta_dict.txt +128 -0
  293. openocr/tools/utils/dict/table_dict.txt +277 -0
  294. openocr/tools/utils/dict/table_master_structure_dict.txt +39 -0
  295. openocr/tools/utils/dict/table_structure_dict.txt +28 -0
  296. openocr/tools/utils/dict/table_structure_dict_ch.txt +48 -0
  297. openocr/tools/utils/dict/te_dict.txt +151 -0
  298. openocr/tools/utils/dict/ug_dict.txt +114 -0
  299. openocr/tools/utils/dict/uk_dict.txt +142 -0
  300. openocr/tools/utils/dict/ur_dict.txt +137 -0
  301. openocr/tools/utils/dict/xi_dict.txt +110 -0
  302. openocr/tools/utils/dict90.txt +90 -0
  303. openocr/tools/utils/e2e_metric/Deteval.py +802 -0
  304. openocr/tools/utils/e2e_metric/polygon_fast.py +70 -0
  305. openocr/tools/utils/e2e_utils/extract_batchsize.py +86 -0
  306. openocr/tools/utils/e2e_utils/extract_textpoint_fast.py +479 -0
  307. openocr/tools/utils/e2e_utils/extract_textpoint_slow.py +582 -0
  308. openocr/tools/utils/e2e_utils/pgnet_pp_utils.py +159 -0
  309. openocr/tools/utils/e2e_utils/visual.py +152 -0
  310. openocr/tools/utils/en_dict.txt +95 -0
  311. openocr/tools/utils/gen_label.py +68 -0
  312. openocr/tools/utils/ic15_dict.txt +36 -0
  313. openocr/tools/utils/logging.py +56 -0
  314. openocr/tools/utils/poly_nms.py +132 -0
  315. openocr/tools/utils/ppocr_keys_v1.txt +6623 -0
  316. openocr/tools/utils/stats.py +58 -0
  317. openocr/tools/utils/utility.py +165 -0
  318. openocr/tools/utils/visual.py +117 -0
  319. openocr_python-0.0.2.dist-info/LICENCE +201 -0
  320. openocr_python-0.0.2.dist-info/METADATA +98 -0
  321. openocr_python-0.0.2.dist-info/RECORD +323 -0
  322. openocr_python-0.0.2.dist-info/WHEEL +5 -0
  323. openocr_python-0.0.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,273 @@
1
+ import numpy as np
2
+ import cv2
3
+ import torch
4
+ from shapely.geometry import Polygon
5
+ import pyclipper
6
+ """
7
+ This code is adapted from:
8
+ https://github.com/WenmuZhou/DBNet.pytorch/blob/master/post_processing/seg_detector_representer.py
9
+ """
10
+
11
+
12
class DBPostProcess(object):
    """Post-processing for Differentiable Binarization (DB) text detection.

    Thresholds the predicted probability map, extracts contours from the
    resulting binary map, scores and expands ("unclips") each candidate
    region, and rescales the surviving boxes to the source image size.
    """

    def __init__(
        self,
        thresh=0.3,
        box_thresh=0.7,
        max_candidates=1000,
        unclip_ratio=2.0,
        use_dilation=False,
        score_mode='fast',
        box_type='quad',
        **kwargs,
    ):
        """Store the post-processing hyper-parameters.

        Args:
            thresh: Probability threshold used to binarize the prediction.
            box_thresh: Minimum mean score a candidate must reach to be kept.
            max_candidates: Maximum number of contours examined per image.
            unclip_ratio: Expansion ratio passed to ``unclip``.
            use_dilation: If true, the binary map is dilated with a 2x2
                kernel of ones before contour extraction.
            score_mode: ``'fast'`` or ``'slow'``; selects the scoring method
                used by ``boxes_from_bitmap``.
            box_type: ``'quad'`` or ``'poly'``; checked in ``__call__``.
            **kwargs: Accepted and ignored.
        """
        self.thresh = thresh
        self.box_thresh = box_thresh
        self.max_candidates = max_candidates
        self.unclip_ratio = unclip_ratio
        # Candidates whose shortest side is below this are discarded.
        self.min_size = 3
        self.score_mode = score_mode
        self.box_type = box_type
        assert score_mode in [
            'slow',
            'fast',
        ], 'Score mode must be in [slow, fast] but got: {}'.format(score_mode)

        self.dilation_kernel = None if not use_dilation else np.array([[1, 1],
                                                                       [1, 1]])

    @staticmethod
    def _find_contours(bitmap):
        """Return the contours of a binary map for any OpenCV version.

        ``cv2.findContours`` returns ``(image, contours, hierarchy)`` in
        OpenCV 3 and ``(contours, hierarchy)`` otherwise; the contours are
        the second-to-last element in both layouts.
        """
        outs = cv2.findContours((bitmap * 255).astype(np.uint8),
                                cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
        return outs[-2]

    def polygons_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
        """Extract polygon text regions from a binary map.

        Args:
            pred: Score map used to compute the mean score of each region.
            _bitmap: Single 2-D binary map (H, W) with values in {0, 1}.
            dest_width: Width of the destination (source image) space.
            dest_height: Height of the destination (source image) space.

        Returns:
            ``(boxes, scores)`` where ``boxes`` is a list of polygons (each
            a list of ``[x, y]`` points) and ``scores`` the matching scores.
        """
        bitmap = _bitmap
        height, width = bitmap.shape

        boxes = []
        scores = []

        contours = self._find_contours(bitmap)

        for contour in contours[:self.max_candidates]:
            epsilon = 0.002 * cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, epsilon, True)
            points = approx.reshape((-1, 2))
            if points.shape[0] < 4:
                continue

            # NOTE(review): polygons are always scored with the fast method,
            # regardless of ``score_mode``; kept as in the original.
            score = self.box_score_fast(pred, points.reshape(-1, 2))
            if self.box_thresh > score:
                continue

            # Keep only candidates whose expansion yields exactly one path.
            box = self.unclip(points, self.unclip_ratio)
            if len(box) != 1:
                continue
            box = np.array(box).reshape(-1, 2)
            if len(box) == 0:
                continue

            _, sside = self.get_mini_boxes(box.reshape((-1, 1, 2)))
            if sside < self.min_size + 2:
                continue

            # Rescale from map coordinates to destination coordinates.
            box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0,
                                dest_width)
            box[:, 1] = np.clip(np.round(box[:, 1] / height * dest_height), 0,
                                dest_height)
            boxes.append(box.tolist())
            scores.append(score)
        return boxes, scores

    def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
        """Extract quadrilateral text boxes from a binary map.

        Args:
            pred: Score map used to compute the mean score of each region.
            _bitmap: Single 2-D binary map (H, W) with values in {0, 1}.
            dest_width: Width of the destination (source image) space.
            dest_height: Height of the destination (source image) space.

        Returns:
            ``(boxes, scores)`` where ``boxes`` is an int32 array built from
            the kept 4-point boxes and ``scores`` the matching scores.
        """
        bitmap = _bitmap
        height, width = bitmap.shape

        contours = self._find_contours(bitmap)
        num_contours = min(len(contours), self.max_candidates)

        boxes = []
        scores = []
        for index in range(num_contours):
            contour = contours[index]
            points, sside = self.get_mini_boxes(contour)
            if sside < self.min_size:
                continue
            points = np.array(points)
            if self.score_mode == 'fast':
                score = self.box_score_fast(pred, points.reshape(-1, 2))
            else:
                score = self.box_score_slow(pred, contour)
            if self.box_thresh > score:
                continue

            # Keep only candidates whose expansion yields exactly one path;
            # an empty result cannot be passed on to ``get_mini_boxes``.
            box = self.unclip(points, self.unclip_ratio)
            if len(box) != 1:
                continue
            box = np.array(box).reshape(-1, 1, 2)
            box, sside = self.get_mini_boxes(box)
            if sside < self.min_size + 2:
                continue
            box = np.array(box)

            # Rescale from map coordinates to destination coordinates.
            box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0,
                                dest_width)
            box[:, 1] = np.clip(np.round(box[:, 1] / height * dest_height), 0,
                                dest_height)
            boxes.append(box.astype('int32'))
            scores.append(score)
        return np.array(boxes, dtype='int32'), scores

    def unclip(self, box, unclip_ratio):
        """Expand a polygon outward with pyclipper.

        The offset distance is ``area * unclip_ratio / perimeter``.

        Returns:
            The list of expanded paths produced by ``PyclipperOffset.Execute``.
        """
        poly = Polygon(box)
        distance = poly.area * unclip_ratio / poly.length
        offset = pyclipper.PyclipperOffset()
        offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
        expanded = offset.Execute(distance)
        return expanded

    def get_mini_boxes(self, contour):
        """Return the minimum-area rectangle of a contour and its short side.

        The four corners come from ``cv2.boxPoints`` sorted by x; the two
        left-most and the two right-most points are then each ordered by y.

        Returns:
            ``(box, short_side)`` where ``box`` is a list of four points and
            ``short_side`` is the smaller dimension of the rectangle.
        """
        bounding_box = cv2.minAreaRect(contour)
        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])

        # Left pair: the point with the smaller y comes first, the other last.
        if points[1][1] > points[0][1]:
            index_1, index_4 = 0, 1
        else:
            index_1, index_4 = 1, 0
        # Right pair: the point with the smaller y comes second.
        if points[3][1] > points[2][1]:
            index_2, index_3 = 2, 3
        else:
            index_2, index_3 = 3, 2

        box = [
            points[index_1], points[index_2], points[index_3], points[index_4]
        ]
        return box, min(bounding_box[1])

    def box_score_fast(self, bitmap, _box):
        """Mean score of ``bitmap`` inside the polygon ``_box``.

        The polygon is rasterized into a mask covering only its axis-aligned
        bounding rectangle (clipped to the map) and the masked mean is
        returned.
        """
        h, w = bitmap.shape[:2]
        box = _box.copy()
        xmin = np.clip(np.floor(box[:, 0].min()).astype('int32'), 0, w - 1)
        xmax = np.clip(np.ceil(box[:, 0].max()).astype('int32'), 0, w - 1)
        ymin = np.clip(np.floor(box[:, 1].min()).astype('int32'), 0, h - 1)
        ymax = np.clip(np.ceil(box[:, 1].max()).astype('int32'), 0, h - 1)

        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
        # Shift the polygon into the coordinate frame of the cropped mask.
        box[:, 0] = box[:, 0] - xmin
        box[:, 1] = box[:, 1] - ymin
        cv2.fillPoly(mask, box.reshape(1, -1, 2).astype('int32'), 1)
        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]

    def box_score_slow(self, bitmap, contour):
        """Mean score of ``bitmap`` inside the original contour polygon."""
        h, w = bitmap.shape[:2]
        contour = contour.copy()
        contour = np.reshape(contour, (-1, 2))

        xmin = np.clip(np.min(contour[:, 0]), 0, w - 1)
        xmax = np.clip(np.max(contour[:, 0]), 0, w - 1)
        ymin = np.clip(np.min(contour[:, 1]), 0, h - 1)
        ymax = np.clip(np.max(contour[:, 1]), 0, h - 1)

        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)

        # Shift the contour into the coordinate frame of the cropped mask.
        contour[:, 0] = contour[:, 0] - xmin
        contour[:, 1] = contour[:, 1] - ymin

        cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype('int32'), 1)
        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]

    def __call__(self, outs_dict, shape_list):
        """Run post-processing on a batch of predictions.

        Args:
            outs_dict: Mapping whose ``'maps'`` entry is the prediction; it
                is indexed as ``pred[:, 0, :, :]``, so a 4-D array or tensor
                is expected.
            shape_list: Per-image sequence unpacked as
                ``(src_h, src_w, ratio_h, ratio_w)``.

        Returns:
            A list with one ``{'points': boxes}`` dict per image.

        Raises:
            ValueError: If ``box_type`` is neither ``'quad'`` nor ``'poly'``.
        """
        pred = outs_dict['maps']
        if isinstance(pred, torch.Tensor):
            pred = pred.detach().cpu().numpy()
        pred = pred[:, 0, :, :]
        segmentation = pred > self.thresh

        boxes_batch = []
        for batch_index in range(pred.shape[0]):
            # Only the source height and width are used below.
            src_h, src_w, ratio_h, ratio_w = shape_list[batch_index]
            if self.dilation_kernel is not None:
                mask = cv2.dilate(
                    np.array(segmentation[batch_index]).astype(np.uint8),
                    self.dilation_kernel,
                )
            else:
                mask = segmentation[batch_index]
            if self.box_type == 'poly':
                boxes, scores = self.polygons_from_bitmap(
                    pred[batch_index], mask, src_w, src_h)
            elif self.box_type == 'quad':
                boxes, scores = self.boxes_from_bitmap(pred[batch_index], mask,
                                                       src_w, src_h)
            else:
                raise ValueError(
                    "box_type can only be one of ['quad', 'poly']")

            boxes_batch.append({'points': boxes})
        return boxes_batch
240
+
241
+
242
class DistillationDBPostProcess(object):
    """Apply ``DBPostProcess`` to the outputs of several named sub-models."""

    def __init__(
        self,
        model_name=None,
        key=None,
        thresh=0.3,
        box_thresh=0.6,
        max_candidates=1000,
        unclip_ratio=1.5,
        use_dilation=False,
        score_mode='fast',
        box_type='quad',
        **kwargs,
    ):
        """Build the shared ``DBPostProcess`` instance.

        Args:
            model_name: Names of the sub-models whose predictions are
                post-processed; defaults to ``['student']``.
            key: Stored on the instance; not used by this class.
            thresh, box_thresh, max_candidates, unclip_ratio, use_dilation,
            score_mode, box_type: Forwarded to ``DBPostProcess``.
            **kwargs: Accepted and ignored.
        """
        # A fresh list per instance: a list literal as the default would be
        # shared by every instance that relies on the default.
        self.model_name = ['student'] if model_name is None else model_name
        self.key = key
        self.post_process = DBPostProcess(
            thresh=thresh,
            box_thresh=box_thresh,
            max_candidates=max_candidates,
            unclip_ratio=unclip_ratio,
            use_dilation=use_dilation,
            score_mode=score_mode,
            box_type=box_type,
        )

    def __call__(self, predicts, shape_list):
        """Post-process ``predicts[name]`` for every configured model name.

        Returns:
            A dict mapping each model name to its ``DBPostProcess`` result.
        """
        results = {}
        for k in self.model_name:
            results[k] = self.post_process(predicts[k], shape_list=shape_list)
        return results
@@ -0,0 +1,154 @@
1
+ import io
2
+
3
+ import cv2
4
+ import numpy as np
5
+ from PIL import Image
6
+
7
+ from .db_resize_for_test import DetResizeForTest
8
+
9
+
10
class NormalizeImage(object):
    """Scale an image, subtract a per-channel mean and divide by the std."""

    def __init__(self, scale=None, mean=None, std=None, order='chw', **kwargs):
        """Prepare the scale factor and the broadcastable mean/std arrays.

        Args:
            scale: Multiplicative factor, or a string expression evaluating
                to one; defaults to ``1.0 / 255.0``.
            mean: Per-channel mean; defaults to ``[0.485, 0.456, 0.406]``.
            std: Per-channel std; defaults to ``[0.229, 0.224, 0.225]``.
            order: ``'chw'`` reshapes mean/std to ``(3, 1, 1)``; any other
                value reshapes them to ``(1, 1, 3)``.
            **kwargs: Accepted and ignored.
        """
        # NOTE(review): eval() runs arbitrary code from a config-supplied
        # string; only safe when configuration files are trusted.
        if isinstance(scale, str):
            scale = eval(scale)
        if scale is None:
            scale = 1.0 / 255.0
        self.scale = np.float32(scale)

        if mean is None:
            mean = [0.485, 0.456, 0.406]
        if std is None:
            std = [0.229, 0.224, 0.225]

        if order == 'chw':
            stat_shape = (3, 1, 1)
        else:
            stat_shape = (1, 1, 3)
        self.mean = np.array(mean).reshape(stat_shape).astype('float32')
        self.std = np.array(std).reshape(stat_shape).astype('float32')

    def __call__(self, data):
        """Normalize ``data['image']`` in place and return ``data``."""
        image = data['image']
        from PIL import Image

        # PIL images are converted to arrays before the arithmetic.
        if isinstance(image, Image.Image):
            image = np.array(image)
        assert isinstance(image,
                          np.ndarray), "invalid input 'img' in NormalizeImage"
        scaled = image.astype('float32') * self.scale
        centered = scaled - self.mean
        data['image'] = centered / self.std
        return data
35
+
36
+
37
class ToCHWImage(object):
    """Reorder an image from (H, W, C) layout to (C, H, W) layout."""

    def __init__(self, **kwargs):
        """Accept and ignore any configuration keywords."""
        pass

    def __call__(self, data):
        """Transpose ``data['image']`` in place and return ``data``."""
        image = data['image']
        from PIL import Image

        # PIL images are converted to arrays before transposing.
        if isinstance(image, Image.Image):
            image = np.array(image)
        chw_axes = (2, 0, 1)
        data['image'] = image.transpose(chw_axes)
        return data
51
+
52
+
53
class KeepKeys(object):
    """Select a fixed set of entries from a sample dict, in order."""

    def __init__(self, keep_keys, **kwargs):
        """Remember the keys to extract; extra keywords are ignored."""
        self.keep_keys = keep_keys

    def __call__(self, data):
        """Return ``[data[k] for k in keep_keys]`` as a list."""
        return [data[name] for name in self.keep_keys]
63
+
64
+
65
def transform(data, ops=None):
    """Apply each operator in ``ops`` to ``data`` in sequence.

    Stops and returns ``None`` as soon as an operator returns ``None``;
    otherwise returns the output of the last operator (or ``data`` itself
    when there are no operators).
    """
    pipeline = [] if ops is None else ops
    current = data
    for step in pipeline:
        current = step(current)
        if current is None:
            return None
    return current
74
+
75
+
76
class DecodeImage(object):
    """Decode an encoded image byte string with OpenCV."""

    def __init__(self,
                 img_mode='RGB',
                 channel_first=False,
                 ignore_orientation=False,
                 **kwargs):
        """Store the decoding options.

        Args:
            img_mode: ``'RGB'`` reverses the decoded channel order,
                ``'GRAY'`` produces a 3-channel grayscale image; any other
                value leaves the decoded image as returned by OpenCV.
            channel_first: If true, the result is transposed to (C, H, W).
            ignore_orientation: If true, decode with
                ``cv2.IMREAD_IGNORE_ORIENTATION | cv2.IMREAD_COLOR``.
            **kwargs: Accepted and ignored.
        """
        self.img_mode = img_mode
        self.channel_first = channel_first
        self.ignore_orientation = ignore_orientation

    def __call__(self, data):
        """Decode ``data['image']`` in place.

        Returns:
            ``data`` with the decoded array under ``'image'``, or ``None``
            when OpenCV cannot decode the bytes.
        """
        img = data['image']

        assert type(img) is bytes and len(
            img) > 0, "invalid input 'img' in DecodeImage"
        img = np.frombuffer(img, dtype='uint8')
        if self.ignore_orientation:
            img = cv2.imdecode(
                img, cv2.IMREAD_IGNORE_ORIENTATION | cv2.IMREAD_COLOR)
        else:
            img = cv2.imdecode(img, 1)
        if img is None:
            return None
        if self.img_mode == 'GRAY':
            # The image was decoded as 3-channel color, so it must be
            # reduced to one channel before COLOR_GRAY2BGR can be applied;
            # applying GRAY2BGR directly to a 3-channel image raises.
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            img = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
        elif self.img_mode == 'RGB':
            # Wrap the shape in a 1-tuple so the message formats correctly.
            assert img.shape[2] == 3, 'invalid shape of image[%s]' % (
                img.shape, )
            img = img[:, :, ::-1]

        if self.channel_first:
            img = img.transpose((2, 0, 1))

        data['image'] = img
        return data
113
+
114
+
115
class DecodeImagePIL(object):
    """Decode an encoded image byte string with PIL."""

    def __init__(self, img_mode='RGB', **kwargs):
        """Store the target image mode; extra keywords are ignored.

        Args:
            img_mode: ``'Gray'`` converts to mode ``'L'``, ``'BGR'``
                reverses the channel order; any other value keeps RGB.
        """
        self.img_mode = img_mode

    def __call__(self, data):
        """Decode ``data['image']`` into a PIL image, in place.

        Returns:
            ``data`` with a ``PIL.Image.Image`` under ``'image'``.
        """
        img = data['image']
        assert type(img) is bytes and len(
            img) > 0, "invalid input 'img' in DecodeImagePIL"
        buf = io.BytesIO(img)
        img = Image.open(buf).convert('RGB')
        # NOTE(review): this class matches 'Gray' while DecodeImage matches
        # 'GRAY'; a config using 'GRAY' here silently yields an RGB image.
        if self.img_mode == 'Gray':
            img = img.convert('L')
        elif self.img_mode == 'BGR':
            # Convert to a numpy array and reverse the channel (last) axis.
            img = np.array(img)[:, :, ::-1]
            img = Image.fromarray(np.uint8(img))
        data['image'] = img
        return data
135
+
136
+
137
def create_operators(op_param_list, global_config=None):
    """Instantiate operators from a list of single-entry config dicts.

    Args:
        op_param_list (list): Each item is a dict with exactly one entry,
            mapping an operator name to its keyword arguments (or ``None``
            for no arguments).
        global_config (dict, optional): Extra keyword arguments merged into
            every operator's arguments, overriding entries with equal keys.

    Returns:
        list: The constructed operators, in configuration order.
    """
    assert isinstance(op_param_list, list), 'operator config should be a list'
    ops = []
    for operator in op_param_list:
        assert isinstance(operator,
                          dict) and len(operator) == 1, 'yaml format error'
        op_name = list(operator)[0]
        # Work on a copy so that merging global_config never mutates the
        # caller's configuration dict.
        param = {} if operator[op_name] is None else dict(operator[op_name])
        if global_config is not None:
            param.update(global_config)
        # NOTE(review): eval() resolves the operator name as an arbitrary
        # expression; only safe when configuration files are trusted.
        op = eval(op_name)(**param)
        ops.append(op)
    return ops
@@ -0,0 +1,121 @@
1
+ import cv2
2
+
3
+
4
def padding_image(img, size=(640, 640)):
    """Pad an image on its bottom and right edges up to ``size``.

    The original pixels stay anchored at the top-left corner; the added
    border is filled with the constant value ``[0, 0, 0]``.

    :param img: Image array whose first two dimensions are (height, width).
    :param size: Target size as ``(width, height)`` (default 640x640).
    :return: The padded image.
    """
    height, width = img.shape[:2]
    out_width, out_height = size

    return cv2.copyMakeBorder(
        img,
        0,                    # top
        out_height - height,  # bottom
        0,                    # left
        out_width - width,    # right
        cv2.BORDER_CONSTANT,
        value=[0, 0, 0])
38
+
39
+
40
class CropResize(object):
    """Pad a small image, or cut a large one into overlapping tiles.

    Tiles are ``size`` pixels large and start every half tile width and
    half tile height, so neighbouring tiles overlap by half.
    """

    def __init__(self, size=(640, 640), interpolation=cv2.INTER_LINEAR):
        """
        :param size: Tile size as ``(width, height)`` (default 640x640).
        :param interpolation: Stored on the instance; ``__call__`` does not
            use it.
        """
        self.size = size
        self.interpolation = interpolation

    def __call__(self, data):
        """
        Pad or tile ``data['image']``:
        - If the image fits inside the target size, it is returned as a
          single image (padded bottom/right to the target size if needed).
        - Otherwise any dimension shorter than the target is padded first,
          then the image is cut into half-overlapping tiles; tiles that
          end up smaller than the target at the border are padded as well.

        :param data: Dict holding the image array under ``'image'``.
        :return: Tuple ``(images, positions)``. ``positions[i]`` is
            ``[left, top, right, bottom]`` for ``images[i]``; for tiles the
            coordinates refer to the image after the initial padding.
        """
        img = data['image']
        img_height, img_width = img.shape[:2]
        target_width, target_height = self.size

        # Image fits in one tile: return it as is, or padded to full size.
        if img_width <= target_width and img_height <= target_height:
            positions = [[0, 0, img_width, img_height]]
            if img_width == target_width and img_height == target_height:
                return [img], positions
            return [padding_image(img, self.size)], positions

        # Only one dimension can be short here; grow it to the target so
        # every tile has a full extent along that axis.
        if img_width < target_width:
            img = cv2.copyMakeBorder(img,
                                     0,
                                     0,
                                     0,
                                     target_width - img_width,
                                     cv2.BORDER_CONSTANT,
                                     value=[0, 0, 0])

        if img_height < target_height:
            img = cv2.copyMakeBorder(img,
                                     0,
                                     target_height - img_height,
                                     0,
                                     0,
                                     cv2.BORDER_CONSTANT,
                                     value=[0, 0, 0])

        img_height, img_width = img.shape[:2]

        step_y = target_height // 2
        step_x = target_width // 2
        cropped_img_list = []
        crop_positions = []
        for top in range(0, img_height - step_y, step_y):
            bottom = min(top + target_height, img_height)
            # The horizontal sweep is bounded by the tile *width*; using the
            # height here would skip or duplicate columns for non-square
            # tile sizes.
            for left in range(0, img_width - step_x, step_x):
                right = min(left + target_width, img_width)

                cropped_img = img[top:bottom, left:right]
                # Border tiles are clipped to the image; pad them back to
                # the full tile size.
                if bottom - top < target_height or right - left < target_width:
                    cropped_img = padding_image(cropped_img, self.size)

                cropped_img_list.append(cropped_img)
                crop_positions.append([left, top, right, bottom])

        return cropped_img_list, crop_positions
@@ -0,0 +1,135 @@
1
+ import math
2
+ import sys
3
+ import cv2
4
+ import numpy as np
5
+
6
+
7
class DetResizeForTest(object):
    """Resize an image for detection and record the applied scale factors.

    The strategy is selected by the keyword arguments, checked in this
    order:

    - ``image_shape``: resize to a fixed ``(height, width)``; with
      ``keep_ratio=True`` the width follows the aspect ratio, rounded up
      to a multiple of 32.
    - ``limit_side_len`` (with optional ``limit_type`` of ``'max'``,
      ``'min'`` or ``'resize_long'``, default ``'min'``): scale by the
      limit, then round each side to the nearest multiple of 32.
    - ``resize_long``: scale the longer side to this length, then round
      each side up to a multiple of 128.
    - none of the above: ``limit_side_len=736`` with ``limit_type='min'``.
    """

    def __init__(self, **kwargs):
        super(DetResizeForTest, self).__init__()
        self.resize_type = 0
        self.keep_ratio = False
        if 'image_shape' in kwargs:
            self.image_shape = kwargs['image_shape']
            self.resize_type = 1
            if 'keep_ratio' in kwargs:
                self.keep_ratio = kwargs['keep_ratio']
        elif 'limit_side_len' in kwargs:
            self.limit_side_len = kwargs['limit_side_len']
            self.limit_type = kwargs.get('limit_type', 'min')
        elif 'resize_long' in kwargs:
            self.resize_type = 2
            self.resize_long = kwargs.get('resize_long', 960)
        else:
            self.limit_side_len = 736
            self.limit_type = 'min'

    def __call__(self, data):
        """Resize ``data['image']`` and store the result back in ``data``.

        Args:
            data (dict): holds an image array of shape [h, w, c] under
                ``'image'``.

        Returns:
            dict: the same dict, with ``'image'`` replaced by the resized
            image and ``'shape'`` set to
            ``np.array([src_h, src_w, ratio_h, ratio_w])``, where
            ``src_h``/``src_w`` are the input height and width.
        """
        img = data['image']
        src_h, src_w, _ = img.shape
        # Very small inputs are first padded up to at least 32x32.
        if sum([src_h, src_w]) < 64:
            img = self.image_padding(img)

        if self.resize_type == 0:
            img, (ratio_h, ratio_w) = self.resize_image_type0(img)
        elif self.resize_type == 2:
            img, (ratio_h, ratio_w) = self.resize_image_type2(img)
        else:
            img, (ratio_h, ratio_w) = self.resize_image_type1(img)
        data['image'] = img
        data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w])
        return data

    def image_padding(self, im, value=0):
        """Place ``im`` in the top-left of a canvas at least 32x32 in size.

        The rest of the canvas is filled with ``value``.
        """
        h, w, c = im.shape
        im_pad = np.zeros((max(32, h), max(32, w), c), np.uint8) + value
        im_pad[:h, :w, :] = im
        return im_pad

    def resize_image_type1(self, img):
        """Resize to ``self.image_shape``, optionally keeping aspect ratio.

        Returns:
            tuple: ``(img, [ratio_h, ratio_w])``.
        """
        resize_h, resize_w = self.image_shape
        ori_h, ori_w = img.shape[:2]  # (h, w, c)
        if self.keep_ratio is True:
            # Derive the width from the aspect ratio, rounded up to a
            # multiple of 32.
            resize_w = ori_w * resize_h / ori_h
            N = math.ceil(resize_w / 32)
            resize_w = N * 32
        ratio_h = float(resize_h) / ori_h
        ratio_w = float(resize_w) / ori_w
        img = cv2.resize(img, (int(resize_w), int(resize_h)))
        return img, [ratio_h, ratio_w]

    def resize_image_type0(self, img):
        """
        resize image to a size multiple of 32 which is required by the network
        args:
            img(array): array with shape [h, w, c]
        return(tuple):
            img, [ratio_h, ratio_w]
        raises:
            Exception: if ``self.limit_type`` is not 'max', 'min' or
                'resize_long'. Errors from ``cv2.resize`` propagate to the
                caller.
        """
        limit_side_len = self.limit_side_len
        h, w, c = img.shape

        if self.limit_type == 'max':
            # Shrink only when the longer side exceeds the limit.
            if max(h, w) > limit_side_len:
                if h > w:
                    ratio = float(limit_side_len) / h
                else:
                    ratio = float(limit_side_len) / w
            else:
                ratio = 1.0
        elif self.limit_type == 'min':
            # Enlarge only when the shorter side is below the limit.
            if min(h, w) < limit_side_len:
                if h < w:
                    ratio = float(limit_side_len) / h
                else:
                    ratio = float(limit_side_len) / w
            else:
                ratio = 1.0
        elif self.limit_type == 'resize_long':
            ratio = float(limit_side_len) / max(h, w)
        else:
            raise Exception('not support limit type, image ')
        resize_h = int(h * ratio)
        resize_w = int(w * ratio)

        # Round to the nearest multiple of 32, never going below 32.
        resize_h = max(int(round(resize_h / 32) * 32), 32)
        resize_w = max(int(round(resize_w / 32) * 32), 32)

        try:
            img = cv2.resize(img, (resize_w, resize_h))
        except Exception:
            # Report the offending sizes, then let the error reach the
            # caller instead of ending the process with a success status.
            print(img.shape, resize_w, resize_h, file=sys.stderr)
            raise
        ratio_h = resize_h / float(h)
        ratio_w = resize_w / float(w)
        return img, [ratio_h, ratio_w]

    def resize_image_type2(self, img):
        """Scale the longer side to ``self.resize_long``.

        Each side is then rounded up to a multiple of 128.

        Returns:
            tuple: ``(img, [ratio_h, ratio_w])``.
        """
        h, w, _ = img.shape

        resize_w = w
        resize_h = h

        if resize_h > resize_w:
            ratio = float(self.resize_long) / resize_h
        else:
            ratio = float(self.resize_long) / resize_w

        resize_h = int(resize_h * ratio)
        resize_w = int(resize_w * ratio)

        max_stride = 128
        resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
        resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
        img = cv2.resize(img, (int(resize_w), int(resize_h)))
        ratio_h = resize_h / float(h)
        ratio_w = resize_w / float(w)

        return img, [ratio_h, ratio_w]