openocr-python 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (323) hide show
  1. openocr/__init__.py +11 -0
  2. openocr/configs/det/dbnet/repvit_db.yml +173 -0
  3. openocr/configs/rec/abinet/resnet45_trans_abinet_lang.yml +94 -0
  4. openocr/configs/rec/abinet/resnet45_trans_abinet_wo_lang.yml +93 -0
  5. openocr/configs/rec/abinet/svtrv2_abinet_lang.yml +130 -0
  6. openocr/configs/rec/abinet/svtrv2_abinet_wo_lang.yml +128 -0
  7. openocr/configs/rec/aster/resnet31_lstm_aster_tps_on.yml +93 -0
  8. openocr/configs/rec/aster/svtrv2_aster.yml +127 -0
  9. openocr/configs/rec/aster/svtrv2_aster_tps_on.yml +102 -0
  10. openocr/configs/rec/autostr/autostr_lstm_aster_tps_on.yml +95 -0
  11. openocr/configs/rec/busnet/svtrv2_busnet.yml +135 -0
  12. openocr/configs/rec/busnet/svtrv2_busnet_pretraining.yml +134 -0
  13. openocr/configs/rec/busnet/vit_busnet.yml +104 -0
  14. openocr/configs/rec/busnet/vit_busnet_pretraining.yml +104 -0
  15. openocr/configs/rec/cam/convnextv2_cam_tps_on.yml +118 -0
  16. openocr/configs/rec/cam/convnextv2_tiny_cam_tps_on.yml +118 -0
  17. openocr/configs/rec/cam/svtrv2_cam_tps_on.yml +123 -0
  18. openocr/configs/rec/cdistnet/resnet45_trans_cdistnet.yml +93 -0
  19. openocr/configs/rec/cdistnet/svtrv2_cdistnet.yml +139 -0
  20. openocr/configs/rec/cppd/svtr_base_cppd.yml +123 -0
  21. openocr/configs/rec/cppd/svtr_base_cppd_ch.yml +126 -0
  22. openocr/configs/rec/cppd/svtr_base_cppd_h8.yml +123 -0
  23. openocr/configs/rec/cppd/svtr_base_cppd_syn.yml +124 -0
  24. openocr/configs/rec/cppd/svtrv2_cppd.yml +150 -0
  25. openocr/configs/rec/dan/resnet45_fpn_dan.yml +98 -0
  26. openocr/configs/rec/dan/svtrv2_dan.yml +130 -0
  27. openocr/configs/rec/focalsvtr/focalsvtr_ctc.yml +137 -0
  28. openocr/configs/rec/gtc/svtrv2_lnconv_nrtr_gtc.yml +168 -0
  29. openocr/configs/rec/gtc/svtrv2_lnconv_smtr_gtc_long_infer.yml +151 -0
  30. openocr/configs/rec/gtc/svtrv2_lnconv_smtr_gtc_smtr_long.yml +150 -0
  31. openocr/configs/rec/gtc/svtrv2_lnconv_smtr_gtc_stream.yml +152 -0
  32. openocr/configs/rec/igtr/svtr_base_ds_igtr.yml +157 -0
  33. openocr/configs/rec/lister/focalsvtr_lister_wo_fem_maxratio12.yml +133 -0
  34. openocr/configs/rec/lister/svtrv2_lister_wo_fem_maxratio12.yml +138 -0
  35. openocr/configs/rec/lpv/svtr_base_lpv.yml +124 -0
  36. openocr/configs/rec/lpv/svtr_base_lpv_wo_glrm.yml +123 -0
  37. openocr/configs/rec/lpv/svtrv2_lpv.yml +147 -0
  38. openocr/configs/rec/lpv/svtrv2_lpv_wo_glrm.yml +146 -0
  39. openocr/configs/rec/maerec/vit_nrtr.yml +116 -0
  40. openocr/configs/rec/matrn/resnet45_trans_matrn.yml +95 -0
  41. openocr/configs/rec/matrn/svtrv2_matrn.yml +130 -0
  42. openocr/configs/rec/mgpstr/svtrv2_mgpstr_only_char.yml +140 -0
  43. openocr/configs/rec/mgpstr/vit_base_mgpstr_only_char.yml +111 -0
  44. openocr/configs/rec/mgpstr/vit_large_mgpstr_only_char.yml +110 -0
  45. openocr/configs/rec/mgpstr/vit_mgpstr.yml +110 -0
  46. openocr/configs/rec/mgpstr/vit_mgpstr_only_char.yml +110 -0
  47. openocr/configs/rec/moran/resnet31_lstm_moran.yml +92 -0
  48. openocr/configs/rec/nrtr/focalsvtr_nrtr_maxraio12.yml +145 -0
  49. openocr/configs/rec/nrtr/nrtr.yml +107 -0
  50. openocr/configs/rec/nrtr/svtr_base_nrtr.yml +118 -0
  51. openocr/configs/rec/nrtr/svtr_base_nrtr_syn.yml +119 -0
  52. openocr/configs/rec/nrtr/svtrv2_nrtr.yml +146 -0
  53. openocr/configs/rec/ote/svtr_base_h8_ote.yml +117 -0
  54. openocr/configs/rec/ote/svtr_base_ote.yml +116 -0
  55. openocr/configs/rec/parseq/focalsvtr_parseq_maxratio12.yml +140 -0
  56. openocr/configs/rec/parseq/svrtv2_parseq.yml +136 -0
  57. openocr/configs/rec/parseq/vit_parseq.yml +100 -0
  58. openocr/configs/rec/robustscanner/resnet31_robustscanner.yml +102 -0
  59. openocr/configs/rec/robustscanner/svtrv2_robustscanner.yml +134 -0
  60. openocr/configs/rec/sar/resnet31_lstm_sar.yml +94 -0
  61. openocr/configs/rec/sar/svtrv2_sar.yml +128 -0
  62. openocr/configs/rec/seed/resnet31_lstm_seed_tps_on.yml +96 -0
  63. openocr/configs/rec/smtr/focalsvtr_smtr.yml +150 -0
  64. openocr/configs/rec/smtr/focalsvtr_smtr_long.yml +133 -0
  65. openocr/configs/rec/smtr/svtrv2_smtr.yml +150 -0
  66. openocr/configs/rec/smtr/svtrv2_smtr_bi.yml +136 -0
  67. openocr/configs/rec/srn/resnet50_fpn_srn.yml +97 -0
  68. openocr/configs/rec/srn/svtrv2_srn.yml +131 -0
  69. openocr/configs/rec/svtrs/convnextv2_ctc.yml +105 -0
  70. openocr/configs/rec/svtrs/convnextv2_h8_ctc.yml +105 -0
  71. openocr/configs/rec/svtrs/convnextv2_h8_rctc.yml +106 -0
  72. openocr/configs/rec/svtrs/convnextv2_rctc.yml +106 -0
  73. openocr/configs/rec/svtrs/convnextv2_tiny_h8_ctc.yml +105 -0
  74. openocr/configs/rec/svtrs/convnextv2_tiny_h8_rctc.yml +106 -0
  75. openocr/configs/rec/svtrs/crnn_ctc.yml +99 -0
  76. openocr/configs/rec/svtrs/crnn_ctc_long.yml +116 -0
  77. openocr/configs/rec/svtrs/focalnet_base_ctc.yml +108 -0
  78. openocr/configs/rec/svtrs/focalnet_base_rctc.yml +109 -0
  79. openocr/configs/rec/svtrs/focalsvtr_ctc.yml +106 -0
  80. openocr/configs/rec/svtrs/focalsvtr_rctc.yml +107 -0
  81. openocr/configs/rec/svtrs/resnet45_trans_ctc.yml +103 -0
  82. openocr/configs/rec/svtrs/resnet45_trans_rctc.yml +104 -0
  83. openocr/configs/rec/svtrs/svtr_base_ctc.yml +110 -0
  84. openocr/configs/rec/svtrs/svtr_base_rctc.yml +111 -0
  85. openocr/configs/rec/svtrs/svtrnet_ctc_syn.yml +111 -0
  86. openocr/configs/rec/svtrs/vit_ctc.yml +103 -0
  87. openocr/configs/rec/svtrs/vit_rctc.yml +103 -0
  88. openocr/configs/rec/svtrv2/repsvtr_ch.yml +121 -0
  89. openocr/configs/rec/svtrv2/svtrv2_ch.yml +133 -0
  90. openocr/configs/rec/svtrv2/svtrv2_ctc.yml +136 -0
  91. openocr/configs/rec/svtrv2/svtrv2_rctc.yml +135 -0
  92. openocr/configs/rec/svtrv2/svtrv2_small_rctc.yml +135 -0
  93. openocr/configs/rec/svtrv2/svtrv2_smtr_gtc_rctc.yml +162 -0
  94. openocr/configs/rec/svtrv2/svtrv2_smtr_gtc_rctc_ch.yml +153 -0
  95. openocr/configs/rec/svtrv2/svtrv2_tiny_rctc.yml +135 -0
  96. openocr/configs/rec/visionlan/resnet45_trans_visionlan_LA.yml +103 -0
  97. openocr/configs/rec/visionlan/resnet45_trans_visionlan_LF_1.yml +102 -0
  98. openocr/configs/rec/visionlan/resnet45_trans_visionlan_LF_2.yml +103 -0
  99. openocr/configs/rec/visionlan/svtrv2_visionlan_LA.yml +112 -0
  100. openocr/configs/rec/visionlan/svtrv2_visionlan_LF_1.yml +111 -0
  101. openocr/configs/rec/visionlan/svtrv2_visionlan_LF_2.yml +112 -0
  102. openocr/demo_gradio.py +128 -0
  103. openocr/opendet/modeling/__init__.py +11 -0
  104. openocr/opendet/modeling/backbones/__init__.py +14 -0
  105. openocr/opendet/modeling/backbones/repvit.py +340 -0
  106. openocr/opendet/modeling/base_detector.py +69 -0
  107. openocr/opendet/modeling/heads/__init__.py +14 -0
  108. openocr/opendet/modeling/heads/db_head.py +73 -0
  109. openocr/opendet/modeling/necks/__init__.py +14 -0
  110. openocr/opendet/modeling/necks/db_fpn.py +609 -0
  111. openocr/opendet/postprocess/__init__.py +18 -0
  112. openocr/opendet/postprocess/db_postprocess.py +273 -0
  113. openocr/opendet/preprocess/__init__.py +154 -0
  114. openocr/opendet/preprocess/crop_resize.py +121 -0
  115. openocr/opendet/preprocess/db_resize_for_test.py +135 -0
  116. openocr/openrec/losses/__init__.py +62 -0
  117. openocr/openrec/losses/abinet_loss.py +42 -0
  118. openocr/openrec/losses/ar_loss.py +23 -0
  119. openocr/openrec/losses/cam_loss.py +48 -0
  120. openocr/openrec/losses/cdistnet_loss.py +34 -0
  121. openocr/openrec/losses/ce_loss.py +68 -0
  122. openocr/openrec/losses/cppd_loss.py +77 -0
  123. openocr/openrec/losses/ctc_loss.py +33 -0
  124. openocr/openrec/losses/igtr_loss.py +12 -0
  125. openocr/openrec/losses/lister_loss.py +14 -0
  126. openocr/openrec/losses/lpv_loss.py +30 -0
  127. openocr/openrec/losses/mgp_loss.py +34 -0
  128. openocr/openrec/losses/parseq_loss.py +12 -0
  129. openocr/openrec/losses/robustscanner_loss.py +20 -0
  130. openocr/openrec/losses/seed_loss.py +46 -0
  131. openocr/openrec/losses/smtr_loss.py +12 -0
  132. openocr/openrec/losses/srn_loss.py +40 -0
  133. openocr/openrec/losses/visionlan_loss.py +58 -0
  134. openocr/openrec/metrics/__init__.py +19 -0
  135. openocr/openrec/metrics/rec_metric.py +270 -0
  136. openocr/openrec/metrics/rec_metric_gtc.py +58 -0
  137. openocr/openrec/metrics/rec_metric_long.py +142 -0
  138. openocr/openrec/metrics/rec_metric_mgp.py +93 -0
  139. openocr/openrec/modeling/__init__.py +11 -0
  140. openocr/openrec/modeling/base_recognizer.py +69 -0
  141. openocr/openrec/modeling/common.py +238 -0
  142. openocr/openrec/modeling/decoders/__init__.py +109 -0
  143. openocr/openrec/modeling/decoders/abinet_decoder.py +283 -0
  144. openocr/openrec/modeling/decoders/aster_decoder.py +170 -0
  145. openocr/openrec/modeling/decoders/bus_decoder.py +133 -0
  146. openocr/openrec/modeling/decoders/cam_decoder.py +43 -0
  147. openocr/openrec/modeling/decoders/cdistnet_decoder.py +334 -0
  148. openocr/openrec/modeling/decoders/cppd_decoder.py +393 -0
  149. openocr/openrec/modeling/decoders/ctc_decoder.py +203 -0
  150. openocr/openrec/modeling/decoders/dan_decoder.py +203 -0
  151. openocr/openrec/modeling/decoders/igtr_decoder.py +815 -0
  152. openocr/openrec/modeling/decoders/lister_decoder.py +535 -0
  153. openocr/openrec/modeling/decoders/lpv_decoder.py +119 -0
  154. openocr/openrec/modeling/decoders/matrn_decoder.py +236 -0
  155. openocr/openrec/modeling/decoders/mgp_decoder.py +99 -0
  156. openocr/openrec/modeling/decoders/nrtr_decoder.py +439 -0
  157. openocr/openrec/modeling/decoders/ote_decoder.py +205 -0
  158. openocr/openrec/modeling/decoders/parseq_decoder.py +504 -0
  159. openocr/openrec/modeling/decoders/rctc_decoder.py +70 -0
  160. openocr/openrec/modeling/decoders/robustscanner_decoder.py +749 -0
  161. openocr/openrec/modeling/decoders/sar_decoder.py +236 -0
  162. openocr/openrec/modeling/decoders/smtr_decoder.py +621 -0
  163. openocr/openrec/modeling/decoders/smtr_decoder_nattn.py +521 -0
  164. openocr/openrec/modeling/decoders/srn_decoder.py +283 -0
  165. openocr/openrec/modeling/decoders/visionlan_decoder.py +321 -0
  166. openocr/openrec/modeling/encoders/__init__.py +39 -0
  167. openocr/openrec/modeling/encoders/autostr_encoder.py +327 -0
  168. openocr/openrec/modeling/encoders/cam_encoder.py +760 -0
  169. openocr/openrec/modeling/encoders/convnextv2.py +213 -0
  170. openocr/openrec/modeling/encoders/focalsvtr.py +631 -0
  171. openocr/openrec/modeling/encoders/nrtr_encoder.py +28 -0
  172. openocr/openrec/modeling/encoders/rec_hgnet.py +346 -0
  173. openocr/openrec/modeling/encoders/rec_lcnetv3.py +488 -0
  174. openocr/openrec/modeling/encoders/rec_mobilenet_v3.py +132 -0
  175. openocr/openrec/modeling/encoders/rec_mv1_enhance.py +254 -0
  176. openocr/openrec/modeling/encoders/rec_nrtr_mtb.py +37 -0
  177. openocr/openrec/modeling/encoders/rec_resnet_31.py +213 -0
  178. openocr/openrec/modeling/encoders/rec_resnet_45.py +183 -0
  179. openocr/openrec/modeling/encoders/rec_resnet_fpn.py +216 -0
  180. openocr/openrec/modeling/encoders/rec_resnet_vd.py +252 -0
  181. openocr/openrec/modeling/encoders/repvit.py +338 -0
  182. openocr/openrec/modeling/encoders/resnet31_rnn.py +123 -0
  183. openocr/openrec/modeling/encoders/svtrnet.py +574 -0
  184. openocr/openrec/modeling/encoders/svtrnet2dpos.py +616 -0
  185. openocr/openrec/modeling/encoders/svtrv2.py +470 -0
  186. openocr/openrec/modeling/encoders/svtrv2_lnconv.py +503 -0
  187. openocr/openrec/modeling/encoders/svtrv2_lnconv_two33.py +517 -0
  188. openocr/openrec/modeling/encoders/vit.py +120 -0
  189. openocr/openrec/modeling/transforms/__init__.py +15 -0
  190. openocr/openrec/modeling/transforms/aster_tps.py +262 -0
  191. openocr/openrec/modeling/transforms/moran.py +136 -0
  192. openocr/openrec/modeling/transforms/tps.py +246 -0
  193. openocr/openrec/optimizer/__init__.py +73 -0
  194. openocr/openrec/optimizer/lr.py +227 -0
  195. openocr/openrec/postprocess/__init__.py +72 -0
  196. openocr/openrec/postprocess/abinet_postprocess.py +37 -0
  197. openocr/openrec/postprocess/ar_postprocess.py +63 -0
  198. openocr/openrec/postprocess/ce_postprocess.py +43 -0
  199. openocr/openrec/postprocess/char_postprocess.py +108 -0
  200. openocr/openrec/postprocess/cppd_postprocess.py +42 -0
  201. openocr/openrec/postprocess/ctc_postprocess.py +119 -0
  202. openocr/openrec/postprocess/igtr_postprocess.py +100 -0
  203. openocr/openrec/postprocess/lister_postprocess.py +59 -0
  204. openocr/openrec/postprocess/mgp_postprocess.py +143 -0
  205. openocr/openrec/postprocess/nrtr_postprocess.py +75 -0
  206. openocr/openrec/postprocess/smtr_postprocess.py +73 -0
  207. openocr/openrec/postprocess/srn_postprocess.py +80 -0
  208. openocr/openrec/postprocess/visionlan_postprocess.py +81 -0
  209. openocr/openrec/preprocess/__init__.py +173 -0
  210. openocr/openrec/preprocess/abinet_aug.py +473 -0
  211. openocr/openrec/preprocess/abinet_label_encode.py +36 -0
  212. openocr/openrec/preprocess/ar_label_encode.py +36 -0
  213. openocr/openrec/preprocess/auto_augment.py +1012 -0
  214. openocr/openrec/preprocess/cam_label_encode.py +141 -0
  215. openocr/openrec/preprocess/ce_label_encode.py +116 -0
  216. openocr/openrec/preprocess/char_label_encode.py +36 -0
  217. openocr/openrec/preprocess/cppd_label_encode.py +173 -0
  218. openocr/openrec/preprocess/ctc_label_encode.py +124 -0
  219. openocr/openrec/preprocess/ep_label_encode.py +38 -0
  220. openocr/openrec/preprocess/igtr_label_encode.py +360 -0
  221. openocr/openrec/preprocess/mgp_label_encode.py +95 -0
  222. openocr/openrec/preprocess/parseq_aug.py +150 -0
  223. openocr/openrec/preprocess/rec_aug.py +211 -0
  224. openocr/openrec/preprocess/resize.py +534 -0
  225. openocr/openrec/preprocess/smtr_label_encode.py +125 -0
  226. openocr/openrec/preprocess/srn_label_encode.py +37 -0
  227. openocr/openrec/preprocess/visionlan_label_encode.py +67 -0
  228. openocr/tools/create_lmdb_dataset.py +118 -0
  229. openocr/tools/data/__init__.py +94 -0
  230. openocr/tools/data/collate_fn.py +100 -0
  231. openocr/tools/data/lmdb_dataset.py +142 -0
  232. openocr/tools/data/lmdb_dataset_test.py +166 -0
  233. openocr/tools/data/multi_scale_sampler.py +177 -0
  234. openocr/tools/data/ratio_dataset.py +217 -0
  235. openocr/tools/data/ratio_dataset_test.py +273 -0
  236. openocr/tools/data/ratio_dataset_tvresize.py +213 -0
  237. openocr/tools/data/ratio_dataset_tvresize_test.py +276 -0
  238. openocr/tools/data/ratio_sampler.py +190 -0
  239. openocr/tools/data/simple_dataset.py +263 -0
  240. openocr/tools/data/strlmdb_dataset.py +143 -0
  241. openocr/tools/engine/__init__.py +5 -0
  242. openocr/tools/engine/config.py +158 -0
  243. openocr/tools/engine/trainer.py +621 -0
  244. openocr/tools/eval_rec.py +41 -0
  245. openocr/tools/eval_rec_all_ch.py +184 -0
  246. openocr/tools/eval_rec_all_en.py +206 -0
  247. openocr/tools/eval_rec_all_long.py +119 -0
  248. openocr/tools/eval_rec_all_long_simple.py +122 -0
  249. openocr/tools/export_rec.py +118 -0
  250. openocr/tools/infer/onnx_engine.py +65 -0
  251. openocr/tools/infer/predict_rec.py +140 -0
  252. openocr/tools/infer/utility.py +234 -0
  253. openocr/tools/infer_det.py +449 -0
  254. openocr/tools/infer_e2e.py +462 -0
  255. openocr/tools/infer_e2e_parallel.py +184 -0
  256. openocr/tools/infer_rec.py +371 -0
  257. openocr/tools/train_rec.py +37 -0
  258. openocr/tools/utility.py +45 -0
  259. openocr/tools/utils/EN_symbol_dict.txt +94 -0
  260. openocr/tools/utils/__init__.py +0 -0
  261. openocr/tools/utils/ckpt.py +87 -0
  262. openocr/tools/utils/dict/ar_dict.txt +117 -0
  263. openocr/tools/utils/dict/arabic_dict.txt +161 -0
  264. openocr/tools/utils/dict/be_dict.txt +145 -0
  265. openocr/tools/utils/dict/bg_dict.txt +140 -0
  266. openocr/tools/utils/dict/chinese_cht_dict.txt +8421 -0
  267. openocr/tools/utils/dict/cyrillic_dict.txt +163 -0
  268. openocr/tools/utils/dict/devanagari_dict.txt +167 -0
  269. openocr/tools/utils/dict/en_dict.txt +63 -0
  270. openocr/tools/utils/dict/fa_dict.txt +136 -0
  271. openocr/tools/utils/dict/french_dict.txt +136 -0
  272. openocr/tools/utils/dict/german_dict.txt +143 -0
  273. openocr/tools/utils/dict/hi_dict.txt +162 -0
  274. openocr/tools/utils/dict/it_dict.txt +118 -0
  275. openocr/tools/utils/dict/japan_dict.txt +4399 -0
  276. openocr/tools/utils/dict/ka_dict.txt +153 -0
  277. openocr/tools/utils/dict/kie_dict/xfund_class_list.txt +4 -0
  278. openocr/tools/utils/dict/korean_dict.txt +3688 -0
  279. openocr/tools/utils/dict/latex_symbol_dict.txt +111 -0
  280. openocr/tools/utils/dict/latin_dict.txt +185 -0
  281. openocr/tools/utils/dict/layout_dict/layout_cdla_dict.txt +10 -0
  282. openocr/tools/utils/dict/layout_dict/layout_publaynet_dict.txt +5 -0
  283. openocr/tools/utils/dict/layout_dict/layout_table_dict.txt +1 -0
  284. openocr/tools/utils/dict/mr_dict.txt +153 -0
  285. openocr/tools/utils/dict/ne_dict.txt +153 -0
  286. openocr/tools/utils/dict/oc_dict.txt +96 -0
  287. openocr/tools/utils/dict/pu_dict.txt +130 -0
  288. openocr/tools/utils/dict/rs_dict.txt +91 -0
  289. openocr/tools/utils/dict/rsc_dict.txt +134 -0
  290. openocr/tools/utils/dict/ru_dict.txt +125 -0
  291. openocr/tools/utils/dict/spin_dict.txt +68 -0
  292. openocr/tools/utils/dict/ta_dict.txt +128 -0
  293. openocr/tools/utils/dict/table_dict.txt +277 -0
  294. openocr/tools/utils/dict/table_master_structure_dict.txt +39 -0
  295. openocr/tools/utils/dict/table_structure_dict.txt +28 -0
  296. openocr/tools/utils/dict/table_structure_dict_ch.txt +48 -0
  297. openocr/tools/utils/dict/te_dict.txt +151 -0
  298. openocr/tools/utils/dict/ug_dict.txt +114 -0
  299. openocr/tools/utils/dict/uk_dict.txt +142 -0
  300. openocr/tools/utils/dict/ur_dict.txt +137 -0
  301. openocr/tools/utils/dict/xi_dict.txt +110 -0
  302. openocr/tools/utils/dict90.txt +90 -0
  303. openocr/tools/utils/e2e_metric/Deteval.py +802 -0
  304. openocr/tools/utils/e2e_metric/polygon_fast.py +70 -0
  305. openocr/tools/utils/e2e_utils/extract_batchsize.py +86 -0
  306. openocr/tools/utils/e2e_utils/extract_textpoint_fast.py +479 -0
  307. openocr/tools/utils/e2e_utils/extract_textpoint_slow.py +582 -0
  308. openocr/tools/utils/e2e_utils/pgnet_pp_utils.py +159 -0
  309. openocr/tools/utils/e2e_utils/visual.py +152 -0
  310. openocr/tools/utils/en_dict.txt +95 -0
  311. openocr/tools/utils/gen_label.py +68 -0
  312. openocr/tools/utils/ic15_dict.txt +36 -0
  313. openocr/tools/utils/logging.py +56 -0
  314. openocr/tools/utils/poly_nms.py +132 -0
  315. openocr/tools/utils/ppocr_keys_v1.txt +6623 -0
  316. openocr/tools/utils/stats.py +58 -0
  317. openocr/tools/utils/utility.py +165 -0
  318. openocr/tools/utils/visual.py +117 -0
  319. openocr_python-0.0.2.dist-info/LICENCE +201 -0
  320. openocr_python-0.0.2.dist-info/METADATA +98 -0
  321. openocr_python-0.0.2.dist-info/RECORD +323 -0
  322. openocr_python-0.0.2.dist-info/WHEEL +5 -0
  323. openocr_python-0.0.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,150 @@
1
+ Global:
2
+ device: gpu
3
+ epoch_num: 20
4
+ log_smooth_window: 20
5
+ print_batch_step: 10
6
+ output_dir: ./output/rec/u14m_filter/svtrv2_cppd/
7
+ save_epoch_step: 1
8
+ # evaluation is run every 500 iterations, starting from iteration 0 (see eval_batch_step)
9
+ eval_batch_step: [0, 500]
10
+ eval_epoch_step: [0, 1]
11
+ cal_metric_during_train: True
12
+ pretrained_model:
13
+ checkpoints:
14
+ use_tensorboard: false
15
+ infer_img:
16
+ # for data or label process
17
+ character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt # 96en
18
+ # ./tools/utils/ppocr_keys_v1.txt # ch
19
+ max_text_length: &max_text_length 25
20
+ use_space_char: &use_space_char False
21
+ save_res_path: ./output/rec/u14m_filter/predicts_svtrv2_cppd.txt
22
+ use_amp: True
23
+
24
+ Optimizer:
25
+ name: AdamW
26
+ lr: 0.00065 # for 4gpus bs256/gpu
27
+ weight_decay: 0.05
28
+ filter_bias_and_bn: True
29
+
30
+ LRScheduler:
31
+ name: OneCycleLR
32
+ warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
33
+ cycle_momentum: False
34
+
35
+ Architecture:
36
+ model_type: rec
37
+ algorithm: CPPD
38
+ in_channels: 3
39
+ Transform:
40
+ Encoder:
41
+ name: SVTRv2LNConvTwo33
42
+ use_pos_embed: False
43
+ out_channels: 256
44
+ dims: [128, 256, 384]
45
+ depths: [6, 6, 6]
46
+ num_heads: [4, 8, 12]
47
+ mixer: [['Conv','Conv','Conv','Conv','Conv','Conv'],['Conv','Conv','FGlobal','Global','Global','Global'],['Global','Global','Global','Global','Global','Global']]
48
+ local_k: [[5, 5], [5, 5], [-1, -1]]
49
+ sub_k: [[1, 1], [2, 1], [-1, -1]]
50
+ last_stage: false
51
+ feat2d: False
52
+ Decoder:
53
+ name: CPPDDecoder
54
+ ds: True
55
+ num_layer: 2
56
+ pos_len: False
57
+ rec_layer: 1
58
+
59
+
60
+ Loss:
61
+ name: CPPDLoss
62
+ ignore_index: 100
63
+ smoothing: True
64
+ pos_len: False
65
+ sideloss_weight: 1.0
66
+
67
+ PostProcess:
68
+ name: CPPDLabelDecode
69
+ character_dict_path: *character_dict_path
70
+ use_space_char: *use_space_char
71
+
72
+ Metric:
73
+ name: RecMetric
74
+ main_indicator: acc
75
+ is_filter: True
76
+
77
+ Train:
78
+ dataset:
79
+ name: RatioDataSetTVResize
80
+ ds_width: True
81
+ padding: false
82
+ data_dir_list: ['../Union14M-L-LMDB-Filtered/filter_train_challenging',
83
+ '../Union14M-L-LMDB-Filtered/filter_train_hard',
84
+ '../Union14M-L-LMDB-Filtered/filter_train_medium',
85
+ '../Union14M-L-LMDB-Filtered/filter_train_normal',
86
+ '../Union14M-L-LMDB-Filtered/filter_train_easy',
87
+ ]
88
+ transforms:
89
+ - DecodeImagePIL: # load image
90
+ img_mode: RGB
91
+ - PARSeqAugPIL:
92
+ - CPPDLabelEncode: # Class handling label
93
+ pos_len: False
94
+ character_dict_path: *character_dict_path
95
+ use_space_char: *use_space_char
96
+ max_text_length: *max_text_length
97
+ - KeepKeys:
98
+ keep_keys: ['image', 'label', 'label_node', 'length'] # dataloader will return list in this order
99
+ sampler:
100
+ name: RatioSampler
101
+ scales: [[128, 32]] # w, h
102
+ # divided_factor: ensures the width and height are divisible by the downsampling factor
103
+ first_bs: &bs 256
104
+ fix_bs: false
105
+ divided_factor: [4, 16] # w, h
106
+ is_training: True
107
+ loader:
108
+ shuffle: True
109
+ batch_size_per_card: *bs
110
+ drop_last: True
111
+ max_ratio: &max_ratio 4
112
+ num_workers: 4
113
+
114
+ Eval:
115
+ dataset:
116
+ name: RatioDataSetTVResize
117
+ ds_width: True
118
+ padding: False
119
+ data_dir_list: [
120
+ '../evaluation/CUTE80',
121
+ '../evaluation/IC13_857',
122
+ '../evaluation/IC15_1811',
123
+ '../evaluation/IIIT5k',
124
+ '../evaluation/SVT',
125
+ '../evaluation/SVTP',
126
+ ]
127
+ transforms:
128
+ - DecodeImagePIL: # load image
129
+ img_mode: RGB
130
+ - CPPDLabelEncode: # Class handling label
131
+ pos_len: False
132
+ character_dict_path: *character_dict_path
133
+ use_space_char: *use_space_char
134
+ max_text_length: *max_text_length
135
+ - KeepKeys:
136
+ keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
137
+ sampler:
138
+ name: RatioSampler
139
+ scales: [[128, 32]] # w, h
140
+ # divided_factor: ensures the width and height are divisible by the downsampling factor
141
+ first_bs: *bs
142
+ fix_bs: false
143
+ divided_factor: [4, 16] # w, h
144
+ is_training: False
145
+ loader:
146
+ shuffle: False
147
+ drop_last: False
148
+ batch_size_per_card: *bs
149
+ max_ratio: *max_ratio
150
+ num_workers: 4
@@ -0,0 +1,98 @@
1
+ Global:
2
+ device: gpu
3
+ epoch_num: 20
4
+ log_smooth_window: 20
5
+ print_batch_step: 10
6
+ output_dir: ./output/rec/u14m_filter/resnet45_fpn_dan/
7
+ eval_epoch_step: [0, 1]
8
+ eval_batch_step: [0, 500]
9
+ cal_metric_during_train: True
10
+ pretrained_model:
11
+ checkpoints:
12
+ use_tensorboard: false
13
+ infer_img:
14
+ # for data or label process
15
+ character_dict_path: ./tools/utils/EN_symbol_dict.txt
16
+ max_text_length: 25
17
+ use_space_char: False
18
+ save_res_path: ./output/rec/u14m_filter/predicts_resnet45_fpn_dan.txt
19
+ use_amp: True
20
+ grad_clip_val: 20
21
+
22
+ Optimizer:
23
+ name: Adam
24
+ lr: 0.00065 # for 4gpus bs256/gpu
25
+ weight_decay: 0.0
26
+ filter_bias_and_bn: False
27
+
28
+ LRScheduler:
29
+ name: OneCycleLR
30
+ warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
31
+ cycle_momentum: False
32
+
33
+ Architecture:
34
+ model_type: rec
35
+ algorithm: DAN
36
+ Transform:
37
+ Encoder:
38
+ name: ResNet45
39
+ in_channels: 3
40
+ strides: [2, 1, 2, 1, 1]
41
+ return_list: True
42
+ Decoder:
43
+ name: DANDecoder
44
+ max_len: 25
45
+ channels_list: [64, 128, 256, 512]
46
+ strides_list: [[2, 2], [1, 1], [1, 1]]
47
+ in_shape: [8, 32]
48
+ depth: 4
49
+
50
+ Loss:
51
+ name: ARLoss
52
+
53
+ PostProcess:
54
+ name: ARLabelDecode
55
+
56
+ Metric:
57
+ name: RecMetric
58
+ main_indicator: acc
59
+ is_filter: True
60
+
61
+ Train:
62
+ dataset:
63
+ name: LMDBDataSet
64
+ data_dir: ../Union14M-L-LMDB-Filtered
65
+ transforms:
66
+ - DecodeImagePIL: # load image
67
+ img_mode: RGB
68
+ - PARSeqAugPIL:
69
+ - ARLabelEncode:
70
+ - RecTVResize:
71
+ image_shape: [32, 128]
72
+ padding: False
73
+ - KeepKeys:
74
+ keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
75
+ loader:
76
+ shuffle: True
77
+ batch_size_per_card: 256
78
+ drop_last: True
79
+ num_workers: 4
80
+
81
+ Eval:
82
+ dataset:
83
+ name: LMDBDataSet
84
+ data_dir: ../evaluation
85
+ transforms:
86
+ - DecodeImagePIL: # load image
87
+ img_mode: RGB
88
+ - ARLabelEncode:
89
+ - RecTVResize:
90
+ image_shape: [32, 128]
91
+ padding: False
92
+ - KeepKeys:
93
+ keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
94
+ loader:
95
+ shuffle: False
96
+ drop_last: False
97
+ batch_size_per_card: 256
98
+ num_workers: 2
@@ -0,0 +1,130 @@
1
+ Global:
2
+ device: gpu
3
+ epoch_num: 20
4
+ log_smooth_window: 20
5
+ print_batch_step: 10
6
+ output_dir: ./output/rec/u14m_filter/svtrv2_dan
7
+ eval_epoch_step: [0, 1]
8
+ eval_batch_step: [0, 500]
9
+ cal_metric_during_train: True
10
+ pretrained_model:
11
+ checkpoints:
12
+ use_tensorboard: false
13
+ infer_img:
14
+ # for data or label process
15
+ character_dict_path: ./tools/utils/EN_symbol_dict.txt
16
+ max_text_length: 25
17
+ use_space_char: False
18
+ save_res_path: ./output/rec/u14m_filter/predicts_svtrv2_dan.txt
19
+ use_amp: True
20
+ grad_clip_val: 20
21
+
22
+ Optimizer:
23
+ name: AdamW
24
+ lr: 0.00065 # 4gpus 256bs/gpu
25
+ weight_decay: 0.05
26
+ filter_bias_and_bn: True
27
+
28
+ LRScheduler:
29
+ name: OneCycleLR
30
+ warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
31
+ cycle_momentum: False
32
+
33
+ Architecture:
34
+ model_type: rec
35
+ algorithm: DAN
36
+ Transform:
37
+ Encoder:
38
+ name: SVTRv2LNConvTwo33
39
+ use_pos_embed: False
40
+ out_channels: 256
41
+ dims: [128, 256, 384]
42
+ depths: [6, 6, 6]
43
+ num_heads: [4, 8, 12]
44
+ mixer: [['Conv','Conv','Conv','Conv','Conv','Conv'],['Conv','Conv','FGlobal','Global','Global','Global'],['Global','Global','Global','Global','Global','Global']]
45
+ local_k: [[5, 5], [5, 5], [-1, -1]]
46
+ sub_k: [[1, 1], [2, 1], [-1, -1]]
47
+ last_stage: false
48
+ feat2d: True
49
+ Decoder:
50
+ name: DANDecoder
51
+ use_cam: False
52
+ max_len: 25
53
+
54
+ Loss:
55
+ name: ARLoss
56
+
57
+ PostProcess:
58
+ name: ARLabelDecode
59
+
60
+ Metric:
61
+ name: RecMetric
62
+ main_indicator: acc
63
+ is_filter: True
64
+
65
+ Train:
66
+ dataset:
67
+ name: RatioDataSetTVResize
68
+ ds_width: True
69
+ padding: false
70
+ data_dir_list: ['../Union14M-L-LMDB-Filtered/filter_filter_train_challenging',
71
+ '../Union14M-L-LMDB-Filtered/filter_filter_train_hard',
72
+ '../Union14M-L-LMDB-Filtered/filter_filter_train_medium',
73
+ '../Union14M-L-LMDB-Filtered/filter_filter_train_normal',
74
+ '../Union14M-L-LMDB-Filtered/filter_filter_train_easy',
75
+ ]
76
+ transforms:
77
+ - DecodeImagePIL: # load image
78
+ img_mode: RGB
79
+ - PARSeqAugPIL:
80
+ - ARLabelEncode:
81
+ - KeepKeys:
82
+ keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
83
+ sampler:
84
+ name: RatioSampler
85
+ scales: [[128, 32]] # w, h
86
+ # divided_factor: ensures the width and height are divisible by the downsampling factor
87
+ first_bs: &bs 256
88
+ fix_bs: false
89
+ divided_factor: [4, 16] # w, h
90
+ is_training: True
91
+ loader:
92
+ shuffle: True
93
+ batch_size_per_card: *bs
94
+ drop_last: True
95
+ max_ratio: &max_ratio 4
96
+ num_workers: 4
97
+
98
+ Eval:
99
+ dataset:
100
+ name: RatioDataSetTVResize
101
+ ds_width: True
102
+ padding: False
103
+ data_dir_list: [
104
+ '../evaluation/CUTE80',
105
+ '../evaluation/IC13_857',
106
+ '../evaluation/IC15_1811',
107
+ '../evaluation/IIIT5k',
108
+ '../evaluation/SVT',
109
+ '../evaluation/SVTP',
110
+ ]
111
+ transforms:
112
+ - DecodeImagePIL: # load image
113
+ img_mode: RGB
114
+ - ARLabelEncode:
115
+ - KeepKeys:
116
+ keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
117
+ sampler:
118
+ name: RatioSampler
119
+ scales: [[128, 32]] # w, h
120
+ # divided_factor: ensures the width and height are divisible by the downsampling factor
121
+ first_bs: *bs
122
+ fix_bs: false
123
+ divided_factor: [4, 16] # w, h
124
+ is_training: False
125
+ loader:
126
+ shuffle: False
127
+ drop_last: False
128
+ batch_size_per_card: *bs
129
+ max_ratio: *max_ratio
130
+ num_workers: 4
@@ -0,0 +1,137 @@
1
+ Global:
2
+ device: gpu
3
+ epoch_num: 20
4
+ log_smooth_window: 20
5
+ print_batch_step: 10
6
+ output_dir: ./output/rec/u14m_filter/focalsvtr_ctc/
7
+ eval_epoch_step: [0, 1]
8
+ eval_batch_step: [0, 500]
9
+ cal_metric_during_train: True
10
+ pretrained_model:
11
+ checkpoints:
12
+ use_tensorboard: false
13
+ infer_img:
14
+ # for data or label process
15
+ character_dict_path: &character_dict_path
16
+ # ./tools/utils/EN_symbol_dict.txt
17
+ max_text_length: &max_text_length 25
18
+ use_space_char: &use_space_char False
19
+ save_res_path: ./output/rec/u14m_filter/predicts_focalsvtr_ctc.txt
20
+
21
+
22
+ Optimizer:
23
+ name: AdamW
24
+ lr: 0.00065 # for 4gpus bs256/gpu
25
+ weight_decay: 0.05
26
+ filter_bias_and_bn: True
27
+
28
+ LRScheduler:
29
+ name: OneCycleLR
30
+
31
+ warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
32
+ cycle_momentum: False
33
+
34
+ Architecture:
35
+ model_type: rec
36
+ algorithm: SVTR
37
+ Transform:
38
+ Encoder:
39
+ name: FocalSVTR
40
+ img_size: [32, 128]
41
+ depths: [6, 6, 6]
42
+ embed_dim: 96
43
+ sub_k: [[1, 1], [2, 1], [1, 1]]
44
+ focal_levels: [3, 3, 3]
45
+ out_channels: 256
46
+ last_stage: True
47
+ Decoder:
48
+ name: CTCDecoder
49
+
50
+ Loss:
51
+ name: CTCLoss
52
+ zero_infinity: True
53
+
54
+ PostProcess:
55
+ name: CTCLabelDecode
56
+ character_dict_path: *character_dict_path
57
+ use_space_char: *use_space_char
58
+
59
+ Metric:
60
+ name: RecMetric
61
+ main_indicator: acc
62
+ is_filter: True
63
+
64
+
65
+ Train:
66
+ dataset:
67
+ name: RatioDataSet
68
+ ds_width: True
69
+ padding: &padding False
70
+ data_dir_list: ['../Union14M-L-LMDB-Filtered/filter_train_challenging',
71
+ '../Union14M-L-LMDB-Filtered/filter_train_hard',
72
+ '../Union14M-L-LMDB-Filtered/filter_train_medium',
73
+ '../Union14M-L-LMDB-Filtered/filter_train_normal',
74
+ '../Union14M-L-LMDB-Filtered/filter_train_easy',
75
+ ]
76
+ transforms:
77
+ - DecodeImage: # load image
78
+ img_mode: BGR
79
+ channel_first: False
80
+ - PARSeqAug:
81
+ - CTCLabelEncode: # Class handling label
82
+ character_dict_path: *character_dict_path
83
+ use_space_char: *use_space_char
84
+ max_text_length: *max_text_length
85
+ - KeepKeys:
86
+ keep_keys: ['image', 'label', 'length']
87
+ sampler:
88
+ name: RatioSampler
89
+ scales: [[128, 32]] # w, h
90
+ # divided_factor: ensures the width and height are divisible by the downsampling factor
91
+ first_bs: &bs 256
92
+ fix_bs: false
93
+ divided_factor: [4, 16] # w, h
94
+ is_training: True
95
+ loader:
96
+ shuffle: True
97
+ batch_size_per_card: *bs
98
+ drop_last: True
99
+ max_ratio: 12
100
+ num_workers: 4
101
+
102
+ Eval:
103
+ dataset:
104
+ name: RatioDataSet
105
+ ds_width: True
106
+ padding: True
107
+ data_dir_list: ['../evaluation/CUTE80',
108
+ '../evaluation/IC13_857',
109
+ '../evaluation/IC15_1811',
110
+ '../evaluation/IIIT5k',
111
+ '../evaluation/SVT',
112
+ '../evaluation/SVTP',
113
+ ]
114
+ transforms:
115
+ - DecodeImage: # load image
116
+ img_mode: BGR
117
+ channel_first: False
118
+ - CTCLabelEncode: # Class handling label
119
+ character_dict_path: *character_dict_path
120
+ use_space_char: *use_space_char
121
+ max_text_length: *max_text_length
122
+ - KeepKeys:
123
+ keep_keys: ['image', 'label', 'length']
124
+ sampler:
125
+ name: RatioSampler
126
+ scales: [[128, 32]] # w, h
127
+ # divided_factor: ensures the width and height are divisible by the downsampling factor
128
+ first_bs: 128
129
+ fix_bs: false
130
+ divided_factor: [4, 16] # w, h
131
+ is_training: False
132
+ loader:
133
+ shuffle: False
134
+ drop_last: False
135
+ batch_size_per_card: 128
136
+ max_ratio: 12
137
+ num_workers: 4
@@ -0,0 +1,168 @@
1
+ Global:
2
+ device: gpu
3
+ epoch_num: 20
4
+ log_smooth_window: 20
5
+ print_batch_step: 10
6
+ output_dir: ./output/rec/svtrv2_lnconv_nrtr_gtc
7
+ save_epoch_step: 1
8
+ # evaluation is run every 500 iterations
9
+ eval_batch_step: [0, 500]
10
+ eval_epoch_step: [0, 1]
11
+ cal_metric_during_train: True
12
+ pretrained_model:
13
+ checkpoints:
14
+ use_tensorboard: false
15
+ infer_img: ../ltb/img
16
+ # for data or label process
17
+ character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt # 96en
18
+ # ./tools/utils/ppocr_keys_v1.txt # ch
19
+ max_text_length: &max_text_length 25
20
+ use_space_char: &use_space_char False
21
+ save_res_path: ./output/rec/predicts_smtr.txt
22
+ use_amp: True
23
+ distributed: true
24
+
25
+ Optimizer:
26
+ name: AdamW
27
+ lr: 0.00065
28
+ weight_decay: 0.05
29
+ filter_bias_and_bn: True
30
+
31
+ LRScheduler:
32
+ name: OneCycleLR
33
+ warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
34
+ cycle_momentum: False
35
+
36
+ Architecture:
37
+ model_type: rec
38
+ algorithm: BGPD
39
+ in_channels: 3
40
+ Transform:
41
+ Encoder:
42
+ name: SVTRv2LNConvTwo33
43
+ use_pos_embed: False
44
+ out_channels: 256
45
+ dims: [128, 256, 384]
46
+ depths: [6, 6, 6]
47
+ num_heads: [4, 8, 12]
48
+ mixer: [['Conv','Conv','Conv','Conv','Conv','Conv'],['Conv','Conv','FGlobal','Global','Global','Global'],['Global','Global','Global','Global','Global','Global']]
49
+ local_k: [[5, 5], [5, 5], [-1, -1]]
50
+ sub_k: [[1, 1], [2, 1], [-1, -1]]
51
+ last_stage: false
52
+ feat2d: True
53
+ Decoder:
54
+ name: GTCDecoder
55
+ infer_gtc: True
56
+ detach: False
57
+ gtc_decoder:
58
+ name: NRTRDecoder
59
+ num_encoder_layers: -1
60
+ beam_size: 0
61
+ num_decoder_layers: 2
62
+ nhead: 12
63
+ max_len: *max_text_length
64
+ ctc_decoder:
65
+ name: RCTCDecoder
66
+
67
+ Loss:
68
+ name: GTCLoss
69
+ gtc_loss:
70
+ name: ARLoss
71
+
72
+ PostProcess:
73
+ name: GTCLabelDecode
74
+ gtc_label_decode:
75
+ name: ARLabelDecode
76
+ character_dict_path: *character_dict_path
77
+ use_space_char: *use_space_char
78
+
79
+ Metric:
80
+ name: RecGTCMetric
81
+ main_indicator: acc
82
+ is_filter: True
83
+
84
+ Train:
85
+ dataset:
86
+ name: RatioDataSet
87
+ ds_width: True
88
+ # max_ratio: &max_ratio 4
89
+ # min_ratio: 1
90
+ # base_shape: &base_shape [[64, 64], [96, 48], [112, 40], [128, 32]]
91
+ # base_h: &base_h 32
92
+ # padding: &padding False
93
+ padding: false
94
+ # padding_rand: true
95
+ # padding_doub: true
96
+ data_dir_list: ['../Union14M-L-LMDB-Filtered/filter_train_challenging',
97
+ '../Union14M-L-LMDB-Filtered/filter_train_hard',
98
+ '../Union14M-L-LMDB-Filtered/filter_train_medium',
99
+ '../Union14M-L-LMDB-Filtered/filter_train_normal',
100
+ '../Union14M-L-LMDB-Filtered/filter_train_easy',
101
+ ]
102
+ transforms:
103
+ - DecodeImage: # load image
104
+ img_mode: BGR
105
+ channel_first: False
106
+ - PARSeqAug:
107
+ - GTCLabelEncode: # Class handling label
108
+ gtc_label_encode:
109
+ name: ARLabelEncode
110
+ character_dict_path: *character_dict_path
111
+ use_space_char: *use_space_char
112
+ max_text_length: *max_text_length
113
+ - KeepKeys:
114
+ keep_keys: ['image', 'label', 'length', 'ctc_label', 'ctc_length'] # dataloader will return list in this order
115
+ sampler:
116
+ name: RatioSampler
117
+ scales: [[128, 32]] # w, h
118
+ # divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
119
+ first_bs: &bs 256
120
+ fix_bs: false
121
+ divided_factor: [4, 16] # w, h
122
+ is_training: True
123
+ loader:
124
+ shuffle: True
125
+ batch_size_per_card: *bs
126
+ drop_last: True
127
+ max_ratio: &max_ratio 4
128
+ num_workers: 4
129
+
130
+ Eval:
131
+ dataset:
132
+ name: RatioDataSet
133
+ ds_width: True
134
+ padding: False
135
+ data_dir_list: [
136
+ '../evaluation/CUTE80',
137
+ '../evaluation/IC13_857',
138
+ '../evaluation/IC15_1811',
139
+ '../evaluation/IIIT5k',
140
+ '../evaluation/SVT',
141
+ '../evaluation/SVTP',
142
+ ]
143
+ transforms:
144
+ - DecodeImage: # load image
145
+ img_mode: BGR
146
+ channel_first: False
147
+ - GTCLabelEncode: # Class handling label
148
+ gtc_label_encode:
149
+ name: ARLabelEncode
150
+ character_dict_path: *character_dict_path
151
+ use_space_char: *use_space_char
152
+ max_text_length: *max_text_length
153
+ - KeepKeys:
154
+ keep_keys: ['image', 'label', 'length', 'ctc_label', 'ctc_length'] # dataloader will return list in this order
155
+ sampler:
156
+ name: RatioSampler
157
+ scales: [[128, 32]] # w, h
158
+ # divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
159
+ first_bs: *bs
160
+ fix_bs: false
161
+ divided_factor: [4, 16] # w, h
162
+ is_training: False
163
+ loader:
164
+ shuffle: False
165
+ drop_last: False
166
+ batch_size_per_card: *bs
167
+ max_ratio: *max_ratio
168
+ num_workers: 4