openocr-python 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (323) hide show
  1. openocr/__init__.py +11 -0
  2. openocr/configs/det/dbnet/repvit_db.yml +173 -0
  3. openocr/configs/rec/abinet/resnet45_trans_abinet_lang.yml +94 -0
  4. openocr/configs/rec/abinet/resnet45_trans_abinet_wo_lang.yml +93 -0
  5. openocr/configs/rec/abinet/svtrv2_abinet_lang.yml +130 -0
  6. openocr/configs/rec/abinet/svtrv2_abinet_wo_lang.yml +128 -0
  7. openocr/configs/rec/aster/resnet31_lstm_aster_tps_on.yml +93 -0
  8. openocr/configs/rec/aster/svtrv2_aster.yml +127 -0
  9. openocr/configs/rec/aster/svtrv2_aster_tps_on.yml +102 -0
  10. openocr/configs/rec/autostr/autostr_lstm_aster_tps_on.yml +95 -0
  11. openocr/configs/rec/busnet/svtrv2_busnet.yml +135 -0
  12. openocr/configs/rec/busnet/svtrv2_busnet_pretraining.yml +134 -0
  13. openocr/configs/rec/busnet/vit_busnet.yml +104 -0
  14. openocr/configs/rec/busnet/vit_busnet_pretraining.yml +104 -0
  15. openocr/configs/rec/cam/convnextv2_cam_tps_on.yml +118 -0
  16. openocr/configs/rec/cam/convnextv2_tiny_cam_tps_on.yml +118 -0
  17. openocr/configs/rec/cam/svtrv2_cam_tps_on.yml +123 -0
  18. openocr/configs/rec/cdistnet/resnet45_trans_cdistnet.yml +93 -0
  19. openocr/configs/rec/cdistnet/svtrv2_cdistnet.yml +139 -0
  20. openocr/configs/rec/cppd/svtr_base_cppd.yml +123 -0
  21. openocr/configs/rec/cppd/svtr_base_cppd_ch.yml +126 -0
  22. openocr/configs/rec/cppd/svtr_base_cppd_h8.yml +123 -0
  23. openocr/configs/rec/cppd/svtr_base_cppd_syn.yml +124 -0
  24. openocr/configs/rec/cppd/svtrv2_cppd.yml +150 -0
  25. openocr/configs/rec/dan/resnet45_fpn_dan.yml +98 -0
  26. openocr/configs/rec/dan/svtrv2_dan.yml +130 -0
  27. openocr/configs/rec/focalsvtr/focalsvtr_ctc.yml +137 -0
  28. openocr/configs/rec/gtc/svtrv2_lnconv_nrtr_gtc.yml +168 -0
  29. openocr/configs/rec/gtc/svtrv2_lnconv_smtr_gtc_long_infer.yml +151 -0
  30. openocr/configs/rec/gtc/svtrv2_lnconv_smtr_gtc_smtr_long.yml +150 -0
  31. openocr/configs/rec/gtc/svtrv2_lnconv_smtr_gtc_stream.yml +152 -0
  32. openocr/configs/rec/igtr/svtr_base_ds_igtr.yml +157 -0
  33. openocr/configs/rec/lister/focalsvtr_lister_wo_fem_maxratio12.yml +133 -0
  34. openocr/configs/rec/lister/svtrv2_lister_wo_fem_maxratio12.yml +138 -0
  35. openocr/configs/rec/lpv/svtr_base_lpv.yml +124 -0
  36. openocr/configs/rec/lpv/svtr_base_lpv_wo_glrm.yml +123 -0
  37. openocr/configs/rec/lpv/svtrv2_lpv.yml +147 -0
  38. openocr/configs/rec/lpv/svtrv2_lpv_wo_glrm.yml +146 -0
  39. openocr/configs/rec/maerec/vit_nrtr.yml +116 -0
  40. openocr/configs/rec/matrn/resnet45_trans_matrn.yml +95 -0
  41. openocr/configs/rec/matrn/svtrv2_matrn.yml +130 -0
  42. openocr/configs/rec/mgpstr/svtrv2_mgpstr_only_char.yml +140 -0
  43. openocr/configs/rec/mgpstr/vit_base_mgpstr_only_char.yml +111 -0
  44. openocr/configs/rec/mgpstr/vit_large_mgpstr_only_char.yml +110 -0
  45. openocr/configs/rec/mgpstr/vit_mgpstr.yml +110 -0
  46. openocr/configs/rec/mgpstr/vit_mgpstr_only_char.yml +110 -0
  47. openocr/configs/rec/moran/resnet31_lstm_moran.yml +92 -0
  48. openocr/configs/rec/nrtr/focalsvtr_nrtr_maxraio12.yml +145 -0
  49. openocr/configs/rec/nrtr/nrtr.yml +107 -0
  50. openocr/configs/rec/nrtr/svtr_base_nrtr.yml +118 -0
  51. openocr/configs/rec/nrtr/svtr_base_nrtr_syn.yml +119 -0
  52. openocr/configs/rec/nrtr/svtrv2_nrtr.yml +146 -0
  53. openocr/configs/rec/ote/svtr_base_h8_ote.yml +117 -0
  54. openocr/configs/rec/ote/svtr_base_ote.yml +116 -0
  55. openocr/configs/rec/parseq/focalsvtr_parseq_maxratio12.yml +140 -0
  56. openocr/configs/rec/parseq/svrtv2_parseq.yml +136 -0
  57. openocr/configs/rec/parseq/vit_parseq.yml +100 -0
  58. openocr/configs/rec/robustscanner/resnet31_robustscanner.yml +102 -0
  59. openocr/configs/rec/robustscanner/svtrv2_robustscanner.yml +134 -0
  60. openocr/configs/rec/sar/resnet31_lstm_sar.yml +94 -0
  61. openocr/configs/rec/sar/svtrv2_sar.yml +128 -0
  62. openocr/configs/rec/seed/resnet31_lstm_seed_tps_on.yml +96 -0
  63. openocr/configs/rec/smtr/focalsvtr_smtr.yml +150 -0
  64. openocr/configs/rec/smtr/focalsvtr_smtr_long.yml +133 -0
  65. openocr/configs/rec/smtr/svtrv2_smtr.yml +150 -0
  66. openocr/configs/rec/smtr/svtrv2_smtr_bi.yml +136 -0
  67. openocr/configs/rec/srn/resnet50_fpn_srn.yml +97 -0
  68. openocr/configs/rec/srn/svtrv2_srn.yml +131 -0
  69. openocr/configs/rec/svtrs/convnextv2_ctc.yml +105 -0
  70. openocr/configs/rec/svtrs/convnextv2_h8_ctc.yml +105 -0
  71. openocr/configs/rec/svtrs/convnextv2_h8_rctc.yml +106 -0
  72. openocr/configs/rec/svtrs/convnextv2_rctc.yml +106 -0
  73. openocr/configs/rec/svtrs/convnextv2_tiny_h8_ctc.yml +105 -0
  74. openocr/configs/rec/svtrs/convnextv2_tiny_h8_rctc.yml +106 -0
  75. openocr/configs/rec/svtrs/crnn_ctc.yml +99 -0
  76. openocr/configs/rec/svtrs/crnn_ctc_long.yml +116 -0
  77. openocr/configs/rec/svtrs/focalnet_base_ctc.yml +108 -0
  78. openocr/configs/rec/svtrs/focalnet_base_rctc.yml +109 -0
  79. openocr/configs/rec/svtrs/focalsvtr_ctc.yml +106 -0
  80. openocr/configs/rec/svtrs/focalsvtr_rctc.yml +107 -0
  81. openocr/configs/rec/svtrs/resnet45_trans_ctc.yml +103 -0
  82. openocr/configs/rec/svtrs/resnet45_trans_rctc.yml +104 -0
  83. openocr/configs/rec/svtrs/svtr_base_ctc.yml +110 -0
  84. openocr/configs/rec/svtrs/svtr_base_rctc.yml +111 -0
  85. openocr/configs/rec/svtrs/svtrnet_ctc_syn.yml +111 -0
  86. openocr/configs/rec/svtrs/vit_ctc.yml +103 -0
  87. openocr/configs/rec/svtrs/vit_rctc.yml +103 -0
  88. openocr/configs/rec/svtrv2/repsvtr_ch.yml +121 -0
  89. openocr/configs/rec/svtrv2/svtrv2_ch.yml +133 -0
  90. openocr/configs/rec/svtrv2/svtrv2_ctc.yml +136 -0
  91. openocr/configs/rec/svtrv2/svtrv2_rctc.yml +135 -0
  92. openocr/configs/rec/svtrv2/svtrv2_small_rctc.yml +135 -0
  93. openocr/configs/rec/svtrv2/svtrv2_smtr_gtc_rctc.yml +162 -0
  94. openocr/configs/rec/svtrv2/svtrv2_smtr_gtc_rctc_ch.yml +153 -0
  95. openocr/configs/rec/svtrv2/svtrv2_tiny_rctc.yml +135 -0
  96. openocr/configs/rec/visionlan/resnet45_trans_visionlan_LA.yml +103 -0
  97. openocr/configs/rec/visionlan/resnet45_trans_visionlan_LF_1.yml +102 -0
  98. openocr/configs/rec/visionlan/resnet45_trans_visionlan_LF_2.yml +103 -0
  99. openocr/configs/rec/visionlan/svtrv2_visionlan_LA.yml +112 -0
  100. openocr/configs/rec/visionlan/svtrv2_visionlan_LF_1.yml +111 -0
  101. openocr/configs/rec/visionlan/svtrv2_visionlan_LF_2.yml +112 -0
  102. openocr/demo_gradio.py +128 -0
  103. openocr/opendet/modeling/__init__.py +11 -0
  104. openocr/opendet/modeling/backbones/__init__.py +14 -0
  105. openocr/opendet/modeling/backbones/repvit.py +340 -0
  106. openocr/opendet/modeling/base_detector.py +69 -0
  107. openocr/opendet/modeling/heads/__init__.py +14 -0
  108. openocr/opendet/modeling/heads/db_head.py +73 -0
  109. openocr/opendet/modeling/necks/__init__.py +14 -0
  110. openocr/opendet/modeling/necks/db_fpn.py +609 -0
  111. openocr/opendet/postprocess/__init__.py +18 -0
  112. openocr/opendet/postprocess/db_postprocess.py +273 -0
  113. openocr/opendet/preprocess/__init__.py +154 -0
  114. openocr/opendet/preprocess/crop_resize.py +121 -0
  115. openocr/opendet/preprocess/db_resize_for_test.py +135 -0
  116. openocr/openrec/losses/__init__.py +62 -0
  117. openocr/openrec/losses/abinet_loss.py +42 -0
  118. openocr/openrec/losses/ar_loss.py +23 -0
  119. openocr/openrec/losses/cam_loss.py +48 -0
  120. openocr/openrec/losses/cdistnet_loss.py +34 -0
  121. openocr/openrec/losses/ce_loss.py +68 -0
  122. openocr/openrec/losses/cppd_loss.py +77 -0
  123. openocr/openrec/losses/ctc_loss.py +33 -0
  124. openocr/openrec/losses/igtr_loss.py +12 -0
  125. openocr/openrec/losses/lister_loss.py +14 -0
  126. openocr/openrec/losses/lpv_loss.py +30 -0
  127. openocr/openrec/losses/mgp_loss.py +34 -0
  128. openocr/openrec/losses/parseq_loss.py +12 -0
  129. openocr/openrec/losses/robustscanner_loss.py +20 -0
  130. openocr/openrec/losses/seed_loss.py +46 -0
  131. openocr/openrec/losses/smtr_loss.py +12 -0
  132. openocr/openrec/losses/srn_loss.py +40 -0
  133. openocr/openrec/losses/visionlan_loss.py +58 -0
  134. openocr/openrec/metrics/__init__.py +19 -0
  135. openocr/openrec/metrics/rec_metric.py +270 -0
  136. openocr/openrec/metrics/rec_metric_gtc.py +58 -0
  137. openocr/openrec/metrics/rec_metric_long.py +142 -0
  138. openocr/openrec/metrics/rec_metric_mgp.py +93 -0
  139. openocr/openrec/modeling/__init__.py +11 -0
  140. openocr/openrec/modeling/base_recognizer.py +69 -0
  141. openocr/openrec/modeling/common.py +238 -0
  142. openocr/openrec/modeling/decoders/__init__.py +109 -0
  143. openocr/openrec/modeling/decoders/abinet_decoder.py +283 -0
  144. openocr/openrec/modeling/decoders/aster_decoder.py +170 -0
  145. openocr/openrec/modeling/decoders/bus_decoder.py +133 -0
  146. openocr/openrec/modeling/decoders/cam_decoder.py +43 -0
  147. openocr/openrec/modeling/decoders/cdistnet_decoder.py +334 -0
  148. openocr/openrec/modeling/decoders/cppd_decoder.py +393 -0
  149. openocr/openrec/modeling/decoders/ctc_decoder.py +203 -0
  150. openocr/openrec/modeling/decoders/dan_decoder.py +203 -0
  151. openocr/openrec/modeling/decoders/igtr_decoder.py +815 -0
  152. openocr/openrec/modeling/decoders/lister_decoder.py +535 -0
  153. openocr/openrec/modeling/decoders/lpv_decoder.py +119 -0
  154. openocr/openrec/modeling/decoders/matrn_decoder.py +236 -0
  155. openocr/openrec/modeling/decoders/mgp_decoder.py +99 -0
  156. openocr/openrec/modeling/decoders/nrtr_decoder.py +439 -0
  157. openocr/openrec/modeling/decoders/ote_decoder.py +205 -0
  158. openocr/openrec/modeling/decoders/parseq_decoder.py +504 -0
  159. openocr/openrec/modeling/decoders/rctc_decoder.py +70 -0
  160. openocr/openrec/modeling/decoders/robustscanner_decoder.py +749 -0
  161. openocr/openrec/modeling/decoders/sar_decoder.py +236 -0
  162. openocr/openrec/modeling/decoders/smtr_decoder.py +621 -0
  163. openocr/openrec/modeling/decoders/smtr_decoder_nattn.py +521 -0
  164. openocr/openrec/modeling/decoders/srn_decoder.py +283 -0
  165. openocr/openrec/modeling/decoders/visionlan_decoder.py +321 -0
  166. openocr/openrec/modeling/encoders/__init__.py +39 -0
  167. openocr/openrec/modeling/encoders/autostr_encoder.py +327 -0
  168. openocr/openrec/modeling/encoders/cam_encoder.py +760 -0
  169. openocr/openrec/modeling/encoders/convnextv2.py +213 -0
  170. openocr/openrec/modeling/encoders/focalsvtr.py +631 -0
  171. openocr/openrec/modeling/encoders/nrtr_encoder.py +28 -0
  172. openocr/openrec/modeling/encoders/rec_hgnet.py +346 -0
  173. openocr/openrec/modeling/encoders/rec_lcnetv3.py +488 -0
  174. openocr/openrec/modeling/encoders/rec_mobilenet_v3.py +132 -0
  175. openocr/openrec/modeling/encoders/rec_mv1_enhance.py +254 -0
  176. openocr/openrec/modeling/encoders/rec_nrtr_mtb.py +37 -0
  177. openocr/openrec/modeling/encoders/rec_resnet_31.py +213 -0
  178. openocr/openrec/modeling/encoders/rec_resnet_45.py +183 -0
  179. openocr/openrec/modeling/encoders/rec_resnet_fpn.py +216 -0
  180. openocr/openrec/modeling/encoders/rec_resnet_vd.py +252 -0
  181. openocr/openrec/modeling/encoders/repvit.py +338 -0
  182. openocr/openrec/modeling/encoders/resnet31_rnn.py +123 -0
  183. openocr/openrec/modeling/encoders/svtrnet.py +574 -0
  184. openocr/openrec/modeling/encoders/svtrnet2dpos.py +616 -0
  185. openocr/openrec/modeling/encoders/svtrv2.py +470 -0
  186. openocr/openrec/modeling/encoders/svtrv2_lnconv.py +503 -0
  187. openocr/openrec/modeling/encoders/svtrv2_lnconv_two33.py +517 -0
  188. openocr/openrec/modeling/encoders/vit.py +120 -0
  189. openocr/openrec/modeling/transforms/__init__.py +15 -0
  190. openocr/openrec/modeling/transforms/aster_tps.py +262 -0
  191. openocr/openrec/modeling/transforms/moran.py +136 -0
  192. openocr/openrec/modeling/transforms/tps.py +246 -0
  193. openocr/openrec/optimizer/__init__.py +73 -0
  194. openocr/openrec/optimizer/lr.py +227 -0
  195. openocr/openrec/postprocess/__init__.py +72 -0
  196. openocr/openrec/postprocess/abinet_postprocess.py +37 -0
  197. openocr/openrec/postprocess/ar_postprocess.py +63 -0
  198. openocr/openrec/postprocess/ce_postprocess.py +43 -0
  199. openocr/openrec/postprocess/char_postprocess.py +108 -0
  200. openocr/openrec/postprocess/cppd_postprocess.py +42 -0
  201. openocr/openrec/postprocess/ctc_postprocess.py +119 -0
  202. openocr/openrec/postprocess/igtr_postprocess.py +100 -0
  203. openocr/openrec/postprocess/lister_postprocess.py +59 -0
  204. openocr/openrec/postprocess/mgp_postprocess.py +143 -0
  205. openocr/openrec/postprocess/nrtr_postprocess.py +75 -0
  206. openocr/openrec/postprocess/smtr_postprocess.py +73 -0
  207. openocr/openrec/postprocess/srn_postprocess.py +80 -0
  208. openocr/openrec/postprocess/visionlan_postprocess.py +81 -0
  209. openocr/openrec/preprocess/__init__.py +173 -0
  210. openocr/openrec/preprocess/abinet_aug.py +473 -0
  211. openocr/openrec/preprocess/abinet_label_encode.py +36 -0
  212. openocr/openrec/preprocess/ar_label_encode.py +36 -0
  213. openocr/openrec/preprocess/auto_augment.py +1012 -0
  214. openocr/openrec/preprocess/cam_label_encode.py +141 -0
  215. openocr/openrec/preprocess/ce_label_encode.py +116 -0
  216. openocr/openrec/preprocess/char_label_encode.py +36 -0
  217. openocr/openrec/preprocess/cppd_label_encode.py +173 -0
  218. openocr/openrec/preprocess/ctc_label_encode.py +124 -0
  219. openocr/openrec/preprocess/ep_label_encode.py +38 -0
  220. openocr/openrec/preprocess/igtr_label_encode.py +360 -0
  221. openocr/openrec/preprocess/mgp_label_encode.py +95 -0
  222. openocr/openrec/preprocess/parseq_aug.py +150 -0
  223. openocr/openrec/preprocess/rec_aug.py +211 -0
  224. openocr/openrec/preprocess/resize.py +534 -0
  225. openocr/openrec/preprocess/smtr_label_encode.py +125 -0
  226. openocr/openrec/preprocess/srn_label_encode.py +37 -0
  227. openocr/openrec/preprocess/visionlan_label_encode.py +67 -0
  228. openocr/tools/create_lmdb_dataset.py +118 -0
  229. openocr/tools/data/__init__.py +94 -0
  230. openocr/tools/data/collate_fn.py +100 -0
  231. openocr/tools/data/lmdb_dataset.py +142 -0
  232. openocr/tools/data/lmdb_dataset_test.py +166 -0
  233. openocr/tools/data/multi_scale_sampler.py +177 -0
  234. openocr/tools/data/ratio_dataset.py +217 -0
  235. openocr/tools/data/ratio_dataset_test.py +273 -0
  236. openocr/tools/data/ratio_dataset_tvresize.py +213 -0
  237. openocr/tools/data/ratio_dataset_tvresize_test.py +276 -0
  238. openocr/tools/data/ratio_sampler.py +190 -0
  239. openocr/tools/data/simple_dataset.py +263 -0
  240. openocr/tools/data/strlmdb_dataset.py +143 -0
  241. openocr/tools/engine/__init__.py +5 -0
  242. openocr/tools/engine/config.py +158 -0
  243. openocr/tools/engine/trainer.py +621 -0
  244. openocr/tools/eval_rec.py +41 -0
  245. openocr/tools/eval_rec_all_ch.py +184 -0
  246. openocr/tools/eval_rec_all_en.py +206 -0
  247. openocr/tools/eval_rec_all_long.py +119 -0
  248. openocr/tools/eval_rec_all_long_simple.py +122 -0
  249. openocr/tools/export_rec.py +118 -0
  250. openocr/tools/infer/onnx_engine.py +65 -0
  251. openocr/tools/infer/predict_rec.py +140 -0
  252. openocr/tools/infer/utility.py +234 -0
  253. openocr/tools/infer_det.py +449 -0
  254. openocr/tools/infer_e2e.py +462 -0
  255. openocr/tools/infer_e2e_parallel.py +184 -0
  256. openocr/tools/infer_rec.py +371 -0
  257. openocr/tools/train_rec.py +37 -0
  258. openocr/tools/utility.py +45 -0
  259. openocr/tools/utils/EN_symbol_dict.txt +94 -0
  260. openocr/tools/utils/__init__.py +0 -0
  261. openocr/tools/utils/ckpt.py +87 -0
  262. openocr/tools/utils/dict/ar_dict.txt +117 -0
  263. openocr/tools/utils/dict/arabic_dict.txt +161 -0
  264. openocr/tools/utils/dict/be_dict.txt +145 -0
  265. openocr/tools/utils/dict/bg_dict.txt +140 -0
  266. openocr/tools/utils/dict/chinese_cht_dict.txt +8421 -0
  267. openocr/tools/utils/dict/cyrillic_dict.txt +163 -0
  268. openocr/tools/utils/dict/devanagari_dict.txt +167 -0
  269. openocr/tools/utils/dict/en_dict.txt +63 -0
  270. openocr/tools/utils/dict/fa_dict.txt +136 -0
  271. openocr/tools/utils/dict/french_dict.txt +136 -0
  272. openocr/tools/utils/dict/german_dict.txt +143 -0
  273. openocr/tools/utils/dict/hi_dict.txt +162 -0
  274. openocr/tools/utils/dict/it_dict.txt +118 -0
  275. openocr/tools/utils/dict/japan_dict.txt +4399 -0
  276. openocr/tools/utils/dict/ka_dict.txt +153 -0
  277. openocr/tools/utils/dict/kie_dict/xfund_class_list.txt +4 -0
  278. openocr/tools/utils/dict/korean_dict.txt +3688 -0
  279. openocr/tools/utils/dict/latex_symbol_dict.txt +111 -0
  280. openocr/tools/utils/dict/latin_dict.txt +185 -0
  281. openocr/tools/utils/dict/layout_dict/layout_cdla_dict.txt +10 -0
  282. openocr/tools/utils/dict/layout_dict/layout_publaynet_dict.txt +5 -0
  283. openocr/tools/utils/dict/layout_dict/layout_table_dict.txt +1 -0
  284. openocr/tools/utils/dict/mr_dict.txt +153 -0
  285. openocr/tools/utils/dict/ne_dict.txt +153 -0
  286. openocr/tools/utils/dict/oc_dict.txt +96 -0
  287. openocr/tools/utils/dict/pu_dict.txt +130 -0
  288. openocr/tools/utils/dict/rs_dict.txt +91 -0
  289. openocr/tools/utils/dict/rsc_dict.txt +134 -0
  290. openocr/tools/utils/dict/ru_dict.txt +125 -0
  291. openocr/tools/utils/dict/spin_dict.txt +68 -0
  292. openocr/tools/utils/dict/ta_dict.txt +128 -0
  293. openocr/tools/utils/dict/table_dict.txt +277 -0
  294. openocr/tools/utils/dict/table_master_structure_dict.txt +39 -0
  295. openocr/tools/utils/dict/table_structure_dict.txt +28 -0
  296. openocr/tools/utils/dict/table_structure_dict_ch.txt +48 -0
  297. openocr/tools/utils/dict/te_dict.txt +151 -0
  298. openocr/tools/utils/dict/ug_dict.txt +114 -0
  299. openocr/tools/utils/dict/uk_dict.txt +142 -0
  300. openocr/tools/utils/dict/ur_dict.txt +137 -0
  301. openocr/tools/utils/dict/xi_dict.txt +110 -0
  302. openocr/tools/utils/dict90.txt +90 -0
  303. openocr/tools/utils/e2e_metric/Deteval.py +802 -0
  304. openocr/tools/utils/e2e_metric/polygon_fast.py +70 -0
  305. openocr/tools/utils/e2e_utils/extract_batchsize.py +86 -0
  306. openocr/tools/utils/e2e_utils/extract_textpoint_fast.py +479 -0
  307. openocr/tools/utils/e2e_utils/extract_textpoint_slow.py +582 -0
  308. openocr/tools/utils/e2e_utils/pgnet_pp_utils.py +159 -0
  309. openocr/tools/utils/e2e_utils/visual.py +152 -0
  310. openocr/tools/utils/en_dict.txt +95 -0
  311. openocr/tools/utils/gen_label.py +68 -0
  312. openocr/tools/utils/ic15_dict.txt +36 -0
  313. openocr/tools/utils/logging.py +56 -0
  314. openocr/tools/utils/poly_nms.py +132 -0
  315. openocr/tools/utils/ppocr_keys_v1.txt +6623 -0
  316. openocr/tools/utils/stats.py +58 -0
  317. openocr/tools/utils/utility.py +165 -0
  318. openocr/tools/utils/visual.py +117 -0
  319. openocr_python-0.0.2.dist-info/LICENCE +201 -0
  320. openocr_python-0.0.2.dist-info/METADATA +98 -0
  321. openocr_python-0.0.2.dist-info/RECORD +323 -0
  322. openocr_python-0.0.2.dist-info/WHEEL +5 -0
  323. openocr_python-0.0.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,135 @@
1
+ Global:
2
+ device: gpu
3
+ epoch_num: 20
4
+ log_smooth_window: 20
5
+ print_batch_step: 10
6
+ output_dir: ./output/rec/u14m_filter/svtrv2_small_rctc/
7
+ save_epoch_step: 1
8
+ # evaluation is run every 500 iterations (interval set by eval_batch_step)
9
+ eval_epoch_step: [0, 1]
10
+ eval_batch_step: [0, 500]
11
+ cal_metric_during_train: True
12
+ pretrained_model:
13
+ checkpoints:
14
+ use_tensorboard: false
15
+ infer_img:
16
+ # for data or label process
17
+ character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt
18
+ max_text_length: &max_text_length 25
19
+ use_space_char: &use_space_char False
20
+ save_res_path: ./output/rec/u14m_filter/predicts_svtrv2_rctc.txt
21
+ use_amp: True
22
+
23
+ Optimizer:
24
+ name: AdamW
25
+ lr: 0.00065 # for 4gpus bs256/gpu
26
+ weight_decay: 0.05
27
+ filter_bias_and_bn: True
28
+
29
+ LRScheduler:
30
+ name: OneCycleLR
31
+ warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
32
+ cycle_momentum: False
33
+
34
+ Architecture:
35
+ model_type: rec
36
+ algorithm: SVTRv2
37
+ Transform:
38
+ Encoder:
39
+ name: SVTRv2LNConvTwo33
40
+ use_pos_embed: False
41
+ dims: [96, 192, 384]
42
+ depths: [3, 6, 3]
43
+ num_heads: [3, 6, 12]
44
+ mixer: [['Conv','Conv','Conv'],['Conv','Conv','Conv','FGlobal','Global','Global'],['Global','Global','Global']]
45
+ local_k: [[5, 5], [5, 5], [-1, -1]]
46
+ sub_k: [[1, 1], [2, 1], [-1, -1]]
47
+ last_stage: false
48
+ feat2d: True
49
+ Decoder:
50
+ name: RCTCDecoder
51
+
52
+ Loss:
53
+ name: CTCLoss
54
+ zero_infinity: True
55
+
56
+ PostProcess:
57
+ name: CTCLabelDecode
58
+
59
+ Metric:
60
+ name: RecMetric
61
+ main_indicator: acc
62
+ is_filter: True
63
+
64
+ Train:
65
+ dataset:
66
+ name: RatioDataSetTVResize
67
+ ds_width: True
68
+ padding: False
69
+ data_dir_list: ['../Union14M-L-LMDB-Filtered/filter_train_challenging',
70
+ '../Union14M-L-LMDB-Filtered/filter_train_hard',
71
+ '../Union14M-L-LMDB-Filtered/filter_train_medium',
72
+ '../Union14M-L-LMDB-Filtered/filter_train_normal',
73
+ '../Union14M-L-LMDB-Filtered/filter_train_easy',
74
+ ]
75
+ transforms:
76
+ - DecodeImagePIL: # load image
77
+ img_mode: RGB
78
+ - PARSeqAugPIL:
79
+ - CTCLabelEncode: # Class handling label
80
+ character_dict_path: *character_dict_path
81
+ use_space_char: *use_space_char
82
+ max_text_length: *max_text_length
83
+ - KeepKeys:
84
+ keep_keys: ['image', 'label', 'length']
85
+ sampler:
86
+ name: RatioSampler
87
+ scales: [[128, 32]] # w, h
88
+ # divided_factor: ensures the width and height are divisible by the downsampling multiple
89
+ first_bs: &bs 256
90
+ fix_bs: false
91
+ divided_factor: [4, 16] # w, h
92
+ is_training: True
93
+ loader:
94
+ shuffle: True
95
+ batch_size_per_card: *bs
96
+ drop_last: True
97
+ max_ratio: 4
98
+ num_workers: 4
99
+
100
+ Eval:
101
+ dataset:
102
+ name: RatioDataSetTVResize
103
+ ds_width: True
104
+ padding: False
105
+ data_dir_list: [
106
+ '../evaluation/CUTE80',
107
+ '../evaluation/IC13_857',
108
+ '../evaluation/IC15_1811',
109
+ '../evaluation/IIIT5k',
110
+ '../evaluation/SVT',
111
+ '../evaluation/SVTP',
112
+ ]
113
+ transforms:
114
+ - DecodeImagePIL: # load image
115
+ img_mode: RGB
116
+ - CTCLabelEncode: # Class handling label
117
+ character_dict_path: *character_dict_path
118
+ use_space_char: *use_space_char
119
+ max_text_length: *max_text_length
120
+ - KeepKeys:
121
+ keep_keys: ['image', 'label', 'length']
122
+ sampler:
123
+ name: RatioSampler
124
+ scales: [[128, 32]] # w, h
125
+ # divided_factor: ensures the width and height are divisible by the downsampling multiple
126
+ first_bs: 256
127
+ fix_bs: false
128
+ divided_factor: [4, 16] # w, h
129
+ is_training: False
130
+ loader:
131
+ shuffle: False
132
+ drop_last: False
133
+ batch_size_per_card: *bs
134
+ max_ratio: 4
135
+ num_workers: 4
@@ -0,0 +1,162 @@
1
+ Global:
2
+ device: gpu
3
+ epoch_num: 20
4
+ log_smooth_window: 20
5
+ print_batch_step: 10
6
+ output_dir: ./output/rec/u14m_filter/svtrv2_smtr_gtc_rctc_maxratio12
7
+ save_epoch_step: 1
8
+ # evaluation is run every 500 iterations (interval set by eval_batch_step)
9
+ eval_batch_step: [0, 500]
10
+ eval_epoch_step: [0, 1]
11
+ cal_metric_during_train: True
12
+ pretrained_model:
13
+ # ./output/rec/u14m_filter/svtrv2_rctc/best.pth
14
+ checkpoints:
15
+ use_tensorboard: false
16
+ infer_img:
17
+ # for data or label process
18
+ character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt # 96en
19
+ # ./tools/utils/ppocr_keys_v1.txt # ch
20
+ max_text_length: &max_text_length 25
21
+ use_space_char: &use_space_char False
22
+ save_res_path: ./output/rec/u14m_filter/predicts_svtrv2_smtr_gtc_rctc.txt
23
+ use_amp: True
24
+
25
+ Optimizer:
26
+ name: AdamW
27
+ lr: 0.000325 # for 4gpus bs128/gpu
28
+ weight_decay: 0.05
29
+ filter_bias_and_bn: True
30
+
31
+ LRScheduler:
32
+ name: OneCycleLR
33
+ warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
34
+ cycle_momentum: False
35
+
36
+ Architecture:
37
+ model_type: rec
38
+ algorithm: SVTRv2
39
+ in_channels: 3
40
+ Transform:
41
+ Encoder:
42
+ name: SVTRv2LNConvTwo33
43
+ use_pos_embed: False
44
+ dims: [128, 256, 384]
45
+ depths: [6, 6, 6]
46
+ num_heads: [4, 8, 12]
47
+ mixer: [['Conv','Conv','Conv','Conv','Conv','Conv'],['Conv','Conv','FGlobal','Global','Global','Global'],['Global','Global','Global','Global','Global','Global']]
48
+ local_k: [[5, 5], [5, 5], [-1, -1]]
49
+ sub_k: [[1, 1], [2, 1], [-1, -1]]
50
+ last_stage: false
51
+ feat2d: True
52
+ Decoder:
53
+ name: GTCDecoder
54
+ infer_gtc: True
55
+ detach: False
56
+ gtc_decoder:
57
+ name: SMTRDecoder
58
+ num_layer: 1
59
+ ds: True
60
+ max_len: *max_text_length
61
+ next_mode: &next True
62
+ sub_str_len: &subsl 5
63
+ ctc_decoder:
64
+ name: RCTCDecoder
65
+
66
+ Loss:
67
+ name: GTCLoss
68
+ ctc_weight: 0.1
69
+ gtc_loss:
70
+ name: SMTRLoss
71
+
72
+ PostProcess:
73
+ name: GTCLabelDecode
74
+ gtc_label_decode:
75
+ name: SMTRLabelDecode
76
+ next_mode: *next
77
+ character_dict_path: *character_dict_path
78
+ use_space_char: *use_space_char
79
+
80
+ Metric:
81
+ name: RecGTCMetric
82
+ main_indicator: acc
83
+ is_filter: True
84
+
85
+ Train:
86
+ dataset:
87
+ name: RatioDataSetTVResize
88
+ ds_width: True
89
+ padding: false
90
+ data_dir_list: ['../Union14M-L-LMDB-Filtered/filter_train_challenging',
91
+ '../Union14M-L-LMDB-Filtered/filter_train_hard',
92
+ '../Union14M-L-LMDB-Filtered/filter_train_medium',
93
+ '../Union14M-L-LMDB-Filtered/filter_train_normal',
94
+ '../Union14M-L-LMDB-Filtered/filter_train_easy',
95
+ ]
96
+ transforms:
97
+ - DecodeImagePIL: # load image
98
+ img_mode: RGB
99
+ - PARSeqAugPIL:
100
+ - GTCLabelEncode: # Class handling label
101
+ gtc_label_encode:
102
+ name: SMTRLabelEncode
103
+ sub_str_len: *subsl
104
+ character_dict_path: *character_dict_path
105
+ use_space_char: *use_space_char
106
+ max_text_length: *max_text_length
107
+ - KeepKeys:
108
+ keep_keys: ['image', 'label', 'label_subs', 'label_next', 'length_subs',
109
+ 'label_subs_pre', 'label_next_pre', 'length_subs_pre', 'length', 'ctc_label', 'ctc_length'] # dataloader will return list in this order
110
+ sampler:
111
+ name: RatioSampler
112
+ scales: [[128, 32]] # w, h
113
+ # divided_factor: ensures the width and height are divisible by the downsampling multiple
114
+ first_bs: &bs 128
115
+ fix_bs: false
116
+ divided_factor: [4, 16] # w, h
117
+ is_training: True
118
+ loader:
119
+ shuffle: True
120
+ batch_size_per_card: *bs
121
+ drop_last: True
122
+ max_ratio: &max_ratio 12
123
+ num_workers: 4
124
+
125
+ Eval:
126
+ dataset:
127
+ name: RatioDataSetTVResize
128
+ ds_width: True
129
+ padding: False
130
+ data_dir_list: [
131
+ '../evaluation/CUTE80',
132
+ '../evaluation/IC13_857',
133
+ '../evaluation/IC15_1811',
134
+ '../evaluation/IIIT5k',
135
+ '../evaluation/SVT',
136
+ '../evaluation/SVTP',
137
+ ]
138
+ transforms:
139
+ - DecodeImagePIL: # load image
140
+ img_mode: RGB
141
+ - GTCLabelEncode: # Class handling label
142
+ gtc_label_encode:
143
+ name: ARLabelEncode
144
+ character_dict_path: *character_dict_path
145
+ use_space_char: *use_space_char
146
+ max_text_length: *max_text_length
147
+ - KeepKeys:
148
+ keep_keys: ['image', 'label', 'length', 'ctc_label', 'ctc_length'] # dataloader will return list in this order
149
+ sampler:
150
+ name: RatioSampler
151
+ scales: [[128, 32]] # w, h
152
+ # divided_factor: ensures the width and height are divisible by the downsampling multiple
153
+ first_bs: *bs
154
+ fix_bs: false
155
+ divided_factor: [4, 16] # w, h
156
+ is_training: False
157
+ loader:
158
+ shuffle: False
159
+ drop_last: False
160
+ batch_size_per_card: *bs
161
+ max_ratio: *max_ratio
162
+ num_workers: 4
@@ -0,0 +1,153 @@
1
+ Global:
2
+ device: gpu
3
+ epoch_num: 100
4
+ log_smooth_window: 20
5
+ print_batch_step: 10
6
+ output_dir: ./output/rec/ch/svtrv2_smtr_gtc_rctc_ch
7
+ save_epoch_step: 1
8
+ # evaluation is run every 2000 iterations
9
+ eval_batch_step: [0, 2000]
10
+ eval_epoch_step: [0, 1]
11
+ cal_metric_during_train: False
12
+ pretrained_model:
13
+ # ./output/rec/u14m_filter/svtrv2_rctc/best.pth
14
+ checkpoints:
15
+ use_tensorboard: false
16
+ infer_img:
17
+ # for data or label process
18
+ character_dict_path: &character_dict_path ./tools/utils/ppocr_keys_v1.txt # ch
19
+ max_text_length: &max_text_length 25
20
+ use_space_char: &use_space_char False
21
+ save_res_path: ./output/rec/ch/predicts_svtrv2_smtr_gtc_rctc_ch.txt
22
+ use_amp: True
23
+
24
+ Optimizer:
25
+ name: AdamW
26
+ lr: 0.00065 # for 4gpus bs256/gpu
27
+ weight_decay: 0.05
28
+ filter_bias_and_bn: True
29
+
30
+ LRScheduler:
31
+ name: OneCycleLR
32
+ warmup_epoch: 5
33
+ cycle_momentum: False
34
+
35
+ Architecture:
36
+ model_type: rec
37
+ algorithm: SVTRv2
38
+ in_channels: 3
39
+ Transform:
40
+ Encoder:
41
+ name: SVTRv2LNConvTwo33
42
+ use_pos_embed: False
43
+ dims: [128, 256, 384]
44
+ depths: [6, 6, 6]
45
+ num_heads: [4, 8, 12]
46
+ mixer: [['Conv','Conv','Conv','Conv','Conv','Conv'],['Conv','Conv','FGlobal','Global','Global','Global'],['Global','Global','Global','Global','Global','Global']]
47
+ local_k: [[5, 5], [5, 5], [-1, -1]]
48
+ sub_k: [[1, 1], [2, 1], [-1, -1]]
49
+ last_stage: false
50
+ feat2d: True
51
+ Decoder:
52
+ name: GTCDecoder
53
+ infer_gtc: True
54
+ detach: False
55
+ gtc_decoder:
56
+ name: SMTRDecoder
57
+ num_layer: 1
58
+ ds: True
59
+ max_len: *max_text_length
60
+ next_mode: &next True
61
+ sub_str_len: &subsl 5
62
+ ctc_decoder:
63
+ name: RCTCDecoder
64
+
65
+ Loss:
66
+ name: GTCLoss
67
+ ctc_weight: 0.1
68
+ gtc_loss:
69
+ name: SMTRLoss
70
+
71
+ PostProcess:
72
+ name: GTCLabelDecode
73
+ gtc_label_decode:
74
+ name: SMTRLabelDecode
75
+ next_mode: *next
76
+ character_dict_path: *character_dict_path
77
+ use_space_char: *use_space_char
78
+
79
+ Metric:
80
+ name: RecGTCMetric
81
+ main_indicator: acc
82
+ # is_filter: True
83
+
84
+ Train:
85
+ dataset:
86
+ name: RatioDataSetTVResize
87
+ ds_width: True
88
+ padding: false
89
+ data_dir_list: ['../benchmark_bctr/benchmark_bctr_train/document_train',
90
+ '../benchmark_bctr/benchmark_bctr_train/handwriting_train',
91
+ '../benchmark_bctr/benchmark_bctr_train/scene_train',
92
+ '../benchmark_bctr/benchmark_bctr_train/web_train'
93
+ ]
94
+ transforms:
95
+ - DecodeImagePIL: # load image
96
+ img_mode: RGB
97
+ - PARSeqAugPIL:
98
+ - GTCLabelEncode: # Class handling label
99
+ gtc_label_encode:
100
+ name: SMTRLabelEncode
101
+ sub_str_len: *subsl
102
+ character_dict_path: *character_dict_path
103
+ use_space_char: *use_space_char
104
+ max_text_length: *max_text_length
105
+ - KeepKeys:
106
+ keep_keys: ['image', 'label', 'label_subs', 'label_next', 'length_subs',
107
+ 'label_subs_pre', 'label_next_pre', 'length_subs_pre', 'length', 'ctc_label', 'ctc_length'] # dataloader will return list in this order
108
+ sampler:
109
+ name: RatioSampler
110
+ scales: [[128, 32]] # w, h
111
+ # divided_factor: ensures the width and height are divisible by the downsampling multiple
112
+ first_bs: &bs 256
113
+ fix_bs: false
114
+ divided_factor: [4, 16] # w, h
115
+ is_training: True
116
+ loader:
117
+ shuffle: True
118
+ batch_size_per_card: *bs
119
+ drop_last: True
120
+ max_ratio: &max_ratio 8
121
+ num_workers: 4
122
+
123
+ Eval:
124
+ dataset:
125
+ name: RatioDataSetTVResize
126
+ ds_width: True
127
+ padding: False
128
+ data_dir_list: ['../benchmark_bctr/benchmark_bctr_test/scene_test']
129
+ transforms:
130
+ - DecodeImagePIL: # load image
131
+ img_mode: RGB
132
+ - GTCLabelEncode: # Class handling label
133
+ gtc_label_encode:
134
+ name: ARLabelEncode
135
+ character_dict_path: *character_dict_path
136
+ use_space_char: *use_space_char
137
+ max_text_length: *max_text_length
138
+ - KeepKeys:
139
+ keep_keys: ['image', 'label', 'length', 'ctc_label', 'ctc_length'] # dataloader will return list in this order
140
+ sampler:
141
+ name: RatioSampler
142
+ scales: [[128, 32]] # w, h
143
+ # divided_factor: ensures the width and height are divisible by the downsampling multiple
144
+ first_bs: *bs
145
+ fix_bs: false
146
+ divided_factor: [4, 16] # w, h
147
+ is_training: False
148
+ loader:
149
+ shuffle: False
150
+ drop_last: False
151
+ batch_size_per_card: *bs
152
+ max_ratio: *max_ratio
153
+ num_workers: 4
@@ -0,0 +1,135 @@
1
+ Global:
2
+ device: gpu
3
+ epoch_num: 20
4
+ log_smooth_window: 20
5
+ print_batch_step: 10
6
+ output_dir: ./output/rec/u14m_filter/svtrv2_tiny_rctc/
7
+ save_epoch_step: 1
8
+ # evaluation is run every 500 iterations (see eval_batch_step)
9
+ eval_epoch_step: [0, 1]
10
+ eval_batch_step: [0, 500]
11
+ cal_metric_during_train: True
12
+ pretrained_model:
13
+ checkpoints:
14
+ use_tensorboard: false
15
+ infer_img:
16
+ # for data or label process
17
+ character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt
18
+ max_text_length: &max_text_length 25
19
+ use_space_char: &use_space_char False
20
+ save_res_path: ./output/rec/u14m_filter/predicts_svtrv2_rctc.txt
21
+ use_amp: True
22
+
23
+ Optimizer:
24
+ name: AdamW
25
+ lr: 0.00065 # for 4gpus bs256/gpu
26
+ weight_decay: 0.05
27
+ filter_bias_and_bn: True
28
+
29
+ LRScheduler:
30
+ name: OneCycleLR
31
+ warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
32
+ cycle_momentum: False
33
+
34
+ Architecture:
35
+ model_type: rec
36
+ algorithm: SVTRv2
37
+ Transform:
38
+ Encoder:
39
+ name: SVTRv2LNConvTwo33
40
+ use_pos_embed: False
41
+ dims: [64, 128, 256]
42
+ depths: [3, 6, 3]
43
+ num_heads: [2, 4, 8]
44
+ mixer: [['Conv','Conv','Conv'],['Conv','Conv','Conv','FGlobal','Global','Global'],['Global','Global','Global']]
45
+ local_k: [[5, 5], [5, 5], [-1, -1]]
46
+ sub_k: [[1, 1], [2, 1], [-1, -1]]
47
+ last_stage: false
48
+ feat2d: True
49
+ Decoder:
50
+ name: RCTCDecoder
51
+
52
+ Loss:
53
+ name: CTCLoss
54
+ zero_infinity: True
55
+
56
+ PostProcess:
57
+ name: CTCLabelDecode
58
+
59
+ Metric:
60
+ name: RecMetric
61
+ main_indicator: acc
62
+ is_filter: True
63
+
64
+ Train:
65
+ dataset:
66
+ name: RatioDataSetTVResize
67
+ ds_width: True
68
+ padding: False
69
+ data_dir_list: ['../Union14M-L-LMDB-Filtered/filter_train_challenging',
70
+ '../Union14M-L-LMDB-Filtered/filter_train_hard',
71
+ '../Union14M-L-LMDB-Filtered/filter_train_medium',
72
+ '../Union14M-L-LMDB-Filtered/filter_train_normal',
73
+ '../Union14M-L-LMDB-Filtered/filter_train_easy',
74
+ ]
75
+ transforms:
76
+ - DecodeImagePIL: # load image
77
+ img_mode: RGB
78
+ - PARSeqAugPIL:
79
+ - CTCLabelEncode: # Class handling label
80
+ character_dict_path: *character_dict_path
81
+ use_space_char: *use_space_char
82
+ max_text_length: *max_text_length
83
+ - KeepKeys:
84
+ keep_keys: ['image', 'label', 'length']
85
+ sampler:
86
+ name: RatioSampler
87
+ scales: [[128, 32]] # w, h
88
+ # divided_factor: to ensure the width and height dimensions can be divided by the downsampling multiple
89
+ first_bs: &bs 256
90
+ fix_bs: false
91
+ divided_factor: [4, 16] # w, h
92
+ is_training: True
93
+ loader:
94
+ shuffle: True
95
+ batch_size_per_card: *bs
96
+ drop_last: True
97
+ max_ratio: 4
98
+ num_workers: 4
99
+
100
+ Eval:
101
+ dataset:
102
+ name: RatioDataSetTVResize
103
+ ds_width: True
104
+ padding: False
105
+ data_dir_list: [
106
+ '../evaluation/CUTE80',
107
+ '../evaluation/IC13_857',
108
+ '../evaluation/IC15_1811',
109
+ '../evaluation/IIIT5k',
110
+ '../evaluation/SVT',
111
+ '../evaluation/SVTP',
112
+ ]
113
+ transforms:
114
+ - DecodeImagePIL: # load image
115
+ img_mode: RGB
116
+ - CTCLabelEncode: # Class handling label
117
+ character_dict_path: *character_dict_path
118
+ use_space_char: *use_space_char
119
+ max_text_length: *max_text_length
120
+ - KeepKeys:
121
+ keep_keys: ['image', 'label', 'length']
122
+ sampler:
123
+ name: RatioSampler
124
+ scales: [[128, 32]] # w, h
125
+ # divided_factor: to ensure the width and height dimensions can be divided by the downsampling multiple
126
+ first_bs: 256
127
+ fix_bs: false
128
+ divided_factor: [4, 16] # w, h
129
+ is_training: False
130
+ loader:
131
+ shuffle: False
132
+ drop_last: False
133
+ batch_size_per_card: *bs
134
+ max_ratio: 4
135
+ num_workers: 4
@@ -0,0 +1,103 @@
1
+ Global:
2
+ device: gpu
3
+ epoch_num: 20
4
+ log_smooth_window: 20
5
+ print_batch_step: 10
6
+ output_dir: ./output/rec/u14m_filter/resnet45_trans_visionlan_LA/
7
+ eval_epoch_step: [0, 1]
8
+ eval_batch_step: [0, 500]
9
+ cal_metric_during_train: True
10
+ pretrained_model:
11
+ # ./output/rec/u14m_filter/resnet45_trans_visionlan_LF2/best.pth
12
+ checkpoints:
13
+ use_tensorboard: false
14
+ infer_img:
15
+ # for data or label process
16
+ character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt # 96en
17
+ # ./tools/utils/ppocr_keys_v1.txt # ch
18
+ max_text_length: &max_text_length 25
19
+ use_space_char: &use_space_char False
20
+ save_res_path: ./output/rec/u14m_filter/predicts_resnet45_trans_visionlan_LA.txt
21
+ grad_clip_val: 20
22
+ use_amp: True
23
+
24
+ Optimizer:
25
+ name: Adam
26
+ lr: 0.0002 # for 4gpus bs128/gpu
27
+ weight_decay: 0.0
28
+
29
+ LRScheduler:
30
+ name: MultiStepLR
31
+ milestones: [12]
32
+
33
+ Architecture:
34
+ model_type: rec
35
+ algorithm: VisionLAN
36
+ Transform:
37
+ Encoder:
38
+ name: ResNet45
39
+ in_channels: 3
40
+ strides: [2, 2, 2, 1, 1]
41
+ Decoder:
42
+ name: VisionLANDecoder
43
+ training_step: &training_step 'LA'
44
+ n_position: 256
45
+
46
+ Loss:
47
+ name: VisionLANLoss
48
+ training_step: *training_step
49
+
50
+ PostProcess:
51
+ name: VisionLANLabelDecode
52
+ character_dict_path: *character_dict_path
53
+ use_space_char: *use_space_char
54
+
55
+ Metric:
56
+ name: RecMetric
57
+ main_indicator: acc
58
+ is_filter: True
59
+
60
+ Train:
61
+ dataset:
62
+ name: LMDBDataSet
63
+ data_dir: ../Union14M-L-LMDB-Filtered
64
+ transforms:
65
+ - DecodeImagePIL: # load image
66
+ img_mode: RGB
67
+ - PARSeqAugPIL:
68
+ - VisionLANLabelEncode:
69
+ character_dict_path: *character_dict_path
70
+ use_space_char: *use_space_char
71
+ max_text_length: *max_text_length
72
+ - RecTVResize:
73
+ image_shape: [64, 256]
74
+ padding: False
75
+ - KeepKeys:
76
+ keep_keys: ['image', 'label', 'label_res', 'label_sub', 'label_id', 'length'] # dataloader will return list in this order
77
+ loader:
78
+ shuffle: True
79
+ batch_size_per_card: 128
80
+ drop_last: True
81
+ num_workers: 4
82
+
83
+ Eval:
84
+ dataset:
85
+ name: LMDBDataSet
86
+ data_dir: ../evaluation
87
+ transforms:
88
+ - DecodeImagePIL: # load image
89
+ img_mode: RGB
90
+ - VisionLANLabelEncode:
91
+ character_dict_path: *character_dict_path
92
+ use_space_char: *use_space_char
93
+ max_text_length: *max_text_length
94
+ - RecTVResize:
95
+ image_shape: [64, 256]
96
+ padding: False
97
+ - KeepKeys:
98
+ keep_keys: ['image', 'label', 'label_res', 'label_sub', 'label_id', 'length'] # dataloader will return list in this order
99
+ loader:
100
+ shuffle: False
101
+ drop_last: False
102
+ batch_size_per_card: 128
103
+ num_workers: 2