pyxllib 0.3.96__py3-none-any.whl → 0.3.200__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (358) hide show
  1. pyxllib/__init__.py +21 -21
  2. pyxllib/algo/__init__.py +8 -8
  3. pyxllib/algo/disjoint.py +54 -54
  4. pyxllib/algo/geo.py +541 -529
  5. pyxllib/algo/intervals.py +964 -964
  6. pyxllib/algo/matcher.py +389 -311
  7. pyxllib/algo/newbie.py +166 -166
  8. pyxllib/algo/pupil.py +629 -461
  9. pyxllib/algo/shapelylib.py +67 -67
  10. pyxllib/algo/specialist.py +241 -240
  11. pyxllib/algo/stat.py +494 -458
  12. pyxllib/algo/treelib.py +149 -149
  13. pyxllib/algo/unitlib.py +66 -66
  14. {pyxlpr → pyxllib/autogui}/__init__.py +5 -5
  15. pyxllib/autogui/activewin.py +246 -0
  16. pyxllib/autogui/all.py +9 -0
  17. pyxllib/{ext/autogui → autogui}/autogui.py +852 -823
  18. pyxllib/autogui/uiautolib.py +362 -0
  19. pyxllib/{ext/autogui → autogui}/virtualkey.py +102 -102
  20. pyxllib/autogui/wechat.py +827 -0
  21. pyxllib/autogui/wechat_msg.py +421 -0
  22. pyxllib/autogui/wxautolib.py +84 -0
  23. pyxllib/cv/__init__.py +5 -5
  24. pyxllib/cv/expert.py +267 -267
  25. pyxllib/cv/imfile.py +159 -159
  26. pyxllib/cv/imhash.py +39 -39
  27. pyxllib/cv/pupil.py +9 -9
  28. pyxllib/cv/rgbfmt.py +1525 -1525
  29. pyxllib/cv/slidercaptcha.py +137 -0
  30. pyxllib/cv/trackbartools.py +251 -251
  31. pyxllib/cv/xlcvlib.py +1040 -1040
  32. pyxllib/cv/xlpillib.py +423 -423
  33. pyxllib/data/echarts.py +240 -129
  34. pyxllib/data/jsonlib.py +89 -0
  35. pyxllib/data/oss.py +72 -72
  36. pyxllib/data/pglib.py +1127 -643
  37. pyxllib/data/sqlite.py +568 -341
  38. pyxllib/data/sqllib.py +297 -297
  39. pyxllib/ext/JLineViewer.py +505 -492
  40. pyxllib/ext/__init__.py +6 -6
  41. pyxllib/ext/demolib.py +246 -246
  42. pyxllib/ext/drissionlib.py +277 -0
  43. pyxllib/ext/kq5034lib.py +12 -1606
  44. pyxllib/ext/old.py +663 -663
  45. pyxllib/ext/qt.py +449 -449
  46. pyxllib/ext/robustprocfile.py +497 -0
  47. pyxllib/ext/seleniumlib.py +76 -76
  48. pyxllib/ext/tk.py +173 -173
  49. pyxllib/ext/unixlib.py +827 -826
  50. pyxllib/ext/utools.py +351 -338
  51. pyxllib/ext/webhook.py +124 -101
  52. pyxllib/ext/win32lib.py +40 -40
  53. pyxllib/ext/wjxlib.py +88 -0
  54. pyxllib/ext/wpsapi.py +124 -0
  55. pyxllib/ext/xlwork.py +9 -0
  56. pyxllib/ext/yuquelib.py +1105 -173
  57. pyxllib/file/__init__.py +17 -17
  58. pyxllib/file/docxlib.py +761 -761
  59. pyxllib/file/gitlib.py +309 -309
  60. pyxllib/file/libreoffice.py +165 -0
  61. pyxllib/file/movielib.py +148 -139
  62. pyxllib/file/newbie.py +10 -10
  63. pyxllib/file/onenotelib.py +1469 -1469
  64. pyxllib/file/packlib/__init__.py +330 -293
  65. pyxllib/file/packlib/zipfile.py +2441 -2441
  66. pyxllib/file/pdflib.py +426 -426
  67. pyxllib/file/pupil.py +185 -185
  68. pyxllib/file/specialist/__init__.py +685 -685
  69. pyxllib/file/specialist/dirlib.py +799 -799
  70. pyxllib/file/specialist/download.py +193 -186
  71. pyxllib/file/specialist/filelib.py +2829 -2618
  72. pyxllib/file/xlsxlib.py +3131 -2976
  73. pyxllib/file/xlsyncfile.py +341 -0
  74. pyxllib/prog/__init__.py +5 -5
  75. pyxllib/prog/cachetools.py +64 -0
  76. pyxllib/prog/deprecatedlib.py +233 -233
  77. pyxllib/prog/filelock.py +42 -0
  78. pyxllib/prog/ipyexec.py +253 -253
  79. pyxllib/prog/multiprogs.py +940 -0
  80. pyxllib/prog/newbie.py +451 -444
  81. pyxllib/prog/pupil.py +1197 -1128
  82. pyxllib/prog/sitepackages.py +33 -33
  83. pyxllib/prog/specialist/__init__.py +391 -217
  84. pyxllib/prog/specialist/bc.py +203 -200
  85. pyxllib/prog/specialist/browser.py +497 -488
  86. pyxllib/prog/specialist/common.py +347 -347
  87. pyxllib/prog/specialist/datetime.py +199 -131
  88. pyxllib/prog/specialist/tictoc.py +240 -241
  89. pyxllib/prog/specialist/xllog.py +180 -180
  90. pyxllib/prog/xlosenv.py +108 -101
  91. pyxllib/stdlib/__init__.py +17 -17
  92. pyxllib/stdlib/tablepyxl/__init__.py +10 -10
  93. pyxllib/stdlib/tablepyxl/style.py +303 -303
  94. pyxllib/stdlib/tablepyxl/tablepyxl.py +130 -130
  95. pyxllib/text/__init__.py +8 -8
  96. pyxllib/text/ahocorasick.py +39 -39
  97. pyxllib/text/airscript.js +744 -0
  98. pyxllib/text/charclasslib.py +121 -109
  99. pyxllib/text/jiebalib.py +267 -264
  100. pyxllib/text/jinjalib.py +32 -0
  101. pyxllib/text/jsa_ai_prompt.md +271 -0
  102. pyxllib/text/jscode.py +922 -767
  103. pyxllib/text/latex/__init__.py +158 -158
  104. pyxllib/text/levenshtein.py +303 -303
  105. pyxllib/text/nestenv.py +1215 -1215
  106. pyxllib/text/newbie.py +300 -288
  107. pyxllib/text/pupil/__init__.py +8 -8
  108. pyxllib/text/pupil/common.py +1121 -1095
  109. pyxllib/text/pupil/xlalign.py +326 -326
  110. pyxllib/text/pycode.py +47 -47
  111. pyxllib/text/specialist/__init__.py +8 -8
  112. pyxllib/text/specialist/common.py +112 -112
  113. pyxllib/text/specialist/ptag.py +186 -186
  114. pyxllib/text/spellchecker.py +172 -172
  115. pyxllib/text/templates/echart_base.html +11 -0
  116. pyxllib/text/templates/highlight_code.html +17 -0
  117. pyxllib/text/templates/latex_editor.html +103 -0
  118. pyxllib/text/vbacode.py +17 -17
  119. pyxllib/text/xmllib.py +747 -685
  120. pyxllib/xl.py +42 -38
  121. pyxllib/xlcv.py +17 -17
  122. pyxllib-0.3.200.dist-info/METADATA +48 -0
  123. pyxllib-0.3.200.dist-info/RECORD +126 -0
  124. {pyxllib-0.3.96.dist-info → pyxllib-0.3.200.dist-info}/WHEEL +1 -2
  125. {pyxllib-0.3.96.dist-info → pyxllib-0.3.200.dist-info/licenses}/LICENSE +190 -190
  126. pyxllib/ext/autogui/__init__.py +0 -8
  127. pyxllib-0.3.96.dist-info/METADATA +0 -51
  128. pyxllib-0.3.96.dist-info/RECORD +0 -333
  129. pyxllib-0.3.96.dist-info/top_level.txt +0 -2
  130. pyxlpr/ai/__init__.py +0 -5
  131. pyxlpr/ai/clientlib.py +0 -1281
  132. pyxlpr/ai/specialist.py +0 -286
  133. pyxlpr/ai/torch_app.py +0 -172
  134. pyxlpr/ai/xlpaddle.py +0 -655
  135. pyxlpr/ai/xltorch.py +0 -705
  136. pyxlpr/data/__init__.py +0 -11
  137. pyxlpr/data/coco.py +0 -1325
  138. pyxlpr/data/datacls.py +0 -365
  139. pyxlpr/data/datasets.py +0 -200
  140. pyxlpr/data/gptlib.py +0 -1291
  141. pyxlpr/data/icdar/__init__.py +0 -96
  142. pyxlpr/data/icdar/deteval.py +0 -377
  143. pyxlpr/data/icdar/icdar2013.py +0 -341
  144. pyxlpr/data/icdar/iou.py +0 -340
  145. pyxlpr/data/icdar/rrc_evaluation_funcs_1_1.py +0 -463
  146. pyxlpr/data/imtextline.py +0 -473
  147. pyxlpr/data/labelme.py +0 -866
  148. pyxlpr/data/removeline.py +0 -179
  149. pyxlpr/data/specialist.py +0 -57
  150. pyxlpr/eval/__init__.py +0 -85
  151. pyxlpr/paddleocr.py +0 -776
  152. pyxlpr/ppocr/__init__.py +0 -15
  153. pyxlpr/ppocr/configs/rec/multi_language/generate_multi_language_configs.py +0 -226
  154. pyxlpr/ppocr/data/__init__.py +0 -135
  155. pyxlpr/ppocr/data/imaug/ColorJitter.py +0 -26
  156. pyxlpr/ppocr/data/imaug/__init__.py +0 -67
  157. pyxlpr/ppocr/data/imaug/copy_paste.py +0 -170
  158. pyxlpr/ppocr/data/imaug/east_process.py +0 -437
  159. pyxlpr/ppocr/data/imaug/gen_table_mask.py +0 -244
  160. pyxlpr/ppocr/data/imaug/iaa_augment.py +0 -114
  161. pyxlpr/ppocr/data/imaug/label_ops.py +0 -789
  162. pyxlpr/ppocr/data/imaug/make_border_map.py +0 -184
  163. pyxlpr/ppocr/data/imaug/make_pse_gt.py +0 -106
  164. pyxlpr/ppocr/data/imaug/make_shrink_map.py +0 -126
  165. pyxlpr/ppocr/data/imaug/operators.py +0 -433
  166. pyxlpr/ppocr/data/imaug/pg_process.py +0 -906
  167. pyxlpr/ppocr/data/imaug/randaugment.py +0 -143
  168. pyxlpr/ppocr/data/imaug/random_crop_data.py +0 -239
  169. pyxlpr/ppocr/data/imaug/rec_img_aug.py +0 -533
  170. pyxlpr/ppocr/data/imaug/sast_process.py +0 -777
  171. pyxlpr/ppocr/data/imaug/text_image_aug/__init__.py +0 -17
  172. pyxlpr/ppocr/data/imaug/text_image_aug/augment.py +0 -120
  173. pyxlpr/ppocr/data/imaug/text_image_aug/warp_mls.py +0 -168
  174. pyxlpr/ppocr/data/lmdb_dataset.py +0 -115
  175. pyxlpr/ppocr/data/pgnet_dataset.py +0 -104
  176. pyxlpr/ppocr/data/pubtab_dataset.py +0 -107
  177. pyxlpr/ppocr/data/simple_dataset.py +0 -372
  178. pyxlpr/ppocr/losses/__init__.py +0 -61
  179. pyxlpr/ppocr/losses/ace_loss.py +0 -52
  180. pyxlpr/ppocr/losses/basic_loss.py +0 -135
  181. pyxlpr/ppocr/losses/center_loss.py +0 -88
  182. pyxlpr/ppocr/losses/cls_loss.py +0 -30
  183. pyxlpr/ppocr/losses/combined_loss.py +0 -67
  184. pyxlpr/ppocr/losses/det_basic_loss.py +0 -208
  185. pyxlpr/ppocr/losses/det_db_loss.py +0 -80
  186. pyxlpr/ppocr/losses/det_east_loss.py +0 -63
  187. pyxlpr/ppocr/losses/det_pse_loss.py +0 -149
  188. pyxlpr/ppocr/losses/det_sast_loss.py +0 -121
  189. pyxlpr/ppocr/losses/distillation_loss.py +0 -272
  190. pyxlpr/ppocr/losses/e2e_pg_loss.py +0 -140
  191. pyxlpr/ppocr/losses/kie_sdmgr_loss.py +0 -113
  192. pyxlpr/ppocr/losses/rec_aster_loss.py +0 -99
  193. pyxlpr/ppocr/losses/rec_att_loss.py +0 -39
  194. pyxlpr/ppocr/losses/rec_ctc_loss.py +0 -44
  195. pyxlpr/ppocr/losses/rec_enhanced_ctc_loss.py +0 -70
  196. pyxlpr/ppocr/losses/rec_nrtr_loss.py +0 -30
  197. pyxlpr/ppocr/losses/rec_sar_loss.py +0 -28
  198. pyxlpr/ppocr/losses/rec_srn_loss.py +0 -47
  199. pyxlpr/ppocr/losses/table_att_loss.py +0 -109
  200. pyxlpr/ppocr/metrics/__init__.py +0 -44
  201. pyxlpr/ppocr/metrics/cls_metric.py +0 -45
  202. pyxlpr/ppocr/metrics/det_metric.py +0 -82
  203. pyxlpr/ppocr/metrics/distillation_metric.py +0 -73
  204. pyxlpr/ppocr/metrics/e2e_metric.py +0 -86
  205. pyxlpr/ppocr/metrics/eval_det_iou.py +0 -274
  206. pyxlpr/ppocr/metrics/kie_metric.py +0 -70
  207. pyxlpr/ppocr/metrics/rec_metric.py +0 -75
  208. pyxlpr/ppocr/metrics/table_metric.py +0 -50
  209. pyxlpr/ppocr/modeling/architectures/__init__.py +0 -32
  210. pyxlpr/ppocr/modeling/architectures/base_model.py +0 -88
  211. pyxlpr/ppocr/modeling/architectures/distillation_model.py +0 -60
  212. pyxlpr/ppocr/modeling/backbones/__init__.py +0 -54
  213. pyxlpr/ppocr/modeling/backbones/det_mobilenet_v3.py +0 -268
  214. pyxlpr/ppocr/modeling/backbones/det_resnet_vd.py +0 -246
  215. pyxlpr/ppocr/modeling/backbones/det_resnet_vd_sast.py +0 -285
  216. pyxlpr/ppocr/modeling/backbones/e2e_resnet_vd_pg.py +0 -265
  217. pyxlpr/ppocr/modeling/backbones/kie_unet_sdmgr.py +0 -186
  218. pyxlpr/ppocr/modeling/backbones/rec_mobilenet_v3.py +0 -138
  219. pyxlpr/ppocr/modeling/backbones/rec_mv1_enhance.py +0 -258
  220. pyxlpr/ppocr/modeling/backbones/rec_nrtr_mtb.py +0 -48
  221. pyxlpr/ppocr/modeling/backbones/rec_resnet_31.py +0 -210
  222. pyxlpr/ppocr/modeling/backbones/rec_resnet_aster.py +0 -143
  223. pyxlpr/ppocr/modeling/backbones/rec_resnet_fpn.py +0 -307
  224. pyxlpr/ppocr/modeling/backbones/rec_resnet_vd.py +0 -286
  225. pyxlpr/ppocr/modeling/heads/__init__.py +0 -54
  226. pyxlpr/ppocr/modeling/heads/cls_head.py +0 -52
  227. pyxlpr/ppocr/modeling/heads/det_db_head.py +0 -118
  228. pyxlpr/ppocr/modeling/heads/det_east_head.py +0 -121
  229. pyxlpr/ppocr/modeling/heads/det_pse_head.py +0 -37
  230. pyxlpr/ppocr/modeling/heads/det_sast_head.py +0 -128
  231. pyxlpr/ppocr/modeling/heads/e2e_pg_head.py +0 -253
  232. pyxlpr/ppocr/modeling/heads/kie_sdmgr_head.py +0 -206
  233. pyxlpr/ppocr/modeling/heads/multiheadAttention.py +0 -163
  234. pyxlpr/ppocr/modeling/heads/rec_aster_head.py +0 -393
  235. pyxlpr/ppocr/modeling/heads/rec_att_head.py +0 -202
  236. pyxlpr/ppocr/modeling/heads/rec_ctc_head.py +0 -88
  237. pyxlpr/ppocr/modeling/heads/rec_nrtr_head.py +0 -826
  238. pyxlpr/ppocr/modeling/heads/rec_sar_head.py +0 -402
  239. pyxlpr/ppocr/modeling/heads/rec_srn_head.py +0 -280
  240. pyxlpr/ppocr/modeling/heads/self_attention.py +0 -406
  241. pyxlpr/ppocr/modeling/heads/table_att_head.py +0 -246
  242. pyxlpr/ppocr/modeling/necks/__init__.py +0 -32
  243. pyxlpr/ppocr/modeling/necks/db_fpn.py +0 -111
  244. pyxlpr/ppocr/modeling/necks/east_fpn.py +0 -188
  245. pyxlpr/ppocr/modeling/necks/fpn.py +0 -138
  246. pyxlpr/ppocr/modeling/necks/pg_fpn.py +0 -314
  247. pyxlpr/ppocr/modeling/necks/rnn.py +0 -92
  248. pyxlpr/ppocr/modeling/necks/sast_fpn.py +0 -284
  249. pyxlpr/ppocr/modeling/necks/table_fpn.py +0 -110
  250. pyxlpr/ppocr/modeling/transforms/__init__.py +0 -28
  251. pyxlpr/ppocr/modeling/transforms/stn.py +0 -135
  252. pyxlpr/ppocr/modeling/transforms/tps.py +0 -308
  253. pyxlpr/ppocr/modeling/transforms/tps_spatial_transformer.py +0 -156
  254. pyxlpr/ppocr/optimizer/__init__.py +0 -61
  255. pyxlpr/ppocr/optimizer/learning_rate.py +0 -228
  256. pyxlpr/ppocr/optimizer/lr_scheduler.py +0 -49
  257. pyxlpr/ppocr/optimizer/optimizer.py +0 -160
  258. pyxlpr/ppocr/optimizer/regularizer.py +0 -52
  259. pyxlpr/ppocr/postprocess/__init__.py +0 -55
  260. pyxlpr/ppocr/postprocess/cls_postprocess.py +0 -33
  261. pyxlpr/ppocr/postprocess/db_postprocess.py +0 -234
  262. pyxlpr/ppocr/postprocess/east_postprocess.py +0 -143
  263. pyxlpr/ppocr/postprocess/locality_aware_nms.py +0 -200
  264. pyxlpr/ppocr/postprocess/pg_postprocess.py +0 -52
  265. pyxlpr/ppocr/postprocess/pse_postprocess/__init__.py +0 -15
  266. pyxlpr/ppocr/postprocess/pse_postprocess/pse/__init__.py +0 -29
  267. pyxlpr/ppocr/postprocess/pse_postprocess/pse/setup.py +0 -14
  268. pyxlpr/ppocr/postprocess/pse_postprocess/pse_postprocess.py +0 -118
  269. pyxlpr/ppocr/postprocess/rec_postprocess.py +0 -654
  270. pyxlpr/ppocr/postprocess/sast_postprocess.py +0 -355
  271. pyxlpr/ppocr/tools/__init__.py +0 -14
  272. pyxlpr/ppocr/tools/eval.py +0 -83
  273. pyxlpr/ppocr/tools/export_center.py +0 -77
  274. pyxlpr/ppocr/tools/export_model.py +0 -129
  275. pyxlpr/ppocr/tools/infer/predict_cls.py +0 -151
  276. pyxlpr/ppocr/tools/infer/predict_det.py +0 -300
  277. pyxlpr/ppocr/tools/infer/predict_e2e.py +0 -169
  278. pyxlpr/ppocr/tools/infer/predict_rec.py +0 -414
  279. pyxlpr/ppocr/tools/infer/predict_system.py +0 -204
  280. pyxlpr/ppocr/tools/infer/utility.py +0 -629
  281. pyxlpr/ppocr/tools/infer_cls.py +0 -83
  282. pyxlpr/ppocr/tools/infer_det.py +0 -134
  283. pyxlpr/ppocr/tools/infer_e2e.py +0 -122
  284. pyxlpr/ppocr/tools/infer_kie.py +0 -153
  285. pyxlpr/ppocr/tools/infer_rec.py +0 -146
  286. pyxlpr/ppocr/tools/infer_table.py +0 -107
  287. pyxlpr/ppocr/tools/program.py +0 -596
  288. pyxlpr/ppocr/tools/test_hubserving.py +0 -117
  289. pyxlpr/ppocr/tools/train.py +0 -163
  290. pyxlpr/ppocr/tools/xlprog.py +0 -748
  291. pyxlpr/ppocr/utils/EN_symbol_dict.txt +0 -94
  292. pyxlpr/ppocr/utils/__init__.py +0 -24
  293. pyxlpr/ppocr/utils/dict/ar_dict.txt +0 -117
  294. pyxlpr/ppocr/utils/dict/arabic_dict.txt +0 -162
  295. pyxlpr/ppocr/utils/dict/be_dict.txt +0 -145
  296. pyxlpr/ppocr/utils/dict/bg_dict.txt +0 -140
  297. pyxlpr/ppocr/utils/dict/chinese_cht_dict.txt +0 -8421
  298. pyxlpr/ppocr/utils/dict/cyrillic_dict.txt +0 -163
  299. pyxlpr/ppocr/utils/dict/devanagari_dict.txt +0 -167
  300. pyxlpr/ppocr/utils/dict/en_dict.txt +0 -63
  301. pyxlpr/ppocr/utils/dict/fa_dict.txt +0 -136
  302. pyxlpr/ppocr/utils/dict/french_dict.txt +0 -136
  303. pyxlpr/ppocr/utils/dict/german_dict.txt +0 -143
  304. pyxlpr/ppocr/utils/dict/hi_dict.txt +0 -162
  305. pyxlpr/ppocr/utils/dict/it_dict.txt +0 -118
  306. pyxlpr/ppocr/utils/dict/japan_dict.txt +0 -4399
  307. pyxlpr/ppocr/utils/dict/ka_dict.txt +0 -153
  308. pyxlpr/ppocr/utils/dict/korean_dict.txt +0 -3688
  309. pyxlpr/ppocr/utils/dict/latin_dict.txt +0 -185
  310. pyxlpr/ppocr/utils/dict/mr_dict.txt +0 -153
  311. pyxlpr/ppocr/utils/dict/ne_dict.txt +0 -153
  312. pyxlpr/ppocr/utils/dict/oc_dict.txt +0 -96
  313. pyxlpr/ppocr/utils/dict/pu_dict.txt +0 -130
  314. pyxlpr/ppocr/utils/dict/rs_dict.txt +0 -91
  315. pyxlpr/ppocr/utils/dict/rsc_dict.txt +0 -134
  316. pyxlpr/ppocr/utils/dict/ru_dict.txt +0 -125
  317. pyxlpr/ppocr/utils/dict/ta_dict.txt +0 -128
  318. pyxlpr/ppocr/utils/dict/table_dict.txt +0 -277
  319. pyxlpr/ppocr/utils/dict/table_structure_dict.txt +0 -2759
  320. pyxlpr/ppocr/utils/dict/te_dict.txt +0 -151
  321. pyxlpr/ppocr/utils/dict/ug_dict.txt +0 -114
  322. pyxlpr/ppocr/utils/dict/uk_dict.txt +0 -142
  323. pyxlpr/ppocr/utils/dict/ur_dict.txt +0 -137
  324. pyxlpr/ppocr/utils/dict/xi_dict.txt +0 -110
  325. pyxlpr/ppocr/utils/dict90.txt +0 -90
  326. pyxlpr/ppocr/utils/e2e_metric/Deteval.py +0 -574
  327. pyxlpr/ppocr/utils/e2e_metric/polygon_fast.py +0 -83
  328. pyxlpr/ppocr/utils/e2e_utils/extract_batchsize.py +0 -87
  329. pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_fast.py +0 -457
  330. pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_slow.py +0 -592
  331. pyxlpr/ppocr/utils/e2e_utils/pgnet_pp_utils.py +0 -162
  332. pyxlpr/ppocr/utils/e2e_utils/visual.py +0 -162
  333. pyxlpr/ppocr/utils/en_dict.txt +0 -95
  334. pyxlpr/ppocr/utils/gen_label.py +0 -81
  335. pyxlpr/ppocr/utils/ic15_dict.txt +0 -36
  336. pyxlpr/ppocr/utils/iou.py +0 -54
  337. pyxlpr/ppocr/utils/logging.py +0 -69
  338. pyxlpr/ppocr/utils/network.py +0 -84
  339. pyxlpr/ppocr/utils/ppocr_keys_v1.txt +0 -6623
  340. pyxlpr/ppocr/utils/profiler.py +0 -110
  341. pyxlpr/ppocr/utils/save_load.py +0 -150
  342. pyxlpr/ppocr/utils/stats.py +0 -72
  343. pyxlpr/ppocr/utils/utility.py +0 -80
  344. pyxlpr/ppstructure/__init__.py +0 -13
  345. pyxlpr/ppstructure/predict_system.py +0 -187
  346. pyxlpr/ppstructure/table/__init__.py +0 -13
  347. pyxlpr/ppstructure/table/eval_table.py +0 -72
  348. pyxlpr/ppstructure/table/matcher.py +0 -192
  349. pyxlpr/ppstructure/table/predict_structure.py +0 -136
  350. pyxlpr/ppstructure/table/predict_table.py +0 -221
  351. pyxlpr/ppstructure/table/table_metric/__init__.py +0 -16
  352. pyxlpr/ppstructure/table/table_metric/parallel.py +0 -51
  353. pyxlpr/ppstructure/table/table_metric/table_metric.py +0 -247
  354. pyxlpr/ppstructure/table/tablepyxl/__init__.py +0 -13
  355. pyxlpr/ppstructure/table/tablepyxl/style.py +0 -283
  356. pyxlpr/ppstructure/table/tablepyxl/tablepyxl.py +0 -118
  357. pyxlpr/ppstructure/utility.py +0 -71
  358. pyxlpr/xlai.py +0 -10
pyxlpr/paddleocr.py DELETED
@@ -1,776 +0,0 @@
1
- # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
- from pyxllib.prog.pupil import check_install_package
16
-
17
- # 没有paddle的时候,默认安装
18
- check_install_package('paddle', 'paddlepaddle')
19
- # 可能会遇到这个问题
20
- # https://blog.csdn.net/qq_47997583/article/details/122430776
21
- # pip install opencv-python-headless==4.1.2.30
22
- # 其他依赖库
23
- check_install_package('pyclipper')
24
- check_install_package('imgaug')
25
- check_install_package('lmdb')
26
-
27
- import os
28
- import sys
29
-
30
- __dir__ = os.path.dirname(__file__)
31
-
32
- sys.path.append(os.path.join(__dir__, ''))
33
-
34
- import cv2
35
- import logging
36
- import numpy as np
37
- from pathlib import Path
38
- import json
39
-
40
- from pyxlpr.ppocr.tools.infer import predict_system
41
- from pyxlpr.ppocr.utils.logging import get_logger
42
-
43
- logger = get_logger()
44
- from pyxlpr.ppocr.utils.utility import check_and_read_gif, get_image_file_list
45
- from pyxlpr.ppocr.utils.network import maybe_download, download_with_progressbar, is_link, confirm_model_dir_url
46
- from pyxlpr.ppocr.tools.infer.utility import draw_ocr, str2bool, check_gpu
47
- from pyxlpr.ppstructure.utility import init_args, draw_structure_result
48
- from pyxlpr.ppstructure.predict_system import OCRSystem, save_structure_res
49
-
50
- from tqdm import tqdm
51
- from pyxllib.xl import run_once, XlPath, Timer
52
- from pyxllib.xlcv import xlcv, xlpil
53
- from pyxllib.algo.geo import rect_bounds
54
-
55
- __all__ = [
56
- 'PaddleOCR', 'PPStructure', 'draw_ocr', 'draw_structure_result',
57
- 'save_structure_res', 'download_with_progressbar'
58
- ]
59
-
60
- SUPPORT_DET_MODEL = ['DB']
61
- VERSION = '2.4'
62
- SUPPORT_REC_MODEL = ['CRNN']
63
- BASE_DIR = os.path.expanduser("~/.paddleocr/")
64
-
65
- DEFAULT_OCR_MODEL_VERSION = 'PP-OCR'
66
- DEFAULT_STRUCTURE_MODEL_VERSION = 'STRUCTURE'
67
- MODEL_URLS = {
68
- 'OCR': {
69
- 'PP-OCRv2': {
70
- 'det': {
71
- 'ch': {
72
- 'url':
73
- 'https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar',
74
- },
75
- },
76
- 'rec': {
77
- 'ch': {
78
- 'url':
79
- 'https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar',
80
- 'dict_path': './ppocr/utils/ppocr_keys_v1.txt'
81
- }
82
- }
83
- },
84
- DEFAULT_OCR_MODEL_VERSION: {
85
- 'det': {
86
- 'ch': {
87
- 'url':
88
- 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar',
89
- },
90
- 'en': {
91
- 'url':
92
- 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_ppocr_mobile_v2.0_det_infer.tar',
93
- },
94
- 'structure': {
95
- 'url':
96
- 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar'
97
- }
98
- },
99
- 'rec': {
100
- 'ch': {
101
- 'url':
102
- 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar',
103
- 'dict_path': './ppocr/utils/ppocr_keys_v1.txt'
104
- },
105
- 'en': {
106
- 'url':
107
- 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tar',
108
- 'dict_path': './ppocr/utils/en_dict.txt'
109
- },
110
- 'french': {
111
- 'url':
112
- 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_infer.tar',
113
- 'dict_path': './ppocr/utils/dict/french_dict.txt'
114
- },
115
- 'german': {
116
- 'url':
117
- 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tar',
118
- 'dict_path': './ppocr/utils/dict/german_dict.txt'
119
- },
120
- 'korean': {
121
- 'url':
122
- 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tar',
123
- 'dict_path': './ppocr/utils/dict/korean_dict.txt'
124
- },
125
- 'japan': {
126
- 'url':
127
- 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tar',
128
- 'dict_path': './ppocr/utils/dict/japan_dict.txt'
129
- },
130
- 'chinese_cht': {
131
- 'url':
132
- 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_infer.tar',
133
- 'dict_path': './ppocr/utils/dict/chinese_cht_dict.txt'
134
- },
135
- 'ta': {
136
- 'url':
137
- 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_infer.tar',
138
- 'dict_path': './ppocr/utils/dict/ta_dict.txt'
139
- },
140
- 'te': {
141
- 'url':
142
- 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_infer.tar',
143
- 'dict_path': './ppocr/utils/dict/te_dict.txt'
144
- },
145
- 'ka': {
146
- 'url':
147
- 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_infer.tar',
148
- 'dict_path': './ppocr/utils/dict/ka_dict.txt'
149
- },
150
- 'latin': {
151
- 'url':
152
- 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_infer.tar',
153
- 'dict_path': './ppocr/utils/dict/latin_dict.txt'
154
- },
155
- 'arabic': {
156
- 'url':
157
- 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_infer.tar',
158
- 'dict_path': './ppocr/utils/dict/arabic_dict.txt'
159
- },
160
- 'cyrillic': {
161
- 'url':
162
- 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_infer.tar',
163
- 'dict_path': './ppocr/utils/dict/cyrillic_dict.txt'
164
- },
165
- 'devanagari': {
166
- 'url':
167
- 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_infer.tar',
168
- 'dict_path': './ppocr/utils/dict/devanagari_dict.txt'
169
- },
170
- 'structure': {
171
- 'url':
172
- 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar',
173
- 'dict_path': 'ppocr/utils/dict/table_dict.txt'
174
- }
175
- },
176
- 'cls': {
177
- 'ch': {
178
- 'url':
179
- 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar',
180
- }
181
- },
182
- }
183
- },
184
- 'STRUCTURE': {
185
- DEFAULT_STRUCTURE_MODEL_VERSION: {
186
- 'table': {
187
- 'en': {
188
- 'url':
189
- 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar',
190
- 'dict_path': 'ppocr/utils/dict/table_structure_dict.txt'
191
- }
192
- }
193
- }
194
- }
195
- }
196
-
197
-
198
def parse_args(mMain=True):
    """Build the argument namespace used by the OCR / structure systems.

    Starts from the parser returned by ``init_args()`` and registers the
    extra ``--lang``, ``--det``, ``--rec``, ``--type``, ``--ocr_version`` and
    ``--structure_version`` options. The defaults of ``rec_char_dict_path``
    and ``table_char_dict_path`` are then reset to ``None``.

    Args:
        mMain: When true, the real command line is parsed. When false, the
            command line is ignored and the namespace is filled with the
            default value of every registered option.

    Returns:
        An ``argparse.Namespace``.
    """
    import argparse

    parser = init_args()
    parser.add_help = mMain

    parser.add_argument("--lang", type=str, default='ch')
    parser.add_argument("--det", type=str2bool, default=True)
    parser.add_argument("--rec", type=str2bool, default=True)
    parser.add_argument("--type", type=str, default='ocr')
    parser.add_argument(
        "--ocr_version",
        type=str,
        default='PP-OCRv2',
        help='OCR Model version, the current model support list is as follows: '
             '1. PP-OCRv2 Support Chinese detection and recognition model. '
             '2. PP-OCR support Chinese detection, recognition and direction classifier and multilingual recognition model.'
    )
    parser.add_argument(
        "--structure_version",
        type=str,
        default='STRUCTURE',
        help='Model version, the current model support list is as follows:'
             ' 1. STRUCTURE Support en table structure model.')

    # These two dictionary paths must default to None rather than to
    # whatever init_args() configured for them.
    dict_path_options = ['rec_char_dict_path', 'table_char_dict_path']
    for option in parser._actions:
        if option.dest in dict_path_options:
            option.default = None

    if not mMain:
        # Library use: never touch sys.argv, just collect the defaults.
        defaults = {option.dest: option.default for option in parser._actions}
        return argparse.Namespace(**defaults)
    return parser.parse_args()
231
-
232
-
233
def parse_lang(lang):
    """Resolve a language code to its recognition and detection languages.

    Codes that belong to a script family (latin, arabic, cyrillic,
    devanagari) are replaced by the family name. Any other code is kept
    as is.

    Args:
        lang: Language code such as ``'ch'``, ``'en'``, ``'fr'`` or ``'ru'``.

    Returns:
        A ``(lang, det_lang)`` tuple. ``lang`` is the recognition language
        after family folding. ``det_lang`` is ``'ch'`` or ``'structure'``
        when ``lang`` is one of those, and ``'en'`` otherwise.

    Raises:
        AssertionError: If the resolved language has no recognition model
            under the default OCR model version.
    """
    script_families = (
        ('latin', (
            'af', 'az', 'bs', 'cs', 'cy', 'da', 'de', 'es', 'et', 'fr', 'ga', 'hr',
            'hu', 'id', 'is', 'it', 'ku', 'la', 'lt', 'lv', 'mi', 'ms', 'mt', 'nl',
            'no', 'oc', 'pi', 'pl', 'pt', 'ro', 'rs_latin', 'sk', 'sl', 'sq', 'sv',
            'sw', 'tl', 'tr', 'uz', 'vi',
        )),
        ('arabic', ('ar', 'fa', 'ug', 'ur')),
        ('cyrillic', (
            'ru', 'rs_cyrillic', 'be', 'bg', 'uk', 'mn', 'abq', 'ady', 'kbd', 'ava',
            'dar', 'inh', 'che', 'lbe', 'lez', 'tab',
        )),
        ('devanagari', (
            'hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck', 'new', 'gom',
            'sa', 'bgc',
        )),
    )
    for family, codes in script_families:
        if lang in codes:
            lang = family
            break

    supported = MODEL_URLS['OCR'][DEFAULT_OCR_MODEL_VERSION]['rec']
    if lang not in supported:
        # Raised explicitly instead of via ``assert`` so the validation is
        # not stripped when Python runs with -O; the exception type and
        # message are the same as before.
        raise AssertionError('param lang must in {}, but got {}'.format(
            supported.keys(), lang))

    det_lang = lang if lang in ('ch', 'structure') else 'en'
    return lang, det_lang
267
-
268
-
269
def get_model_config(type, version, model_type, lang):
    """Return the ``MODEL_URLS`` entry for one model.

    The requested ``version`` is used when it provides ``model_type`` for
    ``lang``. Otherwise the lookup falls back, with a warning, to the
    default version of the given ``type``.

    Args:
        type: ``'OCR'`` or ``'STRUCTURE'`` (top-level key of ``MODEL_URLS``).
        version: Requested model version.
        model_type: Model kind inside the version, e.g. ``'det'`` or ``'rec'``.
        lang: Language key of the model.

    Returns:
        The configuration dict stored in ``MODEL_URLS`` for that model.

    Raises:
        NotImplementedError: If ``type`` is neither ``'OCR'`` nor
            ``'STRUCTURE'``.
        SystemExit: With status -1 when even the default version has no
            matching model type or language.
    """
    if type == 'OCR':
        fallback = DEFAULT_OCR_MODEL_VERSION
    elif type == 'STRUCTURE':
        fallback = DEFAULT_STRUCTURE_MODEL_VERSION
    else:
        raise NotImplementedError

    catalog = MODEL_URLS[type]

    # Unknown version: use the default one.
    if version not in catalog:
        logger.warning('version {} not in {}, auto switch to version {}'.format(
            version, catalog.keys(), fallback))
        version = fallback

    # Model type missing in the chosen version: default version or give up.
    if model_type not in catalog[version]:
        if model_type not in catalog[fallback]:
            logger.error('{} models is not support, we only support {}'.format(
                model_type, catalog[fallback].keys()))
            sys.exit(-1)
        else:
            logger.warning(
                'version {} not support {} models, auto switch to version {}'.format(
                    version, model_type, fallback))
            version = fallback

    # Language missing in the chosen version: default version or give up.
    if lang not in catalog[version][model_type]:
        if lang not in catalog[fallback][model_type]:
            logger.error(
                'lang {} is not support, we only support {} for {} models'.format(
                    lang, catalog[fallback][model_type].keys(), model_type))
            sys.exit(-1)
        else:
            logger.warning(
                'lang {} is not support in {}, auto switch to version {}'.format(
                    lang, version, fallback))
            version = fallback

    return catalog[version][model_type][lang]
304
-
305
-
306
- class PaddleOCR(predict_system.TextSystem):
307
def __init__(self, **kwargs):
    """
    Resolve the model directories, fetch the models and initialise the
    parent text system.

    args:
        **kwargs: overrides for the default parameters (see paddleocr --help)
    """
    params = parse_args(mMain=False)
    params.__dict__.update(**kwargs)
    params.use_gpu = check_gpu(params.use_gpu)

    if not params.show_log:
        logger.setLevel(logging.INFO)
    self.use_angle_cls = params.use_angle_cls
    lang, det_lang = parse_lang(params.lang)

    # init model dir
    # Each entry: (model kind, language used for the config lookup,
    # sub-directories below the local 'ocr' model directory).
    model_specs = (
        ('det', det_lang, ('det', det_lang)),
        ('rec', lang, ('rec', lang)),
        ('cls', 'ch', ('cls',)),
    )
    configs = {}
    urls = {}
    for kind, model_lang, subdirs in model_specs:
        dir_attr = kind + '_model_dir'
        configs[kind] = get_model_config('OCR', params.ocr_version, kind,
                                         model_lang)
        resolved_dir, urls[kind] = confirm_model_dir_url(
            getattr(params, dir_attr),
            os.path.join(BASE_DIR, VERSION, 'ocr', *subdirs),
            configs[kind]['url'])
        setattr(params, dir_attr, resolved_dir)

    # download model
    for kind, _, _ in model_specs:
        maybe_download(getattr(params, kind + '_model_dir'), urls[kind])

    if params.det_algorithm not in SUPPORT_DET_MODEL:
        logger.error('det_algorithm must in {}'.format(SUPPORT_DET_MODEL))
        sys.exit(0)
    if params.rec_algorithm not in SUPPORT_REC_MODEL:
        logger.error('rec_algorithm must in {}'.format(SUPPORT_REC_MODEL))
        sys.exit(0)

    # No explicit dictionary given: use the one listed for the chosen
    # recognition model, resolved relative to this file.
    if params.rec_char_dict_path is None:
        package_dir = Path(__file__).parent
        params.rec_char_dict_path = str(package_dir / configs['rec']['dict_path'])

    print(params)
    # init det_model and rec_model
    super().__init__(params)
360
-
361
@classmethod
@run_once('ignore,str')
def build_ppocr(cls, use_angle_cls=True, lang="ch", show_log=False, **kwargs):
    """ Build a PaddleOCR instance; the recognition model needs roughly 850M of GPU memory

    When the given det_model_dir / rec_model_dir do not exist, the latest
    model is downloaded automatically into that directory.
    """
    # 1 Work out some parameters automatically

    # A recognition model directory was given without a dictionary: use a
    # char_dict.txt found inside that directory, or else next to it.
    if 'rec_model_dir' in kwargs and 'rec_char_dict_path' not in kwargs:
        model_dir = XlPath(kwargs['rec_model_dir'])
        for candidate in (model_dir / 'char_dict.txt',
                          model_dir.parent / 'char_dict.txt'):
            if candidate.is_file():
                kwargs['rec_char_dict_path'] = candidate
                break

    # Path-like arguments are converted to str; None values are left alone.
    path_suffixes = ('_dir', '_path')
    kwargs = {
        key: (str(value)
              if value is not None and key.endswith(path_suffixes)
              else value)
        for key, value in kwargs.items()
    }

    # 2 Build the ppocr object
    engine = PaddleOCR(use_angle_cls=use_angle_cls, lang=lang, show_log=show_log, **kwargs)

    # 3 Recognise one blank image as a warm-up, so later timings are more accurate
    blank = np.zeros([320, 320, 3], dtype='uint8')
    engine.ocr(blank)

    return engine
392
-
393
- def __1_ocr(self):
394
- """ 识别功能 """
395
- pass
396
-
397
- def ocr(self, img, det=True, rec=True, cls=True):
398
- """
399
- ocr with paddleocr
400
- args:
401
- img: img for ocr, support ndarray, img_path and list or ndarray
402
- det: use text detection or not, if false, only rec will be exec. default is True
403
- rec: use text recognition or not, if false, only det will be exec. default is True
404
-
405
- 使用示例:
406
- lines = ppocr.ocr(str(imfile))
407
- for line in lines:
408
- pts, [text, score] = line
409
- """
410
- img = xlcv.read(img, 0)
411
-
412
- assert isinstance(img, (np.ndarray, list, str))
413
- if isinstance(img, list) and det == True:
414
- logger.error('When input a list of images, det must be false')
415
- exit(0)
416
- if cls == True and self.use_angle_cls == False:
417
- logger.warning(
418
- 'Since the angle classifier is not initialized, the angle classifier will not be uesd during the forward process'
419
- )
420
-
421
- if isinstance(img, str):
422
- # download net image
423
- if img.startswith('http'):
424
- download_with_progressbar(img, 'tmp.jpg')
425
- img = 'tmp.jpg'
426
- image_file = img
427
- img, flag = check_and_read_gif(image_file)
428
- if not flag:
429
- with open(image_file, 'rb') as f:
430
- np_arr = np.frombuffer(f.read(), dtype=np.uint8)
431
- img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
432
- if img is None:
433
- logger.error("error in loading image:{}".format(image_file))
434
- return None
435
- if isinstance(img, np.ndarray) and len(img.shape) == 2:
436
- img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
437
- if det and rec:
438
- dt_boxes, rec_res = self.__call__(img, cls)
439
- return [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)]
440
- elif det and not rec:
441
- dt_boxes, elapse = self.text_detector(img)
442
- if dt_boxes is None:
443
- return None
444
- return [box.tolist() for box in dt_boxes]
445
- else:
446
- if not isinstance(img, list):
447
- img = [img]
448
- if self.use_angle_cls and cls:
449
- img, cls_res, elapse = self.text_classifier(img)
450
- if not rec:
451
- return cls_res
452
- rec_res, elapse = self.text_recognizer(img)
453
- return rec_res
454
-
455
- def ocr2texts(self, img, sort_textline=False):
456
- """ 识别后,只返回文本清单
457
-
458
- :param sort_textline: 是否按文本行的几何关系重新排序
459
- """
460
- from pyxlpr.data.imtextline import TextlineShape
461
- lines = self.ocr(img)
462
- if sort_textline:
463
- lines.sort(key=lambda x: TextlineShape(x[0]))
464
- return [x[1][0] for x in lines]
465
-
466
- def rec_singleline(self, im, cls=False):
467
- """ 只识别一行文本 """
468
- lines = self.ocr(im, det=False, cls=cls)
469
- text = ' '.join([line[0] for line in lines])
470
- score = round(float(sum([line[1] for line in lines])) / len(lines), 4)
471
- return text, score
472
-
473
- def __2_view(self):
474
- """ 可视化、生成标注文件(也可以用于自动标注) """
475
- pass
476
-
477
- def ocr2img(self, img, det=True, cls=True):
478
- """ 识别并返回结果
479
- 返回np.ndarray类型
480
-
481
- TODO 带det、rec参数的不同可视化效果还没写
482
- """
483
- result = self.ocr(img, cls=cls)
484
- for i in range(len(result)):
485
- result[i][1] = [result[i][1][0], float(result[i][1][1])]
486
-
487
- boxes = [line[0] for line in result]
488
- txts = [line[1][0] for line in result]
489
- scores = [round(float(line[1][1]), 4) for line in result]
490
-
491
- image = xlpil.read(img).convert('RGB')
492
- im_show = draw_ocr(image, boxes, txts, scores)
493
- return im_show
494
-
495
- def ocr2labelme(self, root, det=False, rec=False):
496
- """ 对root目录下的所有图片,自动进行文字检测、识别
497
-
498
- :param root:
499
- 输入如果是目录,会对目录里所有图片进行处理
500
- 如果是图片,则只会对单张图进行处理,返回对应的json格式数据
501
- :param det: det和rec默认都关闭,没有功能效果,需要明确指定,防止意外覆盖已有的检测结果
502
- True,会重置检测结果
503
- False,直接使用已有的检测结果
504
- 2,将检测结果转为矩形
505
- :param rec:
506
- True,检测后获得识别结果
507
- False,不执行识别,一般是只需要检测的场合
508
- """
509
- from pyxlpr.data.labelme import LabelmeDict
510
-
511
- # 1 工具函数
512
- def det_ocr(f):
513
- """ 使用程序完整生成一套标注数据 """
514
- data = LabelmeDict.gen_data(f)
515
- lines = self.ocr(str(f))
516
- for line in lines:
517
- pts, [text, score] = line
518
- pts = [[int(p[0]), int(p[1])] for p in pts] # 转整数
519
- if det == 2:
520
- pts = rect_bounds(pts)
521
- sp = LabelmeDict.gen_shape({'text': text, 'score': round(float(score), 4)}, pts)
522
- data['shapes'].append(sp)
523
- f.with_suffix('.json').write_json(data)
524
-
525
- def det_(f):
526
- """ 只检测不识别,这个一般没太必要,既然检测了,就一起识别了 """
527
- data = LabelmeDict.gen_data(f)
528
- lines = self.ocr(str(f), rec=False)
529
- for pts in lines:
530
- pts = [[int(p[0]), int(p[1])] for p in pts] # 转整数
531
- if det == 2:
532
- pts = rect_bounds(pts)
533
- sp = LabelmeDict.gen_shape('', pts)
534
-
535
- data['shapes'].append(sp)
536
- f.with_suffix('.json').write_json(data)
537
-
538
- def ocr(f):
539
- """ 只识别不检测。常用于手动调整框后,再自动识别一遍文本内容 """
540
- # 没有对应json文件不处理
541
- f2 = f.with_suffix('.json')
542
- if not f2.is_file():
543
- return
544
-
545
- # 读取已有检测数据,只更新识别结果
546
- image = xlcv.read(f)
547
- data = f2.read_json()
548
- for sp in data['shapes']:
549
- pts = LabelmeDict.to_quad_pts(sp)
550
- im = xlcv.get_sub(image, pts)
551
- lines = self.ocr(im, det=False)
552
- text = ' '.join([line[0] for line in lines])
553
- score = sum([line[1] for line in lines]) / len(lines)
554
- sp['label'] = json.dumps({'text': text, 'score': round(float(score), 4)}, ensure_ascii=False)
555
- f2.write_json(data)
556
-
557
- # 2 遍历文件批量处理
558
- root = XlPath(root)
559
- images = list(root.rglob_images('*'))
560
- for f in tqdm(images):
561
- if det and rec:
562
- det_ocr(f)
563
- elif det and not rec:
564
- det_(f)
565
- elif not det and rec:
566
- ocr(f)
567
-
568
- def __3_has_label_dataset(self):
569
- """ 有标注的数据的相关处理功能
570
- 比如可以测算指标分数
571
- """
572
- pass
573
-
574
- def det_metric(self, dataset, *, print_mode=False):
575
- """ 计算检测的分数和速度
576
-
577
- :param dataset:
578
- 输入一个dataset,一般是用build_dataset生成的,可以把各种类型的数据统一为一个标准结构
579
- 也可以自定义输入,只要可遍历,每个元素有polys标注,和image图片的二进制数据就行
580
- 但一般还是建议走ppocr框架,里面有很多内置好的数据解析功能,能省很多重复工作
581
- :param print_mode: 是否输出运行速度信息
582
- """
583
- from pyxlpr.ppocr.metrics.eval_det_iou import DetectionIoUEvaluator
584
-
585
- # 1 对所有数据图片进行推断,并计时
586
- timer = Timer('总共耗时')
587
- gts, preds = [], []
588
- for x in dataset:
589
- # 注意这里图片已经先读入二进制数据了,所以会比实际部署中输入路径的方式快一个读取二进制数据的时间
590
- timer.start()
591
- # 要去掉难样本
592
- # trick: 没有文本的图在处理中,dataset会自动过滤掉,为了避免被过滤,会加一个w=h=1的难样本框
593
- gts.append([poly for poly, tag in zip(x['polys'], x['ignore_tags']) if not tag])
594
- img = xlcv.read_from_buffer(x['image'])
595
- lines = self.ocr(img, rec=False)
596
- preds.append(lines)
597
- timer.stop()
598
-
599
- if print_mode:
600
- timer.report()
601
-
602
- # 2 精度测评
603
- metric = DetectionIoUEvaluator.eval(gts, preds)
604
- metric['total_frame'] = len(timer.data)
605
- metric['fps'] = metric['total_frame'] / sum(timer.data)
606
- return metric
607
-
608
- def rec_metric_labelme(self, root, *, cls=False, bc=False, print_mode=True,
609
- max_file_num=None, attr_filter=None):
610
- """
611
- :param bc: 是否打开bcompare比较所有识别错误的内容
612
- :param max_file_num: 有时候只想简单测下速度,可以只测几张图就够
613
- :param attr_filter: def attr_filter(attr) 返回True才保留
614
- """
615
- from pyxllib.prog.specialist import bcompare
616
- from pyxlpr.ppocr.metrics.rec_metric import RecMetric
617
- from pyxllib.prog.pupil import DictTool
618
-
619
- # 1 读取检测标注、调用self进行检测
620
- timer1, timer2 = Timer('读图速度'), Timer('总共耗时')
621
- # 有json文件才算有标注,空图最好也能对应一份空shapes的json文件才会进行判断
622
- files = list(XlPath(root).rglob_files('*.json'))
623
- if max_file_num:
624
- files = files[:max_file_num]
625
- tags, gts, preds = [], [], []
626
- for f in tqdm(files):
627
- data = f.read_json()
628
-
629
- timer1.start()
630
- img = xlcv.read(f.parent / data['imagePath'])
631
- timer1.stop()
632
-
633
- for i, sp in enumerate(data['shapes']):
634
- attr = DictTool.json_loads(sp['label'], 'text')
635
-
636
- if attr_filter and not attr_filter(attr):
637
- continue
638
-
639
- tags.append(f'{f.stem}_{i:03}') # 这里并不是要真的生成图片,所以有一定重复没有关系
640
- subimg = xlcv.get_sub(img, sp['points'])
641
- # xlcv.write(subimg, f.parent / f'{f.stem}/{i:03}.png')
642
- timer2.start()
643
- text, score = self.rec_singleline(subimg, cls=cls)
644
- timer2.stop()
645
- preds.append(text)
646
- gts.append(attr['text'].strip())
647
-
648
- # 2 精度测评及测速
649
- metrics = RecMetric.eval(preds, gts)
650
- for k, v in metrics.items():
651
- metrics[k] = round(v, 4)
652
- if print_mode:
653
- timer1.report()
654
- timer2.report()
655
- print(metrics)
656
- print('fps={:.2f}'.format(1 / np.mean(timer2.data)))
657
-
658
- # 3 bc可视化比较
659
- if bc:
660
- left = '\n'.join([f'{t}\t{x}' for t, x, y in zip(tags, gts, preds) if x != y])
661
- right = '\n'.join([f'{t}\t{y}' for t, x, y in zip(tags, gts, preds) if x != y])
662
- bcompare(left, right)
663
-
664
- def rec_bc_labelme(self, root):
665
- """ 用bc可视化显示识别模型效果
666
-
667
- :param root: labelme格式数据所在的根目录
668
- """
669
-
670
-
671
- build_paddleocr = PaddleOCR.build_ppocr
672
-
673
-
674
- class PPStructure(OCRSystem):
675
- def __init__(self, **kwargs):
676
- params = parse_args(mMain=False)
677
- params.__dict__.update(**kwargs)
678
- params.use_gpu = check_gpu(params.use_gpu)
679
-
680
- if not params.show_log:
681
- logger.setLevel(logging.INFO)
682
- lang, det_lang = parse_lang(params.lang)
683
-
684
- # init model dir
685
- det_model_config = get_model_config('OCR', params.ocr_version, 'det',
686
- det_lang)
687
- params.det_model_dir, det_url = confirm_model_dir_url(
688
- params.det_model_dir,
689
- os.path.join(BASE_DIR, VERSION, 'ocr', 'det', det_lang),
690
- det_model_config['url'])
691
- rec_model_config = get_model_config('OCR', params.ocr_version, 'rec',
692
- lang)
693
- params.rec_model_dir, rec_url = confirm_model_dir_url(
694
- params.rec_model_dir,
695
- os.path.join(BASE_DIR, VERSION, 'ocr', 'rec', lang),
696
- rec_model_config['url'])
697
- table_model_config = get_model_config(
698
- 'STRUCTURE', params.structure_version, 'table', 'en')
699
- params.table_model_dir, table_url = confirm_model_dir_url(
700
- params.table_model_dir,
701
- os.path.join(BASE_DIR, VERSION, 'ocr', 'table'),
702
- table_model_config['url'])
703
- # download model
704
- maybe_download(params.det_model_dir, det_url)
705
- maybe_download(params.rec_model_dir, rec_url)
706
- maybe_download(params.table_model_dir, table_url)
707
-
708
- if params.rec_char_dict_path is None:
709
- params.rec_char_dict_path = str(
710
- Path(__file__).parent / rec_model_config['dict_path'])
711
- if params.table_char_dict_path is None:
712
- params.table_char_dict_path = str(
713
- Path(__file__).parent / table_model_config['dict_path'])
714
-
715
- print(params)
716
- super().__init__(params)
717
-
718
- def __call__(self, img):
719
- if isinstance(img, str):
720
- # download net image
721
- if img.startswith('http'):
722
- download_with_progressbar(img, 'tmp.jpg')
723
- img = 'tmp.jpg'
724
- image_file = img
725
- img, flag = check_and_read_gif(image_file)
726
- if not flag:
727
- with open(image_file, 'rb') as f:
728
- np_arr = np.frombuffer(f.read(), dtype=np.uint8)
729
- img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
730
- if img is None:
731
- logger.error("error in loading image:{}".format(image_file))
732
- return None
733
- if isinstance(img, np.ndarray) and len(img.shape) == 2:
734
- img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
735
-
736
- res = super().__call__(img)
737
- return res
738
-
739
-
740
- def main():
741
- # for cmd
742
- args = parse_args(mMain=True)
743
- image_dir = args.image_dir
744
- if is_link(image_dir):
745
- download_with_progressbar(image_dir, 'tmp.jpg')
746
- image_file_list = ['tmp.jpg']
747
- else:
748
- image_file_list = get_image_file_list(args.image_dir)
749
- if len(image_file_list) == 0:
750
- logger.error('no images find in {}'.format(args.image_dir))
751
- return
752
- if args.type == 'ocr':
753
- engine = PaddleOCR(**(args.__dict__))
754
- elif args.type == 'structure':
755
- engine = PPStructure(**(args.__dict__))
756
- else:
757
- raise NotImplementedError
758
-
759
- for img_path in image_file_list:
760
- img_name = os.path.basename(img_path).split('.')[0]
761
- logger.info('{}{}{}'.format('*' * 10, img_path, '*' * 10))
762
- if args.type == 'ocr':
763
- result = engine.ocr(img_path,
764
- det=args.det,
765
- rec=args.rec,
766
- cls=args.use_angle_cls)
767
- if result is not None:
768
- for line in result:
769
- logger.info(line)
770
- elif args.type == 'structure':
771
- result = engine(img_path)
772
- save_structure_res(result, args.output, img_name)
773
-
774
- for item in result:
775
- item.pop('img')
776
- logger.info(item)