pyxllib 0.3.96__py3-none-any.whl → 0.3.197__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306)
  1. pyxllib/algo/geo.py +12 -0
  2. pyxllib/algo/intervals.py +1 -1
  3. pyxllib/algo/matcher.py +78 -0
  4. pyxllib/algo/pupil.py +187 -19
  5. pyxllib/algo/specialist.py +2 -1
  6. pyxllib/algo/stat.py +38 -2
  7. {pyxlpr → pyxllib/autogui}/__init__.py +1 -1
  8. pyxllib/autogui/activewin.py +246 -0
  9. pyxllib/autogui/all.py +9 -0
  10. pyxllib/{ext/autogui → autogui}/autogui.py +40 -11
  11. pyxllib/autogui/uiautolib.py +362 -0
  12. pyxllib/autogui/wechat.py +827 -0
  13. pyxllib/autogui/wechat_msg.py +421 -0
  14. pyxllib/autogui/wxautolib.py +84 -0
  15. pyxllib/cv/slidercaptcha.py +137 -0
  16. pyxllib/data/echarts.py +123 -12
  17. pyxllib/data/jsonlib.py +89 -0
  18. pyxllib/data/pglib.py +514 -30
  19. pyxllib/data/sqlite.py +231 -4
  20. pyxllib/ext/JLineViewer.py +14 -1
  21. pyxllib/ext/drissionlib.py +277 -0
  22. pyxllib/ext/kq5034lib.py +0 -1594
  23. pyxllib/ext/robustprocfile.py +497 -0
  24. pyxllib/ext/unixlib.py +6 -5
  25. pyxllib/ext/utools.py +108 -95
  26. pyxllib/ext/webhook.py +32 -14
  27. pyxllib/ext/wjxlib.py +88 -0
  28. pyxllib/ext/wpsapi.py +124 -0
  29. pyxllib/ext/xlwork.py +9 -0
  30. pyxllib/ext/yuquelib.py +1003 -71
  31. pyxllib/file/docxlib.py +1 -1
  32. pyxllib/file/libreoffice.py +165 -0
  33. pyxllib/file/movielib.py +9 -0
  34. pyxllib/file/packlib/__init__.py +112 -75
  35. pyxllib/file/pdflib.py +1 -1
  36. pyxllib/file/pupil.py +1 -1
  37. pyxllib/file/specialist/dirlib.py +1 -1
  38. pyxllib/file/specialist/download.py +10 -3
  39. pyxllib/file/specialist/filelib.py +266 -55
  40. pyxllib/file/xlsxlib.py +205 -50
  41. pyxllib/file/xlsyncfile.py +341 -0
  42. pyxllib/prog/cachetools.py +64 -0
  43. pyxllib/prog/filelock.py +42 -0
  44. pyxllib/prog/multiprogs.py +940 -0
  45. pyxllib/prog/newbie.py +9 -2
  46. pyxllib/prog/pupil.py +129 -60
  47. pyxllib/prog/specialist/__init__.py +176 -2
  48. pyxllib/prog/specialist/bc.py +5 -2
  49. pyxllib/prog/specialist/browser.py +11 -2
  50. pyxllib/prog/specialist/datetime.py +68 -0
  51. pyxllib/prog/specialist/tictoc.py +12 -13
  52. pyxllib/prog/specialist/xllog.py +5 -5
  53. pyxllib/prog/xlosenv.py +7 -0
  54. pyxllib/text/airscript.js +744 -0
  55. pyxllib/text/charclasslib.py +17 -5
  56. pyxllib/text/jiebalib.py +6 -3
  57. pyxllib/text/jinjalib.py +32 -0
  58. pyxllib/text/jsa_ai_prompt.md +271 -0
  59. pyxllib/text/jscode.py +159 -4
  60. pyxllib/text/nestenv.py +1 -1
  61. pyxllib/text/newbie.py +12 -0
  62. pyxllib/text/pupil/common.py +26 -0
  63. pyxllib/text/specialist/ptag.py +2 -2
  64. pyxllib/text/templates/echart_base.html +11 -0
  65. pyxllib/text/templates/highlight_code.html +17 -0
  66. pyxllib/text/templates/latex_editor.html +103 -0
  67. pyxllib/text/xmllib.py +76 -14
  68. pyxllib/xl.py +2 -1
  69. pyxllib-0.3.197.dist-info/METADATA +48 -0
  70. pyxllib-0.3.197.dist-info/RECORD +126 -0
  71. {pyxllib-0.3.96.dist-info → pyxllib-0.3.197.dist-info}/WHEEL +1 -2
  72. pyxllib/ext/autogui/__init__.py +0 -8
  73. pyxllib-0.3.96.dist-info/METADATA +0 -51
  74. pyxllib-0.3.96.dist-info/RECORD +0 -333
  75. pyxllib-0.3.96.dist-info/top_level.txt +0 -2
  76. pyxlpr/ai/__init__.py +0 -5
  77. pyxlpr/ai/clientlib.py +0 -1281
  78. pyxlpr/ai/specialist.py +0 -286
  79. pyxlpr/ai/torch_app.py +0 -172
  80. pyxlpr/ai/xlpaddle.py +0 -655
  81. pyxlpr/ai/xltorch.py +0 -705
  82. pyxlpr/data/__init__.py +0 -11
  83. pyxlpr/data/coco.py +0 -1325
  84. pyxlpr/data/datacls.py +0 -365
  85. pyxlpr/data/datasets.py +0 -200
  86. pyxlpr/data/gptlib.py +0 -1291
  87. pyxlpr/data/icdar/__init__.py +0 -96
  88. pyxlpr/data/icdar/deteval.py +0 -377
  89. pyxlpr/data/icdar/icdar2013.py +0 -341
  90. pyxlpr/data/icdar/iou.py +0 -340
  91. pyxlpr/data/icdar/rrc_evaluation_funcs_1_1.py +0 -463
  92. pyxlpr/data/imtextline.py +0 -473
  93. pyxlpr/data/labelme.py +0 -866
  94. pyxlpr/data/removeline.py +0 -179
  95. pyxlpr/data/specialist.py +0 -57
  96. pyxlpr/eval/__init__.py +0 -85
  97. pyxlpr/paddleocr.py +0 -776
  98. pyxlpr/ppocr/__init__.py +0 -15
  99. pyxlpr/ppocr/configs/rec/multi_language/generate_multi_language_configs.py +0 -226
  100. pyxlpr/ppocr/data/__init__.py +0 -135
  101. pyxlpr/ppocr/data/imaug/ColorJitter.py +0 -26
  102. pyxlpr/ppocr/data/imaug/__init__.py +0 -67
  103. pyxlpr/ppocr/data/imaug/copy_paste.py +0 -170
  104. pyxlpr/ppocr/data/imaug/east_process.py +0 -437
  105. pyxlpr/ppocr/data/imaug/gen_table_mask.py +0 -244
  106. pyxlpr/ppocr/data/imaug/iaa_augment.py +0 -114
  107. pyxlpr/ppocr/data/imaug/label_ops.py +0 -789
  108. pyxlpr/ppocr/data/imaug/make_border_map.py +0 -184
  109. pyxlpr/ppocr/data/imaug/make_pse_gt.py +0 -106
  110. pyxlpr/ppocr/data/imaug/make_shrink_map.py +0 -126
  111. pyxlpr/ppocr/data/imaug/operators.py +0 -433
  112. pyxlpr/ppocr/data/imaug/pg_process.py +0 -906
  113. pyxlpr/ppocr/data/imaug/randaugment.py +0 -143
  114. pyxlpr/ppocr/data/imaug/random_crop_data.py +0 -239
  115. pyxlpr/ppocr/data/imaug/rec_img_aug.py +0 -533
  116. pyxlpr/ppocr/data/imaug/sast_process.py +0 -777
  117. pyxlpr/ppocr/data/imaug/text_image_aug/__init__.py +0 -17
  118. pyxlpr/ppocr/data/imaug/text_image_aug/augment.py +0 -120
  119. pyxlpr/ppocr/data/imaug/text_image_aug/warp_mls.py +0 -168
  120. pyxlpr/ppocr/data/lmdb_dataset.py +0 -115
  121. pyxlpr/ppocr/data/pgnet_dataset.py +0 -104
  122. pyxlpr/ppocr/data/pubtab_dataset.py +0 -107
  123. pyxlpr/ppocr/data/simple_dataset.py +0 -372
  124. pyxlpr/ppocr/losses/__init__.py +0 -61
  125. pyxlpr/ppocr/losses/ace_loss.py +0 -52
  126. pyxlpr/ppocr/losses/basic_loss.py +0 -135
  127. pyxlpr/ppocr/losses/center_loss.py +0 -88
  128. pyxlpr/ppocr/losses/cls_loss.py +0 -30
  129. pyxlpr/ppocr/losses/combined_loss.py +0 -67
  130. pyxlpr/ppocr/losses/det_basic_loss.py +0 -208
  131. pyxlpr/ppocr/losses/det_db_loss.py +0 -80
  132. pyxlpr/ppocr/losses/det_east_loss.py +0 -63
  133. pyxlpr/ppocr/losses/det_pse_loss.py +0 -149
  134. pyxlpr/ppocr/losses/det_sast_loss.py +0 -121
  135. pyxlpr/ppocr/losses/distillation_loss.py +0 -272
  136. pyxlpr/ppocr/losses/e2e_pg_loss.py +0 -140
  137. pyxlpr/ppocr/losses/kie_sdmgr_loss.py +0 -113
  138. pyxlpr/ppocr/losses/rec_aster_loss.py +0 -99
  139. pyxlpr/ppocr/losses/rec_att_loss.py +0 -39
  140. pyxlpr/ppocr/losses/rec_ctc_loss.py +0 -44
  141. pyxlpr/ppocr/losses/rec_enhanced_ctc_loss.py +0 -70
  142. pyxlpr/ppocr/losses/rec_nrtr_loss.py +0 -30
  143. pyxlpr/ppocr/losses/rec_sar_loss.py +0 -28
  144. pyxlpr/ppocr/losses/rec_srn_loss.py +0 -47
  145. pyxlpr/ppocr/losses/table_att_loss.py +0 -109
  146. pyxlpr/ppocr/metrics/__init__.py +0 -44
  147. pyxlpr/ppocr/metrics/cls_metric.py +0 -45
  148. pyxlpr/ppocr/metrics/det_metric.py +0 -82
  149. pyxlpr/ppocr/metrics/distillation_metric.py +0 -73
  150. pyxlpr/ppocr/metrics/e2e_metric.py +0 -86
  151. pyxlpr/ppocr/metrics/eval_det_iou.py +0 -274
  152. pyxlpr/ppocr/metrics/kie_metric.py +0 -70
  153. pyxlpr/ppocr/metrics/rec_metric.py +0 -75
  154. pyxlpr/ppocr/metrics/table_metric.py +0 -50
  155. pyxlpr/ppocr/modeling/architectures/__init__.py +0 -32
  156. pyxlpr/ppocr/modeling/architectures/base_model.py +0 -88
  157. pyxlpr/ppocr/modeling/architectures/distillation_model.py +0 -60
  158. pyxlpr/ppocr/modeling/backbones/__init__.py +0 -54
  159. pyxlpr/ppocr/modeling/backbones/det_mobilenet_v3.py +0 -268
  160. pyxlpr/ppocr/modeling/backbones/det_resnet_vd.py +0 -246
  161. pyxlpr/ppocr/modeling/backbones/det_resnet_vd_sast.py +0 -285
  162. pyxlpr/ppocr/modeling/backbones/e2e_resnet_vd_pg.py +0 -265
  163. pyxlpr/ppocr/modeling/backbones/kie_unet_sdmgr.py +0 -186
  164. pyxlpr/ppocr/modeling/backbones/rec_mobilenet_v3.py +0 -138
  165. pyxlpr/ppocr/modeling/backbones/rec_mv1_enhance.py +0 -258
  166. pyxlpr/ppocr/modeling/backbones/rec_nrtr_mtb.py +0 -48
  167. pyxlpr/ppocr/modeling/backbones/rec_resnet_31.py +0 -210
  168. pyxlpr/ppocr/modeling/backbones/rec_resnet_aster.py +0 -143
  169. pyxlpr/ppocr/modeling/backbones/rec_resnet_fpn.py +0 -307
  170. pyxlpr/ppocr/modeling/backbones/rec_resnet_vd.py +0 -286
  171. pyxlpr/ppocr/modeling/heads/__init__.py +0 -54
  172. pyxlpr/ppocr/modeling/heads/cls_head.py +0 -52
  173. pyxlpr/ppocr/modeling/heads/det_db_head.py +0 -118
  174. pyxlpr/ppocr/modeling/heads/det_east_head.py +0 -121
  175. pyxlpr/ppocr/modeling/heads/det_pse_head.py +0 -37
  176. pyxlpr/ppocr/modeling/heads/det_sast_head.py +0 -128
  177. pyxlpr/ppocr/modeling/heads/e2e_pg_head.py +0 -253
  178. pyxlpr/ppocr/modeling/heads/kie_sdmgr_head.py +0 -206
  179. pyxlpr/ppocr/modeling/heads/multiheadAttention.py +0 -163
  180. pyxlpr/ppocr/modeling/heads/rec_aster_head.py +0 -393
  181. pyxlpr/ppocr/modeling/heads/rec_att_head.py +0 -202
  182. pyxlpr/ppocr/modeling/heads/rec_ctc_head.py +0 -88
  183. pyxlpr/ppocr/modeling/heads/rec_nrtr_head.py +0 -826
  184. pyxlpr/ppocr/modeling/heads/rec_sar_head.py +0 -402
  185. pyxlpr/ppocr/modeling/heads/rec_srn_head.py +0 -280
  186. pyxlpr/ppocr/modeling/heads/self_attention.py +0 -406
  187. pyxlpr/ppocr/modeling/heads/table_att_head.py +0 -246
  188. pyxlpr/ppocr/modeling/necks/__init__.py +0 -32
  189. pyxlpr/ppocr/modeling/necks/db_fpn.py +0 -111
  190. pyxlpr/ppocr/modeling/necks/east_fpn.py +0 -188
  191. pyxlpr/ppocr/modeling/necks/fpn.py +0 -138
  192. pyxlpr/ppocr/modeling/necks/pg_fpn.py +0 -314
  193. pyxlpr/ppocr/modeling/necks/rnn.py +0 -92
  194. pyxlpr/ppocr/modeling/necks/sast_fpn.py +0 -284
  195. pyxlpr/ppocr/modeling/necks/table_fpn.py +0 -110
  196. pyxlpr/ppocr/modeling/transforms/__init__.py +0 -28
  197. pyxlpr/ppocr/modeling/transforms/stn.py +0 -135
  198. pyxlpr/ppocr/modeling/transforms/tps.py +0 -308
  199. pyxlpr/ppocr/modeling/transforms/tps_spatial_transformer.py +0 -156
  200. pyxlpr/ppocr/optimizer/__init__.py +0 -61
  201. pyxlpr/ppocr/optimizer/learning_rate.py +0 -228
  202. pyxlpr/ppocr/optimizer/lr_scheduler.py +0 -49
  203. pyxlpr/ppocr/optimizer/optimizer.py +0 -160
  204. pyxlpr/ppocr/optimizer/regularizer.py +0 -52
  205. pyxlpr/ppocr/postprocess/__init__.py +0 -55
  206. pyxlpr/ppocr/postprocess/cls_postprocess.py +0 -33
  207. pyxlpr/ppocr/postprocess/db_postprocess.py +0 -234
  208. pyxlpr/ppocr/postprocess/east_postprocess.py +0 -143
  209. pyxlpr/ppocr/postprocess/locality_aware_nms.py +0 -200
  210. pyxlpr/ppocr/postprocess/pg_postprocess.py +0 -52
  211. pyxlpr/ppocr/postprocess/pse_postprocess/__init__.py +0 -15
  212. pyxlpr/ppocr/postprocess/pse_postprocess/pse/__init__.py +0 -29
  213. pyxlpr/ppocr/postprocess/pse_postprocess/pse/setup.py +0 -14
  214. pyxlpr/ppocr/postprocess/pse_postprocess/pse_postprocess.py +0 -118
  215. pyxlpr/ppocr/postprocess/rec_postprocess.py +0 -654
  216. pyxlpr/ppocr/postprocess/sast_postprocess.py +0 -355
  217. pyxlpr/ppocr/tools/__init__.py +0 -14
  218. pyxlpr/ppocr/tools/eval.py +0 -83
  219. pyxlpr/ppocr/tools/export_center.py +0 -77
  220. pyxlpr/ppocr/tools/export_model.py +0 -129
  221. pyxlpr/ppocr/tools/infer/predict_cls.py +0 -151
  222. pyxlpr/ppocr/tools/infer/predict_det.py +0 -300
  223. pyxlpr/ppocr/tools/infer/predict_e2e.py +0 -169
  224. pyxlpr/ppocr/tools/infer/predict_rec.py +0 -414
  225. pyxlpr/ppocr/tools/infer/predict_system.py +0 -204
  226. pyxlpr/ppocr/tools/infer/utility.py +0 -629
  227. pyxlpr/ppocr/tools/infer_cls.py +0 -83
  228. pyxlpr/ppocr/tools/infer_det.py +0 -134
  229. pyxlpr/ppocr/tools/infer_e2e.py +0 -122
  230. pyxlpr/ppocr/tools/infer_kie.py +0 -153
  231. pyxlpr/ppocr/tools/infer_rec.py +0 -146
  232. pyxlpr/ppocr/tools/infer_table.py +0 -107
  233. pyxlpr/ppocr/tools/program.py +0 -596
  234. pyxlpr/ppocr/tools/test_hubserving.py +0 -117
  235. pyxlpr/ppocr/tools/train.py +0 -163
  236. pyxlpr/ppocr/tools/xlprog.py +0 -748
  237. pyxlpr/ppocr/utils/EN_symbol_dict.txt +0 -94
  238. pyxlpr/ppocr/utils/__init__.py +0 -24
  239. pyxlpr/ppocr/utils/dict/ar_dict.txt +0 -117
  240. pyxlpr/ppocr/utils/dict/arabic_dict.txt +0 -162
  241. pyxlpr/ppocr/utils/dict/be_dict.txt +0 -145
  242. pyxlpr/ppocr/utils/dict/bg_dict.txt +0 -140
  243. pyxlpr/ppocr/utils/dict/chinese_cht_dict.txt +0 -8421
  244. pyxlpr/ppocr/utils/dict/cyrillic_dict.txt +0 -163
  245. pyxlpr/ppocr/utils/dict/devanagari_dict.txt +0 -167
  246. pyxlpr/ppocr/utils/dict/en_dict.txt +0 -63
  247. pyxlpr/ppocr/utils/dict/fa_dict.txt +0 -136
  248. pyxlpr/ppocr/utils/dict/french_dict.txt +0 -136
  249. pyxlpr/ppocr/utils/dict/german_dict.txt +0 -143
  250. pyxlpr/ppocr/utils/dict/hi_dict.txt +0 -162
  251. pyxlpr/ppocr/utils/dict/it_dict.txt +0 -118
  252. pyxlpr/ppocr/utils/dict/japan_dict.txt +0 -4399
  253. pyxlpr/ppocr/utils/dict/ka_dict.txt +0 -153
  254. pyxlpr/ppocr/utils/dict/korean_dict.txt +0 -3688
  255. pyxlpr/ppocr/utils/dict/latin_dict.txt +0 -185
  256. pyxlpr/ppocr/utils/dict/mr_dict.txt +0 -153
  257. pyxlpr/ppocr/utils/dict/ne_dict.txt +0 -153
  258. pyxlpr/ppocr/utils/dict/oc_dict.txt +0 -96
  259. pyxlpr/ppocr/utils/dict/pu_dict.txt +0 -130
  260. pyxlpr/ppocr/utils/dict/rs_dict.txt +0 -91
  261. pyxlpr/ppocr/utils/dict/rsc_dict.txt +0 -134
  262. pyxlpr/ppocr/utils/dict/ru_dict.txt +0 -125
  263. pyxlpr/ppocr/utils/dict/ta_dict.txt +0 -128
  264. pyxlpr/ppocr/utils/dict/table_dict.txt +0 -277
  265. pyxlpr/ppocr/utils/dict/table_structure_dict.txt +0 -2759
  266. pyxlpr/ppocr/utils/dict/te_dict.txt +0 -151
  267. pyxlpr/ppocr/utils/dict/ug_dict.txt +0 -114
  268. pyxlpr/ppocr/utils/dict/uk_dict.txt +0 -142
  269. pyxlpr/ppocr/utils/dict/ur_dict.txt +0 -137
  270. pyxlpr/ppocr/utils/dict/xi_dict.txt +0 -110
  271. pyxlpr/ppocr/utils/dict90.txt +0 -90
  272. pyxlpr/ppocr/utils/e2e_metric/Deteval.py +0 -574
  273. pyxlpr/ppocr/utils/e2e_metric/polygon_fast.py +0 -83
  274. pyxlpr/ppocr/utils/e2e_utils/extract_batchsize.py +0 -87
  275. pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_fast.py +0 -457
  276. pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_slow.py +0 -592
  277. pyxlpr/ppocr/utils/e2e_utils/pgnet_pp_utils.py +0 -162
  278. pyxlpr/ppocr/utils/e2e_utils/visual.py +0 -162
  279. pyxlpr/ppocr/utils/en_dict.txt +0 -95
  280. pyxlpr/ppocr/utils/gen_label.py +0 -81
  281. pyxlpr/ppocr/utils/ic15_dict.txt +0 -36
  282. pyxlpr/ppocr/utils/iou.py +0 -54
  283. pyxlpr/ppocr/utils/logging.py +0 -69
  284. pyxlpr/ppocr/utils/network.py +0 -84
  285. pyxlpr/ppocr/utils/ppocr_keys_v1.txt +0 -6623
  286. pyxlpr/ppocr/utils/profiler.py +0 -110
  287. pyxlpr/ppocr/utils/save_load.py +0 -150
  288. pyxlpr/ppocr/utils/stats.py +0 -72
  289. pyxlpr/ppocr/utils/utility.py +0 -80
  290. pyxlpr/ppstructure/__init__.py +0 -13
  291. pyxlpr/ppstructure/predict_system.py +0 -187
  292. pyxlpr/ppstructure/table/__init__.py +0 -13
  293. pyxlpr/ppstructure/table/eval_table.py +0 -72
  294. pyxlpr/ppstructure/table/matcher.py +0 -192
  295. pyxlpr/ppstructure/table/predict_structure.py +0 -136
  296. pyxlpr/ppstructure/table/predict_table.py +0 -221
  297. pyxlpr/ppstructure/table/table_metric/__init__.py +0 -16
  298. pyxlpr/ppstructure/table/table_metric/parallel.py +0 -51
  299. pyxlpr/ppstructure/table/table_metric/table_metric.py +0 -247
  300. pyxlpr/ppstructure/table/tablepyxl/__init__.py +0 -13
  301. pyxlpr/ppstructure/table/tablepyxl/style.py +0 -283
  302. pyxlpr/ppstructure/table/tablepyxl/tablepyxl.py +0 -118
  303. pyxlpr/ppstructure/utility.py +0 -71
  304. pyxlpr/xlai.py +0 -10
  305. /pyxllib/{ext/autogui → autogui}/virtualkey.py +0 -0
  306. {pyxllib-0.3.96.dist-info → pyxllib-0.3.197.dist-info/licenses}/LICENSE +0 -0
pyxlpr/data/coco.py DELETED
@@ -1,1325 +0,0 @@
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- # @Author : 陈坤泽
- # @Email : 877362867@qq.com
- # @Date : 2021/01/20 14:51
-
- """
- Integration and wrapping of some pycocotools functionality.
-
- In this code, gt means ground truth, the reference annotations,
- and dt means detection, the results produced by a model.
-
- Besides what is defined in label.py,
-     CocoGtData handles gt-format data specifically,
-     CocoData handles gt and dt format data together,
- and two classes are exposed for external use:
-     CocoEval computes the coco metrics
-     CocoMatch performs some more advanced result analysis
-         its output can be opened with xllabelme (pip install xllabelme)
- """
-
- from pyxllib.prog.pupil import check_install_package
-
- check_install_package('xlcocotools')
-
- from collections import ChainMap, defaultdict, Counter
- import copy
- import json
- import os
- import pathlib
- import random
- import sys
-
- import pandas as pd
- import PIL.Image  # import the submodule explicitly so PIL.Image.open below resolves
- from tqdm import tqdm
-
- from pyxllib.file.packlib.zipfile import ZipFile
- from pyxllib.prog.newbie import round_int
- from pyxllib.prog.pupil import DictTool
- from pyxllib.prog.specialist import mtqdm
- from pyxllib.algo.pupil import Groups, make_index_function, matchpairs
- from pyxllib.algo.geo import rect_bounds, rect2polygon, reshape_coords, ltrb2xywh, xywh2ltrb, ComputeIou
- from pyxllib.algo.stat import write_dataframes_to_excel
- from pyxllib.file.specialist import PathGroups, XlPath
- from pyxllib.prog.specialist import get_xllog
- from pyxlpr.data.icdar import IcdarEval
- from pyxlpr.data.labelme import LABEL_COLORMAP7, ToLabelmeJson, LabelmeDataset, LabelmeDict
- from xlcocotools.coco import COCO
- from xlcocotools.cocoeval import COCOeval
-
-
- class CocoGtData:
-     """ Handling of coco-like json data
-
-     The data does not have to match the coco gt structure exactly; similar is enough.
-     For example, images, annotations and categories may all carry custom extension fields.
-     """
-
-     def __init__(self, gt):
-         self.gt_dict = gt if isinstance(gt, dict) else XlPath(gt).read_json()
-
-     @classmethod
-     def gen_image(cls, image_id, file_name, height=None, width=None, **kwargs):
-         """ Initialize one image record; using positional parameters keeps reuse code short """
-
-         # When height/width are not given, they are read automatically from file_name.
-         # But beware: coco's file_name is a relative path that may not resolve from the working
-         # directory, so supplying height and width yourself is generally recommended.
-         if height is None or width is None:
-             width, height = PIL.Image.open(str(file_name)).size
-
-         im = {'id': int(image_id), 'file_name': file_name,
-               'height': int(height), 'width': int(width)}
-         if kwargs:
-             im.update(kwargs)
-         return im
-
-     @classmethod
-     def gen_images(cls, imdir, start_idx=1):
-         """ Auto-generate a standard images field
-
-         :param imdir: image directory
-         :param start_idx: starting image id
-         :return: list[dict(id, file_name, width, height)]
-         """
-         # files = Dir(imdir).select_files(['*.jpg', '*.png'])
-         files = XlPath(imdir).rglob_images()
-         images = []
-         for i, f in enumerate(files, start=start_idx):
-             w, h = PIL.Image.open(str(f)).size
-             images.append({'id': i, 'file_name': f.relpath(imdir).as_posix(),
-                            'width': w, 'height': h})
-         return images
-
-     @classmethod
-     def points2segmentation(cls, pts):
-         """ Convert a labelme points structure to a segmentation polygon
-         """
-         # 1 two points are expanded to the four corners
-         if len(pts) == 2:
-             pts = rect2polygon(pts)
-         else:
-             pts = list(pts)
-
-         # 2 the point set must be closed, so append point 0 at the end
-         pts.append(pts[0])
-
-         # a polygon draws every point and must be closed, so the data adds up;
-         # store integers only to save space
-         pts = [round_int(v) for v in reshape_coords(pts, 1)]
-
-         return pts
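As a quick illustration with hypothetical coordinates (and assuming rect2polygon emits the corners clockwise from the top-left, which this file does not pin down), a 2-point labelme rectangle becomes a flattened, closed polygon:

    pts = CocoGtData.points2segmentation([[0, 0], [10, 5]])
    # -> [0, 0, 10, 0, 10, 5, 0, 5, 0, 0], the four corners plus the repeated first point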
-
-     @classmethod
-     def gen_annotation(cls, **kwargs):
-         """ Smartly generate an annotation dict
-
-         This is slightly over-engineered,
-         but that is fine; it can sit here unused~~
-
-         :param points: must be an n*2 structure
-         """
-         a = kwargs.copy()
-
-         # a = {'id': 0, 'area': 0, 'bbox': [0, 0, 0, 0],
-         #      'category_id': 1, 'image_id': 0, 'iscrowd': 0, 'segmentation': []}
-
-         if 'points' in a:  # points is a special parameter: "one" polygon annotation (whereas segmentation holds several polygons)
-             if 'segmentation' not in a:
-                 a['segmentation'] = [cls.points2segmentation(a['points'])]
-             del a['points']
-         if 'bbox' not in a:
-             pts = []
-             for seg in a['segmentation']:
-                 pts += seg
-             a['bbox'] = ltrb2xywh(rect_bounds(pts))
-         if 'area' not in a:  # compute the area automatically
-             a['area'] = int(a['bbox'][2] * a['bbox'][3])
-         for k in ['id', 'image_id']:
-             if k not in a:
-                 a[k] = 0
-         if 'category_id' not in a:
-             a['category_id'] = 1
-         if 'iscrowd' not in a:
-             a['iscrowd'] = 0
-
-         return a
-
-     @classmethod
-     def gen_quad_annotations(cls, file, *, image_id, start_box_id, category_id=1, **kwargs):
-         """ Parse the txt annotation file belonging to one image
-
-         :param file: annotation file containing several annotation lines
-             each line is x1,y1,x2,y2,x3,y3,x4,y4[,label] (the label may be absent)
-         :param image_id: id of this image
-         :param start_box_id: first box_id to assign
-         :param category_id: category the boxes belong to
-         """
-         lines = XlPath(file).read_text()
-         box_id = start_box_id
-         annotations = []
-         for line in lines.splitlines():
-             vals = line.split(',', maxsplit=8)
-             if len(vals) < 2: continue
-             attrs = {'id': box_id, 'image_id': image_id, 'category_id': category_id}
-             if len(vals) == 9:
-                 attrs['label'] = vals[8]
-             # print(vals)
-             seg = [int(v) for v in vals[:8]]
-             attrs['segmentation'] = [seg]
-             attrs['bbox'] = ltrb2xywh(rect_bounds(seg))
-             if kwargs:
-                 attrs.update(kwargs)
-             annotations.append(cls.gen_annotation(**attrs))
-             box_id += 1
-         return annotations
-
-     @classmethod
-     def gen_categories(cls, cats):
-         if isinstance(cats, list):
-             # for a plain list of category names, number them 1, 2, 3, ... in order
-             return [{'id': i, 'name': x, 'supercategory': ''} for i, x in enumerate(cats, start=1)]
-         else:
-             raise TypeError
-
-         # TODO support other construction methods
-
-     @classmethod
-     def gen_gt_dict(cls, images, annotations, categories, outfile=None):
-         data = {'images': images, 'annotations': annotations, 'categories': categories}
-         if outfile is not None:
-             XlPath(outfile).write_json(data)
-         return data
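A sketch of how these helpers compose into a full gt dict (the directory, category names and points are placeholders):

    images = CocoGtData.gen_images('images/')            # ids 1, 2, ... by scan order
    cats = CocoGtData.gen_categories(['text', 'table'])  # ids 1 and 2
    annos = [CocoGtData.gen_annotation(id=1, image_id=images[0]['id'], category_id=1,
                                       points=[[0, 0], [100, 50]])]
    gt = CocoGtData.gen_gt_dict(images, annos, cats, outfile='gt.json')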
-
-     @classmethod
-     def is_gt_dict(cls, gt_dict):
-         if isinstance(gt_dict, (tuple, list)):
-             return False
-         has_keys = set('images annotations categories'.split())
-         return not (has_keys - gt_dict.keys())
-
-     def clear_gt_segmentation(self, *, inplace=False):
-         """ Some coco json files are huge; for a plain bbox detection task the segmentation values can be dropped
-         """
-         gt_dict = self.gt_dict if inplace else copy.deepcopy(self.gt_dict)
-         for an in gt_dict['annotations']:
-             an['segmentation'] = []
-         return gt_dict
-
-     def get_catname_func(self):
-         id2name = {x['id']: x['name'] for x in self.gt_dict['categories']}
-
-         def warpper(cat_id, default=...):
-             """
-             :param cat_id:
-             :param default: value returned when there is no match
-                 ... is not a default value; it means raise an error when nothing matches
-             :return:
-             """
-             if cat_id in id2name:
-                 return id2name[cat_id]
-             else:
-                 if default is ...:
-                     raise IndexError(f'{cat_id}')
-                 else:
-                     return default
-
-         return warpper
-
-     def _group_base(self, group_anns, reserve_empty=False):
-         if reserve_empty:
-             for im in self.gt_dict['images']:
-                 yield im, group_anns.get(im['id'], [])
-         else:
-             id2im = {im['id']: im for im in self.gt_dict['images']}
-             for k, v in group_anns.items():
-                 yield id2im[k], v
-
-     def group_gt(self, *, reserve_empty=False):
-         """ Iterate over the annotations of each image in gt
-
-         Grouping is implemented with a dict here instead of df.groupby
-
-         :param reserve_empty: whether to keep the entries of images without annotations
-
-         :return: [(im, annos), ...] each group is an image record with its list of annotations
-         """
-         group_anns = defaultdict(list)
-         [group_anns[an['image_id']].append(an) for an in self.gt_dict['annotations']]
-         return self._group_base(group_anns, reserve_empty)
-
-     def select_gt(self, ids, *, inplace=False):
-         """ Delete some image entries (and their annotations), to pick data or to shrink the json
-
-         :param ids: int entries are image ids to keep, str entries are image file names to keep; they may be mixed
-             [341427, 'PMC4055390_00006.jpg', ...]
-         :return: the filtered new dict
-         """
-         gt_dict = self.gt_dict
-         # 1 normalize ids to int id values
-         if not isinstance(ids, (list, tuple, set)):
-             ids = [ids]
-         map_name2id = {item['file_name']: item['id'] for item in gt_dict['images']}
-         ids = set([(map_name2id[x] if isinstance(x, str) else x) for x in ids])
-
-         # 2 reduce images and annotations
-         dst = {'images': [x for x in gt_dict['images'] if (x['id'] in ids)],
-                'annotations': [x for x in gt_dict['annotations'] if (x['image_id'] in ids)],
-                'categories': gt_dict['categories']}
-         if inplace: self.gt_dict = dst
-         return dst
-
-     def random_select_gt(self, number=20, *, inplace=False):
-         """ Randomly draw number entries from gt """
-         ids = [x['id'] for x in self.gt_dict['images']]
-         random.shuffle(ids)
-         gt_dict = self.select_gt(ids[:number])
-         if inplace: self.gt_dict = gt_dict
-         return gt_dict
-
-     def select_gt_by_imdir(self, imdir, *, inplace=False):
-         """ Filter src_json based on the images present under imdir """
-         # 1 compare the differences
-         json_images = set([x['file_name'] for x in self.gt_dict['images']])
-         dir_images = set(os.listdir(str(imdir)))
-
-         # df = SetCmper({'json_images': json_images, 'dir_images': dir_images}).intersection()
-         # print('json_images intersection dir_images:')
-         # print(df)
-
-         # 2 trim the json
-         gt_dict = self.select_gt(json_images & dir_images)
-         if inplace: self.gt_dict = gt_dict
-         return gt_dict
-
-     def reset_image_id(self, start=1, *, inplace=False):
-         """ Renumber the images in their current order """
-         gt_dict = self.gt_dict if inplace else copy.deepcopy(self.gt_dict)
-         # 1 reset the ids in images
-         old2new = {}
-         for i, im in enumerate(gt_dict['images'], start=start):
-             old2new[im['id']] = i
-             im['id'] = i
-
-         # 2 reset the image_id in annotations
-         for anno in gt_dict['annotations']:
-             anno['image_id'] = old2new[anno['image_id']]
-
-         return gt_dict
-
-     def reset_box_id(self, start=1, *, inplace=False):
-         anns = self.gt_dict['annotations']
-         if not inplace:
-             anns = copy.deepcopy(anns)
-
-         for i, anno in enumerate(anns, start=start):
-             anno['id'] = i
-         return anns
-
-     def to_labelme_cls(self, root, *, bbox=True, seg=False, info=False, seg_color=(191, 191, 191)):
-         """
-         :param root: image root directory
-         :param tuple|int seg_color: may be -1 for a random color
-         :return:
-             extdata stores some match-exception information
-         """
-         root, data = XlPath(root), {}
-         catid2name = {x['id']: x['name'] for x in self.gt_dict['categories']}
-
-         # 0 helper functions
-         def get_seg_color():
-             if seg_color == -1:
-                 return [random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)]
-             else:
-                 return seg_color
-
-         # 1 preparation: build a file-name lookup
-         gs = PathGroups.groupby([x for x in root.rglob_files()])
-
-         # 2 iterate and generate the labelme data
-         not_finds = set()  # images listed in coco but not found under root
-         multimatch = dict()  # images in coco that match several files under root
-         for img, anns in tqdm(self.group_gt(reserve_empty=True), disable=not info):
-             # 2.1 file matching
-             imfiles = gs.find_files(img['file_name'])
-             if not imfiles:  # skip images without a matching file
-                 not_finds.add(img['file_name'])
-                 continue
-             elif len(imfiles) > 1:
-                 multimatch[img['file_name']] = imfiles
-                 imfile = imfiles[0]
-             else:
-                 imfile = imfiles[0]
-
-             # 2.2 convert the content
-             lmdict = LabelmeDict.gen_data(imfile)
-             img = DictTool.or_(img, {'xltype': 'image'})
-             lmdict['shapes'].append(LabelmeDict.gen_shape(json.dumps(img, ensure_ascii=False), [[-10, 0], [-5, 0]]))
-             for ann in anns:
-                 ann = DictTool.or_(ann, {'category_name': catid2name[ann['category_id']]})
-
-                 if bbox:
-                     label = json.dumps(ann, ensure_ascii=False)
-                     shape = LabelmeDict.gen_shape(label, xywh2ltrb(ann['bbox']))
-                     if not seg:
-                         lmdict['shapes'].append(shape)
-                 if seg:
-                     for x in ann['segmentation']:
-                         if bbox:
-                             # show the segmentation too (in gray)
-                             an = {'box_id': ann['id'], 'xltype': 'seg', 'shape_color': get_seg_color()}
-                             for k in ['text']:
-                                 if k in ann:
-                                     an[k] = ann[k]
-                             label = json.dumps(an, ensure_ascii=False)
-                             lmdict['shapes'].append(LabelmeDict.gen_shape(label, x))
-                         else:
-                             # with bbox display off, segmentation needs special handling
-                             an = {'box_id': ann['id'], 'xltype': 'seg'}
-                             label = json.dumps(an, ensure_ascii=False)
-                             lmdict['shapes'].append(LabelmeDict.gen_shape(label, x))
-
-             f = imfile.with_suffix('.json')
-
-             data[f.relpath(root)] = lmdict
-
-         return LabelmeDataset(root, data, extdata={'categories': self.gt_dict['categories'], 'not_finds': not_finds,
-                                                    'multimatch': Groups(multimatch)})
-
-     def to_labelme(self, root, *, bbox=True, seg=False, info=False):
-         if not bbox and seg:
-             seg_color = -1
-         else:
-             seg_color = (191, 191, 191)
-         self.to_labelme_cls(root, bbox=bbox, seg=seg, info=info, seg_color=seg_color).writes()
-
-     def split_data(self, parts, *, shuffle=True):
-         """ Data splitter
-
-         :param dict parts: for each part, the name to split into, plus its data ratio
-             in py≥3.6 dict keys are ordered, so the caller's list is processed in order
-             the ratios may sum to less than 1, but must not exceed 1
-         :param bool shuffle: whether to shuffle the original images order
-         :return: a dict shaped like parts, but whose values are the split coco data
-         """
-         # 1 read the data
-         assert sum(parts.values()) <= 1, 'the ratios must not sum to more than 1'
-         data = self.gt_dict
-         if shuffle:
-             data = data.copy()
-             data['images'] = data['images'].copy()
-             random.shuffle(data['images'])
-
-         # 2 generate each part
-         def select_annotations(annotations, image_ids):
-             # a simple for loop with an if, written as a list comprehension
-             return [an for an in annotations if (an['image_id'] in image_ids)]
-
-         res = {}
-         total_num, used_rate = len(data['images']), 0
-         for k, v in parts.items():
-             # 2.1 select the subset of images
-             images = data['images'][int(used_rate * total_num):int((used_rate + v) * total_num)]
-             image_ids = {im['id'] for im in images}
-
-             # 2.2 build the new dict
-             res[k] = {'images': images,
-                       'annotations': select_annotations(data['annotations'], image_ids),
-                       'categories': data['categories']}
-
-             # 2.3 update the consumed ratio
-             used_rate += v
-         return res
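For example, an 80/20 train/val split written back out with the XlPath helper already imported above (gt.json is a placeholder):

    parts = CocoGtData('gt.json').split_data({'train': 0.8, 'val': 0.2})
    XlPath('train.json').write_json(parts['train'])
    XlPath('val.json').write_json(parts['val'])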
-
-
- class CocoData(CocoGtData):
-     """ This class wraps functionality that needs gt and dt joined together """
-
-     def __init__(self, gt, dt=None, *, min_score=0):
-         """
-         :param gt: gt dict or file
-             gt is required and may be passed alone
-             some tasks could in theory run with dt only, but passing the matching gt enables more
-         :param dt: dt list or file
-         :param min_score: the CocoMatch family of classes takes a min_score parameter at init time,
-             so low-confidence dt boxes can be dropped directly
-         """
-         super().__init__(gt)
-
-         def get_dt_list(dt, min_score=0):
-             # dt
-             default_dt = []
-             # default_dt = [{'image_id': self.gt_dict['images'][0]['id'],
-             #                'category_id': self.gt_dict['categories'][0]['id'],
-             #                'bbox': [0, 0, 1, 1],
-             #                'score': 1}]
-             # filling ids directly like this is quite risky and may raise; filling them correctly
-             # needs information from gt, which makes the parameters awkward~~
-             # default_dt = [{'image_id': 1, 'category_id': 1, 'bbox': [0, 0, 1, 1], 'score': 1}]
-
-             if not dt:
-                 dt_list = default_dt
-             else:
-                 dt_list = dt if isinstance(dt, (list, tuple)) else XlPath(dt).read_json()
-                 if min_score:
-                     dt_list = [b for b in dt_list if (b['score'] >= min_score)]
-                     if not dt_list:
-                         dt_list = default_dt
-             return dt_list
-
-         self.dt_list = get_dt_list(dt, min_score)
-
-     @classmethod
-     def is_dt_list(cls, dt_list):
-         if not isinstance(dt_list, (tuple, list)):
-             return False
-         item = dt_list[0]
-         has_keys = set('score image_id category_id bbox'.split())
-         return not (has_keys - item.keys())
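For reference, one dt entry must carry at least the four keys checked above (the values here are made up):

    dt_item = {'image_id': 1, 'category_id': 1,
               'bbox': [50, 60, 200, 100],  # xywh, as in coco
               'score': 0.97}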
-
-     def select_dt(self, ids, *, inplace=False):
-         gt_dict, dt_list = self.gt_dict, self.dt_list
-         # 1 normalize ids to int id values
-         if not isinstance(ids, (list, tuple, set)):
-             ids = [ids]
-         if gt_dict:
-             map_name2id = {item['file_name']: item['id'] for item in gt_dict['images']}
-             ids = [(map_name2id[x] if isinstance(x, str) else x) for x in ids]
-         ids = set(ids)
-
-         # 2 reduce the list
-         dst = [x for x in dt_list if (x['image_id'] in ids)]
-         if inplace: self.dt_list = dst
-         return dst
-
-     def group_dt(self, *, reserve_empty=False):
-         """ Group annos by image_id; returns [(im1, dt_anns1), (im2, dt_anns2), ...] """
-         group_anns = defaultdict(list)
-         [group_anns[an['image_id']].append(an) for an in self.dt_list]
-         return self._group_base(group_anns, reserve_empty)
-
-     def group_gt_dt(self, *, reserve_empty=False):
-         """ Get the gt and dt annotation results per image
-
-         [(im, gt_anns, dt_anns), ...]
-         """
-         raise NotImplementedError
-
-     def to_icdar_label_quad(self, outfile, *, min_score=0):
-         """ Convert coco dt results to the icdar annotation format
-
-         Saved as a zip file, with one txt annotation file per image inside;
-         each txt file uses eight quad values per annotation box
-
-         Suitable for the sroie detection format
-         """
-         # 1 get dt_list
-         if min_score:
-             dt_list = [b for b in self.dt_list if (b['score'] >= min_score)]
-         else:
-             dt_list = self.dt_list
-
-         # 2 to df, grouped per image
-         df = pd.DataFrame.from_dict(dt_list)  # noqa from_dict accepts List[Dict]
-         df = df.groupby('image_id')
-
-         # 3 create a zip file
-         myzip = ZipFile(str(outfile), 'w')
-
-         # 4 iterate over each group, writing one file into the zip
-         id2name = {im['id']: pathlib.Path(im['file_name']).stem for im in self.gt_dict['images']}
-         for image_id, items in df:
-             label_file = id2name[image_id] + '.txt'
-             quads = [rect2polygon(xywh2ltrb(x), dtype=int).reshape(-1) for x in items['bbox']]
-             quads = [','.join(map(str, x)) for x in quads]
-             myzip.writestr(label_file, '\n'.join(quads))
-         myzip.close()
-
-
- class Coco2Labelme(ToLabelmeJson):
-     """ Visualization of coco-format data
-
-     TODO visualization of segmentation results
-     """
-
-     def add_segmentation(self, row):
-         """ By default all segmentations are marked in gray """
-         r = dict()
-         r['gt_box_id'] = row['gt_box_id']
-         r['label'] = 'seg'
-         r['points'] = row['gt_ltrb']
-         r['shape_color'] = [191, 191, 191]
-
-         # 5 save
-         self.add_shape2(**r)
-
-     # def _sort_anns(self, anns):
-     #     if anns and 'score' in anns[0]:
-     #         anns = sorted(anns, key=lambda x: -x['score'])  # sort by descending score
-     #     return anns
-
-     def add_gt_shape(self, row, attrs=None):
-         """
-         :param row: one row of the df (a series)
-         :param attrs: extra extension field values
-         """
-         # 1 basic fields
-         r = dict()
-         for name in ['gt_box_id', 'gt_category_id', 'gt_area']:
-             r[name] = row[name]
-         r['gt_ltrb'] = ','.join(map(str, row['gt_ltrb']))
-
-         # 2 main fields
-         r['label'] = row['gt_category_name']  # the df passed in by anns_match2 / labelme_match upstream must provide this field
-         r['points'] = row['gt_ltrb']
-         if row['gt_supercategory'] != '':
-             r['group_id'] = row['gt_supercategory']
-
-         # 3 other custom fields in row
-         # these standard fields are already handled, so they are blacklisted from display;
-         # every other field is whitelisted and shown by default
-         std_an_keys = set('gt_box_id gt_category_id gt_ltrb gt_area iscrowd file_name '
-                           'gt_category_name gt_supercategory gt_segmentation dt_segmentation'.split())
-
-         # fields clashing with labelme standard names need disambiguation, e.g. label
-         std_lm_keys = set('label points group_id shape_type flags'.split())  # labelme's standard fields
-         ks = set(row.index) - std_an_keys
-         for k in ks:
-             if k in std_lm_keys:
-                 r['_' + k] = row[k]
-             else:
-                 r[k] = row[k]
-         if 'dt_ltrb' in r:
-             r['dt_ltrb'] = ','.join(map(str, r['dt_ltrb']))
-
-         # 4 condense fields: aggregate every field prefixed with dt_ or gt_
-         group_keys = defaultdict(list)
-         res = dict()
-         for k, v in r.items():
-             for part in ('dt', 'gt'):
-                 if k.startswith(part + '_'):
-                     group_keys[part].append(k)
-                     break
-             else:
-                 res[k] = v
-
-         # sort order of the aggregated attributes
-         order = ['category_id', 'category_name', 'score', 'ltrb', 'area', 'box_id']
-         idxfunc = make_index_function(order)
-         for part in ('dt', 'gt'):
-             keys = group_keys[part]
-             m = len(part) + 1
-             keys.sort(key=lambda k: idxfunc(k[m:]))
-             res[part] = '/'.join([str(r[k]) for k in keys])  # joined values
-             res['~' + part] = '/'.join([str(k[m:]) for k in keys])  # key legend; can be dropped once familiar
-
-         # 5 extension fields
-         if attrs:
-             res.update(attrs)
-
-         # 6 save
-         self.add_shape2(**res)
-
-     def add_dt_shape(self, row, attrs=None):
-         # 1 basic fields
-         r = dict()
-         for name in ['iou', 'dt_category_id', 'dt_score']:
-             r[name] = row[name]
-         r['dt_ltrb'] = ','.join(map(str, row['dt_ltrb']))
-
-         # 2 main fields
-         r['label'] = row['dt_category_name']
-         if 'dt_segmentation' in row:
-             r['points'] = row['dt_segmentation'][0]
-         else:
-             r['points'] = row['dt_ltrb']
-
-         # 3 extension fields
-         if attrs:
-             r.update(attrs)
-
-         # 4 save
-         self.add_shape2(**r)
-
-     def _anns_init(self, df, segmentation=False):
-         df = df.copy()
-         df.drop(['image_id'], axis=1, inplace=True)
-
-         columns = df.columns
-         if segmentation:
-             pass
-         else:
-             if 'gt_segmentation' in columns:
-                 df.drop('gt_segmentation', axis=1, inplace=True)
-             if 'dt_segmentation' in columns:
-                 df.drop('dt_segmentation', axis=1, inplace=True)
-
-         return df
-
-     def anns_gt(self, df, *, segmentation=False, shape_attrs=None):
-         """ Visualization of Coco2Df.gt
-
-         :param df: after Coco2Df builds the df, pass in the group of one specific image_id / file_name
-         :param segmentation: whether to show segmentation results
-         :param shape_attrs: field values forced manually
-         """
-         df = self._anns_init(df, segmentation)
-         for idx, row in df.iterrows():
-             if segmentation:
-                 self.add_segmentation(row)
-             self.add_gt_shape(row, shape_attrs)
-
-     def anns_match(self, df, *, hide_match_dt=False, segmentation=False, shape_attrs=None):
-         """ Visualization of Coco2Df.match
-
-         Correct gt is drawn in green; boxes matched by position but with the wrong category in yellow,
-         with the green/yellow depth scaled by iou, while the matched dt is drawn in uniform gray.
-         Missed gt is drawn in red, surplus dt in blue.
-
-         :param hide_match_dt: hide the gray dt boxes
-
-         TODO study the meaning of the labelme shape flags parameter; support filtered display of shapes?
-         """
-         df = self._anns_init(df, segmentation)
-         if not shape_attrs:
-             shape_attrs = {}
-
-         def get_attrs(d):
-             return dict(ChainMap(shape_attrs, d))
-
-         for idx, row in df.iterrows():
-             r = row
-             if r['gt_category_id'] == -1:  # surplus dt
-                 self.add_dt_shape(r, get_attrs({'shape_color': [0, 0, 255]}))
-             elif r['dt_category_id'] == -1:  # gt that was never matched
-                 self.add_gt_shape(r, get_attrs({'shape_color': [255, 0, 0]}))
-             else:  # matched gt and dt
-                 if not hide_match_dt:
-                     self.add_dt_shape(r, get_attrs({'shape_color': [191, 191, 191]}))
-                 color_value = int(255 * r['iou'])
-
-                 if r['gt_category_id'] == r['dt_category_id']:
-                     self.add_gt_shape(r, get_attrs({'shape_color': [0, color_value, 0]}))
-                 else:
-                     self.add_gt_shape(r, get_attrs({'shape_color': [color_value, color_value, 0]}))
-
-     def anns_match2(self, df, *, hide_match_dt=False, segmentation=False, shape_attrs=None, colormap=None):
-         """ Distinguish box colors by category
-         """
-         import imgviz
-
-         df = self._anns_init(df, segmentation)
-         if not shape_attrs:
-             shape_attrs = {}
-
-         def get_attrs(d):
-             return dict(ChainMap(shape_attrs, d))
-
-         if not colormap:
-             colormap = imgviz.label_colormap(value=200)
-         m = len(colormap)
-
-         for idx, row in df.iterrows():
-             r = row
-             attrs = {'shape_color': colormap[r['gt_category_id'] % m],
-                      'vertex_fill_color': colormap[r['dt_category_id'] % m]}
-
-             if r['gt_category_id'] == -1:  # surplus dt
-                 self.add_dt_shape(r, get_attrs(attrs))
-             elif r['dt_category_id'] == -1:  # gt that was never matched
-                 self.add_gt_shape(r, get_attrs(attrs))
-             else:  # matched gt and dt
-                 if not hide_match_dt:
-                     self.add_dt_shape(r, get_attrs({'shape_color': [191, 191, 191]}))
-                 attrs['vertex_fill_color'] = [int(r['iou'] * v) for v in attrs['vertex_fill_color']]
-                 self.add_gt_shape(r, get_attrs(attrs))
-
-
- class CocoEval(CocoData):
-     def __init__(self, gt, dt, iou_type='bbox', *, min_score=0, print_mode=False):
-         """
-         TODO coco_gt / coco_dt already store much of the annotation information, somewhat redundantly;
-             could they be merged with gt_dict, dt_list etc. and the unnecessary parts dropped?
-         """
-         super().__init__(gt, dt, min_score=min_score)
-
-         # type
-         self.iou_type = iou_type
-
-         # evaluater
-         self.coco_gt = COCO(gt, print_mode=print_mode)  # no per-image / per-category handling needed here
-         self.coco_dt, self.evaluater = None, None
-         if self.dt_list:
-             self.coco_dt = self.coco_gt.loadRes(self.dt_list)  # this returns a coco object as well
-             self.evaluater = COCOeval(self.coco_gt, self.coco_dt, iou_type, print_mode=print_mode)
-
-     @classmethod
-     def evaluater_eval(cls, et, img_ids=None, *, print_mode=False):
-         """ coco's official object-detection evaluation
-         https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
-
-         :param img_ids:
-         :param print_mode: note this print_mode differs from the one at init time; they refer to different things
-         :return:
-         """
-         # 1 coco can filter by id and score only part of the images;
-         # even when no img_ids are given, explicitly reset to the full data
-         if not img_ids:
-             img_ids = et.cocoGt.imgIds.values()
-         et.params.imgIds = list(img_ids)
-
-         # 2 per-image, per-category computation of iou and the other core quantities
-         et.evaluate()
-         # scores under the different evaluation settings
-         et.accumulate()
-
-         # 3 show the results
-         if print_mode:  # use the standard computation when results are to be displayed
-             et.summarize(print_mode=print_mode)
-             return round(et.stats[0], 4)
-         else:  # otherwise take the simplified path
-             return round(et.step_summarize(), 4)
-
-     def eval(self, img_ids=None, *, print_mode=False):
-         return self.evaluater_eval(self.evaluater, img_ids=img_ids, print_mode=print_mode)
-
-     def eval_dt_score(self, step=0.1):
-         """ Measure how dropping boxes below increasing score thresholds affects the coco metric """
-         dt_list = copy.copy(self.dt_list)
-
-         i = 0
-         records = []
-         columns = ['≥dt_score', 'n_dt_box', 'coco_score']
-         while i < 1:
-             dt_list = [x for x in dt_list if x['score'] >= i]
-             if not dt_list: break
-             coco_dt = self.coco_gt.loadRes(dt_list)
-             evaluater = COCOeval(self.coco_gt, coco_dt, self.iou_type)
-             records.append([i, len(dt_list), self.evaluater_eval(evaluater)])
-             i += step
-         df = pd.DataFrame.from_records(records, columns=columns)
-         return df
-
-     def parse_dt_score(self, step=0.1, *, print_mode=False):
-         """ Effect of filtering dt at different score thresholds
-
-         Note: on a large dataset this runs very slowly; in current testing even 20 images take 10 seconds.
-         Turn on print_mode=True to watch the intermediate results.
-
-         Note that this method depends on CocoMatch, defined further below.
-         """
-         gt_dict, dt_list = self.gt_dict, self.dt_list
-
-         i = 0
-         records = []
-         columns = ['≥dt_score', 'n_dt_box', 'n_match_box', 'n_matchcat_box',
-                    'coco_score',
-                    'icdar2013', 'ic13_precision', 'ic13_recall',
-                    'f1_score']
-         if print_mode: print(columns)
-         while i < 1:
-             dt_list = [x for x in dt_list if x['score'] >= i]
-             if not dt_list: break
-             cm = CocoMatch(gt_dict, dt_list, eval_im=False)
-
-             ie = IcdarEval(*cm.to_icdareval_data())
-             ic13 = ie.icdar2013()
-
-             row = [i, cm.n_dt_box(), cm.n_match_box(), cm.n_matchcat_box(),
-                    cm.eval(), ic13['hmean'], ic13['precision'], ic13['recall'], cm.f1_score()]
-
-             if print_mode: print(row)
-             records.append(row)
-             i += step
-         df = pd.DataFrame.from_records(records, columns=columns)
-
-         if print_mode:
-             with pd.option_context('display.max_colwidth', -1, 'display.max_columns', 20,
-                                    'display.width', 200):  # context-managed display format
-                 print(df)
-
-         return df
-
-
- class CocoParser(CocoEval):
-     def __init__(self, gt, dt=None, iou_type='bbox', *, min_score=0, print_mode=False):
-         """ coco-format analysis tools; dt may be omitted, which is treated as having no detection results~~
-         Lighter-weight than CocoMatch, so it does not take long to initialize, while still offering the common basics.
-         """
-         super().__init__(gt, dt, iou_type, min_score=min_score, print_mode=print_mode)
-         # the images and categories data from gt, converted to df tables
-         self.images, self.categories = self._get_images_df(), self._get_categories_df()
-         # statistics tables for gt and dt
-         self.gt_anns, self.dt_anns = self._get_gt_anns_df(), self._get_dt_anns_df()
-
-     @classmethod
-     def bbox2ltrb(cls, b):
-         return [int(round(v, 0)) for v in xywh2ltrb(b)]
-
-     def _get_images_df(self):
-         """ Convert gt['images'] to a df
-         """
-         df = pd.DataFrame.from_dict(self.gt_dict['images'])
-         df.rename(columns={'id': 'image_id'}, inplace=True)
-         df.set_index('image_id', inplace=True)
-         return df
-
-     def _get_categories_df(self):
-         """ Convert gt['categories'] to a df
-         """
-         df = pd.DataFrame.from_dict(self.gt_dict['categories'])
-         df.rename(columns={'id': 'category_id'}, inplace=True)
-         df.set_index('category_id', inplace=True)
-         return df
-
-     def _get_gt_anns_df(self):
-         """ Take the gt json file or dict and convert it to df format
-
-         # TODO iscrowd=1 is not handled yet; leave the field alone for now
-         """
-
-         # 1 read the data and convert to a dict
-         df = pd.DataFrame.from_dict(self.gt_dict['annotations'])
-
-         # 2 build the full table
-         df['gt_ltrb'] = [self.bbox2ltrb(b) for b in df['bbox']]
-         df['area'] = [int(round(v, 0)) for v in df['area']]
-         df.rename(columns={'id': 'gt_box_id', 'category_id': 'gt_category_id',
-                            'area': 'gt_area', 'segmentation': 'gt_segmentation'}, inplace=True)
-
-         # 3 select the final columns and their order
-         columns = ['image_id', 'gt_box_id', 'gt_category_id', 'gt_ltrb', 'gt_area', 'gt_segmentation']
-         ext = set(df.columns) - set(columns + ['bbox'])  # extension fields
-         columns += list(ext)
-         return df[columns]
-
-     def _get_dt_anns_df(self):
-         # 1 read the data and convert to a list
-         df = pd.DataFrame.from_dict(self.dt_list)  # noqa
-
-         # 2 build the full table
-         columns = ['image_id', 'dt_category_id', 'dt_ltrb', 'dt_score']
-         if len(df) > 0:
-             df['dt_ltrb'] = [self.bbox2ltrb(b) for b in df['bbox']]
-             df['dt_score'] = [round(v, 4) for v in df['score']]
-             df['dt_segmentation'] = df['segmentation']  # even when built without a segmentation field, xlcocotools adds one automatically
-             df.rename(columns={'category_id': 'dt_category_id'}, inplace=True)
-             # 3 select the final columns and their order
-             ext = set(df.columns) - set(columns + ['bbox', 'score', 'category_id', 'segmentation'])  # extension fields
-             columns += list(ext)
-             return df[columns]
-         else:
-             return pd.DataFrame(columns=columns)
-
-     def to_icdareval_data(self, *, min_score=0.):
-         """ Convert to the data format accepted by IcdarEval
-
-         :param min_score: minimum score a dt box must have; e.g. drop boxes below 0.5 before evaluating
-         :return: gt, dt
-             two pieces of data, one gt and one dt
-         """
-         # 1 convert the gt format
-         res = defaultdict(list)
-         for item in self.gt_dict['annotations']:
-             ltrb = self.bbox2ltrb(item['bbox'])
-             # gt labels need a text-content annotation; iscrowd stands in for it here
-             label = ','.join(map(lambda x: str(round(x)), ltrb + [item['iscrowd']]))
-             # besides the "image", the "category" must be distinguished too
-             res[f"{item['image_id']},{item['category_id']}"].append(label)
-         gt = {k: '\n'.join(v).encode() for k, v in res.items()}
-
-         # 2 convert the dt format
-         res = defaultdict(list)
-         for item in self.dt_list:
-             if item['score'] < min_score: continue
-             ltrb = self.bbox2ltrb(item['bbox'])
-             label = ','.join(map(lambda x: str(round(x)), ltrb))
-             res[f"{item['image_id']},{item['category_id']}"].append(label)
-         dt = {k: '\n'.join(v).encode() for k, v in res.items()}
-
-         return gt, dt
-
-     def icdar2013(self):
-         ie = IcdarEval(*self.to_icdareval_data())
-         return ie.icdar2013()['hmean']
-
-     def to_excel(self, savepath, segmentation=False):
-         """ Export all the statistics tables into one excel file
-
-         :param savepath: output file name
-         :param segmentation: whether to keep the segmentation values
-         """
-         with pd.ExcelWriter(str(savepath)) as writer:
-             self.images.to_excel(writer, sheet_name='images', freeze_panes=(1, 0))
-             self.categories.to_excel(writer, sheet_name='categories', freeze_panes=(1, 0))
-             gt_anns = self.gt_anns
-             if not segmentation: gt_anns = gt_anns.drop('gt_segmentation', axis=1)
-             gt_anns.to_excel(writer, sheet_name='gt_anns', freeze_panes=(1, 0))
-             self.dt_anns.to_excel(writer, sheet_name='dt_anns', freeze_panes=(1, 0))
-
-     def to_labelme_gt(self, imdir, dst_dir=None, *, segmentation=False, max_workers=4):
-         """ Generate the visualization json files inside the image directory
-
-         :param segmentation: whether to show segmentation results
-         """
-
-         def func(g):
-             # 1 get the image id and file
-             image_id, df = g
-             imfile = XlPath(imdir) / df.iloc[0]['file_name']
-             if not imfile:
-                 return  # skip when there is no image
-
-             # 2 generate the json annotation for this image
-             if dst_dir:
-                 imfile = imfile.copy(dst_dir, if_exists='skip')
-             lm = Coco2Labelme(imfile)
-             width, height = lm.img.size  # image['height'], image['width'] would also work
-             # note that image_id read from a df is int64 by default; convert to int, otherwise json cannot serialize it
-             lm.add_shape('', [0, 0, 10, 0], shape_type='line', shape_color=[0, 0, 0],
-                          n_gt_box=len(df), image_id=int(image_id),
-                          size=f'{height}x{width}')
-             lm.anns_gt(df, segmentation=segmentation)
-             lm.write()  # save the json file next to the image
-
-         if dst_dir:
-             os.makedirs(dst_dir, exist_ok=True)
-         gt_anns = self.gt_anns.copy()
-         # a few extra columns make labelme handling easier
-         gt_anns['file_name'] = [self.images.loc[x, 'file_name'] for x in gt_anns['image_id']]
-         gt_anns['gt_category_name'] = [self.categories.loc[x, 'name'] for x in gt_anns['gt_category_id']]
-         gt_anns['gt_supercategory'] = [self.categories.loc[x, 'supercategory'] for x in gt_anns['gt_category_id']]
-         mtqdm(func, list(gt_anns.groupby('image_id').__iter__()), 'create labelme gt jsons', max_workers=max_workers)
-
-
- class CocoMatchBase:
-     def __init__(self, match_df):
-         """ Algorithms around the match_df matching table
-
-         This class is the essential algorithm; CocoMatch adds a wrapper layer that applies it
-         to the whole match_df by default. This base class can be used to analyze individual images in detail.
-         """
-         self.match_anns = match_df
-
-     def n_gt_box(self):
-         return sum([x != -1 for x in self.match_anns['gt_category_id']])
-
-     def n_dt_box(self):
-         return sum([x != -1 for x in self.match_anns['dt_category_id']])
-
-     def n_match_box(self, iou=0.5):
-         """ Number of boxes matched with at least the given iou """
-         return sum(self.match_anns['iou'] >= iou)
-
-     def n_matchcat_box(self, iou=0.5):
-         """ Number of matches where not only the box matched but the category agrees too """
-         df = self.match_anns
-         return sum(((df['iou'] >= iou) & (df['gt_category_id'].eq(df['dt_category_id']))))
-
-     def get_clsmatch_arr(self, iou=0.5):
-         """ For matches with at least the given iou, return the matched gt and dt category-id arrays arr1, arr2 """
-         df = self.match_anns
-         df = df[df['iou'] >= iou]
-         return list(df['gt_category_id']), list(df['dt_category_id'])
-
-     def f1_score(self, average='weighted', iou=sys.float_info.epsilon):
-         """ coco does detection and classification at once, hence its own evaluation metric.
-
-         A standalone multi-class task usually uses the F1 score instead.
-         Here the resulting dt boxes are matched against the original gt boxes, and an f1 value is forced out of that.
-
-         :param average:
-             weighted: compute f1 per class, then take a weighted average
-             macro: compute f1 per class, then take the plain mean (with the imbalanced contract samples,
-                 where some classes occur only once, the result gets heavily skewed)
-             micro: binary-style f1 over all samples (equivalent to plain correct / total)
-         :param iou: classification accuracy is computed only over box matches with at least this iou
-         :return:
-         """
-         from sklearn.metrics import f1_score
-         gt_arr, dt_arr = self.get_clsmatch_arr(iou)
-         if gt_arr:
-             return round(f1_score(gt_arr, dt_arr, average=average), 4)
-         else:
-             return -1
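The three averaging modes map directly onto sklearn's f1_score parameter; a tiny made-up example of the micro case:

    from sklearn.metrics import f1_score
    gt_arr, dt_arr = [1, 1, 2, 3], [1, 2, 2, 3]  # hypothetical matched category ids
    print(f1_score(gt_arr, dt_arr, average='micro'))  # 0.75, i.e. 3 correct out of 4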
-
-     def multi_iou_f1_df(self, step=0.1):
-         """ Count the matched boxes and the classification quality at multiple iou thresholds """
-         records = []
-         columns = ['iou', 'n_boxmatch', 'n_clsmatch',
-                    'f1_weighted', 'f1_macro', 'f1_micro']
-         i = 0
-         while i <= 1:
-             r = [i, self.n_match_box(i), self.n_matchcat_box(i)]
-             if r[1]:
-                 r.append(self.f1_score('weighted', i))
-                 r.append(self.f1_score('macro', i))
-                 r.append(self.f1_score('micro', i))
-                 records.append(r)
-             else:
-                 records.append(r + [0, 0, 0])
-                 break
-             i += step
-
-         df = pd.DataFrame.from_records(records, columns=columns)
-         return df
-
-
- class CocoMatch(CocoParser, CocoMatchBase):
-     def __init__(self, gt, dt=None, *, min_score=0, eval_im=True, print_mode=False):
-         """ coco-format analysis tools; dt may be omitted, which is treated as having no detection results~~
-
-         :param min_score: drop dt boxes whose score is below min_score
-         :param eval_im: whether to compute the coco score for every image
-         """
-         # CocoEval and _CocoMatchBase have no shared parent here, so there is no init-order confusion;
-         # hence the class init order is specified directly instead of using super
-         CocoParser.__init__(self, gt, dt, min_score=min_score)
-         match_anns = self._get_match_anns_df(print_mode=print_mode)
-         CocoMatchBase.__init__(self, match_anns)
-         self.images = self._get_match_images_df(eval_im=eval_im, print_mode=print_mode)
-         self.categories = self._get_match_categories_df()
-
-     def _get_match_anns_df(self, *, print_mode=False):
-         """ Match the resulting dt boxes against the gt boxes; note that pairs with a very low iou still get matched
-
-         TODO some boxes that went unmatched do have nonzero iou and merely lost to a box with higher iou;
-             consider adding a column with the actual max iou
-         TODO there is a pitfall here: boxes I cannot match are marked with category id -1; if the coco data
-             happens to contain a real class labeled -1, this blows up~~
-         TODO 210512 Wed 11:27, label has been newly added via a whitelist mechanism; a blacklist mechanism
-             could be considered later instead
-         """
-         from tqdm import tqdm
-
-         # 1 read the data
-         gt_df, dt_df = self.gt_anns.groupby('image_id'), self.dt_anns.groupby('image_id')
-
-         # 2 initialization
-         records = []
-         gt_columns = ['gt_box_id', 'gt_category_id', 'gt_ltrb', 'gt_area']
-         ext = set(self.gt_anns.keys()) - set(gt_columns + ['image_id'])
-         gt_columns += list(ext)
-         gt_default = [-1, -1, '', 0] + [None] * len(ext)  # defaults used when there is no matching entry
-         if 'label' in self.gt_anns.columns:
-             gt_columns.append('label')
-             gt_default.append('')
-
-         dt_columns = ['dt_category_id', 'dt_ltrb', 'dt_score', 'dt_segmentation']
-         ext = set(self.dt_anns.keys()) - set(dt_columns + ['image_id', 'iscrowd', 'area', 'id'])
-         dt_columns += list(ext)
-         dt_default = [-1, '', 0] + [None] * len(ext)
-
-         columns = ['image_id'] + gt_columns + ['iou'] + dt_columns
-
-         def gt_msg(x=None):
-             if x is None:
-                 return [image_id] + gt_default
-             else:
-                 return [image_id] + [x[k] for k in gt_columns]
-
-         def dt_msg(y=None, iou_score=0):
-             if y is None:
-                 return [0] + dt_default
-             else:
-                 return [round(iou_score, 4)] + [y[k] for k in dt_columns]
-
-         # 3 iterate and match
-         for image_id, image in tqdm(self.images.iterrows(),
-                                     f'_get_match_anns_df, groups={len(self.images)}', disable=not print_mode):
-             # 3.1 compute the matched pairs
-             # both gt and dt may lack boxes for a given image
-             # e.g. in contract detection, some images contain no target objects at all, so gt has no box
-             # for them; likewise dt does not necessarily produce results for every image
-             gt_group_df = gt_df.get_group(image_id) if image_id in gt_df.groups else []
-             dt_group_df = dt_df.get_group(image_id) if image_id in dt_df.groups else []
-             n, m = len(gt_group_df), len(dt_group_df)
-
-             pairs = []
-             if n and m:
-                 # the arbitrary-polygon intersection algorithm is too slow
-                 # gt_bboxes = [ShapelyPolygon.gen(b) for b in gt_group_df['gt_ltrb']]  # noqa already filtered by the if
-                 # dt_bboxes = [ShapelyPolygon.gen(b) for b in dt_group_df['dt_ltrb']]  # noqa
-                 # pairs = matchpairs(gt_bboxes, dt_bboxes, ComputeIou.polygon2, index=True)
-
-                 # the ltrb intersection algorithm is a bit faster
-                 # gt_bboxes = [ShapelyPolygon.gen(b) for b in gt_group_df['gt_ltrb']]  # noqa already filtered by the if
-                 # dt_bboxes = [ShapelyPolygon.gen(b) for b in dt_group_df['dt_ltrb']]  # noqa
-                 pairs = matchpairs(gt_group_df['gt_ltrb'].to_list(), dt_group_df['dt_ltrb'].to_list(),
-                                    ComputeIou.ltrb, index=True)
-
-             # 3.2 store one record per gt, in gt order
-             dt_ids = set(range(m))
-             match_ids = {p[0]: (p[1], p[2]) for p in pairs}
-             for i in range(n):
-                 x = gt_group_df.iloc[i]
-                 if i in match_ids:
-                     # 3.2.1 gt with a matching dt box
-                     j, iou_score = match_ids[i]
-                     dt_ids.remove(j)
-                     records.append(gt_msg(x) + dt_msg(dt_group_df.iloc[j], iou_score))
-                 else:
-                     # 3.2.2 gt box without a corresponding dt
-                     records.append(gt_msg(x) + dt_msg())
-
-             # 3.2.3 the remaining unmatched dt must be recorded too
-             for j in dt_ids:
-                 records.append(gt_msg() + dt_msg(dt_group_df.iloc[j]))
-
-         # 4 save the result
-         return pd.DataFrame.from_records(records, columns=columns)
-
-     def _get_match_images_df(self, *, eval_im=True, print_mode=False):
-         """ Extend the original images table with image-level recognition statistics """
-         # 1 initialization: add the new fields
-         images, match_anns = self.images.copy(), self.match_anns.groupby('image_id')
-         columns = ['coco_score', 'n_gt_box', 'n_dt_box', 'n_match0.5_box', 'n_matchcat0.5_box', 'f1_micro0.5',
-                    'ic13_score']
-         for c in columns:
-             images[c] = -1.0
-
-         # 2 fill in the extended fields
-         for image_id in tqdm(images.index, '_get_match_images_df', disable=not print_mode):
-             # 2.1 skip images that do not exist
-             if image_id not in match_anns.groups:
-                 continue
-             df = match_anns.get_group(image_id)
-
-             # 2.2 add the per-image coco score
-             if eval_im:
-                 images.loc[image_id, 'coco_score'] = self.eval([image_id])
-
-             # 2.3 add the per-image multi-class scores
-             m = CocoMatchBase(df)
-             images.loc[image_id, 'n_gt_box'] = m.n_gt_box()
-             images.loc[image_id, 'n_dt_box'] = m.n_dt_box()
-             images.loc[image_id, 'n_match0.5_box'] = m.n_match_box(0.5)
-             images.loc[image_id, 'n_matchcat0.5_box'] = m.n_matchcat_box(0.5)
-             images.loc[image_id, 'f1_micro0.5'] = m.f1_score('micro', 0.5)
-
-             # 2.4 add the per-image ic13 score
-             # df must first be grouped by category_id, with the ltrb values stored as lists
-             if eval_im:
-                 gt, dt = dict(), dict()
-                 for key, items in df.groupby('gt_category_id'):
-                     if key != -1:
-                         gt[key] = list(items['gt_ltrb'])
-                 for key, items in df.groupby('dt_category_id'):
-                     if key != -1:
-                         dt[key] = list(items['dt_ltrb'])
-                 images.loc[image_id, 'ic13_score'] = IcdarEval(gt, dt).icdar2013()['hmean']
-
-         return images
-
-     def _get_match_categories_df(self):
-         """ Extend the original categories table with per-category aggregate statistics
-
-         match_support: number of boxes actually matched at iou 0.5; "match" comes from the matching step,
-             while "support" comes from the f1 metric: how many boxes gt supplied
-         """
-         from sklearn.metrics import classification_report
-
-         # 1 initialization: add the new fields
-         categories, match_anns = self.categories.copy(), self.match_anns
-         columns = ['n_gt_box', 'n_dt_box', 'match_support', 'f1_score', 'precision', 'recall']
-         for c in columns:
-             categories[c] = -1.0
-         categories.loc[-1] = ['', 'non_match'] + [-1] * 6  # noqa
-
-         # 2 fill in the extended fields
-         for k, v in Counter(match_anns['gt_category_id']).items():
-             categories.loc[k, 'n_gt_box'] = v
-
-         for k, v in Counter(match_anns['dt_category_id']).items():
-             categories.loc[k, 'n_dt_box'] = v
-
-         # the two values of the -1 class must be swapped, so they correctly mean missed gt and surplus dt
-         categories.loc[-1, 'n_gt_box'], categories.loc[-1, 'n_dt_box'] = \
-             categories.loc[-1, 'n_dt_box'], categories.loc[-1, 'n_gt_box']
-
-         gt_arr, dt_arr = self.get_clsmatch_arr(0.5)
-         if gt_arr:
-             d = classification_report(gt_arr, dt_arr, output_dict=True)
-             for k, v in d.items():
-                 if k not in ('accuracy', 'macro avg', 'weighted avg'):
-                     k = int(k)
-                     categories.loc[k, 'match_support'] = v['support']
-                     categories.loc[k, 'f1_score'] = round(v['f1-score'], 4)
-                     categories.loc[k, 'precision'] = round(v['precision'], 4)
-                     categories.loc[k, 'recall'] = round(v['recall'], 4)
-
-         return categories
-
-     def eval_all(self, multi_iou_step=0.1):
-         """ Run every coco-format evaluation currently supported
-         """
-         xllog = get_xllog()
-         xllog.info('1 official coco evaluation metrics (aggregate)')
-         self.eval(print_mode=True)
-
-         xllog.info('2 the three official icdar evaluation methods')
-         ie = IcdarEval(*self.to_icdareval_data())
-         print('icdar2013 ', ie.icdar2013())
-         print('deteval ', ie.deteval())
-         if sys.platform != 'win32':  # this one apparently cannot run on windows
-             print('iou ', ie.iou())
-         ie = IcdarEval(*self.to_icdareval_data(min_score=0.5))
-         print('after filtering low-confidence dt boxes with score<0.5:')
-         print('icdar2013 ', ie.icdar2013())
-         print('deteval ', ie.deteval())
-         if sys.platform != 'win32':
-             print('iou ', ie.iou())
-         sys.stdout.flush()
-
-         xllog.info('3 box matching status, multi-class F1 values')
-         # TODO add a chart for these results?
-         print(f'gt has {self.n_gt_box()} boxes, dt has {self.n_dt_box()}')
-         print(self.multi_iou_f1_df(multi_iou_step))
-
-         xllog.info('4 effect of filtering dt at different score thresholds')
-         with pd.option_context('display.max_colwidth', -1, 'display.max_columns', 20,
-                                'display.width', 200):  # context-managed display format
-             print(self.parse_dt_score())
-
-     def to_excel(self, savepath, *, segmentation=False):
-         write_dataframes_to_excel(savepath,
-                                   {'images': self.images,
-                                    'categories': self.categories,
-                                    'match_anns': self.match_anns})
-
-     def _to_labelme_match(self, match_func_name, imdir, dst_dir=None, *, segmentation=False, hide_match_dt=False,
-                           **kwargs):
-         """ Visualize the detection results
-
-         :param imdir: the results are stored into imdir by default
-         :param dst_dir: if dst_dir is given, the relevant images are selectively copied from imdir into dst_dir
-         """
-
-         def func(g):
-             # 1 get the image id and file
-             image_id, df = g
-             imfile = XlPath(imdir) / df.iloc[0]['file_name']
-             if not imfile:
-                 return  # skip when there is no image
-             image = self.images.loc[image_id]
-             image = image.drop(['file_name', 'height', 'width'])
-
-             # 2 generate the json annotation for this image
-             if dst_dir and dst_dir.exists():
-                 imfile = imfile.copy(dst_dir, if_exists='skip')
-             lm = Coco2Labelme(imfile)
-
-             height, width = lm.data['imageHeight'], lm.data['imageWidth']
-             # note that image_id read from a df is int64 by default; convert to int, otherwise json cannot serialize it
-             lm.add_shape('', [0, 0, 10, 0], shape_type='line', shape_color=[0, 0, 0],
-                          size=f'{height}x{width}', **(image.to_dict()))
-             getattr(lm, match_func_name)(df, segmentation=segmentation, hide_match_dt=hide_match_dt, **kwargs)
-             lm.write(if_exists=None)  # save the json file next to the image
-
-         if dst_dir is not None:
-             os.makedirs(dst_dir, exist_ok=True)
-         match_anns = self.match_anns.copy()
-         # a few extra columns make labelme handling easier
-         match_anns['file_name'] = [self.images.loc[x, 'file_name'] for x in match_anns['image_id']]
-         match_anns['gt_category_name'] = [self.categories.loc[x, 'name'] for x in match_anns['gt_category_id']]
-         match_anns['dt_category_name'] = [self.categories.loc[x, 'name'] for x in match_anns['dt_category_id']]
-         match_anns['gt_supercategory'] = [self.categories.loc[x, 'supercategory'] for x in match_anns['gt_category_id']]
-         mtqdm(func, list(iter(match_anns.groupby('image_id'))), max_workers=8, desc='make labelme json:')
-
-     def to_labelme_match(self, imdir, dst_dir=None, *, segmentation=False, hide_match_dt=False):
-         self._to_labelme_match('anns_match', imdir, dst_dir, segmentation=segmentation, hide_match_dt=hide_match_dt)
-
-     def to_labelme_match2(self, imdir, dst_dir=None, *, segmentation=False, hide_match_dt=False,
-                           colormap=LABEL_COLORMAP7):
-         self._to_labelme_match('anns_match2', imdir, dst_dir, segmentation=segmentation, hide_match_dt=hide_match_dt,
-                                colormap=colormap)