pyxllib 0.3.96__py3-none-any.whl → 0.3.197__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306)
  1. pyxllib/algo/geo.py +12 -0
  2. pyxllib/algo/intervals.py +1 -1
  3. pyxllib/algo/matcher.py +78 -0
  4. pyxllib/algo/pupil.py +187 -19
  5. pyxllib/algo/specialist.py +2 -1
  6. pyxllib/algo/stat.py +38 -2
  7. {pyxlpr → pyxllib/autogui}/__init__.py +1 -1
  8. pyxllib/autogui/activewin.py +246 -0
  9. pyxllib/autogui/all.py +9 -0
  10. pyxllib/{ext/autogui → autogui}/autogui.py +40 -11
  11. pyxllib/autogui/uiautolib.py +362 -0
  12. pyxllib/autogui/wechat.py +827 -0
  13. pyxllib/autogui/wechat_msg.py +421 -0
  14. pyxllib/autogui/wxautolib.py +84 -0
  15. pyxllib/cv/slidercaptcha.py +137 -0
  16. pyxllib/data/echarts.py +123 -12
  17. pyxllib/data/jsonlib.py +89 -0
  18. pyxllib/data/pglib.py +514 -30
  19. pyxllib/data/sqlite.py +231 -4
  20. pyxllib/ext/JLineViewer.py +14 -1
  21. pyxllib/ext/drissionlib.py +277 -0
  22. pyxllib/ext/kq5034lib.py +0 -1594
  23. pyxllib/ext/robustprocfile.py +497 -0
  24. pyxllib/ext/unixlib.py +6 -5
  25. pyxllib/ext/utools.py +108 -95
  26. pyxllib/ext/webhook.py +32 -14
  27. pyxllib/ext/wjxlib.py +88 -0
  28. pyxllib/ext/wpsapi.py +124 -0
  29. pyxllib/ext/xlwork.py +9 -0
  30. pyxllib/ext/yuquelib.py +1003 -71
  31. pyxllib/file/docxlib.py +1 -1
  32. pyxllib/file/libreoffice.py +165 -0
  33. pyxllib/file/movielib.py +9 -0
  34. pyxllib/file/packlib/__init__.py +112 -75
  35. pyxllib/file/pdflib.py +1 -1
  36. pyxllib/file/pupil.py +1 -1
  37. pyxllib/file/specialist/dirlib.py +1 -1
  38. pyxllib/file/specialist/download.py +10 -3
  39. pyxllib/file/specialist/filelib.py +266 -55
  40. pyxllib/file/xlsxlib.py +205 -50
  41. pyxllib/file/xlsyncfile.py +341 -0
  42. pyxllib/prog/cachetools.py +64 -0
  43. pyxllib/prog/filelock.py +42 -0
  44. pyxllib/prog/multiprogs.py +940 -0
  45. pyxllib/prog/newbie.py +9 -2
  46. pyxllib/prog/pupil.py +129 -60
  47. pyxllib/prog/specialist/__init__.py +176 -2
  48. pyxllib/prog/specialist/bc.py +5 -2
  49. pyxllib/prog/specialist/browser.py +11 -2
  50. pyxllib/prog/specialist/datetime.py +68 -0
  51. pyxllib/prog/specialist/tictoc.py +12 -13
  52. pyxllib/prog/specialist/xllog.py +5 -5
  53. pyxllib/prog/xlosenv.py +7 -0
  54. pyxllib/text/airscript.js +744 -0
  55. pyxllib/text/charclasslib.py +17 -5
  56. pyxllib/text/jiebalib.py +6 -3
  57. pyxllib/text/jinjalib.py +32 -0
  58. pyxllib/text/jsa_ai_prompt.md +271 -0
  59. pyxllib/text/jscode.py +159 -4
  60. pyxllib/text/nestenv.py +1 -1
  61. pyxllib/text/newbie.py +12 -0
  62. pyxllib/text/pupil/common.py +26 -0
  63. pyxllib/text/specialist/ptag.py +2 -2
  64. pyxllib/text/templates/echart_base.html +11 -0
  65. pyxllib/text/templates/highlight_code.html +17 -0
  66. pyxllib/text/templates/latex_editor.html +103 -0
  67. pyxllib/text/xmllib.py +76 -14
  68. pyxllib/xl.py +2 -1
  69. pyxllib-0.3.197.dist-info/METADATA +48 -0
  70. pyxllib-0.3.197.dist-info/RECORD +126 -0
  71. {pyxllib-0.3.96.dist-info → pyxllib-0.3.197.dist-info}/WHEEL +1 -2
  72. pyxllib/ext/autogui/__init__.py +0 -8
  73. pyxllib-0.3.96.dist-info/METADATA +0 -51
  74. pyxllib-0.3.96.dist-info/RECORD +0 -333
  75. pyxllib-0.3.96.dist-info/top_level.txt +0 -2
  76. pyxlpr/ai/__init__.py +0 -5
  77. pyxlpr/ai/clientlib.py +0 -1281
  78. pyxlpr/ai/specialist.py +0 -286
  79. pyxlpr/ai/torch_app.py +0 -172
  80. pyxlpr/ai/xlpaddle.py +0 -655
  81. pyxlpr/ai/xltorch.py +0 -705
  82. pyxlpr/data/__init__.py +0 -11
  83. pyxlpr/data/coco.py +0 -1325
  84. pyxlpr/data/datacls.py +0 -365
  85. pyxlpr/data/datasets.py +0 -200
  86. pyxlpr/data/gptlib.py +0 -1291
  87. pyxlpr/data/icdar/__init__.py +0 -96
  88. pyxlpr/data/icdar/deteval.py +0 -377
  89. pyxlpr/data/icdar/icdar2013.py +0 -341
  90. pyxlpr/data/icdar/iou.py +0 -340
  91. pyxlpr/data/icdar/rrc_evaluation_funcs_1_1.py +0 -463
  92. pyxlpr/data/imtextline.py +0 -473
  93. pyxlpr/data/labelme.py +0 -866
  94. pyxlpr/data/removeline.py +0 -179
  95. pyxlpr/data/specialist.py +0 -57
  96. pyxlpr/eval/__init__.py +0 -85
  97. pyxlpr/paddleocr.py +0 -776
  98. pyxlpr/ppocr/__init__.py +0 -15
  99. pyxlpr/ppocr/configs/rec/multi_language/generate_multi_language_configs.py +0 -226
  100. pyxlpr/ppocr/data/__init__.py +0 -135
  101. pyxlpr/ppocr/data/imaug/ColorJitter.py +0 -26
  102. pyxlpr/ppocr/data/imaug/__init__.py +0 -67
  103. pyxlpr/ppocr/data/imaug/copy_paste.py +0 -170
  104. pyxlpr/ppocr/data/imaug/east_process.py +0 -437
  105. pyxlpr/ppocr/data/imaug/gen_table_mask.py +0 -244
  106. pyxlpr/ppocr/data/imaug/iaa_augment.py +0 -114
  107. pyxlpr/ppocr/data/imaug/label_ops.py +0 -789
  108. pyxlpr/ppocr/data/imaug/make_border_map.py +0 -184
  109. pyxlpr/ppocr/data/imaug/make_pse_gt.py +0 -106
  110. pyxlpr/ppocr/data/imaug/make_shrink_map.py +0 -126
  111. pyxlpr/ppocr/data/imaug/operators.py +0 -433
  112. pyxlpr/ppocr/data/imaug/pg_process.py +0 -906
  113. pyxlpr/ppocr/data/imaug/randaugment.py +0 -143
  114. pyxlpr/ppocr/data/imaug/random_crop_data.py +0 -239
  115. pyxlpr/ppocr/data/imaug/rec_img_aug.py +0 -533
  116. pyxlpr/ppocr/data/imaug/sast_process.py +0 -777
  117. pyxlpr/ppocr/data/imaug/text_image_aug/__init__.py +0 -17
  118. pyxlpr/ppocr/data/imaug/text_image_aug/augment.py +0 -120
  119. pyxlpr/ppocr/data/imaug/text_image_aug/warp_mls.py +0 -168
  120. pyxlpr/ppocr/data/lmdb_dataset.py +0 -115
  121. pyxlpr/ppocr/data/pgnet_dataset.py +0 -104
  122. pyxlpr/ppocr/data/pubtab_dataset.py +0 -107
  123. pyxlpr/ppocr/data/simple_dataset.py +0 -372
  124. pyxlpr/ppocr/losses/__init__.py +0 -61
  125. pyxlpr/ppocr/losses/ace_loss.py +0 -52
  126. pyxlpr/ppocr/losses/basic_loss.py +0 -135
  127. pyxlpr/ppocr/losses/center_loss.py +0 -88
  128. pyxlpr/ppocr/losses/cls_loss.py +0 -30
  129. pyxlpr/ppocr/losses/combined_loss.py +0 -67
  130. pyxlpr/ppocr/losses/det_basic_loss.py +0 -208
  131. pyxlpr/ppocr/losses/det_db_loss.py +0 -80
  132. pyxlpr/ppocr/losses/det_east_loss.py +0 -63
  133. pyxlpr/ppocr/losses/det_pse_loss.py +0 -149
  134. pyxlpr/ppocr/losses/det_sast_loss.py +0 -121
  135. pyxlpr/ppocr/losses/distillation_loss.py +0 -272
  136. pyxlpr/ppocr/losses/e2e_pg_loss.py +0 -140
  137. pyxlpr/ppocr/losses/kie_sdmgr_loss.py +0 -113
  138. pyxlpr/ppocr/losses/rec_aster_loss.py +0 -99
  139. pyxlpr/ppocr/losses/rec_att_loss.py +0 -39
  140. pyxlpr/ppocr/losses/rec_ctc_loss.py +0 -44
  141. pyxlpr/ppocr/losses/rec_enhanced_ctc_loss.py +0 -70
  142. pyxlpr/ppocr/losses/rec_nrtr_loss.py +0 -30
  143. pyxlpr/ppocr/losses/rec_sar_loss.py +0 -28
  144. pyxlpr/ppocr/losses/rec_srn_loss.py +0 -47
  145. pyxlpr/ppocr/losses/table_att_loss.py +0 -109
  146. pyxlpr/ppocr/metrics/__init__.py +0 -44
  147. pyxlpr/ppocr/metrics/cls_metric.py +0 -45
  148. pyxlpr/ppocr/metrics/det_metric.py +0 -82
  149. pyxlpr/ppocr/metrics/distillation_metric.py +0 -73
  150. pyxlpr/ppocr/metrics/e2e_metric.py +0 -86
  151. pyxlpr/ppocr/metrics/eval_det_iou.py +0 -274
  152. pyxlpr/ppocr/metrics/kie_metric.py +0 -70
  153. pyxlpr/ppocr/metrics/rec_metric.py +0 -75
  154. pyxlpr/ppocr/metrics/table_metric.py +0 -50
  155. pyxlpr/ppocr/modeling/architectures/__init__.py +0 -32
  156. pyxlpr/ppocr/modeling/architectures/base_model.py +0 -88
  157. pyxlpr/ppocr/modeling/architectures/distillation_model.py +0 -60
  158. pyxlpr/ppocr/modeling/backbones/__init__.py +0 -54
  159. pyxlpr/ppocr/modeling/backbones/det_mobilenet_v3.py +0 -268
  160. pyxlpr/ppocr/modeling/backbones/det_resnet_vd.py +0 -246
  161. pyxlpr/ppocr/modeling/backbones/det_resnet_vd_sast.py +0 -285
  162. pyxlpr/ppocr/modeling/backbones/e2e_resnet_vd_pg.py +0 -265
  163. pyxlpr/ppocr/modeling/backbones/kie_unet_sdmgr.py +0 -186
  164. pyxlpr/ppocr/modeling/backbones/rec_mobilenet_v3.py +0 -138
  165. pyxlpr/ppocr/modeling/backbones/rec_mv1_enhance.py +0 -258
  166. pyxlpr/ppocr/modeling/backbones/rec_nrtr_mtb.py +0 -48
  167. pyxlpr/ppocr/modeling/backbones/rec_resnet_31.py +0 -210
  168. pyxlpr/ppocr/modeling/backbones/rec_resnet_aster.py +0 -143
  169. pyxlpr/ppocr/modeling/backbones/rec_resnet_fpn.py +0 -307
  170. pyxlpr/ppocr/modeling/backbones/rec_resnet_vd.py +0 -286
  171. pyxlpr/ppocr/modeling/heads/__init__.py +0 -54
  172. pyxlpr/ppocr/modeling/heads/cls_head.py +0 -52
  173. pyxlpr/ppocr/modeling/heads/det_db_head.py +0 -118
  174. pyxlpr/ppocr/modeling/heads/det_east_head.py +0 -121
  175. pyxlpr/ppocr/modeling/heads/det_pse_head.py +0 -37
  176. pyxlpr/ppocr/modeling/heads/det_sast_head.py +0 -128
  177. pyxlpr/ppocr/modeling/heads/e2e_pg_head.py +0 -253
  178. pyxlpr/ppocr/modeling/heads/kie_sdmgr_head.py +0 -206
  179. pyxlpr/ppocr/modeling/heads/multiheadAttention.py +0 -163
  180. pyxlpr/ppocr/modeling/heads/rec_aster_head.py +0 -393
  181. pyxlpr/ppocr/modeling/heads/rec_att_head.py +0 -202
  182. pyxlpr/ppocr/modeling/heads/rec_ctc_head.py +0 -88
  183. pyxlpr/ppocr/modeling/heads/rec_nrtr_head.py +0 -826
  184. pyxlpr/ppocr/modeling/heads/rec_sar_head.py +0 -402
  185. pyxlpr/ppocr/modeling/heads/rec_srn_head.py +0 -280
  186. pyxlpr/ppocr/modeling/heads/self_attention.py +0 -406
  187. pyxlpr/ppocr/modeling/heads/table_att_head.py +0 -246
  188. pyxlpr/ppocr/modeling/necks/__init__.py +0 -32
  189. pyxlpr/ppocr/modeling/necks/db_fpn.py +0 -111
  190. pyxlpr/ppocr/modeling/necks/east_fpn.py +0 -188
  191. pyxlpr/ppocr/modeling/necks/fpn.py +0 -138
  192. pyxlpr/ppocr/modeling/necks/pg_fpn.py +0 -314
  193. pyxlpr/ppocr/modeling/necks/rnn.py +0 -92
  194. pyxlpr/ppocr/modeling/necks/sast_fpn.py +0 -284
  195. pyxlpr/ppocr/modeling/necks/table_fpn.py +0 -110
  196. pyxlpr/ppocr/modeling/transforms/__init__.py +0 -28
  197. pyxlpr/ppocr/modeling/transforms/stn.py +0 -135
  198. pyxlpr/ppocr/modeling/transforms/tps.py +0 -308
  199. pyxlpr/ppocr/modeling/transforms/tps_spatial_transformer.py +0 -156
  200. pyxlpr/ppocr/optimizer/__init__.py +0 -61
  201. pyxlpr/ppocr/optimizer/learning_rate.py +0 -228
  202. pyxlpr/ppocr/optimizer/lr_scheduler.py +0 -49
  203. pyxlpr/ppocr/optimizer/optimizer.py +0 -160
  204. pyxlpr/ppocr/optimizer/regularizer.py +0 -52
  205. pyxlpr/ppocr/postprocess/__init__.py +0 -55
  206. pyxlpr/ppocr/postprocess/cls_postprocess.py +0 -33
  207. pyxlpr/ppocr/postprocess/db_postprocess.py +0 -234
  208. pyxlpr/ppocr/postprocess/east_postprocess.py +0 -143
  209. pyxlpr/ppocr/postprocess/locality_aware_nms.py +0 -200
  210. pyxlpr/ppocr/postprocess/pg_postprocess.py +0 -52
  211. pyxlpr/ppocr/postprocess/pse_postprocess/__init__.py +0 -15
  212. pyxlpr/ppocr/postprocess/pse_postprocess/pse/__init__.py +0 -29
  213. pyxlpr/ppocr/postprocess/pse_postprocess/pse/setup.py +0 -14
  214. pyxlpr/ppocr/postprocess/pse_postprocess/pse_postprocess.py +0 -118
  215. pyxlpr/ppocr/postprocess/rec_postprocess.py +0 -654
  216. pyxlpr/ppocr/postprocess/sast_postprocess.py +0 -355
  217. pyxlpr/ppocr/tools/__init__.py +0 -14
  218. pyxlpr/ppocr/tools/eval.py +0 -83
  219. pyxlpr/ppocr/tools/export_center.py +0 -77
  220. pyxlpr/ppocr/tools/export_model.py +0 -129
  221. pyxlpr/ppocr/tools/infer/predict_cls.py +0 -151
  222. pyxlpr/ppocr/tools/infer/predict_det.py +0 -300
  223. pyxlpr/ppocr/tools/infer/predict_e2e.py +0 -169
  224. pyxlpr/ppocr/tools/infer/predict_rec.py +0 -414
  225. pyxlpr/ppocr/tools/infer/predict_system.py +0 -204
  226. pyxlpr/ppocr/tools/infer/utility.py +0 -629
  227. pyxlpr/ppocr/tools/infer_cls.py +0 -83
  228. pyxlpr/ppocr/tools/infer_det.py +0 -134
  229. pyxlpr/ppocr/tools/infer_e2e.py +0 -122
  230. pyxlpr/ppocr/tools/infer_kie.py +0 -153
  231. pyxlpr/ppocr/tools/infer_rec.py +0 -146
  232. pyxlpr/ppocr/tools/infer_table.py +0 -107
  233. pyxlpr/ppocr/tools/program.py +0 -596
  234. pyxlpr/ppocr/tools/test_hubserving.py +0 -117
  235. pyxlpr/ppocr/tools/train.py +0 -163
  236. pyxlpr/ppocr/tools/xlprog.py +0 -748
  237. pyxlpr/ppocr/utils/EN_symbol_dict.txt +0 -94
  238. pyxlpr/ppocr/utils/__init__.py +0 -24
  239. pyxlpr/ppocr/utils/dict/ar_dict.txt +0 -117
  240. pyxlpr/ppocr/utils/dict/arabic_dict.txt +0 -162
  241. pyxlpr/ppocr/utils/dict/be_dict.txt +0 -145
  242. pyxlpr/ppocr/utils/dict/bg_dict.txt +0 -140
  243. pyxlpr/ppocr/utils/dict/chinese_cht_dict.txt +0 -8421
  244. pyxlpr/ppocr/utils/dict/cyrillic_dict.txt +0 -163
  245. pyxlpr/ppocr/utils/dict/devanagari_dict.txt +0 -167
  246. pyxlpr/ppocr/utils/dict/en_dict.txt +0 -63
  247. pyxlpr/ppocr/utils/dict/fa_dict.txt +0 -136
  248. pyxlpr/ppocr/utils/dict/french_dict.txt +0 -136
  249. pyxlpr/ppocr/utils/dict/german_dict.txt +0 -143
  250. pyxlpr/ppocr/utils/dict/hi_dict.txt +0 -162
  251. pyxlpr/ppocr/utils/dict/it_dict.txt +0 -118
  252. pyxlpr/ppocr/utils/dict/japan_dict.txt +0 -4399
  253. pyxlpr/ppocr/utils/dict/ka_dict.txt +0 -153
  254. pyxlpr/ppocr/utils/dict/korean_dict.txt +0 -3688
  255. pyxlpr/ppocr/utils/dict/latin_dict.txt +0 -185
  256. pyxlpr/ppocr/utils/dict/mr_dict.txt +0 -153
  257. pyxlpr/ppocr/utils/dict/ne_dict.txt +0 -153
  258. pyxlpr/ppocr/utils/dict/oc_dict.txt +0 -96
  259. pyxlpr/ppocr/utils/dict/pu_dict.txt +0 -130
  260. pyxlpr/ppocr/utils/dict/rs_dict.txt +0 -91
  261. pyxlpr/ppocr/utils/dict/rsc_dict.txt +0 -134
  262. pyxlpr/ppocr/utils/dict/ru_dict.txt +0 -125
  263. pyxlpr/ppocr/utils/dict/ta_dict.txt +0 -128
  264. pyxlpr/ppocr/utils/dict/table_dict.txt +0 -277
  265. pyxlpr/ppocr/utils/dict/table_structure_dict.txt +0 -2759
  266. pyxlpr/ppocr/utils/dict/te_dict.txt +0 -151
  267. pyxlpr/ppocr/utils/dict/ug_dict.txt +0 -114
  268. pyxlpr/ppocr/utils/dict/uk_dict.txt +0 -142
  269. pyxlpr/ppocr/utils/dict/ur_dict.txt +0 -137
  270. pyxlpr/ppocr/utils/dict/xi_dict.txt +0 -110
  271. pyxlpr/ppocr/utils/dict90.txt +0 -90
  272. pyxlpr/ppocr/utils/e2e_metric/Deteval.py +0 -574
  273. pyxlpr/ppocr/utils/e2e_metric/polygon_fast.py +0 -83
  274. pyxlpr/ppocr/utils/e2e_utils/extract_batchsize.py +0 -87
  275. pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_fast.py +0 -457
  276. pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_slow.py +0 -592
  277. pyxlpr/ppocr/utils/e2e_utils/pgnet_pp_utils.py +0 -162
  278. pyxlpr/ppocr/utils/e2e_utils/visual.py +0 -162
  279. pyxlpr/ppocr/utils/en_dict.txt +0 -95
  280. pyxlpr/ppocr/utils/gen_label.py +0 -81
  281. pyxlpr/ppocr/utils/ic15_dict.txt +0 -36
  282. pyxlpr/ppocr/utils/iou.py +0 -54
  283. pyxlpr/ppocr/utils/logging.py +0 -69
  284. pyxlpr/ppocr/utils/network.py +0 -84
  285. pyxlpr/ppocr/utils/ppocr_keys_v1.txt +0 -6623
  286. pyxlpr/ppocr/utils/profiler.py +0 -110
  287. pyxlpr/ppocr/utils/save_load.py +0 -150
  288. pyxlpr/ppocr/utils/stats.py +0 -72
  289. pyxlpr/ppocr/utils/utility.py +0 -80
  290. pyxlpr/ppstructure/__init__.py +0 -13
  291. pyxlpr/ppstructure/predict_system.py +0 -187
  292. pyxlpr/ppstructure/table/__init__.py +0 -13
  293. pyxlpr/ppstructure/table/eval_table.py +0 -72
  294. pyxlpr/ppstructure/table/matcher.py +0 -192
  295. pyxlpr/ppstructure/table/predict_structure.py +0 -136
  296. pyxlpr/ppstructure/table/predict_table.py +0 -221
  297. pyxlpr/ppstructure/table/table_metric/__init__.py +0 -16
  298. pyxlpr/ppstructure/table/table_metric/parallel.py +0 -51
  299. pyxlpr/ppstructure/table/table_metric/table_metric.py +0 -247
  300. pyxlpr/ppstructure/table/tablepyxl/__init__.py +0 -13
  301. pyxlpr/ppstructure/table/tablepyxl/style.py +0 -283
  302. pyxlpr/ppstructure/table/tablepyxl/tablepyxl.py +0 -118
  303. pyxlpr/ppstructure/utility.py +0 -71
  304. pyxlpr/xlai.py +0 -10
  305. /pyxllib/{ext/autogui → autogui}/virtualkey.py +0 -0
  306. {pyxllib-0.3.96.dist-info → pyxllib-0.3.197.dist-info/licenses}/LICENSE +0 -0
pyxlpr/data/removeline.py DELETED
@@ -1,179 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- # @Author : 陈坤泽
4
- # @Email : 877362867@qq.com
5
- # @Date : 2020/09/15 21:00
6
-
7
-
8
- from pyxllib.xlcv import *
9
-
10
-
11
class RemoveLine:
    """Detect straight line segments in an image and erase them.

    Pipeline: binarise the image, find segments with the probabilistic Hough
    transform, stretch every segment along the foreground pixels it lies on,
    paint the segments over with the background colour, and finally restore
    the pixels that still have enough dark neighbours (strokes that crossed
    a removed line).
    """

    def __init__(self, filename):
        """Load the image and derive its grayscale and binary versions.

        :param filename: image to load; handed to ``xlcv.read`` unchanged
        """
        self.bgr_img = xlcv.read(filename)
        self.gray_img = cv2.cvtColor(self.bgr_img, cv2.COLOR_BGR2GRAY)
        # Inverted mean adaptive threshold (block size 11, offset 3). The rest
        # of the class treats non-zero pixels of this image as foreground.
        self.binary_img = cv2.adaptiveThreshold(
            self.gray_img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 11, 3)

    def debug(self):
        """Run the pipeline while displaying every intermediate result.

        NOTE: this path refines segments with :meth:`refine_lines`, whereas
        :meth:`run` uses :meth:`refine_lines2`, and it does not short-circuit
        when no segment is detected.
        """
        bgr_img = self.bgr_img
        xlcv.show(bgr_img, '0 src')
        dprint(bgr_img.shape)

        lines = self.detect_lines()
        xlcv.show(xlcv.lines(bgr_img, lines, [0, 0, 255]), '1 hough')
        lines = self.refine_lines(lines)
        xlcv.show(xlcv.lines(bgr_img, lines, [0, 0, 255]), '2 expand')
        dprint(lines.shape)

        dst = self.remove_lines(lines)
        xlcv.show(dst, 'result')

    def run(self):
        """Remove near-horizontal lines from the image.

        :return: the image with the lines removed, or the untouched source
            image when no segment was detected
        """
        lines = self.detect_lines()
        if not lines.any():
            return self.bgr_img
        return self.remove_lines(self.refine_lines2(lines))

    def detect_lines(self, rho=1, theta=np.pi / 180, threshold=80, min_line_length=50, max_line_gap=30):
        """Find line segments in the binary image with ``cv2.HoughLinesP``.

        :param rho: distance resolution of the accumulator
        :param theta: angle resolution of the accumulator, in radians
        :param threshold: minimum number of accumulator votes
        :param min_line_length: segments shorter than this are rejected
        :param max_line_gap: largest gap allowed between points of one segment
        :return: array of shape (k, 4) holding ``x1, y1, x2, y2`` rows, or an
            empty 1-D array when nothing was found
        """
        # The 5th positional parameter of the Python binding is the optional
        # ``lines`` output array, so the two length parameters have to be
        # passed by keyword or they end up in the wrong slots.
        lines = cv2.HoughLinesP(self.binary_img, rho, theta, threshold,
                                minLineLength=min_line_length, maxLineGap=max_line_gap)
        if lines is None:
            return np.array([])
        # OpenCV returns shape (k, 1, 4); drop the singleton axis.
        return lines.reshape(-1, 4)

    @staticmethod
    def _round(v):
        """Round ``v`` to the nearest integer and return it as an ``int``."""
        return int(round(v))

    def _extend(self, x, y, dx, dy):
        """Walk from (x, y) in steps of (dx, dy) while staying on foreground.

        Stops just before the first step that would leave the image or land
        on a zero pixel of the binary image.

        :return: the last accepted position, rounded to integers
        """
        im = self.binary_img
        n, m = im.shape
        while True:
            j, i = self._round(x + dx), self._round(y + dy)
            if 0 <= j < m and 0 <= i < n and im[i][j]:
                x, y = x + dx, y + dy
            else:
                return self._round(x), self._round(y)

    def refine_lines(self, lines):
        """Stretch every segment as far as the foreground pixels allow.

        :param lines: iterable of ``x1, y1, x2, y2`` rows
        :return: array of the stretched segments (empty array if none)
        """
        new_lines = []
        for x1, y1, x2, y2 in lines:
            if x1 == x2 and y1 == y2:
                # A zero-length segment has no direction (0/0 below); skip it.
                continue
            # 1. Make (x1, y1) the left end point.
            if x2 < x1:
                x1, y1, x2, y2 = x2, y2, x1, y1
            # 2. Unit step along the dominant axis, fractional step on the other.
            if abs(x1 - x2) > abs(y1 - y2):
                dx, dy = 1, (y1 - y2) / (x1 - x2)
            else:
                dx, dy = (x1 - x2) / (y1 - y2), 1
            # 3. Walk to one end, then from that end all the way to the other.
            x1, y1 = self._extend(x1, y1, -dx, -dy)
            x2, y2 = self._extend(x1, y1, dx, dy)
            new_lines.append((x1, y1, x2, y2))
        return np.array(new_lines)

    def refine_lines2(self, lines):
        """Stretch only the near-horizontal segments and drop all others.

        The plain :meth:`refine_lines` tends to erase slanted strokes by
        mistake; this variant keeps only segments whose slope is within
        10 degrees of horizontal, preferring to remove too little rather
        than too much.

        :param lines: iterable of ``x1, y1, x2, y2`` rows
        :return: array of the stretched segments (empty array if none)
        """
        max_slope = math.tan(math.pi / 180 * 10)
        new_lines = []
        for x1, y1, x2, y2 in lines:
            # 1. Make (x1, y1) the left end point.
            if x2 < x1:
                x1, y1, x2, y2 = x2, y2, x1, y1
            # 2. Slope per unit x; epsilon avoids dividing by zero for vertical segments.
            dx, dy = 1, (y1 - y2) / (x1 - x2 + sys.float_info.epsilon)
            if abs(dy) > max_slope:
                continue
            # 3. Walk to the left end, then from there to the right end.
            x1, y1 = self._extend(x1, y1, -dx, -dy)
            x2, y2 = self._extend(x1, y1, dx, dy)
            new_lines.append((x1, y1, x2, y2))
        return np.array(new_lines)

    def remove_lines(self, lines, box_size=(7, 3), num=5):
        """Paint the segments with the background colour, then repair strokes.

        :param lines: segments to erase, ``x1, y1, x2, y2`` rows
        :param box_size: (rows, columns) of the window inspected around every
            erased pixel, 7 x 3 by default
        :param num: an erased pixel gets its original value back when at
            least this many pixels in its window are darker than 128
        :return: the cleaned image; the source image itself when ``lines``
            is empty
        """
        # 1. Background colour used for painting over the lines.
        src = self.bgr_img
        if not lines.any():
            return src
        bg_color = xlcv.bg_color(src, binary_img=self.binary_img)

        # 2. Mask of erased pixels, and the image with the lines painted over.
        mask = xlcv.lines(np.zeros(src.shape[:2]), lines, [255], 2)
        tmp = xlcv.lines(src, lines, bg_color, 2)
        dst = np.array(tmp)
        tmp = cv2.cvtColor(tmp, cv2.COLOR_BGR2GRAY)

        # 3. Restore erased pixels that are still surrounded by dark pixels.
        top, left = box_size[0] // 2, box_size[1] // 2
        bottom, right = box_size[0] - top, box_size[1] - left
        # Visit only the masked pixels (same row-major order as a full scan).
        rows, cols = np.nonzero(np.asarray(mask).reshape(src.shape[:2]))
        for i, j in zip(rows, cols):
            window = tmp[max(i - top, 0):i + bottom, max(j - left, 0):j + right]
            if np.count_nonzero(window < 128) >= num:
                dst[i, j] = src[i, j]
        return dst
153
-
154
-
155
def test_removeline(path):
    """Batch-run line removal over a directory to inspect the results.

    Every ``*.png`` selected from ``path`` is processed with
    ``RemoveLine(...).run()`` and written with ``cv2.imwrite``; the directory
    named ``path + '+'`` is created and passed to ``procpaths`` as reference
    directory.

    :param path: directory (as a string) containing the source images
    """

    def remove_one(p1, p2):
        # p1: source image path, p2: path the cleaned image is written to.
        cv2.imwrite(str(p2), RemoveLine(str(p1)).run())

    src_dir = Dir(path)
    out_dir = Dir(path + '+')
    out_dir.ensure_dir()

    selection = src_dir.select('*.png')
    # Kept from the original: re-assigns the selection to itself.
    selection.subs = selection.subs
    selection.procpaths(remove_one, ref_dir=out_dir, pinterval=1000)
167
-
168
-
169
if __name__ == '__main__':
    TicToc.process_time(f'{dformat()}启动准备共用时')
    tictoc = TicToc(__file__)
    os.chdir(r'D:\RealEstate2020')

    # Single-image debugging:
    # RemoveLine(r'5_label+\agreement_label1_text2\handwriting\C100001245519-003.png').debug()
    for sample_dir in (
            r'5_label+\agreement_label1_text2\handwriting',
            r'5_label+\agreement_label1_text2\printed',  # 65680 images took 704 seconds
    ):
        test_removeline(sample_dir)

    # cv2.waitKey(0)
    tictoc.toc()
pyxlpr/data/specialist.py DELETED
@@ -1,57 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- # @Author : 陈坤泽
4
- # @Email : 877362867@qq.com
5
- # @Date : 2021/06/06 11:16
6
-
7
-
8
- import numpy as np
9
-
10
- from pyxllib.file.pupil import struct_unpack
11
- from pyxllib.file.specialist import XlBytesIO
12
-
13
-
14
def read_from_dgrl(dgrl):
    """Parse the text-line samples stored in DGRL-format data (CASIA).

    Database home: http://www.nlpr.ia.ac.cn/databases/handwriting/Home.html
    (CASIA online and offline Chinese handwriting databases).

    Based on https://blog.csdn.net/DaGongJiGuoMaLu09/article/details/107050519,
    heavily simplified and repackaged.

    TODO: consider an interface returning a labelme-like structure instead.
        Sometimes the whole page image is needed, and with the page available
        each text line could be described by a shape rather than a cropped
        sub-image.

    :param dgrl: a DGRL file, or the corresponding binary data
    :return: [(img0, label0), (img1, label1), ...]
    """
    # XlBytesIO accepts either raw bytes or a file.
    stream = XlBytesIO(dgrl)

    # The first uint32 is the header size; the rest of the header follows.
    header_size = stream.unpack('I')
    header_rest = stream.read(header_size - 4)
    # Bytes per stored character (original note: usually GBK, i.e. 2 bytes).
    code_length = struct_unpack(header_rest[-4:-2], 'H')

    # Page height and width (unused here) and the number of text lines.
    _height, _width, line_num = stream.unpack('I' * 3)

    samples = []
    for _ in range(line_num):
        # Label: character count followed by the encoded characters.
        char_num = stream.unpack('I')
        text = stream.readtext(char_num, code_length=code_length)
        # Strip NUL characters, which otherwise end up as invisible content
        # when the label is saved later.
        text = text.replace('\x00', '')

        # Position and size of the line; only the size is used.
        _y, _x, h, w = stream.unpack('I' * 4)

        # One byte per pixel, h * w bytes in total.
        pixels = np.array(stream.unpack('B' * (h * w))).reshape(h, w)

        samples.append((pixels, text))

    return samples
pyxlpr/eval/__init__.py DELETED
@@ -1,85 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- # @Author : 陈坤泽
4
- # @Email : 877362867@qq.com
5
- # @Date : 2021/03/03 09:35
6
-
7
- """ 主要含有功能
8
-
9
- 结果分数测评
10
- 结果调试、可视化分析
11
- """
12
-
13
- import pandas as pd
14
-
15
- from pyxlpr.data.coco import *
16
-
17
-
18
class KieResParser:
    """Key Information Extraction result parser.

    Helpers for scoring and inspecting key-information-extraction results
    held in a DataFrame.
    """

    def __init__(self, df, keys):
        """
        :param df: must contain a ``file`` column identifying the document
            each row belongs to. NOTE: ``diff_{k}`` columns are added to this
            DataFrame in place.
        :param keys: the kinds of information (field names). For every ``k``
            in ``keys`` there are ``dt_{k}`` and ``gt_{k}`` columns; on a
            test set the ``gt`` columns may be absent.
        """
        assert 'file' in df.columns
        self.df = df
        self.keys = keys
        self.add_diff_columns()

    def add_diff_columns(self):
        """Add a ``diff_{k}`` column for every key that has a gt column.

        Existing ``diff_{k}`` columns are left untouched. Keys without a
        ``gt_{k}`` column are skipped, so the parser can be built for a test
        set that has no ground truth.
        """
        for k in self.keys:
            diff_col, dt_col, gt_col = f'diff_{k}', f'dt_{k}', f'gt_{k}'
            if diff_col in self.df.columns or gt_col not in self.df.columns:
                continue
            self.df[diff_col] = [StrDiffType.difftype(dt, gt)
                                 for dt, gt in zip(self.df[dt_col], self.df[gt_col])]

    def check_file(self):
        """Compare dt and gt document by document (requires the gt columns).

        TODO
            1. support viewing a single file
            2. support comparing a subset of the keys only
            3. support showing the key names
        """
        pc = PairContent(Dir.TEMP / 'dt.txt', Dir.TEMP / 'gt.txt')
        for i, (idx, item) in enumerate(self.df.iterrows(), start=1):
            pc.add(f'{i}、{item.file}')
            for k in self.keys:
                pc.add(item[f'dt_{k}'], item[f'gt_{k}'])
            pc.add('')
        pc.bcompare(wait=False)

    def stat_difftype(self):
        """Count, per key, how many rows fall into each difference type.

        Only keys that have a ``diff_{k}`` column are reported, and
        difference types that never occur are omitted.

        :return: DataFrame indexed by difference type, one column per key
        """
        keys = [k for k in self.keys if f'diff_{k}' in self.df.columns]
        items = []
        index = []
        for i in StrDiffType.typename.keys():
            vals = [int((self.df[f'diff_{k}'] == i).sum()) for k in keys]
            if sum(vals):
                items.append(vals)
                index.append(i)
        return pd.DataFrame.from_records(items, columns=keys, index=index)

    def check_key(self, key):
        """Inspect the results of one key, grouped by difference type.

        Requires the ``gt_{key}`` and ``diff_{key}`` columns.

        :param key: the field to inspect
        """
        n = len(self.df)  # total number of rows
        pc = PairContent(Dir.TEMP / f'dt-{key}.txt', Dir.TEMP / f'gt-{key}.txt')
        dt_col, gt_col, diff_col = f'dt_{key}', f'gt_{key}', f'diff_{key}'
        for difftype, items in self.df.groupby(diff_col):
            m = len(items)
            pc.add(f'{difftype}、{StrDiffType.typename[difftype]}({m}/{n}≈{m / n:.2%})')
            for _, x in items.iterrows():
                # f-strings instead of ``+`` so that non-string cells
                # (NaN, numbers) do not raise a TypeError.
                pc.add(f"{x['file']}, {x[dt_col]}", f"{x['file']}, {x[gt_col]}")
            pc.add('')
        pc.bcompare(wait=False)