pyxllib 0.3.96__py3-none-any.whl → 0.3.200__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (358)
  1. pyxllib/__init__.py +21 -21
  2. pyxllib/algo/__init__.py +8 -8
  3. pyxllib/algo/disjoint.py +54 -54
  4. pyxllib/algo/geo.py +541 -529
  5. pyxllib/algo/intervals.py +964 -964
  6. pyxllib/algo/matcher.py +389 -311
  7. pyxllib/algo/newbie.py +166 -166
  8. pyxllib/algo/pupil.py +629 -461
  9. pyxllib/algo/shapelylib.py +67 -67
  10. pyxllib/algo/specialist.py +241 -240
  11. pyxllib/algo/stat.py +494 -458
  12. pyxllib/algo/treelib.py +149 -149
  13. pyxllib/algo/unitlib.py +66 -66
  14. {pyxlpr → pyxllib/autogui}/__init__.py +5 -5
  15. pyxllib/autogui/activewin.py +246 -0
  16. pyxllib/autogui/all.py +9 -0
  17. pyxllib/{ext/autogui → autogui}/autogui.py +852 -823
  18. pyxllib/autogui/uiautolib.py +362 -0
  19. pyxllib/{ext/autogui → autogui}/virtualkey.py +102 -102
  20. pyxllib/autogui/wechat.py +827 -0
  21. pyxllib/autogui/wechat_msg.py +421 -0
  22. pyxllib/autogui/wxautolib.py +84 -0
  23. pyxllib/cv/__init__.py +5 -5
  24. pyxllib/cv/expert.py +267 -267
  25. pyxllib/cv/imfile.py +159 -159
  26. pyxllib/cv/imhash.py +39 -39
  27. pyxllib/cv/pupil.py +9 -9
  28. pyxllib/cv/rgbfmt.py +1525 -1525
  29. pyxllib/cv/slidercaptcha.py +137 -0
  30. pyxllib/cv/trackbartools.py +251 -251
  31. pyxllib/cv/xlcvlib.py +1040 -1040
  32. pyxllib/cv/xlpillib.py +423 -423
  33. pyxllib/data/echarts.py +240 -129
  34. pyxllib/data/jsonlib.py +89 -0
  35. pyxllib/data/oss.py +72 -72
  36. pyxllib/data/pglib.py +1127 -643
  37. pyxllib/data/sqlite.py +568 -341
  38. pyxllib/data/sqllib.py +297 -297
  39. pyxllib/ext/JLineViewer.py +505 -492
  40. pyxllib/ext/__init__.py +6 -6
  41. pyxllib/ext/demolib.py +246 -246
  42. pyxllib/ext/drissionlib.py +277 -0
  43. pyxllib/ext/kq5034lib.py +12 -1606
  44. pyxllib/ext/old.py +663 -663
  45. pyxllib/ext/qt.py +449 -449
  46. pyxllib/ext/robustprocfile.py +497 -0
  47. pyxllib/ext/seleniumlib.py +76 -76
  48. pyxllib/ext/tk.py +173 -173
  49. pyxllib/ext/unixlib.py +827 -826
  50. pyxllib/ext/utools.py +351 -338
  51. pyxllib/ext/webhook.py +124 -101
  52. pyxllib/ext/win32lib.py +40 -40
  53. pyxllib/ext/wjxlib.py +88 -0
  54. pyxllib/ext/wpsapi.py +124 -0
  55. pyxllib/ext/xlwork.py +9 -0
  56. pyxllib/ext/yuquelib.py +1105 -173
  57. pyxllib/file/__init__.py +17 -17
  58. pyxllib/file/docxlib.py +761 -761
  59. pyxllib/file/gitlib.py +309 -309
  60. pyxllib/file/libreoffice.py +165 -0
  61. pyxllib/file/movielib.py +148 -139
  62. pyxllib/file/newbie.py +10 -10
  63. pyxllib/file/onenotelib.py +1469 -1469
  64. pyxllib/file/packlib/__init__.py +330 -293
  65. pyxllib/file/packlib/zipfile.py +2441 -2441
  66. pyxllib/file/pdflib.py +426 -426
  67. pyxllib/file/pupil.py +185 -185
  68. pyxllib/file/specialist/__init__.py +685 -685
  69. pyxllib/file/specialist/dirlib.py +799 -799
  70. pyxllib/file/specialist/download.py +193 -186
  71. pyxllib/file/specialist/filelib.py +2829 -2618
  72. pyxllib/file/xlsxlib.py +3131 -2976
  73. pyxllib/file/xlsyncfile.py +341 -0
  74. pyxllib/prog/__init__.py +5 -5
  75. pyxllib/prog/cachetools.py +64 -0
  76. pyxllib/prog/deprecatedlib.py +233 -233
  77. pyxllib/prog/filelock.py +42 -0
  78. pyxllib/prog/ipyexec.py +253 -253
  79. pyxllib/prog/multiprogs.py +940 -0
  80. pyxllib/prog/newbie.py +451 -444
  81. pyxllib/prog/pupil.py +1197 -1128
  82. pyxllib/prog/sitepackages.py +33 -33
  83. pyxllib/prog/specialist/__init__.py +391 -217
  84. pyxllib/prog/specialist/bc.py +203 -200
  85. pyxllib/prog/specialist/browser.py +497 -488
  86. pyxllib/prog/specialist/common.py +347 -347
  87. pyxllib/prog/specialist/datetime.py +199 -131
  88. pyxllib/prog/specialist/tictoc.py +240 -241
  89. pyxllib/prog/specialist/xllog.py +180 -180
  90. pyxllib/prog/xlosenv.py +108 -101
  91. pyxllib/stdlib/__init__.py +17 -17
  92. pyxllib/stdlib/tablepyxl/__init__.py +10 -10
  93. pyxllib/stdlib/tablepyxl/style.py +303 -303
  94. pyxllib/stdlib/tablepyxl/tablepyxl.py +130 -130
  95. pyxllib/text/__init__.py +8 -8
  96. pyxllib/text/ahocorasick.py +39 -39
  97. pyxllib/text/airscript.js +744 -0
  98. pyxllib/text/charclasslib.py +121 -109
  99. pyxllib/text/jiebalib.py +267 -264
  100. pyxllib/text/jinjalib.py +32 -0
  101. pyxllib/text/jsa_ai_prompt.md +271 -0
  102. pyxllib/text/jscode.py +922 -767
  103. pyxllib/text/latex/__init__.py +158 -158
  104. pyxllib/text/levenshtein.py +303 -303
  105. pyxllib/text/nestenv.py +1215 -1215
  106. pyxllib/text/newbie.py +300 -288
  107. pyxllib/text/pupil/__init__.py +8 -8
  108. pyxllib/text/pupil/common.py +1121 -1095
  109. pyxllib/text/pupil/xlalign.py +326 -326
  110. pyxllib/text/pycode.py +47 -47
  111. pyxllib/text/specialist/__init__.py +8 -8
  112. pyxllib/text/specialist/common.py +112 -112
  113. pyxllib/text/specialist/ptag.py +186 -186
  114. pyxllib/text/spellchecker.py +172 -172
  115. pyxllib/text/templates/echart_base.html +11 -0
  116. pyxllib/text/templates/highlight_code.html +17 -0
  117. pyxllib/text/templates/latex_editor.html +103 -0
  118. pyxllib/text/vbacode.py +17 -17
  119. pyxllib/text/xmllib.py +747 -685
  120. pyxllib/xl.py +42 -38
  121. pyxllib/xlcv.py +17 -17
  122. pyxllib-0.3.200.dist-info/METADATA +48 -0
  123. pyxllib-0.3.200.dist-info/RECORD +126 -0
  124. {pyxllib-0.3.96.dist-info → pyxllib-0.3.200.dist-info}/WHEEL +1 -2
  125. {pyxllib-0.3.96.dist-info → pyxllib-0.3.200.dist-info/licenses}/LICENSE +190 -190
  126. pyxllib/ext/autogui/__init__.py +0 -8
  127. pyxllib-0.3.96.dist-info/METADATA +0 -51
  128. pyxllib-0.3.96.dist-info/RECORD +0 -333
  129. pyxllib-0.3.96.dist-info/top_level.txt +0 -2
  130. pyxlpr/ai/__init__.py +0 -5
  131. pyxlpr/ai/clientlib.py +0 -1281
  132. pyxlpr/ai/specialist.py +0 -286
  133. pyxlpr/ai/torch_app.py +0 -172
  134. pyxlpr/ai/xlpaddle.py +0 -655
  135. pyxlpr/ai/xltorch.py +0 -705
  136. pyxlpr/data/__init__.py +0 -11
  137. pyxlpr/data/coco.py +0 -1325
  138. pyxlpr/data/datacls.py +0 -365
  139. pyxlpr/data/datasets.py +0 -200
  140. pyxlpr/data/gptlib.py +0 -1291
  141. pyxlpr/data/icdar/__init__.py +0 -96
  142. pyxlpr/data/icdar/deteval.py +0 -377
  143. pyxlpr/data/icdar/icdar2013.py +0 -341
  144. pyxlpr/data/icdar/iou.py +0 -340
  145. pyxlpr/data/icdar/rrc_evaluation_funcs_1_1.py +0 -463
  146. pyxlpr/data/imtextline.py +0 -473
  147. pyxlpr/data/labelme.py +0 -866
  148. pyxlpr/data/removeline.py +0 -179
  149. pyxlpr/data/specialist.py +0 -57
  150. pyxlpr/eval/__init__.py +0 -85
  151. pyxlpr/paddleocr.py +0 -776
  152. pyxlpr/ppocr/__init__.py +0 -15
  153. pyxlpr/ppocr/configs/rec/multi_language/generate_multi_language_configs.py +0 -226
  154. pyxlpr/ppocr/data/__init__.py +0 -135
  155. pyxlpr/ppocr/data/imaug/ColorJitter.py +0 -26
  156. pyxlpr/ppocr/data/imaug/__init__.py +0 -67
  157. pyxlpr/ppocr/data/imaug/copy_paste.py +0 -170
  158. pyxlpr/ppocr/data/imaug/east_process.py +0 -437
  159. pyxlpr/ppocr/data/imaug/gen_table_mask.py +0 -244
  160. pyxlpr/ppocr/data/imaug/iaa_augment.py +0 -114
  161. pyxlpr/ppocr/data/imaug/label_ops.py +0 -789
  162. pyxlpr/ppocr/data/imaug/make_border_map.py +0 -184
  163. pyxlpr/ppocr/data/imaug/make_pse_gt.py +0 -106
  164. pyxlpr/ppocr/data/imaug/make_shrink_map.py +0 -126
  165. pyxlpr/ppocr/data/imaug/operators.py +0 -433
  166. pyxlpr/ppocr/data/imaug/pg_process.py +0 -906
  167. pyxlpr/ppocr/data/imaug/randaugment.py +0 -143
  168. pyxlpr/ppocr/data/imaug/random_crop_data.py +0 -239
  169. pyxlpr/ppocr/data/imaug/rec_img_aug.py +0 -533
  170. pyxlpr/ppocr/data/imaug/sast_process.py +0 -777
  171. pyxlpr/ppocr/data/imaug/text_image_aug/__init__.py +0 -17
  172. pyxlpr/ppocr/data/imaug/text_image_aug/augment.py +0 -120
  173. pyxlpr/ppocr/data/imaug/text_image_aug/warp_mls.py +0 -168
  174. pyxlpr/ppocr/data/lmdb_dataset.py +0 -115
  175. pyxlpr/ppocr/data/pgnet_dataset.py +0 -104
  176. pyxlpr/ppocr/data/pubtab_dataset.py +0 -107
  177. pyxlpr/ppocr/data/simple_dataset.py +0 -372
  178. pyxlpr/ppocr/losses/__init__.py +0 -61
  179. pyxlpr/ppocr/losses/ace_loss.py +0 -52
  180. pyxlpr/ppocr/losses/basic_loss.py +0 -135
  181. pyxlpr/ppocr/losses/center_loss.py +0 -88
  182. pyxlpr/ppocr/losses/cls_loss.py +0 -30
  183. pyxlpr/ppocr/losses/combined_loss.py +0 -67
  184. pyxlpr/ppocr/losses/det_basic_loss.py +0 -208
  185. pyxlpr/ppocr/losses/det_db_loss.py +0 -80
  186. pyxlpr/ppocr/losses/det_east_loss.py +0 -63
  187. pyxlpr/ppocr/losses/det_pse_loss.py +0 -149
  188. pyxlpr/ppocr/losses/det_sast_loss.py +0 -121
  189. pyxlpr/ppocr/losses/distillation_loss.py +0 -272
  190. pyxlpr/ppocr/losses/e2e_pg_loss.py +0 -140
  191. pyxlpr/ppocr/losses/kie_sdmgr_loss.py +0 -113
  192. pyxlpr/ppocr/losses/rec_aster_loss.py +0 -99
  193. pyxlpr/ppocr/losses/rec_att_loss.py +0 -39
  194. pyxlpr/ppocr/losses/rec_ctc_loss.py +0 -44
  195. pyxlpr/ppocr/losses/rec_enhanced_ctc_loss.py +0 -70
  196. pyxlpr/ppocr/losses/rec_nrtr_loss.py +0 -30
  197. pyxlpr/ppocr/losses/rec_sar_loss.py +0 -28
  198. pyxlpr/ppocr/losses/rec_srn_loss.py +0 -47
  199. pyxlpr/ppocr/losses/table_att_loss.py +0 -109
  200. pyxlpr/ppocr/metrics/__init__.py +0 -44
  201. pyxlpr/ppocr/metrics/cls_metric.py +0 -45
  202. pyxlpr/ppocr/metrics/det_metric.py +0 -82
  203. pyxlpr/ppocr/metrics/distillation_metric.py +0 -73
  204. pyxlpr/ppocr/metrics/e2e_metric.py +0 -86
  205. pyxlpr/ppocr/metrics/eval_det_iou.py +0 -274
  206. pyxlpr/ppocr/metrics/kie_metric.py +0 -70
  207. pyxlpr/ppocr/metrics/rec_metric.py +0 -75
  208. pyxlpr/ppocr/metrics/table_metric.py +0 -50
  209. pyxlpr/ppocr/modeling/architectures/__init__.py +0 -32
  210. pyxlpr/ppocr/modeling/architectures/base_model.py +0 -88
  211. pyxlpr/ppocr/modeling/architectures/distillation_model.py +0 -60
  212. pyxlpr/ppocr/modeling/backbones/__init__.py +0 -54
  213. pyxlpr/ppocr/modeling/backbones/det_mobilenet_v3.py +0 -268
  214. pyxlpr/ppocr/modeling/backbones/det_resnet_vd.py +0 -246
  215. pyxlpr/ppocr/modeling/backbones/det_resnet_vd_sast.py +0 -285
  216. pyxlpr/ppocr/modeling/backbones/e2e_resnet_vd_pg.py +0 -265
  217. pyxlpr/ppocr/modeling/backbones/kie_unet_sdmgr.py +0 -186
  218. pyxlpr/ppocr/modeling/backbones/rec_mobilenet_v3.py +0 -138
  219. pyxlpr/ppocr/modeling/backbones/rec_mv1_enhance.py +0 -258
  220. pyxlpr/ppocr/modeling/backbones/rec_nrtr_mtb.py +0 -48
  221. pyxlpr/ppocr/modeling/backbones/rec_resnet_31.py +0 -210
  222. pyxlpr/ppocr/modeling/backbones/rec_resnet_aster.py +0 -143
  223. pyxlpr/ppocr/modeling/backbones/rec_resnet_fpn.py +0 -307
  224. pyxlpr/ppocr/modeling/backbones/rec_resnet_vd.py +0 -286
  225. pyxlpr/ppocr/modeling/heads/__init__.py +0 -54
  226. pyxlpr/ppocr/modeling/heads/cls_head.py +0 -52
  227. pyxlpr/ppocr/modeling/heads/det_db_head.py +0 -118
  228. pyxlpr/ppocr/modeling/heads/det_east_head.py +0 -121
  229. pyxlpr/ppocr/modeling/heads/det_pse_head.py +0 -37
  230. pyxlpr/ppocr/modeling/heads/det_sast_head.py +0 -128
  231. pyxlpr/ppocr/modeling/heads/e2e_pg_head.py +0 -253
  232. pyxlpr/ppocr/modeling/heads/kie_sdmgr_head.py +0 -206
  233. pyxlpr/ppocr/modeling/heads/multiheadAttention.py +0 -163
  234. pyxlpr/ppocr/modeling/heads/rec_aster_head.py +0 -393
  235. pyxlpr/ppocr/modeling/heads/rec_att_head.py +0 -202
  236. pyxlpr/ppocr/modeling/heads/rec_ctc_head.py +0 -88
  237. pyxlpr/ppocr/modeling/heads/rec_nrtr_head.py +0 -826
  238. pyxlpr/ppocr/modeling/heads/rec_sar_head.py +0 -402
  239. pyxlpr/ppocr/modeling/heads/rec_srn_head.py +0 -280
  240. pyxlpr/ppocr/modeling/heads/self_attention.py +0 -406
  241. pyxlpr/ppocr/modeling/heads/table_att_head.py +0 -246
  242. pyxlpr/ppocr/modeling/necks/__init__.py +0 -32
  243. pyxlpr/ppocr/modeling/necks/db_fpn.py +0 -111
  244. pyxlpr/ppocr/modeling/necks/east_fpn.py +0 -188
  245. pyxlpr/ppocr/modeling/necks/fpn.py +0 -138
  246. pyxlpr/ppocr/modeling/necks/pg_fpn.py +0 -314
  247. pyxlpr/ppocr/modeling/necks/rnn.py +0 -92
  248. pyxlpr/ppocr/modeling/necks/sast_fpn.py +0 -284
  249. pyxlpr/ppocr/modeling/necks/table_fpn.py +0 -110
  250. pyxlpr/ppocr/modeling/transforms/__init__.py +0 -28
  251. pyxlpr/ppocr/modeling/transforms/stn.py +0 -135
  252. pyxlpr/ppocr/modeling/transforms/tps.py +0 -308
  253. pyxlpr/ppocr/modeling/transforms/tps_spatial_transformer.py +0 -156
  254. pyxlpr/ppocr/optimizer/__init__.py +0 -61
  255. pyxlpr/ppocr/optimizer/learning_rate.py +0 -228
  256. pyxlpr/ppocr/optimizer/lr_scheduler.py +0 -49
  257. pyxlpr/ppocr/optimizer/optimizer.py +0 -160
  258. pyxlpr/ppocr/optimizer/regularizer.py +0 -52
  259. pyxlpr/ppocr/postprocess/__init__.py +0 -55
  260. pyxlpr/ppocr/postprocess/cls_postprocess.py +0 -33
  261. pyxlpr/ppocr/postprocess/db_postprocess.py +0 -234
  262. pyxlpr/ppocr/postprocess/east_postprocess.py +0 -143
  263. pyxlpr/ppocr/postprocess/locality_aware_nms.py +0 -200
  264. pyxlpr/ppocr/postprocess/pg_postprocess.py +0 -52
  265. pyxlpr/ppocr/postprocess/pse_postprocess/__init__.py +0 -15
  266. pyxlpr/ppocr/postprocess/pse_postprocess/pse/__init__.py +0 -29
  267. pyxlpr/ppocr/postprocess/pse_postprocess/pse/setup.py +0 -14
  268. pyxlpr/ppocr/postprocess/pse_postprocess/pse_postprocess.py +0 -118
  269. pyxlpr/ppocr/postprocess/rec_postprocess.py +0 -654
  270. pyxlpr/ppocr/postprocess/sast_postprocess.py +0 -355
  271. pyxlpr/ppocr/tools/__init__.py +0 -14
  272. pyxlpr/ppocr/tools/eval.py +0 -83
  273. pyxlpr/ppocr/tools/export_center.py +0 -77
  274. pyxlpr/ppocr/tools/export_model.py +0 -129
  275. pyxlpr/ppocr/tools/infer/predict_cls.py +0 -151
  276. pyxlpr/ppocr/tools/infer/predict_det.py +0 -300
  277. pyxlpr/ppocr/tools/infer/predict_e2e.py +0 -169
  278. pyxlpr/ppocr/tools/infer/predict_rec.py +0 -414
  279. pyxlpr/ppocr/tools/infer/predict_system.py +0 -204
  280. pyxlpr/ppocr/tools/infer/utility.py +0 -629
  281. pyxlpr/ppocr/tools/infer_cls.py +0 -83
  282. pyxlpr/ppocr/tools/infer_det.py +0 -134
  283. pyxlpr/ppocr/tools/infer_e2e.py +0 -122
  284. pyxlpr/ppocr/tools/infer_kie.py +0 -153
  285. pyxlpr/ppocr/tools/infer_rec.py +0 -146
  286. pyxlpr/ppocr/tools/infer_table.py +0 -107
  287. pyxlpr/ppocr/tools/program.py +0 -596
  288. pyxlpr/ppocr/tools/test_hubserving.py +0 -117
  289. pyxlpr/ppocr/tools/train.py +0 -163
  290. pyxlpr/ppocr/tools/xlprog.py +0 -748
  291. pyxlpr/ppocr/utils/EN_symbol_dict.txt +0 -94
  292. pyxlpr/ppocr/utils/__init__.py +0 -24
  293. pyxlpr/ppocr/utils/dict/ar_dict.txt +0 -117
  294. pyxlpr/ppocr/utils/dict/arabic_dict.txt +0 -162
  295. pyxlpr/ppocr/utils/dict/be_dict.txt +0 -145
  296. pyxlpr/ppocr/utils/dict/bg_dict.txt +0 -140
  297. pyxlpr/ppocr/utils/dict/chinese_cht_dict.txt +0 -8421
  298. pyxlpr/ppocr/utils/dict/cyrillic_dict.txt +0 -163
  299. pyxlpr/ppocr/utils/dict/devanagari_dict.txt +0 -167
  300. pyxlpr/ppocr/utils/dict/en_dict.txt +0 -63
  301. pyxlpr/ppocr/utils/dict/fa_dict.txt +0 -136
  302. pyxlpr/ppocr/utils/dict/french_dict.txt +0 -136
  303. pyxlpr/ppocr/utils/dict/german_dict.txt +0 -143
  304. pyxlpr/ppocr/utils/dict/hi_dict.txt +0 -162
  305. pyxlpr/ppocr/utils/dict/it_dict.txt +0 -118
  306. pyxlpr/ppocr/utils/dict/japan_dict.txt +0 -4399
  307. pyxlpr/ppocr/utils/dict/ka_dict.txt +0 -153
  308. pyxlpr/ppocr/utils/dict/korean_dict.txt +0 -3688
  309. pyxlpr/ppocr/utils/dict/latin_dict.txt +0 -185
  310. pyxlpr/ppocr/utils/dict/mr_dict.txt +0 -153
  311. pyxlpr/ppocr/utils/dict/ne_dict.txt +0 -153
  312. pyxlpr/ppocr/utils/dict/oc_dict.txt +0 -96
  313. pyxlpr/ppocr/utils/dict/pu_dict.txt +0 -130
  314. pyxlpr/ppocr/utils/dict/rs_dict.txt +0 -91
  315. pyxlpr/ppocr/utils/dict/rsc_dict.txt +0 -134
  316. pyxlpr/ppocr/utils/dict/ru_dict.txt +0 -125
  317. pyxlpr/ppocr/utils/dict/ta_dict.txt +0 -128
  318. pyxlpr/ppocr/utils/dict/table_dict.txt +0 -277
  319. pyxlpr/ppocr/utils/dict/table_structure_dict.txt +0 -2759
  320. pyxlpr/ppocr/utils/dict/te_dict.txt +0 -151
  321. pyxlpr/ppocr/utils/dict/ug_dict.txt +0 -114
  322. pyxlpr/ppocr/utils/dict/uk_dict.txt +0 -142
  323. pyxlpr/ppocr/utils/dict/ur_dict.txt +0 -137
  324. pyxlpr/ppocr/utils/dict/xi_dict.txt +0 -110
  325. pyxlpr/ppocr/utils/dict90.txt +0 -90
  326. pyxlpr/ppocr/utils/e2e_metric/Deteval.py +0 -574
  327. pyxlpr/ppocr/utils/e2e_metric/polygon_fast.py +0 -83
  328. pyxlpr/ppocr/utils/e2e_utils/extract_batchsize.py +0 -87
  329. pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_fast.py +0 -457
  330. pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_slow.py +0 -592
  331. pyxlpr/ppocr/utils/e2e_utils/pgnet_pp_utils.py +0 -162
  332. pyxlpr/ppocr/utils/e2e_utils/visual.py +0 -162
  333. pyxlpr/ppocr/utils/en_dict.txt +0 -95
  334. pyxlpr/ppocr/utils/gen_label.py +0 -81
  335. pyxlpr/ppocr/utils/ic15_dict.txt +0 -36
  336. pyxlpr/ppocr/utils/iou.py +0 -54
  337. pyxlpr/ppocr/utils/logging.py +0 -69
  338. pyxlpr/ppocr/utils/network.py +0 -84
  339. pyxlpr/ppocr/utils/ppocr_keys_v1.txt +0 -6623
  340. pyxlpr/ppocr/utils/profiler.py +0 -110
  341. pyxlpr/ppocr/utils/save_load.py +0 -150
  342. pyxlpr/ppocr/utils/stats.py +0 -72
  343. pyxlpr/ppocr/utils/utility.py +0 -80
  344. pyxlpr/ppstructure/__init__.py +0 -13
  345. pyxlpr/ppstructure/predict_system.py +0 -187
  346. pyxlpr/ppstructure/table/__init__.py +0 -13
  347. pyxlpr/ppstructure/table/eval_table.py +0 -72
  348. pyxlpr/ppstructure/table/matcher.py +0 -192
  349. pyxlpr/ppstructure/table/predict_structure.py +0 -136
  350. pyxlpr/ppstructure/table/predict_table.py +0 -221
  351. pyxlpr/ppstructure/table/table_metric/__init__.py +0 -16
  352. pyxlpr/ppstructure/table/table_metric/parallel.py +0 -51
  353. pyxlpr/ppstructure/table/table_metric/table_metric.py +0 -247
  354. pyxlpr/ppstructure/table/tablepyxl/__init__.py +0 -13
  355. pyxlpr/ppstructure/table/tablepyxl/style.py +0 -283
  356. pyxlpr/ppstructure/table/tablepyxl/tablepyxl.py +0 -118
  357. pyxlpr/ppstructure/utility.py +0 -71
  358. pyxlpr/xlai.py +0 -10
pyxlpr/data/imtextline.py DELETED
@@ -1,473 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- # @Author : 陈坤泽
4
- # @Email : 877362867@qq.com
5
- # @Date : 2020/11/17
6
-
7
- """ 图片文本行标注相关处理
8
- """
9
-
10
- from pyxllib.xlcv import *
11
-
12
- from functools import reduce
13
-
14
- from shapely.geometry import MultiPolygon
15
-
16
- from pyxllib.algo.geo import split_vector_interval
17
- from pyxllib.algo.disjoint import disjoint_set
18
- from pyxllib.algo.shapelylib import ShapelyPolygon
19
-
20
-
21
class TextlineShape:
    """A single text-line annotation shape.

    The base class only provides geometric relations between text boxes
    (same row / same column, overlap tests, ordering, merging); subclasses
    can add task-specific behaviour on top of it.
    """

    def __init__(self, box, *, shrink_bound=False):
        """
        :param box: any data that ``ShapelyPolygon.gen`` can turn into a polygon
        :param shrink_bound: strongly slanted boxes have a bounding rectangle
            much larger than the text itself. When true, the bounds are passed
            through ``bound_scale`` with the ratio
            ``polygon area / bounding-rectangle area`` so the slanted box can
            afterwards be treated like an ordinary rectangle.

        Detailed docs: https://www.yuque.com/xlpr/pyxllib/textlineshape
        """
        self.polygon = ShapelyPolygon.gen(box)
        self.bounds = self.polygon.bounds
        if shrink_bound:
            b = self.bounds
            total_area = (b[2] - b[0]) * (b[3] - b[1])
            # A degenerate (zero-area) bounding rectangle has nothing to
            # shrink; skipping it also avoids a ZeroDivisionError.
            if total_area:
                self.bounds = bound_scale(self.bounds, self.polygon.area / total_area)

        self.minx, self.maxx = self.bounds[0], self.bounds[2]
        self.width = self.maxx - self.minx
        self.miny, self.maxy = self.bounds[1], self.bounds[3]
        self.height = self.maxy - self.miny
        self.centroid = self.polygon.centroid

    def in_the_same_line(self, other):
        """Return True when both boxes lie on the same (horizontal) text row.

        That is the case when either centroid's y falls strictly inside the
        other box's vertical extent.
        """
        return bool(other.miny < self.centroid.y < other.maxy
                    or self.miny < other.centroid.y < self.maxy)

    def in_the_same_column(self, other):
        """Return True when either centroid's x falls strictly inside the
        other box's horizontal extent."""
        return bool(other.minx < self.centroid.x < other.maxx
                    or self.minx < other.centroid.x < self.maxx)

    def is_lr_intersect(self, other, gap=5):
        """Return True when the horizontal extents overlap, allowing ``gap``
        units of slack on each side of ``other``.

        Implemented as a full interval-overlap test, so the case where
        ``self`` completely spans ``other`` is detected as well (checking only
        whether ``self``'s two edges fall inside ``other`` misses it).
        """
        return bool(self.minx <= other.maxx + gap and other.minx - gap <= self.maxx)

    def is_tb_intersect(self, other, gap=5):
        """Return True when the vertical extents overlap, allowing ``gap``
        units of slack above and below ``other``.

        The original author notes that a height-dependent gap rule was
        specific to one project and deliberately kept out of this generic
        helper; pass ``gap`` explicitly if a task needs something else.
        """
        return bool(self.miny <= other.maxy + gap and other.miny - gap <= self.maxy)

    def is_intersect(self, other):
        """Return whether this shape's polygon intersects ``other``.

        ``other`` may be another ``TextlineShape`` (its ``polygon`` is used)
        or a bare geometry object, which is passed through unchanged.
        """
        other_geom = getattr(other, 'polygon', other)
        return self.polygon.intersects(other_geom)

    def __add__(self, other):
        """Merge two text lines into the bounding rectangle of both polygons."""
        box = rect2polygon(MultiPolygon([self.polygon, other.polygon]).bounds)
        return TextlineShape(box)

    def __lt__(self, other):
        """Ordering rule for boxes: left-to-right inside one row, otherwise
        top-to-bottom (both compared by centroid)."""
        if self.in_the_same_line(other):
            return self.centroid.x < other.centroid.x
        return self.centroid.y < other.centroid.y

    @classmethod
    def merge(cls, shapes):
        """Merge the shapes of one image that intersect each other.

        :param shapes: shapes belonging to the same image
        :return: one merged shape per group of mutually intersecting shapes
        """
        # 1 group the boxes by polygon intersection
        shape_groups = disjoint_set(shapes, lambda x, y: x.is_intersect(y))

        # 2 fold every group, in sorted order, into a single shape
        return [reduce(lambda x, y: x + y, sorted(group)) for group in shape_groups]
116
-
117
-
118
def im_textline_split(im, maxsplit=None, minwidth=3):
    """Find the column intervals of a text-line image that contain text.

    This is the most basic example version: per the original author it is
    meant for clean images (dark text on a light background, no strong skew).
    Task-specific data usually needs a customised variant of this function.

    :param im: image accepted by ``xlcv.read``
    :param maxsplit: forwarded to ``split_vector_interval``
    :param minwidth: forwarded to ``split_vector_interval``
    :return: whatever ``split_vector_interval`` returns for the column vector
    """
    gray = xlcv.read(im, 0)  # NOTE(review): flag 0 presumably means grayscale - confirm
    # Binarise against the mean level: pixels darker than the mean count as
    # text. This only works on clean images, because background columns must
    # contain almost no "text" pixels at all.
    text_mask = gray < np.mean(gray)
    column_hits = text_mask.sum(axis=0)
    return split_vector_interval(column_hits, maxsplit=maxsplit, minwidth=minwidth)
129
-
130
-
131
def merge_labels_by_widths(labels, widths, sep=' '):
    """Merge a sequence of labels into ``len(widths)`` strings whose display
    widths follow the relative weights given in ``widths``.

    Companion of the image-splitting helpers: when an image is cut into
    pieces, its text usually has to be distributed over the pieces the same
    way. The function can also be used for splitting, e.g. to cut 'abcdefg'
    into two parts weighted [20, 30], pass ``list('abcdefg')`` as labels.

    The caller's ``labels`` object is never modified.

    :param labels: sequence of strings; if shorter than ``widths`` it is
        padded with '' at the end
    :param widths: sequence of reference widths (relative weights); their sum
        must be non-zero
    :param sep: separator inserted between labels that are joined together
    :return: list of ``len(widths)`` strings approximating the reference widths
    :raises AssertionError: if ``sum(widths)`` is zero

    >>> merge_labels_by_widths(['aa', 'bbb', 'c', 'ccc'], [10,10,20])
    ['aa', 'bbb', 'c ccc']
    >>> merge_labels_by_widths(['a', 'a', 'b', 'b'], [13, 10, 10])
    ['a a', 'b', 'b']
    >>> merge_labels_by_widths(['a', 'a', 'b', 'b'], [10, 10, 10])
    ['a', 'a', 'b b']
    >>> merge_labels_by_widths(['a', 'b', 'c'], [11, 12, 13])
    ['a', 'b', 'c']
    >>> merge_labels_by_widths(['a'], [10, 12])  # fewer labels than widths: pad with ''
    ['a', '']
    >>> merge_labels_by_widths([''], [10, 12])
    ['', '']
    """
    # Validate before doing any work. Kept as an assert so that callers
    # relying on AssertionError keep working.
    assert sum(widths), 'widths必须要有权重值'

    # 1 Bring both sides to the same scale.
    # Work on a private copy: padding must not leak into the caller's list.
    labels = list(labels)
    if len(labels) < len(widths):
        labels.extend([''] * (len(widths) - len(labels)))
    label_widths = [strwidth(x) for x in labels]
    n_label = len(labels)
    ratio = sum(label_widths) / sum(widths)
    widths = [ratio * w for w in widths]

    # 2 Greedy merge: `need_merge` joins are required to get from n_label
    #   labels down to len(widths) strings; `k` counts the joins done so far.
    need_merge = n_label - len(widths)
    i, k, new_labels = 0, 0, []
    for w in widths:
        if k < need_merge:
            label_width = label_widths[i]
            j = i + 1
            # Keep absorbing the next label while that moves the accumulated
            # width closer to the target width w.
            while (j < n_label and k < need_merge
                   and abs(label_width + label_widths[j] - w) < abs(label_width - w)):
                label_width += label_widths[j]
                j += 1
                k += 1
            new_labels.append(sep.join(labels[i:j]))
            i = j
        elif k == need_merge:
            # All required joins are done: the remaining labels map 1:1.
            new_labels += labels[i:]
            i = n_label
            break

    # Labels still unused are appended to the last output string, using the
    # caller-supplied separator like every other join.
    if i + 1 <= n_label:
        new_labels[-1] = sep.join([new_labels[-1]] + labels[i:])

    return new_labels
190
-
191
-
192
class TextlineAnnotation(TextlineShape):
    """A text-line shape backed by a COCO-style annotation dict.

    The dict must contain 'bbox' (x, y, w, h); 'segmentation' and 'label'
    are used when present.
    """

    def __init__(self, anno):
        """
        :param anno: annotation dict; kept by reference in ``self.anno``
        """
        super().__init__(xywh2ltrb(anno['bbox']))
        self.anno = anno

    def __add__(self, other):
        """Merge two annotations into a new one.

        All attributes not mentioned below are taken from ``self``. Neither
        input annotation dict is modified.
        """
        anno, anno2 = self.anno.copy(), other.anno

        # bbox of the union
        anno['bbox'] = ltrb2xywh(MultiPolygon([self.polygon, other.polygon]).bounds)

        # Segmentation: build a NEW list. ``anno`` is only a shallow copy, so
        # an in-place ``+=`` would also extend the list owned by self.anno.
        seg2 = anno2.get('segmentation')
        if seg2:
            anno['segmentation'] = list(anno.get('segmentation') or []) + list(seg2)

        # Label: join the non-empty parts with one space, so an empty left
        # label does not produce a leading space.
        if 'label' in anno or 'label' in anno2:
            parts = [anno.get('label', ''), anno2.get('label', '')]
            anno['label'] = ' '.join(part for part in parts if part)

        return TextlineAnnotation(anno)

    @classmethod
    def merge(cls, annotations):
        """Merge annotations that sit on the same text row and are
        horizontally adjacent or overlapping.

        :param annotations: COCO-style annotation dicts of one image
        :return: list of merged annotation dicts
        """
        # 1 wrap as shapes
        shapes = [cls(x) for x in annotations]

        # 2 group the boxes
        shape_groups = disjoint_set(shapes, lambda x, y: x.in_the_same_line(y) and x.is_lr_intersect(y))

        # 3 fold each group, in sorted order, into a single shape
        new_shapes = [reduce(lambda x, y: x + y, sorted(group)) for group in shape_groups]

        # 4 back to plain annotation dicts
        return [x.anno for x in new_shapes]

    @staticmethod
    def _with_span(anno, span, top, bottom):
        """Return a shallow copy of ``anno`` whose bbox covers the horizontal
        ``span`` (left, right) between ``top`` and ``bottom``."""
        left, right = span
        piece = copy.copy(anno)
        piece['bbox'] = ltrb2xywh([left, top, right, bottom])
        return piece

    @classmethod
    def split(cls, im, annotations, split_func=im_textline_split):
        """Split text-line annotations of image ``im`` at blank background gaps.

        :param im: image data
        :param annotations: COCO-style annotation dicts
        :param split_func: projection-analysis function applied to each
            cropped box; must return the column intervals that contain text
        :return: new list of annotation dicts. If ``split_func`` finds no text
            interval in some box (the box is probably misplaced), ``[]`` is
            returned for the WHOLE image; callers are advised to drop such
            images.

        A 'label', if present, is split along with the box. When a box has
        fewer label words than spans, a warning is logged and the original
        annotation is kept unchanged. Unlabelled annotations are split
        geometrically only.
        """
        new_annos = []
        for anno in annotations:
            x, _, w, _ = anno['bbox']
            _, t, _, b = xywh2ltrb(anno['bbox'])
            subim = xlcv.get_sub(im, xywh2ltrb(anno['bbox']))
            spans = split_func(subim)
            # Widen every span by 3 px per side (clamped to the box) and
            # convert from box-relative to absolute image coordinates.
            spans = [[x + max(span[0] - 3, 0), x + min(span[1] + 3, w)] for span in spans]

            if len(spans) == 0:
                # Most likely a misplaced, low-quality box: discard every
                # annotation of this image.
                return []

            if len(spans) == 1:
                new_annos.append(cls._with_span(anno, spans[0], t, b))
                continue

            # Several spans were found.
            if 'label' not in anno:
                # No text to distribute: split the box only.
                new_annos.extend(cls._with_span(anno, span, t, b) for span in spans)
                continue

            labels = anno['label'].split()
            if len(labels) > len(spans):
                labels = merge_labels_by_widths(labels, [(span[1] - span[0]) for span in spans])
            elif len(labels) < len(spans):
                # Fewer words than spans: report it for later inspection and
                # keep the original box for now.
                get_xllog().warning(DPrint.format({'$异常': 'len(labels)<len(spans)',
                                                   'labels': labels, 'spans': spans}))
                new_annos.append(anno)
                continue

            for span, label in zip(spans, labels):
                piece = cls._with_span(anno, span, t, b)
                piece['label'] = label
                new_annos.append(piece)

        return new_annos
296
-
297
-
298
class TextlineSpliter:
    """Column-projection splitter for text-line images.

    According to the original author it can be used as-is for the
    TextString2016 and Casia datasets.
    """

    @classmethod
    def spliter(cls, im, maxsplit=None, minwidth=3):
        """Core routine: locate the column intervals that contain text.

        Meant for clean images (dark text on a light background, no strong
        skew). Subclasses should override it for other data, see
        ``EnglishWordTLS``.

        :param im: image path or np.ndarray
        :param maxsplit: maximum number of splits, i.e. the maximum number of
            sub-intervals; when unset every qualifying position is cut
        :param minwidth: minimum width of a cut position
        :return: [(l, r), (l, r), ...] left/right bounds of each text segment

        Detailed docs: https://www.yuque.com/xlpr/data/cx6xm5
        """
        gray = xlcv.read(im, 0)
        # Binarise against the mean level; requires a clean image in which
        # background columns contain almost no text pixels.
        text_mask = gray < np.mean(gray)
        # The per-column count is lowered by 2 before interval detection.
        column_score = text_mask.sum(axis=0) - 2
        return split_vector_interval(column_score, maxsplit=maxsplit, minwidth=minwidth)

    @classmethod
    def split_img(cls, file, maxsplit=None, minwidth=3):
        """Cut an image into one sub-image per detected text interval.

        :param file: np.ndarray image, PIL image, or image path
        :param maxsplit: forwarded to ``spliter``
        :param minwidth: forwarded to ``spliter``
        :return: list of np.ndarray sub-images (right bound inclusive)
        """
        img = xlcv.read(file)
        intervals = cls.spliter(img, maxsplit, minwidth)
        return [img[:, left:right + 1] for left, right in intervals]

    @classmethod
    def spliter_img(cls, file, maxsplit=None, minwidth=3):
        """Visualise the split of a single image for testing.

        Draws a vertical line at every interval boundary and returns the
        resulting image. ``file`` may be an absolute image path.
        """
        im = xlcv.read(file, 0)
        cols = cls.spliter(im, maxsplit=maxsplit, minwidth=minwidth)

        bottom = im.shape[0] - 1
        boundaries = np.array(cols, dtype=int).reshape(-1)
        lines = [[c, 0, c, bottom] for c in boundaries]

        # The flattened boundary list is l0, r0, l1, r1, ...: offsets 0/1 are
        # the two edges of even-numbered intervals, offsets 2/3 those of
        # odd-numbered ones. The original comments call the first colour red
        # and the second blue.
        im2 = im
        for offset, color in ((0, [0, 0, 255]), (1, [0, 0, 255]),
                              (2, [255, 0, 0]), (3, [255, 0, 0])):
            im2 = xlcv.lines(im2, lines[offset::4], color)

        return im2

    @classmethod
    def show_spliter_imgs(cls, dir_state, *, save=None, show=True):
        """Run ``spliter_img`` over the images of ``dir_state`` through
        ``ImagesDir.debug_func``.

        :param dir_state: image selection handed to ``ImagesDir.debug_func``
        :param save: where to store the results
        :param show: whether to display the result images
        """

        def render(img_file):
            return cls.spliter_img(img_file, maxsplit=None, minwidth=3)

        ImagesDir.debug_func(dir_state, render, save=save, show=show)

    @classmethod
    def relabel_labelfile(cls, p, maxsplit=None, minwidth=3, imgdir='images'):
        """Process every image listed in one label file and append a column
        with the detected interval coordinates.

        The result is written next to ``p`` with
        ``+text_interval-minw=<minwidth>`` appended to the file stem.
        """
        out_lines = []
        for raw in p.read().splitlines():
            fields = raw.split(maxsplit=1)
            im = xlcv.read(p.parent / f'{imgdir}/{fields[0]}', 0)
            cols = cls.spliter(im, maxsplit, minwidth)
            flat = np.array(cols, dtype=int).reshape(-1)
            fields.append(' '.join(str(v) for v in flat))
            out_lines.append('\t'.join(fields))
        target = p.with_stem(p.stem + f'+text_interval-minw={minwidth}')
        target.write('\n'.join(out_lines), if_exists='replace')

    @classmethod
    def relabel_labelfiles(cls, root, maxsplit=None, minwidth=3, imgdir='images'):
        """Relabel the val/test/train label files of a dataset.

        :param root: dataset root directory
        :param imgdir: name of the sub-directory holding the images
        :return: None
        """
        root = Dir(root)
        for name in ('val.txt', 'test.txt', 'train.txt'):
            cls.relabel_labelfile(root / name, maxsplit, minwidth, imgdir)

    @classmethod
    def split_labelfiles(cls, src, dst, minwidth=3, imgdir='images'):
        """Split multi-word line images of dataset ``src`` into per-word
        images and write the new images and label files to ``dst``."""
        src, dst = Dir(src), Dir(dst)

        def convert(name):
            """Convert one label file.

            p     original .txt label file
            p_im  original image
            q     .txt label file after splitting
            q_im  image after splitting
            """
            p, q = File(name, src), File(name, dst)
            if not p:
                return
            res = []
            for raw in p.read().splitlines():
                # image file name, followed by the words it contains
                parts = raw.split(maxsplit=1)
                if len(parts) < 2:
                    continue

                p_im = File(p.parent / f'{imgdir}/{parts[0]}')
                words = parts[1].split()

                if len(words) < 2:
                    # single word: copy the image unchanged
                    q_im = File(f'{imgdir}/{p_im.name}', dst)
                    p_im.copy(q_im)
                    res.append(f'{q_im.name}\t{words[0]}')
                    continue

                # cut the image, then emit one label line per piece
                pieces = cls.split_img(p_im, len(words), minwidth)
                for k, im in enumerate(pieces):
                    q_im = File(f'{imgdir}/{p_im.stem}_{k}', dst, suffix=p_im.suffix)
                    xlcv.write(im, q_im, if_exists='replace')
                    res.append(f'{q_im.name}\t{words[k]}')
            q.write('\n'.join(res), if_exists='replace')

        for name in ['val.txt', 'test.txt', 'train.txt']:
            convert(name)
431
-
432
-
433
class EnglishWordTLS(TextlineSpliter):
    """Splitter variant for dark-on-light images with background noise."""

    @classmethod
    def spliter(cls, img, maxsplit=None, minwidth=3):
        """Same contract as ``TextlineSpliter.spliter``, tuned for noisy
        images with dark text on a light background.
        """
        gray = xlcv.read(img, 0)
        h, _ = gray.shape
        # Only the middle third of the rows is used for the column profile.
        middle_band = gray[int(h / 3):int(2 * h / 3)]
        column_mean = middle_band.mean(axis=0)
        # Flip the sign around the overall mean so that dark (text) columns
        # become positive and bright (background) columns negative, then add
        # a constant offset of 5.
        # NOTE(review): the original comment speaks of subtracting a little
        # more because of dark noise dots in the background, while the code
        # adds 5 - confirm the intended sign.
        profile = column_mean.mean() - column_mean + 5
        return split_vector_interval(profile, maxsplit=maxsplit, minwidth=minwidth)
444
-
445
-
446
class TLSMain:
    """Driver methods that run the splitters on specific datasets.

    All dataset locations are hard-coded paths.
    """

    def textstring2016(self):
        """Relabel the TextString2016 dataset."""
        dataset_root = r'D:\datasets\TextString2016'
        TextlineSpliter.relabel_labelfiles(dataset_root, minwidth=3)

    def casia(self):
        """Relabel the three CASIA-HWDB2.x line-image sets.

        Changes the process working directory first.
        """
        os.chdir('/home/datasets/textGroup/casia/offlinehw/CASIA-HWDB2.x_pngImg_line')
        subsets = ('CASIA-HWDB2.0_savePTTSImg_line',
                   'CASIA-HWDB2.1_savePTTSImg_line',
                   'CASIA-HWDB2.2_savePTTSImg_line')
        for subset in subsets:
            TextlineSpliter.relabel_labelfiles(subset, minwidth=3)

    def english_word(self):
        """Relabel the english-word dataset with the noise-tolerant splitter."""
        dataset_root = r'D:\datasets\english-word'
        EnglishWordTLS.relabel_labelfiles(dataset_root, minwidth=10, imgdir='total')

    def sroie(self):
        """Render split previews for a sample of 10 SROIE images and save
        them without displaying."""
        path = Dir('SROIE2019/task1train_626p_repo/task1train_626p_patch/')
        root = Dir(path, '/home/datasets/textGroup')
        samples = root.select('images/*.png').sample(10)
        target = File(path / 'temp', '/home/datasets/textGroup')
        TextlineSpliter.show_spliter_imgs(samples, save=target, show=False)
469
-
470
-
471
if __name__ == '__main__':
    # Script entry point: nothing is executed by default; the TicToc context
    # merely wraps an empty body.
    timer = TicToc(__name__)
    with timer:
        pass