pyxllib 0.3.96__py3-none-any.whl → 0.3.200__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (358) hide show
  1. pyxllib/__init__.py +21 -21
  2. pyxllib/algo/__init__.py +8 -8
  3. pyxllib/algo/disjoint.py +54 -54
  4. pyxllib/algo/geo.py +541 -529
  5. pyxllib/algo/intervals.py +964 -964
  6. pyxllib/algo/matcher.py +389 -311
  7. pyxllib/algo/newbie.py +166 -166
  8. pyxllib/algo/pupil.py +629 -461
  9. pyxllib/algo/shapelylib.py +67 -67
  10. pyxllib/algo/specialist.py +241 -240
  11. pyxllib/algo/stat.py +494 -458
  12. pyxllib/algo/treelib.py +149 -149
  13. pyxllib/algo/unitlib.py +66 -66
  14. {pyxlpr → pyxllib/autogui}/__init__.py +5 -5
  15. pyxllib/autogui/activewin.py +246 -0
  16. pyxllib/autogui/all.py +9 -0
  17. pyxllib/{ext/autogui → autogui}/autogui.py +852 -823
  18. pyxllib/autogui/uiautolib.py +362 -0
  19. pyxllib/{ext/autogui → autogui}/virtualkey.py +102 -102
  20. pyxllib/autogui/wechat.py +827 -0
  21. pyxllib/autogui/wechat_msg.py +421 -0
  22. pyxllib/autogui/wxautolib.py +84 -0
  23. pyxllib/cv/__init__.py +5 -5
  24. pyxllib/cv/expert.py +267 -267
  25. pyxllib/cv/imfile.py +159 -159
  26. pyxllib/cv/imhash.py +39 -39
  27. pyxllib/cv/pupil.py +9 -9
  28. pyxllib/cv/rgbfmt.py +1525 -1525
  29. pyxllib/cv/slidercaptcha.py +137 -0
  30. pyxllib/cv/trackbartools.py +251 -251
  31. pyxllib/cv/xlcvlib.py +1040 -1040
  32. pyxllib/cv/xlpillib.py +423 -423
  33. pyxllib/data/echarts.py +240 -129
  34. pyxllib/data/jsonlib.py +89 -0
  35. pyxllib/data/oss.py +72 -72
  36. pyxllib/data/pglib.py +1127 -643
  37. pyxllib/data/sqlite.py +568 -341
  38. pyxllib/data/sqllib.py +297 -297
  39. pyxllib/ext/JLineViewer.py +505 -492
  40. pyxllib/ext/__init__.py +6 -6
  41. pyxllib/ext/demolib.py +246 -246
  42. pyxllib/ext/drissionlib.py +277 -0
  43. pyxllib/ext/kq5034lib.py +12 -1606
  44. pyxllib/ext/old.py +663 -663
  45. pyxllib/ext/qt.py +449 -449
  46. pyxllib/ext/robustprocfile.py +497 -0
  47. pyxllib/ext/seleniumlib.py +76 -76
  48. pyxllib/ext/tk.py +173 -173
  49. pyxllib/ext/unixlib.py +827 -826
  50. pyxllib/ext/utools.py +351 -338
  51. pyxllib/ext/webhook.py +124 -101
  52. pyxllib/ext/win32lib.py +40 -40
  53. pyxllib/ext/wjxlib.py +88 -0
  54. pyxllib/ext/wpsapi.py +124 -0
  55. pyxllib/ext/xlwork.py +9 -0
  56. pyxllib/ext/yuquelib.py +1105 -173
  57. pyxllib/file/__init__.py +17 -17
  58. pyxllib/file/docxlib.py +761 -761
  59. pyxllib/file/gitlib.py +309 -309
  60. pyxllib/file/libreoffice.py +165 -0
  61. pyxllib/file/movielib.py +148 -139
  62. pyxllib/file/newbie.py +10 -10
  63. pyxllib/file/onenotelib.py +1469 -1469
  64. pyxllib/file/packlib/__init__.py +330 -293
  65. pyxllib/file/packlib/zipfile.py +2441 -2441
  66. pyxllib/file/pdflib.py +426 -426
  67. pyxllib/file/pupil.py +185 -185
  68. pyxllib/file/specialist/__init__.py +685 -685
  69. pyxllib/file/specialist/dirlib.py +799 -799
  70. pyxllib/file/specialist/download.py +193 -186
  71. pyxllib/file/specialist/filelib.py +2829 -2618
  72. pyxllib/file/xlsxlib.py +3131 -2976
  73. pyxllib/file/xlsyncfile.py +341 -0
  74. pyxllib/prog/__init__.py +5 -5
  75. pyxllib/prog/cachetools.py +64 -0
  76. pyxllib/prog/deprecatedlib.py +233 -233
  77. pyxllib/prog/filelock.py +42 -0
  78. pyxllib/prog/ipyexec.py +253 -253
  79. pyxllib/prog/multiprogs.py +940 -0
  80. pyxllib/prog/newbie.py +451 -444
  81. pyxllib/prog/pupil.py +1197 -1128
  82. pyxllib/prog/sitepackages.py +33 -33
  83. pyxllib/prog/specialist/__init__.py +391 -217
  84. pyxllib/prog/specialist/bc.py +203 -200
  85. pyxllib/prog/specialist/browser.py +497 -488
  86. pyxllib/prog/specialist/common.py +347 -347
  87. pyxllib/prog/specialist/datetime.py +199 -131
  88. pyxllib/prog/specialist/tictoc.py +240 -241
  89. pyxllib/prog/specialist/xllog.py +180 -180
  90. pyxllib/prog/xlosenv.py +108 -101
  91. pyxllib/stdlib/__init__.py +17 -17
  92. pyxllib/stdlib/tablepyxl/__init__.py +10 -10
  93. pyxllib/stdlib/tablepyxl/style.py +303 -303
  94. pyxllib/stdlib/tablepyxl/tablepyxl.py +130 -130
  95. pyxllib/text/__init__.py +8 -8
  96. pyxllib/text/ahocorasick.py +39 -39
  97. pyxllib/text/airscript.js +744 -0
  98. pyxllib/text/charclasslib.py +121 -109
  99. pyxllib/text/jiebalib.py +267 -264
  100. pyxllib/text/jinjalib.py +32 -0
  101. pyxllib/text/jsa_ai_prompt.md +271 -0
  102. pyxllib/text/jscode.py +922 -767
  103. pyxllib/text/latex/__init__.py +158 -158
  104. pyxllib/text/levenshtein.py +303 -303
  105. pyxllib/text/nestenv.py +1215 -1215
  106. pyxllib/text/newbie.py +300 -288
  107. pyxllib/text/pupil/__init__.py +8 -8
  108. pyxllib/text/pupil/common.py +1121 -1095
  109. pyxllib/text/pupil/xlalign.py +326 -326
  110. pyxllib/text/pycode.py +47 -47
  111. pyxllib/text/specialist/__init__.py +8 -8
  112. pyxllib/text/specialist/common.py +112 -112
  113. pyxllib/text/specialist/ptag.py +186 -186
  114. pyxllib/text/spellchecker.py +172 -172
  115. pyxllib/text/templates/echart_base.html +11 -0
  116. pyxllib/text/templates/highlight_code.html +17 -0
  117. pyxllib/text/templates/latex_editor.html +103 -0
  118. pyxllib/text/vbacode.py +17 -17
  119. pyxllib/text/xmllib.py +747 -685
  120. pyxllib/xl.py +42 -38
  121. pyxllib/xlcv.py +17 -17
  122. pyxllib-0.3.200.dist-info/METADATA +48 -0
  123. pyxllib-0.3.200.dist-info/RECORD +126 -0
  124. {pyxllib-0.3.96.dist-info → pyxllib-0.3.200.dist-info}/WHEEL +1 -2
  125. {pyxllib-0.3.96.dist-info → pyxllib-0.3.200.dist-info/licenses}/LICENSE +190 -190
  126. pyxllib/ext/autogui/__init__.py +0 -8
  127. pyxllib-0.3.96.dist-info/METADATA +0 -51
  128. pyxllib-0.3.96.dist-info/RECORD +0 -333
  129. pyxllib-0.3.96.dist-info/top_level.txt +0 -2
  130. pyxlpr/ai/__init__.py +0 -5
  131. pyxlpr/ai/clientlib.py +0 -1281
  132. pyxlpr/ai/specialist.py +0 -286
  133. pyxlpr/ai/torch_app.py +0 -172
  134. pyxlpr/ai/xlpaddle.py +0 -655
  135. pyxlpr/ai/xltorch.py +0 -705
  136. pyxlpr/data/__init__.py +0 -11
  137. pyxlpr/data/coco.py +0 -1325
  138. pyxlpr/data/datacls.py +0 -365
  139. pyxlpr/data/datasets.py +0 -200
  140. pyxlpr/data/gptlib.py +0 -1291
  141. pyxlpr/data/icdar/__init__.py +0 -96
  142. pyxlpr/data/icdar/deteval.py +0 -377
  143. pyxlpr/data/icdar/icdar2013.py +0 -341
  144. pyxlpr/data/icdar/iou.py +0 -340
  145. pyxlpr/data/icdar/rrc_evaluation_funcs_1_1.py +0 -463
  146. pyxlpr/data/imtextline.py +0 -473
  147. pyxlpr/data/labelme.py +0 -866
  148. pyxlpr/data/removeline.py +0 -179
  149. pyxlpr/data/specialist.py +0 -57
  150. pyxlpr/eval/__init__.py +0 -85
  151. pyxlpr/paddleocr.py +0 -776
  152. pyxlpr/ppocr/__init__.py +0 -15
  153. pyxlpr/ppocr/configs/rec/multi_language/generate_multi_language_configs.py +0 -226
  154. pyxlpr/ppocr/data/__init__.py +0 -135
  155. pyxlpr/ppocr/data/imaug/ColorJitter.py +0 -26
  156. pyxlpr/ppocr/data/imaug/__init__.py +0 -67
  157. pyxlpr/ppocr/data/imaug/copy_paste.py +0 -170
  158. pyxlpr/ppocr/data/imaug/east_process.py +0 -437
  159. pyxlpr/ppocr/data/imaug/gen_table_mask.py +0 -244
  160. pyxlpr/ppocr/data/imaug/iaa_augment.py +0 -114
  161. pyxlpr/ppocr/data/imaug/label_ops.py +0 -789
  162. pyxlpr/ppocr/data/imaug/make_border_map.py +0 -184
  163. pyxlpr/ppocr/data/imaug/make_pse_gt.py +0 -106
  164. pyxlpr/ppocr/data/imaug/make_shrink_map.py +0 -126
  165. pyxlpr/ppocr/data/imaug/operators.py +0 -433
  166. pyxlpr/ppocr/data/imaug/pg_process.py +0 -906
  167. pyxlpr/ppocr/data/imaug/randaugment.py +0 -143
  168. pyxlpr/ppocr/data/imaug/random_crop_data.py +0 -239
  169. pyxlpr/ppocr/data/imaug/rec_img_aug.py +0 -533
  170. pyxlpr/ppocr/data/imaug/sast_process.py +0 -777
  171. pyxlpr/ppocr/data/imaug/text_image_aug/__init__.py +0 -17
  172. pyxlpr/ppocr/data/imaug/text_image_aug/augment.py +0 -120
  173. pyxlpr/ppocr/data/imaug/text_image_aug/warp_mls.py +0 -168
  174. pyxlpr/ppocr/data/lmdb_dataset.py +0 -115
  175. pyxlpr/ppocr/data/pgnet_dataset.py +0 -104
  176. pyxlpr/ppocr/data/pubtab_dataset.py +0 -107
  177. pyxlpr/ppocr/data/simple_dataset.py +0 -372
  178. pyxlpr/ppocr/losses/__init__.py +0 -61
  179. pyxlpr/ppocr/losses/ace_loss.py +0 -52
  180. pyxlpr/ppocr/losses/basic_loss.py +0 -135
  181. pyxlpr/ppocr/losses/center_loss.py +0 -88
  182. pyxlpr/ppocr/losses/cls_loss.py +0 -30
  183. pyxlpr/ppocr/losses/combined_loss.py +0 -67
  184. pyxlpr/ppocr/losses/det_basic_loss.py +0 -208
  185. pyxlpr/ppocr/losses/det_db_loss.py +0 -80
  186. pyxlpr/ppocr/losses/det_east_loss.py +0 -63
  187. pyxlpr/ppocr/losses/det_pse_loss.py +0 -149
  188. pyxlpr/ppocr/losses/det_sast_loss.py +0 -121
  189. pyxlpr/ppocr/losses/distillation_loss.py +0 -272
  190. pyxlpr/ppocr/losses/e2e_pg_loss.py +0 -140
  191. pyxlpr/ppocr/losses/kie_sdmgr_loss.py +0 -113
  192. pyxlpr/ppocr/losses/rec_aster_loss.py +0 -99
  193. pyxlpr/ppocr/losses/rec_att_loss.py +0 -39
  194. pyxlpr/ppocr/losses/rec_ctc_loss.py +0 -44
  195. pyxlpr/ppocr/losses/rec_enhanced_ctc_loss.py +0 -70
  196. pyxlpr/ppocr/losses/rec_nrtr_loss.py +0 -30
  197. pyxlpr/ppocr/losses/rec_sar_loss.py +0 -28
  198. pyxlpr/ppocr/losses/rec_srn_loss.py +0 -47
  199. pyxlpr/ppocr/losses/table_att_loss.py +0 -109
  200. pyxlpr/ppocr/metrics/__init__.py +0 -44
  201. pyxlpr/ppocr/metrics/cls_metric.py +0 -45
  202. pyxlpr/ppocr/metrics/det_metric.py +0 -82
  203. pyxlpr/ppocr/metrics/distillation_metric.py +0 -73
  204. pyxlpr/ppocr/metrics/e2e_metric.py +0 -86
  205. pyxlpr/ppocr/metrics/eval_det_iou.py +0 -274
  206. pyxlpr/ppocr/metrics/kie_metric.py +0 -70
  207. pyxlpr/ppocr/metrics/rec_metric.py +0 -75
  208. pyxlpr/ppocr/metrics/table_metric.py +0 -50
  209. pyxlpr/ppocr/modeling/architectures/__init__.py +0 -32
  210. pyxlpr/ppocr/modeling/architectures/base_model.py +0 -88
  211. pyxlpr/ppocr/modeling/architectures/distillation_model.py +0 -60
  212. pyxlpr/ppocr/modeling/backbones/__init__.py +0 -54
  213. pyxlpr/ppocr/modeling/backbones/det_mobilenet_v3.py +0 -268
  214. pyxlpr/ppocr/modeling/backbones/det_resnet_vd.py +0 -246
  215. pyxlpr/ppocr/modeling/backbones/det_resnet_vd_sast.py +0 -285
  216. pyxlpr/ppocr/modeling/backbones/e2e_resnet_vd_pg.py +0 -265
  217. pyxlpr/ppocr/modeling/backbones/kie_unet_sdmgr.py +0 -186
  218. pyxlpr/ppocr/modeling/backbones/rec_mobilenet_v3.py +0 -138
  219. pyxlpr/ppocr/modeling/backbones/rec_mv1_enhance.py +0 -258
  220. pyxlpr/ppocr/modeling/backbones/rec_nrtr_mtb.py +0 -48
  221. pyxlpr/ppocr/modeling/backbones/rec_resnet_31.py +0 -210
  222. pyxlpr/ppocr/modeling/backbones/rec_resnet_aster.py +0 -143
  223. pyxlpr/ppocr/modeling/backbones/rec_resnet_fpn.py +0 -307
  224. pyxlpr/ppocr/modeling/backbones/rec_resnet_vd.py +0 -286
  225. pyxlpr/ppocr/modeling/heads/__init__.py +0 -54
  226. pyxlpr/ppocr/modeling/heads/cls_head.py +0 -52
  227. pyxlpr/ppocr/modeling/heads/det_db_head.py +0 -118
  228. pyxlpr/ppocr/modeling/heads/det_east_head.py +0 -121
  229. pyxlpr/ppocr/modeling/heads/det_pse_head.py +0 -37
  230. pyxlpr/ppocr/modeling/heads/det_sast_head.py +0 -128
  231. pyxlpr/ppocr/modeling/heads/e2e_pg_head.py +0 -253
  232. pyxlpr/ppocr/modeling/heads/kie_sdmgr_head.py +0 -206
  233. pyxlpr/ppocr/modeling/heads/multiheadAttention.py +0 -163
  234. pyxlpr/ppocr/modeling/heads/rec_aster_head.py +0 -393
  235. pyxlpr/ppocr/modeling/heads/rec_att_head.py +0 -202
  236. pyxlpr/ppocr/modeling/heads/rec_ctc_head.py +0 -88
  237. pyxlpr/ppocr/modeling/heads/rec_nrtr_head.py +0 -826
  238. pyxlpr/ppocr/modeling/heads/rec_sar_head.py +0 -402
  239. pyxlpr/ppocr/modeling/heads/rec_srn_head.py +0 -280
  240. pyxlpr/ppocr/modeling/heads/self_attention.py +0 -406
  241. pyxlpr/ppocr/modeling/heads/table_att_head.py +0 -246
  242. pyxlpr/ppocr/modeling/necks/__init__.py +0 -32
  243. pyxlpr/ppocr/modeling/necks/db_fpn.py +0 -111
  244. pyxlpr/ppocr/modeling/necks/east_fpn.py +0 -188
  245. pyxlpr/ppocr/modeling/necks/fpn.py +0 -138
  246. pyxlpr/ppocr/modeling/necks/pg_fpn.py +0 -314
  247. pyxlpr/ppocr/modeling/necks/rnn.py +0 -92
  248. pyxlpr/ppocr/modeling/necks/sast_fpn.py +0 -284
  249. pyxlpr/ppocr/modeling/necks/table_fpn.py +0 -110
  250. pyxlpr/ppocr/modeling/transforms/__init__.py +0 -28
  251. pyxlpr/ppocr/modeling/transforms/stn.py +0 -135
  252. pyxlpr/ppocr/modeling/transforms/tps.py +0 -308
  253. pyxlpr/ppocr/modeling/transforms/tps_spatial_transformer.py +0 -156
  254. pyxlpr/ppocr/optimizer/__init__.py +0 -61
  255. pyxlpr/ppocr/optimizer/learning_rate.py +0 -228
  256. pyxlpr/ppocr/optimizer/lr_scheduler.py +0 -49
  257. pyxlpr/ppocr/optimizer/optimizer.py +0 -160
  258. pyxlpr/ppocr/optimizer/regularizer.py +0 -52
  259. pyxlpr/ppocr/postprocess/__init__.py +0 -55
  260. pyxlpr/ppocr/postprocess/cls_postprocess.py +0 -33
  261. pyxlpr/ppocr/postprocess/db_postprocess.py +0 -234
  262. pyxlpr/ppocr/postprocess/east_postprocess.py +0 -143
  263. pyxlpr/ppocr/postprocess/locality_aware_nms.py +0 -200
  264. pyxlpr/ppocr/postprocess/pg_postprocess.py +0 -52
  265. pyxlpr/ppocr/postprocess/pse_postprocess/__init__.py +0 -15
  266. pyxlpr/ppocr/postprocess/pse_postprocess/pse/__init__.py +0 -29
  267. pyxlpr/ppocr/postprocess/pse_postprocess/pse/setup.py +0 -14
  268. pyxlpr/ppocr/postprocess/pse_postprocess/pse_postprocess.py +0 -118
  269. pyxlpr/ppocr/postprocess/rec_postprocess.py +0 -654
  270. pyxlpr/ppocr/postprocess/sast_postprocess.py +0 -355
  271. pyxlpr/ppocr/tools/__init__.py +0 -14
  272. pyxlpr/ppocr/tools/eval.py +0 -83
  273. pyxlpr/ppocr/tools/export_center.py +0 -77
  274. pyxlpr/ppocr/tools/export_model.py +0 -129
  275. pyxlpr/ppocr/tools/infer/predict_cls.py +0 -151
  276. pyxlpr/ppocr/tools/infer/predict_det.py +0 -300
  277. pyxlpr/ppocr/tools/infer/predict_e2e.py +0 -169
  278. pyxlpr/ppocr/tools/infer/predict_rec.py +0 -414
  279. pyxlpr/ppocr/tools/infer/predict_system.py +0 -204
  280. pyxlpr/ppocr/tools/infer/utility.py +0 -629
  281. pyxlpr/ppocr/tools/infer_cls.py +0 -83
  282. pyxlpr/ppocr/tools/infer_det.py +0 -134
  283. pyxlpr/ppocr/tools/infer_e2e.py +0 -122
  284. pyxlpr/ppocr/tools/infer_kie.py +0 -153
  285. pyxlpr/ppocr/tools/infer_rec.py +0 -146
  286. pyxlpr/ppocr/tools/infer_table.py +0 -107
  287. pyxlpr/ppocr/tools/program.py +0 -596
  288. pyxlpr/ppocr/tools/test_hubserving.py +0 -117
  289. pyxlpr/ppocr/tools/train.py +0 -163
  290. pyxlpr/ppocr/tools/xlprog.py +0 -748
  291. pyxlpr/ppocr/utils/EN_symbol_dict.txt +0 -94
  292. pyxlpr/ppocr/utils/__init__.py +0 -24
  293. pyxlpr/ppocr/utils/dict/ar_dict.txt +0 -117
  294. pyxlpr/ppocr/utils/dict/arabic_dict.txt +0 -162
  295. pyxlpr/ppocr/utils/dict/be_dict.txt +0 -145
  296. pyxlpr/ppocr/utils/dict/bg_dict.txt +0 -140
  297. pyxlpr/ppocr/utils/dict/chinese_cht_dict.txt +0 -8421
  298. pyxlpr/ppocr/utils/dict/cyrillic_dict.txt +0 -163
  299. pyxlpr/ppocr/utils/dict/devanagari_dict.txt +0 -167
  300. pyxlpr/ppocr/utils/dict/en_dict.txt +0 -63
  301. pyxlpr/ppocr/utils/dict/fa_dict.txt +0 -136
  302. pyxlpr/ppocr/utils/dict/french_dict.txt +0 -136
  303. pyxlpr/ppocr/utils/dict/german_dict.txt +0 -143
  304. pyxlpr/ppocr/utils/dict/hi_dict.txt +0 -162
  305. pyxlpr/ppocr/utils/dict/it_dict.txt +0 -118
  306. pyxlpr/ppocr/utils/dict/japan_dict.txt +0 -4399
  307. pyxlpr/ppocr/utils/dict/ka_dict.txt +0 -153
  308. pyxlpr/ppocr/utils/dict/korean_dict.txt +0 -3688
  309. pyxlpr/ppocr/utils/dict/latin_dict.txt +0 -185
  310. pyxlpr/ppocr/utils/dict/mr_dict.txt +0 -153
  311. pyxlpr/ppocr/utils/dict/ne_dict.txt +0 -153
  312. pyxlpr/ppocr/utils/dict/oc_dict.txt +0 -96
  313. pyxlpr/ppocr/utils/dict/pu_dict.txt +0 -130
  314. pyxlpr/ppocr/utils/dict/rs_dict.txt +0 -91
  315. pyxlpr/ppocr/utils/dict/rsc_dict.txt +0 -134
  316. pyxlpr/ppocr/utils/dict/ru_dict.txt +0 -125
  317. pyxlpr/ppocr/utils/dict/ta_dict.txt +0 -128
  318. pyxlpr/ppocr/utils/dict/table_dict.txt +0 -277
  319. pyxlpr/ppocr/utils/dict/table_structure_dict.txt +0 -2759
  320. pyxlpr/ppocr/utils/dict/te_dict.txt +0 -151
  321. pyxlpr/ppocr/utils/dict/ug_dict.txt +0 -114
  322. pyxlpr/ppocr/utils/dict/uk_dict.txt +0 -142
  323. pyxlpr/ppocr/utils/dict/ur_dict.txt +0 -137
  324. pyxlpr/ppocr/utils/dict/xi_dict.txt +0 -110
  325. pyxlpr/ppocr/utils/dict90.txt +0 -90
  326. pyxlpr/ppocr/utils/e2e_metric/Deteval.py +0 -574
  327. pyxlpr/ppocr/utils/e2e_metric/polygon_fast.py +0 -83
  328. pyxlpr/ppocr/utils/e2e_utils/extract_batchsize.py +0 -87
  329. pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_fast.py +0 -457
  330. pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_slow.py +0 -592
  331. pyxlpr/ppocr/utils/e2e_utils/pgnet_pp_utils.py +0 -162
  332. pyxlpr/ppocr/utils/e2e_utils/visual.py +0 -162
  333. pyxlpr/ppocr/utils/en_dict.txt +0 -95
  334. pyxlpr/ppocr/utils/gen_label.py +0 -81
  335. pyxlpr/ppocr/utils/ic15_dict.txt +0 -36
  336. pyxlpr/ppocr/utils/iou.py +0 -54
  337. pyxlpr/ppocr/utils/logging.py +0 -69
  338. pyxlpr/ppocr/utils/network.py +0 -84
  339. pyxlpr/ppocr/utils/ppocr_keys_v1.txt +0 -6623
  340. pyxlpr/ppocr/utils/profiler.py +0 -110
  341. pyxlpr/ppocr/utils/save_load.py +0 -150
  342. pyxlpr/ppocr/utils/stats.py +0 -72
  343. pyxlpr/ppocr/utils/utility.py +0 -80
  344. pyxlpr/ppstructure/__init__.py +0 -13
  345. pyxlpr/ppstructure/predict_system.py +0 -187
  346. pyxlpr/ppstructure/table/__init__.py +0 -13
  347. pyxlpr/ppstructure/table/eval_table.py +0 -72
  348. pyxlpr/ppstructure/table/matcher.py +0 -192
  349. pyxlpr/ppstructure/table/predict_structure.py +0 -136
  350. pyxlpr/ppstructure/table/predict_table.py +0 -221
  351. pyxlpr/ppstructure/table/table_metric/__init__.py +0 -16
  352. pyxlpr/ppstructure/table/table_metric/parallel.py +0 -51
  353. pyxlpr/ppstructure/table/table_metric/table_metric.py +0 -247
  354. pyxlpr/ppstructure/table/tablepyxl/__init__.py +0 -13
  355. pyxlpr/ppstructure/table/tablepyxl/style.py +0 -283
  356. pyxlpr/ppstructure/table/tablepyxl/tablepyxl.py +0 -118
  357. pyxlpr/ppstructure/utility.py +0 -71
  358. pyxlpr/xlai.py +0 -10
@@ -1,488 +1,497 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- # @Author : 陈坤泽
4
- # @Email : 877362867@qq.com
5
- # @Date : 2020/05/30 22:43
6
-
7
- import builtins
8
- import enum
9
- import html
10
- import inspect
11
- import os
12
- import subprocess
13
- import sys
14
- import datetime
15
- import platform
16
-
17
- import pandas as pd
18
- from bs4 import BeautifulSoup
19
-
20
- from pyxllib.prog.newbie import typename
21
- from pyxllib.prog.pupil import dprint, func_input_message, is_url, is_file
22
- from pyxllib.prog.specialist.common import TypeConvert, NestedDict, KeyValuesCounter, dataframe_str
23
- from pyxllib.prog.specialist.tictoc import TicToc
24
- from pyxllib.text.pupil import ensure_gbk, shorten
25
- from pyxllib.file.specialist.dirlib import File, Dir, get_etag, XlPath
26
-
27
-
28
- def getasizeof(*objs, **opts):
29
- """获得所有类的大小,底层用pympler.asizeof实现"""
30
- from pympler import asizeof
31
-
32
- try:
33
- res = asizeof.asizeof(*objs, **opts)
34
- # except TypeError: # sqlalchemy.exc.InvalidRequestError
35
- except:
36
- res = -1
37
- return res
38
-
39
-
40
- def viewfiles(procname, *files, **kwargs):
41
- """ 调用procname相关的文件程序打开files
42
-
43
- :param procname: 程序名
44
- :param files: 一个文件名参数清单,每一个都是文件路径,或者是字符串等可以用writefile转成文件的路径
45
- :param kwargs:
46
- save: 如果True,则会按时间保存文件名;否则采用特定名称,每次运行就会把上次的覆盖掉
47
- wait: 是否等待当前进程结束后,再运行后续py代码
48
- filename: 控制写入的文件名
49
- TODO:根据不同软件,这里还可以扩展很多功能
50
- :param kwargs:
51
- wait:
52
- True:在同一个进程中执行子程序,即会等待bc退出后,再进入下一步
53
- False:在新的进程中执行子程序
54
-
55
- 细节:注意bc跟其他程序有比较大不同,建议使用专用的bcompare函数
56
- 目前已知可以扩展多文件的有:chrome、notepad++、texstudio
57
-
58
- >> ls = list(range(100))
59
- >> viewfiles('notepad++', ls, save=True)
60
- """
61
- # 1 生成文件名
62
- ls = [] # 将最终所有绝对路径文件名存储到ls
63
- save = kwargs.get('save')
64
-
65
- basename = ext = None
66
- if 'filename' in kwargs and kwargs['filename']:
67
- basename, ext = os.path.splitext(kwargs['filename'])
68
-
69
- for i, t in enumerate(files):
70
- if File(t) or is_url(t):
71
- ls.append(str(t))
72
- else:
73
- bn = basename or ...
74
- ls.append(File(bn, Dir.TEMP, suffix=ext).write(t, if_exists=kwargs.get('if_exists', 'error')).to_str())
75
-
76
- # 2 调用程序(并计算外部操作时间)
77
- tictoc = TicToc()
78
- try:
79
- if kwargs.get('wait'):
80
- subprocess.run([procname, *ls])
81
- else:
82
- subprocess.Popen([procname, *ls])
83
- except FileNotFoundError:
84
- if procname in ('chrome', 'chrome.exe'):
85
- procname = 'explorer' # 如果是谷歌浏览器找不到,尝试用系统默认浏览器
86
- viewfiles(procname, *files, **kwargs)
87
- else:
88
- raise FileNotFoundError(f'未找到程序:{procname}。请检查是否有安装及设置了环境变量。')
89
- return tictoc.tocvalue()
90
-
91
-
92
- class Explorer:
93
- def __init__(self, app='explorer', shell=False):
94
- self.app = app
95
- self.shell = shell
96
-
97
- # def check_app(self, raise_error=False):
98
- # """ 检查是否能找到对应的app
99
- #
100
- # FIXME 不能提前检查,因为有些命令运行是会产生实际影响的,无法静默测试
101
- # 例如explorer是会打开资源管理器的
102
- # """
103
- # try:
104
- # subprocess.run(self.app)
105
- # return True
106
- # except FileNotFoundError:
107
- # if raise_error:
108
- # raise FileNotFoundError(f'Application/Command not found:{self.app}')
109
- # return False
110
-
111
- def __call__(self, *args, wait=True, **kwargs):
112
- """
113
- :param args: 命令行参数
114
- :param wait: 是否等待程序运行结束再继续执行后续python命令
115
- :param kwargs: 扩展参数,参考subprocess接口
116
- :return:
117
-
118
- TODO 获得返回值分析
119
- """
120
- args = [self.app] + list(args)
121
- if 'shell' not in kwargs:
122
- kwargs.update({'shell': self.shell})
123
-
124
- try:
125
- if wait:
126
- subprocess.run(args, **kwargs)
127
- else:
128
- subprocess.Popen(args, **kwargs)
129
- except FileNotFoundError:
130
- raise FileNotFoundError(f'Application/Command not found:{" ".join(args)}')
131
-
132
-
133
- class Browser(Explorer):
134
- """ 使用浏览器查看数据文件
135
-
136
- 标准库 webbrowser 也有一套类似的功能,那套主要用于url的查看,不支持文件
137
- 而我这个主要就是把各种数据转成文件来查看
138
- """
139
-
140
- def __init__(self, app=None, shell=False):
141
- """
142
- :param app: 使用的浏览器程序,例如'msedge', 'chrome',也可以输入程序绝对路径
143
- 默认值None会自动检测标准的msedge、chrome目录是否在环境变量,自动获取
144
- 如果要用其他浏览器,或者不在标准目录,请务必要设置app参数值
145
- 在找没有的情况下,默认使用 'explorer'
146
- :param shell:
147
- """
148
- if app is None:
149
- if platform.system() == 'Windows':
150
- paths = os.environ['PATH']
151
- chrome_dir = r'Google\Chrome\Application'
152
- msedge_dir = r'Microsoft\Edge\Application'
153
- if chrome_dir in paths:
154
- app = 'chrome'
155
- elif msedge_dir in paths:
156
- app = 'msedge'
157
- else: # 默认使用谷歌。之前试过explorer不行~~
158
- app = 'C:/Program Files/Google/Chrome/Application/chrome.exe'
159
- elif platform.system() == 'Linux': # Linux系统(包括Ubuntu)
160
- # 可以在这里添加对应的Unix-like系统浏览器的命令行名称
161
- # 这里默认设置为 'google-chrome',如果你想使用其他的浏览器,例如Firefox,可以修改为 'firefox'
162
- app = 'google-chrome'
163
- else:
164
- # 其他系统的处理
165
- pass
166
- super().__init__(app, shell)
167
-
168
- @classmethod
169
- def to_brower_file(cls, arg, file=None, clsmsg=True, to_html_args=None):
170
- """ 将任意数值类型的arg转存到文件,转换风格会尽量适配浏览器的使用
171
-
172
- :param arg: 任意类型的一个数据
173
- :param file: 想要存储的文件名,没有输入的时候会默认生成到临时文件夹,文件名使用哈希值避重
174
- :param clsmsg: 显示开头一段类型继承关系、对象占用空间的信息
175
- :param to_html_args: df.to_html相关格式参数,写成字典的形式输入,常用的参数有如下
176
- escape, 默认True,将内容转移明文显示;可以设为False,这样在df存储的链接等html语法会起作用
177
-
178
- 说明:其实所谓的用更适合浏览器的方式查看,在我目前的算法版本里,就是尽可能把数据转成DataFrame表格
179
- """
180
- # 1 如果已经是文件、url,则不处理
181
- if is_file(arg) or is_url(arg) or isinstance(arg, File):
182
- return arg
183
-
184
- # 2 如果是其他类型,则先转成文件,再打开
185
- arg_ = TypeConvert.try2df(arg)
186
- if isinstance(arg_, pd.DataFrame): # DataFrame在网页上有更合适的显示效果
187
- if clsmsg:
188
- t = f'==== 类继承关系:{inspect.getmro(type(arg))},' \
189
- + f'内存消耗:{sys.getsizeof(arg)}(递归子类总大小:{getasizeof(arg)})Byte ===='
190
- content = '<p>' + html.escape(t) + '</p>'
191
- else:
192
- content = ''
193
- # TODO 把标题栏改成蓝色~~
194
- content += arg_.to_html(**(to_html_args or {}))
195
- if file is None:
196
- file = File(..., Dir.TEMP, suffix='.html').write(content)
197
- file = file.rename(get_etag(str(file)) + '.html', if_exists='replace')
198
- else:
199
- file = File(file).write(content)
200
- elif getattr(arg, 'render', None): # pyecharts 等表格对象,可以用render生成html表格显示
201
- try:
202
- name = arg.options['title'][0]['text']
203
- except (LookupError, TypeError):
204
- name = datetime.datetime.now().strftime('%H%M%S_%f')
205
- if file is None:
206
- file = File(name, Dir.TEMP, suffix='.html').to_str()
207
- arg.render(path=str(file))
208
- else: # 不在预设格式里的数据,转成普通的txt查看
209
- # if File.safe_init(arg).exists():
210
- # file = arg
211
- if file is None:
212
- file = File(..., Dir.TEMP, suffix='.txt').write(arg)
213
- file = file.rename(get_etag(str(file)) + file.suffix, if_exists='replace')
214
- else:
215
- file = File(file).write(arg)
216
- return file
217
-
218
- def html(self, arg, **kwargs):
219
- """ 将内容转为html展示 """
220
- if 'file' in kwargs:
221
- file = File(kwargs['file'], Dir.TEMP, suffix='.html').write(arg)
222
- del kwargs['file']
223
- else:
224
- file = File(..., Dir.TEMP, suffix='.html').write(arg)
225
- file = file.rename(get_etag(str(file)) + file.suffix, if_exists='replace')
226
- self.__call__(arg, file, **kwargs)
227
-
228
- def __call__(self, arg, file=None, *, wait=True, clsmsg=True, to_html_args=None,
229
- **kwargs): # NOQA Browser的操作跟标准接口略有差异
230
- """ 该版本会把arg转存文件重设为文件名
231
-
232
- :param file: 默认可以不输入,会按七牛的etag哈希值生成临时文件
233
- 如果输入,则按照指定的名称生成文件
234
- """
235
- if XlPath.safe_init(arg).is_file():
236
- file = arg
237
- else:
238
- file = str(self.to_brower_file(arg, file, clsmsg=clsmsg, to_html_args=to_html_args))
239
- super().__call__(str(file), wait=wait, **kwargs)
240
-
241
-
242
- browser = Browser()
243
-
244
-
245
- def browser_json(f):
246
- """ 可视化一个json文件结构 """
247
- data = File(f).read()
248
- # 使用NestedDict.to_html_table转成html的嵌套表格代码,存储到临时文件夹
249
- htmlfile = File(r'chrome_json.html', root=Dir.TEMP).write(NestedDict.to_html_table(data))
250
- # 展示html文件内容
251
- browser(htmlfile)
252
-
253
-
254
- def browser_jsons_kv(fd, files='**/*.json', encoding=None, max_items=10, max_value_length=100):
255
- """ demo_keyvaluescounter,查看目录下json数据的键值对信息
256
-
257
- :param fd: 目录
258
- :param files: 匹配的文件格式
259
- :param encoding: 文件编码
260
- :param max_items: 项目显示上限,有些数据项目太多了,要精简下
261
- 设为假值则不设上限
262
- :param max_value_length: 添加的值,进行截断,防止有些值太长
263
- :return:
264
- """
265
- kvc = KeyValuesCounter()
266
- d = Dir(fd)
267
- for p in d.select_files(files):
268
- # print(p)
269
- data = p.read(encoding=encoding, mode='.json')
270
- kvc.add(data, max_value_length=max_value_length)
271
- p = File(r'demo_keyvaluescounter.html', Dir.TEMP)
272
- p.write(kvc.to_html_table(max_items=max_items), if_exists='replace')
273
- browser(p.to_str())
274
-
275
-
276
- def check_repeat_filenames(dir, key='stem', link=True):
277
- """ 检查目录下文件结构情况的功能函数
278
-
279
- https://www.yuque.com/xlpr/pyxllib/check_repeat_filenames
280
-
281
- :param dir: 目录Dir类型,也可以输入路径,如果没有files成员,则默认会获取所有子文件
282
- :param key: 以什么作为行分组的key名称,基本上都是用'stem',偶尔可能用'name'
283
- 遇到要忽略 -eps-to-pdf.pdf 这种后缀的,也可以自定义处理规则
284
- 例如 key=lambda p: re.sub(r'-eps-to-pdf', '', p.stem).lower()
285
- :param link: 默认True会生成文件超链接
286
- :return: 一个df表格,行按照key的规则分组,列默认按suffix扩展名分组
287
- """
288
- # 1 智能解析dir参数
289
- if not isinstance(dir, Dir):
290
- dir = Dir(dir)
291
- if not dir.subs:
292
- dir = dir.select('**/*', type_='file')
293
-
294
- # 2 辅助函数,智能解析key参数
295
- if isinstance(key, str):
296
- def extract_key(p):
297
- return getattr(p, key).lower()
298
- elif callable(key):
299
- extract_key = key
300
- else:
301
- raise TypeError
302
-
303
- # 3 制作df表格数据
304
- columns = ['key', 'suffix', 'filename']
305
- li = []
306
- for f in dir.subs:
307
- p = File(f)
308
- li.append([extract_key(p), p.suffix.lower(), f])
309
- df = pd.DataFrame.from_records(li, columns=columns)
310
-
311
- # 4 分组
312
- def joinfile(files):
313
- if len(files):
314
- if link:
315
- return ', '.join([f'<a href="{dir / f}" target="_blank">{f}</a>' for f in files])
316
- else:
317
- return ', '.join(files)
318
- else:
319
- return ''
320
-
321
- groups = df.groupby(['key', 'suffix']).agg({'filename': joinfile})
322
- groups.reset_index(inplace=True)
323
- view_table = groups.pivot(index='key', columns='suffix', values='filename')
324
- view_table.fillna('', inplace=True)
325
-
326
- # 5 判断每个key的文件总数
327
- count_df = df.groupby('key').agg({'filename': 'count'})
328
- view_table = pd.concat([view_table, count_df], axis=1)
329
- view_table.rename({'filename': 'count'}, axis=1, inplace=True)
330
-
331
- browser(view_table, to_html_args={'escape': not link})
332
- return df
333
-
334
-
335
- def getmembers(object, predicate=None):
336
- """自己重写改动的 inspect.getmembers
337
-
338
- TODO 这个实现好复杂,对于成员,直接用dir不就好了?
339
- """
340
- from inspect import isclass, getmro
341
- import types
342
-
343
- if isclass(object):
344
- mro = (object,) + getmro(object)
345
- else:
346
- mro = ()
347
- results = []
348
- processed = set()
349
- names = dir(object)
350
- # :dd any DynamicClassAttributes to the list of names if object is a class;
351
- # this may result in duplicate entries if, for example, a virtual
352
- # attribute with the same name as a DynamicClassAttribute exists
353
- try:
354
- for base in object.__bases__:
355
- for k, v in base.__dict__.items():
356
- if isinstance(v, types.DynamicClassAttribute):
357
- names.append(k)
358
- except AttributeError:
359
- pass
360
- for key in names:
361
- # First try to get the value via getattr. Some descriptors don't
362
- # like calling their __get__ (see bug #1785), so fall back to
363
- # looking in the __dict__.
364
- try:
365
- value = getattr(object, key)
366
- # handle the duplicate key
367
- if key in processed:
368
- raise AttributeError
369
- # except AttributeError:
370
- except: # 加了这种异常获取,190919周四15:14,sqlalchemy.exc.InvalidRequestError
371
- dprint(key) # 抓不到对应的这个属性
372
- for base in mro:
373
- if key in base.__dict__:
374
- value = base.__dict__[key]
375
- break
376
- else:
377
- # could be a (currently) missing slot member, or a buggy
378
- # __dir__; discard and move on
379
- continue
380
-
381
- if not predicate or predicate(value):
382
- results.append((key, value))
383
- processed.add(key)
384
- results.sort(key=lambda pair: pair[0])
385
- return results
386
-
387
-
388
def showdir(c, *, to_html=None, printf=True, width=200):
    """Show the members of an object as two tables: fields and callables.

    Builds a header line (object name, MRO of its type, shallow and recursive
    size) followed by one table of non-callable members and one table of
    callable members, rendered either as HTML or as plain text.

    :param c: the object to inspect
    :param to_html:
        None (default) resolves to True when ``sys.platform == 'win32'`` and to False otherwise.
        A truthy value renders HTML, writes it to a file and opens that file with ``browser``;
        a string value is used as the path of the output file.
        A falsy value renders plain text.
    :param printf:
        True (default) opens the browser / prints to the console.
        False suppresses that output; the HTML file (if any) is still written
        and the rendered text is still returned.
    :param width: maximum number of characters shown for a field's value
    :return: the rendered report as a single string
    """
    # 1 Header line describing the object
    from humanfriendly import format_size

    res = []
    # presumably the source text of the caller's first argument — confirm against func_input_message
    object_name = func_input_message(2)['argnames'][0]
    if to_html is None:
        to_html = sys.platform == 'win32'
    newline = '<br/>' if to_html else '\n'

    t = f'==== 对象名称:{object_name},类继承关系:{inspect.getmro(type(c))},' \
        + f'内存消耗:{format_size(sys.getsizeof(c), binary=True)}' \
        + f'(递归子类总大小:{format_size(getasizeof(c), binary=True)}) ===='

    if to_html:
        res.append('<p>')
        t = html.escape(t) + '</p>'
    res.append(t + newline)

    # 2 Render one DataFrame, with some HTML fine-tuning
    def df2str(df):
        if to_html:
            df = df.applymap(str)  # non-text cells frequently make rendering fail
            df.index += 1  # number rows from 1
            # pd.options.display.max_colwidth = -1  # enable temporarily to show full contents
            t = df.to_html()
            table = BeautifulSoup(t, 'lxml')
            table.thead.tr['bgcolor'] = 'LightSkyBlue'  # header colour
            # Following PyCharm's naming: F(ield) for member variables, M(ember) for methods
            ch = 'F' if '成员变量' in table.tr.contents[3].string else 'M'
            table.thead.tr.th.string = f'编号{ch}{len(df)}'
            t = table.prettify()
        else:
            # Plain text does not align with wide (CJK) characters; acceptable because
            # showdir is mainly meant to be viewed in a browser.
            t = dataframe_str(df)
        return t

    # 3 Collect fields and callables
    members = getmembers(c)

    # Fields: every member whose current attribute value is not callable
    ls = []
    for k, v in members:
        if callable(getattr(c, k)):
            continue
        if k.endswith(r'________'):  # special marker name used by the author's code; hidden
            continue
        attr = getattr(c, k)
        if isinstance(attr, enum.IntFlag):  # e.g. re.RegexFlag: also show the integer value
            v = typename(attr) + ',' + str(int(attr)) + ',' + str(v)
        else:
            try:
                text = str(v)
            except Exception:
                text = '取不到str值'

            v = typename(attr) + ',' + shorten(text, width=width)
        ls.append([k, v])
    df = pd.DataFrame.from_records(ls, columns=['成员变量', '描述'])
    res.append(df2str(df) + newline)

    # Callables
    methods = [m for m in members if callable(getattr(c, m[0]))]
    df = pd.DataFrame.from_records(methods, columns=['成员函数', '描述'])
    res.append(df2str(df) + newline)
    res = newline.join(res)

    # 4 Write the HTML file and open it, or print to the console.
    # ``printf`` is honoured here; previously it was documented but never checked.
    if to_html:
        if isinstance(to_html, str):
            # a string is taken as the output file path
            f = File(to_html, suffix='.html')
        else:
            f = File(object_name, Dir.TEMP, suffix='.html')

        filename = f.write(ensure_gbk(res), if_exists='replace').to_str()
        if printf:
            browser(filename)
    elif printf:  # console output (the default outside win32)
        print(res)

    return res
484
-
485
-
486
# Register both helpers in builtins so they can be used anywhere without an import.
builtins.browser = browser
builtins.showdir = showdir
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : 陈坤泽
4
+ # @Email : 877362867@qq.com
5
+ # @Date : 2020/05/30 22:43
6
+
7
+ import builtins
8
+ import enum
9
+ import html
10
+ import inspect
11
+ import os
12
+ import subprocess
13
+ import sys
14
+ import datetime
15
+ import platform
16
+ import re
17
+
18
+ import pandas as pd
19
+ from bs4 import BeautifulSoup
20
+
21
+ from pyxllib.prog.newbie import typename
22
+ from pyxllib.prog.pupil import dprint, func_input_message, is_url, is_file
23
+ from pyxllib.prog.specialist.common import TypeConvert, NestedDict, KeyValuesCounter, dataframe_str
24
+ from pyxllib.prog.specialist.tictoc import TicToc
25
+ from pyxllib.text.pupil import ensure_gbk, shorten
26
+ from pyxllib.file.specialist.dirlib import File, Dir, get_etag, XlPath
27
+
28
+
29
def getasizeof(*objs, **opts):
    """Return the recursive size of ``objs`` as computed by ``pympler.asizeof``.

    :param objs: objects to measure, forwarded to ``asizeof.asizeof``
    :param opts: keyword options forwarded to ``asizeof.asizeof``
    :return: the value returned by ``asizeof.asizeof``, or -1 if it raises
    """
    from pympler import asizeof

    try:
        return asizeof.asizeof(*objs, **opts)
    except Exception:
        # Best effort: some objects cannot be measured (the original note cites
        # sqlalchemy.exc.InvalidRequestError). ``Exception`` rather than a bare
        # ``except`` so KeyboardInterrupt/SystemExit still propagate.
        return -1
39
+
40
+
41
def viewfiles(procname, *files, **kwargs):
    """Open ``files`` with the program ``procname``.

    :param procname: name (or path) of the program to launch
    :param files: items to open. An item for which ``File(t)`` is truthy or
        ``is_url(t)`` holds is passed to the program as ``str(t)``; any other
        item is written to a file under ``Dir.TEMP`` first and that file is opened.
    :param kwargs:
        wait: truthy runs the program with ``subprocess.run`` (blocks until it exits);
            otherwise ``subprocess.Popen`` is used and the call returns immediately
        filename: base name and extension used for the generated temp files
        if_exists: forwarded to ``File.write`` for generated files (default ``'error'``)
        save: accepted for compatibility; not used by this function
    :return: ``tictoc.tocvalue()`` measured from just before the launch
    :raises FileNotFoundError: when the program cannot be found

    If ``procname`` is ``'chrome'``/``'chrome.exe'`` and cannot be found, ``'explorer'``
    is tried with the same, already prepared file list.

    Note from the original author: Beyond Compare behaves quite differently from other
    programs, a dedicated bcompare function is recommended for it. Programs known to
    accept several files: chrome, notepad++, texstudio.

    >> ls = list(range(100))
    >> viewfiles('notepad++', ls, save=True)
    """
    # 1 Resolve every argument to a path/url the program can open
    basename = ext = None
    if kwargs.get('filename'):
        basename, ext = os.path.splitext(kwargs['filename'])
    if_exists = kwargs.get('if_exists', 'error')

    ls = []  # final list of paths/urls handed to the program
    for t in files:
        if File(t) or is_url(t):
            ls.append(str(t))
        else:
            bn = basename or ...
            ls.append(File(bn, Dir.TEMP, suffix=ext).write(t, if_exists=if_exists).to_str())

    # 2 Launch the program (and time the external operation)
    tictoc = TicToc()
    launch = subprocess.run if kwargs.get('wait') else subprocess.Popen

    candidates = [procname]
    if procname in ('chrome', 'chrome.exe'):
        candidates.append('explorer')  # fall back to the system default when chrome is missing

    missing = None
    for name in candidates:
        try:
            launch([name, *ls])
        except FileNotFoundError as err:
            missing = err
            continue
        break
    else:
        # Reuses ``ls`` for the retry, so temp files are not generated a second time.
        raise FileNotFoundError(f'未找到程序:{name}。请检查是否有安装及设置了环境变量。') from missing
    return tictoc.tocvalue()
91
+
92
+
93
class Explorer:
    """Callable wrapper that launches an external program through ``subprocess``."""

    def __init__(self, app='explorer', shell=False):
        """
        :param app: program name, path or command prefix placed first on the command line
        :param shell: default ``shell`` value used when a call does not pass its own
        """
        self.app = app
        self.shell = shell

    # def check_app(self, raise_error=False):
    #     """ Check whether the app can be found
    #
    #     FIXME cannot be checked in advance: running some commands has real effects,
    #         so there is no silent test. E.g. explorer opens a file-manager window.
    #     """
    #     try:
    #         subprocess.run(self.app)
    #         return True
    #     except FileNotFoundError:
    #         if raise_error:
    #             raise FileNotFoundError(f'Application/Command not found:{self.app}')
    #         return False

    def __call__(self, *args, wait=True, **kwargs):
        """Run ``self.app`` with ``args``.

        :param args: command line arguments
        :param wait: True uses ``subprocess.run`` (blocks until the program exits),
            False uses ``subprocess.Popen`` and returns immediately
        :param kwargs: extra options forwarded to the subprocess call
        :return: None
        :raises FileNotFoundError: when the program cannot be found

        TODO analyse / expose the return value of the subprocess call
        """
        cmd = [self.app, *args]
        kwargs.setdefault('shell', self.shell)

        if re.match(r'open\s', self.app):
            # An app such as 'open -a "Google Chrome"' is a command prefix, not a single
            # executable, so the whole line goes through the shell as one string.
            # All arguments are joined (previously only the first was used and a call
            # without arguments raised IndexError).
            # NOTE(review): arguments are not shell-quoted; paths with spaces will split.
            cmd = ' '.join(str(a) for a in cmd)
            kwargs['shell'] = True

        launch = subprocess.run if wait else subprocess.Popen
        try:
            launch(cmd, **kwargs)
        except FileNotFoundError as err:
            # cmd may be a list or an already joined string; build the message accordingly
            shown = cmd if isinstance(cmd, str) else ' '.join(str(a) for a in cmd)
            raise FileNotFoundError(f'Application/Command not found:{shown}') from err
135
+
136
+
137
class Browser(Explorer):
    """ View data and files in a web browser.

    The standard library ``webbrowser`` offers something similar, but (per the original
    author) it targets urls; this class mainly converts arbitrary data to files and
    opens those.
    """

    def __init__(self, app=None, shell=False):
        """
        :param app: browser program, e.g. 'msedge' or 'chrome'; an absolute path also works.
            None picks a default per platform:
            Windows: 'chrome' or 'msedge' if their standard install directory appears in
                PATH, otherwise the standard absolute path of chrome.exe
            Linux: 'google-chrome'
            anything else: 'open -a "Google Chrome"'
            Pass ``app`` explicitly for other browsers or non-standard install locations.
        :param shell: default ``shell`` value, see Explorer
        """
        if app is None:
            system = platform.system()
            if system == 'Windows':
                # .get: a missing PATH variable must not make construction fail
                paths = os.environ.get('PATH', '')
                chrome_dir = r'Google\Chrome\Application'
                msedge_dir = r'Microsoft\Edge\Application'
                if chrome_dir in paths:
                    app = 'chrome'
                elif msedge_dir in paths:
                    app = 'msedge'
                else:  # default to Chrome; the author notes that 'explorer' did not work
                    app = 'C:/Program Files/Google/Chrome/Application/chrome.exe'
            elif system == 'Linux':  # Linux (including Ubuntu)
                # change to e.g. 'firefox' to use another browser
                app = 'google-chrome'
            else:
                # every other system gets the macOS-style launcher command
                app = 'open -a "Google Chrome"'
        super().__init__(app, shell)

    @classmethod
    def to_brower_file(cls, arg, file=None, clsmsg=True, to_html_args=None):
        """ Store ``arg`` in a file in a form that suits viewing in a browser.

        :param arg: data of any type
        :param file: target file name; when None a file is created under ``Dir.TEMP``
            (for DataFrame/text content it is renamed to its ``get_etag`` value)
        :param clsmsg: prepend a line with the class hierarchy and memory usage
            (only used for DataFrame content)
        :param to_html_args: dict of keyword arguments for ``df.to_html``, e.g.
            escape: True (default) shows content literally; False lets html such as
            links stored in the df take effect
        :return: ``arg`` itself when it already is a file/url, otherwise the written file

        In the current version "browser friendly" means: convert to a DataFrame table
        whenever possible.
        """
        # 1 Already a file or url: nothing to do
        if is_file(arg) or is_url(arg) or isinstance(arg, File):
            return arg

        # 2 Otherwise convert to a file first
        arg_ = TypeConvert.try2df(arg)
        if isinstance(arg_, pd.DataFrame):  # DataFrames display best as html tables
            if clsmsg:
                t = f'==== 类继承关系:{inspect.getmro(type(arg))},' \
                    + f'内存消耗:{sys.getsizeof(arg)}(递归子类总大小:{getasizeof(arg)})Byte ===='
                content = '<p>' + html.escape(t) + '</p>'
            else:
                content = ''
            # TODO colour the header row blue
            content += arg_.to_html(**(to_html_args or {}))
            if file is None:
                file = File(..., Dir.TEMP, suffix='.html').write(content)
                file = file.rename(get_etag(str(file)) + '.html', if_exists='replace')
            else:
                file = File(file).write(content)
        elif getattr(arg, 'render', None):  # e.g. pyecharts objects: render() produces html
            try:
                name = arg.options['title'][0]['text']
            except (LookupError, TypeError):
                name = datetime.datetime.now().strftime('%H%M%S_%f')
            if file is None:
                file = File(name, Dir.TEMP, suffix='.html').to_str()
            arg.render(path=str(file))
        else:  # anything else is written as plain text
            # if File.safe_init(arg).exists():
            #     file = arg
            if file is None:
                file = File(..., Dir.TEMP, suffix='.txt').write(arg)
                file = file.rename(get_etag(str(file)) + file.suffix, if_exists='replace')
            else:
                file = File(file).write(arg)
        return file

    def html(self, arg, **kwargs):
        """ Write ``arg`` to an .html file and show it.

        :param arg: content to write
        :param kwargs: ``file`` optionally names the output file (under ``Dir.TEMP``);
            the remaining items are forwarded to ``__call__``
        """
        if 'file' in kwargs:
            file = File(kwargs['file'], Dir.TEMP, suffix='.html').write(arg)
            del kwargs['file']
        else:
            file = File(..., Dir.TEMP, suffix='.html').write(arg)
            file = file.rename(get_etag(str(file)) + file.suffix, if_exists='replace')
        # NOTE(review): ``arg`` (not the file just written) is passed on, so __call__
        # converts and writes the content again to ``file`` — confirm this is intended.
        self.__call__(arg, file, **kwargs)

    def url(self, *args, wait=True, **kwargs):
        """ Pass ``args`` straight to the browser command line, without file conversion. """
        super().__call__(*args, wait=wait, **kwargs)

    def __call__(self, arg, file=None, *, wait=True, clsmsg=True, to_html_args=None,
                 **kwargs):  # NOQA Browser deliberately differs from the base-class interface
        """ Store ``arg`` in a file if needed, then open that file in the browser.

        :param arg: an existing file path, or any data accepted by ``to_brower_file``
        :param file: optional target file name; by default a temp file named by its
            etag hash (Qiniu style, per the original author) is generated
        :param wait: see Explorer.__call__
        :param clsmsg: see to_brower_file
        :param to_html_args: see to_brower_file
        :param kwargs: forwarded to Explorer.__call__
        """
        f = XlPath.safe_init(arg)
        if f is not None and f.is_file():
            file = arg
        else:
            file = str(self.to_brower_file(arg, file, clsmsg=clsmsg, to_html_args=to_html_args))
        super().__call__(str(file), wait=wait, **kwargs)


# Module-level default instance; this is the ``browser`` callable used by the helpers below.
browser = Browser()
252
+
253
+
254
def browser_json(f):
    """ Visualise the structure of a json file in the browser.

    :param f: the file to read
    """
    payload = File(f).read()
    # NestedDict.to_html_table turns the data into nested html tables;
    # the result is stored under the temp directory
    target = File(r'chrome_json.html', root=Dir.TEMP)
    written = target.write(NestedDict.to_html_table(payload))
    # show the generated html file
    browser(written)
261
+
262
+
263
def browser_jsons_kv(fd, files='**/*.json', encoding=None, max_items=10, max_value_length=100):
    """ Show key/value statistics of the json files under a directory (KeyValuesCounter demo).

    :param fd: directory
    :param files: pattern of the files to include
    :param encoding: file encoding
    :param max_items: upper limit of items shown (some data has too many);
        a falsy value means no limit
    :param max_value_length: added values are truncated to this length
    :return: None
    """
    counter = KeyValuesCounter()
    folder = Dir(fd)
    for json_file in folder.select_files(files):
        content = json_file.read(encoding=encoding, mode='.json')
        counter.add(content, max_value_length=max_value_length)

    report = File(r'demo_keyvaluescounter.html', Dir.TEMP)
    report.write(counter.to_html_table(max_items=max_items), if_exists='replace')
    browser(report.to_str())
283
+
284
+
285
def check_repeat_filenames(dir, key='stem', link=True):
    """ Inspect which files under a directory share the same key (e.g. the same stem).

    https://www.yuque.com/xlpr/pyxllib/check_repeat_filenames

    :param dir: a Dir, or a path that is converted to one; when it has no ``subs``
        all files below it are selected
    :param key: how rows are grouped. A string names a path attribute (usually 'stem',
        sometimes 'name') whose lower-cased value is the key; a callable computes the
        key from the path itself, e.g. to ignore a suffix such as -eps-to-pdf.pdf:
            key=lambda p: re.sub(r'-eps-to-pdf', '', p.stem).lower()
    :param link: True (default) renders file names as hyperlinks
    :return: the flat DataFrame with columns key, suffix, filename
        (the pivoted table, keys as rows and suffixes as columns, is shown in the browser)
    :raises TypeError: when ``key`` is neither a string nor callable
    """
    # 1 Normalise the dir argument
    if not isinstance(dir, Dir):
        dir = Dir(dir)
    if not dir.subs:
        dir = dir.select('**/*', type_='file')

    # 2 Normalise the key argument into a function
    if isinstance(key, str):
        def extract_key(p):
            return getattr(p, key).lower()
    elif callable(key):
        extract_key = key
    else:
        raise TypeError

    # 3 One record per file
    def to_record(sub):
        path = File(sub)
        return [extract_key(path), path.suffix.lower(), sub]

    records = [to_record(sub) for sub in dir.subs]
    df = pd.DataFrame.from_records(records, columns=['key', 'suffix', 'filename'])

    # 4 Group by (key, suffix) and join the file names of each group
    def joinfile(files):
        if not len(files):
            return ''
        if not link:
            return ', '.join(files)
        return ', '.join([f'<a href="{dir / f}" target="_blank">{f}</a>' for f in files])

    groups = df.groupby(['key', 'suffix']).agg({'filename': joinfile})
    groups = groups.reset_index()
    view_table = groups.pivot(index='key', columns='suffix', values='filename')
    view_table = view_table.fillna('')

    # 5 Append the number of files per key
    count_df = df.groupby('key').agg({'filename': 'count'})
    view_table = pd.concat([view_table, count_df], axis=1)
    view_table = view_table.rename(columns={'filename': 'count'})

    browser(view_table, to_html_args={'escape': not link})
    return df
342
+
343
+
344
def getmembers(object, predicate=None):
    """Modified copy of ``inspect.getmembers``.

    Differs from the standard version in that any ``Exception`` raised while reading
    an attribute (not only ``AttributeError``) makes the member fall back to the class
    ``__dict__`` lookup or be skipped; the failing name is reported with ``dprint``.

    :param object: the object or class to inspect
    :param predicate: optional callable; only members whose value satisfies it are kept
    :return: list of ``(name, value)`` pairs sorted by name

    TODO (original author): this is rather complex; would plain ``dir`` be enough?
    """
    from inspect import isclass, getmro
    import types

    if isclass(object):
        mro = (object,) + getmro(object)
    else:
        mro = ()
    results = []
    processed = set()
    names = dir(object)
    # Add any DynamicClassAttributes to the list of names if object is a class;
    # this may result in duplicate entries if, for example, a virtual
    # attribute with the same name as a DynamicClassAttribute exists
    try:
        for base in object.__bases__:
            for k, v in base.__dict__.items():
                if isinstance(v, types.DynamicClassAttribute):
                    names.append(k)
    except AttributeError:
        pass
    for key in names:
        # First try to get the value via getattr.  Some descriptors don't
        # like calling their __get__ (see bug #1785), so fall back to
        # looking in the __dict__.
        try:
            value = getattr(object, key)
            # handle the duplicate key
            if key in processed:
                raise AttributeError
        except Exception:
            # Broadened from AttributeError on 2019-09-19 because of
            # sqlalchemy.exc.InvalidRequestError; ``Exception`` rather than a bare
            # ``except`` so KeyboardInterrupt/SystemExit still propagate.
            dprint(key)  # report the attribute that could not be read
            for base in mro:
                if key in base.__dict__:
                    value = base.__dict__[key]
                    break
            else:
                # could be a (currently) missing slot member, or a buggy
                # __dir__; discard and move on
                continue

        if not predicate or predicate(value):
            results.append((key, value))
        processed.add(key)
    results.sort(key=lambda pair: pair[0])
    return results
395
+
396
+
397
def showdir(c, *, to_html=None, printf=True, width=200):
    """Show the members of an object as two tables: fields and callables.

    Builds a header line (object name, MRO of its type, shallow and recursive
    size) followed by one table of non-callable members and one table of
    callable members, rendered either as HTML or as plain text.

    :param c: the object to inspect
    :param to_html:
        None (default) resolves to True when ``sys.platform == 'win32'`` and to False otherwise.
        A truthy value renders HTML, writes it to a file and opens that file with ``browser``;
        a string value is used as the path of the output file.
        A falsy value renders plain text.
    :param printf:
        True (default) opens the browser / prints to the console.
        False suppresses that output; the HTML file (if any) is still written
        and the rendered text is still returned.
    :param width: maximum number of characters shown for a field's value
    :return: the rendered report as a single string
    """
    # 1 Header line describing the object
    from humanfriendly import format_size

    res = []
    # presumably the source text of the caller's first argument — confirm against func_input_message
    object_name = func_input_message(2)['argnames'][0]
    if to_html is None:
        to_html = sys.platform == 'win32'
    newline = '<br/>' if to_html else '\n'

    t = f'==== 对象名称:{object_name},类继承关系:{inspect.getmro(type(c))},' \
        + f'内存消耗:{format_size(sys.getsizeof(c), binary=True)}' \
        + f'(递归子类总大小:{format_size(getasizeof(c), binary=True)}) ===='

    if to_html:
        res.append('<p>')
        t = html.escape(t) + '</p>'
    res.append(t + newline)

    # 2 Render one DataFrame, with some HTML fine-tuning
    def df2str(df):
        if to_html:
            df = df.applymap(str)  # non-text cells frequently make rendering fail
            df.index += 1  # number rows from 1
            # pd.options.display.max_colwidth = -1  # enable temporarily to show full contents
            t = df.to_html()
            table = BeautifulSoup(t, 'lxml')
            table.thead.tr['bgcolor'] = 'LightSkyBlue'  # header colour
            # Following PyCharm's naming: F(ield) for member variables, M(ember) for methods
            ch = 'F' if '成员变量' in table.tr.contents[3].string else 'M'
            table.thead.tr.th.string = f'编号{ch}{len(df)}'
            t = table.prettify()
        else:
            # Plain text does not align with wide (CJK) characters; acceptable because
            # showdir is mainly meant to be viewed in a browser.
            t = dataframe_str(df)
        return t

    # 3 Collect fields and callables
    members = getmembers(c)

    # Fields: every member whose current attribute value is not callable
    ls = []
    for k, v in members:
        if callable(getattr(c, k)):
            continue
        if k.endswith(r'________'):  # special marker name used by the author's code; hidden
            continue
        attr = getattr(c, k)
        if isinstance(attr, enum.IntFlag):  # e.g. re.RegexFlag: also show the integer value
            # Separate the three parts with ',' like the branch below does; joining
            # with empty strings ran type name, int value and repr together.
            v = typename(attr) + ',' + str(int(attr)) + ',' + str(v)
        else:
            try:
                text = str(v)
            except Exception:
                text = '取不到str值'

            v = typename(attr) + ',' + shorten(text, width=width)
        ls.append([k, v])
    df = pd.DataFrame.from_records(ls, columns=['成员变量', '描述'])
    res.append(df2str(df) + newline)

    # Callables
    methods = [m for m in members if callable(getattr(c, m[0]))]
    df = pd.DataFrame.from_records(methods, columns=['成员函数', '描述'])
    res.append(df2str(df) + newline)
    res = newline.join(res)

    # 4 Write the HTML file and open it, or print to the console.
    # ``printf`` is honoured here; previously it was documented but never checked.
    if to_html:
        if isinstance(to_html, str):
            # a string is taken as the output file path
            f = File(to_html, suffix='.html')
        else:
            f = File(object_name, Dir.TEMP, suffix='.html')

        filename = f.write(ensure_gbk(res), if_exists='replace').to_str()
        if printf:
            browser(filename)
    elif printf:  # console output (the default outside win32)
        print(res)

    return res
493
+
494
+
495
# Register both helpers in builtins so they can be used anywhere without an import.
builtins.browser = browser
builtins.showdir = showdir