pyxllib 0.3.96__py3-none-any.whl → 0.3.197__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306) hide show
  1. pyxllib/algo/geo.py +12 -0
  2. pyxllib/algo/intervals.py +1 -1
  3. pyxllib/algo/matcher.py +78 -0
  4. pyxllib/algo/pupil.py +187 -19
  5. pyxllib/algo/specialist.py +2 -1
  6. pyxllib/algo/stat.py +38 -2
  7. {pyxlpr → pyxllib/autogui}/__init__.py +1 -1
  8. pyxllib/autogui/activewin.py +246 -0
  9. pyxllib/autogui/all.py +9 -0
  10. pyxllib/{ext/autogui → autogui}/autogui.py +40 -11
  11. pyxllib/autogui/uiautolib.py +362 -0
  12. pyxllib/autogui/wechat.py +827 -0
  13. pyxllib/autogui/wechat_msg.py +421 -0
  14. pyxllib/autogui/wxautolib.py +84 -0
  15. pyxllib/cv/slidercaptcha.py +137 -0
  16. pyxllib/data/echarts.py +123 -12
  17. pyxllib/data/jsonlib.py +89 -0
  18. pyxllib/data/pglib.py +514 -30
  19. pyxllib/data/sqlite.py +231 -4
  20. pyxllib/ext/JLineViewer.py +14 -1
  21. pyxllib/ext/drissionlib.py +277 -0
  22. pyxllib/ext/kq5034lib.py +0 -1594
  23. pyxllib/ext/robustprocfile.py +497 -0
  24. pyxllib/ext/unixlib.py +6 -5
  25. pyxllib/ext/utools.py +108 -95
  26. pyxllib/ext/webhook.py +32 -14
  27. pyxllib/ext/wjxlib.py +88 -0
  28. pyxllib/ext/wpsapi.py +124 -0
  29. pyxllib/ext/xlwork.py +9 -0
  30. pyxllib/ext/yuquelib.py +1003 -71
  31. pyxllib/file/docxlib.py +1 -1
  32. pyxllib/file/libreoffice.py +165 -0
  33. pyxllib/file/movielib.py +9 -0
  34. pyxllib/file/packlib/__init__.py +112 -75
  35. pyxllib/file/pdflib.py +1 -1
  36. pyxllib/file/pupil.py +1 -1
  37. pyxllib/file/specialist/dirlib.py +1 -1
  38. pyxllib/file/specialist/download.py +10 -3
  39. pyxllib/file/specialist/filelib.py +266 -55
  40. pyxllib/file/xlsxlib.py +205 -50
  41. pyxllib/file/xlsyncfile.py +341 -0
  42. pyxllib/prog/cachetools.py +64 -0
  43. pyxllib/prog/filelock.py +42 -0
  44. pyxllib/prog/multiprogs.py +940 -0
  45. pyxllib/prog/newbie.py +9 -2
  46. pyxllib/prog/pupil.py +129 -60
  47. pyxllib/prog/specialist/__init__.py +176 -2
  48. pyxllib/prog/specialist/bc.py +5 -2
  49. pyxllib/prog/specialist/browser.py +11 -2
  50. pyxllib/prog/specialist/datetime.py +68 -0
  51. pyxllib/prog/specialist/tictoc.py +12 -13
  52. pyxllib/prog/specialist/xllog.py +5 -5
  53. pyxllib/prog/xlosenv.py +7 -0
  54. pyxllib/text/airscript.js +744 -0
  55. pyxllib/text/charclasslib.py +17 -5
  56. pyxllib/text/jiebalib.py +6 -3
  57. pyxllib/text/jinjalib.py +32 -0
  58. pyxllib/text/jsa_ai_prompt.md +271 -0
  59. pyxllib/text/jscode.py +159 -4
  60. pyxllib/text/nestenv.py +1 -1
  61. pyxllib/text/newbie.py +12 -0
  62. pyxllib/text/pupil/common.py +26 -0
  63. pyxllib/text/specialist/ptag.py +2 -2
  64. pyxllib/text/templates/echart_base.html +11 -0
  65. pyxllib/text/templates/highlight_code.html +17 -0
  66. pyxllib/text/templates/latex_editor.html +103 -0
  67. pyxllib/text/xmllib.py +76 -14
  68. pyxllib/xl.py +2 -1
  69. pyxllib-0.3.197.dist-info/METADATA +48 -0
  70. pyxllib-0.3.197.dist-info/RECORD +126 -0
  71. {pyxllib-0.3.96.dist-info → pyxllib-0.3.197.dist-info}/WHEEL +1 -2
  72. pyxllib/ext/autogui/__init__.py +0 -8
  73. pyxllib-0.3.96.dist-info/METADATA +0 -51
  74. pyxllib-0.3.96.dist-info/RECORD +0 -333
  75. pyxllib-0.3.96.dist-info/top_level.txt +0 -2
  76. pyxlpr/ai/__init__.py +0 -5
  77. pyxlpr/ai/clientlib.py +0 -1281
  78. pyxlpr/ai/specialist.py +0 -286
  79. pyxlpr/ai/torch_app.py +0 -172
  80. pyxlpr/ai/xlpaddle.py +0 -655
  81. pyxlpr/ai/xltorch.py +0 -705
  82. pyxlpr/data/__init__.py +0 -11
  83. pyxlpr/data/coco.py +0 -1325
  84. pyxlpr/data/datacls.py +0 -365
  85. pyxlpr/data/datasets.py +0 -200
  86. pyxlpr/data/gptlib.py +0 -1291
  87. pyxlpr/data/icdar/__init__.py +0 -96
  88. pyxlpr/data/icdar/deteval.py +0 -377
  89. pyxlpr/data/icdar/icdar2013.py +0 -341
  90. pyxlpr/data/icdar/iou.py +0 -340
  91. pyxlpr/data/icdar/rrc_evaluation_funcs_1_1.py +0 -463
  92. pyxlpr/data/imtextline.py +0 -473
  93. pyxlpr/data/labelme.py +0 -866
  94. pyxlpr/data/removeline.py +0 -179
  95. pyxlpr/data/specialist.py +0 -57
  96. pyxlpr/eval/__init__.py +0 -85
  97. pyxlpr/paddleocr.py +0 -776
  98. pyxlpr/ppocr/__init__.py +0 -15
  99. pyxlpr/ppocr/configs/rec/multi_language/generate_multi_language_configs.py +0 -226
  100. pyxlpr/ppocr/data/__init__.py +0 -135
  101. pyxlpr/ppocr/data/imaug/ColorJitter.py +0 -26
  102. pyxlpr/ppocr/data/imaug/__init__.py +0 -67
  103. pyxlpr/ppocr/data/imaug/copy_paste.py +0 -170
  104. pyxlpr/ppocr/data/imaug/east_process.py +0 -437
  105. pyxlpr/ppocr/data/imaug/gen_table_mask.py +0 -244
  106. pyxlpr/ppocr/data/imaug/iaa_augment.py +0 -114
  107. pyxlpr/ppocr/data/imaug/label_ops.py +0 -789
  108. pyxlpr/ppocr/data/imaug/make_border_map.py +0 -184
  109. pyxlpr/ppocr/data/imaug/make_pse_gt.py +0 -106
  110. pyxlpr/ppocr/data/imaug/make_shrink_map.py +0 -126
  111. pyxlpr/ppocr/data/imaug/operators.py +0 -433
  112. pyxlpr/ppocr/data/imaug/pg_process.py +0 -906
  113. pyxlpr/ppocr/data/imaug/randaugment.py +0 -143
  114. pyxlpr/ppocr/data/imaug/random_crop_data.py +0 -239
  115. pyxlpr/ppocr/data/imaug/rec_img_aug.py +0 -533
  116. pyxlpr/ppocr/data/imaug/sast_process.py +0 -777
  117. pyxlpr/ppocr/data/imaug/text_image_aug/__init__.py +0 -17
  118. pyxlpr/ppocr/data/imaug/text_image_aug/augment.py +0 -120
  119. pyxlpr/ppocr/data/imaug/text_image_aug/warp_mls.py +0 -168
  120. pyxlpr/ppocr/data/lmdb_dataset.py +0 -115
  121. pyxlpr/ppocr/data/pgnet_dataset.py +0 -104
  122. pyxlpr/ppocr/data/pubtab_dataset.py +0 -107
  123. pyxlpr/ppocr/data/simple_dataset.py +0 -372
  124. pyxlpr/ppocr/losses/__init__.py +0 -61
  125. pyxlpr/ppocr/losses/ace_loss.py +0 -52
  126. pyxlpr/ppocr/losses/basic_loss.py +0 -135
  127. pyxlpr/ppocr/losses/center_loss.py +0 -88
  128. pyxlpr/ppocr/losses/cls_loss.py +0 -30
  129. pyxlpr/ppocr/losses/combined_loss.py +0 -67
  130. pyxlpr/ppocr/losses/det_basic_loss.py +0 -208
  131. pyxlpr/ppocr/losses/det_db_loss.py +0 -80
  132. pyxlpr/ppocr/losses/det_east_loss.py +0 -63
  133. pyxlpr/ppocr/losses/det_pse_loss.py +0 -149
  134. pyxlpr/ppocr/losses/det_sast_loss.py +0 -121
  135. pyxlpr/ppocr/losses/distillation_loss.py +0 -272
  136. pyxlpr/ppocr/losses/e2e_pg_loss.py +0 -140
  137. pyxlpr/ppocr/losses/kie_sdmgr_loss.py +0 -113
  138. pyxlpr/ppocr/losses/rec_aster_loss.py +0 -99
  139. pyxlpr/ppocr/losses/rec_att_loss.py +0 -39
  140. pyxlpr/ppocr/losses/rec_ctc_loss.py +0 -44
  141. pyxlpr/ppocr/losses/rec_enhanced_ctc_loss.py +0 -70
  142. pyxlpr/ppocr/losses/rec_nrtr_loss.py +0 -30
  143. pyxlpr/ppocr/losses/rec_sar_loss.py +0 -28
  144. pyxlpr/ppocr/losses/rec_srn_loss.py +0 -47
  145. pyxlpr/ppocr/losses/table_att_loss.py +0 -109
  146. pyxlpr/ppocr/metrics/__init__.py +0 -44
  147. pyxlpr/ppocr/metrics/cls_metric.py +0 -45
  148. pyxlpr/ppocr/metrics/det_metric.py +0 -82
  149. pyxlpr/ppocr/metrics/distillation_metric.py +0 -73
  150. pyxlpr/ppocr/metrics/e2e_metric.py +0 -86
  151. pyxlpr/ppocr/metrics/eval_det_iou.py +0 -274
  152. pyxlpr/ppocr/metrics/kie_metric.py +0 -70
  153. pyxlpr/ppocr/metrics/rec_metric.py +0 -75
  154. pyxlpr/ppocr/metrics/table_metric.py +0 -50
  155. pyxlpr/ppocr/modeling/architectures/__init__.py +0 -32
  156. pyxlpr/ppocr/modeling/architectures/base_model.py +0 -88
  157. pyxlpr/ppocr/modeling/architectures/distillation_model.py +0 -60
  158. pyxlpr/ppocr/modeling/backbones/__init__.py +0 -54
  159. pyxlpr/ppocr/modeling/backbones/det_mobilenet_v3.py +0 -268
  160. pyxlpr/ppocr/modeling/backbones/det_resnet_vd.py +0 -246
  161. pyxlpr/ppocr/modeling/backbones/det_resnet_vd_sast.py +0 -285
  162. pyxlpr/ppocr/modeling/backbones/e2e_resnet_vd_pg.py +0 -265
  163. pyxlpr/ppocr/modeling/backbones/kie_unet_sdmgr.py +0 -186
  164. pyxlpr/ppocr/modeling/backbones/rec_mobilenet_v3.py +0 -138
  165. pyxlpr/ppocr/modeling/backbones/rec_mv1_enhance.py +0 -258
  166. pyxlpr/ppocr/modeling/backbones/rec_nrtr_mtb.py +0 -48
  167. pyxlpr/ppocr/modeling/backbones/rec_resnet_31.py +0 -210
  168. pyxlpr/ppocr/modeling/backbones/rec_resnet_aster.py +0 -143
  169. pyxlpr/ppocr/modeling/backbones/rec_resnet_fpn.py +0 -307
  170. pyxlpr/ppocr/modeling/backbones/rec_resnet_vd.py +0 -286
  171. pyxlpr/ppocr/modeling/heads/__init__.py +0 -54
  172. pyxlpr/ppocr/modeling/heads/cls_head.py +0 -52
  173. pyxlpr/ppocr/modeling/heads/det_db_head.py +0 -118
  174. pyxlpr/ppocr/modeling/heads/det_east_head.py +0 -121
  175. pyxlpr/ppocr/modeling/heads/det_pse_head.py +0 -37
  176. pyxlpr/ppocr/modeling/heads/det_sast_head.py +0 -128
  177. pyxlpr/ppocr/modeling/heads/e2e_pg_head.py +0 -253
  178. pyxlpr/ppocr/modeling/heads/kie_sdmgr_head.py +0 -206
  179. pyxlpr/ppocr/modeling/heads/multiheadAttention.py +0 -163
  180. pyxlpr/ppocr/modeling/heads/rec_aster_head.py +0 -393
  181. pyxlpr/ppocr/modeling/heads/rec_att_head.py +0 -202
  182. pyxlpr/ppocr/modeling/heads/rec_ctc_head.py +0 -88
  183. pyxlpr/ppocr/modeling/heads/rec_nrtr_head.py +0 -826
  184. pyxlpr/ppocr/modeling/heads/rec_sar_head.py +0 -402
  185. pyxlpr/ppocr/modeling/heads/rec_srn_head.py +0 -280
  186. pyxlpr/ppocr/modeling/heads/self_attention.py +0 -406
  187. pyxlpr/ppocr/modeling/heads/table_att_head.py +0 -246
  188. pyxlpr/ppocr/modeling/necks/__init__.py +0 -32
  189. pyxlpr/ppocr/modeling/necks/db_fpn.py +0 -111
  190. pyxlpr/ppocr/modeling/necks/east_fpn.py +0 -188
  191. pyxlpr/ppocr/modeling/necks/fpn.py +0 -138
  192. pyxlpr/ppocr/modeling/necks/pg_fpn.py +0 -314
  193. pyxlpr/ppocr/modeling/necks/rnn.py +0 -92
  194. pyxlpr/ppocr/modeling/necks/sast_fpn.py +0 -284
  195. pyxlpr/ppocr/modeling/necks/table_fpn.py +0 -110
  196. pyxlpr/ppocr/modeling/transforms/__init__.py +0 -28
  197. pyxlpr/ppocr/modeling/transforms/stn.py +0 -135
  198. pyxlpr/ppocr/modeling/transforms/tps.py +0 -308
  199. pyxlpr/ppocr/modeling/transforms/tps_spatial_transformer.py +0 -156
  200. pyxlpr/ppocr/optimizer/__init__.py +0 -61
  201. pyxlpr/ppocr/optimizer/learning_rate.py +0 -228
  202. pyxlpr/ppocr/optimizer/lr_scheduler.py +0 -49
  203. pyxlpr/ppocr/optimizer/optimizer.py +0 -160
  204. pyxlpr/ppocr/optimizer/regularizer.py +0 -52
  205. pyxlpr/ppocr/postprocess/__init__.py +0 -55
  206. pyxlpr/ppocr/postprocess/cls_postprocess.py +0 -33
  207. pyxlpr/ppocr/postprocess/db_postprocess.py +0 -234
  208. pyxlpr/ppocr/postprocess/east_postprocess.py +0 -143
  209. pyxlpr/ppocr/postprocess/locality_aware_nms.py +0 -200
  210. pyxlpr/ppocr/postprocess/pg_postprocess.py +0 -52
  211. pyxlpr/ppocr/postprocess/pse_postprocess/__init__.py +0 -15
  212. pyxlpr/ppocr/postprocess/pse_postprocess/pse/__init__.py +0 -29
  213. pyxlpr/ppocr/postprocess/pse_postprocess/pse/setup.py +0 -14
  214. pyxlpr/ppocr/postprocess/pse_postprocess/pse_postprocess.py +0 -118
  215. pyxlpr/ppocr/postprocess/rec_postprocess.py +0 -654
  216. pyxlpr/ppocr/postprocess/sast_postprocess.py +0 -355
  217. pyxlpr/ppocr/tools/__init__.py +0 -14
  218. pyxlpr/ppocr/tools/eval.py +0 -83
  219. pyxlpr/ppocr/tools/export_center.py +0 -77
  220. pyxlpr/ppocr/tools/export_model.py +0 -129
  221. pyxlpr/ppocr/tools/infer/predict_cls.py +0 -151
  222. pyxlpr/ppocr/tools/infer/predict_det.py +0 -300
  223. pyxlpr/ppocr/tools/infer/predict_e2e.py +0 -169
  224. pyxlpr/ppocr/tools/infer/predict_rec.py +0 -414
  225. pyxlpr/ppocr/tools/infer/predict_system.py +0 -204
  226. pyxlpr/ppocr/tools/infer/utility.py +0 -629
  227. pyxlpr/ppocr/tools/infer_cls.py +0 -83
  228. pyxlpr/ppocr/tools/infer_det.py +0 -134
  229. pyxlpr/ppocr/tools/infer_e2e.py +0 -122
  230. pyxlpr/ppocr/tools/infer_kie.py +0 -153
  231. pyxlpr/ppocr/tools/infer_rec.py +0 -146
  232. pyxlpr/ppocr/tools/infer_table.py +0 -107
  233. pyxlpr/ppocr/tools/program.py +0 -596
  234. pyxlpr/ppocr/tools/test_hubserving.py +0 -117
  235. pyxlpr/ppocr/tools/train.py +0 -163
  236. pyxlpr/ppocr/tools/xlprog.py +0 -748
  237. pyxlpr/ppocr/utils/EN_symbol_dict.txt +0 -94
  238. pyxlpr/ppocr/utils/__init__.py +0 -24
  239. pyxlpr/ppocr/utils/dict/ar_dict.txt +0 -117
  240. pyxlpr/ppocr/utils/dict/arabic_dict.txt +0 -162
  241. pyxlpr/ppocr/utils/dict/be_dict.txt +0 -145
  242. pyxlpr/ppocr/utils/dict/bg_dict.txt +0 -140
  243. pyxlpr/ppocr/utils/dict/chinese_cht_dict.txt +0 -8421
  244. pyxlpr/ppocr/utils/dict/cyrillic_dict.txt +0 -163
  245. pyxlpr/ppocr/utils/dict/devanagari_dict.txt +0 -167
  246. pyxlpr/ppocr/utils/dict/en_dict.txt +0 -63
  247. pyxlpr/ppocr/utils/dict/fa_dict.txt +0 -136
  248. pyxlpr/ppocr/utils/dict/french_dict.txt +0 -136
  249. pyxlpr/ppocr/utils/dict/german_dict.txt +0 -143
  250. pyxlpr/ppocr/utils/dict/hi_dict.txt +0 -162
  251. pyxlpr/ppocr/utils/dict/it_dict.txt +0 -118
  252. pyxlpr/ppocr/utils/dict/japan_dict.txt +0 -4399
  253. pyxlpr/ppocr/utils/dict/ka_dict.txt +0 -153
  254. pyxlpr/ppocr/utils/dict/korean_dict.txt +0 -3688
  255. pyxlpr/ppocr/utils/dict/latin_dict.txt +0 -185
  256. pyxlpr/ppocr/utils/dict/mr_dict.txt +0 -153
  257. pyxlpr/ppocr/utils/dict/ne_dict.txt +0 -153
  258. pyxlpr/ppocr/utils/dict/oc_dict.txt +0 -96
  259. pyxlpr/ppocr/utils/dict/pu_dict.txt +0 -130
  260. pyxlpr/ppocr/utils/dict/rs_dict.txt +0 -91
  261. pyxlpr/ppocr/utils/dict/rsc_dict.txt +0 -134
  262. pyxlpr/ppocr/utils/dict/ru_dict.txt +0 -125
  263. pyxlpr/ppocr/utils/dict/ta_dict.txt +0 -128
  264. pyxlpr/ppocr/utils/dict/table_dict.txt +0 -277
  265. pyxlpr/ppocr/utils/dict/table_structure_dict.txt +0 -2759
  266. pyxlpr/ppocr/utils/dict/te_dict.txt +0 -151
  267. pyxlpr/ppocr/utils/dict/ug_dict.txt +0 -114
  268. pyxlpr/ppocr/utils/dict/uk_dict.txt +0 -142
  269. pyxlpr/ppocr/utils/dict/ur_dict.txt +0 -137
  270. pyxlpr/ppocr/utils/dict/xi_dict.txt +0 -110
  271. pyxlpr/ppocr/utils/dict90.txt +0 -90
  272. pyxlpr/ppocr/utils/e2e_metric/Deteval.py +0 -574
  273. pyxlpr/ppocr/utils/e2e_metric/polygon_fast.py +0 -83
  274. pyxlpr/ppocr/utils/e2e_utils/extract_batchsize.py +0 -87
  275. pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_fast.py +0 -457
  276. pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_slow.py +0 -592
  277. pyxlpr/ppocr/utils/e2e_utils/pgnet_pp_utils.py +0 -162
  278. pyxlpr/ppocr/utils/e2e_utils/visual.py +0 -162
  279. pyxlpr/ppocr/utils/en_dict.txt +0 -95
  280. pyxlpr/ppocr/utils/gen_label.py +0 -81
  281. pyxlpr/ppocr/utils/ic15_dict.txt +0 -36
  282. pyxlpr/ppocr/utils/iou.py +0 -54
  283. pyxlpr/ppocr/utils/logging.py +0 -69
  284. pyxlpr/ppocr/utils/network.py +0 -84
  285. pyxlpr/ppocr/utils/ppocr_keys_v1.txt +0 -6623
  286. pyxlpr/ppocr/utils/profiler.py +0 -110
  287. pyxlpr/ppocr/utils/save_load.py +0 -150
  288. pyxlpr/ppocr/utils/stats.py +0 -72
  289. pyxlpr/ppocr/utils/utility.py +0 -80
  290. pyxlpr/ppstructure/__init__.py +0 -13
  291. pyxlpr/ppstructure/predict_system.py +0 -187
  292. pyxlpr/ppstructure/table/__init__.py +0 -13
  293. pyxlpr/ppstructure/table/eval_table.py +0 -72
  294. pyxlpr/ppstructure/table/matcher.py +0 -192
  295. pyxlpr/ppstructure/table/predict_structure.py +0 -136
  296. pyxlpr/ppstructure/table/predict_table.py +0 -221
  297. pyxlpr/ppstructure/table/table_metric/__init__.py +0 -16
  298. pyxlpr/ppstructure/table/table_metric/parallel.py +0 -51
  299. pyxlpr/ppstructure/table/table_metric/table_metric.py +0 -247
  300. pyxlpr/ppstructure/table/tablepyxl/__init__.py +0 -13
  301. pyxlpr/ppstructure/table/tablepyxl/style.py +0 -283
  302. pyxlpr/ppstructure/table/tablepyxl/tablepyxl.py +0 -118
  303. pyxlpr/ppstructure/utility.py +0 -71
  304. pyxlpr/xlai.py +0 -10
  305. /pyxllib/{ext/autogui → autogui}/virtualkey.py +0 -0
  306. {pyxllib-0.3.96.dist-info → pyxllib-0.3.197.dist-info/licenses}/LICENSE +0 -0
@@ -57,6 +57,28 @@ def __2_qiniu():
57
57
  pass
58
58
 
59
59
 
60
+ class GetEtag:
61
+ """ 七牛原有etag功能基础上做封装 """
62
+
63
+ @classmethod
64
+ def from_bytes(cls, _bytes):
65
+ return qiniu.utils.etag_stream(io.BytesIO(_bytes))
66
+
67
+ @classmethod
68
+ def from_text(cls, text):
69
+ _bytes = text.encode('utf8')
70
+ return qiniu.utils.etag_stream(io.BytesIO(_bytes))
71
+
72
+ @classmethod
73
+ def from_file(cls, file):
74
+ return qiniu.etag(file)
75
+
76
+ @classmethod
77
+ def from_url(cls, url):
78
+ return cls(requests.get(url).content)
79
+
80
+
81
+ # @deprecated.deprecated
60
82
  def get_etag(arg):
61
83
  """ 七牛原有etag功能基础上做封装
62
84
 
@@ -784,7 +806,7 @@ class XlPath(type(pathlib.Path())):
784
806
 
785
807
  @classmethod
786
808
  def desktop(cls):
787
- if os.environ.get('Desktop', None): # 如果修改了win10默认的桌面路径,需要在环境变量添加一个正确的Desktop路径值
809
+ if os.getenv('Desktop', None): # 如果修改了win10默认的桌面路径,需要在环境变量添加一个正确的Desktop路径值
788
810
  desktop = os.environ['Desktop']
789
811
  else:
790
812
  desktop = os.path.join(pathlib.Path.home(), 'Desktop') # 这个不一定准,桌面是有可能被移到D盘等的
@@ -865,6 +887,7 @@ class XlPath(type(pathlib.Path())):
865
887
  return 0
866
888
 
867
889
  def mtime(self):
890
+ """ 文件的修改时间 """
868
891
  # windows会带小数,linux使用%Ts只有整数部分。
869
892
  # 这里不用四舍五入,取整数部分就是对应的。
870
893
  return int(os.stat(self).st_mtime)
@@ -900,7 +923,7 @@ class XlPath(type(pathlib.Path())):
900
923
  res.add(p.relative_to(self).as_posix())
901
924
  return res
902
925
 
903
- def relpath(self, ref_dir) -> str:
926
+ def relpath(self, ref_dir) -> 'XlPath':
904
927
  r""" 当前路径,相对于ref_dir的路径位置
905
928
 
906
929
  >>> File('C:/a/b/c.txt').relpath('C:/a/')
@@ -1067,7 +1090,7 @@ class XlPath(type(pathlib.Path())):
1067
1090
  src_dir = XlPath(src_dir)
1068
1091
  stem = src_dir.name
1069
1092
 
1070
- pattern = filename_template.format(stem=stem, index="(\d+)", suffix=".*")
1093
+ pattern = filename_template.format(stem=stem, index=r"(\d+)", suffix=".*")
1071
1094
  files = [file for file in src_dir.iterdir() if re.match(pattern, file.name)] # 获取目录中符合模式的文件
1072
1095
 
1073
1096
  self.merge_from_files(files, ignore_empty_lines_between_files=True, encoding=encoding)
@@ -1089,7 +1112,7 @@ class XlPath(type(pathlib.Path())):
1089
1112
  s = str(best_match)
1090
1113
  encoding = best_match.encoding
1091
1114
  else:
1092
- with open(self, 'r', encoding=encoding) as f:
1115
+ with open(self, 'r', encoding=encoding, errors=errors) as f:
1093
1116
  s = f.read()
1094
1117
 
1095
1118
  # 如果用\r\n作为换行符会有一些意外不好处理
@@ -1101,6 +1124,22 @@ class XlPath(type(pathlib.Path())):
1101
1124
  else:
1102
1125
  return s
1103
1126
 
1127
+ def read_text2(self):
1128
+ """ 智能识别编码的文本读取,这里收集了我见过的一些常见类型 """
1129
+ for encoding in ['utf8',
1130
+ 'gbk',
1131
+ 'gb18030',
1132
+ 'utf_16',
1133
+ 'cp932', # 日文,Shift-JIS
1134
+ 'Big5', # 繁体字,Big5
1135
+ 'big5hkscs', # 繁体字
1136
+ ]:
1137
+ try:
1138
+ content = self.read_text(encoding=encoding)
1139
+ return content, encoding
1140
+ except (UnicodeDecodeError, UnicodeError):
1141
+ continue
1142
+
1104
1143
  def readlines_batch(self, batch_size, *, encoding='utf8'):
1105
1144
  """ 将文本行打包,每次返回一个批次多行数据
1106
1145
 
@@ -1118,12 +1157,12 @@ class XlPath(type(pathlib.Path())):
1118
1157
  f = open(self, 'r', encoding=encoding)
1119
1158
  return chunked(f, batch_size)
1120
1159
 
1121
- def write_text(self, data, encoding='utf8', errors=None, newline=None):
1122
- with open(self, 'w', encoding=encoding, errors=errors, newline=newline) as f:
1160
+ def write_text(self, data, encoding='utf8', mode='w', errors=None, newline=None):
1161
+ with open(self, mode, encoding=encoding, errors=errors, newline=newline) as f:
1123
1162
  return f.write(data)
1124
1163
 
1125
- def write_text_unix(self, data, encoding='utf8', errors=None, newline='\n'):
1126
- with open(self, 'w', encoding=encoding, errors=errors, newline=newline) as f:
1164
+ def write_text_unix(self, data, encoding='utf8', mode='w', errors=None, newline='\n'):
1165
+ with open(self, mode, encoding=encoding, errors=errors, newline=newline) as f:
1127
1166
  return f.write(data)
1128
1167
 
1129
1168
  def read_pkl(self):
@@ -1162,40 +1201,62 @@ class XlPath(type(pathlib.Path())):
1162
1201
  json.dump(data, f, **kwargs)
1163
1202
 
1164
1203
  def read_jsonl(self, encoding='utf8', max_items=None, *,
1165
- errors='strict', return_mode: bool = False):
1204
+ errors='strict', return_mode=0, batch_size=None):
1166
1205
  """ 从文件中读取JSONL格式的数据
1167
1206
 
1168
1207
  :param str encoding: 文件编码格式,默认为utf8
1169
1208
  :param str errors: 读取文件时的错误处理方式,默认为strict
1170
1209
  :param bool return_mode: 是否返回文件编码格式,默认为False
1210
+ 0, 读取全量数据返回
1211
+ 1,返回文件编码格式
1171
1212
  :param int max_items: 限制读取的条目数,默认为None,表示读取所有条目
1213
+ :param int batch_size:
1214
+ 默认为None,表示一次性读取所有数据
1215
+ 如果设置了数值,则会流式读取,常用于太大,超过内存大小等的jsonl文件读取
1216
+ 注意如果设置了大小,只是底层每次一批读取的大小,但返回的data仍然是一维的数据格式迭代器
1172
1217
  :return: 返回读取到的数据列表,如果return_mode为True,则同时返回文件编码格式
1173
1218
 
1174
1219
  >> read_jsonl('data.jsonl', max_items=10) # 读取前10条数据
1175
1220
  """
1176
- s, encoding = self.read_text(encoding=encoding, errors=errors, return_mode=True)
1221
+ if batch_size is None:
1222
+ s, encoding = self.read_text(encoding=encoding, errors=errors, return_mode=True)
1223
+
1224
+ data = []
1225
+ # todo 这一步可能不够严谨,不同的操作系统文件格式不同。但使用splitlines也不太好,在数据含有NEL等特殊字符时会多换行。
1226
+ for line in s.split('\n'):
1227
+ if line:
1228
+ try: # 注意,这里可能会有数据读取失败
1229
+ data.append(json.loads(line))
1230
+ except json.decoder.JSONDecodeError:
1231
+ pass
1232
+ # 如果达到了限制的条目数,就停止读取
1233
+ if max_items is not None and len(data) >= max_items:
1234
+ break
1235
+ else:
1236
+ def get_data():
1237
+ for batch in self.yield_line(batch_size=batch_size, encoding=encoding):
1238
+ for line in batch:
1239
+ try: # 注意,这里可能会有数据读取失败
1240
+ yield json.loads(line)
1241
+ except json.decoder.JSONDecodeError:
1242
+ pass
1177
1243
 
1178
- data = []
1179
- # todo 这一步可能不够严谨,不同的操作系统文件格式不同。但使用splitlines也不太好,在数据含有NEL等特殊字符时会多换行。
1180
- for line in s.split('\n'):
1181
- if line:
1182
- try: # 注意,这里可能会有数据读取失败
1183
- data.append(json.loads(line))
1184
- except json.decoder.JSONDecodeError:
1185
- pass
1186
- # 如果达到了限制的条目数,就停止读取
1187
- if max_items is not None and len(data) >= max_items:
1188
- break
1244
+ data = get_data()
1189
1245
 
1190
1246
  if return_mode:
1191
1247
  return data, encoding
1192
1248
  else:
1193
1249
  return data
1194
1250
 
1195
- def write_jsonl(self, list_data, ensure_ascii=False, default=None):
1251
+ def write_jsonl(self, list_data, ensure_ascii=False, default=None, mode='w', errors=None):
1196
1252
  """ 由于这种格式主要是跟商汤这边对接,就尽量跟它们的格式进行兼容 """
1197
1253
  content = '\n'.join([json.dumps(x, ensure_ascii=ensure_ascii, default=default) for x in list_data])
1198
- self.write_text_unix(content + '\n')
1254
+ self.write_text_unix(content + '\n', mode=mode, errors=errors)
1255
+
1256
+ def add_json_line(self, data, ensure_ascii=False, default=None, mode='a'):
1257
+ """ 在文件末尾添加一行JSON数据 """
1258
+ content = json.dumps(data, ensure_ascii=ensure_ascii, default=default)
1259
+ self.write_text_unix(content + '\n', mode=mode)
1199
1260
 
1200
1261
  def read_csv(self, encoding='utf8', *, errors='strict', return_mode: bool = False,
1201
1262
  delimiter=',', quotechar='"', **kwargs):
@@ -1243,6 +1304,7 @@ class XlPath(type(pathlib.Path())):
1243
1304
 
1244
1305
  def write_auto(self, data, *args, if_exists=None, **kwargs):
1245
1306
  """ 根据文件后缀自动识别写入函数 """
1307
+ self.parent.mkdir(exist_ok=True, parents=True)
1246
1308
  mode = self.suffix.lower()[1:]
1247
1309
  write_func = getattr(self, 'write_' + mode, None)
1248
1310
  if self.exist_preprcs(if_exists):
@@ -1410,19 +1472,58 @@ class XlPath(type(pathlib.Path())):
1410
1472
  dst = XlPath(dst)
1411
1473
  if dst.exist_preprcs(if_exists):
1412
1474
  if self.is_file():
1413
- shutil.copy2(self, dst)
1475
+ return shutil.copy2(self, dst)
1414
1476
  else:
1415
- shutil.copytree(self, dst)
1477
+ return shutil.copytree(self, dst)
1416
1478
 
1417
- def move(self, dst, if_exists=None):
1418
- return self.rename2(dst, if_exists)
1479
+ def move(self, dst, *, cross_disk=False, if_exists=None):
1480
+ """
1481
+ :param cross_disk: 是否可能涉及跨磁盘操作
1482
+ """
1483
+ if not self.exists():
1484
+ return self
1419
1485
 
1420
- def rename2(self, dst, if_exists=None):
1421
- """ 相比原版的rename,搞了更多骚操作,但性能也会略微下降,所以重写一个功能名 """
1486
+ if cross_disk: # 显式设置跨磁盘操作
1487
+ dst = self.copy(dst, if_exists=if_exists)
1488
+ self.delete()
1489
+ return dst
1490
+
1491
+ try:
1492
+ dst = XlPath(dst)
1493
+ if self == dst:
1494
+ # 同一个文件,可能是调整了大小写名称
1495
+ if self.as_posix() != dst.as_posix():
1496
+ tmp = self.tempfile(dir=self.parent) # self不一定是file,也可能是dir,但这个名称通用
1497
+ self.rename(tmp)
1498
+ self.delete()
1499
+ tmp.rename(dst)
1500
+ elif dst.exist_preprcs(if_exists):
1501
+ self.rename(dst)
1502
+ except OSError:
1503
+ # 有可能是跨磁盘操作,这个时候就只能先拷贝再删除了
1504
+ dst = self.copy(dst, if_exists=if_exists)
1505
+ self.delete()
1506
+ return dst
1507
+
1508
+ def rename2(self, new_name, if_exists=None):
1509
+ """ 相比原版的rename,搞了更多骚操作,但性能也会略微下降,所以重写一个功能名
1510
+
1511
+ 240416周二12:49,这个接口将真的只做重命名,不做移动!所以将会不再支持dst中出现"/"路径配置
1512
+ """
1422
1513
  if not self.exists():
1423
1514
  return self
1424
1515
 
1425
- dst = XlPath(dst)
1516
+ if not isinstance(new_name, str):
1517
+ raise ValueError(f'rename2只能做重命名操作,目标路径必须是一个str')
1518
+ elif '/' in new_name:
1519
+ raise ValueError(f'rename2只能做重命名操作,目标路径中不能包含"/"')
1520
+ elif '\\' in new_name:
1521
+ raise ValueError(f'rename2只能做重命名操作,目标路径中不能包含"\\"')
1522
+
1523
+ if self.name == new_name: # 没有修改名称,跟原来相同
1524
+ return self
1525
+
1526
+ dst = self.parent / new_name
1426
1527
  if self == dst:
1427
1528
  # 同一个文件,可能是调整了大小写名称
1428
1529
  if self.as_posix() != dst.as_posix():
@@ -1434,6 +1535,14 @@ class XlPath(type(pathlib.Path())):
1434
1535
  self.rename(dst)
1435
1536
  return dst
1436
1537
 
1538
+ def rename_stem(self, stem, if_exists=None):
1539
+ """ 重命名文件的stem """
1540
+ return self.rename2(stem + self.suffix, if_exists)
1541
+
1542
+ def rename_suffix(self, suffix, if_exists=None):
1543
+ """ 重命名文件的suffix """
1544
+ return self.rename2(self.stem + suffix, if_exists)
1545
+
1437
1546
  def delete(self):
1438
1547
  if self.is_file():
1439
1548
  os.remove(self)
@@ -1599,6 +1708,10 @@ class XlPath(type(pathlib.Path())):
1599
1708
 
1600
1709
  ext = '.' + t.extension
1601
1710
  ext0 = file_path.suffix
1711
+ if ext == ext0:
1712
+ continue
1713
+ elif ext == '.xls' and ext0 == '.et':
1714
+ continue
1602
1715
 
1603
1716
  if ext0 in ('.docx', '.xlsx', '.pptx', '.xlsm'):
1604
1717
  ext0 = '.zip'
@@ -1712,7 +1825,7 @@ class XlPath(type(pathlib.Path())):
1712
1825
 
1713
1826
  return file_summary
1714
1827
 
1715
- def _check_dir_summary(self, print_mode=True, hash_func=None, run_mode=99):
1828
+ def _check_dir_summary(self, print_mode=True, hash_func=None, run_mode=31):
1716
1829
  """ 对文件夹情况进行通用的状态检查
1717
1830
 
1718
1831
  :param hash_func: 可以传入自定义的hash函数,用于第四块的重复文件运算
@@ -1729,28 +1842,28 @@ class XlPath(type(pathlib.Path())):
1729
1842
 
1730
1843
  # 一 目录大小,二 各后缀文件大小
1731
1844
  msg = []
1732
- if run_mode >= 1: # 1和2目前是绑定一起运行的
1845
+ if run_mode & 1: # 1和2目前是绑定一起运行的
1733
1846
  printf('【' + self.as_posix() + '】目录检查')
1734
1847
  printf('\n'.join(self.check_size('list')))
1735
1848
 
1736
1849
  # 三 重名文件
1737
- if run_mode >= 3:
1850
+ if run_mode & 2:
1738
1851
  printf('\n三、重名文件(忽略大小写,跨目录检查name重复情况)')
1739
1852
  printf('\n'.join(self.check_repeat_name_files(print_mode=False)))
1740
1853
 
1741
1854
  # 四 重复文件
1742
- if run_mode >= 4:
1855
+ if run_mode & 4:
1743
1856
  printf('\n四、重复文件(etag相同)')
1744
1857
  printf('\n'.join(self.check_repeat_files(print_mode=False, hash_func=hash_func)))
1745
1858
 
1746
1859
  # 五 错误扩展名
1747
- if run_mode >= 5:
1860
+ if run_mode & 8:
1748
1861
  printf('\n五、错误扩展名')
1749
1862
  for i, (f1, suffix2) in enumerate(self.xglob_faker_suffix_files('**/*'), start=1):
1750
1863
  printf(f'{i}、{f1.relpath(self)} -> {suffix2}')
1751
1864
 
1752
1865
  # 六 文件配对
1753
- if run_mode >= 6:
1866
+ if run_mode & 16:
1754
1867
  printf(
1755
1868
  '\n六、文件配对(检查每个目录里stem名称是否配对,列出文件组成不单一的目录结构,请重点检查落单未配对的情况)')
1756
1869
  prompt = False
@@ -2026,6 +2139,48 @@ class XlPath(type(pathlib.Path())):
2026
2139
  return file
2027
2140
 
2028
2141
 
2142
+ class StreamJsonlWriter:
2143
+ """ 流式存储,主要用于存储文本化、jsonl格式数据 """
2144
+
2145
+ def __init__(self, file_path, batch_size=2000, *,
2146
+ delete_origin_file=False, json_default=str):
2147
+ self.file_path = XlPath(file_path)
2148
+ self.cache_text_lines = []
2149
+ self.batch_size = batch_size
2150
+ self.total_lines = 0
2151
+
2152
+ self.delete_origin_file = delete_origin_file
2153
+ self.json_default = json_default
2154
+
2155
+ def append_line(self, line):
2156
+ self.append_lines([line])
2157
+
2158
+ def append_lines(self, data):
2159
+ """
2160
+ :param list data: 添加一组数据
2161
+ """
2162
+ for x in data:
2163
+ if isinstance(x, str):
2164
+ self.cache_text_lines.append(x)
2165
+ else:
2166
+ self.cache_text_lines.append(json.dumps(x, ensure_ascii=False,
2167
+ default=self.json_default))
2168
+ if len(self.cache_text_lines) >= self.batch_size:
2169
+ self.flush()
2170
+
2171
+ def flush(self):
2172
+ """ 刷新,将当前缓存写入文件 """
2173
+ if self.cache_text_lines:
2174
+ if self.total_lines == 0 and self.delete_origin_file: # 第一次写入时,删除旧缓存文件
2175
+ self.file_path.delete()
2176
+
2177
+ self.total_lines += len(self.cache_text_lines)
2178
+ self.file_path.parent.mkdir(exist_ok=True, parents=True)
2179
+ with open(self.file_path, 'a', encoding='utf8') as f:
2180
+ f.write('\n'.join(self.cache_text_lines) + '\n')
2181
+ self.cache_text_lines = []
2182
+
2183
+
2029
2184
  def demo_file():
2030
2185
  """ File类的综合测试"""
2031
2186
  temp = tempfile.gettempdir()
@@ -2159,8 +2314,11 @@ class PathGroups(Groups):
2159
2314
  return ls
2160
2315
 
2161
2316
 
2162
- def cache_file(file, make_data_func: Callable[[], Any] = None, *, reset=False, **kwargs):
2163
- """ 局部函数功能结果缓存
2317
+ def cache_file(file, make_data_func: Callable[[], Any] = None, *,
2318
+ mode='read_first',
2319
+ cache_time=None,
2320
+ **kwargs):
2321
+ """ 能将局部函数功能结果缓存进文件的功能
2164
2322
 
2165
2323
  输入的文件file如果存在则直接读取内容;
2166
2324
  否则用make_data_func生成,并且备份一个file文件
@@ -2168,20 +2326,52 @@ def cache_file(file, make_data_func: Callable[[], Any] = None, *, reset=False, *
2168
2326
  :param file: 需要缓存的文件路径
2169
2327
  :param make_data_func: 如果文件不存在,则需要生成一份,要提供数据生成函数
2170
2328
  cache_file可以当装饰器用,此时不用显式指定该参数
2171
- :param reset: 如果file是否已存在,都用make_data_func强制重置一遍
2329
+ :param mode:
2330
+ read_first(默认): 优先尝试从已有文件读取
2331
+ generate_first: 函数生成优先
2332
+ :param cache_time: 文件缓存时间,单位为秒,默认为None,表示始终使用缓存文件
2333
+ 如果设置60,表示超过60秒后,需要重新优先从函数获得更新内容
2172
2334
  :param kwargs: 可以传递read、write支持的扩展参数
2173
- :return: 从缓存文件直接读取到的数据
2335
+ :return: 读取到的数据
2174
2336
  """
2337
+ from datetime import datetime, timedelta
2338
+ from pyxllib.prog.pupil import format_exception
2175
2339
 
2176
2340
  def decorator(func):
2177
2341
  def wrapper(*args2, **kwargs2):
2342
+
2178
2343
  f = XlPath.init(file, XlPath.tempdir())
2179
- if f.exists() and not reset: # 文件存在,直接读取返回
2180
- data = f.read_auto(**kwargs)
2181
- else: # 文件不存在则要生成一份数据
2344
+ f.parent.mkdir(exist_ok=True, parents=True)
2345
+
2346
+ # 1 优先看是不是需要先从文件读取数据
2347
+ if mode == 'read_first' and f.is_file():
2348
+ if cache_time is None:
2349
+ return f.read_auto(**kwargs)
2350
+
2351
+ current_time = datetime.now()
2352
+ last_modified = datetime.fromtimestamp(f.mtime()) # 获取文件的修改时间
2353
+ if not isinstance(cache_time, timedelta):
2354
+ cache_time2 = timedelta(seconds=cache_time)
2355
+ else:
2356
+ cache_time2 = cache_time
2357
+
2358
+ if cache_time is None or (current_time - last_modified <= cache_time2):
2359
+ return f.read_auto(**kwargs)
2360
+
2361
+ # 2 如果需要重新生成数据,且没有已存在的保底文件
2362
+ if not f.is_file():
2182
2363
  data = func(*args2, **kwargs2)
2183
2364
  f.write_auto(data, **kwargs)
2184
- return data
2365
+ return data
2366
+
2367
+ # 3 需要重新生成,但是有保底文件
2368
+ try:
2369
+ data = func(*args2, **kwargs2)
2370
+ f.write_auto(data, **kwargs)
2371
+ return data
2372
+ except Exception as e:
2373
+ print(format_exception(e))
2374
+ return f.read_auto(**kwargs)
2185
2375
 
2186
2376
  return wrapper
2187
2377
 
@@ -2418,7 +2608,7 @@ def myoswalk(root, filter_rule=None, recur=True):
2418
2608
 
2419
2609
 
2420
2610
  def mygetfiles(root, filter_rule=None, recur=True):
2421
- """对myoswalk进一步封装,返回所有匹配的文件
2611
+ r""" 对myoswalk进一步封装,返回所有匹配的文件
2422
2612
  会递归查找所有子文件
2423
2613
 
2424
2614
  可以这样遍历一个目录下的所有文件:
@@ -2449,34 +2639,55 @@ class DirsFileFinder:
2449
2639
  for d in dirs:
2450
2640
  self.add_dir(d)
2451
2641
 
2452
- def add_dir(self, p):
2642
+ def add_dir(self, p, cvt_name_func=None):
2453
2643
  """ 添加备用检索目录
2454
2644
  当前面的目录找不到匹配项的时候,会使用备用目录的文件
2455
2645
  备用目录可以一直添加,有多个,优先级逐渐降低
2646
+
2647
+ :param cvt_name_func: 对名称做个转换再匹配
2456
2648
  """
2457
2649
  files = list(XlPath(p).rglob_files())
2458
- for f in files:
2459
- self.names[f.name].append(f)
2460
- self.stems[f.stem].append(f)
2650
+ if cvt_name_func:
2651
+ for f in files:
2652
+ self.names[cvt_name_func(f.name)].append(f)
2653
+ self.stems[cvt_name_func(f.stem)].append(f)
2654
+ else:
2655
+ for f in files:
2656
+ self.names[f.name].append(f)
2657
+ self.stems[f.stem].append(f)
2461
2658
 
2462
2659
  def find_name(self, name):
2463
2660
  """ 返回第一个匹配的结果 """
2464
- names = self.find_names(name)
2465
- if names:
2466
- return names[0]
2661
+ files = self.find_names(name)
2662
+ if files:
2663
+ return files[0]
2467
2664
 
2468
2665
  def find_names(self, name):
2469
2666
  """ 返回所有匹配的结果 """
2470
2667
  return self.names[name]
2471
2668
 
2472
2669
  def find_stem(self, stem):
2473
- stems = self.find_stems(stem)
2474
- if stems:
2475
- return stems[0]
2670
+ files = self.find_stems(stem)
2671
+ if files:
2672
+ return files[0]
2476
2673
 
2477
2674
  def find_stems(self, stem):
2478
2675
  return self.stems[stem]
2479
2676
 
2677
+ def find_prefix_name(self, prefix_name, suffix=None):
2678
+ files = self.find_prefix_names(prefix_name, suffix=suffix)
2679
+ if files:
2680
+ return files[0]
2681
+
2682
+ def find_prefix_names(self, prefix_name, suffix=None):
2683
+ """ name中前缀为prefix_name """
2684
+ filess = [files for name, files in self.names.items() if name.startswith(prefix_name)]
2685
+ # 将嵌套的list展平
2686
+ files = [file for files in filess for file in files]
2687
+ if suffix:
2688
+ files = [file for file in files if file.suffix == suffix]
2689
+ return files
2690
+
2480
2691
 
2481
2692
  class TwinDirs:
2482
2693
  def __init__(self, src_dir, dst_dir):