PyPI - pyxllib - Versions diffs - 0.3.96__py3-none-any.whl → 0.3.197__py3-none-any.whl - Mend

pyxllib 0.3.96__py3-none-any.whl → 0.3.197__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (306)

pyxllib/algo/geo.py +12 -0
pyxllib/algo/intervals.py +1 -1
pyxllib/algo/matcher.py +78 -0
pyxllib/algo/pupil.py +187 -19
pyxllib/algo/specialist.py +2 -1
pyxllib/algo/stat.py +38 -2
{pyxlpr → pyxllib/autogui}/__init__.py +1 -1
pyxllib/autogui/activewin.py +246 -0
pyxllib/autogui/all.py +9 -0
pyxllib/{ext/autogui → autogui}/autogui.py +40 -11
pyxllib/autogui/uiautolib.py +362 -0
pyxllib/autogui/wechat.py +827 -0
pyxllib/autogui/wechat_msg.py +421 -0
pyxllib/autogui/wxautolib.py +84 -0
pyxllib/cv/slidercaptcha.py +137 -0
pyxllib/data/echarts.py +123 -12
pyxllib/data/jsonlib.py +89 -0
pyxllib/data/pglib.py +514 -30
pyxllib/data/sqlite.py +231 -4
pyxllib/ext/JLineViewer.py +14 -1
pyxllib/ext/drissionlib.py +277 -0
pyxllib/ext/kq5034lib.py +0 -1594
pyxllib/ext/robustprocfile.py +497 -0
pyxllib/ext/unixlib.py +6 -5
pyxllib/ext/utools.py +108 -95
pyxllib/ext/webhook.py +32 -14
pyxllib/ext/wjxlib.py +88 -0
pyxllib/ext/wpsapi.py +124 -0
pyxllib/ext/xlwork.py +9 -0
pyxllib/ext/yuquelib.py +1003 -71
pyxllib/file/docxlib.py +1 -1
pyxllib/file/libreoffice.py +165 -0
pyxllib/file/movielib.py +9 -0
pyxllib/file/packlib/__init__.py +112 -75
pyxllib/file/pdflib.py +1 -1
pyxllib/file/pupil.py +1 -1
pyxllib/file/specialist/dirlib.py +1 -1
pyxllib/file/specialist/download.py +10 -3
pyxllib/file/specialist/filelib.py +266 -55
pyxllib/file/xlsxlib.py +205 -50
pyxllib/file/xlsyncfile.py +341 -0
pyxllib/prog/cachetools.py +64 -0
pyxllib/prog/filelock.py +42 -0
pyxllib/prog/multiprogs.py +940 -0
pyxllib/prog/newbie.py +9 -2
pyxllib/prog/pupil.py +129 -60
pyxllib/prog/specialist/__init__.py +176 -2
pyxllib/prog/specialist/bc.py +5 -2
pyxllib/prog/specialist/browser.py +11 -2
pyxllib/prog/specialist/datetime.py +68 -0
pyxllib/prog/specialist/tictoc.py +12 -13
pyxllib/prog/specialist/xllog.py +5 -5
pyxllib/prog/xlosenv.py +7 -0
pyxllib/text/airscript.js +744 -0
pyxllib/text/charclasslib.py +17 -5
pyxllib/text/jiebalib.py +6 -3
pyxllib/text/jinjalib.py +32 -0
pyxllib/text/jsa_ai_prompt.md +271 -0
pyxllib/text/jscode.py +159 -4
pyxllib/text/nestenv.py +1 -1
pyxllib/text/newbie.py +12 -0
pyxllib/text/pupil/common.py +26 -0
pyxllib/text/specialist/ptag.py +2 -2
pyxllib/text/templates/echart_base.html +11 -0
pyxllib/text/templates/highlight_code.html +17 -0
pyxllib/text/templates/latex_editor.html +103 -0
pyxllib/text/xmllib.py +76 -14
pyxllib/xl.py +2 -1
pyxllib-0.3.197.dist-info/METADATA +48 -0
pyxllib-0.3.197.dist-info/RECORD +126 -0
{pyxllib-0.3.96.dist-info → pyxllib-0.3.197.dist-info}/WHEEL +1 -2
pyxllib/ext/autogui/__init__.py +0 -8
pyxllib-0.3.96.dist-info/METADATA +0 -51
pyxllib-0.3.96.dist-info/RECORD +0 -333
pyxllib-0.3.96.dist-info/top_level.txt +0 -2
pyxlpr/ai/__init__.py +0 -5
pyxlpr/ai/clientlib.py +0 -1281
pyxlpr/ai/specialist.py +0 -286
pyxlpr/ai/torch_app.py +0 -172
pyxlpr/ai/xlpaddle.py +0 -655
pyxlpr/ai/xltorch.py +0 -705
pyxlpr/data/__init__.py +0 -11
pyxlpr/data/coco.py +0 -1325
pyxlpr/data/datacls.py +0 -365
pyxlpr/data/datasets.py +0 -200
pyxlpr/data/gptlib.py +0 -1291
pyxlpr/data/icdar/__init__.py +0 -96
pyxlpr/data/icdar/deteval.py +0 -377
pyxlpr/data/icdar/icdar2013.py +0 -341
pyxlpr/data/icdar/iou.py +0 -340
pyxlpr/data/icdar/rrc_evaluation_funcs_1_1.py +0 -463
pyxlpr/data/imtextline.py +0 -473
pyxlpr/data/labelme.py +0 -866
pyxlpr/data/removeline.py +0 -179
pyxlpr/data/specialist.py +0 -57
pyxlpr/eval/__init__.py +0 -85
pyxlpr/paddleocr.py +0 -776
pyxlpr/ppocr/__init__.py +0 -15
pyxlpr/ppocr/configs/rec/multi_language/generate_multi_language_configs.py +0 -226
pyxlpr/ppocr/data/__init__.py +0 -135
pyxlpr/ppocr/data/imaug/ColorJitter.py +0 -26
pyxlpr/ppocr/data/imaug/__init__.py +0 -67
pyxlpr/ppocr/data/imaug/copy_paste.py +0 -170
pyxlpr/ppocr/data/imaug/east_process.py +0 -437
pyxlpr/ppocr/data/imaug/gen_table_mask.py +0 -244
pyxlpr/ppocr/data/imaug/iaa_augment.py +0 -114
pyxlpr/ppocr/data/imaug/label_ops.py +0 -789
pyxlpr/ppocr/data/imaug/make_border_map.py +0 -184
pyxlpr/ppocr/data/imaug/make_pse_gt.py +0 -106
pyxlpr/ppocr/data/imaug/make_shrink_map.py +0 -126
pyxlpr/ppocr/data/imaug/operators.py +0 -433
pyxlpr/ppocr/data/imaug/pg_process.py +0 -906
pyxlpr/ppocr/data/imaug/randaugment.py +0 -143
pyxlpr/ppocr/data/imaug/random_crop_data.py +0 -239
pyxlpr/ppocr/data/imaug/rec_img_aug.py +0 -533
pyxlpr/ppocr/data/imaug/sast_process.py +0 -777
pyxlpr/ppocr/data/imaug/text_image_aug/__init__.py +0 -17
pyxlpr/ppocr/data/imaug/text_image_aug/augment.py +0 -120
pyxlpr/ppocr/data/imaug/text_image_aug/warp_mls.py +0 -168
pyxlpr/ppocr/data/lmdb_dataset.py +0 -115
pyxlpr/ppocr/data/pgnet_dataset.py +0 -104
pyxlpr/ppocr/data/pubtab_dataset.py +0 -107
pyxlpr/ppocr/data/simple_dataset.py +0 -372
pyxlpr/ppocr/losses/__init__.py +0 -61
pyxlpr/ppocr/losses/ace_loss.py +0 -52
pyxlpr/ppocr/losses/basic_loss.py +0 -135
pyxlpr/ppocr/losses/center_loss.py +0 -88
pyxlpr/ppocr/losses/cls_loss.py +0 -30
pyxlpr/ppocr/losses/combined_loss.py +0 -67
pyxlpr/ppocr/losses/det_basic_loss.py +0 -208
pyxlpr/ppocr/losses/det_db_loss.py +0 -80
pyxlpr/ppocr/losses/det_east_loss.py +0 -63
pyxlpr/ppocr/losses/det_pse_loss.py +0 -149
pyxlpr/ppocr/losses/det_sast_loss.py +0 -121
pyxlpr/ppocr/losses/distillation_loss.py +0 -272
pyxlpr/ppocr/losses/e2e_pg_loss.py +0 -140
pyxlpr/ppocr/losses/kie_sdmgr_loss.py +0 -113
pyxlpr/ppocr/losses/rec_aster_loss.py +0 -99
pyxlpr/ppocr/losses/rec_att_loss.py +0 -39
pyxlpr/ppocr/losses/rec_ctc_loss.py +0 -44
pyxlpr/ppocr/losses/rec_enhanced_ctc_loss.py +0 -70
pyxlpr/ppocr/losses/rec_nrtr_loss.py +0 -30
pyxlpr/ppocr/losses/rec_sar_loss.py +0 -28
pyxlpr/ppocr/losses/rec_srn_loss.py +0 -47
pyxlpr/ppocr/losses/table_att_loss.py +0 -109
pyxlpr/ppocr/metrics/__init__.py +0 -44
pyxlpr/ppocr/metrics/cls_metric.py +0 -45
pyxlpr/ppocr/metrics/det_metric.py +0 -82
pyxlpr/ppocr/metrics/distillation_metric.py +0 -73
pyxlpr/ppocr/metrics/e2e_metric.py +0 -86
pyxlpr/ppocr/metrics/eval_det_iou.py +0 -274
pyxlpr/ppocr/metrics/kie_metric.py +0 -70
pyxlpr/ppocr/metrics/rec_metric.py +0 -75
pyxlpr/ppocr/metrics/table_metric.py +0 -50
pyxlpr/ppocr/modeling/architectures/__init__.py +0 -32
pyxlpr/ppocr/modeling/architectures/base_model.py +0 -88
pyxlpr/ppocr/modeling/architectures/distillation_model.py +0 -60
pyxlpr/ppocr/modeling/backbones/__init__.py +0 -54
pyxlpr/ppocr/modeling/backbones/det_mobilenet_v3.py +0 -268
pyxlpr/ppocr/modeling/backbones/det_resnet_vd.py +0 -246
pyxlpr/ppocr/modeling/backbones/det_resnet_vd_sast.py +0 -285
pyxlpr/ppocr/modeling/backbones/e2e_resnet_vd_pg.py +0 -265
pyxlpr/ppocr/modeling/backbones/kie_unet_sdmgr.py +0 -186
pyxlpr/ppocr/modeling/backbones/rec_mobilenet_v3.py +0 -138
pyxlpr/ppocr/modeling/backbones/rec_mv1_enhance.py +0 -258
pyxlpr/ppocr/modeling/backbones/rec_nrtr_mtb.py +0 -48
pyxlpr/ppocr/modeling/backbones/rec_resnet_31.py +0 -210
pyxlpr/ppocr/modeling/backbones/rec_resnet_aster.py +0 -143
pyxlpr/ppocr/modeling/backbones/rec_resnet_fpn.py +0 -307
pyxlpr/ppocr/modeling/backbones/rec_resnet_vd.py +0 -286
pyxlpr/ppocr/modeling/heads/__init__.py +0 -54
pyxlpr/ppocr/modeling/heads/cls_head.py +0 -52
pyxlpr/ppocr/modeling/heads/det_db_head.py +0 -118
pyxlpr/ppocr/modeling/heads/det_east_head.py +0 -121
pyxlpr/ppocr/modeling/heads/det_pse_head.py +0 -37
pyxlpr/ppocr/modeling/heads/det_sast_head.py +0 -128
pyxlpr/ppocr/modeling/heads/e2e_pg_head.py +0 -253
pyxlpr/ppocr/modeling/heads/kie_sdmgr_head.py +0 -206
pyxlpr/ppocr/modeling/heads/multiheadAttention.py +0 -163
pyxlpr/ppocr/modeling/heads/rec_aster_head.py +0 -393
pyxlpr/ppocr/modeling/heads/rec_att_head.py +0 -202
pyxlpr/ppocr/modeling/heads/rec_ctc_head.py +0 -88
pyxlpr/ppocr/modeling/heads/rec_nrtr_head.py +0 -826
pyxlpr/ppocr/modeling/heads/rec_sar_head.py +0 -402
pyxlpr/ppocr/modeling/heads/rec_srn_head.py +0 -280
pyxlpr/ppocr/modeling/heads/self_attention.py +0 -406
pyxlpr/ppocr/modeling/heads/table_att_head.py +0 -246
pyxlpr/ppocr/modeling/necks/__init__.py +0 -32
pyxlpr/ppocr/modeling/necks/db_fpn.py +0 -111
pyxlpr/ppocr/modeling/necks/east_fpn.py +0 -188
pyxlpr/ppocr/modeling/necks/fpn.py +0 -138
pyxlpr/ppocr/modeling/necks/pg_fpn.py +0 -314
pyxlpr/ppocr/modeling/necks/rnn.py +0 -92
pyxlpr/ppocr/modeling/necks/sast_fpn.py +0 -284
pyxlpr/ppocr/modeling/necks/table_fpn.py +0 -110
pyxlpr/ppocr/modeling/transforms/__init__.py +0 -28
pyxlpr/ppocr/modeling/transforms/stn.py +0 -135
pyxlpr/ppocr/modeling/transforms/tps.py +0 -308
pyxlpr/ppocr/modeling/transforms/tps_spatial_transformer.py +0 -156
pyxlpr/ppocr/optimizer/__init__.py +0 -61
pyxlpr/ppocr/optimizer/learning_rate.py +0 -228
pyxlpr/ppocr/optimizer/lr_scheduler.py +0 -49
pyxlpr/ppocr/optimizer/optimizer.py +0 -160
pyxlpr/ppocr/optimizer/regularizer.py +0 -52
pyxlpr/ppocr/postprocess/__init__.py +0 -55
pyxlpr/ppocr/postprocess/cls_postprocess.py +0 -33
pyxlpr/ppocr/postprocess/db_postprocess.py +0 -234
pyxlpr/ppocr/postprocess/east_postprocess.py +0 -143
pyxlpr/ppocr/postprocess/locality_aware_nms.py +0 -200
pyxlpr/ppocr/postprocess/pg_postprocess.py +0 -52
pyxlpr/ppocr/postprocess/pse_postprocess/__init__.py +0 -15
pyxlpr/ppocr/postprocess/pse_postprocess/pse/__init__.py +0 -29
pyxlpr/ppocr/postprocess/pse_postprocess/pse/setup.py +0 -14
pyxlpr/ppocr/postprocess/pse_postprocess/pse_postprocess.py +0 -118
pyxlpr/ppocr/postprocess/rec_postprocess.py +0 -654
pyxlpr/ppocr/postprocess/sast_postprocess.py +0 -355
pyxlpr/ppocr/tools/__init__.py +0 -14
pyxlpr/ppocr/tools/eval.py +0 -83
pyxlpr/ppocr/tools/export_center.py +0 -77
pyxlpr/ppocr/tools/export_model.py +0 -129
pyxlpr/ppocr/tools/infer/predict_cls.py +0 -151
pyxlpr/ppocr/tools/infer/predict_det.py +0 -300
pyxlpr/ppocr/tools/infer/predict_e2e.py +0 -169
pyxlpr/ppocr/tools/infer/predict_rec.py +0 -414
pyxlpr/ppocr/tools/infer/predict_system.py +0 -204
pyxlpr/ppocr/tools/infer/utility.py +0 -629
pyxlpr/ppocr/tools/infer_cls.py +0 -83
pyxlpr/ppocr/tools/infer_det.py +0 -134
pyxlpr/ppocr/tools/infer_e2e.py +0 -122
pyxlpr/ppocr/tools/infer_kie.py +0 -153
pyxlpr/ppocr/tools/infer_rec.py +0 -146
pyxlpr/ppocr/tools/infer_table.py +0 -107
pyxlpr/ppocr/tools/program.py +0 -596
pyxlpr/ppocr/tools/test_hubserving.py +0 -117
pyxlpr/ppocr/tools/train.py +0 -163
pyxlpr/ppocr/tools/xlprog.py +0 -748
pyxlpr/ppocr/utils/EN_symbol_dict.txt +0 -94
pyxlpr/ppocr/utils/__init__.py +0 -24
pyxlpr/ppocr/utils/dict/ar_dict.txt +0 -117
pyxlpr/ppocr/utils/dict/arabic_dict.txt +0 -162
pyxlpr/ppocr/utils/dict/be_dict.txt +0 -145
pyxlpr/ppocr/utils/dict/bg_dict.txt +0 -140
pyxlpr/ppocr/utils/dict/chinese_cht_dict.txt +0 -8421
pyxlpr/ppocr/utils/dict/cyrillic_dict.txt +0 -163
pyxlpr/ppocr/utils/dict/devanagari_dict.txt +0 -167
pyxlpr/ppocr/utils/dict/en_dict.txt +0 -63
pyxlpr/ppocr/utils/dict/fa_dict.txt +0 -136
pyxlpr/ppocr/utils/dict/french_dict.txt +0 -136
pyxlpr/ppocr/utils/dict/german_dict.txt +0 -143
pyxlpr/ppocr/utils/dict/hi_dict.txt +0 -162
pyxlpr/ppocr/utils/dict/it_dict.txt +0 -118
pyxlpr/ppocr/utils/dict/japan_dict.txt +0 -4399
pyxlpr/ppocr/utils/dict/ka_dict.txt +0 -153
pyxlpr/ppocr/utils/dict/korean_dict.txt +0 -3688
pyxlpr/ppocr/utils/dict/latin_dict.txt +0 -185
pyxlpr/ppocr/utils/dict/mr_dict.txt +0 -153
pyxlpr/ppocr/utils/dict/ne_dict.txt +0 -153
pyxlpr/ppocr/utils/dict/oc_dict.txt +0 -96
pyxlpr/ppocr/utils/dict/pu_dict.txt +0 -130
pyxlpr/ppocr/utils/dict/rs_dict.txt +0 -91
pyxlpr/ppocr/utils/dict/rsc_dict.txt +0 -134
pyxlpr/ppocr/utils/dict/ru_dict.txt +0 -125
pyxlpr/ppocr/utils/dict/ta_dict.txt +0 -128
pyxlpr/ppocr/utils/dict/table_dict.txt +0 -277
pyxlpr/ppocr/utils/dict/table_structure_dict.txt +0 -2759
pyxlpr/ppocr/utils/dict/te_dict.txt +0 -151
pyxlpr/ppocr/utils/dict/ug_dict.txt +0 -114
pyxlpr/ppocr/utils/dict/uk_dict.txt +0 -142
pyxlpr/ppocr/utils/dict/ur_dict.txt +0 -137
pyxlpr/ppocr/utils/dict/xi_dict.txt +0 -110
pyxlpr/ppocr/utils/dict90.txt +0 -90
pyxlpr/ppocr/utils/e2e_metric/Deteval.py +0 -574
pyxlpr/ppocr/utils/e2e_metric/polygon_fast.py +0 -83
pyxlpr/ppocr/utils/e2e_utils/extract_batchsize.py +0 -87
pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_fast.py +0 -457
pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_slow.py +0 -592
pyxlpr/ppocr/utils/e2e_utils/pgnet_pp_utils.py +0 -162
pyxlpr/ppocr/utils/e2e_utils/visual.py +0 -162
pyxlpr/ppocr/utils/en_dict.txt +0 -95
pyxlpr/ppocr/utils/gen_label.py +0 -81
pyxlpr/ppocr/utils/ic15_dict.txt +0 -36
pyxlpr/ppocr/utils/iou.py +0 -54
pyxlpr/ppocr/utils/logging.py +0 -69
pyxlpr/ppocr/utils/network.py +0 -84
pyxlpr/ppocr/utils/ppocr_keys_v1.txt +0 -6623
pyxlpr/ppocr/utils/profiler.py +0 -110
pyxlpr/ppocr/utils/save_load.py +0 -150
pyxlpr/ppocr/utils/stats.py +0 -72
pyxlpr/ppocr/utils/utility.py +0 -80
pyxlpr/ppstructure/__init__.py +0 -13
pyxlpr/ppstructure/predict_system.py +0 -187
pyxlpr/ppstructure/table/__init__.py +0 -13
pyxlpr/ppstructure/table/eval_table.py +0 -72
pyxlpr/ppstructure/table/matcher.py +0 -192
pyxlpr/ppstructure/table/predict_structure.py +0 -136
pyxlpr/ppstructure/table/predict_table.py +0 -221
pyxlpr/ppstructure/table/table_metric/__init__.py +0 -16
pyxlpr/ppstructure/table/table_metric/parallel.py +0 -51
pyxlpr/ppstructure/table/table_metric/table_metric.py +0 -247
pyxlpr/ppstructure/table/tablepyxl/__init__.py +0 -13
pyxlpr/ppstructure/table/tablepyxl/style.py +0 -283
pyxlpr/ppstructure/table/tablepyxl/tablepyxl.py +0 -118
pyxlpr/ppstructure/utility.py +0 -71
pyxlpr/xlai.py +0 -10
pyxllib/{ext/autogui → autogui}/virtualkey.py +0 -0
{pyxllib-0.3.96.dist-info → pyxllib-0.3.197.dist-info/licenses}/LICENSE +0 -0

pyxllib/file/specialist/filelib.py CHANGED Viewed

@@ -57,6 +57,28 @@ def __2_qiniu():
     pass
+class GetEtag:
+    """ 七牛原有etag功能基础上做封装 """
+    @classmethod
+    def from_bytes(cls, _bytes):
+        return qiniu.utils.etag_stream(io.BytesIO(_bytes))
+    @classmethod
+    def from_text(cls, text):
+        _bytes = text.encode('utf8')
+        return qiniu.utils.etag_stream(io.BytesIO(_bytes))
+    @classmethod
+    def from_file(cls, file):
+        return qiniu.etag(file)
+    @classmethod
+    def from_url(cls, url):
+        return cls(requests.get(url).content)
+# @deprecated.deprecated
 def get_etag(arg):
     """ 七牛原有etag功能基础上做封装
@@ -784,7 +806,7 @@ class XlPath(type(pathlib.Path())):
     @classmethod
     def desktop(cls):
-        if os.environ.get('Desktop', None):  # 如果修改了win10默认的桌面路径，需要在环境变量添加一个正确的Desktop路径值
+        if os.getenv('Desktop', None):  # 如果修改了win10默认的桌面路径，需要在环境变量添加一个正确的Desktop路径值
             desktop = os.environ['Desktop']
         else:
             desktop = os.path.join(pathlib.Path.home(), 'Desktop')  # 这个不一定准，桌面是有可能被移到D盘等的
@@ -865,6 +887,7 @@ class XlPath(type(pathlib.Path())):
             return 0
     def mtime(self):
+        """ 文件的修改时间 """
         # windows会带小数，linux使用%Ts只有整数部分。
         # 这里不用四舍五入，取整数部分就是对应的。
         return int(os.stat(self).st_mtime)
@@ -900,7 +923,7 @@ class XlPath(type(pathlib.Path())):
                 res.add(p.relative_to(self).as_posix())
         return res
-    def relpath(self, ref_dir) -> str:
+    def relpath(self, ref_dir) -> 'XlPath':
         r""" 当前路径，相对于ref_dir的路径位置
         >>> File('C:/a/b/c.txt').relpath('C:/a/')
@@ -1067,7 +1090,7 @@ class XlPath(type(pathlib.Path())):
         src_dir = XlPath(src_dir)
         stem = src_dir.name
-        pattern = filename_template.format(stem=stem, index="(\d+)", suffix=".*")
+        pattern = filename_template.format(stem=stem, index=r"(\d+)", suffix=".*")
         files = [file for file in src_dir.iterdir() if re.match(pattern, file.name)]  # 获取目录中符合模式的文件
         self.merge_from_files(files, ignore_empty_lines_between_files=True, encoding=encoding)
@@ -1089,7 +1112,7 @@ class XlPath(type(pathlib.Path())):
             s = str(best_match)
             encoding = best_match.encoding
         else:
-            with open(self, 'r', encoding=encoding) as f:
+            with open(self, 'r', encoding=encoding, errors=errors) as f:
                 s = f.read()
         # 如果用\r\n作为换行符会有一些意外不好处理
@@ -1101,6 +1124,22 @@ class XlPath(type(pathlib.Path())):
         else:
             return s
+    def read_text2(self):
+        """ 智能识别编码的文本读取，这里收集了我见过的一些常见类型 """
+        for encoding in ['utf8',
+                         'gbk',
+                         'gb18030',
+                         'utf_16',
+                         'cp932',  # 日文，Shift-JIS
+                         'Big5',  # 繁体字，Big5
+                         'big5hkscs',  # 繁体字
+                         ]:
+            try:
+                content = self.read_text(encoding=encoding)
+                return content, encoding
+            except (UnicodeDecodeError, UnicodeError):
+                continue
     def readlines_batch(self, batch_size, *, encoding='utf8'):
         """ 将文本行打包，每次返回一个批次多行数据
@@ -1118,12 +1157,12 @@ class XlPath(type(pathlib.Path())):
         f = open(self, 'r', encoding=encoding)
         return chunked(f, batch_size)
-    def write_text(self, data, encoding='utf8', errors=None, newline=None):
-        with open(self, 'w', encoding=encoding, errors=errors, newline=newline) as f:
+    def write_text(self, data, encoding='utf8', mode='w', errors=None, newline=None):
+        with open(self, mode, encoding=encoding, errors=errors, newline=newline) as f:
             return f.write(data)
-    def write_text_unix(self, data, encoding='utf8', errors=None, newline='\n'):
-        with open(self, 'w', encoding=encoding, errors=errors, newline=newline) as f:
+    def write_text_unix(self, data, encoding='utf8', mode='w', errors=None, newline='\n'):
+        with open(self, mode, encoding=encoding, errors=errors, newline=newline) as f:
             return f.write(data)
     def read_pkl(self):
@@ -1162,40 +1201,62 @@ class XlPath(type(pathlib.Path())):
             json.dump(data, f, **kwargs)
     def read_jsonl(self, encoding='utf8', max_items=None, *,
-                   errors='strict', return_mode: bool = False):
+                   errors='strict', return_mode=0, batch_size=None):
         """ 从文件中读取JSONL格式的数据
         :param str encoding: 文件编码格式，默认为utf8
         :param str errors: 读取文件时的错误处理方式，默认为strict
         :param bool return_mode: 是否返回文件编码格式，默认为False
+            0, 读取全量数据返回
+            1，返回文件编码格式
         :param int max_items: 限制读取的条目数，默认为None，表示读取所有条目
+        :param int batch_size:
+            默认为None，表示一次性读取所有数据
+            如果设置了数值，则会流式读取，常用于太大，超过内存大小等的jsonl文件读取
+                注意如果设置了大小，只是底层每次一批读取的大小，但返回的data仍然是一维的数据格式迭代器
         :return: 返回读取到的数据列表，如果return_mode为True，则同时返回文件编码格式
         >> read_jsonl('data.jsonl', max_items=10)  # 读取前10条数据
         """
-        s, encoding = self.read_text(encoding=encoding, errors=errors, return_mode=True)
+        if batch_size is None:
+            s, encoding = self.read_text(encoding=encoding, errors=errors, return_mode=True)
+            data = []
+            # todo 这一步可能不够严谨，不同的操作系统文件格式不同。但使用splitlines也不太好，在数据含有NEL等特殊字符时会多换行。
+            for line in s.split('\n'):
+                if line:
+                    try:  # 注意，这里可能会有数据读取失败
+                        data.append(json.loads(line))
+                    except json.decoder.JSONDecodeError:
+                        pass
+                # 如果达到了限制的条目数，就停止读取
+                if max_items is not None and len(data) >= max_items:
+                    break
+        else:
+            def get_data():
+                for batch in self.yield_line(batch_size=batch_size, encoding=encoding):
+                    for line in batch:
+                        try:  # 注意，这里可能会有数据读取失败
+                            yield json.loads(line)
+                        except json.decoder.JSONDecodeError:
+                            pass
-        data = []
-        # todo 这一步可能不够严谨，不同的操作系统文件格式不同。但使用splitlines也不太好，在数据含有NEL等特殊字符时会多换行。
-        for line in s.split('\n'):
-            if line:
-                try:  # 注意，这里可能会有数据读取失败
-                    data.append(json.loads(line))
-                except json.decoder.JSONDecodeError:
-                    pass
-            # 如果达到了限制的条目数，就停止读取
-            if max_items is not None and len(data) >= max_items:
-                break
+            data = get_data()
         if return_mode:
             return data, encoding
         else:
             return data
-    def write_jsonl(self, list_data, ensure_ascii=False, default=None):
+    def write_jsonl(self, list_data, ensure_ascii=False, default=None, mode='w', errors=None):
         """ 由于这种格式主要是跟商汤这边对接，就尽量跟它们的格式进行兼容 """
         content = '\n'.join([json.dumps(x, ensure_ascii=ensure_ascii, default=default) for x in list_data])
-        self.write_text_unix(content + '\n')
+        self.write_text_unix(content + '\n', mode=mode, errors=errors)
+    def add_json_line(self, data, ensure_ascii=False, default=None, mode='a'):
+        """ 在文件末尾添加一行JSON数据 """
+        content = json.dumps(data, ensure_ascii=ensure_ascii, default=default)
+        self.write_text_unix(content + '\n', mode=mode)
     def read_csv(self, encoding='utf8', *, errors='strict', return_mode: bool = False,
                  delimiter=',', quotechar='"', **kwargs):
@@ -1243,6 +1304,7 @@ class XlPath(type(pathlib.Path())):
     def write_auto(self, data, *args, if_exists=None, **kwargs):
         """ 根据文件后缀自动识别写入函数 """
+        self.parent.mkdir(exist_ok=True, parents=True)
         mode = self.suffix.lower()[1:]
         write_func = getattr(self, 'write_' + mode, None)
         if self.exist_preprcs(if_exists):
@@ -1410,19 +1472,58 @@ class XlPath(type(pathlib.Path())):
         dst = XlPath(dst)
         if dst.exist_preprcs(if_exists):
             if self.is_file():
-                shutil.copy2(self, dst)
+                return shutil.copy2(self, dst)
             else:
-                shutil.copytree(self, dst)
+                return shutil.copytree(self, dst)
-    def move(self, dst, if_exists=None):
-        return self.rename2(dst, if_exists)
+    def move(self, dst, *, cross_disk=False, if_exists=None):
+        """
+        :param cross_disk: 是否可能涉及跨磁盘操作
+        """
+        if not self.exists():
+            return self
-    def rename2(self, dst, if_exists=None):
-        """ 相比原版的rename，搞了更多骚操作，但性能也会略微下降，所以重写一个功能名 """
+        if cross_disk:  # 显式设置跨磁盘操作
+            dst = self.copy(dst, if_exists=if_exists)
+            self.delete()
+            return dst
+        try:
+            dst = XlPath(dst)
+            if self == dst:
+                # 同一个文件，可能是调整了大小写名称
+                if self.as_posix() != dst.as_posix():
+                    tmp = self.tempfile(dir=self.parent)  # self不一定是file，也可能是dir，但这个名称通用
+                    self.rename(tmp)
+                    self.delete()
+                    tmp.rename(dst)
+            elif dst.exist_preprcs(if_exists):
+                self.rename(dst)
+        except OSError:
+            # 有可能是跨磁盘操作，这个时候就只能先拷贝再删除了
+            dst = self.copy(dst, if_exists=if_exists)
+            self.delete()
+        return dst
+    def rename2(self, new_name, if_exists=None):
+        """ 相比原版的rename，搞了更多骚操作，但性能也会略微下降，所以重写一个功能名
+        240416周二12:49，这个接口将真的只做重命名，不做移动！所以将会不再支持dst中出现"/"路径配置
+        """
         if not self.exists():
             return self
-        dst = XlPath(dst)
+        if not isinstance(new_name, str):
+            raise ValueError(f'rename2只能做重命名操作，目标路径必须是一个str')
+        elif '/' in new_name:
+            raise ValueError(f'rename2只能做重命名操作，目标路径中不能包含"/"')
+        elif '\\' in new_name:
+            raise ValueError(f'rename2只能做重命名操作，目标路径中不能包含"\\"')
+        if self.name == new_name:  # 没有修改名称，跟原来相同
+            return self
+        dst = self.parent / new_name
         if self == dst:
             # 同一个文件，可能是调整了大小写名称
             if self.as_posix() != dst.as_posix():
@@ -1434,6 +1535,14 @@ class XlPath(type(pathlib.Path())):
             self.rename(dst)
         return dst
+    def rename_stem(self, stem, if_exists=None):
+        """ 重命名文件的stem """
+        return self.rename2(stem + self.suffix, if_exists)
+    def rename_suffix(self, suffix, if_exists=None):
+        """ 重命名文件的suffix """
+        return self.rename2(self.stem + suffix, if_exists)
     def delete(self):
         if self.is_file():
             os.remove(self)
@@ -1599,6 +1708,10 @@ class XlPath(type(pathlib.Path())):
             ext = '.' + t.extension
             ext0 = file_path.suffix
+            if ext == ext0:
+                continue
+            elif ext == '.xls' and ext0 == '.et':
+                continue
             if ext0 in ('.docx', '.xlsx', '.pptx', '.xlsm'):
                 ext0 = '.zip'
@@ -1712,7 +1825,7 @@ class XlPath(type(pathlib.Path())):
         return file_summary
-    def _check_dir_summary(self, print_mode=True, hash_func=None, run_mode=99):
+    def _check_dir_summary(self, print_mode=True, hash_func=None, run_mode=31):
         """ 对文件夹情况进行通用的状态检查
         :param hash_func: 可以传入自定义的hash函数，用于第四块的重复文件运算
@@ -1729,28 +1842,28 @@ class XlPath(type(pathlib.Path())):
         # 一 目录大小，二 各后缀文件大小
         msg = []
-        if run_mode >= 1:  # 1和2目前是绑定一起运行的
+        if run_mode & 1:  # 1和2目前是绑定一起运行的
             printf('【' + self.as_posix() + '】目录检查')
             printf('\n'.join(self.check_size('list')))
         # 三 重名文件
-        if run_mode >= 3:
+        if run_mode & 2:
             printf('\n三、重名文件（忽略大小写，跨目录检查name重复情况）')
             printf('\n'.join(self.check_repeat_name_files(print_mode=False)))
         # 四 重复文件
-        if run_mode >= 4:
+        if run_mode & 4:
             printf('\n四、重复文件（etag相同）')
             printf('\n'.join(self.check_repeat_files(print_mode=False, hash_func=hash_func)))
         # 五 错误扩展名
-        if run_mode >= 5:
+        if run_mode & 8:
             printf('\n五、错误扩展名')
             for i, (f1, suffix2) in enumerate(self.xglob_faker_suffix_files('**/*'), start=1):
                 printf(f'{i}、{f1.relpath(self)} -> {suffix2}')
         # 六 文件配对
-        if run_mode >= 6:
+        if run_mode & 16:
             printf(
                 '\n六、文件配对（检查每个目录里stem名称是否配对，列出文件组成不单一的目录结构，请重点检查落单未配对的情况）')
             prompt = False
@@ -2026,6 +2139,48 @@ class XlPath(type(pathlib.Path())):
         return file
+class StreamJsonlWriter:
+    """ 流式存储，主要用于存储文本化、jsonl格式数据 """
+    def __init__(self, file_path, batch_size=2000, *,
+                 delete_origin_file=False, json_default=str):
+        self.file_path = XlPath(file_path)
+        self.cache_text_lines = []
+        self.batch_size = batch_size
+        self.total_lines = 0
+        self.delete_origin_file = delete_origin_file
+        self.json_default = json_default
+    def append_line(self, line):
+        self.append_lines([line])
+    def append_lines(self, data):
+        """
+        :param list data: 添加一组数据
+        """
+        for x in data:
+            if isinstance(x, str):
+                self.cache_text_lines.append(x)
+            else:
+                self.cache_text_lines.append(json.dumps(x, ensure_ascii=False,
+                                                        default=self.json_default))
+        if len(self.cache_text_lines) >= self.batch_size:
+            self.flush()
+    def flush(self):
+        """ 刷新，将当前缓存写入文件 """
+        if self.cache_text_lines:
+            if self.total_lines == 0 and self.delete_origin_file:  # 第一次写入时，删除旧缓存文件
+                self.file_path.delete()
+            self.total_lines += len(self.cache_text_lines)
+            self.file_path.parent.mkdir(exist_ok=True, parents=True)
+            with open(self.file_path, 'a', encoding='utf8') as f:
+                f.write('\n'.join(self.cache_text_lines) + '\n')
+            self.cache_text_lines = []
 def demo_file():
     """ File类的综合测试"""
     temp = tempfile.gettempdir()
@@ -2159,8 +2314,11 @@ class PathGroups(Groups):
         return ls
-def cache_file(file, make_data_func: Callable[[], Any] = None, *, reset=False, **kwargs):
-    """ 局部函数功能结果缓存
+def cache_file(file, make_data_func: Callable[[], Any] = None, *,
+               mode='read_first',
+               cache_time=None,
+               **kwargs):
+    """ 能将局部函数功能结果缓存进文件的功能
     输入的文件file如果存在则直接读取内容；
     否则用make_data_func生成，并且备份一个file文件
@@ -2168,20 +2326,52 @@ def cache_file(file, make_data_func: Callable[[], Any] = None, *, reset=False, *
     :param file: 需要缓存的文件路径
     :param make_data_func: 如果文件不存在，则需要生成一份，要提供数据生成函数
         cache_file可以当装饰器用，此时不用显式指定该参数
-    :param reset: 如果file是否已存在，都用make_data_func强制重置一遍
+    :param mode:
+        read_first（默认）: 优先尝试从已有文件读取
+        generate_first: 函数生成优先
+    :param cache_time: 文件缓存时间，单位为秒，默认为None，表示始终使用缓存文件
+        如果设置60，表示超过60秒后，需要重新优先从函数获得更新内容
     :param kwargs: 可以传递read、write支持的扩展参数
-    :return: 从缓存文件直接读取到的数据
+    :return: 读取到的数据
     """
+    from datetime import datetime, timedelta
+    from pyxllib.prog.pupil import format_exception
     def decorator(func):
         def wrapper(*args2, **kwargs2):
             f = XlPath.init(file, XlPath.tempdir())
-            if f.exists() and not reset:  # 文件存在，直接读取返回
-                data = f.read_auto(**kwargs)
-            else:  # 文件不存在则要生成一份数据
+            f.parent.mkdir(exist_ok=True, parents=True)
+            # 1 优先看是不是需要先从文件读取数据
+            if mode == 'read_first' and f.is_file():
+                if cache_time is None:
+                    return f.read_auto(**kwargs)
+                current_time = datetime.now()
+                last_modified = datetime.fromtimestamp(f.mtime())  # 获取文件的修改时间
+                if not isinstance(cache_time, timedelta):
+                    cache_time2 = timedelta(seconds=cache_time)
+                else:
+                    cache_time2 = cache_time
+                if cache_time is None or (current_time - last_modified <= cache_time2):
+                    return f.read_auto(**kwargs)
+            # 2 如果需要重新生成数据，且没有已存在的保底文件
+            if not f.is_file():
                 data = func(*args2, **kwargs2)
                 f.write_auto(data, **kwargs)
-            return data
+                return data
+            # 3 需要重新生成，但是有保底文件
+            try:
+                data = func(*args2, **kwargs2)
+                f.write_auto(data, **kwargs)
+                return data
+            except Exception as e:
+                print(format_exception(e))
+                return f.read_auto(**kwargs)
         return wrapper
@@ -2418,7 +2608,7 @@ def myoswalk(root, filter_rule=None, recur=True):
 def mygetfiles(root, filter_rule=None, recur=True):
-    """对myoswalk进一步封装，返回所有匹配的文件
+    r""" 对myoswalk进一步封装，返回所有匹配的文件
     会递归查找所有子文件
     可以这样遍历一个目录下的所有文件：
@@ -2449,34 +2639,55 @@ class DirsFileFinder:
         for d in dirs:
             self.add_dir(d)
-    def add_dir(self, p):
+    def add_dir(self, p, cvt_name_func=None):
         """ 添加备用检索目录
         当前面的目录找不到匹配项的时候，会使用备用目录的文件
         备用目录可以一直添加，有多个，优先级逐渐降低
+        :param cvt_name_func: 对名称做个转换再匹配
         """
         files = list(XlPath(p).rglob_files())
-        for f in files:
-            self.names[f.name].append(f)
-            self.stems[f.stem].append(f)
+        if cvt_name_func:
+            for f in files:
+                self.names[cvt_name_func(f.name)].append(f)
+                self.stems[cvt_name_func(f.stem)].append(f)
+        else:
+            for f in files:
+                self.names[f.name].append(f)
+                self.stems[f.stem].append(f)
     def find_name(self, name):
         """ 返回第一个匹配的结果 """
-        names = self.find_names(name)
-        if names:
-            return names[0]
+        files = self.find_names(name)
+        if files:
+            return files[0]
     def find_names(self, name):
         """ 返回所有匹配的结果 """
         return self.names[name]
     def find_stem(self, stem):
-        stems = self.find_stems(stem)
-        if stems:
-            return stems[0]
+        files = self.find_stems(stem)
+        if files:
+            return files[0]
     def find_stems(self, stem):
         return self.stems[stem]
+    def find_prefix_name(self, prefix_name, suffix=None):
+        files = self.find_prefix_names(prefix_name, suffix=suffix)
+        if files:
+            return files[0]
+    def find_prefix_names(self, prefix_name, suffix=None):
+        """ name中前缀为prefix_name """
+        filess = [files for name, files in self.names.items() if name.startswith(prefix_name)]
+        # 将嵌套的list展平
+        files = [file for files in filess for file in files]
+        if suffix:
+            files = [file for file in files if file.suffix == suffix]
+        return files
 class TwinDirs:
     def __init__(self, src_dir, dst_dir):

pyxllib 0.3.96__py3-none-any.whl → 0.3.197__py3-none-any.whl

pyxllib 0.3.96__py3-none-any.whl → 0.3.197__py3-none-any.whl