pyxllib 0.3.96__py3-none-any.whl → 0.3.197__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyxllib/algo/geo.py +12 -0
- pyxllib/algo/intervals.py +1 -1
- pyxllib/algo/matcher.py +78 -0
- pyxllib/algo/pupil.py +187 -19
- pyxllib/algo/specialist.py +2 -1
- pyxllib/algo/stat.py +38 -2
- {pyxlpr → pyxllib/autogui}/__init__.py +1 -1
- pyxllib/autogui/activewin.py +246 -0
- pyxllib/autogui/all.py +9 -0
- pyxllib/{ext/autogui → autogui}/autogui.py +40 -11
- pyxllib/autogui/uiautolib.py +362 -0
- pyxllib/autogui/wechat.py +827 -0
- pyxllib/autogui/wechat_msg.py +421 -0
- pyxllib/autogui/wxautolib.py +84 -0
- pyxllib/cv/slidercaptcha.py +137 -0
- pyxllib/data/echarts.py +123 -12
- pyxllib/data/jsonlib.py +89 -0
- pyxllib/data/pglib.py +514 -30
- pyxllib/data/sqlite.py +231 -4
- pyxllib/ext/JLineViewer.py +14 -1
- pyxllib/ext/drissionlib.py +277 -0
- pyxllib/ext/kq5034lib.py +0 -1594
- pyxllib/ext/robustprocfile.py +497 -0
- pyxllib/ext/unixlib.py +6 -5
- pyxllib/ext/utools.py +108 -95
- pyxllib/ext/webhook.py +32 -14
- pyxllib/ext/wjxlib.py +88 -0
- pyxllib/ext/wpsapi.py +124 -0
- pyxllib/ext/xlwork.py +9 -0
- pyxllib/ext/yuquelib.py +1003 -71
- pyxllib/file/docxlib.py +1 -1
- pyxllib/file/libreoffice.py +165 -0
- pyxllib/file/movielib.py +9 -0
- pyxllib/file/packlib/__init__.py +112 -75
- pyxllib/file/pdflib.py +1 -1
- pyxllib/file/pupil.py +1 -1
- pyxllib/file/specialist/dirlib.py +1 -1
- pyxllib/file/specialist/download.py +10 -3
- pyxllib/file/specialist/filelib.py +266 -55
- pyxllib/file/xlsxlib.py +205 -50
- pyxllib/file/xlsyncfile.py +341 -0
- pyxllib/prog/cachetools.py +64 -0
- pyxllib/prog/filelock.py +42 -0
- pyxllib/prog/multiprogs.py +940 -0
- pyxllib/prog/newbie.py +9 -2
- pyxllib/prog/pupil.py +129 -60
- pyxllib/prog/specialist/__init__.py +176 -2
- pyxllib/prog/specialist/bc.py +5 -2
- pyxllib/prog/specialist/browser.py +11 -2
- pyxllib/prog/specialist/datetime.py +68 -0
- pyxllib/prog/specialist/tictoc.py +12 -13
- pyxllib/prog/specialist/xllog.py +5 -5
- pyxllib/prog/xlosenv.py +7 -0
- pyxllib/text/airscript.js +744 -0
- pyxllib/text/charclasslib.py +17 -5
- pyxllib/text/jiebalib.py +6 -3
- pyxllib/text/jinjalib.py +32 -0
- pyxllib/text/jsa_ai_prompt.md +271 -0
- pyxllib/text/jscode.py +159 -4
- pyxllib/text/nestenv.py +1 -1
- pyxllib/text/newbie.py +12 -0
- pyxllib/text/pupil/common.py +26 -0
- pyxllib/text/specialist/ptag.py +2 -2
- pyxllib/text/templates/echart_base.html +11 -0
- pyxllib/text/templates/highlight_code.html +17 -0
- pyxllib/text/templates/latex_editor.html +103 -0
- pyxllib/text/xmllib.py +76 -14
- pyxllib/xl.py +2 -1
- pyxllib-0.3.197.dist-info/METADATA +48 -0
- pyxllib-0.3.197.dist-info/RECORD +126 -0
- {pyxllib-0.3.96.dist-info → pyxllib-0.3.197.dist-info}/WHEEL +1 -2
- pyxllib/ext/autogui/__init__.py +0 -8
- pyxllib-0.3.96.dist-info/METADATA +0 -51
- pyxllib-0.3.96.dist-info/RECORD +0 -333
- pyxllib-0.3.96.dist-info/top_level.txt +0 -2
- pyxlpr/ai/__init__.py +0 -5
- pyxlpr/ai/clientlib.py +0 -1281
- pyxlpr/ai/specialist.py +0 -286
- pyxlpr/ai/torch_app.py +0 -172
- pyxlpr/ai/xlpaddle.py +0 -655
- pyxlpr/ai/xltorch.py +0 -705
- pyxlpr/data/__init__.py +0 -11
- pyxlpr/data/coco.py +0 -1325
- pyxlpr/data/datacls.py +0 -365
- pyxlpr/data/datasets.py +0 -200
- pyxlpr/data/gptlib.py +0 -1291
- pyxlpr/data/icdar/__init__.py +0 -96
- pyxlpr/data/icdar/deteval.py +0 -377
- pyxlpr/data/icdar/icdar2013.py +0 -341
- pyxlpr/data/icdar/iou.py +0 -340
- pyxlpr/data/icdar/rrc_evaluation_funcs_1_1.py +0 -463
- pyxlpr/data/imtextline.py +0 -473
- pyxlpr/data/labelme.py +0 -866
- pyxlpr/data/removeline.py +0 -179
- pyxlpr/data/specialist.py +0 -57
- pyxlpr/eval/__init__.py +0 -85
- pyxlpr/paddleocr.py +0 -776
- pyxlpr/ppocr/__init__.py +0 -15
- pyxlpr/ppocr/configs/rec/multi_language/generate_multi_language_configs.py +0 -226
- pyxlpr/ppocr/data/__init__.py +0 -135
- pyxlpr/ppocr/data/imaug/ColorJitter.py +0 -26
- pyxlpr/ppocr/data/imaug/__init__.py +0 -67
- pyxlpr/ppocr/data/imaug/copy_paste.py +0 -170
- pyxlpr/ppocr/data/imaug/east_process.py +0 -437
- pyxlpr/ppocr/data/imaug/gen_table_mask.py +0 -244
- pyxlpr/ppocr/data/imaug/iaa_augment.py +0 -114
- pyxlpr/ppocr/data/imaug/label_ops.py +0 -789
- pyxlpr/ppocr/data/imaug/make_border_map.py +0 -184
- pyxlpr/ppocr/data/imaug/make_pse_gt.py +0 -106
- pyxlpr/ppocr/data/imaug/make_shrink_map.py +0 -126
- pyxlpr/ppocr/data/imaug/operators.py +0 -433
- pyxlpr/ppocr/data/imaug/pg_process.py +0 -906
- pyxlpr/ppocr/data/imaug/randaugment.py +0 -143
- pyxlpr/ppocr/data/imaug/random_crop_data.py +0 -239
- pyxlpr/ppocr/data/imaug/rec_img_aug.py +0 -533
- pyxlpr/ppocr/data/imaug/sast_process.py +0 -777
- pyxlpr/ppocr/data/imaug/text_image_aug/__init__.py +0 -17
- pyxlpr/ppocr/data/imaug/text_image_aug/augment.py +0 -120
- pyxlpr/ppocr/data/imaug/text_image_aug/warp_mls.py +0 -168
- pyxlpr/ppocr/data/lmdb_dataset.py +0 -115
- pyxlpr/ppocr/data/pgnet_dataset.py +0 -104
- pyxlpr/ppocr/data/pubtab_dataset.py +0 -107
- pyxlpr/ppocr/data/simple_dataset.py +0 -372
- pyxlpr/ppocr/losses/__init__.py +0 -61
- pyxlpr/ppocr/losses/ace_loss.py +0 -52
- pyxlpr/ppocr/losses/basic_loss.py +0 -135
- pyxlpr/ppocr/losses/center_loss.py +0 -88
- pyxlpr/ppocr/losses/cls_loss.py +0 -30
- pyxlpr/ppocr/losses/combined_loss.py +0 -67
- pyxlpr/ppocr/losses/det_basic_loss.py +0 -208
- pyxlpr/ppocr/losses/det_db_loss.py +0 -80
- pyxlpr/ppocr/losses/det_east_loss.py +0 -63
- pyxlpr/ppocr/losses/det_pse_loss.py +0 -149
- pyxlpr/ppocr/losses/det_sast_loss.py +0 -121
- pyxlpr/ppocr/losses/distillation_loss.py +0 -272
- pyxlpr/ppocr/losses/e2e_pg_loss.py +0 -140
- pyxlpr/ppocr/losses/kie_sdmgr_loss.py +0 -113
- pyxlpr/ppocr/losses/rec_aster_loss.py +0 -99
- pyxlpr/ppocr/losses/rec_att_loss.py +0 -39
- pyxlpr/ppocr/losses/rec_ctc_loss.py +0 -44
- pyxlpr/ppocr/losses/rec_enhanced_ctc_loss.py +0 -70
- pyxlpr/ppocr/losses/rec_nrtr_loss.py +0 -30
- pyxlpr/ppocr/losses/rec_sar_loss.py +0 -28
- pyxlpr/ppocr/losses/rec_srn_loss.py +0 -47
- pyxlpr/ppocr/losses/table_att_loss.py +0 -109
- pyxlpr/ppocr/metrics/__init__.py +0 -44
- pyxlpr/ppocr/metrics/cls_metric.py +0 -45
- pyxlpr/ppocr/metrics/det_metric.py +0 -82
- pyxlpr/ppocr/metrics/distillation_metric.py +0 -73
- pyxlpr/ppocr/metrics/e2e_metric.py +0 -86
- pyxlpr/ppocr/metrics/eval_det_iou.py +0 -274
- pyxlpr/ppocr/metrics/kie_metric.py +0 -70
- pyxlpr/ppocr/metrics/rec_metric.py +0 -75
- pyxlpr/ppocr/metrics/table_metric.py +0 -50
- pyxlpr/ppocr/modeling/architectures/__init__.py +0 -32
- pyxlpr/ppocr/modeling/architectures/base_model.py +0 -88
- pyxlpr/ppocr/modeling/architectures/distillation_model.py +0 -60
- pyxlpr/ppocr/modeling/backbones/__init__.py +0 -54
- pyxlpr/ppocr/modeling/backbones/det_mobilenet_v3.py +0 -268
- pyxlpr/ppocr/modeling/backbones/det_resnet_vd.py +0 -246
- pyxlpr/ppocr/modeling/backbones/det_resnet_vd_sast.py +0 -285
- pyxlpr/ppocr/modeling/backbones/e2e_resnet_vd_pg.py +0 -265
- pyxlpr/ppocr/modeling/backbones/kie_unet_sdmgr.py +0 -186
- pyxlpr/ppocr/modeling/backbones/rec_mobilenet_v3.py +0 -138
- pyxlpr/ppocr/modeling/backbones/rec_mv1_enhance.py +0 -258
- pyxlpr/ppocr/modeling/backbones/rec_nrtr_mtb.py +0 -48
- pyxlpr/ppocr/modeling/backbones/rec_resnet_31.py +0 -210
- pyxlpr/ppocr/modeling/backbones/rec_resnet_aster.py +0 -143
- pyxlpr/ppocr/modeling/backbones/rec_resnet_fpn.py +0 -307
- pyxlpr/ppocr/modeling/backbones/rec_resnet_vd.py +0 -286
- pyxlpr/ppocr/modeling/heads/__init__.py +0 -54
- pyxlpr/ppocr/modeling/heads/cls_head.py +0 -52
- pyxlpr/ppocr/modeling/heads/det_db_head.py +0 -118
- pyxlpr/ppocr/modeling/heads/det_east_head.py +0 -121
- pyxlpr/ppocr/modeling/heads/det_pse_head.py +0 -37
- pyxlpr/ppocr/modeling/heads/det_sast_head.py +0 -128
- pyxlpr/ppocr/modeling/heads/e2e_pg_head.py +0 -253
- pyxlpr/ppocr/modeling/heads/kie_sdmgr_head.py +0 -206
- pyxlpr/ppocr/modeling/heads/multiheadAttention.py +0 -163
- pyxlpr/ppocr/modeling/heads/rec_aster_head.py +0 -393
- pyxlpr/ppocr/modeling/heads/rec_att_head.py +0 -202
- pyxlpr/ppocr/modeling/heads/rec_ctc_head.py +0 -88
- pyxlpr/ppocr/modeling/heads/rec_nrtr_head.py +0 -826
- pyxlpr/ppocr/modeling/heads/rec_sar_head.py +0 -402
- pyxlpr/ppocr/modeling/heads/rec_srn_head.py +0 -280
- pyxlpr/ppocr/modeling/heads/self_attention.py +0 -406
- pyxlpr/ppocr/modeling/heads/table_att_head.py +0 -246
- pyxlpr/ppocr/modeling/necks/__init__.py +0 -32
- pyxlpr/ppocr/modeling/necks/db_fpn.py +0 -111
- pyxlpr/ppocr/modeling/necks/east_fpn.py +0 -188
- pyxlpr/ppocr/modeling/necks/fpn.py +0 -138
- pyxlpr/ppocr/modeling/necks/pg_fpn.py +0 -314
- pyxlpr/ppocr/modeling/necks/rnn.py +0 -92
- pyxlpr/ppocr/modeling/necks/sast_fpn.py +0 -284
- pyxlpr/ppocr/modeling/necks/table_fpn.py +0 -110
- pyxlpr/ppocr/modeling/transforms/__init__.py +0 -28
- pyxlpr/ppocr/modeling/transforms/stn.py +0 -135
- pyxlpr/ppocr/modeling/transforms/tps.py +0 -308
- pyxlpr/ppocr/modeling/transforms/tps_spatial_transformer.py +0 -156
- pyxlpr/ppocr/optimizer/__init__.py +0 -61
- pyxlpr/ppocr/optimizer/learning_rate.py +0 -228
- pyxlpr/ppocr/optimizer/lr_scheduler.py +0 -49
- pyxlpr/ppocr/optimizer/optimizer.py +0 -160
- pyxlpr/ppocr/optimizer/regularizer.py +0 -52
- pyxlpr/ppocr/postprocess/__init__.py +0 -55
- pyxlpr/ppocr/postprocess/cls_postprocess.py +0 -33
- pyxlpr/ppocr/postprocess/db_postprocess.py +0 -234
- pyxlpr/ppocr/postprocess/east_postprocess.py +0 -143
- pyxlpr/ppocr/postprocess/locality_aware_nms.py +0 -200
- pyxlpr/ppocr/postprocess/pg_postprocess.py +0 -52
- pyxlpr/ppocr/postprocess/pse_postprocess/__init__.py +0 -15
- pyxlpr/ppocr/postprocess/pse_postprocess/pse/__init__.py +0 -29
- pyxlpr/ppocr/postprocess/pse_postprocess/pse/setup.py +0 -14
- pyxlpr/ppocr/postprocess/pse_postprocess/pse_postprocess.py +0 -118
- pyxlpr/ppocr/postprocess/rec_postprocess.py +0 -654
- pyxlpr/ppocr/postprocess/sast_postprocess.py +0 -355
- pyxlpr/ppocr/tools/__init__.py +0 -14
- pyxlpr/ppocr/tools/eval.py +0 -83
- pyxlpr/ppocr/tools/export_center.py +0 -77
- pyxlpr/ppocr/tools/export_model.py +0 -129
- pyxlpr/ppocr/tools/infer/predict_cls.py +0 -151
- pyxlpr/ppocr/tools/infer/predict_det.py +0 -300
- pyxlpr/ppocr/tools/infer/predict_e2e.py +0 -169
- pyxlpr/ppocr/tools/infer/predict_rec.py +0 -414
- pyxlpr/ppocr/tools/infer/predict_system.py +0 -204
- pyxlpr/ppocr/tools/infer/utility.py +0 -629
- pyxlpr/ppocr/tools/infer_cls.py +0 -83
- pyxlpr/ppocr/tools/infer_det.py +0 -134
- pyxlpr/ppocr/tools/infer_e2e.py +0 -122
- pyxlpr/ppocr/tools/infer_kie.py +0 -153
- pyxlpr/ppocr/tools/infer_rec.py +0 -146
- pyxlpr/ppocr/tools/infer_table.py +0 -107
- pyxlpr/ppocr/tools/program.py +0 -596
- pyxlpr/ppocr/tools/test_hubserving.py +0 -117
- pyxlpr/ppocr/tools/train.py +0 -163
- pyxlpr/ppocr/tools/xlprog.py +0 -748
- pyxlpr/ppocr/utils/EN_symbol_dict.txt +0 -94
- pyxlpr/ppocr/utils/__init__.py +0 -24
- pyxlpr/ppocr/utils/dict/ar_dict.txt +0 -117
- pyxlpr/ppocr/utils/dict/arabic_dict.txt +0 -162
- pyxlpr/ppocr/utils/dict/be_dict.txt +0 -145
- pyxlpr/ppocr/utils/dict/bg_dict.txt +0 -140
- pyxlpr/ppocr/utils/dict/chinese_cht_dict.txt +0 -8421
- pyxlpr/ppocr/utils/dict/cyrillic_dict.txt +0 -163
- pyxlpr/ppocr/utils/dict/devanagari_dict.txt +0 -167
- pyxlpr/ppocr/utils/dict/en_dict.txt +0 -63
- pyxlpr/ppocr/utils/dict/fa_dict.txt +0 -136
- pyxlpr/ppocr/utils/dict/french_dict.txt +0 -136
- pyxlpr/ppocr/utils/dict/german_dict.txt +0 -143
- pyxlpr/ppocr/utils/dict/hi_dict.txt +0 -162
- pyxlpr/ppocr/utils/dict/it_dict.txt +0 -118
- pyxlpr/ppocr/utils/dict/japan_dict.txt +0 -4399
- pyxlpr/ppocr/utils/dict/ka_dict.txt +0 -153
- pyxlpr/ppocr/utils/dict/korean_dict.txt +0 -3688
- pyxlpr/ppocr/utils/dict/latin_dict.txt +0 -185
- pyxlpr/ppocr/utils/dict/mr_dict.txt +0 -153
- pyxlpr/ppocr/utils/dict/ne_dict.txt +0 -153
- pyxlpr/ppocr/utils/dict/oc_dict.txt +0 -96
- pyxlpr/ppocr/utils/dict/pu_dict.txt +0 -130
- pyxlpr/ppocr/utils/dict/rs_dict.txt +0 -91
- pyxlpr/ppocr/utils/dict/rsc_dict.txt +0 -134
- pyxlpr/ppocr/utils/dict/ru_dict.txt +0 -125
- pyxlpr/ppocr/utils/dict/ta_dict.txt +0 -128
- pyxlpr/ppocr/utils/dict/table_dict.txt +0 -277
- pyxlpr/ppocr/utils/dict/table_structure_dict.txt +0 -2759
- pyxlpr/ppocr/utils/dict/te_dict.txt +0 -151
- pyxlpr/ppocr/utils/dict/ug_dict.txt +0 -114
- pyxlpr/ppocr/utils/dict/uk_dict.txt +0 -142
- pyxlpr/ppocr/utils/dict/ur_dict.txt +0 -137
- pyxlpr/ppocr/utils/dict/xi_dict.txt +0 -110
- pyxlpr/ppocr/utils/dict90.txt +0 -90
- pyxlpr/ppocr/utils/e2e_metric/Deteval.py +0 -574
- pyxlpr/ppocr/utils/e2e_metric/polygon_fast.py +0 -83
- pyxlpr/ppocr/utils/e2e_utils/extract_batchsize.py +0 -87
- pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_fast.py +0 -457
- pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_slow.py +0 -592
- pyxlpr/ppocr/utils/e2e_utils/pgnet_pp_utils.py +0 -162
- pyxlpr/ppocr/utils/e2e_utils/visual.py +0 -162
- pyxlpr/ppocr/utils/en_dict.txt +0 -95
- pyxlpr/ppocr/utils/gen_label.py +0 -81
- pyxlpr/ppocr/utils/ic15_dict.txt +0 -36
- pyxlpr/ppocr/utils/iou.py +0 -54
- pyxlpr/ppocr/utils/logging.py +0 -69
- pyxlpr/ppocr/utils/network.py +0 -84
- pyxlpr/ppocr/utils/ppocr_keys_v1.txt +0 -6623
- pyxlpr/ppocr/utils/profiler.py +0 -110
- pyxlpr/ppocr/utils/save_load.py +0 -150
- pyxlpr/ppocr/utils/stats.py +0 -72
- pyxlpr/ppocr/utils/utility.py +0 -80
- pyxlpr/ppstructure/__init__.py +0 -13
- pyxlpr/ppstructure/predict_system.py +0 -187
- pyxlpr/ppstructure/table/__init__.py +0 -13
- pyxlpr/ppstructure/table/eval_table.py +0 -72
- pyxlpr/ppstructure/table/matcher.py +0 -192
- pyxlpr/ppstructure/table/predict_structure.py +0 -136
- pyxlpr/ppstructure/table/predict_table.py +0 -221
- pyxlpr/ppstructure/table/table_metric/__init__.py +0 -16
- pyxlpr/ppstructure/table/table_metric/parallel.py +0 -51
- pyxlpr/ppstructure/table/table_metric/table_metric.py +0 -247
- pyxlpr/ppstructure/table/tablepyxl/__init__.py +0 -13
- pyxlpr/ppstructure/table/tablepyxl/style.py +0 -283
- pyxlpr/ppstructure/table/tablepyxl/tablepyxl.py +0 -118
- pyxlpr/ppstructure/utility.py +0 -71
- pyxlpr/xlai.py +0 -10
- /pyxllib/{ext/autogui → autogui}/virtualkey.py +0 -0
- {pyxllib-0.3.96.dist-info → pyxllib-0.3.197.dist-info/licenses}/LICENSE +0 -0
@@ -57,6 +57,28 @@ def __2_qiniu():
|
|
57
57
|
pass
|
58
58
|
|
59
59
|
|
60
|
+
class GetEtag:
|
61
|
+
""" 七牛原有etag功能基础上做封装 """
|
62
|
+
|
63
|
+
@classmethod
|
64
|
+
def from_bytes(cls, _bytes):
|
65
|
+
return qiniu.utils.etag_stream(io.BytesIO(_bytes))
|
66
|
+
|
67
|
+
@classmethod
|
68
|
+
def from_text(cls, text):
|
69
|
+
_bytes = text.encode('utf8')
|
70
|
+
return qiniu.utils.etag_stream(io.BytesIO(_bytes))
|
71
|
+
|
72
|
+
@classmethod
|
73
|
+
def from_file(cls, file):
|
74
|
+
return qiniu.etag(file)
|
75
|
+
|
76
|
+
@classmethod
|
77
|
+
def from_url(cls, url):
|
78
|
+
return cls(requests.get(url).content)
|
79
|
+
|
80
|
+
|
81
|
+
# @deprecated.deprecated
|
60
82
|
def get_etag(arg):
|
61
83
|
""" 七牛原有etag功能基础上做封装
|
62
84
|
|
@@ -784,7 +806,7 @@ class XlPath(type(pathlib.Path())):
|
|
784
806
|
|
785
807
|
@classmethod
|
786
808
|
def desktop(cls):
|
787
|
-
if os.
|
809
|
+
if os.getenv('Desktop', None): # 如果修改了win10默认的桌面路径,需要在环境变量添加一个正确的Desktop路径值
|
788
810
|
desktop = os.environ['Desktop']
|
789
811
|
else:
|
790
812
|
desktop = os.path.join(pathlib.Path.home(), 'Desktop') # 这个不一定准,桌面是有可能被移到D盘等的
|
@@ -865,6 +887,7 @@ class XlPath(type(pathlib.Path())):
|
|
865
887
|
return 0
|
866
888
|
|
867
889
|
def mtime(self):
|
890
|
+
""" 文件的修改时间 """
|
868
891
|
# windows会带小数,linux使用%Ts只有整数部分。
|
869
892
|
# 这里不用四舍五入,取整数部分就是对应的。
|
870
893
|
return int(os.stat(self).st_mtime)
|
@@ -900,7 +923,7 @@ class XlPath(type(pathlib.Path())):
|
|
900
923
|
res.add(p.relative_to(self).as_posix())
|
901
924
|
return res
|
902
925
|
|
903
|
-
def relpath(self, ref_dir) ->
|
926
|
+
def relpath(self, ref_dir) -> 'XlPath':
|
904
927
|
r""" 当前路径,相对于ref_dir的路径位置
|
905
928
|
|
906
929
|
>>> File('C:/a/b/c.txt').relpath('C:/a/')
|
@@ -1067,7 +1090,7 @@ class XlPath(type(pathlib.Path())):
|
|
1067
1090
|
src_dir = XlPath(src_dir)
|
1068
1091
|
stem = src_dir.name
|
1069
1092
|
|
1070
|
-
pattern = filename_template.format(stem=stem, index="(\d+)", suffix=".*")
|
1093
|
+
pattern = filename_template.format(stem=stem, index=r"(\d+)", suffix=".*")
|
1071
1094
|
files = [file for file in src_dir.iterdir() if re.match(pattern, file.name)] # 获取目录中符合模式的文件
|
1072
1095
|
|
1073
1096
|
self.merge_from_files(files, ignore_empty_lines_between_files=True, encoding=encoding)
|
@@ -1089,7 +1112,7 @@ class XlPath(type(pathlib.Path())):
|
|
1089
1112
|
s = str(best_match)
|
1090
1113
|
encoding = best_match.encoding
|
1091
1114
|
else:
|
1092
|
-
with open(self, 'r', encoding=encoding) as f:
|
1115
|
+
with open(self, 'r', encoding=encoding, errors=errors) as f:
|
1093
1116
|
s = f.read()
|
1094
1117
|
|
1095
1118
|
# 如果用\r\n作为换行符会有一些意外不好处理
|
@@ -1101,6 +1124,22 @@ class XlPath(type(pathlib.Path())):
|
|
1101
1124
|
else:
|
1102
1125
|
return s
|
1103
1126
|
|
1127
|
+
def read_text2(self):
|
1128
|
+
""" 智能识别编码的文本读取,这里收集了我见过的一些常见类型 """
|
1129
|
+
for encoding in ['utf8',
|
1130
|
+
'gbk',
|
1131
|
+
'gb18030',
|
1132
|
+
'utf_16',
|
1133
|
+
'cp932', # 日文,Shift-JIS
|
1134
|
+
'Big5', # 繁体字,Big5
|
1135
|
+
'big5hkscs', # 繁体字
|
1136
|
+
]:
|
1137
|
+
try:
|
1138
|
+
content = self.read_text(encoding=encoding)
|
1139
|
+
return content, encoding
|
1140
|
+
except (UnicodeDecodeError, UnicodeError):
|
1141
|
+
continue
|
1142
|
+
|
1104
1143
|
def readlines_batch(self, batch_size, *, encoding='utf8'):
|
1105
1144
|
""" 将文本行打包,每次返回一个批次多行数据
|
1106
1145
|
|
@@ -1118,12 +1157,12 @@ class XlPath(type(pathlib.Path())):
|
|
1118
1157
|
f = open(self, 'r', encoding=encoding)
|
1119
1158
|
return chunked(f, batch_size)
|
1120
1159
|
|
1121
|
-
def write_text(self, data, encoding='utf8', errors=None, newline=None):
|
1122
|
-
with open(self,
|
1160
|
+
def write_text(self, data, encoding='utf8', mode='w', errors=None, newline=None):
|
1161
|
+
with open(self, mode, encoding=encoding, errors=errors, newline=newline) as f:
|
1123
1162
|
return f.write(data)
|
1124
1163
|
|
1125
|
-
def write_text_unix(self, data, encoding='utf8', errors=None, newline='\n'):
|
1126
|
-
with open(self,
|
1164
|
+
def write_text_unix(self, data, encoding='utf8', mode='w', errors=None, newline='\n'):
|
1165
|
+
with open(self, mode, encoding=encoding, errors=errors, newline=newline) as f:
|
1127
1166
|
return f.write(data)
|
1128
1167
|
|
1129
1168
|
def read_pkl(self):
|
@@ -1162,40 +1201,62 @@ class XlPath(type(pathlib.Path())):
|
|
1162
1201
|
json.dump(data, f, **kwargs)
|
1163
1202
|
|
1164
1203
|
def read_jsonl(self, encoding='utf8', max_items=None, *,
|
1165
|
-
errors='strict', return_mode
|
1204
|
+
errors='strict', return_mode=0, batch_size=None):
|
1166
1205
|
""" 从文件中读取JSONL格式的数据
|
1167
1206
|
|
1168
1207
|
:param str encoding: 文件编码格式,默认为utf8
|
1169
1208
|
:param str errors: 读取文件时的错误处理方式,默认为strict
|
1170
1209
|
:param bool return_mode: 是否返回文件编码格式,默认为False
|
1210
|
+
0, 读取全量数据返回
|
1211
|
+
1,返回文件编码格式
|
1171
1212
|
:param int max_items: 限制读取的条目数,默认为None,表示读取所有条目
|
1213
|
+
:param int batch_size:
|
1214
|
+
默认为None,表示一次性读取所有数据
|
1215
|
+
如果设置了数值,则会流式读取,常用于太大,超过内存大小等的jsonl文件读取
|
1216
|
+
注意如果设置了大小,只是底层每次一批读取的大小,但返回的data仍然是一维的数据格式迭代器
|
1172
1217
|
:return: 返回读取到的数据列表,如果return_mode为True,则同时返回文件编码格式
|
1173
1218
|
|
1174
1219
|
>> read_jsonl('data.jsonl', max_items=10) # 读取前10条数据
|
1175
1220
|
"""
|
1176
|
-
|
1221
|
+
if batch_size is None:
|
1222
|
+
s, encoding = self.read_text(encoding=encoding, errors=errors, return_mode=True)
|
1223
|
+
|
1224
|
+
data = []
|
1225
|
+
# todo 这一步可能不够严谨,不同的操作系统文件格式不同。但使用splitlines也不太好,在数据含有NEL等特殊字符时会多换行。
|
1226
|
+
for line in s.split('\n'):
|
1227
|
+
if line:
|
1228
|
+
try: # 注意,这里可能会有数据读取失败
|
1229
|
+
data.append(json.loads(line))
|
1230
|
+
except json.decoder.JSONDecodeError:
|
1231
|
+
pass
|
1232
|
+
# 如果达到了限制的条目数,就停止读取
|
1233
|
+
if max_items is not None and len(data) >= max_items:
|
1234
|
+
break
|
1235
|
+
else:
|
1236
|
+
def get_data():
|
1237
|
+
for batch in self.yield_line(batch_size=batch_size, encoding=encoding):
|
1238
|
+
for line in batch:
|
1239
|
+
try: # 注意,这里可能会有数据读取失败
|
1240
|
+
yield json.loads(line)
|
1241
|
+
except json.decoder.JSONDecodeError:
|
1242
|
+
pass
|
1177
1243
|
|
1178
|
-
|
1179
|
-
# todo 这一步可能不够严谨,不同的操作系统文件格式不同。但使用splitlines也不太好,在数据含有NEL等特殊字符时会多换行。
|
1180
|
-
for line in s.split('\n'):
|
1181
|
-
if line:
|
1182
|
-
try: # 注意,这里可能会有数据读取失败
|
1183
|
-
data.append(json.loads(line))
|
1184
|
-
except json.decoder.JSONDecodeError:
|
1185
|
-
pass
|
1186
|
-
# 如果达到了限制的条目数,就停止读取
|
1187
|
-
if max_items is not None and len(data) >= max_items:
|
1188
|
-
break
|
1244
|
+
data = get_data()
|
1189
1245
|
|
1190
1246
|
if return_mode:
|
1191
1247
|
return data, encoding
|
1192
1248
|
else:
|
1193
1249
|
return data
|
1194
1250
|
|
1195
|
-
def write_jsonl(self, list_data, ensure_ascii=False, default=None):
|
1251
|
+
def write_jsonl(self, list_data, ensure_ascii=False, default=None, mode='w', errors=None):
|
1196
1252
|
""" 由于这种格式主要是跟商汤这边对接,就尽量跟它们的格式进行兼容 """
|
1197
1253
|
content = '\n'.join([json.dumps(x, ensure_ascii=ensure_ascii, default=default) for x in list_data])
|
1198
|
-
self.write_text_unix(content + '\n')
|
1254
|
+
self.write_text_unix(content + '\n', mode=mode, errors=errors)
|
1255
|
+
|
1256
|
+
def add_json_line(self, data, ensure_ascii=False, default=None, mode='a'):
|
1257
|
+
""" 在文件末尾添加一行JSON数据 """
|
1258
|
+
content = json.dumps(data, ensure_ascii=ensure_ascii, default=default)
|
1259
|
+
self.write_text_unix(content + '\n', mode=mode)
|
1199
1260
|
|
1200
1261
|
def read_csv(self, encoding='utf8', *, errors='strict', return_mode: bool = False,
|
1201
1262
|
delimiter=',', quotechar='"', **kwargs):
|
@@ -1243,6 +1304,7 @@ class XlPath(type(pathlib.Path())):
|
|
1243
1304
|
|
1244
1305
|
def write_auto(self, data, *args, if_exists=None, **kwargs):
|
1245
1306
|
""" 根据文件后缀自动识别写入函数 """
|
1307
|
+
self.parent.mkdir(exist_ok=True, parents=True)
|
1246
1308
|
mode = self.suffix.lower()[1:]
|
1247
1309
|
write_func = getattr(self, 'write_' + mode, None)
|
1248
1310
|
if self.exist_preprcs(if_exists):
|
@@ -1410,19 +1472,58 @@ class XlPath(type(pathlib.Path())):
|
|
1410
1472
|
dst = XlPath(dst)
|
1411
1473
|
if dst.exist_preprcs(if_exists):
|
1412
1474
|
if self.is_file():
|
1413
|
-
shutil.copy2(self, dst)
|
1475
|
+
return shutil.copy2(self, dst)
|
1414
1476
|
else:
|
1415
|
-
shutil.copytree(self, dst)
|
1477
|
+
return shutil.copytree(self, dst)
|
1416
1478
|
|
1417
|
-
def move(self, dst, if_exists=None):
|
1418
|
-
|
1479
|
+
def move(self, dst, *, cross_disk=False, if_exists=None):
|
1480
|
+
"""
|
1481
|
+
:param cross_disk: 是否可能涉及跨磁盘操作
|
1482
|
+
"""
|
1483
|
+
if not self.exists():
|
1484
|
+
return self
|
1419
1485
|
|
1420
|
-
|
1421
|
-
|
1486
|
+
if cross_disk: # 显式设置跨磁盘操作
|
1487
|
+
dst = self.copy(dst, if_exists=if_exists)
|
1488
|
+
self.delete()
|
1489
|
+
return dst
|
1490
|
+
|
1491
|
+
try:
|
1492
|
+
dst = XlPath(dst)
|
1493
|
+
if self == dst:
|
1494
|
+
# 同一个文件,可能是调整了大小写名称
|
1495
|
+
if self.as_posix() != dst.as_posix():
|
1496
|
+
tmp = self.tempfile(dir=self.parent) # self不一定是file,也可能是dir,但这个名称通用
|
1497
|
+
self.rename(tmp)
|
1498
|
+
self.delete()
|
1499
|
+
tmp.rename(dst)
|
1500
|
+
elif dst.exist_preprcs(if_exists):
|
1501
|
+
self.rename(dst)
|
1502
|
+
except OSError:
|
1503
|
+
# 有可能是跨磁盘操作,这个时候就只能先拷贝再删除了
|
1504
|
+
dst = self.copy(dst, if_exists=if_exists)
|
1505
|
+
self.delete()
|
1506
|
+
return dst
|
1507
|
+
|
1508
|
+
def rename2(self, new_name, if_exists=None):
|
1509
|
+
""" 相比原版的rename,搞了更多骚操作,但性能也会略微下降,所以重写一个功能名
|
1510
|
+
|
1511
|
+
240416周二12:49,这个接口将真的只做重命名,不做移动!所以将会不再支持dst中出现"/"路径配置
|
1512
|
+
"""
|
1422
1513
|
if not self.exists():
|
1423
1514
|
return self
|
1424
1515
|
|
1425
|
-
|
1516
|
+
if not isinstance(new_name, str):
|
1517
|
+
raise ValueError(f'rename2只能做重命名操作,目标路径必须是一个str')
|
1518
|
+
elif '/' in new_name:
|
1519
|
+
raise ValueError(f'rename2只能做重命名操作,目标路径中不能包含"/"')
|
1520
|
+
elif '\\' in new_name:
|
1521
|
+
raise ValueError(f'rename2只能做重命名操作,目标路径中不能包含"\\"')
|
1522
|
+
|
1523
|
+
if self.name == new_name: # 没有修改名称,跟原来相同
|
1524
|
+
return self
|
1525
|
+
|
1526
|
+
dst = self.parent / new_name
|
1426
1527
|
if self == dst:
|
1427
1528
|
# 同一个文件,可能是调整了大小写名称
|
1428
1529
|
if self.as_posix() != dst.as_posix():
|
@@ -1434,6 +1535,14 @@ class XlPath(type(pathlib.Path())):
|
|
1434
1535
|
self.rename(dst)
|
1435
1536
|
return dst
|
1436
1537
|
|
1538
|
+
def rename_stem(self, stem, if_exists=None):
|
1539
|
+
""" 重命名文件的stem """
|
1540
|
+
return self.rename2(stem + self.suffix, if_exists)
|
1541
|
+
|
1542
|
+
def rename_suffix(self, suffix, if_exists=None):
|
1543
|
+
""" 重命名文件的suffix """
|
1544
|
+
return self.rename2(self.stem + suffix, if_exists)
|
1545
|
+
|
1437
1546
|
def delete(self):
|
1438
1547
|
if self.is_file():
|
1439
1548
|
os.remove(self)
|
@@ -1599,6 +1708,10 @@ class XlPath(type(pathlib.Path())):
|
|
1599
1708
|
|
1600
1709
|
ext = '.' + t.extension
|
1601
1710
|
ext0 = file_path.suffix
|
1711
|
+
if ext == ext0:
|
1712
|
+
continue
|
1713
|
+
elif ext == '.xls' and ext0 == '.et':
|
1714
|
+
continue
|
1602
1715
|
|
1603
1716
|
if ext0 in ('.docx', '.xlsx', '.pptx', '.xlsm'):
|
1604
1717
|
ext0 = '.zip'
|
@@ -1712,7 +1825,7 @@ class XlPath(type(pathlib.Path())):
|
|
1712
1825
|
|
1713
1826
|
return file_summary
|
1714
1827
|
|
1715
|
-
def _check_dir_summary(self, print_mode=True, hash_func=None, run_mode=
|
1828
|
+
def _check_dir_summary(self, print_mode=True, hash_func=None, run_mode=31):
|
1716
1829
|
""" 对文件夹情况进行通用的状态检查
|
1717
1830
|
|
1718
1831
|
:param hash_func: 可以传入自定义的hash函数,用于第四块的重复文件运算
|
@@ -1729,28 +1842,28 @@ class XlPath(type(pathlib.Path())):
|
|
1729
1842
|
|
1730
1843
|
# 一 目录大小,二 各后缀文件大小
|
1731
1844
|
msg = []
|
1732
|
-
if run_mode
|
1845
|
+
if run_mode & 1: # 1和2目前是绑定一起运行的
|
1733
1846
|
printf('【' + self.as_posix() + '】目录检查')
|
1734
1847
|
printf('\n'.join(self.check_size('list')))
|
1735
1848
|
|
1736
1849
|
# 三 重名文件
|
1737
|
-
if run_mode
|
1850
|
+
if run_mode & 2:
|
1738
1851
|
printf('\n三、重名文件(忽略大小写,跨目录检查name重复情况)')
|
1739
1852
|
printf('\n'.join(self.check_repeat_name_files(print_mode=False)))
|
1740
1853
|
|
1741
1854
|
# 四 重复文件
|
1742
|
-
if run_mode
|
1855
|
+
if run_mode & 4:
|
1743
1856
|
printf('\n四、重复文件(etag相同)')
|
1744
1857
|
printf('\n'.join(self.check_repeat_files(print_mode=False, hash_func=hash_func)))
|
1745
1858
|
|
1746
1859
|
# 五 错误扩展名
|
1747
|
-
if run_mode
|
1860
|
+
if run_mode & 8:
|
1748
1861
|
printf('\n五、错误扩展名')
|
1749
1862
|
for i, (f1, suffix2) in enumerate(self.xglob_faker_suffix_files('**/*'), start=1):
|
1750
1863
|
printf(f'{i}、{f1.relpath(self)} -> {suffix2}')
|
1751
1864
|
|
1752
1865
|
# 六 文件配对
|
1753
|
-
if run_mode
|
1866
|
+
if run_mode & 16:
|
1754
1867
|
printf(
|
1755
1868
|
'\n六、文件配对(检查每个目录里stem名称是否配对,列出文件组成不单一的目录结构,请重点检查落单未配对的情况)')
|
1756
1869
|
prompt = False
|
@@ -2026,6 +2139,48 @@ class XlPath(type(pathlib.Path())):
|
|
2026
2139
|
return file
|
2027
2140
|
|
2028
2141
|
|
2142
|
+
class StreamJsonlWriter:
|
2143
|
+
""" 流式存储,主要用于存储文本化、jsonl格式数据 """
|
2144
|
+
|
2145
|
+
def __init__(self, file_path, batch_size=2000, *,
|
2146
|
+
delete_origin_file=False, json_default=str):
|
2147
|
+
self.file_path = XlPath(file_path)
|
2148
|
+
self.cache_text_lines = []
|
2149
|
+
self.batch_size = batch_size
|
2150
|
+
self.total_lines = 0
|
2151
|
+
|
2152
|
+
self.delete_origin_file = delete_origin_file
|
2153
|
+
self.json_default = json_default
|
2154
|
+
|
2155
|
+
def append_line(self, line):
|
2156
|
+
self.append_lines([line])
|
2157
|
+
|
2158
|
+
def append_lines(self, data):
|
2159
|
+
"""
|
2160
|
+
:param list data: 添加一组数据
|
2161
|
+
"""
|
2162
|
+
for x in data:
|
2163
|
+
if isinstance(x, str):
|
2164
|
+
self.cache_text_lines.append(x)
|
2165
|
+
else:
|
2166
|
+
self.cache_text_lines.append(json.dumps(x, ensure_ascii=False,
|
2167
|
+
default=self.json_default))
|
2168
|
+
if len(self.cache_text_lines) >= self.batch_size:
|
2169
|
+
self.flush()
|
2170
|
+
|
2171
|
+
def flush(self):
|
2172
|
+
""" 刷新,将当前缓存写入文件 """
|
2173
|
+
if self.cache_text_lines:
|
2174
|
+
if self.total_lines == 0 and self.delete_origin_file: # 第一次写入时,删除旧缓存文件
|
2175
|
+
self.file_path.delete()
|
2176
|
+
|
2177
|
+
self.total_lines += len(self.cache_text_lines)
|
2178
|
+
self.file_path.parent.mkdir(exist_ok=True, parents=True)
|
2179
|
+
with open(self.file_path, 'a', encoding='utf8') as f:
|
2180
|
+
f.write('\n'.join(self.cache_text_lines) + '\n')
|
2181
|
+
self.cache_text_lines = []
|
2182
|
+
|
2183
|
+
|
2029
2184
|
def demo_file():
|
2030
2185
|
""" File类的综合测试"""
|
2031
2186
|
temp = tempfile.gettempdir()
|
@@ -2159,8 +2314,11 @@ class PathGroups(Groups):
|
|
2159
2314
|
return ls
|
2160
2315
|
|
2161
2316
|
|
2162
|
-
def cache_file(file, make_data_func: Callable[[], Any] = None, *,
|
2163
|
-
|
2317
|
+
def cache_file(file, make_data_func: Callable[[], Any] = None, *,
|
2318
|
+
mode='read_first',
|
2319
|
+
cache_time=None,
|
2320
|
+
**kwargs):
|
2321
|
+
""" 能将局部函数功能结果缓存进文件的功能
|
2164
2322
|
|
2165
2323
|
输入的文件file如果存在则直接读取内容;
|
2166
2324
|
否则用make_data_func生成,并且备份一个file文件
|
@@ -2168,20 +2326,52 @@ def cache_file(file, make_data_func: Callable[[], Any] = None, *, reset=False, *
|
|
2168
2326
|
:param file: 需要缓存的文件路径
|
2169
2327
|
:param make_data_func: 如果文件不存在,则需要生成一份,要提供数据生成函数
|
2170
2328
|
cache_file可以当装饰器用,此时不用显式指定该参数
|
2171
|
-
:param
|
2329
|
+
:param mode:
|
2330
|
+
read_first(默认): 优先尝试从已有文件读取
|
2331
|
+
generate_first: 函数生成优先
|
2332
|
+
:param cache_time: 文件缓存时间,单位为秒,默认为None,表示始终使用缓存文件
|
2333
|
+
如果设置60,表示超过60秒后,需要重新优先从函数获得更新内容
|
2172
2334
|
:param kwargs: 可以传递read、write支持的扩展参数
|
2173
|
-
:return:
|
2335
|
+
:return: 读取到的数据
|
2174
2336
|
"""
|
2337
|
+
from datetime import datetime, timedelta
|
2338
|
+
from pyxllib.prog.pupil import format_exception
|
2175
2339
|
|
2176
2340
|
def decorator(func):
|
2177
2341
|
def wrapper(*args2, **kwargs2):
|
2342
|
+
|
2178
2343
|
f = XlPath.init(file, XlPath.tempdir())
|
2179
|
-
|
2180
|
-
|
2181
|
-
|
2344
|
+
f.parent.mkdir(exist_ok=True, parents=True)
|
2345
|
+
|
2346
|
+
# 1 优先看是不是需要先从文件读取数据
|
2347
|
+
if mode == 'read_first' and f.is_file():
|
2348
|
+
if cache_time is None:
|
2349
|
+
return f.read_auto(**kwargs)
|
2350
|
+
|
2351
|
+
current_time = datetime.now()
|
2352
|
+
last_modified = datetime.fromtimestamp(f.mtime()) # 获取文件的修改时间
|
2353
|
+
if not isinstance(cache_time, timedelta):
|
2354
|
+
cache_time2 = timedelta(seconds=cache_time)
|
2355
|
+
else:
|
2356
|
+
cache_time2 = cache_time
|
2357
|
+
|
2358
|
+
if cache_time is None or (current_time - last_modified <= cache_time2):
|
2359
|
+
return f.read_auto(**kwargs)
|
2360
|
+
|
2361
|
+
# 2 如果需要重新生成数据,且没有已存在的保底文件
|
2362
|
+
if not f.is_file():
|
2182
2363
|
data = func(*args2, **kwargs2)
|
2183
2364
|
f.write_auto(data, **kwargs)
|
2184
|
-
|
2365
|
+
return data
|
2366
|
+
|
2367
|
+
# 3 需要重新生成,但是有保底文件
|
2368
|
+
try:
|
2369
|
+
data = func(*args2, **kwargs2)
|
2370
|
+
f.write_auto(data, **kwargs)
|
2371
|
+
return data
|
2372
|
+
except Exception as e:
|
2373
|
+
print(format_exception(e))
|
2374
|
+
return f.read_auto(**kwargs)
|
2185
2375
|
|
2186
2376
|
return wrapper
|
2187
2377
|
|
@@ -2418,7 +2608,7 @@ def myoswalk(root, filter_rule=None, recur=True):
|
|
2418
2608
|
|
2419
2609
|
|
2420
2610
|
def mygetfiles(root, filter_rule=None, recur=True):
|
2421
|
-
"""对myoswalk进一步封装,返回所有匹配的文件
|
2611
|
+
r""" 对myoswalk进一步封装,返回所有匹配的文件
|
2422
2612
|
会递归查找所有子文件
|
2423
2613
|
|
2424
2614
|
可以这样遍历一个目录下的所有文件:
|
@@ -2449,34 +2639,55 @@ class DirsFileFinder:
|
|
2449
2639
|
for d in dirs:
|
2450
2640
|
self.add_dir(d)
|
2451
2641
|
|
2452
|
-
def add_dir(self, p):
|
2642
|
+
def add_dir(self, p, cvt_name_func=None):
|
2453
2643
|
""" 添加备用检索目录
|
2454
2644
|
当前面的目录找不到匹配项的时候,会使用备用目录的文件
|
2455
2645
|
备用目录可以一直添加,有多个,优先级逐渐降低
|
2646
|
+
|
2647
|
+
:param cvt_name_func: 对名称做个转换再匹配
|
2456
2648
|
"""
|
2457
2649
|
files = list(XlPath(p).rglob_files())
|
2458
|
-
|
2459
|
-
|
2460
|
-
|
2650
|
+
if cvt_name_func:
|
2651
|
+
for f in files:
|
2652
|
+
self.names[cvt_name_func(f.name)].append(f)
|
2653
|
+
self.stems[cvt_name_func(f.stem)].append(f)
|
2654
|
+
else:
|
2655
|
+
for f in files:
|
2656
|
+
self.names[f.name].append(f)
|
2657
|
+
self.stems[f.stem].append(f)
|
2461
2658
|
|
2462
2659
|
def find_name(self, name):
|
2463
2660
|
""" 返回第一个匹配的结果 """
|
2464
|
-
|
2465
|
-
if
|
2466
|
-
return
|
2661
|
+
files = self.find_names(name)
|
2662
|
+
if files:
|
2663
|
+
return files[0]
|
2467
2664
|
|
2468
2665
|
def find_names(self, name):
|
2469
2666
|
""" 返回所有匹配的结果 """
|
2470
2667
|
return self.names[name]
|
2471
2668
|
|
2472
2669
|
def find_stem(self, stem):
|
2473
|
-
|
2474
|
-
if
|
2475
|
-
return
|
2670
|
+
files = self.find_stems(stem)
|
2671
|
+
if files:
|
2672
|
+
return files[0]
|
2476
2673
|
|
2477
2674
|
def find_stems(self, stem):
|
2478
2675
|
return self.stems[stem]
|
2479
2676
|
|
2677
|
+
def find_prefix_name(self, prefix_name, suffix=None):
|
2678
|
+
files = self.find_prefix_names(prefix_name, suffix=suffix)
|
2679
|
+
if files:
|
2680
|
+
return files[0]
|
2681
|
+
|
2682
|
+
def find_prefix_names(self, prefix_name, suffix=None):
|
2683
|
+
""" name中前缀为prefix_name """
|
2684
|
+
filess = [files for name, files in self.names.items() if name.startswith(prefix_name)]
|
2685
|
+
# 将嵌套的list展平
|
2686
|
+
files = [file for files in filess for file in files]
|
2687
|
+
if suffix:
|
2688
|
+
files = [file for file in files if file.suffix == suffix]
|
2689
|
+
return files
|
2690
|
+
|
2480
2691
|
|
2481
2692
|
class TwinDirs:
|
2482
2693
|
def __init__(self, src_dir, dst_dir):
|