pyxllib 0.0.43__py3-none-any.whl → 0.3.197__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyxllib/__init__.py +9 -2
- pyxllib/algo/__init__.py +8 -0
- pyxllib/algo/disjoint.py +54 -0
- pyxllib/algo/geo.py +541 -0
- pyxllib/{util/mathlib.py → algo/intervals.py} +172 -36
- pyxllib/algo/matcher.py +389 -0
- pyxllib/algo/newbie.py +166 -0
- pyxllib/algo/pupil.py +629 -0
- pyxllib/algo/shapelylib.py +67 -0
- pyxllib/algo/specialist.py +241 -0
- pyxllib/algo/stat.py +494 -0
- pyxllib/algo/treelib.py +149 -0
- pyxllib/algo/unitlib.py +66 -0
- pyxllib/autogui/__init__.py +5 -0
- pyxllib/autogui/activewin.py +246 -0
- pyxllib/autogui/all.py +9 -0
- pyxllib/autogui/autogui.py +852 -0
- pyxllib/autogui/uiautolib.py +362 -0
- pyxllib/autogui/virtualkey.py +102 -0
- pyxllib/autogui/wechat.py +827 -0
- pyxllib/autogui/wechat_msg.py +421 -0
- pyxllib/autogui/wxautolib.py +84 -0
- pyxllib/cv/__init__.py +1 -11
- pyxllib/cv/expert.py +267 -0
- pyxllib/cv/{imlib.py → imfile.py} +18 -83
- pyxllib/cv/imhash.py +39 -0
- pyxllib/cv/pupil.py +9 -0
- pyxllib/cv/rgbfmt.py +1525 -0
- pyxllib/cv/slidercaptcha.py +137 -0
- pyxllib/cv/trackbartools.py +163 -49
- pyxllib/cv/xlcvlib.py +1040 -0
- pyxllib/cv/xlpillib.py +423 -0
- pyxllib/data/__init__.py +0 -0
- pyxllib/data/echarts.py +240 -0
- pyxllib/data/jsonlib.py +89 -0
- pyxllib/{util/oss2_.py → data/oss.py} +11 -9
- pyxllib/data/pglib.py +1127 -0
- pyxllib/data/sqlite.py +568 -0
- pyxllib/{util → data}/sqllib.py +13 -31
- pyxllib/ext/JLineViewer.py +505 -0
- pyxllib/ext/__init__.py +6 -0
- pyxllib/{util → ext}/demolib.py +119 -35
- pyxllib/ext/drissionlib.py +277 -0
- pyxllib/ext/kq5034lib.py +12 -0
- pyxllib/{util/main.py → ext/old.py} +122 -284
- pyxllib/ext/qt.py +449 -0
- pyxllib/ext/robustprocfile.py +497 -0
- pyxllib/ext/seleniumlib.py +76 -0
- pyxllib/{util/tklib.py → ext/tk.py} +10 -11
- pyxllib/ext/unixlib.py +827 -0
- pyxllib/ext/utools.py +351 -0
- pyxllib/{util/webhooklib.py → ext/webhook.py} +45 -17
- pyxllib/ext/win32lib.py +40 -0
- pyxllib/ext/wjxlib.py +88 -0
- pyxllib/ext/wpsapi.py +124 -0
- pyxllib/ext/xlwork.py +9 -0
- pyxllib/ext/yuquelib.py +1105 -0
- pyxllib/file/__init__.py +17 -0
- pyxllib/file/docxlib.py +761 -0
- pyxllib/{util → file}/gitlib.py +40 -27
- pyxllib/file/libreoffice.py +165 -0
- pyxllib/file/movielib.py +148 -0
- pyxllib/file/newbie.py +10 -0
- pyxllib/file/onenotelib.py +1469 -0
- pyxllib/file/packlib/__init__.py +330 -0
- pyxllib/{util → file/packlib}/zipfile.py +598 -195
- pyxllib/file/pdflib.py +426 -0
- pyxllib/file/pupil.py +185 -0
- pyxllib/file/specialist/__init__.py +685 -0
- pyxllib/{basic/_5_dirlib.py → file/specialist/dirlib.py} +364 -93
- pyxllib/file/specialist/download.py +193 -0
- pyxllib/file/specialist/filelib.py +2829 -0
- pyxllib/file/xlsxlib.py +3131 -0
- pyxllib/file/xlsyncfile.py +341 -0
- pyxllib/prog/__init__.py +5 -0
- pyxllib/prog/cachetools.py +64 -0
- pyxllib/prog/deprecatedlib.py +233 -0
- pyxllib/prog/filelock.py +42 -0
- pyxllib/prog/ipyexec.py +253 -0
- pyxllib/prog/multiprogs.py +940 -0
- pyxllib/prog/newbie.py +451 -0
- pyxllib/prog/pupil.py +1197 -0
- pyxllib/{sitepackages.py → prog/sitepackages.py} +5 -3
- pyxllib/prog/specialist/__init__.py +391 -0
- pyxllib/prog/specialist/bc.py +203 -0
- pyxllib/prog/specialist/browser.py +497 -0
- pyxllib/prog/specialist/common.py +347 -0
- pyxllib/prog/specialist/datetime.py +199 -0
- pyxllib/prog/specialist/tictoc.py +240 -0
- pyxllib/prog/specialist/xllog.py +180 -0
- pyxllib/prog/xlosenv.py +108 -0
- pyxllib/stdlib/__init__.py +17 -0
- pyxllib/{util → stdlib}/tablepyxl/__init__.py +1 -3
- pyxllib/{util → stdlib}/tablepyxl/style.py +1 -1
- pyxllib/{util → stdlib}/tablepyxl/tablepyxl.py +2 -4
- pyxllib/text/__init__.py +8 -0
- pyxllib/text/ahocorasick.py +39 -0
- pyxllib/text/airscript.js +744 -0
- pyxllib/text/charclasslib.py +121 -0
- pyxllib/text/jiebalib.py +267 -0
- pyxllib/text/jinjalib.py +32 -0
- pyxllib/text/jsa_ai_prompt.md +271 -0
- pyxllib/text/jscode.py +922 -0
- pyxllib/text/latex/__init__.py +158 -0
- pyxllib/text/levenshtein.py +303 -0
- pyxllib/text/nestenv.py +1215 -0
- pyxllib/text/newbie.py +300 -0
- pyxllib/text/pupil/__init__.py +8 -0
- pyxllib/text/pupil/common.py +1121 -0
- pyxllib/text/pupil/xlalign.py +326 -0
- pyxllib/text/pycode.py +47 -0
- pyxllib/text/specialist/__init__.py +8 -0
- pyxllib/text/specialist/common.py +112 -0
- pyxllib/text/specialist/ptag.py +186 -0
- pyxllib/text/spellchecker.py +172 -0
- pyxllib/text/templates/echart_base.html +11 -0
- pyxllib/text/templates/highlight_code.html +17 -0
- pyxllib/text/templates/latex_editor.html +103 -0
- pyxllib/text/vbacode.py +17 -0
- pyxllib/text/xmllib.py +747 -0
- pyxllib/xl.py +39 -0
- pyxllib/xlcv.py +17 -0
- pyxllib-0.3.197.dist-info/METADATA +48 -0
- pyxllib-0.3.197.dist-info/RECORD +126 -0
- {pyxllib-0.0.43.dist-info → pyxllib-0.3.197.dist-info}/WHEEL +4 -5
- pyxllib/basic/_1_strlib.py +0 -945
- pyxllib/basic/_2_timelib.py +0 -488
- pyxllib/basic/_3_pathlib.py +0 -916
- pyxllib/basic/_4_loglib.py +0 -419
- pyxllib/basic/__init__.py +0 -54
- pyxllib/basic/arrow_.py +0 -250
- pyxllib/basic/chardet_.py +0 -66
- pyxllib/basic/dirlib.py +0 -529
- pyxllib/basic/dprint.py +0 -202
- pyxllib/basic/extension.py +0 -12
- pyxllib/basic/judge.py +0 -31
- pyxllib/basic/log.py +0 -204
- pyxllib/basic/pathlib_.py +0 -705
- pyxllib/basic/pytictoc.py +0 -102
- pyxllib/basic/qiniu_.py +0 -61
- pyxllib/basic/strlib.py +0 -761
- pyxllib/basic/timer.py +0 -132
- pyxllib/cv/cv.py +0 -834
- pyxllib/cv/cvlib/_1_geo.py +0 -543
- pyxllib/cv/cvlib/_2_cvprcs.py +0 -309
- pyxllib/cv/cvlib/_2_imgproc.py +0 -594
- pyxllib/cv/cvlib/_3_pilprcs.py +0 -80
- pyxllib/cv/cvlib/_4_cvimg.py +0 -211
- pyxllib/cv/cvlib/__init__.py +0 -10
- pyxllib/cv/debugtools.py +0 -82
- pyxllib/cv/fitz_.py +0 -300
- pyxllib/cv/installer.py +0 -42
- pyxllib/debug/_0_installer.py +0 -38
- pyxllib/debug/_1_typelib.py +0 -277
- pyxllib/debug/_2_chrome.py +0 -198
- pyxllib/debug/_3_showdir.py +0 -161
- pyxllib/debug/_4_bcompare.py +0 -140
- pyxllib/debug/__init__.py +0 -49
- pyxllib/debug/bcompare.py +0 -132
- pyxllib/debug/chrome.py +0 -198
- pyxllib/debug/installer.py +0 -38
- pyxllib/debug/showdir.py +0 -158
- pyxllib/debug/typelib.py +0 -278
- pyxllib/image/__init__.py +0 -12
- pyxllib/torch/__init__.py +0 -20
- pyxllib/torch/modellib.py +0 -37
- pyxllib/torch/trainlib.py +0 -344
- pyxllib/util/__init__.py +0 -20
- pyxllib/util/aip_.py +0 -141
- pyxllib/util/casiadb.py +0 -59
- pyxllib/util/excellib.py +0 -495
- pyxllib/util/filelib.py +0 -612
- pyxllib/util/jsondata.py +0 -27
- pyxllib/util/jsondata2.py +0 -92
- pyxllib/util/labelmelib.py +0 -139
- pyxllib/util/onepy/__init__.py +0 -29
- pyxllib/util/onepy/onepy.py +0 -574
- pyxllib/util/onepy/onmanager.py +0 -170
- pyxllib/util/pyautogui_.py +0 -219
- pyxllib/util/textlib.py +0 -1305
- pyxllib/util/unorder.py +0 -22
- pyxllib/util/xmllib.py +0 -639
- pyxllib-0.0.43.dist-info/METADATA +0 -39
- pyxllib-0.0.43.dist-info/RECORD +0 -80
- pyxllib-0.0.43.dist-info/top_level.txt +0 -1
- {pyxllib-0.0.43.dist-info → pyxllib-0.3.197.dist-info/licenses}/LICENSE +0 -0
@@ -2,67 +2,215 @@
|
|
2
2
|
# -*- coding: utf-8 -*-
|
3
3
|
# @Author : 陈坤泽
|
4
4
|
# @Email : 877362867@qq.com
|
5
|
-
# @
|
5
|
+
# @Date : 2020/05/30
|
6
6
|
|
7
7
|
|
8
|
+
import collections
|
8
9
|
import filecmp
|
9
10
|
import os
|
11
|
+
import pathlib
|
12
|
+
import random
|
10
13
|
import re
|
11
14
|
import shutil
|
15
|
+
import tempfile
|
12
16
|
|
13
|
-
|
14
|
-
from requests.structures import CaseInsensitiveDict
|
15
|
-
|
16
|
-
from pyxllib.basic._1_strlib import strfind, natural_sort
|
17
|
-
from pyxllib.basic._2_timelib import Datetime
|
18
|
-
from pyxllib.basic._3_pathlib import Path
|
19
|
-
from pyxllib.basic._4_loglib import Iterate
|
17
|
+
import humanfriendly
|
20
18
|
|
21
|
-
|
22
|
-
|
19
|
+
# 大小写不敏感字典
|
20
|
+
from pyxllib.prog.newbie import first_nonnone
|
21
|
+
from pyxllib.algo.pupil import natural_sort
|
22
|
+
from pyxllib.text.pupil import strfind
|
23
|
+
from pyxllib.file.specialist import get_etag, PathBase, File, XlPath
|
23
24
|
|
24
|
-
主要是为了提供readfile、wrritefile函数
|
25
|
-
与普通的读写文件相比,有以下优点:
|
26
|
-
1、智能识别pkl等特殊格式文件的处理
|
27
|
-
2、智能处理编码
|
28
|
-
3、目录不存在自动创建
|
29
|
-
4、自动备份旧文件,而不是强制覆盖写入
|
30
25
|
|
31
|
-
|
32
|
-
|
33
|
-
|
26
|
+
def __1_Dir类():
|
27
|
+
"""
|
28
|
+
支持文件或文件夹的对比复制删除等操作的函数:filescmp、filesdel、filescopy
|
29
|
+
"""
|
34
30
|
|
35
31
|
|
36
|
-
class Dir(
|
32
|
+
class Dir(PathBase):
|
37
33
|
r"""类似NestEnv思想的文件夹处理类
|
38
34
|
|
39
35
|
这里的测试可以全程自己造一个
|
40
36
|
"""
|
41
|
-
__slots__ = ('
|
37
|
+
__slots__ = ('_path', 'subs', '_origin_wkdir')
|
38
|
+
|
39
|
+
# 零、常用的目录类
|
40
|
+
TEMP = pathlib.Path(tempfile.gettempdir())
|
41
|
+
if os.getenv('Desktop', None): # 如果修改了win10默认的桌面路径,需要在环境变量添加一个正确的Desktop路径值
|
42
|
+
DESKTOP = os.environ['Desktop']
|
43
|
+
else:
|
44
|
+
DESKTOP = os.path.join(str(pathlib.Path.home()), 'Desktop') # 这个不一定准,桌面是有可能被移到D盘等的
|
45
|
+
DESKTOP = pathlib.Path(DESKTOP)
|
42
46
|
|
43
|
-
|
47
|
+
# 添加 HOME 目录? 方便linux操作?
|
48
|
+
|
49
|
+
# 一、基本目录类功能
|
50
|
+
|
51
|
+
def __init__(self, path=None, root=None, *, subs=None, check=True):
|
44
52
|
"""根目录、工作目录
|
45
53
|
|
46
54
|
>> Dir() # 以当前文件夹作为root
|
47
55
|
>> Dir(r'C:/pycode/code4101py') # 指定目录
|
56
|
+
|
57
|
+
:param path: 注意哪怕path传入的是Dir,也只会设置目录,不会取其paths成员值
|
58
|
+
:param subs: 该目录下,选中的子文件(夹)
|
48
59
|
"""
|
49
|
-
|
50
|
-
self.
|
60
|
+
|
61
|
+
self._path = None
|
62
|
+
self.subs = subs or [] # 初始默认没有选中任何文件(夹)
|
63
|
+
|
64
|
+
# 1 快速初始化
|
65
|
+
if root is None:
|
66
|
+
if isinstance(path, Dir):
|
67
|
+
self._path = path._path
|
68
|
+
# 注意用Dir A 初始化 Dir B,并不会把A的subs传递给B
|
69
|
+
return
|
70
|
+
elif isinstance(path, pathlib.Path):
|
71
|
+
self._path = path
|
72
|
+
|
73
|
+
# 2 普通初始化
|
74
|
+
if self._path is None:
|
75
|
+
self._path = self.abspath(path, root)
|
76
|
+
|
77
|
+
# 3 检查
|
78
|
+
if check:
|
79
|
+
if not self._path:
|
80
|
+
raise ValueError(f'无效路径 {self._path}')
|
81
|
+
elif self._path.is_file():
|
82
|
+
raise ValueError(f'不能用文件初始化一个Dir对象 {self._path}')
|
83
|
+
|
84
|
+
@classmethod
|
85
|
+
def safe_init(cls, path, root=None, *, subs=None):
|
86
|
+
""" 如果失败不raise,而是返回None的初始化方式 """
|
87
|
+
try:
|
88
|
+
d = Dir(path, root, subs=subs)
|
89
|
+
d._path.is_file() # 有些问题上一步不一定测的出来,要再补一个测试
|
90
|
+
return d
|
91
|
+
except (ValueError, TypeError, OSError, PermissionError):
|
92
|
+
# ValueError:文件名过长,代表输入很可能是一段文本,根本不是路径
|
93
|
+
# TypeError:不是str等正常的参数
|
94
|
+
# OSError:非法路径名,例如有 *? 等
|
95
|
+
# PermissionError: linux上访问无权限、不存在的路径
|
96
|
+
return None
|
51
97
|
|
52
98
|
@property
|
53
|
-
def
|
54
|
-
"""
|
55
|
-
|
99
|
+
def size(self) -> int:
|
100
|
+
""" 计算目录的大小,会递归目录计算总大小
|
101
|
+
|
102
|
+
https://stackoverflow.com/questions/1392413/calculating-a-directory-size-using-python
|
103
|
+
|
104
|
+
>> Dir('D:/slns/pyxllib').size # 这个算的就是真实大小,不是占用空间
|
105
|
+
2939384
|
106
|
+
"""
|
107
|
+
if self:
|
108
|
+
total_size = 0
|
109
|
+
for dirpath, dirnames, Pathnames in os.walk(str(self)):
|
110
|
+
for f in Pathnames:
|
111
|
+
fp = os.path.join(dirpath, f)
|
112
|
+
total_size += os.path.getsize(fp)
|
113
|
+
else: # 不存在的对象
|
114
|
+
total_size = 0
|
115
|
+
return total_size
|
56
116
|
|
57
117
|
@property
|
58
|
-
def
|
59
|
-
"""
|
60
|
-
return
|
118
|
+
def psize(self) -> str:
|
119
|
+
""" 美化显示的文件大小 """
|
120
|
+
return humanfriendly.format_size(self.size, binary=True)
|
61
121
|
|
62
|
-
def
|
122
|
+
def __truediv__(self, key) -> pathlib.Path:
|
123
|
+
r""" 路径拼接功能
|
124
|
+
|
125
|
+
>>> Dir('C:/a') / 'b.txt'
|
126
|
+
WindowsPath('C:/a/b.txt')
|
127
|
+
"""
|
128
|
+
return self._path / str(key)
|
129
|
+
|
130
|
+
def with_dirname(self, value):
|
131
|
+
return Dir(self.name, value)
|
132
|
+
|
133
|
+
def absdst(self, dst):
|
134
|
+
""" 在copy、move等中,给了个"模糊"的目标位置dst,智能推导出实际file、dir绝对路径
|
135
|
+
"""
|
136
|
+
dst_ = self.abspath(dst)
|
137
|
+
if isinstance(dst, str) and dst[-1] in ('\\', '/'):
|
138
|
+
dst_ = Dir(self.name, dst_)
|
139
|
+
else:
|
140
|
+
dst_ = Dir(dst_)
|
141
|
+
return dst_
|
142
|
+
|
143
|
+
def ensure_dir(self):
|
144
|
+
r""" 确保目录存在
|
145
|
+
"""
|
146
|
+
if not self:
|
147
|
+
os.makedirs(str(self))
|
148
|
+
|
149
|
+
def copy(self, dst, if_exists=None):
|
150
|
+
return self.process(dst, shutil.copytree, if_exists)
|
151
|
+
|
152
|
+
def rename(self, dst, if_exists=None):
|
153
|
+
r""" 重命名
|
154
|
+
"""
|
155
|
+
return self.move(Dir(dst, self.parent), if_exists)
|
156
|
+
|
157
|
+
def delete(self):
|
158
|
+
r""" 删除自身文件
|
159
|
+
"""
|
160
|
+
if self:
|
161
|
+
try:
|
162
|
+
shutil.rmtree(str(self))
|
163
|
+
except OSError:
|
164
|
+
# OSError: Cannot call rmtree on a symbolic link
|
165
|
+
# TODO 本来不应该try except,而是先用os.path.islink判断的,但是这个好像有bug,判断不出来~~
|
166
|
+
os.unlink(str(self))
|
167
|
+
|
168
|
+
# 二、目录类专有功能
|
169
|
+
|
170
|
+
def sample(self, n=None, frac=None):
|
171
|
+
"""
|
172
|
+
:param n: 在 paths 中抽取n个文件
|
173
|
+
:param frac: 按比例抽取文件
|
174
|
+
:return: 新的Dir文件选取状态
|
175
|
+
"""
|
176
|
+
n = n or int(frac * len(self.subs))
|
177
|
+
paths = random.sample(self.subs, n)
|
178
|
+
return Dir(self._path, subs=paths)
|
179
|
+
|
180
|
+
def subpaths(self):
|
181
|
+
""" 返回所有subs的绝对路径 """
|
182
|
+
return [self._path / p for p in self.subs]
|
183
|
+
|
184
|
+
def subfiles(self):
|
185
|
+
""" 返回所有subs的File对象 (过滤掉文件夹对象) """
|
186
|
+
return list(map(File, filter(lambda p: not p.is_dir(), self.subpaths())))
|
187
|
+
|
188
|
+
def subdirs(self):
|
189
|
+
""" 返回所有subs的File对象 (过滤掉文件对象) """
|
190
|
+
return list(map(Dir, filter(lambda p: not p.is_file(), self.subpaths())))
|
191
|
+
|
192
|
+
def select(self, patter, nsort=True, type_=None,
|
193
|
+
ignore_backup=False, ignore_special=False,
|
194
|
+
min_size=None, max_size=None,
|
195
|
+
min_ctime=None, max_ctime=None, min_mtime=None, max_mtime=None,
|
196
|
+
**kwargs):
|
63
197
|
r""" 增加选中文件,从filesmatch衍生而来,参数含义见 filesfilter
|
64
198
|
|
65
|
-
:param nsort: 是否使用自然排序,关闭可以加速
|
199
|
+
:param bool nsort: 是否使用自然排序,关闭可以加速
|
200
|
+
:param str type_:
|
201
|
+
None,所有文件
|
202
|
+
'file',只匹配文件
|
203
|
+
'dir', 只匹配目录
|
204
|
+
:param bool ignore_backup: 如果设为False,会过滤掉自定义的备份文件格式,不获取备份类文件
|
205
|
+
:param bool ignore_special: 自动过滤掉 '.git'、'$RECYCLE.BIN' 目录下文件
|
206
|
+
:param int min_size: 文件大小过滤,单位Byte
|
207
|
+
:param int max_size: ~
|
208
|
+
:param str min_ctime: 创建时间的过滤,格式'2019-09-01'或'2019-09-01 00:00'
|
209
|
+
:param str max_ctime: ~
|
210
|
+
:param str min_mtime: 修改时间的过滤
|
211
|
+
:param str max_mtime: ~
|
212
|
+
:param kwargs: see filesfilter
|
213
|
+
:seealso: filesfilter
|
66
214
|
|
67
215
|
注意select和exclude的增减操作是不断叠加的,而不是每次重置!
|
68
216
|
如果需要重置,应该重新定义一个Folder类
|
@@ -75,20 +223,69 @@ class Dir(Path):
|
|
75
223
|
>> Dir('C:/pycode/code4101py').select('*.py', min_size=200*1024) # 200kb以上的文件
|
76
224
|
C:/pycode/code4101py: ['liangyb.py']
|
77
225
|
|
78
|
-
>> Dir(r'C:/pycode/code4101py').select('*.py', min_mtime=
|
226
|
+
>> Dir(r'C:/pycode/code4101py').select('*.py', min_mtime=datetime.date(2020, 3, 1)) # 修改时间在3月1日以上的
|
79
227
|
"""
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
228
|
+
subs = filesmatch(patter, root=str(self), type_=type_,
|
229
|
+
ignore_backup=ignore_backup, ignore_special=ignore_special,
|
230
|
+
min_size=min_size, max_size=max_size,
|
231
|
+
min_ctime=min_ctime, max_ctime=max_ctime, min_mtime=min_mtime, max_mtime=max_mtime,
|
232
|
+
**kwargs)
|
233
|
+
subs = self.subs + subs
|
234
|
+
if nsort: subs = natural_sort(subs)
|
235
|
+
return Dir(self._path, subs=subs)
|
236
|
+
|
237
|
+
def select_files(self, patter, nsort=True,
|
238
|
+
ignore_backup=False, ignore_special=False,
|
239
|
+
min_size=None, max_size=None,
|
240
|
+
min_ctime=None, max_ctime=None, min_mtime=None, max_mtime=None):
|
241
|
+
""" TODO 这系列的功能可以优化加速,在没有复杂规则的情况下,可以尽量用源生的py检索方式实现 """
|
242
|
+
subs = filesmatch(patter, root=str(self), type_='file',
|
243
|
+
ignore_backup=ignore_backup, ignore_special=ignore_special,
|
244
|
+
min_size=min_size, max_size=max_size,
|
245
|
+
min_ctime=min_ctime, max_ctime=max_ctime,
|
246
|
+
min_mtime=min_mtime, max_mtime=max_mtime)
|
247
|
+
if nsort:
|
248
|
+
subs = natural_sort(subs)
|
249
|
+
for x in subs:
|
250
|
+
yield File(self._path / x, check=False)
|
251
|
+
|
252
|
+
def select_dirs(self, patter, nsort=True,
|
253
|
+
ignore_backup=False, ignore_special=False,
|
254
|
+
min_size=None, max_size=None,
|
255
|
+
min_ctime=None, max_ctime=None, min_mtime=None, max_mtime=None):
|
256
|
+
subs = filesmatch(patter, root=str(self), type_='dir',
|
257
|
+
ignore_backup=ignore_backup, ignore_special=ignore_special,
|
258
|
+
min_size=min_size, max_size=max_size,
|
259
|
+
min_ctime=min_ctime, max_ctime=max_ctime,
|
260
|
+
min_mtime=min_mtime, max_mtime=max_mtime)
|
261
|
+
if nsort:
|
262
|
+
subs = natural_sort(subs)
|
263
|
+
for x in subs:
|
264
|
+
yield Dir(self._path / x, check=False)
|
265
|
+
|
266
|
+
def select_paths(self, patter, nsort=True,
|
267
|
+
ignore_backup=False, ignore_special=False,
|
268
|
+
min_size=None, max_size=None,
|
269
|
+
min_ctime=None, max_ctime=None, min_mtime=None, max_mtime=None):
|
270
|
+
subs = filesmatch(patter, root=str(self),
|
271
|
+
ignore_backup=ignore_backup, ignore_special=ignore_special,
|
272
|
+
min_size=min_size, max_size=max_size,
|
273
|
+
min_ctime=min_ctime, max_ctime=max_ctime,
|
274
|
+
min_mtime=min_mtime, max_mtime=max_mtime)
|
275
|
+
if nsort:
|
276
|
+
subs = natural_sort(subs)
|
277
|
+
for x in subs:
|
278
|
+
yield self._path / x
|
279
|
+
|
280
|
+
def procpaths(self, func, start=None, end=None, ref_dir=None, pinterval=None, max_workers=1, interrupt=True):
|
86
281
|
""" 对选中的文件迭代处理
|
87
282
|
|
88
283
|
:param func: 对每个文件进行处理的自定义接口函数
|
89
284
|
参数 p: 输入参数 Path 对象
|
90
|
-
return:
|
285
|
+
return: 可以没有返回值
|
91
286
|
TODO 以后可以返回字典结构,用不同的key表示不同的功能,可以控制些高级功能
|
287
|
+
:param ref_dir: 使用该参数时,则每次会给func传递两个路径参数
|
288
|
+
第一个是原始的file,第二个是ref_dir目录下对应路径的file
|
92
289
|
|
93
290
|
TODO 增设可以bfs还是dfs的功能?
|
94
291
|
|
@@ -96,23 +293,25 @@ class Dir(Path):
|
|
96
293
|
将目录 test 的所有文件拷贝到 test2 目录 示例代码:
|
97
294
|
|
98
295
|
def func(p1, p2):
|
99
|
-
p1.copy(p2)
|
296
|
+
File(p1).copy(p2)
|
100
297
|
|
101
298
|
Dir('test').select('**/*', type_='file').procfiles(func, ref_dir='test2')
|
102
299
|
|
103
300
|
"""
|
301
|
+
from pyxllib.prog.specialist import Iterate
|
302
|
+
|
104
303
|
if ref_dir:
|
105
304
|
ref_dir = Dir(ref_dir)
|
106
|
-
|
107
|
-
|
305
|
+
paths1 = self.subpaths()
|
306
|
+
paths2 = [(ref_dir / self.subs[i]) for i in range(len(self.subs))]
|
108
307
|
|
109
308
|
def wrap_func(data):
|
110
309
|
func(*data)
|
111
310
|
|
112
|
-
data = zip(
|
311
|
+
data = zip(paths1, paths2)
|
113
312
|
|
114
313
|
else:
|
115
|
-
data = self.
|
314
|
+
data = self.subpaths()
|
116
315
|
wrap_func = func
|
117
316
|
|
118
317
|
Iterate(data).run(wrap_func, start=start, end=end, pinterval=pinterval,
|
@@ -123,13 +322,13 @@ class Dir(Path):
|
|
123
322
|
|
124
323
|
这里设置的选择模式,是指全集的选择范围
|
125
324
|
"""
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
for
|
130
|
-
if
|
131
|
-
|
132
|
-
return Dir(self._path,
|
325
|
+
subs = Dir(self).select(patter, nsort, **kwargs).subs
|
326
|
+
cur_subs = set(self.subs)
|
327
|
+
new_subs = []
|
328
|
+
for s in subs:
|
329
|
+
if s not in cur_subs:
|
330
|
+
new_subs.append(s)
|
331
|
+
return Dir(self._path, subs=new_subs)
|
133
332
|
|
134
333
|
def exclude(self, patter, **kwargs):
|
135
334
|
""" 去掉部分选中文件
|
@@ -141,15 +340,26 @@ class Dir(Path):
|
|
141
340
|
print(d2.files) # ['AA20pH-c1=1-1.eps', 'AA20pH-c1=1-2.eps']
|
142
341
|
print(d3.files) # ['subdir/AA20pH-c1=1-2 - 副本.eps']
|
143
342
|
"""
|
144
|
-
|
145
|
-
|
146
|
-
for
|
147
|
-
if
|
148
|
-
|
149
|
-
return Dir(self._path,
|
150
|
-
|
151
|
-
def
|
152
|
-
|
343
|
+
subs = set(filesmatch(patter, root=str(self), **kwargs))
|
344
|
+
new_subs = []
|
345
|
+
for s in self.subs:
|
346
|
+
if s not in subs:
|
347
|
+
new_subs.append(s)
|
348
|
+
return Dir(self._path, subs=new_subs)
|
349
|
+
|
350
|
+
def describe(self):
|
351
|
+
""" 输出目录的一些基本统计信息
|
352
|
+
"""
|
353
|
+
msg = []
|
354
|
+
dir_state = self.select('*')
|
355
|
+
files = dir_state.subfiles()
|
356
|
+
suffixs = collections.Counter([f.suffix for f in files]).most_common()
|
357
|
+
dir_size = self.size
|
358
|
+
msg.append(f'size: {dir_size} ≈ {humanfriendly.format_size(dir_size, binary=True)}')
|
359
|
+
msg.append(f'files: {len(files)}, {suffixs}')
|
360
|
+
msg.append(f'dirs: {len(dir_state.subdirs())}')
|
361
|
+
res = '\n'.join(msg)
|
362
|
+
print(res)
|
153
363
|
|
154
364
|
def __enter__(self):
|
155
365
|
""" 使用with模式可以进行工作目录切换
|
@@ -158,13 +368,21 @@ class Dir(Path):
|
|
158
368
|
切换工作目录和多线程混合使用会有意想不到的坑,要慎重!
|
159
369
|
"""
|
160
370
|
self._origin_wkdir = os.getcwd()
|
161
|
-
os.chdir(self
|
371
|
+
os.chdir(str(self))
|
162
372
|
return self
|
163
373
|
|
164
374
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
165
375
|
os.chdir(self._origin_wkdir)
|
166
376
|
|
167
377
|
|
378
|
+
def __2_filesxxx():
|
379
|
+
"""
|
380
|
+
本来Path、File是能同时处理文件、目录的
|
381
|
+
改版后,files底层因为有用到File,现在却不能支持目录的操作了
|
382
|
+
可能会有些bug,尽量不要用这些旧功能,或者尽早移除
|
383
|
+
"""
|
384
|
+
|
385
|
+
|
168
386
|
def filescmp(f1, f2, shallow=True):
|
169
387
|
"""只有两个存在且是同类型的文件或文件夹,内容相同才会返回True,否则均返回False
|
170
388
|
:param f1: 待比较的第1个文件(文件夹)
|
@@ -192,7 +410,8 @@ def filescmp(f1, f2, shallow=True):
|
|
192
410
|
def filesfilter(files, *, root=os.curdir, type_=None,
|
193
411
|
ignore_backup=False, ignore_special=False,
|
194
412
|
min_size=None, max_size=None,
|
195
|
-
min_ctime=None, max_ctime=None,
|
413
|
+
min_ctime=None, max_ctime=None,
|
414
|
+
min_mtime=None, max_mtime=None):
|
196
415
|
"""
|
197
416
|
:param files: 类list对象
|
198
417
|
:param type_:
|
@@ -209,6 +428,7 @@ def filesfilter(files, *, root=os.curdir, type_=None,
|
|
209
428
|
:param max_mtime: ~
|
210
429
|
:return:
|
211
430
|
"""
|
431
|
+
from datetime import datetime
|
212
432
|
|
213
433
|
def judge(f):
|
214
434
|
if root: f = os.path.join(root, f)
|
@@ -217,28 +437,31 @@ def filesfilter(files, *, root=os.curdir, type_=None,
|
|
217
437
|
elif type_ == 'dir' and not os.path.isdir(f):
|
218
438
|
return False
|
219
439
|
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
if max_size is not None
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
if
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
if
|
440
|
+
# 尽量避免调用 os.stat,判断是否有自定义大小、时间规则,没有可以跳过这部分
|
441
|
+
check_arg = first_nonnone([min_size, max_size, min_ctime, max_ctime, min_mtime, max_mtime])
|
442
|
+
if check_arg is not None:
|
443
|
+
msg = os.stat(f)
|
444
|
+
if first_nonnone([min_size, max_size]) is not None:
|
445
|
+
size = File(f).size
|
446
|
+
if min_size is not None and size < min_size: return False
|
447
|
+
if max_size is not None and size > max_size: return False
|
448
|
+
|
449
|
+
if min_ctime or max_ctime:
|
450
|
+
file_ctime = datetime.fromtimestamp(msg.st_ctime)
|
451
|
+
if min_ctime and file_ctime < min_ctime: return False
|
452
|
+
if max_ctime and file_ctime > max_ctime: return False
|
453
|
+
|
454
|
+
if min_mtime or max_mtime:
|
455
|
+
file_mtime = datetime.fromtimestamp(msg.st_mtime)
|
456
|
+
if min_mtime and file_mtime < min_mtime: return False
|
457
|
+
if max_mtime and file_mtime > max_mtime: return False
|
235
458
|
|
236
459
|
if ignore_special:
|
237
|
-
parts =
|
460
|
+
parts = File(f).parts
|
238
461
|
if '.git' in parts or '$RECYCLE.BIN' in parts:
|
239
462
|
return False
|
240
463
|
|
241
|
-
if ignore_backup and
|
464
|
+
if ignore_backup and File(f).backup_time:
|
242
465
|
return False
|
243
466
|
|
244
467
|
return True
|
@@ -300,6 +523,7 @@ def filesmatch(patter, *, root=os.curdir, **kwargs) -> list:
|
|
300
523
|
>> filesmatch('**/*', type_='file', max_size=0) # 筛选空文件
|
301
524
|
['b/a', '[0-9]/3.txt']
|
302
525
|
"""
|
526
|
+
from pathlib import Path
|
303
527
|
root = os.path.abspath(root)
|
304
528
|
|
305
529
|
# 0 规则匹配
|
@@ -308,9 +532,9 @@ def filesmatch(patter, *, root=os.curdir, **kwargs) -> list:
|
|
308
532
|
|
309
533
|
# 1 普通文本匹配 (没有通配符,单文件查找)
|
310
534
|
if isinstance(patter, str) and glob_chars_pos == -1:
|
311
|
-
path = Path(
|
312
|
-
if path
|
313
|
-
p = str(
|
535
|
+
path = Path(os.path.join(root, patter))
|
536
|
+
if path: # 文件存在
|
537
|
+
p = str(path.resolve())
|
314
538
|
if p.startswith(root): p = p[len(root) + 1:]
|
315
539
|
res = [p]
|
316
540
|
else: # 文件不存在
|
@@ -337,6 +561,10 @@ def filesmatch(patter, *, root=os.curdir, **kwargs) -> list:
|
|
337
561
|
elif isinstance(patter, (list, tuple, set)):
|
338
562
|
res = []
|
339
563
|
for p in patter: res += filesmatch(p, root=root)
|
564
|
+
# 5 可调用对象
|
565
|
+
elif callable(patter):
|
566
|
+
from pyxllib.file.specialist import XlPath
|
567
|
+
res = [f.relpath(root).as_posix() for f in XlPath(root).rglob('*') if patter(f)]
|
340
568
|
else:
|
341
569
|
raise TypeError
|
342
570
|
|
@@ -376,19 +604,19 @@ def _files_copy_move_base(src, dst, filefunc, dirfunc,
|
|
376
604
|
func = dirfunc
|
377
605
|
|
378
606
|
# 2 根据目标是否已存在和if_exists分类处理
|
379
|
-
|
607
|
+
File(dst).ensure_parent()
|
380
608
|
# 目前存在,且不是把文件移向文件夹的操作
|
381
609
|
if os.path.exists(dst):
|
382
610
|
# 根据if_exists参数情况分类处理
|
383
611
|
if if_exists is None: # 智能判断
|
384
612
|
if not filescmp(f, dst): # 如果内容不同则backup
|
385
|
-
|
613
|
+
File(dst).backup(move=True)
|
386
614
|
func(f, dst)
|
387
615
|
elif os.path.abspath(f).lower() == os.path.abspath(dst).lower():
|
388
616
|
# 如果内容相同,再判断其是否实际是一个文件,则调用重命名功能
|
389
617
|
os.rename(f, dst)
|
390
618
|
elif if_exists == 'backup':
|
391
|
-
|
619
|
+
File(dst).backup(move=True)
|
392
620
|
func(f, dst)
|
393
621
|
elif if_exists == 'replace':
|
394
622
|
filesdel(dst)
|
@@ -502,12 +730,14 @@ def writefile(ob, path='', *, encoding='utf8', if_exists='backup', suffix=None,
|
|
502
730
|
:return: 返回写入的文件名,这个主要是在写临时文件时有用
|
503
731
|
"""
|
504
732
|
if etag is None: etag = (not path)
|
505
|
-
|
506
|
-
|
507
|
-
|
733
|
+
if path == '': path = ...
|
734
|
+
f = File(path, root, suffix=suffix).write(ob, encoding=encoding, if_exists=if_exists)
|
735
|
+
if etag:
|
736
|
+
f = f.rename(get_etag(str(f)))
|
737
|
+
return str(f)
|
508
738
|
|
509
739
|
|
510
|
-
def merge_dir(src, dst, if_exists='
|
740
|
+
def merge_dir(src, dst, if_exists='skip'):
|
511
741
|
""" 将src目录下的数据拷贝到dst目录
|
512
742
|
"""
|
513
743
|
|
@@ -515,14 +745,55 @@ def merge_dir(src, dst, if_exists='ignore'):
|
|
515
745
|
p1.copy(p2, if_exists=if_exists)
|
516
746
|
|
517
747
|
# 只拷文件和空目录,不然逻辑会乱
|
518
|
-
Dir(src).select('**/*', type_='dir', max_size=0).select('**/*', type_='file').
|
748
|
+
Dir(src).select('**/*', type_='dir', max_size=0).select('**/*', type_='file').procpaths(func, ref_dir=dst)
|
519
749
|
|
520
750
|
|
521
751
|
def extract_files(src, dst, pattern, if_exists='replace'):
|
522
752
|
""" 提取满足pattern模式的文件
|
523
753
|
"""
|
524
754
|
d1, d2 = Dir(src), Dir(dst)
|
525
|
-
files = d1.select(pattern).
|
755
|
+
files = d1.select(pattern).subs
|
526
756
|
for f in files:
|
527
|
-
p1, p2 =
|
757
|
+
p1, p2 = File(d1 / f), File(d2 / f)
|
528
758
|
p1.copy(p2, if_exists=if_exists)
|
759
|
+
|
760
|
+
|
761
|
+
def file_or_dir_size(path):
|
762
|
+
if os.path.isfile(path):
|
763
|
+
return File(path).size
|
764
|
+
elif os.path.isdir(path):
|
765
|
+
return Dir(path).size
|
766
|
+
else:
|
767
|
+
return 0
|
768
|
+
|
769
|
+
|
770
|
+
def reduce_dir_depth(srcdir, unwrap=999):
|
771
|
+
""" 精简冗余嵌套的目录
|
772
|
+
|
773
|
+
比如a目录下只有一个文件:a/b/1.txt,
|
774
|
+
那么可以精简为a/1.txt,不需要多嵌套一个b目录
|
775
|
+
|
776
|
+
:param srcdir: 要处理的目录
|
777
|
+
:param unwrap: 打算解开的层数,未设置则会尽可能多解开
|
778
|
+
"""
|
779
|
+
import tempfile
|
780
|
+
root = p = XlPath(srcdir)
|
781
|
+
depth = 0
|
782
|
+
|
783
|
+
ps = list(p.glob('*'))
|
784
|
+
while len(ps) == 1 and ps[0].is_dir() and depth < unwrap:
|
785
|
+
depth += 1
|
786
|
+
p = ps[0]
|
787
|
+
ps = list(p.glob('*'))
|
788
|
+
|
789
|
+
if depth:
|
790
|
+
# 注意这里技巧,为了避免多层目录里会有相对同名的目录,导致出现不可预料的bug
|
791
|
+
# 算法原理是把要搬家的那层目录里的文件先移到临时文件,然后把原目录树结构删除后,再报临时文件的文件移回来
|
792
|
+
tmpdir = tempfile.mktemp()
|
793
|
+
shutil.move(str(p), str(tmpdir))
|
794
|
+
if depth > 1:
|
795
|
+
shutil.rmtree(next(root.glob('*')))
|
796
|
+
|
797
|
+
for pp in XlPath(tmpdir).glob('*'):
|
798
|
+
shutil.move(str(pp), str(root))
|
799
|
+
shutil.rmtree(tmpdir)
|