pyxllib 0.0.43__py3-none-any.whl → 0.3.197__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyxllib/__init__.py +9 -2
- pyxllib/algo/__init__.py +8 -0
- pyxllib/algo/disjoint.py +54 -0
- pyxllib/algo/geo.py +541 -0
- pyxllib/{util/mathlib.py → algo/intervals.py} +172 -36
- pyxllib/algo/matcher.py +389 -0
- pyxllib/algo/newbie.py +166 -0
- pyxllib/algo/pupil.py +629 -0
- pyxllib/algo/shapelylib.py +67 -0
- pyxllib/algo/specialist.py +241 -0
- pyxllib/algo/stat.py +494 -0
- pyxllib/algo/treelib.py +149 -0
- pyxllib/algo/unitlib.py +66 -0
- pyxllib/autogui/__init__.py +5 -0
- pyxllib/autogui/activewin.py +246 -0
- pyxllib/autogui/all.py +9 -0
- pyxllib/autogui/autogui.py +852 -0
- pyxllib/autogui/uiautolib.py +362 -0
- pyxllib/autogui/virtualkey.py +102 -0
- pyxllib/autogui/wechat.py +827 -0
- pyxllib/autogui/wechat_msg.py +421 -0
- pyxllib/autogui/wxautolib.py +84 -0
- pyxllib/cv/__init__.py +1 -11
- pyxllib/cv/expert.py +267 -0
- pyxllib/cv/{imlib.py → imfile.py} +18 -83
- pyxllib/cv/imhash.py +39 -0
- pyxllib/cv/pupil.py +9 -0
- pyxllib/cv/rgbfmt.py +1525 -0
- pyxllib/cv/slidercaptcha.py +137 -0
- pyxllib/cv/trackbartools.py +163 -49
- pyxllib/cv/xlcvlib.py +1040 -0
- pyxllib/cv/xlpillib.py +423 -0
- pyxllib/data/__init__.py +0 -0
- pyxllib/data/echarts.py +240 -0
- pyxllib/data/jsonlib.py +89 -0
- pyxllib/{util/oss2_.py → data/oss.py} +11 -9
- pyxllib/data/pglib.py +1127 -0
- pyxllib/data/sqlite.py +568 -0
- pyxllib/{util → data}/sqllib.py +13 -31
- pyxllib/ext/JLineViewer.py +505 -0
- pyxllib/ext/__init__.py +6 -0
- pyxllib/{util → ext}/demolib.py +119 -35
- pyxllib/ext/drissionlib.py +277 -0
- pyxllib/ext/kq5034lib.py +12 -0
- pyxllib/{util/main.py → ext/old.py} +122 -284
- pyxllib/ext/qt.py +449 -0
- pyxllib/ext/robustprocfile.py +497 -0
- pyxllib/ext/seleniumlib.py +76 -0
- pyxllib/{util/tklib.py → ext/tk.py} +10 -11
- pyxllib/ext/unixlib.py +827 -0
- pyxllib/ext/utools.py +351 -0
- pyxllib/{util/webhooklib.py → ext/webhook.py} +45 -17
- pyxllib/ext/win32lib.py +40 -0
- pyxllib/ext/wjxlib.py +88 -0
- pyxllib/ext/wpsapi.py +124 -0
- pyxllib/ext/xlwork.py +9 -0
- pyxllib/ext/yuquelib.py +1105 -0
- pyxllib/file/__init__.py +17 -0
- pyxllib/file/docxlib.py +761 -0
- pyxllib/{util → file}/gitlib.py +40 -27
- pyxllib/file/libreoffice.py +165 -0
- pyxllib/file/movielib.py +148 -0
- pyxllib/file/newbie.py +10 -0
- pyxllib/file/onenotelib.py +1469 -0
- pyxllib/file/packlib/__init__.py +330 -0
- pyxllib/{util → file/packlib}/zipfile.py +598 -195
- pyxllib/file/pdflib.py +426 -0
- pyxllib/file/pupil.py +185 -0
- pyxllib/file/specialist/__init__.py +685 -0
- pyxllib/{basic/_5_dirlib.py → file/specialist/dirlib.py} +364 -93
- pyxllib/file/specialist/download.py +193 -0
- pyxllib/file/specialist/filelib.py +2829 -0
- pyxllib/file/xlsxlib.py +3131 -0
- pyxllib/file/xlsyncfile.py +341 -0
- pyxllib/prog/__init__.py +5 -0
- pyxllib/prog/cachetools.py +64 -0
- pyxllib/prog/deprecatedlib.py +233 -0
- pyxllib/prog/filelock.py +42 -0
- pyxllib/prog/ipyexec.py +253 -0
- pyxllib/prog/multiprogs.py +940 -0
- pyxllib/prog/newbie.py +451 -0
- pyxllib/prog/pupil.py +1197 -0
- pyxllib/{sitepackages.py → prog/sitepackages.py} +5 -3
- pyxllib/prog/specialist/__init__.py +391 -0
- pyxllib/prog/specialist/bc.py +203 -0
- pyxllib/prog/specialist/browser.py +497 -0
- pyxllib/prog/specialist/common.py +347 -0
- pyxllib/prog/specialist/datetime.py +199 -0
- pyxllib/prog/specialist/tictoc.py +240 -0
- pyxllib/prog/specialist/xllog.py +180 -0
- pyxllib/prog/xlosenv.py +108 -0
- pyxllib/stdlib/__init__.py +17 -0
- pyxllib/{util → stdlib}/tablepyxl/__init__.py +1 -3
- pyxllib/{util → stdlib}/tablepyxl/style.py +1 -1
- pyxllib/{util → stdlib}/tablepyxl/tablepyxl.py +2 -4
- pyxllib/text/__init__.py +8 -0
- pyxllib/text/ahocorasick.py +39 -0
- pyxllib/text/airscript.js +744 -0
- pyxllib/text/charclasslib.py +121 -0
- pyxllib/text/jiebalib.py +267 -0
- pyxllib/text/jinjalib.py +32 -0
- pyxllib/text/jsa_ai_prompt.md +271 -0
- pyxllib/text/jscode.py +922 -0
- pyxllib/text/latex/__init__.py +158 -0
- pyxllib/text/levenshtein.py +303 -0
- pyxllib/text/nestenv.py +1215 -0
- pyxllib/text/newbie.py +300 -0
- pyxllib/text/pupil/__init__.py +8 -0
- pyxllib/text/pupil/common.py +1121 -0
- pyxllib/text/pupil/xlalign.py +326 -0
- pyxllib/text/pycode.py +47 -0
- pyxllib/text/specialist/__init__.py +8 -0
- pyxllib/text/specialist/common.py +112 -0
- pyxllib/text/specialist/ptag.py +186 -0
- pyxllib/text/spellchecker.py +172 -0
- pyxllib/text/templates/echart_base.html +11 -0
- pyxllib/text/templates/highlight_code.html +17 -0
- pyxllib/text/templates/latex_editor.html +103 -0
- pyxllib/text/vbacode.py +17 -0
- pyxllib/text/xmllib.py +747 -0
- pyxllib/xl.py +39 -0
- pyxllib/xlcv.py +17 -0
- pyxllib-0.3.197.dist-info/METADATA +48 -0
- pyxllib-0.3.197.dist-info/RECORD +126 -0
- {pyxllib-0.0.43.dist-info → pyxllib-0.3.197.dist-info}/WHEEL +4 -5
- pyxllib/basic/_1_strlib.py +0 -945
- pyxllib/basic/_2_timelib.py +0 -488
- pyxllib/basic/_3_pathlib.py +0 -916
- pyxllib/basic/_4_loglib.py +0 -419
- pyxllib/basic/__init__.py +0 -54
- pyxllib/basic/arrow_.py +0 -250
- pyxllib/basic/chardet_.py +0 -66
- pyxllib/basic/dirlib.py +0 -529
- pyxllib/basic/dprint.py +0 -202
- pyxllib/basic/extension.py +0 -12
- pyxllib/basic/judge.py +0 -31
- pyxllib/basic/log.py +0 -204
- pyxllib/basic/pathlib_.py +0 -705
- pyxllib/basic/pytictoc.py +0 -102
- pyxllib/basic/qiniu_.py +0 -61
- pyxllib/basic/strlib.py +0 -761
- pyxllib/basic/timer.py +0 -132
- pyxllib/cv/cv.py +0 -834
- pyxllib/cv/cvlib/_1_geo.py +0 -543
- pyxllib/cv/cvlib/_2_cvprcs.py +0 -309
- pyxllib/cv/cvlib/_2_imgproc.py +0 -594
- pyxllib/cv/cvlib/_3_pilprcs.py +0 -80
- pyxllib/cv/cvlib/_4_cvimg.py +0 -211
- pyxllib/cv/cvlib/__init__.py +0 -10
- pyxllib/cv/debugtools.py +0 -82
- pyxllib/cv/fitz_.py +0 -300
- pyxllib/cv/installer.py +0 -42
- pyxllib/debug/_0_installer.py +0 -38
- pyxllib/debug/_1_typelib.py +0 -277
- pyxllib/debug/_2_chrome.py +0 -198
- pyxllib/debug/_3_showdir.py +0 -161
- pyxllib/debug/_4_bcompare.py +0 -140
- pyxllib/debug/__init__.py +0 -49
- pyxllib/debug/bcompare.py +0 -132
- pyxllib/debug/chrome.py +0 -198
- pyxllib/debug/installer.py +0 -38
- pyxllib/debug/showdir.py +0 -158
- pyxllib/debug/typelib.py +0 -278
- pyxllib/image/__init__.py +0 -12
- pyxllib/torch/__init__.py +0 -20
- pyxllib/torch/modellib.py +0 -37
- pyxllib/torch/trainlib.py +0 -344
- pyxllib/util/__init__.py +0 -20
- pyxllib/util/aip_.py +0 -141
- pyxllib/util/casiadb.py +0 -59
- pyxllib/util/excellib.py +0 -495
- pyxllib/util/filelib.py +0 -612
- pyxllib/util/jsondata.py +0 -27
- pyxllib/util/jsondata2.py +0 -92
- pyxllib/util/labelmelib.py +0 -139
- pyxllib/util/onepy/__init__.py +0 -29
- pyxllib/util/onepy/onepy.py +0 -574
- pyxllib/util/onepy/onmanager.py +0 -170
- pyxllib/util/pyautogui_.py +0 -219
- pyxllib/util/textlib.py +0 -1305
- pyxllib/util/unorder.py +0 -22
- pyxllib/util/xmllib.py +0 -639
- pyxllib-0.0.43.dist-info/METADATA +0 -39
- pyxllib-0.0.43.dist-info/RECORD +0 -80
- pyxllib-0.0.43.dist-info/top_level.txt +0 -1
- {pyxllib-0.0.43.dist-info → pyxllib-0.3.197.dist-info/licenses}/LICENSE +0 -0
pyxllib/util/filelib.py
DELETED
@@ -1,612 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
# @Author : 陈坤泽
|
4
|
-
# @Email : 877362867@qq.com
|
5
|
-
# @Data : 2018/07/12 09:09
|
6
|
-
|
7
|
-
"""
|
8
|
-
各种文件遍历功能
|
9
|
-
|
10
|
-
这里要强调,推荐os.walk功能
|
11
|
-
"""
|
12
|
-
|
13
|
-
from pyxllib.util.xmllib import *
|
14
|
-
import pyxllib.util.zipfile as zipfile # 重写了标准库的zipfile文件,cp437改为gbk,解决zip中文乱码问题
|
15
|
-
|
16
|
-
# 需要使用的第三方软件
|
17
|
-
# BCompare.exe, bcompare函数要用
|
18
|
-
|
19
|
-
|
20
|
-
____section_1_normal = """
|
21
|
-
一些通用文件、文件夹工具
|
22
|
-
"""
|
23
|
-
|
24
|
-
|
25
|
-
def add_quote(s):
    """Return ``s`` formatted as text and wrapped in a pair of double quotes."""
    return ''.join(('"', format(s), '"'))
|
27
|
-
|
28
|
-
|
29
|
-
def recreate_folders(*dsts):
    """Rebuild each given directory as a fresh, empty directory.

    :param dsts: directory paths; an argument that triggers ``TypeError``
        (for example a non-path object) is silently skipped
    """
    for folder in dsts:
        try:
            # Remove the whole tree first; ignore_errors covers a missing folder.
            shutil.rmtree(folder, ignore_errors=True)
            # makedirs (not mkdir) because intermediate levels may be missing too.
            os.makedirs(folder)
        except TypeError:
            continue
|
38
|
-
|
39
|
-
|
40
|
-
class UsedRecords:
    """Persist a most-recently-used list of text records in a file.

    The records are held in ``self.ls`` with the newest entry first;
    ``save`` writes them back, one record per line.
    """

    def __init__(self, filename, default_value=None, *, use_temp_root=False, limit_num=30):
        """Load the records from ``filename`` or fall back to ``default_value``.

        :param filename: path (or just the name) of the record file
        :param default_value: iterable of initial records used when the file
            does not exist yet; ``None`` means "start with no records"
        :param use_temp_root: store the file, by its basename, inside the
            ``code4101py_config`` folder of the temporary directory
        :param limit_num: maximum number of records kept by ``add``
        """
        from os.path import join, dirname, basename, exists

        # 1 resolve where the record file lives
        if use_temp_root:
            import tempfile
            # TEMP is usually only defined on Windows; fall back to the
            # platform temp directory so join() never receives None.
            temp_root = os.getenv('TEMP') or tempfile.gettempdir()
            folder = join(temp_root, 'code4101py_config')
            name = basename(filename)
            fullname = join(folder, name)
        else:
            folder = dirname(filename)
            name = basename(filename)
            fullname = filename

        # 2 read the stored records, or start from the default
        if exists(fullname):
            # ensure_content is a project helper; presumably it returns the file text
            ls = ensure_content(fullname).splitlines()
        else:
            # default_value defaults to None, which list() cannot iterate
            ls = list(default_value) if default_value is not None else []

        # 3 keep everything on the instance
        self.dirname = folder
        self.basename = name
        self.fullname = fullname
        self.ls = ls
        self.limit_num = limit_num

    def save(self):
        """Write the records to ``self.fullname``, one per line."""
        Path(self.dirname + '/').ensure_dir()
        Path(self.fullname).write('\n'.join(self.ls), if_exists='replace')

    def add(self, s):
        """Register ``s`` as the most recently used record.

        An existing occurrence of ``s`` is moved to the front, otherwise ``s``
        is inserted at the front. The list is then truncated to
        ``self.limit_num`` entries.
        """
        if s in self.ls:
            self.ls.remove(s)

        self.ls = [s] + list(self.ls)

        if len(self.ls) > self.limit_num:
            self.ls = self.ls[:self.limit_num]

    def __str__(self):
        """Return the file path followed by every record, newline separated."""
        return '\n'.join([self.fullname, *self.ls])
|
100
|
-
|
101
|
-
|
102
|
-
def checkpathfile(name):
    r"""Look for the file ``name`` in the directories of the PATH variable.

    Typical targets: BCompare.exe, Chrome.exe, mogrify.exe, xelatex.exe

    >> checkpathfile('xelatex.exe')
    'C:\\CTEX\\MiKTeX\\miktex\\bin\\xelatex.exe'
    >> checkpathfile('abcd.exe')

    :param name: file name to look for
    :return: the first existing ``<dir>/<name>`` path, or None when the file
        is in none of the directories (or PATH is not set)
    """
    # Windows resolves environment names case-insensitively, but POSIX only
    # knows 'PATH'; os.pathsep is ';' on Windows and ':' elsewhere.
    raw = os.environ.get('PATH') or os.environ.get('path')
    if not raw:
        return None

    for folder in raw.split(os.pathsep):
        candidate = os.path.join(folder, name)
        if os.path.exists(candidate):
            return candidate
    return None
|
115
|
-
|
116
|
-
|
117
|
-
def filename_tail(fn, tail):
    """Insert ``tail`` between the file name stem and its extension."""
    stem, suffix = os.path.splitext(fn)
    return stem + tail + suffix
|
121
|
-
|
122
|
-
|
123
|
-
def hasext(f, *exts):
    """Tell whether the extension of ``f`` is one of ``exts``.

    The comparison is case-insensitive (everything is lower-cased).

    :return: the lower-cased extension of ``f`` when it matches, else False
    """
    suffix = os.path.splitext(f)[1].lower()
    accepted = tuple(e.lower() for e in exts)
    return suffix if suffix in accepted else False
|
134
|
-
|
135
|
-
|
136
|
-
def isdir(fn):
    """Return True only if ``fn`` is a valid path that names an existing folder.

    ``ValueError`` (e.g. an over-long file name) and ``TypeError`` (input that
    is not path-like) are both reported as False.
    """
    try:
        result = os.path.isdir(fn)
    except (ValueError, TypeError):
        result = False
    return result
|
144
|
-
|
145
|
-
|
146
|
-
____section_4_mygetfiles = """
|
147
|
-
py有os.walk可以递归遍历得到一个目录下的所有文件
|
148
|
-
但是“我们”常常要过滤掉备份文件(171020-153959),Old、temp目、.git等目录
|
149
|
-
特别是windows还有一个很坑爹的$RECYCLE.BIN目录。
|
150
|
-
所以在os.walk的基础上,再做了封装得到myoswalk。
|
151
|
-
|
152
|
-
然后在myoswalk基础上,实现mygetfiles。
|
153
|
-
"""
|
154
|
-
|
155
|
-
|
156
|
-
def gen_file_filter(s):
    """Build a file-name predicate from the rule string ``s``.

    * A rule starting with ``.`` is treated as a suffix: the predicate
      returns ``name.endswith(s)``.
    * Any other rule is used as a regular expression and the predicate
      returns the ``re.search`` result (a match object or None). Before
      compiling, every ``?`` in the rule is replaced by a character class
      matching one Chinese character.
      NOTE(review): the original comment speaks of the Chinese (full-width)
      question mark; confirm which character is really intended here.

    An empty rule yields a predicate that matches every name.

    :param s: suffix such as ``'.py'`` or a regular expression
    :return: a one-argument callable
    """
    # startswith() instead of s[0] so that an empty rule does not raise IndexError
    if s.startswith('.'):
        return lambda x: x.endswith(s)

    pattern = re.compile(s.replace('?', r'[\u4e00-\u9fa5]'))  # compile once, reuse on every call
    return lambda x: pattern.search(x)
|
163
|
-
|
164
|
-
|
165
|
-
def myoswalk(root, filter_rule=None, recur=True):
    """Walk ``root`` like ``os.walk`` while hiding special and backup entries.

    :param root: root directory
    :param filter_rule:
        a string:
            starting with ``.``: treated as a file suffix
            anything else: treated as a regular expression that is searched
                in the full name (directory path joined with the entry name)
        or a custom function that receives that full name
    :param recur: when False only ``root`` itself is inspected
    :return: generator of ``(dirpath, dirnames, filenames)`` tuples;
        directories with nothing left after filtering are not yielded
    """
    if isinstance(filter_rule, str):
        filter_rule = gen_file_filter(filter_rule)

    def is_regular(name):
        # Path is the project path class; backup_time is presumably set for
        # names carrying a backup timestamp. '-冲突-' marks sync-conflict copies.
        return not Path(name).backup_time and '-冲突-' not in name

    for dirpath, dirnames, filenames in os.walk(root):
        # Drop special directories in place so os.walk does not descend into them.
        for special in ('.git', '$RECYCLE.BIN', '__pycache__', 'temp', 'Old', 'old'):
            try:
                dirnames.remove(special)
            except ValueError:
                pass

        # Backup/conflict directories are pruned in place as well, otherwise
        # os.walk would still visit (and yield) their contents.
        dirnames[:] = [d for d in dirnames if is_regular(d)]
        filenames = [f for f in filenames if is_regular(f)]

        # The user rule only shapes what is reported. It must NOT prune the
        # walk: a suffix rule such as '.py' rejects every directory name.
        shown_dirs = dirnames
        if filter_rule:
            shown_dirs = [d for d in dirnames if filter_rule(os.path.join(dirpath, d))]
            filenames = [f for f in filenames if filter_rule(os.path.join(dirpath, f))]

        # A directory with nothing left is not reported.
        if filenames or shown_dirs:
            yield dirpath, shown_dirs, filenames

        # Checked even when nothing was yielded, so recur=False never leaks
        # into sub-directories.
        if not recur:
            break
|
207
|
-
|
208
|
-
|
209
|
-
def getfiles(root, filter_rule=None):
    r"""Yield every file below ``root`` that passes ``filter_rule``.

    Thin wrapper around ``os.walk``. Example:
        for f in getfiles(r'C:\pycode\code4101py', r'.py'):
            print(f)

    :param root: directory to walk
    :param filter_rule: None (keep everything), a rule string understood by
        ``gen_file_filter`` (``'.suffix'`` or a regular expression), or a
        callable that receives the bare file name
    :return: generator of file paths (directory joined with file name)
    """
    if isinstance(filter_rule, str):
        filter_rule = gen_file_filter(filter_rule)

    # The rule must not be handed to os.walk: its second positional
    # parameter is ``topdown``, not a filter.
    for dirpath, _, files in os.walk(root):
        for f in files:
            if filter_rule and not filter_rule(f):
                continue
            # os.path.join keeps the result valid on every platform
            yield os.path.join(dirpath, f)
|
225
|
-
|
226
|
-
|
227
|
-
def tex_content_filefilter(f):
    """Accept only content tex files: ``.tex`` names without 'Conf' or 'settings'."""
    return f.endswith('.tex') and 'Conf' not in f and 'settings' not in f
|
233
|
-
|
234
|
-
|
235
|
-
def tex_conf_filefilter(f):
    """Accept only configuration tex files: ``.tex`` names containing 'Conf' or 'settings'."""
    return f.endswith('.tex') and ('Conf' in f or 'settings' in f)
|
241
|
-
|
242
|
-
|
243
|
-
def mygetfiles(root, filter_rule=None, recur=True):
    r"""Yield every matching file found by ``myoswalk``.

    Example:
        for f in mygetfiles(r'C:\pycode\code4101py', r'.py'):
            print(f)

    Backup files are filtered out by ``myoswalk``. Besides the ``'.suffix'``
    form, the rule may also be a regular expression.

    :param root: directory to walk
    :param filter_rule: see ``myoswalk``
    :param recur: see ``myoswalk``
    :return: generator of file paths (directory joined with file name)
    """
    for dirpath, _, files in myoswalk(root, filter_rule, recur):
        for f in files:
            # os.path.join instead of a hard-coded '\\' so the paths are
            # usable outside Windows as well
            yield os.path.join(dirpath, f)
|
258
|
-
|
259
|
-
|
260
|
-
def _test_getfile_speed():
    r"""Time the file walkers on drive D: and report each run.

    Figures recorded by the original author:
        walk all files on D: (205066 files): 0.65 s
        walk all tex files on D: (7796 files): 0.95 s
        filtered walk of all files on D: (193161 files): 1.19 s
        filtered walk of all tex files on D: (4464 files): 1.22 s
            + EnsureContent: 3.18 s; keeping all text in a list costs about
              310 MB, joining into one str costs about the same
            + re.sub(r'\$.*?\$', r'', s): 4.48 s
    """
    runs = (
        (getfiles, (r'D:\\',), '遍历D盘所有文件'),
        (getfiles, (r'D:\\', '.tex'), '遍历D盘所有tex文件'),
        (mygetfiles, (r'D:\\',), '有筛选遍历D盘所有文件'),
        (mygetfiles, (r'D:\\', '.tex'), '有筛选遍历D盘所有tex文件'),
    )
    for walker, args, label in runs:
        timer = Timer(start_now=True)
        ls = list(walker(*args))
        timer.stop_and_report(f'{label}({len(ls)}个)')
|
284
|
-
|
285
|
-
|
286
|
-
____section_5_filedfs = """
|
287
|
-
对目录的遍历查看目录结构
|
288
|
-
"""
|
289
|
-
|
290
|
-
|
291
|
-
def file_generator(f):
    """Child generator for plain files: list the direct entries of ``f``.

    :param f: directory to inspect
    :return: an iterator over ``'<dir>/<entry>'`` strings (files first, then
        sub-directories) as reported by the first ``myoswalk`` step, or an
        empty list when ``f`` is not a directory or nothing is reported
    """
    if not os.path.isdir(f):
        return []

    first = next(myoswalk(f), None)
    if first is None:
        return []

    top, subdirs, files = first
    return map(lambda entry: top + '/' + entry, files + subdirs)
|
306
|
-
|
307
|
-
|
308
|
-
def pyfile_generator(f):
    """Child generator for Python sources: ``.py`` files plus sub-directories of ``f``.

    :param f: directory to inspect
    :return: an iterator over ``'<dir>/<entry>'`` strings (``.py`` files
        first, then sub-directories) taken from the first ``myoswalk`` step,
        or an empty list when ``f`` is not a directory or nothing is reported
    """
    if not os.path.isdir(f):
        return []

    first = next(myoswalk(f), None)
    if first is None:
        return []

    top, subdirs, files = first
    py_files = [name for name in files if name.endswith('.py')]
    return map(lambda entry: top + '/' + entry, py_files + subdirs)
|
323
|
-
|
324
|
-
|
325
|
-
def texfile_generator(f):
    """Child generator for tex sources: ``.tex`` files plus sub-directories of ``f``.

    :param f: directory to inspect
    :return: an iterator over ``'<dir>/<entry>'`` strings (``.tex`` files
        first, then sub-directories) taken from the first ``myoswalk`` step,
        or an empty list when ``f`` is not a directory or nothing is reported
    """
    if not os.path.isdir(f):
        return []

    first = next(myoswalk(f), None)
    if first is None:
        return []

    top, subdirs, files = first
    tex_files = [name for name in files if name.endswith('.tex')]
    return map(lambda entry: top + '/' + entry, tex_files + subdirs)
|
341
|
-
|
342
|
-
|
343
|
-
def file_str(f):
    """Return the short display name of a path.

    :param f: full path of a folder or a file
    :return: the basename, wrapped in ``<>`` when ``f`` is an existing folder
        a/b ==> <b>
        a/b.txt ==> b.txt
    """
    name = os.path.basename(f)
    return '<' + name + '>' if os.path.isdir(f) else name
|
356
|
-
|
357
|
-
|
358
|
-
def filedfs(root,
            child_generator=file_generator, select_depth=None, linenum=True,
            mystr=file_str, msghead=True, lsstr=None, show_node_type=False, prefix='\t'):
    """Traverse a file tree depth-first by delegating to ``dfs_base``.

    The child generators filter out many special cases, so this is not a
    general-purpose folder viewer.

    :param child_generator: a callable producing the children of a node, or
        one of the shortcuts ``'.py'`` / ``'.tex'``; any other string raises
        ``ValueError``
    All remaining arguments are forwarded to ``dfs_base`` unchanged.
    """
    if isinstance(child_generator, str):
        shortcuts = {'.py': pyfile_generator, '.tex': texfile_generator}
        picked = shortcuts.get(child_generator)
        if picked is None:
            raise ValueError
        child_generator = picked

    return dfs_base(root, child_generator=child_generator, select_depth=select_depth, linenum=linenum,
                    mystr=mystr, msghead=msghead, lsstr=lsstr, show_node_type=show_node_type, prefix=prefix)
|
374
|
-
|
375
|
-
|
376
|
-
____section_6_viewfiles = """
|
377
|
-
使用外部程序查看文件
|
378
|
-
"""
|
379
|
-
|
380
|
-
|
381
|
-
def genfilename(fd='.'):
    """Generate a file name (without extension) that is unused in folder ``fd``.

    Only the name is produced, no file is created; the folder is read merely
    to avoid a clash with existing names (compared without extension).

    Format: ``180827周一195802``. The leading part is the date/weekday tag
    returned by ``Datetime().briefdateweek()``; the trailing part is a
    6-digit number starting at the current HHMMSS. On a clash the number is
    incremented until the name is free, so the "seconds" may show values
    such as 99.

    :param fd: folder whose existing names must be avoided
    :return: the generated name
    """
    # 1 leading date tag, e.g. '180827周一'
    s1 = Datetime().briefdateweek()

    # 2 trailing numeric tag
    d2 = int(datetime.datetime.now().strftime('%H%M%S'))

    # 3 collect the existing names (extension stripped) and search a free one
    files = {os.path.basename(os.path.splitext(x)[0]) for x in os.listdir(fd)}

    # Zero-pad to 6 digits: a plain str(d2) would shorten the name before
    # 10:00:00 (e.g. 00:05:09 -> '509').
    while f'{s1}{d2:06d}' in files:
        d2 += 1

    return f'{s1}{d2:06d}'
|
403
|
-
|
404
|
-
|
405
|
-
____section_7_PackFile = """
|
406
|
-
处理压缩文件
|
407
|
-
"""
|
408
|
-
|
409
|
-
|
410
|
-
class PackFile:
    """Uniform wrapper around a zip or rar archive.

    All archive operations are delegated to ``self.proc`` (a ``ZipFile`` or
    a ``RarFile``). Used as a context manager, the archive is extracted to a
    temporary folder which is removed again on exit.
    """

    def __init__(self, file, mode=None):
        """
        :param file: archive to process
        :param mode: archive format; guessed from the extension when omitted
            'rar':
            'zip': files with a docx extension are handled as zip as well
        :raises ValueError: the format cannot be guessed or is not supported
        :raises ModuleNotFoundError: rar requested but ``unrar`` is missing
        """
        # 1 determine the archive format
        ext = os.path.splitext(file)[1].lower()
        if not mode:
            if ext in ('.docx', '.zip'):
                mode = 'zip'
            elif ext == '.rar':
                mode = 'rar'
            else:
                dprint(ext)  # the extension does not reveal the archive format
                raise ValueError
        self.mode = mode

        # 2 pick the extraction "engine"
        if mode == 'zip':
            self.proc = zipfile.ZipFile(file)
        elif mode == 'rar':
            try:
                from unrar.rarfile import RarFile
            except ModuleNotFoundError:
                # unrar is missing, installation notes:
                # https://blog.csdn.net/code4101/article/details/79328636
                dprint()
                raise
            self.proc = RarFile(file)
        else:
            # Fail early instead of leaving the instance without self.proc
            raise ValueError(f'unsupported archive mode: {mode!r}')

        # 3 extraction folder, None while nothing has been extracted
        self.tempfolder = None

    def open(self, member, pwd=None):
        """Return file-like object for 'member'.

        'member' may be a filename or an archive info object.
        """
        return self.proc.open(member, pwd)

    def read(self, member, pwd=None):
        """Return the bytes of the archive member ``member``."""
        return self.proc.read(member, pwd)

    def namelist(self):
        """Return the list of member names, e.g.

        1 [Content_Types].xml
        2 _rels/.rels
        ......
        20 word/fontTable.xml
        21 docProps/app.xml
        """
        return self.proc.namelist()

    def setpassword(self, pwd):
        """Set default password for encrypted files."""
        return self.proc.setpassword(pwd)

    def getinfo(self, name):
        """Return the info object of one member.

        >> self.getinfo('word/document.xml')
        <ZipInfo filename='word/document.xml' compress_type=deflate file_size=140518 compress_size=10004>
        """
        return self.proc.getinfo(name)

    def infolist(self, prefix=None, zipinfo=True):
        """Multi-member version of ``getinfo``.

        1 <ZipInfo filename='[Content_Types].xml' compress_type=deflate file_size=1495 compress_size=383>
        2 <ZipInfo filename='_rels/.rels' compress_type=deflate file_size=590 compress_size=243>
        ......

        :param prefix: keep only members whose name starts with this prefix,
            e.g. 'word/' selects the members below the word folder
        :param zipinfo: True returns the info objects, False returns only
            their file names
        """
        ls = self.proc.infolist()
        if prefix:
            ls = [t for t in ls if t.filename.startswith(prefix)]
        if not zipinfo:
            ls = [x.filename for x in ls]
        return ls

    def printdir(self):
        """Print a table of contents for the archive."""
        return self.proc.printdir()

    def testrar(self):
        """Read all the files and check the CRC.

        ZipFile names this operation ``testzip``, so zip archives are
        dispatched to it instead of failing with AttributeError.
        """
        if self.mode == 'zip':
            return self.proc.testzip()
        return self.proc.testrar()

    def extract(self, member, path=None, pwd=None):
        """Extract one member; note that extract('word/document.xml', 'a')
        places the file at 'a/word/document.xml'.
        """
        return self.proc.extract(member, path, pwd)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all members from the archive to the current working
        directory. `path' specifies a different directory to extract to.
        `members' is optional and must be a subset of the list returned
        by namelist().
        """
        return self.proc.extractall(path, members, pwd)

    def extractall2tempfolder(self):
        """Extract the archive into a temporary folder (once) and return that folder."""
        if not self.tempfolder:
            self.tempfolder = tempfile.mkdtemp()
            self.proc.extractall(path=self.tempfolder)
        return self.tempfolder

    def clear_tempfolder(self):
        """Delete the temporary extraction folder, if one was created."""
        if self.tempfolder:
            filesdel(self.tempfolder)  # project helper, presumably removes the folder
            # Forget the folder so a later extractall2tempfolder() extracts
            # again instead of returning a deleted path.
            self.tempfolder = None

    def __enter__(self):
        """Support ``with ... as ...``: extract on entry, clean up on exit.

        Note: the value bound by ``as`` is the extraction folder, not the
        PackFile object.
        """
        return self.extractall2tempfolder()

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.clear_tempfolder()
|
537
|
-
|
538
|
-
|
539
|
-
____section_temp = """
|
540
|
-
临时添加的新功能
|
541
|
-
"""
|
542
|
-
|
543
|
-
|
544
|
-
def filetext_replace(files, func, *,
                     count=-1, start=1, bc=False, write=False, if_exists=None):
    r"""Apply a text transformation to a batch of text files.

    :param files: file matching rule, see ``filesmatch``
    :param func: text processing function, receives the file content and
        returns the new content
    :param count: stop after this many changed files, a guard against
        processing an excessive number of files
    :param start: 1-based number of the first file to process, handy for
        resuming after an unexpected stop while debugging
    :param bc: show the change with Beyond Compare
        bc wins over write: with both set, the comparison is opened and
        nothing is written
    :param write: save the changed content back to the file
    :param if_exists: backup behaviour, see the parameters of writefile
    :return: list of the files whose content is changed by ``func``
    """
    ls = []
    total = 0
    # NOTE: the names passed to dprint/bcompare are kept as they are; these
    # project helpers may report the argument names.
    for total, f in enumerate(filesmatch(files), start=1):
        if total < start:
            continue
        s0 = Path(f).read()
        s1 = func(s0)
        if s0 == s1:
            continue

        match = len(ls) + 1
        dprint(f, total, match)
        if bc:
            bcompare(f, s1)
        elif write:  # never reached while bc is on
            Path(f).write(s1, if_exists=if_exists)
        ls.append(f)
        if len(ls) == count:
            break

    match_num = len(ls)
    dprint(total, match_num)
    return ls
|
582
|
-
|
583
|
-
|
584
|
-
def change_ext(filename, ext):
    """Replace the extension of ``filename`` with ``ext``.

    ``filename`` may lack an extension (e.g. 'A/B/C/a'); ``ext`` must be
    given without the dot, e.g. 'tex' or 'txt'.

    :return: tuple ``(new_name, exists)`` where ``exists`` tells whether the
        renamed path is present on disk
    """
    stem = os.path.splitext(filename)[0]  # 'A/B/C/a.txt' --> 'A/B/C/a'
    candidate = stem + '.' + ext
    found = os.path.exists(candidate)
    return candidate, found
|
594
|
-
|
595
|
-
|
596
|
-
def download_file(url, fn=None, *, encoding=None, if_exists=None, ext=None, temp=False):
    """Download ``url`` and store the content, similar to writefile.

    :param url: download link
    :param fn: target location; when omitted, the last segment of ``url`` is
        used as the file name
    :param encoding: forwarded to the project ``Path.write``
    :param if_exists: see the parameters of writefile
    :param ext: forwarded to the project ``Path`` constructor
    :param temp: write the file below ``Path.TEMP``
    :return: ``fullpath`` of the written file
    """
    if not fn:
        fn = url.split('/')[-1]

    root = None
    if temp:
        root = Path.TEMP

    target = Path(fn, ext, root)
    payload = requests.get(url).content
    # etag is only requested when no file name is available even after the
    # url fallback (fn is still empty, e.g. the url ends with '/')
    fn = target.write(payload, encoding=encoding, if_exists=if_exists, etag=(not fn))
    return fn.fullpath
|
609
|
-
|
610
|
-
|
611
|
-
____other = """
|
612
|
-
"""
|
pyxllib/util/jsondata.py
DELETED
@@ -1,27 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
# @Author : 陈坤泽
|
4
|
-
# @Email : 877362867@qq.com
|
5
|
-
# @Data : 2020/08/15 00:59
|
6
|
-
|
7
|
-
from pyxllib.basic import get_encoding, Path
|
8
|
-
|
9
|
-
____labelme_json = """
|
10
|
-
"""
|
11
|
-
|
12
|
-
|
13
|
-
def is_labelme_json_data(data):
    """Tell whether ``data`` carries every top-level key of the labelme format.

    :param data: dict
    :return: True or False
    """
    required = {'version', 'flags', 'shapes', 'imagePath', 'imageData', 'imageHeight', 'imageWidth'}
    return required.issubset(data.keys())
|
20
|
-
|
21
|
-
|
22
|
-
def reduce_labelme_jsonfile(jsonpath):
    """Clear the ``imageData`` field of a labelme json file and save it in place.

    Files that are not in labelme format, or whose ``imageData`` is already
    empty, are left untouched.

    :param jsonpath: path of the json file
    """
    p = Path(jsonpath)
    data = p.read(mode='.json')
    if not is_labelme_json_data(data) or not data['imageData']:
        return
    data['imageData'] = None
    p.write(data, encoding=p.encoding, if_exists='replace')
|