pyxllib 0.3.197__py3-none-any.whl → 3.201.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyxllib/__init__.py +14 -21
- pyxllib/algo/__init__.py +8 -8
- pyxllib/algo/disjoint.py +54 -54
- pyxllib/algo/geo.py +537 -541
- pyxllib/algo/intervals.py +964 -964
- pyxllib/algo/matcher.py +389 -389
- pyxllib/algo/newbie.py +166 -166
- pyxllib/algo/pupil.py +629 -629
- pyxllib/algo/shapelylib.py +67 -67
- pyxllib/algo/specialist.py +241 -241
- pyxllib/algo/stat.py +494 -494
- pyxllib/algo/treelib.py +145 -149
- pyxllib/algo/unitlib.py +62 -66
- pyxllib/autogui/__init__.py +5 -5
- pyxllib/autogui/activewin.py +246 -246
- pyxllib/autogui/all.py +9 -9
- pyxllib/autogui/autogui.py +846 -852
- pyxllib/autogui/uiautolib.py +362 -362
- pyxllib/autogui/virtualkey.py +102 -102
- pyxllib/autogui/wechat.py +827 -827
- pyxllib/autogui/wechat_msg.py +421 -421
- pyxllib/autogui/wxautolib.py +84 -84
- pyxllib/cv/__init__.py +5 -5
- pyxllib/cv/expert.py +267 -267
- pyxllib/cv/imfile.py +159 -159
- pyxllib/cv/imhash.py +39 -39
- pyxllib/cv/pupil.py +9 -9
- pyxllib/cv/rgbfmt.py +1525 -1525
- pyxllib/cv/slidercaptcha.py +137 -137
- pyxllib/cv/trackbartools.py +251 -251
- pyxllib/cv/xlcvlib.py +1040 -1040
- pyxllib/cv/xlpillib.py +423 -423
- pyxllib/data/echarts.py +236 -240
- pyxllib/data/jsonlib.py +85 -89
- pyxllib/data/oss.py +72 -72
- pyxllib/data/pglib.py +1111 -1127
- pyxllib/data/sqlite.py +568 -568
- pyxllib/data/sqllib.py +297 -297
- pyxllib/ext/JLineViewer.py +505 -505
- pyxllib/ext/__init__.py +6 -6
- pyxllib/ext/demolib.py +251 -246
- pyxllib/ext/drissionlib.py +277 -277
- pyxllib/ext/kq5034lib.py +12 -12
- pyxllib/ext/qt.py +449 -449
- pyxllib/ext/robustprocfile.py +493 -497
- pyxllib/ext/seleniumlib.py +76 -76
- pyxllib/ext/tk.py +173 -173
- pyxllib/ext/unixlib.py +821 -827
- pyxllib/ext/utools.py +345 -351
- pyxllib/ext/webhook.py +124 -119
- pyxllib/ext/win32lib.py +40 -40
- pyxllib/ext/wjxlib.py +91 -88
- pyxllib/ext/wpsapi.py +124 -124
- pyxllib/ext/xlwork.py +9 -9
- pyxllib/ext/yuquelib.py +1110 -1105
- pyxllib/file/__init__.py +17 -17
- pyxllib/file/docxlib.py +757 -761
- pyxllib/file/gitlib.py +309 -309
- pyxllib/file/libreoffice.py +165 -165
- pyxllib/file/movielib.py +144 -148
- pyxllib/file/newbie.py +10 -10
- pyxllib/file/onenotelib.py +1469 -1469
- pyxllib/file/packlib/__init__.py +330 -330
- pyxllib/file/packlib/zipfile.py +2441 -2441
- pyxllib/file/pdflib.py +422 -426
- pyxllib/file/pupil.py +185 -185
- pyxllib/file/specialist/__init__.py +681 -685
- pyxllib/file/specialist/dirlib.py +799 -799
- pyxllib/file/specialist/download.py +193 -193
- pyxllib/file/specialist/filelib.py +2825 -2829
- pyxllib/file/xlsxlib.py +3122 -3131
- pyxllib/file/xlsyncfile.py +341 -341
- pyxllib/prog/__init__.py +5 -5
- pyxllib/prog/cachetools.py +58 -64
- pyxllib/prog/deprecatedlib.py +233 -233
- pyxllib/prog/filelock.py +42 -42
- pyxllib/prog/ipyexec.py +253 -253
- pyxllib/prog/multiprogs.py +940 -940
- pyxllib/prog/newbie.py +451 -451
- pyxllib/prog/pupil.py +1208 -1197
- pyxllib/prog/sitepackages.py +33 -33
- pyxllib/prog/specialist/__init__.py +348 -391
- pyxllib/prog/specialist/bc.py +203 -203
- pyxllib/prog/specialist/browser.py +497 -497
- pyxllib/prog/specialist/common.py +347 -347
- pyxllib/prog/specialist/datetime.py +198 -198
- pyxllib/prog/specialist/tictoc.py +240 -240
- pyxllib/prog/specialist/xllog.py +180 -180
- pyxllib/prog/xlosenv.py +110 -108
- pyxllib/stdlib/__init__.py +17 -17
- pyxllib/stdlib/tablepyxl/__init__.py +10 -10
- pyxllib/stdlib/tablepyxl/style.py +303 -303
- pyxllib/stdlib/tablepyxl/tablepyxl.py +130 -130
- pyxllib/text/__init__.py +8 -8
- pyxllib/text/ahocorasick.py +36 -39
- pyxllib/text/airscript.js +754 -744
- pyxllib/text/charclasslib.py +121 -121
- pyxllib/text/jiebalib.py +267 -267
- pyxllib/text/jinjalib.py +27 -32
- pyxllib/text/jsa_ai_prompt.md +271 -271
- pyxllib/text/jscode.py +922 -922
- pyxllib/text/latex/__init__.py +158 -158
- pyxllib/text/levenshtein.py +303 -303
- pyxllib/text/nestenv.py +1215 -1215
- pyxllib/text/newbie.py +300 -300
- pyxllib/text/pupil/__init__.py +8 -8
- pyxllib/text/pupil/common.py +1121 -1121
- pyxllib/text/pupil/xlalign.py +326 -326
- pyxllib/text/pycode.py +47 -47
- pyxllib/text/specialist/__init__.py +8 -8
- pyxllib/text/specialist/common.py +112 -112
- pyxllib/text/specialist/ptag.py +186 -186
- pyxllib/text/spellchecker.py +172 -172
- pyxllib/text/templates/echart_base.html +10 -10
- pyxllib/text/templates/highlight_code.html +16 -16
- pyxllib/text/templates/latex_editor.html +102 -102
- pyxllib/text/vbacode.py +17 -17
- pyxllib/text/xmllib.py +741 -747
- pyxllib/xl.py +42 -39
- pyxllib/xlcv.py +17 -17
- pyxllib-3.201.1.dist-info/METADATA +296 -0
- pyxllib-3.201.1.dist-info/RECORD +125 -0
- {pyxllib-0.3.197.dist-info → pyxllib-3.201.1.dist-info}/licenses/LICENSE +190 -190
- pyxllib/ext/old.py +0 -663
- pyxllib-0.3.197.dist-info/METADATA +0 -48
- pyxllib-0.3.197.dist-info/RECORD +0 -126
- {pyxllib-0.3.197.dist-info → pyxllib-3.201.1.dist-info}/WHEEL +0 -0
@@ -1,497 +1,497 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
# @Author : 陈坤泽
|
4
|
-
# @Email : 877362867@qq.com
|
5
|
-
# @Date : 2020/05/30 22:43
|
6
|
-
|
7
|
-
import builtins
|
8
|
-
import enum
|
9
|
-
import html
|
10
|
-
import inspect
|
11
|
-
import os
|
12
|
-
import subprocess
|
13
|
-
import sys
|
14
|
-
import datetime
|
15
|
-
import platform
|
16
|
-
import re
|
17
|
-
|
18
|
-
import pandas as pd
|
19
|
-
from bs4 import BeautifulSoup
|
20
|
-
|
21
|
-
from pyxllib.prog.newbie import typename
|
22
|
-
from pyxllib.prog.pupil import dprint, func_input_message, is_url, is_file
|
23
|
-
from pyxllib.prog.specialist.common import TypeConvert, NestedDict, KeyValuesCounter, dataframe_str
|
24
|
-
from pyxllib.prog.specialist.tictoc import TicToc
|
25
|
-
from pyxllib.text.pupil import ensure_gbk, shorten
|
26
|
-
from pyxllib.file.specialist.dirlib import File, Dir, get_etag, XlPath
|
27
|
-
|
28
|
-
|
29
|
-
def getasizeof(*objs, **opts):
    """Return the size of the given objects as measured by ``pympler.asizeof``.

    :param objs: objects to measure, forwarded positionally to ``asizeof.asizeof``
    :param opts: keyword options forwarded unchanged to ``asizeof.asizeof``
    :return: the value returned by ``asizeof.asizeof``, or -1 if the measurement raises
    """
    from pympler import asizeof

    try:
        return asizeof.asizeof(*objs, **opts)
    except Exception:
        # Best effort: some objects cannot be measured (the original author ran into
        # sqlalchemy.exc.InvalidRequestError), so report -1 instead of failing the caller.
        # ``Exception`` rather than a bare ``except`` so that KeyboardInterrupt and
        # SystemExit still propagate.
        return -1
|
39
|
-
|
40
|
-
|
41
|
-
def viewfiles(procname, *files, **kwargs):
    """ Open ``files`` with the program ``procname``.

    :param procname: name of the program to launch
    :param files: items to open; each one is either an existing file path / url, or some
        content (a string etc.) that is first written to a file in the temp directory
    :param kwargs:
        wait: truthy -> launch with ``subprocess.run`` and wait for the program to exit;
            falsy -> launch with ``subprocess.Popen`` and return immediately
        filename: controls the name (stem and extension) of the files that get written
        if_exists: forwarded to ``File.write`` for written files, default 'error'
        save: documented historically, but this function does not use it
        TODO: more features could be added here depending on the target program
    :return: ``TicToc().tocvalue()`` taken after launching (presumably the elapsed time
        of the external operation -- confirm against TicToc)
    :raises FileNotFoundError: when the program (and, for chrome, the explorer fallback)
        cannot be found

    Details: bc behaves quite differently from other programs, use the dedicated
        bcompare function for it.
    Programs known to accept several files at once: chrome, notepad++, texstudio

    >> ls = list(range(100))
    >> viewfiles('notepad++', ls, save=True)
    """
    # 1 Resolve every argument to a path/url string the program can open
    basename = ext = None
    if kwargs.get('filename'):
        basename, ext = os.path.splitext(kwargs['filename'])

    ls = []  # final list of path/url strings handed to the program
    for t in files:
        if File(t) or is_url(t):
            ls.append(str(t))
        else:
            # Without an explicit filename, Ellipsis is passed on to File
            # (presumably File then picks a name itself -- confirm in dirlib).
            bn = basename or ...
            ls.append(File(bn, Dir.TEMP, suffix=ext).write(t, if_exists=kwargs.get('if_exists', 'error')).to_str())

    # 2 Launch the program (and time the external operation)
    launch = subprocess.run if kwargs.get('wait') else subprocess.Popen
    tictoc = TicToc()
    try:
        launch([procname, *ls])
    except FileNotFoundError as err:
        if procname not in ('chrome', 'chrome.exe'):
            raise FileNotFoundError(f'未找到程序:{procname}。请检查是否有安装及设置了环境变量。') from err
        # Chrome is missing: fall back to the system default via explorer. The already
        # resolved list is reused so that temp files are not written a second time
        # (which could fail under the default if_exists='error').
        procname = 'explorer'
        try:
            launch([procname, *ls])
        except FileNotFoundError as err2:
            raise FileNotFoundError(f'未找到程序:{procname}。请检查是否有安装及设置了环境变量。') from err2
    return tictoc.tocvalue()
|
91
|
-
|
92
|
-
|
93
|
-
class Explorer:
    """ Small launcher that runs one external application through ``subprocess``.

    :param app: program name or command placed first on the command line
    :param shell: default ``shell`` flag for subprocess when a call does not pass one
    """

    def __init__(self, app='explorer', shell=False):
        self.app = app
        self.shell = shell

    # NOTE: there is deliberately no "check_app" probe that runs the command up front.
    # Some commands have real effects when executed (e.g. explorer opens a file manager
    # window), so availability cannot be tested silently; a missing program is reported
    # when it is actually launched instead.

    def __call__(self, *args, wait=True, **kwargs):
        """ Launch the application with the given command-line arguments.

        :param args: command-line arguments appended after ``self.app``
        :param wait: True -> ``subprocess.run`` (block until the program exits);
            False -> ``subprocess.Popen`` (return immediately)
        :param kwargs: extra options passed through to subprocess; ``shell`` defaults
            to ``self.shell``
        :return: None
        :raises FileNotFoundError: when the application/command cannot be found

        TODO analyse / expose the return value of the launched process
        """
        import shlex

        kwargs.setdefault('shell', self.shell)
        if re.match(r'open\s', self.app):
            # An app such as `open -a "Google Chrome"` is already a shell command line,
            # so it has to run through the shell as one string. Every argument is quoted
            # so that paths containing spaces survive, and any number of arguments
            # (including none) is supported.
            cmd = ' '.join([self.app, *(shlex.quote(str(a)) for a in args)])
            kwargs['shell'] = True
        else:
            cmd = [self.app, *args]

        launch = subprocess.run if wait else subprocess.Popen
        try:
            launch(cmd, **kwargs)
        except FileNotFoundError as err:
            # cmd may be a str (shell mode) or a list that holds non-str items such as
            # Path objects; build the message without assuming either.
            shown = cmd if isinstance(cmd, str) else ' '.join(map(str, cmd))
            raise FileNotFoundError(f'Application/Command not found:{shown}') from err
|
135
|
-
|
136
|
-
|
137
|
-
class Browser(Explorer):
    """ View data in a web browser.

    The standard library ``webbrowser`` offers something similar, but it is aimed at
    opening urls; this class is mainly about turning all kinds of data into a file
    and then opening that file.
    """

    def __init__(self, app=None, shell=False):
        """
        :param app: browser program, e.g. 'msedge' or 'chrome'; an absolute path works too.
            With the default None the program is chosen per platform:
              Windows: 'chrome' or 'msedge' when their standard install directory shows up
                in PATH, otherwise 'C:/Program Files/Google/Chrome/Application/chrome.exe'
              Linux: 'google-chrome'
              anything else: 'open -a "Google Chrome"'
            Pass app explicitly for any other browser or a non-standard location.
        :param shell: default ``shell`` flag handed to Explorer
        """
        if app is None:
            system = platform.system()
            if system == 'Windows':
                # .get instead of [...]: a default instance is built at module import
                # time, so an unset PATH must not turn into a KeyError on import.
                paths = os.environ.get('PATH', '')
                chrome_dir = r'Google\Chrome\Application'
                msedge_dir = r'Microsoft\Edge\Application'
                if chrome_dir in paths:
                    app = 'chrome'
                elif msedge_dir in paths:
                    app = 'msedge'
                else:  # default to Chrome; explorer was tried before and did not work
                    app = 'C:/Program Files/Google/Chrome/Application/chrome.exe'
            elif system == 'Linux':  # Linux (including Ubuntu)
                # Change to e.g. 'firefox' here to default to another browser.
                app = 'google-chrome'
            else:  # every other system
                app = 'open -a "Google Chrome"'
        super().__init__(app, shell)

    @classmethod
    def to_brower_file(cls, arg, file=None, clsmsg=True, to_html_args=None):
        """ Store ``arg`` in a file, in a form that suits viewing in a browser.

        :param arg: a piece of data of any type
        :param file: target file name; when omitted a file is created in the temp
            directory (named after its etag hash for tables/text, after the chart title
            or a timestamp for objects with ``render``)
        :param clsmsg: prepend a paragraph with the class hierarchy and memory usage
            (only used for data that converts to a DataFrame)
        :param to_html_args: dict of keyword arguments for ``DataFrame.to_html``, e.g.
            escape: defaults to True (content shown as plain text); set it to False so
            that html such as links stored in the table takes effect
        :return: ``arg`` itself when it already is a file/url, otherwise the written file

        Note: "browser friendly" currently just means converting the data to a
        DataFrame table whenever possible.
        """
        # 1 Already a file or url: nothing to do
        if is_file(arg) or is_url(arg) or isinstance(arg, File):
            return arg

        # 2 Any other type: convert to a file first
        arg_ = TypeConvert.try2df(arg)
        if isinstance(arg_, pd.DataFrame):  # a DataFrame renders nicely as an html table
            if clsmsg:
                t = f'==== 类继承关系:{inspect.getmro(type(arg))},' \
                    + f'内存消耗:{sys.getsizeof(arg)}(递归子类总大小:{getasizeof(arg)})Byte ===='
                content = '<p>' + html.escape(t) + '</p>'
            else:
                content = ''
            # TODO make the header row blue
            content += arg_.to_html(**(to_html_args or {}))
            if file is None:
                file = File(..., Dir.TEMP, suffix='.html').write(content)
                file = file.rename(get_etag(str(file)) + '.html', if_exists='replace')
            else:
                file = File(file).write(content)
        elif getattr(arg, 'render', None):  # pyecharts-like objects render themselves to html
            try:
                name = arg.options['title'][0]['text']
            except (LookupError, TypeError):
                name = datetime.datetime.now().strftime('%H%M%S_%f')
            if file is None:
                file = File(name, Dir.TEMP, suffix='.html').to_str()
            arg.render(path=str(file))
        else:  # data outside the known formats is shown as plain txt
            if file is None:
                file = File(..., Dir.TEMP, suffix='.txt').write(arg)
                file = file.rename(get_etag(str(file)) + file.suffix, if_exists='replace')
            else:
                file = File(file).write(arg)
        return file

    def html(self, arg, **kwargs):
        """ Write ``arg`` to an .html file and show it.

        :param arg: content to show
        :param kwargs: ``file`` names the target file inside the temp directory (without
            it an etag-named temp file is used); everything else goes to ``__call__``
        """
        if 'file' in kwargs:
            file = File(kwargs['file'], Dir.TEMP, suffix='.html').write(arg)
            del kwargs['file']
        else:
            file = File(..., Dir.TEMP, suffix='.html').write(arg)
            file = file.rename(get_etag(str(file)) + file.suffix, if_exists='replace')
        self.__call__(arg, file, **kwargs)

    def url(self, *args, wait=True, **kwargs):
        """ Pass the arguments straight to the browser command, with no file conversion. """
        super().__call__(*args, wait=wait, **kwargs)

    def __call__(self, arg, file=None, *, wait=True, clsmsg=True, to_html_args=None,
                 **kwargs):  # NOQA Browser's call signature differs slightly from Explorer's
        """ Store ``arg`` in a file when needed, then open that file in the browser.

        :param arg: an existing file path (opened as is) or any data to convert
        :param file: optional; when omitted a temp file is generated (see to_brower_file),
            when given the data is written under that name
        :param wait: whether to wait for the browser process to finish
        :param clsmsg: forwarded to to_brower_file
        :param to_html_args: forwarded to to_brower_file
        :param kwargs: extra options for subprocess
        """
        f = XlPath.safe_init(arg)
        if f is not None and f.is_file():
            file = arg
        else:
            file = str(self.to_brower_file(arg, file, clsmsg=clsmsg, to_html_args=to_html_args))
        super().__call__(str(file), wait=wait, **kwargs)
|
249
|
-
|
250
|
-
|
251
|
-
# Shared default Browser instance, created once when this module is imported.
browser = Browser()
|
252
|
-
|
253
|
-
|
254
|
-
def browser_json(f):
    """ Show the structure of one json file in the browser.

    :param f: the json file to read
    """
    parsed = File(f).read()
    # The nested-table html produced by NestedDict.to_html_table goes to a fixed
    # file name in the temp directory.
    target = File(r'chrome_json.html', root=Dir.TEMP)
    page = target.write(NestedDict.to_html_table(parsed))
    # Open the generated page
    browser(page)
|
261
|
-
|
262
|
-
|
263
|
-
def browser_jsons_kv(fd, files='**/*.json', encoding=None, max_items=10, max_value_length=100):
    """ demo_keyvaluescounter: show key/value statistics of the json files in a directory.

    :param fd: directory
    :param files: pattern of the files to include
    :param encoding: file encoding
    :param max_items: upper limit of displayed items, to trim data with too many entries;
        a falsy value means no limit
    :param max_value_length: values are truncated to this length when added, so that
        very long values do not bloat the report
    :return: None
    """
    counter = KeyValuesCounter()
    folder = Dir(fd)
    for json_file in folder.select_files(files):
        content = json_file.read(encoding=encoding, mode='.json')
        counter.add(content, max_value_length=max_value_length)

    report = File(r'demo_keyvaluescounter.html', Dir.TEMP)
    report.write(counter.to_html_table(max_items=max_items), if_exists='replace')
    browser(report.to_str())
|
283
|
-
|
284
|
-
|
285
|
-
def check_repeat_filenames(dir, key='stem', link=True):
    """ Inspect how the files below a directory are named and show the result as a table.

    https://www.yuque.com/xlpr/pyxllib/check_repeat_filenames

    :param dir: a Dir, or a path that is converted to one; when it has no selected
        entries (``subs``), all files below it are selected
    :param key: what rows are grouped by; usually the attribute name 'stem', sometimes
        'name'. A callable gives a custom rule, e.g. to ignore a -eps-to-pdf.pdf tail:
        key=lambda p: re.sub(r'-eps-to-pdf', '', p.stem).lower()
    :param link: True (default) renders every file as a hyperlink
    :return: the flat DataFrame with columns key / suffix / filename; the table shown in
        the browser has one row per key and one column per suffix plus a count column
    :raises TypeError: when key is neither a str nor a callable
    """
    # 1 Normalise the dir argument
    root = dir if isinstance(dir, Dir) else Dir(dir)
    if not root.subs:
        root = root.select('**/*', type_='file')

    # 2 Turn key into a function
    if callable(key):
        key_of = key
    elif isinstance(key, str):
        def key_of(p):
            return getattr(p, key).lower()
    else:
        raise TypeError

    # 3 One record per file
    def to_record(sub):
        p = File(sub)
        return [key_of(p), p.suffix.lower(), sub]

    records = [to_record(sub) for sub in root.subs]
    df = pd.DataFrame.from_records(records, columns=['key', 'suffix', 'filename'])

    # 4 Group: one cell per (key, suffix) listing the files
    def joinfile(files):
        if not len(files):
            return ''
        if not link:
            return ', '.join(files)
        anchors = [f'<a href="{root / f}" target="_blank">{f}</a>' for f in files]
        return ', '.join(anchors)

    cells = df.groupby(['key', 'suffix']).agg({'filename': joinfile}).reset_index()
    view_table = cells.pivot(index='key', columns='suffix', values='filename').fillna('')

    # 5 Number of files per key
    totals = df.groupby('key').agg({'filename': 'count'})
    view_table = pd.concat([view_table, totals], axis=1).rename(columns={'filename': 'count'})

    browser(view_table, to_html_args={'escape': not link})
    return df
|
342
|
-
|
343
|
-
|
344
|
-
def getmembers(object, predicate=None):
    """ Customised rewrite of ``inspect.getmembers``.

    Unlike the standard version, any ordinary exception raised while reading an
    attribute (not just AttributeError) triggers the fallback lookup, and the name of
    the failing attribute is reported through ``dprint``.

    :param object: the object (or class) to inspect
    :param predicate: optional callable; only members whose value satisfies it are kept
    :return: list of (name, value) pairs sorted by name

    TODO this implementation is quite involved; for members, wouldn't plain dir() do?
    """
    from inspect import isclass, getmro
    import types

    if isclass(object):
        mro = (object,) + getmro(object)
    else:
        mro = ()
    results = []
    processed = set()
    names = dir(object)
    # Add any DynamicClassAttributes to the list of names if object is a class;
    # this may result in duplicate entries if, for example, a virtual
    # attribute with the same name as a DynamicClassAttribute exists
    try:
        for base in object.__bases__:
            for k, v in base.__dict__.items():
                if isinstance(v, types.DynamicClassAttribute):
                    names.append(k)
    except AttributeError:
        pass
    for key in names:
        # First try to get the value via getattr. Some descriptors don't
        # like calling their __get__ (see bug #1785), so fall back to
        # looking in the __dict__.
        try:
            value = getattr(object, key)
            # handle the duplicate key
            if key in processed:
                raise AttributeError
        except Exception:
            # Wider than AttributeError on purpose (added 2019-09-19 for
            # sqlalchemy.exc.InvalidRequestError), but not a bare except:
            # KeyboardInterrupt / SystemExit must still get through.
            dprint(key)  # this attribute could not be read
            for base in mro:
                if key in base.__dict__:
                    value = base.__dict__[key]
                    break
            else:
                # could be a (currently) missing slot member, or a buggy
                # __dir__; discard and move on
                continue

        if not predicate or predicate(value):
            results.append((key, value))
        processed.add(key)
    results.sort(key=lambda pair: pair[0])
    return results
|
395
|
-
|
396
|
-
|
397
|
-
def showdir(c, *, to_html=None, printf=True, width=200):
    """ Show the member variables and member methods of an object.

    Lists all members of ``c`` in two tables (variables, methods), either as an html
    page or as plain text.

    :param c: the object to inspect
    :param to_html:
        None (default): True on win32, False elsewhere
        truthy: build html, write it to a file and open it in the browser; a str is
            taken as the path of the output file
        falsy: build plain text for the console
    :param printf:
        True (default): display the result (open the browser, or print to the console)
        False: display nothing; the html file is still written and the text returned
    :param width: maximum number of characters shown for a member variable's value
    :return: the generated html / text
    """
    # 1 Header line describing the object
    from humanfriendly import format_size

    res = []
    # Must be called directly from this frame: the depth argument of
    # func_input_message is relative to the call stack (presumably it recovers the
    # expression the caller passed in -- confirm in prog.pupil).
    object_name = func_input_message(2)['argnames'][0]
    if to_html is None:
        to_html = sys.platform == 'win32'
    newline = '<br/>' if to_html else '\n'

    t = f'==== 对象名称:{object_name},类继承关系:{inspect.getmro(type(c))},' \
        + f'内存消耗:{format_size(sys.getsizeof(c), binary=True)}' \
        + f'(递归子类总大小:{format_size(getasizeof(c), binary=True)}) ===='

    if to_html:
        res.append('<p>')
        t = html.escape(t) + '</p>'
    res.append(t + newline)

    # 2 Table rendering, with some html fine-tuning
    def df2str(df):
        if to_html:
            # Convert every cell to text first, otherwise some special objects fail to
            # render. DataFrame.applymap is deprecated in favour of DataFrame.map in
            # newer pandas; use whichever this pandas version provides.
            to_text = df.map if hasattr(df, 'map') else df.applymap
            df = to_text(str)
            df.index += 1  # number rows from 1
            t = df.to_html()
            table = BeautifulSoup(t, 'lxml')
            table.thead.tr['bgcolor'] = 'LightSkyBlue'  # header colour
            # Following pycharm's naming: F(ield) for member variables, M(ember) for methods
            ch = 'F' if '成员变量' in table.tr.contents[3].string else 'M'
            table.thead.tr.th.string = f'编号{ch}{len(df)}'
            t = table.prettify()
        else:
            # Plain text does not align with wide characters, but showdir is mainly
            # meant for the browser, so no adjustment is made here.
            t = dataframe_str(df)
        return t

    # 3 Collect member variables and member methods
    def current_value(name, fallback):
        # getmembers may have recovered a value for an attribute whose getattr raises;
        # use that value instead of failing here.
        try:
            return getattr(c, name)
        except Exception:
            return fallback

    resolved = [(k, v, current_value(k, v)) for k, v in getmembers(c)]

    # member variables
    ls = []
    for k, v, attr in resolved:
        if callable(attr):
            continue
        if k.endswith(r'________'):  # special marker names used by the author, not shown
            continue
        if isinstance(attr, enum.IntFlag):  # enums such as re.RegexFlag: also show the int value
            v = typename(attr) + ',' + str(int(attr)) + ',' + str(v)
        else:
            try:
                text = str(v)
            except Exception:
                text = '取不到str值'
            v = typename(attr) + ',' + shorten(text, width=width)
        ls.append([k, v])
    df = pd.DataFrame.from_records(ls, columns=['成员变量', '描述'])
    res.append(df2str(df) + newline)

    # member methods
    methods = [(k, v) for k, v, attr in resolved if callable(attr)]
    df = pd.DataFrame.from_records(methods, columns=['成员函数', '描述'])
    res.append(df2str(df) + newline)
    res = newline.join(res)

    # 4 Write the html file and open it, or print to the console
    if to_html:
        if isinstance(to_html, str):
            # a string is taken as the path of the output file
            f = File(to_html, suffix='.html')
        else:
            f = File(object_name, Dir.TEMP, suffix='.html')

        filename = f.write(ensure_gbk(res), if_exists='replace').to_str()
        if printf:
            browser(filename)
    elif printf:  # console output
        print(res)

    return res
|
493
|
-
|
494
|
-
|
495
|
-
# Register both helpers in builtins so they can be used anywhere without an import
builtins.browser = browser
builtins.showdir = showdir
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : 陈坤泽
|
4
|
+
# @Email : 877362867@qq.com
|
5
|
+
# @Date : 2020/05/30 22:43
|
6
|
+
|
7
|
+
import builtins
|
8
|
+
import enum
|
9
|
+
import html
|
10
|
+
import inspect
|
11
|
+
import os
|
12
|
+
import subprocess
|
13
|
+
import sys
|
14
|
+
import datetime
|
15
|
+
import platform
|
16
|
+
import re
|
17
|
+
|
18
|
+
import pandas as pd
|
19
|
+
from bs4 import BeautifulSoup
|
20
|
+
|
21
|
+
from pyxllib.prog.newbie import typename
|
22
|
+
from pyxllib.prog.pupil import dprint, func_input_message, is_url, is_file
|
23
|
+
from pyxllib.prog.specialist.common import TypeConvert, NestedDict, KeyValuesCounter, dataframe_str
|
24
|
+
from pyxllib.prog.specialist.tictoc import TicToc
|
25
|
+
from pyxllib.text.pupil import ensure_gbk, shorten
|
26
|
+
from pyxllib.file.specialist.dirlib import File, Dir, get_etag, XlPath
|
27
|
+
|
28
|
+
|
29
|
+
def getasizeof(*objs, **opts):
    """Return the size of the given objects as measured by ``pympler.asizeof``.

    :param objs: objects to measure, forwarded positionally to ``asizeof.asizeof``
    :param opts: keyword options forwarded unchanged to ``asizeof.asizeof``
    :return: the value returned by ``asizeof.asizeof``, or -1 if the measurement raises
    """
    from pympler import asizeof

    try:
        return asizeof.asizeof(*objs, **opts)
    except Exception:
        # Best effort: some objects cannot be measured (the original author ran into
        # sqlalchemy.exc.InvalidRequestError), so report -1 instead of failing the caller.
        # ``Exception`` rather than a bare ``except`` so that KeyboardInterrupt and
        # SystemExit still propagate.
        return -1
|
39
|
+
|
40
|
+
|
41
|
+
def viewfiles(procname, *files, **kwargs):
    """ Open ``files`` with the program ``procname``.

    :param procname: name of the program to launch
    :param files: items to open; each one is either an existing file path / url, or some
        content (a string etc.) that is first written to a file in the temp directory
    :param kwargs:
        wait: truthy -> launch with ``subprocess.run`` and wait for the program to exit;
            falsy -> launch with ``subprocess.Popen`` and return immediately
        filename: controls the name (stem and extension) of the files that get written
        if_exists: forwarded to ``File.write`` for written files, default 'error'
        save: documented historically, but this function does not use it
        TODO: more features could be added here depending on the target program
    :return: ``TicToc().tocvalue()`` taken after launching (presumably the elapsed time
        of the external operation -- confirm against TicToc)
    :raises FileNotFoundError: when the program (and, for chrome, the explorer fallback)
        cannot be found

    Details: bc behaves quite differently from other programs, use the dedicated
        bcompare function for it.
    Programs known to accept several files at once: chrome, notepad++, texstudio

    >> ls = list(range(100))
    >> viewfiles('notepad++', ls, save=True)
    """
    # 1 Resolve every argument to a path/url string the program can open
    basename = ext = None
    if kwargs.get('filename'):
        basename, ext = os.path.splitext(kwargs['filename'])

    ls = []  # final list of path/url strings handed to the program
    for t in files:
        if File(t) or is_url(t):
            ls.append(str(t))
        else:
            # Without an explicit filename, Ellipsis is passed on to File
            # (presumably File then picks a name itself -- confirm in dirlib).
            bn = basename or ...
            ls.append(File(bn, Dir.TEMP, suffix=ext).write(t, if_exists=kwargs.get('if_exists', 'error')).to_str())

    # 2 Launch the program (and time the external operation)
    launch = subprocess.run if kwargs.get('wait') else subprocess.Popen
    tictoc = TicToc()
    try:
        launch([procname, *ls])
    except FileNotFoundError as err:
        if procname not in ('chrome', 'chrome.exe'):
            raise FileNotFoundError(f'未找到程序:{procname}。请检查是否有安装及设置了环境变量。') from err
        # Chrome is missing: fall back to the system default via explorer. The already
        # resolved list is reused so that temp files are not written a second time
        # (which could fail under the default if_exists='error').
        procname = 'explorer'
        try:
            launch([procname, *ls])
        except FileNotFoundError as err2:
            raise FileNotFoundError(f'未找到程序:{procname}。请检查是否有安装及设置了环境变量。') from err2
    return tictoc.tocvalue()
|
91
|
+
|
92
|
+
|
93
|
+
class Explorer:
    """ Small launcher that runs one external application through ``subprocess``.

    :param app: program name or command placed first on the command line
    :param shell: default ``shell`` flag for subprocess when a call does not pass one
    """

    def __init__(self, app='explorer', shell=False):
        self.app = app
        self.shell = shell

    # NOTE: there is deliberately no "check_app" probe that runs the command up front.
    # Some commands have real effects when executed (e.g. explorer opens a file manager
    # window), so availability cannot be tested silently; a missing program is reported
    # when it is actually launched instead.

    def __call__(self, *args, wait=True, **kwargs):
        """ Launch the application with the given command-line arguments.

        :param args: command-line arguments appended after ``self.app``
        :param wait: True -> ``subprocess.run`` (block until the program exits);
            False -> ``subprocess.Popen`` (return immediately)
        :param kwargs: extra options passed through to subprocess; ``shell`` defaults
            to ``self.shell``
        :return: None
        :raises FileNotFoundError: when the application/command cannot be found

        TODO analyse / expose the return value of the launched process
        """
        import shlex

        kwargs.setdefault('shell', self.shell)
        if re.match(r'open\s', self.app):
            # An app such as `open -a "Google Chrome"` is already a shell command line,
            # so it has to run through the shell as one string. Every argument is quoted
            # so that paths containing spaces survive, and any number of arguments
            # (including none) is supported.
            cmd = ' '.join([self.app, *(shlex.quote(str(a)) for a in args)])
            kwargs['shell'] = True
        else:
            cmd = [self.app, *args]

        launch = subprocess.run if wait else subprocess.Popen
        try:
            launch(cmd, **kwargs)
        except FileNotFoundError as err:
            # cmd may be a str (shell mode) or a list that holds non-str items such as
            # Path objects; build the message without assuming either.
            shown = cmd if isinstance(cmd, str) else ' '.join(map(str, cmd))
            raise FileNotFoundError(f'Application/Command not found:{shown}') from err
|
135
|
+
|
136
|
+
|
137
|
+
class Browser(Explorer):
    """ View data in a web browser.

    The standard library ``webbrowser`` offers something similar, but it is aimed at
    opening urls; this class is mainly about turning all kinds of data into a file
    and then opening that file.
    """

    def __init__(self, app=None, shell=False):
        """
        :param app: browser program, e.g. 'msedge' or 'chrome'; an absolute path works too.
            With the default None the program is chosen per platform:
              Windows: 'chrome' or 'msedge' when their standard install directory shows up
                in PATH, otherwise 'C:/Program Files/Google/Chrome/Application/chrome.exe'
              Linux: 'google-chrome'
              anything else: 'open -a "Google Chrome"'
            Pass app explicitly for any other browser or a non-standard location.
        :param shell: default ``shell`` flag handed to Explorer
        """
        if app is None:
            system = platform.system()
            if system == 'Windows':
                # .get instead of [...]: a default instance is built at module import
                # time, so an unset PATH must not turn into a KeyError on import.
                paths = os.environ.get('PATH', '')
                chrome_dir = r'Google\Chrome\Application'
                msedge_dir = r'Microsoft\Edge\Application'
                if chrome_dir in paths:
                    app = 'chrome'
                elif msedge_dir in paths:
                    app = 'msedge'
                else:  # default to Chrome; explorer was tried before and did not work
                    app = 'C:/Program Files/Google/Chrome/Application/chrome.exe'
            elif system == 'Linux':  # Linux (including Ubuntu)
                # Change to e.g. 'firefox' here to default to another browser.
                app = 'google-chrome'
            else:  # every other system
                app = 'open -a "Google Chrome"'
        super().__init__(app, shell)

    @classmethod
    def to_brower_file(cls, arg, file=None, clsmsg=True, to_html_args=None):
        """ Store ``arg`` in a file, in a form that suits viewing in a browser.

        :param arg: a piece of data of any type
        :param file: target file name; when omitted a file is created in the temp
            directory (named after its etag hash for tables/text, after the chart title
            or a timestamp for objects with ``render``)
        :param clsmsg: prepend a paragraph with the class hierarchy and memory usage
            (only used for data that converts to a DataFrame)
        :param to_html_args: dict of keyword arguments for ``DataFrame.to_html``, e.g.
            escape: defaults to True (content shown as plain text); set it to False so
            that html such as links stored in the table takes effect
        :return: ``arg`` itself when it already is a file/url, otherwise the written file

        Note: "browser friendly" currently just means converting the data to a
        DataFrame table whenever possible.
        """
        # 1 Already a file or url: nothing to do
        if is_file(arg) or is_url(arg) or isinstance(arg, File):
            return arg

        # 2 Any other type: convert to a file first
        arg_ = TypeConvert.try2df(arg)
        if isinstance(arg_, pd.DataFrame):  # a DataFrame renders nicely as an html table
            if clsmsg:
                t = f'==== 类继承关系:{inspect.getmro(type(arg))},' \
                    + f'内存消耗:{sys.getsizeof(arg)}(递归子类总大小:{getasizeof(arg)})Byte ===='
                content = '<p>' + html.escape(t) + '</p>'
            else:
                content = ''
            # TODO make the header row blue
            content += arg_.to_html(**(to_html_args or {}))
            if file is None:
                file = File(..., Dir.TEMP, suffix='.html').write(content)
                file = file.rename(get_etag(str(file)) + '.html', if_exists='replace')
            else:
                file = File(file).write(content)
        elif getattr(arg, 'render', None):  # pyecharts-like objects render themselves to html
            try:
                name = arg.options['title'][0]['text']
            except (LookupError, TypeError):
                name = datetime.datetime.now().strftime('%H%M%S_%f')
            if file is None:
                file = File(name, Dir.TEMP, suffix='.html').to_str()
            arg.render(path=str(file))
        else:  # data outside the known formats is shown as plain txt
            if file is None:
                file = File(..., Dir.TEMP, suffix='.txt').write(arg)
                file = file.rename(get_etag(str(file)) + file.suffix, if_exists='replace')
            else:
                file = File(file).write(arg)
        return file

    def html(self, arg, **kwargs):
        """ Write ``arg`` to an .html file and show it.

        :param arg: content to show
        :param kwargs: ``file`` names the target file inside the temp directory (without
            it an etag-named temp file is used); everything else goes to ``__call__``
        """
        if 'file' in kwargs:
            file = File(kwargs['file'], Dir.TEMP, suffix='.html').write(arg)
            del kwargs['file']
        else:
            file = File(..., Dir.TEMP, suffix='.html').write(arg)
            file = file.rename(get_etag(str(file)) + file.suffix, if_exists='replace')
        self.__call__(arg, file, **kwargs)

    def url(self, *args, wait=True, **kwargs):
        """ Pass the arguments straight to the browser command, with no file conversion. """
        super().__call__(*args, wait=wait, **kwargs)

    def __call__(self, arg, file=None, *, wait=True, clsmsg=True, to_html_args=None,
                 **kwargs):  # NOQA Browser's call signature differs slightly from Explorer's
        """ Store ``arg`` in a file when needed, then open that file in the browser.

        :param arg: an existing file path (opened as is) or any data to convert
        :param file: optional; when omitted a temp file is generated (see to_brower_file),
            when given the data is written under that name
        :param wait: whether to wait for the browser process to finish
        :param clsmsg: forwarded to to_brower_file
        :param to_html_args: forwarded to to_brower_file
        :param kwargs: extra options for subprocess
        """
        f = XlPath.safe_init(arg)
        if f is not None and f.is_file():
            file = arg
        else:
            file = str(self.to_brower_file(arg, file, clsmsg=clsmsg, to_html_args=to_html_args))
        super().__call__(str(file), wait=wait, **kwargs)
|
249
|
+
|
250
|
+
|
251
|
+
# Shared default Browser instance, created once when this module is imported.
browser = Browser()
|
252
|
+
|
253
|
+
|
254
|
+
def browser_json(f):
    """Visualise the structure of a json file.

    :param f: the json file to read
    """
    content = File(f).read()
    page = File(r'chrome_json.html', root=Dir.TEMP)
    # NestedDict.to_html_table produces nested html table markup, which is
    # stored in the temp directory
    table_html = NestedDict.to_html_table(content)
    # display the written html file
    browser(page.write(table_html))
|
261
|
+
|
262
|
+
|
263
|
+
def browser_jsons_kv(fd, files='**/*.json', encoding=None, max_items=10, max_value_length=100):
    """Show key/value statistics of the json data found under a directory.

    :param fd: the directory
    :param files: pattern of the files to include
    :param encoding: file encoding
    :param max_items: upper bound on displayed items, to trim data with very
        many entries; a falsy value means no limit
    :param max_value_length: added values are truncated to this length so
        that overly long values do not bloat the report
    :return: None
    """
    counter = KeyValuesCounter()
    for json_file in Dir(fd).select_files(files):
        counter.add(json_file.read(encoding=encoding, mode='.json'),
                    max_value_length=max_value_length)

    report = File(r'demo_keyvaluescounter.html', Dir.TEMP)
    table_html = counter.to_html_table(max_items=max_items)
    report.write(table_html, if_exists='replace')
    browser(report.to_str())
|
283
|
+
|
284
|
+
|
285
|
+
def check_repeat_filenames(dir, key='stem', link=True):
    """Inspect how the files of a directory group by name.

    https://www.yuque.com/xlpr/pyxllib/check_repeat_filenames

    A table with one row per key and one column per suffix, plus a count
    column, is shown through ``browser``.

    :param dir: a Dir object or a path; when its ``subs`` is empty, all files
        below it are selected
    :param key: what the rows are grouped by: an attribute name (almost
        always 'stem', occasionally 'name') or a callable for custom rules,
        e.g. to ignore a -eps-to-pdf.pdf style ending:
            key=lambda p: re.sub(r'-eps-to-pdf', '', p.stem).lower()
    :param link: True (default) renders the file names as hyperlinks
    :return: the flat DataFrame with columns key / suffix / filename
    :raises TypeError: when key is neither a string nor callable
    """
    # 1 normalise the dir argument
    if not isinstance(dir, Dir):
        dir = Dir(dir)
    if not dir.subs:
        dir = dir.select('**/*', type_='file')

    # 2 turn key into a function
    if isinstance(key, str):
        extract_key = lambda p: getattr(p, key).lower()  # noqa: E731
    elif callable(key):
        extract_key = key
    else:
        raise TypeError

    # 3 build the record table
    paths = ((name, File(name)) for name in dir.subs)
    records = [[extract_key(p), p.suffix.lower(), name] for name, p in paths]
    df = pd.DataFrame.from_records(records, columns=['key', 'suffix', 'filename'])

    # 4 group by (key, suffix) and join the file names of each cell
    def joinfile(files):
        if not len(files):
            return ''
        if not link:
            return ', '.join(files)
        return ', '.join([f'<a href="{dir / f}" target="_blank">{f}</a>' for f in files])

    groups = df.groupby(['key', 'suffix']).agg({'filename': joinfile}).reset_index()
    view_table = groups.pivot(index='key', columns='suffix', values='filename').fillna('')

    # 5 append the number of files per key
    count_df = df.groupby('key').agg({'filename': 'count'})
    view_table = pd.concat([view_table, count_df], axis=1).rename({'filename': 'count'}, axis=1)

    # the anchors are raw html, so escaping is only enabled without links
    browser(view_table, to_html_args={'escape': not link})
    return df
|
342
|
+
|
343
|
+
|
344
|
+
def getmembers(object, predicate=None):
    """Customised rewrite of ``inspect.getmembers``.

    Any ordinary exception raised while reading an attribute triggers the
    fallback lookup in the class ``__dict__``s, not only AttributeError
    (added after hitting sqlalchemy.exc.InvalidRequestError). Exceptions that
    do not derive from ``Exception`` (KeyboardInterrupt, SystemExit) are
    deliberately left to propagate.

    TODO this implementation is rather complex; for members, wouldn't plain
    dir() be enough?

    :param object: the object or class to inspect
    :param predicate: optional callable; only members whose value satisfies
        it are returned
    :return: list of ``(name, value)`` pairs sorted by name
    """
    from inspect import isclass, getmro
    import types

    if isclass(object):
        mro = (object,) + getmro(object)
    else:
        mro = ()
    results = []
    processed = set()
    names = dir(object)
    # Add any DynamicClassAttributes to the list of names if object is a class;
    # this may result in duplicate entries if, for example, a virtual
    # attribute with the same name as a DynamicClassAttribute exists
    try:
        for base in object.__bases__:
            for k, v in base.__dict__.items():
                if isinstance(v, types.DynamicClassAttribute):
                    names.append(k)
    except AttributeError:
        # not a class: there are no __bases__ to scan
        pass
    for key in names:
        # First try to get the value via getattr.  Some descriptors don't
        # like calling their __get__ (see bug #1785), so fall back to
        # looking in the __dict__.
        try:
            value = getattr(object, key)
            # handle the duplicate key
            if key in processed:
                raise AttributeError
        except Exception:
            # Broader than AttributeError on purpose (see docstring), but no
            # longer a bare except, so interrupts are not swallowed.
            dprint(key)  # this attribute could not be fetched
            for base in mro:
                if key in base.__dict__:
                    value = base.__dict__[key]
                    break
            else:
                # could be a (currently) missing slot member, or a buggy
                # __dir__; discard and move on
                continue

        if not predicate or predicate(value):
            results.append((key, value))
        processed.add(key)
    results.sort(key=lambda pair: pair[0])
    return results
|
395
|
+
|
396
|
+
|
397
|
+
def showdir(c, *, to_html=None, printf=True, width=200):
    """Show the members of an object.

    Lists the non-callable members (fields) and the callable members of ``c``
    in two tables and renders them as an html page or as plain text.

    :param c: the object to inspect
    :param to_html:
        defaults to True on win32 (the report is opened through ``browser``)
        and to False elsewhere (the report is printed to the console);
        a string is taken as the path of the html file to write
    :param printf:
        True (default) shows the report in the browser or on the console.
        False shows nothing; the report is only built and returned. An
        output file explicitly requested through a ``to_html`` path is still
        written.
    :param width: maximum number of characters shown for a field value
    :return: the generated report text (html or plain text)
    """
    # 1 header describing the object
    from humanfriendly import format_size

    res = []
    # NOTE(review): presumably looks up the caller's argument expression via
    # the call stack, so this call must stay directly inside showdir - confirm.
    object_name = func_input_message(2)['argnames'][0]
    if to_html is None:
        to_html = sys.platform == 'win32'
    newline = '<br/>' if to_html else '\n'

    t = f'==== 对象名称:{object_name},类继承关系:{inspect.getmro(type(c))},' \
        + f'内存消耗:{format_size(sys.getsizeof(c), binary=True)}' \
        + f'(递归子类总大小:{format_size(getasizeof(c), binary=True)}) ===='

    if to_html:
        res.append('<p>')
        t = html.escape(t) + '</p>'
    res.append(t + newline)

    # 2 fine-tuning of the html table style
    def df2str(df):
        if to_html:
            df = df.applymap(str)  # without stringifying, some special callables raise
            df.index += 1  # numbering starts at 1
            # pd.options.display.max_colwidth = -1  # to temporarily show full content
            t = df.to_html()
            table = BeautifulSoup(t, 'lxml')
            table.thead.tr['bgcolor'] = 'LightSkyBlue'  # header colour
            # following PyCharm's naming: F for fields, M for methods
            ch = 'F' if '成员变量' in table.tr.contents[3].string else 'M'
            table.thead.tr.th.string = f'编号{ch}{len(df)}'
            t = table.prettify()
        else:
            # Plain text misaligns with Chinese characters, but showdir is
            # mainly meant for the browser, so no adjustment is made here.
            t = dataframe_str(df)
        return t

    # 3 add the fields and the methods
    # fields: members that are not callable
    members = getmembers(c)
    fields = filter(lambda m: not callable(getattr(c, m[0])), members)
    ls = []
    for k, v in fields:
        if k.endswith(r'________'):  # special marker names used by the author's code; hidden
            continue
        attr = getattr(c, k)
        if isinstance(attr, enum.IntFlag):  # show the integer value of enums such as re.RegexFlag
            v = typename(attr) + ',' + str(int(attr)) + ',' + str(v)
        else:
            try:
                text = str(v)
            except Exception:
                # a broken __str__ must not abort the report, but interrupts
                # (KeyboardInterrupt / SystemExit) are no longer swallowed
                text = '取不到str值'

            v = typename(attr) + ',' + shorten(text, width=width)
        ls.append([k, v])
    df = pd.DataFrame.from_records(ls, columns=['成员变量', '描述'])
    res.append(df2str(df) + newline)

    # methods: members that are callable
    methods = filter(lambda m: callable(getattr(c, m[0])), members)
    df = pd.DataFrame.from_records(methods, columns=['成员函数', '描述'])
    res.append(df2str(df) + newline)
    res = newline.join(res)

    # 4 show through the browser or print to the console, unless printf is off
    if to_html:
        explicit_path = isinstance(to_html, str)
        if explicit_path:
            # a string is taken as the requested output file path
            f = File(to_html, suffix='.html')
        else:
            f = File(object_name, Dir.TEMP, suffix='.html')

        if printf or explicit_path:
            filename = f.write(ensure_gbk(res), if_exists='replace').to_str()
            if printf:
                browser(filename)
    elif printf:  # console output, the default on non-win32 platforms
        print(res)

    return res
|
493
|
+
|
494
|
+
|
495
|
+
# Register both helpers in builtins so they can be used directly anywhere
builtins.browser = browser
builtins.showdir = showdir
|