pyxllib 0.0.43__py3-none-any.whl → 0.3.197__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186) hide show
  1. pyxllib/__init__.py +9 -2
  2. pyxllib/algo/__init__.py +8 -0
  3. pyxllib/algo/disjoint.py +54 -0
  4. pyxllib/algo/geo.py +541 -0
  5. pyxllib/{util/mathlib.py → algo/intervals.py} +172 -36
  6. pyxllib/algo/matcher.py +389 -0
  7. pyxllib/algo/newbie.py +166 -0
  8. pyxllib/algo/pupil.py +629 -0
  9. pyxllib/algo/shapelylib.py +67 -0
  10. pyxllib/algo/specialist.py +241 -0
  11. pyxllib/algo/stat.py +494 -0
  12. pyxllib/algo/treelib.py +149 -0
  13. pyxllib/algo/unitlib.py +66 -0
  14. pyxllib/autogui/__init__.py +5 -0
  15. pyxllib/autogui/activewin.py +246 -0
  16. pyxllib/autogui/all.py +9 -0
  17. pyxllib/autogui/autogui.py +852 -0
  18. pyxllib/autogui/uiautolib.py +362 -0
  19. pyxllib/autogui/virtualkey.py +102 -0
  20. pyxllib/autogui/wechat.py +827 -0
  21. pyxllib/autogui/wechat_msg.py +421 -0
  22. pyxllib/autogui/wxautolib.py +84 -0
  23. pyxllib/cv/__init__.py +1 -11
  24. pyxllib/cv/expert.py +267 -0
  25. pyxllib/cv/{imlib.py → imfile.py} +18 -83
  26. pyxllib/cv/imhash.py +39 -0
  27. pyxllib/cv/pupil.py +9 -0
  28. pyxllib/cv/rgbfmt.py +1525 -0
  29. pyxllib/cv/slidercaptcha.py +137 -0
  30. pyxllib/cv/trackbartools.py +163 -49
  31. pyxllib/cv/xlcvlib.py +1040 -0
  32. pyxllib/cv/xlpillib.py +423 -0
  33. pyxllib/data/__init__.py +0 -0
  34. pyxllib/data/echarts.py +240 -0
  35. pyxllib/data/jsonlib.py +89 -0
  36. pyxllib/{util/oss2_.py → data/oss.py} +11 -9
  37. pyxllib/data/pglib.py +1127 -0
  38. pyxllib/data/sqlite.py +568 -0
  39. pyxllib/{util → data}/sqllib.py +13 -31
  40. pyxllib/ext/JLineViewer.py +505 -0
  41. pyxllib/ext/__init__.py +6 -0
  42. pyxllib/{util → ext}/demolib.py +119 -35
  43. pyxllib/ext/drissionlib.py +277 -0
  44. pyxllib/ext/kq5034lib.py +12 -0
  45. pyxllib/{util/main.py → ext/old.py} +122 -284
  46. pyxllib/ext/qt.py +449 -0
  47. pyxllib/ext/robustprocfile.py +497 -0
  48. pyxllib/ext/seleniumlib.py +76 -0
  49. pyxllib/{util/tklib.py → ext/tk.py} +10 -11
  50. pyxllib/ext/unixlib.py +827 -0
  51. pyxllib/ext/utools.py +351 -0
  52. pyxllib/{util/webhooklib.py → ext/webhook.py} +45 -17
  53. pyxllib/ext/win32lib.py +40 -0
  54. pyxllib/ext/wjxlib.py +88 -0
  55. pyxllib/ext/wpsapi.py +124 -0
  56. pyxllib/ext/xlwork.py +9 -0
  57. pyxllib/ext/yuquelib.py +1105 -0
  58. pyxllib/file/__init__.py +17 -0
  59. pyxllib/file/docxlib.py +761 -0
  60. pyxllib/{util → file}/gitlib.py +40 -27
  61. pyxllib/file/libreoffice.py +165 -0
  62. pyxllib/file/movielib.py +148 -0
  63. pyxllib/file/newbie.py +10 -0
  64. pyxllib/file/onenotelib.py +1469 -0
  65. pyxllib/file/packlib/__init__.py +330 -0
  66. pyxllib/{util → file/packlib}/zipfile.py +598 -195
  67. pyxllib/file/pdflib.py +426 -0
  68. pyxllib/file/pupil.py +185 -0
  69. pyxllib/file/specialist/__init__.py +685 -0
  70. pyxllib/{basic/_5_dirlib.py → file/specialist/dirlib.py} +364 -93
  71. pyxllib/file/specialist/download.py +193 -0
  72. pyxllib/file/specialist/filelib.py +2829 -0
  73. pyxllib/file/xlsxlib.py +3131 -0
  74. pyxllib/file/xlsyncfile.py +341 -0
  75. pyxllib/prog/__init__.py +5 -0
  76. pyxllib/prog/cachetools.py +64 -0
  77. pyxllib/prog/deprecatedlib.py +233 -0
  78. pyxllib/prog/filelock.py +42 -0
  79. pyxllib/prog/ipyexec.py +253 -0
  80. pyxllib/prog/multiprogs.py +940 -0
  81. pyxllib/prog/newbie.py +451 -0
  82. pyxllib/prog/pupil.py +1197 -0
  83. pyxllib/{sitepackages.py → prog/sitepackages.py} +5 -3
  84. pyxllib/prog/specialist/__init__.py +391 -0
  85. pyxllib/prog/specialist/bc.py +203 -0
  86. pyxllib/prog/specialist/browser.py +497 -0
  87. pyxllib/prog/specialist/common.py +347 -0
  88. pyxllib/prog/specialist/datetime.py +199 -0
  89. pyxllib/prog/specialist/tictoc.py +240 -0
  90. pyxllib/prog/specialist/xllog.py +180 -0
  91. pyxllib/prog/xlosenv.py +108 -0
  92. pyxllib/stdlib/__init__.py +17 -0
  93. pyxllib/{util → stdlib}/tablepyxl/__init__.py +1 -3
  94. pyxllib/{util → stdlib}/tablepyxl/style.py +1 -1
  95. pyxllib/{util → stdlib}/tablepyxl/tablepyxl.py +2 -4
  96. pyxllib/text/__init__.py +8 -0
  97. pyxllib/text/ahocorasick.py +39 -0
  98. pyxllib/text/airscript.js +744 -0
  99. pyxllib/text/charclasslib.py +121 -0
  100. pyxllib/text/jiebalib.py +267 -0
  101. pyxllib/text/jinjalib.py +32 -0
  102. pyxllib/text/jsa_ai_prompt.md +271 -0
  103. pyxllib/text/jscode.py +922 -0
  104. pyxllib/text/latex/__init__.py +158 -0
  105. pyxllib/text/levenshtein.py +303 -0
  106. pyxllib/text/nestenv.py +1215 -0
  107. pyxllib/text/newbie.py +300 -0
  108. pyxllib/text/pupil/__init__.py +8 -0
  109. pyxllib/text/pupil/common.py +1121 -0
  110. pyxllib/text/pupil/xlalign.py +326 -0
  111. pyxllib/text/pycode.py +47 -0
  112. pyxllib/text/specialist/__init__.py +8 -0
  113. pyxllib/text/specialist/common.py +112 -0
  114. pyxllib/text/specialist/ptag.py +186 -0
  115. pyxllib/text/spellchecker.py +172 -0
  116. pyxllib/text/templates/echart_base.html +11 -0
  117. pyxllib/text/templates/highlight_code.html +17 -0
  118. pyxllib/text/templates/latex_editor.html +103 -0
  119. pyxllib/text/vbacode.py +17 -0
  120. pyxllib/text/xmllib.py +747 -0
  121. pyxllib/xl.py +39 -0
  122. pyxllib/xlcv.py +17 -0
  123. pyxllib-0.3.197.dist-info/METADATA +48 -0
  124. pyxllib-0.3.197.dist-info/RECORD +126 -0
  125. {pyxllib-0.0.43.dist-info → pyxllib-0.3.197.dist-info}/WHEEL +4 -5
  126. pyxllib/basic/_1_strlib.py +0 -945
  127. pyxllib/basic/_2_timelib.py +0 -488
  128. pyxllib/basic/_3_pathlib.py +0 -916
  129. pyxllib/basic/_4_loglib.py +0 -419
  130. pyxllib/basic/__init__.py +0 -54
  131. pyxllib/basic/arrow_.py +0 -250
  132. pyxllib/basic/chardet_.py +0 -66
  133. pyxllib/basic/dirlib.py +0 -529
  134. pyxllib/basic/dprint.py +0 -202
  135. pyxllib/basic/extension.py +0 -12
  136. pyxllib/basic/judge.py +0 -31
  137. pyxllib/basic/log.py +0 -204
  138. pyxllib/basic/pathlib_.py +0 -705
  139. pyxllib/basic/pytictoc.py +0 -102
  140. pyxllib/basic/qiniu_.py +0 -61
  141. pyxllib/basic/strlib.py +0 -761
  142. pyxllib/basic/timer.py +0 -132
  143. pyxllib/cv/cv.py +0 -834
  144. pyxllib/cv/cvlib/_1_geo.py +0 -543
  145. pyxllib/cv/cvlib/_2_cvprcs.py +0 -309
  146. pyxllib/cv/cvlib/_2_imgproc.py +0 -594
  147. pyxllib/cv/cvlib/_3_pilprcs.py +0 -80
  148. pyxllib/cv/cvlib/_4_cvimg.py +0 -211
  149. pyxllib/cv/cvlib/__init__.py +0 -10
  150. pyxllib/cv/debugtools.py +0 -82
  151. pyxllib/cv/fitz_.py +0 -300
  152. pyxllib/cv/installer.py +0 -42
  153. pyxllib/debug/_0_installer.py +0 -38
  154. pyxllib/debug/_1_typelib.py +0 -277
  155. pyxllib/debug/_2_chrome.py +0 -198
  156. pyxllib/debug/_3_showdir.py +0 -161
  157. pyxllib/debug/_4_bcompare.py +0 -140
  158. pyxllib/debug/__init__.py +0 -49
  159. pyxllib/debug/bcompare.py +0 -132
  160. pyxllib/debug/chrome.py +0 -198
  161. pyxllib/debug/installer.py +0 -38
  162. pyxllib/debug/showdir.py +0 -158
  163. pyxllib/debug/typelib.py +0 -278
  164. pyxllib/image/__init__.py +0 -12
  165. pyxllib/torch/__init__.py +0 -20
  166. pyxllib/torch/modellib.py +0 -37
  167. pyxllib/torch/trainlib.py +0 -344
  168. pyxllib/util/__init__.py +0 -20
  169. pyxllib/util/aip_.py +0 -141
  170. pyxllib/util/casiadb.py +0 -59
  171. pyxllib/util/excellib.py +0 -495
  172. pyxllib/util/filelib.py +0 -612
  173. pyxllib/util/jsondata.py +0 -27
  174. pyxllib/util/jsondata2.py +0 -92
  175. pyxllib/util/labelmelib.py +0 -139
  176. pyxllib/util/onepy/__init__.py +0 -29
  177. pyxllib/util/onepy/onepy.py +0 -574
  178. pyxllib/util/onepy/onmanager.py +0 -170
  179. pyxllib/util/pyautogui_.py +0 -219
  180. pyxllib/util/textlib.py +0 -1305
  181. pyxllib/util/unorder.py +0 -22
  182. pyxllib/util/xmllib.py +0 -639
  183. pyxllib-0.0.43.dist-info/METADATA +0 -39
  184. pyxllib-0.0.43.dist-info/RECORD +0 -80
  185. pyxllib-0.0.43.dist-info/top_level.txt +0 -1
  186. {pyxllib-0.0.43.dist-info → pyxllib-0.3.197.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,240 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : 陈坤泽
4
+ # @Email : 877362867@qq.com
5
+ # @Date : 2020/09/20
6
+
7
+
8
+ import time
9
+ import timeit
10
+
11
+ from loguru import logger
12
+ from humanfriendly import format_timespan
13
+
14
+ from pyxllib.algo.pupil import natural_sort, ValuesStat
15
+ from pyxllib.text.pupil import shorten, listalign
16
+ from pyxllib.prog.newbie import human_readable_number
17
+
18
+ __tictoc = """
19
+ 基于 pytictoc 代码,做了些自定义扩展
20
+
21
+ 原版备注:
22
+ Module with class TicToc to replicate the functionality of MATLAB's tic and toc.
23
+ Documentation: https://pypi.python.org/pypi/pytictoc
24
+ __author__ = 'Eric Fields'
25
+ __version__ = '1.4.0'
26
+ __version_date__ = '29 April 2017'
27
+ """
28
+
29
+
30
class TicToc:
    """Stopwatch modelled on MATLAB's tic/toc (extended from the pytictoc package).

    Methods:
        tic()       start or restart the timer
        toc()       print the time elapsed since the timer was started
        tocvalue()  return the elapsed time in seconds as a float

    Attributes:
        start    timeit.default_timer() value when tic() was last called
        end      timeit.default_timer() value when toc()/tocvalue() was last called
        elapsed  end - start, as of the last toc()/tocvalue() call

    The instance is also a context manager: the timer is restarted on entry
    and the elapsed time is logged on exit.
    """

    def __init__(self, title='', *, disable=False, min_display_seconds=None):
        """Create a timer that starts counting immediately.

        :param title: label prepended to every reported message
        :param disable: when True, toc() prints nothing and a normal context exit logs nothing
        :param min_display_seconds: on a normal context exit, only log when the
            elapsed time reaches this many seconds; None means always log
        """
        self.start = timeit.default_timer()
        # end/elapsed stay NaN until the first toc()/tocvalue() call
        self.end = float('nan')
        self.elapsed = float('nan')
        self.title = title
        self.disable = disable
        self.min_display_seconds = min_display_seconds

    def tic(self):
        """Start (or restart) the timer."""
        self.start = timeit.default_timer()

    def toc(self, msg='', restart=False):
        """Print the time elapsed since the timer was last started.

        :param msg: extra text inserted between the title and the elapsed time
        :param restart: when True, restart the timer after reporting
        """
        self.end = timeit.default_timer()
        self.elapsed = self.end - self.start
        if not self.disable:
            print(f'{self.title} {msg} elapsed {human_readable_number(self.elapsed, "秒")}.')
        if restart:
            self.start = timeit.default_timer()

    def tocvalue(self, restart=False):
        """Return the seconds elapsed since the timer was last started.

        :param restart: when True, restart the timer after measuring
        :return: elapsed time in seconds (float)
        """
        self.end = timeit.default_timer()
        self.elapsed = self.end - self.start
        if restart:
            self.start = timeit.default_timer()
        return self.elapsed

    @staticmethod
    def process_time(msg='time.process_time():'):
        """Print the current value of time.process_time(), prefixed by msg."""
        print(f'{msg} {human_readable_number(time.process_time(), "秒")}.')

    def __enter__(self):
        """Restart the timer on entering the with-block.

        :return: the timer itself, so ``with TicToc(...) as t:`` binds a usable object
        """
        if self.title == '__main__' and not self.disable:
            logger.info(f'time.process_time(): {human_readable_number(time.process_time(), "秒")}.')
        self.start = timeit.default_timer()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Log the elapsed time on leaving the with-block.

        A normal exit honours `disable` and `min_display_seconds`; an exit caused
        by an exception is always logged. Exceptions are never suppressed.
        """
        elapsed = self.tocvalue()

        if exc_tb is None:
            if not self.disable and (self.min_display_seconds is None or elapsed >= self.min_display_seconds):
                logger.info(f'{self.title} finished in {human_readable_number(elapsed, "秒")}.')
        else:
            logger.info(f'{self.title} interrupt in {human_readable_number(elapsed, "秒")},')
108
+
109
+
110
+ __timer = """
111
+
112
+ """
113
+
114
+
115
class Timer:
    """Accumulating timer for performance analysis; usable in a with-statement.

    Each measured round must be delimited explicitly with start() and stop();
    every stop() appends one duration to `data`. Leaving a with-block prints
    the report.
    """

    def __init__(self, title=''):
        """
        :param title: name of the timer, used as the prefix of every report
        """
        # timeit already picks the most suitable clock for the current platform
        self.default_timer = timeit.default_timer
        self.title = title
        # one duration (in seconds) per completed start()/stop() round
        self.data = []
        self.start_clock = float('nan')

    def start(self):
        """Mark the beginning of a round."""
        self.start_clock = self.default_timer()

    def stop(self):
        """Mark the end of a round and record its duration."""
        self.data.append(self.default_timer() - self.start_clock)

    def report(self, msg=''):
        """Print a summary of the durations recorded so far.

        :param msg: extra text appended after the title

        Several rounds are summarised through ValuesStat, a single round is
        printed as one duration, and no rounds yields a "no data" message.
        """
        msg = f'{self.title} {msg}'
        n = len(self.data)

        # Strictly more than one sample is needed for statistics; the original
        # `n >= 1` test made the single-sample branch below unreachable.
        if n > 1:
            print(msg, '用时(秒) ' + ValuesStat(self.data).summary(valfmt='.3f'))
        elif n == 1:
            sum_ = sum(self.data)
            print(f'{msg} 用时: {sum_:.3f}s')
        else:  # nothing has been timed yet
            print(f'{msg} 暂无计时信息')

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.report()
156
+
157
+
158
def performit(title, stmt="pass", setup="pass", repeat=1, number=1, globals=None):
    """ Thin wrapper around timeit.repeat that also prints a one-line summary

    The statement's own result is not captured, which avoids running it an
    extra time just to obtain a value.

    :param title: label printed in front of the summary
    :param stmt: forwarded to timeit.repeat
    :param setup: forwarded to timeit.repeat
    :param repeat: forwarded to timeit.repeat
    :param number: forwarded to timeit.repeat
    :param globals: forwarded to timeit.repeat
    :return: the list of timings returned by timeit.repeat
    """
    timings = timeit.repeat(stmt=stmt, setup=setup, repeat=repeat, number=number, globals=globals)
    summary = ValuesStat(timings).summary(valfmt='.3f')
    print(title, '用时(秒) ' + summary)
    return timings
169
+
170
+
171
def perftest(title, stmt="pass", repeat=1, number=1, globals=None, res_width=None, print_=True):
    """ Time a callable or a source string with a hand-written loop

    Unlike performit, the loop is run here rather than through timeit.repeat,
    so that the value produced by the last execution can be shown in the report.

    :param title: label printed in front of the report
    :param stmt: a callable taking no arguments, or a source string; the string
        is evaluated as an expression when possible, otherwise executed as
        statements (in which case there is no result to show)
    :param repeat: number of timed units
    :param number: number of executions inside each timed unit
    :param globals: namespace used when stmt is a source string
    :param res_width: maximum number of characters of the result to display;
        defaults to 200 for a single unit and 50 otherwise
    :param print_: whether to print the report
    :return: list with one elapsed time in seconds per unit
    """
    # 1 make sure we end up with a callable
    if callable(stmt):
        func = stmt
    else:
        # NOTE: the source string is compiled and evaluated, so only pass trusted code.
        try:
            code = compile(stmt, '', 'eval')
        except SyntaxError:
            # Statements such as the default "pass" are not expressions;
            # eval() of exec-mode code runs it and returns None.
            code = compile(stmt, '', 'exec')

        def func():
            return eval(code, globals)

    # 2 run and time; same clock as the other timers in this module
    clock = timeit.default_timer
    data = []
    res = ''
    for _ in range(repeat):
        start = clock()
        for _ in range(number):
            res = func()
        data.append(clock() - start)

    # 3 report; the result text is only built when it is going to be printed
    if print_:
        if res_width is None:
            # a single-unit report is short, so the result may take more room
            res_width = 50 if len(data) > 1 else 200
        if res is None:
            res = ''
        else:
            res = '运行结果:' + shorten(str(res), res_width)
        print(title, '用时(秒) ' + ValuesStat(data).summary(valfmt='.3f'), res)

    return data
211
+
212
+
213
class PerfTest:
    """ Benchmark runner that borrows the discovery mechanism of unittest

    Subclasses define methods whose names start with ``perf_``; perf() finds
    them and times each one through perftest.

    Since v0.0.38 `number` and the other settings are arguments of perf()
    instead of the constructor, which keeps subclassing simple.
    """

    def perf(self, number=1, repeat=1, globals=None):
        """ Time every callable attribute whose name starts with ``perf_``

        :param number: executions grouped into one timed unit; useful when a
            single execution is too fast to measure
        :param repeat: how many units are timed
            Analogy for the difference between number and repeat:
                measure a stack of `number` sheets of paper, `repeat` times;
                for something as thick as a carton, number can stay at 1
        :param globals: passed through to perftest
        """
        # 1 collect the perf_* callables, in natural sort order
        discovered = [attr for attr in dir(self)
                      if attr.startswith('perf_') and callable(getattr(self, attr))]
        ordered = natural_sort(discovered)

        # 2 right-align the labels (method names without the 'perf_' prefix)
        labels = listalign([attr[5:] for attr in ordered], 'r')

        # 3 run each benchmark
        for label, attr in zip(labels, ordered):
            perftest(label, getattr(self, attr),
                     number=int(number), repeat=int(repeat), globals=globals)
@@ -0,0 +1,180 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : 陈坤泽
4
+ # @Email : 877362867@qq.com
5
+ # @Date : 2020/09/18 22:16
6
+
7
+ import os
8
+ import concurrent.futures
9
+ import math
10
+ import time
11
+ import sys
12
+
13
+ from pyxllib.prog.pupil import EmptyPoolExecutor, format_exception
14
+ from pyxllib.text.pupil import shorten
15
+
16
+ XLLOG_CONF_FILE = 'xllog.yaml'
17
+
18
+ ____xllog = """
19
+ """
20
+
21
+
22
def get_xllog(name='xllog', *, log_file=None):
    """ Return the logger named 'pyxllib.<name>', configuring it on first use

    :param name: suffix of the logger name; 'xllog' gets the set-up described below,
        any other name is simply created through logging.getLogger
    :param log_file: additionally write to this file; only honoured the first
        time the 'xllog' logger is built and when no config file is present.
        The file handler is opened in append mode ('a').
    :return: the logging.Logger instance

    First-time set-up of 'xllog':
        - if the file XLLOG_CONF_FILE exists it is read; a dict result is fed to
          logging.config.dictConfig, anything else goes to logging.config.fileConfig
        - otherwise a bare logger is created, plus the optional file handler

    TODO the design of this helper is admittedly awkward; a generic initialiser
        with xllog as one particular instance would be cleaner
    TODO handlers for DingTalk robots or e-mail?
    """
    import logging

    full_name = 'pyxllib.' + name

    # 1 already registered: hand it back untouched
    if full_name in logging.root.manager.loggerDict:
        return logging.getLogger(full_name)

    # 2 first construction; only 'xllog' needs any work beyond creating the logger
    if name == 'xllog':
        if not os.path.isfile(XLLOG_CONF_FILE):
            # 3 no config file: build a very small default logger
            plain_logger = logging.getLogger('pyxllib.xllog')
            if log_file:
                handler = logging.FileHandler(f'{log_file}', 'a')
                handler.setLevel(logging.DEBUG)
                handler.setFormatter(logging.Formatter('%(asctime)s %(message)s'))
                plain_logger.addHandler(handler)
        else:
            # a custom config file sits at the default location
            import logging.config
            from pyxllib.file.specialist import File
            conf = File(XLLOG_CONF_FILE).read()
            if isinstance(conf, dict):
                # the yaml dict layout is the recommended, more readable form
                logging.config.dictConfig(conf)
            else:
                # classic conf files are supported as well
                logging.config.fileConfig(XLLOG_CONF_FILE)

    return logging.getLogger(full_name)
66
+
67
+
68
class Iterate:
    """ Wrapper around a common for-loop pattern: apply a function to every
    item, with optional progress logging, index range and thread pool

    Pairwise iteration (run_pair / run_pair2) used to live here but was removed:
    itertools.combinations, permutations and combinations_with_replacement
    already cover it.
    """

    def __init__(self, items):
        """
        :param items: any iterable; it is materialised into a tuple because the
            total length and start/end slicing are needed
        """
        self.items = tuple(items)
        self.n_items = len(self.items)
        # number of digits needed to print the largest index
        self.format_width = math.ceil(math.log10(self.n_items + 1))
        self.xllog = get_xllog()

    def _format_pinterval(self, pinterval=None):
        """ Convert a percentage string such as '20%' into an item count;
        every other value is returned unchanged """
        if isinstance(pinterval, str) and pinterval.endswith('%'):
            return int(round(self.n_items * float(pinterval[:-1]) / 100))
        else:
            return pinterval

    def _step1_check_number(self, pinterval, func):
        """ Log what is about to be processed (only when progress logging is on) """
        if pinterval:
            # Let earlier print output appear first; the logger may still overtake it.
            sys.stdout.flush()
            # functools.partial objects and callable instances have no __name__
            func_name = getattr(func, '__name__', type(func).__name__)
            self.xllog.info(f"使用 {func_name} 处理 {self.n_items} 个数据 {shorten(str(self.items), 30)}")

    def _step2_check_range(self, start, end):
        """ Normalise the index range: a falsy start becomes 0 and a falsy end
        becomes the number of items; explicit bounds are logged

        :return: (start, end)
        """
        if start:
            self.xllog.info(f"使用start参数,只处理≥{start}的条目")
        else:
            start = 0
        if end:
            # the spaces around 'end' keep the two log lines aligned
            self.xllog.info(f"使用 end 参数,只处理<{end}的条目")
        else:
            end = len(self.items)
        return start, end

    def _step3_executor(self, pinterval, max_workers):
        """ Create the executor that will run the tasks

        max_workers == 1 uses EmptyPoolExecutor instead of a real thread pool;
        anything else creates a concurrent.futures.ThreadPoolExecutor.
        """
        if max_workers == 1:
            executor = EmptyPoolExecutor()
        else:
            executor = concurrent.futures.ThreadPoolExecutor(max_workers)
            if pinterval:
                self.xllog.info(f'多线程执行,当前迭代所用线程数:{executor._max_workers}')
        return executor

    def _step4_iter(self, i, pinterval, executor):
        """ Wait until the executor queue is empty, then log progress if due

        Submitting only into an empty queue keeps the progress log close to
        the real state instead of racing through the whole loop at once.
        """
        # NOTE(review): busy-wait; it keeps one core spinning while workers are busy.
        while executor._work_queue.qsize(): pass
        if pinterval and (i or pinterval == 1) and i % pinterval == 0:
            message = f' {self.items[i]}' if pinterval == 1 else ''
            self.xllog.info(f'{i:{self.format_width}d}/{self.n_items}={i / self.n_items:6.2%}{message}')

    def _step5_finish(self, pinterval, interrupt, start_time):
        """ Log the completion line (total time and speed) when progress logging
        is on and the run was not interrupted, then flush stderr

        :param interrupt: truthy when the run ended because of an error
        :param start_time: time.time() value taken before the loop started
        """
        if not interrupt and pinterval:
            # imported only when a message is actually produced
            from humanfriendly import format_timespan
            span = time.time() - start_time
            msg = f'总用时:{format_timespan(span)}'
            if span:
                speed = self.n_items / span
                msg += f',速度:{speed:.2f}it/s'
            # Completion is always 100%; the former n_items / n_items raised
            # ZeroDivisionError for an empty collection.
            self.xllog.info(f'{1.0:6.2%} 完成迭代,{msg}')
        sys.stderr.flush()

    def run(self, func, start=0, end=None, pinterval=None, max_workers=1, interrupt=True):
        """ Apply func to every item in the selected index range

        :param func: called with each item
        :param start: skip items whose index is < start
        :param end: only process items whose index is < end
        :param pinterval: log progress every this many items; by default no
            progress is logged (errors are still logged). A percentage string
            such as '20%' is accepted and converted to an item count, so the
            interval is approximate.
        :param max_workers: number of threads, 1 (the default) meaning serial
        :type max_workers: int, None
        :param interrupt: when True, re-raise the first error noticed while
            submitting tasks; otherwise errors are only logged
        """
        # 1 common argument handling
        pinterval = self._format_pinterval(pinterval)
        self._step1_check_number(pinterval, func)
        start, end = self._step2_check_range(start, end)
        error = False
        executor = self._step3_executor(pinterval, max_workers)

        # 2 wrapper that records and logs any exception raised by func
        def wrap_func(func, i):
            nonlocal error
            item = self.items[i]
            try:
                func(item)
            except Exception as e:
                error = e
                self.xllog.error(f'💔idx={i}运行出错:{item}\n{format_exception(e)}')

        # 3 iterate
        start_time = time.time()
        for i in range(start, end):
            self._step4_iter(i, pinterval, executor)
            executor.submit(wrap_func, func, i)
            if interrupt and error:
                raise error
        # `error` is only reliable once every task has finished.
        # NOTE(review): an error raised by one of the last tasks of a threaded
        # run is logged but not re-raised here - confirm this is intended.
        executor.shutdown()
        self._step5_finish(pinterval, interrupt and error, start_time)
@@ -0,0 +1,108 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : 陈坤泽
4
+ # @Email : 877362867@qq.com
5
+ # @Date : 2024/10/30
6
+
7
+
8
+ import os
9
+ import json
10
+ import base64
11
+
12
+ from pyxllib.text.newbie import add_quote
13
+
14
+
15
class XlOsEnv:
    """ Helper for storing structured data in environment variables

    Values are stored as JSON text (plain strings go through add_quote first),
    optionally base64-encoded, and are parsed back on retrieval.

    Environment variables can also serve as a crude global-variable channel,
    although that is not really recommended.

    >> XlOsEnv.persist_set('TP10_ACCOUNT',
                           {'server': '172.16.250.250', 'port': 22, 'user': 'ckz', 'passwd': '123456'},
                           True)
    >> print(XlOsEnv.get('TP10_ACCOUNT', decoding=True))  # the stored account data
    >> XlOsEnv.unset('TP10_ACCOUNT')
    """

    @classmethod
    def get(cls, name, *, decoding=False):
        """ Read an environment variable

        :param name: variable name
        :param decoding: base64-decode the raw value before parsing
        :return:
            None when the variable is not set,
            the parsed data when the value is valid JSON,
            otherwise the value as a plain string
        """
        raw = os.environ.get(name)
        if raw is None:
            return None

        payload = base64.b64decode(raw.encode()) if decoding else raw

        try:
            return json.loads(payload)
        except json.JSONDecodeError:
            # not JSON: hand the text back (decoded bytes after base64)
            return payload.decode() if isinstance(payload, bytes) else payload

    @classmethod
    def set(cls, name, value, encoding=False):
        """ Change an environment variable for the current process

        :param name: variable name
        :param value: data to store; str values go through add_quote,
            everything else through json.dumps
        :param encoding: store the text base64-encoded, so that passwords and
            similar data are not readable at a glance.
            This is obfuscation only, not encryption, and is trivially reversible.
        :return: str, the text that ended up in the environment
        """
        # 1 pack
        packed = add_quote(value) if isinstance(value, str) else json.dumps(value)

        # 2 encode
        stored = base64.b64encode(packed.encode()).decode() if encoding else packed

        # 3 store
        os.environ[name] = stored
        return stored

    @classmethod
    def persist_set(cls, name, value, encoding=False, *, cfgfile=None):
        """ Set the variable for this process and persist it through envariable.setenv

        Python cannot change system-wide variables by itself, see
        https://stackoverflow.com/questions/17657686/is-it-possible-to-set-an-environment-variable-from-python-permanently/17657905

        :param cfgfile: reserved for choosing the target file on linux;
            TODO not implemented, the argument is currently ignored
        :return: the text handed to setenv
        """
        # Persisting differs between unix and windows, so a third-party package does the work.
        from envariable import setenv

        stored = cls.set(name, value, encoding)
        if stored[0] == stored[-1] == '"':
            # NOTE(review): this wraps the quoted text in backslashes (\"...\"\ form);
            # presumably meant as quote escaping for setenv - confirm against envariable.
            stored = '\\' + stored + '\\'
        setenv(name, stored)

        return stored

    @classmethod
    def unset(cls, name):
        """ Remove the variable through envariable.unsetenv """
        from envariable import unsetenv
        unsetenv(name)
@@ -0,0 +1,17 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : 陈坤泽
4
+ # @Email : 877362867@qq.com
5
+ # @Date : 2021/01/13 15:29
6
+
7
+ """
8
+ 对标准库或一些第三方库,进行的功能扩展
9
+ 也有可能对一些bug进行了修改
10
+
11
+ 有些是小的库,直接把源码搬过来了
12
+ 有些是较大的库,仍然要(会自动在需要使用时 pip install)安装
13
+
14
+ 改了底层标准库一些功能,修复一些bug,或者提升功能兼容性、强度
15
+
16
+ onepy: 做了些中文注解,其他修改了啥我也忘了~~可能是有改源码功能的
17
+ """
@@ -2,11 +2,9 @@
2
2
  # -*- coding: utf-8 -*-
3
3
  # @Author : 陈坤泽
4
4
  # @Email : 877362867@qq.com
5
- # @Data : 2020/06/02 20:00
5
+ # @Date : 2020/06/02 20:00
6
6
 
7
7
 
8
8
  """
9
9
  from https://github.com/martsberger/tablepyxl
10
10
  """
11
-
12
-
@@ -2,7 +2,7 @@
2
2
  # -*- coding: utf-8 -*-
3
3
  # @Author : 陈坤泽
4
4
  # @Email : 877362867@qq.com
5
- # @Data : 2020/06/02 19:57
5
+ # @Date : 2020/06/02 19:57
6
6
 
7
7
  """
8
8
  tablepyxl.style的代码
@@ -2,20 +2,18 @@
2
2
  # -*- coding: utf-8 -*-
3
3
  # @Author : 陈坤泽
4
4
  # @Email : 877362867@qq.com
5
- # @Data : 2020/06/02 19:57
5
+ # @Date : 2020/06/02 19:57
6
6
 
7
7
 
8
8
  # Do imports like python3 so our package works for 2 and 3
9
9
  from __future__ import absolute_import
10
10
 
11
- import subprocess
12
-
13
11
  from lxml import html
14
12
 
15
13
  from openpyxl import Workbook
16
14
  from openpyxl.utils import get_column_letter
17
15
  from premailer import Premailer
18
- from pyxllib.util.tablepyxl.style import Table
16
+ from pyxllib.stdlib.tablepyxl.style import Table
19
17
 
20
18
 
21
19
  def string_to_int(s):
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : 陈坤泽
4
+ # @Email : 877362867@qq.com
5
+ # @Date : 2020/12/08 15:33
6
+
7
+ """ 一些文本处理功能 """
8
+
@@ -0,0 +1,39 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : 陈坤泽
4
+ # @Email : 877362867@qq.com
5
+ # @Date : 2021/06/06 16:57
6
+
7
+ from pyxllib.prog.pupil import check_install_package
8
+
9
+ check_install_package('ahocorasick', 'pyahocorasick')
10
+
11
+ from collections import Counter
12
+ import re
13
+
14
+ import ahocorasick
15
+
16
+
17
def make_automaton(words):
    """ Build an Aho-Corasick automaton from a sequence of pattern words

    Each word is stored with the payload (index, word), where index is the
    position of the word in `words`.
    """
    automaton = ahocorasick.Automaton()
    for position, pattern in enumerate(words):
        automaton.add_word(pattern, (position, pattern))
    automaton.make_automaton()
    return automaton
24
+
25
+
26
def count_words(content, word, scope=2, exclude=None):
    """ Count the occurrences of `word` in `content` together with their context

    :param content: text to search
    :param word: inserted into the regular expression as-is (not escaped)
    :param scope: up to this many characters are captured on each side of the match
    :param exclude: optional words; any snippet containing one of them is dropped
    :return: Counter mapping each matched snippet to its number of occurrences
    """
    # 1 count every snippet: the word plus up to `scope` characters of context per side
    pattern = f'.{{,{scope}}}{word}.{{,{scope}}}'
    counts = Counter(re.findall(pattern, content))
    if not exclude:
        return counts

    # 2 drop snippets containing an excluded word; the snippets are short,
    #   so a plain automaton scan is enough (no interval sets needed)
    automaton = make_automaton(exclude)
    kept = Counter()
    for snippet, freq in counts.items():
        if next(automaton.iter(snippet), None):
            continue
        kept[snippet] = freq
    return kept