pyxllib 0.0.43__py3-none-any.whl → 0.3.197__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyxllib/__init__.py +9 -2
- pyxllib/algo/__init__.py +8 -0
- pyxllib/algo/disjoint.py +54 -0
- pyxllib/algo/geo.py +541 -0
- pyxllib/{util/mathlib.py → algo/intervals.py} +172 -36
- pyxllib/algo/matcher.py +389 -0
- pyxllib/algo/newbie.py +166 -0
- pyxllib/algo/pupil.py +629 -0
- pyxllib/algo/shapelylib.py +67 -0
- pyxllib/algo/specialist.py +241 -0
- pyxllib/algo/stat.py +494 -0
- pyxllib/algo/treelib.py +149 -0
- pyxllib/algo/unitlib.py +66 -0
- pyxllib/autogui/__init__.py +5 -0
- pyxllib/autogui/activewin.py +246 -0
- pyxllib/autogui/all.py +9 -0
- pyxllib/autogui/autogui.py +852 -0
- pyxllib/autogui/uiautolib.py +362 -0
- pyxllib/autogui/virtualkey.py +102 -0
- pyxllib/autogui/wechat.py +827 -0
- pyxllib/autogui/wechat_msg.py +421 -0
- pyxllib/autogui/wxautolib.py +84 -0
- pyxllib/cv/__init__.py +1 -11
- pyxllib/cv/expert.py +267 -0
- pyxllib/cv/{imlib.py → imfile.py} +18 -83
- pyxllib/cv/imhash.py +39 -0
- pyxllib/cv/pupil.py +9 -0
- pyxllib/cv/rgbfmt.py +1525 -0
- pyxllib/cv/slidercaptcha.py +137 -0
- pyxllib/cv/trackbartools.py +163 -49
- pyxllib/cv/xlcvlib.py +1040 -0
- pyxllib/cv/xlpillib.py +423 -0
- pyxllib/data/__init__.py +0 -0
- pyxllib/data/echarts.py +240 -0
- pyxllib/data/jsonlib.py +89 -0
- pyxllib/{util/oss2_.py → data/oss.py} +11 -9
- pyxllib/data/pglib.py +1127 -0
- pyxllib/data/sqlite.py +568 -0
- pyxllib/{util → data}/sqllib.py +13 -31
- pyxllib/ext/JLineViewer.py +505 -0
- pyxllib/ext/__init__.py +6 -0
- pyxllib/{util → ext}/demolib.py +119 -35
- pyxllib/ext/drissionlib.py +277 -0
- pyxllib/ext/kq5034lib.py +12 -0
- pyxllib/{util/main.py → ext/old.py} +122 -284
- pyxllib/ext/qt.py +449 -0
- pyxllib/ext/robustprocfile.py +497 -0
- pyxllib/ext/seleniumlib.py +76 -0
- pyxllib/{util/tklib.py → ext/tk.py} +10 -11
- pyxllib/ext/unixlib.py +827 -0
- pyxllib/ext/utools.py +351 -0
- pyxllib/{util/webhooklib.py → ext/webhook.py} +45 -17
- pyxllib/ext/win32lib.py +40 -0
- pyxllib/ext/wjxlib.py +88 -0
- pyxllib/ext/wpsapi.py +124 -0
- pyxllib/ext/xlwork.py +9 -0
- pyxllib/ext/yuquelib.py +1105 -0
- pyxllib/file/__init__.py +17 -0
- pyxllib/file/docxlib.py +761 -0
- pyxllib/{util → file}/gitlib.py +40 -27
- pyxllib/file/libreoffice.py +165 -0
- pyxllib/file/movielib.py +148 -0
- pyxllib/file/newbie.py +10 -0
- pyxllib/file/onenotelib.py +1469 -0
- pyxllib/file/packlib/__init__.py +330 -0
- pyxllib/{util → file/packlib}/zipfile.py +598 -195
- pyxllib/file/pdflib.py +426 -0
- pyxllib/file/pupil.py +185 -0
- pyxllib/file/specialist/__init__.py +685 -0
- pyxllib/{basic/_5_dirlib.py → file/specialist/dirlib.py} +364 -93
- pyxllib/file/specialist/download.py +193 -0
- pyxllib/file/specialist/filelib.py +2829 -0
- pyxllib/file/xlsxlib.py +3131 -0
- pyxllib/file/xlsyncfile.py +341 -0
- pyxllib/prog/__init__.py +5 -0
- pyxllib/prog/cachetools.py +64 -0
- pyxllib/prog/deprecatedlib.py +233 -0
- pyxllib/prog/filelock.py +42 -0
- pyxllib/prog/ipyexec.py +253 -0
- pyxllib/prog/multiprogs.py +940 -0
- pyxllib/prog/newbie.py +451 -0
- pyxllib/prog/pupil.py +1197 -0
- pyxllib/{sitepackages.py → prog/sitepackages.py} +5 -3
- pyxllib/prog/specialist/__init__.py +391 -0
- pyxllib/prog/specialist/bc.py +203 -0
- pyxllib/prog/specialist/browser.py +497 -0
- pyxllib/prog/specialist/common.py +347 -0
- pyxllib/prog/specialist/datetime.py +199 -0
- pyxllib/prog/specialist/tictoc.py +240 -0
- pyxllib/prog/specialist/xllog.py +180 -0
- pyxllib/prog/xlosenv.py +108 -0
- pyxllib/stdlib/__init__.py +17 -0
- pyxllib/{util → stdlib}/tablepyxl/__init__.py +1 -3
- pyxllib/{util → stdlib}/tablepyxl/style.py +1 -1
- pyxllib/{util → stdlib}/tablepyxl/tablepyxl.py +2 -4
- pyxllib/text/__init__.py +8 -0
- pyxllib/text/ahocorasick.py +39 -0
- pyxllib/text/airscript.js +744 -0
- pyxllib/text/charclasslib.py +121 -0
- pyxllib/text/jiebalib.py +267 -0
- pyxllib/text/jinjalib.py +32 -0
- pyxllib/text/jsa_ai_prompt.md +271 -0
- pyxllib/text/jscode.py +922 -0
- pyxllib/text/latex/__init__.py +158 -0
- pyxllib/text/levenshtein.py +303 -0
- pyxllib/text/nestenv.py +1215 -0
- pyxllib/text/newbie.py +300 -0
- pyxllib/text/pupil/__init__.py +8 -0
- pyxllib/text/pupil/common.py +1121 -0
- pyxllib/text/pupil/xlalign.py +326 -0
- pyxllib/text/pycode.py +47 -0
- pyxllib/text/specialist/__init__.py +8 -0
- pyxllib/text/specialist/common.py +112 -0
- pyxllib/text/specialist/ptag.py +186 -0
- pyxllib/text/spellchecker.py +172 -0
- pyxllib/text/templates/echart_base.html +11 -0
- pyxllib/text/templates/highlight_code.html +17 -0
- pyxllib/text/templates/latex_editor.html +103 -0
- pyxllib/text/vbacode.py +17 -0
- pyxllib/text/xmllib.py +747 -0
- pyxllib/xl.py +39 -0
- pyxllib/xlcv.py +17 -0
- pyxllib-0.3.197.dist-info/METADATA +48 -0
- pyxllib-0.3.197.dist-info/RECORD +126 -0
- {pyxllib-0.0.43.dist-info → pyxllib-0.3.197.dist-info}/WHEEL +4 -5
- pyxllib/basic/_1_strlib.py +0 -945
- pyxllib/basic/_2_timelib.py +0 -488
- pyxllib/basic/_3_pathlib.py +0 -916
- pyxllib/basic/_4_loglib.py +0 -419
- pyxllib/basic/__init__.py +0 -54
- pyxllib/basic/arrow_.py +0 -250
- pyxllib/basic/chardet_.py +0 -66
- pyxllib/basic/dirlib.py +0 -529
- pyxllib/basic/dprint.py +0 -202
- pyxllib/basic/extension.py +0 -12
- pyxllib/basic/judge.py +0 -31
- pyxllib/basic/log.py +0 -204
- pyxllib/basic/pathlib_.py +0 -705
- pyxllib/basic/pytictoc.py +0 -102
- pyxllib/basic/qiniu_.py +0 -61
- pyxllib/basic/strlib.py +0 -761
- pyxllib/basic/timer.py +0 -132
- pyxllib/cv/cv.py +0 -834
- pyxllib/cv/cvlib/_1_geo.py +0 -543
- pyxllib/cv/cvlib/_2_cvprcs.py +0 -309
- pyxllib/cv/cvlib/_2_imgproc.py +0 -594
- pyxllib/cv/cvlib/_3_pilprcs.py +0 -80
- pyxllib/cv/cvlib/_4_cvimg.py +0 -211
- pyxllib/cv/cvlib/__init__.py +0 -10
- pyxllib/cv/debugtools.py +0 -82
- pyxllib/cv/fitz_.py +0 -300
- pyxllib/cv/installer.py +0 -42
- pyxllib/debug/_0_installer.py +0 -38
- pyxllib/debug/_1_typelib.py +0 -277
- pyxllib/debug/_2_chrome.py +0 -198
- pyxllib/debug/_3_showdir.py +0 -161
- pyxllib/debug/_4_bcompare.py +0 -140
- pyxllib/debug/__init__.py +0 -49
- pyxllib/debug/bcompare.py +0 -132
- pyxllib/debug/chrome.py +0 -198
- pyxllib/debug/installer.py +0 -38
- pyxllib/debug/showdir.py +0 -158
- pyxllib/debug/typelib.py +0 -278
- pyxllib/image/__init__.py +0 -12
- pyxllib/torch/__init__.py +0 -20
- pyxllib/torch/modellib.py +0 -37
- pyxllib/torch/trainlib.py +0 -344
- pyxllib/util/__init__.py +0 -20
- pyxllib/util/aip_.py +0 -141
- pyxllib/util/casiadb.py +0 -59
- pyxllib/util/excellib.py +0 -495
- pyxllib/util/filelib.py +0 -612
- pyxllib/util/jsondata.py +0 -27
- pyxllib/util/jsondata2.py +0 -92
- pyxllib/util/labelmelib.py +0 -139
- pyxllib/util/onepy/__init__.py +0 -29
- pyxllib/util/onepy/onepy.py +0 -574
- pyxllib/util/onepy/onmanager.py +0 -170
- pyxllib/util/pyautogui_.py +0 -219
- pyxllib/util/textlib.py +0 -1305
- pyxllib/util/unorder.py +0 -22
- pyxllib/util/xmllib.py +0 -639
- pyxllib-0.0.43.dist-info/METADATA +0 -39
- pyxllib-0.0.43.dist-info/RECORD +0 -80
- pyxllib-0.0.43.dist-info/top_level.txt +0 -1
- {pyxllib-0.0.43.dist-info → pyxllib-0.3.197.dist-info/licenses}/LICENSE +0 -0
pyxllib/basic/strlib.py
DELETED
@@ -1,761 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
# @Author : 陈坤泽
|
4
|
-
# @Email : 877362867@qq.com
|
5
|
-
# @Data : 2020/06/01
|
6
|
-
|
7
|
-
|
8
|
-
import collections
|
9
|
-
import copy
|
10
|
-
import io
|
11
|
-
import logging
|
12
|
-
import math
|
13
|
-
import pprint
|
14
|
-
import re
|
15
|
-
import sys
|
16
|
-
|
17
|
-
from pyxllib.basic.dprint import dprint
|
18
|
-
|
19
|
-
|
20
|
-
def strfind(fullstr, objstr, *, start=None, times=0, overlap=False):
    r"""Locate the N-th occurrence of one or several substrings.

    TODO: performance has room for improvement; an Aho-Corasick automaton
    could replace the multi-pattern loop.

    :param fullstr: the string that is searched
    :param objstr: the target, either a single ``str`` or a list/tuple of
        candidate strings. With several candidates every step selects the
        closest hit among them.
    :param start: index at which the search begins. Defaults to 0 for a
        forward search and to ``len(fullstr) - 1`` for a backward search.
        In a backward search the hit must lie entirely inside
        ``fullstr[:start + 1]``.
    :param times: which occurrence to return. ``0`` is the first hit going
        forward, ``1`` the second, ...; ``-1`` is the first hit going
        backward, ``-2`` the second, ...
    :param overlap: whether overlapping occurrences of a single target are
        counted separately. For a list/tuple target the forward search always
        advances by one character and the backward search always requires the
        next hit to end before the previous one starts, whatever ``overlap``
        says.
    :return: start index of the requested occurrence, or -1 when there is none

    >>> strfind('aabbaabb', 'bb')
    2
    >>> strfind('bbaaaabb', 'bb')
    0
    >>> strfind('aabbaabb', 'bb', times=1)
    6
    >>> strfind('aabbaabb', 'cc')
    -1
    >>> strfind('aabbaabb', ['aa', 'bb'], times=2)
    4
    >>> strfind('aabbaabb', 'bb', start=2)
    2
    >>> strfind('aabbaabb', 'bb', start=3)
    6
    >>> strfind('aabbaabb', ['aa', 'bb'], start=5)
    6
    >>> strfind('aabbaabb', 'aa', times=-1)
    4
    >>> strfind('aabbaabb', 'aa', start=5, times=-1)
    4
    >>> strfind('aabbaabb', 'aa', start=3, times=-1)
    0
    >>> strfind('aabbaabb', 'bb', start=7, times=-1)
    6
    >>> strfind('aaaa', 'aa', times=1)
    2
    >>> strfind('aaaa', 'aa', times=1, overlap=True)
    1
    >>> strfind('aaaa', 'aa', times=-2)
    0
    >>> strfind('aaaa', 'aa', times=-2, overlap=True)
    1
    >>> strfind(r'\item=\item+', (r'\item', r'\test'), start=1)
    6
    """

    def closest(positions, pick):
        """Apply ``pick`` (min or max) to the non-negative positions, -1 if none."""
        hits = [pos for pos in positions if pos >= 0]
        return pick(hits) if hits else -1

    # 1 The default start depends on the search direction
    backward = times < 0
    if start is None:
        start = len(fullstr) - 1 if backward else 0

    # 2 Single target
    if isinstance(objstr, str):
        size = len(objstr)

        if not backward:
            step = 1 if overlap else size  # how far to move past a hit
            p = start - step
            for _ in range(times + 1):
                p = fullstr.find(objstr, p + step)
                if p == -1:
                    return -1
            return p

        # Backward: ``end`` is the exclusive upper bound the next hit must fit in.
        end = start + 1
        p = -1
        for _ in range(-times):
            p = fullstr.rfind(objstr, 0, end)
            if p == -1:
                return -1
            # Non-overlapping: the next hit has to end where this one starts.
            # Overlapping: the next hit only has to start at least one
            # character earlier than this one.
            end = max(p + size - 1 if overlap else p, 0)
        return p

    # 3 Several targets: recurse into the single-target search for each one
    if not backward:
        p = start - 1
        for _ in range(times + 1):
            found = [strfind(fullstr, x, start=p + 1, overlap=overlap) for x in objstr]
            p = closest(found, min)
            if p == -1:
                return -1
        return p

    p = start + 1
    for _ in range(-times):
        found = [strfind(fullstr, x, start=p - 1, times=-1, overlap=overlap) for x in objstr]
        p = closest(found, max)
        if p == -1:
            return -1
    return p
|
131
|
-
|
132
|
-
|
133
|
-
def natural_sort_key(key):
    """Build a sort key that orders embedded numbers by value.

    ``str(key)`` is split on runs of ASCII digits. Because the pattern has a
    capturing group, ``re.split`` returns the text pieces at even positions
    and the digit runs at odd positions, so the position alone decides the
    conversion. Deciding with ``str.isdigit()`` is not safe: it is true for
    characters such as ``'²'`` that ``int()`` rejects.

    :param key: any object; it is converted with ``str``
    :return: list alternating lower-cased text and ``int`` values

    >>> natural_sort_key('File10b')
    ['file', 10, 'b']
    """
    chunks = re.split('([0-9]+)', str(key))
    return [int(chunk) if pos % 2 else chunk.lower() for pos, chunk in enumerate(chunks)]
|
138
|
-
|
139
|
-
|
140
|
-
def natural_sort(ls, only_use_digits=False):
    """Return ``ls`` sorted in natural order (numbers compared by value).

    :param ls: iterable of items; each item is converted with ``str`` to
        build its key
    :param only_use_digits: by default the text between the numbers also
        takes part in the comparison (see ``natural_sort_key``). With
        ``only_use_digits=True`` only the runs of ASCII digits are compared.
    :return: a new sorted list

    >>> natural_sort(['a10', 'a2', 'b1'], only_use_digits=True)
    ['b1', 'a2', 'a10']
    """
    if only_use_digits:
        def func(key):
            # findall yields exactly the ASCII digit runs, so int() cannot
            # be handed a non-ASCII "digit" such as '²'.
            return [int(run) for run in re.findall('[0-9]+', str(key))]
    else:
        func = natural_sort_key
    return sorted(ls, key=func)
|
152
|
-
|
153
|
-
|
154
|
-
def typename(c):
    """Return a short, readable name for the type of ``c``.

    The name is the type's qualified name, prefixed with its module unless
    the module is ``builtins``. This is the same text the default class repr
    shows between the quotes, but it does not depend on the repr layout, so
    it also works for types whose metaclass customises the repr (enums, for
    instance).

    >>> typename(123)
    'int'
    """
    cls = type(c)
    module = cls.__module__
    if module == 'builtins':
        return cls.__qualname__
    return f'{module}.{cls.__qualname__}'
|
161
|
-
|
162
|
-
|
163
|
-
____str = """
|
164
|
-
文本处理相关功能
|
165
|
-
"""
|
166
|
-
|
167
|
-
|
168
|
-
class StrDecorator:
    """Decorator that converts the wrapped function's return value with ``str``.

    The untouched return value of the most recent call stays available as
    ``last_raw_res``.

    Background on writing decorators:
    https://mp.weixin.qq.com/s/Om98PpncG52Ba1ZQ8NIjLA
    """

    def __init__(self, func):
        # ``func`` keeps a handle on the original callable
        self.func = func
        # raw (non-stringified) result of the latest call
        self.last_raw_res = None

    def __call__(self, *args, **kwargs):
        raw = self.func(*args, **kwargs)
        self.last_raw_res = raw
        return str(raw)
|
181
|
-
|
182
|
-
|
183
|
-
class PrintDecorator:
    """Decorator that prints the wrapped function's return value."""

    def __init__(self, func):
        self.func = func

    def __call__(self, *args, **kwargs):
        outcome = self.func(*args, **kwargs)
        print(outcome)
        # the value is still handed back to the caller after being printed
        return outcome
|
193
|
-
|
194
|
-
|
195
|
-
def realign(text, least_blank=4, tab2blank=4, support_chinese=False, sep=None):
    r"""Re-align the columns of a block of text.

    :param text: the text to process; lines may have different column counts
    :param least_blank: a run of at least this many spaces separates columns
    :param tab2blank: number of spaces each tab is expanded to first
    :param support_chinese: measure cells with ``strwidth`` instead of ``len``
    :param sep: separator written between columns; defaults to
        ``least_blank`` spaces
    :return: the text with every column padded to a common width

    >>> realign('ab    c\nd    efg')
    'ab    c\nd     efg'
    """
    # 1 Normalise: expand tabs, then use \t as the only column separator
    expanded = text.replace('\t', ' ' * tab2blank)
    expanded = re.sub(' {' + str(least_blank) + ',}', r'\t', expanded)
    measure = strwidth if support_chinese else len
    if sep is None:
        sep = ' ' * least_blank

    # 2 Split every line into cells and record the widest cell per column
    rows = [raw.strip().split('\t') for raw in expanded.splitlines()]
    col_width = GrowingList()  # grows on demand, the column count is unknown
    for cells in rows:
        for col, cell in enumerate(cells):
            col_width[col] = max(col_width[col] or 0, measure(cell))
    if len(col_width) == 1:
        return '\n'.join(cells[0] for cells in rows)

    # 3 Rebuild each line; the last cell of a line is never padded
    rebuilt = []
    for cells in rows:
        padded = [cell + ' ' * (col_width[col] - measure(cell))
                  for col, cell in enumerate(cells[:-1])]
        padded.append(cells[-1])
        rebuilt.append(sep.join(padded))
    return '\n'.join(rebuilt)
|
230
|
-
|
231
|
-
|
232
|
-
class Stdout:
    """Context manager that redirects ``sys.stdout`` into a string buffer.

    Use it in a ``with`` statement. Everything printed inside the block is
    collected; afterwards it is available as ``result`` or ``str(obj)``.
    """

    def __init__(self, path=None, mode='w'):
        """
        :param path: optional file name. When given, the captured text is
            written to it in ``__exit__``. A path that cannot be opened does
            not raise; the error is logged and nothing is written.
        :param mode: mode used to open ``path``
            'w': default, overwrite the file
            'a': append to the file
        """
        self.origin_stdout = sys.stdout
        self._path = path
        self._mode = mode
        self.strout = io.StringIO()
        self.result = None

    def __enter__(self):
        # Remember the stream that is active right now, so the right one is
        # restored even if stdout changed after this object was constructed.
        self.origin_stdout = sys.stdout
        sys.stdout = self.strout
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        sys.stdout = self.origin_stdout
        self.result = self.strout.getvalue()

        # Only write the captured text when a file name was supplied
        if not self._path:
            return

        try:
            with open(self._path, self._mode) as f:
                f.write(self.result)
        except (TypeError, OSError) as e:
            # OSError also covers FileNotFoundError, PermissionError and
            # IsADirectoryError: an unusable path is logged, not raised.
            logging.exception(e)
        finally:
            self.strout.close()

    def __str__(self):
        """Text captured so far (or the final text once the block has ended)."""
        # Compare against None: an empty capture ('') is a valid result and
        # must not fall through to the buffer, which may already be closed.
        if self.result is not None:
            return self.result
        return self.strout.getvalue()
|
281
|
-
|
282
|
-
|
283
|
-
def strwidth(s):
    """Display width of a string that mixes ASCII and CJK characters.

    The width is the length of the GBK encoding. If the string cannot be
    encoded as GBK, every character counts 1 and every character with a code
    point above 127 counts one more.

    >>> strwidth('ab')
    2
    >>> strwidth('a⑪中⑩')
    7

    The real width of characters such as ⑩ depends on the font; 2 is right
    in most places.
    """
    try:
        return len(s.encode('gbk'))
    except UnicodeEncodeError:
        return len(s) + sum(1 for ch in s if ord(ch) > 127)
|
302
|
-
|
303
|
-
|
304
|
-
def strwidth_proc(s, fmt='r', chinese_char_width=1.8):
    """Pad a string so that its display width becomes a whole number.

    Meant for fonts in which a CJK character is ``chinese_char_width``
    (for example 1.8) columns wide. Ideographic spaces (U+3000) are added
    until the total width is within 0.05 of an integer, so the string itself
    is modified and both the new string and its width are returned.

    :param s: the value to process; converted with ``str``
    :param fmt: target alignment, which decides where the padding goes:
        'r' puts it in front, 'l' behind, anything else splits it on both
        sides
    :param chinese_char_width: width of one double-width character
    :return: ``(s, w)`` with the padded string and its width as an ``int``

    For example two CJK characters followed by ``'a'`` (width 4.6 at 1.8 per
    CJK character) receive three leading ideographic spaces and the reported
    width is 10.
    """
    # 1 Count narrow and wide characters
    s = str(s)
    n_chars = len(s)
    wide = strwidth(s) - n_chars  # characters that strwidth counts as 2
    narrow = n_chars - wide
    pad_char = chr(12288)  # ideographic (full-width) space
    w = narrow + wide * chinese_char_width  # current display width

    # 2 Work out how many ideographic spaces are needed
    error = 0.05  # tolerated distance from a whole number
    t = 0
    while error < w % 1 < 1 - error:
        t += 1
        w += chinese_char_width

    # 3 Add the padding
    if t:
        if fmt == 'r':
            s = pad_char * t + s
        elif fmt == 'l':
            s = s + pad_char * t
        else:
            s = pad_char * (t - t // 2) + s + pad_char * (t // 2)

    # The loop accepts widths slightly below a whole number (e.g. 9.97), so
    # round to the nearest integer; truncating would report one column less.
    return s, int(round(w))
|
344
|
-
|
345
|
-
|
346
|
-
def listalign(ls, fmt='r', *, width=None, fillchar=' ', prefix='', suffix='', chinese_char_width=2):
    """Align the items of a list to a common display width.

    Article (somewhat outdated): https://blog.csdn.net/code4101/article/details/80985218

    ``len`` counts characters, but for alignment a CJK character has to be
    counted as two columns; ``strwidth`` provides that width.

    :param ls: the items; each one is converted with ``str`` and line breaks
        are shown as a literal ``\\n``
    :param fmt: alignment format
        l: left
        c: centre
        r: right
        Several characters give one format per item; when ``fmt`` is shorter
        than ``ls`` the last format is repeated.
    :param width: target width; ignored when it is ``None`` or smaller than
        the widest item
    :param fillchar: character used for padding
    :param prefix: text put in front of every item
    :param suffix: text put behind every item
    :param chinese_char_width: width of one CJK character. For values other
        than 2 the items are first normalised with ``strwidth_proc``.
    :return: list of aligned strings; an empty input gives an empty list

    >>> listalign(['a', '哈哈', 'ccd'])
    ['   a', '哈哈', ' ccd']
    """
    count = len(ls)
    if count == 0:
        return []  # max() below would fail on an empty sequence

    # 1 One format per item
    if len(fmt) == 1:
        fmt = [fmt[0]] * count  # fmt[0] works for 'r' as well as ['r']
    elif len(fmt) < count:
        fmt = list(fmt) + [fmt[-1]] * (count - len(fmt))

    # 2 Stringify the items and measure them
    strs = [str(item).replace('\n', r'\n') for item in ls]
    if chinese_char_width == 2:
        lens = [strwidth(text) for text in strs]
    else:
        lens = []
        for i, text in enumerate(strs):
            strs[i], measured = strwidth_proc(text, fmt[i], chinese_char_width)
            lens.append(measured)
    w = max(lens)
    if width and isinstance(width, int) and width > w:
        w = width

    # 3 Pad every item according to its format
    for i, text in enumerate(strs):
        gap = w - lens[i]
        if fmt[i] == 'r':
            text = fillchar * gap + text
        elif fmt[i] == 'l':
            text = text + fillchar * gap
        elif fmt[i] == 'c':
            text = fillchar * (gap - gap // 2) + text + fillchar * (gap // 2)
        strs[i] = prefix + text + suffix
    return strs
|
408
|
-
|
409
|
-
|
410
|
-
def len_in_dim2_min(arr):
    """Smallest length along the second dimension of a list-like structure.

    An item that is not a list or tuple counts as a row of length 1.

    :param arr: list or tuple of rows
    :return: the minimum row length; 0 for an empty ``arr``
    :raises TypeError: if ``arr`` is not a list or tuple

    >>> len_in_dim2_min([[1, 1], [2], [3, 3, 3]])
    1
    >>> len_in_dim2_min([1, 2, 3])
    1
    >>> len_in_dim2_min([])
    0
    """
    if not isinstance(arr, (list, tuple)):
        raise TypeError('类型错误,不是list构成的二维数组')

    # Every row is inspected: a scalar counts as 1, but a later empty row can
    # still lower the minimum to 0.
    return min((len(item) if isinstance(item, (list, tuple)) else 1 for item in arr), default=0)
|
432
|
-
|
433
|
-
|
434
|
-
def len_in_dim2(arr):
    """Largest length along the second dimension of a list-like structure.

    An item that is not a list or tuple counts as a row of length 1.

    :param arr: list or tuple of rows
    :return: the maximum row length; 0 for an empty ``arr``
    :raises TypeError: if ``arr`` is not a list or tuple

    >>> len_in_dim2([[1, 1], [2], [3, 3, 3]])
    3
    >>> len_in_dim2([1, 2, 3])  # TODO: would 0 be more sensible? Impact on callers unknown
    1
    """
    if not isinstance(arr, (list, tuple)):
        raise TypeError('类型错误,不是list构成的二维数组')

    row_lengths = [len(row) if isinstance(row, (list, tuple)) else 1 for row in arr]
    return max(row_lengths, default=0)
|
455
|
-
|
456
|
-
|
457
|
-
def ensure_array(arr, default_value=''):
    """Pad the rows of a 2-D list/tuple structure to a common column count.

    :param arr: list or tuple of rows; an item that is not a list or tuple is
        treated as a one-cell row
    :param default_value: filler for missing cells; it is converted with
        ``str`` before use
    :return: a new list of lists, or ``arr`` itself when the widest row has
        exactly one column

    >>> ensure_array([[1, 1], [2], [3, 3, 3]])
    [[1, 1, ''], [2, '', ''], [3, 3, 3]]
    """
    width = len_in_dim2(arr)
    if width == 1:
        return arr
    filler = str(default_value)
    padded = []
    for row in arr:
        # list(...) so that tuple rows can be concatenated with the filler list
        cells = list(row) if isinstance(row, (list, tuple)) else [row]
        padded.append(cells + [filler] * (width - len(cells)))
    return padded
|
475
|
-
|
476
|
-
|
477
|
-
def swap_rowcol(a, *, ensure_arr=False, default_value=''):
    """Transpose a matrix given as a sequence of rows.

    Note: when the rows have different lengths, the result has as many rows
    as the shortest input row (``zip`` stops there), unless ``ensure_arr``
    pads the rows first.

    :param a: sequence of rows
    :param ensure_arr: pad the rows with ``ensure_array`` before transposing
    :param default_value: filler passed to ``ensure_array``
    :return: list of lists

    >>> swap_rowcol([[1, 2, 3], [4, 5, 6]])
    [[1, 4], [2, 5], [3, 6]]
    """
    rows = ensure_array(a, default_value) if ensure_arr else a
    return [list(column) for column in zip(*rows)]
|
489
|
-
|
490
|
-
|
491
|
-
def int2excel_col_name(d):
    """Convert a 1-based column number to its Excel column name.

    :param d: column number; 1 is 'A'. 0 gives an empty string.
    :return: the column name in upper-case letters
    :raises ValueError: if ``d`` is negative (the conversion would never end)

    >>> int2excel_col_name(1)
    'A'
    >>> int2excel_col_name(28)
    'AB'
    >>> int2excel_col_name(100)
    'CV'
    """
    if d < 0:
        raise ValueError(f'column number must not be negative: {d!r}')
    letters = []
    while d:
        # bijective base 26: shift by one so that 1..26 map to A..Z
        d, rem = divmod(d - 1, 26)
        letters.append(chr(65 + rem))
    return ''.join(reversed(letters))
|
506
|
-
|
507
|
-
|
508
|
-
def excel_col_name2int(s):
    """Convert an Excel column name to its 1-based column number.

    :param s: column name made of the letters A-Z; lower-case letters are
        accepted as well. An empty string gives 0.
    :return: the column number, 'A' being 1
    :raises ValueError: if ``s`` contains anything other than ASCII letters

    >>> excel_col_name2int('A')
    1
    >>> excel_col_name2int('AA')
    27
    >>> excel_col_name2int('AB')
    28
    """
    d = 0
    for ch in s:
        if 'a' <= ch <= 'z':
            ch = chr(ord(ch) - 32)  # fold ASCII lower case to upper case
        if not 'A' <= ch <= 'Z':
            raise ValueError(f'invalid Excel column name: {s!r}')
        d = d * 26 + (ord(ch) - 64)
    return d
|
521
|
-
|
522
|
-
|
523
|
-
def int2myalphaenum(n):
    """Map a number to one character of ``_a..zA..Z``.

    :param n: a number from 0 to 52; 0 is '_', 1-26 are 'a'-'z' and 27-52
        are 'A'-'Z'
    :return: the character at that position
    :raises ValueError: if ``n`` is outside 0..52 (the value is passed to
        ``dprint`` first)
    """
    if not 0 <= n <= 52:
        dprint(n)  # value outside the supported range
        raise ValueError
    return '_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'[n]
|
532
|
-
|
533
|
-
|
534
|
-
def gentuple(n, tag):
    """Generate ``n`` labels; similar to ``range`` but more flexible.

    :param n: number of labels
    :param tag: numbering scheme
        int: count upwards from that number (0 starts at 0, 1 starts at 1);
            this case returns a list
        'A': Excel-style column names 'A', 'B', ...
        list/tuple: cycle through the given values, e.g. ('A', 'B')
        anything else: a list of ``n`` empty strings
    :return: a tuple for the 'A' and list/tuple schemes, a list otherwise

    >>> gentuple(4, 'A')
    ('A', 'B', 'C', 'D')
    """
    if isinstance(tag, int):
        return [i + tag for i in range(n)]
    if tag == 'A':
        return tuple(int2excel_col_name(i + 1) for i in range(n))
    if isinstance(tag, (list, tuple)):
        period = len(tag)
        return tuple(tag[i % period] for i in range(n))
    return [''] * n
|
560
|
-
|
561
|
-
|
562
|
-
def ensure_gbk(s):
    """Make sure a string can be encoded as GBK.

    If the whole string encodes cleanly it is returned unchanged. Otherwise
    the characters that cannot be encoded are dropped, and the original and
    the cleaned string are passed to ``dprint``.

    :param s: the string to check
    :return: ``s`` or its GBK-encodable remainder
    """
    try:
        s.encode('gbk')
    except UnicodeEncodeError:
        pass
    else:
        return s

    origin_s = s
    s = s.encode('gbk', errors='ignore').decode('gbk')
    dprint(origin_s, s)  # the string contains characters GBK cannot represent
    return s
|
572
|
-
|
573
|
-
|
574
|
-
def funcmsg(func):
    """Describe where a function comes from: name, file and first line.

    An object without ``__name__`` is taken to be a decorator instance. If it
    has a ``func`` attribute (the convention of the decorators in this
    module), the wrapped function is described instead; otherwise a
    "cannot locate" message is returned.

    :param func: a function, or a decorator object wrapping one
    :return: the descriptive text
    """
    target = func
    if not hasattr(target, '__name__'):
        if not hasattr(target, 'func'):
            return f'装饰器:{type(func)},无法定位'
        target = target.func
    name = target.__name__
    code = target.__code__
    return f'函数名:{name},来自文件:{code.co_filename},所在行号={code.co_firstlineno}'
|
583
|
-
|
584
|
-
|
585
|
-
class GrowingList(list):
    """List that grows on demand.

    Reading or writing an index at or beyond the current length first extends
    the list with ``default_value`` up to that index. Negative indices and
    slices behave exactly as they do for a plain ``list``.
    """

    def __init__(self, default_value=None):
        """
        :param default_value: value used to fill the newly created slots
        """
        super().__init__()
        self.default_value = default_value

    def _grow_to(self, index):
        """Extend the list so that the integer position ``index`` exists."""
        # Slices cannot be compared with an int; leave them to list itself.
        if isinstance(index, slice):
            return
        if index >= len(self):
            self.extend([self.default_value] * (index + 1 - len(self)))

    def __getitem__(self, index):
        self._grow_to(index)
        return list.__getitem__(self, index)

    def __setitem__(self, index, value):
        self._grow_to(index)
        list.__setitem__(self, index, value)
|
601
|
-
|
602
|
-
|
603
|
-
def arr_hangclear(arr, depth=None):
    """Blank out leading cells that repeat the row above, to simplify a table.

    Example with ``depth=2``:
        original table      new table
        A  B  D             A  B  D
        A  B  E                   E
        A  C  E                C  E
        A  C  E                   E

    :param arr: 2-D list; it is deep-copied, the input is left untouched
    :param depth: number of leading columns that may be blanked, e.g.
        ``depth=1`` only handles the first column. With ``None`` (or 0) every
        column except the last one of the widest row is handled.
    :return: the simplified copy

    >>> arr_hangclear([[1, 2, 4], [1, 2, 5], [1, 3, 5], [1, 3, 5]])
    [[1, 2, 4], ['', '', 5], ['', 3, 5], ['', '', 5]]
    >>> arr_hangclear([[1, 2, 4], [1, 2, 5], [2, 2, 5], [1, 2, 5]])
    [[1, 2, 4], ['', '', 5], [2, 2, 5], [1, 2, 5]]
    """
    m = depth or len_in_dim2(arr) - 1
    a = copy.deepcopy(arr)

    # Work bottom-up so that the row above still holds its original values
    # when it is compared; within a row go left to right and stop at the
    # first column that differs.
    for i in reversed(range(1, len(arr))):
        row, above = a[i], a[i - 1]
        for j in range(m):
            if row[j] != above[j]:
                break
            row[j] = ''
    return a
|
638
|
-
|
639
|
-
|
640
|
-
def arr2table(arr, rowmerge=False):
    """Convert a 2-D array to the HTML source of a ``<table>``.

    :param arr: the rows to render; cell values are inserted as they are,
        without HTML escaping
    :param rowmerge: merge cells vertically. An empty-string cell produces no
        ``<td>``; a non-empty cell spans the rows below it for as long as
        that row's cell in the same column and all cells to its left are
        empty strings.
    :return: HTML text of the table

    Intended for drawing merged cells, e.g.
    >> chrome(arr2table([['A', 1, 'a'], ['', 2, 'b'], ['B', 3, 'c'], ['', '', 'd'], ['', 5, 'e']], True), 'a.html')
    Screenshot: http://i1.fuimg.com/582188/c452f40b5a072f8d.png
    """
    n = len(arr)
    # The result is not needed, but the call is kept because len_in_dim2
    # raises TypeError when arr is not a list or tuple.
    len_in_dim2(arr)

    def rowspan(i, j):
        """Number of rows the cell at (i, j) covers, itself included."""
        span = 1
        while i + span < n and arr[i + span][j] == '':
            below = arr[i + span]
            # a non-empty cell further left starts a new group: stop there
            if any(below[k] != '' for k in range(j - 1, -1, -1)):
                break
            span += 1
        return span

    parts = ['<table border="1"><tbody>']
    for i, line in enumerate(arr):
        parts.append('<tr>')
        for j, ele in enumerate(line):
            if not rowmerge:
                parts.append(f'<td>{ele}</td>')
            elif ele != '':
                span = rowspan(i, j)
                if span > 1:
                    parts.append(f'<td rowspan="{span}">{ele}</td>')
                else:
                    parts.append(f'<td>{ele}</td>')
        parts.append('</tr>')
    parts.append('</tbody></table>')
    return ''.join(parts)
|
678
|
-
|
679
|
-
|
680
|
-
def digit2weektag(d):
    """Convert a weekday number 1-7 to its Chinese label, 周一 to 周日.

    :param d: the weekday number; anything ``int()`` accepts
    :return: the two-character label
    :raises ValueError: if the number is outside 1..7

    >>> digit2weektag(1)
    '周一'
    >>> digit2weektag('7')
    '周日'
    """
    day = int(d)
    if not 1 <= day <= 7:
        raise ValueError
    return '周' + '一二三四五六日'[day - 1]
|
693
|
-
|
694
|
-
|
695
|
-
def fullwidth2halfwidth(ustring):
    """Convert the full-width characters of a string to half-width.

    Based on:
    https://blog.csdn.net/sparkexpert/article/details/82749207

    The ideographic space (U+3000) becomes an ordinary space, and code points
    65281..65374 are shifted down by 65248 onto their ASCII counterparts. All
    other characters are kept.

    :param ustring: a string (or an iterable of strings, which are joined)
    :return: the converted string
    """

    def to_half(uchar):
        code = ord(uchar)
        if code == 12288:  # full-width space maps directly to a space
            return chr(32)
        if 65281 <= code <= 65374:  # other full-width characters: fixed offset
            return chr(code - 65248)
        return uchar

    return ''.join(to_half(uchar) for s in ustring for uchar in s)
|
714
|
-
|
715
|
-
|
716
|
-
def fullwidth2halfwidth2(ustring):
    """Convert full-width characters to half-width, leaving punctuation alone.

    Works like ``fullwidth2halfwidth`` except that the punctuation marks
    ``: ; ! ( ) , ? " .`` are passed through untouched in both their
    half-width and their full-width form.

    :param ustring: a string (or an iterable of strings, which are joined)
    :return: the converted string
    """
    ascii_marks = ':;!(),?".'
    # An exclusion list holding only the half-width marks would never exclude
    # anything, because those are not converted in the first place. Their
    # full-width counterparts (same offset as the conversion) are what has to
    # be protected.
    untouched = set(ascii_marks) | {chr(ord(mark) + 65248) for mark in ascii_marks}

    ss = []
    for s in ustring:
        for uchar in s:
            if uchar in untouched:
                ss.append(uchar)
                continue
            inside_code = ord(uchar)
            if inside_code == 12288:  # full-width space maps directly to a space
                inside_code = 32
            elif 65281 <= inside_code <= 65374:  # other full-width characters
                inside_code -= 65248
            ss.append(chr(inside_code))
    return ''.join(ss)
|
735
|
-
|
736
|
-
|
737
|
-
def halfwidth2fullwidth(ustring):
    """Convert the half-width characters of a string to full-width.

    An ordinary space becomes the ideographic space (U+3000), and the
    printable ASCII characters 33..126 are shifted up by 65248 onto their
    full-width counterparts. All other characters are kept.

    :param ustring: a string (or an iterable of strings, which are joined)
    :return: the converted string
    """

    def to_full(uchar):
        code = ord(uchar)
        if code == 32:  # space maps directly to the full-width space
            return chr(12288)
        if 33 <= code <= 126:  # other printable ASCII: fixed offset
            return chr(code + 65248)
        return uchar

    return ''.join(to_full(uchar) for s in ustring for uchar in s)
|
753
|
-
|
754
|
-
|
755
|
-
def print2string(*args, **kwargs):
    """Return what ``print(*args, **kwargs)`` would write, as a string.

    https://stackoverflow.com/questions/39823303/python3-print-to-string

    :param args: positional arguments forwarded to ``print``
    :param kwargs: keyword arguments forwarded to ``print`` (``sep``,
        ``end``, ...); ``file`` is supplied by this function
    :return: the printed text, including the line end
    """
    with io.StringIO() as buffer:
        print(*args, file=buffer, **kwargs)
        return buffer.getvalue()
|