pyxllib 0.0.43__py3-none-any.whl → 0.3.197__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyxllib/__init__.py +9 -2
- pyxllib/algo/__init__.py +8 -0
- pyxllib/algo/disjoint.py +54 -0
- pyxllib/algo/geo.py +541 -0
- pyxllib/{util/mathlib.py → algo/intervals.py} +172 -36
- pyxllib/algo/matcher.py +389 -0
- pyxllib/algo/newbie.py +166 -0
- pyxllib/algo/pupil.py +629 -0
- pyxllib/algo/shapelylib.py +67 -0
- pyxllib/algo/specialist.py +241 -0
- pyxllib/algo/stat.py +494 -0
- pyxllib/algo/treelib.py +149 -0
- pyxllib/algo/unitlib.py +66 -0
- pyxllib/autogui/__init__.py +5 -0
- pyxllib/autogui/activewin.py +246 -0
- pyxllib/autogui/all.py +9 -0
- pyxllib/autogui/autogui.py +852 -0
- pyxllib/autogui/uiautolib.py +362 -0
- pyxllib/autogui/virtualkey.py +102 -0
- pyxllib/autogui/wechat.py +827 -0
- pyxllib/autogui/wechat_msg.py +421 -0
- pyxllib/autogui/wxautolib.py +84 -0
- pyxllib/cv/__init__.py +1 -11
- pyxllib/cv/expert.py +267 -0
- pyxllib/cv/{imlib.py → imfile.py} +18 -83
- pyxllib/cv/imhash.py +39 -0
- pyxllib/cv/pupil.py +9 -0
- pyxllib/cv/rgbfmt.py +1525 -0
- pyxllib/cv/slidercaptcha.py +137 -0
- pyxllib/cv/trackbartools.py +163 -49
- pyxllib/cv/xlcvlib.py +1040 -0
- pyxllib/cv/xlpillib.py +423 -0
- pyxllib/data/__init__.py +0 -0
- pyxllib/data/echarts.py +240 -0
- pyxllib/data/jsonlib.py +89 -0
- pyxllib/{util/oss2_.py → data/oss.py} +11 -9
- pyxllib/data/pglib.py +1127 -0
- pyxllib/data/sqlite.py +568 -0
- pyxllib/{util → data}/sqllib.py +13 -31
- pyxllib/ext/JLineViewer.py +505 -0
- pyxllib/ext/__init__.py +6 -0
- pyxllib/{util → ext}/demolib.py +119 -35
- pyxllib/ext/drissionlib.py +277 -0
- pyxllib/ext/kq5034lib.py +12 -0
- pyxllib/{util/main.py → ext/old.py} +122 -284
- pyxllib/ext/qt.py +449 -0
- pyxllib/ext/robustprocfile.py +497 -0
- pyxllib/ext/seleniumlib.py +76 -0
- pyxllib/{util/tklib.py → ext/tk.py} +10 -11
- pyxllib/ext/unixlib.py +827 -0
- pyxllib/ext/utools.py +351 -0
- pyxllib/{util/webhooklib.py → ext/webhook.py} +45 -17
- pyxllib/ext/win32lib.py +40 -0
- pyxllib/ext/wjxlib.py +88 -0
- pyxllib/ext/wpsapi.py +124 -0
- pyxllib/ext/xlwork.py +9 -0
- pyxllib/ext/yuquelib.py +1105 -0
- pyxllib/file/__init__.py +17 -0
- pyxllib/file/docxlib.py +761 -0
- pyxllib/{util → file}/gitlib.py +40 -27
- pyxllib/file/libreoffice.py +165 -0
- pyxllib/file/movielib.py +148 -0
- pyxllib/file/newbie.py +10 -0
- pyxllib/file/onenotelib.py +1469 -0
- pyxllib/file/packlib/__init__.py +330 -0
- pyxllib/{util → file/packlib}/zipfile.py +598 -195
- pyxllib/file/pdflib.py +426 -0
- pyxllib/file/pupil.py +185 -0
- pyxllib/file/specialist/__init__.py +685 -0
- pyxllib/{basic/_5_dirlib.py → file/specialist/dirlib.py} +364 -93
- pyxllib/file/specialist/download.py +193 -0
- pyxllib/file/specialist/filelib.py +2829 -0
- pyxllib/file/xlsxlib.py +3131 -0
- pyxllib/file/xlsyncfile.py +341 -0
- pyxllib/prog/__init__.py +5 -0
- pyxllib/prog/cachetools.py +64 -0
- pyxllib/prog/deprecatedlib.py +233 -0
- pyxllib/prog/filelock.py +42 -0
- pyxllib/prog/ipyexec.py +253 -0
- pyxllib/prog/multiprogs.py +940 -0
- pyxllib/prog/newbie.py +451 -0
- pyxllib/prog/pupil.py +1197 -0
- pyxllib/{sitepackages.py → prog/sitepackages.py} +5 -3
- pyxllib/prog/specialist/__init__.py +391 -0
- pyxllib/prog/specialist/bc.py +203 -0
- pyxllib/prog/specialist/browser.py +497 -0
- pyxllib/prog/specialist/common.py +347 -0
- pyxllib/prog/specialist/datetime.py +199 -0
- pyxllib/prog/specialist/tictoc.py +240 -0
- pyxllib/prog/specialist/xllog.py +180 -0
- pyxllib/prog/xlosenv.py +108 -0
- pyxllib/stdlib/__init__.py +17 -0
- pyxllib/{util → stdlib}/tablepyxl/__init__.py +1 -3
- pyxllib/{util → stdlib}/tablepyxl/style.py +1 -1
- pyxllib/{util → stdlib}/tablepyxl/tablepyxl.py +2 -4
- pyxllib/text/__init__.py +8 -0
- pyxllib/text/ahocorasick.py +39 -0
- pyxllib/text/airscript.js +744 -0
- pyxllib/text/charclasslib.py +121 -0
- pyxllib/text/jiebalib.py +267 -0
- pyxllib/text/jinjalib.py +32 -0
- pyxllib/text/jsa_ai_prompt.md +271 -0
- pyxllib/text/jscode.py +922 -0
- pyxllib/text/latex/__init__.py +158 -0
- pyxllib/text/levenshtein.py +303 -0
- pyxllib/text/nestenv.py +1215 -0
- pyxllib/text/newbie.py +300 -0
- pyxllib/text/pupil/__init__.py +8 -0
- pyxllib/text/pupil/common.py +1121 -0
- pyxllib/text/pupil/xlalign.py +326 -0
- pyxllib/text/pycode.py +47 -0
- pyxllib/text/specialist/__init__.py +8 -0
- pyxllib/text/specialist/common.py +112 -0
- pyxllib/text/specialist/ptag.py +186 -0
- pyxllib/text/spellchecker.py +172 -0
- pyxllib/text/templates/echart_base.html +11 -0
- pyxllib/text/templates/highlight_code.html +17 -0
- pyxllib/text/templates/latex_editor.html +103 -0
- pyxllib/text/vbacode.py +17 -0
- pyxllib/text/xmllib.py +747 -0
- pyxllib/xl.py +39 -0
- pyxllib/xlcv.py +17 -0
- pyxllib-0.3.197.dist-info/METADATA +48 -0
- pyxllib-0.3.197.dist-info/RECORD +126 -0
- {pyxllib-0.0.43.dist-info → pyxllib-0.3.197.dist-info}/WHEEL +4 -5
- pyxllib/basic/_1_strlib.py +0 -945
- pyxllib/basic/_2_timelib.py +0 -488
- pyxllib/basic/_3_pathlib.py +0 -916
- pyxllib/basic/_4_loglib.py +0 -419
- pyxllib/basic/__init__.py +0 -54
- pyxllib/basic/arrow_.py +0 -250
- pyxllib/basic/chardet_.py +0 -66
- pyxllib/basic/dirlib.py +0 -529
- pyxllib/basic/dprint.py +0 -202
- pyxllib/basic/extension.py +0 -12
- pyxllib/basic/judge.py +0 -31
- pyxllib/basic/log.py +0 -204
- pyxllib/basic/pathlib_.py +0 -705
- pyxllib/basic/pytictoc.py +0 -102
- pyxllib/basic/qiniu_.py +0 -61
- pyxllib/basic/strlib.py +0 -761
- pyxllib/basic/timer.py +0 -132
- pyxllib/cv/cv.py +0 -834
- pyxllib/cv/cvlib/_1_geo.py +0 -543
- pyxllib/cv/cvlib/_2_cvprcs.py +0 -309
- pyxllib/cv/cvlib/_2_imgproc.py +0 -594
- pyxllib/cv/cvlib/_3_pilprcs.py +0 -80
- pyxllib/cv/cvlib/_4_cvimg.py +0 -211
- pyxllib/cv/cvlib/__init__.py +0 -10
- pyxllib/cv/debugtools.py +0 -82
- pyxllib/cv/fitz_.py +0 -300
- pyxllib/cv/installer.py +0 -42
- pyxllib/debug/_0_installer.py +0 -38
- pyxllib/debug/_1_typelib.py +0 -277
- pyxllib/debug/_2_chrome.py +0 -198
- pyxllib/debug/_3_showdir.py +0 -161
- pyxllib/debug/_4_bcompare.py +0 -140
- pyxllib/debug/__init__.py +0 -49
- pyxllib/debug/bcompare.py +0 -132
- pyxllib/debug/chrome.py +0 -198
- pyxllib/debug/installer.py +0 -38
- pyxllib/debug/showdir.py +0 -158
- pyxllib/debug/typelib.py +0 -278
- pyxllib/image/__init__.py +0 -12
- pyxllib/torch/__init__.py +0 -20
- pyxllib/torch/modellib.py +0 -37
- pyxllib/torch/trainlib.py +0 -344
- pyxllib/util/__init__.py +0 -20
- pyxllib/util/aip_.py +0 -141
- pyxllib/util/casiadb.py +0 -59
- pyxllib/util/excellib.py +0 -495
- pyxllib/util/filelib.py +0 -612
- pyxllib/util/jsondata.py +0 -27
- pyxllib/util/jsondata2.py +0 -92
- pyxllib/util/labelmelib.py +0 -139
- pyxllib/util/onepy/__init__.py +0 -29
- pyxllib/util/onepy/onepy.py +0 -574
- pyxllib/util/onepy/onmanager.py +0 -170
- pyxllib/util/pyautogui_.py +0 -219
- pyxllib/util/textlib.py +0 -1305
- pyxllib/util/unorder.py +0 -22
- pyxllib/util/xmllib.py +0 -639
- pyxllib-0.0.43.dist-info/METADATA +0 -39
- pyxllib-0.0.43.dist-info/RECORD +0 -80
- pyxllib-0.0.43.dist-info/top_level.txt +0 -1
- {pyxllib-0.0.43.dist-info → pyxllib-0.3.197.dist-info/licenses}/LICENSE +0 -0
pyxllib/cv/cvlib/_4_cvimg.py
DELETED
@@ -1,211 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
# @Author : 陈坤泽
|
4
|
-
# @Email : 877362867@qq.com
|
5
|
-
# @Data : 2020/11/17 15:21
|
6
|
-
|
7
|
-
from pyxllib.cv.cvlib._3_pilprcs import *
|
8
|
-
|
9
|
-
____XxImg = """
|
10
|
-
对CvPrcs、PilPrcs的类层级接口封装
|
11
|
-
"""
|
12
|
-
|
13
|
-
|
14
|
-
class CvImg:
|
15
|
-
prcs = CvPrcs
|
16
|
-
__slots__ = ('img',)
|
17
|
-
|
18
|
-
def __init__(self, img, flags=1, **kwargs):
|
19
|
-
if isinstance(img, type(self)):
|
20
|
-
img = img.img
|
21
|
-
else:
|
22
|
-
img = self.prcs.read(img, flags, **kwargs)
|
23
|
-
self.img = img
|
24
|
-
|
25
|
-
def cvt_channel(self, flags):
|
26
|
-
_t = type(self)
|
27
|
-
return _t(self.prcs.cvt_channel(self.img, flags))
|
28
|
-
|
29
|
-
def write(self, path, if_exists='replace', **kwargs):
|
30
|
-
return self.prcs.write(self.img, path, if_exists, **kwargs)
|
31
|
-
|
32
|
-
@property
|
33
|
-
def size(self):
|
34
|
-
return self.prcs.size(self.img)
|
35
|
-
|
36
|
-
@property
|
37
|
-
def n_channels(self):
|
38
|
-
return self.prcs.n_channels(self.img)
|
39
|
-
|
40
|
-
def resize(self, size, interpolation=cv2.INTER_CUBIC, **kwargs):
|
41
|
-
_t = type(self)
|
42
|
-
return _t(self.prcs.resize(self.img, size, interpolation, **kwargs))
|
43
|
-
|
44
|
-
def show(self, winname=None, flags=0):
|
45
|
-
return self.prcs.show(self.img, winname, flags)
|
46
|
-
|
47
|
-
def reduce_by_area(self, area):
|
48
|
-
_t = type(self)
|
49
|
-
return _t(self.prcs.reduce_by_area(self.img, area))
|
50
|
-
|
51
|
-
|
52
|
-
class PilImg(CvImg):
|
53
|
-
"""
|
54
|
-
注意这样继承实现虽然简单,但是如果是CvPrcs有,但PilPrcs没有的功能,运行是会报错的
|
55
|
-
"""
|
56
|
-
prcs = PilPrcs
|
57
|
-
|
58
|
-
def random_direction(self):
|
59
|
-
_t = type(self)
|
60
|
-
return _t(self.prcs.random_direction(self.img))
|
61
|
-
|
62
|
-
|
63
|
-
____alias = """
|
64
|
-
对CvPrcs中一些常用功能的名称简化
|
65
|
-
"""
|
66
|
-
|
67
|
-
imread = CvPrcs.read
|
68
|
-
imwrite = CvPrcs.write
|
69
|
-
imshow = CvPrcs.show
|
70
|
-
|
71
|
-
____get_sub_image = """
|
72
|
-
|
73
|
-
TODO 这里很多功能,都要抽时间整理进CvImg、PilImg
|
74
|
-
|
75
|
-
"""
|
76
|
-
|
77
|
-
|
78
|
-
def warp_image(img, warp_mat, dsize=None, *, view_rate=False, max_zoom=1, reserve_struct=False):
|
79
|
-
""" 对图像进行透视变换
|
80
|
-
|
81
|
-
:param img: np.ndarray的图像数据
|
82
|
-
TODO 支持PIL.Image格式?
|
83
|
-
:param warp_mat: 变换矩阵
|
84
|
-
:param dsize: 目标图片尺寸
|
85
|
-
没有任何输入时,同原图
|
86
|
-
如果有指定,则会决定最终的图片大小
|
87
|
-
如果使用了view_rate、max_zoom,会改变变换矩阵所展示的内容
|
88
|
-
:param view_rate: 视野比例,默认不开启,当输入非0正数时,几个数值功能效果如下
|
89
|
-
1,关注原图四个角点位置在变换后的位置,确保新的4个点依然在目标图中
|
90
|
-
为了达到该效果,会增加【平移】变换,以及自动控制dsize
|
91
|
-
2,将原图依中心面积放到至2倍,记录新的4个角点变换后的位置,确保变换后的4个点依然在目标图中
|
92
|
-
0.5,同理,只是只关注原图局部的一半位置
|
93
|
-
:param max_zoom: 默认1倍,当设置时(只在开启view_rate时有用),会增加【缩小】变换,限制view_rate扩展的上限
|
94
|
-
:param reserve_struct: 是否保留原来img的数据类型返回,默认True
|
95
|
-
关掉该功能可以提高性能,此时返回结果统一为 np 矩阵
|
96
|
-
:return: 见 reserve_struct
|
97
|
-
"""
|
98
|
-
from math import sqrt
|
99
|
-
|
100
|
-
# 0 参数整理
|
101
|
-
img = imread(img)
|
102
|
-
|
103
|
-
# 1 得到3*3的变换矩阵
|
104
|
-
warp_mat = np_array(warp_mat)
|
105
|
-
if warp_mat.shape[0] == 2:
|
106
|
-
warp_mat = np.concatenate([warp_mat, [[0, 0, 1]]], axis=0)
|
107
|
-
|
108
|
-
# 2 view_rate,视野比例改变导致的变换矩阵规则变化
|
109
|
-
if view_rate:
|
110
|
-
# 2.1 视野变化后的四个角点
|
111
|
-
h, w = img.shape[:2]
|
112
|
-
y, x = h / 2, w / 2 # 图片中心点坐标
|
113
|
-
h1, w1 = view_rate * h / 2, view_rate * w / 2
|
114
|
-
l, t, r, b = [-w1 + x, -h1 + y, w1 + x, h1 + y]
|
115
|
-
pts1 = np.array([[l, t], [r, t], [r, b], [l, b]])
|
116
|
-
# 2.2 变换后角点位置产生的外接矩形
|
117
|
-
left, top, right, bottom = rect_bounds1d(warp_points(pts1, warp_mat))
|
118
|
-
# 2.3 增加平移变换确保左上角在原点
|
119
|
-
warp_mat = np.dot([[1, 0, -left], [0, 1, -top], [0, 0, 1]], warp_mat)
|
120
|
-
# 2.4 控制面积变化率
|
121
|
-
h2, w2 = (bottom - top, right - left)
|
122
|
-
if max_zoom:
|
123
|
-
rate = w2 * h2 / w / h # 目标面积比原面积
|
124
|
-
if rate > max_zoom:
|
125
|
-
r = 1 / sqrt(rate / max_zoom)
|
126
|
-
warp_mat = np.dot([[r, 0, 0], [0, r, 0], [0, 0, 1]], warp_mat)
|
127
|
-
h2, w2 = round(h2 * r), round(w2 * r)
|
128
|
-
if not dsize:
|
129
|
-
dsize = (w2, h2)
|
130
|
-
|
131
|
-
# 3 标准操作,不做额外处理,按照原图默认的图片尺寸展示
|
132
|
-
if dsize is None:
|
133
|
-
dsize = (img.shape[1], img.shape[0])
|
134
|
-
dst = cv2.warpPerspective(img, warp_mat, dsize)
|
135
|
-
|
136
|
-
# 4 返回值
|
137
|
-
return dst
|
138
|
-
|
139
|
-
|
140
|
-
def get_sub_image(src_image, pts, warp_quad=False):
|
141
|
-
""" 从src_image取一个子图
|
142
|
-
|
143
|
-
:param src_image: 原图
|
144
|
-
可以是图片路径、np.ndarray、PIL.Image对象
|
145
|
-
TODO 目前只支持np.ndarray、pil图片输入,返回统一是np.ndarray
|
146
|
-
:param pts: 子图位置信息
|
147
|
-
只有两个点,认为是矩形的两个对角点
|
148
|
-
只有四个点,认为是任意四边形
|
149
|
-
同理,其他点数量,默认为
|
150
|
-
:param warp_quad: 变形的四边形
|
151
|
-
默认是截图pts的外接四边形区域,使用该参数
|
152
|
-
且当pts为四个点时,是否强行扭转为矩形
|
153
|
-
一般写 'average',也可以写'max'、'min',详见 quad_warp_wh()
|
154
|
-
:return: 子图
|
155
|
-
文件、np.ndarray --> np.ndarray
|
156
|
-
PIL.Image --> PIL.Image
|
157
|
-
"""
|
158
|
-
src_img = imread(src_image)
|
159
|
-
pts = coords2d(pts)
|
160
|
-
if not warp_quad or len(pts) != 4:
|
161
|
-
x1, y1, x2, y2 = rect_bounds1d(pts)
|
162
|
-
dst = src_img[y1:y2, x1:x2] # 这里越界不会报错,只是越界的那个维度shape为0
|
163
|
-
else:
|
164
|
-
w, h = quad_warp_wh(pts, method=warp_quad)
|
165
|
-
warp_mat = get_warp_mat(pts, rect2polygon([0, 0, w, h]))
|
166
|
-
dst = warp_image(src_img, warp_mat, (w, h))
|
167
|
-
return dst
|
168
|
-
|
169
|
-
|
170
|
-
____other = """
|
171
|
-
"""
|
172
|
-
|
173
|
-
|
174
|
-
def get_background_color(src_img, edge_size=5, binary_img=None):
|
175
|
-
""" 智能判断图片背景色
|
176
|
-
|
177
|
-
对全图二值化后,考虑最外一层宽度未edge_size的环中,0、1分布最多的作为背景色
|
178
|
-
然后取全部背景色的平均值返回
|
179
|
-
|
180
|
-
:param src_img: 支持黑白图、彩图
|
181
|
-
:param edge_size: 边缘宽度,宽度越高一般越准确,但也越耗性能
|
182
|
-
:param binary_img: 运算中需要用二值图,如果外部已经计算了,可以直接传入进来,避免重复运算
|
183
|
-
:return: color
|
184
|
-
|
185
|
-
TODO 可以写个获得前景色,道理类似,只是最后再图片中心去取平均值
|
186
|
-
"""
|
187
|
-
from itertools import chain
|
188
|
-
|
189
|
-
# 1 获得二值图,区分前背景
|
190
|
-
if binary_img is None:
|
191
|
-
gray_img = cv2.cvtColor(src_img, cv2.COLOR_BGR2GRAY) if src_img.ndim == 3 else src_img
|
192
|
-
_, binary_img = cv2.threshold(gray_img, np.mean(gray_img), 255, cv2.THRESH_BINARY)
|
193
|
-
|
194
|
-
# 2 分别存储点集
|
195
|
-
n, m = src_img.shape[:2]
|
196
|
-
colors0, colors1 = [], []
|
197
|
-
for i in range(n):
|
198
|
-
if i < edge_size or i >= n - edge_size:
|
199
|
-
js = range(m)
|
200
|
-
else:
|
201
|
-
js = chain(range(edge_size), range(m - edge_size, m))
|
202
|
-
for j in js:
|
203
|
-
if binary_img[i, j]:
|
204
|
-
colors1.append(src_img[i, j])
|
205
|
-
else:
|
206
|
-
colors0.append(src_img[i, j])
|
207
|
-
|
208
|
-
# 3 计算平均像素
|
209
|
-
# 以数量多的作为背景像素
|
210
|
-
colors = colors0 if len(colors0) > len(colors1) else colors1
|
211
|
-
return np.mean(np.array(colors), axis=0, dtype='int').tolist()
|
pyxllib/cv/cvlib/__init__.py
DELETED
@@ -1,10 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
# @Author : 陈坤泽
|
4
|
-
# @Email : 877362867@qq.com
|
5
|
-
# @Data : 2020/11/15 10:04
|
6
|
-
|
7
|
-
from pyxllib.cv.cvlib._1_geo import *
|
8
|
-
from pyxllib.cv.cvlib._2_cvprcs import *
|
9
|
-
from pyxllib.cv.cvlib._3_pilprcs import *
|
10
|
-
from pyxllib.cv.cvlib._4_cvimg import *
|
pyxllib/cv/debugtools.py
DELETED
@@ -1,82 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
# @Author : 陈坤泽
|
4
|
-
# @Email : 877362867@qq.com
|
5
|
-
# @Data : 2020/09/30 10:45
|
6
|
-
|
7
|
-
"""
|
8
|
-
这个文件默认不加载到cv里,需要使用的时候再导入
|
9
|
-
|
10
|
-
from pyxlib.cv.debugtools import *
|
11
|
-
"""
|
12
|
-
|
13
|
-
from pyxllib.cv import *
|
14
|
-
|
15
|
-
|
16
|
-
class ErodeTool(TrackbarTool):
|
17
|
-
r""" 腐蚀 erode
|
18
|
-
|
19
|
-
HoughLinesPTool('handwriting/C100001448573-002.png')
|
20
|
-
"""
|
21
|
-
def __init__(self, img, winname='ErodeTool', flags=1):
|
22
|
-
super().__init__(winname, img, flags)
|
23
|
-
|
24
|
-
# 1 增加控件
|
25
|
-
self.create_trackbar('size_x', 100, 15)
|
26
|
-
self.create_trackbar('size_y', 100, 15)
|
27
|
-
|
28
|
-
# 2 初始化执行一遍
|
29
|
-
self.default_run(0)
|
30
|
-
|
31
|
-
def default_run(self, x):
|
32
|
-
""" 默认执行器
|
33
|
-
"""
|
34
|
-
tt = TicToc()
|
35
|
-
size = self['size_x'], self['size_y']
|
36
|
-
element = cv2.getStructuringElement(cv2.MORPH_RECT, size)
|
37
|
-
dst = cv2.erode(self.img, element)
|
38
|
-
self.imshow(dst)
|
39
|
-
tt.toc(f'element_size={size}')
|
40
|
-
|
41
|
-
|
42
|
-
class HoughLinesPTool(TrackbarTool):
|
43
|
-
r""" 霍夫线段检测
|
44
|
-
|
45
|
-
HoughLinesPTool('handwriting/C100001448573-002.png')
|
46
|
-
"""
|
47
|
-
def __init__(self, img, winname='HoughLinesPTool', flags=1):
|
48
|
-
super().__init__(winname, img, flags)
|
49
|
-
|
50
|
-
# 1 增加控件
|
51
|
-
# 以像素为单位的距离精度
|
52
|
-
self.create_trackbar('rho*10', 100, 10)
|
53
|
-
# 以弧度为单位的角度精度
|
54
|
-
self.create_trackbar('theta*pi/360', 10, 2)
|
55
|
-
# 累加平面的阈值参数
|
56
|
-
self.create_trackbar('threshold', 200, 80)
|
57
|
-
# 最低线段长度
|
58
|
-
self.create_trackbar('minLineLength', 200, 50) # 如果要显示全,这里名称最好只有10个字符,也可能是跟图片尺寸有关,有待进一步观察
|
59
|
-
# 允许将同一行点与点之间连接起来的最大的距离
|
60
|
-
self.create_trackbar('maxLineGap', 100, 10)
|
61
|
-
|
62
|
-
# 2 图片预处理,这里暂时用自适应二值化,后面可以把该部分也变成可调试项
|
63
|
-
if self.img.ndim == 3:
|
64
|
-
gray_img = cv2.cvtColor(self.img, cv2.COLOR_BGR2GRAY)
|
65
|
-
else:
|
66
|
-
gray_img = self.img
|
67
|
-
self.binary_img = cv2.adaptiveThreshold(gray_img, 255, 0, 1, 11, 3)
|
68
|
-
|
69
|
-
# 3 初始化执行一遍
|
70
|
-
self.default_run(0)
|
71
|
-
|
72
|
-
def default_run(self, x):
|
73
|
-
""" 默认执行器
|
74
|
-
"""
|
75
|
-
tt = TicToc()
|
76
|
-
lines = cv2.HoughLinesP(self.binary_img,
|
77
|
-
self['rho*10'] / 10, self['theta*pi/360'] * math.pi / 360,
|
78
|
-
self['threshold'], self['minLineLength'], self['maxLineGap'])
|
79
|
-
if lines is None: lines = np.array([])
|
80
|
-
# 处理用二值化,显示用原图
|
81
|
-
self.imshow(CvPlot.lines(self.img, lines))
|
82
|
-
tt.toc(f'x={x} lines.shape={lines.squeeze().shape}')
|
pyxllib/cv/fitz_.py
DELETED
@@ -1,300 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
# @Author : 陈坤泽
|
4
|
-
# @Email : 877362867@qq.com
|
5
|
-
# @Data : 2020/06/02 16:06
|
6
|
-
|
7
|
-
|
8
|
-
import concurrent.futures
|
9
|
-
import math
|
10
|
-
import subprocess
|
11
|
-
import tempfile
|
12
|
-
|
13
|
-
import fitz
|
14
|
-
|
15
|
-
from pyxllib.basic import *
|
16
|
-
from pyxllib.debug import chrome, showdir
|
17
|
-
from .imlib import zoomsvg
|
18
|
-
|
19
|
-
|
20
|
-
class DemoFitz:
|
21
|
-
"""
|
22
|
-
安装: pip install PyMuPdf
|
23
|
-
使用: import fitz
|
24
|
-
官方文档: https://pymupdf.readthedocs.io/en/latest/intro/
|
25
|
-
demo: https://github.com/rk700/PyMuPDF/tree/master/demo
|
26
|
-
examples: https://github.com/rk700/PyMuPDF/tree/master/examples
|
27
|
-
"""
|
28
|
-
|
29
|
-
def __init__(self, file):
|
30
|
-
self.doc = fitz.open(file)
|
31
|
-
|
32
|
-
def message(self):
|
33
|
-
"""查看pdf文档一些基础信息"""
|
34
|
-
dprint(fitz.version) # fitz模块的版本
|
35
|
-
dprint(self.doc.pageCount) # pdf页数
|
36
|
-
dprint(self.doc._getXrefLength()) # 文档的对象总数
|
37
|
-
|
38
|
-
def getToC(self):
|
39
|
-
"""获得书签目录"""
|
40
|
-
toc = self.doc.getToC()
|
41
|
-
chrome(toc)
|
42
|
-
|
43
|
-
def setToC(self):
|
44
|
-
"""设置书签目录
|
45
|
-
可以调层级、改名称、修改指向页码
|
46
|
-
"""
|
47
|
-
toc = self.doc.getToC()
|
48
|
-
toc[1][1] = '改标题名称'
|
49
|
-
self.doc.setToC(toc)
|
50
|
-
file = Path('a.pdf', root=Path.TEMP).fullpath
|
51
|
-
self.doc.save(file, garbage=4)
|
52
|
-
chrome(file)
|
53
|
-
|
54
|
-
def setToC2(self):
|
55
|
-
"""修改人教版教材的标签名"""
|
56
|
-
toc = self.doc.getToC()
|
57
|
-
newtoc = []
|
58
|
-
for i in range(len(toc)):
|
59
|
-
name = toc[i][1]
|
60
|
-
if '.' in name: continue
|
61
|
-
# m = re.search(r'\d+', name)
|
62
|
-
# if m: name = name.replace(m.group(), digits2chinese(int(m.group())))
|
63
|
-
m = re.search(r'([一二三四五六]年级).*?([上下])', name)
|
64
|
-
if i < len(toc) - 1:
|
65
|
-
pages = toc[i + 1][2] - toc[i][2] + 1
|
66
|
-
else:
|
67
|
-
pages = self.doc.pageCount - toc[i][2] + 1
|
68
|
-
toc[i][1] = m.group(1) + m.group(2) + ',' + str(pages)
|
69
|
-
newtoc.append(toc[i])
|
70
|
-
self.doc.setToC(newtoc)
|
71
|
-
file = writefile(b'', 'a.pdf', if_exists='replace')
|
72
|
-
self.doc.save(file, garbage=4)
|
73
|
-
|
74
|
-
def rearrange_pages(self):
|
75
|
-
"""重新布局页面"""
|
76
|
-
self.doc.select([0, 0, 1]) # 第1页展示两次后,再跟第2页
|
77
|
-
file = writefile(b'', 'a.pdf', root=Path.TEMP, if_exists='replace')
|
78
|
-
self.doc.save(file, garbage=4) # 注意要设置garbage,否则文档并没有实际删除内容压缩文件大小
|
79
|
-
chrome(file)
|
80
|
-
|
81
|
-
def page2png(self):
|
82
|
-
"""查看单页渲染图片"""
|
83
|
-
page = self.doc.loadPage(0) # 索引第i页,下标规律同py,支持-1索引最后页
|
84
|
-
dprint(page.bound()) # 页面边界,x,y轴同图像处理中的常识定义,返回Rect(x0, y0, x1, y1)
|
85
|
-
|
86
|
-
pix = page.getPixmap() # 获得页面的RGBA图像,Pixmap类型;还可以用page.getSVGimage()获得矢量图
|
87
|
-
# pix.writePNG('page-0.png') # 将Pixmal
|
88
|
-
pngdata = pix.getPNGData() # 获png文件的bytes字节码
|
89
|
-
chrome(pngdata, 'a.png') # 用我的工具函数打开图片
|
90
|
-
return pngdata
|
91
|
-
|
92
|
-
def pagetext(self):
|
93
|
-
"""单页上的文本"""
|
94
|
-
page = self.doc[0]
|
95
|
-
|
96
|
-
# 获得页面上的所有文本,还支持参数: html,dict,xml,xhtml,json
|
97
|
-
text = page.getText('text')
|
98
|
-
dprint(text)
|
99
|
-
|
100
|
-
# 获得页面上的所有文本(返回字典对象)
|
101
|
-
textdict = page.getText('dict')
|
102
|
-
textdict['blocks'] = textdict['blocks'][:-1]
|
103
|
-
chrome(pprint.pformat(textdict))
|
104
|
-
|
105
|
-
def text(self):
|
106
|
-
"""获得整份pdf的所有文本"""
|
107
|
-
return '\n'.join([page.getText('text') for page in self.doc])
|
108
|
-
|
109
|
-
def xrefstr(self):
|
110
|
-
"""查看pdf文档的所有对象"""
|
111
|
-
xrefstr = []
|
112
|
-
n = self.doc._getXrefLength()
|
113
|
-
for i in range(1, n): # 注意下标实际要从1卡开始
|
114
|
-
# 可以边遍历边删除,不影响下标位置,因为其本质只是去除关联引用而已
|
115
|
-
xrefstr.append(self.doc._getXrefString(i))
|
116
|
-
chrome('\n'.join(xrefstr))
|
117
|
-
|
118
|
-
def page_add_ele(self):
|
119
|
-
"""往页面添加元素
|
120
|
-
添加元素前后xrefstr的区别: https://paste.ubuntu.com/p/Dxhnzp4XJ2/
|
121
|
-
"""
|
122
|
-
self.doc.select([0])
|
123
|
-
page = self.doc.loadPage(0)
|
124
|
-
# page.insertText(fitz.Point(100, 200), 'test\ntest')
|
125
|
-
file = Path('a.pdf', root=Path.TEMP).fullpath
|
126
|
-
dprint(file)
|
127
|
-
self.doc.save(file, garbage=4)
|
128
|
-
chrome(file)
|
129
|
-
|
130
|
-
|
131
|
-
def pdf2svg_oldversion(pdffile, target=None, *, trim=False):
|
132
|
-
"""新版的,pymupdf生成的svg无法配合inkscape进行trim,所以这个旧版暂时还是要保留
|
133
|
-
|
134
|
-
:param pdffile: 一份pdf文件
|
135
|
-
:param target: 目标目录
|
136
|
-
None:
|
137
|
-
如果只有一页,转换到对应目录下同名svg文件
|
138
|
-
如果有多页,转换到同名目录下多份svg文件
|
139
|
-
:param trim:
|
140
|
-
True: 去除边缘空白
|
141
|
-
:return:
|
142
|
-
|
143
|
-
需要第三方工具:pdf2svg(用于文件格式转换),inkscape(用于svg编辑优化)
|
144
|
-
注意pdf2svg的参数不支持中文名,因为这个功能限制,搞得我这个函数实现好麻烦!
|
145
|
-
还要在临时文件夹建立文件,因为重名文件+多线程问题,还曾引发一个bug搞了一下午。
|
146
|
-
(这些软件都以绿色版形式整理在win3/imgtools里)
|
147
|
-
|
148
|
-
注意!!! 这个版本的代码先不要删!先不要删!先不要删!包括pdf2svg.exe那个蠢货软件也先别删!
|
149
|
-
后续研究inkscape这个蠢货的-D参数在处理pymupdf生成的svg为什么没用的时候可以进行对比
|
150
|
-
"""
|
151
|
-
import fitz
|
152
|
-
pages = fitz.open(pdffile).pageCount
|
153
|
-
|
154
|
-
basename = tempfile.mktemp()
|
155
|
-
f1 = basename + '.pdf'
|
156
|
-
filescopy(pdffile, f1) # 复制到临时文件,防止中文名pdf2svg工具处理不了
|
157
|
-
|
158
|
-
if pages == 1:
|
159
|
-
if target is None: target = pdffile[:-3] + 'svg'
|
160
|
-
f2 = basename + '.svg'
|
161
|
-
# print(['pdf2svg.exe', f1, f2])
|
162
|
-
subprocess.run(['pdf2svg.exe', f1, f2])
|
163
|
-
|
164
|
-
if trim: subprocess.run(['inkscape.exe', '-f', f2, '-D', '-l', f2])
|
165
|
-
filescopy(f2, target)
|
166
|
-
else:
|
167
|
-
if target is None: target = pdffile[:-4] + '_svg\\'
|
168
|
-
executor = concurrent.futures.ThreadPoolExecutor()
|
169
|
-
Path(basename + '/').ensure_dir()
|
170
|
-
|
171
|
-
def func(f1, f2, i):
|
172
|
-
subprocess.run(['pdf2svg.exe', f1, f2, str(i)])
|
173
|
-
if trim: subprocess.run(['inkscape.exe', '-f', f2, '-D', '-l', f2])
|
174
|
-
filescopy(f2, target + f'{i}.svg')
|
175
|
-
|
176
|
-
for i in range(1, pages + 1):
|
177
|
-
f2 = basename + f'\\{i}.svg'
|
178
|
-
executor.submit(func, f1, f2, i)
|
179
|
-
executor.shutdown()
|
180
|
-
filescopy(basename, target[:-1])
|
181
|
-
filesdel(basename + '/')
|
182
|
-
|
183
|
-
filesdel(f1)
|
184
|
-
|
185
|
-
|
186
|
-
def pdf2imagebase(pdffile, target=None, scale=None, ext='.png'):
|
187
|
-
"""
|
188
|
-
使用python的PyMuPdf模块,不需要额外插件
|
189
|
-
导出的图片从1开始编号
|
190
|
-
TODO 要加多线程?效率影响大吗?
|
191
|
-
|
192
|
-
:param pdffile: pdf原文件
|
193
|
-
:type target: 相对于原文件所在目录的目标目录名,也可以写文件名,表示重命名
|
194
|
-
None:
|
195
|
-
当该pdf只有1页时,才默认把图片转换到当前目录。
|
196
|
-
否则默认新建一个文件夹来存储图片。(目录名默认为文件名)
|
197
|
-
:param scale: 缩放尺寸
|
198
|
-
1:原尺寸
|
199
|
-
1.5:放大为原来的1.5倍
|
200
|
-
:param ext: 导出的图片格式
|
201
|
-
:param return: 返回生成的图片列表
|
202
|
-
"""
|
203
|
-
import fitz
|
204
|
-
# 1 基本参数计算
|
205
|
-
pdf = fitz.open(pdffile)
|
206
|
-
num_pages = pdf.pageCount
|
207
|
-
|
208
|
-
# 大于1页的时候,默认新建一个文件夹来存储图片
|
209
|
-
if target is None:
|
210
|
-
if num_pages > 1:
|
211
|
-
target = Path(pdffile).stem + '/'
|
212
|
-
else:
|
213
|
-
target = Path(pdffile).dirname + '/'
|
214
|
-
|
215
|
-
newfile = Path(pdffile).abs_dstpath(target).fullpath
|
216
|
-
if newfile.endswith('.pdf'): newfile = os.path.splitext(newfile)[0] + ext
|
217
|
-
Path(newfile).ensure_dir()
|
218
|
-
|
219
|
-
# 2 图像数据的获取
|
220
|
-
def get_svg_image(n):
|
221
|
-
page = pdf.loadPage(n)
|
222
|
-
txt = page.getSVGimage()
|
223
|
-
if scale: txt = zoomsvg(txt, scale)
|
224
|
-
return txt
|
225
|
-
|
226
|
-
def get_png_image(n):
|
227
|
-
"""获得第n页的图片数据"""
|
228
|
-
page = pdf.loadPage(n)
|
229
|
-
if scale:
|
230
|
-
pix = page.getPixmap(fitz.Matrix(scale, scale)) # 长宽放大到scale倍
|
231
|
-
else:
|
232
|
-
pix = page.getPixmap()
|
233
|
-
return pix.getPNGData()
|
234
|
-
|
235
|
-
# 3 分析导出的图片文件名
|
236
|
-
files = []
|
237
|
-
if num_pages == 1:
|
238
|
-
image = get_svg_image(0) if ext == '.svg' else get_png_image(0)
|
239
|
-
files.append(newfile)
|
240
|
-
Path(newfile).write(image, if_exists='replace')
|
241
|
-
else: # 有多页
|
242
|
-
number_width = math.ceil(math.log10(num_pages + 1)) # 根据总页数计算需要的对齐域宽
|
243
|
-
stem, ext = os.path.splitext(newfile)
|
244
|
-
for i in range(num_pages):
|
245
|
-
image = get_svg_image(i) if ext == '.svg' else get_png_image(i)
|
246
|
-
name = ('-{:0' + str(number_width) + 'd}').format(i + 1) # 前面的括号不要删,这样才是完整的一个字符串来使用format
|
247
|
-
files.append(stem + name + ext)
|
248
|
-
Path(stem + name + ext).write(image, if_exists='replace')
|
249
|
-
return files
|
250
|
-
|
251
|
-
|
252
|
-
def pdf2png(pdffile, target=None, scale=None):
|
253
|
-
"""
|
254
|
-
:param pdffile: pdf路径
|
255
|
-
:param target: 目标位置
|
256
|
-
:param scale: 缩放比例
|
257
|
-
:return: list,生成的png图片清单
|
258
|
-
|
259
|
-
# 可以不写target,默认处理:如果单张png则在同目录,多张则会建个同名目录存储
|
260
|
-
>> pdf2png(r'D:\slns+\immovables\immovables_data\test\X\A0001.pdf')
|
261
|
-
|
262
|
-
# 指定存放位置:
|
263
|
-
>> pdf2png(r'D:\slns+\immovables\immovables_data\test\X\A0001.pdf', r'D:\slns+\immovables\immovables_data\test\X')
|
264
|
-
|
265
|
-
"""
|
266
|
-
return pdf2imagebase(pdffile, target=target, scale=scale, ext='.png')
|
267
|
-
|
268
|
-
|
269
|
-
def pdf2svg(pdffile, target=None, scale=None, trim=False):
|
270
|
-
"""
|
271
|
-
:param pdffile: 见pdf2imagebase
|
272
|
-
:param target: 见pdf2imagebase
|
273
|
-
:param scale: 见pdf2imagebase
|
274
|
-
:param trim: 如果使用裁剪功能,会调用pdf-crop-margins第三方工具
|
275
|
-
https://pypi.org/project/pdfCropMargins/
|
276
|
-
:return:
|
277
|
-
"""
|
278
|
-
if trim: # 先对pdf文件进行裁剪再转换
|
279
|
-
pdf = Path(pdffile)
|
280
|
-
newfile = pdf.abs_dstpath('origin.pdf').fullpath
|
281
|
-
pdf.copy(newfile)
|
282
|
-
# subprocess.run(['pdf-crop-margins.exe', '-p', '0', newfile, '-o', pdffile], stderr=subprocess.PIPE) # 本少: 会裁过头!
|
283
|
-
# 本少: 对于上下边处的 [] 分数等,会裁过头,先按百分比 -p 0 不留边,再按绝对点数收缩/扩张 -a -1 负数为扩张,单位为bp
|
284
|
-
# 本少被自己坑了,RamDisk 与 pdf-crop-margins.exe 配合,只能取 SCSI 硬盘,如果 Direct-IO 就不行,还不报错,还以为是泽少写的代码连报错都不会
|
285
|
-
subprocess.run(['pdf-crop-margins.exe', '-p', '0', '-a', '-1', newfile, '-o', pdffile],
|
286
|
-
stderr=subprocess.PIPE)
|
287
|
-
# TODO 有时丢图
|
288
|
-
pdf2imagebase(pdffile, target=target, scale=scale, ext='.svg')
|
289
|
-
|
290
|
-
|
291
|
-
def pdfs2pngs(src, scale=None, pinterval=None):
|
292
|
-
""" 将目录下所有pdf转png
|
293
|
-
:param src: 原pdf数据路径
|
294
|
-
:param scale: 转图片时缩放比例,例如2表示长宽放大至2被
|
295
|
-
:param pinterval: 每隔多少个pdf输出处理进度
|
296
|
-
默认None,不输出
|
297
|
-
"""
|
298
|
-
from functools import partial
|
299
|
-
func = partial(pdf2png, scale=scale)
|
300
|
-
Dir(src).select('**/*.pdf').procfiles(func, pinterval=pinterval)
|
pyxllib/cv/installer.py
DELETED
@@ -1,42 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
# @Author : 陈坤泽
|
4
|
-
# @Email : 877362867@qq.com
|
5
|
-
# @Data : 2020/08/14 22:20
|
6
|
-
|
7
|
-
import subprocess
|
8
|
-
|
9
|
-
try:
|
10
|
-
import PIL
|
11
|
-
except ModuleNotFoundError:
|
12
|
-
subprocess.run(['pip', 'install', 'pillow'])
|
13
|
-
import PIL
|
14
|
-
|
15
|
-
try:
|
16
|
-
from get_image_size import get_image_size
|
17
|
-
except ModuleNotFoundError:
|
18
|
-
subprocess.run(['pip', 'install', 'opsdroid-get-image-size'])
|
19
|
-
from get_image_size import get_image_size
|
20
|
-
|
21
|
-
try:
|
22
|
-
import fitz
|
23
|
-
except ModuleNotFoundError:
|
24
|
-
subprocess.run(['pip', 'install', 'PyMuPdf'])
|
25
|
-
import fitz
|
26
|
-
|
27
|
-
try:
|
28
|
-
import cv2
|
29
|
-
except ModuleNotFoundError:
|
30
|
-
subprocess.run(['pip', 'install', 'opencv-python'])
|
31
|
-
import cv2
|
32
|
-
|
33
|
-
try:
|
34
|
-
import shapely
|
35
|
-
except ModuleNotFoundError:
|
36
|
-
try:
|
37
|
-
subprocess.run(['conda', 'install', 'shapely'])
|
38
|
-
import shapely
|
39
|
-
except FileNotFoundError:
|
40
|
-
# 这个库用pip安装是不够的,正常要用conda,有些dll才会自动配置上
|
41
|
-
subprocess.run(['pip', 'install', 'shapely'])
|
42
|
-
import shapely
|
pyxllib/debug/_0_installer.py
DELETED
@@ -1,38 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
# @Author : 陈坤泽
|
4
|
-
# @Email : 877362867@qq.com
|
5
|
-
# @Data : 2020/08/14 22:20
|
6
|
-
|
7
|
-
|
8
|
-
import subprocess
|
9
|
-
|
10
|
-
try:
|
11
|
-
from pympler import asizeof
|
12
|
-
except ModuleNotFoundError:
|
13
|
-
subprocess.run(['pip', 'install', 'pympler'])
|
14
|
-
from pympler import asizeof
|
15
|
-
|
16
|
-
try:
|
17
|
-
import lxml
|
18
|
-
except ModuleNotFoundError:
|
19
|
-
# 好像很多人这个库都装不上,好奇怪~~在命令行装就可以
|
20
|
-
subprocess.run(['pip', 'install', 'lxml'])
|
21
|
-
import lxml
|
22
|
-
|
23
|
-
try:
|
24
|
-
import bs4
|
25
|
-
except:
|
26
|
-
subprocess.run(['pip', 'install', 'beautifulsoup4'])
|
27
|
-
|
28
|
-
try:
|
29
|
-
import numpy as np
|
30
|
-
except:
|
31
|
-
subprocess.run(['pip', 'install', 'numpy'])
|
32
|
-
|
33
|
-
try:
|
34
|
-
import pandas as pd
|
35
|
-
except:
|
36
|
-
subprocess.run(['pip', 'install', 'Jinja2'])
|
37
|
-
subprocess.run(['pip', 'install', 'pandas>=0.23.4'])
|
38
|
-
import pandas as pd
|