pyxllib 0.0.43__py3-none-any.whl → 0.3.197__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyxllib/__init__.py +9 -2
- pyxllib/algo/__init__.py +8 -0
- pyxllib/algo/disjoint.py +54 -0
- pyxllib/algo/geo.py +541 -0
- pyxllib/{util/mathlib.py → algo/intervals.py} +172 -36
- pyxllib/algo/matcher.py +389 -0
- pyxllib/algo/newbie.py +166 -0
- pyxllib/algo/pupil.py +629 -0
- pyxllib/algo/shapelylib.py +67 -0
- pyxllib/algo/specialist.py +241 -0
- pyxllib/algo/stat.py +494 -0
- pyxllib/algo/treelib.py +149 -0
- pyxllib/algo/unitlib.py +66 -0
- pyxllib/autogui/__init__.py +5 -0
- pyxllib/autogui/activewin.py +246 -0
- pyxllib/autogui/all.py +9 -0
- pyxllib/autogui/autogui.py +852 -0
- pyxllib/autogui/uiautolib.py +362 -0
- pyxllib/autogui/virtualkey.py +102 -0
- pyxllib/autogui/wechat.py +827 -0
- pyxllib/autogui/wechat_msg.py +421 -0
- pyxllib/autogui/wxautolib.py +84 -0
- pyxllib/cv/__init__.py +1 -11
- pyxllib/cv/expert.py +267 -0
- pyxllib/cv/{imlib.py → imfile.py} +18 -83
- pyxllib/cv/imhash.py +39 -0
- pyxllib/cv/pupil.py +9 -0
- pyxllib/cv/rgbfmt.py +1525 -0
- pyxllib/cv/slidercaptcha.py +137 -0
- pyxllib/cv/trackbartools.py +163 -49
- pyxllib/cv/xlcvlib.py +1040 -0
- pyxllib/cv/xlpillib.py +423 -0
- pyxllib/data/__init__.py +0 -0
- pyxllib/data/echarts.py +240 -0
- pyxllib/data/jsonlib.py +89 -0
- pyxllib/{util/oss2_.py → data/oss.py} +11 -9
- pyxllib/data/pglib.py +1127 -0
- pyxllib/data/sqlite.py +568 -0
- pyxllib/{util → data}/sqllib.py +13 -31
- pyxllib/ext/JLineViewer.py +505 -0
- pyxllib/ext/__init__.py +6 -0
- pyxllib/{util → ext}/demolib.py +119 -35
- pyxllib/ext/drissionlib.py +277 -0
- pyxllib/ext/kq5034lib.py +12 -0
- pyxllib/{util/main.py → ext/old.py} +122 -284
- pyxllib/ext/qt.py +449 -0
- pyxllib/ext/robustprocfile.py +497 -0
- pyxllib/ext/seleniumlib.py +76 -0
- pyxllib/{util/tklib.py → ext/tk.py} +10 -11
- pyxllib/ext/unixlib.py +827 -0
- pyxllib/ext/utools.py +351 -0
- pyxllib/{util/webhooklib.py → ext/webhook.py} +45 -17
- pyxllib/ext/win32lib.py +40 -0
- pyxllib/ext/wjxlib.py +88 -0
- pyxllib/ext/wpsapi.py +124 -0
- pyxllib/ext/xlwork.py +9 -0
- pyxllib/ext/yuquelib.py +1105 -0
- pyxllib/file/__init__.py +17 -0
- pyxllib/file/docxlib.py +761 -0
- pyxllib/{util → file}/gitlib.py +40 -27
- pyxllib/file/libreoffice.py +165 -0
- pyxllib/file/movielib.py +148 -0
- pyxllib/file/newbie.py +10 -0
- pyxllib/file/onenotelib.py +1469 -0
- pyxllib/file/packlib/__init__.py +330 -0
- pyxllib/{util → file/packlib}/zipfile.py +598 -195
- pyxllib/file/pdflib.py +426 -0
- pyxllib/file/pupil.py +185 -0
- pyxllib/file/specialist/__init__.py +685 -0
- pyxllib/{basic/_5_dirlib.py → file/specialist/dirlib.py} +364 -93
- pyxllib/file/specialist/download.py +193 -0
- pyxllib/file/specialist/filelib.py +2829 -0
- pyxllib/file/xlsxlib.py +3131 -0
- pyxllib/file/xlsyncfile.py +341 -0
- pyxllib/prog/__init__.py +5 -0
- pyxllib/prog/cachetools.py +64 -0
- pyxllib/prog/deprecatedlib.py +233 -0
- pyxllib/prog/filelock.py +42 -0
- pyxllib/prog/ipyexec.py +253 -0
- pyxllib/prog/multiprogs.py +940 -0
- pyxllib/prog/newbie.py +451 -0
- pyxllib/prog/pupil.py +1197 -0
- pyxllib/{sitepackages.py → prog/sitepackages.py} +5 -3
- pyxllib/prog/specialist/__init__.py +391 -0
- pyxllib/prog/specialist/bc.py +203 -0
- pyxllib/prog/specialist/browser.py +497 -0
- pyxllib/prog/specialist/common.py +347 -0
- pyxllib/prog/specialist/datetime.py +199 -0
- pyxllib/prog/specialist/tictoc.py +240 -0
- pyxllib/prog/specialist/xllog.py +180 -0
- pyxllib/prog/xlosenv.py +108 -0
- pyxllib/stdlib/__init__.py +17 -0
- pyxllib/{util → stdlib}/tablepyxl/__init__.py +1 -3
- pyxllib/{util → stdlib}/tablepyxl/style.py +1 -1
- pyxllib/{util → stdlib}/tablepyxl/tablepyxl.py +2 -4
- pyxllib/text/__init__.py +8 -0
- pyxllib/text/ahocorasick.py +39 -0
- pyxllib/text/airscript.js +744 -0
- pyxllib/text/charclasslib.py +121 -0
- pyxllib/text/jiebalib.py +267 -0
- pyxllib/text/jinjalib.py +32 -0
- pyxllib/text/jsa_ai_prompt.md +271 -0
- pyxllib/text/jscode.py +922 -0
- pyxllib/text/latex/__init__.py +158 -0
- pyxllib/text/levenshtein.py +303 -0
- pyxllib/text/nestenv.py +1215 -0
- pyxllib/text/newbie.py +300 -0
- pyxllib/text/pupil/__init__.py +8 -0
- pyxllib/text/pupil/common.py +1121 -0
- pyxllib/text/pupil/xlalign.py +326 -0
- pyxllib/text/pycode.py +47 -0
- pyxllib/text/specialist/__init__.py +8 -0
- pyxllib/text/specialist/common.py +112 -0
- pyxllib/text/specialist/ptag.py +186 -0
- pyxllib/text/spellchecker.py +172 -0
- pyxllib/text/templates/echart_base.html +11 -0
- pyxllib/text/templates/highlight_code.html +17 -0
- pyxllib/text/templates/latex_editor.html +103 -0
- pyxllib/text/vbacode.py +17 -0
- pyxllib/text/xmllib.py +747 -0
- pyxllib/xl.py +39 -0
- pyxllib/xlcv.py +17 -0
- pyxllib-0.3.197.dist-info/METADATA +48 -0
- pyxllib-0.3.197.dist-info/RECORD +126 -0
- {pyxllib-0.0.43.dist-info → pyxllib-0.3.197.dist-info}/WHEEL +4 -5
- pyxllib/basic/_1_strlib.py +0 -945
- pyxllib/basic/_2_timelib.py +0 -488
- pyxllib/basic/_3_pathlib.py +0 -916
- pyxllib/basic/_4_loglib.py +0 -419
- pyxllib/basic/__init__.py +0 -54
- pyxllib/basic/arrow_.py +0 -250
- pyxllib/basic/chardet_.py +0 -66
- pyxllib/basic/dirlib.py +0 -529
- pyxllib/basic/dprint.py +0 -202
- pyxllib/basic/extension.py +0 -12
- pyxllib/basic/judge.py +0 -31
- pyxllib/basic/log.py +0 -204
- pyxllib/basic/pathlib_.py +0 -705
- pyxllib/basic/pytictoc.py +0 -102
- pyxllib/basic/qiniu_.py +0 -61
- pyxllib/basic/strlib.py +0 -761
- pyxllib/basic/timer.py +0 -132
- pyxllib/cv/cv.py +0 -834
- pyxllib/cv/cvlib/_1_geo.py +0 -543
- pyxllib/cv/cvlib/_2_cvprcs.py +0 -309
- pyxllib/cv/cvlib/_2_imgproc.py +0 -594
- pyxllib/cv/cvlib/_3_pilprcs.py +0 -80
- pyxllib/cv/cvlib/_4_cvimg.py +0 -211
- pyxllib/cv/cvlib/__init__.py +0 -10
- pyxllib/cv/debugtools.py +0 -82
- pyxllib/cv/fitz_.py +0 -300
- pyxllib/cv/installer.py +0 -42
- pyxllib/debug/_0_installer.py +0 -38
- pyxllib/debug/_1_typelib.py +0 -277
- pyxllib/debug/_2_chrome.py +0 -198
- pyxllib/debug/_3_showdir.py +0 -161
- pyxllib/debug/_4_bcompare.py +0 -140
- pyxllib/debug/__init__.py +0 -49
- pyxllib/debug/bcompare.py +0 -132
- pyxllib/debug/chrome.py +0 -198
- pyxllib/debug/installer.py +0 -38
- pyxllib/debug/showdir.py +0 -158
- pyxllib/debug/typelib.py +0 -278
- pyxllib/image/__init__.py +0 -12
- pyxllib/torch/__init__.py +0 -20
- pyxllib/torch/modellib.py +0 -37
- pyxllib/torch/trainlib.py +0 -344
- pyxllib/util/__init__.py +0 -20
- pyxllib/util/aip_.py +0 -141
- pyxllib/util/casiadb.py +0 -59
- pyxllib/util/excellib.py +0 -495
- pyxllib/util/filelib.py +0 -612
- pyxllib/util/jsondata.py +0 -27
- pyxllib/util/jsondata2.py +0 -92
- pyxllib/util/labelmelib.py +0 -139
- pyxllib/util/onepy/__init__.py +0 -29
- pyxllib/util/onepy/onepy.py +0 -574
- pyxllib/util/onepy/onmanager.py +0 -170
- pyxllib/util/pyautogui_.py +0 -219
- pyxllib/util/textlib.py +0 -1305
- pyxllib/util/unorder.py +0 -22
- pyxllib/util/xmllib.py +0 -639
- pyxllib-0.0.43.dist-info/METADATA +0 -39
- pyxllib-0.0.43.dist-info/RECORD +0 -80
- pyxllib-0.0.43.dist-info/top_level.txt +0 -1
- {pyxllib-0.0.43.dist-info → pyxllib-0.3.197.dist-info/licenses}/LICENSE +0 -0
pyxllib/cv/expert.py
ADDED
@@ -0,0 +1,267 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : 陈坤泽
|
4
|
+
# @Email : 877362867@qq.com
|
5
|
+
# @Date : 2021/08/25 15:57
|
6
|
+
|
7
|
+
from collections import defaultdict
|
8
|
+
import concurrent.futures
|
9
|
+
|
10
|
+
import cv2
|
11
|
+
import pandas as pd
|
12
|
+
from tqdm import tqdm
|
13
|
+
|
14
|
+
import PIL.Image
|
15
|
+
|
16
|
+
from pyxllib.algo.stat import update_dataframes_to_excel
|
17
|
+
from pyxllib.file.specialist import get_etag, XlPath
|
18
|
+
from pyxllib.prog.specialist import Iterate
|
19
|
+
from pyxllib.cv.xlcvlib import CvImg, xlcv
|
20
|
+
from pyxllib.cv.xlpillib import PilImg, xlpil
|
21
|
+
|
22
|
+
|
23
|
+
def __1_目录级处理图片的功能():
|
24
|
+
pass
|
25
|
+
|
26
|
+
|
27
|
+
class ImagesDir(XlPath):
    """ Directory-level batch operations on image files.

    The methods work in place by default. If a mistake would be costly, keep a
    separate backup of the original data and operate on a copy of the directory.
    """

    def debug_image_func(self, func, pattern='*', *, save=None, show=False):
        """ Apply ``func`` to every image matched by ``pattern``, for debugging.

        :param func: called as ``new_img = func(img)`` where ``img`` is the image
            loaded by ``xlcv.read``. For a function with extra parameters, wrap it:
            ``lambda im: func(im, arg1=..., arg2=...)``
        :param pattern: glob pattern handed to ``self.glob_images``
        :param save: if given, a directory; every result is written to
            ``self / save / <file name>``
        :param show: if True, each result is displayed with ``xlcv.imshow2``;
            any key shows the next image, ESC stops the loop
        :return: None

        TODO show the original and the processed image side by side
        TODO support displaying several results at once
        """
        if save:
            save = XlPath(save)

        for f in self.glob_images(pattern):
            im1 = xlcv.read(f)
            im2 = func(im1)

            if save:
                xlcv.write(im2, self / save / f.name)

            if show:
                xlcv.imshow2(im2)
                key = cv2.waitKey()
                # waitKey returns an integer key code; comparing it with the string
                # '0x1B' could never match. Mask to the low byte because some
                # platforms set higher bits in the returned code.
                if key & 0xFF == 0x1B:  # ESC
                    break

    def fix_suffixs(self, pattern='**/*', log_file='_图片统计.xlsx', max_workers=None, pinterval=None):
        """ Fix wrong file suffixes.

        Every ``(file, ext)`` pair yielded by ``self.xglob_faker_suffix_images(pattern)``
        is read and written back under its own name, so the stored data ends up in the
        format the file name promises.

        :param pattern: glob pattern handed to ``self.xglob_faker_suffix_images``
        :param log_file: if truthy, the change log is stored as sheet '修改后缀名' in
            the workbook ``XlPath.init(log_file, self)``
        :param max_workers: forwarded to ``Iterate.run``
        :param pinterval: progress interval forwarded to ``Iterate.run``; when None
            a percentage interval is chosen automatically
        :return: DataFrame with one row per rewritten file (relative path, original type)
        """
        # 1 rewrite the files
        ls = []

        def process_image_file(args):
            """ Re-encode a single image so that its content matches its suffix. """
            file, ext = args
            xlcv.write(xlcv.read(file), file)
            ls.append([file.relpath(self).as_posix(), ext])

        files_with_exts = list(self.xglob_faker_suffix_images(pattern))
        if pinterval is None and files_with_exts:
            # Percentage that corresponds to 1000 files, but never below 1%.
            p = max(1000 * 100 // len(files_with_exts), 1)
            if p < 50:  # an interval of 50% or more is not worth displaying
                pinterval = f'{p}%'
        Iterate(files_with_exts).run(process_image_file, max_workers=max_workers, pinterval=pinterval)

        # 2 record what was changed
        df = pd.DataFrame.from_records(ls, columns=['图片名', '原图片类型'])
        if log_file:
            update_dataframes_to_excel(XlPath.init(log_file, self), {'修改后缀名': df})
        return df

    def reduce_image_filesize(self, pattern='**/*',
                              limit_size=4 * 1024 ** 2, *,
                              read_flags=None,
                              change_length=False,
                              suffix=None,
                              log_file='_图片统计.xlsx',
                              max_workers=None, pinterval=None):
        """ Reduce the file size of the images in this directory.

        Every image is read and re-encoded, so this can be slow. A file is only
        overwritten when the re-encoded version is actually smaller.

        :param limit_size: target upper bound in bytes, used only when
            ``change_length`` is True.
            Author's notes: about 300 KB (``300 * 1024``) is enough for personal
            photo albums. Baidu API limits vary by endpoint (4M, 6M, 10M, ...) and
            apply to the base64 payload, which is about 1/3 larger, so stay within
            3/4 of the nominal limit.
        :param read_flags: forwarded to ``xlpil.read``. Author's note: 1 normalizes
            RGBA and other unusual modes to RGB.
        :param change_length: when True, ``xlpil.reduce_filesize`` is applied with
            ``limit_size``. The default False only re-reads and re-writes the image,
            which by itself may already save a lot of space.
        :param suffix: unify the image format; must include the leading dot, e.g.
            '.jpg'. Defaults to keeping each file's own suffix. Originals in other
            formats are deleted once they have been replaced.
        :param log_file: if truthy, the details are stored as sheet '图片瘦身' in the
            workbook ``XlPath.init(log_file, self)``
        :param max_workers: forwarded to ``Iterate.run``
        :param pinterval: progress interval forwarded to ``Iterate.run``; when None
            a percentage interval is chosen automatically
        :return: DataFrame with original path, new path, original size and new size
        """
        # 1 debug information
        print('原始大小', self.size(human_readable=True))

        # 2 shrink the images
        ls = []

        def process_image_file(f):
            """ Re-encode a single image and keep the result if it is smaller. """
            size1 = f.size()
            im = xlpil.read(f, read_flags)
            _suffix = suffix or f.suffix
            if change_length:
                im = xlpil.reduce_filesize(im, limit_size, _suffix)
            size2 = xlpil.evaluate_image_file_size(im, _suffix)
            dst_f = f.with_suffix(_suffix)
            if size2 < size1:  # only update when the file really gets smaller
                xlpil.write(im, dst_f)
                # NOTE(review): nesting reconstructed from a flattened listing.
                # The original is removed only after its replacement has been
                # written, so a failed size reduction can never lose the image.
                if f.suffix != _suffix:
                    f.delete()
            ls.append([f.relpath(self).as_posix(), dst_f.relpath(self).as_posix(), size1, size2])

        files = list(self.glob_images(pattern))
        if pinterval is None and files:
            # Percentage that corresponds to 100 files, but never below 1%.
            p = max(100 * 100 // len(files), 1)
            if p < 50:  # an interval of 50% or more is not worth displaying
                pinterval = f'{p}%'
        Iterate(files).run(process_image_file, max_workers=max_workers, pinterval=pinterval)

        print('新目录大小', self.size(human_readable=True))

        # 3 record the details
        # Without ``suffix`` the '新图片' column equals '原图片'. When '新文件大小' is
        # not smaller than '原文件大小' the file was left untouched.
        df = pd.DataFrame.from_records(ls, columns=['原图片', '新图片', '原文件大小', '新文件大小'])
        if log_file:
            update_dataframes_to_excel(XlPath.init(log_file, self), {'图片瘦身': df})
        return df

    def adjust_image_shape(self, pattern='*', min_length=None, max_length=None, print_mode=True):
        """ Resize images whose shorter side is below ``min_length`` or whose
        longer side is above ``max_length``.

        :param pattern: glob pattern handed to ``self.glob_images``
        :param min_length: lower bound for the shorter side, None/0 to disable
        :param max_length: upper bound for the longer side, None/0 to disable
        :param print_mode: print one line per image that was rewritten
        :return: None
        """

        def printf(*args, **kwargs):
            if print_mode:
                print(*args, **kwargs)

        j = 1  # running number of the rewritten images, used in the printed log
        for f in self.glob_images(pattern):
            # Author's note: reading the size through PIL is faster; the actual
            # processing is done with cv2.
            h, w = xlpil.read(f).size[::-1]
            x, y = min(h, w), max(h, w)

            if (min_length and x < min_length) or (max_length and y > max_length):
                im = xlcv.read(f)
                im2 = xlcv.adjust_shape(im, min_length, max_length)
                if im2.shape != im.shape:
                    printf(f'{j}、{f} {im.shape} -> {im2.shape}')
                    xlcv.write(im2, f)
                    j += 1

    def check_repeat_phash_images(self, pattern='**/*', **kwargs):
        """ Run ``self.check_repeat_files`` with ``phash`` as the default hash function
        and ``self.glob_images(pattern)`` as the default file list.

        :param pattern: glob pattern for the default file list
        :param kwargs: forwarded to ``self.check_repeat_files``; 'files' and
            'hash_func' override the defaults
        """
        from pyxllib.cv.imhash import phash
        if 'files' not in kwargs:
            kwargs['files'] = self.glob_images(pattern)
        if 'hash_func' not in kwargs:
            kwargs['hash_func'] = lambda p: phash(p)
        self.check_repeat_files(pattern, **kwargs)

    def check_repeat_dhash_images(self, pattern='**/*', **kwargs):
        """ Run ``self.check_repeat_files`` with ``dhash`` as the default hash function
        and ``self.glob_images(pattern)`` as the default file list.

        :param pattern: glob pattern for the default file list
        :param kwargs: forwarded to ``self.check_repeat_files``; 'files' and
            'hash_func' override the defaults
        """
        from pyxllib.cv.imhash import dhash
        if 'files' not in kwargs:
            kwargs['files'] = self.glob_images(pattern)
        if 'hash_func' not in kwargs:
            kwargs['hash_func'] = lambda p: dhash(p)
        self.check_repeat_files(pattern, **kwargs)

    def clear_exif(self):
        """ Clear the exif orientation mark of the images.

        Images carrying an "Orientation" exif entry are passed through
        ``xlpil.apply_exif_orientation`` and written back; the number of such
        images is printed at the end.
        """
        cnt = 0
        for file in tqdm(self.rglob_images()):
            im = xlpil.read(file)
            exif = xlpil.get_exif(im)
            if exif:
                orientation = exif.get("Orientation", None)
                if orientation:
                    cnt += 1
                    im = xlpil.apply_exif_orientation(im)
                    # NOTE(review): nesting reconstructed from a flattened listing;
                    # only images that were actually re-oriented are rewritten.
                    xlpil.write(im, file)
        print(f'处理了{cnt}份exif')
|
198
|
+
|
199
|
+
|
200
|
+
def find_modified_images(dirs, print_mode=False):
    """ Find images that may have been modified.

    Typically used in data-annotation work: compare a returned data directory with
    the original one, matching files by name, to see whether the image content
    changed in between (for example, was rotated).

    All copies of a file name are compared by etag and by pixel size; a name is
    reported when either differs. Similarity hashes turned out not to be usable
    for this, so exact etags are compared.

    :param list[str] dirs: directories holding the images, scanned with
        ``XlPath.rglob_images``
    :param bool print_mode: print progress information, default False
    :return dict[str, list[str]]: image name -> posix paths of all its copies, for
        the names whose copies differ

    Example::

        import os
        from pprint import pprint
        from pyxllib.cv.expert import find_modified_images

        os.chdir('/home/chenkunze/data')
        res = find_modified_images([r'm2305latex2lgx/train_images_sub',
                                    r'm2305latex2lg/1、做完的数据'])
        pprint(res)
    """

    # 1 group the images by file name
    def group_by_name(dirs):
        """ Group image paths by file name.

        :param list[str] dirs: directories holding the images
        :return dict[str, list]: image name -> paths with that name, e.g.
            ``{'image1.jpg': [dir1/image1.jpg, dir2/image1.jpg], ...}``
        """
        image_groups = defaultdict(list)
        for d in dirs:
            for path in XlPath(d).rglob_images():
                image_groups[path.name].append(path)
        return image_groups

    image_groups = group_by_name(dirs)

    # 2 collect the groups whose members differ
    modified_images = {}
    progress_counter = 0
    total_files = sum(len(paths) for paths in image_groups.values())

    if print_mode:
        print(f"Total files: {total_files}")

    for image_name, paths in image_groups.items():
        if len(paths) > 1:  # a single copy has nothing to be compared with
            hash_values = {get_etag(str(path)) for path in paths}
            sizes = set()
            for path in paths:
                # Close every file right after reading its size; otherwise a big
                # directory keeps one open handle per image.
                with PIL.Image.open(path) as im:
                    sizes.add(im.size)

            # Could be extended to describe the difference in detail (rotated by
            # 90/180/270 degrees, ...), but that would cost far more computation.
            if len(hash_values) > 1 or len(sizes) > 1:
                modified_images[image_name] = [XlPath(path).as_posix() for path in paths]

        if print_mode:
            # Single-copy names count too, so the progress ends at the total.
            progress_counter += len(paths)
            print(f"Progress: {progress_counter}/{total_files}")

    return modified_images
|
@@ -2,48 +2,21 @@
|
|
2
2
|
# -*- coding: utf-8 -*-
|
3
3
|
# @Author : 陈坤泽
|
4
4
|
# @Email : 877362867@qq.com
|
5
|
-
# @
|
5
|
+
# @Date : 2020/06/02 16:00
|
6
6
|
|
7
7
|
|
8
8
|
from collections import defaultdict
|
9
9
|
import concurrent.futures
|
10
|
+
import os
|
11
|
+
import re
|
10
12
|
import subprocess
|
11
13
|
|
12
|
-
import
|
13
|
-
from PIL import Image
|
14
|
-
|
15
|
-
from pyxllib.basic import *
|
16
|
-
|
17
|
-
|
18
|
-
def get_img_content(in_):
|
19
|
-
"""获取in_代表的图片的二进制数据
|
20
|
-
:param in_: 可以是本地文件,也可以是图片url地址,也可以是Image对象
|
21
|
-
"""
|
22
|
-
from pyxllib.basic import is_url, is_file
|
23
|
-
|
24
|
-
# 1 取不同来源的数据
|
25
|
-
if is_url(in_):
|
26
|
-
content = requests.get(in_).content
|
27
|
-
img = Image.open(io.BytesIO(content))
|
28
|
-
elif is_file(in_):
|
29
|
-
with open(in_, 'rb') as f:
|
30
|
-
content = f.read()
|
31
|
-
img = Image.open(in_)
|
32
|
-
elif isinstance(in_, Image.Image):
|
33
|
-
img = in_
|
34
|
-
else:
|
35
|
-
raise ValueError
|
36
|
-
|
37
|
-
img = image_rgba2rgb(img) # 如果是RGBA类型,要把透明底变成白色
|
38
|
-
file = io.BytesIO()
|
39
|
-
img.save(file, 'JPEG')
|
40
|
-
content = file.getvalue()
|
41
|
-
|
42
|
-
return content
|
14
|
+
from pyxllib.file.specialist import File
|
43
15
|
|
44
16
|
|
45
17
|
def magick(infile, *, outfile=None, if_exists='error', transparent=None, trim=False, density=None, other_args=None):
|
46
|
-
"""调用iamge magick的magick.exe工具
|
18
|
+
""" 调用iamge magick的magick.exe工具
|
19
|
+
|
47
20
|
:param infile: 处理对象文件
|
48
21
|
:param outfile: 输出文件,可以不写,默认原地操作(只设置透明度、裁剪时可能会原地操作)
|
49
22
|
:param if_exists: 如果目标文件已存在要怎么处理
|
@@ -64,10 +37,10 @@ def magick(infile, *, outfile=None, if_exists='error', transparent=None, trim=Fa
|
|
64
37
|
|
65
38
|
# 2
|
66
39
|
# 200914周一20:40,这有个相对路径的bug,修复了下,否则 test/a.png 会变成 test/test/a.png
|
67
|
-
if
|
40
|
+
if File(outfile).exist_preprcs(if_exists):
|
68
41
|
# 2.1 判断是否是支持的输入文件类型
|
69
42
|
ext = os.path.splitext(infile)[1].lower()
|
70
|
-
if not
|
43
|
+
if not File(infile) or not ext in ('.png', '.eps', '.pdf', '.jpg', '.jpeg', '.wmf', '.emf'):
|
71
44
|
return False
|
72
45
|
|
73
46
|
# 2.2 生成需要执行的参数
|
@@ -93,11 +66,12 @@ def magick(infile, *, outfile=None, if_exists='error', transparent=None, trim=Fa
|
|
93
66
|
return outfile
|
94
67
|
|
95
68
|
|
96
|
-
def ensure_pngs(folder, *, if_exists='
|
69
|
+
def ensure_pngs(folder, *, if_exists='skip',
|
97
70
|
transparent=None, trim=False,
|
98
71
|
density=None, epsdensity=None,
|
99
72
|
max_workers=None):
|
100
|
-
"""确保一个目录下的所有图片都有一个png版本格式的文件
|
73
|
+
""" 确保一个目录下的所有图片都有一个png版本格式的文件
|
74
|
+
|
101
75
|
:param folder: 目录名,会遍历直接目录下所有没png的stem名称生成png
|
102
76
|
:param if_exists: 如果文件已存在,要进行的操作
|
103
77
|
'replace',直接替换
|
@@ -129,7 +103,7 @@ def ensure_pngs(folder, *, if_exists='ignore',
|
|
129
103
|
if if_exists == 'ignore':
|
130
104
|
continue
|
131
105
|
elif if_exists == 'backup':
|
132
|
-
|
106
|
+
File(name, folder, suffix='.png').backup(move=True)
|
133
107
|
elif if_exists == 'replace':
|
134
108
|
pass
|
135
109
|
else:
|
@@ -156,7 +130,8 @@ def ensure_pngs(folder, *, if_exists='ignore',
|
|
156
130
|
|
157
131
|
|
158
132
|
def zoomsvg(file, scale=1):
|
159
|
-
"""
|
133
|
+
""" 缩放svg文件
|
134
|
+
|
160
135
|
:param file:
|
161
136
|
如果输入一个目录,会处理目录下所有的svg图片
|
162
137
|
否则只处理指定的文件
|
@@ -172,53 +147,13 @@ def zoomsvg(file, scale=1):
|
|
172
147
|
return re.sub(r'((?:height|width)=")(\d+(?:\.\d+)?)', g, m.group())
|
173
148
|
|
174
149
|
if os.path.isfile(file):
|
175
|
-
s = re.sub(r'<svg .+?>', func,
|
176
|
-
|
150
|
+
s = re.sub(r'<svg .+?>', func, File(file).read(), flags=re.DOTALL)
|
151
|
+
File(file).write(s, if_exists='replace')
|
177
152
|
elif os.path.isdir(file):
|
178
153
|
for f in os.listdir(file):
|
179
154
|
if not f.endswith('.svg'): continue
|
180
155
|
f = os.path.join(file, f)
|
181
|
-
s = re.sub(r'<svg\s+.+?>', func,
|
182
|
-
|
156
|
+
s = re.sub(r'<svg\s+.+?>', func, File(f).read(), flags=re.DOTALL)
|
157
|
+
File(file).write(s, if_exists='replace')
|
183
158
|
elif isinstance(file, str) and '<svg ' in file: # 输入svg的代码文本
|
184
159
|
return re.sub(r'<svg .+?>', func, file, flags=re.DOTALL)
|
185
|
-
|
186
|
-
|
187
|
-
def reduce_image_filesize(path, filesize):
|
188
|
-
"""
|
189
|
-
:param path: 图片路径,支持png、jpg等多种格式
|
190
|
-
:param filesize: 单位Bytes
|
191
|
-
可以用 300*1024 来表示 300KB
|
192
|
-
:return:
|
193
|
-
|
194
|
-
>> reduce_image_filesize('a.jpg', 300*1024)
|
195
|
-
"""
|
196
|
-
from PIL import Image
|
197
|
-
|
198
|
-
path = Path(path)
|
199
|
-
# 1 无论什么情况,都先做个100%的resize处理,很可能会去掉一些没用的冗余信息
|
200
|
-
im = Image.open(f'{path}')
|
201
|
-
im.resize(im.size).save(f'{path}')
|
202
|
-
|
203
|
-
# 2 然后开始循环处理
|
204
|
-
while True:
|
205
|
-
r = path.size / filesize
|
206
|
-
if r <= 1: break
|
207
|
-
# 假设图片面积和文件大小成正比,如果r=4,表示长宽要各减小至1/(r**0.5)才能到目标文件大小
|
208
|
-
rate = min(1 / (r ** 0.5), 0.95) # 并且限制每轮至少要缩小至95%,避免可能会迭代太多轮
|
209
|
-
im = Image.open(f'{path}')
|
210
|
-
im.resize((int(im.size[0] * rate), int(im.size[1] * rate))).save(f'{path}')
|
211
|
-
|
212
|
-
|
213
|
-
def image_rgba2rgb(im):
|
214
|
-
if im.mode in ('RGBA', 'P'):
|
215
|
-
# 判断图片mode模式,如果是RGBA或P等可能有透明底,则和一个白底图片合成去除透明底
|
216
|
-
background = Image.new('RGBA', im.size, (255, 255, 255))
|
217
|
-
# composite是合成的意思。将右图的alpha替换为左图内容
|
218
|
-
im = Image.alpha_composite(background, im.convert('RGBA')).convert('RGB')
|
219
|
-
return im
|
220
|
-
|
221
|
-
|
222
|
-
____section_temp = """
|
223
|
-
临时添加的新功能
|
224
|
-
"""
|
pyxllib/cv/imhash.py
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : 陈坤泽
|
4
|
+
# @Email : 877362867@qq.com
|
5
|
+
# @Date : 2021/06/08 22:53
|
6
|
+
|
7
|
+
"""
|
8
|
+
TODO 写一些图片相似度相关功能
|
9
|
+
"""
|
10
|
+
|
11
|
+
from pyxllib.prog.pupil import check_install_package
|
12
|
+
|
13
|
+
check_install_package('imagehash', 'ImageHash')
|
14
|
+
|
15
|
+
import imagehash
|
16
|
+
import numpy as np
|
17
|
+
|
18
|
+
from pyxllib.cv.xlpillib import xlpil
|
19
|
+
|
20
|
+
|
21
|
+
def get_init_hash():
    """ Return an initial, empty hash: an ImageHash over an 8x8 all-False array. """
    empty_bits = np.zeros((8, 8), dtype=bool)
    return imagehash.ImageHash(empty_bits)
|
24
|
+
|
25
|
+
|
26
|
+
def phash(image, *args, **kwargs):
    """ Wrapper around ``imagehash.phash`` whose ``image`` is first loaded with
    ``xlpil.read``, so it accepts whatever input formats that reader supports.

    Remaining positional and keyword arguments are forwarded unchanged.
    """
    return imagehash.phash(xlpil.read(image), *args, **kwargs)
|
31
|
+
|
32
|
+
|
33
|
+
def dhash(image, *args, **kwargs):
    """ Wrapper around ``imagehash.dhash`` whose ``image`` is first loaded with
    ``xlpil.read``, so it accepts whatever input formats that reader supports.

    Author's note: dhash is the variant the upstream project recommends, and it
    is fast as well. Remaining arguments are forwarded unchanged.
    """
    return imagehash.dhash(xlpil.read(image), *args, **kwargs)
|
pyxllib/cv/pupil.py
ADDED