pyxllib 0.0.43__py3-none-any.whl → 0.3.197__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186) hide show
  1. pyxllib/__init__.py +9 -2
  2. pyxllib/algo/__init__.py +8 -0
  3. pyxllib/algo/disjoint.py +54 -0
  4. pyxllib/algo/geo.py +541 -0
  5. pyxllib/{util/mathlib.py → algo/intervals.py} +172 -36
  6. pyxllib/algo/matcher.py +389 -0
  7. pyxllib/algo/newbie.py +166 -0
  8. pyxllib/algo/pupil.py +629 -0
  9. pyxllib/algo/shapelylib.py +67 -0
  10. pyxllib/algo/specialist.py +241 -0
  11. pyxllib/algo/stat.py +494 -0
  12. pyxllib/algo/treelib.py +149 -0
  13. pyxllib/algo/unitlib.py +66 -0
  14. pyxllib/autogui/__init__.py +5 -0
  15. pyxllib/autogui/activewin.py +246 -0
  16. pyxllib/autogui/all.py +9 -0
  17. pyxllib/autogui/autogui.py +852 -0
  18. pyxllib/autogui/uiautolib.py +362 -0
  19. pyxllib/autogui/virtualkey.py +102 -0
  20. pyxllib/autogui/wechat.py +827 -0
  21. pyxllib/autogui/wechat_msg.py +421 -0
  22. pyxllib/autogui/wxautolib.py +84 -0
  23. pyxllib/cv/__init__.py +1 -11
  24. pyxllib/cv/expert.py +267 -0
  25. pyxllib/cv/{imlib.py → imfile.py} +18 -83
  26. pyxllib/cv/imhash.py +39 -0
  27. pyxllib/cv/pupil.py +9 -0
  28. pyxllib/cv/rgbfmt.py +1525 -0
  29. pyxllib/cv/slidercaptcha.py +137 -0
  30. pyxllib/cv/trackbartools.py +163 -49
  31. pyxllib/cv/xlcvlib.py +1040 -0
  32. pyxllib/cv/xlpillib.py +423 -0
  33. pyxllib/data/__init__.py +0 -0
  34. pyxllib/data/echarts.py +240 -0
  35. pyxllib/data/jsonlib.py +89 -0
  36. pyxllib/{util/oss2_.py → data/oss.py} +11 -9
  37. pyxllib/data/pglib.py +1127 -0
  38. pyxllib/data/sqlite.py +568 -0
  39. pyxllib/{util → data}/sqllib.py +13 -31
  40. pyxllib/ext/JLineViewer.py +505 -0
  41. pyxllib/ext/__init__.py +6 -0
  42. pyxllib/{util → ext}/demolib.py +119 -35
  43. pyxllib/ext/drissionlib.py +277 -0
  44. pyxllib/ext/kq5034lib.py +12 -0
  45. pyxllib/{util/main.py → ext/old.py} +122 -284
  46. pyxllib/ext/qt.py +449 -0
  47. pyxllib/ext/robustprocfile.py +497 -0
  48. pyxllib/ext/seleniumlib.py +76 -0
  49. pyxllib/{util/tklib.py → ext/tk.py} +10 -11
  50. pyxllib/ext/unixlib.py +827 -0
  51. pyxllib/ext/utools.py +351 -0
  52. pyxllib/{util/webhooklib.py → ext/webhook.py} +45 -17
  53. pyxllib/ext/win32lib.py +40 -0
  54. pyxllib/ext/wjxlib.py +88 -0
  55. pyxllib/ext/wpsapi.py +124 -0
  56. pyxllib/ext/xlwork.py +9 -0
  57. pyxllib/ext/yuquelib.py +1105 -0
  58. pyxllib/file/__init__.py +17 -0
  59. pyxllib/file/docxlib.py +761 -0
  60. pyxllib/{util → file}/gitlib.py +40 -27
  61. pyxllib/file/libreoffice.py +165 -0
  62. pyxllib/file/movielib.py +148 -0
  63. pyxllib/file/newbie.py +10 -0
  64. pyxllib/file/onenotelib.py +1469 -0
  65. pyxllib/file/packlib/__init__.py +330 -0
  66. pyxllib/{util → file/packlib}/zipfile.py +598 -195
  67. pyxllib/file/pdflib.py +426 -0
  68. pyxllib/file/pupil.py +185 -0
  69. pyxllib/file/specialist/__init__.py +685 -0
  70. pyxllib/{basic/_5_dirlib.py → file/specialist/dirlib.py} +364 -93
  71. pyxllib/file/specialist/download.py +193 -0
  72. pyxllib/file/specialist/filelib.py +2829 -0
  73. pyxllib/file/xlsxlib.py +3131 -0
  74. pyxllib/file/xlsyncfile.py +341 -0
  75. pyxllib/prog/__init__.py +5 -0
  76. pyxllib/prog/cachetools.py +64 -0
  77. pyxllib/prog/deprecatedlib.py +233 -0
  78. pyxllib/prog/filelock.py +42 -0
  79. pyxllib/prog/ipyexec.py +253 -0
  80. pyxllib/prog/multiprogs.py +940 -0
  81. pyxllib/prog/newbie.py +451 -0
  82. pyxllib/prog/pupil.py +1197 -0
  83. pyxllib/{sitepackages.py → prog/sitepackages.py} +5 -3
  84. pyxllib/prog/specialist/__init__.py +391 -0
  85. pyxllib/prog/specialist/bc.py +203 -0
  86. pyxllib/prog/specialist/browser.py +497 -0
  87. pyxllib/prog/specialist/common.py +347 -0
  88. pyxllib/prog/specialist/datetime.py +199 -0
  89. pyxllib/prog/specialist/tictoc.py +240 -0
  90. pyxllib/prog/specialist/xllog.py +180 -0
  91. pyxllib/prog/xlosenv.py +108 -0
  92. pyxllib/stdlib/__init__.py +17 -0
  93. pyxllib/{util → stdlib}/tablepyxl/__init__.py +1 -3
  94. pyxllib/{util → stdlib}/tablepyxl/style.py +1 -1
  95. pyxllib/{util → stdlib}/tablepyxl/tablepyxl.py +2 -4
  96. pyxllib/text/__init__.py +8 -0
  97. pyxllib/text/ahocorasick.py +39 -0
  98. pyxllib/text/airscript.js +744 -0
  99. pyxllib/text/charclasslib.py +121 -0
  100. pyxllib/text/jiebalib.py +267 -0
  101. pyxllib/text/jinjalib.py +32 -0
  102. pyxllib/text/jsa_ai_prompt.md +271 -0
  103. pyxllib/text/jscode.py +922 -0
  104. pyxllib/text/latex/__init__.py +158 -0
  105. pyxllib/text/levenshtein.py +303 -0
  106. pyxllib/text/nestenv.py +1215 -0
  107. pyxllib/text/newbie.py +300 -0
  108. pyxllib/text/pupil/__init__.py +8 -0
  109. pyxllib/text/pupil/common.py +1121 -0
  110. pyxllib/text/pupil/xlalign.py +326 -0
  111. pyxllib/text/pycode.py +47 -0
  112. pyxllib/text/specialist/__init__.py +8 -0
  113. pyxllib/text/specialist/common.py +112 -0
  114. pyxllib/text/specialist/ptag.py +186 -0
  115. pyxllib/text/spellchecker.py +172 -0
  116. pyxllib/text/templates/echart_base.html +11 -0
  117. pyxllib/text/templates/highlight_code.html +17 -0
  118. pyxllib/text/templates/latex_editor.html +103 -0
  119. pyxllib/text/vbacode.py +17 -0
  120. pyxllib/text/xmllib.py +747 -0
  121. pyxllib/xl.py +39 -0
  122. pyxllib/xlcv.py +17 -0
  123. pyxllib-0.3.197.dist-info/METADATA +48 -0
  124. pyxllib-0.3.197.dist-info/RECORD +126 -0
  125. {pyxllib-0.0.43.dist-info → pyxllib-0.3.197.dist-info}/WHEEL +4 -5
  126. pyxllib/basic/_1_strlib.py +0 -945
  127. pyxllib/basic/_2_timelib.py +0 -488
  128. pyxllib/basic/_3_pathlib.py +0 -916
  129. pyxllib/basic/_4_loglib.py +0 -419
  130. pyxllib/basic/__init__.py +0 -54
  131. pyxllib/basic/arrow_.py +0 -250
  132. pyxllib/basic/chardet_.py +0 -66
  133. pyxllib/basic/dirlib.py +0 -529
  134. pyxllib/basic/dprint.py +0 -202
  135. pyxllib/basic/extension.py +0 -12
  136. pyxllib/basic/judge.py +0 -31
  137. pyxllib/basic/log.py +0 -204
  138. pyxllib/basic/pathlib_.py +0 -705
  139. pyxllib/basic/pytictoc.py +0 -102
  140. pyxllib/basic/qiniu_.py +0 -61
  141. pyxllib/basic/strlib.py +0 -761
  142. pyxllib/basic/timer.py +0 -132
  143. pyxllib/cv/cv.py +0 -834
  144. pyxllib/cv/cvlib/_1_geo.py +0 -543
  145. pyxllib/cv/cvlib/_2_cvprcs.py +0 -309
  146. pyxllib/cv/cvlib/_2_imgproc.py +0 -594
  147. pyxllib/cv/cvlib/_3_pilprcs.py +0 -80
  148. pyxllib/cv/cvlib/_4_cvimg.py +0 -211
  149. pyxllib/cv/cvlib/__init__.py +0 -10
  150. pyxllib/cv/debugtools.py +0 -82
  151. pyxllib/cv/fitz_.py +0 -300
  152. pyxllib/cv/installer.py +0 -42
  153. pyxllib/debug/_0_installer.py +0 -38
  154. pyxllib/debug/_1_typelib.py +0 -277
  155. pyxllib/debug/_2_chrome.py +0 -198
  156. pyxllib/debug/_3_showdir.py +0 -161
  157. pyxllib/debug/_4_bcompare.py +0 -140
  158. pyxllib/debug/__init__.py +0 -49
  159. pyxllib/debug/bcompare.py +0 -132
  160. pyxllib/debug/chrome.py +0 -198
  161. pyxllib/debug/installer.py +0 -38
  162. pyxllib/debug/showdir.py +0 -158
  163. pyxllib/debug/typelib.py +0 -278
  164. pyxllib/image/__init__.py +0 -12
  165. pyxllib/torch/__init__.py +0 -20
  166. pyxllib/torch/modellib.py +0 -37
  167. pyxllib/torch/trainlib.py +0 -344
  168. pyxllib/util/__init__.py +0 -20
  169. pyxllib/util/aip_.py +0 -141
  170. pyxllib/util/casiadb.py +0 -59
  171. pyxllib/util/excellib.py +0 -495
  172. pyxllib/util/filelib.py +0 -612
  173. pyxllib/util/jsondata.py +0 -27
  174. pyxllib/util/jsondata2.py +0 -92
  175. pyxllib/util/labelmelib.py +0 -139
  176. pyxllib/util/onepy/__init__.py +0 -29
  177. pyxllib/util/onepy/onepy.py +0 -574
  178. pyxllib/util/onepy/onmanager.py +0 -170
  179. pyxllib/util/pyautogui_.py +0 -219
  180. pyxllib/util/textlib.py +0 -1305
  181. pyxllib/util/unorder.py +0 -22
  182. pyxllib/util/xmllib.py +0 -639
  183. pyxllib-0.0.43.dist-info/METADATA +0 -39
  184. pyxllib-0.0.43.dist-info/RECORD +0 -80
  185. pyxllib-0.0.43.dist-info/top_level.txt +0 -1
  186. {pyxllib-0.0.43.dist-info → pyxllib-0.3.197.dist-info/licenses}/LICENSE +0 -0
pyxllib/cv/expert.py ADDED
@@ -0,0 +1,267 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : 陈坤泽
4
+ # @Email : 877362867@qq.com
5
+ # @Date : 2021/08/25 15:57
6
+
7
+ from collections import defaultdict
8
+ import concurrent.futures
9
+
10
+ import cv2
11
+ import pandas as pd
12
+ from tqdm import tqdm
13
+
14
+ import PIL.Image
15
+
16
+ from pyxllib.algo.stat import update_dataframes_to_excel
17
+ from pyxllib.file.specialist import get_etag, XlPath
18
+ from pyxllib.prog.specialist import Iterate
19
+ from pyxllib.cv.xlcvlib import CvImg, xlcv
20
+ from pyxllib.cv.xlpillib import PilImg, xlpil
21
+
22
+
23
def __1_目录级处理图片的功能():
    """ Section marker only (directory-level image processing features); does nothing. """
25
+
26
+
27
class ImagesDir(XlPath):
    """ Directory of images with batch-processing helpers.

    The methods here work in place by default. To be safe, keep a separate backup of the
    original data and run these operations on a copy of the directory.
    """

    def debug_image_func(self, func, pattern='*', *, save=None, show=False):
        """ Run an image function over the images of this directory for debugging.

        :param func: function applied to every image, called as ``new_img = func(img)`` where
            ``img`` is the image returned by ``xlcv.read`` for the file.
            When the function needs more arguments, wrap it in a lambda:
            ``lambda im: func(im, arg1=..., arg2=...)``
        :param pattern: glob pattern passed to ``self.glob_images``
        :param save: if given, each result image is written to ``self / save / <file name>``
        :param show: if True, every result is displayed with ``xlcv.imshow2``;
            in the window, press any key to move on to the next image, or ESC to stop
        :return: None

        TODO show the original and the processed image side by side
        TODO support showing the results of several images at once
        """
        if save:
            save = XlPath(save)

        for f in self.glob_images(pattern):
            im1 = xlcv.read(f)
            im2 = func(im1)

            if save:
                xlcv.write(im2, self / save / f.name)

            if show:
                xlcv.imshow2(im2)
                key = cv2.waitKey()
                # waitKey returns an int key code; mask to the low byte before comparing,
                # since some platforms set additional high bits. 0x1B is ESC.
                if key & 0xFF == 0x1B:
                    break

    def fix_suffixs(self, pattern='**/*', log_file='_图片统计.xlsx', max_workers=None, pinterval=None):
        """ Fix wrong image file suffixes.

        Every file yielded by ``self.xglob_faker_suffix_images(pattern)`` is read and written
        back to the same path, so that it is stored in the format its file name asks for.

        :param pattern: glob pattern of the files to check
        :param log_file: if truthy, the change log is written to this excel file
            (resolved with ``XlPath.init(log_file, self)``), sheet '修改后缀名'
        :param max_workers: forwarded to ``Iterate.run``
        :param pinterval: progress interval forwarded to ``Iterate.run``;
            when None, a percentage interval is chosen automatically
        :return: DataFrame with one row per rewritten file: relative path and the second
            value yielded for it by ``xglob_faker_suffix_images``
        """
        # 1 Fix the suffixes
        ls = []

        def process_image_file(args):
            """ Worker for a single file: rewrite it and record the change. """
            file, ext = args
            # Read the image and store it again in the format expected by its file name
            xlcv.write(xlcv.read(file), file)
            ls.append([file.relpath(self).as_posix(), ext])

        files_with_exts = list(self.xglob_faker_suffix_images(pattern))
        if pinterval is None and files_with_exts:
            # Percentage that corresponds to 1000 files, but never less than 1%
            p = max(1000 * 100 // len(files_with_exts), 1)
            if p < 50:  # an interval is only worth displaying when it is below 50%
                pinterval = f'{p}%'
        Iterate(files_with_exts).run(process_image_file, max_workers=max_workers, pinterval=pinterval)

        # 2 Record what was changed
        df = pd.DataFrame.from_records(ls, columns=['图片名', '原图片类型'])
        if log_file:
            update_dataframes_to_excel(XlPath.init(log_file, self), {'修改后缀名': df})
        return df

    def reduce_image_filesize(self, pattern='**/*',
                              limit_size=4 * 1024 ** 2, *,
                              read_flags=None,
                              change_length=False,
                              suffix=None,
                              log_file='_图片统计.xlsx',
                              max_workers=None, pinterval=None):
        """ Reduce the file size of the images in this directory.

        :param pattern: glob pattern passed to ``self.glob_images``
        :param limit_size: target size limit in bytes; only used when ``change_length`` is True.
            For a personal photo album about 300KB (``300 * 1024``) has proven to be enough.
            Baidu APIs seem to differ per interface (4M, 6M, 10M, ...), and their limit applies
            to the base64 encoded size, which is about 1/3 larger; to be safe keep images
            within 3/4 of such a limit.
        :param read_flags: flags used when reading the image; 1 unifies odd formats such as
            RGBA into RGB
        :param change_length: when True, the image is passed through ``xlpil.reduce_filesize``
            with ``limit_size``; when False (the default), the image is only read and
            re-encoded, which by itself may already save quite some space
        :param suffix: unify the image format, e.g. '.jpg' (the leading '.' is required);
            by default the original suffix is kept.
            Note that originals of another format are deleted once the new file is written.
        :param log_file: if truthy, the details are written to this excel file
            (resolved with ``XlPath.init(log_file, self)``), sheet '图片瘦身'
        :param max_workers: forwarded to ``Iterate.run``
        :param pinterval: progress interval forwarded to ``Iterate.run``;
            when None, a percentage interval is chosen automatically
        :return: DataFrame with original path, new path, original size and evaluated new size

        Every image is read and encoded again, so this may be somewhat slow.
        """
        # 1 Debug information
        print('原始大小', self.size(human_readable=True))

        # 2 Shrink the images
        ls = []

        def process_image_file(f):
            """ Worker for a single file: re-encode it and keep the result only if smaller. """
            size1 = f.size()
            im = xlpil.read(f, read_flags)
            _suffix = suffix or f.suffix
            if change_length:
                im = xlpil.reduce_filesize(im, limit_size, _suffix)
            size2 = xlpil.evaluate_image_file_size(im, _suffix)
            dst_f = f.with_suffix(_suffix)
            if size2 < size1:  # only update files that really become smaller
                xlpil.write(im, dst_f)
                if f.suffix != _suffix:
                    # The image now lives under the new suffix, drop the original
                    f.delete()
            ls.append([f.relpath(self).as_posix(), dst_f.relpath(self).as_posix(), size1, size2])

        files = list(self.glob_images(pattern))
        if pinterval is None and files:
            # Percentage that corresponds to 100 files, but never less than 1%
            p = max(100 * 100 // len(files), 1)
            if p < 50:  # an interval is only worth displaying when it is below 50%
                pinterval = f'{p}%'
        Iterate(files).run(process_image_file, max_workers=max_workers, pinterval=pinterval)

        print('新目录大小', self.size(human_readable=True))

        # 3 Record the details
        # Without the suffix parameter, the '新图片' column equals the '原图片' column.
        # When the evaluated new size is not smaller than the original size,
        # the image file is in fact left untouched.
        df = pd.DataFrame.from_records(ls, columns=['原图片', '新图片', '原文件大小', '新文件大小'])
        if log_file:
            update_dataframes_to_excel(XlPath.init(log_file, self), {'图片瘦身': df})
        return df

    def adjust_image_shape(self, pattern='*', min_length=None, max_length=None, print_mode=True):
        """ Adjust the shape of the images in place.

        :param pattern: glob pattern passed to ``self.glob_images``
        :param min_length: images whose shorter side is below this value are adjusted
        :param max_length: images whose longer side is above this value are adjusted
        :param print_mode: whether to print one line per rewritten image
        """

        def printf(*args, **kwargs):
            if print_mode:
                print(*args, **kwargs)

        j = 1  # running number of the rewritten images, used in the printed message
        for f in self.glob_images(pattern):
            # PIL is faster for just checking the size, the processing itself uses cv2
            h, w = xlpil.read(f).size[::-1]
            x, y = min(h, w), max(h, w)

            if (min_length and x < min_length) or (max_length and y > max_length):
                im = xlcv.read(f)
                im2 = xlcv.adjust_shape(im, min_length, max_length)
                if im2.shape != im.shape:
                    printf(f'{j}、{f} {im.shape} -> {im2.shape}')
                    xlcv.write(im2, f)
                    j += 1

    def check_repeat_phash_images(self, pattern='**/*', **kwargs):
        """ Run ``self.check_repeat_files`` on the images, using phash as default hash function.

        'files' and 'hash_func' given in kwargs take precedence over the defaults.
        """
        from pyxllib.cv.imhash import phash
        if 'files' not in kwargs:
            kwargs['files'] = self.glob_images(pattern)
        if 'hash_func' not in kwargs:
            kwargs['hash_func'] = lambda p: phash(p)
        self.check_repeat_files(pattern, **kwargs)

    def check_repeat_dhash_images(self, pattern='**/*', **kwargs):
        """ Run ``self.check_repeat_files`` on the images, using dhash as default hash function.

        'files' and 'hash_func' given in kwargs take precedence over the defaults.
        """
        from pyxllib.cv.imhash import dhash
        if 'files' not in kwargs:
            kwargs['files'] = self.glob_images(pattern)
        if 'hash_func' not in kwargs:
            kwargs['hash_func'] = lambda p: dhash(p)
        self.check_repeat_files(pattern, **kwargs)

    def clear_exif(self):
        """ Clear the exif orientation mark of the images.

        Images whose exif carries a truthy "Orientation" entry get that orientation applied
        via ``xlpil.apply_exif_orientation`` and are written back in place.
        NOTE(review): whether the rewritten file still carries other exif entries depends on
        ``xlpil.write`` - confirm.
        """
        cnt = 0
        for file in tqdm(self.rglob_images()):
            im = xlpil.read(file)
            exif = xlpil.get_exif(im)
            if exif:
                orientation = exif.get("Orientation", None)
                if orientation:
                    cnt += 1
                    im = xlpil.apply_exif_orientation(im)
                    xlpil.write(im, file)
        print(f'处理了{cnt}份exif')
198
+
199
+
200
def find_modified_images(dirs, print_mode=False):
    """ Find images that may have been modified.

    Typically used in data annotation work: compare a returned data directory with the
    original one. Images are matched by file name, then checked for whether their content
    changed in between, e.g. by a rotation.

    :param list[str] dirs: directories that contain the images (searched recursively)
    :param bool print_mode: whether to print progress messages, False by default
    :return dict[str, list[str]]: maps an image name to the posix paths of all files with
        that name, for every name whose files differ in etag or in image size

    Example:
        import os
        from pprint import pprint
        from pyxllib.cv.expert import find_modified_images

        os.chdir('/home/chenkunze/data')
        res = find_modified_images([r'm2305latex2lgx/train_images_sub',
                                    r'm2305latex2lg/1、做完的数据'])
        pprint(res)
    """
    from pyxllib.file.specialist import get_etag  # similarity hashes did not work out, etag is needed

    # 1 Group the image paths by file name
    image_groups = defaultdict(list)
    for folder in dirs:
        for path in XlPath(folder).rglob_images():
            image_groups[path.name].append(path)

    # 2 Collect the groups whose files differ
    modified_images = {}
    progress_counter = 0
    total_files = sum(len(paths) for paths in image_groups.values())

    if print_mode:
        print(f"Total files: {total_files}")

    for image_name, paths in image_groups.items():
        if len(paths) > 1:
            hash_values = {get_etag(str(path)) for path in paths}
            sizes = set()
            for path in paths:
                # Open inside a context manager so the file handle is released right away
                with PIL.Image.open(path) as im:
                    sizes.add(im.size)

            # This could be extended to describe the difference in more detail (rotated by
            # 90/180/270 degrees?), but that would cost a lot more computation; not done for now
            if len(hash_values) > 1 or len(sizes) > 1:
                # Report posix style paths
                modified_images[image_name] = [XlPath(path).as_posix() for path in paths]

        if print_mode:
            # Names that occur only once are counted as well, so the counter reaches the total
            progress_counter += len(paths)
            print(f"Progress: {progress_counter}/{total_files}")

    return modified_images
@@ -2,48 +2,21 @@
2
2
  # -*- coding: utf-8 -*-
3
3
  # @Author : 陈坤泽
4
4
  # @Email : 877362867@qq.com
5
- # @Data : 2020/06/02 16:00
5
+ # @Date : 2020/06/02 16:00
6
6
 
7
7
 
8
8
  from collections import defaultdict
9
9
  import concurrent.futures
10
+ import os
11
+ import re
10
12
  import subprocess
11
13
 
12
- import requests
13
- from PIL import Image
14
-
15
- from pyxllib.basic import *
16
-
17
-
18
- def get_img_content(in_):
19
- """获取in_代表的图片的二进制数据
20
- :param in_: 可以是本地文件,也可以是图片url地址,也可以是Image对象
21
- """
22
- from pyxllib.basic import is_url, is_file
23
-
24
- # 1 取不同来源的数据
25
- if is_url(in_):
26
- content = requests.get(in_).content
27
- img = Image.open(io.BytesIO(content))
28
- elif is_file(in_):
29
- with open(in_, 'rb') as f:
30
- content = f.read()
31
- img = Image.open(in_)
32
- elif isinstance(in_, Image.Image):
33
- img = in_
34
- else:
35
- raise ValueError
36
-
37
- img = image_rgba2rgb(img) # 如果是RGBA类型,要把透明底变成白色
38
- file = io.BytesIO()
39
- img.save(file, 'JPEG')
40
- content = file.getvalue()
41
-
42
- return content
14
+ from pyxllib.file.specialist import File
43
15
 
44
16
 
45
17
  def magick(infile, *, outfile=None, if_exists='error', transparent=None, trim=False, density=None, other_args=None):
46
- """调用iamge magick的magick.exe工具
18
+ """ 调用iamge magick的magick.exe工具
19
+
47
20
  :param infile: 处理对象文件
48
21
  :param outfile: 输出文件,可以不写,默认原地操作(只设置透明度、裁剪时可能会原地操作)
49
22
  :param if_exists: 如果目标文件已存在要怎么处理
@@ -64,10 +37,10 @@ def magick(infile, *, outfile=None, if_exists='error', transparent=None, trim=Fa
64
37
 
65
38
  # 2
66
39
  # 200914周一20:40,这有个相对路径的bug,修复了下,否则 test/a.png 会变成 test/test/a.png
67
- if Path(outfile).preprocess(if_exists, exclude=Path(infile)):
40
+ if File(outfile).exist_preprcs(if_exists):
68
41
  # 2.1 判断是否是支持的输入文件类型
69
42
  ext = os.path.splitext(infile)[1].lower()
70
- if not Path(infile).is_file() or not ext in ('.png', '.eps', '.pdf', '.jpg', '.jpeg', '.wmf', '.emf'):
43
+ if not File(infile) or not ext in ('.png', '.eps', '.pdf', '.jpg', '.jpeg', '.wmf', '.emf'):
71
44
  return False
72
45
 
73
46
  # 2.2 生成需要执行的参数
@@ -93,11 +66,12 @@ def magick(infile, *, outfile=None, if_exists='error', transparent=None, trim=Fa
93
66
  return outfile
94
67
 
95
68
 
96
- def ensure_pngs(folder, *, if_exists='ignore',
69
+ def ensure_pngs(folder, *, if_exists='skip',
97
70
  transparent=None, trim=False,
98
71
  density=None, epsdensity=None,
99
72
  max_workers=None):
100
- """确保一个目录下的所有图片都有一个png版本格式的文件
73
+ """ 确保一个目录下的所有图片都有一个png版本格式的文件
74
+
101
75
  :param folder: 目录名,会遍历直接目录下所有没png的stem名称生成png
102
76
  :param if_exists: 如果文件已存在,要进行的操作
103
77
  'replace',直接替换
@@ -129,7 +103,7 @@ def ensure_pngs(folder, *, if_exists='ignore',
129
103
  if if_exists == 'ignore':
130
104
  continue
131
105
  elif if_exists == 'backup':
132
- Path(name, '.png', folder).backup(move=True)
106
+ File(name, folder, suffix='.png').backup(move=True)
133
107
  elif if_exists == 'replace':
134
108
  pass
135
109
  else:
@@ -156,7 +130,8 @@ def ensure_pngs(folder, *, if_exists='ignore',
156
130
 
157
131
 
158
132
  def zoomsvg(file, scale=1):
159
- """
133
+ """ 缩放svg文件
134
+
160
135
  :param file:
161
136
  如果输入一个目录,会处理目录下所有的svg图片
162
137
  否则只处理指定的文件
@@ -172,53 +147,13 @@ def zoomsvg(file, scale=1):
172
147
  return re.sub(r'((?:height|width)=")(\d+(?:\.\d+)?)', g, m.group())
173
148
 
174
149
  if os.path.isfile(file):
175
- s = re.sub(r'<svg .+?>', func, Path(file).read(), flags=re.DOTALL)
176
- Path(file).write(s, if_exists='replace')
150
+ s = re.sub(r'<svg .+?>', func, File(file).read(), flags=re.DOTALL)
151
+ File(file).write(s, if_exists='replace')
177
152
  elif os.path.isdir(file):
178
153
  for f in os.listdir(file):
179
154
  if not f.endswith('.svg'): continue
180
155
  f = os.path.join(file, f)
181
- s = re.sub(r'<svg\s+.+?>', func, Path(f).read(), flags=re.DOTALL)
182
- Path(file).write(s, if_exists='replace')
156
+ s = re.sub(r'<svg\s+.+?>', func, File(f).read(), flags=re.DOTALL)
157
+ File(file).write(s, if_exists='replace')
183
158
  elif isinstance(file, str) and '<svg ' in file: # 输入svg的代码文本
184
159
  return re.sub(r'<svg .+?>', func, file, flags=re.DOTALL)
185
-
186
-
187
- def reduce_image_filesize(path, filesize):
188
- """
189
- :param path: 图片路径,支持png、jpg等多种格式
190
- :param filesize: 单位Bytes
191
- 可以用 300*1024 来表示 300KB
192
- :return:
193
-
194
- >> reduce_image_filesize('a.jpg', 300*1024)
195
- """
196
- from PIL import Image
197
-
198
- path = Path(path)
199
- # 1 无论什么情况,都先做个100%的resize处理,很可能会去掉一些没用的冗余信息
200
- im = Image.open(f'{path}')
201
- im.resize(im.size).save(f'{path}')
202
-
203
- # 2 然后开始循环处理
204
- while True:
205
- r = path.size / filesize
206
- if r <= 1: break
207
- # 假设图片面积和文件大小成正比,如果r=4,表示长宽要各减小至1/(r**0.5)才能到目标文件大小
208
- rate = min(1 / (r ** 0.5), 0.95) # 并且限制每轮至少要缩小至95%,避免可能会迭代太多轮
209
- im = Image.open(f'{path}')
210
- im.resize((int(im.size[0] * rate), int(im.size[1] * rate))).save(f'{path}')
211
-
212
-
213
- def image_rgba2rgb(im):
214
- if im.mode in ('RGBA', 'P'):
215
- # 判断图片mode模式,如果是RGBA或P等可能有透明底,则和一个白底图片合成去除透明底
216
- background = Image.new('RGBA', im.size, (255, 255, 255))
217
- # composite是合成的意思。将右图的alpha替换为左图内容
218
- im = Image.alpha_composite(background, im.convert('RGBA')).convert('RGB')
219
- return im
220
-
221
-
222
- ____section_temp = """
223
- 临时添加的新功能
224
- """
pyxllib/cv/imhash.py ADDED
@@ -0,0 +1,39 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : 陈坤泽
4
+ # @Email : 877362867@qq.com
5
+ # @Date : 2021/06/08 22:53
6
+
7
+ """
8
+ TODO 写一些图片相似度相关功能
9
+ """
10
+
11
+ from pyxllib.prog.pupil import check_install_package
12
+
13
+ check_install_package('imagehash', 'ImageHash')
14
+
15
+ import imagehash
16
+ import numpy as np
17
+
18
+ from pyxllib.cv.xlpillib import xlpil
19
+
20
+
21
def get_init_hash():
    """ Return an initial, empty hash: an ImageHash over an 8x8 all-False boolean array. """
    empty_bits = np.zeros((8, 8), dtype=bool)
    return imagehash.ImageHash(empty_bits)
24
+
25
+
26
def phash(image, *args, **kwargs):
    """ Variant of the official ``imagehash.phash`` interface.

    The input image is first passed through ``xlpil.read``, so the general input formats
    supported by that reader are accepted here; the remaining arguments are forwarded as is.
    """
    return imagehash.phash(xlpil.read(image), *args, **kwargs)
31
+
32
+
33
def dhash(image, *args, **kwargs):
    """ Variant of the official ``imagehash.dhash`` interface.

    The input image is first passed through ``xlpil.read``, so the general input formats
    supported by that reader are accepted here; the remaining arguments are forwarded as is.

    dhash is the one the official project recommends most, and it is fast as well.
    """
    return imagehash.dhash(xlpil.read(image), *args, **kwargs)
pyxllib/cv/pupil.py ADDED
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : 陈坤泽
4
+ # @Email : 877362867@qq.com
5
+ # @Date : 2021/10/07 09:06
6
+
7
+ # Rgb用了numpy,不再属于pupil了,而且会初始化很多颜色列表,也浪费空间,
8
+ # 所以改成独立包,需要的时候再手动导入
9
+ # from pyxllib.cv.rgbfmt import RgbFormatter