pyxllib 0.0.43__py3-none-any.whl → 0.3.197__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186) hide show
  1. pyxllib/__init__.py +9 -2
  2. pyxllib/algo/__init__.py +8 -0
  3. pyxllib/algo/disjoint.py +54 -0
  4. pyxllib/algo/geo.py +541 -0
  5. pyxllib/{util/mathlib.py → algo/intervals.py} +172 -36
  6. pyxllib/algo/matcher.py +389 -0
  7. pyxllib/algo/newbie.py +166 -0
  8. pyxllib/algo/pupil.py +629 -0
  9. pyxllib/algo/shapelylib.py +67 -0
  10. pyxllib/algo/specialist.py +241 -0
  11. pyxllib/algo/stat.py +494 -0
  12. pyxllib/algo/treelib.py +149 -0
  13. pyxllib/algo/unitlib.py +66 -0
  14. pyxllib/autogui/__init__.py +5 -0
  15. pyxllib/autogui/activewin.py +246 -0
  16. pyxllib/autogui/all.py +9 -0
  17. pyxllib/autogui/autogui.py +852 -0
  18. pyxllib/autogui/uiautolib.py +362 -0
  19. pyxllib/autogui/virtualkey.py +102 -0
  20. pyxllib/autogui/wechat.py +827 -0
  21. pyxllib/autogui/wechat_msg.py +421 -0
  22. pyxllib/autogui/wxautolib.py +84 -0
  23. pyxllib/cv/__init__.py +1 -11
  24. pyxllib/cv/expert.py +267 -0
  25. pyxllib/cv/{imlib.py → imfile.py} +18 -83
  26. pyxllib/cv/imhash.py +39 -0
  27. pyxllib/cv/pupil.py +9 -0
  28. pyxllib/cv/rgbfmt.py +1525 -0
  29. pyxllib/cv/slidercaptcha.py +137 -0
  30. pyxllib/cv/trackbartools.py +163 -49
  31. pyxllib/cv/xlcvlib.py +1040 -0
  32. pyxllib/cv/xlpillib.py +423 -0
  33. pyxllib/data/__init__.py +0 -0
  34. pyxllib/data/echarts.py +240 -0
  35. pyxllib/data/jsonlib.py +89 -0
  36. pyxllib/{util/oss2_.py → data/oss.py} +11 -9
  37. pyxllib/data/pglib.py +1127 -0
  38. pyxllib/data/sqlite.py +568 -0
  39. pyxllib/{util → data}/sqllib.py +13 -31
  40. pyxllib/ext/JLineViewer.py +505 -0
  41. pyxllib/ext/__init__.py +6 -0
  42. pyxllib/{util → ext}/demolib.py +119 -35
  43. pyxllib/ext/drissionlib.py +277 -0
  44. pyxllib/ext/kq5034lib.py +12 -0
  45. pyxllib/{util/main.py → ext/old.py} +122 -284
  46. pyxllib/ext/qt.py +449 -0
  47. pyxllib/ext/robustprocfile.py +497 -0
  48. pyxllib/ext/seleniumlib.py +76 -0
  49. pyxllib/{util/tklib.py → ext/tk.py} +10 -11
  50. pyxllib/ext/unixlib.py +827 -0
  51. pyxllib/ext/utools.py +351 -0
  52. pyxllib/{util/webhooklib.py → ext/webhook.py} +45 -17
  53. pyxllib/ext/win32lib.py +40 -0
  54. pyxllib/ext/wjxlib.py +88 -0
  55. pyxllib/ext/wpsapi.py +124 -0
  56. pyxllib/ext/xlwork.py +9 -0
  57. pyxllib/ext/yuquelib.py +1105 -0
  58. pyxllib/file/__init__.py +17 -0
  59. pyxllib/file/docxlib.py +761 -0
  60. pyxllib/{util → file}/gitlib.py +40 -27
  61. pyxllib/file/libreoffice.py +165 -0
  62. pyxllib/file/movielib.py +148 -0
  63. pyxllib/file/newbie.py +10 -0
  64. pyxllib/file/onenotelib.py +1469 -0
  65. pyxllib/file/packlib/__init__.py +330 -0
  66. pyxllib/{util → file/packlib}/zipfile.py +598 -195
  67. pyxllib/file/pdflib.py +426 -0
  68. pyxllib/file/pupil.py +185 -0
  69. pyxllib/file/specialist/__init__.py +685 -0
  70. pyxllib/{basic/_5_dirlib.py → file/specialist/dirlib.py} +364 -93
  71. pyxllib/file/specialist/download.py +193 -0
  72. pyxllib/file/specialist/filelib.py +2829 -0
  73. pyxllib/file/xlsxlib.py +3131 -0
  74. pyxllib/file/xlsyncfile.py +341 -0
  75. pyxllib/prog/__init__.py +5 -0
  76. pyxllib/prog/cachetools.py +64 -0
  77. pyxllib/prog/deprecatedlib.py +233 -0
  78. pyxllib/prog/filelock.py +42 -0
  79. pyxllib/prog/ipyexec.py +253 -0
  80. pyxllib/prog/multiprogs.py +940 -0
  81. pyxllib/prog/newbie.py +451 -0
  82. pyxllib/prog/pupil.py +1197 -0
  83. pyxllib/{sitepackages.py → prog/sitepackages.py} +5 -3
  84. pyxllib/prog/specialist/__init__.py +391 -0
  85. pyxllib/prog/specialist/bc.py +203 -0
  86. pyxllib/prog/specialist/browser.py +497 -0
  87. pyxllib/prog/specialist/common.py +347 -0
  88. pyxllib/prog/specialist/datetime.py +199 -0
  89. pyxllib/prog/specialist/tictoc.py +240 -0
  90. pyxllib/prog/specialist/xllog.py +180 -0
  91. pyxllib/prog/xlosenv.py +108 -0
  92. pyxllib/stdlib/__init__.py +17 -0
  93. pyxllib/{util → stdlib}/tablepyxl/__init__.py +1 -3
  94. pyxllib/{util → stdlib}/tablepyxl/style.py +1 -1
  95. pyxllib/{util → stdlib}/tablepyxl/tablepyxl.py +2 -4
  96. pyxllib/text/__init__.py +8 -0
  97. pyxllib/text/ahocorasick.py +39 -0
  98. pyxllib/text/airscript.js +744 -0
  99. pyxllib/text/charclasslib.py +121 -0
  100. pyxllib/text/jiebalib.py +267 -0
  101. pyxllib/text/jinjalib.py +32 -0
  102. pyxllib/text/jsa_ai_prompt.md +271 -0
  103. pyxllib/text/jscode.py +922 -0
  104. pyxllib/text/latex/__init__.py +158 -0
  105. pyxllib/text/levenshtein.py +303 -0
  106. pyxllib/text/nestenv.py +1215 -0
  107. pyxllib/text/newbie.py +300 -0
  108. pyxllib/text/pupil/__init__.py +8 -0
  109. pyxllib/text/pupil/common.py +1121 -0
  110. pyxllib/text/pupil/xlalign.py +326 -0
  111. pyxllib/text/pycode.py +47 -0
  112. pyxllib/text/specialist/__init__.py +8 -0
  113. pyxllib/text/specialist/common.py +112 -0
  114. pyxllib/text/specialist/ptag.py +186 -0
  115. pyxllib/text/spellchecker.py +172 -0
  116. pyxllib/text/templates/echart_base.html +11 -0
  117. pyxllib/text/templates/highlight_code.html +17 -0
  118. pyxllib/text/templates/latex_editor.html +103 -0
  119. pyxllib/text/vbacode.py +17 -0
  120. pyxllib/text/xmllib.py +747 -0
  121. pyxllib/xl.py +39 -0
  122. pyxllib/xlcv.py +17 -0
  123. pyxllib-0.3.197.dist-info/METADATA +48 -0
  124. pyxllib-0.3.197.dist-info/RECORD +126 -0
  125. {pyxllib-0.0.43.dist-info → pyxllib-0.3.197.dist-info}/WHEEL +4 -5
  126. pyxllib/basic/_1_strlib.py +0 -945
  127. pyxllib/basic/_2_timelib.py +0 -488
  128. pyxllib/basic/_3_pathlib.py +0 -916
  129. pyxllib/basic/_4_loglib.py +0 -419
  130. pyxllib/basic/__init__.py +0 -54
  131. pyxllib/basic/arrow_.py +0 -250
  132. pyxllib/basic/chardet_.py +0 -66
  133. pyxllib/basic/dirlib.py +0 -529
  134. pyxllib/basic/dprint.py +0 -202
  135. pyxllib/basic/extension.py +0 -12
  136. pyxllib/basic/judge.py +0 -31
  137. pyxllib/basic/log.py +0 -204
  138. pyxllib/basic/pathlib_.py +0 -705
  139. pyxllib/basic/pytictoc.py +0 -102
  140. pyxllib/basic/qiniu_.py +0 -61
  141. pyxllib/basic/strlib.py +0 -761
  142. pyxllib/basic/timer.py +0 -132
  143. pyxllib/cv/cv.py +0 -834
  144. pyxllib/cv/cvlib/_1_geo.py +0 -543
  145. pyxllib/cv/cvlib/_2_cvprcs.py +0 -309
  146. pyxllib/cv/cvlib/_2_imgproc.py +0 -594
  147. pyxllib/cv/cvlib/_3_pilprcs.py +0 -80
  148. pyxllib/cv/cvlib/_4_cvimg.py +0 -211
  149. pyxllib/cv/cvlib/__init__.py +0 -10
  150. pyxllib/cv/debugtools.py +0 -82
  151. pyxllib/cv/fitz_.py +0 -300
  152. pyxllib/cv/installer.py +0 -42
  153. pyxllib/debug/_0_installer.py +0 -38
  154. pyxllib/debug/_1_typelib.py +0 -277
  155. pyxllib/debug/_2_chrome.py +0 -198
  156. pyxllib/debug/_3_showdir.py +0 -161
  157. pyxllib/debug/_4_bcompare.py +0 -140
  158. pyxllib/debug/__init__.py +0 -49
  159. pyxllib/debug/bcompare.py +0 -132
  160. pyxllib/debug/chrome.py +0 -198
  161. pyxllib/debug/installer.py +0 -38
  162. pyxllib/debug/showdir.py +0 -158
  163. pyxllib/debug/typelib.py +0 -278
  164. pyxllib/image/__init__.py +0 -12
  165. pyxllib/torch/__init__.py +0 -20
  166. pyxllib/torch/modellib.py +0 -37
  167. pyxllib/torch/trainlib.py +0 -344
  168. pyxllib/util/__init__.py +0 -20
  169. pyxllib/util/aip_.py +0 -141
  170. pyxllib/util/casiadb.py +0 -59
  171. pyxllib/util/excellib.py +0 -495
  172. pyxllib/util/filelib.py +0 -612
  173. pyxllib/util/jsondata.py +0 -27
  174. pyxllib/util/jsondata2.py +0 -92
  175. pyxllib/util/labelmelib.py +0 -139
  176. pyxllib/util/onepy/__init__.py +0 -29
  177. pyxllib/util/onepy/onepy.py +0 -574
  178. pyxllib/util/onepy/onmanager.py +0 -170
  179. pyxllib/util/pyautogui_.py +0 -219
  180. pyxllib/util/textlib.py +0 -1305
  181. pyxllib/util/unorder.py +0 -22
  182. pyxllib/util/xmllib.py +0 -639
  183. pyxllib-0.0.43.dist-info/METADATA +0 -39
  184. pyxllib-0.0.43.dist-info/RECORD +0 -80
  185. pyxllib-0.0.43.dist-info/top_level.txt +0 -1
  186. {pyxllib-0.0.43.dist-info → pyxllib-0.3.197.dist-info/licenses}/LICENSE +0 -0
pyxllib/ext/unixlib.py ADDED
@@ -0,0 +1,827 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : 陈坤泽
4
+ # @Email : 877362867@qq.com
5
+ # @Date : 2021/06/03 20:41
6
+ import time
7
+
8
+ from pyxllib.prog.pupil import check_install_package
9
+
10
+ check_install_package('paramiko')
11
+ check_install_package('scp')
12
+ # 对 paramiko 进一步封装的库
13
+ # check_install_package('fabric')
14
+
15
+ import warnings
16
+ from cryptography.utils import CryptographyDeprecationWarning
17
+
18
+ # 忽略特定的警告
19
+ warnings.filterwarnings("ignore", category=CryptographyDeprecationWarning)
20
+
21
+ from collections import defaultdict
22
+ import pathlib
23
+ import re
24
+ import shutil
25
+ import sys
26
+ import socket
27
+
28
+ import humanfriendly
29
+ import pandas as pd
30
+ import paramiko
31
+ import scp as scplib
32
+ from tqdm import tqdm
33
+
34
+ from pyxllib.prog.specialist import mtqdm, get_xllog
35
+ from pyxllib.algo.pupil import natural_sort
36
+ from pyxllib.file.specialist import XlPath
37
+
38
+ xllog = get_xllog()
39
+
40
+
41
class SshCommandError(Exception):
    """Raised when a command executed on the remote host writes to stderr."""
43
+
44
+
45
class ScpLimitError(Exception):
    """Raised by the scp progress callback to stop a transfer once the byte limit is reached."""
47
+
48
+
49
class ScpProgress:
    """Progress callback handed to ``scp.SCPClient``; drives a tqdm-style bar.

    ``info`` selects the display mode: 0 keeps only the byte counters,
    1 additionally shows one overall tqdm bar (requires ``total`` in kwargs).
    Recognised kwargs: ``desc``, ``total``, ``limit_bytes`` and ``rf`` (a relative
    file name to display instead of the name reported by scp).
    """

    def __init__(self, info, **kwargs):
        if info not in (0, 1):
            raise NotImplementedError

        self.info = info
        self.kwargs = kwargs
        self.trans_size = 0  # bytes transferred in total
        self.this_file_sent = 0  # bytes transferred for the file in flight

        if info == 1:
            self.kwargs['frequency'] = 0  # number of callback invocations so far
            self.tqdm = tqdm(desc=self.kwargs.get('desc'), disable=False,
                             total=self.kwargs['total'], unit_scale=True)
        else:
            self.tqdm = None

    def __call__(self, filename, size, sent):
        """Record progress for ``filename``: ``sent`` of ``size`` bytes are done.

        :raises ScpLimitError: when a file has just completed and the accumulated
            byte count has reached ``limit_bytes``
        """
        opts = self.kwargs

        # 1 Work out the name to display and update the byte counters
        if isinstance(filename, bytes):
            filename = filename.decode('utf8')
        if 'rf' in opts:  # show the relative path only
            filename = opts['rf']
        elif 'desc' in opts and filename.startswith(opts["desc"][1:]):
            filename = filename[len(opts["desc"]):]

        finished = (size == sent)  # the current file is complete
        # scp reports cumulative bytes per file, so the delta is what is new
        delta = sent - self.this_file_sent
        self.trans_size += delta
        self.this_file_sent = 0 if finished else sent

        # 2 Mode-specific display
        if self.info == 1:
            opts['frequency'] += 1
            bar = self.tqdm
            bar.desc = f'{opts["desc"]} > {sent / size:4.0%} ' + filename
            bar.n += delta
            # refresh only on file completion or every 100th call
            if finished or opts['frequency'] % 100 == 0:
                bar.update(0)

        # 3 Enforce limit_bytes (only checked at file boundaries)
        limit = opts.get('limit_bytes')
        if finished and limit and self.trans_size >= limit:
            if self.info:
                xllog.warning('达到限定上传大小,早停。')
            raise ScpLimitError
97
+
98
+
99
+ class XlSSHClient(paramiko.SSHClient):
100
+ """ 自己封装的一套ssh工具
101
+
102
+ 因为我自己都是用ubuntu18系统,所以这里有些精细化的功能,可能不一定能兼容其他发行版
103
+ """
104
+
105
def __1_初始化和执行(self):
    # Section marker only ("1: initialisation and command execution"); does nothing.
    pass
107
+
108
def __init__(self, server, user, passwd, *, port=22, map_path=None,
             relogin=0, relogin_interval=1, timeout=None):
    """Connect to the first reachable server and build the remote ``Path`` class.

    :param str|list server: host name or IP, optionally written as ``host:port``;
        a list/tuple of candidates is tried in order until one connects
    :param user: login user name
    :param passwd: login password
    :param port: port used for candidates that carry no ``:port`` suffix
        (kept for backward compatibility; prefer ``host:port`` in ``server``)
    :param map_path: local-root -> remote-root mapping used by the scp helpers to
        locate the counterpart path automatically, e.g. ``{'C:/': '/'}``
    :param int relogin: number of extra connection attempts per server
    :param int|float relogin_interval: seconds to sleep between attempts
    :param timeout: forwarded to ``paramiko.SSHClient.connect``
    :raises paramiko.ssh_exception.SSHException, OSError: the error of the last
        failed attempt when no candidate could be connected
    """
    super().__init__()

    self.load_system_host_keys()
    self.set_missing_host_key_policy(paramiko.AutoAddPolicy())

    servers = server if isinstance(server, (list, tuple)) else [server]

    last_error = None
    connected = False
    for entry in servers:
        # Each candidate starts from the default port; a port parsed from one
        # entry must not leak into the following entries.
        host, host_port = entry, port
        if entry.count(':') == 1:  # "host:port"; other colon counts are passed through as-is
            host, port_text = entry.split(':')
            host_port = int(port_text)

        # Retry the login; socket-level failures (timeouts, refused connections)
        # are OSError and must also fall through to the next attempt/candidate.
        for attempt in range(relogin + 1):
            try:
                self.connect(host, host_port, user, passwd, timeout=timeout)
            except (paramiko.ssh_exception.SSHException, OSError) as e:
                last_error = e
                if attempt < relogin:
                    time.sleep(relogin_interval)
            else:
                connected = True
                break

        if connected:
            break

    if not connected:
        if last_error is None:  # empty candidate list
            raise paramiko.ssh_exception.SSHException
        raise last_error

    self.map_path = map_path

    class Path(pathlib.PurePosixPath):
        """Path on the remote server, bound to this client.

        Convenient for single-path queries. For bulk work prefer one command that
        handles many files at once, to reduce round trips to the server.
        """
        client = self

        def exists_type(self):
            """Return -1 for a directory, 1 for a regular file, 0 when missing."""
            # TODO symlinks and other special cases are not considered yet
            t = self.client.exec(f'if test -d "{self}"; then echo -1;'
                                 f'elif test -f "{self}"; then echo 1;'
                                 'else echo 0; fi')
            return int(t)

        def is_file(self):
            return self.exists_type() == 1

        def is_dir(self):
            return self.exists_type() == -1

        def sub_rel_paths(self, mtime=False, *, extcmd=''):
            """List everything below this remote directory as relative paths.

            :param mtime: when true return ``{relpath: mtime_seconds}`` instead of a set
            :param extcmd: extra ``find`` arguments, typically ``-maxdepth 1``
            """
            printf = '%P %Ts\n' if mtime else r'%P\n'
            cmd = f'find "{self}" -mindepth 1'
            if extcmd:
                cmd += ' ' + extcmd
            cmd += f' -printf "{printf}"'
            stdout = self.client.exec(cmd)
            lines = stdout.splitlines()
            if mtime:
                # split on the last whitespace so names containing spaces survive
                lines = [list(re.match(r'(.+?)\s(\d+)$', line).groups()) for line in lines]
                return {line[0]: int(line[1]) for line in lines}
            else:
                return set(lines)

        def mtime(self):
            """Return the remote modification time in whole seconds."""
            res = self.client.exec(f'find "{self}" -maxdepth 0 -printf "%Ts"')
            return int(res)

        def set_mtime(self, timestamp):
            """Placeholder for manually setting the remote mtime (not implemented).

            Intended to compensate for timestamp inaccuracies in scp transfers.
            """
            pass

        def size(self, *, human_readable=False):
            """Return the size reported by ``du -s -b`` (0 when the command fails).

            :param human_readable: return a formatted string instead of a byte count
            """
            # ``du -h`` could format too, but humanfriendly keeps the units
            # consistent with the local XlPath.size
            try:
                res = self.client.exec(f'du "{self}" -s -b')
            except SshCommandError:
                res = '0'

            m = re.match(r'\d+', res)
            sz = int(m.group()) if m else 0
            if human_readable:
                # format the computed byte count, not the bound method itself
                return humanfriendly.format_size(sz, binary=True)
            else:
                return sz

    self.Path = Path
219
+
220
def exec(self, command, *args, ignore_errors=False, pipe_in=None, **kwargs):
    """Simplified ``exec_command``: run ``command`` and return its stdout text.

    :param command: shell command to run on the server
    :param ignore_errors: by default any stderr output raises; set to true to
        return stdout regardless. Some tools write to stderr even on success
        (``nginx -t``, the anaconda installer); for fine-grained control call
        ``exec_command`` directly.
    :param pipe_in: text fed to the command's stdin, replacing manual
        interaction; it is uploaded to a temporary file and redirected with ``<``
    :param args, kwargs: forwarded to ``exec_command``
    :return str: everything the command wrote to stdout
    :raises SshCommandError: stderr was non-empty and ``ignore_errors`` is false
    """
    # Interactive commands are fed through a temporary file on the server
    host_file = None
    if pipe_in:
        self.exec('mkdir -p /tmp/pipeins')
        host_file = '/tmp/pipeins/' + XlPath.tempfile().name
        self.write_file(host_file, pipe_in, newline='\n')
        command = f'{command} < {host_file}'

    try:
        stdin, stdout, stderr = self.exec_command(command, *args, **kwargs)
        # NOTE(review): stderr is drained before stdout, as in the original code
        err_lines = list(stderr)
        if not ignore_errors and err_lines:  # TODO warnings raise as well, although harmless
            raise SshCommandError(f'服务器执行命令报错: {command},' + ''.join(err_lines).rstrip())
        return ''.join(stdout)
    finally:
        if host_file is not None:
            # Best-effort removal of the pipe-in file so /tmp/pipeins does not
            # grow without bound; a cleanup failure must not mask the result.
            try:
                self.exec(f'rm -f {host_file}', ignore_errors=True)
            except (paramiko.ssh_exception.SSHException, OSError):
                pass
247
+
248
def exec_script(self, main_cmd, script='', *, file=None, **kwargs):
    r""" Upload a script and run it on the server, e.g. a piece of Python code

    :param main_cmd: interpreter/command that receives the script path
    :param script: the script given as a string
    :param file: alternatively, a local file to upload (takes precedence)
    :param kwargs: forwarded to ``self.exec``
    :return str: stdout of the executed command
    :raises ValueError: neither ``script`` nor ``file`` was given

    [Example]
    ssh = XlSSHClient.log_in('xlpr10', 'chenkunze')
    text = textwrap.dedent('''\
    import os
    print(len(os.listdir()))
    ''')
    print(ssh.exec_script('python3', text))
    """
    # 0 Validate before touching the server or creating temp files
    if file is None and not script:
        raise ValueError(f'没有待执行的脚本')

    # 1 Turn the script into a file and upload it
    scp = scplib.SCPClient(self.get_transport())

    # The random name is generated locally; a clash on the server is very unlikely
    local_file = XlPath.tempfile()
    self.exec(f'mkdir -p /tmp/scripts')
    host_file = '/tmp/scripts/' + local_file.name

    try:
        if file is not None:
            shutil.copy2(XlPath(file), local_file)
        else:
            local_file.write_text(script)
        scp.put(local_file, host_file)
    finally:
        # remove the local copy even when the copy or the upload failed
        if local_file.exists():
            local_file.delete()

    # 2 Run it, then remove the uploaded script
    try:
        res = self.exec(f'{main_cmd} {host_file}', **kwargs)
    except Exception:
        # best-effort cleanup on failure; the original error is re-raised
        self.exec(f'rm -f {host_file}', ignore_errors=True)
        raise
    self.exec(f'rm {host_file}')
    return res
285
+
286
def __2_scp(self):
    """ Section marker: the methods below provide the scp functionality """
    pass
289
+
290
def get_scp(self):
    """Return a new ``scp.SCPClient`` bound to this connection's transport."""
    return scplib.SCPClient(self.get_transport())
293
+
294
def __local_dir(self, remote_path, local_dir):
    """Return the local directory that should receive ``remote_path``.

    :param remote_path: remote path object (must support ``relative_to``)
    :param local_dir: explicit target directory; when None it is derived from
        the first ``map_path`` rule whose remote root contains ``remote_path``
    :raises ValueError: no explicit directory and no matching ``map_path`` rule
        (including the case where ``map_path`` was never configured)
    """
    if local_dir is not None:
        return XlPath(local_dir)

    # map_path defaults to None in __init__; treat that as "no rules"
    for local_root, remote_root in (self.map_path or {}).items():
        try:
            relpath = remote_path.relative_to(remote_root)
        except ValueError:
            continue  # this rule does not cover remote_path
        return (XlPath(local_root) / relpath).parent
    raise ValueError(f'找不到对应的map_path路径映射规则 {remote_path}')
305
+
306
def __remote_dir(self, local_path, remote_dir):
    """Return the remote directory that should receive ``local_path``.

    :param local_path: local path object (must support ``relative_to``)
    :param remote_dir: explicit remote parent directory; when None it is derived
        from the first ``map_path`` rule whose local root contains ``local_path``
    :raises ValueError: no explicit directory and no matching ``map_path`` rule
        (including the case where ``map_path`` was never configured)
    """
    if remote_dir is not None:
        return self.Path(remote_dir)

    # map_path defaults to None in __init__; treat that as "no rules"
    for local_root, remote_root in (self.map_path or {}).items():
        try:
            relpath = local_path.relative_to(local_root)
        except ValueError:
            continue  # this rule does not cover local_path
        return (self.Path(remote_root) / relpath.as_posix()).parent
    raise ValueError('找不到对应的map_path路径映射规则')
317
+
318
def __check_filetype(self, local_path, remote_path):
    """ Check that both sides have the same kind, or that one side is missing

    ``exists_type`` encodes missing as 0, file as 1 and directory as -1, so the
    two sides are compatible exactly when the product is not negative.

    Use sparingly: the remote check costs a round trip to the server.

    :return: tuple ``(local_kind, remote_kind)``
    :raises TypeError: one side is a file and the other a directory
    """
    kinds = (local_path.exists_type(), remote_path.exists_type())
    if kinds[0] * kinds[1] < 0:
        raise TypeError(f'本地和服务器的文件类型不匹配 {local_path} {remote_path}')
    return kinds
330
+
331
def __scp_base(self, func, progress, from_path, to_dir, to_path, if_exists):
    """ Shared transfer logic behind scp_get and scp_put

    Args:
        func: the bound transfer function, ``scp.put`` or ``scp.get``
        progress: the ScpProgress callback attached to that scp client
        from_path: source path; local for scp.put, remote for scp.get
        to_dir: destination parent directory (the opposite side of from_path)
        to_path: destination path, i.e. to_dir / from_path.name
        if_exists: None or 'replace' transfers unconditionally, 'skip' keeps
            existing destination files, 'mtime' transfers only newer sources

    Returns:
        None

    Raises:
        ValueError: unknown ``if_exists`` value
    """

    def scp_core(mtime=False):
        # a / b: kind of source / destination (0 missing, 1 file, -1 directory)
        a, b = self.__check_filetype(from_path, to_path)
        if b == 0:  # nothing at the destination yet, transfer directly
            func(str(from_path), str(to_dir), recursive=True, preserve_times=True)
        elif a == b == 1:
            # both are files: only the mtime mode may overwrite
            if mtime and from_path.mtime() > to_path.mtime():
                func(str(from_path), str(to_dir), preserve_times=True)
        elif a == b == -1:
            to_paths = to_path.sub_rel_paths(mtime)
            if mtime:
                # either side may be the remote one, so fetch all mtimes in one go
                from_paths = from_path.sub_rel_paths(mtime)

            def put_dir(dir0):
                # 1 Collect the files and directories directly inside dir0
                sub_dirs, sub_files = [], []
                if isinstance(dir0, self.Path):
                    sub_files = [(dir0 / f) for f in dir0.sub_rel_paths(extcmd='-maxdepth 1 -type f')]
                    sub_dirs = [(dir0 / d) for d in dir0.sub_rel_paths(extcmd='-maxdepth 1 -type d')]
                else:
                    for p in dir0.glob('*'):  # local files
                        if p.is_file():
                            sub_files.append(p)
                        else:
                            sub_dirs.append(p)

                # 2 Transfer files
                sub_files = natural_sort(sub_files)
                for f in sub_files:
                    rf = f.relative_to(from_path).as_posix()
                    # the progress callback displays this relative name
                    progress.kwargs['rf'] = rf
                    # from_paths only exists in mtime mode; the short-circuit on
                    # ``not mtime`` keeps it from being read otherwise
                    if rf in to_paths and ((not mtime) or (from_paths[rf] <= to_paths[rf])):
                        # skipped file: shrink the bar total by its size
                        if progress.tqdm:
                            if isinstance(f, self.Path):
                                # nothing is downloaded, so use the local copy's size
                                # instead of querying the server
                                progress.tqdm.total -= (to_path / rf).size()
                            else:
                                progress.tqdm.total -= f.size()
                    else:
                        func(str(f), str(to_path / rf), preserve_times=True)

                # 3 Transfer directories
                sub_dirs = natural_sort(sub_dirs)
                for d in sub_dirs:
                    rd = d.relative_to(from_path).as_posix()
                    if rd in to_paths:
                        # exists on both sides: descend and compare file by file
                        put_dir(d)
                    else:
                        func(str(d), str(to_path / rd), recursive=True, preserve_times=True)

            put_dir(from_path)

    if if_exists is None or if_exists == 'replace':
        func(str(from_path), str(to_dir), recursive=True, preserve_times=True)
    elif if_exists == 'skip':
        scp_core(False)
    elif if_exists == 'mtime':
        scp_core(True)
    else:
        raise ValueError
409
+
410
def scp_get(self, remote_path=None, local_dir=None, *, local_path=None, info=True, limit_bytes=None,
            if_exists=None):
    """ Download ``remote_path`` from the server; see ``scp_put`` for the shared parameters

    :param remote_path: remote file or directory, as a string or a path object
    :param local_dir: local parent directory; derived from ``map_path`` when None
    :param local_path: alternative to ``remote_path``: give only the local path
        and let ``map_path`` resolve the remote counterpart
    :param info: 0 no progress output, 1 (or True) one overall progress bar
    :param limit_bytes: stop early once this many bytes were transferred
    :param if_exists: None/'replace', 'skip' or 'mtime'
    :return: the local path, or None when the remote path does not exist
    :raises ValueError: neither ``remote_path`` nor ``local_path`` was given

    get is probably somewhat slower than put, because it needs extra commands to
    query the server, while put can read most file information locally.

    >> self.scp_get('/home/datasets/doc3D', '/home/dataset')
    """
    if local_path:
        local_path = XlPath(local_path)
        remote_path = self.__remote_dir(local_path, None) / local_path.name
        local_dir = local_path.parent
    elif remote_path is None:
        raise ValueError('scp_get requires either remote_path or local_path')
    else:
        # Accept plain strings (as in the example above) as well as path objects
        as_posix = getattr(remote_path, 'as_posix', None)
        remote_path = self.Path(as_posix() if as_posix else str(remote_path))
        local_dir = self.__local_dir(remote_path, local_dir)
        local_path = local_dir / remote_path.name

    # Nothing to do when the remote path does not exist
    if not remote_path.exists_type():
        return

    if not local_dir.exists():
        local_dir.mkdir(parents=True, exist_ok=True)

    if info == 0:
        progress = ScpProgress(info, limit_bytes=limit_bytes)
    elif info == 1:
        progress = ScpProgress(info, desc=f'↓{local_path}',
                               total=remote_path.size(),
                               limit_bytes=limit_bytes)
    else:
        raise NotImplementedError

    scp = scplib.SCPClient(self.get_transport(), progress=progress)

    try:
        self.__scp_base(scp.get, progress, remote_path, local_dir, local_path, if_exists)
    except ScpLimitError:
        pass  # reaching limit_bytes is a deliberate early stop

    return local_path
452
+
453
def scp_put(self, local_path, remote_dir=None, *, mkdir=True, print_mode=True, limit_bytes=None, if_exists=None):
    """ Upload the local ``local_path`` to the server

    Close to the standard scp interface: force-uploads one file or one directory.

    :param local_path: a file or a directory
        When uploading a directory:
            child files with the same name are replaced
            files missing on the server are added
            files that exist only on the server are left alone
    :param remote_dir: remote *parent* directory (note: the parent)
        The uploaded file or directory keeps its name; to rename it, use the
        underlying scp interface directly.
        When None, the directory is derived from ``map_path``.
    :param mkdir: run ``mkdir -p`` for remote_dir first, in case it does not exist
    :param bool|int print_mode:
        0: no progress output
        1: overall upload progress (bytes)
        2: per-file progress (not implemented)
    :param limit_bytes: upper bound on transferred bytes, useful to fetch a small
        sample without moving a huge directory. The progress callback checks it
        only when a file completes, so the transfer can overshoot.
    :param if_exists:
        None, 'replace': overwrite
        'skip': keep the existing remote file
        'mtime': upload only when the local file is newer
    :return: the remote path

    >> self.scp_put('D:/home/chenkunze/test')
    >> self.scp_put('D:/home/chenkunze/test', '/home/chenkunze')
    """
    local_path = XlPath(local_path)
    remote_dir = self.__remote_dir(local_path, remote_dir)
    remote_path = remote_dir / local_path.name

    # Checking whether remote_dir exists costs a command as well, so just create it
    if mkdir:
        self.exec(f'mkdir -p "{remote_dir}"')

    # The transferred volume is tracked even when nothing is displayed
    progress_kwargs = {'limit_bytes': limit_bytes}
    if print_mode == 1:
        progress_kwargs.update(desc=f'↑{remote_path}', total=local_path.size())
    elif print_mode != 0:
        raise NotImplementedError
    progress = ScpProgress(print_mode, **progress_kwargs)

    # buff_size could be configured here
    client = scplib.SCPClient(self.get_transport(), progress=progress)

    try:
        self.__scp_base(client.put, progress, local_path, remote_dir, remote_path, if_exists)
    except ScpLimitError:
        pass

    return remote_path
507
+
508
def scp_put_brief(self, _dir, **kwargs):
    """ Upload the content of ``_dir`` while skipping files that are usually not synced

    ``.git`` / ``.idea`` entries and ``*.pyc`` files are ignored; directories are
    processed recursively with the same rule, files go through ``scp_put``.
    """
    skip_names = ('.git', '.idea')
    for entry in XlPath(_dir).glob('*'):
        if entry.suffix == '.pyc' or entry.name in skip_names:
            continue

        # directories recurse with the same filter, plain files are uploaded
        handler = self.scp_put_brief if entry.is_dir() else self.scp_put
        handler(entry, **kwargs)
520
+
521
def scp_sync(self, local_path, *, mkdir=True, info=True, limit_bytes=None):
    """ Synchronise a local path with its remote counterpart

    Not stable yet: timestamps may not be copied exactly, which can
    1. make the mtime comparison inexact, so files are transferred redundantly
    2. misjudge times, so a new file is not seen as newest and is missed

    Sync is simply one get followed by one put.
    ``limit_bytes`` is therefore not the total traffic but the cap applied to the
    download and to the upload separately. There is no ``if_exists`` parameter;
    both directions always compare mtime.
    """
    shared = dict(limit_bytes=limit_bytes, if_exists='mtime')
    self.scp_get(local_path=local_path, info=info, **shared)
    self.scp_put(local_path, mkdir=mkdir, print_mode=info, **shared)
534
+
535
def write_file(self, host_file, text, newline=None):
    """ Write a text file on the server

    Implemented as a special scp put: the text is written to a local temporary
    file, uploaded, and the temporary file is removed afterwards.

    :param host_file: destination path on the server
    :param text: file content
    :param newline: forwarded to ``write_text``. The default depends on the local
        operating system (usually \\r\\n on Windows). Remote servers are mostly
        unix, so such files should use \\n; this is mandatory for ``pipe_in``
        in ``exec``.
    """
    scp = scplib.SCPClient(self.get_transport())
    local_file = XlPath.tempfile()
    try:
        local_file.write_text(text, newline=newline)
        scp.put(local_file, host_file)
    finally:
        # do not leave the temporary file behind when the upload fails
        if local_file.exists():
            local_file.delete()
548
+
549
def __3_host_trace(self):
    # Section marker only (host account and resource-usage helpers follow); does nothing.
    pass
551
+
552
def set_user_passwd(self, name, passwd):
    """ Change the password of an account

    The password is hashed on the server with ``openssl passwd -1`` and applied
    with ``usermod --password``.

    How to set user passwords using passwd without a prompt? - Ask Ubuntu:
    https://askubuntu.com/questions/80444/how-to-set-user-passwords-using-passwd-without-a-prompt

    :param name: account name
    :param passwd: new plain-text password
    """
    import shlex

    # Quote both values: a password with spaces, quotes, ``$`` or ``;`` would
    # otherwise be mangled (or executed) by the remote shell. The command
    # substitution is double-quoted so the resulting hash stays a single word.
    self.exec(f'usermod --password "$(printf \'%s\\n\' {shlex.quote(passwd)} '
              f'| openssl passwd -1 -stdin)" {shlex.quote(name)}')
559
+
560
def add_user(self, name, passwd, sudo=False):
    """ Create a new account on the server

    :param name: user name
    :param passwd: password
    :param sudo: also add the account to the ``sudo`` group
    :raises ValueError: an account with this name is already listed in /etc/passwd
    """
    listing = self.exec("awk -F: '{ print $1}' /etc/passwd")
    known_users = listing.splitlines()
    if name in known_users:
        raise ValueError(f'{name} already exists')

    create_cmd = f'useradd -d /home/{name} -s /bin/bash -m {name}'
    self.exec(create_cmd)
    self.set_user_passwd(name, passwd)
    if not sudo:
        return
    self.exec(f'usermod -aG sudo {name}')
575
+
576
def check_cpu_usage(self, *, print_mode=False):
    """ Report CPU and memory usage per user

    :param print_mode: also print a table that includes a ``_total`` row
    :return dict: ``{user: [%cpu, %mem]}`` for users whose rounded cpu + mem is above zero
    """
    # 1 Fetch the raw figures with one remote pipeline
    list_processes = 'ps --no-headers -eo "pcpu,pmem,user"'  # one line per process
    # awk sums the cpu and memory percentages per user
    sum_by_user = """awk 'BEGIN{FS=OFS=" "}{a0[$3]+=$1; a1[$3]+=$2}END {for (i in a0) print i,a0[i],a1[i]}'"""
    order_rows = 'sort -rn -k1,2 -k2,3'  # ordering only, not required
    rows = self.exec('|'.join([list_processes, sum_by_user, order_rows])).splitlines()

    # 2 Aggregate per user
    user_usage = {}
    for row in rows:
        user, cpu_text, mem_text = row.split()
        cpu = round(float(cpu_text), 2)
        mem = round(float(mem_text), 2)
        if cpu + mem > 0.0:
            user_usage[user] = [cpu, mem]
    cpu_sum = sum([v[0] for v in user_usage.values()])
    mem_sum = sum([v[1] for v in user_usage.values()])
    user_usage['_total'] = [round(float(cpu_sum), 2), round(float(mem_sum), 2)]

    # 3 Output
    if print_mode:
        records = [[k, v[0], v[1]] for k, v in user_usage.items()]
        print(pd.DataFrame.from_records(records, columns=['user', '%cpu', '%mem']))

    # the total is for display only and is not part of the return value
    del user_usage['_total']
    return user_usage
606
+
607
def check_gpu_usage(self, *, print_mode=False):
    """ Report GPU memory usage per user, in GB

    Requires ``gpustat`` on the server (pip install gpustat).

    TODO also record the highest GPU temperature?

    :param print_mode: print the raw gpustat lines and a summary table
    :return dict: ``{user: GB}`` ordered from largest to smallest; memory that is
        in use but not attributed to any listed process is reported as ``_other``
        when it amounts to at least 0.01 GB. Empty when gpustat lists no GPU.
    """
    # 1 Fetch the raw output; the first line is the gpustat header
    lines = self.exec('gpustat').splitlines()[1:]
    if print_mode:
        print('\n'.join(lines))

    # 2 Aggregate per user
    user_usage = defaultdict(float)
    for line in lines:
        if not line.strip():
            continue
        # only the first three bars are column separators
        name, temperature, capacity, uses = line.split('|', 3)
        used, total = map(int, re.findall(r'\d+', capacity))
        # start from the card's used memory and subtract what processes account
        # for; the remainder is memory without a listed owner
        user_usage['_other'] += used / 1024
        for x in uses.split():
            m = re.search(r'(.+?)\((\d+)M\)', x)
            if m is None:  # token that is not of the form "user(123M)"
                continue
            user, one_used = m.groups()
            one_used = int(one_used) / 1024
            user_usage[user] += one_used
            user_usage['_other'] -= one_used

    # Largest first. Note: a jsonb column in PostgreSQL re-orders the keys.
    user_usage = {k: round(user_usage[k], 2) for k in sorted(user_usage, key=lambda k: -user_usage[k])}
    # '_other' is absent when no GPU line was parsed
    if user_usage.get('_other', 0) < 0.01:
        user_usage.pop('_other', None)

    user_usage['_total'] = round(sum(user_usage.values()), 2)

    # 3 Output
    if print_mode:
        df = pd.DataFrame.from_records([[k, v] for k, v in user_usage.items()], columns=['user', 'gpu_mem_GB'])
        print(df)

    # the total is for display only and is not part of the return value
    del user_usage['_total']
    return user_usage
645
+
646
def check_disk_usage(self, *, print_mode=False, timeout=1200):
    """ Report disk usage per directory under /home, in whole GB

    :param print_mode: also print a table that includes a ``_total`` row (the
        summed capacity of all /dev/ file systems)
    :param timeout: seconds allowed for the ``du`` scan of /home (20 minutes by default)
    :return dict: ``{name: GB}`` ordered from largest to smallest; ``_other`` is the
        used space of the /dev/ file systems that is not covered by the listed
        /home entries. Only entries above 1 GB are listed individually.
    """
    GB = 1024 ** 2  # df and du count in KB by default, so 1024 ** 2 KB is one GB

    # 1 Overall figures from the /dev/ file systems
    device_rows = [row for row in self.exec('df').splitlines()[1:] if row.startswith('/dev/')]
    used, total_memory = 0, 0
    for row in device_rows:
        # a regex instead of split(), so paths containing spaces do not shift columns
        size_kb, used_kb = map(int, re.search(r'\s+(\d+)\s+(\d+)\s+\d+\s+\d+', row).groups())
        used += used_kb
        total_memory += size_kb

    # 2 Usage of each entry directly under /home
    user_usage = defaultdict(int)
    for row in self.exec('du -d 1 /home', timeout=timeout).splitlines():  # bounded by timeout
        kb_text, path = row.split(maxsplit=1)
        owner = path[6:]  # strip the leading "/home/"; empty for /home itself
        if owner and int(kb_text) > GB:  # only entries above 1 GB are recorded
            user_usage[owner] += int(kb_text)

    user_usage['_other'] = used - sum(user_usage.values())
    in_gb = {k: (v // GB) for k, v in user_usage.items()}
    user_usage = {k: round(in_gb[k], 2) for k in sorted(in_gb, key=lambda k: -in_gb[k])}
    user_usage['_total'] = total_memory // GB

    # 3 Output
    if print_mode:
        records = [[k, v] for k, v in user_usage.items()]
        print(pd.DataFrame.from_records(records, columns=['user', 'disk_mem_GB']))

    # the total is for display only and is not part of the return value
    del user_usage['_total']
    return user_usage
682
+
683
def __4_运维(self):
    # Section marker only ("4: operations / maintenance"); does nothing.
    pass
685
+
686
def get_hostname(self):
    """Return the server's host name as printed by ``hostname``.

    ``exec`` returns stdout verbatim, including the trailing newline, so the
    surrounding whitespace is stripped to return the bare name.
    """
    return self.exec('hostname').strip()
688
+
689
def set_hostname(self, name):
    """Set the server's host name with ``hostnamectl`` and return the command output."""
    command = f'hostnamectl set-hostname {name}'
    return self.exec(command)
691
+
692
def restart_frps(self, frp_dir='/root/frp_0.37.0_linux_amd64'):
    """ Kill any running ``./frps`` and start it again from ``frp_dir``

    :param frp_dir: directory that contains the frps binary and frps.ini
    """
    # Kill every process whose command line starts with "./frps "
    kill_running = r"for i in $(ps -eo 'args,pid' | awk '/^.\/frps / {print $4}'); do kill ${i}; done"
    # Start frps again. frps.ini is passed explicitly; according to the original
    # note vhost_http_port does not work otherwise.
    relaunch = f"cd {frp_dir}; nohup ./frps -c ./frps.ini> /dev/null 2>&1 &"
    # Both steps are sent as one command: over a tunnelled ssh session the
    # connection would drop right after the kill.
    self.exec(f'{kill_running}; {relaunch}')
701
+
702
def restart_frpc(self, frp_dir='/root/frp_0.37.0_linux_amd64'):
    """ Kill any running ``./frpc`` and start it again from ``frp_dir``

    A system service is not always available, so the process is located by
    brute force through ``ps``.

    :param frp_dir: directory that contains the frpc binary and frpc.ini
    """
    # Kill every process whose command line starts with "./frpc "
    kill_running = r"for i in $(ps -eo 'args,pid' | awk '/^.\/frpc / {print $4}'); do kill ${i}; done"
    # Start frpc again
    relaunch = f"cd {frp_dir}; nohup ./frpc -c ./frpc.ini> /dev/null 2>&1 &"
    # Both steps are sent as one command: over a tunnelled ssh session the
    # connection would drop right after the kill.
    self.exec(f'{kill_running}; {relaunch}')
715
+
716
def __5_开发环境(self):
    # Section marker only ("5: development environment"); does nothing.
    pass
718
+
719
def download_file(self, url, package):
    """ Make sure ``/tmp/download/<package>`` exists on the server

    :param url: address fetched with ``wget`` when the file is not there yet
    :param package: file name expected under /tmp/download
    :return: remote path of the package
    """
    target = self.Path(f'/tmp/download/{package}')
    if target.is_file():
        return target

    # Not downloaded yet. wget reports progress on stderr, hence ignore_errors.
    self.exec(f'wget {url} -P /tmp/download', ignore_errors=True)
    # let other users read and execute the downloaded package as well
    self.exec(f'chmod 777 /tmp/download/{package}')
    return target
725
+
726
def reinstall_conda(self, package='Anaconda3-2023.03-Linux-x86_64.sh'):
    """ Reinstall anaconda for the current user

    :param package: installer to use; can be changed (finding the newest release
        automatically is a possible future improvement).
        According to the original note this release ships Python 3.9.12, by
        default at /root/anaconda3/bin/python.
    """
    mirror = 'https://mirrors.tuna.tsinghua.edu.cn/anaconda/archive'

    xllog.info('清除已有的anaconda3...')
    self.exec('rm -rf ~/anaconda3')

    xllog.info(f'下载文件:{package}')
    installer = self.download_file(f'{mirror}/{package}', package)

    xllog.info('自动安装anaconda3...')
    # answer the installer's prompts through stdin
    answers = '\nyes\n' * 2
    self.exec(f'bash {installer}', pipe_in=answers, ignore_errors=True)
740
+
741
def install_pytorch(self, cuda_version='10.2'):
    """ Install PyTorch into ~/anaconda3 and print whether CUDA is available

    Some drivers still require 10.2, which is why it is the default; newer
    machines may use 11.3.

    # TODO derive the CUDA version from nvidia-smi and pick the best matching build
    """
    install_cmd = ('anaconda3/bin/conda install '
                   f'pytorch torchvision torchaudio cudatoolkit={cuda_version} -c pytorch')
    self.exec(install_cmd, pipe_in='y\n')

    check_script = 'import torch\nprint(f"{torch.cuda.is_available()=}")'
    print(self.exec_script('anaconda3/bin/python', check_script))
751
+
752
def install_paddle(self, cuda_version='10.2'):
    """ Install paddlepaddle-gpu 2.3.2 into ~/anaconda3 and run paddle's self check

    https://www.paddlepaddle.org.cn/install

    The downloads are large: about 365M for cudatoolkit, 185M for cudnn and
    285M for paddle.
    """
    install_cmd = ('anaconda3/bin/conda install '
                   f'paddlepaddle-gpu==2.3.2 cudatoolkit={cuda_version} '
                   '--channel https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/Paddle/')
    self.exec(install_cmd, pipe_in='y\n')

    check_script = 'import paddle\npaddle.utils.run_check()'
    report = self.exec_script('anaconda3/bin/python', check_script, ignore_errors=True)
    print(report)
764
+
765
+
766
class XlSSHs:
    """ Manager for several servers, typically used for batch maintenance work """

    def __init__(self, sshs: dict = None):
        """ This class is usually subclassed and customised for convenience

        :param dict sshs:
            key: nickname
            value: an already connected ssh client
            The given dict is used as-is (not copied), also when it is empty.
        """
        # ``sshs or {}`` would silently replace a caller-owned empty dict
        self.sshs: dict[str, XlSSHClient] = {} if sshs is None else sshs

    def add_ssh(self, name, server, user, passwd):
        """ Connect to a server and register it; connection failures are only logged

        :param str name: nickname that identifies the connection
        :param str|list server:
            str: ip address, optionally with a port (22 by default)
            list: candidates that are tried in order after the first one fails
        :param user: user name
        :param passwd: password
        """
        try:
            self.sshs[name] = XlSSHClient(server, user, passwd, timeout=2)
        except (TimeoutError, socket.timeout, paramiko.ssh_exception.SSHException):
            xllog.warning(f'{name} {server} {user} 连接失败!')

    def run(self, func, parallel=False):
        """ Run ``func`` on every registered connection

        :param func: def func(ssh)
        :param parallel: run the connections in parallel. In parallel mode a
            SshCommandError becomes that server's result; in serial mode it propagates.
        :return dict: nickname -> whatever ``func`` returned
        """
        res = {}  # results of all runs, keyed by nickname

        def wrap_func(item):
            name, ssh = item
            msg = [f'【{name}】']
            try:
                out = func(ssh)
            except SshCommandError as e:
                out = e.args[0]
            res[name] = out
            if out:
                # func may return a non-string (e.g. a usage dict); join() needs text
                msg.append(str(out))
            print('\n'.join(msg) + '\n')

        if parallel:
            # parallel output is printed per server in one piece, otherwise it interleaves
            mtqdm(wrap_func, self.sshs.items(), max_workers=16, disable=True)
        else:
            # serial output can be printed as it arrives, so progress is visible at once
            for name, ssh in self.sshs.items():
                print(f'【{name}】')
                out = func(ssh)
                if out:
                    print(out)
                print()
                res[name] = out

        return res