re-common 2.0.1__py3-none-any.whl → 10.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196)
  1. re_common/baselibrary/__init__.py +4 -0
  2. re_common/baselibrary/baseabs/__init__.py +7 -0
  3. re_common/baselibrary/baseabs/baseabs.py +26 -0
  4. re_common/baselibrary/database/__init__.py +0 -0
  5. re_common/baselibrary/database/mbuilder.py +132 -0
  6. re_common/baselibrary/database/moudle.py +93 -0
  7. re_common/baselibrary/database/msqlite3.py +194 -0
  8. re_common/baselibrary/database/mysql.py +169 -0
  9. re_common/baselibrary/database/sql_factory.py +26 -0
  10. re_common/baselibrary/mthread/MThreadingRun.py +486 -0
  11. re_common/baselibrary/mthread/MThreadingRunEvent.py +349 -0
  12. re_common/baselibrary/mthread/__init__.py +3 -0
  13. re_common/baselibrary/mthread/mythreading.py +695 -0
  14. re_common/baselibrary/pakge_other/__init__.py +0 -0
  15. re_common/baselibrary/pakge_other/socks.py +404 -0
  16. re_common/baselibrary/readconfig/__init__.py +0 -0
  17. re_common/baselibrary/readconfig/config_factory.py +18 -0
  18. re_common/baselibrary/readconfig/ini_config.py +317 -0
  19. re_common/baselibrary/readconfig/toml_config.py +49 -0
  20. re_common/baselibrary/temporary/__init__.py +0 -0
  21. re_common/baselibrary/temporary/envdata.py +36 -0
  22. re_common/baselibrary/tools/__init__.py +0 -0
  23. re_common/baselibrary/tools/all_requests/__init__.py +0 -0
  24. re_common/baselibrary/tools/all_requests/aiohttp_request.py +118 -0
  25. re_common/baselibrary/tools/all_requests/httpx_requet.py +102 -0
  26. re_common/baselibrary/tools/all_requests/mrequest.py +412 -0
  27. re_common/baselibrary/tools/all_requests/requests_request.py +81 -0
  28. re_common/baselibrary/tools/batch_compre/__init__.py +0 -0
  29. re_common/baselibrary/tools/batch_compre/bijiao_batch.py +31 -0
  30. re_common/baselibrary/tools/contrast_db3.py +123 -0
  31. re_common/baselibrary/tools/copy_file.py +39 -0
  32. re_common/baselibrary/tools/db3_2_sizedb3.py +102 -0
  33. re_common/baselibrary/tools/foreachgz.py +40 -0
  34. re_common/baselibrary/tools/get_attr.py +11 -0
  35. re_common/baselibrary/tools/image_to_pdf.py +62 -0
  36. re_common/baselibrary/tools/java_code_deal.py +139 -0
  37. re_common/baselibrary/tools/javacode.py +79 -0
  38. re_common/baselibrary/tools/mdb_db3.py +48 -0
  39. re_common/baselibrary/tools/merge_file.py +171 -0
  40. re_common/baselibrary/tools/merge_gz_file.py +165 -0
  41. re_common/baselibrary/tools/mhdfstools/__init__.py +0 -0
  42. re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py +42 -0
  43. re_common/baselibrary/tools/mhdfstools/hdfst.py +42 -0
  44. re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py +38 -0
  45. re_common/baselibrary/tools/mongo_tools.py +50 -0
  46. re_common/baselibrary/tools/move_file.py +170 -0
  47. re_common/baselibrary/tools/move_mongo/__init__.py +0 -0
  48. re_common/baselibrary/tools/move_mongo/mongo_table_to_file.py +63 -0
  49. re_common/baselibrary/tools/move_mongo/move_mongo_table.py +354 -0
  50. re_common/baselibrary/tools/move_mongo/use_mttf.py +18 -0
  51. re_common/baselibrary/tools/move_mongo/use_mv.py +93 -0
  52. re_common/baselibrary/tools/mpandas/__init__.py +0 -0
  53. re_common/baselibrary/tools/mpandas/mpandasreadexcel.py +125 -0
  54. re_common/baselibrary/tools/mpandas/pandas_visualization.py +8 -0
  55. re_common/baselibrary/tools/myparsel.py +104 -0
  56. re_common/baselibrary/tools/rename_dir_file.py +37 -0
  57. re_common/baselibrary/tools/sequoiadb_utils.py +398 -0
  58. re_common/baselibrary/tools/split_line_to_many.py +25 -0
  59. re_common/baselibrary/tools/stringtodicts.py +33 -0
  60. re_common/baselibrary/tools/workwechant_bot.py +84 -0
  61. re_common/baselibrary/utils/__init__.py +0 -0
  62. re_common/baselibrary/utils/baseaiohttp.py +296 -0
  63. re_common/baselibrary/utils/baseaiomysql.py +87 -0
  64. re_common/baselibrary/utils/baseallstep.py +191 -0
  65. re_common/baselibrary/utils/baseavro.py +19 -0
  66. re_common/baselibrary/utils/baseboto3.py +291 -0
  67. re_common/baselibrary/utils/basecsv.py +32 -0
  68. re_common/baselibrary/utils/basedict.py +133 -0
  69. re_common/baselibrary/utils/basedir.py +241 -0
  70. re_common/baselibrary/utils/baseencode.py +351 -0
  71. re_common/baselibrary/utils/baseencoding.py +29 -0
  72. re_common/baselibrary/utils/baseesdsl.py +86 -0
  73. re_common/baselibrary/utils/baseexcel.py +264 -0
  74. re_common/baselibrary/utils/baseexcept.py +109 -0
  75. re_common/baselibrary/utils/basefile.py +654 -0
  76. re_common/baselibrary/utils/baseftp.py +214 -0
  77. re_common/baselibrary/utils/basegzip.py +60 -0
  78. re_common/baselibrary/utils/basehdfs.py +135 -0
  79. re_common/baselibrary/utils/basehttpx.py +268 -0
  80. re_common/baselibrary/utils/baseip.py +87 -0
  81. re_common/baselibrary/utils/basejson.py +2 -0
  82. re_common/baselibrary/utils/baselist.py +32 -0
  83. re_common/baselibrary/utils/basemotor.py +190 -0
  84. re_common/baselibrary/utils/basemssql.py +98 -0
  85. re_common/baselibrary/utils/baseodbc.py +113 -0
  86. re_common/baselibrary/utils/basepandas.py +302 -0
  87. re_common/baselibrary/utils/basepeewee.py +11 -0
  88. re_common/baselibrary/utils/basepika.py +180 -0
  89. re_common/baselibrary/utils/basepydash.py +143 -0
  90. re_common/baselibrary/utils/basepymongo.py +230 -0
  91. re_common/baselibrary/utils/basequeue.py +22 -0
  92. re_common/baselibrary/utils/baserar.py +57 -0
  93. re_common/baselibrary/utils/baserequest.py +279 -0
  94. re_common/baselibrary/utils/baseset.py +8 -0
  95. re_common/baselibrary/utils/basesmb.py +403 -0
  96. re_common/baselibrary/utils/basestring.py +382 -0
  97. re_common/baselibrary/utils/basetime.py +320 -0
  98. re_common/baselibrary/utils/basetuple.py +0 -0
  99. re_common/baselibrary/utils/baseurl.py +121 -0
  100. re_common/baselibrary/utils/basezip.py +57 -0
  101. re_common/baselibrary/utils/core/__init__.py +8 -0
  102. re_common/baselibrary/utils/core/bottomutils.py +18 -0
  103. re_common/baselibrary/utils/core/mdeprecated.py +327 -0
  104. re_common/baselibrary/utils/core/mlamada.py +16 -0
  105. re_common/baselibrary/utils/core/msginfo.py +25 -0
  106. re_common/baselibrary/utils/core/requests_core.py +103 -0
  107. re_common/baselibrary/utils/fateadm.py +429 -0
  108. re_common/baselibrary/utils/importfun.py +123 -0
  109. re_common/baselibrary/utils/mfaker.py +57 -0
  110. re_common/baselibrary/utils/my_abc/__init__.py +3 -0
  111. re_common/baselibrary/utils/my_abc/better_abc.py +32 -0
  112. re_common/baselibrary/utils/mylogger.py +414 -0
  113. re_common/baselibrary/utils/myredisclient.py +861 -0
  114. re_common/baselibrary/utils/pipupgrade.py +21 -0
  115. re_common/baselibrary/utils/ringlist.py +85 -0
  116. re_common/baselibrary/utils/version_compare.py +36 -0
  117. re_common/baselibrary/utils/ydmhttp.py +126 -0
  118. re_common/facade/__init__.py +1 -0
  119. re_common/facade/lazy_import.py +11 -0
  120. re_common/facade/loggerfacade.py +25 -0
  121. re_common/facade/mysqlfacade.py +467 -0
  122. re_common/facade/now.py +31 -0
  123. re_common/facade/sqlite3facade.py +257 -0
  124. re_common/facade/use/__init__.py +0 -0
  125. re_common/facade/use/mq_use_facade.py +83 -0
  126. re_common/facade/use/proxy_use_facade.py +20 -0
  127. re_common/libtest/__init__.py +0 -0
  128. re_common/libtest/base_dict_test.py +19 -0
  129. re_common/libtest/baseavro_test.py +13 -0
  130. re_common/libtest/basefile_test.py +14 -0
  131. re_common/libtest/basemssql_test.py +77 -0
  132. re_common/libtest/baseodbc_test.py +8 -0
  133. re_common/libtest/basepandas_test.py +38 -0
  134. re_common/libtest/get_attr_test/__init__.py +0 -0
  135. re_common/libtest/get_attr_test/get_attr_test_settings.py +14 -0
  136. re_common/libtest/get_attr_test/settings.py +55 -0
  137. re_common/libtest/idencode_test.py +54 -0
  138. re_common/libtest/iniconfig_test.py +35 -0
  139. re_common/libtest/ip_test.py +35 -0
  140. re_common/libtest/merge_file_test.py +20 -0
  141. re_common/libtest/mfaker_test.py +9 -0
  142. re_common/libtest/mm3_test.py +32 -0
  143. re_common/libtest/mylogger_test.py +89 -0
  144. re_common/libtest/myparsel_test.py +28 -0
  145. re_common/libtest/mysql_test.py +151 -0
  146. re_common/libtest/pymongo_test.py +21 -0
  147. re_common/libtest/split_test.py +12 -0
  148. re_common/libtest/sqlite3_merge_test.py +6 -0
  149. re_common/libtest/sqlite3_test.py +34 -0
  150. re_common/libtest/tomlconfig_test.py +30 -0
  151. re_common/libtest/use_tools_test/__init__.py +3 -0
  152. re_common/libtest/user/__init__.py +5 -0
  153. re_common/studio/__init__.py +5 -0
  154. re_common/studio/assignment_expressions.py +37 -0
  155. re_common/studio/mydash/__init__.py +0 -0
  156. re_common/studio/mydash/test1.py +19 -0
  157. re_common/studio/pydashstudio/__init__.py +0 -0
  158. re_common/studio/pydashstudio/first.py +9 -0
  159. re_common/studio/streamlitstudio/__init__.py +0 -0
  160. re_common/studio/streamlitstudio/first_app.py +66 -0
  161. re_common/studio/streamlitstudio/uber_pickups.py +24 -0
  162. re_common/studio/test.py +19 -0
  163. re_common/v2/baselibrary/utils/author_smi.py +14 -3
  164. re_common/v2/baselibrary/utils/stringutils.py +1 -0
  165. re_common/vip/__init__.py +0 -0
  166. re_common/vip/base_step_process.py +11 -0
  167. re_common/vip/baseencodeid.py +91 -0
  168. re_common/vip/changetaskname.py +28 -0
  169. re_common/vip/core_var.py +24 -0
  170. re_common/vip/mmh3Hash.py +90 -0
  171. re_common/vip/proxy/__init__.py +0 -0
  172. re_common/vip/proxy/allproxys.py +127 -0
  173. re_common/vip/proxy/allproxys_thread.py +159 -0
  174. re_common/vip/proxy/cnki_proxy.py +153 -0
  175. re_common/vip/proxy/kuaidaili.py +87 -0
  176. re_common/vip/proxy/proxy_all.py +113 -0
  177. re_common/vip/proxy/update_kuaidaili_0.py +42 -0
  178. re_common/vip/proxy/wanfang_proxy.py +152 -0
  179. re_common/vip/proxy/wp_proxy_all.py +182 -0
  180. re_common/vip/read_rawid_to_txt.py +92 -0
  181. re_common/vip/title/__init__.py +5 -0
  182. re_common/vip/title/transform/TransformBookTitleToZt.py +125 -0
  183. re_common/vip/title/transform/TransformConferenceTitleToZt.py +139 -0
  184. re_common/vip/title/transform/TransformCstadTitleToZt.py +196 -0
  185. re_common/vip/title/transform/TransformJournalTitleToZt.py +203 -0
  186. re_common/vip/title/transform/TransformPatentTitleToZt.py +132 -0
  187. re_common/vip/title/transform/TransformRegulationTitleToZt.py +114 -0
  188. re_common/vip/title/transform/TransformStandardTitleToZt.py +135 -0
  189. re_common/vip/title/transform/TransformThesisTitleToZt.py +135 -0
  190. re_common/vip/title/transform/__init__.py +11 -0
  191. {re_common-2.0.1.dist-info → re_common-10.0.1.dist-info}/METADATA +1 -1
  192. re_common-10.0.1.dist-info/RECORD +213 -0
  193. re_common-2.0.1.dist-info/RECORD +0 -25
  194. {re_common-2.0.1.dist-info → re_common-10.0.1.dist-info}/LICENSE +0 -0
  195. {re_common-2.0.1.dist-info → re_common-10.0.1.dist-info}/WHEEL +0 -0
  196. {re_common-2.0.1.dist-info → re_common-10.0.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,214 @@
1
+ import os
2
+ import socket
3
+ import threading
4
+ import time
5
+ import traceback
6
+ from ftplib import FTP
7
+
8
+ import socks
9
+
10
+
11
class BaseFtp(object):
    """Thin convenience wrapper around :class:`ftplib.FTP`.

    Most methods forward directly to the wrapped ``FTP`` object stored in
    ``self.ftp``. Configuration-style methods return ``self`` so that calls
    can be chained.
    """

    def __init__(self, encoding='utf-8'):
        """Create an unconnected FTP client using *encoding* for file names."""
        self.ftp = FTP()
        self.ftp.encoding = encoding

    def set_proxy_socks(self, type, ip, port):
        """Route sockets through a SOCKS proxy.

        NOTE: this replaces ``socket.socket`` for the whole process, not only
        for this FTP client.

        :param type: proxy type constant, e.g. ``socks.PROXY_TYPE_SOCKS5``
        :param ip: proxy host
        :param port: proxy port
        :return: self
        """
        socks.set_default_proxy(type, ip, port)
        socket.socket = socks.socksocket
        return self

    def conn_ftp(self, ftp_host, port=21):
        """Connect to *ftp_host*:*port* and return self."""
        self.ftp.connect(ftp_host, port)
        return self

    def login(self, username, passwd):
        """Log in with the given credentials and return self."""
        self.ftp.login(username, passwd)
        return self

    def set_conn_login_pare(self, ftp_host, port, username, passwd):
        """Store the connection parameters later used by :meth:`conn_and_login`.

        :return: self
        """
        self.ftp_host = ftp_host
        self.port = port
        self.username = username
        self.passwd = passwd
        return self

    def conn_and_login(self):
        """Connect and log in using the parameters stored by
        :meth:`set_conn_login_pare`; return self."""
        self.conn_ftp(self.ftp_host, self.port)
        self.login(self.username, self.passwd)
        return self

    def cwd(self, dir):
        """Change the remote working directory to *dir*.

        :return: the server response returned by ``FTP.cwd``
        """
        return self.ftp.cwd(dir)

    def pwd(self):
        """Return the current remote working directory."""
        return self.ftp.pwd()

    def size(self, remotefile):
        """Return the result of ``FTP.size`` for *remotefile*."""
        return self.ftp.size(remotefile)

    def voidcmd(self, cmd):
        """Send a simple command and return the server response.

        Behaves like ``FTP.sendcmd`` but raises when the response code is
        outside the 200-299 range. Typical commands: ``'TYPE I'`` or
        ``f"MDTM {ftp_path}"`` (fetch the modification time).
        """
        return self.ftp.voidcmd(cmd)

    def retrlines(self, cmd, callback=None):
        """Run a line-oriented retrieval command.

        ``LIST`` lines look like Unix ``ls -l`` output, e.g.
        ``dr-x------ 3 user group 0 Jan 26 11:10 some-name``.

        :param cmd: ``RETR``, ``LIST`` or ``NLST`` command (``LIST`` returns
            detailed entries, ``NLST`` only names); ``MLSD`` gives a more
            structured listing
        :param callback: optional one-argument callable invoked for every
            line with the trailing CRLF removed, e.g. ``some_list.append``
        :return: the server response returned by ``FTP.retrlines``
        """
        return self.ftp.retrlines(cmd, callback)

    def set_pasv(self, val):
        """Enable passive mode when *val* is true, disable it otherwise."""
        return self.ftp.set_pasv(val)

    def get_file_info(self, file_str):
        """Parse one Unix-style ``LIST`` line into a dict.

        The first eight whitespace-separated fields map to ``file_info``
        (the permission string, whose first character is the entry type:
        ``-`` file, ``d`` directory, ``l`` link, ``b``/``c`` device,
        ``p`` pipe, ``s`` socket), ``file_hardlink_num``, ``user``,
        ``group``, ``size`` and three ``time`` tokens. Everything after
        the eighth field is the file name and is kept verbatim, so names
        containing consecutive spaces are no longer collapsed.

        Keys for fields missing from a short line are simply absent;
        ``time`` and ``filename`` are always present (possibly empty).

        :param file_str: a single listing line
        :return: dict of the parsed fields (all values are strings)
        """
        # maxsplit=8 keeps the remainder (the file name) untouched.
        parts = file_str.split(None, 8)
        info = {"time": "", "filename": ""}
        field_names = ("file_info", "file_hardlink_num", "user", "group", "size")
        for key, value in zip(field_names, parts):
            info[key] = value
        info["time"] = " ".join(parts[5:8])
        if len(parts) > 8:
            info["filename"] = parts[8].rstrip()
        return info

    def get_file_info_MLSD(self, file_str):
        """Parse one ``MLSD`` listing line into a dict.

        A line has the form ``fact=value;fact=value;...; filename``. Facts
        are looked up by name (case-insensitively) rather than by position,
        so servers that send facts in a different order, send additional
        facts, or use lower-case fact names are handled, and a ``;`` inside
        the file name is preserved.

        :param file_str: a single MLSD line
        :return: dict with ``size``, ``time`` (the ``Modify`` fact),
            ``type`` and ``filename``; a missing fact yields ``""``
        """
        # Facts never contain spaces, so the first space separates them
        # from the file name.
        facts_part, _, filename = file_str.partition(" ")
        facts = {}
        for fact in facts_part.split(";"):
            name, sep, value = fact.partition("=")
            if sep:
                facts[name.strip().lower()] = value
        return {
            "size": facts.get("size", ""),
            "time": facts.get("modify", ""),
            "type": facts.get("type", ""),
            "filename": filename,
        }

    def down_file(self, ftp_path, local_path, blocksize=1024):
        """Download *ftp_path* to *local_path*, resuming a partial file.

        If *local_path* already exists and is not larger than the remote
        file, the transfer resumes from its current size. If the local file
        is larger than the remote one it is discarded and downloaded from
        scratch.

        :param ftp_path: remote file path
        :param local_path: local destination path
        :param blocksize: number of bytes requested per ``recv`` call
        :return: True on success; False if any error occurred (the
            traceback is printed and the control connection is shut down)
        """
        local_file_size = 0
        if os.path.exists(local_path):
            local_file_size = os.path.getsize(local_path)
        try:
            ftp_file_size = self.size(ftp_path)
            if ftp_file_size is not None and local_file_size > ftp_file_size:
                # The local copy cannot be a prefix of the remote file.
                local_file_size = 0
            # 'TYPE I' selects binary transfer mode.
            self.voidcmd('TYPE I')
            # A rest offset is only sent when there is something to skip.
            conn = self.ftp.transfercmd('RETR ' + ftp_path, local_file_size or None)
            try:
                # 'wb' really resets the file; truncate() on an append-mode
                # handle is a no-op because the position is already at EOF.
                with open(local_path, 'ab' if local_file_size else 'wb') as file:
                    recv_num = 0
                    times = 0  # recv calls since the last progress line
                    while True:
                        data = conn.recv(blocksize)
                        times = times + 1
                        recv_num = recv_num + len(data)
                        if times >= 1000:
                            print("P/T {}:{} Time {}".format(os.getpid(), threading.get_ident(),
                                                             time.strftime('%Y-%m-%d %H:%M:%S',
                                                                           time.localtime(time.time()))), recv_num)
                            times = 0
                            recv_num = 0
                        if not data:
                            break
                        file.write(data)
            finally:
                # Always release the data connection.
                conn.close()
            # Consume the transfer-complete reply (expected to start with "2").
            result = self.ftp.voidresp()
            print(result)
            # NOOP performs no action; the server just answers OK.
            result = self.voidcmd('NOOP')
            print(result)
            return True
        except Exception:
            # Report the real failure first; quit() may itself fail on a
            # broken connection and must not mask it.
            traceback.print_exc()
            try:
                self.ftp.quit()
            except Exception:
                self.ftp.close()
            return False
@@ -0,0 +1,60 @@
1
+ import gzip
2
+
3
+ from re_common.baselibrary.utils.basefile import BaseFile
4
+
5
+
6
class BaseGzip(object):
    """Small helper for gzip-compressing, decompressing and reading files."""

    def __init__(self, bufsize, fin=None, fout=None):
        """
        :param bufsize: number of bytes read per chunk while copying
        :param fin: optional already-open input stream
        :param fout: optional already-open output stream
        """
        self.bufsize = bufsize
        self.fin = fin
        self.fout = fout

    def compress(self, src, dst):
        """Gzip-compress the file *src* into *dst*.

        :param src: path of the plain input file
        :param dst: path of the gzip file to create
        :return: None
        """
        self.fin = open(src, 'rb')
        try:
            self.fout = gzip.open(dst, 'wb')
        except Exception:
            # Do not leak the input handle if the output cannot be opened.
            self.fin.close()
            raise

        self.__in2out()

    def decompress(self, gzFile, dst):
        """Decompress the gzip file *gzFile* into *dst*.

        :param gzFile: path of the gzip input file
        :param dst: path of the plain file to create
        :return: None
        """
        self.fin = gzip.open(gzFile, 'rb')
        try:
            self.fout = open(dst, 'wb')
        except Exception:
            # Do not leak the input handle if the output cannot be opened.
            self.fin.close()
            raise

        self.__in2out()

    def __in2out(self):
        """Copy ``self.fin`` to ``self.fout`` in ``bufsize`` chunks.

        Both streams are closed afterwards, even if reading or writing
        fails part-way through.
        """
        try:
            while True:
                buf = self.fin.read(self.bufsize)
                if not buf:
                    break
                self.fout.write(buf)
        finally:
            try:
                self.fin.close()
            finally:
                self.fout.close()

    def read_gz_file(self, file):
        """Yield every line of the gzip file *file*, decoded with the
        default codec of ``bytes.decode`` and stripped of surrounding
        whitespace."""
        with gzip.open(file, 'r') as f:
            for lineb in f:
                line = lineb.decode()
                yield line.strip()

    @classmethod
    def get_gz_line_num(cls, file):
        """Return the number of lines in the gzip file *file*.

        Returns 0 when ``BaseFile.is_file_exists`` reports that the file
        does not exist, or when the file is empty. The count is the real
        number of lines; the previous implementation returned the index of
        the last line, i.e. one less than the line count.
        """
        if not BaseFile.is_file_exists(file):
            return 0
        with gzip.open(file, 'rb') as f:
            return sum(1 for _ in f)
@@ -0,0 +1,135 @@
1
+ import os
2
+ import sys
3
+ import time
4
+
5
+ from pyhdfs import HdfsClient
6
+
7
+
8
class BaseHDFS(object):
    """Bulk download/upload helper on top of a ``pyhdfs.HdfsClient``.

    Callers set ``hdfsdir``, ``localdir``, ``namenode`` and ``user_name``
    on the instance, call :meth:`get_client`, and then use the transfer
    methods. Progress is reported with ``print``.
    """

    def __init__(self):
        self.hdfsdir = ""
        self.localdir = ""
        self.namenode = ""
        self.user_name = ""
        self.total = 0
        self.client = None
        self.FileSize = 0  # bytes transferred so far
        self.FailedList = list()  # files whose transfer failed
        self.StartTime = time.time()

    def get_client(self):
        """Create the HDFS client from ``namenode`` and ``user_name``."""
        self.client = HdfsClient(hosts=self.namenode, user_name=self.user_name)

    def mk_hdfs_dirs(self, path):
        """Create the HDFS directory *path* and return the client's result.

        NOTE(review): the original comment states that creating an existing
        directory does not fail and does not delete its files - confirm
        against the pyhdfs documentation.
        """
        return self.client.mkdirs(path)

    def exists(self, path):
        """Return whether *path* exists on HDFS."""
        return self.client.exists(path)

    def _elapsed(self):
        """Return the seconds elapsed since this object was created."""
        return time.time() - self.StartTime

    def _mb_per_sec(self):
        """Return the average throughput in MB/s (0.0 if no time elapsed)."""
        elapsed = self._elapsed()
        if elapsed <= 0:
            return 0.0
        return self.FileSize / 1024 / 1024 / elapsed

    def _local_path_for(self, srcFile):
        """Map an HDFS file path below ``hdfsdir`` to a path below ``localdir``."""
        rel_path = srcFile[len(self.hdfsdir):].lstrip('/')
        # Build the path from its components so the platform separator is
        # used instead of a hard-coded backslash.
        return os.path.join(self.localdir, *rel_path.split('/'))

    def get_all_files_num(self):
        """Count all files below ``hdfsdir``, store the count in
        ``self.total`` and return it."""
        assert isinstance(self.client, HdfsClient)
        total = sum(len(filenames) for _, _, filenames in self.client.walk(self.hdfsdir))
        self.total = total
        return total

    def get_all_files(self):
        """Download every file below ``hdfsdir`` into ``localdir``.

        The directory layout is mirrored locally. Files that fail the size
        check are appended to ``self.FailedList``.
        """
        assert isinstance(self.client, HdfsClient)
        processed = 0
        for parent, dirnames, filenames in self.client.walk(self.hdfsdir):
            for filename in filenames:
                srcFile = '%s/%s' % (parent, filename)
                dstFile = self._local_path_for(srcFile)
                if not self.down_proc_one(srcFile, dstFile):
                    self.FailedList.append(srcFile)
                processed += 1
                print('%s:' % self.hdfsdir)
                print('%d/%d/%d, time cost: %.2f s' % (
                    self.total, processed, len(self.FailedList), self._elapsed()))
                print('%d B, %.2f MB/s \n' % (self.FileSize, self._mb_per_sec()))

    def down_proc_one(self, srcFile, dstFile):
        """Download one HDFS file to *dstFile*.

        The download is skipped when *dstFile* already exists with the same
        size as the remote file; otherwise any existing file is overwritten.

        :return: True if the local file ends up with the remote size,
            False otherwise
        """
        print('ProcOne \n%s\n -> \n%s ' % (srcFile, dstFile))
        dstDir = os.path.dirname(dstFile)
        if dstDir:
            # dstDir is empty for a bare file name; exist_ok avoids a race
            # between checking for and creating the directory.
            os.makedirs(dstDir, exist_ok=True)

        remote_size = self.client.list_status(srcFile)[0].length

        # Destination already present with identical size: nothing to do.
        if os.path.exists(dstFile) and os.path.getsize(dstFile) == remote_size:
            print('file exists: %s ' % dstFile)
            return True

        # NOTE: an existing destination file is overwritten.
        self.client.copy_to_local(srcFile, dstFile, overwrite=True)

        local_size = os.path.getsize(dstFile)
        if local_size != remote_size:  # verify the size
            return False

        self.FileSize += local_size
        return True

    def up_proc_one(self, client, srcFile, dstFile):
        """Upload one local file to HDFS using *client*.

        The upload is skipped when *dstFile* already exists with the same
        size as the local file; otherwise any existing file is overwritten.

        :return: True if the remote file ends up with the local size,
            False otherwise
        """
        print('ProcOne \n%s\n -> \n%s ' % (srcFile, dstFile))
        local_size = os.path.getsize(srcFile)

        # Destination already present with identical size: nothing to do.
        if client.exists(dstFile) and \
                local_size == client.list_status(dstFile)[0].length:
            print('file exists: %s ' % dstFile)
            return True

        # NOTE: an existing destination file is overwritten.
        client.copy_from_local(srcFile, dstFile, overwrite=True)

        if local_size == client.list_status(dstFile)[0].length:  # verify the size
            self.FileSize += local_size
            return True

        return False

    def up_all_files(self):
        """Upload every entry of ``localdir`` (non-recursive) to ``hdfsdir``.

        NOTE: terminates the process with ``sys.exit(-1)`` when ``hdfsdir``
        does not exist on HDFS. Failed uploads are collected in
        ``self.FailedList`` and a summary is printed at the end.
        """
        assert isinstance(self.client, HdfsClient)
        if not self.client.exists(self.hdfsdir):
            print(self.hdfsdir + ' not found')
            sys.exit(-1)
        # List once so the total and the loop see the same entries.
        filenames = os.listdir(self.localdir)
        total = len(filenames)
        processed = 0
        for filename in filenames:
            srcFile = os.path.join(self.localdir, filename)
            dstFile = self.hdfsdir + '/' + filename
            if not self.up_proc_one(self.client, srcFile, dstFile):
                self.FailedList.append(srcFile)
            processed += 1
            print(
                '%d/%d/%d, time cost: %.2f s' % (total, processed, len(self.FailedList), self._elapsed()))
            print('%d B, %.2f MB/s \n' % (self.FileSize, self._mb_per_sec()))

        if self.FailedList:
            print('failedList: %s' % repr(self.FailedList))
        else:
            print('Good! No Error!')
        print('%d B, %.2f MB, %.2f GB, %.2f MB/s' %
              (self.FileSize, self.FileSize / 1024 / 1024, self.FileSize / 1024 / 1024 / 1024,
               self._mb_per_sec()))
@@ -0,0 +1,268 @@
1
+ """
2
+ basehttpx
3
+ """
4
+ import traceback
5
+
6
+ import httpx
7
+ import urllib3
8
+ from httpx import Timeout
9
+ from httpx._config import UNSET
10
+
11
+ from re_common.baselibrary.utils.core.mlamada import closeResult
12
+ from re_common.baselibrary.utils.core.requests_core import USER_AGENT, set_proxy_httpx
13
+
14
+ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
15
+
16
+
17
class BaseHttpx(object):
    """Convenience wrapper around ``httpx`` for sync and async requests.

    Besides thin ``get``/``post`` wrappers it offers session helpers that
    validate a response (status code, expected ending, marker strings) and
    return a ``(ok, reason, response)`` tuple.
    """

    def __init__(self, logger=None):
        """
        :param logger: logger to use; when omitted a stream logger from
            ``re_common.baselibrary.MLogger`` is created
        """
        if logger is None:
            from re_common.baselibrary import MLogger
            logger = MLogger().streamlogger
        self.logger = logger
        self.proxies = None
        self.headers = {}
        self.sn = None  # sync client created by creat_sn
        self.as_sn = None  # async client created by create_async_client

    def httpx_get(self, url,
                  *,
                  params=None,
                  headers=None,
                  cookies=None,
                  auth=None,
                  allow_redirects=True,
                  cert=None,
                  verify=True,
                  timeout=Timeout(timeout=5.0),
                  trust_env=True):
        """Send a one-off GET request with ``httpx.get``.

        Example::

            r = BaseHttpx().httpx_get('https://httpbin.org/get',
                                      params={'key1': 'value1'})
            r.status_code, r.headers['content-type'], r.text, r.json()

        :param url: request URL
        :param allow_redirects: forwarded to httpx as ``follow_redirects``
        :return: the ``httpx`` response
        """
        return httpx.get(url, params=params, headers=headers, cookies=cookies,
                         auth=auth, follow_redirects=allow_redirects, cert=cert,
                         verify=verify, timeout=timeout, trust_env=trust_env)

    def httpx_post(self, url, *,
                   data=None,
                   files=None,
                   json=None,
                   params=None,
                   headers=None,
                   cookies=None,
                   auth=None,
                   allow_redirects=True,
                   cert=None,
                   verify=True,
                   timeout=Timeout(timeout=5.0),
                   trust_env=True):
        """Send a one-off POST request with ``httpx.post``.

        :param url: request URL
        :param data: form data, e.g. ``{'key': 'value'}``
        :param allow_redirects: forwarded to httpx as ``follow_redirects``
        :return: the ``httpx`` response
        """
        r = httpx.post(url, data=data, json=json, files=files,
                       params=params, headers=headers, cookies=cookies,
                       auth=auth, follow_redirects=allow_redirects, cert=cert,
                       verify=verify, timeout=timeout, trust_env=trust_env)
        return r

    def creat_sn(self, proxy=None, headers=None, verify=False, **kwargs):
        """Create a synchronous ``httpx.Client`` and remember it as ``self.sn``.

        :param proxy: proxy description converted with ``set_proxy_httpx``
            and passed to the client as ``proxies``
        :param headers: header mapping, or the string ``"default"`` to send
            only the library's default ``User-Agent``
        :param verify: TLS verification flag passed to the client
        :param kwargs: further ``httpx.Client`` keyword arguments
        :return: the created client
        """
        if proxy:
            kwargs["proxies"] = set_proxy_httpx(proxy)
            self.proxies = kwargs["proxies"]
        if headers == "default":
            # The string is a marker, not a mapping: build the header dict
            # instead of assigning into the string.
            kwargs["headers"] = {'User-Agent': USER_AGENT}
        elif headers:
            kwargs["headers"] = headers
        kwargs["verify"] = verify
        sn = httpx.Client(**kwargs)
        self.sn = sn
        return sn

    def sn_close(self):
        """Close the client created by :meth:`creat_sn`, if any."""
        if self.sn is not None:
            self.sn.close()

    async def create_async_client(self, **kwargs):
        """Create an ``httpx.AsyncClient`` and remember it as ``self.as_sn``.

        :param kwargs: ``httpx.AsyncClient`` keyword arguments
        :return: the created client
        """
        sn = httpx.AsyncClient(**kwargs)
        self.as_sn = sn
        return sn

    async def as_sn_close(self):
        """Close the client created by :meth:`create_async_client`, if any."""
        if self.as_sn is not None:
            # AsyncClient is closed with aclose(); it has no close() method.
            await self.as_sn.aclose()

    def _send_and_check(self, send, endstring, marks):
        """Run *send* and validate the response it returns.

        :param send: zero-argument callable performing the request
        :param endstring: if non-empty, the stripped body must end with it
        :param marks: optional iterable of strings that must all occur in
            the body
        :return: ``(ok, reason, response)`` where *reason* is ``""``,
            ``"httpx"`` (request raised), ``"code"`` (status is not 200),
            ``"endString"`` or ``"Feature err"``
        """
        r = None
        exMsg = None
        try:
            r = send()
        except Exception:
            exMsg = '* ' + traceback.format_exc()
            self.logger.error(exMsg)
        finally:
            closeResult(r)

        if exMsg:
            self.logger.info("判断到except,请求出项错误{}".format(exMsg))
            return False, "httpx", r

        if r.status_code != 200:
            self.logger.warning('r.status_code:' + str(r.status_code))
            return False, "code", r

        if endstring:
            # The body may be HTML, JSON, ...; only enable this check when
            # the expected ending of the document is known.
            html = r.text.strip()
            if not html.endswith(endstring):
                self.logger.info("not endswith {}".format(endstring))
                return False, "endString", r

        if marks:
            # Verify that the page is the wanted one (not an error page or
            # a truncated download) by looking for marker strings or tags.
            html = r.text.strip()
            for mark in marks:
                if html.find(mark) == -1:
                    self.logger.info('not found {}'.format(mark))
                    return False, "Feature err", r
                else:
                    self.logger.info("found mark is {}".format(mark))

        return True, "", r

    def base_sn_httpx(self, url, sn, endstring="", marks=None, **kwargs):
        """GET *url* through the client *sn* and validate the response.

        :param url: request URL
        :param sn: client object providing ``get``
        :param endstring: if non-empty, the stripped body must end with it
        :param marks: optional list of strings that must all occur in the body
        :param kwargs: forwarded to ``sn.get``
        :return: ``(ok, reason, response)``; *reason* is one of ``""``,
            ``"httpx"``, ``"code"``, ``"endString"``, ``"Feature err"``
        """
        return self._send_and_check(lambda: sn.get(url=url, **kwargs), endstring, marks)

    def base_sn_post_httpx(self, url, sn, data=None, endstring="", marks=None, **kwargs):
        """POST *data* to *url* through the client *sn* and validate the response.

        :param url: request URL
        :param sn: client object providing ``post``
        :param data: request body passed as ``data``
        :param endstring: if non-empty, the stripped body must end with it
        :param marks: optional list of strings that must all occur in the body
        :param kwargs: forwarded to ``sn.post``
        :return: ``(ok, reason, response)``; *reason* is one of ``""``,
            ``"httpx"``, ``"code"``, ``"endString"``, ``"Feature err"``
        """
        return self._send_and_check(lambda: sn.post(url=url, data=data, **kwargs), endstring, marks)

    async def httpx_asyncclient(self, url, params=None,
                                headers=None,
                                cookies=None,
                                auth=None,
                                allow_redirects=True,
                                timeout=UNSET):
        """Send a GET request with a short-lived ``httpx.AsyncClient``.

        :param url: request URL
        :param allow_redirects: forwarded to httpx as ``follow_redirects``
        :param timeout: per-request timeout; when left at the default the
            client's own default timeout applies
        :return: the ``httpx`` response
        """
        request_kwargs = dict(params=params, headers=headers, cookies=cookies,
                              auth=auth, follow_redirects=allow_redirects)
        if timeout is not UNSET:
            # Only forward an explicit timeout; the UNSET sentinel is not a
            # usable per-request timeout value.
            request_kwargs["timeout"] = timeout
        async with httpx.AsyncClient() as client:
            r = await client.get(url, **request_kwargs)
            return r

    async def httpx_asyncclient_post(self, url, *,
                                     data=None,
                                     files=None,
                                     json=None,
                                     params=None,
                                     headers=None,
                                     cookies=None,
                                     auth=None,
                                     allow_redirects=True,
                                     timeout=UNSET):
        """Send a POST request with a short-lived ``httpx.AsyncClient``.

        :param url: request URL
        :param allow_redirects: forwarded to httpx as ``follow_redirects``
        :param timeout: per-request timeout; when left at the default the
            client's own default timeout applies
        :return: the ``httpx`` response
        """
        request_kwargs = dict(data=data, files=files, json=json,
                              params=params, headers=headers, cookies=cookies,
                              auth=auth, follow_redirects=allow_redirects)
        if timeout is not UNSET:
            request_kwargs["timeout"] = timeout
        async with httpx.AsyncClient() as client:
            r = await client.post(url, **request_kwargs)
            return r

    async def check_http2(self, url, **kwargs):
        """Check whether *url* is served over HTTP/2.

        :param url: request URL
        :param kwargs: forwarded to ``client.get``
        :return: ``(is_http2, http_version)`` where *http_version* is
            "HTTP/1.0", "HTTP/1.1" or "HTTP/2"
        """
        # The context manager closes the client, which was previously leaked.
        async with httpx.AsyncClient(http2=True) as client:
            response = await client.get(url, **kwargs)
            http_version = response.http_version
        if http_version == "HTTP/2":
            return True, http_version
        else:
            return False, http_version