re-common 10.0.21__py3-none-any.whl → 10.0.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199) hide show
  1. re_common/baselibrary/__init__.py +4 -4
  2. re_common/baselibrary/baseabs/__init__.py +6 -6
  3. re_common/baselibrary/baseabs/baseabs.py +26 -26
  4. re_common/baselibrary/database/mbuilder.py +132 -132
  5. re_common/baselibrary/database/moudle.py +93 -93
  6. re_common/baselibrary/database/msqlite3.py +194 -194
  7. re_common/baselibrary/database/mysql.py +169 -169
  8. re_common/baselibrary/database/sql_factory.py +26 -26
  9. re_common/baselibrary/mthread/MThreadingRun.py +486 -486
  10. re_common/baselibrary/mthread/MThreadingRunEvent.py +349 -349
  11. re_common/baselibrary/mthread/__init__.py +2 -2
  12. re_common/baselibrary/mthread/mythreading.py +695 -695
  13. re_common/baselibrary/pakge_other/socks.py +404 -404
  14. re_common/baselibrary/readconfig/config_factory.py +18 -18
  15. re_common/baselibrary/readconfig/ini_config.py +317 -317
  16. re_common/baselibrary/readconfig/toml_config.py +49 -49
  17. re_common/baselibrary/temporary/envdata.py +36 -36
  18. re_common/baselibrary/tools/all_requests/aiohttp_request.py +118 -118
  19. re_common/baselibrary/tools/all_requests/httpx_requet.py +102 -102
  20. re_common/baselibrary/tools/all_requests/mrequest.py +412 -412
  21. re_common/baselibrary/tools/all_requests/requests_request.py +81 -81
  22. re_common/baselibrary/tools/batch_compre/bijiao_batch.py +31 -31
  23. re_common/baselibrary/tools/contrast_db3.py +123 -123
  24. re_common/baselibrary/tools/copy_file.py +39 -39
  25. re_common/baselibrary/tools/db3_2_sizedb3.py +102 -102
  26. re_common/baselibrary/tools/foreachgz.py +39 -39
  27. re_common/baselibrary/tools/get_attr.py +10 -10
  28. re_common/baselibrary/tools/image_to_pdf.py +61 -61
  29. re_common/baselibrary/tools/java_code_deal.py +139 -139
  30. re_common/baselibrary/tools/javacode.py +79 -79
  31. re_common/baselibrary/tools/mdb_db3.py +48 -48
  32. re_common/baselibrary/tools/merge_file.py +171 -171
  33. re_common/baselibrary/tools/merge_gz_file.py +165 -165
  34. re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py +42 -42
  35. re_common/baselibrary/tools/mhdfstools/hdfst.py +42 -42
  36. re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py +38 -38
  37. re_common/baselibrary/tools/mongo_tools.py +50 -50
  38. re_common/baselibrary/tools/move_file.py +170 -170
  39. re_common/baselibrary/tools/move_mongo/mongo_table_to_file.py +63 -63
  40. re_common/baselibrary/tools/move_mongo/move_mongo_table.py +354 -354
  41. re_common/baselibrary/tools/move_mongo/use_mttf.py +18 -18
  42. re_common/baselibrary/tools/move_mongo/use_mv.py +93 -93
  43. re_common/baselibrary/tools/mpandas/mpandasreadexcel.py +125 -125
  44. re_common/baselibrary/tools/mpandas/pandas_visualization.py +7 -7
  45. re_common/baselibrary/tools/myparsel.py +104 -104
  46. re_common/baselibrary/tools/rename_dir_file.py +37 -37
  47. re_common/baselibrary/tools/sequoiadb_utils.py +398 -398
  48. re_common/baselibrary/tools/split_line_to_many.py +25 -25
  49. re_common/baselibrary/tools/stringtodicts.py +33 -33
  50. re_common/baselibrary/tools/workwechant_bot.py +84 -84
  51. re_common/baselibrary/utils/baseaiohttp.py +296 -296
  52. re_common/baselibrary/utils/baseaiomysql.py +87 -87
  53. re_common/baselibrary/utils/baseallstep.py +191 -191
  54. re_common/baselibrary/utils/baseavro.py +19 -19
  55. re_common/baselibrary/utils/baseboto3.py +291 -291
  56. re_common/baselibrary/utils/basecsv.py +32 -32
  57. re_common/baselibrary/utils/basedict.py +133 -133
  58. re_common/baselibrary/utils/basedir.py +241 -241
  59. re_common/baselibrary/utils/baseencode.py +351 -351
  60. re_common/baselibrary/utils/baseencoding.py +28 -28
  61. re_common/baselibrary/utils/baseesdsl.py +86 -86
  62. re_common/baselibrary/utils/baseexcel.py +264 -264
  63. re_common/baselibrary/utils/baseexcept.py +109 -109
  64. re_common/baselibrary/utils/basefile.py +654 -654
  65. re_common/baselibrary/utils/baseftp.py +214 -214
  66. re_common/baselibrary/utils/basegzip.py +60 -60
  67. re_common/baselibrary/utils/basehdfs.py +135 -135
  68. re_common/baselibrary/utils/basehttpx.py +268 -268
  69. re_common/baselibrary/utils/baseip.py +87 -87
  70. re_common/baselibrary/utils/basejson.py +2 -2
  71. re_common/baselibrary/utils/baselist.py +32 -32
  72. re_common/baselibrary/utils/basemotor.py +190 -190
  73. re_common/baselibrary/utils/basemssql.py +98 -98
  74. re_common/baselibrary/utils/baseodbc.py +113 -113
  75. re_common/baselibrary/utils/basepandas.py +302 -302
  76. re_common/baselibrary/utils/basepeewee.py +11 -11
  77. re_common/baselibrary/utils/basepika.py +180 -180
  78. re_common/baselibrary/utils/basepydash.py +143 -143
  79. re_common/baselibrary/utils/basepymongo.py +230 -230
  80. re_common/baselibrary/utils/basequeue.py +22 -22
  81. re_common/baselibrary/utils/baserar.py +57 -57
  82. re_common/baselibrary/utils/baserequest.py +279 -279
  83. re_common/baselibrary/utils/baseset.py +8 -8
  84. re_common/baselibrary/utils/basesmb.py +403 -403
  85. re_common/baselibrary/utils/basestring.py +382 -382
  86. re_common/baselibrary/utils/basetime.py +320 -320
  87. re_common/baselibrary/utils/baseurl.py +121 -121
  88. re_common/baselibrary/utils/basezip.py +57 -57
  89. re_common/baselibrary/utils/core/__init__.py +7 -7
  90. re_common/baselibrary/utils/core/bottomutils.py +18 -18
  91. re_common/baselibrary/utils/core/mdeprecated.py +327 -327
  92. re_common/baselibrary/utils/core/mlamada.py +16 -16
  93. re_common/baselibrary/utils/core/msginfo.py +25 -25
  94. re_common/baselibrary/utils/core/requests_core.py +103 -103
  95. re_common/baselibrary/utils/fateadm.py +429 -429
  96. re_common/baselibrary/utils/importfun.py +123 -123
  97. re_common/baselibrary/utils/mfaker.py +57 -57
  98. re_common/baselibrary/utils/my_abc/__init__.py +3 -3
  99. re_common/baselibrary/utils/my_abc/better_abc.py +32 -32
  100. re_common/baselibrary/utils/mylogger.py +414 -414
  101. re_common/baselibrary/utils/myredisclient.py +861 -861
  102. re_common/baselibrary/utils/pipupgrade.py +21 -21
  103. re_common/baselibrary/utils/ringlist.py +85 -85
  104. re_common/baselibrary/utils/version_compare.py +36 -36
  105. re_common/baselibrary/utils/ydmhttp.py +126 -126
  106. re_common/facade/lazy_import.py +11 -11
  107. re_common/facade/loggerfacade.py +25 -25
  108. re_common/facade/mysqlfacade.py +467 -467
  109. re_common/facade/now.py +31 -31
  110. re_common/facade/sqlite3facade.py +257 -257
  111. re_common/facade/use/mq_use_facade.py +83 -83
  112. re_common/facade/use/proxy_use_facade.py +19 -19
  113. re_common/libtest/base_dict_test.py +19 -19
  114. re_common/libtest/baseavro_test.py +13 -13
  115. re_common/libtest/basefile_test.py +14 -14
  116. re_common/libtest/basemssql_test.py +77 -77
  117. re_common/libtest/baseodbc_test.py +7 -7
  118. re_common/libtest/basepandas_test.py +38 -38
  119. re_common/libtest/get_attr_test/get_attr_test_settings.py +14 -14
  120. re_common/libtest/get_attr_test/settings.py +54 -54
  121. re_common/libtest/idencode_test.py +53 -53
  122. re_common/libtest/iniconfig_test.py +35 -35
  123. re_common/libtest/ip_test.py +34 -34
  124. re_common/libtest/merge_file_test.py +20 -20
  125. re_common/libtest/mfaker_test.py +8 -8
  126. re_common/libtest/mm3_test.py +31 -31
  127. re_common/libtest/mylogger_test.py +88 -88
  128. re_common/libtest/myparsel_test.py +27 -27
  129. re_common/libtest/mysql_test.py +151 -151
  130. re_common/libtest/pymongo_test.py +21 -21
  131. re_common/libtest/split_test.py +11 -11
  132. re_common/libtest/sqlite3_merge_test.py +5 -5
  133. re_common/libtest/sqlite3_test.py +34 -34
  134. re_common/libtest/tomlconfig_test.py +30 -30
  135. re_common/libtest/use_tools_test/__init__.py +2 -2
  136. re_common/libtest/user/__init__.py +4 -4
  137. re_common/studio/__init__.py +4 -4
  138. re_common/studio/assignment_expressions.py +36 -36
  139. re_common/studio/mydash/test1.py +18 -18
  140. re_common/studio/pydashstudio/first.py +9 -9
  141. re_common/studio/streamlitstudio/first_app.py +65 -65
  142. re_common/studio/streamlitstudio/uber_pickups.py +23 -23
  143. re_common/studio/test.py +18 -18
  144. re_common/v2/baselibrary/decorators/utils.py +59 -59
  145. re_common/v2/baselibrary/s3object/baseboto3.py +230 -230
  146. re_common/v2/baselibrary/tools/WeChatRobot.py +79 -79
  147. re_common/v2/baselibrary/tools/ac_ahocorasick.py +75 -75
  148. re_common/v2/baselibrary/tools/dict_tools.py +37 -37
  149. re_common/v2/baselibrary/tools/dolphinscheduler.py +187 -187
  150. re_common/v2/baselibrary/tools/hdfs_data_processer.py +338 -338
  151. re_common/v2/baselibrary/tools/list_tools.py +65 -65
  152. re_common/v2/baselibrary/tools/search_hash_tools.py +54 -54
  153. re_common/v2/baselibrary/tools/text_matcher.py +326 -326
  154. re_common/v2/baselibrary/tools/unionfind_tools.py +60 -60
  155. re_common/v2/baselibrary/utils/BusinessStringUtil.py +196 -196
  156. re_common/v2/baselibrary/utils/author_smi.py +360 -360
  157. re_common/v2/baselibrary/utils/base_string_similarity.py +158 -158
  158. re_common/v2/baselibrary/utils/basedict.py +37 -37
  159. re_common/v2/baselibrary/utils/basehdfs.py +161 -161
  160. re_common/v2/baselibrary/utils/basepika.py +180 -180
  161. re_common/v2/baselibrary/utils/basetime.py +77 -77
  162. re_common/v2/baselibrary/utils/db.py +38 -38
  163. re_common/v2/baselibrary/utils/json_cls.py +16 -16
  164. re_common/v2/baselibrary/utils/mq.py +83 -83
  165. re_common/v2/baselibrary/utils/n_ary_expression_tree.py +243 -243
  166. re_common/v2/baselibrary/utils/string_bool.py +149 -149
  167. re_common/v2/baselibrary/utils/string_clear.py +204 -202
  168. re_common/v2/baselibrary/utils/string_smi.py +18 -18
  169. re_common/v2/baselibrary/utils/stringutils.py +213 -213
  170. re_common/vip/base_step_process.py +11 -11
  171. re_common/vip/baseencodeid.py +90 -90
  172. re_common/vip/changetaskname.py +28 -28
  173. re_common/vip/core_var.py +24 -24
  174. re_common/vip/mmh3Hash.py +89 -89
  175. re_common/vip/proxy/allproxys.py +127 -127
  176. re_common/vip/proxy/allproxys_thread.py +159 -159
  177. re_common/vip/proxy/cnki_proxy.py +153 -153
  178. re_common/vip/proxy/kuaidaili.py +87 -87
  179. re_common/vip/proxy/proxy_all.py +113 -113
  180. re_common/vip/proxy/update_kuaidaili_0.py +42 -42
  181. re_common/vip/proxy/wanfang_proxy.py +152 -152
  182. re_common/vip/proxy/wp_proxy_all.py +181 -181
  183. re_common/vip/read_rawid_to_txt.py +91 -91
  184. re_common/vip/title/__init__.py +5 -5
  185. re_common/vip/title/transform/TransformBookTitleToZt.py +125 -125
  186. re_common/vip/title/transform/TransformConferenceTitleToZt.py +139 -139
  187. re_common/vip/title/transform/TransformCstadTitleToZt.py +195 -195
  188. re_common/vip/title/transform/TransformJournalTitleToZt.py +203 -203
  189. re_common/vip/title/transform/TransformPatentTitleToZt.py +132 -132
  190. re_common/vip/title/transform/TransformRegulationTitleToZt.py +114 -114
  191. re_common/vip/title/transform/TransformStandardTitleToZt.py +135 -135
  192. re_common/vip/title/transform/TransformThesisTitleToZt.py +135 -135
  193. re_common/vip/title/transform/__init__.py +10 -10
  194. {re_common-10.0.21.dist-info → re_common-10.0.22.dist-info}/LICENSE +201 -201
  195. {re_common-10.0.21.dist-info → re_common-10.0.22.dist-info}/METADATA +16 -16
  196. re_common-10.0.22.dist-info/RECORD +227 -0
  197. {re_common-10.0.21.dist-info → re_common-10.0.22.dist-info}/WHEEL +1 -1
  198. re_common-10.0.21.dist-info/RECORD +0 -227
  199. {re_common-10.0.21.dist-info → re_common-10.0.22.dist-info}/top_level.txt +0 -0
@@ -1,486 +1,486 @@
1
- import os
2
- import threading
3
- import time
4
- from abc import ABC, abstractmethod
5
-
6
- from re_common.baselibrary.mthread.mythreading import ThreadPoolManger, ThreadInfo, ThreadVal
7
- from re_common.baselibrary.utils.ringlist import RingList
8
- from re_common.facade.loggerfacade import get_streamlogger
9
-
10
-
11
- class especialThreadName(object):
12
-
13
- def __init__(self):
14
- self.taskthreadname = "my_threading_taskthread_1"
15
- self.proxythreadname = "my_threading_proxythread_1"
16
- self.dealresultthreadname = "my_threading_dealresult_1"
17
-
18
- def list_name(self):
19
- return [self.taskthreadname, self.proxythreadname, self.dealresultthreadname]
20
-
21
-
22
- class MThreadingRun(ABC):
23
- def __init__(self, num, logger=None):
24
- if logger:
25
- self.logger = logger
26
- else:
27
- self.logger = get_streamlogger()
28
- self.etn = especialThreadName()
29
- # 线程数
30
- self.threadingnum = num
31
- # 代理列表
32
- self.list_proxy = RingList()
33
- # 线程池
34
- self.thread_pool = ThreadPoolManger(self.threadingnum, self.logger)
35
- self.thread_pool.set_callback(self.thread_pool_hook)
36
- # 结果集
37
- self.results = []
38
- # 线程方法
39
- self.func = self.fun
40
- # 结果数
41
- self.resultnum = 0
42
- # 工作线程数
43
- self.jobnum = 0
44
- # 结果集被处理标志 默认被处理 是为了兼容之前的代码不去改动
45
- self.dealresultstatus = True
46
- # 結果到达该数量后处理结果 默认及时处理
47
- self.dealresultsnum = 0
48
- # 代理设置时间
49
- self.proxytime = 0
50
- # 标识event set之后是否对任务进行了设置
51
- # self.eventbool = False
52
- self.modle = 1
53
- # 在任务和处理结果时event信号的状态
54
- self.result_event_status = True
55
- self.task_event_status = True
56
- self.thread_run_lock = threading.Lock()
57
-
58
- # 默认每次处理的结果数
59
- self.once_result_num = 100
60
-
61
- # 全局使用特殊的单词
62
- self.BREAK = "break"
63
-
64
- # 进程号
65
- if hasattr(os, 'getpid'):
66
- self.pid = os.getpid()
67
- else:
68
- self.pid = None
69
-
70
- def set_is_restart(self, name, is_restart):
71
- self.thread_pool.especial_thread_pool_dicts[name].set_is_restart(is_restart)
72
-
73
- @abstractmethod
74
- def setProxy(self, proxysList=None):
75
- """
76
- 将代理加入到循环队列中 self.list_proxy
77
- :param proxysList:
78
- :return:
79
- """
80
- pass
81
-
82
- @abstractmethod
83
- def fun(self, threadval, *args, **kwargs):
84
- pass
85
-
86
- @abstractmethod
87
- def thread_pool_hook(self, threadinfo: ThreadInfo) -> dict:
88
- """
89
- 钩子函数 可以被重写
90
- 主要重写里面的dicts部分
91
- :return:
92
- """
93
- return {}
94
-
95
- @abstractmethod
96
- def setTask(self, results=None, *args, **kwargs):
97
- # self.thread_pool.event.set() # 自动释放信号
98
- # 当设置任务和处理结果使用同一个连接时尽量使用event信号保证不同时执行sql在一个执行未返回时
99
- # 请查询较大数据时主动释放
100
- pass
101
-
102
- @abstractmethod
103
- def getTask(self, *args, **kwargs):
104
- """
105
- 主要用于mysql的请求
106
- :param args:
107
- :param kwargs:
108
- :return:
109
- """
110
- pass
111
-
112
- @abstractmethod
113
- def dealresult(self, *args, **kwargs):
114
- """
115
- 如果没有在这里处理 请将 self.dealresultstatus = False这样不会丢数据
116
- :param args:
117
- :param kwargs:
118
- :return:
119
- """
120
- pass
121
-
122
- def set_task(self, *args, **kwargs):
123
- """
124
- 设置任务,在没有event限制的情况下将两个步骤写在一起
125
- :param args:
126
- :param kwargs:
127
- :return:
128
- """
129
- while True:
130
- results = self.getTask(*args, **kwargs)
131
- result = self.setTask(results, *args, **kwargs)
132
- if result == self.BREAK:
133
- break
134
-
135
- def deal_results(self, *args, **kwargs):
136
- """
137
- 该函数用于没有信号的机制里
138
- 使用信号主要是因为多线程无法同时使用一个链接
139
- 有可能出现错误
140
- :param args:
141
- :param kwargs:
142
- :return:
143
- """
144
- while True:
145
- # 处理结果标识
146
- self.dealresultstatus = True
147
- # 从结果队列获取结果到results
148
- self.getreustlFromQueue()
149
- if len(
150
- self.results) > self.dealresultsnum or self.thread_pool.work_queue.is_empty() or not self.thread_pool.thread_queue:
151
- if len(self.results) > 0 or not self.thread_pool.thread_queue.is_empty():
152
- # 处理结果
153
- self.dealresult()
154
- if self.dealresultstatus:
155
- # 处理完结果后要清理
156
- self.results.clear()
157
- else:
158
- time.sleep(10)
159
- else:
160
- time.sleep(3)
161
-
162
- def setfunc(self, func):
163
- # 设置线程方法
164
- self.func = func
165
-
166
- def add_job(self, func, *args, **kwargs):
167
- self.jobnum += 1
168
- self.thread_pool.add_job(func, *args, **kwargs)
169
-
170
- def getreustlFromQueue(self):
171
- """
172
- 从结果队列获取结果到results
173
- 默认取100
174
- :return:
175
- """
176
- once_result_num = 0
177
- while not self.thread_pool.result_queue.is_empty():
178
- self.resultnum += 1
179
- once_result_num += 1
180
- if once_result_num > self.once_result_num:
181
- return
182
- result = self.thread_pool.result_queue.get()
183
- self.results.append(result)
184
- self.thread_pool.result_queue.task_done()
185
-
186
- def checkResultsfininsh(self, *args, **kwargs):
187
- """
188
- 该函数用于处理运行到最后时结果不足100的情况
189
- :return:
190
- """
191
- if self.thread_pool.work_queue.is_empty():
192
- t1 = len(self.results)
193
- self.logger.info("self.results len is %s " % str(t1))
194
- if t1 != 0:
195
- return False
196
- else:
197
- return True
198
-
199
- def other(self):
200
- self.logger.info("工作队列任务量为{},结果队列任务量为{}".format(self.thread_pool.work_queue.get_size(),
201
- self.thread_pool.result_queue.get_size()))
202
- # 通过工作队列和结果队列观察是否结束
203
- if self.thread_pool.checkThreadRunFinish():
204
- self.logger.info("初次判断任务已经结束,各个队列为空")
205
- return True
206
- else:
207
- return False
208
-
209
- def check_especial_thread(self):
210
- task = self.set_task
211
- proxy = self.setProxy
212
- result = self.deal_results
213
-
214
- nowThreadsName = self.thread_pool.get_now_thread()
215
- for name in list(self.thread_pool.especial_thread_pool_dicts.keys()):
216
- thread = self.thread_pool.especial_thread_pool_dicts[name].get_thread()
217
- # 如果线程字典为空 代表已被删除
218
- if name in nowThreadsName and thread.is_alive():
219
- # print(name + ": is run")
220
- pass # 当前某线程名包含在初始化线程组中,可以认为线程仍在运行
221
- else:
222
- self.logger.info("name is :" + name + "; 没有在线程中")
223
- if name in self.etn.list_name():
224
- if name == self.etn.taskthreadname:
225
- taskin = task
226
- elif name == self.etn.proxythreadname:
227
- taskin = proxy
228
- elif name == self.etn.dealresultthreadname:
229
- taskin = result
230
- else:
231
- raise Exception("没有对应的任务,请检查")
232
- is_start = False
233
- if name in self.thread_pool.especial_thread_pool_dicts:
234
- threadinfo = self.thread_pool.especial_thread_pool_dicts[name]
235
- if threadinfo.get_thread().is_alive():
236
- is_start = True
237
- if not threadinfo.get_is_restart():
238
- is_start = True
239
- if not is_start:
240
- self.logger.info('重启中:' + name)
241
- args = self.thread_pool.especial_thread_pool_dicts[name].get_args()
242
- kwargs = self.thread_pool.especial_thread_pool_dicts[name].get_kwargs()
243
- thread = self.thread_pool.set_add_especial_thread(taskin, name, *args, **kwargs)
244
- thread.start()
245
-
246
- def start_especial_thread(self):
247
- # 开启一个线程设置任务
248
- self.thread_pool.set_add_especial_thread(self.set_task, self.etn.taskthreadname)
249
- self.thread_pool.set_add_especial_thread(self.setProxy, self.etn.proxythreadname)
250
- self.thread_pool.set_add_especial_thread(self.deal_results, self.etn.dealresultthreadname)
251
- self.thread_pool.especial_start()
252
-
253
- def is_break(self):
254
- return False
255
-
256
- def run(self):
257
- self.start_especial_thread()
258
- while True:
259
- time.sleep(3)
260
- self.thread_pool.checkThread()
261
- self.check_especial_thread()
262
- if self.other():
263
- if not self.checkResultsfininsh():
264
- continue
265
- else:
266
- print("进入other 判断 再次确认finish")
267
- if self.thread_pool.work_queue.is_empty() and self.thread_pool.result_queue.is_empty() \
268
- and len(self.results) == 0:
269
- print("运行完毕")
270
- if self.is_break():
271
- print("10 s break")
272
- time.sleep(10)
273
- break
274
- else:
275
- time.sleep(60)
276
-
277
-
278
- class MThreadingRun2(ABC):
279
- """
280
- 多线程的封装
281
- """
282
-
283
- def __init__(self, num, logger=None):
284
- # 日志模块,如果无会自定义一个只打印不输入到文件的logger
285
- if logger:
286
- self.logger = logger
287
- else:
288
- self.logger = get_streamlogger()
289
- # 特殊线程的类,对名称进行了统一
290
- self.etn = especialThreadName()
291
- # 工作线程数,不包含特殊线程
292
- self.threadingnum = num
293
- # 代理列表 RingList是一个环形列表
294
- self.list_proxy = RingList()
295
- # 工作线程池
296
- self.thread_pool = ThreadPoolManger(self.threadingnum, self.logger)
297
- # 钩子函数,将thread_pool的钩子函数暴露到这一层,方便使用
298
- self.thread_pool.set_callback(self.thread_pool_hook)
299
- # 线程方法
300
- self.func = self.fun
301
- # 工作量的累计,每一次addjob都会累计一个数
302
- self.jobnum = 0
303
- # 代理设置时间
304
- # self.proxytime = 0
305
-
306
- # self.modle = 1
307
- # 线程锁,外部使用
308
- self.thread_run_lock = threading.Lock()
309
-
310
- # 全局使用特殊的单词
311
- self.BREAK = "break"
312
-
313
- # 进程号
314
- if hasattr(os, 'getpid'):
315
- self.pid = os.getpid()
316
- else:
317
- self.pid = None
318
-
319
- def set_is_restart(self, name, is_restart):
320
- """
321
- 特殊线程的重启设置,默认都会重启,设置False不允许重启
322
- :param name:
323
- :param is_restart:
324
- :return:
325
- """
326
- self.thread_pool.especial_thread_pool_dicts[name].set_is_restart(is_restart)
327
-
328
- @abstractmethod
329
- def setProxy(self, threadval: ThreadVal, proxysList=None):
330
- """
331
- 将代理加入到循环队列中 self.list_proxy,或者自己管理代理
332
- :param proxysList:
333
- :return:
334
- """
335
- pass
336
-
337
- @abstractmethod
338
- def fun(self, threadval, *args, **kwargs):
339
- """
340
- 运行的方法
341
- :param threadval:
342
- :param args:
343
- :param kwargs:
344
- :return:
345
- """
346
- pass
347
-
348
- @abstractmethod
349
- def thread_pool_hook(self, threadinfo: ThreadInfo) -> dict:
350
- """
351
- 钩子函数 可以被重写
352
- 主要重写里面的ThreadInfo的信息
353
- :return:
354
- """
355
- return {}
356
-
357
- @abstractmethod
358
- def set_task(self, threadval: ThreadVal, *args, **kwargs):
359
- """
360
- 设置任务,这里调用addjob
361
- :param args:
362
- :param kwargs:
363
- :return:
364
- """
365
- pass
366
-
367
- @abstractmethod
368
- def deal_results(self, threadval: ThreadVal, *args, **kwargs):
369
- """
370
- 处理结果
371
- result_queue = self.thread_pool.result_queue
372
- :param args:
373
- :param kwargs:
374
- :return:
375
- """
376
- pass
377
-
378
- def setfunc(self, func):
379
- # 设置线程方法,默认是self.fun
380
- self.func = func
381
-
382
- def add_job(self, func, *args, **kwargs):
383
- """
384
- :param func: 运行的方法,一般使用self.func
385
- :param args: 元组参数
386
- :param kwargs: 键值对参数
387
- """
388
- self.jobnum += 1
389
- self.thread_pool.add_job(func, *args, **kwargs)
390
-
391
- def other(self):
392
- """
393
- 其他的一些判断
394
- :return:
395
- """
396
- self.logger.info("工作队列任务量为{},结果队列任务量为{}".format(self.thread_pool.work_queue.get_size(),
397
- self.thread_pool.result_queue.get_size()))
398
- # 通过工作队列和结果队列观察是否结束
399
- if self.thread_pool.checkThreadRunFinish():
400
- self.logger.info("初次判断任务已经结束,各个队列为空")
401
- return True
402
- else:
403
- return False
404
-
405
- def check_especial_thread(self):
406
- """
407
- 检查特殊线程是否挂掉
408
- :return:
409
- """
410
- task = self.set_task
411
- proxy = self.setProxy
412
- result = self.deal_results
413
-
414
- nowThreadsName = self.thread_pool.get_now_thread()
415
- for name in list(self.thread_pool.especial_thread_pool_dicts.keys()):
416
- thread = self.thread_pool.especial_thread_pool_dicts[name].get_thread()
417
- # 如果线程字典为空 代表已被删除
418
- if name in nowThreadsName and thread.is_alive():
419
- # print(name + ": is run")
420
- pass # 当前某线程名包含在初始化线程组中,可以认为线程仍在运行
421
- else:
422
- self.logger.info("name is :" + name + "; 没有在线程中")
423
- if name in self.etn.list_name():
424
- if name == self.etn.taskthreadname:
425
- taskin = task
426
- elif name == self.etn.proxythreadname:
427
- taskin = proxy
428
- elif name == self.etn.dealresultthreadname:
429
- taskin = result
430
- else:
431
- raise Exception("没有对应的任务,请检查")
432
- is_start = False
433
- if name in self.thread_pool.especial_thread_pool_dicts:
434
- threadinfo = self.thread_pool.especial_thread_pool_dicts[name]
435
- if threadinfo.get_thread().is_alive():
436
- is_start = True
437
- if not threadinfo.get_is_restart():
438
- is_start = True
439
- if not is_start:
440
- self.logger.info('重启中:' + name)
441
- args = self.thread_pool.especial_thread_pool_dicts[name].get_args()
442
- kwargs = self.thread_pool.especial_thread_pool_dicts[name].get_kwargs()
443
- thread = self.thread_pool.set_add_especial_thread(taskin, name, mode="mysuper", *args, **kwargs)
444
- thread.start()
445
-
446
- def start_especial_thread(self):
447
- # 开启特殊线程设置任务
448
- self.thread_pool.set_add_especial_thread(self.set_task, self.etn.taskthreadname, mode="mysuper")
449
- self.thread_pool.set_add_especial_thread(self.setProxy, self.etn.proxythreadname, mode="mysuper")
450
- self.thread_pool.set_add_especial_thread(self.deal_results, self.etn.dealresultthreadname, mode="mysuper")
451
- self.thread_pool.especial_start()
452
-
453
- def is_break(self):
454
- """
455
- 运行完毕是否退出多线程
456
- :return:
457
- """
458
- return False
459
-
460
- # 在主线程添加的钩子函数,每次循环都会调用到
461
- def run_hook_everytime(self):
462
- time.sleep(3)
463
-
464
- # 运行完毕的钩子函数
465
- def run_hook_result(self):
466
- pass
467
-
468
- def run(self):
469
- """
470
- 主进程代码,线程设置为主进程结束所有线程都会结束
471
- 所以任务完成之前主进程不应该结束
472
- :return:
473
- """
474
- self.start_especial_thread()
475
- while True:
476
- self.run_hook_everytime()
477
- self.thread_pool.checkThread()
478
- self.check_especial_thread()
479
- if self.other():
480
- print("进入other 判断 再次确认finish")
481
- if self.thread_pool.work_queue.is_empty() and self.thread_pool.result_queue.is_empty():
482
- print("运行完毕")
483
- self.run_hook_result()
484
- if self.is_break():
485
- print("退出主循环")
486
- break
1
+ import os
2
+ import threading
3
+ import time
4
+ from abc import ABC, abstractmethod
5
+
6
+ from re_common.baselibrary.mthread.mythreading import ThreadPoolManger, ThreadInfo, ThreadVal
7
+ from re_common.baselibrary.utils.ringlist import RingList
8
+ from re_common.facade.loggerfacade import get_streamlogger
9
+
10
+
11
+ class especialThreadName(object):
12
+
13
+ def __init__(self):
14
+ self.taskthreadname = "my_threading_taskthread_1"
15
+ self.proxythreadname = "my_threading_proxythread_1"
16
+ self.dealresultthreadname = "my_threading_dealresult_1"
17
+
18
+ def list_name(self):
19
+ return [self.taskthreadname, self.proxythreadname, self.dealresultthreadname]
20
+
21
+
22
+ class MThreadingRun(ABC):
23
+ def __init__(self, num, logger=None):
24
+ if logger:
25
+ self.logger = logger
26
+ else:
27
+ self.logger = get_streamlogger()
28
+ self.etn = especialThreadName()
29
+ # 线程数
30
+ self.threadingnum = num
31
+ # 代理列表
32
+ self.list_proxy = RingList()
33
+ # 线程池
34
+ self.thread_pool = ThreadPoolManger(self.threadingnum, self.logger)
35
+ self.thread_pool.set_callback(self.thread_pool_hook)
36
+ # 结果集
37
+ self.results = []
38
+ # 线程方法
39
+ self.func = self.fun
40
+ # 结果数
41
+ self.resultnum = 0
42
+ # 工作线程数
43
+ self.jobnum = 0
44
+ # 结果集被处理标志 默认被处理 是为了兼容之前的代码不去改动
45
+ self.dealresultstatus = True
46
+ # 結果到达该数量后处理结果 默认及时处理
47
+ self.dealresultsnum = 0
48
+ # 代理设置时间
49
+ self.proxytime = 0
50
+ # 标识event set之后是否对任务进行了设置
51
+ # self.eventbool = False
52
+ self.modle = 1
53
+ # 在任务和处理结果时event信号的状态
54
+ self.result_event_status = True
55
+ self.task_event_status = True
56
+ self.thread_run_lock = threading.Lock()
57
+
58
+ # 默认每次处理的结果数
59
+ self.once_result_num = 100
60
+
61
+ # 全局使用特殊的单词
62
+ self.BREAK = "break"
63
+
64
+ # 进程号
65
+ if hasattr(os, 'getpid'):
66
+ self.pid = os.getpid()
67
+ else:
68
+ self.pid = None
69
+
70
+ def set_is_restart(self, name, is_restart):
71
+ self.thread_pool.especial_thread_pool_dicts[name].set_is_restart(is_restart)
72
+
73
+ @abstractmethod
74
+ def setProxy(self, proxysList=None):
75
+ """
76
+ 将代理加入到循环队列中 self.list_proxy
77
+ :param proxysList:
78
+ :return:
79
+ """
80
+ pass
81
+
82
+ @abstractmethod
83
+ def fun(self, threadval, *args, **kwargs):
84
+ pass
85
+
86
+ @abstractmethod
87
+ def thread_pool_hook(self, threadinfo: ThreadInfo) -> dict:
88
+ """
89
+ 钩子函数 可以被重写
90
+ 主要重写里面的dicts部分
91
+ :return:
92
+ """
93
+ return {}
94
+
95
+ @abstractmethod
96
+ def setTask(self, results=None, *args, **kwargs):
97
+ # self.thread_pool.event.set() # 自动释放信号
98
+ # 当设置任务和处理结果使用同一个连接时尽量使用event信号保证不同时执行sql在一个执行未返回时
99
+ # 请查询较大数据时主动释放
100
+ pass
101
+
102
+ @abstractmethod
103
+ def getTask(self, *args, **kwargs):
104
+ """
105
+ 主要用于mysql的请求
106
+ :param args:
107
+ :param kwargs:
108
+ :return:
109
+ """
110
+ pass
111
+
112
+ @abstractmethod
113
+ def dealresult(self, *args, **kwargs):
114
+ """
115
+ 如果没有在这里处理 请将 self.dealresultstatus = False这样不会丢数据
116
+ :param args:
117
+ :param kwargs:
118
+ :return:
119
+ """
120
+ pass
121
+
122
+ def set_task(self, *args, **kwargs):
123
+ """
124
+ 设置任务,在没有event限制的情况下将两个步骤写在一起
125
+ :param args:
126
+ :param kwargs:
127
+ :return:
128
+ """
129
+ while True:
130
+ results = self.getTask(*args, **kwargs)
131
+ result = self.setTask(results, *args, **kwargs)
132
+ if result == self.BREAK:
133
+ break
134
+
135
+ def deal_results(self, *args, **kwargs):
136
+ """
137
+ 该函数用于没有信号的机制里
138
+ 使用信号主要是因为多线程无法同时使用一个链接
139
+ 有可能出现错误
140
+ :param args:
141
+ :param kwargs:
142
+ :return:
143
+ """
144
+ while True:
145
+ # 处理结果标识
146
+ self.dealresultstatus = True
147
+ # 从结果队列获取结果到results
148
+ self.getreustlFromQueue()
149
+ if len(
150
+ self.results) > self.dealresultsnum or self.thread_pool.work_queue.is_empty() or not self.thread_pool.thread_queue:
151
+ if len(self.results) > 0 or not self.thread_pool.thread_queue.is_empty():
152
+ # 处理结果
153
+ self.dealresult()
154
+ if self.dealresultstatus:
155
+ # 处理完结果后要清理
156
+ self.results.clear()
157
+ else:
158
+ time.sleep(10)
159
+ else:
160
+ time.sleep(3)
161
+
162
+ def setfunc(self, func):
163
+ # 设置线程方法
164
+ self.func = func
165
+
166
+ def add_job(self, func, *args, **kwargs):
167
+ self.jobnum += 1
168
+ self.thread_pool.add_job(func, *args, **kwargs)
169
+
170
+ def getreustlFromQueue(self):
171
+ """
172
+ 从结果队列获取结果到results
173
+ 默认取100
174
+ :return:
175
+ """
176
+ once_result_num = 0
177
+ while not self.thread_pool.result_queue.is_empty():
178
+ self.resultnum += 1
179
+ once_result_num += 1
180
+ if once_result_num > self.once_result_num:
181
+ return
182
+ result = self.thread_pool.result_queue.get()
183
+ self.results.append(result)
184
+ self.thread_pool.result_queue.task_done()
185
+
186
+ def checkResultsfininsh(self, *args, **kwargs):
187
+ """
188
+ 该函数用于处理运行到最后时结果不足100的情况
189
+ :return:
190
+ """
191
+ if self.thread_pool.work_queue.is_empty():
192
+ t1 = len(self.results)
193
+ self.logger.info("self.results len is %s " % str(t1))
194
+ if t1 != 0:
195
+ return False
196
+ else:
197
+ return True
198
+
199
+ def other(self):
200
+ self.logger.info("工作队列任务量为{},结果队列任务量为{}".format(self.thread_pool.work_queue.get_size(),
201
+ self.thread_pool.result_queue.get_size()))
202
+ # 通过工作队列和结果队列观察是否结束
203
+ if self.thread_pool.checkThreadRunFinish():
204
+ self.logger.info("初次判断任务已经结束,各个队列为空")
205
+ return True
206
+ else:
207
+ return False
208
+
209
def check_especial_thread(self):
    """
    Restart registered special threads that are no longer running.

    Walks a snapshot of the keys of ``thread_pool.especial_thread_pool_dicts``.
    An entry is treated as running when its name is in
    ``thread_pool.get_now_thread()`` and its thread reports ``is_alive()``.
    Any other entry whose name is listed by ``self.etn.list_name()`` is
    registered again through ``thread_pool.set_add_especial_thread`` with its
    stored args/kwargs and started, unless its ``get_is_restart()`` is falsy.

    :raises Exception: when a name listed by ``self.etn`` matches none of the
        task / proxy / result thread names.
    """
    # NOTE(review): the nesting here was rebuilt from a listing that had lost
    # its indentation - confirm the if/else pairing against the upstream file.
    task_target = self.set_task
    proxy_target = self.setProxy
    result_target = self.deal_results

    running_names = self.thread_pool.get_now_thread()
    for name in list(self.thread_pool.especial_thread_pool_dicts.keys()):
        worker = self.thread_pool.especial_thread_pool_dicts[name].get_thread()
        # Name is among the current threads and the thread is alive:
        # consider it still running and leave it alone.
        if name in running_names and worker.is_alive():
            continue

        self.logger.info("name is :" + name + "; 没有在线程中")
        # Only names known to self.etn are candidates for a restart.
        if name not in self.etn.list_name():
            continue

        if name == self.etn.taskthreadname:
            target = task_target
        elif name == self.etn.proxythreadname:
            target = proxy_target
        elif name == self.etn.dealresultthreadname:
            target = result_target
        else:
            raise Exception("没有对应的任务,请检查")

        # Both checks are always evaluated, in this order, before deciding.
        skip_restart = False
        if name in self.thread_pool.especial_thread_pool_dicts:
            info = self.thread_pool.especial_thread_pool_dicts[name]
            alive_now = info.get_thread().is_alive()
            restart_allowed = info.get_is_restart()
            skip_restart = bool(alive_now) or not restart_allowed
        if skip_restart:
            continue

        self.logger.info('重启中:' + name)
        saved_args = self.thread_pool.especial_thread_pool_dicts[name].get_args()
        saved_kwargs = self.thread_pool.especial_thread_pool_dicts[name].get_kwargs()
        replacement = self.thread_pool.set_add_especial_thread(target, name, *saved_args, **saved_kwargs)
        replacement.start()
245
+
246
def start_especial_thread(self):
    """
    Register the three special threads with the pool and start them.

    The task-setting, proxy and result-handling callables are registered in
    that order under the names supplied by ``self.etn``; afterwards
    ``thread_pool.especial_start()`` is called once.
    """
    registrations = (
        (self.set_task, self.etn.taskthreadname),
        (self.setProxy, self.etn.proxythreadname),
        (self.deal_results, self.etn.dealresultthreadname),
    )
    for target, thread_name in registrations:
        self.thread_pool.set_add_especial_thread(target, thread_name)
    self.thread_pool.especial_start()
252
+
253
def is_break(self):
    """
    Tell ``run`` whether to leave its loop once everything is finished.

    :return: always False here.
    """
    return False
255
+
256
def run(self):
    """
    Supervise the pool from the calling thread until the work is finished.

    Starts the special threads, then loops: sleep 3 seconds, check the
    worker threads and the special threads, and poll ``other()``. Once
    ``other()`` and ``checkResultsfininsh()`` both report completion and the
    work queue, result queue and ``self.results`` are all empty, the loop
    either exits after a 10 second pause (``is_break()`` truthy) or sleeps
    60 seconds and keeps polling.
    """
    # NOTE(review): indentation was reconstructed from an unindented listing;
    # pairing the final `else` with `if self.is_break()` is an assumption -
    # confirm against the upstream file.
    self.start_especial_thread()
    while True:
        time.sleep(3)
        self.thread_pool.checkThread()
        self.check_especial_thread()
        if self.other():
            # Buffered results are not drained yet: poll again.
            if not self.checkResultsfininsh():
                continue
            else:
                print("进入other 判断 再次确认finish")
            # Second confirmation: both queues and the local buffer are empty.
            if self.thread_pool.work_queue.is_empty() and self.thread_pool.result_queue.is_empty() \
                    and len(self.results) == 0:
                print("运行完毕")
                if self.is_break():
                    print("10 s break")
                    time.sleep(10)
                    break
                else:
                    time.sleep(60)
276
+
277
+
278
class MThreadingRun2(ABC):
    """
    Multithreading wrapper built on a worker pool plus three special threads.

    Subclasses implement ``setProxy``, ``fun``, ``thread_pool_hook``,
    ``set_task`` and ``deal_results``; ``run`` then supervises the pool from
    the calling thread.
    """

    def __init__(self, num, logger=None):
        """
        :param num: number of worker threads, special threads not included.
        :param logger: logger to use; when falsy, ``get_streamlogger()``
            supplies one.
        """
        # Logging: fall back to the stream logger when none is supplied.
        self.logger = logger if logger else get_streamlogger()
        # Holder of the unified special-thread names.
        self.etn = especialThreadName()
        # Number of worker threads, special threads excluded.
        self.threadingnum = num
        # Proxy list; the original note describes RingList as a ring-shaped list.
        self.list_proxy = RingList()
        # Worker thread pool.
        self.thread_pool = ThreadPoolManger(self.threadingnum, self.logger)
        # Expose the pool's hook at this level for convenience.
        self.thread_pool.set_callback(self.thread_pool_hook)
        # Thread function; self.fun unless replaced through setfunc.
        self.func = self.fun
        # Running total of submitted jobs; add_job adds one per call.
        self.jobnum = 0
        # Lock intended for use by outside code.
        self.thread_run_lock = threading.Lock()
        # Special word used globally.
        self.BREAK = "break"
        # Process id, or None where os.getpid is unavailable.
        self.pid = os.getpid() if hasattr(os, 'getpid') else None

    def set_is_restart(self, name, is_restart):
        """
        Set the restart flag of one special thread.

        Special threads are restartable by default (per the original note);
        pass False to forbid restarting.

        :param name: key of the special thread in the pool's registry.
        :param is_restart: new value of the restart flag.
        :return: None
        """
        thread_info = self.thread_pool.especial_thread_pool_dicts[name]
        thread_info.set_is_restart(is_restart)

    @abstractmethod
    def setProxy(self, threadval: ThreadVal, proxysList=None):
        """
        Put proxies into the ring list ``self.list_proxy``, or manage
        proxies in some other way.

        :param threadval: value object handed to the special thread.
        :param proxysList: optional list of proxies.
        :return: None
        """

    @abstractmethod
    def fun(self, threadval, *args, **kwargs):
        """
        The function executed for each job.

        :param threadval: value object handed to the thread.
        :param args: positional job arguments.
        :param kwargs: keyword job arguments.
        :return: None
        """

    @abstractmethod
    def thread_pool_hook(self, threadinfo: ThreadInfo) -> dict:
        """
        Hook function meant to be overridden, mainly to adjust the
        information held by ``threadinfo``.

        :param threadinfo: the ThreadInfo passed in by the pool.
        :return: an empty dict in this default body.
        """
        return {}

    @abstractmethod
    def set_task(self, threadval: ThreadVal, *args, **kwargs):
        """
        Produce the jobs; implementations call ``add_job`` from here.

        :param threadval: value object handed to the special thread.
        :param args: positional arguments.
        :param kwargs: keyword arguments.
        :return: None
        """

    @abstractmethod
    def deal_results(self, threadval: ThreadVal, *args, **kwargs):
        """
        Handle the results; the result queue is
        ``self.thread_pool.result_queue``.

        :param threadval: value object handed to the special thread.
        :param args: positional arguments.
        :param kwargs: keyword arguments.
        :return: None
        """

    def setfunc(self, func):
        """Replace the thread function (``self.fun`` by default)."""
        self.func = func

    def add_job(self, func, *args, **kwargs):
        """
        Count one more job and hand it to the pool.

        :param func: function to run, normally ``self.func``.
        :param args: positional arguments for ``func``.
        :param kwargs: keyword arguments for ``func``.
        """
        self.jobnum += 1
        self.thread_pool.add_job(func, *args, **kwargs)

    def other(self):
        """
        Log the size of both queues and report whether the pool looks finished.

        :return: True when ``thread_pool.checkThreadRunFinish()`` is truthy,
            otherwise False.
        """
        pool = self.thread_pool
        queued_jobs = pool.work_queue.get_size()
        queued_results = pool.result_queue.get_size()
        self.logger.info("工作队列任务量为{},结果队列任务量为{}".format(queued_jobs, queued_results))
        # Use the work queue and the result queue to judge whether the run is over.
        if not pool.checkThreadRunFinish():
            return False
        self.logger.info("初次判断任务已经结束,各个队列为空")
        return True

    def check_especial_thread(self):
        """
        Check whether special threads have died and restart those that may be.

        An entry is treated as running when its name is in
        ``thread_pool.get_now_thread()`` and its thread reports
        ``is_alive()``. Any other entry whose name is listed by
        ``self.etn.list_name()`` is registered again with its stored
        args/kwargs and started, unless its ``get_is_restart()`` is falsy.

        :raises Exception: when a name listed by ``self.etn`` matches none of
            the task / proxy / result thread names.
        """
        # NOTE(review): the nesting here was rebuilt from a listing that had
        # lost its indentation - confirm the if/else pairing upstream.
        task_target = self.set_task
        proxy_target = self.setProxy
        result_target = self.deal_results

        running_names = self.thread_pool.get_now_thread()
        for name in list(self.thread_pool.especial_thread_pool_dicts.keys()):
            worker = self.thread_pool.especial_thread_pool_dicts[name].get_thread()
            # Listed among the current threads and alive: still running.
            if name in running_names and worker.is_alive():
                continue

            self.logger.info("name is :" + name + "; 没有在线程中")
            # Only names known to self.etn are candidates for a restart.
            if name not in self.etn.list_name():
                continue

            if name == self.etn.taskthreadname:
                target = task_target
            elif name == self.etn.proxythreadname:
                target = proxy_target
            elif name == self.etn.dealresultthreadname:
                target = result_target
            else:
                raise Exception("没有对应的任务,请检查")

            # Both checks are always evaluated, in this order, before deciding.
            skip_restart = False
            if name in self.thread_pool.especial_thread_pool_dicts:
                info = self.thread_pool.especial_thread_pool_dicts[name]
                alive_now = info.get_thread().is_alive()
                restart_allowed = info.get_is_restart()
                skip_restart = bool(alive_now) or not restart_allowed
            if skip_restart:
                continue

            self.logger.info('重启中:' + name)
            saved_args = self.thread_pool.especial_thread_pool_dicts[name].get_args()
            saved_kwargs = self.thread_pool.especial_thread_pool_dicts[name].get_kwargs()
            replacement = self.thread_pool.set_add_especial_thread(target, name, mode="mysuper", *saved_args,
                                                                   **saved_kwargs)
            replacement.start()

    def start_especial_thread(self):
        """
        Register the task, proxy and result-handling special threads (all
        with ``mode="mysuper"``) and start them.
        """
        registrations = (
            (self.set_task, self.etn.taskthreadname),
            (self.setProxy, self.etn.proxythreadname),
            (self.deal_results, self.etn.dealresultthreadname),
        )
        for target, thread_name in registrations:
            self.thread_pool.set_add_especial_thread(target, thread_name, mode="mysuper")
        self.thread_pool.especial_start()

    def is_break(self):
        """
        Whether ``run`` should leave its loop once everything is finished.

        :return: always False here.
        """
        return False

    def run_hook_everytime(self):
        """Hook called at the start of every ``run`` iteration; sleeps 3 seconds."""
        time.sleep(3)

    def run_hook_result(self):
        """Hook called by ``run`` each time it finds both queues empty."""
        return None

    def run(self):
        """
        Supervising loop executed by the caller.

        Per the original note, the threads are set up to end together with
        the main program, so this loop should not return before the work is
        complete.

        :return: None, after ``is_break()`` reports True on a finished run.
        """
        self.start_especial_thread()
        while True:
            self.run_hook_everytime()
            self.thread_pool.checkThread()
            self.check_especial_thread()
            if not self.other():
                continue
            print("进入other 判断 再次确认finish")
            queues_idle = self.thread_pool.work_queue.is_empty() and self.thread_pool.result_queue.is_empty()
            if not queues_idle:
                continue
            print("运行完毕")
            self.run_hook_result()
            if self.is_break():
                print("退出主循环")
                break