re-common 10.0.22__py3-none-any.whl → 10.0.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202) hide show
  1. re_common/baselibrary/__init__.py +4 -4
  2. re_common/baselibrary/baseabs/__init__.py +6 -6
  3. re_common/baselibrary/baseabs/baseabs.py +26 -26
  4. re_common/baselibrary/database/mbuilder.py +132 -132
  5. re_common/baselibrary/database/moudle.py +93 -93
  6. re_common/baselibrary/database/msqlite3.py +194 -194
  7. re_common/baselibrary/database/mysql.py +169 -169
  8. re_common/baselibrary/database/sql_factory.py +26 -26
  9. re_common/baselibrary/mthread/MThreadingRun.py +486 -486
  10. re_common/baselibrary/mthread/MThreadingRunEvent.py +349 -349
  11. re_common/baselibrary/mthread/__init__.py +2 -2
  12. re_common/baselibrary/mthread/mythreading.py +695 -695
  13. re_common/baselibrary/pakge_other/socks.py +404 -404
  14. re_common/baselibrary/readconfig/config_factory.py +18 -18
  15. re_common/baselibrary/readconfig/ini_config.py +317 -317
  16. re_common/baselibrary/readconfig/toml_config.py +49 -49
  17. re_common/baselibrary/temporary/envdata.py +36 -36
  18. re_common/baselibrary/tools/all_requests/aiohttp_request.py +118 -118
  19. re_common/baselibrary/tools/all_requests/httpx_requet.py +102 -102
  20. re_common/baselibrary/tools/all_requests/mrequest.py +412 -412
  21. re_common/baselibrary/tools/all_requests/requests_request.py +81 -81
  22. re_common/baselibrary/tools/batch_compre/bijiao_batch.py +31 -31
  23. re_common/baselibrary/tools/contrast_db3.py +123 -123
  24. re_common/baselibrary/tools/copy_file.py +39 -39
  25. re_common/baselibrary/tools/db3_2_sizedb3.py +102 -102
  26. re_common/baselibrary/tools/foreachgz.py +39 -39
  27. re_common/baselibrary/tools/get_attr.py +10 -10
  28. re_common/baselibrary/tools/image_to_pdf.py +61 -61
  29. re_common/baselibrary/tools/java_code_deal.py +139 -139
  30. re_common/baselibrary/tools/javacode.py +79 -79
  31. re_common/baselibrary/tools/mdb_db3.py +48 -48
  32. re_common/baselibrary/tools/merge_file.py +171 -171
  33. re_common/baselibrary/tools/merge_gz_file.py +165 -165
  34. re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py +42 -42
  35. re_common/baselibrary/tools/mhdfstools/hdfst.py +42 -42
  36. re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py +38 -38
  37. re_common/baselibrary/tools/mongo_tools.py +50 -50
  38. re_common/baselibrary/tools/move_file.py +170 -170
  39. re_common/baselibrary/tools/move_mongo/mongo_table_to_file.py +63 -63
  40. re_common/baselibrary/tools/move_mongo/move_mongo_table.py +354 -354
  41. re_common/baselibrary/tools/move_mongo/use_mttf.py +18 -18
  42. re_common/baselibrary/tools/move_mongo/use_mv.py +93 -93
  43. re_common/baselibrary/tools/mpandas/mpandasreadexcel.py +125 -125
  44. re_common/baselibrary/tools/mpandas/pandas_visualization.py +7 -7
  45. re_common/baselibrary/tools/myparsel.py +104 -104
  46. re_common/baselibrary/tools/rename_dir_file.py +37 -37
  47. re_common/baselibrary/tools/sequoiadb_utils.py +398 -398
  48. re_common/baselibrary/tools/split_line_to_many.py +25 -25
  49. re_common/baselibrary/tools/stringtodicts.py +33 -33
  50. re_common/baselibrary/tools/workwechant_bot.py +84 -84
  51. re_common/baselibrary/utils/baseaiohttp.py +296 -296
  52. re_common/baselibrary/utils/baseaiomysql.py +87 -87
  53. re_common/baselibrary/utils/baseallstep.py +191 -191
  54. re_common/baselibrary/utils/baseavro.py +19 -19
  55. re_common/baselibrary/utils/baseboto3.py +291 -291
  56. re_common/baselibrary/utils/basecsv.py +32 -32
  57. re_common/baselibrary/utils/basedict.py +133 -133
  58. re_common/baselibrary/utils/basedir.py +241 -241
  59. re_common/baselibrary/utils/baseencode.py +351 -351
  60. re_common/baselibrary/utils/baseencoding.py +28 -28
  61. re_common/baselibrary/utils/baseesdsl.py +86 -86
  62. re_common/baselibrary/utils/baseexcel.py +264 -264
  63. re_common/baselibrary/utils/baseexcept.py +109 -109
  64. re_common/baselibrary/utils/basefile.py +654 -654
  65. re_common/baselibrary/utils/baseftp.py +214 -214
  66. re_common/baselibrary/utils/basegzip.py +60 -60
  67. re_common/baselibrary/utils/basehdfs.py +135 -135
  68. re_common/baselibrary/utils/basehttpx.py +268 -268
  69. re_common/baselibrary/utils/baseip.py +87 -87
  70. re_common/baselibrary/utils/basejson.py +2 -2
  71. re_common/baselibrary/utils/baselist.py +32 -32
  72. re_common/baselibrary/utils/basemotor.py +190 -190
  73. re_common/baselibrary/utils/basemssql.py +98 -98
  74. re_common/baselibrary/utils/baseodbc.py +113 -113
  75. re_common/baselibrary/utils/basepandas.py +302 -302
  76. re_common/baselibrary/utils/basepeewee.py +11 -11
  77. re_common/baselibrary/utils/basepika.py +180 -180
  78. re_common/baselibrary/utils/basepydash.py +143 -143
  79. re_common/baselibrary/utils/basepymongo.py +230 -230
  80. re_common/baselibrary/utils/basequeue.py +22 -22
  81. re_common/baselibrary/utils/baserar.py +57 -57
  82. re_common/baselibrary/utils/baserequest.py +279 -279
  83. re_common/baselibrary/utils/baseset.py +8 -8
  84. re_common/baselibrary/utils/basesmb.py +403 -403
  85. re_common/baselibrary/utils/basestring.py +382 -382
  86. re_common/baselibrary/utils/basetime.py +320 -320
  87. re_common/baselibrary/utils/baseurl.py +121 -121
  88. re_common/baselibrary/utils/basezip.py +57 -57
  89. re_common/baselibrary/utils/core/__init__.py +7 -7
  90. re_common/baselibrary/utils/core/bottomutils.py +18 -18
  91. re_common/baselibrary/utils/core/mdeprecated.py +327 -327
  92. re_common/baselibrary/utils/core/mlamada.py +16 -16
  93. re_common/baselibrary/utils/core/msginfo.py +25 -25
  94. re_common/baselibrary/utils/core/requests_core.py +103 -103
  95. re_common/baselibrary/utils/fateadm.py +429 -429
  96. re_common/baselibrary/utils/importfun.py +123 -123
  97. re_common/baselibrary/utils/mfaker.py +57 -57
  98. re_common/baselibrary/utils/my_abc/__init__.py +3 -3
  99. re_common/baselibrary/utils/my_abc/better_abc.py +32 -32
  100. re_common/baselibrary/utils/mylogger.py +414 -414
  101. re_common/baselibrary/utils/myredisclient.py +861 -861
  102. re_common/baselibrary/utils/pipupgrade.py +21 -21
  103. re_common/baselibrary/utils/ringlist.py +85 -85
  104. re_common/baselibrary/utils/version_compare.py +36 -36
  105. re_common/baselibrary/utils/ydmhttp.py +126 -126
  106. re_common/facade/lazy_import.py +11 -11
  107. re_common/facade/loggerfacade.py +25 -25
  108. re_common/facade/mysqlfacade.py +467 -467
  109. re_common/facade/now.py +31 -31
  110. re_common/facade/sqlite3facade.py +257 -257
  111. re_common/facade/use/mq_use_facade.py +83 -83
  112. re_common/facade/use/proxy_use_facade.py +19 -19
  113. re_common/libtest/base_dict_test.py +19 -19
  114. re_common/libtest/baseavro_test.py +13 -13
  115. re_common/libtest/basefile_test.py +14 -14
  116. re_common/libtest/basemssql_test.py +77 -77
  117. re_common/libtest/baseodbc_test.py +7 -7
  118. re_common/libtest/basepandas_test.py +38 -38
  119. re_common/libtest/get_attr_test/get_attr_test_settings.py +14 -14
  120. re_common/libtest/get_attr_test/settings.py +54 -54
  121. re_common/libtest/idencode_test.py +53 -53
  122. re_common/libtest/iniconfig_test.py +35 -35
  123. re_common/libtest/ip_test.py +34 -34
  124. re_common/libtest/merge_file_test.py +20 -20
  125. re_common/libtest/mfaker_test.py +8 -8
  126. re_common/libtest/mm3_test.py +31 -31
  127. re_common/libtest/mylogger_test.py +88 -88
  128. re_common/libtest/myparsel_test.py +27 -27
  129. re_common/libtest/mysql_test.py +151 -151
  130. re_common/libtest/pymongo_test.py +21 -21
  131. re_common/libtest/split_test.py +11 -11
  132. re_common/libtest/sqlite3_merge_test.py +5 -5
  133. re_common/libtest/sqlite3_test.py +34 -34
  134. re_common/libtest/tomlconfig_test.py +30 -30
  135. re_common/libtest/use_tools_test/__init__.py +2 -2
  136. re_common/libtest/user/__init__.py +4 -4
  137. re_common/studio/__init__.py +4 -4
  138. re_common/studio/assignment_expressions.py +36 -36
  139. re_common/studio/mydash/test1.py +18 -18
  140. re_common/studio/pydashstudio/first.py +9 -9
  141. re_common/studio/streamlitstudio/first_app.py +65 -65
  142. re_common/studio/streamlitstudio/uber_pickups.py +23 -23
  143. re_common/studio/test.py +18 -18
  144. re_common/v2/baselibrary/business_utils/BusinessStringUtil.py +195 -0
  145. re_common/v2/baselibrary/business_utils/__init__.py +0 -0
  146. re_common/v2/baselibrary/business_utils/rel_tools.py +6 -0
  147. re_common/v2/baselibrary/decorators/utils.py +59 -59
  148. re_common/v2/baselibrary/s3object/baseboto3.py +230 -230
  149. re_common/v2/baselibrary/tools/WeChatRobot.py +95 -79
  150. re_common/v2/baselibrary/tools/ac_ahocorasick.py +75 -75
  151. re_common/v2/baselibrary/tools/dict_tools.py +37 -37
  152. re_common/v2/baselibrary/tools/dolphinscheduler.py +187 -187
  153. re_common/v2/baselibrary/tools/hdfs_data_processer.py +338 -338
  154. re_common/v2/baselibrary/tools/list_tools.py +65 -65
  155. re_common/v2/baselibrary/tools/search_hash_tools.py +54 -54
  156. re_common/v2/baselibrary/tools/text_matcher.py +326 -326
  157. re_common/v2/baselibrary/tools/unionfind_tools.py +60 -60
  158. re_common/v2/baselibrary/utils/BusinessStringUtil.py +196 -196
  159. re_common/v2/baselibrary/utils/author_smi.py +360 -360
  160. re_common/v2/baselibrary/utils/base_string_similarity.py +158 -158
  161. re_common/v2/baselibrary/utils/basedict.py +37 -37
  162. re_common/v2/baselibrary/utils/basehdfs.py +161 -161
  163. re_common/v2/baselibrary/utils/basepika.py +180 -180
  164. re_common/v2/baselibrary/utils/basetime.py +77 -77
  165. re_common/v2/baselibrary/utils/db.py +38 -38
  166. re_common/v2/baselibrary/utils/json_cls.py +16 -16
  167. re_common/v2/baselibrary/utils/mq.py +83 -83
  168. re_common/v2/baselibrary/utils/n_ary_expression_tree.py +243 -243
  169. re_common/v2/baselibrary/utils/string_bool.py +186 -149
  170. re_common/v2/baselibrary/utils/string_clear.py +227 -204
  171. re_common/v2/baselibrary/utils/string_smi.py +18 -18
  172. re_common/v2/baselibrary/utils/stringutils.py +213 -213
  173. re_common/vip/base_step_process.py +11 -11
  174. re_common/vip/baseencodeid.py +90 -90
  175. re_common/vip/changetaskname.py +28 -28
  176. re_common/vip/core_var.py +24 -24
  177. re_common/vip/mmh3Hash.py +89 -89
  178. re_common/vip/proxy/allproxys.py +127 -127
  179. re_common/vip/proxy/allproxys_thread.py +159 -159
  180. re_common/vip/proxy/cnki_proxy.py +153 -153
  181. re_common/vip/proxy/kuaidaili.py +87 -87
  182. re_common/vip/proxy/proxy_all.py +113 -113
  183. re_common/vip/proxy/update_kuaidaili_0.py +42 -42
  184. re_common/vip/proxy/wanfang_proxy.py +152 -152
  185. re_common/vip/proxy/wp_proxy_all.py +181 -181
  186. re_common/vip/read_rawid_to_txt.py +91 -91
  187. re_common/vip/title/__init__.py +5 -5
  188. re_common/vip/title/transform/TransformBookTitleToZt.py +125 -125
  189. re_common/vip/title/transform/TransformConferenceTitleToZt.py +139 -139
  190. re_common/vip/title/transform/TransformCstadTitleToZt.py +195 -195
  191. re_common/vip/title/transform/TransformJournalTitleToZt.py +203 -203
  192. re_common/vip/title/transform/TransformPatentTitleToZt.py +132 -132
  193. re_common/vip/title/transform/TransformRegulationTitleToZt.py +114 -114
  194. re_common/vip/title/transform/TransformStandardTitleToZt.py +135 -135
  195. re_common/vip/title/transform/TransformThesisTitleToZt.py +135 -135
  196. re_common/vip/title/transform/__init__.py +10 -10
  197. {re_common-10.0.22.dist-info → re_common-10.0.24.dist-info}/LICENSE +201 -201
  198. {re_common-10.0.22.dist-info → re_common-10.0.24.dist-info}/METADATA +16 -16
  199. re_common-10.0.24.dist-info/RECORD +230 -0
  200. {re_common-10.0.22.dist-info → re_common-10.0.24.dist-info}/WHEEL +1 -1
  201. re_common-10.0.22.dist-info/RECORD +0 -227
  202. {re_common-10.0.22.dist-info → re_common-10.0.24.dist-info}/top_level.txt +0 -0
@@ -1,349 +1,349 @@
1
- import threading
2
- import time
3
- from abc import ABC, abstractmethod
4
-
5
- from re_common.baselibrary.mthread.MThreadingRun import especialThreadName
6
- from re_common.baselibrary.mthread.mythreading import ThreadPoolManger
7
- from re_common.baselibrary.utils.ringlist import RingList
8
- from re_common.facade.loggerfacade import get_streamlogger
9
-
10
- """
11
- event版的多线程,现在已经不在使用 (已废弃)
12
- """
13
- class MThreadingRunEvent(ABC):
14
- def __init__(self, num, logger=None):
15
- if logger:
16
- self.logger = logger
17
- else:
18
- self.logger = get_streamlogger()
19
- self.etn = especialThreadName()
20
- # 线程数
21
- self.threadingnum = num
22
- # 代理列表
23
- self.list_proxy = RingList()
24
- # 线程池
25
- self.thread_pool = ThreadPoolManger(self.threadingnum, self.logger)
26
- self.thread_pool.set_callback(self.thread_pool_hook)
27
- # 结果集
28
- self.results = []
29
- # 线程方法
30
- self.func = self.fun
31
- # 结果数
32
- self.resultnum = 0
33
- # 工作线程数
34
- self.jobnum = 0
35
- # 结果集被处理标志 默认被处理 是为了兼容之前的代码不去改动
36
- self.dealresultstatus = True
37
- # 結果到达该数量后处理结果 默认及时处理
38
- self.dealresultsnum = 0
39
- # 代理设置时间
40
- self.proxytime = 0
41
- # 标识event set之后是否对任务进行了设置
42
- # self.eventbool = False
43
- self.modle = 1
44
- # 在任务和处理结果时event信号的状态
45
- self.result_event_status = True
46
- self.task_event_status = True
47
- self.thread_run_lock = threading.Lock()
48
-
49
- # 全局使用特殊的单词
50
- self.BREAK = "break"
51
-
52
- def set_is_restart(self, name, is_restart):
53
- self.thread_pool.especial_thread_pool_dicts[name].set_is_restart(is_restart)
54
-
55
- @abstractmethod
56
- def setProxy(self, proxysList=None):
57
- """
58
- 将代理加入到循环队列中 self.list_proxy
59
- :param proxysList:
60
- :return:
61
- """
62
- pass
63
-
64
- @abstractmethod
65
- def fun(self, threadval, *args, **kwargs):
66
- pass
67
-
68
- @abstractmethod
69
- def thread_pool_hook(self, thread_pool_dicts, thread, args, kwargs) -> dict:
70
- """
71
- 钩子函数 可以被重写
72
- 主要重写里面的dicts部分
73
- :return:
74
- """
75
- return {}
76
-
77
- @abstractmethod
78
- def setTask(self, results=None, *args, **kwargs):
79
- # self.thread_pool.event.set() # 自动释放信号
80
- # 当设置任务和处理结果使用同一个连接时尽量使用event信号保证不同时执行sql在一个执行未返回时
81
- # 请查询较大数据时主动释放
82
- pass
83
-
84
- @abstractmethod
85
- def getTask(self, *args, **kwargs):
86
- """
87
- 主要用于mysql的请求
88
- :param args:
89
- :param kwargs:
90
- :return:
91
- """
92
- pass
93
-
94
- def setTask_noevent(self, *args, **kwargs):
95
- """
96
- 设置任务,在没有event限制的情况下将两个步骤写在一起
97
- :param args:
98
- :param kwargs:
99
- :return:
100
- """
101
- while True:
102
- results = self.getTask(*args, **kwargs)
103
- reslt = self.setTask(results, *args, **kwargs)
104
- if reslt == self.BREAK:
105
- break
106
-
107
- def __setTask(self, *args, **kwargs):
108
- # 设置等待任务队列为空时继续添加任务
109
- # 继承类必须在第一步调用父类的该方法
110
- while True:
111
- # 获取Event 状态
112
- self.task_event_status = True
113
- self.logger.info("self.thread_pool.event_is_set() is {}".format(self.thread_pool.event_is_set()))
114
- # 如果状态为False 即 堵塞状态
115
- if not self.thread_pool.event_is_set():
116
- # 判断 result_event_status 和 task_event_status 都堵塞 理论上不存在 但Event并不完美
117
- if self.result_event_status and self.task_event_status:
118
- # 设置 信号允许堵塞通过 状态为True
119
- self.thread_pool.event_set()
120
- time.sleep(1)
121
- continue
122
- self.logger.info("event_wait")
123
- # 等待信号后执行设置任务
124
- self.thread_pool.event_wait(50)
125
- # 得到信号后clrar 不允许其他地方运行 状态 False
126
- self.thread_pool.event_clear()
127
- if self.result_event_status is False:
128
- self.logger.info("elf.result_event_status is False")
129
- continue
130
- self.task_event_status = False
131
- # 执行getTask
132
- self.logger.info("getTask")
133
- result = self.getTask(*args, **kwargs)
134
- # 运行完毕 其他地方等待的信号可以继续运行 True
135
- self.thread_pool.event_set()
136
- self.task_event_status = True
137
- # 执行ste()
138
- reslt = self.setTask(result, *args, **kwargs)
139
-
140
- # 是否跳出循环 结束线程
141
- if reslt == "break":
142
- break
143
- time.sleep(1)
144
-
145
- def dealresult(self, *args, **kwargs):
146
- """
147
- 如果没有在这里处理 请将 self.dealresultstatus = False这样不会丢数据
148
- :param args:
149
- :param kwargs:
150
- :return:
151
- """
152
- pass
153
-
154
- def deal_results_no_event(self, *args, **kwargs):
155
- """
156
- 该函数用于没有信号的机制里
157
- 使用信号主要是因为多线程无法同时使用一个链接
158
- 有可能出现错误
159
- :param args:
160
- :param kwargs:
161
- :return:
162
- """
163
- while True:
164
- # 处理结果标识
165
- self.dealresultstatus = True
166
- # 从结果队列获取结果到results
167
- self.getreustlFromQueue()
168
- if len(
169
- self.results) > self.dealresultsnum or self.thread_pool.work_queue.is_empty() or not self.thread_pool.thread_queue:
170
- if len(self.results) > 0 or not self.thread_pool.thread_queue.is_empty():
171
- # 处理结果
172
- self.dealresult()
173
- if self.dealresultstatus:
174
- # 处理完结果后要清理
175
- self.results.clear()
176
- else:
177
- time.sleep(10)
178
- else:
179
- time.sleep(3)
180
-
181
- def deal_results(self, *args, **kwargs):
182
- """
183
- 现在单开线程处理结果
184
- :return:
185
- """
186
- self.logger.info("开始处理结果")
187
- while True:
188
- self.result_event_status = True
189
- self.logger.info("self.thread_pool.event_is_set() is {}".format(self.thread_pool.event_is_set()))
190
- # 处理结果标识
191
- self.dealresultstatus = True
192
- # 从结果队列获取结果到results
193
- self.getreustlFromQueue()
194
- if len(self.results) > self.dealresultsnum or self.thread_pool.work_queue.is_empty() \
195
- or not self.thread_pool.thread_queue.is_empty():
196
- if len(self.results) > 0 or not self.thread_pool.thread_queue.is_empty():
197
- # 处理结果
198
- # 为防止同一个连接多线程操作出现问题使用信号加锁
199
- # 等待event为true
200
- if not self.thread_pool.event_is_set():
201
- # print("deal result is event: " + str(self.thread_pool.event.is_set()))
202
- if self.result_event_status and self.task_event_status:
203
- self.thread_pool.event_set()
204
- time.sleep(1)
205
- continue
206
- self.thread_pool.event_wait(60)
207
- # 设置event为false不允许其他等待线程操作
208
- self.thread_pool.event_clear()
209
- if self.task_event_status is False:
210
- continue
211
- self.result_event_status = False
212
- # 处理结果
213
- self.dealresult()
214
- # 本地操作完毕 允许其他线程操作mysql
215
- self.thread_pool.event_set()
216
- self.result_event_status = True
217
- if self.dealresultstatus:
218
- # 处理完结果后要清理
219
- self.results.clear()
220
- else:
221
- time.sleep(10)
222
- else:
223
- time.sleep(3)
224
-
225
- def setfunc(self, func):
226
- # 设置线程方法
227
- self.func = func
228
-
229
- def add_job(self, func, *args, **kwargs):
230
- self.jobnum += 1
231
- self.thread_pool.add_job(func, *args, **kwargs)
232
-
233
- def getreustlFromQueue(self):
234
- # 从结果队列获取结果到results
235
- once_result_num = 0
236
- while not self.thread_pool.result_queue.is_empty():
237
- self.resultnum += 1
238
- once_result_num += 1
239
- if once_result_num > 100:
240
- return
241
- result = self.thread_pool.result_queue.get()
242
- self.results.append(result)
243
- self.thread_pool.result_queue.task_done()
244
-
245
- def checkResultsfininsh(self, *args, **kwargs):
246
- """
247
- 该函数用于处理运行到最后时结果不足100的情况
248
- :return:
249
- """
250
- if self.thread_pool.work_queue.is_empty():
251
- t1 = len(self.results)
252
- self.logger.info("self.results len is %s " % str(t1))
253
- if t1 != 0:
254
- return False
255
- else:
256
- return True
257
-
258
- def other(self):
259
- self.logger.info("工作队列任务量为{},结果队列任务量为{}".format(self.thread_pool.work_queue.get_size(),
260
- self.thread_pool.result_queue.get_size()))
261
- # 通过工作队列和结果队列观察是否结束
262
- if self.thread_pool.checkThreadRunFinish():
263
- self.logger.info("初次判断任务已经结束,各个队列为空")
264
- return True
265
- else:
266
- return False
267
-
268
- def check_especial_thread(self):
269
- if self.modle == 1:
270
- task = self.__setTask
271
- proxy = self.setProxy
272
- result = self.deal_results
273
- elif self.modle == 2:
274
- task = self.setTask_noevent
275
- proxy = self.setProxy
276
- result = self.deal_results_no_event
277
- else:
278
- raise Exception("不存在指定model")
279
-
280
- nowThreadsName = self.thread_pool.get_now_thread()
281
- for name in list(self.thread_pool.especial_thread_pool_dicts.keys()):
282
- thread = self.thread_pool.especial_thread_pool_dicts[name].get_thread()
283
- # 如果线程字典为空 代表已被删除
284
- if name in nowThreadsName and thread.is_alive():
285
- # print(name + ": is run")
286
- pass # 当前某线程名包含在初始化线程组中,可以认为线程仍在运行
287
- else:
288
- self.logger.info("name is :" + name + "; 没有在线程中")
289
- if name in self.etn.list_name():
290
- if name == self.etn.taskthreadname:
291
- taskin = task
292
- elif name == self.etn.proxythreadname:
293
- taskin = proxy
294
- elif name == self.etn.dealresultthreadname:
295
- taskin = result
296
- else:
297
- raise Exception("没有对应的任务,请检查")
298
- is_start = False
299
- if name in self.thread_pool.especial_thread_pool_dicts:
300
- threadinfo = self.thread_pool.especial_thread_pool_dicts[name]
301
- if threadinfo.get_thread().is_alive():
302
- is_start = True
303
- if not threadinfo.get_is_restart():
304
- is_start = True
305
- if not is_start:
306
- args = self.thread_pool.especial_thread_pool_dicts[name].get_args()
307
- kwargs = self.thread_pool.especial_thread_pool_dicts[name].get_kwargs()
308
- self.thread_pool.set_add_especial_thread(taskin, name, *args, **kwargs)
309
-
310
- def start_especial_thread(self):
311
- # 开启一个线程设置任务
312
- self.thread_pool.set_add_especial_thread(self.__setTask, self.etn.taskthreadname)
313
- self.thread_pool.set_add_especial_thread(self.setProxy, self.etn.proxythreadname)
314
- self.thread_pool.set_add_especial_thread(self.deal_results, self.etn.dealresultthreadname)
315
- self.thread_pool.especial_start()
316
-
317
- def start_especial_thread_no_evnt(self):
318
- # 开启一个线程设置任务
319
- self.thread_pool.set_add_especial_thread(self.setTask_noevent, self.etn.taskthreadname)
320
- self.thread_pool.set_add_especial_thread(self.setProxy, self.etn.proxythreadname)
321
- self.thread_pool.set_add_especial_thread(self.deal_results_no_event, self.etn.dealresultthreadname)
322
- self.thread_pool.especial_start()
323
-
324
- def is_break(self):
325
- return False
326
-
327
- def run(self, model=1):
328
- self.modle = model
329
- if model == 1:
330
- # 有event
331
- self.start_especial_thread()
332
- elif model == 2:
333
- self.start_especial_thread_no_evnt()
334
- while True:
335
- time.sleep(3)
336
- self.thread_pool.checkThread()
337
- self.check_especial_thread()
338
- if self.other():
339
- if not self.checkResultsfininsh():
340
- continue
341
- else:
342
- print("进入other 判断 再次确认finish")
343
- if self.thread_pool.work_queue.is_empty() and self.thread_pool.result_queue.is_empty() \
344
- and len(self.results) == 0:
345
- print("运行完毕")
346
- if self.is_break():
347
- print("10 s break")
348
- time.sleep(10)
349
- break
1
+ import threading
2
+ import time
3
+ from abc import ABC, abstractmethod
4
+
5
+ from re_common.baselibrary.mthread.MThreadingRun import especialThreadName
6
+ from re_common.baselibrary.mthread.mythreading import ThreadPoolManger
7
+ from re_common.baselibrary.utils.ringlist import RingList
8
+ from re_common.facade.loggerfacade import get_streamlogger
9
+
10
+ """
11
+ event版的多线程,现在已经不在使用 (已废弃)
12
+ """
13
+ class MThreadingRunEvent(ABC):
14
+ def __init__(self, num, logger=None):
15
+ if logger:
16
+ self.logger = logger
17
+ else:
18
+ self.logger = get_streamlogger()
19
+ self.etn = especialThreadName()
20
+ # 线程数
21
+ self.threadingnum = num
22
+ # 代理列表
23
+ self.list_proxy = RingList()
24
+ # 线程池
25
+ self.thread_pool = ThreadPoolManger(self.threadingnum, self.logger)
26
+ self.thread_pool.set_callback(self.thread_pool_hook)
27
+ # 结果集
28
+ self.results = []
29
+ # 线程方法
30
+ self.func = self.fun
31
+ # 结果数
32
+ self.resultnum = 0
33
+ # 工作线程数
34
+ self.jobnum = 0
35
+ # 结果集被处理标志 默认被处理 是为了兼容之前的代码不去改动
36
+ self.dealresultstatus = True
37
+ # 結果到达该数量后处理结果 默认及时处理
38
+ self.dealresultsnum = 0
39
+ # 代理设置时间
40
+ self.proxytime = 0
41
+ # 标识event set之后是否对任务进行了设置
42
+ # self.eventbool = False
43
+ self.modle = 1
44
+ # 在任务和处理结果时event信号的状态
45
+ self.result_event_status = True
46
+ self.task_event_status = True
47
+ self.thread_run_lock = threading.Lock()
48
+
49
+ # 全局使用特殊的单词
50
+ self.BREAK = "break"
51
+
52
+ def set_is_restart(self, name, is_restart):
53
+ self.thread_pool.especial_thread_pool_dicts[name].set_is_restart(is_restart)
54
+
55
+ @abstractmethod
56
+ def setProxy(self, proxysList=None):
57
+ """
58
+ 将代理加入到循环队列中 self.list_proxy
59
+ :param proxysList:
60
+ :return:
61
+ """
62
+ pass
63
+
64
+ @abstractmethod
65
+ def fun(self, threadval, *args, **kwargs):
66
+ pass
67
+
68
+ @abstractmethod
69
+ def thread_pool_hook(self, thread_pool_dicts, thread, args, kwargs) -> dict:
70
+ """
71
+ 钩子函数 可以被重写
72
+ 主要重写里面的dicts部分
73
+ :return:
74
+ """
75
+ return {}
76
+
77
+ @abstractmethod
78
+ def setTask(self, results=None, *args, **kwargs):
79
+ # self.thread_pool.event.set() # 自动释放信号
80
+ # 当设置任务和处理结果使用同一个连接时尽量使用event信号保证不同时执行sql在一个执行未返回时
81
+ # 请查询较大数据时主动释放
82
+ pass
83
+
84
+ @abstractmethod
85
+ def getTask(self, *args, **kwargs):
86
+ """
87
+ 主要用于mysql的请求
88
+ :param args:
89
+ :param kwargs:
90
+ :return:
91
+ """
92
+ pass
93
+
94
+ def setTask_noevent(self, *args, **kwargs):
95
+ """
96
+ 设置任务,在没有event限制的情况下将两个步骤写在一起
97
+ :param args:
98
+ :param kwargs:
99
+ :return:
100
+ """
101
+ while True:
102
+ results = self.getTask(*args, **kwargs)
103
+ reslt = self.setTask(results, *args, **kwargs)
104
+ if reslt == self.BREAK:
105
+ break
106
+
107
+ def __setTask(self, *args, **kwargs):
108
+ # 设置等待任务队列为空时继续添加任务
109
+ # 继承类必须在第一步调用父类的该方法
110
+ while True:
111
+ # 获取Event 状态
112
+ self.task_event_status = True
113
+ self.logger.info("self.thread_pool.event_is_set() is {}".format(self.thread_pool.event_is_set()))
114
+ # 如果状态为False 即 堵塞状态
115
+ if not self.thread_pool.event_is_set():
116
+ # 判断 result_event_status 和 task_event_status 都堵塞 理论上不存在 但Event并不完美
117
+ if self.result_event_status and self.task_event_status:
118
+ # 设置 信号允许堵塞通过 状态为True
119
+ self.thread_pool.event_set()
120
+ time.sleep(1)
121
+ continue
122
+ self.logger.info("event_wait")
123
+ # 等待信号后执行设置任务
124
+ self.thread_pool.event_wait(50)
125
+ # 得到信号后clrar 不允许其他地方运行 状态 False
126
+ self.thread_pool.event_clear()
127
+ if self.result_event_status is False:
128
+ self.logger.info("elf.result_event_status is False")
129
+ continue
130
+ self.task_event_status = False
131
+ # 执行getTask
132
+ self.logger.info("getTask")
133
+ result = self.getTask(*args, **kwargs)
134
+ # 运行完毕 其他地方等待的信号可以继续运行 True
135
+ self.thread_pool.event_set()
136
+ self.task_event_status = True
137
+ # 执行ste()
138
+ reslt = self.setTask(result, *args, **kwargs)
139
+
140
+ # 是否跳出循环 结束线程
141
+ if reslt == "break":
142
+ break
143
+ time.sleep(1)
144
+
145
+ def dealresult(self, *args, **kwargs):
146
+ """
147
+ 如果没有在这里处理 请将 self.dealresultstatus = False这样不会丢数据
148
+ :param args:
149
+ :param kwargs:
150
+ :return:
151
+ """
152
+ pass
153
+
154
+ def deal_results_no_event(self, *args, **kwargs):
155
+ """
156
+ 该函数用于没有信号的机制里
157
+ 使用信号主要是因为多线程无法同时使用一个链接
158
+ 有可能出现错误
159
+ :param args:
160
+ :param kwargs:
161
+ :return:
162
+ """
163
+ while True:
164
+ # 处理结果标识
165
+ self.dealresultstatus = True
166
+ # 从结果队列获取结果到results
167
+ self.getreustlFromQueue()
168
+ if len(
169
+ self.results) > self.dealresultsnum or self.thread_pool.work_queue.is_empty() or not self.thread_pool.thread_queue:
170
+ if len(self.results) > 0 or not self.thread_pool.thread_queue.is_empty():
171
+ # 处理结果
172
+ self.dealresult()
173
+ if self.dealresultstatus:
174
+ # 处理完结果后要清理
175
+ self.results.clear()
176
+ else:
177
+ time.sleep(10)
178
+ else:
179
+ time.sleep(3)
180
+
181
+ def deal_results(self, *args, **kwargs):
182
+ """
183
+ 现在单开线程处理结果
184
+ :return:
185
+ """
186
+ self.logger.info("开始处理结果")
187
+ while True:
188
+ self.result_event_status = True
189
+ self.logger.info("self.thread_pool.event_is_set() is {}".format(self.thread_pool.event_is_set()))
190
+ # 处理结果标识
191
+ self.dealresultstatus = True
192
+ # 从结果队列获取结果到results
193
+ self.getreustlFromQueue()
194
+ if len(self.results) > self.dealresultsnum or self.thread_pool.work_queue.is_empty() \
195
+ or not self.thread_pool.thread_queue.is_empty():
196
+ if len(self.results) > 0 or not self.thread_pool.thread_queue.is_empty():
197
+ # 处理结果
198
+ # 为防止同一个连接多线程操作出现问题使用信号加锁
199
+ # 等待event为true
200
+ if not self.thread_pool.event_is_set():
201
+ # print("deal result is event: " + str(self.thread_pool.event.is_set()))
202
+ if self.result_event_status and self.task_event_status:
203
+ self.thread_pool.event_set()
204
+ time.sleep(1)
205
+ continue
206
+ self.thread_pool.event_wait(60)
207
+ # 设置event为false不允许其他等待线程操作
208
+ self.thread_pool.event_clear()
209
+ if self.task_event_status is False:
210
+ continue
211
+ self.result_event_status = False
212
+ # 处理结果
213
+ self.dealresult()
214
+ # 本地操作完毕 允许其他线程操作mysql
215
+ self.thread_pool.event_set()
216
+ self.result_event_status = True
217
+ if self.dealresultstatus:
218
+ # 处理完结果后要清理
219
+ self.results.clear()
220
+ else:
221
+ time.sleep(10)
222
+ else:
223
+ time.sleep(3)
224
+
225
+ def setfunc(self, func):
226
+ # 设置线程方法
227
+ self.func = func
228
+
229
+ def add_job(self, func, *args, **kwargs):
230
+ self.jobnum += 1
231
+ self.thread_pool.add_job(func, *args, **kwargs)
232
+
233
+ def getreustlFromQueue(self):
234
+ # 从结果队列获取结果到results
235
+ once_result_num = 0
236
+ while not self.thread_pool.result_queue.is_empty():
237
+ self.resultnum += 1
238
+ once_result_num += 1
239
+ if once_result_num > 100:
240
+ return
241
+ result = self.thread_pool.result_queue.get()
242
+ self.results.append(result)
243
+ self.thread_pool.result_queue.task_done()
244
+
245
+ def checkResultsfininsh(self, *args, **kwargs):
246
+ """
247
+ 该函数用于处理运行到最后时结果不足100的情况
248
+ :return:
249
+ """
250
+ if self.thread_pool.work_queue.is_empty():
251
+ t1 = len(self.results)
252
+ self.logger.info("self.results len is %s " % str(t1))
253
+ if t1 != 0:
254
+ return False
255
+ else:
256
+ return True
257
+
258
+ def other(self):
259
+ self.logger.info("工作队列任务量为{},结果队列任务量为{}".format(self.thread_pool.work_queue.get_size(),
260
+ self.thread_pool.result_queue.get_size()))
261
+ # 通过工作队列和结果队列观察是否结束
262
+ if self.thread_pool.checkThreadRunFinish():
263
+ self.logger.info("初次判断任务已经结束,各个队列为空")
264
+ return True
265
+ else:
266
+ return False
267
+
268
+ def check_especial_thread(self):
269
+ if self.modle == 1:
270
+ task = self.__setTask
271
+ proxy = self.setProxy
272
+ result = self.deal_results
273
+ elif self.modle == 2:
274
+ task = self.setTask_noevent
275
+ proxy = self.setProxy
276
+ result = self.deal_results_no_event
277
+ else:
278
+ raise Exception("不存在指定model")
279
+
280
+ nowThreadsName = self.thread_pool.get_now_thread()
281
+ for name in list(self.thread_pool.especial_thread_pool_dicts.keys()):
282
+ thread = self.thread_pool.especial_thread_pool_dicts[name].get_thread()
283
+ # 如果线程字典为空 代表已被删除
284
+ if name in nowThreadsName and thread.is_alive():
285
+ # print(name + ": is run")
286
+ pass # 当前某线程名包含在初始化线程组中,可以认为线程仍在运行
287
+ else:
288
+ self.logger.info("name is :" + name + "; 没有在线程中")
289
+ if name in self.etn.list_name():
290
+ if name == self.etn.taskthreadname:
291
+ taskin = task
292
+ elif name == self.etn.proxythreadname:
293
+ taskin = proxy
294
+ elif name == self.etn.dealresultthreadname:
295
+ taskin = result
296
+ else:
297
+ raise Exception("没有对应的任务,请检查")
298
+ is_start = False
299
+ if name in self.thread_pool.especial_thread_pool_dicts:
300
+ threadinfo = self.thread_pool.especial_thread_pool_dicts[name]
301
+ if threadinfo.get_thread().is_alive():
302
+ is_start = True
303
+ if not threadinfo.get_is_restart():
304
+ is_start = True
305
+ if not is_start:
306
+ args = self.thread_pool.especial_thread_pool_dicts[name].get_args()
307
+ kwargs = self.thread_pool.especial_thread_pool_dicts[name].get_kwargs()
308
+ self.thread_pool.set_add_especial_thread(taskin, name, *args, **kwargs)
309
+
310
+ def start_especial_thread(self):
311
+ # 开启一个线程设置任务
312
+ self.thread_pool.set_add_especial_thread(self.__setTask, self.etn.taskthreadname)
313
+ self.thread_pool.set_add_especial_thread(self.setProxy, self.etn.proxythreadname)
314
+ self.thread_pool.set_add_especial_thread(self.deal_results, self.etn.dealresultthreadname)
315
+ self.thread_pool.especial_start()
316
+
317
+ def start_especial_thread_no_evnt(self):
318
+ # 开启一个线程设置任务
319
+ self.thread_pool.set_add_especial_thread(self.setTask_noevent, self.etn.taskthreadname)
320
+ self.thread_pool.set_add_especial_thread(self.setProxy, self.etn.proxythreadname)
321
+ self.thread_pool.set_add_especial_thread(self.deal_results_no_event, self.etn.dealresultthreadname)
322
+ self.thread_pool.especial_start()
323
+
324
+ def is_break(self):
325
+ return False
326
+
327
+ def run(self, model=1):
328
+ self.modle = model
329
+ if model == 1:
330
+ # 有event
331
+ self.start_especial_thread()
332
+ elif model == 2:
333
+ self.start_especial_thread_no_evnt()
334
+ while True:
335
+ time.sleep(3)
336
+ self.thread_pool.checkThread()
337
+ self.check_especial_thread()
338
+ if self.other():
339
+ if not self.checkResultsfininsh():
340
+ continue
341
+ else:
342
+ print("进入other 判断 再次确认finish")
343
+ if self.thread_pool.work_queue.is_empty() and self.thread_pool.result_queue.is_empty() \
344
+ and len(self.results) == 0:
345
+ print("运行完毕")
346
+ if self.is_break():
347
+ print("10 s break")
348
+ time.sleep(10)
349
+ break