re-common 10.0.22__py3-none-any.whl → 10.0.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- re_common/baselibrary/__init__.py +4 -4
- re_common/baselibrary/baseabs/__init__.py +6 -6
- re_common/baselibrary/baseabs/baseabs.py +26 -26
- re_common/baselibrary/database/mbuilder.py +132 -132
- re_common/baselibrary/database/moudle.py +93 -93
- re_common/baselibrary/database/msqlite3.py +194 -194
- re_common/baselibrary/database/mysql.py +169 -169
- re_common/baselibrary/database/sql_factory.py +26 -26
- re_common/baselibrary/mthread/MThreadingRun.py +486 -486
- re_common/baselibrary/mthread/MThreadingRunEvent.py +349 -349
- re_common/baselibrary/mthread/__init__.py +2 -2
- re_common/baselibrary/mthread/mythreading.py +695 -695
- re_common/baselibrary/pakge_other/socks.py +404 -404
- re_common/baselibrary/readconfig/config_factory.py +18 -18
- re_common/baselibrary/readconfig/ini_config.py +317 -317
- re_common/baselibrary/readconfig/toml_config.py +49 -49
- re_common/baselibrary/temporary/envdata.py +36 -36
- re_common/baselibrary/tools/all_requests/aiohttp_request.py +118 -118
- re_common/baselibrary/tools/all_requests/httpx_requet.py +102 -102
- re_common/baselibrary/tools/all_requests/mrequest.py +412 -412
- re_common/baselibrary/tools/all_requests/requests_request.py +81 -81
- re_common/baselibrary/tools/batch_compre/bijiao_batch.py +31 -31
- re_common/baselibrary/tools/contrast_db3.py +123 -123
- re_common/baselibrary/tools/copy_file.py +39 -39
- re_common/baselibrary/tools/db3_2_sizedb3.py +102 -102
- re_common/baselibrary/tools/foreachgz.py +39 -39
- re_common/baselibrary/tools/get_attr.py +10 -10
- re_common/baselibrary/tools/image_to_pdf.py +61 -61
- re_common/baselibrary/tools/java_code_deal.py +139 -139
- re_common/baselibrary/tools/javacode.py +79 -79
- re_common/baselibrary/tools/mdb_db3.py +48 -48
- re_common/baselibrary/tools/merge_file.py +171 -171
- re_common/baselibrary/tools/merge_gz_file.py +165 -165
- re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py +42 -42
- re_common/baselibrary/tools/mhdfstools/hdfst.py +42 -42
- re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py +38 -38
- re_common/baselibrary/tools/mongo_tools.py +50 -50
- re_common/baselibrary/tools/move_file.py +170 -170
- re_common/baselibrary/tools/move_mongo/mongo_table_to_file.py +63 -63
- re_common/baselibrary/tools/move_mongo/move_mongo_table.py +354 -354
- re_common/baselibrary/tools/move_mongo/use_mttf.py +18 -18
- re_common/baselibrary/tools/move_mongo/use_mv.py +93 -93
- re_common/baselibrary/tools/mpandas/mpandasreadexcel.py +125 -125
- re_common/baselibrary/tools/mpandas/pandas_visualization.py +7 -7
- re_common/baselibrary/tools/myparsel.py +104 -104
- re_common/baselibrary/tools/rename_dir_file.py +37 -37
- re_common/baselibrary/tools/sequoiadb_utils.py +398 -398
- re_common/baselibrary/tools/split_line_to_many.py +25 -25
- re_common/baselibrary/tools/stringtodicts.py +33 -33
- re_common/baselibrary/tools/workwechant_bot.py +84 -84
- re_common/baselibrary/utils/baseaiohttp.py +296 -296
- re_common/baselibrary/utils/baseaiomysql.py +87 -87
- re_common/baselibrary/utils/baseallstep.py +191 -191
- re_common/baselibrary/utils/baseavro.py +19 -19
- re_common/baselibrary/utils/baseboto3.py +291 -291
- re_common/baselibrary/utils/basecsv.py +32 -32
- re_common/baselibrary/utils/basedict.py +133 -133
- re_common/baselibrary/utils/basedir.py +241 -241
- re_common/baselibrary/utils/baseencode.py +351 -351
- re_common/baselibrary/utils/baseencoding.py +28 -28
- re_common/baselibrary/utils/baseesdsl.py +86 -86
- re_common/baselibrary/utils/baseexcel.py +264 -264
- re_common/baselibrary/utils/baseexcept.py +109 -109
- re_common/baselibrary/utils/basefile.py +654 -654
- re_common/baselibrary/utils/baseftp.py +214 -214
- re_common/baselibrary/utils/basegzip.py +60 -60
- re_common/baselibrary/utils/basehdfs.py +135 -135
- re_common/baselibrary/utils/basehttpx.py +268 -268
- re_common/baselibrary/utils/baseip.py +87 -87
- re_common/baselibrary/utils/basejson.py +2 -2
- re_common/baselibrary/utils/baselist.py +32 -32
- re_common/baselibrary/utils/basemotor.py +190 -190
- re_common/baselibrary/utils/basemssql.py +98 -98
- re_common/baselibrary/utils/baseodbc.py +113 -113
- re_common/baselibrary/utils/basepandas.py +302 -302
- re_common/baselibrary/utils/basepeewee.py +11 -11
- re_common/baselibrary/utils/basepika.py +180 -180
- re_common/baselibrary/utils/basepydash.py +143 -143
- re_common/baselibrary/utils/basepymongo.py +230 -230
- re_common/baselibrary/utils/basequeue.py +22 -22
- re_common/baselibrary/utils/baserar.py +57 -57
- re_common/baselibrary/utils/baserequest.py +279 -279
- re_common/baselibrary/utils/baseset.py +8 -8
- re_common/baselibrary/utils/basesmb.py +403 -403
- re_common/baselibrary/utils/basestring.py +382 -382
- re_common/baselibrary/utils/basetime.py +320 -320
- re_common/baselibrary/utils/baseurl.py +121 -121
- re_common/baselibrary/utils/basezip.py +57 -57
- re_common/baselibrary/utils/core/__init__.py +7 -7
- re_common/baselibrary/utils/core/bottomutils.py +18 -18
- re_common/baselibrary/utils/core/mdeprecated.py +327 -327
- re_common/baselibrary/utils/core/mlamada.py +16 -16
- re_common/baselibrary/utils/core/msginfo.py +25 -25
- re_common/baselibrary/utils/core/requests_core.py +103 -103
- re_common/baselibrary/utils/fateadm.py +429 -429
- re_common/baselibrary/utils/importfun.py +123 -123
- re_common/baselibrary/utils/mfaker.py +57 -57
- re_common/baselibrary/utils/my_abc/__init__.py +3 -3
- re_common/baselibrary/utils/my_abc/better_abc.py +32 -32
- re_common/baselibrary/utils/mylogger.py +414 -414
- re_common/baselibrary/utils/myredisclient.py +861 -861
- re_common/baselibrary/utils/pipupgrade.py +21 -21
- re_common/baselibrary/utils/ringlist.py +85 -85
- re_common/baselibrary/utils/version_compare.py +36 -36
- re_common/baselibrary/utils/ydmhttp.py +126 -126
- re_common/facade/lazy_import.py +11 -11
- re_common/facade/loggerfacade.py +25 -25
- re_common/facade/mysqlfacade.py +467 -467
- re_common/facade/now.py +31 -31
- re_common/facade/sqlite3facade.py +257 -257
- re_common/facade/use/mq_use_facade.py +83 -83
- re_common/facade/use/proxy_use_facade.py +19 -19
- re_common/libtest/base_dict_test.py +19 -19
- re_common/libtest/baseavro_test.py +13 -13
- re_common/libtest/basefile_test.py +14 -14
- re_common/libtest/basemssql_test.py +77 -77
- re_common/libtest/baseodbc_test.py +7 -7
- re_common/libtest/basepandas_test.py +38 -38
- re_common/libtest/get_attr_test/get_attr_test_settings.py +14 -14
- re_common/libtest/get_attr_test/settings.py +54 -54
- re_common/libtest/idencode_test.py +53 -53
- re_common/libtest/iniconfig_test.py +35 -35
- re_common/libtest/ip_test.py +34 -34
- re_common/libtest/merge_file_test.py +20 -20
- re_common/libtest/mfaker_test.py +8 -8
- re_common/libtest/mm3_test.py +31 -31
- re_common/libtest/mylogger_test.py +88 -88
- re_common/libtest/myparsel_test.py +27 -27
- re_common/libtest/mysql_test.py +151 -151
- re_common/libtest/pymongo_test.py +21 -21
- re_common/libtest/split_test.py +11 -11
- re_common/libtest/sqlite3_merge_test.py +5 -5
- re_common/libtest/sqlite3_test.py +34 -34
- re_common/libtest/tomlconfig_test.py +30 -30
- re_common/libtest/use_tools_test/__init__.py +2 -2
- re_common/libtest/user/__init__.py +4 -4
- re_common/studio/__init__.py +4 -4
- re_common/studio/assignment_expressions.py +36 -36
- re_common/studio/mydash/test1.py +18 -18
- re_common/studio/pydashstudio/first.py +9 -9
- re_common/studio/streamlitstudio/first_app.py +65 -65
- re_common/studio/streamlitstudio/uber_pickups.py +23 -23
- re_common/studio/test.py +18 -18
- re_common/v2/baselibrary/business_utils/BusinessStringUtil.py +195 -0
- re_common/v2/baselibrary/business_utils/__init__.py +0 -0
- re_common/v2/baselibrary/business_utils/rel_tools.py +6 -0
- re_common/v2/baselibrary/decorators/utils.py +59 -59
- re_common/v2/baselibrary/s3object/baseboto3.py +230 -230
- re_common/v2/baselibrary/tools/WeChatRobot.py +95 -79
- re_common/v2/baselibrary/tools/ac_ahocorasick.py +75 -75
- re_common/v2/baselibrary/tools/dict_tools.py +37 -37
- re_common/v2/baselibrary/tools/dolphinscheduler.py +187 -187
- re_common/v2/baselibrary/tools/hdfs_data_processer.py +338 -338
- re_common/v2/baselibrary/tools/list_tools.py +65 -65
- re_common/v2/baselibrary/tools/search_hash_tools.py +54 -54
- re_common/v2/baselibrary/tools/text_matcher.py +326 -326
- re_common/v2/baselibrary/tools/unionfind_tools.py +60 -60
- re_common/v2/baselibrary/utils/BusinessStringUtil.py +196 -196
- re_common/v2/baselibrary/utils/author_smi.py +360 -360
- re_common/v2/baselibrary/utils/base_string_similarity.py +158 -158
- re_common/v2/baselibrary/utils/basedict.py +37 -37
- re_common/v2/baselibrary/utils/basehdfs.py +161 -161
- re_common/v2/baselibrary/utils/basepika.py +180 -180
- re_common/v2/baselibrary/utils/basetime.py +77 -77
- re_common/v2/baselibrary/utils/db.py +38 -38
- re_common/v2/baselibrary/utils/json_cls.py +16 -16
- re_common/v2/baselibrary/utils/mq.py +83 -83
- re_common/v2/baselibrary/utils/n_ary_expression_tree.py +243 -243
- re_common/v2/baselibrary/utils/string_bool.py +186 -149
- re_common/v2/baselibrary/utils/string_clear.py +227 -204
- re_common/v2/baselibrary/utils/string_smi.py +18 -18
- re_common/v2/baselibrary/utils/stringutils.py +213 -213
- re_common/vip/base_step_process.py +11 -11
- re_common/vip/baseencodeid.py +90 -90
- re_common/vip/changetaskname.py +28 -28
- re_common/vip/core_var.py +24 -24
- re_common/vip/mmh3Hash.py +89 -89
- re_common/vip/proxy/allproxys.py +127 -127
- re_common/vip/proxy/allproxys_thread.py +159 -159
- re_common/vip/proxy/cnki_proxy.py +153 -153
- re_common/vip/proxy/kuaidaili.py +87 -87
- re_common/vip/proxy/proxy_all.py +113 -113
- re_common/vip/proxy/update_kuaidaili_0.py +42 -42
- re_common/vip/proxy/wanfang_proxy.py +152 -152
- re_common/vip/proxy/wp_proxy_all.py +181 -181
- re_common/vip/read_rawid_to_txt.py +91 -91
- re_common/vip/title/__init__.py +5 -5
- re_common/vip/title/transform/TransformBookTitleToZt.py +125 -125
- re_common/vip/title/transform/TransformConferenceTitleToZt.py +139 -139
- re_common/vip/title/transform/TransformCstadTitleToZt.py +195 -195
- re_common/vip/title/transform/TransformJournalTitleToZt.py +203 -203
- re_common/vip/title/transform/TransformPatentTitleToZt.py +132 -132
- re_common/vip/title/transform/TransformRegulationTitleToZt.py +114 -114
- re_common/vip/title/transform/TransformStandardTitleToZt.py +135 -135
- re_common/vip/title/transform/TransformThesisTitleToZt.py +135 -135
- re_common/vip/title/transform/__init__.py +10 -10
- {re_common-10.0.22.dist-info → re_common-10.0.24.dist-info}/LICENSE +201 -201
- {re_common-10.0.22.dist-info → re_common-10.0.24.dist-info}/METADATA +16 -16
- re_common-10.0.24.dist-info/RECORD +230 -0
- {re_common-10.0.22.dist-info → re_common-10.0.24.dist-info}/WHEEL +1 -1
- re_common-10.0.22.dist-info/RECORD +0 -227
- {re_common-10.0.22.dist-info → re_common-10.0.24.dist-info}/top_level.txt +0 -0
|
@@ -1,486 +1,486 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import threading
|
|
3
|
-
import time
|
|
4
|
-
from abc import ABC, abstractmethod
|
|
5
|
-
|
|
6
|
-
from re_common.baselibrary.mthread.mythreading import ThreadPoolManger, ThreadInfo, ThreadVal
|
|
7
|
-
from re_common.baselibrary.utils.ringlist import RingList
|
|
8
|
-
from re_common.facade.loggerfacade import get_streamlogger
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class especialThreadName(object):
|
|
12
|
-
|
|
13
|
-
def __init__(self):
|
|
14
|
-
self.taskthreadname = "my_threading_taskthread_1"
|
|
15
|
-
self.proxythreadname = "my_threading_proxythread_1"
|
|
16
|
-
self.dealresultthreadname = "my_threading_dealresult_1"
|
|
17
|
-
|
|
18
|
-
def list_name(self):
|
|
19
|
-
return [self.taskthreadname, self.proxythreadname, self.dealresultthreadname]
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
class MThreadingRun(ABC):
|
|
23
|
-
def __init__(self, num, logger=None):
|
|
24
|
-
if logger:
|
|
25
|
-
self.logger = logger
|
|
26
|
-
else:
|
|
27
|
-
self.logger = get_streamlogger()
|
|
28
|
-
self.etn = especialThreadName()
|
|
29
|
-
# 线程数
|
|
30
|
-
self.threadingnum = num
|
|
31
|
-
# 代理列表
|
|
32
|
-
self.list_proxy = RingList()
|
|
33
|
-
# 线程池
|
|
34
|
-
self.thread_pool = ThreadPoolManger(self.threadingnum, self.logger)
|
|
35
|
-
self.thread_pool.set_callback(self.thread_pool_hook)
|
|
36
|
-
# 结果集
|
|
37
|
-
self.results = []
|
|
38
|
-
# 线程方法
|
|
39
|
-
self.func = self.fun
|
|
40
|
-
# 结果数
|
|
41
|
-
self.resultnum = 0
|
|
42
|
-
# 工作线程数
|
|
43
|
-
self.jobnum = 0
|
|
44
|
-
# 结果集被处理标志 默认被处理 是为了兼容之前的代码不去改动
|
|
45
|
-
self.dealresultstatus = True
|
|
46
|
-
# 結果到达该数量后处理结果 默认及时处理
|
|
47
|
-
self.dealresultsnum = 0
|
|
48
|
-
# 代理设置时间
|
|
49
|
-
self.proxytime = 0
|
|
50
|
-
# 标识event set之后是否对任务进行了设置
|
|
51
|
-
# self.eventbool = False
|
|
52
|
-
self.modle = 1
|
|
53
|
-
# 在任务和处理结果时event信号的状态
|
|
54
|
-
self.result_event_status = True
|
|
55
|
-
self.task_event_status = True
|
|
56
|
-
self.thread_run_lock = threading.Lock()
|
|
57
|
-
|
|
58
|
-
# 默认每次处理的结果数
|
|
59
|
-
self.once_result_num = 100
|
|
60
|
-
|
|
61
|
-
# 全局使用特殊的单词
|
|
62
|
-
self.BREAK = "break"
|
|
63
|
-
|
|
64
|
-
# 进程号
|
|
65
|
-
if hasattr(os, 'getpid'):
|
|
66
|
-
self.pid = os.getpid()
|
|
67
|
-
else:
|
|
68
|
-
self.pid = None
|
|
69
|
-
|
|
70
|
-
def set_is_restart(self, name, is_restart):
|
|
71
|
-
self.thread_pool.especial_thread_pool_dicts[name].set_is_restart(is_restart)
|
|
72
|
-
|
|
73
|
-
@abstractmethod
|
|
74
|
-
def setProxy(self, proxysList=None):
|
|
75
|
-
"""
|
|
76
|
-
将代理加入到循环队列中 self.list_proxy
|
|
77
|
-
:param proxysList:
|
|
78
|
-
:return:
|
|
79
|
-
"""
|
|
80
|
-
pass
|
|
81
|
-
|
|
82
|
-
@abstractmethod
|
|
83
|
-
def fun(self, threadval, *args, **kwargs):
|
|
84
|
-
pass
|
|
85
|
-
|
|
86
|
-
@abstractmethod
|
|
87
|
-
def thread_pool_hook(self, threadinfo: ThreadInfo) -> dict:
|
|
88
|
-
"""
|
|
89
|
-
钩子函数 可以被重写
|
|
90
|
-
主要重写里面的dicts部分
|
|
91
|
-
:return:
|
|
92
|
-
"""
|
|
93
|
-
return {}
|
|
94
|
-
|
|
95
|
-
@abstractmethod
|
|
96
|
-
def setTask(self, results=None, *args, **kwargs):
|
|
97
|
-
# self.thread_pool.event.set() # 自动释放信号
|
|
98
|
-
# 当设置任务和处理结果使用同一个连接时尽量使用event信号保证不同时执行sql在一个执行未返回时
|
|
99
|
-
# 请查询较大数据时主动释放
|
|
100
|
-
pass
|
|
101
|
-
|
|
102
|
-
@abstractmethod
|
|
103
|
-
def getTask(self, *args, **kwargs):
|
|
104
|
-
"""
|
|
105
|
-
主要用于mysql的请求
|
|
106
|
-
:param args:
|
|
107
|
-
:param kwargs:
|
|
108
|
-
:return:
|
|
109
|
-
"""
|
|
110
|
-
pass
|
|
111
|
-
|
|
112
|
-
@abstractmethod
|
|
113
|
-
def dealresult(self, *args, **kwargs):
|
|
114
|
-
"""
|
|
115
|
-
如果没有在这里处理 请将 self.dealresultstatus = False这样不会丢数据
|
|
116
|
-
:param args:
|
|
117
|
-
:param kwargs:
|
|
118
|
-
:return:
|
|
119
|
-
"""
|
|
120
|
-
pass
|
|
121
|
-
|
|
122
|
-
def set_task(self, *args, **kwargs):
|
|
123
|
-
"""
|
|
124
|
-
设置任务,在没有event限制的情况下将两个步骤写在一起
|
|
125
|
-
:param args:
|
|
126
|
-
:param kwargs:
|
|
127
|
-
:return:
|
|
128
|
-
"""
|
|
129
|
-
while True:
|
|
130
|
-
results = self.getTask(*args, **kwargs)
|
|
131
|
-
result = self.setTask(results, *args, **kwargs)
|
|
132
|
-
if result == self.BREAK:
|
|
133
|
-
break
|
|
134
|
-
|
|
135
|
-
def deal_results(self, *args, **kwargs):
|
|
136
|
-
"""
|
|
137
|
-
该函数用于没有信号的机制里
|
|
138
|
-
使用信号主要是因为多线程无法同时使用一个链接
|
|
139
|
-
有可能出现错误
|
|
140
|
-
:param args:
|
|
141
|
-
:param kwargs:
|
|
142
|
-
:return:
|
|
143
|
-
"""
|
|
144
|
-
while True:
|
|
145
|
-
# 处理结果标识
|
|
146
|
-
self.dealresultstatus = True
|
|
147
|
-
# 从结果队列获取结果到results
|
|
148
|
-
self.getreustlFromQueue()
|
|
149
|
-
if len(
|
|
150
|
-
self.results) > self.dealresultsnum or self.thread_pool.work_queue.is_empty() or not self.thread_pool.thread_queue:
|
|
151
|
-
if len(self.results) > 0 or not self.thread_pool.thread_queue.is_empty():
|
|
152
|
-
# 处理结果
|
|
153
|
-
self.dealresult()
|
|
154
|
-
if self.dealresultstatus:
|
|
155
|
-
# 处理完结果后要清理
|
|
156
|
-
self.results.clear()
|
|
157
|
-
else:
|
|
158
|
-
time.sleep(10)
|
|
159
|
-
else:
|
|
160
|
-
time.sleep(3)
|
|
161
|
-
|
|
162
|
-
def setfunc(self, func):
|
|
163
|
-
# 设置线程方法
|
|
164
|
-
self.func = func
|
|
165
|
-
|
|
166
|
-
def add_job(self, func, *args, **kwargs):
|
|
167
|
-
self.jobnum += 1
|
|
168
|
-
self.thread_pool.add_job(func, *args, **kwargs)
|
|
169
|
-
|
|
170
|
-
def getreustlFromQueue(self):
|
|
171
|
-
"""
|
|
172
|
-
从结果队列获取结果到results
|
|
173
|
-
默认取100
|
|
174
|
-
:return:
|
|
175
|
-
"""
|
|
176
|
-
once_result_num = 0
|
|
177
|
-
while not self.thread_pool.result_queue.is_empty():
|
|
178
|
-
self.resultnum += 1
|
|
179
|
-
once_result_num += 1
|
|
180
|
-
if once_result_num > self.once_result_num:
|
|
181
|
-
return
|
|
182
|
-
result = self.thread_pool.result_queue.get()
|
|
183
|
-
self.results.append(result)
|
|
184
|
-
self.thread_pool.result_queue.task_done()
|
|
185
|
-
|
|
186
|
-
def checkResultsfininsh(self, *args, **kwargs):
|
|
187
|
-
"""
|
|
188
|
-
该函数用于处理运行到最后时结果不足100的情况
|
|
189
|
-
:return:
|
|
190
|
-
"""
|
|
191
|
-
if self.thread_pool.work_queue.is_empty():
|
|
192
|
-
t1 = len(self.results)
|
|
193
|
-
self.logger.info("self.results len is %s " % str(t1))
|
|
194
|
-
if t1 != 0:
|
|
195
|
-
return False
|
|
196
|
-
else:
|
|
197
|
-
return True
|
|
198
|
-
|
|
199
|
-
def other(self):
|
|
200
|
-
self.logger.info("工作队列任务量为{},结果队列任务量为{}".format(self.thread_pool.work_queue.get_size(),
|
|
201
|
-
self.thread_pool.result_queue.get_size()))
|
|
202
|
-
# 通过工作队列和结果队列观察是否结束
|
|
203
|
-
if self.thread_pool.checkThreadRunFinish():
|
|
204
|
-
self.logger.info("初次判断任务已经结束,各个队列为空")
|
|
205
|
-
return True
|
|
206
|
-
else:
|
|
207
|
-
return False
|
|
208
|
-
|
|
209
|
-
def check_especial_thread(self):
|
|
210
|
-
task = self.set_task
|
|
211
|
-
proxy = self.setProxy
|
|
212
|
-
result = self.deal_results
|
|
213
|
-
|
|
214
|
-
nowThreadsName = self.thread_pool.get_now_thread()
|
|
215
|
-
for name in list(self.thread_pool.especial_thread_pool_dicts.keys()):
|
|
216
|
-
thread = self.thread_pool.especial_thread_pool_dicts[name].get_thread()
|
|
217
|
-
# 如果线程字典为空 代表已被删除
|
|
218
|
-
if name in nowThreadsName and thread.is_alive():
|
|
219
|
-
# print(name + ": is run")
|
|
220
|
-
pass # 当前某线程名包含在初始化线程组中,可以认为线程仍在运行
|
|
221
|
-
else:
|
|
222
|
-
self.logger.info("name is :" + name + "; 没有在线程中")
|
|
223
|
-
if name in self.etn.list_name():
|
|
224
|
-
if name == self.etn.taskthreadname:
|
|
225
|
-
taskin = task
|
|
226
|
-
elif name == self.etn.proxythreadname:
|
|
227
|
-
taskin = proxy
|
|
228
|
-
elif name == self.etn.dealresultthreadname:
|
|
229
|
-
taskin = result
|
|
230
|
-
else:
|
|
231
|
-
raise Exception("没有对应的任务,请检查")
|
|
232
|
-
is_start = False
|
|
233
|
-
if name in self.thread_pool.especial_thread_pool_dicts:
|
|
234
|
-
threadinfo = self.thread_pool.especial_thread_pool_dicts[name]
|
|
235
|
-
if threadinfo.get_thread().is_alive():
|
|
236
|
-
is_start = True
|
|
237
|
-
if not threadinfo.get_is_restart():
|
|
238
|
-
is_start = True
|
|
239
|
-
if not is_start:
|
|
240
|
-
self.logger.info('重启中:' + name)
|
|
241
|
-
args = self.thread_pool.especial_thread_pool_dicts[name].get_args()
|
|
242
|
-
kwargs = self.thread_pool.especial_thread_pool_dicts[name].get_kwargs()
|
|
243
|
-
thread = self.thread_pool.set_add_especial_thread(taskin, name, *args, **kwargs)
|
|
244
|
-
thread.start()
|
|
245
|
-
|
|
246
|
-
def start_especial_thread(self):
|
|
247
|
-
# 开启一个线程设置任务
|
|
248
|
-
self.thread_pool.set_add_especial_thread(self.set_task, self.etn.taskthreadname)
|
|
249
|
-
self.thread_pool.set_add_especial_thread(self.setProxy, self.etn.proxythreadname)
|
|
250
|
-
self.thread_pool.set_add_especial_thread(self.deal_results, self.etn.dealresultthreadname)
|
|
251
|
-
self.thread_pool.especial_start()
|
|
252
|
-
|
|
253
|
-
def is_break(self):
|
|
254
|
-
return False
|
|
255
|
-
|
|
256
|
-
def run(self):
|
|
257
|
-
self.start_especial_thread()
|
|
258
|
-
while True:
|
|
259
|
-
time.sleep(3)
|
|
260
|
-
self.thread_pool.checkThread()
|
|
261
|
-
self.check_especial_thread()
|
|
262
|
-
if self.other():
|
|
263
|
-
if not self.checkResultsfininsh():
|
|
264
|
-
continue
|
|
265
|
-
else:
|
|
266
|
-
print("进入other 判断 再次确认finish")
|
|
267
|
-
if self.thread_pool.work_queue.is_empty() and self.thread_pool.result_queue.is_empty() \
|
|
268
|
-
and len(self.results) == 0:
|
|
269
|
-
print("运行完毕")
|
|
270
|
-
if self.is_break():
|
|
271
|
-
print("10 s break")
|
|
272
|
-
time.sleep(10)
|
|
273
|
-
break
|
|
274
|
-
else:
|
|
275
|
-
time.sleep(60)
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
class MThreadingRun2(ABC):
|
|
279
|
-
"""
|
|
280
|
-
多线程的封装
|
|
281
|
-
"""
|
|
282
|
-
|
|
283
|
-
def __init__(self, num, logger=None):
|
|
284
|
-
# 日志模块,如果无会自定义一个只打印不输入到文件的logger
|
|
285
|
-
if logger:
|
|
286
|
-
self.logger = logger
|
|
287
|
-
else:
|
|
288
|
-
self.logger = get_streamlogger()
|
|
289
|
-
# 特殊线程的类,对名称进行了统一
|
|
290
|
-
self.etn = especialThreadName()
|
|
291
|
-
# 工作线程数,不包含特殊线程
|
|
292
|
-
self.threadingnum = num
|
|
293
|
-
# 代理列表 RingList是一个环形列表
|
|
294
|
-
self.list_proxy = RingList()
|
|
295
|
-
# 工作线程池
|
|
296
|
-
self.thread_pool = ThreadPoolManger(self.threadingnum, self.logger)
|
|
297
|
-
# 钩子函数,将thread_pool的钩子函数暴露到这一层,方便使用
|
|
298
|
-
self.thread_pool.set_callback(self.thread_pool_hook)
|
|
299
|
-
# 线程方法
|
|
300
|
-
self.func = self.fun
|
|
301
|
-
# 工作量的累计,每一次addjob都会累计一个数
|
|
302
|
-
self.jobnum = 0
|
|
303
|
-
# 代理设置时间
|
|
304
|
-
# self.proxytime = 0
|
|
305
|
-
|
|
306
|
-
# self.modle = 1
|
|
307
|
-
# 线程锁,外部使用
|
|
308
|
-
self.thread_run_lock = threading.Lock()
|
|
309
|
-
|
|
310
|
-
# 全局使用特殊的单词
|
|
311
|
-
self.BREAK = "break"
|
|
312
|
-
|
|
313
|
-
# 进程号
|
|
314
|
-
if hasattr(os, 'getpid'):
|
|
315
|
-
self.pid = os.getpid()
|
|
316
|
-
else:
|
|
317
|
-
self.pid = None
|
|
318
|
-
|
|
319
|
-
def set_is_restart(self, name, is_restart):
|
|
320
|
-
"""
|
|
321
|
-
特殊线程的重启设置,默认都会重启,设置False不允许重启
|
|
322
|
-
:param name:
|
|
323
|
-
:param is_restart:
|
|
324
|
-
:return:
|
|
325
|
-
"""
|
|
326
|
-
self.thread_pool.especial_thread_pool_dicts[name].set_is_restart(is_restart)
|
|
327
|
-
|
|
328
|
-
@abstractmethod
|
|
329
|
-
def setProxy(self, threadval: ThreadVal, proxysList=None):
|
|
330
|
-
"""
|
|
331
|
-
将代理加入到循环队列中 self.list_proxy,或者自己管理代理
|
|
332
|
-
:param proxysList:
|
|
333
|
-
:return:
|
|
334
|
-
"""
|
|
335
|
-
pass
|
|
336
|
-
|
|
337
|
-
@abstractmethod
|
|
338
|
-
def fun(self, threadval, *args, **kwargs):
|
|
339
|
-
"""
|
|
340
|
-
运行的方法
|
|
341
|
-
:param threadval:
|
|
342
|
-
:param args:
|
|
343
|
-
:param kwargs:
|
|
344
|
-
:return:
|
|
345
|
-
"""
|
|
346
|
-
pass
|
|
347
|
-
|
|
348
|
-
@abstractmethod
|
|
349
|
-
def thread_pool_hook(self, threadinfo: ThreadInfo) -> dict:
|
|
350
|
-
"""
|
|
351
|
-
钩子函数 可以被重写
|
|
352
|
-
主要重写里面的ThreadInfo的信息
|
|
353
|
-
:return:
|
|
354
|
-
"""
|
|
355
|
-
return {}
|
|
356
|
-
|
|
357
|
-
@abstractmethod
|
|
358
|
-
def set_task(self, threadval: ThreadVal, *args, **kwargs):
|
|
359
|
-
"""
|
|
360
|
-
设置任务,这里调用addjob
|
|
361
|
-
:param args:
|
|
362
|
-
:param kwargs:
|
|
363
|
-
:return:
|
|
364
|
-
"""
|
|
365
|
-
pass
|
|
366
|
-
|
|
367
|
-
@abstractmethod
|
|
368
|
-
def deal_results(self, threadval: ThreadVal, *args, **kwargs):
|
|
369
|
-
"""
|
|
370
|
-
处理结果
|
|
371
|
-
result_queue = self.thread_pool.result_queue
|
|
372
|
-
:param args:
|
|
373
|
-
:param kwargs:
|
|
374
|
-
:return:
|
|
375
|
-
"""
|
|
376
|
-
pass
|
|
377
|
-
|
|
378
|
-
def setfunc(self, func):
|
|
379
|
-
# 设置线程方法,默认是self.fun
|
|
380
|
-
self.func = func
|
|
381
|
-
|
|
382
|
-
def add_job(self, func, *args, **kwargs):
|
|
383
|
-
"""
|
|
384
|
-
:param func: 运行的方法,一般使用self.func
|
|
385
|
-
:param args: 元组参数
|
|
386
|
-
:param kwargs: 键值对参数
|
|
387
|
-
"""
|
|
388
|
-
self.jobnum += 1
|
|
389
|
-
self.thread_pool.add_job(func, *args, **kwargs)
|
|
390
|
-
|
|
391
|
-
def other(self):
|
|
392
|
-
"""
|
|
393
|
-
其他的一些判断
|
|
394
|
-
:return:
|
|
395
|
-
"""
|
|
396
|
-
self.logger.info("工作队列任务量为{},结果队列任务量为{}".format(self.thread_pool.work_queue.get_size(),
|
|
397
|
-
self.thread_pool.result_queue.get_size()))
|
|
398
|
-
# 通过工作队列和结果队列观察是否结束
|
|
399
|
-
if self.thread_pool.checkThreadRunFinish():
|
|
400
|
-
self.logger.info("初次判断任务已经结束,各个队列为空")
|
|
401
|
-
return True
|
|
402
|
-
else:
|
|
403
|
-
return False
|
|
404
|
-
|
|
405
|
-
def check_especial_thread(self):
|
|
406
|
-
"""
|
|
407
|
-
检查特殊线程是否挂掉
|
|
408
|
-
:return:
|
|
409
|
-
"""
|
|
410
|
-
task = self.set_task
|
|
411
|
-
proxy = self.setProxy
|
|
412
|
-
result = self.deal_results
|
|
413
|
-
|
|
414
|
-
nowThreadsName = self.thread_pool.get_now_thread()
|
|
415
|
-
for name in list(self.thread_pool.especial_thread_pool_dicts.keys()):
|
|
416
|
-
thread = self.thread_pool.especial_thread_pool_dicts[name].get_thread()
|
|
417
|
-
# 如果线程字典为空 代表已被删除
|
|
418
|
-
if name in nowThreadsName and thread.is_alive():
|
|
419
|
-
# print(name + ": is run")
|
|
420
|
-
pass # 当前某线程名包含在初始化线程组中,可以认为线程仍在运行
|
|
421
|
-
else:
|
|
422
|
-
self.logger.info("name is :" + name + "; 没有在线程中")
|
|
423
|
-
if name in self.etn.list_name():
|
|
424
|
-
if name == self.etn.taskthreadname:
|
|
425
|
-
taskin = task
|
|
426
|
-
elif name == self.etn.proxythreadname:
|
|
427
|
-
taskin = proxy
|
|
428
|
-
elif name == self.etn.dealresultthreadname:
|
|
429
|
-
taskin = result
|
|
430
|
-
else:
|
|
431
|
-
raise Exception("没有对应的任务,请检查")
|
|
432
|
-
is_start = False
|
|
433
|
-
if name in self.thread_pool.especial_thread_pool_dicts:
|
|
434
|
-
threadinfo = self.thread_pool.especial_thread_pool_dicts[name]
|
|
435
|
-
if threadinfo.get_thread().is_alive():
|
|
436
|
-
is_start = True
|
|
437
|
-
if not threadinfo.get_is_restart():
|
|
438
|
-
is_start = True
|
|
439
|
-
if not is_start:
|
|
440
|
-
self.logger.info('重启中:' + name)
|
|
441
|
-
args = self.thread_pool.especial_thread_pool_dicts[name].get_args()
|
|
442
|
-
kwargs = self.thread_pool.especial_thread_pool_dicts[name].get_kwargs()
|
|
443
|
-
thread = self.thread_pool.set_add_especial_thread(taskin, name, mode="mysuper", *args, **kwargs)
|
|
444
|
-
thread.start()
|
|
445
|
-
|
|
446
|
-
def start_especial_thread(self):
|
|
447
|
-
# 开启特殊线程设置任务
|
|
448
|
-
self.thread_pool.set_add_especial_thread(self.set_task, self.etn.taskthreadname, mode="mysuper")
|
|
449
|
-
self.thread_pool.set_add_especial_thread(self.setProxy, self.etn.proxythreadname, mode="mysuper")
|
|
450
|
-
self.thread_pool.set_add_especial_thread(self.deal_results, self.etn.dealresultthreadname, mode="mysuper")
|
|
451
|
-
self.thread_pool.especial_start()
|
|
452
|
-
|
|
453
|
-
def is_break(self):
|
|
454
|
-
"""
|
|
455
|
-
运行完毕是否退出多线程
|
|
456
|
-
:return:
|
|
457
|
-
"""
|
|
458
|
-
return False
|
|
459
|
-
|
|
460
|
-
# 在主线程添加的钩子函数,每次循环都会调用到
|
|
461
|
-
def run_hook_everytime(self):
|
|
462
|
-
time.sleep(3)
|
|
463
|
-
|
|
464
|
-
# 运行完毕的钩子函数
|
|
465
|
-
def run_hook_result(self):
|
|
466
|
-
pass
|
|
467
|
-
|
|
468
|
-
def run(self):
|
|
469
|
-
"""
|
|
470
|
-
主进程代码,线程设置为主进程结束所有线程都会结束
|
|
471
|
-
所以任务完成之前主进程不应该结束
|
|
472
|
-
:return:
|
|
473
|
-
"""
|
|
474
|
-
self.start_especial_thread()
|
|
475
|
-
while True:
|
|
476
|
-
self.run_hook_everytime()
|
|
477
|
-
self.thread_pool.checkThread()
|
|
478
|
-
self.check_especial_thread()
|
|
479
|
-
if self.other():
|
|
480
|
-
print("进入other 判断 再次确认finish")
|
|
481
|
-
if self.thread_pool.work_queue.is_empty() and self.thread_pool.result_queue.is_empty():
|
|
482
|
-
print("运行完毕")
|
|
483
|
-
self.run_hook_result()
|
|
484
|
-
if self.is_break():
|
|
485
|
-
print("退出主循环")
|
|
486
|
-
break
|
|
1
|
+
import os
|
|
2
|
+
import threading
|
|
3
|
+
import time
|
|
4
|
+
from abc import ABC, abstractmethod
|
|
5
|
+
|
|
6
|
+
from re_common.baselibrary.mthread.mythreading import ThreadPoolManger, ThreadInfo, ThreadVal
|
|
7
|
+
from re_common.baselibrary.utils.ringlist import RingList
|
|
8
|
+
from re_common.facade.loggerfacade import get_streamlogger
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class especialThreadName(object):
|
|
12
|
+
|
|
13
|
+
def __init__(self):
|
|
14
|
+
self.taskthreadname = "my_threading_taskthread_1"
|
|
15
|
+
self.proxythreadname = "my_threading_proxythread_1"
|
|
16
|
+
self.dealresultthreadname = "my_threading_dealresult_1"
|
|
17
|
+
|
|
18
|
+
def list_name(self):
|
|
19
|
+
return [self.taskthreadname, self.proxythreadname, self.dealresultthreadname]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class MThreadingRun(ABC):
|
|
23
|
+
def __init__(self, num, logger=None):
|
|
24
|
+
if logger:
|
|
25
|
+
self.logger = logger
|
|
26
|
+
else:
|
|
27
|
+
self.logger = get_streamlogger()
|
|
28
|
+
self.etn = especialThreadName()
|
|
29
|
+
# 线程数
|
|
30
|
+
self.threadingnum = num
|
|
31
|
+
# 代理列表
|
|
32
|
+
self.list_proxy = RingList()
|
|
33
|
+
# 线程池
|
|
34
|
+
self.thread_pool = ThreadPoolManger(self.threadingnum, self.logger)
|
|
35
|
+
self.thread_pool.set_callback(self.thread_pool_hook)
|
|
36
|
+
# 结果集
|
|
37
|
+
self.results = []
|
|
38
|
+
# 线程方法
|
|
39
|
+
self.func = self.fun
|
|
40
|
+
# 结果数
|
|
41
|
+
self.resultnum = 0
|
|
42
|
+
# 工作线程数
|
|
43
|
+
self.jobnum = 0
|
|
44
|
+
# 结果集被处理标志 默认被处理 是为了兼容之前的代码不去改动
|
|
45
|
+
self.dealresultstatus = True
|
|
46
|
+
# 結果到达该数量后处理结果 默认及时处理
|
|
47
|
+
self.dealresultsnum = 0
|
|
48
|
+
# 代理设置时间
|
|
49
|
+
self.proxytime = 0
|
|
50
|
+
# 标识event set之后是否对任务进行了设置
|
|
51
|
+
# self.eventbool = False
|
|
52
|
+
self.modle = 1
|
|
53
|
+
# 在任务和处理结果时event信号的状态
|
|
54
|
+
self.result_event_status = True
|
|
55
|
+
self.task_event_status = True
|
|
56
|
+
self.thread_run_lock = threading.Lock()
|
|
57
|
+
|
|
58
|
+
# 默认每次处理的结果数
|
|
59
|
+
self.once_result_num = 100
|
|
60
|
+
|
|
61
|
+
# 全局使用特殊的单词
|
|
62
|
+
self.BREAK = "break"
|
|
63
|
+
|
|
64
|
+
# 进程号
|
|
65
|
+
if hasattr(os, 'getpid'):
|
|
66
|
+
self.pid = os.getpid()
|
|
67
|
+
else:
|
|
68
|
+
self.pid = None
|
|
69
|
+
|
|
70
|
+
def set_is_restart(self, name, is_restart):
|
|
71
|
+
self.thread_pool.especial_thread_pool_dicts[name].set_is_restart(is_restart)
|
|
72
|
+
|
|
73
|
+
@abstractmethod
|
|
74
|
+
def setProxy(self, proxysList=None):
|
|
75
|
+
"""
|
|
76
|
+
将代理加入到循环队列中 self.list_proxy
|
|
77
|
+
:param proxysList:
|
|
78
|
+
:return:
|
|
79
|
+
"""
|
|
80
|
+
pass
|
|
81
|
+
|
|
82
|
+
@abstractmethod
|
|
83
|
+
def fun(self, threadval, *args, **kwargs):
|
|
84
|
+
pass
|
|
85
|
+
|
|
86
|
+
@abstractmethod
|
|
87
|
+
def thread_pool_hook(self, threadinfo: ThreadInfo) -> dict:
|
|
88
|
+
"""
|
|
89
|
+
钩子函数 可以被重写
|
|
90
|
+
主要重写里面的dicts部分
|
|
91
|
+
:return:
|
|
92
|
+
"""
|
|
93
|
+
return {}
|
|
94
|
+
|
|
95
|
+
@abstractmethod
|
|
96
|
+
def setTask(self, results=None, *args, **kwargs):
|
|
97
|
+
# self.thread_pool.event.set() # 自动释放信号
|
|
98
|
+
# 当设置任务和处理结果使用同一个连接时尽量使用event信号保证不同时执行sql在一个执行未返回时
|
|
99
|
+
# 请查询较大数据时主动释放
|
|
100
|
+
pass
|
|
101
|
+
|
|
102
|
+
@abstractmethod
|
|
103
|
+
def getTask(self, *args, **kwargs):
|
|
104
|
+
"""
|
|
105
|
+
主要用于mysql的请求
|
|
106
|
+
:param args:
|
|
107
|
+
:param kwargs:
|
|
108
|
+
:return:
|
|
109
|
+
"""
|
|
110
|
+
pass
|
|
111
|
+
|
|
112
|
+
@abstractmethod
|
|
113
|
+
def dealresult(self, *args, **kwargs):
|
|
114
|
+
"""
|
|
115
|
+
如果没有在这里处理 请将 self.dealresultstatus = False这样不会丢数据
|
|
116
|
+
:param args:
|
|
117
|
+
:param kwargs:
|
|
118
|
+
:return:
|
|
119
|
+
"""
|
|
120
|
+
pass
|
|
121
|
+
|
|
122
|
+
def set_task(self, *args, **kwargs):
|
|
123
|
+
"""
|
|
124
|
+
设置任务,在没有event限制的情况下将两个步骤写在一起
|
|
125
|
+
:param args:
|
|
126
|
+
:param kwargs:
|
|
127
|
+
:return:
|
|
128
|
+
"""
|
|
129
|
+
while True:
|
|
130
|
+
results = self.getTask(*args, **kwargs)
|
|
131
|
+
result = self.setTask(results, *args, **kwargs)
|
|
132
|
+
if result == self.BREAK:
|
|
133
|
+
break
|
|
134
|
+
|
|
135
|
+
def deal_results(self, *args, **kwargs):
    """
    Result-handling loop for setups that do not use the event signal.

    The signal exists mainly because several threads cannot use one
    connection at the same time; without it errors may occur.

    Loops forever. Each pass pulls queued results into ``self.results`` through
    ``getreustlFromQueue`` and calls ``dealresult`` when a batch is due.

    NOTE(review): the block nesting was reconstructed from a listing that had
    lost its indentation -- confirm which ``if`` each ``else`` belongs to.

    :param args: not used by this method
    :param kwargs: not used by this method
    :return: does not return; the loop has no ``break`` or ``return``
    """
    while True:
        # Result-handling flag, reset to True on every pass
        self.dealresultstatus = True
        # Pull results from the result queue into self.results
        self.getreustlFromQueue()
        if len(
                self.results) > self.dealresultsnum or self.thread_pool.work_queue.is_empty() or not self.thread_pool.thread_queue:
            if len(self.results) > 0 or not self.thread_pool.thread_queue.is_empty():
                # Handle the results
                self.dealresult()
                if self.dealresultstatus:
                    # Clear the batch once the results have been handled
                    self.results.clear()
            else:
                time.sleep(10)
        else:
            time.sleep(3)
|
|
161
|
+
|
|
162
|
+
def setfunc(self, func):
    """
    Set the thread method by storing ``func`` on ``self.func``.

    :param func: callable to store
    :return: None
    """
    self.func = func
|
|
165
|
+
|
|
166
|
+
def add_job(self, func, *args, **kwargs):
    """
    Count one more job and pass it on to the thread pool.

    :param func: callable handed to ``self.thread_pool.add_job``
    :param args: positional arguments forwarded with the job
    :param kwargs: keyword arguments forwarded with the job
    :return: None
    """
    self.jobnum = self.jobnum + 1
    pool = self.thread_pool
    pool.add_job(func, *args, **kwargs)
|
|
169
|
+
|
|
170
|
+
def getreustlFromQueue(self):
    """
    Move finished results from the pool's result queue into ``self.results``.

    At most ``self.once_result_num`` results are taken per call (the original
    note says the default is 100). ``self.resultnum`` is advanced once for
    every result actually taken. Previously the counter was bumped before the
    batch limit was checked, so each call that hit the limit counted one
    result that was still sitting in the queue.

    :return: None
    """
    result_queue = self.thread_pool.result_queue
    taken = 0
    # Stop at the batch limit or as soon as the queue has been drained.
    while taken < self.once_result_num and not result_queue.is_empty():
        self.results.append(result_queue.get())
        result_queue.task_done()
        taken += 1
        # Count only results that really left the queue.
        self.resultnum += 1
|
|
185
|
+
|
|
186
|
+
def checkResultsfininsh(self, *args, **kwargs):
    """
    Handle the end-of-run case where fewer than 100 results remain.

    Only looks at ``self.results`` once the work queue is empty.

    :return: True when the work queue is empty and ``self.results`` is empty,
        False when the work queue is empty but results are still pending,
        None when the work queue still holds work
    """
    if not self.thread_pool.work_queue.is_empty():
        return None
    pending = len(self.results)
    self.logger.info("self.results len is %s " % str(pending))
    return pending == 0
|
|
198
|
+
|
|
199
|
+
def other(self):
    """
    Log both queue sizes and report whether the pool considers the run finished.

    :return: True when ``self.thread_pool.checkThreadRunFinish()`` is truthy,
        otherwise False
    """
    pool = self.thread_pool
    work_size = pool.work_queue.get_size()
    result_size = pool.result_queue.get_size()
    self.logger.info("工作队列任务量为{},结果队列任务量为{}".format(work_size, result_size))
    # Judge completion from the work queue and the result queue
    if not pool.checkThreadRunFinish():
        return False
    self.logger.info("初次判断任务已经结束,各个队列为空")
    return True
|
|
208
|
+
|
|
209
|
+
def check_especial_thread(self):
    """
    Look at every registered special thread and restart the ones that stopped.

    A thread counts as running when its name is in
    ``self.thread_pool.get_now_thread()`` and its thread object is alive.
    Otherwise, for names known to ``self.etn``, a new thread with the matching
    target is registered and started, unless the record's thread is alive or
    its ``get_is_restart()`` is falsy.

    NOTE(review): the block nesting was reconstructed from a listing that had
    lost its indentation -- confirm that the restart logic sits inside the
    ``if name in self.etn.list_name()`` branch.

    :raises Exception: when a name is listed by ``self.etn.list_name()`` but
        matches none of the three known special-thread names
    :return: None
    """
    task = self.set_task
    proxy = self.setProxy
    result = self.deal_results

    nowThreadsName = self.thread_pool.get_now_thread()
    # Iterate over a snapshot of the keys
    for name in list(self.thread_pool.especial_thread_pool_dicts.keys()):
        thread = self.thread_pool.especial_thread_pool_dicts[name].get_thread()
        # An empty thread dict means the thread has been removed
        if name in nowThreadsName and thread.is_alive():
            # print(name + ": is run")
            pass  # The name is among the current threads, so the thread is taken to be running
        else:
            self.logger.info("name is :" + name + "; 没有在线程中")
            if name in self.etn.list_name():
                # Pick the target that belongs to this special-thread name
                if name == self.etn.taskthreadname:
                    taskin = task
                elif name == self.etn.proxythreadname:
                    taskin = proxy
                elif name == self.etn.dealresultthreadname:
                    taskin = result
                else:
                    raise Exception("没有对应的任务,请检查")
                # is_start == True means "do not restart"
                is_start = False
                if name in self.thread_pool.especial_thread_pool_dicts:
                    threadinfo = self.thread_pool.especial_thread_pool_dicts[name]
                    if threadinfo.get_thread().is_alive():
                        is_start = True
                    if not threadinfo.get_is_restart():
                        is_start = True
                if not is_start:
                    self.logger.info('重启中:' + name)
                    # Reuse the arguments stored on the existing record
                    args = self.thread_pool.especial_thread_pool_dicts[name].get_args()
                    kwargs = self.thread_pool.especial_thread_pool_dicts[name].get_kwargs()
                    thread = self.thread_pool.set_add_especial_thread(taskin, name, *args, **kwargs)
                    thread.start()
|
|
245
|
+
|
|
246
|
+
def start_especial_thread(self):
    """
    Register the three special threads with the pool, then start them.

    The task setter, the proxy setter and the result handler are registered
    under the names held by ``self.etn``, in that order.

    :return: None
    """
    registrations = (
        (self.set_task, self.etn.taskthreadname),
        (self.setProxy, self.etn.proxythreadname),
        (self.deal_results, self.etn.dealresultthreadname),
    )
    for target, thread_name in registrations:
        self.thread_pool.set_add_especial_thread(target, thread_name)
    self.thread_pool.especial_start()
|
|
252
|
+
|
|
253
|
+
def is_break(self):
    """
    Say whether the main loop should exit once the work is finished.

    :return: always False in this base implementation
    """
    return False
|
|
255
|
+
|
|
256
|
+
def run(self):
    """
    Start the special threads, then supervise the pool in a loop.

    Every pass sleeps 3 seconds, calls ``thread_pool.checkThread()`` and
    ``check_especial_thread()``, and then uses ``other()``,
    ``checkResultsfininsh()`` and a final emptiness check of both queues and
    ``self.results`` to decide whether the work is finished. Once finished,
    ``is_break()`` chooses between leaving the loop after 10 seconds and
    waiting 60 seconds before the next pass.

    NOTE(review): the block nesting was reconstructed from a listing that had
    lost its indentation -- confirm which ``if`` the final ``else`` belongs to.

    :return: None, once the loop is left through ``break``
    """
    self.start_especial_thread()
    while True:
        time.sleep(3)
        self.thread_pool.checkThread()
        self.check_especial_thread()
        if self.other():
            if not self.checkResultsfininsh():
                # Results are still pending (or the work queue is not empty): check again next pass
                continue
            else:
                print("进入other 判断 再次确认finish")
                # Confirm once more that nothing is left anywhere
                if self.thread_pool.work_queue.is_empty() and self.thread_pool.result_queue.is_empty() \
                        and len(self.results) == 0:
                    print("运行完毕")
                    if self.is_break():
                        print("10 s break")
                        time.sleep(10)
                        break
                    else:
                        time.sleep(60)
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
class MThreadingRun2(ABC):
    """
    Multithreading wrapper.

    Subclasses supply the task setter, proxy setter, result handler, worker
    function and pool hook; ``run`` starts the special threads and supervises
    the pool from the main thread.
    """

    def __init__(self, num, logger=None):
        """
        :param num: number of worker threads, special threads not included
        :param logger: logger to use; a stream-only logger is created when omitted
        """
        # Logging: fall back to a logger that only prints and writes no file
        self.logger = logger if logger else get_streamlogger()
        # Holder that unifies the special-thread names
        self.etn = especialThreadName()
        # Number of worker threads, special threads not included
        self.threadingnum = num
        # Proxy list; RingList is a circular list
        self.list_proxy = RingList()
        # Worker thread pool
        self.thread_pool = ThreadPoolManger(self.threadingnum, self.logger)
        # Expose the pool's hook function at this level for convenience
        self.thread_pool.set_callback(self.thread_pool_hook)
        # Thread method
        self.func = self.fun
        # Running total of jobs; every add_job adds one
        self.jobnum = 0
        # Thread lock for external use
        self.thread_run_lock = threading.Lock()
        # Special word used globally
        self.BREAK = "break"
        # Process id, or None where os.getpid does not exist
        self.pid = os.getpid() if hasattr(os, 'getpid') else None

    def set_is_restart(self, name, is_restart):
        """
        Restart setting for a special thread.

        Special threads restart by default; pass False to forbid a restart.

        :param name: key into ``self.thread_pool.especial_thread_pool_dicts``
        :param is_restart: value passed on to that record's ``set_is_restart``
        :return: None
        """
        thread_record = self.thread_pool.especial_thread_pool_dicts[name]
        thread_record.set_is_restart(is_restart)

    @abstractmethod
    def setProxy(self, threadval: ThreadVal, proxysList=None):
        """
        Add proxies to the circular queue ``self.list_proxy``, or manage proxies yourself.

        :param threadval: value supplied by the thread that runs this method
        :param proxysList: proxies to add; defaults to None
        :return: None in this base implementation
        """
        return None

    @abstractmethod
    def fun(self, threadval, *args, **kwargs):
        """
        The method that gets run.

        :param threadval: value supplied by the thread that runs this method
        :param args: extra positional arguments
        :param kwargs: extra keyword arguments
        :return: None in this base implementation
        """
        return None

    @abstractmethod
    def thread_pool_hook(self, threadinfo: ThreadInfo) -> dict:
        """
        Hook function that may be overridden.

        Overrides mainly rewrite the information held in the ThreadInfo.

        :param threadinfo: thread information passed in by the caller
        :return: an empty dict in this base implementation
        """
        return dict()

    @abstractmethod
    def set_task(self, threadval: ThreadVal, *args, **kwargs):
        """
        Set the tasks; implementations call ``add_job`` here.

        :param threadval: value supplied by the thread that runs this method
        :param args: extra positional arguments
        :param kwargs: extra keyword arguments
        :return: None in this base implementation
        """
        return None

    @abstractmethod
    def deal_results(self, threadval: ThreadVal, *args, **kwargs):
        """
        Handle the results.

        The results are available as
        ``result_queue = self.thread_pool.result_queue``.

        :param threadval: value supplied by the thread that runs this method
        :param args: extra positional arguments
        :param kwargs: extra keyword arguments
        :return: None in this base implementation
        """
        return None

    def setfunc(self, func):
        """
        Set the thread method; the default is ``self.fun``.

        :param func: callable to store on ``self.func``
        :return: None
        """
        self.func = func

    def add_job(self, func, *args, **kwargs):
        """
        Count one more job and pass it on to the thread pool.

        :param func: the method to run, usually ``self.func``
        :param args: positional (tuple) arguments
        :param kwargs: keyword (key/value) arguments
        """
        self.jobnum = self.jobnum + 1
        pool = self.thread_pool
        pool.add_job(func, *args, **kwargs)

    def other(self):
        """
        Additional checks: log both queue sizes and ask the pool whether it is done.

        :return: True when ``self.thread_pool.checkThreadRunFinish()`` is truthy,
            otherwise False
        """
        pool = self.thread_pool
        work_size = pool.work_queue.get_size()
        result_size = pool.result_queue.get_size()
        self.logger.info("工作队列任务量为{},结果队列任务量为{}".format(work_size, result_size))
        # Judge completion from the work queue and the result queue
        if not pool.checkThreadRunFinish():
            return False
        self.logger.info("初次判断任务已经结束,各个队列为空")
        return True

    def check_especial_thread(self):
        """
        Check whether any special thread has died and restart it when allowed.

        NOTE(review): the block nesting was reconstructed from a listing that
        had lost its indentation -- confirm that the restart logic sits inside
        the ``if name in self.etn.list_name()`` branch.

        :raises Exception: when a name is listed by ``self.etn.list_name()`` but
            matches none of the three known special-thread names
        :return: None
        """
        task = self.set_task
        proxy = self.setProxy
        result = self.deal_results

        live_names = self.thread_pool.get_now_thread()
        for name in list(self.thread_pool.especial_thread_pool_dicts.keys()):
            current = self.thread_pool.especial_thread_pool_dicts[name].get_thread()
            # The name is among the current threads and the thread is alive: still running
            if name in live_names and current.is_alive():
                continue
            self.logger.info("name is :" + name + "; 没有在线程中")
            if name not in self.etn.list_name():
                continue
            # Pick the target that belongs to this special-thread name
            if name == self.etn.taskthreadname:
                taskin = task
            elif name == self.etn.proxythreadname:
                taskin = proxy
            elif name == self.etn.dealresultthreadname:
                taskin = result
            else:
                raise Exception("没有对应的任务,请检查")
            # True means "do not restart"
            keep_stopped = False
            if name in self.thread_pool.especial_thread_pool_dicts:
                record = self.thread_pool.especial_thread_pool_dicts[name]
                if record.get_thread().is_alive():
                    keep_stopped = True
                if not record.get_is_restart():
                    keep_stopped = True
            if keep_stopped:
                continue
            self.logger.info('重启中:' + name)
            # Reuse the arguments stored on the existing record
            args = self.thread_pool.especial_thread_pool_dicts[name].get_args()
            kwargs = self.thread_pool.especial_thread_pool_dicts[name].get_kwargs()
            restarted = self.thread_pool.set_add_especial_thread(taskin, name, mode="mysuper", *args, **kwargs)
            restarted.start()

    def start_especial_thread(self):
        """
        Register the three special threads with the pool, then start them.

        :return: None
        """
        registrations = (
            (self.set_task, self.etn.taskthreadname),
            (self.setProxy, self.etn.proxythreadname),
            (self.deal_results, self.etn.dealresultthreadname),
        )
        for target, thread_name in registrations:
            self.thread_pool.set_add_especial_thread(target, thread_name, mode="mysuper")
        self.thread_pool.especial_start()

    def is_break(self):
        """
        Whether to leave the multithreaded run once everything has finished.

        :return: always False in this base implementation
        """
        return False

    def run_hook_everytime(self):
        """
        Hook called from the main thread on every loop pass; sleeps 3 seconds by default.
        """
        time.sleep(3)

    def run_hook_result(self):
        """
        Hook called when the run has finished; does nothing by default.
        """
        return None

    def run(self):
        """
        Main-process code.

        The threads are set up so that all of them end when the main process
        ends, so the main process should not end before the tasks are done.

        NOTE(review): the block nesting was reconstructed from a listing that
        had lost its indentation -- confirm the nesting of the final checks.

        :return: None, once ``is_break()`` lets the loop end
        """
        self.start_especial_thread()
        while True:
            self.run_hook_everytime()
            self.thread_pool.checkThread()
            self.check_especial_thread()
            if not self.other():
                continue
            print("进入other 判断 再次确认finish")
            pool = self.thread_pool
            if not (pool.work_queue.is_empty() and pool.result_queue.is_empty()):
                continue
            print("运行完毕")
            self.run_hook_result()
            if self.is_break():
                print("退出主循环")
                break
|