re-common 10.0.22-py3-none-any.whl → 10.0.24-py3-none-any.whl
This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that public registry.
- re_common/baselibrary/__init__.py +4 -4
- re_common/baselibrary/baseabs/__init__.py +6 -6
- re_common/baselibrary/baseabs/baseabs.py +26 -26
- re_common/baselibrary/database/mbuilder.py +132 -132
- re_common/baselibrary/database/moudle.py +93 -93
- re_common/baselibrary/database/msqlite3.py +194 -194
- re_common/baselibrary/database/mysql.py +169 -169
- re_common/baselibrary/database/sql_factory.py +26 -26
- re_common/baselibrary/mthread/MThreadingRun.py +486 -486
- re_common/baselibrary/mthread/MThreadingRunEvent.py +349 -349
- re_common/baselibrary/mthread/__init__.py +2 -2
- re_common/baselibrary/mthread/mythreading.py +695 -695
- re_common/baselibrary/pakge_other/socks.py +404 -404
- re_common/baselibrary/readconfig/config_factory.py +18 -18
- re_common/baselibrary/readconfig/ini_config.py +317 -317
- re_common/baselibrary/readconfig/toml_config.py +49 -49
- re_common/baselibrary/temporary/envdata.py +36 -36
- re_common/baselibrary/tools/all_requests/aiohttp_request.py +118 -118
- re_common/baselibrary/tools/all_requests/httpx_requet.py +102 -102
- re_common/baselibrary/tools/all_requests/mrequest.py +412 -412
- re_common/baselibrary/tools/all_requests/requests_request.py +81 -81
- re_common/baselibrary/tools/batch_compre/bijiao_batch.py +31 -31
- re_common/baselibrary/tools/contrast_db3.py +123 -123
- re_common/baselibrary/tools/copy_file.py +39 -39
- re_common/baselibrary/tools/db3_2_sizedb3.py +102 -102
- re_common/baselibrary/tools/foreachgz.py +39 -39
- re_common/baselibrary/tools/get_attr.py +10 -10
- re_common/baselibrary/tools/image_to_pdf.py +61 -61
- re_common/baselibrary/tools/java_code_deal.py +139 -139
- re_common/baselibrary/tools/javacode.py +79 -79
- re_common/baselibrary/tools/mdb_db3.py +48 -48
- re_common/baselibrary/tools/merge_file.py +171 -171
- re_common/baselibrary/tools/merge_gz_file.py +165 -165
- re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py +42 -42
- re_common/baselibrary/tools/mhdfstools/hdfst.py +42 -42
- re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py +38 -38
- re_common/baselibrary/tools/mongo_tools.py +50 -50
- re_common/baselibrary/tools/move_file.py +170 -170
- re_common/baselibrary/tools/move_mongo/mongo_table_to_file.py +63 -63
- re_common/baselibrary/tools/move_mongo/move_mongo_table.py +354 -354
- re_common/baselibrary/tools/move_mongo/use_mttf.py +18 -18
- re_common/baselibrary/tools/move_mongo/use_mv.py +93 -93
- re_common/baselibrary/tools/mpandas/mpandasreadexcel.py +125 -125
- re_common/baselibrary/tools/mpandas/pandas_visualization.py +7 -7
- re_common/baselibrary/tools/myparsel.py +104 -104
- re_common/baselibrary/tools/rename_dir_file.py +37 -37
- re_common/baselibrary/tools/sequoiadb_utils.py +398 -398
- re_common/baselibrary/tools/split_line_to_many.py +25 -25
- re_common/baselibrary/tools/stringtodicts.py +33 -33
- re_common/baselibrary/tools/workwechant_bot.py +84 -84
- re_common/baselibrary/utils/baseaiohttp.py +296 -296
- re_common/baselibrary/utils/baseaiomysql.py +87 -87
- re_common/baselibrary/utils/baseallstep.py +191 -191
- re_common/baselibrary/utils/baseavro.py +19 -19
- re_common/baselibrary/utils/baseboto3.py +291 -291
- re_common/baselibrary/utils/basecsv.py +32 -32
- re_common/baselibrary/utils/basedict.py +133 -133
- re_common/baselibrary/utils/basedir.py +241 -241
- re_common/baselibrary/utils/baseencode.py +351 -351
- re_common/baselibrary/utils/baseencoding.py +28 -28
- re_common/baselibrary/utils/baseesdsl.py +86 -86
- re_common/baselibrary/utils/baseexcel.py +264 -264
- re_common/baselibrary/utils/baseexcept.py +109 -109
- re_common/baselibrary/utils/basefile.py +654 -654
- re_common/baselibrary/utils/baseftp.py +214 -214
- re_common/baselibrary/utils/basegzip.py +60 -60
- re_common/baselibrary/utils/basehdfs.py +135 -135
- re_common/baselibrary/utils/basehttpx.py +268 -268
- re_common/baselibrary/utils/baseip.py +87 -87
- re_common/baselibrary/utils/basejson.py +2 -2
- re_common/baselibrary/utils/baselist.py +32 -32
- re_common/baselibrary/utils/basemotor.py +190 -190
- re_common/baselibrary/utils/basemssql.py +98 -98
- re_common/baselibrary/utils/baseodbc.py +113 -113
- re_common/baselibrary/utils/basepandas.py +302 -302
- re_common/baselibrary/utils/basepeewee.py +11 -11
- re_common/baselibrary/utils/basepika.py +180 -180
- re_common/baselibrary/utils/basepydash.py +143 -143
- re_common/baselibrary/utils/basepymongo.py +230 -230
- re_common/baselibrary/utils/basequeue.py +22 -22
- re_common/baselibrary/utils/baserar.py +57 -57
- re_common/baselibrary/utils/baserequest.py +279 -279
- re_common/baselibrary/utils/baseset.py +8 -8
- re_common/baselibrary/utils/basesmb.py +403 -403
- re_common/baselibrary/utils/basestring.py +382 -382
- re_common/baselibrary/utils/basetime.py +320 -320
- re_common/baselibrary/utils/baseurl.py +121 -121
- re_common/baselibrary/utils/basezip.py +57 -57
- re_common/baselibrary/utils/core/__init__.py +7 -7
- re_common/baselibrary/utils/core/bottomutils.py +18 -18
- re_common/baselibrary/utils/core/mdeprecated.py +327 -327
- re_common/baselibrary/utils/core/mlamada.py +16 -16
- re_common/baselibrary/utils/core/msginfo.py +25 -25
- re_common/baselibrary/utils/core/requests_core.py +103 -103
- re_common/baselibrary/utils/fateadm.py +429 -429
- re_common/baselibrary/utils/importfun.py +123 -123
- re_common/baselibrary/utils/mfaker.py +57 -57
- re_common/baselibrary/utils/my_abc/__init__.py +3 -3
- re_common/baselibrary/utils/my_abc/better_abc.py +32 -32
- re_common/baselibrary/utils/mylogger.py +414 -414
- re_common/baselibrary/utils/myredisclient.py +861 -861
- re_common/baselibrary/utils/pipupgrade.py +21 -21
- re_common/baselibrary/utils/ringlist.py +85 -85
- re_common/baselibrary/utils/version_compare.py +36 -36
- re_common/baselibrary/utils/ydmhttp.py +126 -126
- re_common/facade/lazy_import.py +11 -11
- re_common/facade/loggerfacade.py +25 -25
- re_common/facade/mysqlfacade.py +467 -467
- re_common/facade/now.py +31 -31
- re_common/facade/sqlite3facade.py +257 -257
- re_common/facade/use/mq_use_facade.py +83 -83
- re_common/facade/use/proxy_use_facade.py +19 -19
- re_common/libtest/base_dict_test.py +19 -19
- re_common/libtest/baseavro_test.py +13 -13
- re_common/libtest/basefile_test.py +14 -14
- re_common/libtest/basemssql_test.py +77 -77
- re_common/libtest/baseodbc_test.py +7 -7
- re_common/libtest/basepandas_test.py +38 -38
- re_common/libtest/get_attr_test/get_attr_test_settings.py +14 -14
- re_common/libtest/get_attr_test/settings.py +54 -54
- re_common/libtest/idencode_test.py +53 -53
- re_common/libtest/iniconfig_test.py +35 -35
- re_common/libtest/ip_test.py +34 -34
- re_common/libtest/merge_file_test.py +20 -20
- re_common/libtest/mfaker_test.py +8 -8
- re_common/libtest/mm3_test.py +31 -31
- re_common/libtest/mylogger_test.py +88 -88
- re_common/libtest/myparsel_test.py +27 -27
- re_common/libtest/mysql_test.py +151 -151
- re_common/libtest/pymongo_test.py +21 -21
- re_common/libtest/split_test.py +11 -11
- re_common/libtest/sqlite3_merge_test.py +5 -5
- re_common/libtest/sqlite3_test.py +34 -34
- re_common/libtest/tomlconfig_test.py +30 -30
- re_common/libtest/use_tools_test/__init__.py +2 -2
- re_common/libtest/user/__init__.py +4 -4
- re_common/studio/__init__.py +4 -4
- re_common/studio/assignment_expressions.py +36 -36
- re_common/studio/mydash/test1.py +18 -18
- re_common/studio/pydashstudio/first.py +9 -9
- re_common/studio/streamlitstudio/first_app.py +65 -65
- re_common/studio/streamlitstudio/uber_pickups.py +23 -23
- re_common/studio/test.py +18 -18
- re_common/v2/baselibrary/business_utils/BusinessStringUtil.py +195 -0
- re_common/v2/baselibrary/business_utils/__init__.py +0 -0
- re_common/v2/baselibrary/business_utils/rel_tools.py +6 -0
- re_common/v2/baselibrary/decorators/utils.py +59 -59
- re_common/v2/baselibrary/s3object/baseboto3.py +230 -230
- re_common/v2/baselibrary/tools/WeChatRobot.py +95 -79
- re_common/v2/baselibrary/tools/ac_ahocorasick.py +75 -75
- re_common/v2/baselibrary/tools/dict_tools.py +37 -37
- re_common/v2/baselibrary/tools/dolphinscheduler.py +187 -187
- re_common/v2/baselibrary/tools/hdfs_data_processer.py +338 -338
- re_common/v2/baselibrary/tools/list_tools.py +65 -65
- re_common/v2/baselibrary/tools/search_hash_tools.py +54 -54
- re_common/v2/baselibrary/tools/text_matcher.py +326 -326
- re_common/v2/baselibrary/tools/unionfind_tools.py +60 -60
- re_common/v2/baselibrary/utils/BusinessStringUtil.py +196 -196
- re_common/v2/baselibrary/utils/author_smi.py +360 -360
- re_common/v2/baselibrary/utils/base_string_similarity.py +158 -158
- re_common/v2/baselibrary/utils/basedict.py +37 -37
- re_common/v2/baselibrary/utils/basehdfs.py +161 -161
- re_common/v2/baselibrary/utils/basepika.py +180 -180
- re_common/v2/baselibrary/utils/basetime.py +77 -77
- re_common/v2/baselibrary/utils/db.py +38 -38
- re_common/v2/baselibrary/utils/json_cls.py +16 -16
- re_common/v2/baselibrary/utils/mq.py +83 -83
- re_common/v2/baselibrary/utils/n_ary_expression_tree.py +243 -243
- re_common/v2/baselibrary/utils/string_bool.py +186 -149
- re_common/v2/baselibrary/utils/string_clear.py +227 -204
- re_common/v2/baselibrary/utils/string_smi.py +18 -18
- re_common/v2/baselibrary/utils/stringutils.py +213 -213
- re_common/vip/base_step_process.py +11 -11
- re_common/vip/baseencodeid.py +90 -90
- re_common/vip/changetaskname.py +28 -28
- re_common/vip/core_var.py +24 -24
- re_common/vip/mmh3Hash.py +89 -89
- re_common/vip/proxy/allproxys.py +127 -127
- re_common/vip/proxy/allproxys_thread.py +159 -159
- re_common/vip/proxy/cnki_proxy.py +153 -153
- re_common/vip/proxy/kuaidaili.py +87 -87
- re_common/vip/proxy/proxy_all.py +113 -113
- re_common/vip/proxy/update_kuaidaili_0.py +42 -42
- re_common/vip/proxy/wanfang_proxy.py +152 -152
- re_common/vip/proxy/wp_proxy_all.py +181 -181
- re_common/vip/read_rawid_to_txt.py +91 -91
- re_common/vip/title/__init__.py +5 -5
- re_common/vip/title/transform/TransformBookTitleToZt.py +125 -125
- re_common/vip/title/transform/TransformConferenceTitleToZt.py +139 -139
- re_common/vip/title/transform/TransformCstadTitleToZt.py +195 -195
- re_common/vip/title/transform/TransformJournalTitleToZt.py +203 -203
- re_common/vip/title/transform/TransformPatentTitleToZt.py +132 -132
- re_common/vip/title/transform/TransformRegulationTitleToZt.py +114 -114
- re_common/vip/title/transform/TransformStandardTitleToZt.py +135 -135
- re_common/vip/title/transform/TransformThesisTitleToZt.py +135 -135
- re_common/vip/title/transform/__init__.py +10 -10
- {re_common-10.0.22.dist-info → re_common-10.0.24.dist-info}/LICENSE +201 -201
- {re_common-10.0.22.dist-info → re_common-10.0.24.dist-info}/METADATA +16 -16
- re_common-10.0.24.dist-info/RECORD +230 -0
- {re_common-10.0.22.dist-info → re_common-10.0.24.dist-info}/WHEEL +1 -1
- re_common-10.0.22.dist-info/RECORD +0 -227
- {re_common-10.0.22.dist-info → re_common-10.0.24.dist-info}/top_level.txt +0 -0
re_common/vip/proxy/wanfang_proxy.py, hunk @@ -1,152 +1,152 @@ (all 152 lines are removed and re-added, and both sides render with identical text here, so the file content is shown once):

```python
###########################################
# 同项目调用基础包 (make the project's base package importable when run directly)
import os
import sys
import time

filepath = os.path.abspath(__file__)
pathlist = filepath.split(os.sep)
pathlist = pathlist[:-4]
TopPath = os.sep.join(pathlist)
sys.path.insert(0, TopPath)
print(TopPath)
############################################

from re_common.facade.loggerfacade import get_streamlogger
from re_common.baselibrary.mthread.MThreadingRun import MThreadingRun
from re_common.baselibrary.mthread.mythreading import ThreadPoolManger, ThreadInfo
from re_common.baselibrary.utils.baserequest import BaseRequest
from re_common.baselibrary.utils.core.requests_core import set_proxy
from re_common.baselibrary.utils.myredisclient import MyRedis
from re_common.facade.mysqlfacade import MysqlUtiles

from proxy_all import ProxyAll


class WanfangProxy(object):

    def __init__(self, config="./db.ini"):
        self.config = config
        self.logger = get_streamlogger()
        self.mysqlutils = MysqlUtiles(self.config, "dbwanfang", self.logger)
        self.Headers = {
            'Accept': '*/*',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko'
        }
        self.UserAgent = 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko'
        self.baserequest = BaseRequest()

    def checking_proxy(self, proxy):
        url = "http://www.wanfangdata.com.cn/index.html"
        proxies = set_proxy(proxy)
        BoolResult, errString, r = self.baserequest.base_request(url,
                                                                 headers=self.Headers,
                                                                 proxies=proxies,
                                                                 marks=["container"],
                                                                 timeout=5)
        if BoolResult:
            return proxy
        return ""

    def get_mysql_proxy(self):
        sql = "SELECT proxy FROM `proxy_pool`"
        bools, rows = self.mysqlutils.SelectFromDB(sql)
        if not bools:
            return set()
        results = set()
        for row in rows:
            results.add(row[0])
        sql = "delete from proxy_pool"
        self.mysqlutils.ExeSqlToDB(sql)
        return results

    def get_all_proxy(self):
        proxy_set = set()
        proxyall = ProxyAll()
        redisproxy = proxyall.get_redis_all()
        mimvpproxy = proxyall.getProxyFromMimvp(1000)
        daxiangproxy = proxyall.getProxyFromDaxiang(1000)
        xiciproxy1 = proxyall.getProxyFromXICIOnePage(1)
        xiciproxy2 = proxyall.getProxyFromXICIOnePage(2)
        mysqlproxy = self.get_mysql_proxy()
        proxy_set = proxy_set.union(mysqlproxy, redisproxy, mimvpproxy, daxiangproxy, xiciproxy1, xiciproxy2)
        self.logger.info("all proxy size is:{}".format(len(proxy_set)))
        self.proxy_set = proxy_set
        return proxy_set

    def get_can_use_proxy(self):
        count = 0
        use_proxy = set()
        for proxy in self.proxy_set:
            proxy_ = self.checking_proxy(proxy)
            use_proxy.add(proxy_)
            if len(use_proxy) > 20:
                count = count + 1
                if count == 1:
                    sql = "delete from proxy_pool"
                    self.mysqlutils.ExeSqlToDB(sql)
                sql = "insert into proxy_pool(proxy) values ('%s')"
                self.mysqlutils.ExeSqlMany(sql, use_proxy)
                use_proxy.clear()
        if len(use_proxy) > 0:
            sql = "insert into proxy_pool(proxy) values ('%s')"
            self.mysqlutils.ExeSqlMany(sql, use_proxy)
            use_proxy.clear()


class DetailThreadRun(MThreadingRun):
    def __init__(self, num):
        self.cnki = WanfangProxy()
        super(DetailThreadRun, self).__init__(num)
        self.config = "./db.ini"
        self.myredisset = MyRedis(self.config)
        self.myredisset.set_redis_from_config(sesc="proxysetwanfangjournal")
        self.myredisset.conn_redis()
        self.myredisset.get_pipeline()

    @ThreadPoolManger.thread_lock
    def getTask(self, *args, **kwargs):
        self.myredisset.delete(self.myredisset.RedisKey)
        proxy_set = self.cnki.get_all_proxy()
        return proxy_set

    def setTask(self, results=None, *args, **kwargs):
        for url_tasks in results:
            # 将每一页加入任务队列 (add each page to the task queue)
            self.add_job(self.func, url_tasks)
        time.sleep(10 * 60)

    @ThreadPoolManger.thread_lock
    def dealresult(self, *args, **kwargs):
        sql = "replace into proxy_pool(`proxy`) values (%s)"
        self.cnki.mysqlutils.ExeSqlMany(sql, self.results)
        self.myredisset.sadd(self.myredisset.RedisKey, set(self.results))

    def setProxy(self, proxysList=None):
        pass

    def is_break(self):
        return False

    def thread_pool_hook(self, threadinfo: ThreadInfo):
        # 设置代理线程不重启,默认会重启 (do not restart the proxy thread; it restarts by default)
        if threadinfo.get_thread_name() == self.etn.proxythreadname:
            threadinfo.set_is_restart(False)
        if threadinfo.get_thread_name() == self.etn.taskthreadname:
            threadinfo.set_is_restart(False)
        return {}

    def fun(self, threadval, *args, **kwargs):
        standardid = args[0]
        proxys = self.cnki.checking_proxy(standardid)
        if proxys != "":
            threadval.result_queue.put(proxys)


def main():
    down = DetailThreadRun(40)
    down.run()


if __name__ == "__main__":
    main()
```
re_common/vip/proxy/wp_proxy_all.py, hunk @@ -1,182 +1,182 @@ (181 lines are removed and re-added with identical text in this rendering; the final line, `yz.run()`, is unchanged context; the file content is shown once):

```python
# -*- coding: utf-8 -*-
# @Time : 2020/6/23 14:38
# @Author : suhong
# @File : wp_proxy_all.py
# @Software: PyCharm

"""
云代理 开放代理 刷新验证进入redis (db3 wp_proxy_all)
(Cloud/open proxies: refresh, validate and write into redis; db3 wp_proxy_all)
"""
import json
import random
import time

from re_common.baselibrary.utils.core.requests_core import USER_AGENTS

from re_common.facade.mysqlfacade import MysqlUtiles

from re_common.baselibrary.mthread.mythreading import ThreadInfo, ThreadVal, ThreadPoolManger

from re_common.baselibrary.mthread.MThreadingRun import MThreadingRun

from re_common.baselibrary.utils.baserequest import BaseRequest

from re_common.baselibrary.utils.myredisclient import MyRedis


class ProxyAll(object):
    def __init__(self, config="./db.ini"):
        self.config = config
        self.myredis = MyRedis(configpath=self.config, sesc='wp_proxy', is_conn_or_pipe=False)
        # self.myredis.get_pipeline()
        self.myredis.builder()

        self.Headers = {
            'Accept': '*/*',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko',
        }
        self.UserAgent = 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko'
        self.baserequest = BaseRequest()
        self.ProxyPoolTotal = set()
        self.starttime = time.time()
        self.mysqlutils_proxy = MysqlUtiles('./db.ini', "db_proxy")

    def get_redis_all(self):
        return self.myredis.getDataFromRedis()

    def get_proxy_ip3366(self, num=600):
        proxyPool = set()
        # url = "http://gea.ip3366.net/api/?key=20200622135212736&getnum={}&order=1&formats=2&proxytype=01".format(num)
        url = "http://gea.ip3366.net/api/?key=20200622135212736&getnum={}&formats=2&proxytype=01".format(num)
        try:
            bools, estring, r = self.baserequest.base_request(url,
                                                              headers=self.Headers,
                                                              marks=['Ip'])
            if bools:
                json_data = json.loads(r.text)
                for info in json_data:
                    proxy = info['Ip'] + ":" + str(info['Port'])
                    proxyPool.add(proxy)

                return True, proxyPool

            return False, proxyPool
        except Exception as e:
            print(e)
            return False, proxyPool


class YanzhengThreadRun(MThreadingRun):
    def __init__(self, num):
        super(YanzhengThreadRun, self).__init__(num)
        self.pro = ProxyAll()
        self.yz_right_set = set()
        self.is_clean_redis = False

    def getTask(self, *args, **kwargs):
        return []

    def setTask(self, results=None, *args, **kwargs):
        if self.thread_pool.work_queue.is_empty():
            redis_proxypool = self.pro.get_redis_all()
            self.is_clean_redis = True
            if time.time() - self.pro.starttime <= 5:
                time.sleep(6 - (time.time() - self.pro.starttime))
            bools, proxypool = self.pro.get_proxy_ip3366()
            result_pro = proxypool.union(redis_proxypool)
            if bools:
                for raw in result_pro:
                    self.add_job(self.func, raw)
                self.pro.starttime = time.time()

    def dealresult(self, *args, **kwargs):
        if self.is_clean_redis:
            # 清理redis (clear redis)
            self.pipe.delete(self.pro.myredis.RedisKey)
            self.is_clean_redis = False

        # 处理self.yz_right_set集合 (process the self.yz_right_set set)
        print('Write DataBase %s ...' % self.pro.myredis.RedisKey)
        self.pipe.sadd(self.pro.myredis.RedisKey, *self.results)
        curTime = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
        self.pipe.hset('update_time', self.pro.myredis.RedisKey, curTime)
        self.pipe.execute()
        # 插入mysql (insert into mysql)
        for raw in self.results:
            self.deal_sql(raw)

    @ThreadPoolManger.thread_lock
    def deal_sql(self, raw):
        # 插入mysql统计每天获取的代理个数 (record in mysql how many proxies are fetched per day)
        sql = "insert into gongwangproxy(proxy,cishu) Values('{}',1) on DUPLICATE key update cishu=cishu+1".format(raw)
        self.pro.mysqlutils_proxy.ExeSqlToDB(sql)

    def setProxy(self, proxysList=None):
        pass

    def is_break(self):
        return False

    def thread_pool_hook(self, threadinfo: ThreadInfo):
        # 设置代理线程不重启,默认会重启 (do not restart the proxy thread; it restarts by default)
        # if threadinfo.get_thread_name() == self.etn.proxythreadname:
        #     threadinfo.set_is_restart(True)
        # if threadinfo.get_thread_name() == self.etn.taskthreadname:
        #     threadinfo.set_is_restart(False)
        return {}

    def fun(self, threadval: ThreadVal, *args, **kwargs):
        """
        验证代理有效性,百度 (validate proxy availability against Baidu)
        """
        raw = args[0]
        result_queue = threadval.get_result_queue()
        ppp = {
            'http': raw,
            'https': raw
        }
        try:
            url = "https://www.baidu.com/"
            bools, e, r = self.pro.baserequest.base_request(url,
                                                            headers=self.pro.Headers,
                                                            proxies=ppp,
                                                            timeout=5,
                                                            marks=['百度一下,你就知道'])
            if bools:
                result_queue.put(raw)
        except Exception as e:
            print(e)

        # 验证超星期刊 (validation against Chaoxing journals, kept commented out)
        # yzurl = "http://qikan.chaoxing.com/mag/infos?mags=ea15bb11cfca2424ae72402ca8461604"
        # mags = "ea15bb11cfca2424ae72402ca8461604"
        # HEADER = {
        #     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        #     'User-Agent': random.choice(USER_AGENTS),
        #     'Accept-Encoding': 'gzip, deflate',
        #     'Referer': 'http://qikan.chaoxing.com',
        # }
        # HEADER['Referer'] = 'http://qikan.chaoxing.com/mag/infos?mags=' + mags
        # try:
        #     BoolResult, errString, r = self.pro.baserequest.base_request(url=yzurl,
        #                                                                  headers=HEADER,
        #                                                                  timeout=(5, 10),
        #                                                                  proxies=ppp,
        #                                                                  marks=['Fbookright fl'])
        #     if BoolResult:
        #         result_queue.put(raw)
        # except Exception as e:
        #     print(e)


if __name__ == '__main__':
    # p = ProxyAll()
    # p.get_redis_all()
    yz = YanzhengThreadRun(40)
    yz.run()
```
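Both hunks above remove and re-add every line with text that renders identically here, which usually points to a change that is invisible at this level, such as line endings or trailing whitespace. A minimal, hypothetical sketch of how that could be checked against unpacked copies of the two wheels; the helper name and the paths are illustrative and not part of re_common:

```python
from pathlib import Path


def same_ignoring_eol(old_path: str, new_path: str) -> bool:
    """Return True if two files match once CRLF/CR line endings are normalized to LF."""
    def normalize(data: bytes) -> bytes:
        return data.replace(b"\r\n", b"\n").replace(b"\r", b"\n")

    return normalize(Path(old_path).read_bytes()) == normalize(Path(new_path).read_bytes())


# Example with placeholder paths to the two unpacked wheels:
# same_ignoring_eol("re_common-10.0.22/re_common/vip/proxy/wanfang_proxy.py",
#                   "re_common-10.0.24/re_common/vip/proxy/wanfang_proxy.py")
```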