re-common 10.0.39__py3-none-any.whl → 10.0.41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- re_common/baselibrary/__init__.py +4 -4
- re_common/baselibrary/baseabs/__init__.py +6 -6
- re_common/baselibrary/baseabs/baseabs.py +26 -26
- re_common/baselibrary/database/mbuilder.py +132 -132
- re_common/baselibrary/database/moudle.py +93 -93
- re_common/baselibrary/database/msqlite3.py +194 -194
- re_common/baselibrary/database/mysql.py +169 -169
- re_common/baselibrary/database/sql_factory.py +26 -26
- re_common/baselibrary/mthread/MThreadingRun.py +486 -486
- re_common/baselibrary/mthread/MThreadingRunEvent.py +349 -349
- re_common/baselibrary/mthread/__init__.py +2 -2
- re_common/baselibrary/mthread/mythreading.py +695 -695
- re_common/baselibrary/pakge_other/socks.py +404 -404
- re_common/baselibrary/readconfig/config_factory.py +18 -18
- re_common/baselibrary/readconfig/ini_config.py +317 -317
- re_common/baselibrary/readconfig/toml_config.py +49 -49
- re_common/baselibrary/temporary/envdata.py +36 -36
- re_common/baselibrary/tools/all_requests/aiohttp_request.py +118 -118
- re_common/baselibrary/tools/all_requests/httpx_requet.py +102 -102
- re_common/baselibrary/tools/all_requests/mrequest.py +412 -412
- re_common/baselibrary/tools/all_requests/requests_request.py +81 -81
- re_common/baselibrary/tools/batch_compre/bijiao_batch.py +31 -31
- re_common/baselibrary/tools/contrast_db3.py +123 -123
- re_common/baselibrary/tools/copy_file.py +39 -39
- re_common/baselibrary/tools/db3_2_sizedb3.py +102 -102
- re_common/baselibrary/tools/foreachgz.py +39 -39
- re_common/baselibrary/tools/get_attr.py +10 -10
- re_common/baselibrary/tools/image_to_pdf.py +61 -61
- re_common/baselibrary/tools/java_code_deal.py +139 -139
- re_common/baselibrary/tools/javacode.py +79 -79
- re_common/baselibrary/tools/mdb_db3.py +48 -48
- re_common/baselibrary/tools/merge_file.py +171 -171
- re_common/baselibrary/tools/merge_gz_file.py +165 -165
- re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py +42 -42
- re_common/baselibrary/tools/mhdfstools/hdfst.py +42 -42
- re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py +38 -38
- re_common/baselibrary/tools/mongo_tools.py +50 -50
- re_common/baselibrary/tools/move_file.py +170 -170
- re_common/baselibrary/tools/move_mongo/mongo_table_to_file.py +63 -63
- re_common/baselibrary/tools/move_mongo/move_mongo_table.py +354 -354
- re_common/baselibrary/tools/move_mongo/use_mttf.py +18 -18
- re_common/baselibrary/tools/move_mongo/use_mv.py +93 -93
- re_common/baselibrary/tools/mpandas/mpandasreadexcel.py +125 -125
- re_common/baselibrary/tools/mpandas/pandas_visualization.py +7 -7
- re_common/baselibrary/tools/myparsel.py +104 -104
- re_common/baselibrary/tools/rename_dir_file.py +37 -37
- re_common/baselibrary/tools/sequoiadb_utils.py +398 -398
- re_common/baselibrary/tools/split_line_to_many.py +25 -25
- re_common/baselibrary/tools/stringtodicts.py +33 -33
- re_common/baselibrary/tools/workwechant_bot.py +84 -84
- re_common/baselibrary/utils/baseaiohttp.py +296 -296
- re_common/baselibrary/utils/baseaiomysql.py +87 -87
- re_common/baselibrary/utils/baseallstep.py +191 -191
- re_common/baselibrary/utils/baseavro.py +19 -19
- re_common/baselibrary/utils/baseboto3.py +291 -291
- re_common/baselibrary/utils/basecsv.py +32 -32
- re_common/baselibrary/utils/basedict.py +133 -133
- re_common/baselibrary/utils/basedir.py +241 -241
- re_common/baselibrary/utils/baseencode.py +351 -351
- re_common/baselibrary/utils/baseencoding.py +28 -28
- re_common/baselibrary/utils/baseesdsl.py +86 -86
- re_common/baselibrary/utils/baseexcel.py +264 -264
- re_common/baselibrary/utils/baseexcept.py +109 -109
- re_common/baselibrary/utils/basefile.py +654 -654
- re_common/baselibrary/utils/baseftp.py +214 -214
- re_common/baselibrary/utils/basegzip.py +60 -60
- re_common/baselibrary/utils/basehdfs.py +135 -135
- re_common/baselibrary/utils/basehttpx.py +268 -268
- re_common/baselibrary/utils/baseip.py +87 -87
- re_common/baselibrary/utils/basejson.py +2 -2
- re_common/baselibrary/utils/baselist.py +32 -32
- re_common/baselibrary/utils/basemotor.py +190 -190
- re_common/baselibrary/utils/basemssql.py +98 -98
- re_common/baselibrary/utils/baseodbc.py +113 -113
- re_common/baselibrary/utils/basepandas.py +302 -302
- re_common/baselibrary/utils/basepeewee.py +11 -11
- re_common/baselibrary/utils/basepika.py +180 -180
- re_common/baselibrary/utils/basepydash.py +143 -143
- re_common/baselibrary/utils/basepymongo.py +230 -230
- re_common/baselibrary/utils/basequeue.py +22 -22
- re_common/baselibrary/utils/baserar.py +57 -57
- re_common/baselibrary/utils/baserequest.py +279 -279
- re_common/baselibrary/utils/baseset.py +8 -8
- re_common/baselibrary/utils/basesmb.py +403 -403
- re_common/baselibrary/utils/basestring.py +382 -382
- re_common/baselibrary/utils/basetime.py +320 -320
- re_common/baselibrary/utils/baseurl.py +121 -121
- re_common/baselibrary/utils/basezip.py +57 -57
- re_common/baselibrary/utils/core/__init__.py +7 -7
- re_common/baselibrary/utils/core/bottomutils.py +18 -18
- re_common/baselibrary/utils/core/mdeprecated.py +327 -327
- re_common/baselibrary/utils/core/mlamada.py +16 -16
- re_common/baselibrary/utils/core/msginfo.py +25 -25
- re_common/baselibrary/utils/core/requests_core.py +103 -103
- re_common/baselibrary/utils/fateadm.py +429 -429
- re_common/baselibrary/utils/importfun.py +123 -123
- re_common/baselibrary/utils/mfaker.py +57 -57
- re_common/baselibrary/utils/my_abc/__init__.py +3 -3
- re_common/baselibrary/utils/my_abc/better_abc.py +32 -32
- re_common/baselibrary/utils/mylogger.py +414 -414
- re_common/baselibrary/utils/myredisclient.py +861 -861
- re_common/baselibrary/utils/pipupgrade.py +21 -21
- re_common/baselibrary/utils/ringlist.py +85 -85
- re_common/baselibrary/utils/version_compare.py +36 -36
- re_common/baselibrary/utils/ydmhttp.py +126 -126
- re_common/facade/lazy_import.py +11 -11
- re_common/facade/loggerfacade.py +25 -25
- re_common/facade/mysqlfacade.py +467 -467
- re_common/facade/now.py +31 -31
- re_common/facade/sqlite3facade.py +257 -257
- re_common/facade/use/mq_use_facade.py +83 -83
- re_common/facade/use/proxy_use_facade.py +19 -19
- re_common/libtest/base_dict_test.py +19 -19
- re_common/libtest/baseavro_test.py +13 -13
- re_common/libtest/basefile_test.py +14 -14
- re_common/libtest/basemssql_test.py +77 -77
- re_common/libtest/baseodbc_test.py +7 -7
- re_common/libtest/basepandas_test.py +38 -38
- re_common/libtest/get_attr_test/get_attr_test_settings.py +14 -14
- re_common/libtest/get_attr_test/settings.py +54 -54
- re_common/libtest/idencode_test.py +53 -53
- re_common/libtest/iniconfig_test.py +35 -35
- re_common/libtest/ip_test.py +34 -34
- re_common/libtest/merge_file_test.py +20 -20
- re_common/libtest/mfaker_test.py +8 -8
- re_common/libtest/mm3_test.py +31 -31
- re_common/libtest/mylogger_test.py +88 -88
- re_common/libtest/myparsel_test.py +27 -27
- re_common/libtest/mysql_test.py +151 -151
- re_common/libtest/pymongo_test.py +21 -21
- re_common/libtest/split_test.py +11 -11
- re_common/libtest/sqlite3_merge_test.py +5 -5
- re_common/libtest/sqlite3_test.py +34 -34
- re_common/libtest/tomlconfig_test.py +30 -30
- re_common/libtest/use_tools_test/__init__.py +2 -2
- re_common/libtest/user/__init__.py +4 -4
- re_common/studio/__init__.py +4 -4
- re_common/studio/assignment_expressions.py +36 -36
- re_common/studio/mydash/test1.py +18 -18
- re_common/studio/pydashstudio/first.py +9 -9
- re_common/studio/streamlitstudio/first_app.py +65 -65
- re_common/studio/streamlitstudio/uber_pickups.py +23 -23
- re_common/studio/test.py +18 -18
- re_common/v2/baselibrary/business_utils/BusinessStringUtil.py +235 -220
- re_common/v2/baselibrary/business_utils/baseencodeid.py +100 -100
- re_common/v2/baselibrary/business_utils/full_doi_path.py +116 -116
- re_common/v2/baselibrary/business_utils/rel_tools.py +6 -6
- re_common/v2/baselibrary/decorators/utils.py +59 -59
- re_common/v2/baselibrary/helpers/search_packge/NearestNeighbors_test.py +105 -105
- re_common/v2/baselibrary/helpers/search_packge/fit_text_match.py +253 -253
- re_common/v2/baselibrary/helpers/search_packge/scikit_learn_text_matcher.py +260 -260
- re_common/v2/baselibrary/helpers/search_packge/test.py +1 -1
- re_common/v2/baselibrary/s3object/baseboto3.py +230 -230
- re_common/v2/baselibrary/tools/WeChatRobot.py +95 -95
- re_common/v2/baselibrary/tools/ac_ahocorasick.py +75 -75
- re_common/v2/baselibrary/tools/concurrency.py +35 -35
- re_common/v2/baselibrary/tools/data_processer/base.py +53 -53
- re_common/v2/baselibrary/tools/data_processer/data_processer.py +497 -508
- re_common/v2/baselibrary/tools/data_processer/data_reader.py +187 -187
- re_common/v2/baselibrary/tools/data_processer/data_writer.py +38 -38
- re_common/v2/baselibrary/tools/dict_tools.py +44 -44
- re_common/v2/baselibrary/tools/dolphinscheduler.py +187 -187
- re_common/v2/baselibrary/tools/hdfs_base_processor.py +204 -204
- re_common/v2/baselibrary/tools/hdfs_bulk_processor.py +67 -67
- re_common/v2/baselibrary/tools/hdfs_data_processer.py +338 -338
- re_common/v2/baselibrary/tools/hdfs_line_processor.py +74 -74
- re_common/v2/baselibrary/tools/list_tools.py +69 -69
- re_common/v2/baselibrary/tools/resume_tracker.py +94 -94
- re_common/v2/baselibrary/tools/search_hash_tools.py +54 -54
- re_common/v2/baselibrary/tools/text_matcher.py +326 -326
- re_common/v2/baselibrary/tools/tree_processor/__init__.py +0 -0
- re_common/v2/baselibrary/tools/tree_processor/builder.py +25 -0
- re_common/v2/baselibrary/tools/tree_processor/node.py +13 -0
- re_common/v2/baselibrary/tools/unionfind_tools.py +60 -60
- re_common/v2/baselibrary/utils/BusinessStringUtil.py +196 -196
- re_common/v2/baselibrary/utils/api_net_utils.py +270 -270
- re_common/v2/baselibrary/utils/author_smi.py +361 -361
- re_common/v2/baselibrary/utils/base_string_similarity.py +158 -158
- re_common/v2/baselibrary/utils/basedict.py +37 -37
- re_common/v2/baselibrary/utils/basehdfs.py +163 -163
- re_common/v2/baselibrary/utils/basepika.py +180 -180
- re_common/v2/baselibrary/utils/basetime.py +94 -77
- re_common/v2/baselibrary/utils/db.py +174 -156
- re_common/v2/baselibrary/utils/elasticsearch.py +46 -0
- re_common/v2/baselibrary/utils/json_cls.py +16 -16
- re_common/v2/baselibrary/utils/mq.py +83 -83
- re_common/v2/baselibrary/utils/n_ary_expression_tree.py +243 -243
- re_common/v2/baselibrary/utils/string_bool.py +187 -186
- re_common/v2/baselibrary/utils/string_clear.py +246 -246
- re_common/v2/baselibrary/utils/string_smi.py +18 -18
- re_common/v2/baselibrary/utils/stringutils.py +312 -271
- re_common/vip/base_step_process.py +11 -11
- re_common/vip/baseencodeid.py +90 -90
- re_common/vip/changetaskname.py +28 -28
- re_common/vip/core_var.py +24 -24
- re_common/vip/mmh3Hash.py +89 -89
- re_common/vip/proxy/allproxys.py +127 -127
- re_common/vip/proxy/allproxys_thread.py +159 -159
- re_common/vip/proxy/cnki_proxy.py +153 -153
- re_common/vip/proxy/kuaidaili.py +87 -87
- re_common/vip/proxy/proxy_all.py +113 -113
- re_common/vip/proxy/update_kuaidaili_0.py +42 -42
- re_common/vip/proxy/wanfang_proxy.py +152 -152
- re_common/vip/proxy/wp_proxy_all.py +181 -181
- re_common/vip/read_rawid_to_txt.py +91 -91
- re_common/vip/title/__init__.py +5 -5
- re_common/vip/title/transform/TransformBookTitleToZt.py +125 -125
- re_common/vip/title/transform/TransformConferenceTitleToZt.py +139 -139
- re_common/vip/title/transform/TransformCstadTitleToZt.py +195 -195
- re_common/vip/title/transform/TransformJournalTitleToZt.py +203 -203
- re_common/vip/title/transform/TransformPatentTitleToZt.py +132 -132
- re_common/vip/title/transform/TransformRegulationTitleToZt.py +114 -114
- re_common/vip/title/transform/TransformStandardTitleToZt.py +135 -135
- re_common/vip/title/transform/TransformThesisTitleToZt.py +135 -135
- re_common/vip/title/transform/__init__.py +10 -10
- {re_common-10.0.39.dist-info → re_common-10.0.41.dist-info}/LICENSE +201 -201
- {re_common-10.0.39.dist-info → re_common-10.0.41.dist-info}/METADATA +16 -16
- re_common-10.0.41.dist-info/RECORD +252 -0
- {re_common-10.0.39.dist-info → re_common-10.0.41.dist-info}/WHEEL +1 -1
- re_common-10.0.39.dist-info/RECORD +0 -248
- {re_common-10.0.39.dist-info → re_common-10.0.41.dist-info}/top_level.txt +0 -0
|
@@ -1,153 +1,153 @@
|
|
|
1
|
-
import time
|
|
2
|
-
|
|
3
|
-
###########################################
|
|
4
|
-
# 同项目调用基础包
|
|
5
|
-
import os
|
|
6
|
-
import sys
|
|
7
|
-
|
|
8
|
-
pathlist = os.path.abspath(__file__).split(os.sep)
|
|
9
|
-
root_path = os.sep.join(pathlist[:pathlist.index("re-common") + 1])
|
|
10
|
-
sys.path.insert(0, root_path)
|
|
11
|
-
print(root_path)
|
|
12
|
-
############################################
|
|
13
|
-
from re_common.baselibrary.utils.myredisclient import MyRedis
|
|
14
|
-
|
|
15
|
-
from re_common.baselibrary.mthread.MThreadingRun import MThreadingRun
|
|
16
|
-
from re_common.baselibrary.mthread.mythreading import ThreadInfo, ThreadPoolManger
|
|
17
|
-
from re_common.baselibrary.utils.baserequest import BaseRequest
|
|
18
|
-
|
|
19
|
-
from re_common.baselibrary.utils.core.requests_core import set_proxy
|
|
20
|
-
|
|
21
|
-
from re_common.facade.loggerfacade import get_streamlogger
|
|
22
|
-
|
|
23
|
-
from re_common.facade.mysqlfacade import MysqlUtiles
|
|
24
|
-
from proxy_all import ProxyAll
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
class CnkiProxy(object):
|
|
28
|
-
|
|
29
|
-
def __init__(self, config="./db.ini"):
|
|
30
|
-
self.config = config
|
|
31
|
-
self.logger = get_streamlogger()
|
|
32
|
-
self.mysqlutils = MysqlUtiles(self.config, "dbcnki", self.logger)
|
|
33
|
-
self.Headers = {
|
|
34
|
-
'Accept': '*/*',
|
|
35
|
-
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko',
|
|
36
|
-
}
|
|
37
|
-
self.UserAgent = 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko'
|
|
38
|
-
self.baserequest = BaseRequest()
|
|
39
|
-
|
|
40
|
-
def checking_proxy(self, proxy):
|
|
41
|
-
url = "https://www.cnki.net/"
|
|
42
|
-
proxies = set_proxy(proxy)
|
|
43
|
-
BoolResult, errString, r = self.baserequest.base_request(url,
|
|
44
|
-
headers=self.Headers,
|
|
45
|
-
proxies=proxies,
|
|
46
|
-
marks=["product-content"],
|
|
47
|
-
timeout=5)
|
|
48
|
-
if BoolResult:
|
|
49
|
-
return proxy
|
|
50
|
-
return ""
|
|
51
|
-
|
|
52
|
-
def get_mysql_proxy(self):
|
|
53
|
-
sql = "SELECT proxy FROM `proxy_pool`"
|
|
54
|
-
bools, rows = self.mysqlutils.SelectFromDB(sql)
|
|
55
|
-
if not bools:
|
|
56
|
-
return set()
|
|
57
|
-
results = set()
|
|
58
|
-
for row in rows:
|
|
59
|
-
results.add(row[0])
|
|
60
|
-
sql = "delete from proxy_pool"
|
|
61
|
-
self.mysqlutils.ExeSqlToDB(sql)
|
|
62
|
-
return results
|
|
63
|
-
|
|
64
|
-
def get_all_proxy(self):
|
|
65
|
-
proxy_set = set()
|
|
66
|
-
proxyall = ProxyAll()
|
|
67
|
-
redisproxy = proxyall.get_redis_all()
|
|
68
|
-
mimvpproxy = proxyall.getProxyFromMimvp(1000)
|
|
69
|
-
daxiangproxy = proxyall.getProxyFromDaxiang(1000)
|
|
70
|
-
xiciproxy1 = proxyall.getProxyFromXICIOnePage(1)
|
|
71
|
-
xiciproxy2 = proxyall.getProxyFromXICIOnePage(2)
|
|
72
|
-
mysqlproxy = self.get_mysql_proxy()
|
|
73
|
-
proxy_set = proxy_set.union(mysqlproxy, redisproxy, mimvpproxy, daxiangproxy, xiciproxy1, xiciproxy2)
|
|
74
|
-
self.logger.info("all proxy size is:{}".format(len(proxy_set)))
|
|
75
|
-
self.proxy_set = proxy_set
|
|
76
|
-
return proxy_set
|
|
77
|
-
|
|
78
|
-
def get_can_use_proxy(self):
|
|
79
|
-
count = 0
|
|
80
|
-
use_proxy = set()
|
|
81
|
-
for proxy in self.proxy_set:
|
|
82
|
-
proxy_ = self.checking_proxy(proxy)
|
|
83
|
-
use_proxy.add(proxy_)
|
|
84
|
-
if len(use_proxy) > 20:
|
|
85
|
-
count = count + 1
|
|
86
|
-
if count == 1:
|
|
87
|
-
sql = "delete from proxy_pool"
|
|
88
|
-
self.mysqlutils.ExeSqlToDB(sql)
|
|
89
|
-
sql = "insert into proxy_pool(proxy) values ('%s')"
|
|
90
|
-
self.mysqlutils.ExeSqlMany(sql, use_proxy)
|
|
91
|
-
use_proxy.clear()
|
|
92
|
-
if len(use_proxy) > 0:
|
|
93
|
-
sql = "insert into proxy_pool(proxy) values ('%s')"
|
|
94
|
-
self.mysqlutils.ExeSqlMany(sql, use_proxy)
|
|
95
|
-
use_proxy.clear()
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
class DetailThreadRun(MThreadingRun):
|
|
99
|
-
def __init__(self, num):
|
|
100
|
-
self.cnki = CnkiProxy()
|
|
101
|
-
super(DetailThreadRun, self).__init__(num)
|
|
102
|
-
self.config = "./db.ini"
|
|
103
|
-
self.myredisset = MyRedis(self.config)
|
|
104
|
-
self.myredisset.set_redis_from_config(sesc="proxysetcnkijournal")
|
|
105
|
-
self.myredisset.conn_redis()
|
|
106
|
-
self.myredisset.get_pipeline()
|
|
107
|
-
|
|
108
|
-
@ThreadPoolManger.thread_lock
|
|
109
|
-
def getTask(self, *args, **kwargs):
|
|
110
|
-
self.myredisset.delete(self.myredisset.RedisKey)
|
|
111
|
-
proxy_set = self.cnki.get_all_proxy()
|
|
112
|
-
return proxy_set
|
|
113
|
-
|
|
114
|
-
def setTask(self, results=None, *args, **kwargs):
|
|
115
|
-
for url_tasks in results:
|
|
116
|
-
# 将每一页加入任务队列
|
|
117
|
-
self.add_job(self.func, url_tasks)
|
|
118
|
-
time.sleep(10 * 60)
|
|
119
|
-
|
|
120
|
-
@ThreadPoolManger.thread_lock
|
|
121
|
-
def dealresult(self, *args, **kwargs):
|
|
122
|
-
sql = "replace into proxy_pool(`proxy`) values (%s)"
|
|
123
|
-
self.cnki.mysqlutils.ExeSqlMany(sql, self.results)
|
|
124
|
-
self.myredisset.sadd(self.myredisset.RedisKey, set(self.results))
|
|
125
|
-
|
|
126
|
-
def setProxy(self, proxysList=None):
|
|
127
|
-
pass
|
|
128
|
-
|
|
129
|
-
def is_break(self):
|
|
130
|
-
return False
|
|
131
|
-
|
|
132
|
-
def thread_pool_hook(self, threadinfo: ThreadInfo):
|
|
133
|
-
# 设置代理线程不重启,默认会重启
|
|
134
|
-
if threadinfo.get_thread_name() == self.etn.proxythreadname:
|
|
135
|
-
threadinfo.set_is_restart(False)
|
|
136
|
-
if threadinfo.get_thread_name() == self.etn.taskthreadname:
|
|
137
|
-
threadinfo.set_is_restart(False)
|
|
138
|
-
return {}
|
|
139
|
-
|
|
140
|
-
def fun(self, threadval, *args, **kwargs):
|
|
141
|
-
standardid = args[0]
|
|
142
|
-
proxys = self.cnki.checking_proxy(standardid)
|
|
143
|
-
if proxys != "":
|
|
144
|
-
threadval.result_queue.put(proxys)
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
def main():
|
|
148
|
-
down = DetailThreadRun(40)
|
|
149
|
-
down.run()
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
if __name__ == "__main__":
|
|
153
|
-
main()
|
|
1
|
+
import time
|
|
2
|
+
|
|
3
|
+
###########################################
|
|
4
|
+
# 同项目调用基础包
|
|
5
|
+
import os
|
|
6
|
+
import sys
|
|
7
|
+
|
|
8
|
+
pathlist = os.path.abspath(__file__).split(os.sep)
|
|
9
|
+
root_path = os.sep.join(pathlist[:pathlist.index("re-common") + 1])
|
|
10
|
+
sys.path.insert(0, root_path)
|
|
11
|
+
print(root_path)
|
|
12
|
+
############################################
|
|
13
|
+
from re_common.baselibrary.utils.myredisclient import MyRedis
|
|
14
|
+
|
|
15
|
+
from re_common.baselibrary.mthread.MThreadingRun import MThreadingRun
|
|
16
|
+
from re_common.baselibrary.mthread.mythreading import ThreadInfo, ThreadPoolManger
|
|
17
|
+
from re_common.baselibrary.utils.baserequest import BaseRequest
|
|
18
|
+
|
|
19
|
+
from re_common.baselibrary.utils.core.requests_core import set_proxy
|
|
20
|
+
|
|
21
|
+
from re_common.facade.loggerfacade import get_streamlogger
|
|
22
|
+
|
|
23
|
+
from re_common.facade.mysqlfacade import MysqlUtiles
|
|
24
|
+
from proxy_all import ProxyAll
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class CnkiProxy(object):
|
|
28
|
+
|
|
29
|
+
def __init__(self, config="./db.ini"):
|
|
30
|
+
self.config = config
|
|
31
|
+
self.logger = get_streamlogger()
|
|
32
|
+
self.mysqlutils = MysqlUtiles(self.config, "dbcnki", self.logger)
|
|
33
|
+
self.Headers = {
|
|
34
|
+
'Accept': '*/*',
|
|
35
|
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko',
|
|
36
|
+
}
|
|
37
|
+
self.UserAgent = 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko'
|
|
38
|
+
self.baserequest = BaseRequest()
|
|
39
|
+
|
|
40
|
+
def checking_proxy(self, proxy):
|
|
41
|
+
url = "https://www.cnki.net/"
|
|
42
|
+
proxies = set_proxy(proxy)
|
|
43
|
+
BoolResult, errString, r = self.baserequest.base_request(url,
|
|
44
|
+
headers=self.Headers,
|
|
45
|
+
proxies=proxies,
|
|
46
|
+
marks=["product-content"],
|
|
47
|
+
timeout=5)
|
|
48
|
+
if BoolResult:
|
|
49
|
+
return proxy
|
|
50
|
+
return ""
|
|
51
|
+
|
|
52
|
+
def get_mysql_proxy(self):
|
|
53
|
+
sql = "SELECT proxy FROM `proxy_pool`"
|
|
54
|
+
bools, rows = self.mysqlutils.SelectFromDB(sql)
|
|
55
|
+
if not bools:
|
|
56
|
+
return set()
|
|
57
|
+
results = set()
|
|
58
|
+
for row in rows:
|
|
59
|
+
results.add(row[0])
|
|
60
|
+
sql = "delete from proxy_pool"
|
|
61
|
+
self.mysqlutils.ExeSqlToDB(sql)
|
|
62
|
+
return results
|
|
63
|
+
|
|
64
|
+
def get_all_proxy(self):
|
|
65
|
+
proxy_set = set()
|
|
66
|
+
proxyall = ProxyAll()
|
|
67
|
+
redisproxy = proxyall.get_redis_all()
|
|
68
|
+
mimvpproxy = proxyall.getProxyFromMimvp(1000)
|
|
69
|
+
daxiangproxy = proxyall.getProxyFromDaxiang(1000)
|
|
70
|
+
xiciproxy1 = proxyall.getProxyFromXICIOnePage(1)
|
|
71
|
+
xiciproxy2 = proxyall.getProxyFromXICIOnePage(2)
|
|
72
|
+
mysqlproxy = self.get_mysql_proxy()
|
|
73
|
+
proxy_set = proxy_set.union(mysqlproxy, redisproxy, mimvpproxy, daxiangproxy, xiciproxy1, xiciproxy2)
|
|
74
|
+
self.logger.info("all proxy size is:{}".format(len(proxy_set)))
|
|
75
|
+
self.proxy_set = proxy_set
|
|
76
|
+
return proxy_set
|
|
77
|
+
|
|
78
|
+
def get_can_use_proxy(self):
|
|
79
|
+
count = 0
|
|
80
|
+
use_proxy = set()
|
|
81
|
+
for proxy in self.proxy_set:
|
|
82
|
+
proxy_ = self.checking_proxy(proxy)
|
|
83
|
+
use_proxy.add(proxy_)
|
|
84
|
+
if len(use_proxy) > 20:
|
|
85
|
+
count = count + 1
|
|
86
|
+
if count == 1:
|
|
87
|
+
sql = "delete from proxy_pool"
|
|
88
|
+
self.mysqlutils.ExeSqlToDB(sql)
|
|
89
|
+
sql = "insert into proxy_pool(proxy) values ('%s')"
|
|
90
|
+
self.mysqlutils.ExeSqlMany(sql, use_proxy)
|
|
91
|
+
use_proxy.clear()
|
|
92
|
+
if len(use_proxy) > 0:
|
|
93
|
+
sql = "insert into proxy_pool(proxy) values ('%s')"
|
|
94
|
+
self.mysqlutils.ExeSqlMany(sql, use_proxy)
|
|
95
|
+
use_proxy.clear()
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class DetailThreadRun(MThreadingRun):
|
|
99
|
+
def __init__(self, num):
|
|
100
|
+
self.cnki = CnkiProxy()
|
|
101
|
+
super(DetailThreadRun, self).__init__(num)
|
|
102
|
+
self.config = "./db.ini"
|
|
103
|
+
self.myredisset = MyRedis(self.config)
|
|
104
|
+
self.myredisset.set_redis_from_config(sesc="proxysetcnkijournal")
|
|
105
|
+
self.myredisset.conn_redis()
|
|
106
|
+
self.myredisset.get_pipeline()
|
|
107
|
+
|
|
108
|
+
@ThreadPoolManger.thread_lock
|
|
109
|
+
def getTask(self, *args, **kwargs):
|
|
110
|
+
self.myredisset.delete(self.myredisset.RedisKey)
|
|
111
|
+
proxy_set = self.cnki.get_all_proxy()
|
|
112
|
+
return proxy_set
|
|
113
|
+
|
|
114
|
+
def setTask(self, results=None, *args, **kwargs):
|
|
115
|
+
for url_tasks in results:
|
|
116
|
+
# 将每一页加入任务队列
|
|
117
|
+
self.add_job(self.func, url_tasks)
|
|
118
|
+
time.sleep(10 * 60)
|
|
119
|
+
|
|
120
|
+
@ThreadPoolManger.thread_lock
|
|
121
|
+
def dealresult(self, *args, **kwargs):
|
|
122
|
+
sql = "replace into proxy_pool(`proxy`) values (%s)"
|
|
123
|
+
self.cnki.mysqlutils.ExeSqlMany(sql, self.results)
|
|
124
|
+
self.myredisset.sadd(self.myredisset.RedisKey, set(self.results))
|
|
125
|
+
|
|
126
|
+
def setProxy(self, proxysList=None):
|
|
127
|
+
pass
|
|
128
|
+
|
|
129
|
+
def is_break(self):
|
|
130
|
+
return False
|
|
131
|
+
|
|
132
|
+
def thread_pool_hook(self, threadinfo: ThreadInfo):
|
|
133
|
+
# 设置代理线程不重启,默认会重启
|
|
134
|
+
if threadinfo.get_thread_name() == self.etn.proxythreadname:
|
|
135
|
+
threadinfo.set_is_restart(False)
|
|
136
|
+
if threadinfo.get_thread_name() == self.etn.taskthreadname:
|
|
137
|
+
threadinfo.set_is_restart(False)
|
|
138
|
+
return {}
|
|
139
|
+
|
|
140
|
+
def fun(self, threadval, *args, **kwargs):
|
|
141
|
+
standardid = args[0]
|
|
142
|
+
proxys = self.cnki.checking_proxy(standardid)
|
|
143
|
+
if proxys != "":
|
|
144
|
+
threadval.result_queue.put(proxys)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def main():
|
|
148
|
+
down = DetailThreadRun(40)
|
|
149
|
+
down.run()
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
if __name__ == "__main__":
|
|
153
|
+
main()
|
re_common/vip/proxy/kuaidaili.py
CHANGED
|
@@ -1,87 +1,87 @@
|
|
|
1
|
-
import json
|
|
2
|
-
|
|
3
|
-
###########################################
|
|
4
|
-
# 同项目调用基础包
|
|
5
|
-
import os
|
|
6
|
-
import sys
|
|
7
|
-
import time
|
|
8
|
-
import traceback
|
|
9
|
-
|
|
10
|
-
filepath = os.path.abspath(__file__)
|
|
11
|
-
pathlist = filepath.split(os.sep)
|
|
12
|
-
pathlist = pathlist[:-4]
|
|
13
|
-
TopPath = os.sep.join(pathlist)
|
|
14
|
-
sys.path.insert(0, TopPath)
|
|
15
|
-
print(TopPath)
|
|
16
|
-
############################################
|
|
17
|
-
|
|
18
|
-
from re_common.baselibrary.utils.baserequest import BaseRequest
|
|
19
|
-
from re_common.facade.mysqlfacade import MysqlUtiles
|
|
20
|
-
from re_common.baselibrary.utils.basedir import BaseDir
|
|
21
|
-
from re_common.baselibrary.utils.basefile import BaseFile
|
|
22
|
-
from re_common.facade.lazy_import import get_streamlogger
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
class Kproxy(object):
|
|
26
|
-
def __init__(self):
|
|
27
|
-
self.cur_path = BaseDir.get_file_dir_absolute(__file__)
|
|
28
|
-
self.configfile = BaseFile.get_new_path(self.cur_path, "db.ini")
|
|
29
|
-
self.logger = get_streamlogger()
|
|
30
|
-
self.mysqlutils = MysqlUtiles(self.configfile, "dbkuaidaili", self.logger)
|
|
31
|
-
self.url = "https://dps.kdlapi.com/api/getdps/?orderid=990171566857288&num=15&pt=1&format=json&sep=1"
|
|
32
|
-
self.bsrequest = BaseRequest()
|
|
33
|
-
self.starttime = time.time()
|
|
34
|
-
self.starttime_val = time.time()
|
|
35
|
-
|
|
36
|
-
def get_proxy(self):
|
|
37
|
-
self.starttime = time.time()
|
|
38
|
-
BoolResult, errString, r = self.bsrequest.base_request(self.url,
|
|
39
|
-
timeout=30
|
|
40
|
-
)
|
|
41
|
-
if BoolResult:
|
|
42
|
-
dicts = json.loads(r.text)
|
|
43
|
-
for proxy in dicts["data"]["proxy_list"]:
|
|
44
|
-
sql = "insert into kuaidailiproxy (proxy) values ('%s') on DUPLICATE key update stat=1" % proxy
|
|
45
|
-
self.mysqlutils.ExeSqlToDB(sql)
|
|
46
|
-
else:
|
|
47
|
-
self.logger.error("获取失败")
|
|
48
|
-
|
|
49
|
-
def val(self, proxy):
|
|
50
|
-
self.starttime_val = time.time()
|
|
51
|
-
url = f"https://dps.kdlapi.com/api/getdpsvalidtime?orderid=990171566857288&signature=wm4vq53pwrat1vye458elwyxyh9awzqj&proxy={proxy}"
|
|
52
|
-
BoolResult, errString, r = self.bsrequest.base_request(url,
|
|
53
|
-
timeout=30
|
|
54
|
-
)
|
|
55
|
-
if BoolResult:
|
|
56
|
-
dicts = json.loads(r.text)
|
|
57
|
-
if dicts["data"][proxy] > 0:
|
|
58
|
-
sql = "update kuaidailiproxy set val_stat=0 where proxy='%s'" % proxy
|
|
59
|
-
self.mysqlutils.ExeSqlToDB(sql)
|
|
60
|
-
else:
|
|
61
|
-
sql = "update kuaidailiproxy set val_stat=0,stat=0 where proxy='%s'" % proxy
|
|
62
|
-
self.mysqlutils.ExeSqlToDB(sql)
|
|
63
|
-
else:
|
|
64
|
-
self.logger.error("获取失败")
|
|
65
|
-
|
|
66
|
-
def val_all(self):
|
|
67
|
-
self.starttime_val = time.time()
|
|
68
|
-
sql = "select proxy from kuaidailiproxy where stat=1"
|
|
69
|
-
bools, rows = self.mysqlutils.SelectFromDB(sql)
|
|
70
|
-
for row in rows:
|
|
71
|
-
try:
|
|
72
|
-
self.val(row[0])
|
|
73
|
-
except:
|
|
74
|
-
traceback.print_exc()
|
|
75
|
-
|
|
76
|
-
def run(self):
|
|
77
|
-
self.get_proxy()
|
|
78
|
-
while True:
|
|
79
|
-
if int(time.time() - self.starttime) > 20:
|
|
80
|
-
self.get_proxy()
|
|
81
|
-
if int(time.time() - self.starttime_val) > 30:
|
|
82
|
-
self.val_all()
|
|
83
|
-
time.sleep(5)
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
if __name__ == "__main__":
|
|
87
|
-
Kproxy().run()
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
###########################################
|
|
4
|
+
# 同项目调用基础包
|
|
5
|
+
import os
|
|
6
|
+
import sys
|
|
7
|
+
import time
|
|
8
|
+
import traceback
|
|
9
|
+
|
|
10
|
+
filepath = os.path.abspath(__file__)
|
|
11
|
+
pathlist = filepath.split(os.sep)
|
|
12
|
+
pathlist = pathlist[:-4]
|
|
13
|
+
TopPath = os.sep.join(pathlist)
|
|
14
|
+
sys.path.insert(0, TopPath)
|
|
15
|
+
print(TopPath)
|
|
16
|
+
############################################
|
|
17
|
+
|
|
18
|
+
from re_common.baselibrary.utils.baserequest import BaseRequest
|
|
19
|
+
from re_common.facade.mysqlfacade import MysqlUtiles
|
|
20
|
+
from re_common.baselibrary.utils.basedir import BaseDir
|
|
21
|
+
from re_common.baselibrary.utils.basefile import BaseFile
|
|
22
|
+
from re_common.facade.lazy_import import get_streamlogger
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class Kproxy(object):
|
|
26
|
+
def __init__(self):
|
|
27
|
+
self.cur_path = BaseDir.get_file_dir_absolute(__file__)
|
|
28
|
+
self.configfile = BaseFile.get_new_path(self.cur_path, "db.ini")
|
|
29
|
+
self.logger = get_streamlogger()
|
|
30
|
+
self.mysqlutils = MysqlUtiles(self.configfile, "dbkuaidaili", self.logger)
|
|
31
|
+
self.url = "https://dps.kdlapi.com/api/getdps/?orderid=990171566857288&num=15&pt=1&format=json&sep=1"
|
|
32
|
+
self.bsrequest = BaseRequest()
|
|
33
|
+
self.starttime = time.time()
|
|
34
|
+
self.starttime_val = time.time()
|
|
35
|
+
|
|
36
|
+
def get_proxy(self):
|
|
37
|
+
self.starttime = time.time()
|
|
38
|
+
BoolResult, errString, r = self.bsrequest.base_request(self.url,
|
|
39
|
+
timeout=30
|
|
40
|
+
)
|
|
41
|
+
if BoolResult:
|
|
42
|
+
dicts = json.loads(r.text)
|
|
43
|
+
for proxy in dicts["data"]["proxy_list"]:
|
|
44
|
+
sql = "insert into kuaidailiproxy (proxy) values ('%s') on DUPLICATE key update stat=1" % proxy
|
|
45
|
+
self.mysqlutils.ExeSqlToDB(sql)
|
|
46
|
+
else:
|
|
47
|
+
self.logger.error("获取失败")
|
|
48
|
+
|
|
49
|
+
def val(self, proxy):
|
|
50
|
+
self.starttime_val = time.time()
|
|
51
|
+
url = f"https://dps.kdlapi.com/api/getdpsvalidtime?orderid=990171566857288&signature=wm4vq53pwrat1vye458elwyxyh9awzqj&proxy={proxy}"
|
|
52
|
+
BoolResult, errString, r = self.bsrequest.base_request(url,
|
|
53
|
+
timeout=30
|
|
54
|
+
)
|
|
55
|
+
if BoolResult:
|
|
56
|
+
dicts = json.loads(r.text)
|
|
57
|
+
if dicts["data"][proxy] > 0:
|
|
58
|
+
sql = "update kuaidailiproxy set val_stat=0 where proxy='%s'" % proxy
|
|
59
|
+
self.mysqlutils.ExeSqlToDB(sql)
|
|
60
|
+
else:
|
|
61
|
+
sql = "update kuaidailiproxy set val_stat=0,stat=0 where proxy='%s'" % proxy
|
|
62
|
+
self.mysqlutils.ExeSqlToDB(sql)
|
|
63
|
+
else:
|
|
64
|
+
self.logger.error("获取失败")
|
|
65
|
+
|
|
66
|
+
def val_all(self):
|
|
67
|
+
self.starttime_val = time.time()
|
|
68
|
+
sql = "select proxy from kuaidailiproxy where stat=1"
|
|
69
|
+
bools, rows = self.mysqlutils.SelectFromDB(sql)
|
|
70
|
+
for row in rows:
|
|
71
|
+
try:
|
|
72
|
+
self.val(row[0])
|
|
73
|
+
except:
|
|
74
|
+
traceback.print_exc()
|
|
75
|
+
|
|
76
|
+
def run(self):
|
|
77
|
+
self.get_proxy()
|
|
78
|
+
while True:
|
|
79
|
+
if int(time.time() - self.starttime) > 20:
|
|
80
|
+
self.get_proxy()
|
|
81
|
+
if int(time.time() - self.starttime_val) > 30:
|
|
82
|
+
self.val_all()
|
|
83
|
+
time.sleep(5)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
if __name__ == "__main__":
|
|
87
|
+
Kproxy().run()
|