re-common 10.0.39__py3-none-any.whl → 10.0.40__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- re_common/baselibrary/__init__.py +4 -4
- re_common/baselibrary/baseabs/__init__.py +6 -6
- re_common/baselibrary/baseabs/baseabs.py +26 -26
- re_common/baselibrary/database/mbuilder.py +132 -132
- re_common/baselibrary/database/moudle.py +93 -93
- re_common/baselibrary/database/msqlite3.py +194 -194
- re_common/baselibrary/database/mysql.py +169 -169
- re_common/baselibrary/database/sql_factory.py +26 -26
- re_common/baselibrary/mthread/MThreadingRun.py +486 -486
- re_common/baselibrary/mthread/MThreadingRunEvent.py +349 -349
- re_common/baselibrary/mthread/__init__.py +2 -2
- re_common/baselibrary/mthread/mythreading.py +695 -695
- re_common/baselibrary/pakge_other/socks.py +404 -404
- re_common/baselibrary/readconfig/config_factory.py +18 -18
- re_common/baselibrary/readconfig/ini_config.py +317 -317
- re_common/baselibrary/readconfig/toml_config.py +49 -49
- re_common/baselibrary/temporary/envdata.py +36 -36
- re_common/baselibrary/tools/all_requests/aiohttp_request.py +118 -118
- re_common/baselibrary/tools/all_requests/httpx_requet.py +102 -102
- re_common/baselibrary/tools/all_requests/mrequest.py +412 -412
- re_common/baselibrary/tools/all_requests/requests_request.py +81 -81
- re_common/baselibrary/tools/batch_compre/bijiao_batch.py +31 -31
- re_common/baselibrary/tools/contrast_db3.py +123 -123
- re_common/baselibrary/tools/copy_file.py +39 -39
- re_common/baselibrary/tools/db3_2_sizedb3.py +102 -102
- re_common/baselibrary/tools/foreachgz.py +39 -39
- re_common/baselibrary/tools/get_attr.py +10 -10
- re_common/baselibrary/tools/image_to_pdf.py +61 -61
- re_common/baselibrary/tools/java_code_deal.py +139 -139
- re_common/baselibrary/tools/javacode.py +79 -79
- re_common/baselibrary/tools/mdb_db3.py +48 -48
- re_common/baselibrary/tools/merge_file.py +171 -171
- re_common/baselibrary/tools/merge_gz_file.py +165 -165
- re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py +42 -42
- re_common/baselibrary/tools/mhdfstools/hdfst.py +42 -42
- re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py +38 -38
- re_common/baselibrary/tools/mongo_tools.py +50 -50
- re_common/baselibrary/tools/move_file.py +170 -170
- re_common/baselibrary/tools/move_mongo/mongo_table_to_file.py +63 -63
- re_common/baselibrary/tools/move_mongo/move_mongo_table.py +354 -354
- re_common/baselibrary/tools/move_mongo/use_mttf.py +18 -18
- re_common/baselibrary/tools/move_mongo/use_mv.py +93 -93
- re_common/baselibrary/tools/mpandas/mpandasreadexcel.py +125 -125
- re_common/baselibrary/tools/mpandas/pandas_visualization.py +7 -7
- re_common/baselibrary/tools/myparsel.py +104 -104
- re_common/baselibrary/tools/rename_dir_file.py +37 -37
- re_common/baselibrary/tools/sequoiadb_utils.py +398 -398
- re_common/baselibrary/tools/split_line_to_many.py +25 -25
- re_common/baselibrary/tools/stringtodicts.py +33 -33
- re_common/baselibrary/tools/workwechant_bot.py +84 -84
- re_common/baselibrary/utils/baseaiohttp.py +296 -296
- re_common/baselibrary/utils/baseaiomysql.py +87 -87
- re_common/baselibrary/utils/baseallstep.py +191 -191
- re_common/baselibrary/utils/baseavro.py +19 -19
- re_common/baselibrary/utils/baseboto3.py +291 -291
- re_common/baselibrary/utils/basecsv.py +32 -32
- re_common/baselibrary/utils/basedict.py +133 -133
- re_common/baselibrary/utils/basedir.py +241 -241
- re_common/baselibrary/utils/baseencode.py +351 -351
- re_common/baselibrary/utils/baseencoding.py +28 -28
- re_common/baselibrary/utils/baseesdsl.py +86 -86
- re_common/baselibrary/utils/baseexcel.py +264 -264
- re_common/baselibrary/utils/baseexcept.py +109 -109
- re_common/baselibrary/utils/basefile.py +654 -654
- re_common/baselibrary/utils/baseftp.py +214 -214
- re_common/baselibrary/utils/basegzip.py +60 -60
- re_common/baselibrary/utils/basehdfs.py +135 -135
- re_common/baselibrary/utils/basehttpx.py +268 -268
- re_common/baselibrary/utils/baseip.py +87 -87
- re_common/baselibrary/utils/basejson.py +2 -2
- re_common/baselibrary/utils/baselist.py +32 -32
- re_common/baselibrary/utils/basemotor.py +190 -190
- re_common/baselibrary/utils/basemssql.py +98 -98
- re_common/baselibrary/utils/baseodbc.py +113 -113
- re_common/baselibrary/utils/basepandas.py +302 -302
- re_common/baselibrary/utils/basepeewee.py +11 -11
- re_common/baselibrary/utils/basepika.py +180 -180
- re_common/baselibrary/utils/basepydash.py +143 -143
- re_common/baselibrary/utils/basepymongo.py +230 -230
- re_common/baselibrary/utils/basequeue.py +22 -22
- re_common/baselibrary/utils/baserar.py +57 -57
- re_common/baselibrary/utils/baserequest.py +279 -279
- re_common/baselibrary/utils/baseset.py +8 -8
- re_common/baselibrary/utils/basesmb.py +403 -403
- re_common/baselibrary/utils/basestring.py +382 -382
- re_common/baselibrary/utils/basetime.py +320 -320
- re_common/baselibrary/utils/baseurl.py +121 -121
- re_common/baselibrary/utils/basezip.py +57 -57
- re_common/baselibrary/utils/core/__init__.py +7 -7
- re_common/baselibrary/utils/core/bottomutils.py +18 -18
- re_common/baselibrary/utils/core/mdeprecated.py +327 -327
- re_common/baselibrary/utils/core/mlamada.py +16 -16
- re_common/baselibrary/utils/core/msginfo.py +25 -25
- re_common/baselibrary/utils/core/requests_core.py +103 -103
- re_common/baselibrary/utils/fateadm.py +429 -429
- re_common/baselibrary/utils/importfun.py +123 -123
- re_common/baselibrary/utils/mfaker.py +57 -57
- re_common/baselibrary/utils/my_abc/__init__.py +3 -3
- re_common/baselibrary/utils/my_abc/better_abc.py +32 -32
- re_common/baselibrary/utils/mylogger.py +414 -414
- re_common/baselibrary/utils/myredisclient.py +861 -861
- re_common/baselibrary/utils/pipupgrade.py +21 -21
- re_common/baselibrary/utils/ringlist.py +85 -85
- re_common/baselibrary/utils/version_compare.py +36 -36
- re_common/baselibrary/utils/ydmhttp.py +126 -126
- re_common/facade/lazy_import.py +11 -11
- re_common/facade/loggerfacade.py +25 -25
- re_common/facade/mysqlfacade.py +467 -467
- re_common/facade/now.py +31 -31
- re_common/facade/sqlite3facade.py +257 -257
- re_common/facade/use/mq_use_facade.py +83 -83
- re_common/facade/use/proxy_use_facade.py +19 -19
- re_common/libtest/base_dict_test.py +19 -19
- re_common/libtest/baseavro_test.py +13 -13
- re_common/libtest/basefile_test.py +14 -14
- re_common/libtest/basemssql_test.py +77 -77
- re_common/libtest/baseodbc_test.py +7 -7
- re_common/libtest/basepandas_test.py +38 -38
- re_common/libtest/get_attr_test/get_attr_test_settings.py +14 -14
- re_common/libtest/get_attr_test/settings.py +54 -54
- re_common/libtest/idencode_test.py +53 -53
- re_common/libtest/iniconfig_test.py +35 -35
- re_common/libtest/ip_test.py +34 -34
- re_common/libtest/merge_file_test.py +20 -20
- re_common/libtest/mfaker_test.py +8 -8
- re_common/libtest/mm3_test.py +31 -31
- re_common/libtest/mylogger_test.py +88 -88
- re_common/libtest/myparsel_test.py +27 -27
- re_common/libtest/mysql_test.py +151 -151
- re_common/libtest/pymongo_test.py +21 -21
- re_common/libtest/split_test.py +11 -11
- re_common/libtest/sqlite3_merge_test.py +5 -5
- re_common/libtest/sqlite3_test.py +34 -34
- re_common/libtest/tomlconfig_test.py +30 -30
- re_common/libtest/use_tools_test/__init__.py +2 -2
- re_common/libtest/user/__init__.py +4 -4
- re_common/studio/__init__.py +4 -4
- re_common/studio/assignment_expressions.py +36 -36
- re_common/studio/mydash/test1.py +18 -18
- re_common/studio/pydashstudio/first.py +9 -9
- re_common/studio/streamlitstudio/first_app.py +65 -65
- re_common/studio/streamlitstudio/uber_pickups.py +23 -23
- re_common/studio/test.py +18 -18
- re_common/v2/baselibrary/business_utils/BusinessStringUtil.py +219 -219
- re_common/v2/baselibrary/business_utils/baseencodeid.py +100 -100
- re_common/v2/baselibrary/business_utils/full_doi_path.py +116 -116
- re_common/v2/baselibrary/business_utils/rel_tools.py +6 -6
- re_common/v2/baselibrary/decorators/utils.py +59 -59
- re_common/v2/baselibrary/helpers/search_packge/NearestNeighbors_test.py +105 -105
- re_common/v2/baselibrary/helpers/search_packge/fit_text_match.py +253 -253
- re_common/v2/baselibrary/helpers/search_packge/scikit_learn_text_matcher.py +260 -260
- re_common/v2/baselibrary/helpers/search_packge/test.py +1 -1
- re_common/v2/baselibrary/s3object/baseboto3.py +230 -230
- re_common/v2/baselibrary/tools/WeChatRobot.py +95 -95
- re_common/v2/baselibrary/tools/ac_ahocorasick.py +75 -75
- re_common/v2/baselibrary/tools/concurrency.py +35 -35
- re_common/v2/baselibrary/tools/data_processer/base.py +53 -53
- re_common/v2/baselibrary/tools/data_processer/data_processer.py +508 -508
- re_common/v2/baselibrary/tools/data_processer/data_reader.py +187 -187
- re_common/v2/baselibrary/tools/data_processer/data_writer.py +38 -38
- re_common/v2/baselibrary/tools/dict_tools.py +44 -44
- re_common/v2/baselibrary/tools/dolphinscheduler.py +187 -187
- re_common/v2/baselibrary/tools/hdfs_base_processor.py +204 -204
- re_common/v2/baselibrary/tools/hdfs_bulk_processor.py +67 -67
- re_common/v2/baselibrary/tools/hdfs_data_processer.py +338 -338
- re_common/v2/baselibrary/tools/hdfs_line_processor.py +74 -74
- re_common/v2/baselibrary/tools/list_tools.py +69 -69
- re_common/v2/baselibrary/tools/resume_tracker.py +94 -94
- re_common/v2/baselibrary/tools/search_hash_tools.py +54 -54
- re_common/v2/baselibrary/tools/text_matcher.py +326 -326
- re_common/v2/baselibrary/tools/unionfind_tools.py +60 -60
- re_common/v2/baselibrary/utils/BusinessStringUtil.py +196 -196
- re_common/v2/baselibrary/utils/api_net_utils.py +270 -270
- re_common/v2/baselibrary/utils/author_smi.py +361 -361
- re_common/v2/baselibrary/utils/base_string_similarity.py +158 -158
- re_common/v2/baselibrary/utils/basedict.py +37 -37
- re_common/v2/baselibrary/utils/basehdfs.py +163 -163
- re_common/v2/baselibrary/utils/basepika.py +180 -180
- re_common/v2/baselibrary/utils/basetime.py +77 -77
- re_common/v2/baselibrary/utils/db.py +156 -156
- re_common/v2/baselibrary/utils/elasticsearch.py +46 -0
- re_common/v2/baselibrary/utils/json_cls.py +16 -16
- re_common/v2/baselibrary/utils/mq.py +83 -83
- re_common/v2/baselibrary/utils/n_ary_expression_tree.py +243 -243
- re_common/v2/baselibrary/utils/string_bool.py +186 -186
- re_common/v2/baselibrary/utils/string_clear.py +246 -246
- re_common/v2/baselibrary/utils/string_smi.py +18 -18
- re_common/v2/baselibrary/utils/stringutils.py +271 -271
- re_common/vip/base_step_process.py +11 -11
- re_common/vip/baseencodeid.py +90 -90
- re_common/vip/changetaskname.py +28 -28
- re_common/vip/core_var.py +24 -24
- re_common/vip/mmh3Hash.py +89 -89
- re_common/vip/proxy/allproxys.py +127 -127
- re_common/vip/proxy/allproxys_thread.py +159 -159
- re_common/vip/proxy/cnki_proxy.py +153 -153
- re_common/vip/proxy/kuaidaili.py +87 -87
- re_common/vip/proxy/proxy_all.py +113 -113
- re_common/vip/proxy/update_kuaidaili_0.py +42 -42
- re_common/vip/proxy/wanfang_proxy.py +152 -152
- re_common/vip/proxy/wp_proxy_all.py +181 -181
- re_common/vip/read_rawid_to_txt.py +91 -91
- re_common/vip/title/__init__.py +5 -5
- re_common/vip/title/transform/TransformBookTitleToZt.py +125 -125
- re_common/vip/title/transform/TransformConferenceTitleToZt.py +139 -139
- re_common/vip/title/transform/TransformCstadTitleToZt.py +195 -195
- re_common/vip/title/transform/TransformJournalTitleToZt.py +203 -203
- re_common/vip/title/transform/TransformPatentTitleToZt.py +132 -132
- re_common/vip/title/transform/TransformRegulationTitleToZt.py +114 -114
- re_common/vip/title/transform/TransformStandardTitleToZt.py +135 -135
- re_common/vip/title/transform/TransformThesisTitleToZt.py +135 -135
- re_common/vip/title/transform/__init__.py +10 -10
- {re_common-10.0.39.dist-info → re_common-10.0.40.dist-info}/LICENSE +201 -201
- {re_common-10.0.39.dist-info → re_common-10.0.40.dist-info}/METADATA +24 -16
- re_common-10.0.40.dist-info/RECORD +249 -0
- {re_common-10.0.39.dist-info → re_common-10.0.40.dist-info}/WHEEL +1 -1
- re_common-10.0.39.dist-info/RECORD +0 -248
- {re_common-10.0.39.dist-info → re_common-10.0.40.dist-info}/top_level.txt +0 -0
|
@@ -1,354 +1,354 @@
|
|
|
1
|
-
import asyncio
|
|
2
|
-
import copy
|
|
3
|
-
import datetime
|
|
4
|
-
import json
|
|
5
|
-
import time
|
|
6
|
-
|
|
7
|
-
from pymongo.errors import DuplicateKeyError
|
|
8
|
-
import traceback
|
|
9
|
-
|
|
10
|
-
from re_common.baselibrary.mthread.MThreadingRun import MThreadingRun2
|
|
11
|
-
from re_common.baselibrary.mthread.mythreading import ThreadVal, ThreadInfo
|
|
12
|
-
from re_common.baselibrary.utils.basefile import BaseFile
|
|
13
|
-
from re_common.baselibrary.utils.basemotor import BaseMotor
|
|
14
|
-
from re_common.baselibrary.utils.basepymongo import BasePyMongo
|
|
15
|
-
from re_common.facade.sqlite3facade import Sqlite3Utiles
|
|
16
|
-
from re_common.facade.use.mq_use_facade import UseMq
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
class Configs(object):
|
|
20
|
-
|
|
21
|
-
def __init__(self):
|
|
22
|
-
self.db3_path = r"F:\fun2\test_images.db3"
|
|
23
|
-
self.db3_encoding = "utf-8"
|
|
24
|
-
self.mgdb_conn = "mongodb://192.168.31.30:32417/"
|
|
25
|
-
self.mgdb_conn_motor = "mongodb://192.168.31.30:32417/htmljson.wanfang_ref?authSource=htmljson"
|
|
26
|
-
self.mgdb_db = "htmljson"
|
|
27
|
-
self.mgdb_col = "wanfang_ref"
|
|
28
|
-
|
|
29
|
-
self.mgdb_conn2_motor = "mongodb://cjrw:vipdatacenter@192.168.31.243:32920,192.168.31.206:32920,192.168.31.208:32920/?authSource=htmljson"
|
|
30
|
-
self.mgdb_db2 = "htmljson"
|
|
31
|
-
self.mgdb_col2 = "wanfang_ref"
|
|
32
|
-
|
|
33
|
-
self.mq_name = "mongodb.move.send"
|
|
34
|
-
self.mq_name_work = "mongodb.move.worker"
|
|
35
|
-
|
|
36
|
-
self.error_dir = r"F:\fun2\log"
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
class MoveMongodbColl(object):
|
|
40
|
-
def __init__(self, conf):
|
|
41
|
-
self.conf = conf
|
|
42
|
-
self.first_id = ""
|
|
43
|
-
self.id_list = []
|
|
44
|
-
self.recv_list = []
|
|
45
|
-
|
|
46
|
-
def init_conn_mongodb(self):
|
|
47
|
-
self.basemongo = BasePyMongo(self.conf.mgdb_conn)
|
|
48
|
-
self.basemongo.use_db(self.conf.mgdb_db)
|
|
49
|
-
self.basemongo.create_col(self.conf.mgdb_col)
|
|
50
|
-
|
|
51
|
-
self.bs = BaseMotor()
|
|
52
|
-
self.bs.AsyncIOMotorClient(
|
|
53
|
-
self.conf.mgdb_conn_motor,
|
|
54
|
-
self.conf.mgdb_db)
|
|
55
|
-
self.bs.get_col(self.conf.mgdb_col)
|
|
56
|
-
|
|
57
|
-
self.bs2 = BaseMotor()
|
|
58
|
-
self.bs2.AsyncIOMotorClient(
|
|
59
|
-
self.conf.mgdb_conn2_motor,
|
|
60
|
-
self.conf.mgdb_db2)
|
|
61
|
-
self.bs2.get_col(self.conf.mgdb_col2)
|
|
62
|
-
|
|
63
|
-
def create_db3_table(self):
|
|
64
|
-
"""
|
|
65
|
-
创建表
|
|
66
|
-
:return:
|
|
67
|
-
"""
|
|
68
|
-
sql1 = "PRAGMA foreign_keys = false;"
|
|
69
|
-
sql2 = 'DROP TABLE IF EXISTS "cxids";'
|
|
70
|
-
sql3 = 'CREATE TABLE "cxids" ("ids" TEXT NOT NULL,"stat" integer NOT NULL DEFAULT 0,PRIMARY KEY ("ids"));'
|
|
71
|
-
sql4 = 'PRAGMA foreign_keys = true;'
|
|
72
|
-
self.db3.ExeSqlliteList([sql1, sql2, sql3, sql4])
|
|
73
|
-
|
|
74
|
-
def init_db3(self):
|
|
75
|
-
self.db3 = Sqlite3Utiles().Sqlite3DBConnectFromFilePath(self.conf.db3_path, encoding=self.conf.db3_encoding)
|
|
76
|
-
|
|
77
|
-
def init_mq(self):
|
|
78
|
-
self.use_send = UseMq(self.conf.mq_name)
|
|
79
|
-
self.use_work = UseMq(self.conf.mq_name_work)
|
|
80
|
-
|
|
81
|
-
def send_list(self):
|
|
82
|
-
while True:
|
|
83
|
-
if self.use_send.get_server_mq_num(10000):
|
|
84
|
-
for i in self.id_list:
|
|
85
|
-
dict_info = {
|
|
86
|
-
'_id': i
|
|
87
|
-
}
|
|
88
|
-
info_str = json.dumps(dict_info)
|
|
89
|
-
print(info_str)
|
|
90
|
-
self.use_send.easy_send_mq(info_str)
|
|
91
|
-
self.id_list.clear()
|
|
92
|
-
break
|
|
93
|
-
else:
|
|
94
|
-
time.sleep(1)
|
|
95
|
-
|
|
96
|
-
def send_db3(self):
|
|
97
|
-
while True:
|
|
98
|
-
sql = 'select * from cxids where stat=0 limit 20000'
|
|
99
|
-
rows = self.db3.SelectFromSqlliteFetchall(sql)
|
|
100
|
-
if len(rows) == 0:
|
|
101
|
-
print('查询结束 0 状态结束 查询-1状态 time sleep 60s')
|
|
102
|
-
time.sleep(60)
|
|
103
|
-
sql = 'select * from cxids where stat=-1 limit 20000'
|
|
104
|
-
rows = self.db3.SelectFromSqlliteFetchall(sql)
|
|
105
|
-
if len(rows) == 0:
|
|
106
|
-
print('查询结束 -1 状态结束 结束发送')
|
|
107
|
-
break
|
|
108
|
-
for row in rows:
|
|
109
|
-
_id = row[0]
|
|
110
|
-
self.id_list.append(_id)
|
|
111
|
-
if len(self.id_list) >= 10000:
|
|
112
|
-
sql = "update cxids set stat = -1 where ids in {}".format(tuple(self.id_list))
|
|
113
|
-
self.db3.ExeSqlliteSql(sql)
|
|
114
|
-
self.send_list()
|
|
115
|
-
|
|
116
|
-
if len(self.id_list) > 1:
|
|
117
|
-
sql = "update cxids set stat = -1 where ids in {}".format(tuple(self.id_list))
|
|
118
|
-
self.db3.ExeSqlliteSql(sql)
|
|
119
|
-
self.send_list()
|
|
120
|
-
|
|
121
|
-
if len(self.id_list) == 1:
|
|
122
|
-
sql = "update cxids set stat = -1 where ids='{}'".format(self.id_list[0])
|
|
123
|
-
self.db3.ExeSqlliteSql(sql)
|
|
124
|
-
self.send_list()
|
|
125
|
-
|
|
126
|
-
def callback2(self, ch, method, properties, body):
|
|
127
|
-
json_data = json.loads(body)
|
|
128
|
-
_id = json_data['_id']
|
|
129
|
-
self.recv_list.append(_id)
|
|
130
|
-
if len(self.recv_list) >= 500:
|
|
131
|
-
sql = "update cxids set stat = 1 where ids in {}".format(tuple(self.recv_list))
|
|
132
|
-
if self.db3.ExeSqlliteSql(sql):
|
|
133
|
-
self.recv_list.clear()
|
|
134
|
-
else:
|
|
135
|
-
print('[{}]未更新stat条数{}'.format(datetime.datetime.now(), len(self.recv_list)))
|
|
136
|
-
|
|
137
|
-
def recv(self, results=None, *args, **kwargs):
|
|
138
|
-
self.use_work.callback2 = self.callback2
|
|
139
|
-
self.use_work.get_mq()
|
|
140
|
-
|
|
141
|
-
def get_first_mongo_id(self):
|
|
142
|
-
for i in self.basemongo.find({"_id": {"$gt": self.first_id}}, {"_id": 1}).sort([("_id", 1)]).limit(1):
|
|
143
|
-
self.first_id = i["_id"]
|
|
144
|
-
print("first_id is:" + self.first_id)
|
|
145
|
-
|
|
146
|
-
def init_data_db3(self):
|
|
147
|
-
c = 0
|
|
148
|
-
c1 = -1
|
|
149
|
-
while True:
|
|
150
|
-
lists = []
|
|
151
|
-
for i in self.basemongo.find({"_id": {"$gte": self.first_id}}, {"_id": 1}).sort([("_id", 1)]).limit(
|
|
152
|
-
1000000):
|
|
153
|
-
c = c + 1
|
|
154
|
-
self.first_id = i["_id"]
|
|
155
|
-
lists.append((i["_id"], 0))
|
|
156
|
-
if c % 10000 == 1:
|
|
157
|
-
print(len(lists))
|
|
158
|
-
|
|
159
|
-
sql = "insert or ignore into cxids(`ids`,`stat`) values (?,?)"
|
|
160
|
-
self.db3.ExeSqlliteMany(sql, lists)
|
|
161
|
-
print(c)
|
|
162
|
-
if c1 == c:
|
|
163
|
-
break
|
|
164
|
-
if len(lists) == 1:
|
|
165
|
-
break
|
|
166
|
-
c1 = c
|
|
167
|
-
lists.clear()
|
|
168
|
-
|
|
169
|
-
def one_init(self):
|
|
170
|
-
"""
|
|
171
|
-
第一步 初始化id数据到db3目录
|
|
172
|
-
:return:
|
|
173
|
-
"""
|
|
174
|
-
self.init_conn_mongodb()
|
|
175
|
-
self.init_db3()
|
|
176
|
-
self.create_db3_table()
|
|
177
|
-
self.get_first_mongo_id()
|
|
178
|
-
self.init_data_db3()
|
|
179
|
-
|
|
180
|
-
def two_send(self):
|
|
181
|
-
"""
|
|
182
|
-
分布式的send方法
|
|
183
|
-
:return:
|
|
184
|
-
"""
|
|
185
|
-
self.init_db3()
|
|
186
|
-
self.init_mq()
|
|
187
|
-
self.send_db3()
|
|
188
|
-
|
|
189
|
-
def two_recv(self):
|
|
190
|
-
self.init_db3()
|
|
191
|
-
self.init_mq()
|
|
192
|
-
self.recv()
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
class MoveMongodbThreadRun(MThreadingRun2):
|
|
196
|
-
def __init__(self, num, conf):
|
|
197
|
-
super(MoveMongodbThreadRun, self).__init__(num)
|
|
198
|
-
self.thread_pool.work_queue.set_size(10)
|
|
199
|
-
self.loop = asyncio.new_event_loop()
|
|
200
|
-
asyncio.set_event_loop(self.loop)
|
|
201
|
-
|
|
202
|
-
self.mvmc = MoveMongodbColl(conf)
|
|
203
|
-
self.mvmc.init_mq()
|
|
204
|
-
self.mvmc.init_conn_mongodb()
|
|
205
|
-
self.mvmc.use_send.callback2 = self.callback2
|
|
206
|
-
self.lists = []
|
|
207
|
-
self.info_list = []
|
|
208
|
-
self.is_many_move = False
|
|
209
|
-
# 配置二维数组的每组数据数量,如果为批量转移 建议设置为10000
|
|
210
|
-
self.num_list = 100
|
|
211
|
-
self.num_info_list = 10
|
|
212
|
-
|
|
213
|
-
def callback2(self, ch, method, properties, body):
|
|
214
|
-
json_data = json.loads(body.decode())
|
|
215
|
-
# self.add_job(self.func, dicts)
|
|
216
|
-
self.lists.append((json_data))
|
|
217
|
-
# 异步需要 10 * 100 的二维list
|
|
218
|
-
if self.is_many_move:
|
|
219
|
-
work_size = self.thread_pool.work_queue.get_size()
|
|
220
|
-
if work_size >= 3:
|
|
221
|
-
time.sleep(10)
|
|
222
|
-
if len(self.lists) >= self.num_list:
|
|
223
|
-
self.info_list.append(copy.deepcopy(self.lists))
|
|
224
|
-
self.lists.clear()
|
|
225
|
-
print(len(self.info_list))
|
|
226
|
-
|
|
227
|
-
if len(self.info_list) >= self.num_info_list:
|
|
228
|
-
self.add_job(self.func, copy.deepcopy(self.info_list))
|
|
229
|
-
self.info_list.clear()
|
|
230
|
-
|
|
231
|
-
def set_task(self, threadval: ThreadVal, *args, **kwargs):
|
|
232
|
-
self.mvmc.use_send.get_mq()
|
|
233
|
-
|
|
234
|
-
def deal_results(self, threadval: ThreadVal, *args, **kwargs):
|
|
235
|
-
result_queue = threadval.get_result_queue()
|
|
236
|
-
while True:
|
|
237
|
-
while not result_queue.is_empty():
|
|
238
|
-
result = result_queue.get()
|
|
239
|
-
t_1, t_2 = result
|
|
240
|
-
if t_1 == "err":
|
|
241
|
-
file_path = BaseFile.get_new_filename(self.mvmc.conf.error_dir, "err_parse_2.txt")
|
|
242
|
-
BaseFile.single_add_file(file_path, t_2 + '\n')
|
|
243
|
-
if t_1 == "err_Exception":
|
|
244
|
-
file_path = BaseFile.get_new_filename(self.mvmc.conf.error_dir, "err_Exception_2.txt")
|
|
245
|
-
BaseFile.single_add_file(file_path, t_2 + '\n')
|
|
246
|
-
if t_1 == 'right':
|
|
247
|
-
self.send_update_info(t_2)
|
|
248
|
-
self.thread_pool.result_queue.task_done()
|
|
249
|
-
time.sleep(1)
|
|
250
|
-
|
|
251
|
-
def send_update_info(self, _id):
|
|
252
|
-
dict_info = {
|
|
253
|
-
"_id": _id,
|
|
254
|
-
}
|
|
255
|
-
info_str = json.dumps(dict_info)
|
|
256
|
-
while True:
|
|
257
|
-
if self.mvmc.use_work.send_mq(info_str, num=10000):
|
|
258
|
-
break
|
|
259
|
-
time.sleep(1)
|
|
260
|
-
|
|
261
|
-
def setProxy(self, threadval: ThreadVal, proxysList=None):
|
|
262
|
-
time.sleep(60)
|
|
263
|
-
|
|
264
|
-
def is_break(self):
|
|
265
|
-
return False
|
|
266
|
-
|
|
267
|
-
def thread_pool_hook(self, threadinfo: ThreadInfo):
|
|
268
|
-
# 设置代理线程不重启,默认会重启
|
|
269
|
-
if threadinfo.get_thread_name() == self.etn.proxythreadname:
|
|
270
|
-
threadinfo.set_is_restart(False)
|
|
271
|
-
# if threadinfo.get_thread_name() == self.etn.taskthreadname:
|
|
272
|
-
# threadinfo.set_is_restart(False)
|
|
273
|
-
return {}
|
|
274
|
-
|
|
275
|
-
def doc_hook(self, item):
|
|
276
|
-
return item
|
|
277
|
-
|
|
278
|
-
async def par_html(self, result_queue, lists):
|
|
279
|
-
for info in lists:
|
|
280
|
-
_id = info["_id"]
|
|
281
|
-
try:
|
|
282
|
-
try:
|
|
283
|
-
# 不存在就插入
|
|
284
|
-
|
|
285
|
-
item = await self.mvmc.bs.select_one({"_id": _id})
|
|
286
|
-
item_result = self.doc_hook(item)
|
|
287
|
-
await self.mvmc.bs2.insert_one(item_result)
|
|
288
|
-
print("{}插入".format(_id))
|
|
289
|
-
result_queue.put(("right", _id))
|
|
290
|
-
except DuplicateKeyError as e:
|
|
291
|
-
print("{}存在".format(_id))
|
|
292
|
-
result_queue.put(("right", _id))
|
|
293
|
-
except Exception as e:
|
|
294
|
-
traceback.print_exc()
|
|
295
|
-
result_queue.put(("err_Exception", _id + ":" + traceback.format_exc()))
|
|
296
|
-
except Exception as e:
|
|
297
|
-
traceback.print_exc()
|
|
298
|
-
result_queue.put(("err_Exception", _id + ":" + traceback.format_exc()))
|
|
299
|
-
|
|
300
|
-
async def par_html_many(self, result_queue, lists):
|
|
301
|
-
insert_list = []
|
|
302
|
-
for i in range(0, len(lists), 500):
|
|
303
|
-
lists_item = lists[i:i + 500]
|
|
304
|
-
try:
|
|
305
|
-
try:
|
|
306
|
-
def deal_dicts(dd):
|
|
307
|
-
return dd["_id"]
|
|
308
|
-
lists_item = list(map(deal_dicts, lists_item))
|
|
309
|
-
# 不存在就插入
|
|
310
|
-
docs = await self.mvmc.bs.select({"_id": {"$in": lists_item}})
|
|
311
|
-
for item in docs:
|
|
312
|
-
item_result = self.doc_hook(item)
|
|
313
|
-
insert_list.append(item_result)
|
|
314
|
-
print("获取mongo num:" + str(len(insert_list)))
|
|
315
|
-
if len(insert_list) >= 100:
|
|
316
|
-
start_time = time.time()
|
|
317
|
-
result = await self.mvmc.bs2.insert_many(insert_list)
|
|
318
|
-
print("百条数据插入时间:" + str(time.time() - start_time))
|
|
319
|
-
for _id in result.inserted_ids:
|
|
320
|
-
print("{}插入".format(_id))
|
|
321
|
-
result_queue.put(("right", _id))
|
|
322
|
-
insert_list.clear()
|
|
323
|
-
except DuplicateKeyError as e:
|
|
324
|
-
print("有数据存在,无法插入")
|
|
325
|
-
|
|
326
|
-
except Exception as e:
|
|
327
|
-
traceback.print_exc()
|
|
328
|
-
result_queue.put(("err_Exception", traceback.format_exc()))
|
|
329
|
-
except Exception as e:
|
|
330
|
-
traceback.print_exc()
|
|
331
|
-
result_queue.put(("err_Exception", traceback.format_exc()))
|
|
332
|
-
|
|
333
|
-
if len(insert_list) > 0:
|
|
334
|
-
try:
|
|
335
|
-
result = await self.mvmc.bs2.insert_many(insert_list)
|
|
336
|
-
for _id in result.inserted_ids:
|
|
337
|
-
print("{}插入".format(_id))
|
|
338
|
-
result_queue.put(("right", _id))
|
|
339
|
-
insert_list.clear()
|
|
340
|
-
except DuplicateKeyError as e:
|
|
341
|
-
print("存在某个key 批量插入失败")
|
|
342
|
-
except Exception as e:
|
|
343
|
-
traceback.print_exc()
|
|
344
|
-
result_queue.put(("err_Exception", traceback.format_exc()))
|
|
345
|
-
|
|
346
|
-
def fun(self, threadval, *args, **kwargs):
|
|
347
|
-
result_queue = threadval.get_result_queue()
|
|
348
|
-
func_list = []
|
|
349
|
-
for lists in args[0]:
|
|
350
|
-
if self.is_many_move:
|
|
351
|
-
func_list.append(self.par_html_many(result_queue, lists))
|
|
352
|
-
else:
|
|
353
|
-
func_list.append(self.par_html(result_queue, lists))
|
|
354
|
-
self.loop.run_until_complete(asyncio.wait(func_list))
|
|
1
|
+
import asyncio
|
|
2
|
+
import copy
|
|
3
|
+
import datetime
|
|
4
|
+
import json
|
|
5
|
+
import time
|
|
6
|
+
|
|
7
|
+
from pymongo.errors import DuplicateKeyError
|
|
8
|
+
import traceback
|
|
9
|
+
|
|
10
|
+
from re_common.baselibrary.mthread.MThreadingRun import MThreadingRun2
|
|
11
|
+
from re_common.baselibrary.mthread.mythreading import ThreadVal, ThreadInfo
|
|
12
|
+
from re_common.baselibrary.utils.basefile import BaseFile
|
|
13
|
+
from re_common.baselibrary.utils.basemotor import BaseMotor
|
|
14
|
+
from re_common.baselibrary.utils.basepymongo import BasePyMongo
|
|
15
|
+
from re_common.facade.sqlite3facade import Sqlite3Utiles
|
|
16
|
+
from re_common.facade.use.mq_use_facade import UseMq
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Configs(object):
|
|
20
|
+
|
|
21
|
+
def __init__(self):
|
|
22
|
+
self.db3_path = r"F:\fun2\test_images.db3"
|
|
23
|
+
self.db3_encoding = "utf-8"
|
|
24
|
+
self.mgdb_conn = "mongodb://192.168.31.30:32417/"
|
|
25
|
+
self.mgdb_conn_motor = "mongodb://192.168.31.30:32417/htmljson.wanfang_ref?authSource=htmljson"
|
|
26
|
+
self.mgdb_db = "htmljson"
|
|
27
|
+
self.mgdb_col = "wanfang_ref"
|
|
28
|
+
|
|
29
|
+
self.mgdb_conn2_motor = "mongodb://cjrw:vipdatacenter@192.168.31.243:32920,192.168.31.206:32920,192.168.31.208:32920/?authSource=htmljson"
|
|
30
|
+
self.mgdb_db2 = "htmljson"
|
|
31
|
+
self.mgdb_col2 = "wanfang_ref"
|
|
32
|
+
|
|
33
|
+
self.mq_name = "mongodb.move.send"
|
|
34
|
+
self.mq_name_work = "mongodb.move.worker"
|
|
35
|
+
|
|
36
|
+
self.error_dir = r"F:\fun2\log"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class MoveMongodbColl(object):
|
|
40
|
+
def __init__(self, conf):
|
|
41
|
+
self.conf = conf
|
|
42
|
+
self.first_id = ""
|
|
43
|
+
self.id_list = []
|
|
44
|
+
self.recv_list = []
|
|
45
|
+
|
|
46
|
+
def init_conn_mongodb(self):
|
|
47
|
+
self.basemongo = BasePyMongo(self.conf.mgdb_conn)
|
|
48
|
+
self.basemongo.use_db(self.conf.mgdb_db)
|
|
49
|
+
self.basemongo.create_col(self.conf.mgdb_col)
|
|
50
|
+
|
|
51
|
+
self.bs = BaseMotor()
|
|
52
|
+
self.bs.AsyncIOMotorClient(
|
|
53
|
+
self.conf.mgdb_conn_motor,
|
|
54
|
+
self.conf.mgdb_db)
|
|
55
|
+
self.bs.get_col(self.conf.mgdb_col)
|
|
56
|
+
|
|
57
|
+
self.bs2 = BaseMotor()
|
|
58
|
+
self.bs2.AsyncIOMotorClient(
|
|
59
|
+
self.conf.mgdb_conn2_motor,
|
|
60
|
+
self.conf.mgdb_db2)
|
|
61
|
+
self.bs2.get_col(self.conf.mgdb_col2)
|
|
62
|
+
|
|
63
|
+
def create_db3_table(self):
|
|
64
|
+
"""
|
|
65
|
+
创建表
|
|
66
|
+
:return:
|
|
67
|
+
"""
|
|
68
|
+
sql1 = "PRAGMA foreign_keys = false;"
|
|
69
|
+
sql2 = 'DROP TABLE IF EXISTS "cxids";'
|
|
70
|
+
sql3 = 'CREATE TABLE "cxids" ("ids" TEXT NOT NULL,"stat" integer NOT NULL DEFAULT 0,PRIMARY KEY ("ids"));'
|
|
71
|
+
sql4 = 'PRAGMA foreign_keys = true;'
|
|
72
|
+
self.db3.ExeSqlliteList([sql1, sql2, sql3, sql4])
|
|
73
|
+
|
|
74
|
+
def init_db3(self):
|
|
75
|
+
self.db3 = Sqlite3Utiles().Sqlite3DBConnectFromFilePath(self.conf.db3_path, encoding=self.conf.db3_encoding)
|
|
76
|
+
|
|
77
|
+
def init_mq(self):
|
|
78
|
+
self.use_send = UseMq(self.conf.mq_name)
|
|
79
|
+
self.use_work = UseMq(self.conf.mq_name_work)
|
|
80
|
+
|
|
81
|
+
def send_list(self):
|
|
82
|
+
while True:
|
|
83
|
+
if self.use_send.get_server_mq_num(10000):
|
|
84
|
+
for i in self.id_list:
|
|
85
|
+
dict_info = {
|
|
86
|
+
'_id': i
|
|
87
|
+
}
|
|
88
|
+
info_str = json.dumps(dict_info)
|
|
89
|
+
print(info_str)
|
|
90
|
+
self.use_send.easy_send_mq(info_str)
|
|
91
|
+
self.id_list.clear()
|
|
92
|
+
break
|
|
93
|
+
else:
|
|
94
|
+
time.sleep(1)
|
|
95
|
+
|
|
96
|
+
def send_db3(self):
|
|
97
|
+
while True:
|
|
98
|
+
sql = 'select * from cxids where stat=0 limit 20000'
|
|
99
|
+
rows = self.db3.SelectFromSqlliteFetchall(sql)
|
|
100
|
+
if len(rows) == 0:
|
|
101
|
+
print('查询结束 0 状态结束 查询-1状态 time sleep 60s')
|
|
102
|
+
time.sleep(60)
|
|
103
|
+
sql = 'select * from cxids where stat=-1 limit 20000'
|
|
104
|
+
rows = self.db3.SelectFromSqlliteFetchall(sql)
|
|
105
|
+
if len(rows) == 0:
|
|
106
|
+
print('查询结束 -1 状态结束 结束发送')
|
|
107
|
+
break
|
|
108
|
+
for row in rows:
|
|
109
|
+
_id = row[0]
|
|
110
|
+
self.id_list.append(_id)
|
|
111
|
+
if len(self.id_list) >= 10000:
|
|
112
|
+
sql = "update cxids set stat = -1 where ids in {}".format(tuple(self.id_list))
|
|
113
|
+
self.db3.ExeSqlliteSql(sql)
|
|
114
|
+
self.send_list()
|
|
115
|
+
|
|
116
|
+
if len(self.id_list) > 1:
|
|
117
|
+
sql = "update cxids set stat = -1 where ids in {}".format(tuple(self.id_list))
|
|
118
|
+
self.db3.ExeSqlliteSql(sql)
|
|
119
|
+
self.send_list()
|
|
120
|
+
|
|
121
|
+
if len(self.id_list) == 1:
|
|
122
|
+
sql = "update cxids set stat = -1 where ids='{}'".format(self.id_list[0])
|
|
123
|
+
self.db3.ExeSqlliteSql(sql)
|
|
124
|
+
self.send_list()
|
|
125
|
+
|
|
126
|
+
def callback2(self, ch, method, properties, body):
|
|
127
|
+
json_data = json.loads(body)
|
|
128
|
+
_id = json_data['_id']
|
|
129
|
+
self.recv_list.append(_id)
|
|
130
|
+
if len(self.recv_list) >= 500:
|
|
131
|
+
sql = "update cxids set stat = 1 where ids in {}".format(tuple(self.recv_list))
|
|
132
|
+
if self.db3.ExeSqlliteSql(sql):
|
|
133
|
+
self.recv_list.clear()
|
|
134
|
+
else:
|
|
135
|
+
print('[{}]未更新stat条数{}'.format(datetime.datetime.now(), len(self.recv_list)))
|
|
136
|
+
|
|
137
|
+
def recv(self, results=None, *args, **kwargs):
|
|
138
|
+
self.use_work.callback2 = self.callback2
|
|
139
|
+
self.use_work.get_mq()
|
|
140
|
+
|
|
141
|
+
def get_first_mongo_id(self):
|
|
142
|
+
for i in self.basemongo.find({"_id": {"$gt": self.first_id}}, {"_id": 1}).sort([("_id", 1)]).limit(1):
|
|
143
|
+
self.first_id = i["_id"]
|
|
144
|
+
print("first_id is:" + self.first_id)
|
|
145
|
+
|
|
146
|
+
def init_data_db3(self):
    """Copy every mongo ``_id`` (from ``self.first_id`` on) into the ``cxids``
    sqlite table with stat 0, in passes of up to 1,000,000 ids.

    ``c`` counts every id ever fetched; ``c1`` is the count at the end of the
    previous pass. Because the query uses ``$gte``, each pass re-fetches the
    id the previous pass ended on, so the final pass yields exactly one row.
    """
    c = 0
    c1 = -1
    while True:
        lists = []
        # $gte (not $gt): the boundary id of the previous pass is fetched
        # again, which is what makes the len(lists) == 1 termination test
        # below work; "insert or ignore" makes the duplicate harmless.
        for i in self.basemongo.find({"_id": {"$gte": self.first_id}}, {"_id": 1}).sort([("_id", 1)]).limit(
                1000000):
            c = c + 1
            self.first_id = i["_id"]
            lists.append((i["_id"], 0))
            # Progress trace roughly every 10,000 ids.
            if c % 10000 == 1:
                print(len(lists))

        sql = "insert or ignore into cxids(`ids`,`stat`) values (?,?)"
        self.db3.ExeSqlliteMany(sql, lists)
        print(c)
        # No new ids at all this pass -> everything has been copied.
        if c1 == c:
            break
        # Only the re-fetched boundary id came back -> source exhausted.
        if len(lists) == 1:
            break
        c1 = c
        lists.clear()
def one_init(self):
    """
    Step 1: seed the local db3 (sqlite) file with every mongo ``_id``.
    """
    # Order matters: open the connections first, create the table,
    # locate the starting id, then bulk-copy the ids.
    for step in (
        self.init_conn_mongodb,
        self.init_db3,
        self.create_db3_table,
        self.get_first_mongo_id,
        self.init_data_db3,
    ):
        step()
def two_send(self):
    """
    Distributed sender: publish pending ids from the db3 file to the MQ.
    """
    for step in (self.init_db3, self.init_mq, self.send_db3):
        step()
def two_recv(self):
    """Distributed receiver: consume processed ids from the MQ and flag them in db3."""
    for step in (self.init_db3, self.init_mq, self.recv):
        step()
class MoveMongodbThreadRun(MThreadingRun2):
|
|
196
|
+
def __init__(self, num, conf):
    """Thread-pool runner that moves documents between mongo collections.

    :param num: worker-thread count, passed through to MThreadingRun2.
    :param conf: configuration object handed to MoveMongodbColl.
    """
    super(MoveMongodbThreadRun, self).__init__(num)
    # Cap the pending-job queue so callback2 can throttle on its size.
    self.thread_pool.work_queue.set_size(10)
    # Private event loop, reused by fun() for every dispatched batch.
    self.loop = asyncio.new_event_loop()
    asyncio.set_event_loop(self.loop)

    self.mvmc = MoveMongodbColl(conf)
    self.mvmc.init_mq()
    self.mvmc.init_conn_mongodb()
    # Route incoming MQ messages to this class's callback2.
    self.mvmc.use_send.callback2 = self.callback2
    self.lists = []  # current group of buffered records
    self.info_list = []  # completed groups awaiting dispatch as one job
    self.is_many_move = False
    # Per-group size of the 2-D batch; for bulk moves 10000 is recommended.
    self.num_list = 100
    self.num_info_list = 10
def callback2(self, ch, method, properties, body):
    """Buffer one MQ message; once ``num_info_list`` groups of ``num_list``
    records have accumulated, enqueue them all as a single pool job."""
    record = json.loads(body.decode())
    self.lists.append(record)
    # In batch-move mode, throttle consumption while the pool is backed up.
    if self.is_many_move and self.thread_pool.work_queue.get_size() >= 3:
        time.sleep(10)
    if len(self.lists) >= self.num_list:
        self.info_list.append(copy.deepcopy(self.lists))
        self.lists.clear()
        print(len(self.info_list))

    if len(self.info_list) >= self.num_info_list:
        # NOTE(review): this class defines `fun`, not `func` — confirm the
        # base class actually provides self.func, else this raises here.
        self.add_job(self.func, copy.deepcopy(self.info_list))
        self.info_list.clear()
def set_task(self, threadval: ThreadVal, *args, **kwargs):
    """Task thread body: start consuming the send queue (blocks inside get_mq)."""
    sender = self.mvmc.use_send
    sender.get_mq()
def deal_results(self, threadval: ThreadVal, *args, **kwargs):
    """Result-drain thread body: loops forever, routing worker results.

    ("err", msg) / ("err_Exception", msg) are appended to per-tag error files;
    ("right", _id) is republished via send_update_info so the id gets flagged.
    """
    result_queue = threadval.get_result_queue()
    while True:
        while not result_queue.is_empty():
            tag, payload = result_queue.get()
            if tag == "err":
                file_path = BaseFile.get_new_filename(self.mvmc.conf.error_dir, "err_parse_2.txt")
                BaseFile.single_add_file(file_path, payload + '\n')
            elif tag == "err_Exception":
                file_path = BaseFile.get_new_filename(self.mvmc.conf.error_dir, "err_Exception_2.txt")
                BaseFile.single_add_file(file_path, payload + '\n')
            elif tag == 'right':
                self.send_update_info(payload)
            self.thread_pool.result_queue.task_done()
        time.sleep(1)
def send_update_info(self, _id):
    """Publish ``{"_id": _id}`` to the work MQ, retrying every second until accepted."""
    payload = json.dumps({
        "_id": _id,
    })
    while not self.mvmc.use_work.send_mq(payload, num=10000):
        time.sleep(1)
def setProxy(self, threadval: ThreadVal, proxysList=None):
    """Proxy-refresh thread body: no proxies are needed here, so just idle."""
    # Deliberate no-op — sleeping keeps the proxy thread from busy-looping.
    time.sleep(60)
def is_break(self):
    """Tell the runner never to stop the loop on its own."""
    return False
def thread_pool_hook(self, threadinfo: ThreadInfo):
    """Pool hook: exempt the proxy thread from the default auto-restart."""
    # Only the proxy thread opts out; task threads keep the default restart.
    if threadinfo.get_thread_name() == self.etn.proxythreadname:
        threadinfo.set_is_restart(False)
    return {}
def doc_hook(self, item):
    """Transform a document before it is written to the target collection.

    Identity by default; subclasses override to rewrite documents in flight.
    """
    return item
async def par_html(self, result_queue, lists):
    """Move documents one at a time from the source to the target collection.

    For every record in ``lists``, fetch the document by ``_id`` and insert
    it; an already-present document (DuplicateKeyError) still counts as
    success so its stat gets updated. Failures are reported with a traceback.
    """
    for record in lists:
        _id = record["_id"]
        try:
            try:
                # Copy the document only if the target doesn't have it yet.
                doc = await self.mvmc.bs.select_one({"_id": _id})
                await self.mvmc.bs2.insert_one(self.doc_hook(doc))
                print("{}插入".format(_id))
                result_queue.put(("right", _id))
            except DuplicateKeyError:
                # Already moved — report success anyway.
                print("{}存在".format(_id))
                result_queue.put(("right", _id))
            except Exception:
                traceback.print_exc()
                result_queue.put(("err_Exception", _id + ":" + traceback.format_exc()))
        except Exception:
            traceback.print_exc()
            result_queue.put(("err_Exception", _id + ":" + traceback.format_exc()))
async def par_html_many(self, result_queue, lists):
    """Bulk-move documents: fetch the source documents in slices of 500 ids
    and insert them into the target in batches of at least 100.

    ``lists`` is a list of dicts each carrying an ``_id``. Successful ids are
    reported as ("right", _id); failures as ("err_Exception", traceback).
    """
    insert_list = []
    for i in range(0, len(lists), 500):
        lists_item = lists[i:i + 500]
        try:
            try:
                # Reduce each record to its bare _id for the $in query.
                def deal_dicts(dd):
                    return dd["_id"]

                lists_item = list(map(deal_dicts, lists_item))
                # Insert only documents the target doesn't have yet.
                docs = await self.mvmc.bs.select({"_id": {"$in": lists_item}})
                for item in docs:
                    item_result = self.doc_hook(item)
                    insert_list.append(item_result)
                print("获取mongo num:" + str(len(insert_list)))
                if len(insert_list) >= 100:
                    start_time = time.time()
                    result = await self.mvmc.bs2.insert_many(insert_list)
                    print("百条数据插入时间:" + str(time.time() - start_time))
                    for _id in result.inserted_ids:
                        print("{}插入".format(_id))
                        result_queue.put(("right", _id))
                    insert_list.clear()
            except DuplicateKeyError as e:
                # NOTE(review): on a duplicate the pending batch is neither
                # reported nor cleared, so those documents are retried along
                # with the next slice — confirm this is intended.
                print("有数据存在,无法插入")

            except Exception as e:
                traceback.print_exc()
                result_queue.put(("err_Exception", traceback.format_exc()))
        except Exception as e:
            traceback.print_exc()
            result_queue.put(("err_Exception", traceback.format_exc()))

    # Flush whatever is left over (fewer than 100 documents).
    if len(insert_list) > 0:
        try:
            result = await self.mvmc.bs2.insert_many(insert_list)
            for _id in result.inserted_ids:
                print("{}插入".format(_id))
                result_queue.put(("right", _id))
            insert_list.clear()
        except DuplicateKeyError as e:
            print("存在某个key 批量插入失败")
        except Exception as e:
            traceback.print_exc()
            result_queue.put(("err_Exception", traceback.format_exc()))
def fun(self, threadval, *args, **kwargs):
    """Pool worker: run one dispatched 2-D batch of records on the event loop.

    ``args[0]`` is the list of groups built by callback2; each group becomes
    one par_html / par_html_many coroutine and all of them are awaited
    together on ``self.loop``.
    """
    result_queue = threadval.get_result_queue()
    # asyncio.wait() no longer accepts bare coroutines (deprecated in 3.8,
    # removed in 3.11), so wrap each one in a Future bound to our loop.
    futures = [
        asyncio.ensure_future(
            (self.par_html_many if self.is_many_move else self.par_html)(result_queue, group),
            loop=self.loop,
        )
        for group in args[0]
    ]
    self.loop.run_until_complete(asyncio.wait(futures))
|