re-common 10.0.39__py3-none-any.whl → 10.0.40__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (218) hide show
  1. re_common/baselibrary/__init__.py +4 -4
  2. re_common/baselibrary/baseabs/__init__.py +6 -6
  3. re_common/baselibrary/baseabs/baseabs.py +26 -26
  4. re_common/baselibrary/database/mbuilder.py +132 -132
  5. re_common/baselibrary/database/moudle.py +93 -93
  6. re_common/baselibrary/database/msqlite3.py +194 -194
  7. re_common/baselibrary/database/mysql.py +169 -169
  8. re_common/baselibrary/database/sql_factory.py +26 -26
  9. re_common/baselibrary/mthread/MThreadingRun.py +486 -486
  10. re_common/baselibrary/mthread/MThreadingRunEvent.py +349 -349
  11. re_common/baselibrary/mthread/__init__.py +2 -2
  12. re_common/baselibrary/mthread/mythreading.py +695 -695
  13. re_common/baselibrary/pakge_other/socks.py +404 -404
  14. re_common/baselibrary/readconfig/config_factory.py +18 -18
  15. re_common/baselibrary/readconfig/ini_config.py +317 -317
  16. re_common/baselibrary/readconfig/toml_config.py +49 -49
  17. re_common/baselibrary/temporary/envdata.py +36 -36
  18. re_common/baselibrary/tools/all_requests/aiohttp_request.py +118 -118
  19. re_common/baselibrary/tools/all_requests/httpx_requet.py +102 -102
  20. re_common/baselibrary/tools/all_requests/mrequest.py +412 -412
  21. re_common/baselibrary/tools/all_requests/requests_request.py +81 -81
  22. re_common/baselibrary/tools/batch_compre/bijiao_batch.py +31 -31
  23. re_common/baselibrary/tools/contrast_db3.py +123 -123
  24. re_common/baselibrary/tools/copy_file.py +39 -39
  25. re_common/baselibrary/tools/db3_2_sizedb3.py +102 -102
  26. re_common/baselibrary/tools/foreachgz.py +39 -39
  27. re_common/baselibrary/tools/get_attr.py +10 -10
  28. re_common/baselibrary/tools/image_to_pdf.py +61 -61
  29. re_common/baselibrary/tools/java_code_deal.py +139 -139
  30. re_common/baselibrary/tools/javacode.py +79 -79
  31. re_common/baselibrary/tools/mdb_db3.py +48 -48
  32. re_common/baselibrary/tools/merge_file.py +171 -171
  33. re_common/baselibrary/tools/merge_gz_file.py +165 -165
  34. re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py +42 -42
  35. re_common/baselibrary/tools/mhdfstools/hdfst.py +42 -42
  36. re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py +38 -38
  37. re_common/baselibrary/tools/mongo_tools.py +50 -50
  38. re_common/baselibrary/tools/move_file.py +170 -170
  39. re_common/baselibrary/tools/move_mongo/mongo_table_to_file.py +63 -63
  40. re_common/baselibrary/tools/move_mongo/move_mongo_table.py +354 -354
  41. re_common/baselibrary/tools/move_mongo/use_mttf.py +18 -18
  42. re_common/baselibrary/tools/move_mongo/use_mv.py +93 -93
  43. re_common/baselibrary/tools/mpandas/mpandasreadexcel.py +125 -125
  44. re_common/baselibrary/tools/mpandas/pandas_visualization.py +7 -7
  45. re_common/baselibrary/tools/myparsel.py +104 -104
  46. re_common/baselibrary/tools/rename_dir_file.py +37 -37
  47. re_common/baselibrary/tools/sequoiadb_utils.py +398 -398
  48. re_common/baselibrary/tools/split_line_to_many.py +25 -25
  49. re_common/baselibrary/tools/stringtodicts.py +33 -33
  50. re_common/baselibrary/tools/workwechant_bot.py +84 -84
  51. re_common/baselibrary/utils/baseaiohttp.py +296 -296
  52. re_common/baselibrary/utils/baseaiomysql.py +87 -87
  53. re_common/baselibrary/utils/baseallstep.py +191 -191
  54. re_common/baselibrary/utils/baseavro.py +19 -19
  55. re_common/baselibrary/utils/baseboto3.py +291 -291
  56. re_common/baselibrary/utils/basecsv.py +32 -32
  57. re_common/baselibrary/utils/basedict.py +133 -133
  58. re_common/baselibrary/utils/basedir.py +241 -241
  59. re_common/baselibrary/utils/baseencode.py +351 -351
  60. re_common/baselibrary/utils/baseencoding.py +28 -28
  61. re_common/baselibrary/utils/baseesdsl.py +86 -86
  62. re_common/baselibrary/utils/baseexcel.py +264 -264
  63. re_common/baselibrary/utils/baseexcept.py +109 -109
  64. re_common/baselibrary/utils/basefile.py +654 -654
  65. re_common/baselibrary/utils/baseftp.py +214 -214
  66. re_common/baselibrary/utils/basegzip.py +60 -60
  67. re_common/baselibrary/utils/basehdfs.py +135 -135
  68. re_common/baselibrary/utils/basehttpx.py +268 -268
  69. re_common/baselibrary/utils/baseip.py +87 -87
  70. re_common/baselibrary/utils/basejson.py +2 -2
  71. re_common/baselibrary/utils/baselist.py +32 -32
  72. re_common/baselibrary/utils/basemotor.py +190 -190
  73. re_common/baselibrary/utils/basemssql.py +98 -98
  74. re_common/baselibrary/utils/baseodbc.py +113 -113
  75. re_common/baselibrary/utils/basepandas.py +302 -302
  76. re_common/baselibrary/utils/basepeewee.py +11 -11
  77. re_common/baselibrary/utils/basepika.py +180 -180
  78. re_common/baselibrary/utils/basepydash.py +143 -143
  79. re_common/baselibrary/utils/basepymongo.py +230 -230
  80. re_common/baselibrary/utils/basequeue.py +22 -22
  81. re_common/baselibrary/utils/baserar.py +57 -57
  82. re_common/baselibrary/utils/baserequest.py +279 -279
  83. re_common/baselibrary/utils/baseset.py +8 -8
  84. re_common/baselibrary/utils/basesmb.py +403 -403
  85. re_common/baselibrary/utils/basestring.py +382 -382
  86. re_common/baselibrary/utils/basetime.py +320 -320
  87. re_common/baselibrary/utils/baseurl.py +121 -121
  88. re_common/baselibrary/utils/basezip.py +57 -57
  89. re_common/baselibrary/utils/core/__init__.py +7 -7
  90. re_common/baselibrary/utils/core/bottomutils.py +18 -18
  91. re_common/baselibrary/utils/core/mdeprecated.py +327 -327
  92. re_common/baselibrary/utils/core/mlamada.py +16 -16
  93. re_common/baselibrary/utils/core/msginfo.py +25 -25
  94. re_common/baselibrary/utils/core/requests_core.py +103 -103
  95. re_common/baselibrary/utils/fateadm.py +429 -429
  96. re_common/baselibrary/utils/importfun.py +123 -123
  97. re_common/baselibrary/utils/mfaker.py +57 -57
  98. re_common/baselibrary/utils/my_abc/__init__.py +3 -3
  99. re_common/baselibrary/utils/my_abc/better_abc.py +32 -32
  100. re_common/baselibrary/utils/mylogger.py +414 -414
  101. re_common/baselibrary/utils/myredisclient.py +861 -861
  102. re_common/baselibrary/utils/pipupgrade.py +21 -21
  103. re_common/baselibrary/utils/ringlist.py +85 -85
  104. re_common/baselibrary/utils/version_compare.py +36 -36
  105. re_common/baselibrary/utils/ydmhttp.py +126 -126
  106. re_common/facade/lazy_import.py +11 -11
  107. re_common/facade/loggerfacade.py +25 -25
  108. re_common/facade/mysqlfacade.py +467 -467
  109. re_common/facade/now.py +31 -31
  110. re_common/facade/sqlite3facade.py +257 -257
  111. re_common/facade/use/mq_use_facade.py +83 -83
  112. re_common/facade/use/proxy_use_facade.py +19 -19
  113. re_common/libtest/base_dict_test.py +19 -19
  114. re_common/libtest/baseavro_test.py +13 -13
  115. re_common/libtest/basefile_test.py +14 -14
  116. re_common/libtest/basemssql_test.py +77 -77
  117. re_common/libtest/baseodbc_test.py +7 -7
  118. re_common/libtest/basepandas_test.py +38 -38
  119. re_common/libtest/get_attr_test/get_attr_test_settings.py +14 -14
  120. re_common/libtest/get_attr_test/settings.py +54 -54
  121. re_common/libtest/idencode_test.py +53 -53
  122. re_common/libtest/iniconfig_test.py +35 -35
  123. re_common/libtest/ip_test.py +34 -34
  124. re_common/libtest/merge_file_test.py +20 -20
  125. re_common/libtest/mfaker_test.py +8 -8
  126. re_common/libtest/mm3_test.py +31 -31
  127. re_common/libtest/mylogger_test.py +88 -88
  128. re_common/libtest/myparsel_test.py +27 -27
  129. re_common/libtest/mysql_test.py +151 -151
  130. re_common/libtest/pymongo_test.py +21 -21
  131. re_common/libtest/split_test.py +11 -11
  132. re_common/libtest/sqlite3_merge_test.py +5 -5
  133. re_common/libtest/sqlite3_test.py +34 -34
  134. re_common/libtest/tomlconfig_test.py +30 -30
  135. re_common/libtest/use_tools_test/__init__.py +2 -2
  136. re_common/libtest/user/__init__.py +4 -4
  137. re_common/studio/__init__.py +4 -4
  138. re_common/studio/assignment_expressions.py +36 -36
  139. re_common/studio/mydash/test1.py +18 -18
  140. re_common/studio/pydashstudio/first.py +9 -9
  141. re_common/studio/streamlitstudio/first_app.py +65 -65
  142. re_common/studio/streamlitstudio/uber_pickups.py +23 -23
  143. re_common/studio/test.py +18 -18
  144. re_common/v2/baselibrary/business_utils/BusinessStringUtil.py +219 -219
  145. re_common/v2/baselibrary/business_utils/baseencodeid.py +100 -100
  146. re_common/v2/baselibrary/business_utils/full_doi_path.py +116 -116
  147. re_common/v2/baselibrary/business_utils/rel_tools.py +6 -6
  148. re_common/v2/baselibrary/decorators/utils.py +59 -59
  149. re_common/v2/baselibrary/helpers/search_packge/NearestNeighbors_test.py +105 -105
  150. re_common/v2/baselibrary/helpers/search_packge/fit_text_match.py +253 -253
  151. re_common/v2/baselibrary/helpers/search_packge/scikit_learn_text_matcher.py +260 -260
  152. re_common/v2/baselibrary/helpers/search_packge/test.py +1 -1
  153. re_common/v2/baselibrary/s3object/baseboto3.py +230 -230
  154. re_common/v2/baselibrary/tools/WeChatRobot.py +95 -95
  155. re_common/v2/baselibrary/tools/ac_ahocorasick.py +75 -75
  156. re_common/v2/baselibrary/tools/concurrency.py +35 -35
  157. re_common/v2/baselibrary/tools/data_processer/base.py +53 -53
  158. re_common/v2/baselibrary/tools/data_processer/data_processer.py +508 -508
  159. re_common/v2/baselibrary/tools/data_processer/data_reader.py +187 -187
  160. re_common/v2/baselibrary/tools/data_processer/data_writer.py +38 -38
  161. re_common/v2/baselibrary/tools/dict_tools.py +44 -44
  162. re_common/v2/baselibrary/tools/dolphinscheduler.py +187 -187
  163. re_common/v2/baselibrary/tools/hdfs_base_processor.py +204 -204
  164. re_common/v2/baselibrary/tools/hdfs_bulk_processor.py +67 -67
  165. re_common/v2/baselibrary/tools/hdfs_data_processer.py +338 -338
  166. re_common/v2/baselibrary/tools/hdfs_line_processor.py +74 -74
  167. re_common/v2/baselibrary/tools/list_tools.py +69 -69
  168. re_common/v2/baselibrary/tools/resume_tracker.py +94 -94
  169. re_common/v2/baselibrary/tools/search_hash_tools.py +54 -54
  170. re_common/v2/baselibrary/tools/text_matcher.py +326 -326
  171. re_common/v2/baselibrary/tools/unionfind_tools.py +60 -60
  172. re_common/v2/baselibrary/utils/BusinessStringUtil.py +196 -196
  173. re_common/v2/baselibrary/utils/api_net_utils.py +270 -270
  174. re_common/v2/baselibrary/utils/author_smi.py +361 -361
  175. re_common/v2/baselibrary/utils/base_string_similarity.py +158 -158
  176. re_common/v2/baselibrary/utils/basedict.py +37 -37
  177. re_common/v2/baselibrary/utils/basehdfs.py +163 -163
  178. re_common/v2/baselibrary/utils/basepika.py +180 -180
  179. re_common/v2/baselibrary/utils/basetime.py +77 -77
  180. re_common/v2/baselibrary/utils/db.py +156 -156
  181. re_common/v2/baselibrary/utils/elasticsearch.py +46 -0
  182. re_common/v2/baselibrary/utils/json_cls.py +16 -16
  183. re_common/v2/baselibrary/utils/mq.py +83 -83
  184. re_common/v2/baselibrary/utils/n_ary_expression_tree.py +243 -243
  185. re_common/v2/baselibrary/utils/string_bool.py +186 -186
  186. re_common/v2/baselibrary/utils/string_clear.py +246 -246
  187. re_common/v2/baselibrary/utils/string_smi.py +18 -18
  188. re_common/v2/baselibrary/utils/stringutils.py +271 -271
  189. re_common/vip/base_step_process.py +11 -11
  190. re_common/vip/baseencodeid.py +90 -90
  191. re_common/vip/changetaskname.py +28 -28
  192. re_common/vip/core_var.py +24 -24
  193. re_common/vip/mmh3Hash.py +89 -89
  194. re_common/vip/proxy/allproxys.py +127 -127
  195. re_common/vip/proxy/allproxys_thread.py +159 -159
  196. re_common/vip/proxy/cnki_proxy.py +153 -153
  197. re_common/vip/proxy/kuaidaili.py +87 -87
  198. re_common/vip/proxy/proxy_all.py +113 -113
  199. re_common/vip/proxy/update_kuaidaili_0.py +42 -42
  200. re_common/vip/proxy/wanfang_proxy.py +152 -152
  201. re_common/vip/proxy/wp_proxy_all.py +181 -181
  202. re_common/vip/read_rawid_to_txt.py +91 -91
  203. re_common/vip/title/__init__.py +5 -5
  204. re_common/vip/title/transform/TransformBookTitleToZt.py +125 -125
  205. re_common/vip/title/transform/TransformConferenceTitleToZt.py +139 -139
  206. re_common/vip/title/transform/TransformCstadTitleToZt.py +195 -195
  207. re_common/vip/title/transform/TransformJournalTitleToZt.py +203 -203
  208. re_common/vip/title/transform/TransformPatentTitleToZt.py +132 -132
  209. re_common/vip/title/transform/TransformRegulationTitleToZt.py +114 -114
  210. re_common/vip/title/transform/TransformStandardTitleToZt.py +135 -135
  211. re_common/vip/title/transform/TransformThesisTitleToZt.py +135 -135
  212. re_common/vip/title/transform/__init__.py +10 -10
  213. {re_common-10.0.39.dist-info → re_common-10.0.40.dist-info}/LICENSE +201 -201
  214. {re_common-10.0.39.dist-info → re_common-10.0.40.dist-info}/METADATA +24 -16
  215. re_common-10.0.40.dist-info/RECORD +249 -0
  216. {re_common-10.0.39.dist-info → re_common-10.0.40.dist-info}/WHEEL +1 -1
  217. re_common-10.0.39.dist-info/RECORD +0 -248
  218. {re_common-10.0.39.dist-info → re_common-10.0.40.dist-info}/top_level.txt +0 -0
@@ -1,354 +1,354 @@
1
- import asyncio
2
- import copy
3
- import datetime
4
- import json
5
- import time
6
-
7
- from pymongo.errors import DuplicateKeyError
8
- import traceback
9
-
10
- from re_common.baselibrary.mthread.MThreadingRun import MThreadingRun2
11
- from re_common.baselibrary.mthread.mythreading import ThreadVal, ThreadInfo
12
- from re_common.baselibrary.utils.basefile import BaseFile
13
- from re_common.baselibrary.utils.basemotor import BaseMotor
14
- from re_common.baselibrary.utils.basepymongo import BasePyMongo
15
- from re_common.facade.sqlite3facade import Sqlite3Utiles
16
- from re_common.facade.use.mq_use_facade import UseMq
17
-
18
-
19
- class Configs(object):
20
-
21
- def __init__(self):
22
- self.db3_path = r"F:\fun2\test_images.db3"
23
- self.db3_encoding = "utf-8"
24
- self.mgdb_conn = "mongodb://192.168.31.30:32417/"
25
- self.mgdb_conn_motor = "mongodb://192.168.31.30:32417/htmljson.wanfang_ref?authSource=htmljson"
26
- self.mgdb_db = "htmljson"
27
- self.mgdb_col = "wanfang_ref"
28
-
29
- self.mgdb_conn2_motor = "mongodb://cjrw:vipdatacenter@192.168.31.243:32920,192.168.31.206:32920,192.168.31.208:32920/?authSource=htmljson"
30
- self.mgdb_db2 = "htmljson"
31
- self.mgdb_col2 = "wanfang_ref"
32
-
33
- self.mq_name = "mongodb.move.send"
34
- self.mq_name_work = "mongodb.move.worker"
35
-
36
- self.error_dir = r"F:\fun2\log"
37
-
38
-
39
- class MoveMongodbColl(object):
40
- def __init__(self, conf):
41
- self.conf = conf
42
- self.first_id = ""
43
- self.id_list = []
44
- self.recv_list = []
45
-
46
- def init_conn_mongodb(self):
47
- self.basemongo = BasePyMongo(self.conf.mgdb_conn)
48
- self.basemongo.use_db(self.conf.mgdb_db)
49
- self.basemongo.create_col(self.conf.mgdb_col)
50
-
51
- self.bs = BaseMotor()
52
- self.bs.AsyncIOMotorClient(
53
- self.conf.mgdb_conn_motor,
54
- self.conf.mgdb_db)
55
- self.bs.get_col(self.conf.mgdb_col)
56
-
57
- self.bs2 = BaseMotor()
58
- self.bs2.AsyncIOMotorClient(
59
- self.conf.mgdb_conn2_motor,
60
- self.conf.mgdb_db2)
61
- self.bs2.get_col(self.conf.mgdb_col2)
62
-
63
- def create_db3_table(self):
64
- """
65
- 创建表
66
- :return:
67
- """
68
- sql1 = "PRAGMA foreign_keys = false;"
69
- sql2 = 'DROP TABLE IF EXISTS "cxids";'
70
- sql3 = 'CREATE TABLE "cxids" ("ids" TEXT NOT NULL,"stat" integer NOT NULL DEFAULT 0,PRIMARY KEY ("ids"));'
71
- sql4 = 'PRAGMA foreign_keys = true;'
72
- self.db3.ExeSqlliteList([sql1, sql2, sql3, sql4])
73
-
74
- def init_db3(self):
75
- self.db3 = Sqlite3Utiles().Sqlite3DBConnectFromFilePath(self.conf.db3_path, encoding=self.conf.db3_encoding)
76
-
77
- def init_mq(self):
78
- self.use_send = UseMq(self.conf.mq_name)
79
- self.use_work = UseMq(self.conf.mq_name_work)
80
-
81
- def send_list(self):
82
- while True:
83
- if self.use_send.get_server_mq_num(10000):
84
- for i in self.id_list:
85
- dict_info = {
86
- '_id': i
87
- }
88
- info_str = json.dumps(dict_info)
89
- print(info_str)
90
- self.use_send.easy_send_mq(info_str)
91
- self.id_list.clear()
92
- break
93
- else:
94
- time.sleep(1)
95
-
96
- def send_db3(self):
97
- while True:
98
- sql = 'select * from cxids where stat=0 limit 20000'
99
- rows = self.db3.SelectFromSqlliteFetchall(sql)
100
- if len(rows) == 0:
101
- print('查询结束 0 状态结束 查询-1状态 time sleep 60s')
102
- time.sleep(60)
103
- sql = 'select * from cxids where stat=-1 limit 20000'
104
- rows = self.db3.SelectFromSqlliteFetchall(sql)
105
- if len(rows) == 0:
106
- print('查询结束 -1 状态结束 结束发送')
107
- break
108
- for row in rows:
109
- _id = row[0]
110
- self.id_list.append(_id)
111
- if len(self.id_list) >= 10000:
112
- sql = "update cxids set stat = -1 where ids in {}".format(tuple(self.id_list))
113
- self.db3.ExeSqlliteSql(sql)
114
- self.send_list()
115
-
116
- if len(self.id_list) > 1:
117
- sql = "update cxids set stat = -1 where ids in {}".format(tuple(self.id_list))
118
- self.db3.ExeSqlliteSql(sql)
119
- self.send_list()
120
-
121
- if len(self.id_list) == 1:
122
- sql = "update cxids set stat = -1 where ids='{}'".format(self.id_list[0])
123
- self.db3.ExeSqlliteSql(sql)
124
- self.send_list()
125
-
126
- def callback2(self, ch, method, properties, body):
127
- json_data = json.loads(body)
128
- _id = json_data['_id']
129
- self.recv_list.append(_id)
130
- if len(self.recv_list) >= 500:
131
- sql = "update cxids set stat = 1 where ids in {}".format(tuple(self.recv_list))
132
- if self.db3.ExeSqlliteSql(sql):
133
- self.recv_list.clear()
134
- else:
135
- print('[{}]未更新stat条数{}'.format(datetime.datetime.now(), len(self.recv_list)))
136
-
137
- def recv(self, results=None, *args, **kwargs):
138
- self.use_work.callback2 = self.callback2
139
- self.use_work.get_mq()
140
-
141
- def get_first_mongo_id(self):
142
- for i in self.basemongo.find({"_id": {"$gt": self.first_id}}, {"_id": 1}).sort([("_id", 1)]).limit(1):
143
- self.first_id = i["_id"]
144
- print("first_id is:" + self.first_id)
145
-
146
- def init_data_db3(self):
147
- c = 0
148
- c1 = -1
149
- while True:
150
- lists = []
151
- for i in self.basemongo.find({"_id": {"$gte": self.first_id}}, {"_id": 1}).sort([("_id", 1)]).limit(
152
- 1000000):
153
- c = c + 1
154
- self.first_id = i["_id"]
155
- lists.append((i["_id"], 0))
156
- if c % 10000 == 1:
157
- print(len(lists))
158
-
159
- sql = "insert or ignore into cxids(`ids`,`stat`) values (?,?)"
160
- self.db3.ExeSqlliteMany(sql, lists)
161
- print(c)
162
- if c1 == c:
163
- break
164
- if len(lists) == 1:
165
- break
166
- c1 = c
167
- lists.clear()
168
-
169
- def one_init(self):
170
- """
171
- 第一步 初始化id数据到db3目录
172
- :return:
173
- """
174
- self.init_conn_mongodb()
175
- self.init_db3()
176
- self.create_db3_table()
177
- self.get_first_mongo_id()
178
- self.init_data_db3()
179
-
180
- def two_send(self):
181
- """
182
- 分布式的send方法
183
- :return:
184
- """
185
- self.init_db3()
186
- self.init_mq()
187
- self.send_db3()
188
-
189
- def two_recv(self):
190
- self.init_db3()
191
- self.init_mq()
192
- self.recv()
193
-
194
-
195
- class MoveMongodbThreadRun(MThreadingRun2):
196
- def __init__(self, num, conf):
197
- super(MoveMongodbThreadRun, self).__init__(num)
198
- self.thread_pool.work_queue.set_size(10)
199
- self.loop = asyncio.new_event_loop()
200
- asyncio.set_event_loop(self.loop)
201
-
202
- self.mvmc = MoveMongodbColl(conf)
203
- self.mvmc.init_mq()
204
- self.mvmc.init_conn_mongodb()
205
- self.mvmc.use_send.callback2 = self.callback2
206
- self.lists = []
207
- self.info_list = []
208
- self.is_many_move = False
209
- # 配置二维数组的每组数据数量,如果为批量转移 建议设置为10000
210
- self.num_list = 100
211
- self.num_info_list = 10
212
-
213
- def callback2(self, ch, method, properties, body):
214
- json_data = json.loads(body.decode())
215
- # self.add_job(self.func, dicts)
216
- self.lists.append((json_data))
217
- # 异步需要 10 * 100 的二维list
218
- if self.is_many_move:
219
- work_size = self.thread_pool.work_queue.get_size()
220
- if work_size >= 3:
221
- time.sleep(10)
222
- if len(self.lists) >= self.num_list:
223
- self.info_list.append(copy.deepcopy(self.lists))
224
- self.lists.clear()
225
- print(len(self.info_list))
226
-
227
- if len(self.info_list) >= self.num_info_list:
228
- self.add_job(self.func, copy.deepcopy(self.info_list))
229
- self.info_list.clear()
230
-
231
- def set_task(self, threadval: ThreadVal, *args, **kwargs):
232
- self.mvmc.use_send.get_mq()
233
-
234
- def deal_results(self, threadval: ThreadVal, *args, **kwargs):
235
- result_queue = threadval.get_result_queue()
236
- while True:
237
- while not result_queue.is_empty():
238
- result = result_queue.get()
239
- t_1, t_2 = result
240
- if t_1 == "err":
241
- file_path = BaseFile.get_new_filename(self.mvmc.conf.error_dir, "err_parse_2.txt")
242
- BaseFile.single_add_file(file_path, t_2 + '\n')
243
- if t_1 == "err_Exception":
244
- file_path = BaseFile.get_new_filename(self.mvmc.conf.error_dir, "err_Exception_2.txt")
245
- BaseFile.single_add_file(file_path, t_2 + '\n')
246
- if t_1 == 'right':
247
- self.send_update_info(t_2)
248
- self.thread_pool.result_queue.task_done()
249
- time.sleep(1)
250
-
251
- def send_update_info(self, _id):
252
- dict_info = {
253
- "_id": _id,
254
- }
255
- info_str = json.dumps(dict_info)
256
- while True:
257
- if self.mvmc.use_work.send_mq(info_str, num=10000):
258
- break
259
- time.sleep(1)
260
-
261
- def setProxy(self, threadval: ThreadVal, proxysList=None):
262
- time.sleep(60)
263
-
264
- def is_break(self):
265
- return False
266
-
267
- def thread_pool_hook(self, threadinfo: ThreadInfo):
268
- # 设置代理线程不重启,默认会重启
269
- if threadinfo.get_thread_name() == self.etn.proxythreadname:
270
- threadinfo.set_is_restart(False)
271
- # if threadinfo.get_thread_name() == self.etn.taskthreadname:
272
- # threadinfo.set_is_restart(False)
273
- return {}
274
-
275
- def doc_hook(self, item):
276
- return item
277
-
278
- async def par_html(self, result_queue, lists):
279
- for info in lists:
280
- _id = info["_id"]
281
- try:
282
- try:
283
- # 不存在就插入
284
-
285
- item = await self.mvmc.bs.select_one({"_id": _id})
286
- item_result = self.doc_hook(item)
287
- await self.mvmc.bs2.insert_one(item_result)
288
- print("{}插入".format(_id))
289
- result_queue.put(("right", _id))
290
- except DuplicateKeyError as e:
291
- print("{}存在".format(_id))
292
- result_queue.put(("right", _id))
293
- except Exception as e:
294
- traceback.print_exc()
295
- result_queue.put(("err_Exception", _id + ":" + traceback.format_exc()))
296
- except Exception as e:
297
- traceback.print_exc()
298
- result_queue.put(("err_Exception", _id + ":" + traceback.format_exc()))
299
-
300
- async def par_html_many(self, result_queue, lists):
301
- insert_list = []
302
- for i in range(0, len(lists), 500):
303
- lists_item = lists[i:i + 500]
304
- try:
305
- try:
306
- def deal_dicts(dd):
307
- return dd["_id"]
308
- lists_item = list(map(deal_dicts, lists_item))
309
- # 不存在就插入
310
- docs = await self.mvmc.bs.select({"_id": {"$in": lists_item}})
311
- for item in docs:
312
- item_result = self.doc_hook(item)
313
- insert_list.append(item_result)
314
- print("获取mongo num:" + str(len(insert_list)))
315
- if len(insert_list) >= 100:
316
- start_time = time.time()
317
- result = await self.mvmc.bs2.insert_many(insert_list)
318
- print("百条数据插入时间:" + str(time.time() - start_time))
319
- for _id in result.inserted_ids:
320
- print("{}插入".format(_id))
321
- result_queue.put(("right", _id))
322
- insert_list.clear()
323
- except DuplicateKeyError as e:
324
- print("有数据存在,无法插入")
325
-
326
- except Exception as e:
327
- traceback.print_exc()
328
- result_queue.put(("err_Exception", traceback.format_exc()))
329
- except Exception as e:
330
- traceback.print_exc()
331
- result_queue.put(("err_Exception", traceback.format_exc()))
332
-
333
- if len(insert_list) > 0:
334
- try:
335
- result = await self.mvmc.bs2.insert_many(insert_list)
336
- for _id in result.inserted_ids:
337
- print("{}插入".format(_id))
338
- result_queue.put(("right", _id))
339
- insert_list.clear()
340
- except DuplicateKeyError as e:
341
- print("存在某个key 批量插入失败")
342
- except Exception as e:
343
- traceback.print_exc()
344
- result_queue.put(("err_Exception", traceback.format_exc()))
345
-
346
- def fun(self, threadval, *args, **kwargs):
347
- result_queue = threadval.get_result_queue()
348
- func_list = []
349
- for lists in args[0]:
350
- if self.is_many_move:
351
- func_list.append(self.par_html_many(result_queue, lists))
352
- else:
353
- func_list.append(self.par_html(result_queue, lists))
354
- self.loop.run_until_complete(asyncio.wait(func_list))
1
+ import asyncio
2
+ import copy
3
+ import datetime
4
+ import json
5
+ import time
6
+
7
+ from pymongo.errors import DuplicateKeyError
8
+ import traceback
9
+
10
+ from re_common.baselibrary.mthread.MThreadingRun import MThreadingRun2
11
+ from re_common.baselibrary.mthread.mythreading import ThreadVal, ThreadInfo
12
+ from re_common.baselibrary.utils.basefile import BaseFile
13
+ from re_common.baselibrary.utils.basemotor import BaseMotor
14
+ from re_common.baselibrary.utils.basepymongo import BasePyMongo
15
+ from re_common.facade.sqlite3facade import Sqlite3Utiles
16
+ from re_common.facade.use.mq_use_facade import UseMq
17
+
18
+
19
+ class Configs(object):
20
+
21
+ def __init__(self):
22
+ self.db3_path = r"F:\fun2\test_images.db3"
23
+ self.db3_encoding = "utf-8"
24
+ self.mgdb_conn = "mongodb://192.168.31.30:32417/"
25
+ self.mgdb_conn_motor = "mongodb://192.168.31.30:32417/htmljson.wanfang_ref?authSource=htmljson"
26
+ self.mgdb_db = "htmljson"
27
+ self.mgdb_col = "wanfang_ref"
28
+
29
+ self.mgdb_conn2_motor = "mongodb://cjrw:vipdatacenter@192.168.31.243:32920,192.168.31.206:32920,192.168.31.208:32920/?authSource=htmljson"
30
+ self.mgdb_db2 = "htmljson"
31
+ self.mgdb_col2 = "wanfang_ref"
32
+
33
+ self.mq_name = "mongodb.move.send"
34
+ self.mq_name_work = "mongodb.move.worker"
35
+
36
+ self.error_dir = r"F:\fun2\log"
37
+
38
+
39
+ class MoveMongodbColl(object):
40
+ def __init__(self, conf):
41
+ self.conf = conf
42
+ self.first_id = ""
43
+ self.id_list = []
44
+ self.recv_list = []
45
+
46
+ def init_conn_mongodb(self):
47
+ self.basemongo = BasePyMongo(self.conf.mgdb_conn)
48
+ self.basemongo.use_db(self.conf.mgdb_db)
49
+ self.basemongo.create_col(self.conf.mgdb_col)
50
+
51
+ self.bs = BaseMotor()
52
+ self.bs.AsyncIOMotorClient(
53
+ self.conf.mgdb_conn_motor,
54
+ self.conf.mgdb_db)
55
+ self.bs.get_col(self.conf.mgdb_col)
56
+
57
+ self.bs2 = BaseMotor()
58
+ self.bs2.AsyncIOMotorClient(
59
+ self.conf.mgdb_conn2_motor,
60
+ self.conf.mgdb_db2)
61
+ self.bs2.get_col(self.conf.mgdb_col2)
62
+
63
+ def create_db3_table(self):
64
+ """
65
+ 创建表
66
+ :return:
67
+ """
68
+ sql1 = "PRAGMA foreign_keys = false;"
69
+ sql2 = 'DROP TABLE IF EXISTS "cxids";'
70
+ sql3 = 'CREATE TABLE "cxids" ("ids" TEXT NOT NULL,"stat" integer NOT NULL DEFAULT 0,PRIMARY KEY ("ids"));'
71
+ sql4 = 'PRAGMA foreign_keys = true;'
72
+ self.db3.ExeSqlliteList([sql1, sql2, sql3, sql4])
73
+
74
+ def init_db3(self):
75
+ self.db3 = Sqlite3Utiles().Sqlite3DBConnectFromFilePath(self.conf.db3_path, encoding=self.conf.db3_encoding)
76
+
77
+ def init_mq(self):
78
+ self.use_send = UseMq(self.conf.mq_name)
79
+ self.use_work = UseMq(self.conf.mq_name_work)
80
+
81
+ def send_list(self):
82
+ while True:
83
+ if self.use_send.get_server_mq_num(10000):
84
+ for i in self.id_list:
85
+ dict_info = {
86
+ '_id': i
87
+ }
88
+ info_str = json.dumps(dict_info)
89
+ print(info_str)
90
+ self.use_send.easy_send_mq(info_str)
91
+ self.id_list.clear()
92
+ break
93
+ else:
94
+ time.sleep(1)
95
+
96
+ def send_db3(self):
97
+ while True:
98
+ sql = 'select * from cxids where stat=0 limit 20000'
99
+ rows = self.db3.SelectFromSqlliteFetchall(sql)
100
+ if len(rows) == 0:
101
+ print('查询结束 0 状态结束 查询-1状态 time sleep 60s')
102
+ time.sleep(60)
103
+ sql = 'select * from cxids where stat=-1 limit 20000'
104
+ rows = self.db3.SelectFromSqlliteFetchall(sql)
105
+ if len(rows) == 0:
106
+ print('查询结束 -1 状态结束 结束发送')
107
+ break
108
+ for row in rows:
109
+ _id = row[0]
110
+ self.id_list.append(_id)
111
+ if len(self.id_list) >= 10000:
112
+ sql = "update cxids set stat = -1 where ids in {}".format(tuple(self.id_list))
113
+ self.db3.ExeSqlliteSql(sql)
114
+ self.send_list()
115
+
116
+ if len(self.id_list) > 1:
117
+ sql = "update cxids set stat = -1 where ids in {}".format(tuple(self.id_list))
118
+ self.db3.ExeSqlliteSql(sql)
119
+ self.send_list()
120
+
121
+ if len(self.id_list) == 1:
122
+ sql = "update cxids set stat = -1 where ids='{}'".format(self.id_list[0])
123
+ self.db3.ExeSqlliteSql(sql)
124
+ self.send_list()
125
+
126
+ def callback2(self, ch, method, properties, body):
127
+ json_data = json.loads(body)
128
+ _id = json_data['_id']
129
+ self.recv_list.append(_id)
130
+ if len(self.recv_list) >= 500:
131
+ sql = "update cxids set stat = 1 where ids in {}".format(tuple(self.recv_list))
132
+ if self.db3.ExeSqlliteSql(sql):
133
+ self.recv_list.clear()
134
+ else:
135
+ print('[{}]未更新stat条数{}'.format(datetime.datetime.now(), len(self.recv_list)))
136
+
137
+ def recv(self, results=None, *args, **kwargs):
138
+ self.use_work.callback2 = self.callback2
139
+ self.use_work.get_mq()
140
+
141
+ def get_first_mongo_id(self):
142
+ for i in self.basemongo.find({"_id": {"$gt": self.first_id}}, {"_id": 1}).sort([("_id", 1)]).limit(1):
143
+ self.first_id = i["_id"]
144
+ print("first_id is:" + self.first_id)
145
+
146
+ def init_data_db3(self):
147
+ c = 0
148
+ c1 = -1
149
+ while True:
150
+ lists = []
151
+ for i in self.basemongo.find({"_id": {"$gte": self.first_id}}, {"_id": 1}).sort([("_id", 1)]).limit(
152
+ 1000000):
153
+ c = c + 1
154
+ self.first_id = i["_id"]
155
+ lists.append((i["_id"], 0))
156
+ if c % 10000 == 1:
157
+ print(len(lists))
158
+
159
+ sql = "insert or ignore into cxids(`ids`,`stat`) values (?,?)"
160
+ self.db3.ExeSqlliteMany(sql, lists)
161
+ print(c)
162
+ if c1 == c:
163
+ break
164
+ if len(lists) == 1:
165
+ break
166
+ c1 = c
167
+ lists.clear()
168
+
169
+ def one_init(self):
170
+ """
171
+ 第一步 初始化id数据到db3目录
172
+ :return:
173
+ """
174
+ self.init_conn_mongodb()
175
+ self.init_db3()
176
+ self.create_db3_table()
177
+ self.get_first_mongo_id()
178
+ self.init_data_db3()
179
+
180
+ def two_send(self):
181
+ """
182
+ 分布式的send方法
183
+ :return:
184
+ """
185
+ self.init_db3()
186
+ self.init_mq()
187
+ self.send_db3()
188
+
189
+ def two_recv(self):
190
+ self.init_db3()
191
+ self.init_mq()
192
+ self.recv()
193
+
194
+
195
+ class MoveMongodbThreadRun(MThreadingRun2):
196
+ def __init__(self, num, conf):
197
+ super(MoveMongodbThreadRun, self).__init__(num)
198
+ self.thread_pool.work_queue.set_size(10)
199
+ self.loop = asyncio.new_event_loop()
200
+ asyncio.set_event_loop(self.loop)
201
+
202
+ self.mvmc = MoveMongodbColl(conf)
203
+ self.mvmc.init_mq()
204
+ self.mvmc.init_conn_mongodb()
205
+ self.mvmc.use_send.callback2 = self.callback2
206
+ self.lists = []
207
+ self.info_list = []
208
+ self.is_many_move = False
209
+ # 配置二维数组的每组数据数量,如果为批量转移 建议设置为10000
210
+ self.num_list = 100
211
+ self.num_info_list = 10
212
+
213
def callback2(self, ch, method, properties, body):
    """
    MQ message callback: buffer the decoded message and submit a job once
    enough groups have been collected.

    :param ch: unused here
    :param method: unused here
    :param properties: unused here
    :param body: bytes holding one JSON document
    :return: None
    """
    self.lists.append(json.loads(body.decode()))

    # In bulk mode, pause for 10 seconds while at least 3 jobs are queued.
    if self.is_many_move and self.thread_pool.work_queue.get_size() >= 3:
        time.sleep(10)

    # A full group is snapshotted into the two-dimensional list and the
    # working buffer is emptied for the next group.
    if len(self.lists) >= self.num_list:
        group = copy.deepcopy(self.lists)
        self.info_list.append(group)
        self.lists.clear()
        print(len(self.info_list))

    # Enough groups collected: submit a snapshot of them as one job.
    if len(self.info_list) >= self.num_info_list:
        batch = copy.deepcopy(self.info_list)
        self.add_job(self.func, batch)
        self.info_list.clear()
230
+
231
def set_task(self, threadval: ThreadVal, *args, **kwargs):
    """
    Task hook: call get_mq() on the helper's use_send object.

    :param threadval: unused here
    :return: None
    """
    receiver = self.mvmc.use_send
    receiver.get_mq()
233
+
234
def deal_results(self, threadval: ThreadVal, *args, **kwargs):
    """
    Drain the result queue forever, handling each (tag, payload) pair.

    "err" and "err_Exception" payloads are appended to a file under
    self.mvmc.conf.error_dir; "right" payloads are passed to
    send_update_info. Any other tag is only acknowledged.

    :param threadval: provides the result queue via get_result_queue()
    :return: never returns
    """
    pending = threadval.get_result_queue()
    while True:
        while not pending.is_empty():
            tag, payload = pending.get()

            # Pick the error file matching the tag, if any.
            if tag == "err":
                error_name = "err_parse_2.txt"
            elif tag == "err_Exception":
                error_name = "err_Exception_2.txt"
            else:
                error_name = None

            if error_name is not None:
                target = BaseFile.get_new_filename(self.mvmc.conf.error_dir, error_name)
                BaseFile.single_add_file(target, payload + '\n')
            elif tag == 'right':
                self.send_update_info(payload)

            # Acknowledge every item, whatever its tag.
            self.thread_pool.result_queue.task_done()
        # Queue is empty: wait a second before polling again.
        time.sleep(1)
250
+
251
def send_update_info(self, _id):
    """
    Publish {"_id": _id} as a JSON string, retrying until it is accepted.

    :param _id: identifier to publish; must be JSON-serialisable
    :return: None
    """
    message = json.dumps({"_id": _id})
    # send_mq returning a falsy value means "not sent": wait one second
    # and try again.
    while not self.mvmc.use_work.send_mq(message, num=10000):
        time.sleep(1)
260
+
261
def setProxy(self, threadval: ThreadVal, proxysList=None):
    """
    Proxy hook; this implementation only sleeps for 60 seconds.

    :param threadval: unused here
    :param proxysList: unused here
    :return: None
    """
    time.sleep(60)
263
+
264
def is_break(self):
    """
    Always return False.

    :return: False
    """
    return False
266
+
267
def thread_pool_hook(self, threadinfo: ThreadInfo):
    """
    Thread-pool hook: turn off restarting for the proxy thread.

    :param threadinfo: descriptor of the thread being inspected
    :return: an empty dict
    """
    is_proxy_thread = threadinfo.get_thread_name() == self.etn.proxythreadname
    if is_proxy_thread:
        # The proxy thread must not be restarted (it would be by default).
        # No such override is applied to the task thread.
        threadinfo.set_is_restart(False)
    return dict()
274
+
275
def doc_hook(self, item):
    """
    Return the fetched document unchanged.

    par_html and par_html_many pass every fetched document through this
    method before inserting it (presumably so subclasses can override it
    to transform documents - confirm with the callers).

    :param item: the fetched document
    :return: the same object
    """
    return item
277
+
278
async def par_html(self, result_queue, lists):
    """
    Copy documents one at a time from the source to the target collection.

    For each entry the document is fetched by its "_id" from
    self.mvmc.bs, passed through doc_hook and inserted into self.mvmc.bs2.
    Outcomes are reported on result_queue:

    * ("right", _id) - inserted, or already present (DuplicateKeyError)
    * ("err_Exception", "<_id>:<traceback>") - any other failure

    :param result_queue: queue-like object with a put() method
    :param lists: iterable of dicts, each carrying an "_id" key
    :return: None
    """
    for info in lists:
        _id = info["_id"]
        try:
            try:
                # Insert only when the document is not there yet.
                item = await self.mvmc.bs.select_one({"_id": _id})
                item_result = self.doc_hook(item)
                await self.mvmc.bs2.insert_one(item_result)
                print("{}插入".format(_id))
                result_queue.put(("right", _id))
            except DuplicateKeyError:
                print("{}存在".format(_id))
                result_queue.put(("right", _id))
            except Exception:
                traceback.print_exc()
                # Format rather than concatenate: _id is not necessarily a
                # str, and "+" would raise TypeError inside this handler.
                result_queue.put(("err_Exception", "{}:{}".format(_id, traceback.format_exc())))
        except Exception:
            # Reached only when one of the handlers above failed itself.
            traceback.print_exc()
            result_queue.put(("err_Exception", "{}:{}".format(_id, traceback.format_exc())))
299
+
300
async def par_html_many(self, result_queue, lists):
    """
    Copy documents in bulk from the source to the target collection.

    The ids in ``lists`` are processed in slices of 500: the matching
    documents are fetched from self.mvmc.bs with an ``$in`` query, passed
    through doc_hook and accumulated in ``insert_list``. Once at least 100
    documents are accumulated they are written with insert_many on
    self.mvmc.bs2, and every inserted id is reported as ("right", _id).
    Whatever is left after the last slice is written the same way.

    Failures other than DuplicateKeyError are reported as
    ("err_Exception", <traceback text>). ``insert_list`` is cleared only
    after a successful insert_many.

    NOTE(review): the nesting of the print / ``>= 100`` check relative to
    the ``for item in docs`` loop should be confirmed against the upstream
    file; here they run once per slice, after the loop.

    :param result_queue: queue-like object with a put() method
    :param lists: sequence of dicts, each carrying an "_id" key
    :return: None
    """
    insert_list = []
    for i in range(0, len(lists), 500):
        lists_item = lists[i:i + 500]
        try:
            try:
                def deal_dicts(dd):
                    return dd["_id"]
                # Reduce the slice of dicts to a plain list of ids.
                lists_item = list(map(deal_dicts, lists_item))
                # Insert only when the document is not there yet.
                docs = await self.mvmc.bs.select({"_id": {"$in": lists_item}})
                for item in docs:
                    item_result = self.doc_hook(item)
                    insert_list.append(item_result)
                print("获取mongo num:" + str(len(insert_list)))
                if len(insert_list) >= 100:
                    start_time = time.time()
                    result = await self.mvmc.bs2.insert_many(insert_list)
                    print("百条数据插入时间:" + str(time.time() - start_time))
                    for _id in result.inserted_ids:
                        print("{}插入".format(_id))
                        result_queue.put(("right", _id))
                    insert_list.clear()
            except DuplicateKeyError as e:
                # Nothing is reported on the queue for a duplicate key.
                print("有数据存在,无法插入")

            except Exception as e:
                traceback.print_exc()
                result_queue.put(("err_Exception", traceback.format_exc()))
        except Exception as e:
            # Reached only when one of the handlers above failed itself.
            traceback.print_exc()
            result_queue.put(("err_Exception", traceback.format_exc()))

    # Write whatever is still accumulated after the last slice.
    if len(insert_list) > 0:
        try:
            result = await self.mvmc.bs2.insert_many(insert_list)
            for _id in result.inserted_ids:
                print("{}插入".format(_id))
                result_queue.put(("right", _id))
            insert_list.clear()
        except DuplicateKeyError as e:
            print("存在某个key 批量插入失败")
        except Exception as e:
            traceback.print_exc()
            result_queue.put(("err_Exception", traceback.format_exc()))
345
+
346
def fun(self, threadval, *args, **kwargs):
    """
    Worker entry point: run one coroutine per group on self.loop and wait
    for all of them to finish.

    Each element of ``args[0]`` is handed to par_html_many when
    self.is_many_move is true, otherwise to par_html.

    :param threadval: provides the result queue via get_result_queue()
    :param args: args[0] is an iterable of groups (a two-dimensional list)
    :return: None
    """
    result_queue = threadval.get_result_queue()
    worker = self.par_html_many if self.is_many_move else self.par_html

    # asyncio.wait() no longer accepts bare coroutines (deprecated in 3.8,
    # rejected from 3.11), so wrap each one in a task bound to self.loop.
    tasks = [
        self.loop.create_task(worker(result_queue, lists))
        for lists in args[0]
    ]

    # asyncio.wait() raises ValueError on an empty collection; with no
    # groups there is nothing to run.
    # NOTE(review): self.loop is shared by every caller of fun(); confirm
    # it is never driven from two threads at the same time.
    if tasks:
        self.loop.run_until_complete(asyncio.wait(tasks))