re-common 2.0.1__py3-none-any.whl → 10.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196) hide show
  1. re_common/baselibrary/__init__.py +4 -0
  2. re_common/baselibrary/baseabs/__init__.py +7 -0
  3. re_common/baselibrary/baseabs/baseabs.py +26 -0
  4. re_common/baselibrary/database/__init__.py +0 -0
  5. re_common/baselibrary/database/mbuilder.py +132 -0
  6. re_common/baselibrary/database/moudle.py +93 -0
  7. re_common/baselibrary/database/msqlite3.py +194 -0
  8. re_common/baselibrary/database/mysql.py +169 -0
  9. re_common/baselibrary/database/sql_factory.py +26 -0
  10. re_common/baselibrary/mthread/MThreadingRun.py +486 -0
  11. re_common/baselibrary/mthread/MThreadingRunEvent.py +349 -0
  12. re_common/baselibrary/mthread/__init__.py +3 -0
  13. re_common/baselibrary/mthread/mythreading.py +695 -0
  14. re_common/baselibrary/pakge_other/__init__.py +0 -0
  15. re_common/baselibrary/pakge_other/socks.py +404 -0
  16. re_common/baselibrary/readconfig/__init__.py +0 -0
  17. re_common/baselibrary/readconfig/config_factory.py +18 -0
  18. re_common/baselibrary/readconfig/ini_config.py +317 -0
  19. re_common/baselibrary/readconfig/toml_config.py +49 -0
  20. re_common/baselibrary/temporary/__init__.py +0 -0
  21. re_common/baselibrary/temporary/envdata.py +36 -0
  22. re_common/baselibrary/tools/__init__.py +0 -0
  23. re_common/baselibrary/tools/all_requests/__init__.py +0 -0
  24. re_common/baselibrary/tools/all_requests/aiohttp_request.py +118 -0
  25. re_common/baselibrary/tools/all_requests/httpx_requet.py +102 -0
  26. re_common/baselibrary/tools/all_requests/mrequest.py +412 -0
  27. re_common/baselibrary/tools/all_requests/requests_request.py +81 -0
  28. re_common/baselibrary/tools/batch_compre/__init__.py +0 -0
  29. re_common/baselibrary/tools/batch_compre/bijiao_batch.py +31 -0
  30. re_common/baselibrary/tools/contrast_db3.py +123 -0
  31. re_common/baselibrary/tools/copy_file.py +39 -0
  32. re_common/baselibrary/tools/db3_2_sizedb3.py +102 -0
  33. re_common/baselibrary/tools/foreachgz.py +40 -0
  34. re_common/baselibrary/tools/get_attr.py +11 -0
  35. re_common/baselibrary/tools/image_to_pdf.py +62 -0
  36. re_common/baselibrary/tools/java_code_deal.py +139 -0
  37. re_common/baselibrary/tools/javacode.py +79 -0
  38. re_common/baselibrary/tools/mdb_db3.py +48 -0
  39. re_common/baselibrary/tools/merge_file.py +171 -0
  40. re_common/baselibrary/tools/merge_gz_file.py +165 -0
  41. re_common/baselibrary/tools/mhdfstools/__init__.py +0 -0
  42. re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py +42 -0
  43. re_common/baselibrary/tools/mhdfstools/hdfst.py +42 -0
  44. re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py +38 -0
  45. re_common/baselibrary/tools/mongo_tools.py +50 -0
  46. re_common/baselibrary/tools/move_file.py +170 -0
  47. re_common/baselibrary/tools/move_mongo/__init__.py +0 -0
  48. re_common/baselibrary/tools/move_mongo/mongo_table_to_file.py +63 -0
  49. re_common/baselibrary/tools/move_mongo/move_mongo_table.py +354 -0
  50. re_common/baselibrary/tools/move_mongo/use_mttf.py +18 -0
  51. re_common/baselibrary/tools/move_mongo/use_mv.py +93 -0
  52. re_common/baselibrary/tools/mpandas/__init__.py +0 -0
  53. re_common/baselibrary/tools/mpandas/mpandasreadexcel.py +125 -0
  54. re_common/baselibrary/tools/mpandas/pandas_visualization.py +8 -0
  55. re_common/baselibrary/tools/myparsel.py +104 -0
  56. re_common/baselibrary/tools/rename_dir_file.py +37 -0
  57. re_common/baselibrary/tools/sequoiadb_utils.py +398 -0
  58. re_common/baselibrary/tools/split_line_to_many.py +25 -0
  59. re_common/baselibrary/tools/stringtodicts.py +33 -0
  60. re_common/baselibrary/tools/workwechant_bot.py +84 -0
  61. re_common/baselibrary/utils/__init__.py +0 -0
  62. re_common/baselibrary/utils/baseaiohttp.py +296 -0
  63. re_common/baselibrary/utils/baseaiomysql.py +87 -0
  64. re_common/baselibrary/utils/baseallstep.py +191 -0
  65. re_common/baselibrary/utils/baseavro.py +19 -0
  66. re_common/baselibrary/utils/baseboto3.py +291 -0
  67. re_common/baselibrary/utils/basecsv.py +32 -0
  68. re_common/baselibrary/utils/basedict.py +133 -0
  69. re_common/baselibrary/utils/basedir.py +241 -0
  70. re_common/baselibrary/utils/baseencode.py +351 -0
  71. re_common/baselibrary/utils/baseencoding.py +29 -0
  72. re_common/baselibrary/utils/baseesdsl.py +86 -0
  73. re_common/baselibrary/utils/baseexcel.py +264 -0
  74. re_common/baselibrary/utils/baseexcept.py +109 -0
  75. re_common/baselibrary/utils/basefile.py +654 -0
  76. re_common/baselibrary/utils/baseftp.py +214 -0
  77. re_common/baselibrary/utils/basegzip.py +60 -0
  78. re_common/baselibrary/utils/basehdfs.py +135 -0
  79. re_common/baselibrary/utils/basehttpx.py +268 -0
  80. re_common/baselibrary/utils/baseip.py +87 -0
  81. re_common/baselibrary/utils/basejson.py +2 -0
  82. re_common/baselibrary/utils/baselist.py +32 -0
  83. re_common/baselibrary/utils/basemotor.py +190 -0
  84. re_common/baselibrary/utils/basemssql.py +98 -0
  85. re_common/baselibrary/utils/baseodbc.py +113 -0
  86. re_common/baselibrary/utils/basepandas.py +302 -0
  87. re_common/baselibrary/utils/basepeewee.py +11 -0
  88. re_common/baselibrary/utils/basepika.py +180 -0
  89. re_common/baselibrary/utils/basepydash.py +143 -0
  90. re_common/baselibrary/utils/basepymongo.py +230 -0
  91. re_common/baselibrary/utils/basequeue.py +22 -0
  92. re_common/baselibrary/utils/baserar.py +57 -0
  93. re_common/baselibrary/utils/baserequest.py +279 -0
  94. re_common/baselibrary/utils/baseset.py +8 -0
  95. re_common/baselibrary/utils/basesmb.py +403 -0
  96. re_common/baselibrary/utils/basestring.py +382 -0
  97. re_common/baselibrary/utils/basetime.py +320 -0
  98. re_common/baselibrary/utils/basetuple.py +0 -0
  99. re_common/baselibrary/utils/baseurl.py +121 -0
  100. re_common/baselibrary/utils/basezip.py +57 -0
  101. re_common/baselibrary/utils/core/__init__.py +8 -0
  102. re_common/baselibrary/utils/core/bottomutils.py +18 -0
  103. re_common/baselibrary/utils/core/mdeprecated.py +327 -0
  104. re_common/baselibrary/utils/core/mlamada.py +16 -0
  105. re_common/baselibrary/utils/core/msginfo.py +25 -0
  106. re_common/baselibrary/utils/core/requests_core.py +103 -0
  107. re_common/baselibrary/utils/fateadm.py +429 -0
  108. re_common/baselibrary/utils/importfun.py +123 -0
  109. re_common/baselibrary/utils/mfaker.py +57 -0
  110. re_common/baselibrary/utils/my_abc/__init__.py +3 -0
  111. re_common/baselibrary/utils/my_abc/better_abc.py +32 -0
  112. re_common/baselibrary/utils/mylogger.py +414 -0
  113. re_common/baselibrary/utils/myredisclient.py +861 -0
  114. re_common/baselibrary/utils/pipupgrade.py +21 -0
  115. re_common/baselibrary/utils/ringlist.py +85 -0
  116. re_common/baselibrary/utils/version_compare.py +36 -0
  117. re_common/baselibrary/utils/ydmhttp.py +126 -0
  118. re_common/facade/__init__.py +1 -0
  119. re_common/facade/lazy_import.py +11 -0
  120. re_common/facade/loggerfacade.py +25 -0
  121. re_common/facade/mysqlfacade.py +467 -0
  122. re_common/facade/now.py +31 -0
  123. re_common/facade/sqlite3facade.py +257 -0
  124. re_common/facade/use/__init__.py +0 -0
  125. re_common/facade/use/mq_use_facade.py +83 -0
  126. re_common/facade/use/proxy_use_facade.py +20 -0
  127. re_common/libtest/__init__.py +0 -0
  128. re_common/libtest/base_dict_test.py +19 -0
  129. re_common/libtest/baseavro_test.py +13 -0
  130. re_common/libtest/basefile_test.py +14 -0
  131. re_common/libtest/basemssql_test.py +77 -0
  132. re_common/libtest/baseodbc_test.py +8 -0
  133. re_common/libtest/basepandas_test.py +38 -0
  134. re_common/libtest/get_attr_test/__init__.py +0 -0
  135. re_common/libtest/get_attr_test/get_attr_test_settings.py +14 -0
  136. re_common/libtest/get_attr_test/settings.py +55 -0
  137. re_common/libtest/idencode_test.py +54 -0
  138. re_common/libtest/iniconfig_test.py +35 -0
  139. re_common/libtest/ip_test.py +35 -0
  140. re_common/libtest/merge_file_test.py +20 -0
  141. re_common/libtest/mfaker_test.py +9 -0
  142. re_common/libtest/mm3_test.py +32 -0
  143. re_common/libtest/mylogger_test.py +89 -0
  144. re_common/libtest/myparsel_test.py +28 -0
  145. re_common/libtest/mysql_test.py +151 -0
  146. re_common/libtest/pymongo_test.py +21 -0
  147. re_common/libtest/split_test.py +12 -0
  148. re_common/libtest/sqlite3_merge_test.py +6 -0
  149. re_common/libtest/sqlite3_test.py +34 -0
  150. re_common/libtest/tomlconfig_test.py +30 -0
  151. re_common/libtest/use_tools_test/__init__.py +3 -0
  152. re_common/libtest/user/__init__.py +5 -0
  153. re_common/studio/__init__.py +5 -0
  154. re_common/studio/assignment_expressions.py +37 -0
  155. re_common/studio/mydash/__init__.py +0 -0
  156. re_common/studio/mydash/test1.py +19 -0
  157. re_common/studio/pydashstudio/__init__.py +0 -0
  158. re_common/studio/pydashstudio/first.py +9 -0
  159. re_common/studio/streamlitstudio/__init__.py +0 -0
  160. re_common/studio/streamlitstudio/first_app.py +66 -0
  161. re_common/studio/streamlitstudio/uber_pickups.py +24 -0
  162. re_common/studio/test.py +19 -0
  163. re_common/v2/baselibrary/utils/author_smi.py +14 -3
  164. re_common/v2/baselibrary/utils/stringutils.py +1 -0
  165. re_common/vip/__init__.py +0 -0
  166. re_common/vip/base_step_process.py +11 -0
  167. re_common/vip/baseencodeid.py +91 -0
  168. re_common/vip/changetaskname.py +28 -0
  169. re_common/vip/core_var.py +24 -0
  170. re_common/vip/mmh3Hash.py +90 -0
  171. re_common/vip/proxy/__init__.py +0 -0
  172. re_common/vip/proxy/allproxys.py +127 -0
  173. re_common/vip/proxy/allproxys_thread.py +159 -0
  174. re_common/vip/proxy/cnki_proxy.py +153 -0
  175. re_common/vip/proxy/kuaidaili.py +87 -0
  176. re_common/vip/proxy/proxy_all.py +113 -0
  177. re_common/vip/proxy/update_kuaidaili_0.py +42 -0
  178. re_common/vip/proxy/wanfang_proxy.py +152 -0
  179. re_common/vip/proxy/wp_proxy_all.py +182 -0
  180. re_common/vip/read_rawid_to_txt.py +92 -0
  181. re_common/vip/title/__init__.py +5 -0
  182. re_common/vip/title/transform/TransformBookTitleToZt.py +125 -0
  183. re_common/vip/title/transform/TransformConferenceTitleToZt.py +139 -0
  184. re_common/vip/title/transform/TransformCstadTitleToZt.py +196 -0
  185. re_common/vip/title/transform/TransformJournalTitleToZt.py +203 -0
  186. re_common/vip/title/transform/TransformPatentTitleToZt.py +132 -0
  187. re_common/vip/title/transform/TransformRegulationTitleToZt.py +114 -0
  188. re_common/vip/title/transform/TransformStandardTitleToZt.py +135 -0
  189. re_common/vip/title/transform/TransformThesisTitleToZt.py +135 -0
  190. re_common/vip/title/transform/__init__.py +11 -0
  191. {re_common-2.0.1.dist-info → re_common-10.0.1.dist-info}/METADATA +1 -1
  192. re_common-10.0.1.dist-info/RECORD +213 -0
  193. re_common-2.0.1.dist-info/RECORD +0 -25
  194. {re_common-2.0.1.dist-info → re_common-10.0.1.dist-info}/LICENSE +0 -0
  195. {re_common-2.0.1.dist-info → re_common-10.0.1.dist-info}/WHEEL +0 -0
  196. {re_common-2.0.1.dist-info → re_common-10.0.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,354 @@
1
+ import asyncio
2
+ import copy
3
+ import datetime
4
+ import json
5
+ import time
6
+
7
+ from pymongo.errors import DuplicateKeyError
8
+ import traceback
9
+
10
+ from re_common.baselibrary.mthread.MThreadingRun import MThreadingRun2
11
+ from re_common.baselibrary.mthread.mythreading import ThreadVal, ThreadInfo
12
+ from re_common.baselibrary.utils.basefile import BaseFile
13
+ from re_common.baselibrary.utils.basemotor import BaseMotor
14
+ from re_common.baselibrary.utils.basepymongo import BasePyMongo
15
+ from re_common.facade.sqlite3facade import Sqlite3Utiles
16
+ from re_common.facade.use.mq_use_facade import UseMq
17
+
18
+
19
class Configs(object):
    """Default settings for moving one MongoDB collection to another.

    Every value is a plain instance attribute so that callers can override
    individual settings after construction.
    """

    def __init__(self):
        # SQLite ledger file that tracks which ids still have to be moved.
        self.db3_path = r"F:\fun2\test_images.db3"
        self.db3_encoding = "utf-8"

        # Source collection: one synchronous (pymongo) connection string and
        # one asynchronous (motor) connection string.
        self.mgdb_conn = "mongodb://192.168.31.30:32417/"
        self.mgdb_conn_motor = "mongodb://192.168.31.30:32417/htmljson.wanfang_ref?authSource=htmljson"
        self.mgdb_db = "htmljson"
        self.mgdb_col = "wanfang_ref"

        # Target collection (motor only).
        # NOTE(review): this URI embeds credentials in source code.
        self.mgdb_conn2_motor = "mongodb://cjrw:vipdatacenter@192.168.31.243:32920,192.168.31.206:32920,192.168.31.208:32920/?authSource=htmljson"
        self.mgdb_db2 = "htmljson"
        self.mgdb_col2 = "wanfang_ref"

        # Queue carrying ids to the workers, and queue carrying
        # acknowledgements back from them.
        self.mq_name = "mongodb.move.send"
        self.mq_name_work = "mongodb.move.worker"

        # Directory that receives the error log files.
        self.error_dir = r"F:\fun2\log"
37
+
38
+
39
class MoveMongodbColl(object):
    """Bookkeeping side of a MongoDB collection move.

    The ``_id`` values of the source collection are stored in a SQLite table
    ``cxids`` together with a ``stat`` column:

    * ``0``  - inserted by :meth:`init_data_db3`, not sent yet;
    * ``-1`` - set by :meth:`send_db3` just before the id is sent;
    * ``1``  - set by :meth:`callback2` once the id comes back on the
      worker queue.
    """

    def __init__(self, conf):
        """Keep the configuration object and start with empty buffers."""
        self.conf = conf
        # Paging cursor used while scanning the source collection.
        self.first_id = ""
        # Ids waiting to be sent / ids received but not yet written back.
        self.id_list = []
        self.recv_list = []

    def init_conn_mongodb(self):
        """Open the pymongo source handle and both motor handles."""
        conf = self.conf

        self.basemongo = BasePyMongo(conf.mgdb_conn)
        self.basemongo.use_db(conf.mgdb_db)
        self.basemongo.create_col(conf.mgdb_col)

        # Asynchronous handle on the source collection.
        self.bs = BaseMotor()
        self.bs.AsyncIOMotorClient(conf.mgdb_conn_motor, conf.mgdb_db)
        self.bs.get_col(conf.mgdb_col)

        # Asynchronous handle on the target collection.
        self.bs2 = BaseMotor()
        self.bs2.AsyncIOMotorClient(conf.mgdb_conn2_motor, conf.mgdb_db2)
        self.bs2.get_col(conf.mgdb_col2)

    def create_db3_table(self):
        """
        Drop and re-create the ``cxids`` table.
        :return:
        """
        statements = [
            "PRAGMA foreign_keys = false;",
            'DROP TABLE IF EXISTS "cxids";',
            'CREATE TABLE "cxids" ("ids" TEXT NOT NULL,"stat" integer NOT NULL DEFAULT 0,PRIMARY KEY ("ids"));',
            'PRAGMA foreign_keys = true;',
        ]
        self.db3.ExeSqlliteList(statements)

    def init_db3(self):
        """Open the SQLite ledger configured in ``conf.db3_path``."""
        self.db3 = Sqlite3Utiles().Sqlite3DBConnectFromFilePath(self.conf.db3_path, encoding=self.conf.db3_encoding)

    def init_mq(self):
        """Create the send-queue and worker-queue helpers."""
        self.use_send = UseMq(self.conf.mq_name)
        self.use_work = UseMq(self.conf.mq_name_work)

    def send_list(self):
        """Publish every buffered id as ``{"_id": ...}`` JSON, then clear the buffer.

        Polls once per second until ``get_server_mq_num(10000)`` is truthy
        (presumably "the queue has room" - confirm against ``UseMq``).
        """
        while not self.use_send.get_server_mq_num(10000):
            time.sleep(1)
        for mongo_id in self.id_list:
            info_str = json.dumps({'_id': mongo_id})
            print(info_str)
            self.use_send.easy_send_mq(info_str)
        self.id_list.clear()

    def _mark_sent_and_send(self):
        """Set ``stat = -1`` for the buffered ids and publish them.

        Does nothing for an empty buffer. A single id needs its own
        statement because a one-element tuple renders as ``('x',)``, which
        is not a valid SQL list.
        """
        buffered = len(self.id_list)
        if buffered == 0:
            return
        if buffered == 1:
            sql = "update cxids set stat = -1 where ids='{}'".format(self.id_list[0])
        else:
            sql = "update cxids set stat = -1 where ids in {}".format(tuple(self.id_list))
        self.db3.ExeSqlliteSql(sql)
        self.send_list()

    def send_db3(self):
        """Send all ids with ``stat=0``, then keep re-sending ``stat=-1`` ids.

        Returns once neither query yields rows.
        """
        unsent_sql = 'select * from cxids where stat=0 limit 20000'
        unacked_sql = 'select * from cxids where stat=-1 limit 20000'
        while True:
            rows = self.db3.SelectFromSqlliteFetchall(unsent_sql)
            if len(rows) == 0:
                print('查询结束 0 状态结束 查询-1状态 time sleep 60s')
                # Wait before looking at ids that were sent but never
                # came back.
                time.sleep(60)
                rows = self.db3.SelectFromSqlliteFetchall(unacked_sql)
                if len(rows) == 0:
                    print('查询结束 -1 状态结束 结束发送')
                    break
            for row in rows:
                self.id_list.append(row[0])
                if len(self.id_list) >= 10000:
                    self._mark_sent_and_send()

            # Flush whatever is left from this page of rows.
            self._mark_sent_and_send()

    def callback2(self, ch, method, properties, body):
        """Queue callback: buffer the returned id; every 500 ids set ``stat = 1``."""
        self.recv_list.append(json.loads(body)['_id'])
        if len(self.recv_list) < 500:
            return
        sql = "update cxids set stat = 1 where ids in {}".format(tuple(self.recv_list))
        if self.db3.ExeSqlliteSql(sql):
            self.recv_list.clear()
        else:
            # Keep the ids buffered so the next callback tries again.
            print('[{}]未更新stat条数{}'.format(datetime.datetime.now(), len(self.recv_list)))

    def recv(self, results=None, *args, **kwargs):
        """Install :meth:`callback2` on the worker queue and start consuming."""
        self.use_work.callback2 = self.callback2
        self.use_work.get_mq()

    def get_first_mongo_id(self):
        """Store the smallest ``_id`` greater than ``self.first_id`` and print it."""
        cursor = self.basemongo.find({"_id": {"$gt": self.first_id}}, {"_id": 1}).sort([("_id", 1)]).limit(1)
        for doc in cursor:
            self.first_id = doc["_id"]
        print("first_id is:" + self.first_id)

    def init_data_db3(self):
        """Copy every ``_id`` from ``self.first_id`` onwards into ``cxids``.

        Pages through the collection in ``_id`` order, 1,000,000 documents at
        a time. Each page restarts at the last id seen (``$gte``), so a page
        that contains only that id, or that adds nothing, ends the scan.
        """
        total = 0
        previous_total = -1
        insert_sql = "insert or ignore into cxids(`ids`,`stat`) values (?,?)"
        while True:
            page = []
            cursor = self.basemongo.find({"_id": {"$gte": self.first_id}}, {"_id": 1}).sort([("_id", 1)]).limit(
                1000000)
            for doc in cursor:
                total += 1
                self.first_id = doc["_id"]
                page.append((doc["_id"], 0))
                if total % 10000 == 1:
                    print(len(page))

            self.db3.ExeSqlliteMany(insert_sql, page)
            print(total)
            if previous_total == total:
                break
            if len(page) == 1:
                break
            previous_total = total
            page.clear()

    def one_init(self):
        """
        Step one: load the source ids into the db3 ledger.
        :return:
        """
        self.init_conn_mongodb()
        self.init_db3()
        self.create_db3_table()
        self.get_first_mongo_id()
        self.init_data_db3()

    def two_send(self):
        """
        Sender side of the distributed move.
        :return:
        """
        self.init_db3()
        self.init_mq()
        self.send_db3()

    def two_recv(self):
        """Receiver side of the distributed move."""
        self.init_db3()
        self.init_mq()
        self.recv()
193
+
194
+
195
class MoveMongodbThreadRun(MThreadingRun2):
    """Worker side of the move: read ids from the send queue, copy documents.

    Ids arriving on the send queue are grouped into a two-level list
    (``num_info_list`` groups of ``num_list`` ids) and handed to the thread
    pool. :meth:`fun` copies each group from the source to the target
    collection on a private asyncio loop, and :meth:`deal_results` reports
    the outcome (acknowledgement on the worker queue, or an error file).
    """

    def __init__(self, num, conf):
        """
        :param num: forwarded unchanged to ``MThreadingRun2.__init__``
        :param conf: settings object handed to ``MoveMongodbColl``
        """
        super(MoveMongodbThreadRun, self).__init__(num)
        self.thread_pool.work_queue.set_size(10)
        self.loop = asyncio.new_event_loop()
        asyncio.set_event_loop(self.loop)

        self.mvmc = MoveMongodbColl(conf)
        self.mvmc.init_mq()
        self.mvmc.init_conn_mongodb()
        self.mvmc.use_send.callback2 = self.callback2
        # Ids of the group currently being filled / groups ready for a job.
        self.lists = []
        self.info_list = []
        self.is_many_move = False
        # Ids per group; for bulk moves a value around 10000 is suggested.
        self.num_list = 100
        # Groups per submitted job.
        self.num_info_list = 10

    def callback2(self, ch, method, properties, body):
        """Queue callback: buffer one id message and submit a job when full."""
        json_data = json.loads(body.decode())
        self.lists.append(json_data)
        if self.is_many_move:
            # Throttle the consumer while the pool still has work queued.
            work_size = self.thread_pool.work_queue.get_size()
            if work_size >= 3:
                time.sleep(10)
        if len(self.lists) >= self.num_list:
            self.info_list.append(copy.deepcopy(self.lists))
            self.lists.clear()
            print(len(self.info_list))

        if len(self.info_list) >= self.num_info_list:
            # NOTE(review): ``self.func`` is not defined in this class;
            # presumably MThreadingRun2 binds it to ``fun`` - confirm.
            self.add_job(self.func, copy.deepcopy(self.info_list))
            self.info_list.clear()

    def set_task(self, threadval: ThreadVal, *args, **kwargs):
        """Task thread body: start consuming the send queue."""
        self.mvmc.use_send.get_mq()

    def deal_results(self, threadval: ThreadVal, *args, **kwargs):
        """Result thread body: drain the result queue forever.

        ``("right", _id)`` is acknowledged on the worker queue; ``"err"`` and
        ``"err_Exception"`` payloads are appended to files in
        ``conf.error_dir``.
        """
        result_queue = threadval.get_result_queue()
        error_files = {
            "err": "err_parse_2.txt",
            "err_Exception": "err_Exception_2.txt",
        }
        while True:
            while not result_queue.is_empty():
                t_1, t_2 = result_queue.get()
                if t_1 in error_files:
                    file_path = BaseFile.get_new_filename(self.mvmc.conf.error_dir, error_files[t_1])
                    BaseFile.single_add_file(file_path, t_2 + '\n')
                elif t_1 == 'right':
                    self.send_update_info(t_2)
                self.thread_pool.result_queue.task_done()
            time.sleep(1)

    def send_update_info(self, _id):
        """Publish ``{"_id": _id}`` on the worker queue, retrying every second."""
        info_str = json.dumps({"_id": _id})
        while not self.mvmc.use_work.send_mq(info_str, num=10000):
            time.sleep(1)

    def setProxy(self, threadval: ThreadVal, proxysList=None):
        """Proxy thread body: no proxies are used here, so just sleep."""
        time.sleep(60)

    def is_break(self):
        """Always ``False``."""
        return False

    def thread_pool_hook(self, threadinfo: ThreadInfo):
        """Mark the proxy thread as not restartable; returns an empty dict."""
        # The proxy thread would be restarted by default.
        if threadinfo.get_thread_name() == self.etn.proxythreadname:
            threadinfo.set_is_restart(False)
        return {}

    def doc_hook(self, item):
        """Transform a document before insertion; the default is identity."""
        return item

    async def par_html(self, result_queue, lists):
        """Copy the documents named in ``lists`` one at a time.

        :param result_queue: queue receiving ``("right", _id)`` or
            ``("err_Exception", text)`` tuples
        :param lists: iterable of dicts that each carry an ``"_id"`` key
        """
        for info in lists:
            _id = info["_id"]
            try:
                item = await self.mvmc.bs.select_one({"_id": _id})
                item_result = self.doc_hook(item)
                await self.mvmc.bs2.insert_one(item_result)
                print("{}插入".format(_id))
                result_queue.put(("right", _id))
            except DuplicateKeyError:
                # Already present in the target counts as moved.
                print("{}存在".format(_id))
                result_queue.put(("right", _id))
            except Exception:
                traceback.print_exc()
                # format() instead of "+": ``_id`` is not guaranteed to be a
                # str, and a TypeError here would escape the handler.
                result_queue.put(("err_Exception", "{}:{}".format(_id, traceback.format_exc())))

    async def _insert_batch(self, result_queue, insert_list, duplicate_message, timed=False):
        """Insert ``insert_list`` into the target in one call, then empty it.

        The list is cleared even when the insert fails. Otherwise the same
        failing documents would be re-submitted together with every later
        chunk. Ids that are not acknowledged keep ``stat = -1`` in the ledger
        and are sent again by ``MoveMongodbColl.send_db3``.
        """
        start_time = time.time()
        try:
            result = await self.mvmc.bs2.insert_many(insert_list)
            if timed:
                print("百条数据插入时间:" + str(time.time() - start_time))
            for _id in result.inserted_ids:
                print("{}插入".format(_id))
                result_queue.put(("right", _id))
        except DuplicateKeyError:
            print(duplicate_message)
        except Exception:
            traceback.print_exc()
            result_queue.put(("err_Exception", traceback.format_exc()))
        finally:
            insert_list.clear()

    async def par_html_many(self, result_queue, lists):
        """Copy the documents named in ``lists`` with bulk reads and inserts.

        Ids are looked up 500 at a time; the collected documents are inserted
        once at least 100 are buffered, and any remainder at the end.
        """
        insert_list = []
        for start in range(0, len(lists), 500):
            try:
                id_chunk = [entry["_id"] for entry in lists[start:start + 500]]
                docs = await self.mvmc.bs.select({"_id": {"$in": id_chunk}})
                for item in docs:
                    insert_list.append(self.doc_hook(item))
                print("获取mongo num:" + str(len(insert_list)))
            except Exception:
                traceback.print_exc()
                result_queue.put(("err_Exception", traceback.format_exc()))
                continue
            if len(insert_list) >= 100:
                await self._insert_batch(result_queue, insert_list, "有数据存在,无法插入", timed=True)

        if len(insert_list) > 0:
            await self._insert_batch(result_queue, insert_list, "存在某个key 批量插入失败")

    def fun(self, threadval, *args, **kwargs):
        """Pool job: copy every id group in ``args[0]`` concurrently.

        :param threadval: provides the result queue
        :param args: ``args[0]`` is the two-level list built by
            :meth:`callback2`
        """
        result_queue = threadval.get_result_queue()
        worker = self.par_html_many if self.is_many_move else self.par_html
        coros = [worker(result_queue, lists) for lists in args[0]]
        if not coros:
            return

        async def _run_all():
            # asyncio.wait() rejects bare coroutines since Python 3.11.
            # gather() wraps them in tasks on the running loop instead.
            outcomes = await asyncio.gather(*coros, return_exceptions=True)
            for outcome in outcomes:
                if isinstance(outcome, BaseException):
                    traceback.print_exception(type(outcome), outcome, outcome.__traceback__)

        self.loop.run_until_complete(_run_all())
@@ -0,0 +1,18 @@
1
+ from re_common.baselibrary.tools.move_mongo.mongo_table_to_file import Configs, MongoToFile
2
+
3
+ conf = Configs()
4
+
5
+
6
def hook_doc(doc):
    """Add the MBAlib wiki URL to ``doc["step_info"]`` and return ``doc``.

    The URL is built from ``doc["step_info"]["id"]``; the document is
    modified in place.
    """
    step_info = doc["step_info"]
    id_ = step_info["id"]
    step_info["url"] = f"https://wiki.mbalib.com/wiki/{id_}"
    return doc
12
+
13
+
14
# Export driver: build the exporter, install the per-document hook, then
# connect, open the output file and run the export.
mtf = MongoToFile(conf)
mtf.hook_doc = hook_doc

mtf.init_conn_mongodb()
mtf.open_file()
mtf.asyncio_run()
@@ -0,0 +1,93 @@
1
+ import click
2
+
3
+ ###########################################
4
+ # 同项目调用基础包
5
+ import datetime
6
+ import gzip
7
+ import json
8
+ import os
9
+ import sys
10
+ import time
11
+
12
# Put the directory five path components above this file at the front of
# sys.path, so the package can be imported when the script is run directly.
filepath = os.path.abspath(__file__)
pathlist = filepath.split(os.sep)[:-5]
TopPath = os.sep.join(pathlist)
sys.path.insert(0, TopPath)
print(TopPath)
18
+ ############################################
19
+
20
+
21
+ from re_common.baselibrary.tools.move_mongo.move_mongo_table import MoveMongodbColl, MoveMongodbThreadRun
22
+
23
+
24
class Configs(object):
    """Settings for moving the ``cx_journal_detail`` collection.

    Every value is a plain instance attribute.
    """

    def __init__(self):
        # SQLite ledger file holding the ids to move.
        self.db3_path = r"D:\config\mvmg\db3\test_images.db3"
        self.db3_encoding = "utf-8"

        # First MongoDB deployment: pymongo and motor connection strings.
        self.mgdb_conn = "mongodb://192.168.31.30:32417/"
        self.mgdb_conn_motor = "mongodb://192.168.31.30:32417/htmljson.wanfang_ref?authSource=htmljson"
        self.mgdb_db = "htmljson"
        self.mgdb_col = "cx_journal_detail"

        # Second MongoDB deployment (motor only).
        # NOTE(review): this URI embeds credentials in source code.
        self.mgdb_conn2_motor = "mongodb://cjrw:vipdatacenter@192.168.31.243:32920,192.168.31.206:32920,192.168.31.208:32920/?authSource=htmljson"
        self.mgdb_db2 = "htmljson"
        self.mgdb_col2 = "cx_journal_detail"

        # Message queue names.
        self.mq_name = "mongodb.move.send"
        self.mq_name_work = "mongodb.move.worker"

        # Directory for error log files.
        self.error_dir = r"D:\config\mvmg\log"
42
+
43
+
44
+ conf = Configs()
45
+
46
+
47
def one_init():
    """Step one: load the source ids into the db3 file."""
    mover = MoveMongodbColl(conf)
    mover.one_init()
50
+
51
+
52
+ # 第二步 开始转移 使用send_work_recv结构 共开三个进程
53
+
54
def send():
    """Run the sender stage (``MoveMongodbColl.two_send``)."""
    mover = MoveMongodbColl(conf)
    mover.two_send()
56
+
57
+
58
def works():
    """Run the worker stage with its default (one-by-one) settings."""
    runner = MoveMongodbThreadRun(1, conf)
    runner.run()
60
+
61
+
62
def works_many():
    """Run the worker stage in bulk mode: 10 groups of 1000 ids per job."""
    runner = MoveMongodbThreadRun(1, conf)
    runner.is_many_move = True
    runner.num_list = 1000
    runner.num_info_list = 10
    runner.run()
68
+
69
+
70
def recv():
    """Run the receiver stage (``MoveMongodbColl.two_recv``)."""
    mover = MoveMongodbColl(conf)
    mover.two_recv()
72
+
73
+
74
@click.command()
@click.option('--name',
              help='func name')
def main(name):
    """Run the migration stage selected by --name.

    Valid names: send, one_init, works, works_many, recv.
    """
    stages = {
        "send": send,
        "one_init": one_init,
        "works": works,
        "works_many": works_many,
        "recv": recv,
    }
    stage = stages.get(name)
    if stage is None:
        # A missing or misspelled name used to exit silently with success.
        raise click.BadParameter(
            "expected one of: {}".format(", ".join(stages)),
            param_hint="--name")
    stage()


if __name__ == '__main__':
    main()
File without changes
@@ -0,0 +1,125 @@
1
+ ###############################################################
2
+ # import pandas as pd
3
+ #
4
+ # # io = r'C:\Users\xuzhu\Desktop\OutK.xlsx'
5
+ # io = r'.\raw.xlsx'
6
+ # data = pd.read_excel(io, sheet_name=0)
7
+ #
8
+ # # print(data.head())
9
+ # # print(data.tail())
10
+ # # print(data.groupby(["GCH","years"]).groups)
11
+ # onedata = data.groupby('GCH').apply(lambda t: t[t.years == t.years.min()])
12
+ # print("write excel")
13
+ # with pd.ExcelWriter('temp1.xlsx') as writer:
14
+ # onedata.to_excel(writer, sheet_name='Sheet1')
15
+
16
+ ##########################################################
17
+
18
+ # import pandas as pd
19
+ #
20
+ # # io = r'C:\Users\xuzhu\Desktop\OutK.xlsx'
21
+ # io = r'.\temp1.xlsx'
22
+ # data = pd.read_excel(io, sheet_name=0)
23
+ #
24
+ # # print(data.head())
25
+ # # print(data.tail())
26
+ # # print(data.groupby(["GCH","years"]).groups)
27
+ # onedata = data.groupby('GCH').apply(lambda t: t[t.num == t.num.min()])
28
+ # print("write excel")
29
+ # with pd.ExcelWriter('temp2.xlsx') as writer:
30
+ # onedata.to_excel(writer, sheet_name='Sheet1')
31
+
32
+ ############################################################
33
+
34
+ # import pandas as pd
35
+ #
36
+ # # io = r'C:\Users\xuzhu\Desktop\OutK.xlsx'
37
+ # io = r'.\temp2.xlsx'
38
+ # data = pd.read_excel(io, sheet_name=0)
39
+ #
40
+ # def bug_rule(x):
41
+ # gch = x.GCH
42
+ # gch1 = gch[:-1]
43
+ # return gch1
44
+ #
45
+ # data["gch5"] = data.apply(lambda x: bug_rule(x), axis=1)
46
+ # print("write excel")
47
+ # with pd.ExcelWriter('temp3.xlsx') as writer:
48
+ # data.to_excel(writer, sheet_name='Sheet1')
49
+
50
+
51
+ ###################################################
52
+ #
53
+ #
54
+ # import sys
55
+ #
56
+ # import pandas as pd
57
+ #
58
+ # # io = r'C:\Users\xuzhu\Desktop\OutK.xlsx'
59
+ # io = r'.\temp3.xlsx'
60
+ # io2 = r'.\raw2.xlsx'
61
+ # data = pd.read_excel(io, sheet_name=0)
62
+ # print(data.dtypes)
63
+ # print("****************")
64
+ # data2 = pd.read_excel(io2, sheet_name=1)
65
+ # print(data2.dtypes)
66
+ # data3 = pd.merge(data, data2, on='gch5')
67
+ #
68
+ # with pd.ExcelWriter('temp4.xlsx') as writer:
69
+ # data3.to_excel(writer, sheet_name='Sheet1')
70
+
71
+ ##########################################################
72
+
73
+
74
+ # import pandas as pd
75
+ #
76
+ # # io = r'C:\Users\xuzhu\Desktop\OutK.xlsx'
77
+ # io = r'.\temp4.xlsx'
78
+ # data = pd.read_excel(io, sheet_name=0)
79
+ #
80
+ # # print(data.head())
81
+ # # print(data.tail())
82
+ # # print(data.groupby(["GCH","years"]).groups)
83
+ # onedata = data.groupby('gch5').apply(lambda t: t[t.years == t.years.min()])
84
+ # print("write excel")
85
+ # with pd.ExcelWriter('temp5.xlsx') as writer:
86
+ # onedata.to_excel(writer, sheet_name='Sheet1')
87
+
88
+ ##########################################################
89
+
90
+ # import pandas as pd
91
+ #
92
+ # # io = r'C:\Users\xuzhu\Desktop\OutK.xlsx'
93
+ # io = r'.\temp5.xlsx'
94
+ # data = pd.read_excel(io, sheet_name=0)
95
+ #
96
+ # # print(data.head())
97
+ # # print(data.tail())
98
+ # # print(data.groupby(["GCH","years"]).groups)
99
+ # onedata = data.groupby('gch5').apply(lambda t: t[t.num == t.num.min()])
100
+ # print("write excel")
101
+ # with pd.ExcelWriter('temp6.xlsx') as writer:
102
+ # onedata.to_excel(writer, sheet_name='Sheet1')
103
+
104
+ #########################################################
105
+
106
+ import pandas as pd
107
+
108
+
109
# Compare the journal-title column ("刊名") of two workbooks and print the
# titles that occur in raw2.xlsx (second sheet) but not in temp4.xlsx.
io = r'.\temp4.xlsx'
io2 = r'.\raw2.xlsx'
data = pd.read_excel(io, sheet_name=0)
data2 = pd.read_excel(io2, sheet_name=1)

titles = data["刊名"]
titles2 = data2["刊名"]

for column in (titles, titles2):
    print(type(column))
    print(column.values.tolist())

print(set(titles2.values.tolist()) - set(titles.values.tolist()))
120
+
121
+ # onedata = data.groupby('gch5').apply(lambda t: t[t.num == t.num.min()])
122
+ # print("write excel")
123
+ # with pd.ExcelWriter('temp6.xlsx') as writer:
124
+ # onedata.to_excel(writer, sheet_name='Sheet1')
125
+
@@ -0,0 +1,8 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
# Random walk: the cumulative sum of 1000 standard-normal samples, indexed
# by consecutive days starting 2000-01-01, then plotted.
ts = pd.Series(
    np.random.randn(1000),
    index=pd.date_range('1/1/2000', periods=1000),
).cumsum()
ts.plot()