re-common 2.0.1__py3-none-any.whl → 10.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194) hide show
  1. re_common/baselibrary/__init__.py +4 -0
  2. re_common/baselibrary/baseabs/__init__.py +7 -0
  3. re_common/baselibrary/baseabs/baseabs.py +26 -0
  4. re_common/baselibrary/database/__init__.py +0 -0
  5. re_common/baselibrary/database/mbuilder.py +132 -0
  6. re_common/baselibrary/database/moudle.py +93 -0
  7. re_common/baselibrary/database/msqlite3.py +194 -0
  8. re_common/baselibrary/database/mysql.py +169 -0
  9. re_common/baselibrary/database/sql_factory.py +26 -0
  10. re_common/baselibrary/mthread/MThreadingRun.py +486 -0
  11. re_common/baselibrary/mthread/MThreadingRunEvent.py +349 -0
  12. re_common/baselibrary/mthread/__init__.py +3 -0
  13. re_common/baselibrary/mthread/mythreading.py +695 -0
  14. re_common/baselibrary/pakge_other/__init__.py +0 -0
  15. re_common/baselibrary/pakge_other/socks.py +404 -0
  16. re_common/baselibrary/readconfig/__init__.py +0 -0
  17. re_common/baselibrary/readconfig/config_factory.py +18 -0
  18. re_common/baselibrary/readconfig/ini_config.py +317 -0
  19. re_common/baselibrary/readconfig/toml_config.py +49 -0
  20. re_common/baselibrary/temporary/__init__.py +0 -0
  21. re_common/baselibrary/temporary/envdata.py +36 -0
  22. re_common/baselibrary/tools/__init__.py +0 -0
  23. re_common/baselibrary/tools/all_requests/__init__.py +0 -0
  24. re_common/baselibrary/tools/all_requests/aiohttp_request.py +118 -0
  25. re_common/baselibrary/tools/all_requests/httpx_requet.py +102 -0
  26. re_common/baselibrary/tools/all_requests/mrequest.py +412 -0
  27. re_common/baselibrary/tools/all_requests/requests_request.py +81 -0
  28. re_common/baselibrary/tools/batch_compre/__init__.py +0 -0
  29. re_common/baselibrary/tools/batch_compre/bijiao_batch.py +31 -0
  30. re_common/baselibrary/tools/contrast_db3.py +123 -0
  31. re_common/baselibrary/tools/copy_file.py +39 -0
  32. re_common/baselibrary/tools/db3_2_sizedb3.py +102 -0
  33. re_common/baselibrary/tools/foreachgz.py +40 -0
  34. re_common/baselibrary/tools/get_attr.py +11 -0
  35. re_common/baselibrary/tools/image_to_pdf.py +62 -0
  36. re_common/baselibrary/tools/java_code_deal.py +139 -0
  37. re_common/baselibrary/tools/javacode.py +79 -0
  38. re_common/baselibrary/tools/mdb_db3.py +48 -0
  39. re_common/baselibrary/tools/merge_file.py +171 -0
  40. re_common/baselibrary/tools/merge_gz_file.py +165 -0
  41. re_common/baselibrary/tools/mhdfstools/__init__.py +0 -0
  42. re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py +42 -0
  43. re_common/baselibrary/tools/mhdfstools/hdfst.py +42 -0
  44. re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py +38 -0
  45. re_common/baselibrary/tools/mongo_tools.py +50 -0
  46. re_common/baselibrary/tools/move_file.py +170 -0
  47. re_common/baselibrary/tools/move_mongo/__init__.py +0 -0
  48. re_common/baselibrary/tools/move_mongo/mongo_table_to_file.py +63 -0
  49. re_common/baselibrary/tools/move_mongo/move_mongo_table.py +354 -0
  50. re_common/baselibrary/tools/move_mongo/use_mttf.py +18 -0
  51. re_common/baselibrary/tools/move_mongo/use_mv.py +93 -0
  52. re_common/baselibrary/tools/mpandas/__init__.py +0 -0
  53. re_common/baselibrary/tools/mpandas/mpandasreadexcel.py +125 -0
  54. re_common/baselibrary/tools/mpandas/pandas_visualization.py +8 -0
  55. re_common/baselibrary/tools/myparsel.py +104 -0
  56. re_common/baselibrary/tools/rename_dir_file.py +37 -0
  57. re_common/baselibrary/tools/sequoiadb_utils.py +398 -0
  58. re_common/baselibrary/tools/split_line_to_many.py +25 -0
  59. re_common/baselibrary/tools/stringtodicts.py +33 -0
  60. re_common/baselibrary/tools/workwechant_bot.py +84 -0
  61. re_common/baselibrary/utils/__init__.py +0 -0
  62. re_common/baselibrary/utils/baseaiohttp.py +296 -0
  63. re_common/baselibrary/utils/baseaiomysql.py +87 -0
  64. re_common/baselibrary/utils/baseallstep.py +191 -0
  65. re_common/baselibrary/utils/baseavro.py +19 -0
  66. re_common/baselibrary/utils/baseboto3.py +291 -0
  67. re_common/baselibrary/utils/basecsv.py +32 -0
  68. re_common/baselibrary/utils/basedict.py +133 -0
  69. re_common/baselibrary/utils/basedir.py +241 -0
  70. re_common/baselibrary/utils/baseencode.py +351 -0
  71. re_common/baselibrary/utils/baseencoding.py +29 -0
  72. re_common/baselibrary/utils/baseesdsl.py +86 -0
  73. re_common/baselibrary/utils/baseexcel.py +264 -0
  74. re_common/baselibrary/utils/baseexcept.py +109 -0
  75. re_common/baselibrary/utils/basefile.py +654 -0
  76. re_common/baselibrary/utils/baseftp.py +214 -0
  77. re_common/baselibrary/utils/basegzip.py +60 -0
  78. re_common/baselibrary/utils/basehdfs.py +135 -0
  79. re_common/baselibrary/utils/basehttpx.py +268 -0
  80. re_common/baselibrary/utils/baseip.py +87 -0
  81. re_common/baselibrary/utils/basejson.py +2 -0
  82. re_common/baselibrary/utils/baselist.py +32 -0
  83. re_common/baselibrary/utils/basemotor.py +190 -0
  84. re_common/baselibrary/utils/basemssql.py +98 -0
  85. re_common/baselibrary/utils/baseodbc.py +113 -0
  86. re_common/baselibrary/utils/basepandas.py +302 -0
  87. re_common/baselibrary/utils/basepeewee.py +11 -0
  88. re_common/baselibrary/utils/basepika.py +180 -0
  89. re_common/baselibrary/utils/basepydash.py +143 -0
  90. re_common/baselibrary/utils/basepymongo.py +230 -0
  91. re_common/baselibrary/utils/basequeue.py +22 -0
  92. re_common/baselibrary/utils/baserar.py +57 -0
  93. re_common/baselibrary/utils/baserequest.py +279 -0
  94. re_common/baselibrary/utils/baseset.py +8 -0
  95. re_common/baselibrary/utils/basesmb.py +403 -0
  96. re_common/baselibrary/utils/basestring.py +382 -0
  97. re_common/baselibrary/utils/basetime.py +320 -0
  98. re_common/baselibrary/utils/basetuple.py +0 -0
  99. re_common/baselibrary/utils/baseurl.py +121 -0
  100. re_common/baselibrary/utils/basezip.py +57 -0
  101. re_common/baselibrary/utils/core/__init__.py +8 -0
  102. re_common/baselibrary/utils/core/bottomutils.py +18 -0
  103. re_common/baselibrary/utils/core/mdeprecated.py +327 -0
  104. re_common/baselibrary/utils/core/mlamada.py +16 -0
  105. re_common/baselibrary/utils/core/msginfo.py +25 -0
  106. re_common/baselibrary/utils/core/requests_core.py +103 -0
  107. re_common/baselibrary/utils/fateadm.py +429 -0
  108. re_common/baselibrary/utils/importfun.py +123 -0
  109. re_common/baselibrary/utils/mfaker.py +57 -0
  110. re_common/baselibrary/utils/my_abc/__init__.py +3 -0
  111. re_common/baselibrary/utils/my_abc/better_abc.py +32 -0
  112. re_common/baselibrary/utils/mylogger.py +414 -0
  113. re_common/baselibrary/utils/myredisclient.py +861 -0
  114. re_common/baselibrary/utils/pipupgrade.py +21 -0
  115. re_common/baselibrary/utils/ringlist.py +85 -0
  116. re_common/baselibrary/utils/version_compare.py +36 -0
  117. re_common/baselibrary/utils/ydmhttp.py +126 -0
  118. re_common/facade/__init__.py +1 -0
  119. re_common/facade/lazy_import.py +11 -0
  120. re_common/facade/loggerfacade.py +25 -0
  121. re_common/facade/mysqlfacade.py +467 -0
  122. re_common/facade/now.py +31 -0
  123. re_common/facade/sqlite3facade.py +257 -0
  124. re_common/facade/use/__init__.py +0 -0
  125. re_common/facade/use/mq_use_facade.py +83 -0
  126. re_common/facade/use/proxy_use_facade.py +20 -0
  127. re_common/libtest/__init__.py +0 -0
  128. re_common/libtest/base_dict_test.py +19 -0
  129. re_common/libtest/baseavro_test.py +13 -0
  130. re_common/libtest/basefile_test.py +14 -0
  131. re_common/libtest/basemssql_test.py +77 -0
  132. re_common/libtest/baseodbc_test.py +8 -0
  133. re_common/libtest/basepandas_test.py +38 -0
  134. re_common/libtest/get_attr_test/__init__.py +0 -0
  135. re_common/libtest/get_attr_test/get_attr_test_settings.py +14 -0
  136. re_common/libtest/get_attr_test/settings.py +55 -0
  137. re_common/libtest/idencode_test.py +54 -0
  138. re_common/libtest/iniconfig_test.py +35 -0
  139. re_common/libtest/ip_test.py +35 -0
  140. re_common/libtest/merge_file_test.py +20 -0
  141. re_common/libtest/mfaker_test.py +9 -0
  142. re_common/libtest/mm3_test.py +32 -0
  143. re_common/libtest/mylogger_test.py +89 -0
  144. re_common/libtest/myparsel_test.py +28 -0
  145. re_common/libtest/mysql_test.py +151 -0
  146. re_common/libtest/pymongo_test.py +21 -0
  147. re_common/libtest/split_test.py +12 -0
  148. re_common/libtest/sqlite3_merge_test.py +6 -0
  149. re_common/libtest/sqlite3_test.py +34 -0
  150. re_common/libtest/tomlconfig_test.py +30 -0
  151. re_common/libtest/use_tools_test/__init__.py +3 -0
  152. re_common/libtest/user/__init__.py +5 -0
  153. re_common/studio/__init__.py +5 -0
  154. re_common/studio/assignment_expressions.py +37 -0
  155. re_common/studio/mydash/__init__.py +0 -0
  156. re_common/studio/mydash/test1.py +19 -0
  157. re_common/studio/pydashstudio/__init__.py +0 -0
  158. re_common/studio/pydashstudio/first.py +9 -0
  159. re_common/studio/streamlitstudio/__init__.py +0 -0
  160. re_common/studio/streamlitstudio/first_app.py +66 -0
  161. re_common/studio/streamlitstudio/uber_pickups.py +24 -0
  162. re_common/studio/test.py +19 -0
  163. re_common/vip/__init__.py +0 -0
  164. re_common/vip/base_step_process.py +11 -0
  165. re_common/vip/baseencodeid.py +91 -0
  166. re_common/vip/changetaskname.py +28 -0
  167. re_common/vip/core_var.py +24 -0
  168. re_common/vip/mmh3Hash.py +90 -0
  169. re_common/vip/proxy/__init__.py +0 -0
  170. re_common/vip/proxy/allproxys.py +127 -0
  171. re_common/vip/proxy/allproxys_thread.py +159 -0
  172. re_common/vip/proxy/cnki_proxy.py +153 -0
  173. re_common/vip/proxy/kuaidaili.py +87 -0
  174. re_common/vip/proxy/proxy_all.py +113 -0
  175. re_common/vip/proxy/update_kuaidaili_0.py +42 -0
  176. re_common/vip/proxy/wanfang_proxy.py +152 -0
  177. re_common/vip/proxy/wp_proxy_all.py +182 -0
  178. re_common/vip/read_rawid_to_txt.py +92 -0
  179. re_common/vip/title/__init__.py +5 -0
  180. re_common/vip/title/transform/TransformBookTitleToZt.py +125 -0
  181. re_common/vip/title/transform/TransformConferenceTitleToZt.py +139 -0
  182. re_common/vip/title/transform/TransformCstadTitleToZt.py +196 -0
  183. re_common/vip/title/transform/TransformJournalTitleToZt.py +203 -0
  184. re_common/vip/title/transform/TransformPatentTitleToZt.py +132 -0
  185. re_common/vip/title/transform/TransformRegulationTitleToZt.py +114 -0
  186. re_common/vip/title/transform/TransformStandardTitleToZt.py +135 -0
  187. re_common/vip/title/transform/TransformThesisTitleToZt.py +135 -0
  188. re_common/vip/title/transform/__init__.py +11 -0
  189. {re_common-2.0.1.dist-info → re_common-10.0.0.dist-info}/METADATA +1 -1
  190. re_common-10.0.0.dist-info/RECORD +213 -0
  191. re_common-2.0.1.dist-info/RECORD +0 -25
  192. {re_common-2.0.1.dist-info → re_common-10.0.0.dist-info}/LICENSE +0 -0
  193. {re_common-2.0.1.dist-info → re_common-10.0.0.dist-info}/WHEEL +0 -0
  194. {re_common-2.0.1.dist-info → re_common-10.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,257 @@
1
+ # db3数据的连接返回一个连接对象
2
+ import sys
3
+ import traceback
4
+
5
+ from re_common.baselibrary import MLogger
6
+ from re_common.baselibrary.baseabs import BaseAbs
7
+ from re_common.baselibrary.database.mbuilder import Sqlite3Builder
8
+
9
+
10
class Sqlite3Utiles(object):
    """Facade around a sqlite ("db3") connection obtained from the project SQL factory.

    Connect with ``Sqlite3DBConnectFromFilePath`` or ``Sqlite3DBConnectFromConfig``
    (both return ``self``), then run statements with the ``Exe*`` / ``Select*``
    helpers. Every helper opens its own cursor and always closes it again.
    """

    def __init__(self, logger=None):
        """Create an unconnected facade.

        :param logger: optional logger object; when omitted a stream logger
            from ``MLogger`` is used.
        """
        self.sqllite3 = None
        self.conn = None
        self.cursor = None

        self._logger = logger

    @property
    def logger(self):
        """Return the injected logger, or ``MLogger().streamlogger`` when none was given."""
        if self._logger is None:
            return MLogger().streamlogger
        return self._logger

    @logger.setter
    def logger(self, value):
        assert isinstance(value, MLogger)
        self._logger = value

    def get_new_cursor(self):
        """Return a fresh cursor on the current connection.

        :raises AttributeError: if no connection has been established yet.
        """
        return self.conn.cursor()

    def _open_cursor(self):
        """Return a new cursor, or ``None`` (after logging) when not connected."""
        if not self.conn:
            self.logger.error('*NoConnection: connect before executing sql')
            return None
        return self.get_new_cursor()

    def _log_sql_error(self, sql):
        """Log the failing statement and the traceback of the active exception."""
        self.logger.error('*errSql:' + sql)
        self.logger.error('*InsertError:' + traceback.format_exc())

    def Sqlite3DBConnectFromFilePath(self, dbfilepath, encoding="gbk", timeout=60):
        """Connect directly to a db3 file.

        :param dbfilepath: path of the sqlite database file.
        :param encoding: text encoding handed to ``set_encoding``; the default
            is ``gbk`` (the original author notes that db3 files exported from
            Hadoop are gbk) - pass ``utf-8`` for utf-8 databases.
        :param timeout: timeout value forwarded to the builder.
        :return: ``self`` so calls can be chained.
        """
        sqllite3 = BaseAbs.get_sql_factory().sqlite_factory()
        sqlite3_moudle = Sqlite3Builder().build_file_path(dbfilepath).build_timeout(timeout).get_moudle()
        sqllite3.link(sqlite3_moudle)
        sqllite3.set_encoding(encoding=encoding)
        self.sqllite3 = sqllite3
        self.conn = self.sqllite3.db
        return self

    def Sqlite3DBConnectFromConfig(self, cobnfigfilepath, sec, encoding="gbk"):
        """Connect using the settings stored in a configuration file.

        :param cobnfigfilepath: path of the configuration file.
        :param sec: section of the configuration file to read.
        :param encoding: text encoding handed to ``set_encoding`` (default ``gbk``).
        :return: ``self`` so calls can be chained.
        """
        sqllite3 = BaseAbs.get_sql_factory().sqlite_factory()
        sqlite3_moudle = Sqlite3Builder(cobnfigfilepath, sec).build_all().get_moudle()
        sqllite3.link(sqlite3_moudle)
        sqllite3.set_encoding(encoding=encoding)
        self.sqllite3 = sqllite3
        self.conn = self.sqllite3.db
        return self

    def ExeSqlliteList(self, sqlList, errExit=True):
        """Execute every statement in *sqlList* and commit once at the end.

        :param sqlList: iterable of SQL strings (insert/update style, no result).
        :param errExit: when true, the first failing statement is logged and the
            process exits via ``sys.exit()``; when false, failures are logged
            and the remaining statements still run.
        :return: ``None``.
        """
        cur = self._open_cursor()
        if cur is None:
            return
        try:
            for count, sql in enumerate(sqlList, start=1):
                try:
                    self.logger.info("{} 执行sql数量:{}".format(sql, str(count)))
                    cur.execute(sql)
                except Exception:
                    self._log_sql_error(sql)
                    if errExit:
                        sys.exit()
            self.conn.commit()
        finally:
            # Runs on success, on error and on sys.exit(), so the cursor never leaks.
            cur.close()

    def ExeSqlliteMany(self, sql, itermany, errExit=True):
        """Run ``executemany(sql, itermany)`` and commit.

        :param sql: parameterised SQL statement.
        :param itermany: iterable of parameter sequences.
        :param errExit: when true, a failure is logged and the process exits via
            ``sys.exit()``; when false the failure is only logged.
        :return: ``None``.
        """
        cur = self._open_cursor()
        if cur is None:
            return
        try:
            try:
                self.logger.info("{}\n{}".format(sql, str(itermany)))
                cur.executemany(sql, itermany)
            except Exception:
                self._log_sql_error(sql)
                if errExit:
                    sys.exit()
            self.conn.commit()
        finally:
            cur.close()

    def ExeSqlliteSql(self, sql):
        """Execute one statement and commit it.

        :param sql: SQL string.
        :return: ``True`` on success; ``False`` when there is no connection or
            the statement failed (the failure is logged).
        """
        cur = self._open_cursor()
        if cur is None:
            return False
        try:
            self.logger.info(sql)
            cur.execute(sql)
            self.conn.commit()
        except Exception:
            self._log_sql_error(sql)
            return False
        finally:
            cur.close()
        return True

    def _fetch_all(self, sql, row_factory=None):
        """Run a query and return all rows, or ``False`` on any failure."""
        cur = self._open_cursor()
        if cur is None:
            return False
        try:
            if row_factory is not None:
                # Set on the cursor only, so other queries on the shared
                # connection keep their row shape.
                # NOTE(review): assumes self.conn is a stdlib sqlite3 connection.
                cur.row_factory = row_factory
            self.logger.info(sql)
            cur.execute(sql)
            return cur.fetchall()
        except Exception:
            self._log_sql_error(sql)
            return False
        finally:
            cur.close()

    def SelectFromSqlliteFetchall(self, sql):
        """Run a query and return every row.

        :param sql: SQL query string.
        :return: list of rows, or ``False`` when there is no connection or the
            query failed (the failure is logged).
        """
        return self._fetch_all(sql)

    def SelectFromSqlliteFetchall_dicts(self, sql):
        """Run a query and return every row as a ``{column_name: value}`` dict.

        :param sql: SQL query string.
        :return: list of dicts, or ``False`` when there is no connection or the
            query failed (the failure is logged).
        """

        def dict_factory(cursor, row):
            return {col[0]: row[idx] for idx, col in enumerate(cursor.description)}

        return self._fetch_all(sql, row_factory=dict_factory)

    def SelectFromSqlliteFetchOne(self, sql):
        """Lazily yield the rows of a query one at a time.

        This is a generator: nothing is executed until iteration starts. A
        missing connection or a failing query is logged and ends the iteration.

        :param sql: SQL query string.
        """
        cur = self._open_cursor()
        if cur is None:
            return
        try:
            self.logger.info(sql)
            cur.execute(sql)
            while True:
                row = cur.fetchone()
                if row is None:
                    return
                yield row
        except Exception:
            # ``Exception`` rather than a bare except: closing the generator
            # early raises GeneratorExit here, which is not an SQL error.
            self._log_sql_error(sql)
        finally:
            cur.close()

    def ExeVACUUM(self):
        """Run ``VACUUM`` to reclaim unused space.

        :return: ``True`` on success; ``False`` when there is no connection or
            the command failed (the failure is logged).
        """
        if not self.conn:
            self.logger.error('*NoConnection: connect before executing sql')
            return False
        try:
            self.conn.execute("VACUUM")
        except Exception:
            self.logger.error('*VACUUMError:' + traceback.format_exc())
            return False
        return True

    def sqliteEscape(self, keyWord):
        """Escape *keyWord* for embedding in a quoted SQL literal.

        Doubles single quotes, then doubles backslashes.

        NOTE(review): SQLite string literals do not treat backslash as an
        escape character, so the second replacement changes the stored value;
        kept for backward compatibility - prefer bound parameters.
        """
        keyWord = keyWord.replace("'", "''")
        keyWord = keyWord.replace("\\", "\\\\")
        return keyWord

    def close(self):
        """Close the underlying connection if one was opened."""
        if self.conn:
            self.conn.close()
File without changes
@@ -0,0 +1,83 @@
1
+ import logging
2
+ import traceback
3
+
4
+ from re_common.baselibrary.utils.basepika import BasePika
5
+ from retry import retry
6
+
7
+ logging_logger = logging.getLogger(__name__)
8
+
9
+
10
class UseMq(object):
    """Convenience wrapper around ``BasePika`` bound to one durable queue.

    Subclasses override ``callback2`` to process consumed messages.
    """

    def __init__(self, queue, qos=1):
        """Connect, open a channel, declare *queue* as durable and apply *qos*.

        :param queue: queue name, also used as the routing key.
        :param qos: value passed to ``basic_qos``.
        """
        self.queue = queue
        self.qos = qos
        self.basepika = BasePika()
        self.basepika.set_default()
        self.basepika.connect()
        self.basepika.create_channel()
        self.basepika.queue_declare(queue=queue, durable=True)
        self.basepika.basic_qos(qos)
        self.properties = self.basepika.get_properties()

    def re_conn(self):
        """Reconnect: new connection, new channel, re-declare the queue, re-apply qos."""
        self.basepika.connect()
        self.basepika.create_channel()
        self.basepika.queue_declare(queue=self.queue, durable=True)
        self.basepika.basic_qos(self.qos)

    @retry(delay=5, backoff=2, max_delay=60 * 3, logger=logging_logger)
    def get_mq(self):
        """Register ``callback`` on the queue and start consuming.

        Any ordinary error is printed and followed by one reconnect attempt;
        only an exception escaping that reconnect reaches the ``retry``
        decorator. ``KeyboardInterrupt`` / ``SystemExit`` are deliberately not
        caught so a blocking consumer can still be stopped.
        """
        try:
            if self.basepika.channel.is_closed:
                logging_logger.info("重连中......")
                self.re_conn()
                logging_logger.info("重连完成......")
            self.basepika.set_get_msg_callback(routing_key=self.queue, callback=self.callback, auto_ack=False)
            self.basepika.start_get_msg()
        except Exception:
            traceback.print_exc()
            logging_logger.info("重连中......")
            self.re_conn()

    def callback(self, ch, method, properties, body):
        """Dispatch a message to ``callback2``, then ack it when auto-ack is off."""
        self.callback2(ch, method, properties, body)
        if self.basepika.auto_ack is False:
            self.basepika.basic_ack(ch, method)

    def callback2(self, ch, method, properties, body):
        """Hook for subclasses; the default implementation ignores the message."""
        pass

    @retry(delay=5, backoff=2, max_delay=60 * 3, logger=logging_logger)
    def send_mq(self, body, num=100):
        """Publish *body* unless the queue already holds *num* or more messages.

        :param body: message payload.
        :param num: backlog threshold; nothing is sent once the queue size
            reaches it.
        :return: ``True`` if the message was sent; ``False`` if the queue was
            full or an error occurred (in which case a reconnect is attempted).
        """
        try:
            if self.basepika.get_queue_size(self.queue) < num:
                self.basepika.easy_send_msg(routing_key=self.queue,
                                            body=body,
                                            properties=self.properties)
                return True
            return False
        except Exception:
            traceback.print_exc()
            logging_logger.info("重连中......")
            self.re_conn()
            return False

    def get_server_mq_num(self, num=100):
        """Return ``True`` while the queue holds fewer than *num* messages."""
        return self.basepika.get_queue_size(self.queue) < num

    def easy_send_mq(self, body):
        """Publish *body* to the queue without checking the backlog."""
        self.basepika.easy_send_msg(routing_key=self.queue,
                                    body=body,
                                    properties=self.properties)
@@ -0,0 +1,20 @@
1
+ import time
2
+
3
+
4
def set_school_list_proxy(self):
    """Pop a proxy from ``self.proxyset`` and build a session with it.

    Intended to be attached to (or called with) an object that provides
    ``proxyset`` (a set of proxies), ``school_proxy_list``, ``logger``,
    ``headers`` and ``bshttpx``.

    On ``KeyError`` the function sleeps 15 seconds, refills ``proxyset`` from
    ``school_proxy_list`` when the error is the empty-set pop, and tries again.
    The wait is a loop rather than self-recursion, so a long outage cannot end
    in ``RecursionError``.

    :return: whatever ``self.bshttpx.creat_sn`` returns.
    """
    while True:
        try:
            proxy = self.proxyset.pop()
            self.logger.info("proxy is:{},proxy size is: {}".format(proxy, len(self.proxyset)))
            return self.bshttpx.creat_sn(proxy=proxy,
                                         headers=self.headers,
                                         verify=False)
        except KeyError as e:
            time.sleep(15)
            if str(e) == "'pop from an empty set'":
                self.proxyset = set(self.school_proxy_list)
File without changes
@@ -0,0 +1,19 @@
1
+ from re_common.baselibrary import BaseDicts
2
+ from re_common.baselibrary.utils.mfaker import MFaker
3
+
4
+
5
def test_basedict_sortkeys():
    """Print a faker-generated dict, its key-sorted form, then the input again.

    The final print lets the reader check by eye whether ``sortkeys`` touched
    its argument.
    """
    faker = MFaker()
    source = faker.create_data(MFaker.m_pydict, **faker.py_para())
    print(source)
    print(BaseDicts.sortkeys(source))
    print(source)
12
+
13
+
14
def test_basedict_sortvalues():
    """Print the result of ``BaseDicts.sortvalues`` for a fixed sample dict."""
    sample = {
        'data': 'FDZqqOGNMyGJlNRoCsJd',
        'participant': 'petersonadrienne@bennett.com',
        'often': 'KnJSSDeSTPboiwjSdGwR',
        'friend': '1639',
        'above': '8144',
        'in': '1614',
    }
    print(BaseDicts.sortvalues(sample))
19
+
@@ -0,0 +1,13 @@
1
+ from re_common.baselibrary.utils.baseavro import BaseAvro
2
+
3
# Collect the distinct "key" value of every avro record.
id_set = {record["key"] for record in BaseAvro().read_line_yeild(r"F:\fun2\avro")}

print(len(id_set))

# Write one key per line. The loop variable is deliberately not called ``id``
# so the builtin is not shadowed at module scope.
with open(r"F:\fun2\avro1.txt", 'w', encoding="utf-8") as f:
    f.writelines(key + "\n" for key in id_set)
@@ -0,0 +1,14 @@
1
+ from re_common.baselibrary.utils.basedir import BaseDir
2
+ from re_common.baselibrary.utils.basefile import BaseFile
3
+
4
+ # files_line_list = BaseFile.read_end_line(r"F:\db3\mysql_date\test\part-00000", 3)
5
+ # for file_line in files_line_list:
6
+ # # file_line = str(file_line, encoding="utf-8")
7
+ # # file_line = file_line.decode(encoding="utf-8")
8
+ # print(file_line)
9
+
10
# For every file found under data_dir, append the last 11000 lines of the
# fixed test part file to the fixed output part file.
# NOTE(review): the loop variable is never used, so the same tail is appended
# once per directory entry - confirm whether ``file`` was meant to be read.
for file in BaseDir.get_dir_all_files(r"F:\db3\mysql_date\data_dir"):
    tail_lines = BaseFile.read_end_line(r"F:\db3\mysql_date\test\part-00000", 11000)
    BaseFile.single_add_file(r"F:\db3\mysql_date\end\part-00000", "\n".join(tail_lines) + "\n")
14
+
@@ -0,0 +1,77 @@
1
+ ###########################################
2
+ # 同项目调用基础包
3
+ import datetime
4
+ import gzip
5
+ import json
6
+ import os
7
+ import sys
8
+ import time
9
+
10
# Put the directory three path components above this file at the front of
# sys.path so the package imports below resolve when run from a checkout.
filepath = os.path.abspath(__file__)
pathlist = filepath.split(os.sep)[:-3]
TopPath = os.sep.join(pathlist)
sys.path.insert(0, TopPath)
print(TopPath)
16
+ ############################################
17
+
18
+ from re_common.baselibrary.utils.basemssql import BaseMsSql
19
+ from re_common.baselibrary.utils.basefile import BaseFile
20
+ from re_common.baselibrary.utils.basetime import BaseTime
21
bt = BaseTime()

host = "127.0.0.1"
user = "sa"
# NOTE(review): hard-coded database credential shipped in a published package;
# rotate it and load it from the environment or a config file instead.
pwd = "xujiang1994323"
db = "patData"
charset = "utf8"

basemssql = BaseMsSql(host, user, pwd, db, charset, as_dict=True)
basemssql.conn()
basemssql.exec_select_query("select * from [dbo].[New_pattzb]")
outPathFile = r"F:\db3\patnetjson_big\jss_patent.big_json.gz"
i = 0  # rows written to the current output file
size = 10000  # rows fetched per batch
count = 2000000  # roll over to a new output file once this many rows are written
outPathFile = BaseFile.get_new_filename(outPathFile, sign=".")
start = time.time()
end = False


def _normalize_value(v):
    """Return *v* in a JSON-friendly form.

    Strings are re-decoded from latin-1 bytes to gbk when that round trip is
    possible; datetimes are formatted as ``%Y-%m-%d %H:%M:%S``; everything
    else is returned unchanged.
    """
    if isinstance(v, str):
        try:
            v = v.encode('latin-1').decode('gbk')
        except UnicodeError:
            # Not latin-1 encodable or not valid gbk: keep the original text.
            pass
    if isinstance(v, datetime.datetime):
        v = bt.datetime_to_string(v, "%Y-%m-%d %H:%M:%S")
    return v


# Outer loop: one iteration per output file. Inner loop: one iteration per batch.
while True:
    with gzip.open(outPathFile, 'wb') as f:
        while True:
            a = basemssql.cur.fetchmany(size=size)
            if not a:
                print(i)
                print(int(time.time() - start))
                print("break")
                end = True
                break
            for row in a:
                dicts = {k: _normalize_value(v) for k, v in row.items()}
                line = json.dumps(dicts, ensure_ascii=False) + "\n"
                f.write(line.encode(encoding="utf8"))
            # Count the rows actually fetched; the final batch may be short.
            i = i + len(a)
            print(i)
            print(int(time.time() - start))
            if i >= count:
                outPathFile = BaseFile.get_new_filename(outPathFile, sign=".")
                i = 0
                break
    if end:
        break
75
+
76
+
77
+
@@ -0,0 +1,8 @@
1
+ from re_common.baselibrary.utils.baseodbc import BaseODBC
2
+
3
# Open the Access database and print every row of table CN.
# Alternative input used previously:
#   r"D:\download\cnki_qk\download\get_journal\mdb\cnki期刊信息_20200315.mdb"
baseodbc = BaseODBC(r"C:\Users\xuzhu\Desktop\DB_20200701_GB.mdb")
baseodbc.get_cur()
sql = "select * from `CN`"
for row in baseodbc.select(sql):
    print(row)
@@ -0,0 +1,38 @@
1
+ import time
2
+
3
+ from pandas._libs.tslibs.timestamps import Timestamp
4
+
5
+
6
def test_dataform_to_numpy():
    """Time the dict -> DataFrame -> numpy conversion for two sample inputs.

    The first input has one dtype per column; the second mixes types inside
    every column. The original author's note says the conversion is fast when
    the data types are consistent. Each run prints the resulting array and the
    elapsed seconds.
    """
    import pandas as pd
    import numpy as np

    uniform_columns = {'A': 1.,
                       'B': pd.Timestamp('20130102'),
                       'C': pd.Series(1, index=list(range(4)), dtype='float32'),
                       'D': np.array([3] * 4, dtype='int32'),
                       'E': pd.Categorical(["test", "train", "test", "train"]),
                       'F': 'foo'}
    from re_common.baselibrary.utils.basepandas import BasePandas
    bp = BasePandas()

    def convert_and_time(data):
        # Build the frame, print its numpy form, then print the elapsed time.
        started = time.time()
        frame = bp.dicts_to_dataform(data)
        print(bp.dataform_to_numpy(frame))
        print(time.time() - started)

    convert_and_time(uniform_columns)

    stamp = '2013-01-02 00:00:00'
    mixed_columns = {
        'A': [1.0, Timestamp(stamp), 1.0, 3, 'test', 'foo'],
        'B': [Timestamp(stamp), Timestamp(stamp), 1.0, 3, 'test', 'foo'],
        'C': [1.0, Timestamp(stamp), 1.0, 3, 'test', 'foo'],
        'D': [3, Timestamp(stamp), 1.0, 3, 'test', 'foo'],
        'E': ['test', Timestamp(stamp), 1.0, 3, 'test', 'foo'],
        'F': ['foo', Timestamp(stamp), 1.0, 3, 'test', 'foo'],
    }
    convert_and_time(mixed_columns)


test_dataform_to_numpy()
File without changes
@@ -0,0 +1,14 @@
1
+ import os
2
+ import sys
3
+
4
# Put the directory four path components above this file at the front of
# sys.path so the package imports below resolve when run from a checkout.
filepath = os.path.abspath(__file__)
pathlist = filepath.split(os.sep)[:-4]
TopPath = os.sep.join(pathlist)
sys.path.insert(0, TopPath)
print(TopPath)

from re_common.libtest.get_attr_test import settings
from re_common.baselibrary.tools.get_attr import get_attrs

# Show what get_attrs extracts from the sample settings module.
print(get_attrs(settings))
@@ -0,0 +1,55 @@
1
# -*- coding: utf-8 -*-

"""
CREATE YOUR DEFAULT_CONFIG !

Some configuration:
    CONCURRENT_REQUESTS     number of concurrent requests
    RETRIES                 number of retries
    DOWNLOAD_DELAY          delay before a download
    RETRY_DELAY             delay before a retry
    DOWNLOAD_TIMEOUT        download timeout limit
    USER_AGENT              user agent string
    LOG_FILE                log file path
    LOG_LEVEL               log level
"""

CONCURRENT_REQUESTS = 20

MIDDLEWARE = ['middlewares.middleware']

# PIPELINES = []

DEFAULT_REQUEST_CONFIG = {
    "RETRIES": 0,
    "DOWNLOAD_DELAY": 0,
    "RETRY_DELAY": 0,
    "DOWNLOAD_TIMEOUT": 10,
}

# Write the log to a file:
# LOG_FILE = './asyncpy.log'
# LOG_LEVEL = 'DEBUG'

# CLOSESPIDER_TIMEOUT = 10

USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/60.0.3100.0 Safari/537.36"
)
@@ -0,0 +1,54 @@
1
+
2
+
3
+ from re_common.vip.baseencodeid import BaseLngid
4
+
5
+
6
def encode_id():
    """Derive and print the lngid for a sample ``sub_db_id`` / ``rawid`` pair.

    The lookup runs twice, once per value of ``case_insensitive``; per the
    original author, that flag marks whether the source site's rawid
    distinguishes upper and lower case.

    NOTE(review): the banner printed with ``case_insensitive=True`` reads
    "case-sensitive" and vice versa - confirm which way round is intended.
    """
    sub_db_id = "1"
    rawid = "A44cbe4375b431741"
    b = BaseLngid()
    runs = (
        ("********区分大小写************", True),
        ("********不区分大小写************", False),
    )
    for banner, flag in runs:
        print(banner)
        lngid = b.GetLngid(sub_db_id, rawid, case_insensitive=flag)
        print(lngid)
25
+
26
def decode_id():
    """Recover and print the rawid for sample ``limited_id`` values.

    Per the original author, ``limited_id`` is the lngid with ``sub_db_id``
    stripped off, and a ``limited_id`` longer than 20 characters cannot be
    reversed. The last call exercises that over-long case; its result is not
    printed.
    """
    limited_id_big = "F65F41FFCF049A3A21516C5CDFE40A22"
    limited_id_small = "HLXMH9XNH5XMJC04HHXMJDG2J919Y"
    limited_id_err = "123456" * 20
    b = BaseLngid()

    print("********区分大小写************")
    print(b.GetRawid(limited_id_big, case_insensitive=True))

    print("********不区分大小写************")
    print(b.GetRawid(limited_id_small, case_insensitive=False))

    print("*******limited_id超过20长度时*******")
    rawid = b.GetRawid(limited_id_err, case_insensitive=False)


if __name__ == "__main__":
    # encode_id()
    decode_id()