re-common 2.0.0__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. re_common/v2/baselibrary/tools/search_hash_tools.py +33 -0
  2. re_common/v2/baselibrary/tools/text_matcher.py +223 -0
  3. re_common/v2/baselibrary/utils/BusinessStringUtil.py +2 -2
  4. re_common/v2/baselibrary/utils/author_smi.py +308 -0
  5. re_common/v2/baselibrary/utils/string_clear.py +15 -1
  6. re_common/v2/baselibrary/utils/stringutils.py +36 -1
  7. {re_common-2.0.0.dist-info → re_common-2.0.1.dist-info}/METADATA +1 -1
  8. re_common-2.0.1.dist-info/RECORD +25 -0
  9. re_common/baselibrary/__init__.py +0 -4
  10. re_common/baselibrary/baseabs/__init__.py +0 -7
  11. re_common/baselibrary/baseabs/baseabs.py +0 -26
  12. re_common/baselibrary/database/mbuilder.py +0 -132
  13. re_common/baselibrary/database/moudle.py +0 -93
  14. re_common/baselibrary/database/msqlite3.py +0 -194
  15. re_common/baselibrary/database/mysql.py +0 -169
  16. re_common/baselibrary/database/sql_factory.py +0 -26
  17. re_common/baselibrary/mthread/MThreadingRun.py +0 -486
  18. re_common/baselibrary/mthread/MThreadingRunEvent.py +0 -349
  19. re_common/baselibrary/mthread/__init__.py +0 -3
  20. re_common/baselibrary/mthread/mythreading.py +0 -695
  21. re_common/baselibrary/pakge_other/__init__.py +0 -0
  22. re_common/baselibrary/pakge_other/socks.py +0 -404
  23. re_common/baselibrary/readconfig/__init__.py +0 -0
  24. re_common/baselibrary/readconfig/config_factory.py +0 -18
  25. re_common/baselibrary/readconfig/ini_config.py +0 -317
  26. re_common/baselibrary/readconfig/toml_config.py +0 -49
  27. re_common/baselibrary/temporary/__init__.py +0 -0
  28. re_common/baselibrary/temporary/envdata.py +0 -36
  29. re_common/baselibrary/tools/__init__.py +0 -0
  30. re_common/baselibrary/tools/all_requests/__init__.py +0 -0
  31. re_common/baselibrary/tools/all_requests/aiohttp_request.py +0 -118
  32. re_common/baselibrary/tools/all_requests/httpx_requet.py +0 -102
  33. re_common/baselibrary/tools/all_requests/mrequest.py +0 -412
  34. re_common/baselibrary/tools/all_requests/requests_request.py +0 -81
  35. re_common/baselibrary/tools/batch_compre/__init__.py +0 -0
  36. re_common/baselibrary/tools/batch_compre/bijiao_batch.py +0 -31
  37. re_common/baselibrary/tools/contrast_db3.py +0 -123
  38. re_common/baselibrary/tools/copy_file.py +0 -39
  39. re_common/baselibrary/tools/db3_2_sizedb3.py +0 -102
  40. re_common/baselibrary/tools/foreachgz.py +0 -40
  41. re_common/baselibrary/tools/get_attr.py +0 -11
  42. re_common/baselibrary/tools/image_to_pdf.py +0 -62
  43. re_common/baselibrary/tools/java_code_deal.py +0 -139
  44. re_common/baselibrary/tools/javacode.py +0 -79
  45. re_common/baselibrary/tools/mdb_db3.py +0 -48
  46. re_common/baselibrary/tools/merge_file.py +0 -171
  47. re_common/baselibrary/tools/merge_gz_file.py +0 -165
  48. re_common/baselibrary/tools/mhdfstools/__init__.py +0 -0
  49. re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py +0 -42
  50. re_common/baselibrary/tools/mhdfstools/hdfst.py +0 -42
  51. re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py +0 -38
  52. re_common/baselibrary/tools/mongo_tools.py +0 -50
  53. re_common/baselibrary/tools/move_file.py +0 -170
  54. re_common/baselibrary/tools/move_mongo/__init__.py +0 -0
  55. re_common/baselibrary/tools/move_mongo/mongo_table_to_file.py +0 -63
  56. re_common/baselibrary/tools/move_mongo/move_mongo_table.py +0 -354
  57. re_common/baselibrary/tools/move_mongo/use_mttf.py +0 -18
  58. re_common/baselibrary/tools/move_mongo/use_mv.py +0 -93
  59. re_common/baselibrary/tools/mpandas/__init__.py +0 -0
  60. re_common/baselibrary/tools/mpandas/mpandasreadexcel.py +0 -125
  61. re_common/baselibrary/tools/mpandas/pandas_visualization.py +0 -8
  62. re_common/baselibrary/tools/myparsel.py +0 -104
  63. re_common/baselibrary/tools/rename_dir_file.py +0 -37
  64. re_common/baselibrary/tools/sequoiadb_utils.py +0 -398
  65. re_common/baselibrary/tools/split_line_to_many.py +0 -25
  66. re_common/baselibrary/tools/stringtodicts.py +0 -33
  67. re_common/baselibrary/tools/workwechant_bot.py +0 -84
  68. re_common/baselibrary/utils/__init__.py +0 -0
  69. re_common/baselibrary/utils/baseaiohttp.py +0 -296
  70. re_common/baselibrary/utils/baseaiomysql.py +0 -87
  71. re_common/baselibrary/utils/baseallstep.py +0 -191
  72. re_common/baselibrary/utils/baseavro.py +0 -19
  73. re_common/baselibrary/utils/baseboto3.py +0 -291
  74. re_common/baselibrary/utils/basecsv.py +0 -32
  75. re_common/baselibrary/utils/basedict.py +0 -133
  76. re_common/baselibrary/utils/basedir.py +0 -241
  77. re_common/baselibrary/utils/baseencode.py +0 -351
  78. re_common/baselibrary/utils/baseencoding.py +0 -29
  79. re_common/baselibrary/utils/baseesdsl.py +0 -86
  80. re_common/baselibrary/utils/baseexcel.py +0 -264
  81. re_common/baselibrary/utils/baseexcept.py +0 -109
  82. re_common/baselibrary/utils/basefile.py +0 -654
  83. re_common/baselibrary/utils/baseftp.py +0 -214
  84. re_common/baselibrary/utils/basegzip.py +0 -60
  85. re_common/baselibrary/utils/basehdfs.py +0 -135
  86. re_common/baselibrary/utils/basehttpx.py +0 -268
  87. re_common/baselibrary/utils/baseip.py +0 -87
  88. re_common/baselibrary/utils/basejson.py +0 -2
  89. re_common/baselibrary/utils/baselist.py +0 -32
  90. re_common/baselibrary/utils/basemotor.py +0 -190
  91. re_common/baselibrary/utils/basemssql.py +0 -98
  92. re_common/baselibrary/utils/baseodbc.py +0 -113
  93. re_common/baselibrary/utils/basepandas.py +0 -302
  94. re_common/baselibrary/utils/basepeewee.py +0 -11
  95. re_common/baselibrary/utils/basepika.py +0 -180
  96. re_common/baselibrary/utils/basepydash.py +0 -143
  97. re_common/baselibrary/utils/basepymongo.py +0 -230
  98. re_common/baselibrary/utils/basequeue.py +0 -22
  99. re_common/baselibrary/utils/baserar.py +0 -57
  100. re_common/baselibrary/utils/baserequest.py +0 -279
  101. re_common/baselibrary/utils/baseset.py +0 -8
  102. re_common/baselibrary/utils/basesmb.py +0 -403
  103. re_common/baselibrary/utils/basestring.py +0 -382
  104. re_common/baselibrary/utils/basetime.py +0 -320
  105. re_common/baselibrary/utils/basetuple.py +0 -0
  106. re_common/baselibrary/utils/baseurl.py +0 -121
  107. re_common/baselibrary/utils/basezip.py +0 -57
  108. re_common/baselibrary/utils/core/__init__.py +0 -8
  109. re_common/baselibrary/utils/core/bottomutils.py +0 -18
  110. re_common/baselibrary/utils/core/mdeprecated.py +0 -327
  111. re_common/baselibrary/utils/core/mlamada.py +0 -16
  112. re_common/baselibrary/utils/core/msginfo.py +0 -25
  113. re_common/baselibrary/utils/core/requests_core.py +0 -103
  114. re_common/baselibrary/utils/fateadm.py +0 -429
  115. re_common/baselibrary/utils/importfun.py +0 -123
  116. re_common/baselibrary/utils/mfaker.py +0 -57
  117. re_common/baselibrary/utils/my_abc/__init__.py +0 -3
  118. re_common/baselibrary/utils/my_abc/better_abc.py +0 -32
  119. re_common/baselibrary/utils/mylogger.py +0 -414
  120. re_common/baselibrary/utils/myredisclient.py +0 -861
  121. re_common/baselibrary/utils/pipupgrade.py +0 -21
  122. re_common/baselibrary/utils/ringlist.py +0 -85
  123. re_common/baselibrary/utils/version_compare.py +0 -36
  124. re_common/baselibrary/utils/ydmhttp.py +0 -126
  125. re_common/facade/__init__.py +0 -1
  126. re_common/facade/lazy_import.py +0 -11
  127. re_common/facade/loggerfacade.py +0 -25
  128. re_common/facade/mysqlfacade.py +0 -467
  129. re_common/facade/now.py +0 -31
  130. re_common/facade/sqlite3facade.py +0 -257
  131. re_common/facade/use/__init__.py +0 -0
  132. re_common/facade/use/mq_use_facade.py +0 -83
  133. re_common/facade/use/proxy_use_facade.py +0 -20
  134. re_common/libtest/__init__.py +0 -0
  135. re_common/libtest/base_dict_test.py +0 -19
  136. re_common/libtest/baseavro_test.py +0 -13
  137. re_common/libtest/basefile_test.py +0 -14
  138. re_common/libtest/basemssql_test.py +0 -77
  139. re_common/libtest/baseodbc_test.py +0 -8
  140. re_common/libtest/basepandas_test.py +0 -38
  141. re_common/libtest/get_attr_test/__init__.py +0 -0
  142. re_common/libtest/get_attr_test/get_attr_test_settings.py +0 -14
  143. re_common/libtest/get_attr_test/settings.py +0 -55
  144. re_common/libtest/idencode_test.py +0 -54
  145. re_common/libtest/iniconfig_test.py +0 -35
  146. re_common/libtest/ip_test.py +0 -35
  147. re_common/libtest/merge_file_test.py +0 -20
  148. re_common/libtest/mfaker_test.py +0 -9
  149. re_common/libtest/mm3_test.py +0 -32
  150. re_common/libtest/mylogger_test.py +0 -89
  151. re_common/libtest/myparsel_test.py +0 -28
  152. re_common/libtest/mysql_test.py +0 -151
  153. re_common/libtest/pymongo_test.py +0 -21
  154. re_common/libtest/split_test.py +0 -12
  155. re_common/libtest/sqlite3_merge_test.py +0 -6
  156. re_common/libtest/sqlite3_test.py +0 -34
  157. re_common/libtest/tomlconfig_test.py +0 -30
  158. re_common/libtest/use_tools_test/__init__.py +0 -3
  159. re_common/libtest/user/__init__.py +0 -5
  160. re_common/studio/__init__.py +0 -5
  161. re_common/studio/assignment_expressions.py +0 -37
  162. re_common/studio/mydash/__init__.py +0 -0
  163. re_common/studio/mydash/test1.py +0 -19
  164. re_common/studio/pydashstudio/__init__.py +0 -0
  165. re_common/studio/pydashstudio/first.py +0 -9
  166. re_common/studio/streamlitstudio/__init__.py +0 -0
  167. re_common/studio/streamlitstudio/first_app.py +0 -66
  168. re_common/studio/streamlitstudio/uber_pickups.py +0 -24
  169. re_common/studio/test.py +0 -19
  170. re_common/vip/__init__.py +0 -0
  171. re_common/vip/base_step_process.py +0 -11
  172. re_common/vip/baseencodeid.py +0 -91
  173. re_common/vip/changetaskname.py +0 -28
  174. re_common/vip/core_var.py +0 -24
  175. re_common/vip/mmh3Hash.py +0 -90
  176. re_common/vip/proxy/__init__.py +0 -0
  177. re_common/vip/proxy/allproxys.py +0 -127
  178. re_common/vip/proxy/allproxys_thread.py +0 -159
  179. re_common/vip/proxy/cnki_proxy.py +0 -153
  180. re_common/vip/proxy/kuaidaili.py +0 -87
  181. re_common/vip/proxy/proxy_all.py +0 -113
  182. re_common/vip/proxy/update_kuaidaili_0.py +0 -42
  183. re_common/vip/proxy/wanfang_proxy.py +0 -152
  184. re_common/vip/proxy/wp_proxy_all.py +0 -182
  185. re_common/vip/read_rawid_to_txt.py +0 -92
  186. re_common/vip/title/__init__.py +0 -5
  187. re_common/vip/title/transform/TransformBookTitleToZt.py +0 -125
  188. re_common/vip/title/transform/TransformConferenceTitleToZt.py +0 -139
  189. re_common/vip/title/transform/TransformCstadTitleToZt.py +0 -196
  190. re_common/vip/title/transform/TransformJournalTitleToZt.py +0 -203
  191. re_common/vip/title/transform/TransformPatentTitleToZt.py +0 -132
  192. re_common/vip/title/transform/TransformRegulationTitleToZt.py +0 -114
  193. re_common/vip/title/transform/TransformStandardTitleToZt.py +0 -135
  194. re_common/vip/title/transform/TransformThesisTitleToZt.py +0 -135
  195. re_common/vip/title/transform/__init__.py +0 -11
  196. re_common-2.0.0.dist-info/RECORD +0 -209
  197. /re_common/{baselibrary/database/__init__.py → v2/baselibrary/tools/list_tools.py} +0 -0
  198. {re_common-2.0.0.dist-info → re_common-2.0.1.dist-info}/LICENSE +0 -0
  199. {re_common-2.0.0.dist-info → re_common-2.0.1.dist-info}/WHEEL +0 -0
  200. {re_common-2.0.0.dist-info → re_common-2.0.1.dist-info}/top_level.txt +0 -0
@@ -1,257 +0,0 @@
1
- # db3数据的连接返回一个连接对象
2
- import sys
3
- import traceback
4
-
5
- from re_common.baselibrary import MLogger
6
- from re_common.baselibrary.baseabs import BaseAbs
7
- from re_common.baselibrary.database.mbuilder import Sqlite3Builder
8
-
9
-
10
class Sqlite3Utiles(object):
    """Convenience wrapper around a SQLite (db3) connection.

    A connection is attached by one of the ``Sqlite3DBConnectFrom*`` methods
    (or by assigning ``conn`` directly); the remaining methods run SQL on it.
    Every method that opens a cursor closes it again, also on error paths.
    """

    def __init__(self, logger=None):
        self.sqllite3 = None
        self.conn = None
        self.cursor = None

        self._logger = logger

    @property
    def logger(self):
        """Return the injected logger, or ``MLogger().streamlogger`` if none was set."""
        if self._logger is None:
            return MLogger().streamlogger
        return self._logger

    @logger.setter
    def logger(self, value):
        assert isinstance(value, MLogger)
        self._logger = value

    def get_new_cursor(self):
        """
        Return a new cursor created from the current connection.
        :return: the object returned by ``self.conn.cursor()``
        """
        return self.conn.cursor()

    def Sqlite3DBConnectFromFilePath(self, dbfilepath, encoding="gbk", timeout=60):
        """
        Connect directly through a database file path.

        The original author notes that db3 files exported from Hadoop are
        gbk-encoded; pass ``encoding="utf-8"`` for utf-8 files.
        :param dbfilepath: path handed to ``Sqlite3Builder().build_file_path``
        :param encoding: value handed to ``set_encoding``
        :param timeout: value handed to ``build_timeout``
        :return: self, so calls can be chained
        """
        sqllite3 = BaseAbs.get_sql_factory().sqlite_factory()
        sqlite3_moudle = Sqlite3Builder().build_file_path(dbfilepath).build_timeout(timeout).get_moudle()
        sqllite3.link(sqlite3_moudle)
        # Text encoding: db3 files on Hadoop are gbk according to the author.
        sqllite3.set_encoding(encoding=encoding)
        self.sqllite3 = sqllite3
        self.conn = self.sqllite3.db
        return self

    def Sqlite3DBConnectFromConfig(self, cobnfigfilepath, sec, encoding="gbk"):
        """
        Connect through a configuration file.
        :param cobnfigfilepath: configuration file path handed to ``Sqlite3Builder``
        :param sec: section name inside that configuration file
        :param encoding: value handed to ``set_encoding``
        :return: self, so calls can be chained
        """
        sqllite3 = BaseAbs.get_sql_factory().sqlite_factory()
        sqlite3_moudle = Sqlite3Builder(cobnfigfilepath, sec).build_all().get_moudle()
        sqllite3.link(sqlite3_moudle)
        # Text encoding: db3 files on Hadoop are gbk according to the author.
        sqllite3.set_encoding(encoding=encoding)
        self.sqllite3 = sqllite3
        self.conn = self.sqllite3.db
        return self

    def _log_sql_error(self, sql):
        """Log the failing statement and the current traceback; return the message."""
        self.logger.error('*errSql:' + sql)
        dbMsg = '*InsertError:' + traceback.format_exc()
        self.logger.error(dbMsg)
        return dbMsg

    def ExeSqlliteList(self, sqlList, errExit=True):
        """
        Execute a list of SQL statements (insert/update style) without
        returning results, then commit once at the end.
        :param sqlList: iterable of SQL strings
        :param errExit: when true, the first failing statement ends the
            process through ``sys.exit()``; otherwise the failure is logged
            and the remaining statements still run
        :return: None
        """
        if not self.conn:
            return
        cur = self.get_new_cursor()
        try:
            for count, sql in enumerate(sqlList, start=1):
                try:
                    self.logger.info("{} 执行sql数量:{}".format(sql, str(count)))
                    cur.execute(sql)
                except Exception:
                    # Each failure is reported once, for its own statement.
                    self._log_sql_error(sql)
                    if errExit:
                        sys.exit()
            self.conn.commit()
        finally:
            cur.close()

    def ExeSqlliteMany(self, sql, itermany, errExit=True):
        """
        Run ``executemany(sql, itermany)`` and commit.
        :param sql: parameterised SQL statement
        :param itermany: sequence of parameter tuples
        :param errExit: when true, a failure ends the process through ``sys.exit()``
        :return: None
        """
        if not self.conn:
            return
        cur = self.get_new_cursor()
        try:
            try:
                self.logger.info("{}\n{}".format(sql, str(itermany)))
                cur.executemany(sql, itermany)
            except Exception:
                self._log_sql_error(sql)
                if errExit:
                    sys.exit()
            self.conn.commit()
        finally:
            cur.close()

    def ExeSqlliteSql(self, sql):
        """
        Execute one SQL statement and commit.
        :param sql: SQL string
        :return: True on success, False when there is no connection or the
            statement fails (the failure is logged)
        """
        if not self.conn:
            return False
        cur = self.get_new_cursor()
        try:
            self.logger.info(sql)
            cur.execute(sql)
            self.conn.commit()
        except Exception:
            self._log_sql_error(sql)
            return False
        finally:
            cur.close()
        return True

    def SelectFromSqlliteFetchall(self, sql):
        """
        Run a query and return every row.
        :param sql: SQL string
        :return: the list from ``fetchall()``, or False when there is no
            connection or the query fails (the failure is logged)
        """
        if not self.conn:
            return False
        cur = self.get_new_cursor()
        try:
            self.logger.info(sql)
            cur.execute(sql)
            return cur.fetchall()
        except Exception:
            self._log_sql_error(sql)
            return False
        finally:
            cur.close()

    def SelectFromSqlliteFetchall_dicts(self, sql):
        """
        Run a query and return every row as a ``{column_name: value}`` dict.

        The dict row factory is applied only to the cursor used here; the
        connection's own ``row_factory`` is put back afterwards so other
        query methods keep returning their usual row type.
        :param sql: SQL string
        :return: list of dicts, or False when there is no connection or the
            query fails (the failure is logged)
        """

        def dict_factory(cursor, row):
            d = {}
            for idx, col in enumerate(cursor.description):
                d[col[0]] = row[idx]
            return d

        if not self.conn:
            return False
        previous_factory = getattr(self.conn, "row_factory", None)
        self.conn.row_factory = dict_factory
        try:
            # A cursor picks up the connection's row_factory when it is created.
            cur = self.get_new_cursor()
        finally:
            self.conn.row_factory = previous_factory
        try:
            self.logger.info(sql)
            cur.execute(sql)
            return cur.fetchall()
        except Exception:
            self._log_sql_error(sql)
            return False
        finally:
            cur.close()

    def SelectFromSqlliteFetchOne(self, sql):
        """
        Generator that runs a query and yields its rows one at a time.
        :param sql: SQL string
        :return: generator of rows; it simply stops when the rows are
            exhausted, when there is no connection, or when the query fails
            (the failure is logged)
        """
        if not self.conn:
            return False
        cur = self.get_new_cursor()
        try:
            self.logger.info(sql)
            cur.execute(sql)
            for row in iter(cur.fetchone, None):
                yield row
            return None
        except Exception:
            # GeneratorExit is not an Exception, so closing the generator
            # early no longer logs a bogus error.
            self._log_sql_error(sql)
            return False
        finally:
            cur.close()

    def ExeVACUUM(self):
        """
        Reclaim space by running ``VACUUM`` on the connection.
        :return: True on success, False when there is no connection or the
            statement fails (the failure is logged)
        """
        if not self.conn:
            return False
        try:
            self.conn.execute("VACUUM")
        except Exception:
            self.logger.error('*VACUUMError:' + traceback.format_exc())
            return False
        return True

    def sqliteEscape(self, keyWord):
        """Return ``keyWord`` with single quotes and backslashes doubled."""
        keyWord = keyWord.replace("'", "''")
        keyWord = keyWord.replace("\\", "\\\\")
        return keyWord

    def close(self):
        """Close the underlying connection."""
        self.conn.close()
File without changes
@@ -1,83 +0,0 @@
1
- import logging
2
- import traceback
3
-
4
- from re_common.baselibrary.utils.basepika import BasePika
5
- from retry import retry
6
-
7
- logging_logger = logging.getLogger(__name__)
8
-
9
-
10
class UseMq(object):
    """Small producer/consumer helper built on ``BasePika`` for one durable queue.

    Subclasses override ``callback2`` to process consumed messages.
    """

    def __init__(self, queue, qos=1):
        """
        :param queue: queue name, used for declaration and as routing key
        :param qos: value passed to ``basic_qos``
        """
        self.queue = queue
        self.qos = qos
        self.basepika = BasePika()
        self.basepika.set_default()
        # Same connect / channel / declare / qos sequence as a reconnect.
        self.re_conn()
        self.properties = self.basepika.get_properties()

    def re_conn(self):
        """
        Reconnect: open the connection and channel, declare the durable
        queue and apply the qos setting again.
        :return:
        """
        self.basepika.connect()
        self.basepika.create_channel()
        self.basepika.queue_declare(queue=self.queue, durable=True)
        self.basepika.basic_qos(self.qos)

    @retry(delay=5, backoff=2, max_delay=60 * 3, logger=logging_logger)
    def get_mq(self):
        """Register ``callback`` on the queue and start receiving messages.

        If the channel is closed it is reconnected first. Any ordinary error
        is printed and answered with a reconnect; an exception raised by
        that reconnect propagates to the ``retry`` decorator.
        """
        try:
            if self.basepika.channel.is_closed:
                logging_logger.info("重连中......")
                self.re_conn()
                logging_logger.info("重连完成......")
            self.basepika.set_get_msg_callback(routing_key=self.queue, callback=self.callback, auto_ack=False)
            self.basepika.start_get_msg()
        except Exception:
            # KeyboardInterrupt / SystemExit are deliberately not caught, so
            # stopping the process does not trigger a reconnect.
            traceback.print_exc()
            logging_logger.info("重连中......")
            self.re_conn()

    def callback(self, ch, method, properties, body):
        """Delegate to ``callback2`` and then ack unless auto-ack is enabled."""
        self.callback2(ch, method, properties, body)
        if self.basepika.auto_ack is False:
            self.basepika.basic_ack(ch, method)

    def callback2(self, ch, method, properties, body):
        """Hook for subclasses; the default implementation does nothing."""
        pass

    @retry(delay=5, backoff=2, max_delay=60 * 3, logger=logging_logger)
    def send_mq(self, body, num=100):
        """Send ``body`` only while the queue holds fewer than ``num`` messages.

        :return: True when the message was sent; False when the queue is
            already at or above ``num``, or when an error occurred (in which
            case a reconnect is attempted)
        """
        try:
            if self.get_server_mq_num(num):
                self.easy_send_mq(body)
                return True
            return False
        except Exception:
            traceback.print_exc()
            logging_logger.info("重连中......")
            self.re_conn()
            return False

    def get_server_mq_num(self, num=100):
        """Return whether the queue currently holds fewer than ``num`` messages."""
        return self.basepika.get_queue_size(self.queue) < num

    def easy_send_mq(self, body):
        """Send ``body`` to the queue without checking the queue size."""
        self.basepika.easy_send_msg(routing_key=self.queue,
                                    body=body,
                                    properties=self.properties)
@@ -1,20 +0,0 @@
1
- import time
2
-
3
-
4
def set_school_list_proxy(self):
    """
    Pop a proxy from ``self.proxyset`` and build a session with it.

    Preconditions stated by the original author: ``self`` is an instance of
    the calling class, the proxies live in the ``proxyset`` set, and the
    session is created through the ``bshttpx`` attribute.

    On ``KeyError`` the function sleeps 15 seconds and tries again; when the
    error is the "pop from an empty set" one, ``proxyset`` is first refilled
    from ``self.school_proxy_list``. Retrying is done in a loop rather than
    by recursion, so a long outage cannot exhaust the call stack.

    :return: whatever ``self.bshttpx.creat_sn`` returns
    """
    while True:
        try:
            proxy = self.proxyset.pop()
            self.logger.info("proxy is:{},proxy size is: {}".format(proxy, len(self.proxyset)))
            return self.bshttpx.creat_sn(proxy=proxy,
                                         headers=self.headers,
                                         verify=False)
        except KeyError as e:
            time.sleep(15)
            if str(e) == "'pop from an empty set'":
                self.proxyset = set(self.school_proxy_list)
File without changes
@@ -1,19 +0,0 @@
1
- from re_common.baselibrary import BaseDicts
2
- from re_common.baselibrary.utils.mfaker import MFaker
3
-
4
-
5
def test_basedict_sortkeys():
    """Print a faker-generated dict, the result of ``BaseDicts.sortkeys`` on it, then the input again."""
    faker = MFaker()
    source = faker.create_data(MFaker.m_pydict, **faker.py_para())
    print(source)
    by_key = BaseDicts.sortkeys(source)
    print(by_key)
    # Printed a second time so the input can be compared with its state before the call.
    print(source)
12
-
13
-
14
def test_basedict_sortvalues():
    """Print the result of ``BaseDicts.sortvalues`` for a fixed sample dict."""
    sample = {
        'data': 'FDZqqOGNMyGJlNRoCsJd',
        'participant': 'petersonadrienne@bennett.com',
        'often': 'KnJSSDeSTPboiwjSdGwR',
        'friend': '1639',
        'above': '8144',
        'in': '1614',
    }
    print(BaseDicts.sortvalues(sample))
19
-
@@ -1,13 +0,0 @@
1
from re_common.baselibrary.utils.baseavro import BaseAvro

# Collect the distinct "key" values of every record read from the avro input.
id_set = {line["key"] for line in BaseAvro().read_line_yeild(r"F:\fun2\avro")}

print(len(id_set))

# Write one key per line; the loop variable no longer shadows the builtin `id`.
with open(r"F:\fun2\avro1.txt", 'w', encoding="utf-8") as f:
    f.writelines(key + "\n" for key in id_set)
@@ -1,14 +0,0 @@
1
from re_common.baselibrary.utils.basedir import BaseDir
from re_common.baselibrary.utils.basefile import BaseFile

# files_line_list = BaseFile.read_end_line(r"F:\db3\mysql_date\test\part-00000", 3)
# for file_line in files_line_list:
#     # file_line = str(file_line, encoding="utf-8")
#     # file_line = file_line.decode(encoding="utf-8")
#     print(file_line)

SOURCE_PART = r"F:\db3\mysql_date\test\part-00000"
TARGET_PART = r"F:\db3\mysql_date\end\part-00000"

# For every file found in data_dir, append the last 11000 lines of SOURCE_PART
# to TARGET_PART.
# NOTE(review): the loop variable `file` is never used, so the same source
# file is read on each iteration -- confirm whether `file` was meant to be read.
for file in BaseDir.get_dir_all_files(r"F:\db3\mysql_date\data_dir"):
    tail_lines = BaseFile.read_end_line(SOURCE_PART, 11000)
    BaseFile.single_add_file(TARGET_PART, "\n".join(tail_lines) + "\n")
14
-
@@ -1,77 +0,0 @@
1
###########################################
# Bootstrap so the base package of this same project can be imported.
import datetime
import gzip
import json
import os
import sys
import time

filepath = os.path.abspath(__file__)
pathlist = filepath.split(os.sep)
pathlist = pathlist[:-3]
TopPath = os.sep.join(pathlist)
sys.path.insert(0, TopPath)
print(TopPath)
############################################

from re_common.baselibrary.utils.basemssql import BaseMsSql
from re_common.baselibrary.utils.basefile import BaseFile
from re_common.baselibrary.utils.basetime import BaseTime
bt = BaseTime()

host = "127.0.0.1"
user = "sa"
# NOTE(review): a database password is committed in source. It can now be
# supplied through the MSSQL_PWD environment variable; the literal fallback is
# kept only so existing runs keep working and should be rotated and removed.
pwd = os.environ.get("MSSQL_PWD", "xujiang1994323")
db = "patData"
charset = "utf8"


def _normalize_value(v):
    """Re-decode latin-1 text as gbk where possible and format datetimes as strings."""
    try:
        v = v.encode('latin-1').decode('gbk')
    except (AttributeError, UnicodeError):
        # Not text, or not representable that way: keep the value unchanged.
        pass
    if isinstance(v, datetime.datetime):
        v = bt.datetime_to_string(v, "%Y-%m-%d %H:%M:%S")
    return v


basemssql = BaseMsSql(host, user, pwd, db, charset, as_dict=True)
basemssql.conn()
basemssql.exec_select_query("select * from [dbo].[New_pattzb]")
outPathFile = r"F:\db3\patnetjson_big\jss_patent.big_json.gz"
i = 0
size = 10000      # rows requested per fetchmany call
count = 2000000   # counter value at which output rolls over to a new file
outPathFile = BaseFile.get_new_filename(outPathFile, sign=".")
start = time.time()
end = False
while True:
    with gzip.open(outPathFile, 'wb') as f:
        while True:
            batch = basemssql.cur.fetchmany(size=size)
            if not batch:
                print(i)
                print(int(time.time() - start))
                print("break")
                end = True
                break
            for row in batch:
                dicts = {k: _normalize_value(v) for k, v in row.items()}
                line = json.dumps(dicts, ensure_ascii=False) + "\n"
                f.write(line.encode(encoding="utf8"))
            i = i + size
            print(i)
            print(int(time.time() - start))
            if i >= count:
                # Switch to a fresh output file and restart the counter.
                outPathFile = BaseFile.get_new_filename(outPathFile, sign=".")
                i = 0
                break
    if end:
        break
75
-
76
-
77
-
@@ -1,8 +0,0 @@
1
from re_common.baselibrary.utils.baseodbc import BaseODBC

# Access database to dump; an alternative path used earlier is kept for reference:
# r"D:\download\cnki_qk\download\get_journal\mdb\cnki期刊信息_20200315.mdb"
MDB_PATH = r"C:\Users\xuzhu\Desktop\DB_20200701_GB.mdb"

baseodbc = BaseODBC(MDB_PATH)
baseodbc.get_cur()

# Print every row of the `CN` table.
for row in baseodbc.select("select * from `CN`"):
    print(row)
@@ -1,38 +0,0 @@
1
- import time
2
-
3
- from pandas._libs.tslibs.timestamps import Timestamp
4
-
5
-
6
def test_dataform_to_numpy():
    """Time ``BasePandas.dataform_to_numpy`` on two sample datasets.

    The first dataset gives each column a single type; the second mixes
    several value types inside every column. For each one the converted
    array and the elapsed seconds are printed.
    """
    import pandas as pd
    import numpy as np
    from re_common.baselibrary.utils.basepandas import BasePandas

    column_typed = {'A': 1.,
                    'B': pd.Timestamp('20130102'),
                    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
                    'D': np.array([3] * 4, dtype='int32'),
                    'E': pd.Categorical(["test", "train", "test", "train"]),
                    'F': 'foo'}

    # Use the public pd.Timestamp rather than the private pandas._libs path.
    stamp = pd.Timestamp('2013-01-02 00:00:00')
    tail = [stamp, 1.0, 3, 'test', 'foo']
    mixed_typed = {'A': [1.0] + tail,
                   'B': [stamp] + tail,
                   'C': [1.0] + tail,
                   'D': [3] + tail,
                   'E': ['test'] + tail,
                   'F': ['foo'] + tail}

    bp = BasePandas()
    for dicts in (column_typed, mixed_typed):
        startTime = time.time()
        df = bp.dicts_to_dataform(dicts)
        # Original author's note: conversion is very fast when the data types are uniform.
        print(bp.dataform_to_numpy(df))
        endTime = time.time()
        print(endTime - startTime)


test_dataform_to_numpy()
File without changes
@@ -1,14 +0,0 @@
1
import os
import sys

# Drop the last four components of this file's absolute path and put the
# remaining directory first on sys.path so `re_common` can be imported.
filepath = os.path.abspath(__file__)
pathlist = filepath.split(os.sep)[:-4]
TopPath = os.sep.join(pathlist)
sys.path.insert(0, TopPath)
print(TopPath)

from re_common.libtest.get_attr_test import settings
from re_common.baselibrary.tools.get_attr import get_attrs

# Show what get_attrs extracts from the sibling settings module.
print(get_attrs(settings))
@@ -1,55 +0,0 @@
1
# -*- coding: utf-8 -*-

"""
CREATE YOUR DEFAULT_CONFIG !

Some configuration:
    CONCURRENT_REQUESTS     number of concurrent requests
    RETRIES                 number of retries
    DOWNLOAD_DELAY          download delay
    RETRY_DELAY             retry delay
    DOWNLOAD_TIMEOUT        timeout limit
    USER_AGENT              user agent
    LOG_FILE                log file path
    LOG_LEVEL               log level
"""


CONCURRENT_REQUESTS = 20


MIDDLEWARE = [
    'middlewares.middleware',
]


# PIPELINES = []


DEFAULT_REQUEST_CONFIG = {
    "RETRIES": 0,
    "DOWNLOAD_DELAY": 0,
    "RETRY_DELAY": 0,
    "DOWNLOAD_TIMEOUT": 10,
}


# '''Generate a log file'''
# LOG_FILE = './asyncpy.log'
# LOG_LEVEL = 'DEBUG'


# CLOSESPIDER_TIMEOUT = 10


USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36"
@@ -1,54 +0,0 @@
1
-
2
-
3
- from re_common.vip.baseencodeid import BaseLngid
4
-
5
-
6
def encode_id():
    """
    Derive a lngid from sub_db_id and rawid and print it for both flag values.
    case_insensitive: per the original note, marks whether the source site's
    rawid distinguishes upper and lower case.
    """
    encoder = BaseLngid()
    sub_db_id, rawid = "1", "A44cbe4375b431741"
    runs = (
        ("********区分大小写************", True),
        ("********不区分大小写************", False),
    )
    for banner, flag in runs:
        print(banner)
        print(encoder.GetLngid(sub_db_id,
                               rawid,
                               case_insensitive=flag))
25
-
26
def decode_id():
    """
    Recover rawid values from sample limited_id strings and print them.

    Per the original note, limited_id is what remains of a lngid once
    sub_db_id is removed, and it cannot be reversed once it is longer than 20.
    :return:
    """
    limited_id_big = "F65F41FFCF049A3A21516C5CDFE40A22"
    limited_id_small = "HLXMH9XNH5XMJC04HHXMJDG2J919Y"
    limited_id_err = "123456" * 20
    decoder = BaseLngid()

    print("********区分大小写************")
    print(decoder.GetRawid(limited_id_big, case_insensitive=True))

    print("********不区分大小写************")
    print(decoder.GetRawid(limited_id_small, case_insensitive=False))

    print("*******limited_id超过20长度时*******")
    # The result of this over-long case is computed but not printed.
    # NOTE(review): presumably this call is expected to raise -- confirm.
    rawid = decoder.GetRawid(limited_id_err, case_insensitive=False)
47
-
48
-
49
-
50
-
51
-
52
# Script entry point: only the decode demo runs; the encode demo is left disabled.
if __name__ == '__main__':
    # encode_id()
    decode_id()