re-common 2.0.0__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- re_common/v2/baselibrary/tools/search_hash_tools.py +33 -0
- re_common/v2/baselibrary/tools/text_matcher.py +223 -0
- re_common/v2/baselibrary/utils/BusinessStringUtil.py +2 -2
- re_common/v2/baselibrary/utils/author_smi.py +308 -0
- re_common/v2/baselibrary/utils/string_clear.py +15 -1
- re_common/v2/baselibrary/utils/stringutils.py +36 -1
- {re_common-2.0.0.dist-info → re_common-2.0.1.dist-info}/METADATA +1 -1
- re_common-2.0.1.dist-info/RECORD +25 -0
- re_common/baselibrary/__init__.py +0 -4
- re_common/baselibrary/baseabs/__init__.py +0 -7
- re_common/baselibrary/baseabs/baseabs.py +0 -26
- re_common/baselibrary/database/mbuilder.py +0 -132
- re_common/baselibrary/database/moudle.py +0 -93
- re_common/baselibrary/database/msqlite3.py +0 -194
- re_common/baselibrary/database/mysql.py +0 -169
- re_common/baselibrary/database/sql_factory.py +0 -26
- re_common/baselibrary/mthread/MThreadingRun.py +0 -486
- re_common/baselibrary/mthread/MThreadingRunEvent.py +0 -349
- re_common/baselibrary/mthread/__init__.py +0 -3
- re_common/baselibrary/mthread/mythreading.py +0 -695
- re_common/baselibrary/pakge_other/__init__.py +0 -0
- re_common/baselibrary/pakge_other/socks.py +0 -404
- re_common/baselibrary/readconfig/__init__.py +0 -0
- re_common/baselibrary/readconfig/config_factory.py +0 -18
- re_common/baselibrary/readconfig/ini_config.py +0 -317
- re_common/baselibrary/readconfig/toml_config.py +0 -49
- re_common/baselibrary/temporary/__init__.py +0 -0
- re_common/baselibrary/temporary/envdata.py +0 -36
- re_common/baselibrary/tools/__init__.py +0 -0
- re_common/baselibrary/tools/all_requests/__init__.py +0 -0
- re_common/baselibrary/tools/all_requests/aiohttp_request.py +0 -118
- re_common/baselibrary/tools/all_requests/httpx_requet.py +0 -102
- re_common/baselibrary/tools/all_requests/mrequest.py +0 -412
- re_common/baselibrary/tools/all_requests/requests_request.py +0 -81
- re_common/baselibrary/tools/batch_compre/__init__.py +0 -0
- re_common/baselibrary/tools/batch_compre/bijiao_batch.py +0 -31
- re_common/baselibrary/tools/contrast_db3.py +0 -123
- re_common/baselibrary/tools/copy_file.py +0 -39
- re_common/baselibrary/tools/db3_2_sizedb3.py +0 -102
- re_common/baselibrary/tools/foreachgz.py +0 -40
- re_common/baselibrary/tools/get_attr.py +0 -11
- re_common/baselibrary/tools/image_to_pdf.py +0 -62
- re_common/baselibrary/tools/java_code_deal.py +0 -139
- re_common/baselibrary/tools/javacode.py +0 -79
- re_common/baselibrary/tools/mdb_db3.py +0 -48
- re_common/baselibrary/tools/merge_file.py +0 -171
- re_common/baselibrary/tools/merge_gz_file.py +0 -165
- re_common/baselibrary/tools/mhdfstools/__init__.py +0 -0
- re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py +0 -42
- re_common/baselibrary/tools/mhdfstools/hdfst.py +0 -42
- re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py +0 -38
- re_common/baselibrary/tools/mongo_tools.py +0 -50
- re_common/baselibrary/tools/move_file.py +0 -170
- re_common/baselibrary/tools/move_mongo/__init__.py +0 -0
- re_common/baselibrary/tools/move_mongo/mongo_table_to_file.py +0 -63
- re_common/baselibrary/tools/move_mongo/move_mongo_table.py +0 -354
- re_common/baselibrary/tools/move_mongo/use_mttf.py +0 -18
- re_common/baselibrary/tools/move_mongo/use_mv.py +0 -93
- re_common/baselibrary/tools/mpandas/__init__.py +0 -0
- re_common/baselibrary/tools/mpandas/mpandasreadexcel.py +0 -125
- re_common/baselibrary/tools/mpandas/pandas_visualization.py +0 -8
- re_common/baselibrary/tools/myparsel.py +0 -104
- re_common/baselibrary/tools/rename_dir_file.py +0 -37
- re_common/baselibrary/tools/sequoiadb_utils.py +0 -398
- re_common/baselibrary/tools/split_line_to_many.py +0 -25
- re_common/baselibrary/tools/stringtodicts.py +0 -33
- re_common/baselibrary/tools/workwechant_bot.py +0 -84
- re_common/baselibrary/utils/__init__.py +0 -0
- re_common/baselibrary/utils/baseaiohttp.py +0 -296
- re_common/baselibrary/utils/baseaiomysql.py +0 -87
- re_common/baselibrary/utils/baseallstep.py +0 -191
- re_common/baselibrary/utils/baseavro.py +0 -19
- re_common/baselibrary/utils/baseboto3.py +0 -291
- re_common/baselibrary/utils/basecsv.py +0 -32
- re_common/baselibrary/utils/basedict.py +0 -133
- re_common/baselibrary/utils/basedir.py +0 -241
- re_common/baselibrary/utils/baseencode.py +0 -351
- re_common/baselibrary/utils/baseencoding.py +0 -29
- re_common/baselibrary/utils/baseesdsl.py +0 -86
- re_common/baselibrary/utils/baseexcel.py +0 -264
- re_common/baselibrary/utils/baseexcept.py +0 -109
- re_common/baselibrary/utils/basefile.py +0 -654
- re_common/baselibrary/utils/baseftp.py +0 -214
- re_common/baselibrary/utils/basegzip.py +0 -60
- re_common/baselibrary/utils/basehdfs.py +0 -135
- re_common/baselibrary/utils/basehttpx.py +0 -268
- re_common/baselibrary/utils/baseip.py +0 -87
- re_common/baselibrary/utils/basejson.py +0 -2
- re_common/baselibrary/utils/baselist.py +0 -32
- re_common/baselibrary/utils/basemotor.py +0 -190
- re_common/baselibrary/utils/basemssql.py +0 -98
- re_common/baselibrary/utils/baseodbc.py +0 -113
- re_common/baselibrary/utils/basepandas.py +0 -302
- re_common/baselibrary/utils/basepeewee.py +0 -11
- re_common/baselibrary/utils/basepika.py +0 -180
- re_common/baselibrary/utils/basepydash.py +0 -143
- re_common/baselibrary/utils/basepymongo.py +0 -230
- re_common/baselibrary/utils/basequeue.py +0 -22
- re_common/baselibrary/utils/baserar.py +0 -57
- re_common/baselibrary/utils/baserequest.py +0 -279
- re_common/baselibrary/utils/baseset.py +0 -8
- re_common/baselibrary/utils/basesmb.py +0 -403
- re_common/baselibrary/utils/basestring.py +0 -382
- re_common/baselibrary/utils/basetime.py +0 -320
- re_common/baselibrary/utils/basetuple.py +0 -0
- re_common/baselibrary/utils/baseurl.py +0 -121
- re_common/baselibrary/utils/basezip.py +0 -57
- re_common/baselibrary/utils/core/__init__.py +0 -8
- re_common/baselibrary/utils/core/bottomutils.py +0 -18
- re_common/baselibrary/utils/core/mdeprecated.py +0 -327
- re_common/baselibrary/utils/core/mlamada.py +0 -16
- re_common/baselibrary/utils/core/msginfo.py +0 -25
- re_common/baselibrary/utils/core/requests_core.py +0 -103
- re_common/baselibrary/utils/fateadm.py +0 -429
- re_common/baselibrary/utils/importfun.py +0 -123
- re_common/baselibrary/utils/mfaker.py +0 -57
- re_common/baselibrary/utils/my_abc/__init__.py +0 -3
- re_common/baselibrary/utils/my_abc/better_abc.py +0 -32
- re_common/baselibrary/utils/mylogger.py +0 -414
- re_common/baselibrary/utils/myredisclient.py +0 -861
- re_common/baselibrary/utils/pipupgrade.py +0 -21
- re_common/baselibrary/utils/ringlist.py +0 -85
- re_common/baselibrary/utils/version_compare.py +0 -36
- re_common/baselibrary/utils/ydmhttp.py +0 -126
- re_common/facade/__init__.py +0 -1
- re_common/facade/lazy_import.py +0 -11
- re_common/facade/loggerfacade.py +0 -25
- re_common/facade/mysqlfacade.py +0 -467
- re_common/facade/now.py +0 -31
- re_common/facade/sqlite3facade.py +0 -257
- re_common/facade/use/__init__.py +0 -0
- re_common/facade/use/mq_use_facade.py +0 -83
- re_common/facade/use/proxy_use_facade.py +0 -20
- re_common/libtest/__init__.py +0 -0
- re_common/libtest/base_dict_test.py +0 -19
- re_common/libtest/baseavro_test.py +0 -13
- re_common/libtest/basefile_test.py +0 -14
- re_common/libtest/basemssql_test.py +0 -77
- re_common/libtest/baseodbc_test.py +0 -8
- re_common/libtest/basepandas_test.py +0 -38
- re_common/libtest/get_attr_test/__init__.py +0 -0
- re_common/libtest/get_attr_test/get_attr_test_settings.py +0 -14
- re_common/libtest/get_attr_test/settings.py +0 -55
- re_common/libtest/idencode_test.py +0 -54
- re_common/libtest/iniconfig_test.py +0 -35
- re_common/libtest/ip_test.py +0 -35
- re_common/libtest/merge_file_test.py +0 -20
- re_common/libtest/mfaker_test.py +0 -9
- re_common/libtest/mm3_test.py +0 -32
- re_common/libtest/mylogger_test.py +0 -89
- re_common/libtest/myparsel_test.py +0 -28
- re_common/libtest/mysql_test.py +0 -151
- re_common/libtest/pymongo_test.py +0 -21
- re_common/libtest/split_test.py +0 -12
- re_common/libtest/sqlite3_merge_test.py +0 -6
- re_common/libtest/sqlite3_test.py +0 -34
- re_common/libtest/tomlconfig_test.py +0 -30
- re_common/libtest/use_tools_test/__init__.py +0 -3
- re_common/libtest/user/__init__.py +0 -5
- re_common/studio/__init__.py +0 -5
- re_common/studio/assignment_expressions.py +0 -37
- re_common/studio/mydash/__init__.py +0 -0
- re_common/studio/mydash/test1.py +0 -19
- re_common/studio/pydashstudio/__init__.py +0 -0
- re_common/studio/pydashstudio/first.py +0 -9
- re_common/studio/streamlitstudio/__init__.py +0 -0
- re_common/studio/streamlitstudio/first_app.py +0 -66
- re_common/studio/streamlitstudio/uber_pickups.py +0 -24
- re_common/studio/test.py +0 -19
- re_common/vip/__init__.py +0 -0
- re_common/vip/base_step_process.py +0 -11
- re_common/vip/baseencodeid.py +0 -91
- re_common/vip/changetaskname.py +0 -28
- re_common/vip/core_var.py +0 -24
- re_common/vip/mmh3Hash.py +0 -90
- re_common/vip/proxy/__init__.py +0 -0
- re_common/vip/proxy/allproxys.py +0 -127
- re_common/vip/proxy/allproxys_thread.py +0 -159
- re_common/vip/proxy/cnki_proxy.py +0 -153
- re_common/vip/proxy/kuaidaili.py +0 -87
- re_common/vip/proxy/proxy_all.py +0 -113
- re_common/vip/proxy/update_kuaidaili_0.py +0 -42
- re_common/vip/proxy/wanfang_proxy.py +0 -152
- re_common/vip/proxy/wp_proxy_all.py +0 -182
- re_common/vip/read_rawid_to_txt.py +0 -92
- re_common/vip/title/__init__.py +0 -5
- re_common/vip/title/transform/TransformBookTitleToZt.py +0 -125
- re_common/vip/title/transform/TransformConferenceTitleToZt.py +0 -139
- re_common/vip/title/transform/TransformCstadTitleToZt.py +0 -196
- re_common/vip/title/transform/TransformJournalTitleToZt.py +0 -203
- re_common/vip/title/transform/TransformPatentTitleToZt.py +0 -132
- re_common/vip/title/transform/TransformRegulationTitleToZt.py +0 -114
- re_common/vip/title/transform/TransformStandardTitleToZt.py +0 -135
- re_common/vip/title/transform/TransformThesisTitleToZt.py +0 -135
- re_common/vip/title/transform/__init__.py +0 -11
- re_common-2.0.0.dist-info/RECORD +0 -209
- /re_common/{baselibrary/database/__init__.py → v2/baselibrary/tools/list_tools.py} +0 -0
- {re_common-2.0.0.dist-info → re_common-2.0.1.dist-info}/LICENSE +0 -0
- {re_common-2.0.0.dist-info → re_common-2.0.1.dist-info}/WHEEL +0 -0
- {re_common-2.0.0.dist-info → re_common-2.0.1.dist-info}/top_level.txt +0 -0
|
@@ -1,257 +0,0 @@
|
|
|
1
|
-
# db3数据的连接返回一个连接对象
|
|
2
|
-
import sys
|
|
3
|
-
import traceback
|
|
4
|
-
|
|
5
|
-
from re_common.baselibrary import MLogger
|
|
6
|
-
from re_common.baselibrary.baseabs import BaseAbs
|
|
7
|
-
from re_common.baselibrary.database.mbuilder import Sqlite3Builder
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class Sqlite3Utiles(object):
|
|
11
|
-
def __init__(self, logger=None):
|
|
12
|
-
self.sqllite3 = None
|
|
13
|
-
self.conn = None
|
|
14
|
-
self.cursor = None
|
|
15
|
-
|
|
16
|
-
self._logger = logger
|
|
17
|
-
|
|
18
|
-
@property
|
|
19
|
-
def logger(self):
|
|
20
|
-
if self._logger is None:
|
|
21
|
-
return MLogger().streamlogger
|
|
22
|
-
return self._logger
|
|
23
|
-
|
|
24
|
-
@logger.setter
|
|
25
|
-
def logger(self, value):
|
|
26
|
-
assert isinstance(value, MLogger)
|
|
27
|
-
self._logger = value
|
|
28
|
-
|
|
29
|
-
def get_new_cursor(self):
|
|
30
|
-
"""
|
|
31
|
-
获取一个新的游标
|
|
32
|
-
:return:
|
|
33
|
-
"""
|
|
34
|
-
# 检查db的存在及是否断掉 不知道为什么 不可以加括号 但编辑器会警告
|
|
35
|
-
return self.conn.cursor()
|
|
36
|
-
|
|
37
|
-
def Sqlite3DBConnectFromFilePath(self, dbfilepath, encoding="gbk", timeout=60):
|
|
38
|
-
"""
|
|
39
|
-
通过直接文件连接 我使用Hadoop导下来的db3是gbk编码的
|
|
40
|
-
如果为utf-8编码请改为utf-8
|
|
41
|
-
:param sec: section
|
|
42
|
-
:return:
|
|
43
|
-
"""
|
|
44
|
-
sqllite3 = BaseAbs.get_sql_factory().sqlite_factory()
|
|
45
|
-
# 传入的是配置文件和section选项 dbpath为配置文件路径
|
|
46
|
-
sqlite3_moudle = Sqlite3Builder().build_file_path(dbfilepath).build_timeout(timeout).get_moudle()
|
|
47
|
-
sqllite3.link(sqlite3_moudle)
|
|
48
|
-
# 设置txt的编码格式 hadoop 上的db3为gbk 默认为utf8
|
|
49
|
-
sqllite3.set_encoding(encoding=encoding)
|
|
50
|
-
# 返回一个连接
|
|
51
|
-
self.sqllite3 = sqllite3
|
|
52
|
-
self.conn = self.sqllite3.db
|
|
53
|
-
return self
|
|
54
|
-
|
|
55
|
-
def Sqlite3DBConnectFromConfig(self, cobnfigfilepath, sec, encoding="gbk"):
|
|
56
|
-
"""
|
|
57
|
-
通过配置文件连接
|
|
58
|
-
:param sec: section
|
|
59
|
-
:return:
|
|
60
|
-
"""
|
|
61
|
-
sqllite3 = BaseAbs.get_sql_factory().sqlite_factory()
|
|
62
|
-
# 传入的是配置文件和section选项 dbpath为配置文件路径
|
|
63
|
-
sqlite3_moudle = Sqlite3Builder(cobnfigfilepath, sec).build_all().get_moudle()
|
|
64
|
-
sqllite3.link(sqlite3_moudle)
|
|
65
|
-
# 设置txt的编码格式 hadoop 上的db3为gbk 默认为utf8
|
|
66
|
-
sqllite3.set_encoding(encoding=encoding)
|
|
67
|
-
# 返回一个连接
|
|
68
|
-
self.sqllite3 = sqllite3
|
|
69
|
-
self.conn = self.sqllite3.db
|
|
70
|
-
return self
|
|
71
|
-
|
|
72
|
-
def ExeSqlliteList(self, sqlList, errExit=True):
|
|
73
|
-
"""
|
|
74
|
-
该函数和上面一样执行一个sql列表且不返回结果
|
|
75
|
-
属于插入和更新类函数 但该函数针对db3
|
|
76
|
-
:param sqllitename:
|
|
77
|
-
:param sqlList:
|
|
78
|
-
:return:
|
|
79
|
-
"""
|
|
80
|
-
dbMsg = None
|
|
81
|
-
cur = self.get_new_cursor()
|
|
82
|
-
if self.conn:
|
|
83
|
-
count = 0
|
|
84
|
-
for sql in sqlList:
|
|
85
|
-
count += 1
|
|
86
|
-
try:
|
|
87
|
-
self.logger.info("{} 执行sql数量:{}".format(sql, str(count)))
|
|
88
|
-
cur.execute(sql)
|
|
89
|
-
except:
|
|
90
|
-
self.logger.error('*errSql:' + sql)
|
|
91
|
-
dbMsg = '*InsertError:' + traceback.format_exc()
|
|
92
|
-
if errExit:
|
|
93
|
-
self.logger.error(dbMsg)
|
|
94
|
-
sys.exit()
|
|
95
|
-
if dbMsg:
|
|
96
|
-
self.logger.error(dbMsg)
|
|
97
|
-
continue
|
|
98
|
-
self.conn.commit()
|
|
99
|
-
cur.close()
|
|
100
|
-
|
|
101
|
-
def ExeSqlliteMany(self, sql, itermany, errExit=True):
|
|
102
|
-
dbMsg = None
|
|
103
|
-
cur = self.get_new_cursor()
|
|
104
|
-
if self.conn:
|
|
105
|
-
try:
|
|
106
|
-
self.logger.info("{}\n{}".format(sql, str(itermany)))
|
|
107
|
-
cur.executemany(sql, itermany)
|
|
108
|
-
except:
|
|
109
|
-
self.logger.error('*errSql:' + sql)
|
|
110
|
-
dbMsg = '*InsertError:' + traceback.format_exc()
|
|
111
|
-
if errExit:
|
|
112
|
-
self.logger.error(dbMsg)
|
|
113
|
-
sys.exit()
|
|
114
|
-
if dbMsg:
|
|
115
|
-
self.logger.error(dbMsg)
|
|
116
|
-
self.conn.commit()
|
|
117
|
-
cur.close()
|
|
118
|
-
|
|
119
|
-
def ExeSqlliteSql(self, sql):
|
|
120
|
-
"""
|
|
121
|
-
该函数和上面一样执行一个sql列表且不返回结果
|
|
122
|
-
属于插入和更新类函数 但该函数针对db3
|
|
123
|
-
:param sqllitename:
|
|
124
|
-
:param sqlList:
|
|
125
|
-
:return:
|
|
126
|
-
"""
|
|
127
|
-
dbMsg = None
|
|
128
|
-
cur = self.get_new_cursor()
|
|
129
|
-
if self.conn:
|
|
130
|
-
try:
|
|
131
|
-
self.logger.info(sql)
|
|
132
|
-
cur.execute(sql)
|
|
133
|
-
self.conn.commit()
|
|
134
|
-
except:
|
|
135
|
-
self.logger.error('*errSql:' + sql)
|
|
136
|
-
dbMsg = '*InsertError:' + traceback.format_exc()
|
|
137
|
-
if dbMsg:
|
|
138
|
-
self.logger.error(dbMsg)
|
|
139
|
-
return False
|
|
140
|
-
else:
|
|
141
|
-
return False
|
|
142
|
-
cur.close()
|
|
143
|
-
return True
|
|
144
|
-
|
|
145
|
-
def SelectFromSqlliteFetchall(self, sql):
|
|
146
|
-
"""
|
|
147
|
-
该函数和上面一样执行一个sql列表且不返回结果
|
|
148
|
-
属于插入和更新类函数 但该函数针对db3
|
|
149
|
-
:param sqllitename:
|
|
150
|
-
:param sqlList:
|
|
151
|
-
:return:
|
|
152
|
-
"""
|
|
153
|
-
cur = self.get_new_cursor()
|
|
154
|
-
if self.conn:
|
|
155
|
-
try:
|
|
156
|
-
self.logger.info(sql)
|
|
157
|
-
cur.execute(sql)
|
|
158
|
-
rows = cur.fetchall()
|
|
159
|
-
return rows
|
|
160
|
-
except:
|
|
161
|
-
self.logger.error('*errSql:' + sql)
|
|
162
|
-
dbMsg = '*InsertError:' + traceback.format_exc()
|
|
163
|
-
if dbMsg:
|
|
164
|
-
self.logger.error(dbMsg)
|
|
165
|
-
return False
|
|
166
|
-
else:
|
|
167
|
-
return False
|
|
168
|
-
cur.close()
|
|
169
|
-
|
|
170
|
-
def SelectFromSqlliteFetchall_dicts(self, sql):
|
|
171
|
-
"""
|
|
172
|
-
该函数和上面一样执行一个sql列表且不返回结果
|
|
173
|
-
属于插入和更新类函数 但该函数针对db3
|
|
174
|
-
:param sqllitename:
|
|
175
|
-
:param sqlList:
|
|
176
|
-
:return:
|
|
177
|
-
"""
|
|
178
|
-
|
|
179
|
-
def dict_factory(cursor, row):
|
|
180
|
-
d = {}
|
|
181
|
-
for idx, col in enumerate(cursor.description):
|
|
182
|
-
d[col[0]] = row[idx]
|
|
183
|
-
return d
|
|
184
|
-
|
|
185
|
-
self.conn.row_factory = dict_factory
|
|
186
|
-
cur = self.get_new_cursor()
|
|
187
|
-
if self.conn:
|
|
188
|
-
try:
|
|
189
|
-
self.logger.info(sql)
|
|
190
|
-
cur.execute(sql)
|
|
191
|
-
rows = cur.fetchall()
|
|
192
|
-
return rows
|
|
193
|
-
except:
|
|
194
|
-
self.logger.error('*errSql:' + sql)
|
|
195
|
-
dbMsg = '*InsertError:' + traceback.format_exc()
|
|
196
|
-
if dbMsg:
|
|
197
|
-
self.logger.error(dbMsg)
|
|
198
|
-
return False
|
|
199
|
-
else:
|
|
200
|
-
return False
|
|
201
|
-
cur.close()
|
|
202
|
-
|
|
203
|
-
def SelectFromSqlliteFetchOne(self, sql):
|
|
204
|
-
"""
|
|
205
|
-
该函数和上面一样执行一个sql列表且不返回结果
|
|
206
|
-
属于插入和更新类函数 但该函数针对db3
|
|
207
|
-
:param sqllitename:
|
|
208
|
-
:param sqlList:
|
|
209
|
-
:return:
|
|
210
|
-
"""
|
|
211
|
-
dbMsg = None
|
|
212
|
-
cur = self.get_new_cursor()
|
|
213
|
-
if self.conn:
|
|
214
|
-
try:
|
|
215
|
-
self.logger.info(sql)
|
|
216
|
-
cur.execute(sql)
|
|
217
|
-
while True:
|
|
218
|
-
row = cur.fetchone()
|
|
219
|
-
if row is None:
|
|
220
|
-
return None
|
|
221
|
-
else:
|
|
222
|
-
yield row
|
|
223
|
-
except:
|
|
224
|
-
self.logger.error('*errSql:' + sql)
|
|
225
|
-
dbMsg = '*InsertError:' + traceback.format_exc()
|
|
226
|
-
if dbMsg:
|
|
227
|
-
self.logger.error(dbMsg)
|
|
228
|
-
return False
|
|
229
|
-
else:
|
|
230
|
-
return False
|
|
231
|
-
cur.close()
|
|
232
|
-
|
|
233
|
-
def ExeVACUUM(self):
|
|
234
|
-
"""
|
|
235
|
-
清理空间
|
|
236
|
-
:return:
|
|
237
|
-
"""
|
|
238
|
-
dbMsg = None
|
|
239
|
-
if self.conn:
|
|
240
|
-
try:
|
|
241
|
-
self.conn.execute("VACUUM")
|
|
242
|
-
except:
|
|
243
|
-
dbMsg = '*VACUUMError:' + traceback.format_exc()
|
|
244
|
-
if dbMsg:
|
|
245
|
-
self.logger.error(dbMsg)
|
|
246
|
-
return False
|
|
247
|
-
else:
|
|
248
|
-
return False
|
|
249
|
-
return True
|
|
250
|
-
|
|
251
|
-
def sqliteEscape(self, keyWord):
|
|
252
|
-
keyWord = keyWord.replace("'", "''")
|
|
253
|
-
keyWord = keyWord.replace("\\", "\\\\")
|
|
254
|
-
return keyWord
|
|
255
|
-
|
|
256
|
-
def close(self):
|
|
257
|
-
self.conn.close()
|
re_common/facade/use/__init__.py
DELETED
|
File without changes
|
|
@@ -1,83 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
import traceback
|
|
3
|
-
|
|
4
|
-
from re_common.baselibrary.utils.basepika import BasePika
|
|
5
|
-
from retry import retry
|
|
6
|
-
|
|
7
|
-
logging_logger = logging.getLogger(__name__)
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class UseMq(object):
|
|
11
|
-
|
|
12
|
-
def __init__(self, queue, qos=1):
|
|
13
|
-
self.queue = queue
|
|
14
|
-
self.qos = qos
|
|
15
|
-
self.basepika = BasePika()
|
|
16
|
-
self.basepika.set_default()
|
|
17
|
-
self.basepika.connect()
|
|
18
|
-
self.basepika.create_channel()
|
|
19
|
-
self.basepika.queue_declare(queue=queue, durable=True)
|
|
20
|
-
self.basepika.basic_qos(qos)
|
|
21
|
-
self.properties = self.basepika.get_properties()
|
|
22
|
-
|
|
23
|
-
def re_conn(self):
|
|
24
|
-
"""
|
|
25
|
-
重新连接
|
|
26
|
-
:return:
|
|
27
|
-
"""
|
|
28
|
-
self.basepika.connect()
|
|
29
|
-
self.basepika.create_channel()
|
|
30
|
-
self.basepika.queue_declare(queue=self.queue, durable=True)
|
|
31
|
-
self.basepika.basic_qos(self.qos)
|
|
32
|
-
|
|
33
|
-
@retry(delay=5, backoff=2, max_delay=60 * 3, logger=logging_logger)
|
|
34
|
-
def get_mq(self):
|
|
35
|
-
try:
|
|
36
|
-
if self.basepika.channel.is_closed:
|
|
37
|
-
logging_logger.info("重连中......")
|
|
38
|
-
self.re_conn()
|
|
39
|
-
logging_logger.info("重连完成......")
|
|
40
|
-
self.basepika.set_get_msg_callback(routing_key=self.queue, callback=self.callback, auto_ack=False)
|
|
41
|
-
self.basepika.start_get_msg()
|
|
42
|
-
except:
|
|
43
|
-
traceback.print_exc()
|
|
44
|
-
logging_logger.info("重连中......")
|
|
45
|
-
self.re_conn()
|
|
46
|
-
|
|
47
|
-
def callback(self, ch, method, properties, body):
|
|
48
|
-
# print(type(body))
|
|
49
|
-
# print(" [x] Received %r" % body)
|
|
50
|
-
# body = body.decode()
|
|
51
|
-
self.callback2(ch, method, properties, body)
|
|
52
|
-
if self.basepika.auto_ack is False:
|
|
53
|
-
self.basepika.basic_ack(ch, method)
|
|
54
|
-
|
|
55
|
-
def callback2(self, ch, method, properties, body):
|
|
56
|
-
pass
|
|
57
|
-
|
|
58
|
-
@retry(delay=5, backoff=2, max_delay=60 * 3, logger=logging_logger)
|
|
59
|
-
def send_mq(self, body, num=100):
|
|
60
|
-
try:
|
|
61
|
-
if self.basepika.get_queue_size(self.queue) < num:
|
|
62
|
-
self.basepika.easy_send_msg(routing_key=self.queue,
|
|
63
|
-
body=body,
|
|
64
|
-
properties=self.properties)
|
|
65
|
-
return True
|
|
66
|
-
else:
|
|
67
|
-
return False
|
|
68
|
-
except:
|
|
69
|
-
traceback.print_exc()
|
|
70
|
-
logging_logger.info("重连中......")
|
|
71
|
-
self.re_conn()
|
|
72
|
-
return False
|
|
73
|
-
|
|
74
|
-
def get_server_mq_num(self, num=100):
|
|
75
|
-
if self.basepika.get_queue_size(self.queue) < num:
|
|
76
|
-
return True
|
|
77
|
-
else:
|
|
78
|
-
return False
|
|
79
|
-
|
|
80
|
-
def easy_send_mq(self, body):
|
|
81
|
-
self.basepika.easy_send_msg(routing_key=self.queue,
|
|
82
|
-
body=body,
|
|
83
|
-
properties=self.properties)
|
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
import time
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
def set_school_list_proxy(self):
|
|
5
|
-
"""
|
|
6
|
-
使用该函数条件, self为类的对象
|
|
7
|
-
代理在 proxyset set里
|
|
8
|
-
使用的是bshttpx这个变量
|
|
9
|
-
"""
|
|
10
|
-
try:
|
|
11
|
-
proxy = self.proxyset.pop()
|
|
12
|
-
self.logger.info("proxy is:{},proxy size is: {}".format(proxy, len(self.proxyset)))
|
|
13
|
-
return self.bshttpx.creat_sn(proxy=proxy,
|
|
14
|
-
headers=self.headers,
|
|
15
|
-
verify=False)
|
|
16
|
-
except KeyError as e:
|
|
17
|
-
time.sleep(15)
|
|
18
|
-
if str(e) == "'pop from an empty set'":
|
|
19
|
-
self.proxyset = set(self.school_proxy_list)
|
|
20
|
-
return self.set_school_list_proxy()
|
re_common/libtest/__init__.py
DELETED
|
File without changes
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
from re_common.baselibrary import BaseDicts
|
|
2
|
-
from re_common.baselibrary.utils.mfaker import MFaker
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
def test_basedict_sortkeys():
|
|
6
|
-
fake = MFaker()
|
|
7
|
-
dicts = fake.create_data(MFaker.m_pydict, **fake.py_para())
|
|
8
|
-
print(dicts)
|
|
9
|
-
dicts2 = BaseDicts.sortkeys(dicts)
|
|
10
|
-
print(dicts2)
|
|
11
|
-
print(dicts)
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
def test_basedict_sortvalues():
|
|
15
|
-
dicts = {'data': 'FDZqqOGNMyGJlNRoCsJd', 'participant': 'petersonadrienne@bennett.com',
|
|
16
|
-
'often': 'KnJSSDeSTPboiwjSdGwR', 'friend': '1639', 'above': '8144', 'in': '1614'}
|
|
17
|
-
dicts2 = BaseDicts.sortvalues(dicts)
|
|
18
|
-
print(dicts2)
|
|
19
|
-
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
from re_common.baselibrary.utils.baseavro import BaseAvro
|
|
2
|
-
|
|
3
|
-
id_set = set()
|
|
4
|
-
for line in BaseAvro().read_line_yeild(r"F:\fun2\avro"):
|
|
5
|
-
id_set.add(line["key"])
|
|
6
|
-
|
|
7
|
-
print(len(id_set))
|
|
8
|
-
lines = ""
|
|
9
|
-
|
|
10
|
-
with open(r"F:\fun2\avro1.txt", 'w', encoding="utf-8") as f:
|
|
11
|
-
for id in id_set:
|
|
12
|
-
lines = id + "\n"
|
|
13
|
-
f.write(lines)
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
from re_common.baselibrary.utils.basedir import BaseDir
|
|
2
|
-
from re_common.baselibrary.utils.basefile import BaseFile
|
|
3
|
-
|
|
4
|
-
# files_line_list = BaseFile.read_end_line(r"F:\db3\mysql_date\test\part-00000", 3)
|
|
5
|
-
# for file_line in files_line_list:
|
|
6
|
-
# # file_line = str(file_line, encoding="utf-8")
|
|
7
|
-
# # file_line = file_line.decode(encoding="utf-8")
|
|
8
|
-
# print(file_line)
|
|
9
|
-
|
|
10
|
-
for file in BaseDir.get_dir_all_files(r"F:\db3\mysql_date\data_dir"):
|
|
11
|
-
files_line_list = BaseFile.read_end_line(r"F:\db3\mysql_date\test\part-00000", 11000)
|
|
12
|
-
strs = "\n".join(files_line_list)
|
|
13
|
-
BaseFile.single_add_file(r"F:\db3\mysql_date\end\part-00000", strs + "\n")
|
|
14
|
-
|
|
@@ -1,77 +0,0 @@
|
|
|
1
|
-
###########################################
|
|
2
|
-
# 同项目调用基础包
|
|
3
|
-
import datetime
|
|
4
|
-
import gzip
|
|
5
|
-
import json
|
|
6
|
-
import os
|
|
7
|
-
import sys
|
|
8
|
-
import time
|
|
9
|
-
|
|
10
|
-
filepath = os.path.abspath(__file__)
|
|
11
|
-
pathlist = filepath.split(os.sep)
|
|
12
|
-
pathlist = pathlist[:-3]
|
|
13
|
-
TopPath = os.sep.join(pathlist)
|
|
14
|
-
sys.path.insert(0, TopPath)
|
|
15
|
-
print(TopPath)
|
|
16
|
-
############################################
|
|
17
|
-
|
|
18
|
-
from re_common.baselibrary.utils.basemssql import BaseMsSql
|
|
19
|
-
from re_common.baselibrary.utils.basefile import BaseFile
|
|
20
|
-
from re_common.baselibrary.utils.basetime import BaseTime
|
|
21
|
-
bt = BaseTime()
|
|
22
|
-
|
|
23
|
-
host = "127.0.0.1"
|
|
24
|
-
user = "sa"
|
|
25
|
-
pwd = "xujiang1994323"
|
|
26
|
-
db = "patData"
|
|
27
|
-
charset = "utf8"
|
|
28
|
-
|
|
29
|
-
basemssql = BaseMsSql(host, user, pwd, db, charset, as_dict=True)
|
|
30
|
-
basemssql.conn()
|
|
31
|
-
basemssql.exec_select_query("select * from [dbo].[New_pattzb]")
|
|
32
|
-
outPathFile = r"F:\db3\patnetjson_big\jss_patent.big_json.gz"
|
|
33
|
-
i = 0
|
|
34
|
-
size = 10000
|
|
35
|
-
count = 2000000
|
|
36
|
-
outPathFile = BaseFile.get_new_filename(outPathFile, sign=".")
|
|
37
|
-
start = time.time()
|
|
38
|
-
end = False
|
|
39
|
-
while True:
|
|
40
|
-
with gzip.open(outPathFile, 'wb') as f:
|
|
41
|
-
while True:
|
|
42
|
-
a = basemssql.cur.fetchmany(size=size)
|
|
43
|
-
if not a:
|
|
44
|
-
print(i)
|
|
45
|
-
print(int(time.time() - start))
|
|
46
|
-
print("break")
|
|
47
|
-
end = True
|
|
48
|
-
break
|
|
49
|
-
for row in a:
|
|
50
|
-
dicts = {}
|
|
51
|
-
for k,v in row.items():
|
|
52
|
-
# print(k)
|
|
53
|
-
try:
|
|
54
|
-
v = v.encode('latin-1').decode('gbk')
|
|
55
|
-
except:
|
|
56
|
-
pass
|
|
57
|
-
if isinstance(v, datetime.datetime):
|
|
58
|
-
v = bt.datetime_to_string(v, "%Y-%m-%d %H:%M:%S")
|
|
59
|
-
dicts[k] = v
|
|
60
|
-
# print(type(v))
|
|
61
|
-
# print(dicts)
|
|
62
|
-
line = json.dumps(dicts,ensure_ascii=False)+"\n"
|
|
63
|
-
# print(line)
|
|
64
|
-
lines = line.encode(encoding="utf8")
|
|
65
|
-
f.write(lines)
|
|
66
|
-
i = i + size
|
|
67
|
-
print(i)
|
|
68
|
-
print(int(time.time() - start))
|
|
69
|
-
if i >= count:
|
|
70
|
-
outPathFile = BaseFile.get_new_filename(outPathFile, sign=".")
|
|
71
|
-
i=0
|
|
72
|
-
break
|
|
73
|
-
if end:
|
|
74
|
-
break
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
from re_common.baselibrary.utils.baseodbc import BaseODBC
|
|
2
|
-
|
|
3
|
-
baseodbc = BaseODBC(r"C:\Users\xuzhu\Desktop\DB_20200701_GB.mdb")
|
|
4
|
-
# baseodbc = BaseODBC(r"D:\download\cnki_qk\download\get_journal\mdb\cnki期刊信息_20200315.mdb")
|
|
5
|
-
baseodbc.get_cur()
|
|
6
|
-
sql = "select * from `CN`"
|
|
7
|
-
for row in baseodbc.select(sql):
|
|
8
|
-
print(row)
|
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
import time
|
|
2
|
-
|
|
3
|
-
from pandas._libs.tslibs.timestamps import Timestamp
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def test_dataform_to_numpy():
|
|
7
|
-
import pandas as pd
|
|
8
|
-
import numpy as np
|
|
9
|
-
dicts = {'A': 1.,
|
|
10
|
-
'B': pd.Timestamp('20130102'),
|
|
11
|
-
'C': pd.Series(1, index=list(range(4)), dtype='float32'),
|
|
12
|
-
'D': np.array([3] * 4, dtype='int32'),
|
|
13
|
-
'E': pd.Categorical(["test", "train", "test", "train"]),
|
|
14
|
-
'F': 'foo'}
|
|
15
|
-
from re_common.baselibrary.utils.basepandas import BasePandas
|
|
16
|
-
bp = BasePandas()
|
|
17
|
-
startTime = time.time()
|
|
18
|
-
df = bp.dicts_to_dataform(dicts)
|
|
19
|
-
# 数据类型一致时速度会很快
|
|
20
|
-
print(bp.dataform_to_numpy(df))
|
|
21
|
-
endTime = time.time()
|
|
22
|
-
print(endTime - startTime)
|
|
23
|
-
dicts = {'A': [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
|
|
24
|
-
'B': [Timestamp('2013-01-02 00:00:00'), Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
|
|
25
|
-
'C': [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
|
|
26
|
-
'D': [3, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
|
|
27
|
-
'E': ['test', Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
|
|
28
|
-
'F': ['foo', Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo']}
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
startTime = time.time()
|
|
32
|
-
df = bp.dicts_to_dataform(dicts)
|
|
33
|
-
# 数据类型一致时速度会很快
|
|
34
|
-
print(bp.dataform_to_numpy(df))
|
|
35
|
-
endTime = time.time()
|
|
36
|
-
print(endTime - startTime)
|
|
37
|
-
|
|
38
|
-
test_dataform_to_numpy()
|
|
File without changes
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import sys
|
|
3
|
-
|
|
4
|
-
filepath = os.path.abspath(__file__)
|
|
5
|
-
pathlist = filepath.split(os.sep)
|
|
6
|
-
pathlist = pathlist[:-4]
|
|
7
|
-
TopPath = os.sep.join(pathlist)
|
|
8
|
-
sys.path.insert(0, TopPath)
|
|
9
|
-
print(TopPath)
|
|
10
|
-
|
|
11
|
-
from re_common.libtest.get_attr_test import settings
|
|
12
|
-
from re_common.baselibrary.tools.get_attr import get_attrs
|
|
13
|
-
|
|
14
|
-
print(get_attrs(settings))
|
|
@@ -1,55 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
"""
|
|
6
|
-
CREATE YOUR DEFAULT_CONFIG !
|
|
7
|
-
|
|
8
|
-
Some configuration:
|
|
9
|
-
CONCURRENT_REQUESTS 并发数量
|
|
10
|
-
RETRIES 重试次数
|
|
11
|
-
DOWNLOAD_DELAY 下载延时
|
|
12
|
-
RETRY_DELAY 重试延时
|
|
13
|
-
DOWNLOAD_TIMEOUT 超时限制
|
|
14
|
-
USER_AGENT 用户代理
|
|
15
|
-
LOG_FILE 日志路径
|
|
16
|
-
LOG_LEVEL 日志等级
|
|
17
|
-
"""
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
CONCURRENT_REQUESTS = 20
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
MIDDLEWARE = [
|
|
27
|
-
'middlewares.middleware',
|
|
28
|
-
]
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
# PIPELINES = []
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
DEFAULT_REQUEST_CONFIG = {
|
|
37
|
-
"RETRIES": 0,
|
|
38
|
-
"DOWNLOAD_DELAY": 0,
|
|
39
|
-
"RETRY_DELAY": 0,
|
|
40
|
-
"DOWNLOAD_TIMEOUT": 10,
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
# '''生成日志文件'''
|
|
45
|
-
# LOG_FILE = './asyncpy.log'
|
|
46
|
-
# LOG_LEVEL = 'DEBUG'
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
# CLOSESPIDER_TIMEOUT = 10
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36"
|
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
from re_common.vip.baseencodeid import BaseLngid
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def encode_id():
|
|
7
|
-
""""
|
|
8
|
-
由 sub_db_id 和 rawid 得到 lngid。
|
|
9
|
-
case_insensitive: 标识源网站的 rawid 是否区分大小写
|
|
10
|
-
"""
|
|
11
|
-
|
|
12
|
-
sub_db_id = "1"
|
|
13
|
-
rawid = "A44cbe4375b431741"
|
|
14
|
-
b = BaseLngid()
|
|
15
|
-
print("********区分大小写************")
|
|
16
|
-
lngid = b.GetLngid(sub_db_id,
|
|
17
|
-
rawid,
|
|
18
|
-
case_insensitive=True)
|
|
19
|
-
print(lngid)
|
|
20
|
-
print("********不区分大小写************")
|
|
21
|
-
lngid = b.GetLngid(sub_db_id,
|
|
22
|
-
rawid,
|
|
23
|
-
case_insensitive=False)
|
|
24
|
-
print(lngid)
|
|
25
|
-
|
|
26
|
-
def decode_id():
|
|
27
|
-
"""
|
|
28
|
-
limited_id 是由 lngid去除sub_db_id后得到的字符串
|
|
29
|
-
limited_id超过20长度时,为不可逆的
|
|
30
|
-
:return:
|
|
31
|
-
"""
|
|
32
|
-
limited_id_big = "F65F41FFCF049A3A21516C5CDFE40A22"
|
|
33
|
-
limited_id_small = "HLXMH9XNH5XMJC04HHXMJDG2J919Y"
|
|
34
|
-
limited_id_err = "123456" * 20
|
|
35
|
-
b = BaseLngid()
|
|
36
|
-
print("********区分大小写************")
|
|
37
|
-
rawid = b.GetRawid(limited_id_big,
|
|
38
|
-
case_insensitive=True)
|
|
39
|
-
print(rawid)
|
|
40
|
-
print("********不区分大小写************")
|
|
41
|
-
rawid = b.GetRawid(limited_id_small,
|
|
42
|
-
case_insensitive=False)
|
|
43
|
-
print(rawid)
|
|
44
|
-
print("*******limited_id超过20长度时*******")
|
|
45
|
-
rawid = b.GetRawid(limited_id_err,
|
|
46
|
-
case_insensitive=False)
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
if __name__ == '__main__':
|
|
53
|
-
# encode_id()
|
|
54
|
-
decode_id()
|