re_common-10.0.22-py3-none-any.whl → re_common-10.0.24-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- re_common/baselibrary/__init__.py +4 -4
- re_common/baselibrary/baseabs/__init__.py +6 -6
- re_common/baselibrary/baseabs/baseabs.py +26 -26
- re_common/baselibrary/database/mbuilder.py +132 -132
- re_common/baselibrary/database/moudle.py +93 -93
- re_common/baselibrary/database/msqlite3.py +194 -194
- re_common/baselibrary/database/mysql.py +169 -169
- re_common/baselibrary/database/sql_factory.py +26 -26
- re_common/baselibrary/mthread/MThreadingRun.py +486 -486
- re_common/baselibrary/mthread/MThreadingRunEvent.py +349 -349
- re_common/baselibrary/mthread/__init__.py +2 -2
- re_common/baselibrary/mthread/mythreading.py +695 -695
- re_common/baselibrary/pakge_other/socks.py +404 -404
- re_common/baselibrary/readconfig/config_factory.py +18 -18
- re_common/baselibrary/readconfig/ini_config.py +317 -317
- re_common/baselibrary/readconfig/toml_config.py +49 -49
- re_common/baselibrary/temporary/envdata.py +36 -36
- re_common/baselibrary/tools/all_requests/aiohttp_request.py +118 -118
- re_common/baselibrary/tools/all_requests/httpx_requet.py +102 -102
- re_common/baselibrary/tools/all_requests/mrequest.py +412 -412
- re_common/baselibrary/tools/all_requests/requests_request.py +81 -81
- re_common/baselibrary/tools/batch_compre/bijiao_batch.py +31 -31
- re_common/baselibrary/tools/contrast_db3.py +123 -123
- re_common/baselibrary/tools/copy_file.py +39 -39
- re_common/baselibrary/tools/db3_2_sizedb3.py +102 -102
- re_common/baselibrary/tools/foreachgz.py +39 -39
- re_common/baselibrary/tools/get_attr.py +10 -10
- re_common/baselibrary/tools/image_to_pdf.py +61 -61
- re_common/baselibrary/tools/java_code_deal.py +139 -139
- re_common/baselibrary/tools/javacode.py +79 -79
- re_common/baselibrary/tools/mdb_db3.py +48 -48
- re_common/baselibrary/tools/merge_file.py +171 -171
- re_common/baselibrary/tools/merge_gz_file.py +165 -165
- re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py +42 -42
- re_common/baselibrary/tools/mhdfstools/hdfst.py +42 -42
- re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py +38 -38
- re_common/baselibrary/tools/mongo_tools.py +50 -50
- re_common/baselibrary/tools/move_file.py +170 -170
- re_common/baselibrary/tools/move_mongo/mongo_table_to_file.py +63 -63
- re_common/baselibrary/tools/move_mongo/move_mongo_table.py +354 -354
- re_common/baselibrary/tools/move_mongo/use_mttf.py +18 -18
- re_common/baselibrary/tools/move_mongo/use_mv.py +93 -93
- re_common/baselibrary/tools/mpandas/mpandasreadexcel.py +125 -125
- re_common/baselibrary/tools/mpandas/pandas_visualization.py +7 -7
- re_common/baselibrary/tools/myparsel.py +104 -104
- re_common/baselibrary/tools/rename_dir_file.py +37 -37
- re_common/baselibrary/tools/sequoiadb_utils.py +398 -398
- re_common/baselibrary/tools/split_line_to_many.py +25 -25
- re_common/baselibrary/tools/stringtodicts.py +33 -33
- re_common/baselibrary/tools/workwechant_bot.py +84 -84
- re_common/baselibrary/utils/baseaiohttp.py +296 -296
- re_common/baselibrary/utils/baseaiomysql.py +87 -87
- re_common/baselibrary/utils/baseallstep.py +191 -191
- re_common/baselibrary/utils/baseavro.py +19 -19
- re_common/baselibrary/utils/baseboto3.py +291 -291
- re_common/baselibrary/utils/basecsv.py +32 -32
- re_common/baselibrary/utils/basedict.py +133 -133
- re_common/baselibrary/utils/basedir.py +241 -241
- re_common/baselibrary/utils/baseencode.py +351 -351
- re_common/baselibrary/utils/baseencoding.py +28 -28
- re_common/baselibrary/utils/baseesdsl.py +86 -86
- re_common/baselibrary/utils/baseexcel.py +264 -264
- re_common/baselibrary/utils/baseexcept.py +109 -109
- re_common/baselibrary/utils/basefile.py +654 -654
- re_common/baselibrary/utils/baseftp.py +214 -214
- re_common/baselibrary/utils/basegzip.py +60 -60
- re_common/baselibrary/utils/basehdfs.py +135 -135
- re_common/baselibrary/utils/basehttpx.py +268 -268
- re_common/baselibrary/utils/baseip.py +87 -87
- re_common/baselibrary/utils/basejson.py +2 -2
- re_common/baselibrary/utils/baselist.py +32 -32
- re_common/baselibrary/utils/basemotor.py +190 -190
- re_common/baselibrary/utils/basemssql.py +98 -98
- re_common/baselibrary/utils/baseodbc.py +113 -113
- re_common/baselibrary/utils/basepandas.py +302 -302
- re_common/baselibrary/utils/basepeewee.py +11 -11
- re_common/baselibrary/utils/basepika.py +180 -180
- re_common/baselibrary/utils/basepydash.py +143 -143
- re_common/baselibrary/utils/basepymongo.py +230 -230
- re_common/baselibrary/utils/basequeue.py +22 -22
- re_common/baselibrary/utils/baserar.py +57 -57
- re_common/baselibrary/utils/baserequest.py +279 -279
- re_common/baselibrary/utils/baseset.py +8 -8
- re_common/baselibrary/utils/basesmb.py +403 -403
- re_common/baselibrary/utils/basestring.py +382 -382
- re_common/baselibrary/utils/basetime.py +320 -320
- re_common/baselibrary/utils/baseurl.py +121 -121
- re_common/baselibrary/utils/basezip.py +57 -57
- re_common/baselibrary/utils/core/__init__.py +7 -7
- re_common/baselibrary/utils/core/bottomutils.py +18 -18
- re_common/baselibrary/utils/core/mdeprecated.py +327 -327
- re_common/baselibrary/utils/core/mlamada.py +16 -16
- re_common/baselibrary/utils/core/msginfo.py +25 -25
- re_common/baselibrary/utils/core/requests_core.py +103 -103
- re_common/baselibrary/utils/fateadm.py +429 -429
- re_common/baselibrary/utils/importfun.py +123 -123
- re_common/baselibrary/utils/mfaker.py +57 -57
- re_common/baselibrary/utils/my_abc/__init__.py +3 -3
- re_common/baselibrary/utils/my_abc/better_abc.py +32 -32
- re_common/baselibrary/utils/mylogger.py +414 -414
- re_common/baselibrary/utils/myredisclient.py +861 -861
- re_common/baselibrary/utils/pipupgrade.py +21 -21
- re_common/baselibrary/utils/ringlist.py +85 -85
- re_common/baselibrary/utils/version_compare.py +36 -36
- re_common/baselibrary/utils/ydmhttp.py +126 -126
- re_common/facade/lazy_import.py +11 -11
- re_common/facade/loggerfacade.py +25 -25
- re_common/facade/mysqlfacade.py +467 -467
- re_common/facade/now.py +31 -31
- re_common/facade/sqlite3facade.py +257 -257
- re_common/facade/use/mq_use_facade.py +83 -83
- re_common/facade/use/proxy_use_facade.py +19 -19
- re_common/libtest/base_dict_test.py +19 -19
- re_common/libtest/baseavro_test.py +13 -13
- re_common/libtest/basefile_test.py +14 -14
- re_common/libtest/basemssql_test.py +77 -77
- re_common/libtest/baseodbc_test.py +7 -7
- re_common/libtest/basepandas_test.py +38 -38
- re_common/libtest/get_attr_test/get_attr_test_settings.py +14 -14
- re_common/libtest/get_attr_test/settings.py +54 -54
- re_common/libtest/idencode_test.py +53 -53
- re_common/libtest/iniconfig_test.py +35 -35
- re_common/libtest/ip_test.py +34 -34
- re_common/libtest/merge_file_test.py +20 -20
- re_common/libtest/mfaker_test.py +8 -8
- re_common/libtest/mm3_test.py +31 -31
- re_common/libtest/mylogger_test.py +88 -88
- re_common/libtest/myparsel_test.py +27 -27
- re_common/libtest/mysql_test.py +151 -151
- re_common/libtest/pymongo_test.py +21 -21
- re_common/libtest/split_test.py +11 -11
- re_common/libtest/sqlite3_merge_test.py +5 -5
- re_common/libtest/sqlite3_test.py +34 -34
- re_common/libtest/tomlconfig_test.py +30 -30
- re_common/libtest/use_tools_test/__init__.py +2 -2
- re_common/libtest/user/__init__.py +4 -4
- re_common/studio/__init__.py +4 -4
- re_common/studio/assignment_expressions.py +36 -36
- re_common/studio/mydash/test1.py +18 -18
- re_common/studio/pydashstudio/first.py +9 -9
- re_common/studio/streamlitstudio/first_app.py +65 -65
- re_common/studio/streamlitstudio/uber_pickups.py +23 -23
- re_common/studio/test.py +18 -18
- re_common/v2/baselibrary/business_utils/BusinessStringUtil.py +195 -0
- re_common/v2/baselibrary/business_utils/__init__.py +0 -0
- re_common/v2/baselibrary/business_utils/rel_tools.py +6 -0
- re_common/v2/baselibrary/decorators/utils.py +59 -59
- re_common/v2/baselibrary/s3object/baseboto3.py +230 -230
- re_common/v2/baselibrary/tools/WeChatRobot.py +95 -79
- re_common/v2/baselibrary/tools/ac_ahocorasick.py +75 -75
- re_common/v2/baselibrary/tools/dict_tools.py +37 -37
- re_common/v2/baselibrary/tools/dolphinscheduler.py +187 -187
- re_common/v2/baselibrary/tools/hdfs_data_processer.py +338 -338
- re_common/v2/baselibrary/tools/list_tools.py +65 -65
- re_common/v2/baselibrary/tools/search_hash_tools.py +54 -54
- re_common/v2/baselibrary/tools/text_matcher.py +326 -326
- re_common/v2/baselibrary/tools/unionfind_tools.py +60 -60
- re_common/v2/baselibrary/utils/BusinessStringUtil.py +196 -196
- re_common/v2/baselibrary/utils/author_smi.py +360 -360
- re_common/v2/baselibrary/utils/base_string_similarity.py +158 -158
- re_common/v2/baselibrary/utils/basedict.py +37 -37
- re_common/v2/baselibrary/utils/basehdfs.py +161 -161
- re_common/v2/baselibrary/utils/basepika.py +180 -180
- re_common/v2/baselibrary/utils/basetime.py +77 -77
- re_common/v2/baselibrary/utils/db.py +38 -38
- re_common/v2/baselibrary/utils/json_cls.py +16 -16
- re_common/v2/baselibrary/utils/mq.py +83 -83
- re_common/v2/baselibrary/utils/n_ary_expression_tree.py +243 -243
- re_common/v2/baselibrary/utils/string_bool.py +186 -149
- re_common/v2/baselibrary/utils/string_clear.py +227 -204
- re_common/v2/baselibrary/utils/string_smi.py +18 -18
- re_common/v2/baselibrary/utils/stringutils.py +213 -213
- re_common/vip/base_step_process.py +11 -11
- re_common/vip/baseencodeid.py +90 -90
- re_common/vip/changetaskname.py +28 -28
- re_common/vip/core_var.py +24 -24
- re_common/vip/mmh3Hash.py +89 -89
- re_common/vip/proxy/allproxys.py +127 -127
- re_common/vip/proxy/allproxys_thread.py +159 -159
- re_common/vip/proxy/cnki_proxy.py +153 -153
- re_common/vip/proxy/kuaidaili.py +87 -87
- re_common/vip/proxy/proxy_all.py +113 -113
- re_common/vip/proxy/update_kuaidaili_0.py +42 -42
- re_common/vip/proxy/wanfang_proxy.py +152 -152
- re_common/vip/proxy/wp_proxy_all.py +181 -181
- re_common/vip/read_rawid_to_txt.py +91 -91
- re_common/vip/title/__init__.py +5 -5
- re_common/vip/title/transform/TransformBookTitleToZt.py +125 -125
- re_common/vip/title/transform/TransformConferenceTitleToZt.py +139 -139
- re_common/vip/title/transform/TransformCstadTitleToZt.py +195 -195
- re_common/vip/title/transform/TransformJournalTitleToZt.py +203 -203
- re_common/vip/title/transform/TransformPatentTitleToZt.py +132 -132
- re_common/vip/title/transform/TransformRegulationTitleToZt.py +114 -114
- re_common/vip/title/transform/TransformStandardTitleToZt.py +135 -135
- re_common/vip/title/transform/TransformThesisTitleToZt.py +135 -135
- re_common/vip/title/transform/__init__.py +10 -10
- {re_common-10.0.22.dist-info → re_common-10.0.24.dist-info}/LICENSE +201 -201
- {re_common-10.0.22.dist-info → re_common-10.0.24.dist-info}/METADATA +16 -16
- re_common-10.0.24.dist-info/RECORD +230 -0
- {re_common-10.0.22.dist-info → re_common-10.0.24.dist-info}/WHEEL +1 -1
- re_common-10.0.22.dist-info/RECORD +0 -227
- {re_common-10.0.22.dist-info → re_common-10.0.24.dist-info}/top_level.txt +0 -0
@@ -1,83 +1,83 @@ (all 83 lines removed and re-added with identical text; no content change)

import logging
import traceback

from re_common.baselibrary.utils.basepika import BasePika
from retry import retry

logging_logger = logging.getLogger(__name__)


class UseMq(object):

    def __init__(self, queue, qos=1):
        self.queue = queue
        self.qos = qos
        self.basepika = BasePika()
        self.basepika.set_default()
        self.basepika.connect()
        self.basepika.create_channel()
        self.basepika.queue_declare(queue=queue, durable=True)
        self.basepika.basic_qos(qos)
        self.properties = self.basepika.get_properties()

    def re_conn(self):
        """
        重新连接
        :return:
        """
        self.basepika.connect()
        self.basepika.create_channel()
        self.basepika.queue_declare(queue=self.queue, durable=True)
        self.basepika.basic_qos(self.qos)

    @retry(delay=5, backoff=2, max_delay=60 * 3, logger=logging_logger)
    def get_mq(self):
        try:
            if self.basepika.channel.is_closed:
                logging_logger.info("重连中......")
                self.re_conn()
                logging_logger.info("重连完成......")
            self.basepika.set_get_msg_callback(routing_key=self.queue, callback=self.callback, auto_ack=False)
            self.basepika.start_get_msg()
        except:
            traceback.print_exc()
            logging_logger.info("重连中......")
            self.re_conn()

    def callback(self, ch, method, properties, body):
        # print(type(body))
        # print(" [x] Received %r" % body)
        # body = body.decode()
        self.callback2(ch, method, properties, body)
        if self.basepika.auto_ack is False:
            self.basepika.basic_ack(ch, method)

    def callback2(self, ch, method, properties, body):
        pass

    @retry(delay=5, backoff=2, max_delay=60 * 3, logger=logging_logger)
    def send_mq(self, body, num=100):
        try:
            if self.basepika.get_queue_size(self.queue) < num:
                self.basepika.easy_send_msg(routing_key=self.queue,
                                            body=body,
                                            properties=self.properties)
                return True
            else:
                return False
        except:
            traceback.print_exc()
            logging_logger.info("重连中......")
            self.re_conn()
            return False

    def get_server_mq_num(self, num=100):
        if self.basepika.get_queue_size(self.queue) < num:
            return True
        else:
            return False

    def easy_send_mq(self, body):
        self.basepika.easy_send_msg(routing_key=self.queue,
                                    body=body,
                                    properties=self.properties)
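
For orientation, a minimal consumer/producer sketch built only on the methods visible in the hunk above: subclass UseMq, override callback2 to handle each delivery, then call send_mq and get_mq. The queue name is a placeholder and the broker settings come from BasePika.set_default(), which this diff does not show, so treat it as an untested illustration. (The logged strings 重连中 / 重连完成 mean "reconnecting" / "reconnect finished".)

class WorkerMq(UseMq):
    # Hypothetical subclass: callback2 is the hook UseMq leaves empty for message handling.
    def callback2(self, ch, method, properties, body):
        # pika delivers body as bytes; decode before use
        print("received:", body.decode("utf-8", errors="replace"))


if __name__ == "__main__":
    worker = WorkerMq(queue="demo_queue", qos=1)  # "demo_queue" is a made-up queue name
    worker.send_mq(b"hello")                      # returns False once the queue already holds num (default 100) messages
    worker.get_mq()                               # blocks consuming messages, reconnecting on failure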
@@ -1,20 +1,20 @@ (lines 1-19 removed and re-added with identical text; line 20 is unchanged context)

import time


def set_school_list_proxy(self):
    """
    使用该函数条件, self为类的对象
    代理在 proxyset set里
    使用的是bshttpx这个变量
    """
    try:
        proxy = self.proxyset.pop()
        self.logger.info("proxy is:{},proxy size is: {}".format(proxy, len(self.proxyset)))
        return self.bshttpx.creat_sn(proxy=proxy,
                                     headers=self.headers,
                                     verify=False)
    except KeyError as e:
        time.sleep(15)
        if str(e) == "'pop from an empty set'":
            self.proxyset = set(self.school_proxy_list)
            return self.set_school_list_proxy()
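
set_school_list_proxy above is an unbound helper: it expects self to provide a proxyset set to pop proxies from, a school_proxy_list to refill it when it runs dry, an object in bshttpx exposing creat_sn(proxy=..., headers=..., verify=...), plus headers and a logger (its docstring says as much: self is a class instance, the proxies live in the proxyset set, and the bshttpx attribute is used). A hypothetical host class that spells out that contract; the constructor and the way bshttpx is supplied are assumptions, not part of the diff:

import logging


class SchoolProxyClient:
    # Bind the helper shown above as a method; this assumes set_school_list_proxy
    # is defined or imported in the current module (the diff view does not name its file).
    set_school_list_proxy = set_school_list_proxy

    def __init__(self, school_proxy_list, bshttpx, headers=None):
        self.school_proxy_list = school_proxy_list  # full pool used for refills
        self.proxyset = set(school_proxy_list)      # working set that gets pop()ed
        self.bshttpx = bshttpx                      # wrapper exposing creat_sn(proxy=..., headers=..., verify=...)
        self.headers = headers or {}
        self.logger = logging.getLogger(__name__)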
@@ -1,19 +1,19 @@ (all 19 lines removed and re-added with identical text)

from re_common.baselibrary import BaseDicts
from re_common.baselibrary.utils.mfaker import MFaker


def test_basedict_sortkeys():
    fake = MFaker()
    dicts = fake.create_data(MFaker.m_pydict, **fake.py_para())
    print(dicts)
    dicts2 = BaseDicts.sortkeys(dicts)
    print(dicts2)
    print(dicts)


def test_basedict_sortvalues():
    dicts = {'data': 'FDZqqOGNMyGJlNRoCsJd', 'participant': 'petersonadrienne@bennett.com',
             'often': 'KnJSSDeSTPboiwjSdGwR', 'friend': '1639', 'above': '8144', 'in': '1614'}
    dicts2 = BaseDicts.sortvalues(dicts)
    print(dicts2)
@@ -1,13 +1,13 @@ (all 13 lines removed and re-added with identical text)

from re_common.baselibrary.utils.baseavro import BaseAvro

id_set = set()
for line in BaseAvro().read_line_yeild(r"F:\fun2\avro"):
    id_set.add(line["key"])

print(len(id_set))
lines = ""

with open(r"F:\fun2\avro1.txt", 'w', encoding="utf-8") as f:
    for id in id_set:
        lines = id + "\n"
        f.write(lines)
@@ -1,14 +1,14 @@ (all 14 lines removed and re-added with identical text)

from re_common.baselibrary.utils.basedir import BaseDir
from re_common.baselibrary.utils.basefile import BaseFile

# files_line_list = BaseFile.read_end_line(r"F:\db3\mysql_date\test\part-00000", 3)
# for file_line in files_line_list:
#     # file_line = str(file_line, encoding="utf-8")
#     # file_line = file_line.decode(encoding="utf-8")
#     print(file_line)

for file in BaseDir.get_dir_all_files(r"F:\db3\mysql_date\data_dir"):
    files_line_list = BaseFile.read_end_line(r"F:\db3\mysql_date\test\part-00000", 11000)
    strs = "\n".join(files_line_list)
    BaseFile.single_add_file(r"F:\db3\mysql_date\end\part-00000", strs + "\n")
@@ -1,77 +1,77 @@ (all 77 lines removed and re-added with identical text)

###########################################
# 同项目调用基础包
import datetime
import gzip
import json
import os
import sys
import time

filepath = os.path.abspath(__file__)
pathlist = filepath.split(os.sep)
pathlist = pathlist[:-3]
TopPath = os.sep.join(pathlist)
sys.path.insert(0, TopPath)
print(TopPath)
############################################

from re_common.baselibrary.utils.basemssql import BaseMsSql
from re_common.baselibrary.utils.basefile import BaseFile
from re_common.baselibrary.utils.basetime import BaseTime
bt = BaseTime()

host = "127.0.0.1"
user = "sa"
pwd = "xujiang1994323"
db = "patData"
charset = "utf8"

basemssql = BaseMsSql(host, user, pwd, db, charset, as_dict=True)
basemssql.conn()
basemssql.exec_select_query("select * from [dbo].[New_pattzb]")
outPathFile = r"F:\db3\patnetjson_big\jss_patent.big_json.gz"
i = 0
size = 10000
count = 2000000
outPathFile = BaseFile.get_new_filename(outPathFile, sign=".")
start = time.time()
end = False
while True:
    with gzip.open(outPathFile, 'wb') as f:
        while True:
            a = basemssql.cur.fetchmany(size=size)
            if not a:
                print(i)
                print(int(time.time() - start))
                print("break")
                end = True
                break
            for row in a:
                dicts = {}
                for k,v in row.items():
                    # print(k)
                    try:
                        v = v.encode('latin-1').decode('gbk')
                    except:
                        pass
                    if isinstance(v, datetime.datetime):
                        v = bt.datetime_to_string(v, "%Y-%m-%d %H:%M:%S")
                    dicts[k] = v
                    # print(type(v))
                # print(dicts)
                line = json.dumps(dicts,ensure_ascii=False)+"\n"
                # print(line)
                lines = line.encode(encoding="utf8")
                f.write(lines)
            i = i + size
            print(i)
            print(int(time.time() - start))
            if i >= count:
                outPathFile = BaseFile.get_new_filename(outPathFile, sign=".")
                i=0
                break
    if end:
        break
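
One detail of the export loop above is worth a note: v.encode('latin-1').decode('gbk') repairs text that is stored as GBK bytes but returned by the driver as if it were Latin-1, and the bare try/except simply skips values that are not strings. (The banner comment 同项目调用基础包 means "make the project's own base package importable".) A self-contained illustration with plain Python strings, no database involved:

# Simulate the mojibake the script repairs: GBK bytes mis-decoded as Latin-1.
original = "专利数据"                                # sample Chinese text, "patent data"
garbled = original.encode("gbk").decode("latin-1")   # what a misconfigured connection hands back
repaired = garbled.encode("latin-1").decode("gbk")   # the round-trip used in the row loop
assert repaired == original
print(repr(garbled), "->", repaired)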
@@ -1,8 +1,8 @@ (lines 1-7 removed and re-added with identical text; line 8 is unchanged context)

from re_common.baselibrary.utils.baseodbc import BaseODBC

baseodbc = BaseODBC(r"C:\Users\xuzhu\Desktop\DB_20200701_GB.mdb")
# baseodbc = BaseODBC(r"D:\download\cnki_qk\download\get_journal\mdb\cnki期刊信息_20200315.mdb")
baseodbc.get_cur()
sql = "select * from `CN`"
for row in baseodbc.select(sql):
    print(row)
@@ -1,38 +1,38 @@ (all 38 lines removed and re-added with identical text)

import time

from pandas._libs.tslibs.timestamps import Timestamp


def test_dataform_to_numpy():
    import pandas as pd
    import numpy as np
    dicts = {'A': 1.,
             'B': pd.Timestamp('20130102'),
             'C': pd.Series(1, index=list(range(4)), dtype='float32'),
             'D': np.array([3] * 4, dtype='int32'),
             'E': pd.Categorical(["test", "train", "test", "train"]),
             'F': 'foo'}
    from re_common.baselibrary.utils.basepandas import BasePandas
    bp = BasePandas()
    startTime = time.time()
    df = bp.dicts_to_dataform(dicts)
    # 数据类型一致时速度会很快
    print(bp.dataform_to_numpy(df))
    endTime = time.time()
    print(endTime - startTime)
    dicts = {'A': [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
             'B': [Timestamp('2013-01-02 00:00:00'), Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
             'C': [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
             'D': [3, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
             'E': ['test', Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
             'F': ['foo', Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo']}


    startTime = time.time()
    df = bp.dicts_to_dataform(dicts)
    # 数据类型一致时速度会很快
    print(bp.dataform_to_numpy(df))
    endTime = time.time()
    print(endTime - startTime)

test_dataform_to_numpy()
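
The comment repeated in the test above, 数据类型一致时速度会很快, means "conversion is very fast when the data types are consistent", which matches standard pandas behaviour: a homogeneous frame converts to a single typed ndarray, while mixed columns fall back to dtype=object and box every value. A small standalone illustration with plain pandas, independent of the BasePandas wrapper:

import pandas as pd

homogeneous = pd.DataFrame({"a": [1.0, 2.0], "b": [3.0, 4.0]})
mixed = pd.DataFrame({"a": [1.0, "x"], "b": [pd.Timestamp("2013-01-02"), 3]})

print(homogeneous.to_numpy().dtype)  # float64: one contiguous typed array, cheap to produce
print(mixed.to_numpy().dtype)        # object: every cell boxed as a Python object, much slower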
@@ -1,14 +1,14 @@ (all 14 lines removed and re-added with identical text)

import os
import sys

filepath = os.path.abspath(__file__)
pathlist = filepath.split(os.sep)
pathlist = pathlist[:-4]
TopPath = os.sep.join(pathlist)
sys.path.insert(0, TopPath)
print(TopPath)

from re_common.libtest.get_attr_test import settings
from re_common.baselibrary.tools.get_attr import get_attrs

print(get_attrs(settings))