mdbq 4.0.28__py3-none-any.whl → 4.0.30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/__version__.py +1 -1
- mdbq/mysql/deduplicator.py +16 -8
- mdbq/other/download_sku_picture.py +6 -0
- mdbq/spider/aikucun.py +6 -0
- {mdbq-4.0.28.dist-info → mdbq-4.0.30.dist-info}/METADATA +1 -1
- {mdbq-4.0.28.dist-info → mdbq-4.0.30.dist-info}/RECORD +8 -8
- {mdbq-4.0.28.dist-info → mdbq-4.0.30.dist-info}/WHEEL +0 -0
- {mdbq-4.0.28.dist-info → mdbq-4.0.30.dist-info}/top_level.txt +0 -0
mdbq/__version__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
VERSION = '4.0.28'
|
1
|
+
VERSION = '4.0.30'
|
mdbq/mysql/deduplicator.py
CHANGED
@@ -63,7 +63,9 @@ class MySQLDeduplicator:
|
|
63
63
|
skip_system_dbs: bool = True,
|
64
64
|
max_retries: int = 3,
|
65
65
|
retry_waiting_time: int = 5,
|
66
|
-
pool_size: int =
|
66
|
+
pool_size: int = 20,
|
67
|
+
mincached: int = 5,
|
68
|
+
maxcached: int = 10,
|
67
69
|
primary_key: str = 'id',
|
68
70
|
date_range: Optional[List[str]] = None,
|
69
71
|
recent_month: Optional[int] = None,
|
@@ -83,6 +85,8 @@ class MySQLDeduplicator:
|
|
83
85
|
:param exclude_databases: 排除的数据库名列表
|
84
86
|
:param exclude_tables: 排除的表名字典 {数据库名: [表名, ...]}
|
85
87
|
:param duplicate_keep_mode: 'keep_one'(默认,重复组保留一条),'remove_all'(全部删除重复组)
|
88
|
+
:param mincached: 连接池空闲连接数
|
89
|
+
:param maxcached: 连接池最大缓存连接数
|
86
90
|
"""
|
87
91
|
# 连接池状态标志
|
88
92
|
self._closed = False
|
@@ -100,7 +104,9 @@ class MySQLDeduplicator:
|
|
100
104
|
password=password,
|
101
105
|
charset=charset,
|
102
106
|
maxconnections=pool_size,
|
103
|
-
cursorclass=pymysql.cursors.DictCursor
|
107
|
+
cursorclass=pymysql.cursors.DictCursor,
|
108
|
+
mincached=mincached,
|
109
|
+
maxcached=maxcached,
|
104
110
|
)
|
105
111
|
|
106
112
|
# 并发模式要将 pool_size 加大
|
@@ -508,7 +514,7 @@ class MySQLDeduplicator:
|
|
508
514
|
dup_count_row = cursor.fetchone()
|
509
515
|
dup_count = dup_count_row['cnt'] if dup_count_row and 'cnt' in dup_count_row else 0
|
510
516
|
if dup_count == 0:
|
511
|
-
logger.
|
517
|
+
logger.info('没有重复数据', {"库": database, "表": table, "数据量": total_count, "数据日期": date_val})
|
512
518
|
cursor.execute(drop_temp_sql)
|
513
519
|
conn.commit()
|
514
520
|
return (0, 0)
|
@@ -1357,7 +1363,7 @@ def main():
|
|
1357
1363
|
section='mysql',
|
1358
1364
|
keys=['host', 'port', 'username', 'password'],
|
1359
1365
|
)
|
1360
|
-
|
1366
|
+
host = 'localhost'
|
1361
1367
|
|
1362
1368
|
deduplicator = MySQLDeduplicator(
|
1363
1369
|
username=username,
|
@@ -1369,11 +1375,13 @@ def main():
|
|
1369
1375
|
skip_system_dbs=True,
|
1370
1376
|
max_retries=3,
|
1371
1377
|
retry_waiting_time=5,
|
1372
|
-
pool_size=
|
1373
|
-
|
1378
|
+
pool_size=20,
|
1379
|
+
mincached=5,
|
1380
|
+
maxcached=10,
|
1381
|
+
recent_month=1,
|
1374
1382
|
# date_range=['2025-06-09', '2025-06-10'],
|
1375
1383
|
exclude_columns=['更新时间'],
|
1376
|
-
|
1384
|
+
exclude_databases=['cookie文件', '日志', '视频数据', '云电影'],
|
1377
1385
|
# exclude_tables={
|
1378
1386
|
# '推广数据2': [
|
1379
1387
|
# '地域报表_城市_2025_04',
|
@@ -1387,7 +1395,7 @@ def main():
|
|
1387
1395
|
)
|
1388
1396
|
|
1389
1397
|
# 全库去重(单线程)
|
1390
|
-
|
1398
|
+
deduplicator.deduplicate_all(dry_run=False, parallel=True, reorder_id=True)
|
1391
1399
|
|
1392
1400
|
# # 指定数据库去重(多线程)
|
1393
1401
|
# deduplicator.deduplicate_database('数据引擎2', dry_run=False, parallel=True, reorder_id=True)
|
mdbq/other/download_sku_picture.py
CHANGED
@@ -119,6 +119,12 @@ class LoadAccount:
|
|
119
119
|
os.environ["webdriver.chrome.driver"] = chrome_path
|
120
120
|
# option.binary_location = chrome_path # Macos 设置此参数报错
|
121
121
|
service = Service(chromedriver_path)
|
122
|
+
elif platform.system().lower() == 'linux':
|
123
|
+
# ubuntu
|
124
|
+
chrome_path = '/usr/bin/google-chrome'
|
125
|
+
chromedriver_path = '/usr/local/bin/chromedriver'
|
126
|
+
# option.binary_location = chrome_path # macOS 设置此参数有效
|
127
|
+
service = Service(chromedriver_path)
|
122
128
|
else:
|
123
129
|
chrome_path = '/usr/local/chrome/Google Chrome for Testing.app'
|
124
130
|
chromedriver_path = '/usr/local/chrome/chromedriver'
|
mdbq/spider/aikucun.py
CHANGED
@@ -125,6 +125,12 @@ class AikuCun:
|
|
125
125
|
os.environ["webdriver.chrome.driver"] = chrome_path
|
126
126
|
# option.binary_location = chrome_path # Macos 设置此参数报错
|
127
127
|
service = Service(chromedriver_path)
|
128
|
+
elif platform.system().lower() == 'linux':
|
129
|
+
# ubuntu
|
130
|
+
chrome_path = '/usr/bin/google-chrome'
|
131
|
+
chromedriver_path = '/usr/local/bin/chromedriver'
|
132
|
+
# option.binary_location = chrome_path # macOS 设置此参数有效
|
133
|
+
service = Service(chromedriver_path)
|
128
134
|
else:
|
129
135
|
chrome_path = '/usr/local/chrome/Google Chrome for Testing.app'
|
130
136
|
chromedriver_path = '/usr/local/chrome/chromedriver'
|
{mdbq-4.0.28.dist-info → mdbq-4.0.30.dist-info}/RECORD
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
|
-
mdbq/__version__.py,sha256=
|
2
|
+
mdbq/__version__.py,sha256=dvApMa3Ja0z33lQcWZtoQiZp-2Lxso9cz0yfXlPVDUQ,18
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
4
|
mdbq/aggregation/query_data.py,sha256=UWwVcpOo9Ro6u9VtXcr8T8Hdh8Atem4yjO4wCsvcOTw,166882
|
5
5
|
mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
|
@@ -8,13 +8,13 @@ mdbq/myconf/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
|
8
8
|
mdbq/myconf/myconf.py,sha256=GR250mf2KKImRUamPM2TEi9no_65tR4uKXn7eHNCAmg,31205
|
9
9
|
mdbq/myconf/myconf_bak.py,sha256=39tLUBVlWQZzQfrwk7YoLEfipo11fpwWjaLBHcUt2qM,33341
|
10
10
|
mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
|
11
|
-
mdbq/mysql/deduplicator.py,sha256=
|
11
|
+
mdbq/mysql/deduplicator.py,sha256=d8eDUvoNAE1OoquK45iCGc0mk1sP9orRgRQjRQIG_7w,73540
|
12
12
|
mdbq/mysql/mysql.py,sha256=pDg771xBugCMSTWeskIFTi3pFLgaqgyG3smzf-86Wn8,56772
|
13
13
|
mdbq/mysql/s_query.py,sha256=nByjtk5G18zgpomldb_5xDUAwBPXyXAcV1zvq5vkM_4,45005
|
14
14
|
mdbq/mysql/unique_.py,sha256=XHDBiY-zF_Hmt28Jf-xh3E-_sIWoJCxdi2dOAfnAyQ0,21127
|
15
15
|
mdbq/mysql/uploader.py,sha256=jW4aHm3kvt3EJ4tgnMl2fRPLY06VWaBv8JI5LSSMVzE,81151
|
16
16
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
17
|
-
mdbq/other/download_sku_picture.py,sha256=
|
17
|
+
mdbq/other/download_sku_picture.py,sha256=EBRgO4L4k1AoCLehtCY_DvjUlYUNj_9CcePnsSKjx5Q,45114
|
18
18
|
mdbq/other/otk.py,sha256=iclBIFbQbhlqzUbcMMoePXBpcP1eZ06ZtjnhcA_EbmE,7241
|
19
19
|
mdbq/other/pov_city.py,sha256=AEOmCOzOwyjHi9LLZWPKi6DUuSC-_M163664I52u9qw,21050
|
20
20
|
mdbq/other/ua_sj.py,sha256=JuVYzc_5QZ9s_oQSrTHVKkQv4S_7-CWx4oIKOARn_9U,22178
|
@@ -24,8 +24,8 @@ mdbq/pbix/refresh_all.py,sha256=OBT9EewSZ0aRS9vL_FflVn74d4l2G00wzHiikCC4TC0,5926
|
|
24
24
|
mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
|
25
25
|
mdbq/redis/getredis.py,sha256=vpBuNc22uj9Vr-_Dh25_wpwWM1e-072EAAIBdB_IpL0,23494
|
26
26
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
27
|
-
mdbq/spider/aikucun.py,sha256=
|
28
|
-
mdbq-4.0.
|
29
|
-
mdbq-4.0.28.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
30
|
-
mdbq-4.0.28.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
31
|
-
mdbq-4.0.28.dist-info/RECORD,,
|
27
|
+
mdbq/spider/aikucun.py,sha256=XptHjGzbout9IYzWAOQUpMMV5qEgLTU8pL1ZGt8oNEA,21868
|
28
|
+
mdbq-4.0.30.dist-info/METADATA,sha256=NQZCJJKOfy3kdneuarm2UbrvbG3C5S4gDCDPxOS60ls,364
|
29
|
+
mdbq-4.0.30.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
30
|
+
mdbq-4.0.30.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
31
|
+
mdbq-4.0.30.dist-info/RECORD,,
|
{mdbq-4.0.28.dist-info → mdbq-4.0.30.dist-info}/WHEEL
File without changes
|
{mdbq-4.0.28.dist-info → mdbq-4.0.30.dist-info}/top_level.txt
File without changes
|