mdbq 4.0.28__py3-none-any.whl → 4.0.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbq/__version__.py CHANGED
@@ -1 +1 @@
1
- VERSION = '4.0.28'
1
+ VERSION = '4.0.30'
mdbq/mysql/deduplicator.py CHANGED
@@ -63,7 +63,9 @@ class MySQLDeduplicator:
63
63
  skip_system_dbs: bool = True,
64
64
  max_retries: int = 3,
65
65
  retry_waiting_time: int = 5,
66
- pool_size: int = 10,
66
+ pool_size: int = 20,
67
+ mincached: int = 5,
68
+ maxcached: int = 10,
67
69
  primary_key: str = 'id',
68
70
  date_range: Optional[List[str]] = None,
69
71
  recent_month: Optional[int] = None,
@@ -83,6 +85,8 @@ class MySQLDeduplicator:
83
85
  :param exclude_databases: 排除的数据库名列表
84
86
  :param exclude_tables: 排除的表名字典 {数据库名: [表名, ...]}
85
87
  :param duplicate_keep_mode: 'keep_one'(默认,重复组保留一条),'remove_all'(全部删除重复组)
88
+ :param mincached: 连接池空闲连接数
89
+ :param maxcached: 连接池最大缓存连接数
86
90
  """
87
91
  # 连接池状态标志
88
92
  self._closed = False
@@ -100,7 +104,9 @@ class MySQLDeduplicator:
100
104
  password=password,
101
105
  charset=charset,
102
106
  maxconnections=pool_size,
103
- cursorclass=pymysql.cursors.DictCursor
107
+ cursorclass=pymysql.cursors.DictCursor,
108
+ mincached=mincached,
109
+ maxcached=maxcached,
104
110
  )
105
111
 
106
112
  # 并发模式要将 pool_size 加大
@@ -508,7 +514,7 @@ class MySQLDeduplicator:
508
514
  dup_count_row = cursor.fetchone()
509
515
  dup_count = dup_count_row['cnt'] if dup_count_row and 'cnt' in dup_count_row else 0
510
516
  if dup_count == 0:
511
- logger.debug('没有重复数据', {"库": database, "表": table, "数据量": total_count, "数据日期": date_val})
517
+ logger.info('没有重复数据', {"库": database, "表": table, "数据量": total_count, "数据日期": date_val})
512
518
  cursor.execute(drop_temp_sql)
513
519
  conn.commit()
514
520
  return (0, 0)
@@ -1357,7 +1363,7 @@ def main():
1357
1363
  section='mysql',
1358
1364
  keys=['host', 'port', 'username', 'password'],
1359
1365
  )
1360
- # host = 'localhost'
1366
+ host = 'localhost'
1361
1367
 
1362
1368
  deduplicator = MySQLDeduplicator(
1363
1369
  username=username,
@@ -1369,11 +1375,13 @@ def main():
1369
1375
  skip_system_dbs=True,
1370
1376
  max_retries=3,
1371
1377
  retry_waiting_time=5,
1372
- pool_size=30,
1373
- # recent_month=1,
1378
+ pool_size=20,
1379
+ mincached=5,
1380
+ maxcached=10,
1381
+ recent_month=1,
1374
1382
  # date_range=['2025-06-09', '2025-06-10'],
1375
1383
  exclude_columns=['更新时间'],
1376
- # exclude_databases=['测试库4'],
1384
+ exclude_databases=['cookie文件', '日志', '视频数据', '云电影'],
1377
1385
  # exclude_tables={
1378
1386
  # '推广数据2': [
1379
1387
  # '地域报表_城市_2025_04',
@@ -1387,7 +1395,7 @@ def main():
1387
1395
  )
1388
1396
 
1389
1397
  # 全库去重(单线程)
1390
- # deduplicator.deduplicate_all(dry_run=False, parallel=True, reorder_id=True)
1398
+ deduplicator.deduplicate_all(dry_run=False, parallel=True, reorder_id=True)
1391
1399
 
1392
1400
  # # 指定数据库去重(多线程)
1393
1401
  # deduplicator.deduplicate_database('数据引擎2', dry_run=False, parallel=True, reorder_id=True)
mdbq/other/download_sku_picture.py CHANGED
@@ -119,6 +119,12 @@ class LoadAccount:
119
119
  os.environ["webdriver.chrome.driver"] = chrome_path
120
120
  # option.binary_location = chrome_path # Macos 设置此参数报错
121
121
  service = Service(chromedriver_path)
122
+ elif platform.system().lower() == 'linux':
123
+ # ubuntu
124
+ chrome_path = '/usr/bin/google-chrome'
125
+ chromedriver_path = '/usr/local/bin/chromedriver'
126
+ # option.binary_location = chrome_path # macOS 设置此参数有效
127
+ service = Service(chromedriver_path)
122
128
  else:
123
129
  chrome_path = '/usr/local/chrome/Google Chrome for Testing.app'
124
130
  chromedriver_path = '/usr/local/chrome/chromedriver'
mdbq/spider/aikucun.py CHANGED
@@ -125,6 +125,12 @@ class AikuCun:
125
125
  os.environ["webdriver.chrome.driver"] = chrome_path
126
126
  # option.binary_location = chrome_path # Macos 设置此参数报错
127
127
  service = Service(chromedriver_path)
128
+ elif platform.system().lower() == 'linux':
129
+ # ubuntu
130
+ chrome_path = '/usr/bin/google-chrome'
131
+ chromedriver_path = '/usr/local/bin/chromedriver'
132
+ # option.binary_location = chrome_path # macOS 设置此参数有效
133
+ service = Service(chromedriver_path)
128
134
  else:
129
135
  chrome_path = '/usr/local/chrome/Google Chrome for Testing.app'
130
136
  chromedriver_path = '/usr/local/chrome/chromedriver'
mdbq-4.0.30.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: mdbq
3
- Version: 4.0.28
3
+ Version: 4.0.30
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
mdbq-4.0.30.dist-info/RECORD CHANGED
@@ -1,5 +1,5 @@
1
1
  mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
2
- mdbq/__version__.py,sha256=xMsXHu_A2cflhY5yKH_XTsHjVS0UvcjRyCC0GhRxiV0,18
2
+ mdbq/__version__.py,sha256=dvApMa3Ja0z33lQcWZtoQiZp-2Lxso9cz0yfXlPVDUQ,18
3
3
  mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
4
4
  mdbq/aggregation/query_data.py,sha256=UWwVcpOo9Ro6u9VtXcr8T8Hdh8Atem4yjO4wCsvcOTw,166882
5
5
  mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
@@ -8,13 +8,13 @@ mdbq/myconf/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
8
8
  mdbq/myconf/myconf.py,sha256=GR250mf2KKImRUamPM2TEi9no_65tR4uKXn7eHNCAmg,31205
9
9
  mdbq/myconf/myconf_bak.py,sha256=39tLUBVlWQZzQfrwk7YoLEfipo11fpwWjaLBHcUt2qM,33341
10
10
  mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
11
- mdbq/mysql/deduplicator.py,sha256=0cyGZIHqWM1ELCPrepLtb51jludzvKMIeWFV7jPuIp4,73225
11
+ mdbq/mysql/deduplicator.py,sha256=d8eDUvoNAE1OoquK45iCGc0mk1sP9orRgRQjRQIG_7w,73540
12
12
  mdbq/mysql/mysql.py,sha256=pDg771xBugCMSTWeskIFTi3pFLgaqgyG3smzf-86Wn8,56772
13
13
  mdbq/mysql/s_query.py,sha256=nByjtk5G18zgpomldb_5xDUAwBPXyXAcV1zvq5vkM_4,45005
14
14
  mdbq/mysql/unique_.py,sha256=XHDBiY-zF_Hmt28Jf-xh3E-_sIWoJCxdi2dOAfnAyQ0,21127
15
15
  mdbq/mysql/uploader.py,sha256=jW4aHm3kvt3EJ4tgnMl2fRPLY06VWaBv8JI5LSSMVzE,81151
16
16
  mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
17
- mdbq/other/download_sku_picture.py,sha256=MfHBBffM8oWiso2jbrrIET7CQCkypSov0SYOoSN-PqA,44798
17
+ mdbq/other/download_sku_picture.py,sha256=EBRgO4L4k1AoCLehtCY_DvjUlYUNj_9CcePnsSKjx5Q,45114
18
18
  mdbq/other/otk.py,sha256=iclBIFbQbhlqzUbcMMoePXBpcP1eZ06ZtjnhcA_EbmE,7241
19
19
  mdbq/other/pov_city.py,sha256=AEOmCOzOwyjHi9LLZWPKi6DUuSC-_M163664I52u9qw,21050
20
20
  mdbq/other/ua_sj.py,sha256=JuVYzc_5QZ9s_oQSrTHVKkQv4S_7-CWx4oIKOARn_9U,22178
@@ -24,8 +24,8 @@ mdbq/pbix/refresh_all.py,sha256=OBT9EewSZ0aRS9vL_FflVn74d4l2G00wzHiikCC4TC0,5926
24
24
  mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
25
25
  mdbq/redis/getredis.py,sha256=vpBuNc22uj9Vr-_Dh25_wpwWM1e-072EAAIBdB_IpL0,23494
26
26
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
27
- mdbq/spider/aikucun.py,sha256=juOqpr_dHeE1RyjCu67VcpzoJAWMO7FKv0i8KiH8WUo,21552
28
- mdbq-4.0.28.dist-info/METADATA,sha256=ALYXGbIVSK6h34xxXxs_DSGkNrMTRZzch1nv3ge1IGg,364
29
- mdbq-4.0.28.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
30
- mdbq-4.0.28.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
31
- mdbq-4.0.28.dist-info/RECORD,,
27
+ mdbq/spider/aikucun.py,sha256=XptHjGzbout9IYzWAOQUpMMV5qEgLTU8pL1ZGt8oNEA,21868
28
+ mdbq-4.0.30.dist-info/METADATA,sha256=NQZCJJKOfy3kdneuarm2UbrvbG3C5S4gDCDPxOS60ls,364
29
+ mdbq-4.0.30.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
30
+ mdbq-4.0.30.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
31
+ mdbq-4.0.30.dist-info/RECORD,,
File without changes