mdbq 4.0.1__py3-none-any.whl → 4.0.2__py3-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- mdbq/__version__.py +1 -1
- mdbq/mysql/unique_.py +3 -5
- mdbq/mysql/uploader.py +42 -17
- {mdbq-4.0.1.dist-info → mdbq-4.0.2.dist-info}/METADATA +1 -1
- {mdbq-4.0.1.dist-info → mdbq-4.0.2.dist-info}/RECORD +7 -7
- {mdbq-4.0.1.dist-info → mdbq-4.0.2.dist-info}/WHEEL +0 -0
- {mdbq-4.0.1.dist-info → mdbq-4.0.2.dist-info}/top_level.txt +0 -0
mdbq/__version__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
VERSION = '4.0.1'
|
1
|
+
VERSION = '4.0.2'
|
mdbq/mysql/unique_.py
CHANGED
@@ -7,16 +7,14 @@ from dbutils.pooled_db import PooledDB
|
|
7
7
|
import os
|
8
8
|
|
9
9
|
logger = mylogger.MyLogger(
|
10
|
-
name='unique_',
|
11
10
|
logging_mode='file',
|
12
|
-
log_level='
|
13
|
-
log_file='unique_.log',
|
11
|
+
log_level='info',
|
14
12
|
log_format='json',
|
15
13
|
max_log_size=50,
|
16
14
|
backup_count=5,
|
17
15
|
enable_async=False, # 是否启用异步日志
|
18
|
-
sample_rate=1, # 采样DEBUG/INFO
|
19
|
-
sensitive_fields=[], #
|
16
|
+
sample_rate=1, # 采样DEBUG/INFO日志
|
17
|
+
sensitive_fields=[], # 敏感字段过滤
|
20
18
|
enable_metrics=False, # 是否启用性能指标
|
21
19
|
)
|
22
20
|
|
mdbq/mysql/uploader.py
CHANGED
@@ -6,7 +6,9 @@ from functools import wraps
|
|
6
6
|
import warnings
|
7
7
|
import pymysql
|
8
8
|
import pandas as pd
|
9
|
+
import os
|
9
10
|
from mdbq.log import mylogger
|
11
|
+
from mdbq.config import config
|
10
12
|
from typing import Union, List, Dict, Optional, Any, Tuple, Set
|
11
13
|
from dbutils.pooled_db import PooledDB
|
12
14
|
import json
|
@@ -950,7 +952,7 @@ class MySQLUploader:
|
|
950
952
|
unique_keys: Optional[List[List[str]]] = None
|
951
953
|
):
|
952
954
|
"""
|
953
|
-
|
955
|
+
上传数据到数据库的主入口方法
|
954
956
|
|
955
957
|
:param db_name: 数据库名
|
956
958
|
:param table_name: 表名
|
@@ -971,6 +973,23 @@ class MySQLUploader:
|
|
971
973
|
- 'hybrid' : 混合模式(每N行提交,平衡性能与安全性)
|
972
974
|
:param unique_keys: 唯一约束列表,每个元素为列名列表,支持多列组合唯一约束
|
973
975
|
:raises: 可能抛出各种验证和数据库相关异常
|
976
|
+
|
977
|
+
---
|
978
|
+
unique_keys、check_duplicate、update_on_duplicate 三者组合下的行为总结:
|
979
|
+
|
980
|
+
| unique_keys | check_duplicate | update_on_duplicate | 行为 |
|
981
|
+
|-------------|----------------|---------------------|------------------------------|
|
982
|
+
| 有/无 | False | False | 冲突时报错/跳过,不覆盖 |
|
983
|
+
| 有/无 | False | True | 冲突时覆盖(ON DUPLICATE KEY)|
|
984
|
+
| 有/无 | True | False | 主动查重,冲突时跳过,不覆盖 |
|
985
|
+
| 有/无 | True | True | 主动查重,冲突时覆盖 |
|
986
|
+
|
987
|
+
- unique_keys 只决定唯一性,不决定是否覆盖。
|
988
|
+
- check_duplicate=True 时,插入前主动查重,重复数据跳过或覆盖,取决于 update_on_duplicate。
|
989
|
+
- update_on_duplicate=True 时,遇到唯一约束冲突会用新数据覆盖旧数据。
|
990
|
+
- 只要 update_on_duplicate=True 且表存在唯一约束(如 unique_keys),无论 check_duplicate 是否为 True,都会更新旧数据(即 ON DUPLICATE KEY UPDATE 生效)。
|
991
|
+
- 如需"覆盖"行为,务必设置 update_on_duplicate=True,不管 check_duplicate 是否为 True。
|
992
|
+
- 如需"跳过"行为,设置 update_on_duplicate=False 即可。
|
974
993
|
"""
|
975
994
|
# upload_start = time.time()
|
976
995
|
if isinstance(data, list) or (hasattr(data, 'shape') and hasattr(data, '__len__')):
|
@@ -1335,7 +1354,16 @@ class MySQLUploader:
|
|
1335
1354
|
transaction_mode: str,
|
1336
1355
|
update_on_duplicate: bool = False
|
1337
1356
|
) -> Tuple[int, int, int]:
|
1338
|
-
"""
|
1357
|
+
"""
|
1358
|
+
执行批量插入操作,优化batch和hybrid模式。
|
1359
|
+
|
1360
|
+
- batch模式下,使用executemany批量插入(如SQL带ON DUPLICATE KEY UPDATE时),MySQL会对每一行单独判断唯一约束:
|
1361
|
+
- 不冲突的行会被正常插入。
|
1362
|
+
- 冲突的行会触发ON DUPLICATE KEY UPDATE,用新数据更新旧数据。
|
1363
|
+
- 不会因为一行冲突导致整批失败或回滚。
|
1364
|
+
- 只有遇到严重的数据库错误(如所有行都因唯一约束冲突且没有ON DUPLICATE KEY UPDATE),才会整体回滚。
|
1365
|
+
- 返回值为(插入行数, 跳过行数, 失败行数)。
|
1366
|
+
"""
|
1339
1367
|
import pymysql # 确保异常类型可用
|
1340
1368
|
def get_optimal_batch_size(total_rows: int) -> int:
|
1341
1369
|
if total_rows <= 100:
|
@@ -1533,19 +1561,16 @@ class MySQLUploader:
|
|
1533
1561
|
|
1534
1562
|
|
1535
1563
|
def main():
|
1536
|
-
""
|
1537
|
-
|
1538
|
-
|
1539
|
-
|
1540
|
-
|
1541
|
-
4. 上传数据到数据库
|
1542
|
-
5. 关闭连接
|
1543
|
-
"""
|
1564
|
+
dir_path = os.path.expanduser("~")
|
1565
|
+
my_cont = config.read_config(file_path=os.path.join(dir_path, 'spd.txt'))
|
1566
|
+
username, password, host, port = my_cont['username'], my_cont['password'], my_cont['host'], int(my_cont['port'])
|
1567
|
+
host = 'localhost'
|
1568
|
+
|
1544
1569
|
uploader = MySQLUploader(
|
1545
|
-
username=
|
1546
|
-
password=
|
1547
|
-
host=
|
1548
|
-
port=
|
1570
|
+
username=username,
|
1571
|
+
password=password,
|
1572
|
+
host=host,
|
1573
|
+
port=port,
|
1549
1574
|
)
|
1550
1575
|
|
1551
1576
|
# 定义列和数据类型
|
@@ -1561,7 +1586,7 @@ def main():
|
|
1561
1586
|
data = [
|
1562
1587
|
{'日期': '2023-01-8', 'name': 'JACk', 'AGE': '24', 'salary': 555.1545},
|
1563
1588
|
{'日期': '2023-01-15', 'name': 'Alice', 'AGE': 35, 'salary': '100'},
|
1564
|
-
{'日期': '2023-01-15', 'name': 'Alice', 'AGE':
|
1589
|
+
{'日期': '2023-01-15', 'name': 'Alice', 'AGE': 5, 'salary': 15478},
|
1565
1590
|
{'日期': '2023-02-20', 'name': 'Bob', 'AGE': 25, 'salary': 45000.75},
|
1566
1591
|
]
|
1567
1592
|
|
@@ -1574,7 +1599,7 @@ def main():
|
|
1574
1599
|
primary_keys=[], # 创建唯一主键
|
1575
1600
|
check_duplicate=False, # 检查重复数据
|
1576
1601
|
duplicate_columns=[], # 指定排重的组合键
|
1577
|
-
update_on_duplicate=
|
1602
|
+
update_on_duplicate=True, # 更新旧数据
|
1578
1603
|
allow_null=False, # 允许插入空值
|
1579
1604
|
partition_by='year', # 分表方式
|
1580
1605
|
partition_date_column='日期', # 用于分表的日期列名,默认为'日期'
|
@@ -1587,5 +1612,5 @@ def main():
|
|
1587
1612
|
|
1588
1613
|
|
1589
1614
|
if __name__ == '__main__':
|
1590
|
-
|
1615
|
+
main()
|
1591
1616
|
pass
|
@@ -1,5 +1,5 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
|
-
mdbq/__version__.py,sha256=
|
2
|
+
mdbq/__version__.py,sha256=_fU0Mj16CzZHiwvkoGULFRC0vc6b0FxDy6MgQQON3Gw,17
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
4
|
mdbq/aggregation/query_data.py,sha256=U6dYK8_gEaNnsGKooEkzfAWnzNA8kt0uomec49e4olE,177536
|
5
5
|
mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
@@ -11,8 +11,8 @@ mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
|
|
11
11
|
mdbq/mysql/deduplicator.py,sha256=8v3MC6TJ0YEiExWrTP9OXAxTYnL9XbpYL2vWaER1h2M,73099
|
12
12
|
mdbq/mysql/mysql.py,sha256=pDg771xBugCMSTWeskIFTi3pFLgaqgyG3smzf-86Wn8,56772
|
13
13
|
mdbq/mysql/s_query.py,sha256=tSBEbyuVQBeE6tckHSbguAQh9T07tvPhf4J6DlpUBP8,10508
|
14
|
-
mdbq/mysql/unique_.py,sha256=
|
15
|
-
mdbq/mysql/uploader.py,sha256=
|
14
|
+
mdbq/mysql/unique_.py,sha256=Wgqq_PjAAD757JTa10wjYaJgssZ_C_ypU6DW56jbuyw,21074
|
15
|
+
mdbq/mysql/uploader.py,sha256=n6K2rn7cGHGLGevX2JO7pKrUiQiGNluRx3CwnxARZGI,72503
|
16
16
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
17
17
|
mdbq/other/download_sku_picture.py,sha256=YU8DxKMXbdeE1OOKEA848WVp62jYHw5O4tXTjUdq9H0,44832
|
18
18
|
mdbq/other/otk.py,sha256=iclBIFbQbhlqzUbcMMoePXBpcP1eZ06ZtjnhcA_EbmE,7241
|
@@ -25,7 +25,7 @@ mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
|
|
25
25
|
mdbq/redis/getredis.py,sha256=l3zBK7wrZl0oO42-_UGylyatnIp_SBw8wDDvof9fht4,23534
|
26
26
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
27
27
|
mdbq/spider/aikucun.py,sha256=hPRzLQvFIF4ibN8aP3Dg_ru5meac90faPyzOB22cj-o,20965
|
28
|
-
mdbq-4.0.
|
29
|
-
mdbq-4.0.
|
30
|
-
mdbq-4.0.
|
31
|
-
mdbq-4.0.
|
28
|
+
mdbq-4.0.2.dist-info/METADATA,sha256=EdCDmGum97Hjm9aKPV-2qAf9IoMkh-F0DE6ATMLJTX8,363
|
29
|
+
mdbq-4.0.2.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
30
|
+
mdbq-4.0.2.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
31
|
+
mdbq-4.0.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|