mdbq 4.1.11__py3-none-any.whl → 4.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mdbq might be problematic. Click here for more details.
- mdbq/__version__.py +1 -1
- mdbq/mysql/uploader.py +109 -4
- {mdbq-4.1.11.dist-info → mdbq-4.1.12.dist-info}/METADATA +1 -1
- {mdbq-4.1.11.dist-info → mdbq-4.1.12.dist-info}/RECORD +6 -6
- {mdbq-4.1.11.dist-info → mdbq-4.1.12.dist-info}/WHEEL +0 -0
- {mdbq-4.1.11.dist-info → mdbq-4.1.12.dist-info}/top_level.txt +0 -0
mdbq/__version__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
VERSION = '4.1.11'
|
|
1
|
+
VERSION = '4.1.12'
|
mdbq/mysql/uploader.py
CHANGED
|
@@ -1219,12 +1219,17 @@ class MySQLUploader:
|
|
|
1219
1219
|
set_typ: Dict[str, str],
|
|
1220
1220
|
allow_null: bool = False,
|
|
1221
1221
|
db_name: str = None,
|
|
1222
|
-
table_name: str = None,
|
|
1222
|
+
table_name: str = None,
|
|
1223
|
+
auto_timestamps: bool = False
|
|
1223
1224
|
) -> Tuple[List[Dict], Dict[str, str]]:
|
|
1224
1225
|
"""
|
|
1225
1226
|
准备要上传的数据,验证并转换数据类型
|
|
1226
1227
|
根据set_typ自动处理所有数据类型的列:补齐缺失的列并丢弃多余的列
|
|
1227
1228
|
"""
|
|
1229
|
+
# 处理自动时间戳功能
|
|
1230
|
+
if auto_timestamps:
|
|
1231
|
+
data, set_typ = self._process_auto_timestamps(data, set_typ, db_name, table_name)
|
|
1232
|
+
|
|
1228
1233
|
# set_typ的键清洗
|
|
1229
1234
|
if not set_typ:
|
|
1230
1235
|
set_typ = {}
|
|
@@ -1416,7 +1421,8 @@ class MySQLUploader:
|
|
|
1416
1421
|
indexes: Optional[List[str]] = None,
|
|
1417
1422
|
update_on_duplicate: bool = False,
|
|
1418
1423
|
transaction_mode: str = "batch",
|
|
1419
|
-
unique_keys: Optional[List[List[str]]] = None
|
|
1424
|
+
unique_keys: Optional[List[List[str]]] = None,
|
|
1425
|
+
auto_timestamps: bool = False
|
|
1420
1426
|
):
|
|
1421
1427
|
"""
|
|
1422
1428
|
上传数据到数据库的主入口方法
|
|
@@ -1439,6 +1445,7 @@ class MySQLUploader:
|
|
|
1439
1445
|
- 'batch' : 整批提交事务(性能最优)
|
|
1440
1446
|
- 'hybrid' : 混合模式(每N行提交,平衡性能与安全性)
|
|
1441
1447
|
:param unique_keys: 唯一约束列表,每个元素为列名列表,支持多列组合唯一约束。格式:[['col1', 'col2'], ['col3']] 或 None
|
|
1448
|
+
:param auto_timestamps: 是否自动添加创建时间和更新时间列,默认为False。启用后会自动添加'创建时间'和'更新时间'两列
|
|
1442
1449
|
:raises: 可能抛出各种验证和数据库相关异常
|
|
1443
1450
|
|
|
1444
1451
|
---
|
|
@@ -1483,6 +1490,17 @@ class MySQLUploader:
|
|
|
1483
1490
|
- 只要 update_on_duplicate=True 且表存在唯一约束(如 unique_keys),无论 check_duplicate 是否为 True,都会更新旧数据(即 ON DUPLICATE KEY UPDATE 生效)。
|
|
1484
1491
|
- 如需"覆盖"行为,务必设置 update_on_duplicate=True,不管 check_duplicate 是否为 True。
|
|
1485
1492
|
- 如需"跳过"行为,设置 update_on_duplicate=False 即可。
|
|
1493
|
+
|
|
1494
|
+
---
|
|
1495
|
+
auto_timestamps 参数:
|
|
1496
|
+
|
|
1497
|
+
- 当 auto_timestamps=True 时,系统会自动添加'创建时间'和'更新时间'两列
|
|
1498
|
+
- 如果原始数据中已存在这两列,系统会先移除原始数据中的这些列,然后添加新的时间戳
|
|
1499
|
+
- '创建时间':记录数据首次插入的时间,使用当前时间戳
|
|
1500
|
+
- '更新时间':记录数据最后更新的时间,插入时与创建时间相同,更新时会自动更新为当前时间
|
|
1501
|
+
- 时间戳列的数据类型为 DATETIME,格式为 'YYYY-MM-DD HH:MM:SS'
|
|
1502
|
+
- 这两列会自动添加到 set_typ 中,无需手动指定
|
|
1503
|
+
- 建议在需要审计数据变更历史的表中启用此功能
|
|
1486
1504
|
"""
|
|
1487
1505
|
# upload_start = time.time()
|
|
1488
1506
|
# 检查data参数是否为None
|
|
@@ -1492,7 +1510,7 @@ class MySQLUploader:
|
|
|
1492
1510
|
'表': table_name,
|
|
1493
1511
|
})
|
|
1494
1512
|
raise ValueError("data参数不能为None,请传入有效的数据")
|
|
1495
|
-
|
|
1513
|
+
|
|
1496
1514
|
if isinstance(data, list) or (hasattr(data, 'shape') and hasattr(data, '__len__')):
|
|
1497
1515
|
initial_row_count = len(data)
|
|
1498
1516
|
else:
|
|
@@ -1553,7 +1571,7 @@ class MySQLUploader:
|
|
|
1553
1571
|
raise ValueError("分表方式必须是 'year' 或 'month' 或 'None'")
|
|
1554
1572
|
|
|
1555
1573
|
# 准备数据
|
|
1556
|
-
prepared_data, filtered_set_typ = self._prepare_data(data, set_typ, allow_null, db_name, table_name)
|
|
1574
|
+
prepared_data, filtered_set_typ = self._prepare_data(data, set_typ, allow_null, db_name, table_name, auto_timestamps)
|
|
1557
1575
|
|
|
1558
1576
|
# 检查数据库是否存在
|
|
1559
1577
|
if not self._check_database_exists(db_name):
|
|
@@ -2577,6 +2595,93 @@ class MySQLUploader:
|
|
|
2577
2595
|
|
|
2578
2596
|
return result_df
|
|
2579
2597
|
|
|
2598
|
+
def _process_auto_timestamps(
|
|
2599
|
+
self,
|
|
2600
|
+
data: Union[Dict, List[Dict], pd.DataFrame],
|
|
2601
|
+
set_typ: Dict[str, str],
|
|
2602
|
+
db_name: str,
|
|
2603
|
+
table_name: str
|
|
2604
|
+
) -> Tuple[Union[Dict, List[Dict], pd.DataFrame], Dict[str, str]]:
|
|
2605
|
+
"""
|
|
2606
|
+
处理自动时间戳功能
|
|
2607
|
+
|
|
2608
|
+
:param data: 原始数据
|
|
2609
|
+
:param set_typ: 列类型定义
|
|
2610
|
+
:param db_name: 数据库名
|
|
2611
|
+
:param table_name: 表名
|
|
2612
|
+
:return: 处理后的数据和更新后的set_typ
|
|
2613
|
+
"""
|
|
2614
|
+
|
|
2615
|
+
# 定义时间戳列名
|
|
2616
|
+
created_col = '创建时间'
|
|
2617
|
+
updated_col = '更新时间'
|
|
2618
|
+
current_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
|
2619
|
+
|
|
2620
|
+
# 复制set_typ以避免修改原始对象
|
|
2621
|
+
updated_set_typ = set_typ.copy()
|
|
2622
|
+
|
|
2623
|
+
# 添加时间戳列到set_typ
|
|
2624
|
+
updated_set_typ[created_col] = 'DATETIME'
|
|
2625
|
+
updated_set_typ[updated_col] = 'DATETIME'
|
|
2626
|
+
|
|
2627
|
+
# 处理DataFrame格式的数据
|
|
2628
|
+
if hasattr(data, 'shape') and hasattr(data, 'columns'):
|
|
2629
|
+
import pandas as pd
|
|
2630
|
+
df = data.copy()
|
|
2631
|
+
|
|
2632
|
+
# 移除原始数据中可能存在的时间戳列
|
|
2633
|
+
columns_to_remove = []
|
|
2634
|
+
for col in df.columns:
|
|
2635
|
+
if col in [created_col, updated_col]:
|
|
2636
|
+
columns_to_remove.append(col)
|
|
2637
|
+
logger.warning('移除原始数据中的时间戳列', {
|
|
2638
|
+
'库': db_name,
|
|
2639
|
+
'表': table_name,
|
|
2640
|
+
'列': col,
|
|
2641
|
+
'原因': '与自动时间戳功能冲突'
|
|
2642
|
+
})
|
|
2643
|
+
|
|
2644
|
+
if columns_to_remove:
|
|
2645
|
+
df = df.drop(columns=columns_to_remove)
|
|
2646
|
+
|
|
2647
|
+
# 添加时间戳列
|
|
2648
|
+
df[created_col] = current_time
|
|
2649
|
+
df[updated_col] = current_time
|
|
2650
|
+
|
|
2651
|
+
return df, updated_set_typ
|
|
2652
|
+
|
|
2653
|
+
# 处理字典或字典列表格式的数据
|
|
2654
|
+
else:
|
|
2655
|
+
# 确保data是列表格式
|
|
2656
|
+
if isinstance(data, dict):
|
|
2657
|
+
data_list = [data]
|
|
2658
|
+
is_single_dict = True
|
|
2659
|
+
else:
|
|
2660
|
+
data_list = data
|
|
2661
|
+
is_single_dict = False
|
|
2662
|
+
|
|
2663
|
+
# 处理每一行数据
|
|
2664
|
+
processed_data = []
|
|
2665
|
+
for row in data_list:
|
|
2666
|
+
new_row = {}
|
|
2667
|
+
|
|
2668
|
+
# 复制原始数据,但跳过可能存在的时间戳列
|
|
2669
|
+
for key, value in row.items():
|
|
2670
|
+
if key not in [created_col, updated_col]:
|
|
2671
|
+
new_row[key] = value
|
|
2672
|
+
|
|
2673
|
+
# 添加时间戳
|
|
2674
|
+
new_row[created_col] = current_time
|
|
2675
|
+
new_row[updated_col] = current_time
|
|
2676
|
+
|
|
2677
|
+
processed_data.append(new_row)
|
|
2678
|
+
|
|
2679
|
+
# 如果原始数据是单个字典,返回单个字典
|
|
2680
|
+
if is_single_dict:
|
|
2681
|
+
return processed_data[0], updated_set_typ
|
|
2682
|
+
else:
|
|
2683
|
+
return processed_data, updated_set_typ
|
|
2684
|
+
|
|
2580
2685
|
|
|
2581
2686
|
def main():
|
|
2582
2687
|
dir_path = os.path.expanduser("~")
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
|
2
|
-
mdbq/__version__.py,sha256=
|
|
2
|
+
mdbq/__version__.py,sha256=IFiIUlZSAqjKo9qfpz69wxp6zBBmSIhmlxQDUZwjhNE,18
|
|
3
3
|
mdbq/auth/__init__.py,sha256=pnPMAt63sh1B6kEvmutUuro46zVf2v2YDAG7q-jV_To,24
|
|
4
4
|
mdbq/auth/auth_backend.py,sha256=iLN7AqiSq7fQgFtNtge_TIlVOR1hrCSZXH6oId6uGX4,116924
|
|
5
5
|
mdbq/auth/crypto.py,sha256=fcZRFCnrKVVdWDUx_zds51ynFYwS9DBvJOrRQVldrfM,15931
|
|
@@ -15,7 +15,7 @@ mdbq/mysql/deduplicator.py,sha256=2fugLyKs_xkvYvoG0C0hRYbJ_w8-4oa1FJ_vavoD7Qo,73
|
|
|
15
15
|
mdbq/mysql/mysql.py,sha256=pDg771xBugCMSTWeskIFTi3pFLgaqgyG3smzf-86Wn8,56772
|
|
16
16
|
mdbq/mysql/s_query.py,sha256=N2xHJf2CiUXjXIVBemdst-wamIP3908EGAJOFG13fCU,50475
|
|
17
17
|
mdbq/mysql/unique_.py,sha256=MaztT-WIyEQUs-OOYY4pFulgHVcXR1BfCy3QUz0XM_U,21127
|
|
18
|
-
mdbq/mysql/uploader.py,sha256=
|
|
18
|
+
mdbq/mysql/uploader.py,sha256=7LIWclzGFWYzrt46NTJn34LJZJaz9eWiV6fIj6-ZK7k,130288
|
|
19
19
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
|
20
20
|
mdbq/other/download_sku_picture.py,sha256=X66sVdvVgzoNzmgVJyPtd7bjEvctEKtLPblEPF65EWc,46940
|
|
21
21
|
mdbq/other/error_handler.py,sha256=4p5haAXSY-P78stp4Xwo_MwAngWYqyKj5ogWIuYXMeY,12631
|
|
@@ -35,7 +35,7 @@ mdbq/route/routes.py,sha256=QVGfTvDgu0CpcKCvk1ra74H8uojgqTLUav1fnVAqLEA,29433
|
|
|
35
35
|
mdbq/selenium/__init__.py,sha256=AKzeEceqZyvqn2dEDoJSzDQnbuENkJSHAlbHAD0u0ZI,10
|
|
36
36
|
mdbq/selenium/get_driver.py,sha256=1NTlVUE6QsyjTrVVVqTO2LOnYf578ccFWlWnvIXGtic,20903
|
|
37
37
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
|
38
|
-
mdbq-4.1.
|
|
39
|
-
mdbq-4.1.
|
|
40
|
-
mdbq-4.1.
|
|
41
|
-
mdbq-4.1.
|
|
38
|
+
mdbq-4.1.12.dist-info/METADATA,sha256=EI6IFZT65_tpgdTRxL9AdN5JJBKdMX4vIiMztPSKOR8,364
|
|
39
|
+
mdbq-4.1.12.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
40
|
+
mdbq-4.1.12.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
|
41
|
+
mdbq-4.1.12.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|