mdbq 4.1.11.tar.gz → 4.1.13.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: the registry flags this release of mdbq as potentially problematic; see the registry page for details.
- {mdbq-4.1.11 → mdbq-4.1.13}/PKG-INFO +1 -1
- mdbq-4.1.13/mdbq/__version__.py +1 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/mysql/deduplicator.py +7 -7
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/mysql/uploader.py +314 -104
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq.egg-info/PKG-INFO +1 -1
- mdbq-4.1.11/mdbq/__version__.py +0 -1
- {mdbq-4.1.11 → mdbq-4.1.13}/README.txt +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/__init__.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/auth/__init__.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/auth/auth_backend.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/auth/crypto.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/auth/rate_limiter.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/js/__init__.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/js/jc.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/log/__init__.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/log/mylogger.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/myconf/__init__.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/myconf/myconf.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/mysql/__init__.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/mysql/mysql.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/mysql/s_query.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/mysql/unique_.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/other/__init__.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/other/download_sku_picture.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/other/error_handler.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/other/otk.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/other/pov_city.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/other/ua_sj.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/pbix/__init__.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/pbix/pbix_refresh.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/pbix/refresh_all.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/redis/__init__.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/redis/getredis.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/redis/redis_cache.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/route/__init__.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/route/analytics.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/route/monitor.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/route/routes.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/selenium/__init__.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/selenium/get_driver.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/spider/__init__.py +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq.egg-info/SOURCES.txt +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq.egg-info/dependency_links.txt +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/mdbq.egg-info/top_level.txt +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/setup.cfg +0 -0
- {mdbq-4.1.11 → mdbq-4.1.13}/setup.py +0 -0
mdbq-4.1.13/mdbq/__version__.py (new file)

@@ -0,0 +1 @@
+VERSION = '4.1.13'

{mdbq-4.1.11 → mdbq-4.1.13}/mdbq/mysql/deduplicator.py

@@ -1364,12 +1364,12 @@ def main():
         skip_system_dbs=True,
         max_retries=3,
         retry_waiting_time=5,
-        pool_size=
-        mincached=
-        maxcached=
+        pool_size=10,
+        mincached=2,
+        maxcached=5,
         # recent_month=1,
         # date_range=['2025-06-09', '2025-06-10'],
-        exclude_columns=['更新时间'],
+        exclude_columns=['创建时间', '更新时间'],
         exclude_databases=['cookie文件', '日志', '视频数据', '云电影'],
         # exclude_tables={
         #     '推广数据2': [

@@ -1391,9 +1391,9 @@ def main():
 
     # # 指定表去重(使用特定列)
     deduplicator.deduplicate_table(
-        '
-        '
-        columns=['
+        '推广数据_奥莱店',
+        '主体报表_2025',
+        columns=['日期', '店铺名称', '场景id', '计划id', '主体id'],
         dry_run=False,
         reorder_id=True,
     )
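Taken together, the two hunks above update the example configuration in deduplicator.main(): the connection pool is now pinned explicitly, '创建时间' joins '更新时间' in the excluded columns, and deduplicate_table() is shown with a concrete database, table and column set. A minimal restatement of the changed keyword arguments, for reference only (the object or function that receives them is outside the visible context of this diff):

```python
# Sketch only: restates the keyword arguments shown in the hunks above.
# The call they are passed to is not visible in this diff.
dedup_kwargs = dict(
    skip_system_dbs=True,
    max_retries=3,
    retry_waiting_time=5,
    pool_size=10,        # pinned in 4.1.13
    mincached=2,         # pinned in 4.1.13
    maxcached=5,         # pinned in 4.1.13
    exclude_columns=['创建时间', '更新时间'],   # previously only '更新时间'
    exclude_databases=['cookie文件', '日志', '视频数据', '云电影'],
)
```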

{mdbq-4.1.11 → mdbq-4.1.13}/mdbq/mysql/uploader.py

@@ -435,15 +435,36 @@ class MySQLUploader:
         if not primary_keys:
             column_defs.append("`id` INT NOT NULL AUTO_INCREMENT")
 
-        #
+        # 添加其他列,确保时间戳字段按正确顺序添加
+        timestamp_cols = ['创建时间', '更新时间']
+        regular_cols = []
+        timestamp_defs = []
+
+        # 先处理非时间戳字段
         for col_name, col_type in set_typ.items():
             if col_name == 'id':
                 continue
+            if col_name in timestamp_cols:
+                continue  # 时间戳字段稍后按顺序处理
+
             safe_col_name = self._normalize_col(col_name)
             col_def = f"`{safe_col_name}` {col_type}"
-            if not allow_null and not col_type.lower().startswith('json'):
+            if not allow_null and not col_type.lower().startswith('json') and not col_type.lower().startswith('timestamp'):
                 col_def += " NOT NULL"
-
+            regular_cols.append(col_def)
+
+        # 按固定顺序添加时间戳字段
+        for timestamp_col in timestamp_cols:
+            if timestamp_col in set_typ:
+                safe_col_name = self._normalize_col(timestamp_col)
+                col_type = set_typ[timestamp_col]
+                col_def = f"`{safe_col_name}` {col_type}"
+                # TIMESTAMP字段不需要额外的NOT NULL,因为已经包含在类型定义中
+                timestamp_defs.append(col_def)
+
+        # 合并所有列定义:常规字段 + 时间戳字段
+        column_defs.extend(regular_cols)
+        column_defs.extend(timestamp_defs)
 
         # 主键处理逻辑调整
         def _index_col_sql(col):
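The net effect of this hunk is that regular columns keep their order from set_typ while '创建时间' and '更新时间', when present, are always emitted last, and TIMESTAMP columns no longer receive an extra NOT NULL. A small standalone sketch of that ordering rule, using illustrative column names (only the two timestamp column names and their type strings come from the diff):

```python
# Standalone restatement of the ordering logic in the hunk above (illustrative names).
set_typ = {
    'id': 'INT',
    '日期': 'DATE',
    '店铺名称': 'VARCHAR(100)',
    '创建时间': 'TIMESTAMP DEFAULT CURRENT_TIMESTAMP',
    '更新时间': 'TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP',
}

timestamp_cols = ['创建时间', '更新时间']
regular = [c for c in set_typ if c != 'id' and c not in timestamp_cols]
ordered = regular + [c for c in timestamp_cols if c in set_typ]
print(ordered)  # ['日期', '店铺名称', '创建时间', '更新时间'] -> timestamp columns always last
```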

@@ -672,9 +693,9 @@ class MySQLUploader:
             'decimal': 0.0,
             'float': 0.0,
             'double': 0.0,
-            'date': '
-            'datetime': '
-            'timestamp': '
+            'date': '2000-01-01',
+            'datetime': '2000-01-01 00:00:00',
+            'timestamp': '2000-01-01 00:00:00',
             'json': '{}',
             'varchar': 'none',
             'text': 'none',
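These are the fallback values used when a non-nullable column arrives empty; the date, datetime and timestamp fallbacks are now the explicit sentinel 2000-01-01. A simplified restatement of the lookup (the real helper, _get_fallback_value, is called with db/table/column context in later hunks; this is only the mapping itself):

```python
# Simplified sketch of the fallback table above; not the full helper.
fallbacks = {
    'decimal': 0.0, 'float': 0.0, 'double': 0.0,
    'date': '2000-01-01',
    'datetime': '2000-01-01 00:00:00',
    'timestamp': '2000-01-01 00:00:00',
    'json': '{}', 'varchar': 'none', 'text': 'none',
}
value = fallbacks.get('datetime')  # '2000-01-01 00:00:00' when the cell is empty
```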

@@ -783,6 +804,12 @@ class MySQLUploader:
         """
         column_type_lower = column_type.lower() if column_type else ''
 
+        # 对于包含CURRENT_TIMESTAMP的TIMESTAMP字段,跳过验证,让MySQL自动处理
+        if ('timestamp' in column_type_lower and 'current_timestamp' in column_type_lower and
+                col_name in ['创建时间', '更新时间']):
+            # 这些字段由MySQL自动处理,不需要传入值
+            return None
+
         # 统一的空值检查(None、空字符串、NaN)
         is_empty_value = False
         if value is None:

@@ -1219,12 +1246,17 @@ class MySQLUploader:
             set_typ: Dict[str, str],
             allow_null: bool = False,
             db_name: str = None,
-            table_name: str = None,
+            table_name: str = None,
+            auto_timestamps: bool = False
     ) -> Tuple[List[Dict], Dict[str, str]]:
         """
         准备要上传的数据,验证并转换数据类型
         根据set_typ自动处理所有数据类型的列:补齐缺失的列并丢弃多余的列
         """
+        # 处理自动时间戳功能
+        if auto_timestamps:
+            data, set_typ = self._process_auto_timestamps(data, set_typ, db_name, table_name)
+
         # set_typ的键清洗
         if not set_typ:
             set_typ = {}

@@ -1330,50 +1362,79 @@ class MySQLUploader:
                 # 跳过id列,不允许外部传入id
                 if (self.case_sensitive and col_name == 'id') or (not self.case_sensitive and col_name.lower() == 'id'):
                     continue
+                # 对于自动时间戳字段,使用特殊标记让MySQL使用DEFAULT值
+                col_type_lower = filtered_set_typ[col_name].lower()
+                is_auto_timestamp = ('timestamp' in col_type_lower and 'current_timestamp' in col_type_lower and
+                                     col_name in ['创建时间', '更新时间'])
+
                 if col_name not in row:
                     # 对于缺失的列,使用None作为默认值,在_validate_value中会根据allow_null和列类型进行进一步处理
-
-
-
-
-
-
-
-
-
-
-
+                    if is_auto_timestamp:
+                        # 自动时间戳字段使用特殊标记
+                        prepared_row[col_name] = 'DEFAULT'
+                    else:
+                        try:
+                            prepared_row[col_name] = self._validate_value(None, filtered_set_typ[col_name], allow_null, db_name, table_name, col_name)
+                        except ValueError as e:
+                            if not allow_null:
+                                # 如果不允许空值但验证失败,尝试使用兜底值
+                                try:
+                                    fallback_value = self._get_fallback_value(filtered_set_typ[col_name].lower(), allow_null, db_name, table_name, col_name, None)
+                                    if fallback_value is not None:
+                                        prepared_row[col_name] = fallback_value
+                                        logger.warning(f"行号:{row_idx} -> 缺失列: `{col_name}`, 使用兜底值: {fallback_value}", {'row': self._shorten_for_log(row)})
+                                    else:
+                                        error_msg = f"行号:{row_idx} -> 缺失列: `{col_name}`, 且不允许空值"
+                                        logger.error(error_msg, {'row': self._shorten_for_log(row)})
+                                        raise ValueError(error_msg)
+                                except Exception:
                                     error_msg = f"行号:{row_idx} -> 缺失列: `{col_name}`, 且不允许空值"
                                     logger.error(error_msg, {'row': self._shorten_for_log(row)})
                                     raise ValueError(error_msg)
-
-
-                    logger.error(error_msg, {'row': self._shorten_for_log(row)})
-                    raise ValueError(error_msg)
-                else:
-                    prepared_row[col_name] = None
+                            else:
+                                prepared_row[col_name] = None
                 else:
-
-
-
-                    #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                    if is_auto_timestamp:
+                        # 自动时间戳字段忽略用户传入的值,使用DEFAULT
+                        prepared_row[col_name] = 'DEFAULT'
+                        if row[col_name] is not None:  # 如果用户传入了值,给出警告
+                            logger.warning('忽略自动时间戳字段的用户传入值', {
+                                '库': db_name,
+                                '表': table_name,
+                                '列': col_name,
+                                '用户值': row[col_name],
+                                '原因': '将使用MySQL CURRENT_TIMESTAMP'
+                            })
+                    else:
+                        try:
+                            prepared_row[col_name] = self._validate_value(row[col_name], filtered_set_typ[col_name], allow_null, db_name, table_name, col_name)
+                        except ValueError as e:
+                            # 如果数据验证失败,检查是否为空值且不允许空值,尝试使用兜底值
+                            original_value = row[col_name]
+                            is_empty_original = (original_value is None or
+                                                 original_value == '' or
+                                                 (not isinstance(original_value, (list, dict)) and
+                                                  pd.isna(original_value) if hasattr(pd, 'isna') else False))
+
+                            if is_empty_original and not allow_null:
+                                try:
+                                    fallback_value = self._get_fallback_value(filtered_set_typ[col_name].lower(), allow_null, db_name, table_name, col_name, original_value)
+                                    if fallback_value is not None:
+                                        prepared_row[col_name] = fallback_value
+                                        logger.warning(f"行:{row_idx}, 列:`{col_name}` -> 原值验证失败,使用兜底值: {fallback_value}", {
+                                            '原值': original_value,
+                                            '兜底值': fallback_value,
+                                            'row': self._shorten_for_log(row)
+                                        })
+                                    else:
+                                        logger.error('数据验证失败', {
+                                            '列': col_name,
+                                            '行': row_idx,
+                                            '报错': str(e),
+                                            'row': self._shorten_for_log(row),
+                                        })
+                                        raise ValueError(f"行:{row_idx}, 列:`{col_name}`-> 报错: {str(e)}")
+                                except Exception:
                                     logger.error('数据验证失败', {
                                         '列': col_name,
                                         '行': row_idx,

@@ -1381,7 +1442,7 @@ class MySQLUploader:
                                         'row': self._shorten_for_log(row),
                                     })
                                     raise ValueError(f"行:{row_idx}, 列:`{col_name}`-> 报错: {str(e)}")
-
+                            else:
                                 logger.error('数据验证失败', {
                                     '列': col_name,
                                     '行': row_idx,

@@ -1389,15 +1450,7 @@ class MySQLUploader:
                                     'row': self._shorten_for_log(row),
                                 })
                                 raise ValueError(f"行:{row_idx}, 列:`{col_name}`-> 报错: {str(e)}")
-
-            logger.error('数据验证失败', {
-                '列': col_name,
-                '行': row_idx,
-                '报错': str(e),
-                'row': self._shorten_for_log(row),
-            })
-            raise ValueError(f"行:{row_idx}, 列:`{col_name}`-> 报错: {str(e)}")
-                prepared_data.append(prepared_row)
+            prepared_data.append(prepared_row)
         return prepared_data, filtered_set_typ
 
     def upload_data(

@@ -1416,7 +1469,8 @@ class MySQLUploader:
             indexes: Optional[List[str]] = None,
             update_on_duplicate: bool = False,
             transaction_mode: str = "batch",
-            unique_keys: Optional[List[List[str]]] = None
+            unique_keys: Optional[List[List[str]]] = None,
+            auto_timestamps: bool = False
     ):
         """
         上传数据到数据库的主入口方法

@@ -1439,6 +1493,7 @@ class MySQLUploader:
         - 'batch' : 整批提交事务(性能最优)
         - 'hybrid' : 混合模式(每N行提交,平衡性能与安全性)
         :param unique_keys: 唯一约束列表,每个元素为列名列表,支持多列组合唯一约束。格式:[['col1', 'col2'], ['col3']] 或 None
+        :param auto_timestamps: 是否自动添加创建时间和更新时间列,默认为False。启用后会自动添加'创建时间'和'更新时间'两列
         :raises: 可能抛出各种验证和数据库相关异常
 
         ---

@@ -1483,6 +1538,17 @@ class MySQLUploader:
         - 只要 update_on_duplicate=True 且表存在唯一约束(如 unique_keys),无论 check_duplicate 是否为 True,都会更新旧数据(即 ON DUPLICATE KEY UPDATE 生效)。
         - 如需"覆盖"行为,务必设置 update_on_duplicate=True,不管 check_duplicate 是否为 True。
         - 如需"跳过"行为,设置 update_on_duplicate=False 即可。
+
+        ---
+        auto_timestamps 参数:
+
+        - 当 auto_timestamps=True 时,系统会自动添加'创建时间'和'更新时间'两列
+        - 如果原始数据中已存在这两列,系统会先移除原始数据中的这些列,然后添加新的时间戳
+        - '创建时间':记录数据首次插入的时间,使用当前时间戳
+        - '更新时间':记录数据最后更新的时间,插入时与创建时间相同,更新时会自动更新为当前时间
+        - 时间戳列的数据类型为 DATETIME,格式为 'YYYY-MM-DD HH:MM:SS'
+        - 这两列会自动添加到 set_typ 中,无需手动指定
+        - 建议在需要审计数据变更历史的表中启用此功能
         """
         # upload_start = time.time()
         # 检查data参数是否为None
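Per the new docstring, enabling auto_timestamps adds '创建时间' and '更新时间' automatically, strips any user-supplied values for those two columns, and lets MySQL fill them via CURRENT_TIMESTAMP. A minimal usage sketch, assuming an already-configured MySQLUploader instance; parameter names not visible in the signature fragment above (db_name, table_name, data, set_typ) are inferred from the method body shown in later hunks, and the database/table/column values are illustrative:

```python
# Minimal sketch; `uploader` is assumed to be a configured MySQLUploader instance.
uploader.upload_data(
    db_name='测试库',
    table_name='主体报表_2025',
    data=[{'日期': '2025-06-09', '店铺名称': '奥莱店', '花费': 12.5}],
    set_typ={'日期': 'DATE', '店铺名称': 'VARCHAR(100)', '花费': 'DECIMAL(10,2)'},
    unique_keys=[['日期', '店铺名称']],
    update_on_duplicate=True,
    transaction_mode='batch',
    auto_timestamps=True,   # '创建时间' / '更新时间' are created and managed by MySQL
)
```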

@@ -1492,7 +1558,7 @@ class MySQLUploader:
                 '表': table_name,
             })
             raise ValueError("data参数不能为None,请传入有效的数据")
-
+
         if isinstance(data, list) or (hasattr(data, 'shape') and hasattr(data, '__len__')):
             initial_row_count = len(data)
         else:

@@ -1553,7 +1619,7 @@ class MySQLUploader:
             raise ValueError("分表方式必须是 'year' 或 'month' 或 'None'")
 
         # 准备数据
-        prepared_data, filtered_set_typ = self._prepare_data(data, set_typ, allow_null, db_name, table_name)
+        prepared_data, filtered_set_typ = self._prepare_data(data, set_typ, allow_null, db_name, table_name, auto_timestamps)
 
         # 检查数据库是否存在
         if not self._check_database_exists(db_name):

@@ -1890,6 +1956,49 @@ class MySQLUploader:
                 return str(value)
             return value
 
+        def execute_single_row_with_defaults(row):
+            """处理单行插入,支持DEFAULT字段"""
+            has_defaults = any(row.get(col) == 'DEFAULT' for col in all_columns)
+
+            if has_defaults:
+                # 分离普通字段和DEFAULT字段
+                regular_columns = []
+                regular_values = []
+                default_columns = []
+
+                for col in all_columns:
+                    val = row.get(col)
+                    if val == 'DEFAULT':
+                        default_columns.append(col)
+                    else:
+                        regular_columns.append(col)
+                        regular_values.append(ensure_basic_type(val))
+
+                # 构建INSERT ... SET语句
+                set_clauses = []
+                for col in regular_columns:
+                    set_clauses.append(f"`{self._validate_identifier(col)}` = %s")
+                for col in default_columns:
+                    set_clauses.append(f"`{self._validate_identifier(col)}` = DEFAULT")
+
+                if set_clauses:
+                    dynamic_sql = f"INSERT INTO `{db_name}`.`{table_name}` SET {', '.join(set_clauses)}"
+                    if update_on_duplicate and regular_columns:
+                        update_clauses = [f"`{self._validate_identifier(col)}` = VALUES(`{self._validate_identifier(col)}`)" for col in regular_columns]
+                        if update_clauses:
+                            dynamic_sql += f" ON DUPLICATE KEY UPDATE {', '.join(update_clauses)}"
+
+                    cursor.execute(dynamic_sql, regular_values)
+                    return cursor.rowcount if cursor.rowcount is not None else 0
+            else:
+                # 没有DEFAULT字段,使用原有逻辑
+                values = [ensure_basic_type(row.get(col)) for col in all_columns]
+                if check_duplicate and not update_on_duplicate:
+                    dup_cols = duplicate_columns if duplicate_columns else [col for col in all_columns if col.lower() not in self.base_excute_col]
+                    values += [ensure_basic_type(row.get(col)) for col in dup_cols]
+                cursor.execute(sql, values)
+                return cursor.rowcount if cursor.rowcount is not None else 0
+
         batch_size = get_optimal_batch_size(len(data))
         all_columns = [col for col in set_typ.keys() if col.lower() != 'id']
         total_inserted = 0
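For rows whose auto-timestamp columns carry the 'DEFAULT' marker, this helper builds a per-row INSERT ... SET statement instead of using the pre-built multi-row INSERT, so that MySQL applies the column defaults itself. A sketch of the statement shape it produces for one row, reusing the illustrative names from the earlier example:

```python
# Sketch of the SQL shape built by execute_single_row_with_defaults() for one row
# whose timestamp columns are marked 'DEFAULT'. Identifiers are illustrative.
dynamic_sql = (
    "INSERT INTO `测试库`.`主体报表_2025` "
    "SET `日期` = %s, `店铺名称` = %s, `花费` = %s, "
    "`创建时间` = DEFAULT, `更新时间` = DEFAULT "
    "ON DUPLICATE KEY UPDATE `日期` = VALUES(`日期`), "
    "`店铺名称` = VALUES(`店铺名称`), `花费` = VALUES(`花费`)"
)
params = ['2025-06-09', '奥莱店', 12.5]  # bound to the three %s placeholders
```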

@@ -1900,50 +2009,72 @@ class MySQLUploader:
             if transaction_mode == 'batch':
                 for i in range(0, len(data), batch_size):
                     batch = data[i:i + batch_size]
-
-                    for row in batch
-
-
-
-
-
-
-
+                    # 检查是否有DEFAULT字段,如果有则需要特殊处理
+                    has_default_fields = any(row.get(col) == 'DEFAULT' for row in batch for col in all_columns)
+
+                    if has_default_fields:
+                        # 对于包含DEFAULT字段的情况,逐行处理
+                        for row in batch:
+                            try:
+                                affected = execute_single_row_with_defaults(row)
+                                if update_on_duplicate:
+                                    total_inserted += 1
+                                else:
+                                    if affected > 0:
+                                        total_inserted += 1
+                                    else:
+                                        total_skipped += 1
+                            except pymysql.err.IntegrityError:
+                                total_skipped += 1
+                            except Exception as e:
+                                total_failed += 1
+                                logger.error('单行插入失败', {
+                                    '库': db_name,
+                                    '表': table_name,
+                                    '错误': str(e)
+                                })
                         conn.commit()
-
-                    #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                    else:
+                        # 没有DEFAULT字段,使用原有逻辑
+                        values_list = []
+                        for row in batch:
+                            values = [ensure_basic_type(row.get(col)) for col in all_columns]
+                            if check_duplicate and not update_on_duplicate:
+                                dup_cols = duplicate_columns if duplicate_columns else [col for col in all_columns if col.lower() not in self.base_excute_col]
+                                values += [ensure_basic_type(row.get(col)) for col in dup_cols]
+                            values_list.append(values)
+                        try:
+                            cursor.executemany(sql, values_list)
+                            conn.commit()
+                            # 在batch模式下,affected_rows表示实际影响的行数
+                            # 如果update_on_duplicate为True,则affected_rows包含更新的行数
+                            # 如果update_on_duplicate为False,则affected_rows只包含插入的行数
+                            affected = cursor.rowcount if cursor.rowcount is not None else 0
+                            if update_on_duplicate:
+                                # 当启用更新时,affected_rows包含插入和更新的行数
+                                # 我们需要区分插入和更新的行数
+                                # 由于无法准确区分,我们假设所有行都是插入的
+                                total_inserted += len(batch)
+                            else:
+                                # 当不启用更新时,affected_rows只包含插入的行数
+                                total_inserted += affected
+                                total_skipped += len(batch) - affected
+                        except pymysql.err.IntegrityError as e:
+                            conn.rollback()
+                            # 在唯一约束冲突时,所有行都被跳过
+                            total_skipped += len(batch)
+                            logger.debug('批量插入唯一约束冲突,全部跳过', {'库': db_name, '表': table_name, '错误': str(e)})
+                        except Exception as e:
+                            conn.rollback()
+                            total_failed += len(batch)
+                            logger.error('批量插入失败', {'库': db_name, '表': table_name, '错误': str(e)})
             elif transaction_mode == 'hybrid':
                 hybrid_n = 100  # 可配置
                 for i in range(0, len(data), hybrid_n):
                     batch = data[i:i + hybrid_n]
                     for row in batch:
                         try:
-
-                            if check_duplicate and not update_on_duplicate:
-                                dup_cols = duplicate_columns if duplicate_columns else [col for col in all_columns if col.lower() not in self.base_excute_col]
-                                values += [ensure_basic_type(row.get(col)) for col in dup_cols]
-                            cursor.execute(sql, values)
-                            affected = cursor.rowcount if cursor.rowcount is not None else 0
+                            affected = execute_single_row_with_defaults(row)
                             if update_on_duplicate:
                                 # 当启用更新时,affected_rows包含插入和更新的行数
                                 # 假设所有行都是插入的,因为无法区分插入和更新

@@ -1966,12 +2097,7 @@ class MySQLUploader:
             else:  # row模式
                 for row in data:
                     try:
-
-                        if check_duplicate and not update_on_duplicate:
-                            dup_cols = duplicate_columns if duplicate_columns else [col for col in all_columns if col.lower() not in self.base_excute_col]
-                            values += [ensure_basic_type(row.get(col)) for col in dup_cols]
-                        cursor.execute(sql, values)
-                        affected = cursor.rowcount if cursor.rowcount is not None else 0
+                        affected = execute_single_row_with_defaults(row)
                         if update_on_duplicate:
                             # 当启用更新时,affected_rows包含插入和更新的行数
                             # 假设所有行都是插入的,因为无法区分插入和更新

@@ -2191,14 +2317,22 @@ class MySQLUploader:
             default_value = " DEFAULT 0.0"
         elif any(t in column_type_lower for t in ['varchar', 'text', 'char', 'mediumtext', 'longtext']):
             default_value = " DEFAULT 'none'"
+        elif 'timestamp' in column_type_lower:
+            # TIMESTAMP类型已经包含DEFAULT定义,不需要额外添加
+            default_value = ""
         elif 'date' in column_type_lower:
-            if 'datetime' in column_type_lower
-                default_value = " DEFAULT '
+            if 'datetime' in column_type_lower:
+                default_value = " DEFAULT '2000-01-01 00:00:00'"
             else:
-                default_value = " DEFAULT '
+                default_value = " DEFAULT '2000-01-01'"
         elif 'json' in column_type_lower:
             default_value = " DEFAULT '{}'"
 
+        # 对于TIMESTAMP类型,不添加额外的NULL约束,因为已经包含在类型定义中
+        if 'timestamp' in column_type.lower() and ('default' in column_type.lower() or 'current_timestamp' in column_type.lower()):
+            null_constraint = ""  # TIMESTAMP类型已经包含完整定义
+            default_value = ""
+
         sql = f'ALTER TABLE `{db_name}`.`{table_name}` ADD COLUMN `{column}` {column_type} {null_constraint}{default_value}'
 
         conn = None
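When one of the auto-managed columns has to be added to an existing table, the new TIMESTAMP branch suppresses the extra NULL constraint and DEFAULT clause because the type string already carries them. A sketch of the resulting statement, with illustrative database and table names:

```python
# Illustrative ALTER TABLE shape for an auto-managed timestamp column.
column = '更新时间'
column_type = 'TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP'
null_constraint = default_value = ''   # both suppressed for this type
sql = f'ALTER TABLE `测试库`.`主体报表_2025` ADD COLUMN `{column}` {column_type} {null_constraint}{default_value}'
# -> ALTER TABLE `测试库`.`主体报表_2025` ADD COLUMN `更新时间`
#    TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
```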

@@ -2577,6 +2711,82 @@ class MySQLUploader:
 
         return result_df
 
+    def _process_auto_timestamps(
+            self,
+            data: Union[Dict, List[Dict], pd.DataFrame],
+            set_typ: Dict[str, str],
+            db_name: str,
+            table_name: str
+    ) -> Tuple[Union[Dict, List[Dict], pd.DataFrame], Dict[str, str]]:
+        """
+        处理自动时间戳功能
+
+        :param data: 原始数据
+        :param set_typ: 列类型定义
+        :param db_name: 数据库名
+        :param table_name: 表名
+        :return: 处理后的数据和更新后的set_typ
+        """
+
+        # 定义时间戳列名
+        created_col = '创建时间'
+        updated_col = '更新时间'
+
+        # 复制set_typ以避免修改原始对象
+        updated_set_typ = set_typ.copy()
+
+        # 使用MySQL的CURRENT_TIMESTAMP功能,按固定顺序添加时间戳列
+        # 创建时间:插入时自动设置,更新时不变
+        updated_set_typ[created_col] = 'TIMESTAMP DEFAULT CURRENT_TIMESTAMP'
+        # 更新时间:插入和更新时都自动设置为当前时间
+        updated_set_typ[updated_col] = 'TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP'
+
+        # 处理DataFrame格式的数据
+        if hasattr(data, 'shape') and hasattr(data, 'columns'):
+            import pandas as pd
+            df = data.copy()
+
+            # 移除原始数据中可能存在的时间戳列,让MySQL自动处理
+            columns_to_remove = []
+            for col in df.columns:
+                if col in [created_col, updated_col]:
+                    columns_to_remove.append(col)
+
+            if columns_to_remove:
+                df = df.drop(columns=columns_to_remove)
+
+            # 不再手动添加时间戳列,让MySQL的CURRENT_TIMESTAMP自动处理
+            return df, updated_set_typ
+
+        # 处理字典或字典列表格式的数据
+        else:
+            # 确保data是列表格式
+            if isinstance(data, dict):
+                data_list = [data]
+                is_single_dict = True
+            else:
+                data_list = data
+                is_single_dict = False
+
+            # 处理每一行数据
+            processed_data = []
+            for row in data_list:
+                new_row = {}
+
+                # 复制原始数据,但跳过可能存在的时间戳列
+                for key, value in row.items():
+                    if key not in [created_col, updated_col]:
+                        new_row[key] = value
+
+                # 不再手动添加时间戳,让MySQL的CURRENT_TIMESTAMP自动处理
+                processed_data.append(new_row)
+
+            # 如果原始数据是单个字典,返回单个字典
+            if is_single_dict:
+                return processed_data[0], updated_set_typ
+            else:
+                return processed_data, updated_set_typ
+
 
 def main():
     dir_path = os.path.expanduser("~")
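In short, _process_auto_timestamps() never writes timestamp values itself: it drops any '创建时间'/'更新时间' columns found in the incoming data and registers the two TIMESTAMP ... CURRENT_TIMESTAMP types in set_typ so that MySQL maintains them. A standalone restatement of that transformation for a single dict row:

```python
# Standalone restatement of the transformation above (dict input case).
set_typ = {'日期': 'DATE', '花费': 'DECIMAL(10,2)'}
row = {'日期': '2025-06-09', '花费': 12.5, '更新时间': '2024-01-01 00:00:00'}

updated_set_typ = dict(set_typ)
updated_set_typ['创建时间'] = 'TIMESTAMP DEFAULT CURRENT_TIMESTAMP'
updated_set_typ['更新时间'] = 'TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP'

cleaned_row = {k: v for k, v in row.items() if k not in ('创建时间', '更新时间')}
assert cleaned_row == {'日期': '2025-06-09', '花费': 12.5}
# The user-supplied '更新时间' is discarded; MySQL fills both columns itself.
```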

mdbq-4.1.11/mdbq/__version__.py (deleted)

@@ -1 +0,0 @@
-VERSION = '4.1.11'