mdbq 4.1.12__py3-none-any.whl → 4.1.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mdbq might be problematic. Click here for more details.
- mdbq/__version__.py +1 -1
- mdbq/mysql/deduplicator.py +7 -7
- mdbq/mysql/uploader.py +224 -119
- {mdbq-4.1.12.dist-info → mdbq-4.1.13.dist-info}/METADATA +1 -1
- {mdbq-4.1.12.dist-info → mdbq-4.1.13.dist-info}/RECORD +7 -7
- {mdbq-4.1.12.dist-info → mdbq-4.1.13.dist-info}/WHEEL +0 -0
- {mdbq-4.1.12.dist-info → mdbq-4.1.13.dist-info}/top_level.txt +0 -0
mdbq/__version__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
VERSION = '4.1.12'
|
|
1
|
+
VERSION = '4.1.13'
|
mdbq/mysql/deduplicator.py
CHANGED
|
@@ -1364,12 +1364,12 @@ def main():
|
|
|
1364
1364
|
skip_system_dbs=True,
|
|
1365
1365
|
max_retries=3,
|
|
1366
1366
|
retry_waiting_time=5,
|
|
1367
|
-
pool_size=
|
|
1368
|
-
mincached=
|
|
1369
|
-
maxcached=
|
|
1367
|
+
pool_size=10,
|
|
1368
|
+
mincached=2,
|
|
1369
|
+
maxcached=5,
|
|
1370
1370
|
# recent_month=1,
|
|
1371
1371
|
# date_range=['2025-06-09', '2025-06-10'],
|
|
1372
|
-
exclude_columns=['更新时间'],
|
|
1372
|
+
exclude_columns=['创建时间', '更新时间'],
|
|
1373
1373
|
exclude_databases=['cookie文件', '日志', '视频数据', '云电影'],
|
|
1374
1374
|
# exclude_tables={
|
|
1375
1375
|
# '推广数据2': [
|
|
@@ -1391,9 +1391,9 @@ def main():
|
|
|
1391
1391
|
|
|
1392
1392
|
# # 指定表去重(使用特定列)
|
|
1393
1393
|
deduplicator.deduplicate_table(
|
|
1394
|
-
'
|
|
1395
|
-
'
|
|
1396
|
-
columns=['
|
|
1394
|
+
'推广数据_奥莱店',
|
|
1395
|
+
'主体报表_2025',
|
|
1396
|
+
columns=['日期', '店铺名称', '场景id', '计划id', '主体id'],
|
|
1397
1397
|
dry_run=False,
|
|
1398
1398
|
reorder_id=True,
|
|
1399
1399
|
)
|
mdbq/mysql/uploader.py
CHANGED
|
@@ -435,15 +435,36 @@ class MySQLUploader:
|
|
|
435
435
|
if not primary_keys:
|
|
436
436
|
column_defs.append("`id` INT NOT NULL AUTO_INCREMENT")
|
|
437
437
|
|
|
438
|
-
#
|
|
438
|
+
# 添加其他列,确保时间戳字段按正确顺序添加
|
|
439
|
+
timestamp_cols = ['创建时间', '更新时间']
|
|
440
|
+
regular_cols = []
|
|
441
|
+
timestamp_defs = []
|
|
442
|
+
|
|
443
|
+
# 先处理非时间戳字段
|
|
439
444
|
for col_name, col_type in set_typ.items():
|
|
440
445
|
if col_name == 'id':
|
|
441
446
|
continue
|
|
447
|
+
if col_name in timestamp_cols:
|
|
448
|
+
continue # 时间戳字段稍后按顺序处理
|
|
449
|
+
|
|
442
450
|
safe_col_name = self._normalize_col(col_name)
|
|
443
451
|
col_def = f"`{safe_col_name}` {col_type}"
|
|
444
|
-
if not allow_null and not col_type.lower().startswith('json'):
|
|
452
|
+
if not allow_null and not col_type.lower().startswith('json') and not col_type.lower().startswith('timestamp'):
|
|
445
453
|
col_def += " NOT NULL"
|
|
446
|
-
|
|
454
|
+
regular_cols.append(col_def)
|
|
455
|
+
|
|
456
|
+
# 按固定顺序添加时间戳字段
|
|
457
|
+
for timestamp_col in timestamp_cols:
|
|
458
|
+
if timestamp_col in set_typ:
|
|
459
|
+
safe_col_name = self._normalize_col(timestamp_col)
|
|
460
|
+
col_type = set_typ[timestamp_col]
|
|
461
|
+
col_def = f"`{safe_col_name}` {col_type}"
|
|
462
|
+
# TIMESTAMP字段不需要额外的NOT NULL,因为已经包含在类型定义中
|
|
463
|
+
timestamp_defs.append(col_def)
|
|
464
|
+
|
|
465
|
+
# 合并所有列定义:常规字段 + 时间戳字段
|
|
466
|
+
column_defs.extend(regular_cols)
|
|
467
|
+
column_defs.extend(timestamp_defs)
|
|
447
468
|
|
|
448
469
|
# 主键处理逻辑调整
|
|
449
470
|
def _index_col_sql(col):
|
|
@@ -672,9 +693,9 @@ class MySQLUploader:
|
|
|
672
693
|
'decimal': 0.0,
|
|
673
694
|
'float': 0.0,
|
|
674
695
|
'double': 0.0,
|
|
675
|
-
'date': '
|
|
676
|
-
'datetime': '
|
|
677
|
-
'timestamp': '
|
|
696
|
+
'date': '2000-01-01',
|
|
697
|
+
'datetime': '2000-01-01 00:00:00',
|
|
698
|
+
'timestamp': '2000-01-01 00:00:00',
|
|
678
699
|
'json': '{}',
|
|
679
700
|
'varchar': 'none',
|
|
680
701
|
'text': 'none',
|
|
@@ -783,6 +804,12 @@ class MySQLUploader:
|
|
|
783
804
|
"""
|
|
784
805
|
column_type_lower = column_type.lower() if column_type else ''
|
|
785
806
|
|
|
807
|
+
# 对于包含CURRENT_TIMESTAMP的TIMESTAMP字段,跳过验证,让MySQL自动处理
|
|
808
|
+
if ('timestamp' in column_type_lower and 'current_timestamp' in column_type_lower and
|
|
809
|
+
col_name in ['创建时间', '更新时间']):
|
|
810
|
+
# 这些字段由MySQL自动处理,不需要传入值
|
|
811
|
+
return None
|
|
812
|
+
|
|
786
813
|
# 统一的空值检查(None、空字符串、NaN)
|
|
787
814
|
is_empty_value = False
|
|
788
815
|
if value is None:
|
|
@@ -1335,50 +1362,79 @@ class MySQLUploader:
|
|
|
1335
1362
|
# 跳过id列,不允许外部传入id
|
|
1336
1363
|
if (self.case_sensitive and col_name == 'id') or (not self.case_sensitive and col_name.lower() == 'id'):
|
|
1337
1364
|
continue
|
|
1365
|
+
# 对于自动时间戳字段,使用特殊标记让MySQL使用DEFAULT值
|
|
1366
|
+
col_type_lower = filtered_set_typ[col_name].lower()
|
|
1367
|
+
is_auto_timestamp = ('timestamp' in col_type_lower and 'current_timestamp' in col_type_lower and
|
|
1368
|
+
col_name in ['创建时间', '更新时间'])
|
|
1369
|
+
|
|
1338
1370
|
if col_name not in row:
|
|
1339
1371
|
# 对于缺失的列,使用None作为默认值,在_validate_value中会根据allow_null和列类型进行进一步处理
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
|
|
1346
|
-
|
|
1347
|
-
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1372
|
+
if is_auto_timestamp:
|
|
1373
|
+
# 自动时间戳字段使用特殊标记
|
|
1374
|
+
prepared_row[col_name] = 'DEFAULT'
|
|
1375
|
+
else:
|
|
1376
|
+
try:
|
|
1377
|
+
prepared_row[col_name] = self._validate_value(None, filtered_set_typ[col_name], allow_null, db_name, table_name, col_name)
|
|
1378
|
+
except ValueError as e:
|
|
1379
|
+
if not allow_null:
|
|
1380
|
+
# 如果不允许空值但验证失败,尝试使用兜底值
|
|
1381
|
+
try:
|
|
1382
|
+
fallback_value = self._get_fallback_value(filtered_set_typ[col_name].lower(), allow_null, db_name, table_name, col_name, None)
|
|
1383
|
+
if fallback_value is not None:
|
|
1384
|
+
prepared_row[col_name] = fallback_value
|
|
1385
|
+
logger.warning(f"行号:{row_idx} -> 缺失列: `{col_name}`, 使用兜底值: {fallback_value}", {'row': self._shorten_for_log(row)})
|
|
1386
|
+
else:
|
|
1387
|
+
error_msg = f"行号:{row_idx} -> 缺失列: `{col_name}`, 且不允许空值"
|
|
1388
|
+
logger.error(error_msg, {'row': self._shorten_for_log(row)})
|
|
1389
|
+
raise ValueError(error_msg)
|
|
1390
|
+
except Exception:
|
|
1351
1391
|
error_msg = f"行号:{row_idx} -> 缺失列: `{col_name}`, 且不允许空值"
|
|
1352
1392
|
logger.error(error_msg, {'row': self._shorten_for_log(row)})
|
|
1353
1393
|
raise ValueError(error_msg)
|
|
1354
|
-
|
|
1355
|
-
|
|
1356
|
-
logger.error(error_msg, {'row': self._shorten_for_log(row)})
|
|
1357
|
-
raise ValueError(error_msg)
|
|
1358
|
-
else:
|
|
1359
|
-
prepared_row[col_name] = None
|
|
1394
|
+
else:
|
|
1395
|
+
prepared_row[col_name] = None
|
|
1360
1396
|
else:
|
|
1361
|
-
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
#
|
|
1365
|
-
|
|
1366
|
-
|
|
1367
|
-
|
|
1368
|
-
|
|
1369
|
-
|
|
1370
|
-
|
|
1371
|
-
|
|
1372
|
-
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
1376
|
-
|
|
1377
|
-
|
|
1378
|
-
|
|
1379
|
-
|
|
1380
|
-
|
|
1381
|
-
|
|
1397
|
+
if is_auto_timestamp:
|
|
1398
|
+
# 自动时间戳字段忽略用户传入的值,使用DEFAULT
|
|
1399
|
+
prepared_row[col_name] = 'DEFAULT'
|
|
1400
|
+
if row[col_name] is not None: # 如果用户传入了值,给出警告
|
|
1401
|
+
logger.warning('忽略自动时间戳字段的用户传入值', {
|
|
1402
|
+
'库': db_name,
|
|
1403
|
+
'表': table_name,
|
|
1404
|
+
'列': col_name,
|
|
1405
|
+
'用户值': row[col_name],
|
|
1406
|
+
'原因': '将使用MySQL CURRENT_TIMESTAMP'
|
|
1407
|
+
})
|
|
1408
|
+
else:
|
|
1409
|
+
try:
|
|
1410
|
+
prepared_row[col_name] = self._validate_value(row[col_name], filtered_set_typ[col_name], allow_null, db_name, table_name, col_name)
|
|
1411
|
+
except ValueError as e:
|
|
1412
|
+
# 如果数据验证失败,检查是否为空值且不允许空值,尝试使用兜底值
|
|
1413
|
+
original_value = row[col_name]
|
|
1414
|
+
is_empty_original = (original_value is None or
|
|
1415
|
+
original_value == '' or
|
|
1416
|
+
(not isinstance(original_value, (list, dict)) and
|
|
1417
|
+
pd.isna(original_value) if hasattr(pd, 'isna') else False))
|
|
1418
|
+
|
|
1419
|
+
if is_empty_original and not allow_null:
|
|
1420
|
+
try:
|
|
1421
|
+
fallback_value = self._get_fallback_value(filtered_set_typ[col_name].lower(), allow_null, db_name, table_name, col_name, original_value)
|
|
1422
|
+
if fallback_value is not None:
|
|
1423
|
+
prepared_row[col_name] = fallback_value
|
|
1424
|
+
logger.warning(f"行:{row_idx}, 列:`{col_name}` -> 原值验证失败,使用兜底值: {fallback_value}", {
|
|
1425
|
+
'原值': original_value,
|
|
1426
|
+
'兜底值': fallback_value,
|
|
1427
|
+
'row': self._shorten_for_log(row)
|
|
1428
|
+
})
|
|
1429
|
+
else:
|
|
1430
|
+
logger.error('数据验证失败', {
|
|
1431
|
+
'列': col_name,
|
|
1432
|
+
'行': row_idx,
|
|
1433
|
+
'报错': str(e),
|
|
1434
|
+
'row': self._shorten_for_log(row),
|
|
1435
|
+
})
|
|
1436
|
+
raise ValueError(f"行:{row_idx}, 列:`{col_name}`-> 报错: {str(e)}")
|
|
1437
|
+
except Exception:
|
|
1382
1438
|
logger.error('数据验证失败', {
|
|
1383
1439
|
'列': col_name,
|
|
1384
1440
|
'行': row_idx,
|
|
@@ -1386,7 +1442,7 @@ class MySQLUploader:
|
|
|
1386
1442
|
'row': self._shorten_for_log(row),
|
|
1387
1443
|
})
|
|
1388
1444
|
raise ValueError(f"行:{row_idx}, 列:`{col_name}`-> 报错: {str(e)}")
|
|
1389
|
-
|
|
1445
|
+
else:
|
|
1390
1446
|
logger.error('数据验证失败', {
|
|
1391
1447
|
'列': col_name,
|
|
1392
1448
|
'行': row_idx,
|
|
@@ -1394,15 +1450,7 @@ class MySQLUploader:
|
|
|
1394
1450
|
'row': self._shorten_for_log(row),
|
|
1395
1451
|
})
|
|
1396
1452
|
raise ValueError(f"行:{row_idx}, 列:`{col_name}`-> 报错: {str(e)}")
|
|
1397
|
-
|
|
1398
|
-
logger.error('数据验证失败', {
|
|
1399
|
-
'列': col_name,
|
|
1400
|
-
'行': row_idx,
|
|
1401
|
-
'报错': str(e),
|
|
1402
|
-
'row': self._shorten_for_log(row),
|
|
1403
|
-
})
|
|
1404
|
-
raise ValueError(f"行:{row_idx}, 列:`{col_name}`-> 报错: {str(e)}")
|
|
1405
|
-
prepared_data.append(prepared_row)
|
|
1453
|
+
prepared_data.append(prepared_row)
|
|
1406
1454
|
return prepared_data, filtered_set_typ
|
|
1407
1455
|
|
|
1408
1456
|
def upload_data(
|
|
@@ -1908,6 +1956,49 @@ class MySQLUploader:
|
|
|
1908
1956
|
return str(value)
|
|
1909
1957
|
return value
|
|
1910
1958
|
|
|
1959
|
+
def execute_single_row_with_defaults(row):
|
|
1960
|
+
"""处理单行插入,支持DEFAULT字段"""
|
|
1961
|
+
has_defaults = any(row.get(col) == 'DEFAULT' for col in all_columns)
|
|
1962
|
+
|
|
1963
|
+
if has_defaults:
|
|
1964
|
+
# 分离普通字段和DEFAULT字段
|
|
1965
|
+
regular_columns = []
|
|
1966
|
+
regular_values = []
|
|
1967
|
+
default_columns = []
|
|
1968
|
+
|
|
1969
|
+
for col in all_columns:
|
|
1970
|
+
val = row.get(col)
|
|
1971
|
+
if val == 'DEFAULT':
|
|
1972
|
+
default_columns.append(col)
|
|
1973
|
+
else:
|
|
1974
|
+
regular_columns.append(col)
|
|
1975
|
+
regular_values.append(ensure_basic_type(val))
|
|
1976
|
+
|
|
1977
|
+
# 构建INSERT ... SET语句
|
|
1978
|
+
set_clauses = []
|
|
1979
|
+
for col in regular_columns:
|
|
1980
|
+
set_clauses.append(f"`{self._validate_identifier(col)}` = %s")
|
|
1981
|
+
for col in default_columns:
|
|
1982
|
+
set_clauses.append(f"`{self._validate_identifier(col)}` = DEFAULT")
|
|
1983
|
+
|
|
1984
|
+
if set_clauses:
|
|
1985
|
+
dynamic_sql = f"INSERT INTO `{db_name}`.`{table_name}` SET {', '.join(set_clauses)}"
|
|
1986
|
+
if update_on_duplicate and regular_columns:
|
|
1987
|
+
update_clauses = [f"`{self._validate_identifier(col)}` = VALUES(`{self._validate_identifier(col)}`)" for col in regular_columns]
|
|
1988
|
+
if update_clauses:
|
|
1989
|
+
dynamic_sql += f" ON DUPLICATE KEY UPDATE {', '.join(update_clauses)}"
|
|
1990
|
+
|
|
1991
|
+
cursor.execute(dynamic_sql, regular_values)
|
|
1992
|
+
return cursor.rowcount if cursor.rowcount is not None else 0
|
|
1993
|
+
else:
|
|
1994
|
+
# 没有DEFAULT字段,使用原有逻辑
|
|
1995
|
+
values = [ensure_basic_type(row.get(col)) for col in all_columns]
|
|
1996
|
+
if check_duplicate and not update_on_duplicate:
|
|
1997
|
+
dup_cols = duplicate_columns if duplicate_columns else [col for col in all_columns if col.lower() not in self.base_excute_col]
|
|
1998
|
+
values += [ensure_basic_type(row.get(col)) for col in dup_cols]
|
|
1999
|
+
cursor.execute(sql, values)
|
|
2000
|
+
return cursor.rowcount if cursor.rowcount is not None else 0
|
|
2001
|
+
|
|
1911
2002
|
batch_size = get_optimal_batch_size(len(data))
|
|
1912
2003
|
all_columns = [col for col in set_typ.keys() if col.lower() != 'id']
|
|
1913
2004
|
total_inserted = 0
|
|
@@ -1918,50 +2009,72 @@ class MySQLUploader:
|
|
|
1918
2009
|
if transaction_mode == 'batch':
|
|
1919
2010
|
for i in range(0, len(data), batch_size):
|
|
1920
2011
|
batch = data[i:i + batch_size]
|
|
1921
|
-
|
|
1922
|
-
for row in batch
|
|
1923
|
-
|
|
1924
|
-
|
|
1925
|
-
|
|
1926
|
-
|
|
1927
|
-
|
|
1928
|
-
|
|
1929
|
-
|
|
2012
|
+
# 检查是否有DEFAULT字段,如果有则需要特殊处理
|
|
2013
|
+
has_default_fields = any(row.get(col) == 'DEFAULT' for row in batch for col in all_columns)
|
|
2014
|
+
|
|
2015
|
+
if has_default_fields:
|
|
2016
|
+
# 对于包含DEFAULT字段的情况,逐行处理
|
|
2017
|
+
for row in batch:
|
|
2018
|
+
try:
|
|
2019
|
+
affected = execute_single_row_with_defaults(row)
|
|
2020
|
+
if update_on_duplicate:
|
|
2021
|
+
total_inserted += 1
|
|
2022
|
+
else:
|
|
2023
|
+
if affected > 0:
|
|
2024
|
+
total_inserted += 1
|
|
2025
|
+
else:
|
|
2026
|
+
total_skipped += 1
|
|
2027
|
+
except pymysql.err.IntegrityError:
|
|
2028
|
+
total_skipped += 1
|
|
2029
|
+
except Exception as e:
|
|
2030
|
+
total_failed += 1
|
|
2031
|
+
logger.error('单行插入失败', {
|
|
2032
|
+
'库': db_name,
|
|
2033
|
+
'表': table_name,
|
|
2034
|
+
'错误': str(e)
|
|
2035
|
+
})
|
|
1930
2036
|
conn.commit()
|
|
1931
|
-
|
|
1932
|
-
#
|
|
1933
|
-
|
|
1934
|
-
|
|
1935
|
-
|
|
1936
|
-
|
|
1937
|
-
|
|
1938
|
-
|
|
1939
|
-
|
|
1940
|
-
|
|
1941
|
-
|
|
1942
|
-
|
|
1943
|
-
|
|
1944
|
-
|
|
1945
|
-
|
|
1946
|
-
|
|
1947
|
-
|
|
1948
|
-
|
|
1949
|
-
|
|
1950
|
-
|
|
1951
|
-
|
|
1952
|
-
|
|
2037
|
+
else:
|
|
2038
|
+
# 没有DEFAULT字段,使用原有逻辑
|
|
2039
|
+
values_list = []
|
|
2040
|
+
for row in batch:
|
|
2041
|
+
values = [ensure_basic_type(row.get(col)) for col in all_columns]
|
|
2042
|
+
if check_duplicate and not update_on_duplicate:
|
|
2043
|
+
dup_cols = duplicate_columns if duplicate_columns else [col for col in all_columns if col.lower() not in self.base_excute_col]
|
|
2044
|
+
values += [ensure_basic_type(row.get(col)) for col in dup_cols]
|
|
2045
|
+
values_list.append(values)
|
|
2046
|
+
try:
|
|
2047
|
+
cursor.executemany(sql, values_list)
|
|
2048
|
+
conn.commit()
|
|
2049
|
+
# 在batch模式下,affected_rows表示实际影响的行数
|
|
2050
|
+
# 如果update_on_duplicate为True,则affected_rows包含更新的行数
|
|
2051
|
+
# 如果update_on_duplicate为False,则affected_rows只包含插入的行数
|
|
2052
|
+
affected = cursor.rowcount if cursor.rowcount is not None else 0
|
|
2053
|
+
if update_on_duplicate:
|
|
2054
|
+
# 当启用更新时,affected_rows包含插入和更新的行数
|
|
2055
|
+
# 我们需要区分插入和更新的行数
|
|
2056
|
+
# 由于无法准确区分,我们假设所有行都是插入的
|
|
2057
|
+
total_inserted += len(batch)
|
|
2058
|
+
else:
|
|
2059
|
+
# 当不启用更新时,affected_rows只包含插入的行数
|
|
2060
|
+
total_inserted += affected
|
|
2061
|
+
total_skipped += len(batch) - affected
|
|
2062
|
+
except pymysql.err.IntegrityError as e:
|
|
2063
|
+
conn.rollback()
|
|
2064
|
+
# 在唯一约束冲突时,所有行都被跳过
|
|
2065
|
+
total_skipped += len(batch)
|
|
2066
|
+
logger.debug('批量插入唯一约束冲突,全部跳过', {'库': db_name, '表': table_name, '错误': str(e)})
|
|
2067
|
+
except Exception as e:
|
|
2068
|
+
conn.rollback()
|
|
2069
|
+
total_failed += len(batch)
|
|
2070
|
+
logger.error('批量插入失败', {'库': db_name, '表': table_name, '错误': str(e)})
|
|
1953
2071
|
elif transaction_mode == 'hybrid':
|
|
1954
2072
|
hybrid_n = 100 # 可配置
|
|
1955
2073
|
for i in range(0, len(data), hybrid_n):
|
|
1956
2074
|
batch = data[i:i + hybrid_n]
|
|
1957
2075
|
for row in batch:
|
|
1958
2076
|
try:
|
|
1959
|
-
|
|
1960
|
-
if check_duplicate and not update_on_duplicate:
|
|
1961
|
-
dup_cols = duplicate_columns if duplicate_columns else [col for col in all_columns if col.lower() not in self.base_excute_col]
|
|
1962
|
-
values += [ensure_basic_type(row.get(col)) for col in dup_cols]
|
|
1963
|
-
cursor.execute(sql, values)
|
|
1964
|
-
affected = cursor.rowcount if cursor.rowcount is not None else 0
|
|
2077
|
+
affected = execute_single_row_with_defaults(row)
|
|
1965
2078
|
if update_on_duplicate:
|
|
1966
2079
|
# 当启用更新时,affected_rows包含插入和更新的行数
|
|
1967
2080
|
# 假设所有行都是插入的,因为无法区分插入和更新
|
|
@@ -1984,12 +2097,7 @@ class MySQLUploader:
|
|
|
1984
2097
|
else: # row模式
|
|
1985
2098
|
for row in data:
|
|
1986
2099
|
try:
|
|
1987
|
-
|
|
1988
|
-
if check_duplicate and not update_on_duplicate:
|
|
1989
|
-
dup_cols = duplicate_columns if duplicate_columns else [col for col in all_columns if col.lower() not in self.base_excute_col]
|
|
1990
|
-
values += [ensure_basic_type(row.get(col)) for col in dup_cols]
|
|
1991
|
-
cursor.execute(sql, values)
|
|
1992
|
-
affected = cursor.rowcount if cursor.rowcount is not None else 0
|
|
2100
|
+
affected = execute_single_row_with_defaults(row)
|
|
1993
2101
|
if update_on_duplicate:
|
|
1994
2102
|
# 当启用更新时,affected_rows包含插入和更新的行数
|
|
1995
2103
|
# 假设所有行都是插入的,因为无法区分插入和更新
|
|
@@ -2209,14 +2317,22 @@ class MySQLUploader:
|
|
|
2209
2317
|
default_value = " DEFAULT 0.0"
|
|
2210
2318
|
elif any(t in column_type_lower for t in ['varchar', 'text', 'char', 'mediumtext', 'longtext']):
|
|
2211
2319
|
default_value = " DEFAULT 'none'"
|
|
2320
|
+
elif 'timestamp' in column_type_lower:
|
|
2321
|
+
# TIMESTAMP类型已经包含DEFAULT定义,不需要额外添加
|
|
2322
|
+
default_value = ""
|
|
2212
2323
|
elif 'date' in column_type_lower:
|
|
2213
|
-
if 'datetime' in column_type_lower
|
|
2214
|
-
default_value = " DEFAULT '
|
|
2324
|
+
if 'datetime' in column_type_lower:
|
|
2325
|
+
default_value = " DEFAULT '2000-01-01 00:00:00'"
|
|
2215
2326
|
else:
|
|
2216
|
-
default_value = " DEFAULT '
|
|
2327
|
+
default_value = " DEFAULT '2000-01-01'"
|
|
2217
2328
|
elif 'json' in column_type_lower:
|
|
2218
2329
|
default_value = " DEFAULT '{}'"
|
|
2219
2330
|
|
|
2331
|
+
# 对于TIMESTAMP类型,不添加额外的NULL约束,因为已经包含在类型定义中
|
|
2332
|
+
if 'timestamp' in column_type.lower() and ('default' in column_type.lower() or 'current_timestamp' in column_type.lower()):
|
|
2333
|
+
null_constraint = "" # TIMESTAMP类型已经包含完整定义
|
|
2334
|
+
default_value = ""
|
|
2335
|
+
|
|
2220
2336
|
sql = f'ALTER TABLE `{db_name}`.`{table_name}` ADD COLUMN `{column}` {column_type} {null_constraint}{default_value}'
|
|
2221
2337
|
|
|
2222
2338
|
conn = None
|
|
@@ -2615,39 +2731,31 @@ class MySQLUploader:
|
|
|
2615
2731
|
# 定义时间戳列名
|
|
2616
2732
|
created_col = '创建时间'
|
|
2617
2733
|
updated_col = '更新时间'
|
|
2618
|
-
current_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
|
2619
2734
|
|
|
2620
2735
|
# 复制set_typ以避免修改原始对象
|
|
2621
2736
|
updated_set_typ = set_typ.copy()
|
|
2622
2737
|
|
|
2623
|
-
#
|
|
2624
|
-
|
|
2625
|
-
updated_set_typ[
|
|
2738
|
+
# 使用MySQL的CURRENT_TIMESTAMP功能,按固定顺序添加时间戳列
|
|
2739
|
+
# 创建时间:插入时自动设置,更新时不变
|
|
2740
|
+
updated_set_typ[created_col] = 'TIMESTAMP DEFAULT CURRENT_TIMESTAMP'
|
|
2741
|
+
# 更新时间:插入和更新时都自动设置为当前时间
|
|
2742
|
+
updated_set_typ[updated_col] = 'TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP'
|
|
2626
2743
|
|
|
2627
2744
|
# 处理DataFrame格式的数据
|
|
2628
2745
|
if hasattr(data, 'shape') and hasattr(data, 'columns'):
|
|
2629
2746
|
import pandas as pd
|
|
2630
2747
|
df = data.copy()
|
|
2631
2748
|
|
|
2632
|
-
#
|
|
2749
|
+
# 移除原始数据中可能存在的时间戳列,让MySQL自动处理
|
|
2633
2750
|
columns_to_remove = []
|
|
2634
2751
|
for col in df.columns:
|
|
2635
2752
|
if col in [created_col, updated_col]:
|
|
2636
2753
|
columns_to_remove.append(col)
|
|
2637
|
-
logger.warning('移除原始数据中的时间戳列', {
|
|
2638
|
-
'库': db_name,
|
|
2639
|
-
'表': table_name,
|
|
2640
|
-
'列': col,
|
|
2641
|
-
'原因': '与自动时间戳功能冲突'
|
|
2642
|
-
})
|
|
2643
2754
|
|
|
2644
2755
|
if columns_to_remove:
|
|
2645
2756
|
df = df.drop(columns=columns_to_remove)
|
|
2646
2757
|
|
|
2647
|
-
#
|
|
2648
|
-
df[created_col] = current_time
|
|
2649
|
-
df[updated_col] = current_time
|
|
2650
|
-
|
|
2758
|
+
# 不再手动添加时间戳列,让MySQL的CURRENT_TIMESTAMP自动处理
|
|
2651
2759
|
return df, updated_set_typ
|
|
2652
2760
|
|
|
2653
2761
|
# 处理字典或字典列表格式的数据
|
|
@@ -2670,10 +2778,7 @@ class MySQLUploader:
|
|
|
2670
2778
|
if key not in [created_col, updated_col]:
|
|
2671
2779
|
new_row[key] = value
|
|
2672
2780
|
|
|
2673
|
-
#
|
|
2674
|
-
new_row[created_col] = current_time
|
|
2675
|
-
new_row[updated_col] = current_time
|
|
2676
|
-
|
|
2781
|
+
# 不再手动添加时间戳,让MySQL的CURRENT_TIMESTAMP自动处理
|
|
2677
2782
|
processed_data.append(new_row)
|
|
2678
2783
|
|
|
2679
2784
|
# 如果原始数据是单个字典,返回单个字典
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
|
2
|
-
mdbq/__version__.py,sha256=
|
|
2
|
+
mdbq/__version__.py,sha256=ekxTFUNsVfy6qrG8u7tQM8OlFtSVSQTGZsMd31HqV9I,18
|
|
3
3
|
mdbq/auth/__init__.py,sha256=pnPMAt63sh1B6kEvmutUuro46zVf2v2YDAG7q-jV_To,24
|
|
4
4
|
mdbq/auth/auth_backend.py,sha256=iLN7AqiSq7fQgFtNtge_TIlVOR1hrCSZXH6oId6uGX4,116924
|
|
5
5
|
mdbq/auth/crypto.py,sha256=fcZRFCnrKVVdWDUx_zds51ynFYwS9DBvJOrRQVldrfM,15931
|
|
@@ -11,11 +11,11 @@ mdbq/log/mylogger.py,sha256=DyBftCMNLe1pTTXsa830pUtDISJxpJHFIradYtE3lFA,26418
|
|
|
11
11
|
mdbq/myconf/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
|
12
12
|
mdbq/myconf/myconf.py,sha256=x_9mS6wOfKVjCVElbruxj2yjzitbyKiTkf59quG-5Zg,32529
|
|
13
13
|
mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
|
|
14
|
-
mdbq/mysql/deduplicator.py,sha256=
|
|
14
|
+
mdbq/mysql/deduplicator.py,sha256=tzLIm9K9S0lGLlVTI0dDQVYpWX796XCuyufmw1lU26Y,73098
|
|
15
15
|
mdbq/mysql/mysql.py,sha256=pDg771xBugCMSTWeskIFTi3pFLgaqgyG3smzf-86Wn8,56772
|
|
16
16
|
mdbq/mysql/s_query.py,sha256=N2xHJf2CiUXjXIVBemdst-wamIP3908EGAJOFG13fCU,50475
|
|
17
17
|
mdbq/mysql/unique_.py,sha256=MaztT-WIyEQUs-OOYY4pFulgHVcXR1BfCy3QUz0XM_U,21127
|
|
18
|
-
mdbq/mysql/uploader.py,sha256=
|
|
18
|
+
mdbq/mysql/uploader.py,sha256=0ZnC7KxF1y5-UlCPyrdg-MHpVm_NTHdZ6yY2ytxG8Io,136520
|
|
19
19
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
|
20
20
|
mdbq/other/download_sku_picture.py,sha256=X66sVdvVgzoNzmgVJyPtd7bjEvctEKtLPblEPF65EWc,46940
|
|
21
21
|
mdbq/other/error_handler.py,sha256=4p5haAXSY-P78stp4Xwo_MwAngWYqyKj5ogWIuYXMeY,12631
|
|
@@ -35,7 +35,7 @@ mdbq/route/routes.py,sha256=QVGfTvDgu0CpcKCvk1ra74H8uojgqTLUav1fnVAqLEA,29433
|
|
|
35
35
|
mdbq/selenium/__init__.py,sha256=AKzeEceqZyvqn2dEDoJSzDQnbuENkJSHAlbHAD0u0ZI,10
|
|
36
36
|
mdbq/selenium/get_driver.py,sha256=1NTlVUE6QsyjTrVVVqTO2LOnYf578ccFWlWnvIXGtic,20903
|
|
37
37
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
|
38
|
-
mdbq-4.1.
|
|
39
|
-
mdbq-4.1.12.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
40
|
-
mdbq-4.1.12.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
|
41
|
-
mdbq-4.1.12.dist-info/RECORD,,
|
|
38
|
+
mdbq-4.1.13.dist-info/METADATA,sha256=GJ4JaVRELhrQqlakZeZ_C2swBtLhhVXsd9ens555dck,364
|
|
39
|
+
mdbq-4.1.13.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
40
|
+
mdbq-4.1.13.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
|
41
|
+
mdbq-4.1.13.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|