mdbq 4.1.11__tar.gz → 4.1.13__tar.gz

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.

Potentially problematic release: this version of mdbq might be problematic.

Files changed (46)
  1. {mdbq-4.1.11 → mdbq-4.1.13}/PKG-INFO +1 -1
  2. mdbq-4.1.13/mdbq/__version__.py +1 -0
  3. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/mysql/deduplicator.py +7 -7
  4. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/mysql/uploader.py +314 -104
  5. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq.egg-info/PKG-INFO +1 -1
  6. mdbq-4.1.11/mdbq/__version__.py +0 -1
  7. {mdbq-4.1.11 → mdbq-4.1.13}/README.txt +0 -0
  8. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/__init__.py +0 -0
  9. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/auth/__init__.py +0 -0
  10. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/auth/auth_backend.py +0 -0
  11. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/auth/crypto.py +0 -0
  12. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/auth/rate_limiter.py +0 -0
  13. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/js/__init__.py +0 -0
  14. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/js/jc.py +0 -0
  15. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/log/__init__.py +0 -0
  16. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/log/mylogger.py +0 -0
  17. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/myconf/__init__.py +0 -0
  18. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/myconf/myconf.py +0 -0
  19. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/mysql/__init__.py +0 -0
  20. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/mysql/mysql.py +0 -0
  21. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/mysql/s_query.py +0 -0
  22. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/mysql/unique_.py +0 -0
  23. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/other/__init__.py +0 -0
  24. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/other/download_sku_picture.py +0 -0
  25. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/other/error_handler.py +0 -0
  26. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/other/otk.py +0 -0
  27. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/other/pov_city.py +0 -0
  28. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/other/ua_sj.py +0 -0
  29. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/pbix/__init__.py +0 -0
  30. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/pbix/pbix_refresh.py +0 -0
  31. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/pbix/refresh_all.py +0 -0
  32. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/redis/__init__.py +0 -0
  33. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/redis/getredis.py +0 -0
  34. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/redis/redis_cache.py +0 -0
  35. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/route/__init__.py +0 -0
  36. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/route/analytics.py +0 -0
  37. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/route/monitor.py +0 -0
  38. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/route/routes.py +0 -0
  39. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/selenium/__init__.py +0 -0
  40. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/selenium/get_driver.py +0 -0
  41. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq/spider/__init__.py +0 -0
  42. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq.egg-info/SOURCES.txt +0 -0
  43. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq.egg-info/dependency_links.txt +0 -0
  44. {mdbq-4.1.11 → mdbq-4.1.13}/mdbq.egg-info/top_level.txt +0 -0
  45. {mdbq-4.1.11 → mdbq-4.1.13}/setup.cfg +0 -0
  46. {mdbq-4.1.11 → mdbq-4.1.13}/setup.py +0 -0

{mdbq-4.1.11 → mdbq-4.1.13}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: mdbq
- Version: 4.1.11
+ Version: 4.1.13
  Home-page: https://pypi.org/project/mdbq
  Author: xigua,
  Author-email: 2587125111@qq.com

mdbq-4.1.13/mdbq/__version__.py
@@ -0,0 +1 @@
+ VERSION = '4.1.13'

{mdbq-4.1.11 → mdbq-4.1.13}/mdbq/mysql/deduplicator.py
@@ -1364,12 +1364,12 @@ def main():
  skip_system_dbs=True,
  max_retries=3,
  retry_waiting_time=5,
- pool_size=20,
- mincached=5,
- maxcached=10,
+ pool_size=10,
+ mincached=2,
+ maxcached=5,
  # recent_month=1,
  # date_range=['2025-06-09', '2025-06-10'],
- exclude_columns=['更新时间'],
+ exclude_columns=['创建时间', '更新时间'],
  exclude_databases=['cookie文件', '日志', '视频数据', '云电影'],
  # exclude_tables={
  # '推广数据2': [
@@ -1391,9 +1391,9 @@ def main():

  # # 指定表去重(使用特定列)
  deduplicator.deduplicate_table(
- '生意参谋3',
- '新品追踪_旧接口_2025',
- columns=['商品id', '累计商品浏览量', '累计商品访客数'],
+ '推广数据_奥莱店',
+ '主体报表_2025',
+ columns=['日期', '店铺名称', '场景id', '计划id', '主体id'],
  dry_run=False,
  reorder_id=True,
  )

{mdbq-4.1.11 → mdbq-4.1.13}/mdbq/mysql/uploader.py
@@ -435,15 +435,36 @@ class MySQLUploader:
  if not primary_keys:
  column_defs.append("`id` INT NOT NULL AUTO_INCREMENT")

- # 添加其他列
+ # 添加其他列,确保时间戳字段按正确顺序添加
+ timestamp_cols = ['创建时间', '更新时间']
+ regular_cols = []
+ timestamp_defs = []
+
+ # 先处理非时间戳字段
  for col_name, col_type in set_typ.items():
  if col_name == 'id':
  continue
+ if col_name in timestamp_cols:
+ continue  # 时间戳字段稍后按顺序处理
+
  safe_col_name = self._normalize_col(col_name)
  col_def = f"`{safe_col_name}` {col_type}"
- if not allow_null and not col_type.lower().startswith('json'):
+ if not allow_null and not col_type.lower().startswith('json') and not col_type.lower().startswith('timestamp'):
  col_def += " NOT NULL"
- column_defs.append(col_def)
+ regular_cols.append(col_def)
+
+ # 按固定顺序添加时间戳字段
+ for timestamp_col in timestamp_cols:
+ if timestamp_col in set_typ:
+ safe_col_name = self._normalize_col(timestamp_col)
+ col_type = set_typ[timestamp_col]
+ col_def = f"`{safe_col_name}` {col_type}"
+ # TIMESTAMP字段不需要额外的NOT NULL,因为已经包含在类型定义中
+ timestamp_defs.append(col_def)
+
+ # 合并所有列定义:常规字段 + 时间戳字段
+ column_defs.extend(regular_cols)
+ column_defs.extend(timestamp_defs)

  # 主键处理逻辑调整
  def _index_col_sql(col):
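
Note on the hunk above: table creation now collects the regular column definitions first and appends the two timestamp columns last, and TIMESTAMP types are exempt from the extra NOT NULL. A rough sketch of the outcome, assuming hypothetical column names and types and assuming _normalize_col leaves the names unchanged:

    # Hypothetical caller-supplied set_typ (names and types are illustrative):
    set_typ = {
        '日期': 'DATE',
        '更新时间': 'TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP',
        '销售额': 'DECIMAL(12,2)',
        '创建时间': 'TIMESTAMP DEFAULT CURRENT_TIMESTAMP',
    }
    # With allow_null=False and no explicit primary keys, column_defs would come out as:
    # ['`id` INT NOT NULL AUTO_INCREMENT',
    #  '`日期` DATE NOT NULL',
    #  '`销售额` DECIMAL(12,2) NOT NULL',
    #  '`创建时间` TIMESTAMP DEFAULT CURRENT_TIMESTAMP',
    #  '`更新时间` TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP']
    # i.e. the timestamp columns always land at the end, in a fixed order.
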
@@ -672,9 +693,9 @@ class MySQLUploader:
  'decimal': 0.0,
  'float': 0.0,
  'double': 0.0,
- 'date': '1970-01-01',
- 'datetime': '1970-01-01 00:00:00',
- 'timestamp': '1970-01-01 00:00:00',
+ 'date': '2000-01-01',
+ 'datetime': '2000-01-01 00:00:00',
+ 'timestamp': '2000-01-01 00:00:00',
  'json': '{}',
  'varchar': 'none',
  'text': 'none',
@@ -783,6 +804,12 @@ class MySQLUploader:
  """
  column_type_lower = column_type.lower() if column_type else ''

+ # 对于包含CURRENT_TIMESTAMP的TIMESTAMP字段,跳过验证,让MySQL自动处理
+ if ('timestamp' in column_type_lower and 'current_timestamp' in column_type_lower and
+ col_name in ['创建时间', '更新时间']):
+ # 这些字段由MySQL自动处理,不需要传入值
+ return None
+
  # 统一的空值检查(None、空字符串、NaN)
  is_empty_value = False
  if value is None:
@@ -1219,12 +1246,17 @@ class MySQLUploader:
  set_typ: Dict[str, str],
  allow_null: bool = False,
  db_name: str = None,
- table_name: str = None,
+ table_name: str = None,
+ auto_timestamps: bool = False
  ) -> Tuple[List[Dict], Dict[str, str]]:
  """
  准备要上传的数据,验证并转换数据类型
  根据set_typ自动处理所有数据类型的列:补齐缺失的列并丢弃多余的列
  """
+ # 处理自动时间戳功能
+ if auto_timestamps:
+ data, set_typ = self._process_auto_timestamps(data, set_typ, db_name, table_name)
+
  # set_typ的键清洗
  if not set_typ:
  set_typ = {}
@@ -1330,50 +1362,79 @@ class MySQLUploader:
  # 跳过id列,不允许外部传入id
  if (self.case_sensitive and col_name == 'id') or (not self.case_sensitive and col_name.lower() == 'id'):
  continue
+ # 对于自动时间戳字段,使用特殊标记让MySQL使用DEFAULT值
+ col_type_lower = filtered_set_typ[col_name].lower()
+ is_auto_timestamp = ('timestamp' in col_type_lower and 'current_timestamp' in col_type_lower and
+ col_name in ['创建时间', '更新时间'])
+
  if col_name not in row:
  # 对于缺失的列,使用None作为默认值,在_validate_value中会根据allow_null和列类型进行进一步处理
- try:
- prepared_row[col_name] = self._validate_value(None, filtered_set_typ[col_name], allow_null, db_name, table_name, col_name)
- except ValueError as e:
- if not allow_null:
- # 如果不允许空值但验证失败,尝试使用兜底值
- try:
- fallback_value = self._get_fallback_value(filtered_set_typ[col_name].lower(), allow_null, db_name, table_name, col_name, None)
- if fallback_value is not None:
- prepared_row[col_name] = fallback_value
- logger.warning(f"行号:{row_idx} -> 缺失列: `{col_name}`, 使用兜底值: {fallback_value}", {'row': self._shorten_for_log(row)})
- else:
+ if is_auto_timestamp:
+ # 自动时间戳字段使用特殊标记
+ prepared_row[col_name] = 'DEFAULT'
+ else:
+ try:
+ prepared_row[col_name] = self._validate_value(None, filtered_set_typ[col_name], allow_null, db_name, table_name, col_name)
+ except ValueError as e:
+ if not allow_null:
+ # 如果不允许空值但验证失败,尝试使用兜底值
+ try:
+ fallback_value = self._get_fallback_value(filtered_set_typ[col_name].lower(), allow_null, db_name, table_name, col_name, None)
+ if fallback_value is not None:
+ prepared_row[col_name] = fallback_value
+ logger.warning(f"行号:{row_idx} -> 缺失列: `{col_name}`, 使用兜底值: {fallback_value}", {'row': self._shorten_for_log(row)})
+ else:
+ error_msg = f"行号:{row_idx} -> 缺失列: `{col_name}`, 且不允许空值"
+ logger.error(error_msg, {'row': self._shorten_for_log(row)})
+ raise ValueError(error_msg)
+ except Exception:
  error_msg = f"行号:{row_idx} -> 缺失列: `{col_name}`, 且不允许空值"
  logger.error(error_msg, {'row': self._shorten_for_log(row)})
  raise ValueError(error_msg)
- except Exception:
- error_msg = f"行号:{row_idx} -> 缺失列: `{col_name}`, 且不允许空值"
- logger.error(error_msg, {'row': self._shorten_for_log(row)})
- raise ValueError(error_msg)
- else:
- prepared_row[col_name] = None
+ else:
+ prepared_row[col_name] = None
  else:
- try:
- prepared_row[col_name] = self._validate_value(row[col_name], filtered_set_typ[col_name], allow_null, db_name, table_name, col_name)
- except ValueError as e:
- # 如果数据验证失败,检查是否为空值且不允许空值,尝试使用兜底值
- original_value = row[col_name]
- is_empty_original = (original_value is None or
- original_value == '' or
- (not isinstance(original_value, (list, dict)) and
- pd.isna(original_value) if hasattr(pd, 'isna') else False))
-
- if is_empty_original and not allow_null:
- try:
- fallback_value = self._get_fallback_value(filtered_set_typ[col_name].lower(), allow_null, db_name, table_name, col_name, original_value)
- if fallback_value is not None:
- prepared_row[col_name] = fallback_value
- logger.warning(f"行:{row_idx}, 列:`{col_name}` -> 原值验证失败,使用兜底值: {fallback_value}", {
- '原值': original_value,
- '兜底值': fallback_value,
- 'row': self._shorten_for_log(row)
- })
- else:
+ if is_auto_timestamp:
+ # 自动时间戳字段忽略用户传入的值,使用DEFAULT
+ prepared_row[col_name] = 'DEFAULT'
+ if row[col_name] is not None:  # 如果用户传入了值,给出警告
+ logger.warning('忽略自动时间戳字段的用户传入值', {
+ '库': db_name,
+ '': table_name,
+ '列': col_name,
+ '用户值': row[col_name],
+ '原因': '将使用MySQL CURRENT_TIMESTAMP'
+ })
+ else:
+ try:
+ prepared_row[col_name] = self._validate_value(row[col_name], filtered_set_typ[col_name], allow_null, db_name, table_name, col_name)
+ except ValueError as e:
+ # 如果数据验证失败,检查是否为空值且不允许空值,尝试使用兜底值
+ original_value = row[col_name]
+ is_empty_original = (original_value is None or
+ original_value == '' or
+ (not isinstance(original_value, (list, dict)) and
+ pd.isna(original_value) if hasattr(pd, 'isna') else False))
+
+ if is_empty_original and not allow_null:
+ try:
+ fallback_value = self._get_fallback_value(filtered_set_typ[col_name].lower(), allow_null, db_name, table_name, col_name, original_value)
+ if fallback_value is not None:
+ prepared_row[col_name] = fallback_value
+ logger.warning(f"行:{row_idx}, 列:`{col_name}` -> 原值验证失败,使用兜底值: {fallback_value}", {
+ '原值': original_value,
+ '兜底值': fallback_value,
+ 'row': self._shorten_for_log(row)
+ })
+ else:
+ logger.error('数据验证失败', {
+ '列': col_name,
+ '行': row_idx,
+ '报错': str(e),
+ 'row': self._shorten_for_log(row),
+ })
+ raise ValueError(f"行:{row_idx}, 列:`{col_name}`-> 报错: {str(e)}")
+ except Exception:
  logger.error('数据验证失败', {
  '列': col_name,
  '行': row_idx,
@@ -1381,7 +1442,7 @@ class MySQLUploader:
  'row': self._shorten_for_log(row),
  })
  raise ValueError(f"行:{row_idx}, 列:`{col_name}`-> 报错: {str(e)}")
- except Exception:
+ else:
  logger.error('数据验证失败', {
  '列': col_name,
  '行': row_idx,
@@ -1389,15 +1450,7 @@ class MySQLUploader:
  'row': self._shorten_for_log(row),
  })
  raise ValueError(f"行:{row_idx}, 列:`{col_name}`-> 报错: {str(e)}")
- else:
- logger.error('数据验证失败', {
- '列': col_name,
- '行': row_idx,
- '报错': str(e),
- 'row': self._shorten_for_log(row),
- })
- raise ValueError(f"行:{row_idx}, 列:`{col_name}`-> 报错: {str(e)}")
- prepared_data.append(prepared_row)
+ prepared_data.append(prepared_row)
  return prepared_data, filtered_set_typ

  def upload_data(
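
In short, with auto_timestamps enabled _prepare_data now leaves the literal string 'DEFAULT' in the row as a placeholder for the two timestamp columns, and the insert layer later turns that placeholder into the SQL DEFAULT keyword. An illustration of the resulting row shape (column names are hypothetical):

    # Sketch of a prepared_row produced with auto_timestamps=True:
    prepared_row = {
        '日期': '2025-06-09',
        '销售额': 128.5,
        '创建时间': 'DEFAULT',  # placeholder; becomes the SQL DEFAULT keyword at insert time
        '更新时间': 'DEFAULT',
    }
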
@@ -1416,7 +1469,8 @@ class MySQLUploader:
  indexes: Optional[List[str]] = None,
  update_on_duplicate: bool = False,
  transaction_mode: str = "batch",
- unique_keys: Optional[List[List[str]]] = None
+ unique_keys: Optional[List[List[str]]] = None,
+ auto_timestamps: bool = False
  ):
  """
  上传数据到数据库的主入口方法
@@ -1439,6 +1493,7 @@ class MySQLUploader:
  - 'batch' : 整批提交事务(性能最优)
  - 'hybrid' : 混合模式(每N行提交,平衡性能与安全性)
  :param unique_keys: 唯一约束列表,每个元素为列名列表,支持多列组合唯一约束。格式:[['col1', 'col2'], ['col3']] 或 None
+ :param auto_timestamps: 是否自动添加创建时间和更新时间列,默认为False。启用后会自动添加'创建时间'和'更新时间'两列
  :raises: 可能抛出各种验证和数据库相关异常

  ---
@@ -1483,6 +1538,17 @@ class MySQLUploader:
  - 只要 update_on_duplicate=True 且表存在唯一约束(如 unique_keys),无论 check_duplicate 是否为 True,都会更新旧数据(即 ON DUPLICATE KEY UPDATE 生效)。
  - 如需"覆盖"行为,务必设置 update_on_duplicate=True,不管 check_duplicate 是否为 True。
  - 如需"跳过"行为,设置 update_on_duplicate=False 即可。
+
+ ---
+ auto_timestamps 参数:
+
+ - 当 auto_timestamps=True 时,系统会自动添加'创建时间'和'更新时间'两列
+ - 如果原始数据中已存在这两列,系统会先移除原始数据中的这些列,然后添加新的时间戳
+ - '创建时间':记录数据首次插入的时间,使用当前时间戳
+ - '更新时间':记录数据最后更新的时间,插入时与创建时间相同,更新时会自动更新为当前时间
+ - 时间戳列的数据类型为 DATETIME,格式为 'YYYY-MM-DD HH:MM:SS'
+ - 这两列会自动添加到 set_typ 中,无需手动指定
+ - 建议在需要审计数据变更历史的表中启用此功能
  """
  # upload_start = time.time()
  # 检查data参数是否为None
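
A minimal usage sketch of the new flag; only the keyword arguments visible in this diff are taken from the package, everything else (connection setup, database/table/column names) is a placeholder:

    from mdbq.mysql.uploader import MySQLUploader

    uploader = MySQLUploader(...)  # placeholder; the constructor signature is not part of this diff
    uploader.upload_data(
        db_name='销售数据',            # hypothetical database
        table_name='订单明细',         # hypothetical table
        data=[{'日期': '2025-06-09', '销售额': 128.5}],
        set_typ={'日期': 'DATE', '销售额': 'DECIMAL(12,2)'},
        unique_keys=[['日期']],
        update_on_duplicate=True,
        auto_timestamps=True,          # 创建时间/更新时间 are created and maintained by MySQL
    )
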
@@ -1492,7 +1558,7 @@ class MySQLUploader:
  '表': table_name,
  })
  raise ValueError("data参数不能为None,请传入有效的数据")
-
+
  if isinstance(data, list) or (hasattr(data, 'shape') and hasattr(data, '__len__')):
  initial_row_count = len(data)
  else:
@@ -1553,7 +1619,7 @@ class MySQLUploader:
  raise ValueError("分表方式必须是 'year' 或 'month' 或 'None'")

  # 准备数据
- prepared_data, filtered_set_typ = self._prepare_data(data, set_typ, allow_null, db_name, table_name)
+ prepared_data, filtered_set_typ = self._prepare_data(data, set_typ, allow_null, db_name, table_name, auto_timestamps)

  # 检查数据库是否存在
  if not self._check_database_exists(db_name):
@@ -1890,6 +1956,49 @@ class MySQLUploader:
  return str(value)
  return value

+ def execute_single_row_with_defaults(row):
+ """处理单行插入,支持DEFAULT字段"""
+ has_defaults = any(row.get(col) == 'DEFAULT' for col in all_columns)
+
+ if has_defaults:
+ # 分离普通字段和DEFAULT字段
+ regular_columns = []
+ regular_values = []
+ default_columns = []
+
+ for col in all_columns:
+ val = row.get(col)
+ if val == 'DEFAULT':
+ default_columns.append(col)
+ else:
+ regular_columns.append(col)
+ regular_values.append(ensure_basic_type(val))
+
+ # 构建INSERT ... SET语句
+ set_clauses = []
+ for col in regular_columns:
+ set_clauses.append(f"`{self._validate_identifier(col)}` = %s")
+ for col in default_columns:
+ set_clauses.append(f"`{self._validate_identifier(col)}` = DEFAULT")
+
+ if set_clauses:
+ dynamic_sql = f"INSERT INTO `{db_name}`.`{table_name}` SET {', '.join(set_clauses)}"
+ if update_on_duplicate and regular_columns:
+ update_clauses = [f"`{self._validate_identifier(col)}` = VALUES(`{self._validate_identifier(col)}`)" for col in regular_columns]
+ if update_clauses:
+ dynamic_sql += f" ON DUPLICATE KEY UPDATE {', '.join(update_clauses)}"
+
+ cursor.execute(dynamic_sql, regular_values)
+ return cursor.rowcount if cursor.rowcount is not None else 0
+ else:
+ # 没有DEFAULT字段,使用原有逻辑
+ values = [ensure_basic_type(row.get(col)) for col in all_columns]
+ if check_duplicate and not update_on_duplicate:
+ dup_cols = duplicate_columns if duplicate_columns else [col for col in all_columns if col.lower() not in self.base_excute_col]
+ values += [ensure_basic_type(row.get(col)) for col in dup_cols]
+ cursor.execute(sql, values)
+ return cursor.rowcount if cursor.rowcount is not None else 0
+
  batch_size = get_optimal_batch_size(len(data))
  all_columns = [col for col in set_typ.keys() if col.lower() != 'id']
  total_inserted = 0
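
For a row carrying the 'DEFAULT' placeholder, the helper above builds an INSERT ... SET statement of roughly this shape (identifiers are illustrative; the ON DUPLICATE KEY UPDATE tail is only appended when update_on_duplicate=True):

    # Sketch of the statement execute_single_row_with_defaults sends for one row:
    dynamic_sql = (
        "INSERT INTO `销售数据`.`订单明细` "
        "SET `日期` = %s, `销售额` = %s, `创建时间` = DEFAULT, `更新时间` = DEFAULT "
        "ON DUPLICATE KEY UPDATE `日期` = VALUES(`日期`), `销售额` = VALUES(`销售额`)"
    )
    regular_values = ['2025-06-09', 128.5]
    # cursor.execute(dynamic_sql, regular_values)  # the DEFAULT columns take MySQL's own value
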
@@ -1900,50 +2009,72 @@ class MySQLUploader:
  if transaction_mode == 'batch':
  for i in range(0, len(data), batch_size):
  batch = data[i:i + batch_size]
- values_list = []
- for row in batch:
- values = [ensure_basic_type(row.get(col)) for col in all_columns]
- if check_duplicate and not update_on_duplicate:
- dup_cols = duplicate_columns if duplicate_columns else [col for col in all_columns if col.lower() not in self.base_excute_col]
- values += [ensure_basic_type(row.get(col)) for col in dup_cols]
- values_list.append(values)
- try:
- cursor.executemany(sql, values_list)
+ # 检查是否有DEFAULT字段,如果有则需要特殊处理
+ has_default_fields = any(row.get(col) == 'DEFAULT' for row in batch for col in all_columns)
+
+ if has_default_fields:
+ # 对于包含DEFAULT字段的情况,逐行处理
+ for row in batch:
+ try:
+ affected = execute_single_row_with_defaults(row)
+ if update_on_duplicate:
+ total_inserted += 1
+ else:
+ if affected > 0:
+ total_inserted += 1
+ else:
+ total_skipped += 1
+ except pymysql.err.IntegrityError:
+ total_skipped += 1
+ except Exception as e:
+ total_failed += 1
+ logger.error('单行插入失败', {
+ '库': db_name,
+ '表': table_name,
+ '错误': str(e)
+ })
  conn.commit()
- # 在batch模式下,affected_rows表示实际影响的行数
- # 如果update_on_duplicate为True,则affected_rows包含更新的行数
- # 如果update_on_duplicate为False,则affected_rows只包含插入的行数
- affected = cursor.rowcount if cursor.rowcount is not None else 0
- if update_on_duplicate:
- # 当启用更新时,affected_rows包含插入和更新的行数
- # 我们需要区分插入和更新的行数
- # 由于无法准确区分,我们假设所有行都是插入的
- total_inserted += len(batch)
- else:
- # 当不启用更新时,affected_rows只包含插入的行数
- total_inserted += affected
- total_skipped += len(batch) - affected
- except pymysql.err.IntegrityError as e:
- conn.rollback()
- # 在唯一约束冲突时,所有行都被跳过
- total_skipped += len(batch)
- logger.debug('批量插入唯一约束冲突,全部跳过', {'库': db_name, '表': table_name, '错误': str(e)})
- except Exception as e:
- conn.rollback()
- total_failed += len(batch)
- logger.error('批量插入失败', {'库': db_name, '表': table_name, '错误': str(e)})
+ else:
+ # 没有DEFAULT字段,使用原有逻辑
+ values_list = []
+ for row in batch:
+ values = [ensure_basic_type(row.get(col)) for col in all_columns]
+ if check_duplicate and not update_on_duplicate:
+ dup_cols = duplicate_columns if duplicate_columns else [col for col in all_columns if col.lower() not in self.base_excute_col]
+ values += [ensure_basic_type(row.get(col)) for col in dup_cols]
+ values_list.append(values)
+ try:
+ cursor.executemany(sql, values_list)
+ conn.commit()
+ # batch模式下,affected_rows表示实际影响的行数
+ # 如果update_on_duplicate为True,则affected_rows包含更新的行数
+ # 如果update_on_duplicate为False,则affected_rows只包含插入的行数
+ affected = cursor.rowcount if cursor.rowcount is not None else 0
+ if update_on_duplicate:
+ # 当启用更新时,affected_rows包含插入和更新的行数
+ # 我们需要区分插入和更新的行数
+ # 由于无法准确区分,我们假设所有行都是插入的
+ total_inserted += len(batch)
+ else:
+ # 当不启用更新时,affected_rows只包含插入的行数
+ total_inserted += affected
+ total_skipped += len(batch) - affected
+ except pymysql.err.IntegrityError as e:
+ conn.rollback()
+ # 在唯一约束冲突时,所有行都被跳过
+ total_skipped += len(batch)
+ logger.debug('批量插入唯一约束冲突,全部跳过', {'库': db_name, '表': table_name, '错误': str(e)})
+ except Exception as e:
+ conn.rollback()
+ total_failed += len(batch)
+ logger.error('批量插入失败', {'库': db_name, '表': table_name, '错误': str(e)})
  elif transaction_mode == 'hybrid':
  hybrid_n = 100 # 可配置
  for i in range(0, len(data), hybrid_n):
  batch = data[i:i + hybrid_n]
  for row in batch:
  try:
- values = [ensure_basic_type(row.get(col)) for col in all_columns]
- if check_duplicate and not update_on_duplicate:
- dup_cols = duplicate_columns if duplicate_columns else [col for col in all_columns if col.lower() not in self.base_excute_col]
- values += [ensure_basic_type(row.get(col)) for col in dup_cols]
- cursor.execute(sql, values)
- affected = cursor.rowcount if cursor.rowcount is not None else 0
+ affected = execute_single_row_with_defaults(row)
  if update_on_duplicate:
  # 当启用更新时,affected_rows包含插入和更新的行数
  # 假设所有行都是插入的,因为无法区分插入和更新
@@ -1966,12 +2097,7 @@ class MySQLUploader:
  else: # row模式
  for row in data:
  try:
- values = [ensure_basic_type(row.get(col)) for col in all_columns]
- if check_duplicate and not update_on_duplicate:
- dup_cols = duplicate_columns if duplicate_columns else [col for col in all_columns if col.lower() not in self.base_excute_col]
- values += [ensure_basic_type(row.get(col)) for col in dup_cols]
- cursor.execute(sql, values)
- affected = cursor.rowcount if cursor.rowcount is not None else 0
+ affected = execute_single_row_with_defaults(row)
  if update_on_duplicate:
  # 当启用更新时,affected_rows包含插入和更新的行数
  # 假设所有行都是插入的,因为无法区分插入和更新
@@ -2191,14 +2317,22 @@ class MySQLUploader:
  default_value = " DEFAULT 0.0"
  elif any(t in column_type_lower for t in ['varchar', 'text', 'char', 'mediumtext', 'longtext']):
  default_value = " DEFAULT 'none'"
+ elif 'timestamp' in column_type_lower:
+ # TIMESTAMP类型已经包含DEFAULT定义,不需要额外添加
+ default_value = ""
  elif 'date' in column_type_lower:
- if 'datetime' in column_type_lower or 'timestamp' in column_type_lower:
- default_value = " DEFAULT '1970-01-01 00:00:00'"
+ if 'datetime' in column_type_lower:
+ default_value = " DEFAULT '2000-01-01 00:00:00'"
  else:
- default_value = " DEFAULT '1970-01-01'"
+ default_value = " DEFAULT '2000-01-01'"
  elif 'json' in column_type_lower:
  default_value = " DEFAULT '{}'"

+ # 对于TIMESTAMP类型,不添加额外的NULL约束,因为已经包含在类型定义中
+ if 'timestamp' in column_type.lower() and ('default' in column_type.lower() or 'current_timestamp' in column_type.lower()):
+ null_constraint = "" # TIMESTAMP类型已经包含完整定义
+ default_value = ""
+
  sql = f'ALTER TABLE `{db_name}`.`{table_name}` ADD COLUMN `{column}` {column_type} {null_constraint}{default_value}'

  conn = None
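
The practical effect of the hunk above: a column whose type string already embeds its DEFAULT (the auto-timestamp case) is added without any extra NULL constraint or fallback default. A small sketch (type strings illustrative, not package code):

    column_type = 'TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP'
    # -> default_value == '' and null_constraint == ''  (the type already carries its full definition)
    column_type = 'datetime'
    # -> default_value == " DEFAULT '2000-01-01 00:00:00'"  (the old 1970 epoch defaults are replaced by 2000)
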
@@ -2577,6 +2711,82 @@ class MySQLUploader:

  return result_df

+ def _process_auto_timestamps(
+ self,
+ data: Union[Dict, List[Dict], pd.DataFrame],
+ set_typ: Dict[str, str],
+ db_name: str,
+ table_name: str
+ ) -> Tuple[Union[Dict, List[Dict], pd.DataFrame], Dict[str, str]]:
+ """
+ 处理自动时间戳功能
+
+ :param data: 原始数据
+ :param set_typ: 列类型定义
+ :param db_name: 数据库名
+ :param table_name: 表名
+ :return: 处理后的数据和更新后的set_typ
+ """
+
+ # 定义时间戳列名
+ created_col = '创建时间'
+ updated_col = '更新时间'
+
+ # 复制set_typ以避免修改原始对象
+ updated_set_typ = set_typ.copy()
+
+ # 使用MySQL的CURRENT_TIMESTAMP功能,按固定顺序添加时间戳列
+ # 创建时间:插入时自动设置,更新时不变
+ updated_set_typ[created_col] = 'TIMESTAMP DEFAULT CURRENT_TIMESTAMP'
+ # 更新时间:插入和更新时都自动设置为当前时间
+ updated_set_typ[updated_col] = 'TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP'
+
+ # 处理DataFrame格式的数据
+ if hasattr(data, 'shape') and hasattr(data, 'columns'):
+ import pandas as pd
+ df = data.copy()
+
+ # 移除原始数据中可能存在的时间戳列,让MySQL自动处理
+ columns_to_remove = []
+ for col in df.columns:
+ if col in [created_col, updated_col]:
+ columns_to_remove.append(col)
+
+ if columns_to_remove:
+ df = df.drop(columns=columns_to_remove)
+
+ # 不再手动添加时间戳列,让MySQL的CURRENT_TIMESTAMP自动处理
+ return df, updated_set_typ
+
+ # 处理字典或字典列表格式的数据
+ else:
+ # 确保data是列表格式
+ if isinstance(data, dict):
+ data_list = [data]
+ is_single_dict = True
+ else:
+ data_list = data
+ is_single_dict = False
+
+ # 处理每一行数据
+ processed_data = []
+ for row in data_list:
+ new_row = {}
+
+ # 复制原始数据,但跳过可能存在的时间戳列
+ for key, value in row.items():
+ if key not in [created_col, updated_col]:
+ new_row[key] = value
+
+ # 不再手动添加时间戳,让MySQL的CURRENT_TIMESTAMP自动处理
+ processed_data.append(new_row)
+
+ # 如果原始数据是单个字典,返回单个字典
+ if is_single_dict:
+ return processed_data[0], updated_set_typ
+ else:
+ return processed_data, updated_set_typ
+

  def main():
  dir_path = os.path.expanduser("~")
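
To summarize the new helper: it never writes timestamp values itself; it only extends set_typ with the two TIMESTAMP columns and strips any caller-supplied timestamp columns so that MySQL's own defaults take over. A short sketch with illustrative inputs:

    set_typ_in = {'日期': 'DATE', '销售额': 'DECIMAL(12,2)'}
    data_in = [{'日期': '2025-06-09', '销售额': 128.5, '更新时间': '2025-06-09 12:00:00'}]
    # After self._process_auto_timestamps(data_in, set_typ_in, db_name, table_name):
    #   set_typ gains, in a fixed order:
    #     '创建时间': 'TIMESTAMP DEFAULT CURRENT_TIMESTAMP'
    #     '更新时间': 'TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP'
    #   and the caller-supplied '更新时间' value is dropped from the row:
    #     [{'日期': '2025-06-09', '销售额': 128.5}]
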

{mdbq-4.1.11 → mdbq-4.1.13}/mdbq.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: mdbq
- Version: 4.1.11
+ Version: 4.1.13
  Home-page: https://pypi.org/project/mdbq
  Author: xigua,
  Author-email: 2587125111@qq.com

mdbq-4.1.11/mdbq/__version__.py
@@ -1 +0,0 @@
- VERSION = '4.1.11'

All remaining files listed above are unchanged between the two versions.