mdbq 3.9.6__py3-none-any.whl → 3.9.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbq/mysql/mysql.py CHANGED
@@ -12,6 +12,7 @@ import os
  import logging
  import logging.handlers
  from mdbq.other import otk
+ from mdbq.log import mylogger
  from typing import Union, List, Dict, Optional, Any, Tuple, Set
  from dbutils.pooled_db import PooledDB
  import json
@@ -27,7 +28,18 @@ warnings.filterwarnings('ignore')
  Table-creation workflow:
  Table-creation conventions:
  """
- logger = logging.getLogger(__name__)
+ logger = mylogger.MyLogger(
+     name='mysql',
+     logging_mode='both',
+     log_level='info',
+     log_file='mysql.log',
+     log_format='json',
+     max_log_size=50,
+     backup_count=5,
+     enable_async=False,  # whether to enable asynchronous logging
+     sample_rate=0.5,  # sample 50% of DEBUG/INFO logs
+     sensitive_fields=[],  # list of sensitive fields to mask
+ )


  def count_decimal_places(num_str):
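
The module-level `mylogger.MyLogger` above replaces the per-instance `_init_logging` / `_log_with_metrics` machinery that this release deletes further down. Note the call convention used throughout the new code: a context dict is passed as a second positional argument (`logger.info(msg, {...})`), which MyLogger evidently accepts. As a rough, stdlib-only illustration of what the `sample_rate` and `sensitive_fields` parameters suggest (an assumption about intent, not mylogger's actual implementation):

    import logging
    import random

    class SamplingRedactingFilter(logging.Filter):
        """Illustrative stand-in: drop a fraction of DEBUG/INFO records, mask named fields."""
        def __init__(self, sample_rate=0.5, sensitive_fields=()):
            super().__init__()
            self.sample_rate = sample_rate
            self.sensitive_fields = set(sensitive_fields)

        def filter(self, record):
            # Sample only low-severity records; warnings and errors always pass.
            if record.levelno <= logging.INFO and random.random() > self.sample_rate:
                return False
            extra = getattr(record, 'extra_data', None)
            if isinstance(extra, dict):
                for field in self.sensitive_fields & extra.keys():
                    extra[field] = '***'  # redact before the handler formats the record
            return True

    demo_logger = logging.getLogger('mysql-demo')
    demo_logger.setLevel(logging.INFO)
    handler = logging.StreamHandler()
    handler.addFilter(SamplingRedactingFilter(sample_rate=0.5, sensitive_fields=['password']))
    demo_logger.addHandler(handler)
    demo_logger.info('pool created', extra={'extra_data': {'password': 'secret'}})
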
@@ -1136,19 +1148,13 @@ class MySQLUploader:
          port: int = 3306,
          charset: str = 'utf8mb4',
          collation: str = 'utf8mb4_0900_ai_ci',  # utf8mb4_0900_ai_ci is case-insensitive; utf8mb4_0900_as_cs / utf8mb4_bin are case-sensitive
-         logging_mode: str = 'console',  # 'both' (console + file), 'console' (console only), 'file' (file only), 'none' (disabled)
-         log_level: str = 'INFO',  # default log level
-         log_file: str = 'mysql_upload.log',  # log file path
-         max_log_size: int = 50,  # log file size (MB)
-         backup_count: int = 5,  # number of log files to keep
          max_retries: int = 10,
          retry_interval: int = 10,
          pool_size: int = 5,
          connect_timeout: int = 10,
          read_timeout: int = 30,
          write_timeout: int = 30,
-         ssl: Optional[Dict] = None,
-         enable_metrics: bool = True  # whether to collect performance metrics
+         ssl: Optional[Dict] = None
      ):
          """
          :param username: database username
@@ -1157,11 +1163,7 @@ class MySQLUploader:
          :param port: database port, defaults to 3306
          :param charset: character set, defaults to utf8mb4
          :param collation: collation, defaults to utf8mb4_0900_ai_ci
-         :param logging_mode: logging mode, one of 'both' (console + file), 'console' (console only), 'file' (file only), 'none' (disabled)
-         :param log_level: log level, defaults to INFO
-         :param log_file: log file path
-         :param max_log_size: maximum log file size (MB), defaults to 50
-         :param backup_count: number of log backups to keep, defaults to 5
+
          :param max_retries: maximum number of retries, defaults to 10
          :param retry_interval: retry interval (seconds), defaults to 10
          :param pool_size: connection pool size, defaults to 5
@@ -1169,7 +1171,6 @@ class MySQLUploader:
          :param read_timeout: read timeout (seconds), defaults to 30
          :param write_timeout: write timeout (seconds), defaults to 30
          :param ssl: SSL configuration dict, defaults to None
-         :param enable_metrics: whether to collect performance metrics, defaults to True
          """
          self.username = username
          self.password = password
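
With the logging and metrics parameters gone from `__init__`, construction gets simpler: logging is configured once at module level, and callers pass only connection and retry settings. A minimal sketch against the new signature (credentials and host are placeholders; the keywords restate the documented defaults):

    from mdbq.mysql.mysql import MySQLUploader

    uploader = MySQLUploader(
        username='root',     # placeholder credentials
        password='pw',
        host='localhost',
        port=3306,
        pool_size=5,         # defaults restated for clarity
        max_retries=10,
        retry_interval=10,
    )
    # ... upload_data(...) calls go here ...
    uploader.close()
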
@@ -1186,178 +1187,12 @@ class MySQLUploader:
          self.ssl = ssl
          self._prepared_statements = StatementCache(maxsize=100)
          self._max_cached_statements = 100
-         self.enable_metrics = enable_metrics
-         self.metrics = {
-             'total_uploads': 0,
-             'successful_uploads': 0,
-             'failed_uploads': 0,
-             'total_rows': 0,
-             'successful_rows': 0,
-             'failed_rows': 0,
-             'total_retries': 0,
-             'total_execution_time': 0.0,
-             'connection_usage': [],
-             'memory_usage': [],
-             'cpu_usage': []
-         }
-         self._last_metrics_time = 0
-         self._metrics_cache = {}  # cache of the most recent system metrics
-         self.metrics_interval = 30  # metrics sampling interval
-         self._table_metadata_cache = {}  # table metadata cache
-         self.metadata_cache_ttl = 300  # metadata cache TTL
-
-         # initialize the logging system
-         self._init_logging(logging_mode, log_level, log_file, max_log_size, backup_count)
+         self._table_metadata_cache = {}
+         self.metadata_cache_ttl = 300  # 5-minute cache TTL

          # create the connection pool
          self.pool = self._create_connection_pool()

-     def _init_logging(
-             self,
-             logging_mode: str,
-             log_level: str,
-             log_file: str,
-             max_log_size: int,
-             backup_count: int
-     ):
-         """Initialize the structured logging configuration."""
-         if logging_mode.lower() == 'none':
-             self.logger = None
-             return
-
-         valid_levels = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
-         level = log_level.upper() if log_level.upper() in valid_levels else 'INFO'
-
-         # formatter producing structured JSON output
-         class StructuredFormatter(logging.Formatter):
-             def format(self, record):
-                 log_data = {
-                     'time': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
-                     'level': record.levelname,
-                     'message': record.getMessage(),
-                     # 'logger': record.name,
-                     'module': record.module,
-                     'line': record.lineno,
-                     # 'process': record.process
-                 }
-
-                 # attach exception info
-                 if record.exc_info:
-                     log_data['exception'] = self.formatException(record.exc_info)
-
-                 # filter sensitive information
-                 if hasattr(record, 'password'):
-                     log_data['message'] = log_data['message'].replace(self.password, '***')
-
-                 return json.dumps(log_data, ensure_ascii=False)
-
-         # create the logger
-         self.logger = logging.getLogger('upload')
-         self.logger.setLevel(level)
-
-         # avoid adding duplicate handlers
-         if self.logger.handlers:
-             for handler in self.logger.handlers[:]:
-                 self.logger.removeHandler(handler)
-
-         formatter = StructuredFormatter()
-         mode = logging_mode.lower()
-
-         # add handlers according to the mode
-         if mode in ('both', 'console'):
-             console_handler = logging.StreamHandler()
-             console_handler.setFormatter(formatter)
-             self.logger.addHandler(console_handler)
-
-         if mode in ('both', 'file'):
-             file_handler = logging.handlers.RotatingFileHandler(
-                 filename=log_file,
-                 maxBytes=max_log_size * 1024 * 1024,
-                 backupCount=backup_count,
-                 encoding='utf-8'
-             )
-             file_handler.setFormatter(formatter)
-             self.logger.addHandler(file_handler)
-
-     def _record_metrics(self, metric_name: str, value: Any = 1, is_timing: bool = False):
-         """Record a performance metric."""
-         if not self.enable_metrics:
-             return
-
-         # use a more efficient structure for frequently updated counters
-         if metric_name in ('total_uploads', 'successful_uploads', 'failed_uploads'):
-             self.metrics[metric_name] = self.metrics.get(metric_name, 0) + value
-             return
-
-         if metric_name not in self.metrics:
-             self.metrics[metric_name] = []
-
-         if is_timing:
-             # timing metrics store a timestamp together with the value
-             self.metrics[metric_name].append({
-                 'timestamp': datetime.datetime.now().isoformat(),
-                 'value': value
-             })
-         else:
-             # other metrics accumulate directly
-             if isinstance(self.metrics[metric_name], (int, float)):
-                 self.metrics[metric_name] += value
-             elif isinstance(self.metrics[metric_name], list):
-                 self.metrics[metric_name].append({
-                     'timestamp': datetime.datetime.now().isoformat(),
-                     'value': value
-                 })
-
-     def _get_system_metrics(self):
-         """Collect system resource usage metrics."""
-         if not self.enable_metrics:
-             return {}
-
-         metrics = {
-             'memory': psutil.virtual_memory().percent,
-             'cpu': psutil.cpu_percent(),
-         }
-
-         # safer way to obtain the connection count
-         if hasattr(self, 'pool') and self.pool is not None:
-             try:
-                 # different pool implementations may use different attribute names
-                 if hasattr(self.pool, '_connections'):
-                     connections = self.pool._connections
-                     metrics['connections'] = len(connections) if hasattr(connections, '__len__') else 0
-                 else:
-                     metrics['connections'] = 0
-             except Exception:
-                 metrics['connections'] = 0
-         else:
-             metrics['connections'] = 0
-
-         return metrics
-
-     def _log_with_metrics(self, level: str, message: str, extra: Optional[Dict] = None):
-         """Write a log record."""
-         if not self.logger:
-             return
-
-         if len(message) > 500:
-             message = message[:500] + '...'
-
-         now = time.time()
-         if now - self._last_metrics_time > self.metrics_interval:
-             self._metrics_cache = self._get_system_metrics()
-             # use the cached metrics
-             log_extra = {'metrics': self._metrics_cache}
-             self._last_metrics_time = now
-         else:
-             # record current system metrics
-             metrics = self._get_system_metrics()
-             log_extra = {'metrics': metrics}
-
-         if extra:
-             log_extra.update(extra)
-
-         getattr(self.logger, level.lower())(message, extra={'extra_data': log_extra})
-
      def _create_connection_pool(self) -> PooledDB:
          """Create the database connection pool."""
          if hasattr(self, 'pool') and self.pool is not None and self._check_pool_health():
@@ -1386,7 +1221,7 @@ class MySQLUploader:
              required_keys = {'ca', 'cert', 'key'}
              if not all(k in self.ssl for k in required_keys):
                  error_msg = "SSL configuration must include ca, cert, and key"
-                 self._log_with_metrics('error', error_msg)
+                 logger.error(error_msg)
                  raise ValueError(error_msg)
              pool_params['ssl'] = {
                  'ca': self.ssl['ca'],
@@ -1398,17 +1233,15 @@ class MySQLUploader:
          try:
              pool = PooledDB(**pool_params)
              elapsed = time.time() - start_time
-             self._record_metrics('connection_pool_creation_time', elapsed, is_timing=True)
-             self._log_with_metrics('info', "Connection pool created", {
+             logger.info("Connection pool created", {
                  'pool_size': self.pool_size,
                  'time_elapsed': elapsed
              })
              return pool
          except Exception as e:
              elapsed = time.time() - start_time
-             self._record_metrics('connection_pool_failures', 1)
              self.pool = None
-             self._log_with_metrics('error', "Failed to create connection pool", {
+             logger.error("Failed to create connection pool", {
                  'error': str(e),
                  'time_elapsed': elapsed
              })
@@ -1421,7 +1254,7 @@ class MySQLUploader:
          start_time = time.time()
          operation = func.__name__

-         self._log_with_metrics('debug', f"Starting operation: {operation}", {
+         logger.debug(f"Starting operation: {operation}", {
              'attempt': 1,
              'max_retries': self.max_retries
          })
@@ -1432,14 +1265,13 @@ class MySQLUploader:
                  elapsed = time.time() - start_time

                  if attempt > 0:
-                     self._record_metrics('total_retries', attempt)
-                     self._log_with_metrics('info', "Operation succeeded (after retry)", {
+                     logger.info("Operation succeeded (after retry)", {
                          'operation': operation,
                          'attempts': attempt + 1,
                          'time_elapsed': elapsed
                      })
                  else:
-                     self._log_with_metrics('debug', "Operation succeeded", {
+                     logger.debug("Operation succeeded", {
                          'operation': operation,
                          'time_elapsed': elapsed
                      })
@@ -1448,7 +1280,6 @@ class MySQLUploader:

              except (pymysql.OperationalError, pymysql.err.MySQLError) as e:
                  last_exception = e
-                 self._record_metrics('database_errors', 1)

                  # record detailed MySQL error information
                  error_details = {
@@ -1462,26 +1293,25 @@ class MySQLUploader:
                  if attempt < self.max_retries - 1:
                      wait_time = self.retry_interval * (attempt + 1)
                      error_details['wait_time'] = wait_time
-                     self._log_with_metrics('warning', f"Database operation failed, retrying {error_details}", )
+                     logger.warning(f"Database operation failed, retrying {error_details}", )
                      time.sleep(wait_time)

                      # try to re-establish the connection
                      try:
                          self.pool = self._create_connection_pool()
-                         self._log_with_metrics('info', "Database connection re-established")
+                         logger.info("Database connection re-established")
                      except Exception as reconnect_error:
-                         self._log_with_metrics('error', "Reconnect failed", {
+                         logger.error("Reconnect failed", {
                              'error': str(reconnect_error)
                          })
                  else:
                      elapsed = time.time() - start_time
                      error_details['time_elapsed'] = elapsed
-                     self._log_with_metrics('error', f"Operation ultimately failed {error_details}")
+                     logger.error(f"Operation ultimately failed {error_details}")

              except pymysql.IntegrityError as e:
                  elapsed = time.time() - start_time
-                 self._record_metrics('integrity_errors', 1)
-                 self._log_with_metrics('error', "Integrity constraint error", {
+                 logger.error("Integrity constraint error", {
                      'operation': operation,
                      'time_elapsed': elapsed,
                      'error_code': e.args[0] if e.args else None,
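
One detail worth calling out in this hunk: the retry wait grows linearly, `retry_interval * (attempt + 1)`. With the default `retry_interval=10`, successive waits are 10 s, 20 s, 30 s, and so on:

    retry_interval = 10  # constructor default documented above
    waits = [retry_interval * (attempt + 1) for attempt in range(3)]
    assert waits == [10, 20, 30]  # linear backoff, exactly as computed in this hunk
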
@@ -1492,8 +1322,7 @@ class MySQLUploader:
              except Exception as e:
                  last_exception = e
                  elapsed = time.time() - start_time
-                 self._record_metrics('unexpected_errors', 1)
-                 self._log_with_metrics('error', "Unexpected error", {
+                 logger.error("Unexpected error", {
                      'operation': operation,
                      'time_elapsed': elapsed,
                      'error_type': type(e).__name__,
@@ -1510,10 +1339,10 @@ class MySQLUploader:
          """Get a connection from the pool."""
          try:
              conn = self.pool.connection()
-             self._log_with_metrics('debug', "Acquired database connection")
+             logger.debug("Acquired database connection")
              return conn
          except Exception as e:
-             self._log_with_metrics("error", f'{e}')
+             logger.error(f'{e}')
              raise ConnectionError(f"Failed to connect to database: {str(e)}")

      def _check_database_exists(self, db_name: str) -> bool:
@@ -1526,10 +1355,10 @@ class MySQLUploader:
              with conn.cursor() as cursor:
                  cursor.execute(sql, (db_name,))
                  exists = bool(cursor.fetchone())
-                 self._log_with_metrics('debug', f"Database {db_name} already exists: {exists}")
+                 logger.debug(f"Database {db_name} already exists: {exists}")
                  return exists
          except Exception as e:
-             self._log_with_metrics('error', f"Error while checking whether the database exists: {str(e)}")
+             logger.error(f"Error while checking whether the database exists: {str(e)}")
              raise

      def _create_database(self, db_name: str):
@@ -1542,9 +1371,9 @@ class MySQLUploader:
              with conn.cursor() as cursor:
                  cursor.execute(sql)
                  conn.commit()
-                 self._log_with_metrics('info', f"Database {db_name} created")
+                 logger.info(f"Database {db_name} created")
          except Exception as e:
-             self._log_with_metrics('error', f"{db_name}: unable to create database {str(e)}")
+             logger.error(f"{db_name}: unable to create database {str(e)}")
              conn.rollback()
              raise

@@ -1567,7 +1396,7 @@ class MySQLUploader:
              date_obj = self._validate_datetime(date_value, True)
          except ValueError:
              error_msg = f"Invalid date format 1: {date_value}"
-             self._log_with_metrics('error', error_msg)
+             logger.error(error_msg)
              raise ValueError(error_msg)

          if partition_by == 'year':
@@ -1576,7 +1405,7 @@ class MySQLUploader:
              return f"{table_name}_{date_obj.year}_{date_obj.month:02d}"
          else:
              error_msg = "partition_by must be 'year' or 'month'"
-             self._log_with_metrics('error', error_msg)
+             logger.error(error_msg)
              raise ValueError(error_msg)

      def _validate_identifier(self, identifier: str) -> str:
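
The partition naming visible in these two hunks is `{table}_{year}` or `{table}_{year}_{month:02d}`. A self-contained restatement of the scheme (`partition_name` is a hypothetical helper for illustration, not a function in the package):

    import datetime

    def partition_name(table_name: str, date_obj: datetime.date, partition_by: str) -> str:
        # Restates the naming rule shown in the hunks above.
        if partition_by == 'year':
            return f"{table_name}_{date_obj.year}"
        if partition_by == 'month':
            return f"{table_name}_{date_obj.year}_{date_obj.month:02d}"
        raise ValueError("partition_by must be 'year' or 'month'")

    assert partition_name('sales', datetime.date(2024, 3, 15), 'month') == 'sales_2024_03'
    assert partition_name('sales', datetime.date(2024, 3, 15), 'year') == 'sales_2024'
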
@@ -1590,14 +1419,14 @@ class MySQLUploader:
          """
          if not identifier or not isinstance(identifier, str):
              error_msg = f"Invalid identifier: {identifier}"
-             self._log_with_metrics('error', error_msg)
+             logger.error(error_msg)
              raise ValueError(error_msg)

          # strip illegal characters, keeping only letters, digits, underscores, and dollar signs
          cleaned = re.sub(r'[^\w\u4e00-\u9fff$]', '', identifier)
          if not cleaned:
              error_msg = f"Could not sanitize invalid identifier: {identifier}"
-             self._log_with_metrics('error', error_msg)
+             logger.error(error_msg)
              raise ValueError(error_msg)

          # check for MySQL reserved words
@@ -1606,7 +1435,7 @@ class MySQLUploader:
              'not', 'like', 'in', 'is', 'null', 'true', 'false', 'between'
          }
          if cleaned.lower() in mysql_keywords:
-             self._log_with_metrics('debug', f"MySQL reserved word detected: {cleaned}")
+             logger.debug(f"MySQL reserved word detected: {cleaned}")
              return f"`{cleaned}`"

          return cleaned
@@ -1633,7 +1462,7 @@ class MySQLUploader:
                      cursor.execute(sql, (db_name, table_name))
                      result = bool(cursor.fetchone())
          except Exception as e:
-             self._log_with_metrics('error', f"Unknown error while checking whether the table exists: {e}", )
+             logger.error(f"Unknown error while checking whether the table exists: {e}", )
              raise

          # execute the query and cache the result
@@ -1665,7 +1494,7 @@ class MySQLUploader:

          if not set_typ:
              error_msg = "No columns specified for table creation"
-             self._log_with_metrics('error', error_msg)
+             logger.error(error_msg)
              raise ValueError(error_msg)

          # build the column-definition SQL
@@ -1710,7 +1539,7 @@ class MySQLUploader:
              with self._get_connection() as conn:
                  with conn.cursor() as cursor:
                      cursor.execute(sql)
-                     self._log_with_metrics('info', f"{db_name}.{table_name}: table created")
+                     logger.info(f"{db_name}.{table_name}: table created")

                  # add regular indexes
                  index_statements = []
@@ -1736,13 +1565,13 @@ class MySQLUploader:
                  with conn.cursor() as cursor:
                      for stmt in index_statements:
                          cursor.execute(stmt)
-                         self._log_with_metrics('debug', f"Executed index statement: {stmt}", )
+                         logger.debug(f"Executed index statement: {stmt}", )

                  conn.commit()
-                 self._log_with_metrics('info', f"{db_name}.{table_name}: indexes added")
+                 logger.info(f"{db_name}.{table_name}: indexes added")

          except Exception as e:
-             self._log_with_metrics('error', f"{db_name}.{table_name}: table creation failed: {str(e)}")
+             logger.error(f"{db_name}.{table_name}: table creation failed: {str(e)}")
              conn.rollback()
              raise

@@ -1812,7 +1641,7 @@ class MySQLUploader:
              return value
          except (ValueError, TypeError) as e:
              error_msg = f"Data type conversion error {value} to type {column_type}: {str(e)}"
-             self._log_with_metrics('error', error_msg)
+             logger.error(error_msg)
              raise ValueError(error_msg)

      def _get_table_columns(self, db_name: str, table_name: str) -> Dict[str, str]:
@@ -1831,10 +1660,10 @@ class MySQLUploader:
              with conn.cursor() as cursor:
                  cursor.execute(sql, (db_name, table_name))
                  set_typ = {row['COLUMN_NAME']: row['DATA_TYPE'] for row in cursor.fetchall()}
-                 self._log_with_metrics('debug', f"{db_name}.{table_name}: fetched table column info: {set_typ}")
+                 logger.debug(f"{db_name}.{table_name}: fetched table column info: {set_typ}")
              return set_typ
          except Exception as e:
-             self._log_with_metrics('error', f"Unable to fetch table column info: {str(e)}")
+             logger.error(f"Unable to fetch table column info: {str(e)}")
              raise

      def _upload_to_table(
@@ -1860,21 +1689,21 @@ class MySQLUploader:
                                        allow_null=allow_null)
              else:
                  error_msg = f"Table does not exist: '{db_name}.{table_name}'"
-                 self._log_with_metrics('error', error_msg)
+                 logger.error(error_msg)
                  raise ValueError(error_msg)

          # fetch and validate the table schema
          table_columns = self._get_table_columns(db_name, table_name)
          if not table_columns:
              error_msg = f"Failed to fetch columns for '{db_name}.{table_name}'"
-             self._log_with_metrics('error', error_msg)
+             logger.error(error_msg)
              raise ValueError(error_msg)

          # verify that data columns match table columns
          for col in set_typ:
              if col not in table_columns:
                  error_msg = f"Column does not exist: '{col}' -> '{db_name}.{table_name}'"
-                 self._log_with_metrics('error', error_msg)
+                 logger.error(error_msg)
                  raise ValueError(error_msg)

          # insert the data
@@ -1957,7 +1786,7 @@ class MySQLUploader:
                  data.columns = [col.lower() for col in data.columns]
                  data = data.replace({pd.NA: None}).to_dict('records')
              except Exception as e:
-                 self._log_with_metrics("error", f"Error converting data to dicts: {e}", )
+                 logger.error(f"Error converting data to dicts: {e}", )
                  raise ValueError(f"Error converting data to dicts: {e}")
          elif isinstance(data, dict):
              data = [{k.lower(): v for k, v in data.items()}]
@@ -1966,7 +1795,7 @@ class MySQLUploader:
              data = [{k.lower(): v for k, v in item.items()} for item in data]
          else:
              error_msg = "Data must be a dict, a list, a list of dicts, or a DataFrame"
-             self._log_with_metrics('error', error_msg)
+             logger.error(error_msg)
              raise ValueError(error_msg)

          # lower-case the keys of set_typ
@@ -1988,11 +1817,11 @@ class MySQLUploader:
                  if sample_values:
                      inferred_type = self._infer_data_type(sample_values[0])
                      filtered_set_typ[col] = inferred_type
-                     self._log_with_metrics('debug', f"Inferred data type for column '{col}': {inferred_type}")
+                     logger.debug(f"Inferred data type for column '{col}': {inferred_type}")
                  else:
                      # no sample values; fall back to the default type
                      filtered_set_typ[col] = 'VARCHAR(255)'
-                     self._log_with_metrics('debug', f"Using default data type for column '{col}': VARCHAR(255)")
+                     logger.debug(f"Using default data type for column '{col}': VARCHAR(255)")

          prepared_data = []
          for row_idx, row in enumerate(data, 1):
@@ -2005,7 +1834,7 @@ class MySQLUploader:
                  if col_name not in row:
                      if not allow_null:
                          error_msg = f"Row {row_idx}: Missing required column '{col_name}' in data"
-                         self._log_with_metrics('error', error_msg)
+                         logger.error(error_msg)
                          raise ValueError(error_msg)
                      prepared_row[col_name] = None
                  else:
@@ -2013,11 +1842,11 @@ class MySQLUploader:
                          prepared_row[col_name] = self._validate_value(row[col_name], filtered_set_typ[col_name])
                      except ValueError as e:
                          error_msg = f"Row {row_idx}, column '{col_name}': {str(e)}"
-                         self._log_with_metrics('error', error_msg)
+                         logger.error(error_msg)
                          raise ValueError(error_msg)
              prepared_data.append(prepared_row)

-         self._log_with_metrics('debug', f"Prepared {len(prepared_data)} rows of data")
+         logger.debug(f"Prepared {len(prepared_data)} rows of data")
          return prepared_data, filtered_set_typ

      def upload_data(
@@ -2039,14 +1868,12 @@ class MySQLUploader:
          Upload data to the database.
          """
          upload_start = time.time()
-         self._record_metrics('total_uploads', 1)
          initial_row_count = len(data) if hasattr(data, '__len__') else 1
-         self.metrics['total_rows'] += len(data) if hasattr(data, '__len__') else 1

          batch_id = f"batch_{int(time.time() * 1000)}"
          success_flag = False

-         self._log_with_metrics('info', "Starting data upload", {
+         logger.info("Starting data upload", {
              'batch_id': batch_id,
              'database': db_name,
              'table': table_name,
@@ -2060,12 +1887,12 @@ class MySQLUploader:
              # validate parameters
              if not set_typ:
                  error_msg = "Column data types are missing"
-                 self._log_with_metrics('error', error_msg)
+                 logger.error(error_msg)
                  raise ValueError(error_msg)

              if partition_by and partition_by not in ['year', 'month']:
                  error_msg = "Partitioning must be by 'year' or 'month'"
-                 self._log_with_metrics('error', error_msg)
+                 logger.error(error_msg)
                  raise ValueError(error_msg)

              # prepare the data
@@ -2077,7 +1904,7 @@ class MySQLUploader:
                      self._create_database(db_name)
                  else:
                      error_msg = f"Database does not exist: '{db_name}'"
-                     self._log_with_metrics('error', error_msg)
+                     logger.error(error_msg)
                      raise ValueError(error_msg)

              # handle table partitioning
@@ -2087,7 +1914,7 @@ class MySQLUploader:
                      try:
                          if partition_date_column not in row:
                              error_msg = f"Unexpectedly missing column '{partition_date_column}'"
-                             self._log_with_metrics('error', error_msg)
+                             logger.error(error_msg)
                              continue  # skip this row

                          part_table = self._get_partition_table_name(
@@ -2099,7 +1926,7 @@ class MySQLUploader:
                              partitioned_data[part_table] = []
                          partitioned_data[part_table].append(row)
                      except Exception as e:
-                         self._log_with_metrics('error', "Partition handling failed", {
+                         logger.error("Partition handling failed", {
                              'row_data': row,
                              'error': str(e)
                          })
@@ -2115,7 +1942,7 @@ class MySQLUploader:
                              indexes, batch_id
                          )
                      except Exception as e:
-                         self._log_with_metrics('error', "Partition upload failed", {
+                         logger.error("Partition upload failed", {
                              'partition_table': part_table,
                              'error': str(e)
                          })
@@ -2132,25 +1959,17 @@ class MySQLUploader:
              success_flag = True

          except Exception as e:
-             self._log_with_metrics('error', "Global error during upload", {
+             logger.error("Global error during upload", {
                  'error': str(e),
                  'error_type': type(e).__name__
              })
          finally:
              elapsed = time.time() - upload_start
-             self._record_metrics('upload_execution_time', elapsed, is_timing=True)
-
-             if success_flag:
-                 self._record_metrics('successful_uploads', 1)
-             else:
-                 self._record_metrics('failed_uploads', 1)
-
-             self._log_with_metrics('info', "Upload processing finished", {
+             logger.info("Upload processing finished", {
                  'batch_id': batch_id,
                  'success': success_flag,
                  'time_elapsed': elapsed,
-                 'initial_row_count': initial_row_count,
-                 'processed_rows': self.metrics['successful_rows'] + self.metrics['failed_rows']
+                 'initial_row_count': initial_row_count
              })

      def _insert_data(
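
Tying the upload flow together, a call sketch for `upload_data` (keyword names are taken from the code visible in this diff; the full signature and its defaults are not shown here, so treat the exact shape as an assumption):

    uploader.upload_data(
        db_name='demo_db',        # placeholder names
        table_name='sales',
        data=[{'date_col': '2024-03-15', 'amount': 9.9}],
        set_typ={'date_col': 'DATE', 'amount': 'DECIMAL(10,2)'},
        partition_by='month',     # rows are routed to e.g. sales_2024_03
        partition_date_column='date_col',
    )
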
@@ -2263,7 +2082,7 @@ class MySQLUploader:
                              'duplicate_check': check_duplicate,
                              'duplicate_columns': duplicate_columns
                          }
-                         self._log_with_metrics('error', f"Single-row insert failed: {error_details}")
+                         logger.error(f"Single-row insert failed: {error_details}")
                          continue  # skip this row and continue with the next

              # update statistics
@@ -2276,8 +2095,6 @@ class MySQLUploader:
              total_inserted += successful_rows

              batch_elapsed = time.time() - batch_start
-             self._record_metrics('batch_execution_time', batch_elapsed, is_timing=True)
-
              batch_info = {
                  'batch_id': batch_id,
                  'batch_index': i // batch_size + 1,
@@ -2288,68 +2105,42 @@ class MySQLUploader:
                  'time_elapsed': batch_elapsed,
                  'rows_per_second': successful_rows / batch_elapsed if batch_elapsed > 0 else 0
              }
-             self._log_with_metrics('debug', f"Batch finished {batch_info}")
+             logger.debug(f"Batch finished {batch_info}")

-         # update global metrics
-         self.metrics['failed_rows'] += total_failed
-         self._log_with_metrics('info', "Data insert finished", {
+         logger.info("Data insert finished", {
              'total_rows': len(data),
              'inserted_rows': total_inserted,
              'skipped_rows': total_skipped,
              'failed_rows': total_failed
          })

-     def get_metrics(self) -> Dict:
-         """Return the current performance metrics."""
-         metrics = self.metrics.copy()
-
-         # add current system metrics
-         metrics.update({
-             'current_time': datetime.datetime.now().isoformat(),
-             'system': self._get_system_metrics(),
-             'connection_pool': {
-                 'size': self.pool_size,
-                 'active': len(self.pool._connections) if hasattr(self.pool, '_connections') else 0
-             }
-         })
-
-         return metrics
-
      def close(self):
          """Close the connection pool and record final metrics."""
          close_start = time.time()

          try:
              if hasattr(self, 'pool') and self.pool is not None:
-                 # record the pool state before closing
-                 active_connections = self._get_system_metrics().get('connections', 0)
-
                  # safer shutdown
                  try:
                      self.pool.close()
                  except Exception as e:
-                     self._log_with_metrics('warning', "Error while closing the connection pool", {
+                     logger.warning("Error while closing the connection pool", {
                          'error': str(e)
                      })

                  self.pool = None

-             elapsed = time.time() - close_start
-             self._log_with_metrics('info', "Connection pool closed", {
-                 'active_connections_before_close': active_connections,
+             elapsed = round(time.time() - close_start, 2)
+             logger.info("Connection pool closed", {
                  'close_time_elapsed': elapsed
              })
          except Exception as e:
-             elapsed = time.time() - close_start
-             self._log_with_metrics('error', "Failed to close the connection pool", {
+             elapsed = round(time.time() - close_start, 2)
+             logger.error("Failed to close the connection pool", {
                  'error': str(e),
                  'close_time_elapsed': elapsed
              })
              raise
-         finally:
-             # record final performance metrics
-             if hasattr(self, 'logger') and self.logger and self.enable_metrics:
-                 self._log_with_metrics('debug', "Final performance metrics", self.get_metrics())

      def _check_pool_health(self):
          """Periodically check connection pool health."""
@@ -2358,8 +2149,8 @@ class MySQLUploader:
              conn.ping(reconnect=True)
              conn.close()
              return True
-         except Exception:
-             self._log_with_metrics('warning', "Connection pool health check failed", {
+         except Exception as e:
+             logger.warning("Connection pool health check failed", {
                  'error': str(e)
              })
              return False
@@ -2428,9 +2219,6 @@ class MySQLDeduplicator:
          max_workers: int = 1,
          batch_size: int = 1000,
          skip_system_dbs: bool = True,
-         logging_mode: str = 'console',
-         log_level: str = 'INFO',
-         log_file: str = 'mysql_deduplicate.log',
          max_retries: int = 3,
          retry_interval: int = 5,
          pool_size: int = 5
@@ -2446,9 +2234,6 @@ class MySQLDeduplicator:
      :param max_workers: maximum number of worker threads, defaults to 1 (single-threaded)
      :param batch_size: batch size, defaults to 1000
      :param skip_system_dbs: whether to skip system databases, defaults to True
-     :param logging_mode: logging mode ('console', 'file', 'both', 'none')
-     :param log_level: log level ('DEBUG', 'INFO', 'WARNING', 'ERROR')
-     :param log_file: log file path
      :param max_retries: maximum number of retries
      :param retry_interval: retry interval (seconds)
      :param pool_size: connection pool size
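
The deduplicator's constructor slims down the same way. A usage sketch based on the updated `main2()` at the bottom of this diff (credentials, database, and table names are placeholders):

    from mdbq.mysql.mysql import MySQLDeduplicator

    dedup = MySQLDeduplicator(
        username='root',   # placeholder credentials
        password='pw',
        host='localhost',
        port=3306,
    )
    # dry_run reports duplicate groups without deleting anything; it is the
    # parameter threaded through _deduplicate_table in the hunks below.
    dup_groups, deleted = dedup.deduplicate_table(
        'my_db', 'my_table', columns=['name', 'date'], dry_run=True,
    )
    dedup.close()
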
@@ -2476,69 +2261,20 @@ class MySQLDeduplicator:
          self._lock = threading.Lock()
          self._processing_tables = set()  # set of tables currently being processed

-         # initialize logging
-         self._init_logging(logging_mode, log_level, log_file)
-
          # list of system databases
          self.SYSTEM_DATABASES = {
              'information_schema', 'mysql',
              'performance_schema', 'sys'
          }

-     def _init_logging(
-             self,
-             logging_mode: str,
-             log_level: str,
-             log_file: str
-     ):
-         """Initialize the logging configuration."""
-         self.logger = logging.getLogger('mysql_deduplicator')
-         self.logger.setLevel(log_level.upper())
-
-         # avoid adding duplicate handlers
-         if self.logger.handlers:
-             for handler in self.logger.handlers[:]:
-                 self.logger.removeHandler(handler)
-
-         formatter = logging.Formatter(
-             '%(asctime)s - %(levelname)s - %(message)s',
-             datefmt='%Y-%m-%d %H:%M:%S'
-         )
-
-         mode = logging_mode.lower()
-         if mode in ('both', 'console'):
-             console_handler = logging.StreamHandler()
-             console_handler.setFormatter(formatter)
-             self.logger.addHandler(console_handler)
-
-         if mode in ('both', 'file'):
-             file_handler = logging.FileHandler(
-                 filename=log_file,
-                 encoding='utf-8'
-             )
-             file_handler.setFormatter(formatter)
-             self.logger.addHandler(file_handler)
-
-     def _log(self, level: str, message: str, extra: Optional[Dict] = None):
-         """Unified logging helper."""
-         if not hasattr(self.logger, level.lower()):
-             return
-
-         # truncate overly long messages
-         if len(message) > 500:
-             message = message[:500] + '...'
-
-         log_method = getattr(self.logger, level.lower())
-         log_method(message, extra=extra)
-
      def _get_connection(self):
          """Get a connection from the pool."""
          try:
              conn = self.pool.connection()
-             self._log('debug', "Acquired database connection")
+             logger.debug("Acquired database connection")
              return conn
          except Exception as e:
-             self._log('error', f"Failed to acquire database connection: {str(e)}")
+             logger.error(f"Failed to acquire database connection: {str(e)}")
              raise ConnectionError(f"Failed to connect to database: {str(e)}")

      @staticmethod
@@ -2555,16 +2291,14 @@ class MySQLDeduplicator:
                  last_exception = e
                  if attempt < self.max_retries:
                      wait_time = self.retry_interval * (attempt + 1)
-                     self._log('warning',
-                               f"Database operation failed, retrying (attempt {attempt + 1}/{self.max_retries})",
-                               {'error': str(e), 'wait_time': wait_time})
+                     logger.warning(
+                         f"Database operation failed, retrying (attempt {attempt + 1}/{self.max_retries})",
+                         {'error': str(e), 'wait_time': wait_time})
                      time.sleep(wait_time)
                      continue
              except Exception as e:
                  last_exception = e
-                 self._log('error',
-                           f"Operation failed: {str(e)}",
-                           {'error_type': type(e).__name__})
+                 logger.error(f"Operation failed: {str(e)}", {'error_type': type(e).__name__})
                  break

          if last_exception:
@@ -2620,7 +2354,7 @@ class MySQLDeduplicator:

          with self._lock:
              if key in self._processing_tables:
-                 self._log('debug', f"Table {key} is being processed by another thread, skipping")
+                 logger.debug(f"Table {key} is being processed by another thread, skipping")
                  return False
              self._processing_tables.add(key)
              return True
@@ -2653,12 +2387,12 @@ class MySQLDeduplicator:
              return (0, 0)

          try:
-             self._log('info', f"Processing table: {database}.{table}")
+             logger.info(f"Processing table: {database}.{table}")

              # fetch the actual column names
              all_columns = self._get_table_columns(database, table)
              if not all_columns:
-                 self._log('warning', f"Table {database}.{table} has no usable columns (possibly only an id column), skipping")
+                 logger.warning(f"Table {database}.{table} has no usable columns (possibly only an id column), skipping")
                  return (0, 0)

              # use the specified columns, or all columns
@@ -2666,14 +2400,14 @@ class MySQLDeduplicator:
              invalid_columns = set(use_columns) - set(all_columns)

              if invalid_columns:
-                 self._log('warning',
-                           f"Table {database}.{table} lacks these columns: {invalid_columns}; using the valid ones",
-                           {'invalid_columns': invalid_columns}
-                           )
+                 logger.warning(
+                     f"Table {database}.{table} lacks these columns: {invalid_columns}; using the valid ones",
+                     {'invalid_columns': invalid_columns}
+                 )
                  use_columns = [col for col in use_columns if col in all_columns]

              if not use_columns:
-                 self._log('error', f"Table {database}.{table} has no valid deduplication columns")
+                 logger.error(f"Table {database}.{table} has no valid deduplication columns")
                  return (0, 0)

              # build the deduplication SQL
@@ -2706,31 +2440,31 @@ class MySQLDeduplicator:
                      dup_count = cursor.fetchone()['cnt']

                      if dup_count == 0:
-                         self._log('info', f"Table {database}.{table} has no duplicates")
+                         logger.info(f"Table {database}.{table} has no duplicates")
                          cursor.execute(drop_temp_sql)
                          conn.commit()
                          return (0, 0)

-                     self._log('info',
-                               f"Table {database}.{table}: found {dup_count} duplicate groups",
-                               {'columns': use_columns}
-                               )
+                     logger.info(
+                         f"Table {database}.{table}: found {dup_count} duplicate groups",
+                         {'columns': use_columns}
+                     )

                      if not dry_run:
                          # perform the actual deletion
                          cursor.execute(delete_dup_sql)
                          affected_rows = cursor.rowcount
                          conn.commit()
-                         self._log('info',
-                                   f"Table {database}.{table}: deleted {affected_rows} duplicate rows",
-                                   {'columns': use_columns}
-                                   )
+                         logger.info(
+                             f"Table {database}.{table}: deleted {affected_rows} duplicate rows",
+                             {'columns': use_columns}
+                         )
                      else:
                          affected_rows = 0
-                         self._log('info',
-                                   f"[dry run] Table {database}.{table}: would delete {dup_count} duplicate groups",
-                                   {'columns': use_columns}
-                                   )
+                         logger.info(
+                             f"[dry run] Table {database}.{table}: would delete {dup_count} duplicate groups",
+                             {'columns': use_columns}
+                         )

                      # drop the temporary table
                      cursor.execute(drop_temp_sql)
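
The statements named in this hunk (`delete_dup_sql`, `drop_temp_sql`) are built outside it, so their exact SQL is not visible here. One common shape for this temporary-table pattern, purely as an illustration and not the package's actual SQL:

    # Hypothetical SQL for the temp-table dedup pattern; table and column
    # names ('my_db', 'my_table', 'name', 'date', 'id') are placeholders.
    create_temp_sql = """
        CREATE TEMPORARY TABLE tmp_keep AS
        SELECT MIN(id) AS id
        FROM `my_db`.`my_table`
        GROUP BY `name`, `date`
    """
    delete_dup_sql = """
        DELETE t FROM `my_db`.`my_table` t
        LEFT JOIN tmp_keep k ON t.id = k.id
        WHERE k.id IS NULL
    """
    drop_temp_sql = "DROP TEMPORARY TABLE IF EXISTS tmp_keep"
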
@@ -2739,10 +2473,10 @@ class MySQLDeduplicator:
                      return (dup_count, affected_rows)

          except Exception as e:
-             self._log('error',
-                       f"Error processing table {database}.{table}: {str(e)}",
-                       {'error_type': type(e).__name__}
-                       )
+             logger.error(
+                 f"Error processing table {database}.{table}: {str(e)}",
+                 {'error_type': type(e).__name__}
+             )
              return (0, 0)
          finally:
              self._release_table_lock(database, table)
2766
2500
  try:
2767
2501
  # 检查表是否存在
2768
2502
  if not self._check_table_exists(database, table):
2769
- self._log('warning', f"表 {database}.{table} 不存在,跳过")
2503
+ logger.warning(f"表 {database}.{table} 不存在,跳过")
2770
2504
  return (0, 0)
2771
2505
 
2772
2506
  return self._deduplicate_table(database, table, columns, dry_run)
2773
2507
  except Exception as e:
2774
- self._log('error',
2775
- f"处理表 {database}.{table} 时发生全局错误: {str(e)}",
2776
- {'error_type': type(e).__name__}
2777
- )
2508
+ logger.error(
2509
+ f"处理表 {database}.{table} 时发生全局错误: {str(e)}",
2510
+ {'error_type': type(e).__name__}
2511
+ )
2778
2512
  return (0, 0)
2779
2513
 
2780
2514
  def deduplicate_database(
@@ -2800,19 +2534,19 @@ class MySQLDeduplicator:
          try:
              # check whether the database exists
              if not self._check_database_exists(database):
-                 self._log('warning', f"Database {database} does not exist, skipping")
+                 logger.warning(f"Database {database} does not exist, skipping")
                  return results

              # fetch the tables to process
              target_tables = tables or self._get_tables(database)
              if not target_tables:
-                 self._log('info', f"Database {database} has no tables, skipping")
+                 logger.info(f"Database {database} has no tables, skipping")
                  return results

-             self._log('info',
-                       f"Processing {len(target_tables)} tables in database {database}",
-                       {'tables': target_tables}
-                       )
+             logger.info(
+                 f"Processing {len(target_tables)} tables in database {database}",
+                 {'tables': target_tables}
+             )

              if parallel and self.max_workers > 1:
                  # parallel processing
@@ -2833,10 +2567,10 @@ class MySQLDeduplicator:
                          dup_count, affected_rows = future.result()
                          results[table] = (dup_count, affected_rows)
                      except Exception as e:
-                         self._log('error',
-                                   f"Error processing table {database}.{table}: {str(e)}",
-                                   {'error_type': type(e).__name__}
-                                   )
+                         logger.error(
+                             f"Error processing table {database}.{table}: {str(e)}",
+                             {'error_type': type(e).__name__}
+                         )
                          results[table] = (0, 0)
              else:
                  # serial processing
@@ -2851,18 +2585,15 @@ class MySQLDeduplicator:
              total_dup = sum(r[0] for r in results.values())
              total_del = sum(r[1] for r in results.values())

-             self._log('info',
-                       f"Database {database} finished - found {total_dup} duplicate groups, deleted {total_del} rows",
-                       {'results': results}
-                       )
+             logger.info(
+                 f"Database {database} finished - found {total_dup} duplicate groups, deleted {total_del} rows",
+                 {'results': results}
+             )

              return results

          except Exception as e:
-             self._log('error',
-                       f"Global error while processing database {database}: {str(e)}",
-                       {'error_type': type(e).__name__}
-                       )
+             logger.error(f"Global error while processing database {database}: {str(e)}", {'error_type': type(e).__name__})
              return results

      def deduplicate_all(
@@ -2889,13 +2620,10 @@ class MySQLDeduplicator:
              # fetch the databases to process
              target_dbs = databases or self._get_databases()
              if not target_dbs:
-                 self._log('warning', "No databases to process")
+                 logger.warning("No databases to process")
                  return all_results

-             self._log('info',
-                       f"Processing {len(target_dbs)} databases",
-                       {'databases': target_dbs}
-                       )
+             logger.info(f"Processing {len(target_dbs)} databases", {'databases': target_dbs})

              if parallel and self.max_workers > 1:
                  # process databases in parallel
@@ -2917,10 +2645,7 @@ class MySQLDeduplicator:
                          db_results = future.result()
                          all_results[db] = db_results
                      except Exception as e:
-                         self._log('error',
-                                   f"Error processing database {db}: {str(e)}",
-                                   {'error_type': type(e).__name__}
-                                   )
+                         logger.error(f"Error processing database {db}: {str(e)}", {'error_type': type(e).__name__})
                          all_results[db] = {}
              else:
                  # process databases serially
@@ -2942,18 +2667,15 @@ class MySQLDeduplicator:
                  for r in db.values()
              )

-             self._log('info',
-                       f"All databases finished - found {total_dup} duplicate groups, deleted {total_del} rows",
-                       {'total_results': all_results}
-                       )
+             logger.info(
+                 f"All databases finished - found {total_dup} duplicate groups, deleted {total_del} rows",
+                 {'total_results': all_results}
+             )

              return all_results

          except Exception as e:
-             self._log('error',
-                       f"Error during global processing: {str(e)}",
-                       {'error_type': type(e).__name__}
-                       )
+             logger.error(f"Error during global processing: {str(e)}", {'error_type': type(e).__name__})
              return all_results

      @_retry_on_failure
@@ -2985,12 +2707,9 @@ class MySQLDeduplicator:
          try:
              if hasattr(self, 'pool') and self.pool:
                  self.pool.close()
-                 self._log('info', "Database connection pool closed")
+                 logger.info("Database connection pool closed")
          except Exception as e:
-             self._log('error',
-                       f"Error while closing the connection pool: {str(e)}",
-                       {'error_type': type(e).__name__}
-                       )
+             logger.error(f"Error while closing the connection pool: {str(e)}", {'error_type': type(e).__name__})
          finally:
              self.pool = None

@@ -3004,11 +2723,9 @@
  def main():
      uploader = MySQLUploader(
          username='root',
-         password='188988yang188',
+         password='pw',
          host='localhost',
          port=3306,
-         logging_mode='console',
-         log_level='info'
      )

      # define columns and data types
@@ -3050,24 +2767,22 @@ def main():
  def main2():
      deduplicator = MySQLDeduplicator(
          username='root',
-         password='1',
+         password='pw',
          host='localhost',
          port=3306
      )

-     # # full-database deduplication (single-threaded)
-     # deduplicator.deduplicate_all()
+     # full-database deduplication (single-threaded)
+     deduplicator.deduplicate_all()

      # # deduplicate a specific database (multi-threaded)
      # deduplicator.deduplicate_database('my_db', parallel=True)

-     # deduplicate a specific table (using specific columns)
-     deduplicator.deduplicate_table('my_db', 'my_table', columns=['name', 'date'])
+     # # deduplicate a specific table (using specific columns)
+     # deduplicator.deduplicate_table('my_db', 'my_table', columns=['name', 'date'])

      # close the connection
      deduplicator.close()

  if __name__ == '__main__':
      pass
-
-     main2()