mdbq 3.9.2__py3-none-any.whl → 3.9.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbq/__version__.py CHANGED
@@ -1 +1 @@
1
- VERSION = '3.9.2'
1
+ VERSION = '3.9.3'
mdbq/mysql/mysql.py CHANGED
@@ -15,7 +15,8 @@ from mdbq.other import otk
15
15
  from typing import Union, List, Dict, Optional, Any, Tuple
16
16
  from dbutils.pooled_db import PooledDB
17
17
  import json
18
- import psutil # 用于监控资源使用情况
18
+ import psutil
19
+ from collections import OrderedDict
19
20
 
20
21
 
21
22
  warnings.filterwarnings('ignore')
@@ -1111,6 +1112,18 @@ class OptimizeDatas:
1111
1112
  self.connection.close()
1112
1113
 
1113
1114
 
1115
class StatementCache(OrderedDict):
    """
    Size-bounded mapping with least-recently-used (LRU) eviction.

    Entries are kept in recency order: the first key is the least recently
    used one and the last key is the most recently used one. Reading an entry
    with ``cache[key]`` or (re)assigning it marks it as most recently used.
    ``get()`` and ``in`` do not touch recency.

    :param maxsize: maximum number of entries kept; once exceeded, the least
        recently used entry is dropped.
    """

    def __init__(self, maxsize=100):
        super().__init__()
        self.maxsize = maxsize

    def __getitem__(self, key):
        value = super().__getitem__(key)
        # A successful read counts as a "use": push the key to the fresh end.
        self.move_to_end(key)
        return value

    def __setitem__(self, key, value):
        # Overwriting an existing key must refresh its recency as well,
        # otherwise a hot entry that is re-stored would still be evicted first.
        if key in self:
            self.move_to_end(key)
        super().__setitem__(key, value)
        if len(self) > self.maxsize:
            # The first key in iteration order is the least recently used one.
            oldest = next(iter(self))
            del self[oldest]
1125
+
1126
+
1114
1127
  class MySQLUploader:
1115
1128
  def __init__(
1116
1129
  self,
@@ -1119,7 +1132,7 @@ class MySQLUploader:
1119
1132
  host: str = 'localhost',
1120
1133
  port: int = 3306,
1121
1134
  charset: str = 'utf8mb4',
1122
- collation: str = 'utf8mb4_0900_ai_ci',
1135
+ collation: str = 'utf8mb4_0900_ai_ci', # utf8mb4_0900_ai_ci: 该排序规则对大小写不敏感, utf8mb4_0900_as_cs/utf8mb4_bin: 对大小写敏感
1123
1136
  logging_mode: str = 'console', # 'both'(控制台+文件), 'console'(仅控制台), 'file'(仅文件), 'none'(禁用)
1124
1137
  log_level: str = 'INFO', # 默认日志级别
1125
1138
  log_file: str = 'mysql_upload.log', # 日志文件路径
@@ -1127,7 +1140,7 @@ class MySQLUploader:
1127
1140
  backup_count: int = 5, # 保留的日志文件数量
1128
1141
  max_retries: int = 10,
1129
1142
  retry_interval: int = 10,
1130
- pool_size: int = 10,
1143
+ pool_size: int = 5,
1131
1144
  connect_timeout: int = 10,
1132
1145
  read_timeout: int = 30,
1133
1146
  write_timeout: int = 30,
@@ -1168,7 +1181,7 @@ class MySQLUploader:
1168
1181
  self.read_timeout = read_timeout
1169
1182
  self.write_timeout = write_timeout
1170
1183
  self.ssl = ssl
1171
- self._prepared_statements = {}
1184
+ self._prepared_statements = StatementCache(maxsize=100)
1172
1185
  self._max_cached_statements = 100
1173
1186
  self.enable_metrics = enable_metrics
1174
1187
  self.metrics = {
@@ -1184,6 +1197,11 @@ class MySQLUploader:
1184
1197
  'memory_usage': [],
1185
1198
  'cpu_usage': []
1186
1199
  }
1200
+ self._last_metrics_time = 0
1201
+ self._metrics_cache = {} # 缓存最近一次的系统指标
1202
+ self.metrics_interval = 30 # 指标采集频率控制
1203
+ self._table_metadata_cache = {} # 元信息缓存
1204
+ self.metadata_cache_ttl = 300 # 元信息缓存频率控制
1187
1205
 
1188
1206
  # 初始化日志系统
1189
1207
  self._init_logging(logging_mode, log_level, log_file, max_log_size, backup_count)
@@ -1224,6 +1242,10 @@ class MySQLUploader:
1224
1242
  if record.exc_info:
1225
1243
  log_data['exception'] = self.formatException(record.exc_info)
1226
1244
 
1245
+ # 过滤敏感信息
1246
+ if hasattr(record, 'password'):
1247
+ log_data['message'] = log_data['message'].replace(self.password, '***')
1248
+
1227
1249
  return json.dumps(log_data, ensure_ascii=False)
1228
1250
 
1229
1251
  # 创建日志记录器
@@ -1259,6 +1281,11 @@ class MySQLUploader:
1259
1281
  if not self.enable_metrics:
1260
1282
  return
1261
1283
 
1284
+ # 对于频繁调用的指标,使用更高效的数据结构
1285
+ if metric_name in ('total_uploads', 'successful_uploads', 'failed_uploads'):
1286
+ self.metrics[metric_name] = self.metrics.get(metric_name, 0) + value
1287
+ return
1288
+
1262
1289
  if metric_name not in self.metrics:
1263
1290
  self.metrics[metric_name] = []
1264
1291
 
@@ -1309,9 +1336,20 @@ class MySQLUploader:
1309
1336
  if not self.logger:
1310
1337
  return
1311
1338
 
1312
- # 记录系统指标
1313
- metrics = self._get_system_metrics()
1314
- log_extra = {'metrics': metrics}
1339
+ if len(message) > 500:
1340
+ message = message[:500] + '...'
1341
+
1342
+ now = time.time()
1343
+ if now - self._last_metrics_time > self.metrics_interval:
1344
+ self._metrics_cache = self._get_system_metrics()
1345
+ # 使用缓存的指标
1346
+ log_extra = {'metrics': self._metrics_cache}
1347
+ self._last_metrics_time = now
1348
+ else:
1349
+ # 记录系统指标
1350
+ metrics = self._get_system_metrics()
1351
+ log_extra = {'metrics': metrics}
1352
+
1315
1353
  if extra:
1316
1354
  log_extra.update(extra)
1317
1355
 
@@ -1319,6 +1357,9 @@ class MySQLUploader:
1319
1357
 
1320
1358
  def _create_connection_pool(self) -> PooledDB:
1321
1359
  """创建数据库连接池"""
1360
+ if hasattr(self, 'pool') and self.pool is not None and self._check_pool_health():
1361
+ return self.pool
1362
+
1322
1363
  start_time = time.time()
1323
1364
  self.pool = None
1324
1365
 
@@ -1418,7 +1459,7 @@ class MySQLUploader:
1418
1459
  if attempt < self.max_retries - 1:
1419
1460
  wait_time = self.retry_interval * (attempt + 1)
1420
1461
  error_details['wait_time'] = wait_time
1421
- self._log_with_metrics('warning', "数据库操作失败,准备重试", error_details)
1462
+ self._log_with_metrics('warning', f"数据库操作失败,准备重试 {error_details}", )
1422
1463
  time.sleep(wait_time)
1423
1464
 
1424
1465
  # 尝试重新连接
@@ -1432,7 +1473,7 @@ class MySQLUploader:
1432
1473
  else:
1433
1474
  elapsed = time.time() - start_time
1434
1475
  error_details['time_elapsed'] = elapsed
1435
- self._log_with_metrics('error', "操作最终失败", error_details)
1476
+ self._log_with_metrics('error', f"操作最终失败 {error_details}")
1436
1477
 
1437
1478
  except pymysql.IntegrityError as e:
1438
1479
  elapsed = time.time() - start_time
@@ -1469,7 +1510,7 @@ class MySQLUploader:
1469
1510
  self._log_with_metrics('debug', "获取数据库连接")
1470
1511
  return conn
1471
1512
  except Exception as e:
1472
- self._log_with_metrics("error", str(e))
1513
+ self._log_with_metrics("error", f'{e}')
1473
1514
  raise ConnectionError(f"连接数据库失败: {str(e)}")
1474
1515
 
1475
1516
  def _check_database_exists(self, db_name: str) -> bool:
@@ -1515,12 +1556,14 @@ class MySQLUploader:
1515
1556
  :raises ValueError: 如果日期格式无效或分表方式无效
1516
1557
  """
1517
1558
  try:
1518
- date_obj = datetime.datetime.strptime(date_value, '%Y-%m-%d %H:%M:%S')
1559
+ # date_obj = datetime.datetime.strptime(date_value, '%Y-%m-%d %H:%M:%S')
1560
+ date_obj = self._validate_datetime(date_value, True)
1519
1561
  except ValueError:
1520
1562
  try:
1521
- date_obj = datetime.datetime.strptime(date_value, '%Y-%m-%d')
1563
+ # date_obj = datetime.datetime.strptime(date_value, '%Y-%m-%d')
1564
+ date_obj = self._validate_datetime(date_value, True)
1522
1565
  except ValueError:
1523
- error_msg = f"无效的日期格式: {date_value}"
1566
+ error_msg = f"无效的日期格式1: {date_value}"
1524
1567
  self._log_with_metrics('error', error_msg)
1525
1568
  raise ValueError(error_msg)
1526
1569
 
@@ -1567,24 +1610,33 @@ class MySQLUploader:
1567
1610
 
1568
1611
  def _check_table_exists(self, db_name: str, table_name: str) -> bool:
1569
1612
  """检查表是否存在"""
1613
+ cache_key = f"{db_name}.{table_name}"
1614
+ if cache_key in self._table_metadata_cache:
1615
+ cached_time, result = self._table_metadata_cache[cache_key]
1616
+ if time.time() - cached_time < self.metadata_cache_ttl:
1617
+ return result
1618
+
1570
1619
  db_name = self._validate_identifier(db_name)
1571
1620
  table_name = self._validate_identifier(table_name)
1572
1621
  sql = """
1573
- SELECT TABLE_NAME
1574
- FROM INFORMATION_SCHEMA.TABLES
1575
- WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s
1576
- """
1622
+ SELECT TABLE_NAME
1623
+ FROM INFORMATION_SCHEMA.TABLES
1624
+ WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s
1625
+ """
1577
1626
 
1578
1627
  try:
1579
1628
  with self._get_connection() as conn:
1580
1629
  with conn.cursor() as cursor:
1581
1630
  cursor.execute(sql, (db_name, table_name))
1582
- exists = bool(cursor.fetchone())
1583
- return exists
1631
+ result = bool(cursor.fetchone())
1584
1632
  except Exception as e:
1585
1633
  self._log_with_metrics('error', f"检查数据表是否存在时发生未知错误: {e}", )
1586
1634
  raise
1587
1635
 
1636
+ # 执行查询并缓存结果
1637
+ self._table_metadata_cache[cache_key] = (time.time(), result)
1638
+ return result
1639
+
1588
1640
  def _create_table(
1589
1641
  self,
1590
1642
  db_name: str,
@@ -1691,7 +1743,8 @@ class MySQLUploader:
1691
1743
  conn.rollback()
1692
1744
  raise
1693
1745
 
1694
- def _validate_datetime(self, value):
1746
+ def _validate_datetime(self, value, date_type=False):
1747
+ """date_type: 返回字符串类型或者日期类型"""
1695
1748
  formats = [
1696
1749
  '%Y-%m-%d %H:%M:%S',
1697
1750
  '%Y-%m-%d',
@@ -1699,14 +1752,20 @@ class MySQLUploader:
1699
1752
  '%Y/%m/%d',
1700
1753
  '%Y%m%d',
1701
1754
  '%Y-%m-%dT%H:%M:%S',
1702
- '%Y-%m-%d %H:%M:%S.%f'
1755
+ '%Y-%m-%d %H:%M:%S.%f',
1756
+ '%Y/%-m/%-d', # 2023/1/8
1757
+ '%Y-%m-%-d', # 2023-01-8
1758
+ '%Y-%-m-%-d' # 2023-1-8
1703
1759
  ]
1704
1760
  for fmt in formats:
1705
1761
  try:
1706
- return datetime.datetime.strptime(value, fmt).strftime('%Y-%m-%d %H:%M:%S')
1762
+ if date_type:
1763
+ return pd.to_datetime(datetime.datetime.strptime(value, fmt).strftime('%Y-%m-%d'))
1764
+ else:
1765
+ return datetime.datetime.strptime(value, fmt).strftime('%Y-%m-%d %H:%M:%S')
1707
1766
  except ValueError:
1708
1767
  continue
1709
- raise ValueError(f"无效的日期格式: {value}")
1768
+ raise ValueError(f"无效的日期格式2: {value}")
1710
1769
 
1711
1770
  def _validate_value(self, value: Any, column_type: str) -> Any:
1712
1771
  """
@@ -1724,7 +1783,9 @@ class MySQLUploader:
1724
1783
  column_type_lower = column_type.lower()
1725
1784
 
1726
1785
  if 'int' in column_type_lower:
1727
- return int(value) if value is not None else None
1786
+ if isinstance(value, (str, bytes)) and not value.strip().isdigit():
1787
+ raise ValueError("非数字字符串无法转换为整数")
1788
+ return int(value)
1728
1789
  elif any(t in column_type_lower for t in ['float', 'double', 'decimal']):
1729
1790
  return float(value) if value is not None else None
1730
1791
  elif '日期' in column_type_lower or 'time' in column_type_lower:
@@ -1819,6 +1880,58 @@ class MySQLUploader:
1819
1880
  check_duplicate, duplicate_columns
1820
1881
  )
1821
1882
 
1883
def _infer_data_type(self, value: Any) -> str:
    """
    Pick a MySQL column type that fits a single sample value.

    :param value: sample value to inspect
    :return: MySQL data type string
    """
    fallback = 'VARCHAR(255)'
    if value is None:
        # Nothing to inspect: use the default string type.
        return fallback

    # bool is a subclass of int, so it has to be tested first.
    if isinstance(value, bool):
        return 'TINYINT(1)'
    if isinstance(value, int):
        fits_signed_32bit = -2147483648 <= value <= 2147483647
        return 'INT' if fits_signed_32bit else 'BIGINT'
    if isinstance(value, float):
        return 'DECIMAL(10,2)'
    # datetime is a subclass of date, so it has to be tested first.
    if isinstance(value, (datetime.datetime, pd.Timestamp)):
        return 'DATETIME'
    if isinstance(value, datetime.date):
        return 'DATE'
    if isinstance(value, (list, dict)):
        return 'JSON'
    if not isinstance(value, str):
        return fallback

    # A string that parses as a date/time is stored as DATETIME.
    try:
        self._validate_datetime(value)
    except ValueError:
        pass
    else:
        return 'DATETIME'

    # Otherwise choose the smallest text type that can hold the sample.
    size = len(value)
    for limit, type_name in (
        (255, 'VARCHAR(255)'),
        (65535, 'TEXT'),
        (16777215, 'MEDIUMTEXT'),
    ):
        if size <= limit:
            return type_name
    return 'LONGTEXT'
1934
+
1822
1935
  def _prepare_data(
1823
1936
  self,
1824
1937
  data: Union[Dict, List[Dict], pd.DataFrame],
@@ -1831,27 +1944,57 @@ class MySQLUploader:
1831
1944
  :param data: 输入数据
1832
1945
  :param set_typ: 列名和数据类型字典 {列名: 数据类型}
1833
1946
  :param allow_null: 是否允许空值
1834
- :return: 准备好的数据列表
1947
+ :return: 待上传的数据列表和对应的数据类型
1835
1948
  :raises ValueError: 如果数据验证失败
1836
1949
  """
1837
1950
  # 统一数据格式为字典列表
1838
1951
  if isinstance(data, pd.DataFrame):
1839
1952
  try:
1953
+ # 将列名转为小写
1954
+ data.columns = [col.lower() for col in data.columns]
1840
1955
  data = data.replace({pd.NA: None}).to_dict('records')
1841
1956
  except Exception as e:
1842
- self._log_with_metrics("error", f"转为为字典时发生错误: {e}", )
1843
- raise ValueError(f"转为为字典时发生错误: {e}")
1957
+ self._log_with_metrics("error", f"数据转字典时发生错误: {e}", )
1958
+ raise ValueError(f"数据转字典时发生错误: {e}")
1844
1959
  elif isinstance(data, dict):
1845
- data = [data]
1846
- elif not isinstance(data, list) or not all(isinstance(item, dict) for item in data):
1847
- error_msg = "Data must be a dict, list of dicts, or DataFrame"
1960
+ data = [{k.lower(): v for k, v in data.items()}]
1961
+ elif isinstance(data, list) and all(isinstance(item, dict) for item in data):
1962
+ # 将列表中的每个字典键转为小写
1963
+ data = [{k.lower(): v for k, v in item.items()} for item in data]
1964
+ else:
1965
+ error_msg = "数据结构必须是字典、列表、字典列表或dataframe"
1848
1966
  self._log_with_metrics('error', error_msg)
1849
1967
  raise ValueError(error_msg)
1850
1968
 
1969
+ # 将set_typ的键转为小写
1970
+ set_typ = {k.lower(): v for k, v in set_typ.items()}
1971
+
1972
+ # 获取数据中实际存在的列名
1973
+ data_columns = set()
1974
+ if data:
1975
+ data_columns = set(data[0].keys())
1976
+
1977
+ # 过滤set_typ,只保留数据中存在的列
1978
+ filtered_set_typ = {}
1979
+ for col in data_columns:
1980
+ if col in set_typ:
1981
+ filtered_set_typ[col] = set_typ[col]
1982
+ else:
1983
+ # 如果列不在set_typ中,尝试推断类型
1984
+ sample_values = [row[col] for row in data if col in row and row[col] is not None][:10]
1985
+ if sample_values:
1986
+ inferred_type = self._infer_data_type(sample_values[0])
1987
+ filtered_set_typ[col] = inferred_type
1988
+ self._log_with_metrics('debug', f"自动推断列'{col}'的数据类型为: {inferred_type}")
1989
+ else:
1990
+ # 没有样本值,使用默认类型
1991
+ filtered_set_typ[col] = 'VARCHAR(255)'
1992
+ self._log_with_metrics('debug', f"为列'{col}'使用默认数据类型: VARCHAR(255)")
1993
+
1851
1994
  prepared_data = []
1852
1995
  for row_idx, row in enumerate(data, 1):
1853
1996
  prepared_row = {}
1854
- for col_name, col_type in set_typ.items():
1997
+ for col_name in filtered_set_typ:
1855
1998
  # 跳过id列,不允许外部传入id
1856
1999
  if col_name.lower() == 'id':
1857
2000
  continue
@@ -1864,7 +2007,7 @@ class MySQLUploader:
1864
2007
  prepared_row[col_name] = None
1865
2008
  else:
1866
2009
  try:
1867
- prepared_row[col_name] = self._validate_value(row[col_name], col_type)
2010
+ prepared_row[col_name] = self._validate_value(row[col_name], filtered_set_typ[col_name])
1868
2011
  except ValueError as e:
1869
2012
  error_msg = f"Row {row_idx}, column '{col_name}': {str(e)}"
1870
2013
  self._log_with_metrics('error', error_msg)
@@ -1872,7 +2015,7 @@ class MySQLUploader:
1872
2015
  prepared_data.append(prepared_row)
1873
2016
 
1874
2017
  self._log_with_metrics('debug', f"已准备 {len(prepared_data)} 行数据")
1875
- return prepared_data
2018
+ return prepared_data, filtered_set_typ
1876
2019
 
1877
2020
  def upload_data(
1878
2021
  self,
@@ -1913,7 +2056,7 @@ class MySQLUploader:
1913
2056
  try:
1914
2057
  # 验证参数
1915
2058
  if not set_typ:
1916
- error_msg = "必须指定列定义"
2059
+ error_msg = "列的数据类型缺失"
1917
2060
  self._log_with_metrics('error', error_msg)
1918
2061
  raise ValueError(error_msg)
1919
2062
 
@@ -1923,7 +2066,7 @@ class MySQLUploader:
1923
2066
  raise ValueError(error_msg)
1924
2067
 
1925
2068
  # 准备数据
1926
- prepared_data = self._prepare_data(data, set_typ, allow_null)
2069
+ prepared_data, set_typ = self._prepare_data(data, set_typ, allow_null)
1927
2070
 
1928
2071
  # 检查数据库是否存在
1929
2072
  if not self._check_database_exists(db_name):
@@ -2019,7 +2162,17 @@ class MySQLUploader:
2019
2162
  batch_id: Optional[str] = None
2020
2163
  ):
2021
2164
  """
2022
- 插入数据到表中,增强日志记录和性能监控
2165
+ 插入数据到表中
2166
+
2167
+ 参数:
2168
+ db_name: 数据库名
2169
+ table_name: 表名
2170
+ data: 要插入的数据列表
2171
+ set_typ: 列名和数据类型字典 {列名: 数据类型}
2172
+ check_duplicate: 是否检查重复
2173
+ duplicate_columns: 用于检查重复的列(为空时检查所有列)
2174
+ batch_size: 批量插入大小
2175
+ batch_id: 批次ID用于日志追踪
2023
2176
  """
2024
2177
  if not data:
2025
2178
  return
@@ -2033,27 +2186,40 @@ class MySQLUploader:
2033
2186
  if check_duplicate:
2034
2187
  if not duplicate_columns:
2035
2188
  duplicate_columns = all_columns
2189
+ else:
2190
+ duplicate_columns = [col for col in duplicate_columns if col != 'id']
2191
+
2192
+ conditions = []
2193
+ for col in duplicate_columns:
2194
+ col_type = set_typ.get(col, '').lower()
2195
+
2196
+ # 处理DECIMAL类型,使用ROUND确保精度一致
2197
+ if col_type.startswith('decimal'):
2198
+ # 提取小数位数,如DECIMAL(10,2)提取2
2199
+ scale_match = re.search(r'decimal\(\d+,(\d+)\)', col_type)
2200
+ scale = int(scale_match.group(1)) if scale_match else 2
2201
+ conditions.append(f"ROUND(`{self._validate_identifier(col)}`, {scale}) = ROUND(%s, {scale})")
2202
+ else:
2203
+ conditions.append(f"`{self._validate_identifier(col)}` = %s")
2036
2204
 
2037
- safe_dup_columns = [self._validate_identifier(col) for col in duplicate_columns]
2038
- conditions = [f"`{col}` = %s" for col in safe_dup_columns]
2039
2205
  where_clause = " AND ".join(conditions)
2040
2206
 
2041
2207
  sql = f"""
2042
- INSERT INTO `{db_name}`.`{table_name}`
2043
- (`{'`,`'.join(safe_columns)}`)
2044
- SELECT {placeholders}
2045
- FROM DUAL
2046
- WHERE NOT EXISTS (
2047
- SELECT 1 FROM `{db_name}`.`{table_name}`
2048
- WHERE {where_clause}
2049
- )
2050
- """
2208
+ INSERT INTO `{db_name}`.`{table_name}`
2209
+ (`{'`,`'.join(safe_columns)}`)
2210
+ SELECT {placeholders}
2211
+ FROM DUAL
2212
+ WHERE NOT EXISTS (
2213
+ SELECT 1 FROM `{db_name}`.`{table_name}`
2214
+ WHERE {where_clause}
2215
+ )
2216
+ """
2051
2217
  else:
2052
2218
  sql = f"""
2053
- INSERT INTO `{db_name}`.`{table_name}`
2054
- (`{'`,`'.join(safe_columns)}`)
2055
- VALUES ({placeholders})
2056
- """
2219
+ INSERT INTO `{db_name}`.`{table_name}`
2220
+ (`{'`,`'.join(safe_columns)}`)
2221
+ VALUES ({placeholders})
2222
+ """
2057
2223
 
2058
2224
  total_inserted = 0
2059
2225
  total_skipped = 0
@@ -2069,7 +2235,9 @@ class MySQLUploader:
2069
2235
 
2070
2236
  for row in batch:
2071
2237
  try:
2238
+ # 准备参数
2072
2239
  row_values = [row.get(col) for col in all_columns]
2240
+ # 如果是排重检查,添加排重列值
2073
2241
  if check_duplicate:
2074
2242
  row_values += [row.get(col) for col in duplicate_columns]
2075
2243
 
@@ -2086,11 +2254,13 @@ class MySQLUploader:
2086
2254
  'batch_id': batch_id,
2087
2255
  'database': db_name,
2088
2256
  'table': table_name,
2089
- 'row_data': row,
2090
2257
  'error_type': type(e).__name__,
2091
- 'error_message': str(e)
2258
+ 'error_message': str(e),
2259
+ 'column_types': set_typ,
2260
+ 'duplicate_check': check_duplicate,
2261
+ 'duplicate_columns': duplicate_columns
2092
2262
  }
2093
- self._log_with_metrics('error', "单行插入失败", error_details)
2263
+ self._log_with_metrics('error', f"单行插入失败: {error_details}")
2094
2264
  continue # 跳过当前行,继续处理下一行
2095
2265
 
2096
2266
  # 更新统计信息
@@ -2115,7 +2285,7 @@ class MySQLUploader:
2115
2285
  'time_elapsed': batch_elapsed,
2116
2286
  'rows_per_second': successful_rows / batch_elapsed if batch_elapsed > 0 else 0
2117
2287
  }
2118
- self._log_with_metrics('debug', "批次处理完成", batch_info)
2288
+ self._log_with_metrics('debug', f"批次处理完成 {batch_info}")
2119
2289
 
2120
2290
  # 更新全局指标
2121
2291
  self.metrics['failed_rows'] += total_failed
@@ -2178,37 +2348,66 @@ class MySQLUploader:
2178
2348
  if hasattr(self, 'logger') and self.logger and self.enable_metrics:
2179
2349
  self._log_with_metrics('debug', "最终性能指标", self.get_metrics())
2180
2350
 
2181
- def __main__():
2182
- pass
2351
def _check_pool_health(self):
    """
    Check whether the connection pool can hand out a working connection.

    Borrows one connection from ``self.pool``, pings it with reconnect
    enabled, and closes it again.

    :return: True if borrowing, pinging and closing all succeeded; False if
        any step raised (the failure is logged as a warning)
    """
    try:
        conn = self.pool.connection()
        try:
            conn.ping(reconnect=True)
        finally:
            # Always close the borrowed connection, even when the ping fails.
            conn.close()
        return True
    except Exception as e:
        # The exception must be bound to a name here: the log payload below
        # reads it, and an unbound name would raise NameError in the handler.
        self._log_with_metrics('warning', "连接池健康检查失败", {
            'error': str(e)
        })
        return False
2363
+
2364
def retry_on_failure(max_retries=3, delay=1):
    """
    Decorator factory that retries a callable on MySQL connection-level errors.

    ``pymysql.OperationalError`` and ``pymysql.InterfaceError`` trigger a
    retry after a linearly growing pause (``delay * attempt_number`` seconds).
    Any other exception is not retried.

    :param max_retries: total number of attempts; values below 1 are treated
        as a single attempt so the wrapped callable is always invoked
    :param delay: base pause in seconds between attempts
    :return: a decorator; the decorated callable returns whatever the wrapped
        callable returns
    :raises MySQLUploaderError: when all attempts fail with a retryable error,
        or immediately when the callable raises any other exception; the
        original exception is chained as the cause
    """
    # Guarantee at least one attempt. With a non-positive max_retries the loop
    # would otherwise never call the function and only report "unknown error".
    attempts = max(1, max_retries)

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(attempts):
                try:
                    return func(*args, **kwargs)
                except (pymysql.OperationalError, pymysql.InterfaceError) as e:
                    if attempt < attempts - 1:
                        time.sleep(delay * (attempt + 1))
                        continue
                    raise MySQLUploaderError(f"操作重试{max_retries}次后失败") from e
                except Exception as e:
                    raise MySQLUploaderError(f"操作失败: {str(e)}") from e

        return wrapper

    return decorator
2187
2385
 
2188
- # 初始化上传器
2386
+ def main():
2189
2387
  uploader = MySQLUploader(
2190
2388
  username='root',
2191
2389
  password='1',
2192
2390
  host='localhost',
2193
2391
  port=3306,
2194
- logging_mode='both',
2392
+ logging_mode='console',
2195
2393
  log_level='info'
2196
2394
  )
2197
2395
 
2198
2396
  # 定义列和数据类型
2199
2397
  set_typ = {
2200
- 'id': 'INT',
2201
2398
  'name': 'VARCHAR(255)',
2202
2399
  'age': 'INT',
2203
2400
  'salary': 'DECIMAL(10,2)',
2204
- '日期': 'DATE'
2401
+ '日期': 'DATE',
2402
+ 'shop': None,
2205
2403
  }
2206
2404
 
2207
2405
  # 准备数据
2208
2406
  data = [
2209
- {'日期': '2023-01-15', 'name': 'Alice', 'age': 35, 'salary': 100},
2210
- {'日期': '2023-01-15', 'name': 'Alice', 'age': 30, 'salary': 0.0},
2211
- {'日期': '2023-02-20', 'name': 'Bob', 'age': 25, 'salary': 45000.75}
2407
+ {'日期': '2023-01-8', 'name': 'JACk', 'AGE': '24', 'salary': 555.1545},
2408
+ {'日期': '2023-01-15', 'name': 'Alice', 'AGE': 35, 'salary': 100},
2409
+ {'日期': '2023-01-15', 'name': 'Alice', 'AGE': 30, 'salary': 0.0},
2410
+ {'日期': '2023-02-20', 'name': 'Bob', 'AGE': 25, 'salary': 45000.75}
2212
2411
  ]
2213
2412
 
2214
2413
  # 上传数据
@@ -2217,15 +2416,18 @@ if __name__ == '__main__':
2217
2416
  table_name='测试表',
2218
2417
  data=data,
2219
2418
  set_typ=set_typ, # 定义列和数据类型
2220
- primary_keys=[], # 指定主键
2419
+ primary_keys=[], # 创建唯一主键
2221
2420
  check_duplicate=True, # 检查重复数据
2222
- duplicate_columns=['name', 'age'], #
2421
+ duplicate_columns=[], # 指定排重的组合键
2223
2422
  allow_null=False, # 允许插入空值
2224
2423
  partition_by='year', # 按月分表
2225
- partition_date_column = '日期', # 用于分表的日期列名,默认为'日期'
2226
- auto_create = True, # 表不存在时自动创建, 默认参数不要更改
2227
- indexes = ['name'], # 指定索引列
2424
+ partition_date_column='日期', # 用于分表的日期列名,默认为'日期'
2425
+ auto_create=True, # 表不存在时自动创建, 默认参数不要更改
2426
+ indexes=[], # 指定索引列
2228
2427
  )
2229
2428
 
2230
- # 关闭上传器
2231
2429
  uploader.close()
2430
+
2431
+
2432
+ if __name__ == '__main__':
2433
+ pass
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: mdbq
3
- Version: 3.9.2
3
+ Version: 3.9.3
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -1,5 +1,5 @@
1
1
  mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
2
- mdbq/__version__.py,sha256=x030kSR5wz8nf_l9kAxmL-5kk7A84GeAWO_CGB1N2Cw,17
2
+ mdbq/__version__.py,sha256=Pw2FixsnE8Hf360X55h_tF8Xeez7UgHd2pSgUkJY-v4,17
3
3
  mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
4
4
  mdbq/aggregation/optimize.py,sha256=2oalzD9weZhDclUC22OLxYa8Zj7KnmsGUoUau_Jlyc4,19796
5
5
  mdbq/aggregation/query_data.py,sha256=5_OzjGR5Sq00q-EgAYmSE5V9i4Solw9y4hkldl4mvt8,179808
@@ -8,7 +8,7 @@ mdbq/config/config.py,sha256=eaTfrfXQ65xLqjr5I8-HkZd_jEY1JkGinEgv3TSLeoQ,3170
8
8
  mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
9
9
  mdbq/log/spider_logging.py,sha256=-ozWWEGm3HVv604ozs_OOvVwumjokmUPwbaodesUrPY,1664
10
10
  mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
11
- mdbq/mysql/mysql.py,sha256=9e1mT12gWE8-Vld-E52EAtoAxKqQVmTlG7aGUXO17vo,99908
11
+ mdbq/mysql/mysql.py,sha256=YX-tgugceODrJHcXgbosWFVThjXv3I2gCvTt_siKBOI,108606
12
12
  mdbq/mysql/s_query.py,sha256=X055aLRAgxVvueXx4NbfNjp6MyBI02_XBb1pTKw09L0,8660
13
13
  mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
14
14
  mdbq/other/download_sku_picture.py,sha256=YU8DxKMXbdeE1OOKEA848WVp62jYHw5O4tXTjUdq9H0,44832
@@ -22,7 +22,7 @@ mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
22
22
  mdbq/redis/getredis.py,sha256=Uk8-cOWT0JU1qRyIVqdbYokSLvkDIAfcokmYj1ebw8k,24104
23
23
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
24
24
  mdbq/spider/aikucun.py,sha256=OhyEv1VyAKTOHjLDM37iNDQeRg5OnrNoKODoG2VxHes,19806
25
- mdbq-3.9.2.dist-info/METADATA,sha256=6Swdffh2m_pfD0XyeKyJGbehkwcuhWFiJtHakbCuBDQ,363
26
- mdbq-3.9.2.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
27
- mdbq-3.9.2.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
28
- mdbq-3.9.2.dist-info/RECORD,,
25
+ mdbq-3.9.3.dist-info/METADATA,sha256=Vt2mII7wAfEhzQa9G8PreCPV_hkdM1DLTTDcUMyepPg,363
26
+ mdbq-3.9.3.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
27
+ mdbq-3.9.3.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
28
+ mdbq-3.9.3.dist-info/RECORD,,
File without changes