mdbq 3.9.2__py3-none-any.whl → 3.9.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbq/__version__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
VERSION = '3.9.
|
1
|
+
VERSION = '3.9.3'
|
mdbq/mysql/mysql.py
CHANGED
@@ -15,7 +15,8 @@ from mdbq.other import otk
|
|
15
15
|
from typing import Union, List, Dict, Optional, Any, Tuple
|
16
16
|
from dbutils.pooled_db import PooledDB
|
17
17
|
import json
|
18
|
-
import psutil
|
18
|
+
import psutil
|
19
|
+
from collections import OrderedDict
|
19
20
|
|
20
21
|
|
21
22
|
warnings.filterwarnings('ignore')
|
@@ -1111,6 +1112,18 @@ class OptimizeDatas:
|
|
1111
1112
|
self.connection.close()
|
1112
1113
|
|
1113
1114
|
|
1115
|
+
class StatementCache(OrderedDict):
    """Bounded mapping with least-recently-used (LRU) eviction.

    Reading a key with ``cache[key]`` or writing it with ``cache[key] = value``
    marks that key as the most recently used. When an insertion makes the
    mapping larger than ``maxsize``, the least recently used entries are
    dropped until the size limit is met again.

    Note: ``cache[key]`` reorders the mapping, so do not index the cache while
    iterating over it; iterate over ``cache.items()`` instead.

    :param maxsize: maximum number of entries kept in the cache
    """

    def __init__(self, maxsize=100):
        # Set the limit first so it is available to any later __setitem__ call.
        self.maxsize = maxsize
        super().__init__()

    def __getitem__(self, key):
        value = super().__getitem__(key)
        # A successful read counts as a "use": move the key to the fresh end.
        self.move_to_end(key)
        return value

    def __setitem__(self, key, value):
        # Overwriting an existing key must refresh its position as well;
        # plain OrderedDict assignment would leave it where it was inserted.
        if key in self:
            self.move_to_end(key)
        super().__setitem__(key, value)
        # Evict from the stale end (the front) until the limit is respected.
        while self and len(self) > self.maxsize:
            del self[next(iter(self))]
|
1125
|
+
|
1126
|
+
|
1114
1127
|
class MySQLUploader:
|
1115
1128
|
def __init__(
|
1116
1129
|
self,
|
@@ -1119,7 +1132,7 @@ class MySQLUploader:
|
|
1119
1132
|
host: str = 'localhost',
|
1120
1133
|
port: int = 3306,
|
1121
1134
|
charset: str = 'utf8mb4',
|
1122
|
-
collation: str = 'utf8mb4_0900_ai_ci',
|
1135
|
+
collation: str = 'utf8mb4_0900_ai_ci', # utf8mb4_0900_ai_ci: 该排序规则对大小写不敏感, utf8mb4_0900_as_cs/utf8mb4_bin: 对大小写敏感
|
1123
1136
|
logging_mode: str = 'console', # 'both'(控制台+文件), 'console'(仅控制台), 'file'(仅文件), 'none'(禁用)
|
1124
1137
|
log_level: str = 'INFO', # 默认日志级别
|
1125
1138
|
log_file: str = 'mysql_upload.log', # 日志文件路径
|
@@ -1127,7 +1140,7 @@ class MySQLUploader:
|
|
1127
1140
|
backup_count: int = 5, # 保留的日志文件数量
|
1128
1141
|
max_retries: int = 10,
|
1129
1142
|
retry_interval: int = 10,
|
1130
|
-
pool_size: int =
|
1143
|
+
pool_size: int = 5,
|
1131
1144
|
connect_timeout: int = 10,
|
1132
1145
|
read_timeout: int = 30,
|
1133
1146
|
write_timeout: int = 30,
|
@@ -1168,7 +1181,7 @@ class MySQLUploader:
|
|
1168
1181
|
self.read_timeout = read_timeout
|
1169
1182
|
self.write_timeout = write_timeout
|
1170
1183
|
self.ssl = ssl
|
1171
|
-
self._prepared_statements =
|
1184
|
+
self._prepared_statements = StatementCache(maxsize=100)
|
1172
1185
|
self._max_cached_statements = 100
|
1173
1186
|
self.enable_metrics = enable_metrics
|
1174
1187
|
self.metrics = {
|
@@ -1184,6 +1197,11 @@ class MySQLUploader:
|
|
1184
1197
|
'memory_usage': [],
|
1185
1198
|
'cpu_usage': []
|
1186
1199
|
}
|
1200
|
+
self._last_metrics_time = 0
|
1201
|
+
self._metrics_cache = {} # 缓存最近一次的系统指标
|
1202
|
+
self.metrics_interval = 30 # 指标采集频率控制
|
1203
|
+
self._table_metadata_cache = {} # 元信息缓存
|
1204
|
+
self.metadata_cache_ttl = 300 # 元信息缓存频率控制
|
1187
1205
|
|
1188
1206
|
# 初始化日志系统
|
1189
1207
|
self._init_logging(logging_mode, log_level, log_file, max_log_size, backup_count)
|
@@ -1224,6 +1242,10 @@ class MySQLUploader:
|
|
1224
1242
|
if record.exc_info:
|
1225
1243
|
log_data['exception'] = self.formatException(record.exc_info)
|
1226
1244
|
|
1245
|
+
# 过滤敏感信息
|
1246
|
+
if hasattr(record, 'password'):
|
1247
|
+
log_data['message'] = log_data['message'].replace(self.password, '***')
|
1248
|
+
|
1227
1249
|
return json.dumps(log_data, ensure_ascii=False)
|
1228
1250
|
|
1229
1251
|
# 创建日志记录器
|
@@ -1259,6 +1281,11 @@ class MySQLUploader:
|
|
1259
1281
|
if not self.enable_metrics:
|
1260
1282
|
return
|
1261
1283
|
|
1284
|
+
# 对于频繁调用的指标,使用更高效的数据结构
|
1285
|
+
if metric_name in ('total_uploads', 'successful_uploads', 'failed_uploads'):
|
1286
|
+
self.metrics[metric_name] = self.metrics.get(metric_name, 0) + value
|
1287
|
+
return
|
1288
|
+
|
1262
1289
|
if metric_name not in self.metrics:
|
1263
1290
|
self.metrics[metric_name] = []
|
1264
1291
|
|
@@ -1309,9 +1336,20 @@ class MySQLUploader:
|
|
1309
1336
|
if not self.logger:
|
1310
1337
|
return
|
1311
1338
|
|
1312
|
-
|
1313
|
-
|
1314
|
-
|
1339
|
+
if len(message) > 500:
|
1340
|
+
message = message[:500] + '...'
|
1341
|
+
|
1342
|
+
now = time.time()
|
1343
|
+
if now - self._last_metrics_time > self.metrics_interval:
|
1344
|
+
self._metrics_cache = self._get_system_metrics()
|
1345
|
+
# 使用缓存的指标
|
1346
|
+
log_extra = {'metrics': self._metrics_cache}
|
1347
|
+
self._last_metrics_time = now
|
1348
|
+
else:
|
1349
|
+
# 记录系统指标
|
1350
|
+
metrics = self._get_system_metrics()
|
1351
|
+
log_extra = {'metrics': metrics}
|
1352
|
+
|
1315
1353
|
if extra:
|
1316
1354
|
log_extra.update(extra)
|
1317
1355
|
|
@@ -1319,6 +1357,9 @@ class MySQLUploader:
|
|
1319
1357
|
|
1320
1358
|
def _create_connection_pool(self) -> PooledDB:
|
1321
1359
|
"""创建数据库连接池"""
|
1360
|
+
if hasattr(self, 'pool') and self.pool is not None and self._check_pool_health():
|
1361
|
+
return self.pool
|
1362
|
+
|
1322
1363
|
start_time = time.time()
|
1323
1364
|
self.pool = None
|
1324
1365
|
|
@@ -1418,7 +1459,7 @@ class MySQLUploader:
|
|
1418
1459
|
if attempt < self.max_retries - 1:
|
1419
1460
|
wait_time = self.retry_interval * (attempt + 1)
|
1420
1461
|
error_details['wait_time'] = wait_time
|
1421
|
-
self._log_with_metrics('warning', "数据库操作失败,准备重试",
|
1462
|
+
self._log_with_metrics('warning', f"数据库操作失败,准备重试 {error_details}", )
|
1422
1463
|
time.sleep(wait_time)
|
1423
1464
|
|
1424
1465
|
# 尝试重新连接
|
@@ -1432,7 +1473,7 @@ class MySQLUploader:
|
|
1432
1473
|
else:
|
1433
1474
|
elapsed = time.time() - start_time
|
1434
1475
|
error_details['time_elapsed'] = elapsed
|
1435
|
-
self._log_with_metrics('error', "操作最终失败
|
1476
|
+
self._log_with_metrics('error', f"操作最终失败 {error_details}")
|
1436
1477
|
|
1437
1478
|
except pymysql.IntegrityError as e:
|
1438
1479
|
elapsed = time.time() - start_time
|
@@ -1469,7 +1510,7 @@ class MySQLUploader:
|
|
1469
1510
|
self._log_with_metrics('debug', "获取数据库连接")
|
1470
1511
|
return conn
|
1471
1512
|
except Exception as e:
|
1472
|
-
self._log_with_metrics("error",
|
1513
|
+
self._log_with_metrics("error", f'{e}')
|
1473
1514
|
raise ConnectionError(f"连接数据库失败: {str(e)}")
|
1474
1515
|
|
1475
1516
|
def _check_database_exists(self, db_name: str) -> bool:
|
@@ -1515,12 +1556,14 @@ class MySQLUploader:
|
|
1515
1556
|
:raises ValueError: 如果日期格式无效或分表方式无效
|
1516
1557
|
"""
|
1517
1558
|
try:
|
1518
|
-
date_obj = datetime.datetime.strptime(date_value, '%Y-%m-%d %H:%M:%S')
|
1559
|
+
# date_obj = datetime.datetime.strptime(date_value, '%Y-%m-%d %H:%M:%S')
|
1560
|
+
date_obj = self._validate_datetime(date_value, True)
|
1519
1561
|
except ValueError:
|
1520
1562
|
try:
|
1521
|
-
date_obj = datetime.datetime.strptime(date_value, '%Y-%m-%d')
|
1563
|
+
# date_obj = datetime.datetime.strptime(date_value, '%Y-%m-%d')
|
1564
|
+
date_obj = self._validate_datetime(date_value, True)
|
1522
1565
|
except ValueError:
|
1523
|
-
error_msg = f"
|
1566
|
+
error_msg = f"无效的日期格式1: {date_value}"
|
1524
1567
|
self._log_with_metrics('error', error_msg)
|
1525
1568
|
raise ValueError(error_msg)
|
1526
1569
|
|
@@ -1567,24 +1610,33 @@ class MySQLUploader:
|
|
1567
1610
|
|
1568
1611
|
def _check_table_exists(self, db_name: str, table_name: str) -> bool:
    """Return True if ``table_name`` exists in schema ``db_name``.

    Positive results are cached in ``self._table_metadata_cache`` under the
    key ``"<db_name>.<table_name>"`` for ``self.metadata_cache_ttl`` seconds.
    Negative results are never cached: a missing table can be created at any
    moment, and a cached "missing" answer would keep being returned until the
    entry expired.

    :param db_name: schema (database) name
    :param table_name: table name
    :return: whether the table exists
    :raises Exception: any error raised while querying is logged and re-raised
    """
    cache_key = f"{db_name}.{table_name}"
    cached = self._table_metadata_cache.get(cache_key)
    if cached is not None:
        cached_time, cached_result = cached
        # Only trust a fresh, positive entry; anything else is re-queried.
        if cached_result and time.time() - cached_time < self.metadata_cache_ttl:
            return cached_result

    db_name = self._validate_identifier(db_name)
    table_name = self._validate_identifier(table_name)
    sql = """
        SELECT TABLE_NAME
        FROM INFORMATION_SCHEMA.TABLES
        WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s
    """

    try:
        with self._get_connection() as conn:
            with conn.cursor() as cursor:
                cursor.execute(sql, (db_name, table_name))
                result = bool(cursor.fetchone())
    except Exception as e:
        self._log_with_metrics('error', f"检查数据表是否存在时发生未知错误: {e}", )
        raise

    if result:
        self._table_metadata_cache[cache_key] = (time.time(), result)
    else:
        # Drop any expired positive entry so the cache does not claim a table
        # that the database just reported as missing.
        self._table_metadata_cache.pop(cache_key, None)
    return result
|
1639
|
+
|
1588
1640
|
def _create_table(
|
1589
1641
|
self,
|
1590
1642
|
db_name: str,
|
@@ -1691,7 +1743,8 @@ class MySQLUploader:
|
|
1691
1743
|
conn.rollback()
|
1692
1744
|
raise
|
1693
1745
|
|
1694
|
-
def _validate_datetime(self, value):
|
1746
|
+
def _validate_datetime(self, value, date_type=False):
|
1747
|
+
"""date_type: 返回字符串类型或者日期类型"""
|
1695
1748
|
formats = [
|
1696
1749
|
'%Y-%m-%d %H:%M:%S',
|
1697
1750
|
'%Y-%m-%d',
|
@@ -1699,14 +1752,20 @@ class MySQLUploader:
|
|
1699
1752
|
'%Y/%m/%d',
|
1700
1753
|
'%Y%m%d',
|
1701
1754
|
'%Y-%m-%dT%H:%M:%S',
|
1702
|
-
'%Y-%m-%d %H:%M:%S.%f'
|
1755
|
+
'%Y-%m-%d %H:%M:%S.%f',
|
1756
|
+
'%Y/%-m/%-d', # 2023/1/8
|
1757
|
+
'%Y-%m-%-d', # 2023-01-8
|
1758
|
+
'%Y-%-m-%-d' # 2023-1-8
|
1703
1759
|
]
|
1704
1760
|
for fmt in formats:
|
1705
1761
|
try:
|
1706
|
-
|
1762
|
+
if date_type:
|
1763
|
+
return pd.to_datetime(datetime.datetime.strptime(value, fmt).strftime('%Y-%m-%d'))
|
1764
|
+
else:
|
1765
|
+
return datetime.datetime.strptime(value, fmt).strftime('%Y-%m-%d %H:%M:%S')
|
1707
1766
|
except ValueError:
|
1708
1767
|
continue
|
1709
|
-
raise ValueError(f"
|
1768
|
+
raise ValueError(f"无效的日期格式2: {value}")
|
1710
1769
|
|
1711
1770
|
def _validate_value(self, value: Any, column_type: str) -> Any:
|
1712
1771
|
"""
|
@@ -1724,7 +1783,9 @@ class MySQLUploader:
|
|
1724
1783
|
column_type_lower = column_type.lower()
|
1725
1784
|
|
1726
1785
|
if 'int' in column_type_lower:
|
1727
|
-
|
1786
|
+
if isinstance(value, (str, bytes)) and not value.strip().isdigit():
|
1787
|
+
raise ValueError("非数字字符串无法转换为整数")
|
1788
|
+
return int(value)
|
1728
1789
|
elif any(t in column_type_lower for t in ['float', 'double', 'decimal']):
|
1729
1790
|
return float(value) if value is not None else None
|
1730
1791
|
elif '日期' in column_type_lower or 'time' in column_type_lower:
|
@@ -1819,6 +1880,58 @@ class MySQLUploader:
|
|
1819
1880
|
check_duplicate, duplicate_columns
|
1820
1881
|
)
|
1821
1882
|
|
1883
|
+
def _infer_data_type(self, value: Any) -> str:
    """Pick a MySQL column type for a sample value.

    Checks run in a fixed order: ``None``, ``bool`` (before ``int``, since
    ``bool`` is an ``int`` subclass), ``int``, ``float``, datetime-like
    (before plain ``date``, since ``datetime`` is a ``date`` subclass),
    ``date``, ``list``/``dict``, and finally ``str``. Strings accepted by
    ``self._validate_datetime`` become ``DATETIME``; other strings are sized
    by length. Integer types narrower than ``INT`` are not inferred.

    :param value: the value whose type should be inferred
    :return: a MySQL data type string; ``VARCHAR(255)`` for ``None`` and for
        any type not listed above
    """
    fallback = 'VARCHAR(255)'

    if value is None:
        return fallback
    if isinstance(value, bool):
        return 'TINYINT(1)'
    if isinstance(value, int):
        fits_signed_32 = -2147483648 <= value <= 2147483647
        return 'INT' if fits_signed_32 else 'BIGINT'
    if isinstance(value, float):
        return 'DECIMAL(10,2)'
    if isinstance(value, (datetime.datetime, pd.Timestamp)):
        return 'DATETIME'
    if isinstance(value, datetime.date):
        return 'DATE'
    if isinstance(value, (list, dict)):
        return 'JSON'
    if not isinstance(value, str):
        return fallback

    # A string that the date validator accepts is stored as a datetime.
    try:
        self._validate_datetime(value)
    except ValueError:
        pass
    else:
        return 'DATETIME'

    # Otherwise choose the smallest text type whose limit fits the length.
    text_len = len(value)
    size_limits = (
        (255, 'VARCHAR(255)'),
        (65535, 'TEXT'),
        (16777215, 'MEDIUMTEXT'),
    )
    for limit, sql_type in size_limits:
        if text_len <= limit:
            return sql_type
    return 'LONGTEXT'
|
1934
|
+
|
1822
1935
|
def _prepare_data(
|
1823
1936
|
self,
|
1824
1937
|
data: Union[Dict, List[Dict], pd.DataFrame],
|
@@ -1831,27 +1944,57 @@ class MySQLUploader:
|
|
1831
1944
|
:param data: 输入数据
|
1832
1945
|
:param set_typ: 列名和数据类型字典 {列名: 数据类型}
|
1833
1946
|
:param allow_null: 是否允许空值
|
1834
|
-
:return:
|
1947
|
+
:return: 待上传的数据列表和对应的数据类型
|
1835
1948
|
:raises ValueError: 如果数据验证失败
|
1836
1949
|
"""
|
1837
1950
|
# 统一数据格式为字典列表
|
1838
1951
|
if isinstance(data, pd.DataFrame):
|
1839
1952
|
try:
|
1953
|
+
# 将列名转为小写
|
1954
|
+
data.columns = [col.lower() for col in data.columns]
|
1840
1955
|
data = data.replace({pd.NA: None}).to_dict('records')
|
1841
1956
|
except Exception as e:
|
1842
|
-
self._log_with_metrics("error", f"
|
1843
|
-
raise ValueError(f"
|
1957
|
+
self._log_with_metrics("error", f"数据转字典时发生错误: {e}", )
|
1958
|
+
raise ValueError(f"数据转字典时发生错误: {e}")
|
1844
1959
|
elif isinstance(data, dict):
|
1845
|
-
data = [data]
|
1846
|
-
elif
|
1847
|
-
|
1960
|
+
data = [{k.lower(): v for k, v in data.items()}]
|
1961
|
+
elif isinstance(data, list) and all(isinstance(item, dict) for item in data):
|
1962
|
+
# 将列表中的每个字典键转为小写
|
1963
|
+
data = [{k.lower(): v for k, v in item.items()} for item in data]
|
1964
|
+
else:
|
1965
|
+
error_msg = "数据结构必须是字典、列表、字典列表或dataframe"
|
1848
1966
|
self._log_with_metrics('error', error_msg)
|
1849
1967
|
raise ValueError(error_msg)
|
1850
1968
|
|
1969
|
+
# 将set_typ的键转为小写
|
1970
|
+
set_typ = {k.lower(): v for k, v in set_typ.items()}
|
1971
|
+
|
1972
|
+
# 获取数据中实际存在的列名
|
1973
|
+
data_columns = set()
|
1974
|
+
if data:
|
1975
|
+
data_columns = set(data[0].keys())
|
1976
|
+
|
1977
|
+
# 过滤set_typ,只保留数据中存在的列
|
1978
|
+
filtered_set_typ = {}
|
1979
|
+
for col in data_columns:
|
1980
|
+
if col in set_typ:
|
1981
|
+
filtered_set_typ[col] = set_typ[col]
|
1982
|
+
else:
|
1983
|
+
# 如果列不在set_typ中,尝试推断类型
|
1984
|
+
sample_values = [row[col] for row in data if col in row and row[col] is not None][:10]
|
1985
|
+
if sample_values:
|
1986
|
+
inferred_type = self._infer_data_type(sample_values[0])
|
1987
|
+
filtered_set_typ[col] = inferred_type
|
1988
|
+
self._log_with_metrics('debug', f"自动推断列'{col}'的数据类型为: {inferred_type}")
|
1989
|
+
else:
|
1990
|
+
# 没有样本值,使用默认类型
|
1991
|
+
filtered_set_typ[col] = 'VARCHAR(255)'
|
1992
|
+
self._log_with_metrics('debug', f"为列'{col}'使用默认数据类型: VARCHAR(255)")
|
1993
|
+
|
1851
1994
|
prepared_data = []
|
1852
1995
|
for row_idx, row in enumerate(data, 1):
|
1853
1996
|
prepared_row = {}
|
1854
|
-
for col_name
|
1997
|
+
for col_name in filtered_set_typ:
|
1855
1998
|
# 跳过id列,不允许外部传入id
|
1856
1999
|
if col_name.lower() == 'id':
|
1857
2000
|
continue
|
@@ -1864,7 +2007,7 @@ class MySQLUploader:
|
|
1864
2007
|
prepared_row[col_name] = None
|
1865
2008
|
else:
|
1866
2009
|
try:
|
1867
|
-
prepared_row[col_name] = self._validate_value(row[col_name],
|
2010
|
+
prepared_row[col_name] = self._validate_value(row[col_name], filtered_set_typ[col_name])
|
1868
2011
|
except ValueError as e:
|
1869
2012
|
error_msg = f"Row {row_idx}, column '{col_name}': {str(e)}"
|
1870
2013
|
self._log_with_metrics('error', error_msg)
|
@@ -1872,7 +2015,7 @@ class MySQLUploader:
|
|
1872
2015
|
prepared_data.append(prepared_row)
|
1873
2016
|
|
1874
2017
|
self._log_with_metrics('debug', f"已准备 {len(prepared_data)} 行数据")
|
1875
|
-
return prepared_data
|
2018
|
+
return prepared_data, filtered_set_typ
|
1876
2019
|
|
1877
2020
|
def upload_data(
|
1878
2021
|
self,
|
@@ -1913,7 +2056,7 @@ class MySQLUploader:
|
|
1913
2056
|
try:
|
1914
2057
|
# 验证参数
|
1915
2058
|
if not set_typ:
|
1916
|
-
error_msg = "
|
2059
|
+
error_msg = "列的数据类型缺失"
|
1917
2060
|
self._log_with_metrics('error', error_msg)
|
1918
2061
|
raise ValueError(error_msg)
|
1919
2062
|
|
@@ -1923,7 +2066,7 @@ class MySQLUploader:
|
|
1923
2066
|
raise ValueError(error_msg)
|
1924
2067
|
|
1925
2068
|
# 准备数据
|
1926
|
-
prepared_data = self._prepare_data(data, set_typ, allow_null)
|
2069
|
+
prepared_data, set_typ = self._prepare_data(data, set_typ, allow_null)
|
1927
2070
|
|
1928
2071
|
# 检查数据库是否存在
|
1929
2072
|
if not self._check_database_exists(db_name):
|
@@ -2019,7 +2162,17 @@ class MySQLUploader:
|
|
2019
2162
|
batch_id: Optional[str] = None
|
2020
2163
|
):
|
2021
2164
|
"""
|
2022
|
-
|
2165
|
+
插入数据到表中
|
2166
|
+
|
2167
|
+
参数:
|
2168
|
+
db_name: 数据库名
|
2169
|
+
table_name: 表名
|
2170
|
+
data: 要插入的数据列表
|
2171
|
+
set_typ: 列名和数据类型字典 {列名: 数据类型}
|
2172
|
+
check_duplicate: 是否检查重复
|
2173
|
+
duplicate_columns: 用于检查重复的列(为空时检查所有列)
|
2174
|
+
batch_size: 批量插入大小
|
2175
|
+
batch_id: 批次ID用于日志追踪
|
2023
2176
|
"""
|
2024
2177
|
if not data:
|
2025
2178
|
return
|
@@ -2033,27 +2186,40 @@ class MySQLUploader:
|
|
2033
2186
|
if check_duplicate:
|
2034
2187
|
if not duplicate_columns:
|
2035
2188
|
duplicate_columns = all_columns
|
2189
|
+
else:
|
2190
|
+
duplicate_columns = [col for col in duplicate_columns if col != 'id']
|
2191
|
+
|
2192
|
+
conditions = []
|
2193
|
+
for col in duplicate_columns:
|
2194
|
+
col_type = set_typ.get(col, '').lower()
|
2195
|
+
|
2196
|
+
# 处理DECIMAL类型,使用ROUND确保精度一致
|
2197
|
+
if col_type.startswith('decimal'):
|
2198
|
+
# 提取小数位数,如DECIMAL(10,2)提取2
|
2199
|
+
scale_match = re.search(r'decimal\(\d+,(\d+)\)', col_type)
|
2200
|
+
scale = int(scale_match.group(1)) if scale_match else 2
|
2201
|
+
conditions.append(f"ROUND(`{self._validate_identifier(col)}`, {scale}) = ROUND(%s, {scale})")
|
2202
|
+
else:
|
2203
|
+
conditions.append(f"`{self._validate_identifier(col)}` = %s")
|
2036
2204
|
|
2037
|
-
safe_dup_columns = [self._validate_identifier(col) for col in duplicate_columns]
|
2038
|
-
conditions = [f"`{col}` = %s" for col in safe_dup_columns]
|
2039
2205
|
where_clause = " AND ".join(conditions)
|
2040
2206
|
|
2041
2207
|
sql = f"""
|
2042
|
-
|
2043
|
-
|
2044
|
-
|
2045
|
-
|
2046
|
-
|
2047
|
-
|
2048
|
-
|
2049
|
-
|
2050
|
-
|
2208
|
+
INSERT INTO `{db_name}`.`{table_name}`
|
2209
|
+
(`{'`,`'.join(safe_columns)}`)
|
2210
|
+
SELECT {placeholders}
|
2211
|
+
FROM DUAL
|
2212
|
+
WHERE NOT EXISTS (
|
2213
|
+
SELECT 1 FROM `{db_name}`.`{table_name}`
|
2214
|
+
WHERE {where_clause}
|
2215
|
+
)
|
2216
|
+
"""
|
2051
2217
|
else:
|
2052
2218
|
sql = f"""
|
2053
|
-
|
2054
|
-
|
2055
|
-
|
2056
|
-
|
2219
|
+
INSERT INTO `{db_name}`.`{table_name}`
|
2220
|
+
(`{'`,`'.join(safe_columns)}`)
|
2221
|
+
VALUES ({placeholders})
|
2222
|
+
"""
|
2057
2223
|
|
2058
2224
|
total_inserted = 0
|
2059
2225
|
total_skipped = 0
|
@@ -2069,7 +2235,9 @@ class MySQLUploader:
|
|
2069
2235
|
|
2070
2236
|
for row in batch:
|
2071
2237
|
try:
|
2238
|
+
# 准备参数
|
2072
2239
|
row_values = [row.get(col) for col in all_columns]
|
2240
|
+
# 如果是排重检查,添加排重列值
|
2073
2241
|
if check_duplicate:
|
2074
2242
|
row_values += [row.get(col) for col in duplicate_columns]
|
2075
2243
|
|
@@ -2086,11 +2254,13 @@ class MySQLUploader:
|
|
2086
2254
|
'batch_id': batch_id,
|
2087
2255
|
'database': db_name,
|
2088
2256
|
'table': table_name,
|
2089
|
-
'row_data': row,
|
2090
2257
|
'error_type': type(e).__name__,
|
2091
|
-
'error_message': str(e)
|
2258
|
+
'error_message': str(e),
|
2259
|
+
'column_types': set_typ,
|
2260
|
+
'duplicate_check': check_duplicate,
|
2261
|
+
'duplicate_columns': duplicate_columns
|
2092
2262
|
}
|
2093
|
-
self._log_with_metrics('error', "
|
2263
|
+
self._log_with_metrics('error', f"单行插入失败: {error_details}")
|
2094
2264
|
continue # 跳过当前行,继续处理下一行
|
2095
2265
|
|
2096
2266
|
# 更新统计信息
|
@@ -2115,7 +2285,7 @@ class MySQLUploader:
|
|
2115
2285
|
'time_elapsed': batch_elapsed,
|
2116
2286
|
'rows_per_second': successful_rows / batch_elapsed if batch_elapsed > 0 else 0
|
2117
2287
|
}
|
2118
|
-
self._log_with_metrics('debug', "批次处理完成
|
2288
|
+
self._log_with_metrics('debug', f"批次处理完成 {batch_info}")
|
2119
2289
|
|
2120
2290
|
# 更新全局指标
|
2121
2291
|
self.metrics['failed_rows'] += total_failed
|
@@ -2178,37 +2348,66 @@ class MySQLUploader:
|
|
2178
2348
|
if hasattr(self, 'logger') and self.logger and self.enable_metrics:
|
2179
2349
|
self._log_with_metrics('debug', "最终性能指标", self.get_metrics())
|
2180
2350
|
|
2181
|
-
def
|
2182
|
-
|
2351
|
+
def _check_pool_health(self):
    """Check whether the connection pool can hand out a live connection.

    Borrows one connection from ``self.pool``, pings it (allowing a
    reconnect), and always gives the connection back, even when the ping
    fails. Any failure is logged as a warning and reported as unhealthy
    rather than raised.

    :return: True if a connection was obtained, pinged and closed without
        error; False otherwise
    """
    try:
        conn = self.pool.connection()
        try:
            conn.ping(reconnect=True)
        finally:
            # Give the connection back even when the ping raised.
            conn.close()
        return True
    except Exception as e:
        # The exception must be bound to a name here: the log payload below
        # reads it, and an unbound name would raise NameError in the handler.
        self._log_with_metrics('warning', "连接池健康检查失败", {
            'error': str(e)
        })
        return False
|
2363
|
+
|
2364
|
+
def retry_on_failure(max_retries=3, delay=1):
    """Build a decorator that retries a call on pymysql connection errors.

    The wrapped callable is attempted up to ``max_retries`` times.
    ``pymysql.OperationalError`` and ``pymysql.InterfaceError`` trigger a
    retry after sleeping ``delay * attempt_number`` seconds; once the last
    attempt fails they are re-raised as ``MySQLUploaderError``. Any other
    exception is wrapped in ``MySQLUploaderError`` immediately, with no retry.

    :param max_retries: total number of attempts
    :param delay: base sleep in seconds, multiplied by the attempt number
    :return: a decorator preserving the wrapped function's metadata
    """
    def wrap(func):
        def retrying(*args, **kwargs):
            most_recent_error = None
            attempt_no = 0
            while attempt_no < max_retries:
                attempt_no += 1
                try:
                    return func(*args, **kwargs)
                except (pymysql.OperationalError, pymysql.InterfaceError) as e:
                    most_recent_error = e
                    if attempt_no >= max_retries:
                        raise MySQLUploaderError(f"操作重试{max_retries}次后失败") from e
                    # Linear back-off: wait longer after each failed attempt.
                    time.sleep(delay * attempt_no)
                except Exception as e:
                    raise MySQLUploaderError(f"操作失败: {str(e)}") from e
            # Only reached when the loop body never ran to a return or raise.
            if most_recent_error:
                raise most_recent_error
            raise MySQLUploaderError("未知错误")

        return wraps(func)(retrying)

    return wrap
|
2187
2385
|
|
2188
|
-
|
2386
|
+
def main():
|
2189
2387
|
uploader = MySQLUploader(
|
2190
2388
|
username='root',
|
2191
2389
|
password='1',
|
2192
2390
|
host='localhost',
|
2193
2391
|
port=3306,
|
2194
|
-
logging_mode='
|
2392
|
+
logging_mode='console',
|
2195
2393
|
log_level='info'
|
2196
2394
|
)
|
2197
2395
|
|
2198
2396
|
# 定义列和数据类型
|
2199
2397
|
set_typ = {
|
2200
|
-
'id': 'INT',
|
2201
2398
|
'name': 'VARCHAR(255)',
|
2202
2399
|
'age': 'INT',
|
2203
2400
|
'salary': 'DECIMAL(10,2)',
|
2204
|
-
'日期': 'DATE'
|
2401
|
+
'日期': 'DATE',
|
2402
|
+
'shop': None,
|
2205
2403
|
}
|
2206
2404
|
|
2207
2405
|
# 准备数据
|
2208
2406
|
data = [
|
2209
|
-
{'日期': '2023-01-
|
2210
|
-
{'日期': '2023-01-15', 'name': 'Alice', '
|
2211
|
-
{'日期': '2023-
|
2407
|
+
{'日期': '2023-01-8', 'name': 'JACk', 'AGE': '24', 'salary': 555.1545},
|
2408
|
+
{'日期': '2023-01-15', 'name': 'Alice', 'AGE': 35, 'salary': 100},
|
2409
|
+
{'日期': '2023-01-15', 'name': 'Alice', 'AGE': 30, 'salary': 0.0},
|
2410
|
+
{'日期': '2023-02-20', 'name': 'Bob', 'AGE': 25, 'salary': 45000.75}
|
2212
2411
|
]
|
2213
2412
|
|
2214
2413
|
# 上传数据
|
@@ -2217,15 +2416,18 @@ if __name__ == '__main__':
|
|
2217
2416
|
table_name='测试表',
|
2218
2417
|
data=data,
|
2219
2418
|
set_typ=set_typ, # 定义列和数据类型
|
2220
|
-
primary_keys=[], #
|
2419
|
+
primary_keys=[], # 创建唯一主键
|
2221
2420
|
check_duplicate=True, # 检查重复数据
|
2222
|
-
duplicate_columns=[
|
2421
|
+
duplicate_columns=[], # 指定排重的组合键
|
2223
2422
|
allow_null=False, # 允许插入空值
|
2224
2423
|
partition_by='year', # 按月分表
|
2225
|
-
partition_date_column
|
2226
|
-
auto_create
|
2227
|
-
indexes
|
2424
|
+
partition_date_column='日期', # 用于分表的日期列名,默认为'日期'
|
2425
|
+
auto_create=True, # 表不存在时自动创建, 默认参数不要更改
|
2426
|
+
indexes=[], # 指定索引列
|
2228
2427
|
)
|
2229
2428
|
|
2230
|
-
# 关闭上传器
|
2231
2429
|
uploader.close()
|
2430
|
+
|
2431
|
+
|
2432
|
+
if __name__ == '__main__':
|
2433
|
+
pass
|
@@ -1,5 +1,5 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
|
-
mdbq/__version__.py,sha256=
|
2
|
+
mdbq/__version__.py,sha256=Pw2FixsnE8Hf360X55h_tF8Xeez7UgHd2pSgUkJY-v4,17
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
4
|
mdbq/aggregation/optimize.py,sha256=2oalzD9weZhDclUC22OLxYa8Zj7KnmsGUoUau_Jlyc4,19796
|
5
5
|
mdbq/aggregation/query_data.py,sha256=5_OzjGR5Sq00q-EgAYmSE5V9i4Solw9y4hkldl4mvt8,179808
|
@@ -8,7 +8,7 @@ mdbq/config/config.py,sha256=eaTfrfXQ65xLqjr5I8-HkZd_jEY1JkGinEgv3TSLeoQ,3170
|
|
8
8
|
mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
|
9
9
|
mdbq/log/spider_logging.py,sha256=-ozWWEGm3HVv604ozs_OOvVwumjokmUPwbaodesUrPY,1664
|
10
10
|
mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
|
11
|
-
mdbq/mysql/mysql.py,sha256=
|
11
|
+
mdbq/mysql/mysql.py,sha256=YX-tgugceODrJHcXgbosWFVThjXv3I2gCvTt_siKBOI,108606
|
12
12
|
mdbq/mysql/s_query.py,sha256=X055aLRAgxVvueXx4NbfNjp6MyBI02_XBb1pTKw09L0,8660
|
13
13
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
14
14
|
mdbq/other/download_sku_picture.py,sha256=YU8DxKMXbdeE1OOKEA848WVp62jYHw5O4tXTjUdq9H0,44832
|
@@ -22,7 +22,7 @@ mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
|
|
22
22
|
mdbq/redis/getredis.py,sha256=Uk8-cOWT0JU1qRyIVqdbYokSLvkDIAfcokmYj1ebw8k,24104
|
23
23
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
24
24
|
mdbq/spider/aikucun.py,sha256=OhyEv1VyAKTOHjLDM37iNDQeRg5OnrNoKODoG2VxHes,19806
|
25
|
-
mdbq-3.9.
|
26
|
-
mdbq-3.9.
|
27
|
-
mdbq-3.9.
|
28
|
-
mdbq-3.9.
|
25
|
+
mdbq-3.9.3.dist-info/METADATA,sha256=Vt2mII7wAfEhzQa9G8PreCPV_hkdM1DLTTDcUMyepPg,363
|
26
|
+
mdbq-3.9.3.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
27
|
+
mdbq-3.9.3.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
28
|
+
mdbq-3.9.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|