mdbq 3.10.9__py3-none-any.whl → 3.10.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/__version__.py +1 -1
- mdbq/mysql/deduplicator.py +151 -145
- mdbq/mysql/uploader.py +263 -327
- {mdbq-3.10.9.dist-info → mdbq-3.10.11.dist-info}/METADATA +1 -1
- {mdbq-3.10.9.dist-info → mdbq-3.10.11.dist-info}/RECORD +7 -7
- {mdbq-3.10.9.dist-info → mdbq-3.10.11.dist-info}/WHEEL +0 -0
- {mdbq-3.10.9.dist-info → mdbq-3.10.11.dist-info}/top_level.txt +0 -0
mdbq/mysql/uploader.py
CHANGED
@@ -1,17 +1,3 @@
-"""
-MySQL数据上传工具类
-
-这个模块提供了一个用于将数据上传到MySQL数据库的工具类。它支持以下主要功能:
-1. 自动创建数据库和表
-2. 支持数据分表存储
-3. 支持数据重复检查和更新
-4. 支持批量数据插入
-5. 支持多种事务模式
-6. 自动类型转换和验证
-7. 连接池管理
-8. 错误重试机制
-"""
-
 # -*- coding:utf-8 -*-
 import datetime
 import re
@@ -171,9 +157,7 @@ class MySQLUploader:
         """
         if hasattr(self, 'pool') and self.pool is not None and self._check_pool_health():
             return self.pool
-
         self.pool = None
-
         pool_params = {
             'creator': pymysql,
             'host': self.host,
@@ -189,11 +173,10 @@ class MySQLUploader:
             'write_timeout': self.write_timeout,
             'autocommit': False
         }
-
         if self.ssl:
             required_keys = {'ca', 'cert', 'key'}
             if not all(k in self.ssl for k in required_keys):
-                error_msg =
+                error_msg = 'SSL配置必须包含ca、cert和key'
                 logger.error(error_msg)
                 raise ValueError(error_msg)
             pool_params['ssl'] = {
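Note: after this change the `ssl` option must be a dict containing the keys `ca`, `cert` and `key`, with `check_hostname` optional and defaulting to False. A minimal sketch of a conforming value; the file paths are illustrative only:

    # Hypothetical paths; only the key names are required by the check above.
    ssl_config = {
        'ca': '/path/to/ca.pem',
        'cert': '/path/to/client-cert.pem',
        'key': '/path/to/client-key.pem',
        'check_hostname': False,  # optional, defaults to False
    }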
@@ -202,19 +185,14 @@ class MySQLUploader:
                 'key': self.ssl['key'],
                 'check_hostname': self.ssl.get('check_hostname', False)
             }
-
         try:
             pool = PooledDB(**pool_params)
-            logger.info(
-                '连接池': self.pool_size
-            })
+            logger.info('连接池创建成功', {'连接池': self.pool_size, 'host': self.host, 'port': self.port})
             return pool
         except Exception as e:
             self.pool = None
-            logger.error(
-
-            })
-            raise ConnectionError(f"连接池创建失败: {str(e)}")
+            logger.error('连接池创建失败', {'error': str(e), 'host': self.host, 'port': self.port})
+            raise ConnectionError(f'连接池创建失败: {str(e)}')
 
     def _execute_with_retry(self, func):
         """
@@ -228,32 +206,17 @@ class MySQLUploader:
         def wrapper(*args, **kwargs):
             last_exception = None
             operation = func.__name__
-
-            logger.debug(f"开始执行操作: {operation}", {
-                'attempt': 1,
-                'max_retries': self.max_retries
-            })
-
+            logger.debug(f'开始执行操作: {operation}', {'max_retries': self.max_retries})
             for attempt in range(self.max_retries):
                 try:
                     result = func(*args, **kwargs)
-
                     if attempt > 0:
-                        logger.info(
-                            'operation': operation,
-                            'attempts': attempt + 1
-                        })
+                        logger.info('операция成功(重试后)', {'operation': operation, 'attempts': attempt + 1})
                     else:
-                        logger.debug(
-                            'operation': operation
-                        })
-
+                        logger.debug('操作成功', {'operation': operation})
                     return result
-
                 except (pymysql.OperationalError, pymysql.err.MySQLError) as e:
                     last_exception = e
-
-                    # 记录详细的MySQL错误信息
                     error_details = {
                         'operation': operation,
                         'error_code': e.args[0] if e.args else None,
@@ -261,47 +224,38 @@ class MySQLUploader:
                         'attempt': attempt + 1,
                         'max_retries': self.max_retries
                     }
-
                     if attempt < self.max_retries - 1:
                         wait_time = self.retry_interval * (attempt + 1)
                         error_details['wait_time'] = wait_time
-                        logger.warning(
+                        logger.warning('数据库操作失败,准备重试', error_details)
                         time.sleep(wait_time)
-
-                        # 尝试重新连接
                         try:
                             self.pool = self._create_connection_pool()
-                            logger.info(
+                            logger.info('成功重新建立数据库连接')
                         except Exception as reconnect_error:
-                            logger.error(
-                                'error': str(reconnect_error)
-                            })
+                            logger.error('重连失败', {'error': str(reconnect_error)})
                     else:
-                        logger.error(
-
+                        logger.error('操作最终失败', error_details)
                 except pymysql.IntegrityError as e:
-                    logger.error(
+                    logger.error('完整性约束错误', {
                         'operation': operation,
                         'error_code': e.args[0] if e.args else None,
                         'error_message': e.args[1] if len(e.args) > 1 else None
                     })
                     raise e
-
                 except Exception as e:
                     last_exception = e
-                    logger.error(
+                    logger.error('发生意外错误', {
                         'operation': operation,
                         'error_type': type(e).__name__,
                         'error_message': str(e),
                         'error_args': e.args if hasattr(e, 'args') else None
                     })
                     break
-
-            raise last_exception if last_exception else Exception("发生未知错误")
-
+            raise last_exception if last_exception else Exception('发生未知错误')
         return wrapper
 
-    def _get_connection(self):
+    def _get_connection(self) -> pymysql.connections.Connection:
         """
         从连接池获取数据库连接
 
@@ -310,11 +264,11 @@ class MySQLUploader:
         """
         try:
             conn = self.pool.connection()
-            logger.debug(
+            logger.debug('获取数据库连接', {'host': self.host, 'port': self.port})
             return conn
         except Exception as e:
-            logger.error(
-            raise ConnectionError(f
+            logger.error('获取数据库连接失败', {'error': str(e)})
+            raise ConnectionError(f'连接数据库失败: {str(e)}')
 
     def _check_database_exists(self, db_name: str) -> bool:
         """
@@ -325,23 +279,19 @@ class MySQLUploader:
         :raises: 可能抛出数据库相关异常
         """
         db_name = self._validate_identifier(db_name)
-        sql =
-
+        sql = 'SELECT SCHEMA_NAME FROM INFORMATION_SCHEMA.SCHEMATA WHERE SCHEMA_NAME = %s'
         try:
             with self._get_connection() as conn:
                 with conn.cursor() as cursor:
                     cursor.execute(sql, (db_name,))
                     exists = bool(cursor.fetchone())
-                    logger.debug(
+                    logger.debug('数据库存在检查', {'库': db_name, '存在': exists})
                     return exists
         except Exception as e:
-            logger.error(
-                '库': db_name,
-                '检查数据库是否存在时出错': str(e),
-            })
+            logger.error('检查数据库是否存在时出错', {'库': db_name, '错误': str(e)})
             raise
 
-    def _create_database(self, db_name: str):
+    def _create_database(self, db_name: str) -> None:
         """
         创建数据库
 
@@ -349,19 +299,15 @@ class MySQLUploader:
         :raises: 可能抛出数据库相关异常
         """
         db_name = self._validate_identifier(db_name)
-        sql = f
-
+        sql = f'CREATE DATABASE IF NOT EXISTS `{db_name}` CHARACTER SET {self.charset} COLLATE {self.collation}'
         try:
             with self._get_connection() as conn:
                 with conn.cursor() as cursor:
                     cursor.execute(sql)
                     conn.commit()
-                    logger.info(
+                    logger.info('数据库已创建', {'库': db_name})
         except Exception as e:
-            logger.error(
-                '无法创建数据库': str(e),
-                '库': db_name
-            })
+            logger.error('无法创建数据库', {'库': db_name, '错误': str(e)})
             conn.rollback()
             raise
 
@@ -376,24 +322,16 @@ class MySQLUploader:
         :raises ValueError: 如果日期格式无效或分表方式无效
         """
         try:
-            # date_obj = datetime.datetime.strptime(date_value, '%Y-%m-%d')
             date_obj = self._validate_datetime(date_value, True)
         except ValueError:
-            logger.error(
-
-                '表': table_name
-            })
-            raise ValueError(f"`{table_name}` 无效的日期格式1: `{date_value}`")
-
+            logger.error('无效的日期格式', {'表': table_name, '日期值': date_value})
+            raise ValueError(f"`{table_name}` 无效的日期格式: `{date_value}`")
         if partition_by == 'year':
             return f"{table_name}_{date_obj.year}"
         elif partition_by == 'month':
             return f"{table_name}_{date_obj.year}_{date_obj.month:02d}"
         else:
-            logger.error(
-                "分表方式必须是 'year' 或 'month' 或 'None'": partition_by,
-                '表': table_name
-            })
+            logger.error('分表方式无效', {'表': table_name, '分表方式': partition_by})
             raise ValueError("分表方式必须是 'year' 或 'month' 或 'None'")
 
     def _validate_identifier(self, identifier: str) -> str:
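For reference, the two branches above yield partition table names of this shape (a sketch with illustrative values; the date itself is validated by `_validate_datetime` first):

    import datetime

    table_name, date_obj = 'sales', datetime.date(2024, 5, 17)
    f"{table_name}_{date_obj.year}"                        # 'sales_2024'    (partition_by='year')
    f"{table_name}_{date_obj.year}_{date_obj.month:02d}"   # 'sales_2024_05' (partition_by='month')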
@@ -405,36 +343,22 @@ class MySQLUploader:
         :raises ValueError: 当标识符无效时抛出
         """
         if not identifier or not isinstance(identifier, str):
-            logger.error(
-                '无效的标识符': identifier
-            })
+            logger.error('无效的标识符', {'标识符': identifier})
             raise ValueError(f"无效的标识符: `{identifier}`")
-
-        # 统一转为小写(除非明确要求大小写敏感)
         if not self.case_sensitive:
             identifier = identifier.lower()
-
-        # 移除非法字符,只保留字母、数字、下划线和美元符号
-        cleaned = re.sub(r'[^\w\u4e00-\u9fff$]', '_', identifier)
-
-        # 将多个连续的下划线替换为单个下划线, 移除开头和结尾的下划线
+        cleaned = re.sub(r'[^-\w\u4e00-\u9fff$]', '_', identifier)
         cleaned = re.sub(r'_+', '_', cleaned).strip('_')
-
         if not cleaned:
-            logger.error(
-                '无法清理异常标识符': identifier
-            })
+            logger.error('无法清理异常标识符', {'原始标识符': identifier})
             raise ValueError(f"无法清理异常标识符: `{identifier}`")
-
-        # 检查是否为MySQL保留字
         mysql_keywords = {
             'select', 'insert', 'update', 'delete', 'from', 'where', 'and', 'or',
             'not', 'like', 'in', 'is', 'null', 'true', 'false', 'between'
         }
         if cleaned.lower() in mysql_keywords:
-            logger.debug(
+            logger.debug('存在MySQL保留字', {'标识符': cleaned})
             return f"`{cleaned}`"
-
         return cleaned
 
     def _check_table_exists(self, db_name: str, table_name: str) -> bool:
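Note the character-class change in `_validate_identifier`: the new pattern `[^-\w\u4e00-\u9fff$]` keeps hyphens instead of rewriting them to underscores. A small demonstration with an illustrative input:

    import re

    re.sub(r'[^\w\u4e00-\u9fff$]', '_', '2024-05-sales')   # old behaviour: '2024_05_sales'
    re.sub(r'[^-\w\u4e00-\u9fff$]', '_', '2024-05-sales')  # new behaviour: '2024-05-sales'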
@@ -450,8 +374,8 @@ class MySQLUploader:
         if cache_key in self._table_metadata_cache:
             cached_time, result = self._table_metadata_cache[cache_key]
             if time.time() - cached_time < self.metadata_cache_ttl:
+                logger.debug('表存在缓存命中', {'库': db_name, '表': table_name, '存在': result})
                 return result
-
         db_name = self._validate_identifier(db_name)
         table_name = self._validate_identifier(table_name)
         sql = """
@@ -459,21 +383,16 @@ class MySQLUploader:
             FROM INFORMATION_SCHEMA.TABLES
             WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s
         """
-
         try:
             with self._get_connection() as conn:
                 with conn.cursor() as cursor:
                     cursor.execute(sql, (db_name, table_name))
                     result = bool(cursor.fetchone())
         except Exception as e:
-            logger.error(
-                '库': db_name,
-                '表': table_name,
-                '检查数据表是否存在时发生未知错误': str(e)})
+            logger.error('检查数据表是否存在时发生未知错误', {'库': db_name, '表': table_name, '错误': str(e)})
             raise
-
-        # 执行查询并缓存结果
         self._table_metadata_cache[cache_key] = (time.time(), result)
+        logger.debug('表存在检查', {'库': db_name, '表': table_name, '存在': result})
         return result
 
     def _create_table(
@@ -485,7 +404,7 @@ class MySQLUploader:
             date_column: Optional[str] = None,
             indexes: Optional[List[str]] = None,
             allow_null: bool = False
-    ):
+    ) -> None:
         """
         创建数据表
 
@@ -500,69 +419,42 @@ class MySQLUploader:
         """
         db_name = self._validate_identifier(db_name)
         table_name = self._validate_identifier(table_name)
-
         if not set_typ:
-            logger.error(
-                '库': db_name,
-                '表': table_name,
-                'set_typ 未指定': set_typ})
+            logger.error('建表时未指定set_typ', {'库': db_name, '表': table_name})
             raise ValueError('set_typ 未指定')
-
-        # 构建列定义SQL
         column_defs = ["`id` INT NOT NULL AUTO_INCREMENT"]
-
-        # 添加其他列定义
         for col_name, col_type in set_typ.items():
-            # 跳过id列,因为已经在前面添加了
             if col_name.lower() == 'id':
                 continue
             safe_col_name = self._validate_identifier(col_name)
             col_def = f"`{safe_col_name}` {col_type}"
-
-            # 根据allow_null决定是否添加NOT NULL约束
            if not allow_null and not col_type.lower().startswith('json'):
                 col_def += " NOT NULL"
-
             column_defs.append(col_def)
-
-        # 添加主键定义
         if primary_keys:
-            # 确保id在主键中
             if 'id' not in [pk.lower() for pk in primary_keys]:
                 primary_keys = ['id'] + primary_keys
         else:
-            # 如果没有指定主键,则使用id作为主键
             primary_keys = ['id']
-
-        # 添加主键定义
         safe_primary_keys = [self._validate_identifier(pk) for pk in primary_keys]
         primary_key_sql = f", PRIMARY KEY (`{'`,`'.join(safe_primary_keys)}`)"
-
-        # 构建完整SQL
         sql = f"""
         CREATE TABLE IF NOT EXISTS `{db_name}`.`{table_name}` (
             {','.join(column_defs)}
             {primary_key_sql}
         ) ENGINE=InnoDB DEFAULT CHARSET={self.charset} COLLATE={self.collation}
         """
-
         try:
             with self._get_connection() as conn:
                 with conn.cursor() as cursor:
                     cursor.execute(sql)
-                    logger.info(
-
-                # 添加普通索引
+                logger.info('数据表已创建', {'库': db_name, '表': table_name})
                 index_statements = []
-
-                # 日期列索引
                 if date_column and date_column in set_typ:
                     safe_date_col = self._validate_identifier(date_column)
                     index_statements.append(
                         f"ALTER TABLE `{db_name}`.`{table_name}` ADD INDEX `idx_{safe_date_col}` (`{safe_date_col}`)"
                     )
-
-                # 其他索引
                 if indexes:
                     for idx_col in indexes:
                         if idx_col in set_typ:
@@ -570,27 +462,19 @@ class MySQLUploader:
                             index_statements.append(
                                 f"ALTER TABLE `{db_name}`.`{table_name}` ADD INDEX `idx_{safe_idx_col}` (`{safe_idx_col}`)"
                             )
-
-                # 执行所有索引创建语句
                 if index_statements:
                     with conn.cursor() as cursor:
                         for stmt in index_statements:
                             cursor.execute(stmt)
-                            logger.debug(
-
+                            logger.debug('执行索引语句', {'SQL': stmt})
                         conn.commit()
-                        logger.info(
-
+                        logger.info('索引已添加', {'库': db_name, '表': table_name, '索引': indexes})
         except Exception as e:
-            logger.error(
-                '库': db_name,
-                '表': table_name,
-                '建表失败': str(e),
-            })
+            logger.error('建表失败', {'库': db_name, '表': table_name, '错误': str(e)})
             conn.rollback()
             raise
 
-    def _validate_datetime(self, value, date_type=False):
+    def _validate_datetime(self, value: str, date_type: bool = False) -> Any:
         """
         验证并标准化日期时间格式
 
@@ -614,12 +498,17 @@ class MySQLUploader:
         for fmt in formats:
             try:
                 if date_type:
-
+                    result = pd.to_datetime(datetime.datetime.strptime(value, fmt).strftime('%Y-%m-%d'))
+                    logger.debug('日期格式化成功', {'原始': value, '格式': fmt, '结果': str(result)})
+                    return result
                 else:
-
+                    result = datetime.datetime.strptime(value, fmt).strftime('%Y-%m-%d %H:%M:%S')
+                    logger.debug('日期格式化成功', {'原始': value, '格式': fmt, '结果': str(result)})
+                    return result
             except ValueError:
                 continue
-
+        logger.error('无效的日期格式', {'值': value})
+        raise ValueError(f"无效的日期格式: `{value}`")
 
     def _validate_value(self, value: Any, column_type: str, allow_null: bool) -> Any:
         """
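With this change `_validate_datetime` returns as soon as one format matches and raises once every format has failed; with `date_type=True` the value comes back as a pandas Timestamp normalized to the date, otherwise as a `'%Y-%m-%d %H:%M:%S'` string. A rough sketch of the two paths, using illustrative values:

    import datetime
    import pandas as pd

    value, fmt = '2024-05-17 08:30:00', '%Y-%m-%d %H:%M:%S'
    pd.to_datetime(datetime.datetime.strptime(value, fmt).strftime('%Y-%m-%d'))  # Timestamp('2024-05-17 00:00:00')
    datetime.datetime.strptime(value, fmt).strftime('%Y-%m-%d %H:%M:%S')         # '2024-05-17 08:30:00'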
@@ -633,40 +522,31 @@ class MySQLUploader:
         """
         if value is None:
             if not allow_null:
+                logger.warning('字段值为None但不允许空值', {'字段类型': column_type})
                 return 'none'
             return None
-
         try:
             column_type_lower = column_type.lower()
-
-            # 处理百分比值
             if isinstance(value, str) and value.strip().endswith('%'):
                 try:
-                    # 移除百分号并转换为小数
                     percent_str = value.strip().replace('%', '')
-
-                    if 'e' in percent_str.lower():
-                        percent_value = float(percent_str)
-                    else:
-                        percent_value = float(percent_str)
+                    percent_value = float(percent_str)
                     decimal_value = percent_value / 100
+                    logger.debug('百分比字符串转小数', {'原始': value, '结果': decimal_value})
                     return decimal_value
                 except ValueError:
-
-
+                    logger.warning('百分比字符串转小数失败', {'原始': value})
             elif 'int' in column_type_lower:
                 if isinstance(value, str):
-                    # 移除可能的逗号和空格
                     value = value.replace(',', '').strip()
-                    # 尝试转换为浮点数再转整数
                     try:
                         return int(float(value))
                     except ValueError:
+                        logger.error('字符串转整数失败', {'值': value})
                         raise ValueError(f"`{value}` -> 无法转为整数")
                 return int(value) if value is not None else None
             elif any(t in column_type_lower for t in ['float', 'double', 'decimal']):
                 if isinstance(value, str):
-                    # 处理可能包含逗号的数字字符串
                     value = value.replace(',', '')
                 return float(value) if value is not None else None
             elif 'date' in column_type_lower or 'time' in column_type_lower:
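The rewritten percent branch drops the redundant scientific-notation check (both arms called `float` anyway) and simply divides by 100; for example, with an illustrative value:

    value = '12.5%'
    percent_value = float(value.strip().replace('%', ''))  # 12.5
    decimal_value = percent_value / 100                     # 0.125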
@@ -674,12 +554,12 @@ class MySQLUploader:
                     return value.strftime('%Y-%m-%d %H:%M:%S')
                 elif isinstance(value, str):
                     try:
-                        return self._validate_datetime(value)
+                        return self._validate_datetime(value)
                     except ValueError as e:
+                        logger.error('无效日期格式', {'值': value, '错误': str(e)})
                         raise ValueError(f"无效日期格式: `{value}` -> {str(e)}")
                 return str(value)
             elif 'char' in column_type_lower or 'text' in column_type_lower:
-                # 防止SQL注入
                 if isinstance(value, str):
                     return value.replace('\\', '\\\\').replace("'", "\\'")
                 return str(value)
@@ -688,9 +568,7 @@ class MySQLUploader:
             else:
                 return value
         except (ValueError, TypeError) as e:
-            logger.error(
-                f'转换异常, 无法将 `{value}` 的数据类型转为: `{column_type}`': str(e),
-            })
+            logger.error('数据类型转换异常', {'值': value, '目标类型': column_type, '错误': str(e)})
             raise ValueError(f"转换异常 -> 无法将 `{value}` 的数据类型转为: `{column_type}` -> {str(e)}")
 
     def _get_table_columns(self, db_name: str, table_name: str) -> Dict[str, str]:
@@ -710,20 +588,15 @@ class MySQLUploader:
             WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s
             ORDER BY ORDINAL_POSITION
         """
-
         try:
             with self._get_connection() as conn:
                 with conn.cursor() as cursor:
                     cursor.execute(sql, (db_name, table_name))
                     set_typ = {row['COLUMN_NAME'].lower(): row['DATA_TYPE'] for row in cursor.fetchall()}
-                    logger.debug(
+                    logger.debug('获取表的列信息', {'库': db_name, '表': table_name, '列信息': set_typ})
                     return set_typ
         except Exception as e:
-            logger.error(
-                '库': db_name,
-                '表': table_name,
-                '无法获取表列信息': str(e),
-            })
+            logger.error('无法获取表列信息', {'库': db_name, '表': table_name, '错误': str(e)})
             raise
 
     def _upload_to_table(
@@ -750,30 +623,32 @@ class MySQLUploader:
                 self._create_table(db_name, table_name, set_typ, primary_keys, date_column, indexes,
                                    allow_null=allow_null)
             else:
-                logger.error(
+                logger.error('数据表不存在', {
                     '库': db_name,
                     '表': table_name,
-                    '
+                    'func': sys._getframe().f_code.co_name,
                 })
                 raise ValueError(f"数据表不存在: `{db_name}`.`{table_name}`")
 
         # 获取表结构并验证
         table_columns = self._get_table_columns(db_name, table_name)
         if not table_columns:
-            logger.error(
+            logger.error('获取列失败', {
                 '库': db_name,
                 '表': table_name,
-                '
+                '列': self._shorten_for_log(table_columns),
+                'func': sys._getframe().f_code.co_name,
             })
             raise ValueError(f"获取列失败 `{db_name}`.`{table_name}`")
 
         # 验证数据列与表列匹配
         for col in set_typ:
             if col not in table_columns:
-                logger.error(
+                logger.error('列不存在', {
                     '库': db_name,
                     '表': table_name,
-                    '
+                    '列': col,
+                    'func': sys._getframe().f_code.co_name,
                 })
                 raise ValueError(f"列不存在: `{col}` -> `{db_name}`.`{table_name}`")
 
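Several of the rewritten log payloads above add a `'func'` field via `sys._getframe().f_code.co_name`, which evaluates to the name of the currently executing function (note that `sys._getframe` is a CPython implementation detail). A minimal demonstration:

    import sys

    def demo():
        # Name of the code object of the current frame, i.e. 'demo'
        return sys._getframe().f_code.co_name

    print(demo())  # prints: demo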
@@ -858,11 +733,9 @@ class MySQLUploader:
         - List[Dict[str, Any]]:将规范化列表中每个字典的键
         """
         if isinstance(data, pd.DataFrame):
-            # 处理DataFrame
             data.columns = [self._validate_identifier(col) for col in data.columns]
             return data
         elif isinstance(data, list):
-            # 处理字典列表
             return [{self._validate_identifier(k): v for k, v in item.items()} for item in data]
         return data
 
@@ -888,9 +761,10 @@ class MySQLUploader:
                 data.columns = [col.lower() for col in data.columns]
                 data = data.replace({pd.NA: None}).to_dict('records')
             except Exception as e:
-                logger.error(
-                    '
-                    '
+                logger.error('数据转字典时发生错误', {
+                    'error': str(e),
+                    'data': self._shorten_for_log(data),
+                    'func': sys._getframe().f_code.co_name,
                 })
                 raise ValueError(f"数据转字典时发生错误: {e}")
         elif isinstance(data, dict):
@@ -899,8 +773,9 @@ class MySQLUploader:
             # 将列表中的每个字典键转为小写
             data = [{k.lower(): v for k, v in item.items()} for item in data]
         else:
-            logger.error(
-                '
+            logger.error('数据结构必须是字典、列表、字典列表或dataframe', {
+                'data': self._shorten_for_log(data),
+                'func': sys._getframe().f_code.co_name,
             })
             raise ValueError("数据结构必须是字典、列表、字典列表或dataframe")
 
@@ -943,17 +818,19 @@ class MySQLUploader:
                 if col_name not in row:
                     if not allow_null:
                         error_msg = f"行号:{row_idx} -> 缺失列: `{col_name}`"
-                        logger.error(error_msg)
+                        logger.error(error_msg, {'row': self._shorten_for_log(row)})
                         raise ValueError(error_msg)
                     prepared_row[col_name] = None
                 else:
                     try:
                         prepared_row[col_name] = self._validate_value(row[col_name], filtered_set_typ[col_name], allow_null)
                     except ValueError as e:
-                        logger.error(
+                        logger.error('数据验证失败', {
                             '列': col_name,
                             '行': row_idx,
                             '报错': str(e),
+                            'row': self._shorten_for_log(row),
+                            'func': sys._getframe().f_code.co_name,
                         })
                         raise ValueError(f"行:{row_idx}, 列:`{col_name}`-> 报错: {str(e)}")
             prepared_data.append(prepared_row)
@@ -1009,10 +886,20 @@ class MySQLUploader:
             '库': db_name,
             '表': table_name,
             '批次': batch_id,
-            '分表方式': partition_by,
-            '排重': check_duplicate,
             '传入': len(data) if hasattr(data, '__len__') else 1,
-
+            '参数': {
+                '主键': primary_keys,
+                '去重': check_duplicate,
+                '去重列': duplicate_columns,
+                '允许空值': allow_null,
+                '分表方式': partition_by,
+                '分表列': partition_date_column,
+                # '自动建表': auto_create,
+                '索引': indexes,
+                '更新旧数据': update_on_duplicate,
+                '事务模式': transaction_mode
+            },
+            '数据样例': self._shorten_for_log(data, 2)
         })
 
         try:
@@ -1020,11 +907,12 @@ class MySQLUploader:
             if partition_by:
                 partition_by = str(partition_by).lower()
                 if partition_by not in ['year', 'month']:
-                    logger.error(
+                    logger.error('分表方式必须是 "year" 或 "month" 或 "None', {
                         '库': db_name,
                         '表': table_name,
                         '批次': batch_id,
-                        '
+                        '分表方式': partition_by,
+                        'func': sys._getframe().f_code.co_name,
                     })
                     raise ValueError("分表方式必须是 'year' 或 'month' 或 'None'")
 
@@ -1036,8 +924,9 @@ class MySQLUploader:
                 if auto_create:
                     self._create_database(db_name)
                 else:
-                    logger.error(
-                        '
+                    logger.error('数据库不存在', {
+                        '库': db_name,
+                        'func': sys._getframe().f_code.co_name,
                     })
                     raise ValueError(f"数据库不存在: `{db_name}`")
 
@@ -1047,11 +936,13 @@ class MySQLUploader:
                 for row in prepared_data:
                     try:
                         if partition_date_column not in row:
-                            logger.error(
+                            logger.error('异常缺失列',{
                                 '库': db_name,
                                 '表': table_name,
                                 '批次': batch_id,
-                                '
+                                '缺失列': partition_date_column,
+                                'row': self._shorten_for_log(row),
+                                'func': sys._getframe().f_code.co_name,
                             })
                             continue  # 跳过当前行
 
@@ -1064,11 +955,12 @@ class MySQLUploader:
                             partitioned_data[part_table] = []
                         partitioned_data[part_table].append(row)
                     except Exception as e:
-                        logger.error(
+                        logger.error('分表处理异常', {
                             '库': db_name,
                             '表': table_name,
-                            'row_data': row,
-                            '
+                            'row_data': self._shorten_for_log(row),
+                            'error': str(e),
+                            'func': sys._getframe().f_code.co_name,
                         })
                         continue  # 跳过当前行
 
@@ -1082,11 +974,13 @@ class MySQLUploader:
                             indexes, batch_id, update_on_duplicate, transaction_mode
                         )
                     except Exception as e:
-                        logger.error(
+                        logger.error('分表上传异常', {
                             '库': db_name,
                             '表': table_name,
                             '分表': part_table,
-                            '
+                            'error': str(e),
+                            '数据样例': self._shorten_for_log(part_data, 2),
+                            'func': sys._getframe().f_code.co_name,
                         })
                         continue  # 跳过当前分表,继续处理其他分表
             else:
@@ -1101,11 +995,13 @@ class MySQLUploader:
                 success_flag = True
 
         except Exception as e:
-            logger.error(
+            logger.error('上传过程发生全局错误', {
                 '库': db_name,
                 '表': table_name,
-                '
+                'error': str(e),
                 'error_type': type(e).__name__,
+                '数据样例': self._shorten_for_log(data, 2),
+                'func': sys._getframe().f_code.co_name,
             })
         finally:
             logger.info("存储完成", {
@@ -1180,10 +1076,11 @@ class MySQLUploader:
         """验证并标准化事务模式"""
         valid_modes = ('row', 'batch', 'hybrid')
         if mode.lower() not in valid_modes:
-            logger.error(
-                '
+            logger.error('事务模式参数错误', {
+                '错误值': mode,
                 '可选值': valid_modes,
-                '自动使用默认模式': 'batch'
+                '自动使用默认模式': 'batch',
+                'func': sys._getframe().f_code.co_name,
             })
             return 'batch'
         return mode.lower()
@@ -1352,7 +1249,7 @@ class MySQLUploader:
                     batch_inserted, batch_skipped, batch_failed = self._process_batch(
                         conn, cursor, db_name, table_name, batch, all_columns,
                         sql, check_duplicate, duplicate_columns, batch_id,
-                        transaction_mode,
+                        transaction_mode, update_on_duplicate
                     )
 
                     # 更新总统计
@@ -1375,24 +1272,47 @@ class MySQLUploader:
             duplicate_columns: Optional[List[str]],
             batch_id: Optional[str],
             transaction_mode: str,
-            batch_index: int,
-            total_data_length: int,
             update_on_duplicate: bool = False
     ) -> Tuple[int, int, int]:
-        """
+        """
+        处理单个批次的数据插入
+
+        :param conn: 数据库连接对象
+        :param cursor: 数据库游标对象
+        :param db_name: 数据库名
+        :param table_name: 表名
+        :param batch: 当前批次的数据(字典列表)
+        :param all_columns: 需要插入的所有列名
+        :param sql: 执行的SQL语句
+        :param check_duplicate: 是否检查重复
+        :param duplicate_columns: 排重列
+        :param batch_id: 批次ID
+        :param transaction_mode: 事务模式
+        :param update_on_duplicate: 遇到重复时是否更新
+        :return: (插入数, 跳过数, 失败数)
+        """
         batch_inserted = 0
         batch_skipped = 0
         batch_failed = 0
-
+        batch_size = len(batch)
+        logger.debug('批次插入开始', {
+            '库': db_name,
+            '表': table_name,
+            '批次ID': batch_id,
+            '批次大小': batch_size,
+            '事务模式': transaction_mode,
+            'SQL预览': sql[:200],
+            '排重': check_duplicate,
+            '排重列': duplicate_columns,
+            '允许更新': update_on_duplicate,
+            '数据样例': self._shorten_for_log(batch, 2)
+        })
         if transaction_mode == 'batch':
-            # 批量模式特殊处理 - 尝试逐行插入但保持事务
             try:
                 for row_idx, row in enumerate(batch, 1):
                     result = self._process_single_row(
-                        db_name, table_name,
-
-                        check_duplicate, duplicate_columns,
-                        update_on_duplicate
+                        db_name, table_name, cursor, row, all_columns, sql,
+                        check_duplicate, duplicate_columns, update_on_duplicate
                     )
                     if result == 'inserted':
                         batch_inserted += 1
@@ -1400,32 +1320,32 @@ class MySQLUploader:
                         batch_skipped += 1
                     else:
                         batch_failed += 1
-
-                # 批量模式最后统一提交
                 conn.commit()
-
+                logger.debug('批次插入成功', {
+                    '库': db_name,
+                    '表': table_name,
+                    '批次ID': batch_id,
+                    '插入': batch_inserted,
+                    '跳过': batch_skipped,
+                    '失败': batch_failed
+                })
             except Exception as e:
-                # 如果整个批量操作失败,回滚
                 conn.rollback()
-                batch_failed
-                logger.error(
+                batch_failed += len(batch)
+                logger.error('批次插入失败', {
                     '库': db_name,
                     '表': table_name,
-                    '批次':
-                    '
-                    '
-                    '
-                    '处理方式': '整个批次回滚'
+                    '批次ID': batch_id,
+                    '错误': str(e),
+                    'SQL预览': sql[:200],
+                    '数据样例': self._shorten_for_log(batch, 2)
                 })
-
         else: # row 或 hybrid 模式
             for row_idx, row in enumerate(batch, 1):
                 try:
                     result = self._process_single_row(
-                        db_name, table_name,
-
-                        check_duplicate, duplicate_columns,
-                        update_on_duplicate
+                        db_name, table_name, cursor, row, all_columns, sql,
+                        check_duplicate, duplicate_columns, update_on_duplicate
                     )
                     if result == 'inserted':
                         batch_inserted += 1
@@ -1433,37 +1353,41 @@ class MySQLUploader:
                         batch_skipped += 1
                     else:
                         batch_failed += 1
-
-
-
-
-
-
-
+                    conn.commit()
+                    logger.debug('单行插入成功', {
+                        '库': db_name,
+                        '表': table_name,
+                        '批次ID': batch_id,
+                        '行号': row_idx,
+                        '插入状态': result
+                    })
                 except Exception as e:
                     conn.rollback()
                     batch_failed += 1
-                    logger.error(
+                    logger.error('单行插入失败', {
                         '库': db_name,
                         '表': table_name,
-                        '
-                        '
-                        '
-                        '
-                        '
-                        '事务模式': transaction_mode,
+                        '批次ID': batch_id,
+                        '行号': row_idx,
+                        '错误': str(e),
+                        'SQL预览': sql[:200],
+                        '数据': self._shorten_for_log(row)
                     })
-
-
-
-
-
+        logger.debug('批次插入结束', {
+            '库': db_name,
+            '表': table_name,
+            '批次ID': batch_id,
+            '插入': batch_inserted,
+            '跳过': batch_skipped,
+            '失败': batch_failed,
+            '数据样例': self._shorten_for_log(batch, 2)
+        })
         return batch_inserted, batch_skipped, batch_failed
 
     def _process_single_row(
             self,
-            db_name,
-            table_name,
+            db_name: str,
+            table_name: str,
             cursor,
             row: Dict,
             all_columns: List[str],
@@ -1472,95 +1396,87 @@ class MySQLUploader:
             duplicate_columns: Optional[List[str]],
             update_on_duplicate: bool = False
     ) -> str:
-        """
-
-        # 准备参数
-        row_values = [row.get(col) for col in all_columns]
-
-        # 确定排重列(排除id和更新时间列)
-        dup_cols = duplicate_columns if duplicate_columns else [
-            col for col in all_columns
-            if col.lower() not in self.base_excute_col
-        ]
-
-        if check_duplicate:
-            # 添加排重条件参数
-            dup_values = [row.get(col) for col in dup_cols]
-            row_values.extend(dup_values)
-
-            # logger.info(sql)
-            # logger.info(row_values)
-        cursor.execute(sql, row_values)
+        """
+        处理单行数据插入
 
+        :param db_name: 数据库名
+        :param table_name: 表名
+        :param cursor: 数据库游标对象
+        :param row: 单行数据(字典)
+        :param all_columns: 需要插入的所有列名
+        :param sql: 执行的SQL语句
+        :param check_duplicate: 是否检查重复
+        :param duplicate_columns: 排重列
+        :param update_on_duplicate: 遇到重复时是否更新
+        :return: 'inserted' | 'skipped' | 'failed'
+        """
+        try:
+            # 构造参数
+            values = [row.get(col) for col in all_columns]
             if check_duplicate:
-            #
-
-
+                # 需要为 WHERE NOT EXISTS 语句补充参数
+                if not update_on_duplicate:
+                    # duplicate_columns 为空时,默认用所有列(排除id/更新时间)
+                    dup_cols = duplicate_columns if duplicate_columns else [col for col in all_columns if col.lower() not in self.base_excute_col]
+                    values = values + [row.get(col) for col in dup_cols]
+            cursor.execute(sql, values)
         except Exception as e:
-            logger.error(
-                '
-                '
-                '
-                '
-                '处理方式': '继续处理剩余行'
+            logger.error('单行插入失败', {
+                '库': db_name,
+                '表': table_name,
+                'row': self._shorten_for_log(row),
+                '错误': str(e)
             })
             return 'failed'
+        return 'inserted'
 
-    def close(self):
+    def close(self) -> None:
         """
         关闭连接池并清理资源
-
         这个方法会安全地关闭数据库连接池,并清理相关资源。
         建议结束时手动调用此方法。
-
         :raises: 可能抛出关闭连接时的异常
         """
         try:
             if hasattr(self, 'pool') and self.pool is not None:
-                # 更安全的关闭方式
                 try:
                     self.pool.close()
                 except Exception as e:
-                    logger.warning(
-                        'error': str(e)
-                    })
-
+                    logger.warning('关闭连接池时出错', {'error': str(e)})
                 self.pool = None
-
-            logger.info("success", {'uploader.py': '连接池关闭'})
+            logger.info('连接池关闭', {'uploader.py': '连接池关闭'})
         except Exception as e:
-            logger.error(
-                'error': str(e)
-            })
+            logger.error('关闭连接池失败', {'error': str(e)})
            raise
 
-    def _check_pool_health(self):
+    def _check_pool_health(self) -> bool:
         """
         检查连接池健康状态
-
         :return: 连接池健康返回True,否则返回False
         """
         conn = None
         try:
             conn = self.pool.connection()
             conn.ping(reconnect=True)
+            logger.debug('连接池健康检查通过')
             return True
         except Exception as e:
-            logger.warning(
-                'error': str(e)
-            })
+            logger.warning('连接池健康检查失败', {'error': str(e)})
             return False
         finally:
             if conn:
                 try:
                     conn.close()
                 except Exception as e:
-                    logger.warning(
-                        'error': str(e)
-                    })
+                    logger.warning('关闭连接时出错', {'error': str(e)})
 
-    def retry_on_failure(max_retries=3, delay=1):
+    def retry_on_failure(max_retries: int = 3, delay: int = 1):
+        """
+        通用重试装饰器
+        :param max_retries: 最大重试次数
+        :param delay: 重试间隔(秒)
+        :return: 装饰器
+        """
         def decorator(func):
             @wraps(func)
             def wrapper(*args, **kwargs):
@@ -1570,18 +1486,40 @@ class MySQLUploader:
                         return func(*args, **kwargs)
                     except (pymysql.OperationalError, pymysql.InterfaceError) as e:
                         last_exception = e
+                        logger.warning('操作失败,准备重试', {'attempt': attempt + 1, 'error': str(e)})
                         if attempt < max_retries - 1:
                             time.sleep(delay * (attempt + 1))
                             continue
-
+                        logger.error(f'操作重试 {max_retries} 次后失败', {'error': str(e)})
+                        raise
                     except Exception as e:
-
-
-
+                        logger.error('操作失败', {'error': str(e)})
+                        raise
+                raise last_exception if last_exception else logger.error('操作重试失败,未知错误')
             return wrapper
-
         return decorator
 
+    def _shorten_for_log(self, obj: Any, maxlen: int = 200) -> Any:
+        """
+        日志安全截断工具:对字符串、列表、字典等做长度限制,避免日志过长。
+        :param obj: 原始对象
+        :param maxlen: 最大长度/元素数
+        :return: 截断后的对象
+        """
+        if isinstance(obj, str):
+            return obj[:maxlen] + ("..." if len(obj) > maxlen else "")
+        elif isinstance(obj, list):
+            return obj[:maxlen] + (["..."] if len(obj) > maxlen else [])
+        elif isinstance(obj, dict):
+            short = {k: self._shorten_for_log(v, maxlen) for i, (k, v) in enumerate(obj.items()) if i < maxlen}
+            if len(obj) > maxlen:
+                short['...'] = f"total_keys={len(obj)}"
+            return short
+        elif hasattr(obj, 'shape') and hasattr(obj, 'head'):
+            # pandas DataFrame
+            return f"DataFrame shape={obj.shape}, head={obj.head(1).to_dict()}"
+        return obj
+
     def __enter__(self):
         return self
 
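The new `_shorten_for_log` helper added above truncates strings and lists to `maxlen` characters or elements, recurses into dict values, and summarizes anything DataFrame-like. A hedged usage sketch based only on the method body shown in this diff (`uploader` stands for any MySQLUploader instance):

    uploader._shorten_for_log('x' * 500)                    # first 200 chars followed by '...'
    uploader._shorten_for_log(list(range(500)), maxlen=3)   # [0, 1, 2, '...']
    uploader._shorten_for_log({'rows': 'x' * 500, 'n': 3})  # same keys, values truncated recursively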
@@ -1591,9 +1529,7 @@ class MySQLUploader:
 
 def main():
     """
-
-
-    这个示例展示了如何:
+    示例:
     1. 创建上传器实例
     2. 定义数据表结构
     3. 准备测试数据