mdbq 3.9.16__tar.gz → 3.9.18__tar.gz
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- {mdbq-3.9.16 → mdbq-3.9.18}/PKG-INFO +1 -1
- mdbq-3.9.18/mdbq/__version__.py +1 -0
- {mdbq-3.9.16 → mdbq-3.9.18}/mdbq/log/mylogger.py +9 -6
- {mdbq-3.9.16 → mdbq-3.9.18}/mdbq/mysql/uploader.py +148 -103
- {mdbq-3.9.16 → mdbq-3.9.18}/mdbq.egg-info/PKG-INFO +1 -1
- mdbq-3.9.16/mdbq/__version__.py +0 -1
- {mdbq-3.9.16 → mdbq-3.9.18}/README.txt +0 -0
- {mdbq-3.9.16 → mdbq-3.9.18}/mdbq/__init__.py +0 -0
- {mdbq-3.9.16 → mdbq-3.9.18}/mdbq/aggregation/__init__.py +0 -0
- {mdbq-3.9.16 → mdbq-3.9.18}/mdbq/aggregation/optimize.py +0 -0
- {mdbq-3.9.16 → mdbq-3.9.18}/mdbq/aggregation/query_data.py +0 -0
- {mdbq-3.9.16 → mdbq-3.9.18}/mdbq/config/__init__.py +0 -0
- {mdbq-3.9.16 → mdbq-3.9.18}/mdbq/config/config.py +0 -0
- {mdbq-3.9.16 → mdbq-3.9.18}/mdbq/log/__init__.py +0 -0
- {mdbq-3.9.16 → mdbq-3.9.18}/mdbq/log/spider_logging.py +0 -0
- {mdbq-3.9.16 → mdbq-3.9.18}/mdbq/mysql/__init__.py +0 -0
- {mdbq-3.9.16 → mdbq-3.9.18}/mdbq/mysql/deduplicator.py +0 -0
- {mdbq-3.9.16 → mdbq-3.9.18}/mdbq/mysql/mysql.py +0 -0
- {mdbq-3.9.16 → mdbq-3.9.18}/mdbq/mysql/s_query.py +0 -0
- {mdbq-3.9.16 → mdbq-3.9.18}/mdbq/other/__init__.py +0 -0
- {mdbq-3.9.16 → mdbq-3.9.18}/mdbq/other/download_sku_picture.py +0 -0
- {mdbq-3.9.16 → mdbq-3.9.18}/mdbq/other/otk.py +0 -0
- {mdbq-3.9.16 → mdbq-3.9.18}/mdbq/other/pov_city.py +0 -0
- {mdbq-3.9.16 → mdbq-3.9.18}/mdbq/other/ua_sj.py +0 -0
- {mdbq-3.9.16 → mdbq-3.9.18}/mdbq/pbix/__init__.py +0 -0
- {mdbq-3.9.16 → mdbq-3.9.18}/mdbq/pbix/pbix_refresh.py +0 -0
- {mdbq-3.9.16 → mdbq-3.9.18}/mdbq/pbix/refresh_all.py +0 -0
- {mdbq-3.9.16 → mdbq-3.9.18}/mdbq/redis/__init__.py +0 -0
- {mdbq-3.9.16 → mdbq-3.9.18}/mdbq/redis/getredis.py +0 -0
- {mdbq-3.9.16 → mdbq-3.9.18}/mdbq/spider/__init__.py +0 -0
- {mdbq-3.9.16 → mdbq-3.9.18}/mdbq/spider/aikucun.py +0 -0
- {mdbq-3.9.16 → mdbq-3.9.18}/mdbq.egg-info/SOURCES.txt +0 -0
- {mdbq-3.9.16 → mdbq-3.9.18}/mdbq.egg-info/dependency_links.txt +0 -0
- {mdbq-3.9.16 → mdbq-3.9.18}/mdbq.egg-info/top_level.txt +0 -0
- {mdbq-3.9.16 → mdbq-3.9.18}/setup.cfg +0 -0
- {mdbq-3.9.16 → mdbq-3.9.18}/setup.py +0 -0
@@ -0,0 +1 @@
|
|
1
|
+
VERSION = '3.9.18'
|
@@ -14,7 +14,6 @@ import inspect
|
|
14
14
|
|
15
15
|
try:
|
16
16
|
import psutil
|
17
|
-
|
18
17
|
HAS_PSUTIL = True
|
19
18
|
except ImportError:
|
20
19
|
HAS_PSUTIL = False
|
@@ -22,9 +21,9 @@ except ImportError:
|
|
22
21
|
|
23
22
|
class MyLogger:
|
24
23
|
"""
|
25
|
-
|
24
|
+
日志记录器
|
26
25
|
|
27
|
-
|
26
|
+
功能:
|
28
27
|
- 异步日志记录(减少I/O阻塞)
|
29
28
|
- 上下文管理器支持
|
30
29
|
- 自定义日志过滤器
|
@@ -71,10 +70,10 @@ class MyLogger:
|
|
71
70
|
flush_interval: int = 5
|
72
71
|
):
|
73
72
|
"""
|
74
|
-
|
73
|
+
初始化日志器
|
75
74
|
|
76
|
-
:param name:
|
77
|
-
:param logging_mode:
|
75
|
+
:param name: 日志器名称
|
76
|
+
:param logging_mode: 输出模式
|
78
77
|
:param log_level: 日志级别
|
79
78
|
:param log_file: 日志文件路径
|
80
79
|
:param max_log_size: 单个日志文件最大大小(MB)
|
@@ -391,6 +390,10 @@ class MyLogger:
|
|
391
390
|
if not hasattr(self.logger, level.lower()):
|
392
391
|
return
|
393
392
|
|
393
|
+
# message 仅接收字符串类型
|
394
|
+
if not isinstance(message, str):
|
395
|
+
message = str(message)
|
396
|
+
|
394
397
|
# 简化日志内容,避免过长
|
395
398
|
if len(message) > self.message_limited:
|
396
399
|
message = message[:self.message_limited] + '...'
|
@@ -13,6 +13,7 @@ from typing import Union, List, Dict, Optional, Any, Tuple, Set
|
|
13
13
|
from dbutils.pooled_db import PooledDB
|
14
14
|
import json
|
15
15
|
from collections import OrderedDict
|
16
|
+
import sys
|
16
17
|
|
17
18
|
warnings.filterwarnings('ignore')
|
18
19
|
logger = mylogger.MyLogger(
|
@@ -301,7 +302,10 @@ class MySQLUploader:
|
|
301
302
|
logger.debug(f"{db_name} 数据库已存在: {exists}")
|
302
303
|
return exists
|
303
304
|
except Exception as e:
|
304
|
-
logger.error(
|
305
|
+
logger.error(sys._getframe().f_code.co_name, {
|
306
|
+
'检查数据库是否存在时出错': str(e),
|
307
|
+
'库': db_name
|
308
|
+
})
|
305
309
|
raise
|
306
310
|
|
307
311
|
def _create_database(self, db_name: str):
|
@@ -321,7 +325,10 @@ class MySQLUploader:
|
|
321
325
|
conn.commit()
|
322
326
|
logger.info(f"`{db_name}` 数据库已创建")
|
323
327
|
except Exception as e:
|
324
|
-
logger.error(
|
328
|
+
logger.error(sys._getframe().f_code.co_name, {
|
329
|
+
'无法创建数据库': str(e),
|
330
|
+
'库': db_name
|
331
|
+
})
|
325
332
|
conn.rollback()
|
326
333
|
raise
|
327
334
|
|
@@ -339,18 +346,22 @@ class MySQLUploader:
|
|
339
346
|
# date_obj = datetime.datetime.strptime(date_value, '%Y-%m-%d')
|
340
347
|
date_obj = self._validate_datetime(date_value, True)
|
341
348
|
except ValueError:
|
342
|
-
|
343
|
-
|
344
|
-
|
349
|
+
logger.error(sys._getframe().f_code.co_name, {
|
350
|
+
'无效的日期格式1': date_value,
|
351
|
+
'表': table_name
|
352
|
+
})
|
353
|
+
raise ValueError(f"`{table_name}` 无效的日期格式1: `{date_value}`")
|
345
354
|
|
346
355
|
if partition_by == 'year':
|
347
356
|
return f"{table_name}_{date_obj.year}"
|
348
357
|
elif partition_by == 'month':
|
349
358
|
return f"{table_name}_{date_obj.year}_{date_obj.month:02d}"
|
350
359
|
else:
|
351
|
-
|
352
|
-
|
353
|
-
|
360
|
+
logger.error(sys._getframe().f_code.co_name, {
|
361
|
+
"分表方式必须是 'year' 或 'month'": partition_by,
|
362
|
+
'表': table_name
|
363
|
+
})
|
364
|
+
raise ValueError("分表方式必须是 'year' 或 'month'")
|
354
365
|
|
355
366
|
def _validate_identifier(self, identifier: str) -> str:
|
356
367
|
"""
|
@@ -361,9 +372,10 @@ class MySQLUploader:
|
|
361
372
|
:raises ValueError: 当标识符无效时抛出
|
362
373
|
"""
|
363
374
|
if not identifier or not isinstance(identifier, str):
|
364
|
-
|
365
|
-
|
366
|
-
|
375
|
+
logger.error(sys._getframe().f_code.co_name, {
|
376
|
+
'无效的标识符': identifier
|
377
|
+
})
|
378
|
+
raise ValueError(f"无效的标识符: `{identifier}`")
|
367
379
|
|
368
380
|
# 移除非法字符,只保留字母、数字、下划线和美元符号
|
369
381
|
cleaned = re.sub(r'[^\w\u4e00-\u9fff$]', '_', identifier)
|
@@ -372,9 +384,10 @@ class MySQLUploader:
|
|
372
384
|
cleaned = re.sub(r'_+', '_', cleaned).strip('_')
|
373
385
|
|
374
386
|
if not cleaned:
|
375
|
-
|
376
|
-
|
377
|
-
|
387
|
+
logger.error(sys._getframe().f_code.co_name, {
|
388
|
+
'无法清理异常标识符': identifier
|
389
|
+
})
|
390
|
+
raise ValueError(f"无法清理异常标识符: `{identifier}`")
|
378
391
|
|
379
392
|
# 检查是否为MySQL保留字
|
380
393
|
mysql_keywords = {
|
@@ -416,7 +429,10 @@ class MySQLUploader:
|
|
416
429
|
cursor.execute(sql, (db_name, table_name))
|
417
430
|
result = bool(cursor.fetchone())
|
418
431
|
except Exception as e:
|
419
|
-
logger.error(
|
432
|
+
logger.error(sys._getframe().f_code.co_name, {
|
433
|
+
'库': db_name,
|
434
|
+
'表': table_name,
|
435
|
+
'检查数据表是否存在时发生未知错误': str(e)})
|
420
436
|
raise
|
421
437
|
|
422
438
|
# 执行查询并缓存结果
|
@@ -449,9 +465,11 @@ class MySQLUploader:
|
|
449
465
|
table_name = self._validate_identifier(table_name)
|
450
466
|
|
451
467
|
if not set_typ:
|
452
|
-
|
453
|
-
|
454
|
-
|
468
|
+
logger.error(sys._getframe().f_code.co_name, {
|
469
|
+
'库': db_name,
|
470
|
+
'表': table_name,
|
471
|
+
'set_typ 未指定': set_typ})
|
472
|
+
raise ValueError('set_typ 未指定')
|
455
473
|
|
456
474
|
# 构建列定义SQL
|
457
475
|
column_defs = ["`id` INT NOT NULL AUTO_INCREMENT"]
|
@@ -527,7 +545,11 @@ class MySQLUploader:
|
|
527
545
|
logger.info(f"索引已添加: `{db_name}`.`{table_name}` -> `{indexes}`")
|
528
546
|
|
529
547
|
except Exception as e:
|
530
|
-
logger.error(
|
548
|
+
logger.error(sys._getframe().f_code.co_name, {
|
549
|
+
'建表失败': str(e),
|
550
|
+
'库': db_name,
|
551
|
+
'表': table_name,
|
552
|
+
})
|
531
553
|
conn.rollback()
|
532
554
|
raise
|
533
555
|
|
@@ -624,9 +646,10 @@ class MySQLUploader:
|
|
624
646
|
else:
|
625
647
|
return value
|
626
648
|
except (ValueError, TypeError) as e:
|
627
|
-
|
628
|
-
|
629
|
-
|
649
|
+
logger.error(sys._getframe().f_code.co_name, {
|
650
|
+
f'转换异常, 无法将 `{value}` 的数据类型转为: `{column_type}`': str(e),
|
651
|
+
})
|
652
|
+
raise ValueError(f"转换异常 -> 无法将 `{value}` 的数据类型转为: `{column_type}` -> {str(e)}")
|
630
653
|
|
631
654
|
def _get_table_columns(self, db_name: str, table_name: str) -> Dict[str, str]:
|
632
655
|
"""
|
@@ -654,7 +677,11 @@ class MySQLUploader:
|
|
654
677
|
logger.debug(f"`{db_name}`.`{table_name}`: 获取表的列信息: `{set_typ}`")
|
655
678
|
return set_typ
|
656
679
|
except Exception as e:
|
657
|
-
logger.error(
|
680
|
+
logger.error(sys._getframe().f_code.co_name, {
|
681
|
+
'无法获取表列信息': str(e),
|
682
|
+
'库': db_name,
|
683
|
+
'表': table_name,
|
684
|
+
})
|
658
685
|
raise
|
659
686
|
|
660
687
|
def _upload_to_table(
|
@@ -680,23 +707,32 @@ class MySQLUploader:
|
|
680
707
|
self._create_table(db_name, table_name, set_typ, primary_keys, date_column, indexes,
|
681
708
|
allow_null=allow_null)
|
682
709
|
else:
|
683
|
-
|
684
|
-
|
685
|
-
|
710
|
+
logger.error(sys._getframe().f_code.co_name, {
|
711
|
+
'数据表不存在': table_name,
|
712
|
+
'库': db_name,
|
713
|
+
'表': table_name,
|
714
|
+
})
|
715
|
+
raise ValueError(f"数据表不存在: `{db_name}`.`{table_name}`")
|
686
716
|
|
687
717
|
# 获取表结构并验证
|
688
718
|
table_columns = self._get_table_columns(db_name, table_name)
|
689
719
|
if not table_columns:
|
690
|
-
|
691
|
-
|
692
|
-
|
720
|
+
logger.error(sys._getframe().f_code.co_name, {
|
721
|
+
'获取列失败': table_columns,
|
722
|
+
'库': db_name,
|
723
|
+
'表': table_name,
|
724
|
+
})
|
725
|
+
raise ValueError(f"获取列失败 `{db_name}`.`{table_name}`")
|
693
726
|
|
694
727
|
# 验证数据列与表列匹配
|
695
728
|
for col in set_typ:
|
696
729
|
if col not in table_columns:
|
697
|
-
|
698
|
-
|
699
|
-
|
730
|
+
logger.error(sys._getframe().f_code.co_name, {
|
731
|
+
'列不存在': f'{col} -> {table_columns}',
|
732
|
+
'库': db_name,
|
733
|
+
'表': table_name,
|
734
|
+
})
|
735
|
+
raise ValueError(f"列不存在: `{col}` -> `{db_name}`.`{table_name}`")
|
700
736
|
|
701
737
|
# 插入数据
|
702
738
|
self._insert_data(
|
@@ -808,7 +844,10 @@ class MySQLUploader:
|
|
808
844
|
data.columns = [col.lower() for col in data.columns]
|
809
845
|
data = data.replace({pd.NA: None}).to_dict('records')
|
810
846
|
except Exception as e:
|
811
|
-
logger.error(
|
847
|
+
logger.error(sys._getframe().f_code.co_name, {
|
848
|
+
'数据转字典时发生错误': str(e),
|
849
|
+
'数据': data,
|
850
|
+
})
|
812
851
|
raise ValueError(f"数据转字典时发生错误: {e}")
|
813
852
|
elif isinstance(data, dict):
|
814
853
|
data = [{k.lower(): v for k, v in data.items()}]
|
@@ -816,9 +855,10 @@ class MySQLUploader:
|
|
816
855
|
# 将列表中的每个字典键转为小写
|
817
856
|
data = [{k.lower(): v for k, v in item.items()} for item in data]
|
818
857
|
else:
|
819
|
-
|
820
|
-
|
821
|
-
|
858
|
+
logger.error(sys._getframe().f_code.co_name, {
|
859
|
+
'数据结构必须是字典、列表、字典列表或dataframe': data,
|
860
|
+
})
|
861
|
+
raise ValueError("数据结构必须是字典、列表、字典列表或dataframe")
|
822
862
|
|
823
863
|
# 统一处理原始数据中列名的特殊字符
|
824
864
|
data = self.normalize_column_names(data)
|
@@ -860,15 +900,19 @@ class MySQLUploader:
|
|
860
900
|
if not allow_null:
|
861
901
|
error_msg = f"行号:{row_idx} -> 缺失列: `{col_name}`"
|
862
902
|
logger.error(error_msg)
|
903
|
+
|
863
904
|
raise ValueError(error_msg)
|
864
905
|
prepared_row[col_name] = None
|
865
906
|
else:
|
866
907
|
try:
|
867
908
|
prepared_row[col_name] = self._validate_value(row[col_name], filtered_set_typ[col_name], allow_null)
|
868
909
|
except ValueError as e:
|
869
|
-
|
870
|
-
|
871
|
-
|
910
|
+
logger.error(sys._getframe().f_code.co_name, {
|
911
|
+
'列': col_name,
|
912
|
+
'行': row_idx,
|
913
|
+
'报错': str(e),
|
914
|
+
})
|
915
|
+
raise ValueError(f"行:{row_idx}, 列:`{col_name}`-> 报错: {str(e)}")
|
872
916
|
prepared_data.append(prepared_row)
|
873
917
|
|
874
918
|
logger.debug(f"已准备 {len(prepared_data)} 行数据")
|
@@ -932,9 +976,13 @@ class MySQLUploader:
|
|
932
976
|
if partition_by:
|
933
977
|
partition_by = str(partition_by).lower()
|
934
978
|
if partition_by not in ['year', 'month']:
|
935
|
-
|
936
|
-
|
937
|
-
|
979
|
+
logger.error(sys._getframe().f_code.co_name, {
|
980
|
+
'分表方式必须是 "year" 或 "month" 或 "None"': partition_by,
|
981
|
+
'库': db_name,
|
982
|
+
'表': table_name,
|
983
|
+
'批次': batch_id
|
984
|
+
})
|
985
|
+
raise ValueError("分表方式必须是 'year' 或 'month' 或 'None'")
|
938
986
|
|
939
987
|
# 准备数据
|
940
988
|
prepared_data, filtered_set_typ = self._prepare_data(data, set_typ, allow_null)
|
@@ -944,9 +992,10 @@ class MySQLUploader:
|
|
944
992
|
if auto_create:
|
945
993
|
self._create_database(db_name)
|
946
994
|
else:
|
947
|
-
|
948
|
-
|
949
|
-
|
995
|
+
logger.error(sys._getframe().f_code.co_name, {
|
996
|
+
'数据库不存在': db_name
|
997
|
+
})
|
998
|
+
raise ValueError(f"数据库不存在: `{db_name}`")
|
950
999
|
|
951
1000
|
# 处理分表逻辑
|
952
1001
|
if partition_by:
|
@@ -954,7 +1003,7 @@ class MySQLUploader:
|
|
954
1003
|
for row in prepared_data:
|
955
1004
|
try:
|
956
1005
|
if partition_date_column not in row:
|
957
|
-
logger.error(
|
1006
|
+
logger.error(sys._getframe().f_code.co_name,{
|
958
1007
|
'库': db_name,
|
959
1008
|
'表': table_name,
|
960
1009
|
'批次': batch_id,
|
@@ -971,9 +1020,11 @@ class MySQLUploader:
|
|
971
1020
|
partitioned_data[part_table] = []
|
972
1021
|
partitioned_data[part_table].append(row)
|
973
1022
|
except Exception as e:
|
974
|
-
logger.error(
|
1023
|
+
logger.error(sys._getframe().f_code.co_name, {
|
975
1024
|
'row_data': row,
|
976
|
-
'
|
1025
|
+
'分表处理失败': str(e),
|
1026
|
+
'库': db_name,
|
1027
|
+
'表': table_name,
|
977
1028
|
})
|
978
1029
|
continue # 跳过当前行
|
979
1030
|
|
@@ -987,9 +1038,11 @@ class MySQLUploader:
|
|
987
1038
|
indexes, batch_id, update_on_duplicate
|
988
1039
|
)
|
989
1040
|
except Exception as e:
|
990
|
-
logger.error(
|
1041
|
+
logger.error(sys._getframe().f_code.co_name, {
|
991
1042
|
'分表': part_table,
|
992
|
-
'
|
1043
|
+
'分表上传失败': str(e),
|
1044
|
+
'库': db_name,
|
1045
|
+
'表': table_name,
|
993
1046
|
})
|
994
1047
|
continue # 跳过当前分表,继续处理其他分表
|
995
1048
|
else:
|
@@ -1004,12 +1057,14 @@ class MySQLUploader:
|
|
1004
1057
|
success_flag = True
|
1005
1058
|
|
1006
1059
|
except Exception as e:
|
1007
|
-
logger.error(
|
1008
|
-
'
|
1009
|
-
'error_type': type(e).__name__
|
1060
|
+
logger.error(sys._getframe().f_code.co_name, {
|
1061
|
+
'上传过程发生全局错误': str(e),
|
1062
|
+
'error_type': type(e).__name__,
|
1063
|
+
'库': db_name,
|
1064
|
+
'表': table_name,
|
1010
1065
|
})
|
1011
1066
|
finally:
|
1012
|
-
logger.info("
|
1067
|
+
logger.info("存储完成", {
|
1013
1068
|
'库': db_name,
|
1014
1069
|
'表': table_name,
|
1015
1070
|
'批次': batch_id,
|
@@ -1051,6 +1106,11 @@ class MySQLUploader:
|
|
1051
1106
|
safe_columns = [self._validate_identifier(col) for col in all_columns]
|
1052
1107
|
placeholders = ','.join(['%s'] * len(safe_columns))
|
1053
1108
|
|
1109
|
+
# 初始化统计变量
|
1110
|
+
total_inserted = 0
|
1111
|
+
total_skipped = 0
|
1112
|
+
total_failed = 0
|
1113
|
+
|
1054
1114
|
# 构建基础SQL语句
|
1055
1115
|
if check_duplicate:
|
1056
1116
|
if not duplicate_columns:
|
@@ -1061,10 +1121,7 @@ class MySQLUploader:
|
|
1061
1121
|
conditions = []
|
1062
1122
|
for col in duplicate_columns:
|
1063
1123
|
col_type = set_typ.get(col, '').lower()
|
1064
|
-
|
1065
|
-
# 处理DECIMAL类型,使用ROUND确保精度一致
|
1066
1124
|
if col_type.startswith('decimal'):
|
1067
|
-
# 提取小数位数,如DECIMAL(10,2)提取2
|
1068
1125
|
scale_match = re.search(r'decimal\(\d+,(\d+)\)', col_type)
|
1069
1126
|
scale = int(scale_match.group(1)) if scale_match else 2
|
1070
1127
|
conditions.append(f"ROUND(`{self._validate_identifier(col)}`, {scale}) = ROUND(%s, {scale})")
|
@@ -1082,10 +1139,6 @@ class MySQLUploader:
|
|
1082
1139
|
VALUES ({placeholders})
|
1083
1140
|
ON DUPLICATE KEY UPDATE {update_clause}
|
1084
1141
|
"""
|
1085
|
-
|
1086
|
-
# 注意:在update_on_duplicate模式下,row_values只需要插入数据,不需要排重列值
|
1087
|
-
def prepare_values(row):
|
1088
|
-
return [row.get(col) for col in all_columns]
|
1089
1142
|
else:
|
1090
1143
|
sql = f"""INSERT INTO `{db_name}`.`{table_name}`
|
1091
1144
|
(`{'`,`'.join(safe_columns)}`)
|
@@ -1096,10 +1149,6 @@ class MySQLUploader:
|
|
1096
1149
|
WHERE {where_clause}
|
1097
1150
|
)
|
1098
1151
|
"""
|
1099
|
-
|
1100
|
-
# 在check_duplicate模式下,row_values需要插入数据+排重列值
|
1101
|
-
def prepare_values(row):
|
1102
|
-
return [row.get(col) for col in all_columns] + [row.get(col) for col in duplicate_columns]
|
1103
1152
|
else:
|
1104
1153
|
sql = f"""
|
1105
1154
|
INSERT INTO `{db_name}`.`{table_name}`
|
@@ -1107,68 +1156,64 @@ class MySQLUploader:
|
|
1107
1156
|
VALUES ({placeholders})
|
1108
1157
|
"""
|
1109
1158
|
|
1110
|
-
# 普通模式下,row_values只需要插入数据
|
1111
|
-
def prepare_values(row):
|
1112
|
-
return [row.get(col) for col in all_columns]
|
1113
|
-
|
1114
|
-
total_inserted = 0
|
1115
|
-
total_skipped = 0
|
1116
|
-
total_failed = 0 # 失败计数器
|
1117
|
-
|
1118
1159
|
# 分批插入数据
|
1119
1160
|
with self._get_connection() as conn:
|
1120
1161
|
with conn.cursor() as cursor:
|
1121
1162
|
for i in range(0, len(data), batch_size):
|
1122
|
-
batch_start = time.time()
|
1123
1163
|
batch = data[i:i + batch_size]
|
1124
|
-
|
1164
|
+
batch_inserted = 0
|
1165
|
+
batch_skipped = 0
|
1166
|
+
batch_failed = 0
|
1125
1167
|
|
1126
1168
|
for row in batch:
|
1127
1169
|
try:
|
1128
1170
|
# 准备参数
|
1129
|
-
row_values =
|
1171
|
+
row_values = [row.get(col) for col in all_columns]
|
1172
|
+
if check_duplicate and not update_on_duplicate:
|
1173
|
+
row_values += [row.get(col) for col in duplicate_columns]
|
1174
|
+
|
1130
1175
|
cursor.execute(sql, row_values)
|
1131
|
-
successful_rows += 1
|
1132
|
-
conn.commit() # 每次成功插入后提交
|
1133
1176
|
|
1134
|
-
|
1135
|
-
|
1136
|
-
|
1177
|
+
if check_duplicate:
|
1178
|
+
# 检查是否实际插入了行
|
1179
|
+
if cursor.rowcount > 0:
|
1180
|
+
batch_inserted += 1
|
1181
|
+
else:
|
1182
|
+
batch_skipped += 1
|
1183
|
+
else:
|
1184
|
+
batch_inserted += 1
|
1185
|
+
|
1186
|
+
conn.commit()
|
1137
1187
|
|
1138
|
-
|
1139
|
-
|
1188
|
+
except Exception as e:
|
1189
|
+
conn.rollback()
|
1190
|
+
batch_failed += 1
|
1191
|
+
logger.error(sys._getframe().f_code.co_name, {
|
1140
1192
|
'库': db_name,
|
1141
1193
|
'表': table_name,
|
1142
1194
|
'批次': batch_id,
|
1143
1195
|
'error_type': type(e).__name__,
|
1144
|
-
'
|
1196
|
+
'单行插入失败': str(e),
|
1145
1197
|
'数据类型': set_typ,
|
1146
1198
|
'是否排重': check_duplicate,
|
1147
1199
|
'排重列': duplicate_columns
|
1148
1200
|
})
|
1149
|
-
continue # 跳过当前行,继续处理下一行
|
1150
|
-
|
1151
|
-
# 更新统计信息
|
1152
|
-
if check_duplicate:
|
1153
|
-
cursor.execute("SELECT ROW_COUNT()")
|
1154
|
-
affected_rows = cursor.rowcount
|
1155
|
-
total_inserted += affected_rows
|
1156
|
-
total_skipped += len(batch) - affected_rows - (len(batch) - successful_rows)
|
1157
|
-
else:
|
1158
|
-
total_inserted += successful_rows
|
1159
1201
|
|
1160
|
-
|
1161
|
-
|
1202
|
+
# 更新总统计
|
1203
|
+
total_inserted += batch_inserted
|
1204
|
+
total_skipped += batch_skipped
|
1205
|
+
total_failed += batch_failed
|
1206
|
+
|
1207
|
+
logger.debug(sys._getframe().f_code.co_name, {
|
1162
1208
|
'库': db_name,
|
1163
1209
|
'表': table_name,
|
1164
1210
|
'批次': batch_id,
|
1165
|
-
'
|
1166
|
-
'
|
1167
|
-
'
|
1168
|
-
'
|
1169
|
-
'
|
1170
|
-
'
|
1171
|
-
'rows_per_second': successful_rows / batch_elapsed if batch_elapsed > 0 else 0
|
1211
|
+
'批次处理完成': i // batch_size + 1,
|
1212
|
+
'总批次': (len(data) + batch_size - 1) // batch_size,
|
1213
|
+
'数据量': len(batch),
|
1214
|
+
'插入': batch_inserted,
|
1215
|
+
'跳过': batch_skipped,
|
1216
|
+
'失败': batch_failed
|
1172
1217
|
})
|
1173
1218
|
|
1174
1219
|
logger.info('插入完成', {
|
mdbq-3.9.16/mdbq/__version__.py
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
VERSION = '3.9.16'
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|