mdbq-3.12.0-py3-none-any.whl → mdbq-3.12.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/__version__.py +1 -1
- mdbq/mysql/uploader.py +177 -83
- {mdbq-3.12.0.dist-info → mdbq-3.12.1.dist-info}/METADATA +1 -1
- {mdbq-3.12.0.dist-info → mdbq-3.12.1.dist-info}/RECORD +6 -6
- {mdbq-3.12.0.dist-info → mdbq-3.12.1.dist-info}/WHEEL +0 -0
- {mdbq-3.12.0.dist-info → mdbq-3.12.1.dist-info}/top_level.txt +0 -0
mdbq/__version__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
VERSION = '3.12.0'
|
1
|
+
VERSION = '3.12.1'
|
mdbq/mysql/uploader.py
CHANGED
@@ -23,8 +23,8 @@ logger = mylogger.MyLogger(
|
|
23
23
|
max_log_size=50,
|
24
24
|
backup_count=5,
|
25
25
|
enable_async=False, # 是否启用异步日志
|
26
|
-
sample_rate=1, # 采样
|
27
|
-
sensitive_fields=[], #
|
26
|
+
sample_rate=1, # 采样DEBUG/INFO日志, 0.5表示50%的日志会被采样
|
27
|
+
sensitive_fields=[], # 过滤敏感字段列表
|
28
28
|
)
|
29
29
|
|
30
30
|
|
@@ -83,7 +83,7 @@ class MySQLUploader:
|
|
83
83
|
charset: str = 'utf8mb4',
|
84
84
|
collation: str = 'utf8mb4_0900_ai_ci',
|
85
85
|
max_retries: int = 10,
|
86
|
-
|
86
|
+
retry_waiting_time: int = 10,
|
87
87
|
pool_size: int = 5,
|
88
88
|
connect_timeout: int = 10,
|
89
89
|
read_timeout: int = 30,
|
@@ -100,7 +100,7 @@ class MySQLUploader:
|
|
100
100
|
:param charset: 字符集,默认为utf8mb4
|
101
101
|
:param collation: 排序规则,默认为utf8mb4_0900_ai_ci,对大小写不敏感,utf8mb4_0900_as_cs/utf8mb4_bin: 对大小写敏感
|
102
102
|
:param max_retries: 最大重试次数,默认为10
|
103
|
-
:param
|
103
|
+
:param retry_waiting_time: 重试间隔(秒),默认为10
|
104
104
|
:param pool_size: 连接池大小,默认为5
|
105
105
|
:param connect_timeout: 连接超时(秒),默认为10
|
106
106
|
:param read_timeout: 读取超时(秒),默认为30
|
@@ -114,7 +114,7 @@ class MySQLUploader:
|
|
114
114
|
self.charset = charset
|
115
115
|
self.collation = collation
|
116
116
|
self.max_retries = max(max_retries, 1)
|
117
|
-
self.
|
117
|
+
self.retry_waiting_time = max(retry_waiting_time, 1)
|
118
118
|
self.pool_size = max(pool_size, 1)
|
119
119
|
self.connect_timeout = connect_timeout
|
120
120
|
self.read_timeout = read_timeout
|
@@ -169,7 +169,7 @@ class MySQLUploader:
|
|
169
169
|
}
|
170
170
|
try:
|
171
171
|
pool = PooledDB(**pool_params)
|
172
|
-
logger.
|
172
|
+
logger.debug('连接池创建成功', {'连接池': self.pool_size, 'host': self.host, 'port': self.port})
|
173
173
|
return pool
|
174
174
|
except Exception as e:
|
175
175
|
self.pool = None
|
@@ -188,14 +188,11 @@ class MySQLUploader:
|
|
188
188
|
def wrapper(self, *args, **kwargs):
|
189
189
|
last_exception = None
|
190
190
|
operation = func.__name__
|
191
|
-
logger.debug(f'开始执行操作: {operation}', {'max_retries': self.max_retries})
|
192
191
|
for attempt in range(self.max_retries):
|
193
192
|
try:
|
194
193
|
result = func(self, *args, **kwargs)
|
195
194
|
if attempt > 0:
|
196
195
|
logger.info('操作成功(重试后)', {'operation': operation, 'attempts': attempt + 1})
|
197
|
-
else:
|
198
|
-
logger.debug('操作成功', {'operation': operation})
|
199
196
|
return result
|
200
197
|
except (pymysql.OperationalError, pymysql.err.MySQLError) as e:
|
201
198
|
last_exception = e
|
@@ -207,7 +204,7 @@ class MySQLUploader:
|
|
207
204
|
'max_retries': self.max_retries
|
208
205
|
}
|
209
206
|
if attempt < self.max_retries - 1:
|
210
|
-
wait_time = self.
|
207
|
+
wait_time = self.retry_waiting_time * (attempt + 1)
|
211
208
|
error_details['wait_time'] = wait_time
|
212
209
|
logger.warning('数据库操作失败,准备重试', error_details)
|
213
210
|
time.sleep(wait_time)
|
@@ -218,13 +215,6 @@ class MySQLUploader:
|
|
218
215
|
logger.error('重连失败', {'error': str(reconnect_error)})
|
219
216
|
else:
|
220
217
|
logger.error('操作最终失败', error_details)
|
221
|
-
except pymysql.IntegrityError as e:
|
222
|
-
logger.error('完整性约束错误', {
|
223
|
-
'operation': operation,
|
224
|
-
'error_code': e.args[0] if e.args else None,
|
225
|
-
'error_message': e.args[1] if len(e.args) > 1 else None
|
226
|
-
})
|
227
|
-
raise e
|
228
218
|
except Exception as e:
|
229
219
|
last_exception = e
|
230
220
|
logger.error('发生意外错误', {
|
@@ -247,10 +237,9 @@ class MySQLUploader:
|
|
247
237
|
"""
|
248
238
|
try:
|
249
239
|
conn = self.pool.connection()
|
250
|
-
logger.debug('获取数据库连接', {'host': self.host, 'port': self.port})
|
251
240
|
return conn
|
252
241
|
except Exception as e:
|
253
|
-
logger.error('
|
242
|
+
logger.error('从连接池获取数据库连接失败', {'error': str(e)})
|
254
243
|
raise ConnectionError(f'连接数据库失败: {str(e)}')
|
255
244
|
|
256
245
|
@_execute_with_retry
|
@@ -392,7 +381,8 @@ class MySQLUploader:
|
|
392
381
|
primary_keys: Optional[List[str]] = None,
|
393
382
|
date_column: Optional[str] = None,
|
394
383
|
indexes: Optional[List[str]] = None,
|
395
|
-
allow_null: bool = False
|
384
|
+
allow_null: bool = False,
|
385
|
+
unique_keys: Optional[List[List[str]]] = None
|
396
386
|
) -> None:
|
397
387
|
"""
|
398
388
|
创建数据表,优化索引创建方式
|
@@ -402,39 +392,48 @@ class MySQLUploader:
|
|
402
392
|
if not set_typ:
|
403
393
|
logger.error('建表时未指定set_typ', {'库': db_name, '表': table_name})
|
404
394
|
raise ValueError('set_typ 未指定')
|
395
|
+
# set_typ的键清洗
|
396
|
+
set_typ = {self._normalize_col(k): v for k, v in set_typ.items()}
|
405
397
|
column_defs = ["`id` INT NOT NULL AUTO_INCREMENT"]
|
406
398
|
for col_name, col_type in set_typ.items():
|
407
|
-
if col_name
|
399
|
+
if col_name == 'id':
|
408
400
|
continue
|
409
|
-
safe_col_name = self.
|
401
|
+
safe_col_name = self._normalize_col(col_name)
|
410
402
|
col_def = f"`{safe_col_name}` {col_type}"
|
411
403
|
if not allow_null and not col_type.lower().startswith('json'):
|
412
404
|
col_def += " NOT NULL"
|
413
405
|
column_defs.append(col_def)
|
414
|
-
|
415
|
-
|
416
|
-
|
406
|
+
# 主键处理逻辑调整
|
407
|
+
if primary_keys and len(primary_keys) > 0:
|
408
|
+
safe_primary_keys = [self._normalize_col(pk) for pk in primary_keys]
|
409
|
+
primary_key_sql = f"PRIMARY KEY (`{'`,`'.join(safe_primary_keys)}`)"
|
417
410
|
else:
|
418
|
-
|
419
|
-
|
420
|
-
primary_key_sql = f", PRIMARY KEY (`{'`,`'.join(safe_primary_keys)}`)"
|
411
|
+
safe_primary_keys = [self._normalize_col('id')]
|
412
|
+
primary_key_sql = f"PRIMARY KEY (`id`)"
|
421
413
|
# 索引统一在CREATE TABLE中定义
|
422
414
|
index_defs = []
|
423
415
|
if date_column and date_column in set_typ:
|
424
|
-
safe_date_col = self.
|
416
|
+
safe_date_col = self._normalize_col(date_column)
|
425
417
|
index_defs.append(f"INDEX `idx_{safe_date_col}` (`{safe_date_col}`)")
|
426
418
|
if indexes:
|
427
419
|
for idx_col in indexes:
|
428
420
|
if idx_col in set_typ:
|
429
|
-
safe_idx_col = self.
|
421
|
+
safe_idx_col = self._normalize_col(idx_col)
|
430
422
|
index_defs.append(f"INDEX `idx_{safe_idx_col}` (`{safe_idx_col}`)")
|
423
|
+
# UNIQUE KEY定义
|
424
|
+
unique_defs = []
|
425
|
+
if unique_keys:
|
426
|
+
for idx, unique_cols in enumerate(unique_keys):
|
427
|
+
if not unique_cols:
|
428
|
+
continue
|
429
|
+
safe_unique_cols = [self._normalize_col(col) for col in unique_cols]
|
430
|
+
unique_name = f"uniq_{'_'.join(safe_unique_cols)}_{idx}"
|
431
|
+
unique_defs.append(f"UNIQUE KEY `{unique_name}` (`{'`,`'.join(safe_unique_cols)}`)")
|
431
432
|
index_defs = list(set(index_defs))
|
432
|
-
|
433
|
+
all_defs = column_defs + [primary_key_sql] + index_defs + unique_defs
|
433
434
|
sql = f"""
|
434
435
|
CREATE TABLE IF NOT EXISTS `{db_name}`.`{table_name}` (
|
435
|
-
{','.join(
|
436
|
-
{primary_key_sql}
|
437
|
-
{index_sql}
|
436
|
+
{','.join(all_defs)}
|
438
437
|
) ENGINE=InnoDB DEFAULT CHARSET={self.charset} COLLATE={self.collation}
|
439
438
|
"""
|
440
439
|
conn = None
|
@@ -443,7 +442,7 @@ class MySQLUploader:
|
|
443
442
|
with conn.cursor() as cursor:
|
444
443
|
cursor.execute(sql)
|
445
444
|
conn.commit()
|
446
|
-
logger.info('数据表及索引已创建', {'库': db_name, '表': table_name, '索引': indexes})
|
445
|
+
logger.info('数据表及索引已创建', {'库': db_name, '表': table_name, '索引': indexes, '唯一约束': unique_keys})
|
447
446
|
except Exception as e:
|
448
447
|
logger.error('建表失败', {'库': db_name, '表': table_name, '错误': str(e)})
|
449
448
|
if conn is not None:
|
@@ -476,11 +475,9 @@ class MySQLUploader:
|
|
476
475
|
try:
|
477
476
|
if date_type:
|
478
477
|
result = pd.to_datetime(datetime.datetime.strptime(value, fmt).strftime('%Y-%m-%d'))
|
479
|
-
logger.debug('日期格式化成功', {'原始': value, '格式': fmt, '结果': str(result)})
|
480
478
|
return result
|
481
479
|
else:
|
482
480
|
result = datetime.datetime.strptime(value, fmt).strftime('%Y-%m-%d %H:%M:%S')
|
483
|
-
logger.debug('日期格式化成功', {'原始': value, '格式': fmt, '结果': str(result)})
|
484
481
|
return result
|
485
482
|
except ValueError:
|
486
483
|
continue
|
@@ -613,7 +610,7 @@ class MySQLUploader:
|
|
613
610
|
cursor.execute(sql_check, (db_name, table_name, column))
|
614
611
|
exists = cursor.fetchone()
|
615
612
|
if exists and list(exists.values())[0] > 0:
|
616
|
-
logger.debug('
|
613
|
+
logger.debug('索引检查', {'库': db_name, '表': table_name, '索引列': column})
|
617
614
|
return
|
618
615
|
cursor.execute(sql_create)
|
619
616
|
conn.commit()
|
@@ -622,6 +619,49 @@ class MySQLUploader:
|
|
622
619
|
logger.error('创建索引失败', {'库': db_name, '表': table_name, '列': column, '错误': str(e)})
|
623
620
|
raise
|
624
621
|
|
622
|
+
def _get_existing_unique_keys(self, db_name: str, table_name: str) -> List[List[str]]:
|
623
|
+
"""
|
624
|
+
获取表中所有UNIQUE KEY的列组合(不含主键)。
|
625
|
+
返回:[[col1, col2], ...]
|
626
|
+
"""
|
627
|
+
db_name = self._validate_identifier(db_name)
|
628
|
+
table_name = self._validate_identifier(table_name)
|
629
|
+
sql = '''
|
630
|
+
SELECT INDEX_NAME, COLUMN_NAME
|
631
|
+
FROM INFORMATION_SCHEMA.STATISTICS
|
632
|
+
WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s AND NON_UNIQUE = 0 AND INDEX_NAME != 'PRIMARY'
|
633
|
+
ORDER BY INDEX_NAME, SEQ_IN_INDEX
|
634
|
+
'''
|
635
|
+
unique_map = {}
|
636
|
+
try:
|
637
|
+
with self._get_connection() as conn:
|
638
|
+
with conn.cursor() as cursor:
|
639
|
+
cursor.execute(sql, (db_name, table_name))
|
640
|
+
for row in cursor.fetchall():
|
641
|
+
idx = row['INDEX_NAME']
|
642
|
+
col = row['COLUMN_NAME']
|
643
|
+
unique_map.setdefault(idx, []).append(col)
|
644
|
+
except Exception as e:
|
645
|
+
logger.warning('获取UNIQUE KEY信息失败', {'库': db_name, '表': table_name, '错误': str(e)})
|
646
|
+
# 只返回列名组合,全部清洗小写
|
647
|
+
return [[self._normalize_col(c) for c in cols] for cols in unique_map.values() if cols]
|
648
|
+
|
649
|
+
def _add_unique_key(self, db_name: str, table_name: str, unique_cols: List[str]):
|
650
|
+
"""
|
651
|
+
添加UNIQUE KEY
|
652
|
+
"""
|
653
|
+
safe_cols = [self._normalize_col(col) for col in unique_cols]
|
654
|
+
unique_name = f"uniq_{'_'.join(safe_cols)}_{int(time.time()*1000)%100000}"
|
655
|
+
sql = f'ALTER TABLE `{db_name}`.`{table_name}` ADD UNIQUE KEY `{unique_name}` ({','.join(f'`{col}`' for col in safe_cols)})'
|
656
|
+
try:
|
657
|
+
with self._get_connection() as conn:
|
658
|
+
with conn.cursor() as cursor:
|
659
|
+
cursor.execute(sql)
|
660
|
+
conn.commit()
|
661
|
+
logger.info('添加唯一约束列成功', {'库': db_name, '表': table_name, '列': unique_cols})
|
662
|
+
except Exception as e:
|
663
|
+
logger.warning('唯一约束列添加失败', {'库': db_name, '表': table_name, '列': unique_cols, '错误': str(e)})
|
664
|
+
|
625
665
|
def _upload_to_table(
|
626
666
|
self,
|
627
667
|
db_name: str,
|
@@ -637,14 +677,15 @@ class MySQLUploader:
|
|
637
677
|
indexes: Optional[List[str]],
|
638
678
|
batch_id: Optional[str] = None,
|
639
679
|
update_on_duplicate: bool = False,
|
640
|
-
transaction_mode: str = "batch"
|
680
|
+
transaction_mode: str = "batch",
|
681
|
+
unique_keys: Optional[List[List[str]]] = None
|
641
682
|
):
|
642
683
|
"""实际执行表上传的方法"""
|
643
|
-
|
644
|
-
if not
|
684
|
+
table_existed = self._check_table_exists(db_name, table_name)
|
685
|
+
if not table_existed:
|
645
686
|
if auto_create:
|
646
687
|
self._create_table(db_name, table_name, set_typ, primary_keys, date_column, indexes,
|
647
|
-
allow_null=allow_null)
|
688
|
+
allow_null=allow_null, unique_keys=unique_keys)
|
648
689
|
else:
|
649
690
|
logger.error('数据表不存在', {
|
650
691
|
'库': db_name,
|
@@ -652,8 +693,30 @@ class MySQLUploader:
|
|
652
693
|
'func': sys._getframe().f_code.co_name,
|
653
694
|
})
|
654
695
|
raise ValueError(f"数据表不存在: `{db_name}`.`{table_name}`")
|
655
|
-
|
656
|
-
|
696
|
+
if table_existed and unique_keys:
|
697
|
+
try:
|
698
|
+
exist_ukeys = self._get_existing_unique_keys(db_name, table_name)
|
699
|
+
exist_ukeys_norm = [sorted([c.lower() for c in uk]) for uk in exist_ukeys]
|
700
|
+
filtered_ukeys = [uk for uk in unique_keys if 1 <= len(uk) <= 20]
|
701
|
+
to_add = []
|
702
|
+
for uk in filtered_ukeys:
|
703
|
+
norm_uk = sorted([c.lower() for c in uk])
|
704
|
+
if norm_uk not in exist_ukeys_norm:
|
705
|
+
to_add.append(uk)
|
706
|
+
max_unique_keys = 10
|
707
|
+
if len(exist_ukeys) + len(to_add) > max_unique_keys:
|
708
|
+
logger.warning('unique_keys超限', {
|
709
|
+
'库': db_name,
|
710
|
+
'表': table_name,
|
711
|
+
'已存在': exist_ukeys,
|
712
|
+
'本次待添加': to_add,
|
713
|
+
'最大数量': max_unique_keys
|
714
|
+
})
|
715
|
+
to_add = to_add[:max_unique_keys - len(exist_ukeys)]
|
716
|
+
for uk in to_add:
|
717
|
+
self._add_unique_key(db_name, table_name, uk)
|
718
|
+
except Exception as e:
|
719
|
+
logger.warning('动态unique key处理异常', {'库': db_name, '表': table_name, '错误': str(e)})
|
657
720
|
table_columns = self._get_table_columns(db_name, table_name)
|
658
721
|
if not table_columns:
|
659
722
|
logger.error('获取列失败', {
|
@@ -663,8 +726,6 @@ class MySQLUploader:
|
|
663
726
|
'func': sys._getframe().f_code.co_name,
|
664
727
|
})
|
665
728
|
raise ValueError(f"获取列失败 `{db_name}`.`{table_name}`")
|
666
|
-
|
667
|
-
# 验证数据列与表列匹配
|
668
729
|
for col in set_typ:
|
669
730
|
if col not in table_columns:
|
670
731
|
logger.error('列不存在', {
|
@@ -674,22 +735,19 @@ class MySQLUploader:
|
|
674
735
|
'func': sys._getframe().f_code.co_name,
|
675
736
|
})
|
676
737
|
raise ValueError(f"列不存在: `{col}` -> `{db_name}`.`{table_name}`")
|
677
|
-
|
678
|
-
# 确保分表参考字段为索引
|
679
738
|
if date_column and date_column in table_columns:
|
680
739
|
try:
|
681
740
|
self._ensure_index(db_name, table_name, date_column)
|
682
741
|
except Exception as e:
|
683
742
|
logger.warning('分表参考字段索引创建失败', {'库': db_name, '表': table_name, '列': date_column, '错误': str(e)})
|
684
|
-
|
685
|
-
# 插入数据
|
686
|
-
self._insert_data(
|
743
|
+
inserted, skipped, failed = self._insert_data(
|
687
744
|
db_name, table_name, data, set_typ,
|
688
745
|
check_duplicate, duplicate_columns,
|
689
746
|
batch_id=batch_id,
|
690
747
|
update_on_duplicate=update_on_duplicate,
|
691
748
|
transaction_mode=transaction_mode
|
692
749
|
)
|
750
|
+
return inserted, skipped, failed
|
693
751
|
|
694
752
|
def _infer_data_type(self, value: Any, no_log: bool = False) -> str:
|
695
753
|
"""
|
@@ -817,11 +875,8 @@ class MySQLUploader:
|
|
817
875
|
# 统一处理原始数据中列名的特殊字符
|
818
876
|
data = self.normalize_column_names(data)
|
819
877
|
|
820
|
-
# set_typ
|
821
|
-
|
822
|
-
set_typ = {k: v for k, v in set_typ.items()}
|
823
|
-
else:
|
824
|
-
set_typ = {k.lower(): v for k, v in set_typ.items()}
|
878
|
+
# set_typ的键清洗
|
879
|
+
set_typ = {self._normalize_col(k): v for k, v in set_typ.items()}
|
825
880
|
|
826
881
|
# 获取数据中实际存在的列名
|
827
882
|
data_columns = set()
|
@@ -890,7 +945,8 @@ class MySQLUploader:
|
|
890
945
|
auto_create: bool = True,
|
891
946
|
indexes: Optional[List[str]] = None,
|
892
947
|
update_on_duplicate: bool = False,
|
893
|
-
transaction_mode: str = "batch"
|
948
|
+
transaction_mode: str = "batch",
|
949
|
+
unique_keys: Optional[List[List[str]]] = None
|
894
950
|
):
|
895
951
|
"""
|
896
952
|
上传数据到数据库的主入口方法,分表逻辑异常处理统计丢弃数据
|
@@ -912,6 +968,7 @@ class MySQLUploader:
|
|
912
968
|
- 'row' : 逐行提交事务(错误隔离性好)
|
913
969
|
- 'batch' : 整批提交事务(性能最优)
|
914
970
|
- 'hybrid' : 混合模式(每N行提交,平衡性能与安全性)
|
971
|
+
:param unique_keys: 唯一约束列表,每个元素为列名列表,支持多列组合唯一约束
|
915
972
|
:raises: 可能抛出各种验证和数据库相关异常
|
916
973
|
"""
|
917
974
|
# upload_start = time.time()
|
@@ -936,7 +993,8 @@ class MySQLUploader:
|
|
936
993
|
# '自动建表': auto_create,
|
937
994
|
'索引': indexes,
|
938
995
|
'更新旧数据': update_on_duplicate,
|
939
|
-
'事务模式': transaction_mode
|
996
|
+
'事务模式': transaction_mode,
|
997
|
+
'唯一约束': unique_keys
|
940
998
|
},
|
941
999
|
# '数据样例': self._shorten_for_log(data, 2)
|
942
1000
|
})
|
@@ -1005,15 +1063,21 @@ class MySQLUploader:
|
|
1005
1063
|
continue
|
1006
1064
|
|
1007
1065
|
# 对每个分表执行上传
|
1066
|
+
total_inserted = 0
|
1067
|
+
total_skipped = dropped_rows # 分表异常丢弃
|
1068
|
+
total_failed = 0
|
1008
1069
|
for part_table, part_data in partitioned_data.items():
|
1009
1070
|
try:
|
1010
|
-
self._upload_to_table(
|
1071
|
+
inserted, skipped, failed = self._upload_to_table(
|
1011
1072
|
db_name, part_table, part_data, filtered_set_typ,
|
1012
1073
|
primary_keys, check_duplicate, duplicate_columns,
|
1013
1074
|
allow_null, auto_create, partition_date_column,
|
1014
|
-
indexes, batch_id, update_on_duplicate, transaction_mode
|
1075
|
+
indexes, batch_id, update_on_duplicate, transaction_mode,
|
1076
|
+
unique_keys
|
1015
1077
|
)
|
1016
|
-
|
1078
|
+
total_inserted += inserted
|
1079
|
+
total_skipped += skipped
|
1080
|
+
total_failed += failed
|
1017
1081
|
if partition_date_column in filtered_set_typ:
|
1018
1082
|
try:
|
1019
1083
|
self._ensure_index(db_name, part_table, partition_date_column)
|
@@ -1031,13 +1095,16 @@ class MySQLUploader:
|
|
1031
1095
|
continue # 跳过当前分表,继续处理其他分表
|
1032
1096
|
else:
|
1033
1097
|
# 不分表,直接上传
|
1034
|
-
self._upload_to_table(
|
1098
|
+
inserted, skipped, failed = self._upload_to_table(
|
1035
1099
|
db_name, table_name, prepared_data, filtered_set_typ,
|
1036
1100
|
primary_keys, check_duplicate, duplicate_columns,
|
1037
1101
|
allow_null, auto_create, partition_date_column,
|
1038
|
-
indexes, batch_id, update_on_duplicate, transaction_mode
|
1102
|
+
indexes, batch_id, update_on_duplicate, transaction_mode,
|
1103
|
+
unique_keys
|
1039
1104
|
)
|
1040
|
-
|
1105
|
+
total_inserted = inserted
|
1106
|
+
total_skipped = skipped
|
1107
|
+
total_failed = failed
|
1041
1108
|
if partition_date_column in filtered_set_typ:
|
1042
1109
|
try:
|
1043
1110
|
self._ensure_index(db_name, table_name, partition_date_column)
|
@@ -1062,7 +1129,9 @@ class MySQLUploader:
|
|
1062
1129
|
'批次': batch_id,
|
1063
1130
|
'finish': success_flag,
|
1064
1131
|
'数据行': initial_row_count,
|
1065
|
-
'
|
1132
|
+
'插入': total_inserted,
|
1133
|
+
'跳过': total_skipped,
|
1134
|
+
'失败': total_failed
|
1066
1135
|
})
|
1067
1136
|
|
1068
1137
|
@_execute_with_retry
|
@@ -1095,26 +1164,19 @@ class MySQLUploader:
|
|
1095
1164
|
- 'hybrid' : 混合模式(每N行提交,平衡性能与安全性)
|
1096
1165
|
"""
|
1097
1166
|
if not data:
|
1098
|
-
return
|
1099
|
-
|
1100
|
-
# 验证事务模式
|
1167
|
+
return 0, 0, 0
|
1101
1168
|
transaction_mode = self._validate_transaction_mode(transaction_mode)
|
1102
|
-
|
1103
|
-
# 准备SQL语句
|
1104
1169
|
sql = self._prepare_insert_sql(
|
1105
1170
|
db_name, table_name, set_typ,
|
1106
1171
|
check_duplicate, duplicate_columns,
|
1107
1172
|
update_on_duplicate
|
1108
1173
|
)
|
1109
|
-
|
1110
|
-
# 执行批量插入
|
1111
1174
|
total_inserted, total_skipped, total_failed = self._execute_batch_insert(
|
1112
1175
|
db_name, table_name, data, set_typ,
|
1113
1176
|
sql, check_duplicate, duplicate_columns,
|
1114
1177
|
batch_id, transaction_mode,
|
1115
1178
|
update_on_duplicate
|
1116
1179
|
)
|
1117
|
-
|
1118
1180
|
logger.info('插入完成', {
|
1119
1181
|
'库': db_name,
|
1120
1182
|
'表': table_name,
|
@@ -1124,6 +1186,7 @@ class MySQLUploader:
|
|
1124
1186
|
'失败': total_failed,
|
1125
1187
|
'事务模式': transaction_mode,
|
1126
1188
|
})
|
1189
|
+
return total_inserted, total_skipped, total_failed
|
1127
1190
|
|
1128
1191
|
def _validate_transaction_mode(self, mode: str) -> str:
|
1129
1192
|
"""验证并标准化事务模式"""
|
@@ -1266,6 +1329,7 @@ class MySQLUploader:
|
|
1266
1329
|
update_on_duplicate: bool = False
|
1267
1330
|
) -> Tuple[int, int, int]:
|
1268
1331
|
"""执行批量插入操作,优化batch和hybrid模式"""
|
1332
|
+
import pymysql # 确保异常类型可用
|
1269
1333
|
def get_optimal_batch_size(total_rows: int) -> int:
|
1270
1334
|
if total_rows <= 100:
|
1271
1335
|
return total_rows
|
@@ -1295,7 +1359,13 @@ class MySQLUploader:
|
|
1295
1359
|
try:
|
1296
1360
|
cursor.executemany(sql, values_list)
|
1297
1361
|
conn.commit()
|
1298
|
-
|
1362
|
+
inserted = cursor.rowcount if cursor.rowcount is not None else 0
|
1363
|
+
total_inserted += inserted
|
1364
|
+
total_skipped += len(batch) - inserted
|
1365
|
+
except pymysql.err.IntegrityError as e:
|
1366
|
+
conn.rollback()
|
1367
|
+
total_skipped += len(batch)
|
1368
|
+
logger.debug('批量插入唯一约束冲突,全部跳过', {'库': db_name, '表': table_name, '错误': str(e)})
|
1299
1369
|
except Exception as e:
|
1300
1370
|
conn.rollback()
|
1301
1371
|
total_failed += len(batch)
|
@@ -1311,7 +1381,15 @@ class MySQLUploader:
|
|
1311
1381
|
dup_cols = duplicate_columns if duplicate_columns else [col for col in all_columns if col.lower() not in self.base_excute_col]
|
1312
1382
|
values += [row.get(col) for col in dup_cols]
|
1313
1383
|
cursor.execute(sql, values)
|
1314
|
-
|
1384
|
+
affected = cursor.rowcount if cursor.rowcount is not None else 0
|
1385
|
+
if affected > 0:
|
1386
|
+
total_inserted += 1
|
1387
|
+
else:
|
1388
|
+
total_skipped += 1
|
1389
|
+
except pymysql.err.IntegrityError as e:
|
1390
|
+
conn.rollback()
|
1391
|
+
total_skipped += 1
|
1392
|
+
logger.debug('hybrid单行插入唯一约束冲突,跳过', {'库': db_name, '表': table_name, '错误': str(e)})
|
1315
1393
|
except Exception as e:
|
1316
1394
|
conn.rollback()
|
1317
1395
|
total_failed += 1
|
@@ -1325,8 +1403,16 @@ class MySQLUploader:
|
|
1325
1403
|
dup_cols = duplicate_columns if duplicate_columns else [col for col in all_columns if col.lower() not in self.base_excute_col]
|
1326
1404
|
values += [row.get(col) for col in dup_cols]
|
1327
1405
|
cursor.execute(sql, values)
|
1406
|
+
affected = cursor.rowcount if cursor.rowcount is not None else 0
|
1407
|
+
if affected > 0:
|
1408
|
+
total_inserted += 1
|
1409
|
+
else:
|
1410
|
+
total_skipped += 1
|
1328
1411
|
conn.commit()
|
1329
|
-
|
1412
|
+
except pymysql.err.IntegrityError as e:
|
1413
|
+
conn.rollback()
|
1414
|
+
total_skipped += 1
|
1415
|
+
logger.debug('单行插入唯一约束冲突,跳过', {'库': db_name, '表': table_name, '错误': str(e)})
|
1330
1416
|
except Exception as e:
|
1331
1417
|
conn.rollback()
|
1332
1418
|
total_failed += 1
|
@@ -1347,9 +1433,9 @@ class MySQLUploader:
|
|
1347
1433
|
self.pool = None
|
1348
1434
|
except Exception as e:
|
1349
1435
|
logger.warning('关闭连接池时出错', {'error': str(e)})
|
1350
|
-
logger.
|
1436
|
+
logger.debug('finished', {'uploader.py': '连接池关闭'})
|
1351
1437
|
except Exception as e:
|
1352
|
-
logger.error('关闭连接池失败', {'
|
1438
|
+
logger.error('关闭连接池失败', {'uploader.py': str(e)})
|
1353
1439
|
raise
|
1354
1440
|
|
1355
1441
|
def _check_pool_health(self) -> bool:
|
@@ -1431,6 +1517,13 @@ class MySQLUploader:
|
|
1431
1517
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
1432
1518
|
self.close()
|
1433
1519
|
|
1520
|
+
def _normalize_col(self, col: str) -> str:
|
1521
|
+
"""
|
1522
|
+
列名自动清洗并转小写(如case_sensitive为False),保证和表结构一致。
|
1523
|
+
"""
|
1524
|
+
safe = self._validate_identifier(col)
|
1525
|
+
return safe if self.case_sensitive else safe.lower()
|
1526
|
+
|
1434
1527
|
|
1435
1528
|
def main():
|
1436
1529
|
"""
|
@@ -1443,7 +1536,7 @@ def main():
|
|
1443
1536
|
"""
|
1444
1537
|
uploader = MySQLUploader(
|
1445
1538
|
username='root',
|
1446
|
-
password='
|
1539
|
+
password='pwd',
|
1447
1540
|
host='localhost',
|
1448
1541
|
port=3306,
|
1449
1542
|
)
|
@@ -1462,7 +1555,7 @@ def main():
|
|
1462
1555
|
{'日期': '2023-01-8', 'name': 'JACk', 'AGE': '24', 'salary': 555.1545},
|
1463
1556
|
{'日期': '2023-01-15', 'name': 'Alice', 'AGE': 35, 'salary': '100'},
|
1464
1557
|
{'日期': '2023-01-15', 'name': 'Alice', 'AGE': 30, 'salary': 0.0},
|
1465
|
-
{'日期': '2023-02-20', 'name': 'Bob', 'AGE': 25, 'salary': 45000.75}
|
1558
|
+
{'日期': '2023-02-20', 'name': 'Bob', 'AGE': 25, 'salary': 45000.75},
|
1466
1559
|
]
|
1467
1560
|
|
1468
1561
|
# 上传数据
|
@@ -1474,12 +1567,13 @@ def main():
|
|
1474
1567
|
primary_keys=[], # 创建唯一主键
|
1475
1568
|
check_duplicate=False, # 检查重复数据
|
1476
1569
|
duplicate_columns=[], # 指定排重的组合键
|
1570
|
+
update_on_duplicate=False, # 更新旧数据
|
1477
1571
|
allow_null=False, # 允许插入空值
|
1478
|
-
partition_by='year', #
|
1572
|
+
partition_by='year', # 分表方式
|
1479
1573
|
partition_date_column='日期', # 用于分表的日期列名,默认为'日期'
|
1480
|
-
|
1481
|
-
indexes=[], # 指定索引列
|
1574
|
+
indexes=[], # 普通索引列
|
1482
1575
|
transaction_mode='row', # 事务模式
|
1576
|
+
unique_keys=[['日期', 'name', 'age']] # 唯一约束列表
|
1483
1577
|
)
|
1484
1578
|
|
1485
1579
|
uploader.close()
|
@@ -1,5 +1,5 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
|
-
mdbq/__version__.py,sha256=
|
2
|
+
mdbq/__version__.py,sha256=vHfePSxiigIQg58VIYYk2QYh_4AtpXtMsfV3nHXNUhg,18
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
4
|
mdbq/aggregation/query_data.py,sha256=nxL8hSy8yI1QLlqnkTNHHQSxRfo-6WKL5OA-N4xLB7c,179832
|
5
5
|
mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
@@ -11,7 +11,7 @@ mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
|
|
11
11
|
mdbq/mysql/deduplicator.py,sha256=KMJ_YyqAniaLVRqOHLgO92PgwknIDB-EgaOY7S6iMZ4,68599
|
12
12
|
mdbq/mysql/mysql.py,sha256=Kjpi-LL00WQUmTTOfhEBsNrmo4-4kFFJzrHbVKfqiBE,56770
|
13
13
|
mdbq/mysql/s_query.py,sha256=dlnrVJ3-Vp1Suv9CNbPxyYSRqRJUHjOpF39tb2F-wBc,10190
|
14
|
-
mdbq/mysql/uploader.py,sha256=
|
14
|
+
mdbq/mysql/uploader.py,sha256=PD8gA2PixoK2ZH4vWTmz1kbNTab8VGUJLoepD024H5Q,70265
|
15
15
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
16
16
|
mdbq/other/download_sku_picture.py,sha256=YU8DxKMXbdeE1OOKEA848WVp62jYHw5O4tXTjUdq9H0,44832
|
17
17
|
mdbq/other/otk.py,sha256=iclBIFbQbhlqzUbcMMoePXBpcP1eZ06ZtjnhcA_EbmE,7241
|
@@ -24,7 +24,7 @@ mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
|
|
24
24
|
mdbq/redis/getredis.py,sha256=YHgCKO8mEsslwet33K5tGss-nrDDwPnOSlhA9iBu0jY,24078
|
25
25
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
26
26
|
mdbq/spider/aikucun.py,sha256=cqK-JRd_DHbToC7hyo83m8o97NZkJFqmB2xBtr6aAVU,20961
|
27
|
-
mdbq-3.12.
|
28
|
-
mdbq-3.12.0.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
29
|
-
mdbq-3.12.0.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
30
|
-
mdbq-3.12.0.dist-info/RECORD,,
|
27
|
+
mdbq-3.12.1.dist-info/METADATA,sha256=viVkeKnHLlpvAxthu_c50VYyla5Uc2COG99IigfDPmc,364
|
28
|
+
mdbq-3.12.1.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
29
|
+
mdbq-3.12.1.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
30
|
+
mdbq-3.12.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|