mdbq 3.12.0__py3-none-any.whl → 3.12.2__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.
mdbq/__version__.py CHANGED
@@ -1 +1 @@
1
- VERSION = '3.12.0'
1
+ VERSION = '3.12.2'
mdbq/mysql/deduplicator.py CHANGED
@@ -1294,32 +1294,32 @@ def main():
1294
1294
  password='pwd',
1295
1295
  host='localhost',
1296
1296
  port=3306,
1297
- max_workers= 2,
1297
+ max_workers= 3,
1298
1298
  batch_size=1000,
1299
1299
  skip_system_dbs=True,
1300
1300
  max_retries=3,
1301
1301
  retry_waiting_time=5,
1302
- # pool_size=30,
1302
+ pool_size=30,
1303
1303
  recent_month=1,
1304
1304
  # date_range=['2025-06-09', '2025-06-10'],
1305
1305
  date_column='日期',
1306
- exclude_databases=['测试库4'],
1307
- exclude_tables={
1308
- '推广数据2': [
1309
- '地域报表_城市_2025_04',
1310
- # '地域报表_城市_2025_04_copy1',
1311
- ],
1312
- "生意参谋3": [
1313
- "商品排行_2025",
1314
- ],
1315
- },
1306
+ # exclude_databases=['测试库4'],
1307
+ # exclude_tables={
1308
+ # '推广数据2': [
1309
+ # '地域报表_城市_2025_04',
1310
+ # # '地域报表_城市_2025_04_copy1',
1311
+ # ],
1312
+ # "生意参谋3": [
1313
+ # "商品排行_2025",
1314
+ # ],
1315
+ # },
1316
1316
  )
1317
1317
 
1318
1318
  # 全库去重(单线程)
1319
1319
  deduplicator.deduplicate_all(dry_run=False, parallel=True, reorder_id=True)
1320
1320
 
1321
1321
  # # 指定数据库去重(多线程)
1322
- # deduplicator.deduplicate_database('my_db', dry_run=False, parallel=True, reorder_id=True)
1322
+ # deduplicator.deduplicate_database('数据引擎2', dry_run=False, parallel=True, reorder_id=True)
1323
1323
 
1324
1324
  # # 指定表去重(使用特定列)
1325
1325
  # deduplicator.deduplicate_table('my_db', 'my_table', columns=['name', 'data'], dry_run=False, reorder_id=True)
@@ -1331,5 +1331,5 @@ def main():
1331
1331
  deduplicator.close()
1332
1332
 
1333
1333
  if __name__ == '__main__':
1334
- # main()
1334
+ main()
1335
1335
  pass
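
The deduplicator's example main() in 3.12.2 raises max_workers from 2 to 3, enables pool_size=30, drops the hard-coded exclude_databases/exclude_tables filters, narrows the single-database example to '数据引擎2', and un-comments the main() call so the script runs when executed directly. A minimal sketch of the new configuration follows; the MySQLDeduplicator class name, import path and username argument are assumptions, since only the `deduplicator` instance appears in this hunk.

    from mdbq.mysql.deduplicator import MySQLDeduplicator  # assumed import path

    deduplicator = MySQLDeduplicator(
        username='root',              # assumed; not visible in this hunk
        password='pwd',
        host='localhost',
        port=3306,
        max_workers=3,                # raised from 2 in 3.12.0
        batch_size=1000,
        skip_system_dbs=True,
        max_retries=3,
        retry_waiting_time=5,
        pool_size=30,                 # previously commented out
        recent_month=1,
        date_column='日期',
    )
    try:
        # dry_run=False applies deletions; parallel=True uses the worker pool
        deduplicator.deduplicate_all(dry_run=False, parallel=True, reorder_id=True)
    finally:
        deduplicator.close()
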
mdbq/mysql/uploader.py CHANGED
@@ -23,8 +23,8 @@ logger = mylogger.MyLogger(
23
23
  max_log_size=50,
24
24
  backup_count=5,
25
25
  enable_async=False, # 是否启用异步日志
26
- sample_rate=1, # 采样50%的DEBUG/INFO日志
27
- sensitive_fields=[], # 敏感字段列表
26
+ sample_rate=1, # 采样DEBUG/INFO日志, 0.5表示50%的日志会被采样
27
+ sensitive_fields=[], # 过滤敏感字段列表
28
28
  )
29
29
 
30
30
 
@@ -83,7 +83,7 @@ class MySQLUploader:
83
83
  charset: str = 'utf8mb4',
84
84
  collation: str = 'utf8mb4_0900_ai_ci',
85
85
  max_retries: int = 10,
86
- retry_interval: int = 10,
86
+ retry_waiting_time: int = 10,
87
87
  pool_size: int = 5,
88
88
  connect_timeout: int = 10,
89
89
  read_timeout: int = 30,
@@ -100,7 +100,7 @@ class MySQLUploader:
100
100
  :param charset: 字符集,默认为utf8mb4
101
101
  :param collation: 排序规则,默认为utf8mb4_0900_ai_ci,对大小写不敏感,utf8mb4_0900_as_cs/utf8mb4_bin: 对大小写敏感
102
102
  :param max_retries: 最大重试次数,默认为10
103
- :param retry_interval: 重试间隔(秒),默认为10
103
+ :param retry_waiting_time: 重试间隔(秒),默认为10
104
104
  :param pool_size: 连接池大小,默认为5
105
105
  :param connect_timeout: 连接超时(秒),默认为10
106
106
  :param read_timeout: 读取超时(秒),默认为30
@@ -114,7 +114,7 @@ class MySQLUploader:
114
114
  self.charset = charset
115
115
  self.collation = collation
116
116
  self.max_retries = max(max_retries, 1)
117
- self.retry_interval = max(retry_interval, 1)
117
+ self.retry_waiting_time = max(retry_waiting_time, 1)
118
118
  self.pool_size = max(pool_size, 1)
119
119
  self.connect_timeout = connect_timeout
120
120
  self.read_timeout = read_timeout
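
The constructor keyword retry_interval is renamed to retry_waiting_time (and self.retry_interval to self.retry_waiting_time), so callers passing the old keyword will get a TypeError on upgrade. A small sketch of constructing the uploader with the new name, using the defaults visible in the signature above:

    from mdbq.mysql.uploader import MySQLUploader

    # 3.12.0: MySQLUploader(..., retry_interval=10)
    # 3.12.2: the keyword is now retry_waiting_time
    uploader = MySQLUploader(
        username='root',
        password='pwd',
        host='localhost',
        port=3306,
        max_retries=10,
        retry_waiting_time=10,   # renamed from retry_interval
        pool_size=5,
    )
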
@@ -169,7 +169,7 @@ class MySQLUploader:
169
169
  }
170
170
  try:
171
171
  pool = PooledDB(**pool_params)
172
- logger.info('连接池创建成功', {'连接池': self.pool_size, 'host': self.host, 'port': self.port})
172
+ logger.debug('连接池创建成功', {'连接池': self.pool_size, 'host': self.host, 'port': self.port})
173
173
  return pool
174
174
  except Exception as e:
175
175
  self.pool = None
@@ -188,14 +188,11 @@ class MySQLUploader:
188
188
  def wrapper(self, *args, **kwargs):
189
189
  last_exception = None
190
190
  operation = func.__name__
191
- logger.debug(f'开始执行操作: {operation}', {'max_retries': self.max_retries})
192
191
  for attempt in range(self.max_retries):
193
192
  try:
194
193
  result = func(self, *args, **kwargs)
195
194
  if attempt > 0:
196
195
  logger.info('操作成功(重试后)', {'operation': operation, 'attempts': attempt + 1})
197
- else:
198
- logger.debug('操作成功', {'operation': operation})
199
196
  return result
200
197
  except (pymysql.OperationalError, pymysql.err.MySQLError) as e:
201
198
  last_exception = e
@@ -207,7 +204,7 @@ class MySQLUploader:
207
204
  'max_retries': self.max_retries
208
205
  }
209
206
  if attempt < self.max_retries - 1:
210
- wait_time = self.retry_interval * (attempt + 1)
207
+ wait_time = self.retry_waiting_time * (attempt + 1)
211
208
  error_details['wait_time'] = wait_time
212
209
  logger.warning('数据库操作失败,准备重试', error_details)
213
210
  time.sleep(wait_time)
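
The retry decorator keeps the same linear back-off, now driven by the renamed attribute: it sleeps retry_waiting_time * (attempt + 1) seconds before each retry. The resulting schedule, for illustration:

    retry_waiting_time = 10
    max_retries = 4
    # wait before retries 2, 3 and 4 -> [10, 20, 30] seconds
    schedule = [retry_waiting_time * (attempt + 1) for attempt in range(max_retries - 1)]
    print(schedule)
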
@@ -218,13 +215,6 @@ class MySQLUploader:
218
215
  logger.error('重连失败', {'error': str(reconnect_error)})
219
216
  else:
220
217
  logger.error('操作最终失败', error_details)
221
- except pymysql.IntegrityError as e:
222
- logger.error('完整性约束错误', {
223
- 'operation': operation,
224
- 'error_code': e.args[0] if e.args else None,
225
- 'error_message': e.args[1] if len(e.args) > 1 else None
226
- })
227
- raise e
228
218
  except Exception as e:
229
219
  last_exception = e
230
220
  logger.error('发生意外错误', {
@@ -247,10 +237,9 @@ class MySQLUploader:
247
237
  """
248
238
  try:
249
239
  conn = self.pool.connection()
250
- logger.debug('获取数据库连接', {'host': self.host, 'port': self.port})
251
240
  return conn
252
241
  except Exception as e:
253
- logger.error('获取数据库连接失败', {'error': str(e)})
242
+ logger.error('从连接池获取数据库连接失败', {'error': str(e)})
254
243
  raise ConnectionError(f'连接数据库失败: {str(e)}')
255
244
 
256
245
  @_execute_with_retry
@@ -392,7 +381,8 @@ class MySQLUploader:
392
381
  primary_keys: Optional[List[str]] = None,
393
382
  date_column: Optional[str] = None,
394
383
  indexes: Optional[List[str]] = None,
395
- allow_null: bool = False
384
+ allow_null: bool = False,
385
+ unique_keys: Optional[List[List[str]]] = None
396
386
  ) -> None:
397
387
  """
398
388
  创建数据表,优化索引创建方式
@@ -402,39 +392,48 @@ class MySQLUploader:
402
392
  if not set_typ:
403
393
  logger.error('建表时未指定set_typ', {'库': db_name, '表': table_name})
404
394
  raise ValueError('set_typ 未指定')
395
+ # set_typ的键清洗
396
+ set_typ = {self._normalize_col(k): v for k, v in set_typ.items()}
405
397
  column_defs = ["`id` INT NOT NULL AUTO_INCREMENT"]
406
398
  for col_name, col_type in set_typ.items():
407
- if col_name.lower() == 'id':
399
+ if col_name == 'id':
408
400
  continue
409
- safe_col_name = self._validate_identifier(col_name)
401
+ safe_col_name = self._normalize_col(col_name)
410
402
  col_def = f"`{safe_col_name}` {col_type}"
411
403
  if not allow_null and not col_type.lower().startswith('json'):
412
404
  col_def += " NOT NULL"
413
405
  column_defs.append(col_def)
414
- if primary_keys:
415
- if 'id' not in [pk.lower() for pk in primary_keys]:
416
- primary_keys = ['id'] + primary_keys
406
+ # 主键处理逻辑调整
407
+ if primary_keys and len(primary_keys) > 0:
408
+ safe_primary_keys = [self._normalize_col(pk) for pk in primary_keys]
409
+ primary_key_sql = f"PRIMARY KEY (`{'`,`'.join(safe_primary_keys)}`)"
417
410
  else:
418
- primary_keys = ['id']
419
- safe_primary_keys = [self._validate_identifier(pk) for pk in primary_keys]
420
- primary_key_sql = f", PRIMARY KEY (`{'`,`'.join(safe_primary_keys)}`)"
411
+ safe_primary_keys = [self._normalize_col('id')]
412
+ primary_key_sql = f"PRIMARY KEY (`id`)"
421
413
  # 索引统一在CREATE TABLE中定义
422
414
  index_defs = []
423
415
  if date_column and date_column in set_typ:
424
- safe_date_col = self._validate_identifier(date_column)
416
+ safe_date_col = self._normalize_col(date_column)
425
417
  index_defs.append(f"INDEX `idx_{safe_date_col}` (`{safe_date_col}`)")
426
418
  if indexes:
427
419
  for idx_col in indexes:
428
420
  if idx_col in set_typ:
429
- safe_idx_col = self._validate_identifier(idx_col)
421
+ safe_idx_col = self._normalize_col(idx_col)
430
422
  index_defs.append(f"INDEX `idx_{safe_idx_col}` (`{safe_idx_col}`)")
423
+ # UNIQUE KEY定义
424
+ unique_defs = []
425
+ if unique_keys:
426
+ for idx, unique_cols in enumerate(unique_keys):
427
+ if not unique_cols:
428
+ continue
429
+ safe_unique_cols = [self._normalize_col(col) for col in unique_cols]
430
+ unique_name = f"uniq_{'_'.join(safe_unique_cols)}_{idx}"
431
+ unique_defs.append(f"UNIQUE KEY `{unique_name}` (`{'`,`'.join(safe_unique_cols)}`)")
431
432
  index_defs = list(set(index_defs))
432
- index_sql = (',' + ','.join(index_defs)) if index_defs else ''
433
+ all_defs = column_defs + [primary_key_sql] + index_defs + unique_defs
433
434
  sql = f"""
434
435
  CREATE TABLE IF NOT EXISTS `{db_name}`.`{table_name}` (
435
- {','.join(column_defs)}
436
- {primary_key_sql}
437
- {index_sql}
436
+ {','.join(all_defs)}
438
437
  ) ENGINE=InnoDB DEFAULT CHARSET={self.charset} COLLATE={self.collation}
439
438
  """
440
439
  conn = None
@@ -443,7 +442,7 @@ class MySQLUploader:
443
442
  with conn.cursor() as cursor:
444
443
  cursor.execute(sql)
445
444
  conn.commit()
446
- logger.info('数据表及索引已创建', {'库': db_name, '表': table_name, '索引': indexes})
445
+ logger.info('数据表及索引已创建', {'库': db_name, '表': table_name, '索引': indexes, '唯一约束': unique_keys})
447
446
  except Exception as e:
448
447
  logger.error('建表失败', {'库': db_name, '表': table_name, '错误': str(e)})
449
448
  if conn is not None:
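
_create_table now assembles column definitions, the primary key, secondary indexes and the new UNIQUE KEY clauses into a single all_defs list before joining them into one CREATE TABLE statement. A rough sketch of the statement this produces for a small set_typ; identifier cleaning by _normalize_col and the de-duplication of index_defs are not reproduced here, and the database/table names are placeholders.

    set_typ = {'日期': 'DATE', 'name': 'VARCHAR(255)', 'age': 'INT'}
    unique_keys = [['日期', 'name']]

    column_defs = ["`id` INT NOT NULL AUTO_INCREMENT"]
    column_defs += [f"`{col}` {typ} NOT NULL" for col, typ in set_typ.items()]
    primary_key_sql = "PRIMARY KEY (`id`)"
    index_defs = ["INDEX `idx_日期` (`日期`)"]
    unique_defs = [
        f"UNIQUE KEY `uniq_{'_'.join(cols)}_{i}` (`{'`,`'.join(cols)}`)"
        for i, cols in enumerate(unique_keys)
    ]
    all_defs = column_defs + [primary_key_sql] + index_defs + unique_defs
    sql = (
        "CREATE TABLE IF NOT EXISTS `db`.`tbl` (\n    "
        + ",\n    ".join(all_defs)
        + "\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci"
    )
    print(sql)
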
@@ -476,11 +475,9 @@ class MySQLUploader:
476
475
  try:
477
476
  if date_type:
478
477
  result = pd.to_datetime(datetime.datetime.strptime(value, fmt).strftime('%Y-%m-%d'))
479
- logger.debug('日期格式化成功', {'原始': value, '格式': fmt, '结果': str(result)})
480
478
  return result
481
479
  else:
482
480
  result = datetime.datetime.strptime(value, fmt).strftime('%Y-%m-%d %H:%M:%S')
483
- logger.debug('日期格式化成功', {'原始': value, '格式': fmt, '结果': str(result)})
484
481
  return result
485
482
  except ValueError:
486
483
  continue
@@ -613,7 +610,7 @@ class MySQLUploader:
613
610
  cursor.execute(sql_check, (db_name, table_name, column))
614
611
  exists = cursor.fetchone()
615
612
  if exists and list(exists.values())[0] > 0:
616
- logger.debug('索引已存在', {'库': db_name, '表': table_name, '': column})
613
+ logger.debug('索引检查', {'库': db_name, '表': table_name, '索引列': column})
617
614
  return
618
615
  cursor.execute(sql_create)
619
616
  conn.commit()
@@ -622,6 +619,49 @@ class MySQLUploader:
622
619
  logger.error('创建索引失败', {'库': db_name, '表': table_name, '列': column, '错误': str(e)})
623
620
  raise
624
621
 
622
+ def _get_existing_unique_keys(self, db_name: str, table_name: str) -> List[List[str]]:
623
+ """
624
+ 获取表中所有UNIQUE KEY的列组合(不含主键)。
625
+ 返回:[[col1, col2], ...]
626
+ """
627
+ db_name = self._validate_identifier(db_name)
628
+ table_name = self._validate_identifier(table_name)
629
+ sql = '''
630
+ SELECT INDEX_NAME, COLUMN_NAME
631
+ FROM INFORMATION_SCHEMA.STATISTICS
632
+ WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s AND NON_UNIQUE = 0 AND INDEX_NAME != 'PRIMARY'
633
+ ORDER BY INDEX_NAME, SEQ_IN_INDEX
634
+ '''
635
+ unique_map = {}
636
+ try:
637
+ with self._get_connection() as conn:
638
+ with conn.cursor() as cursor:
639
+ cursor.execute(sql, (db_name, table_name))
640
+ for row in cursor.fetchall():
641
+ idx = row['INDEX_NAME']
642
+ col = row['COLUMN_NAME']
643
+ unique_map.setdefault(idx, []).append(col)
644
+ except Exception as e:
645
+ logger.warning('获取UNIQUE KEY信息失败', {'库': db_name, '表': table_name, '错误': str(e)})
646
+ # 只返回列名组合,全部清洗小写
647
+ return [[self._normalize_col(c) for c in cols] for cols in unique_map.values() if cols]
648
+
649
+ def _add_unique_key(self, db_name: str, table_name: str, unique_cols: List[str]):
650
+ """
651
+ 添加UNIQUE KEY
652
+ """
653
+ safe_cols = [self._normalize_col(col) for col in unique_cols]
654
+ unique_name = f"uniq_{'_'.join(safe_cols)}_{int(time.time()*1000)%100000}"
655
+ sql = f'ALTER TABLE `{db_name}`.`{table_name}` ADD UNIQUE KEY `{unique_name}` ({','.join(f'`{col}`' for col in safe_cols)})'
656
+ try:
657
+ with self._get_connection() as conn:
658
+ with conn.cursor() as cursor:
659
+ cursor.execute(sql)
660
+ conn.commit()
661
+ logger.info('添加唯一约束列成功', {'库': db_name, '表': table_name, '列': unique_cols})
662
+ except Exception as e:
663
+ logger.warning('唯一约束列添加失败', {'库': db_name, '表': table_name, '列': unique_cols, '错误': str(e)})
664
+
625
665
  def _upload_to_table(
626
666
  self,
627
667
  db_name: str,
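
_get_existing_unique_keys reads the table's UNIQUE KEY definitions from INFORMATION_SCHEMA.STATISTICS (NON_UNIQUE = 0, excluding PRIMARY) and groups column names per index. A standalone pymysql sketch of the same query; host, credentials and the target schema/table are placeholders, and the real code goes through the uploader's connection pool instead:

    import pymysql
    from pymysql.cursors import DictCursor

    conn = pymysql.connect(host='localhost', port=3306, user='root',
                           password='pwd', cursorclass=DictCursor)
    sql = """
        SELECT INDEX_NAME, COLUMN_NAME
        FROM INFORMATION_SCHEMA.STATISTICS
        WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s
          AND NON_UNIQUE = 0 AND INDEX_NAME != 'PRIMARY'
        ORDER BY INDEX_NAME, SEQ_IN_INDEX
    """
    unique_map = {}
    with conn.cursor() as cursor:
        cursor.execute(sql, ('my_db', 'my_table'))
        for row in cursor.fetchall():
            unique_map.setdefault(row['INDEX_NAME'], []).append(row['COLUMN_NAME'])
    conn.close()
    print(list(unique_map.values()))   # e.g. [['日期', 'name']]
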
@@ -637,14 +677,15 @@ class MySQLUploader:
637
677
  indexes: Optional[List[str]],
638
678
  batch_id: Optional[str] = None,
639
679
  update_on_duplicate: bool = False,
640
- transaction_mode: str = "batch"
680
+ transaction_mode: str = "batch",
681
+ unique_keys: Optional[List[List[str]]] = None
641
682
  ):
642
683
  """实际执行表上传的方法"""
643
- # 检查表是否存在
644
- if not self._check_table_exists(db_name, table_name):
684
+ table_existed = self._check_table_exists(db_name, table_name)
685
+ if not table_existed:
645
686
  if auto_create:
646
687
  self._create_table(db_name, table_name, set_typ, primary_keys, date_column, indexes,
647
- allow_null=allow_null)
688
+ allow_null=allow_null, unique_keys=unique_keys)
648
689
  else:
649
690
  logger.error('数据表不存在', {
650
691
  '库': db_name,
@@ -652,8 +693,30 @@ class MySQLUploader:
652
693
  'func': sys._getframe().f_code.co_name,
653
694
  })
654
695
  raise ValueError(f"数据表不存在: `{db_name}`.`{table_name}`")
655
-
656
- # 获取表结构并验证
696
+ if table_existed and unique_keys:
697
+ try:
698
+ exist_ukeys = self._get_existing_unique_keys(db_name, table_name)
699
+ exist_ukeys_norm = [sorted([c.lower() for c in uk]) for uk in exist_ukeys]
700
+ filtered_ukeys = [uk for uk in unique_keys if 1 <= len(uk) <= 20]
701
+ to_add = []
702
+ for uk in filtered_ukeys:
703
+ norm_uk = sorted([c.lower() for c in uk])
704
+ if norm_uk not in exist_ukeys_norm:
705
+ to_add.append(uk)
706
+ max_unique_keys = 10
707
+ if len(exist_ukeys) + len(to_add) > max_unique_keys:
708
+ logger.warning('unique_keys超限', {
709
+ '库': db_name,
710
+ '表': table_name,
711
+ '已存在': exist_ukeys,
712
+ '本次待添加': to_add,
713
+ '最大数量': max_unique_keys
714
+ })
715
+ to_add = to_add[:max_unique_keys - len(exist_ukeys)]
716
+ for uk in to_add:
717
+ self._add_unique_key(db_name, table_name, uk)
718
+ except Exception as e:
719
+ logger.warning('动态unique key处理异常', {'库': db_name, '表': table_name, '错误': str(e)})
657
720
  table_columns = self._get_table_columns(db_name, table_name)
658
721
  if not table_columns:
659
722
  logger.error('获取列失败', {
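
For a table that already exists, the requested unique_keys are reconciled against the existing ones by lower-casing and sorting each column list, so differences in case or column order do not produce duplicate constraints, and at most 10 unique keys are kept per table. The comparison in isolation:

    exist_ukeys = [['日期', 'NAME']]
    requested = [['name', '日期'], ['日期', 'age']]

    exist_norm = [sorted(c.lower() for c in uk) for uk in exist_ukeys]
    to_add = [uk for uk in requested
              if sorted(c.lower() for c in uk) not in exist_norm]
    print(to_add)   # [['日期', 'age']] — the first request already exists
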
@@ -663,8 +726,6 @@ class MySQLUploader:
663
726
  'func': sys._getframe().f_code.co_name,
664
727
  })
665
728
  raise ValueError(f"获取列失败 `{db_name}`.`{table_name}`")
666
-
667
- # 验证数据列与表列匹配
668
729
  for col in set_typ:
669
730
  if col not in table_columns:
670
731
  logger.error('列不存在', {
@@ -674,22 +735,19 @@ class MySQLUploader:
674
735
  'func': sys._getframe().f_code.co_name,
675
736
  })
676
737
  raise ValueError(f"列不存在: `{col}` -> `{db_name}`.`{table_name}`")
677
-
678
- # 确保分表参考字段为索引
679
738
  if date_column and date_column in table_columns:
680
739
  try:
681
740
  self._ensure_index(db_name, table_name, date_column)
682
741
  except Exception as e:
683
742
  logger.warning('分表参考字段索引创建失败', {'库': db_name, '表': table_name, '列': date_column, '错误': str(e)})
684
-
685
- # 插入数据
686
- self._insert_data(
743
+ inserted, skipped, failed = self._insert_data(
687
744
  db_name, table_name, data, set_typ,
688
745
  check_duplicate, duplicate_columns,
689
746
  batch_id=batch_id,
690
747
  update_on_duplicate=update_on_duplicate,
691
748
  transaction_mode=transaction_mode
692
749
  )
750
+ return inserted, skipped, failed
693
751
 
694
752
  def _infer_data_type(self, value: Any, no_log: bool = False) -> str:
695
753
  """
@@ -817,11 +875,8 @@ class MySQLUploader:
817
875
  # 统一处理原始数据中列名的特殊字符
818
876
  data = self.normalize_column_names(data)
819
877
 
820
- # set_typ的键处理
821
- if self.case_sensitive:
822
- set_typ = {k: v for k, v in set_typ.items()}
823
- else:
824
- set_typ = {k.lower(): v for k, v in set_typ.items()}
878
+ # set_typ的键清洗
879
+ set_typ = {self._normalize_col(k): v for k, v in set_typ.items()}
825
880
 
826
881
  # 获取数据中实际存在的列名
827
882
  data_columns = set()
@@ -890,7 +945,8 @@ class MySQLUploader:
890
945
  auto_create: bool = True,
891
946
  indexes: Optional[List[str]] = None,
892
947
  update_on_duplicate: bool = False,
893
- transaction_mode: str = "batch"
948
+ transaction_mode: str = "batch",
949
+ unique_keys: Optional[List[List[str]]] = None
894
950
  ):
895
951
  """
896
952
  上传数据到数据库的主入口方法,分表逻辑异常处理统计丢弃数据
@@ -912,6 +968,7 @@ class MySQLUploader:
912
968
  - 'row' : 逐行提交事务(错误隔离性好)
913
969
  - 'batch' : 整批提交事务(性能最优)
914
970
  - 'hybrid' : 混合模式(每N行提交,平衡性能与安全性)
971
+ :param unique_keys: 唯一约束列表,每个元素为列名列表,支持多列组合唯一约束
915
972
  :raises: 可能抛出各种验证和数据库相关异常
916
973
  """
917
974
  # upload_start = time.time()
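
The new unique_keys parameter takes a list of column-name lists; each inner list becomes one UNIQUE KEY, possibly composite. Accepted shapes, with the DDL the create-table path would roughly emit (column names here are illustrative):

    # one single-column constraint and one composite constraint
    unique_keys = [['sku_id'], ['日期', 'name']]
    # roughly equivalent DDL when the table is created:
    #   UNIQUE KEY `uniq_sku_id_0` (`sku_id`),
    #   UNIQUE KEY `uniq_日期_name_1` (`日期`,`name`)
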
@@ -936,7 +993,8 @@ class MySQLUploader:
936
993
  # '自动建表': auto_create,
937
994
  '索引': indexes,
938
995
  '更新旧数据': update_on_duplicate,
939
- '事务模式': transaction_mode
996
+ '事务模式': transaction_mode,
997
+ '唯一约束': unique_keys
940
998
  },
941
999
  # '数据样例': self._shorten_for_log(data, 2)
942
1000
  })
@@ -1005,15 +1063,21 @@ class MySQLUploader:
1005
1063
  continue
1006
1064
 
1007
1065
  # 对每个分表执行上传
1066
+ total_inserted = 0
1067
+ total_skipped = dropped_rows # 分表异常丢弃
1068
+ total_failed = 0
1008
1069
  for part_table, part_data in partitioned_data.items():
1009
1070
  try:
1010
- self._upload_to_table(
1071
+ inserted, skipped, failed = self._upload_to_table(
1011
1072
  db_name, part_table, part_data, filtered_set_typ,
1012
1073
  primary_keys, check_duplicate, duplicate_columns,
1013
1074
  allow_null, auto_create, partition_date_column,
1014
- indexes, batch_id, update_on_duplicate, transaction_mode
1075
+ indexes, batch_id, update_on_duplicate, transaction_mode,
1076
+ unique_keys
1015
1077
  )
1016
- # 确保分表参考字段为索引
1078
+ total_inserted += inserted
1079
+ total_skipped += skipped
1080
+ total_failed += failed
1017
1081
  if partition_date_column in filtered_set_typ:
1018
1082
  try:
1019
1083
  self._ensure_index(db_name, part_table, partition_date_column)
@@ -1031,13 +1095,16 @@ class MySQLUploader:
1031
1095
  continue # 跳过当前分表,继续处理其他分表
1032
1096
  else:
1033
1097
  # 不分表,直接上传
1034
- self._upload_to_table(
1098
+ inserted, skipped, failed = self._upload_to_table(
1035
1099
  db_name, table_name, prepared_data, filtered_set_typ,
1036
1100
  primary_keys, check_duplicate, duplicate_columns,
1037
1101
  allow_null, auto_create, partition_date_column,
1038
- indexes, batch_id, update_on_duplicate, transaction_mode
1102
+ indexes, batch_id, update_on_duplicate, transaction_mode,
1103
+ unique_keys
1039
1104
  )
1040
- # 确保分表参考字段为索引
1105
+ total_inserted = inserted
1106
+ total_skipped = skipped
1107
+ total_failed = failed
1041
1108
  if partition_date_column in filtered_set_typ:
1042
1109
  try:
1043
1110
  self._ensure_index(db_name, table_name, partition_date_column)
@@ -1062,7 +1129,9 @@ class MySQLUploader:
1062
1129
  '批次': batch_id,
1063
1130
  'finish': success_flag,
1064
1131
  '数据行': initial_row_count,
1065
- '丢弃行数': dropped_rows
1132
+ '插入': total_inserted,
1133
+ '跳过': total_skipped,
1134
+ '失败': total_failed
1066
1135
  })
1067
1136
 
1068
1137
  @_execute_with_retry
@@ -1095,26 +1164,19 @@ class MySQLUploader:
1095
1164
  - 'hybrid' : 混合模式(每N行提交,平衡性能与安全性)
1096
1165
  """
1097
1166
  if not data:
1098
- return
1099
-
1100
- # 验证事务模式
1167
+ return 0, 0, 0
1101
1168
  transaction_mode = self._validate_transaction_mode(transaction_mode)
1102
-
1103
- # 准备SQL语句
1104
1169
  sql = self._prepare_insert_sql(
1105
1170
  db_name, table_name, set_typ,
1106
1171
  check_duplicate, duplicate_columns,
1107
1172
  update_on_duplicate
1108
1173
  )
1109
-
1110
- # 执行批量插入
1111
1174
  total_inserted, total_skipped, total_failed = self._execute_batch_insert(
1112
1175
  db_name, table_name, data, set_typ,
1113
1176
  sql, check_duplicate, duplicate_columns,
1114
1177
  batch_id, transaction_mode,
1115
1178
  update_on_duplicate
1116
1179
  )
1117
-
1118
1180
  logger.info('插入完成', {
1119
1181
  '库': db_name,
1120
1182
  '表': table_name,
@@ -1124,6 +1186,7 @@ class MySQLUploader:
1124
1186
  '失败': total_failed,
1125
1187
  '事务模式': transaction_mode,
1126
1188
  })
1189
+ return total_inserted, total_skipped, total_failed
1127
1190
 
1128
1191
  def _validate_transaction_mode(self, mode: str) -> str:
1129
1192
  """验证并标准化事务模式"""
@@ -1266,6 +1329,7 @@ class MySQLUploader:
1266
1329
  update_on_duplicate: bool = False
1267
1330
  ) -> Tuple[int, int, int]:
1268
1331
  """执行批量插入操作,优化batch和hybrid模式"""
1332
+ import pymysql # 确保异常类型可用
1269
1333
  def get_optimal_batch_size(total_rows: int) -> int:
1270
1334
  if total_rows <= 100:
1271
1335
  return total_rows
@@ -1295,7 +1359,13 @@ class MySQLUploader:
1295
1359
  try:
1296
1360
  cursor.executemany(sql, values_list)
1297
1361
  conn.commit()
1298
- total_inserted += len(batch)
1362
+ inserted = cursor.rowcount if cursor.rowcount is not None else 0
1363
+ total_inserted += inserted
1364
+ total_skipped += len(batch) - inserted
1365
+ except pymysql.err.IntegrityError as e:
1366
+ conn.rollback()
1367
+ total_skipped += len(batch)
1368
+ logger.debug('批量插入唯一约束冲突,全部跳过', {'库': db_name, '表': table_name, '错误': str(e)})
1299
1369
  except Exception as e:
1300
1370
  conn.rollback()
1301
1371
  total_failed += len(batch)
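
Batch-mode accounting now uses cursor.rowcount instead of assuming the whole batch was written, and an IntegrityError (for example a UNIQUE KEY collision when update_on_duplicate is off) counts the batch as skipped rather than failed. A rough sketch of that accounting, assuming plain INSERT/INSERT IGNORE semantics; with ON DUPLICATE KEY UPDATE, MySQL reports 2 for an updated row, so the skipped figure is only approximate there:

    import pymysql

    def run_batch(cursor, conn, sql, values_list):
        """Return (inserted, skipped, failed) for one batch."""
        inserted = skipped = failed = 0
        try:
            cursor.executemany(sql, values_list)
            conn.commit()
            inserted = cursor.rowcount if cursor.rowcount is not None else 0
            skipped = len(values_list) - inserted
        except pymysql.err.IntegrityError:
            conn.rollback()           # e.g. duplicate key on a UNIQUE constraint
            skipped = len(values_list)
        except Exception:
            conn.rollback()
            failed = len(values_list)
        return inserted, skipped, failed
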
@@ -1311,7 +1381,15 @@ class MySQLUploader:
1311
1381
  dup_cols = duplicate_columns if duplicate_columns else [col for col in all_columns if col.lower() not in self.base_excute_col]
1312
1382
  values += [row.get(col) for col in dup_cols]
1313
1383
  cursor.execute(sql, values)
1314
- total_inserted += 1
1384
+ affected = cursor.rowcount if cursor.rowcount is not None else 0
1385
+ if affected > 0:
1386
+ total_inserted += 1
1387
+ else:
1388
+ total_skipped += 1
1389
+ except pymysql.err.IntegrityError as e:
1390
+ conn.rollback()
1391
+ total_skipped += 1
1392
+ logger.debug('hybrid单行插入唯一约束冲突,跳过', {'库': db_name, '表': table_name, '错误': str(e)})
1315
1393
  except Exception as e:
1316
1394
  conn.rollback()
1317
1395
  total_failed += 1
@@ -1325,8 +1403,16 @@ class MySQLUploader:
1325
1403
  dup_cols = duplicate_columns if duplicate_columns else [col for col in all_columns if col.lower() not in self.base_excute_col]
1326
1404
  values += [row.get(col) for col in dup_cols]
1327
1405
  cursor.execute(sql, values)
1406
+ affected = cursor.rowcount if cursor.rowcount is not None else 0
1407
+ if affected > 0:
1408
+ total_inserted += 1
1409
+ else:
1410
+ total_skipped += 1
1328
1411
  conn.commit()
1329
- total_inserted += 1
1412
+ except pymysql.err.IntegrityError as e:
1413
+ conn.rollback()
1414
+ total_skipped += 1
1415
+ logger.debug('单行插入唯一约束冲突,跳过', {'库': db_name, '表': table_name, '错误': str(e)})
1330
1416
  except Exception as e:
1331
1417
  conn.rollback()
1332
1418
  total_failed += 1
@@ -1347,9 +1433,9 @@ class MySQLUploader:
1347
1433
  self.pool = None
1348
1434
  except Exception as e:
1349
1435
  logger.warning('关闭连接池时出错', {'error': str(e)})
1350
- logger.info('连接池关闭', {'uploader.py': '连接池关闭'})
1436
+ logger.debug('finished', {'uploader.py': '连接池关闭'})
1351
1437
  except Exception as e:
1352
- logger.error('关闭连接池失败', {'error': str(e)})
1438
+ logger.error('关闭连接池失败', {'uploader.py': str(e)})
1353
1439
  raise
1354
1440
 
1355
1441
  def _check_pool_health(self) -> bool:
@@ -1431,6 +1517,13 @@ class MySQLUploader:
1431
1517
  def __exit__(self, exc_type, exc_val, exc_tb):
1432
1518
  self.close()
1433
1519
 
1520
+ def _normalize_col(self, col: str) -> str:
1521
+ """
1522
+ 列名自动清洗并转小写(如case_sensitive为False),保证和表结构一致。
1523
+ """
1524
+ safe = self._validate_identifier(col)
1525
+ return safe if self.case_sensitive else safe.lower()
1526
+
1434
1527
 
1435
1528
  def main():
1436
1529
  """
@@ -1443,7 +1536,7 @@ def main():
1443
1536
  """
1444
1537
  uploader = MySQLUploader(
1445
1538
  username='root',
1446
- password='pw',
1539
+ password='pwd',
1447
1540
  host='localhost',
1448
1541
  port=3306,
1449
1542
  )
@@ -1462,7 +1555,7 @@ def main():
1462
1555
  {'日期': '2023-01-8', 'name': 'JACk', 'AGE': '24', 'salary': 555.1545},
1463
1556
  {'日期': '2023-01-15', 'name': 'Alice', 'AGE': 35, 'salary': '100'},
1464
1557
  {'日期': '2023-01-15', 'name': 'Alice', 'AGE': 30, 'salary': 0.0},
1465
- {'日期': '2023-02-20', 'name': 'Bob', 'AGE': 25, 'salary': 45000.75}
1558
+ {'日期': '2023-02-20', 'name': 'Bob', 'AGE': 25, 'salary': 45000.75},
1466
1559
  ]
1467
1560
 
1468
1561
  # 上传数据
@@ -1474,12 +1567,13 @@ def main():
1474
1567
  primary_keys=[], # 创建唯一主键
1475
1568
  check_duplicate=False, # 检查重复数据
1476
1569
  duplicate_columns=[], # 指定排重的组合键
1570
+ update_on_duplicate=False, # 更新旧数据
1477
1571
  allow_null=False, # 允许插入空值
1478
- partition_by='year', # 按月分表
1572
+ partition_by='year', # 分表方式
1479
1573
  partition_date_column='日期', # 用于分表的日期列名,默认为'日期'
1480
- auto_create=True, # 表不存在时自动创建, 默认参数不要更改
1481
- indexes=[], # 指定索引列
1574
+ indexes=[], # 普通索引列
1482
1575
  transaction_mode='row', # 事务模式
1576
+ unique_keys=[['日期', 'name', 'age']], # 唯一约束列表
1483
1577
  )
1484
1578
 
1485
1579
  uploader.close()
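
With unique_keys=[['日期', 'name', 'age']] the example table carries a composite UNIQUE KEY, so re-running the upload with the same rows should leave the table unchanged: conflicting inserts are rolled back and counted as skipped, or refresh the existing rows when update_on_duplicate=True. A sketch of such a second call (placed before uploader.close()); the db_name/table_name/data/set_typ argument names and values are assumptions, since the opening arguments of the call are not visible in this hunk:

    uploader.upload_data(
        db_name='测试库',               # placeholder
        table_name='测试表',            # placeholder
        data=data,                      # same records as the first call
        set_typ=set_typ,
        update_on_duplicate=False,      # True would update the matched rows instead
        allow_null=False,
        partition_by='year',
        partition_date_column='日期',
        indexes=[],
        transaction_mode='row',
        unique_keys=[['日期', 'name', 'age']],
    )
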
mdbq-3.12.0.dist-info/METADATA → mdbq-3.12.2.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: mdbq
3
- Version: 3.12.0
3
+ Version: 3.12.2
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
mdbq-3.12.0.dist-info/RECORD → mdbq-3.12.2.dist-info/RECORD CHANGED
@@ -1,5 +1,5 @@
1
1
  mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
2
- mdbq/__version__.py,sha256=W8WVhYkHLU0SBDlL9Q6XQVTqIrzYjc1kFBZgqzS_NEI,18
2
+ mdbq/__version__.py,sha256=7f_XGixBIsGAP5w1W3y_kTPfLVlurDoRsv3FOOJLDIA,18
3
3
  mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
4
4
  mdbq/aggregation/query_data.py,sha256=nxL8hSy8yI1QLlqnkTNHHQSxRfo-6WKL5OA-N4xLB7c,179832
5
5
  mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
@@ -8,10 +8,10 @@ mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
8
8
  mdbq/log/mylogger.py,sha256=Crw6LwVo3I3IUbzIETu8f46Quza3CTCh-qYf4edbBPo,24139
9
9
  mdbq/log/spider_logging.py,sha256=-ozWWEGm3HVv604ozs_OOvVwumjokmUPwbaodesUrPY,1664
10
10
  mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
11
- mdbq/mysql/deduplicator.py,sha256=KMJ_YyqAniaLVRqOHLgO92PgwknIDB-EgaOY7S6iMZ4,68599
11
+ mdbq/mysql/deduplicator.py,sha256=eILGFxFtMCSR6dvCgEgsOjwlK_hEWBe2dFSgJTPHfj8,68623
12
12
  mdbq/mysql/mysql.py,sha256=Kjpi-LL00WQUmTTOfhEBsNrmo4-4kFFJzrHbVKfqiBE,56770
13
13
  mdbq/mysql/s_query.py,sha256=dlnrVJ3-Vp1Suv9CNbPxyYSRqRJUHjOpF39tb2F-wBc,10190
14
- mdbq/mysql/uploader.py,sha256=8Px_W2bYOr1wQgMXMK0DggNiuE6a6Ul4BlJake8LSo8,64469
14
+ mdbq/mysql/uploader.py,sha256=szX6t4SObBF6fbHT2s5ixfh1-c288cigsJ66pFE02Qg,70266
15
15
  mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
16
16
  mdbq/other/download_sku_picture.py,sha256=YU8DxKMXbdeE1OOKEA848WVp62jYHw5O4tXTjUdq9H0,44832
17
17
  mdbq/other/otk.py,sha256=iclBIFbQbhlqzUbcMMoePXBpcP1eZ06ZtjnhcA_EbmE,7241
@@ -24,7 +24,7 @@ mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
24
24
  mdbq/redis/getredis.py,sha256=YHgCKO8mEsslwet33K5tGss-nrDDwPnOSlhA9iBu0jY,24078
25
25
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
26
26
  mdbq/spider/aikucun.py,sha256=cqK-JRd_DHbToC7hyo83m8o97NZkJFqmB2xBtr6aAVU,20961
27
- mdbq-3.12.0.dist-info/METADATA,sha256=Q6EyaC61H4okFva6YFV2a0Y3Iqun8L8mnpSkeVXcFdc,364
28
- mdbq-3.12.0.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
29
- mdbq-3.12.0.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
30
- mdbq-3.12.0.dist-info/RECORD,,
27
+ mdbq-3.12.2.dist-info/METADATA,sha256=1V071qDvuX5apscDGFLY_ICX6mMVBUhjzMGh_a_wd7w,364
28
+ mdbq-3.12.2.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
29
+ mdbq-3.12.2.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
30
+ mdbq-3.12.2.dist-info/RECORD,,
File without changes