mdbq-3.11.8-py3-none-any.whl → mdbq-3.11.10-py3-none-any.whl

This diff compares the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registries.
mdbq/__version__.py CHANGED
@@ -1 +1 @@
- VERSION = '3.11.8'
+ VERSION = '3.11.10'
mdbq/log/mylogger.py CHANGED
@@ -247,7 +247,7 @@ class MyLogger:
  if isinstance(log_data.get('message'), str):
  log_data['message'] = log_data['message'].replace(field, '***')

- return json.dumps(log_data, ensure_ascii=False)
+ return json.dumps(log_data, ensure_ascii=False, default=str)

  formatter = StructuredFormatter()

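The `default=str` addition matters for structured logging: `json.dumps` raises `TypeError` on values it cannot serialize (datetimes, Decimals, sets), while `default=str` falls back to `str()` for them. A standalone sketch of the difference (the record dict is made up, not taken from the package):

import json
from datetime import datetime

record = {'message': 'done', 'ts': datetime(2025, 5, 27, 12, 0)}

try:
    json.dumps(record, ensure_ascii=False)
except TypeError as e:
    # Object of type datetime is not JSON serializable
    print('without default=str:', e)

print(json.dumps(record, ensure_ascii=False, default=str))
# {"message": "done", "ts": "2025-05-27 12:00:00"}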
mdbq/mysql/deduplicator.py CHANGED
@@ -37,7 +37,7 @@ class MySQLDeduplicator:

  功能:
  1. 自动检测并删除MySQL数据库中的重复数据
- 2. 支持全库扫描或指定表处理
+ 2. 支持全库扫描或指定表理
  3. 支持多线程/多进程安全处理
  4. 完善的错误处理和日志记录

@@ -114,7 +114,7 @@ class MySQLDeduplicator:
  )

  # 配置参数
- self.max_workers = max(1, min(max_workers, 20)) # 限制最大线程数
+ self.max_workers = min(max(1, max_workers), pool_size) # 限制最大线程数,不能超过连接池
  self.batch_size = batch_size
  self.skip_system_dbs = skip_system_dbs
  self.max_retries = max_retries
@@ -215,7 +215,7 @@ class MySQLDeduplicator:
  last_exception = None
  for attempt in range(self.max_retries + 1):
  try:
- logger.debug(f'调用{func.__name__},第{attempt+1}次尝试', {'args': args, 'kwargs': kwargs})
+ logger.debug(f'调用{func.__name__},第{attempt+1}次连接', {'args': args, 'kwargs': kwargs})
  return func(self, *args, **kwargs)
  except (pymysql.OperationalError, pymysql.InterfaceError) as e:
  last_exception = e
@@ -269,7 +269,8 @@ class MySQLDeduplicator:
  with conn.cursor() as cursor:
  cursor.execute(f"USE `{database}`")
  cursor.execute(sql)
- return [row[f'Tables_in_{database}'] for row in cursor.fetchall()]
+ # 严格过滤所有以'temp_'为前缀的表名(如temp_xxx、temp_xxx_dedup_...、temp_xxx_reorderid_...等)
+ return [row[f'Tables_in_{database}'] for row in cursor.fetchall() if not re.match(r'^temp_.*', row[f'Tables_in_{database}'])]

  @_retry_on_failure
  def _get_table_columns(self, database: str, table: str) -> List[str]:
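The new filter keeps the deduplicator's own scratch tables out of `_get_table_names`. Since `re.match` anchors at the start of the string, `re.match(r'^temp_.*', name)` is just a prefix test; a quick sketch with made-up table names:

import re

tables = ['orders', 'temp_orders_dedup_123_456', 'temperature']
kept = [t for t in tables if not re.match(r'^temp_.*', t)]
print(kept)  # ['orders', 'temperature'] -- only the exact 'temp_' prefix is filtered

# equivalent, and avoids the regex engine entirely
assert kept == [t for t in tables if not t.startswith('temp_')]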
@@ -328,129 +329,267 @@ class MySQLDeduplicator:
  if key in self._processing_tables:
  self._processing_tables.remove(key)

+ @_retry_on_failure
+ def _ensure_index(self, database: str, table: str, date_column: str) -> None:
+ """
+ 检查并为date_column自动创建索引(如果未存在)。
+ Args:
+ database (str): 数据库名。
+ table (str): 表名。
+ date_column (str): 需要检查的日期列名。
+ """
+ with self._get_connection() as conn:
+ with conn.cursor() as cursor:
+ # 检查索引是否已存在
+ cursor.execute(
+ """
+ SELECT COUNT(1) as idx_count FROM INFORMATION_SCHEMA.STATISTICS
+ WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s AND COLUMN_NAME = %s
+ """,
+ (database, table, date_column)
+ )
+ idx_count = cursor.fetchone()['idx_count']
+ if idx_count == 0:
+ # 自动创建索引
+ index_name = f"idx_{date_column}"
+ safe_index_name = self._make_safe_table_name(index_name, prefix='', suffix='', max_length=64)
+ try:
+ cursor.execute(f"CREATE INDEX `{safe_index_name}` ON `{database}`.`{table}` (`{date_column}`)")
+ conn.commit()
+ logger.info('已自动为date_column创建索引', {"库": database, "表": table, "date_column": date_column, "索引名": safe_index_name})
+ except Exception as e:
+ logger.error('自动创建date_column索引失败', {"库": database, "表": table, "date_column": date_column, "异常": str(e)})
+ else:
+ logger.debug('date_column已存在索引', {"库": database, "表": table, "date_column": date_column})
+
+ def _row_generator(self, database, table, select_cols, select_where, batch_size=10000):
+ """
+ 生成器:分批拉取表数据,避免一次性加载全部数据到内存。
+ Args:
+ database (str): 数据库名。
+ table (str): 表名。
+ select_cols (str): 选择的列字符串。
+ select_where (str): where条件字符串。
+ batch_size (int): 每批拉取的行数。
+ Yields:
+ dict: 每行数据。
+ """
+ offset = 0
+ while True:
+ sql = f"SELECT {select_cols} FROM `{database}`.`{table}` {select_where} LIMIT {batch_size} OFFSET {offset}"
+ with self._get_connection() as conn:
+ with conn.cursor() as cursor:
+ cursor.execute(sql)
+ rows = cursor.fetchall()
+ if not rows:
+ break
+ for row in rows:
+ yield row
+ if len(rows) < batch_size:
+ break
+ offset += batch_size
+
+ def _get_all_dates(self, database: str, table: str, date_column: str) -> list:
+ """
+ 获取表中所有不同的日期分区(按天)。
+ Args:
+ database (str): 数据库名。
+ table (str): 表名。
+ date_column (str): 日期列名。
+ Returns:
+ List: 所有不同的日期(字符串)。
+ """
+ sql = f"SELECT DISTINCT `{date_column}` FROM `{database}`.`{table}` ORDER BY `{date_column}` ASC"
+ with self._get_connection() as conn:
+ with conn.cursor() as cursor:
+ cursor.execute(sql)
+ return [row[date_column] for row in cursor.fetchall() if row[date_column] is not None]
+
  def _deduplicate_table(
  self,
  database: str,
  table: str,
  columns: Optional[List[str]] = None,
- dry_run: bool = False
+ dry_run: bool = False,
+ use_python_dedup: bool = False,
+ dedup_start_date: Optional[str] = None,
+ dedup_end_date: Optional[str] = None,
+ lock_table: bool = True
  ) -> Tuple[int, int]:
  """
  执行单表去重。
-
- Args:
- database (str): 数据库名。
- table (str): 表名。
- columns (Optional[List[str]]): 用于去重的列名列表(为None时使用所有列)。
- dry_run (bool): 是否为模拟运行(只统计不实际删除)。
- Returns:
- Tuple[int, int]: (重复组数, 实际删除行数)。
+ 支持按天分批处理(如果表包含date_column),否则全表去重。
+ 如果date_column在exclude_columns中,直接跳过该表。
+ 优化:分批删除时用主键、避免重复建/删临时表、并发处理每天。
  """
- if not self._acquire_table_lock(database, table):
+ if lock_table and not self._acquire_table_lock(database, table):
  return (0, 0)
  temp_table = None
  try:
- # 获取原始数据总量
- with self._get_connection() as conn:
- with conn.cursor() as cursor:
- logger.debug('执行SQL', {'sql': f'SELECT COUNT(*) as cnt FROM `{database}`.`{table}`'})
- cursor.execute(f"SELECT COUNT(*) as cnt FROM `{database}`.`{table}`")
- total_count_row = cursor.fetchone()
- total_count = total_count_row['cnt'] if total_count_row and 'cnt' in total_count_row else 0
- logger.info('执行', {"库": database, "表": table, "开始处理数据量": total_count, 'func': sys._getframe().f_code.co_name})
  # 获取实际列名
  all_columns = self._get_table_columns(database, table)
- logger.debug('获取表列', {'库': database, '表': table, 'all_columns': all_columns})
- # 检查是否需要按时间范围过滤
- use_time_filter = False
- time_col = self.date_column
  all_columns_lower = [col.lower() for col in all_columns]
- # 排除exclude_columns
  exclude_columns_lower = [col.lower() for col in getattr(self, 'exclude_columns', [])]
- # 统一列名小写做判断
+ time_col = self.date_column
+ time_col_lower = time_col.lower() if time_col else None
+ # 1. 跳过date_column在exclude_columns的情况
+ if time_col_lower and time_col_lower in exclude_columns_lower:
+ logger.warning('date_column在exclude_columns中,跳过该表', {"库": database, "表": table, "date_column": time_col, "exclude_columns": self.exclude_columns})
+ return (0, 0)
+ # 2. 判断表是否包含date_column
+ has_time_col = time_col_lower in all_columns_lower if time_col_lower else False
+ # 如果包含date_column,自动检查并创建索引
+ if has_time_col and dedup_start_date is None and dedup_end_date is None:
+ self._ensure_index(database, table, time_col)
+ # 按天分区多线程处理
+ all_dates = self._get_all_dates(database, table, time_col)
+ total_dup = 0
+ total_del = 0
+ def process_date(date_val):
+ try:
+ logger.debug('按天分区去重', {"库": database, "表": table, "日期": date_val})
+ dup_count, affected_rows = self._deduplicate_table(
+ database, table, columns, dry_run, use_python_dedup,
+ dedup_start_date=date_val, dedup_end_date=date_val,
+ lock_table=False
+ )
+ return (dup_count, affected_rows, date_val, None)
+ except Exception as e:
+ logger.error('分区去重异常', {"库": database, "表": table, "日期": date_val, "异常": str(e), "func": sys._getframe().f_code.co_name})
+ return (0, 0, date_val, str(e))
+ with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor:
+ future_to_date = {executor.submit(process_date, date_val): date_val for date_val in all_dates}
+ for future in concurrent.futures.as_completed(future_to_date):
+ dup_count, affected_rows, date_val, err = future.result()
+ if err:
+ logger.warning('分区处理失败', {"库": database, "表": table, "日期": date_val, "异常": err, "func": sys._getframe().f_code.co_name})
+ total_dup += dup_count
+ total_del += affected_rows
+ return (total_dup, total_del)
+ # 获取去重列
  use_columns = columns or all_columns
  use_columns = [col for col in use_columns if col.lower() in all_columns_lower and col.lower() not in exclude_columns_lower]
  invalid_columns = set([col for col in (columns or []) if col.lower() not in all_columns_lower])
  if invalid_columns:
  logger.warning('不存在的列', {"库": database, "表": table, "不存在以下列": invalid_columns, 'func': sys._getframe().f_code.co_name})
  if not use_columns:
- logger.error('没有有效的去重列', {"库": database, "表": table})
+ logger.error('没有有效的去重列', {"库": database, "表": table, "func": sys._getframe().f_code.co_name})
  return (0, 0)
- # 统一用反引号包裹
- column_list = ', '.join([f'`{col}`' for col in use_columns])
- temp_table = self._make_safe_table_name(table, prefix=f"temp_", suffix=f"_dedup_{os.getpid()}_{threading.get_ident()}")
  pk = self.primary_key
- # 主键判断也用小写
- if pk.lower() not in all_columns_lower and pk != 'id':
- logger.error('', {"不存在主键列": database, "表": table, "主键列不存在": pk})
- return (0, 0)
- # 找到实际主键名
  pk_real = next((c for c in all_columns if c.lower() == pk.lower()), pk)
- # 构造where条件
- where_time = ''
- if use_time_filter:
- where_time = f"WHERE `{time_col}` >= '{self._dedup_start_date}' AND `{time_col}` <= '{self._dedup_end_date}'"
+ # 判断是否需要加日期区间条件
+ where_sql = ''
+ if has_time_col and dedup_start_date and dedup_end_date:
+ where_sql = f"t.`{time_col}` >= '{dedup_start_date}' AND t.`{time_col}` <= '{dedup_end_date}'"
+ # 获取原始数据总量(只统计区间内数据)
+ with self._get_connection() as conn:
+ with conn.cursor() as cursor:
+ count_where = f"WHERE `{time_col}` >= '{dedup_start_date}' AND `{time_col}` <= '{dedup_end_date}'" if has_time_col and dedup_start_date and dedup_end_date else ''
+ count_sql = f"SELECT COUNT(*) as cnt FROM `{database}`.`{table}` {count_where}"
+ logger.debug('执行SQL', {'sql': count_sql})
+ cursor.execute(count_sql)
+ total_count_row = cursor.fetchone()
+ total_count = total_count_row['cnt'] if total_count_row and 'cnt' in total_count_row else 0
+ logger.info('执行', {"库": database, "表": table, "开始处理数据量": total_count, 'func': sys._getframe().f_code.co_name, "数据日期": dedup_end_date})
+ column_list = ', '.join([f'`{col}`' for col in use_columns])
+
+ # 用Python查找重复
+ if use_python_dedup:
+ # 1. 拉取所有数据(生成器分批拉取)
+ select_cols = f'`{pk_real}`,' + ','.join([f'`{col}`' for col in use_columns])
+ select_where = f"WHERE `{time_col}` >= '{dedup_start_date}' AND `{time_col}` <= '{dedup_end_date}'" if has_time_col and dedup_start_date and dedup_end_date else ''
+ select_sql = f"SELECT {select_cols} FROM `{database}`.`{table}` {select_where}"
+ logger.debug('用Python查找重复,拉取数据SQL', {'sql': select_sql})
+ # 用生成器分批拉取
+ grouped = defaultdict(list)
+ for row in self._row_generator(database, table, select_cols, select_where, self.batch_size):
+ key = tuple(row[col] for col in use_columns)
+ grouped[key].append(row[pk_real])
+ # 2. 统计重复组和待删除id
+ dup_count = 0
+ del_ids = []
+ for ids in grouped.values():
+ if len(ids) > 1:
+ dup_count += 1
+ del_ids.extend(ids[1:]) # 只保留第一个
+ affected_rows = 0
+ if not dry_run and del_ids:
+ with self._get_connection() as conn:
+ with conn.cursor() as cursor:
+ for i in range(0, len(del_ids), self.batch_size):
+ batch = del_ids[i:i+self.batch_size]
+ del_ids_str = ','.join([str(i) for i in batch])
+ delete_sql = f"DELETE FROM `{database}`.`{table}` WHERE `{pk_real}` IN ({del_ids_str})"
+ logger.debug('用Python分批删除SQL', {'sql': delete_sql, 'ids': batch})
+ cursor.execute(delete_sql)
+ batch_deleted = cursor.rowcount
+ affected_rows += batch_deleted
+ conn.commit()
+ logger.info('去重完成', {"库": database, "表": table, "数据量": total_count, "重复组": dup_count, "实际删除": affected_rows, "去重方式": "Python", "数据处理": self.duplicate_keep_mode, "数据日期": dedup_end_date})
+ return (dup_count, affected_rows)
+ # SQL方式查找重复
+ temp_table = self._make_temp_table_name(table)
+ drop_temp_sql = f"DROP TABLE IF EXISTS `{database}`.`{temp_table}`"
+ # 创建临时表时加where条件
+ create_temp_where = f"WHERE `{time_col}` >= '{dedup_start_date}' AND `{time_col}` <= '{dedup_end_date}'" if has_time_col and dedup_start_date and dedup_end_date else ''
  create_temp_sql = f"""
  CREATE TABLE `{database}`.`{temp_table}` AS
  SELECT MIN(`{pk_real}`) as `min_id`, {column_list}, COUNT(*) as `dup_count`
  FROM `{database}`.`{table}`
- {where_time}
+ {create_temp_where}
  GROUP BY {column_list}
  HAVING COUNT(*) > 1
  """
- drop_temp_sql = f"DROP TABLE IF EXISTS `{database}`.`{temp_table}`"
  with self._get_connection() as conn:
  with conn.cursor() as cursor:
  logger.debug('创建临时表SQL', {'sql': create_temp_sql})
  cursor.execute(create_temp_sql)
- logger.debug('统计临时表重复组SQL', {'sql': f'SELECT COUNT(*) as cnt FROM `{database}`.`{temp_table}`'})
  cursor.execute(f"SELECT COUNT(*) as cnt FROM `{database}`.`{temp_table}`")
  dup_count_row = cursor.fetchone()
  dup_count = dup_count_row['cnt'] if dup_count_row and 'cnt' in dup_count_row else 0
  if dup_count == 0:
- logger.info('没有重复数据', {"库": database, "表": table, "数据量": total_count, "时间范围": [self._dedup_start_date, self._dedup_end_date] if use_time_filter else None, "实际去重列": use_columns})
- logger.debug('删除临时表SQL', {'sql': drop_temp_sql})
+ logger.info('没有重复数据', {"库": database, "表": table, "数据量": total_count, "数据日期": dedup_end_date})
  cursor.execute(drop_temp_sql)
  conn.commit()
  return (0, 0)
  affected_rows = 0
  if not dry_run:
- # 分批删除,避免锁表
  while True:
- if self.duplicate_keep_mode == 'remove_all':
- # 删除所有重复组的所有记录
- delete_dup_sql = f"""
- DELETE FROM `{database}`.`{table}`
- WHERE ({', '.join([f'`{col}`' for col in use_columns])}) IN (
- SELECT {column_list} FROM `{database}`.`{temp_table}`
- ) {'AND' if use_time_filter else ''} {f'`{time_col}` >= \'{self._dedup_start_date}\' AND `{time_col}` <= \'{self._dedup_end_date}\'' if use_time_filter else ''}
- LIMIT {self.batch_size}
- """
- else:
- # 修正:只删除重复组中不是min_id的行,唯一数据不动
- delete_dup_sql = f"""
- DELETE FROM `{database}`.`{table}` t
- WHERE EXISTS (
- SELECT 1 FROM `{database}`.`{temp_table}` tmp
- WHERE
- {' AND '.join([f't.`{col}` <=> tmp.`{col}`' for col in use_columns])}
- AND t.`{pk_real}` <> tmp.`min_id`
- )
- {'AND' if use_time_filter else ''} {f't.`{time_col}` >= \'{self._dedup_start_date}\' AND t.`{time_col}` <= \'{self._dedup_end_date}\'' if use_time_filter else ''}
- LIMIT {self.batch_size}
- """
- logger.debug('执行删除重复数据SQL', {'sql': delete_dup_sql})
- cursor.execute(delete_dup_sql)
+ where_clauses = []
+ if self.duplicate_keep_mode == 'keep_one':
+ where_clauses.append(f"t.`{pk_real}` <> tmp.`min_id`")
+ if where_sql.strip():
+ where_clauses.append(where_sql.strip())
+ where_full = "WHERE " + " AND ".join(where_clauses) if where_clauses else ""
+ find_dup_ids_sql = f"""
+ SELECT t.`{pk_real}` as del_id
+ FROM `{database}`.`{table}` t
+ JOIN `{database}`.`{temp_table}` tmp
+ ON {' AND '.join([f't.`{col}` <=> tmp.`{col}`' for col in use_columns])}
+ {where_full}
+ LIMIT {self.batch_size}
+ """
+ logger.debug('查找待删除重复id SQL', {'sql': find_dup_ids_sql})
+ cursor.execute(find_dup_ids_sql)
+ del_ids = [row['del_id'] for row in cursor.fetchall()]
+ if not del_ids:
+ break
+ del_ids_str = ','.join([str(i) for i in del_ids])
+ delete_sql = f"DELETE FROM `{database}`.`{table}` WHERE `{pk_real}` IN ({del_ids_str})"
+ logger.debug('按id批量删除SQL', {'sql': delete_sql, 'ids': del_ids})
+ cursor.execute(delete_sql)
  batch_deleted = cursor.rowcount
  affected_rows += batch_deleted
  conn.commit()
+ if batch_deleted == 0:
+ logger.warning('检测到未能删除任何数据,强制跳出循环,防止假死', {"库": database, "表": table})
+ break
  if batch_deleted < self.batch_size:
  break
- logger.info('操作删除', {"库": database, "表": table, "数据量": total_count, "重复组数": dup_count, "实际删除": affected_rows, "时间范围": [self._dedup_start_date, self._dedup_end_date] if use_time_filter else None, "实际去重列": use_columns, "去重模式": self.duplicate_keep_mode})
+ logger.info('操作删除', {"库": database, "表": table, "数据量": total_count, "重复组": dup_count, "实际删除": affected_rows, "去重方式": "SQL", "数据处理": self.duplicate_keep_mode, "数据日期": dedup_end_date})
  else:
- logger.debug('dry_run模式,不执行删除', {"库": database, "表": table, "重复组数": dup_count, "时间范围": [self._dedup_start_date, self._dedup_end_date] if use_time_filter else None})
+ logger.debug('dry_run模式,不执行删除', {"库": database, "表": table, "重复组": dup_count})
  affected_rows = 0
- logger.debug('删除临时表SQL', {'sql': drop_temp_sql})
  cursor.execute(drop_temp_sql)
  conn.commit()
  return (dup_count, affected_rows)
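The `use_python_dedup` path above groups rows in memory by the tuple of dedup-column values and keeps the first primary key of each group. A self-contained sketch of that grouping step, with made-up rows standing in for cursor results:

from collections import defaultdict

rows = [
    {'id': 1, 'name': 'a', 'date': '2025-05-27'},
    {'id': 2, 'name': 'a', 'date': '2025-05-27'},  # duplicate of id 1
    {'id': 3, 'name': 'b', 'date': '2025-05-27'},
]
use_columns = ['name', 'date']

grouped = defaultdict(list)
for row in rows:
    # rows sharing the same column tuple fall into one group
    grouped[tuple(row[col] for col in use_columns)].append(row['id'])

dup_groups, del_ids = 0, []
for ids in grouped.values():
    if len(ids) > 1:
        dup_groups += 1
        del_ids.extend(ids[1:])  # keep the first id, mark the rest for deletion

print(dup_groups, del_ids)  # 1 [2]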
@@ -468,14 +607,17 @@ class MySQLDeduplicator:
  logger.error('异常时清理临时表失败', {"库": database, "表": table, "异常": str(drop_e)})
  return (0, 0)
  finally:
- self._release_table_lock(database, table)
+ if lock_table:
+ self._release_table_lock(database, table)

  def deduplicate_table(
  self,
  database: str,
  table: str,
  columns: Optional[List[str]] = None,
- dry_run: bool = False
+ dry_run: bool = False,
+ reorder_id: bool = False,
+ use_python_dedup: bool = True
  ) -> Tuple[int, int]:
  """
  对指定表进行去重。
@@ -485,6 +627,8 @@ class MySQLDeduplicator:
  table (str): 表名。
  columns (Optional[List[str]]): 用于去重的列名列表(为None时使用所有列)。
  dry_run (bool): 是否为模拟运行(只统计不实际删除)。
+ reorder_id (bool): 去重后是否重排id。
+ use_python_dedup (bool): 是否用Python查找重复id。
  Returns:
  Tuple[int, int]: (重复组数, 实际删除行数)。
  """
@@ -495,9 +639,17 @@ class MySQLDeduplicator:
  if not self._check_table_exists(database, table):
  logger.warning('表不存在', {"库": database, "表": table, "warning": "跳过"})
  return (0, 0)
- logger.info('单表开始', {"库": database, "表": table, "参数": {"指定去重列": columns, "模拟运行": dry_run, '排除列': self.exclude_columns}})
- result = self._deduplicate_table(database, table, columns, dry_run)
+ logger.info('单表开始', {"库": database, "表": table, "参数": {"指定去重列": columns, "模拟运行": dry_run, '排除列': self.exclude_columns, 'use_python_dedup': use_python_dedup}})
+ result = self._deduplicate_table(database, table, columns, dry_run, use_python_dedup)
  logger.info('单表完成', {"库": database, "表": table, "结果[重复, 删除]": result})
+ # 自动重排id列(仅当有实际删除时且reorder_id为True)
+ dup_count, affected_rows = result
+ if reorder_id and affected_rows > 0:
+ try:
+ reorder_ok = self.reorder_id_column(database, table, id_column=self.primary_key, dry_run=dry_run)
+ logger.info('自动重排id列完成', {"库": database, "表": table, "结果": reorder_ok})
+ except Exception as e:
+ logger.error('自动重排id列异常', {"库": database, "表": table, "异常": str(e)})
  return result
  except Exception as e:
  logger.error('发生全局错误', {"库": database, "表": table, 'func': sys._getframe().f_code.co_name, "发生全局错误": str(e)})
@@ -509,7 +661,9 @@ class MySQLDeduplicator:
  tables: Optional[List[str]] = None,
  columns_map: Optional[Dict[str, List[str]]] = None,
  dry_run: bool = False,
- parallel: bool = False
+ parallel: bool = False,
+ reorder_id: bool = False,
+ use_python_dedup: bool = True
  ) -> Dict[str, Tuple[int, int]]:
  """
  对指定数据库的所有表进行去重。
@@ -520,6 +674,8 @@ class MySQLDeduplicator:
  columns_map (Optional[Dict[str, List[str]]]): 各表使用的去重列 {表名: [列名]}。
  dry_run (bool): 是否为模拟运行。
  parallel (bool): 是否并行处理。
+ reorder_id (bool): 去重后是否重排id。
+ use_python_dedup (bool): 是否用Python查找重复id。
  Returns:
  Dict[str, Tuple[int, int]]: {表名: (重复组数, 实际删除行数)}。
  """
@@ -548,7 +704,7 @@ class MySQLDeduplicator:
  logger.debug('提交表去重任务', {'库': database, '表': table, 'columns': columns})
  futures[executor.submit(
  self.deduplicate_table,
- database, table, columns, dry_run
+ database, table, columns, dry_run, reorder_id, use_python_dedup
  )] = table
  for future in concurrent.futures.as_completed(futures):
  table = futures[future]
@@ -564,12 +720,12 @@ class MySQLDeduplicator:
  for table in target_tables:
  columns = columns_map.get(table) if columns_map else None
  dup_count, affected_rows = self.deduplicate_table(
- database, table, columns, dry_run
+ database, table, columns, dry_run, reorder_id, use_python_dedup
  )
  results[table] = (dup_count, affected_rows)
  total_dup = sum(r[0] for r in results.values())
  total_del = sum(r[1] for r in results.values())
- logger.info('单库完成', {"库": database, "重复组数": total_dup, "总删除行数": total_del, "详细结果": results})
+ logger.info('单库完成', {"库": database, "重复组": total_dup, "总删除行": total_del, "详细结果": results})
  return results
  except Exception as e:
  logger.error('发生全局错误', {"库": database, 'func': sys._getframe().f_code.co_name, "error": str(e), 'traceback': repr(e)})
@@ -581,7 +737,9 @@ class MySQLDeduplicator:
  tables_map: Optional[Dict[str, List[str]]] = None,
  columns_map: Optional[Dict[str, Dict[str, List[str]]]] = None,
  dry_run: bool = False,
- parallel: bool = False
+ parallel: bool = False,
+ reorder_id: bool = False,
+ use_python_dedup: bool = True
  ) -> Dict[str, Dict[str, Tuple[int, int]]]:
  """
  对所有数据库进行去重。
@@ -592,6 +750,8 @@ class MySQLDeduplicator:
  columns_map (Optional[Dict[str, Dict[str, List[str]]]]): 指定每个表去重时使用的列,格式为 {数据库名: {表名: [列名, ...]}}。如果为 None,则使用所有列。
  dry_run (bool): 是否为模拟运行模式。为 True 时只统计重复行数,不实际删除。
  parallel (bool): 是否并行处理多个数据库。为 True 时使用线程池并发处理。
+ reorder_id (bool): 去重后是否重排id。
+ use_python_dedup (bool): 是否用Python查找重复id。
  Returns:
  Dict[str, Dict[str, Tuple[int, int]]]: 嵌套字典,格式为 {数据库名: {表名: (重复组数, 实际删除行数)}}。
  """
@@ -603,7 +763,7 @@ class MySQLDeduplicator:
  if not target_dbs:
  logger.warning('没有可处理的数据库')
  return all_results
- logger.info('全局开始', {"数据库数量": len(target_dbs), "数据库列表": target_dbs, "参数": {"模拟运行": dry_run, "并行处理": parallel, '排除列': self.exclude_columns}})
+ logger.info('全局开始', {"数据库数量": len(target_dbs), "数据库列表": target_dbs, "参数": {"模拟运行": dry_run, "并行处理": parallel, '排除列': self.exclude_columns, 'use_python_dedup': use_python_dedup}})
  if parallel and self.max_workers > 1:
  # 使用线程池并行处理多个数据库
  with concurrent.futures.ThreadPoolExecutor(
@@ -615,7 +775,7 @@ class MySQLDeduplicator:
  db_columns_map = columns_map.get(db) if columns_map else None
  futures[executor.submit(
  self.deduplicate_database,
- db, tables, db_columns_map, dry_run, False
+ db, tables, db_columns_map, dry_run, False, reorder_id, use_python_dedup
  )] = db
  for future in concurrent.futures.as_completed(futures):
  db = futures[future]
@@ -631,7 +791,7 @@ class MySQLDeduplicator:
  tables = tables_map.get(db) if tables_map else None
  db_columns_map = columns_map.get(db) if columns_map else None
  db_results = self.deduplicate_database(
- db, tables, db_columns_map, dry_run, parallel
+ db, tables, db_columns_map, dry_run, parallel, reorder_id, use_python_dedup
  )
  all_results[db] = db_results
  total_dup = sum(
@@ -642,7 +802,7 @@ class MySQLDeduplicator:
  r[1] for db in all_results.values()
  for r in db.values()
  )
- logger.info('全局完成', {"总重复组数": total_dup, "总删除行数": total_del, "详细结果": dict(all_results)})
+ logger.info('全局完成', {"总重复组": total_dup, "总删除行": total_del, "详细结果": dict(all_results)})
  return all_results
  except Exception as e:
  logger.error('异常', {"error": str(e), 'traceback': repr(e)})
@@ -772,22 +932,12 @@ class MySQLDeduplicator:
  if not self._check_table_exists(database, table):
  logger.warning('表不存在,跳过id重排', {"库": database, "表": table})
  return False
- # 检查id列是否存在
- with self._get_connection() as conn:
- with conn.cursor() as cursor:
- cursor.execute("""
- SELECT COLUMN_NAME, COLUMN_KEY
- FROM INFORMATION_SCHEMA.COLUMNS
- WHERE TABLE_SCHEMA=%s AND TABLE_NAME=%s
- """, (database, table))
- columns_info = cursor.fetchall()
- columns = [row['COLUMN_NAME'] for row in columns_info]
- id_is_pk = any(row['COLUMN_NAME'].lower() == id_column.lower() and row['COLUMN_KEY'] in ('PRI', 'UNI') for row in columns_info)
+ # 检查id列、主键信息(用_get_table_info)
+ columns, pk_cols, id_is_pk = self._get_table_info(database, table, id_column)
  if id_column not in columns:
  logger.warning('表无id列,跳过id重排', {"库": database, "表": table})
  return False
  # 检查主键是否为单列id
- pk_cols = [row['COLUMN_NAME'] for row in columns_info if row['COLUMN_KEY'] == 'PRI']
  if len(pk_cols) != 1 or pk_cols[0].lower() != id_column.lower():
  logger.warning('主键不是单列id,跳过id重排', {"库": database, "表": table, "主键列": pk_cols})
  return False
@@ -806,13 +956,13 @@ class MySQLDeduplicator:
  with conn.cursor() as cursor:
  cursor.execute(f"SHOW CREATE TABLE {table_quoted}")
  create_table_sql = cursor.fetchone()['Create Table']
- logger.info('开始id重排', {"库": database, "表": table, "重排列": id_column, "dry_run": dry_run, "DDL警告": "MySQL DDL操作不可回滚,建议提前备份!"})
+ logger.info('开始id重排', {"库": database, "表": table, "重排列": id_column, "试运行": dry_run, "DDL警告": "MySQL DDL操作不可回滚,建议提前备份!"})
  if dry_run:
  logger.info('dry_run模式,打印原表结构', {"库": database, "表": table, "建表语句": create_table_sql})
  return True
- temp_table = self._make_safe_table_name(table, prefix=f"temp_", suffix=f"_reorderid_{os.getpid()}_{threading.get_ident()}")
+ temp_table = self._make_temp_table_name(table)
  temp_table_quoted = f"`{database}`.`{temp_table}`"
- backup_table = self._make_safe_table_name(table, prefix="backup_", suffix=f"_{int(time.time())}_{uuid.uuid4().hex[:8]}")
+ backup_table = self._make_backup_table_name(table)
  backup_table_quoted = f"`{database}`.`{backup_table}`"
  try:
  with self._get_connection() as conn:
@@ -927,23 +1077,71 @@ class MySQLDeduplicator:
  return (prefix + suffix)[:max_length]
  return f"{prefix}{base[:remain]}{suffix}"[:max_length]

+ def _get_table_info(self, database: str, table: str, id_column: str = None):
+ """
+ 获取表的所有列名、主键列名列表、指定id列是否为主键。
+ Args:
+ database (str): 数据库名。
+ table (str): 表名。
+ id_column (str): id列名,默认使用self.primary_key。
+ Returns:
+ Tuple[List[str], List[str], bool]: (所有列名, 主键列名, id列是否为主键)
+ """
+ id_column = id_column or self.primary_key
+ with self._get_connection() as conn:
+ with conn.cursor() as cursor:
+ cursor.execute("""
+ SELECT COLUMN_NAME, COLUMN_KEY
+ FROM INFORMATION_SCHEMA.COLUMNS
+ WHERE TABLE_SCHEMA=%s AND TABLE_NAME=%s
+ """, (database, table))
+ columns_info = cursor.fetchall()
+ columns = [row['COLUMN_NAME'] for row in columns_info]
+ pk_cols = [row['COLUMN_NAME'] for row in columns_info if row['COLUMN_KEY'] == 'PRI']
+ id_is_pk = any(row['COLUMN_NAME'].lower() == id_column.lower() and row['COLUMN_KEY'] in ('PRI', 'UNI') for row in columns_info)
+ return columns, pk_cols, id_is_pk
+
+ def _make_temp_table_name(self, base: str) -> str:
+ """
+ 生成临时表名,带有 temp_ 前缀和 _dedup_ 进程线程后缀。
+ """
+ suffix = f"_dedup_{os.getpid()}_{threading.get_ident()}"
+ return self._make_safe_table_name(base, prefix="temp_", suffix=suffix)
+
+ def _make_backup_table_name(self, base: str) -> str:
+ """
+ 生成备份表名,带有 backup_ 前缀和时间戳+uuid后缀。
+ """
+ suffix = f"_{int(time.time())}_{uuid.uuid4().hex[:8]}"
+ return self._make_safe_table_name(base, prefix="backup_", suffix=suffix)
+

  def main():
  deduplicator = MySQLDeduplicator(
  username='root',
  password='pwd',
  host='localhost',
- port=3306
+ port=3306,
+ # date_range=['2025-05-27', '2025-05-28'],
+ exclude_tables={'推广数据2': [
+ # '地域报表_城市_2025_04',
+ # '地域报表_城市_2025_05',
+ # '地域报表_城市_2025_06',
+ '地域报表_城市_2025_04_copy1',
+ '地域报表_城市_2025_05_copy1',
+ '地域报表_城市_2025_06_copy1',
+ '主体报表_2025_copy1'
+ ]}
  )

  # 全库去重(单线程)
- deduplicator.deduplicate_all(dry_run=False, parallel=True)
+ deduplicator.deduplicate_all(dry_run=True, parallel=True, reorder_id=True)

  # # 指定数据库去重(多线程)
- # deduplicator.deduplicate_database('my_db', dry_run=False, parallel=False)
+ # deduplicator.deduplicate_database('my_db', dry_run=False, parallel=False, reorder_id=True)

  # # 指定表去重(使用特定列)
- # deduplicator.deduplicate_table('my_db', 'my_table', columns=['name', 'date'], dry_run=False)
+ # deduplicator.deduplicate_table('my_db', 'my_table', columns=["name", "date"], dry_run=False, reorder_id=False)

  # # 重排id列
  # deduplicator.reorder_id_column('my_db', 'my_table', 'id', dry_run=False, auto_drop_backup=True)
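Both new naming helpers delegate to `_make_safe_table_name`, whose tail is visible at the top of this hunk. A hedged reconstruction of that truncation logic, assuming `remain` is what is left of `max_length` once the prefix and suffix are reserved:

def make_safe_table_name(base: str, prefix: str = '', suffix: str = '', max_length: int = 64) -> str:
    # assumption: remain = space left for the base after reserving prefix/suffix
    remain = max_length - len(prefix) - len(suffix)
    if remain <= 0:
        # prefix + suffix alone exceed the limit: keep whatever fits
        return (prefix + suffix)[:max_length]
    return f"{prefix}{base[:remain]}{suffix}"[:max_length]

# MySQL identifiers are capped at 64 characters, so a long base name gets trimmed
print(make_safe_table_name('主体报表_2025_copy1' * 4, prefix='temp_', suffix='_dedup_123_456'))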
@@ -952,5 +1150,5 @@ def main():
  deduplicator.close()

  if __name__ == '__main__':
- main()
+ # main()
  pass
mdbq/mysql/uploader.py CHANGED
@@ -428,6 +428,7 @@ class MySQLUploader:
  if idx_col in set_typ:
  safe_idx_col = self._validate_identifier(idx_col)
  index_defs.append(f"INDEX `idx_{safe_idx_col}` (`{safe_idx_col}`)")
+ index_defs = list(set(index_defs))
  index_sql = (',' + ','.join(index_defs)) if index_defs else ''
  sql = f"""
  CREATE TABLE IF NOT EXISTS `{db_name}`.`{table_name}` (
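`list(set(index_defs))` drops duplicate INDEX clauses that appear when the same column is named more than once; the order of index clauses inside a CREATE TABLE does not change the resulting schema, so losing list order here is harmless. A sketch with made-up clauses:

index_defs = [
    "INDEX `idx_a` (`a`)",
    "INDEX `idx_b` (`b`)",
    "INDEX `idx_a` (`a`)",  # duplicate, e.g. a column listed twice in `indexes`
]
index_defs = list(set(index_defs))  # deduplicated; order is not preserved
index_sql = (',' + ','.join(index_defs)) if index_defs else ''
print(index_sql)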
@@ -593,6 +594,34 @@ class MySQLUploader:
  logger.error('无法获取表列信息', {'库': db_name, '表': table_name, '错误': str(e)})
  raise

+ def _ensure_index(self, db_name: str, table_name: str, column: str):
+ """
+ 确保某列有索引,如果没有则创建。
+ """
+ db_name = self._validate_identifier(db_name)
+ table_name = self._validate_identifier(table_name)
+ column = self._validate_identifier(column)
+ # 检查索引是否已存在
+ sql_check = '''
+ SELECT COUNT(1) FROM INFORMATION_SCHEMA.STATISTICS
+ WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s AND COLUMN_NAME = %s
+ '''
+ sql_create = f'ALTER TABLE `{db_name}`.`{table_name}` ADD INDEX `idx_{column}` (`{column}`)'
+ try:
+ with self._get_connection() as conn:
+ with conn.cursor() as cursor:
+ cursor.execute(sql_check, (db_name, table_name, column))
+ exists = cursor.fetchone()
+ if exists and list(exists.values())[0] > 0:
+ logger.debug('索引已存在', {'库': db_name, '表': table_name, '列': column})
+ return
+ cursor.execute(sql_create)
+ conn.commit()
+ logger.info('已为列创建索引', {'库': db_name, '表': table_name, '列': column})
+ except Exception as e:
+ logger.error('创建索引失败', {'库': db_name, '表': table_name, '列': column, '错误': str(e)})
+ raise
+
  def _upload_to_table(
  self,
  db_name: str,
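One subtlety in `_ensure_index`: the existence check selects `COUNT(1)` without an alias, so with a dict-style cursor the row comes back keyed by the literal expression, which is why the code reads the first value rather than a named key (the deduplicator's variant instead aliases it `as idx_count` and indexes by name). An illustration, with the dict standing in for `cursor.fetchone()`:

# assumed shape of a pymysql DictCursor row for "SELECT COUNT(1) ..." with no alias
exists = {'COUNT(1)': 2}

if exists and list(exists.values())[0] > 0:
    print('index already present, skip ALTER TABLE')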
@@ -646,6 +675,13 @@ class MySQLUploader:
  })
  raise ValueError(f"列不存在: `{col}` -> `{db_name}`.`{table_name}`")

+ # 确保分表参考字段为索引
+ if date_column and date_column in table_columns:
+ try:
+ self._ensure_index(db_name, table_name, date_column)
+ except Exception as e:
+ logger.warning('分表参考字段索引创建失败', {'库': db_name, '表': table_name, '列': date_column, '错误': str(e)})
+
  # 插入数据
  self._insert_data(
  db_name, table_name, data, set_typ,
@@ -868,7 +904,7 @@ class MySQLUploader:
  :param duplicate_columns: 用于检查重复的列,可选
  :param allow_null: 是否允许空值,默认为False
  :param partition_by: 分表方式('year'、'month'、'None'),可选
- :param partition_date_column: 用于分表的日期列名,默认为'日期'
+ :param partition_date_column: 用于分表的日期列名,默认为'日期', 默认会添加为索引
  :param auto_create: 表不存在时是否自动创建,默认为True
  :param indexes: 需要创建索引的列列表,可选
  :param update_on_duplicate: 遇到重复数据时是否更新旧数据,默认为False
@@ -977,6 +1013,12 @@ class MySQLUploader:
  allow_null, auto_create, partition_date_column,
  indexes, batch_id, update_on_duplicate, transaction_mode
  )
+ # 确保分表参考字段为索引
+ if partition_date_column in filtered_set_typ:
+ try:
+ self._ensure_index(db_name, part_table, partition_date_column)
+ except Exception as e:
+ logger.warning('分表参考字段索引创建失败', {'库': db_name, '表': part_table, '列': partition_date_column, '错误': str(e)})
  except Exception as e:
  logger.error('分表上传异常', {
  '库': db_name,
@@ -995,6 +1037,12 @@ class MySQLUploader:
  allow_null, auto_create, partition_date_column,
  indexes, batch_id, update_on_duplicate, transaction_mode
  )
+ # 确保分表参考字段为索引
+ if partition_date_column in filtered_set_typ:
+ try:
+ self._ensure_index(db_name, table_name, partition_date_column)
+ except Exception as e:
+ logger.warning('分表参考字段索引创建失败', {'库': db_name, '表': table_name, '列': partition_date_column, '错误': str(e)})

  success_flag = True

mdbq-3.11.10.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: mdbq
- Version: 3.11.8
+ Version: 3.11.10
  Home-page: https://pypi.org/project/mdbq
  Author: xigua,
  Author-email: 2587125111@qq.com
mdbq-3.11.10.dist-info/RECORD CHANGED
@@ -1,17 +1,17 @@
  mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
- mdbq/__version__.py,sha256=JqV56ilza72jpkf_fztVtAdeSmcdPr0BmGGo9FFjGrA,18
+ mdbq/__version__.py,sha256=L9HK2W1LgO8Zc5gpJgI1uJ5J0VRcUyMXHr1ZT-FeNOM,19
  mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
  mdbq/aggregation/query_data.py,sha256=nxL8hSy8yI1QLlqnkTNHHQSxRfo-6WKL5OA-N4xLB7c,179832
  mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
  mdbq/config/config.py,sha256=eaTfrfXQ65xLqjr5I8-HkZd_jEY1JkGinEgv3TSLeoQ,3170
  mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
- mdbq/log/mylogger.py,sha256=HuxLBCXjm6fZrxYE0rdpUCz359WGeqOX0vvg9jTuRY4,24126
+ mdbq/log/mylogger.py,sha256=Crw6LwVo3I3IUbzIETu8f46Quza3CTCh-qYf4edbBPo,24139
  mdbq/log/spider_logging.py,sha256=-ozWWEGm3HVv604ozs_OOvVwumjokmUPwbaodesUrPY,1664
  mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
- mdbq/mysql/deduplicator.py,sha256=Znmjn4sI1Mj2koSPTDojFwg_1MTgk3GZTFZyhSRwn7s,46746
+ mdbq/mysql/deduplicator.py,sha256=w8etA5dAsY7g58bWU3SQt7n_OWnS9Y2TVh0D7m0MK9E,57961
  mdbq/mysql/mysql.py,sha256=Kjpi-LL00WQUmTTOfhEBsNrmo4-4kFFJzrHbVKfqiBE,56770
  mdbq/mysql/s_query.py,sha256=dlnrVJ3-Vp1Suv9CNbPxyYSRqRJUHjOpF39tb2F-wBc,10190
- mdbq/mysql/uploader.py,sha256=LxPlAfSNhQbLu-or4wxa-vLjCw5_PIN3ZVoksWUJazQ,61701
+ mdbq/mysql/uploader.py,sha256=8Px_W2bYOr1wQgMXMK0DggNiuE6a6Ul4BlJake8LSo8,64469
  mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
  mdbq/other/download_sku_picture.py,sha256=YU8DxKMXbdeE1OOKEA848WVp62jYHw5O4tXTjUdq9H0,44832
  mdbq/other/otk.py,sha256=iclBIFbQbhlqzUbcMMoePXBpcP1eZ06ZtjnhcA_EbmE,7241
@@ -24,7 +24,7 @@ mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
  mdbq/redis/getredis.py,sha256=YHgCKO8mEsslwet33K5tGss-nrDDwPnOSlhA9iBu0jY,24078
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
  mdbq/spider/aikucun.py,sha256=cqK-JRd_DHbToC7hyo83m8o97NZkJFqmB2xBtr6aAVU,20961
- mdbq-3.11.8.dist-info/METADATA,sha256=EJtaHsIzWmcB9hTRg1NZeDd55Zez0lu6FPD_ZQB9nMw,364
- mdbq-3.11.8.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
- mdbq-3.11.8.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
- mdbq-3.11.8.dist-info/RECORD,,
+ mdbq-3.11.10.dist-info/METADATA,sha256=dVhkC84iq1GWtV6onfsLj18CwfGnIo1bXXDa-TXUU1E,365
+ mdbq-3.11.10.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
+ mdbq-3.11.10.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
+ mdbq-3.11.10.dist-info/RECORD,,