mdbq 3.11.5__py3-none-any.whl → 3.11.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/__version__.py +1 -1
- mdbq/mysql/deduplicator.py +191 -23
- {mdbq-3.11.5.dist-info → mdbq-3.11.7.dist-info}/METADATA +1 -1
- {mdbq-3.11.5.dist-info → mdbq-3.11.7.dist-info}/RECORD +6 -6
- {mdbq-3.11.5.dist-info → mdbq-3.11.7.dist-info}/WHEEL +0 -0
- {mdbq-3.11.5.dist-info → mdbq-3.11.7.dist-info}/top_level.txt +0 -0
mdbq/__version__.py CHANGED

@@ -1 +1 @@
-VERSION = '3.11.5'
+VERSION = '3.11.7'
mdbq/mysql/deduplicator.py CHANGED

@@ -80,7 +80,8 @@ class MySQLDeduplicator:
         date_column: str = '日期',
         exclude_columns: Optional[List[str]] = None,
         exclude_databases: Optional[List[str]] = None,
-        exclude_tables: Optional[Dict[str, List[str]]] = None
+        exclude_tables: Optional[Dict[str, List[str]]] = None,
+        duplicate_keep_mode: str = 'keep_one'  # 新增参数
     ) -> None:
         """
         初始化去重处理器
@@ -90,6 +91,7 @@ class MySQLDeduplicator:
         :param exclude_columns: 去重时排除的列名列表,默认为['id', '更新时间']
         :param exclude_databases: 排除的数据库名列表
         :param exclude_tables: 排除的表名字典 {数据库名: [表名, ...]}
+        :param duplicate_keep_mode: 'keep_one'(默认,重复组保留一条),'remove_all'(全部删除重复组)
         """
         # 连接池状态标志
         self._closed = False
@@ -173,6 +175,8 @@ class MySQLDeduplicator:
         self.exclude_databases = set([db.lower() for db in exclude_databases]) if exclude_databases else set()
         self.exclude_tables = {k.lower(): set([t.lower() for t in v]) for k, v in (exclude_tables or {}).items()}
 
+        self.duplicate_keep_mode = duplicate_keep_mode if duplicate_keep_mode in ('keep_one', 'remove_all') else 'keep_one'
+
     def _get_connection(self) -> pymysql.connections.Connection:
         """
         从连接池获取一个数据库连接。
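Note on the new mode: 'keep_one' (the default) keeps exactly one row per duplicate group, while 'remove_all' deletes every row of any group that is duplicated, so only rows that were unique from the start survive. A toy, pure-Python sketch of these semantics, for illustration only (this is not the library's code; the deduplicator itself does the work in SQL):

from collections import Counter

def dedup(rows, key_cols, mode='keep_one'):
    """Toy illustration of keep_one vs remove_all on in-memory rows."""
    keyed = [tuple(r[c] for c in key_cols) for r in rows]
    counts = Counter(keyed)
    if mode == 'keep_one':
        seen, out = set(), []
        for r, k in zip(rows, keyed):
            if k not in seen:  # first row of each group survives (the real SQL keeps min_id)
                seen.add(k)
                out.append(r)
        return out
    # remove_all: any group with more than one member disappears entirely
    return [r for r, k in zip(rows, keyed) if counts[k] == 1]

rows = [{'id': 1, 'name': 'a'}, {'id': 2, 'name': 'a'}, {'id': 3, 'name': 'b'}]
print(dedup(rows, ['name'], 'keep_one'))    # ids 1 and 3
print(dedup(rows, ['name'], 'remove_all'))  # id 3 only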
@@ -329,7 +333,8 @@ class MySQLDeduplicator:
         database: str,
         table: str,
         columns: Optional[List[str]] = None,
-        dry_run: bool = False
+        dry_run: bool = False,
+        reset_id: bool = False
     ) -> Tuple[int, int]:
         """
         执行单表去重。
@@ -339,6 +344,7 @@ class MySQLDeduplicator:
             table (str): 表名。
             columns (Optional[List[str]]): 用于去重的列名列表(为None时使用所有列)。
             dry_run (bool): 是否为模拟运行(只统计不实际删除)。
+            reset_id (bool): 是否在去重后重排id。
         Returns:
             Tuple[int, int]: (重复组数, 实际删除行数)。
         """
@@ -414,14 +420,28 @@ class MySQLDeduplicator:
             if not dry_run:
                 # 分批删除,避免锁表
                 while True:
-                    [8 removed lines not shown in source view]
+                    if self.duplicate_keep_mode == 'remove_all':
+                        # 删除所有重复组的所有记录
+                        delete_dup_sql = f"""
+                        DELETE FROM `{database}`.`{table}`
+                        WHERE ({', '.join([f'`{col}`' for col in use_columns])}) IN (
+                            SELECT {column_list} FROM `{database}`.`{temp_table}`
+                        ) {'AND' if use_time_filter else ''} {f'`{time_col}` >= \'{self._dedup_start_date}\' AND `{time_col}` <= \'{self._dedup_end_date}\'' if use_time_filter else ''}
+                        LIMIT {self.batch_size}
+                        """
+                    else:
+                        # 修正:只删除重复组中不是min_id的行,唯一数据不动
+                        delete_dup_sql = f"""
+                        DELETE FROM `{database}`.`{table}` t
+                        WHERE EXISTS (
+                            SELECT 1 FROM `{database}`.`{temp_table}` tmp
+                            WHERE
+                                {' AND '.join([f't.`{col}` <=> tmp.`{col}`' for col in use_columns])}
+                                AND t.`{pk_real}` <> tmp.`min_id`
+                        )
+                        {'AND' if use_time_filter else ''} {f't.`{time_col}` >= \'{self._dedup_start_date}\' AND t.`{time_col}` <= \'{self._dedup_end_date}\'' if use_time_filter else ''}
+                        LIMIT {self.batch_size}
+                        """
                     logger.debug('执行删除重复数据SQL', {'sql': delete_dup_sql})
                     cursor.execute(delete_dup_sql)
                     batch_deleted = cursor.rowcount
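Two details of the rewritten keep_one branch are worth a note. `<=>` is MySQL's NULL-safe equality operator: with plain `=`, a comparison against NULL yields NULL, so duplicate groups containing NULLs would never match the temp table, while `<=>` treats two NULLs as equal. And the trailing LIMIT is what makes the `while True` loop above delete in batches rather than holding locks on the whole table at once. Below, a standalone rendering of the statement for hypothetical names (database my_db, table t, temp table tmp_t, dedup columns a/b, primary key id, batch size 1000); it mirrors the f-string in the diff rather than quoting library output:

use_columns = ['a', 'b']
match = ' AND '.join([f't.`{col}` <=> tmp.`{col}`' for col in use_columns])
delete_dup_sql = f"""
DELETE FROM `my_db`.`t` t
WHERE EXISTS (
    SELECT 1 FROM `my_db`.`tmp_t` tmp
    WHERE {match}
      AND t.`id` <> tmp.`min_id`  -- spare the group's surviving row
)
LIMIT 1000
"""
print(delete_dup_sql)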
@@ -429,7 +449,10 @@ class MySQLDeduplicator:
                    conn.commit()
                    if batch_deleted < self.batch_size:
                        break
-            logger.info('操作删除', {"库": database, "表": table, "数据量": total_count, "重复组数": dup_count, "实际删除": affected_rows, "时间范围": [self._dedup_start_date, self._dedup_end_date] if use_time_filter else None, "实际去重列": use_columns})
+            logger.info('操作删除', {"库": database, "表": table, "数据量": total_count, "重复组数": dup_count, "实际删除": affected_rows, "时间范围": [self._dedup_start_date, self._dedup_end_date] if use_time_filter else None, "实际去重列": use_columns, "去重模式": self.duplicate_keep_mode})
+            # 新增:去重后重排id
+            if reset_id and affected_rows > 0:
+                self._reset_id_column(database, table)
         else:
             logger.debug('dry_run模式,不执行删除', {"库": database, "表": table, "重复组数": dup_count, "时间范围": [self._dedup_start_date, self._dedup_end_date] if use_time_filter else None})
             affected_rows = 0
@@ -458,7 +481,8 @@ class MySQLDeduplicator:
         database: str,
         table: str,
         columns: Optional[List[str]] = None,
-        dry_run: bool = False
+        dry_run: bool = False,
+        reset_id: bool = False
     ) -> Tuple[int, int]:
         """
         对指定表进行去重。
@@ -468,6 +492,7 @@ class MySQLDeduplicator:
             table (str): 表名。
             columns (Optional[List[str]]): 用于去重的列名列表(为None时使用所有列)。
             dry_run (bool): 是否为模拟运行(只统计不实际删除)。
+            reset_id (bool): 是否在去重后重排id。
         Returns:
             Tuple[int, int]: (重复组数, 实际删除行数)。
         """
@@ -479,7 +504,7 @@ class MySQLDeduplicator:
                 logger.warning('表不存在', {"库": database, "表": table, "warning": "跳过"})
                 return (0, 0)
             logger.info('单表开始', {"库": database, "表": table, "参数": {"指定去重列": columns, "模拟运行": dry_run, '排除列': self.exclude_columns}})
-            result = self._deduplicate_table(database, table, columns, dry_run)
+            result = self._deduplicate_table(database, table, columns, dry_run, reset_id)
             logger.info('单表完成', {"库": database, "表": table, "结果[重复, 删除]": result})
             return result
         except Exception as e:
@@ -492,7 +517,8 @@ class MySQLDeduplicator:
         tables: Optional[List[str]] = None,
         columns_map: Optional[Dict[str, List[str]]] = None,
         dry_run: bool = False,
-        parallel: bool = False
+        parallel: bool = False,
+        reset_id: bool = False
     ) -> Dict[str, Tuple[int, int]]:
         """
         对指定数据库的所有表进行去重。
@@ -503,6 +529,7 @@ class MySQLDeduplicator:
             columns_map (Optional[Dict[str, List[str]]]): 各表使用的去重列 {表名: [列名]}。
             dry_run (bool): 是否为模拟运行。
             parallel (bool): 是否并行处理。
+            reset_id (bool): 是否在去重后重排id。
         Returns:
             Dict[str, Tuple[int, int]]: {表名: (重复组数, 实际删除行数)}。
         """
@@ -531,7 +558,7 @@ class MySQLDeduplicator:
                     logger.debug('提交表去重任务', {'库': database, '表': table, 'columns': columns})
                     futures[executor.submit(
                         self.deduplicate_table,
-                        database, table, columns, dry_run
+                        database, table, columns, dry_run, reset_id
                     )] = table
                 for future in concurrent.futures.as_completed(futures):
                     table = futures[future]
@@ -547,7 +574,7 @@ class MySQLDeduplicator:
                 for table in target_tables:
                     columns = columns_map.get(table) if columns_map else None
                     dup_count, affected_rows = self.deduplicate_table(
-                        database, table, columns, dry_run
+                        database, table, columns, dry_run, reset_id
                     )
                     results[table] = (dup_count, affected_rows)
             total_dup = sum(r[0] for r in results.values())
@@ -564,7 +591,8 @@ class MySQLDeduplicator:
         tables_map: Optional[Dict[str, List[str]]] = None,
         columns_map: Optional[Dict[str, Dict[str, List[str]]]] = None,
         dry_run: bool = False,
-        parallel: bool = False
+        parallel: bool = False,
+        reset_id: bool = False
     ) -> Dict[str, Dict[str, Tuple[int, int]]]:
         """
         对所有数据库进行去重。
@@ -575,6 +603,7 @@ class MySQLDeduplicator:
             columns_map (Optional[Dict[str, Dict[str, List[str]]]]): 指定每个表去重时使用的列,格式为 {数据库名: {表名: [列名, ...]}}。如果为 None,则使用所有列。
             dry_run (bool): 是否为模拟运行模式。为 True 时只统计重复行数,不实际删除。
             parallel (bool): 是否并行处理多个数据库。为 True 时使用线程池并发处理。
+            reset_id (bool): 是否在去重后重排id。
         Returns:
             Dict[str, Dict[str, Tuple[int, int]]]: 嵌套字典,格式为 {数据库名: {表名: (重复组数, 实际删除行数)}}。
         """
@@ -598,7 +627,7 @@ class MySQLDeduplicator:
                     db_columns_map = columns_map.get(db) if columns_map else None
                     futures[executor.submit(
                         self.deduplicate_database,
-                        db, tables, db_columns_map, dry_run, False
+                        db, tables, db_columns_map, dry_run, False, reset_id
                     )] = db
                 for future in concurrent.futures.as_completed(futures):
                     db = futures[future]
@@ -614,7 +643,7 @@ class MySQLDeduplicator:
                 tables = tables_map.get(db) if tables_map else None
                 db_columns_map = columns_map.get(db) if columns_map else None
                 db_results = self.deduplicate_database(
-                    db, tables, db_columns_map, dry_run, parallel
+                    db, tables, db_columns_map, dry_run, parallel, reset_id
                 )
                 all_results[db] = db_results
             total_dup = sum(
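With `reset_id` now threaded through all three entry points (`deduplicate_table`, `deduplicate_database`, `deduplicate_all`), callers can opt in at any granularity. A usage sketch against the signatures above; the database and table names are placeholders and the constructor arguments are omitted:

deduplicator = MySQLDeduplicator(...)  # connection arguments not shown in this diff

# Dry run first: count duplicate groups, delete nothing
dups, deleted = deduplicator.deduplicate_table(
    'my_db', 'my_table', columns=['name', 'date'], dry_run=True)

# Real run on one database, renumbering ids in tables that actually lost rows
results = deduplicator.deduplicate_database(
    'my_db', dry_run=False, parallel=False, reset_id=True)

# Everything; parallel=True runs databases concurrently, while tables inside
# each database stay serial (note the hard-coded False in the submit call above)
all_results = deduplicator.deduplicate_all(
    dry_run=False, parallel=True, reset_id=True)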
@@ -709,6 +738,146 @@ class MySQLDeduplicator:
         """
         self.close()
 
+    def _has_foreign_key_dependency(self, database: str, table: str, pk: str) -> bool:
+        """检测id列是否被其他表外键引用。"""
+        fk_check_sql = '''
+            SELECT TABLE_NAME, COLUMN_NAME, CONSTRAINT_NAME, REFERENCED_TABLE_NAME, REFERENCED_COLUMN_NAME
+            FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE
+            WHERE REFERENCED_TABLE_SCHEMA = %s AND REFERENCED_TABLE_NAME = %s AND REFERENCED_COLUMN_NAME = %s
+        '''
+        with self._get_connection() as conn:
+            with conn.cursor() as cursor:
+                cursor.execute(f"USE `{database}`")
+                cursor.execute(fk_check_sql, (database, table, pk))
+                fk_rows = cursor.fetchall()
+                return bool(fk_rows)
+
+    def _get_table_create_sql_and_pk(self, database: str, table: str) -> tuple:
+        """获取表的CREATE语句和主键字段列表。"""
+        with self._get_connection() as conn:
+            with conn.cursor() as cursor:
+                cursor.execute(f"USE `{database}`")
+                cursor.execute(f"SHOW CREATE TABLE `{database}`.`{table}`")
+                create_sql = cursor.fetchone()['Create Table']
+                cursor.execute(f"SHOW KEYS FROM `{database}`.`{table}` WHERE Key_name = 'PRIMARY'")
+                pk_rows = cursor.fetchall()
+                pk_columns = [row['Column_name'] for row in pk_rows]
+                return create_sql, pk_columns
+
+    def _make_temp_table_sql(self, create_sql: str, table: str, temp_table: str, pk: str, pk_columns: list) -> str:
+        """生成临时表的CREATE语句,仅替换id字段类型。"""
+        def replace_id_type(sql):
+            lines = sql.split('\n')
+            new_lines = []
+            for line in lines:
+                if re.match(rf'\s*`{pk}` ', line):
+                    if pk_columns == [pk]:
+                        line = re.sub(r'`' + pk + r'`\s+[^,]*', f'`{pk}` INT NOT NULL AUTO_INCREMENT', line)
+                    else:
+                        line = re.sub(r'`' + pk + r'`\s+[^,]*', f'`{pk}` INT NOT NULL', line)
+                new_lines.append(line)
+            return '\n'.join(new_lines)
+        create_sql_temp = re.sub(
+            rf'CREATE TABLE `{table}`',
+            f'CREATE TABLE `{temp_table}`',
+            create_sql,
+            count=1
+        )
+        create_sql_temp = replace_id_type(create_sql_temp)
+        create_sql_temp = re.sub(r'AUTO_INCREMENT=\d+', '', create_sql_temp)
+        return create_sql_temp
+
+    def _create_and_fill_temp_table(self, database: str, table: str, temp_table: str, pk: str) -> list:
+        """创建临时表并插入重排id数据,返回所有字段名。"""
+        with self._get_connection() as conn:
+            with conn.cursor() as cursor:
+                cursor.execute(f"USE `{database}`")
+                cursor.execute(f"SHOW COLUMNS FROM `{database}`.`{table}`")
+                columns = [row['Field'] for row in cursor.fetchall()]
+                columns_wo_id = [col for col in columns if col != pk]
+                col_list = ', '.join([f'`{col}`' for col in columns_wo_id])
+                insert_sql = f"INSERT INTO `{database}`.`{temp_table}` ({col_list}, `{pk}`) SELECT {col_list}, (@rownum:=@rownum+1) as `{pk}` FROM `{database}`.`{table}` JOIN (SELECT @rownum:=0) r ORDER BY `{pk}` ASC"
+                cursor.execute(insert_sql)
+                return columns
+
+    def _swap_tables_with_backup(self, database: str, table: str, temp_table: str, bak_table: str):
+        """原表重命名为备份,临时表变原表名。"""
+        with self._get_connection() as conn:
+            with conn.cursor() as cursor:
+                cursor.execute(f"USE `{database}`")
+                cursor.execute(f"RENAME TABLE `{database}`.`{table}` TO `{database}`.`{bak_table}`")
+                cursor.execute(f"RENAME TABLE `{database}`.`{temp_table}` TO `{database}`.`{table}`")
+                conn.commit()
+
+    def _check_and_cleanup_backup(self, database: str, table: str, bak_table: str) -> bool:
+        """校验新表和备份表数据量一致,安全删除备份表。"""
+        with self._get_connection() as conn:
+            with conn.cursor() as cursor:
+                cursor.execute(f"USE `{database}`")
+                cursor.execute(f"SELECT COUNT(*) as cnt FROM `{database}`.`{table}`")
+                new_cnt = cursor.fetchone()['cnt']
+                cursor.execute(f"SELECT COUNT(*) as cnt FROM `{database}`.`{bak_table}`")
+                old_cnt = cursor.fetchone()['cnt']
+                if new_cnt == old_cnt:
+                    cursor.execute(f"DROP TABLE `{database}`.`{bak_table}`")
+                    conn.commit()
+                    return True
+                else:
+                    logger.error('id重排后数据量不一致,未删除备份表', {'库': database, '表': table, '新表行数': new_cnt, '备份表行数': old_cnt})
+                    return False
+
+    def _rollback_table_swap(self, database: str, table: str, bak_table: str):
+        """回滚:如bak表存在且原表不存在,则恢复原表名。"""
+        try:
+            with self._get_connection() as conn:
+                with conn.cursor() as cursor:
+                    cursor.execute(f"USE `{database}`")
+                    cursor.execute(f"SHOW TABLES LIKE '{bak_table}'")
+                    if cursor.fetchone():
+                        cursor.execute(f"SHOW TABLES LIKE '{table}'")
+                        if not cursor.fetchone():
+                            cursor.execute(f"RENAME TABLE `{database}`.`{bak_table}` TO `{database}`.`{table}`")
+                            conn.commit()
+                            logger.info('回滚成功,已恢复原表', {'库': database, '表': table})
+        except Exception as e2:
+            logger.error('回滚失败', {'库': database, '表': table, '异常': str(e2)})
+
+    def _reset_id_column(self, database: str, table: str) -> bool:
+        pk = self.primary_key
+        temp_table = f"temp_{table}_resetid_{os.getpid()}_{threading.get_ident()}"
+        temp_table = re.sub(r'[^a-zA-Z0-9_]', '_', temp_table)[:60]
+        bak_table = f"{table}_bak_{int(time.time())}"
+        try:
+            # 1. 检查外键依赖
+            if self._has_foreign_key_dependency(database, table, pk):
+                logger.warning('存在外键依赖,拒绝重排id', {'库': database, '表': table})
+                return False
+            # 2. 获取表结构和主键
+            create_sql, pk_columns = self._get_table_create_sql_and_pk(database, table)
+            # 3. 生成临时表DDL
+            create_sql_temp = self._make_temp_table_sql(create_sql, table, temp_table, pk, pk_columns)
+            # 4. 创建临时表
+            with self._get_connection() as conn:
+                with conn.cursor() as cursor:
+                    cursor.execute(f"USE `{database}`")
+                    cursor.execute(f"DROP TABLE IF EXISTS `{database}`.`{temp_table}`")
+                    cursor.execute(create_sql_temp)
+                    conn.commit()
+            # 5. 填充临时表
+            self._create_and_fill_temp_table(database, table, temp_table, pk)
+            # 6. 表交换
+            self._swap_tables_with_backup(database, table, temp_table, bak_table)
+            # 7. 校验和清理
+            if self._check_and_cleanup_backup(database, table, bak_table):
+                logger.info('id重排完成并安全删除备份表,主键信息已保留', {'库': database, '表': table})
+                return True
+            else:
+                return False
+        except Exception as e:
+            logger.error('id重排失败,尝试回滚', {'库': database, '表': table, '异常': str(e)})
+            self._rollback_table_swap(database, table, bak_table)
+            return False
+
 
 def main():
     deduplicator = MySQLDeduplicator(
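Taken together, `_reset_id_column` rebuilds the table rather than updating ids in place: it refuses when the id column is referenced by a foreign key, copies rows into a temp table with freshly numbered ids, swaps names, and only drops the backup after the row counts match; on failure the backup is renamed back. For a hypothetical `my_db`.`t` with primary key `id`, the statement sequence is approximately as follows (real temp and backup names embed os.getpid(), threading.get_ident() and time.time(), so the suffixes here are invented):

steps = [
    "DROP TABLE IF EXISTS `my_db`.`temp_t_resetid_1234_5678`",
    # DDL taken from SHOW CREATE TABLE, with the id column forced to
    # INT NOT NULL [AUTO_INCREMENT] and any AUTO_INCREMENT=N counter stripped
    "CREATE TABLE `my_db`.`temp_t_resetid_1234_5678` ( ... )",
    "INSERT INTO `my_db`.`temp_t_resetid_1234_5678` (`a`, `b`, `id`) "
    "SELECT `a`, `b`, (@rownum:=@rownum+1) as `id` "
    "FROM `my_db`.`t` JOIN (SELECT @rownum:=0) r ORDER BY `id` ASC",
    "RENAME TABLE `my_db`.`t` TO `my_db`.`t_bak_1700000000`",
    "RENAME TABLE `my_db`.`temp_t_resetid_1234_5678` TO `my_db`.`t`",
    # COUNT(*) on both tables must agree before the backup is dropped
    "DROP TABLE `my_db`.`t_bak_1700000000`",
]

One consequence of the DDL rewrite worth noting: the id column is retyped to INT NOT NULL regardless of its original type, so a BIGINT key would be narrowed by a reset, and AUTO_INCREMENT is only restored when the id column is the sole primary key.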
@@ -719,14 +888,13 @@ def main():
     )
 
     # 全库去重(单线程)
-    deduplicator.deduplicate_all(dry_run=False, parallel=False)
+    deduplicator.deduplicate_all(dry_run=False, parallel=False, reset_id=False)
 
     # # 指定数据库去重(多线程)
-    #
-    # deduplicator.deduplicate_database('my_db', dry_run=False, parallel=True)
+    # deduplicator.deduplicate_database('my_db', dry_run=False, parallel=True, reset_id=False)
 
     # # 指定表去重(使用特定列)
-    # deduplicator.deduplicate_table('my_db', 'my_table', columns=['name', 'date'], dry_run=False)
+    # deduplicator.deduplicate_table('my_db', 'my_table', columns=['name', 'date'], dry_run=False, reset_id=False)
 
     # 关闭连接
     deduplicator.close()
{mdbq-3.11.5.dist-info → mdbq-3.11.7.dist-info}/RECORD CHANGED

@@ -1,5 +1,5 @@
 mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
-mdbq/__version__.py,sha256=
+mdbq/__version__.py,sha256=KXKzNBZD4M6L-jW29owhKjBycLSiUdGeTf_uNAYvyGI,18
 mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
 mdbq/aggregation/query_data.py,sha256=nxL8hSy8yI1QLlqnkTNHHQSxRfo-6WKL5OA-N4xLB7c,179832
 mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
@@ -8,7 +8,7 @@ mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
 mdbq/log/mylogger.py,sha256=HuxLBCXjm6fZrxYE0rdpUCz359WGeqOX0vvg9jTuRY4,24126
 mdbq/log/spider_logging.py,sha256=-ozWWEGm3HVv604ozs_OOvVwumjokmUPwbaodesUrPY,1664
 mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
-mdbq/mysql/deduplicator.py,sha256=
+mdbq/mysql/deduplicator.py,sha256=dq40YBLVd5ho28pYzVfwm5pA90YA3iN6l9xX4k0Ynds,42808
 mdbq/mysql/mysql.py,sha256=Kjpi-LL00WQUmTTOfhEBsNrmo4-4kFFJzrHbVKfqiBE,56770
 mdbq/mysql/s_query.py,sha256=dlnrVJ3-Vp1Suv9CNbPxyYSRqRJUHjOpF39tb2F-wBc,10190
 mdbq/mysql/uploader.py,sha256=LxPlAfSNhQbLu-or4wxa-vLjCw5_PIN3ZVoksWUJazQ,61701
@@ -24,7 +24,7 @@ mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
 mdbq/redis/getredis.py,sha256=YHgCKO8mEsslwet33K5tGss-nrDDwPnOSlhA9iBu0jY,24078
 mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
 mdbq/spider/aikucun.py,sha256=cqK-JRd_DHbToC7hyo83m8o97NZkJFqmB2xBtr6aAVU,20961
-mdbq-3.11.
-mdbq-3.11.
-mdbq-3.11.
-mdbq-3.11.
+mdbq-3.11.7.dist-info/METADATA,sha256=j_0kmOn4tTbk8TY8LqbEZ2OWmJz0-70sUQNwP_N0VCc,364
+mdbq-3.11.7.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
+mdbq-3.11.7.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
+mdbq-3.11.7.dist-info/RECORD,,
{mdbq-3.11.5.dist-info → mdbq-3.11.7.dist-info}/WHEEL: file without changes
{mdbq-3.11.5.dist-info → mdbq-3.11.7.dist-info}/top_level.txt: file without changes
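For reference, a RECORD entry has the form `path,sha256=<urlsafe-base64 digest with padding stripped>,<size in bytes>`, per the wheel spec. The new `__version__.py` entry's size of 18 is consistent with the file containing exactly `VERSION = '3.11.7'` and no trailing newline; a quick sketch of the computation (if the file is exactly these bytes, this reproduces the hash shown above):

import base64
import hashlib

data = b"VERSION = '3.11.7'"  # 18 bytes, matching the size field
digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b'=')
print(f"mdbq/__version__.py,sha256={digest.decode()},{len(data)}")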