mdbq-3.9.3-py3-none-any.whl → mdbq-3.9.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbq/__version__.py CHANGED
@@ -1 +1 @@
- VERSION = '3.9.3'
+ VERSION = '3.9.5'
mdbq/log/mlogger.py ADDED
@@ -0,0 +1,531 @@
+ import logging
+ import logging.handlers
+ import datetime
+ import json
+ import os
+ import time
+ import threading
+ import queue
+ from typing import Optional, Dict, Any, List, Callable, Union
+
+ try:
+     import psutil
+
+     HAS_PSUTIL = True
+ except ImportError:
+     HAS_PSUTIL = False
+
+
+ class MyLogger:
+     """
+     Enhanced structured logger with several advanced features.
+
+     Features:
+     - Asynchronous logging (reduces I/O blocking)
+     - Context manager support
+     - Custom log filters
+     - Richer system metrics collection
+     - Log sampling control
+     - Dynamic log level adjustment
+     - Request tracing IDs
+     - Thread safety
+     - Log buffering and batched writes
+     - Custom exception handling
+
+     Usage example:
+         logger = MyLogger(
+             name='app_logger',
+             logging_mode='both',
+             log_level='INFO',
+             log_file='app.log',
+             max_log_size=50,
+             backup_count=5,
+             enable_async=True
+         )
+
+         with logger.context(request_id='12345'):
+             logger.info("处理请求", extra={'user': 'admin'})
+     """
+
+     def __init__(
+         self,
+         name: str = 'mlogger',
+         logging_mode: str = 'console',  # 'both', 'console', 'file', 'none'
+         log_level: str = 'INFO',  # 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'
+         log_file: str = 'm_app.log',
+         log_format: str = 'json',  # JSON by default; 'simple' is also supported
+         max_log_size: int = 50,  # MB
+         backup_count: int = 5,
+         sensitive_fields: Optional[List[str]] = None,
+         enable_async: bool = False,
+         buffer_size: int = 1000,
+         sample_rate: float = 1.0,
+         filters: Optional[List[Callable]] = None,
+         enable_metrics: bool = False,
+         metrics_interval: int = 300,
+         message_limited: int = 1000
+     ):
+         """
+         Initialize the logger.
+
+         :param name: logger name
+         :param logging_mode: log output mode
+         :param log_level: log level
+         :param log_file: log file path
+         :param max_log_size: maximum size of a single log file (MB)
+         :param backup_count: number of rotated log files to keep
+         :param sensitive_fields: list of sensitive fields (masked in output)
+         :param enable_async: whether to enable asynchronous logging
+         :param buffer_size: log buffer size (async mode only)
+         :param sample_rate: sampling rate for logs (0.0-1.0); a hash of the message text decides whether a record is kept, reducing log volume
+         :param filters: list of custom log filter callables
+         :param enable_metrics: whether to collect system metrics
+         :param metrics_interval: metrics collection interval (seconds)
+         :param message_limited: maximum message length; longer messages are truncated
+         """
+         self.name = name
+         self.logging_mode = logging_mode.lower()
+         self.log_level = log_level.upper()
+         self.log_file = log_file
+         self.log_format = log_format
+         self.max_log_size = max_log_size
+         self.backup_count = backup_count
+         self.sensitive_fields = sensitive_fields or []
+         self.enable_async = enable_async
+         self.buffer_size = buffer_size
+         self.sample_rate = max(0.0, min(1.0, sample_rate))
+         self.filters = filters or []
+         self.enable_metrics = enable_metrics and HAS_PSUTIL
+         self.metrics_interval = metrics_interval
+         self.message_limited = message_limited
+
+         # Per-thread context storage
+         self._context = threading.local()
+         self._context.data = {}
+
+         # System metrics state
+         self._last_metrics_time = 0
+         self._metrics_cache = {}
+
+         # Asynchronous logging state
+         self._log_queue = queue.Queue(maxsize=buffer_size)
+         self._async_thread = None
+         self._stop_event = threading.Event()
+
+         # Create the underlying logger
+         self.logger = logging.getLogger(name)
+         self._init_logging()
+
+         if self.enable_async:
+             self._start_async_logging()
+
+     def __enter__(self):
+         """Context manager entry."""
+         return self
+
+     def __exit__(self, exc_type, exc_val, exc_tb):
+         """Context manager exit: log any pending exception, then shut down."""
+         if exc_type is not None:
+             self.error(f"上下文内异常: {exc_val}",
+                        extra={'类型': str(exc_type)})
+         self.shutdown()
+         return False
+
+     def context(self, **kwargs):
+         """Return a context manager that sets temporary context variables."""
+         return self._ContextManager(self, kwargs)
+
+     class _ContextManager:
+         def __init__(self, logger, context_vars):
+             self.logger = logger
+             self.context_vars = context_vars
+             self.old_context = {}
+
+         def __enter__(self):
+             # Save the old context and apply the new one
+             if not hasattr(self.logger._context, 'data'):
+                 self.logger._context.data = {}
+             self.old_context = self.logger._context.data.copy()
+             self.logger._context.data.update(self.context_vars)
+             return self.logger
+
+         def __exit__(self, exc_type, exc_val, exc_tb):
+             # Restore the old context
+             self.logger._context.data = self.old_context
+             if exc_type is not None:
+                 self.logger.error(f"上下文内异常2: {exc_val}",
+                                   extra={'类型': str(exc_type)})
+             return False
+
+     def _init_logging(self):
+         """Initialize the logging configuration."""
+         valid_levels = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
+         if self.log_level not in valid_levels:
+             self.log_level = 'INFO'
+
+         self.logger.setLevel(self.log_level)
+
+         # Avoid adding duplicate handlers
+         if self.logger.handlers:
+             for handler in self.logger.handlers[:]:
+                 self.logger.removeHandler(handler)
+
+         # Define the log format
+         if self.log_format.lower() == 'simple':
+             # Plain-text format
+             class SimpleFormatter(logging.Formatter):
+                 def format(self, record):
+                     # Base log message
+                     msg = super().format(record)
+
+                     # Append context, metrics and any other extra data
+                     if hasattr(record, 'extra_data') and record.extra_data:
+                         context_data = record.extra_data.get('context_data', {})
+                         if context_data:
+                             msg += f" | Context: {context_data}"
+
+                         metrics = record.extra_data.get('性能指标', {})
+                         if metrics:
+                             msg += f" | Metrics: {metrics}"
+
+                         extra = {k: v for k, v in record.extra_data.items()
+                                  if k not in ('context_data', '性能指标')}
+                         if extra:
+                             msg += f" | Extra: {extra}"
+
+                     return msg
+
+             formatter = SimpleFormatter('%(asctime)s - %(levelname)s - %(message)s')
+             formatter.datefmt = '%Y-%m-%d %H:%M:%S'
+         else:
+             # Structured JSON format
+             class StructuredFormatter(logging.Formatter):
+                 def format(self, record):
+                     log_data = {
+                         'timestamp': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                         'level': record.levelname,
+                         'message': record.getMessage(),
+                         'module': record.module,
+                         'function': record.funcName,
+                         'line': record.lineno,
+                         'thread': record.threadName,
+                         'process': record.processName,
+                     }
+
+                     # Merge extra fields
+                     if hasattr(record, 'extra_data'):
+                         log_data.update(record.extra_data)
+
+                     # Merge context information
+                     if hasattr(record, 'context_data'):
+                         log_data.update(record.context_data)
+
+                     # Attach exception information
+                     if record.exc_info:
+                         log_data['异常'] = self.formatException(record.exc_info)
+
+                     # Mask sensitive fields (the '过滤' key carries the field list)
+                     sensitive_fields = log_data.pop('过滤', [])
+                     for field in sensitive_fields:
+                         if field in log_data:
+                             log_data[field] = '***'
+                         if isinstance(log_data.get('message'), str):
+                             log_data['message'] = log_data['message'].replace(field, '***')
+
+                     return json.dumps(log_data, ensure_ascii=False)
+
+             formatter = StructuredFormatter()
+
+         # Add handlers according to the output mode
+         if self.logging_mode in ('both', 'console'):
+             console_handler = logging.StreamHandler()
+             console_handler.setFormatter(formatter)
+             self.logger.addHandler(console_handler)
+
+         if self.logging_mode in ('both', 'file'):
+             # Make sure the log directory exists
+             log_dir = os.path.dirname(self.log_file)
+             if log_dir and not os.path.exists(log_dir):
+                 os.makedirs(log_dir)
+
+             file_handler = logging.handlers.RotatingFileHandler(
+                 filename=self.log_file,
+                 maxBytes=self.max_log_size * 1024 * 1024,
+                 backupCount=self.backup_count,
+                 encoding='utf-8'
+             )
+             file_handler.setFormatter(formatter)
+             self.logger.addHandler(file_handler)
+
+     def _get_system_metrics(self) -> Dict[str, Any]:
+         """Collect system resource usage metrics."""
+         if not self.enable_metrics:
+             return {}
+
+         try:
+             return {
+                 '内存': {
+                     '使用率': psutil.virtual_memory().percent,
+                     '已使用': psutil.virtual_memory().used,
+                     '可用': psutil.virtual_memory().available,
+                 },
+                 'CPU': {
+                     '使用率': psutil.cpu_percent(),
+                     '核心数': psutil.cpu_count(),
+                 },
+                 '磁盘': {
+                     '使用率': psutil.disk_usage('/').percent,
+                     '已使用': psutil.disk_usage('/').used,
+                     '剩余': psutil.disk_usage('/').free,
+                 },
+                 '网络': {
+                     '发送字节数': psutil.net_io_counters().bytes_sent,
+                     '接收字节数': psutil.net_io_counters().bytes_recv,
+                 },
+                 '进程': {
+                     'PID': os.getpid(),
+                     '线程数': threading.active_count(),
+                 }
+             }
+         except Exception as e:
+             self.logger.warning(f"无法采集系统性能指标: {e}",
+                                 extra={'extra_data': {'metrics_error': str(e)}})
+             return {}
+
+     def _apply_filters(self, level: str, message: str, extra: Dict) -> bool:
+         """Apply the custom filters."""
+         for filter_func in self.filters:
+             try:
+                 if not filter_func(level, message, extra):
+                     return False  # a filter returning False drops the record
+             except Exception as e:
+                 self.logger.warning(f"过滤失败: {e}",
+                                     extra={'extra_data': {'filter_error': str(e)}})
+         return True  # record the log only if every filter returned True
+
+     def _async_log_worker(self):
+         """Asynchronous logging worker thread."""
+         while not self._stop_event.is_set() or not self._log_queue.empty():
+             try:
+                 log_args = self._log_queue.get(timeout=0.1)
+                 if log_args:
+                     level, message, extra = log_args
+                     self._sync_log(level, message, extra)
+             except queue.Empty:
+                 continue
+             except Exception as e:
+                 # Keep the logging thread alive on unexpected errors
+                 try:
+                     self.logger.error(f"日志线程崩溃: {e}",
+                                       extra={'extra_data': {'async_error': str(e)}})
+                 except Exception:
+                     pass
+
+     def _start_async_logging(self):
+         """Start the asynchronous logging thread."""
+         self._stop_event.clear()
+         self._async_thread = threading.Thread(
+             target=self._async_log_worker,
+             name=f"{self.name}_async_logger",
+             daemon=True
+         )
+         self._async_thread.start()
+
+     def _sync_log(self, level: str, message: str, extra: Optional[Dict] = None):
+         """Synchronous logging."""
+         if not hasattr(self.logger, level.lower()):
+             return
+
+         # Truncate overly long messages
+         if len(message) > self.message_limited:
+             message = message[:self.message_limited] + '...'
+
+         # Collect system metrics periodically
+         if self.enable_metrics:
+             now = time.time()
+             if now - self._last_metrics_time > self.metrics_interval:
+                 self._metrics_cache = self._get_system_metrics()
+                 self._last_metrics_time = now
+
+         # Prepare the extra log data
+         log_extra = {}
+         if self.enable_metrics:
+             log_extra['性能指标'] = self._metrics_cache
+         if extra:
+             log_extra.update(extra)
+
+         # Attach context information
+         if hasattr(self._context, 'data') and self._context.data:
+             log_extra['context_data'] = self._context.data.copy()
+
+         # Attach the sensitive-field list for masking
+         log_extra['过滤'] = self.sensitive_fields
+
+         # Apply log sampling
+         if self.sample_rate < 1.0 and level.lower() in ('debug', 'info'):
+             if hash(message) % 100 >= self.sample_rate * 100:
+                 return
+
+         # Apply filters
+         if not self._apply_filters(level, message, log_extra):
+             return
+
+         # Emit the record
+         getattr(self.logger, level.lower())(message, extra={'extra_data': log_extra})
+
+     def log(self, level: str, message: str, extra: Optional[Dict] = None):
+         """
+         Record a log entry.
+
+         :param level: log level ('debug', 'info', 'warning', 'error', 'critical')
+         :param message: log message
+         :param extra: dict of extra data
+         """
+         if not hasattr(self.logger, level.lower()):
+             return
+
+         if self.enable_async:
+             try:
+                 self._log_queue.put((level, message, extra), timeout=0.1)
+             except queue.Full:
+                 # Fall back to synchronous logging when the queue is full
+                 self._sync_log(level, f"[ASYNC QUEUE FULL] {message}", extra)
+         else:
+             self._sync_log(level, message, extra)
+
+     def set_level(self, level: str):
+         """Dynamically change the log level."""
+         valid_levels = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
+         level = level.upper()
+         if level in valid_levels:
+             self.log_level = level
+             self.logger.setLevel(level)
+             for handler in self.logger.handlers:
+                 handler.setLevel(level)
+
+     def add_filter(self, filter_func: Callable):
+         """Add a log filter."""
+         if callable(filter_func):
+             self.filters.append(filter_func)
+
+     def set_context(self, **kwargs):
+         """Set context variables."""
+         if not hasattr(self._context, 'data'):
+             self._context.data = {}
+         self._context.data.update(kwargs)
+
+     def get_context(self, key: str, default=None):
+         """Get a context variable."""
+         if hasattr(self._context, 'data'):
+             return self._context.data.get(key, default)
+         return default
+
+     def clear_context(self):
+         """Clear all context variables."""
+         if hasattr(self._context, 'data'):
+             self._context.data.clear()
+
+     def shutdown(self):
+         """Shut down the logger, making sure all records are flushed."""
+         if self.enable_async:
+             self._stop_event.set()
+             if self._async_thread and self._async_thread.is_alive():
+                 self._async_thread.join(timeout=5)
+
+         # Flush and close all handlers
+         for handler in self.logger.handlers:
+             handler.flush()
+             handler.close()
+
+     def debug(self, message: str, extra: Optional[Dict] = None):
+         """Log a debug message."""
+         self.log('debug', message, extra)
+
+     def info(self, message: str, extra: Optional[Dict] = None):
+         """Log an informational message."""
+         self.log('info', message, extra)
+
+     def warning(self, message: str, extra: Optional[Dict] = None):
+         """Log a warning."""
+         self.log('warning', message, extra)
+
+     def error(self, message: str, extra: Optional[Dict] = None):
+         """Log an error."""
+         self.log('error', message, extra)
+
+     def critical(self, message: str, extra: Optional[Dict] = None):
+         """Log a critical error."""
+         self.log('critical', message, extra)
+
+     def exception(self, message: str, exc_info: Exception, extra: Optional[Dict] = None):
+         """Log an exception."""
+         if not extra:
+             extra = {}
+         extra['异常'] = str(exc_info)
+         extra['类型'] = exc_info.__class__.__name__
+         self.log('error', message, extra)
+
+     def timeit(self, message: str = "Execution time"):
+         """Return a timing context manager."""
+         return self._Timer(self, message)
+
+     class _Timer:
+         def __init__(self, logger, message):
+             self.logger = logger
+             self.message = message
+             self.start_time = None
+
+         def __enter__(self):
+             self.start_time = time.time()
+             return self
+
+         def __exit__(self, exc_type, exc_val, exc_tb):
+             elapsed = time.time() - self.start_time
+             self.logger.info(f"{self.message}: {elapsed:.3f}s",
+                              extra={'elapsed_seconds': elapsed})
+             return False
+
+
+ def main():
+     # Create the logger
+     logger = MyLogger(
+         name='my_app',
+         logging_mode='both',
+         log_level='DEBUG',
+         log_file='/Users/xigua/Downloads/my_app.log',
+         log_format='simple',
+         enable_async=True,
+         sample_rate=1,  # 1.0 keeps every DEBUG/INFO record (no sampling)
+         sensitive_fields=['password', 'token'],
+         enable_metrics=True
+     )
+
+     # Add a custom filter
+     def keyword_filter(level, message, extra):
+         blocked_keywords = ['secret', 'password']
+         return not any(keyword in message.lower() for keyword in blocked_keywords)
+
+     logger.add_filter(keyword_filter)
+
+     # Use the context manager
+     with logger.context(request_id='12345', user='admin'):
+         logger.info("开始处理请求")
+
+         # Use the timer
+         with logger.timeit("数据库查询"):
+             time.sleep(0.1)  # simulate a slow operation
+
+         logger.debug("调试信息", extra={'data': {'key': 'value'}})
+
+         try:
+             1 / 0
+         except Exception as e:
+             logger.exception("发生错误", e)
+
+     # Make sure all records are flushed
+     logger.shutdown()
+
+
+ if __name__ == '__main__':
+     main()
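
As a minimal usage sketch against the module above (the import path follows the RECORD listing below; the filter name, parameter values, and messages are illustrative, not part of the package): a filter is any callable taking (level, message, extra) and returning False to drop the record, and sampling keys off hash(message), so the keep rate is approximate and deterministic per message text within a process.

    from mdbq.log.mlogger import MyLogger

    # A filter returns False to discard the record (hypothetical example)
    def drop_health_checks(level, message, extra):
        return 'healthcheck' not in message.lower()

    logger = MyLogger(
        name='demo',
        logging_mode='console',
        log_format='json',
        sample_rate=0.2,             # keep roughly 20% of DEBUG/INFO records
        sensitive_fields=['token'],  # masked as '***' in the JSON output
        filters=[drop_health_checks],
    )

    with logger.context(request_id='abc-123'):   # attached to every record inside
        with logger.timeit("batch"):             # logs elapsed time on exit (subject to sampling)
            for i in range(10):
                logger.info(f"event {i}", extra={'token': 'secret-value'})

    logger.shutdown()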
mdbq/mysql/mysql.py CHANGED
@@ -12,11 +12,14 @@ import os
  import logging
  import logging.handlers
  from mdbq.other import otk
- from typing import Union, List, Dict, Optional, Any, Tuple
+ from typing import Union, List, Dict, Optional, Any, Tuple, Set
  from dbutils.pooled_db import PooledDB
  import json
  import psutil
  from collections import OrderedDict
+ import threading
+ import concurrent.futures
+ from collections import defaultdict


  warnings.filterwarnings('ignore')
@@ -2383,10 +2386,625 @@ class MySQLUploader:
 
          return decorator
 
+
+ class MySQLDeduplicator:
+     """
+     MySQL data deduplication.
+
+     Features:
+     1. Automatically detects and deletes duplicate rows in MySQL databases
+     2. Supports scanning the whole server or specific tables
+     3. Safe for multi-threaded/multi-process use
+     4. Robust error handling and logging
+
+     Usage example:
+         deduplicator = MySQLDeduplicator(
+             username='root',
+             password='password',
+             host='localhost',
+             port=3306
+         )
+
+         # Deduplicate every database
+         deduplicator.deduplicate_all()
+
+         # Deduplicate one database (multi-threaded)
+         deduplicator.deduplicate_database('my_db', parallel=True)
+
+         # Deduplicate one table (using specific columns)
+         deduplicator.deduplicate_table('my_db', 'my_table', columns=['name', 'date'])
+
+         # Close connections
+         deduplicator.close()
+     """
+
+     def __init__(
+         self,
+         username: str,
+         password: str,
+         host: str = 'localhost',
+         port: int = 3306,
+         charset: str = 'utf8mb4',
+         max_workers: int = 1,
+         batch_size: int = 1000,
+         skip_system_dbs: bool = True,
+         logging_mode: str = 'console',
+         log_level: str = 'INFO',
+         log_file: str = 'mysql_deduplicate.log',
+         max_retries: int = 3,
+         retry_interval: int = 5,
+         pool_size: int = 5
+     ):
+         """
+         Initialize the deduplicator.
+
+         :param username: database user name
+         :param password: database password
+         :param host: database host, defaults to localhost
+         :param port: database port, defaults to 3306
+         :param charset: character set, defaults to utf8mb4
+         :param max_workers: maximum number of worker threads, defaults to 1 (single-threaded)
+         :param batch_size: batch size, defaults to 1000
+         :param skip_system_dbs: whether to skip system databases, defaults to True
+         :param logging_mode: logging mode ('console', 'file', 'both', 'none')
+         :param log_level: log level ('DEBUG', 'INFO', 'WARNING', 'ERROR')
+         :param log_file: log file path
+         :param max_retries: maximum number of retries
+         :param retry_interval: retry interval (seconds)
+         :param pool_size: connection pool size
+         """
+         # Initialize the connection pool
+         self.pool = PooledDB(
+             creator=pymysql,
+             host=host,
+             port=port,
+             user=username,
+             password=password,
+             charset=charset,
+             maxconnections=pool_size,
+             cursorclass=pymysql.cursors.DictCursor
+         )
+
+         # Configuration
+         self.max_workers = max(1, min(max_workers, 20))  # cap the number of threads
+         self.batch_size = batch_size
+         self.skip_system_dbs = skip_system_dbs
+         self.max_retries = max_retries
+         self.retry_interval = retry_interval
+
+         # Thread-safety controls
+         self._lock = threading.Lock()
+         self._processing_tables = set()  # tables currently being processed
+
+         # Initialize logging
+         self._init_logging(logging_mode, log_level, log_file)
+
+         # System databases
+         self.SYSTEM_DATABASES = {
+             'information_schema', 'mysql',
+             'performance_schema', 'sys'
+         }
+
+     def _init_logging(
+         self,
+         logging_mode: str,
+         log_level: str,
+         log_file: str
+     ):
+         """Initialize the logging configuration."""
+         self.logger = logging.getLogger('mysql_deduplicator')
+         self.logger.setLevel(log_level.upper())
+
+         # Avoid adding duplicate handlers
+         if self.logger.handlers:
+             for handler in self.logger.handlers[:]:
+                 self.logger.removeHandler(handler)
+
+         formatter = logging.Formatter(
+             '%(asctime)s - %(levelname)s - %(message)s',
+             datefmt='%Y-%m-%d %H:%M:%S'
+         )
+
+         mode = logging_mode.lower()
+         if mode in ('both', 'console'):
+             console_handler = logging.StreamHandler()
+             console_handler.setFormatter(formatter)
+             self.logger.addHandler(console_handler)
+
+         if mode in ('both', 'file'):
+             file_handler = logging.FileHandler(
+                 filename=log_file,
+                 encoding='utf-8'
+             )
+             file_handler.setFormatter(formatter)
+             self.logger.addHandler(file_handler)
+
+     def _log(self, level: str, message: str, extra: Optional[Dict] = None):
+         """Unified logging helper."""
+         if not hasattr(self.logger, level.lower()):
+             return
+
+         # Truncate overly long messages
+         if len(message) > 500:
+             message = message[:500] + '...'
+
+         log_method = getattr(self.logger, level.lower())
+         log_method(message, extra=extra)
+
+     def _get_connection(self):
+         """Get a connection from the pool."""
+         try:
+             conn = self.pool.connection()
+             self._log('debug', "成功获取数据库连接")
+             return conn
+         except Exception as e:
+             self._log('error', f"获取数据库连接失败: {str(e)}")
+             raise ConnectionError(f"连接数据库失败: {str(e)}")
+
+     @staticmethod
+     def _retry_on_failure(func):
+         """Retry decorator."""
+
+         @wraps(func)
+         def wrapper(self, *args, **kwargs):
+             last_exception = None
+             for attempt in range(self.max_retries + 1):
+                 try:
+                     return func(self, *args, **kwargs)
+                 except (pymysql.OperationalError, pymysql.InterfaceError) as e:
+                     last_exception = e
+                     if attempt < self.max_retries:
+                         wait_time = self.retry_interval * (attempt + 1)
+                         self._log('warning',
+                                   f"数据库操作失败,准备重试 (尝试 {attempt + 1}/{self.max_retries})",
+                                   {'error': str(e), 'wait_time': wait_time})
+                         time.sleep(wait_time)
+                         continue
+                 except Exception as e:
+                     last_exception = e
+                     self._log('error',
+                               f"操作失败: {str(e)}",
+                               {'error_type': type(e).__name__})
+                     break
+
+             if last_exception:
+                 raise last_exception
+             raise Exception("未知错误")
+
+         return wrapper
+
+     @_retry_on_failure
+     def _get_databases(self) -> List[str]:
+         """Get all non-system databases."""
+         sql = "SHOW DATABASES"
+
+         with self._get_connection() as conn:
+             with conn.cursor() as cursor:
+                 cursor.execute(sql)
+                 all_dbs = [row['Database'] for row in cursor.fetchall()]
+
+                 if self.skip_system_dbs:
+                     return [db for db in all_dbs if db.lower() not in self.SYSTEM_DATABASES]
+                 return all_dbs
+
+     @_retry_on_failure
+     def _get_tables(self, database: str) -> List[str]:
+         """Get all tables in the given database."""
+         sql = "SHOW TABLES"
+
+         with self._get_connection() as conn:
+             with conn.cursor() as cursor:
+                 cursor.execute(f"USE `{database}`")
+                 cursor.execute(sql)
+                 return [row[f'Tables_in_{database}'] for row in cursor.fetchall()]
+
+     @_retry_on_failure
+     def _get_table_columns(self, database: str, table: str) -> List[str]:
+         """Get the column names of a table (excluding the id column)."""
+         sql = """
+             SELECT COLUMN_NAME
+             FROM INFORMATION_SCHEMA.COLUMNS
+             WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s
+             ORDER BY ORDINAL_POSITION
+         """
+
+         with self._get_connection() as conn:
+             with conn.cursor() as cursor:
+                 cursor.execute(sql, (database, table))
+                 return [row['COLUMN_NAME'] for row in cursor.fetchall()
+                         if row['COLUMN_NAME'].lower() != 'id']
+
+     def _acquire_table_lock(self, database: str, table: str) -> bool:
+         """Acquire the processing lock for a table to prevent concurrent processing."""
+         key = f"{database}.{table}"
+
+         with self._lock:
+             if key in self._processing_tables:
+                 self._log('debug', f"表 {key} 正在被其他线程处理,跳过")
+                 return False
+             self._processing_tables.add(key)
+             return True
+
+     def _release_table_lock(self, database: str, table: str):
+         """Release the processing lock for a table."""
+         key = f"{database}.{table}"
+
+         with self._lock:
+             if key in self._processing_tables:
+                 self._processing_tables.remove(key)
+
+     def _deduplicate_table(
+         self,
+         database: str,
+         table: str,
+         columns: Optional[List[str]] = None,
+         dry_run: bool = False
+     ) -> Tuple[int, int]:
+         """
+         Deduplicate a single table.
+
+         :param database: database name
+         :param table: table name
+         :param columns: columns used for deduplication (all columns when None)
+         :param dry_run: simulate only (count duplicates without deleting)
+         :return: (number of duplicate groups, number of deleted rows)
+         """
+         if not self._acquire_table_lock(database, table):
+             return (0, 0)
+
+         try:
+             self._log('info', f"开始处理表: {database}.{table}")
+
+             # Fetch the actual column names
+             all_columns = self._get_table_columns(database, table)
+             if not all_columns:
+                 self._log('warning', f"表 {database}.{table} 没有有效列(可能只有id列),跳过")
+                 return (0, 0)
+
+             # Use the requested columns, or all columns
+             use_columns = columns or all_columns
+             invalid_columns = set(use_columns) - set(all_columns)
+
+             if invalid_columns:
+                 self._log('warning',
+                           f"表 {database}.{table} 中不存在以下列: {invalid_columns},使用有效列",
+                           {'invalid_columns': invalid_columns}
+                           )
+                 use_columns = [col for col in use_columns if col in all_columns]
+
+             if not use_columns:
+                 self._log('error', f"表 {database}.{table} 没有有效的去重列")
+                 return (0, 0)
+
+             # Build the deduplication SQL
+             column_list = ', '.join([f'`{col}`' for col in use_columns])
+             temp_table = f"temp_{table}_{int(time.time())}"
+
+             # Summarize duplicate groups in a temporary table to avoid long table locks
+             create_temp_sql = f"""
+                 CREATE TABLE `{database}`.`{temp_table}` AS
+                 SELECT MIN(`id`) as `min_id`, {column_list}, COUNT(*) as `dup_count`
+                 FROM `{database}`.`{table}`
+                 GROUP BY {column_list}
+                 HAVING COUNT(*) > 1
+             """
+
+             # Delete only rows that belong to a duplicate group, keeping the row
+             # with the smallest id; `<=>` is NULL-safe, matching GROUP BY semantics
+             join_condition = ' AND '.join([f't.`{col}` <=> d.`{col}`' for col in use_columns])
+             delete_dup_sql = f"""
+                 DELETE t FROM `{database}`.`{table}` t
+                 JOIN `{database}`.`{temp_table}` d ON {join_condition}
+                 WHERE t.`id` > d.`min_id`
+             """
+
+             drop_temp_sql = f"DROP TABLE IF EXISTS `{database}`.`{temp_table}`"
+
+             with self._get_connection() as conn:
+                 with conn.cursor() as cursor:
+                     # Create the temporary table summarizing duplicates
+                     cursor.execute(create_temp_sql)
+                     cursor.execute(f"SELECT COUNT(*) as cnt FROM `{database}`.`{temp_table}`")
+                     dup_count = cursor.fetchone()['cnt']
+
+                     if dup_count == 0:
+                         self._log('info', f"表 {database}.{table} 没有重复数据")
+                         cursor.execute(drop_temp_sql)
+                         conn.commit()
+                         return (0, 0)
+
+                     self._log('info',
+                               f"表 {database}.{table} 发现 {dup_count} 组重复数据",
+                               {'columns': use_columns}
+                               )
+
+                     if not dry_run:
+                         # Perform the actual deletion
+                         cursor.execute(delete_dup_sql)
+                         affected_rows = cursor.rowcount
+                         conn.commit()
+                         self._log('info',
+                                   f"表 {database}.{table} 已删除 {affected_rows} 行重复数据",
+                                   {'columns': use_columns}
+                                   )
+                     else:
+                         affected_rows = 0
+                         self._log('info',
+                                   f"[模拟运行] 表 {database}.{table} 将删除 {dup_count} 组重复数据",
+                                   {'columns': use_columns}
+                                   )
+
+                     # Clean up the temporary table
+                     cursor.execute(drop_temp_sql)
+                     conn.commit()
+
+                     return (dup_count, affected_rows)
+
+         except Exception as e:
+             self._log('error',
+                       f"处理表 {database}.{table} 时出错: {str(e)}",
+                       {'error_type': type(e).__name__}
+                       )
+             return (0, 0)
+         finally:
+             self._release_table_lock(database, table)
+
+     def deduplicate_table(
+         self,
+         database: str,
+         table: str,
+         columns: Optional[List[str]] = None,
+         dry_run: bool = False
+     ) -> Tuple[int, int]:
+         """
+         Deduplicate the given table.
+
+         :param database: database name
+         :param table: table name
+         :param columns: columns used for deduplication (all columns when None)
+         :param dry_run: simulate only (count duplicates without deleting)
+         :return: (number of duplicate groups, number of deleted rows)
+         """
+         try:
+             # Check that the table exists
+             if not self._check_table_exists(database, table):
+                 self._log('warning', f"表 {database}.{table} 不存在,跳过")
+                 return (0, 0)
+
+             return self._deduplicate_table(database, table, columns, dry_run)
+         except Exception as e:
+             self._log('error',
+                       f"处理表 {database}.{table} 时发生全局错误: {str(e)}",
+                       {'error_type': type(e).__name__}
+                       )
+             return (0, 0)
+
+     def deduplicate_database(
+         self,
+         database: str,
+         tables: Optional[List[str]] = None,
+         columns_map: Optional[Dict[str, List[str]]] = None,
+         dry_run: bool = False,
+         parallel: bool = False
+     ) -> Dict[str, Tuple[int, int]]:
+         """
+         Deduplicate all tables of the given database.
+
+         :param database: database name
+         :param tables: tables to process (all tables when None)
+         :param columns_map: deduplication columns per table {table: [columns]}
+         :param dry_run: simulate only
+         :param parallel: process tables in parallel
+         :return: dict {table: (duplicate groups, deleted rows)}
+         """
+         results = {}
+
+         try:
+             # Check that the database exists
+             if not self._check_database_exists(database):
+                 self._log('warning', f"数据库 {database} 不存在,跳过")
+                 return results
+
+             # Determine the tables to process
+             target_tables = tables or self._get_tables(database)
+             if not target_tables:
+                 self._log('info', f"数据库 {database} 中没有表,跳过")
+                 return results
+
+             self._log('info',
+                       f"开始处理数据库 {database} 中的 {len(target_tables)} 张表",
+                       {'tables': target_tables}
+                       )
+
+             if parallel and self.max_workers > 1:
+                 # Parallel processing
+                 with concurrent.futures.ThreadPoolExecutor(
+                         max_workers=self.max_workers
+                 ) as executor:
+                     futures = {}
+                     for table in target_tables:
+                         columns = columns_map.get(table) if columns_map else None
+                         futures[executor.submit(
+                             self.deduplicate_table,
+                             database, table, columns, dry_run
+                         )] = table
+
+                     for future in concurrent.futures.as_completed(futures):
+                         table = futures[future]
+                         try:
+                             dup_count, affected_rows = future.result()
+                             results[table] = (dup_count, affected_rows)
+                         except Exception as e:
+                             self._log('error',
+                                       f"处理表 {database}.{table} 时出错: {str(e)}",
+                                       {'error_type': type(e).__name__}
+                                       )
+                             results[table] = (0, 0)
+             else:
+                 # Serial processing
+                 for table in target_tables:
+                     columns = columns_map.get(table) if columns_map else None
+                     dup_count, affected_rows = self.deduplicate_table(
+                         database, table, columns, dry_run
+                     )
+                     results[table] = (dup_count, affected_rows)
+
+             # Summarize the results
+             total_dup = sum(r[0] for r in results.values())
+             total_del = sum(r[1] for r in results.values())
+
+             self._log('info',
+                       f"数据库 {database} 处理完成 - 共发现 {total_dup} 组重复数据,删除 {total_del} 行",
+                       {'results': results}
+                       )
+
+             return results
+
+         except Exception as e:
+             self._log('error',
+                       f"处理数据库 {database} 时发生全局错误: {str(e)}",
+                       {'error_type': type(e).__name__}
+                       )
+             return results
+
+     def deduplicate_all(
+         self,
+         databases: Optional[List[str]] = None,
+         tables_map: Optional[Dict[str, List[str]]] = None,
+         columns_map: Optional[Dict[str, Dict[str, List[str]]]] = None,
+         dry_run: bool = False,
+         parallel: bool = False
+     ) -> Dict[str, Dict[str, Tuple[int, int]]]:
+         """
+         Deduplicate all databases.
+
+         :param databases: databases to process (all non-system databases when None)
+         :param tables_map: tables to process per database {database: [tables]}
+         :param columns_map: deduplication columns per table {database: {table: [columns]}}
+         :param dry_run: simulate only
+         :param parallel: process databases in parallel
+         :return: nested dict {database: {table: (duplicate groups, deleted rows)}}
+         """
+         all_results = defaultdict(dict)
+
+         try:
+             # Determine the databases to process
+             target_dbs = databases or self._get_databases()
+             if not target_dbs:
+                 self._log('warning', "没有可处理的数据库")
+                 return all_results
+
+             self._log('info',
+                       f"开始处理 {len(target_dbs)} 个数据库",
+                       {'databases': target_dbs}
+                       )
+
+             if parallel and self.max_workers > 1:
+                 # Process databases in parallel
+                 with concurrent.futures.ThreadPoolExecutor(
+                         max_workers=self.max_workers
+                 ) as executor:
+                     futures = {}
+                     for db in target_dbs:
+                         tables = tables_map.get(db) if tables_map else None
+                         db_columns_map = columns_map.get(db) if columns_map else None
+                         futures[executor.submit(
+                             self.deduplicate_database,
+                             db, tables, db_columns_map, dry_run, False
+                         )] = db
+
+                     for future in concurrent.futures.as_completed(futures):
+                         db = futures[future]
+                         try:
+                             db_results = future.result()
+                             all_results[db] = db_results
+                         except Exception as e:
+                             self._log('error',
+                                       f"处理数据库 {db} 时出错: {str(e)}",
+                                       {'error_type': type(e).__name__}
+                                       )
+                             all_results[db] = {}
+             else:
+                 # Process databases serially
+                 for db in target_dbs:
+                     tables = tables_map.get(db) if tables_map else None
+                     db_columns_map = columns_map.get(db) if columns_map else None
+                     db_results = self.deduplicate_database(
+                         db, tables, db_columns_map, dry_run, parallel
+                     )
+                     all_results[db] = db_results
+
+             # Summarize the overall results
+             total_dup = sum(
+                 r[0] for db in all_results.values()
+                 for r in db.values()
+             )
+             total_del = sum(
+                 r[1] for db in all_results.values()
+                 for r in db.values()
+             )
+
+             self._log('info',
+                       f"所有数据库处理完成 - 共发现 {total_dup} 组重复数据,删除 {total_del} 行",
+                       {'total_results': all_results}
+                       )
+
+             return all_results
+
+         except Exception as e:
+             self._log('error',
+                       f"全局处理时发生错误: {str(e)}",
+                       {'error_type': type(e).__name__}
+                       )
+             return all_results
+
+     @_retry_on_failure
+     def _check_database_exists(self, database: str) -> bool:
+         """Check whether the database exists."""
+         sql = "SELECT SCHEMA_NAME FROM INFORMATION_SCHEMA.SCHEMATA WHERE SCHEMA_NAME = %s"
+
+         with self._get_connection() as conn:
+             with conn.cursor() as cursor:
+                 cursor.execute(sql, (database,))
+                 return bool(cursor.fetchone())
+
+     @_retry_on_failure
+     def _check_table_exists(self, database: str, table: str) -> bool:
+         """Check whether the table exists."""
+         sql = """
+             SELECT TABLE_NAME
+             FROM INFORMATION_SCHEMA.TABLES
+             WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s
+         """
+
+         with self._get_connection() as conn:
+             with conn.cursor() as cursor:
+                 cursor.execute(sql, (database, table))
+                 return bool(cursor.fetchone())
+
+     def close(self):
+         """Close the connection pool."""
+         try:
+             if hasattr(self, 'pool') and self.pool:
+                 self.pool.close()
+                 self._log('info', "数据库连接池已关闭")
+         except Exception as e:
+             self._log('error',
+                       f"关闭连接池时出错: {str(e)}",
+                       {'error_type': type(e).__name__}
+                       )
+         finally:
+             self.pool = None
+
+     def __enter__(self):
+         return self
+
+     def __exit__(self, exc_type, exc_val, exc_tb):
+         self.close()
+
+
  def main():
      uploader = MySQLUploader(
          username='root',
-         password='1',
+         password='188988yang188',
          host='localhost',
          port=3306,
          logging_mode='console',
@@ -2417,7 +3035,7 @@ def main():
          data=data,
          set_typ=set_typ,  # define columns and data types
          primary_keys=[],  # create a unique primary key
-         check_duplicate=True,  # check for duplicate data
+         check_duplicate=False,  # check for duplicate data
          duplicate_columns=[],  # composite key used for deduplication
          allow_null=False,  # allow inserting NULL values
          partition_by='year',  # partition tables by year
@@ -2429,5 +3047,27 @@ def main():
      uploader.close()


+ def main2():
+     deduplicator = MySQLDeduplicator(
+         username='root',
+         password='1',
+         host='localhost',
+         port=3306
+     )
+
+     # # Deduplicate every database (single-threaded)
+     # deduplicator.deduplicate_all()
+
+     # # Deduplicate one database (multi-threaded)
+     # deduplicator.deduplicate_database('my_db', parallel=True)
+
+     # Deduplicate one table (using specific columns)
+     deduplicator.deduplicate_table('my_db', 'my_table', columns=['name', 'date'])
+
+     # Close connections
+     deduplicator.close()
+
  if __name__ == '__main__':
      pass
+
+     main2()
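
Because MySQLDeduplicator deletes rows, a dry run is the safer first step. A minimal sketch against the class added above (the import path and argument names come from this diff; the credentials are placeholders):

    from mdbq.mysql.mysql import MySQLDeduplicator

    # __enter__/__exit__ are defined on the class, so the pool closes automatically
    with MySQLDeduplicator(username='root', password='<password>',
                           host='localhost', port=3306) as dedup:
        # dry_run=True only counts duplicate groups; nothing is deleted
        dup_groups, deleted = dedup.deduplicate_table(
            'my_db', 'my_table', columns=['name', 'date'], dry_run=True
        )
        print(f"{dup_groups} duplicate groups; {deleted} rows deleted (always 0 in a dry run)")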
{mdbq-3.9.3.dist-info → mdbq-3.9.5.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: mdbq
- Version: 3.9.3
+ Version: 3.9.5
  Home-page: https://pypi.org/project/mdbq
  Author: xigua,
  Author-email: 2587125111@qq.com
{mdbq-3.9.3.dist-info → mdbq-3.9.5.dist-info}/RECORD RENAMED
@@ -1,14 +1,15 @@
  mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
- mdbq/__version__.py,sha256=Pw2FixsnE8Hf360X55h_tF8Xeez7UgHd2pSgUkJY-v4,17
+ mdbq/__version__.py,sha256=6a9QWBZBLdUXab5LGTsteurYczwlY3DSfMu9kjZZUlA,17
  mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
  mdbq/aggregation/optimize.py,sha256=2oalzD9weZhDclUC22OLxYa8Zj7KnmsGUoUau_Jlyc4,19796
  mdbq/aggregation/query_data.py,sha256=5_OzjGR5Sq00q-EgAYmSE5V9i4Solw9y4hkldl4mvt8,179808
  mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
  mdbq/config/config.py,sha256=eaTfrfXQ65xLqjr5I8-HkZd_jEY1JkGinEgv3TSLeoQ,3170
  mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
+ mdbq/log/mlogger.py,sha256=eM_0CYSWZsiwl6jHkm9F7S7pb_V54LdImrC3tOtxojg,19686
  mdbq/log/spider_logging.py,sha256=-ozWWEGm3HVv604ozs_OOvVwumjokmUPwbaodesUrPY,1664
  mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
- mdbq/mysql/mysql.py,sha256=YX-tgugceODrJHcXgbosWFVThjXv3I2gCvTt_siKBOI,108606
+ mdbq/mysql/mysql.py,sha256=WFkv2flWuYRJqTzCOvxyiPeJgyJvVQzSG24jH_x5oEg,132390
  mdbq/mysql/s_query.py,sha256=X055aLRAgxVvueXx4NbfNjp6MyBI02_XBb1pTKw09L0,8660
  mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
  mdbq/other/download_sku_picture.py,sha256=YU8DxKMXbdeE1OOKEA848WVp62jYHw5O4tXTjUdq9H0,44832
@@ -22,7 +23,7 @@ mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
  mdbq/redis/getredis.py,sha256=Uk8-cOWT0JU1qRyIVqdbYokSLvkDIAfcokmYj1ebw8k,24104
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
  mdbq/spider/aikucun.py,sha256=OhyEv1VyAKTOHjLDM37iNDQeRg5OnrNoKODoG2VxHes,19806
- mdbq-3.9.3.dist-info/METADATA,sha256=Vt2mII7wAfEhzQa9G8PreCPV_hkdM1DLTTDcUMyepPg,363
- mdbq-3.9.3.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
- mdbq-3.9.3.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
- mdbq-3.9.3.dist-info/RECORD,,
+ mdbq-3.9.5.dist-info/METADATA,sha256=uv8f_2V0nVGSA9GFa39R9Pdg1ZX2O8ACLQnZkjR9T7I,363
+ mdbq-3.9.5.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
+ mdbq-3.9.5.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
+ mdbq-3.9.5.dist-info/RECORD,,