mdbq 3.11.1__py3-none-any.whl → 3.11.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbq/__version__.py CHANGED
@@ -1 +1 @@
- VERSION = '3.11.1'
+ VERSION = '3.11.3'
mdbq/aggregation/query_data.py CHANGED
@@ -9,14 +9,11 @@ import pandas as pd
  import numpy as np
  from functools import wraps
  import platform
- import json
  import os
  import time
  import calendar
  import concurrent.futures
- import traceback
  import logging
- import sys
 
  """
 
mdbq/log/mylogger.py CHANGED
@@ -7,7 +7,7 @@ import sys
  import time
  import threading
  import queue
- from typing import Optional, Dict, Any, List, Callable, Union
+ from typing import Optional, Dict, Any, List, Callable
  import atexit
  import traceback
  import inspect
mdbq/mysql/deduplicator.py CHANGED
@@ -18,7 +18,7 @@ from datetime import datetime
  warnings.filterwarnings('ignore')
  logger = mylogger.MyLogger(
      name='deduplicator',
-     logging_mode='both',
+     logging_mode='file',
      log_level='info',
      log_file='deduplicator.log',
      log_format='json',
mdbq/mysql/mysql.py CHANGED
@@ -7,10 +7,9 @@ import warnings
  import pymysql
  import pandas as pd
  from sqlalchemy import create_engine
- import os
  from mdbq.other import otk
  from mdbq.log import mylogger
- import json
+ import math
 
  warnings.filterwarnings('ignore')
  """
@@ -131,7 +130,7 @@ class MysqlUpload:
          new_dict_data: dict = {}
          for k, v in dict_data.items():
              k = str(k).lower()
-             k = re.sub(r'[()\-,,$&~^、 ()\"\'“”=·/。》《><!!`]', '_', k, re.IGNORECASE)
+             k = re.sub(r'[()\-,,$&~^、 ()\"\'"="·/。》《><!!`]', '_', k, re.IGNORECASE)
              k = k.replace(')', '')
              k = re.sub(r'_{2,}', '_', k)
              k = re.sub(r'_+$', '', k)
@@ -526,7 +525,7 @@ class MysqlUpload:
          new_dict_data = {}
          for k, v in dict_data.items():
              k = str(k).lower()
-             k = re.sub(r'[()\-,,$&~^、 ()\"\'“”=·/。》《><!!`]', '_', k, re.IGNORECASE)
+             k = re.sub(r'[()\-,,$&~^、 ()\"\'"="·/。》《><!!`]', '_', k, re.IGNORECASE)
              k = k.replace(')', '')
              k = re.sub(r'_{2,}', '_', k)
              k = re.sub(r'_+$', '', k)
mdbq/mysql/uploader.py CHANGED
@@ -10,8 +10,8 @@ from mdbq.log import mylogger
  from typing import Union, List, Dict, Optional, Any, Tuple, Set
  from dbutils.pooled_db import PooledDB
  import json
- from collections import OrderedDict
  import sys
+ from decimal import Decimal, InvalidOperation
 
  warnings.filterwarnings('ignore')
  logger = mylogger.MyLogger(
@@ -28,62 +28,44 @@ logger = mylogger.MyLogger(
  )
 
 
- def count_decimal_places(num_str):
+ def count_decimal_places(num_str: str) -> Tuple[int, int]:
      """
-     计算数字字符串的小数位数,支持科学计数法
-
-     :param num_str: 数字字符串
-     :return: 返回元组(整数位数, 小数位数)
-     :raises: 无显式抛出异常,但正则匹配失败时返回(0, 0)
-     """
-     match = re.match(r'^[-+]?\d+(\.\d+)?([eE][-+]?\d+)?$', str(num_str))
-     if match:
-         # 如果是科学计数法
-         match = re.findall(r'(\d+)\.(\d+)[eE][-+]?(\d+)$', str(num_str))
-         if match:
-             if len(match[0]) == 3:
-                 if int(match[0][2]) < len(match[0][1]):
-                     # count_int 清除整数部分开头的 0 并计算整数位数
-                     count_int = len(re.sub('^0+', '', str(match[0][0]))) + int(match[0][2])
-                     # 计算小数位数
-                     count_float = len(match[0][1]) - int(match[0][2])
-                     return count_int, count_float
-     # 如果是普通小数
-     match = re.findall(r'(\d+)\.(\d+)$', str(num_str))
-     if match:
-         count_int = len(re.sub('^0+', '', str(match[0][0])))
-         count_float = len(match[0][1])
-         return count_int, count_float  # 计算小数位数
-     return 0, 0
-
-
- class StatementCache(OrderedDict):
-     """
-     基于OrderedDict实现的LRU缓存策略,用于缓存SQL语句
-
-     这个类继承自OrderedDict,实现了最近最少使用(LRU)的缓存策略。
-     当缓存达到最大容量时,会自动删除最早添加的项。
+     统计小数点前后位数,支持科学计数法。
+     返回:(整数位数, 小数位数)
      """
+     try:
+         d = Decimal(str(num_str))
+         sign, digits, exponent = d.as_tuple()
+         int_part = len(digits) + exponent if exponent < 0 else len(digits)
+         dec_part = -exponent if exponent < 0 else 0
+         return max(int_part, 0), max(dec_part, 0)
+     except (InvalidOperation, ValueError, TypeError):
+         return (0, 0)
+
+
+ class StatementCache(dict):
+     """简单LRU缓存实现,用于SQL语句缓存"""
      def __init__(self, maxsize=100):
-         """
-         初始化缓存
-
-         :param maxsize: 最大缓存大小,默认为100条SQL语句
-         """
          super().__init__()
-         self.maxsize = maxsize
-
+         self._maxsize = maxsize
+         self._order = []
+     def __getitem__(self, key):
+         value = super().__getitem__(key)
+         self._order.remove(key)
+         self._order.append(key)
+         return value
      def __setitem__(self, key, value):
-         """
-         重写设置项方法,实现LRU策略
-
-         :param key: 缓存键
-         :param value: 缓存值
-         """
+         if key in self:
+             self._order.remove(key)
+         elif len(self._order) >= self._maxsize:
+             oldest = self._order.pop(0)
+             super().__delitem__(oldest)
          super().__setitem__(key, value)
-         if len(self) > self.maxsize:
-             self.popitem(last=False)
-
+         self._order.append(key)
+     def get(self, key, default=None):
+         if key in self:
+             return self[key]
+         return default
 
 
  class MySQLUploader:
      """
@@ -194,22 +176,22 @@ class MySQLUploader:
              logger.error('连接池创建失败', {'error': str(e), 'host': self.host, 'port': self.port})
              raise ConnectionError(f'连接池创建失败: {str(e)}')
 
-     def _execute_with_retry(self, func):
+     @staticmethod
+     def _execute_with_retry(func):
          """
          带重试机制的装饰器,用于数据库操作
-
          :param func: 被装饰的函数
          :return: 装饰后的函数
          :raises: 可能抛出原始异常或最后一次重试的异常
          """
          @wraps(func)
-         def wrapper(*args, **kwargs):
+         def wrapper(self, *args, **kwargs):
              last_exception = None
              operation = func.__name__
              logger.debug(f'开始执行操作: {operation}', {'max_retries': self.max_retries})
              for attempt in range(self.max_retries):
                  try:
-                     result = func(*args, **kwargs)
+                     result = func(self, *args, **kwargs)
                      if attempt > 0:
                          logger.info('操作成功(重试后)', {'operation': operation, 'attempts': attempt + 1})
                      else:
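Making _execute_with_retry a @staticmethod that takes only func lets it decorate other methods directly in the class body, with wrapper binding self explicitly. A standalone sketch of the pattern (simplified names, not the package's exact code):

    import functools

    class Worker:
        def __init__(self, max_retries: int = 3):
            self.max_retries = max_retries

        @staticmethod
        def _retry(func):
            @functools.wraps(func)
            def wrapper(self, *args, **kwargs):  # self is bound explicitly
                last_exc = None
                for attempt in range(self.max_retries):
                    try:
                        return func(self, *args, **kwargs)
                    except Exception as e:
                        last_exc = e
                raise last_exc
            return wrapper

        @_retry  # applying a staticmethod like this needs Python 3.10+
        def flaky(self):
            ...

One caveat: staticmethod objects only became directly callable in Python 3.10, so the @_execute_with_retry applications added throughout this file appear to assume Python 3.10 or newer.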
@@ -255,6 +237,7 @@ class MySQLUploader:
              raise last_exception if last_exception else Exception('发生未知错误')
          return wrapper
 
+     @_execute_with_retry
      def _get_connection(self) -> pymysql.connections.Connection:
          """
          从连接池获取数据库连接
@@ -270,6 +253,7 @@ class MySQLUploader:
              logger.error('获取数据库连接失败', {'error': str(e)})
              raise ConnectionError(f'连接数据库失败: {str(e)}')
 
+     @_execute_with_retry
      def _check_database_exists(self, db_name: str) -> bool:
          """
          检查数据库是否存在
@@ -280,6 +264,7 @@ class MySQLUploader:
          """
          db_name = self._validate_identifier(db_name)
          sql = 'SELECT SCHEMA_NAME FROM INFORMATION_SCHEMA.SCHEMATA WHERE SCHEMA_NAME = %s'
+         conn = None
          try:
              with self._get_connection() as conn:
                  with conn.cursor() as cursor:
@@ -291,6 +276,7 @@ class MySQLUploader:
              logger.error('检查数据库是否存在时出错', {'库': db_name, '错误': str(e)})
              raise
 
+     @_execute_with_retry
      def _create_database(self, db_name: str) -> None:
          """
          创建数据库
@@ -300,6 +286,7 @@ class MySQLUploader:
          """
          db_name = self._validate_identifier(db_name)
          sql = f'CREATE DATABASE IF NOT EXISTS `{db_name}` CHARACTER SET {self.charset} COLLATE {self.collation}'
+         conn = None
          try:
              with self._get_connection() as conn:
                  with conn.cursor() as cursor:
@@ -308,7 +295,8 @@ class MySQLUploader:
                  logger.info('数据库已创建', {'库': db_name})
          except Exception as e:
              logger.error('无法创建数据库', {'库': db_name, '错误': str(e)})
-             conn.rollback()
+             if conn is not None:
+                 conn.rollback()
              raise
 
      def _get_partition_table_name(self, table_name: str, date_value: str, partition_by: str) -> str:
@@ -345,10 +333,8 @@ class MySQLUploader:
          if not identifier or not isinstance(identifier, str):
              logger.error('无效的标识符', {'标识符': identifier})
              raise ValueError(f"无效的标识符: `{identifier}`")
-         if not self.case_sensitive:
-             cleaned = re.sub(r'[^\w\u4e00-\u9fff$]', '_', identifier)
-         else:
-             cleaned = identifier
+         # 始终做特殊字符清理
+         cleaned = re.sub(r'[^-\uFFFF\w\u4e00-\u9fff$]', '_', identifier)
          cleaned = re.sub(r'_+', '_', cleaned).strip('_')
          if not cleaned:
              logger.error('无法清理异常标识符', {'原始标识符': identifier})
@@ -362,6 +348,7 @@ class MySQLUploader:
              return f"`{cleaned}`"
          return cleaned
 
+     @_execute_with_retry
      def _check_table_exists(self, db_name: str, table_name: str) -> bool:
          """
          检查表是否存在
@@ -396,6 +383,7 @@ class MySQLUploader:
          logger.debug('表存在检查', {'库': db_name, '表': table_name, '存在': result})
          return result
 
+     @_execute_with_retry
      def _create_table(
          self,
          db_name: str,
@@ -407,16 +395,7 @@ class MySQLUploader:
          allow_null: bool = False
      ) -> None:
          """
-         创建数据表
-
-         :param db_name: 数据库名
-         :param table_name: 表名
-         :param set_typ: 列名和数据类型字典 {列名: 数据类型}
-         :param primary_keys: 主键列列表,可选
-         :param date_column: 日期列名,可选,如果存在将设置为索引
-         :param indexes: 需要创建索引的列列表,可选
-         :param allow_null: 是否允许空值,默认为False
-         :raises: 可能抛出数据库相关异常
+         创建数据表,优化索引创建方式
          """
          db_name = self._validate_identifier(db_name)
          table_name = self._validate_identifier(table_name)
@@ -439,40 +418,35 @@ class MySQLUploader:
              primary_keys = ['id']
          safe_primary_keys = [self._validate_identifier(pk) for pk in primary_keys]
          primary_key_sql = f", PRIMARY KEY (`{'`,`'.join(safe_primary_keys)}`)"
+         # 索引统一在CREATE TABLE中定义
+         index_defs = []
+         if date_column and date_column in set_typ:
+             safe_date_col = self._validate_identifier(date_column)
+             index_defs.append(f"INDEX `idx_{safe_date_col}` (`{safe_date_col}`)")
+         if indexes:
+             for idx_col in indexes:
+                 if idx_col in set_typ:
+                     safe_idx_col = self._validate_identifier(idx_col)
+                     index_defs.append(f"INDEX `idx_{safe_idx_col}` (`{safe_idx_col}`)")
+         index_sql = (',' + ','.join(index_defs)) if index_defs else ''
          sql = f"""
          CREATE TABLE IF NOT EXISTS `{db_name}`.`{table_name}` (
              {','.join(column_defs)}
              {primary_key_sql}
+             {index_sql}
          ) ENGINE=InnoDB DEFAULT CHARSET={self.charset} COLLATE={self.collation}
          """
+         conn = None
          try:
              with self._get_connection() as conn:
                  with conn.cursor() as cursor:
                      cursor.execute(sql)
-                     logger.info('数据表已创建', {'库': db_name, '表': table_name})
-                     index_statements = []
-                     if date_column and date_column in set_typ:
-                         safe_date_col = self._validate_identifier(date_column)
-                         index_statements.append(
-                             f"ALTER TABLE `{db_name}`.`{table_name}` ADD INDEX `idx_{safe_date_col}` (`{safe_date_col}`)"
-                         )
-                     if indexes:
-                         for idx_col in indexes:
-                             if idx_col in set_typ:
-                                 safe_idx_col = self._validate_identifier(idx_col)
-                                 index_statements.append(
-                                     f"ALTER TABLE `{db_name}`.`{table_name}` ADD INDEX `idx_{safe_idx_col}` (`{safe_idx_col}`)"
-                                 )
-                     if index_statements:
-                         with conn.cursor() as cursor:
-                             for stmt in index_statements:
-                                 cursor.execute(stmt)
-                                 logger.debug('执行索引语句', {'SQL': stmt})
                  conn.commit()
-                 logger.info('索引已添加', {'库': db_name, '表': table_name, '索引': indexes})
+                 logger.info('数据表及索引已创建', {'库': db_name, '表': table_name, '索引': indexes})
          except Exception as e:
              logger.error('建表失败', {'库': db_name, '表': table_name, '错误': str(e)})
-             conn.rollback()
+             if conn is not None:
+                 conn.rollback()
              raise
 
      def _validate_datetime(self, value: str, date_type: bool = False) -> Any:
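Moving the indexes into the CREATE TABLE statement removes the follow-up ALTER TABLE round trips of the old version, so the table and its indexes are created in a single DDL statement. For a hypothetical call with date_column='日期' and indexes=['店铺名称'], the rendered SQL would look roughly like the following (database, table, column names, charset and collation are illustrative; the real values come from _validate_identifier and self.charset / self.collation):

    CREATE TABLE IF NOT EXISTS `demo_db`.`demo_table` (
        ...column definitions...
        , PRIMARY KEY (`id`)
        ,INDEX `idx_日期` (`日期`),INDEX `idx_店铺名称` (`店铺名称`)
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci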
@@ -511,19 +485,24 @@ class MySQLUploader:
              logger.error('无效的日期格式', {'值': value})
              raise ValueError(f"无效的日期格式: `{value}`")
 
-     def _validate_value(self, value: Any, column_type: str, allow_null: bool) -> Any:
+     def _validate_value(self, value: Any, column_type: str, allow_null: bool, db_name: str = None, table_name: str = None, col_name: str = None) -> Any:
          """
          根据列类型验证并转换数据值
 
          :param value: 要验证的值
          :param column_type: 列的数据类型
          :param allow_null: 是否允许空值
+         :param db_name: 数据库名(用于日志)
+         :param table_name: 表名(用于日志)
+         :param col_name: 列名(用于日志)
          :return: 转换后的值
          :raises ValueError: 当值转换失败时抛出
          """
          if value is None:
              if not allow_null:
-                 logger.warning('字段值为None但不允许空值', {'字段类型': column_type})
+                 logger.warning('字段值为None但不允许空值, 已填充为none', {
+                     '库': db_name, '表': table_name, '列': col_name, '字段类型': column_type
+                 })
                  return 'none'
              return None
          try:
@@ -536,14 +515,18 @@ class MySQLUploader:
                      logger.debug('百分比字符串转小数', {'原始': value, '结果': decimal_value})
                      return decimal_value
                  except ValueError:
-                     logger.warning('百分比字符串转小数失败', {'原始': value})
+                     logger.warning('百分比字符串转小数失败', {
+                         '库': db_name, '表': table_name, '列': col_name, '原始': value
+                     })
              elif 'int' in column_type_lower:
                  if isinstance(value, str):
                      value = value.replace(',', '').strip()
                      try:
                          return int(float(value))
                      except ValueError:
-                         logger.error('字符串转整数失败', {'值': value})
+                         logger.error('字符串转整数失败', {
+                             '库': db_name, '表': table_name, '列': col_name, '值': value
+                         })
                          raise ValueError(f"`{value}` -> 无法转为整数")
                  return int(value) if value is not None else None
              elif any(t in column_type_lower for t in ['float', 'double', 'decimal']):
@@ -557,7 +540,9 @@ class MySQLUploader:
                  try:
                      return self._validate_datetime(value)
                  except ValueError as e:
-                     logger.error('无效日期格式', {'值': value, '错误': str(e)})
+                     logger.error('无效日期格式', {
+                         '库': db_name, '表': table_name, '列': col_name, '值': value, '错误': str(e)
+                     })
                      raise ValueError(f"无效日期格式: `{value}` -> {str(e)}")
              return str(value)
          elif 'char' in column_type_lower or 'text' in column_type_lower:
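For orientation, the conversion paths touched by these logging changes behave as follows; the instance and the expected results are inferred from the code and log messages above, not from package documentation:

    up = MySQLUploader(username='u', password='p', host='127.0.0.1', port=3306)  # hypothetical
    up._validate_value('75.5%', 'decimal(10,4)', allow_null=True)  # -> 0.755 (percent string to decimal)
    up._validate_value('1,234', 'int', allow_null=True)            # -> 1234 (separators stripped, int(float(...)))
    up._validate_value(None, 'varchar(50)', allow_null=False)      # -> 'none' (literal fill, warning logged)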
@@ -569,9 +554,12 @@ class MySQLUploader:
              else:
                  return value
          except (ValueError, TypeError) as e:
-             logger.error('数据类型转换异常', {'值': value, '目标类型': column_type, '错误': str(e)})
+             logger.error('数据类型转换异常', {
+                 '库': db_name, '表': table_name, '列': col_name, '值': value, '目标类型': column_type, '错误': str(e)
+             })
              raise ValueError(f"转换异常 -> 无法将 `{value}` 的数据类型转为: `{column_type}` -> {str(e)}")
 
+     @_execute_with_retry
      def _get_table_columns(self, db_name: str, table_name: str) -> Dict[str, str]:
          """
          获取表的列名和数据类型
@@ -593,7 +581,10 @@ class MySQLUploader:
              with self._get_connection() as conn:
                  with conn.cursor() as cursor:
                      cursor.execute(sql, (db_name, table_name))
-                     set_typ = {row['COLUMN_NAME'].lower(): row['DATA_TYPE'] for row in cursor.fetchall()}
+                     if self.case_sensitive:
+                         set_typ = {row['COLUMN_NAME']: row['DATA_TYPE'] for row in cursor.fetchall()}
+                     else:
+                         set_typ = {row['COLUMN_NAME'].lower(): row['DATA_TYPE'] for row in cursor.fetchall()}
              logger.debug('获取表的列信息', {'库': db_name, '表': table_name, '列信息': set_typ})
              return set_typ
          except Exception as e:
@@ -727,39 +718,38 @@ class MySQLUploader:
          """
          1. pandas:规范化列名
          2. 字典列表:规范化每个字典的键
-
-         参数:
-             data: 输入数据,支持两种类型:
-                 - pandas.DataFrame:将规范化其列名
-                 - List[Dict[str, Any]]:将规范化列表中每个字典的键
          """
          if isinstance(data, pd.DataFrame):
-             data.columns = [self._validate_identifier(col) for col in data.columns]
+             if self.case_sensitive:
+                 data.columns = [self._validate_identifier(col) for col in data.columns]
+             else:
+                 data.columns = [self._validate_identifier(col).lower() for col in data.columns]
              return data
          elif isinstance(data, list):
-             return [{self._validate_identifier(k): v for k, v in item.items()} for item in data]
+             if self.case_sensitive:
+                 return [{self._validate_identifier(k): v for k, v in item.items()} for item in data]
+             else:
+                 return [{self._validate_identifier(k).lower(): v for k, v in item.items()} for item in data]
          return data
 
      def _prepare_data(
          self,
          data: Union[Dict, List[Dict], pd.DataFrame],
          set_typ: Dict[str, str],
-         allow_null: bool = False
+         allow_null: bool = False,
+         db_name: str = None,
+         table_name: str = None,
      ) -> Tuple[List[Dict], Dict[str, str]]:
          """
          准备要上传的数据,验证并转换数据类型
-
-         :param data: 输入数据,可以是字典、字典列表或DataFrame
-         :param set_typ: 列名和数据类型字典 {列名: 数据类型}
-         :param allow_null: 是否允许空值
-         :return: 元组(准备好的数据列表, 过滤后的列类型字典)
-         :raises ValueError: 当数据验证失败时抛出
          """
          # 统一数据格式为字典列表
          if isinstance(data, pd.DataFrame):
              try:
-                 # 将列名转为小写
-                 data.columns = [col.lower() for col in data.columns]
+                 if self.case_sensitive:
+                     data.columns = [col for col in data.columns]
+                 else:
+                     data.columns = [col.lower() for col in data.columns]
                  data = data.replace({pd.NA: None}).to_dict('records')
              except Exception as e:
                  logger.error('数据转字典时发生错误', {
@@ -769,10 +759,15 @@ class MySQLUploader:
                  })
                  raise ValueError(f"数据转字典时发生错误: {e}")
          elif isinstance(data, dict):
-             data = [{k.lower(): v for k, v in data.items()}]
+             if self.case_sensitive:
+                 data = [{k: v for k, v in data.items()}]
+             else:
+                 data = [{k.lower(): v for k, v in data.items()}]
          elif isinstance(data, list) and all(isinstance(item, dict) for item in data):
-             # 将列表中的每个字典键转为小写
-             data = [{k.lower(): v for k, v in item.items()} for item in data]
+             if self.case_sensitive:
+                 data = [{k: v for k, v in item.items()} for item in data]
+             else:
+                 data = [{k.lower(): v for k, v in item.items()} for item in data]
          else:
              logger.error('数据结构必须是字典、列表、字典列表或dataframe', {
                  'data': self._shorten_for_log(data),
@@ -783,8 +778,11 @@ class MySQLUploader:
          # 统一处理原始数据中列名的特殊字符
          data = self.normalize_column_names(data)
 
-         # set_typ的键转为小写
-         set_typ = {k.lower(): v for k, v in set_typ.items()}
+         # set_typ的键处理
+         if self.case_sensitive:
+             set_typ = {k: v for k, v in set_typ.items()}
+         else:
+             set_typ = {k.lower(): v for k, v in set_typ.items()}
 
          # 获取数据中实际存在的列名
          data_columns = set()
@@ -797,25 +795,25 @@ class MySQLUploader:
              if col in set_typ:
                  filtered_set_typ[col] = set_typ[col]
              else:
-                 # 如果列不在set_typ中,尝试推断类型
-                 sample_values = [row[col] for row in data if col in row and row[col] is not None][:10]
-                 if sample_values:
-                     inferred_type = self._infer_data_type(sample_values[0])
-                     filtered_set_typ[col] = inferred_type
-                     logger.debug(f"自动推断列 `{col}` 的数据类型为: `{inferred_type}`")
-                 else:
-                     # 没有样本值,使用默认类型
-                     filtered_set_typ[col] = 'VARCHAR(255)'
-                     logger.debug(f"列 `{col}` 使用默认数据类型: VARCHAR(255)")
+                 # 如果列不在set_typ中,采样多个非None值推断类型
+                 sample_values = [row[col] for row in data if col in row and row[col] is not None][:5]
+                 inferred_type = None
+                 for val in sample_values:
+                     inferred_type = self._infer_data_type(val)
+                     if inferred_type:
+                         break
+                 if not inferred_type:
+                     inferred_type = 'VARCHAR(255)'
+                 filtered_set_typ[col] = inferred_type
+                 logger.debug(f"自动推断列 `{col}` 的数据类型为: `{inferred_type}`")
 
          prepared_data = []
          for row_idx, row in enumerate(data, 1):
              prepared_row = {}
              for col_name in filtered_set_typ:
                  # 跳过id列,不允许外部传入id
-                 if col_name.lower() == 'id':
+                 if (self.case_sensitive and col_name == 'id') or (not self.case_sensitive and col_name.lower() == 'id'):
                      continue
-
                  if col_name not in row:
                      if not allow_null:
                          error_msg = f"行号:{row_idx} -> 缺失列: `{col_name}`"
@@ -824,7 +822,7 @@ class MySQLUploader:
                      prepared_row[col_name] = None
                  else:
                      try:
-                         prepared_row[col_name] = self._validate_value(row[col_name], filtered_set_typ[col_name], allow_null)
+                         prepared_row[col_name] = self._validate_value(row[col_name], filtered_set_typ[col_name], allow_null, db_name, table_name, col_name)
                      except ValueError as e:
                          logger.error('数据验证失败', {
                              '列': col_name,
@@ -856,7 +854,7 @@ class MySQLUploader:
          transaction_mode: str = "batch"
      ):
          """
-         上传数据到数据库的主入口方法
+         上传数据到数据库的主入口方法,分表逻辑异常处理统计丢弃数据
 
          :param db_name: 数据库名
          :param table_name: 表名
@@ -882,6 +880,7 @@ class MySQLUploader:
 
          batch_id = f"batch_{int(time.time() * 1000)}"
          success_flag = False
+         dropped_rows = 0
 
          logger.info("开始上传", {
              '库': db_name,
@@ -918,7 +917,7 @@ class MySQLUploader:
                  raise ValueError("分表方式必须是 'year' 或 'month' 或 'None'")
 
          # 准备数据
-         prepared_data, filtered_set_typ = self._prepare_data(data, set_typ, allow_null)
+         prepared_data, filtered_set_typ = self._prepare_data(data, set_typ, allow_null, db_name, table_name)
 
          # 检查数据库是否存在
          if not self._check_database_exists(db_name):
@@ -945,8 +944,8 @@ class MySQLUploader:
                          'row': self._shorten_for_log(row),
                          'func': sys._getframe().f_code.co_name,
                      })
-                     continue  # 跳过当前行
-
+                     dropped_rows += 1
+                     continue
                  part_table = self._get_partition_table_name(
                      table_name,
                      str(row[partition_date_column]),
@@ -963,7 +962,8 @@ class MySQLUploader:
                          'error': str(e),
                          'func': sys._getframe().f_code.co_name,
                      })
-                     continue  # 跳过当前行
+                     dropped_rows += 1
+                     continue
 
              # 对每个分表执行上传
              for part_table, part_data in partitioned_data.items():
@@ -1010,10 +1010,11 @@ class MySQLUploader:
                  '表': table_name,
                  '批次': batch_id,
                  'finish': success_flag,
-                 # '耗时': round(time.time() - upload_start, 2),
-                 '数据行': initial_row_count
+                 '数据行': initial_row_count,
+                 '丢弃行数': dropped_rows
              })
 
+     @_execute_with_retry
      def _insert_data(
          self,
          db_name: str,
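A hedged usage sketch of the public entry point after these changes; connection values, column names and types are illustrative, and keyword names follow those visible elsewhere in this diff:

    uld = MySQLUploader(username='user', password='pw', host='127.0.0.1', port=3306)
    uld.upload_data(
        db_name='demo_db',
        table_name='demo_table',
        data=[{'日期': '2025-05-01', '平台': '爱库存', '销售额': '1,234'}],
        set_typ={'日期': 'date', '平台': 'varchar(100)', '销售额': 'int'},
        allow_null=False,
        partition_by='month',      # 'year' / 'month' / None
        transaction_mode='batch',  # 'batch' / 'hybrid' / 'row'
    )

Rows whose partition date cannot be parsed are no longer silently skipped; they are counted in dropped_rows and reported as 丢弃行数 in the closing log entry.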
@@ -1178,34 +1179,27 @@ class MySQLUploader:
          update_on_duplicate: bool
      ) -> str:
          """
-         准备插入SQL语句
-
-         1. 当 check_duplicate=False 时,忽略 duplicate_columns 和 update_on_duplicate 参数,直接插入全部data。
-         2. 当 check_duplicate=False 且 update_on_duplicate=True 时,由于 check_duplicate=False,直接插入全部data。
-         3. 当 check_duplicate=True 且 duplicate_columns=[] 且 update_on_duplicate=True 时,获取数据库所有列(但排除`id`和`更新时间`列),按这些列(不含`id`和`更新时间`)排重插入,遇到重复数据时更新旧数据。
-         4. 当 check_duplicate=True 且 duplicate_columns=[] 且 update_on_duplicate=False 时,获取数据库所有列(但排除`id`和`更新时间`列),按这些列(不含`id`和`更新时间`)排重插入,不考虑是否更新旧数据。
-         5. 当 check_duplicate=True 且 duplicate_columns 指定了排重列且 update_on_duplicate=True 时,按 duplicate_columns 指定的列(但排除`id`和`更新时间`)排重插入,遇到重复数据时更新旧数据。
-         6. 当 check_duplicate=True 且 duplicate_columns 指定了排重列且 update_on_duplicate=False 时,按 duplicate_columns 指定的列(但排除`id`和`更新时间`)排重插入,不考虑是否更新旧数据。
-
+         准备插入SQL语句, 增加StatementCache缓存
          """
+         cache_key = (db_name, table_name, tuple(sorted(set_typ.items())), check_duplicate, tuple(duplicate_columns) if duplicate_columns else (), update_on_duplicate)
+         cached = self._prepared_statements.get(cache_key)
+         if cached:
+             return cached
          # 获取所有列名(排除id)
          all_columns = [col for col in set_typ.keys()
                         if col.lower() != 'id']
-
-         # 情况1-2:不检查重复
          if not check_duplicate:
-             return self._build_simple_insert_sql(db_name, table_name, all_columns,
+             sql = self._build_simple_insert_sql(db_name, table_name, all_columns,
                                                   update_on_duplicate)
-
-         # 确定排重列(排除id和更新时间列)
-         dup_cols = duplicate_columns if duplicate_columns else [
-             col for col in all_columns
-             if col.lower() not in self.base_excute_col
-         ]
-
-         # 情况3-6:检查重复
-         return self._build_duplicate_check_sql(db_name, table_name, all_columns,
+         else:
+             dup_cols = duplicate_columns if duplicate_columns else [
+                 col for col in all_columns
+                 if col.lower() not in self.base_excute_col
+             ]
+             sql = self._build_duplicate_check_sql(db_name, table_name, all_columns,
                                                    dup_cols, update_on_duplicate, set_typ)
+         self._prepared_statements[cache_key] = sql
+         return sql
 
      def _execute_batch_insert(
          self,
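The cache key folds in everything that affects the generated SQL, and sorting the set_typ items makes it independent of dict insertion order, so logically identical calls share one cached statement:

    a = {'日期': 'date', '销售额': 'int'}
    b = {'销售额': 'int', '日期': 'date'}
    assert tuple(sorted(a.items())) == tuple(sorted(b.items()))  # identical cache_key component

Lookups go through self._prepared_statements, presumably the StatementCache defined earlier in this file, so the number of retained statements stays bounded with LRU eviction.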
@@ -1220,10 +1214,8 @@ class MySQLUploader:
          transaction_mode: str,
          update_on_duplicate: bool = False
      ) -> Tuple[int, int, int]:
-         """执行批量插入操作"""
-
+         """执行批量插入操作,优化batch和hybrid模式"""
          def get_optimal_batch_size(total_rows: int) -> int:
-             # 根据数据量调整批量大小
              if total_rows <= 100:
                  return total_rows
              elif total_rows <= 1000:
@@ -1232,205 +1224,64 @@ class MySQLUploader:
                  return 1000
              else:
                  return 2000
-
          batch_size = get_optimal_batch_size(len(data))
-
-         # 获取所有列名(排除id列)
-         all_columns = [col for col in set_typ.keys()
-                        if col.lower() != 'id']
-
+         all_columns = [col for col in set_typ.keys() if col.lower() != 'id']
          total_inserted = 0
          total_skipped = 0
          total_failed = 0
-
          with self._get_connection() as conn:
              with conn.cursor() as cursor:
-                 for i in range(0, len(data), batch_size):
-                     batch = data[i:i + batch_size]
-                     batch_inserted, batch_skipped, batch_failed = self._process_batch(
-                         conn, cursor, db_name, table_name, batch, all_columns,
-                         sql, check_duplicate, duplicate_columns, batch_id,
-                         transaction_mode, update_on_duplicate
-                     )
-
-                     # 更新总统计
-                     total_inserted += batch_inserted
-                     total_skipped += batch_skipped
-                     total_failed += batch_failed
-
+                 if transaction_mode == 'batch':
+                     for i in range(0, len(data), batch_size):
+                         batch = data[i:i + batch_size]
+                         values_list = []
+                         for row in batch:
+                             values = [row.get(col) for col in all_columns]
+                             if check_duplicate and not update_on_duplicate:
+                                 dup_cols = duplicate_columns if duplicate_columns else [col for col in all_columns if col.lower() not in self.base_excute_col]
+                                 values += [row.get(col) for col in dup_cols]
+                             values_list.append(values)
+                         try:
+                             cursor.executemany(sql, values_list)
+                             conn.commit()
+                             total_inserted += len(batch)
+                         except Exception as e:
+                             conn.rollback()
+                             total_failed += len(batch)
+                             logger.error('批量插入失败', {'库': db_name, '表': table_name, '错误': str(e)})
+                 elif transaction_mode == 'hybrid':
+                     hybrid_n = 100  # 可配置
+                     for i in range(0, len(data), hybrid_n):
+                         batch = data[i:i + hybrid_n]
+                         for row in batch:
+                             try:
+                                 values = [row.get(col) for col in all_columns]
+                                 if check_duplicate and not update_on_duplicate:
+                                     dup_cols = duplicate_columns if duplicate_columns else [col for col in all_columns if col.lower() not in self.base_excute_col]
+                                     values += [row.get(col) for col in dup_cols]
+                                 cursor.execute(sql, values)
+                                 total_inserted += 1
+                             except Exception as e:
+                                 conn.rollback()
+                                 total_failed += 1
+                                 logger.error('hybrid单行插入失败', {'库': db_name, '表': table_name, '错误': str(e)})
+                         conn.commit()
+                 else:  # row模式
+                     for row in data:
+                         try:
+                             values = [row.get(col) for col in all_columns]
+                             if check_duplicate and not update_on_duplicate:
+                                 dup_cols = duplicate_columns if duplicate_columns else [col for col in all_columns if col.lower() not in self.base_excute_col]
+                                 values += [row.get(col) for col in dup_cols]
+                             cursor.execute(sql, values)
+                             conn.commit()
+                             total_inserted += 1
+                         except Exception as e:
+                             conn.rollback()
+                             total_failed += 1
+                             logger.error('单行插入失败', {'库': db_name, '表': table_name, '错误': str(e)})
          return total_inserted, total_skipped, total_failed
 
-     def _process_batch(
-         self,
-         conn,
-         cursor,
-         db_name: str,
-         table_name: str,
-         batch: List[Dict],
-         all_columns: List[str],
-         sql: str,
-         check_duplicate: bool,
-         duplicate_columns: Optional[List[str]],
-         batch_id: Optional[str],
-         transaction_mode: str,
-         update_on_duplicate: bool = False
-     ) -> Tuple[int, int, int]:
-         """
-         处理单个批次的数据插入
-
-         :param conn: 数据库连接对象
-         :param cursor: 数据库游标对象
-         :param db_name: 数据库名
-         :param table_name: 表名
-         :param batch: 当前批次的数据(字典列表)
-         :param all_columns: 需要插入的所有列名
-         :param sql: 执行的SQL语句
-         :param check_duplicate: 是否检查重复
-         :param duplicate_columns: 排重列
-         :param batch_id: 批次ID
-         :param transaction_mode: 事务模式
-         :param update_on_duplicate: 遇到重复时是否更新
-         :return: (插入数, 跳过数, 失败数)
-         """
-         batch_inserted = 0
-         batch_skipped = 0
-         batch_failed = 0
-         batch_size = len(batch)
-         logger.debug('批次插入开始', {
-             '库': db_name,
-             '表': table_name,
-             '批次ID': batch_id,
-             '批次大小': batch_size,
-             '事务模式': transaction_mode,
-             'SQL预览': sql[:200],
-             '排重': check_duplicate,
-             '排重列': duplicate_columns,
-             '允许更新': update_on_duplicate,
-             '数据样例': self._shorten_for_log(batch, 2)
-         })
-         if transaction_mode == 'batch':
-             try:
-                 for row_idx, row in enumerate(batch, 1):
-                     result = self._process_single_row(
-                         db_name, table_name, cursor, row, all_columns, sql,
-                         check_duplicate, duplicate_columns, update_on_duplicate
-                     )
-                     if result == 'inserted':
-                         batch_inserted += 1
-                     elif result == 'skipped':
-                         batch_skipped += 1
-                     else:
-                         batch_failed += 1
-                 conn.commit()
-                 logger.debug('批次插入成功', {
-                     '库': db_name,
-                     '表': table_name,
-                     '批次ID': batch_id,
-                     '插入': batch_inserted,
-                     '跳过': batch_skipped,
-                     '失败': batch_failed
-                 })
-             except Exception as e:
-                 conn.rollback()
-                 batch_failed += len(batch)
-                 logger.error('批次插入失败', {
-                     '库': db_name,
-                     '表': table_name,
-                     '批次ID': batch_id,
-                     '错误': str(e),
-                     'SQL预览': sql[:200],
-                     '数据样例': self._shorten_for_log(batch, 2)
-                 })
-         else:  # row 或 hybrid 模式
-             for row_idx, row in enumerate(batch, 1):
-                 try:
-                     result = self._process_single_row(
-                         db_name, table_name, cursor, row, all_columns, sql,
-                         check_duplicate, duplicate_columns, update_on_duplicate
-                     )
-                     if result == 'inserted':
-                         batch_inserted += 1
-                     elif result == 'skipped':
-                         batch_skipped += 1
-                     else:
-                         batch_failed += 1
-                     conn.commit()
-                     logger.debug('单行插入成功', {
-                         '库': db_name,
-                         '表': table_name,
-                         '批次ID': batch_id,
-                         '行号': row_idx,
-                         '插入状态': result
-                     })
-                 except Exception as e:
-                     conn.rollback()
-                     batch_failed += 1
-                     logger.error('单行插入失败', {
-                         '库': db_name,
-                         '表': table_name,
-                         '批次ID': batch_id,
-                         '行号': row_idx,
-                         '错误': str(e),
-                         'SQL预览': sql[:200],
-                         '数据': self._shorten_for_log(row)
-                     })
-         logger.debug('批次插入结束', {
-             '库': db_name,
-             '表': table_name,
-             '批次ID': batch_id,
-             '插入': batch_inserted,
-             '跳过': batch_skipped,
-             '失败': batch_failed,
-             '数据样例': self._shorten_for_log(batch, 2)
-         })
-         return batch_inserted, batch_skipped, batch_failed
-
-     def _process_single_row(
-         self,
-         db_name: str,
-         table_name: str,
-         cursor,
-         row: Dict,
-         all_columns: List[str],
-         sql: str,
-         check_duplicate: bool,
-         duplicate_columns: Optional[List[str]],
-         update_on_duplicate: bool = False
-     ) -> str:
-         """
-         处理单行数据插入
-
-         :param db_name: 数据库名
-         :param table_name: 表名
-         :param cursor: 数据库游标对象
-         :param row: 单行数据(字典)
-         :param all_columns: 需要插入的所有列名
-         :param sql: 执行的SQL语句
-         :param check_duplicate: 是否检查重复
-         :param duplicate_columns: 排重列
-         :param update_on_duplicate: 遇到重复时是否更新
-         :return: 'inserted' | 'skipped' | 'failed'
-         """
-         try:
-             # 构造参数
-             values = [row.get(col) for col in all_columns]
-             if check_duplicate:
-                 # 需要为 WHERE NOT EXISTS 语句补充参数
-                 if not update_on_duplicate:
-                     # duplicate_columns 为空时,默认用所有列(排除id/更新时间)
-                     dup_cols = duplicate_columns if duplicate_columns else [col for col in all_columns if col.lower() not in self.base_excute_col]
-                     values = values + [row.get(col) for col in dup_cols]
-             cursor.execute(sql, values)
-         except Exception as e:
-             logger.error('单行插入失败', {
-                 '库': db_name,
-                 '表': table_name,
-                 'row': self._shorten_for_log(row),
-                 '错误': str(e)
-             })
-             return 'failed'
-         return 'inserted'
-
      def close(self) -> None:
          """
          关闭连接池并清理资源
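The three transaction modes now differ mainly in how often they commit. A small helper, written against the logic above to make the cadence concrete (batch_size stands in for whatever get_optimal_batch_size picked; hybrid_n is the hard-coded 100):

    def commits_for(n_rows: int, mode: str, batch_size: int = 1000, hybrid_n: int = 100) -> int:
        """How many commits an error-free run performs."""
        if mode == 'batch':
            return -(-n_rows // batch_size)  # one executemany() + commit per chunk
        if mode == 'hybrid':
            return -(-n_rows // hybrid_n)    # per-row execute(), one commit per chunk
        return n_rows                        # 'row': execute() + commit per row

    commits_for(2500, 'batch')   # 3
    commits_for(2500, 'hybrid')  # 25
    commits_for(2500, 'row')     # 2500

The practical trade: 'batch' is fastest but a single bad row fails its whole chunk, 'row' isolates every failure at the cost of a commit per row, and 'hybrid' sits in between, rolling back only the failing row while committing once per 100-row chunk.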
@@ -1441,10 +1292,10 @@ class MySQLUploader:
          try:
              if hasattr(self, 'pool') and self.pool is not None:
                  try:
-                     self.pool.close()
+                     # self.pool.close()  # PooledDB 没有 close 方法
+                     self.pool = None
                  except Exception as e:
                      logger.warning('关闭连接池时出错', {'error': str(e)})
-                 self.pool = None
              logger.info('连接池关闭', {'uploader.py': '连接池关闭'})
          except Exception as e:
              logger.error('关闭连接池失败', {'error': str(e)})
@@ -1452,11 +1303,12 @@ class MySQLUploader:
 
      def _check_pool_health(self) -> bool:
          """
-         检查连接池健康状态
-         :return: 连接池健康返回True,否则返回False
+         检查连接池健康状态,防止连接泄露
          """
          conn = None
          try:
+             if not hasattr(self, 'pool') or self.pool is None:
+                 return False
              conn = self.pool.connection()
              conn.ping(reconnect=True)
              logger.debug('连接池健康检查通过')
@@ -1465,12 +1317,13 @@ class MySQLUploader:
              logger.warning('连接池健康检查失败', {'error': str(e)})
              return False
          finally:
-             if conn:
+             if conn is not None:
                  try:
                      conn.close()
                  except Exception as e:
                      logger.warning('关闭连接时出错', {'error': str(e)})
 
+     @staticmethod
      def retry_on_failure(max_retries: int = 3, delay: int = 1):
          """
          通用重试装饰器
mdbq/spider/aikucun.py CHANGED
@@ -3,10 +3,8 @@ import datetime
  import requests
  import json
  import os
- import sys
  import re
  import time
- import warnings
  import platform
  import getpass
  from selenium import webdriver
@@ -15,20 +13,18 @@ from selenium.webdriver.common.by import By
  from selenium.webdriver.support import expected_conditions as EC
  from selenium.webdriver.chrome.service import Service
  import pymysql
- import pandas as pd
- from mdbq.log import spider_logging
- from mdbq.mysql import mysql
+ from mdbq.mysql import uploader
  from mdbq.mysql import s_query
  from mdbq.config import config
  from mdbq.other import ua_sj
  from mdbq.other import otk
+ from mdbq.log import mylogger
 
  dir_path = os.path.expanduser("~")
  config_file = os.path.join(dir_path, 'spd.txt')
  content = config.read_config(file_path=config_file)
  username, password, host, port = content['username'], content['password'], content['host'], content['port']
 
- # m_engine = mysql.MysqlUpload(username=username, password=password, host=host, port=port, charset='utf8mb4')
  uld = uploader.MySQLUploader(username=username, password=password, host=host, port=int(port), pool_size=10)
  # 实例化一个数据查询类,用来获取 cookies 表数据
  download = s_query.QueryDatas(username=username, password=password, host=host, port=port)
@@ -188,11 +184,6 @@ class AikuCun:
              '更新时间': 'timestamp'
          }
          # 更新至数据库记录
-         # m_engine.dict_to_mysql(
-         #     db_name=self.db_name,
-         #     table_name=self.table_name,
-         #     dict_data=self.token,
-         # )
          uld.upload_data(
              db_name=self.db_name,
              table_name=self.table_name,
@@ -429,15 +420,6 @@ class AikuCun:
              drop_dup = ['日期', '平台', '店铺名称', '商品款号', '访客量']
          else:
              drop_dup = ['日期', '平台', '店铺名称', '条码']
-         # m_engine.insert_many_dict(
-         #     db_name=db_name,
-         #     table_name=table_name,
-         #     dict_data_list=_results,
-         #     icm_update=drop_dup,  # 唯一组合键
-         #     # unique_main_key=['人群id'],
-         #     set_typ=set_typ,
-         #     allow_not_null=False,  # 创建允许插入空值的列
-         # )
          uld.upload_data(
              db_name=db_name,
              table_name=table_name,
mdbq-3.11.3.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: mdbq
- Version: 3.11.1
+ Version: 3.11.3
  Home-page: https://pypi.org/project/mdbq
  Author: xigua,
  Author-email: 2587125111@qq.com
mdbq-3.11.3.dist-info/RECORD CHANGED
@@ -1,17 +1,17 @@
  mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
- mdbq/__version__.py,sha256=yfllhgz9Co6QQbMUq9eHkXYCazHcxgzu3Z4cqXTNmwo,18
+ mdbq/__version__.py,sha256=SerN98H6Mx8rHVh-jf2Nmc7iZHb02NHGVphB1O5jKwE,18
  mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
- mdbq/aggregation/query_data.py,sha256=fdotW8qdAyDB13p7r3p6AGBkavcHnf6hIvSMtcS7vqE,179875
+ mdbq/aggregation/query_data.py,sha256=nxL8hSy8yI1QLlqnkTNHHQSxRfo-6WKL5OA-N4xLB7c,179832
  mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
  mdbq/config/config.py,sha256=eaTfrfXQ65xLqjr5I8-HkZd_jEY1JkGinEgv3TSLeoQ,3170
  mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
- mdbq/log/mylogger.py,sha256=07sstIeaIQUJXwpMwmxppRI7kW7QwZFnv4Rr3UDlyUs,24133
+ mdbq/log/mylogger.py,sha256=HuxLBCXjm6fZrxYE0rdpUCz359WGeqOX0vvg9jTuRY4,24126
  mdbq/log/spider_logging.py,sha256=-ozWWEGm3HVv604ozs_OOvVwumjokmUPwbaodesUrPY,1664
  mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
- mdbq/mysql/deduplicator.py,sha256=ibmxpzenhPgT_ei61TjQB2ZxYs9ztkG_ygbLSa8RIlM,32990
- mdbq/mysql/mysql.py,sha256=Lfy9PsEdgmdRtcG_UUgegH3bFTJPhByTWkcAYl8G6m0,56788
+ mdbq/mysql/deduplicator.py,sha256=bIV010UkFfSUONY6-756x3tDVO4k6q3pqxoY3Z2xT-k,32990
+ mdbq/mysql/mysql.py,sha256=Kjpi-LL00WQUmTTOfhEBsNrmo4-4kFFJzrHbVKfqiBE,56770
  mdbq/mysql/s_query.py,sha256=dlnrVJ3-Vp1Suv9CNbPxyYSRqRJUHjOpF39tb2F-wBc,10190
- mdbq/mysql/uploader.py,sha256=3Ci34yVlLd7odqHLqnBRnkVMKM2Po26LJvtCpN-lBA4,66489
+ mdbq/mysql/uploader.py,sha256=3fXyNA0GzBNaadAh6cOgbuUEvY4IAhKn4apgbkToEno,61321
  mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
  mdbq/other/download_sku_picture.py,sha256=YU8DxKMXbdeE1OOKEA848WVp62jYHw5O4tXTjUdq9H0,44832
  mdbq/other/otk.py,sha256=iclBIFbQbhlqzUbcMMoePXBpcP1eZ06ZtjnhcA_EbmE,7241
@@ -23,8 +23,8 @@ mdbq/pbix/refresh_all.py,sha256=OBT9EewSZ0aRS9vL_FflVn74d4l2G00wzHiikCC4TC0,5926
  mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
  mdbq/redis/getredis.py,sha256=YHgCKO8mEsslwet33K5tGss-nrDDwPnOSlhA9iBu0jY,24078
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
- mdbq/spider/aikucun.py,sha256=YyPWa_nOH1zs8wgTDcgzn5w8szGKWPyWzmWMVIPkFnU,21638
- mdbq-3.11.1.dist-info/METADATA,sha256=Nf8bnoO4TD0fo7xI6sVypTo2xHGy3ldHAQqfooRq_40,364
- mdbq-3.11.1.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
- mdbq-3.11.1.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
- mdbq-3.11.1.dist-info/RECORD,,
+ mdbq/spider/aikucun.py,sha256=cqK-JRd_DHbToC7hyo83m8o97NZkJFqmB2xBtr6aAVU,20961
+ mdbq-3.11.3.dist-info/METADATA,sha256=tgDHEyJKxO0ML-gUTBap1b6yP-xv5sEA_SsfVJ_31C0,364
+ mdbq-3.11.3.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
+ mdbq-3.11.3.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
+ mdbq-3.11.3.dist-info/RECORD,,