mdbq 4.2.0__py3-none-any.whl → 4.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mdbq might be problematic.

mdbq/__version__.py CHANGED
@@ -1 +1 @@
- VERSION = '4.2.0'
+ VERSION = '4.2.2'
mdbq/mysql/uploader.py CHANGED
@@ -1,20 +1,24 @@
  # -*- coding:utf-8 -*-
  """
- MySQL数据上传器 - 重构版本
- 提供高可用、易维护的MySQL数据上传功能
+ MySQL数据上传
  """
-
  import datetime
  import time
  import json
  import re
- from typing import Union, List, Dict, Optional, Any, Tuple
+ import io
+ from typing import Union, List, Dict, Optional, Any, Tuple, Iterator
  from functools import wraps
  from decimal import Decimal, InvalidOperation
  import math
-
+ import concurrent.futures
+ import threading
+ from queue import Queue
  import pymysql
  import pandas as pd
+ import psutil
+ import enum
+ import ipaddress
  from dbutils.pooled_db import PooledDB
  from mdbq.log import mylogger
  # from mdbq.myconf import myconf
@@ -87,12 +91,35 @@ class DatabaseConnectionManager:
  class DataTypeInferrer:
  """数据类型推断器"""

+ # 自定义类型映射注册表
+ _custom_type_handlers = {}
+
+ @classmethod
+ def register_type_handler(cls, type_name: str, handler_func):
+ """
+ 注册自定义类型处理器
+
+ :param type_name: 类型名称
+ :param handler_func: 处理函数,接收value参数,返回MySQL类型字符串或None
+ """
+ cls._custom_type_handlers[type_name] = handler_func
+
  @staticmethod
  def infer_mysql_type(value: Any) -> str:
  """推断MySQL数据类型"""
  if value is None or str(value).lower() in ['', 'none', 'nan']:
  return 'VARCHAR(255)'

+ # 检查自定义类型处理器
+ for type_name, handler in DataTypeInferrer._custom_type_handlers.items():
+ try:
+ result = handler(value)
+ if result:
+ return result
+ except Exception:
+ continue
+
+ # Python基本类型
  if isinstance(value, bool):
  return 'TINYINT(1)'
  elif isinstance(value, int):
@@ -109,42 +136,231 @@ class DataTypeInferrer:
  elif isinstance(value, (list, dict)):
  return 'JSON'
  elif isinstance(value, str):
- # 尝试判断是否是日期时间
- if DataValidator.is_datetime_string(value):
- return 'DATETIME'
-
- # 根据字符串长度选择类型
- length = len(value)
- if length <= 255:
- return 'VARCHAR(255)'
- elif length <= 65535:
- return 'TEXT'
- else:
- return 'LONGTEXT'
+ return DataTypeInferrer._infer_string_type(value)
+
+ # 处理枚举类型
+ if hasattr(value, '__class__') and hasattr(value.__class__, '__bases__'):
+ # 检查是否是枚举类型
+ if isinstance(value, enum.Enum):
+ # 根据枚举值的类型决定MySQL类型
+ enum_value = value.value
+ if isinstance(enum_value, int):
+ return 'INT'
+ elif isinstance(enum_value, str):
+ max_len = max(len(str(item.value)) for item in value.__class__)
+ return f'VARCHAR({min(max_len * 2, 255)})'
+ else:
+ return 'VARCHAR(255)'
+
+ # 处理其他特殊类型
+ value_str = str(value)
+
+ # UUID检测
+ if DataTypeInferrer._is_uuid(value_str):
+ return 'CHAR(36)'
+
+ # IP地址检测
+ if DataTypeInferrer._is_ip_address(value_str):
+ return 'VARCHAR(45)' # 支持IPv6
+
+ # 邮箱检测
+ if DataTypeInferrer._is_email(value_str):
+ return 'VARCHAR(255)'
+
+ # URL检测
+ if DataTypeInferrer._is_url(value_str):
+ return 'TEXT'

- return 'VARCHAR(255)'
+ # 默认字符串处理
+ return DataTypeInferrer._infer_string_type(value_str)
+
+ @staticmethod
+ def _infer_string_type(value: str) -> str:
+ """推断字符串类型"""
+ # 尝试判断是否是日期时间
+ if DataValidator.is_datetime_string(value):
+ return 'DATETIME'
+
+ # 数值字符串检测
+ if DataTypeInferrer._is_numeric_string(value):
+ if '.' in value or 'e' in value.lower():
+ return 'DECIMAL(20,6)'
+ else:
+ try:
+ int_val = int(value)
+ if -2147483648 <= int_val <= 2147483647:
+ return 'INT'
+ else:
+ return 'BIGINT'
+ except ValueError:
+ pass
+
+ # 根据字符串长度选择类型
+ length = len(value)
+ if length <= 255:
+ return 'VARCHAR(255)'
+ elif length <= 65535:
+ return 'TEXT'
+ else:
+ return 'LONGTEXT'

  @staticmethod
- def infer_types_from_data(data: List[Dict]) -> Dict[str, str]:
- """从数据中推断所有列的类型"""
+ def _is_uuid(value: str) -> bool:
+ """检测是否是UUID格式"""
+ uuid_pattern = r'^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$'
+ return bool(re.match(uuid_pattern, value.lower()))
+
+ @staticmethod
+ def _is_ip_address(value: str) -> bool:
+ """检测是否是IP地址"""
+ try:
+ ipaddress.ip_address(value)
+ return True
+ except ValueError:
+ return False
+
+ @staticmethod
+ def _is_email(value: str) -> bool:
+ """检测是否是邮箱地址"""
+ email_pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
+ return bool(re.match(email_pattern, value))
+
+ @staticmethod
+ def _is_url(value: str) -> bool:
+ """检测是否是URL"""
+ url_pattern = r'^https?://[^\s/$.?#].[^\s]*$'
+ return bool(re.match(url_pattern, value, re.IGNORECASE))
+
+ @staticmethod
+ def _is_numeric_string(value: str) -> bool:
+ """检测是否是数值字符串"""
+ try:
+ float(value)
+ return True
+ except ValueError:
+ return False
+
+ @staticmethod
+ def infer_types_from_data(data: List[Dict], sample_size: int = 100) -> Dict[str, str]:
+ """
+ 从数据中推断所有列的类型
+
+ :param data: 数据列表
+ :param sample_size: 采样大小,避免检查过多数据
+ """
  if not data:
  return {}

  type_map = {}
- for row in data[:10]: # 只检查前10行
+ type_candidates = {} # 存储每列的候选类型
+
+ # 采样数据进行类型推断
+ sample_data = data[:sample_size] if len(data) > sample_size else data
+
+ for row in sample_data:
  for col, value in row.items():
  # 跳过系统列
  if col.lower() in ['id', 'create_at', 'update_at']:
  continue
- if col not in type_map and value is not None:
- type_map[col] = DataTypeInferrer.infer_mysql_type(value)
+
+ if value is not None and str(value).strip():
+ mysql_type = DataTypeInferrer.infer_mysql_type(value)
+
+ if col not in type_candidates:
+ type_candidates[col] = []
+ type_candidates[col].append(mysql_type)
+
+ # 为每列选择最合适的类型
+ for col, types in type_candidates.items():
+ type_map[col] = DataTypeInferrer._select_best_type(types)

- # 自动添加系统列类型定义
+ # 自动添加系统列类型定义(id列只在新建表时添加)
  type_map['id'] = 'BIGINT'
  type_map['create_at'] = 'TIMESTAMP'
  type_map['update_at'] = 'TIMESTAMP'

  return type_map
+
+ @staticmethod
+ def _select_best_type(type_candidates: List[str]) -> str:
+ """
+ 从候选类型中选择最佳类型
+
+ 优先级:JSON > LONGTEXT > TEXT > VARCHAR > DECIMAL > BIGINT > INT > DATETIME > DATE
+ """
+ if not type_candidates:
+ return 'VARCHAR(255)'
+
+ # 类型优先级映射
+ type_priority = {
+ 'JSON': 10,
+ 'LONGTEXT': 9,
+ 'TEXT': 8,
+ 'VARCHAR': 7,
+ 'DECIMAL': 6,
+ 'BIGINT': 5,
+ 'INT': 4,
+ 'DATETIME': 3,
+ 'DATE': 2,
+ 'TINYINT': 1
+ }
+
+ # 找到优先级最高的类型
+ best_type = 'VARCHAR(255)'
+ best_priority = 0
+
+ for candidate in set(type_candidates):
+ # 提取基础类型名
+ base_type = candidate.split('(')[0].upper()
+ priority = type_priority.get(base_type, 0)
+
+ if priority > best_priority:
+ best_priority = priority
+ best_type = candidate
+
+ return best_type
+
+
+ # 注册一些常用的自定义类型处理器
+ def register_common_type_handlers():
+ """注册常用的自定义类型处理器"""
+
+ def handle_phone_number(value):
+ """处理电话号码"""
+ if isinstance(value, str):
+ # 中国手机号码格式
+ if re.match(r'^1[3-9]\d{9}$', value):
+ return 'VARCHAR(11)'
+ # 国际电话号码格式
+ if re.match(r'^\+?[1-9]\d{1,14}$', value):
+ return 'VARCHAR(20)'
+ return None
+
+ def handle_id_card(value):
+ """处理身份证号"""
+ if isinstance(value, str):
+ # 中国身份证号码
+ if re.match(r'^\d{17}[\dXx]$', value):
+ return 'CHAR(18)'
+ return None
+
+ def handle_json_string(value):
+ """处理JSON字符串"""
+ if isinstance(value, str):
+ try:
+ json.loads(value)
+ return 'JSON'
+ except (ValueError, TypeError):
+ pass
+ return None
+
+ # 注册处理器
+ DataTypeInferrer.register_type_handler('phone', handle_phone_number)
+ DataTypeInferrer.register_type_handler('id_card', handle_id_card)
+ DataTypeInferrer.register_type_handler('json_string', handle_json_string)
+
+ # 自动注册常用类型处理器
+ register_common_type_handlers()


  class DataValidator:
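
Note on the handler registry added above: DataTypeInferrer now consults registered callbacks before its built-in checks in infer_mysql_type(). A minimal usage sketch, assuming mdbq 4.2.2 and the import path mdbq.mysql.uploader; the country-code handler below is hypothetical and not part of the package:

import re
from mdbq.mysql.uploader import DataTypeInferrer

def handle_country_code(value):
    # Hypothetical handler: map two-letter ISO country codes to CHAR(2); None falls through.
    if isinstance(value, str) and re.fullmatch(r'[A-Z]{2}', value):
        return 'CHAR(2)'
    return None

DataTypeInferrer.register_type_handler('country_code', handle_country_code)

print(DataTypeInferrer.infer_mysql_type('CN'))     # 'CHAR(2)' via the custom handler
print(DataTypeInferrer.infer_mysql_type('hello'))  # falls back to the built-in string logic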
@@ -376,6 +592,80 @@ class TableManager:
  )
  return bool(cursor.fetchone())

+ def get_table_columns(self, db_name: str, table_name: str) -> Dict[str, str]:
+ """获取表的列信息"""
+ db_name = self._sanitize_identifier(db_name)
+ table_name = self._sanitize_identifier(table_name)
+
+ with self.conn_mgr.get_connection() as conn:
+ with conn.cursor() as cursor:
+ cursor.execute("""
+ SELECT COLUMN_NAME, COLUMN_TYPE
+ FROM INFORMATION_SCHEMA.COLUMNS
+ WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s
+ """, (db_name, table_name))
+
+ columns = {}
+ for row in cursor.fetchall():
+ columns[row['COLUMN_NAME']] = row['COLUMN_TYPE']
+ return columns
+
+ def get_table_primary_key(self, db_name: str, table_name: str) -> Optional[str]:
+ """获取表的主键列名"""
+ db_name = self._sanitize_identifier(db_name)
+ table_name = self._sanitize_identifier(table_name)
+
+ with self.conn_mgr.get_connection() as conn:
+ with conn.cursor() as cursor:
+ cursor.execute("""
+ SELECT COLUMN_NAME
+ FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE
+ WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s
+ AND CONSTRAINT_NAME = 'PRIMARY'
+ """, (db_name, table_name))
+
+ result = cursor.fetchone()
+ return result['COLUMN_NAME'] if result else None
+
+ def ensure_system_columns(self, db_name: str, table_name: str):
+ """确保表有系统列,如果没有则添加(保持原有主键结构)"""
+ existing_columns = self.get_table_columns(db_name, table_name)
+ existing_primary_key = self.get_table_primary_key(db_name, table_name)
+
+ with self.conn_mgr.get_connection() as conn:
+ with conn.cursor() as cursor:
+ # 只有在表没有主键且没有id列时,才添加id主键
+ if existing_primary_key is None and 'id' not in existing_columns:
+ cursor.execute(f"""
+ ALTER TABLE `{db_name}`.`{table_name}`
+ ADD COLUMN `id` BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY FIRST
+ """)
+ logger.info('自动添加id主键列', {'database': db_name, 'table': table_name})
+ elif existing_primary_key is not None:
+ logger.debug('表已有主键,保持原有结构', {
+ 'database': db_name,
+ 'table': table_name,
+ 'primary_key': existing_primary_key
+ })
+
+ # 检查并添加create_at列
+ if 'create_at' not in existing_columns:
+ cursor.execute(f"""
+ ALTER TABLE `{db_name}`.`{table_name}`
+ ADD COLUMN `create_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
+ """)
+ logger.info('自动添加create_at列', {'database': db_name, 'table': table_name})
+
+ # 检查并添加update_at列
+ if 'update_at' not in existing_columns:
+ cursor.execute(f"""
+ ALTER TABLE `{db_name}`.`{table_name}`
+ ADD COLUMN `update_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
+ """)
+ logger.info('自动添加update_at列', {'database': db_name, 'table': table_name})
+
+ conn.commit()
+
  def create_table(self, db_name: str, table_name: str, columns: Dict[str, str],
  primary_keys: Optional[List[str]] = None,
  unique_keys: Optional[List[List[str]]] = None):
@@ -479,24 +769,49 @@ class DataProcessor:
  """数据处理器"""

  @staticmethod
- def normalize_data(data: Union[Dict, List[Dict], pd.DataFrame]) -> List[Dict]:
- """标准化数据格式为字典列表"""
+ def normalize_data(data: Union[Dict, List[Dict], pd.DataFrame],
+ chunk_size: int = 5000,
+ memory_limit_mb: int = 100) -> Iterator[List[Dict]]:
+ """
+ 标准化数据格式为分块迭代器
+
+ :param data: 输入数据
+ :param chunk_size: 每个chunk的大小
+ :param memory_limit_mb: 内存限制(MB),超过时自动调整chunk_size
+ """
+ # 动态调整chunk_size基于可用内存
+ available_memory_mb = psutil.virtual_memory().available / 1024 / 1024
+ if available_memory_mb < memory_limit_mb * 2:
+ chunk_size = min(chunk_size, 1000) # 内存紧张时减小chunk
+
  if isinstance(data, pd.DataFrame):
- return data.to_dict('records')
+ # 对于大DataFrame,使用更高效的分块方式
+ if len(data) > 50000:
+ # 大数据集使用pandas的分块读取
+ for chunk in pd.read_csv(io.StringIO(data.to_csv(index=False)), chunksize=chunk_size):
+ yield chunk.to_dict('records')
+ else:
+ for i in range(0, len(data), chunk_size):
+ chunk = data.iloc[i:i + chunk_size]
+ yield chunk.to_dict('records')
  elif isinstance(data, dict):
- return [data]
- elif isinstance(data, list) and all(isinstance(item, dict) for item in data):
- return data
+ yield [data]
+ elif isinstance(data, list):
+ if all(isinstance(item, dict) for item in data):
+ for i in range(0, len(data), chunk_size):
+ yield data[i:i + chunk_size]
+ else:
+ raise ValueError("列表中必须全部是字典")
  else:
  raise ValueError("数据格式必须是字典、字典列表或DataFrame")

  @staticmethod
- def prepare_data_for_insert(data: List[Dict], set_typ: Dict[str, str],
+ def prepare_data_for_insert(data_chunk: List[Dict], set_typ: Dict[str, str],
  allow_null: bool = False) -> List[Dict]:
  """准备插入数据"""
  prepared_data = []

- for row_idx, row in enumerate(data, 1):
+ for row_idx, row in enumerate(data_chunk, 1):
  prepared_row = {}

  for col_name, col_type in set_typ.items():
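
The reworked DataProcessor.normalize_data above now yields chunks (lists of dicts) instead of returning a single list, and shrinks the chunk size when free memory runs low. A rough usage sketch, assuming the import path mdbq.mysql.uploader, the default chunk_size of 5000, and enough free memory that the psutil-based adjustment does not kick in:

import pandas as pd
from mdbq.mysql.uploader import DataProcessor

rows = [{'name': f'user{i}', 'score': i} for i in range(12000)]
for chunk in DataProcessor.normalize_data(rows, chunk_size=5000):
    print(len(chunk))  # 5000, 5000, 2000 -- each chunk is a list of dicts

df = pd.DataFrame(rows)  # DataFrames are sliced the same way rather than converted wholesale
print(len(next(DataProcessor.normalize_data(df, chunk_size=5000))))  # 5000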
@@ -523,13 +838,13 @@ class DataProcessor:
  return prepared_data

  @staticmethod
- def partition_data_by_date(data: List[Dict], date_column: str,
+ def partition_data_by_date(data_chunk: List[Dict], date_column: str,
  partition_by: str) -> Dict[str, List[Dict]]:
- """按日期分区数据"""
+ """按日期分区数据块"""
  partitioned = {}
  table_manager = TableManager(None, None) # 只用静态方法

- for row in data:
+ for row in data_chunk:
  if date_column not in row:
  logger.warning('缺少分区日期列', {'列名': date_column, '行数据': row})
  continue
@@ -583,34 +898,59 @@ class DataInserter:
  def _execute_batch_insert(self, sql: str, data: List[Dict],
  columns: List[str]) -> Tuple[int, int, int]:
  """执行批量插入"""
- batch_size = min(1000, len(data))
+ # 动态调整批次大小
+ estimated_row_size = len(str(data[0])) if data else 100
+ max_packet_size = 16 * 1024 * 1024 # 16MB MySQL默认限制
+ optimal_batch_size = min(
+ max_packet_size // (estimated_row_size * len(columns)),
+ 2000, # 最大批次
+ len(data)
+ )
+ batch_size = max(100, optimal_batch_size) # 最小100条
+
  total_inserted = 0
  total_skipped = 0
  total_failed = 0

  with self.conn_mgr.get_connection() as conn:
  with conn.cursor() as cursor:
- for i in range(0, len(data), batch_size):
- batch = data[i:i + batch_size]
- values_list = []
-
- for row in batch:
- values = [self._ensure_basic_type(row.get(col)) for col in columns]
- values_list.append(values)
+ # 预处理所有数据,减少循环中的处理开销
+ all_values = []
+ for row in data:
+ values = [self._ensure_basic_type(row.get(col)) for col in columns]
+ all_values.append(values)
+
+ # 分批处理,使用更大的事务批次
+ transaction_size = min(5000, len(all_values)) # 每个事务处理的记录数
+
+ for tx_start in range(0, len(all_values), transaction_size):
+ tx_end = min(tx_start + transaction_size, len(all_values))
+ tx_values = all_values[tx_start:tx_end]

  try:
- cursor.executemany(sql, values_list)
+ # 开始事务
+ conn.begin()
+
+ # 在事务内分批执行,成功后直接累加
+ for i in range(0, len(tx_values), batch_size):
+ batch_values = tx_values[i:i + batch_size]
+
+ try:
+ cursor.executemany(sql, batch_values)
+ total_inserted += len(batch_values)
+ except pymysql.err.IntegrityError:
+ total_skipped += len(batch_values)
+ logger.debug('批量插入唯一约束冲突,跳过', {'批次大小': len(batch_values)})
+ except Exception as e:
+ logger.error('批量插入失败', {'错误': str(e), '批次大小': len(batch_values)})
+ raise
+
  conn.commit()
- affected = cursor.rowcount if cursor.rowcount is not None else len(batch)
- total_inserted += affected
- except pymysql.err.IntegrityError:
- conn.rollback()
- total_skipped += len(batch)
- logger.debug('批量插入唯一约束冲突,跳过', {'批次大小': len(batch)})
+
  except Exception as e:
  conn.rollback()
- total_failed += len(batch)
- logger.error('批量插入失败', {'错误': str(e), '批次大小': len(batch)})
+ logger.error('事务执行失败,已回滚', {'错误': str(e)})
+ total_failed += len(tx_values)

  return total_inserted, total_skipped, total_failed
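
For reference, the batch-size heuristic introduced above works roughly as follows (illustrative numbers only; in the real code the last term of the min() is len(data)):

estimated_row_size = 200             # len(str(row)) for a typical row, in characters
num_columns = 10
max_packet_size = 16 * 1024 * 1024   # MySQL's default max_allowed_packet (16 MB)
rows_to_insert = 50000               # stands in for len(data)

optimal_batch_size = min(max_packet_size // (estimated_row_size * num_columns), 2000, rows_to_insert)
batch_size = max(100, optimal_batch_size)
print(batch_size)  # 2000 -- capped at 2000 rows per executemany(), floored at 100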
@@ -655,13 +995,13 @@ def retry_on_failure(max_retries: int = 3, delay: int = 1):

  class MySQLUploader:
  """
- MySQL数据上传器 - 重构版本
+ MySQL数据上传器

  特性:
  - 自动为每个表添加id(BIGINT自增主键)、create_at、update_at时间戳列
  - 支持自动建表、分表、数据类型推断
  - 高可用连接池管理和重试机制
- - 批量插入优化
+ - 流式批量插入优化
  """

  def __init__(self, username: str, password: str, host: str = 'localhost',
@@ -705,7 +1045,7 @@ class MySQLUploader:
  partition_by: Optional[str] = None,
  partition_date_column: str = '日期',
  update_on_duplicate: bool = False,
- unique_keys: Optional[List[List[str]]] = None) -> bool:
+ unique_keys: Optional[List[List[str]]] = None) -> Dict[str, Any]:
  """
  上传数据到MySQL数据库

@@ -714,120 +1054,256 @@ class MySQLUploader:
  - create_at: 创建时间戳(插入时自动设置)
  - update_at: 更新时间戳(插入和更新时自动设置)

- :param db_name: 数据库名(会自动转为小写)
- :param table_name: 表名(会自动转为小写)
- :param data: 要上传的数据
+ :param db_name: 数据库名
+ :param table_name: 表名
+ :param data: 要上传的数据,支持字典、字典列表、DataFrame
  :param set_typ: 列类型定义,如果为None则自动推断(无需包含系统列)
  :param allow_null: 是否允许空值
  :param partition_by: 分表方式('year'或'month')
  :param partition_date_column: 分表日期列名
  :param update_on_duplicate: 遇到重复数据时是否更新
  :param unique_keys: 唯一约束列表(无需包含系统列)
- :return: 上传是否成功
+ :return: 上传结果详情
  """
  db_name = db_name.lower()
  table_name = table_name.lower()
+
+ result = {
+ 'success': False,
+ 'inserted_rows': 0,
+ 'skipped_rows': 0,
+ 'failed_rows': 0,
+ 'tables_created': []
+ }
+
  try:
- start_time = time.time()
+ # 计算原始数据大小
+ original_data_size = 0
+ if isinstance(data, (pd.DataFrame, list)):
+ original_data_size = len(data)
+ elif isinstance(data, dict):
+ original_data_size = 1

- # 标准化数据
+ # 标准化数据为流式迭代器
  normalized_data = DataProcessor.normalize_data(data)
- if not normalized_data:
- logger.warning('数据为空,跳过上传')
- return True

  # 推断或验证列类型
  if set_typ is None:
- set_typ = DataTypeInferrer.infer_types_from_data(normalized_data)
- logger.info('自动推断数据类型', {'类型映射': set_typ})
+ # 取第一个chunk进行类型推断
+ first_chunk = next(iter(normalized_data))
+ set_typ = DataTypeInferrer.infer_types_from_data(first_chunk)
+ # 重新创建迭代器
+ normalized_data = DataProcessor.normalize_data(data)
+ logger.debug('自动推断数据类型', {'类型映射': set_typ})

  # 确保数据库存在
  self.table_mgr.ensure_database_exists(db_name)

  # 处理分表逻辑
  if partition_by:
- return self._handle_partitioned_upload(
+ upload_result = self._handle_partitioned_upload(
  db_name, table_name, normalized_data, set_typ,
  partition_by, partition_date_column, allow_null,
  update_on_duplicate, unique_keys
  )
  else:
- return self._handle_single_table_upload(
+ upload_result = self._handle_single_table_upload(
  db_name, table_name, normalized_data, set_typ,
  allow_null, update_on_duplicate, unique_keys
  )
-
+
+ # 合并结果
+ result.update(upload_result)
+ result['success'] = upload_result.get('failed_rows', 0) == 0
+
  except Exception as e:
  logger.error('数据上传失败', {
  '数据库': db_name,
  '表名': table_name,
  '错误': str(e)
  })
- return False
+ result['success'] = False
+
+ return result

  def _handle_single_table_upload(self, db_name: str, table_name: str,
- data: List[Dict], set_typ: Dict[str, str],
+ data: Iterator[List[Dict]],
+ set_typ: Dict[str, str],
  allow_null: bool, update_on_duplicate: bool,
- unique_keys: Optional[List[List[str]]]) -> bool:
+ unique_keys: Optional[List[List[str]]]) -> Dict[str, Any]:
  """处理单表上传"""
+ result = {
+ 'inserted_rows': 0,
+ 'skipped_rows': 0,
+ 'failed_rows': 0,
+ 'tables_created': []
+ }
+
  # 确保表存在
  if not self.table_mgr.table_exists(db_name, table_name):
  self.table_mgr.create_table(db_name, table_name, set_typ,
  unique_keys=unique_keys)
+ result['tables_created'].append(f"{db_name}.{table_name}")
+ else:
+ # 表已存在,确保有时间戳列(但保持原有主键结构)
+ self.table_mgr.ensure_system_columns(db_name, table_name)

- # 准备数据
- prepared_data = DataProcessor.prepare_data_for_insert(
- data, set_typ, allow_null
- )
-
- # 插入数据
- inserted, skipped, failed = self.data_inserter.insert_data(
- db_name, table_name, prepared_data, set_typ, update_on_duplicate
- )
+ # 流式处理每个数据块
+ for chunk in data:
+ if not chunk:
+ continue
+
+ prepared_chunk = DataProcessor.prepare_data_for_insert(
+ chunk, set_typ, allow_null
+ )
+
+ inserted, skipped, failed = self.data_inserter.insert_data(
+ db_name, table_name, prepared_chunk, set_typ, update_on_duplicate
+ )
+
+ result['inserted_rows'] += inserted
+ result['skipped_rows'] += skipped
+ result['failed_rows'] += failed

  logger.info('单表上传完成', {
  '数据库': db_name,
  '表名': table_name,
- '总数': len(data),
- '插入': inserted,
- '跳过': skipped,
- '失败': failed
+ '插入': result['inserted_rows'],
+ '跳过': result['skipped_rows'],
+ '失败': result['failed_rows']
  })

- return failed == 0
+ return result

  def _handle_partitioned_upload(self, db_name: str, base_table_name: str,
- data: List[Dict], set_typ: Dict[str, str],
+ data: Iterator[List[Dict]],
+ set_typ: Dict[str, str],
  partition_by: str, partition_date_column: str,
  allow_null: bool, update_on_duplicate: bool,
- unique_keys: Optional[List[List[str]]]) -> bool:
+ unique_keys: Optional[List[List[str]]]) -> Dict[str, Any]:
  """处理分表上传"""
- # 按日期分区数据
- partitioned_data = DataProcessor.partition_data_by_date(
- data, partition_date_column, partition_by
- )
+ result = {
+ 'inserted_rows': 0,
+ 'skipped_rows': 0,
+ 'failed_rows': 0,
+ 'tables_created': []
+ }

- total_success = True
+ # 使用更小的缓冲区,更频繁地刷新
+ partition_buffers = {}
+ buffer_limit = 1000 # 减小缓冲区大小

- for partition_suffix, partition_data in partitioned_data.items():
- partition_table_name = f"{base_table_name}_{partition_suffix}"
+ # 记录已创建的表,避免重复检查
+ created_tables = set()
+
+ for chunk in data:
+ if not chunk:
+ continue

- success = self._handle_single_table_upload(
- db_name, partition_table_name, partition_data, set_typ,
- allow_null, update_on_duplicate, unique_keys
+ # 按日期分区当前chunk
+ partitioned_chunk = DataProcessor.partition_data_by_date(
+ chunk, partition_date_column, partition_by
  )

- if not success:
- total_success = False
+ # 将数据添加到对应分区缓冲区
+ for partition_suffix, partition_data in partitioned_chunk.items():
+ if partition_suffix not in partition_buffers:
+ partition_buffers[partition_suffix] = []
+ partition_buffers[partition_suffix].extend(partition_data)
+
+ # 更频繁地刷新缓冲区
+ if len(partition_buffers[partition_suffix]) >= buffer_limit:
+ partition_result = self._process_partition_buffer_optimized(
+ db_name, base_table_name, partition_suffix,
+ partition_buffers[partition_suffix], set_typ,
+ allow_null, update_on_duplicate, unique_keys, created_tables
+ )
+ self._merge_partition_result(result, partition_result)
+ partition_buffers[partition_suffix] = [] # 清空缓冲区
+
+ # 定期检查所有缓冲区,防止某些分区数据积累过多
+ total_buffered = sum(len(buffer) for buffer in partition_buffers.values())
+ if total_buffered > 5000: # 总缓冲超过5000条时强制刷新
+ for partition_suffix in list(partition_buffers.keys()):
+ if partition_buffers[partition_suffix]:
+ partition_result = self._process_partition_buffer_optimized(
+ db_name, base_table_name, partition_suffix,
+ partition_buffers[partition_suffix], set_typ,
+ allow_null, update_on_duplicate, unique_keys, created_tables
+ )
+ self._merge_partition_result(result, partition_result)
+ partition_buffers[partition_suffix] = []
+
+ # 处理剩余的缓冲区数据
+ for partition_suffix, buffer_data in partition_buffers.items():
+ if buffer_data:
+ partition_result = self._process_partition_buffer_optimized(
+ db_name, base_table_name, partition_suffix,
+ buffer_data, set_typ, allow_null, update_on_duplicate, unique_keys, created_tables
+ )
+ self._merge_partition_result(result, partition_result)

  logger.info('分表上传完成', {
  '数据库': db_name,
  '基础表名': base_table_name,
- '分区数': len(partitioned_data),
- '总体成功': total_success
+ '分区数': len(created_tables),
+ '插入': result['inserted_rows'],
+ '跳过': result['skipped_rows'],
+ '失败': result['failed_rows']
  })

- return total_success
+ return result
+
+ def _process_partition_buffer_optimized(self, db_name: str, base_table_name: str,
+ partition_suffix: str, partition_data: List[Dict],
+ set_typ: Dict[str, str], allow_null: bool,
+ update_on_duplicate: bool,
+ unique_keys: Optional[List[List[str]]],
+ created_tables: set) -> Dict[str, Any]:
+ """处理单个分区的缓冲数据"""
+ partition_table_name = f"{base_table_name}_{partition_suffix}"
+
+ result = {
+ 'inserted_rows': 0,
+ 'skipped_rows': 0,
+ 'failed_rows': 0,
+ 'tables_created': []
+ }
+
+ # 优化表存在性检查
+ table_key = f"{db_name}.{partition_table_name}"
+ if table_key not in created_tables:
+ if not self.table_mgr.table_exists(db_name, partition_table_name):
+ self.table_mgr.create_table(db_name, partition_table_name, set_typ,
+ unique_keys=unique_keys)
+ result['tables_created'].append(table_key)
+ else:
+ # 表已存在,确保有时间戳列(但保持原有主键结构)
+ self.table_mgr.ensure_system_columns(db_name, partition_table_name)
+ created_tables.add(table_key)
+
+ # 准备并插入数据
+ prepared_data = DataProcessor.prepare_data_for_insert(
+ partition_data, set_typ, allow_null
+ )
+
+ inserted, skipped, failed = self.data_inserter.insert_data(
+ db_name, partition_table_name, prepared_data, set_typ, update_on_duplicate
+ )
+
+ result['inserted_rows'] = inserted
+ result['skipped_rows'] = skipped
+ result['failed_rows'] = failed
+
+ return result
+
+ def _merge_partition_result(self, main_result: Dict[str, Any],
+ partition_result: Dict[str, Any]):
+ """合并分区处理结果"""
+ main_result['inserted_rows'] += partition_result['inserted_rows']
+ main_result['skipped_rows'] += partition_result['skipped_rows']
+ main_result['failed_rows'] += partition_result['failed_rows']
+ main_result['tables_created'].extend(partition_result['tables_created'])

  def close(self):
  """关闭连接"""
846
1322
  def __exit__(self, exc_type, exc_val, exc_tb):
847
1323
  self.close()
848
1324
 
1325
+ def upload_data_concurrent(self, db_name: str, table_name: str,
1326
+ data: Union[Dict, List[Dict], pd.DataFrame],
1327
+ set_typ: Optional[Dict[str, str]] = None,
1328
+ allow_null: bool = False,
1329
+ partition_by: Optional[str] = None,
1330
+ partition_date_column: str = '日期',
1331
+ update_on_duplicate: bool = False,
1332
+ unique_keys: Optional[List[List[str]]] = None,
1333
+ max_workers: int = 3) -> Dict[str, Any]:
1334
+ """
1335
+ 并发上传数据到MySQL数据库
1336
+
1337
+ :param max_workers: 最大并发工作线程数
1338
+ :return: 上传结果详情
1339
+ """
1340
+ db_name = db_name.lower()
1341
+ table_name = table_name.lower()
1342
+
1343
+ result = {
1344
+ 'success': False,
1345
+ 'inserted_rows': 0,
1346
+ 'skipped_rows': 0,
1347
+ 'failed_rows': 0,
1348
+ 'tables_created': []
1349
+ }
1350
+
1351
+ try:
1352
+ # 标准化数据为流式迭代器
1353
+ normalized_data = DataProcessor.normalize_data(data, chunk_size=2000) # 更小的chunk用于并发
1354
+
1355
+ # 推断或验证列类型
1356
+ if set_typ is None:
1357
+ first_chunk = next(iter(normalized_data))
1358
+ set_typ = DataTypeInferrer.infer_types_from_data(first_chunk)
1359
+ normalized_data = DataProcessor.normalize_data(data, chunk_size=2000)
1360
+ logger.debug('自动推断数据类型', {'类型映射': set_typ})
1361
+
1362
+ # 确保数据库存在
1363
+ self.table_mgr.ensure_database_exists(db_name)
1364
+
1365
+ # 创建线程锁用于表创建的线程安全
1366
+ table_creation_lock = threading.Lock()
1367
+ created_tables_set = set()
1368
+
1369
+ def process_chunk_worker(chunk_data):
1370
+ """工作线程函数"""
1371
+ try:
1372
+ if partition_by:
1373
+ # 分表处理
1374
+ partitioned_chunk = DataProcessor.partition_data_by_date(
1375
+ chunk_data, partition_date_column, partition_by
1376
+ )
1377
+
1378
+ chunk_result = {
1379
+ 'inserted_rows': 0,
1380
+ 'skipped_rows': 0,
1381
+ 'failed_rows': 0,
1382
+ 'tables_created': []
1383
+ }
1384
+
1385
+ for partition_suffix, partition_data in partitioned_chunk.items():
1386
+ partition_table_name = f"{table_name}_{partition_suffix}"
1387
+ table_key = f"{db_name}.{partition_table_name}"
1388
+
1389
+ # 确保表存在(线程安全)
1390
+ with table_creation_lock:
1391
+ if table_key not in created_tables_set:
1392
+ if not self.table_mgr.table_exists(db_name, partition_table_name):
1393
+ self.table_mgr.create_table(db_name, partition_table_name, set_typ,
1394
+ unique_keys=unique_keys)
1395
+ chunk_result['tables_created'].append(table_key)
1396
+ else:
1397
+ self.table_mgr.ensure_system_columns(db_name, partition_table_name)
1398
+ created_tables_set.add(table_key)
1399
+
1400
+ # 准备并插入数据
1401
+ prepared_data = DataProcessor.prepare_data_for_insert(
1402
+ partition_data, set_typ, allow_null
1403
+ )
1404
+
1405
+ inserted, skipped, failed = self.data_inserter.insert_data(
1406
+ db_name, partition_table_name, prepared_data, set_typ, update_on_duplicate
1407
+ )
1408
+
1409
+ chunk_result['inserted_rows'] += inserted
1410
+ chunk_result['skipped_rows'] += skipped
1411
+ chunk_result['failed_rows'] += failed
1412
+ else:
1413
+ # 单表处理
1414
+ table_key = f"{db_name}.{table_name}"
1415
+ with table_creation_lock:
1416
+ if table_key not in created_tables_set:
1417
+ if not self.table_mgr.table_exists(db_name, table_name):
1418
+ self.table_mgr.create_table(db_name, table_name, set_typ,
1419
+ unique_keys=unique_keys)
1420
+ chunk_result = {'tables_created': [table_key]}
1421
+ else:
1422
+ self.table_mgr.ensure_system_columns(db_name, table_name)
1423
+ chunk_result = {'tables_created': []}
1424
+ created_tables_set.add(table_key)
1425
+ else:
1426
+ chunk_result = {'tables_created': []}
1427
+
1428
+ prepared_chunk = DataProcessor.prepare_data_for_insert(
1429
+ chunk_data, set_typ, allow_null
1430
+ )
1431
+
1432
+ inserted, skipped, failed = self.data_inserter.insert_data(
1433
+ db_name, table_name, prepared_chunk, set_typ, update_on_duplicate
1434
+ )
1435
+
1436
+ chunk_result.update({
1437
+ 'inserted_rows': inserted,
1438
+ 'skipped_rows': skipped,
1439
+ 'failed_rows': failed
1440
+ })
1441
+
1442
+ return chunk_result
1443
+
1444
+ except Exception as e:
1445
+ logger.error('并发处理chunk失败', {'错误': str(e)})
1446
+ return {
1447
+ 'inserted_rows': 0,
1448
+ 'skipped_rows': 0,
1449
+ 'failed_rows': len(chunk_data) if chunk_data else 0,
1450
+ 'tables_created': []
1451
+ }
1452
+
1453
+ # 使用线程池执行并发处理
1454
+ with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
1455
+ # 提交所有任务
1456
+ future_to_chunk = {}
1457
+ for chunk in normalized_data:
1458
+ if chunk:
1459
+ future = executor.submit(process_chunk_worker, chunk)
1460
+ future_to_chunk[future] = len(chunk)
1461
+
1462
+ # 收集结果
1463
+ for future in concurrent.futures.as_completed(future_to_chunk):
1464
+ chunk_result = future.result()
1465
+ result['inserted_rows'] += chunk_result['inserted_rows']
1466
+ result['skipped_rows'] += chunk_result['skipped_rows']
1467
+ result['failed_rows'] += chunk_result['failed_rows']
1468
+ result['tables_created'].extend(chunk_result['tables_created'])
1469
+
1470
+ # 去重tables_created
1471
+ result['tables_created'] = list(set(result['tables_created']))
1472
+ result['success'] = result['failed_rows'] == 0
1473
+
1474
+ except Exception as e:
1475
+ logger.error('并发数据上传失败', {
1476
+ '数据库': db_name,
1477
+ '表名': table_name,
1478
+ '错误': str(e)
1479
+ })
1480
+ result['success'] = False
1481
+
1482
+ return result
1483
+
849
1484
 
850
1485
  # 使用示例
851
1486
  if __name__ == '__main__':
@@ -863,24 +1498,14 @@ if __name__ == '__main__':
  {'name': 'Bob', 'age': 30, 'salary': 60000.0, '日期': '2023-01-02'},
  ]

- # 定义列类型(系统会自动添加id、create_at、update_at列)
- column_types = {
- 'name': 'VARCHAR(255)',
- 'age': 'INT',
- 'salary': 'DECIMAL(10,2)',
- '日期': 'DATE'
- }
-
- # 上传数据
- success = uploader.upload_data(
+ # 上传数据(自动推断类型,流式处理)
+ result = uploader.upload_data(
  db_name='test_db',
  table_name='test_table',
  data=sample_data,
- set_typ=column_types,
- allow_null=False,
  update_on_duplicate=True,
  unique_keys=[['name', '日期']]
  )

  uploader.close()
- print(f"上传结果: {success}")
+ print(f"上传结果: {result}")
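
Taken together, the uploader changes in 4.2.2 replace the old boolean return of upload_data() with a result dict and add a thread-pooled upload_data_concurrent(). A hedged usage sketch (connection parameters are placeholders):

from mdbq.mysql.uploader import MySQLUploader

uploader = MySQLUploader(username='user', password='pass', host='localhost')
result = uploader.upload_data(
    db_name='test_db',
    table_name='orders',
    data=[{'name': 'Alice', 'amount': 12.5, '日期': '2023-01-01'}],
    update_on_duplicate=True,
)
# result keys: success, inserted_rows, skipped_rows, failed_rows, tables_created
if not result['success']:
    print('failed rows:', result['failed_rows'])
uploader.close()

upload_data_concurrent() takes the same arguments plus max_workers (default 3) and returns the same result shape.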
mdbq/other/download_sku_picture.py CHANGED
@@ -820,18 +820,11 @@ def main(service_name, database):
  db_name='属性设置2',
  table_name='天猫商品sku信息',
  data=s.df,
- set_typ={}, # 定义列和数据类型
- primary_keys=[], # 创建唯一主键
- check_duplicate=False, # 检查重复数据
- update_on_duplicate=False, # 遇到重复时更新数据,默认 False 跳过
- duplicate_columns=[], # 指定排重的组合键
- allow_null=False, # 允许插入空值
- partition_by=None, # 按年/月分表
- partition_date_column='日期', # 用于分表的日期列名,默认为'日期'
- auto_create=True, # 表不存在时自动创建, 默认参数不要更改
- indexes=[], # 指定索引列
- transaction_mode='row', # 事务模式
- unique_keys=[[]], # 唯一约束列表
+ set_typ=None,
+ allow_null=False,
+ partition_by=None,
+ update_on_duplicate=True,
+ unique_keys=None,
  )


@@ -907,18 +900,11 @@ def download_sku(service_name='company', database='mysql', db_name='属性设置
  db_name=table_name,
  table_name=table_name,
  data=s.df,
- set_typ={}, # 定义列和数据类型
- primary_keys=[], # 创建唯一主键
- check_duplicate=False, # 检查重复数据
- update_on_duplicate=False, # 遇到重复时更新数据,默认 False 跳过
- duplicate_columns=[], # 指定排重的组合键
- allow_null=False, # 允许插入空值
- partition_by=None, # 按年/月分表
- partition_date_column='日期', # 用于分表的日期列名,默认为'日期'
- auto_create=True, # 表不存在时自动创建, 默认参数不要更改
- indexes=[], # 指定索引列
- transaction_mode='row', # 事务模式
- unique_keys=[[]], # 唯一约束列表
+ set_typ=None,
+ allow_null=False,
+ partition_by=None,
+ update_on_duplicate=True,
+ unique_keys=None,
  )

  # 从数据库中读取数据,并下载素材到本地
@@ -954,18 +940,11 @@ def download_sku(service_name='company', database='mysql', db_name='属性设置
  db_name=db_name,
  table_name=table_name,
  data=df,
- set_typ={}, # 定义列和数据类型
- primary_keys=[], # 创建唯一主键
- check_duplicate=False, # 检查重复数据
- update_on_duplicate=False, # 遇到重复时更新数据,默认 False 跳过
- duplicate_columns=[], # 指定排重的组合键
- allow_null=False, # 允许插入空值
- partition_by=None, # 按年/月分表
- partition_date_column='日期', # 用于分表的日期列名,默认为'日期'
- auto_create=True, # 表不存在时自动创建, 默认参数不要更改
- indexes=[], # 指定索引列
- transaction_mode='row', # 事务模式
- unique_keys=[[]], # 唯一约束列表
+ set_typ=None,
+ allow_null=False,
+ partition_by=None,
+ update_on_duplicate=True,
+ unique_keys=None,
  )


@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: mdbq
- Version: 4.2.0
+ Version: 4.2.2
  Home-page: https://pypi.org/project/mdbq
  Author: xigua,
  Author-email: 2587125111@qq.com
@@ -1,5 +1,5 @@
  mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
- mdbq/__version__.py,sha256=ZxnKNunFTlHOzO3MsfFUVCuATEizUGyGn-xxae0gXSI,17
+ mdbq/__version__.py,sha256=gNDA6f7PmXcbqB0lTY4HIgD6dEB6SGywjhHa3HAyczA,17
  mdbq/auth/__init__.py,sha256=pnPMAt63sh1B6kEvmutUuro46zVf2v2YDAG7q-jV_To,24
  mdbq/auth/auth_backend.py,sha256=iLN7AqiSq7fQgFtNtge_TIlVOR1hrCSZXH6oId6uGX4,116924
  mdbq/auth/crypto.py,sha256=fcZRFCnrKVVdWDUx_zds51ynFYwS9DBvJOrRQVldrfM,15931
@@ -15,9 +15,9 @@ mdbq/mysql/deduplicator.py,sha256=tzLIm9K9S0lGLlVTI0dDQVYpWX796XCuyufmw1lU26Y,73
  mdbq/mysql/mysql.py,sha256=pDg771xBugCMSTWeskIFTi3pFLgaqgyG3smzf-86Wn8,56772
  mdbq/mysql/s_query.py,sha256=N2xHJf2CiUXjXIVBemdst-wamIP3908EGAJOFG13fCU,50475
  mdbq/mysql/unique_.py,sha256=MaztT-WIyEQUs-OOYY4pFulgHVcXR1BfCy3QUz0XM_U,21127
- mdbq/mysql/uploader.py,sha256=BFJnrXvQYU7soZMr-vm3zChhqmw960eePOy8QqebRDo,32678
+ mdbq/mysql/uploader.py,sha256=2inrXu3PIlvowfm5_0U4Trx_mraApjII8g_5ycFbNJ0,60059
  mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
- mdbq/other/download_sku_picture.py,sha256=X66sVdvVgzoNzmgVJyPtd7bjEvctEKtLPblEPF65EWc,46940
+ mdbq/other/download_sku_picture.py,sha256=MJX47I9jTUMFzO1kyEH-onIzAGa6QpgfmghrmyYnEsc,45111
  mdbq/other/error_handler.py,sha256=4p5haAXSY-P78stp4Xwo_MwAngWYqyKj5ogWIuYXMeY,12631
  mdbq/other/otk.py,sha256=iclBIFbQbhlqzUbcMMoePXBpcP1eZ06ZtjnhcA_EbmE,7241
  mdbq/other/pov_city.py,sha256=AEOmCOzOwyjHi9LLZWPKi6DUuSC-_M163664I52u9qw,21050
@@ -35,7 +35,7 @@ mdbq/route/routes.py,sha256=QVGfTvDgu0CpcKCvk1ra74H8uojgqTLUav1fnVAqLEA,29433
  mdbq/selenium/__init__.py,sha256=AKzeEceqZyvqn2dEDoJSzDQnbuENkJSHAlbHAD0u0ZI,10
  mdbq/selenium/get_driver.py,sha256=1NTlVUE6QsyjTrVVVqTO2LOnYf578ccFWlWnvIXGtic,20903
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
- mdbq-4.2.0.dist-info/METADATA,sha256=vyR4bK1uDkWIH2Y1t-0cmuRB8wWcw59xiAvv_EZw5_I,363
- mdbq-4.2.0.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
- mdbq-4.2.0.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
- mdbq-4.2.0.dist-info/RECORD,,
+ mdbq-4.2.2.dist-info/METADATA,sha256=vfhvk7DXQ267-NOPdqKJ_AWCWSEbWKdDjIf7bilbCXo,363
+ mdbq-4.2.2.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
+ mdbq-4.2.2.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
+ mdbq-4.2.2.dist-info/RECORD,,