mdbq 3.9.7__py3-none-any.whl → 3.9.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbq/__version__.py CHANGED
@@ -1 +1 @@
1
- VERSION = '3.9.7'
1
+ VERSION = '3.9.9'
mdbq/log/mylogger.py CHANGED
@@ -9,6 +9,8 @@ import threading
9
9
  import queue
10
10
  from typing import Optional, Dict, Any, List, Callable, Union
11
11
  import atexit
12
+ import traceback
13
+ import inspect
12
14
 
13
15
  try:
14
16
  import psutil
@@ -123,6 +125,7 @@ class MyLogger:
123
125
  # 定时刷新相关
124
126
  self._flush_thread = None
125
127
  self._last_flush_time = 0
128
+ self._start_flush_thread()
126
129
 
127
130
  # 创建日志记录器
128
131
  self.logger = logging.getLogger(name)
@@ -219,12 +222,9 @@ class MyLogger:
219
222
  'timestamp': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
220
223
  'level': record.levelname,
221
224
  'message': record.getMessage(),
222
- # 'module': record.module,
223
- 'function': record.funcName,
224
- # 'line': record.lineno,
225
- # 'thread': record.threadName,
226
- # 'process': record.processName,
227
225
  'name': record.name,
226
+ # 'module': record.module,
227
+ # 'function': record.funcName,
228
228
  }
229
229
 
230
230
  # 添加额外字段
@@ -347,6 +347,45 @@ class MyLogger:
347
347
  )
348
348
  self._async_thread.start()
349
349
 
350
def log_error_handler(retry_times=0, fallback_level='error'):
    """
    Decorator factory that shields a logging method from its own failures.

    The wrapped method is called up to ``retry_times + 1`` times, sleeping
    ``0.1 * attempt_number`` seconds between attempts. When every attempt
    raises, the message is re-emitted through a stdlib logger named
    ``"<self.name>_fallback"``; if that also fails, a one-line notice is
    written to ``sys.stderr``. The wrapper returns ``None`` in the failure
    case and never propagates an ``Exception`` to the caller.

    :param retry_times: number of extra attempts after the first failure;
        negative values are treated as 0
    :param fallback_level: name of the stdlib logger method used for the
        fallback message (``'error'``, ``'warning'``, ...); unknown names
        fall back to ``error``
    :return: a decorator for methods with the signature
        ``(self, level, message, extra=None)``
    """
    # Function-scope import so this block does not depend on the
    # file-level import list.
    from functools import wraps

    # A negative retry count would otherwise skip the call entirely.
    total_attempts = max(retry_times, 0) + 1

    def decorator(log_method):
        @wraps(log_method)
        def wrapper(self, level: str, message: str, extra: Optional[Dict] = None):
            last_exception = None
            attempts_made = 0
            for attempt in range(total_attempts):
                attempts_made = attempt + 1
                try:
                    return log_method(self, level, message, extra)
                except Exception as exc:
                    # Keep our own reference: the ``as`` name is unbound as
                    # soon as the except clause ends, so it cannot be used
                    # in the fallback section below.
                    last_exception = exc
                    if attempt < total_attempts - 1:
                        time.sleep(0.1 * (attempt + 1))  # simple linear back-off

            try:
                # Degraded path: plain stdlib logging with default config.
                logging.basicConfig()
                fallback_logger = logging.getLogger(f"{getattr(self, 'name', 'mylogger')}_fallback")
                fallback_msg = f"[降级处理] {message}"[:1000]
                emit = getattr(fallback_logger, fallback_level, fallback_logger.error)
                emit(
                    f"日志记录失败(尝试{attempts_made}次): {last_exception}\n原始消息: {fallback_msg}"
                )
            except Exception:
                # Last resort; nothing else is left to report through.
                sys.stderr.write(f"严重: 日志系统完全失败 - {last_exception}\n")

            return None

        return wrapper

    return decorator
387
+
388
+ @log_error_handler(retry_times=1, fallback_level='warning')
350
389
  def _sync_log(self, level: str, message: str, extra: Optional[Dict] = None):
351
390
  """同步日志记录"""
352
391
  if not hasattr(self.logger, level.lower()):
@@ -375,7 +414,8 @@ class MyLogger:
375
414
  log_extra['context_data'] = self._context.data.copy()
376
415
 
377
416
  # 添加敏感字段过滤
378
- log_extra['过滤'] = self.sensitive_fields
417
+ if self.sensitive_fields:
418
+ log_extra['过滤'] = self.sensitive_fields
379
419
 
380
420
  # 应用日志采样
381
421
  if self.sample_rate < 1.0 and level.lower() in ('debug', 'info'):
@@ -441,28 +481,6 @@ class MyLogger:
441
481
  if hasattr(self._context, 'data'):
442
482
  self._context.data.clear()
443
483
 
444
- def shutdown(self):
445
- """关闭日志记录器,确保所有日志被刷新"""
446
- if self.enable_async:
447
- self._stop_event.set()
448
- # 等待队列清空
449
- while not self._log_queue.empty():
450
- time.sleep(0.1)
451
- if self._async_thread and self._async_thread.is_alive():
452
- self._async_thread.join(timeout=2)
453
- if self._flush_thread and self._flush_thread.is_alive():
454
- self._flush_thread.join(timeout=2)
455
-
456
- # 确保所有handler被刷新
457
- self._flush_handlers()
458
-
459
- # 关闭所有handler
460
- for handler in self.logger.handlers:
461
- try:
462
- handler.close()
463
- except:
464
- pass
465
-
466
484
  def debug(self, message: str, extra: Optional[Dict] = None):
467
485
  """记录调试信息"""
468
486
  self.log('debug', message, extra)
@@ -487,41 +505,46 @@ class MyLogger:
487
505
  """记录异常信息"""
488
506
  if not extra:
489
507
  extra = {}
490
- # # 获取异常发生的实际位置
508
+
509
+ # # 获取完整的异常堆栈
491
510
  # tb = exc_info.__traceback__
511
+ # while tb.tb_next:
512
+ # tb = tb.tb_next # 获取最内层的堆栈帧
492
513
  #
493
- # if tb:
494
- # extra.update({
495
- # 'module': tb.tb_frame.f_globals.get('__name__', ''),
496
- # 'function': tb.tb_frame.f_code.co_name,
497
- # 'line': tb.tb_lineno,
498
- # 'file': tb.tb_frame.f_code.co_filename
499
- # })
500
- # extra['异常'] = str(exc_info)
501
- # extra['类型'] = exc_info.__class__.__name__
502
- # self.log('error', message, extra)
503
-
504
- # 获取完整的异常堆栈
505
- tb = exc_info.__traceback__
506
- while tb.tb_next:
507
- tb = tb.tb_next # 获取最内层的堆栈帧
508
-
509
- extra.update({
510
- 'module': tb.tb_frame.f_globals.get('__name__', ''),
511
- 'function': tb.tb_frame.f_code.co_name,
512
- 'line': tb.tb_lineno,
513
- 'file': tb.tb_frame.f_code.co_filename,
514
- '异常': str(exc_info),
515
- '类型': exc_info.__class__.__name__,
516
- '堆栈': self._format_traceback(exc_info)
517
- })
514
+ # extra.update({
515
+ # 'module': tb.tb_frame.f_globals.get('__name__', ''),
516
+ # 'function': tb.tb_frame.f_code.co_name,
517
+ # 'line': tb.tb_lineno,
518
+ # 'file': tb.tb_frame.f_code.co_filename,
519
+ # '异常': str(exc_info),
520
+ # '类型': exc_info.__class__.__name__,
521
+ # '堆栈': self._format_traceback(exc_info)
522
+ # })
523
+
524
+ # 使用inspect获取调用栈
525
+ frame = inspect.currentframe()
526
+ try:
527
+ # 向上追溯2层(1层是exception方法本身,2层是实际调用位置)
528
+ caller_frame = frame.f_back.f_back
529
+ extra.update({
530
+ 'module': caller_frame.f_globals.get('__name__', ''),
531
+ 'function': caller_frame.f_code.co_name,
532
+ 'line': caller_frame.f_lineno,
533
+ 'file': caller_frame.f_code.co_filename,
534
+ '异常': str(exc_info),
535
+ '类型': exc_info.__class__.__name__,
536
+ '堆栈': self._format_traceback(exc_info)
537
+ })
538
+ finally:
539
+ del frame # 避免循环引用
518
540
 
519
541
  # 直接使用logger的error方法记录,保留原始调用栈
520
542
  self.log('error', message, extra)
521
543
 
522
544
  def _format_traceback(self, exc_info):
523
545
  """格式化异常堆栈"""
524
- import traceback
546
+ if exc_info is None:
547
+ return ""
525
548
  return ''.join(traceback.format_exception(type(exc_info), exc_info, exc_info.__traceback__))
526
549
 
527
550
  def timeit(self, message: str = "Execution time"):
@@ -579,17 +602,40 @@ class MyLogger:
579
602
  except:
580
603
  pass
581
604
 
605
def shutdown(self):
    """
    Shut the logger down: stop background work, flush and close handlers.

    Steps, in order:
    1. In async mode, set ``self._stop_event`` and wait for
       ``self._log_queue`` to drain, but only while the async worker
       thread is still alive; then join the worker for at most 0.5 s.
    2. Call ``self._flush_handlers()`` and join the periodic flush thread
       (if one is running) for at most 0.5 s.
    3. Close every handler attached to ``self.logger``; errors raised by an
       individual handler are ignored so the remaining ones still close.
    """
    if self.enable_async:
        self._stop_event.set()
        worker = self._async_thread
        # Polling an unconsumed queue would never terminate, so stop
        # waiting as soon as there is no live consumer left.
        while not self._log_queue.empty() and worker is not None and worker.is_alive():
            time.sleep(0.1)
        if worker is not None and worker.is_alive():
            worker.join(timeout=0.5)

    # Flush unconditionally, whether or not a flush thread was started.
    self._flush_handlers()
    flusher = self._flush_thread
    if flusher is not None and flusher.is_alive():
        flusher.join(timeout=0.5)

    # Iterate over a copy in case closing a handler mutates the list.
    for handler in list(self.logger.handlers):
        try:
            handler.close()
        except Exception:
            # Best effort: the logging system is being torn down, so there
            # is nowhere reliable left to report a close failure.
            pass
627
+
582
628
  def main():
583
629
  # 创建日志记录器
584
630
  logger = MyLogger(
585
631
  name='my_app',
586
632
  logging_mode='both',
587
633
  log_level='DEBUG',
588
- log_file='/Users/xigua/Downloads/my_app.log',
634
+ log_file='my_app.log',
589
635
  log_format='json',
590
636
  max_log_size=50,
591
637
  backup_count=5,
592
- enable_async=True, # 是否启用异步日志
638
+ enable_async=False, # 是否启用异步日志
593
639
  sample_rate=1, # 采样50%的DEBUG/INFO日志
594
640
  sensitive_fields=[], # 敏感字段列表
595
641
  enable_metrics=False, # 是否启用性能指标
@@ -603,4 +649,3 @@ def main():
603
649
 
604
650
  if __name__ == '__main__':
605
651
  pass
606
- main()
@@ -17,14 +17,14 @@ from collections import defaultdict
17
17
  warnings.filterwarnings('ignore')
18
18
  logger = mylogger.MyLogger(
19
19
  name='deduplicator',
20
- logging_mode='none',
21
- log_level='error',
20
+ logging_mode='both',
21
+ log_level='info',
22
22
  log_file='deduplicator.log',
23
23
  log_format='json',
24
24
  max_log_size=50,
25
25
  backup_count=5,
26
26
  enable_async=False, # 是否启用异步日志
27
- sample_rate=0.5, # 采样50%的DEBUG/INFO日志
27
+ sample_rate=1, # 采样50%的DEBUG/INFO日志
28
28
  sensitive_fields=[], # 敏感字段列表
29
29
  )
30
30
 
@@ -89,6 +89,9 @@ class MySQLDeduplicator:
89
89
  :param retry_interval: 重试间隔(秒)
90
90
  :param pool_size: 连接池大小
91
91
  """
92
+ # 连接池状态标志
93
+ self._closed = False
94
+
92
95
  # 初始化连接池
93
96
  self.pool = PooledDB(
94
97
  creator=pymysql,
@@ -113,13 +116,12 @@ class MySQLDeduplicator:
113
116
  self._processing_tables = set() # 正在处理的表集合
114
117
 
115
118
  # 系统数据库列表
116
- self.SYSTEM_DATABASES = {
117
- 'information_schema', 'mysql',
118
- 'performance_schema', 'sys'
119
- }
119
+ self.SYSTEM_DATABASES = {'information_schema', 'mysql', 'performance_schema', 'sys'}
120
120
 
121
121
  def _get_connection(self):
122
122
  """从连接池获取连接"""
123
+ if self._closed:
124
+ raise ConnectionError("连接池已关闭")
123
125
  try:
124
126
  conn = self.pool.connection()
125
127
  logger.debug("成功获取数据库连接")
@@ -263,7 +265,9 @@ class MySQLDeduplicator:
263
265
 
264
266
  # 构建去重SQL
265
267
  column_list = ', '.join([f'`{col}`' for col in use_columns])
266
- temp_table = f"temp_{table}_{int(time.time())}"
268
+ # temp_table = f"temp_{table}_{int(time.time())}"
269
+ temp_table = f"temp_{table}_dedup_{os.getpid()}" # 使用进程ID构建临时表
270
+ temp_table = re.sub(r'[^a-zA-Z0-9_]', '_', temp_table) # 确保表名合法
267
271
 
268
272
  # 使用临时表方案处理去重,避免锁表问题
269
273
  create_temp_sql = f"""
@@ -556,13 +560,12 @@ class MySQLDeduplicator:
556
560
  def close(self):
557
561
  """关闭连接池"""
558
562
  try:
559
- if hasattr(self, 'pool') and self.pool:
563
+ if hasattr(self, 'pool') and self.pool and not self._closed:
560
564
  self.pool.close()
565
+ self._closed = True
561
566
  logger.info("数据库连接池已关闭")
562
567
  except Exception as e:
563
568
  logger.error(f"关闭连接池时出错: {str(e)}", {'error_type': type(e).__name__})
564
- finally:
565
- self.pool = None
566
569
 
567
570
  def __enter__(self):
568
571
  return self
@@ -574,7 +577,7 @@ class MySQLDeduplicator:
574
577
  def main():
575
578
  deduplicator = MySQLDeduplicator(
576
579
  username='root',
577
- password='pw',
580
+ password='188988yang188',
578
581
  host='localhost',
579
582
  port=3306
580
583
  )
mdbq/mysql/uploader.py CHANGED
@@ -17,20 +17,26 @@ from collections import OrderedDict
17
17
  warnings.filterwarnings('ignore')
18
18
  logger = mylogger.MyLogger(
19
19
  name='uploader',
20
- logging_mode='none',
21
- log_level='error',
20
+ logging_mode='both',
21
+ log_level='info',
22
22
  log_file='uploader.log',
23
23
  log_format='json',
24
24
  max_log_size=50,
25
25
  backup_count=5,
26
26
  enable_async=False, # 是否启用异步日志
27
- sample_rate=0.5, # 采样50%的DEBUG/INFO日志
27
+ sample_rate=1, # 采样50%的DEBUG/INFO日志
28
28
  sensitive_fields=[], # 敏感字段列表
29
29
  )
30
30
 
31
31
 
32
32
  def count_decimal_places(num_str):
33
- """ 计算小数位数, 允许科学计数法 """
33
+ """
34
+ 计算数字字符串的小数位数,支持科学计数法
35
+
36
+ :param num_str: 数字字符串
37
+ :return: 返回元组(整数位数, 小数位数)
38
+ :raises: 无显式抛出异常,但正则匹配失败时返回(0, 0)
39
+ """
34
40
  match = re.match(r'^[-+]?\d+(\.\d+)?([eE][-+]?\d+)?$', str(num_str))
35
41
  if match:
36
42
  # 如果是科学计数法
@@ -53,8 +59,13 @@ def count_decimal_places(num_str):
53
59
 
54
60
 
55
61
  class StatementCache(OrderedDict):
56
- """LRU缓存策略"""
62
+ """基于OrderedDict实现的LRU缓存策略,用于缓存SQL语句"""
57
63
  def __init__(self, maxsize=100):
64
+ """
65
+ 初始化缓存
66
+
67
+ :param maxsize: 最大缓存大小,默认为100
68
+ """
58
69
  super().__init__()
59
70
  self.maxsize = maxsize
60
71
 
@@ -119,7 +130,12 @@ class MySQLUploader:
119
130
  self.pool = self._create_connection_pool()
120
131
 
121
132
  def _create_connection_pool(self) -> PooledDB:
122
- """创建数据库连接池"""
133
+ """
134
+ 创建数据库连接池
135
+
136
+ :return: PooledDB连接池实例
137
+ :raises ConnectionError: 当连接池创建失败时抛出
138
+ """
123
139
  if hasattr(self, 'pool') and self.pool is not None and self._check_pool_health():
124
140
  return self.pool
125
141
 
@@ -157,22 +173,29 @@ class MySQLUploader:
157
173
 
158
174
  try:
159
175
  pool = PooledDB(**pool_params)
160
- elapsed = time.time() - start_time
176
+ elapsed = round(time.time() - start_time, 2)
161
177
  logger.info("连接池创建成功", {
162
178
  'pool_size': self.pool_size,
163
- 'time_elapsed': elapsed
179
+ '耗时': elapsed
164
180
  })
165
181
  return pool
166
182
  except Exception as e:
167
- elapsed = time.time() - start_time
183
+ elapsed = round(time.time() - start_time, 2)
168
184
  self.pool = None
169
185
  logger.error("连接池创建失败", {
170
186
  'error': str(e),
171
- 'time_elapsed': elapsed
187
+ '耗时': elapsed
172
188
  })
173
189
  raise ConnectionError(f"连接池创建失败: {str(e)}")
174
190
 
175
191
  def _execute_with_retry(self, func):
192
+ """
193
+ 带重试机制的装饰器,用于数据库操作
194
+
195
+ :param func: 被装饰的函数
196
+ :return: 装饰后的函数
197
+ :raises: 可能抛出原始异常或最后一次重试的异常
198
+ """
176
199
  @wraps(func)
177
200
  def wrapper(*args, **kwargs):
178
201
  last_exception = None
@@ -187,18 +210,18 @@ class MySQLUploader:
187
210
  for attempt in range(self.max_retries):
188
211
  try:
189
212
  result = func(*args, **kwargs)
190
- elapsed = time.time() - start_time
213
+ elapsed = round(time.time() - start_time, 2)
191
214
 
192
215
  if attempt > 0:
193
216
  logger.info("操作成功(重试后)", {
194
217
  'operation': operation,
195
218
  'attempts': attempt + 1,
196
- 'time_elapsed': elapsed
219
+ '耗时': elapsed
197
220
  })
198
221
  else:
199
222
  logger.debug("操作成功", {
200
223
  'operation': operation,
201
- 'time_elapsed': elapsed
224
+ '耗时': elapsed
202
225
  })
203
226
 
204
227
  return result
@@ -230,15 +253,15 @@ class MySQLUploader:
230
253
  'error': str(reconnect_error)
231
254
  })
232
255
  else:
233
- elapsed = time.time() - start_time
234
- error_details['time_elapsed'] = elapsed
256
+ elapsed = round(time.time() - start_time, 2)
257
+ error_details['耗时'] = elapsed
235
258
  logger.error(f"操作最终失败 {error_details}")
236
259
 
237
260
  except pymysql.IntegrityError as e:
238
- elapsed = time.time() - start_time
261
+ elapsed = round(time.time() - start_time, 2)
239
262
  logger.error("完整性约束错误", {
240
263
  'operation': operation,
241
- 'time_elapsed': elapsed,
264
+ '耗时': elapsed,
242
265
  'error_code': e.args[0] if e.args else None,
243
266
  'error_message': e.args[1] if len(e.args) > 1 else None
244
267
  })
@@ -246,10 +269,10 @@ class MySQLUploader:
246
269
 
247
270
  except Exception as e:
248
271
  last_exception = e
249
- elapsed = time.time() - start_time
272
+ elapsed = round(time.time() - start_time, 2)
250
273
  logger.error("发生意外错误", {
251
274
  'operation': operation,
252
- 'time_elapsed': elapsed,
275
+ '耗时': elapsed,
253
276
  'error_type': type(e).__name__,
254
277
  'error_message': str(e),
255
278
  'error_args': e.args if hasattr(e, 'args') else None
@@ -261,7 +284,12 @@ class MySQLUploader:
261
284
  return wrapper
262
285
 
263
286
  def _get_connection(self):
264
- """从连接池获取连接"""
287
+ """
288
+ 从连接池获取数据库连接
289
+
290
+ :return: 数据库连接对象
291
+ :raises ConnectionError: 当获取连接失败时抛出
292
+ """
265
293
  try:
266
294
  conn = self.pool.connection()
267
295
  logger.debug("获取数据库连接")
@@ -271,7 +299,13 @@ class MySQLUploader:
271
299
  raise ConnectionError(f"连接数据库失败: {str(e)}")
272
300
 
273
301
  def _check_database_exists(self, db_name: str) -> bool:
274
- """检查数据库是否存在"""
302
+ """
303
+ 检查数据库是否存在
304
+
305
+ :param db_name: 数据库名称
306
+ :return: 存在返回True,否则返回False
307
+ :raises: 可能抛出数据库相关异常
308
+ """
275
309
  db_name = self._validate_identifier(db_name)
276
310
  sql = "SELECT SCHEMA_NAME FROM INFORMATION_SCHEMA.SCHEMATA WHERE SCHEMA_NAME = %s"
277
311
 
@@ -287,7 +321,12 @@ class MySQLUploader:
287
321
  raise
288
322
 
289
323
  def _create_database(self, db_name: str):
290
- """创建数据库"""
324
+ """
325
+ 创建数据库
326
+
327
+ :param db_name: 要创建的数据库名称
328
+ :raises: 可能抛出数据库相关异常
329
+ """
291
330
  db_name = self._validate_identifier(db_name)
292
331
  sql = f"CREATE DATABASE IF NOT EXISTS `{db_name}` CHARACTER SET {self.charset} COLLATE {self.collation}"
293
332
 
@@ -329,18 +368,17 @@ class MySQLUploader:
329
368
  elif partition_by == 'month':
330
369
  return f"{table_name}_{date_obj.year}_{date_obj.month:02d}"
331
370
  else:
332
- error_msg = "partition_by must be 'year' or 'month'"
371
+ error_msg = "分表方式必须是 'year' 'month'"
333
372
  logger.error(error_msg)
334
373
  raise ValueError(error_msg)
335
374
 
336
375
  def _validate_identifier(self, identifier: str) -> str:
337
376
  """
338
- 验证并清理数据库标识符(数据库名、表名、列名)
339
- 防止SQL注入和非法字符
377
+ 验证并清理数据库标识符(表名、列名等)
340
378
 
341
379
  :param identifier: 要验证的标识符
342
380
  :return: 清理后的安全标识符
343
- :raises ValueError: 如果标识符无效
381
+ :raises ValueError: 当标识符无效时抛出
344
382
  """
345
383
  if not identifier or not isinstance(identifier, str):
346
384
  error_msg = f"无效的标识符: {identifier}"
@@ -366,7 +404,14 @@ class MySQLUploader:
366
404
  return cleaned
367
405
 
368
406
  def _check_table_exists(self, db_name: str, table_name: str) -> bool:
369
- """检查表是否存在"""
407
+ """
408
+ 检查表是否存在
409
+
410
+ :param db_name: 数据库名
411
+ :param table_name: 表名
412
+ :return: 存在返回True,否则返回False
413
+ :raises: 可能抛出数据库相关异常
414
+ """
370
415
  cache_key = f"{db_name}.{table_name}"
371
416
  if cache_key in self._table_metadata_cache:
372
417
  cached_time, result = self._table_metadata_cache[cache_key]
@@ -410,9 +455,11 @@ class MySQLUploader:
410
455
  :param db_name: 数据库名
411
456
  :param table_name: 表名
412
457
  :param set_typ: 列名和数据类型字典 {列名: 数据类型}
413
- :param primary_keys: 主键列列表
414
- :param date_column: 日期列名,如果存在将设置为索引
415
- :param indexes: 需要创建索引的列列表
458
+ :param primary_keys: 主键列列表,可选
459
+ :param date_column: 日期列名,可选,如果存在将设置为索引
460
+ :param indexes: 需要创建索引的列列表,可选
461
+ :param allow_null: 是否允许空值,默认为False
462
+ :raises: 可能抛出数据库相关异常
416
463
  """
417
464
  db_name = self._validate_identifier(db_name)
418
465
  table_name = self._validate_identifier(table_name)
@@ -501,7 +548,14 @@ class MySQLUploader:
501
548
  raise
502
549
 
503
550
  def _validate_datetime(self, value, date_type=False):
504
- """date_type: 返回字符串类型或者日期类型"""
551
+ """
552
+ 验证并标准化日期时间格式
553
+
554
+ :param value: 日期时间值
555
+ :param date_type: 是否返回日期类型(True)或字符串(False)
556
+ :return: 标准化后的日期时间字符串或日期对象
557
+ :raises ValueError: 当日期格式无效时抛出
558
+ """
505
559
  formats = [
506
560
  '%Y-%m-%d %H:%M:%S',
507
561
  '%Y-%m-%d',
@@ -524,26 +578,42 @@ class MySQLUploader:
524
578
  continue
525
579
  raise ValueError(f"无效的日期格式2: {value}")
526
580
 
527
- def _validate_value(self, value: Any, column_type: str) -> Any:
581
+ def _validate_value(self, value: Any, column_type: str, allow_null: bool) -> Any:
528
582
  """
529
- 验证并清理数据值,根据列类型进行适当转换
583
+ 根据列类型验证并转换数据值
530
584
 
531
585
  :param value: 要验证的值
532
586
  :param column_type: 列的数据类型
533
- :return: 清理后的值
534
- :raises ValueError: 如果值转换失败
587
+ :param allow_null: 是否允许空值
588
+ :return: 转换后的值
589
+ :raises ValueError: 当值转换失败时抛出
535
590
  """
536
591
  if value is None:
592
+ if not allow_null:
593
+ return 'none'
537
594
  return None
538
595
 
539
596
  try:
540
597
  column_type_lower = column_type.lower()
541
598
 
599
+ # 处理百分比值
600
+ if isinstance(value, str) and '%' in value:
601
+ try:
602
+ # 移除百分号并转换为小数
603
+ percent_value = float(value.strip().replace('%', ''))
604
+ decimal_value = percent_value / 100
605
+ return decimal_value
606
+ except ValueError:
607
+ pass # 如果不是有效的百分比数字,继续正常处理
608
+
542
609
  if 'int' in column_type_lower:
543
610
  if isinstance(value, (str, bytes)) and not value.strip().isdigit():
544
611
  raise ValueError("非数字字符串无法转换为整数")
545
612
  return int(value)
546
613
  elif any(t in column_type_lower for t in ['float', 'double', 'decimal']):
614
+ if isinstance(value, str):
615
+ # 处理可能包含逗号的数字字符串
616
+ value = value.replace(',', '')
547
617
  return float(value) if value is not None else None
548
618
  elif '日期' in column_type_lower or 'time' in column_type_lower:
549
619
  if isinstance(value, (datetime.datetime, pd.Timestamp)):
@@ -570,7 +640,14 @@ class MySQLUploader:
570
640
  raise ValueError(error_msg)
571
641
 
572
642
  def _get_table_columns(self, db_name: str, table_name: str) -> Dict[str, str]:
573
- """获取表的列名和数据类型"""
643
+ """
644
+ 获取表的列名和数据类型
645
+
646
+ :param db_name: 数据库名
647
+ :param table_name: 表名
648
+ :return: 列名和数据类型字典 {列名: 数据类型}
649
+ :raises: 可能抛出数据库相关异常
650
+ """
574
651
  db_name = self._validate_identifier(db_name)
575
652
  table_name = self._validate_identifier(table_name)
576
653
  sql = """
@@ -639,7 +716,7 @@ class MySQLUploader:
639
716
 
640
717
  def _infer_data_type(self, value: Any) -> str:
641
718
  """
642
- 根据值推断合适的数据类型
719
+ 根据值推断合适的MySQL数据类型
643
720
 
644
721
  :param value: 要推断的值
645
722
  :return: MySQL数据类型字符串
@@ -647,6 +724,10 @@ class MySQLUploader:
647
724
  if value is None:
648
725
  return 'VARCHAR(255)' # 默认字符串类型
649
726
 
727
+ # 检查是否是百分比字符串
728
+ if isinstance(value, str) and '%' in value:
729
+ return 'DECIMAL(10,4)' # 百分比统一使用DECIMAL(10,4)
730
+
650
731
  if isinstance(value, bool):
651
732
  return 'TINYINT(1)'
652
733
  elif isinstance(value, int):
@@ -661,7 +742,10 @@ class MySQLUploader:
661
742
  else:
662
743
  return 'BIGINT'
663
744
  elif isinstance(value, float):
664
- return 'DECIMAL(10,2)'
745
+ # 计算小数位数
746
+ num_str = str(value)
747
+ _, decimal_places = count_decimal_places(num_str)
748
+ return f'DECIMAL(20,{min(decimal_places, 6)})' # 限制最大6位小数
665
749
  elif isinstance(value, (datetime.datetime, pd.Timestamp)):
666
750
  return 'DATETIME'
667
751
  elif isinstance(value, datetime.date):
@@ -694,15 +778,15 @@ class MySQLUploader:
694
778
  data: Union[Dict, List[Dict], pd.DataFrame],
695
779
  set_typ: Dict[str, str],
696
780
  allow_null: bool = False
697
- ) -> List[Dict]:
781
+ ) -> Tuple[List[Dict], Dict[str, str]]:
698
782
  """
699
783
  准备要上传的数据,验证并转换数据类型
700
784
 
701
- :param data: 输入数据
785
+ :param data: 输入数据,可以是字典、字典列表或DataFrame
702
786
  :param set_typ: 列名和数据类型字典 {列名: 数据类型}
703
787
  :param allow_null: 是否允许空值
704
- :return: 待上传的数据列表和对应的数据类型
705
- :raises ValueError: 如果数据验证失败
788
+ :return: 元组(准备好的数据列表, 过滤后的列类型字典)
789
+ :raises ValueError: 当数据验证失败时抛出
706
790
  """
707
791
  # 统一数据格式为字典列表
708
792
  if isinstance(data, pd.DataFrame):
@@ -764,7 +848,7 @@ class MySQLUploader:
764
848
  prepared_row[col_name] = None
765
849
  else:
766
850
  try:
767
- prepared_row[col_name] = self._validate_value(row[col_name], filtered_set_typ[col_name])
851
+ prepared_row[col_name] = self._validate_value(row[col_name], filtered_set_typ[col_name], allow_null)
768
852
  except ValueError as e:
769
853
  error_msg = f"Row {row_idx}, column '{col_name}': {str(e)}"
770
854
  logger.error(error_msg)
@@ -790,7 +874,21 @@ class MySQLUploader:
790
874
  indexes: Optional[List[str]] = None
791
875
  ):
792
876
  """
793
- 上传数据到数据库
877
+ 上传数据到数据库的主入口方法
878
+
879
+ :param db_name: 数据库名
880
+ :param table_name: 表名
881
+ :param data: 要上传的数据
882
+ :param set_typ: 列名和数据类型字典 {列名: 数据类型}
883
+ :param primary_keys: 主键列列表,可选
884
+ :param check_duplicate: 是否检查重复数据,默认为False
885
+ :param duplicate_columns: 用于检查重复的列,可选
886
+ :param allow_null: 是否允许空值,默认为False
887
+ :param partition_by: 分表方式('year'或'month'),可选
888
+ :param partition_date_column: 用于分表的日期列名,默认为'日期'
889
+ :param auto_create: 表不存在时是否自动创建,默认为True
890
+ :param indexes: 需要创建索引的列列表,可选
891
+ :raises: 可能抛出各种验证和数据库相关异常
794
892
  """
795
893
  upload_start = time.time()
796
894
  initial_row_count = len(data) if hasattr(data, '__len__') else 1
@@ -799,21 +897,21 @@ class MySQLUploader:
799
897
  success_flag = False
800
898
 
801
899
  logger.info("开始上传数据", {
802
- 'batch_id': batch_id,
900
+ '批次号': batch_id,
803
901
  'database': db_name,
804
902
  'table': table_name,
805
- 'partition_by': partition_by,
806
- 'check_duplicate': check_duplicate,
807
- 'row_count': len(data) if hasattr(data, '__len__') else 1,
808
- 'auto_create': auto_create
903
+ '分表方式': partition_by,
904
+ '是否排重': check_duplicate,
905
+ '总计行数': len(data) if hasattr(data, '__len__') else 1,
906
+ '自动建表': auto_create
809
907
  })
810
908
 
811
909
  try:
812
- # 验证参数
813
- if not set_typ:
814
- error_msg = "列的数据类型缺失"
815
- logger.error(error_msg)
816
- raise ValueError(error_msg)
910
+ # # 验证参数
911
+ # if not set_typ:
912
+ # error_msg = "列的数据类型缺失"
913
+ # logger.error(error_msg)
914
+ # raise ValueError(error_msg)
817
915
 
818
916
  if partition_by and partition_by not in ['year', 'month']:
819
917
  error_msg = "分表方式必须是 'year' 或 'month'"
@@ -821,7 +919,7 @@ class MySQLUploader:
821
919
  raise ValueError(error_msg)
822
920
 
823
921
  # 准备数据
824
- prepared_data, set_typ = self._prepare_data(data, set_typ, allow_null)
922
+ prepared_data, filtered_set_typ = self._prepare_data(data, set_typ, allow_null)
825
923
 
826
924
  # 检查数据库是否存在
827
925
  if not self._check_database_exists(db_name):
@@ -861,7 +959,7 @@ class MySQLUploader:
861
959
  for part_table, part_data in partitioned_data.items():
862
960
  try:
863
961
  self._upload_to_table(
864
- db_name, part_table, part_data, set_typ,
962
+ db_name, part_table, part_data, filtered_set_typ,
865
963
  primary_keys, check_duplicate, duplicate_columns,
866
964
  allow_null, auto_create, partition_date_column,
867
965
  indexes, batch_id
@@ -875,7 +973,7 @@ class MySQLUploader:
875
973
  else:
876
974
  # 不分表,直接上传
877
975
  self._upload_to_table(
878
- db_name, table_name, prepared_data, set_typ,
976
+ db_name, table_name, prepared_data, filtered_set_typ,
879
977
  primary_keys, check_duplicate, duplicate_columns,
880
978
  allow_null, auto_create, partition_date_column,
881
979
  indexes, batch_id
@@ -889,12 +987,12 @@ class MySQLUploader:
889
987
  'error_type': type(e).__name__
890
988
  })
891
989
  finally:
892
- elapsed = time.time() - upload_start
990
+ elapsed = round(time.time() - upload_start, 2)
893
991
  logger.info("上传处理完成", {
894
- 'batch_id': batch_id,
992
+ '批次号': batch_id,
895
993
  'success': success_flag,
896
- 'time_elapsed': elapsed,
897
- 'initial_row_count': initial_row_count
994
+ '耗时': elapsed,
995
+ '数据行': initial_row_count
898
996
  })
899
997
 
900
998
  def _insert_data(
@@ -909,17 +1007,16 @@ class MySQLUploader:
909
1007
  batch_id: Optional[str] = None
910
1008
  ):
911
1009
  """
912
- 插入数据到表中
913
-
914
- 参数:
915
- db_name: 数据库名
916
- table_name: 表名
917
- data: 要插入的数据列表
918
- set_typ: 列名和数据类型字典 {列名: 数据类型}
919
- check_duplicate: 是否检查重复
920
- duplicate_columns: 用于检查重复的列(为空时检查所有列)
921
- batch_size: 批量插入大小
922
- batch_id: 批次ID用于日志追踪
1010
+ 实际执行数据插入的方法
1011
+
1012
+ :param db_name: 数据库名
1013
+ :param table_name: 表名
1014
+ :param data: 要插入的数据列表
1015
+ :param set_typ: 列名和数据类型字典 {列名: 数据类型}
1016
+ :param check_duplicate: 是否检查重复数据,默认为False
1017
+ :param duplicate_columns: 用于检查重复的列,可选
1018
+ :param batch_size: 批量插入大小,默认为1000
1019
+ :param batch_id: 批次ID用于日志追踪,可选
923
1020
  """
924
1021
  if not data:
925
1022
  return
@@ -998,13 +1095,13 @@ class MySQLUploader:
998
1095
 
999
1096
  # 记录失败行详细信息
1000
1097
  error_details = {
1001
- 'batch_id': batch_id,
1098
+ '批次号': batch_id,
1002
1099
  'database': db_name,
1003
1100
  'table': table_name,
1004
1101
  'error_type': type(e).__name__,
1005
1102
  'error_message': str(e),
1006
1103
  'column_types': set_typ,
1007
- 'duplicate_check': check_duplicate,
1104
+ '是否排重': check_duplicate,
1008
1105
  'duplicate_columns': duplicate_columns
1009
1106
  }
1010
1107
  logger.error(f"单行插入失败: {error_details}")
@@ -1019,28 +1116,31 @@ class MySQLUploader:
1019
1116
  else:
1020
1117
  total_inserted += successful_rows
1021
1118
 
1022
- batch_elapsed = time.time() - batch_start
1119
+ batch_elapsed = round(time.time() - batch_start, 2)
1023
1120
  batch_info = {
1024
- 'batch_id': batch_id,
1121
+ '批次号': batch_id,
1025
1122
  'batch_index': i // batch_size + 1,
1026
1123
  'total_batches': (len(data) + batch_size - 1) // batch_size,
1027
1124
  'batch_size': len(batch),
1028
1125
  'successful_rows': successful_rows,
1029
1126
  'failed_rows': len(batch) - successful_rows,
1030
- 'time_elapsed': batch_elapsed,
1127
+ '耗时': batch_elapsed,
1031
1128
  'rows_per_second': successful_rows / batch_elapsed if batch_elapsed > 0 else 0
1032
1129
  }
1033
1130
  logger.debug(f"批次处理完成 {batch_info}")
1034
1131
 
1035
1132
  logger.info("数据插入完成", {
1036
- 'total_rows': len(data),
1037
- 'inserted_rows': total_inserted,
1038
- 'skipped_rows': total_skipped,
1039
- 'failed_rows': total_failed
1133
+ '总数据行': len(data),
1134
+ '插入行数': total_inserted,
1135
+ '跳过行数': total_skipped,
1136
+ '失败行数': total_failed
1040
1137
  })
1041
1138
 
1042
1139
  def close(self):
1043
- """关闭连接池并记录最终指标"""
1140
+ """
1141
+ 关闭连接池并清理资源
1142
+ :raises: 可能抛出关闭连接时的异常
1143
+ """
1044
1144
  close_start = time.time()
1045
1145
 
1046
1146
  try:
@@ -1057,18 +1157,22 @@ class MySQLUploader:
1057
1157
 
1058
1158
  elapsed = round(time.time() - close_start, 2)
1059
1159
  logger.info("连接池已关闭", {
1060
- 'close_time_elapsed': elapsed
1160
+ '耗时': elapsed
1061
1161
  })
1062
1162
  except Exception as e:
1063
1163
  elapsed = round(time.time() - close_start, 2)
1064
1164
  logger.error("关闭连接池失败", {
1065
1165
  'error': str(e),
1066
- 'close_time_elapsed': elapsed
1166
+ '耗时': elapsed
1067
1167
  })
1068
1168
  raise
1069
1169
 
1070
1170
  def _check_pool_health(self):
1071
- """定期检查连接池健康状态"""
1171
+ """
1172
+ 检查连接池健康状态
1173
+
1174
+ :return: 连接池健康返回True,否则返回False
1175
+ """
1072
1176
  try:
1073
1177
  conn = self.pool.connection()
1074
1178
  conn.ping(reconnect=True)
@@ -1093,10 +1197,10 @@ class MySQLUploader:
1093
1197
  if attempt < max_retries - 1:
1094
1198
  time.sleep(delay * (attempt + 1))
1095
1199
  continue
1096
- raise MySQLUploaderError(f"操作重试{max_retries}次后失败") from e
1200
+ raise logger.error(f"操作重试{max_retries}次后失败")
1097
1201
  except Exception as e:
1098
- raise MySQLUploaderError(f"操作失败: {str(e)}") from e
1099
- raise last_exception if last_exception else MySQLUploaderError("未知错误")
1202
+ raise logger.error(f"操作失败: {str(e)}")
1203
+ raise last_exception if last_exception else logger.error("操作重试失败,未知错误")
1100
1204
 
1101
1205
  return wrapper
1102
1206
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: mdbq
3
- Version: 3.9.7
3
+ Version: 3.9.9
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -1,18 +1,18 @@
1
1
  mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
2
- mdbq/__version__.py,sha256=-IZp-xcG1aWVfit9XDMRHuI2-WEcYyXeULDeK2w-mPI,17
2
+ mdbq/__version__.py,sha256=Z38j4uvZuqpFYiUEq0FTd82-1Y90RoVwpNEDWVHNTkk,17
3
3
  mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
4
4
  mdbq/aggregation/optimize.py,sha256=2oalzD9weZhDclUC22OLxYa8Zj7KnmsGUoUau_Jlyc4,19796
5
5
  mdbq/aggregation/query_data.py,sha256=5_OzjGR5Sq00q-EgAYmSE5V9i4Solw9y4hkldl4mvt8,179808
6
6
  mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
7
7
  mdbq/config/config.py,sha256=eaTfrfXQ65xLqjr5I8-HkZd_jEY1JkGinEgv3TSLeoQ,3170
8
8
  mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
9
- mdbq/log/mylogger.py,sha256=T1s94-2ERWjFkzSKJKH0EgtqWdy0DE_OGb5-Ds5vYgk,22596
9
+ mdbq/log/mylogger.py,sha256=jHCVO7KPQrg2kcCaIrakHivZmFBJyy-24sIn2rsbK4Y,24440
10
10
  mdbq/log/spider_logging.py,sha256=-ozWWEGm3HVv604ozs_OOvVwumjokmUPwbaodesUrPY,1664
11
11
  mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
12
- mdbq/mysql/deduplicator.py,sha256=sd5R-6Y00yGQ2PFTW3jkXPvJ-_OFEQZCjXM99nRHa8Q,21670
12
+ mdbq/mysql/deduplicator.py,sha256=brhX3eyE8-kn3nAYweKfBbAkXiNcyw_pL4CTyPqmPBg,21983
13
13
  mdbq/mysql/mysql.py,sha256=jTcizvUtRdwMhWK2i_LA9yDPmcifLjUzVhwTbC3wfJk,119785
14
14
  mdbq/mysql/s_query.py,sha256=X055aLRAgxVvueXx4NbfNjp6MyBI02_XBb1pTKw09L0,8660
15
- mdbq/mysql/uploader.py,sha256=tb84-rJtrwla7SeBXL9EWzn0vIKnsE_9FebGNIoKrmU,45043
15
+ mdbq/mysql/uploader.py,sha256=mIgUnV7MwIkrbG-dchMkMzWo_N-XrQROLWTGGGuD_ts,49171
16
16
  mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
17
17
  mdbq/other/download_sku_picture.py,sha256=YU8DxKMXbdeE1OOKEA848WVp62jYHw5O4tXTjUdq9H0,44832
18
18
  mdbq/other/otk.py,sha256=iclBIFbQbhlqzUbcMMoePXBpcP1eZ06ZtjnhcA_EbmE,7241
@@ -25,7 +25,7 @@ mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
25
25
  mdbq/redis/getredis.py,sha256=Uk8-cOWT0JU1qRyIVqdbYokSLvkDIAfcokmYj1ebw8k,24104
26
26
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
27
27
  mdbq/spider/aikucun.py,sha256=OhyEv1VyAKTOHjLDM37iNDQeRg5OnrNoKODoG2VxHes,19806
28
- mdbq-3.9.7.dist-info/METADATA,sha256=yX7vEbqUQIMoaJXh6VGpWKyYa5Ge0-ePKoxET8Y6LBM,363
29
- mdbq-3.9.7.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
30
- mdbq-3.9.7.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
31
- mdbq-3.9.7.dist-info/RECORD,,
28
+ mdbq-3.9.9.dist-info/METADATA,sha256=F6RAyI8aGmpT-VLwVeY7jw13qemIce-PMH2Ri335GAE,363
29
+ mdbq-3.9.9.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
30
+ mdbq-3.9.9.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
31
+ mdbq-3.9.9.dist-info/RECORD,,
File without changes