mdbq 3.9.7__py3-none-any.whl → 3.9.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbq/__version__.py CHANGED
@@ -1 +1 @@
1
- VERSION = '3.9.7'
1
+ VERSION = '3.9.8'
mdbq/log/mylogger.py CHANGED
@@ -9,6 +9,8 @@ import threading
9
9
  import queue
10
10
  from typing import Optional, Dict, Any, List, Callable, Union
11
11
  import atexit
12
+ import traceback
13
+ import inspect
12
14
 
13
15
  try:
14
16
  import psutil
@@ -123,6 +125,7 @@ class MyLogger:
123
125
  # 定时刷新相关
124
126
  self._flush_thread = None
125
127
  self._last_flush_time = 0
128
+ self._start_flush_thread()
126
129
 
127
130
  # 创建日志记录器
128
131
  self.logger = logging.getLogger(name)
@@ -219,12 +222,9 @@ class MyLogger:
219
222
  'timestamp': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
220
223
  'level': record.levelname,
221
224
  'message': record.getMessage(),
222
- # 'module': record.module,
223
- 'function': record.funcName,
224
- # 'line': record.lineno,
225
- # 'thread': record.threadName,
226
- # 'process': record.processName,
227
225
  'name': record.name,
226
+ # 'module': record.module,
227
+ # 'function': record.funcName,
228
228
  }
229
229
 
230
230
  # 添加额外字段
@@ -347,6 +347,45 @@ class MyLogger:
347
347
  )
348
348
  self._async_thread.start()
349
349
 
350
+ def log_error_handler(retry_times=0, fallback_level='error'):
351
+ """
352
+ 日志错误处理装饰器
353
+
354
+ 参数:
355
+ - retry_times: 异常时重试次数
356
+ - fallback_level: 降级日志级别
357
+ """
358
+
359
+ def decorator(log_method):
360
+ def wrapper(self, level: str, message: str, extra: Optional[Dict] = None):
361
+ last_exception = None
362
+ for attempt in range(retry_times + 1):
363
+ try:
364
+ return log_method(self, level, message, extra)
365
+ except Exception as e:
366
+ last_exception = e
367
+ if attempt < retry_times:
368
+ time.sleep(0.1 * (attempt + 1)) # 简单的退避策略
369
+ continue
370
+
371
+ try:
372
+ # 降级处理
373
+ logging.basicConfig()
374
+ fallback_logger = logging.getLogger(f"{getattr(self, 'name', 'mylogger')}_fallback")
375
+ fallback_msg = f"[降级处理] {message}"[:1000]
376
+ getattr(fallback_logger, fallback_level)(
377
+ f"日志记录失败(尝试{attempt + 1}次): {e}\n原始消息: {fallback_msg}"
378
+ )
379
+ except:
380
+ sys.stderr.write(f"严重: 日志系统完全失败 - {last_exception}\n")
381
+
382
+ return None
383
+
384
+ return wrapper
385
+
386
+ return decorator
387
+
388
+ @log_error_handler(retry_times=1, fallback_level='warning')
350
389
  def _sync_log(self, level: str, message: str, extra: Optional[Dict] = None):
351
390
  """同步日志记录"""
352
391
  if not hasattr(self.logger, level.lower()):
@@ -375,7 +414,8 @@ class MyLogger:
375
414
  log_extra['context_data'] = self._context.data.copy()
376
415
 
377
416
  # 添加敏感字段过滤
378
- log_extra['过滤'] = self.sensitive_fields
417
+ if self.sensitive_fields:
418
+ log_extra['过滤'] = self.sensitive_fields
379
419
 
380
420
  # 应用日志采样
381
421
  if self.sample_rate < 1.0 and level.lower() in ('debug', 'info'):
@@ -441,28 +481,6 @@ class MyLogger:
441
481
  if hasattr(self._context, 'data'):
442
482
  self._context.data.clear()
443
483
 
444
- def shutdown(self):
445
- """关闭日志记录器,确保所有日志被刷新"""
446
- if self.enable_async:
447
- self._stop_event.set()
448
- # 等待队列清空
449
- while not self._log_queue.empty():
450
- time.sleep(0.1)
451
- if self._async_thread and self._async_thread.is_alive():
452
- self._async_thread.join(timeout=2)
453
- if self._flush_thread and self._flush_thread.is_alive():
454
- self._flush_thread.join(timeout=2)
455
-
456
- # 确保所有handler被刷新
457
- self._flush_handlers()
458
-
459
- # 关闭所有handler
460
- for handler in self.logger.handlers:
461
- try:
462
- handler.close()
463
- except:
464
- pass
465
-
466
484
  def debug(self, message: str, extra: Optional[Dict] = None):
467
485
  """记录调试信息"""
468
486
  self.log('debug', message, extra)
@@ -487,41 +505,46 @@ class MyLogger:
487
505
  """记录异常信息"""
488
506
  if not extra:
489
507
  extra = {}
490
- # # 获取异常发生的实际位置
508
+
509
+ # # 获取完整的异常堆栈
491
510
  # tb = exc_info.__traceback__
511
+ # while tb.tb_next:
512
+ # tb = tb.tb_next # 获取最内层的堆栈帧
492
513
  #
493
- # if tb:
494
- # extra.update({
495
- # 'module': tb.tb_frame.f_globals.get('__name__', ''),
496
- # 'function': tb.tb_frame.f_code.co_name,
497
- # 'line': tb.tb_lineno,
498
- # 'file': tb.tb_frame.f_code.co_filename
499
- # })
500
- # extra['异常'] = str(exc_info)
501
- # extra['类型'] = exc_info.__class__.__name__
502
- # self.log('error', message, extra)
503
-
504
- # 获取完整的异常堆栈
505
- tb = exc_info.__traceback__
506
- while tb.tb_next:
507
- tb = tb.tb_next # 获取最内层的堆栈帧
508
-
509
- extra.update({
510
- 'module': tb.tb_frame.f_globals.get('__name__', ''),
511
- 'function': tb.tb_frame.f_code.co_name,
512
- 'line': tb.tb_lineno,
513
- 'file': tb.tb_frame.f_code.co_filename,
514
- '异常': str(exc_info),
515
- '类型': exc_info.__class__.__name__,
516
- '堆栈': self._format_traceback(exc_info)
517
- })
514
+ # extra.update({
515
+ # 'module': tb.tb_frame.f_globals.get('__name__', ''),
516
+ # 'function': tb.tb_frame.f_code.co_name,
517
+ # 'line': tb.tb_lineno,
518
+ # 'file': tb.tb_frame.f_code.co_filename,
519
+ # '异常': str(exc_info),
520
+ # '类型': exc_info.__class__.__name__,
521
+ # '堆栈': self._format_traceback(exc_info)
522
+ # })
523
+
524
+ # 使用inspect获取调用栈
525
+ frame = inspect.currentframe()
526
+ try:
527
+ # 向上追溯2层(1层是exception方法本身,2层是实际调用位置)
528
+ caller_frame = frame.f_back.f_back
529
+ extra.update({
530
+ 'module': caller_frame.f_globals.get('__name__', ''),
531
+ 'function': caller_frame.f_code.co_name,
532
+ 'line': caller_frame.f_lineno,
533
+ 'file': caller_frame.f_code.co_filename,
534
+ '异常': str(exc_info),
535
+ '类型': exc_info.__class__.__name__,
536
+ '堆栈': self._format_traceback(exc_info)
537
+ })
538
+ finally:
539
+ del frame # 避免循环引用
518
540
 
519
541
  # 直接使用logger的error方法记录,保留原始调用栈
520
542
  self.log('error', message, extra)
521
543
 
522
544
  def _format_traceback(self, exc_info):
523
545
  """格式化异常堆栈"""
524
- import traceback
546
+ if exc_info is None:
547
+ return ""
525
548
  return ''.join(traceback.format_exception(type(exc_info), exc_info, exc_info.__traceback__))
526
549
 
527
550
  def timeit(self, message: str = "Execution time"):
@@ -579,17 +602,40 @@ class MyLogger:
579
602
  except:
580
603
  pass
581
604
 
605
+ def shutdown(self):
606
+ """关闭日志记录器,确保所有日志被刷新"""
607
+ if self.enable_async:
608
+ self._stop_event.set()
609
+ # 等待队列清空
610
+ while not self._log_queue.empty():
611
+ time.sleep(0.1)
612
+ if self._async_thread and self._async_thread.is_alive():
613
+ self._async_thread.join(timeout=0.5)
614
+
615
+ # 确保所有handler被刷新
616
+ if self._flush_thread:
617
+ self._flush_handlers()
618
+ if self._flush_thread.is_alive():
619
+ self._flush_thread.join(timeout=0.5)
620
+
621
+ # 关闭所有handler
622
+ for handler in self.logger.handlers:
623
+ try:
624
+ handler.close()
625
+ except:
626
+ pass
627
+
582
628
  def main():
583
629
  # 创建日志记录器
584
630
  logger = MyLogger(
585
631
  name='my_app',
586
632
  logging_mode='both',
587
633
  log_level='DEBUG',
588
- log_file='/Users/xigua/Downloads/my_app.log',
634
+ log_file='my_app.log',
589
635
  log_format='json',
590
636
  max_log_size=50,
591
637
  backup_count=5,
592
- enable_async=True, # 是否启用异步日志
638
+ enable_async=False, # 是否启用异步日志
593
639
  sample_rate=1, # 采样50%的DEBUG/INFO日志
594
640
  sensitive_fields=[], # 敏感字段列表
595
641
  enable_metrics=False, # 是否启用性能指标
@@ -603,4 +649,3 @@ def main():
603
649
 
604
650
  if __name__ == '__main__':
605
651
  pass
606
- main()
@@ -89,6 +89,9 @@ class MySQLDeduplicator:
89
89
  :param retry_interval: 重试间隔(秒)
90
90
  :param pool_size: 连接池大小
91
91
  """
92
+ # 连接池状态标志
93
+ self._closed = False
94
+
92
95
  # 初始化连接池
93
96
  self.pool = PooledDB(
94
97
  creator=pymysql,
@@ -120,6 +123,8 @@ class MySQLDeduplicator:
120
123
 
121
124
  def _get_connection(self):
122
125
  """从连接池获取连接"""
126
+ if self._closed:
127
+ raise ConnectionError("连接池已关闭")
123
128
  try:
124
129
  conn = self.pool.connection()
125
130
  logger.debug("成功获取数据库连接")
@@ -263,7 +268,9 @@ class MySQLDeduplicator:
263
268
 
264
269
  # 构建去重SQL
265
270
  column_list = ', '.join([f'`{col}`' for col in use_columns])
266
- temp_table = f"temp_{table}_{int(time.time())}"
271
+ # temp_table = f"temp_{table}_{int(time.time())}"
272
+ temp_table = f"temp_{table}_dedup_{os.getpid()}" # 使用进程ID构建临时表
273
+ temp_table = re.sub(r'[^a-zA-Z0-9_]', '_', temp_table) # 确保表名合法
267
274
 
268
275
  # 使用临时表方案处理去重,避免锁表问题
269
276
  create_temp_sql = f"""
@@ -556,13 +563,12 @@ class MySQLDeduplicator:
556
563
  def close(self):
557
564
  """关闭连接池"""
558
565
  try:
559
- if hasattr(self, 'pool') and self.pool:
566
+ if hasattr(self, 'pool') and self.pool and not self._closed:
560
567
  self.pool.close()
568
+ self._closed = True
561
569
  logger.info("数据库连接池已关闭")
562
570
  except Exception as e:
563
571
  logger.error(f"关闭连接池时出错: {str(e)}", {'error_type': type(e).__name__})
564
- finally:
565
- self.pool = None
566
572
 
567
573
  def __enter__(self):
568
574
  return self
mdbq/mysql/uploader.py CHANGED
@@ -661,7 +661,10 @@ class MySQLUploader:
661
661
  else:
662
662
  return 'BIGINT'
663
663
  elif isinstance(value, float):
664
- return 'DECIMAL(10,2)'
664
+ # 计算小数位数
665
+ num_str = str(value)
666
+ _, decimal_places = count_decimal_places(num_str)
667
+ return f'DECIMAL(20,{min(decimal_places, 6)})' # 限制最大6位小数
665
668
  elif isinstance(value, (datetime.datetime, pd.Timestamp)):
666
669
  return 'DATETIME'
667
670
  elif isinstance(value, datetime.date):
@@ -694,7 +697,7 @@ class MySQLUploader:
694
697
  data: Union[Dict, List[Dict], pd.DataFrame],
695
698
  set_typ: Dict[str, str],
696
699
  allow_null: bool = False
697
- ) -> List[Dict]:
700
+ ) -> Tuple[List[Dict], Dict[str, str]]:
698
701
  """
699
702
  准备要上传的数据,验证并转换数据类型
700
703
 
@@ -821,7 +824,7 @@ class MySQLUploader:
821
824
  raise ValueError(error_msg)
822
825
 
823
826
  # 准备数据
824
- prepared_data, set_typ = self._prepare_data(data, set_typ, allow_null)
827
+ prepared_data, filtered_set_typ = self._prepare_data(data, set_typ, allow_null)
825
828
 
826
829
  # 检查数据库是否存在
827
830
  if not self._check_database_exists(db_name):
@@ -861,7 +864,7 @@ class MySQLUploader:
861
864
  for part_table, part_data in partitioned_data.items():
862
865
  try:
863
866
  self._upload_to_table(
864
- db_name, part_table, part_data, set_typ,
867
+ db_name, part_table, part_data, filtered_set_typ,
865
868
  primary_keys, check_duplicate, duplicate_columns,
866
869
  allow_null, auto_create, partition_date_column,
867
870
  indexes, batch_id
@@ -875,7 +878,7 @@ class MySQLUploader:
875
878
  else:
876
879
  # 不分表,直接上传
877
880
  self._upload_to_table(
878
- db_name, table_name, prepared_data, set_typ,
881
+ db_name, table_name, prepared_data, filtered_set_typ,
879
882
  primary_keys, check_duplicate, duplicate_columns,
880
883
  allow_null, auto_create, partition_date_column,
881
884
  indexes, batch_id
@@ -1093,10 +1096,10 @@ class MySQLUploader:
1093
1096
  if attempt < max_retries - 1:
1094
1097
  time.sleep(delay * (attempt + 1))
1095
1098
  continue
1096
- raise MySQLUploaderError(f"操作重试{max_retries}次后失败") from e
1099
+ raise logger.error(f"操作重试{max_retries}次后失败")
1097
1100
  except Exception as e:
1098
- raise MySQLUploaderError(f"操作失败: {str(e)}") from e
1099
- raise last_exception if last_exception else MySQLUploaderError("未知错误")
1101
+ raise logger.error(f"操作失败: {str(e)}")
1102
+ raise last_exception if last_exception else logger.error("操作重试失败,未知错误")
1100
1103
 
1101
1104
  return wrapper
1102
1105
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: mdbq
3
- Version: 3.9.7
3
+ Version: 3.9.8
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -1,18 +1,18 @@
1
1
  mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
2
- mdbq/__version__.py,sha256=-IZp-xcG1aWVfit9XDMRHuI2-WEcYyXeULDeK2w-mPI,17
2
+ mdbq/__version__.py,sha256=cWQRevRu-qUG7cakaKmnu0uw_arEQikDebOc5jW6PWs,17
3
3
  mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
4
4
  mdbq/aggregation/optimize.py,sha256=2oalzD9weZhDclUC22OLxYa8Zj7KnmsGUoUau_Jlyc4,19796
5
5
  mdbq/aggregation/query_data.py,sha256=5_OzjGR5Sq00q-EgAYmSE5V9i4Solw9y4hkldl4mvt8,179808
6
6
  mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
7
7
  mdbq/config/config.py,sha256=eaTfrfXQ65xLqjr5I8-HkZd_jEY1JkGinEgv3TSLeoQ,3170
8
8
  mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
9
- mdbq/log/mylogger.py,sha256=T1s94-2ERWjFkzSKJKH0EgtqWdy0DE_OGb5-Ds5vYgk,22596
9
+ mdbq/log/mylogger.py,sha256=jHCVO7KPQrg2kcCaIrakHivZmFBJyy-24sIn2rsbK4Y,24440
10
10
  mdbq/log/spider_logging.py,sha256=-ozWWEGm3HVv604ozs_OOvVwumjokmUPwbaodesUrPY,1664
11
11
  mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
12
- mdbq/mysql/deduplicator.py,sha256=sd5R-6Y00yGQ2PFTW3jkXPvJ-_OFEQZCjXM99nRHa8Q,21670
12
+ mdbq/mysql/deduplicator.py,sha256=do1OqZtGwdf_KrRU_3LK4ZlKpQFsH6Pjef8eguoY4Xo,22009
13
13
  mdbq/mysql/mysql.py,sha256=jTcizvUtRdwMhWK2i_LA9yDPmcifLjUzVhwTbC3wfJk,119785
14
14
  mdbq/mysql/s_query.py,sha256=X055aLRAgxVvueXx4NbfNjp6MyBI02_XBb1pTKw09L0,8660
15
- mdbq/mysql/uploader.py,sha256=tb84-rJtrwla7SeBXL9EWzn0vIKnsE_9FebGNIoKrmU,45043
15
+ mdbq/mysql/uploader.py,sha256=Zfp1xeCD0oNuBMnSmg2AKFqa2SToPkKstBUOH_h5vBM,45260
16
16
  mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
17
17
  mdbq/other/download_sku_picture.py,sha256=YU8DxKMXbdeE1OOKEA848WVp62jYHw5O4tXTjUdq9H0,44832
18
18
  mdbq/other/otk.py,sha256=iclBIFbQbhlqzUbcMMoePXBpcP1eZ06ZtjnhcA_EbmE,7241
@@ -25,7 +25,7 @@ mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
25
25
  mdbq/redis/getredis.py,sha256=Uk8-cOWT0JU1qRyIVqdbYokSLvkDIAfcokmYj1ebw8k,24104
26
26
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
27
27
  mdbq/spider/aikucun.py,sha256=OhyEv1VyAKTOHjLDM37iNDQeRg5OnrNoKODoG2VxHes,19806
28
- mdbq-3.9.7.dist-info/METADATA,sha256=yX7vEbqUQIMoaJXh6VGpWKyYa5Ge0-ePKoxET8Y6LBM,363
29
- mdbq-3.9.7.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
30
- mdbq-3.9.7.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
31
- mdbq-3.9.7.dist-info/RECORD,,
28
+ mdbq-3.9.8.dist-info/METADATA,sha256=Ebjy7F7OmVoOel2poNGxFCX7l6AniJrupqwmuqTOJpA,363
29
+ mdbq-3.9.8.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
30
+ mdbq-3.9.8.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
31
+ mdbq-3.9.8.dist-info/RECORD,,
File without changes