mdbq 3.9.7__py3-none-any.whl → 3.9.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/__version__.py +1 -1
- mdbq/log/mylogger.py +103 -58
- mdbq/mysql/deduplicator.py +10 -4
- mdbq/mysql/uploader.py +11 -8
- {mdbq-3.9.7.dist-info → mdbq-3.9.8.dist-info}/METADATA +1 -1
- {mdbq-3.9.7.dist-info → mdbq-3.9.8.dist-info}/RECORD +8 -8
- {mdbq-3.9.7.dist-info → mdbq-3.9.8.dist-info}/WHEEL +0 -0
- {mdbq-3.9.7.dist-info → mdbq-3.9.8.dist-info}/top_level.txt +0 -0
mdbq/__version__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
VERSION = '3.9.7'
|
1
|
+
VERSION = '3.9.8'
|
mdbq/log/mylogger.py
CHANGED
@@ -9,6 +9,8 @@ import threading
|
|
9
9
|
import queue
|
10
10
|
from typing import Optional, Dict, Any, List, Callable, Union
|
11
11
|
import atexit
|
12
|
+
import traceback
|
13
|
+
import inspect
|
12
14
|
|
13
15
|
try:
|
14
16
|
import psutil
|
@@ -123,6 +125,7 @@ class MyLogger:
|
|
123
125
|
# 定时刷新相关
|
124
126
|
self._flush_thread = None
|
125
127
|
self._last_flush_time = 0
|
128
|
+
self._start_flush_thread()
|
126
129
|
|
127
130
|
# 创建日志记录器
|
128
131
|
self.logger = logging.getLogger(name)
|
@@ -219,12 +222,9 @@ class MyLogger:
|
|
219
222
|
'timestamp': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
220
223
|
'level': record.levelname,
|
221
224
|
'message': record.getMessage(),
|
222
|
-
# 'module': record.module,
|
223
|
-
'function': record.funcName,
|
224
|
-
# 'line': record.lineno,
|
225
|
-
# 'thread': record.threadName,
|
226
|
-
# 'process': record.processName,
|
227
225
|
'name': record.name,
|
226
|
+
# 'module': record.module,
|
227
|
+
# 'function': record.funcName,
|
228
228
|
}
|
229
229
|
|
230
230
|
# 添加额外字段
|
@@ -347,6 +347,45 @@ class MyLogger:
|
|
347
347
|
)
|
348
348
|
self._async_thread.start()
|
349
349
|
|
350
|
+
def log_error_handler(retry_times=0, fallback_level='error'):
|
351
|
+
"""
|
352
|
+
日志错误处理装饰器
|
353
|
+
|
354
|
+
参数:
|
355
|
+
- retry_times: 异常时重试次数
|
356
|
+
- fallback_level: 降级日志级别
|
357
|
+
"""
|
358
|
+
|
359
|
+
def decorator(log_method):
|
360
|
+
def wrapper(self, level: str, message: str, extra: Optional[Dict] = None):
|
361
|
+
last_exception = None
|
362
|
+
for attempt in range(retry_times + 1):
|
363
|
+
try:
|
364
|
+
return log_method(self, level, message, extra)
|
365
|
+
except Exception as e:
|
366
|
+
last_exception = e
|
367
|
+
if attempt < retry_times:
|
368
|
+
time.sleep(0.1 * (attempt + 1)) # 简单的退避策略
|
369
|
+
continue
|
370
|
+
|
371
|
+
try:
|
372
|
+
# 降级处理
|
373
|
+
logging.basicConfig()
|
374
|
+
fallback_logger = logging.getLogger(f"{getattr(self, 'name', 'mylogger')}_fallback")
|
375
|
+
fallback_msg = f"[降级处理] {message}"[:1000]
|
376
|
+
getattr(fallback_logger, fallback_level)(
|
377
|
+
f"日志记录失败(尝试{attempt + 1}次): {e}\n原始消息: {fallback_msg}"
|
378
|
+
)
|
379
|
+
except:
|
380
|
+
sys.stderr.write(f"严重: 日志系统完全失败 - {last_exception}\n")
|
381
|
+
|
382
|
+
return None
|
383
|
+
|
384
|
+
return wrapper
|
385
|
+
|
386
|
+
return decorator
|
387
|
+
|
388
|
+
@log_error_handler(retry_times=1, fallback_level='warning')
|
350
389
|
def _sync_log(self, level: str, message: str, extra: Optional[Dict] = None):
|
351
390
|
"""同步日志记录"""
|
352
391
|
if not hasattr(self.logger, level.lower()):
|
@@ -375,7 +414,8 @@ class MyLogger:
|
|
375
414
|
log_extra['context_data'] = self._context.data.copy()
|
376
415
|
|
377
416
|
# 添加敏感字段过滤
|
378
|
-
|
417
|
+
if self.sensitive_fields:
|
418
|
+
log_extra['过滤'] = self.sensitive_fields
|
379
419
|
|
380
420
|
# 应用日志采样
|
381
421
|
if self.sample_rate < 1.0 and level.lower() in ('debug', 'info'):
|
@@ -441,28 +481,6 @@ class MyLogger:
|
|
441
481
|
if hasattr(self._context, 'data'):
|
442
482
|
self._context.data.clear()
|
443
483
|
|
444
|
-
def shutdown(self):
|
445
|
-
"""关闭日志记录器,确保所有日志被刷新"""
|
446
|
-
if self.enable_async:
|
447
|
-
self._stop_event.set()
|
448
|
-
# 等待队列清空
|
449
|
-
while not self._log_queue.empty():
|
450
|
-
time.sleep(0.1)
|
451
|
-
if self._async_thread and self._async_thread.is_alive():
|
452
|
-
self._async_thread.join(timeout=2)
|
453
|
-
if self._flush_thread and self._flush_thread.is_alive():
|
454
|
-
self._flush_thread.join(timeout=2)
|
455
|
-
|
456
|
-
# 确保所有handler被刷新
|
457
|
-
self._flush_handlers()
|
458
|
-
|
459
|
-
# 关闭所有handler
|
460
|
-
for handler in self.logger.handlers:
|
461
|
-
try:
|
462
|
-
handler.close()
|
463
|
-
except:
|
464
|
-
pass
|
465
|
-
|
466
484
|
def debug(self, message: str, extra: Optional[Dict] = None):
|
467
485
|
"""记录调试信息"""
|
468
486
|
self.log('debug', message, extra)
|
@@ -487,41 +505,46 @@ class MyLogger:
|
|
487
505
|
"""记录异常信息"""
|
488
506
|
if not extra:
|
489
507
|
extra = {}
|
490
|
-
|
508
|
+
|
509
|
+
# # 获取完整的异常堆栈
|
491
510
|
# tb = exc_info.__traceback__
|
511
|
+
# while tb.tb_next:
|
512
|
+
# tb = tb.tb_next # 获取最内层的堆栈帧
|
492
513
|
#
|
493
|
-
#
|
494
|
-
#
|
495
|
-
#
|
496
|
-
#
|
497
|
-
#
|
498
|
-
#
|
499
|
-
#
|
500
|
-
#
|
501
|
-
#
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
514
|
+
# extra.update({
|
515
|
+
# 'module': tb.tb_frame.f_globals.get('__name__', ''),
|
516
|
+
# 'function': tb.tb_frame.f_code.co_name,
|
517
|
+
# 'line': tb.tb_lineno,
|
518
|
+
# 'file': tb.tb_frame.f_code.co_filename,
|
519
|
+
# '异常': str(exc_info),
|
520
|
+
# '类型': exc_info.__class__.__name__,
|
521
|
+
# '堆栈': self._format_traceback(exc_info)
|
522
|
+
# })
|
523
|
+
|
524
|
+
# 使用inspect获取调用栈
|
525
|
+
frame = inspect.currentframe()
|
526
|
+
try:
|
527
|
+
# 向上追溯2层(1层是exception方法本身,2层是实际调用位置)
|
528
|
+
caller_frame = frame.f_back.f_back
|
529
|
+
extra.update({
|
530
|
+
'module': caller_frame.f_globals.get('__name__', ''),
|
531
|
+
'function': caller_frame.f_code.co_name,
|
532
|
+
'line': caller_frame.f_lineno,
|
533
|
+
'file': caller_frame.f_code.co_filename,
|
534
|
+
'异常': str(exc_info),
|
535
|
+
'类型': exc_info.__class__.__name__,
|
536
|
+
'堆栈': self._format_traceback(exc_info)
|
537
|
+
})
|
538
|
+
finally:
|
539
|
+
del frame # 避免循环引用
|
518
540
|
|
519
541
|
# 直接使用logger的error方法记录,保留原始调用栈
|
520
542
|
self.log('error', message, extra)
|
521
543
|
|
522
544
|
def _format_traceback(self, exc_info):
|
523
545
|
"""格式化异常堆栈"""
|
524
|
-
|
546
|
+
if exc_info is None:
|
547
|
+
return ""
|
525
548
|
return ''.join(traceback.format_exception(type(exc_info), exc_info, exc_info.__traceback__))
|
526
549
|
|
527
550
|
def timeit(self, message: str = "Execution time"):
|
@@ -579,17 +602,40 @@ class MyLogger:
|
|
579
602
|
except:
|
580
603
|
pass
|
581
604
|
|
605
|
+
def shutdown(self):
|
606
|
+
"""关闭日志记录器,确保所有日志被刷新"""
|
607
|
+
if self.enable_async:
|
608
|
+
self._stop_event.set()
|
609
|
+
# 等待队列清空
|
610
|
+
while not self._log_queue.empty():
|
611
|
+
time.sleep(0.1)
|
612
|
+
if self._async_thread and self._async_thread.is_alive():
|
613
|
+
self._async_thread.join(timeout=0.5)
|
614
|
+
|
615
|
+
# 确保所有handler被刷新
|
616
|
+
if self._flush_thread:
|
617
|
+
self._flush_handlers()
|
618
|
+
if self._flush_thread.is_alive():
|
619
|
+
self._flush_thread.join(timeout=0.5)
|
620
|
+
|
621
|
+
# 关闭所有handler
|
622
|
+
for handler in self.logger.handlers:
|
623
|
+
try:
|
624
|
+
handler.close()
|
625
|
+
except:
|
626
|
+
pass
|
627
|
+
|
582
628
|
def main():
|
583
629
|
# 创建日志记录器
|
584
630
|
logger = MyLogger(
|
585
631
|
name='my_app',
|
586
632
|
logging_mode='both',
|
587
633
|
log_level='DEBUG',
|
588
|
-
log_file='
|
634
|
+
log_file='my_app.log',
|
589
635
|
log_format='json',
|
590
636
|
max_log_size=50,
|
591
637
|
backup_count=5,
|
592
|
-
enable_async=
|
638
|
+
enable_async=False, # 是否启用异步日志
|
593
639
|
sample_rate=1, # 采样50%的DEBUG/INFO日志
|
594
640
|
sensitive_fields=[], # 敏感字段列表
|
595
641
|
enable_metrics=False, # 是否启用性能指标
|
@@ -603,4 +649,3 @@ def main():
|
|
603
649
|
|
604
650
|
if __name__ == '__main__':
|
605
651
|
pass
|
606
|
-
main()
|
mdbq/mysql/deduplicator.py
CHANGED
@@ -89,6 +89,9 @@ class MySQLDeduplicator:
|
|
89
89
|
:param retry_interval: 重试间隔(秒)
|
90
90
|
:param pool_size: 连接池大小
|
91
91
|
"""
|
92
|
+
# 连接池状态标志
|
93
|
+
self._closed = False
|
94
|
+
|
92
95
|
# 初始化连接池
|
93
96
|
self.pool = PooledDB(
|
94
97
|
creator=pymysql,
|
@@ -120,6 +123,8 @@ class MySQLDeduplicator:
|
|
120
123
|
|
121
124
|
def _get_connection(self):
|
122
125
|
"""从连接池获取连接"""
|
126
|
+
if self._closed:
|
127
|
+
raise ConnectionError("连接池已关闭")
|
123
128
|
try:
|
124
129
|
conn = self.pool.connection()
|
125
130
|
logger.debug("成功获取数据库连接")
|
@@ -263,7 +268,9 @@ class MySQLDeduplicator:
|
|
263
268
|
|
264
269
|
# 构建去重SQL
|
265
270
|
column_list = ', '.join([f'`{col}`' for col in use_columns])
|
266
|
-
temp_table = f"temp_{table}_{int(time.time())}"
|
271
|
+
# temp_table = f"temp_{table}_{int(time.time())}"
|
272
|
+
temp_table = f"temp_{table}_dedup_{os.getpid()}" # 使用进程ID构建临时表
|
273
|
+
temp_table = re.sub(r'[^a-zA-Z0-9_]', '_', temp_table) # 确保表名合法
|
267
274
|
|
268
275
|
# 使用临时表方案处理去重,避免锁表问题
|
269
276
|
create_temp_sql = f"""
|
@@ -556,13 +563,12 @@ class MySQLDeduplicator:
|
|
556
563
|
def close(self):
|
557
564
|
"""关闭连接池"""
|
558
565
|
try:
|
559
|
-
if hasattr(self, 'pool') and self.pool:
|
566
|
+
if hasattr(self, 'pool') and self.pool and not self._closed:
|
560
567
|
self.pool.close()
|
568
|
+
self._closed = True
|
561
569
|
logger.info("数据库连接池已关闭")
|
562
570
|
except Exception as e:
|
563
571
|
logger.error(f"关闭连接池时出错: {str(e)}", {'error_type': type(e).__name__})
|
564
|
-
finally:
|
565
|
-
self.pool = None
|
566
572
|
|
567
573
|
def __enter__(self):
|
568
574
|
return self
|
mdbq/mysql/uploader.py
CHANGED
@@ -661,7 +661,10 @@ class MySQLUploader:
|
|
661
661
|
else:
|
662
662
|
return 'BIGINT'
|
663
663
|
elif isinstance(value, float):
|
664
|
-
|
664
|
+
# 计算小数位数
|
665
|
+
num_str = str(value)
|
666
|
+
_, decimal_places = count_decimal_places(num_str)
|
667
|
+
return f'DECIMAL(20,{min(decimal_places, 6)})' # 限制最大6位小数
|
665
668
|
elif isinstance(value, (datetime.datetime, pd.Timestamp)):
|
666
669
|
return 'DATETIME'
|
667
670
|
elif isinstance(value, datetime.date):
|
@@ -694,7 +697,7 @@ class MySQLUploader:
|
|
694
697
|
data: Union[Dict, List[Dict], pd.DataFrame],
|
695
698
|
set_typ: Dict[str, str],
|
696
699
|
allow_null: bool = False
|
697
|
-
) -> List[Dict]:
|
700
|
+
) -> Tuple[List[Dict], Dict[str, str]]:
|
698
701
|
"""
|
699
702
|
准备要上传的数据,验证并转换数据类型
|
700
703
|
|
@@ -821,7 +824,7 @@ class MySQLUploader:
|
|
821
824
|
raise ValueError(error_msg)
|
822
825
|
|
823
826
|
# 准备数据
|
824
|
-
prepared_data,
|
827
|
+
prepared_data, filtered_set_typ = self._prepare_data(data, set_typ, allow_null)
|
825
828
|
|
826
829
|
# 检查数据库是否存在
|
827
830
|
if not self._check_database_exists(db_name):
|
@@ -861,7 +864,7 @@ class MySQLUploader:
|
|
861
864
|
for part_table, part_data in partitioned_data.items():
|
862
865
|
try:
|
863
866
|
self._upload_to_table(
|
864
|
-
db_name, part_table, part_data,
|
867
|
+
db_name, part_table, part_data, filtered_set_typ,
|
865
868
|
primary_keys, check_duplicate, duplicate_columns,
|
866
869
|
allow_null, auto_create, partition_date_column,
|
867
870
|
indexes, batch_id
|
@@ -875,7 +878,7 @@ class MySQLUploader:
|
|
875
878
|
else:
|
876
879
|
# 不分表,直接上传
|
877
880
|
self._upload_to_table(
|
878
|
-
db_name, table_name, prepared_data,
|
881
|
+
db_name, table_name, prepared_data, filtered_set_typ,
|
879
882
|
primary_keys, check_duplicate, duplicate_columns,
|
880
883
|
allow_null, auto_create, partition_date_column,
|
881
884
|
indexes, batch_id
|
@@ -1093,10 +1096,10 @@ class MySQLUploader:
|
|
1093
1096
|
if attempt < max_retries - 1:
|
1094
1097
|
time.sleep(delay * (attempt + 1))
|
1095
1098
|
continue
|
1096
|
-
raise
|
1099
|
+
raise logger.error(f"操作重试{max_retries}次后失败")
|
1097
1100
|
except Exception as e:
|
1098
|
-
raise
|
1099
|
-
raise last_exception if last_exception else
|
1101
|
+
raise logger.error(f"操作失败: {str(e)}")
|
1102
|
+
raise last_exception if last_exception else logger.error("操作重试失败,未知错误")
|
1100
1103
|
|
1101
1104
|
return wrapper
|
1102
1105
|
|
@@ -1,18 +1,18 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
|
-
mdbq/__version__.py,sha256
|
2
|
+
mdbq/__version__.py,sha256=cWQRevRu-qUG7cakaKmnu0uw_arEQikDebOc5jW6PWs,17
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
4
|
mdbq/aggregation/optimize.py,sha256=2oalzD9weZhDclUC22OLxYa8Zj7KnmsGUoUau_Jlyc4,19796
|
5
5
|
mdbq/aggregation/query_data.py,sha256=5_OzjGR5Sq00q-EgAYmSE5V9i4Solw9y4hkldl4mvt8,179808
|
6
6
|
mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
7
7
|
mdbq/config/config.py,sha256=eaTfrfXQ65xLqjr5I8-HkZd_jEY1JkGinEgv3TSLeoQ,3170
|
8
8
|
mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
|
9
|
-
mdbq/log/mylogger.py,sha256=
|
9
|
+
mdbq/log/mylogger.py,sha256=jHCVO7KPQrg2kcCaIrakHivZmFBJyy-24sIn2rsbK4Y,24440
|
10
10
|
mdbq/log/spider_logging.py,sha256=-ozWWEGm3HVv604ozs_OOvVwumjokmUPwbaodesUrPY,1664
|
11
11
|
mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
|
12
|
-
mdbq/mysql/deduplicator.py,sha256=
|
12
|
+
mdbq/mysql/deduplicator.py,sha256=do1OqZtGwdf_KrRU_3LK4ZlKpQFsH6Pjef8eguoY4Xo,22009
|
13
13
|
mdbq/mysql/mysql.py,sha256=jTcizvUtRdwMhWK2i_LA9yDPmcifLjUzVhwTbC3wfJk,119785
|
14
14
|
mdbq/mysql/s_query.py,sha256=X055aLRAgxVvueXx4NbfNjp6MyBI02_XBb1pTKw09L0,8660
|
15
|
-
mdbq/mysql/uploader.py,sha256=
|
15
|
+
mdbq/mysql/uploader.py,sha256=Zfp1xeCD0oNuBMnSmg2AKFqa2SToPkKstBUOH_h5vBM,45260
|
16
16
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
17
17
|
mdbq/other/download_sku_picture.py,sha256=YU8DxKMXbdeE1OOKEA848WVp62jYHw5O4tXTjUdq9H0,44832
|
18
18
|
mdbq/other/otk.py,sha256=iclBIFbQbhlqzUbcMMoePXBpcP1eZ06ZtjnhcA_EbmE,7241
|
@@ -25,7 +25,7 @@ mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
|
|
25
25
|
mdbq/redis/getredis.py,sha256=Uk8-cOWT0JU1qRyIVqdbYokSLvkDIAfcokmYj1ebw8k,24104
|
26
26
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
27
27
|
mdbq/spider/aikucun.py,sha256=OhyEv1VyAKTOHjLDM37iNDQeRg5OnrNoKODoG2VxHes,19806
|
28
|
-
mdbq-3.9.
|
29
|
-
mdbq-3.9.
|
30
|
-
mdbq-3.9.
|
31
|
-
mdbq-3.9.
|
28
|
+
mdbq-3.9.8.dist-info/METADATA,sha256=Ebjy7F7OmVoOel2poNGxFCX7l6AniJrupqwmuqTOJpA,363
|
29
|
+
mdbq-3.9.8.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
30
|
+
mdbq-3.9.8.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
31
|
+
mdbq-3.9.8.dist-info/RECORD,,
|
File without changes
|
File without changes
|