algo-backend-framework 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- algo_backend/__init__.py +0 -0
- algo_backend/config/__init__.py +8 -0
- algo_backend/config/basic_config.py +13 -0
- algo_backend/config/loguru_config.py +19 -0
- algo_backend/exception/__init__.py +22 -0
- algo_backend/exception/error_code_manage.py +126 -0
- algo_backend/exception/exception.py +42 -0
- algo_backend/exception/status_code.py +103 -0
- algo_backend/handler/__init__.py +3 -0
- algo_backend/handler/exception_to_vo.py +37 -0
- algo_backend/handler/operation_handler.py +71 -0
- algo_backend/intercept/__init__.py +9 -0
- algo_backend/intercept/common.py +45 -0
- algo_backend/intercept/http.py +40 -0
- algo_backend/intercept/validate.py +78 -0
- algo_backend/log/__init__.py +1 -0
- algo_backend/log/common.py +16 -0
- algo_backend/log/loguru/__init__.py +5 -0
- algo_backend/log/loguru/log_clean.py +140 -0
- algo_backend/log/loguru/log_setup.py +89 -0
- algo_backend/log/loguru/log_starter.py +65 -0
- algo_backend/log/loguru/patch_logging.py +83 -0
- algo_backend/log/nblog/__init__.py +0 -0
- algo_backend/metrics/__init__.py +22 -0
- algo_backend/metrics/collector/__init__.py +12 -0
- algo_backend/metrics/collector/common.py +17 -0
- algo_backend/metrics/collector/gc_metrics.py +74 -0
- algo_backend/metrics/collector/schedule_monitor.py +50 -0
- algo_backend/metrics/collector/system_metrics.py +169 -0
- algo_backend/metrics/http_metrics.py +56 -0
- algo_backend/metrics/prometheus_context.py +55 -0
- algo_backend/metrics/time_cost_metrics.py +146 -0
- algo_backend/middleware/__init__.py +4 -0
- algo_backend/middleware/cors.py +10 -0
- algo_backend/middleware/metrics.py +12 -0
- algo_backend/schema/__init__.py +3 -0
- algo_backend/schema/vo.py +83 -0
- algo_backend/starter/__init__.py +4 -0
- algo_backend/starter/default_app_generator.py +169 -0
- algo_backend/starter/default_service_starter.py +70 -0
- algo_backend/starter/event_list.py +32 -0
- algo_backend/utils/__init__.py +8 -0
- algo_backend/utils/meta_class.py +50 -0
- algo_backend/utils/utils.py +22 -0
- algo_backend_framework-0.0.1.dist-info/METADATA +60 -0
- algo_backend_framework-0.0.1.dist-info/RECORD +48 -0
- algo_backend_framework-0.0.1.dist-info/WHEEL +5 -0
- algo_backend_framework-0.0.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import threading
|
|
3
|
+
import time
|
|
4
|
+
from datetime import datetime, timedelta
|
|
5
|
+
from typing import List, Optional
|
|
6
|
+
|
|
7
|
+
from loguru import logger
|
|
8
|
+
from pydantic import BaseModel, Field
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class LogInfo(BaseModel):
    """Metadata snapshot of a single log file.

    All stat-derived fields are optional because the file may not exist —
    or may disappear — at the moment it is inspected.
    """

    file_path: str = Field(..., description="日志文件路径")
    file_size: Optional[int] = None  # size in bytes
    create_time: Optional[datetime] = None  # creation time (platform ctime)
    modify_time: Optional[datetime] = None  # last modification time
    access_time: Optional[datetime] = None  # last access time

    @classmethod
    def gen(cls, file_path: str):
        """Build a LogInfo for *file_path*.

        Returns an instance with only ``file_path`` populated when the file
        does not exist, or when it vanishes between the existence check and
        the stat call — a real race for a cleaner running next to a rotator.
        """
        if not os.path.exists(file_path):
            return cls(file_path=file_path)
        try:
            # One stat() call instead of four separate os.path.get* calls:
            # cheaper, and atomic with respect to concurrent deletion.
            st = os.stat(file_path)
        except OSError:
            # File disappeared (or became unreadable) after the exists() check.
            return cls(file_path=file_path)
        return cls(
            file_path=file_path,
            file_size=st.st_size,
            create_time=datetime.fromtimestamp(st.st_ctime),
            modify_time=datetime.fromtimestamp(st.st_mtime),
            access_time=datetime.fromtimestamp(st.st_atime),
        )
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class LoguruCleaner:
    """Delete stale log files, keeping the most recent ``retention_day`` days (default 60)."""

    def __init__(self, log_dir: str, retention_day: int = 60):
        """
        :param log_dir: directory scanned for ``*.log`` files
        :param retention_day: number of days of logs to keep
        """
        self.retention_day = retention_day
        self.log_dir = log_dir
        # The threshold is frozen at construction time, so a fresh instance is
        # created per cleanup round (schedule_run does exactly that).
        self.current_time: datetime = datetime.now()
        self.threshold_time: datetime = self.current_time - timedelta(
            days=self.retention_day
        )

    def scan_log(self) -> List[LogInfo]:
        """Scan the log directory and return every ``*.log`` file found."""
        log_files = []
        if os.path.exists(self.log_dir):
            log_files = [
                LogInfo.gen(os.path.join(self.log_dir, file))
                for file in os.listdir(self.log_dir)
                if file.endswith(".log")
            ]
        return log_files

    def judge_is_old_log(self, log_file: LogInfo) -> bool:
        """Return True when the file's mtime is known and older than the threshold."""
        if log_file.modify_time and log_file.modify_time < self.threshold_time:
            return True
        return False

    def extract_old_log(self) -> List[LogInfo]:
        """Return the subset of scanned logs that are past the retention threshold."""
        log_files = self.scan_log()

        if not log_files:
            logger.debug("No log files found")
            return []

        logger.debug(
            f"[ExtractOldLogFiles]: scan [{len(log_files)}] logs from [{self.log_dir}]"
        )

        old_log_files = [o for o in log_files if self.judge_is_old_log(o)]

        logger.debug(
            f"[ExtractOldLogFiles]: extract [{len(old_log_files)}] old logs, threshold day [{self.threshold_time}]"
        )

        return old_log_files

    def delete_log(self, log_file: List[LogInfo]):
        """Delete the given log files, logging (never raising) per-file failures."""
        if log_file:
            logger.debug(f"[DeleteLogFiles]: Start to delete [{len(log_file)}] logs")
            cnt = 0
            for log in log_file:
                try:
                    os.remove(log.file_path)
                    logger.debug(f"[DeleteLogFiles]: Delete log [{log.file_path}]")
                    cnt += 1
                except OSError as e:
                    # Previously only FileNotFoundError was caught, so a
                    # PermissionError (or any other OS failure) escaped and
                    # killed the cleaner thread. OSError covers it too.
                    logger.warning(
                        f"[DeleteLogFiles]: Failed to delete log [{log.file_path}], error [{e}]"
                    )

            logger.debug(f"[DeleteLogFiles]: Delete [{cnt}]/[{len(log_file)}] logs")

    def delete_lod_log(self):
        """Delete expired log files.

        Misspelled name kept for backward compatibility; prefer
        :meth:`delete_old_log`.
        """
        old_logs = self.extract_old_log()
        self.delete_log(old_logs)

    def delete_old_log(self):
        """Correctly-spelled alias of :meth:`delete_lod_log`."""
        self.delete_lod_log()

    @classmethod
    def schedule_run(cls, log_dir: str, retention_day: int = 60):
        """Start a daemon thread that cleans *log_dir* once per day."""
        interval = 60 * 60 * 24  # one day, in seconds

        def worker():
            while True:
                # A new instance each round so the threshold tracks "now".
                log_cleaner = cls(log_dir, retention_day)
                logger.debug(
                    f"pid={os.getpid()} | [ScheduleRun]: Start to run log cleaner, log dir [{log_dir}], retention day [{retention_day}]"
                )
                try:
                    log_cleaner.delete_lod_log()
                except Exception:
                    # Keep the cleaner thread alive across unexpected failures;
                    # previously any error silently ended cleaning forever.
                    logger.exception("[ScheduleRun]: log cleanup round failed")
                time.sleep(interval)  # sleep one day

        thread = threading.Thread(target=worker, daemon=True)
        thread.start()
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from loguru import logger
|
|
6
|
+
|
|
7
|
+
from .patch_logging import patch_logging_to_loguru
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class LoguruSetup:
    """Configure loguru sinks: a colored stderr console plus daily-rotated files."""

    # One shared record format for every sink (console and files).
    FORMAT = (
        "<green>{time:YYYY-MM-DD HH:mm:ss.SSS}</green> | "
        "<level>{level: <8}</level> | "
        "<cyan>p-{process}</cyan> | "
        "<cyan>t-{thread}</cyan> | "
        "<cyan>{thread.name}</cyan> | "
        "<cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - "
        "<level>{message}</level>"
    )

    # Process-wide guard so sinks are configured at most once.
    __IS_SET_ROTATE = False

    @classmethod
    def rotate_daily(
        cls,
        *,
        log_dir: str,
        service_name: str,
        add_pid_suffix: bool = True,
        save_info: bool = True,
        save_debug: bool = True,
        stderr_colorize: bool = True,
        run_id_suffix: Optional[str] = None,
    ):
        """Set up console and rotating-file logging (idempotent per process).

        :param log_dir: directory that receives the log files
        :param service_name: service name used in the file names
        :param add_pid_suffix: append the process id to the file names
        :param save_info: also write an INFO-and-above file
        :param save_debug: also write a DEBUG-and-above file
        :param stderr_colorize: colorize the console sink
        :param run_id_suffix: run-id suffix to avoid clashes between replicas
        """
        if cls.__IS_SET_ROTATE:
            return

        # Drop any pre-existing handlers so repeated setup never duplicates sinks.
        logger.remove()

        os.makedirs(log_dir, exist_ok=True)
        proc_tag = f"_{os.getpid()}" if add_pid_suffix else ""
        run_tag = f"_r{run_id_suffix}" if run_id_suffix else ""

        # Console sink on stderr: full DEBUG detail, optional colors.
        logger.add(
            sink=sys.stderr,
            level="DEBUG",
            format=cls.FORMAT,
            colorize=stderr_colorize,
            backtrace=True,  # show full stack traces on the console
        )

        if save_info:
            # INFO and above (level no >= 20), rotated daily.
            info_path = os.path.join(
                log_dir, f"{service_name}_info{proc_tag}{run_tag}.log"
            )
            logger.add(
                info_path,
                rotation="1 day",
                filter=lambda record: record["level"].no >= 20,
                format=cls.FORMAT,
                enqueue=True,  # queued writes: safe across threads/processes
            )

        if save_debug:
            # DEBUG and above (level no >= 10), rotated daily.
            debug_path = os.path.join(
                log_dir, f"{service_name}_debug{proc_tag}{run_tag}.log"
            )
            logger.add(
                debug_path,
                rotation="1 day",
                level="DEBUG",
                filter=lambda record: record["level"].no >= 10,
                format=cls.FORMAT,
                enqueue=True,
            )
        # Route stdlib logging through loguru as well.
        patch_logging_to_loguru()
        logger.info("日志设置完成")
        cls.__IS_SET_ROTATE = True
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import socket
|
|
3
|
+
|
|
4
|
+
from loguru import logger
|
|
5
|
+
|
|
6
|
+
from algo_backend.config import LoguruConfig as LogConfig
|
|
7
|
+
|
|
8
|
+
from ..common import BasicLogStarter
|
|
9
|
+
from .log_clean import LoguruCleaner
|
|
10
|
+
from .log_setup import LoguruSetup
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class LoguruStarter(BasicLogStarter):
    """Loguru-based log starter.

    Inside containers the log directory defaults to
    ``<LOGGER_PATH>/<service_name>`` (typically ``/logger/<service_name>``).
    """

    def __init__(self, service_name: str):
        super().__init__(service_name)

    @property
    def service_log_dir(self):
        # <LOGGER_PATH>/<service_name>
        return os.path.join(LogConfig.LOGGER_PATH, self.service_name)

    def setup_log(self):
        """Configure loguru sinks and silence the configured noisy packages."""

        LoguruSetup.rotate_daily(
            log_dir=self.service_log_dir,
            service_name=self.add_container_id(service_name=self.service_name),
            add_pid_suffix=True,
            save_info=LogConfig.SAVE_INFO_LEVEL,
            save_debug=LogConfig.SAVE_DEBUG_LOG,
        )

        for pkg in LogConfig.get_disable_log_pkg():
            # Suppress loguru output originating from these packages.
            logger.debug(f"ignore log: {pkg}")
            logger.disable(pkg)

    def run_log_cleaner(self):
        """Start the background thread that periodically deletes old logs."""
        LoguruCleaner.schedule_run(
            log_dir=self.service_log_dir,
            retention_day=LogConfig.LOG_RETENTION_DAY,
        )

    @classmethod
    def add_container_id(cls, service_name: str):
        """Append the container hostname to *service_name* (at most once), when enabled."""
        if not LogConfig.LOG_ADD_CONTAINED_ID:
            logger.info("日志名不增加containerId")
            return service_name

        try:
            socket_hostname = f"-{socket.gethostname()}"
        except Exception:
            # Was a bare ``except:`` which would also swallow KeyboardInterrupt
            # and SystemExit; Exception keeps the same best-effort fallback.
            socket_hostname = ""
        if service_name in socket_hostname:
            # Hostname already contains the service name (typical for k8s pods).
            return service_name
        else:
            return f"{service_name}{socket_hostname}"
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from loguru import logger
|
|
4
|
+
|
|
5
|
+
from algo_backend.config import LoguruConfig
|
|
6
|
+
|
|
7
|
+
disable_log_pkg = LoguruConfig.get_disable_log_pkg()
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def patch_logging_to_loguru():
    """
    Redirect all records from Python's native ``logging`` system to loguru.

    Installs a single loguru-backed handler on the root logger (replacing any
    existing handlers) and lowers the root level to DEBUG so every record
    reaches loguru, where the configured sinks do the actual filtering.
    """

    class LoguruHandler(logging.Handler):
        # Bridge handler: forwards each stdlib LogRecord to loguru with a
        # call-site depth computed by walking the stack past logging internals.
        def emit(self, record):
            # Drop records from packages configured as disabled.
            # NOTE(review): this is a substring match on the logger name, so
            # "foo" also filters "foobar" — confirm that is intended.
            if any(excluded in record.name for excluded in disable_log_pkg):
                return

            # Map the stdlib level name to loguru's; unknown custom levels
            # fall back to the numeric level, which loguru also accepts.
            try:
                level = logger.level(record.levelname).name
            except ValueError:
                level = record.levelno

            # Starting from the current frame, locate the frame that actually
            # invoked logging, so loguru reports the real caller's file/line.
            frame = logging.currentframe()
            depth = 0  # start at 0; the loop determines the depth dynamically

            # Walk up the call stack, skipping logging-module internals.
            while frame:
                filename = frame.f_code.co_filename
                func_name = frame.f_code.co_name

                # A frame is "internal" if it lives in the logging package
                # or matches a well-known logging API function name.
                # NOTE(review): the name list includes generic names like
                # "info"/"error", so a user function with one of those names
                # would also be skipped — confirm this trade-off is acceptable.
                is_logging_internal = (
                    # standard-library logging module path
                    "logging" in filename
                    and (
                        filename.endswith("logging/__init__.py")
                        or "/logging/" in filename
                        or "\\logging\\" in filename
                    )
                ) or (
                    # logging-internal function names
                    func_name
                    in (
                        "callHandlers",
                        "handle",
                        "emit",
                        "handleError",
                        "_log",
                        "makeRecord",
                        "getLogger",
                        "debug",
                        "info",
                        "warning",
                        "error",
                        "exception",
                        "critical",
                    )
                )

                if is_logging_internal:
                    frame = frame.f_back
                    depth += 1
                else:
                    # Found the real caller; stop walking.
                    break

            # If no suitable frame was found, fall back to a fixed depth.
            if not frame:
                depth = 2

            # exception=record.exc_info preserves tracebacks from
            # logging.exception()-style calls.
            logger.opt(depth=depth, exception=record.exc_info).log(
                level, record.getMessage()
            )

    # Lower the root logger to the minimum level so every record is processed;
    # loguru's sinks perform the real level filtering.
    logging.root.setLevel(logging.DEBUG)

    # Replace (not append to) existing handlers to avoid duplicate output.
    logging.root.handlers = []
    logging.root.addHandler(LoguruHandler())
|
|
File without changes
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from .collector import AbstractMetricCollector, SystemMetricsMonitor
|
|
2
|
+
from .http_metrics import RequestTimeCostMiddleware
|
|
3
|
+
from .prometheus_context import PrometheusContext
|
|
4
|
+
from .time_cost_metrics import (
|
|
5
|
+
BasicTimeCostMetrics,
|
|
6
|
+
ApiTimeCostMetrics,
|
|
7
|
+
ClientTimeCostMetrics,
|
|
8
|
+
PrometheusTimeCostMetricSetting,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"SystemMetricsMonitor",
|
|
13
|
+
"RequestTimeCostMiddleware",
|
|
14
|
+
"PrometheusContext",
|
|
15
|
+
# 定时收集指标
|
|
16
|
+
"AbstractMetricCollector",
|
|
17
|
+
# 耗时指标
|
|
18
|
+
"PrometheusTimeCostMetricSetting",
|
|
19
|
+
"BasicTimeCostMetrics",
|
|
20
|
+
"ApiTimeCostMetrics",
|
|
21
|
+
"ClientTimeCostMetrics",
|
|
22
|
+
]
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from .common import AbstractMetricCollector
|
|
2
|
+
from .gc_metrics import PythonGcMetricsCollector, PythonGcObjectMetricsCollector
|
|
3
|
+
from .schedule_monitor import MetricsScheduleMonitor
|
|
4
|
+
from .system_metrics import SystemMetricsMonitor
|
|
5
|
+
|
|
6
|
+
__all__ = [
|
|
7
|
+
"PythonGcMetricsCollector",
|
|
8
|
+
"PythonGcObjectMetricsCollector",
|
|
9
|
+
"AbstractMetricCollector",
|
|
10
|
+
"MetricsScheduleMonitor",
|
|
11
|
+
"SystemMetricsMonitor",
|
|
12
|
+
]
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class AbstractMetricCollector(ABC):
    """Abstract base class for periodic metric collectors.

    Subclasses implement :meth:`collect`; a scheduler calls it every
    ``collect_interval`` seconds.
    """

    # Default collection interval, in seconds.
    DEFAULT_INTERVAL_SEC = 30

    def __init__(self, interval_sec: int = None):
        """
        :param interval_sec: collection interval in seconds; ``None`` falls
            back to :attr:`DEFAULT_INTERVAL_SEC`.
        """
        # ``or`` (rather than an ``is None`` check) is deliberate: a zero
        # interval would busy-loop the scheduler, so falsy values also get
        # the default.
        self.collect_interval = interval_sec or self.DEFAULT_INTERVAL_SEC

    @abstractmethod
    async def collect(self):
        """Collect one round of metric data."""

    def set_interval(self, interval_sec: int):
        """Override the collection interval (seconds)."""
        self.collect_interval = interval_sec
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import gc
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
from prometheus_client import Gauge
|
|
6
|
+
|
|
7
|
+
from .common import AbstractMetricCollector
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class PythonGcMetricsCollector(AbstractMetricCollector):
    """Export cumulative CPython GC statistics (per generation) as Prometheus gauges."""

    def __init__(self, interval_sec: int = 30):
        super().__init__(interval_sec=interval_sec)
        self.pid = os.getpid()
        # NOTE(review): Gauge() registers in the global Prometheus REGISTRY,
        # so creating a second instance in the same process raises a
        # duplicate-metric error — confirm single-instance usage.
        # total number of GC collections per generation
        self.gc_collections_total = Gauge(
            "app_python_gc_collections_total",
            "Total number of GC collections",
            ["generation", "pid"],
        )
        # cumulative number of objects collected
        self.gc_collected_total = Gauge(
            "app_python_gc_collected_total",
            "Total objects collected",
            ["generation", "pid"],
        )
        # cumulative number of uncollectable objects
        self.gc_uncollectable_total = Gauge(
            "app_python_gc_uncollectable_total",
            "Total uncollectable objects",
            ["generation", "pid"],
        )

    async def collect(self):
        """Read ``gc.get_stats()`` and update the per-generation gauges."""
        try:
            # One dict per GC generation with cumulative counters.
            stats = gc.get_stats()

            for gen, stat in enumerate(stats):
                self.gc_collections_total.labels(generation=gen, pid=self.pid).set(
                    stat.get("collections", 0)
                )
                self.gc_collected_total.labels(generation=gen, pid=self.pid).set(
                    stat.get("collected", 0)
                )
                self.gc_uncollectable_total.labels(generation=gen, pid=self.pid).set(
                    stat.get("uncollectable", 0)
                )

        except Exception:
            # logger.exception preserves the traceback; the previous
            # logger.error(f"... {e}") discarded it, making failures here
            # hard to diagnose.
            logger.exception("Error collecting GC metrics")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class PythonGcObjectMetricsCollector(AbstractMetricCollector):
    """Export the instantaneous per-generation tracked-object counts as a Prometheus gauge."""

    def __init__(self, interval_sec: int = 30):
        super().__init__(interval_sec=interval_sec)
        self.pid = os.getpid()
        # NOTE(review): Gauge() registers globally; a second instance in the
        # same process raises a duplicate-metric error.
        # current number of tracked objects per generation
        self.gc_objects_count = Gauge(
            "app_python_gc_objects_count",
            "Current number of objects",
            ["generation", "pid"],
        )

    async def collect(self):
        """Read ``gc.get_count()`` and update the per-generation gauge."""
        try:
            counts = gc.get_count()
            for gen, count in enumerate(counts):
                self.gc_objects_count.labels(generation=gen, pid=self.pid).set(count)

        except Exception:
            # Keep the traceback: logger.error(f"... {e}") used to drop it.
            logger.exception("Error collecting GC Object metrics")
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import logging
|
|
3
|
+
from typing import List
|
|
4
|
+
|
|
5
|
+
from .common import AbstractMetricCollector
|
|
6
|
+
from .gc_metrics import PythonGcMetricsCollector, PythonGcObjectMetricsCollector
|
|
7
|
+
from .system_metrics import SystemMetricsMonitor
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class MetricsScheduleMonitor:
    """Run registered metric collectors, each on its own periodic schedule."""

    def __init__(self):
        self.collectors: List[AbstractMetricCollector] = []
        # Strong references to the scheduled tasks: the event loop keeps only
        # weak references, so a task whose create_task() result is dropped
        # can be garbage-collected mid-run.
        self._tasks: List[asyncio.Task] = []

    def add(self, collector: AbstractMetricCollector) -> "MetricsScheduleMonitor":
        """Register *collector*; returns ``self`` for chaining."""
        self.collectors.append(collector)
        return self

    def register_default_collectors(self) -> "MetricsScheduleMonitor":
        """Register the default system / GC collectors."""
        self.add(SystemMetricsMonitor(interval_sec=30).register_default_collectors())
        self.add(PythonGcMetricsCollector(interval_sec=30))
        self.add(PythonGcObjectMetricsCollector(interval_sec=5))
        return self

    async def run_monitor(self):
        """Spawn one periodic task per collector (defaults registered when none exist).

        Must be called from within a running event loop.
        """

        if len(self.collectors) == 0:
            self.register_default_collectors()

        logger.info(
            f"Start system metrics monitor, collector num = {len(self.collectors)}"
        )

        async def schedule_collect(collector: AbstractMetricCollector):
            """Run *collector* forever at its own interval."""
            while True:
                try:
                    await collector.collect()
                except Exception:
                    # A single failing round must not kill the collector's
                    # schedule; previously any exception ended it permanently.
                    logger.exception("metric collector failed")
                await asyncio.sleep(collector.collect_interval)

        for c in self.collectors:
            # Keep the task reference (see self._tasks) so it cannot be GC'd.
            self._tasks.append(asyncio.create_task(schedule_collect(c)))
|