aury-boot 0.0.29__py3-none-any.whl → 0.0.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aury/boot/_version.py +2 -2
- aury/boot/application/__init__.py +2 -4
- aury/boot/application/app/base.py +126 -2
- aury/boot/application/app/components.py +226 -1
- aury/boot/application/config/settings.py +201 -3
- aury/boot/application/constants/components.py +3 -0
- aury/boot/application/middleware/logging.py +45 -6
- aury/boot/commands/docs.py +40 -0
- aury/boot/commands/init.py +2 -0
- aury/boot/commands/templates/project/AGENTS.md.tpl +59 -0
- aury/boot/commands/templates/project/alert_rules.example.yaml.tpl +85 -0
- aury/boot/commands/templates/project/aury_docs/00-overview.md.tpl +3 -0
- aury/boot/commands/templates/project/aury_docs/17-alerting.md.tpl +210 -0
- aury/boot/commands/templates/project/env_templates/messaging.tpl +21 -13
- aury/boot/commands/templates/project/env_templates/monitoring.tpl +63 -0
- aury/boot/common/logging/context.py +17 -1
- aury/boot/common/logging/format.py +4 -0
- aury/boot/infrastructure/__init__.py +4 -8
- aury/boot/infrastructure/channel/__init__.py +9 -8
- aury/boot/infrastructure/channel/backends/__init__.py +2 -6
- aury/boot/infrastructure/channel/backends/broadcaster.py +141 -0
- aury/boot/infrastructure/channel/base.py +11 -4
- aury/boot/infrastructure/channel/manager.py +25 -24
- aury/boot/infrastructure/database/query_tools/__init__.py +3 -5
- aury/boot/infrastructure/events/__init__.py +4 -6
- aury/boot/infrastructure/events/backends/__init__.py +2 -4
- aury/boot/infrastructure/events/backends/broadcaster.py +189 -0
- aury/boot/infrastructure/events/base.py +9 -4
- aury/boot/infrastructure/events/manager.py +24 -20
- aury/boot/infrastructure/monitoring/__init__.py +210 -6
- aury/boot/infrastructure/monitoring/alerting/__init__.py +50 -0
- aury/boot/infrastructure/monitoring/alerting/aggregator.py +193 -0
- aury/boot/infrastructure/monitoring/alerting/events.py +141 -0
- aury/boot/infrastructure/monitoring/alerting/manager.py +430 -0
- aury/boot/infrastructure/monitoring/alerting/notifiers/__init__.py +16 -0
- aury/boot/infrastructure/monitoring/alerting/notifiers/base.py +60 -0
- aury/boot/infrastructure/monitoring/alerting/notifiers/feishu.py +209 -0
- aury/boot/infrastructure/monitoring/alerting/notifiers/webhook.py +110 -0
- aury/boot/infrastructure/monitoring/alerting/rules.py +179 -0
- aury/boot/infrastructure/monitoring/health/__init__.py +231 -0
- aury/boot/infrastructure/monitoring/tracing/__init__.py +55 -0
- aury/boot/infrastructure/monitoring/tracing/context.py +43 -0
- aury/boot/infrastructure/monitoring/tracing/logging.py +73 -0
- aury/boot/infrastructure/monitoring/tracing/processor.py +357 -0
- aury/boot/infrastructure/monitoring/tracing/provider.py +322 -0
- aury/boot/infrastructure/monitoring/tracing/tracing.py +235 -0
- {aury_boot-0.0.29.dist-info → aury_boot-0.0.31.dist-info}/METADATA +14 -1
- {aury_boot-0.0.29.dist-info → aury_boot-0.0.31.dist-info}/RECORD +50 -33
- aury/boot/infrastructure/channel/backends/memory.py +0 -126
- aury/boot/infrastructure/channel/backends/redis.py +0 -130
- aury/boot/infrastructure/events/backends/memory.py +0 -86
- aury/boot/infrastructure/events/backends/redis.py +0 -169
- {aury_boot-0.0.29.dist-info → aury_boot-0.0.31.dist-info}/WHEEL +0 -0
- {aury_boot-0.0.29.dist-info → aury_boot-0.0.31.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
"""健康检查模块(待实现)。
|
|
2
|
+
|
|
3
|
+
提供可插拔的健康检查功能,支持 Kubernetes 标准探针。
|
|
4
|
+
|
|
5
|
+
TODO: 实现以下功能
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
# =============================================================================
|
|
11
|
+
# 以下为伪代码,待实现
|
|
12
|
+
# =============================================================================
|
|
13
|
+
|
|
14
|
+
# from abc import ABC, abstractmethod
|
|
15
|
+
# from enum import Enum
|
|
16
|
+
# from typing import Awaitable, Callable, Literal
|
|
17
|
+
# import asyncio
|
|
18
|
+
# from dataclasses import dataclass, field
|
|
19
|
+
#
|
|
20
|
+
#
|
|
21
|
+
# class ProbeType(str, Enum):
|
|
22
|
+
# """探针类型。"""
|
|
23
|
+
# LIVENESS = "liveness" # 存活检查,失败会重启
|
|
24
|
+
# READINESS = "readiness" # 就绪检查,失败从负载均衡移除
|
|
25
|
+
# STARTUP = "startup" # 启动检查
|
|
26
|
+
#
|
|
27
|
+
#
|
|
28
|
+
# @dataclass
|
|
29
|
+
# class HealthCheck:
|
|
30
|
+
# """健康检查项。"""
|
|
31
|
+
# name: str
|
|
32
|
+
# check_func: Callable[[], Awaitable[None]]
|
|
33
|
+
# timeout: float = 5.0
|
|
34
|
+
# critical: bool = True # 失败是否导致整体失败
|
|
35
|
+
#
|
|
36
|
+
# async def run(self) -> tuple[str, bool, str | None]:
|
|
37
|
+
# """执行检查。
|
|
38
|
+
#
|
|
39
|
+
# Returns:
|
|
40
|
+
# (name, success, error_message)
|
|
41
|
+
# """
|
|
42
|
+
# try:
|
|
43
|
+
# await asyncio.wait_for(self.check_func(), timeout=self.timeout)
|
|
44
|
+
# return (self.name, True, None)
|
|
45
|
+
# except asyncio.TimeoutError:
|
|
46
|
+
# return (self.name, False, f"Timeout after {self.timeout}s")
|
|
47
|
+
# except Exception as e:
|
|
48
|
+
# return (self.name, False, str(e))
|
|
49
|
+
#
|
|
50
|
+
#
|
|
51
|
+
# @dataclass
|
|
52
|
+
# class HealthResult:
|
|
53
|
+
# """健康检查结果。"""
|
|
54
|
+
# status: Literal["healthy", "unhealthy", "degraded"]
|
|
55
|
+
# checks: dict[str, dict] = field(default_factory=dict)
|
|
56
|
+
#
|
|
57
|
+
#
|
|
58
|
+
# class HealthManager:
|
|
59
|
+
# """可插拔健康检查管理器。"""
|
|
60
|
+
#
|
|
61
|
+
# _checks: dict[str, list[HealthCheck]] = {
|
|
62
|
+
# "liveness": [],
|
|
63
|
+
# "readiness": [],
|
|
64
|
+
# "startup": [],
|
|
65
|
+
# }
|
|
66
|
+
#
|
|
67
|
+
# @classmethod
|
|
68
|
+
# def register(
|
|
69
|
+
# cls,
|
|
70
|
+
# name: str,
|
|
71
|
+
# probe: Literal["liveness", "readiness", "startup"] = "readiness",
|
|
72
|
+
# timeout: float = 5.0,
|
|
73
|
+
# critical: bool = True,
|
|
74
|
+
# ):
|
|
75
|
+
# """装饰器注册检查器。
|
|
76
|
+
#
|
|
77
|
+
# 用法:
|
|
78
|
+
# @HealthManager.register("database", probe="readiness")
|
|
79
|
+
# async def check_db():
|
|
80
|
+
# await db.execute("SELECT 1")
|
|
81
|
+
#
|
|
82
|
+
# @HealthManager.register("redis", probe="readiness", critical=False)
|
|
83
|
+
# async def check_redis():
|
|
84
|
+
# await redis.ping()
|
|
85
|
+
#
|
|
86
|
+
# @HealthManager.register("app", probe="liveness")
|
|
87
|
+
# async def check_app():
|
|
88
|
+
# return True
|
|
89
|
+
# """
|
|
90
|
+
# def decorator(func):
|
|
91
|
+
# cls._checks[probe].append(
|
|
92
|
+
# HealthCheck(name, func, timeout, critical)
|
|
93
|
+
# )
|
|
94
|
+
# return func
|
|
95
|
+
# return decorator
|
|
96
|
+
#
|
|
97
|
+
# @classmethod
|
|
98
|
+
# async def check(
|
|
99
|
+
# cls,
|
|
100
|
+
# probe: str,
|
|
101
|
+
# detailed: bool = False,
|
|
102
|
+
# ) -> HealthResult:
|
|
103
|
+
# """执行指定探针的所有检查(并行+超时)。
|
|
104
|
+
#
|
|
105
|
+
# Args:
|
|
106
|
+
# probe: 探针类型 (liveness/readiness/startup)
|
|
107
|
+
# detailed: 是否返回详细信息
|
|
108
|
+
#
|
|
109
|
+
# Returns:
|
|
110
|
+
# HealthResult: 检查结果
|
|
111
|
+
# """
|
|
112
|
+
# checks = cls._checks.get(probe, [])
|
|
113
|
+
# if not checks:
|
|
114
|
+
# return HealthResult(status="healthy")
|
|
115
|
+
#
|
|
116
|
+
# # 并行执行所有检查
|
|
117
|
+
# results = await asyncio.gather(
|
|
118
|
+
# *[c.run() for c in checks],
|
|
119
|
+
# return_exceptions=True
|
|
120
|
+
# )
|
|
121
|
+
#
|
|
122
|
+
# # 汇总结果
|
|
123
|
+
# all_ok = True
|
|
124
|
+
# has_degraded = False
|
|
125
|
+
# check_results = {}
|
|
126
|
+
#
|
|
127
|
+
# for check, result in zip(checks, results):
|
|
128
|
+
# if isinstance(result, Exception):
|
|
129
|
+
# name, success, error = check.name, False, str(result)
|
|
130
|
+
# else:
|
|
131
|
+
# name, success, error = result
|
|
132
|
+
#
|
|
133
|
+
# check_results[name] = {
|
|
134
|
+
# "status": "ok" if success else "error",
|
|
135
|
+
# "error": error,
|
|
136
|
+
# }
|
|
137
|
+
#
|
|
138
|
+
# if not success:
|
|
139
|
+
# if check.critical:
|
|
140
|
+
# all_ok = False
|
|
141
|
+
# else:
|
|
142
|
+
# has_degraded = True
|
|
143
|
+
#
|
|
144
|
+
# if all_ok:
|
|
145
|
+
# status = "degraded" if has_degraded else "healthy"
|
|
146
|
+
# else:
|
|
147
|
+
# status = "unhealthy"
|
|
148
|
+
#
|
|
149
|
+
# return HealthResult(
|
|
150
|
+
# status=status,
|
|
151
|
+
# checks=check_results if detailed else {},
|
|
152
|
+
# )
|
|
153
|
+
#
|
|
154
|
+
# @classmethod
|
|
155
|
+
# def clear(cls) -> None:
|
|
156
|
+
# """清除所有注册的检查器。"""
|
|
157
|
+
# for probe in cls._checks:
|
|
158
|
+
# cls._checks[probe] = []
|
|
159
|
+
#
|
|
160
|
+
#
|
|
161
|
+
# # =============================================================================
|
|
162
|
+
# # 内置检查器
|
|
163
|
+
# # =============================================================================
|
|
164
|
+
#
|
|
165
|
+
#
|
|
166
|
+
# def register_database_check(db_manager) -> None:
|
|
167
|
+
# """注册数据库健康检查。"""
|
|
168
|
+
# @HealthManager.register("database", probe="readiness")
|
|
169
|
+
# async def check_database():
|
|
170
|
+
# await db_manager.health_check()
|
|
171
|
+
#
|
|
172
|
+
#
|
|
173
|
+
# def register_cache_check(cache_manager) -> None:
|
|
174
|
+
# """注册缓存健康检查。"""
|
|
175
|
+
# @HealthManager.register("cache", probe="readiness", critical=False)
|
|
176
|
+
# async def check_cache():
|
|
177
|
+
# await cache_manager.get("__health__", default=None)
|
|
178
|
+
#
|
|
179
|
+
#
|
|
180
|
+
# def register_redis_check(redis_client) -> None:
|
|
181
|
+
# """注册 Redis 健康检查。"""
|
|
182
|
+
# @HealthManager.register("redis", probe="readiness", critical=False)
|
|
183
|
+
# async def check_redis():
|
|
184
|
+
# await redis_client.ping()
|
|
185
|
+
#
|
|
186
|
+
#
|
|
187
|
+
# # =============================================================================
|
|
188
|
+
# # 路由注册(在 FoundationApp 中调用)
|
|
189
|
+
# # =============================================================================
|
|
190
|
+
#
|
|
191
|
+
#
|
|
192
|
+
# def setup_health_routes(app) -> None:
|
|
193
|
+
# """注册健康检查路由。
|
|
194
|
+
#
|
|
195
|
+
# 路由:
|
|
196
|
+
# GET /health/live -> liveness checks
|
|
197
|
+
# GET /health/ready -> readiness checks
|
|
198
|
+
# GET /health/startup -> startup checks
|
|
199
|
+
# """
|
|
200
|
+
# from fastapi import status
|
|
201
|
+
# from fastapi.responses import JSONResponse
|
|
202
|
+
#
|
|
203
|
+
# @app.get("/health/live", tags=["health"])
|
|
204
|
+
# async def liveness():
|
|
205
|
+
# result = await HealthManager.check("liveness")
|
|
206
|
+
# return JSONResponse(
|
|
207
|
+
# content={"status": result.status},
|
|
208
|
+
# status_code=status.HTTP_200_OK if result.status == "healthy"
|
|
209
|
+
# else status.HTTP_503_SERVICE_UNAVAILABLE,
|
|
210
|
+
# )
|
|
211
|
+
#
|
|
212
|
+
# @app.get("/health/ready", tags=["health"])
|
|
213
|
+
# async def readiness():
|
|
214
|
+
# result = await HealthManager.check("readiness", detailed=True)
|
|
215
|
+
# return JSONResponse(
|
|
216
|
+
# content={"status": result.status, "checks": result.checks},
|
|
217
|
+
# status_code=status.HTTP_200_OK if result.status != "unhealthy"
|
|
218
|
+
# else status.HTTP_503_SERVICE_UNAVAILABLE,
|
|
219
|
+
# )
|
|
220
|
+
#
|
|
221
|
+
# @app.get("/health/startup", tags=["health"])
|
|
222
|
+
# async def startup():
|
|
223
|
+
# result = await HealthManager.check("startup")
|
|
224
|
+
# return JSONResponse(
|
|
225
|
+
# content={"status": result.status},
|
|
226
|
+
# status_code=status.HTTP_200_OK if result.status == "healthy"
|
|
227
|
+
# else status.HTTP_503_SERVICE_UNAVAILABLE,
|
|
228
|
+
# )
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
__all__: list[str] = []
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""OpenTelemetry 集成模块。
|
|
2
|
+
|
|
3
|
+
提供 OpenTelemetry 遥测功能的封装,包括:
|
|
4
|
+
- TracerProvider 配置和初始化
|
|
5
|
+
- 自定义 SpanProcessor(用于触发告警)
|
|
6
|
+
- trace_id 获取函数
|
|
7
|
+
- 便捷的 span API
|
|
8
|
+
- 日志告警集成
|
|
9
|
+
|
|
10
|
+
使用方式:
|
|
11
|
+
# 自动集成(通过 TelemetryComponent)
|
|
12
|
+
TELEMETRY__ENABLED=true
|
|
13
|
+
|
|
14
|
+
# 手动追踪
|
|
15
|
+
from aury.boot.infrastructure.monitoring.tracing import span, trace_span
|
|
16
|
+
|
|
17
|
+
@trace_span(kind="llm", model="gpt-4")
|
|
18
|
+
async def call_llm(prompt: str):
|
|
19
|
+
...
|
|
20
|
+
|
|
21
|
+
with span("tool.search", kind="tool"):
|
|
22
|
+
result = await search()
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
from .context import get_otel_trace_id, is_otel_available
|
|
28
|
+
from .logging import setup_otel_logging
|
|
29
|
+
from .processor import AlertingSpanProcessor
|
|
30
|
+
from .provider import TelemetryConfig, TelemetryProvider
|
|
31
|
+
from .tracing import (
|
|
32
|
+
SpanKind,
|
|
33
|
+
set_span_attribute,
|
|
34
|
+
set_span_error,
|
|
35
|
+
span,
|
|
36
|
+
trace_span,
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
__all__ = [
|
|
40
|
+
# Provider
|
|
41
|
+
"AlertingSpanProcessor",
|
|
42
|
+
"TelemetryConfig",
|
|
43
|
+
"TelemetryProvider",
|
|
44
|
+
# Context
|
|
45
|
+
"get_otel_trace_id",
|
|
46
|
+
"is_otel_available",
|
|
47
|
+
# Tracing API
|
|
48
|
+
"SpanKind",
|
|
49
|
+
"set_span_attribute",
|
|
50
|
+
"set_span_error",
|
|
51
|
+
"span",
|
|
52
|
+
"trace_span",
|
|
53
|
+
# Logging
|
|
54
|
+
"setup_otel_logging",
|
|
55
|
+
]
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""OpenTelemetry context 工具函数。
|
|
2
|
+
|
|
3
|
+
提供从 OTel 获取 trace_id 的函数。
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def is_otel_available() -> bool:
    """Report whether the OpenTelemetry tracing API can be imported.

    Returns:
        The truthiness of the imported ``opentelemetry.trace`` module, or
        ``False`` when importing it raises ``ImportError``.
    """
    try:
        from opentelemetry import trace as otel_trace
    except ImportError:
        return False
    # Reference the imported module so the import is actually used.
    return bool(otel_trace)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def get_otel_trace_id() -> str | None:
    """Return the trace id of the current OpenTelemetry span, if any.

    The lookup is best-effort: any failure, including OpenTelemetry not
    being importable, yields ``None`` instead of raising.

    Returns:
        The trace id formatted as a 32-character, zero-padded lowercase
        hexadecimal string, or ``None`` when there is no current recording
        span, the trace id is zero, or the lookup fails.
    """
    try:
        from opentelemetry import trace

        current_span = trace.get_current_span()
        if current_span and current_span.is_recording():
            trace_id = current_span.get_span_context().trace_id
            if trace_id:
                return format(trace_id, "032x")
    except Exception:
        # Deliberately swallowed: this helper must never break its caller.
        # ImportError is a subclass of Exception, so a missing OpenTelemetry
        # installation is covered by this single handler.
        pass

    return None
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
__all__ = [
|
|
41
|
+
"get_otel_trace_id",
|
|
42
|
+
"is_otel_available",
|
|
43
|
+
]
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""OTel Logs 集成。
|
|
2
|
+
|
|
3
|
+
将 loguru 日志接入 OpenTelemetry,导出到 OTLP(Loki/Elasticsearch 等)。
|
|
4
|
+
|
|
5
|
+
用法:
|
|
6
|
+
# 在 TelemetryComponent 中自动配置
|
|
7
|
+
TELEMETRY__LOGS_ENDPOINT=http://loki:3100
|
|
8
|
+
|
|
9
|
+
# 或手动配置:
|
|
10
|
+
setup_otel_logging(endpoint="http://loki:3100")
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import logging
|
|
16
|
+
|
|
17
|
+
from aury.boot.common.logging import logger
|
|
18
|
+
|
|
19
|
+
# OTel Logs 可选依赖
|
|
20
|
+
try:
|
|
21
|
+
from opentelemetry._logs import set_logger_provider
|
|
22
|
+
from opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter
|
|
23
|
+
from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler
|
|
24
|
+
from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
|
|
25
|
+
except ImportError:
|
|
26
|
+
OTLPLogExporter = None # type: ignore[assignment, misc]
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def setup_otel_logging(
    endpoint: str,
    headers: dict[str, str] | None = None,
) -> None:
    """Configure OTLP log export and attach it to the root stdlib logger.

    Creates a ``LoggerProvider``, installs it as the global logger provider,
    registers a batching OTLP exporter on it, and adds an OTel
    ``LoggingHandler`` to the standard-library root logger. When the optional
    OTLP exporter is not installed the function logs a debug message and
    returns. Any exception raised during setup is logged as a warning and
    not re-raised.

    Args:
        endpoint: OTLP log export endpoint, passed to ``OTLPLogExporter``.
        headers: Optional OTLP request headers; an empty mapping is passed
            to the exporter as ``None``.
    """
    if OTLPLogExporter is None:
        # Optional OTel logs dependencies are missing; nothing to configure.
        logger.debug("OTLP 日志导出器未安装")
        return

    try:
        # Create the provider and register it globally.
        provider = LoggerProvider()
        set_logger_provider(provider)

        # Ship log records in batches through the OTLP exporter.
        otlp_exporter = OTLPLogExporter(endpoint=endpoint, headers=headers or None)
        batch_processor = BatchLogRecordProcessor(otlp_exporter)
        provider.add_log_record_processor(batch_processor)

        # Handler that feeds standard-library log records into the provider.
        otel_handler = LoggingHandler(level="DEBUG", logger_provider=provider)

        # NOTE(review): the original comment said this forwards loguru to
        # standard logging, but only the stdlib root logger gets the handler
        # here; confirm loguru records are propagated to stdlib elsewhere.
        root_logger = logging.getLogger()
        root_logger.addHandler(otel_handler)

        logger.info(f"Logs OTLP 导出器已配置: {endpoint}")
    except Exception as e:
        logger.warning(f"OTLP 日志导出配置失败: {e}")
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
__all__ = [
|
|
72
|
+
"setup_otel_logging",
|
|
73
|
+
]
|