aury-boot 0.0.28__py3-none-any.whl → 0.0.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. aury/boot/_version.py +2 -2
  2. aury/boot/application/app/base.py +126 -2
  3. aury/boot/application/app/components.py +224 -1
  4. aury/boot/application/config/settings.py +195 -3
  5. aury/boot/application/constants/components.py +3 -0
  6. aury/boot/application/middleware/logging.py +45 -6
  7. aury/boot/commands/docs.py +40 -0
  8. aury/boot/commands/init.py +2 -0
  9. aury/boot/commands/templates/project/AGENTS.md.tpl +16 -1
  10. aury/boot/commands/templates/project/alert_rules.example.yaml.tpl +85 -0
  11. aury/boot/commands/templates/project/aury_docs/00-overview.md.tpl +3 -0
  12. aury/boot/commands/templates/project/aury_docs/03-service.md.tpl +60 -0
  13. aury/boot/commands/templates/project/aury_docs/17-alerting.md.tpl +210 -0
  14. aury/boot/commands/templates/project/env_templates/monitoring.tpl +61 -0
  15. aury/boot/common/logging/context.py +17 -1
  16. aury/boot/common/logging/format.py +4 -0
  17. aury/boot/domain/transaction/__init__.py +57 -0
  18. aury/boot/infrastructure/channel/base.py +6 -2
  19. aury/boot/infrastructure/database/query_tools/__init__.py +3 -5
  20. aury/boot/infrastructure/monitoring/__init__.py +210 -6
  21. aury/boot/infrastructure/monitoring/alerting/__init__.py +50 -0
  22. aury/boot/infrastructure/monitoring/alerting/aggregator.py +193 -0
  23. aury/boot/infrastructure/monitoring/alerting/events.py +141 -0
  24. aury/boot/infrastructure/monitoring/alerting/manager.py +428 -0
  25. aury/boot/infrastructure/monitoring/alerting/notifiers/__init__.py +16 -0
  26. aury/boot/infrastructure/monitoring/alerting/notifiers/base.py +60 -0
  27. aury/boot/infrastructure/monitoring/alerting/notifiers/feishu.py +209 -0
  28. aury/boot/infrastructure/monitoring/alerting/notifiers/webhook.py +110 -0
  29. aury/boot/infrastructure/monitoring/alerting/rules.py +163 -0
  30. aury/boot/infrastructure/monitoring/health/__init__.py +231 -0
  31. aury/boot/infrastructure/monitoring/tracing/__init__.py +55 -0
  32. aury/boot/infrastructure/monitoring/tracing/context.py +43 -0
  33. aury/boot/infrastructure/monitoring/tracing/logging.py +73 -0
  34. aury/boot/infrastructure/monitoring/tracing/processor.py +327 -0
  35. aury/boot/infrastructure/monitoring/tracing/provider.py +320 -0
  36. aury/boot/infrastructure/monitoring/tracing/tracing.py +235 -0
  37. {aury_boot-0.0.28.dist-info → aury_boot-0.0.30.dist-info}/METADATA +14 -1
  38. {aury_boot-0.0.28.dist-info → aury_boot-0.0.30.dist-info}/RECORD +40 -21
  39. {aury_boot-0.0.28.dist-info → aury_boot-0.0.30.dist-info}/WHEEL +0 -0
  40. {aury_boot-0.0.28.dist-info → aury_boot-0.0.30.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,231 @@
1
+ """健康检查模块(待实现)。
2
+
3
+ 提供可插拔的健康检查功能,支持 Kubernetes 标准探针。
4
+
5
+ TODO: 实现以下功能
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ # =============================================================================
11
+ # 以下为伪代码,待实现
12
+ # =============================================================================
13
+
14
+ # from abc import ABC, abstractmethod
15
+ # from enum import Enum
16
+ # from typing import Literal
17
+ # import asyncio
18
+ # from dataclasses import dataclass, field
19
+ #
20
+ #
21
+ # class ProbeType(str, Enum):
22
+ # """探针类型。"""
23
+ # LIVENESS = "liveness" # 存活检查,失败会重启
24
+ # READINESS = "readiness" # 就绪检查,失败从负载均衡移除
25
+ # STARTUP = "startup" # 启动检查
26
+ #
27
+ #
28
+ # @dataclass
29
+ # class HealthCheck:
30
+ # """健康检查项。"""
31
+ # name: str
32
+ # check_func: Callable[[], Awaitable[None]]
33
+ # timeout: float = 5.0
34
+ # critical: bool = True # 失败是否导致整体失败
35
+ #
36
+ # async def run(self) -> tuple[str, bool, str | None]:
37
+ # """执行检查。
38
+ #
39
+ # Returns:
40
+ # (name, success, error_message)
41
+ # """
42
+ # try:
43
+ # await asyncio.wait_for(self.check_func(), timeout=self.timeout)
44
+ # return (self.name, True, None)
45
+ # except asyncio.TimeoutError:
46
+ # return (self.name, False, f"Timeout after {self.timeout}s")
47
+ # except Exception as e:
48
+ # return (self.name, False, str(e))
49
+ #
50
+ #
51
+ # @dataclass
52
+ # class HealthResult:
53
+ # """健康检查结果。"""
54
+ # status: Literal["healthy", "unhealthy", "degraded"]
55
+ # checks: dict[str, dict] = field(default_factory=dict)
56
+ #
57
+ #
58
+ # class HealthManager:
59
+ # """可插拔健康检查管理器。"""
60
+ #
61
+ # _checks: dict[str, list[HealthCheck]] = {
62
+ # "liveness": [],
63
+ # "readiness": [],
64
+ # "startup": [],
65
+ # }
66
+ #
67
+ # @classmethod
68
+ # def register(
69
+ # cls,
70
+ # name: str,
71
+ # probe: Literal["liveness", "readiness", "startup"] = "readiness",
72
+ # timeout: float = 5.0,
73
+ # critical: bool = True,
74
+ # ):
75
+ # """装饰器注册检查器。
76
+ #
77
+ # 用法:
78
+ # @HealthManager.register("database", probe="readiness")
79
+ # async def check_db():
80
+ # await db.execute("SELECT 1")
81
+ #
82
+ # @HealthManager.register("redis", probe="readiness", critical=False)
83
+ # async def check_redis():
84
+ # await redis.ping()
85
+ #
86
+ # @HealthManager.register("app", probe="liveness")
87
+ # async def check_app():
88
+ # return True
89
+ # """
90
+ # def decorator(func):
91
+ # cls._checks[probe].append(
92
+ # HealthCheck(name, func, timeout, critical)
93
+ # )
94
+ # return func
95
+ # return decorator
96
+ #
97
+ # @classmethod
98
+ # async def check(
99
+ # cls,
100
+ # probe: str,
101
+ # detailed: bool = False,
102
+ # ) -> HealthResult:
103
+ # """执行指定探针的所有检查(并行+超时)。
104
+ #
105
+ # Args:
106
+ # probe: 探针类型 (liveness/readiness/startup)
107
+ # detailed: 是否返回详细信息
108
+ #
109
+ # Returns:
110
+ # HealthResult: 检查结果
111
+ # """
112
+ # checks = cls._checks.get(probe, [])
113
+ # if not checks:
114
+ # return HealthResult(status="healthy")
115
+ #
116
+ # # 并行执行所有检查
117
+ # results = await asyncio.gather(
118
+ # *[c.run() for c in checks],
119
+ # return_exceptions=True
120
+ # )
121
+ #
122
+ # # 汇总结果
123
+ # all_ok = True
124
+ # has_degraded = False
125
+ # check_results = {}
126
+ #
127
+ # for check, result in zip(checks, results):
128
+ # if isinstance(result, Exception):
129
+ # name, success, error = check.name, False, str(result)
130
+ # else:
131
+ # name, success, error = result
132
+ #
133
+ # check_results[name] = {
134
+ # "status": "ok" if success else "error",
135
+ # "error": error,
136
+ # }
137
+ #
138
+ # if not success:
139
+ # if check.critical:
140
+ # all_ok = False
141
+ # else:
142
+ # has_degraded = True
143
+ #
144
+ # if all_ok:
145
+ # status = "degraded" if has_degraded else "healthy"
146
+ # else:
147
+ # status = "unhealthy"
148
+ #
149
+ # return HealthResult(
150
+ # status=status,
151
+ # checks=check_results if detailed else {},
152
+ # )
153
+ #
154
+ # @classmethod
155
+ # def clear(cls) -> None:
156
+ # """清除所有注册的检查器。"""
157
+ # for probe in cls._checks:
158
+ # cls._checks[probe] = []
159
+ #
160
+ #
161
+ # # =============================================================================
162
+ # # 内置检查器
163
+ # # =============================================================================
164
+ #
165
+ #
166
+ # def register_database_check(db_manager) -> None:
167
+ # """注册数据库健康检查。"""
168
+ # @HealthManager.register("database", probe="readiness")
169
+ # async def check_database():
170
+ # await db_manager.health_check()
171
+ #
172
+ #
173
+ # def register_cache_check(cache_manager) -> None:
174
+ # """注册缓存健康检查。"""
175
+ # @HealthManager.register("cache", probe="readiness", critical=False)
176
+ # async def check_cache():
177
+ # await cache_manager.get("__health__", default=None)
178
+ #
179
+ #
180
+ # def register_redis_check(redis_client) -> None:
181
+ # """注册 Redis 健康检查。"""
182
+ # @HealthManager.register("redis", probe="readiness", critical=False)
183
+ # async def check_redis():
184
+ # await redis_client.ping()
185
+ #
186
+ #
187
+ # # =============================================================================
188
+ # # 路由注册(在 FoundationApp 中调用)
189
+ # # =============================================================================
190
+ #
191
+ #
192
+ # def setup_health_routes(app) -> None:
193
+ # """注册健康检查路由。
194
+ #
195
+ # 路由:
196
+ # GET /health/live -> liveness checks
197
+ # GET /health/ready -> readiness checks
198
+ # GET /health/startup -> startup checks
199
+ # """
200
+ # from fastapi import status
201
+ # from fastapi.responses import JSONResponse
202
+ #
203
+ # @app.get("/health/live", tags=["health"])
204
+ # async def liveness():
205
+ # result = await HealthManager.check("liveness")
206
+ # return JSONResponse(
207
+ # content={"status": result.status},
208
+ # status_code=status.HTTP_200_OK if result.status == "healthy"
209
+ # else status.HTTP_503_SERVICE_UNAVAILABLE,
210
+ # )
211
+ #
212
+ # @app.get("/health/ready", tags=["health"])
213
+ # async def readiness():
214
+ # result = await HealthManager.check("readiness", detailed=True)
215
+ # return JSONResponse(
216
+ # content={"status": result.status, "checks": result.checks},
217
+ # status_code=status.HTTP_200_OK if result.status != "unhealthy"
218
+ # else status.HTTP_503_SERVICE_UNAVAILABLE,
219
+ # )
220
+ #
221
+ # @app.get("/health/startup", tags=["health"])
222
+ # async def startup():
223
+ # result = await HealthManager.check("startup")
224
+ # return JSONResponse(
225
+ # content={"status": result.status},
226
+ # status_code=status.HTTP_200_OK if result.status == "healthy"
227
+ # else status.HTTP_503_SERVICE_UNAVAILABLE,
228
+ # )
229
+
230
+
231
+ __all__: list[str] = []
@@ -0,0 +1,55 @@
1
+ """OpenTelemetry 集成模块。
2
+
3
+ 提供 OpenTelemetry 遥测功能的封装,包括:
4
+ - TracerProvider 配置和初始化
5
+ - 自定义 SpanProcessor(用于触发告警)
6
+ - trace_id 获取函数
7
+ - 便捷的 span API
8
+ - 日志告警集成
9
+
10
+ 使用方式:
11
+ # 自动集成(通过 TelemetryComponent)
12
+ TELEMETRY__ENABLED=true
13
+
14
+ # 手动追踪
15
+ from aury.boot.infrastructure.monitoring.tracing import span, trace_span
16
+
17
+ @trace_span(kind="llm", model="gpt-4")
18
+ async def call_llm(prompt: str):
19
+ ...
20
+
21
+ with span("tool.search", kind="tool"):
22
+ result = await search()
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ from .context import get_otel_trace_id, is_otel_available
28
+ from .logging import setup_otel_logging
29
+ from .processor import AlertingSpanProcessor
30
+ from .provider import TelemetryConfig, TelemetryProvider
31
+ from .tracing import (
32
+ SpanKind,
33
+ set_span_attribute,
34
+ set_span_error,
35
+ span,
36
+ trace_span,
37
+ )
38
+
39
+ __all__ = [
40
+ # Provider
41
+ "AlertingSpanProcessor",
42
+ "TelemetryConfig",
43
+ "TelemetryProvider",
44
+ # Context
45
+ "get_otel_trace_id",
46
+ "is_otel_available",
47
+ # Tracing API
48
+ "SpanKind",
49
+ "set_span_attribute",
50
+ "set_span_error",
51
+ "span",
52
+ "trace_span",
53
+ # Logging
54
+ "setup_otel_logging",
55
+ ]
@@ -0,0 +1,43 @@
1
+ """OpenTelemetry context 工具函数。
2
+
3
+ 提供从 OTel 获取 trace_id 的函数。
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+
9
+ def is_otel_available() -> bool:
10
+ """检查 OpenTelemetry 是否可用。"""
11
+ try:
12
+ from opentelemetry import trace
13
+ return bool(trace) # 确保引用被使用
14
+ except ImportError:
15
+ return False
16
+
17
+
18
+ def get_otel_trace_id() -> str | None:
19
+ """从 OpenTelemetry 获取当前 trace_id。
20
+
21
+ Returns:
22
+ str | None: 32 位十六进制 trace_id,如果不可用则返回 None
23
+ """
24
+ try:
25
+ from opentelemetry import trace
26
+
27
+ span = trace.get_current_span()
28
+ if span and span.is_recording():
29
+ trace_id = span.get_span_context().trace_id
30
+ if trace_id:
31
+ return format(trace_id, "032x")
32
+ except ImportError:
33
+ pass
34
+ except Exception:
35
+ pass
36
+
37
+ return None
38
+
39
+
40
+ __all__ = [
41
+ "get_otel_trace_id",
42
+ "is_otel_available",
43
+ ]
@@ -0,0 +1,73 @@
1
+ """OTel Logs 集成。
2
+
3
+ 将 loguru 日志接入 OpenTelemetry,导出到 OTLP(Loki/Elasticsearch 等)。
4
+
5
+ 用法:
6
+ # 在 TelemetryComponent 中自动配置
7
+ TELEMETRY__LOGS_ENDPOINT=http://loki:3100
8
+
9
+ # 或手动配置:
10
+ setup_otel_logging(endpoint="http://loki:3100")
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import logging
16
+
17
+ from aury.boot.common.logging import logger
18
+
19
+ # OTel Logs 可选依赖
20
+ try:
21
+ from opentelemetry._logs import set_logger_provider
22
+ from opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter
23
+ from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler
24
+ from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
25
+ except ImportError:
26
+ OTLPLogExporter = None # type: ignore[assignment, misc]
27
+
28
+
29
def setup_otel_logging(
    endpoint: str,
    headers: dict[str, str] | None = None,
) -> None:
    """Wire an OTLP log exporter into the standard ``logging`` root logger.

    Creates a ``LoggerProvider``, installs it as the global OTel logger
    provider, attaches a batching OTLP exporter to it, and adds an OTel
    ``LoggingHandler`` to the stdlib root logger. Any failure during setup is
    logged as a warning and swallowed, so calling this never raises.

    Args:
        endpoint: OTLP log export endpoint passed to the exporter.
            NOTE(review): the exporter imported by this module is the gRPC
            variant, while the module example uses ``http://loki:3100`` --
            confirm that the endpoint and protocol match.
        headers: Optional OTLP request headers; an empty mapping is treated
            the same as ``None``.
    """
    # The guarded import at module level leaves this sentinel as None when the
    # optional OTLP log exporter package is missing.
    if OTLPLogExporter is None:
        logger.debug("OTLP 日志导出器未安装")
        return

    try:
        # Register the provider globally before attaching the exporter.
        provider = LoggerProvider()
        set_logger_provider(provider)

        provider.add_log_record_processor(
            BatchLogRecordProcessor(
                OTLPLogExporter(endpoint=endpoint, headers=headers or None)
            )
        )

        # Bridge stdlib logging records into the OTel provider.
        # NOTE(review): this attaches to the stdlib root logger only; whether
        # loguru records reach it depends on forwarding configured elsewhere
        # -- confirm.
        otel_handler = LoggingHandler(level="DEBUG", logger_provider=provider)
        logging.getLogger().addHandler(otel_handler)

        logger.info(f"Logs OTLP 导出器已配置: {endpoint}")
    except Exception as e:
        # Best-effort setup: report the failure and continue without export.
        logger.warning(f"OTLP 日志导出配置失败: {e}")
69
+
70
+
71
+ __all__ = [
72
+ "setup_otel_logging",
73
+ ]