aury-boot 0.0.28__py3-none-any.whl → 0.0.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. aury/boot/_version.py +2 -2
  2. aury/boot/application/app/base.py +126 -2
  3. aury/boot/application/app/components.py +224 -1
  4. aury/boot/application/config/settings.py +195 -3
  5. aury/boot/application/constants/components.py +3 -0
  6. aury/boot/application/middleware/logging.py +45 -6
  7. aury/boot/commands/docs.py +40 -0
  8. aury/boot/commands/init.py +2 -0
  9. aury/boot/commands/templates/project/AGENTS.md.tpl +16 -1
  10. aury/boot/commands/templates/project/alert_rules.example.yaml.tpl +85 -0
  11. aury/boot/commands/templates/project/aury_docs/00-overview.md.tpl +3 -0
  12. aury/boot/commands/templates/project/aury_docs/03-service.md.tpl +60 -0
  13. aury/boot/commands/templates/project/aury_docs/17-alerting.md.tpl +210 -0
  14. aury/boot/commands/templates/project/env_templates/monitoring.tpl +61 -0
  15. aury/boot/common/logging/context.py +17 -1
  16. aury/boot/common/logging/format.py +4 -0
  17. aury/boot/domain/transaction/__init__.py +57 -0
  18. aury/boot/infrastructure/channel/base.py +6 -2
  19. aury/boot/infrastructure/database/query_tools/__init__.py +3 -5
  20. aury/boot/infrastructure/monitoring/__init__.py +210 -6
  21. aury/boot/infrastructure/monitoring/alerting/__init__.py +50 -0
  22. aury/boot/infrastructure/monitoring/alerting/aggregator.py +193 -0
  23. aury/boot/infrastructure/monitoring/alerting/events.py +141 -0
  24. aury/boot/infrastructure/monitoring/alerting/manager.py +428 -0
  25. aury/boot/infrastructure/monitoring/alerting/notifiers/__init__.py +16 -0
  26. aury/boot/infrastructure/monitoring/alerting/notifiers/base.py +60 -0
  27. aury/boot/infrastructure/monitoring/alerting/notifiers/feishu.py +209 -0
  28. aury/boot/infrastructure/monitoring/alerting/notifiers/webhook.py +110 -0
  29. aury/boot/infrastructure/monitoring/alerting/rules.py +163 -0
  30. aury/boot/infrastructure/monitoring/health/__init__.py +231 -0
  31. aury/boot/infrastructure/monitoring/tracing/__init__.py +55 -0
  32. aury/boot/infrastructure/monitoring/tracing/context.py +43 -0
  33. aury/boot/infrastructure/monitoring/tracing/logging.py +73 -0
  34. aury/boot/infrastructure/monitoring/tracing/processor.py +327 -0
  35. aury/boot/infrastructure/monitoring/tracing/provider.py +320 -0
  36. aury/boot/infrastructure/monitoring/tracing/tracing.py +235 -0
  37. {aury_boot-0.0.28.dist-info → aury_boot-0.0.30.dist-info}/METADATA +14 -1
  38. {aury_boot-0.0.28.dist-info → aury_boot-0.0.30.dist-info}/RECORD +40 -21
  39. {aury_boot-0.0.28.dist-info → aury_boot-0.0.30.dist-info}/WHEEL +0 -0
  40. {aury_boot-0.0.28.dist-info → aury_boot-0.0.30.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,428 @@
1
+ """告警管理器。
2
+
3
+ 核心告警处理逻辑,包括:
4
+ - 规则匹配
5
+ - 事件聚合
6
+ - 通知发送
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import asyncio
12
+ from pathlib import Path
13
+ from typing import TYPE_CHECKING, Any
14
+
15
+ from aury.boot.common.logging import logger
16
+
17
+ from .aggregator import AlertAggregator
18
+ from .events import AlertEvent, AlertEventType, AlertNotification, AlertSeverity
19
+ from .rules import AlertRule, load_rules_from_dict
20
+
21
+ if TYPE_CHECKING:
22
+ from .notifiers.base import AlertNotifier
23
+
24
+
25
class AlertManager:
    """Central coordinator of the alerting pipeline.

    Handles incoming alert events: matches them against rules, aggregates
    them with one aggregator per rule, and dispatches notifications to the
    notifiers named by the matching rule.

    Usage:
        # Initialise (usually at application start-up)
        alert_manager = AlertManager.get_instance()
        await alert_manager.initialize(enabled=True, service_name="my-service")

        # Emit an alert event
        await alert_manager.emit(AlertEvent(...))

        # Or use the convenience function
        await emit_alert(AlertEventType.SLOW_REQUEST, "slow request", duration=1.5)
    """

    # Process-wide singleton returned by get_instance().
    _instance: "AlertManager | None" = None
    # Registry of notifier classes keyed by type name; stored on the class,
    # so it is shared by every instance.
    _notifier_classes: dict[str, type["AlertNotifier"]] = {}

    def __init__(self) -> None:
        """Create a disabled, uninitialised manager without rules or notifiers."""
        self._enabled = False
        self._service_name = ""
        self._rules: list[AlertRule] = []
        self._notifiers: dict[str, "AlertNotifier"] = {}
        self._aggregators: dict[str, AlertAggregator] = {}  # one aggregator per rule name
        self._defaults: dict[str, Any] = {}
        self._initialized = False
        # Strong references to in-flight delivery tasks (see _spawn_send).
        self._pending_sends: set[asyncio.Task[None]] = set()

    @classmethod
    def get_instance(cls) -> "AlertManager":
        """Return the singleton instance, creating it on first use."""
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    @classmethod
    def register_notifier_class(cls, name: str, notifier_cls: type["AlertNotifier"]) -> None:
        """Register a notifier type.

        Args:
            name: Type name (e.g. "feishu", "webhook").
            notifier_cls: Notifier class to instantiate for that type.
        """
        cls._notifier_classes[name] = notifier_cls

    async def initialize(
        self,
        *,
        enabled: bool = True,
        service_name: str = "",
        rules_file: str | Path | None = None,
        defaults: dict[str, Any] | None = None,
        notifiers: dict[str, dict[str, Any]] | None = None,
    ) -> None:
        """Initialise the alert manager.

        Args:
            enabled: Whether alerting is enabled. When False only the basic
                settings are stored and nothing else is loaded.
            service_name: Service name stamped on events that carry none.
            rules_file: Path of a YAML rules file.
            defaults: Default settings used when building the default rules.
            notifiers: Notifier configuration keyed by instance name
                (obtained from config.alert.get_notifiers()).
        """
        self._enabled = enabled
        self._service_name = service_name
        self._defaults = defaults or {}

        if not enabled:
            logger.info("告警系统已禁用")
            return

        # Register the built-in notifier types.
        self._register_builtin_notifiers()

        # Instantiate the notifiers described by the configuration.
        if notifiers:
            self._load_notifiers_from_config(notifiers)

        # Load rules from the rules file, if one was given.
        if rules_file:
            await self._load_rules_from_file(rules_file)

        # Fall back to the default rules when no rule is present.
        if not self._rules:
            self._create_default_rules()

        # Create one aggregator per rule.
        for rule in self._rules:
            self._aggregators[rule.name] = AlertAggregator(
                window_seconds=rule.aggregate_window,
                threshold=rule.aggregate_threshold,
                suppress_seconds=rule.suppress_seconds,
            )

        self._initialized = True
        logger.info(
            f"告警系统已初始化: {len(self._rules)} 条规则, "
            f"{len(self._notifiers)} 个通知器"
        )

    def _register_builtin_notifiers(self) -> None:
        """Register the built-in notifier types ("feishu" and "webhook")."""
        # Imported lazily, as in the original; the reason is not visible here.
        from .notifiers.feishu import FeishuNotifier
        from .notifiers.webhook import WebhookNotifier

        self.register_notifier_class("feishu", FeishuNotifier)
        self.register_notifier_class("webhook", WebhookNotifier)

    def _load_notifiers_from_config(
        self,
        notifiers: dict[str, dict[str, Any]],
    ) -> None:
        """Instantiate notifiers from configuration.

        Unknown notifier types are skipped with a warning and a notifier that
        fails to build is logged and skipped, so one bad entry does not stop
        the others from loading. The caller's dictionaries are left untouched.

        Args:
            notifiers: Notifier configuration keyed by instance name
                (obtained from config.alert.get_notifiers()).
        """
        for name, config in notifiers.items():
            # Work on a copy: popping "type" from the caller's dict would make
            # any later load of the same configuration resolve a different
            # notifier type.
            options = dict(config)
            # The "type" field selects the notifier class; it defaults to the
            # instance name.
            notifier_type = options.pop("type", name)
            notifier_cls = self._notifier_classes.get(notifier_type)

            if not notifier_cls:
                logger.warning(f"未知的通知器类型: {notifier_type},跳过 {name}")
                continue

            try:
                notifier = notifier_cls.from_config(options)
                self._notifiers[name] = notifier
                logger.debug(f"已加载通知器: {name} ({notifier_type})")
            except Exception as e:
                logger.error(f"加载通知器 {name} 失败: {e}")

        # When notifiers exist but none is called "default", alias the first one.
        if self._notifiers and "default" not in self._notifiers:
            first_name = next(iter(self._notifiers))
            self._notifiers["default"] = self._notifiers[first_name]

    async def _load_rules_from_file(self, rules_file: str | Path) -> None:
        """Load rules from a YAML file.

        A missing file, a missing PyYAML installation or any load error is
        logged and otherwise ignored.
        """
        rules_path = Path(rules_file)
        if not rules_path.exists():
            logger.warning(f"规则文件不存在: {rules_path}")
            return

        try:
            import yaml
            with open(rules_path, encoding="utf-8") as f:
                data = yaml.safe_load(f)

            if data:
                defaults, rules = load_rules_from_dict(data)
                self._defaults.update(defaults)
                self._rules.extend(rules)
                logger.info(f"从 {rules_path} 加载了 {len(rules)} 条规则")
        except ImportError:
            logger.warning("未安装 PyYAML,无法加载 YAML 规则文件")
        except Exception as e:
            logger.error(f"加载规则文件失败: {e}")

    def _create_default_rules(self) -> None:
        """Append the built-in default rules, parameterised by the defaults."""
        slow_request_threshold = self._defaults.get("slow_request_threshold", 1.0)
        slow_sql_threshold = self._defaults.get("slow_sql_threshold", 0.5)

        default_rules = [
            # Slow requests
            AlertRule(
                name="default_slow_request",
                event_types=[AlertEventType.SLOW_REQUEST],
                threshold=slow_request_threshold,
                aggregate_window=self._defaults.get("aggregate_window", 10),
                aggregate_threshold=self._defaults.get("slow_request_aggregate", 5),
                suppress_seconds=self._defaults.get("suppress_seconds", 300),
            ),
            # Slow SQL
            AlertRule(
                name="default_slow_sql",
                event_types=[AlertEventType.SLOW_SQL],
                threshold=slow_sql_threshold,
                aggregate_window=self._defaults.get("aggregate_window", 10),
                aggregate_threshold=self._defaults.get("slow_sql_aggregate", 5),
                suppress_seconds=self._defaults.get("suppress_seconds", 300),
            ),
            # Exceptions (alert immediately)
            AlertRule(
                name="default_exception",
                event_types=[AlertEventType.EXCEPTION],
                aggregate_threshold=self._defaults.get("exception_aggregate", 1),
                suppress_seconds=self._defaults.get("suppress_seconds", 300),
            ),
            # Task failures (alert immediately)
            AlertRule(
                name="default_task_failure",
                event_types=[AlertEventType.TASK_FAILURE],
                aggregate_threshold=1,
                suppress_seconds=60,
            ),
            # Task timeouts
            AlertRule(
                name="default_task_timeout",
                event_types=[AlertEventType.TASK_TIMEOUT],
                aggregate_threshold=1,
                suppress_seconds=300,
            ),
            # Custom alerts (alert immediately)
            AlertRule(
                name="default_custom",
                event_types=[AlertEventType.CUSTOM],
                aggregate_threshold=1,
                suppress_seconds=self._defaults.get("suppress_seconds", 10),
            ),
        ]

        self._rules.extend(default_rules)

    def add_rule(self, rule: AlertRule) -> None:
        """Add an alert rule together with its aggregator.

        Args:
            rule: Alert rule to append to the rule list.
        """
        self._rules.append(rule)
        self._aggregators[rule.name] = AlertAggregator(
            window_seconds=rule.aggregate_window,
            threshold=rule.aggregate_threshold,
            suppress_seconds=rule.suppress_seconds,
        )

    def register_notifier(self, name: str, notifier: "AlertNotifier") -> None:
        """Register a notifier instance.

        Args:
            name: Notifier name that rules refer to.
            notifier: Notifier instance.
        """
        self._notifiers[name] = notifier

    async def emit(self, event: AlertEvent) -> None:
        """Process an alert event.

        Does nothing when alerting is disabled; logs a warning and returns
        when the manager has not been initialised.

        Args:
            event: Alert event. Its service_name is filled in from the
                manager when empty.
        """
        if not self._enabled:
            return

        if not self._initialized:
            logger.warning("告警系统未初始化,跳过事件")
            return

        # Stamp the service name when the event has none.
        if not event.service_name:
            event.service_name = self._service_name

        # Match rules in order.
        for rule in self._rules:
            if rule.matches(event):
                aggregator = self._aggregators.get(rule.name)
                if aggregator and aggregator.should_alert(event):
                    await self._send_notification(rule, event, aggregator)
                break  # only the first matching rule is applied

    async def _send_notification(
        self,
        rule: AlertRule,
        event: AlertEvent,
        aggregator: AlertAggregator,
    ) -> None:
        """Build a notification for the event and hand it to the rule's notifiers."""
        # Fetch the aggregation info for this event.
        agg_info = aggregator.get_aggregation_info(event)

        # Build the notification.
        notification = AlertNotification(
            title=self._build_title(event, agg_info["count"]),
            message=event.message,
            severity=event.severity,
            event_type=event.event_type,
            source=event.source,
            service_name=event.service_name,
            count=agg_info["count"],
            trace_ids=agg_info["trace_ids"],
            metadata=event.metadata,
        )

        # Send to every notifier named by the rule.
        for notifier_name in rule.notifiers:
            notifier = self._notifiers.get(notifier_name)
            if notifier:
                # Deliver in the background so the caller is not blocked.
                self._spawn_send(notifier, notification)
            else:
                logger.warning(f"通知器不存在: {notifier_name}")

    def _spawn_send(
        self,
        notifier: "AlertNotifier",
        notification: AlertNotification,
    ) -> "asyncio.Task[None]":
        """Start a background delivery task and keep it referenced until done."""
        task = asyncio.create_task(self._safe_send(notifier, notification))
        # The event loop keeps only weak references to tasks; hold a strong one
        # so a pending delivery cannot be garbage-collected before it finishes.
        self._pending_sends.add(task)
        task.add_done_callback(self._pending_sends.discard)
        return task

    async def _safe_send(self, notifier: "AlertNotifier", notification: AlertNotification) -> None:
        """Send a notification, logging (not raising) any exception."""
        try:
            await notifier.send(notification)
        except Exception as e:
            logger.error(f"发送通知失败: {e}")

    def _build_title(self, event: AlertEvent, count: int) -> str:
        """Build the notification title.

        The title is "[SEVERITY] <type name>", with " x<count>" appended when
        more than one event was aggregated.
        """
        type_names = {
            AlertEventType.SLOW_REQUEST: "慢请求",
            AlertEventType.SLOW_SQL: "慢SQL",
            AlertEventType.EXCEPTION: "异常",
            AlertEventType.TASK_FAILURE: "任务失败",
            AlertEventType.TASK_TIMEOUT: "任务超时",
            AlertEventType.CUSTOM: "告警",
        }
        type_name = type_names.get(event.event_type, "告警")

        if count > 1:
            return f"[{event.severity.value.upper()}] {type_name} x{count}"
        return f"[{event.severity.value.upper()}] {type_name}"

    @property
    def is_enabled(self) -> bool:
        """Whether alerting is enabled."""
        return self._enabled

    @property
    def is_initialized(self) -> bool:
        """Whether initialisation has completed."""
        return self._initialized
354
+
355
+
356
+ # 便捷函数
357
async def emit_alert(
    event_type: AlertEventType,
    message: str,
    *,
    severity: AlertSeverity = AlertSeverity.WARNING,
    trace_id: str | None = None,
    source: str | None = None,
    **metadata: Any,
) -> None:
    """Build an alert event and emit it through the singleton manager.

    The trace id and the source are filled in automatically when omitted.

    Args:
        event_type: Event type.
        message: Alert message.
        severity: Severity level.
        trace_id: Trace id; looked up via get_trace_id() when None, falling
            back to an empty string.
        source: Source label; detected from the call stack when None.
        **metadata: Extra metadata attached to the event.

    Example:
        await emit_alert(
            AlertEventType.SLOW_SQL,
            "slow SQL query",
            duration=2.5,
            sql="SELECT ...",
        )
    """
    from aury.boot.common.logging import get_trace_id

    resolved_trace_id = trace_id if trace_id is not None else (get_trace_id() or "")
    resolved_source = source if source is not None else _detect_source()

    alert = AlertEvent(
        event_type=event_type,
        severity=severity,
        message=message,
        trace_id=resolved_trace_id,
        source=resolved_source,
        metadata=metadata,
    )
    await AlertManager.get_instance().emit(alert)
405
+
406
+
407
def _detect_source() -> str:
    """Guess where the current call originates from.

    Walks the call stack from the innermost frame outwards and inspects the
    ``__name__`` of each frame's module (case-insensitively). The first frame
    whose module name contains a known keyword decides the result:

    - "scheduler"           -> "scheduler"
    - "task" or "worker"    -> "task"
    - "middleware" or "api" -> "api"

    Returns:
        The detected label, or "unknown" when no frame matches.
    """
    import inspect

    # Keywords are checked in this priority order for every frame.
    markers = (
        (("scheduler",), "scheduler"),
        (("task", "worker"), "task"),
        (("middleware", "api"), "api"),
    )

    # Follow f_back links instead of calling inspect.stack(): stack() builds a
    # FrameInfo with source context for every frame, which is needless work
    # when only the module names are required.
    frame = inspect.currentframe()  # may be None on some implementations
    try:
        while frame is not None:
            module = str(frame.f_globals.get("__name__") or "").lower()
            for keywords, label in markers:
                if any(keyword in module for keyword in keywords):
                    return label
            frame = frame.f_back
    finally:
        # Drop the frame reference so no reference cycle is left behind.
        del frame

    return "unknown"
423
+
424
+
425
# Names exported by `from <module> import *`.
__all__ = [
    "AlertManager",
    "emit_alert",
]
@@ -0,0 +1,16 @@
1
+ """告警通知器模块。
2
+
3
+ 提供内置的通知器实现:
4
+ - FeishuNotifier: 飞书机器人
5
+ - WebhookNotifier: 通用 Webhook
6
+ """
7
+
8
+ from .base import AlertNotifier
9
+ from .feishu import FeishuNotifier
10
+ from .webhook import WebhookNotifier
11
+
12
# Names exported by `from <package> import *`: the notifier base class and
# the two built-in notifier implementations imported above.
__all__ = [
    "AlertNotifier",
    "FeishuNotifier",
    "WebhookNotifier",
]
@@ -0,0 +1,60 @@
1
+ """通知器基类。
2
+
3
+ 定义通知器接口,所有通知器实现都应继承此类。
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from abc import ABC, abstractmethod
9
+ from typing import TYPE_CHECKING
10
+
11
+ if TYPE_CHECKING:
12
+ from ..events import AlertNotification
13
+
14
+
15
class AlertNotifier(ABC):
    """Abstract base class for alert notifiers.

    Every notifier should subclass this and implement both ``from_config``
    and ``send``.

    Example:
        class MyNotifier(AlertNotifier):
            def __init__(self, api_key: str):
                self.api_key = api_key

            @classmethod
            def from_config(cls, config: dict) -> "MyNotifier":
                return cls(api_key=config["api_key"])

            async def send(self, notification: AlertNotification) -> bool:
                # deliver the notification
                ...
    """

    @classmethod
    @abstractmethod
    def from_config(cls, config: dict) -> "AlertNotifier":
        """Create a notifier instance from a configuration dictionary.

        Args:
            config: Configuration dictionary (parsed from environment variables).

        Returns:
            The notifier instance.
        """
        ...

    @abstractmethod
    async def send(self, notification: "AlertNotification") -> bool:
        """Send an alert notification.

        Args:
            notification: The alert notification to deliver.

        Returns:
            bool: Whether the notification was sent successfully.
        """
        ...
58
+
59
+
60
# Names exported by `from <module> import *`.
__all__ = ["AlertNotifier"]