aury-boot 0.0.29__py3-none-any.whl → 0.0.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. aury/boot/_version.py +2 -2
  2. aury/boot/application/__init__.py +2 -4
  3. aury/boot/application/app/base.py +126 -2
  4. aury/boot/application/app/components.py +226 -1
  5. aury/boot/application/config/settings.py +201 -3
  6. aury/boot/application/constants/components.py +3 -0
  7. aury/boot/application/middleware/logging.py +45 -6
  8. aury/boot/commands/docs.py +40 -0
  9. aury/boot/commands/init.py +2 -0
  10. aury/boot/commands/templates/project/AGENTS.md.tpl +59 -0
  11. aury/boot/commands/templates/project/alert_rules.example.yaml.tpl +85 -0
  12. aury/boot/commands/templates/project/aury_docs/00-overview.md.tpl +3 -0
  13. aury/boot/commands/templates/project/aury_docs/17-alerting.md.tpl +210 -0
  14. aury/boot/commands/templates/project/env_templates/messaging.tpl +21 -13
  15. aury/boot/commands/templates/project/env_templates/monitoring.tpl +63 -0
  16. aury/boot/common/logging/context.py +17 -1
  17. aury/boot/common/logging/format.py +4 -0
  18. aury/boot/infrastructure/__init__.py +4 -8
  19. aury/boot/infrastructure/channel/__init__.py +9 -8
  20. aury/boot/infrastructure/channel/backends/__init__.py +2 -6
  21. aury/boot/infrastructure/channel/backends/broadcaster.py +141 -0
  22. aury/boot/infrastructure/channel/base.py +11 -4
  23. aury/boot/infrastructure/channel/manager.py +25 -24
  24. aury/boot/infrastructure/database/query_tools/__init__.py +3 -5
  25. aury/boot/infrastructure/events/__init__.py +4 -6
  26. aury/boot/infrastructure/events/backends/__init__.py +2 -4
  27. aury/boot/infrastructure/events/backends/broadcaster.py +189 -0
  28. aury/boot/infrastructure/events/base.py +9 -4
  29. aury/boot/infrastructure/events/manager.py +24 -20
  30. aury/boot/infrastructure/monitoring/__init__.py +210 -6
  31. aury/boot/infrastructure/monitoring/alerting/__init__.py +50 -0
  32. aury/boot/infrastructure/monitoring/alerting/aggregator.py +193 -0
  33. aury/boot/infrastructure/monitoring/alerting/events.py +141 -0
  34. aury/boot/infrastructure/monitoring/alerting/manager.py +430 -0
  35. aury/boot/infrastructure/monitoring/alerting/notifiers/__init__.py +16 -0
  36. aury/boot/infrastructure/monitoring/alerting/notifiers/base.py +60 -0
  37. aury/boot/infrastructure/monitoring/alerting/notifiers/feishu.py +209 -0
  38. aury/boot/infrastructure/monitoring/alerting/notifiers/webhook.py +110 -0
  39. aury/boot/infrastructure/monitoring/alerting/rules.py +179 -0
  40. aury/boot/infrastructure/monitoring/health/__init__.py +231 -0
  41. aury/boot/infrastructure/monitoring/tracing/__init__.py +55 -0
  42. aury/boot/infrastructure/monitoring/tracing/context.py +43 -0
  43. aury/boot/infrastructure/monitoring/tracing/logging.py +73 -0
  44. aury/boot/infrastructure/monitoring/tracing/processor.py +357 -0
  45. aury/boot/infrastructure/monitoring/tracing/provider.py +322 -0
  46. aury/boot/infrastructure/monitoring/tracing/tracing.py +235 -0
  47. {aury_boot-0.0.29.dist-info → aury_boot-0.0.31.dist-info}/METADATA +14 -1
  48. {aury_boot-0.0.29.dist-info → aury_boot-0.0.31.dist-info}/RECORD +50 -33
  49. aury/boot/infrastructure/channel/backends/memory.py +0 -126
  50. aury/boot/infrastructure/channel/backends/redis.py +0 -130
  51. aury/boot/infrastructure/events/backends/memory.py +0 -86
  52. aury/boot/infrastructure/events/backends/redis.py +0 -169
  53. {aury_boot-0.0.29.dist-info → aury_boot-0.0.31.dist-info}/WHEEL +0 -0
  54. {aury_boot-0.0.29.dist-info → aury_boot-0.0.31.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,209 @@
1
+ """飞书通知器。
2
+
3
+ 通过飞书机器人 Webhook 发送告警通知。
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import base64
9
+ import hashlib
10
+ import hmac
11
+ import time
12
+ from typing import TYPE_CHECKING, Any
13
+
14
+ import httpx
15
+
16
+ from aury.boot.common.logging import logger
17
+
18
+ from .base import AlertNotifier
19
+
20
+ if TYPE_CHECKING:
21
+ from ..events import AlertNotification
22
+
23
+
24
class FeishuNotifier(AlertNotifier):
    """Alert notifier that posts to a Feishu (Lark) custom-bot webhook.

    Each notification is rendered as an interactive card (``schema: "2.0"``)
    and POSTed as JSON to the configured webhook URL. When a ``secret`` is
    configured, ``timestamp`` and ``sign`` fields are added to the payload.

    Example environment configuration:
        ALERT_NOTIFIER_FEISHU_TYPE=feishu
        ALERT_NOTIFIER_FEISHU_WEBHOOK=https://open.feishu.cn/open-apis/bot/v2/hook/xxx
        ALERT_NOTIFIER_FEISHU_SECRET=xxx  # optional signing secret
    """

    # Card header colour per severity value; anything else falls back to "grey".
    _SEVERITY_COLORS = {
        "info": "blue",
        "warning": "yellow",
        "error": "red",
        "critical": "red",
    }

    # Metadata keys rendered as "**label**: value" lines, in display order.
    # "duration", "sql" and "stacktrace" are handled separately.
    _METADATA_LABELS = (
        ("endpoint", "接口"),
        ("error_type", "错误类型"),
        ("error_message", "错误信息"),
        ("task_name", "任务"),
    )

    def __init__(self, webhook: str, secret: str | None = None) -> None:
        """Store the webhook URL and the optional signing secret.

        Args:
            webhook: Feishu bot webhook URL.
            secret: Signing secret; when falsy, requests are sent unsigned.
        """
        self.webhook = webhook
        self.secret = secret

    @classmethod
    def from_config(cls, config: dict) -> "FeishuNotifier":
        """Build a notifier from a config mapping.

        Args:
            config: Mapping with a required ``webhook`` key and an optional
                ``secret`` key.

        Raises:
            ValueError: If ``webhook`` is missing or empty.
        """
        webhook = config.get("webhook")
        if not webhook:
            raise ValueError("飞书通知器配置缺少 webhook")
        return cls(webhook=webhook, secret=config.get("secret"))

    def _generate_sign(self, timestamp: int) -> str:
        """Return the request signature for ``timestamp``.

        The string ``"{timestamp}\\n{secret}"`` is used as the HMAC-SHA256
        *key* and the message is empty; the digest is then base64-encoded.
        Returns an empty string when no secret is configured.
        """
        if not self.secret:
            return ""

        string_to_sign = f"{timestamp}\n{self.secret}"
        # NOTE: the key is the whole "timestamp\nsecret" string and no message
        # is fed to the HMAC. This is intentional, not a swapped argument.
        hmac_code = hmac.new(
            string_to_sign.encode("utf-8"),
            digestmod=hashlib.sha256,
        ).digest()
        return base64.b64encode(hmac_code).decode("utf-8")

    @staticmethod
    def _format_duration(duration: Any) -> str:
        """Render a duration as ``"<seconds>s"`` with three decimals.

        Values that cannot be converted to ``float`` are rendered with
        ``str()`` instead of raising, so a malformed metadata value does not
        prevent the alert from being delivered.
        """
        try:
            return f"{float(duration):.3f}s"
        except (TypeError, ValueError):
            return str(duration)

    def _build_details(self, notification: "AlertNotification") -> list[str]:
        """Build the markdown lines of the card's detail section."""
        timestamp_text = notification.last_timestamp.strftime("%Y-%m-%d %H:%M:%S")
        details = [
            f"**服务**: {notification.service_name or '未知'}",
            f"**来源**: {notification.source}",
            f"**类型**: {notification.event_type.value}",
            f"**时间**: {timestamp_text}",
        ]

        # Only mention the count for aggregated notifications.
        if notification.count > 1:
            details.append(f"**触发次数**: {notification.count} 次")

        # Show at most three trace ids, plus the total when there are more.
        if notification.trace_ids:
            trace_str = ", ".join(notification.trace_ids[:3])
            if len(notification.trace_ids) > 3:
                trace_str += f" ... (共{len(notification.trace_ids)}个)"
            details.append(f"**Trace ID**: {trace_str}")

        metadata = notification.metadata or {}
        if "duration" in metadata:
            details.append(f"**耗时**: {self._format_duration(metadata['duration'])}")
        for key, label in self._METADATA_LABELS:
            if key in metadata:
                details.append(f"**{label}**: {metadata[key]}")

        return details

    def _build_message(self, notification: "AlertNotification") -> dict[str, Any]:
        """Build the Feishu request body for ``notification``.

        The body is an ``interactive`` message holding a ``schema: "2.0"``
        card: the notification message, a detail section, and optional SQL and
        stack-trace code blocks taken from ``notification.metadata``.
        """
        color = self._SEVERITY_COLORS.get(notification.severity.value, "grey")
        metadata = notification.metadata or {}

        elements: list[dict[str, Any]] = [
            {
                "tag": "markdown",
                "content": notification.message,
            },
            {
                "tag": "hr",
            },
            {
                "tag": "markdown",
                "content": "\n".join(self._build_details(notification)),
            },
        ]

        # SQL and stack trace get their own fenced code blocks.
        sql_content = metadata.get("sql")
        if sql_content:
            elements.append({"tag": "hr"})
            elements.append({
                "tag": "markdown",
                "content": f"**SQL**:\n```sql\n{sql_content}\n```",
            })

        stacktrace_content = metadata.get("stacktrace")
        if stacktrace_content:
            elements.append({"tag": "hr"})
            elements.append({
                "tag": "markdown",
                "content": f"**堆栈**:\n```python\n{stacktrace_content}\n```",
            })

        return {
            "msg_type": "interactive",
            "card": {
                "schema": "2.0",
                "config": {
                    "wide_screen_mode": True,
                },
                "header": {
                    "template": color,
                    "title": {
                        "tag": "plain_text",
                        "content": notification.title,
                    },
                },
                "body": {
                    "elements": elements,
                },
            },
        }

    async def send(self, notification: "AlertNotification") -> bool:
        """Send ``notification`` to the webhook.

        Returns:
            True when the response JSON reports ``code == 0`` or
            ``StatusCode == 0``; False on any other response or on any
            exception (which is logged, never propagated).
        """
        try:
            message = self._build_message(notification)

            # Sign the request only when a secret is configured.
            if self.secret:
                timestamp = int(time.time())
                message["timestamp"] = str(timestamp)
                message["sign"] = self._generate_sign(timestamp)

            async with httpx.AsyncClient(timeout=10) as client:
                response = await client.post(self.webhook, json=message)

            try:
                result = response.json()
            except ValueError:
                # Non-JSON body (e.g. a proxy error page): report the HTTP
                # status instead of an opaque decode error.
                logger.error(
                    f"飞书通知发送失败: HTTP {response.status_code} - {response.text}"
                )
                return False

            if isinstance(result, dict) and (
                result.get("code") == 0 or result.get("StatusCode") == 0
            ):
                logger.debug(f"飞书通知发送成功: {notification.title}")
                return True

            logger.error(f"飞书通知发送失败: {result}")
            return False
        except Exception as e:
            # Best-effort boundary: a failed notification must not break the caller.
            logger.error(f"飞书通知发送异常: {e}")
            return False
207
+
208
+
209
+ __all__ = ["FeishuNotifier"]
@@ -0,0 +1,110 @@
1
+ """通用 Webhook 通知器。
2
+
3
+ 通过 HTTP POST 发送告警到任意 Webhook 端点。
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from typing import TYPE_CHECKING, Any
9
+
10
+ import httpx
11
+
12
+ from aury.boot.common.logging import logger
13
+
14
+ from .base import AlertNotifier
15
+
16
+ if TYPE_CHECKING:
17
+ from ..events import AlertNotification
18
+
19
+
20
class WebhookNotifier(AlertNotifier):
    """Generic webhook notifier.

    POSTs each alert as a JSON document to a configured URL, optionally with
    custom request headers.

    Example environment configuration:
        ALERT_NOTIFIER_MYWEBHOOK_TYPE=webhook
        ALERT_NOTIFIER_MYWEBHOOK_URL=https://my-alert-system.com/api/alert
        ALERT_NOTIFIER_MYWEBHOOK_HEADERS={"Authorization": "Bearer xxx"}
    """

    def __init__(
        self,
        url: str,
        headers: dict[str, str] | None = None,
        timeout: int = 10,
    ) -> None:
        """Store the target URL, request headers and timeout.

        Args:
            url: Webhook URL.
            headers: Extra request headers; ``None`` means no extra headers.
            timeout: Request timeout in seconds.
        """
        self.url = url
        self.headers = headers or {}
        self.timeout = timeout

    @classmethod
    def from_config(cls, config: dict) -> "WebhookNotifier":
        """Build a notifier from a config mapping.

        Args:
            config: Mapping with a required ``url`` key and optional
                ``headers`` (a mapping or a JSON-object string) and
                ``timeout`` keys.

        Raises:
            ValueError: If ``url`` is missing or empty.
        """
        url = config.get("url")
        if not url:
            raise ValueError("Webhook 通知器配置缺少 url")

        headers = config.get("headers")
        if isinstance(headers, str):
            import json

            try:
                headers = json.loads(headers)
            except json.JSONDecodeError as exc:
                # Fall back to no headers, but say so: a silently dropped
                # Authorization header is very hard to diagnose later.
                # Only the decoder message is logged, never the header value.
                logger.warning(f"Webhook 通知器 headers 不是合法的 JSON,已忽略: {exc}")
                headers = {}
            else:
                if not isinstance(headers, dict):
                    # Valid JSON but not an object (e.g. a list or a string).
                    logger.warning("Webhook 通知器 headers 必须是 JSON 对象,已忽略")
                    headers = {}

        # An explicit None falls back to the default instead of crashing int().
        raw_timeout = config.get("timeout")
        timeout = 10 if raw_timeout is None else int(raw_timeout)

        return cls(url=url, headers=headers, timeout=timeout)

    def _build_payload(self, notification: "AlertNotification") -> dict[str, Any]:
        """Build the JSON request body from the notification's fields."""
        return {
            "title": notification.title,
            "message": notification.message,
            "severity": notification.severity.value,
            "event_type": notification.event_type.value,
            "source": notification.source,
            "service_name": notification.service_name,
            "count": notification.count,
            "first_timestamp": notification.first_timestamp.isoformat(),
            "last_timestamp": notification.last_timestamp.isoformat(),
            "trace_ids": notification.trace_ids,
            "metadata": notification.metadata,
        }

    async def send(self, notification: "AlertNotification") -> bool:
        """POST ``notification`` to the webhook.

        Returns:
            True when the response status is a success status; False on any
            other status or on any exception (which is logged, never
            propagated).
        """
        try:
            payload = self._build_payload(notification)

            async with httpx.AsyncClient(timeout=self.timeout) as client:
                response = await client.post(
                    self.url,
                    json=payload,
                    headers=self.headers,
                )

            if response.is_success:
                logger.debug(f"Webhook 通知发送成功: {notification.title}")
                return True

            logger.error(
                f"Webhook 通知发送失败: {response.status_code} - {response.text}"
            )
            return False
        except Exception as e:
            # Best-effort boundary: a failed notification must not break the caller.
            logger.error(f"Webhook 通知发送异常: {e}")
            return False
108
+
109
+
110
+ __all__ = ["WebhookNotifier"]
@@ -0,0 +1,179 @@
1
+ """告警规则定义。
2
+
3
+ 定义告警规则数据结构和匹配逻辑。
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import fnmatch
9
+ import re
10
+ from dataclasses import dataclass, field
11
+ from typing import TYPE_CHECKING
12
+
13
+ from .events import AlertEventType, AlertSeverity
14
+
15
+ if TYPE_CHECKING:
16
+ from .events import AlertEvent
17
+
18
+
19
@dataclass
class AlertRule:
    """A single alerting rule.

    Describes which events the rule applies to, how occurrences are
    aggregated, and which notifiers receive the resulting alert.

    Example:
        # Slow requests: 5 occurrences within 1 minute, suppressed for 5 minutes
        rule = AlertRule(
            name="slow_request",
            event_types=[AlertEventType.SLOW_REQUEST],
            threshold=1.0,
            aggregate_window=60,
            aggregate_threshold=5,
            suppress_seconds=300,
            notifiers=["feishu"],
        )

        # Critical endpoints: stricter threshold
        rule = AlertRule(
            name="critical_api",
            event_types=[AlertEventType.SLOW_REQUEST],
            path_pattern="/api/v1/payments/*",
            threshold=0.5,
            aggregate_threshold=1,
            notifiers=["feishu", "sms"],
        )
    """

    # Severities from least to most severe; the position is used for the
    # ``severity_min`` comparison in ``matches``. Unannotated on purpose so
    # the dataclass machinery does not treat it as a field.
    _SEVERITY_ORDER = (
        AlertSeverity.INFO,
        AlertSeverity.WARNING,
        AlertSeverity.ERROR,
        AlertSeverity.CRITICAL,
    )

    name: str
    event_types: list[AlertEventType]

    # Trigger conditions
    threshold: float | None = None  # slow threshold (seconds); only applied to SLOW_REQUEST / SLOW_SQL
    severity_min: AlertSeverity = AlertSeverity.WARNING

    # Filters
    source_filter: str | None = None  # api / task / scheduler
    path_pattern: str | None = None  # endpoint pattern (fnmatch-style wildcards)
    exclude_paths: list[str] | None = None  # endpoint patterns to exclude (fnmatch-style wildcards)

    # Aggregation settings
    aggregate_window: int = 10  # sliding window (seconds)
    aggregate_threshold: int = 1  # trigger threshold
    suppress_seconds: int = 300  # suppression time (seconds)

    # Notification settings
    notifiers: list[str] = field(default_factory=lambda: ["default"])

    # Compiled patterns (internal); always rebuilt in ``__post_init__``.
    _path_regex: re.Pattern | None = field(default=None, repr=False)
    _exclude_regexes: list[re.Pattern] = field(default_factory=list, repr=False)

    def __post_init__(self) -> None:
        """Compile ``path_pattern`` and ``exclude_paths`` into regexes.

        Both compiled fields are rebuilt from scratch rather than appended to.
        Otherwise ``dataclasses.replace`` would pass the already-populated
        list back in, duplicating its entries and mutating the list still
        held by the source rule.
        """
        self._path_regex = (
            re.compile(fnmatch.translate(self.path_pattern))
            if self.path_pattern
            else None
        )
        self._exclude_regexes = [
            re.compile(fnmatch.translate(pattern))
            for pattern in self.exclude_paths or []
        ]

    def matches(self, event: "AlertEvent") -> bool:
        """Return whether ``event`` satisfies every condition of this rule.

        Checks, in order: event type, minimum severity, source, path pattern,
        excluded paths, and (for SLOW_REQUEST / SLOW_SQL events only) the
        duration threshold.

        Args:
            event: The alert event to test.

        Returns:
            True if the event matches, False otherwise.
        """
        if event.event_type not in self.event_types:
            return False

        order = self._SEVERITY_ORDER
        if order.index(event.severity) < order.index(self.severity_min):
            return False

        if self.source_filter and event.source != self.source_filter:
            return False

        if self._path_regex or self._exclude_regexes:
            # ``or ""`` also covers an endpoint key that is present but None,
            # which would otherwise make ``re.match`` raise TypeError.
            endpoint = event.metadata.get("endpoint") or ""
            if self._path_regex and not self._path_regex.match(endpoint):
                return False
            # Any exclude pattern hit vetoes the alert.
            if any(regex.match(endpoint) for regex in self._exclude_regexes):
                return False

        if self.threshold is not None and event.event_type in (
            AlertEventType.SLOW_REQUEST,
            AlertEventType.SLOW_SQL,
        ):
            # A missing or None duration counts as 0.
            duration = event.metadata.get("duration") or 0
            if duration < self.threshold:
                return False

        return True
130
+
131
+
132
def load_rules_from_dict(data: dict) -> tuple[dict, list["AlertRule"]]:
    """Build alert rules from a configuration mapping.

    Keys that are present but empty (``defaults:``, ``rules:`` or
    ``event_types:`` with no value in YAML, which parse to ``None``) are
    treated the same as missing keys.

    Args:
        data: Rule configuration, typically loaded from YAML. Reads the
            ``defaults`` mapping and the ``rules`` list.

    Returns:
        A ``(defaults, rules)`` tuple.

    Raises:
        KeyError: If a rule entry has no ``name``.
        ValueError: If an event type or severity string is not a valid
            enum value.
    """
    defaults = data.get("defaults") or {}
    rules = []

    for rule_data in data.get("rules") or []:
        # Strings are converted to enum members; anything else passes through.
        event_types = [
            AlertEventType(event_type) if isinstance(event_type, str) else event_type
            for event_type in rule_data.get("event_types") or []
        ]

        # Per-rule value wins, then the defaults section, then "warning".
        severity_min = rule_data.get("severity_min", defaults.get("severity_min", "warning"))
        if isinstance(severity_min, str):
            severity_min = AlertSeverity(severity_min.lower())

        rules.append(
            AlertRule(
                name=rule_data["name"],
                event_types=event_types,
                threshold=rule_data.get("threshold", defaults.get("threshold")),
                severity_min=severity_min,
                # "source" is accepted as an alias of "source_filter".
                source_filter=rule_data.get("source_filter") or rule_data.get("source"),
                path_pattern=rule_data.get("path_pattern"),
                exclude_paths=rule_data.get("exclude_paths"),
                aggregate_window=rule_data.get("aggregate_window", defaults.get("aggregate_window", 10)),
                aggregate_threshold=rule_data.get("aggregate_threshold", defaults.get("aggregate_threshold", 1)),
                suppress_seconds=rule_data.get("suppress_seconds", defaults.get("suppress_seconds", 300)),
                notifiers=rule_data.get("notifiers", ["default"]),
            )
        )

    return defaults, rules
174
+
175
+
176
+ __all__ = [
177
+ "AlertRule",
178
+ "load_rules_from_dict",
179
+ ]