aury-boot 0.0.28__py3-none-any.whl → 0.0.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. aury/boot/_version.py +2 -2
  2. aury/boot/application/app/base.py +126 -2
  3. aury/boot/application/app/components.py +224 -1
  4. aury/boot/application/config/settings.py +195 -3
  5. aury/boot/application/constants/components.py +3 -0
  6. aury/boot/application/middleware/logging.py +45 -6
  7. aury/boot/commands/docs.py +40 -0
  8. aury/boot/commands/init.py +2 -0
  9. aury/boot/commands/templates/project/AGENTS.md.tpl +16 -1
  10. aury/boot/commands/templates/project/alert_rules.example.yaml.tpl +85 -0
  11. aury/boot/commands/templates/project/aury_docs/00-overview.md.tpl +3 -0
  12. aury/boot/commands/templates/project/aury_docs/03-service.md.tpl +60 -0
  13. aury/boot/commands/templates/project/aury_docs/17-alerting.md.tpl +210 -0
  14. aury/boot/commands/templates/project/env_templates/monitoring.tpl +61 -0
  15. aury/boot/common/logging/context.py +17 -1
  16. aury/boot/common/logging/format.py +4 -0
  17. aury/boot/domain/transaction/__init__.py +57 -0
  18. aury/boot/infrastructure/channel/base.py +6 -2
  19. aury/boot/infrastructure/database/query_tools/__init__.py +3 -5
  20. aury/boot/infrastructure/monitoring/__init__.py +210 -6
  21. aury/boot/infrastructure/monitoring/alerting/__init__.py +50 -0
  22. aury/boot/infrastructure/monitoring/alerting/aggregator.py +193 -0
  23. aury/boot/infrastructure/monitoring/alerting/events.py +141 -0
  24. aury/boot/infrastructure/monitoring/alerting/manager.py +428 -0
  25. aury/boot/infrastructure/monitoring/alerting/notifiers/__init__.py +16 -0
  26. aury/boot/infrastructure/monitoring/alerting/notifiers/base.py +60 -0
  27. aury/boot/infrastructure/monitoring/alerting/notifiers/feishu.py +209 -0
  28. aury/boot/infrastructure/monitoring/alerting/notifiers/webhook.py +110 -0
  29. aury/boot/infrastructure/monitoring/alerting/rules.py +163 -0
  30. aury/boot/infrastructure/monitoring/health/__init__.py +231 -0
  31. aury/boot/infrastructure/monitoring/tracing/__init__.py +55 -0
  32. aury/boot/infrastructure/monitoring/tracing/context.py +43 -0
  33. aury/boot/infrastructure/monitoring/tracing/logging.py +73 -0
  34. aury/boot/infrastructure/monitoring/tracing/processor.py +327 -0
  35. aury/boot/infrastructure/monitoring/tracing/provider.py +320 -0
  36. aury/boot/infrastructure/monitoring/tracing/tracing.py +235 -0
  37. {aury_boot-0.0.28.dist-info → aury_boot-0.0.30.dist-info}/METADATA +14 -1
  38. {aury_boot-0.0.28.dist-info → aury_boot-0.0.30.dist-info}/RECORD +40 -21
  39. {aury_boot-0.0.28.dist-info → aury_boot-0.0.30.dist-info}/WHEEL +0 -0
  40. {aury_boot-0.0.28.dist-info → aury_boot-0.0.30.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,210 @@
1
+ # 告警系统
2
+
3
+ 本文档介绍 {project_name} 项目中的告警系统配置和使用方法。
4
+
5
+ ## 快速开始(简版配置)
6
+
7
+ 所有告警发送到同一个飞书群:
8
+
9
+ ```bash
10
+ # .env
11
+ ALERT__ENABLED=true
12
+ ALERT__NOTIFIERS__DEFAULT__TYPE=feishu
13
+ ALERT__NOTIFIERS__DEFAULT__WEBHOOK=https://open.feishu.cn/open-apis/bot/v2/hook/xxx
14
+ ```
15
+
16
+ 框架会自动创建默认规则,检测慢请求、慢 SQL、异常并发送告警。
17
+
18
+ ---
19
+
20
+ ## 完整版配置(分群告警)
21
+
22
+ 不同类型的告警发送到不同的飞书群,避免一个群接收所有消息。
23
+
24
+ ### 1. 环境变量
25
+
26
+ ```bash
27
+ # .env
28
+
29
+ # ============ 告警系统 ============
30
+ ALERT__ENABLED=true
31
+ ALERT__RULES_FILE=alert_rules.yaml
32
+
33
+ # 性能群(慢请求、慢SQL)
34
+ ALERT__NOTIFIERS__PERF_GROUP__TYPE=feishu
35
+ ALERT__NOTIFIERS__PERF_GROUP__WEBHOOK=https://open.feishu.cn/open-apis/bot/v2/hook/perf-xxx
36
+ ALERT__NOTIFIERS__PERF_GROUP__SECRET=your-secret # 可选
37
+
38
+ # 错误群(异常)
39
+ ALERT__NOTIFIERS__ERROR_GROUP__TYPE=feishu
40
+ ALERT__NOTIFIERS__ERROR_GROUP__WEBHOOK=https://open.feishu.cn/open-apis/bot/v2/hook/error-xxx
41
+
42
+ # 运维群(任务失败、超时)
43
+ ALERT__NOTIFIERS__OPS_GROUP__TYPE=feishu
44
+ ALERT__NOTIFIERS__OPS_GROUP__WEBHOOK=https://open.feishu.cn/open-apis/bot/v2/hook/ops-xxx
45
+
46
+ # 慢操作阈值
47
+ ALERT__SLOW_REQUEST_THRESHOLD=1.0
48
+ ALERT__SLOW_SQL_THRESHOLD=0.5
49
+ ```
50
+
51
+ ### 2. 规则文件
52
+
53
+ 生成规则模板:`aury docs alert-rules`
54
+
55
+ ```yaml
56
+ # alert_rules.yaml
57
+ defaults:
58
+ slow_request_threshold: 1.0
59
+ slow_sql_threshold: 0.5
60
+ aggregate_window: 10
61
+ suppress_seconds: 300
62
+
63
+ rules:
64
+ # 慢请求 → 性能群
65
+ - name: slow_request
66
+ event_types: [slow_request]
67
+ aggregate_threshold: 5
68
+ notifiers: [perf_group]
69
+
70
+ # 慢 SQL → 性能群
71
+ - name: slow_sql
72
+ event_types: [slow_sql]
73
+ aggregate_threshold: 10
74
+ notifiers: [perf_group]
75
+
76
+ # 异常 → 错误群(立即告警)
77
+ - name: exception
78
+ event_types: [exception]
79
+ aggregate_threshold: 1
80
+ suppress_seconds: 60
81
+ notifiers: [error_group]
82
+
83
+ # 任务失败/超时 → 运维群
84
+ - name: task_issues
85
+ event_types: [task_failure, task_timeout]
86
+ aggregate_threshold: 1
87
+ notifiers: [ops_group]
88
+ ```
89
+
90
+ ### 3. 效果
91
+
92
+ | 事件类型 | 目标群 | 触发条件 |
93
+ |---------|--------|----------|
94
+ | 慢请求(>1s) | 性能群 | 10秒内累计5次 |
95
+ | 慢 SQL(>0.5s) | 性能群 | 10秒内累计10次 |
96
+ | 异常 | 错误群 | 立即告警 |
97
+ | 任务失败/超时 | 运维群 | 立即告警 |
98
+
99
+ ---
100
+
101
+ ## 代码中手动发送告警
102
+
103
+ ```python
104
+ from aury.boot.infrastructure.monitoring.alerting import emit_alert, AlertEventType, AlertSeverity
105
+
106
+ # 发送自定义告警
107
+ await emit_alert(
108
+ AlertEventType.CUSTOM,
109
+ "订单支付超时",
110
+ severity=AlertSeverity.WARNING,
111
+ order_id="12345",
112
+ user_id="u001",
113
+ )
114
+
115
+ # 发送慢 SQL 告警(通常由框架自动触发)
116
+ await emit_alert(
117
+ AlertEventType.SLOW_SQL,
118
+ "慢查询告警",
119
+ duration=2.5,
120
+ sql="SELECT * FROM orders WHERE ...",
121
+ )
122
+ ```
123
+
124
+ ---
125
+
126
+ ## 通知器类型
127
+
128
+ ### 飞书(feishu)
129
+
130
+ ```bash
131
+ ALERT__NOTIFIERS__XXX__TYPE=feishu
132
+ ALERT__NOTIFIERS__XXX__WEBHOOK=https://open.feishu.cn/open-apis/bot/v2/hook/xxx
133
+ ALERT__NOTIFIERS__XXX__SECRET=xxx # 可选,签名密钥
134
+ ```
135
+
136
+ ### 通用 Webhook
137
+
138
+ ```bash
139
+ ALERT__NOTIFIERS__XXX__TYPE=webhook
140
+ ALERT__NOTIFIERS__XXX__URL=https://your-system.com/alert
141
+ ALERT__NOTIFIERS__XXX__METHOD=POST
142
+ ALERT__NOTIFIERS__XXX__HEADERS='{{"Authorization": "Bearer xxx"}}'
143
+ ```
144
+
145
+ ### 自定义通知器
146
+
147
+ ```python
148
+ from aury.boot.infrastructure.monitoring.alerting import AlertNotifier, AlertManager
149
+
150
+ class DingTalkNotifier(AlertNotifier):
151
+ @classmethod
152
+ def from_config(cls, config: dict) -> "DingTalkNotifier":
153
+ return cls(webhook=config["webhook"])
154
+
155
+ async def send(self, notification) -> bool:
156
+ # 实现发送逻辑
157
+ ...
158
+
159
+ # 注册
160
+ AlertManager.register_notifier_class("dingtalk", DingTalkNotifier)
161
+ ```
162
+
163
+ 然后在环境变量中使用:
164
+
165
+ ```bash
166
+ ALERT__NOTIFIERS__DING__TYPE=dingtalk
167
+ ALERT__NOTIFIERS__DING__WEBHOOK=https://oapi.dingtalk.com/robot/send?access_token=xxx
168
+ ```
169
+
170
+ ---
171
+
172
+ ## 告警事件类型
173
+
174
+ | 类型 | 说明 | 自动触发 |
175
+ |-----|------|----------|
176
+ | `slow_request` | 慢 HTTP 请求 | 框架自动检测 |
177
+ | `slow_sql` | 慢 SQL 查询 | 框架自动检测 |
178
+ | `exception` | 异常/错误 | 框架自动检测 |
179
+ | `task_failure` | 任务执行失败 | 任务系统触发 |
180
+ | `task_timeout` | 任务执行超时 | 任务系统触发 |
181
+ | `custom` | 自定义告警 | 手动调用 emit_alert |
182
+
183
+ ---
184
+
185
+ ## 告警抑制与聚合
186
+
187
+ - **聚合窗口**:在窗口时间内累计触发次数,达到阈值才发送告警
188
+ - **抑制时间**:同一告警在抑制时间内不会重复发送
189
+ - **示例**:`aggregate_window=60, aggregate_threshold=5` 表示 60 秒内触发 5 次才告警
190
+
191
+ 这样可以避免告警风暴,同时不遗漏重要问题。
192
+
193
+ ---
194
+
195
+ ## 环境变量参考
196
+
197
+ | 变量 | 说明 | 默认值 |
198
+ |------|------|--------|
199
+ | `ALERT__ENABLED` | 是否启用告警 | `false` |
200
+ | `ALERT__RULES_FILE` | 规则文件路径 | - |
201
+ | `ALERT__SLOW_REQUEST_THRESHOLD` | 慢请求阈值(秒) | `1.0` |
202
+ | `ALERT__SLOW_SQL_THRESHOLD` | 慢 SQL 阈值(秒) | `0.5` |
203
+ | `ALERT__ALERT_ON_SLOW_REQUEST` | 是否对慢请求告警 | `true` |
204
+ | `ALERT__ALERT_ON_SLOW_SQL` | 是否对慢 SQL 告警 | `true` |
205
+ | `ALERT__ALERT_ON_ERROR` | 是否对异常告警 | `true` |
206
+ | `ALERT__AGGREGATE_WINDOW` | 聚合窗口(秒) | `10` |
207
+ | `ALERT__SLOW_REQUEST_AGGREGATE` | 慢请求触发阈值(窗口内次数) | `5` |
208
+ | `ALERT__SLOW_SQL_AGGREGATE` | 慢 SQL 触发阈值 | `10` |
209
+ | `ALERT__EXCEPTION_AGGREGATE` | 异常触发阈值 | `1` |
210
+ | `ALERT__SUPPRESS_SECONDS` | 抑制时间(秒) | `300` |
@@ -0,0 +1,61 @@
1
+ # =============================================================================
2
+ # 监控与告警配置
3
+ # =============================================================================
4
+
5
+ # ---------- OpenTelemetry 遥测 ----------
6
+ # 启用后自动 instrument FastAPI、SQLAlchemy、httpx
7
+ TELEMETRY__ENABLED=false
8
+ # TELEMETRY__SAMPLING_RATE=1.0
9
+
10
+ # OTLP 导出(可选)
11
+ # TELEMETRY__TRACES_ENDPOINT=http://jaeger:4317
12
+ # TELEMETRY__LOGS_ENDPOINT=http://loki:3100
13
+ # TELEMETRY__METRICS_ENDPOINT=http://prometheus:9090
14
+
15
+ # ---------- 告警系统 ----------
16
+ ALERT__ENABLED=false
17
+ # 慢操作阈值
18
+ ALERT__SLOW_REQUEST_THRESHOLD=1.0
19
+ ALERT__SLOW_SQL_THRESHOLD=0.5
20
+ # 告警开关
21
+ ALERT__ALERT_ON_SLOW_REQUEST=true
22
+ ALERT__ALERT_ON_SLOW_SQL=true
23
+ ALERT__ALERT_ON_ERROR=true
24
+ # 聚合与抑制
25
+ ALERT__AGGREGATE_WINDOW=10
26
+ ALERT__SLOW_REQUEST_AGGREGATE=5
27
+ ALERT__SLOW_SQL_AGGREGATE=10
28
+ ALERT__EXCEPTION_AGGREGATE=1
29
+ ALERT__SUPPRESS_SECONDS=300
30
+
31
+ # ---------- 告警通知器(简版:单群)----------
32
+ # 所有告警发到同一个飞书群,取消注释并填写 Webhook 即可
33
+ # ALERT__NOTIFIERS__DEFAULT__TYPE=feishu
34
+ # ALERT__NOTIFIERS__DEFAULT__WEBHOOK=https://open.feishu.cn/open-apis/bot/v2/hook/your-webhook-id
35
+ # ALERT__NOTIFIERS__DEFAULT__SECRET=your-secret
36
+
37
+ # ---------- 告警通知器(完整版:分群告警)----------
38
+ # 不同类型告警发到不同群,需配合 alert_rules.yaml 使用
39
+ # 生成规则模板:aury docs alert-rules
40
+ # ALERT__RULES_FILE=alert_rules.yaml
41
+
42
+ # 性能群(慢请求、慢SQL)
43
+ # ALERT__NOTIFIERS__PERF_GROUP__TYPE=feishu
44
+ # ALERT__NOTIFIERS__PERF_GROUP__WEBHOOK=https://open.feishu.cn/open-apis/bot/v2/hook/perf-webhook-id
45
+ # ALERT__NOTIFIERS__PERF_GROUP__SECRET=perf-secret
46
+
47
+ # 错误群(异常)
48
+ # ALERT__NOTIFIERS__ERROR_GROUP__TYPE=feishu
49
+ # ALERT__NOTIFIERS__ERROR_GROUP__WEBHOOK=https://open.feishu.cn/open-apis/bot/v2/hook/error-webhook-id
50
+ # ALERT__NOTIFIERS__ERROR_GROUP__SECRET=error-secret
51
+
52
+ # 运维群(任务失败、超时)
53
+ # ALERT__NOTIFIERS__OPS_GROUP__TYPE=feishu
54
+ # ALERT__NOTIFIERS__OPS_GROUP__WEBHOOK=https://open.feishu.cn/open-apis/bot/v2/hook/ops-webhook-id
55
+ # ALERT__NOTIFIERS__OPS_GROUP__SECRET=ops-secret
56
+
57
+ # 通用 Webhook(自定义系统)
58
+ # ALERT__NOTIFIERS__CUSTOM__TYPE=webhook
59
+ # ALERT__NOTIFIERS__CUSTOM__URL=https://your-system.com/api/alert
60
+ # ALERT__NOTIFIERS__CUSTOM__METHOD=POST
61
+ # ALERT__NOTIFIERS__CUSTOM__HEADERS='{{"Authorization": "Bearer your-token"}}'
@@ -58,8 +58,24 @@ def set_service_context(context: ServiceContext | str) -> None:
58
58
  def get_trace_id() -> str:
59
59
  """获取当前链路追踪ID。
60
60
 
61
- 如果尚未设置,则生成一个新的随机 ID
61
+ 优先从 OpenTelemetry 获取(如果已启用),否则使用内置 trace_id
62
+ 如果都没有设置,则生成一个新的随机 ID。
62
63
  """
64
+ # 优先从 OTel 获取
65
+ try:
66
+ from opentelemetry import trace
67
+
68
+ span = trace.get_current_span()
69
+ if span and span.is_recording():
70
+ otel_trace_id = span.get_span_context().trace_id
71
+ if otel_trace_id:
72
+ return format(otel_trace_id, "032x")
73
+ except ImportError:
74
+ pass
75
+ except Exception:
76
+ pass
77
+
78
+ # 回退到内置实现
63
79
  trace_id = _trace_id_var.get()
64
80
  if not trace_id:
65
81
  trace_id = str(uuid.uuid4())
@@ -309,7 +309,11 @@ def log_exception(
309
309
 
310
310
  __all__ = [
311
311
  "create_console_sink",
312
+ "format_exception_compact",
312
313
  "format_exception_java_style",
313
314
  "format_message",
314
315
  "log_exception",
315
316
  ]
317
+
318
+ # 别名导出(保持内部使用 _ 前缀)
319
+ format_exception_compact = _format_exception_compact
@@ -390,11 +390,68 @@ def requires_transaction(func: Callable) -> Callable:
390
390
  return wrapper
391
391
 
392
392
 
393
+ def isolated_task[T](func: Callable[..., T]) -> Callable[..., T]:
394
+ """后台任务隔离装饰器。
395
+
396
+ 重置事务上下文,避免从父协程继承 _transaction_depth 导致 auto_commit 失效。
397
+
398
+ 问题背景:
399
+ asyncio.create_task() 会继承父协程的 contextvars。如果父协程在 @transactional 中,
400
+ _transaction_depth > 0,子任务的 auto_commit 和 transactional_context 都会认为
401
+ "在事务中" 而跳过 commit,导致 session 关闭时 rollback。
402
+
403
+ 用法:
404
+ @isolated_task
405
+ async def upload_cover(space_id: int, cover_url: str):
406
+ async with db.session() as session:
407
+ async with transactional_context(session):
408
+ repo = SpaceRepository(session, Space)
409
+ space = await repo.get(space_id)
410
+ await repo.update(space, {"cover": cover_url})
411
+ # 现在会正常 commit
412
+
413
+ # 在 Service 中 spawn
414
+ asyncio.create_task(upload_cover(space.id, url))
415
+ """
416
+ @wraps(func)
417
+ async def wrapper(*args, **kwargs):
418
+ # 重置事务深度,让当前任务成为独立的事务上下文
419
+ token = _transaction_depth.set(0)
420
+ try:
421
+ return await func(*args, **kwargs)
422
+ finally:
423
+ _transaction_depth.reset(token)
424
+
425
+ return wrapper
426
+
427
+
428
@asynccontextmanager
async def isolated_context() -> AsyncGenerator[None]:
    """Async context manager that isolates a background task's transaction state.

    Does the same job as ``@isolated_task``, in context-manager form: the
    transaction depth is set to 0 for the duration of the ``async with`` body
    and restored to its previous value on exit.

    Usage:
        async def background_job():
            async with isolated_context():
                async with db.session() as session:
                    async with transactional_context(session):
                        ...
    """
    # Enter with depth 0 so nested transactional code treats itself as outermost.
    depth_token = _transaction_depth.set(0)
    try:
        yield
    finally:
        # Restore the previous depth even if the body raised.
        _transaction_depth.reset(depth_token)
446
+
447
+
393
448
  __all__ = [
394
449
  "TransactionManager",
395
450
  "TransactionRequiredError",
396
451
  "_transaction_depth", # 内部使用,不对外文档化
397
452
  "ensure_transaction",
453
+ "isolated_context",
454
+ "isolated_task",
398
455
  "on_commit",
399
456
  "requires_transaction",
400
457
  "transactional",
@@ -31,7 +31,10 @@ class ChannelMessage:
31
31
  timestamp: datetime = field(default_factory=datetime.now)
32
32
 
33
33
  def to_sse(self) -> str:
34
- """转换为 SSE 格式。"""
34
+ """转换为 SSE 格式。
35
+
36
+ SSE 规范要求每个消息以双换行符结束。
37
+ """
35
38
  lines = []
36
39
  if self.id:
37
40
  lines.append(f"id: {self.id}")
@@ -42,7 +45,8 @@ class ChannelMessage:
42
45
  for line in data_str.split("\n"):
43
46
  lines.append(f"data: {line}")
44
47
  lines.append("") # 空行结束
45
- return "\n".join(lines)
48
+ # SSE 规范要求消息以双换行符结束
49
+ return "\n".join(lines) + "\n"
46
50
 
47
51
 
48
52
  class IChannel(ABC):
@@ -12,8 +12,8 @@ import time
12
12
 
13
13
  from aury.boot.common.logging import logger
14
14
 
15
- # 查询性能监控配置
16
- QUERY_SLOW_THRESHOLD = 1.0 # 慢查询阈值(秒)
15
+ # 默认慢查询阈值(秒)
16
+ DEFAULT_SLOW_QUERY_THRESHOLD = 1.0
17
17
 
18
18
 
19
19
  def cache_query(
@@ -86,7 +86,7 @@ def cache_query(
86
86
 
87
87
 
88
88
  def monitor_query(
89
- slow_threshold: float = QUERY_SLOW_THRESHOLD,
89
+ slow_threshold: float = DEFAULT_SLOW_QUERY_THRESHOLD,
90
90
  enable_explain: bool = False,
91
91
  ) -> Callable:
92
92
  """查询性能监控装饰器。
@@ -104,7 +104,6 @@ def monitor_query(
104
104
  async def list(self, **filters):
105
105
  return await super().list(**filters)
106
106
  """
107
-
108
107
  def decorator(func: Callable) -> Callable:
109
108
  @wraps(func)
110
109
  async def wrapper(self, *args, **kwargs):
@@ -154,7 +153,6 @@ def monitor_query(
154
153
 
155
154
  return decorator
156
155
 
157
-
158
156
  __all__ = [
159
157
  "cache_query",
160
158
  "monitor_query",