aury-boot 0.0.39__py3-none-any.whl → 0.0.41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. aury/boot/_version.py +2 -2
  2. aury/boot/application/adapter/http.py +17 -6
  3. aury/boot/application/app/base.py +1 -0
  4. aury/boot/application/app/components.py +93 -3
  5. aury/boot/application/config/settings.py +80 -2
  6. aury/boot/commands/init.py +20 -0
  7. aury/boot/commands/pkg.py +31 -1
  8. aury/boot/commands/templates/project/aury_docs/00-overview.md.tpl +1 -0
  9. aury/boot/commands/templates/project/aury_docs/18-monitoring-profiling.md.tpl +239 -0
  10. aury/boot/commands/templates/project/env_templates/monitoring.tpl +15 -0
  11. aury/boot/common/logging/setup.py +8 -3
  12. aury/boot/infrastructure/cache/redis.py +82 -16
  13. aury/boot/infrastructure/channel/__init__.py +2 -1
  14. aury/boot/infrastructure/channel/backends/__init__.py +2 -1
  15. aury/boot/infrastructure/channel/backends/redis_cluster.py +124 -0
  16. aury/boot/infrastructure/channel/backends/redis_cluster_channel.py +139 -0
  17. aury/boot/infrastructure/channel/base.py +2 -0
  18. aury/boot/infrastructure/channel/manager.py +9 -1
  19. aury/boot/infrastructure/clients/redis/manager.py +90 -19
  20. aury/boot/infrastructure/database/manager.py +6 -4
  21. aury/boot/infrastructure/monitoring/__init__.py +10 -2
  22. aury/boot/infrastructure/monitoring/alerting/notifiers/feishu.py +33 -16
  23. aury/boot/infrastructure/monitoring/alerting/notifiers/webhook.py +14 -13
  24. aury/boot/infrastructure/monitoring/profiling/__init__.py +664 -0
  25. aury/boot/infrastructure/scheduler/__init__.py +2 -0
  26. aury/boot/infrastructure/scheduler/jobstores/__init__.py +10 -0
  27. aury/boot/infrastructure/scheduler/jobstores/redis_cluster.py +255 -0
  28. aury/boot/infrastructure/scheduler/manager.py +15 -3
  29. aury/boot/toolkit/http/__init__.py +180 -85
  30. {aury_boot-0.0.39.dist-info → aury_boot-0.0.41.dist-info}/METADATA +14 -4
  31. {aury_boot-0.0.39.dist-info → aury_boot-0.0.41.dist-info}/RECORD +33 -27
  32. {aury_boot-0.0.39.dist-info → aury_boot-0.0.41.dist-info}/WHEEL +0 -0
  33. {aury_boot-0.0.39.dist-info → aury_boot-0.0.41.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,664 @@
1
+ """Profiling 模块。
2
+
3
+ 提供持续性能分析和问题时刻状态快照功能:
4
+ - Pyroscope 集成:持续采样生成火焰图
5
+ - 事件循环阻塞检测:检测同步代码阻塞协程
6
+
7
+ 使用方式:
8
+ # 通过配置启用
9
+ PROFILING__ENABLED=true
10
+ PROFILING__PYROSCOPE_ENDPOINT=http://pyroscope:4040
11
+
12
+ # 事件循环阻塞检测
13
+ PROFILING__BLOCKING_DETECTOR_ENABLED=true
14
+ PROFILING__BLOCKING_THRESHOLD_MS=100
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import asyncio
20
+ import sys
21
+ import threading
22
+ import time
23
+ import traceback
24
+ from collections import deque
25
+ from dataclasses import dataclass, field
26
+ from datetime import datetime
27
+ from typing import TYPE_CHECKING, Any
28
+
29
+ from aury.boot.common.logging import logger
30
+
31
+ # Pyroscope 可选依赖
32
+ try:
33
+ import pyroscope
34
+ PYROSCOPE_AVAILABLE = True
35
+ except ImportError:
36
+ pyroscope = None # type: ignore[assignment]
37
+ PYROSCOPE_AVAILABLE = False
38
+
39
+ # psutil 可选依赖(用于进程资源监控)
40
+ try:
41
+ import psutil
42
+ PSUTIL_AVAILABLE = True
43
+ except ImportError:
44
+ psutil = None # type: ignore[assignment]
45
+ PSUTIL_AVAILABLE = False
46
+
47
+ if TYPE_CHECKING:
48
+ from collections.abc import Callable
49
+
50
+
51
+ # =============================================================================
52
+ # 配置
53
+ # =============================================================================
54
+
55
+
56
@dataclass
class ProfilingConfig:
    """Settings for the profiling subsystem.

    Bundles the Pyroscope connection options together with the tuning knobs
    of the event-loop blocking detector.
    """

    # --- Pyroscope -----------------------------------------------------
    # Pyroscope is only started by ProfilingManager when this flag is set.
    enabled: bool = False
    # Server address handed to pyroscope.configure(); start is skipped when unset.
    pyroscope_endpoint: str | None = None
    # Optional auth token; an empty string is sent when it is None.
    pyroscope_auth_token: str | None = None
    # Reported to Pyroscope as the application name.
    service_name: str = "aury-service"
    environment: str = "development"

    # --- Event-loop blocking detector ------------------------------------
    blocking_detector_enabled: bool = False
    # Pause between two consecutive probes of the event loop (milliseconds).
    blocking_check_interval_ms: float = 100
    # A probe slower than this counts as a blocking event (milliseconds).
    blocking_threshold_ms: float = 100
    # Blocking events at or above this are logged/alerted as severe (milliseconds).
    blocking_severe_threshold_ms: float = 500
    blocking_alert_enabled: bool = True
    # Minimum gap between two alerts (seconds).
    blocking_alert_cooldown_seconds: float = 60
    # Maximum number of blocking events kept in memory.
    blocking_max_history: int = 50

    # Length of the sliding statistics window (seconds); default is 5 minutes.
    blocking_stats_window_seconds: float = 300

    # Extra tags forwarded to Pyroscope.
    tags: dict[str, str] = field(default_factory=dict)
81
+
82
+
83
+ # =============================================================================
84
+ # Pyroscope 集成
85
+ # =============================================================================
86
+
87
+
88
class PyroscopeProfiler:
    """Thin lifecycle wrapper around the optional ``pyroscope`` client.

    ``start()`` configures the client from the given ProfilingConfig and
    ``stop()`` shuts it down; both are no-ops when there is nothing to do.
    """

    def __init__(self, config: ProfilingConfig) -> None:
        self._initialized = False
        self._config = config

    @property
    def is_running(self) -> bool:
        """Whether ``start()`` has succeeded and ``stop()`` has not yet run."""
        return self._initialized

    def start(self) -> bool:
        """Configure Pyroscope.

        Returns:
            True when profiling is (already) active, False when the package
            is missing, no endpoint is configured, or configuration failed.
        """
        if self._initialized:
            return True

        if not PYROSCOPE_AVAILABLE:
            logger.warning("Pyroscope 未安装,跳过 profiling 初始化 (pip install pyroscope-io)")
            return False

        cfg = self._config
        endpoint = cfg.pyroscope_endpoint
        if not endpoint:
            logger.warning("Pyroscope endpoint 未配置,跳过初始化")
            return False

        try:
            pyroscope.configure(
                application_name=cfg.service_name,
                server_address=endpoint,
                # The client is given an empty string rather than None.
                auth_token=cfg.pyroscope_auth_token or "",
                tags=cfg.tags,
            )
            self._initialized = True
            logger.info(
                f"Pyroscope profiling 已启动 | endpoint={endpoint} service={cfg.service_name}"
            )
            return True
        except Exception as exc:
            logger.error(f"Pyroscope 初始化失败: {exc}")
            return False

    def stop(self) -> None:
        """Shut the Pyroscope client down if it was started."""
        if not self._initialized:
            return

        try:
            if PYROSCOPE_AVAILABLE:
                pyroscope.shutdown()
            self._initialized = False
            logger.info("Pyroscope profiling 已停止")
        except Exception as exc:
            # The flag stays set when shutdown itself raised.
            logger.warning(f"Pyroscope 关闭失败: {exc}")
146
+
147
+
148
+ # =============================================================================
149
+ # 事件循环阻塞检测
150
+ # =============================================================================
151
+
152
+
153
@dataclass
class BlockingEvent:
    """One recorded occurrence of the event loop being blocked."""

    # When the event was recorded.
    timestamp: datetime
    # Measured delay of the probe, in milliseconds.
    blocked_ms: float
    # Best stack for the event (the sample with the most user-code frames).
    main_thread_stack: list[dict[str, Any]]
    # Every distinct stack sampled while the loop was blocked.
    all_sampled_stacks: list[list[dict[str, Any]]] = field(default_factory=list)
    # Process statistics captured with the event; None when unavailable.
    process_stats: dict[str, Any] | None = None
162
+
163
+
164
class EventLoopBlockingDetector:
    """Detect synchronous code that blocks an asyncio event loop.

    A daemon thread periodically submits a no-op coroutine to the monitored
    loop and measures how long it takes to complete. When the delay exceeds
    ``blocking_threshold_ms`` the detector records the main thread's call
    stack and, if psutil is installed, process statistics; it then logs the
    event and optionally emits an alert.

    Useful for tracking down:
    - synchronous I/O inside coroutines
    - CPU-bound code running on the event loop
    - deadlocks or long lock waits

    NOTE(review): stacks are always captured from the *main* thread, so the
    monitored loop is presumably expected to run there — confirm for setups
    that run the loop in another thread.
    """

    def __init__(self, config: ProfilingConfig) -> None:
        self._config = config
        self._running = False
        self._thread: threading.Thread | None = None
        self._loop: asyncio.AbstractEventLoop | None = None
        self._blocking_events: list[BlockingEvent] = []
        # Guards _blocking_events and _check_history, which are written by the
        # monitor thread and read by get_status()/clear_history().
        self._lock = threading.Lock()
        # Sliding window of (wall-clock timestamp, was_blocked) per probe.
        self._check_history: deque[tuple[float, bool]] = deque()
        self._last_alert_time: float = 0
        # psutil.Process handle, reused so cpu_percent() has a previous sample
        # to compare against (a fresh handle always reports 0.0).
        self._process: Any = None

    def start(self, loop: asyncio.AbstractEventLoop | None = None) -> None:
        """Start the monitor thread.

        Args:
            loop: Event loop to watch; defaults to the currently running loop.
                If there is none, a warning is logged and nothing is started.
        """
        if self._running:
            return

        try:
            self._loop = loop or asyncio.get_running_loop()
        except RuntimeError:
            logger.warning("无法获取事件循环,阻塞检测器未启动")
            return

        self._prime_process_stats()
        self._running = True
        self._thread = threading.Thread(
            target=self._monitor_loop,
            daemon=True,
            name="blocking-detector",
        )
        self._thread.start()
        logger.info(
            f"事件循环阻塞检测已启动 | "
            f"阈值={self._config.blocking_threshold_ms}ms "
            f"严重阈值={self._config.blocking_severe_threshold_ms}ms"
        )

    def stop(self) -> None:
        """Ask the monitor thread to exit and wait up to one second for it."""
        self._running = False
        if self._thread:
            self._thread.join(timeout=1.0)
            self._thread = None
        logger.info("事件循环阻塞检测已停止")

    def _monitor_loop(self) -> None:
        """Body of the monitor thread: probe the loop until stopped."""
        interval_s = self._config.blocking_check_interval_ms / 1000

        while self._running and self._loop:
            try:
                ping = self._ping()
                start_time = time.perf_counter()
                try:
                    future = asyncio.run_coroutine_threadsafe(ping, self._loop)
                except RuntimeError:
                    # The loop has been closed: nothing left to monitor. Close
                    # the coroutine so it is not reported as never awaited.
                    ping.close()
                    self._running = False
                    logger.debug("事件循环已关闭,阻塞检测线程退出")
                    return

                completed, sampled_stacks = self._await_ping(future, start_time)
                if not self._running:
                    # stop() was requested while waiting; a delay caused by
                    # the shutdown itself must not be reported as blocking.
                    break

                elapsed_ms = (time.perf_counter() - start_time) * 1000
                is_blocked = (
                    not completed
                    or elapsed_ms > self._config.blocking_threshold_ms
                )

                # Update the window first so the statistics logged with the
                # event include the event itself.
                self._record_check(is_blocked)
                if is_blocked:
                    self._record_blocking(elapsed_ms, sampled_stacks)
            except Exception as e:
                # Best effort: one failed probe must not kill the monitor.
                logger.debug(f"阻塞检测循环异常: {e}")

            time.sleep(interval_s)

    def _await_ping(
        self,
        future: Any,
        start_time: float,
    ) -> tuple[bool, list[list[dict[str, Any]]]]:
        """Wait for a probe to finish while sampling the main thread stack.

        Args:
            future: The concurrent future returned for the submitted probe.
            start_time: ``time.perf_counter()`` value taken at submission.

        Returns:
            ``(completed, sampled_stacks)``. ``completed`` is False when the
            probe was still pending after ten times the blocking threshold
            (or when the detector was stopped while waiting); the pending
            probe is cancelled in that case.
        """
        # Local import: before Python 3.11 Future.result() raises
        # concurrent.futures.TimeoutError, which is not the builtin one.
        import concurrent.futures

        threshold_ms = self._config.blocking_threshold_ms
        sample_interval = 0.01  # sample every 10 ms
        deadline = time.perf_counter() + threshold_ms * 10 / 1000
        sampled_stacks: list[list[dict[str, Any]]] = []

        while self._running and time.perf_counter() < deadline:
            try:
                future.result(timeout=sample_interval)
                return True, sampled_stacks
            except concurrent.futures.TimeoutError:
                # Still pending: start sampling once half the threshold has
                # elapsed, skipping consecutive identical stacks.
                elapsed_ms = (time.perf_counter() - start_time) * 1000
                if elapsed_ms > threshold_ms * 0.5:
                    stack = self._capture_main_thread_stack()
                    if stack and (not sampled_stacks or stack != sampled_stacks[-1]):
                        sampled_stacks.append(stack)
            except Exception:
                # The probe finished abnormally (e.g. cancelled); the caller
                # falls back to judging by elapsed time.
                return True, sampled_stacks

        future.cancel()
        return False, sampled_stacks

    async def _ping(self) -> None:
        """No-op coroutine used to measure the loop's response time."""
        pass

    def _record_blocking(
        self,
        blocked_ms: float,
        sampled_stacks: list[list[dict[str, Any]]] | None = None,
    ) -> None:
        """Store, log and possibly alert on one blocking event.

        Args:
            blocked_ms: Measured delay in milliseconds.
            sampled_stacks: Stacks sampled while blocked; when empty the
                current main-thread stack is captured instead.
        """
        if sampled_stacks:
            # Headline stack: the sample with the most user-code frames.
            stack = self._merge_sampled_stacks(sampled_stacks)
            unique_stacks = self._dedupe_stacks(sampled_stacks)
        else:
            stack = self._capture_main_thread_stack()
            unique_stacks = [stack] if stack else []

        process_stats = self._capture_process_stats()

        event = BlockingEvent(
            timestamp=datetime.now(),
            blocked_ms=round(blocked_ms, 2),
            main_thread_stack=stack,
            all_sampled_stacks=unique_stacks,
            process_stats=process_stats,
        )

        with self._lock:
            self._blocking_events.append(event)
            if len(self._blocking_events) > self._config.blocking_max_history:
                self._blocking_events.pop(0)

        self._log_blocking(event)

        if self._config.blocking_alert_enabled and self._loop:
            self._maybe_send_alert(event)

    def _capture_main_thread_stack(self) -> list[dict[str, Any]]:
        """Return up to the 20 innermost frames of the main thread.

        Frames from frozen modules and from this profiling module are
        skipped. Each frame is a dict with ``file``, ``line``, ``function``
        and ``code`` keys. Returns an empty list when the main thread has no
        frame.
        """
        main_thread_id = threading.main_thread().ident
        if not main_thread_id:
            return []

        # Take one snapshot; a second call could return a different mapping.
        frame = sys._current_frames().get(main_thread_id)
        if frame is None:
            return []

        stack = []
        for filename, lineno, name, line in traceback.extract_stack(frame):
            # Normalise separators so the self-filter also matches on Windows.
            normalized = filename.replace("\\", "/")
            if "<frozen" in filename or "monitoring/profiling" in normalized:
                continue

            stack.append({
                "file": filename,
                "line": lineno,
                "function": name,
                "code": line,
            })

        return stack[-20:]

    def _is_user_code(self, filename: str) -> bool:
        """Return True when *filename* is neither stdlib nor a third-party package.

        The decision is a substring heuristic on the path.
        """
        if not filename:
            return False
        is_stdlib = any(p in filename for p in (
            "/lib/python", "/Lib/Python", "/opt/homebrew/Cellar/python",
            "/.pyenv/", "/Python.framework/"
        ))
        is_site_packages = "site-packages" in filename or "dist-packages" in filename
        return not is_stdlib and not is_site_packages

    def _score_stack(self, stack: list[dict[str, Any]]) -> int:
        """Score a stack by its number of user-code frames."""
        return sum(1 for f in stack if self._is_user_code(f.get("file", "")))

    def _stack_signature(self, stack: list[dict[str, Any]]) -> str:
        """Build a de-duplication key from the last five frames (file:line)."""
        return "|".join(f"{f.get('file', '')}:{f.get('line', '')}" for f in stack[-5:])

    def _dedupe_stacks(
        self, stacks: list[list[dict[str, Any]]]
    ) -> list[list[dict[str, Any]]]:
        """Drop stacks whose signature was already seen, keeping first occurrences."""
        seen: set[str] = set()
        unique: list[list[dict[str, Any]]] = []
        for stack in stacks:
            sig = self._stack_signature(stack)
            if sig not in seen:
                seen.add(sig)
                unique.append(stack)
        return unique

    def _merge_sampled_stacks(
        self, sampled_stacks: list[list[dict[str, Any]]]
    ) -> list[dict[str, Any]]:
        """Return the sampled stack with the most user-code frames ([] if none)."""
        if not sampled_stacks:
            return []
        return max(sampled_stacks, key=self._score_stack)

    def _get_process(self) -> Any:
        """Return the cached ``psutil.Process`` handle, creating it on first use."""
        if self._process is None:
            self._process = psutil.Process()
        return self._process

    def _prime_process_stats(self) -> None:
        """Take the initial CPU sample so later ``cpu_percent()`` calls are meaningful."""
        if not PSUTIL_AVAILABLE:
            return
        try:
            self._get_process().cpu_percent()
        except Exception:
            # Process statistics are optional; retry lazily on first capture.
            self._process = None

    def _capture_process_stats(self) -> dict[str, Any] | None:
        """Capture CPU, memory, thread and fd figures for the current process.

        Returns:
            A dict of statistics, or None when psutil is not installed or the
            capture failed. ``cpu_percent`` covers the time since the previous
            capture (or since ``start()``).
        """
        if not PSUTIL_AVAILABLE:
            return None

        try:
            proc = self._get_process()
            with proc.oneshot():
                return {
                    "cpu_percent": proc.cpu_percent(),
                    "memory_rss_mb": round(proc.memory_info().rss / 1024**2, 2),
                    "num_threads": proc.num_threads(),
                    "num_fds": proc.num_fds() if hasattr(proc, "num_fds") else None,
                }
        except Exception:
            return None

    def _format_stack(self, stack: list[dict[str, Any]], limit: int = 5, highlight_user: bool = True) -> str:
        """Render the last *limit* frames as text.

        Frames without a source line are omitted. With *highlight_user*,
        user-code frames are prefixed with an arrow.
        """
        lines = []
        for frame in stack[-limit:]:
            if frame.get("code"):
                filename = frame['file']
                is_user = self._is_user_code(filename)
                prefix = "→ " if (highlight_user and is_user) else " "
                lines.append(f"{prefix}{filename}:{frame['line']} in {frame['function']}")
                lines.append(f" > {frame['code']}")
        return "\n".join(lines)

    def _record_check(self, is_block: bool) -> None:
        """Append one probe result to the sliding window and evict expired entries."""
        now = time.time()
        with self._lock:
            self._check_history.append((now, is_block))
            cutoff = now - self._config.blocking_stats_window_seconds
            while self._check_history and self._check_history[0][0] < cutoff:
                self._check_history.popleft()

    def _get_window_stats(self) -> tuple[int, int]:
        """Count probes inside the statistics window.

        Returns:
            (total_checks, total_blocks)
        """
        now = time.time()
        cutoff = now - self._config.blocking_stats_window_seconds
        total_checks = 0
        total_blocks = 0

        with self._lock:
            for ts, is_block in self._check_history:
                if ts >= cutoff:
                    total_checks += 1
                    if is_block:
                        total_blocks += 1

        return total_checks, total_blocks

    def _log_blocking(self, event: BlockingEvent) -> None:
        """Log a blocking event (error level when severe, warning otherwise)."""
        is_severe = event.blocked_ms >= self._config.blocking_severe_threshold_ms
        log_fn = logger.error if is_severe else logger.warning

        total_checks, total_blocks = self._get_window_stats()
        window_minutes = int(self._config.blocking_stats_window_seconds / 60)

        stats_str = ""
        if event.process_stats:
            s = event.process_stats
            stats_str = f" | CPU={s.get('cpu_percent', 'N/A')}% RSS={s.get('memory_rss_mb', 'N/A')}MB threads={s.get('num_threads', 'N/A')}"

        has_user_code = self._score_stack(event.main_thread_stack) > 0

        stack_lines = []

        if has_user_code:
            stack_lines.append("调用栈 (→ 标记用户代码):")
            stack_lines.append(self._format_stack(event.main_thread_stack, limit=8))
        else:
            # No user frames: the block is probably inside a library/framework.
            stack_lines.append("调用栈 (无用户代码,可能是三方库/框架内部阻塞):")
            stack_lines.append(self._format_stack(event.main_thread_stack, limit=5, highlight_user=False))

        # List the other distinct samples (at most the first three are considered).
        if len(event.all_sampled_stacks) > 1:
            stack_lines.append(f"\n共采样到 {len(event.all_sampled_stacks)} 个不同堆栈:")
            for i, stack in enumerate(event.all_sampled_stacks[:3], 1):
                if stack != event.main_thread_stack:
                    stack_lines.append(f"--- 采样 #{i} ---")
                    stack_lines.append(self._format_stack(stack, limit=3, highlight_user=False))

        log_fn(
            f"事件循环阻塞{'(严重)' if is_severe else ''}: {event.blocked_ms:.0f}ms "
            f"(阈值={self._config.blocking_threshold_ms}ms, "
            f"近{window_minutes}分钟={total_blocks}次, "
            f"阻塞率={total_blocks / max(total_checks, 1) * 100:.2f}%){stats_str}\n"
            + "\n".join(stack_lines)
        )

    def _maybe_send_alert(self, event: BlockingEvent) -> None:
        """Schedule an alert on the monitored loop unless still cooling down."""
        now = time.time()
        if now - self._last_alert_time < self._config.blocking_alert_cooldown_seconds:
            return

        loop = self._loop
        if loop is None:
            return

        alert = self._send_alert(event)
        try:
            asyncio.run_coroutine_threadsafe(alert, loop)
        except RuntimeError as e:
            # Loop already closed: drop the alert, avoid a never-awaited
            # coroutine, and do not start the cooldown.
            alert.close()
            logger.debug(f"发送阻塞告警失败: {e}")
            return

        self._last_alert_time = now

    async def _send_alert(self, event: BlockingEvent) -> None:
        """Emit the alert through the alerting module; failures are only logged."""
        try:
            # Imported lazily, at call time rather than module import time.
            from aury.boot.infrastructure.monitoring.alerting import (
                AlertEventType,
                AlertSeverity,
                emit_alert,
            )

            is_severe = event.blocked_ms >= self._config.blocking_severe_threshold_ms
            severity = AlertSeverity.CRITICAL if is_severe else AlertSeverity.WARNING

            total_checks, total_blocks = self._get_window_stats()
            window_minutes = int(self._config.blocking_stats_window_seconds / 60)

            await emit_alert(
                AlertEventType.CUSTOM,
                f"事件循环阻塞{'(严重)' if is_severe else ''}: {event.blocked_ms:.0f}ms",
                severity=severity,
                source="blocking_detector",
                blocked_ms=event.blocked_ms,
                threshold_ms=self._config.blocking_threshold_ms,
                window_minutes=window_minutes,
                total_blocks=total_blocks,
                block_rate=f"{total_blocks / max(total_checks, 1) * 100:.2f}%",
                stacktrace=self._format_stack(event.main_thread_stack),
                process_stats=event.process_stats,
            )
        except Exception as e:
            logger.debug(f"发送阻塞告警失败: {e}")

    def get_status(self) -> dict[str, Any]:
        """Return running state, effective configuration, window statistics and recent events."""
        total_checks, total_blocks = self._get_window_stats()
        window_minutes = int(self._config.blocking_stats_window_seconds / 60)

        with self._lock:
            events = [
                {
                    "timestamp": e.timestamp.isoformat(),
                    "blocked_ms": e.blocked_ms,
                    "stack": e.main_thread_stack,
                    "process_stats": e.process_stats,
                }
                for e in self._blocking_events
            ]

        return {
            "running": self._running,
            "config": {
                "check_interval_ms": self._config.blocking_check_interval_ms,
                "threshold_ms": self._config.blocking_threshold_ms,
                "severe_threshold_ms": self._config.blocking_severe_threshold_ms,
                "alert_enabled": self._config.blocking_alert_enabled,
                "stats_window_seconds": self._config.blocking_stats_window_seconds,
            },
            "stats": {
                "window_minutes": window_minutes,
                "total_checks": total_checks,
                "total_blocks": total_blocks,
                "block_rate_percent": round(
                    total_blocks / max(total_checks, 1) * 100, 2
                ),
            },
            "recent_events": events,
        }

    def clear_history(self) -> None:
        """Discard all recorded events and window statistics."""
        with self._lock:
            self._blocking_events.clear()
            self._check_history.clear()

    @property
    def is_running(self) -> bool:
        """Whether the monitor is currently active."""
        return self._running
569
+
570
+
571
+ # =============================================================================
572
+ # 统一管理器
573
+ # =============================================================================
574
+
575
+
576
class ProfilingManager:
    """Owns the lifecycle of the Pyroscope profiler and the blocking detector.

    Use ``get_instance()`` for the shared instance, ``configure()`` to create
    the components, then ``start()`` / ``stop()``.
    """

    _instance: "ProfilingManager | None" = None

    def __init__(self) -> None:
        self._config: ProfilingConfig | None = None
        self._pyroscope: PyroscopeProfiler | None = None
        self._blocking_detector: EventLoopBlockingDetector | None = None

    @classmethod
    def get_instance(cls) -> "ProfilingManager":
        """Return the shared instance, creating it on first use."""
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    def configure(self, config: ProfilingConfig) -> None:
        """Create fresh components for *config*.

        Components from an earlier ``configure()`` call that are still
        running are stopped first; otherwise replacing them would orphan the
        detector's monitor thread and the Pyroscope session.
        """
        self._stop_running_components()
        self._config = config
        self._pyroscope = PyroscopeProfiler(config)
        self._blocking_detector = EventLoopBlockingDetector(config)

    def _stop_running_components(self) -> None:
        """Stop whichever of the current components reports ``is_running``."""
        for component in (self._pyroscope, self._blocking_detector):
            if component is not None and component.is_running:
                component.stop()

    async def start(self) -> None:
        """Start every component enabled in the configuration.

        Logs a warning and does nothing when ``configure()`` was not called.
        """
        if not self._config:
            logger.warning("ProfilingManager 未配置")
            return

        if self._config.enabled and self._pyroscope:
            self._pyroscope.start()

        if self._config.blocking_detector_enabled and self._blocking_detector:
            # No loop is passed, so the detector picks up the running loop.
            self._blocking_detector.start()

    async def stop(self) -> None:
        """Stop all configured components."""
        if self._pyroscope:
            self._pyroscope.stop()

        if self._blocking_detector:
            self._blocking_detector.stop()

    @property
    def pyroscope(self) -> PyroscopeProfiler | None:
        """The Pyroscope profiler, or None before ``configure()``."""
        return self._pyroscope

    @property
    def blocking_detector(self) -> EventLoopBlockingDetector | None:
        """The blocking detector, or None before ``configure()``."""
        return self._blocking_detector

    def get_status(self) -> dict[str, Any]:
        """Return a status dict with ``pyroscope`` and ``blocking_detector`` sections."""
        return {
            "pyroscope": {
                "available": PYROSCOPE_AVAILABLE,
                "running": self._pyroscope.is_running if self._pyroscope else False,
            },
            "blocking_detector": (
                self._blocking_detector.get_status()
                if self._blocking_detector
                else {"running": False}
            ),
        }
647
+
648
+
649
+ # 便捷访问
650
def get_profiling_manager() -> ProfilingManager:
    """Return the shared ProfilingManager instance."""
    manager = ProfilingManager.get_instance()
    return manager
653
+
654
+
655
+ __all__ = [
656
+ "BlockingEvent",
657
+ "EventLoopBlockingDetector",
658
+ "ProfilingConfig",
659
+ "ProfilingManager",
660
+ "PyroscopeProfiler",
661
+ "get_profiling_manager",
662
+ "PSUTIL_AVAILABLE",
663
+ "PYROSCOPE_AVAILABLE",
664
+ ]
@@ -8,9 +8,11 @@ from .exceptions import (
8
8
  SchedulerError,
9
9
  SchedulerJobError,
10
10
  )
11
+ from .jobstores import RedisClusterJobStore
11
12
  from .manager import SchedulerManager
12
13
 
13
14
  __all__ = [
15
+ "RedisClusterJobStore",
14
16
  "SchedulerBackendError",
15
17
  "SchedulerError",
16
18
  "SchedulerJobError",
@@ -0,0 +1,10 @@
1
+ """APScheduler JobStore 扩展。
2
+
3
+ 提供 Redis Cluster 支持的 JobStore。
4
+ """
5
+
6
+ from .redis_cluster import RedisClusterJobStore
7
+
8
+ __all__ = [
9
+ "RedisClusterJobStore",
10
+ ]