aury-boot 0.0.40__py3-none-any.whl → 0.0.42__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aury/boot/_version.py +2 -2
- aury/boot/application/app/components.py +12 -1
- aury/boot/application/config/settings.py +7 -2
- aury/boot/infrastructure/cache/redis.py +83 -15
- aury/boot/infrastructure/channel/__init__.py +2 -1
- aury/boot/infrastructure/channel/backends/__init__.py +2 -1
- aury/boot/infrastructure/channel/backends/redis_cluster.py +124 -0
- aury/boot/infrastructure/channel/backends/redis_cluster_channel.py +139 -0
- aury/boot/infrastructure/channel/base.py +2 -0
- aury/boot/infrastructure/channel/manager.py +9 -1
- aury/boot/infrastructure/clients/redis/manager.py +94 -19
- aury/boot/infrastructure/monitoring/alerting/notifiers/feishu.py +2 -1
- aury/boot/infrastructure/monitoring/profiling/__init__.py +135 -44
- aury/boot/infrastructure/scheduler/__init__.py +2 -0
- aury/boot/infrastructure/scheduler/jobstores/__init__.py +10 -0
- aury/boot/infrastructure/scheduler/jobstores/redis_cluster.py +255 -0
- {aury_boot-0.0.40.dist-info → aury_boot-0.0.42.dist-info}/METADATA +5 -1
- {aury_boot-0.0.40.dist-info → aury_boot-0.0.42.dist-info}/RECORD +20 -16
- {aury_boot-0.0.40.dist-info → aury_boot-0.0.42.dist-info}/WHEEL +0 -0
- {aury_boot-0.0.40.dist-info → aury_boot-0.0.42.dist-info}/entry_points.txt +0 -0
|
@@ -1,16 +1,25 @@
|
|
|
1
1
|
"""Redis 客户端管理器 - 命名多实例模式。
|
|
2
2
|
|
|
3
3
|
提供统一的 Redis 连接管理,支持多实例。
|
|
4
|
+
支持普通 Redis 和 Redis Cluster:
|
|
5
|
+
- redis://... - 普通 Redis
|
|
6
|
+
- redis-cluster://... - Redis Cluster
|
|
4
7
|
"""
|
|
5
8
|
|
|
6
9
|
from __future__ import annotations
|
|
7
10
|
|
|
11
|
+
from typing import TYPE_CHECKING, Any
|
|
12
|
+
from urllib.parse import urlparse
|
|
13
|
+
|
|
8
14
|
from redis.asyncio import ConnectionPool, Redis
|
|
9
15
|
|
|
10
16
|
from aury.boot.common.logging import logger
|
|
11
17
|
|
|
12
18
|
from .config import RedisConfig
|
|
13
19
|
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from redis.asyncio.cluster import RedisCluster
|
|
22
|
+
|
|
14
23
|
|
|
15
24
|
class RedisClient:
|
|
16
25
|
"""Redis 客户端管理器(命名多实例)。
|
|
@@ -50,8 +59,9 @@ class RedisClient:
|
|
|
50
59
|
self.name = name
|
|
51
60
|
self._config: RedisConfig | None = None
|
|
52
61
|
self._pool: ConnectionPool | None = None
|
|
53
|
-
self._redis: Redis | None = None
|
|
62
|
+
self._redis: Redis | RedisCluster | None = None
|
|
54
63
|
self._initialized: bool = False
|
|
64
|
+
self._is_cluster: bool = False
|
|
55
65
|
|
|
56
66
|
@classmethod
|
|
57
67
|
def get_instance(cls, name: str = "default") -> RedisClient:
|
|
@@ -135,6 +145,10 @@ class RedisClient:
|
|
|
135
145
|
async def initialize(self) -> RedisClient:
|
|
136
146
|
"""初始化 Redis 连接。
|
|
137
147
|
|
|
148
|
+
自动检测 URL scheme:
|
|
149
|
+
- redis://... -> 普通 Redis
|
|
150
|
+
- redis-cluster://... -> Redis Cluster
|
|
151
|
+
|
|
138
152
|
Returns:
|
|
139
153
|
self: 支持链式调用
|
|
140
154
|
|
|
@@ -152,33 +166,84 @@ class RedisClient:
|
|
|
152
166
|
)
|
|
153
167
|
|
|
154
168
|
try:
|
|
155
|
-
|
|
156
|
-
self._pool = ConnectionPool.from_url(
|
|
157
|
-
self._config.url,
|
|
158
|
-
max_connections=self._config.max_connections,
|
|
159
|
-
socket_timeout=self._config.socket_timeout,
|
|
160
|
-
socket_connect_timeout=self._config.socket_connect_timeout,
|
|
161
|
-
retry_on_timeout=self._config.retry_on_timeout,
|
|
162
|
-
health_check_interval=self._config.health_check_interval,
|
|
163
|
-
decode_responses=self._config.decode_responses,
|
|
164
|
-
)
|
|
169
|
+
url = self._config.url
|
|
165
170
|
|
|
166
|
-
#
|
|
167
|
-
|
|
171
|
+
# 自动检测是否为集群模式
|
|
172
|
+
if url.startswith("redis-cluster://"):
|
|
173
|
+
await self._initialize_cluster(url)
|
|
174
|
+
else:
|
|
175
|
+
await self._initialize_standalone(url)
|
|
168
176
|
|
|
169
177
|
# 验证连接
|
|
170
178
|
await self._redis.ping()
|
|
171
179
|
|
|
172
180
|
self._initialized = True
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
logger.info(f"Redis 客户端 [{self.name}]
|
|
181
|
+
masked_url = self._mask_url(url)
|
|
182
|
+
mode = "Cluster" if self._is_cluster else "Standalone"
|
|
183
|
+
logger.info(f"Redis 客户端 [{self.name}] 初始化完成 ({mode}): {masked_url}")
|
|
176
184
|
|
|
177
185
|
return self
|
|
178
186
|
except Exception as e:
|
|
179
187
|
logger.error(f"Redis 客户端 [{self.name}] 初始化失败: {e}")
|
|
180
188
|
raise
|
|
181
189
|
|
|
190
|
+
async def _initialize_standalone(self, url: str) -> None:
    """Set up a pooled client for a single (non-cluster) Redis server.

    Args:
        url: Connection URL passed straight to ``ConnectionPool.from_url``.
    """
    cfg = self._config
    # Pool tuning values are all taken from the client's RedisConfig.
    pool_options = {
        "max_connections": cfg.max_connections,
        "socket_timeout": cfg.socket_timeout,
        "socket_connect_timeout": cfg.socket_connect_timeout,
        "retry_on_timeout": cfg.retry_on_timeout,
        "health_check_interval": cfg.health_check_interval,
        "decode_responses": cfg.decode_responses,
    }
    self._pool = ConnectionPool.from_url(url, **pool_options)
    self._redis = Redis(connection_pool=self._pool)
    self._is_cluster = False
|
|
203
|
+
|
|
204
|
+
async def _initialize_cluster(self, url: str) -> None:
    """Initialize a Redis Cluster connection (backed by coredis).

    Accepted URL forms:
    - redis-cluster://password@host:port (password in the username slot)
    - redis-cluster://:password@host:port (standard form)
    - redis-cluster://username:password@host:port (ACL mode)

    Credentials are percent-decoded before use, so characters such as
    ``@`` or ``:`` can be written as ``%40`` / ``%3A`` in the URL.

    Args:
        url: Cluster URL using the ``redis-cluster://`` scheme.

    Raises:
        ImportError: If the coredis package is not installed.
    """
    from urllib.parse import unquote

    try:
        from coredis import RedisCluster
    except ImportError as exc:
        raise ImportError(
            "Redis Cluster 需要安装 coredis: pip install coredis"
        ) from exc

    # Swap only the leading scheme so urlparse treats the URL like a
    # regular redis:// URL; the rest of the string is left untouched.
    parsed = urlparse(url.replace("redis-cluster://", "redis://", 1))

    # urlparse returns the credentials still percent-encoded; decode them
    # so an escaped password is not sent to the server literally.
    username = unquote(parsed.username) if parsed.username else None
    password = unquote(parsed.password) if parsed.password else None

    # "password@host" form: the lone userinfo component is the password.
    if username and not password:
        password, username = username, None

    cluster_kwargs: dict[str, Any] = {
        "host": parsed.hostname or "localhost",
        "port": parsed.port or 6379,
        "decode_responses": self._config.decode_responses,
    }
    if username:
        cluster_kwargs["username"] = username
    if password:
        cluster_kwargs["password"] = password

    self._redis = RedisCluster(**cluster_kwargs)
    self._is_cluster = True
|
|
246
|
+
|
|
182
247
|
def _mask_url(self, url: str) -> str:
|
|
183
248
|
"""URL 脱敏(隐藏密码)。"""
|
|
184
249
|
if "@" in url:
|
|
@@ -198,11 +263,16 @@ class RedisClient:
|
|
|
198
263
|
return self._initialized
|
|
199
264
|
|
|
200
265
|
@property
|
|
201
|
-
def
|
|
266
|
+
def is_cluster(self) -> bool:
|
|
267
|
+
"""检查是否为集群模式。"""
|
|
268
|
+
return self._is_cluster
|
|
269
|
+
|
|
270
|
+
@property
|
|
271
|
+
def connection(self) -> Redis | RedisCluster:
|
|
202
272
|
"""获取 Redis 连接。
|
|
203
273
|
|
|
204
274
|
Returns:
|
|
205
|
-
Redis
|
|
275
|
+
Redis 或 RedisCluster 客户端实例
|
|
206
276
|
|
|
207
277
|
Raises:
|
|
208
278
|
RuntimeError: 未初始化时调用
|
|
@@ -245,7 +315,11 @@ class RedisClient:
|
|
|
245
315
|
async def cleanup(self) -> None:
|
|
246
316
|
"""清理资源,关闭连接。"""
|
|
247
317
|
if self._redis:
|
|
248
|
-
|
|
318
|
+
if self._is_cluster:
|
|
319
|
+
# coredis 使用 close() 方法
|
|
320
|
+
await self._redis.close()
|
|
321
|
+
else:
|
|
322
|
+
await self._redis.close()
|
|
249
323
|
logger.info(f"Redis 客户端 [{self.name}] 已关闭")
|
|
250
324
|
|
|
251
325
|
if self._pool:
|
|
@@ -254,6 +328,7 @@ class RedisClient:
|
|
|
254
328
|
self._redis = None
|
|
255
329
|
self._pool = None
|
|
256
330
|
self._initialized = False
|
|
331
|
+
self._is_cluster = False
|
|
257
332
|
|
|
258
333
|
def __repr__(self) -> str:
|
|
259
334
|
"""字符串表示。"""
|
|
@@ -125,7 +125,8 @@ class FeishuNotifier(AlertNotifier):
|
|
|
125
125
|
if "threshold_ms" in notification.metadata:
|
|
126
126
|
details.append(f"**阈值**: {notification.metadata['threshold_ms']:.0f}ms")
|
|
127
127
|
if "total_blocks" in notification.metadata:
|
|
128
|
-
|
|
128
|
+
window_minutes = notification.metadata.get("window_minutes", 5)
|
|
129
|
+
details.append(f"**近{window_minutes}分钟**: {notification.metadata['total_blocks']} 次")
|
|
129
130
|
if "block_rate" in notification.metadata:
|
|
130
131
|
details.append(f"**阻塞率**: {notification.metadata['block_rate']}")
|
|
131
132
|
if "process_stats" in notification.metadata:
|
|
@@ -21,6 +21,7 @@ import sys
|
|
|
21
21
|
import threading
|
|
22
22
|
import time
|
|
23
23
|
import traceback
|
|
24
|
+
from collections import deque
|
|
24
25
|
from dataclasses import dataclass, field
|
|
25
26
|
from datetime import datetime
|
|
26
27
|
from typing import TYPE_CHECKING, Any
|
|
@@ -72,6 +73,9 @@ class ProfilingConfig:
|
|
|
72
73
|
blocking_alert_cooldown_seconds: float = 60
|
|
73
74
|
blocking_max_history: int = 50
|
|
74
75
|
|
|
76
|
+
# 滑动窗口统计(秒)
|
|
77
|
+
blocking_stats_window_seconds: float = 300 # 5分钟
|
|
78
|
+
|
|
75
79
|
# 标签
|
|
76
80
|
tags: dict[str, str] = field(default_factory=dict)
|
|
77
81
|
|
|
@@ -152,7 +156,8 @@ class BlockingEvent:
|
|
|
152
156
|
|
|
153
157
|
timestamp: datetime
|
|
154
158
|
blocked_ms: float
|
|
155
|
-
main_thread_stack: list[dict[str, Any]]
|
|
159
|
+
main_thread_stack: list[dict[str, Any]] # 最佳堆栈(用户代码优先)
|
|
160
|
+
all_sampled_stacks: list[list[dict[str, Any]]] = field(default_factory=list) # 所有采样堆栈
|
|
156
161
|
process_stats: dict[str, Any] | None = None
|
|
157
162
|
|
|
158
163
|
|
|
@@ -175,8 +180,8 @@ class EventLoopBlockingDetector:
|
|
|
175
180
|
self._loop: asyncio.AbstractEventLoop | None = None
|
|
176
181
|
self._blocking_events: list[BlockingEvent] = []
|
|
177
182
|
self._lock = threading.Lock()
|
|
178
|
-
|
|
179
|
-
self.
|
|
183
|
+
# 滑动窗口统计:记录时间戳 (timestamp, is_block)
|
|
184
|
+
self._check_history: deque[tuple[float, bool]] = deque()
|
|
180
185
|
self._last_alert_time: float = 0
|
|
181
186
|
|
|
182
187
|
def start(self, loop: asyncio.AbstractEventLoop | None = None) -> None:
|
|
@@ -242,7 +247,7 @@ class EventLoopBlockingDetector:
|
|
|
242
247
|
# 超时
|
|
243
248
|
elapsed_ms = (time.perf_counter() - start_time) * 1000
|
|
244
249
|
self._record_blocking(elapsed_ms, sampled_stacks)
|
|
245
|
-
self.
|
|
250
|
+
self._record_check(is_block=True)
|
|
246
251
|
time.sleep(self._config.blocking_check_interval_ms / 1000)
|
|
247
252
|
continue
|
|
248
253
|
|
|
@@ -250,10 +255,12 @@ class EventLoopBlockingDetector:
|
|
|
250
255
|
pass
|
|
251
256
|
|
|
252
257
|
elapsed_ms = (time.perf_counter() - start_time) * 1000
|
|
253
|
-
|
|
258
|
+
is_blocked = elapsed_ms > self._config.blocking_threshold_ms
|
|
259
|
+
if is_blocked:
|
|
254
260
|
self._record_blocking(elapsed_ms, sampled_stacks)
|
|
255
261
|
|
|
256
|
-
|
|
262
|
+
# 记录检查历史(滑动窗口)
|
|
263
|
+
self._record_check(is_blocked)
|
|
257
264
|
except Exception:
|
|
258
265
|
pass # 事件循环可能已关闭
|
|
259
266
|
|
|
@@ -269,14 +276,16 @@ class EventLoopBlockingDetector:
|
|
|
269
276
|
sampled_stacks: list[list[dict[str, Any]]] | None = None,
|
|
270
277
|
) -> None:
|
|
271
278
|
"""记录阻塞事件。"""
|
|
272
|
-
self._total_blocks += 1
|
|
273
279
|
|
|
274
280
|
# 优先使用采样的堆栈(阻塞期间捕获的),否则捕获当前堆栈
|
|
275
281
|
if sampled_stacks:
|
|
276
|
-
#
|
|
282
|
+
# 取用户代码最多的堆栈作为主堆栈
|
|
277
283
|
stack = self._merge_sampled_stacks(sampled_stacks)
|
|
284
|
+
# 去重保留所有不同的堆栈
|
|
285
|
+
unique_stacks = self._dedupe_stacks(sampled_stacks)
|
|
278
286
|
else:
|
|
279
287
|
stack = self._capture_main_thread_stack()
|
|
288
|
+
unique_stacks = [stack] if stack else []
|
|
280
289
|
|
|
281
290
|
# 获取进程状态
|
|
282
291
|
process_stats = self._capture_process_stats()
|
|
@@ -285,6 +294,7 @@ class EventLoopBlockingDetector:
|
|
|
285
294
|
timestamp=datetime.now(),
|
|
286
295
|
blocked_ms=round(blocked_ms, 2),
|
|
287
296
|
main_thread_stack=stack,
|
|
297
|
+
all_sampled_stacks=unique_stacks,
|
|
288
298
|
process_stats=process_stats,
|
|
289
299
|
)
|
|
290
300
|
|
|
@@ -323,31 +333,45 @@ class EventLoopBlockingDetector:
|
|
|
323
333
|
|
|
324
334
|
return stack[-20:] # 保留最近 20 帧
|
|
325
335
|
|
|
336
|
+
def _is_user_code(self, filename: str) -> bool:
|
|
337
|
+
"""判断是否为用户代码(非标准库/非三方库)。"""
|
|
338
|
+
if not filename:
|
|
339
|
+
return False
|
|
340
|
+
is_stdlib = any(p in filename for p in (
|
|
341
|
+
"/lib/python", "/Lib/Python", "/opt/homebrew/Cellar/python",
|
|
342
|
+
"/.pyenv/", "/Python.framework/"
|
|
343
|
+
))
|
|
344
|
+
is_site_packages = "site-packages" in filename or "dist-packages" in filename
|
|
345
|
+
return not is_stdlib and not is_site_packages
|
|
346
|
+
|
|
347
|
+
def _score_stack(self, stack: list[dict[str, Any]]) -> int:
|
|
348
|
+
"""评分堆栈:用户代码帧越多分数越高。"""
|
|
349
|
+
return sum(1 for f in stack if self._is_user_code(f.get("file", "")))
|
|
350
|
+
|
|
351
|
+
def _stack_signature(self, stack: list[dict[str, Any]]) -> str:
|
|
352
|
+
"""生成堆栈签名用于去重。"""
|
|
353
|
+
return "|".join(f"{f.get('file', '')}:{f.get('line', '')}" for f in stack[-5:])
|
|
354
|
+
|
|
355
|
+
def _dedupe_stacks(
|
|
356
|
+
self, stacks: list[list[dict[str, Any]]]
|
|
357
|
+
) -> list[list[dict[str, Any]]]:
|
|
358
|
+
"""去重堆栈,保留唯一的堆栈。"""
|
|
359
|
+
seen: set[str] = set()
|
|
360
|
+
unique: list[list[dict[str, Any]]] = []
|
|
361
|
+
for stack in stacks:
|
|
362
|
+
sig = self._stack_signature(stack)
|
|
363
|
+
if sig not in seen:
|
|
364
|
+
seen.add(sig)
|
|
365
|
+
unique.append(stack)
|
|
366
|
+
return unique
|
|
367
|
+
|
|
326
368
|
def _merge_sampled_stacks(
|
|
327
369
|
self, sampled_stacks: list[list[dict[str, Any]]]
|
|
328
370
|
) -> list[dict[str, Any]]:
|
|
329
|
-
"""
|
|
330
|
-
|
|
331
|
-
优先返回包含用户代码(非标准库/site-packages)的堆栈。
|
|
332
|
-
"""
|
|
371
|
+
"""合并多次采样的堆栈,返回用户代码最多的。"""
|
|
333
372
|
if not sampled_stacks:
|
|
334
373
|
return []
|
|
335
|
-
|
|
336
|
-
# 评分标准:用户代码帧数越多越好
|
|
337
|
-
def score_stack(stack: list[dict[str, Any]]) -> int:
|
|
338
|
-
user_frames = 0
|
|
339
|
-
for frame in stack:
|
|
340
|
-
filename = frame.get("file", "")
|
|
341
|
-
is_stdlib = any(p in filename for p in (
|
|
342
|
-
"/lib/python", "/Lib/Python", "/opt/homebrew/", "/.pyenv/"
|
|
343
|
-
))
|
|
344
|
-
is_site_packages = "site-packages" in filename or "dist-packages" in filename
|
|
345
|
-
if not is_stdlib and not is_site_packages:
|
|
346
|
-
user_frames += 1
|
|
347
|
-
return user_frames
|
|
348
|
-
|
|
349
|
-
# 返回用户代码帧最多的堆栈
|
|
350
|
-
return max(sampled_stacks, key=score_stack)
|
|
374
|
+
return max(sampled_stacks, key=self._score_stack)
|
|
351
375
|
|
|
352
376
|
def _capture_process_stats(self) -> dict[str, Any] | None:
|
|
353
377
|
"""捕获当前进程状态。"""
|
|
@@ -366,22 +390,57 @@ class EventLoopBlockingDetector:
|
|
|
366
390
|
except Exception:
|
|
367
391
|
return None
|
|
368
392
|
|
|
369
|
-
def _format_stack(self, stack: list[dict[str, Any]], limit: int = 5) -> str:
|
|
393
|
+
def _format_stack(self, stack: list[dict[str, Any]], limit: int = 5, highlight_user: bool = True) -> str:
|
|
370
394
|
"""格式化调用栈为字符串。"""
|
|
371
395
|
lines = []
|
|
372
396
|
for frame in stack[-limit:]:
|
|
373
397
|
if frame.get("code"):
|
|
374
|
-
|
|
398
|
+
filename = frame['file']
|
|
399
|
+
is_user = self._is_user_code(filename)
|
|
400
|
+
# 用户代码加前缀标记
|
|
401
|
+
prefix = "→ " if (highlight_user and is_user) else " "
|
|
402
|
+
lines.append(f"{prefix}{filename}:{frame['line']} in {frame['function']}")
|
|
375
403
|
lines.append(f" > {frame['code']}")
|
|
376
404
|
return "\n".join(lines)
|
|
377
405
|
|
|
406
|
+
def _record_check(self, is_block: bool) -> None:
|
|
407
|
+
"""记录一次检查到滑动窗口。"""
|
|
408
|
+
now = time.time()
|
|
409
|
+
with self._lock:
|
|
410
|
+
self._check_history.append((now, is_block))
|
|
411
|
+
# 清理过期数据
|
|
412
|
+
cutoff = now - self._config.blocking_stats_window_seconds
|
|
413
|
+
while self._check_history and self._check_history[0][0] < cutoff:
|
|
414
|
+
self._check_history.popleft()
|
|
415
|
+
|
|
416
|
+
def _get_window_stats(self) -> tuple[int, int]:
|
|
417
|
+
"""获取时间窗口内的统计。
|
|
418
|
+
|
|
419
|
+
Returns:
|
|
420
|
+
(total_checks, total_blocks)
|
|
421
|
+
"""
|
|
422
|
+
now = time.time()
|
|
423
|
+
cutoff = now - self._config.blocking_stats_window_seconds
|
|
424
|
+
total_checks = 0
|
|
425
|
+
total_blocks = 0
|
|
426
|
+
|
|
427
|
+
with self._lock:
|
|
428
|
+
for ts, is_block in self._check_history:
|
|
429
|
+
if ts >= cutoff:
|
|
430
|
+
total_checks += 1
|
|
431
|
+
if is_block:
|
|
432
|
+
total_blocks += 1
|
|
433
|
+
|
|
434
|
+
return total_checks, total_blocks
|
|
435
|
+
|
|
378
436
|
def _log_blocking(self, event: BlockingEvent) -> None:
|
|
379
437
|
"""输出阻塞日志。"""
|
|
380
438
|
is_severe = event.blocked_ms >= self._config.blocking_severe_threshold_ms
|
|
381
439
|
log_fn = logger.error if is_severe else logger.warning
|
|
382
440
|
|
|
383
|
-
#
|
|
384
|
-
|
|
441
|
+
# 获取时间窗口统计
|
|
442
|
+
total_checks, total_blocks = self._get_window_stats()
|
|
443
|
+
window_minutes = int(self._config.blocking_stats_window_seconds / 60)
|
|
385
444
|
|
|
386
445
|
# 格式化进程状态
|
|
387
446
|
stats_str = ""
|
|
@@ -389,12 +448,35 @@ class EventLoopBlockingDetector:
|
|
|
389
448
|
s = event.process_stats
|
|
390
449
|
stats_str = f" | CPU={s.get('cpu_percent', 'N/A')}% RSS={s.get('memory_rss_mb', 'N/A')}MB threads={s.get('num_threads', 'N/A')}"
|
|
391
450
|
|
|
451
|
+
# 检查是否有用户代码
|
|
452
|
+
has_user_code = self._score_stack(event.main_thread_stack) > 0
|
|
453
|
+
|
|
454
|
+
# 构建堆栈信息
|
|
455
|
+
stack_lines = []
|
|
456
|
+
|
|
457
|
+
if has_user_code:
|
|
458
|
+
# 有用户代码,显示主堆栈
|
|
459
|
+
stack_lines.append("调用栈 (→ 标记用户代码):")
|
|
460
|
+
stack_lines.append(self._format_stack(event.main_thread_stack, limit=8))
|
|
461
|
+
else:
|
|
462
|
+
# 没有用户代码,可能是框架内部阻塞
|
|
463
|
+
stack_lines.append("调用栈 (无用户代码,可能是三方库/框架内部阻塞):")
|
|
464
|
+
stack_lines.append(self._format_stack(event.main_thread_stack, limit=5, highlight_user=False))
|
|
465
|
+
|
|
466
|
+
# 显示所有不同的采样堆栈
|
|
467
|
+
if len(event.all_sampled_stacks) > 1:
|
|
468
|
+
stack_lines.append(f"\n共采样到 {len(event.all_sampled_stacks)} 个不同堆栈:")
|
|
469
|
+
for i, stack in enumerate(event.all_sampled_stacks[:3], 1): # 最多显示3个
|
|
470
|
+
if stack != event.main_thread_stack:
|
|
471
|
+
stack_lines.append(f"--- 采样 #{i} ---")
|
|
472
|
+
stack_lines.append(self._format_stack(stack, limit=3, highlight_user=False))
|
|
473
|
+
|
|
392
474
|
log_fn(
|
|
393
|
-
f"事件循环阻塞{'
|
|
475
|
+
f"事件循环阻塞{'(严重)' if is_severe else ''}: {event.blocked_ms:.0f}ms "
|
|
394
476
|
f"(阈值={self._config.blocking_threshold_ms}ms, "
|
|
395
|
-
f"
|
|
396
|
-
f"阻塞率={
|
|
397
|
-
|
|
477
|
+
f"近{window_minutes}分钟={total_blocks}次, "
|
|
478
|
+
f"阻塞率={total_blocks / max(total_checks, 1) * 100:.2f}%){stats_str}\n"
|
|
479
|
+
+ "\n".join(stack_lines)
|
|
398
480
|
)
|
|
399
481
|
|
|
400
482
|
def _maybe_send_alert(self, event: BlockingEvent) -> None:
|
|
@@ -418,15 +500,20 @@ class EventLoopBlockingDetector:
|
|
|
418
500
|
is_severe = event.blocked_ms >= self._config.blocking_severe_threshold_ms
|
|
419
501
|
severity = AlertSeverity.CRITICAL if is_severe else AlertSeverity.WARNING
|
|
420
502
|
|
|
503
|
+
# 获取时间窗口统计
|
|
504
|
+
total_checks, total_blocks = self._get_window_stats()
|
|
505
|
+
window_minutes = int(self._config.blocking_stats_window_seconds / 60)
|
|
506
|
+
|
|
421
507
|
await emit_alert(
|
|
422
508
|
AlertEventType.CUSTOM,
|
|
423
|
-
f"事件循环阻塞{'
|
|
509
|
+
f"事件循环阻塞{'(严重)' if is_severe else ''}: {event.blocked_ms:.0f}ms",
|
|
424
510
|
severity=severity,
|
|
425
511
|
source="blocking_detector",
|
|
426
512
|
blocked_ms=event.blocked_ms,
|
|
427
513
|
threshold_ms=self._config.blocking_threshold_ms,
|
|
428
|
-
|
|
429
|
-
|
|
514
|
+
window_minutes=window_minutes,
|
|
515
|
+
total_blocks=total_blocks,
|
|
516
|
+
block_rate=f"{total_blocks / max(total_checks, 1) * 100:.2f}%",
|
|
430
517
|
stacktrace=self._format_stack(event.main_thread_stack),
|
|
431
518
|
process_stats=event.process_stats,
|
|
432
519
|
)
|
|
@@ -435,6 +522,9 @@ class EventLoopBlockingDetector:
|
|
|
435
522
|
|
|
436
523
|
def get_status(self) -> dict[str, Any]:
|
|
437
524
|
"""获取检测状态和历史。"""
|
|
525
|
+
total_checks, total_blocks = self._get_window_stats()
|
|
526
|
+
window_minutes = int(self._config.blocking_stats_window_seconds / 60)
|
|
527
|
+
|
|
438
528
|
with self._lock:
|
|
439
529
|
events = [
|
|
440
530
|
{
|
|
@@ -453,12 +543,14 @@ class EventLoopBlockingDetector:
|
|
|
453
543
|
"threshold_ms": self._config.blocking_threshold_ms,
|
|
454
544
|
"severe_threshold_ms": self._config.blocking_severe_threshold_ms,
|
|
455
545
|
"alert_enabled": self._config.blocking_alert_enabled,
|
|
546
|
+
"stats_window_seconds": self._config.blocking_stats_window_seconds,
|
|
456
547
|
},
|
|
457
548
|
"stats": {
|
|
458
|
-
"
|
|
459
|
-
"
|
|
549
|
+
"window_minutes": window_minutes,
|
|
550
|
+
"total_checks": total_checks,
|
|
551
|
+
"total_blocks": total_blocks,
|
|
460
552
|
"block_rate_percent": round(
|
|
461
|
-
|
|
553
|
+
total_blocks / max(total_checks, 1) * 100, 2
|
|
462
554
|
),
|
|
463
555
|
},
|
|
464
556
|
"recent_events": events,
|
|
@@ -468,8 +560,7 @@ class EventLoopBlockingDetector:
|
|
|
468
560
|
"""清空阻塞历史。"""
|
|
469
561
|
with self._lock:
|
|
470
562
|
self._blocking_events.clear()
|
|
471
|
-
|
|
472
|
-
self._total_blocks = 0
|
|
563
|
+
self._check_history.clear()
|
|
473
564
|
|
|
474
565
|
@property
|
|
475
566
|
def is_running(self) -> bool:
|
|
@@ -8,9 +8,11 @@ from .exceptions import (
|
|
|
8
8
|
SchedulerError,
|
|
9
9
|
SchedulerJobError,
|
|
10
10
|
)
|
|
11
|
+
from .jobstores import RedisClusterJobStore
|
|
11
12
|
from .manager import SchedulerManager
|
|
12
13
|
|
|
13
14
|
__all__ = [
|
|
15
|
+
"RedisClusterJobStore",
|
|
14
16
|
"SchedulerBackendError",
|
|
15
17
|
"SchedulerError",
|
|
16
18
|
"SchedulerJobError",
|