jettask 0.2.23__py3-none-any.whl → 0.2.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. jettask/__init__.py +2 -0
  2. jettask/cli.py +12 -8
  3. jettask/config/lua_scripts.py +37 -0
  4. jettask/config/nacos_config.py +1 -1
  5. jettask/core/app.py +313 -340
  6. jettask/core/container.py +4 -4
  7. jettask/{persistence → core}/namespace.py +93 -27
  8. jettask/core/task.py +16 -9
  9. jettask/core/unified_manager_base.py +136 -26
  10. jettask/db/__init__.py +67 -0
  11. jettask/db/base.py +137 -0
  12. jettask/{utils/db_connector.py → db/connector.py} +130 -26
  13. jettask/db/models/__init__.py +16 -0
  14. jettask/db/models/scheduled_task.py +196 -0
  15. jettask/db/models/task.py +77 -0
  16. jettask/db/models/task_run.py +85 -0
  17. jettask/executor/__init__.py +0 -15
  18. jettask/executor/core.py +76 -31
  19. jettask/executor/process_entry.py +29 -114
  20. jettask/executor/task_executor.py +4 -0
  21. jettask/messaging/event_pool.py +928 -685
  22. jettask/messaging/scanner.py +30 -0
  23. jettask/persistence/__init__.py +28 -103
  24. jettask/persistence/buffer.py +170 -0
  25. jettask/persistence/consumer.py +330 -249
  26. jettask/persistence/manager.py +304 -0
  27. jettask/persistence/persistence.py +391 -0
  28. jettask/scheduler/__init__.py +15 -3
  29. jettask/scheduler/{task_crud.py → database.py} +61 -57
  30. jettask/scheduler/loader.py +2 -2
  31. jettask/scheduler/{scheduler_coordinator.py → manager.py} +23 -6
  32. jettask/scheduler/models.py +14 -10
  33. jettask/scheduler/schedule.py +166 -0
  34. jettask/scheduler/scheduler.py +12 -11
  35. jettask/schemas/__init__.py +50 -1
  36. jettask/schemas/backlog.py +43 -6
  37. jettask/schemas/namespace.py +70 -19
  38. jettask/schemas/queue.py +19 -3
  39. jettask/schemas/responses.py +493 -0
  40. jettask/task/__init__.py +0 -2
  41. jettask/task/router.py +3 -0
  42. jettask/test_connection_monitor.py +1 -1
  43. jettask/utils/__init__.py +7 -5
  44. jettask/utils/db_init.py +8 -4
  45. jettask/utils/namespace_dep.py +167 -0
  46. jettask/utils/queue_matcher.py +186 -0
  47. jettask/utils/rate_limit/concurrency_limiter.py +7 -1
  48. jettask/utils/stream_backlog.py +1 -1
  49. jettask/webui/__init__.py +0 -1
  50. jettask/webui/api/__init__.py +4 -4
  51. jettask/webui/api/alerts.py +806 -71
  52. jettask/webui/api/example_refactored.py +400 -0
  53. jettask/webui/api/namespaces.py +390 -45
  54. jettask/webui/api/overview.py +300 -54
  55. jettask/webui/api/queues.py +971 -267
  56. jettask/webui/api/scheduled.py +1249 -56
  57. jettask/webui/api/settings.py +129 -7
  58. jettask/webui/api/workers.py +442 -0
  59. jettask/webui/app.py +46 -2329
  60. jettask/webui/middleware/__init__.py +6 -0
  61. jettask/webui/middleware/namespace_middleware.py +135 -0
  62. jettask/webui/services/__init__.py +146 -0
  63. jettask/webui/services/heartbeat_service.py +251 -0
  64. jettask/webui/services/overview_service.py +60 -51
  65. jettask/webui/services/queue_monitor_service.py +426 -0
  66. jettask/webui/services/redis_monitor_service.py +87 -0
  67. jettask/webui/services/settings_service.py +174 -111
  68. jettask/webui/services/task_monitor_service.py +222 -0
  69. jettask/webui/services/timeline_pg_service.py +452 -0
  70. jettask/webui/services/timeline_service.py +189 -0
  71. jettask/webui/services/worker_monitor_service.py +467 -0
  72. jettask/webui/utils/__init__.py +11 -0
  73. jettask/webui/utils/time_utils.py +122 -0
  74. jettask/worker/lifecycle.py +8 -2
  75. {jettask-0.2.23.dist-info → jettask-0.2.24.dist-info}/METADATA +1 -1
  76. jettask-0.2.24.dist-info/RECORD +142 -0
  77. jettask/executor/executor.py +0 -338
  78. jettask/persistence/backlog_monitor.py +0 -567
  79. jettask/persistence/base.py +0 -2334
  80. jettask/persistence/db_manager.py +0 -516
  81. jettask/persistence/maintenance.py +0 -81
  82. jettask/persistence/message_consumer.py +0 -259
  83. jettask/persistence/models.py +0 -49
  84. jettask/persistence/offline_recovery.py +0 -196
  85. jettask/persistence/queue_discovery.py +0 -215
  86. jettask/persistence/task_persistence.py +0 -218
  87. jettask/persistence/task_updater.py +0 -583
  88. jettask/scheduler/add_execution_count.sql +0 -11
  89. jettask/scheduler/add_priority_field.sql +0 -26
  90. jettask/scheduler/add_scheduler_id.sql +0 -25
  91. jettask/scheduler/add_scheduler_id_index.sql +0 -10
  92. jettask/scheduler/make_scheduler_id_required.sql +0 -28
  93. jettask/scheduler/migrate_interval_seconds.sql +0 -9
  94. jettask/scheduler/performance_optimization.sql +0 -45
  95. jettask/scheduler/run_scheduler.py +0 -186
  96. jettask/scheduler/schema.sql +0 -84
  97. jettask/task/task_executor.py +0 -318
  98. jettask/webui/api/analytics.py +0 -323
  99. jettask/webui/config.py +0 -90
  100. jettask/webui/models/__init__.py +0 -3
  101. jettask/webui/models/namespace.py +0 -63
  102. jettask/webui/namespace_manager/__init__.py +0 -10
  103. jettask/webui/namespace_manager/multi.py +0 -593
  104. jettask/webui/namespace_manager/unified.py +0 -193
  105. jettask/webui/run.py +0 -46
  106. jettask-0.2.23.dist-info/RECORD +0 -145
  107. {jettask-0.2.23.dist-info → jettask-0.2.24.dist-info}/WHEEL +0 -0
  108. {jettask-0.2.23.dist-info → jettask-0.2.24.dist-info}/entry_points.txt +0 -0
  109. {jettask-0.2.23.dist-info → jettask-0.2.24.dist-info}/licenses/LICENSE +0 -0
  110. {jettask-0.2.23.dist-info → jettask-0.2.24.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,6 @@
1
+ """
2
+ WebUI 中间件模块
3
+ """
4
+ from .namespace_middleware import NamespaceMiddleware
5
+
6
+ __all__ = ['NamespaceMiddleware']
@@ -0,0 +1,135 @@
1
+ """
2
+ Namespace 中间件 - 自动注入命名空间上下文
3
+
4
+ 这个中间件会自动检测路由中的 {namespace} 参数,并将 NamespaceContext 注入到 request.state.ns
5
+ 这样所有路由都无需手动使用 Depends(get_namespace_context),直接访问 request.state.ns 即可
6
+ """
7
+ import logging
8
+ import re
9
+ from starlette.middleware.base import BaseHTTPMiddleware
10
+ from starlette.requests import Request
11
+ from starlette.responses import JSONResponse
12
+ from typing import Callable
13
+
14
+ from jettask.utils.namespace_dep import NamespaceContext
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
class NamespaceMiddleware(BaseHTTPMiddleware):
    """
    Middleware that automatically injects a namespace context.

    What it does:
    1. Skips requests whose path starts with an entry of ``EXCLUDED_PATHS``.
    2. Extracts the namespace name from the URL path
       (``/api/v1/<resource>/<namespace>/...``).
    3. Asks ``request.app.state.namespace_data_access.manager`` for the
       namespace connection and stores a ``NamespaceContext`` on
       ``request.state.ns``.
    4. Turns lookup failures into JSON error responses (404 for
       ``ValueError``, 500 for any other exception).

    Usage:
    ```python
    # register in app.py
    app.add_middleware(NamespaceMiddleware)

    # use inside a route
    @router.get("/{namespace}/queues")
    async def get_queues(request: Request):
        ns = request.state.ns  # already injected
        redis_client = await ns.get_redis_client()
        # ... business logic
    ```
    """

    # Path prefixes that do not need a namespace and bypass this middleware.
    EXCLUDED_PATHS = [
        '/api/v1/namespaces',  # namespace management itself
        '/api/v1/overview/',   # root-level endpoints (health checks, etc.)
        '/docs',               # API documentation
        '/openapi.json',       # OpenAPI schema
        '/redoc',              # ReDoc documentation
        '/health',             # health check
    ]

    async def dispatch(self, request: Request, call_next: Callable):
        """
        Resolve the namespace for the request and forward it down the chain.

        Args:
            request: incoming HTTP request.
            call_next: next middleware or route handler.

        Returns:
            The downstream response, or a JSON error response when the
            namespace cannot be resolved.
        """
        path = request.url.path

        # Excluded prefixes never carry a namespace: pass straight through.
        if path.startswith(tuple(self.EXCLUDED_PATHS)):
            return await call_next(request)

        # Expected shape: /api/v1/xxx/{namespace}/...
        namespace_match = re.search(r'/api/v1/[^/]+/([^/]+)', path)
        if not namespace_match:
            # No namespace segment at all: nothing to inject.
            return await call_next(request)
        namespace = namespace_match.group(1)

        # Some routes put a fixed segment where the namespace normally sits,
        # e.g. /api/v1/queues/redis/monitor/{namespace}; for those the
        # namespace is read from a later path segment instead.
        if namespace in ('redis', 'tasks-v2', 'statistics'):
            namespace_match2 = re.search(r'/api/v1/[^/]+/[^/]+/[^/]+/([^/]+)', path)
            if not namespace_match2:
                # Still nothing: this route presumably needs no namespace.
                return await call_next(request)
            namespace = namespace_match2.group(1)

        # The data-access object must have been attached to app.state.
        if not hasattr(request.app.state, 'namespace_data_access'):
            logger.error("namespace_data_access 未初始化")
            return JSONResponse(
                status_code=500,
                content={"detail": "Namespace data access not initialized"},
            )

        manager = request.app.state.namespace_data_access.manager

        try:
            connection = await manager.get_connection(namespace)
            # Expose the context to route handlers via request.state.ns.
            request.state.ns = NamespaceContext(
                namespace_name=namespace,
                connection=connection,
                manager=manager,
            )
            logger.debug(f"已为请求 {path} 注入命名空间上下文: {namespace}")
        except ValueError as e:
            # Unknown namespace or invalid namespace configuration.
            logger.warning(f"命名空间 '{namespace}' 不存在或配置错误: {e}")
            return JSONResponse(
                status_code=404,
                content={"detail": f"命名空间 '{namespace}' 不存在或配置错误"},
            )
        except Exception as e:
            # Any other failure (e.g. the connection could not be established).
            logger.error(f"获取命名空间 '{namespace}' 连接失败: {e}", exc_info=True)
            return JSONResponse(
                status_code=500,
                content={"detail": f"获取命名空间连接失败: {str(e)}"},
            )

        # Context is in place: hand over to the next handler.
        return await call_next(request)
@@ -11,6 +11,144 @@ from .analytics_service import AnalyticsService
11
11
  from .settings_service import SettingsService
12
12
  from .task_service import TaskService
13
13
 
14
+ # 监控服务
15
+ from .redis_monitor_service import RedisMonitorService
16
+ from .task_monitor_service import TaskMonitorService
17
+ from .worker_monitor_service import WorkerMonitorService
18
+ from .queue_monitor_service import QueueMonitorService
19
+ from .heartbeat_service import HeartbeatService
20
+ from .timeline_service import TimelineService
21
+ from .timeline_pg_service import TimelinePgService
22
+
23
+
24
class MonitorService:
    """
    Unified monitoring facade.

    Builds one shared ``RedisMonitorService`` and the task, worker, queue,
    heartbeat and timeline sub-services on top of it, then exposes their
    methods through a single object. Every public method simply delegates
    to the matching sub-service.
    """

    def __init__(self, redis_url: str = "redis://localhost:6379", redis_prefix: str = "jettask"):
        """
        Create the base Redis service and all sub-services.

        Args:
            redis_url: Redis connection URL.
            redis_prefix: Redis key prefix.
        """
        # Base Redis service shared by every sub-service.
        self.redis_service = RedisMonitorService(redis_url, redis_prefix)

        # Sub-services.
        self.task_service = TaskMonitorService(self.redis_service)
        self.worker_service = WorkerMonitorService(self.redis_service)
        self.queue_service = QueueMonitorService(self.redis_service)
        self.heartbeat_service = HeartbeatService(self.redis_service)
        self.timeline_service = TimelineService(self.redis_service)

    async def connect(self):
        """Connect to Redis."""
        await self.redis_service.connect()

    async def close(self):
        """
        Stop the heartbeat scanner and close the Redis connection.

        The Redis connection is closed even when stopping the scanner
        raises; the scanner's exception still propagates to the caller.
        """
        try:
            await self.heartbeat_service.stop_heartbeat_scanner()
        finally:
            # Always release the connection, otherwise a scanner failure
            # would leak it.
            await self.redis_service.close()

    async def __aenter__(self):
        """Async context manager entry: connect and return ``self``."""
        await self.connect()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit: close all services."""
        await self.close()

    # ==================== Task Monitor Methods ====================

    async def get_task_info(self, stream_id: str, queue_name: str):
        """Return detailed information about a single task."""
        return await self.task_service.get_task_info(stream_id, queue_name)

    async def get_stream_info(self, queue_name: str):
        """Return statistics for the queue's stream."""
        return await self.task_service.get_stream_info(queue_name)

    async def get_queue_tasks(self, queue_name: str, start: str = "-", end: str = "+", count: int = 100, reverse: bool = False):
        """Return the list of tasks in a queue."""
        return await self.task_service.get_queue_tasks(queue_name, start, end, count, reverse)

    # ==================== Worker Monitor Methods ====================

    async def get_worker_heartbeats(self, queue_name: str):
        """Return worker heartbeat information for the given queue."""
        return await self.worker_service.get_worker_heartbeats(queue_name)

    async def get_queue_worker_summary(self, queue_name: str):
        """Return the queue's worker summary (including historical data)."""
        return await self.worker_service.get_queue_worker_summary(queue_name)

    async def get_queue_worker_summary_fast(self, queue_name: str):
        """Return the queue's worker summary (fast variant, online workers only)."""
        return await self.worker_service.get_queue_worker_summary_fast(queue_name)

    async def get_worker_offline_history(self, limit: int = 100, start_time=None, end_time=None):
        """Return the worker offline history records."""
        return await self.worker_service.get_worker_offline_history(limit, start_time, end_time)

    # ==================== Queue Monitor Methods ====================

    async def get_all_queues(self):
        """Return all queue names."""
        return await self.queue_service.get_all_queues()

    async def get_queue_stats(self, queue_name: str):
        """Return queue statistics (RabbitMQ-compatible format)."""
        return await self.queue_service.get_queue_stats(queue_name)

    # ==================== Heartbeat Service Methods ====================

    async def start_heartbeat_scanner(self):
        """Start the heartbeat scanner."""
        await self.heartbeat_service.start_heartbeat_scanner()

    async def stop_heartbeat_scanner(self):
        """Stop the heartbeat scanner."""
        await self.heartbeat_service.stop_heartbeat_scanner()

    async def check_worker_heartbeat(self, worker_id: str):
        """Check the heartbeat state of a single worker."""
        return await self.heartbeat_service.check_worker_heartbeat(worker_id)

    async def get_heartbeat_stats(self):
        """Return heartbeat monitoring statistics."""
        return await self.heartbeat_service.get_heartbeat_stats()

    # ==================== Timeline Service Methods ====================

    async def get_redis_timeline(self, queue_name: str, **kwargs):
        """Return Redis Stream timeline data for the queue."""
        return await self.timeline_service.get_redis_timeline(queue_name, **kwargs)

    # ==================== Utility Methods ====================

    def get_prefixed_queue_name(self, queue_name: str) -> str:
        """Return the queue name with the Redis prefix applied."""
        return self.redis_service.get_prefixed_queue_name(queue_name)

    @property
    def redis(self):
        """Redis client of the underlying Redis service."""
        return self.redis_service.redis

    @property
    def redis_prefix(self) -> str:
        """Redis key prefix of the underlying Redis service."""
        return self.redis_service.redis_prefix
150
+
151
+
14
152
  __all__ = [
15
153
  'OverviewService',
16
154
  'QueueService',
@@ -19,4 +157,12 @@ __all__ = [
19
157
  'AnalyticsService',
20
158
  'SettingsService',
21
159
  'TaskService',
160
+ 'MonitorService',
161
+ 'RedisMonitorService',
162
+ 'TaskMonitorService',
163
+ 'WorkerMonitorService',
164
+ 'QueueMonitorService',
165
+ 'HeartbeatService',
166
+ 'TimelineService',
167
+ 'TimelinePgService',
22
168
  ]
@@ -0,0 +1,251 @@
1
+ """
2
+ 心跳监控服务
3
+
4
+ 提供 Worker 心跳检查和自动离线标记功能
5
+ """
6
+ import asyncio
7
+ import logging
8
+ import time
9
+ from typing import Optional
10
+
11
+ from .redis_monitor_service import RedisMonitorService
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class HeartbeatService:
    """
    Worker heartbeat monitoring service.

    Periodically scans worker records and marks a worker offline when it is
    flagged alive but its last heartbeat is older than its timeout. Also
    offers a one-off heartbeat check and aggregate heartbeat statistics.
    """

    def __init__(self, redis_service: "RedisMonitorService", scanner_interval: int = 5, heartbeat_timeout: int = 30):
        """
        Initialise the heartbeat service.

        Args:
            redis_service: base Redis monitoring service instance.
            scanner_interval: seconds between scans (default 5).
            heartbeat_timeout: default heartbeat timeout in seconds
                (default 30); used when a worker record carries no
                ``heartbeat_timeout`` field of its own.
        """
        self.redis_service = redis_service
        self.scanner_interval = scanner_interval
        self.default_heartbeat_timeout = heartbeat_timeout

        # Background scanner task control.
        self.scanner_task: Optional[asyncio.Task] = None
        self._scanner_running = False

    @property
    def redis(self):
        """Redis client of the underlying Redis service."""
        return self.redis_service.redis

    @property
    def redis_prefix(self) -> str:
        """Redis key prefix of the underlying Redis service."""
        return self.redis_service.redis_prefix

    @property
    def worker_state_manager(self):
        """WorkerStateManager held by the underlying Redis service."""
        return self.redis_service.worker_state_manager

    async def start_heartbeat_scanner(self):
        """Start the background heartbeat scanner unless it is already flagged as running."""
        if not self._scanner_running:
            self._scanner_running = True
            self.scanner_task = asyncio.create_task(self._heartbeat_scanner())
            logger.info("心跳扫描器任务已创建并启动")
        else:
            logger.warning("心跳扫描器已经在运行中")

    async def stop_heartbeat_scanner(self):
        """Stop the background heartbeat scanner and wait for its task to finish."""
        self._scanner_running = False
        if self.scanner_task and not self.scanner_task.done():
            self.scanner_task.cancel()
            try:
                await self.scanner_task
            except asyncio.CancelledError:
                logger.info("心跳扫描器已取消")
        logger.info("心跳扫描器已停止")

    async def _scan_once(self):
        """Run one scan pass: mark every alive-but-timed-out worker offline."""
        from jettask.worker.lifecycle import WorkerStateManager

        worker_manager = WorkerStateManager(
            redis_client=self.redis,
            redis_prefix=self.redis_prefix
        )

        worker_ids = await worker_manager.get_all_workers()
        if not worker_ids:
            return

        current_time = time.time()
        logger.debug(f"检查 {len(worker_ids)} 个 Worker 的心跳状态")

        # Fetch all worker records in one call.
        all_workers_info = await worker_manager.get_all_workers_info(only_alive=False)

        offline_count = 0
        for worker_id in worker_ids:
            worker_data = all_workers_info.get(worker_id)
            if not worker_data:
                continue

            # A failure on one worker must not abort the whole pass.
            try:
                last_heartbeat = float(worker_data.get('last_heartbeat', 0))
                is_alive = worker_data.get('is_alive') == 'true'
                heartbeat_timeout = float(
                    worker_data.get('heartbeat_timeout', self.default_heartbeat_timeout)
                )
                consumer_id = worker_data.get('consumer_id', worker_id)

                heartbeat_age = current_time - last_heartbeat
                if is_alive and heartbeat_age > heartbeat_timeout:
                    logger.info(
                        f"Worker {consumer_id} 心跳超时 ({heartbeat_age:.1f}s > {heartbeat_timeout}s),标记为离线"
                    )
                    await worker_manager.set_worker_offline(
                        worker_id=worker_id,
                        reason="heartbeat_timeout"
                    )
                    offline_count += 1
            except Exception as e:
                logger.error(f"检查 worker {worker_id} 心跳时出错: {e}", exc_info=True)

        if offline_count > 0:
            logger.info(f"本次扫描标记了 {offline_count} 个 Worker 为离线")

    async def _heartbeat_scanner(self):
        """
        Scanner loop: run a scan pass, sleep ``scanner_interval`` seconds, repeat.

        The loop ends when ``_scanner_running`` becomes false or the task is
        cancelled. Any other error is logged and the loop continues after
        the usual interval.
        """
        logger.info(f"心跳扫描器启动 (扫描间隔: {self.scanner_interval}s, 超时阈值: {self.default_heartbeat_timeout}s)")

        while self._scanner_running:
            try:
                await self._scan_once()
                # Wait for the next scan.
                await asyncio.sleep(self.scanner_interval)
            except asyncio.CancelledError:
                logger.info("心跳扫描器收到取消信号")
                break
            except Exception as e:
                logger.error(f"心跳扫描器出错: {e}", exc_info=True)
                await asyncio.sleep(self.scanner_interval)

        logger.info("心跳扫描器已停止运行")

    async def check_worker_heartbeat(self, worker_id: str) -> bool:
        """
        Check the heartbeat state of a single worker.

        Args:
            worker_id: worker ID.

        Returns:
            True when the worker record exists, is flagged alive (a missing
            ``is_alive`` field counts as alive here) and its heartbeat has
            not timed out; False otherwise, including on any error.
        """
        try:
            worker_key = f"{self.redis_prefix}:WORKER:{worker_id}"
            worker_data = await self.redis.hgetall(worker_key)

            if not worker_data:
                logger.warning(f"Worker {worker_id} 不存在")
                return False

            last_heartbeat = float(worker_data.get('last_heartbeat', 0))
            is_alive = worker_data.get('is_alive', 'true').lower() == 'true'
            heartbeat_timeout = float(worker_data.get('heartbeat_timeout', self.default_heartbeat_timeout))

            heartbeat_age = time.time() - last_heartbeat

            # Online means: flagged alive and heartbeat still within timeout.
            return is_alive and heartbeat_age <= heartbeat_timeout

        except Exception as e:
            logger.error(f"检查 worker {worker_id} 心跳状态时出错: {e}", exc_info=True)
            return False

    def _scanner_settings(self) -> dict:
        """Return the scanner fields included in every statistics payload."""
        return {
            'scanner_running': self._scanner_running,
            'scanner_interval': self.scanner_interval,
            'heartbeat_timeout': self.default_heartbeat_timeout,
        }

    async def get_heartbeat_stats(self) -> dict:
        """
        Return heartbeat monitoring statistics.

        Returns:
            A dict with ``total_workers``, ``online_workers``,
            ``timeout_workers``, ``offline_workers``, ``scanner_running``,
            ``scanner_interval`` and ``heartbeat_timeout``. These keys are
            present in every case (no workers, normal, error) so callers can
            rely on a single shape; on failure the counters are zero and an
            extra ``error`` key carries the message.
        """
        try:
            from jettask.worker.lifecycle import WorkerStateManager

            worker_manager = WorkerStateManager(
                redis_client=self.redis,
                redis_prefix=self.redis_prefix
            )

            worker_ids = await worker_manager.get_all_workers()

            if not worker_ids:
                return {
                    'total_workers': 0,
                    'online_workers': 0,
                    'timeout_workers': 0,
                    'offline_workers': 0,
                    **self._scanner_settings(),
                }

            all_workers_info = await worker_manager.get_all_workers_info(only_alive=False)

            current_time = time.time()
            online_count = 0
            timeout_count = 0
            offline_count = 0

            for worker_id in worker_ids:
                worker_data = all_workers_info.get(worker_id)
                if not worker_data:
                    # Listed worker without a record: counted in the total only.
                    continue

                last_heartbeat = float(worker_data.get('last_heartbeat', 0))
                is_alive = worker_data.get('is_alive') == 'true'
                heartbeat_timeout = float(worker_data.get('heartbeat_timeout', self.default_heartbeat_timeout))

                heartbeat_age = current_time - last_heartbeat

                if not is_alive:
                    offline_count += 1
                elif heartbeat_age > heartbeat_timeout:
                    timeout_count += 1
                else:
                    online_count += 1

            return {
                'total_workers': len(worker_ids),
                'online_workers': online_count,
                'timeout_workers': timeout_count,
                'offline_workers': offline_count,
                **self._scanner_settings(),
            }

        except Exception as e:
            logger.error(f"获取心跳统计信息时出错: {e}", exc_info=True)
            return {
                'total_workers': 0,
                'online_workers': 0,
                'timeout_workers': 0,
                'offline_workers': 0,
                'error': str(e),
                **self._scanner_settings(),
            }