jettask 0.2.20__py3-none-any.whl → 0.2.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jettask/__init__.py +4 -0
- jettask/cli.py +12 -8
- jettask/config/lua_scripts.py +37 -0
- jettask/config/nacos_config.py +1 -1
- jettask/core/app.py +313 -340
- jettask/core/container.py +4 -4
- jettask/{persistence → core}/namespace.py +93 -27
- jettask/core/task.py +16 -9
- jettask/core/unified_manager_base.py +136 -26
- jettask/db/__init__.py +67 -0
- jettask/db/base.py +137 -0
- jettask/{utils/db_connector.py → db/connector.py} +130 -26
- jettask/db/models/__init__.py +16 -0
- jettask/db/models/scheduled_task.py +196 -0
- jettask/db/models/task.py +77 -0
- jettask/db/models/task_run.py +85 -0
- jettask/executor/__init__.py +0 -15
- jettask/executor/core.py +76 -31
- jettask/executor/process_entry.py +29 -114
- jettask/executor/task_executor.py +4 -0
- jettask/messaging/event_pool.py +928 -685
- jettask/messaging/scanner.py +30 -0
- jettask/persistence/__init__.py +28 -103
- jettask/persistence/buffer.py +170 -0
- jettask/persistence/consumer.py +330 -249
- jettask/persistence/manager.py +304 -0
- jettask/persistence/persistence.py +391 -0
- jettask/scheduler/__init__.py +15 -3
- jettask/scheduler/{task_crud.py → database.py} +61 -57
- jettask/scheduler/loader.py +2 -2
- jettask/scheduler/{scheduler_coordinator.py → manager.py} +23 -6
- jettask/scheduler/models.py +14 -10
- jettask/scheduler/schedule.py +166 -0
- jettask/scheduler/scheduler.py +12 -11
- jettask/schemas/__init__.py +50 -1
- jettask/schemas/backlog.py +43 -6
- jettask/schemas/namespace.py +70 -19
- jettask/schemas/queue.py +19 -3
- jettask/schemas/responses.py +493 -0
- jettask/task/__init__.py +0 -2
- jettask/task/router.py +3 -0
- jettask/test_connection_monitor.py +1 -1
- jettask/utils/__init__.py +7 -5
- jettask/utils/db_init.py +8 -4
- jettask/utils/namespace_dep.py +167 -0
- jettask/utils/queue_matcher.py +186 -0
- jettask/utils/rate_limit/concurrency_limiter.py +7 -1
- jettask/utils/stream_backlog.py +1 -1
- jettask/webui/__init__.py +0 -1
- jettask/webui/api/__init__.py +4 -4
- jettask/webui/api/alerts.py +806 -71
- jettask/webui/api/example_refactored.py +400 -0
- jettask/webui/api/namespaces.py +390 -45
- jettask/webui/api/overview.py +300 -54
- jettask/webui/api/queues.py +971 -267
- jettask/webui/api/scheduled.py +1249 -56
- jettask/webui/api/settings.py +129 -7
- jettask/webui/api/workers.py +442 -0
- jettask/webui/app.py +46 -2329
- jettask/webui/middleware/__init__.py +6 -0
- jettask/webui/middleware/namespace_middleware.py +135 -0
- jettask/webui/services/__init__.py +146 -0
- jettask/webui/services/heartbeat_service.py +251 -0
- jettask/webui/services/overview_service.py +60 -51
- jettask/webui/services/queue_monitor_service.py +426 -0
- jettask/webui/services/redis_monitor_service.py +87 -0
- jettask/webui/services/settings_service.py +174 -111
- jettask/webui/services/task_monitor_service.py +222 -0
- jettask/webui/services/timeline_pg_service.py +452 -0
- jettask/webui/services/timeline_service.py +189 -0
- jettask/webui/services/worker_monitor_service.py +467 -0
- jettask/webui/utils/__init__.py +11 -0
- jettask/webui/utils/time_utils.py +122 -0
- jettask/worker/lifecycle.py +8 -2
- {jettask-0.2.20.dist-info → jettask-0.2.24.dist-info}/METADATA +1 -1
- jettask-0.2.24.dist-info/RECORD +142 -0
- jettask/executor/executor.py +0 -338
- jettask/persistence/backlog_monitor.py +0 -567
- jettask/persistence/base.py +0 -2334
- jettask/persistence/db_manager.py +0 -516
- jettask/persistence/maintenance.py +0 -81
- jettask/persistence/message_consumer.py +0 -259
- jettask/persistence/models.py +0 -49
- jettask/persistence/offline_recovery.py +0 -196
- jettask/persistence/queue_discovery.py +0 -215
- jettask/persistence/task_persistence.py +0 -218
- jettask/persistence/task_updater.py +0 -583
- jettask/scheduler/add_execution_count.sql +0 -11
- jettask/scheduler/add_priority_field.sql +0 -26
- jettask/scheduler/add_scheduler_id.sql +0 -25
- jettask/scheduler/add_scheduler_id_index.sql +0 -10
- jettask/scheduler/make_scheduler_id_required.sql +0 -28
- jettask/scheduler/migrate_interval_seconds.sql +0 -9
- jettask/scheduler/performance_optimization.sql +0 -45
- jettask/scheduler/run_scheduler.py +0 -186
- jettask/scheduler/schema.sql +0 -84
- jettask/task/task_executor.py +0 -318
- jettask/webui/api/analytics.py +0 -323
- jettask/webui/config.py +0 -90
- jettask/webui/models/__init__.py +0 -3
- jettask/webui/models/namespace.py +0 -63
- jettask/webui/namespace_manager/__init__.py +0 -10
- jettask/webui/namespace_manager/multi.py +0 -593
- jettask/webui/namespace_manager/unified.py +0 -193
- jettask/webui/run.py +0 -46
- jettask-0.2.20.dist-info/RECORD +0 -145
- {jettask-0.2.20.dist-info → jettask-0.2.24.dist-info}/WHEEL +0 -0
- {jettask-0.2.20.dist-info → jettask-0.2.24.dist-info}/entry_points.txt +0 -0
- {jettask-0.2.20.dist-info → jettask-0.2.24.dist-info}/licenses/LICENSE +0 -0
- {jettask-0.2.20.dist-info → jettask-0.2.24.dist-info}/top_level.txt +0 -0
jettask/webui/api/settings.py
CHANGED
@@ -3,9 +3,11 @@
|
|
3
3
|
提供轻量级的路由入口,业务逻辑在 SettingsService 中实现
|
4
4
|
"""
|
5
5
|
from fastapi import APIRouter, HTTPException
|
6
|
+
from typing import Dict, Any
|
6
7
|
import logging
|
7
8
|
import traceback
|
8
9
|
|
10
|
+
from jettask.schemas import SystemSettingsResponse, DatabaseStatusResponse
|
9
11
|
from jettask.webui.services.settings_service import SettingsService
|
10
12
|
|
11
13
|
logger = logging.getLogger(__name__)
|
@@ -16,11 +18,71 @@ router = APIRouter(prefix="/settings", tags=["settings"])
|
|
16
18
|
|
17
19
|
# ============ 系统配置接口 ============
|
18
20
|
|
19
|
-
@router.get(
|
20
|
-
|
21
|
+
@router.get(
|
22
|
+
"/system",
|
23
|
+
summary="获取系统配置信息",
|
24
|
+
description="获取系统级别的配置信息,包括 API 版本、服务名称、运行环境、数据库状态等",
|
25
|
+
response_model=SystemSettingsResponse,
|
26
|
+
responses={
|
27
|
+
200: {
|
28
|
+
"description": "成功返回系统配置信息"
|
29
|
+
},
|
30
|
+
500: {
|
31
|
+
"description": "服务器内部错误",
|
32
|
+
"content": {
|
33
|
+
"application/json": {
|
34
|
+
"example": {
|
35
|
+
"detail": "获取系统配置失败: Configuration error"
|
36
|
+
}
|
37
|
+
}
|
38
|
+
}
|
39
|
+
}
|
40
|
+
}
|
41
|
+
)
|
42
|
+
async def get_system_settings() -> Dict[str, Any]:
|
21
43
|
"""
|
22
|
-
获取系统配置信息
|
23
|
-
|
44
|
+
## 获取系统配置信息
|
45
|
+
|
46
|
+
返回 JetTask WebUI 的系统级配置信息,用于系统管理和监控。
|
47
|
+
|
48
|
+
**配置信息包括**:
|
49
|
+
- API 版本号
|
50
|
+
- 服务名称
|
51
|
+
- 运行环境 (development/staging/production)
|
52
|
+
- 调试模式状态
|
53
|
+
- 数据库连接状态和配置
|
54
|
+
|
55
|
+
**使用场景**:
|
56
|
+
- 系统设置页面
|
57
|
+
- 运维监控
|
58
|
+
- 故障排查
|
59
|
+
- 环境验证
|
60
|
+
|
61
|
+
**示例请求**:
|
62
|
+
```bash
|
63
|
+
curl -X GET "http://localhost:8001/api/v1/settings/system"
|
64
|
+
```
|
65
|
+
|
66
|
+
**示例响应**:
|
67
|
+
```json
|
68
|
+
{
|
69
|
+
"success": true,
|
70
|
+
"data": {
|
71
|
+
"api_version": "v1",
|
72
|
+
"service_name": "JetTask WebUI",
|
73
|
+
"environment": "development",
|
74
|
+
"debug_mode": true,
|
75
|
+
"database": {
|
76
|
+
"connected": true,
|
77
|
+
"host": "localhost",
|
78
|
+
"port": 5432,
|
79
|
+
"database": "jettask",
|
80
|
+
"pool_size": 10,
|
81
|
+
"active_connections": 3
|
82
|
+
}
|
83
|
+
}
|
84
|
+
}
|
85
|
+
```
|
24
86
|
"""
|
25
87
|
try:
|
26
88
|
return SettingsService.get_system_settings()
|
@@ -29,10 +91,70 @@ async def get_system_settings():
|
|
29
91
|
raise HTTPException(status_code=500, detail=str(e))
|
30
92
|
|
31
93
|
|
32
|
-
@router.get(
|
33
|
-
|
94
|
+
@router.get(
|
95
|
+
"/database-status",
|
96
|
+
summary="检查数据库连接状态",
|
97
|
+
description="检查数据库(PostgreSQL/MySQL)的连接状态和性能指标",
|
98
|
+
response_model=DatabaseStatusResponse,
|
99
|
+
responses={
|
100
|
+
200: {
|
101
|
+
"description": "成功返回数据库状态"
|
102
|
+
},
|
103
|
+
500: {
|
104
|
+
"description": "服务器内部错误或数据库连接失败",
|
105
|
+
"content": {
|
106
|
+
"application/json": {
|
107
|
+
"example": {
|
108
|
+
"detail": "数据库状态检查失败: Connection refused"
|
109
|
+
}
|
110
|
+
}
|
111
|
+
}
|
112
|
+
}
|
113
|
+
}
|
114
|
+
)
|
115
|
+
async def check_database_status() -> Dict[str, Any]:
|
34
116
|
"""
|
35
|
-
检查数据库连接状态
|
117
|
+
## 检查数据库连接状态
|
118
|
+
|
119
|
+
检查 JetTask 数据库的连接状态、配置信息和性能指标。
|
120
|
+
|
121
|
+
**返回信息包括**:
|
122
|
+
- 连接状态 (connected: true/false)
|
123
|
+
- 主机地址和端口
|
124
|
+
- 数据库名称
|
125
|
+
- 连接池大小
|
126
|
+
- 当前活跃连接数
|
127
|
+
|
128
|
+
**使用场景**:
|
129
|
+
- 系统健康检查
|
130
|
+
- 数据库监控
|
131
|
+
- 故障诊断
|
132
|
+
- 容量规划
|
133
|
+
|
134
|
+
**示例请求**:
|
135
|
+
```bash
|
136
|
+
curl -X GET "http://localhost:8001/api/v1/settings/database-status"
|
137
|
+
```
|
138
|
+
|
139
|
+
**示例响应**:
|
140
|
+
```json
|
141
|
+
{
|
142
|
+
"success": true,
|
143
|
+
"data": {
|
144
|
+
"connected": true,
|
145
|
+
"host": "localhost",
|
146
|
+
"port": 5432,
|
147
|
+
"database": "jettask",
|
148
|
+
"pool_size": 10,
|
149
|
+
"active_connections": 3
|
150
|
+
}
|
151
|
+
}
|
152
|
+
```
|
153
|
+
|
154
|
+
**注意事项**:
|
155
|
+
- 此接口会实际连接数据库进行检查
|
156
|
+
- 如果数据库不可用,将返回 500 错误
|
157
|
+
- 建议配置监控告警
|
36
158
|
"""
|
37
159
|
try:
|
38
160
|
return await SettingsService.check_database_status()
|
@@ -0,0 +1,442 @@
|
|
1
|
+
"""
|
2
|
+
Worker 监控模块 - Worker 状态监控、心跳管理、离线历史
|
3
|
+
|
4
|
+
提供 Worker 相关的监控和管理功能
|
5
|
+
"""
|
6
|
+
from fastapi import APIRouter, HTTPException, Request, Query, Path
|
7
|
+
from typing import Optional, List, Dict, Any
|
8
|
+
import logging
|
9
|
+
|
10
|
+
from jettask.schemas import (
|
11
|
+
WorkersResponse,
|
12
|
+
WorkerSummaryResponse,
|
13
|
+
WorkerOfflineHistoryResponse
|
14
|
+
)
|
15
|
+
|
16
|
+
router = APIRouter(prefix="/workers", tags=["workers"])
|
17
|
+
logger = logging.getLogger(__name__)
|
18
|
+
|
19
|
+
|
20
|
+
# ============ Worker 监控 ============
|
21
|
+
|
22
|
+
def _get_monitor(request: Request) -> Any:
    """Return the monitor stored on ``request.app.state``.

    Raises:
        HTTPException: 500 with detail ``"Monitor service not initialized"``
            when ``app.state`` has no ``monitor`` attribute.
    """
    if not hasattr(request.app.state, 'monitor'):
        raise HTTPException(status_code=500, detail="Monitor service not initialized")
    return request.app.state.monitor


# ============ Worker heartbeat monitoring ============
#
# NOTE: the fixed-prefix "/heartbeat/..." routes are registered BEFORE the
# parameterised "/{namespace}/{queue_name}" route. Routes are matched in
# declaration order, and a two-segment URL such as "/heartbeat/stats" also
# matches "/{namespace}/{queue_name}"; declared after it, these handlers
# would never be reached.

@router.get(
    "/heartbeat/stats",
    summary="获取心跳监控统计",
    description="获取所有 Worker 的心跳监控统计信息",
    responses={
        200: {
            "description": "成功返回心跳统计",
            "content": {
                "application/json": {
                    "example": {
                        "total_workers": 50,
                        "online_workers": 45,
                        "offline_workers": 3,
                        "timeout_workers": 2
                    }
                }
            }
        },
        500: {
            "description": "服务器内部错误"
        }
    }
)
async def get_heartbeat_stats(request: Request) -> Dict[str, Any]:
    """Return heartbeat monitoring statistics for all workers.

    The result of ``monitor.get_heartbeat_stats()`` is returned unchanged
    (no ``success`` envelope is added).

    Example::

        curl -X GET "http://localhost:8001/api/v1/workers/heartbeat/stats"

    Raises:
        HTTPException: 500 if the monitor is not initialised or the
            monitor call fails.
    """
    monitor = _get_monitor(request)
    try:
        return await monitor.get_heartbeat_stats()
    except HTTPException:
        # Already a well-formed HTTP error; do not re-wrap or re-log it.
        raise
    except Exception as e:
        logger.error(f"获取心跳统计信息失败: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.get(
    "/heartbeat/{worker_id}",
    summary="检查 Worker 心跳状态",
    description="检查指定 Worker 的心跳状态,判断是否在线",
    responses={
        200: {
            "description": "成功返回心跳状态",
            "content": {
                "application/json": {
                    "example": {
                        "worker_id": "worker-001",
                        "is_online": True
                    }
                }
            }
        },
        500: {
            "description": "服务器内部错误"
        }
    }
)
async def check_worker_heartbeat(
    request: Request,
    worker_id: str = Path(..., description="Worker ID", example="worker-001")
) -> Dict[str, Any]:
    """Report whether a single worker is currently online.

    Args:
        request: Incoming request; used to reach ``app.state.monitor``.
        worker_id: Identifier of the worker to check.

    Returns:
        ``{"worker_id": ..., "is_online": ...}`` where ``is_online`` is the
        value returned by ``monitor.check_worker_heartbeat(worker_id)``.
        How "online" is decided (timeout length, etc.) is up to the monitor
        implementation and is not visible here.

    Example::

        curl -X GET "http://localhost:8001/api/v1/workers/heartbeat/worker-001"

    Raises:
        HTTPException: 500 if the monitor is not initialised or the
            monitor call fails.
    """
    monitor = _get_monitor(request)
    try:
        is_online = await monitor.check_worker_heartbeat(worker_id)
        return {
            "worker_id": worker_id,
            "is_online": is_online
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"检查 Worker {worker_id} 心跳状态失败: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e)) from e


# ============ Worker offline history (global) ============

@router.get(
    "/offline-history",
    summary="获取全局 Worker 离线历史",
    description="获取所有 Worker 的离线历史记录,支持时间范围筛选",
    response_model=WorkerOfflineHistoryResponse,
    responses={
        200: {
            "description": "成功返回离线历史"
        },
        500: {
            "description": "服务器内部错误"
        }
    }
)
async def get_workers_offline_history(
    request: Request,
    limit: int = Query(100, ge=1, le=1000, description="返回记录数量限制", example=100),
    start_time: Optional[float] = Query(None, description="开始时间戳(Unix 时间戳)", example=1697644800),
    end_time: Optional[float] = Query(None, description="结束时间戳(Unix 时间戳)", example=1697731200)
) -> Dict[str, Any]:
    """Return worker offline-history records across all queues.

    Args:
        request: Incoming request; used to reach ``app.state.monitor``.
        limit: Maximum number of records to return (1-1000, default 100).
        start_time: Optional lower bound, Unix timestamp.
        end_time: Optional upper bound, Unix timestamp.

    Returns:
        ``{"success": True, "history": [...], "total": len(history)}`` where
        ``history`` is whatever
        ``monitor.get_worker_offline_history(limit, start_time, end_time)``
        returns. Record fields and ordering are determined by the monitor.

    Example::

        curl -X GET "http://localhost:8001/api/v1/workers/offline-history?start_time=1697644800&end_time=1697731200&limit=50"

    Raises:
        HTTPException: 500 if the monitor is not initialised or the
            monitor call fails.
    """
    monitor = _get_monitor(request)
    try:
        history = await monitor.get_worker_offline_history(limit, start_time, end_time)
        return {
            "success": True,
            "history": history,
            "total": len(history)
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"获取 Worker 离线历史失败: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e)) from e


# ============ Per-queue worker monitoring ============

@router.get(
    "/{namespace}/{queue_name}",
    summary="获取队列的 Worker 列表",
    description="获取指定队列所有 Worker 的实时心跳信息和状态",
    response_model=WorkersResponse,
    responses={
        200: {
            "description": "成功返回 Worker 列表"
        },
        500: {
            "description": "服务器内部错误",
            "content": {
                "application/json": {
                    "example": {"detail": "Monitor service not initialized"}
                }
            }
        }
    }
)
async def get_queue_workers(
    request: Request,
    namespace: str = Path(..., description="命名空间名称", example="default"),
    queue_name: str = Path(..., description="队列名称", example="email_queue")
) -> Dict[str, Any]:
    """Return heartbeat information for every worker of one queue.

    Args:
        request: Incoming request; used to reach ``app.state.monitor``.
        namespace: Namespace name. It is echoed back in the response only;
            the monitor lookup is keyed by ``queue_name`` alone.
        queue_name: Queue whose workers are listed.

    Returns:
        ``{"success": True, "namespace": ..., "queue": ..., "workers": ...}``
        where ``workers`` is the result of
        ``monitor.get_worker_heartbeats(queue_name)``.

    Example::

        curl -X GET "http://localhost:8001/api/v1/workers/default/email_queue"

    Raises:
        HTTPException: 500 if the monitor is not initialised or the
            monitor call fails.
    """
    monitor = _get_monitor(request)
    try:
        workers = await monitor.get_worker_heartbeats(queue_name)
        return {
            "success": True,
            "namespace": namespace,
            "queue": queue_name,
            "workers": workers
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"获取队列 {queue_name} 的 Worker 信息失败: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.get(
    "/{namespace}/{queue_name}/summary",
    summary="获取队列 Worker 汇总统计",
    description="获取指定队列 Worker 的汇总统计信息,包括总数、在线数、离线数等",
    response_model=WorkerSummaryResponse,
    responses={
        200: {
            "description": "成功返回汇总统计"
        },
        500: {
            "description": "服务器内部错误"
        }
    }
)
async def get_queue_worker_summary(
    request: Request,
    namespace: str = Path(..., description="命名空间名称", example="default"),
    queue_name: str = Path(..., description="队列名称", example="email_queue"),
    fast: bool = Query(False, description="是否使用快速模式(不包含历史数据)", example=False)
) -> Dict[str, Any]:
    """Return aggregate worker statistics for one queue.

    Args:
        request: Incoming request; used to reach ``app.state.monitor``.
        namespace: Namespace name; echoed back in the response only.
        queue_name: Queue to summarise.
        fast: When true, ``monitor.get_queue_worker_summary_fast`` is used;
            otherwise ``monitor.get_queue_worker_summary``. Per the query
            parameter description, fast mode omits historical data.

    Returns:
        ``{"success": True, "namespace": ..., "queue": ..., "summary": ...}``.

    Example::

        curl -X GET "http://localhost:8001/api/v1/workers/default/email_queue/summary?fast=true"

    Raises:
        HTTPException: 500 if the monitor is not initialised or the
            monitor call fails.
    """
    monitor = _get_monitor(request)
    try:
        if fast:
            summary = await monitor.get_queue_worker_summary_fast(queue_name)
        else:
            summary = await monitor.get_queue_worker_summary(queue_name)
        return {
            "success": True,
            "namespace": namespace,
            "queue": queue_name,
            "summary": summary
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"获取队列 {queue_name} 的 Worker 汇总统计失败: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.get(
    "/{namespace}/{queue_name}/offline-history",
    summary="获取队列 Worker 离线历史",
    description="获取指定队列的 Worker 离线历史记录",
    response_model=WorkerOfflineHistoryResponse,
    responses={
        200: {
            "description": "成功返回队列离线历史"
        },
        500: {
            "description": "服务器内部错误"
        }
    }
)
async def get_queue_workers_offline_history(
    request: Request,
    namespace: str = Path(..., description="命名空间名称", example="default"),
    queue_name: str = Path(..., description="队列名称", example="email_queue"),
    limit: int = Query(100, ge=1, le=1000, description="返回记录数量限制", example=100),
    start_time: Optional[float] = Query(None, description="开始时间戳(Unix 时间戳)"),
    end_time: Optional[float] = Query(None, description="结束时间戳(Unix 时间戳)")
) -> Dict[str, Any]:
    """Return worker offline-history records that involve one queue.

    Args:
        request: Incoming request; used to reach ``app.state.monitor``.
        namespace: Namespace name; echoed back in the response only.
        queue_name: Queue to filter on.
        limit: Maximum number of records to return (1-1000, default 100).
        start_time: Optional lower bound, Unix timestamp.
        end_time: Optional upper bound, Unix timestamp.

    Returns:
        ``{"success": True, "namespace": ..., "queue": ..., "history": [...],
        "total": len(history)}``. A record is kept when ``queue_name`` is one
        of the comma-separated entries of its ``queues`` field.

    Example::

        curl -X GET "http://localhost:8001/api/v1/workers/default/email_queue/offline-history?limit=50"

    Raises:
        HTTPException: 500 if the monitor is not initialised or the
            monitor call fails.
    """
    monitor = _get_monitor(request)
    try:
        # The monitor has no per-queue query here, so over-fetch (10x) from
        # the global history and filter locally. This is a heuristic: fewer
        # than `limit` records may be returned even if more matches exist
        # beyond the over-fetched window.
        all_history = await monitor.get_worker_offline_history(limit * 10, start_time, end_time)
        # assumes 'queues' is a comma-separated string - TODO confirm against
        # the monitor's record schema. `or ""` tolerates a present-but-None value.
        queue_history = [
            record for record in all_history
            if queue_name in (record.get('queues') or '').split(',')
        ][:limit]
        return {
            "success": True,
            "namespace": namespace,
            "queue": queue_name,
            "history": queue_history,
            "total": len(queue_history)
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"获取队列 {queue_name} 的 Worker 离线历史失败: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e)) from e
|