jettask 0.2.1__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jettask/constants.py +213 -0
- jettask/core/app.py +525 -205
- jettask/core/cli.py +193 -185
- jettask/core/consumer_manager.py +126 -34
- jettask/core/context.py +3 -0
- jettask/core/enums.py +137 -0
- jettask/core/event_pool.py +501 -168
- jettask/core/message.py +147 -0
- jettask/core/offline_worker_recovery.py +181 -114
- jettask/core/task.py +10 -174
- jettask/core/task_batch.py +153 -0
- jettask/core/unified_manager_base.py +243 -0
- jettask/core/worker_scanner.py +54 -54
- jettask/executors/asyncio.py +184 -64
- jettask/webui/backend/config.py +51 -0
- jettask/webui/backend/data_access.py +2083 -92
- jettask/webui/backend/data_api.py +3294 -0
- jettask/webui/backend/dependencies.py +261 -0
- jettask/webui/backend/init_meta_db.py +158 -0
- jettask/webui/backend/main.py +1358 -69
- jettask/webui/backend/main_unified.py +78 -0
- jettask/webui/backend/main_v2.py +394 -0
- jettask/webui/backend/namespace_api.py +295 -0
- jettask/webui/backend/namespace_api_old.py +294 -0
- jettask/webui/backend/namespace_data_access.py +611 -0
- jettask/webui/backend/queue_backlog_api.py +727 -0
- jettask/webui/backend/queue_stats_v2.py +521 -0
- jettask/webui/backend/redis_monitor_api.py +476 -0
- jettask/webui/backend/unified_api_router.py +1601 -0
- jettask/webui/db_init.py +204 -32
- jettask/webui/frontend/package-lock.json +492 -1
- jettask/webui/frontend/package.json +4 -1
- jettask/webui/frontend/src/App.css +105 -7
- jettask/webui/frontend/src/App.jsx +49 -20
- jettask/webui/frontend/src/components/NamespaceSelector.jsx +166 -0
- jettask/webui/frontend/src/components/QueueBacklogChart.jsx +298 -0
- jettask/webui/frontend/src/components/QueueBacklogTrend.jsx +638 -0
- jettask/webui/frontend/src/components/QueueDetailsTable.css +65 -0
- jettask/webui/frontend/src/components/QueueDetailsTable.jsx +487 -0
- jettask/webui/frontend/src/components/QueueDetailsTableV2.jsx +465 -0
- jettask/webui/frontend/src/components/ScheduledTaskFilter.jsx +423 -0
- jettask/webui/frontend/src/components/TaskFilter.jsx +425 -0
- jettask/webui/frontend/src/components/TimeRangeSelector.css +21 -0
- jettask/webui/frontend/src/components/TimeRangeSelector.jsx +160 -0
- jettask/webui/frontend/src/components/layout/AppLayout.css +95 -0
- jettask/webui/frontend/src/components/layout/AppLayout.jsx +49 -0
- jettask/webui/frontend/src/components/layout/Header.css +34 -10
- jettask/webui/frontend/src/components/layout/Header.jsx +31 -23
- jettask/webui/frontend/src/components/layout/SideMenu.css +137 -0
- jettask/webui/frontend/src/components/layout/SideMenu.jsx +209 -0
- jettask/webui/frontend/src/components/layout/TabsNav.css +244 -0
- jettask/webui/frontend/src/components/layout/TabsNav.jsx +206 -0
- jettask/webui/frontend/src/components/layout/UserInfo.css +197 -0
- jettask/webui/frontend/src/components/layout/UserInfo.jsx +197 -0
- jettask/webui/frontend/src/contexts/NamespaceContext.jsx +72 -0
- jettask/webui/frontend/src/contexts/TabsContext.backup.jsx +245 -0
- jettask/webui/frontend/src/main.jsx +1 -0
- jettask/webui/frontend/src/pages/Alerts.jsx +684 -0
- jettask/webui/frontend/src/pages/Dashboard.jsx +1330 -0
- jettask/webui/frontend/src/pages/QueueDetail.jsx +1109 -10
- jettask/webui/frontend/src/pages/QueueMonitor.jsx +236 -115
- jettask/webui/frontend/src/pages/Queues.jsx +5 -1
- jettask/webui/frontend/src/pages/ScheduledTasks.jsx +809 -0
- jettask/webui/frontend/src/pages/Settings.jsx +800 -0
- jettask/webui/frontend/src/services/api.js +7 -5
- jettask/webui/frontend/src/utils/suppressWarnings.js +22 -0
- jettask/webui/frontend/src/utils/userPreferences.js +154 -0
- jettask/webui/multi_namespace_consumer.py +543 -0
- jettask/webui/pg_consumer.py +983 -246
- jettask/webui/static/dist/assets/index-7129cfe1.css +1 -0
- jettask/webui/static/dist/assets/index-8d1935cc.js +774 -0
- jettask/webui/static/dist/index.html +2 -2
- jettask/webui/task_center.py +216 -0
- jettask/webui/task_center_client.py +150 -0
- jettask/webui/unified_consumer_manager.py +193 -0
- {jettask-0.2.1.dist-info → jettask-0.2.4.dist-info}/METADATA +1 -1
- jettask-0.2.4.dist-info/RECORD +134 -0
- jettask/webui/pg_consumer_slow.py +0 -1099
- jettask/webui/pg_consumer_test.py +0 -678
- jettask/webui/static/dist/assets/index-823408e8.css +0 -1
- jettask/webui/static/dist/assets/index-9968b0b8.js +0 -543
- jettask/webui/test_pg_consumer_recovery.py +0 -547
- jettask/webui/test_recovery_simple.py +0 -492
- jettask/webui/test_self_recovery.py +0 -467
- jettask-0.2.1.dist-info/RECORD +0 -91
- {jettask-0.2.1.dist-info → jettask-0.2.4.dist-info}/WHEEL +0 -0
- {jettask-0.2.1.dist-info → jettask-0.2.4.dist-info}/entry_points.txt +0 -0
- {jettask-0.2.1.dist-info → jettask-0.2.4.dist-info}/licenses/LICENSE +0 -0
- {jettask-0.2.1.dist-info → jettask-0.2.4.dist-info}/top_level.txt +0 -0
jettask/webui/backend/data_api.py (new file)
@@ -0,0 +1,3294 @@
"""
Data query API routes.
Every endpoint requires a namespace parameter.
"""
from fastapi import APIRouter, HTTPException, Query, Request
from typing import List, Dict, Optional
from pydantic import BaseModel
from datetime import datetime, timedelta, timezone
import logging
import time
import traceback
from sqlalchemy import text

from namespace_data_access import get_namespace_data_access

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/api/data", tags=["data"])

# Global data-access instance
data_access = get_namespace_data_access()


async def handle_database_connection_error(e: Exception, namespace: str, operation_name: str):
    """
    Handle a database connection failure: reset the connection and raise an
    appropriate HTTP exception.

    Args:
        e: the original exception
        namespace: namespace name
        operation_name: operation name (used in the error message)
    """
    error_msg = str(e)

    # Check whether this is a connection-related error
    is_connection_error = (
        "password authentication failed" in error_msg or
        "connection failed" in error_msg or
        "could not connect to server" in error_msg
    )

    if is_connection_error:
        # Reset the connection so it gets re-initialized
        try:
            await data_access.reset_connection(namespace)
            logger.info(f"Reset database connection for namespace {namespace}")
        except Exception as reset_error:
            logger.error(f"Failed to reset connection for namespace {namespace}: {reset_error}")

        if "password authentication failed" in error_msg or "connection failed" in error_msg:
            raise HTTPException(
                status_code=500,
                detail=f"Database connection for namespace '{namespace}' failed; check that namespace's database configuration"
            )
        else:
            raise HTTPException(
                status_code=500,
                detail=f"Database for namespace '{namespace}' is unavailable; check that the database is running"
            )
    else:
        # Any other kind of error
        raise HTTPException(status_code=500, detail=f"{operation_name} failed: {error_msg}")


def build_queue_filter_and_params(queues: Optional[str] = None):
    """
    Build the queue filter clause and its bind parameters.

    Args:
        queues: comma-separated list of queue names

    Returns:
        tuple: (queue_filter_sql, queue_list, queue_params_dict)
    """
    queue_list = []
    if queues:
        queue_list = [q.strip() for q in queues.split(',') if q.strip()]

    queue_filter = ""
    queue_params = {}

    if queue_list:
        queue_placeholders = ','.join([f':queue_{i}' for i in range(len(queue_list))])
        queue_filter = f"AND t.queue IN ({queue_placeholders})"

        # One bind parameter per queue
        for i, queue in enumerate(queue_list):
            queue_params[f'queue_{i}'] = queue

    return queue_filter, queue_list, queue_params

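The helper above only builds a SQL fragment and its bind parameters; the splicing into a real query happens in the dashboard endpoints further down this file. A minimal sketch of the intended usage (the queue names and surrounding query are illustrative, not from the package):

queue_filter, queue_list, queue_params = build_queue_filter_and_params("emails,reports")
# queue_filter -> "AND t.queue IN (:queue_0,:queue_1)"
# queue_list   -> ['emails', 'reports']
# queue_params -> {'queue_0': 'emails', 'queue_1': 'reports'}

sql = text(f"SELECT COUNT(*) FROM tasks t WHERE t.namespace = :namespace {queue_filter}")
# await session.execute(sql, {'namespace': 'default', **queue_params})
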
class TimeRangeQuery(BaseModel):
    start_time: Optional[datetime] = None
    end_time: Optional[datetime] = None
    time_range: Optional[str] = "15m"
    queues: Optional[List[str]] = None
    filters: Optional[List[Dict]] = None


class TimeRangeResult:
    """Result of time-range resolution."""
    def __init__(self, start_time: datetime, end_time: datetime, interval: str, interval_seconds: int, granularity: str):
        self.start_time = start_time
        self.end_time = end_time
        self.interval = interval
        self.interval_seconds = interval_seconds
        self.granularity = granularity


def parse_time_range_query(query: TimeRangeQuery) -> TimeRangeResult:
    """
    Resolve a TimeRangeQuery into a concrete time range and interval.

    Args:
        query: the TimeRangeQuery object

    Returns:
        TimeRangeResult carrying start_time, end_time, interval, interval_seconds, granularity
    """
    end_time = datetime.now(timezone.utc)

    if query.time_range and query.time_range != 'custom':
        # Parse the time-range string
        if query.time_range.endswith('m'):
            minutes = int(query.time_range[:-1])
            start_time = end_time - timedelta(minutes=minutes)
        elif query.time_range.endswith('h'):
            hours = int(query.time_range[:-1])
            start_time = end_time - timedelta(hours=hours)
        elif query.time_range.endswith('d'):
            days = int(query.time_range[:-1])
            start_time = end_time - timedelta(days=days)
        else:
            start_time = end_time - timedelta(minutes=15)
    else:
        # Use the custom range, falling back to defaults
        start_time = query.start_time or (end_time - timedelta(minutes=15))
        end_time = query.end_time or end_time

    # Pick the interval dynamically
    return calculate_dynamic_interval(start_time, end_time)


def parse_time_range_string(time_range: str) -> TimeRangeResult:
    """
    Resolve a time-range string into a concrete time range and interval.

    Args:
        time_range: a time-range string such as '1h', '30m', '7d'

    Returns:
        TimeRangeResult carrying start_time, end_time, interval, interval_seconds, granularity
    """
    end_time = datetime.now(timezone.utc)

    if time_range.endswith('m'):
        minutes = int(time_range[:-1])
        start_time = end_time - timedelta(minutes=minutes)
    elif time_range.endswith('h'):
        hours = int(time_range[:-1])
        start_time = end_time - timedelta(hours=hours)
    elif time_range.endswith('d'):
        days = int(time_range[:-1])
        start_time = end_time - timedelta(days=days)
    else:
        start_time = end_time - timedelta(hours=1)

    # Pick the interval dynamically
    return calculate_dynamic_interval(start_time, end_time)


def calculate_dynamic_interval(start_time: datetime, end_time: datetime, target_points: int = 200) -> TimeRangeResult:
    """
    Pick a sensible bucket interval for the given time range.

    Args:
        start_time: range start
        end_time: range end
        target_points: target number of data points, 200 by default

    Returns:
        TimeRangeResult carrying the chosen interval
    """
    duration = (end_time - start_time).total_seconds()
    ideal_interval_seconds = duration / target_points

    # Choose the nearest sensible interval
    if ideal_interval_seconds <= 1:
        interval_seconds = 1
        interval = '1 second'
        granularity = 'second'
    elif ideal_interval_seconds <= 5:
        interval_seconds = 5
        interval = '5 seconds'
        granularity = 'second'
    elif ideal_interval_seconds <= 10:
        interval_seconds = 10
        interval = '10 seconds'
        granularity = 'second'
    elif ideal_interval_seconds <= 30:
        interval_seconds = 30
        interval = '30 seconds'
        granularity = 'second'
    elif ideal_interval_seconds <= 60:
        interval_seconds = 60
        interval = '1 minute'
        granularity = 'minute'
    elif ideal_interval_seconds <= 120:
        interval_seconds = 120
        interval = '2 minutes'
        granularity = 'minute'
    elif ideal_interval_seconds <= 300:
        interval_seconds = 300
        interval = '5 minutes'
        granularity = 'minute'
    elif ideal_interval_seconds <= 600:
        interval_seconds = 600
        interval = '10 minutes'
        granularity = 'minute'
    elif ideal_interval_seconds <= 1800:
        interval_seconds = 1800
        interval = '30 minutes'
        granularity = 'minute'
    elif ideal_interval_seconds <= 3600:
        interval_seconds = 3600
        interval = '1 hour'
        granularity = 'hour'
    else:
        interval_seconds = 86400
        interval = '1 day'
        granularity = 'day'

    return TimeRangeResult(start_time, end_time, interval, interval_seconds, granularity)

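Concretely: a 24-hour window divided by the default 200 target points gives 86400 / 200 = 432 ideal seconds, which lands in the '10 minutes' branch (432 <= 600), so the chart ends up with 86400 / 600 = 144 buckets. A quick sketch of that arithmetic against the function above:

from datetime import datetime, timedelta, timezone

end = datetime.now(timezone.utc)
result = calculate_dynamic_interval(end - timedelta(hours=24), end)
# duration = 86400 s, ideal = 86400 / 200 = 432 s -> first branch with 432 <= 600
assert (result.interval, result.interval_seconds, result.granularity) == ('10 minutes', 600, 'minute')
# 144 buckets: close to, and never far above, the 200-point target
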
class QueueStatsResponse(BaseModel):
    queue_name: str
    length: int
    consumer_groups: int
    consumers: int
    pending: int


class TaskDetailResponse(BaseModel):
    id: str
    status: str
    name: str
    queue: str
    worker_id: Optional[str]
    created_at: Optional[str]
    started_at: Optional[str]
    completed_at: Optional[str]
    result: Optional[str]
    error: Optional[str]
    retry_count: int


class ScheduledTaskResponse(BaseModel):
    id: int
    name: str
    queue: str
    schedule: str
    task_data: dict
    enabled: bool
    last_run_at: Optional[str]
    next_run_at: Optional[str]
    execution_count: int
    created_at: Optional[str]
    updated_at: Optional[str]
    description: Optional[str]
    max_retries: Optional[int]
    retry_delay: Optional[int]
    timeout: Optional[int]
    priority: Optional[int]

@router.get("/namespaces", response_model=List[dict])
|
279
|
+
async def list_available_namespaces():
|
280
|
+
"""获取所有可用的命名空间列表"""
|
281
|
+
try:
|
282
|
+
namespaces = await data_access.manager.list_namespaces()
|
283
|
+
# 只返回基本信息
|
284
|
+
return [
|
285
|
+
{
|
286
|
+
'id': ns.get('id'),
|
287
|
+
'name': ns.get('name'),
|
288
|
+
'description': ns.get('description', ''),
|
289
|
+
'created_at': ns.get('created_at')
|
290
|
+
}
|
291
|
+
for ns in namespaces
|
292
|
+
]
|
293
|
+
except Exception as e:
|
294
|
+
logger.error(f"获取命名空间列表失败: {e}")
|
295
|
+
traceback.print_exc()
|
296
|
+
raise HTTPException(status_code=500, detail=str(e))
|
297
|
+
|
298
|
+
|
299
|
+
@router.get("/queues/{namespace}", response_model=List[QueueStatsResponse])
|
300
|
+
async def get_queue_stats(namespace: str):
|
301
|
+
"""
|
302
|
+
获取指定命名空间的队列统计信息
|
303
|
+
|
304
|
+
Args:
|
305
|
+
namespace: 命名空间名称
|
306
|
+
"""
|
307
|
+
try:
|
308
|
+
stats = await data_access.get_queue_stats(namespace)
|
309
|
+
return stats
|
310
|
+
except Exception as e:
|
311
|
+
logger.error(f"获取队列统计失败: {e}")
|
312
|
+
traceback.print_exc()
|
313
|
+
raise HTTPException(status_code=500, detail=str(e))
|
314
|
+
|
315
|
+
|
316
|
+
@router.get("/tasks/{namespace}/{task_id}", response_model=TaskDetailResponse)
|
317
|
+
async def get_task_detail(namespace: str, task_id: str):
|
318
|
+
"""
|
319
|
+
获取指定命名空间中的任务详情
|
320
|
+
|
321
|
+
Args:
|
322
|
+
namespace: 命名空间名称
|
323
|
+
task_id: 任务ID
|
324
|
+
"""
|
325
|
+
try:
|
326
|
+
task = await data_access.get_task_detail(namespace, task_id)
|
327
|
+
if not task:
|
328
|
+
raise HTTPException(status_code=404, detail="Task not found")
|
329
|
+
return task
|
330
|
+
except HTTPException:
|
331
|
+
raise
|
332
|
+
except Exception as e:
|
333
|
+
logger.error(f"获取任务详情失败: {e}")
|
334
|
+
traceback.print_exc()
|
335
|
+
raise HTTPException(status_code=500, detail=str(e))
|
336
|
+
|
337
|
+
|
338
|
+
@router.get("/scheduled-tasks/{namespace}")
|
339
|
+
async def get_scheduled_tasks(
|
340
|
+
namespace: str,
|
341
|
+
limit: int = Query(100, description="返回记录数"),
|
342
|
+
offset: int = Query(0, description="偏移量")
|
343
|
+
):
|
344
|
+
"""
|
345
|
+
获取指定命名空间的定时任务列表
|
346
|
+
|
347
|
+
Args:
|
348
|
+
namespace: 命名空间名称
|
349
|
+
limit: 返回记录数
|
350
|
+
offset: 偏移量
|
351
|
+
"""
|
352
|
+
try:
|
353
|
+
result = await data_access.get_scheduled_tasks(namespace, limit, offset)
|
354
|
+
return result
|
355
|
+
except Exception as e:
|
356
|
+
logger.error(f"获取定时任务列表失败: {e}")
|
357
|
+
traceback.print_exc()
|
358
|
+
raise HTTPException(status_code=500, detail=str(e))
|
359
|
+
|
360
|
+
|
361
|
+
@router.get("/queue-history/{namespace}/{queue_name}")
|
362
|
+
async def get_queue_history(
|
363
|
+
namespace: str,
|
364
|
+
queue_name: str,
|
365
|
+
hours: int = Query(24, description="历史时间范围(小时)"),
|
366
|
+
interval: int = Query(1, description="时间间隔(小时)")
|
367
|
+
):
|
368
|
+
"""
|
369
|
+
获取指定命名空间中队列的历史数据
|
370
|
+
|
371
|
+
Args:
|
372
|
+
namespace: 命名空间名称
|
373
|
+
queue_name: 队列名称
|
374
|
+
hours: 历史时间范围(小时)
|
375
|
+
interval: 时间间隔(小时)
|
376
|
+
"""
|
377
|
+
try:
|
378
|
+
history = await data_access.get_queue_history(
|
379
|
+
namespace, queue_name, hours, interval
|
380
|
+
)
|
381
|
+
return history
|
382
|
+
except Exception as e:
|
383
|
+
logger.error(f"获取队列历史数据失败: {e}")
|
384
|
+
traceback.print_exc()
|
385
|
+
raise HTTPException(status_code=500, detail=str(e))
|
386
|
+
|
387
|
+
|
388
|
+
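All of these read-only endpoints hang off the /api/data prefix declared at the top of the module, so a client hits them as, e.g., GET /api/data/queues/{namespace}. A minimal httpx sketch of a round trip (the host, port, and namespace name are assumptions for illustration):

import asyncio
import httpx

async def main():
    async with httpx.AsyncClient(base_url="http://localhost:8000") as client:
        # List namespaces, then pull queue stats for one of them
        namespaces = (await client.get("/api/data/namespaces")).json()
        stats = (await client.get("/api/data/queues/default")).json()
        print(namespaces, stats)

asyncio.run(main())
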
@router.post("/queue-timeline/{namespace}")
|
389
|
+
async def get_queue_timeline(namespace: str, query: TimeRangeQuery):
|
390
|
+
"""
|
391
|
+
获取指定命名空间的队列时间线数据
|
392
|
+
|
393
|
+
Args:
|
394
|
+
namespace: 命名空间名称
|
395
|
+
query: 时间范围查询参数
|
396
|
+
"""
|
397
|
+
try:
|
398
|
+
# 使用路径参数中的namespace(TimeRangeQuery没有namespace属性)
|
399
|
+
|
400
|
+
conn = await data_access.manager.get_connection(namespace)
|
401
|
+
|
402
|
+
# 使用公共工具函数处理时间范围
|
403
|
+
time_range_result = parse_time_range_query(query)
|
404
|
+
start_time = time_range_result.start_time
|
405
|
+
end_time = time_range_result.end_time
|
406
|
+
|
407
|
+
# 如果没有PostgreSQL配置,返回模拟数据
|
408
|
+
if not conn.pg_config:
|
409
|
+
# 生成模拟的时序数据
|
410
|
+
timeline_data = []
|
411
|
+
duration = (end_time - start_time).total_seconds()
|
412
|
+
num_points = min(50, max(10, int(duration / 60))) # 10-50个数据点
|
413
|
+
|
414
|
+
for i in range(num_points):
|
415
|
+
timestamp = start_time + timedelta(seconds=i * duration / num_points)
|
416
|
+
for queue_name in (query.queues or ['default']):
|
417
|
+
timeline_data.append({
|
418
|
+
'time': timestamp.isoformat(),
|
419
|
+
'queue': queue_name,
|
420
|
+
'value': 100 + i * 2 # 模拟增长
|
421
|
+
})
|
422
|
+
|
423
|
+
return {
|
424
|
+
"data": timeline_data,
|
425
|
+
"granularity": "1m"
|
426
|
+
}
|
427
|
+
|
428
|
+
# 从PostgreSQL查询时序数据
|
429
|
+
async with await conn.get_pg_session() as session:
|
430
|
+
try:
|
431
|
+
# 动态计算时间间隔,目标是生成约200个时间点
|
432
|
+
duration = (end_time - start_time).total_seconds()
|
433
|
+
TARGET_POINTS = 200 # 目标数据点数
|
434
|
+
ideal_interval_seconds = duration / TARGET_POINTS
|
435
|
+
|
436
|
+
# 将间隔秒数规范化到合理的值
|
437
|
+
if ideal_interval_seconds <= 1:
|
438
|
+
interval = '1 second'
|
439
|
+
granularity = 'second'
|
440
|
+
elif ideal_interval_seconds <= 5:
|
441
|
+
interval = '5 seconds'
|
442
|
+
granularity = 'second'
|
443
|
+
elif ideal_interval_seconds <= 10:
|
444
|
+
interval = '10 seconds'
|
445
|
+
granularity = 'second'
|
446
|
+
elif ideal_interval_seconds <= 30:
|
447
|
+
interval = '30 seconds'
|
448
|
+
granularity = 'second'
|
449
|
+
elif ideal_interval_seconds <= 60:
|
450
|
+
interval = '1 minute'
|
451
|
+
granularity = 'minute'
|
452
|
+
elif ideal_interval_seconds <= 120:
|
453
|
+
interval = '2 minutes'
|
454
|
+
granularity = 'minute'
|
455
|
+
elif ideal_interval_seconds <= 300:
|
456
|
+
interval = '5 minutes'
|
457
|
+
granularity = 'minute'
|
458
|
+
elif ideal_interval_seconds <= 600:
|
459
|
+
interval = '10 minutes'
|
460
|
+
granularity = 'minute'
|
461
|
+
elif ideal_interval_seconds <= 900:
|
462
|
+
interval = '15 minutes'
|
463
|
+
granularity = 'minute'
|
464
|
+
elif ideal_interval_seconds <= 1800:
|
465
|
+
interval = '30 minutes'
|
466
|
+
granularity = 'minute'
|
467
|
+
elif ideal_interval_seconds <= 3600:
|
468
|
+
interval = '1 hour'
|
469
|
+
granularity = 'hour'
|
470
|
+
elif ideal_interval_seconds <= 7200:
|
471
|
+
interval = '2 hours'
|
472
|
+
granularity = 'hour'
|
473
|
+
elif ideal_interval_seconds <= 14400:
|
474
|
+
interval = '4 hours'
|
475
|
+
granularity = 'hour'
|
476
|
+
elif ideal_interval_seconds <= 21600:
|
477
|
+
interval = '6 hours'
|
478
|
+
granularity = 'hour'
|
479
|
+
elif ideal_interval_seconds <= 43200:
|
480
|
+
interval = '12 hours'
|
481
|
+
granularity = 'hour'
|
482
|
+
else:
|
483
|
+
interval = '1 day'
|
484
|
+
granularity = 'day'
|
485
|
+
|
486
|
+
timeline_data = []
|
487
|
+
|
488
|
+
# 计算间隔秒数用于时间桶对齐
|
489
|
+
interval_seconds_map = {
|
490
|
+
'1 second': 1, '5 seconds': 5, '10 seconds': 10, '30 seconds': 30,
|
491
|
+
'1 minute': 60, '2 minutes': 120, '5 minutes': 300, '10 minutes': 600,
|
492
|
+
'15 minutes': 900, '30 minutes': 1800, '1 hour': 3600, '2 hours': 7200,
|
493
|
+
'4 hours': 14400, '6 hours': 21600, '12 hours': 43200, '1 day': 86400
|
494
|
+
}
|
495
|
+
interval_seconds = interval_seconds_map.get(interval, 60) # 默认1分钟
|
496
|
+
|
497
|
+
for queue_name in (query.queues or []):
|
498
|
+
# 使用对齐到固定边界的时间序列生成方案
|
499
|
+
# 注意:需要匹配基础队列名及其所有优先级队列
|
500
|
+
query_sql = text(f"""
|
501
|
+
WITH time_series AS (
|
502
|
+
-- 生成对齐到固定边界的时间序列
|
503
|
+
SELECT generate_series(
|
504
|
+
to_timestamp(FLOOR(EXTRACT(epoch FROM CAST(:start_time AS timestamptz)) / {interval_seconds}) * {interval_seconds}),
|
505
|
+
to_timestamp(CEILING(EXTRACT(epoch FROM CAST(:end_time AS timestamptz)) / {interval_seconds}) * {interval_seconds} + {interval_seconds}),
|
506
|
+
CAST(:interval_val AS interval)
|
507
|
+
) AS time_bucket
|
508
|
+
),
|
509
|
+
task_counts AS (
|
510
|
+
SELECT
|
511
|
+
-- 任务时间也对齐到相同边界
|
512
|
+
to_timestamp(
|
513
|
+
FLOOR(EXTRACT(epoch FROM t.created_at) / {interval_seconds}) * {interval_seconds}
|
514
|
+
) AS time_bucket,
|
515
|
+
COUNT(t.stream_id) as count,
|
516
|
+
COUNT(CASE WHEN t.stream_id NOT IN (SELECT stream_id FROM task_runs) THEN 1 END) as pending,
|
517
|
+
COUNT(CASE WHEN EXISTS (SELECT 1 FROM task_runs tr WHERE tr.stream_id = t.stream_id AND tr.status = 'pending') THEN 1 END) as processing
|
518
|
+
FROM tasks t
|
519
|
+
WHERE t.namespace = :namespace
|
520
|
+
-- 匹配基础队列名和所有优先级队列(如 shared_queue, shared_queue:0, shared_queue:5 等)
|
521
|
+
AND (t.queue = :queue_name OR t.queue LIKE :queue_pattern)
|
522
|
+
AND t.created_at >= :start_time
|
523
|
+
AND t.created_at <= :end_time
|
524
|
+
GROUP BY 1
|
525
|
+
)
|
526
|
+
SELECT
|
527
|
+
ts.time_bucket,
|
528
|
+
COALESCE(tc.count, 0) as value,
|
529
|
+
COALESCE(tc.pending, 0) as pending,
|
530
|
+
COALESCE(tc.processing, 0) as processing
|
531
|
+
FROM time_series ts
|
532
|
+
LEFT JOIN task_counts tc ON ts.time_bucket = tc.time_bucket
|
533
|
+
ORDER BY ts.time_bucket
|
534
|
+
""")
|
535
|
+
|
536
|
+
result = await session.execute(query_sql, {
|
537
|
+
'namespace': namespace,
|
538
|
+
'queue_name': queue_name,
|
539
|
+
'queue_pattern': f'{queue_name}:%', # 匹配所有优先级队列
|
540
|
+
'start_time': start_time,
|
541
|
+
'end_time': end_time,
|
542
|
+
'interval_val': interval
|
543
|
+
})
|
544
|
+
|
545
|
+
# 转换结果为列表以便获取索引
|
546
|
+
rows = list(result)
|
547
|
+
end_index = len(rows) - 1
|
548
|
+
|
549
|
+
for idx, row in enumerate(rows):
|
550
|
+
timeline_data.append({
|
551
|
+
'time': row.time_bucket.isoformat(),
|
552
|
+
'queue': queue_name,
|
553
|
+
'value': int(row.value) or None if idx > 0 and end_index != idx else int(row.value)
|
554
|
+
})
|
555
|
+
|
556
|
+
return {
|
557
|
+
"data": timeline_data,
|
558
|
+
"granularity": granularity
|
559
|
+
}
|
560
|
+
|
561
|
+
except Exception as e:
|
562
|
+
logger.warning(f"查询时序数据失败,返回当前快照: {e}")
|
563
|
+
traceback.print_exc()
|
564
|
+
|
565
|
+
# 如果查询失败,返回Redis当前快照
|
566
|
+
redis_client = await conn.get_redis_client()
|
567
|
+
try:
|
568
|
+
timeline_data = []
|
569
|
+
for queue_name in (query.queues or []):
|
570
|
+
queue_key = f"{conn.redis_prefix}:QUEUE:{queue_name}"
|
571
|
+
queue_len = await redis_client.xlen(queue_key)
|
572
|
+
|
573
|
+
timeline_data.append({
|
574
|
+
'time': end_time.isoformat(),
|
575
|
+
'queue': queue_name,
|
576
|
+
'value': queue_len
|
577
|
+
})
|
578
|
+
|
579
|
+
return {
|
580
|
+
"data": timeline_data,
|
581
|
+
"granularity": "snapshot"
|
582
|
+
}
|
583
|
+
finally:
|
584
|
+
await redis_client.aclose()
|
585
|
+
|
586
|
+
except Exception as e:
|
587
|
+
logger.error(f"获取队列时间线失败: {e}")
|
588
|
+
traceback.print_exc()
|
589
|
+
raise HTTPException(status_code=500, detail=str(e))
|
590
|
+
|
591
|
+
|
592
|
+
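The FLOOR(EXTRACT(epoch ...) / N) * N trick in the SQL above snaps both the generated series and the task timestamps onto the same N-second grid, which is what makes the LEFT JOIN line up. The same alignment expressed in Python, for reference (a sketch, not part of the module):

from datetime import datetime, timezone

def align_to_bucket(ts: datetime, interval_seconds: int) -> datetime:
    """Snap a timestamp down to the start of its interval_seconds-wide bucket."""
    epoch = int(ts.timestamp())
    return datetime.fromtimestamp(epoch - epoch % interval_seconds, tz=timezone.utc)

# 12:34:56 with 300-second (5-minute) buckets snaps to 12:30:00
t = datetime(2024, 1, 1, 12, 34, 56, tzinfo=timezone.utc)
assert align_to_bucket(t, 300) == datetime(2024, 1, 1, 12, 30, tzinfo=timezone.utc)
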
@router.get("/queue-consumers/{namespace}/{queue_name}")
|
593
|
+
async def get_queue_consumers(namespace: str, queue_name: str):
|
594
|
+
"""
|
595
|
+
获取指定命名空间中队列的消费者信息
|
596
|
+
|
597
|
+
Args:
|
598
|
+
namespace: 命名空间名称
|
599
|
+
queue_name: 队列名称
|
600
|
+
"""
|
601
|
+
try:
|
602
|
+
conn = await data_access.manager.get_connection(namespace)
|
603
|
+
redis_client = await conn.get_redis_client()
|
604
|
+
|
605
|
+
try:
|
606
|
+
queue_key = f"{conn.redis_prefix}:QUEUE:{queue_name}"
|
607
|
+
|
608
|
+
# 获取消费组信息
|
609
|
+
groups_info = await redis_client.xinfo_groups(queue_key)
|
610
|
+
|
611
|
+
result = {
|
612
|
+
'queue_name': queue_name,
|
613
|
+
'consumer_groups': []
|
614
|
+
}
|
615
|
+
|
616
|
+
for group in groups_info:
|
617
|
+
# 获取消费者详情
|
618
|
+
consumers_info = await redis_client.xinfo_consumers(queue_key, group['name'])
|
619
|
+
|
620
|
+
group_data = {
|
621
|
+
'name': group['name'],
|
622
|
+
'consumers': group.get('consumers', 0),
|
623
|
+
'pending': group.get('pending', 0),
|
624
|
+
'last_delivered_id': group.get('last-delivered-id'),
|
625
|
+
'consumer_details': []
|
626
|
+
}
|
627
|
+
|
628
|
+
for consumer in consumers_info:
|
629
|
+
group_data['consumer_details'].append({
|
630
|
+
'name': consumer.get('name'),
|
631
|
+
'pending': consumer.get('pending', 0),
|
632
|
+
'idle': consumer.get('idle', 0)
|
633
|
+
})
|
634
|
+
|
635
|
+
result['consumer_groups'].append(group_data)
|
636
|
+
|
637
|
+
return result
|
638
|
+
|
639
|
+
finally:
|
640
|
+
await redis_client.aclose()
|
641
|
+
|
642
|
+
except Exception as e:
|
643
|
+
logger.error(f"获取消费者信息失败: {e}")
|
644
|
+
traceback.print_exc()
|
645
|
+
raise HTTPException(status_code=500, detail=str(e))
|
646
|
+
|
647
|
+
|
648
|
+
@router.get("/system-stats/{namespace}")
|
649
|
+
async def get_system_stats(namespace: str):
|
650
|
+
"""
|
651
|
+
获取指定命名空间的系统统计信息
|
652
|
+
|
653
|
+
Args:
|
654
|
+
namespace: 命名空间名称
|
655
|
+
"""
|
656
|
+
try:
|
657
|
+
conn = await data_access.manager.get_connection(namespace)
|
658
|
+
redis_client = await conn.get_redis_client()
|
659
|
+
|
660
|
+
try:
|
661
|
+
# 统计各种类型的键
|
662
|
+
stats = {
|
663
|
+
'namespace': namespace,
|
664
|
+
'queues': 0,
|
665
|
+
'tasks': 0,
|
666
|
+
'delayed_tasks': 0,
|
667
|
+
'workers': 0
|
668
|
+
}
|
669
|
+
|
670
|
+
# 统计队列数量
|
671
|
+
queue_pattern = f"{conn.redis_prefix}:QUEUE:*"
|
672
|
+
async for _ in redis_client.scan_iter(match=queue_pattern):
|
673
|
+
stats['queues'] += 1
|
674
|
+
|
675
|
+
# 统计任务数量
|
676
|
+
task_pattern = f"{conn.redis_prefix}:TASK:*"
|
677
|
+
async for _ in redis_client.scan_iter(match=task_pattern):
|
678
|
+
stats['tasks'] += 1
|
679
|
+
|
680
|
+
# 统计延迟任务数量
|
681
|
+
delayed_pattern = f"{conn.redis_prefix}:DELAYED_QUEUE:*"
|
682
|
+
async for key in redis_client.scan_iter(match=delayed_pattern):
|
683
|
+
count = await redis_client.zcard(key)
|
684
|
+
stats['delayed_tasks'] += count
|
685
|
+
|
686
|
+
# 统计工作进程数量
|
687
|
+
worker_pattern = f"{conn.redis_prefix}:WORKER:*"
|
688
|
+
async for _ in redis_client.scan_iter(match=worker_pattern):
|
689
|
+
stats['workers'] += 1
|
690
|
+
|
691
|
+
return stats
|
692
|
+
|
693
|
+
finally:
|
694
|
+
await redis_client.aclose()
|
695
|
+
|
696
|
+
except Exception as e:
|
697
|
+
logger.error(f"获取系统统计信息失败: {e}")
|
698
|
+
traceback.print_exc()
|
699
|
+
raise HTTPException(status_code=500, detail=str(e))
|
700
|
+
|
701
|
+
|
702
|
+
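Note that get_system_stats derives its counts by walking the whole keyspace with scan_iter, which is O(total keys) on every request; correct, but potentially slow on a large Redis instance. A common alternative, not what this package does, is to have workers maintain the counts in a small stats hash so reads are O(1). A hedged sketch, assuming redis.asyncio and the same key-prefix convention as above:

import redis.asyncio as redis

async def incr_stat(client: redis.Redis, prefix: str, field: str, delta: int = 1):
    # e.g. field = "queues" / "tasks" / "workers"
    await client.hincrby(f"{prefix}:STATS", field, delta)

async def read_stats(client: redis.Redis, prefix: str) -> dict:
    raw = await client.hgetall(f"{prefix}:STATS")
    return {k: int(v) for k, v in raw.items()}
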
@router.post("/queue-details/{namespace}")
|
703
|
+
async def get_queue_details(namespace: str, query: TimeRangeQuery):
|
704
|
+
"""
|
705
|
+
获取指定命名空间中队列的详细信息
|
706
|
+
|
707
|
+
Args:
|
708
|
+
namespace: 命名空间名称
|
709
|
+
query: 查询参数(包含队列列表和时间范围)
|
710
|
+
"""
|
711
|
+
try:
|
712
|
+
conn = await data_access.manager.get_connection(namespace)
|
713
|
+
redis_client = await conn.get_redis_client()
|
714
|
+
|
715
|
+
# 确定时间范围
|
716
|
+
end_time = datetime.now(timezone.utc)
|
717
|
+
if query.time_range and query.time_range != 'custom':
|
718
|
+
# 解析时间范围字符串
|
719
|
+
if query.time_range.endswith('m'):
|
720
|
+
minutes = int(query.time_range[:-1])
|
721
|
+
start_time = end_time - timedelta(minutes=minutes)
|
722
|
+
elif query.time_range.endswith('h'):
|
723
|
+
hours = int(query.time_range[:-1])
|
724
|
+
start_time = end_time - timedelta(hours=hours)
|
725
|
+
elif query.time_range.endswith('d'):
|
726
|
+
days = int(query.time_range[:-1])
|
727
|
+
start_time = end_time - timedelta(days=days)
|
728
|
+
else:
|
729
|
+
start_time = end_time - timedelta(minutes=15)
|
730
|
+
else:
|
731
|
+
# 使用自定义时间范围或默认值
|
732
|
+
start_time = query.start_time or (end_time - timedelta(minutes=15))
|
733
|
+
end_time = query.end_time or end_time
|
734
|
+
|
735
|
+
result = []
|
736
|
+
|
737
|
+
for queue_name in query.queues:
|
738
|
+
queue_key = f"{conn.redis_prefix}:QUEUE:{queue_name}"
|
739
|
+
|
740
|
+
# 获取Redis中的队列信息
|
741
|
+
queue_len = await redis_client.xlen(queue_key)
|
742
|
+
|
743
|
+
# 获取消费组信息
|
744
|
+
try:
|
745
|
+
groups_info = await redis_client.xinfo_groups(queue_key)
|
746
|
+
consumer_groups = len(groups_info)
|
747
|
+
total_consumers = sum(g.get('consumers', 0) for g in groups_info)
|
748
|
+
|
749
|
+
# 计算所有消费者已领取但未确认的消息数(不可见消息)
|
750
|
+
invisible_messages_count = 0
|
751
|
+
for group in groups_info:
|
752
|
+
try:
|
753
|
+
consumers = await redis_client.xinfo_consumers(queue_key, group['name'])
|
754
|
+
for consumer in consumers:
|
755
|
+
invisible_messages_count += consumer.get('pending', 0) # 消费者已领取但未确认
|
756
|
+
except:
|
757
|
+
pass
|
758
|
+
|
759
|
+
# 注意:这里不再使用这些变量,将在后面根据数据库的pending任务重新计算
|
760
|
+
# visible_messages_count 和 invisible_messages_count 将在获取数据库数据后重新计算
|
761
|
+
|
762
|
+
except:
|
763
|
+
consumer_groups = 0
|
764
|
+
total_consumers = 0
|
765
|
+
invisible_messages_count = 0
|
766
|
+
# visible_messages_count 将在获取数据库数据后计算
|
767
|
+
|
768
|
+
# 获取活跃的workers数量
|
769
|
+
active_workers = 0
|
770
|
+
try:
|
771
|
+
worker_keys = []
|
772
|
+
async for key in redis_client.scan_iter(match=f"{conn.redis_prefix}:WORKER:*"):
|
773
|
+
worker_keys.append(key)
|
774
|
+
|
775
|
+
for worker_key in worker_keys:
|
776
|
+
worker_info = await redis_client.hgetall(worker_key)
|
777
|
+
if worker_info:
|
778
|
+
last_heartbeat = worker_info.get('last_heartbeat')
|
779
|
+
if last_heartbeat:
|
780
|
+
try:
|
781
|
+
heartbeat_time = float(last_heartbeat)
|
782
|
+
if time.time() - heartbeat_time < 60: # 60秒内有心跳
|
783
|
+
worker_queues = worker_info.get('queues', '')
|
784
|
+
if queue_name in worker_queues:
|
785
|
+
active_workers += 1
|
786
|
+
except:
|
787
|
+
pass
|
788
|
+
except Exception as e:
|
789
|
+
logger.warning(f"获取活跃workers失败: {e}")
|
790
|
+
traceback.print_exc()
|
791
|
+
|
792
|
+
# 从PostgreSQL获取统计数据
|
793
|
+
total_tasks = 0
|
794
|
+
pending_tasks_db = 0 # 数据库中的pending任务数
|
795
|
+
completed_tasks = 0
|
796
|
+
failed_tasks = 0
|
797
|
+
processing_tasks = 0
|
798
|
+
enqueue_rate = 0
|
799
|
+
dequeue_rate = 0
|
800
|
+
tasks_per_minute = 0
|
801
|
+
|
802
|
+
if conn.pg_config:
|
803
|
+
try:
|
804
|
+
async with await conn.get_pg_session() as session:
|
805
|
+
# 查询任务统计
|
806
|
+
query_sql = text("""
|
807
|
+
SELECT
|
808
|
+
COUNT(DISTINCT t.stream_id) as total,
|
809
|
+
COUNT(DISTINCT CASE WHEN t.stream_id NOT IN (SELECT stream_id FROM task_runs) THEN t.stream_id END) as pending,
|
810
|
+
COUNT(DISTINCT CASE WHEN tr.status = 'success' THEN t.stream_id END) as completed,
|
811
|
+
COUNT(DISTINCT CASE WHEN tr.status = 'error' THEN t.stream_id END) as failed,
|
812
|
+
COUNT(DISTINCT CASE WHEN tr.status = 'pending' THEN t.stream_id END) as processing,
|
813
|
+
COUNT(DISTINCT CASE WHEN t.created_at >= :recent_time THEN t.stream_id END) as recent_created,
|
814
|
+
COUNT(DISTINCT CASE WHEN tr.status = 'success' AND tr.end_time >= :recent_time THEN t.stream_id END) as recent_completed
|
815
|
+
FROM tasks t
|
816
|
+
LEFT JOIN task_runs tr ON t.stream_id = tr.stream_id
|
817
|
+
WHERE t.namespace = :namespace
|
818
|
+
-- 匹配基础队列名和所有优先级队列
|
819
|
+
AND (t.queue = :queue_name OR t.queue LIKE :queue_pattern)
|
820
|
+
AND t.created_at >= :start_time
|
821
|
+
AND t.created_at <= :end_time
|
822
|
+
""")
|
823
|
+
|
824
|
+
# 最近1分钟的时间点,用于计算速率
|
825
|
+
recent_time = end_time - timedelta(minutes=1)
|
826
|
+
|
827
|
+
params = {
|
828
|
+
'namespace': namespace,
|
829
|
+
'queue_name': queue_name,
|
830
|
+
'queue_pattern': f'{queue_name}:%', # 匹配所有优先级队列
|
831
|
+
'start_time': start_time,
|
832
|
+
'end_time': end_time,
|
833
|
+
'recent_time': recent_time
|
834
|
+
}
|
835
|
+
print(f'PostgreSQL查询参数: {params}')
|
836
|
+
|
837
|
+
result_db = await session.execute(query_sql, params)
|
838
|
+
|
839
|
+
row = result_db.first()
|
840
|
+
print(f'PostgreSQL查询结果: {row=}')
|
841
|
+
|
842
|
+
# 额外调试:检查是否有该队列的任务(不限时间)
|
843
|
+
debug_query = text("SELECT COUNT(*) as count FROM tasks WHERE namespace = :namespace AND (queue = :queue_name OR queue LIKE :queue_pattern)")
|
844
|
+
debug_result = await session.execute(debug_query, {
|
845
|
+
'namespace': namespace,
|
846
|
+
'queue_name': queue_name,
|
847
|
+
'queue_pattern': f'{queue_name}:%'
|
848
|
+
})
|
849
|
+
debug_row = debug_result.first()
|
850
|
+
print(f'该队列总任务数(不限时间): {debug_row.count if debug_row else 0}')
|
851
|
+
if row:
|
852
|
+
total_tasks = row.total or 0
|
853
|
+
pending_tasks_db = row.pending or 0 # 获取数据库中的pending任务数
|
854
|
+
completed_tasks = row.completed or 0
|
855
|
+
failed_tasks = row.failed or 0
|
856
|
+
processing_tasks = row.processing or 0
|
857
|
+
|
858
|
+
# 计算速率(基于最近1分钟)
|
859
|
+
enqueue_rate = row.recent_created or 0
|
860
|
+
dequeue_rate = row.recent_completed or 0
|
861
|
+
|
862
|
+
# 计算整个时间段的平均处理速度
|
863
|
+
time_diff_minutes = (end_time - start_time).total_seconds() / 60
|
864
|
+
if time_diff_minutes > 0:
|
865
|
+
tasks_per_minute = round(total_tasks / time_diff_minutes, 2)
|
866
|
+
|
867
|
+
except Exception as e:
|
868
|
+
logger.warning(f"查询数据库失败: {e}")
|
869
|
+
traceback.print_exc()
|
870
|
+
|
871
|
+
# 根据用户要求重新计算可见和不可见消息
|
872
|
+
# 注意:这里的计算基于时间范围内的数据库pending任务
|
873
|
+
# 但Redis的invisible_messages_count是实时的,可能包含时间范围外的任务
|
874
|
+
# 为了保持逻辑一致,我们使用以下计算:
|
875
|
+
# - 如果时间范围内没有pending任务,则可见和不可见都为0
|
876
|
+
# - 否则,不可见消息取Redis实际值和DB pending任务的较小值
|
877
|
+
if pending_tasks_db > 0:
|
878
|
+
# 不可见消息不能超过pending任务总数
|
879
|
+
actual_invisible = min(invisible_messages_count, pending_tasks_db)
|
880
|
+
visible_messages_count = pending_tasks_db - actual_invisible
|
881
|
+
else:
|
882
|
+
visible_messages_count = 0
|
883
|
+
actual_invisible = 0
|
884
|
+
|
885
|
+
result.append({
|
886
|
+
'queue_name': queue_name,
|
887
|
+
# 基于查询条件的数据库统计
|
888
|
+
'message_count': total_tasks, # 符合查询条件的任务总数(来自数据库)
|
889
|
+
'visible_messages': visible_messages_count, # 可见消息 = DB pending - min(Redis invisible, DB pending)
|
890
|
+
'invisible_messages': actual_invisible, # 不可见消息 = min(Redis invisible, DB pending)
|
891
|
+
'processing': processing_tasks, # 数据库中processing状态的任务数
|
892
|
+
'queue_length': queue_len, # Redis实时队列长度(保留用于参考)
|
893
|
+
|
894
|
+
# 历史统计数据(来自PostgreSQL)- 基于选定时间范围
|
895
|
+
'completed': completed_tasks, # 时间范围内已完成
|
896
|
+
'failed': failed_tasks, # 时间范围内失败
|
897
|
+
'consumption_rate': tasks_per_minute, # 消费速度
|
898
|
+
'enqueue_rate': enqueue_rate, # 入队速率
|
899
|
+
'dequeue_rate': dequeue_rate, # 出队速率
|
900
|
+
|
901
|
+
# 混合数据
|
902
|
+
'success_rate': round((completed_tasks / total_tasks * 100) if total_tasks > 0 else 0, 2), # 成功率(基于历史)
|
903
|
+
'queue_status': 'active' if queue_len > 0 or active_workers > 0 else 'idle', # 状态(基于实时)
|
904
|
+
'active_workers': active_workers, # 活跃Workers
|
905
|
+
'consumer_groups': consumer_groups, # 消费组数
|
906
|
+
'consumers': total_consumers, # 消费者数
|
907
|
+
|
908
|
+
# 调试信息(可选)
|
909
|
+
'historical_tasks': total_tasks, # 时间范围内的任务总数(用于调试)
|
910
|
+
})
|
911
|
+
|
912
|
+
await redis_client.aclose()
|
913
|
+
return {'success': True, 'data': result}
|
914
|
+
|
915
|
+
except Exception as e:
|
916
|
+
logger.error(f"获取队列详情失败: {e}")
|
917
|
+
traceback.print_exc()
|
918
|
+
raise HTTPException(status_code=500, detail=str(e))
|
919
|
+
|
920
|
+
|
921
|
+
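The clamp above keeps the two derived numbers consistent with the database view: Redis reports deliveries in real time, so its un-acked count can exceed the pending rows inside the selected window. With 10 pending rows and 13 un-acked deliveries, the endpoint reports invisible = min(13, 10) = 10 and visible = 10 - 10 = 0. The rule as a tiny pure function (illustrative only, not part of the package):

def split_pending(pending_db: int, redis_unacked: int) -> tuple:
    """Return (visible, invisible) given DB pending rows and the Redis un-acked count."""
    if pending_db <= 0:
        return 0, 0
    invisible = min(redis_unacked, pending_db)
    return pending_db - invisible, invisible

assert split_pending(10, 13) == (0, 10)
assert split_pending(10, 4) == (6, 4)
assert split_pending(0, 7) == (0, 0)
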
@router.post("/queue-flow-rates/{namespace}")
|
922
|
+
async def get_queue_flow_rates(namespace: str, query: TimeRangeQuery):
|
923
|
+
"""
|
924
|
+
获取指定命名空间中队列的流量速率(入队、完成、失败)
|
925
|
+
|
926
|
+
Args:
|
927
|
+
namespace: 命名空间名称
|
928
|
+
query: 时间范围查询参数
|
929
|
+
"""
|
930
|
+
try:
|
931
|
+
print(f'请求参数: get_queue_flow_rates {namespace=}, {query=}')
|
932
|
+
|
933
|
+
# 使用公共工具函数处理时间范围
|
934
|
+
time_range_result = parse_time_range_query(query)
|
935
|
+
start_time = time_range_result.start_time
|
936
|
+
end_time = time_range_result.end_time
|
937
|
+
|
938
|
+
print(f'时间范围: {start_time=}, {end_time=}')
|
939
|
+
|
940
|
+
# 使用命名空间数据访问
|
941
|
+
conn = await data_access.manager.get_connection(namespace)
|
942
|
+
|
943
|
+
# 如果没有PostgreSQL配置,返回空数据
|
944
|
+
if not conn.pg_config:
|
945
|
+
return {"data": [], "granularity": "minute"}
|
946
|
+
|
947
|
+
async with await conn.get_pg_session() as session:
|
948
|
+
# 如果没有指定队列,获取所有队列
|
949
|
+
if not query.queues or len(query.queues) == 0:
|
950
|
+
# 获取所有队列名称
|
951
|
+
queue_sql = text("""
|
952
|
+
SELECT DISTINCT queue
|
953
|
+
FROM tasks
|
954
|
+
WHERE namespace = :namespace
|
955
|
+
ORDER BY queue
|
956
|
+
""")
|
957
|
+
queue_result = await session.execute(queue_sql, {'namespace': namespace})
|
958
|
+
all_queues = [row.queue for row in queue_result.fetchall()]
|
959
|
+
|
960
|
+
print(f'所有队列: {all_queues=}')
|
961
|
+
|
962
|
+
if not all_queues:
|
963
|
+
return {"data": [], "granularity": "minute"}
|
964
|
+
|
965
|
+
# 统计所有队列的流量数据
|
966
|
+
queue_conditions = "(" + " OR ".join([f"t.queue = '{queue}'" for queue in all_queues]) + ")"
|
967
|
+
else:
|
968
|
+
# 使用指定的队列(支持基础队列名和优先级队列)
|
969
|
+
queue_name = query.queues[0]
|
970
|
+
# 匹配精确队列名或带优先级的队列名(如 shared_queue 或 shared_queue:5)
|
971
|
+
queue_conditions = f"(t.queue = '{queue_name}' OR t.queue LIKE '{queue_name}:%')"
|
972
|
+
print(f'指定队列: {queue_name=}, 条件: {queue_conditions}')
|
973
|
+
|
974
|
+
# 使用已计算好的时间间隔信息
|
975
|
+
interval = time_range_result.interval
|
976
|
+
interval_seconds = time_range_result.interval_seconds
|
977
|
+
granularity = time_range_result.granularity
|
978
|
+
|
979
|
+
# 查询流量数据
|
980
|
+
flow_sql = text(f"""
|
981
|
+
WITH time_series AS (
|
982
|
+
SELECT to_timestamp(FLOOR(EXTRACT(epoch FROM ts) / {interval_seconds}) * {interval_seconds}) AS time_bucket
|
983
|
+
FROM generate_series(
|
984
|
+
:start_time ::timestamptz,
|
985
|
+
:end_time ::timestamptz + INTERVAL '{interval_seconds} seconds',
|
986
|
+
:interval_val ::interval
|
987
|
+
) AS ts
|
988
|
+
),
|
989
|
+
enqueue_counts AS (
|
990
|
+
SELECT
|
991
|
+
to_timestamp(FLOOR(EXTRACT(epoch FROM t.created_at) / {interval_seconds}) * {interval_seconds}) AS time_bucket,
|
992
|
+
COUNT(*) as enqueued
|
993
|
+
FROM tasks t
|
994
|
+
WHERE t.namespace = :namespace
|
995
|
+
AND ({queue_conditions})
|
996
|
+
AND t.created_at >= :start_time
|
997
|
+
AND t.created_at <= :end_time
|
998
|
+
GROUP BY time_bucket
|
999
|
+
),
|
1000
|
+
complete_counts AS (
|
1001
|
+
SELECT
|
1002
|
+
to_timestamp(FLOOR(EXTRACT(epoch FROM tr.end_time) / {interval_seconds}) * {interval_seconds}) AS time_bucket,
|
1003
|
+
COUNT(*) as completed
|
1004
|
+
FROM task_runs tr
|
1005
|
+
JOIN tasks t ON tr.stream_id = t.stream_id
|
1006
|
+
WHERE t.namespace = :namespace
|
1007
|
+
AND ({queue_conditions})
|
1008
|
+
AND tr.end_time >= :start_time
|
1009
|
+
AND tr.end_time <= :end_time
|
1010
|
+
AND tr.status = 'success'
|
1011
|
+
GROUP BY time_bucket
|
1012
|
+
),
|
1013
|
+
failed_counts AS (
|
1014
|
+
SELECT
|
1015
|
+
to_timestamp(FLOOR(EXTRACT(epoch FROM tr.end_time) / {interval_seconds}) * {interval_seconds}) AS time_bucket,
|
1016
|
+
COUNT(*) as failed
|
1017
|
+
FROM task_runs tr
|
1018
|
+
JOIN tasks t ON tr.stream_id = t.stream_id
|
1019
|
+
WHERE t.namespace = :namespace
|
1020
|
+
AND ({queue_conditions})
|
1021
|
+
AND tr.end_time >= :start_time
|
1022
|
+
AND tr.end_time <= :end_time
|
1023
|
+
AND tr.status = 'error'
|
1024
|
+
GROUP BY time_bucket
|
1025
|
+
)
|
1026
|
+
SELECT
|
1027
|
+
ts.time_bucket,
|
1028
|
+
COALESCE(eq.enqueued, 0) as enqueued,
|
1029
|
+
COALESCE(cc.completed, 0) as completed,
|
1030
|
+
COALESCE(fc.failed, 0) as failed
|
1031
|
+
FROM time_series ts
|
1032
|
+
LEFT JOIN enqueue_counts eq ON ts.time_bucket = eq.time_bucket
|
1033
|
+
LEFT JOIN complete_counts cc ON ts.time_bucket = cc.time_bucket
|
1034
|
+
LEFT JOIN failed_counts fc ON ts.time_bucket = fc.time_bucket
|
1035
|
+
ORDER BY ts.time_bucket
|
1036
|
+
""")
|
1037
|
+
|
1038
|
+
# 先查询tasks表看有没有数据(不限时间范围)
|
1039
|
+
test_sql = text("""
|
1040
|
+
SELECT COUNT(*) as total_count,
|
1041
|
+
COUNT(CASE WHEN created_at >= :start_time AND created_at <= :end_time THEN 1 END) as range_count,
|
1042
|
+
MIN(created_at) as min_time,
|
1043
|
+
MAX(created_at) as max_time
|
1044
|
+
FROM tasks
|
1045
|
+
WHERE namespace = :namespace
|
1046
|
+
""")
|
1047
|
+
test_result = await session.execute(test_sql, {
|
1048
|
+
'namespace': namespace,
|
1049
|
+
'start_time': start_time,
|
1050
|
+
'end_time': end_time
|
1051
|
+
})
|
1052
|
+
test_row = test_result.fetchone()
|
1053
|
+
print(f'tasks表统计 - 总记录数: {test_row.total_count}, 时间范围内: {test_row.range_count}')
|
1054
|
+
print(f'tasks表时间范围: {test_row.min_time} 到 {test_row.max_time}')
|
1055
|
+
|
1056
|
+
result = await session.execute(flow_sql, {
|
1057
|
+
'namespace': namespace,
|
1058
|
+
'start_time': start_time,
|
1059
|
+
'end_time': end_time,
|
1060
|
+
'interval_val': interval
|
1061
|
+
})
|
1062
|
+
|
1063
|
+
# 格式化数据
|
1064
|
+
data = []
|
1065
|
+
rows = result.fetchall()
|
1066
|
+
print(f'查询结果行数: {len(rows)}')
|
1067
|
+
end_index = len(rows) - 1
|
1068
|
+
|
1069
|
+
for idx, row in enumerate(rows):
|
1070
|
+
time_point = row.time_bucket.isoformat()
|
1071
|
+
|
1072
|
+
# 直接使用实际值,不管是否为0
|
1073
|
+
# 这样可以保持与tasks接口的行为一致
|
1074
|
+
data.append({'time': time_point, 'value': row.enqueued, 'metric': '入队速率'})
|
1075
|
+
data.append({'time': time_point, 'value': row.completed, 'metric': '完成速率'})
|
1076
|
+
data.append({'time': time_point, 'value': row.failed, 'metric': '失败数'})
|
1077
|
+
|
1078
|
+
return {"data": data, "granularity": granularity}
|
1079
|
+
|
1080
|
+
# 下面是新的实现,暂时注释掉
|
1081
|
+
'''
|
1082
|
+
# 获取命名空间连接
|
1083
|
+
conn = await data_access.manager.get_connection(namespace)
|
1084
|
+
|
1085
|
+
# 如果没有PostgreSQL配置,返回模拟数据
|
1086
|
+
if not conn.pg_config:
|
1087
|
+
# 生成模拟数据
|
1088
|
+
end_time = datetime.now(timezone.utc)
|
1089
|
+
start_time = end_time - timedelta(minutes=15)
|
1090
|
+
|
1091
|
+
data = []
|
1092
|
+
num_points = 10
|
1093
|
+
for i in range(num_points):
|
1094
|
+
timestamp = start_time + timedelta(seconds=i * 90)
|
1095
|
+
time_str = timestamp.isoformat()
|
1096
|
+
|
1097
|
+
data.append({'time': time_str, 'value': 10 + i, 'metric': '入队速率'})
|
1098
|
+
data.append({'time': time_str, 'value': 8 + i, 'metric': '完成速率'})
|
1099
|
+
data.append({'time': time_str, 'value': 1, 'metric': '失败数'})
|
1100
|
+
|
1101
|
+
return {"data": data, "granularity": "minute"}
|
1102
|
+
|
1103
|
+
# 处理时间范围
|
1104
|
+
end_time = datetime.now(timezone.utc)
|
1105
|
+
if query.time_range and query.time_range != 'custom':
|
1106
|
+
# 解析时间范围字符串
|
1107
|
+
if query.time_range.endswith('m'):
|
1108
|
+
minutes = int(query.time_range[:-1])
|
1109
|
+
start_time = end_time - timedelta(minutes=minutes)
|
1110
|
+
elif query.time_range.endswith('h'):
|
1111
|
+
hours = int(query.time_range[:-1])
|
1112
|
+
start_time = end_time - timedelta(hours=hours)
|
1113
|
+
elif query.time_range.endswith('d'):
|
1114
|
+
days = int(query.time_range[:-1])
|
1115
|
+
start_time = end_time - timedelta(days=days)
|
1116
|
+
else:
|
1117
|
+
start_time = end_time - timedelta(minutes=15)
|
1118
|
+
else:
|
1119
|
+
# 使用自定义时间范围或默认值
|
1120
|
+
start_time = query.start_time or (end_time - timedelta(minutes=15))
|
1121
|
+
end_time = query.end_time or end_time
|
1122
|
+
|
1123
|
+
# 从PostgreSQL查询流量数据
|
1124
|
+
async with await conn.get_pg_session() as session:
|
1125
|
+
# 动态计算时间间隔
|
1126
|
+
duration = (end_time - start_time).total_seconds()
|
1127
|
+
TARGET_POINTS = 200
|
1128
|
+
ideal_interval_seconds = duration / TARGET_POINTS
|
1129
|
+
|
1130
|
+
# 选择合适的间隔
|
1131
|
+
if ideal_interval_seconds <= 1:
|
1132
|
+
interval = '1 second'
|
1133
|
+
granularity = 'second'
|
1134
|
+
elif ideal_interval_seconds <= 5:
|
1135
|
+
interval = '5 seconds'
|
1136
|
+
granularity = 'second'
|
1137
|
+
elif ideal_interval_seconds <= 10:
|
1138
|
+
interval = '10 seconds'
|
1139
|
+
granularity = 'second'
|
1140
|
+
elif ideal_interval_seconds <= 30:
|
1141
|
+
interval = '30 seconds'
|
1142
|
+
granularity = 'second'
|
1143
|
+
elif ideal_interval_seconds <= 60:
|
1144
|
+
interval = '1 minute'
|
1145
|
+
granularity = 'minute'
|
1146
|
+
elif ideal_interval_seconds <= 300:
|
1147
|
+
interval = '5 minutes'
|
1148
|
+
granularity = 'minute'
|
1149
|
+
elif ideal_interval_seconds <= 600:
|
1150
|
+
interval = '10 minutes'
|
1151
|
+
granularity = 'minute'
|
1152
|
+
elif ideal_interval_seconds <= 900:
|
1153
|
+
interval = '15 minutes'
|
1154
|
+
granularity = 'minute'
|
1155
|
+
elif ideal_interval_seconds <= 1800:
|
1156
|
+
interval = '30 minutes'
|
1157
|
+
granularity = 'minute'
|
1158
|
+
elif ideal_interval_seconds <= 3600:
|
1159
|
+
interval = '1 hour'
|
1160
|
+
granularity = 'hour'
|
1161
|
+
else:
|
1162
|
+
interval = '1 day'
|
1163
|
+
granularity = 'day'
|
1164
|
+
|
1165
|
+
# 构建筛选条件
|
1166
|
+
filter_conditions = []
|
1167
|
+
if query.filters:
|
1168
|
+
for filter_item in query.filters:
|
1169
|
+
field = filter_item.get('field')
|
1170
|
+
operator = filter_item.get('operator')
|
1171
|
+
value = filter_item.get('value')
|
1172
|
+
|
1173
|
+
if field and operator and value:
|
1174
|
+
if operator == 'eq':
|
1175
|
+
filter_conditions.append(f"AND {field} = '{value}'")
|
1176
|
+
elif operator == 'ne':
|
1177
|
+
filter_conditions.append(f"AND {field} != '{value}'")
|
1178
|
+
elif operator == 'contains':
|
1179
|
+
filter_conditions.append(f"AND {field} LIKE '%{value}%'")
|
1180
|
+
|
1181
|
+
extra_where = " ".join(filter_conditions)
|
1182
|
+
|
1183
|
+
# 根据间隔确定聚合粒度
|
1184
|
+
if granularity == 'second':
|
1185
|
+
if ideal_interval_seconds <= 1:
|
1186
|
+
time_trunc = 'second'
|
1187
|
+
interval_seconds = 1
|
1188
|
+
elif ideal_interval_seconds <= 5:
|
1189
|
+
time_trunc = '5 seconds'
|
1190
|
+
interval_seconds = 5
|
1191
|
+
elif ideal_interval_seconds <= 10:
|
1192
|
+
time_trunc = '10 seconds'
|
1193
|
+
interval_seconds = 10
|
1194
|
+
else:
|
1195
|
+
time_trunc = '30 seconds'
|
1196
|
+
interval_seconds = 30
|
1197
|
+
elif granularity == 'minute':
|
1198
|
+
time_trunc = 'minute'
|
1199
|
+
interval_seconds = 60
|
1200
|
+
elif granularity == 'hour':
|
1201
|
+
time_trunc = 'hour'
|
1202
|
+
interval_seconds = 3600
|
1203
|
+
else:
|
1204
|
+
time_trunc = 'day'
|
1205
|
+
interval_seconds = 86400
|
1206
|
+
|
1207
|
+
# 查询入队速率、完成速率和失败数
|
1208
|
+
# 重要:时间序列也要对齐到相同的时间桶
|
1209
|
+
query_sql = text(f"""
|
1210
|
+
WITH time_series AS (
|
1211
|
+
SELECT to_timestamp(FLOOR(EXTRACT(epoch FROM ts) / {interval_seconds}) * {interval_seconds}) AS time_bucket
|
1212
|
+
FROM generate_series(
|
1213
|
+
:start_time ::timestamptz,
|
1214
|
+
:end_time ::timestamptz,
|
1215
|
+
:interval_val ::interval
|
1216
|
+
) AS ts
|
1217
|
+
),
|
1218
|
+
enqueued_rate AS (
|
1219
|
+
SELECT
|
1220
|
+
to_timestamp(FLOOR(EXTRACT(epoch FROM created_at) / {interval_seconds}) * {interval_seconds}) AS time_bucket,
|
1221
|
+
COUNT(*) AS count
|
1222
|
+
FROM tasks
|
1223
|
+
WHERE namespace = :namespace
|
1224
|
+
AND queue_name = :queue_name
|
1225
|
+
AND created_at >= :start_time
|
1226
|
+
AND created_at <= :end_time
|
1227
|
+
GROUP BY 1
|
1228
|
+
),
|
1229
|
+
completed_rate AS (
|
1230
|
+
SELECT
|
1231
|
+
to_timestamp(FLOOR(EXTRACT(epoch FROM completed_at) / {interval_seconds}) * {interval_seconds}) AS time_bucket,
|
1232
|
+
COUNT(*) AS count
|
1233
|
+
FROM tasks
|
1234
|
+
WHERE namespace = :namespace
|
1235
|
+
AND queue_name = :queue_name
|
1236
|
+
AND completed_at >= :start_time
|
1237
|
+
AND completed_at <= :end_time
|
1238
|
+
AND status = 'success'
|
1239
|
+
GROUP BY 1
|
1240
|
+
),
|
1241
|
+
failed_rate AS (
|
1242
|
+
SELECT
|
1243
|
+
to_timestamp(FLOOR(EXTRACT(epoch FROM completed_at) / {interval_seconds}) * {interval_seconds}) AS time_bucket,
|
1244
|
+
COUNT(*) AS count
|
1245
|
+
FROM tasks
|
1246
|
+
WHERE namespace = :namespace
|
1247
|
+
AND queue_name = :queue_name
|
1248
|
+
AND completed_at >= :start_time
|
1249
|
+
AND completed_at <= :end_time
|
1250
|
+
AND status = 'error'
|
1251
|
+
GROUP BY 1
|
1252
|
+
)
|
1253
|
+
SELECT
|
1254
|
+
ts.time_bucket,
|
1255
|
+
COALESCE(e.count, 0) AS enqueued,
|
1256
|
+
COALESCE(c.count, 0) AS completed,
|
1257
|
+
COALESCE(f.count, 0) AS failed
|
1258
|
+
FROM time_series ts
|
1259
|
+
LEFT JOIN enqueued_rate e ON ts.time_bucket = e.time_bucket
|
1260
|
+
LEFT JOIN completed_rate c ON ts.time_bucket = c.time_bucket
|
1261
|
+
LEFT JOIN failed_rate f ON ts.time_bucket = f.time_bucket
|
1262
|
+
ORDER BY ts.time_bucket
|
1263
|
+
""")
|
1264
|
+
|
1265
|
+
result = await session.execute(query_sql, {
|
1266
|
+
'namespace': namespace,
|
1267
|
+
'queue_name': queue_name,
|
1268
|
+
'start_time': start_time,
|
1269
|
+
'end_time': end_time,
|
1270
|
+
'interval_val': interval
|
1271
|
+
})
|
1272
|
+
|
1273
|
+
# 格式化数据为前端需要的格式
|
1274
|
+
data = []
|
1275
|
+
rows = result.fetchall()
|
1276
|
+
end_index = len(rows) - 1
|
1277
|
+
|
1278
|
+
for idx, row in enumerate(rows):
|
1279
|
+
time_str = row.time_bucket.isoformat()
|
1280
|
+
|
1281
|
+
# 对于中间的数据点,如果值为0则设为None,让图表自动连接
|
1282
|
+
# 只保留第一个和最后一个点的0值
|
1283
|
+
enqueued_val = row.enqueued if row.enqueued > 0 or idx == 0 or idx == end_index else None
|
1284
|
+
completed_val = row.completed if row.completed > 0 or idx == 0 or idx == end_index else None
|
1285
|
+
failed_val = row.failed if row.failed > 0 or idx == 0 or idx == end_index else None
|
1286
|
+
|
1287
|
+
data.append({'time': time_str, 'value': enqueued_val, 'metric': '入队速率'})
|
1288
|
+
data.append({'time': time_str, 'value': completed_val, 'metric': '完成速率'})
|
1289
|
+
data.append({'time': time_str, 'value': failed_val, 'metric': '失败数'})
|
1290
|
+
|
1291
|
+
return {"data": data, "granularity": granularity}
|
1292
|
+
'''
|
1293
|
+
|
1294
|
+
except Exception as e:
|
1295
|
+
logger.error(f"获取队列流量速率失败: {e}")
|
1296
|
+
traceback.print_exc()
|
1297
|
+
|
1298
|
+
await handle_database_connection_error(e, namespace, "获取队列流量数据")
|
1299
|
+
|
1300
|
+
|
1301
|
+
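Unlike build_queue_filter_and_params at the top of the module, this endpoint interpolates queue names straight into the SQL string, so a quote character in a queue name would break the query. A hedged sketch of the bound-parameter equivalent for the single-queue branch (illustrative only, reusing the surrounding function's variables; not the shipped code):

# Equivalent condition with bind parameters instead of f-string interpolation:
queue_conditions = "(t.queue = :queue_name OR t.queue LIKE :queue_pattern)"
flow_params = {
    'namespace': namespace,
    'queue_name': queue_name,
    'queue_pattern': f'{queue_name}:%',  # the LIKE pattern travels as a bind value
    'start_time': start_time,
    'end_time': end_time,
    'interval_val': interval,
}
result = await session.execute(flow_sql, flow_params)
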
@router.get("/dashboard-stats/{namespace}")
|
1302
|
+
async def get_dashboard_stats(
|
1303
|
+
namespace: str,
|
1304
|
+
time_range: str = "24h",
|
1305
|
+
queues: Optional[str] = Query(None, description="逗号分隔的队列名称列表")
|
1306
|
+
):
|
1307
|
+
"""
|
1308
|
+
获取仪表板统计数据(任务总数、成功数、失败数、成功率、吞吐量等)
|
1309
|
+
|
1310
|
+
Args:
|
1311
|
+
namespace: 命名空间名称
|
1312
|
+
time_range: 时间范围(如'1h', '24h', '7d')
|
1313
|
+
"""
|
1314
|
+
try:
|
1315
|
+
conn = await data_access.manager.get_connection(namespace)
|
1316
|
+
|
1317
|
+
# 如果没有PostgreSQL配置,返回空数据
|
1318
|
+
if not conn.pg_config:
|
1319
|
+
return {
|
1320
|
+
"success": True,
|
1321
|
+
"data": {
|
1322
|
+
"total_tasks": 0,
|
1323
|
+
"completed_tasks": 0,
|
1324
|
+
"failed_tasks": 0,
|
1325
|
+
"running_tasks": 0,
|
1326
|
+
"pending_tasks": 0,
|
1327
|
+
"success_rate": 0,
|
1328
|
+
"throughput": 0,
|
1329
|
+
"avg_processing_time": 0,
|
1330
|
+
"total_queues": 0
|
1331
|
+
}
|
1332
|
+
}
|
1333
|
+
|
1334
|
+
# 计算时间范围
|
1335
|
+
end_time = datetime.now(timezone.utc)
|
1336
|
+
if time_range.endswith('m'):
|
1337
|
+
minutes = int(time_range[:-1])
|
1338
|
+
start_time = end_time - timedelta(minutes=minutes)
|
1339
|
+
elif time_range.endswith('h'):
|
1340
|
+
hours = int(time_range[:-1])
|
1341
|
+
start_time = end_time - timedelta(hours=hours)
|
1342
|
+
elif time_range.endswith('d'):
|
1343
|
+
days = int(time_range[:-1])
|
1344
|
+
start_time = end_time - timedelta(days=days)
|
1345
|
+
else:
|
1346
|
+
start_time = end_time - timedelta(hours=24) # 默认24小时
|
1347
|
+
|
1348
|
+
# 构建队列筛选条件
|
1349
|
+
queue_filter, queue_list, queue_params = build_queue_filter_and_params(queues)
|
1350
|
+
print(f'🔍 Dashboard Stats - 收到队列参数: {queues}')
|
1351
|
+
print(f'🔍 Dashboard Stats - 解析后的队列列表: {queue_list}')
|
1352
|
+
print(f'🔍 Dashboard Stats - SQL筛选条件: {queue_filter}')
|
1353
|
+
print(f'🔍 Dashboard Stats - 查询参数: {queue_params}')
|
1354
|
+
async with await conn.get_pg_session() as session:
|
1355
|
+
# 获取任务统计数据
|
1356
|
+
# 修复:正确区分pending任务(在tasks表但不在task_runs表中的任务)
|
1357
|
+
stats_sql = text(f"""
|
1358
|
+
WITH task_stats AS (
|
1359
|
+
SELECT
|
1360
|
+
t.stream_id,
|
1361
|
+
t.created_at,
|
1362
|
+
t.queue,
|
1363
|
+
tr.status,
|
1364
|
+
tr.execution_time,
|
1365
|
+
tr.end_time
|
1366
|
+
FROM tasks t
|
1367
|
+
LEFT JOIN task_runs tr ON t.stream_id = tr.stream_id
|
1368
|
+
WHERE t.namespace = :namespace
|
1369
|
+
AND t.created_at >= :start_time
|
1370
|
+
AND t.created_at <= :end_time
|
1371
|
+
{queue_filter}
|
1372
|
+
)
|
1373
|
+
SELECT
|
1374
|
+
COUNT(DISTINCT stream_id) as total_tasks,
|
1375
|
+
COUNT(DISTINCT CASE WHEN status = 'success' THEN stream_id END) as completed_tasks,
|
1376
|
+
COUNT(DISTINCT CASE WHEN status = 'error' THEN stream_id END) as failed_tasks,
|
1377
|
+
COUNT(DISTINCT CASE WHEN status = 'running' THEN stream_id END) as running_tasks,
|
1378
|
+
-- pending任务:在tasks表中但没有在task_runs表中(status为NULL)或status='pending'
|
1379
|
+
COUNT(DISTINCT CASE WHEN status IS NULL OR status = 'pending' THEN stream_id END) as pending_tasks,
|
1380
|
+
COUNT(DISTINCT queue) as total_queues,
|
1381
|
+
AVG(CASE WHEN status = 'success' AND execution_time IS NOT NULL
|
1382
|
+
THEN execution_time END) as avg_execution_time
|
1383
|
+
FROM task_stats
|
1384
|
+
""")
|
1385
|
+
|
1386
|
+
# 准备查询参数
|
1387
|
+
query_params = {
|
1388
|
+
'namespace': namespace,
|
1389
|
+
'start_time': start_time,
|
1390
|
+
'end_time': end_time,
|
1391
|
+
**queue_params
|
1392
|
+
}
|
1393
|
+
|
1394
|
+
print(f'🔍 Dashboard Stats - 最终SQL: {stats_sql}')
|
1395
|
+
print(f'🔍 Dashboard Stats - 最终查询参数: {query_params}')
|
1396
|
+
|
1397
|
+
# 调试:查看数据库中实际的队列名称
|
1398
|
+
debug_sql = text("""
|
1399
|
+
SELECT DISTINCT t.queue
|
1400
|
+
FROM tasks t
|
1401
|
+
WHERE t.namespace = :namespace
|
1402
|
+
AND t.created_at >= :start_time
|
1403
|
+
AND t.created_at <= :end_time
|
1404
|
+
LIMIT 10
|
1405
|
+
""")
|
1406
|
+
debug_result = await session.execute(debug_sql, {
|
1407
|
+
'namespace': namespace,
|
1408
|
+
'start_time': start_time,
|
1409
|
+
'end_time': end_time
|
1410
|
+
})
|
1411
|
+
actual_queues = [row.queue for row in debug_result.fetchall()]
|
1412
|
+
print(f'🔍 Dashboard Stats - 数据库中实际的队列名称: {actual_queues}')
|
1413
|
+
|
1414
|
+
result = await session.execute(stats_sql, query_params)
|
1415
|
+
|
1416
|
+
row = result.first()
|
1417
|
+
|
1418
|
+
if row:
|
1419
|
+
total_tasks = row.total_tasks or 0
|
1420
|
+
completed_tasks = row.completed_tasks or 0
|
1421
|
+
failed_tasks = row.failed_tasks or 0
|
1422
|
+
pending_tasks = row.pending_tasks or 0
|
1423
|
+
total_queues = row.total_queues or 0
|
1424
|
+
avg_execution_time = row.avg_execution_time or 0
|
1425
|
+
|
1426
|
+
# 从task_runs表计算当前正在执行的任务数(start_time <= now < end_time)
|
1427
|
+
running_tasks = 0
|
1428
|
+
try:
|
1429
|
+
running_sql = text(f"""
|
1430
|
+
SELECT COUNT(*) as total_running
|
1431
|
+
FROM task_runs tr
|
1432
|
+
JOIN tasks t ON tr.stream_id = t.stream_id
|
1433
|
+
WHERE t.namespace = :namespace
|
1434
|
+
AND tr.start_time IS NOT NULL
|
1435
|
+
AND tr.start_time <= NOW()
|
1436
|
+
AND (tr.end_time IS NULL OR tr.end_time > NOW())
|
1437
|
+
{queue_filter}
|
1438
|
+
""")
|
1439
|
+
|
1440
|
+
running_result = await session.execute(running_sql, query_params)
|
1441
|
+
|
1442
|
+
running_row = running_result.first()
|
1443
|
+
running_tasks = int(running_row.total_running) if running_row else 0
|
1444
|
+
|
1445
|
+
except Exception as e:
|
1446
|
+
logger.warning(f"计算当前并发任务数失败,使用默认值: {e}")
|
1447
|
+
traceback.print_exc()
|
1448
|
+
running_tasks = row.running_tasks or 0
|
1449
|
+
|
1450
|
+
# 计算成功率
|
1451
|
+
success_rate = round((completed_tasks / total_tasks * 100) if total_tasks > 0 else 0, 1)
|
1452
|
+
|
1453
|
+
# 计算吞吐量(每分钟完成的任务数)
|
1454
|
+
# 使用递进的时间窗口来找到最合适的吞吐量计算方式
|
1455
|
+
recent_end_time = datetime.now(timezone.utc)
|
1456
|
+
throughput = 0
|
1457
|
+
|
1458
|
+
# 尝试多个时间窗口,从短到长,找到有足够数据的窗口
|
1459
|
+
time_windows = [
|
1460
|
+
(5, "最近5分钟"),
|
1461
|
+
(10, "最近10分钟"),
|
1462
|
+
(30, "最近30分钟"),
|
1463
|
+
(60, "最近1小时")
|
1464
|
+
]
|
1465
|
+
|
1466
|
+
for window_minutes, window_desc in time_windows:
|
1467
|
+
recent_start_time = recent_end_time - timedelta(minutes=window_minutes)
|
1468
|
+
|
1469
|
+
recent_query = text(f"""
|
1470
|
+
SELECT COUNT(DISTINCT t.stream_id) as recent_completed
|
1471
|
+
FROM tasks t
|
1472
|
+
LEFT JOIN task_runs tr ON t.stream_id = tr.stream_id
|
1473
|
+
WHERE t.namespace = :namespace
|
1474
|
+
AND tr.status = 'success'
|
1475
|
+
AND tr.end_time >= :recent_start_time
|
1476
|
+
AND tr.end_time <= :recent_end_time
|
1477
|
+
{queue_filter}
|
1478
|
+
""")
|
1479
|
+
|
1480
|
+
# 准备吞吐量查询参数
|
1481
|
+
throughput_params = {
|
1482
|
+
'namespace': namespace,
|
1483
|
+
'recent_start_time': recent_start_time,
|
1484
|
+
'recent_end_time': recent_end_time,
|
1485
|
+
**queue_params
|
1486
|
+
}
|
1487
|
+
|
1488
|
+
recent_result = await session.execute(recent_query, throughput_params)
|
1489
|
+
|
1490
|
+
recent_row = recent_result.first()
|
1491
|
+
recent_completed = recent_row.recent_completed if recent_row else 0
|
1492
|
+
|
1493
|
+
print(f'🔍 Dashboard Stats - 吞吐量计算 {window_desc}: {recent_completed} 个任务完成')
|
1494
|
+
|
1495
|
+
# 如果这个时间窗口有足够的数据(至少5个任务),就使用它
|
1496
|
+
if recent_completed >= 5:
|
1497
|
+
throughput = round(recent_completed / window_minutes, 1)
|
1498
|
+
logger.info(f"使用{window_desc}计算吞吐量: {recent_completed}个任务/{window_minutes}分钟 = {throughput}任务/分钟")
|
1499
|
+
break
|
1500
|
+
elif recent_completed > 0:
|
1501
|
+
# 如果有少量数据,也计算但继续寻找更好的窗口
|
1502
|
+
throughput = round(recent_completed / window_minutes, 1)
|
1503
|
+
|
1504
|
+
# 如果所有窗口都没有数据,吞吐量为0
|
1505
|
+
if throughput == 0:
|
1506
|
+
logger.info("最近1小时内没有完成的任务,吞吐量为0")
|
1507
|
+
|
1508
|
+
# 将execution_time从秒转换为毫秒
|
1509
|
+
avg_processing_time = round(avg_execution_time * 1000 if avg_execution_time else 0, 1)
|
1510
|
+
|
1511
|
+
# 同时获取任务数量分布数据(按队列分组,不区分状态)
|
1512
|
+
distribution_sql = text(f"""
|
1513
|
+
SELECT
|
1514
|
+
t.queue,
|
1515
|
+
COUNT(DISTINCT t.stream_id) as count
|
1516
|
+
FROM tasks t
|
1517
|
+
WHERE t.namespace = :namespace
|
1518
|
+
AND t.created_at >= :start_time
|
1519
|
+
AND t.created_at <= :end_time
|
1520
|
+
{queue_filter}
|
1521
|
+
GROUP BY t.queue
|
1522
|
+
ORDER BY count DESC, t.queue
|
1523
|
+
""")
|
1524
|
+
|
1525
|
+
distribution_result = await session.execute(distribution_sql, query_params)
|
1526
|
+
|
1527
|
+
# 格式化分布数据为饼图格式(只按队列,不区分状态)
|
1528
|
+
distribution_data = []
|
1529
|
+
|
1530
|
+
for row in distribution_result.fetchall():
|
1531
|
+
queue = row.queue
|
1532
|
+
count = row.count
|
1533
|
+
|
1534
|
+
if count > 0:
|
1535
|
+
distribution_data.append({
|
1536
|
+
'type': queue, # 直接使用队列名,不添加状态后缀
|
1537
|
+
'value': count,
|
1538
|
+
'queue': queue,
|
1539
|
+
'status': 'all' # 表示所有状态
|
1540
|
+
})
|
1541
|
+
|
1542
|
+
# 如果没有数据,返回默认值
|
1543
|
+
if not distribution_data:
|
1544
|
+
distribution_data = [
|
1545
|
+
{'type': '暂无数据', 'value': 1, 'queue': '', 'status': 'empty'}
|
1546
|
+
]
|
1547
|
+
|
1548
|
+
return {
|
1549
|
+
"success": True,
|
1550
|
+
"data": {
|
1551
|
+
"total_tasks": total_tasks,
|
1552
|
+
"completed_tasks": completed_tasks,
|
1553
|
+
"failed_tasks": failed_tasks,
|
1554
|
+
"running_tasks": running_tasks,
|
1555
|
+
"pending_tasks": pending_tasks,
|
1556
|
+
"success_rate": success_rate,
|
1557
|
+
"throughput": throughput,
|
1558
|
+
"avg_processing_time": avg_processing_time,
|
1559
|
+
"total_queues": total_queues,
|
1560
|
+
"time_range": time_range,
|
1561
|
+
"start_time": start_time.isoformat(),
|
1562
|
+
"end_time": end_time.isoformat(),
|
1563
|
+
"task_distribution": distribution_data # 新增:任务状态分布数据
|
1564
|
+
}
|
1565
|
+
}
|
1566
|
+
else:
|
1567
|
+
return {
|
1568
|
+
"success": True,
|
1569
|
+
"data": {
|
1570
|
+
"total_tasks": 0,
|
1571
|
+
"completed_tasks": 0,
|
1572
|
+
"failed_tasks": 0,
|
1573
|
+
"running_tasks": 0,
|
1574
|
+
"pending_tasks": 0,
|
1575
|
+
"success_rate": 0,
|
1576
|
+
"throughput": 0,
|
1577
|
+
"avg_processing_time": 0,
|
1578
|
+
"total_queues": 0
|
1579
|
+
}
|
1580
|
+
}
|
1581
|
+
|
1582
|
+
except Exception as e:
|
1583
|
+
logger.error(f"获取仪表板统计数据失败: {e}")
|
1584
|
+
traceback.print_exc()
|
1585
|
+
raise HTTPException(status_code=500, detail=str(e))
|
1586
|
+
|
1587
|
+
|
1588
|
+
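
# --- Editor's sketch (hypothetical helper, not part of this diff) -----------
# The handler above leans on build_queue_filter_and_params(), which is defined
# elsewhere in this module. Judging from how {queue_filter} is spliced in after
# conditions on t.queue and how **queue_params is merged into the bind
# parameters, a minimal implementation consistent with that usage might be:

def _example_build_queue_filter_and_params(queues):
    """Hypothetical re-implementation, for illustration only."""
    if not queues:
        return "", [], {}
    queue_list = [q.strip() for q in queues.split(',') if q.strip()]
    # ANY(:queue_list) lets a single bound parameter match any of the queues
    return "AND t.queue = ANY(:queue_list)", queue_list, {"queue_list": queue_list}
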
@router.get("/queue-backlog-trend/{namespace}")
|
1589
|
+
async def get_queue_backlog_trend(
|
1590
|
+
namespace: str,
|
1591
|
+
time_range: str = "1h",
|
1592
|
+
queue_name: Optional[str] = None,
|
1593
|
+
queues: Optional[str] = Query(None, description="逗号分隔的队列名称列表")
|
1594
|
+
):
|
1595
|
+
"""
|
1596
|
+
获取队列积压趋势数据(排队任务数)
|
1597
|
+
|
1598
|
+
Args:
|
1599
|
+
namespace: 命名空间名称
|
1600
|
+
time_range: 时间范围(如'1h', '24h', '7d')
|
1601
|
+
queue_name: 队列名称(可选,不指定则获取所有队列的总积压)
|
1602
|
+
"""
|
1603
|
+
try:
|
1604
|
+
conn = await data_access.manager.get_connection(namespace)
|
1605
|
+
|
1606
|
+
# 使用公共工具函数处理时间范围
|
1607
|
+
time_range_result = parse_time_range_string(time_range)
|
1608
|
+
start_time = time_range_result.start_time
|
1609
|
+
end_time = time_range_result.end_time
|
1610
|
+
interval = time_range_result.interval
|
1611
|
+
interval_seconds = time_range_result.interval_seconds
|
1612
|
+
|
1613
|
+
# 如果没有PostgreSQL配置,返回模拟数据
|
1614
|
+
if not conn.pg_config:
|
1615
|
+
# 生成模拟的排队任务数趋势
|
1616
|
+
data = []
|
1617
|
+
num_points = min(50, max(10, int((end_time - start_time).total_seconds() / interval_seconds)))
|
1618
|
+
|
1619
|
+
for i in range(num_points):
|
1620
|
+
timestamp = start_time + timedelta(seconds=i * interval_seconds)
|
1621
|
+
data.append({
|
1622
|
+
'time': timestamp.isoformat(),
|
1623
|
+
'value': max(0, 5 + int(2 * (0.5 - abs(i - num_points/2) / (num_points/2)))), # 模拟波动
|
1624
|
+
'metric': '排队任务数'
|
1625
|
+
})
|
1626
|
+
|
1627
|
+
return {"data": data, "granularity": "minute"}
|
1628
|
+
|
1629
|
+
async with await conn.get_pg_session() as session:
|
1630
|
+
# 从stream_backlog_monitor表获取数据,使用pending_count字段
|
1631
|
+
try:
|
1632
|
+
backlog_sql = text(f"""
|
1633
|
+
WITH time_series AS (
|
1634
|
+
SELECT to_timestamp(FLOOR(EXTRACT(epoch FROM ts) / {interval_seconds}) * {interval_seconds}) AS time_bucket
|
1635
|
+
FROM generate_series(
|
1636
|
+
:start_time ::timestamptz,
|
1637
|
+
:end_time ::timestamptz,
|
1638
|
+
:interval_val ::interval
|
1639
|
+
) AS ts
|
1640
|
+
),
|
1641
|
+
backlog_data AS (
|
1642
|
+
SELECT
|
1643
|
+
to_timestamp(FLOOR(EXTRACT(epoch FROM created_at) / {interval_seconds}) * {interval_seconds}) AS time_bucket,
|
1644
|
+
-- 使用pending_count字段,它表示实际的待处理任务数
|
1645
|
+
-- 如果没有pending_count,则使用0
|
1646
|
+
MAX(COALESCE(pending_count, 0)) as max_pending
|
1647
|
+
FROM stream_backlog_monitor
|
1648
|
+
WHERE namespace = :namespace
|
1649
|
+
AND created_at >= :start_time
|
1650
|
+
AND created_at <= :end_time
|
1651
|
+
{f"AND stream_name = :queue_name" if queue_name else ""}
|
1652
|
+
GROUP BY 1
|
1653
|
+
)
|
1654
|
+
SELECT
|
1655
|
+
ts.time_bucket,
|
1656
|
+
COALESCE(bd.max_pending, 0) as pending_value
|
1657
|
+
FROM time_series ts
|
1658
|
+
LEFT JOIN backlog_data bd ON ts.time_bucket = bd.time_bucket
|
1659
|
+
ORDER BY ts.time_bucket
|
1660
|
+
""")
|
1661
|
+
|
1662
|
+
params = {
|
1663
|
+
'namespace': namespace,
|
1664
|
+
'start_time': start_time,
|
1665
|
+
'end_time': end_time,
|
1666
|
+
'interval_val': interval
|
1667
|
+
}
|
1668
|
+
if queue_name:
|
1669
|
+
params['queue_name'] = queue_name
|
1670
|
+
|
1671
|
+
result = await session.execute(backlog_sql, params)
|
1672
|
+
rows = result.fetchall()
|
1673
|
+
|
1674
|
+
# 直接使用pending_count数据,不需要检查是否有非零值
|
1675
|
+
data = []
|
1676
|
+
for idx, row in enumerate(rows):
|
1677
|
+
# 第0个和最后一个元素的value不能为null,其他的可以为null
|
1678
|
+
is_first_or_last = idx == 0 or idx == len(rows) - 1
|
1679
|
+
value = int(row.pending_value) if row.pending_value > 0 else (0 if is_first_or_last else None)
|
1680
|
+
|
1681
|
+
data.append({
|
1682
|
+
'time': row.time_bucket.isoformat(),
|
1683
|
+
'value': value,
|
1684
|
+
'metric': '排队任务数'
|
1685
|
+
})
|
1686
|
+
return {"data": data, "granularity": "minute"}
|
1687
|
+
|
1688
|
+
except Exception as e:
|
1689
|
+
logger.error(f"从stream_backlog_monitor获取数据失败: {e}")
|
1690
|
+
traceback.print_exc()
|
1691
|
+
raise HTTPException(status_code=500, detail=f"获取排队任务数据失败: {str(e)}")
|
1692
|
+
|
1693
|
+
except Exception as e:
|
1694
|
+
logger.error(f"获取队列积压趋势失败: {e}")
|
1695
|
+
traceback.print_exc()
|
1696
|
+
raise HTTPException(status_code=500, detail=str(e))
|
1697
|
+
|
1698
|
+
|
1699
|
+
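
# --- Editor's note ----------------------------------------------------------
# The generate_series CTE above floors both the series points and the monitor
# rows to the same bucket boundary, FLOOR(epoch / interval) * interval, so the
# LEFT JOIN lines them up exactly. The same bucketing in plain Python (assumed
# equivalent, shown only to make the SQL readable):

def _example_time_bucket(epoch_seconds: float, interval_seconds: int) -> int:
    """Round a UNIX timestamp down to the start of its bucket."""
    return int(epoch_seconds // interval_seconds) * interval_seconds

# _example_time_bucket(1700000123, 300) == 1700000100, matching
# to_timestamp(FLOOR(EXTRACT(epoch FROM ts) / 300) * 300) for a 5-minute bucket.
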
@router.get("/task-creation-latency/{namespace}")
|
1700
|
+
async def get_task_creation_latency(
|
1701
|
+
namespace: str,
|
1702
|
+
time_range: str = "1h"
|
1703
|
+
):
|
1704
|
+
"""
|
1705
|
+
获取任务创建延时趋势数据
|
1706
|
+
|
1707
|
+
Args:
|
1708
|
+
namespace: 命名空间名称
|
1709
|
+
time_range: 时间范围(如'1h', '24h', '7d')
|
1710
|
+
"""
|
1711
|
+
try:
|
1712
|
+
conn = await data_access.manager.get_connection(namespace)
|
1713
|
+
|
1714
|
+
# 计算时间范围
|
1715
|
+
end_time = datetime.now(timezone.utc)
|
1716
|
+
if time_range.endswith('m'):
|
1717
|
+
minutes = int(time_range[:-1])
|
1718
|
+
start_time = end_time - timedelta(minutes=minutes)
|
1719
|
+
interval = '1 minute'
|
1720
|
+
interval_seconds = 60
|
1721
|
+
elif time_range.endswith('h'):
|
1722
|
+
hours = int(time_range[:-1])
|
1723
|
+
start_time = end_time - timedelta(hours=hours)
|
1724
|
+
interval = '5 minutes' if hours <= 6 else '10 minutes'
|
1725
|
+
interval_seconds = 300 if hours <= 6 else 600
|
1726
|
+
elif time_range.endswith('d'):
|
1727
|
+
days = int(time_range[:-1])
|
1728
|
+
start_time = end_time - timedelta(days=days)
|
1729
|
+
interval = '1 hour'
|
1730
|
+
interval_seconds = 3600
|
1731
|
+
else:
|
1732
|
+
start_time = end_time - timedelta(hours=1)
|
1733
|
+
interval = '1 minute'
|
1734
|
+
interval_seconds = 60
|
1735
|
+
|
1736
|
+
# 如果没有PostgreSQL配置,返回模拟数据
|
1737
|
+
if not conn.pg_config:
|
1738
|
+
data = []
|
1739
|
+
num_points = min(30, max(10, int((end_time - start_time).total_seconds() / interval_seconds)))
|
1740
|
+
|
1741
|
+
for i in range(num_points):
|
1742
|
+
timestamp = start_time + timedelta(seconds=i * interval_seconds)
|
1743
|
+
data.append({
|
1744
|
+
'time': timestamp.isoformat(),
|
1745
|
+
'value': 50 + 20 * (0.5 - abs((i - num_points/2) / (num_points/2))), # 模拟延时波动
|
1746
|
+
'type': '创建延时'
|
1747
|
+
})
|
1748
|
+
|
1749
|
+
return {"data": data, "granularity": "minute"}
|
1750
|
+
|
1751
|
+
async with await conn.get_pg_session() as session:
|
1752
|
+
# 计算任务创建延时(从提交到开始处理的时间)
|
1753
|
+
try:
|
1754
|
+
latency_sql = text(f"""
|
1755
|
+
WITH time_series AS (
|
1756
|
+
SELECT to_timestamp(FLOOR(EXTRACT(epoch FROM ts) / {interval_seconds}) * {interval_seconds}) AS time_bucket
|
1757
|
+
FROM generate_series(
|
1758
|
+
:start_time ::timestamptz,
|
1759
|
+
:end_time ::timestamptz,
|
1760
|
+
:interval_val ::interval
|
1761
|
+
) AS ts
|
1762
|
+
),
|
1763
|
+
task_latency AS (
|
1764
|
+
SELECT
|
1765
|
+
to_timestamp(FLOOR(EXTRACT(epoch FROM tr.start_time) / {interval_seconds}) * {interval_seconds}) AS time_bucket,
|
1766
|
+
AVG(EXTRACT(epoch FROM (tr.start_time - t.created_at))) as avg_latency_seconds
|
1767
|
+
FROM tasks t
|
1768
|
+
JOIN task_runs tr ON t.stream_id = tr.stream_id
|
1769
|
+
WHERE t.namespace = :namespace
|
1770
|
+
AND tr.start_time >= :start_time
|
1771
|
+
AND tr.start_time <= :end_time
|
1772
|
+
AND tr.start_time IS NOT NULL
|
1773
|
+
GROUP BY 1
|
1774
|
+
)
|
1775
|
+
SELECT
|
1776
|
+
ts.time_bucket,
|
1777
|
+
COALESCE(tl.avg_latency_seconds, 0) as latency_ms
|
1778
|
+
FROM time_series ts
|
1779
|
+
LEFT JOIN task_latency tl ON ts.time_bucket = tl.time_bucket
|
1780
|
+
ORDER BY ts.time_bucket
|
1781
|
+
""")
|
1782
|
+
|
1783
|
+
result = await session.execute(latency_sql, {
|
1784
|
+
'namespace': namespace,
|
1785
|
+
'start_time': start_time,
|
1786
|
+
'end_time': end_time,
|
1787
|
+
'interval_val': interval
|
1788
|
+
})
|
1789
|
+
|
1790
|
+
data = []
|
1791
|
+
for row in result.fetchall():
|
1792
|
+
# 转换为毫秒
|
1793
|
+
latency_ms = row.latency_ms * 1000 if row.latency_ms > 0 else None
|
1794
|
+
data.append({
|
1795
|
+
'time': row.time_bucket.isoformat(),
|
1796
|
+
'value': round(latency_ms, 1) if latency_ms else None,
|
1797
|
+
'type': '创建延时'
|
1798
|
+
})
|
1799
|
+
|
1800
|
+
return {"data": data, "granularity": "minute"}
|
1801
|
+
|
1802
|
+
except Exception as e:
|
1803
|
+
logger.error(f"获取任务创建延时失败: {e}")
|
1804
|
+
traceback.print_exc()
|
1805
|
+
raise HTTPException(status_code=500, detail=f"获取创建延时数据失败: {str(e)}")
|
1806
|
+
|
1807
|
+
except Exception as e:
|
1808
|
+
logger.error(f"获取任务创建延时失败: {e}")
|
1809
|
+
traceback.print_exc()
|
1810
|
+
raise HTTPException(status_code=500, detail=str(e))
|
1811
|
+
|
1812
|
+
|
1813
|
+
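
# --- Editor's sketch (illustrative client call) ------------------------------
# One way to consume the endpoint above. The base URL and any router mount
# prefix are assumptions; the path and query parameter mirror the decorator.

async def _example_fetch_creation_latency(namespace: str = "default"):
    import httpx  # third-party HTTP client, assumed installed
    async with httpx.AsyncClient(base_url="http://localhost:8000") as client:
        resp = await client.get(f"/task-creation-latency/{namespace}",
                                params={"time_range": "24h"})
        resp.raise_for_status()
        return resp.json()["data"]  # list of {'time', 'value', 'type'} points
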
@router.get("/top-queues/{namespace}")
|
1814
|
+
async def get_top_queues(
|
1815
|
+
namespace: str,
|
1816
|
+
metric: str = Query("backlog", description="指标类型: backlog(积压) 或 error(错误率)"),
|
1817
|
+
limit: int = 10,
|
1818
|
+
time_range: str = "24h",
|
1819
|
+
queues: Optional[str] = Query(None, description="逗号分隔的队列名称列表")
|
1820
|
+
):
|
1821
|
+
"""
|
1822
|
+
获取队列排行榜 - 支持积压和错误率两种指标
|
1823
|
+
|
1824
|
+
Args:
|
1825
|
+
namespace: 命名空间名称
|
1826
|
+
metric: 指标类型 (backlog/error)
|
1827
|
+
limit: 返回的队列数量限制
|
1828
|
+
time_range: 时间范围
|
1829
|
+
"""
|
1830
|
+
if metric == "backlog":
|
1831
|
+
return await _get_top_backlog_queues(namespace, limit, queues)
|
1832
|
+
elif metric == "error":
|
1833
|
+
return await _get_top_error_queues(namespace, limit, time_range, queues)
|
1834
|
+
else:
|
1835
|
+
raise HTTPException(status_code=400, detail=f"不支持的指标类型: {metric}")
|
1836
|
+
|
1837
|
+
|
1838
|
+
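
# --- Editor's sketch (illustrative client call) ------------------------------
# Exercising both metric branches of /top-queues. The client is assumed to be
# an httpx.AsyncClient already pointed at this API; the parameters mirror the
# signature above.

async def _example_top_queues(client, namespace: str = "default"):
    backlog = await client.get(f"/top-queues/{namespace}",
                               params={"metric": "backlog", "limit": 5})
    errors = await client.get(f"/top-queues/{namespace}",
                              params={"metric": "error", "time_range": "24h"})
    return backlog.json(), errors.json()
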
@router.get("/top-backlog-queues/{namespace}")
|
1839
|
+
async def get_top_backlog_queues(
|
1840
|
+
namespace: str,
|
1841
|
+
limit: int = 10,
|
1842
|
+
time_range: str = Query("1h", description="时间范围,如1h、24h、7d"),
|
1843
|
+
queues: Optional[str] = Query(None, description="逗号分隔的队列名称列表")
|
1844
|
+
):
|
1845
|
+
"""
|
1846
|
+
获取积压最多的队列Top10 (已废弃,请使用 /top-queues/{namespace}?metric=backlog)
|
1847
|
+
|
1848
|
+
Args:
|
1849
|
+
namespace: 命名空间名称
|
1850
|
+
limit: 返回的队列数量限制
|
1851
|
+
time_range: 时间范围,如1h、24h、7d
|
1852
|
+
"""
|
1853
|
+
return await _get_top_backlog_queues(namespace, limit, time_range, queues)
|
1854
|
+
|
1855
|
+
|
1856
|
+
async def _get_top_backlog_queues(
|
1857
|
+
namespace: str,
|
1858
|
+
limit: int = 10,
|
1859
|
+
time_range: str = "1h",
|
1860
|
+
queues: Optional[str] = None
|
1861
|
+
):
|
1862
|
+
"""
|
1863
|
+
内部方法:获取积压最多的队列Top10
|
1864
|
+
"""
|
1865
|
+
try:
|
1866
|
+
conn = await data_access.manager.get_connection(namespace)
|
1867
|
+
|
1868
|
+
# 计算时间范围
|
1869
|
+
end_time = datetime.now(timezone.utc)
|
1870
|
+
if time_range.endswith('h'):
|
1871
|
+
hours = int(time_range[:-1])
|
1872
|
+
start_time = end_time - timedelta(hours=hours)
|
1873
|
+
elif time_range.endswith('d'):
|
1874
|
+
days = int(time_range[:-1])
|
1875
|
+
start_time = end_time - timedelta(days=days)
|
1876
|
+
else:
|
1877
|
+
# 默认1小时
|
1878
|
+
start_time = end_time - timedelta(hours=1)
|
1879
|
+
|
1880
|
+
# 如果没有PostgreSQL配置,返回空数据
|
1881
|
+
if not conn.pg_config:
|
1882
|
+
return {
|
1883
|
+
"success": True,
|
1884
|
+
"data": []
|
1885
|
+
}
|
1886
|
+
|
1887
|
+
async with await conn.get_pg_session() as session:
|
1888
|
+
try:
|
1889
|
+
# 处理队列筛选参数
|
1890
|
+
queue_list = []
|
1891
|
+
if queues:
|
1892
|
+
queue_list = [q.strip() for q in queues.split(',') if q.strip()]
|
1893
|
+
|
1894
|
+
# 优先从stream_backlog_monitor获取最新的积压数据
|
1895
|
+
# 使用backlog_unprocessed字段表示总积压(包括未投递和已投递未处理的消息)
|
1896
|
+
if queue_list:
|
1897
|
+
backlog_sql = text("""
|
1898
|
+
SELECT
|
1899
|
+
stream_name as queue,
|
1900
|
+
MAX(backlog_unprocessed) as backlog,
|
1901
|
+
CASE
|
1902
|
+
WHEN MAX(backlog_unprocessed) > 100 THEN 'critical'
|
1903
|
+
WHEN MAX(backlog_unprocessed) > 50 THEN 'warning'
|
1904
|
+
ELSE 'normal'
|
1905
|
+
END as status
|
1906
|
+
FROM stream_backlog_monitor
|
1907
|
+
WHERE namespace = :namespace
|
1908
|
+
AND created_at >= :start_time
|
1909
|
+
AND created_at <= :end_time
|
1910
|
+
AND stream_name = ANY(:queues)
|
1911
|
+
GROUP BY stream_name
|
1912
|
+
HAVING MAX(backlog_unprocessed) > 0
|
1913
|
+
ORDER BY backlog DESC
|
1914
|
+
LIMIT :limit
|
1915
|
+
""")
|
1916
|
+
|
1917
|
+
result = await session.execute(backlog_sql, {
|
1918
|
+
'namespace': namespace,
|
1919
|
+
'start_time': start_time,
|
1920
|
+
'end_time': end_time,
|
1921
|
+
'queues': queue_list,
|
1922
|
+
'limit': limit
|
1923
|
+
})
|
1924
|
+
else:
|
1925
|
+
backlog_sql = text("""
|
1926
|
+
SELECT
|
1927
|
+
stream_name as queue,
|
1928
|
+
MAX(backlog_unprocessed) as backlog,
|
1929
|
+
CASE
|
1930
|
+
WHEN MAX(backlog_unprocessed) > 100 THEN 'critical'
|
1931
|
+
WHEN MAX(backlog_unprocessed) > 50 THEN 'warning'
|
1932
|
+
ELSE 'normal'
|
1933
|
+
END as status
|
1934
|
+
FROM stream_backlog_monitor
|
1935
|
+
WHERE namespace = :namespace
|
1936
|
+
AND created_at >= :start_time
|
1937
|
+
AND created_at <= :end_time
|
1938
|
+
GROUP BY stream_name
|
1939
|
+
HAVING MAX(backlog_unprocessed) > 0
|
1940
|
+
ORDER BY backlog DESC
|
1941
|
+
LIMIT :limit
|
1942
|
+
""")
|
1943
|
+
|
1944
|
+
result = await session.execute(backlog_sql, {
|
1945
|
+
'namespace': namespace,
|
1946
|
+
'start_time': start_time,
|
1947
|
+
'end_time': end_time,
|
1948
|
+
'limit': limit
|
1949
|
+
})
|
1950
|
+
|
1951
|
+
backlog_queues = []
|
1952
|
+
for row in result.fetchall():
|
1953
|
+
backlog_queues.append({
|
1954
|
+
"queue": row.queue,
|
1955
|
+
"backlog": int(row.backlog),
|
1956
|
+
"status": row.status
|
1957
|
+
})
|
1958
|
+
|
1959
|
+
if backlog_queues:
|
1960
|
+
return {"success": True, "data": backlog_queues}
|
1961
|
+
|
1962
|
+
except Exception as e:
|
1963
|
+
logger.warning(f"从stream_backlog_monitor获取积压数据失败: {e}")
|
1964
|
+
traceback.print_exc()
|
1965
|
+
|
1966
|
+
# 如果没有积压监控数据,从tasks表统计pending任务
|
1967
|
+
try:
|
1968
|
+
task_sql = text("""
|
1969
|
+
SELECT
|
1970
|
+
t.queue,
|
1971
|
+
COUNT(DISTINCT t.stream_id) as backlog,
|
1972
|
+
CASE
|
1973
|
+
WHEN COUNT(DISTINCT t.stream_id) > 1000 THEN 'critical'
|
1974
|
+
WHEN COUNT(DISTINCT t.stream_id) > 500 THEN 'warning'
|
1975
|
+
ELSE 'normal'
|
1976
|
+
END as status
|
1977
|
+
FROM tasks t
|
1978
|
+
LEFT JOIN task_runs tr ON t.stream_id = tr.stream_id
|
1979
|
+
WHERE t.namespace = :namespace
|
1980
|
+
AND (tr.stream_id IS NULL OR tr.status = 'pending')
|
1981
|
+
AND t.created_at > NOW() - INTERVAL '24 hour'
|
1982
|
+
GROUP BY t.queue
|
1983
|
+
ORDER BY backlog DESC
|
1984
|
+
LIMIT :limit
|
1985
|
+
""")
|
1986
|
+
|
1987
|
+
result = await session.execute(task_sql, {
|
1988
|
+
'namespace': namespace,
|
1989
|
+
'limit': limit
|
1990
|
+
})
|
1991
|
+
|
1992
|
+
backlog_queues = []
|
1993
|
+
for row in result.fetchall():
|
1994
|
+
backlog_queues.append({
|
1995
|
+
"queue": row.queue,
|
1996
|
+
"backlog": int(row.backlog),
|
1997
|
+
"status": row.status
|
1998
|
+
})
|
1999
|
+
|
2000
|
+
return {"success": True, "data": backlog_queues}
|
2001
|
+
|
2002
|
+
except Exception as e:
|
2003
|
+
logger.error(f"从tasks表获取积压数据失败: {e}")
|
2004
|
+
traceback.print_exc()
|
2005
|
+
raise HTTPException(status_code=500, detail=f"获取积压排行数据失败: {str(e)}")
|
2006
|
+
|
2007
|
+
except Exception as e:
|
2008
|
+
logger.error(f"获取积压排行失败: {e}")
|
2009
|
+
traceback.print_exc()
|
2010
|
+
raise HTTPException(status_code=500, detail=str(e))
|
2011
|
+
|
2012
|
+
|
2013
|
+
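
# --- Editor's note ----------------------------------------------------------
# The CASE expressions above bucket a backlog into three severity levels. The
# same mapping as a plain function; the 100/50 thresholds are the ones used for
# stream_backlog_monitor, while the tasks-table fallback uses 1000/500.

def _example_backlog_status(backlog: int, critical: int = 100, warning: int = 50) -> str:
    if backlog > critical:
        return "critical"
    if backlog > warning:
        return "warning"
    return "normal"
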
@router.get("/top-error-queues/{namespace}")
|
2014
|
+
async def get_top_error_queues(
|
2015
|
+
namespace: str,
|
2016
|
+
limit: int = 10,
|
2017
|
+
time_range: str = "24h",
|
2018
|
+
queues: Optional[str] = Query(None, description="逗号分隔的队列名称列表")
|
2019
|
+
):
|
2020
|
+
"""
|
2021
|
+
获取错误率最高的队列Top10 (已废弃,请使用 /top-queues/{namespace}?metric=error)
|
2022
|
+
|
2023
|
+
Args:
|
2024
|
+
namespace: 命名空间名称
|
2025
|
+
limit: 返回的队列数量限制
|
2026
|
+
time_range: 时间范围
|
2027
|
+
"""
|
2028
|
+
return await _get_top_error_queues(namespace, limit, time_range, queues)
|
2029
|
+
|
2030
|
+
|
2031
|
+
async def _get_top_error_queues(
|
2032
|
+
namespace: str,
|
2033
|
+
limit: int = 10,
|
2034
|
+
time_range: str = "24h",
|
2035
|
+
queues: Optional[str] = None
|
2036
|
+
):
|
2037
|
+
"""
|
2038
|
+
内部方法:获取错误率最高的队列Top10
|
2039
|
+
"""
|
2040
|
+
try:
|
2041
|
+
conn = await data_access.manager.get_connection(namespace)
|
2042
|
+
|
2043
|
+
# 计算时间范围
|
2044
|
+
end_time = datetime.now(timezone.utc)
|
2045
|
+
if time_range.endswith('h'):
|
2046
|
+
hours = int(time_range[:-1])
|
2047
|
+
start_time = end_time - timedelta(hours=hours)
|
2048
|
+
elif time_range.endswith('d'):
|
2049
|
+
days = int(time_range[:-1])
|
2050
|
+
start_time = end_time - timedelta(days=days)
|
2051
|
+
else:
|
2052
|
+
start_time = end_time - timedelta(hours=24)
|
2053
|
+
|
2054
|
+
# 如果没有PostgreSQL配置,返回空数据
|
2055
|
+
if not conn.pg_config:
|
2056
|
+
return {
|
2057
|
+
"success": True,
|
2058
|
+
"data": []
|
2059
|
+
}
|
2060
|
+
|
2061
|
+
async with await conn.get_pg_session() as session:
|
2062
|
+
try:
|
2063
|
+
# 直接从task_runs表查询,因为只有执行过的任务才会有错误记录
|
2064
|
+
# 通过consumer_group提取队列名(格式通常是 namespace:QUEUE:queue_name:task_name)
|
2065
|
+
error_sql = text("""
|
2066
|
+
WITH queue_stats AS (
|
2067
|
+
SELECT
|
2068
|
+
CASE
|
2069
|
+
WHEN consumer_group LIKE :queue_prefix || '%'
|
2070
|
+
THEN SPLIT_PART(consumer_group, ':', 3)
|
2071
|
+
ELSE consumer_group
|
2072
|
+
END as queue,
|
2073
|
+
COUNT(DISTINCT stream_id) as total,
|
2074
|
+
COUNT(DISTINCT CASE WHEN status IN ('failed', 'error', 'timeout') THEN stream_id END) as failed
|
2075
|
+
FROM task_runs
|
2076
|
+
WHERE created_at >= :start_time
|
2077
|
+
AND created_at <= :end_time
|
2078
|
+
AND consumer_group LIKE :namespace_prefix || '%'
|
2079
|
+
GROUP BY 1
|
2080
|
+
)
|
2081
|
+
SELECT
|
2082
|
+
queue,
|
2083
|
+
total,
|
2084
|
+
failed,
|
2085
|
+
ROUND(failed * 100.0 / NULLIF(total, 0), 1) as error_rate
|
2086
|
+
FROM queue_stats
|
2087
|
+
WHERE failed > 0
|
2088
|
+
ORDER BY error_rate DESC, failed DESC
|
2089
|
+
LIMIT :limit
|
2090
|
+
""")
|
2091
|
+
|
2092
|
+
result = await session.execute(error_sql, {
|
2093
|
+
'namespace_prefix': f'{namespace}:',
|
2094
|
+
'queue_prefix': f'{namespace}:QUEUE:',
|
2095
|
+
'start_time': start_time,
|
2096
|
+
'end_time': end_time,
|
2097
|
+
'limit': limit
|
2098
|
+
})
|
2099
|
+
|
2100
|
+
error_queues = []
|
2101
|
+
for row in result.fetchall():
|
2102
|
+
error_queues.append({
|
2103
|
+
"queue": row.queue,
|
2104
|
+
"errorRate": str(row.error_rate or 0),
|
2105
|
+
"failed": int(row.failed),
|
2106
|
+
"total": int(row.total)
|
2107
|
+
})
|
2108
|
+
|
2109
|
+
return {"success": True, "data": error_queues}
|
2110
|
+
|
2111
|
+
except Exception as e:
|
2112
|
+
logger.error(f"获取错误率排行数据失败: {e}")
|
2113
|
+
traceback.print_exc()
|
2114
|
+
raise HTTPException(status_code=500, detail=f"获取错误率排行数据失败: {str(e)}")
|
2115
|
+
|
2116
|
+
except Exception as e:
|
2117
|
+
logger.error(f"获取错误率排行失败: {e}")
|
2118
|
+
traceback.print_exc()
|
2119
|
+
raise HTTPException(status_code=500, detail=str(e))
|
2120
|
+
|
2121
|
+
|
2122
|
+
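
# --- Editor's note ----------------------------------------------------------
# SPLIT_PART(consumer_group, ':', 3) above pulls the queue out of group names
# shaped like "namespace:QUEUE:queue_name:task_name". The equivalent parsing in
# Python, for illustration:

def _example_queue_from_consumer_group(consumer_group: str, namespace: str) -> str:
    prefix = f"{namespace}:QUEUE:"
    if consumer_group.startswith(prefix):
        return consumer_group.split(":")[2]  # third colon-separated field
    return consumer_group
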
@router.get("/task-concurrency-trend/{namespace}")
|
2123
|
+
async def get_task_concurrency_trend(
|
2124
|
+
namespace: str,
|
2125
|
+
time_range: str = "1h"
|
2126
|
+
):
|
2127
|
+
"""
|
2128
|
+
获取任务执行数量趋势数据(每个时间间隔内开始执行的任务数量)
|
2129
|
+
|
2130
|
+
Args:
|
2131
|
+
namespace: 命名空间名称
|
2132
|
+
time_range: 时间范围(如'1h', '24h', '7d')
|
2133
|
+
"""
|
2134
|
+
try:
|
2135
|
+
conn = await data_access.manager.get_connection(namespace)
|
2136
|
+
|
2137
|
+
# 计算时间范围
|
2138
|
+
end_time = datetime.now(timezone.utc)
|
2139
|
+
if time_range.endswith('m'):
|
2140
|
+
minutes = int(time_range[:-1])
|
2141
|
+
start_time = end_time - timedelta(minutes=minutes)
|
2142
|
+
interval = '1 minute'
|
2143
|
+
interval_seconds = 60
|
2144
|
+
elif time_range.endswith('h'):
|
2145
|
+
hours = int(time_range[:-1])
|
2146
|
+
start_time = end_time - timedelta(hours=hours)
|
2147
|
+
interval = '5 minutes' if hours <= 6 else '10 minutes'
|
2148
|
+
interval_seconds = 300 if hours <= 6 else 600
|
2149
|
+
elif time_range.endswith('d'):
|
2150
|
+
days = int(time_range[:-1])
|
2151
|
+
start_time = end_time - timedelta(days=days)
|
2152
|
+
interval = '1 hour'
|
2153
|
+
interval_seconds = 3600
|
2154
|
+
else:
|
2155
|
+
start_time = end_time - timedelta(hours=1)
|
2156
|
+
interval = '1 minute'
|
2157
|
+
interval_seconds = 60
|
2158
|
+
|
2159
|
+
async with await conn.get_pg_session() as session:
|
2160
|
+
try:
|
2161
|
+
# 计算每个时间间隔内执行的任务数量(更实用的指标)
|
2162
|
+
concurrency_sql = text(f"""
|
2163
|
+
WITH time_series AS (
|
2164
|
+
SELECT
|
2165
|
+
to_timestamp(FLOOR(EXTRACT(epoch FROM ts) / {interval_seconds}) * {interval_seconds}) AS time_bucket_start,
|
2166
|
+
to_timestamp(FLOOR(EXTRACT(epoch FROM ts) / {interval_seconds}) * {interval_seconds}) + INTERVAL '{interval_seconds} seconds' AS time_bucket_end
|
2167
|
+
FROM generate_series(
|
2168
|
+
:start_time ::timestamptz,
|
2169
|
+
:end_time ::timestamptz,
|
2170
|
+
:interval_val ::interval
|
2171
|
+
) AS ts
|
2172
|
+
)
|
2173
|
+
SELECT
|
2174
|
+
ts.time_bucket_start as time_bucket,
|
2175
|
+
COUNT(tr.stream_id) as concurrent_count
|
2176
|
+
FROM time_series ts
|
2177
|
+
LEFT JOIN task_runs tr ON (
|
2178
|
+
EXISTS (
|
2179
|
+
SELECT 1 FROM tasks t
|
2180
|
+
WHERE t.stream_id = tr.stream_id
|
2181
|
+
AND t.namespace = :namespace
|
2182
|
+
)
|
2183
|
+
AND tr.start_time IS NOT NULL
|
2184
|
+
AND tr.start_time >= ts.time_bucket_start
|
2185
|
+
AND tr.start_time < ts.time_bucket_end
|
2186
|
+
)
|
2187
|
+
GROUP BY ts.time_bucket_start
|
2188
|
+
ORDER BY ts.time_bucket_start
|
2189
|
+
""")
|
2190
|
+
|
2191
|
+
result = await session.execute(concurrency_sql, {
|
2192
|
+
'namespace': namespace,
|
2193
|
+
'start_time': start_time,
|
2194
|
+
'end_time': end_time,
|
2195
|
+
'interval_val': interval
|
2196
|
+
})
|
2197
|
+
|
2198
|
+
data = []
|
2199
|
+
for row in result.fetchall():
|
2200
|
+
concurrent_count = row.concurrent_count or 0
|
2201
|
+
data.append({
|
2202
|
+
'time': row.time_bucket.isoformat(),
|
2203
|
+
'value': int(concurrent_count),
|
2204
|
+
'type': '执行数量'
|
2205
|
+
})
|
2206
|
+
|
2207
|
+
return {"data": data, "granularity": "minute"}
|
2208
|
+
|
2209
|
+
except Exception as e:
|
2210
|
+
logger.error(f"获取任务并发数据失败: {e}")
|
2211
|
+
traceback.print_exc()
|
2212
|
+
# 如果查询失败,从Redis获取当前并发数作为静态数据
|
2213
|
+
try:
|
2214
|
+
redis_client = await conn.get_redis_client()
|
2215
|
+
|
2216
|
+
# 统计当前正在处理的任务数
|
2217
|
+
worker_pattern = f"{conn.redis_prefix}:WORKER:*"
|
2218
|
+
current_concurrent = 0
|
2219
|
+
|
2220
|
+
async for worker_key in redis_client.scan_iter(match=worker_pattern):
|
2221
|
+
worker_info = await redis_client.hgetall(worker_key)
|
2222
|
+
if worker_info and worker_info.get('status') == 'busy':
|
2223
|
+
current_concurrent += 1
|
2224
|
+
|
2225
|
+
await redis_client.aclose()
|
2226
|
+
|
2227
|
+
# 生成静态数据点
|
2228
|
+
data = []
|
2229
|
+
num_points = min(20, max(5, int((end_time - start_time).total_seconds() / interval_seconds)))
|
2230
|
+
|
2231
|
+
for i in range(num_points):
|
2232
|
+
timestamp = start_time + timedelta(seconds=i * interval_seconds)
|
2233
|
+
data.append({
|
2234
|
+
'time': timestamp.isoformat(),
|
2235
|
+
'value': current_concurrent if i == num_points - 1 else None,
|
2236
|
+
'type': '并发数'
|
2237
|
+
})
|
2238
|
+
|
2239
|
+
return {"data": data, "granularity": "minute"}
|
2240
|
+
|
2241
|
+
except Exception as redis_error:
|
2242
|
+
logger.error(f"从Redis获取并发数据也失败: {redis_error}")
|
2243
|
+
raise HTTPException(status_code=500, detail=f"获取并发数据失败: {str(e)}")
|
2244
|
+
|
2245
|
+
except Exception as e:
|
2246
|
+
logger.error(f"获取任务并发趋势失败: {e}")
|
2247
|
+
traceback.print_exc()
|
2248
|
+
raise HTTPException(status_code=500, detail=str(e))
|
2249
|
+
|
2250
|
+
|
2251
|
+
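
# --- Editor's note ----------------------------------------------------------
# Despite the route name, the query above counts tasks that *started* inside
# each bucket, not tasks whose execution overlaps the bucket. A sketch of that
# per-bucket count over in-memory start timestamps (UNIX seconds):

def _example_started_per_bucket(start_timestamps, bucket_start: int, bucket_end: int) -> int:
    """Count runs whose start time falls inside [bucket_start, bucket_end)."""
    return sum(1 for ts in start_timestamps if bucket_start <= ts < bucket_end)
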
@router.post("/tasks/{namespace}")
|
2252
|
+
async def get_namespace_tasks(namespace: str, request: Request):
|
2253
|
+
"""
|
2254
|
+
获取指定命名空间中队列的任务列表
|
2255
|
+
|
2256
|
+
Args:
|
2257
|
+
namespace: 命名空间名称
|
2258
|
+
request: 请求体,包含queue_name、分页、筛选等参数
|
2259
|
+
"""
|
2260
|
+
try:
|
2261
|
+
# 解析请求体
|
2262
|
+
body = await request.json()
|
2263
|
+
queue_name = body.get('queue_name')
|
2264
|
+
page = body.get('page', 1)
|
2265
|
+
page_size = body.get('page_size', 20)
|
2266
|
+
filters = body.get('filters', [])
|
2267
|
+
sort_field = body.get('sort_field')
|
2268
|
+
sort_order = body.get('sort_order', 'desc')
|
2269
|
+
|
2270
|
+
# 处理时间范围参数
|
2271
|
+
start_time = body.get('start_time')
|
2272
|
+
end_time = body.get('end_time')
|
2273
|
+
time_range = body.get('time_range')
|
2274
|
+
|
2275
|
+
if not queue_name:
|
2276
|
+
raise HTTPException(status_code=400, detail="queue_name is required")
|
2277
|
+
|
2278
|
+
# 获取命名空间连接
|
2279
|
+
conn = await data_access.manager.get_connection(namespace)
|
2280
|
+
|
2281
|
+
# 如果没有PostgreSQL配置,返回空数据
|
2282
|
+
if not conn.pg_config:
|
2283
|
+
return {
|
2284
|
+
"success": True,
|
2285
|
+
"data": [],
|
2286
|
+
"total": 0,
|
2287
|
+
"page": page,
|
2288
|
+
"page_size": page_size
|
2289
|
+
}
|
2290
|
+
|
2291
|
+
# 如果提供了时间范围,计算起止时间
|
2292
|
+
if not start_time or not end_time:
|
2293
|
+
if time_range and time_range != 'custom':
|
2294
|
+
now = datetime.now(timezone.utc)
|
2295
|
+
time_range_map = {
|
2296
|
+
"15m": timedelta(minutes=15),
|
2297
|
+
"30m": timedelta(minutes=30),
|
2298
|
+
"1h": timedelta(hours=1),
|
2299
|
+
"3h": timedelta(hours=3),
|
2300
|
+
"6h": timedelta(hours=6),
|
2301
|
+
"12h": timedelta(hours=12),
|
2302
|
+
"24h": timedelta(hours=24),
|
2303
|
+
"7d": timedelta(days=7),
|
2304
|
+
"30d": timedelta(days=30),
|
2305
|
+
}
|
2306
|
+
|
2307
|
+
delta = time_range_map.get(time_range)
|
2308
|
+
if delta:
|
2309
|
+
end_time = now
|
2310
|
+
start_time = end_time - delta
|
2311
|
+
|
2312
|
+
# 如果有时间范围,将其转换为datetime对象
|
2313
|
+
if start_time and isinstance(start_time, str):
|
2314
|
+
start_time = datetime.fromisoformat(start_time.replace('Z', '+00:00'))
|
2315
|
+
if end_time and isinstance(end_time, str):
|
2316
|
+
end_time = datetime.fromisoformat(end_time.replace('Z', '+00:00'))
|
2317
|
+
|
2318
|
+
# 从PostgreSQL查询任务数据
|
2319
|
+
async with await conn.get_pg_session() as session:
|
2320
|
+
# 构建基础查询(支持基础队列名和优先级队列)
|
2321
|
+
query_conditions = ["t.namespace = :namespace", "(t.queue = :queue_name OR t.queue LIKE :queue_pattern)"]
|
2322
|
+
query_params = {
|
2323
|
+
'namespace': namespace,
|
2324
|
+
'queue_name': queue_name,
|
2325
|
+
'queue_pattern': f'{queue_name}:%' # 匹配所有优先级队列
|
2326
|
+
}
|
2327
|
+
|
2328
|
+
# 添加时间范围条件
|
2329
|
+
if start_time:
|
2330
|
+
query_conditions.append("t.created_at >= :start_time")
|
2331
|
+
query_params['start_time'] = start_time
|
2332
|
+
if end_time:
|
2333
|
+
query_conditions.append("t.created_at <= :end_time")
|
2334
|
+
query_params['end_time'] = end_time
|
2335
|
+
|
2336
|
+
# 添加筛选条件
|
2337
|
+
for filter_item in filters:
|
2338
|
+
# 跳过被禁用的筛选条件
|
2339
|
+
if filter_item.get('enabled') == False:
|
2340
|
+
continue
|
2341
|
+
|
2342
|
+
field = filter_item.get('field')
|
2343
|
+
operator = filter_item.get('operator')
|
2344
|
+
value = filter_item.get('value')
|
2345
|
+
|
2346
|
+
if not field or not operator:
|
2347
|
+
continue
|
2348
|
+
|
2349
|
+
# 特殊处理id字段(映射到stream_id)
|
2350
|
+
if field == 'id':
|
2351
|
+
actual_field = 'stream_id'
|
2352
|
+
table_prefix = 't.'
|
2353
|
+
elif field == 'scheduled_task_id':
|
2354
|
+
# scheduled_task_id特殊处理,需要转换为字符串
|
2355
|
+
actual_field = field
|
2356
|
+
table_prefix = 't.'
|
2357
|
+
# 将值转换为字符串
|
2358
|
+
if operator == 'eq':
|
2359
|
+
query_conditions.append(f"{table_prefix}{actual_field} = :{field}")
|
2360
|
+
query_params[field] = str(value)
|
2361
|
+
elif operator == 'ne':
|
2362
|
+
query_conditions.append(f"{table_prefix}{actual_field} != :{field}")
|
2363
|
+
query_params[field] = str(value)
|
2364
|
+
elif operator == 'contains':
|
2365
|
+
query_conditions.append(f"{table_prefix}{actual_field} LIKE :{field}")
|
2366
|
+
query_params[field] = f'%{str(value)}%'
|
2367
|
+
continue # 跳过后续的通用处理
|
2368
|
+
else:
|
2369
|
+
actual_field = field
|
2370
|
+
# 根据字段决定使用哪个表的别名
|
2371
|
+
table_prefix = 't.'
|
2372
|
+
if field in ['status', 'task_name', 'worker_id', 'consumer_group']:
|
2373
|
+
table_prefix = 'tr.'
|
2374
|
+
elif field in ['queue', 'namespace', 'stream_id']:
|
2375
|
+
table_prefix = 't.'
|
2376
|
+
|
2377
|
+
# 简单的相等筛选
|
2378
|
+
if operator == 'eq':
|
2379
|
+
# 对于status字段的特殊处理
|
2380
|
+
if field == 'status' and value == 'pending':
|
2381
|
+
# pending状态:task_runs中没有记录(status为NULL)或status='pending'
|
2382
|
+
query_conditions.append(f"({table_prefix}{actual_field} IS NULL OR {table_prefix}{actual_field} = :{field})")
|
2383
|
+
query_params[field] = value
|
2384
|
+
else:
|
2385
|
+
query_conditions.append(f"{table_prefix}{actual_field} = :{field}")
|
2386
|
+
query_params[field] = value
|
2387
|
+
elif operator == 'ne':
|
2388
|
+
# 对于status字段的特殊处理,需要包含NULL值
|
2389
|
+
if field == 'status':
|
2390
|
+
# 使用COALESCE处理NULL值,将NULL视为'pending'
|
2391
|
+
query_conditions.append(f"(COALESCE({table_prefix}{actual_field}, 'pending') != :{field})")
|
2392
|
+
query_params[field] = value
|
2393
|
+
else:
|
2394
|
+
query_conditions.append(f"{table_prefix}{actual_field} != :{field}")
|
2395
|
+
query_params[field] = value
|
2396
|
+
elif operator == 'contains':
|
2397
|
+
query_conditions.append(f"{table_prefix}{actual_field} LIKE :{field}")
|
2398
|
+
query_params[field] = f'%{value}%'
|
2399
|
+
|
2400
|
+
# 构建WHERE子句
|
2401
|
+
where_clause = " AND ".join(query_conditions)
|
2402
|
+
|
2403
|
+
# 计算总数(需要JOIN因为WHERE条件可能涉及task_runs表)
|
2404
|
+
count_sql = text(f"""
|
2405
|
+
SELECT COUNT(DISTINCT t.stream_id)
|
2406
|
+
FROM tasks t
|
2407
|
+
LEFT JOIN task_runs tr ON t.stream_id = tr.stream_id
|
2408
|
+
WHERE {where_clause}
|
2409
|
+
""")
|
2410
|
+
total_result = await session.execute(count_sql, query_params)
|
2411
|
+
total = total_result.scalar()
|
2412
|
+
|
2413
|
+
# 构建排序子句
|
2414
|
+
order_clause = "t.created_at DESC" # 默认排序
|
2415
|
+
if sort_field:
|
2416
|
+
# 根据字段添加正确的表别名
|
2417
|
+
if sort_field in ['created_at', 'queue', 'stream_id']:
|
2418
|
+
order_clause = f"t.{sort_field} {sort_order.upper()}"
|
2419
|
+
elif sort_field in ['status', 'task_name', 'worker_id', 'consumer_group', 'started_at', 'completed_at']:
|
2420
|
+
order_clause = f"tr.{sort_field} {sort_order.upper()}"
|
2421
|
+
else:
|
2422
|
+
order_clause = f"{sort_field} {sort_order.upper()}"
|
2423
|
+
|
2424
|
+
# 查询任务列表(连接tasks和task_runs表)
|
2425
|
+
offset = (page - 1) * page_size
|
2426
|
+
query_sql = text(f"""
|
2427
|
+
SELECT
|
2428
|
+
t.stream_id as id,
|
2429
|
+
COALESCE(tr.status, 'pending') as status,
|
2430
|
+
COALESCE(tr.task_name, t.payload->>'task_name', 'unknown') as task_name,
|
2431
|
+
t.queue as queue_name,
|
2432
|
+
tr.consumer_group,
|
2433
|
+
tr.worker_id,
|
2434
|
+
t.created_at,
|
2435
|
+
tr.start_time as started_at,
|
2436
|
+
tr.end_time as completed_at,
|
2437
|
+
tr.duration,
|
2438
|
+
tr.execution_time,
|
2439
|
+
t.payload as task_data,
|
2440
|
+
tr.result,
|
2441
|
+
tr.error_message,
|
2442
|
+
tr.retry_count
|
2443
|
+
FROM tasks t
|
2444
|
+
LEFT JOIN task_runs tr ON t.stream_id = tr.stream_id
|
2445
|
+
WHERE {where_clause}
|
2446
|
+
ORDER BY {order_clause}
|
2447
|
+
LIMIT :limit OFFSET :offset
|
2448
|
+
""")
|
2449
|
+
|
2450
|
+
query_params['limit'] = page_size
|
2451
|
+
query_params['offset'] = offset
|
2452
|
+
|
2453
|
+
result = await session.execute(query_sql, query_params)
|
2454
|
+
|
2455
|
+
# 格式化数据
|
2456
|
+
tasks = []
|
2457
|
+
for row in result:
|
2458
|
+
# 使用error_message字段或从result中提取错误信息
|
2459
|
+
error = row.error_message
|
2460
|
+
retry_count = row.retry_count if row.retry_count else 0
|
2461
|
+
|
2462
|
+
if not error and row.result:
|
2463
|
+
try:
|
2464
|
+
import json
|
2465
|
+
result_data = json.loads(row.result) if isinstance(row.result, str) else row.result
|
2466
|
+
if isinstance(result_data, dict):
|
2467
|
+
error = result_data.get('error')
|
2468
|
+
except:
|
2469
|
+
pass
|
2470
|
+
|
2471
|
+
task = {
|
2472
|
+
'id': row.id,
|
2473
|
+
'status': row.status,
|
2474
|
+
'task_name': row.task_name, # 改为task_name以匹配前端
|
2475
|
+
'queue': row.queue_name,
|
2476
|
+
'consumer_group': row.consumer_group if row.consumer_group else '-', # 添加消费者组字段
|
2477
|
+
'worker_id': row.worker_id,
|
2478
|
+
'created_at': row.created_at.isoformat() if row.created_at else None,
|
2479
|
+
'started_at': row.started_at.isoformat() if row.started_at else None,
|
2480
|
+
'completed_at': row.completed_at.isoformat() if row.completed_at else None,
|
2481
|
+
'error': error,
|
2482
|
+
'retry_count': retry_count
|
2483
|
+
}
|
2484
|
+
|
2485
|
+
# 执行时间(从开始到完成)- 直接使用秒数值
|
2486
|
+
if row.execution_time is not None:
|
2487
|
+
task['execution_time'] = row.execution_time # 返回数字类型,前端会格式化
|
2488
|
+
else:
|
2489
|
+
task['execution_time'] = None
|
2490
|
+
|
2491
|
+
|
2492
|
+
# 计算总耗时(从创建到完成)
|
2493
|
+
if row.created_at and row.completed_at:
|
2494
|
+
duration = (row.completed_at - row.created_at).total_seconds()
|
2495
|
+
task['duration'] = duration # 返回数字类型,前端会格式化
|
2496
|
+
elif row.created_at and row.status == 'running':
|
2497
|
+
# 如果任务还在运行,计算从创建到现在的时间
|
2498
|
+
duration = (datetime.now(timezone.utc) - row.created_at).total_seconds()
|
2499
|
+
task['duration'] = duration
|
2500
|
+
else:
|
2501
|
+
task['duration'] = None
|
2502
|
+
|
2503
|
+
tasks.append(task)
|
2504
|
+
|
2505
|
+
return {
|
2506
|
+
"success": True,
|
2507
|
+
"data": tasks,
|
2508
|
+
"total": total,
|
2509
|
+
"page": page,
|
2510
|
+
"page_size": page_size
|
2511
|
+
}
|
2512
|
+
|
2513
|
+
except Exception as e:
|
2514
|
+
import traceback
|
2515
|
+
traceback.print_exc()
|
2516
|
+
logger.error(f"获取任务列表失败: {e}")
|
2517
|
+
|
2518
|
+
await handle_database_connection_error(e, namespace, "获取任务列表")
|
2519
|
+
|
2520
|
+
|
2521
|
+
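
# --- Editor's sketch (illustrative request body) ------------------------------
# The body.get(...) calls above imply a payload like the following; the queue
# and filter values are made up for illustration.

_EXAMPLE_TASKS_REQUEST = {
    "queue_name": "orders",
    "page": 1,
    "page_size": 20,
    "time_range": "24h",
    "sort_field": "created_at",
    "sort_order": "desc",
    "filters": [
        {"field": "status", "operator": "eq", "value": "error", "enabled": True},
    ],
}
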
@router.get("/message-offset-trend/{namespace}")
|
2522
|
+
async def get_message_offset_trend(
|
2523
|
+
namespace: str,
|
2524
|
+
time_range: str = "1h"
|
2525
|
+
):
|
2526
|
+
"""
|
2527
|
+
获取消息offset趋势数据(投递和确认进度)
|
2528
|
+
|
2529
|
+
Args:
|
2530
|
+
namespace: 命名空间名称
|
2531
|
+
time_range: 时间范围(如'1h', '24h', '7d')
|
2532
|
+
"""
|
2533
|
+
try:
|
2534
|
+
conn = await data_access.manager.get_connection(namespace)
|
2535
|
+
|
2536
|
+
# 计算时间范围
|
2537
|
+
end_time = datetime.now(timezone.utc)
|
2538
|
+
if time_range.endswith('m'):
|
2539
|
+
minutes = int(time_range[:-1])
|
2540
|
+
start_time = end_time - timedelta(minutes=minutes)
|
2541
|
+
interval = '1 minute'
|
2542
|
+
interval_seconds = 60
|
2543
|
+
elif time_range.endswith('h'):
|
2544
|
+
hours = int(time_range[:-1])
|
2545
|
+
start_time = end_time - timedelta(hours=hours)
|
2546
|
+
interval = '5 minutes' if hours <= 6 else '10 minutes'
|
2547
|
+
interval_seconds = 300 if hours <= 6 else 600
|
2548
|
+
elif time_range.endswith('d'):
|
2549
|
+
days = int(time_range[:-1])
|
2550
|
+
start_time = end_time - timedelta(days=days)
|
2551
|
+
interval = '1 hour'
|
2552
|
+
interval_seconds = 3600
|
2553
|
+
else:
|
2554
|
+
start_time = end_time - timedelta(hours=1)
|
2555
|
+
interval = '1 minute'
|
2556
|
+
interval_seconds = 60
|
2557
|
+
|
2558
|
+
# 如果没有PostgreSQL配置,返回模拟数据
|
2559
|
+
if not conn.pg_config:
|
2560
|
+
data = []
|
2561
|
+
num_points = min(30, max(10, int((end_time - start_time).total_seconds() / interval_seconds)))
|
2562
|
+
|
2563
|
+
for i in range(num_points):
|
2564
|
+
timestamp = start_time + timedelta(seconds=i * interval_seconds)
|
2565
|
+
base_offset = 1000 + i * 10
|
2566
|
+
data.extend([
|
2567
|
+
{
|
2568
|
+
'time': timestamp.isoformat(),
|
2569
|
+
'value': base_offset + 10,
|
2570
|
+
'type': '已发布Offset'
|
2571
|
+
},
|
2572
|
+
{
|
2573
|
+
'time': timestamp.isoformat(),
|
2574
|
+
'value': base_offset + 5,
|
2575
|
+
'type': '已投递Offset'
|
2576
|
+
},
|
2577
|
+
{
|
2578
|
+
'time': timestamp.isoformat(),
|
2579
|
+
'value': base_offset,
|
2580
|
+
'type': '已确认Offset'
|
2581
|
+
}
|
2582
|
+
])
|
2583
|
+
|
2584
|
+
return {"data": data, "granularity": "minute"}
|
2585
|
+
|
2586
|
+
async with await conn.get_pg_session() as session:
|
2587
|
+
try:
|
2588
|
+
# 从stream_backlog_monitor表获取offset进度的时间序列数据
|
2589
|
+
offset_sql = text(f"""
|
2590
|
+
WITH time_series AS (
|
2591
|
+
SELECT to_timestamp(FLOOR(EXTRACT(epoch FROM ts) / {interval_seconds}) * {interval_seconds}) AS time_bucket
|
2592
|
+
FROM generate_series(
|
2593
|
+
:start_time ::timestamptz,
|
2594
|
+
:end_time ::timestamptz,
|
2595
|
+
:interval_val ::interval
|
2596
|
+
) AS ts
|
2597
|
+
),
|
2598
|
+
offset_aggregated AS (
|
2599
|
+
SELECT
|
2600
|
+
to_timestamp(FLOOR(EXTRACT(epoch FROM created_at) / {interval_seconds}) * {interval_seconds}) AS time_bucket,
|
2601
|
+
MAX(last_delivered_offset) as max_delivered_offset,
|
2602
|
+
MAX(last_acked_offset) as max_acked_offset,
|
2603
|
+
MAX(last_published_offset) as max_published_offset
|
2604
|
+
FROM stream_backlog_monitor
|
2605
|
+
WHERE namespace = :namespace
|
2606
|
+
AND created_at >= :start_time
|
2607
|
+
AND created_at <= :end_time
|
2608
|
+
GROUP BY time_bucket
|
2609
|
+
)
|
2610
|
+
SELECT
|
2611
|
+
ts.time_bucket,
|
2612
|
+
COALESCE(oa.max_delivered_offset, 0) as delivered_offset,
|
2613
|
+
COALESCE(oa.max_acked_offset, 0) as acked_offset,
|
2614
|
+
COALESCE(oa.max_published_offset, 0) as published_offset
|
2615
|
+
FROM time_series ts
|
2616
|
+
LEFT JOIN offset_aggregated oa ON ts.time_bucket = oa.time_bucket
|
2617
|
+
ORDER BY ts.time_bucket
|
2618
|
+
""")
|
2619
|
+
|
2620
|
+
result = await session.execute(offset_sql, {
|
2621
|
+
'namespace': namespace,
|
2622
|
+
'start_time': start_time,
|
2623
|
+
'end_time': end_time,
|
2624
|
+
'interval_val': interval
|
2625
|
+
})
|
2626
|
+
|
2627
|
+
data = []
|
2628
|
+
for row in result.fetchall():
|
2629
|
+
timestamp = row.time_bucket.isoformat()
|
2630
|
+
|
2631
|
+
# 添加已发布offset数据
|
2632
|
+
data.append({
|
2633
|
+
'time': timestamp,
|
2634
|
+
'value': int(row.published_offset),
|
2635
|
+
'type': '已发布Offset'
|
2636
|
+
})
|
2637
|
+
|
2638
|
+
# 添加已投递offset数据
|
2639
|
+
data.append({
|
2640
|
+
'time': timestamp,
|
2641
|
+
'value': int(row.delivered_offset),
|
2642
|
+
'type': '已投递Offset'
|
2643
|
+
})
|
2644
|
+
|
2645
|
+
# 添加已确认offset数据
|
2646
|
+
data.append({
|
2647
|
+
'time': timestamp,
|
2648
|
+
'value': int(row.acked_offset),
|
2649
|
+
'type': '已确认Offset'
|
2650
|
+
})
|
2651
|
+
|
2652
|
+
return {"data": data, "granularity": "minute"}
|
2653
|
+
|
2654
|
+
except Exception as e:
|
2655
|
+
logger.error(f"查询offset趋势数据失败: {e}")
|
2656
|
+
traceback.print_exc()
|
2657
|
+
# 返回模拟数据
|
2658
|
+
data = []
|
2659
|
+
num_points = min(20, max(10, int((end_time - start_time).total_seconds() / interval_seconds)))
|
2660
|
+
|
2661
|
+
for i in range(num_points):
|
2662
|
+
timestamp = start_time + timedelta(seconds=i * interval_seconds)
|
2663
|
+
base_offset = 1000 + i * 10
|
2664
|
+
data.extend([
|
2665
|
+
{
|
2666
|
+
'time': timestamp.isoformat(),
|
2667
|
+
'value': base_offset + 10,
|
2668
|
+
'type': '已发布Offset'
|
2669
|
+
},
|
2670
|
+
{
|
2671
|
+
'time': timestamp.isoformat(),
|
2672
|
+
'value': base_offset + 5,
|
2673
|
+
'type': '已投递Offset'
|
2674
|
+
},
|
2675
|
+
{
|
2676
|
+
'time': timestamp.isoformat(),
|
2677
|
+
'value': base_offset,
|
2678
|
+
'type': '已确认Offset'
|
2679
|
+
}
|
2680
|
+
])
|
2681
|
+
|
2682
|
+
return {"data": data, "granularity": "minute"}
|
2683
|
+
|
2684
|
+
except Exception as e:
|
2685
|
+
logger.error(f"获取消息offset趋势失败: {e}")
|
2686
|
+
traceback.print_exc()
|
2687
|
+
raise HTTPException(status_code=500, detail=str(e))
|
2688
|
+
|
2689
|
+
|
2690
|
+
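
# --- Editor's note ----------------------------------------------------------
# The three series above are monotonically increasing stream offsets, so a
# client can derive the unacknowledged backlog for any snapshot. A sketch:

def _example_unacked_backlog(published_offset: int, acked_offset: int) -> int:
    """Messages published but not yet acknowledged at a bucket snapshot."""
    return max(0, published_offset - acked_offset)
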
@router.post("/dashboard-overview-stats/{namespace}")
|
2691
|
+
async def get_dashboard_overview_stats(namespace: str, query: TimeRangeQuery):
|
2692
|
+
"""
|
2693
|
+
获取概览页面的统一统计数据
|
2694
|
+
包含:任务处理趋势、任务并发数量、任务处理时间、任务执行延时
|
2695
|
+
|
2696
|
+
Args:
|
2697
|
+
namespace: 命名空间名称
|
2698
|
+
query: 时间范围查询参数
|
2699
|
+
|
2700
|
+
Returns:
|
2701
|
+
统一的时间序列数据,包含所有概览图表需要的指标和granularity字段
|
2702
|
+
"""
|
2703
|
+
try:
|
2704
|
+
conn = await data_access.manager.get_connection(namespace)
|
2705
|
+
|
2706
|
+
# 如果没有PostgreSQL配置,返回空数据
|
2707
|
+
if not conn.pg_config:
|
2708
|
+
return {
|
2709
|
+
"task_trend": [],
|
2710
|
+
"concurrency": [],
|
2711
|
+
"processing_time": [],
|
2712
|
+
"creation_latency": [],
|
2713
|
+
"granularity": "minute"
|
2714
|
+
}
|
2715
|
+
|
2716
|
+
# 使用公共工具函数处理时间范围
|
2717
|
+
time_range_result = parse_time_range_query(query)
|
2718
|
+
start_time = time_range_result.start_time
|
2719
|
+
end_time = time_range_result.end_time
|
2720
|
+
interval = time_range_result.interval
|
2721
|
+
interval_seconds = time_range_result.interval_seconds
|
2722
|
+
granularity = time_range_result.granularity
|
2723
|
+
|
2724
|
+
# 构建队列筛选条件
|
2725
|
+
queues_str = ','.join(query.queues) if query.queues else None
|
2726
|
+
queue_filter, queue_list, queue_params = build_queue_filter_and_params(queues_str)
|
2727
|
+
print(f'🔍 Dashboard Overview - 收到队列参数: {query.queues}')
|
2728
|
+
print(f'🔍 Dashboard Overview - SQL筛选条件: {queue_filter}')
|
2729
|
+
|
2730
|
+
async with await conn.get_pg_session() as session:
|
2731
|
+
# 统一查询所有概览页面需要的数据
|
2732
|
+
sql = text(f"""
|
2733
|
+
WITH time_series AS (
|
2734
|
+
SELECT to_timestamp(FLOOR(EXTRACT(epoch FROM ts) / {interval_seconds}) * {interval_seconds}) AS time_bucket
|
2735
|
+
FROM generate_series(
|
2736
|
+
:start_time ::timestamptz,
|
2737
|
+
:end_time ::timestamptz + INTERVAL '{interval_seconds} seconds',
|
2738
|
+
:interval_val ::interval
|
2739
|
+
) AS ts
|
2740
|
+
),
|
2741
|
+
enqueue_counts AS (
|
2742
|
+
SELECT
|
2743
|
+
to_timestamp(FLOOR(EXTRACT(epoch FROM t.created_at) / {interval_seconds}) * {interval_seconds}) AS time_bucket,
|
2744
|
+
COUNT(DISTINCT t.stream_id) as enqueued
|
2745
|
+
FROM tasks t
|
2746
|
+
WHERE t.namespace = :namespace
|
2747
|
+
AND t.created_at >= :start_time
|
2748
|
+
AND t.created_at <= :end_time
|
2749
|
+
{queue_filter}
|
2750
|
+
GROUP BY time_bucket
|
2751
|
+
),
|
2752
|
+
complete_counts AS (
|
2753
|
+
SELECT
|
2754
|
+
to_timestamp(FLOOR(EXTRACT(epoch FROM tr.end_time) / {interval_seconds}) * {interval_seconds}) AS time_bucket,
|
2755
|
+
COUNT(DISTINCT t.stream_id) as completed
|
2756
|
+
FROM task_runs tr
|
2757
|
+
JOIN tasks t ON tr.stream_id = t.stream_id
|
2758
|
+
WHERE t.namespace = :namespace
|
2759
|
+
AND tr.end_time >= :start_time
|
2760
|
+
AND tr.end_time <= :end_time
|
2761
|
+
AND tr.status = 'success'
|
2762
|
+
-- 只统计在时间范围内创建的任务的完成情况
|
2763
|
+
AND t.created_at >= :start_time
|
2764
|
+
AND t.created_at <= :end_time
|
2765
|
+
{queue_filter}
|
2766
|
+
GROUP BY time_bucket
|
2767
|
+
),
|
2768
|
+
failed_counts AS (
|
2769
|
+
SELECT
|
2770
|
+
to_timestamp(FLOOR(EXTRACT(epoch FROM tr.end_time) / {interval_seconds}) * {interval_seconds}) AS time_bucket,
|
2771
|
+
COUNT(DISTINCT t.stream_id) as failed
|
2772
|
+
FROM task_runs tr
|
2773
|
+
JOIN tasks t ON tr.stream_id = t.stream_id
|
2774
|
+
WHERE t.namespace = :namespace
|
2775
|
+
AND tr.end_time >= :start_time
|
2776
|
+
AND tr.end_time <= :end_time
|
2777
|
+
AND tr.status = 'error'
|
2778
|
+
-- 只统计在时间范围内创建的任务的失败情况
|
2779
|
+
AND t.created_at >= :start_time
|
2780
|
+
AND t.created_at <= :end_time
|
2781
|
+
{queue_filter}
|
2782
|
+
GROUP BY time_bucket
|
2783
|
+
),
|
2784
|
+
concurrency_data AS (
|
2785
|
+
-- 计算每个时间桶内任务开始时的并发数
|
2786
|
+
-- 统计唯一任务,避免重试导致的重复计数
|
2787
|
+
SELECT
|
2788
|
+
to_timestamp(FLOOR(EXTRACT(epoch FROM tr.start_time) / {interval_seconds}) * {interval_seconds}) AS time_bucket,
|
2789
|
+
COUNT(DISTINCT t.stream_id) as concurrent_tasks
|
2790
|
+
FROM task_runs tr
|
2791
|
+
JOIN tasks t ON tr.stream_id = t.stream_id
|
2792
|
+
WHERE t.namespace = :namespace
|
2793
|
+
AND tr.start_time >= :start_time
|
2794
|
+
AND tr.start_time <= :end_time
|
2795
|
+
AND tr.start_time IS NOT NULL
|
2796
|
+
AND tr.end_time IS NOT NULL
|
2797
|
+
-- 只统计在时间范围内创建的任务
|
2798
|
+
AND t.created_at >= :start_time
|
2799
|
+
AND t.created_at <= :end_time
|
2800
|
+
{queue_filter}
|
2801
|
+
GROUP BY time_bucket
|
2802
|
+
),
|
2803
|
+
processing_time_data AS (
|
2804
|
+
SELECT
|
2805
|
+
to_timestamp(FLOOR(EXTRACT(epoch FROM tr.end_time) / {interval_seconds}) * {interval_seconds}) AS time_bucket,
|
2806
|
+
AVG(CASE WHEN tr.status = 'success' AND tr.execution_time > 0
|
2807
|
+
THEN tr.execution_time END) as avg_processing_time,
|
2808
|
+
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY
|
2809
|
+
CASE WHEN tr.status = 'success' AND tr.execution_time > 0
|
2810
|
+
THEN tr.execution_time END) as p50_processing_time,
|
2811
|
+
PERCENTILE_CONT(0.9) WITHIN GROUP (ORDER BY
|
2812
|
+
CASE WHEN tr.status = 'success' AND tr.execution_time > 0
|
2813
|
+
THEN tr.execution_time END) as p90_processing_time
|
2814
|
+
FROM task_runs tr
|
2815
|
+
JOIN tasks t ON tr.stream_id = t.stream_id
|
2816
|
+
WHERE t.namespace = :namespace
|
2817
|
+
AND tr.end_time >= :start_time
|
2818
|
+
AND tr.end_time <= :end_time
|
2819
|
+
AND tr.status = 'success'
|
2820
|
+
{queue_filter}
|
2821
|
+
GROUP BY time_bucket
|
2822
|
+
),
|
2823
|
+
creation_latency_data AS (
|
2824
|
+
SELECT
|
2825
|
+
to_timestamp(FLOOR(EXTRACT(epoch FROM tr.start_time) / {interval_seconds}) * {interval_seconds}) AS time_bucket,
|
2826
|
+
AVG(EXTRACT(EPOCH FROM (tr.start_time - t.created_at))) as avg_creation_latency,
|
2827
|
+
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY
|
2828
|
+
EXTRACT(EPOCH FROM (tr.start_time - t.created_at))) as p50_creation_latency,
|
2829
|
+
PERCENTILE_CONT(0.9) WITHIN GROUP (ORDER BY
|
2830
|
+
EXTRACT(EPOCH FROM (tr.start_time - t.created_at))) as p90_creation_latency
|
2831
|
+
FROM task_runs tr
|
2832
|
+
JOIN tasks t ON tr.stream_id = t.stream_id
|
2833
|
+
WHERE t.namespace = :namespace
|
2834
|
+
AND tr.start_time >= :start_time
|
2835
|
+
AND tr.start_time <= :end_time
|
2836
|
+
AND tr.start_time IS NOT NULL
|
2837
|
+
{queue_filter}
|
2838
|
+
GROUP BY time_bucket
|
2839
|
+
)
|
2840
|
+
SELECT
|
2841
|
+
ts.time_bucket,
|
2842
|
+
COALESCE(eq.enqueued, 0) as enqueued,
|
2843
|
+
COALESCE(cc.completed, 0) as completed,
|
2844
|
+
COALESCE(fc.failed, 0) as failed,
|
2845
|
+
COALESCE(cd.concurrent_tasks, 0) as concurrent_tasks,
|
2846
|
+
ROUND(ptd.avg_processing_time::numeric, 6) as avg_processing_time,
|
2847
|
+
ROUND(ptd.p50_processing_time::numeric, 6) as p50_processing_time,
|
2848
|
+
ROUND(ptd.p90_processing_time::numeric, 6) as p90_processing_time,
|
2849
|
+
ROUND(cld.avg_creation_latency::numeric, 3) as avg_creation_latency,
|
2850
|
+
ROUND(cld.p50_creation_latency::numeric, 3) as p50_creation_latency,
|
2851
|
+
ROUND(cld.p90_creation_latency::numeric, 3) as p90_creation_latency
|
2852
|
+
FROM time_series ts
|
2853
|
+
LEFT JOIN enqueue_counts eq ON ts.time_bucket = eq.time_bucket
|
2854
|
+
LEFT JOIN complete_counts cc ON ts.time_bucket = cc.time_bucket
|
2855
|
+
LEFT JOIN failed_counts fc ON ts.time_bucket = fc.time_bucket
|
2856
|
+
LEFT JOIN concurrency_data cd ON ts.time_bucket = cd.time_bucket
|
2857
|
+
LEFT JOIN processing_time_data ptd ON ts.time_bucket = ptd.time_bucket
|
2858
|
+
LEFT JOIN creation_latency_data cld ON ts.time_bucket = cld.time_bucket
|
2859
|
+
ORDER BY ts.time_bucket
|
2860
|
+
""")
|
2861
|
+
|
2862
|
+
# 准备查询参数
|
2863
|
+
query_params = {
|
2864
|
+
'namespace': namespace,
|
2865
|
+
'start_time': start_time,
|
2866
|
+
'end_time': end_time,
|
2867
|
+
'interval_val': interval,
|
2868
|
+
**queue_params
|
2869
|
+
}
|
2870
|
+
|
2871
|
+
print(f'🔍 Dashboard Overview - 最终查询参数: {query_params}')
|
2872
|
+
|
2873
|
+
            result = await session.execute(sql, query_params)

            # Format the data, grouped by business metric
            task_trend = []
            concurrency = []
            processing_time = []
            creation_latency = []

            rows = result.fetchall()
            end_index = len(rows) - 1

            for idx, row in enumerate(rows):
                time_str = row.time_bucket.isoformat()

                # Task processing trend; zero values in interior buckets
                # become None so the chart shows gaps (see the sketch below)
                enqueued_val = row.enqueued if row.enqueued > 0 or idx == 0 or idx == end_index else None
                completed_val = row.completed if row.completed > 0 or idx == 0 or idx == end_index else None
                failed_val = row.failed if row.failed > 0 or idx == 0 or idx == end_index else None

                task_trend.extend([
                    {'time': time_str, 'value': enqueued_val, 'metric': '入队速率'},   # enqueue rate
                    {'time': time_str, 'value': completed_val, 'metric': '完成速率'},  # completion rate
                    {'time': time_str, 'value': failed_val, 'metric': '失败数'}        # failure count
                ])

                # Concurrent task count - report the computed value directly (including 0)
                concurrent_val = row.concurrent_tasks or 0

                concurrency.append({
                    'time': time_str,
                    'value': concurrent_val,
                    'metric': '并发任务数'  # concurrent tasks
                })

                # Task processing time (converted to milliseconds)
                if row.avg_processing_time is not None:
                    avg_time_val = round(float(row.avg_processing_time * 1000), 1)
                else:
                    avg_time_val = None if idx != 0 and idx != end_index else 0

                if row.p50_processing_time is not None:
                    p50_time_val = round(float(row.p50_processing_time * 1000), 1)
                else:
                    p50_time_val = None if idx != 0 and idx != end_index else 0

                if row.p90_processing_time is not None:
                    p90_time_val = round(float(row.p90_processing_time * 1000), 1)
                else:
                    p90_time_val = None if idx != 0 and idx != end_index else 0

                processing_time.extend([
                    {'time': time_str, 'value': avg_time_val, 'metric': '平均处理时间'},  # average processing time
                    {'time': time_str, 'value': p50_time_val, 'metric': 'P50处理时间'},   # P50 processing time
                    {'time': time_str, 'value': p90_time_val, 'metric': 'P90处理时间'}    # P90 processing time
                ])

                # Task execution latency (in seconds)
                if row.avg_creation_latency is not None:
                    avg_latency_val = round(float(row.avg_creation_latency), 3)
                else:
                    avg_latency_val = None if idx != 0 and idx != end_index else 0

                if row.p50_creation_latency is not None:
                    p50_latency_val = round(float(row.p50_creation_latency), 3)
                else:
                    p50_latency_val = None if idx != 0 and idx != end_index else 0

                if row.p90_creation_latency is not None:
                    p90_latency_val = round(float(row.p90_creation_latency), 3)
                else:
                    p90_latency_val = None if idx != 0 and idx != end_index else 0

                creation_latency.extend([
                    {'time': time_str, 'value': avg_latency_val, 'metric': '平均执行延时'},  # average execution latency
                    {'time': time_str, 'value': p50_latency_val, 'metric': 'P50执行延时'},   # P50 execution latency
                    {'time': time_str, 'value': p90_latency_val, 'metric': 'P90执行延时'}    # P90 execution latency
                ])

            total_data_points = len(task_trend) + len(concurrency) + len(processing_time) + len(creation_latency)
            logger.info(f"Fetched {total_data_points} overview statistics data points, granularity: {granularity}")

            return {
                "task_trend": task_trend,
                "concurrency": concurrency,
                "processing_time": processing_time,
                "creation_latency": creation_latency,
                "granularity": granularity
            }

    except Exception as e:
        logger.error(f"Failed to fetch overview statistics: {e}")
        traceback.print_exc()
        return {
            "task_trend": [],
            "concurrency": [],
            "processing_time": [],
            "creation_latency": [],
            "granularity": "minute"
        }

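A side note on the zero handling above: interior buckets where a counter is 0 are emitted as None so the frontend renders a gap instead of a flat zero line, while the first and last buckets always stay numeric so each series spans the whole time window. A minimal standalone sketch of that padding rule (the helper name and signature are illustrative, not part of the package):

```python
from typing import Optional, Sequence

def pad_sparse_series(values: Sequence[float]) -> list[Optional[float]]:
    """Replace zero-valued interior buckets with None (chart gaps),
    keeping the first and last buckets so the line anchors both ends."""
    end = len(values) - 1
    return [
        v if v > 0 or i == 0 or i == end else None
        for i, v in enumerate(values)
    ]

# pad_sparse_series([0, 3, 0, 0, 5, 0]) -> [0, 3, None, None, 5, 0]
```
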
@router.get("/task-processing-time-trend/{namespace}")
|
2976
|
+
async def get_task_processing_time_trend(
|
2977
|
+
namespace: str,
|
2978
|
+
time_range: str = "24h"
|
2979
|
+
):
|
2980
|
+
"""
|
2981
|
+
获取任务处理时间趋势数据(用于时间序列图表)
|
2982
|
+
|
2983
|
+
Args:
|
2984
|
+
namespace: 命名空间名称
|
2985
|
+
time_range: 时间范围(如'1h', '24h', '7d')
|
2986
|
+
|
2987
|
+
Returns:
|
2988
|
+
按时间间隔分组的处理时间统计(平均值、P50、P90等)
|
2989
|
+
"""
|
2990
|
+
try:
|
2991
|
+
conn = await data_access.manager.get_connection(namespace)
|
2992
|
+
|
2993
|
+
# 如果没有PostgreSQL配置,返回空数据
|
2994
|
+
if not conn.pg_config:
|
2995
|
+
return {
|
2996
|
+
"success": True,
|
2997
|
+
"data": []
|
2998
|
+
}
|
2999
|
+
|
3000
|
+
# 计算时间范围和间隔
|
3001
|
+
end_time = datetime.now(timezone.utc)
|
3002
|
+
interval = "15 minutes" # 默认15分钟间隔
|
3003
|
+
|
3004
|
+
if time_range.endswith('m'):
|
3005
|
+
minutes = int(time_range[:-1])
|
3006
|
+
start_time = end_time - timedelta(minutes=minutes)
|
3007
|
+
if minutes <= 60:
|
3008
|
+
interval = "5 minutes"
|
3009
|
+
elif minutes <= 240:
|
3010
|
+
interval = "15 minutes"
|
3011
|
+
else:
|
3012
|
+
interval = "1 hour"
|
3013
|
+
elif time_range.endswith('h'):
|
3014
|
+
hours = int(time_range[:-1])
|
3015
|
+
start_time = end_time - timedelta(hours=hours)
|
3016
|
+
if hours <= 6:
|
3017
|
+
interval = "15 minutes"
|
3018
|
+
elif hours <= 24:
|
3019
|
+
interval = "1 hour"
|
3020
|
+
else:
|
3021
|
+
interval = "4 hours"
|
3022
|
+
elif time_range.endswith('d'):
|
3023
|
+
days = int(time_range[:-1])
|
3024
|
+
start_time = end_time - timedelta(days=days)
|
3025
|
+
if days <= 1:
|
3026
|
+
interval = "1 hour"
|
3027
|
+
elif days <= 7:
|
3028
|
+
interval = "4 hours"
|
3029
|
+
else:
|
3030
|
+
interval = "1 day"
|
3031
|
+
else:
|
3032
|
+
start_time = end_time - timedelta(hours=24) # 默认24小时
|
3033
|
+
interval = "1 hour"
|
3034
|
+
|
3035
|
+
async with await conn.get_pg_session() as session:
|
3036
|
+
# 首先检查execution_time的样本数据
|
3037
|
+
sample_sql = text("""
|
3038
|
+
SELECT
|
3039
|
+
tr.execution_time,
|
3040
|
+
tr.duration,
|
3041
|
+
tr.start_time,
|
3042
|
+
tr.end_time,
|
3043
|
+
t.queue
|
3044
|
+
FROM task_runs tr
|
3045
|
+
JOIN tasks t ON tr.stream_id = t.stream_id
|
3046
|
+
WHERE t.namespace = :namespace
|
3047
|
+
AND tr.end_time >= :start_time
|
3048
|
+
AND tr.end_time <= :end_time
|
3049
|
+
AND tr.status = 'success'
|
3050
|
+
AND (tr.execution_time IS NOT NULL OR tr.duration IS NOT NULL)
|
3051
|
+
ORDER BY tr.end_time DESC
|
3052
|
+
LIMIT 10
|
3053
|
+
""")
|
3054
|
+
|
3055
|
+
sample_result = await session.execute(sample_sql, {
|
3056
|
+
'namespace': namespace,
|
3057
|
+
'start_time': start_time,
|
3058
|
+
'end_time': end_time
|
3059
|
+
})
|
3060
|
+
|
3061
|
+
logger.info("=== 样本execution_time数据 ===")
|
3062
|
+
for sample_row in sample_result:
|
3063
|
+
logger.info(f"execution_time={sample_row.execution_time}, duration={sample_row.duration}, "
|
3064
|
+
f"queue={sample_row.queue}, start_time={sample_row.start_time}, end_time={sample_row.end_time}")
|
3065
|
+
|
3066
|
+
# 查询处理时间趋势数据 - 使用duration字段如果execution_time为空
|
3067
|
+
sql = text("""
|
3068
|
+
WITH processing_stats AS (
|
3069
|
+
SELECT
|
3070
|
+
date_trunc('hour', tr.end_time) as time_bucket,
|
3071
|
+
COALESCE(tr.execution_time, tr.duration) as processing_time,
|
3072
|
+
t.queue
|
3073
|
+
FROM task_runs tr
|
3074
|
+
JOIN tasks t ON tr.stream_id = t.stream_id
|
3075
|
+
WHERE t.namespace = :namespace
|
3076
|
+
AND tr.end_time >= :start_time
|
3077
|
+
AND tr.end_time <= :end_time
|
3078
|
+
AND tr.status = 'success'
|
3079
|
+
AND (tr.execution_time IS NOT NULL OR tr.duration IS NOT NULL)
|
3080
|
+
AND COALESCE(tr.execution_time, tr.duration) > 0
|
3081
|
+
)
|
3082
|
+
SELECT
|
3083
|
+
ps.time_bucket,
|
3084
|
+
COUNT(ps.processing_time) as task_count,
|
3085
|
+
ROUND(AVG(ps.processing_time)::numeric, 6) as avg_processing_time,
|
3086
|
+
ROUND(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY ps.processing_time)::numeric, 6) as p50_processing_time,
|
3087
|
+
ROUND(PERCENTILE_CONT(0.9) WITHIN GROUP (ORDER BY ps.processing_time)::numeric, 6) as p90_processing_time,
|
3088
|
+
ROUND(MIN(ps.processing_time)::numeric, 6) as min_processing_time,
|
3089
|
+
ROUND(MAX(ps.processing_time)::numeric, 6) as max_processing_time
|
3090
|
+
FROM processing_stats ps
|
3091
|
+
GROUP BY ps.time_bucket
|
3092
|
+
ORDER BY ps.time_bucket
|
3093
|
+
""")
|
3094
|
+
|
3095
|
+
result = await session.execute(sql, {
|
3096
|
+
'namespace': namespace,
|
3097
|
+
'start_time': start_time,
|
3098
|
+
'end_time': end_time
|
3099
|
+
})
|
3100
|
+
|
3101
|
+
data = []
|
3102
|
+
for row in result:
|
3103
|
+
# 调试日志
|
3104
|
+
logger.info(f"处理时间数据行: time_bucket={row.time_bucket}, task_count={row.task_count}, "
|
3105
|
+
f"avg_processing_time={row.avg_processing_time}, "
|
3106
|
+
f"p50_processing_time={row.p50_processing_time}, "
|
3107
|
+
f"p90_processing_time={row.p90_processing_time}")
|
3108
|
+
|
3109
|
+
# 将处理时间从秒转换为毫秒
|
3110
|
+
data.append({
|
3111
|
+
'time': row.time_bucket.isoformat() if row.time_bucket else None,
|
3112
|
+
'metric': '平均处理时间',
|
3113
|
+
'value': round(float(row.avg_processing_time * 1000), 1) if row.avg_processing_time else 0,
|
3114
|
+
'queue': 'all',
|
3115
|
+
'task_count': row.task_count or 0
|
3116
|
+
})
|
3117
|
+
data.append({
|
3118
|
+
'time': row.time_bucket.isoformat() if row.time_bucket else None,
|
3119
|
+
'metric': 'P50处理时间',
|
3120
|
+
'value': round(float(row.p50_processing_time * 1000), 1) if row.p50_processing_time else 0,
|
3121
|
+
'queue': 'all',
|
3122
|
+
'task_count': row.task_count or 0
|
3123
|
+
})
|
3124
|
+
data.append({
|
3125
|
+
'time': row.time_bucket.isoformat() if row.time_bucket else None,
|
3126
|
+
'metric': 'P90处理时间',
|
3127
|
+
'value': round(float(row.p90_processing_time * 1000), 1) if row.p90_processing_time else 0,
|
3128
|
+
'queue': 'all',
|
3129
|
+
'task_count': row.task_count or 0
|
3130
|
+
})
|
3131
|
+
|
3132
|
+
logger.info(f"获取到 {len(data)} 条处理时间趋势数据")
|
3133
|
+
|
3134
|
+
return {
|
3135
|
+
"success": True,
|
3136
|
+
"data": data,
|
3137
|
+
"time_range": time_range,
|
3138
|
+
"start_time": start_time.isoformat(),
|
3139
|
+
"end_time": end_time.isoformat()
|
3140
|
+
}
|
3141
|
+
|
3142
|
+
except Exception as e:
|
3143
|
+
logger.error(f"获取任务处理时间趋势数据失败: {e}")
|
3144
|
+
traceback.print_exc()
|
3145
|
+
return {
|
3146
|
+
"success": False,
|
3147
|
+
"error": str(e),
|
3148
|
+
"data": []
|
3149
|
+
}
|
3150
|
+
|
3151
|
+
|
3152
|
+
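Both trend endpoints repeat this suffix-based time_range parsing ('30m' / '24h' / '7d') and derive a bucket interval from the span. A sketch of how that could be factored into one helper, following the same thresholds as the code above (the function itself is hypothetical, not an existing package API):

```python
from datetime import datetime, timedelta, timezone

def parse_time_range(time_range: str) -> tuple[datetime, datetime, str]:
    """Map a '30m' / '24h' / '7d' style range to (start, end, bucket interval).

    Unknown or malformed ranges fall back to the last 24 hours with
    hourly buckets, mirroring the endpoint defaults.
    """
    end = datetime.now(timezone.utc)
    if not time_range or not time_range[:-1].isdigit():
        return end - timedelta(hours=24), end, "1 hour"
    value, unit = int(time_range[:-1]), time_range[-1]
    if unit == 'm':
        interval = "5 minutes" if value <= 60 else "15 minutes" if value <= 240 else "1 hour"
        return end - timedelta(minutes=value), end, interval
    if unit == 'h':
        interval = "15 minutes" if value <= 6 else "1 hour" if value <= 24 else "4 hours"
        return end - timedelta(hours=value), end, interval
    if unit == 'd':
        interval = "1 hour" if value <= 1 else "4 hours" if value <= 7 else "1 day"
        return end - timedelta(days=value), end, interval
    return end - timedelta(hours=24), end, "1 hour"

# parse_time_range("6h") -> (now - 6 hours, now, "15 minutes")
```
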
@router.get("/task-status-distribution/{namespace}")
|
3153
|
+
async def get_task_status_distribution(
|
3154
|
+
namespace: str,
|
3155
|
+
time_range: str = "24h",
|
3156
|
+
queues: Optional[str] = Query(None, description="逗号分隔的队列名称列表")
|
3157
|
+
):
|
3158
|
+
"""
|
3159
|
+
获取任务状态分布数据,按队列分组统计
|
3160
|
+
|
3161
|
+
Args:
|
3162
|
+
namespace: 命名空间名称
|
3163
|
+
time_range: 时间范围(如'1h', '24h', '7d')
|
3164
|
+
|
3165
|
+
Returns:
|
3166
|
+
按队列分组的任务状态分布数据,用于饼图展示
|
3167
|
+
"""
|
3168
|
+
try:
|
3169
|
+
conn = await data_access.manager.get_connection(namespace)
|
3170
|
+
|
3171
|
+
if not conn.pg_config:
|
3172
|
+
return {"data": []}
|
3173
|
+
|
3174
|
+
# 计算时间范围
|
3175
|
+
end_time = datetime.now(timezone.utc)
|
3176
|
+
if time_range.endswith('m'):
|
3177
|
+
minutes = int(time_range[:-1])
|
3178
|
+
start_time = end_time - timedelta(minutes=minutes)
|
3179
|
+
elif time_range.endswith('h'):
|
3180
|
+
hours = int(time_range[:-1])
|
3181
|
+
start_time = end_time - timedelta(hours=hours)
|
3182
|
+
elif time_range.endswith('d'):
|
3183
|
+
days = int(time_range[:-1])
|
3184
|
+
start_time = end_time - timedelta(days=days)
|
3185
|
+
else:
|
3186
|
+
start_time = end_time - timedelta(hours=24)
|
3187
|
+
|
3188
|
+
# 解析队列列表
|
3189
|
+
queue_list = []
|
3190
|
+
if queues:
|
3191
|
+
queue_list = [q.strip() for q in queues.split(',') if q.strip()]
|
3192
|
+
|
3193
|
+
async with await conn.get_pg_session() as session:
|
3194
|
+
# 构建队列筛选条件
|
3195
|
+
queue_filter = ""
|
3196
|
+
if queue_list:
|
3197
|
+
queue_placeholders = ','.join([f':queue_{i}' for i in range(len(queue_list))])
|
3198
|
+
queue_filter = f"AND t.queue IN ({queue_placeholders})"
|
3199
|
+
|
3200
|
+
# 查询任务状态分布数据
|
3201
|
+
distribution_sql = text(f"""
|
3202
|
+
SELECT
|
3203
|
+
t.queue,
|
3204
|
+
tr.status,
|
3205
|
+
COUNT(*) as count
|
3206
|
+
FROM task_runs tr
|
3207
|
+
JOIN tasks t ON tr.stream_id = t.stream_id
|
3208
|
+
WHERE t.namespace = :namespace
|
3209
|
+
AND tr.end_time >= :start_time
|
3210
|
+
AND tr.end_time <= :end_time
|
3211
|
+
AND tr.status IS NOT NULL
|
3212
|
+
{queue_filter}
|
3213
|
+
GROUP BY t.queue, tr.status
|
3214
|
+
ORDER BY t.queue, tr.status
|
3215
|
+
""")
|
3216
|
+
|
3217
|
+
# 准备查询参数
|
3218
|
+
query_params = {
|
3219
|
+
'namespace': namespace,
|
3220
|
+
'start_time': start_time,
|
3221
|
+
'end_time': end_time
|
3222
|
+
}
|
3223
|
+
|
3224
|
+
# 添加队列参数
|
3225
|
+
for i, queue in enumerate(queue_list):
|
3226
|
+
query_params[f'queue_{i}'] = queue
|
3227
|
+
|
3228
|
+
result = await session.execute(distribution_sql, query_params)
|
3229
|
+
|
3230
|
+
# 处理数据,按队列分组
|
3231
|
+
queue_data = {}
|
3232
|
+
total_by_status = {}
|
3233
|
+
|
3234
|
+
for row in result.fetchall():
|
3235
|
+
queue = row.queue
|
3236
|
+
status = row.status
|
3237
|
+
count = row.count
|
3238
|
+
|
3239
|
+
# 按队列统计
|
3240
|
+
if queue not in queue_data:
|
3241
|
+
queue_data[queue] = {'success': 0, 'error': 0, 'timeout': 0}
|
3242
|
+
|
3243
|
+
if status == 'success':
|
3244
|
+
queue_data[queue]['success'] += count
|
3245
|
+
elif status == 'error':
|
3246
|
+
queue_data[queue]['error'] += count
|
3247
|
+
elif status == 'timeout':
|
3248
|
+
queue_data[queue]['timeout'] += count
|
3249
|
+
|
3250
|
+
# 按状态统计总数
|
3251
|
+
if status not in total_by_status:
|
3252
|
+
total_by_status[status] = 0
|
3253
|
+
total_by_status[status] += count
|
3254
|
+
|
3255
|
+
# 格式化数据为饼图格式
|
3256
|
+
data = []
|
3257
|
+
|
3258
|
+
# 方案1: 按队列分组显示
|
3259
|
+
for queue, counts in queue_data.items():
|
3260
|
+
total_queue = counts['success'] + counts['error'] + counts['timeout']
|
3261
|
+
if total_queue > 0:
|
3262
|
+
data.append({
|
3263
|
+
'type': f'{queue} (成功)',
|
3264
|
+
'value': counts['success'],
|
3265
|
+
'queue': queue,
|
3266
|
+
'status': 'success'
|
3267
|
+
})
|
3268
|
+
if counts['error'] > 0:
|
3269
|
+
data.append({
|
3270
|
+
'type': f'{queue} (失败)',
|
3271
|
+
'value': counts['error'],
|
3272
|
+
'queue': queue,
|
3273
|
+
'status': 'error'
|
3274
|
+
})
|
3275
|
+
if counts['timeout'] > 0:
|
3276
|
+
data.append({
|
3277
|
+
'type': f'{queue} (超时)',
|
3278
|
+
'value': counts['timeout'],
|
3279
|
+
'queue': queue,
|
3280
|
+
'status': 'timeout'
|
3281
|
+
})
|
3282
|
+
|
3283
|
+
# 如果没有数据,返回默认值
|
3284
|
+
if not data:
|
3285
|
+
data = [
|
3286
|
+
{'type': '暂无数据', 'value': 1, 'queue': '', 'status': 'empty'}
|
3287
|
+
]
|
3288
|
+
|
3289
|
+
return {"data": data}
|
3290
|
+
|
3291
|
+
except Exception as e:
|
3292
|
+
logger.error(f"获取任务状态分布失败: {e}")
|
3293
|
+
traceback.print_exc()
|
3294
|
+
raise HTTPException(status_code=500, detail=str(e))
|