jettask 0.2.15__py3-none-any.whl → 0.2.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jettask/__init__.py +14 -35
- jettask/{webui/__main__.py → __main__.py} +4 -4
- jettask/api/__init__.py +103 -0
- jettask/api/v1/__init__.py +29 -0
- jettask/api/v1/alerts.py +226 -0
- jettask/api/v1/analytics.py +323 -0
- jettask/api/v1/namespaces.py +134 -0
- jettask/api/v1/overview.py +136 -0
- jettask/api/v1/queues.py +530 -0
- jettask/api/v1/scheduled.py +420 -0
- jettask/api/v1/settings.py +44 -0
- jettask/{webui/api.py → api.py} +4 -46
- jettask/{webui/backend → backend}/main.py +21 -109
- jettask/{webui/backend → backend}/main_unified.py +1 -1
- jettask/{webui/backend → backend}/namespace_api_old.py +3 -30
- jettask/{webui/backend → backend}/namespace_data_access.py +2 -1
- jettask/{webui/backend → backend}/unified_api_router.py +14 -74
- jettask/{core/cli.py → cli.py} +106 -26
- jettask/config/nacos_config.py +386 -0
- jettask/core/app.py +8 -100
- jettask/core/db_manager.py +515 -0
- jettask/core/event_pool.py +5 -2
- jettask/core/unified_manager_base.py +59 -14
- jettask/{webui/db_init.py → db_init.py} +1 -1
- jettask/executors/asyncio.py +2 -2
- jettask/{webui/integrated_gradio_app.py → integrated_gradio_app.py} +1 -1
- jettask/{webui/multi_namespace_consumer.py → multi_namespace_consumer.py} +5 -2
- jettask/{webui/pg_consumer.py → pg_consumer.py} +137 -69
- jettask/{webui/run.py → run.py} +1 -1
- jettask/{webui/run_webui.py → run_webui.py} +4 -4
- jettask/scheduler/manager.py +6 -0
- jettask/scheduler/multi_namespace_scheduler.py +2 -2
- jettask/scheduler/unified_manager.py +5 -5
- jettask/scheduler/unified_scheduler_manager.py +20 -12
- jettask/schemas/__init__.py +166 -0
- jettask/schemas/alert.py +99 -0
- jettask/schemas/backlog.py +122 -0
- jettask/schemas/common.py +139 -0
- jettask/schemas/monitoring.py +181 -0
- jettask/schemas/namespace.py +168 -0
- jettask/schemas/queue.py +83 -0
- jettask/schemas/scheduled_task.py +128 -0
- jettask/schemas/task.py +70 -0
- jettask/services/__init__.py +24 -0
- jettask/services/alert_service.py +454 -0
- jettask/services/analytics_service.py +46 -0
- jettask/services/overview_service.py +978 -0
- jettask/services/queue_service.py +711 -0
- jettask/services/redis_monitor_service.py +151 -0
- jettask/services/scheduled_task_service.py +207 -0
- jettask/services/settings_service.py +758 -0
- jettask/services/task_service.py +157 -0
- jettask/{webui/task_center.py → task_center.py} +30 -8
- jettask/{webui/task_center_client.py → task_center_client.py} +1 -1
- jettask/{webui/config.py → webui_config.py} +6 -1
- jettask/webui_exceptions.py +67 -0
- jettask/webui_sql/verify_database.sql +72 -0
- {jettask-0.2.15.dist-info → jettask-0.2.17.dist-info}/METADATA +2 -1
- jettask-0.2.17.dist-info/RECORD +150 -0
- {jettask-0.2.15.dist-info → jettask-0.2.17.dist-info}/entry_points.txt +1 -1
- jettask/webui/backend/data_api.py +0 -3294
- jettask/webui/backend/namespace_api.py +0 -295
- jettask/webui/backend/queue_backlog_api.py +0 -727
- jettask/webui/backend/redis_monitor_api.py +0 -476
- jettask/webui/frontend/index.html +0 -13
- jettask/webui/frontend/package.json +0 -30
- jettask/webui/frontend/src/App.css +0 -109
- jettask/webui/frontend/src/App.jsx +0 -66
- jettask/webui/frontend/src/components/NamespaceSelector.jsx +0 -166
- jettask/webui/frontend/src/components/QueueBacklogChart.jsx +0 -298
- jettask/webui/frontend/src/components/QueueBacklogTrend.jsx +0 -638
- jettask/webui/frontend/src/components/QueueDetailsTable.css +0 -65
- jettask/webui/frontend/src/components/QueueDetailsTable.jsx +0 -487
- jettask/webui/frontend/src/components/QueueDetailsTableV2.jsx +0 -465
- jettask/webui/frontend/src/components/ScheduledTaskFilter.jsx +0 -423
- jettask/webui/frontend/src/components/TaskFilter.jsx +0 -425
- jettask/webui/frontend/src/components/TimeRangeSelector.css +0 -21
- jettask/webui/frontend/src/components/TimeRangeSelector.jsx +0 -160
- jettask/webui/frontend/src/components/charts/QueueChart.jsx +0 -111
- jettask/webui/frontend/src/components/charts/QueueTrendChart.jsx +0 -115
- jettask/webui/frontend/src/components/charts/WorkerChart.jsx +0 -40
- jettask/webui/frontend/src/components/common/StatsCard.jsx +0 -18
- jettask/webui/frontend/src/components/layout/AppLayout.css +0 -95
- jettask/webui/frontend/src/components/layout/AppLayout.jsx +0 -49
- jettask/webui/frontend/src/components/layout/Header.css +0 -106
- jettask/webui/frontend/src/components/layout/Header.jsx +0 -106
- jettask/webui/frontend/src/components/layout/SideMenu.css +0 -137
- jettask/webui/frontend/src/components/layout/SideMenu.jsx +0 -209
- jettask/webui/frontend/src/components/layout/TabsNav.css +0 -244
- jettask/webui/frontend/src/components/layout/TabsNav.jsx +0 -206
- jettask/webui/frontend/src/components/layout/UserInfo.css +0 -197
- jettask/webui/frontend/src/components/layout/UserInfo.jsx +0 -197
- jettask/webui/frontend/src/contexts/LoadingContext.jsx +0 -27
- jettask/webui/frontend/src/contexts/NamespaceContext.jsx +0 -72
- jettask/webui/frontend/src/contexts/TabsContext.backup.jsx +0 -245
- jettask/webui/frontend/src/index.css +0 -114
- jettask/webui/frontend/src/main.jsx +0 -22
- jettask/webui/frontend/src/pages/Alerts.jsx +0 -684
- jettask/webui/frontend/src/pages/Dashboard/index.css +0 -35
- jettask/webui/frontend/src/pages/Dashboard/index.jsx +0 -281
- jettask/webui/frontend/src/pages/Dashboard.jsx +0 -1330
- jettask/webui/frontend/src/pages/QueueDetail.jsx +0 -1117
- jettask/webui/frontend/src/pages/QueueMonitor.jsx +0 -527
- jettask/webui/frontend/src/pages/Queues.jsx +0 -12
- jettask/webui/frontend/src/pages/ScheduledTasks.jsx +0 -810
- jettask/webui/frontend/src/pages/Settings.jsx +0 -801
- jettask/webui/frontend/src/pages/Workers.jsx +0 -12
- jettask/webui/frontend/src/services/api.js +0 -159
- jettask/webui/frontend/src/services/queueTrend.js +0 -166
- jettask/webui/frontend/src/utils/suppressWarnings.js +0 -22
- jettask/webui/frontend/src/utils/userPreferences.js +0 -154
- jettask/webui/frontend/vite.config.js +0 -26
- jettask/webui/sql/init_database.sql +0 -640
- jettask-0.2.15.dist-info/RECORD +0 -172
- /jettask/{webui/backend → backend}/__init__.py +0 -0
- /jettask/{webui/backend → backend}/api/__init__.py +0 -0
- /jettask/{webui/backend → backend}/api/v1/__init__.py +0 -0
- /jettask/{webui/backend → backend}/api/v1/monitoring.py +0 -0
- /jettask/{webui/backend → backend}/api/v1/namespaces.py +0 -0
- /jettask/{webui/backend → backend}/api/v1/queues.py +0 -0
- /jettask/{webui/backend → backend}/api/v1/tasks.py +0 -0
- /jettask/{webui/backend → backend}/config.py +0 -0
- /jettask/{webui/backend → backend}/core/__init__.py +0 -0
- /jettask/{webui/backend → backend}/core/cache.py +0 -0
- /jettask/{webui/backend → backend}/core/database.py +0 -0
- /jettask/{webui/backend → backend}/core/exceptions.py +0 -0
- /jettask/{webui/backend → backend}/data_access.py +0 -0
- /jettask/{webui/backend → backend}/dependencies.py +0 -0
- /jettask/{webui/backend → backend}/init_meta_db.py +0 -0
- /jettask/{webui/backend → backend}/main_v2.py +0 -0
- /jettask/{webui/backend → backend}/models/__init__.py +0 -0
- /jettask/{webui/backend → backend}/models/requests.py +0 -0
- /jettask/{webui/backend → backend}/models/responses.py +0 -0
- /jettask/{webui/backend → backend}/queue_stats_v2.py +0 -0
- /jettask/{webui/backend → backend}/services/__init__.py +0 -0
- /jettask/{webui/backend → backend}/start.py +0 -0
- /jettask/{webui/cleanup_deprecated_tables.sql → cleanup_deprecated_tables.sql} +0 -0
- /jettask/{webui/gradio_app.py → gradio_app.py} +0 -0
- /jettask/{webui/__init__.py → main.py} +0 -0
- /jettask/{webui/models.py → models.py} +0 -0
- /jettask/{webui/run_monitor.py → run_monitor.py} +0 -0
- /jettask/{webui/schema.sql → schema.sql} +0 -0
- /jettask/{webui/unified_consumer_manager.py → unified_consumer_manager.py} +0 -0
- /jettask/{webui/models → webui_models}/__init__.py +0 -0
- /jettask/{webui/models → webui_models}/namespace.py +0 -0
- /jettask/{webui/sql → webui_sql}/batch_upsert_functions.sql +0 -0
- {jettask-0.2.15.dist-info → jettask-0.2.17.dist-info}/WHEEL +0 -0
- {jettask-0.2.15.dist-info → jettask-0.2.17.dist-info}/licenses/LICENSE +0 -0
- {jettask-0.2.15.dist-info → jettask-0.2.17.dist-info}/top_level.txt +0 -0
jettask/services/overview_service.py
@@ -0,0 +1,978 @@
+"""
+Overview service layer.
+Business logic for the system overview and health checks.
+"""
+from datetime import datetime, timedelta, timezone
+from typing import Dict, Any, List, Optional
+import logging
+import time
+import traceback
+from sqlalchemy import text
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from jettask.schemas import TimeRangeQuery
+from jettask.core.db_manager import get_db_manager
+
+logger = logging.getLogger(__name__)
+
+# Obtain the global data-access instance
+# via the new unified database manager
+
+
+class TimeRangeResult:
+    """Resolved time-range parameters."""
+    def __init__(self, start_time: datetime, end_time: datetime, interval: str, interval_seconds: int, granularity: str):
+        self.start_time = start_time
+        self.end_time = end_time
+        self.interval = interval
+        self.interval_seconds = interval_seconds
+        self.granularity = granularity
+
+
+class OverviewService:
+    """Overview service."""
+
+    @staticmethod
+    def get_root_info() -> Dict[str, Any]:
+        """
+        Get root-path information.
+
+        Returns:
+            Basic API information
+        """
+        return {
+            "message": "JetTask Monitor API",
+            "version": "1.0.0",
+            "timestamp": datetime.now(timezone.utc).isoformat()
+        }
+
+    @staticmethod
+    def get_health_status() -> Dict[str, Any]:
+        """
+        Get the health status.
+
+        Returns:
+            Health-status information
+        """
+        return {
+            "status": "healthy",
+            "timestamp": datetime.now(timezone.utc).isoformat()
+        }
+
+    @staticmethod
+    async def get_system_stats(namespace: str) -> Dict[str, Any]:
+        """
+        Get system statistics for the given namespace.
+
+        Args:
+            namespace: Namespace name
+
+        Returns:
+            System statistics
+        """
+        db_manager = get_db_manager()
+        pool = await db_manager.get_pool(namespace)
+        redis_client = await pool.get_redis_client()
+
+        try:
+            # Tally keys of each type
+            stats = {
+                'namespace': namespace,
+                'queues': 0,
+                'tasks': 0,
+                'delayed_tasks': 0,
+                'workers': 0
+            }
+
+            # Count queues
+            queue_pattern = f"{namespace}:QUEUE:*"
+            async for _ in redis_client.scan_iter(match=queue_pattern):
+                stats['queues'] += 1
+
+            # Count tasks
+            task_pattern = f"{namespace}:TASK:*"
+            async for _ in redis_client.scan_iter(match=task_pattern):
+                stats['tasks'] += 1
+
+            # Count delayed tasks
+            delayed_pattern = f"{namespace}:DELAYED_QUEUE:*"
+            async for key in redis_client.scan_iter(match=delayed_pattern):
+                count = await redis_client.zcard(key)
+                stats['delayed_tasks'] += count
+
+            # Count workers
+            worker_pattern = f"{namespace}:WORKER:*"
+            async for _ in redis_client.scan_iter(match=worker_pattern):
+                stats['workers'] += 1
+
+            return stats
+
+        finally:
+            await redis_client.aclose()
+
+    @staticmethod
+    async def get_dashboard_stats(
+        namespace: str,
+        time_range: str = "24h",
+        queues: Optional[str] = None
+    ) -> Dict[str, Any]:
+        """
+        Get dashboard statistics.
+
+        Args:
+            namespace: Namespace name
+            time_range: Time range
+            queues: Comma-separated list of queue names
+
+        Returns:
+            Dashboard statistics
+        """
+        db_manager = get_db_manager()
+        pool = await db_manager.get_pool(namespace)
+
+        # Return empty data when no PostgreSQL is configured
+        # Check whether PostgreSQL is configured
+        if not pool.config.has_postgres():
+            return {
+                "success": True,
+                "data": _get_empty_dashboard_stats()
+            }
+
+        # Compute the time range
+        end_time = datetime.now(timezone.utc)
+        start_time = _parse_time_range(time_range, end_time)
+
+        # Build the queue filter
+        queue_filter, queue_list, queue_params = _build_queue_filter_and_params(queues)
+
+        async with pool.get_sa_session() as session:
+            # Fetch the statistics
+            stats_data = await _get_task_statistics(
+                session, namespace, start_time, end_time,
+                queue_filter, queue_params
+            )
+
+            # Compute the throughput
+            throughput = await _calculate_throughput(
+                session, namespace, queue_filter, queue_params
+            )
+
+            # Fetch the task-distribution data
+            # Merge all query parameters
+            distribution_params = {
+                'namespace': namespace,
+                'start_time': start_time,
+                'end_time': end_time,
+                **queue_params
+            }
+            distribution_data = await _get_task_distribution(
+                session, namespace, start_time, end_time,
+                queue_filter, distribution_params
+            )
+
+            return {
+                "success": True,
+                "data": {
+                    **stats_data,
+                    "throughput": throughput,
+                    "distribution": distribution_data
+                }
+            }
+
+    @staticmethod
+    async def get_top_queues(
+        namespace: str,
+        metric: str = "backlog",
+        limit: int = 10,
+        time_range: str = "24h",
+        queues: Optional[str] = None
+    ) -> Dict[str, Any]:
+        """
+        Get the queue leaderboard.
+
+        Args:
+            namespace: Namespace name
+            metric: Metric type (backlog/error)
+            limit: Maximum number of queues to return
+            time_range: Time range
+            queues: Comma-separated list of queue names
+
+        Returns:
+            Queue leaderboard data
+        """
+        if metric == "backlog":
+            return await _get_top_backlog_queues(namespace, limit, time_range, queues)
+        elif metric == "error":
+            return await _get_top_error_queues(namespace, limit, time_range, queues)
+        else:
+            raise ValueError(f"Unsupported metric type: {metric}")
+
+    @staticmethod
+    async def get_dashboard_overview_stats(
+        namespace: str,
+        query: TimeRangeQuery
+    ) -> Dict[str, Any]:
+        """
+        Get the unified statistics for the overview page.
+
+        Args:
+            namespace: Namespace name
+            query: Time-range query parameters
+
+        Returns:
+            Unified time-series data
+        """
+        db_manager = get_db_manager()
+        pool = await db_manager.get_pool(namespace)
+
+        # Return empty data when no PostgreSQL is configured
+        # Check whether PostgreSQL is configured
+        if not pool.config.has_postgres():
+            return _get_empty_overview_stats()
+
+        # Resolve the time range
+        time_range_result = _parse_time_range_query(query)
+
+        # Build the queue filter
+        # Use the queues field of TimeRangeQuery (supports a list of queues)
+        queue_list = query.queues if hasattr(query, 'queues') and query.queues else None
+        queue_filter, _, queue_params = _build_queue_filter_and_params(queue_list)
+
+        async with pool.get_sa_session() as session:
+            # Run the unified query
+            result = await _execute_overview_query(
+                session, namespace, time_range_result,
+                queue_filter, queue_params
+            )
+
+            # Format the data
+            return _format_overview_data(result, time_range_result.granularity)
+
+
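OverviewService is a collection of static methods, so it can be exercised directly, outside the HTTP layer. A minimal sketch, assuming a configured namespace named "default" (an assumption; the HTTP routes that wrap these calls live in the new jettask/api/v1/overview.py):

```python
import asyncio

from jettask.services.overview_service import OverviewService

async def main():
    # Synchronous helpers: no I/O involved
    print(OverviewService.get_root_info())
    print(OverviewService.get_health_status())

    # Async helpers hit Redis (and PostgreSQL, when configured)
    stats = await OverviewService.get_system_stats("default")
    print(stats)  # {'namespace': 'default', 'queues': ..., 'tasks': ..., ...}

asyncio.run(main())
```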
+# ============ Helper functions ============
+
+def _parse_time_range(time_range: str, end_time: datetime) -> datetime:
+    """Parse a time-range string."""
+    if time_range.endswith('m'):
+        minutes = int(time_range[:-1])
+        return end_time - timedelta(minutes=minutes)
+    elif time_range.endswith('h'):
+        hours = int(time_range[:-1])
+        return end_time - timedelta(hours=hours)
+    elif time_range.endswith('d'):
+        days = int(time_range[:-1])
+        return end_time - timedelta(days=days)
+    else:
+        return end_time - timedelta(hours=24)  # default: 24 hours
+
+
+def _parse_time_range_query(query: TimeRangeQuery) -> TimeRangeResult:
+    """Parse a TimeRangeQuery object."""
+    # TimeRangeQuery has start_time, end_time and interval fields,
+    # but no time_range field.
+
+    # Use the end time if given; otherwise use the current time
+    if query.end_time:
+        end_time = datetime.fromisoformat(query.end_time.replace('Z', '+00:00')) if isinstance(query.end_time, str) else query.end_time
+    else:
+        end_time = datetime.now(timezone.utc)
+
+    # Use the start time if given; otherwise derive it from interval or default to 24 hours
+    if query.start_time:
+        start_time = datetime.fromisoformat(query.start_time.replace('Z', '+00:00')) if isinstance(query.start_time, str) else query.start_time
+    else:
+        # Compute the start time from the interval field, defaulting to 24 hours
+        interval = query.interval or "24h"
+        start_time = _parse_time_range(interval, end_time)
+
+    return _calculate_dynamic_interval(start_time, end_time)
+
+
+def _calculate_dynamic_interval(start_time: datetime, end_time: datetime, target_points: int = 200) -> TimeRangeResult:
+    """Pick a suitable bucket interval for the given time range."""
+    duration = (end_time - start_time).total_seconds()
+    ideal_interval_seconds = duration / target_points
+
+    # Choose an appropriate interval
+    intervals = [
+        (1, '1 seconds', 'second'),
+        (5, '5 seconds', 'second'),
+        (10, '10 seconds', 'second'),
+        (30, '30 seconds', 'second'),
+        (60, '1 minute', 'minute'),
+        (300, '5 minutes', 'minute'),
+        (600, '10 minutes', 'minute'),
+        (1800, '30 minutes', 'minute'),
+        (3600, '1 hour', 'hour'),
+        (21600, '6 hours', 'hour'),
+        (43200, '12 hours', 'hour'),
+        (86400, '1 day', 'day')
+    ]
+
+    for seconds, interval_str, granularity in intervals:
+        if ideal_interval_seconds <= seconds:
+            return TimeRangeResult(start_time, end_time, interval_str, seconds, granularity)
+
+    # Fall back to a 1-day interval
+    return TimeRangeResult(start_time, end_time, '1 day', 86400, 'day')
+
+
+def _build_queue_filter_and_params(queues: Optional[List[str]] = None):
+    """Build the queue filter clause and its bind parameters."""
+    queue_list = []
+    if queues:
+        # A string is split on commas (backwards compatible)
+        if isinstance(queues, str):
+            queue_list = [q.strip() for q in queues.split(',') if q.strip()]
+        # A list is used as-is
+        elif isinstance(queues, list):
+            queue_list = [q.strip() for q in queues if q and q.strip()]
+        else:
+            queue_list = []
+
+    queue_filter = ""
+    queue_params = {}
+
+    if queue_list:
+        queue_placeholders = ','.join([f':queue_{i}' for i in range(len(queue_list))])
+        queue_filter = f"AND t.queue IN ({queue_placeholders})"
+
+        for i, queue in enumerate(queue_list):
+            queue_params[f'queue_{i}'] = queue
+
+    return queue_filter, queue_list, queue_params
+
+
+def _get_empty_dashboard_stats():
+    """Return empty dashboard statistics."""
+    return {
+        "total_tasks": 0,
+        "completed_tasks": 0,
+        "failed_tasks": 0,
+        "running_tasks": 0,
+        "pending_tasks": 0,
+        "success_rate": 0,
+        "throughput": 0,
+        "avg_processing_time": 0,
+        "total_queues": 0,
+        "distribution": [{'type': 'No data', 'value': 1, 'queue': '', 'status': 'empty'}]
+    }
+
+
+def _get_empty_overview_stats():
+    """Return empty overview statistics."""
+    return {
+        "task_trend": [],
+        "concurrency": [],
+        "processing_time": [],
+        "creation_latency": [],
+        "granularity": "minute"
+    }
+
+
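The time-range and queue-filter helpers above are pure functions, so their behaviour is easy to pin down. For a 6-hour window, 6 h / 200 target points gives an ideal interval of 108 s, and the first bucket size at least that long is 5 minutes. A sketch, run in the module's namespace (the underscore helpers are module-private):

```python
from datetime import datetime, timezone

end = datetime(2024, 1, 2, tzinfo=timezone.utc)
start = _parse_time_range("6h", end)              # end - 6 hours
bucket = _calculate_dynamic_interval(start, end)  # 21600 s / 200 = 108 s ideal
print(bucket.interval, bucket.interval_seconds, bucket.granularity)
# -> 5 minutes 300 minute

# Queue filters become named bind parameters, one per queue
print(_build_queue_filter_and_params(["orders", "emails"]))
# -> ('AND t.queue IN (:queue_0,:queue_1)', ['orders', 'emails'],
#     {'queue_0': 'orders', 'queue_1': 'emails'})
```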
+async def _get_task_statistics(session, namespace, start_time, end_time, queue_filter, queue_params):
+    """Fetch the task statistics."""
+    stats_sql = text(f"""
+        WITH task_stats AS (
+            SELECT
+                t.stream_id,
+                t.created_at,
+                t.queue,
+                tr.status,
+                tr.execution_time,
+                tr.end_time
+            FROM tasks t
+            LEFT JOIN task_runs tr ON t.stream_id = tr.stream_id
+            WHERE t.namespace = :namespace
+            AND t.created_at >= :start_time
+            AND t.created_at <= :end_time
+            {queue_filter}
+        )
+        SELECT
+            COUNT(DISTINCT stream_id) as total_tasks,
+            COUNT(DISTINCT CASE WHEN status = 'success' THEN stream_id END) as completed_tasks,
+            COUNT(DISTINCT CASE WHEN status = 'error' THEN stream_id END) as failed_tasks,
+            COUNT(DISTINCT CASE WHEN status = 'running' THEN stream_id END) as running_tasks,
+            COUNT(DISTINCT CASE WHEN status IS NULL OR status = 'pending' THEN stream_id END) as pending_tasks,
+            COUNT(DISTINCT queue) as total_queues,
+            AVG(CASE WHEN status = 'success' AND execution_time IS NOT NULL
+                THEN execution_time END) as avg_execution_time
+        FROM task_stats
+    """)
+
+    # Merge all query parameters
+    all_params = {
+        'namespace': namespace,
+        'start_time': start_time,
+        'end_time': end_time,
+        **queue_params
+    }
+
+    result = await session.execute(stats_sql, all_params)
+    row = result.first()
+
+    if row:
+        avg_execution_time = row.avg_execution_time or 0
+        success_rate = round((row.completed_tasks / row.total_tasks * 100) if row.total_tasks > 0 else 0, 1)
+
+        return {
+            "total_tasks": row.total_tasks or 0,
+            "completed_tasks": row.completed_tasks or 0,
+            "failed_tasks": row.failed_tasks or 0,
+            "running_tasks": row.running_tasks or 0,
+            "pending_tasks": row.pending_tasks or 0,
+            "success_rate": success_rate,
+            "avg_processing_time": round(avg_execution_time * 1000 if avg_execution_time else 0, 1),
+            "total_queues": row.total_queues or 0
+        }
+
+    return _get_empty_dashboard_stats()
+
+
+async def _calculate_throughput(session, namespace, queue_filter, queue_params):
+    """Compute the throughput (tasks completed per minute)."""
+    recent_end_time = datetime.now(timezone.utc)
+    throughput = 0
+
+    time_windows = [
+        (5, "last 5 minutes"),
+        (10, "last 10 minutes"),
+        (30, "last 30 minutes"),
+        (60, "last 1 hour")
+    ]
+
+    for window_minutes, window_desc in time_windows:
+        recent_start_time = recent_end_time - timedelta(minutes=window_minutes)
+
+        recent_query = text(f"""
+            SELECT COUNT(DISTINCT t.stream_id) as recent_completed
+            FROM tasks t
+            LEFT JOIN task_runs tr ON t.stream_id = tr.stream_id
+            WHERE t.namespace = :namespace
+            AND tr.status = 'success'
+            AND tr.end_time >= :recent_start_time
+            AND tr.end_time <= :recent_end_time
+            {queue_filter}
+        """)
+
+        throughput_params = {
+            'namespace': namespace,
+            'recent_start_time': recent_start_time,
+            'recent_end_time': recent_end_time,
+            **queue_params
+        }
+
+        recent_result = await session.execute(recent_query, throughput_params)
+        recent_row = recent_result.first()
+        recent_completed = recent_row.recent_completed if recent_row else 0
+
+        if recent_completed >= 5:
+            throughput = round(recent_completed / window_minutes, 1)
+            logger.info(f"Throughput from the {window_desc}: {recent_completed} tasks / {window_minutes} min = {throughput} tasks/min")
+            break
+        elif recent_completed > 0:
+            throughput = round(recent_completed / window_minutes, 1)
+
+    return throughput
+
+
+async def _get_task_distribution(session, namespace, start_time, end_time, queue_filter, query_params):
+    """Fetch the task-distribution data."""
+    distribution_sql = text(f"""
+        SELECT
+            t.queue,
+            COUNT(DISTINCT t.stream_id) as count
+        FROM tasks t
+        WHERE t.namespace = :namespace
+        AND t.created_at >= :start_time
+        AND t.created_at <= :end_time
+        {queue_filter}
+        GROUP BY t.queue
+        ORDER BY count DESC, t.queue
+    """)
+
+    distribution_result = await session.execute(distribution_sql, query_params)
+
+    distribution_data = []
+    for row in distribution_result.fetchall():
+        if row.count > 0:
+            distribution_data.append({
+                'type': row.queue,
+                'value': row.count,
+                'queue': row.queue,
+                'status': 'all'
+            })
+
+    if not distribution_data:
+        distribution_data = [
+            {'type': 'No data', 'value': 1, 'queue': '', 'status': 'empty'}
+        ]
+
+    return distribution_data
+
+
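_calculate_throughput above uses a widening-window heuristic: it checks 5-, 10-, 30- and 60-minute windows in turn, trusts the first one with at least five completions, and otherwise keeps the rate from the last non-empty window. The same logic isolated from the database, with the per-window counts passed in as a plain dict:

```python
def pick_throughput(completed_by_window: dict) -> float:
    """completed_by_window maps window length in minutes -> completed tasks."""
    throughput = 0.0
    for minutes in (5, 10, 30, 60):
        completed = completed_by_window.get(minutes, 0)
        if completed >= 5:
            # Enough samples: trust this window and stop widening
            return round(completed / minutes, 1)
        if completed > 0:
            # Keep a provisional rate and try a wider window
            throughput = round(completed / minutes, 1)
    return throughput

print(pick_throughput({5: 2, 10: 3, 30: 12}))  # 0.4 tasks/min (30-minute window)
```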
+async def _get_top_backlog_queues(namespace, limit, time_range, queues):
+    """Get the top-N queues by backlog."""
+    db_manager = get_db_manager()
+    pool = await db_manager.get_pool(namespace)
+
+    # Check whether PostgreSQL is configured
+    if not pool.config.has_postgres():
+        return {"success": True, "data": []}
+
+    end_time = datetime.now(timezone.utc)
+    start_time = _parse_time_range(time_range, end_time)
+
+    async with pool.get_sa_session() as session:
+        queue_list = []
+        if queues:
+            # A string is split on commas (backwards compatible)
+            if isinstance(queues, str):
+                queue_list = [q.strip() for q in queues.split(',') if q.strip()]
+            # A list is used as-is
+            elif isinstance(queues, list):
+                queue_list = [q.strip() for q in queues if q and q.strip()]
+
+        try:
+            # Prefer the latest backlog figures from stream_backlog_monitor
+            if queue_list:
+                backlog_sql = text("""
+                    SELECT
+                        stream_name as queue,
+                        MAX(backlog_unprocessed) as backlog,
+                        CASE
+                            WHEN MAX(backlog_unprocessed) > 100 THEN 'critical'
+                            WHEN MAX(backlog_unprocessed) > 50 THEN 'warning'
+                            ELSE 'normal'
+                        END as status
+                    FROM stream_backlog_monitor
+                    WHERE namespace = :namespace
+                    AND created_at >= :start_time
+                    AND created_at <= :end_time
+                    AND stream_name = ANY(:queues)
+                    GROUP BY stream_name
+                    HAVING MAX(backlog_unprocessed) > 0
+                    ORDER BY backlog DESC
+                    LIMIT :limit
+                """)
+
+                result = await session.execute(backlog_sql, {
+                    'namespace': namespace,
+                    'start_time': start_time,
+                    'end_time': end_time,
+                    'queues': queue_list,
+                    'limit': limit
+                })
+            else:
+                backlog_sql = text("""
+                    SELECT
+                        stream_name as queue,
+                        MAX(backlog_unprocessed) as backlog,
+                        CASE
+                            WHEN MAX(backlog_unprocessed) > 100 THEN 'critical'
+                            WHEN MAX(backlog_unprocessed) > 50 THEN 'warning'
+                            ELSE 'normal'
+                        END as status
+                    FROM stream_backlog_monitor
+                    WHERE namespace = :namespace
+                    AND created_at >= :start_time
+                    AND created_at <= :end_time
+                    GROUP BY stream_name
+                    HAVING MAX(backlog_unprocessed) > 0
+                    ORDER BY backlog DESC
+                    LIMIT :limit
+                """)
+
+                result = await session.execute(backlog_sql, {
+                    'namespace': namespace,
+                    'start_time': start_time,
+                    'end_time': end_time,
+                    'limit': limit
+                })
+
+            backlog_queues = []
+            for row in result.fetchall():
+                backlog_queues.append({
+                    "queue": row.queue,
+                    "backlog": int(row.backlog),
+                    "status": row.status
+                })
+
+            return {"success": True, "data": backlog_queues}
+
+        except Exception as e:
+            logger.warning(f"Failed to fetch backlog data from stream_backlog_monitor: {e}")
+
+            # Fall back to counting from the tasks table
+            return await _get_top_backlog_from_tasks(session, namespace, limit)
+
+
+async def _get_top_backlog_from_tasks(session, namespace, limit):
+    """Derive backlog figures from the tasks table."""
+    task_sql = text("""
+        SELECT
+            t.queue,
+            COUNT(DISTINCT t.stream_id) as backlog,
+            CASE
+                WHEN COUNT(DISTINCT t.stream_id) > 1000 THEN 'critical'
+                WHEN COUNT(DISTINCT t.stream_id) > 500 THEN 'warning'
+                ELSE 'normal'
+            END as status
+        FROM tasks t
+        LEFT JOIN task_runs tr ON t.stream_id = tr.stream_id
+        WHERE t.namespace = :namespace
+        AND (tr.stream_id IS NULL OR tr.status = 'pending')
+        AND t.created_at > NOW() - INTERVAL '24 hour'
+        GROUP BY t.queue
+        ORDER BY backlog DESC
+        LIMIT :limit
+    """)
+
+    result = await session.execute(task_sql, {
+        'namespace': namespace,
+        'limit': limit
+    })
+
+    backlog_queues = []
+    for row in result.fetchall():
+        backlog_queues.append({
+            "queue": row.queue,
+            "backlog": int(row.backlog),
+            "status": row.status
+        })
+
+    return {"success": True, "data": backlog_queues}
+
+
+async def _get_top_error_queues(namespace, limit, time_range, queues):
+    """Get the top-N queues by error rate."""
+    db_manager = get_db_manager()
+    pool = await db_manager.get_pool(namespace)
+
+    # Check whether PostgreSQL is configured
+    if not pool.config.has_postgres():
+        return {"success": True, "data": []}
+
+    end_time = datetime.now(timezone.utc)
+    start_time = _parse_time_range(time_range, end_time)
+
+    async with pool.get_sa_session() as session:
+        queue_list = []
+        if queues:
+            queue_list = [q.strip() for q in queues.split(',') if q.strip()]
+
+        if queue_list:
+            error_sql = text("""
+                WITH queue_stats AS (
+                    SELECT
+                        t.queue,
+                        COUNT(DISTINCT t.stream_id) as total,
+                        COUNT(DISTINCT CASE WHEN tr.status = 'error' THEN t.stream_id END) as errors
+                    FROM tasks t
+                    LEFT JOIN task_runs tr ON t.stream_id = tr.stream_id
+                    WHERE t.namespace = :namespace
+                    AND t.created_at >= :start_time
+                    AND t.created_at <= :end_time
+                    AND t.queue = ANY(:queues)
+                    GROUP BY t.queue
+                )
+                SELECT
+                    queue,
+                    errors,
+                    total,
+                    CASE
+                        WHEN total > 0 THEN ROUND(errors::numeric / total * 100, 1)
+                        ELSE 0
+                    END as error_rate
+                FROM queue_stats
+                WHERE errors > 0
+                ORDER BY error_rate DESC, errors DESC
+                LIMIT :limit
+            """)
+
+            result = await session.execute(error_sql, {
+                'namespace': namespace,
+                'start_time': start_time,
+                'end_time': end_time,
+                'queues': queue_list,
+                'limit': limit
+            })
+        else:
+            error_sql = text("""
+                WITH queue_stats AS (
+                    SELECT
+                        t.queue,
+                        COUNT(DISTINCT t.stream_id) as total,
+                        COUNT(DISTINCT CASE WHEN tr.status = 'error' THEN t.stream_id END) as errors
+                    FROM tasks t
+                    LEFT JOIN task_runs tr ON t.stream_id = tr.stream_id
+                    WHERE t.namespace = :namespace
+                    AND t.created_at >= :start_time
+                    AND t.created_at <= :end_time
+                    GROUP BY t.queue
+                )
+                SELECT
+                    queue,
+                    errors,
+                    total,
+                    CASE
+                        WHEN total > 0 THEN ROUND(errors::numeric / total * 100, 1)
+                        ELSE 0
+                    END as error_rate
+                FROM queue_stats
+                WHERE errors > 0
+                ORDER BY error_rate DESC, errors DESC
+                LIMIT :limit
+            """)
+
+            result = await session.execute(error_sql, {
+                'namespace': namespace,
+                'start_time': start_time,
+                'end_time': end_time,
+                'limit': limit
+            })
+
+        error_queues = []
+        for row in result.fetchall():
+            error_queues.append({
+                "queue": row.queue,
+                "errors": int(row.errors),
+                "total": int(row.total),
+                "error_rate": float(row.error_rate)
+            })
+
+        return {"success": True, "data": error_queues}
+
+
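Note that the two backlog sources above classify status on different scales: the stream_backlog_monitor path flags a backlog above 100 as critical and above 50 as warning, while the tasks-table fallback uses 1000 and 500. The classification embedded in both SQL statements, distilled:

```python
def backlog_status(backlog: int, from_monitor: bool) -> str:
    # Thresholds as written in the two queries above
    critical, warning = (100, 50) if from_monitor else (1000, 500)
    if backlog > critical:
        return 'critical'
    if backlog > warning:
        return 'warning'
    return 'normal'

print(backlog_status(120, from_monitor=True))   # critical
print(backlog_status(120, from_monitor=False))  # normal
```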
+async def _execute_overview_query(session, namespace, time_range_result, queue_filter, queue_params):
+    """Run the unified overview query."""
+    interval_seconds = time_range_result.interval_seconds
+
+    # Inline the interval seconds in the SQL to avoid parameter-typing issues
+    sql = text(f"""
+        WITH time_series AS (
+            SELECT to_timestamp(FLOOR(EXTRACT(epoch FROM ts) / {interval_seconds}) * {interval_seconds}) AS time_bucket
+            FROM generate_series(
+                :start_time ::timestamptz,
+                :end_time ::timestamptz + INTERVAL '{interval_seconds} seconds',
+                INTERVAL '{interval_seconds} seconds'
+            ) AS ts
+        ),
+        enqueue_counts AS (
+            SELECT
+                to_timestamp(FLOOR(EXTRACT(epoch FROM t.created_at) / {interval_seconds}) * {interval_seconds}) AS time_bucket,
+                COUNT(DISTINCT t.stream_id) as enqueued
+            FROM tasks t
+            WHERE t.namespace = :namespace
+            AND t.created_at >= :start_time
+            AND t.created_at <= :end_time
+            {queue_filter}
+            GROUP BY time_bucket
+        ),
+        complete_counts AS (
+            SELECT
+                to_timestamp(FLOOR(EXTRACT(epoch FROM tr.end_time) / {interval_seconds}) * {interval_seconds}) AS time_bucket,
+                COUNT(DISTINCT t.stream_id) as completed
+            FROM task_runs tr
+            JOIN tasks t ON tr.stream_id = t.stream_id
+            WHERE t.namespace = :namespace
+            AND tr.end_time >= :start_time
+            AND tr.end_time <= :end_time
+            AND tr.status = 'success'
+            AND t.created_at >= :start_time
+            AND t.created_at <= :end_time
+            {queue_filter}
+            GROUP BY time_bucket
+        ),
+        failed_counts AS (
+            SELECT
+                to_timestamp(FLOOR(EXTRACT(epoch FROM tr.end_time) / {interval_seconds}) * {interval_seconds}) AS time_bucket,
+                COUNT(DISTINCT t.stream_id) as failed
+            FROM task_runs tr
+            JOIN tasks t ON tr.stream_id = t.stream_id
+            WHERE t.namespace = :namespace
+            AND tr.end_time >= :start_time
+            AND tr.end_time <= :end_time
+            AND tr.status = 'error'
+            AND t.created_at >= :start_time
+            AND t.created_at <= :end_time
+            {queue_filter}
+            GROUP BY time_bucket
+        ),
+        concurrency_data AS (
+            SELECT
+                to_timestamp(FLOOR(EXTRACT(epoch FROM tr.start_time) / {interval_seconds}) * {interval_seconds}) AS time_bucket,
+                COUNT(DISTINCT t.stream_id) as concurrent_tasks
+            FROM task_runs tr
+            JOIN tasks t ON tr.stream_id = t.stream_id
+            WHERE t.namespace = :namespace
+            AND tr.start_time >= :start_time
+            AND tr.start_time <= :end_time
+            AND tr.start_time IS NOT NULL
+            AND tr.end_time IS NOT NULL
+            AND t.created_at >= :start_time
+            AND t.created_at <= :end_time
+            {queue_filter}
+            GROUP BY time_bucket
+        ),
+        processing_time_data AS (
+            SELECT
+                to_timestamp(FLOOR(EXTRACT(epoch FROM tr.end_time) / {interval_seconds}) * {interval_seconds}) AS time_bucket,
+                AVG(CASE WHEN tr.status = 'success' AND tr.execution_time > 0
+                    THEN tr.execution_time END) as avg_processing_time,
+                PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY
+                    CASE WHEN tr.status = 'success' AND tr.execution_time > 0
+                    THEN tr.execution_time END) as p50_processing_time,
+                PERCENTILE_CONT(0.9) WITHIN GROUP (ORDER BY
+                    CASE WHEN tr.status = 'success' AND tr.execution_time > 0
+                    THEN tr.execution_time END) as p90_processing_time
+            FROM task_runs tr
+            JOIN tasks t ON tr.stream_id = t.stream_id
+            WHERE t.namespace = :namespace
+            AND tr.end_time >= :start_time
+            AND tr.end_time <= :end_time
+            AND tr.status = 'success'
+            {queue_filter}
+            GROUP BY time_bucket
+        ),
+        creation_latency_data AS (
+            SELECT
+                to_timestamp(FLOOR(EXTRACT(epoch FROM tr.start_time) / {interval_seconds}) * {interval_seconds}) AS time_bucket,
+                AVG(EXTRACT(EPOCH FROM (tr.start_time - t.created_at))) as avg_creation_latency,
+                PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY
+                    EXTRACT(EPOCH FROM (tr.start_time - t.created_at))) as p50_creation_latency,
+                PERCENTILE_CONT(0.9) WITHIN GROUP (ORDER BY
+                    EXTRACT(EPOCH FROM (tr.start_time - t.created_at))) as p90_creation_latency
+            FROM task_runs tr
+            JOIN tasks t ON tr.stream_id = t.stream_id
+            WHERE t.namespace = :namespace
+            AND tr.start_time >= :start_time
+            AND tr.start_time <= :end_time
+            AND tr.start_time IS NOT NULL
+            {queue_filter}
+            GROUP BY time_bucket
+        )
+        SELECT
+            ts.time_bucket,
+            COALESCE(eq.enqueued, 0) as enqueued,
+            COALESCE(cc.completed, 0) as completed,
+            COALESCE(fc.failed, 0) as failed,
+            COALESCE(cd.concurrent_tasks, 0) as concurrent_tasks,
+            ROUND(ptd.avg_processing_time::numeric, 6) as avg_processing_time,
+            ROUND(ptd.p50_processing_time::numeric, 6) as p50_processing_time,
+            ROUND(ptd.p90_processing_time::numeric, 6) as p90_processing_time,
+            ROUND(cld.avg_creation_latency::numeric, 3) as avg_creation_latency,
+            ROUND(cld.p50_creation_latency::numeric, 3) as p50_creation_latency,
+            ROUND(cld.p90_creation_latency::numeric, 3) as p90_creation_latency
+        FROM time_series ts
+        LEFT JOIN enqueue_counts eq ON ts.time_bucket = eq.time_bucket
+        LEFT JOIN complete_counts cc ON ts.time_bucket = cc.time_bucket
+        LEFT JOIN failed_counts fc ON ts.time_bucket = fc.time_bucket
+        LEFT JOIN concurrency_data cd ON ts.time_bucket = cd.time_bucket
+        LEFT JOIN processing_time_data ptd ON ts.time_bucket = ptd.time_bucket
+        LEFT JOIN creation_latency_data cld ON ts.time_bucket = cld.time_bucket
+        ORDER BY ts.time_bucket
+    """)
+
+    query_params = {
+        'namespace': namespace,
+        'start_time': time_range_result.start_time,
+        'end_time': time_range_result.end_time,
+        **queue_params
+    }
+
+    result = await session.execute(sql, query_params)
+    return result.fetchall()
+
+
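Every CTE in the query above buckets timestamps with the same expression, to_timestamp(FLOOR(EXTRACT(epoch FROM ts) / N) * N), which floors a timestamp to an N-second boundary so the per-metric CTEs join cleanly on time_bucket. The equivalent in Python:

```python
from datetime import datetime, timezone

def floor_to_bucket(ts: datetime, interval_seconds: int) -> datetime:
    """Floor a timestamp to the start of its interval_seconds-wide bucket."""
    epoch = ts.timestamp()
    bucket = (epoch // interval_seconds) * interval_seconds
    return datetime.fromtimestamp(bucket, tz=timezone.utc)

ts = datetime(2024, 1, 1, 12, 34, 56, tzinfo=timezone.utc)
print(floor_to_bucket(ts, 300))  # 2024-01-01 12:30:00+00:00
```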
+def _format_overview_data(rows, granularity):
+    """Format the overview data."""
+    task_trend = []
+    concurrency = []
+    processing_time = []
+    creation_latency = []
+
+    end_index = len(rows) - 1
+
+    for idx, row in enumerate(rows):
+        time_str = row.time_bucket.isoformat()
+
+        # Task-processing trend data
+        enqueued_val = row.enqueued if row.enqueued > 0 or idx == 0 or idx == end_index else None
+        completed_val = row.completed if row.completed > 0 or idx == 0 or idx == end_index else None
+        failed_val = row.failed if row.failed > 0 or idx == 0 or idx == end_index else None
+
+        task_trend.extend([
+            {'time': time_str, 'value': enqueued_val, 'metric': 'Enqueue rate'},
+            {'time': time_str, 'value': completed_val, 'metric': 'Completion rate'},
+            {'time': time_str, 'value': failed_val, 'metric': 'Failure count'}
+        ])
+
+        # Concurrent task count
+        concurrency.append({
+            'time': time_str,
+            'value': row.concurrent_tasks or 0,
+            'metric': 'Concurrent tasks'
+        })
+
+        # Task processing time (converted to milliseconds)
+        _add_processing_time_data(processing_time, time_str, row, idx, end_index)
+
+        # Task execution latency (seconds)
+        _add_creation_latency_data(creation_latency, time_str, row, idx, end_index)
+
+    return {
+        "task_trend": task_trend,
+        "concurrency": concurrency,
+        "processing_time": processing_time,
+        "creation_latency": creation_latency,
+        "granularity": granularity
+    }
+
+
+def _add_processing_time_data(processing_time, time_str, row, idx, end_index):
+    """Append processing-time data points."""
+    if row.avg_processing_time is not None:
+        avg_time_val = round(float(row.avg_processing_time * 1000), 1)
+    else:
+        avg_time_val = None if idx != 0 and idx != end_index else 0
+
+    if row.p50_processing_time is not None:
+        p50_time_val = round(float(row.p50_processing_time * 1000), 1)
+    else:
+        p50_time_val = None if idx != 0 and idx != end_index else 0
+
+    if row.p90_processing_time is not None:
+        p90_time_val = round(float(row.p90_processing_time * 1000), 1)
+    else:
+        p90_time_val = None if idx != 0 and idx != end_index else 0
+
+    processing_time.extend([
+        {'time': time_str, 'value': avg_time_val, 'metric': 'Avg processing time'},
+        {'time': time_str, 'value': p50_time_val, 'metric': 'P50 processing time'},
+        {'time': time_str, 'value': p90_time_val, 'metric': 'P90 processing time'}
+    ])
+
+
+def _add_creation_latency_data(creation_latency, time_str, row, idx, end_index):
+    """Append creation-latency data points."""
+    if row.avg_creation_latency is not None:
+        avg_latency_val = round(float(row.avg_creation_latency), 3)
+    else:
+        avg_latency_val = None if idx != 0 and idx != end_index else 0
+
+    if row.p50_creation_latency is not None:
+        p50_latency_val = round(float(row.p50_creation_latency), 3)
+    else:
+        p50_latency_val = None if idx != 0 and idx != end_index else 0
+
+    if row.p90_creation_latency is not None:
+        p90_latency_val = round(float(row.p90_creation_latency), 3)
+    else:
+        p90_latency_val = None if idx != 0 and idx != end_index else 0
+
+    creation_latency.extend([
+        {'time': time_str, 'value': avg_latency_val, 'metric': 'Avg execution latency'},
+        {'time': time_str, 'value': p50_latency_val, 'metric': 'P50 execution latency'},
+        {'time': time_str, 'value': p90_latency_val, 'metric': 'P90 execution latency'}
+    ])
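_format_overview_data only needs attribute access on its rows, so its output shape can be checked with a synthetic row in place of a SQLAlchemy result. Note the unit handling: processing times arrive in seconds and are emitted in milliseconds, while latencies stay in seconds. A sketch, run in the module's namespace:

```python
from datetime import datetime, timezone
from types import SimpleNamespace

row = SimpleNamespace(
    time_bucket=datetime(2024, 1, 1, tzinfo=timezone.utc),
    enqueued=10, completed=8, failed=2, concurrent_tasks=3,
    avg_processing_time=0.125, p50_processing_time=0.1, p90_processing_time=0.3,
    avg_creation_latency=0.05, p50_creation_latency=0.04, p90_creation_latency=0.09,
)
data = _format_overview_data([row], "minute")
print(data["processing_time"][0])
# {'time': '2024-01-01T00:00:00+00:00', 'value': 125.0, 'metric': 'Avg processing time'}
print(data["granularity"])  # minute
```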