jettask 0.2.20__py3-none-any.whl → 0.2.24__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- jettask/__init__.py +4 -0
- jettask/cli.py +12 -8
- jettask/config/lua_scripts.py +37 -0
- jettask/config/nacos_config.py +1 -1
- jettask/core/app.py +313 -340
- jettask/core/container.py +4 -4
- jettask/{persistence → core}/namespace.py +93 -27
- jettask/core/task.py +16 -9
- jettask/core/unified_manager_base.py +136 -26
- jettask/db/__init__.py +67 -0
- jettask/db/base.py +137 -0
- jettask/{utils/db_connector.py → db/connector.py} +130 -26
- jettask/db/models/__init__.py +16 -0
- jettask/db/models/scheduled_task.py +196 -0
- jettask/db/models/task.py +77 -0
- jettask/db/models/task_run.py +85 -0
- jettask/executor/__init__.py +0 -15
- jettask/executor/core.py +76 -31
- jettask/executor/process_entry.py +29 -114
- jettask/executor/task_executor.py +4 -0
- jettask/messaging/event_pool.py +928 -685
- jettask/messaging/scanner.py +30 -0
- jettask/persistence/__init__.py +28 -103
- jettask/persistence/buffer.py +170 -0
- jettask/persistence/consumer.py +330 -249
- jettask/persistence/manager.py +304 -0
- jettask/persistence/persistence.py +391 -0
- jettask/scheduler/__init__.py +15 -3
- jettask/scheduler/{task_crud.py → database.py} +61 -57
- jettask/scheduler/loader.py +2 -2
- jettask/scheduler/{scheduler_coordinator.py → manager.py} +23 -6
- jettask/scheduler/models.py +14 -10
- jettask/scheduler/schedule.py +166 -0
- jettask/scheduler/scheduler.py +12 -11
- jettask/schemas/__init__.py +50 -1
- jettask/schemas/backlog.py +43 -6
- jettask/schemas/namespace.py +70 -19
- jettask/schemas/queue.py +19 -3
- jettask/schemas/responses.py +493 -0
- jettask/task/__init__.py +0 -2
- jettask/task/router.py +3 -0
- jettask/test_connection_monitor.py +1 -1
- jettask/utils/__init__.py +7 -5
- jettask/utils/db_init.py +8 -4
- jettask/utils/namespace_dep.py +167 -0
- jettask/utils/queue_matcher.py +186 -0
- jettask/utils/rate_limit/concurrency_limiter.py +7 -1
- jettask/utils/stream_backlog.py +1 -1
- jettask/webui/__init__.py +0 -1
- jettask/webui/api/__init__.py +4 -4
- jettask/webui/api/alerts.py +806 -71
- jettask/webui/api/example_refactored.py +400 -0
- jettask/webui/api/namespaces.py +390 -45
- jettask/webui/api/overview.py +300 -54
- jettask/webui/api/queues.py +971 -267
- jettask/webui/api/scheduled.py +1249 -56
- jettask/webui/api/settings.py +129 -7
- jettask/webui/api/workers.py +442 -0
- jettask/webui/app.py +46 -2329
- jettask/webui/middleware/__init__.py +6 -0
- jettask/webui/middleware/namespace_middleware.py +135 -0
- jettask/webui/services/__init__.py +146 -0
- jettask/webui/services/heartbeat_service.py +251 -0
- jettask/webui/services/overview_service.py +60 -51
- jettask/webui/services/queue_monitor_service.py +426 -0
- jettask/webui/services/redis_monitor_service.py +87 -0
- jettask/webui/services/settings_service.py +174 -111
- jettask/webui/services/task_monitor_service.py +222 -0
- jettask/webui/services/timeline_pg_service.py +452 -0
- jettask/webui/services/timeline_service.py +189 -0
- jettask/webui/services/worker_monitor_service.py +467 -0
- jettask/webui/utils/__init__.py +11 -0
- jettask/webui/utils/time_utils.py +122 -0
- jettask/worker/lifecycle.py +8 -2
- {jettask-0.2.20.dist-info → jettask-0.2.24.dist-info}/METADATA +1 -1
- jettask-0.2.24.dist-info/RECORD +142 -0
- jettask/executor/executor.py +0 -338
- jettask/persistence/backlog_monitor.py +0 -567
- jettask/persistence/base.py +0 -2334
- jettask/persistence/db_manager.py +0 -516
- jettask/persistence/maintenance.py +0 -81
- jettask/persistence/message_consumer.py +0 -259
- jettask/persistence/models.py +0 -49
- jettask/persistence/offline_recovery.py +0 -196
- jettask/persistence/queue_discovery.py +0 -215
- jettask/persistence/task_persistence.py +0 -218
- jettask/persistence/task_updater.py +0 -583
- jettask/scheduler/add_execution_count.sql +0 -11
- jettask/scheduler/add_priority_field.sql +0 -26
- jettask/scheduler/add_scheduler_id.sql +0 -25
- jettask/scheduler/add_scheduler_id_index.sql +0 -10
- jettask/scheduler/make_scheduler_id_required.sql +0 -28
- jettask/scheduler/migrate_interval_seconds.sql +0 -9
- jettask/scheduler/performance_optimization.sql +0 -45
- jettask/scheduler/run_scheduler.py +0 -186
- jettask/scheduler/schema.sql +0 -84
- jettask/task/task_executor.py +0 -318
- jettask/webui/api/analytics.py +0 -323
- jettask/webui/config.py +0 -90
- jettask/webui/models/__init__.py +0 -3
- jettask/webui/models/namespace.py +0 -63
- jettask/webui/namespace_manager/__init__.py +0 -10
- jettask/webui/namespace_manager/multi.py +0 -593
- jettask/webui/namespace_manager/unified.py +0 -193
- jettask/webui/run.py +0 -46
- jettask-0.2.20.dist-info/RECORD +0 -145
- {jettask-0.2.20.dist-info → jettask-0.2.24.dist-info}/WHEEL +0 -0
- {jettask-0.2.20.dist-info → jettask-0.2.24.dist-info}/entry_points.txt +0 -0
- {jettask-0.2.20.dist-info → jettask-0.2.24.dist-info}/licenses/LICENSE +0 -0
- {jettask-0.2.20.dist-info → jettask-0.2.24.dist-info}/top_level.txt +0 -0
jettask/webui/app.py
CHANGED
@@ -1,1291 +1,18 @@
-import asyncio
-import json
-import time
 import logging
-from datetime import datetime, timedelta, timezone
-from typing import Dict, List, Optional, Any
 from contextlib import asynccontextmanager
-from fastapi import FastAPI
-from fastapi.responses import HTMLResponse
-from fastapi.staticfiles import StaticFiles
+from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
-from starlette.websockets import WebSocketState
-from redis import asyncio as aioredis
 import uvicorn
-from pathlib import Path
-from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession
-from sqlalchemy.orm import sessionmaker
-from sqlalchemy import select, func, and_, or_, text
-from sqlalchemy.dialects import postgresql
 
-from jettask.
-from jettask.webui.config import PostgreSQLConfig, RedisConfig
-from jettask.persistence.models import Base, Task
+from jettask.webui.services import MonitorService
 
 logger = logging.getLogger(__name__)
 
-# SQLAlchemy async engine and session (independent of the consumer)
-async_engine = None
-AsyncSessionLocal = None
-
-def parse_iso_datetime(time_str: str) -> datetime:
-    """Parse an ISO-format time string, ensuring a UTC datetime is returned."""
-    if time_str.endswith('Z'):
-        # 'Z' denotes UTC
-        dt = datetime.fromisoformat(time_str.replace('Z', '+00:00'))
-    else:
-        dt = datetime.fromisoformat(time_str)
-
-    # If there is no timezone info, assume UTC
-    if dt.tzinfo is None:
-        dt = dt.replace(tzinfo=timezone.utc)
-    # If there is timezone info, convert to UTC
-    elif dt.tzinfo != timezone.utc:
-        dt = dt.astimezone(timezone.utc)
-
-    return dt
-
-async def get_db_engine():
-    """Get the SQLAlchemy async engine (used for reading data)."""
-    global async_engine, AsyncSessionLocal
-
-    if async_engine:
-        return async_engine
-
-    # Try to get the PostgreSQL connection info from environment variables or config
-    import os
-
-    pg_config = PostgreSQLConfig.from_env()
-
-    if not pg_config.dsn:
-        logger.warning("PostgreSQL connection not configured")
-        return None
-
-    try:
-        # Convert the DSN to SQLAlchemy format
-        # If it starts with postgresql://, rewrite it to postgresql+psycopg://
-        if pg_config.dsn.startswith('postgresql://'):
-            dsn = pg_config.dsn.replace('postgresql://', 'postgresql+psycopg://', 1)
-        else:
-            dsn = pg_config.dsn
-
-        async_engine = create_async_engine(
-            dsn,
-            pool_size=10,
-            max_overflow=5,
-            pool_pre_ping=True,
-            echo=False
-        )
-
-        # Create the async session factory
-        AsyncSessionLocal = sessionmaker(
-            async_engine,
-            class_=AsyncSession,
-            expire_on_commit=False
-        )
-
-        logger.info("SQLAlchemy async engine created for WebUI")
-        return async_engine
-    except Exception as e:
-        logger.error(f"Failed to create SQLAlchemy async engine: {e}")
-        return None
-
-async def get_db_session():
-    """Get a database session."""
-    if not AsyncSessionLocal:
-        await get_db_engine()
-
-    if AsyncSessionLocal:
-        async with AsyncSessionLocal() as session:
-            yield session
-    else:
-        yield None
-
-class RedisMonitor:
-    def __init__(self, redis_url: str = "redis://localhost:6379", redis_prefix: str = "jettask"):
-        self.redis_url = redis_url
-        self.redis_prefix = redis_prefix
-        self.redis: Optional[aioredis.Redis] = None
-        self.worker_state_manager = None  # Initialized lazily
-        self.scanner_task: Optional[asyncio.Task] = None
-        self.scanner_interval = 5  # Scan every 5 seconds
-        self.default_heartbeat_timeout = 30  # Default heartbeat timeout of 30 seconds
-        self._queues_cache = None
-        self._queues_cache_time = 0
-        self._queues_cache_ttl = 60  # Cache for 60 seconds
-        self._workers_cache = None
-        self._workers_cache_time = 0
-        self._workers_cache_ttl = 5  # Cache workers for 5 seconds, since they update frequently
-        self._scanner_running = False  # Flag indicating whether the scanner is running
-
-    async def connect(self):
-        # Use the unified connection pool management
-        from jettask.utils.db_connector import get_async_redis_pool
-
-        pool = get_async_redis_pool(
-            self.redis_url,
-            decode_responses=True,
-            max_connections=100,
-            socket_connect_timeout=5,
-            socket_timeout=10,
-            socket_keepalive=True,
-            health_check_interval=30
-        )
-        self.redis = aioredis.Redis(connection_pool=pool)
-
-        # Initialize the WorkerStateManager
-        from jettask.worker.lifecycle import WorkerStateManager
-        self.worker_state_manager = WorkerStateManager(
-            redis_client=self.redis,
-            redis_prefix=self.redis_prefix
-        )
-
-    async def close(self):
-        # Stop the scanner task
-        if self.scanner_task and not self.scanner_task.done():
-            self.scanner_task.cancel()
-            try:
-                await self.scanner_task
-            except asyncio.CancelledError:
-                pass
-
-        if self.redis:
-            await self.redis.close()
-
-    def get_prefixed_queue_name(self, queue_name: str) -> str:
-        """Add the prefix to a queue name."""
-        return f"{self.redis_prefix}:QUEUE:{queue_name}"
-
-
-    async def get_task_info(self, event_id: str) -> Dict[str, Any]:
-        """Get detailed task information."""
-        status_key = f"{self.redis_prefix}:STATUS:{event_id}"
-        result_key = f"{self.redis_prefix}:RESULT:{event_id}"
-
-        status = await self.redis.get(status_key)
-        result = await self.redis.get(result_key)
-
-        task_info = {
-            "event_id": event_id,
-            "status": status,
-            "result": result
-        }
-
-        # If status info exists, try to get details from the corresponding queue stream
-        if status:
-            try:
-                status_data = json.loads(status)
-                queue_name = status_data.get("queue")
-
-                if queue_name:
-                    # Look up the task in the stream
-                    # Scan recent messages with xrange
-                    prefixed_queue_name = self.get_prefixed_queue_name(queue_name)
-                    messages = await self.redis.xrange(prefixed_queue_name, count=1000)
-
-                    for msg_id, data in messages:
-                        # Check whether the event_id in the message data matches
-                        if (data.get("event_id") == event_id or
-                            data.get("id") == event_id or
-                            data.get("task_id") == event_id):
-                            task_info["stream_data"] = {
-                                "message_id": msg_id,
-                                "data": data,
-                                "queue": queue_name
-                            }
-                            break
-
-                    # If the message ID is the event_id itself, try fetching it directly
-                    if not task_info.get("stream_data"):
-                        try:
-                            direct_messages = await self.redis.xrange(
-                                prefixed_queue_name,
-                                min=event_id,
-                                max=event_id,
-                                count=1
-                            )
-                            if direct_messages:
-                                msg_id, data = direct_messages[0]
-                                task_info["stream_data"] = {
-                                    "message_id": msg_id,
-                                    "data": data,
-                                    "queue": queue_name
-                                }
-                        except:
-                            pass
-
-            except Exception as e:
-                print(f"Error parsing status for task {event_id}: {e}")
-
-        return task_info
-
-    async def get_stream_info(self, queue_name: str, event_id: str) -> Optional[Dict[str, Any]]:
-        """Get detailed task information from the stream."""
-        try:
-            prefixed_queue_name = self.get_prefixed_queue_name(queue_name)
-            # First try a direct lookup by event_id
-            messages = await self.redis.xrange(prefixed_queue_name, min=event_id, max=event_id, count=1)
-            if messages:
-                msg_id, data = messages[0]
-                return {
-                    "message_id": msg_id,
-                    "data": data,
-                    "queue": queue_name
-                }
-
-            # If not found, the event_id may be part of a message body; scan recent messages
-            messages = await self.redis.xrange(prefixed_queue_name, count=100)
-            for msg_id, data in messages:
-                if data.get("event_id") == event_id or data.get("id") == event_id:
-                    return {
-                        "message_id": msg_id,
-                        "data": data,
-                        "queue": queue_name
-                    }
-        except Exception as e:
-            print(f"Error reading from stream {prefixed_queue_name}: {e}")
-        return None
-
-    async def get_queue_tasks(self, queue_name: str, start_time: Optional[str] = None,
-                              end_time: Optional[str] = None, limit: int = 100) -> Dict[str, Any]:
-        """Get tasks for the given queue (over a time range).
-
-        Args:
-            queue_name: Queue name.
-            start_time: Start time (Redis Stream ID format or timestamp).
-            end_time: End time (Redis Stream ID format or timestamp).
-            limit: Maximum number of tasks to return.
-        """
-        all_tasks = []
-
-        try:
-            # Handle the time parameters
-            # If no end time is given, use '+' to mean "up to the newest"
-            if not end_time:
-                end_time = '+'
-
-            # If no start time is given, use '-' to mean "from the earliest"
-            if not start_time:
-                start_time = '-'
-
-            # Read messages from the queue's stream
-            # Use xrevrange to fetch in reverse time order (newest first)
-            prefixed_queue_name = self.get_prefixed_queue_name(queue_name)
-            messages = await self.redis.xrevrange(
-                prefixed_queue_name,
-                max=end_time,
-                min=start_time,
-                count=limit
-            )
-
-            for msg_id, data in messages:
-                # In easy_task, the event_id is the Redis-generated stream message ID
-                event_id = msg_id
-
-                # Build the task info
-                task_info = {
-                    "event_id": event_id,
-                    "message_id": msg_id,
-                    "stream_data": data,
-                    "task_name": data.get("name", "unknown"),
-                    "queue": data.get("queue", queue_name),
-                    "trigger_time": data.get("trigger_time")
-                }
-
-                # Try to parse args and kwargs and combine them into a parameter string
-                params_str = ""
-                try:
-                    args_list = []
-                    kwargs_dict = {}
-
-                    if data.get("args"):
-                        args_list = json.loads(data["args"])
-                        task_info["args"] = args_list
-
-                    if data.get("kwargs"):
-                        kwargs_dict = json.loads(data["kwargs"])
-                        task_info["kwargs"] = kwargs_dict
-
-                    # Build the parameter string
-                    params_parts = []
-                    if args_list:
-                        params_parts.extend([str(arg) for arg in args_list])
-                    if kwargs_dict:
-                        params_parts.extend([f"{k}={v}" for k, v in kwargs_dict.items()])
-
-                    params_str = ", ".join(params_parts) if params_parts else "no parameters"
-
-                except Exception as e:
-                    params_str = "failed to parse"
-
-                task_info["params_str"] = params_str
-
-                # Get info from the status key (results are not fetched by default)
-                status_key = f"{self.redis_prefix}:STATUS:{event_id}"
-
-                # Get the status
-                status = await self.redis.get(status_key)
-
-                if status:
-                    task_info["status"] = status
-                    try:
-                        parsed_status = json.loads(status)
-                        task_info["parsed_status"] = parsed_status
-                        # Get the consumer info from the status
-                        task_info["consumer"] = parsed_status.get("consumer", "-")
-                    except:
-                        task_info["parsed_status"] = {"status": "unknown"}
-                        task_info["consumer"] = "-"
-                else:
-                    # If there is no status, report it as unknown
-                    task_info["status"] = json.dumps({
-                        "status": "unknown",
-                        "queue": queue_name,
-                        "created_at": datetime.fromtimestamp(float(data.get("trigger_time", 0))).isoformat() if data.get("trigger_time") else None
-                    })
-                    task_info["parsed_status"] = {
-                        "status": "unknown",
-                        "queue": queue_name,
-                        "created_at": datetime.fromtimestamp(float(data.get("trigger_time", 0))).isoformat() if data.get("trigger_time") else None
-                    }
-                    task_info["consumer"] = "-"
-
-                all_tasks.append(task_info)
-
-        except Exception as e:
-            print(f"Error reading queue {queue_name}: {e}")
-            # If the stream does not exist or an error occurs, return an empty result
-            return {
-                "tasks": [],
-                "count": 0,
-                "oldest_id": None,
-                "newest_id": None,
-                "has_more": False,
-                "limit": limit
-            }
-
-        # Get the earliest and latest message IDs for pagination
-        oldest_id = all_tasks[-1]["message_id"] if all_tasks else None
-        newest_id = all_tasks[0]["message_id"] if all_tasks else None
-
-        # Check whether there is more data
-        has_more = len(messages) >= limit
-
-        # Get the total queue length
-        total_count = 0
-        try:
-            queue_info = await self.redis.xinfo_stream(prefixed_queue_name)
-            total_count = queue_info.get("length", 0)
-        except Exception as e:
-            print(f"Error getting queue info for {queue_name}: {e}")
-            total_count = len(all_tasks)
-
-        return {
-            "tasks": all_tasks,
-            "count": len(all_tasks),
-            "total_count": total_count,
-            "oldest_id": oldest_id,
-            "newest_id": newest_id,
-            "has_more": has_more,
-            "limit": limit
-        }
-
-    async def get_worker_heartbeats(self, queue_name: str) -> List[Dict[str, Any]]:
-        """Get worker heartbeat info for the given queue - scans WORKER keys directly."""
-        worker_list = []
-        current_time = datetime.now(timezone.utc).timestamp()
-
-        # Scan all WORKER keys directly (excluding HISTORY-related keys)
-        # Use RegistryManager instead of scan
-        from jettask.worker.manager import WorkerState as WorkerRegistry
-        from jettask.messaging.registry import QueueRegistry
-        worker_registry = WorkerRegistry(
-        queue_registry = QueueRegistry(
-            redis_client=None,
-            async_redis_client=self.redis,
-            redis_prefix=self.redis_prefix
-        )
-
-        # Get all worker IDs
-        worker_ids = await worker_registry.get_all_workers()
-        worker_keys = [f"{self.redis_prefix}:WORKER:{wid}" for wid in worker_ids]
-
-        # Fetch all worker data in bulk
-        if worker_keys:
-            pipe = self.redis.pipeline()
-            for key in worker_keys:
-                pipe.hgetall(key)
-            all_workers_data = await pipe.execute()
-
-            for i, worker_data in enumerate(all_workers_data):
-                if not worker_data:
-                    continue
-
-                # Check whether the worker belongs to the given queue
-                worker_queues = worker_data.get('queues', '')
-                if queue_name not in worker_queues.split(','):
-                    continue
-
-                worker_id = worker_keys[i].split(':')[-1]
-                last_heartbeat = float(worker_data.get('last_heartbeat', 0))
-                is_alive = worker_data.get('is_alive', 'true').lower() == 'true'
-                consumer_id = worker_data.get('consumer_id', worker_id)
-
-                # Build the display data
-                display_data = {
-                    'consumer_id': consumer_id,
-                    'consumer_name': f"{consumer_id}-{queue_name}",  # Kept for compatibility
-                    'host': worker_data.get('host', 'unknown'),
-                    'pid': int(worker_data.get('pid', 0)),
-                    'queue': queue_name,
-                    'last_heartbeat': last_heartbeat,
-                    'last_heartbeat_time': datetime.fromtimestamp(last_heartbeat).isoformat(),
-                    'seconds_ago': int(current_time - last_heartbeat),
-                    'is_alive': is_alive,
-                    # Queue-specific statistics
-                    'success_count': int(worker_data.get(f'{queue_name}:success_count', 0)),
-                    'failed_count': int(worker_data.get(f'{queue_name}:failed_count', 0)),
-                    'total_count': int(worker_data.get(f'{queue_name}:total_count', 0)),
-                    'running_tasks': int(worker_data.get(f'{queue_name}:running_tasks', 0)),
-                    'avg_processing_time': float(worker_data.get(f'{queue_name}:avg_processing_time', 0.0)),
-                    'avg_latency_time': float(worker_data.get(f'{queue_name}:avg_latency_time', 0.0))
-                }
-
-                # If an offline time exists, add the offline time info
-                if 'offline_time' in worker_data:
-                    display_data['offline_time'] = float(worker_data['offline_time'])
-                    display_data['offline_time_formatted'] = datetime.fromtimestamp(float(worker_data['offline_time'])).isoformat()
-
-                worker_list.append(display_data)
-
-        return worker_list
-
-    async def get_queue_worker_summary(self, queue_name: str) -> Dict[str, Any]:
-        """Get aggregated worker statistics for a queue."""
-        try:
-            # Scan all WORKER keys directly and filter (excluding HISTORY-related keys)
-            # Use RegistryManager instead of scan
-            from jettask.worker.manager import WorkerState as WorkerRegistry
-            from jettask.messaging.registry import QueueRegistry
-            registry = RegistryManager(
-                redis_client=None,
-                async_redis_client=self.redis,
-                redis_prefix=self.redis_prefix
-            )
-
-            # Get all worker IDs
-            worker_ids = await worker_registry.get_all_workers()
-            worker_keys = [f"{self.redis_prefix}:WORKER:{wid}" for wid in worker_ids]
-
-            if not worker_keys:
-                return {
-                    'total_workers': 0,
-                    'online_workers': 0,
-                    'offline_workers': 0,
-                    'total_success_count': 0,
-                    'total_failed_count': 0,
-                    'total_count': 0,
-                    'total_running_tasks': 0,
-                    'avg_processing_time': 0.0,
-                    'avg_latency_time': 0.0
-                }
-
-            # Fetch worker data in bulk
-            pipe = self.redis.pipeline()
-            for key in worker_keys:
-                pipe.hgetall(key)
-            all_workers_data = await pipe.execute()
-
-            # Filter the workers that belong to this queue
-            queue_workers_data = []
-            for i, worker_data in enumerate(all_workers_data):
-                if worker_data and queue_name in worker_data.get('queues', '').split(','):
-                    queue_workers_data.append(worker_data)
-
-            # Aggregate statistics
-            total_workers = len(queue_workers_data)
-            online_workers = 0
-            offline_workers = 0
-            total_success_count = 0
-            total_failed_count = 0
-            total_count = 0
-            total_running_tasks = 0
-            total_processing_time = 0.0
-            processing_time_count = 0
-            total_latency_time = 0.0
-            latency_time_count = 0
-
-            current_time = datetime.now(timezone.utc).timestamp()
-            offline_worker_ids = []  # Track offline worker IDs to avoid double-counting them from history
-
-            for worker_data in queue_workers_data:
-                try:
-                    # Check the worker state
-                    last_heartbeat = float(worker_data.get('last_heartbeat', 0))
-                    is_alive = worker_data.get('is_alive', 'true').lower() == 'true'
-                    worker_id = worker_data.get('consumer_id', '')
-
-                    if is_alive and (current_time - last_heartbeat) < 30:
-                        online_workers += 1
-                        # Only count data for online workers
-                        success_count = int(worker_data.get(f'{queue_name}:success_count', 0))
-                        failed_count = int(worker_data.get(f'{queue_name}:failed_count', 0))
-                        running_tasks = int(worker_data.get(f'{queue_name}:running_tasks', 0))
-                        avg_processing_time = float(worker_data.get(f'{queue_name}:avg_processing_time', 0.0))
-                        avg_latency_time = float(worker_data.get(f'{queue_name}:avg_latency_time', 0.0))
-
-                        total_success_count += success_count
-                        total_failed_count += failed_count
-                        total_count += success_count + failed_count
-                        total_running_tasks += running_tasks
-
-                        if avg_processing_time > 0:
-                            total_processing_time += avg_processing_time
-                            processing_time_count += 1
-
-                        if avg_latency_time > 0:
-                            total_latency_time += avg_latency_time
-                            latency_time_count += 1
-                    else:
-                        offline_workers += 1
-                        # Record the offline worker's ID so it can be counted from history
-                        if worker_id:
-                            offline_worker_ids.append(worker_id)
-
-                except Exception as e:
-                    print(f"Error processing worker summary: {e}")
-                    continue
-
-            # Count data for offline workers (from the WORKER keys)
-            for worker_data in queue_workers_data:
-                try:
-                    # Check whether this is an offline worker
-                    is_alive = worker_data.get('is_alive', 'true').lower() == 'true'
-                    worker_id = worker_data.get('consumer_id', '')
-
-                    if not is_alive and worker_id in offline_worker_ids:
-                        # Count the offline worker's data
-                        success_count = int(worker_data.get(f'{queue_name}:success_count', 0))
-                        failed_count = int(worker_data.get(f'{queue_name}:failed_count', 0))
-
-                        total_success_count += success_count
-                        total_failed_count += failed_count
-                        total_count += success_count + failed_count
-
-                        # Processing time statistics
-                        avg_processing_time = float(worker_data.get(f'{queue_name}:avg_processing_time', 0.0))
-                        if avg_processing_time > 0:
-                            total_processing_time += avg_processing_time
-                            processing_time_count += 1
-
-                        # Latency statistics
-                        avg_latency_time = float(worker_data.get(f'{queue_name}:avg_latency_time', 0.0))
-                        if avg_latency_time > 0:
-                            total_latency_time += avg_latency_time
-                            latency_time_count += 1
-
-                except Exception as e:
-                    print(f"Error processing offline worker stats: {e}")
-                    continue
-
-            # Compute the average processing time (history included)
-            overall_avg_processing_time = 0.0
-            if processing_time_count > 0:
-                overall_avg_processing_time = total_processing_time / processing_time_count
-
-            # Compute the average latency
-            overall_avg_latency_time = 0.0
-            if latency_time_count > 0:
-                overall_avg_latency_time = total_latency_time / latency_time_count
-
-            return {
-                'total_workers': total_workers,
-                'online_workers': online_workers,
-                'offline_workers': offline_workers,
-                'total_success_count': total_success_count,
-                'total_failed_count': total_failed_count,
-                'total_count': total_count,
-                'total_running_tasks': total_running_tasks,
-                'avg_processing_time': round(overall_avg_processing_time, 3),
-                'avg_latency_time': round(overall_avg_latency_time, 3),
-                'history_included': True
-            }
-
-        except Exception as e:
-            print(f"Error getting queue worker summary for {queue_name}: {e}")
-            return {
-                'total_workers': 0,
-                'online_workers': 0,
-                'offline_workers': 0,
-                'total_success_count': 0,
-                'total_failed_count': 0,
-                'total_count': 0,
-                'total_running_tasks': 0,
-                'avg_processing_time': 0.0,
-                'avg_latency_time': 0.0
-            }
-
-    async def get_queue_worker_summary_fast(self, queue_name: str) -> Dict[str, Any]:
-        """Get aggregated worker statistics for a queue (fast version, history excluded)."""
-        try:
-            # Scan all WORKER keys directly (excluding HISTORY-related keys)
-            # Use RegistryManager instead of scan
-            from jettask.worker.manager import WorkerState as WorkerRegistry
-            from jettask.messaging.registry import QueueRegistry
-            registry = RegistryManager(
-                redis_client=None,
-                async_redis_client=self.redis,
-                redis_prefix=self.redis_prefix
-            )
-
-            # Get all worker IDs
-            worker_ids = await worker_registry.get_all_workers()
-            worker_keys = [f"{self.redis_prefix}:WORKER:{wid}" for wid in worker_ids]
-
-            if not worker_keys:
-                return {
-                    'total_workers': 0,
-                    'online_workers': 0,
-                    'offline_workers': 0,
-                    'total_success_count': 0,
-                    'total_failed_count': 0,
-                    'total_count': 0,
-                    'total_running_tasks': 0,
-                    'avg_processing_time': 0.0,
-                    'avg_latency_time': 0.0
-                }
-
-            # Fetch worker data in bulk with a pipeline
-            pipe = self.redis.pipeline()
-            for worker_key in worker_keys:
-                pipe.hgetall(worker_key)
-
-            all_workers_data = await pipe.execute()
-
-            # Filter the workers that belong to this queue
-            worker_data_list = []
-            for worker_data in all_workers_data:
-                if worker_data and queue_name in worker_data.get('queues', '').split(','):
-                    worker_data_list.append(worker_data)
-
-            # Aggregate statistics
-            total_workers = len(worker_data_list)
-            online_workers = 0
-            offline_workers = 0
-            total_success_count = 0
-            total_failed_count = 0
-            total_count = 0
-            total_running_tasks = 0
-            total_processing_time = 0.0
-            processing_time_count = 0
-            total_latency_time = 0.0
-            latency_time_count = 0
-
-            current_time = datetime.now(timezone.utc).timestamp()
-
-            for worker_data in worker_data_list:
-                if not worker_data:
-                    continue
-
-                # Check the worker state
-                last_heartbeat = float(worker_data.get('last_heartbeat', 0))
-                is_alive = worker_data.get('is_alive', 'true').lower() == 'true'
-
-                if is_alive and (current_time - last_heartbeat) < 30:
-                    online_workers += 1
-                    # Only count online workers (the fast version excludes history)
-                    success_count = int(worker_data.get(f'{queue_name}:success_count', 0))
-                    failed_count = int(worker_data.get(f'{queue_name}:failed_count', 0))
-                    running_tasks = int(worker_data.get(f'{queue_name}:running_tasks', 0))
-                    avg_processing_time = float(worker_data.get(f'{queue_name}:avg_processing_time', 0.0))
-                    avg_latency_time = float(worker_data.get(f'{queue_name}:avg_latency_time', 0.0))
-
-                    total_success_count += success_count
-                    total_failed_count += failed_count
-                    total_count += success_count + failed_count
-                    total_running_tasks += running_tasks
-
-                    if avg_processing_time > 0:
-                        total_processing_time += avg_processing_time
-                        processing_time_count += 1
-
-                    if avg_latency_time > 0:
-                        total_latency_time += avg_latency_time
-                        latency_time_count += 1
-                else:
-                    offline_workers += 1
-                    # The fast version does not count offline workers
-
-            # Compute the average processing time
-            avg_processing_time = 0.0
-            if processing_time_count > 0:
-                avg_processing_time = total_processing_time / processing_time_count
-
-            # Compute the average latency
-            avg_latency_time = 0.0
-            if latency_time_count > 0:
-                avg_latency_time = total_latency_time / latency_time_count
-
-            return {
-                'total_workers': total_workers,
-                'online_workers': online_workers,
-                'offline_workers': offline_workers,
-                'total_success_count': total_success_count,
-                'total_failed_count': total_failed_count,
-                'total_count': total_count,
-                'total_running_tasks': total_running_tasks,
-                'avg_processing_time': round(avg_processing_time, 3),
-                'avg_latency_time': round(avg_latency_time, 3)
-            }
-
-        except Exception as e:
-            print(f"Error getting queue worker summary for {queue_name}: {e}")
-            return {
-                'total_workers': 0,
-                'online_workers': 0,
-                'offline_workers': 0,
-                'total_success_count': 0,
-                'total_failed_count': 0,
-                'total_count': 0,
-                'total_running_tasks': 0,
-                'avg_processing_time': 0.0,
-                'avg_latency_time': 0.0
-            }
-
-    async def get_worker_offline_history(self, limit: int = 100, start_time: Optional[float] = None, end_time: Optional[float] = None) -> List[Dict[str, Any]]:
-        """Get the worker offline history - reads offline worker info directly from WORKER keys."""
-        try:
-            # Scan all WORKER keys (excluding HISTORY-related keys)
-            pattern = f"{self.redis_prefix}:WORKER:*"
-            cursor = 0
-            # Use RegistryManager instead of scan
-            from jettask.worker.manager import WorkerState as WorkerRegistry
-            from jettask.messaging.registry import QueueRegistry
-            registry = RegistryManager(
-                redis_client=None,
-                async_redis_client=self.redis,
-                redis_prefix=self.redis_prefix
-            )
-
-            # Get all worker IDs
-            worker_ids = await worker_registry.get_all_workers()
-            worker_keys = [f"{self.redis_prefix}:WORKER:{wid}" for wid in worker_ids]
-
-            if not worker_keys:
-                return []
-
-            # Fetch all worker data in bulk
-            pipe = self.redis.pipeline()
-            for key in worker_keys:
-                pipe.hgetall(key)
-            all_workers_data = await pipe.execute()
-
-            # Collect the offline workers
-            offline_workers = []
-            current_time = time.time()
-
-            for i, worker_data in enumerate(all_workers_data):
-                if not worker_data:
-                    continue
-
-                # Check whether the worker is offline
-                is_alive = worker_data.get('is_alive', 'true').lower() == 'true'
-                if not is_alive and 'offline_time' in worker_data:
-                    offline_time = float(worker_data.get('offline_time', 0))
-
-                    # Filter by time range
-                    if start_time and offline_time < start_time:
-                        continue
-                    if end_time and offline_time > end_time:
-                        continue
-
-                    # Compute the running duration
-                    online_time = float(worker_data.get('created_at', offline_time))
-                    duration_seconds = int(offline_time - online_time)
-
-                    # Build the offline record
-                    record = {
-                        'consumer_id': worker_data.get('consumer_id', ''),
-                        'host': worker_data.get('host', 'unknown'),
-                        'pid': int(worker_data.get('pid', 0)),
-                        'queues': worker_data.get('queues', ''),
-                        'online_time': online_time,
-                        'offline_time': offline_time,
-                        'duration_seconds': duration_seconds,
-                        'last_heartbeat': float(worker_data.get('last_heartbeat', 0)),
-                        'shutdown_reason': worker_data.get('shutdown_reason', 'unknown'),
-                        'online_time_str': datetime.fromtimestamp(online_time).isoformat(),
-                        'offline_time_str': datetime.fromtimestamp(offline_time).isoformat(),
-                    }
-
-                    # Format the running duration
-                    hours = duration_seconds // 3600
-                    minutes = (duration_seconds % 3600) // 60
-                    seconds = duration_seconds % 60
-                    record['duration_str'] = f"{hours}h {minutes}m {seconds}s"
-
-                    # Add statistics (aggregated across all queues)
-                    queues = worker_data.get('queues', '').split(',') if worker_data.get('queues') else []
-                    total_success = 0
-                    total_failed = 0
-                    total_count = 0
-
-                    for queue in queues:
-                        if queue.strip():
-                            queue = queue.strip()
-                            total_success += int(worker_data.get(f'{queue}:success_count', 0))
-                            total_failed += int(worker_data.get(f'{queue}:failed_count', 0))
-                            total_count += int(worker_data.get(f'{queue}:total_count', 0))
-
-                    record['total_success_count'] = total_success
-                    record['total_failed_count'] = total_failed
-                    record['total_count'] = total_count
-                    record['total_running_tasks'] = 0  # Offline workers have no running tasks
-
-                    # Compute the average processing time
-                    if total_count > 0:
-                        total_processing_time = 0.0
-                        for queue in queues:
-                            if queue.strip():
-                                queue = queue.strip()
-                                avg_time = float(worker_data.get(f'{queue}:avg_processing_time', 0))
-                                count = int(worker_data.get(f'{queue}:total_count', 0))
-                                if avg_time > 0 and count > 0:
-                                    total_processing_time += avg_time * count
-                        record['avg_processing_time'] = total_processing_time / total_count
-                    else:
-                        record['avg_processing_time'] = 0.0
-
-                    offline_workers.append((offline_time, record))
-
-            # Sort by offline time, descending
-            offline_workers.sort(key=lambda x: x[0], reverse=True)
-
-            # Return the requested number of records
-            return [record for _, record in offline_workers[:limit]]
-
-        except Exception as e:
-            print(f"Error getting worker offline history: {e}")
-            return []
-
-    async def get_global_stats_with_history(self) -> Dict[str, Any]:
-        """Get global statistics (optimized) - note: history data is no longer double-counted."""
-        try:
-            # Get all queues
-            queues = await self.get_all_queues()
-
-            # Fetch all queue summaries and queue statistics in parallel
-            queue_summaries_task = asyncio.gather(
-                *[self.get_queue_worker_summary_fast(queue) for queue in queues],
-                return_exceptions=True
-            )
-            queue_stats_task = asyncio.gather(
-                *[self.get_queue_stats(queue) for queue in queues],
-                return_exceptions=True
-            )
-
-            queue_summaries, queue_stats = await asyncio.gather(
-                queue_summaries_task, queue_stats_task
-            )
-
-            # Initialize the statistics
-            total_success = 0
-            total_failed = 0
-            total_tasks = 0
-            total_running = 0
-            total_workers = 0
-            online_workers = 0
-            offline_workers = 0
-            total_processing_time = 0.0
-            total_processing_count = 0
-            total_latency_time = 0.0
-            total_latency_count = 0
-
-            # RabbitMQ-style metrics
-            total_messages = 0
-            total_messages_ready = 0
-            total_messages_unacknowledged = 0
-            total_consumers = 0
-            total_publish = 0
-            total_deliver_get = 0
-            total_ack = 0
-
-            # Aggregate the summary statistics
-            for i, summary in enumerate(queue_summaries):
-                if isinstance(summary, Exception):
-                    print(f"Error getting stats for queue {queues[i]}: {summary}")
-                    continue
-
-                total_workers += summary.get('total_workers', 0)
-                online_workers += summary.get('online_workers', 0)
-                offline_workers += summary.get('offline_workers', 0)
-                total_success += summary.get('total_success_count', 0)
-                total_failed += summary.get('total_failed_count', 0)
-                total_tasks += summary.get('total_count', 0)
-                total_running += summary.get('total_running_tasks', 0)
-
-                # Accumulate average processing times (weighted by task count)
-                avg_time = summary.get('avg_processing_time', 0)
-                task_count = summary.get('total_count', 0)
-                if avg_time > 0 and task_count > 0:
-                    total_processing_time += avg_time * task_count
-                    total_processing_count += task_count
-
-                # Accumulate average latencies
-                avg_latency = summary.get('avg_latency_time', 0)
-                if avg_latency > 0 and task_count > 0:
-                    total_latency_time += avg_latency * task_count
-                    total_latency_count += task_count
-
-            # Aggregate the RabbitMQ-style metrics
-            for i, stats in enumerate(queue_stats):
-                if isinstance(stats, Exception):
-                    continue
-
-                total_messages += stats.get('messages', 0)
-                total_messages_ready += stats.get('messages_ready', 0)
-                total_messages_unacknowledged += stats.get('messages_unacknowledged', 0)
-                total_consumers += stats.get('consumers', 0)
-
-                message_stats = stats.get('message_stats', {})
-                total_publish += message_stats.get('publish', 0)
-                total_deliver_get += message_stats.get('deliver_get', 0)
-                total_ack += message_stats.get('ack', 0)
-
-            # Compute the global average processing time
-            global_avg_processing_time = 0.0
-            if total_processing_count > 0:
-                global_avg_processing_time = total_processing_time / total_processing_count
-
-            # Compute the global average latency
-            global_avg_latency_time = 0.0
-            if total_latency_count > 0:
-                global_avg_latency_time = total_latency_time / total_latency_count
-
-            return {
-                # Existing metrics
-                'total_queues': len(queues),
-                'total_workers': total_workers,
-                'online_workers': online_workers,
-                'offline_workers': offline_workers,
-                'total_success_count': total_success,
-                'total_failed_count': total_failed,
-                'total_count': total_tasks,
-                'total_running_tasks': total_running,
-                'avg_processing_time': round(global_avg_processing_time, 3),
-                'avg_latency_time': round(global_avg_latency_time, 3),
-                'history_included': False,
-                # RabbitMQ-style metrics
-                'messages': total_messages,
-                'messages_ready': total_messages_ready,
-                'messages_unacknowledged': total_messages_unacknowledged,
-                'consumers': total_consumers,
-                'message_stats': {
-                    'publish': total_publish,
-                    'deliver_get': total_deliver_get,
-                    'ack': total_ack
-                },
-                'timestamp': datetime.now(timezone.utc).isoformat()
-            }
-
-        except Exception as e:
-            print(f"Error getting global stats: {e}")
-            return {
-                'total_queues': 0,
-                'total_workers': 0,
-                'online_workers': 0,
-                'offline_workers': 0,
-                'total_success_count': 0,
-                'total_failed_count': 0,
-                'total_count': 0,
-                'total_running_tasks': 0,
-                'avg_processing_time': 0.0,
-                'avg_latency_time': 0.0,
-                'history_included': False,
-                'messages': 0,
-                'messages_ready': 0,
-                'messages_unacknowledged': 0,
-                'consumers': 0,
-                'message_stats': {
-                    'publish': 0,
-                    'deliver_get': 0,
-                    'ack': 0
-                },
-                'error': str(e)
-            }
-
-    async def get_all_queues(self) -> List[str]:
-        """Get all queue names - prefers the global:queues set, with caching."""
-        try:
-            # Check whether the cache is still valid
-            current_time = time.time()
-            if self._queues_cache is not None and (current_time - self._queues_cache_time) < self._queues_cache_ttl:
-                return self._queues_cache
-
-            # Prefer the global queue set
-            global_queues_key = f'{self.redis_prefix}:global:queues'
-            queues = await self.redis.smembers(global_queues_key)
-
-            if queues:
-                # If the global queue set exists, use it directly
-                result = sorted(list(queues))
-                self._queues_cache = result
-                self._queues_cache_time = current_time
-                return result
-
-            # Without a global queue set, fall back to scanning
-            queues = set()
-
-            # Optimization: a more precise scan pattern, matching only QUEUE:* keys
-            pattern = f"{self.redis_prefix}:QUEUE:*"
-            cursor = 0
-
-            # Use RegistryManager instead of scan
-            from jettask.worker.manager import WorkerState as WorkerRegistry
-            from jettask.messaging.registry import QueueRegistry
-            registry = RegistryManager(
-                redis_client=None,
-                async_redis_client=self.redis,
-                redis_prefix=self.redis_prefix
-            )
-
-            # Get all queues
-            queues = await queue_registry.get_all_queues()
-
-            # Return the sorted queue list and update the cache
-            result = sorted(list(queues))
-            self._queues_cache = result
-            self._queues_cache_time = current_time
-            return result
-
-        except Exception as e:
-            print(f"Error scanning queues: {e}")
-            return []
-
-    async def get_queue_stats(self, queue_name: str) -> Dict[str, Any]:
-        """Get queue statistics (RabbitMQ-compatible format)."""
-        prefixed_queue_name = self.get_prefixed_queue_name(queue_name)
-
-        try:
-            info = await self.redis.xinfo_stream(prefixed_queue_name)
-            groups = await self.redis.xinfo_groups(prefixed_queue_name)
-        except Exception as e:
-            # If the queue does not exist, return defaults
-            return {
-                "queue": queue_name,
-                "messages": 0,
-                "messages_ready": 0,
-                "messages_unacknowledged": 0,
-                "consumers": 0,
-                "message_stats": {
-                    "publish": 0,
-                    "deliver_get": 0,
-                    "ack": 0
-                },
-                "consumer_groups": [],
-                "error": str(e)
-            }
-
-        # Compute the base metrics
-        total_messages = info["length"]
-        total_pending = 0
-        total_consumers = 0
-        total_delivered = 0
-
-        consumer_groups_info = []
-
-        for group in groups:
-            group_pending = group["pending"]
-            group_consumers_count = group["consumers"]
-
-            total_pending += group_pending
-            total_consumers += group_consumers_count
-
-            group_info = {
-                "name": group["name"],
-                "consumers": group_consumers_count,
-                "pending": group_pending,
-                "last_delivered_id": group["last-delivered-id"]
-            }
-
-            # Get the consumer details
-            try:
-                consumers = await self.redis.xinfo_consumers(prefixed_queue_name, group["name"])
-                group_info["consumer_details"] = consumers
-
-                # Derive deliver and ack counts from the consumer statistics
-                for consumer in consumers:
-                    total_delivered += consumer.get("pel-count", 0)
-
-            except Exception as e:
-                group_info["consumer_details"] = []
-                print(f"Error getting consumers for group {group['name']}: {e}")
-
-            consumer_groups_info.append(group_info)
-
-        # Get more precise message statistics from the worker stats
-        worker_summary = await self.get_queue_worker_summary_fast(queue_name)
-        publish_count = worker_summary.get('total_count', 0)  # Total processed as an approximation of published
-        deliver_count = worker_summary.get('total_success_count', 0) + worker_summary.get('total_failed_count', 0)
-        ack_count = worker_summary.get('total_success_count', 0)
-
-        # Compute ready messages (total queue length - unacknowledged messages)
-        messages_ready = max(0, total_messages - total_pending)
-
-        # RabbitMQ-style statistics
-        stats = {
-            "queue": queue_name,
-            # RabbitMQ-compatible metrics
-            "messages": total_messages,  # Total messages in the queue
-            "messages_ready": messages_ready,  # Messages in the ready state
-            "messages_unacknowledged": total_pending,  # Unacknowledged messages
-            "consumers": total_consumers,  # Number of consumers
-            "message_stats": {
-                "publish": publish_count,  # Messages published to the queue
-                "deliver_get": deliver_count,  # Messages consumed
-                "ack": ack_count  # Messages acknowledged
-            },
-            # Keep the existing details for compatibility
-            "length": info["length"],
-            "first_entry": info.get("first-entry"),
-            "last_entry": info.get("last-entry"),
-            "consumer_groups": consumer_groups_info,
-            # Additional performance metrics
-            "performance_stats": {
-                "avg_processing_time": worker_summary.get('avg_processing_time', 0.0),
-                "avg_latency_time": worker_summary.get('avg_latency_time', 0.0),
-                "total_running_tasks": worker_summary.get('total_running_tasks', 0)
-            }
-        }
-
-        return stats
-
-    async def _heartbeat_scanner(self):
-        """Heartbeat scanner task that periodically checks worker heartbeat states."""
-        logger = logging.getLogger('webui.heartbeat')
-        logger.info("Heartbeat scanner started")
-
-        while self._scanner_running:
-            try:
-                # Use RegistryManager to get all workers, avoiding SCAN
-                from jettask.worker.manager import WorkerState as WorkerRegistry
-                from jettask.messaging.registry import QueueRegistry
-                registry = RegistryManager(
-                    redis_client=None,
-                    async_redis_client=self.redis,
-                    redis_prefix=self.redis_prefix
-                )
-
-                # Get all worker IDs
-                worker_ids = await worker_registry.get_all_workers()
-
-                # Build the worker keys
-                worker_keys = []
-                for worker_id in worker_ids:
-                    worker_key = f"{self.redis_prefix}:WORKER:{worker_id}"
-                    # Filter out HISTORY-related keys (the registry should not contain any)
-                    if ':HISTORY:' not in worker_key:
-                        worker_keys.append(worker_key)
-
-                if worker_keys:
-                    # Fetch worker data in bulk via the WorkerStateManager
-                    current_time = time.time()
-
-                    if self.worker_state_manager:
-                        # Use the WorkerStateManager to fetch all worker info in bulk
-                        all_workers_info = await self.worker_state_manager.get_all_workers_info(only_alive=False)
-
-                        # Check each worker's heartbeat
-                        for worker_id in worker_ids:
-                            worker_data = all_workers_info.get(worker_id)
-                            if not worker_data:
-                                continue
-
-                            try:
-                                # Get the heartbeat-related info
-                                last_heartbeat = float(worker_data.get('last_heartbeat', 0))
-                                is_alive = worker_data.get('is_alive') == 'true'
-                                heartbeat_timeout = float(worker_data.get('heartbeat_timeout', self.default_heartbeat_timeout))
-                                consumer_id = worker_data.get('consumer_id', '')
-
-                                # Check for a timeout
-                                if is_alive and (current_time - last_heartbeat) > heartbeat_timeout:
-                                    logger.info(f"Worker {consumer_id} heartbeat timed out; marking it offline")
-
-                                    # Mark the worker offline via the WorkerStateManager
-                                    await self.worker_state_manager.set_worker_offline(
-                                        worker_id=worker_id,
-                                        reason="heartbeat_timeout"
-                                    )
-
-                            except Exception as e:
-                                logger.error(f"Error checking worker heartbeat: {e}")
-                    else:
-                        # Fallback: use Redis directly
-                        pipe = self.redis.pipeline()
-                        for key in worker_keys:
-                            pipe.hgetall(key)
-                        all_workers_data = await pipe.execute()
-
-                        # Check each worker's heartbeat
-                        for i, worker_data in enumerate(all_workers_data):
-                            if not worker_data:
-                                continue
-
-                            try:
-                                # Get the heartbeat-related info
-                                last_heartbeat = float(worker_data.get('last_heartbeat', 0))
-                                is_alive = worker_data.get('is_alive', 'true').lower() == 'true'
-                                heartbeat_timeout = float(worker_data.get('heartbeat_timeout', self.default_heartbeat_timeout))
-                                consumer_id = worker_data.get('consumer_id', '')
-
-                                # Check for a timeout
-                                if is_alive and (current_time - last_heartbeat) > heartbeat_timeout:
-                                    logger.info(f"Worker {consumer_id} heartbeat timed out; marking it offline")
-
-                                    # Mark the worker offline
-                                    worker_key = worker_keys[i]
-                                    await self.redis.hset(worker_key, 'is_alive', 'false')
-
-                            except Exception as e:
-                                logger.error(f"Error checking worker heartbeat: {e}")
-
-                # Wait for the next scan
-                await asyncio.sleep(self.scanner_interval)
-
-            except asyncio.CancelledError:
-                logger.info("Heartbeat scanner received a cancellation signal")
-                break
-            except Exception as e:
-                logger.error(f"Heartbeat scanner error: {e}")
-                await asyncio.sleep(self.scanner_interval)
-
-        logger.info("Heartbeat scanner stopped")
-
-    async def start_heartbeat_scanner(self):
-        """Start the heartbeat scanner."""
-        if not self._scanner_running:
-            self._scanner_running = True
-            self.scanner_task = asyncio.create_task(self._heartbeat_scanner())
-            logging.getLogger('webui').info("Heartbeat scanner task created")
-
-    async def stop_heartbeat_scanner(self):
-        """Stop the heartbeat scanner."""
-        self._scanner_running = False
-        if self.scanner_task and not self.scanner_task.done():
-            self.scanner_task.cancel()
-            try:
-                await self.scanner_task
-            except asyncio.CancelledError:
-                pass
-
 # Create the global monitor instance
-monitor =
-pg_consumer = None
+monitor = MonitorService()
 
 @asynccontextmanager
 async def lifespan(app: FastAPI):
-    global pg_consumer
 
     # Startup
     try:
@@ -1293,24 +20,13 @@ async def lifespan(app: FastAPI):
         # Check whether the Nacos configuration is in use
         use_nacos = os.getenv('USE_NACOS', 'false').lower() == 'true'
 
-        #
-        from jettask.persistence.db_manager import init_db_manager
-        await init_db_manager(use_nacos=use_nacos)
-
-        # Create the data-access instances
-        from jettask.persistence.base import JetTaskDataAccess
-        from jettask.persistence.namespace import get_namespace_data_access
+        # Manage database connections directly via connector.py
         from jettask.config.task_center import task_center_config
 
-
-
-
-
-        app.state.data_access = data_access
-        app.state.namespace_data_access = namespace_data_access
-
-        # Initialize the JetTask data access
-        await data_access.initialize()
+        # Store the configuration in app.state for the routes to use
+        app.state.redis_url = os.environ.get('JETTASK_REDIS_URL', 'redis://localhost:6379/0')
+        app.state.pg_url = os.environ.get('JETTASK_PG_URL', 'postgresql+asyncpg://jettask:123456@localhost:5432/jettask')
+        app.state.redis_prefix = os.environ.get('JETTASK_REDIS_PREFIX', 'jettask')
 
         # Log the task center configuration
         logger.info("=" * 60)
@@ -1322,18 +38,14 @@ async def lifespan(app: FastAPI):
         logger.info("=" * 60)
 
         # Connect the monitor
-        await monitor.connect()
-        # Start the heartbeat scanner
-        await monitor.start_heartbeat_scanner()
+        # await monitor.connect()
+        # # Start the heartbeat scanner
+        # await monitor.start_heartbeat_scanner()
+        # # Store the monitor in app.state for the new routes to use
+        # app.state.monitor = monitor
 
-        #
-
-            redis_config = RedisConfig.from_env()
-            pg_consumer = PostgreSQLConsumer(app.state.pg_config, redis_config)
-            await pg_consumer.start()
-            logging.info("PostgreSQL consumer started")
-        else:
-            logging.info("PostgreSQL consumer disabled (use --with-consumer to enable)")
+        # The PostgreSQL consumer is deprecated; the unified database manager handles this
+        logging.info("PostgreSQL consumer disabled (use --with-consumer to enable)")
 
         logger.info("JetTask WebUI started successfully")
     except Exception as e:
@@ -1350,23 +62,8 @@ async def lifespan(app: FastAPI):
         await monitor.stop_heartbeat_scanner()
         await monitor.close()
 
-        #
-
-            await pg_consumer.stop()
-
-        # Close the data access
-        if hasattr(app.state, 'data_access'):
-            await app.state.data_access.close()
-
-        # Close the database manager
-        from jettask.persistence.db_manager import close_db_manager
-        await close_db_manager()
-
-        # Dispose of the SQLAlchemy engine
-        global async_engine
-        if async_engine:
-            await async_engine.dispose()
-            async_engine = None
+        # Database connection pools are managed globally by connector.py
+        # No explicit shutdown is needed; they are cleaned up when the process exits
 
         logger.info("JetTask WebUI shutdown complete")
     except Exception as e:
@@ -1385,1019 +82,39 @@ app.add_middleware(
     allow_headers=["*"],  # Allow all request headers
 )
 
+# Configure the namespace auto-injection middleware
+# It detects the {namespace} parameter in a route and injects it into request.state.ns
+from jettask.webui.middleware import NamespaceMiddleware
+app.add_middleware(NamespaceMiddleware)
+logger.info("NamespaceMiddleware registered - every route containing {namespace} will have the namespace context injected automatically")
+
 # Register the API routes
 from jettask.webui.api import api_router
 app.include_router(api_router)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        end_dt = datetime.now(timezone.utc)
-    else:
-        end_dt = parse_iso_datetime(end_time)
-
-    if not start_time:
-        start_dt = end_dt - timedelta(hours=1)
-    else:
-        start_dt = parse_iso_datetime(start_time)
-
-    # Parse the time interval
-    interval_minutes = 5  # Default: 5 minutes
-    if interval.endswith('m'):
-        interval_minutes = int(interval[:-1])
-    elif interval.endswith('h'):
-        interval_minutes = int(interval[:-1]) * 60
-
-    # Get the database engine
-    engine = await get_db_engine()
-    if not engine:
-        return {
-            "timeline": [],
-            "interval": interval,
-            "start_time": start_dt.isoformat(),
-            "end_time": end_dt.isoformat(),
-            "error": "PostgreSQL connection not configured"
-        }
-
-    try:
-        async with AsyncSessionLocal() as session:
-            # Use a raw SQL query via SQLAlchemy (needed for the complex time bucketing)
-            query = text(f"""
-                SELECT
-                    DATE_TRUNC('minute', created_at) -
-                    INTERVAL '{interval_minutes} minutes' * (EXTRACT(MINUTE FROM created_at)::int % {interval_minutes}) as time_bucket,
-                    COUNT(*) as count,
-                    SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) as completed_count,
-                    SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END) as failed_count,
-                    AVG(CASE WHEN status = 'completed' AND processing_time IS NOT NULL
-                        THEN processing_time ELSE NULL END) as avg_processing_time
-                FROM tasks
-                WHERE queue_name = :queue_name
-                AND created_at >= :start_dt
-                AND created_at < :end_dt
-                GROUP BY time_bucket
-                ORDER BY time_bucket
-            """)
-
-            result = await session.execute(query, {
-                'queue_name': queue_name,
-                'start_dt': start_dt,
-                'end_dt': end_dt
-            })
-            rows = result.mappings().all()  # Use mappings() to get dict-like results
-
-            # Build the timeline data
-            timeline = []
-            for row in rows:
-                timeline.append({
-                    "time": row['time_bucket'].isoformat(),
-                    "count": row['count'],
-                    "completed_count": row['completed_count'],
-                    "failed_count": row['failed_count'],
-                    "avg_processing_time": float(row['avg_processing_time']) if row['avg_processing_time'] else 0
-                })
-
-            # Fill in the missing time points
-            filled_timeline = []
-            current_time = start_dt
-            timeline_dict = {item['time']: item for item in timeline}
-
-            while current_time < end_dt:
-                time_key = current_time.isoformat()
-                if time_key in timeline_dict:
-                    filled_timeline.append(timeline_dict[time_key])
-                else:
-                    filled_timeline.append({
-                        "time": time_key,
-                        "count": 0,
-                        "completed_count": 0,
-                        "failed_count": 0,
-                        "avg_processing_time": 0
-                    })
-                current_time += timedelta(minutes=interval_minutes)
-
-            return {
-                "timeline": filled_timeline,
-                "interval": interval,
-                "start_time": start_dt.isoformat(),
-                "end_time": end_dt.isoformat()
-            }
-
-    except Exception as e:
-        logger.error(f"Error fetching timeline from PostgreSQL: {e}")
-        return {
-            "timeline": [],
-            "interval": interval,
-            "start_time": start_dt.isoformat(),
-            "end_time": end_dt.isoformat(),
-            "error": str(e)
-        }
-
-@app.get("/api/queues/timeline/pg")
-async def get_queues_timeline_from_pg(
-    queues: str = Query(..., description="Comma-separated list of queue names"),
|
1519
|
-
start_time: Optional[str] = None,
|
1520
|
-
end_time: Optional[str] = None
|
1521
|
-
):
|
1522
|
-
"""从PostgreSQL获取多个队列的任务时间分布数据"""
|
1523
|
-
# 解析队列列表
|
1524
|
-
if not queues or queues.strip() == "":
|
1525
|
-
# 如果没有提供队列,返回空结果
|
1526
|
-
# 计算默认时间范围
|
1527
|
-
end_dt = datetime.now(timezone.utc) if not end_time else parse_iso_datetime(end_time)
|
1528
|
-
start_dt = (end_dt - timedelta(hours=1)) if not start_time else parse_iso_datetime(start_time)
|
1529
|
-
|
1530
|
-
return {
|
1531
|
-
"queues": [],
|
1532
|
-
"start_time": start_dt.isoformat(),
|
1533
|
-
"end_time": end_dt.isoformat(),
|
1534
|
-
"interval": interval,
|
1535
|
-
"message": "No queues selected"
|
1536
|
-
}
|
1537
|
-
|
1538
|
-
queue_list = [q.strip() for q in queues.split(',') if q.strip()][:10] # 最多10个队列
|
1539
|
-
|
1540
|
-
# 如果没有提供时间范围,默认最近1小时
|
1541
|
-
if not end_time:
|
1542
|
-
end_dt = datetime.now(timezone.utc)
|
1543
|
-
else:
|
1544
|
-
end_dt = parse_iso_datetime(end_time)
|
1545
|
-
|
1546
|
-
if not start_time:
|
1547
|
-
start_dt = end_dt - timedelta(hours=1)
|
1548
|
-
else:
|
1549
|
-
start_dt = parse_iso_datetime(start_time)
|
1550
|
-
|
1551
|
-
logger.info(f'{start_dt=} {end_dt=}')
|
1552
|
-
|
1553
|
-
# 根据时间范围自动计算合适的时间间隔
|
1554
|
-
duration = (end_dt - start_dt).total_seconds()
|
1555
|
-
|
1556
|
-
# 动态计算时间间隔
|
1557
|
-
if duration <= 300: # <= 5分钟
|
1558
|
-
interval_seconds = 0.5 # 500毫秒
|
1559
|
-
interval_type = 'millisecond'
|
1560
|
-
interval = '500ms'
|
1561
|
-
elif duration <= 900: # <= 15分钟
|
1562
|
-
interval_seconds = 1 # 1秒
|
1563
|
-
interval_type = 'second'
|
1564
|
-
interval = '1s'
|
1565
|
-
elif duration <= 1800: # <= 30分钟
|
1566
|
-
interval_seconds = 2 # 2秒
|
1567
|
-
interval_type = 'second'
|
1568
|
-
interval = '2s'
|
1569
|
-
elif duration <= 3600: # <= 1小时
|
1570
|
-
interval_seconds = 30 # 30秒
|
1571
|
-
interval_type = 'second'
|
1572
|
-
interval = '30s'
|
1573
|
-
elif duration <= 10800: # <= 3小时
|
1574
|
-
interval_seconds = 300 # 5分钟
|
1575
|
-
interval_type = 'minute'
|
1576
|
-
interval = '5m'
|
1577
|
-
elif duration <= 21600: # <= 6小时
|
1578
|
-
interval_seconds = 600 # 10分钟
|
1579
|
-
interval_type = 'minute'
|
1580
|
-
interval = '10m'
|
1581
|
-
elif duration <= 43200: # <= 12小时
|
1582
|
-
interval_seconds = 1800 # 30分钟
|
1583
|
-
interval_type = 'minute'
|
1584
|
-
interval = '30m'
|
1585
|
-
elif duration <= 86400: # <= 24小时
|
1586
|
-
interval_seconds = 3600 # 1小时
|
1587
|
-
interval_type = 'hour'
|
1588
|
-
interval = '1h'
|
1589
|
-
elif duration <= 172800: # <= 2天
|
1590
|
-
interval_seconds = 7200 # 2小时
|
1591
|
-
interval_type = 'hour'
|
1592
|
-
interval = '2h'
|
1593
|
-
elif duration <= 604800: # <= 7天
|
1594
|
-
interval_seconds = 21600 # 6小时
|
1595
|
-
interval_type = 'hour'
|
1596
|
-
interval = '6h'
|
1597
|
-
else: # > 7天
|
1598
|
-
interval_seconds = 86400 # 1天
|
1599
|
-
interval_type = 'hour'
|
1600
|
-
interval = '24h'
|
1601
|
-
|
1602
|
-
# 转换为分钟数(用于兼容旧代码)
|
1603
|
-
interval_minutes = interval_seconds / 60
|
1604
|
-
|
1605
|
-
logger.info(f"Time range: {duration}s, using interval: {interval} -> {interval_seconds} seconds, type: {interval_type}")
|
1606
|
-
|
1607
|
-
# 获取数据库引擎
|
1608
|
-
engine = await get_db_engine()
|
1609
|
-
if not engine:
|
1610
|
-
return {
|
1611
|
-
"queues": [],
|
1612
|
-
"start_time": start_dt.isoformat(),
|
1613
|
-
"end_time": end_dt.isoformat(),
|
1614
|
-
"interval": interval,
|
1615
|
-
"error": "PostgreSQL connection not configured"
|
1616
|
-
}
|
1617
|
-
|
1618
|
-
result = []
|
1619
|
-
|
1620
|
-
for queue_name in queue_list:
|
1621
|
-
try:
|
1622
|
-
async with AsyncSessionLocal() as session:
|
1623
|
-
# 使用更简单直接的时间分组方法
|
1624
|
-
if interval_type == 'millisecond':
|
1625
|
-
# 对于毫秒级别的间隔
|
1626
|
-
query = text(f"""
|
1627
|
-
SELECT
|
1628
|
-
DATE_TRUNC('second', created_at) +
|
1629
|
-
INTERVAL '{interval_seconds} seconds' * FLOOR(EXTRACT(MILLISECONDS FROM created_at) / ({interval_seconds} * 1000)) as time_bucket,
|
1630
|
-
COUNT(*) as count
|
1631
|
-
FROM tasks
|
1632
|
-
WHERE queue_name = :queue_name
|
1633
|
-
AND created_at >= :start_dt
|
1634
|
-
AND created_at < :end_dt
|
1635
|
-
GROUP BY time_bucket
|
1636
|
-
ORDER BY time_bucket
|
1637
|
-
""")
|
1638
|
-
elif interval_type == 'second':
|
1639
|
-
# 对于秒级别的间隔
|
1640
|
-
query = text(f"""
|
1641
|
-
SELECT
|
1642
|
-
DATE_TRUNC('minute', created_at) +
|
1643
|
-
INTERVAL '{interval_seconds} seconds' * FLOOR(EXTRACT(SECOND FROM created_at) / {interval_seconds}) as time_bucket,
|
1644
|
-
COUNT(*) as count
|
1645
|
-
FROM tasks
|
1646
|
-
WHERE queue_name = :queue_name
|
1647
|
-
AND created_at >= :start_dt
|
1648
|
-
AND created_at < :end_dt
|
1649
|
-
GROUP BY time_bucket
|
1650
|
-
ORDER BY time_bucket
|
1651
|
-
""")
|
1652
|
-
elif interval_type == 'minute' and interval_minutes < 60:
|
1653
|
-
# 对于分钟级别的间隔(小于1小时)
|
1654
|
-
query = text(f"""
|
1655
|
-
SELECT
|
1656
|
-
DATE_TRUNC('hour', created_at) +
|
1657
|
-
INTERVAL '{interval_minutes} minutes' * FLOOR(EXTRACT(MINUTE FROM created_at) / {interval_minutes}) as time_bucket,
|
1658
|
-
COUNT(*) as count
|
1659
|
-
FROM tasks
|
1660
|
-
WHERE queue_name = :queue_name
|
1661
|
-
AND created_at >= :start_dt
|
1662
|
-
AND created_at < :end_dt
|
1663
|
-
GROUP BY time_bucket
|
1664
|
-
ORDER BY time_bucket
|
1665
|
-
""")
|
1666
|
-
elif interval_minutes == 60:
|
1667
|
-
# 对于1小时间隔,直接使用小时截断
|
1668
|
-
query = text("""
|
1669
|
-
SELECT
|
1670
|
-
DATE_TRUNC('hour', created_at) as time_bucket,
|
1671
|
-
COUNT(*) as count
|
1672
|
-
FROM tasks
|
1673
|
-
WHERE queue_name = :queue_name
|
1674
|
-
AND created_at >= :start_dt
|
1675
|
-
AND created_at < :end_dt
|
1676
|
-
GROUP BY time_bucket
|
1677
|
-
ORDER BY time_bucket
|
1678
|
-
""")
|
1679
|
-
else:
|
1680
|
-
# 对于大于1小时的间隔,使用小时级别的计算
|
1681
|
-
interval_hours = int(interval_minutes // 60)
|
1682
|
-
query = text(f"""
|
1683
|
-
SELECT
|
1684
|
-
DATE_TRUNC('day', created_at) +
|
1685
|
-
INTERVAL '{interval_hours} hours' * FLOOR(EXTRACT(HOUR FROM created_at) / {interval_hours}) as time_bucket,
|
1686
|
-
COUNT(*) as count
|
1687
|
-
FROM tasks
|
1688
|
-
WHERE queue_name = :queue_name
|
1689
|
-
AND created_at >= :start_dt
|
1690
|
-
AND created_at < :end_dt
|
1691
|
-
GROUP BY time_bucket
|
1692
|
-
ORDER BY time_bucket
|
1693
|
-
""")
|
1694
|
-
params = {
|
1695
|
-
'queue_name': queue_name,
|
1696
|
-
'start_dt': start_dt,
|
1697
|
-
'end_dt': end_dt
|
1698
|
-
}
|
1699
|
-
|
1700
|
-
# 先绑定参数
|
1701
|
-
bound_query = query.bindparams(**params)
|
1702
|
-
|
1703
|
-
# 生成可直接执行的 SQL(带参数值)
|
1704
|
-
compiled_sql = bound_query.compile(
|
1705
|
-
dialect=postgresql.dialect(),
|
1706
|
-
compile_kwargs={"literal_binds": True}
|
1707
|
-
).string
|
1708
|
-
compiled_sql = compiled_sql.replace("%%", "%")
|
1709
|
-
|
1710
|
-
print("可直接复制到 Navicat 执行的 SQL:\n", compiled_sql)
|
1711
|
-
|
1712
|
-
# 再执行
|
1713
|
-
result_obj = await session.execute(query, params)
|
1714
|
-
rows = result_obj.mappings().all() # Use mappings() to get dict-like results
|
1715
|
-
logger.info(f'{rows=}')
|
1716
|
-
# 构建时间轴数据
|
1717
|
-
timeline = []
|
1718
|
-
for row in rows:
|
1719
|
-
timeline.append({
|
1720
|
-
"time": row['time_bucket'].isoformat(),
|
1721
|
-
"count": row['count']
|
1722
|
-
})
|
1723
|
-
|
1724
|
-
# 填充缺失的时间点
|
1725
|
-
filled_timeline = []
|
1726
|
-
|
1727
|
-
# 构建一个时间到数据的映射,用于快速查找
|
1728
|
-
# 由于可能存在时区或微小时间差异,我们需要更灵活的匹配
|
1729
|
-
timeline_data = []
|
1730
|
-
for item in timeline:
|
1731
|
-
dt = datetime.fromisoformat(item['time'])
|
1732
|
-
timeline_data.append((dt, item['count']))
|
1733
|
-
|
1734
|
-
# 对timeline_data按时间排序
|
1735
|
-
timeline_data.sort(key=lambda x: x[0])
|
1736
|
-
|
1737
|
-
# 生成完整的时间序列
|
1738
|
-
filled_timeline = []
|
1739
|
-
|
1740
|
-
# 对齐到interval
|
1741
|
-
def align_to_interval(dt, interval_seconds):
|
1742
|
-
"""对齐时间到interval_seconds的整数倍"""
|
1743
|
-
if interval_seconds >= 3600: # 大于等于1小时
|
1744
|
-
# 按小时对齐
|
1745
|
-
dt = dt.replace(minute=0, second=0, microsecond=0)
|
1746
|
-
interval_hours = interval_seconds // 3600
|
1747
|
-
aligned_hour = (dt.hour // interval_hours) * interval_hours
|
1748
|
-
return dt.replace(hour=aligned_hour)
|
1749
|
-
elif interval_seconds >= 60: # 大于等于1分钟
|
1750
|
-
# 按分钟对齐
|
1751
|
-
dt = dt.replace(second=0, microsecond=0)
|
1752
|
-
interval_minutes = interval_seconds // 60
|
1753
|
-
total_minutes = dt.hour * 60 + dt.minute
|
1754
|
-
aligned_total_minutes = (total_minutes // interval_minutes) * interval_minutes
|
1755
|
-
aligned_hour = aligned_total_minutes // 60
|
1756
|
-
aligned_minute = aligned_total_minutes % 60
|
1757
|
-
return dt.replace(hour=aligned_hour, minute=aligned_minute)
|
1758
|
-
elif interval_seconds >= 1: # 秒级别
|
1759
|
-
# 按秒对齐
|
1760
|
-
dt = dt.replace(microsecond=0)
|
1761
|
-
aligned_second = int(dt.second // interval_seconds) * int(interval_seconds)
|
1762
|
-
return dt.replace(second=aligned_second)
|
1763
|
-
else: # 毫秒级别
|
1764
|
-
# 按毫秒对齐
|
1765
|
-
total_ms = dt.microsecond / 1000 # 转换为毫秒
|
1766
|
-
interval_ms = interval_seconds * 1000
|
1767
|
-
aligned_ms = int(total_ms // interval_ms) * interval_ms
|
1768
|
-
aligned_microsecond = int(aligned_ms * 1000)
|
1769
|
-
return dt.replace(microsecond=aligned_microsecond)
|
1770
|
-
|
1771
|
-
current_time = align_to_interval(start_dt, interval_seconds)
|
1772
|
-
|
1773
|
-
# 用于追踪我们在timeline_data中的位置
|
1774
|
-
timeline_index = 0
|
1775
|
-
|
1776
|
-
while current_time < end_dt:
|
1777
|
-
# 查找是否有匹配的数据点
|
1778
|
-
# 允许最多interval_seconds/2的误差
|
1779
|
-
tolerance = timedelta(seconds=interval_seconds/2)
|
1780
|
-
found = False
|
1781
|
-
|
1782
|
-
# 从当前位置开始查找
|
1783
|
-
while timeline_index < len(timeline_data):
|
1784
|
-
data_time, count = timeline_data[timeline_index]
|
1785
|
-
|
1786
|
-
# 计算时间差(秒)
|
1787
|
-
time_diff = abs((data_time - current_time).total_seconds())
|
1788
|
-
|
1789
|
-
if time_diff < interval_seconds / 2:
|
1790
|
-
# 找到匹配的数据
|
1791
|
-
filled_timeline.append({
|
1792
|
-
"time": current_time.isoformat(),
|
1793
|
-
"count": count
|
1794
|
-
})
|
1795
|
-
found = True
|
1796
|
-
timeline_index += 1
|
1797
|
-
break
|
1798
|
-
elif data_time > current_time + tolerance:
|
1799
|
-
# 数据时间已经超过当前时间太多,停止查找
|
1800
|
-
break
|
1801
|
-
else:
|
1802
|
-
# 这个数据点太早了,继续查找下一个
|
1803
|
-
timeline_index += 1
|
1804
|
-
|
1805
|
-
if not found:
|
1806
|
-
# 没有找到匹配的数据,填充0
|
1807
|
-
filled_timeline.append({
|
1808
|
-
"time": current_time.isoformat(),
|
1809
|
-
"count": 0
|
1810
|
-
})
|
1811
|
-
|
1812
|
-
current_time += timedelta(seconds=interval_seconds)
|
1813
|
-
result.append({
|
1814
|
-
"queue": queue_name,
|
1815
|
-
"timeline": {
|
1816
|
-
"timeline": filled_timeline,
|
1817
|
-
"interval": interval
|
1818
|
-
}
|
1819
|
-
})
|
1820
|
-
|
1821
|
-
except Exception as e:
|
1822
|
-
logger.error(f"Error fetching timeline for queue {queue_name}: {e}")
|
1823
|
-
result.append({
|
1824
|
-
"queue": queue_name,
|
1825
|
-
"timeline": {
|
1826
|
-
"timeline": [],
|
1827
|
-
"interval": interval,
|
1828
|
-
"error": str(e)
|
1829
|
-
}
|
1830
|
-
})
|
1831
|
-
# logger.info(f'{result=}')
|
1832
|
-
return {
|
1833
|
-
"queues": result,
|
1834
|
-
"start_time": start_dt.isoformat(),
|
1835
|
-
"end_time": end_dt.isoformat(),
|
1836
|
-
"interval": interval
|
1837
|
-
}
|
1838
|
-
|
1839
|
-
@app.get("/api/queue/{queue_name}/timeline")
|
1840
|
-
async def get_queue_timeline(
|
1841
|
-
queue_name: str,
|
1842
|
-
interval: str = "1m",
|
1843
|
-
duration: str = "1h",
|
1844
|
-
start_time: Optional[str] = None,
|
1845
|
-
end_time: Optional[str] = None,
|
1846
|
-
context: str = "detail" # 'overview' for homepage, 'detail' for queue detail page
|
1847
|
-
):
|
1848
|
-
"""获取队列任务的时间分布(用于时间轴)"""
|
1849
|
-
try:
|
1850
|
-
# 解析时间间隔和持续时间
|
1851
|
-
interval_seconds = parse_time_duration(interval)
|
1852
|
-
|
1853
|
-
# 根据上下文设置不同的数据限制
|
1854
|
-
if context == "overview":
|
1855
|
-
# 首页概览:固定获取最近1小时的所有数据
|
1856
|
-
duration_seconds = 3600 # 1小时
|
1857
|
-
now = int(datetime.now(timezone.utc).timestamp() * 1000)
|
1858
|
-
start = now - duration_seconds * 1000
|
1859
|
-
min_id = f"{start}-0"
|
1860
|
-
max_id = "+"
|
1861
|
-
max_count = 100000 # 首页概览获取所有数据
|
1862
|
-
else:
|
1863
|
-
# 队列详情页:根据参数获取,但限制最多10000条
|
1864
|
-
if start_time and end_time:
|
1865
|
-
# 使用提供的时间范围
|
1866
|
-
min_id = start_time
|
1867
|
-
max_id = end_time if end_time != '+' else '+'
|
1868
|
-
else:
|
1869
|
-
# 使用duration参数计算时间范围
|
1870
|
-
duration_seconds = parse_time_duration(duration)
|
1871
|
-
now = int(datetime.now(timezone.utc).timestamp() * 1000)
|
1872
|
-
start = now - duration_seconds * 1000
|
1873
|
-
min_id = f"{start}-0"
|
1874
|
-
max_id = "+"
|
1875
|
-
max_count = 10000 # 详情页限制10000条
|
1876
|
-
|
1877
|
-
# 获取指定时间范围内的消息
|
1878
|
-
prefixed_queue_name = monitor.get_prefixed_queue_name(queue_name)
|
1879
|
-
print(f'{prefixed_queue_name=} {min_id=} {max_id=} {max_count=}')
|
1880
|
-
messages = await monitor.redis.xrange(
|
1881
|
-
prefixed_queue_name,
|
1882
|
-
min=min_id,
|
1883
|
-
max=max_id,
|
1884
|
-
count=max_count
|
1885
|
-
)
|
1886
|
-
|
1887
|
-
# 按时间间隔统计任务数量
|
1888
|
-
buckets = {}
|
1889
|
-
bucket_size = interval_seconds * 1000 # 转换为毫秒
|
1890
|
-
|
1891
|
-
# 计算实际的时间范围用于生成时间轴
|
1892
|
-
if start_time and end_time:
|
1893
|
-
# 从参数中解析时间范围
|
1894
|
-
if start_time != '-':
|
1895
|
-
actual_start = int(start_time.split('-')[0])
|
1896
|
-
else:
|
1897
|
-
actual_start = int(datetime.now(timezone.utc).timestamp() * 1000) - 86400000 # 默认24小时前
|
1898
|
-
|
1899
|
-
if end_time != '+':
|
1900
|
-
actual_end = int(end_time.split('-')[0])
|
1901
|
-
else:
|
1902
|
-
actual_end = int(datetime.now(timezone.utc).timestamp() * 1000)
|
1903
|
-
else:
|
1904
|
-
# 使用duration参数计算的时间范围
|
1905
|
-
actual_start = start
|
1906
|
-
actual_end = now
|
1907
|
-
|
1908
|
-
for msg_id, _ in messages:
|
1909
|
-
# 从消息ID提取时间戳
|
1910
|
-
timestamp = int(msg_id.split('-')[0])
|
1911
|
-
bucket_key = (timestamp // bucket_size) * bucket_size
|
1912
|
-
buckets[bucket_key] = buckets.get(bucket_key, 0) + 1
|
1913
|
-
|
1914
|
-
# 转换为时间序列数据
|
1915
|
-
timeline_data = []
|
1916
|
-
current_bucket = (actual_start // bucket_size) * bucket_size
|
1917
|
-
|
1918
|
-
while current_bucket <= actual_end:
|
1919
|
-
timeline_data.append({
|
1920
|
-
"timestamp": current_bucket,
|
1921
|
-
"count": buckets.get(current_bucket, 0)
|
1922
|
-
})
|
1923
|
-
current_bucket += bucket_size
|
1924
|
-
|
1925
|
-
# 计算实际任务总数
|
1926
|
-
total_tasks = len(messages)
|
1927
|
-
|
1928
|
-
# 检查是否达到数据限制
|
1929
|
-
has_more = False
|
1930
|
-
if context == "detail" and total_tasks >= max_count:
|
1931
|
-
has_more = True
|
1932
|
-
|
1933
|
-
return {
|
1934
|
-
"timeline": timeline_data,
|
1935
|
-
"interval": interval,
|
1936
|
-
"duration": duration,
|
1937
|
-
"start": actual_start,
|
1938
|
-
"end": actual_end,
|
1939
|
-
"total_tasks": total_tasks, # 添加实际任务总数
|
1940
|
-
"message_count": len(messages), # 实际获取到的消息数量
|
1941
|
-
"has_more": has_more, # 是否还有更多数据
|
1942
|
-
"limit": max_count if context == "detail" else None # 数据限制
|
1943
|
-
}
|
1944
|
-
|
1945
|
-
except Exception as e:
|
1946
|
-
print(f"Error getting timeline for queue {queue_name}: {e}")
|
1947
|
-
return {
|
1948
|
-
"timeline": [],
|
1949
|
-
"error": str(e)
|
1950
|
-
}
|
1951
|
-
|
1952
|
-
def parse_time_duration(duration_str: str) -> int:
|
1953
|
-
"""解析时间字符串为秒数 (如 '1h', '10m', '30s')"""
|
1954
|
-
units = {
|
1955
|
-
's': 1,
|
1956
|
-
'm': 60,
|
1957
|
-
'h': 3600,
|
1958
|
-
'd': 86400
|
1959
|
-
}
|
1960
|
-
|
1961
|
-
if duration_str[-1] in units:
|
1962
|
-
value = int(duration_str[:-1])
|
1963
|
-
unit = duration_str[-1]
|
1964
|
-
return value * units[unit]
|
1965
|
-
|
1966
|
-
# 默认为秒
|
1967
|
-
return int(duration_str)
|
1968
|
-
|
1969
|
-
@app.get("/api/task/{event_id}/result")
|
1970
|
-
async def get_task_result(event_id: str):
|
1971
|
-
"""获取单个任务的结果"""
|
1972
|
-
result_key = f"{monitor.redis_prefix}:RESULT:{event_id}"
|
1973
|
-
result = await monitor.redis.get(result_key)
|
1974
|
-
return {"event_id": event_id, "result": result}
|
1975
|
-
|
1976
|
-
@app.get("/api/queues")
|
1977
|
-
async def get_queues():
|
1978
|
-
"""获取所有队列"""
|
1979
|
-
queues = await monitor.get_all_queues()
|
1980
|
-
return {"queues": queues}
|
1981
|
-
|
1982
|
-
@app.get("/api/queue/{queue_name}/stats")
|
1983
|
-
async def get_queue_stats(queue_name: str):
|
1984
|
-
"""获取队列统计信息"""
|
1985
|
-
try:
|
1986
|
-
stats = await monitor.get_queue_stats(queue_name)
|
1987
|
-
return stats
|
1988
|
-
except Exception as e:
|
1989
|
-
return {"error": str(e)}
|
1990
|
-
|
1991
|
-
@app.get("/api/queue/{queue_name}/workers")
|
1992
|
-
async def get_queue_workers(queue_name: str):
|
1993
|
-
"""获取队列的Worker信息"""
|
1994
|
-
workers = await monitor.get_worker_heartbeats(queue_name)
|
1995
|
-
return {"queue": queue_name, "workers": workers}
|
1996
|
-
|
1997
|
-
@app.get("/api/queue/{queue_name}/worker-summary")
|
1998
|
-
async def get_queue_worker_summary(queue_name: str):
|
1999
|
-
"""获取队列的Worker汇总统计信息"""
|
2000
|
-
summary = await monitor.get_queue_worker_summary(queue_name)
|
2001
|
-
return {"queue": queue_name, "summary": summary}
|
2002
|
-
|
2003
|
-
@app.get("/api/workers/offline-history")
|
2004
|
-
async def get_workers_offline_history(
|
2005
|
-
limit: int = 100,
|
2006
|
-
start_time: Optional[float] = None,
|
2007
|
-
end_time: Optional[float] = None
|
2008
|
-
):
|
2009
|
-
"""获取所有worker的下线历史记录"""
|
2010
|
-
history = await monitor.get_worker_offline_history(limit, start_time, end_time)
|
2011
|
-
return {"history": history, "total": len(history)}
|
2012
|
-
|
2013
|
-
@app.get("/api/global-stats")
|
2014
|
-
async def get_global_stats():
|
2015
|
-
"""获取全局统计信息(包含历史记录)"""
|
2016
|
-
stats = await monitor.get_global_stats_with_history()
|
2017
|
-
return stats
|
2018
|
-
|
2019
|
-
@app.get("/api/global-stats/light")
|
2020
|
-
async def get_global_stats_light():
|
2021
|
-
"""获取轻量级全局统计信息(不包含历史记录)"""
|
2022
|
-
try:
|
2023
|
-
# 获取所有队列
|
2024
|
-
queues = await monitor.get_all_queues()
|
2025
|
-
|
2026
|
-
# 并行获取所有队列的快速汇总和简单统计
|
2027
|
-
summaries_task = asyncio.gather(
|
2028
|
-
*[monitor.get_queue_worker_summary_fast(queue) for queue in queues],
|
2029
|
-
return_exceptions=True
|
2030
|
-
)
|
2031
|
-
|
2032
|
-
# 获取基础队列信息(不获取完整stats以提高性能)
|
2033
|
-
queue_lengths_task = asyncio.gather(
|
2034
|
-
*[monitor.redis.xlen(monitor.get_prefixed_queue_name(queue)) for queue in queues],
|
2035
|
-
return_exceptions=True
|
2036
|
-
)
|
2037
|
-
|
2038
|
-
summaries, queue_lengths = await asyncio.gather(
|
2039
|
-
summaries_task, queue_lengths_task
|
2040
|
-
)
|
2041
|
-
|
2042
|
-
# 汇总数据
|
2043
|
-
total_workers = 0
|
2044
|
-
online_workers = 0
|
2045
|
-
total_running_tasks = 0
|
2046
|
-
total_messages = 0
|
2047
|
-
total_consumers = 0
|
2048
|
-
|
2049
|
-
for summary in summaries:
|
2050
|
-
if not isinstance(summary, Exception):
|
2051
|
-
total_workers += summary.get('total_workers', 0)
|
2052
|
-
online_workers += summary.get('online_workers', 0)
|
2053
|
-
total_running_tasks += summary.get('total_running_tasks', 0)
|
2054
|
-
total_consumers += summary.get('total_workers', 0) # 近似使用worker数作为消费者数
|
2055
|
-
|
2056
|
-
# 汇总消息数
|
2057
|
-
for length in queue_lengths:
|
2058
|
-
if not isinstance(length, Exception):
|
2059
|
-
total_messages += length
|
2060
|
-
|
2061
|
-
return {
|
2062
|
-
'total_queues': len(queues),
|
2063
|
-
'total_workers': total_workers,
|
2064
|
-
'online_workers': online_workers,
|
2065
|
-
'total_running_tasks': total_running_tasks,
|
2066
|
-
'messages': total_messages,
|
2067
|
-
'consumers': total_consumers,
|
2068
|
-
'timestamp': datetime.now(timezone.utc).isoformat()
|
2069
|
-
}
|
2070
|
-
except Exception as e:
|
2071
|
-
return {
|
2072
|
-
'error': str(e),
|
2073
|
-
'total_queues': 0,
|
2074
|
-
'total_workers': 0,
|
2075
|
-
'online_workers': 0,
|
2076
|
-
'total_running_tasks': 0,
|
2077
|
-
'messages': 0,
|
2078
|
-
'consumers': 0
|
2079
|
-
}
|
2080
|
-
|
2081
|
-
|
2082
|
-
@app.get("/api/queue/{queue_name}/workers/offline-history")
|
2083
|
-
async def get_queue_workers_offline_history(
|
2084
|
-
queue_name: str,
|
2085
|
-
limit: int = 100,
|
2086
|
-
start_time: Optional[float] = None,
|
2087
|
-
end_time: Optional[float] = None
|
2088
|
-
):
|
2089
|
-
"""获取指定队列的worker下线历史记录"""
|
2090
|
-
# 获取所有历史记录,然后过滤出该队列的
|
2091
|
-
all_history = await monitor.get_worker_offline_history(limit * 10, start_time, end_time)
|
2092
|
-
queue_history = [
|
2093
|
-
record for record in all_history
|
2094
|
-
if queue_name in record.get('queues', '').split(',')
|
2095
|
-
][:limit]
|
2096
|
-
return {"queue": queue_name, "history": queue_history, "total": len(queue_history)}
|
2097
|
-
|
2098
|
-
@app.websocket("/ws")
|
2099
|
-
async def websocket_endpoint(websocket: WebSocket):
|
2100
|
-
"""WebSocket端点,用于实时更新(优化版)"""
|
2101
|
-
await websocket.accept()
|
2102
|
-
|
2103
|
-
try:
|
2104
|
-
# 标记是否是首次连接
|
2105
|
-
is_first_load = True
|
2106
|
-
|
2107
|
-
while True:
|
2108
|
-
try:
|
2109
|
-
# 检查WebSocket连接状态
|
2110
|
-
if websocket.client_state != WebSocketState.CONNECTED:
|
2111
|
-
break
|
2112
|
-
|
2113
|
-
# 首次连接时发送基础信息
|
2114
|
-
if is_first_load:
|
2115
|
-
# 只发送队列列表,不发送任务信息
|
2116
|
-
data = {
|
2117
|
-
"queues": await monitor.get_all_queues(),
|
2118
|
-
"timestamp": datetime.now(timezone.utc).isoformat(),
|
2119
|
-
"initial_load": True
|
2120
|
-
}
|
2121
|
-
await websocket.send_json(data)
|
2122
|
-
is_first_load = False
|
2123
|
-
else:
|
2124
|
-
# 后续更新:并行获取所有worker信息
|
2125
|
-
queues = await monitor.get_all_queues()
|
2126
|
-
|
2127
|
-
# 使用 asyncio.gather 并行获取所有队列的worker信息
|
2128
|
-
worker_tasks = [monitor.get_worker_heartbeats(queue) for queue in queues]
|
2129
|
-
worker_results = await asyncio.gather(*worker_tasks, return_exceptions=True)
|
2130
|
-
|
2131
|
-
# 构建队列worker映射
|
2132
|
-
queue_workers = {}
|
2133
|
-
for i, queue in enumerate(queues):
|
2134
|
-
if isinstance(worker_results[i], Exception):
|
2135
|
-
print(f"Error getting workers for queue {queue}: {worker_results[i]}")
|
2136
|
-
queue_workers[queue] = []
|
2137
|
-
else:
|
2138
|
-
queue_workers[queue] = worker_results[i]
|
2139
|
-
|
2140
|
-
data = {
|
2141
|
-
"queues": queues,
|
2142
|
-
"workers": queue_workers,
|
2143
|
-
"timestamp": datetime.now(timezone.utc).isoformat(),
|
2144
|
-
"initial_load": False
|
2145
|
-
}
|
2146
|
-
|
2147
|
-
# 再次检查连接状态后发送
|
2148
|
-
if websocket.client_state == WebSocketState.CONNECTED:
|
2149
|
-
await websocket.send_json(data)
|
2150
|
-
else:
|
2151
|
-
break
|
2152
|
-
|
2153
|
-
await asyncio.sleep(2) # 每2秒更新一次
|
2154
|
-
|
2155
|
-
except WebSocketDisconnect:
|
2156
|
-
# WebSocket已断开,退出循环
|
2157
|
-
break
|
2158
|
-
except Exception as e:
|
2159
|
-
# 检查是否是因为连接已关闭导致的错误
|
2160
|
-
if "close message has been sent" in str(e) or "WebSocket is not connected" in str(e):
|
2161
|
-
break
|
2162
|
-
print(f"Error in websocket loop: {e}")
|
2163
|
-
# 对于其他错误,等待一段时间后继续
|
2164
|
-
await asyncio.sleep(5)
|
2165
|
-
|
2166
|
-
except WebSocketDisconnect:
|
2167
|
-
pass
|
2168
|
-
except Exception as e:
|
2169
|
-
print(f"WebSocket error: {e}")
|
2170
|
-
finally:
|
2171
|
-
# 确保WebSocket正确关闭
|
2172
|
-
try:
|
2173
|
-
await websocket.close()
|
2174
|
-
except:
|
2175
|
-
pass
|
2176
|
-
|
2177
|
-
# 挂载静态文件
|
2178
|
-
static_dir = Path(__file__).parent / "static"
|
2179
|
-
static_dir.mkdir(exist_ok=True)
|
2180
|
-
|
2181
|
-
# 挂载静态文件目录
|
2182
|
-
app.mount("/static", StaticFiles(directory=str(static_dir)), name="static")
|
2183
|
-
|
2184
|
-
@app.get("/")
|
2185
|
-
async def read_index():
|
2186
|
-
"""返回主页HTML"""
|
2187
|
-
html_path = static_dir / "index.html"
|
2188
|
-
if html_path.exists():
|
2189
|
-
return HTMLResponse(content=html_path.read_text())
|
2190
|
-
return HTMLResponse(content="<h1>Jettask Monitor</h1><p>Static files not found</p>")
|
2191
|
-
|
2192
|
-
@app.get("/queue.html")
|
2193
|
-
async def read_queue():
|
2194
|
-
"""返回队列详情页HTML"""
|
2195
|
-
html_path = static_dir / "queue.html"
|
2196
|
-
if html_path.exists():
|
2197
|
-
return HTMLResponse(content=html_path.read_text())
|
2198
|
-
return HTMLResponse(content="<h1>Queue Details</h1><p>Page not found</p>")
|
2199
|
-
|
2200
|
-
@app.get("/queues.html")
|
2201
|
-
async def read_queues():
|
2202
|
-
"""返回队列列表页HTML"""
|
2203
|
-
html_path = static_dir / "queues.html"
|
2204
|
-
if html_path.exists():
|
2205
|
-
return HTMLResponse(content=html_path.read_text())
|
2206
|
-
return HTMLResponse(content="<h1>Queues</h1><p>Page not found</p>")
|
2207
|
-
|
2208
|
-
@app.get("/workers.html")
|
2209
|
-
async def read_workers():
|
2210
|
-
"""返回Workers页HTML"""
|
2211
|
-
html_path = static_dir / "workers.html"
|
2212
|
-
if html_path.exists():
|
2213
|
-
return HTMLResponse(content=html_path.read_text())
|
2214
|
-
return HTMLResponse(content="<h1>Workers</h1><p>Page not found</p>")
|
2215
|
-
|
2216
|
-
|
2217
|
-
# PostgreSQL相关的API端点
|
2218
|
-
@app.get("/api/pg/tasks")
|
2219
|
-
async def get_pg_tasks(
|
2220
|
-
status: Optional[str] = None,
|
2221
|
-
queue_name: Optional[str] = None,
|
2222
|
-
limit: int = 100,
|
2223
|
-
offset: int = 0
|
2224
|
-
):
|
2225
|
-
"""从PostgreSQL获取任务列表"""
|
2226
|
-
engine = await get_db_engine()
|
2227
|
-
if not engine:
|
2228
|
-
return {"error": "PostgreSQL not configured"}
|
2229
|
-
|
2230
|
-
try:
|
2231
|
-
async with AsyncSessionLocal() as session:
|
2232
|
-
# 构建查询
|
2233
|
-
query = select(Task)
|
2234
|
-
|
2235
|
-
if status:
|
2236
|
-
query = query.where(Task.status == status)
|
2237
|
-
|
2238
|
-
if queue_name:
|
2239
|
-
query = query.where(Task.queue_name == queue_name)
|
2240
|
-
|
2241
|
-
query = query.order_by(Task.created_at.desc())
|
2242
|
-
query = query.limit(limit).offset(offset)
|
2243
|
-
|
2244
|
-
result = await session.execute(query)
|
2245
|
-
tasks_obj = result.scalars().all()
|
2246
|
-
|
2247
|
-
tasks = []
|
2248
|
-
for task_obj in tasks_obj:
|
2249
|
-
task = {
|
2250
|
-
'id': task_obj.id,
|
2251
|
-
'queue_name': task_obj.queue_name,
|
2252
|
-
'task_name': task_obj.task_name,
|
2253
|
-
'task_data': task_obj.task_data,
|
2254
|
-
'priority': task_obj.priority,
|
2255
|
-
'retry_count': task_obj.retry_count,
|
2256
|
-
'max_retry': task_obj.max_retry,
|
2257
|
-
'status': task_obj.status,
|
2258
|
-
'result': task_obj.result,
|
2259
|
-
'error_message': task_obj.error_message,
|
2260
|
-
'created_at': task_obj.created_at,
|
2261
|
-
'started_at': task_obj.started_at,
|
2262
|
-
'completed_at': task_obj.completed_at,
|
2263
|
-
'worker_id': task_obj.worker_id,
|
2264
|
-
'execution_time': task_obj.execution_time,
|
2265
|
-
'duration': task_obj.duration,
|
2266
|
-
'metadata': task_obj.task_metadata,
|
2267
|
-
'next_sync_time': task_obj.next_sync_time,
|
2268
|
-
'sync_check_count': task_obj.sync_check_count
|
2269
|
-
}
|
2270
|
-
# 转换时间戳为ISO格式(确保是 UTC)
|
2271
|
-
for field in ['created_at', 'started_at', 'completed_at']:
|
2272
|
-
if task.get(field):
|
2273
|
-
# PostgreSQL 的 TIMESTAMP WITH TIME ZONE 会返回 aware datetime
|
2274
|
-
if task[field].tzinfo is None:
|
2275
|
-
# 如果没有时区信息,假定为 UTC
|
2276
|
-
task[field] = task[field].replace(tzinfo=timezone.utc)
|
2277
|
-
task[field] = task[field].isoformat()
|
2278
|
-
# 解析JSON字段
|
2279
|
-
for field in ['task_data', 'result', 'metadata']:
|
2280
|
-
if task.get(field) and isinstance(task[field], str):
|
2281
|
-
try:
|
2282
|
-
task[field] = json.loads(task[field])
|
2283
|
-
except:
|
2284
|
-
pass
|
2285
|
-
tasks.append(task)
|
2286
|
-
|
2287
|
-
return {"tasks": tasks, "total": len(tasks)}
|
2288
|
-
|
2289
|
-
except Exception as e:
|
2290
|
-
logging.error(f"Error fetching tasks from PostgreSQL: {e}")
|
2291
|
-
return {"error": str(e)}
|
2292
|
-
|
2293
|
-
|
2294
|
-
@app.get("/api/pg/stats")
|
2295
|
-
async def get_pg_stats():
|
2296
|
-
"""获取PostgreSQL中的统计信息"""
|
2297
|
-
engine = await get_db_engine()
|
2298
|
-
if not engine:
|
2299
|
-
return {"error": "PostgreSQL not configured"}
|
2300
|
-
|
2301
|
-
try:
|
2302
|
-
async with AsyncSessionLocal() as session:
|
2303
|
-
# 获取任务统计
|
2304
|
-
task_stats_query = text("""
|
2305
|
-
SELECT
|
2306
|
-
COUNT(*) as total_tasks,
|
2307
|
-
COUNT(CASE WHEN status = 'pending' THEN 1 END) as pending_tasks,
|
2308
|
-
COUNT(CASE WHEN status = 'running' THEN 1 END) as running_tasks,
|
2309
|
-
COUNT(CASE WHEN status = 'success' THEN 1 END) as completed_tasks,
|
2310
|
-
COUNT(CASE WHEN status = 'failed' THEN 1 END) as failed_tasks
|
2311
|
-
FROM tasks
|
2312
|
-
""")
|
2313
|
-
|
2314
|
-
task_stats_result = await session.execute(task_stats_query)
|
2315
|
-
task_stats = task_stats_result.mappings().fetchone()
|
2316
|
-
|
2317
|
-
# 获取队列统计
|
2318
|
-
queue_stats_query = text("""
|
2319
|
-
SELECT
|
2320
|
-
queue_name,
|
2321
|
-
COUNT(*) as total_tasks,
|
2322
|
-
COUNT(CASE WHEN status = 'pending' THEN 1 END) as pending_tasks,
|
2323
|
-
COUNT(CASE WHEN status = 'running' THEN 1 END) as running_tasks,
|
2324
|
-
COUNT(CASE WHEN status = 'success' THEN 1 END) as completed_tasks,
|
2325
|
-
COUNT(CASE WHEN status = 'failed' THEN 1 END) as failed_tasks
|
2326
|
-
FROM tasks
|
2327
|
-
GROUP BY queue_name
|
2328
|
-
ORDER BY total_tasks DESC
|
2329
|
-
""")
|
2330
|
-
|
2331
|
-
queue_stats_result = await session.execute(queue_stats_query)
|
2332
|
-
queue_stats = queue_stats_result.mappings().all()
|
2333
|
-
|
2334
|
-
return {
|
2335
|
-
"task_stats": dict(task_stats) if task_stats else {},
|
2336
|
-
"queue_stats": [dict(row) for row in queue_stats]
|
2337
|
-
}
|
2338
|
-
|
2339
|
-
except Exception as e:
|
2340
|
-
logging.error(f"Error fetching stats from PostgreSQL: {e}")
|
2341
|
-
return {"error": str(e)}
|
2342
|
-
|
2343
|
-
|
2344
|
-
@app.get("/api/pg/task/{task_id}")
|
2345
|
-
async def get_pg_task(task_id: str):
|
2346
|
-
"""从PostgreSQL获取单个任务的详细信息"""
|
2347
|
-
engine = await get_db_engine()
|
2348
|
-
if not engine:
|
2349
|
-
return {"error": "PostgreSQL not configured"}
|
2350
|
-
|
2351
|
-
try:
|
2352
|
-
async with AsyncSessionLocal() as session:
|
2353
|
-
result = await session.execute(select(Task).where(Task.id == task_id))
|
2354
|
-
task_obj = result.scalar_one_or_none()
|
2355
|
-
|
2356
|
-
if not task_obj:
|
2357
|
-
return {"error": "Task not found"}
|
2358
|
-
|
2359
|
-
task = {
|
2360
|
-
'id': task_obj.id,
|
2361
|
-
'queue_name': task_obj.queue_name,
|
2362
|
-
'task_name': task_obj.task_name,
|
2363
|
-
'task_data': task_obj.task_data,
|
2364
|
-
'priority': task_obj.priority,
|
2365
|
-
'retry_count': task_obj.retry_count,
|
2366
|
-
'max_retry': task_obj.max_retry,
|
2367
|
-
'status': task_obj.status,
|
2368
|
-
'result': task_obj.result,
|
2369
|
-
'error_message': task_obj.error_message,
|
2370
|
-
'created_at': task_obj.created_at,
|
2371
|
-
'started_at': task_obj.started_at,
|
2372
|
-
'completed_at': task_obj.completed_at,
|
2373
|
-
'worker_id': task_obj.worker_id,
|
2374
|
-
'execution_time': task_obj.execution_time,
|
2375
|
-
'duration': task_obj.duration,
|
2376
|
-
'metadata': task_obj.task_metadata,
|
2377
|
-
'next_sync_time': task_obj.next_sync_time,
|
2378
|
-
'sync_check_count': task_obj.sync_check_count
|
2379
|
-
}
|
2380
|
-
# 转换时间戳为ISO格式(确保是 UTC)
|
2381
|
-
for field in ['created_at', 'started_at', 'completed_at']:
|
2382
|
-
if task.get(field):
|
2383
|
-
# PostgreSQL 的 TIMESTAMP WITH TIME ZONE 会返回 aware datetime
|
2384
|
-
if task[field].tzinfo is None:
|
2385
|
-
# 如果没有时区信息,假定为 UTC
|
2386
|
-
task[field] = task[field].replace(tzinfo=timezone.utc)
|
2387
|
-
task[field] = task[field].isoformat()
|
2388
|
-
# 解析JSON字段
|
2389
|
-
for field in ['task_data', 'result', 'metadata']:
|
2390
|
-
if task.get(field) and isinstance(task[field], str):
|
2391
|
-
try:
|
2392
|
-
task[field] = json.loads(task[field])
|
2393
|
-
except:
|
2394
|
-
pass
|
2395
|
-
|
2396
|
-
return {"task": task}
|
2397
|
-
|
2398
|
-
except Exception as e:
|
2399
|
-
logging.error(f"Error fetching task from PostgreSQL: {e}")
|
2400
|
-
return {"error": str(e)}
|
95
|
+
# ============ WebSocket 实时推送 ============
|
96
|
+
# (parse_time_duration 已移除,因为 PG 时间轴路由已迁移)
|
97
|
+
|
98
|
+
# ============ 已迁移路由 ============
|
99
|
+
# 以下路由已迁移到模块化的 API 路由:
|
100
|
+
# - GET /api/queues → api/queues.py
|
101
|
+
# - GET /api/queue/{queue_name}/stats → api/queues.py
|
102
|
+
# - GET /api/queue/{queue_name}/workers → api/workers.py
|
103
|
+
# - GET /api/queue/{queue_name}/worker-summary → api/workers.py
|
104
|
+
# - GET /api/workers/offline-history → api/workers.py
|
105
|
+
# - GET /api/global-stats → api/overview.py
|
106
|
+
# - GET /api/global-stats/light → api/overview.py
|
107
|
+
# ====================================
|
108
|
+
|
109
|
+
# GET /api/queue/{queue_name}/workers/offline-history → 已迁移到 api/workers.py
|
110
|
+
|
111
|
+
|
112
|
+
# ============ PostgreSQL 路由已迁移 ============
|
113
|
+
# 以下路由已迁移到 api/analytics.py:
|
114
|
+
# - GET /api/pg/tasks → GET /api/v1/analytics/pg/tasks
|
115
|
+
# - GET /api/pg/stats → GET /api/v1/analytics/pg/stats
|
116
|
+
# - GET /api/pg/task/{task_id} → GET /api/v1/analytics/pg/task/{task_id}
|
117
|
+
# ==============================================
|
2401
118
|
|
2402
119
|
if __name__ == "__main__":
|
2403
120
|
# 配置日志
|
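The middleware block added near the top of this hunk is described as detecting the `{namespace}` path parameter and injecting it into `request.state.ns`. A rough Starlette sketch of that idea, assuming a URL layout with a literal `namespaces` segment followed by the namespace name; this is an illustration, not the code in `jettask/webui/middleware`:

```python
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request

class NamespaceMiddleware(BaseHTTPMiddleware):  # illustrative stand-in, not the real class
    async def dispatch(self, request: Request, call_next):
        # Route path parameters are not resolved yet when middleware runs, so a
        # simple approach is to pull the segment straight out of the URL path.
        parts = request.url.path.strip("/").split("/")
        if "namespaces" in parts and parts.index("namespaces") + 1 < len(parts):
            request.state.ns = parts[parts.index("namespaces") + 1]
        return await call_next(request)
```

Registered with `app.add_middleware(NamespaceMiddleware)` as in the hunk above, every handler under a namespaced path could then read `request.state.ns` without repeating the lookup.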