jettask 0.2.23__py3-none-any.whl → 0.2.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. jettask/__init__.py +2 -0
  2. jettask/cli.py +12 -8
  3. jettask/config/lua_scripts.py +37 -0
  4. jettask/config/nacos_config.py +1 -1
  5. jettask/core/app.py +313 -340
  6. jettask/core/container.py +4 -4
  7. jettask/{persistence → core}/namespace.py +93 -27
  8. jettask/core/task.py +16 -9
  9. jettask/core/unified_manager_base.py +136 -26
  10. jettask/db/__init__.py +67 -0
  11. jettask/db/base.py +137 -0
  12. jettask/{utils/db_connector.py → db/connector.py} +130 -26
  13. jettask/db/models/__init__.py +16 -0
  14. jettask/db/models/scheduled_task.py +196 -0
  15. jettask/db/models/task.py +77 -0
  16. jettask/db/models/task_run.py +85 -0
  17. jettask/executor/__init__.py +0 -15
  18. jettask/executor/core.py +76 -31
  19. jettask/executor/process_entry.py +29 -114
  20. jettask/executor/task_executor.py +4 -0
  21. jettask/messaging/event_pool.py +928 -685
  22. jettask/messaging/scanner.py +30 -0
  23. jettask/persistence/__init__.py +28 -103
  24. jettask/persistence/buffer.py +170 -0
  25. jettask/persistence/consumer.py +330 -249
  26. jettask/persistence/manager.py +304 -0
  27. jettask/persistence/persistence.py +391 -0
  28. jettask/scheduler/__init__.py +15 -3
  29. jettask/scheduler/{task_crud.py → database.py} +61 -57
  30. jettask/scheduler/loader.py +2 -2
  31. jettask/scheduler/{scheduler_coordinator.py → manager.py} +23 -6
  32. jettask/scheduler/models.py +14 -10
  33. jettask/scheduler/schedule.py +166 -0
  34. jettask/scheduler/scheduler.py +12 -11
  35. jettask/schemas/__init__.py +50 -1
  36. jettask/schemas/backlog.py +43 -6
  37. jettask/schemas/namespace.py +70 -19
  38. jettask/schemas/queue.py +19 -3
  39. jettask/schemas/responses.py +493 -0
  40. jettask/task/__init__.py +0 -2
  41. jettask/task/router.py +3 -0
  42. jettask/test_connection_monitor.py +1 -1
  43. jettask/utils/__init__.py +7 -5
  44. jettask/utils/db_init.py +8 -4
  45. jettask/utils/namespace_dep.py +167 -0
  46. jettask/utils/queue_matcher.py +186 -0
  47. jettask/utils/rate_limit/concurrency_limiter.py +7 -1
  48. jettask/utils/stream_backlog.py +1 -1
  49. jettask/webui/__init__.py +0 -1
  50. jettask/webui/api/__init__.py +4 -4
  51. jettask/webui/api/alerts.py +806 -71
  52. jettask/webui/api/example_refactored.py +400 -0
  53. jettask/webui/api/namespaces.py +390 -45
  54. jettask/webui/api/overview.py +300 -54
  55. jettask/webui/api/queues.py +971 -267
  56. jettask/webui/api/scheduled.py +1249 -56
  57. jettask/webui/api/settings.py +129 -7
  58. jettask/webui/api/workers.py +442 -0
  59. jettask/webui/app.py +46 -2329
  60. jettask/webui/middleware/__init__.py +6 -0
  61. jettask/webui/middleware/namespace_middleware.py +135 -0
  62. jettask/webui/services/__init__.py +146 -0
  63. jettask/webui/services/heartbeat_service.py +251 -0
  64. jettask/webui/services/overview_service.py +60 -51
  65. jettask/webui/services/queue_monitor_service.py +426 -0
  66. jettask/webui/services/redis_monitor_service.py +87 -0
  67. jettask/webui/services/settings_service.py +174 -111
  68. jettask/webui/services/task_monitor_service.py +222 -0
  69. jettask/webui/services/timeline_pg_service.py +452 -0
  70. jettask/webui/services/timeline_service.py +189 -0
  71. jettask/webui/services/worker_monitor_service.py +467 -0
  72. jettask/webui/utils/__init__.py +11 -0
  73. jettask/webui/utils/time_utils.py +122 -0
  74. jettask/worker/lifecycle.py +8 -2
  75. {jettask-0.2.23.dist-info → jettask-0.2.24.dist-info}/METADATA +1 -1
  76. jettask-0.2.24.dist-info/RECORD +142 -0
  77. jettask/executor/executor.py +0 -338
  78. jettask/persistence/backlog_monitor.py +0 -567
  79. jettask/persistence/base.py +0 -2334
  80. jettask/persistence/db_manager.py +0 -516
  81. jettask/persistence/maintenance.py +0 -81
  82. jettask/persistence/message_consumer.py +0 -259
  83. jettask/persistence/models.py +0 -49
  84. jettask/persistence/offline_recovery.py +0 -196
  85. jettask/persistence/queue_discovery.py +0 -215
  86. jettask/persistence/task_persistence.py +0 -218
  87. jettask/persistence/task_updater.py +0 -583
  88. jettask/scheduler/add_execution_count.sql +0 -11
  89. jettask/scheduler/add_priority_field.sql +0 -26
  90. jettask/scheduler/add_scheduler_id.sql +0 -25
  91. jettask/scheduler/add_scheduler_id_index.sql +0 -10
  92. jettask/scheduler/make_scheduler_id_required.sql +0 -28
  93. jettask/scheduler/migrate_interval_seconds.sql +0 -9
  94. jettask/scheduler/performance_optimization.sql +0 -45
  95. jettask/scheduler/run_scheduler.py +0 -186
  96. jettask/scheduler/schema.sql +0 -84
  97. jettask/task/task_executor.py +0 -318
  98. jettask/webui/api/analytics.py +0 -323
  99. jettask/webui/config.py +0 -90
  100. jettask/webui/models/__init__.py +0 -3
  101. jettask/webui/models/namespace.py +0 -63
  102. jettask/webui/namespace_manager/__init__.py +0 -10
  103. jettask/webui/namespace_manager/multi.py +0 -593
  104. jettask/webui/namespace_manager/unified.py +0 -193
  105. jettask/webui/run.py +0 -46
  106. jettask-0.2.23.dist-info/RECORD +0 -145
  107. {jettask-0.2.23.dist-info → jettask-0.2.24.dist-info}/WHEEL +0 -0
  108. {jettask-0.2.23.dist-info → jettask-0.2.24.dist-info}/entry_points.txt +0 -0
  109. {jettask-0.2.23.dist-info → jettask-0.2.24.dist-info}/licenses/LICENSE +0 -0
  110. {jettask-0.2.23.dist-info → jettask-0.2.24.dist-info}/top_level.txt +0 -0
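The bulk of this release is a structural reorganization: database plumbing moves into a new jettask/db/ package (with SQLAlchemy models replacing the scheduler's raw .sql migration files), the monolithic jettask/webui/app.py is split into services/, middleware/, and per-domain API modules, and several modules are renamed in place. For callers importing the renamed modules, the mapping implied by the rename entries above is sketched below; the paths come from the list, but whether every symbol survived each move is an assumption.

# Module moves visible in the file list (0.2.23 -> 0.2.24):
#   jettask/utils/db_connector.py              -> jettask/db/connector.py
#   jettask/persistence/namespace.py           -> jettask/core/namespace.py
#   jettask/scheduler/task_crud.py             -> jettask/scheduler/database.py
#   jettask/scheduler/scheduler_coordinator.py -> jettask/scheduler/manager.py

# e.g. the connection-pool helper imported elsewhere in this diff:
#   from jettask.utils.db_connector import get_async_redis_pool   # 0.2.23
from jettask.db.connector import get_async_redis_pool             # assumed 0.2.24 home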
jettask/webui/app.py CHANGED
@@ -1,1291 +1,18 @@
- import asyncio
- import json
- import time
  import logging
- from datetime import datetime, timedelta, timezone
- from typing import Dict, List, Optional, Any
  from contextlib import asynccontextmanager
- from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Query
- from fastapi.responses import HTMLResponse
- from fastapi.staticfiles import StaticFiles
+ from fastapi import FastAPI
  from fastapi.middleware.cors import CORSMiddleware
- from starlette.websockets import WebSocketState
- from redis import asyncio as aioredis
  import uvicorn
- from pathlib import Path
- from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession
- from sqlalchemy.orm import sessionmaker
- from sqlalchemy import select, func, and_, or_, text
- from sqlalchemy.dialects import postgresql

- from jettask.persistence import PostgreSQLConsumer
- from jettask.webui.config import PostgreSQLConfig, RedisConfig
- from jettask.persistence.models import Base, Task
+ from jettask.webui.services import MonitorService

  logger = logging.getLogger(__name__)

- # SQLAlchemy async engine and session (independent of the consumer)
- async_engine = None
- AsyncSessionLocal = None
-
- def parse_iso_datetime(time_str: str) -> datetime:
-     """Parse an ISO-format time string, always returning UTC."""
-     if time_str.endswith('Z'):
-         # 'Z' denotes UTC
-         dt = datetime.fromisoformat(time_str.replace('Z', '+00:00'))
-     else:
-         dt = datetime.fromisoformat(time_str)
-
-     # If there is no timezone info, assume UTC
-     if dt.tzinfo is None:
-         dt = dt.replace(tzinfo=timezone.utc)
-     # If there is timezone info, convert to UTC
-     elif dt.tzinfo != timezone.utc:
-         dt = dt.astimezone(timezone.utc)
-
-     return dt
-
- async def get_db_engine():
-     """Get the SQLAlchemy async engine (used for reading data)."""
-     global async_engine, AsyncSessionLocal
-
-     if async_engine:
-         return async_engine
-
-     # Try to get PostgreSQL connection info from environment variables or config
-     import os
-
-     pg_config = PostgreSQLConfig.from_env()
-
-     if not pg_config.dsn:
-         logger.warning("PostgreSQL connection not configured")
-         return None
-
-     try:
-         # Convert the DSN into SQLAlchemy form:
-         # if it starts with postgresql://, rewrite it to postgresql+psycopg://
-         if pg_config.dsn.startswith('postgresql://'):
-             dsn = pg_config.dsn.replace('postgresql://', 'postgresql+psycopg://', 1)
-         else:
-             dsn = pg_config.dsn
-
-         async_engine = create_async_engine(
-             dsn,
-             pool_size=10,
-             max_overflow=5,
-             pool_pre_ping=True,
-             echo=False
-         )
-
-         # Create the async session factory
-         AsyncSessionLocal = sessionmaker(
-             async_engine,
-             class_=AsyncSession,
-             expire_on_commit=False
-         )
-
-         logger.info("SQLAlchemy async engine created for WebUI")
-         return async_engine
-     except Exception as e:
-         logger.error(f"Failed to create SQLAlchemy async engine: {e}")
-         return None
-
- async def get_db_session():
-     """Yield a database session."""
-     if not AsyncSessionLocal:
-         await get_db_engine()
-
-     if AsyncSessionLocal:
-         async with AsyncSessionLocal() as session:
-             yield session
-     else:
-         yield None
-
- class RedisMonitor:
-     def __init__(self, redis_url: str = "redis://localhost:6379", redis_prefix: str = "jettask"):
-         self.redis_url = redis_url
-         self.redis_prefix = redis_prefix
-         self.redis: Optional[aioredis.Redis] = None
-         self.worker_state_manager = None  # initialized lazily
-         self.scanner_task: Optional[asyncio.Task] = None
-         self.scanner_interval = 5  # scan every 5 seconds
-         self.default_heartbeat_timeout = 30  # default 30-second heartbeat timeout
-         self._queues_cache = None
-         self._queues_cache_time = 0
-         self._queues_cache_ttl = 60  # cache for 60 seconds
-         self._workers_cache = None
-         self._workers_cache_time = 0
-         self._workers_cache_ttl = 5  # cache workers for 5 seconds, since they update frequently
-         self._scanner_running = False  # whether the scanner is currently running
-
-     async def connect(self):
-         # Use the unified connection pool management
-         from jettask.utils.db_connector import get_async_redis_pool
-
-         pool = get_async_redis_pool(
-             self.redis_url,
-             decode_responses=True,
-             max_connections=100,
-             socket_connect_timeout=5,
-             socket_timeout=10,
-             socket_keepalive=True,
-             health_check_interval=30
-         )
-         self.redis = aioredis.Redis(connection_pool=pool)
-
-         # Initialize the WorkerStateManager
-         from jettask.worker.lifecycle import WorkerStateManager
-         self.worker_state_manager = WorkerStateManager(
-             redis_client=self.redis,
-             redis_prefix=self.redis_prefix
-         )
-
-     async def close(self):
-         # Stop the scanner task
-         if self.scanner_task and not self.scanner_task.done():
-             self.scanner_task.cancel()
-             try:
-                 await self.scanner_task
-             except asyncio.CancelledError:
-                 pass
-
-         if self.redis:
-             await self.redis.close()
-
-     def get_prefixed_queue_name(self, queue_name: str) -> str:
-         """Prefix a queue name."""
-         return f"{self.redis_prefix}:QUEUE:{queue_name}"
-
-
-     async def get_task_info(self, event_id: str) -> Dict[str, Any]:
-         """Get detailed task info."""
-         status_key = f"{self.redis_prefix}:STATUS:{event_id}"
-         result_key = f"{self.redis_prefix}:RESULT:{event_id}"
-
-         status = await self.redis.get(status_key)
-         result = await self.redis.get(result_key)
-
-         task_info = {
-             "event_id": event_id,
-             "status": status,
-             "result": result
-         }
-
-         # If status info exists, try to fetch details from the matching queue stream
-         if status:
-             try:
-                 status_data = json.loads(status)
-                 queue_name = status_data.get("queue")
-
-                 if queue_name:
-                     # Look for the task in the stream,
-                     # scanning recent messages with xrange
-                     prefixed_queue_name = self.get_prefixed_queue_name(queue_name)
-                     messages = await self.redis.xrange(prefixed_queue_name, count=1000)
-
-                     for msg_id, data in messages:
-                         # Check whether the event_id in the message data matches
-                         if (data.get("event_id") == event_id or
-                             data.get("id") == event_id or
-                             data.get("task_id") == event_id):
-                             task_info["stream_data"] = {
-                                 "message_id": msg_id,
-                                 "data": data,
-                                 "queue": queue_name
-                             }
-                             break
-
-                     # If the message ID is itself the event_id, try a direct fetch
-                     if not task_info.get("stream_data"):
-                         try:
-                             direct_messages = await self.redis.xrange(
-                                 prefixed_queue_name,
-                                 min=event_id,
-                                 max=event_id,
-                                 count=1
-                             )
-                             if direct_messages:
-                                 msg_id, data = direct_messages[0]
-                                 task_info["stream_data"] = {
-                                     "message_id": msg_id,
-                                     "data": data,
-                                     "queue": queue_name
-                                 }
-                         except:
-                             pass
-
-             except Exception as e:
-                 print(f"Error parsing status for task {event_id}: {e}")
-
-         return task_info
-
-     async def get_stream_info(self, queue_name: str, event_id: str) -> Optional[Dict[str, Any]]:
-         """Get detailed task info from the stream."""
-         try:
-             prefixed_queue_name = self.get_prefixed_queue_name(queue_name)
-             # First try a direct lookup by event_id
-             messages = await self.redis.xrange(prefixed_queue_name, min=event_id, max=event_id, count=1)
-             if messages:
-                 msg_id, data = messages[0]
-                 return {
-                     "message_id": msg_id,
-                     "data": data,
-                     "queue": queue_name
-                 }
-
-             # If not found, the event_id may be part of the message body; scan recent messages
-             messages = await self.redis.xrange(prefixed_queue_name, count=100)
-             for msg_id, data in messages:
-                 if data.get("event_id") == event_id or data.get("id") == event_id:
-                     return {
-                         "message_id": msg_id,
-                         "data": data,
-                         "queue": queue_name
-                     }
-         except Exception as e:
-             print(f"Error reading from stream {prefixed_queue_name}: {e}")
-         return None
-
-     async def get_queue_tasks(self, queue_name: str, start_time: Optional[str] = None,
-                               end_time: Optional[str] = None, limit: int = 100) -> Dict[str, Any]:
-         """Get tasks for a given queue (over a time range).
-
-         Args:
-             queue_name: queue name
-             start_time: start time (Redis Stream ID format or timestamp)
-             end_time: end time (Redis Stream ID format or timestamp)
-             limit: maximum number of tasks to return
-         """
-         all_tasks = []
-
-         try:
-             # Handle the time parameters:
-             # if no end time was given, use '+' meaning "up to the newest"
-             if not end_time:
-                 end_time = '+'
-
-             # If no start time was given, use '-' meaning "from the oldest"
-             if not start_time:
-                 start_time = '-'
-
-             # Read messages from the queue's stream,
-             # using xrevrange for reverse time order (newest first)
-             prefixed_queue_name = self.get_prefixed_queue_name(queue_name)
-             messages = await self.redis.xrevrange(
-                 prefixed_queue_name,
-                 max=end_time,
-                 min=start_time,
-                 count=limit
-             )
-
-             for msg_id, data in messages:
-                 # In easy_task, event_id is the Redis-generated stream message ID
-                 event_id = msg_id
-
-                 # Build the task info
-                 task_info = {
-                     "event_id": event_id,
-                     "message_id": msg_id,
-                     "stream_data": data,
-                     "task_name": data.get("name", "unknown"),
-                     "queue": data.get("queue", queue_name),
-                     "trigger_time": data.get("trigger_time")
-                 }
-
-                 # Try to parse args and kwargs and combine them into a parameter string
-                 params_str = ""
-                 try:
-                     args_list = []
-                     kwargs_dict = {}
-
-                     if data.get("args"):
-                         args_list = json.loads(data["args"])
-                         task_info["args"] = args_list
-
-                     if data.get("kwargs"):
-                         kwargs_dict = json.loads(data["kwargs"])
-                         task_info["kwargs"] = kwargs_dict
-
-                     # Build the parameter string
-                     params_parts = []
-                     if args_list:
-                         params_parts.extend([str(arg) for arg in args_list])
-                     if kwargs_dict:
-                         params_parts.extend([f"{k}={v}" for k, v in kwargs_dict.items()])
-
-                     params_str = ", ".join(params_parts) if params_parts else "无参数"
-
-                 except Exception as e:
-                     params_str = "解析失败"
-
-                 task_info["params_str"] = params_str
-
-                 # Read from the status key (results are not fetched by default)
-                 status_key = f"{self.redis_prefix}:STATUS:{event_id}"
-
-                 # Get the status
-                 status = await self.redis.get(status_key)
-
-                 if status:
-                     task_info["status"] = status
-                     try:
-                         parsed_status = json.loads(status)
-                         task_info["parsed_status"] = parsed_status
-                         # Get consumer info from the status
-                         task_info["consumer"] = parsed_status.get("consumer", "-")
-                     except:
-                         task_info["parsed_status"] = {"status": "unknown"}
-                         task_info["consumer"] = "-"
-                 else:
-                     # If there is no status, show it as unknown
-                     task_info["status"] = json.dumps({
-                         "status": "未知",
-                         "queue": queue_name,
-                         "created_at": datetime.fromtimestamp(float(data.get("trigger_time", 0))).isoformat() if data.get("trigger_time") else None
-                     })
-                     task_info["parsed_status"] = {
-                         "status": "未知",
-                         "queue": queue_name,
-                         "created_at": datetime.fromtimestamp(float(data.get("trigger_time", 0))).isoformat() if data.get("trigger_time") else None
-                     }
-                     task_info["consumer"] = "-"
-
-                 all_tasks.append(task_info)
-
-         except Exception as e:
-             print(f"Error reading queue {queue_name}: {e}")
-             # If the stream does not exist or errors out, return an empty result
-             return {
-                 "tasks": [],
-                 "count": 0,
-                 "oldest_id": None,
-                 "newest_id": None,
-                 "has_more": False,
-                 "limit": limit
-             }
-
-         # Get the oldest and newest message IDs for pagination
-         oldest_id = all_tasks[-1]["message_id"] if all_tasks else None
-         newest_id = all_tasks[0]["message_id"] if all_tasks else None
-
-         # Check whether there is more data
-         has_more = len(messages) >= limit
-
-         # Get the total queue length
-         total_count = 0
-         try:
-             queue_info = await self.redis.xinfo_stream(prefixed_queue_name)
-             total_count = queue_info.get("length", 0)
-         except Exception as e:
-             print(f"Error getting queue info for {queue_name}: {e}")
-             total_count = len(all_tasks)
-
-         return {
-             "tasks": all_tasks,
-             "count": len(all_tasks),
-             "total_count": total_count,
-             "oldest_id": oldest_id,
-             "newest_id": newest_id,
-             "has_more": has_more,
-             "limit": limit
-         }
-
-     async def get_worker_heartbeats(self, queue_name: str) -> List[Dict[str, Any]]:
-         """Get worker heartbeat info for a queue - scans the WORKER keys directly."""
-         worker_list = []
-         current_time = datetime.now(timezone.utc).timestamp()
-
-         # Scan all WORKER keys directly (excluding HISTORY-related keys),
-         # using RegistryManager instead of scan
-         from jettask.worker.manager import WorkerState as WorkerRegistry
-         from jettask.messaging.registry import QueueRegistry
-         worker_registry = WorkerRegistry(
-         queue_registry = QueueRegistry(
-             redis_client=None,
-             async_redis_client=self.redis,
-             redis_prefix=self.redis_prefix
-         )
-
-         # Get all worker IDs
-         worker_ids = await worker_registry.get_all_workers()
-         worker_keys = [f"{self.redis_prefix}:WORKER:{wid}" for wid in worker_ids]
-
-         # Fetch all worker data in one batch
-         if worker_keys:
-             pipe = self.redis.pipeline()
-             for key in worker_keys:
-                 pipe.hgetall(key)
-             all_workers_data = await pipe.execute()
-
-             for i, worker_data in enumerate(all_workers_data):
-                 if not worker_data:
-                     continue
-
-                 # Check whether the worker belongs to the given queue
-                 worker_queues = worker_data.get('queues', '')
-                 if queue_name not in worker_queues.split(','):
-                     continue
-
-                 worker_id = worker_keys[i].split(':')[-1]
-                 last_heartbeat = float(worker_data.get('last_heartbeat', 0))
-                 is_alive = worker_data.get('is_alive', 'true').lower() == 'true'
-                 consumer_id = worker_data.get('consumer_id', worker_id)
-
-                 # Build the display data
-                 display_data = {
-                     'consumer_id': consumer_id,
-                     'consumer_name': f"{consumer_id}-{queue_name}",  # kept for compatibility
-                     'host': worker_data.get('host', 'unknown'),
-                     'pid': int(worker_data.get('pid', 0)),
-                     'queue': queue_name,
-                     'last_heartbeat': last_heartbeat,
-                     'last_heartbeat_time': datetime.fromtimestamp(last_heartbeat).isoformat(),
-                     'seconds_ago': int(current_time - last_heartbeat),
-                     'is_alive': is_alive,
-                     # Queue-specific statistics
-                     'success_count': int(worker_data.get(f'{queue_name}:success_count', 0)),
-                     'failed_count': int(worker_data.get(f'{queue_name}:failed_count', 0)),
-                     'total_count': int(worker_data.get(f'{queue_name}:total_count', 0)),
-                     'running_tasks': int(worker_data.get(f'{queue_name}:running_tasks', 0)),
-                     'avg_processing_time': float(worker_data.get(f'{queue_name}:avg_processing_time', 0.0)),
-                     'avg_latency_time': float(worker_data.get(f'{queue_name}:avg_latency_time', 0.0))
-                 }
-
-                 # If an offline time exists, add offline-time info
-                 if 'offline_time' in worker_data:
-                     display_data['offline_time'] = float(worker_data['offline_time'])
-                     display_data['offline_time_formatted'] = datetime.fromtimestamp(float(worker_data['offline_time'])).isoformat()
-
-                 worker_list.append(display_data)
-
-         return worker_list
-
-     async def get_queue_worker_summary(self, queue_name: str) -> Dict[str, Any]:
-         """Get aggregated worker statistics for a queue."""
-         try:
-             # Scan all WORKER keys directly and filter (excluding HISTORY-related keys),
-             # using RegistryManager instead of scan
-             from jettask.worker.manager import WorkerState as WorkerRegistry
-             from jettask.messaging.registry import QueueRegistry
-             registry = RegistryManager(
-                 redis_client=None,
-                 async_redis_client=self.redis,
-                 redis_prefix=self.redis_prefix
-             )
-
-             # Get all worker IDs
-             worker_ids = await worker_registry.get_all_workers()
-             worker_keys = [f"{self.redis_prefix}:WORKER:{wid}" for wid in worker_ids]
-
-             if not worker_keys:
-                 return {
-                     'total_workers': 0,
-                     'online_workers': 0,
-                     'offline_workers': 0,
-                     'total_success_count': 0,
-                     'total_failed_count': 0,
-                     'total_count': 0,
-                     'total_running_tasks': 0,
-                     'avg_processing_time': 0.0,
-                     'avg_latency_time': 0.0
-                 }
-
-             # Fetch worker data in one batch
-             pipe = self.redis.pipeline()
-             for key in worker_keys:
-                 pipe.hgetall(key)
-             all_workers_data = await pipe.execute()
-
-             # Filter workers that belong to this queue
-             queue_workers_data = []
-             for i, worker_data in enumerate(all_workers_data):
-                 if worker_data and queue_name in worker_data.get('queues', '').split(','):
-                     queue_workers_data.append(worker_data)
-
-             # Aggregate statistics
-             total_workers = len(queue_workers_data)
-             online_workers = 0
-             offline_workers = 0
-             total_success_count = 0
-             total_failed_count = 0
-             total_count = 0
-             total_running_tasks = 0
-             total_processing_time = 0.0
-             processing_time_count = 0
-             total_latency_time = 0.0
-             latency_time_count = 0
-
-             current_time = datetime.now(timezone.utc).timestamp()
-             offline_worker_ids = []  # record offline worker IDs to avoid double-counting them from history
-
-             for worker_data in queue_workers_data:
-                 try:
-                     # Check the worker state
-                     last_heartbeat = float(worker_data.get('last_heartbeat', 0))
-                     is_alive = worker_data.get('is_alive', 'true').lower() == 'true'
-                     worker_id = worker_data.get('consumer_id', '')
-
-                     if is_alive and (current_time - last_heartbeat) < 30:
-                         online_workers += 1
-                         # Only count data from online workers
-                         success_count = int(worker_data.get(f'{queue_name}:success_count', 0))
-                         failed_count = int(worker_data.get(f'{queue_name}:failed_count', 0))
-                         running_tasks = int(worker_data.get(f'{queue_name}:running_tasks', 0))
-                         avg_processing_time = float(worker_data.get(f'{queue_name}:avg_processing_time', 0.0))
-                         avg_latency_time = float(worker_data.get(f'{queue_name}:avg_latency_time', 0.0))
-
-                         total_success_count += success_count
-                         total_failed_count += failed_count
-                         total_count += success_count + failed_count
-                         total_running_tasks += running_tasks
-
-                         if avg_processing_time > 0:
-                             total_processing_time += avg_processing_time
-                             processing_time_count += 1
-
-                         if avg_latency_time > 0:
-                             total_latency_time += avg_latency_time
-                             latency_time_count += 1
-                     else:
-                         offline_workers += 1
-                         # Record the offline worker's ID so it is counted from history
-                         if worker_id:
-                             offline_worker_ids.append(worker_id)
-
-                 except Exception as e:
-                     print(f"Error processing worker summary: {e}")
-                     continue
-
-             # Count offline workers' data (from the WORKER keys)
-             for worker_data in queue_workers_data:
-                 try:
-                     # Check whether this is an offline worker
-                     is_alive = worker_data.get('is_alive', 'true').lower() == 'true'
-                     worker_id = worker_data.get('consumer_id', '')
-
-                     if not is_alive and worker_id in offline_worker_ids:
-                         # Count the offline worker's data
-                         success_count = int(worker_data.get(f'{queue_name}:success_count', 0))
-                         failed_count = int(worker_data.get(f'{queue_name}:failed_count', 0))
-
-                         total_success_count += success_count
-                         total_failed_count += failed_count
-                         total_count += success_count + failed_count
-
-                         # Processing-time statistics
-                         avg_processing_time = float(worker_data.get(f'{queue_name}:avg_processing_time', 0.0))
-                         if avg_processing_time > 0:
-                             total_processing_time += avg_processing_time
-                             processing_time_count += 1
-
-                         # Latency statistics
-                         avg_latency_time = float(worker_data.get(f'{queue_name}:avg_latency_time', 0.0))
-                         if avg_latency_time > 0:
-                             total_latency_time += avg_latency_time
-                             latency_time_count += 1
-
-                 except Exception as e:
-                     print(f"Error processing offline worker stats: {e}")
-                     continue
-
-             # Compute the average processing time (history included)
-             overall_avg_processing_time = 0.0
-             if processing_time_count > 0:
-                 overall_avg_processing_time = total_processing_time / processing_time_count
-
-             # Compute the average latency
-             overall_avg_latency_time = 0.0
-             if latency_time_count > 0:
-                 overall_avg_latency_time = total_latency_time / latency_time_count
-
-             return {
-                 'total_workers': total_workers,
-                 'online_workers': online_workers,
-                 'offline_workers': offline_workers,
-                 'total_success_count': total_success_count,
-                 'total_failed_count': total_failed_count,
-                 'total_count': total_count,
-                 'total_running_tasks': total_running_tasks,
-                 'avg_processing_time': round(overall_avg_processing_time, 3),
-                 'avg_latency_time': round(overall_avg_latency_time, 3),
-                 'history_included': True
-             }
-
-         except Exception as e:
-             print(f"Error getting queue worker summary for {queue_name}: {e}")
-             return {
-                 'total_workers': 0,
-                 'online_workers': 0,
-                 'offline_workers': 0,
-                 'total_success_count': 0,
-                 'total_failed_count': 0,
-                 'total_count': 0,
-                 'total_running_tasks': 0,
-                 'avg_processing_time': 0.0,
-                 'avg_latency_time': 0.0
-             }
-
-     async def get_queue_worker_summary_fast(self, queue_name: str) -> Dict[str, Any]:
-         """Get aggregated worker statistics for a queue (fast version, excludes history)."""
-         try:
-             # Scan all WORKER keys directly (excluding HISTORY-related keys),
-             # using RegistryManager instead of scan
-             from jettask.worker.manager import WorkerState as WorkerRegistry
-             from jettask.messaging.registry import QueueRegistry
-             registry = RegistryManager(
-                 redis_client=None,
-                 async_redis_client=self.redis,
-                 redis_prefix=self.redis_prefix
-             )
-
-             # Get all worker IDs
-             worker_ids = await worker_registry.get_all_workers()
-             worker_keys = [f"{self.redis_prefix}:WORKER:{wid}" for wid in worker_ids]
-
-             if not worker_keys:
-                 return {
-                     'total_workers': 0,
-                     'online_workers': 0,
-                     'offline_workers': 0,
-                     'total_success_count': 0,
-                     'total_failed_count': 0,
-                     'total_count': 0,
-                     'total_running_tasks': 0,
-                     'avg_processing_time': 0.0,
-                     'avg_latency_time': 0.0
-                 }
-
-             # Batch-fetch worker data with a pipeline
-             pipe = self.redis.pipeline()
-             for worker_key in worker_keys:
-                 pipe.hgetall(worker_key)
-
-             all_workers_data = await pipe.execute()
-
-             # Filter workers that belong to this queue
-             worker_data_list = []
-             for worker_data in all_workers_data:
-                 if worker_data and queue_name in worker_data.get('queues', '').split(','):
-                     worker_data_list.append(worker_data)
-
-             # Aggregate statistics
-             total_workers = len(worker_data_list)
-             online_workers = 0
-             offline_workers = 0
-             total_success_count = 0
-             total_failed_count = 0
-             total_count = 0
-             total_running_tasks = 0
-             total_processing_time = 0.0
-             processing_time_count = 0
-             total_latency_time = 0.0
-             latency_time_count = 0
-
-             current_time = datetime.now(timezone.utc).timestamp()
-
-             for worker_data in worker_data_list:
-                 if not worker_data:
-                     continue
-
-                 # Check the worker state
-                 last_heartbeat = float(worker_data.get('last_heartbeat', 0))
-                 is_alive = worker_data.get('is_alive', 'true').lower() == 'true'
-
-                 if is_alive and (current_time - last_heartbeat) < 30:
-                     online_workers += 1
-                     # Only count online workers (the fast version excludes history)
-                     success_count = int(worker_data.get(f'{queue_name}:success_count', 0))
-                     failed_count = int(worker_data.get(f'{queue_name}:failed_count', 0))
-                     running_tasks = int(worker_data.get(f'{queue_name}:running_tasks', 0))
-                     avg_processing_time = float(worker_data.get(f'{queue_name}:avg_processing_time', 0.0))
-                     avg_latency_time = float(worker_data.get(f'{queue_name}:avg_latency_time', 0.0))
-
-                     total_success_count += success_count
-                     total_failed_count += failed_count
-                     total_count += success_count + failed_count
-                     total_running_tasks += running_tasks
-
-                     if avg_processing_time > 0:
-                         total_processing_time += avg_processing_time
-                         processing_time_count += 1
-
-                     if avg_latency_time > 0:
-                         total_latency_time += avg_latency_time
-                         latency_time_count += 1
-                 else:
-                     offline_workers += 1
-                     # The fast version does not count offline workers
-
-             # Compute the average processing time
-             avg_processing_time = 0.0
-             if processing_time_count > 0:
-                 avg_processing_time = total_processing_time / processing_time_count
-
-             # Compute the average latency
-             avg_latency_time = 0.0
-             if latency_time_count > 0:
-                 avg_latency_time = total_latency_time / latency_time_count
-
-             return {
-                 'total_workers': total_workers,
-                 'online_workers': online_workers,
-                 'offline_workers': offline_workers,
-                 'total_success_count': total_success_count,
-                 'total_failed_count': total_failed_count,
-                 'total_count': total_count,
-                 'total_running_tasks': total_running_tasks,
-                 'avg_processing_time': round(avg_processing_time, 3),
-                 'avg_latency_time': round(avg_latency_time, 3)
-             }
-
-         except Exception as e:
-             print(f"Error getting queue worker summary for {queue_name}: {e}")
-             return {
-                 'total_workers': 0,
-                 'online_workers': 0,
-                 'offline_workers': 0,
-                 'total_success_count': 0,
-                 'total_failed_count': 0,
-                 'total_count': 0,
-                 'total_running_tasks': 0,
-                 'avg_processing_time': 0.0,
-                 'avg_latency_time': 0.0
-             }
-
-
-     async def get_worker_offline_history(self, limit: int = 100, start_time: Optional[float] = None, end_time: Optional[float] = None) -> List[Dict[str, Any]]:
-         """Get worker offline history - reads offline workers directly from the WORKER keys."""
-         try:
-             # Scan all WORKER keys (excluding HISTORY-related keys)
-             pattern = f"{self.redis_prefix}:WORKER:*"
-             cursor = 0
-             # Use RegistryManager instead of scan
-             from jettask.worker.manager import WorkerState as WorkerRegistry
-             from jettask.messaging.registry import QueueRegistry
-             registry = RegistryManager(
-                 redis_client=None,
-                 async_redis_client=self.redis,
-                 redis_prefix=self.redis_prefix
-             )
-
-             # Get all worker IDs
-             worker_ids = await worker_registry.get_all_workers()
-             worker_keys = [f"{self.redis_prefix}:WORKER:{wid}" for wid in worker_ids]
-
-             if not worker_keys:
-                 return []
-
-             # Fetch all worker data in one batch
-             pipe = self.redis.pipeline()
-             for key in worker_keys:
-                 pipe.hgetall(key)
-             all_workers_data = await pipe.execute()
-
-             # Collect the offline workers
-             offline_workers = []
-             current_time = time.time()
-
-             for i, worker_data in enumerate(all_workers_data):
-                 if not worker_data:
-                     continue
-
-                 # Check whether the worker is offline
-                 is_alive = worker_data.get('is_alive', 'true').lower() == 'true'
-                 if not is_alive and 'offline_time' in worker_data:
-                     offline_time = float(worker_data.get('offline_time', 0))
-
-                     # Filter by time range
-                     if start_time and offline_time < start_time:
-                         continue
-                     if end_time and offline_time > end_time:
-                         continue
-
-                     # Compute the uptime duration
-                     online_time = float(worker_data.get('created_at', offline_time))
-                     duration_seconds = int(offline_time - online_time)
-
-                     # Build the offline record
-                     record = {
-                         'consumer_id': worker_data.get('consumer_id', ''),
-                         'host': worker_data.get('host', 'unknown'),
-                         'pid': int(worker_data.get('pid', 0)),
-                         'queues': worker_data.get('queues', ''),
-                         'online_time': online_time,
-                         'offline_time': offline_time,
-                         'duration_seconds': duration_seconds,
-                         'last_heartbeat': float(worker_data.get('last_heartbeat', 0)),
-                         'shutdown_reason': worker_data.get('shutdown_reason', 'unknown'),
-                         'online_time_str': datetime.fromtimestamp(online_time).isoformat(),
-                         'offline_time_str': datetime.fromtimestamp(offline_time).isoformat(),
-                     }
-
-                     # Format the uptime duration
-                     hours = duration_seconds // 3600
-                     minutes = (duration_seconds % 3600) // 60
-                     seconds = duration_seconds % 60
-                     record['duration_str'] = f"{hours}h {minutes}m {seconds}s"
-
-                     # Add statistics (aggregated across all of the worker's queues)
-                     queues = worker_data.get('queues', '').split(',') if worker_data.get('queues') else []
-                     total_success = 0
-                     total_failed = 0
-                     total_count = 0
-
-                     for queue in queues:
-                         if queue.strip():
-                             queue = queue.strip()
-                             total_success += int(worker_data.get(f'{queue}:success_count', 0))
-                             total_failed += int(worker_data.get(f'{queue}:failed_count', 0))
-                             total_count += int(worker_data.get(f'{queue}:total_count', 0))
-
-                     record['total_success_count'] = total_success
-                     record['total_failed_count'] = total_failed
-                     record['total_count'] = total_count
-                     record['total_running_tasks'] = 0  # offline workers have no running tasks
-
-                     # Compute the average processing time
-                     if total_count > 0:
-                         total_processing_time = 0.0
-                         for queue in queues:
-                             if queue.strip():
-                                 queue = queue.strip()
-                                 avg_time = float(worker_data.get(f'{queue}:avg_processing_time', 0))
-                                 count = int(worker_data.get(f'{queue}:total_count', 0))
-                                 if avg_time > 0 and count > 0:
-                                     total_processing_time += avg_time * count
-                         record['avg_processing_time'] = total_processing_time / total_count
-                     else:
-                         record['avg_processing_time'] = 0.0
-
-                     offline_workers.append((offline_time, record))
-
-             # Sort by offline time, newest first
-             offline_workers.sort(key=lambda x: x[0], reverse=True)
-
-             # Return the requested number of records
-             return [record for _, record in offline_workers[:limit]]
-
-         except Exception as e:
-             print(f"Error getting worker offline history: {e}")
-             return []
-
-     async def get_global_stats_with_history(self) -> Dict[str, Any]:
-         """Get global statistics (optimized) - note: history data is no longer double-counted."""
-         try:
-             # Get all queues
-             queues = await self.get_all_queues()
-
-             # Fetch all queues' worker summaries and queue stats in parallel
-             queue_summaries_task = asyncio.gather(
-                 *[self.get_queue_worker_summary_fast(queue) for queue in queues],
-                 return_exceptions=True
-             )
-             queue_stats_task = asyncio.gather(
-                 *[self.get_queue_stats(queue) for queue in queues],
-                 return_exceptions=True
-             )
-
-             queue_summaries, queue_stats = await asyncio.gather(
-                 queue_summaries_task, queue_stats_task
-             )
-
-             # Initialize the counters
-             total_success = 0
-             total_failed = 0
-             total_tasks = 0
-             total_running = 0
-             total_workers = 0
-             online_workers = 0
-             offline_workers = 0
-             total_processing_time = 0.0
-             total_processing_count = 0
-             total_latency_time = 0.0
-             total_latency_count = 0
-
-             # RabbitMQ-style metrics
-             total_messages = 0
-             total_messages_ready = 0
-             total_messages_unacknowledged = 0
-             total_consumers = 0
-             total_publish = 0
-             total_deliver_get = 0
-             total_ack = 0
-
-             # Aggregate the summaries
-             for i, summary in enumerate(queue_summaries):
-                 if isinstance(summary, Exception):
-                     print(f"Error getting stats for queue {queues[i]}: {summary}")
-                     continue
-
-                 total_workers += summary.get('total_workers', 0)
-                 online_workers += summary.get('online_workers', 0)
-                 offline_workers += summary.get('offline_workers', 0)
-                 total_success += summary.get('total_success_count', 0)
-                 total_failed += summary.get('total_failed_count', 0)
-                 total_tasks += summary.get('total_count', 0)
-                 total_running += summary.get('total_running_tasks', 0)
-
-                 # Accumulate average processing time (weighted by task count)
-                 avg_time = summary.get('avg_processing_time', 0)
-                 task_count = summary.get('total_count', 0)
-                 if avg_time > 0 and task_count > 0:
-                     total_processing_time += avg_time * task_count
-                     total_processing_count += task_count
-
-                 # Accumulate average latency
-                 avg_latency = summary.get('avg_latency_time', 0)
-                 if avg_latency > 0 and task_count > 0:
-                     total_latency_time += avg_latency * task_count
-                     total_latency_count += task_count
-
-             # Aggregate the RabbitMQ-style metrics
-             for i, stats in enumerate(queue_stats):
-                 if isinstance(stats, Exception):
-                     continue
-
-                 total_messages += stats.get('messages', 0)
-                 total_messages_ready += stats.get('messages_ready', 0)
-                 total_messages_unacknowledged += stats.get('messages_unacknowledged', 0)
-                 total_consumers += stats.get('consumers', 0)
-
-                 message_stats = stats.get('message_stats', {})
-                 total_publish += message_stats.get('publish', 0)
-                 total_deliver_get += message_stats.get('deliver_get', 0)
-                 total_ack += message_stats.get('ack', 0)
-
-             # Compute the global average processing time
-             global_avg_processing_time = 0.0
-             if total_processing_count > 0:
-                 global_avg_processing_time = total_processing_time / total_processing_count
-
-             # Compute the global average latency
-             global_avg_latency_time = 0.0
-             if total_latency_count > 0:
-                 global_avg_latency_time = total_latency_time / total_latency_count
-
-             return {
-                 # Original metrics
-                 'total_queues': len(queues),
-                 'total_workers': total_workers,
-                 'online_workers': online_workers,
-                 'offline_workers': offline_workers,
-                 'total_success_count': total_success,
-                 'total_failed_count': total_failed,
-                 'total_count': total_tasks,
-                 'total_running_tasks': total_running,
-                 'avg_processing_time': round(global_avg_processing_time, 3),
-                 'avg_latency_time': round(global_avg_latency_time, 3),
-                 'history_included': False,
-                 # RabbitMQ-style metrics
-                 'messages': total_messages,
-                 'messages_ready': total_messages_ready,
-                 'messages_unacknowledged': total_messages_unacknowledged,
-                 'consumers': total_consumers,
-                 'message_stats': {
-                     'publish': total_publish,
-                     'deliver_get': total_deliver_get,
-                     'ack': total_ack
-                 },
-                 'timestamp': datetime.now(timezone.utc).isoformat()
-             }
-
-         except Exception as e:
-             print(f"Error getting global stats: {e}")
-             return {
-                 'total_queues': 0,
-                 'total_workers': 0,
-                 'online_workers': 0,
-                 'offline_workers': 0,
-                 'total_success_count': 0,
-                 'total_failed_count': 0,
-                 'total_count': 0,
-                 'total_running_tasks': 0,
-                 'avg_processing_time': 0.0,
-                 'avg_latency_time': 0.0,
-                 'history_included': False,
-                 'messages': 0,
-                 'messages_ready': 0,
-                 'messages_unacknowledged': 0,
-                 'consumers': 0,
-                 'message_stats': {
-                     'publish': 0,
-                     'deliver_get': 0,
-                     'ack': 0
-                 },
-                 'error': str(e)
-             }
-
-     async def get_all_queues(self) -> List[str]:
-         """Get all queue names - prefers the global:queues set, with caching."""
-         try:
-             # Check whether the cache is still valid
-             current_time = time.time()
-             if self._queues_cache is not None and (current_time - self._queues_cache_time) < self._queues_cache_ttl:
-                 return self._queues_cache
-
-             # Prefer the global queue set
-             global_queues_key = f'{self.redis_prefix}:global:queues'
-             queues = await self.redis.smembers(global_queues_key)
-
-             if queues:
-                 # If the global queue set exists, use it directly
-                 result = sorted(list(queues))
-                 self._queues_cache = result
-                 self._queues_cache_time = current_time
-                 return result
-
-             # If there is no global queue set, fall back to scanning
-             queues = set()
-
-             # Optimization: a more precise scan pattern, only QUEUE:* keys
-             pattern = f"{self.redis_prefix}:QUEUE:*"
-             cursor = 0
-
-             # Use RegistryManager instead of scan
-             from jettask.worker.manager import WorkerState as WorkerRegistry
-             from jettask.messaging.registry import QueueRegistry
-             registry = RegistryManager(
-                 redis_client=None,
-                 async_redis_client=self.redis,
-                 redis_prefix=self.redis_prefix
-             )
-
-             # Get all queues
-             queues = await queue_registry.get_all_queues()
-
-             # Return the sorted queue list and refresh the cache
-             result = sorted(list(queues))
-             self._queues_cache = result
-             self._queues_cache_time = current_time
-             return result
-
-         except Exception as e:
-             print(f"Error scanning queues: {e}")
-             return []
-
-     async def get_queue_stats(self, queue_name: str) -> Dict[str, Any]:
-         """Get queue statistics (RabbitMQ-compatible format)."""
-         prefixed_queue_name = self.get_prefixed_queue_name(queue_name)
-
-         try:
-             info = await self.redis.xinfo_stream(prefixed_queue_name)
-             groups = await self.redis.xinfo_groups(prefixed_queue_name)
-         except Exception as e:
-             # If the queue does not exist, return defaults
-             return {
-                 "queue": queue_name,
-                 "messages": 0,
-                 "messages_ready": 0,
-                 "messages_unacknowledged": 0,
-                 "consumers": 0,
-                 "message_stats": {
-                     "publish": 0,
-                     "deliver_get": 0,
-                     "ack": 0
-                 },
-                 "consumer_groups": [],
-                 "error": str(e)
-             }
-
-         # Compute the base metrics
-         total_messages = info["length"]
-         total_pending = 0
-         total_consumers = 0
-         total_delivered = 0
-
-         consumer_groups_info = []
-
-         for group in groups:
-             group_pending = group["pending"]
-             group_consumers_count = group["consumers"]
-
-             total_pending += group_pending
-             total_consumers += group_consumers_count
-
-             group_info = {
-                 "name": group["name"],
-                 "consumers": group_consumers_count,
-                 "pending": group_pending,
-                 "last_delivered_id": group["last-delivered-id"]
-             }
-
-             # Fetch consumer details
-             try:
-                 consumers = await self.redis.xinfo_consumers(prefixed_queue_name, group["name"])
-                 group_info["consumer_details"] = consumers
-
-                 # Derive deliver/ack counts from the consumer statistics
-                 for consumer in consumers:
-                     total_delivered += consumer.get("pel-count", 0)
-
-             except Exception as e:
-                 group_info["consumer_details"] = []
-                 print(f"Error getting consumers for group {group['name']}: {e}")
-
-             consumer_groups_info.append(group_info)
-
-         # Get more precise message statistics from the worker stats
-         worker_summary = await self.get_queue_worker_summary_fast(queue_name)
-         publish_count = worker_summary.get('total_count', 0)  # total processed as an approximation of publishes
-         deliver_count = worker_summary.get('total_success_count', 0) + worker_summary.get('total_failed_count', 0)
-         ack_count = worker_summary.get('total_success_count', 0)
-
-         # Compute ready messages (total queue length - unacknowledged messages)
-         messages_ready = max(0, total_messages - total_pending)
-
-         # RabbitMQ-style statistics
-         stats = {
-             "queue": queue_name,
-             # RabbitMQ-compatible metrics
-             "messages": total_messages,  # total messages in the queue
-             "messages_ready": messages_ready,  # messages in the ready state
-             "messages_unacknowledged": total_pending,  # unacknowledged messages
-             "consumers": total_consumers,  # number of consumers
-             "message_stats": {
-                 "publish": publish_count,  # messages published to the queue
-                 "deliver_get": deliver_count,  # messages consumed
-                 "ack": ack_count  # messages acknowledged
-             },
-             # Original details kept for compatibility
-             "length": info["length"],
-             "first_entry": info.get("first-entry"),
-             "last_entry": info.get("last-entry"),
-             "consumer_groups": consumer_groups_info,
-             # Extra performance metrics
-             "performance_stats": {
-                 "avg_processing_time": worker_summary.get('avg_processing_time', 0.0),
-                 "avg_latency_time": worker_summary.get('avg_latency_time', 0.0),
-                 "total_running_tasks": worker_summary.get('total_running_tasks', 0)
-             }
-         }
-
-         return stats
-
-     async def _heartbeat_scanner(self):
-         """Heartbeat scanner task; periodically checks worker heartbeats."""
-         logger = logging.getLogger('webui.heartbeat')
-         logger.info("心跳扫描器启动")
-
-         while self._scanner_running:
-             try:
-                 # Use RegistryManager to fetch all workers, avoiding SCAN
-                 from jettask.worker.manager import WorkerState as WorkerRegistry
-                 from jettask.messaging.registry import QueueRegistry
-                 registry = RegistryManager(
-                     redis_client=None,
-                     async_redis_client=self.redis,
-                     redis_prefix=self.redis_prefix
-                 )
-
-                 # Get all worker IDs
-                 worker_ids = await worker_registry.get_all_workers()
-
-                 # Build the worker keys
-                 worker_keys = []
-                 for worker_id in worker_ids:
-                     worker_key = f"{self.redis_prefix}:WORKER:{worker_id}"
-                     # Filter out HISTORY-related keys (the registry should not contain any)
-                     if ':HISTORY:' not in worker_key:
-                         worker_keys.append(worker_key)
-
-                 if worker_keys:
-                     # Batch-fetch worker data via the WorkerStateManager
-                     current_time = time.time()
-
-                     if self.worker_state_manager:
-                         # Batch-fetch all worker info through the WorkerStateManager
-                         all_workers_info = await self.worker_state_manager.get_all_workers_info(only_alive=False)
-
-                         # Check each worker's heartbeat
-                         for worker_id in worker_ids:
-                             worker_data = all_workers_info.get(worker_id)
-                             if not worker_data:
-                                 continue
-
-                             try:
-                                 # Read the heartbeat-related fields
-                                 last_heartbeat = float(worker_data.get('last_heartbeat', 0))
-                                 is_alive = worker_data.get('is_alive') == 'true'
-                                 heartbeat_timeout = float(worker_data.get('heartbeat_timeout', self.default_heartbeat_timeout))
-                                 consumer_id = worker_data.get('consumer_id', '')
-
-                                 # Check for a timeout
-                                 if is_alive and (current_time - last_heartbeat) > heartbeat_timeout:
-                                     logger.info(f"Worker {consumer_id} 心跳超时,标记为离线")
-
-                                     # Mark the worker offline via the WorkerStateManager
-                                     await self.worker_state_manager.set_worker_offline(
-                                         worker_id=worker_id,
-                                         reason="heartbeat_timeout"
-                                     )
-
-                             except Exception as e:
-                                 logger.error(f"检查worker心跳时出错: {e}")
-                     else:
-                         # Fallback: use Redis directly
-                         pipe = self.redis.pipeline()
-                         for key in worker_keys:
-                             pipe.hgetall(key)
-                         all_workers_data = await pipe.execute()
-
-                         # Check each worker's heartbeat
-                         for i, worker_data in enumerate(all_workers_data):
-                             if not worker_data:
-                                 continue
-
-                             try:
-                                 # Read the heartbeat-related fields
-                                 last_heartbeat = float(worker_data.get('last_heartbeat', 0))
-                                 is_alive = worker_data.get('is_alive', 'true').lower() == 'true'
-                                 heartbeat_timeout = float(worker_data.get('heartbeat_timeout', self.default_heartbeat_timeout))
-                                 consumer_id = worker_data.get('consumer_id', '')
-
-                                 # Check for a timeout
-                                 if is_alive and (current_time - last_heartbeat) > heartbeat_timeout:
-                                     logger.info(f"Worker {consumer_id} 心跳超时,标记为离线")
-
-                                     # Mark the worker offline
-                                     worker_key = worker_keys[i]
-                                     await self.redis.hset(worker_key, 'is_alive', 'false')
-
-                             except Exception as e:
-                                 logger.error(f"检查worker心跳时出错: {e}")
-
-                 # Wait for the next scan
-                 await asyncio.sleep(self.scanner_interval)
-
-             except asyncio.CancelledError:
-                 logger.info("心跳扫描器收到取消信号")
-                 break
-             except Exception as e:
-                 logger.error(f"心跳扫描器出错: {e}")
-                 await asyncio.sleep(self.scanner_interval)
-
-         logger.info("心跳扫描器已停止")
-
-     async def start_heartbeat_scanner(self):
-         """Start the heartbeat scanner."""
-         if not self._scanner_running:
-             self._scanner_running = True
-             self.scanner_task = asyncio.create_task(self._heartbeat_scanner())
-             logging.getLogger('webui').info("心跳扫描器任务已创建")
-
-     async def stop_heartbeat_scanner(self):
-         """Stop the heartbeat scanner."""
-         self._scanner_running = False
-         if self.scanner_task and not self.scanner_task.done():
-             self.scanner_task.cancel()
-             try:
-                 await self.scanner_task
-             except asyncio.CancelledError:
-                 pass
-
  # Create the global monitor instance
- monitor = RedisMonitor()
- pg_consumer = None
+ monitor = MonitorService()

  @asynccontextmanager
  async def lifespan(app: FastAPI):
-     global pg_consumer

      # Startup
      try:
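The hunk above removes roughly 1,270 lines: the module-level SQLAlchemy engine helpers and the entire inline RedisMonitor class are replaced by a single MonitorService from jettask.webui.services. A minimal sketch of the lifecycle the surrounding code implies is given below; the four method names are exactly the calls this file makes on monitor, everything else is an assumption about the new service.

from jettask.webui.services import MonitorService

async def monitor_lifecycle_sketch() -> None:
    monitor = MonitorService()
    await monitor.connect()                  # startup (now commented out below)
    await monitor.start_heartbeat_scanner()  # background scan marking stale workers offline
    try:
        ...  # serve requests
    finally:
        await monitor.stop_heartbeat_scanner()
        await monitor.close()                # still called in the shutdown hunk below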
@@ -1293,24 +20,13 @@ async def lifespan(app: FastAPI):
          # Check whether Nacos-based configuration is in use
          use_nacos = os.getenv('USE_NACOS', 'false').lower() == 'true'

-         # Initialize the database manager
-         from jettask.persistence.db_manager import init_db_manager
-         await init_db_manager(use_nacos=use_nacos)
-
-         # Create the data-access instances
-         from jettask.persistence.base import JetTaskDataAccess
-         from jettask.persistence.namespace import get_namespace_data_access
+         # Manage database connections directly through connector.py
          from jettask.config.task_center import task_center_config

-         data_access = JetTaskDataAccess()
-         namespace_data_access = get_namespace_data_access()
-
-         # Store them on app.state for the routes to use
-         app.state.data_access = data_access
-         app.state.namespace_data_access = namespace_data_access
-
-         # Initialize the JetTask data access
-         await data_access.initialize()
+         # Store the configuration on app.state for the routes to use
+         app.state.redis_url = os.environ.get('JETTASK_REDIS_URL', 'redis://localhost:6379/0')
+         app.state.pg_url = os.environ.get('JETTASK_PG_URL', 'postgresql+asyncpg://jettask:123456@localhost:5432/jettask')
+         app.state.redis_prefix = os.environ.get('JETTASK_REDIS_PREFIX', 'jettask')

          # Log the task-center configuration
          logger.info("=" * 60)
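With the data-access layer gone from the lifespan hook, the WebUI's connection settings are now plain strings on app.state, populated from the three JETTASK_* environment variables with the defaults shown above. A short sketch of how a route can pick them up; the debug route itself is hypothetical, while the variable and attribute names come from the added lines.

import os
from fastapi import FastAPI, Request

app = FastAPI()

# Mirrors the assignments in the new lifespan code above, including defaults.
app.state.redis_url = os.environ.get('JETTASK_REDIS_URL', 'redis://localhost:6379/0')
app.state.pg_url = os.environ.get('JETTASK_PG_URL', 'postgresql+asyncpg://jettask:123456@localhost:5432/jettask')
app.state.redis_prefix = os.environ.get('JETTASK_REDIS_PREFIX', 'jettask')

@app.get("/api/debug/config")  # hypothetical route, for illustration only
async def show_config(request: Request) -> dict:
    # Any handler can read the shared settings off app.state.
    return {
        "redis_url": request.app.state.redis_url,
        "redis_prefix": request.app.state.redis_prefix,
    }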
@@ -1322,18 +38,14 @@ async def lifespan(app: FastAPI):
          logger.info("=" * 60)

          # Connect the monitor
-         await monitor.connect()
-         # Start the heartbeat scanner
-         await monitor.start_heartbeat_scanner()
+         # await monitor.connect()
+         # # Start the heartbeat scanner
+         # await monitor.start_heartbeat_scanner()
+         # # Store the monitor on app.state for the new routes to use
+         # app.state.monitor = monitor

-         # Start the PostgreSQL consumer (if configured and explicitly enabled)
-         if hasattr(app.state, 'pg_config') and getattr(app.state, 'enable_consumer', False):
-             redis_config = RedisConfig.from_env()
-             pg_consumer = PostgreSQLConsumer(app.state.pg_config, redis_config)
-             await pg_consumer.start()
-             logging.info("PostgreSQL consumer started")
-         else:
-             logging.info("PostgreSQL consumer disabled (use --with-consumer to enable)")
+         # The PostgreSQL consumer is deprecated; the unified database manager handles it
+         logging.info("PostgreSQL consumer disabled (use --with-consumer to enable)")

          logger.info("JetTask WebUI 启动成功")
      except Exception as e:
@@ -1350,23 +62,8 @@ async def lifespan(app: FastAPI):
          await monitor.stop_heartbeat_scanner()
          await monitor.close()

-         # Stop the PostgreSQL consumer
-         if pg_consumer:
-             await pg_consumer.stop()
-
-         # Close the data access layer
-         if hasattr(app.state, 'data_access'):
-             await app.state.data_access.close()
-
-         # Close the database manager
-         from jettask.persistence.db_manager import close_db_manager
-         await close_db_manager()
-
-         # Shut down the SQLAlchemy engine
-         global async_engine
-         if async_engine:
-             await async_engine.dispose()
-             async_engine = None
+         # Database connection pools are managed globally by connector.py;
+         # no explicit shutdown is needed, they are cleaned up when the process exits

          logger.info("JetTask WebUI 关闭完成")
      except Exception as e:
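The shutdown path shrinks accordingly: instead of stopping a consumer, closing the data-access layer, and disposing a module-level engine, 0.2.24 leans on process-global pools owned by jettask/db/connector.py (the renamed jettask/utils/db_connector.py). A sketch of that pattern follows, assuming the pool-per-URL behavior the old get_async_redis_pool call suggests; the real connector implementation is not shown in this diff.

from redis import asyncio as aioredis

_pools: dict[str, aioredis.ConnectionPool] = {}

def get_async_redis_pool(url: str, **kwargs) -> aioredis.ConnectionPool:
    # One pool per URL for the whole process; nothing to dispose per-app,
    # the pool is released when the process exits.
    if url not in _pools:
        _pools[url] = aioredis.ConnectionPool.from_url(url, **kwargs)
    return _pools[url]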
@@ -1385,1019 +82,39 @@ app.add_middleware(
1385
82
  allow_headers=["*"], # 允许所有请求头
1386
83
  )
1387
84
 
85
+ # 配置 Namespace 自动注入中间件
86
+ # 这个中间件会自动检测路由中的 {namespace} 参数,并注入到 request.state.ns
87
+ from jettask.webui.middleware import NamespaceMiddleware
88
+ app.add_middleware(NamespaceMiddleware)
89
+ logger.info("NamespaceMiddleware 已注册 - 所有包含 {namespace} 的路由将自动注入命名空间上下文")
90
+
1388
91
  # 注册 API 路由
1389
92
  from jettask.webui.api import api_router
1390
93
  app.include_router(api_router)
1391
94
 
1392
-
1393
- @app.get("/api/queue/{queue_name}/tasks")
1394
- async def get_queue_tasks(
1395
- queue_name: str,
1396
- start_time: Optional[str] = None,
1397
- end_time: Optional[str] = None,
1398
- limit: int = 50
1399
- ):
1400
- """获取指定队列的任务(基于时间范围)"""
1401
- print(f'{queue_name=} {start_time=} {end_time=} {limit=}')
1402
- result = await monitor.get_queue_tasks(queue_name, start_time, end_time, limit)
1403
- return result
1404
-
1405
- @app.get("/api/queue/{queue_name}/timeline/pg")
1406
- async def get_queue_timeline_from_pg(
1407
- queue_name: str,
1408
- start_time: Optional[str] = None,
1409
- end_time: Optional[str] = None,
1410
- interval: str = "5m"
1411
- ):
1412
- """从PostgreSQL获取队列任务的时间分布数据"""
1413
- # 如果没有提供时间范围,默认最近1小时
1414
- if not end_time:
1415
- end_dt = datetime.now(timezone.utc)
1416
- else:
1417
- end_dt = parse_iso_datetime(end_time)
1418
-
1419
- if not start_time:
1420
- start_dt = end_dt - timedelta(hours=1)
1421
- else:
1422
- start_dt = parse_iso_datetime(start_time)
1423
-
1424
- # 解析时间间隔
1425
- interval_minutes = 5 # 默认5分钟
1426
- if interval.endswith('m'):
1427
- interval_minutes = int(interval[:-1])
1428
- elif interval.endswith('h'):
1429
- interval_minutes = int(interval[:-1]) * 60
1430
-
1431
- # 获取数据库引擎
1432
- engine = await get_db_engine()
1433
- if not engine:
1434
- return {
1435
- "timeline": [],
1436
- "interval": interval,
1437
- "start_time": start_dt.isoformat(),
1438
- "end_time": end_dt.isoformat(),
1439
- "error": "PostgreSQL connection not configured"
1440
- }
1441
-
1442
- try:
1443
- async with AsyncSessionLocal() as session:
1444
- # 使用 SQLAlchemy 的原生 SQL 查询(因为复杂的时间分组)
1445
- query = text(f"""
1446
- SELECT
1447
- DATE_TRUNC('minute', created_at) -
1448
- INTERVAL '{interval_minutes} minutes' * (EXTRACT(MINUTE FROM created_at)::int % {interval_minutes}) as time_bucket,
1449
- COUNT(*) as count,
1450
- SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) as completed_count,
1451
- SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END) as failed_count,
1452
- AVG(CASE WHEN status = 'completed' AND processing_time IS NOT NULL
1453
- THEN processing_time ELSE NULL END) as avg_processing_time
1454
- FROM tasks
1455
- WHERE queue_name = :queue_name
1456
- AND created_at >= :start_dt
1457
- AND created_at < :end_dt
1458
- GROUP BY time_bucket
1459
- ORDER BY time_bucket
1460
- """)
1461
-
1462
- result = await session.execute(query, {
1463
- 'queue_name': queue_name,
1464
- 'start_dt': start_dt,
1465
- 'end_dt': end_dt
1466
- })
1467
- rows = result.mappings().all() # Use mappings() to get dict-like results
1468
-
1469
- # 构建时间轴数据
1470
- timeline = []
1471
- for row in rows:
1472
- timeline.append({
1473
- "time": row['time_bucket'].isoformat(),
1474
- "count": row['count'],
1475
- "completed_count": row['completed_count'],
1476
- "failed_count": row['failed_count'],
1477
- "avg_processing_time": float(row['avg_processing_time']) if row['avg_processing_time'] else 0
1478
- })
1479
-
1480
- # 填充缺失的时间点
1481
- filled_timeline = []
1482
- current_time = start_dt
1483
- timeline_dict = {item['time']: item for item in timeline}
1484
-
1485
- while current_time < end_dt:
1486
- time_key = current_time.isoformat()
1487
- if time_key in timeline_dict:
1488
- filled_timeline.append(timeline_dict[time_key])
1489
- else:
1490
- filled_timeline.append({
1491
- "time": time_key,
1492
- "count": 0,
1493
- "completed_count": 0,
1494
- "failed_count": 0,
1495
- "avg_processing_time": 0
1496
- })
1497
- current_time += timedelta(minutes=interval_minutes)
1498
-
1499
- return {
1500
- "timeline": filled_timeline,
1501
- "interval": interval,
1502
- "start_time": start_dt.isoformat(),
1503
- "end_time": end_dt.isoformat()
1504
- }
1505
-
1506
- except Exception as e:
1507
- logger.error(f"Error fetching timeline from PostgreSQL: {e}")
1508
- return {
1509
- "timeline": [],
1510
- "interval": interval,
1511
- "start_time": start_dt.isoformat(),
1512
- "end_time": end_dt.isoformat(),
1513
- "error": str(e)
1514
- }
1515
-
1516
- @app.get("/api/queues/timeline/pg")
1517
- async def get_queues_timeline_from_pg(
1518
- queues: str = Query(..., description="Comma-separated list of queue names"),
1519
- start_time: Optional[str] = None,
1520
- end_time: Optional[str] = None
1521
- ):
1522
- """从PostgreSQL获取多个队列的任务时间分布数据"""
1523
- # 解析队列列表
1524
- if not queues or queues.strip() == "":
1525
- # 如果没有提供队列,返回空结果
1526
- # 计算默认时间范围
1527
- end_dt = datetime.now(timezone.utc) if not end_time else parse_iso_datetime(end_time)
1528
- start_dt = (end_dt - timedelta(hours=1)) if not start_time else parse_iso_datetime(start_time)
1529
-
1530
- return {
1531
- "queues": [],
1532
- "start_time": start_dt.isoformat(),
1533
- "end_time": end_dt.isoformat(),
1534
- "interval": interval,
1535
- "message": "No queues selected"
1536
- }
1537
-
1538
- queue_list = [q.strip() for q in queues.split(',') if q.strip()][:10] # 最多10个队列
1539
-
1540
- # 如果没有提供时间范围,默认最近1小时
1541
- if not end_time:
1542
- end_dt = datetime.now(timezone.utc)
1543
- else:
1544
- end_dt = parse_iso_datetime(end_time)
1545
-
1546
- if not start_time:
1547
- start_dt = end_dt - timedelta(hours=1)
1548
- else:
1549
- start_dt = parse_iso_datetime(start_time)
1550
-
1551
- logger.info(f'{start_dt=} {end_dt=}')
1552
-
1553
- # 根据时间范围自动计算合适的时间间隔
1554
- duration = (end_dt - start_dt).total_seconds()
1555
-
1556
- # 动态计算时间间隔
1557
- if duration <= 300: # <= 5分钟
1558
- interval_seconds = 0.5 # 500毫秒
1559
- interval_type = 'millisecond'
1560
- interval = '500ms'
1561
- elif duration <= 900: # <= 15分钟
1562
- interval_seconds = 1 # 1秒
1563
- interval_type = 'second'
1564
- interval = '1s'
1565
- elif duration <= 1800: # <= 30分钟
1566
- interval_seconds = 2 # 2秒
1567
- interval_type = 'second'
1568
- interval = '2s'
1569
- elif duration <= 3600: # <= 1小时
1570
- interval_seconds = 30 # 30秒
1571
- interval_type = 'second'
1572
- interval = '30s'
1573
- elif duration <= 10800: # <= 3小时
1574
- interval_seconds = 300 # 5分钟
1575
- interval_type = 'minute'
1576
- interval = '5m'
1577
- elif duration <= 21600: # <= 6小时
1578
- interval_seconds = 600 # 10分钟
1579
- interval_type = 'minute'
1580
- interval = '10m'
1581
- elif duration <= 43200: # <= 12小时
1582
- interval_seconds = 1800 # 30分钟
1583
- interval_type = 'minute'
1584
- interval = '30m'
1585
- elif duration <= 86400: # <= 24小时
1586
- interval_seconds = 3600 # 1小时
1587
- interval_type = 'hour'
1588
- interval = '1h'
1589
- elif duration <= 172800: # <= 2天
1590
- interval_seconds = 7200 # 2小时
1591
- interval_type = 'hour'
1592
- interval = '2h'
1593
- elif duration <= 604800: # <= 7天
1594
- interval_seconds = 21600 # 6小时
1595
- interval_type = 'hour'
1596
- interval = '6h'
1597
- else: # > 7天
1598
- interval_seconds = 86400 # 1天
1599
- interval_type = 'hour'
1600
- interval = '24h'
1601
-
1602
- # 转换为分钟数(用于兼容旧代码)
1603
- interval_minutes = interval_seconds / 60
1604
-
1605
- logger.info(f"Time range: {duration}s, using interval: {interval} -> {interval_seconds} seconds, type: {interval_type}")
1606
-
1607
- # 获取数据库引擎
1608
- engine = await get_db_engine()
1609
- if not engine:
1610
- return {
1611
- "queues": [],
1612
- "start_time": start_dt.isoformat(),
1613
- "end_time": end_dt.isoformat(),
1614
- "interval": interval,
1615
- "error": "PostgreSQL connection not configured"
1616
- }
1617
-
1618
- result = []
1619
-
1620
- for queue_name in queue_list:
1621
- try:
1622
- async with AsyncSessionLocal() as session:
1623
- # 使用更简单直接的时间分组方法
1624
- if interval_type == 'millisecond':
1625
- # 对于毫秒级别的间隔
1626
- query = text(f"""
1627
- SELECT
1628
- DATE_TRUNC('second', created_at) +
1629
- INTERVAL '{interval_seconds} seconds' * FLOOR(EXTRACT(MILLISECONDS FROM created_at) / ({interval_seconds} * 1000)) as time_bucket,
1630
- COUNT(*) as count
1631
- FROM tasks
1632
- WHERE queue_name = :queue_name
1633
- AND created_at >= :start_dt
1634
- AND created_at < :end_dt
1635
- GROUP BY time_bucket
1636
- ORDER BY time_bucket
1637
- """)
1638
- elif interval_type == 'second':
1639
- # 对于秒级别的间隔
1640
- query = text(f"""
1641
- SELECT
1642
- DATE_TRUNC('minute', created_at) +
1643
- INTERVAL '{interval_seconds} seconds' * FLOOR(EXTRACT(SECOND FROM created_at) / {interval_seconds}) as time_bucket,
1644
- COUNT(*) as count
1645
- FROM tasks
1646
- WHERE queue_name = :queue_name
1647
- AND created_at >= :start_dt
1648
- AND created_at < :end_dt
1649
- GROUP BY time_bucket
1650
- ORDER BY time_bucket
1651
- """)
1652
- elif interval_type == 'minute' and interval_minutes < 60:
1653
- # 对于分钟级别的间隔(小于1小时)
1654
- query = text(f"""
1655
- SELECT
1656
- DATE_TRUNC('hour', created_at) +
1657
- INTERVAL '{interval_minutes} minutes' * FLOOR(EXTRACT(MINUTE FROM created_at) / {interval_minutes}) as time_bucket,
1658
- COUNT(*) as count
1659
- FROM tasks
1660
- WHERE queue_name = :queue_name
1661
- AND created_at >= :start_dt
1662
- AND created_at < :end_dt
1663
- GROUP BY time_bucket
1664
- ORDER BY time_bucket
1665
- """)
1666
- elif interval_minutes == 60:
1667
- # 对于1小时间隔,直接使用小时截断
1668
- query = text("""
1669
- SELECT
1670
- DATE_TRUNC('hour', created_at) as time_bucket,
1671
- COUNT(*) as count
1672
- FROM tasks
1673
- WHERE queue_name = :queue_name
1674
- AND created_at >= :start_dt
1675
- AND created_at < :end_dt
1676
- GROUP BY time_bucket
1677
- ORDER BY time_bucket
1678
- """)
1679
- else:
1680
- # 对于大于1小时的间隔,使用小时级别的计算
1681
- interval_hours = int(interval_minutes // 60)
1682
- query = text(f"""
1683
- SELECT
1684
- DATE_TRUNC('day', created_at) +
1685
- INTERVAL '{interval_hours} hours' * FLOOR(EXTRACT(HOUR FROM created_at) / {interval_hours}) as time_bucket,
1686
- COUNT(*) as count
1687
- FROM tasks
1688
- WHERE queue_name = :queue_name
1689
- AND created_at >= :start_dt
1690
- AND created_at < :end_dt
1691
- GROUP BY time_bucket
1692
- ORDER BY time_bucket
1693
- """)
1694
- params = {
1695
- 'queue_name': queue_name,
1696
- 'start_dt': start_dt,
1697
- 'end_dt': end_dt
1698
- }
1699
-
1700
- # 先绑定参数
1701
- bound_query = query.bindparams(**params)
1702
-
1703
- # 生成可直接执行的 SQL(带参数值)
1704
- compiled_sql = bound_query.compile(
1705
- dialect=postgresql.dialect(),
1706
- compile_kwargs={"literal_binds": True}
1707
- ).string
1708
- compiled_sql = compiled_sql.replace("%%", "%")
1709
-
1710
- print("可直接复制到 Navicat 执行的 SQL:\n", compiled_sql)
1711
-
1712
- # 再执行
1713
- result_obj = await session.execute(query, params)
1714
- rows = result_obj.mappings().all() # Use mappings() to get dict-like results
1715
- logger.info(f'{rows=}')
1716
- # 构建时间轴数据
1717
- timeline = []
1718
- for row in rows:
1719
- timeline.append({
1720
- "time": row['time_bucket'].isoformat(),
1721
- "count": row['count']
1722
- })
1723
-
1724
- # 填充缺失的时间点
1725
- filled_timeline = []
1726
-
1727
- # 构建一个时间到数据的映射,用于快速查找
1728
- # 由于可能存在时区或微小时间差异,我们需要更灵活的匹配
1729
- timeline_data = []
1730
- for item in timeline:
1731
- dt = datetime.fromisoformat(item['time'])
1732
- timeline_data.append((dt, item['count']))
1733
-
1734
- # 对timeline_data按时间排序
1735
- timeline_data.sort(key=lambda x: x[0])
1736
-
1737
- # 生成完整的时间序列
1738
- filled_timeline = []
1739
-
1740
- # 对齐到interval
1741
- def align_to_interval(dt, interval_seconds):
1742
- """对齐时间到interval_seconds的整数倍"""
1743
- if interval_seconds >= 3600: # 大于等于1小时
1744
- # 按小时对齐
1745
- dt = dt.replace(minute=0, second=0, microsecond=0)
1746
- interval_hours = interval_seconds // 3600
1747
- aligned_hour = (dt.hour // interval_hours) * interval_hours
1748
- return dt.replace(hour=aligned_hour)
1749
- elif interval_seconds >= 60: # 大于等于1分钟
1750
- # 按分钟对齐
1751
- dt = dt.replace(second=0, microsecond=0)
1752
- interval_minutes = interval_seconds // 60
1753
- total_minutes = dt.hour * 60 + dt.minute
1754
- aligned_total_minutes = (total_minutes // interval_minutes) * interval_minutes
1755
- aligned_hour = aligned_total_minutes // 60
1756
- aligned_minute = aligned_total_minutes % 60
1757
- return dt.replace(hour=aligned_hour, minute=aligned_minute)
1758
- elif interval_seconds >= 1: # 秒级别
1759
- # 按秒对齐
1760
- dt = dt.replace(microsecond=0)
1761
- aligned_second = int(dt.second // interval_seconds) * int(interval_seconds)
1762
- return dt.replace(second=aligned_second)
1763
- else: # 毫秒级别
1764
- # 按毫秒对齐
1765
- total_ms = dt.microsecond / 1000 # 转换为毫秒
1766
- interval_ms = interval_seconds * 1000
1767
- aligned_ms = int(total_ms // interval_ms) * interval_ms
1768
- aligned_microsecond = int(aligned_ms * 1000)
1769
- return dt.replace(microsecond=aligned_microsecond)
1770
-
1771
- current_time = align_to_interval(start_dt, interval_seconds)
1772
-
1773
- # 用于追踪我们在timeline_data中的位置
1774
- timeline_index = 0
1775
-
1776
- while current_time < end_dt:
1777
- # 查找是否有匹配的数据点
1778
- # 允许最多interval_seconds/2的误差
1779
- tolerance = timedelta(seconds=interval_seconds/2)
1780
- found = False
1781
-
1782
- # 从当前位置开始查找
1783
- while timeline_index < len(timeline_data):
1784
- data_time, count = timeline_data[timeline_index]
1785
-
1786
- # 计算时间差(秒)
1787
- time_diff = abs((data_time - current_time).total_seconds())
1788
-
1789
- if time_diff < interval_seconds / 2:
1790
- # 找到匹配的数据
1791
- filled_timeline.append({
1792
- "time": current_time.isoformat(),
1793
- "count": count
1794
- })
1795
- found = True
1796
- timeline_index += 1
1797
- break
1798
- elif data_time > current_time + tolerance:
1799
- # 数据时间已经超过当前时间太多,停止查找
1800
- break
1801
- else:
1802
- # 这个数据点太早了,继续查找下一个
1803
- timeline_index += 1
1804
-
1805
- if not found:
1806
- # 没有找到匹配的数据,填充0
1807
- filled_timeline.append({
1808
- "time": current_time.isoformat(),
1809
- "count": 0
1810
- })
1811
-
1812
- current_time += timedelta(seconds=interval_seconds)
1813
- result.append({
1814
- "queue": queue_name,
1815
- "timeline": {
1816
- "timeline": filled_timeline,
1817
- "interval": interval
1818
- }
1819
- })
1820
-
1821
- except Exception as e:
1822
- logger.error(f"Error fetching timeline for queue {queue_name}: {e}")
1823
- result.append({
1824
- "queue": queue_name,
1825
- "timeline": {
1826
- "timeline": [],
1827
- "interval": interval,
1828
- "error": str(e)
1829
- }
1830
- })
1831
- # logger.info(f'{result=}')
1832
- return {
1833
- "queues": result,
1834
- "start_time": start_dt.isoformat(),
1835
- "end_time": end_dt.isoformat(),
1836
- "interval": interval
1837
- }
1838
-
1839
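The duration-to-interval ladder in the removed endpoint maps a query window to a bucket width. The same mapping expressed as a lookup table, sketched under the assumption that the thresholds above are the intended contract:

```python
# (duration upper bound in seconds, interval_seconds, interval_type, label)
INTERVAL_LADDER = [
    (300, 0.5, "millisecond", "500ms"),
    (900, 1, "second", "1s"),
    (1800, 2, "second", "2s"),
    (3600, 30, "second", "30s"),
    (10800, 300, "minute", "5m"),
    (21600, 600, "minute", "10m"),
    (43200, 1800, "minute", "30m"),
    (86400, 3600, "hour", "1h"),
    (172800, 7200, "hour", "2h"),
    (604800, 21600, "hour", "6h"),
]

def pick_interval(duration_seconds: float):
    """Return (interval_seconds, interval_type, label) for a time range."""
    for upper_bound, seconds, kind, label in INTERVAL_LADDER:
        if duration_seconds <= upper_bound:
            return seconds, kind, label
    return 86400, "hour", "24h"  # anything over 7 days falls back to daily buckets

assert pick_interval(3600) == (30, "second", "30s")
```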
- @app.get("/api/queue/{queue_name}/timeline")
1840
- async def get_queue_timeline(
1841
- queue_name: str,
1842
- interval: str = "1m",
1843
- duration: str = "1h",
1844
- start_time: Optional[str] = None,
1845
- end_time: Optional[str] = None,
1846
- context: str = "detail" # 'overview' for homepage, 'detail' for queue detail page
1847
- ):
1848
- """获取队列任务的时间分布(用于时间轴)"""
1849
- try:
1850
- # 解析时间间隔和持续时间
1851
- interval_seconds = parse_time_duration(interval)
1852
-
1853
- # 根据上下文设置不同的数据限制
1854
- if context == "overview":
1855
- # 首页概览:固定获取最近1小时的所有数据
1856
- duration_seconds = 3600 # 1小时
1857
- now = int(datetime.now(timezone.utc).timestamp() * 1000)
1858
- start = now - duration_seconds * 1000
1859
- min_id = f"{start}-0"
1860
- max_id = "+"
1861
- max_count = 100000 # 首页概览获取所有数据
1862
- else:
1863
- # 队列详情页:根据参数获取,但限制最多10000条
1864
- if start_time and end_time:
1865
- # 使用提供的时间范围
1866
- min_id = start_time
1867
- max_id = end_time if end_time != '+' else '+'
1868
- else:
1869
- # 使用duration参数计算时间范围
1870
- duration_seconds = parse_time_duration(duration)
1871
- now = int(datetime.now(timezone.utc).timestamp() * 1000)
1872
- start = now - duration_seconds * 1000
1873
- min_id = f"{start}-0"
1874
- max_id = "+"
1875
- max_count = 10000 # 详情页限制10000条
1876
-
1877
- # 获取指定时间范围内的消息
1878
- prefixed_queue_name = monitor.get_prefixed_queue_name(queue_name)
1879
- print(f'{prefixed_queue_name=} {min_id=} {max_id=} {max_count=}')
1880
- messages = await monitor.redis.xrange(
1881
- prefixed_queue_name,
1882
- min=min_id,
1883
- max=max_id,
1884
- count=max_count
1885
- )
1886
-
1887
- # 按时间间隔统计任务数量
1888
- buckets = {}
1889
- bucket_size = interval_seconds * 1000 # 转换为毫秒
1890
-
1891
- # 计算实际的时间范围用于生成时间轴
1892
- if start_time and end_time:
1893
- # 从参数中解析时间范围
1894
- if start_time != '-':
1895
- actual_start = int(start_time.split('-')[0])
1896
- else:
1897
- actual_start = int(datetime.now(timezone.utc).timestamp() * 1000) - 86400000 # 默认24小时前
1898
-
1899
- if end_time != '+':
1900
- actual_end = int(end_time.split('-')[0])
1901
- else:
1902
- actual_end = int(datetime.now(timezone.utc).timestamp() * 1000)
1903
- else:
1904
- # 使用duration参数计算的时间范围
1905
- actual_start = start
1906
- actual_end = now
1907
-
1908
- for msg_id, _ in messages:
1909
- # 从消息ID提取时间戳
1910
- timestamp = int(msg_id.split('-')[0])
1911
- bucket_key = (timestamp // bucket_size) * bucket_size
1912
- buckets[bucket_key] = buckets.get(bucket_key, 0) + 1
1913
-
1914
- # 转换为时间序列数据
1915
- timeline_data = []
1916
- current_bucket = (actual_start // bucket_size) * bucket_size
1917
-
1918
- while current_bucket <= actual_end:
1919
- timeline_data.append({
1920
- "timestamp": current_bucket,
1921
- "count": buckets.get(current_bucket, 0)
1922
- })
1923
- current_bucket += bucket_size
1924
-
1925
- # 计算实际任务总数
1926
- total_tasks = len(messages)
1927
-
1928
- # 检查是否达到数据限制
1929
- has_more = False
1930
- if context == "detail" and total_tasks >= max_count:
1931
- has_more = True
1932
-
1933
- return {
1934
- "timeline": timeline_data,
1935
- "interval": interval,
1936
- "duration": duration,
1937
- "start": actual_start,
1938
- "end": actual_end,
1939
- "total_tasks": total_tasks, # 添加实际任务总数
1940
- "message_count": len(messages), # 实际获取到的消息数量
1941
- "has_more": has_more, # 是否还有更多数据
1942
- "limit": max_count if context == "detail" else None # 数据限制
1943
- }
1944
-
1945
- except Exception as e:
1946
- print(f"Error getting timeline for queue {queue_name}: {e}")
1947
- return {
1948
- "timeline": [],
1949
- "error": str(e)
1950
- }
1951
-
1952
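The Redis-backed timeline above works because stream entry IDs begin with a millisecond timestamp (`<ms>-<seq>`), so bucketing reduces to integer division. A self-contained sketch of that counting step (the helper name is illustrative):

```python
def bucket_stream_ids(message_ids, interval_seconds):
    """Count Redis stream entries per fixed-size time bucket.

    Stream IDs look like "1718000000100-0": milliseconds since the
    epoch, a dash, and a sequence number.
    """
    bucket_size = interval_seconds * 1000  # bucket width in milliseconds
    buckets = {}
    for msg_id in message_ids:
        timestamp = int(msg_id.split("-")[0])
        bucket_key = (timestamp // bucket_size) * bucket_size
        buckets[bucket_key] = buckets.get(bucket_key, 0) + 1
    return buckets

print(bucket_stream_ids(["1718000000100-0", "1718000000900-1", "1718000061000-0"], 60))
# {1717999980000: 2, 1718000040000: 1}
```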
-def parse_time_duration(duration_str: str) -> int:
-    """Parse a duration string (e.g. '1h', '10m', '30s') into seconds"""
-    units = {
-        's': 1,
-        'm': 60,
-        'h': 3600,
-        'd': 86400
-    }
-
-    if duration_str[-1] in units:
-        value = int(duration_str[:-1])
-        unit = duration_str[-1]
-        return value * units[unit]
-
-    # Default to plain seconds
-    return int(duration_str)
-
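For reference, a few hedged examples of what the removed parser accepts (a bare number falls through to plain seconds):

```python
assert parse_time_duration("30s") == 30
assert parse_time_duration("10m") == 600
assert parse_time_duration("1h") == 3600
assert parse_time_duration("2d") == 172800
assert parse_time_duration("45") == 45  # no suffix: interpreted as seconds
```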
- @app.get("/api/task/{event_id}/result")
1970
- async def get_task_result(event_id: str):
1971
- """获取单个任务的结果"""
1972
- result_key = f"{monitor.redis_prefix}:RESULT:{event_id}"
1973
- result = await monitor.redis.get(result_key)
1974
- return {"event_id": event_id, "result": result}
1975
-
1976
- @app.get("/api/queues")
1977
- async def get_queues():
1978
- """获取所有队列"""
1979
- queues = await monitor.get_all_queues()
1980
- return {"queues": queues}
1981
-
1982
- @app.get("/api/queue/{queue_name}/stats")
1983
- async def get_queue_stats(queue_name: str):
1984
- """获取队列统计信息"""
1985
- try:
1986
- stats = await monitor.get_queue_stats(queue_name)
1987
- return stats
1988
- except Exception as e:
1989
- return {"error": str(e)}
1990
-
1991
- @app.get("/api/queue/{queue_name}/workers")
1992
- async def get_queue_workers(queue_name: str):
1993
- """获取队列的Worker信息"""
1994
- workers = await monitor.get_worker_heartbeats(queue_name)
1995
- return {"queue": queue_name, "workers": workers}
1996
-
1997
- @app.get("/api/queue/{queue_name}/worker-summary")
1998
- async def get_queue_worker_summary(queue_name: str):
1999
- """获取队列的Worker汇总统计信息"""
2000
- summary = await monitor.get_queue_worker_summary(queue_name)
2001
- return {"queue": queue_name, "summary": summary}
2002
-
2003
- @app.get("/api/workers/offline-history")
2004
- async def get_workers_offline_history(
2005
- limit: int = 100,
2006
- start_time: Optional[float] = None,
2007
- end_time: Optional[float] = None
2008
- ):
2009
- """获取所有worker的下线历史记录"""
2010
- history = await monitor.get_worker_offline_history(limit, start_time, end_time)
2011
- return {"history": history, "total": len(history)}
2012
-
2013
- @app.get("/api/global-stats")
2014
- async def get_global_stats():
2015
- """获取全局统计信息(包含历史记录)"""
2016
- stats = await monitor.get_global_stats_with_history()
2017
- return stats
2018
-
2019
- @app.get("/api/global-stats/light")
2020
- async def get_global_stats_light():
2021
- """获取轻量级全局统计信息(不包含历史记录)"""
2022
- try:
2023
- # 获取所有队列
2024
- queues = await monitor.get_all_queues()
2025
-
2026
- # 并行获取所有队列的快速汇总和简单统计
2027
- summaries_task = asyncio.gather(
2028
- *[monitor.get_queue_worker_summary_fast(queue) for queue in queues],
2029
- return_exceptions=True
2030
- )
2031
-
2032
- # 获取基础队列信息(不获取完整stats以提高性能)
2033
- queue_lengths_task = asyncio.gather(
2034
- *[monitor.redis.xlen(monitor.get_prefixed_queue_name(queue)) for queue in queues],
2035
- return_exceptions=True
2036
- )
2037
-
2038
- summaries, queue_lengths = await asyncio.gather(
2039
- summaries_task, queue_lengths_task
2040
- )
2041
-
2042
- # 汇总数据
2043
- total_workers = 0
2044
- online_workers = 0
2045
- total_running_tasks = 0
2046
- total_messages = 0
2047
- total_consumers = 0
2048
-
2049
- for summary in summaries:
2050
- if not isinstance(summary, Exception):
2051
- total_workers += summary.get('total_workers', 0)
2052
- online_workers += summary.get('online_workers', 0)
2053
- total_running_tasks += summary.get('total_running_tasks', 0)
2054
- total_consumers += summary.get('total_workers', 0) # 近似使用worker数作为消费者数
2055
-
2056
- # 汇总消息数
2057
- for length in queue_lengths:
2058
- if not isinstance(length, Exception):
2059
- total_messages += length
2060
-
2061
- return {
2062
- 'total_queues': len(queues),
2063
- 'total_workers': total_workers,
2064
- 'online_workers': online_workers,
2065
- 'total_running_tasks': total_running_tasks,
2066
- 'messages': total_messages,
2067
- 'consumers': total_consumers,
2068
- 'timestamp': datetime.now(timezone.utc).isoformat()
2069
- }
2070
- except Exception as e:
2071
- return {
2072
- 'error': str(e),
2073
- 'total_queues': 0,
2074
- 'total_workers': 0,
2075
- 'online_workers': 0,
2076
- 'total_running_tasks': 0,
2077
- 'messages': 0,
2078
- 'consumers': 0
2079
- }
2080
-
2081
-
2082
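The fan-out above (one awaitable per queue, `return_exceptions=True`, failures skipped during aggregation) is the load-bearing pattern in this endpoint. A minimal sketch with a stubbed fetcher standing in for the monitor calls:

```python
import asyncio

async def fetch_summary(queue: str) -> dict:
    # Stub standing in for something like get_queue_worker_summary_fast()
    if queue == "broken":
        raise RuntimeError("redis timeout")
    return {"total_workers": 2, "online_workers": 1}

async def main():
    queues = ["default", "broken", "emails"]
    summaries = await asyncio.gather(
        *[fetch_summary(q) for q in queues],
        return_exceptions=True,  # failures come back as exception objects
    )
    total = sum(
        s["total_workers"] for s in summaries if not isinstance(s, Exception)
    )
    print(f"workers across healthy queues: {total}")

asyncio.run(main())  # workers across healthy queues: 4
```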
- @app.get("/api/queue/{queue_name}/workers/offline-history")
2083
- async def get_queue_workers_offline_history(
2084
- queue_name: str,
2085
- limit: int = 100,
2086
- start_time: Optional[float] = None,
2087
- end_time: Optional[float] = None
2088
- ):
2089
- """获取指定队列的worker下线历史记录"""
2090
- # 获取所有历史记录,然后过滤出该队列的
2091
- all_history = await monitor.get_worker_offline_history(limit * 10, start_time, end_time)
2092
- queue_history = [
2093
- record for record in all_history
2094
- if queue_name in record.get('queues', '').split(',')
2095
- ][:limit]
2096
- return {"queue": queue_name, "history": queue_history, "total": len(queue_history)}
2097
-
2098
- @app.websocket("/ws")
2099
- async def websocket_endpoint(websocket: WebSocket):
2100
- """WebSocket端点,用于实时更新(优化版)"""
2101
- await websocket.accept()
2102
-
2103
- try:
2104
- # 标记是否是首次连接
2105
- is_first_load = True
2106
-
2107
- while True:
2108
- try:
2109
- # 检查WebSocket连接状态
2110
- if websocket.client_state != WebSocketState.CONNECTED:
2111
- break
2112
-
2113
- # 首次连接时发送基础信息
2114
- if is_first_load:
2115
- # 只发送队列列表,不发送任务信息
2116
- data = {
2117
- "queues": await monitor.get_all_queues(),
2118
- "timestamp": datetime.now(timezone.utc).isoformat(),
2119
- "initial_load": True
2120
- }
2121
- await websocket.send_json(data)
2122
- is_first_load = False
2123
- else:
2124
- # 后续更新:并行获取所有worker信息
2125
- queues = await monitor.get_all_queues()
2126
-
2127
- # 使用 asyncio.gather 并行获取所有队列的worker信息
2128
- worker_tasks = [monitor.get_worker_heartbeats(queue) for queue in queues]
2129
- worker_results = await asyncio.gather(*worker_tasks, return_exceptions=True)
2130
-
2131
- # 构建队列worker映射
2132
- queue_workers = {}
2133
- for i, queue in enumerate(queues):
2134
- if isinstance(worker_results[i], Exception):
2135
- print(f"Error getting workers for queue {queue}: {worker_results[i]}")
2136
- queue_workers[queue] = []
2137
- else:
2138
- queue_workers[queue] = worker_results[i]
2139
-
2140
- data = {
2141
- "queues": queues,
2142
- "workers": queue_workers,
2143
- "timestamp": datetime.now(timezone.utc).isoformat(),
2144
- "initial_load": False
2145
- }
2146
-
2147
- # 再次检查连接状态后发送
2148
- if websocket.client_state == WebSocketState.CONNECTED:
2149
- await websocket.send_json(data)
2150
- else:
2151
- break
2152
-
2153
- await asyncio.sleep(2) # 每2秒更新一次
2154
-
2155
- except WebSocketDisconnect:
2156
- # WebSocket已断开,退出循环
2157
- break
2158
- except Exception as e:
2159
- # 检查是否是因为连接已关闭导致的错误
2160
- if "close message has been sent" in str(e) or "WebSocket is not connected" in str(e):
2161
- break
2162
- print(f"Error in websocket loop: {e}")
2163
- # 对于其他错误,等待一段时间后继续
2164
- await asyncio.sleep(5)
2165
-
2166
- except WebSocketDisconnect:
2167
- pass
2168
- except Exception as e:
2169
- print(f"WebSocket error: {e}")
2170
- finally:
2171
- # 确保WebSocket正确关闭
2172
- try:
2173
- await websocket.close()
2174
- except:
2175
- pass
2176
-
2177
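A client of the removed `/ws` endpoint would receive one `initial_load: true` frame followed by worker snapshots every two seconds. A hedged consumer sketch using the third-party `websockets` package; the URL and port are assumptions:

```python
import asyncio
import json

import websockets  # pip install websockets

async def watch(url: str = "ws://localhost:8000/ws"):
    async with websockets.connect(url) as ws:
        async for raw in ws:  # one JSON frame roughly every 2 seconds
            frame = json.loads(raw)
            if frame.get("initial_load"):
                print("queues:", frame["queues"])
            else:
                for queue, workers in frame.get("workers", {}).items():
                    print(f"{queue}: {len(workers)} worker(s)")

if __name__ == "__main__":
    asyncio.run(watch())
```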
-# Mount static files
-static_dir = Path(__file__).parent / "static"
-static_dir.mkdir(exist_ok=True)
-
-# Mount the static file directory
-app.mount("/static", StaticFiles(directory=str(static_dir)), name="static")
-
-@app.get("/")
-async def read_index():
-    """Serve the main page HTML"""
-    html_path = static_dir / "index.html"
-    if html_path.exists():
-        return HTMLResponse(content=html_path.read_text())
-    return HTMLResponse(content="<h1>Jettask Monitor</h1><p>Static files not found</p>")
-
-@app.get("/queue.html")
-async def read_queue():
-    """Serve the queue detail page HTML"""
-    html_path = static_dir / "queue.html"
-    if html_path.exists():
-        return HTMLResponse(content=html_path.read_text())
-    return HTMLResponse(content="<h1>Queue Details</h1><p>Page not found</p>")
-
-@app.get("/queues.html")
-async def read_queues():
-    """Serve the queue list page HTML"""
-    html_path = static_dir / "queues.html"
-    if html_path.exists():
-        return HTMLResponse(content=html_path.read_text())
-    return HTMLResponse(content="<h1>Queues</h1><p>Page not found</p>")
-
-@app.get("/workers.html")
-async def read_workers():
-    """Serve the workers page HTML"""
-    html_path = static_dir / "workers.html"
-    if html_path.exists():
-        return HTMLResponse(content=html_path.read_text())
-    return HTMLResponse(content="<h1>Workers</h1><p>Page not found</p>")
-
-
-# PostgreSQL-related API endpoints
-@app.get("/api/pg/tasks")
-async def get_pg_tasks(
-    status: Optional[str] = None,
-    queue_name: Optional[str] = None,
-    limit: int = 100,
-    offset: int = 0
-):
-    """Fetch the task list from PostgreSQL"""
-    engine = await get_db_engine()
-    if not engine:
-        return {"error": "PostgreSQL not configured"}
-
-    try:
-        async with AsyncSessionLocal() as session:
-            # Build the query
-            query = select(Task)
-
-            if status:
-                query = query.where(Task.status == status)
-
-            if queue_name:
-                query = query.where(Task.queue_name == queue_name)
-
-            query = query.order_by(Task.created_at.desc())
-            query = query.limit(limit).offset(offset)
-
-            result = await session.execute(query)
-            tasks_obj = result.scalars().all()
-
-            tasks = []
-            for task_obj in tasks_obj:
-                task = {
-                    'id': task_obj.id,
-                    'queue_name': task_obj.queue_name,
-                    'task_name': task_obj.task_name,
-                    'task_data': task_obj.task_data,
-                    'priority': task_obj.priority,
-                    'retry_count': task_obj.retry_count,
-                    'max_retry': task_obj.max_retry,
-                    'status': task_obj.status,
-                    'result': task_obj.result,
-                    'error_message': task_obj.error_message,
-                    'created_at': task_obj.created_at,
-                    'started_at': task_obj.started_at,
-                    'completed_at': task_obj.completed_at,
-                    'worker_id': task_obj.worker_id,
-                    'execution_time': task_obj.execution_time,
-                    'duration': task_obj.duration,
-                    'metadata': task_obj.task_metadata,
-                    'next_sync_time': task_obj.next_sync_time,
-                    'sync_check_count': task_obj.sync_check_count
-                }
-                # Convert timestamps to ISO format (ensure UTC)
-                for field in ['created_at', 'started_at', 'completed_at']:
-                    if task.get(field):
-                        # PostgreSQL TIMESTAMP WITH TIME ZONE returns aware datetimes
-                        if task[field].tzinfo is None:
-                            # Assume UTC when there is no timezone info
-                            task[field] = task[field].replace(tzinfo=timezone.utc)
-                        task[field] = task[field].isoformat()
-                # Parse JSON fields
-                for field in ['task_data', 'result', 'metadata']:
-                    if task.get(field) and isinstance(task[field], str):
-                        try:
-                            task[field] = json.loads(task[field])
-                        except:
-                            pass
-                tasks.append(task)
-
-            return {"tasks": tasks, "total": len(tasks)}
-
-    except Exception as e:
-        logging.error(f"Error fetching tasks from PostgreSQL: {e}")
-        return {"error": str(e)}
-
-
-@app.get("/api/pg/stats")
-async def get_pg_stats():
-    """Get statistics from PostgreSQL"""
-    engine = await get_db_engine()
-    if not engine:
-        return {"error": "PostgreSQL not configured"}
-
-    try:
-        async with AsyncSessionLocal() as session:
-            # Task statistics
-            task_stats_query = text("""
-                SELECT
-                    COUNT(*) as total_tasks,
-                    COUNT(CASE WHEN status = 'pending' THEN 1 END) as pending_tasks,
-                    COUNT(CASE WHEN status = 'running' THEN 1 END) as running_tasks,
-                    COUNT(CASE WHEN status = 'success' THEN 1 END) as completed_tasks,
-                    COUNT(CASE WHEN status = 'failed' THEN 1 END) as failed_tasks
-                FROM tasks
-            """)
-
-            task_stats_result = await session.execute(task_stats_query)
-            task_stats = task_stats_result.mappings().fetchone()
-
-            # Queue statistics
-            queue_stats_query = text("""
-                SELECT
-                    queue_name,
-                    COUNT(*) as total_tasks,
-                    COUNT(CASE WHEN status = 'pending' THEN 1 END) as pending_tasks,
-                    COUNT(CASE WHEN status = 'running' THEN 1 END) as running_tasks,
-                    COUNT(CASE WHEN status = 'success' THEN 1 END) as completed_tasks,
-                    COUNT(CASE WHEN status = 'failed' THEN 1 END) as failed_tasks
-                FROM tasks
-                GROUP BY queue_name
-                ORDER BY total_tasks DESC
-            """)
-
-            queue_stats_result = await session.execute(queue_stats_query)
-            queue_stats = queue_stats_result.mappings().all()
-
-            return {
-                "task_stats": dict(task_stats) if task_stats else {},
-                "queue_stats": [dict(row) for row in queue_stats]
-            }
-
-    except Exception as e:
-        logging.error(f"Error fetching stats from PostgreSQL: {e}")
-        return {"error": str(e)}
-
-
-@app.get("/api/pg/task/{task_id}")
-async def get_pg_task(task_id: str):
-    """Fetch a single task's details from PostgreSQL"""
-    engine = await get_db_engine()
-    if not engine:
-        return {"error": "PostgreSQL not configured"}
-
-    try:
-        async with AsyncSessionLocal() as session:
-            result = await session.execute(select(Task).where(Task.id == task_id))
-            task_obj = result.scalar_one_or_none()
-
-            if not task_obj:
-                return {"error": "Task not found"}
-
-            task = {
-                'id': task_obj.id,
-                'queue_name': task_obj.queue_name,
-                'task_name': task_obj.task_name,
-                'task_data': task_obj.task_data,
-                'priority': task_obj.priority,
-                'retry_count': task_obj.retry_count,
-                'max_retry': task_obj.max_retry,
-                'status': task_obj.status,
-                'result': task_obj.result,
-                'error_message': task_obj.error_message,
-                'created_at': task_obj.created_at,
-                'started_at': task_obj.started_at,
-                'completed_at': task_obj.completed_at,
-                'worker_id': task_obj.worker_id,
-                'execution_time': task_obj.execution_time,
-                'duration': task_obj.duration,
-                'metadata': task_obj.task_metadata,
-                'next_sync_time': task_obj.next_sync_time,
-                'sync_check_count': task_obj.sync_check_count
-            }
-            # Convert timestamps to ISO format (ensure UTC)
-            for field in ['created_at', 'started_at', 'completed_at']:
-                if task.get(field):
-                    # PostgreSQL TIMESTAMP WITH TIME ZONE returns aware datetimes
-                    if task[field].tzinfo is None:
-                        # Assume UTC when there is no timezone info
-                        task[field] = task[field].replace(tzinfo=timezone.utc)
-                    task[field] = task[field].isoformat()
-            # Parse JSON fields
-            for field in ['task_data', 'result', 'metadata']:
-                if task.get(field) and isinstance(task[field], str):
-                    try:
-                        task[field] = json.loads(task[field])
-                    except:
-                        pass
-
-            return {"task": task}
-
-    except Exception as e:
-        logging.error(f"Error fetching task from PostgreSQL: {e}")
-        return {"error": str(e)}
+# ============ WebSocket real-time push ============
+# (parse_time_duration has been removed because the PG timeline routes were migrated)
+
+# ============ Migrated routes ============
+# The following routes have moved to the modular API routers:
+# - GET /api/queues → api/queues.py
+# - GET /api/queue/{queue_name}/stats → api/queues.py
+# - GET /api/queue/{queue_name}/workers → api/workers.py
+# - GET /api/queue/{queue_name}/worker-summary → api/workers.py
+# - GET /api/workers/offline-history → api/workers.py
+# - GET /api/global-stats → api/overview.py
+# - GET /api/global-stats/light → api/overview.py
+# ====================================
+
+# GET /api/queue/{queue_name}/workers/offline-history → migrated to api/workers.py
+
+
+# ============ PostgreSQL routes migrated ============
+# The following routes have moved to api/analytics.py:
+# - GET /api/pg/tasks → GET /api/v1/analytics/pg/tasks
+# - GET /api/pg/stats → GET /api/v1/analytics/pg/stats
+# - GET /api/pg/task/{task_id} → GET /api/v1/analytics/pg/task/{task_id}
+# ==============================================
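The migration notes above amount to splitting one large app.py into per-module routers. In FastAPI that is typically an `APIRouter` per file plus `include_router`; a minimal sketch whose module layout and prefixes are illustrative, not jettask's actual structure:

```python
from fastapi import APIRouter, FastAPI

# api/queues.py (illustrative)
queues_router = APIRouter(prefix="/api/queues", tags=["queues"])

@queues_router.get("")
async def list_queues() -> dict:
    return {"queues": []}  # a real handler would query the monitor

# app.py (illustrative)
app = FastAPI()
app.include_router(queues_router)  # the route is now served at /api/queues
```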
 
 if __name__ == "__main__":
     # Configure logging