jettask 0.2.1__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. jettask/constants.py +213 -0
  2. jettask/core/app.py +525 -205
  3. jettask/core/cli.py +193 -185
  4. jettask/core/consumer_manager.py +126 -34
  5. jettask/core/context.py +3 -0
  6. jettask/core/enums.py +137 -0
  7. jettask/core/event_pool.py +501 -168
  8. jettask/core/message.py +147 -0
  9. jettask/core/offline_worker_recovery.py +181 -114
  10. jettask/core/task.py +10 -174
  11. jettask/core/task_batch.py +153 -0
  12. jettask/core/unified_manager_base.py +243 -0
  13. jettask/core/worker_scanner.py +54 -54
  14. jettask/executors/asyncio.py +184 -64
  15. jettask/webui/backend/config.py +51 -0
  16. jettask/webui/backend/data_access.py +2083 -92
  17. jettask/webui/backend/data_api.py +3294 -0
  18. jettask/webui/backend/dependencies.py +261 -0
  19. jettask/webui/backend/init_meta_db.py +158 -0
  20. jettask/webui/backend/main.py +1358 -69
  21. jettask/webui/backend/main_unified.py +78 -0
  22. jettask/webui/backend/main_v2.py +394 -0
  23. jettask/webui/backend/namespace_api.py +295 -0
  24. jettask/webui/backend/namespace_api_old.py +294 -0
  25. jettask/webui/backend/namespace_data_access.py +611 -0
  26. jettask/webui/backend/queue_backlog_api.py +727 -0
  27. jettask/webui/backend/queue_stats_v2.py +521 -0
  28. jettask/webui/backend/redis_monitor_api.py +476 -0
  29. jettask/webui/backend/unified_api_router.py +1601 -0
  30. jettask/webui/db_init.py +204 -32
  31. jettask/webui/frontend/package-lock.json +492 -1
  32. jettask/webui/frontend/package.json +4 -1
  33. jettask/webui/frontend/src/App.css +105 -7
  34. jettask/webui/frontend/src/App.jsx +49 -20
  35. jettask/webui/frontend/src/components/NamespaceSelector.jsx +166 -0
  36. jettask/webui/frontend/src/components/QueueBacklogChart.jsx +298 -0
  37. jettask/webui/frontend/src/components/QueueBacklogTrend.jsx +638 -0
  38. jettask/webui/frontend/src/components/QueueDetailsTable.css +65 -0
  39. jettask/webui/frontend/src/components/QueueDetailsTable.jsx +487 -0
  40. jettask/webui/frontend/src/components/QueueDetailsTableV2.jsx +465 -0
  41. jettask/webui/frontend/src/components/ScheduledTaskFilter.jsx +423 -0
  42. jettask/webui/frontend/src/components/TaskFilter.jsx +425 -0
  43. jettask/webui/frontend/src/components/TimeRangeSelector.css +21 -0
  44. jettask/webui/frontend/src/components/TimeRangeSelector.jsx +160 -0
  45. jettask/webui/frontend/src/components/layout/AppLayout.css +95 -0
  46. jettask/webui/frontend/src/components/layout/AppLayout.jsx +49 -0
  47. jettask/webui/frontend/src/components/layout/Header.css +34 -10
  48. jettask/webui/frontend/src/components/layout/Header.jsx +31 -23
  49. jettask/webui/frontend/src/components/layout/SideMenu.css +137 -0
  50. jettask/webui/frontend/src/components/layout/SideMenu.jsx +209 -0
  51. jettask/webui/frontend/src/components/layout/TabsNav.css +244 -0
  52. jettask/webui/frontend/src/components/layout/TabsNav.jsx +206 -0
  53. jettask/webui/frontend/src/components/layout/UserInfo.css +197 -0
  54. jettask/webui/frontend/src/components/layout/UserInfo.jsx +197 -0
  55. jettask/webui/frontend/src/contexts/NamespaceContext.jsx +72 -0
  56. jettask/webui/frontend/src/contexts/TabsContext.backup.jsx +245 -0
  57. jettask/webui/frontend/src/main.jsx +1 -0
  58. jettask/webui/frontend/src/pages/Alerts.jsx +684 -0
  59. jettask/webui/frontend/src/pages/Dashboard.jsx +1330 -0
  60. jettask/webui/frontend/src/pages/QueueDetail.jsx +1109 -10
  61. jettask/webui/frontend/src/pages/QueueMonitor.jsx +236 -115
  62. jettask/webui/frontend/src/pages/Queues.jsx +5 -1
  63. jettask/webui/frontend/src/pages/ScheduledTasks.jsx +809 -0
  64. jettask/webui/frontend/src/pages/Settings.jsx +800 -0
  65. jettask/webui/frontend/src/services/api.js +7 -5
  66. jettask/webui/frontend/src/utils/suppressWarnings.js +22 -0
  67. jettask/webui/frontend/src/utils/userPreferences.js +154 -0
  68. jettask/webui/multi_namespace_consumer.py +543 -0
  69. jettask/webui/pg_consumer.py +983 -246
  70. jettask/webui/static/dist/assets/index-7129cfe1.css +1 -0
  71. jettask/webui/static/dist/assets/index-8d1935cc.js +774 -0
  72. jettask/webui/static/dist/index.html +2 -2
  73. jettask/webui/task_center.py +216 -0
  74. jettask/webui/task_center_client.py +150 -0
  75. jettask/webui/unified_consumer_manager.py +193 -0
  76. {jettask-0.2.1.dist-info → jettask-0.2.4.dist-info}/METADATA +1 -1
  77. jettask-0.2.4.dist-info/RECORD +134 -0
  78. jettask/webui/pg_consumer_slow.py +0 -1099
  79. jettask/webui/pg_consumer_test.py +0 -678
  80. jettask/webui/static/dist/assets/index-823408e8.css +0 -1
  81. jettask/webui/static/dist/assets/index-9968b0b8.js +0 -543
  82. jettask/webui/test_pg_consumer_recovery.py +0 -547
  83. jettask/webui/test_recovery_simple.py +0 -492
  84. jettask/webui/test_self_recovery.py +0 -467
  85. jettask-0.2.1.dist-info/RECORD +0 -91
  86. {jettask-0.2.1.dist-info → jettask-0.2.4.dist-info}/WHEEL +0 -0
  87. {jettask-0.2.1.dist-info → jettask-0.2.4.dist-info}/entry_points.txt +0 -0
  88. {jettask-0.2.1.dist-info → jettask-0.2.4.dist-info}/licenses/LICENSE +0 -0
  89. {jettask-0.2.1.dist-info → jettask-0.2.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1601 @@
1
+ """
2
+ 统一的API路由文件
3
+ 将所有分散的API接口整合到一个文件中,方便维护和管理
4
+ """
5
+
6
import asyncio
import json
import logging
import time
import traceback
from datetime import datetime, timedelta, timezone
from typing import Any, Dict, List, Optional, Union

import psutil
from fastapi import APIRouter, Depends, HTTPException, Query, Request
from pydantic import BaseModel, Field
from sqlalchemy import bindparam, text
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
from sqlalchemy.orm import sessionmaker
19
+
20
+ # 导入本地模块
21
+ try:
22
+ from namespace_data_access import get_namespace_data_access
23
+ from config import task_center_config
24
+ except ImportError:
25
+ # 如果相对导入失败,尝试绝对导入
26
+ from jettask.webui.backend.namespace_data_access import get_namespace_data_access
27
+ from jettask.webui.backend.config import task_center_config
28
+
29
# Module-level logger (standard logging convention: one logger per module).
logger = logging.getLogger(__name__)

# Single APIRouter that every endpoint in this file attaches to, mounted under /api.
router = APIRouter(prefix="/api", tags=["API"])
34
+
35
+ # ==================== 数据模型定义 ====================
36
+
37
class TimeRangeQuery(BaseModel):
    """Common time-range query parameters shared by several POST endpoints."""
    namespace: str = "default"          # target namespace
    queue_name: Optional[str] = None    # restrict to one queue when set
    time_range: str = "1h"              # compact window string, e.g. "30m"/"1h"/"2d" (see parse_time_range)
    granularity: Optional[str] = None   # optional explicit bucket size
43
+
44
class QueueStatsResponse(BaseModel):
    """Per-queue task counts grouped by status."""
    queue_name: str
    pending: int
    running: int
    completed: int
    failed: int
51
+
52
class TaskDetailResponse(BaseModel):
    """Detail view of a single task, including its lifecycle timestamps."""
    task_id: str
    status: str
    created_at: datetime
    started_at: Optional[datetime]      # None until the task is picked up
    completed_at: Optional[datetime]    # None until the task finishes
    error_message: Optional[str]        # populated only for failed tasks
60
+
61
class DashboardOverviewRequest(BaseModel):
    """Request body for the dashboard overview-stats endpoint."""
    time_range: str = "1h"              # window string, e.g. "1h"/"24h"
    queues: Optional[List[str]] = None  # optional queue-name filter; None means all queues
65
+
66
class ScheduledTaskCreate(BaseModel):
    """Request model for creating a scheduled task.

    Either ``cron_expression`` or ``interval_seconds`` is expected to define
    the schedule (both are optional at the model level).
    """
    task_name: str
    queue_name: str
    task_data: dict                     # opaque payload delivered to the task
    cron_expression: Optional[str] = None
    interval_seconds: Optional[int] = None
    enabled: bool = True
74
+
75
class ScheduledTaskUpdate(BaseModel):
    """Request model for partially updating a scheduled task; only the
    fields that are set (non-None) are meant to be changed."""
    task_name: Optional[str] = None
    queue_name: Optional[str] = None
    task_data: Optional[dict] = None
    cron_expression: Optional[str] = None
    interval_seconds: Optional[int] = None
    enabled: Optional[bool] = None
83
+
84
class AlertRuleCreate(BaseModel):
    """Request model for creating an alert rule."""
    name: str
    metric: str                         # metric being monitored
    threshold: float                    # trigger threshold value
    condition: str                      # comparison operator: gt, lt, eq
    duration: int                       # how long the condition must hold, in seconds
    notification_channels: List[str]    # channels to notify when the rule fires
    enabled: bool = True
93
+
94
class NamespaceCreate(BaseModel):
    """Request model for creating a namespace."""
    name: str
    redis_url: str                      # Redis connection URL (required)
    pg_url: Optional[str] = None        # optional PostgreSQL connection URL
    description: Optional[str] = None
100
+
101
class NamespaceUpdate(BaseModel):
    """Request model for partially updating a namespace; None fields are untouched."""
    redis_url: Optional[str] = None
    pg_url: Optional[str] = None
    description: Optional[str] = None
106
+
107
+ # ==================== 辅助函数 ====================
108
+
109
def parse_time_range(time_range: str) -> timedelta:
    """Parse a compact time-range string like ``"30m"``, ``"1h"``, ``"2d"``, ``"1w"``.

    The last character selects the unit (m/h/d/w, case-insensitive) and the
    leading characters are the integer amount.  Any malformed or unsupported
    input falls back to a 1-hour window so callers always get a usable range.

    Args:
        time_range: Amount + unit suffix, e.g. ``"24h"``.

    Returns:
        The corresponding ``timedelta`` (1 hour on any parse failure).
    """
    units = {
        'm': 'minutes',
        'h': 'hours',
        'd': 'days',
        'w': 'weeks',
    }

    # Need at least one digit plus the unit character.
    if not time_range or len(time_range) < 2:
        return timedelta(hours=1)

    try:
        value = int(time_range[:-1])
    except ValueError:
        # Non-numeric amount, e.g. "xh".
        return timedelta(hours=1)

    # The original also caught KeyError here, but the unit is validated via
    # dict lookup, so that handler was unreachable; a .get() makes it explicit.
    unit = units.get(time_range[-1].lower())
    if unit is None:
        return timedelta(hours=1)
    return timedelta(**{unit: value})
131
+
132
+ def get_base_queue_name(queue_name: str) -> str:
133
+ """获取基础队列名(去除priority后缀)"""
134
+ if '_priority_' in queue_name:
135
+ return queue_name.split('_priority_')[0]
136
+ return queue_name
137
+
138
+ # ==================== Dashboard 相关接口 ====================
139
+
140
@router.get("/data/dashboard-stats/{namespace}")
async def get_dashboard_stats(
    namespace: str,
    time_range: str = "24h",
    queues: Optional[str] = Query(None, description="逗号分隔的队列名称列表")
):
    """Return aggregate dashboard statistics for a namespace.

    Includes per-status task counts, success rate, throughput (completions
    per minute over the last 5 minutes), average processing time (ms) and
    the top-10 per-queue task distribution.

    Args:
        namespace: Target namespace.
        time_range: Window string such as "1h"/"24h" (see parse_time_range).
        queues: Optional comma-separated queue-name filter.

    Raises:
        HTTPException: 500 on any backend failure.
    """
    try:
        data_access = get_namespace_data_access()
        conn = await data_access.manager.get_connection(namespace)

        # Without a PostgreSQL backend there is nothing to aggregate.
        if not conn.pg_config:
            return {
                "success": True,
                "data": {
                    "total_tasks": 0,
                    "completed_tasks": 0,
                    "failed_tasks": 0,
                    "running_tasks": 0,
                    "pending_tasks": 0,
                    "success_rate": 0,
                    "throughput": 0,
                    "avg_processing_time": 0,
                    "total_queues": 0,
                    "task_distribution": []
                }
            }

        time_delta = parse_time_range(time_range)
        start_time = datetime.now(timezone.utc) - time_delta

        queue_filter = [q.strip() for q in queues.split(',') if q.strip()] if queues else []

        # Queue names are bound as an expanding parameter instead of being
        # spliced into the SQL text — the original f-string was injectable.
        queue_condition = "AND queue IN :queue_filter" if queue_filter else ""

        def _stmt(sql: str):
            # Attach the expanding bind only when the filter is active.
            stmt = text(sql)
            if queue_filter:
                stmt = stmt.bindparams(bindparam("queue_filter", expanding=True))
            return stmt

        def _params(**extra):
            params = {"namespace": namespace, **extra}
            if queue_filter:
                params["queue_filter"] = queue_filter
            return params

        async with conn.async_engine.begin() as pg_conn:
            # Per-status counts plus average wall-clock processing time (ms).
            stats_query = f"""
                SELECT
                    COUNT(*) as total_tasks,
                    COUNT(CASE WHEN status = 'completed' THEN 1 END) as completed_tasks,
                    COUNT(CASE WHEN status = 'failed' THEN 1 END) as failed_tasks,
                    COUNT(CASE WHEN status = 'running' THEN 1 END) as running_tasks,
                    COUNT(CASE WHEN status = 'pending' THEN 1 END) as pending_tasks,
                    AVG(CASE
                        WHEN status = 'completed' AND completed_at IS NOT NULL AND started_at IS NOT NULL
                        THEN EXTRACT(EPOCH FROM (completed_at - started_at)) * 1000
                        ELSE NULL
                    END) as avg_processing_time
                FROM tasks
                WHERE namespace = :namespace
                AND created_at >= :start_time
                {queue_condition}
            """
            result = await pg_conn.execute(_stmt(stats_query), _params(start_time=start_time))
            stats = result.fetchone()

            # Number of distinct queues seen for this namespace (no time filter,
            # matching the original behavior).
            queue_query = f"""
                SELECT COUNT(DISTINCT queue) as total_queues
                FROM tasks
                WHERE namespace = :namespace
                {queue_condition}
            """
            queue_result = await pg_conn.execute(_stmt(queue_query), _params())
            queue_count = queue_result.fetchone()

            # Top-10 queues by task volume inside the window.
            distribution_query = f"""
                SELECT
                    queue as type,
                    COUNT(*) as value
                FROM tasks
                WHERE namespace = :namespace
                AND created_at >= :start_time
                {queue_condition}
                GROUP BY queue
                ORDER BY value DESC
                LIMIT 10
            """
            distribution_result = await pg_conn.execute(
                _stmt(distribution_query), _params(start_time=start_time)
            )
            distribution_data = [
                {"type": row.type, "value": row.value}
                for row in distribution_result
            ]

            # Throughput: completions per minute over the last 5 minutes.
            throughput_minutes = 5
            throughput_start = datetime.now(timezone.utc) - timedelta(minutes=throughput_minutes)
            throughput_query = f"""
                SELECT COUNT(*) as completed_count
                FROM tasks
                WHERE namespace = :namespace
                AND status = 'completed'
                AND completed_at >= :start_time
                {queue_condition}
            """
            throughput_result = await pg_conn.execute(
                _stmt(throughput_query), _params(start_time=throughput_start)
            )
            throughput_count = throughput_result.fetchone().completed_count or 0
            throughput = (throughput_count / throughput_minutes) if throughput_minutes > 0 else 0

            total = stats.total_tasks or 0
            completed = stats.completed_tasks or 0
            failed = stats.failed_tasks or 0
            # Success rate is computed over finished tasks only (completed + failed).
            success_rate = (completed / (completed + failed) * 100) if (completed + failed) > 0 else 0

            return {
                "success": True,
                "data": {
                    "total_tasks": total,
                    "completed_tasks": completed,
                    "failed_tasks": failed,
                    "running_tasks": stats.running_tasks or 0,
                    "pending_tasks": stats.pending_tasks or 0,
                    "success_rate": round(success_rate, 2),
                    "throughput": round(throughput, 2),
                    "avg_processing_time": round(stats.avg_processing_time or 0, 2),
                    "total_queues": queue_count.total_queues or 0,
                    "task_distribution": distribution_data
                }
            }

    except Exception as e:
        logger.error(f"获取仪表板统计失败: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
291
+
292
@router.post("/data/dashboard-overview-stats/{namespace}")
async def get_dashboard_overview_stats(
    namespace: str,
    request: DashboardOverviewRequest
):
    """Return dashboard overview time series for a namespace.

    Produces four series bucketed by an automatically chosen granularity:
    task trend (created/completed/failed), concurrency, processing-time
    percentiles (P50/P90/avg, ms) and creation latency (ms).

    Args:
        namespace: Target namespace.
        request: Time range and optional queue filter.

    Raises:
        HTTPException: 500 on any backend failure.
    """
    try:
        data_access = get_namespace_data_access()
        conn = await data_access.manager.get_connection(namespace)

        if not conn.pg_config:
            return {
                "task_trend": [],
                "concurrency": [],
                "processing_time": [],
                "creation_latency": [],
                "granularity": "minute"
            }

        time_delta = parse_time_range(request.time_range)

        # Choose the bucket size from the window length.  Only field names
        # accepted by PostgreSQL date_trunc() may be used: the original
        # "5 minutes" tier is not a valid date_trunc field and made every
        # 1-6 h query fail, so 1-6 h windows now use minute buckets.
        if time_delta <= timedelta(hours=6):
            granularity = "minute"
            interval = "1 minute"
        elif time_delta <= timedelta(days=1):
            granularity = "hour"
            interval = "1 hour"
        else:
            granularity = "day"
            interval = "1 day"

        start_time = datetime.now(timezone.utc) - time_delta

        queue_filter = list(request.queues) if request.queues else []
        # Expanding bind parameter instead of f-string splicing (the original
        # interpolated caller-supplied queue names directly into the SQL).
        queue_condition = "AND queue IN :queue_filter" if queue_filter else ""

        def _stmt(sql: str):
            stmt = text(sql)
            if queue_filter:
                stmt = stmt.bindparams(bindparam("queue_filter", expanding=True))
            return stmt

        params = {"namespace": namespace, "start_time": start_time}
        if queue_filter:
            params["queue_filter"] = queue_filter

        async with conn.async_engine.begin() as pg_conn:
            # ---- task trend: created / completed / failed per bucket ----
            trend_query = f"""
                WITH time_series AS (
                    SELECT generate_series(
                        date_trunc('{granularity}', :start_time::timestamptz),
                        date_trunc('{granularity}', CURRENT_TIMESTAMP),
                        '{interval}'::interval
                    ) AS time_bucket
                ),
                task_metrics AS (
                    SELECT
                        date_trunc('{granularity}', created_at) as time_bucket,
                        COUNT(*) FILTER (WHERE created_at IS NOT NULL) as created_count,
                        COUNT(*) FILTER (WHERE status = 'completed') as completed_count,
                        COUNT(*) FILTER (WHERE status = 'failed') as failed_count
                    FROM tasks
                    WHERE namespace = :namespace
                    AND created_at >= :start_time
                    {queue_condition}
                    GROUP BY date_trunc('{granularity}', created_at)
                )
                SELECT
                    ts.time_bucket,
                    COALESCE(tm.created_count, 0) as created_count,
                    COALESCE(tm.completed_count, 0) as completed_count,
                    COALESCE(tm.failed_count, 0) as failed_count
                FROM time_series ts
                LEFT JOIN task_metrics tm ON ts.time_bucket = tm.time_bucket
                ORDER BY ts.time_bucket
            """
            trend_result = await pg_conn.execute(_stmt(trend_query), params)

            task_trend = []
            for row in trend_result:
                time_str = row.time_bucket.isoformat()
                task_trend.extend([
                    {"time": time_str, "value": row.created_count, "metric": "入队速率"},
                    {"time": time_str, "value": row.completed_count, "metric": "完成速率"},
                    {"time": time_str, "value": row.failed_count, "metric": "失败数"}
                ])

            # ---- concurrency: distinct tasks started per bucket ----
            concurrency_query = f"""
                WITH time_series AS (
                    SELECT generate_series(
                        date_trunc('{granularity}', :start_time::timestamptz),
                        date_trunc('{granularity}', CURRENT_TIMESTAMP),
                        '{interval}'::interval
                    ) AS time_bucket
                ),
                concurrency_data AS (
                    SELECT
                        date_trunc('{granularity}', started_at) as time_bucket,
                        COUNT(DISTINCT task_id) as concurrent_tasks
                    FROM tasks
                    WHERE namespace = :namespace
                    AND started_at >= :start_time
                    AND started_at IS NOT NULL
                    {queue_condition}
                    GROUP BY date_trunc('{granularity}', started_at)
                )
                SELECT
                    ts.time_bucket,
                    COALESCE(cd.concurrent_tasks, 0) as concurrent_tasks
                FROM time_series ts
                LEFT JOIN concurrency_data cd ON ts.time_bucket = cd.time_bucket
                ORDER BY ts.time_bucket
            """
            concurrency_result = await pg_conn.execute(_stmt(concurrency_query), params)

            concurrency = [
                {"time": row.time_bucket.isoformat(), "value": row.concurrent_tasks, "metric": "并发数"}
                for row in concurrency_result
            ]

            # ---- processing time: P50 / P90 / average per bucket (ms) ----
            processing_time_query = f"""
                WITH time_series AS (
                    SELECT generate_series(
                        date_trunc('{granularity}', :start_time::timestamptz),
                        date_trunc('{granularity}', CURRENT_TIMESTAMP),
                        '{interval}'::interval
                    ) AS time_bucket
                ),
                processing_metrics AS (
                    SELECT
                        date_trunc('{granularity}', completed_at) as time_bucket,
                        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY EXTRACT(EPOCH FROM (completed_at - started_at)) * 1000) as p50,
                        PERCENTILE_CONT(0.9) WITHIN GROUP (ORDER BY EXTRACT(EPOCH FROM (completed_at - started_at)) * 1000) as p90,
                        AVG(EXTRACT(EPOCH FROM (completed_at - started_at)) * 1000) as avg_time
                    FROM tasks
                    WHERE namespace = :namespace
                    AND completed_at >= :start_time
                    AND status = 'completed'
                    AND started_at IS NOT NULL
                    {queue_condition}
                    GROUP BY date_trunc('{granularity}', completed_at)
                )
                SELECT
                    ts.time_bucket,
                    COALESCE(pm.p50, 0) as p50,
                    COALESCE(pm.p90, 0) as p90,
                    COALESCE(pm.avg_time, 0) as avg_time
                FROM time_series ts
                LEFT JOIN processing_metrics pm ON ts.time_bucket = pm.time_bucket
                ORDER BY ts.time_bucket
            """
            processing_result = await pg_conn.execute(_stmt(processing_time_query), params)

            processing_time = []
            for row in processing_result:
                time_str = row.time_bucket.isoformat()
                processing_time.extend([
                    {"time": time_str, "value": round(row.p50, 2), "metric": "P50处理时间"},
                    {"time": time_str, "value": round(row.p90, 2), "metric": "P90处理时间"},
                    {"time": time_str, "value": round(row.avg_time, 2), "metric": "平均处理时间"}
                ])

            # ---- creation latency: avg (started_at - created_at) per bucket ----
            creation_latency_query = f"""
                WITH time_series AS (
                    SELECT generate_series(
                        date_trunc('{granularity}', :start_time::timestamptz),
                        date_trunc('{granularity}', CURRENT_TIMESTAMP),
                        '{interval}'::interval
                    ) AS time_bucket
                ),
                latency_metrics AS (
                    SELECT
                        date_trunc('{granularity}', started_at) as time_bucket,
                        AVG(EXTRACT(EPOCH FROM (started_at - created_at)) * 1000) as avg_latency
                    FROM tasks
                    WHERE namespace = :namespace
                    AND started_at >= :start_time
                    AND started_at IS NOT NULL
                    {queue_condition}
                    GROUP BY date_trunc('{granularity}', started_at)
                )
                SELECT
                    ts.time_bucket,
                    COALESCE(lm.avg_latency, 0) as avg_latency
                FROM time_series ts
                LEFT JOIN latency_metrics lm ON ts.time_bucket = lm.time_bucket
                ORDER BY ts.time_bucket
            """
            # BUG FIX: the original executed an undefined name `latency_query`
            # here (NameError at runtime); it now runs the query defined above.
            latency_result = await pg_conn.execute(_stmt(creation_latency_query), params)

            creation_latency = [
                {"time": row.time_bucket.isoformat(), "value": round(row.avg_latency, 2), "metric": "创建延时"}
                for row in latency_result
            ]

            return {
                "task_trend": task_trend,
                "concurrency": concurrency,
                "processing_time": processing_time,
                "creation_latency": creation_latency,
                "granularity": granularity
            }

    except Exception as e:
        logger.error(f"获取概览统计失败: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
519
+
520
@router.get("/data/top-queues/{namespace}")
async def get_top_queues(
    namespace: str,
    metric: str = Query("backlog", description="排序指标: backlog或error"),
    limit: int = Query(10, description="返回数量"),
    time_range: Optional[str] = Query(None, description="时间范围"),
    queues: Optional[str] = Query(None, description="逗号分隔的队列名称列表")
):
    """Return the top queues ranked by backlog size or error rate.

    Args:
        namespace: Target namespace.
        metric: "backlog" (pending count) or "error" (failure percentage).
        limit: Maximum number of queues to return.
        time_range: Optional window for the error metric (default 24h).
        queues: Optional comma-separated queue-name filter.

    Raises:
        HTTPException: 400 for an unknown metric, 500 on backend failure.
    """
    try:
        data_access = get_namespace_data_access()

        if metric == "backlog":
            # Rank by pending count taken from the live queue stats.
            queues_data = await data_access.get_queue_stats(namespace)

            if queues:
                queue_filter = [q.strip() for q in queues.split(',') if q.strip()]
                queues_data = [q for q in queues_data if get_base_queue_name(q['queue_name']) in queue_filter]

            sorted_queues = sorted(queues_data, key=lambda x: x.get('pending', 0), reverse=True)[:limit]

            result = []
            for queue in sorted_queues:
                backlog = queue.get('pending', 0)
                # Severity thresholds: >1000 critical, >100 warning.
                if backlog > 1000:
                    status = 'critical'
                elif backlog > 100:
                    status = 'warning'
                else:
                    status = 'normal'

                result.append({
                    "queue": get_base_queue_name(queue['queue_name']),
                    "backlog": backlog,
                    "status": status
                })

            return {"success": True, "data": result}

        elif metric == "error":
            # Rank by failure percentage over the window, via PostgreSQL.
            conn = await data_access.manager.get_connection(namespace)
            if not conn.pg_config:
                return {"success": True, "data": []}

            time_delta = parse_time_range(time_range) if time_range else timedelta(hours=24)
            start_time = datetime.now(timezone.utc) - time_delta

            queue_filter = [q.strip() for q in queues.split(',') if q.strip()] if queues else []
            # Expanding bind parameter instead of splicing names into the SQL
            # (the original f-string was injectable).
            queue_condition = "AND queue IN :queue_filter" if queue_filter else ""

            async with conn.async_engine.begin() as pg_conn:
                query = f"""
                    SELECT
                        queue,
                        COUNT(*) as total,
                        COUNT(CASE WHEN status = 'failed' THEN 1 END) as failed,
                        ROUND(COUNT(CASE WHEN status = 'failed' THEN 1 END) * 100.0 / COUNT(*), 2) as error_rate
                    FROM tasks
                    WHERE namespace = :namespace
                    AND created_at >= :start_time
                    {queue_condition}
                    GROUP BY queue
                    HAVING COUNT(CASE WHEN status = 'failed' THEN 1 END) > 0
                    ORDER BY error_rate DESC
                    LIMIT :limit
                """
                stmt = text(query)
                query_params = {"namespace": namespace, "start_time": start_time, "limit": limit}
                if queue_filter:
                    stmt = stmt.bindparams(bindparam("queue_filter", expanding=True))
                    query_params["queue_filter"] = queue_filter

                result = await pg_conn.execute(stmt, query_params)

                data = [
                    {
                        "queue": row.queue,
                        "errorRate": row.error_rate,
                        "failed": row.failed,
                        "total": row.total
                    }
                    for row in result
                ]

                return {"success": True, "data": data}

        else:
            raise HTTPException(status_code=400, detail="无效的metric参数")

    except HTTPException:
        # BUG FIX: let deliberate HTTP errors (the 400 above) propagate;
        # the original `except Exception` converted them into generic 500s.
        raise
    except Exception as e:
        logger.error(f"获取Top队列失败: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
621
+
622
@router.get("/data/queue-backlog-trend/{namespace}")
async def get_queue_backlog_trend(
    namespace: str,
    time_range: str = "1h",
    queues: Optional[str] = Query(None, description="逗号分隔的队列名称列表")
):
    """Return pending-task counts bucketed over time for the backlog chart.

    Args:
        namespace: Target namespace.
        time_range: Window string such as "1h"/"24h" (see parse_time_range).
        queues: Optional comma-separated queue-name filter.

    Raises:
        HTTPException: 500 on any backend failure.
    """
    try:
        data_access = get_namespace_data_access()
        conn = await data_access.manager.get_connection(namespace)

        if not conn.pg_config:
            return {"success": True, "data": []}

        time_delta = parse_time_range(time_range)
        start_time = datetime.now(timezone.utc) - time_delta

        queue_filter = [q.strip() for q in queues.split(',') if q.strip()] if queues else []

        # Only valid date_trunc() field names may be used here; the original
        # "5 minutes" tier is rejected by PostgreSQL, so 1-6 h windows now
        # use minute buckets.
        if time_delta <= timedelta(hours=6):
            granularity = "minute"
        elif time_delta <= timedelta(days=1):
            granularity = "hour"
        else:
            granularity = "day"

        # Expanding bind parameter instead of f-string splicing of queue
        # names (the original was injectable).
        queue_condition = "AND queue IN :queue_filter" if queue_filter else ""

        async with conn.async_engine.begin() as pg_conn:
            query = f"""
                SELECT
                    date_trunc('{granularity}', created_at) as time_bucket,
                    COUNT(CASE WHEN status = 'pending' THEN 1 END) as pending_count
                FROM tasks
                WHERE namespace = :namespace
                AND created_at >= :start_time
                {queue_condition}
                GROUP BY date_trunc('{granularity}', created_at)
                ORDER BY time_bucket
            """
            stmt = text(query)
            params = {"namespace": namespace, "start_time": start_time}
            if queue_filter:
                stmt = stmt.bindparams(bindparam("queue_filter", expanding=True))
                params["queue_filter"] = queue_filter

            result = await pg_conn.execute(stmt, params)

            data = [
                {
                    "time": row.time_bucket.isoformat(),
                    "value": row.pending_count,
                    "metric": "排队任务数"
                }
                for row in result
            ]

            return {"success": True, "data": data}

    except Exception as e:
        logger.error(f"获取队列积压趋势失败: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
696
+
697
+ # ==================== 队列管理接口 ====================
698
+
699
@router.get("/queues/{namespace}")
async def get_queues(namespace: str):
    """Return the de-duplicated base queue names for a namespace."""
    try:
        access = get_namespace_data_access()
        stats = await access.get_queue_stats(namespace)
        # Collapse priority variants into their base names and de-duplicate.
        unique_names = {get_base_queue_name(item['queue_name']) for item in stats}
        return {"success": True, "data": list(unique_names)}
    except Exception as exc:
        logger.error(f"获取队列列表失败: {exc}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(exc))
713
+
714
@router.post("/data/queue-details/{namespace}")
async def get_queue_details(
    namespace: str,
    params: Dict[str, Any]
):
    """Return paginated per-queue statistics, aggregating priority variants.

    ``params`` keys: ``page`` (default 1) and ``pageSize`` (default 10).

    Raises:
        HTTPException: 500 on any backend failure.
    """
    try:
        data_access = get_namespace_data_access()

        page = params.get('page', 1)
        page_size = params.get('pageSize', 10)

        queues_data = await data_access.get_queue_stats(namespace)

        # Aggregate priority variants into their base queue via a dict
        # (the original did a linear scan per row — accidental O(n^2) — and
        # never refreshed 'total' when merging, leaving it stale).
        aggregated: Dict[str, Dict[str, int]] = {}
        for queue in queues_data:
            base_name = get_base_queue_name(queue['queue_name'])
            entry = aggregated.setdefault(
                base_name,
                {'pending': 0, 'running': 0, 'completed': 0, 'failed': 0},
            )
            for key in ('pending', 'running', 'completed', 'failed'):
                entry[key] += queue.get(key, 0)

        # 'total' is derived after aggregation so it always matches the sums.
        processed_data = [
            {
                'queue_name': name,
                **counts,
                'total': counts['pending'] + counts['running'] +
                         counts['completed'] + counts['failed'],
            }
            for name, counts in aggregated.items()
        ]

        # Simple in-memory pagination.
        start = (page - 1) * page_size
        paginated_data = processed_data[start:start + page_size]

        return {
            "success": True,
            "data": paginated_data,
            "total": len(processed_data),
            "page": page,
            "pageSize": page_size
        }

    except Exception as e:
        logger.error(f"获取队列详情失败: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
769
+
770
@router.delete("/queue/{queue_name}")
async def delete_queue(queue_name: str):
    """Delete a queue (placeholder — actual removal is not implemented yet)."""
    try:
        namespace_access = get_namespace_data_access()
        # TODO: perform the real deletion through the data-access layer;
        # currently this only reports success.
        return {"success": True, "message": f"队列 {queue_name} 已删除"}
    except Exception as exc:
        logger.error(f"删除队列失败: {exc}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(exc))
782
+
783
@router.post("/queue/{queue_name}/trim")
async def trim_queue(queue_name: str, params: Dict[str, Any]):
    """Trim a queue down to ``keep_count`` entries (placeholder implementation)."""
    try:
        keep_count = params.get('keep_count', 0)
        # TODO: actually trim the underlying queue; currently a no-op that
        # only reports success.
        return {"success": True, "message": f"队列 {queue_name} 已清理,保留 {keep_count} 条"}
    except Exception as exc:
        logger.error(f"清理队列失败: {exc}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(exc))
795
+
796
@router.post("/data/queue-timeline/{namespace}")
async def get_queue_timeline(
    namespace: str,
    params: TimeRangeQuery
):
    """Return queue timeline data (currently an empty skeleton payload)."""
    try:
        data_access = get_namespace_data_access()
        conn = await data_access.manager.get_connection(namespace)

        if not conn.pg_config:
            return {"data": [], "stats": {}}

        # TODO: populate from the tasks table; placeholder payload for now.
        empty_stats = {
            "total_messages": 0,
            "avg_processing_time": 0,
            "max_processing_time": 0,
        }
        return {"data": [], "stats": empty_stats}

    except Exception as exc:
        logger.error(f"获取队列时间线失败: {exc}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(exc))
823
+
824
@router.post("/data/queue-flow-rates/{namespace}")
async def get_queue_flow_rates(
    namespace: str,
    params: TimeRangeQuery
):
    """Return queue flow-rate data (currently an empty placeholder)."""
    try:
        data_access = get_namespace_data_access()
        conn = await data_access.manager.get_connection(namespace)

        if not conn.pg_config:
            return {"data": []}

        # TODO: compute real enqueue/dequeue rates; placeholder for now.
        return {"data": []}

    except Exception as exc:
        logger.error(f"获取队列流量速率失败: {exc}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(exc))
844
+
845
+ # ==================== 任务管理接口 ====================
846
+
847
@router.post("/data/tasks/{namespace}")
async def get_tasks(
    namespace: str,
    params: Dict[str, Any]
):
    """Return a paginated task list, optionally filtered by queue and status.

    ``params`` keys: ``page`` (default 1), ``pageSize`` (default 20),
    ``queue_name`` and ``status`` (optional filters).  All filter values are
    bound as SQL parameters.

    Raises:
        HTTPException: 500 on any backend failure.
    """
    try:
        data_access = get_namespace_data_access()
        conn = await data_access.manager.get_connection(namespace)

        # Without a PostgreSQL backend there is no task history to query.
        if not conn.pg_config:
            return {"data": [], "total": 0}

        # Pagination / filter parameters from the request body.
        page = params.get('page', 1)
        page_size = params.get('pageSize', 20)
        queue_name = params.get('queue_name')
        status = params.get('status')

        offset = (page - 1) * page_size

        async with conn.async_engine.begin() as pg_conn:
            # Build the WHERE clause from bound parameters only.
            conditions = ["namespace = :namespace"]
            query_params = {"namespace": namespace}

            if queue_name:
                conditions.append("queue = :queue")
                query_params["queue"] = queue_name

            if status:
                conditions.append("status = :status")
                query_params["status"] = status

            where_clause = " AND ".join(conditions)

            # Total row count for pagination metadata.
            count_query = f"SELECT COUNT(*) as total FROM tasks WHERE {where_clause}"
            count_result = await pg_conn.execute(text(count_query), query_params)
            total = count_result.fetchone().total

            # Current page of tasks, newest first.
            query = f"""
                SELECT
                    task_id,
                    queue,
                    status,
                    created_at,
                    started_at,
                    completed_at,
                    error_message
                FROM tasks
                WHERE {where_clause}
                ORDER BY created_at DESC
                LIMIT :limit OFFSET :offset
            """

            query_params["limit"] = page_size
            query_params["offset"] = offset

            result = await pg_conn.execute(text(query), query_params)

            tasks = []
            for row in result:
                tasks.append({
                    "task_id": row.task_id,
                    "queue": row.queue,
                    "status": row.status,
                    # Timestamps are serialized as ISO-8601 strings (None if unset).
                    "created_at": row.created_at.isoformat() if row.created_at else None,
                    "started_at": row.started_at.isoformat() if row.started_at else None,
                    "completed_at": row.completed_at.isoformat() if row.completed_at else None,
                    "error_message": row.error_message
                })

            return {
                "data": tasks,
                "total": total,
                "page": page,
                "pageSize": page_size
            }

    except Exception as e:
        logger.error(f"获取任务列表失败: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
932
+
933
@router.get("/task/{task_id}/details")
async def get_task_details(
    task_id: str,
    consumer_group: Optional[str] = Query(None)
):
    """Return details for a single task.

    Placeholder implementation: echoes the task id and consumer group
    with a fixed "completed" status and the current timestamp.
    """
    try:
        # TODO: look up the real task record.
        details = {
            "task_id": task_id,
            "status": "completed",
            "created_at": datetime.now().isoformat(),
            "consumer_group": consumer_group,
        }
        return details
    except Exception as e:
        logger.error(f"获取任务详情失败: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))

952
+ # ==================== 定时任务接口 ====================
953
+
954
@router.get("/data/scheduled-tasks/{namespace}")
async def get_scheduled_tasks(
    namespace: str,
    limit: int = Query(20),
    offset: int = Query(0)
):
    """Return the scheduled tasks of a namespace, newest first.

    Returns:
        dict with ``data`` (scheduled-task rows) and ``total``.

    Raises:
        HTTPException 500: on any database or connection failure.
    """
    try:
        data_access = get_namespace_data_access()
        conn = await data_access.manager.get_connection(namespace)

        # No PostgreSQL backend configured -> nothing to list.
        if not conn.pg_config:
            return {"data": [], "total": 0}

        async with conn.async_engine.begin() as db:
            # Total row count for pagination.
            count_query = """
                SELECT COUNT(*) as total
                FROM scheduled_tasks
                WHERE namespace = :namespace
            """
            count_result = await db.execute(
                text(count_query),
                {"namespace": namespace}
            )
            total = count_result.fetchone().total

            # One page of scheduled-task rows.
            query = """
                SELECT
                    id,
                    task_name,
                    queue_name,
                    task_data,
                    cron_expression,
                    interval_seconds,
                    enabled,
                    last_run_at,
                    next_run_at,
                    created_at
                FROM scheduled_tasks
                WHERE namespace = :namespace
                ORDER BY created_at DESC
                LIMIT :limit OFFSET :offset
            """

            result = await db.execute(
                text(query),
                {"namespace": namespace, "limit": limit, "offset": offset}
            )

            # Serialize rows; timestamps become ISO-8601 strings or None.
            tasks = [
                {
                    "id": row.id,
                    "task_name": row.task_name,
                    "queue_name": row.queue_name,
                    "task_data": row.task_data,
                    "cron_expression": row.cron_expression,
                    "interval_seconds": row.interval_seconds,
                    "enabled": row.enabled,
                    "last_run_at": row.last_run_at.isoformat() if row.last_run_at else None,
                    "next_run_at": row.next_run_at.isoformat() if row.next_run_at else None,
                    "created_at": row.created_at.isoformat() if row.created_at else None,
                }
                for row in result
            ]

            return {"data": tasks, "total": total}

    except Exception as e:
        logger.error(f"获取定时任务列表失败: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))

1030
@router.get("/scheduled-tasks/statistics/{namespace}")
async def get_scheduled_tasks_statistics(namespace: str):
    """Return aggregate counts of scheduled tasks for a namespace.

    Returns:
        dict with ``total``, ``enabled``, ``disabled`` and ``running``
        (``running`` is not yet implemented and is always 0).

    Raises:
        HTTPException 500: on any database or connection failure.
    """
    try:
        data_access = get_namespace_data_access()
        conn = await data_access.manager.get_connection(namespace)

        # Without a PostgreSQL backend every counter is zero.
        if not conn.pg_config:
            return {
                "total": 0,
                "enabled": 0,
                "disabled": 0,
                "running": 0
            }

        async with conn.async_engine.begin() as db:
            # Single aggregate query: total plus enabled/disabled split.
            query = """
                SELECT
                    COUNT(*) as total,
                    COUNT(CASE WHEN enabled = true THEN 1 END) as enabled,
                    COUNT(CASE WHEN enabled = false THEN 1 END) as disabled
                FROM scheduled_tasks
                WHERE namespace = :namespace
            """

            result = await db.execute(
                text(query),
                {"namespace": namespace}
            )
            stats = result.fetchone()

        return {
            "total": stats.total,
            "enabled": stats.enabled,
            "disabled": stats.disabled,
            "running": 0  # TODO: count currently-running tasks
        }

    except Exception as e:
        logger.error(f"获取定时任务统计失败: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))

1074
@router.post("/scheduled-tasks")
async def create_scheduled_task(task: ScheduledTaskCreate):
    """Create a scheduled task (placeholder implementation)."""
    try:
        # TODO: persist the scheduled-task definition.
        response = {"success": True, "message": "定时任务创建成功"}
        return response
    except Exception as e:
        logger.error(f"创建定时任务失败: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))

1085
@router.put("/scheduled-tasks/{task_id}")
async def update_scheduled_task(task_id: str, task: ScheduledTaskUpdate):
    """Update a scheduled task (placeholder implementation)."""
    try:
        # TODO: apply the update to the stored task definition.
        response = {"success": True, "message": "定时任务更新成功"}
        return response
    except Exception as e:
        logger.error(f"更新定时任务失败: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))

1096
@router.delete("/scheduled-tasks/{task_id}")
async def delete_scheduled_task(task_id: str):
    """Delete a scheduled task (placeholder implementation)."""
    try:
        # TODO: remove the task definition from storage.
        response = {"success": True, "message": "定时任务删除成功"}
        return response
    except Exception as e:
        logger.error(f"删除定时任务失败: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))

1107
@router.post("/scheduled-tasks/{task_id}/toggle")
async def toggle_scheduled_task(task_id: str):
    """Enable or disable a scheduled task (placeholder implementation)."""
    try:
        # TODO: flip the task's enabled flag in storage.
        response = {"success": True, "message": "定时任务状态已切换"}
        return response
    except Exception as e:
        logger.error(f"切换定时任务状态失败: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))

1118
@router.post("/scheduled-tasks/{task_id}/execute")
async def execute_scheduled_task(task_id: str):
    """Trigger an immediate run of a scheduled task (placeholder)."""
    try:
        # TODO: enqueue the task for immediate execution.
        response = {"success": True, "message": "定时任务已触发执行"}
        return response
    except Exception as e:
        logger.error(f"执行定时任务失败: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))

1129
+ # ==================== 告警规则接口 ====================
1130
+
1131
@router.get("/alert-rules")
async def get_alert_rules():
    """List alert rules (placeholder implementation)."""
    try:
        # TODO: load the alert rules from storage.
        return {"data": []}
    except Exception as e:
        logger.error(f"获取告警规则失败: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))

1142
@router.post("/alert-rules")
async def create_alert_rule(rule: AlertRuleCreate):
    """Create an alert rule (placeholder implementation)."""
    try:
        # TODO: persist the new alert rule.
        response = {"success": True, "message": "告警规则创建成功"}
        return response
    except Exception as e:
        logger.error(f"创建告警规则失败: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))

1153
@router.put("/alert-rules/{rule_id}")
async def update_alert_rule(rule_id: str, rule: AlertRuleCreate):
    """Update an alert rule (placeholder implementation)."""
    try:
        # TODO: apply the update to the stored rule.
        response = {"success": True, "message": "告警规则更新成功"}
        return response
    except Exception as e:
        logger.error(f"更新告警规则失败: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))

1164
@router.delete("/alert-rules/{rule_id}")
async def delete_alert_rule(rule_id: str):
    """Delete an alert rule (placeholder implementation)."""
    try:
        # TODO: remove the rule from storage.
        response = {"success": True, "message": "告警规则删除成功"}
        return response
    except Exception as e:
        logger.error(f"删除告警规则失败: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))

1175
@router.put("/alert-rules/{rule_id}/toggle")
async def toggle_alert_rule(rule_id: str):
    """Enable or disable an alert rule (placeholder implementation)."""
    try:
        # TODO: flip the rule's enabled flag in storage.
        response = {"success": True, "message": "告警规则状态已切换"}
        return response
    except Exception as e:
        logger.error(f"切换告警规则状态失败: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))

1186
@router.get("/alert-rules/{rule_id}/history")
async def get_alert_history(rule_id: str):
    """Return the firing history of an alert rule (placeholder)."""
    try:
        # TODO: load the alert history from storage.
        return {"data": []}
    except Exception as e:
        logger.error(f"获取告警历史失败: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))

1197
@router.post("/alert-rules/{rule_id}/test")
async def test_alert_rule(rule_id: str):
    """Dry-run an alert rule (placeholder implementation)."""
    try:
        # TODO: evaluate the rule against current data.
        response = {"success": True, "message": "告警规则测试成功"}
        return response
    except Exception as e:
        logger.error(f"测试告警规则失败: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))

1208
+ # ==================== 命名空间管理接口 ====================
1209
+
1210
@router.get("/namespaces")
async def get_namespaces():
    """Return all namespaces with their Redis/PostgreSQL connection URLs.

    Returns:
        list of dicts: id, name, redis_url, pg_url, description,
        created_at, updated_at (timestamps as ISO-8601 or None).
        Empty list when no metadata database is configured.

    Raises:
        HTTPException 500: on any database failure.
    """
    try:
        # No metadata database configured -> nothing to list.
        if not task_center_config.pg_url:
            return []

        # The engine is created per request, so it MUST be disposed in
        # `finally`; otherwise each call leaks a connection pool.
        engine = create_async_engine(task_center_config.pg_url)
        try:
            AsyncSessionLocal = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)

            async with AsyncSessionLocal() as session:
                query = text("""
                    SELECT id, name, description, redis_config, pg_config, created_at, updated_at
                    FROM namespaces
                    ORDER BY name
                """)
                result = await session.execute(query)
                namespaces = []
                for row in result:
                    # Config columns may be NULL; fall back to empty dicts.
                    redis_config = row.redis_config if row.redis_config else {}
                    pg_config = row.pg_config if row.pg_config else {}

                    namespaces.append({
                        "id": row.id,
                        "name": row.name,
                        "redis_url": redis_config.get("url", ""),
                        "pg_url": pg_config.get("url", ""),
                        "description": row.description or "",
                        "created_at": row.created_at.isoformat() if row.created_at else None,
                        "updated_at": row.updated_at.isoformat() if row.updated_at else None
                    })

                return namespaces
        finally:
            await engine.dispose()

    except Exception as e:
        logger.error(f"获取命名空间列表失败: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))

1251
@router.get("/data/namespaces")
async def get_data_namespaces():
    """Namespace list for the data API; delegates to get_namespaces()."""
    namespaces = await get_namespaces()
    return namespaces

1256
@router.post("/namespaces")
async def create_namespace(namespace: NamespaceCreate):
    """Create a new namespace record.

    Raises:
        HTTPException 400: a namespace with the same name already exists.
        HTTPException 500: metadata database not configured or query failed.
    """
    try:
        if not task_center_config.pg_url:
            raise HTTPException(status_code=500, detail="数据库未配置")

        # Per-request engine: dispose in `finally` to avoid leaking a
        # connection pool on every call.
        engine = create_async_engine(task_center_config.pg_url)
        try:
            AsyncSessionLocal = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)

            async with AsyncSessionLocal() as session:
                # Reject duplicate names up front.
                check_query = text("SELECT id FROM namespaces WHERE name = :name")
                existing = await session.execute(check_query, {"name": namespace.name})
                if existing.fetchone():
                    raise HTTPException(status_code=400, detail="命名空间已存在")

                # Store connection URLs as JSON config objects.
                redis_config = {"url": namespace.redis_url} if namespace.redis_url else {}
                pg_config = {"url": namespace.pg_url} if namespace.pg_url else {}

                insert_query = text("""
                    INSERT INTO namespaces (name, description, redis_config, pg_config, created_at, updated_at)
                    VALUES (:name, :description, :redis_config, :pg_config, NOW(), NOW())
                    RETURNING id
                """)

                result = await session.execute(
                    insert_query,
                    {
                        "name": namespace.name,
                        "description": namespace.description,
                        "redis_config": json.dumps(redis_config),
                        "pg_config": json.dumps(pg_config)
                    }
                )
                await session.commit()

                new_id = result.fetchone().id

                return {
                    "success": True,
                    "message": "命名空间创建成功",
                    "id": new_id
                }
        finally:
            await engine.dispose()

    except HTTPException:
        # Re-raise intentional HTTP errors (400/500) untouched.
        raise
    except Exception as e:
        logger.error(f"创建命名空间失败: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))

1309
@router.put("/namespaces/{name}")
async def update_namespace(name: str, namespace: NamespaceUpdate):
    """Partially update a namespace's connection URLs and description.

    Fields left as None in the payload are preserved (true partial
    update) — including ``description``, which previously was clobbered
    to NULL whenever the caller omitted it.

    Raises:
        HTTPException 404: namespace does not exist.
        HTTPException 500: metadata database not configured or query failed.
    """
    try:
        if not task_center_config.pg_url:
            raise HTTPException(status_code=500, detail="数据库未配置")

        # Per-request engine: dispose in `finally` to avoid leaking a
        # connection pool on every call.
        engine = create_async_engine(task_center_config.pg_url)
        try:
            AsyncSessionLocal = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)

            async with AsyncSessionLocal() as session:
                # Load the current record (including description, so an
                # omitted description is kept rather than overwritten).
                query = text("SELECT redis_config, pg_config, description FROM namespaces WHERE name = :name")
                result = await session.execute(query, {"name": name})
                row = result.fetchone()

                if not row:
                    raise HTTPException(status_code=404, detail="命名空间不存在")

                # Existing configs; NULL columns become empty dicts.
                redis_config = row.redis_config if row.redis_config else {}
                pg_config = row.pg_config if row.pg_config else {}

                # Apply only the fields the caller actually provided.
                if namespace.redis_url is not None:
                    redis_config["url"] = namespace.redis_url
                if namespace.pg_url is not None:
                    pg_config["url"] = namespace.pg_url
                description = (
                    namespace.description
                    if namespace.description is not None
                    else row.description
                )

                update_query = text("""
                    UPDATE namespaces
                    SET redis_config = :redis_config,
                        pg_config = :pg_config,
                        description = :description,
                        updated_at = NOW()
                    WHERE name = :name
                """)

                await session.execute(
                    update_query,
                    {
                        "name": name,
                        "redis_config": json.dumps(redis_config),
                        "pg_config": json.dumps(pg_config),
                        "description": description
                    }
                )
                await session.commit()

                return {"success": True, "message": "命名空间更新成功"}
        finally:
            await engine.dispose()

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"更新命名空间失败: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))

1368
@router.delete("/namespaces/{name}")
async def delete_namespace(name: str):
    """Delete a namespace by name.

    Raises:
        HTTPException 404: namespace does not exist.
        HTTPException 500: metadata database not configured or query failed.
    """
    try:
        if not task_center_config.pg_url:
            raise HTTPException(status_code=500, detail="数据库未配置")

        # Per-request engine: dispose in `finally` to avoid leaking a
        # connection pool on every call.
        engine = create_async_engine(task_center_config.pg_url)
        try:
            AsyncSessionLocal = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)

            async with AsyncSessionLocal() as session:
                # Verify the namespace exists before deleting.
                check_query = text("SELECT id FROM namespaces WHERE name = :name")
                result = await session.execute(check_query, {"name": name})
                if not result.fetchone():
                    raise HTTPException(status_code=404, detail="命名空间不存在")

                delete_query = text("DELETE FROM namespaces WHERE name = :name")
                await session.execute(delete_query, {"name": name})
                await session.commit()

                return {"success": True, "message": "命名空间删除成功"}
        finally:
            await engine.dispose()

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"删除命名空间失败: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))

1399
@router.get("/namespaces/{name}")
async def get_namespace_details(name: str):
    """Return a single namespace's details by name.

    Raises:
        HTTPException 404: namespace does not exist.
        HTTPException 500: metadata database not configured or query failed.
    """
    try:
        if not task_center_config.pg_url:
            raise HTTPException(status_code=500, detail="数据库未配置")

        # Per-request engine: dispose in `finally` to avoid leaking a
        # connection pool on every call.
        engine = create_async_engine(task_center_config.pg_url)
        try:
            AsyncSessionLocal = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)

            async with AsyncSessionLocal() as session:
                query = text("""
                    SELECT id, name, description, redis_config, pg_config, created_at, updated_at
                    FROM namespaces
                    WHERE name = :name
                """)
                result = await session.execute(query, {"name": name})
                row = result.fetchone()

                if not row:
                    raise HTTPException(status_code=404, detail="命名空间不存在")

                # Config columns may be NULL; fall back to empty dicts.
                redis_config = row.redis_config if row.redis_config else {}
                pg_config = row.pg_config if row.pg_config else {}

                return {
                    "id": row.id,
                    "name": row.name,
                    "redis_url": redis_config.get("url", ""),
                    "pg_url": pg_config.get("url", ""),
                    "description": row.description or "",
                    "created_at": row.created_at.isoformat() if row.created_at else None,
                    "updated_at": row.updated_at.isoformat() if row.updated_at else None
                }
        finally:
            await engine.dispose()

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"获取命名空间详情失败: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))

1442
+ # ==================== Redis监控接口 ====================
1443
+
1444
@router.get("/redis/monitor/{namespace}")
async def get_redis_monitor(namespace: str):
    """Return a health/metrics snapshot of a namespace's Redis instance.

    Aggregates the output of Redis ``INFO`` into memory, client, stats,
    keyspace and server sections.

    Raises:
        HTTPException 404: the namespace has no usable Redis configuration.
        HTTPException 500: any other failure while collecting the data.
    """
    try:
        data_access = get_namespace_data_access()
        conn = await data_access.manager.get_connection(namespace)

        # Obtain the Redis client; ValueError here means the namespace has
        # no Redis configured, which we surface as 404 rather than 500.
        try:
            redis_client = await conn.get_redis_client()
        except ValueError as e:
            raise HTTPException(status_code=404, detail=str(e))

        # Raw INFO dictionary straight from the server.
        info = await redis_client.info()

        # Memory section; usage_percentage stays None when no maxmemory
        # limit is configured (maxmemory == 0 means "unlimited").
        memory_info = {
            "used_memory": info.get("used_memory", 0),
            "used_memory_human": info.get("used_memory_human", "0B"),
            "usage_percentage": None,
            "maxmemory": info.get("maxmemory", 0),
            "maxmemory_human": "0B",
            "mem_fragmentation_ratio": info.get("mem_fragmentation_ratio", 1.0)
        }

        # Compute the memory usage percentage only when a limit is set.
        if info.get("maxmemory") and info.get("maxmemory") > 0:
            memory_info["usage_percentage"] = round(
                (info.get("used_memory", 0) / info.get("maxmemory")) * 100, 2
            )
            memory_info["maxmemory_human"] = f"{info.get('maxmemory') / (1024*1024):.1f}MB"

        # Connected-client counters.
        clients_info = {
            "connected_clients": info.get("connected_clients", 0),
            "blocked_clients": info.get("blocked_clients", 0)
        }

        # Throughput and cache-hit statistics.
        stats_info = {
            "instantaneous_ops_per_sec": info.get("instantaneous_ops_per_sec", 0),
            "hit_rate": 0,
            "keyspace_hits": info.get("keyspace_hits", 0),
            "keyspace_misses": info.get("keyspace_misses", 0)
        }

        # Hit rate as a percentage of all keyspace lookups (hits+misses);
        # left at 0 when there have been no lookups at all.
        total_hits = stats_info["keyspace_hits"] + stats_info["keyspace_misses"]
        if total_hits > 0:
            stats_info["hit_rate"] = round(
                (stats_info["keyspace_hits"] / total_hits) * 100, 2
            )

        # Keyspace section: total key count across all databases.
        keyspace_info = {
            "total_keys": 0
        }

        # INFO exposes one "dbN" entry per non-empty database; sum their
        # per-db key counts.
        for key in info:
            if key.startswith("db"):
                db_info = info[key]
                if isinstance(db_info, dict):
                    keyspace_info["total_keys"] += db_info.get("keys", 0)

        # Server identification/uptime.
        server_info = {
            "redis_version": info.get("redis_version", "unknown"),
            "uptime_in_seconds": info.get("uptime_in_seconds", 0)
        }

        return {
            "success": True,
            "data": {
                "status": "healthy",
                "memory": memory_info,
                "clients": clients_info,
                "stats": stats_info,
                "keyspace": keyspace_info,
                "server": server_info
            }
        }

    except HTTPException:
        # Propagate the 404 (and any other deliberate HTTP error) as-is.
        raise
    except Exception as e:
        logger.error(f"获取Redis监控信息失败: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))

1535
+ # ==================== 流积压监控接口 ====================
1536
+
1537
@router.get("/stream-backlog/{namespace}")
async def get_stream_backlog(
    namespace: str,
    time_range: str = Query("1h", description="时间范围"),
    queue: Optional[str] = Query(None, description="队列名称")
):
    """Return stream backlog monitoring rows for a namespace.

    Args:
        namespace: target namespace.
        time_range: lookback window string (e.g. "1h"); parsed by the
            module-level ``parse_time_range`` helper — assumed to return
            a ``timedelta`` (it is subtracted from ``datetime.now``);
            TODO confirm against its definition elsewhere in this file.
        queue: optional stream name filter.

    Returns:
        dict with ``data``: up to 1000 most recent backlog rows.

    Raises:
        HTTPException 500: on any database or connection failure.
    """
    try:
        data_access = get_namespace_data_access()
        conn = await data_access.manager.get_connection(namespace)

        # Backlog history lives in PostgreSQL; without it return nothing.
        if not conn.pg_config:
            return {"data": []}

        # Convert the lookback window to an absolute UTC start time.
        time_delta = parse_time_range(time_range)
        start_time = datetime.now(timezone.utc) - time_delta

        async with conn.async_engine.begin() as pg_conn:
            # WHERE clause built from bound parameters only; user values
            # are never interpolated into the SQL string.
            conditions = ["namespace = :namespace", "created_at >= :start_time"]
            params = {"namespace": namespace, "start_time": start_time}

            if queue:
                conditions.append("stream_name = :queue")
                params["queue"] = queue

            where_clause = " AND ".join(conditions)

            query = f"""
                SELECT
                    stream_name,
                    consumer_group,
                    consumer_lag,
                    last_published_offset,
                    last_acknowledged_offset,
                    created_at
                FROM stream_backlog_monitor
                WHERE {where_clause}
                ORDER BY created_at DESC
                LIMIT 1000
            """

            result = await pg_conn.execute(text(query), params)

            data = []
            for row in result:
                data.append({
                    "stream_name": row.stream_name,
                    "consumer_group": row.consumer_group,
                    "consumer_lag": row.consumer_lag,
                    "last_published_offset": row.last_published_offset,
                    "last_acknowledged_offset": row.last_acknowledged_offset,
                    "created_at": row.created_at.isoformat()
                })

            return {"data": data}

    except Exception as e:
        logger.error(f"获取流积压数据失败: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))

1600
# Public API of this module: only the router is exported.
__all__ = ["router"]