jettask 0.2.18__py3-none-any.whl → 0.2.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165)
  1. jettask/__init__.py +60 -2
  2. jettask/cli.py +314 -228
  3. jettask/config/__init__.py +9 -1
  4. jettask/config/config.py +245 -0
  5. jettask/config/env_loader.py +381 -0
  6. jettask/config/lua_scripts.py +158 -0
  7. jettask/config/nacos_config.py +132 -5
  8. jettask/core/__init__.py +1 -1
  9. jettask/core/app.py +1573 -666
  10. jettask/core/app_importer.py +33 -16
  11. jettask/core/container.py +532 -0
  12. jettask/core/task.py +1 -4
  13. jettask/core/unified_manager_base.py +2 -2
  14. jettask/executor/__init__.py +38 -0
  15. jettask/executor/core.py +625 -0
  16. jettask/executor/executor.py +338 -0
  17. jettask/executor/orchestrator.py +290 -0
  18. jettask/executor/process_entry.py +638 -0
  19. jettask/executor/task_executor.py +317 -0
  20. jettask/messaging/__init__.py +68 -0
  21. jettask/messaging/event_pool.py +2188 -0
  22. jettask/messaging/reader.py +519 -0
  23. jettask/messaging/registry.py +266 -0
  24. jettask/messaging/scanner.py +369 -0
  25. jettask/messaging/sender.py +312 -0
  26. jettask/persistence/__init__.py +118 -0
  27. jettask/persistence/backlog_monitor.py +567 -0
  28. jettask/{backend/data_access.py → persistence/base.py} +58 -57
  29. jettask/persistence/consumer.py +315 -0
  30. jettask/{core → persistence}/db_manager.py +23 -22
  31. jettask/persistence/maintenance.py +81 -0
  32. jettask/persistence/message_consumer.py +259 -0
  33. jettask/{backend/namespace_data_access.py → persistence/namespace.py} +66 -98
  34. jettask/persistence/offline_recovery.py +196 -0
  35. jettask/persistence/queue_discovery.py +215 -0
  36. jettask/persistence/task_persistence.py +218 -0
  37. jettask/persistence/task_updater.py +583 -0
  38. jettask/scheduler/__init__.py +2 -2
  39. jettask/scheduler/loader.py +6 -5
  40. jettask/scheduler/run_scheduler.py +1 -1
  41. jettask/scheduler/scheduler.py +7 -7
  42. jettask/scheduler/{unified_scheduler_manager.py → scheduler_coordinator.py} +18 -13
  43. jettask/task/__init__.py +16 -0
  44. jettask/{router.py → task/router.py} +26 -8
  45. jettask/task/task_center/__init__.py +9 -0
  46. jettask/task/task_executor.py +318 -0
  47. jettask/task/task_registry.py +291 -0
  48. jettask/test_connection_monitor.py +73 -0
  49. jettask/utils/__init__.py +31 -1
  50. jettask/{monitor/run_backlog_collector.py → utils/backlog_collector.py} +1 -1
  51. jettask/utils/db_connector.py +1629 -0
  52. jettask/{db_init.py → utils/db_init.py} +1 -1
  53. jettask/utils/rate_limit/__init__.py +30 -0
  54. jettask/utils/rate_limit/concurrency_limiter.py +665 -0
  55. jettask/utils/rate_limit/config.py +145 -0
  56. jettask/utils/rate_limit/limiter.py +41 -0
  57. jettask/utils/rate_limit/manager.py +269 -0
  58. jettask/utils/rate_limit/qps_limiter.py +154 -0
  59. jettask/utils/rate_limit/task_limiter.py +384 -0
  60. jettask/utils/serializer.py +3 -0
  61. jettask/{monitor/stream_backlog_monitor.py → utils/stream_backlog.py} +14 -6
  62. jettask/utils/time_sync.py +173 -0
  63. jettask/webui/__init__.py +27 -0
  64. jettask/{api/v1 → webui/api}/alerts.py +1 -1
  65. jettask/{api/v1 → webui/api}/analytics.py +2 -2
  66. jettask/{api/v1 → webui/api}/namespaces.py +1 -1
  67. jettask/{api/v1 → webui/api}/overview.py +1 -1
  68. jettask/{api/v1 → webui/api}/queues.py +3 -3
  69. jettask/{api/v1 → webui/api}/scheduled.py +1 -1
  70. jettask/{api/v1 → webui/api}/settings.py +1 -1
  71. jettask/{api.py → webui/app.py} +253 -145
  72. jettask/webui/namespace_manager/__init__.py +10 -0
  73. jettask/{multi_namespace_consumer.py → webui/namespace_manager/multi.py} +69 -22
  74. jettask/{unified_consumer_manager.py → webui/namespace_manager/unified.py} +1 -1
  75. jettask/{run.py → webui/run.py} +2 -2
  76. jettask/{services → webui/services}/__init__.py +1 -3
  77. jettask/{services → webui/services}/overview_service.py +34 -16
  78. jettask/{services → webui/services}/queue_service.py +1 -1
  79. jettask/{backend → webui/services}/queue_stats_v2.py +1 -1
  80. jettask/{services → webui/services}/settings_service.py +1 -1
  81. jettask/worker/__init__.py +53 -0
  82. jettask/worker/lifecycle.py +1507 -0
  83. jettask/worker/manager.py +583 -0
  84. jettask/{core/offline_worker_recovery.py → worker/recovery.py} +268 -175
  85. {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/METADATA +2 -71
  86. jettask-0.2.20.dist-info/RECORD +145 -0
  87. jettask/__main__.py +0 -140
  88. jettask/api/__init__.py +0 -103
  89. jettask/backend/__init__.py +0 -1
  90. jettask/backend/api/__init__.py +0 -3
  91. jettask/backend/api/v1/__init__.py +0 -17
  92. jettask/backend/api/v1/monitoring.py +0 -431
  93. jettask/backend/api/v1/namespaces.py +0 -504
  94. jettask/backend/api/v1/queues.py +0 -342
  95. jettask/backend/api/v1/tasks.py +0 -367
  96. jettask/backend/core/__init__.py +0 -3
  97. jettask/backend/core/cache.py +0 -221
  98. jettask/backend/core/database.py +0 -200
  99. jettask/backend/core/exceptions.py +0 -102
  100. jettask/backend/dependencies.py +0 -261
  101. jettask/backend/init_meta_db.py +0 -158
  102. jettask/backend/main.py +0 -1426
  103. jettask/backend/main_unified.py +0 -78
  104. jettask/backend/main_v2.py +0 -394
  105. jettask/backend/models/__init__.py +0 -3
  106. jettask/backend/models/requests.py +0 -236
  107. jettask/backend/models/responses.py +0 -230
  108. jettask/backend/namespace_api_old.py +0 -267
  109. jettask/backend/services/__init__.py +0 -3
  110. jettask/backend/start.py +0 -42
  111. jettask/backend/unified_api_router.py +0 -1541
  112. jettask/cleanup_deprecated_tables.sql +0 -16
  113. jettask/core/consumer_manager.py +0 -1695
  114. jettask/core/delay_scanner.py +0 -256
  115. jettask/core/event_pool.py +0 -1700
  116. jettask/core/heartbeat_process.py +0 -222
  117. jettask/core/task_batch.py +0 -153
  118. jettask/core/worker_scanner.py +0 -271
  119. jettask/executors/__init__.py +0 -5
  120. jettask/executors/asyncio.py +0 -876
  121. jettask/executors/base.py +0 -30
  122. jettask/executors/common.py +0 -148
  123. jettask/executors/multi_asyncio.py +0 -309
  124. jettask/gradio_app.py +0 -570
  125. jettask/integrated_gradio_app.py +0 -1088
  126. jettask/main.py +0 -0
  127. jettask/monitoring/__init__.py +0 -3
  128. jettask/pg_consumer.py +0 -1896
  129. jettask/run_monitor.py +0 -22
  130. jettask/run_webui.py +0 -148
  131. jettask/scheduler/multi_namespace_scheduler.py +0 -294
  132. jettask/scheduler/unified_manager.py +0 -450
  133. jettask/task_center_client.py +0 -150
  134. jettask/utils/serializer_optimized.py +0 -33
  135. jettask/webui_exceptions.py +0 -67
  136. jettask-0.2.18.dist-info/RECORD +0 -150
  137. /jettask/{constants.py → config/constants.py} +0 -0
  138. /jettask/{backend/config.py → config/task_center.py} +0 -0
  139. /jettask/{pg_consumer → messaging/pg_consumer}/pg_consumer_v2.py +0 -0
  140. /jettask/{pg_consumer → messaging/pg_consumer}/sql/add_execution_time_field.sql +0 -0
  141. /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_new_tables.sql +0 -0
  142. /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_tables_v3.sql +0 -0
  143. /jettask/{pg_consumer → messaging/pg_consumer}/sql/migrate_to_new_structure.sql +0 -0
  144. /jettask/{pg_consumer → messaging/pg_consumer}/sql/modify_time_fields.sql +0 -0
  145. /jettask/{pg_consumer → messaging/pg_consumer}/sql_utils.py +0 -0
  146. /jettask/{models.py → persistence/models.py} +0 -0
  147. /jettask/scheduler/{manager.py → task_crud.py} +0 -0
  148. /jettask/{schema.sql → schemas/schema.sql} +0 -0
  149. /jettask/{task_center.py → task/task_center/client.py} +0 -0
  150. /jettask/{monitoring → utils}/file_watcher.py +0 -0
  151. /jettask/{services/redis_monitor_service.py → utils/redis_monitor.py} +0 -0
  152. /jettask/{api/v1 → webui/api}/__init__.py +0 -0
  153. /jettask/{webui_config.py → webui/config.py} +0 -0
  154. /jettask/{webui_models → webui/models}/__init__.py +0 -0
  155. /jettask/{webui_models → webui/models}/namespace.py +0 -0
  156. /jettask/{services → webui/services}/alert_service.py +0 -0
  157. /jettask/{services → webui/services}/analytics_service.py +0 -0
  158. /jettask/{services → webui/services}/scheduled_task_service.py +0 -0
  159. /jettask/{services → webui/services}/task_service.py +0 -0
  160. /jettask/{webui_sql → webui/sql}/batch_upsert_functions.sql +0 -0
  161. /jettask/{webui_sql → webui/sql}/verify_database.sql +0 -0
  162. {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/WHEEL +0 -0
  163. {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/entry_points.txt +0 -0
  164. {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/licenses/LICENSE +0 -0
  165. {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/top_level.txt +0 -0
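
Most of the churn in this release is a package reorganization: modules move out of jettask/core, jettask/backend, and the package root into focused subpackages (config, messaging, persistence, task, webui, worker), so code importing the old paths needs updating. The sketch below is a runnable way to check which layout an installed jettask exposes; the old/new dotted paths are read off the rename entries above (21/115, 30, 44, 71), and the mapping is illustrative, not an official migration table.

    # Check which module layout the installed jettask exposes (a sketch; the
    # old -> new dotted paths follow the rename entries in the file list above).
    from importlib.util import find_spec

    MOVES = {
        "jettask.core.event_pool": "jettask.messaging.event_pool",    # entries 21 / 115
        "jettask.core.db_manager": "jettask.persistence.db_manager",  # entry 30
        "jettask.router": "jettask.task.router",                      # entry 44
        "jettask.api": "jettask.webui.app",                           # entry 71
    }

    def module_exists(dotted: str) -> bool:
        """Locate a module without importing it; a missing parent package raises."""
        try:
            return find_spec(dotted) is not None
        except ModuleNotFoundError:
            return False

    for old, new in MOVES.items():
        print(f"{old}: {'still present' if module_exists(old) else 'gone'} -> "
              f"{new}: {'present' if module_exists(new) else 'missing'}")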
jettask/backend/unified_api_router.py
@@ -1,1541 +0,0 @@
- """
- Unified API router.
- Consolidates all the scattered API endpoints into a single file for easier maintenance and management.
- """
-
- from fastapi import APIRouter, HTTPException, Query, Request, Depends
- from typing import List, Dict, Optional, Any, Union
- from datetime import datetime, timedelta, timezone
- import logging
- import time
- import json
- import asyncio
- import psutil
- from sqlalchemy import text
- from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession
- from sqlalchemy.orm import sessionmaker
- import traceback
- from jettask.schemas import (
-     TimeRangeQuery,
-     QueueStatsResponse,
-     TaskDetailResponse,
-     DashboardOverviewRequest,
-     ScheduledTaskCreate,
-     ScheduledTaskUpdate,
-     AlertRuleCreate,
-     NamespaceCreate,
-     NamespaceUpdate
- )
-
- # Import local modules
- try:
-     from namespace_data_access import get_namespace_data_access
-     from config import task_center_config
- except ImportError:
-     # If the relative imports fail, fall back to absolute imports
-     from jettask.backend.namespace_data_access import get_namespace_data_access
-     from jettask.backend.config import task_center_config
-
- # Set up logging
- logger = logging.getLogger(__name__)
-
- # Create the unified router
- router = APIRouter(prefix="/api", tags=["API"])
-
- # ==================== Data models are imported from the schemas module ====================
-
- # ==================== Helper functions ====================
-
- def parse_time_range(time_range: str) -> timedelta:
-     """Parse a time-range string."""
-     units = {
-         'm': 'minutes',
-         'h': 'hours',
-         'd': 'days',
-         'w': 'weeks'
-     }
-
-     if not time_range or len(time_range) < 2:
-         return timedelta(hours=1)
-
-     try:
-         value = int(time_range[:-1])
-         unit = time_range[-1].lower()
-
-         if unit in units:
-             return timedelta(**{units[unit]: value})
-         else:
-             return timedelta(hours=1)
-     except (ValueError, KeyError):
-         return timedelta(hours=1)
-
- def get_base_queue_name(queue_name: str) -> str:
-     """Get the base queue name (strip the priority suffix)."""
-     if '_priority_' in queue_name:
-         return queue_name.split('_priority_')[0]
-     return queue_name
-
- # ==================== Dashboard endpoints ====================
-
- @router.get("/data/dashboard-stats/{namespace}")
- async def get_dashboard_stats(
-     namespace: str,
-     time_range: str = "24h",
-     queues: Optional[str] = Query(None, description="Comma-separated list of queue names")
- ):
-     """
-     Fetch dashboard statistics (total tasks, completed, failed, success rate, throughput, etc.).
-     """
-     try:
-         data_access = get_namespace_data_access()
-         conn = await data_access.manager.get_connection(namespace)
-
-         # Return empty data when there is no PostgreSQL configuration
-         if not conn.pg_config:
-             return {
-                 "success": True,
-                 "data": {
-                     "total_tasks": 0,
-                     "completed_tasks": 0,
-                     "failed_tasks": 0,
-                     "running_tasks": 0,
-                     "pending_tasks": 0,
-                     "success_rate": 0,
-                     "throughput": 0,
-                     "avg_processing_time": 0,
-                     "total_queues": 0,
-                     "task_distribution": []
-                 }
-             }
-
-         # Parse the time range
-         time_delta = parse_time_range(time_range)
-         start_time = datetime.now(timezone.utc) - time_delta
-
-         # Parse the queue filter
-         queue_filter = []
-         if queues:
-             queue_filter = [q.strip() for q in queues.split(',') if q.strip()]
-
-         async with conn.async_engine.begin() as pg_conn:
-             # Build the queue filter condition
-             queue_condition = ""
-             if queue_filter:
-                 queue_list = "', '".join(queue_filter)
-                 queue_condition = f"AND queue IN ('{queue_list}')"
-
-             # Fetch task statistics
-             stats_query = f"""
-                 SELECT
-                     COUNT(*) as total_tasks,
-                     COUNT(CASE WHEN status = 'completed' THEN 1 END) as completed_tasks,
-                     COUNT(CASE WHEN status = 'failed' THEN 1 END) as failed_tasks,
-                     COUNT(CASE WHEN status = 'running' THEN 1 END) as running_tasks,
-                     COUNT(CASE WHEN status = 'pending' THEN 1 END) as pending_tasks,
-                     AVG(CASE
-                         WHEN status = 'completed' AND completed_at IS NOT NULL AND started_at IS NOT NULL
-                         THEN EXTRACT(EPOCH FROM (completed_at - started_at)) * 1000
-                         ELSE NULL
-                     END) as avg_processing_time
-                 FROM tasks
-                 WHERE namespace = :namespace
-                 AND created_at >= :start_time
-                 {queue_condition}
-             """
-
-             result = await pg_conn.execute(
-                 text(stats_query),
-                 {"namespace": namespace, "start_time": start_time}
-             )
-             stats = result.fetchone()
-
-             # Fetch the queue count
-             queue_query = f"""
-                 SELECT COUNT(DISTINCT queue) as total_queues
-                 FROM tasks
-                 WHERE namespace = :namespace
-                 {queue_condition}
-             """
-             queue_result = await pg_conn.execute(
-                 text(queue_query),
-                 {"namespace": namespace}
-             )
-             queue_count = queue_result.fetchone()
-
-             # Fetch the task distribution (by queue)
-             distribution_query = f"""
-                 SELECT
-                     queue as type,
-                     COUNT(*) as value
-                 FROM tasks
-                 WHERE namespace = :namespace
-                 AND created_at >= :start_time
-                 {queue_condition}
-                 GROUP BY queue
-                 ORDER BY value DESC
-                 LIMIT 10
-             """
-             distribution_result = await pg_conn.execute(
-                 text(distribution_query),
-                 {"namespace": namespace, "start_time": start_time}
-             )
-             distribution_data = [
-                 {"type": row.type, "value": row.value}
-                 for row in distribution_result
-             ]
-
-             # Compute throughput (tasks completed over the last few minutes)
-             throughput_minutes = 5
-             throughput_start = datetime.now(timezone.utc) - timedelta(minutes=throughput_minutes)
-             throughput_query = f"""
-                 SELECT COUNT(*) as completed_count
-                 FROM tasks
-                 WHERE namespace = :namespace
-                 AND status = 'completed'
-                 AND completed_at >= :start_time
-                 {queue_condition}
-             """
-             throughput_result = await pg_conn.execute(
-                 text(throughput_query),
-                 {"namespace": namespace, "start_time": throughput_start}
-             )
-             throughput_count = throughput_result.fetchone().completed_count or 0
-             throughput = (throughput_count / throughput_minutes) if throughput_minutes > 0 else 0
-
-             # Compute the success rate
-             total = stats.total_tasks or 0
-             completed = stats.completed_tasks or 0
-             failed = stats.failed_tasks or 0
-             success_rate = (completed / (completed + failed) * 100) if (completed + failed) > 0 else 0
-
-             return {
-                 "success": True,
-                 "data": {
-                     "total_tasks": total,
-                     "completed_tasks": completed,
-                     "failed_tasks": failed,
-                     "running_tasks": stats.running_tasks or 0,
-                     "pending_tasks": stats.pending_tasks or 0,
-                     "success_rate": round(success_rate, 2),
-                     "throughput": round(throughput, 2),
-                     "avg_processing_time": round(stats.avg_processing_time or 0, 2),
-                     "total_queues": queue_count.total_queues or 0,
-                     "task_distribution": distribution_data
-                 }
-             }
-
-     except Exception as e:
-         logger.error(f"Failed to fetch dashboard statistics: {e}")
-         traceback.print_exc()
-         raise HTTPException(status_code=500, detail=str(e))
-
- @router.post("/data/dashboard-overview-stats/{namespace}")
- async def get_dashboard_overview_stats(
-     namespace: str,
-     request: DashboardOverviewRequest
- ):
-     """
-     Fetch dashboard overview statistics (task trend, concurrency, processing time, etc.).
-     """
-     try:
-         data_access = get_namespace_data_access()
-         conn = await data_access.manager.get_connection(namespace)
-
-         if not conn.pg_config:
-             return {
-                 "task_trend": [],
-                 "concurrency": [],
-                 "processing_time": [],
-                 "creation_latency": [],
-                 "granularity": "minute"
-             }
-
-         # Parse the time range and granularity
-         time_delta = parse_time_range(request.time_range)
-
-         # Choose the data granularity based on the time range
-         if time_delta <= timedelta(hours=1):
-             granularity = "minute"
-             interval = "1 minute"
-         elif time_delta <= timedelta(hours=6):
-             granularity = "5 minutes"
-             interval = "5 minutes"
-         elif time_delta <= timedelta(days=1):
-             granularity = "hour"
-             interval = "1 hour"
-         else:
-             granularity = "day"
-             interval = "1 day"
-
-         start_time = datetime.now(timezone.utc) - time_delta
-
-         # Build the queue filter condition
-         queue_condition = ""
-         if request.queues:
-             queue_list = "', '".join(request.queues)
-             queue_condition = f"AND queue IN ('{queue_list}')"
-
-         async with conn.async_engine.begin() as pg_conn:
-             # Fetch task trend data
-             trend_query = f"""
-                 WITH time_series AS (
-                     SELECT generate_series(
-                         date_trunc('{granularity}', :start_time::timestamptz),
-                         date_trunc('{granularity}', CURRENT_TIMESTAMP),
-                         '{interval}'::interval
-                     ) AS time_bucket
-                 ),
-                 task_metrics AS (
-                     SELECT
-                         date_trunc('{granularity}', created_at) as time_bucket,
-                         COUNT(*) FILTER (WHERE created_at IS NOT NULL) as created_count,
-                         COUNT(*) FILTER (WHERE status = 'completed') as completed_count,
-                         COUNT(*) FILTER (WHERE status = 'failed') as failed_count
-                     FROM tasks
-                     WHERE namespace = :namespace
-                     AND created_at >= :start_time
-                     {queue_condition}
-                     GROUP BY date_trunc('{granularity}', created_at)
-                 )
-                 SELECT
-                     ts.time_bucket,
-                     COALESCE(tm.created_count, 0) as created_count,
-                     COALESCE(tm.completed_count, 0) as completed_count,
-                     COALESCE(tm.failed_count, 0) as failed_count
-                 FROM time_series ts
-                 LEFT JOIN task_metrics tm ON ts.time_bucket = tm.time_bucket
-                 ORDER BY ts.time_bucket
-             """
-
-             trend_result = await pg_conn.execute(
-                 text(trend_query),
-                 {"namespace": namespace, "start_time": start_time}
-             )
-
-             task_trend = []
-             for row in trend_result:
-                 time_str = row.time_bucket.isoformat()
-                 task_trend.extend([
-                     {"time": time_str, "value": row.created_count, "metric": "Enqueue rate"},
-                     {"time": time_str, "value": row.completed_count, "metric": "Completion rate"},
-                     {"time": time_str, "value": row.failed_count, "metric": "Failure count"}
-                 ])
-
-             # Fetch concurrency data
-             concurrency_query = f"""
-                 WITH time_series AS (
-                     SELECT generate_series(
-                         date_trunc('{granularity}', :start_time::timestamptz),
-                         date_trunc('{granularity}', CURRENT_TIMESTAMP),
-                         '{interval}'::interval
-                     ) AS time_bucket
-                 ),
-                 concurrency_data AS (
-                     SELECT
-                         date_trunc('{granularity}', started_at) as time_bucket,
-                         COUNT(DISTINCT task_id) as concurrent_tasks
-                     FROM tasks
-                     WHERE namespace = :namespace
-                     AND started_at >= :start_time
-                     AND started_at IS NOT NULL
-                     {queue_condition}
-                     GROUP BY date_trunc('{granularity}', started_at)
-                 )
-                 SELECT
-                     ts.time_bucket,
-                     COALESCE(cd.concurrent_tasks, 0) as concurrent_tasks
-                 FROM time_series ts
-                 LEFT JOIN concurrency_data cd ON ts.time_bucket = cd.time_bucket
-                 ORDER BY ts.time_bucket
-             """
-
-             concurrency_result = await pg_conn.execute(
-                 text(concurrency_query),
-                 {"namespace": namespace, "start_time": start_time}
-             )
-
-             concurrency = [
-                 {"time": row.time_bucket.isoformat(), "value": row.concurrent_tasks, "metric": "Concurrency"}
-                 for row in concurrency_result
-             ]
-
-             # Fetch processing-time data
-             processing_time_query = f"""
-                 WITH time_series AS (
-                     SELECT generate_series(
-                         date_trunc('{granularity}', :start_time::timestamptz),
-                         date_trunc('{granularity}', CURRENT_TIMESTAMP),
-                         '{interval}'::interval
-                     ) AS time_bucket
-                 ),
-                 processing_metrics AS (
-                     SELECT
-                         date_trunc('{granularity}', completed_at) as time_bucket,
-                         PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY EXTRACT(EPOCH FROM (completed_at - started_at)) * 1000) as p50,
-                         PERCENTILE_CONT(0.9) WITHIN GROUP (ORDER BY EXTRACT(EPOCH FROM (completed_at - started_at)) * 1000) as p90,
-                         AVG(EXTRACT(EPOCH FROM (completed_at - started_at)) * 1000) as avg_time
-                     FROM tasks
-                     WHERE namespace = :namespace
-                     AND completed_at >= :start_time
-                     AND status = 'completed'
-                     AND started_at IS NOT NULL
-                     {queue_condition}
-                     GROUP BY date_trunc('{granularity}', completed_at)
-                 )
-                 SELECT
-                     ts.time_bucket,
-                     COALESCE(pm.p50, 0) as p50,
-                     COALESCE(pm.p90, 0) as p90,
-                     COALESCE(pm.avg_time, 0) as avg_time
-                 FROM time_series ts
-                 LEFT JOIN processing_metrics pm ON ts.time_bucket = pm.time_bucket
-                 ORDER BY ts.time_bucket
-             """
-
-             processing_result = await pg_conn.execute(
-                 text(processing_time_query),
-                 {"namespace": namespace, "start_time": start_time}
-             )
-
-             processing_time = []
-             for row in processing_result:
-                 time_str = row.time_bucket.isoformat()
-                 processing_time.extend([
-                     {"time": time_str, "value": round(row.p50, 2), "metric": "P50 processing time"},
-                     {"time": time_str, "value": round(row.p90, 2), "metric": "P90 processing time"},
-                     {"time": time_str, "value": round(row.avg_time, 2), "metric": "Average processing time"}
-                 ])
-
-             # Fetch creation-latency data
-             creation_latency_query = f"""
-                 WITH time_series AS (
-                     SELECT generate_series(
-                         date_trunc('{granularity}', :start_time::timestamptz),
-                         date_trunc('{granularity}', CURRENT_TIMESTAMP),
-                         '{interval}'::interval
-                     ) AS time_bucket
-                 ),
-                 latency_metrics AS (
-                     SELECT
-                         date_trunc('{granularity}', started_at) as time_bucket,
-                         AVG(EXTRACT(EPOCH FROM (started_at - created_at)) * 1000) as avg_latency
-                     FROM tasks
-                     WHERE namespace = :namespace
-                     AND started_at >= :start_time
-                     AND started_at IS NOT NULL
-                     {queue_condition}
-                     GROUP BY date_trunc('{granularity}', started_at)
-                 )
-                 SELECT
-                     ts.time_bucket,
-                     COALESCE(lm.avg_latency, 0) as avg_latency
-                 FROM time_series ts
-                 LEFT JOIN latency_metrics lm ON ts.time_bucket = lm.time_bucket
-                 ORDER BY ts.time_bucket
-             """
-
-             latency_result = await pg_conn.execute(
-                 text(creation_latency_query),
-                 {"namespace": namespace, "start_time": start_time}
-             )
-
-             creation_latency = [
-                 {"time": row.time_bucket.isoformat(), "value": round(row.avg_latency, 2), "metric": "Creation latency"}
-                 for row in latency_result
-             ]
-
-             return {
-                 "task_trend": task_trend,
-                 "concurrency": concurrency,
-                 "processing_time": processing_time,
-                 "creation_latency": creation_latency,
-                 "granularity": granularity
-             }
-
-     except Exception as e:
-         logger.error(f"Failed to fetch overview statistics: {e}")
-         traceback.print_exc()
-         raise HTTPException(status_code=500, detail=str(e))
-
- @router.get("/data/top-queues/{namespace}")
- async def get_top_queues(
-     namespace: str,
-     metric: str = Query("backlog", description="Sort metric: backlog or error"),
-     limit: int = Query(10, description="Number of results to return"),
-     time_range: Optional[str] = Query(None, description="Time range"),
-     queues: Optional[str] = Query(None, description="Comma-separated list of queue names")
- ):
-     """
-     Fetch the top queues (sorted by backlog or error rate).
-     """
-     try:
-         data_access = get_namespace_data_access()
-
-         if metric == "backlog":
-             # Fetch the queues with the largest backlog
-             queues_data = await data_access.get_queue_stats(namespace)
-
-             # Apply the queue filter if one was specified
-             if queues:
-                 queue_filter = [q.strip() for q in queues.split(',') if q.strip()]
-                 queues_data = [q for q in queues_data if get_base_queue_name(q['queue_name']) in queue_filter]
-
-             # Sort by backlog size
-             sorted_queues = sorted(queues_data, key=lambda x: x.get('pending', 0), reverse=True)[:limit]
-
-             result = []
-             for queue in sorted_queues:
-                 backlog = queue.get('pending', 0)
-                 status = 'normal'
-                 if backlog > 1000:
-                     status = 'critical'
-                 elif backlog > 100:
-                     status = 'warning'
-
-                 result.append({
-                     "queue": get_base_queue_name(queue['queue_name']),
-                     "backlog": backlog,
-                     "status": status
-                 })
-
-             return {"success": True, "data": result}
-
-         elif metric == "error":
-             # Fetch the queues with the highest error rate
-             conn = await data_access.manager.get_connection(namespace)
-             if not conn.pg_config:
-                 return {"success": True, "data": []}
-
-             # Parse the time range
-             time_delta = parse_time_range(time_range) if time_range else timedelta(hours=24)
-             start_time = datetime.now(timezone.utc) - time_delta
-
-             # Build the queue filter condition
-             queue_condition = ""
-             if queues:
-                 queue_filter = [q.strip() for q in queues.split(',') if q.strip()]
-                 queue_list = "', '".join(queue_filter)
-                 queue_condition = f"AND queue IN ('{queue_list}')"
-
-             async with conn.async_engine.begin() as pg_conn:
-                 query = f"""
-                     SELECT
-                         queue,
-                         COUNT(*) as total,
-                         COUNT(CASE WHEN status = 'failed' THEN 1 END) as failed,
-                         ROUND(COUNT(CASE WHEN status = 'failed' THEN 1 END) * 100.0 / COUNT(*), 2) as error_rate
-                     FROM tasks
-                     WHERE namespace = :namespace
-                     AND created_at >= :start_time
-                     {queue_condition}
-                     GROUP BY queue
-                     HAVING COUNT(CASE WHEN status = 'failed' THEN 1 END) > 0
-                     ORDER BY error_rate DESC
-                     LIMIT :limit
-                 """
-
-                 result = await pg_conn.execute(
-                     text(query),
-                     {"namespace": namespace, "start_time": start_time, "limit": limit}
-                 )
-
-                 data = [
-                     {
-                         "queue": row.queue,
-                         "errorRate": row.error_rate,
-                         "failed": row.failed,
-                         "total": row.total
-                     }
-                     for row in result
-                 ]
-
-                 return {"success": True, "data": data}
-
-         else:
-             raise HTTPException(status_code=400, detail="Invalid metric parameter")
-
-     except Exception as e:
-         logger.error(f"Failed to fetch top queues: {e}")
-         traceback.print_exc()
-         raise HTTPException(status_code=500, detail=str(e))
-
- @router.get("/data/queue-backlog-trend/{namespace}")
- async def get_queue_backlog_trend(
-     namespace: str,
-     time_range: str = "1h",
-     queues: Optional[str] = Query(None, description="Comma-separated list of queue names")
- ):
-     """
-     Fetch queue backlog trend data.
-     """
-     try:
-         data_access = get_namespace_data_access()
-         conn = await data_access.manager.get_connection(namespace)
-
-         if not conn.pg_config:
-             return {"success": True, "data": []}
-
-         # Parse the time range
-         time_delta = parse_time_range(time_range)
-         start_time = datetime.now(timezone.utc) - time_delta
-
-         # Parse the queue filter
-         queue_filter = []
-         if queues:
-             queue_filter = [q.strip() for q in queues.split(',') if q.strip()]
-
-         async with conn.async_engine.begin() as pg_conn:
-             # Build the queue filter condition
-             queue_condition = ""
-             if queue_filter:
-                 queue_list = "', '".join(queue_filter)
-                 queue_condition = f"AND queue IN ('{queue_list}')"
-
-             # Choose the data granularity based on the time range
-             if time_delta <= timedelta(hours=1):
-                 granularity = "minute"
-             elif time_delta <= timedelta(hours=6):
-                 granularity = "5 minutes"
-             elif time_delta <= timedelta(days=1):
-                 granularity = "hour"
-             else:
-                 granularity = "day"
-
-             query = f"""
-                 SELECT
-                     date_trunc('{granularity}', created_at) as time_bucket,
-                     COUNT(CASE WHEN status = 'pending' THEN 1 END) as pending_count
-                 FROM tasks
-                 WHERE namespace = :namespace
-                 AND created_at >= :start_time
-                 {queue_condition}
-                 GROUP BY date_trunc('{granularity}', created_at)
-                 ORDER BY time_bucket
-             """
-
-             result = await pg_conn.execute(
-                 text(query),
-                 {"namespace": namespace, "start_time": start_time}
-             )
-
-             data = [
-                 {
-                     "time": row.time_bucket.isoformat(),
-                     "value": row.pending_count,
-                     "metric": "Pending task count"
-                 }
-                 for row in result
-             ]
-
-             return {"success": True, "data": data}
-
-     except Exception as e:
-         logger.error(f"Failed to fetch queue backlog trend: {e}")
-         traceback.print_exc()
-         raise HTTPException(status_code=500, detail=str(e))
-
- # ==================== Queue management endpoints ====================
-
- @router.get("/queues/{namespace}")
- async def get_queues(namespace: str):
-     """Fetch the queue list for the given namespace."""
-     try:
-         namespace_access = get_namespace_data_access()
-         queues_data = await namespace_access.get_queue_stats(namespace)
-         return {
-             "success": True,
-             "data": list(set([get_base_queue_name(q['queue_name']) for q in queues_data]))
-         }
-     except Exception as e:
-         logger.error(f"Failed to fetch queue list: {e}")
-         traceback.print_exc()
-         raise HTTPException(status_code=500, detail=str(e))
-
- @router.post("/data/queue-details/{namespace}")
- async def get_queue_details(
-     namespace: str,
-     params: Dict[str, Any]
- ):
-     """Fetch queue details."""
-     try:
-         data_access = get_namespace_data_access()
-
-         # Read the pagination parameters
-         page = params.get('page', 1)
-         page_size = params.get('pageSize', 10)
-
-         # Fetch queue statistics
-         queues_data = await data_access.get_queue_stats(namespace)
-
-         # Aggregate the data
-         processed_data = []
-         for queue in queues_data:
-             base_name = get_base_queue_name(queue['queue_name'])
-             existing = next((q for q in processed_data if q['queue_name'] == base_name), None)
-
-             if existing:
-                 existing['pending'] += queue.get('pending', 0)
-                 existing['running'] += queue.get('running', 0)
-                 existing['completed'] += queue.get('completed', 0)
-                 existing['failed'] += queue.get('failed', 0)
-             else:
-                 processed_data.append({
-                     'queue_name': base_name,
-                     'pending': queue.get('pending', 0),
-                     'running': queue.get('running', 0),
-                     'completed': queue.get('completed', 0),
-                     'failed': queue.get('failed', 0),
-                     'total': queue.get('pending', 0) + queue.get('running', 0) +
-                              queue.get('completed', 0) + queue.get('failed', 0)
-                 })
-
-         # Paginate
-         start = (page - 1) * page_size
-         end = start + page_size
-         paginated_data = processed_data[start:end]
-
-         return {
-             "success": True,
-             "data": paginated_data,
-             "total": len(processed_data),
-             "page": page,
-             "pageSize": page_size
-         }
-
-     except Exception as e:
-         logger.error(f"Failed to fetch queue details: {e}")
-         traceback.print_exc()
-         raise HTTPException(status_code=500, detail=str(e))
-
- @router.delete("/queue/{queue_name}")
- async def delete_queue(queue_name: str):
-     """Delete a queue."""
-     try:
-         namespace_access = get_namespace_data_access()
-         # Queue deletion logic still needs to be implemented here
-         # Return success for now
-         return {"success": True, "message": f"Queue {queue_name} deleted"}
-     except Exception as e:
-         logger.error(f"Failed to delete queue: {e}")
-         traceback.print_exc()
-         raise HTTPException(status_code=500, detail=str(e))
-
- @router.post("/queue/{queue_name}/trim")
- async def trim_queue(queue_name: str, params: Dict[str, Any]):
-     """Trim a queue."""
-     try:
-         keep_count = params.get('keep_count', 0)
-         # Queue trimming logic still needs to be implemented here
-         # Return success for now
-         return {"success": True, "message": f"Queue {queue_name} trimmed, keeping {keep_count} entries"}
-     except Exception as e:
-         logger.error(f"Failed to trim queue: {e}")
-         traceback.print_exc()
-         raise HTTPException(status_code=500, detail=str(e))
-
- @router.post("/data/queue-timeline/{namespace}")
- async def get_queue_timeline(
-     namespace: str,
-     params: TimeRangeQuery
- ):
-     """Fetch queue timeline data."""
-     try:
-         data_access = get_namespace_data_access()
-         conn = await data_access.manager.get_connection(namespace)
-
-         if not conn.pg_config:
-             return {"data": [], "stats": {}}
-
-         # Queue timeline retrieval logic still needs to be implemented
-         return {
-             "data": [],
-             "stats": {
-                 "total_messages": 0,
-                 "avg_processing_time": 0,
-                 "max_processing_time": 0
-             }
-         }
-
-     except Exception as e:
-         logger.error(f"Failed to fetch queue timeline: {e}")
-         traceback.print_exc()
-         raise HTTPException(status_code=500, detail=str(e))
-
- @router.post("/data/queue-flow-rates/{namespace}")
- async def get_queue_flow_rates(
-     namespace: str,
-     params: TimeRangeQuery
- ):
-     """Fetch queue flow rates."""
-     try:
-         data_access = get_namespace_data_access()
-         conn = await data_access.manager.get_connection(namespace)
-
-         if not conn.pg_config:
-             return {"data": []}
-
-         # Queue flow-rate retrieval logic still needs to be implemented
-         return {"data": []}
-
-     except Exception as e:
-         logger.error(f"Failed to fetch queue flow rates: {e}")
-         traceback.print_exc()
-         raise HTTPException(status_code=500, detail=str(e))
-
- # ==================== Task management endpoints ====================
-
- @router.post("/data/tasks/{namespace}")
- async def get_tasks(
-     namespace: str,
-     params: Dict[str, Any]
- ):
-     """Fetch the task list."""
-     try:
-         data_access = get_namespace_data_access()
-         conn = await data_access.manager.get_connection(namespace)
-
-         if not conn.pg_config:
-             return {"data": [], "total": 0}
-
-         # Read the pagination parameters
-         page = params.get('page', 1)
-         page_size = params.get('pageSize', 20)
-         queue_name = params.get('queue_name')
-         status = params.get('status')
-
-         offset = (page - 1) * page_size
-
-         async with conn.async_engine.begin() as pg_conn:
-             # Build the query conditions
-             conditions = ["namespace = :namespace"]
-             query_params = {"namespace": namespace}
-
-             if queue_name:
-                 conditions.append("queue = :queue")
-                 query_params["queue"] = queue_name
-
-             if status:
-                 conditions.append("status = :status")
-                 query_params["status"] = status
-
-             where_clause = " AND ".join(conditions)
-
-             # Fetch the total count
-             count_query = f"SELECT COUNT(*) as total FROM tasks WHERE {where_clause}"
-             count_result = await pg_conn.execute(text(count_query), query_params)
-             total = count_result.fetchone().total
-
-             # Fetch the task list
-             query = f"""
-                 SELECT
-                     task_id,
-                     queue,
-                     status,
-                     created_at,
-                     started_at,
-                     completed_at,
-                     error_message
-                 FROM tasks
-                 WHERE {where_clause}
-                 ORDER BY created_at DESC
-                 LIMIT :limit OFFSET :offset
-             """
-
-             query_params["limit"] = page_size
-             query_params["offset"] = offset
-
-             result = await pg_conn.execute(text(query), query_params)
-
-             tasks = []
-             for row in result:
-                 tasks.append({
-                     "task_id": row.task_id,
-                     "queue": row.queue,
-                     "status": row.status,
-                     "created_at": row.created_at.isoformat() if row.created_at else None,
-                     "started_at": row.started_at.isoformat() if row.started_at else None,
-                     "completed_at": row.completed_at.isoformat() if row.completed_at else None,
-                     "error_message": row.error_message
-                 })
-
-             return {
-                 "data": tasks,
-                 "total": total,
-                 "page": page,
-                 "pageSize": page_size
-             }
-
-     except Exception as e:
-         logger.error(f"Failed to fetch task list: {e}")
-         traceback.print_exc()
-         raise HTTPException(status_code=500, detail=str(e))
-
- @router.get("/task/{task_id}/details")
- async def get_task_details(
-     task_id: str,
-     consumer_group: Optional[str] = Query(None)
- ):
-     """Fetch task details."""
-     try:
-         # Task detail retrieval logic still needs to be implemented
-         return {
-             "task_id": task_id,
-             "status": "completed",
-             "created_at": datetime.now().isoformat(),
-             "consumer_group": consumer_group
-         }
-     except Exception as e:
-         logger.error(f"Failed to fetch task details: {e}")
-         traceback.print_exc()
-         raise HTTPException(status_code=500, detail=str(e))
-
- # ==================== Scheduled task endpoints ====================
-
- @router.get("/data/scheduled-tasks/{namespace}")
- async def get_scheduled_tasks(
-     namespace: str,
-     limit: int = Query(20),
-     offset: int = Query(0)
- ):
-     """Fetch the scheduled task list."""
-     try:
-         data_access = get_namespace_data_access()
-         conn = await data_access.manager.get_connection(namespace)
-
-         if not conn.pg_config:
-             return {"data": [], "total": 0}
-
-         async with conn.async_engine.begin() as pg_conn:
-             # Fetch the total count
-             count_query = """
-                 SELECT COUNT(*) as total
-                 FROM scheduled_tasks
-                 WHERE namespace = :namespace
-             """
-             count_result = await pg_conn.execute(
-                 text(count_query),
-                 {"namespace": namespace}
-             )
-             total = count_result.fetchone().total
-
-             # Fetch the scheduled task list
-             query = """
-                 SELECT
-                     id,
-                     task_name,
-                     queue_name,
-                     task_data,
-                     cron_expression,
-                     interval_seconds,
-                     enabled,
-                     last_run_at,
-                     next_run_at,
-                     created_at
-                 FROM scheduled_tasks
-                 WHERE namespace = :namespace
-                 ORDER BY created_at DESC
-                 LIMIT :limit OFFSET :offset
-             """
-
-             result = await pg_conn.execute(
-                 text(query),
-                 {"namespace": namespace, "limit": limit, "offset": offset}
-             )
-
-             tasks = []
-             for row in result:
-                 tasks.append({
-                     "id": row.id,
-                     "task_name": row.task_name,
-                     "queue_name": row.queue_name,
-                     "task_data": row.task_data,
-                     "cron_expression": row.cron_expression,
-                     "interval_seconds": row.interval_seconds,
-                     "enabled": row.enabled,
-                     "last_run_at": row.last_run_at.isoformat() if row.last_run_at else None,
-                     "next_run_at": row.next_run_at.isoformat() if row.next_run_at else None,
-                     "created_at": row.created_at.isoformat() if row.created_at else None
-                 })
-
-             return {
-                 "data": tasks,
-                 "total": total
-             }
-
-     except Exception as e:
-         logger.error(f"Failed to fetch scheduled task list: {e}")
-         traceback.print_exc()
-         raise HTTPException(status_code=500, detail=str(e))
-
- @router.get("/scheduled-tasks/statistics/{namespace}")
- async def get_scheduled_tasks_statistics(namespace: str):
-     """Fetch scheduled task statistics."""
-     try:
-         data_access = get_namespace_data_access()
-         conn = await data_access.manager.get_connection(namespace)
-
-         if not conn.pg_config:
-             return {
-                 "total": 0,
-                 "enabled": 0,
-                 "disabled": 0,
-                 "running": 0
-             }
-
-         async with conn.async_engine.begin() as pg_conn:
-             query = """
-                 SELECT
-                     COUNT(*) as total,
-                     COUNT(CASE WHEN enabled = true THEN 1 END) as enabled,
-                     COUNT(CASE WHEN enabled = false THEN 1 END) as disabled
-                 FROM scheduled_tasks
-                 WHERE namespace = :namespace
-             """
-
-             result = await pg_conn.execute(
-                 text(query),
-                 {"namespace": namespace}
-             )
-
-             stats = result.fetchone()
-
-             return {
-                 "total": stats.total,
-                 "enabled": stats.enabled,
-                 "disabled": stats.disabled,
-                 "running": 0  # Counting running tasks still needs to be implemented
-             }
-
-     except Exception as e:
-         logger.error(f"Failed to fetch scheduled task statistics: {e}")
-         traceback.print_exc()
-         raise HTTPException(status_code=500, detail=str(e))
-
- @router.post("/scheduled-tasks")
- async def create_scheduled_task(task: ScheduledTaskCreate):
-     """Create a scheduled task."""
-     try:
-         # Scheduled task creation logic still needs to be implemented
-         return {"success": True, "message": "Scheduled task created"}
-     except Exception as e:
-         logger.error(f"Failed to create scheduled task: {e}")
-         traceback.print_exc()
-         raise HTTPException(status_code=500, detail=str(e))
-
- @router.put("/scheduled-tasks/{task_id}")
- async def update_scheduled_task(task_id: str, task: ScheduledTaskUpdate):
-     """Update a scheduled task."""
-     try:
-         # Scheduled task update logic still needs to be implemented
-         return {"success": True, "message": "Scheduled task updated"}
-     except Exception as e:
-         logger.error(f"Failed to update scheduled task: {e}")
-         traceback.print_exc()
-         raise HTTPException(status_code=500, detail=str(e))
-
- @router.delete("/scheduled-tasks/{task_id}")
- async def delete_scheduled_task(task_id: str):
-     """Delete a scheduled task."""
-     try:
-         # Scheduled task deletion logic still needs to be implemented
-         return {"success": True, "message": "Scheduled task deleted"}
-     except Exception as e:
-         logger.error(f"Failed to delete scheduled task: {e}")
-         traceback.print_exc()
-         raise HTTPException(status_code=500, detail=str(e))
-
- @router.post("/scheduled-tasks/{task_id}/toggle")
- async def toggle_scheduled_task(task_id: str):
-     """Enable or disable a scheduled task."""
-     try:
-         # Scheduled task toggle logic still needs to be implemented
-         return {"success": True, "message": "Scheduled task status toggled"}
-     except Exception as e:
-         logger.error(f"Failed to toggle scheduled task status: {e}")
-         traceback.print_exc()
-         raise HTTPException(status_code=500, detail=str(e))
-
- @router.post("/scheduled-tasks/{task_id}/execute")
- async def execute_scheduled_task(task_id: str):
-     """Execute a scheduled task immediately."""
-     try:
-         # Immediate scheduled task execution logic still needs to be implemented
-         return {"success": True, "message": "Scheduled task execution triggered"}
-     except Exception as e:
-         logger.error(f"Failed to execute scheduled task: {e}")
-         traceback.print_exc()
-         raise HTTPException(status_code=500, detail=str(e))
-
- # ==================== Alert rule endpoints ====================
-
- @router.get("/alert-rules")
- async def get_alert_rules():
-     """Fetch the alert rule list."""
-     try:
-         # Alert rule retrieval logic still needs to be implemented
-         return {"data": []}
-     except Exception as e:
-         logger.error(f"Failed to fetch alert rules: {e}")
-         traceback.print_exc()
-         raise HTTPException(status_code=500, detail=str(e))
-
- @router.post("/alert-rules")
- async def create_alert_rule(rule: AlertRuleCreate):
-     """Create an alert rule."""
-     try:
-         # Alert rule creation logic still needs to be implemented
-         return {"success": True, "message": "Alert rule created"}
-     except Exception as e:
-         logger.error(f"Failed to create alert rule: {e}")
-         traceback.print_exc()
-         raise HTTPException(status_code=500, detail=str(e))
-
- @router.put("/alert-rules/{rule_id}")
- async def update_alert_rule(rule_id: str, rule: AlertRuleCreate):
-     """Update an alert rule."""
-     try:
-         # Alert rule update logic still needs to be implemented
-         return {"success": True, "message": "Alert rule updated"}
-     except Exception as e:
-         logger.error(f"Failed to update alert rule: {e}")
-         traceback.print_exc()
-         raise HTTPException(status_code=500, detail=str(e))
-
- @router.delete("/alert-rules/{rule_id}")
- async def delete_alert_rule(rule_id: str):
-     """Delete an alert rule."""
-     try:
-         # Alert rule deletion logic still needs to be implemented
-         return {"success": True, "message": "Alert rule deleted"}
-     except Exception as e:
-         logger.error(f"Failed to delete alert rule: {e}")
-         traceback.print_exc()
-         raise HTTPException(status_code=500, detail=str(e))
-
- @router.put("/alert-rules/{rule_id}/toggle")
- async def toggle_alert_rule(rule_id: str):
-     """Enable or disable an alert rule."""
-     try:
-         # Alert rule toggle logic still needs to be implemented
-         return {"success": True, "message": "Alert rule status toggled"}
-     except Exception as e:
-         logger.error(f"Failed to toggle alert rule status: {e}")
-         traceback.print_exc()
-         raise HTTPException(status_code=500, detail=str(e))
-
- @router.get("/alert-rules/{rule_id}/history")
- async def get_alert_history(rule_id: str):
-     """Fetch alert history."""
-     try:
-         # Alert history retrieval logic still needs to be implemented
-         return {"data": []}
-     except Exception as e:
-         logger.error(f"Failed to fetch alert history: {e}")
-         traceback.print_exc()
-         raise HTTPException(status_code=500, detail=str(e))
-
- @router.post("/alert-rules/{rule_id}/test")
- async def test_alert_rule(rule_id: str):
-     """Test an alert rule."""
-     try:
-         # Alert rule test logic still needs to be implemented
-         return {"success": True, "message": "Alert rule test succeeded"}
-     except Exception as e:
-         logger.error(f"Failed to test alert rule: {e}")
-         traceback.print_exc()
-         raise HTTPException(status_code=500, detail=str(e))
-
- # ==================== Namespace management endpoints ====================
-
- @router.get("/namespaces")
- async def get_namespaces():
-     """Fetch the namespace list."""
-     try:
-         # Get the database connection from the configuration
-         if not task_center_config.pg_url:
-             return []
-
-         engine = create_async_engine(task_center_config.pg_url)
-         AsyncSessionLocal = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
-
-         async with AsyncSessionLocal() as session:
-             query = text("""
-                 SELECT id, name, description, redis_config, pg_config, created_at, updated_at
-                 FROM namespaces
-                 ORDER BY name
-             """)
-             result = await session.execute(query)
-             namespaces = []
-             for row in result:
-                 # Parse the configs
-                 redis_config = row.redis_config if row.redis_config else {}
-                 pg_config = row.pg_config if row.pg_config else {}
-
-                 namespaces.append({
-                     "id": row.id,
-                     "name": row.name,
-                     "redis_url": redis_config.get("url", ""),
-                     "pg_url": pg_config.get("url", ""),
-                     "description": row.description or "",
-                     "created_at": row.created_at.isoformat() if row.created_at else None,
-                     "updated_at": row.updated_at.isoformat() if row.updated_at else None
-                 })
-
-             return namespaces
-
-     except Exception as e:
-         logger.error(f"Failed to fetch namespace list: {e}")
-         traceback.print_exc()
-         raise HTTPException(status_code=500, detail=str(e))
-
- @router.get("/data/namespaces")
- async def get_data_namespaces():
-     """Fetch the namespace list (data-API variant)."""
-     return await get_namespaces()
-
- @router.post("/namespaces")
- async def create_namespace(namespace: NamespaceCreate):
-     """Create a namespace."""
-     try:
-         if not task_center_config.pg_url:
-             raise HTTPException(status_code=500, detail="Database not configured")
-
-         engine = create_async_engine(task_center_config.pg_url)
-         AsyncSessionLocal = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
-
-         async with AsyncSessionLocal() as session:
-             # Check whether the namespace already exists
-             check_query = text("SELECT id FROM namespaces WHERE name = :name")
-             existing = await session.execute(check_query, {"name": namespace.name})
-             if existing.fetchone():
-                 raise HTTPException(status_code=400, detail="Namespace already exists")
-
-             # Create the namespace
-             redis_config = {"url": namespace.redis_url} if namespace.redis_url else {}
-             pg_config = {"url": namespace.pg_url} if namespace.pg_url else {}
-
-             insert_query = text("""
-                 INSERT INTO namespaces (name, description, redis_config, pg_config, created_at, updated_at)
-                 VALUES (:name, :description, :redis_config, :pg_config, NOW(), NOW())
-                 RETURNING id
-             """)
-
-             result = await session.execute(
-                 insert_query,
-                 {
-                     "name": namespace.name,
-                     "description": namespace.description,
-                     "redis_config": json.dumps(redis_config),
-                     "pg_config": json.dumps(pg_config)
-                 }
-             )
-             await session.commit()
-
-             new_id = result.fetchone().id
-
-             return {
-                 "success": True,
-                 "message": "Namespace created",
-                 "id": new_id
-             }
-
-     except HTTPException:
-         raise
-     except Exception as e:
-         logger.error(f"Failed to create namespace: {e}")
-         traceback.print_exc()
-         raise HTTPException(status_code=500, detail=str(e))
-
- @router.put("/namespaces/{name}")
- async def update_namespace(name: str, namespace: NamespaceUpdate):
-     """Update a namespace."""
-     try:
-         if not task_center_config.pg_url:
-             raise HTTPException(status_code=500, detail="Database not configured")
-
-         engine = create_async_engine(task_center_config.pg_url)
-         AsyncSessionLocal = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
-
-         async with AsyncSessionLocal() as session:
-             # Fetch the existing namespace
-             query = text("SELECT redis_config, pg_config FROM namespaces WHERE name = :name")
-             result = await session.execute(query, {"name": name})
-             row = result.fetchone()
-
-             if not row:
-                 raise HTTPException(status_code=404, detail="Namespace not found")
-
-             # Parse the existing configs
-             redis_config = row.redis_config if row.redis_config else {}
-             pg_config = row.pg_config if row.pg_config else {}
-
-             # Update the configs
-             if namespace.redis_url is not None:
-                 redis_config["url"] = namespace.redis_url
-             if namespace.pg_url is not None:
-                 pg_config["url"] = namespace.pg_url
-
-             # Update the database
-             update_query = text("""
-                 UPDATE namespaces
-                 SET redis_config = :redis_config,
-                     pg_config = :pg_config,
-                     description = :description,
-                     updated_at = NOW()
-                 WHERE name = :name
-             """)
-
-             await session.execute(
-                 update_query,
-                 {
-                     "name": name,
-                     "redis_config": json.dumps(redis_config),
-                     "pg_config": json.dumps(pg_config),
-                     "description": namespace.description
-                 }
-             )
-             await session.commit()
-
-             return {"success": True, "message": "Namespace updated"}
-
-     except HTTPException:
-         raise
-     except Exception as e:
-         logger.error(f"Failed to update namespace: {e}")
-         traceback.print_exc()
-         raise HTTPException(status_code=500, detail=str(e))
-
- @router.delete("/namespaces/{name}")
- async def delete_namespace(name: str):
-     """Delete a namespace."""
-     try:
-         if not task_center_config.pg_url:
-             raise HTTPException(status_code=500, detail="Database not configured")
-
-         engine = create_async_engine(task_center_config.pg_url)
-         AsyncSessionLocal = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
-
-         async with AsyncSessionLocal() as session:
-             # Check whether the namespace exists
-             check_query = text("SELECT id FROM namespaces WHERE name = :name")
-             result = await session.execute(check_query, {"name": name})
-             if not result.fetchone():
-                 raise HTTPException(status_code=404, detail="Namespace not found")
-
-             # Delete the namespace
-             delete_query = text("DELETE FROM namespaces WHERE name = :name")
-             await session.execute(delete_query, {"name": name})
-             await session.commit()
-
-             return {"success": True, "message": "Namespace deleted"}
-
-     except HTTPException:
-         raise
-     except Exception as e:
-         logger.error(f"Failed to delete namespace: {e}")
-         traceback.print_exc()
-         raise HTTPException(status_code=500, detail=str(e))
-
- @router.get("/namespaces/{name}")
- async def get_namespace_details(name: str):
-     """Fetch namespace details."""
-     try:
-         if not task_center_config.pg_url:
-             raise HTTPException(status_code=500, detail="Database not configured")
-
-         engine = create_async_engine(task_center_config.pg_url)
-         AsyncSessionLocal = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
-
-         async with AsyncSessionLocal() as session:
-             query = text("""
-                 SELECT id, name, description, redis_config, pg_config, created_at, updated_at
-                 FROM namespaces
-                 WHERE name = :name
-             """)
-             result = await session.execute(query, {"name": name})
-             row = result.fetchone()
-
-             if not row:
-                 raise HTTPException(status_code=404, detail="Namespace not found")
-
-             # Parse the configs
-             redis_config = row.redis_config if row.redis_config else {}
-             pg_config = row.pg_config if row.pg_config else {}
-
-             return {
-                 "id": row.id,
-                 "name": row.name,
-                 "redis_url": redis_config.get("url", ""),
-                 "pg_url": pg_config.get("url", ""),
-                 "description": row.description or "",
-                 "created_at": row.created_at.isoformat() if row.created_at else None,
-                 "updated_at": row.updated_at.isoformat() if row.updated_at else None
-             }
-
-     except HTTPException:
-         raise
-     except Exception as e:
-         logger.error(f"Failed to fetch namespace details: {e}")
-         traceback.print_exc()
-         raise HTTPException(status_code=500, detail=str(e))
-
- # ==================== Redis monitoring endpoints ====================
-
- @router.get("/redis/monitor/{namespace}")
- async def get_redis_monitor(namespace: str):
-     """Fetch Redis monitoring information."""
-     try:
-         data_access = get_namespace_data_access()
-         conn = await data_access.manager.get_connection(namespace)
-
-         # Get the Redis client
-         try:
-             redis_client = await conn.get_redis_client()
-         except ValueError as e:
-             raise HTTPException(status_code=404, detail=str(e))
-
-         # Fetch Redis INFO
-         info = await redis_client.info()
-
-         # Build the memory info
-         memory_info = {
-             "used_memory": info.get("used_memory", 0),
-             "used_memory_human": info.get("used_memory_human", "0B"),
-             "usage_percentage": None,
-             "maxmemory": info.get("maxmemory", 0),
-             "maxmemory_human": "0B",
-             "mem_fragmentation_ratio": info.get("mem_fragmentation_ratio", 1.0)
-         }
-
-         # Compute the memory usage percentage
-         if info.get("maxmemory") and info.get("maxmemory") > 0:
-             memory_info["usage_percentage"] = round(
-                 (info.get("used_memory", 0) / info.get("maxmemory")) * 100, 2
-             )
-             memory_info["maxmemory_human"] = f"{info.get('maxmemory') / (1024*1024):.1f}MB"
-
-         # Build the client info
-         clients_info = {
-             "connected_clients": info.get("connected_clients", 0),
-             "blocked_clients": info.get("blocked_clients", 0)
-         }
-
-         # Build the stats info
-         stats_info = {
-             "instantaneous_ops_per_sec": info.get("instantaneous_ops_per_sec", 0),
-             "hit_rate": 0,
-             "keyspace_hits": info.get("keyspace_hits", 0),
-             "keyspace_misses": info.get("keyspace_misses", 0)
-         }
-
-         # Compute the hit rate
-         total_hits = stats_info["keyspace_hits"] + stats_info["keyspace_misses"]
-         if total_hits > 0:
-             stats_info["hit_rate"] = round(
-                 (stats_info["keyspace_hits"] / total_hits) * 100, 2
-             )
-
-         # Build the keyspace info
-         keyspace_info = {
-             "total_keys": 0
-         }
-
-         # Sum the key counts across all databases
-         for key in info:
-             if key.startswith("db"):
-                 db_info = info[key]
-                 if isinstance(db_info, dict):
-                     keyspace_info["total_keys"] += db_info.get("keys", 0)
-
-         # Build the server info
-         server_info = {
-             "redis_version": info.get("redis_version", "unknown"),
-             "uptime_in_seconds": info.get("uptime_in_seconds", 0)
-         }
-
-         return {
-             "success": True,
-             "data": {
-                 "status": "healthy",
-                 "memory": memory_info,
-                 "clients": clients_info,
-                 "stats": stats_info,
-                 "keyspace": keyspace_info,
-                 "server": server_info
-             }
-         }
-
-     except HTTPException:
-         raise
-     except Exception as e:
-         logger.error(f"Failed to fetch Redis monitoring information: {e}")
-         traceback.print_exc()
-         raise HTTPException(status_code=500, detail=str(e))
-
- # ==================== Stream backlog monitoring endpoints ====================
-
- @router.get("/stream-backlog/{namespace}")
- async def get_stream_backlog(
-     namespace: str,
-     time_range: str = Query("1h", description="Time range"),
-     queue: Optional[str] = Query(None, description="Queue name")
- ):
-     """Fetch stream backlog data."""
-     try:
-         data_access = get_namespace_data_access()
-         conn = await data_access.manager.get_connection(namespace)
-
-         if not conn.pg_config:
-             return {"data": []}
-
-         # Parse the time range
-         time_delta = parse_time_range(time_range)
-         start_time = datetime.now(timezone.utc) - time_delta
-
-         async with conn.async_engine.begin() as pg_conn:
-             # Build the query conditions
-             conditions = ["namespace = :namespace", "created_at >= :start_time"]
-             params = {"namespace": namespace, "start_time": start_time}
-
-             if queue:
-                 conditions.append("stream_name = :queue")
-                 params["queue"] = queue
-
-             where_clause = " AND ".join(conditions)
-
-             query = f"""
-                 SELECT
-                     stream_name,
-                     consumer_group,
-                     consumer_lag,
-                     last_published_offset,
-                     last_acknowledged_offset,
-                     created_at
-                 FROM stream_backlog_monitor
-                 WHERE {where_clause}
-                 ORDER BY created_at DESC
-                 LIMIT 1000
-             """
-
-             result = await pg_conn.execute(text(query), params)
-
-             data = []
-             for row in result:
-                 data.append({
-                     "stream_name": row.stream_name,
-                     "consumer_group": row.consumer_group,
-                     "consumer_lag": row.consumer_lag,
-                     "last_published_offset": row.last_published_offset,
-                     "last_acknowledged_offset": row.last_acknowledged_offset,
-                     "created_at": row.created_at.isoformat()
-                 })
-
-             return {"data": data}
-
-     except Exception as e:
-         logger.error(f"Failed to fetch stream backlog data: {e}")
-         traceback.print_exc()
-         raise HTTPException(status_code=500, detail=str(e))
-
- # Export the router
- __all__ = ["router"]