jettask 0.2.1__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. jettask/constants.py +213 -0
  2. jettask/core/app.py +525 -205
  3. jettask/core/cli.py +193 -185
  4. jettask/core/consumer_manager.py +126 -34
  5. jettask/core/context.py +3 -0
  6. jettask/core/enums.py +137 -0
  7. jettask/core/event_pool.py +501 -168
  8. jettask/core/message.py +147 -0
  9. jettask/core/offline_worker_recovery.py +181 -114
  10. jettask/core/task.py +10 -174
  11. jettask/core/task_batch.py +153 -0
  12. jettask/core/unified_manager_base.py +243 -0
  13. jettask/core/worker_scanner.py +54 -54
  14. jettask/executors/asyncio.py +184 -64
  15. jettask/webui/backend/config.py +51 -0
  16. jettask/webui/backend/data_access.py +2083 -92
  17. jettask/webui/backend/data_api.py +3294 -0
  18. jettask/webui/backend/dependencies.py +261 -0
  19. jettask/webui/backend/init_meta_db.py +158 -0
  20. jettask/webui/backend/main.py +1358 -69
  21. jettask/webui/backend/main_unified.py +78 -0
  22. jettask/webui/backend/main_v2.py +394 -0
  23. jettask/webui/backend/namespace_api.py +295 -0
  24. jettask/webui/backend/namespace_api_old.py +294 -0
  25. jettask/webui/backend/namespace_data_access.py +611 -0
  26. jettask/webui/backend/queue_backlog_api.py +727 -0
  27. jettask/webui/backend/queue_stats_v2.py +521 -0
  28. jettask/webui/backend/redis_monitor_api.py +476 -0
  29. jettask/webui/backend/unified_api_router.py +1601 -0
  30. jettask/webui/db_init.py +204 -32
  31. jettask/webui/frontend/package-lock.json +492 -1
  32. jettask/webui/frontend/package.json +4 -1
  33. jettask/webui/frontend/src/App.css +105 -7
  34. jettask/webui/frontend/src/App.jsx +49 -20
  35. jettask/webui/frontend/src/components/NamespaceSelector.jsx +166 -0
  36. jettask/webui/frontend/src/components/QueueBacklogChart.jsx +298 -0
  37. jettask/webui/frontend/src/components/QueueBacklogTrend.jsx +638 -0
  38. jettask/webui/frontend/src/components/QueueDetailsTable.css +65 -0
  39. jettask/webui/frontend/src/components/QueueDetailsTable.jsx +487 -0
  40. jettask/webui/frontend/src/components/QueueDetailsTableV2.jsx +465 -0
  41. jettask/webui/frontend/src/components/ScheduledTaskFilter.jsx +423 -0
  42. jettask/webui/frontend/src/components/TaskFilter.jsx +425 -0
  43. jettask/webui/frontend/src/components/TimeRangeSelector.css +21 -0
  44. jettask/webui/frontend/src/components/TimeRangeSelector.jsx +160 -0
  45. jettask/webui/frontend/src/components/layout/AppLayout.css +95 -0
  46. jettask/webui/frontend/src/components/layout/AppLayout.jsx +49 -0
  47. jettask/webui/frontend/src/components/layout/Header.css +34 -10
  48. jettask/webui/frontend/src/components/layout/Header.jsx +31 -23
  49. jettask/webui/frontend/src/components/layout/SideMenu.css +137 -0
  50. jettask/webui/frontend/src/components/layout/SideMenu.jsx +209 -0
  51. jettask/webui/frontend/src/components/layout/TabsNav.css +244 -0
  52. jettask/webui/frontend/src/components/layout/TabsNav.jsx +206 -0
  53. jettask/webui/frontend/src/components/layout/UserInfo.css +197 -0
  54. jettask/webui/frontend/src/components/layout/UserInfo.jsx +197 -0
  55. jettask/webui/frontend/src/contexts/NamespaceContext.jsx +72 -0
  56. jettask/webui/frontend/src/contexts/TabsContext.backup.jsx +245 -0
  57. jettask/webui/frontend/src/main.jsx +1 -0
  58. jettask/webui/frontend/src/pages/Alerts.jsx +684 -0
  59. jettask/webui/frontend/src/pages/Dashboard.jsx +1330 -0
  60. jettask/webui/frontend/src/pages/QueueDetail.jsx +1109 -10
  61. jettask/webui/frontend/src/pages/QueueMonitor.jsx +236 -115
  62. jettask/webui/frontend/src/pages/Queues.jsx +5 -1
  63. jettask/webui/frontend/src/pages/ScheduledTasks.jsx +809 -0
  64. jettask/webui/frontend/src/pages/Settings.jsx +800 -0
  65. jettask/webui/frontend/src/services/api.js +7 -5
  66. jettask/webui/frontend/src/utils/suppressWarnings.js +22 -0
  67. jettask/webui/frontend/src/utils/userPreferences.js +154 -0
  68. jettask/webui/multi_namespace_consumer.py +543 -0
  69. jettask/webui/pg_consumer.py +983 -246
  70. jettask/webui/static/dist/assets/index-7129cfe1.css +1 -0
  71. jettask/webui/static/dist/assets/index-8d1935cc.js +774 -0
  72. jettask/webui/static/dist/index.html +2 -2
  73. jettask/webui/task_center.py +216 -0
  74. jettask/webui/task_center_client.py +150 -0
  75. jettask/webui/unified_consumer_manager.py +193 -0
  76. {jettask-0.2.1.dist-info → jettask-0.2.4.dist-info}/METADATA +1 -1
  77. jettask-0.2.4.dist-info/RECORD +134 -0
  78. jettask/webui/pg_consumer_slow.py +0 -1099
  79. jettask/webui/pg_consumer_test.py +0 -678
  80. jettask/webui/static/dist/assets/index-823408e8.css +0 -1
  81. jettask/webui/static/dist/assets/index-9968b0b8.js +0 -543
  82. jettask/webui/test_pg_consumer_recovery.py +0 -547
  83. jettask/webui/test_recovery_simple.py +0 -492
  84. jettask/webui/test_self_recovery.py +0 -467
  85. jettask-0.2.1.dist-info/RECORD +0 -91
  86. {jettask-0.2.1.dist-info → jettask-0.2.4.dist-info}/WHEEL +0 -0
  87. {jettask-0.2.1.dist-info → jettask-0.2.4.dist-info}/entry_points.txt +0 -0
  88. {jettask-0.2.1.dist-info → jettask-0.2.4.dist-info}/licenses/LICENSE +0 -0
  89. {jettask-0.2.1.dist-info → jettask-0.2.4.dist-info}/top_level.txt +0 -0
jettask/webui/backend/queue_backlog_api.py
@@ -0,0 +1,727 @@
+ """
+ Queue backlog monitoring API.
+ Reads historical backlog data from the stream_backlog_monitor table.
+ """
+
+ from fastapi import APIRouter, Query, HTTPException
+ from typing import Optional, List, Dict, Any
+ from datetime import datetime, timedelta, timezone
+ from pydantic import BaseModel
+ import asyncpg
+ import os
+ from contextlib import asynccontextmanager
+ import traceback
+
+ router = APIRouter(prefix="/api/queue-backlog", tags=["queue-backlog"])
+
+
+ class BacklogTrendRequest(BaseModel):
+     """Backlog trend request parameters"""
+     namespace: str = "default"
+     queues: Optional[List[str]] = None  # None means all queues
+     time_range: str = "1h"  # 15m, 30m, 1h, 3h, 6h, 12h, 24h, 3d, 7d
+     start_time: Optional[datetime] = None
+     end_time: Optional[datetime] = None
+     granularity: Optional[str] = None  # auto, minute, 5minute, 10minute, 30minute, hour, day
+     include_groups: bool = False  # whether to include consumer-group-level data
+
+
+ class BacklogSnapshot(BaseModel):
+     """Backlog snapshot"""
+     namespace: str
+     queue_name: str
+     consumer_group: Optional[str]
+     timestamp: datetime
+     last_published_offset: int
+     last_delivered_offset: int
+     last_acked_offset: int
+     pending_count: int
+     backlog_undelivered: int
+     backlog_unprocessed: int
+     produce_rate: Optional[float]
+     deliver_rate: Optional[float]
+     ack_rate: Optional[float]
+
+
+ class BacklogStatistics(BaseModel):
+     """Backlog statistics"""
+     queue_name: str
+     current_backlog: int
+     max_backlog: int
+     avg_backlog: float
+     min_backlog: int
+     trend: str  # up, down, stable
+     trend_percentage: float
+     alert_level: Optional[str]  # normal, warning, critical
+
+
+ class BacklogTrendResponse(BaseModel):
+     """Backlog trend response"""
+     success: bool
+     data: List[Dict[str, Any]]
+     statistics: List[BacklogStatistics]
+     granularity: str
+     time_range: Dict[str, datetime]
+
+
+ # Database connection pool
+ _db_pool = None
+
+
+ async def get_db_pool():
+     """Get the database connection pool"""
+     global _db_pool
+     if _db_pool is None:
+         pg_url = os.getenv('JETTASK_PG_URL', 'postgresql://jettask:123456@localhost:5432/jettask')
+         # Parse the connection string
+         if pg_url.startswith('postgresql://'):
+             pg_url = pg_url.replace('postgresql://', '')
+         elif pg_url.startswith('postgresql+asyncpg://'):
+             pg_url = pg_url.replace('postgresql+asyncpg://', '')
+
+         # Split the credentials from the host information
+         if '@' in pg_url:
+             auth, host_info = pg_url.split('@')
+             if ':' in auth:
+                 user, password = auth.split(':')
+             else:
+                 user = auth
+                 password = None
+
+             if '/' in host_info:
+                 host_port, database = host_info.split('/')
+                 if ':' in host_port:
+                     host, port = host_port.split(':')
+                     port = int(port)
+                 else:
+                     host = host_port
+                     port = 5432
+             else:
+                 host = host_info
+                 port = 5432
+                 database = 'jettask'
+         else:
+             user = 'jettask'
+             password = '123456'
+             host = 'localhost'
+             port = 5432
+             database = 'jettask'
+
+         _db_pool = await asyncpg.create_pool(
+             host=host,
+             port=port,
+             user=user,
+             password=password,
+             database=database,
+             min_size=2,
+             max_size=10
+         )
+     return _db_pool
+
+
+ def parse_time_range(time_range: str) -> timedelta:
+     """Parse a time range string into a timedelta"""
+     time_map = {
+         '15m': timedelta(minutes=15),
+         '30m': timedelta(minutes=30),
+         '1h': timedelta(hours=1),
+         '3h': timedelta(hours=3),
+         '6h': timedelta(hours=6),
+         '12h': timedelta(hours=12),
+         '24h': timedelta(hours=24),
+         '1d': timedelta(days=1),
+         '3d': timedelta(days=3),
+         '7d': timedelta(days=7),
+     }
+     return time_map.get(time_range, timedelta(hours=1))
+
+
+ def determine_granularity(time_delta: timedelta, granularity: Optional[str] = None) -> tuple[str, int]:
+     """Determine a suitable data granularity for the time range (modeled on fetch_queue_flow_rates).
+
+     Returns:
+         (granularity_name, interval_seconds)
+     """
+     if granularity and granularity != 'auto':
+         # Seconds mapping when a granularity is specified manually
+         manual_map = {
+             'second': ('second', 1),
+             'minute': ('minute', 60),
+             '5minute': ('5minute', 300),
+             '10minute': ('10minute', 600),
+             '30minute': ('30minute', 1800),
+             'hour': ('hour', 3600),
+             'day': ('day', 86400),
+         }
+         return manual_map.get(granularity, ('minute', 60))
+
+     # Choose a granularity from the time range.
+     # Goal: keep a reasonable number of data points, neither too many nor too few.
+     duration_seconds = time_delta.total_seconds()
+     duration_minutes = duration_seconds / 60
+     duration_hours = duration_minutes / 60
+     duration_days = duration_hours / 24
+
+     # Select the granularity by range
+     if duration_minutes <= 15:
+         # Up to 15 minutes: 5-second granularity (at most 180 points)
+         return ('second', 5)
+     elif duration_minutes <= 30:
+         # Up to 30 minutes: 10-second granularity (at most 180 points)
+         return ('second', 10)
+     elif duration_hours <= 1:
+         # Up to 1 hour: 30-second granularity (at most 120 points)
+         return ('second', 30)
+     elif duration_hours <= 3:
+         # Up to 3 hours: 1-minute granularity (at most 180 points)
+         return ('minute', 60)
+     elif duration_hours <= 6:
+         # Up to 6 hours: 2-minute granularity (at most 180 points)
+         return ('minute', 120)
+     elif duration_hours <= 12:
+         # Up to 12 hours: 5-minute granularity (at most 144 points)
+         return ('5minute', 300)
+     elif duration_hours <= 24:
+         # Up to 24 hours: 10-minute granularity (at most 144 points)
+         return ('10minute', 600)
+     elif duration_days <= 3:
+         # Up to 3 days: 30-minute granularity (at most 144 points)
+         return ('30minute', 1800)
+     elif duration_days <= 7:
+         # Up to 7 days: 1-hour granularity (at most 168 points)
+         return ('hour', 3600)
+     elif duration_days <= 30:
+         # Up to 30 days: 4-hour granularity (at most 180 points)
+         return ('hour', 14400)
+     else:
+         # Over 30 days: 1-day granularity
+         return ('day', 86400)
+
+
+ def get_time_bucket_sql(granularity: str, interval_seconds: int) -> str:
+     """Build the SQL time-bucket expression (improved version, supports more granularities)"""
+
+     # Second-level granularity
+     if granularity == 'second':
+         if interval_seconds == 1:
+             return "date_trunc('second', created_at)"
+         else:
+             # 5-second, 10-second, 30-second buckets, etc. - corrected algorithm
+             return f"date_trunc('minute', created_at) + interval '{interval_seconds} seconds' * floor(extract(second from created_at)::int / {interval_seconds})"
+
+     # Minute-level granularity
+     elif granularity == 'minute':
+         if interval_seconds == 60:
+             return "date_trunc('minute', created_at)"
+         elif interval_seconds == 120:
+             return "date_trunc('hour', created_at) + interval '2 minutes' * floor(extract(minute from created_at)::int / 2)"
+         else:
+             # Other minute intervals
+             minutes = interval_seconds // 60
+             return f"date_trunc('hour', created_at) + interval '{minutes} minutes' * floor(extract(minute from created_at)::int / {minutes})"
+
+     # Specific minute granularities
+     elif granularity == '5minute':
+         return "date_trunc('hour', created_at) + interval '5 minutes' * floor(extract(minute from created_at)::int / 5)"
+     elif granularity == '10minute':
+         return "date_trunc('hour', created_at) + interval '10 minutes' * floor(extract(minute from created_at)::int / 10)"
+     elif granularity == '15minute':
+         return "date_trunc('hour', created_at) + interval '15 minutes' * floor(extract(minute from created_at)::int / 15)"
+     elif granularity == '30minute':
+         return "date_trunc('hour', created_at) + interval '30 minutes' * floor(extract(minute from created_at)::int / 30)"
+
+     # Hour-level granularity
+     elif granularity == 'hour':
+         if interval_seconds == 3600:
+             return "date_trunc('hour', created_at)"
+         else:
+             # 2-hour, 4-hour, 6-hour, 12-hour buckets, etc.
+             hours = interval_seconds // 3600
+             return f"date_trunc('day', created_at) + interval '{hours} hours' * floor(extract(hour from created_at)::int / {hours})"
+
+     # Day-level granularity
+     elif granularity == 'day':
+         return "date_trunc('day', created_at)"
+
+     # Default
+     else:
+         return "date_trunc('minute', created_at)"
+
+
+ @router.post("/trend", response_model=BacklogTrendResponse)
+ async def get_backlog_trend(request: BacklogTrendRequest):
+     """
+     Get queue backlog trend data.
+
+     Supports multiple time ranges and data granularities, and aggregates data points automatically.
+     """
+     pool = await get_db_pool()
+
+     # Determine the time range
+     if request.start_time and request.end_time:
+         start_time = request.start_time
+         end_time = request.end_time
+         time_delta = end_time - start_time
+     else:
+         time_delta = parse_time_range(request.time_range)
+         end_time = datetime.now(timezone.utc)
+         start_time = end_time - time_delta
+
+     # Determine the data granularity and interval
+     granularity_name, interval_seconds = determine_granularity(time_delta, request.granularity)
+     time_bucket = get_time_bucket_sql(granularity_name, interval_seconds)
+
+     # Log the chosen interval
+     actual_points = int(time_delta.total_seconds() / interval_seconds) + 1
+     print(f"Using time interval: {interval_seconds}s ({granularity_name}), expecting about {actual_points} time points")
+
+     async with pool.acquire() as conn:
+         # Build the query conditions
+         conditions = [
+             "namespace = $1",
+             "created_at >= $2",
+             "created_at <= $3"
+         ]
+         params = [request.namespace, start_time, end_time]
+
+         # Queue filter
+         if request.queues:
+             placeholders = [f"${i+4}" for i in range(len(request.queues))]
+             conditions.append(f"stream_name IN ({','.join(placeholders)})")
+             params.extend(request.queues)
+
+         # Consumer-group filter
+         # if not request.include_groups:
+         #     # Without group data, only query stream-level statistics (consumer_group IS NULL)
+         #     conditions.append("consumer_group IS NULL")
+
+         where_clause = " AND ".join(conditions)
+
+         # Build different queries depending on whether groups are included
+         if request.include_groups:
+             # Aggregate by task name, making sure each time point has only one record;
+             # the task name is extracted from consumer_group (the part after the last dot)
+             query = f"""
+                 WITH raw_data AS (
+                     SELECT
+                         {time_bucket} as time_bucket,
+                         stream_name as queue_name,
+                         consumer_group,
+                         CASE
+                             -- Handle the default:QUEUE:queue_name:task.subtask format
+                             WHEN consumer_group LIKE 'default:QUEUE:%' THEN
+                                 SPLIT_PART(consumer_group, '.', -1)  -- take the part after the last dot
+                             -- Keep the original consumer group name (e.g. consumer_group_1)
+                             ELSE consumer_group
+                         END as task_name,
+                         backlog_unprocessed,
+                         last_published_offset,
+                         last_delivered_offset,
+                         pending_count,
+                         produce_rate,
+                         deliver_rate
+                     FROM stream_backlog_monitor
+                     WHERE {where_clause}
+                         AND consumer_group != 'default_pg_consumer'  -- exclude default_pg_consumer
+                         AND consumer_group IS NOT NULL  -- require a consumer group
+                 ),
+                 task_aggregated AS (
+                     SELECT
+                         time_bucket,
+                         queue_name,
+                         task_name,
+                         SUM(backlog_unprocessed) as total_backlog,
+                         MAX(backlog_unprocessed) as max_backlog,
+                         MIN(backlog_unprocessed) as min_backlog,
+                         MAX(last_published_offset) as max_published,
+                         MAX(last_delivered_offset) as max_delivered,
+                         SUM(pending_count) as total_pending,
+                         AVG(produce_rate) as avg_produce_rate,
+                         AVG(deliver_rate) as avg_deliver_rate,
+                         COUNT(DISTINCT consumer_group) as sample_count
+                     FROM raw_data
+                     GROUP BY time_bucket, queue_name, task_name
+                 )
+                 SELECT
+                     time_bucket,
+                     queue_name,
+                     task_name as consumer_group,  -- return task_name as consumer_group
+                     total_backlog::int as avg_backlog,
+                     max_backlog,
+                     min_backlog,
+                     max_published::int as avg_published,
+                     max_delivered::int as avg_delivered,
+                     total_pending::int as avg_pending,
+                     avg_produce_rate,
+                     avg_deliver_rate,
+                     sample_count
+                 FROM task_aggregated
+                 ORDER BY time_bucket, queue_name, task_name
+             """
+         else:
+             # Groups not included: aggregate across all consumer groups and priorities
+             query = f"""
+                 SELECT
+                     {time_bucket} as time_bucket,
+                     stream_name as queue_name,
+                     NULL as consumer_group,
+                     COALESCE(SUM(backlog_unprocessed), 0)::int as avg_backlog,
+                     COALESCE(MAX(backlog_unprocessed), 0)::int as max_backlog,
+                     COALESCE(MIN(backlog_unprocessed), 0)::int as min_backlog,
+                     COALESCE(MAX(last_published_offset), 0)::int as avg_published,
+                     COALESCE(MAX(last_delivered_offset), 0)::int as avg_delivered,
+                     COALESCE(SUM(pending_count), 0)::int as avg_pending,
+                     AVG(produce_rate) as avg_produce_rate,
+                     AVG(deliver_rate) as avg_deliver_rate,
+                     COUNT(DISTINCT COALESCE(priority::text, '0') || ':' || COALESCE(consumer_group, 'null')) as sample_count
+                 FROM stream_backlog_monitor
+                 WHERE {where_clause}
+                 GROUP BY time_bucket, stream_name
+                 ORDER BY time_bucket, stream_name
+             """
+
+         rows = await conn.fetch(query, *params)
+
+         # Query the statistics
+         if request.include_groups:
+             # Statistics including consumer groups
+             stats_query = f"""
+                 WITH latest_data AS (
+                     SELECT DISTINCT ON (stream_name, consumer_group)
+                         stream_name,
+                         consumer_group,
+                         backlog_unprocessed as current_backlog,
+                         created_at
+                     FROM stream_backlog_monitor
+                     WHERE {where_clause}
+                     ORDER BY stream_name, consumer_group, created_at DESC
+                 ),
+                 stats AS (
+                     SELECT
+                         stream_name,
+                         AVG(backlog_unprocessed) as avg_backlog,
+                         MAX(backlog_unprocessed) as max_backlog,
+                         MIN(backlog_unprocessed) as min_backlog
+                     FROM stream_backlog_monitor
+                     WHERE {where_clause}
+                     GROUP BY stream_name
+                 ),
+                 trend AS (
+                     SELECT
+                         stream_name,
+                         CASE
+                             WHEN COUNT(*) >= 2 THEN
+                                 (AVG(CASE WHEN row_num <= 5 THEN backlog_unprocessed END) -
+                                  AVG(CASE WHEN row_num > count_all - 5 THEN backlog_unprocessed END))
+                             ELSE 0
+                         END as trend_diff
+                     FROM (
+                         SELECT
+                             stream_name,
+                             backlog_unprocessed,
+                             ROW_NUMBER() OVER (PARTITION BY stream_name ORDER BY created_at DESC) as row_num,
+                             COUNT(*) OVER (PARTITION BY stream_name) as count_all
+                         FROM stream_backlog_monitor
+                         WHERE {where_clause} AND consumer_group IS NULL
+                     ) t
+                     GROUP BY stream_name
+                 )
+                 SELECT
+                     s.stream_name,
+                     l.current_backlog,
+                     s.max_backlog,
+                     s.avg_backlog,
+                     s.min_backlog,
+                     COALESCE(t.trend_diff, 0) as trend_diff
+                 FROM stats s
+                 JOIN latest_data l ON s.stream_name = l.stream_name
+                 LEFT JOIN trend t ON s.stream_name = t.stream_name
+                 WHERE l.consumer_group IS NULL
+             """
+         else:
+             # Groups not included: aggregate statistics across all consumer groups and priorities
+             stats_query = f"""
+                 WITH latest_per_priority AS (
+                     SELECT DISTINCT ON (stream_name, priority, consumer_group)
+                         stream_name,
+                         priority,
+                         consumer_group,
+                         backlog_unprocessed,
+                         created_at
+                     FROM stream_backlog_monitor
+                     WHERE {where_clause}
+                     ORDER BY stream_name, priority, consumer_group, created_at DESC
+                 ),
+                 latest_data AS (
+                     SELECT
+                         stream_name,
+                         SUM(backlog_unprocessed) as current_backlog,
+                         MAX(created_at) as latest_time
+                     FROM latest_per_priority
+                     GROUP BY stream_name
+                 ),
+                 stats AS (
+                     SELECT
+                         stream_name,
+                         AVG(backlog_unprocessed) as avg_backlog,
+                         MAX(backlog_unprocessed) as max_backlog,
+                         MIN(backlog_unprocessed) as min_backlog
+                     FROM stream_backlog_monitor
+                     WHERE {where_clause}
+                     GROUP BY stream_name
+                 ),
+                 trend AS (
+                     SELECT
+                         stream_name,
+                         CASE
+                             WHEN COUNT(DISTINCT time_bucket) >= 2 THEN
+                                 (AVG(CASE WHEN row_num <= 5 THEN total_backlog END) -
+                                  AVG(CASE WHEN row_num > count_all - 5 THEN total_backlog END))
+                             ELSE 0
+                         END as trend_diff
+                     FROM (
+                         SELECT
+                             stream_name,
+                             {time_bucket} as time_bucket,
+                             SUM(backlog_unprocessed) as total_backlog,
+                             ROW_NUMBER() OVER (PARTITION BY stream_name ORDER BY {time_bucket} DESC) as row_num,
+                             COUNT(*) OVER (PARTITION BY stream_name) as count_all
+                         FROM stream_backlog_monitor
+                         WHERE {where_clause}
+                         GROUP BY stream_name, {time_bucket}
+                     ) t
+                     GROUP BY stream_name
+                 )
+                 SELECT
+                     s.stream_name,
+                     l.current_backlog,
+                     s.max_backlog,
+                     s.avg_backlog,
+                     s.min_backlog,
+                     COALESCE(t.trend_diff, 0) as trend_diff
+                 FROM stats s
+                 JOIN latest_data l ON s.stream_name = l.stream_name
+                 LEFT JOIN trend t ON s.stream_name = t.stream_name
+             """
+
+         stats_rows = await conn.fetch(stats_query, *params)
+
+         # Convert the rows into the response format
+         data = []
+         for row in rows:
+             data_item = {
+                 'time': row['time_bucket'].isoformat(),
+                 'queue': row['queue_name'],
+                 'group': row['consumer_group'],
+                 'backlog': row['avg_backlog'],
+                 'max_backlog': row['max_backlog'],
+                 'min_backlog': row['min_backlog'],
+                 'published': row['avg_published'],
+                 'delivered': row['avg_delivered'],
+                 'pending': row['avg_pending'],
+                 'produce_rate': row['avg_produce_rate'],
+                 'deliver_rate': row['avg_deliver_rate'],
+             }
+             # Note: priorities are aggregated now, so no priority field is returned
+             data.append(data_item)
+
+         # Build the statistics
+         statistics = []
+         for stat_row in stats_rows:
+             # Compute the trend
+             trend_diff = stat_row['trend_diff'] or 0
+             avg_backlog = stat_row['avg_backlog'] or 1
+             trend_percentage = (trend_diff / avg_backlog * 100) if avg_backlog > 0 else 0
+
+             if trend_percentage > 20:
+                 trend = 'up'
+             elif trend_percentage < -20:
+                 trend = 'down'
+             else:
+                 trend = 'stable'
+
+             # Determine the alert level
+             current = stat_row['current_backlog'] or 0
+             if current >= 5000:
+                 alert_level = 'critical'
+             elif current >= 1000:
+                 alert_level = 'warning'
+             else:
+                 alert_level = 'normal'
+
+             statistics.append(BacklogStatistics(
+                 queue_name=stat_row['stream_name'],
+                 current_backlog=current,
+                 max_backlog=stat_row['max_backlog'] or 0,
+                 avg_backlog=float(stat_row['avg_backlog'] or 0),
+                 min_backlog=stat_row['min_backlog'] or 0,
+                 trend=trend,
+                 trend_percentage=trend_percentage,
+                 alert_level=alert_level
+             ))
+
+         return BacklogTrendResponse(
+             success=True,
+             data=data,
+             statistics=statistics,
+             granularity=granularity_name,  # use granularity_name rather than granularity
+             time_range={
+                 'start': start_time,
+                 'end': end_time
+             }
+         )
+
+
+ @router.get("/current", response_model=Dict[str, Any])
576
+ async def get_current_backlog(
577
+ namespace: str = Query("default", description="命名空间"),
578
+ queue: Optional[str] = Query(None, description="队列名称"),
579
+ include_groups: bool = Query(False, description="是否包含消费组数据")
580
+ ):
581
+ """
582
+ 获取当前队列积压状态(最新数据)
583
+ """
584
+ pool = await get_db_pool()
585
+
586
+ async with pool.acquire() as conn:
587
+ # 构建查询条件
588
+ conditions = ["namespace = $1"]
589
+ params = [namespace]
590
+
591
+ if queue:
592
+ conditions.append("stream_name = $2")
593
+ params.append(queue)
594
+
595
+ if not include_groups:
596
+ conditions.append("consumer_group IS NULL")
597
+
598
+ where_clause = " AND ".join(conditions)
599
+
600
+ # 使用视图获取最新数据
601
+ query = f"""
602
+ SELECT DISTINCT ON (stream_name, consumer_group)
603
+ namespace,
604
+ stream_name,
605
+ consumer_group,
606
+ last_published_offset,
607
+ last_delivered_offset,
608
+ last_acked_offset,
609
+ pending_count,
610
+ backlog_undelivered,
611
+ backlog_unprocessed,
612
+ produce_rate,
613
+ deliver_rate,
614
+ ack_rate,
615
+ created_at
616
+ FROM stream_backlog_monitor
617
+ WHERE {where_clause}
618
+ ORDER BY stream_name, consumer_group, created_at DESC
619
+ """
620
+
621
+ rows = await conn.fetch(query, *params)
622
+
623
+ # 按队列组织数据
624
+ result = {}
625
+ for row in rows:
626
+ queue_name = row['stream_name']
627
+ if queue_name not in result:
628
+ result[queue_name] = {
629
+ 'queue_name': queue_name,
630
+ 'namespace': row['namespace'],
631
+ 'last_update': row['created_at'].isoformat(),
632
+ 'summary': None,
633
+ 'consumer_groups': []
634
+ }
635
+
636
+ data = {
637
+ 'consumer_group': row['consumer_group'],
638
+ 'last_published_offset': row['last_published_offset'],
639
+ 'last_delivered_offset': row['last_delivered_offset'],
640
+ 'last_acked_offset': row['last_acked_offset'],
641
+ 'pending_count': row['pending_count'],
642
+ 'backlog_undelivered': row['backlog_undelivered'],
643
+ 'backlog_unprocessed': row['backlog_unprocessed'],
644
+ 'produce_rate': row['produce_rate'],
645
+ 'deliver_rate': row['deliver_rate'],
646
+ 'ack_rate': row['ack_rate'],
647
+ }
648
+
649
+ if row['consumer_group'] is None:
650
+ result[queue_name]['summary'] = data
651
+ else:
652
+ result[queue_name]['consumer_groups'].append(data)
653
+
654
+ return {
655
+ 'success': True,
656
+ 'data': list(result.values())
657
+ }
658
+
659
+
660
+ @router.get("/alert-queues", response_model=Dict[str, Any])
661
+ async def get_alert_queues(
662
+ namespace: str = Query("default", description="命名空间"),
663
+ warning_threshold: int = Query(1000, description="警告阈值"),
664
+ critical_threshold: int = Query(5000, description="严重阈值")
665
+ ):
666
+ """
667
+ 获取需要告警的队列列表
668
+ """
669
+ pool = await get_db_pool()
670
+
671
+ async with pool.acquire() as conn:
672
+ query = """
673
+ SELECT DISTINCT ON (stream_name)
674
+ stream_name,
675
+ backlog_unprocessed,
676
+ produce_rate,
677
+ deliver_rate,
678
+ created_at
679
+ FROM stream_backlog_monitor
680
+ WHERE
681
+ namespace = $1
682
+ AND consumer_group IS NULL
683
+ AND backlog_unprocessed >= $2
684
+ ORDER BY stream_name, created_at DESC
685
+ """
686
+
687
+ rows = await conn.fetch(query, namespace, warning_threshold)
688
+
689
+ # 分类告警
690
+ alerts = {
691
+ 'critical': [],
692
+ 'warning': [],
693
+ }
694
+
695
+ for row in rows:
696
+ alert_data = {
697
+ 'queue_name': row['stream_name'],
698
+ 'backlog': row['backlog_unprocessed'],
699
+ 'produce_rate': row['produce_rate'],
700
+ 'deliver_rate': row['deliver_rate'],
701
+ 'last_update': row['created_at'].isoformat()
702
+ }
703
+
704
+ if row['backlog_unprocessed'] >= critical_threshold:
705
+ alerts['critical'].append(alert_data)
706
+ else:
707
+ alerts['warning'].append(alert_data)
708
+
709
+ return {
710
+ 'success': True,
711
+ 'namespace': namespace,
712
+ 'thresholds': {
713
+ 'warning': warning_threshold,
714
+ 'critical': critical_threshold
715
+ },
716
+ 'alerts': alerts,
717
+ 'total_alerts': len(alerts['critical']) + len(alerts['warning'])
718
+ }
719
+
720
+
721
+ # Cleanup function
+ async def cleanup():
+     """Release resources"""
+     global _db_pool
+     if _db_pool:
+         await _db_pool.close()
+         _db_pool = None
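
For orientation, the module added above exposes three endpoints under the /api/queue-backlog prefix: POST /trend, GET /current, and GET /alert-queues. The following is a minimal client sketch for the trend endpoint, not part of the package; it assumes the jettask web UI backend is running locally on port 8000 with this router mounted, and the queue names are hypothetical.

import httpx

# Ask for the last hour of backlog history for two (hypothetical) queues,
# aggregated per queue rather than per consumer group; with granularity
# "auto" the server picks the bucket size from the time range.
payload = {
    "namespace": "default",
    "queues": ["orders", "emails"],
    "time_range": "1h",
    "granularity": "auto",
    "include_groups": False,
}

resp = httpx.post("http://localhost:8000/api/queue-backlog/trend", json=payload, timeout=10)
resp.raise_for_status()
body = resp.json()

# Each statistics entry carries current/max/avg/min backlog plus a trend
# ("up"/"down"/"stable") and an alert_level ("normal"/"warning"/"critical").
for stat in body["statistics"]:
    print(stat["queue_name"], stat["current_backlog"], stat["trend"], stat["alert_level"])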