jettask 0.2.1__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jettask/constants.py +213 -0
- jettask/core/app.py +525 -205
- jettask/core/cli.py +193 -185
- jettask/core/consumer_manager.py +126 -34
- jettask/core/context.py +3 -0
- jettask/core/enums.py +137 -0
- jettask/core/event_pool.py +501 -168
- jettask/core/message.py +147 -0
- jettask/core/offline_worker_recovery.py +181 -114
- jettask/core/task.py +10 -174
- jettask/core/task_batch.py +153 -0
- jettask/core/unified_manager_base.py +243 -0
- jettask/core/worker_scanner.py +54 -54
- jettask/executors/asyncio.py +184 -64
- jettask/webui/backend/config.py +51 -0
- jettask/webui/backend/data_access.py +2083 -92
- jettask/webui/backend/data_api.py +3294 -0
- jettask/webui/backend/dependencies.py +261 -0
- jettask/webui/backend/init_meta_db.py +158 -0
- jettask/webui/backend/main.py +1358 -69
- jettask/webui/backend/main_unified.py +78 -0
- jettask/webui/backend/main_v2.py +394 -0
- jettask/webui/backend/namespace_api.py +295 -0
- jettask/webui/backend/namespace_api_old.py +294 -0
- jettask/webui/backend/namespace_data_access.py +611 -0
- jettask/webui/backend/queue_backlog_api.py +727 -0
- jettask/webui/backend/queue_stats_v2.py +521 -0
- jettask/webui/backend/redis_monitor_api.py +476 -0
- jettask/webui/backend/unified_api_router.py +1601 -0
- jettask/webui/db_init.py +204 -32
- jettask/webui/frontend/package-lock.json +492 -1
- jettask/webui/frontend/package.json +4 -1
- jettask/webui/frontend/src/App.css +105 -7
- jettask/webui/frontend/src/App.jsx +49 -20
- jettask/webui/frontend/src/components/NamespaceSelector.jsx +166 -0
- jettask/webui/frontend/src/components/QueueBacklogChart.jsx +298 -0
- jettask/webui/frontend/src/components/QueueBacklogTrend.jsx +638 -0
- jettask/webui/frontend/src/components/QueueDetailsTable.css +65 -0
- jettask/webui/frontend/src/components/QueueDetailsTable.jsx +487 -0
- jettask/webui/frontend/src/components/QueueDetailsTableV2.jsx +465 -0
- jettask/webui/frontend/src/components/ScheduledTaskFilter.jsx +423 -0
- jettask/webui/frontend/src/components/TaskFilter.jsx +425 -0
- jettask/webui/frontend/src/components/TimeRangeSelector.css +21 -0
- jettask/webui/frontend/src/components/TimeRangeSelector.jsx +160 -0
- jettask/webui/frontend/src/components/layout/AppLayout.css +95 -0
- jettask/webui/frontend/src/components/layout/AppLayout.jsx +49 -0
- jettask/webui/frontend/src/components/layout/Header.css +34 -10
- jettask/webui/frontend/src/components/layout/Header.jsx +31 -23
- jettask/webui/frontend/src/components/layout/SideMenu.css +137 -0
- jettask/webui/frontend/src/components/layout/SideMenu.jsx +209 -0
- jettask/webui/frontend/src/components/layout/TabsNav.css +244 -0
- jettask/webui/frontend/src/components/layout/TabsNav.jsx +206 -0
- jettask/webui/frontend/src/components/layout/UserInfo.css +197 -0
- jettask/webui/frontend/src/components/layout/UserInfo.jsx +197 -0
- jettask/webui/frontend/src/contexts/NamespaceContext.jsx +72 -0
- jettask/webui/frontend/src/contexts/TabsContext.backup.jsx +245 -0
- jettask/webui/frontend/src/main.jsx +1 -0
- jettask/webui/frontend/src/pages/Alerts.jsx +684 -0
- jettask/webui/frontend/src/pages/Dashboard.jsx +1330 -0
- jettask/webui/frontend/src/pages/QueueDetail.jsx +1109 -10
- jettask/webui/frontend/src/pages/QueueMonitor.jsx +236 -115
- jettask/webui/frontend/src/pages/Queues.jsx +5 -1
- jettask/webui/frontend/src/pages/ScheduledTasks.jsx +809 -0
- jettask/webui/frontend/src/pages/Settings.jsx +800 -0
- jettask/webui/frontend/src/services/api.js +7 -5
- jettask/webui/frontend/src/utils/suppressWarnings.js +22 -0
- jettask/webui/frontend/src/utils/userPreferences.js +154 -0
- jettask/webui/multi_namespace_consumer.py +543 -0
- jettask/webui/pg_consumer.py +983 -246
- jettask/webui/static/dist/assets/index-7129cfe1.css +1 -0
- jettask/webui/static/dist/assets/index-8d1935cc.js +774 -0
- jettask/webui/static/dist/index.html +2 -2
- jettask/webui/task_center.py +216 -0
- jettask/webui/task_center_client.py +150 -0
- jettask/webui/unified_consumer_manager.py +193 -0
- {jettask-0.2.1.dist-info → jettask-0.2.4.dist-info}/METADATA +1 -1
- jettask-0.2.4.dist-info/RECORD +134 -0
- jettask/webui/pg_consumer_slow.py +0 -1099
- jettask/webui/pg_consumer_test.py +0 -678
- jettask/webui/static/dist/assets/index-823408e8.css +0 -1
- jettask/webui/static/dist/assets/index-9968b0b8.js +0 -543
- jettask/webui/test_pg_consumer_recovery.py +0 -547
- jettask/webui/test_recovery_simple.py +0 -492
- jettask/webui/test_self_recovery.py +0 -467
- jettask-0.2.1.dist-info/RECORD +0 -91
- {jettask-0.2.1.dist-info → jettask-0.2.4.dist-info}/WHEEL +0 -0
- {jettask-0.2.1.dist-info → jettask-0.2.4.dist-info}/entry_points.txt +0 -0
- {jettask-0.2.1.dist-info → jettask-0.2.4.dist-info}/licenses/LICENSE +0 -0
- {jettask-0.2.1.dist-info → jettask-0.2.4.dist-info}/top_level.txt +0 -0
jettask/webui/backend/queue_backlog_api.py (new file)
@@ -0,0 +1,727 @@
"""
Queue backlog monitoring API endpoints.
Reads historical backlog data from the stream_backlog_monitor table.
"""

from fastapi import APIRouter, Query, HTTPException
from typing import Optional, List, Dict, Any
from datetime import datetime, timedelta, timezone
from pydantic import BaseModel
import asyncpg
import os
from contextlib import asynccontextmanager
import traceback

router = APIRouter(prefix="/api/queue-backlog", tags=["queue-backlog"])


class BacklogTrendRequest(BaseModel):
    """Backlog trend request parameters"""
    namespace: str = "default"
    queues: Optional[List[str]] = None  # None means all queues
    time_range: str = "1h"  # 15m, 30m, 1h, 3h, 6h, 12h, 24h, 3d, 7d
    start_time: Optional[datetime] = None
    end_time: Optional[datetime] = None
    granularity: Optional[str] = None  # auto, minute, 5minute, 10minute, 30minute, hour, day
    include_groups: bool = False  # whether to include consumer-group level data


class BacklogSnapshot(BaseModel):
    """Backlog snapshot"""
    namespace: str
    queue_name: str
    consumer_group: Optional[str]
    timestamp: datetime
    last_published_offset: int
    last_delivered_offset: int
    last_acked_offset: int
    pending_count: int
    backlog_undelivered: int
    backlog_unprocessed: int
    produce_rate: Optional[float]
    deliver_rate: Optional[float]
    ack_rate: Optional[float]


class BacklogStatistics(BaseModel):
    """Backlog statistics"""
    queue_name: str
    current_backlog: int
    max_backlog: int
    avg_backlog: float
    min_backlog: int
    trend: str  # up, down, stable
    trend_percentage: float
    alert_level: Optional[str]  # normal, warning, critical


class BacklogTrendResponse(BaseModel):
    """Backlog trend response"""
    success: bool
    data: List[Dict[str, Any]]
    statistics: List[BacklogStatistics]
    granularity: str
    time_range: Dict[str, datetime]


# Database connection pool
_db_pool = None


async def get_db_pool():
    """Get (or lazily create) the database connection pool"""
    global _db_pool
    if _db_pool is None:
        pg_url = os.getenv('JETTASK_PG_URL', 'postgresql://jettask:123456@localhost:5432/jettask')
        # Parse the connection string
        if pg_url.startswith('postgresql://'):
            pg_url = pg_url.replace('postgresql://', '')
        elif pg_url.startswith('postgresql+asyncpg://'):
            pg_url = pg_url.replace('postgresql+asyncpg://', '')

        # Split credentials from host information
        if '@' in pg_url:
            auth, host_info = pg_url.split('@')
            if ':' in auth:
                user, password = auth.split(':')
            else:
                user = auth
                password = None

            if '/' in host_info:
                host_port, database = host_info.split('/')
                if ':' in host_port:
                    host, port = host_port.split(':')
                    port = int(port)
                else:
                    host = host_port
                    port = 5432
            else:
                host = host_info
                port = 5432
                database = 'jettask'
        else:
            user = 'jettask'
            password = '123456'
            host = 'localhost'
            port = 5432
            database = 'jettask'

        _db_pool = await asyncpg.create_pool(
            host=host,
            port=port,
            user=user,
            password=password,
            database=database,
            min_size=2,
            max_size=10
        )
    return _db_pool


def parse_time_range(time_range: str) -> timedelta:
    """Parse a time-range string"""
    time_map = {
        '15m': timedelta(minutes=15),
        '30m': timedelta(minutes=30),
        '1h': timedelta(hours=1),
        '3h': timedelta(hours=3),
        '6h': timedelta(hours=6),
        '12h': timedelta(hours=12),
        '24h': timedelta(hours=24),
        '1d': timedelta(days=1),
        '3d': timedelta(days=3),
        '7d': timedelta(days=7),
    }
    return time_map.get(time_range, timedelta(hours=1))


def determine_granularity(time_delta: timedelta, granularity: Optional[str] = None) -> tuple[str, int]:
    """Determine an appropriate data granularity for the time range
    (follows the fetch_queue_flow_rates implementation).

    Returns:
        (granularity_name, interval_seconds)
    """
    if granularity and granularity != 'auto':
        # Seconds mapping when the granularity is specified explicitly
        manual_map = {
            'second': ('second', 1),
            'minute': ('minute', 60),
            '5minute': ('5minute', 300),
            '10minute': ('10minute', 600),
            '30minute': ('30minute', 1800),
            'hour': ('hour', 3600),
            'day': ('day', 86400),
        }
        return manual_map.get(granularity, ('minute', 60))

    # Pick a granularity based on the time range.
    # Goal: keep a reasonable number of data points, neither too many nor too few.
    duration_seconds = time_delta.total_seconds()
    duration_minutes = duration_seconds / 60
    duration_hours = duration_minutes / 60
    duration_days = duration_hours / 24

    # Choose the granularity for the given time range
    if duration_minutes <= 15:
        # Up to 15 minutes: 5-second buckets (at most 180 points)
        return ('second', 5)
    elif duration_minutes <= 30:
        # Up to 30 minutes: 10-second buckets (at most 180 points)
        return ('second', 10)
    elif duration_hours <= 1:
        # Up to 1 hour: 30-second buckets (at most 120 points)
        return ('second', 30)
    elif duration_hours <= 3:
        # Up to 3 hours: 1-minute buckets (at most 180 points)
        return ('minute', 60)
    elif duration_hours <= 6:
        # Up to 6 hours: 2-minute buckets (at most 180 points)
        return ('minute', 120)
    elif duration_hours <= 12:
        # Up to 12 hours: 5-minute buckets (at most 144 points)
        return ('5minute', 300)
    elif duration_hours <= 24:
        # Up to 24 hours: 10-minute buckets (at most 144 points)
        return ('10minute', 600)
    elif duration_days <= 3:
        # Up to 3 days: 30-minute buckets (at most 144 points)
        return ('30minute', 1800)
    elif duration_days <= 7:
        # Up to 7 days: 1-hour buckets (at most 168 points)
        return ('hour', 3600)
    elif duration_days <= 30:
        # Up to 30 days: 4-hour buckets (at most 180 points)
        return ('hour', 14400)
    else:
        # Beyond 30 days: 1-day buckets
        return ('day', 86400)


def get_time_bucket_sql(granularity: str, interval_seconds: int) -> str:
    """Build the SQL expression for time bucketing (improved version, supports more granularities)"""

    # Second-level granularity
    if granularity == 'second':
        if interval_seconds == 1:
            return "date_trunc('second', created_at)"
        else:
            # 5-second, 10-second, 30-second buckets, etc. (corrected bucketing)
            return f"date_trunc('minute', created_at) + interval '{interval_seconds} seconds' * floor(extract(second from created_at)::int / {interval_seconds})"

    # Minute-level granularity
    elif granularity == 'minute':
        if interval_seconds == 60:
            return "date_trunc('minute', created_at)"
        elif interval_seconds == 120:
            return "date_trunc('hour', created_at) + interval '2 minutes' * floor(extract(minute from created_at)::int / 2)"
        else:
            # Other minute intervals
            minutes = interval_seconds // 60
            return f"date_trunc('hour', created_at) + interval '{minutes} minutes' * floor(extract(minute from created_at)::int / {minutes})"

    # Fixed minute granularities
    elif granularity == '5minute':
        return "date_trunc('hour', created_at) + interval '5 minutes' * floor(extract(minute from created_at)::int / 5)"
    elif granularity == '10minute':
        return "date_trunc('hour', created_at) + interval '10 minutes' * floor(extract(minute from created_at)::int / 10)"
    elif granularity == '15minute':
        return "date_trunc('hour', created_at) + interval '15 minutes' * floor(extract(minute from created_at)::int / 15)"
    elif granularity == '30minute':
        return "date_trunc('hour', created_at) + interval '30 minutes' * floor(extract(minute from created_at)::int / 30)"

    # Hour-level granularity
    elif granularity == 'hour':
        if interval_seconds == 3600:
            return "date_trunc('hour', created_at)"
        else:
            # 2-hour, 4-hour, 6-hour, 12-hour buckets, etc.
            hours = interval_seconds // 3600
            return f"date_trunc('day', created_at) + interval '{hours} hours' * floor(extract(hour from created_at)::int / {hours})"

    # Day-level granularity
    elif granularity == 'day':
        return "date_trunc('day', created_at)"

    # Default
    else:
        return "date_trunc('minute', created_at)"


@router.post("/trend", response_model=BacklogTrendResponse)
async def get_backlog_trend(request: BacklogTrendRequest):
    """
    Get queue backlog trend data.

    Supports multiple time ranges and granularities; data points are aggregated automatically.
    """
    pool = await get_db_pool()

    # Determine the time range
    if request.start_time and request.end_time:
        start_time = request.start_time
        end_time = request.end_time
        time_delta = end_time - start_time
    else:
        time_delta = parse_time_range(request.time_range)
        end_time = datetime.now(timezone.utc)
        start_time = end_time - time_delta

    # Determine the data granularity and interval
    granularity_name, interval_seconds = determine_granularity(time_delta, request.granularity)
    time_bucket = get_time_bucket_sql(granularity_name, interval_seconds)

    # Log the chosen interval
    actual_points = int(time_delta.total_seconds() / interval_seconds) + 1
    print(f"Using time interval: {interval_seconds}s ({granularity_name}), expecting about {actual_points} time points")

    async with pool.acquire() as conn:
        # Build query conditions
        conditions = [
            "namespace = $1",
            "created_at >= $2",
            "created_at <= $3"
        ]
        params = [request.namespace, start_time, end_time]

        # Filter by queue
        if request.queues:
            placeholders = [f"${i+4}" for i in range(len(request.queues))]
            conditions.append(f"stream_name IN ({','.join(placeholders)})")
            params.extend(request.queues)

        # Filter by consumer group
        # if not request.include_groups:
        #     # Without group data, only query per-stream aggregates (consumer_group IS NULL)
        #     conditions.append("consumer_group IS NULL")

        where_clause = " AND ".join(conditions)

        # Build different queries depending on whether groups are included
        if request.include_groups:
            # Aggregate by task name, ensuring a single record per time bucket.
            # The task name is extracted from consumer_group (the part after the last dot).
            query = f"""
                WITH raw_data AS (
                    SELECT
                        {time_bucket} as time_bucket,
                        stream_name as queue_name,
                        consumer_group,
                        CASE
                            -- Handle the default:QUEUE:queue_name:task.subtask format
                            WHEN consumer_group LIKE 'default:QUEUE:%' THEN
                                SPLIT_PART(consumer_group, '.', -1) -- take the part after the last dot
                            -- Keep the original consumer group name (e.g. consumer_group_1)
                            ELSE consumer_group
                        END as task_name,
                        backlog_unprocessed,
                        last_published_offset,
                        last_delivered_offset,
                        pending_count,
                        produce_rate,
                        deliver_rate
                    FROM stream_backlog_monitor
                    WHERE {where_clause}
                        AND consumer_group != 'default_pg_consumer' -- exclude default_pg_consumer
                        AND consumer_group IS NOT NULL -- require a consumer group
                ),
                task_aggregated AS (
                    SELECT
                        time_bucket,
                        queue_name,
                        task_name,
                        SUM(backlog_unprocessed) as total_backlog,
                        MAX(backlog_unprocessed) as max_backlog,
                        MIN(backlog_unprocessed) as min_backlog,
                        MAX(last_published_offset) as max_published,
                        MAX(last_delivered_offset) as max_delivered,
                        SUM(pending_count) as total_pending,
                        AVG(produce_rate) as avg_produce_rate,
                        AVG(deliver_rate) as avg_deliver_rate,
                        COUNT(DISTINCT consumer_group) as sample_count
                    FROM raw_data
                    GROUP BY time_bucket, queue_name, task_name
                )
                SELECT
                    time_bucket,
                    queue_name,
                    task_name as consumer_group, -- return task_name as consumer_group
                    total_backlog::int as avg_backlog,
                    max_backlog,
                    min_backlog,
                    max_published::int as avg_published,
                    max_delivered::int as avg_delivered,
                    total_pending::int as avg_pending,
                    avg_produce_rate,
                    avg_deliver_rate,
                    sample_count
                FROM task_aggregated
                ORDER BY time_bucket, queue_name, task_name
            """
        else:
            # Without groups, aggregate across all consumer groups and priorities
            query = f"""
                SELECT
                    {time_bucket} as time_bucket,
                    stream_name as queue_name,
                    NULL as consumer_group,
                    COALESCE(SUM(backlog_unprocessed), 0)::int as avg_backlog,
                    COALESCE(MAX(backlog_unprocessed), 0)::int as max_backlog,
                    COALESCE(MIN(backlog_unprocessed), 0)::int as min_backlog,
                    COALESCE(MAX(last_published_offset), 0)::int as avg_published,
                    COALESCE(MAX(last_delivered_offset), 0)::int as avg_delivered,
                    COALESCE(SUM(pending_count), 0)::int as avg_pending,
                    AVG(produce_rate) as avg_produce_rate,
                    AVG(deliver_rate) as avg_deliver_rate,
                    COUNT(DISTINCT COALESCE(priority::text, '0') || ':' || COALESCE(consumer_group, 'null')) as sample_count
                FROM stream_backlog_monitor
                WHERE {where_clause}
                GROUP BY time_bucket, stream_name
                ORDER BY time_bucket, stream_name
            """

        rows = await conn.fetch(query, *params)

        # Query statistics
        if request.include_groups:
            # Statistics including consumer groups
            stats_query = f"""
                WITH latest_data AS (
                    SELECT DISTINCT ON (stream_name, consumer_group)
                        stream_name,
                        consumer_group,
                        backlog_unprocessed as current_backlog,
                        created_at
                    FROM stream_backlog_monitor
                    WHERE {where_clause}
                    ORDER BY stream_name, consumer_group, created_at DESC
                ),
                stats AS (
                    SELECT
                        stream_name,
                        AVG(backlog_unprocessed) as avg_backlog,
                        MAX(backlog_unprocessed) as max_backlog,
                        MIN(backlog_unprocessed) as min_backlog
                    FROM stream_backlog_monitor
                    WHERE {where_clause}
                    GROUP BY stream_name
                ),
                trend AS (
                    SELECT
                        stream_name,
                        CASE
                            WHEN COUNT(*) >= 2 THEN
                                (AVG(CASE WHEN row_num <= 5 THEN backlog_unprocessed END) -
                                 AVG(CASE WHEN row_num > count_all - 5 THEN backlog_unprocessed END))
                            ELSE 0
                        END as trend_diff
                    FROM (
                        SELECT
                            stream_name,
                            backlog_unprocessed,
                            ROW_NUMBER() OVER (PARTITION BY stream_name ORDER BY created_at DESC) as row_num,
                            COUNT(*) OVER (PARTITION BY stream_name) as count_all
                        FROM stream_backlog_monitor
                        WHERE {where_clause} AND consumer_group IS NULL
                    ) t
                    GROUP BY stream_name
                )
                SELECT
                    s.stream_name,
                    l.current_backlog,
                    s.max_backlog,
                    s.avg_backlog,
                    s.min_backlog,
                    COALESCE(t.trend_diff, 0) as trend_diff
                FROM stats s
                JOIN latest_data l ON s.stream_name = l.stream_name
                LEFT JOIN trend t ON s.stream_name = t.stream_name
                WHERE l.consumer_group IS NULL
            """
        else:
            # Without groups, aggregate statistics across all consumer groups and priorities
            stats_query = f"""
                WITH latest_per_priority AS (
                    SELECT DISTINCT ON (stream_name, priority, consumer_group)
                        stream_name,
                        priority,
                        consumer_group,
                        backlog_unprocessed,
                        created_at
                    FROM stream_backlog_monitor
                    WHERE {where_clause}
                    ORDER BY stream_name, priority, consumer_group, created_at DESC
                ),
                latest_data AS (
                    SELECT
                        stream_name,
                        SUM(backlog_unprocessed) as current_backlog,
                        MAX(created_at) as latest_time
                    FROM latest_per_priority
                    GROUP BY stream_name
                ),
                stats AS (
                    SELECT
                        stream_name,
                        AVG(backlog_unprocessed) as avg_backlog,
                        MAX(backlog_unprocessed) as max_backlog,
                        MIN(backlog_unprocessed) as min_backlog
                    FROM stream_backlog_monitor
                    WHERE {where_clause}
                    GROUP BY stream_name
                ),
                trend AS (
                    SELECT
                        stream_name,
                        CASE
                            WHEN COUNT(DISTINCT time_bucket) >= 2 THEN
                                (AVG(CASE WHEN row_num <= 5 THEN total_backlog END) -
                                 AVG(CASE WHEN row_num > count_all - 5 THEN total_backlog END))
                            ELSE 0
                        END as trend_diff
                    FROM (
                        SELECT
                            stream_name,
                            {time_bucket} as time_bucket,
                            SUM(backlog_unprocessed) as total_backlog,
                            ROW_NUMBER() OVER (PARTITION BY stream_name ORDER BY {time_bucket} DESC) as row_num,
                            COUNT(*) OVER (PARTITION BY stream_name) as count_all
                        FROM stream_backlog_monitor
                        WHERE {where_clause}
                        GROUP BY stream_name, {time_bucket}
                    ) t
                    GROUP BY stream_name
                )
                SELECT
                    s.stream_name,
                    l.current_backlog,
                    s.max_backlog,
                    s.avg_backlog,
                    s.min_backlog,
                    COALESCE(t.trend_diff, 0) as trend_diff
                FROM stats s
                JOIN latest_data l ON s.stream_name = l.stream_name
                LEFT JOIN trend t ON s.stream_name = t.stream_name
            """

        stats_rows = await conn.fetch(stats_query, *params)

        # Convert the data format
        data = []
        for row in rows:
            data_item = {
                'time': row['time_bucket'].isoformat(),
                'queue': row['queue_name'],
                'group': row['consumer_group'],
                'backlog': row['avg_backlog'],
                'max_backlog': row['max_backlog'],
                'min_backlog': row['min_backlog'],
                'published': row['avg_published'],
                'delivered': row['avg_delivered'],
                'pending': row['avg_pending'],
                'produce_rate': row['avg_produce_rate'],
                'deliver_rate': row['avg_deliver_rate'],
            }
            # Note: priorities are now aggregated, so no priority field is returned
            data.append(data_item)

        # Build statistics
        statistics = []
        for stat_row in stats_rows:
            # Compute the trend
            trend_diff = stat_row['trend_diff'] or 0
            avg_backlog = stat_row['avg_backlog'] or 1
            trend_percentage = (trend_diff / avg_backlog * 100) if avg_backlog > 0 else 0

            if trend_percentage > 20:
                trend = 'up'
            elif trend_percentage < -20:
                trend = 'down'
            else:
                trend = 'stable'

            # Determine the alert level
            current = stat_row['current_backlog'] or 0
            if current >= 5000:
                alert_level = 'critical'
            elif current >= 1000:
                alert_level = 'warning'
            else:
                alert_level = 'normal'

            statistics.append(BacklogStatistics(
                queue_name=stat_row['stream_name'],
                current_backlog=current,
                max_backlog=stat_row['max_backlog'] or 0,
                avg_backlog=float(stat_row['avg_backlog'] or 0),
                min_backlog=stat_row['min_backlog'] or 0,
                trend=trend,
                trend_percentage=trend_percentage,
                alert_level=alert_level
            ))

        return BacklogTrendResponse(
            success=True,
            data=data,
            statistics=statistics,
            granularity=granularity_name,  # use granularity_name rather than the requested granularity
            time_range={
                'start': start_time,
                'end': end_time
            }
        )


@router.get("/current", response_model=Dict[str, Any])
async def get_current_backlog(
    namespace: str = Query("default", description="Namespace"),
    queue: Optional[str] = Query(None, description="Queue name"),
    include_groups: bool = Query(False, description="Whether to include consumer-group data")
):
    """
    Get the current queue backlog state (latest data).
    """
    pool = await get_db_pool()

    async with pool.acquire() as conn:
        # Build query conditions
        conditions = ["namespace = $1"]
        params = [namespace]

        if queue:
            conditions.append("stream_name = $2")
            params.append(queue)

        if not include_groups:
            conditions.append("consumer_group IS NULL")

        where_clause = " AND ".join(conditions)

        # Use the monitoring table to fetch the latest data
        query = f"""
            SELECT DISTINCT ON (stream_name, consumer_group)
                namespace,
                stream_name,
                consumer_group,
                last_published_offset,
                last_delivered_offset,
                last_acked_offset,
                pending_count,
                backlog_undelivered,
                backlog_unprocessed,
                produce_rate,
                deliver_rate,
                ack_rate,
                created_at
            FROM stream_backlog_monitor
            WHERE {where_clause}
            ORDER BY stream_name, consumer_group, created_at DESC
        """

        rows = await conn.fetch(query, *params)

        # Organize the data by queue
        result = {}
        for row in rows:
            queue_name = row['stream_name']
            if queue_name not in result:
                result[queue_name] = {
                    'queue_name': queue_name,
                    'namespace': row['namespace'],
                    'last_update': row['created_at'].isoformat(),
                    'summary': None,
                    'consumer_groups': []
                }

            data = {
                'consumer_group': row['consumer_group'],
                'last_published_offset': row['last_published_offset'],
                'last_delivered_offset': row['last_delivered_offset'],
                'last_acked_offset': row['last_acked_offset'],
                'pending_count': row['pending_count'],
                'backlog_undelivered': row['backlog_undelivered'],
                'backlog_unprocessed': row['backlog_unprocessed'],
                'produce_rate': row['produce_rate'],
                'deliver_rate': row['deliver_rate'],
                'ack_rate': row['ack_rate'],
            }

            if row['consumer_group'] is None:
                result[queue_name]['summary'] = data
            else:
                result[queue_name]['consumer_groups'].append(data)

        return {
            'success': True,
            'data': list(result.values())
        }


@router.get("/alert-queues", response_model=Dict[str, Any])
async def get_alert_queues(
    namespace: str = Query("default", description="Namespace"),
    warning_threshold: int = Query(1000, description="Warning threshold"),
    critical_threshold: int = Query(5000, description="Critical threshold")
):
    """
    Get the list of queues that require alerts.
    """
    pool = await get_db_pool()

    async with pool.acquire() as conn:
        query = """
            SELECT DISTINCT ON (stream_name)
                stream_name,
                backlog_unprocessed,
                produce_rate,
                deliver_rate,
                created_at
            FROM stream_backlog_monitor
            WHERE
                namespace = $1
                AND consumer_group IS NULL
                AND backlog_unprocessed >= $2
            ORDER BY stream_name, created_at DESC
        """

        rows = await conn.fetch(query, namespace, warning_threshold)

        # Classify alerts
        alerts = {
            'critical': [],
            'warning': [],
        }

        for row in rows:
            alert_data = {
                'queue_name': row['stream_name'],
                'backlog': row['backlog_unprocessed'],
                'produce_rate': row['produce_rate'],
                'deliver_rate': row['deliver_rate'],
                'last_update': row['created_at'].isoformat()
            }

            if row['backlog_unprocessed'] >= critical_threshold:
                alerts['critical'].append(alert_data)
            else:
                alerts['warning'].append(alert_data)

        return {
            'success': True,
            'namespace': namespace,
            'thresholds': {
                'warning': warning_threshold,
                'critical': critical_threshold
            },
            'alerts': alerts,
            'total_alerts': len(alerts['critical']) + len(alerts['warning'])
        }


# Cleanup function
async def cleanup():
    """Release resources"""
    global _db_pool
    if _db_pool:
        await _db_pool.close()
        _db_pool = None
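
The module above only defines the router. As a point of reference, below is a minimal client sketch, assuming the jettask WebUI backend mounts this router unchanged under its /api/queue-backlog prefix and is reachable at http://localhost:8000; the host, port, queue name, and thresholds are illustrative and not taken from the package.

import asyncio
import httpx  # third-party HTTP client, used here only for illustration


async def main():
    # Assumption: the backend serving this router listens on localhost:8000.
    async with httpx.AsyncClient(base_url="http://localhost:8000") as client:
        # Backlog trend for the last hour, aggregated across consumer groups.
        trend = await client.post("/api/queue-backlog/trend", json={
            "namespace": "default",
            "queues": ["orders"],      # illustrative queue name
            "time_range": "1h",
            "granularity": "auto",
            "include_groups": False,
        })
        body = trend.json()
        print(body["granularity"], len(body["data"]), len(body["statistics"]))

        # Latest backlog snapshot for every queue in the namespace.
        current = await client.get("/api/queue-backlog/current",
                                   params={"namespace": "default"})
        print(current.json()["data"])

        # Queues whose unprocessed backlog exceeds the warning threshold.
        alerts = await client.get("/api/queue-backlog/alert-queues",
                                  params={"namespace": "default",
                                          "warning_threshold": 1000,
                                          "critical_threshold": 5000})
        print(alerts.json()["total_alerts"])


asyncio.run(main())

Note that get_db_pool() reads JETTASK_PG_URL lazily on first use, so the PostgreSQL instance holding stream_backlog_monitor must be reachable before the first request hits any of these routes.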