jettask-0.2.19-py3-none-any.whl → jettask-0.2.23-py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as published to a supported registry. It is provided for informational purposes only.
- jettask/__init__.py +12 -3
- jettask/cli.py +314 -228
- jettask/config/__init__.py +9 -1
- jettask/config/config.py +245 -0
- jettask/config/env_loader.py +381 -0
- jettask/config/lua_scripts.py +158 -0
- jettask/config/nacos_config.py +132 -5
- jettask/core/__init__.py +1 -1
- jettask/core/app.py +1573 -666
- jettask/core/app_importer.py +33 -16
- jettask/core/container.py +532 -0
- jettask/core/task.py +1 -4
- jettask/core/unified_manager_base.py +2 -2
- jettask/executor/__init__.py +38 -0
- jettask/executor/core.py +625 -0
- jettask/executor/executor.py +338 -0
- jettask/executor/orchestrator.py +290 -0
- jettask/executor/process_entry.py +638 -0
- jettask/executor/task_executor.py +317 -0
- jettask/messaging/__init__.py +68 -0
- jettask/messaging/event_pool.py +2188 -0
- jettask/messaging/reader.py +519 -0
- jettask/messaging/registry.py +266 -0
- jettask/messaging/scanner.py +369 -0
- jettask/messaging/sender.py +312 -0
- jettask/persistence/__init__.py +118 -0
- jettask/persistence/backlog_monitor.py +567 -0
- jettask/{backend/data_access.py → persistence/base.py} +58 -57
- jettask/persistence/consumer.py +315 -0
- jettask/{core → persistence}/db_manager.py +23 -22
- jettask/persistence/maintenance.py +81 -0
- jettask/persistence/message_consumer.py +259 -0
- jettask/{backend/namespace_data_access.py → persistence/namespace.py} +66 -98
- jettask/persistence/offline_recovery.py +196 -0
- jettask/persistence/queue_discovery.py +215 -0
- jettask/persistence/task_persistence.py +218 -0
- jettask/persistence/task_updater.py +583 -0
- jettask/scheduler/__init__.py +2 -2
- jettask/scheduler/loader.py +6 -5
- jettask/scheduler/run_scheduler.py +1 -1
- jettask/scheduler/scheduler.py +7 -7
- jettask/scheduler/{unified_scheduler_manager.py → scheduler_coordinator.py} +18 -13
- jettask/task/__init__.py +16 -0
- jettask/{router.py → task/router.py} +26 -8
- jettask/task/task_center/__init__.py +9 -0
- jettask/task/task_executor.py +318 -0
- jettask/task/task_registry.py +291 -0
- jettask/test_connection_monitor.py +73 -0
- jettask/utils/__init__.py +31 -1
- jettask/{monitor/run_backlog_collector.py → utils/backlog_collector.py} +1 -1
- jettask/utils/db_connector.py +1629 -0
- jettask/{db_init.py → utils/db_init.py} +1 -1
- jettask/utils/rate_limit/__init__.py +30 -0
- jettask/utils/rate_limit/concurrency_limiter.py +665 -0
- jettask/utils/rate_limit/config.py +145 -0
- jettask/utils/rate_limit/limiter.py +41 -0
- jettask/utils/rate_limit/manager.py +269 -0
- jettask/utils/rate_limit/qps_limiter.py +154 -0
- jettask/utils/rate_limit/task_limiter.py +384 -0
- jettask/utils/serializer.py +3 -0
- jettask/{monitor/stream_backlog_monitor.py → utils/stream_backlog.py} +14 -6
- jettask/utils/time_sync.py +173 -0
- jettask/webui/__init__.py +27 -0
- jettask/{api/v1 → webui/api}/alerts.py +1 -1
- jettask/{api/v1 → webui/api}/analytics.py +2 -2
- jettask/{api/v1 → webui/api}/namespaces.py +1 -1
- jettask/{api/v1 → webui/api}/overview.py +1 -1
- jettask/{api/v1 → webui/api}/queues.py +3 -3
- jettask/{api/v1 → webui/api}/scheduled.py +1 -1
- jettask/{api/v1 → webui/api}/settings.py +1 -1
- jettask/{api.py → webui/app.py} +253 -145
- jettask/webui/namespace_manager/__init__.py +10 -0
- jettask/{multi_namespace_consumer.py → webui/namespace_manager/multi.py} +69 -22
- jettask/{unified_consumer_manager.py → webui/namespace_manager/unified.py} +1 -1
- jettask/{run.py → webui/run.py} +2 -2
- jettask/{services → webui/services}/__init__.py +1 -3
- jettask/{services → webui/services}/overview_service.py +34 -16
- jettask/{services → webui/services}/queue_service.py +1 -1
- jettask/{backend → webui/services}/queue_stats_v2.py +1 -1
- jettask/{services → webui/services}/settings_service.py +1 -1
- jettask/worker/__init__.py +53 -0
- jettask/worker/lifecycle.py +1507 -0
- jettask/worker/manager.py +583 -0
- jettask/{core/offline_worker_recovery.py → worker/recovery.py} +268 -175
- {jettask-0.2.19.dist-info → jettask-0.2.23.dist-info}/METADATA +2 -71
- jettask-0.2.23.dist-info/RECORD +145 -0
- jettask/__main__.py +0 -140
- jettask/api/__init__.py +0 -103
- jettask/backend/__init__.py +0 -1
- jettask/backend/api/__init__.py +0 -3
- jettask/backend/api/v1/__init__.py +0 -17
- jettask/backend/api/v1/monitoring.py +0 -431
- jettask/backend/api/v1/namespaces.py +0 -504
- jettask/backend/api/v1/queues.py +0 -342
- jettask/backend/api/v1/tasks.py +0 -367
- jettask/backend/core/__init__.py +0 -3
- jettask/backend/core/cache.py +0 -221
- jettask/backend/core/database.py +0 -200
- jettask/backend/core/exceptions.py +0 -102
- jettask/backend/dependencies.py +0 -261
- jettask/backend/init_meta_db.py +0 -158
- jettask/backend/main.py +0 -1426
- jettask/backend/main_unified.py +0 -78
- jettask/backend/main_v2.py +0 -394
- jettask/backend/models/__init__.py +0 -3
- jettask/backend/models/requests.py +0 -236
- jettask/backend/models/responses.py +0 -230
- jettask/backend/namespace_api_old.py +0 -267
- jettask/backend/services/__init__.py +0 -3
- jettask/backend/start.py +0 -42
- jettask/backend/unified_api_router.py +0 -1541
- jettask/cleanup_deprecated_tables.sql +0 -16
- jettask/core/consumer_manager.py +0 -1695
- jettask/core/delay_scanner.py +0 -256
- jettask/core/event_pool.py +0 -1700
- jettask/core/heartbeat_process.py +0 -222
- jettask/core/task_batch.py +0 -153
- jettask/core/worker_scanner.py +0 -271
- jettask/executors/__init__.py +0 -5
- jettask/executors/asyncio.py +0 -876
- jettask/executors/base.py +0 -30
- jettask/executors/common.py +0 -148
- jettask/executors/multi_asyncio.py +0 -309
- jettask/gradio_app.py +0 -570
- jettask/integrated_gradio_app.py +0 -1088
- jettask/main.py +0 -0
- jettask/monitoring/__init__.py +0 -3
- jettask/pg_consumer.py +0 -1896
- jettask/run_monitor.py +0 -22
- jettask/run_webui.py +0 -148
- jettask/scheduler/multi_namespace_scheduler.py +0 -294
- jettask/scheduler/unified_manager.py +0 -450
- jettask/task_center_client.py +0 -150
- jettask/utils/serializer_optimized.py +0 -33
- jettask/webui_exceptions.py +0 -67
- jettask-0.2.19.dist-info/RECORD +0 -150
- /jettask/{constants.py → config/constants.py} +0 -0
- /jettask/{backend/config.py → config/task_center.py} +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/pg_consumer_v2.py +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/add_execution_time_field.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_new_tables.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_tables_v3.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/migrate_to_new_structure.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/modify_time_fields.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql_utils.py +0 -0
- /jettask/{models.py → persistence/models.py} +0 -0
- /jettask/scheduler/{manager.py → task_crud.py} +0 -0
- /jettask/{schema.sql → schemas/schema.sql} +0 -0
- /jettask/{task_center.py → task/task_center/client.py} +0 -0
- /jettask/{monitoring → utils}/file_watcher.py +0 -0
- /jettask/{services/redis_monitor_service.py → utils/redis_monitor.py} +0 -0
- /jettask/{api/v1 → webui/api}/__init__.py +0 -0
- /jettask/{webui_config.py → webui/config.py} +0 -0
- /jettask/{webui_models → webui/models}/__init__.py +0 -0
- /jettask/{webui_models → webui/models}/namespace.py +0 -0
- /jettask/{services → webui/services}/alert_service.py +0 -0
- /jettask/{services → webui/services}/analytics_service.py +0 -0
- /jettask/{services → webui/services}/scheduled_task_service.py +0 -0
- /jettask/{services → webui/services}/task_service.py +0 -0
- /jettask/{webui_sql → webui/sql}/batch_upsert_functions.sql +0 -0
- /jettask/{webui_sql → webui/sql}/verify_database.sql +0 -0
- {jettask-0.2.19.dist-info → jettask-0.2.23.dist-info}/WHEEL +0 -0
- {jettask-0.2.19.dist-info → jettask-0.2.23.dist-info}/entry_points.txt +0 -0
- {jettask-0.2.19.dist-info → jettask-0.2.23.dist-info}/licenses/LICENSE +0 -0
- {jettask-0.2.19.dist-info → jettask-0.2.23.dist-info}/top_level.txt +0 -0
jettask/backend/api/v1/monitoring.py
@@ -1,431 +0,0 @@
-"""
-Monitoring and analytics API v1
-"""
-from typing import List, Optional
-from fastapi import APIRouter, Depends, HTTPException, Query
-from sqlalchemy.ext.asyncio import AsyncSession
-import redis.asyncio as redis
-
-from dependencies import (
-    get_validated_namespace, get_pg_connection, get_redis_client,
-    get_namespace_connection, validate_time_range, get_request_metrics, RequestMetrics
-)
-from models.requests import MonitoringRequest, BacklogTrendRequest, AnalyticsRequest
-from models.responses import MonitoringResponse, AnalyticsResponse, BaseResponse
-from core.cache import cache_result, CACHE_CONFIGS
-from queue_backlog_api import get_backlog_trend
-import logging
-
-logger = logging.getLogger(__name__)
-router = APIRouter()
-
-
-@router.post("/backlog-trends", response_model=MonitoringResponse)
-@cache_result(**CACHE_CONFIGS['monitoring_data'])
-async def get_queue_backlog_trends(
-    request: BacklogTrendRequest,
-    namespace: str = Depends(get_validated_namespace),
-    metrics: RequestMetrics = Depends(get_request_metrics)
-):
-    """Get queue backlog trends (unified endpoint)."""
-    metrics.start(namespace, "POST /monitoring/backlog-trends")
-
-    try:
-        # Call the existing backlog trend API directly
-        backlog_response = await get_backlog_trend(request)
-
-        # Convert to the standard monitoring response format
-        monitoring_data = {
-            'series': [],
-            'granularity': backlog_response.granularity,
-            'time_range': backlog_response.time_range
-        }
-
-        # Group the data by series
-        series_data = {}
-        for item in backlog_response.data:
-            series_name = item.get('group') or item['queue']
-            if series_name not in series_data:
-                series_data[series_name] = []
-
-            series_data[series_name].append({
-                'timestamp': item['time'],
-                'value': item['backlog'],
-                'metadata': {
-                    'queue': item['queue'],
-                    'consumer_group': item.get('group'),
-                    'published': item.get('published'),
-                    'delivered': item.get('delivered'),
-                    'pending': item.get('pending')
-                }
-            })
-
-        # Convert to time-series format
-        for series_name, data_points in series_data.items():
-            monitoring_data['series'].append({
-                'name': series_name,
-                'data_points': data_points,
-                'unit': 'tasks'
-            })
-
-        return MonitoringResponse(data=monitoring_data)
-
-    except Exception as e:
-        logger.error(f"Failed to fetch backlog trend: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
-    finally:
-        metrics.finish()
-
-
-@router.get("/queue-flow-rates/{queue_name}", response_model=MonitoringResponse)
-@cache_result(**CACHE_CONFIGS['monitoring_data'])
-async def get_queue_flow_rates(
-    queue_name: str,
-    namespace: str = Depends(get_validated_namespace),
-    time_params: dict = Depends(validate_time_range),
-    granularity: Optional[str] = Query(None, description="Data granularity"),
-    redis_client: redis.Redis = Depends(get_redis_client),
-    pg_session: AsyncSession = Depends(get_pg_connection),
-    metrics: RequestMetrics = Depends(get_request_metrics)
-):
-    """Get queue flow rates."""
-    metrics.start(namespace, f"GET /monitoring/queue-flow-rates/{queue_name}")
-    print(f'{get_queue_flow_rates=}')
-    try:
-        # Use the existing data_access layer to fetch flow rates
-        from ...data_access import JetTaskDataAccess
-
-        data_access = JetTaskDataAccess()
-
-        flow_data, returned_granularity = await data_access.fetch_queue_flow_rates(
-            queue_name=queue_name,
-            start_time=time_params.get('start_time'),
-            end_time=time_params.get('end_time'),
-            filters=[]
-        )
-
-        # Convert to the monitoring response format
-        series_data = {
-            'enqueued': [],
-            'started': [],
-            'completed': []
-        }
-
-        for item in flow_data:
-            timestamp = item['time']
-            series_data['enqueued'].append({
-                'timestamp': timestamp,
-                'value': item.get('enqueued', 0),
-                'metadata': {'queue': queue_name, 'type': 'enqueued'}
-            })
-            series_data['started'].append({
-                'timestamp': timestamp,
-                'value': item.get('started', 0),
-                'metadata': {'queue': queue_name, 'type': 'started'}
-            })
-            series_data['completed'].append({
-                'timestamp': timestamp,
-                'value': item.get('completed', 0),
-                'metadata': {'queue': queue_name, 'type': 'completed'}
-            })
-
-        monitoring_data = {
-            'series': [
-                {
-                    'name': 'Enqueued',
-                    'data_points': series_data['enqueued'],
-                    'unit': 'tasks/min'
-                },
-                {
-                    'name': 'Started',
-                    'data_points': series_data['started'],
-                    'unit': 'tasks/min'
-                },
-                {
-                    'name': 'Completed',
-                    'data_points': series_data['completed'],
-                    'unit': 'tasks/min'
-                }
-            ],
-            'granularity': returned_granularity,
-            'time_range': {
-                'start': time_params.get('start_time'),
-                'end': time_params.get('end_time')
-            }
-        }
-
-        return MonitoringResponse(data=monitoring_data)
-
-    except Exception as e:
-        logger.error(f"Failed to fetch queue flow rates: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
-    finally:
-        metrics.finish()
-
-
-@router.get("/system-health")
-async def get_system_health(
-    namespace: str = Depends(get_validated_namespace),
-    redis_client: redis.Redis = Depends(get_redis_client),
-    pg_session: AsyncSession = Depends(get_pg_connection),
-    metrics: RequestMetrics = Depends(get_request_metrics)
-):
-    """Get system health status."""
-    metrics.start(namespace, "GET /monitoring/system-health")
-
-    try:
-        health_data = {
-            'status': 'healthy',
-            'version': '1.0.0',
-            'uptime': 3600.0,  # placeholder uptime
-            'components': {},
-            'metrics': {}
-        }
-
-        # Check the Redis connection
-        try:
-            await redis_client.ping()
-            health_data['components']['redis'] = 'healthy'
-        except Exception as e:
-            health_data['components']['redis'] = f'unhealthy: {str(e)}'
-            health_data['status'] = 'degraded'
-
-        # Check the PostgreSQL connection
-        try:
-            result = await pg_session.execute("SELECT 1")
-            health_data['components']['postgresql'] = 'healthy'
-        except Exception as e:
-            health_data['components']['postgresql'] = f'unhealthy: {str(e)}'
-            health_data['status'] = 'degraded'
-
-        # Collect system metrics
-        try:
-            import psutil
-            import time
-
-            health_data['metrics'] = {
-                'cpu_usage': psutil.cpu_percent(interval=1),
-                'memory_usage': psutil.virtual_memory().percent,
-                'disk_usage': psutil.disk_usage('/').percent,
-                'process_count': len(psutil.pids()),
-                'uptime': time.time() - psutil.boot_time()
-            }
-        except ImportError:
-            health_data['metrics'] = {
-                'note': 'psutil not available for system metrics'
-            }
-
-        return BaseResponse(data=health_data)
-
-    except Exception as e:
-        logger.error(f"Failed to fetch system health: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
-    finally:
-        metrics.finish()
-
-
-@router.post("/analytics", response_model=AnalyticsResponse)
-@cache_result(ttl=600, key_prefix="analytics")  # 10-minute cache
-async def get_analytics_data(
-    request: AnalyticsRequest,
-    namespace: str = Depends(get_validated_namespace),
-    pg_session: AsyncSession = Depends(get_pg_connection),
-    metrics: RequestMetrics = Depends(get_request_metrics)
-):
-    """Get analytics data."""
-    metrics.start(namespace, "POST /monitoring/analytics")
-
-    try:
-        analysis_type = request.analysis_type.lower()
-
-        if analysis_type == "queue_performance":
-            # Queue performance analysis
-            analytics_data = await _analyze_queue_performance(
-                pg_session, request.dimensions, request.metrics,
-                request.start_time, request.end_time
-            )
-
-        elif analysis_type == "task_distribution":
-            # Task distribution analysis
-            analytics_data = await _analyze_task_distribution(
-                pg_session, request.dimensions, request.metrics,
-                request.start_time, request.end_time
-            )
-
-        elif analysis_type == "error_patterns":
-            # Error pattern analysis
-            analytics_data = await _analyze_error_patterns(
-                pg_session, request.dimensions, request.metrics,
-                request.start_time, request.end_time
-            )
-
-        elif analysis_type == "resource_utilization":
-            # Resource utilization analysis
-            analytics_data = await _analyze_resource_utilization(
-                pg_session, request.dimensions, request.metrics,
-                request.start_time, request.end_time
-            )
-
-        else:
-            raise HTTPException(
-                status_code=400,
-                detail=f"Unsupported analysis type: {analysis_type}"
-            )
-
-        return AnalyticsResponse(data=analytics_data)
-
-    except HTTPException:
-        raise
-    except Exception as e:
-        logger.error(f"Failed to fetch analytics data: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
-    finally:
-        metrics.finish()
-
-
-@router.get("/alerts/active")
-async def get_active_alerts(
-    namespace: str = Depends(get_validated_namespace),
-    severity: Optional[str] = Query(None, description="Filter by alert severity"),
-    metrics: RequestMetrics = Depends(get_request_metrics)
-):
-    """Get active alerts."""
-    metrics.start(namespace, "GET /monitoring/alerts/active")
-
-    try:
-        # Alert querying still needs to be implemented:
-        # 1. Fetch active alerts from the alert store
-        # 2. Filter by severity
-        # 3. Return alert details
-
-        # Return mock data for now
-        alerts = [
-            {
-                "id": "alert_001",
-                "rule_name": "High Queue Backlog",
-                "severity": "warning",
-                "status": "firing",
-                "queue_name": "shared_queue",
-                "trigger_value": 1200,
-                "threshold": 1000,
-                "started_at": "2025-09-08T12:00:00Z",
-                "description": "Queue backlog exceeds threshold"
-            },
-            {
-                "id": "alert_002",
-                "rule_name": "High Error Rate",
-                "severity": "critical",
-                "status": "firing",
-                "queue_name": "priority_queue",
-                "trigger_value": 0.15,
-                "threshold": 0.10,
-                "started_at": "2025-09-08T11:30:00Z",
-                "description": "Error rate exceeds 10%"
-            }
-        ]
-
-        # Apply the severity filter
-        if severity:
-            alerts = [alert for alert in alerts if alert['severity'] == severity]
-
-        return BaseResponse(
-            data={
-                "alerts": alerts,
-                "total_count": len(alerts),
-                "counts_by_severity": {
-                    "critical": len([a for a in alerts if a['severity'] == 'critical']),
-                    "warning": len([a for a in alerts if a['severity'] == 'warning']),
-                    "info": len([a for a in alerts if a['severity'] == 'info'])
-                }
-            }
-        )
-
-    except Exception as e:
-        logger.error(f"Failed to fetch active alerts: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
-    finally:
-        metrics.finish()
-
-
-# Helper analysis functions
-
-async def _analyze_queue_performance(pg_session, dimensions, metrics, start_time, end_time):
-    """Analyze queue performance."""
-    # Queue performance analysis logic would go here
-    return {
-        "chart_data": [
-            {"queue": "shared_queue", "avg_processing_time": 12.5, "throughput": 234.5},
-            {"queue": "priority_queue", "avg_processing_time": 8.2, "throughput": 156.3}
-        ],
-        "summary": {
-            "total_queues": 2,
-            "avg_processing_time": 10.35,
-            "total_throughput": 390.8
-        },
-        "insights": [
-            "shared_queue has higher processing time but better throughput",
-            "priority_queue shows good processing efficiency"
-        ]
-    }
-
-
-async def _analyze_task_distribution(pg_session, dimensions, metrics, start_time, end_time):
-    """Analyze task distribution."""
-    # Task distribution analysis logic would go here
-    return {
-        "chart_data": [
-            {"hour": 0, "task_count": 45, "queue": "shared_queue"},
-            {"hour": 1, "task_count": 52, "queue": "shared_queue"},
-            {"hour": 2, "task_count": 38, "queue": "shared_queue"}
-        ],
-        "summary": {
-            "total_tasks": 135,
-            "peak_hour": 1,
-            "lowest_hour": 2
-        },
-        "insights": [
-            "Task load is relatively stable with slight peak at hour 1"
-        ]
-    }
-
-
-async def _analyze_error_patterns(pg_session, dimensions, metrics, start_time, end_time):
-    """Analyze error patterns."""
-    # Error pattern analysis logic would go here
-    return {
-        "chart_data": [
-            {"error_type": "TimeoutError", "count": 25, "percentage": 45.5},
-            {"error_type": "ConnectionError", "count": 18, "percentage": 32.7},
-            {"error_type": "ValidationError", "count": 12, "percentage": 21.8}
-        ],
-        "summary": {
-            "total_errors": 55,
-            "most_common_error": "TimeoutError",
-            "error_rate": 0.044
-        },
-        "insights": [
-            "TimeoutError is the most common error type",
-            "Consider increasing timeout values for better reliability"
-        ]
-    }
-
-
-async def _analyze_resource_utilization(pg_session, dimensions, metrics, start_time, end_time):
-    """Analyze resource utilization."""
-    # Resource utilization analysis logic would go here
-    return {
-        "chart_data": [
-            {"timestamp": "2025-09-08T12:00:00Z", "cpu": 45.2, "memory": 62.1, "disk": 23.4},
-            {"timestamp": "2025-09-08T12:15:00Z", "cpu": 52.8, "memory": 65.3, "disk": 23.5}
-        ],
-        "summary": {
-            "avg_cpu": 49.0,
-            "avg_memory": 63.7,
-            "avg_disk": 23.45
-        },
-        "insights": [
-            "CPU utilization is moderate",
-            "Memory usage is within acceptable range",
-            "Disk usage is low"
-        ]
-    }