jettask 0.2.23__py3-none-any.whl → 0.2.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jettask/__init__.py +2 -0
- jettask/cli.py +12 -8
- jettask/config/lua_scripts.py +37 -0
- jettask/config/nacos_config.py +1 -1
- jettask/core/app.py +313 -340
- jettask/core/container.py +4 -4
- jettask/{persistence → core}/namespace.py +93 -27
- jettask/core/task.py +16 -9
- jettask/core/unified_manager_base.py +136 -26
- jettask/db/__init__.py +67 -0
- jettask/db/base.py +137 -0
- jettask/{utils/db_connector.py → db/connector.py} +130 -26
- jettask/db/models/__init__.py +16 -0
- jettask/db/models/scheduled_task.py +196 -0
- jettask/db/models/task.py +77 -0
- jettask/db/models/task_run.py +85 -0
- jettask/executor/__init__.py +0 -15
- jettask/executor/core.py +76 -31
- jettask/executor/process_entry.py +29 -114
- jettask/executor/task_executor.py +4 -0
- jettask/messaging/event_pool.py +928 -685
- jettask/messaging/scanner.py +30 -0
- jettask/persistence/__init__.py +28 -103
- jettask/persistence/buffer.py +170 -0
- jettask/persistence/consumer.py +330 -249
- jettask/persistence/manager.py +304 -0
- jettask/persistence/persistence.py +391 -0
- jettask/scheduler/__init__.py +15 -3
- jettask/scheduler/{task_crud.py → database.py} +61 -57
- jettask/scheduler/loader.py +2 -2
- jettask/scheduler/{scheduler_coordinator.py → manager.py} +23 -6
- jettask/scheduler/models.py +14 -10
- jettask/scheduler/schedule.py +166 -0
- jettask/scheduler/scheduler.py +12 -11
- jettask/schemas/__init__.py +50 -1
- jettask/schemas/backlog.py +43 -6
- jettask/schemas/namespace.py +70 -19
- jettask/schemas/queue.py +19 -3
- jettask/schemas/responses.py +493 -0
- jettask/task/__init__.py +0 -2
- jettask/task/router.py +3 -0
- jettask/test_connection_monitor.py +1 -1
- jettask/utils/__init__.py +7 -5
- jettask/utils/db_init.py +8 -4
- jettask/utils/namespace_dep.py +167 -0
- jettask/utils/queue_matcher.py +186 -0
- jettask/utils/rate_limit/concurrency_limiter.py +7 -1
- jettask/utils/stream_backlog.py +1 -1
- jettask/webui/__init__.py +0 -1
- jettask/webui/api/__init__.py +4 -4
- jettask/webui/api/alerts.py +806 -71
- jettask/webui/api/example_refactored.py +400 -0
- jettask/webui/api/namespaces.py +390 -45
- jettask/webui/api/overview.py +300 -54
- jettask/webui/api/queues.py +971 -267
- jettask/webui/api/scheduled.py +1249 -56
- jettask/webui/api/settings.py +129 -7
- jettask/webui/api/workers.py +442 -0
- jettask/webui/app.py +46 -2329
- jettask/webui/middleware/__init__.py +6 -0
- jettask/webui/middleware/namespace_middleware.py +135 -0
- jettask/webui/services/__init__.py +146 -0
- jettask/webui/services/heartbeat_service.py +251 -0
- jettask/webui/services/overview_service.py +60 -51
- jettask/webui/services/queue_monitor_service.py +426 -0
- jettask/webui/services/redis_monitor_service.py +87 -0
- jettask/webui/services/settings_service.py +174 -111
- jettask/webui/services/task_monitor_service.py +222 -0
- jettask/webui/services/timeline_pg_service.py +452 -0
- jettask/webui/services/timeline_service.py +189 -0
- jettask/webui/services/worker_monitor_service.py +467 -0
- jettask/webui/utils/__init__.py +11 -0
- jettask/webui/utils/time_utils.py +122 -0
- jettask/worker/lifecycle.py +8 -2
- {jettask-0.2.23.dist-info → jettask-0.2.24.dist-info}/METADATA +1 -1
- jettask-0.2.24.dist-info/RECORD +142 -0
- jettask/executor/executor.py +0 -338
- jettask/persistence/backlog_monitor.py +0 -567
- jettask/persistence/base.py +0 -2334
- jettask/persistence/db_manager.py +0 -516
- jettask/persistence/maintenance.py +0 -81
- jettask/persistence/message_consumer.py +0 -259
- jettask/persistence/models.py +0 -49
- jettask/persistence/offline_recovery.py +0 -196
- jettask/persistence/queue_discovery.py +0 -215
- jettask/persistence/task_persistence.py +0 -218
- jettask/persistence/task_updater.py +0 -583
- jettask/scheduler/add_execution_count.sql +0 -11
- jettask/scheduler/add_priority_field.sql +0 -26
- jettask/scheduler/add_scheduler_id.sql +0 -25
- jettask/scheduler/add_scheduler_id_index.sql +0 -10
- jettask/scheduler/make_scheduler_id_required.sql +0 -28
- jettask/scheduler/migrate_interval_seconds.sql +0 -9
- jettask/scheduler/performance_optimization.sql +0 -45
- jettask/scheduler/run_scheduler.py +0 -186
- jettask/scheduler/schema.sql +0 -84
- jettask/task/task_executor.py +0 -318
- jettask/webui/api/analytics.py +0 -323
- jettask/webui/config.py +0 -90
- jettask/webui/models/__init__.py +0 -3
- jettask/webui/models/namespace.py +0 -63
- jettask/webui/namespace_manager/__init__.py +0 -10
- jettask/webui/namespace_manager/multi.py +0 -593
- jettask/webui/namespace_manager/unified.py +0 -193
- jettask/webui/run.py +0 -46
- jettask-0.2.23.dist-info/RECORD +0 -145
- {jettask-0.2.23.dist-info → jettask-0.2.24.dist-info}/WHEEL +0 -0
- {jettask-0.2.23.dist-info → jettask-0.2.24.dist-info}/entry_points.txt +0 -0
- {jettask-0.2.23.dist-info → jettask-0.2.24.dist-info}/licenses/LICENSE +0 -0
- {jettask-0.2.23.dist-info → jettask-0.2.24.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,189 @@
|
|
1
|
+
"""
|
2
|
+
时间轴服务
|
3
|
+
|
4
|
+
提供队列任务的时间分布分析功能
|
5
|
+
"""
|
6
|
+
import logging
|
7
|
+
from typing import Dict, Any, List, Optional
|
8
|
+
from datetime import datetime, timedelta, timezone
|
9
|
+
|
10
|
+
from .redis_monitor_service import RedisMonitorService
|
11
|
+
|
12
|
+
logger = logging.getLogger(__name__)
|
13
|
+
|
14
|
+
|
15
|
+
class TimelineService:
    """Timeline service: analyses how queue tasks are distributed over time."""

    def __init__(self, redis_service: "RedisMonitorService"):
        """Initialize the timeline service.

        Args:
            redis_service: Redis monitoring base-service instance.

        Note:
            The annotation is a string (forward reference) so the class can be
            defined without evaluating ``RedisMonitorService`` at import time.
        """
        self.redis_service = redis_service

    @property
    def redis(self):
        """Redis client borrowed from the underlying monitor service."""
        return self.redis_service.redis

    @property
    def redis_prefix(self) -> str:
        """Redis key prefix borrowed from the underlying monitor service."""
        return self.redis_service.redis_prefix

    async def get_redis_timeline(
        self,
        queue_name: str,
        interval: str = "1m",
        duration: str = "1h",
        start_time: Optional[str] = None,
        end_time: Optional[str] = None,
        context: str = "detail"
    ) -> Dict[str, Any]:
        """Build the time distribution of queue tasks from a Redis Stream.

        Args:
            queue_name: Queue name (un-prefixed).
            interval: Bucket size (e.g. '1m', '5m', '1h').
            duration: Look-back window (e.g. '1h', '24h') used when no
                explicit start/end is given.
            start_time: Optional lower bound (stream ID or ms timestamp).
            end_time: Optional upper bound (stream ID, ms timestamp, or '+').
            context: 'overview' (fixed last hour, large fetch limit) or
                'detail' (parameter driven, capped at 10000 messages).

        Returns:
            Timeline payload dict; on failure a dict with an 'error' key and
            an empty 'timeline' list.
        """
        try:
            interval_seconds = self._parse_time_duration(interval)

            # Data limits differ per context.
            if context == "overview":
                # Overview page: always the most recent hour, effectively unbounded.
                duration_seconds = 3600
                now = int(datetime.now(timezone.utc).timestamp() * 1000)
                start = now - duration_seconds * 1000
                min_id = f"{start}-0"
                max_id = "+"
                max_count = 100000
            else:
                # Detail page: honour the parameters but cap at 10000 entries.
                if start_time and end_time:
                    # Caller-supplied range.
                    min_id = start_time
                    max_id = end_time if end_time != '+' else '+'
                else:
                    # Derive the range from the duration parameter.
                    duration_seconds = self._parse_time_duration(duration)
                    now = int(datetime.now(timezone.utc).timestamp() * 1000)
                    start = now - duration_seconds * 1000
                    min_id = f"{start}-0"
                    max_id = "+"
                max_count = 10000

            # Fetch the messages that fall in the requested range.
            prefixed_queue_name = self.redis_service.get_prefixed_queue_name(queue_name)
            messages = await self.redis.xrange(
                prefixed_queue_name,
                min=min_id,
                max=max_id,
                count=max_count
            )

            # Bucket tasks by interval; bucket keys are ms-aligned.
            buckets = {}
            bucket_size = interval_seconds * 1000  # milliseconds

            # Work out the actual range used to generate the axis.
            if start_time and end_time:
                # Parse the range from the parameters.
                if start_time != '-':
                    actual_start = int(start_time.split('-')[0])
                else:
                    # '-' means "from the beginning"; default axis to the last 24h.
                    actual_start = int(datetime.now(timezone.utc).timestamp() * 1000) - 86400000

                if end_time != '+':
                    actual_end = int(end_time.split('-')[0])
                else:
                    actual_end = int(datetime.now(timezone.utc).timestamp() * 1000)
            else:
                # Range computed from the duration parameter above.
                actual_start = start
                actual_end = now

            for msg_id, _ in messages:
                # A stream ID's first component is a millisecond timestamp.
                timestamp = int(msg_id.split('-')[0])
                bucket_key = (timestamp // bucket_size) * bucket_size
                buckets[bucket_key] = buckets.get(bucket_key, 0) + 1

            # Emit a contiguous series, filling empty buckets with zero.
            timeline_data = []
            current_bucket = (actual_start // bucket_size) * bucket_size

            while current_bucket <= actual_end:
                timeline_data.append({
                    "timestamp": current_bucket,
                    "count": buckets.get(current_bucket, 0)
                })
                current_bucket += bucket_size

            total_tasks = len(messages)

            # Detail view may have been truncated by the fetch limit.
            has_more = context == "detail" and total_tasks >= max_count

            logger.info(f"Redis 时间轴: 队列={queue_name}, 任务数={total_tasks}, 数据点={len(timeline_data)}")

            return {
                "timeline": timeline_data,
                "interval": interval,
                "duration": duration,
                "start": actual_start,
                "end": actual_end,
                "total_tasks": total_tasks,
                "message_count": len(messages),
                "has_more": has_more,
                "limit": max_count if context == "detail" else None,
                "source": "redis"
            }

        except Exception as e:
            logger.error(f"获取 Redis 时间轴失败: 队列={queue_name}, 错误={e}", exc_info=True)
            return {
                "timeline": [],
                "error": str(e),
                "source": "redis"
            }

    def _parse_time_duration(self, duration_str: str) -> int:
        """Parse a duration string into seconds.

        Args:
            duration_str: Duration such as '1h', '10m', '30s', '2d', or a
                bare number of seconds ('45').

        Returns:
            The duration in seconds.

        Raises:
            ValueError: If the string is empty or not a valid duration.
        """
        if not duration_str:
            # An empty string previously raised an opaque IndexError here.
            raise ValueError("empty duration string")

        units = {
            's': 1,
            'm': 60,
            'h': 3600,
            'd': 86400
        }

        unit = duration_str[-1]
        if unit in units:
            return int(duration_str[:-1]) * units[unit]

        # No unit suffix: interpret the whole string as seconds.
        return int(duration_str)
|
@@ -0,0 +1,467 @@
|
|
1
|
+
"""
|
2
|
+
Worker 监控服务
|
3
|
+
|
4
|
+
提供 Worker 相关的监控功能
|
5
|
+
"""
|
6
|
+
import logging
|
7
|
+
from typing import Dict, Any, List, Optional
|
8
|
+
from datetime import datetime, timezone
|
9
|
+
import time
|
10
|
+
|
11
|
+
from .redis_monitor_service import RedisMonitorService
|
12
|
+
|
13
|
+
logger = logging.getLogger(__name__)
|
14
|
+
|
15
|
+
|
16
|
+
class WorkerMonitorService:
    """Worker monitoring service: heartbeats, per-queue summaries, and offline history."""

    def __init__(self, redis_service: "RedisMonitorService"):
        """Initialize the worker monitoring service.

        Args:
            redis_service: Redis monitoring base-service instance.

        Note:
            The annotation is a string (forward reference) so the class can be
            defined without evaluating ``RedisMonitorService`` at import time.
        """
        self.redis_service = redis_service

    @property
    def redis(self):
        """Redis client borrowed from the underlying monitor service."""
        return self.redis_service.redis

    @property
    def redis_prefix(self) -> str:
        """Redis key prefix borrowed from the underlying monitor service."""
        return self.redis_service.redis_prefix

    async def _fetch_all_worker_data(self):
        """Fetch every registered worker's hash in one pipelined round trip.

        Shared by the heartbeat, summary, and offline-history methods, which
        previously each duplicated this manager/pipeline boilerplate.

        Returns:
            Tuple ``(worker_keys, worker_hashes)`` as two parallel lists;
            both empty when no workers are registered.
        """
        # Imported lazily, mirroring the original per-method imports
        # (presumably to avoid an import cycle — TODO confirm).
        from jettask.worker.lifecycle import WorkerStateManager

        worker_manager = WorkerStateManager(
            redis_client=self.redis,
            redis_prefix=self.redis_prefix
        )

        worker_ids = await worker_manager.get_all_workers()
        worker_keys = [f"{self.redis_prefix}:WORKER:{wid}" for wid in worker_ids]
        if not worker_keys:
            return [], []

        # Batch all HGETALLs into a single pipeline round trip.
        pipe = self.redis.pipeline()
        for key in worker_keys:
            pipe.hgetall(key)
        return worker_keys, await pipe.execute()

    async def get_worker_heartbeats(self, queue_name: str) -> List[Dict[str, Any]]:
        """Get heartbeat information for every worker serving a queue.

        Args:
            queue_name: Queue name.

        Returns:
            List of per-worker heartbeat/statistics dicts; empty on error.
        """
        worker_list = []
        current_time = datetime.now(timezone.utc).timestamp()

        try:
            worker_keys, all_workers_data = await self._fetch_all_worker_data()
            if not worker_keys:
                logger.debug(f"No workers found for queue {queue_name}")
                return []

            for i, worker_data in enumerate(all_workers_data):
                if not worker_data:
                    continue

                # Only include workers that serve the requested queue.
                worker_queues = worker_data.get('queues', '')
                if queue_name not in worker_queues.split(','):
                    continue

                worker_id = worker_keys[i].split(':')[-1]
                last_heartbeat = float(worker_data.get('last_heartbeat', 0))
                is_alive = worker_data.get('is_alive', 'true').lower() == 'true'
                consumer_id = worker_data.get('consumer_id', worker_id)

                display_data = {
                    'consumer_id': consumer_id,
                    'consumer_name': f"{consumer_id}-{queue_name}",
                    'host': worker_data.get('host', 'unknown'),
                    'pid': int(worker_data.get('pid', 0)),
                    'queue': queue_name,
                    'last_heartbeat': last_heartbeat,
                    # NOTE(review): naive local-time conversion, while
                    # current_time above is UTC — confirm this is intentional.
                    'last_heartbeat_time': datetime.fromtimestamp(last_heartbeat).isoformat(),
                    'seconds_ago': int(current_time - last_heartbeat),
                    'is_alive': is_alive,
                    # Per-queue statistics are stored as '<queue>:<metric>' hash fields.
                    'success_count': int(worker_data.get(f'{queue_name}:success_count', 0)),
                    'failed_count': int(worker_data.get(f'{queue_name}:failed_count', 0)),
                    'total_count': int(worker_data.get(f'{queue_name}:total_count', 0)),
                    'running_tasks': int(worker_data.get(f'{queue_name}:running_tasks', 0)),
                    'avg_processing_time': float(worker_data.get(f'{queue_name}:avg_processing_time', 0.0)),
                    'avg_latency_time': float(worker_data.get(f'{queue_name}:avg_latency_time', 0.0))
                }

                # Attach offline info when the worker has gone offline.
                if 'offline_time' in worker_data:
                    display_data['offline_time'] = float(worker_data['offline_time'])
                    display_data['offline_time_formatted'] = datetime.fromtimestamp(
                        float(worker_data['offline_time'])
                    ).isoformat()

                worker_list.append(display_data)

            logger.info(f"Retrieved {len(worker_list)} workers for queue {queue_name}")
            return worker_list

        except Exception as e:
            logger.error(f"Error getting worker heartbeats for queue {queue_name}: {e}", exc_info=True)
            return []

    async def get_queue_worker_summary(self, queue_name: str) -> Dict[str, Any]:
        """Aggregate worker statistics for a queue, including offline history.

        Args:
            queue_name: Queue name.

        Returns:
            Summary statistics dict (with ``history_included=True``).
        """
        try:
            worker_keys, all_workers_data = await self._fetch_all_worker_data()
            if not worker_keys:
                return self._empty_summary()

            # Keep only workers that serve this queue.
            queue_workers_data = [
                wd for wd in all_workers_data
                if wd and queue_name in wd.get('queues', '').split(',')
            ]

            stats = self._calculate_worker_stats(queue_workers_data, queue_name, include_history=True)
            stats['history_included'] = True

            logger.debug(f"Worker summary for queue {queue_name}: {stats}")
            return stats

        except Exception as e:
            logger.error(f"Error getting queue worker summary for {queue_name}: {e}", exc_info=True)
            return self._empty_summary()

    async def get_queue_worker_summary_fast(self, queue_name: str) -> Dict[str, Any]:
        """Aggregate worker statistics for a queue (fast: online workers only).

        Args:
            queue_name: Queue name.

        Returns:
            Summary statistics dict.
        """
        try:
            worker_keys, all_workers_data = await self._fetch_all_worker_data()
            if not worker_keys:
                return self._empty_summary()

            # Keep only workers that serve this queue.
            worker_data_list = [
                wd for wd in all_workers_data
                if wd and queue_name in wd.get('queues', '').split(',')
            ]

            # Online workers only: offline workers' counters are skipped.
            stats = self._calculate_worker_stats(worker_data_list, queue_name, include_history=False)

            logger.debug(f"Fast worker summary for queue {queue_name}: {stats}")
            return stats

        except Exception as e:
            logger.error(f"Error getting fast queue worker summary for {queue_name}: {e}", exc_info=True)
            return self._empty_summary()

    async def get_worker_offline_history(
        self,
        limit: int = 100,
        start_time: Optional[float] = None,
        end_time: Optional[float] = None
    ) -> List[Dict[str, Any]]:
        """Get worker offline (shutdown) history records.

        Args:
            limit: Maximum number of records to return.
            start_time: Optional lower bound on the offline timestamp.
            end_time: Optional upper bound on the offline timestamp.

        Returns:
            Offline records, most recent first; empty on error.
        """
        try:
            worker_keys, all_workers_data = await self._fetch_all_worker_data()
            if not worker_keys:
                return []

            offline_workers = []

            for worker_data in all_workers_data:
                if not worker_data:
                    continue

                # Only workers that are flagged offline with a recorded time.
                is_alive = worker_data.get('is_alive', 'true').lower() == 'true'
                if not is_alive and 'offline_time' in worker_data:
                    offline_time = float(worker_data.get('offline_time', 0))

                    # Optional time-range filter.
                    if start_time and offline_time < start_time:
                        continue
                    if end_time and offline_time > end_time:
                        continue

                    record = self._build_offline_record(worker_data, offline_time)
                    offline_workers.append((offline_time, record))

            # Most recently offline first.
            offline_workers.sort(key=lambda x: x[0], reverse=True)

            result = [record for _, record in offline_workers[:limit]]
            logger.info(f"Retrieved {len(result)} offline worker records")
            return result

        except Exception as e:
            logger.error(f"Error getting worker offline history: {e}", exc_info=True)
            return []

    def _empty_summary(self) -> Dict[str, Any]:
        """Return an all-zero summary dict (used for empty and error paths)."""
        return {
            'total_workers': 0,
            'online_workers': 0,
            'offline_workers': 0,
            'total_success_count': 0,
            'total_failed_count': 0,
            'total_count': 0,
            'total_running_tasks': 0,
            'avg_processing_time': 0.0,
            'avg_latency_time': 0.0
        }

    def _calculate_worker_stats(
        self,
        workers_data: List[Dict[str, Any]],
        queue_name: str,
        include_history: bool
    ) -> Dict[str, Any]:
        """Compute aggregate worker statistics for one queue.

        Args:
            workers_data: Worker hash dicts (already filtered to the queue).
            queue_name: Queue name, used to pick the per-queue hash fields.
            include_history: When False, offline workers are counted but
                their task counters are excluded from the totals.

        Returns:
            Aggregated statistics dict.
        """
        total_workers = len(workers_data)
        online_workers = 0
        offline_workers = 0
        total_success_count = 0
        total_failed_count = 0
        total_count = 0
        total_running_tasks = 0
        total_processing_time = 0.0
        processing_time_count = 0
        total_latency_time = 0.0
        latency_time_count = 0

        current_time = datetime.now(timezone.utc).timestamp()

        for worker_data in workers_data:
            try:
                last_heartbeat = float(worker_data.get('last_heartbeat', 0))
                is_alive = worker_data.get('is_alive', 'true').lower() == 'true'

                # "Online" = flagged alive AND heartbeat within the last 30s.
                is_online = is_alive and (current_time - last_heartbeat) < 30

                if is_online:
                    online_workers += 1
                else:
                    offline_workers += 1
                    # Without history, skip offline workers' counters.
                    if not include_history:
                        continue

                success_count = int(worker_data.get(f'{queue_name}:success_count', 0))
                failed_count = int(worker_data.get(f'{queue_name}:failed_count', 0))
                running_tasks = int(worker_data.get(f'{queue_name}:running_tasks', 0))
                avg_processing_time = float(worker_data.get(f'{queue_name}:avg_processing_time', 0.0))
                avg_latency_time = float(worker_data.get(f'{queue_name}:avg_latency_time', 0.0))

                total_success_count += success_count
                total_failed_count += failed_count
                total_count += success_count + failed_count
                total_running_tasks += running_tasks

                # Only workers that reported a positive average contribute
                # to the overall averages.
                if avg_processing_time > 0:
                    total_processing_time += avg_processing_time
                    processing_time_count += 1

                if avg_latency_time > 0:
                    total_latency_time += avg_latency_time
                    latency_time_count += 1

            except Exception as e:
                # A malformed worker hash must not break the whole summary.
                logger.warning(f"Error processing worker stats: {e}")
                continue

        overall_avg_processing_time = 0.0
        if processing_time_count > 0:
            overall_avg_processing_time = total_processing_time / processing_time_count

        overall_avg_latency_time = 0.0
        if latency_time_count > 0:
            overall_avg_latency_time = total_latency_time / latency_time_count

        return {
            'total_workers': total_workers,
            'online_workers': online_workers,
            'offline_workers': offline_workers,
            'total_success_count': total_success_count,
            'total_failed_count': total_failed_count,
            'total_count': total_count,
            'total_running_tasks': total_running_tasks,
            'avg_processing_time': round(overall_avg_processing_time, 3),
            'avg_latency_time': round(overall_avg_latency_time, 3)
        }

    def _build_offline_record(self, worker_data: Dict[str, Any], offline_time: float) -> Dict[str, Any]:
        """Build an offline-history record from a worker hash.

        Args:
            worker_data: Worker hash dict.
            offline_time: Timestamp at which the worker went offline.

        Returns:
            Offline record dict with per-worker totals across all its queues.
        """
        # Uptime = offline time minus creation time (fall back to zero uptime).
        online_time = float(worker_data.get('created_at', offline_time))
        duration_seconds = int(offline_time - online_time)

        record = {
            'consumer_id': worker_data.get('consumer_id', ''),
            'host': worker_data.get('host', 'unknown'),
            'pid': int(worker_data.get('pid', 0)),
            'queues': worker_data.get('queues', ''),
            'online_time': online_time,
            'offline_time': offline_time,
            'duration_seconds': duration_seconds,
            'last_heartbeat': float(worker_data.get('last_heartbeat', 0)),
            'shutdown_reason': worker_data.get('shutdown_reason', 'unknown'),
            # NOTE(review): naive local-time conversion — confirm intentional.
            'online_time_str': datetime.fromtimestamp(online_time).isoformat(),
            'offline_time_str': datetime.fromtimestamp(offline_time).isoformat(),
        }

        # Human-readable uptime.
        hours = duration_seconds // 3600
        minutes = (duration_seconds % 3600) // 60
        seconds = duration_seconds % 60
        record['duration_str'] = f"{hours}h {minutes}m {seconds}s"

        # Aggregate statistics across every queue this worker served.
        queues = worker_data.get('queues', '').split(',') if worker_data.get('queues') else []
        total_success = 0
        total_failed = 0
        total_count = 0
        total_processing_time = 0.0

        for queue in queues:
            if queue.strip():
                queue = queue.strip()
                total_success += int(worker_data.get(f'{queue}:success_count', 0))
                total_failed += int(worker_data.get(f'{queue}:failed_count', 0))
                count = int(worker_data.get(f'{queue}:total_count', 0))
                total_count += count

                # Weight each queue's average by its task count.
                avg_time = float(worker_data.get(f'{queue}:avg_processing_time', 0))
                if avg_time > 0 and count > 0:
                    total_processing_time += avg_time * count

        record['total_success_count'] = total_success
        record['total_failed_count'] = total_failed
        record['total_count'] = total_count
        record['total_running_tasks'] = 0  # an offline worker runs nothing

        # Overall average weighted by task count.
        if total_count > 0:
            record['avg_processing_time'] = total_processing_time / total_count
        else:
            record['avg_processing_time'] = 0.0

        return record
|