jettask 0.2.20__py3-none-any.whl → 0.2.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. jettask/__init__.py +4 -0
  2. jettask/cli.py +12 -8
  3. jettask/config/lua_scripts.py +37 -0
  4. jettask/config/nacos_config.py +1 -1
  5. jettask/core/app.py +313 -340
  6. jettask/core/container.py +4 -4
  7. jettask/{persistence → core}/namespace.py +93 -27
  8. jettask/core/task.py +16 -9
  9. jettask/core/unified_manager_base.py +136 -26
  10. jettask/db/__init__.py +67 -0
  11. jettask/db/base.py +137 -0
  12. jettask/{utils/db_connector.py → db/connector.py} +130 -26
  13. jettask/db/models/__init__.py +16 -0
  14. jettask/db/models/scheduled_task.py +196 -0
  15. jettask/db/models/task.py +77 -0
  16. jettask/db/models/task_run.py +85 -0
  17. jettask/executor/__init__.py +0 -15
  18. jettask/executor/core.py +76 -31
  19. jettask/executor/process_entry.py +29 -114
  20. jettask/executor/task_executor.py +4 -0
  21. jettask/messaging/event_pool.py +928 -685
  22. jettask/messaging/scanner.py +30 -0
  23. jettask/persistence/__init__.py +28 -103
  24. jettask/persistence/buffer.py +170 -0
  25. jettask/persistence/consumer.py +330 -249
  26. jettask/persistence/manager.py +304 -0
  27. jettask/persistence/persistence.py +391 -0
  28. jettask/scheduler/__init__.py +15 -3
  29. jettask/scheduler/{task_crud.py → database.py} +61 -57
  30. jettask/scheduler/loader.py +2 -2
  31. jettask/scheduler/{scheduler_coordinator.py → manager.py} +23 -6
  32. jettask/scheduler/models.py +14 -10
  33. jettask/scheduler/schedule.py +166 -0
  34. jettask/scheduler/scheduler.py +12 -11
  35. jettask/schemas/__init__.py +50 -1
  36. jettask/schemas/backlog.py +43 -6
  37. jettask/schemas/namespace.py +70 -19
  38. jettask/schemas/queue.py +19 -3
  39. jettask/schemas/responses.py +493 -0
  40. jettask/task/__init__.py +0 -2
  41. jettask/task/router.py +3 -0
  42. jettask/test_connection_monitor.py +1 -1
  43. jettask/utils/__init__.py +7 -5
  44. jettask/utils/db_init.py +8 -4
  45. jettask/utils/namespace_dep.py +167 -0
  46. jettask/utils/queue_matcher.py +186 -0
  47. jettask/utils/rate_limit/concurrency_limiter.py +7 -1
  48. jettask/utils/stream_backlog.py +1 -1
  49. jettask/webui/__init__.py +0 -1
  50. jettask/webui/api/__init__.py +4 -4
  51. jettask/webui/api/alerts.py +806 -71
  52. jettask/webui/api/example_refactored.py +400 -0
  53. jettask/webui/api/namespaces.py +390 -45
  54. jettask/webui/api/overview.py +300 -54
  55. jettask/webui/api/queues.py +971 -267
  56. jettask/webui/api/scheduled.py +1249 -56
  57. jettask/webui/api/settings.py +129 -7
  58. jettask/webui/api/workers.py +442 -0
  59. jettask/webui/app.py +46 -2329
  60. jettask/webui/middleware/__init__.py +6 -0
  61. jettask/webui/middleware/namespace_middleware.py +135 -0
  62. jettask/webui/services/__init__.py +146 -0
  63. jettask/webui/services/heartbeat_service.py +251 -0
  64. jettask/webui/services/overview_service.py +60 -51
  65. jettask/webui/services/queue_monitor_service.py +426 -0
  66. jettask/webui/services/redis_monitor_service.py +87 -0
  67. jettask/webui/services/settings_service.py +174 -111
  68. jettask/webui/services/task_monitor_service.py +222 -0
  69. jettask/webui/services/timeline_pg_service.py +452 -0
  70. jettask/webui/services/timeline_service.py +189 -0
  71. jettask/webui/services/worker_monitor_service.py +467 -0
  72. jettask/webui/utils/__init__.py +11 -0
  73. jettask/webui/utils/time_utils.py +122 -0
  74. jettask/worker/lifecycle.py +8 -2
  75. {jettask-0.2.20.dist-info → jettask-0.2.24.dist-info}/METADATA +1 -1
  76. jettask-0.2.24.dist-info/RECORD +142 -0
  77. jettask/executor/executor.py +0 -338
  78. jettask/persistence/backlog_monitor.py +0 -567
  79. jettask/persistence/base.py +0 -2334
  80. jettask/persistence/db_manager.py +0 -516
  81. jettask/persistence/maintenance.py +0 -81
  82. jettask/persistence/message_consumer.py +0 -259
  83. jettask/persistence/models.py +0 -49
  84. jettask/persistence/offline_recovery.py +0 -196
  85. jettask/persistence/queue_discovery.py +0 -215
  86. jettask/persistence/task_persistence.py +0 -218
  87. jettask/persistence/task_updater.py +0 -583
  88. jettask/scheduler/add_execution_count.sql +0 -11
  89. jettask/scheduler/add_priority_field.sql +0 -26
  90. jettask/scheduler/add_scheduler_id.sql +0 -25
  91. jettask/scheduler/add_scheduler_id_index.sql +0 -10
  92. jettask/scheduler/make_scheduler_id_required.sql +0 -28
  93. jettask/scheduler/migrate_interval_seconds.sql +0 -9
  94. jettask/scheduler/performance_optimization.sql +0 -45
  95. jettask/scheduler/run_scheduler.py +0 -186
  96. jettask/scheduler/schema.sql +0 -84
  97. jettask/task/task_executor.py +0 -318
  98. jettask/webui/api/analytics.py +0 -323
  99. jettask/webui/config.py +0 -90
  100. jettask/webui/models/__init__.py +0 -3
  101. jettask/webui/models/namespace.py +0 -63
  102. jettask/webui/namespace_manager/__init__.py +0 -10
  103. jettask/webui/namespace_manager/multi.py +0 -593
  104. jettask/webui/namespace_manager/unified.py +0 -193
  105. jettask/webui/run.py +0 -46
  106. jettask-0.2.20.dist-info/RECORD +0 -145
  107. {jettask-0.2.20.dist-info → jettask-0.2.24.dist-info}/WHEEL +0 -0
  108. {jettask-0.2.20.dist-info → jettask-0.2.24.dist-info}/entry_points.txt +0 -0
  109. {jettask-0.2.20.dist-info → jettask-0.2.24.dist-info}/licenses/LICENSE +0 -0
  110. {jettask-0.2.20.dist-info → jettask-0.2.24.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,189 @@
1
+ """
2
+ 时间轴服务
3
+
4
+ 提供队列任务的时间分布分析功能
5
+ """
6
+ import logging
7
+ from typing import Dict, Any, List, Optional
8
+ from datetime import datetime, timedelta, timezone
9
+
10
+ from .redis_monitor_service import RedisMonitorService
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
class TimelineService:
    """Timeline service.

    Builds time-distribution histograms of queue tasks by reading message
    ids straight from the queue's Redis Stream (stream ids embed a
    millisecond timestamp) and bucketing them into fixed-size intervals.
    """

    def __init__(self, redis_service: "RedisMonitorService"):
        """
        Args:
            redis_service: shared Redis monitoring base-service instance.
        """
        self.redis_service = redis_service

    @property
    def redis(self):
        """Async Redis client owned by the base service."""
        return self.redis_service.redis

    @property
    def redis_prefix(self) -> str:
        """Redis key prefix for the current namespace."""
        return self.redis_service.redis_prefix

    async def get_redis_timeline(
        self,
        queue_name: str,
        interval: str = "1m",
        duration: str = "1h",
        start_time: Optional[str] = None,
        end_time: Optional[str] = None,
        context: str = "detail"
    ) -> Dict[str, Any]:
        """Return per-interval task counts for a queue's Redis Stream.

        Args:
            queue_name: plain queue name (the Redis prefix is added internally).
            interval: bucket width, e.g. '1m', '5m', '1h'.
            duration: look-back window, e.g. '1h', '24h'; ignored when an
                explicit start/end range is supplied in detail context.
            start_time: optional stream-id style lower bound ("<ms>-<seq>" or '-').
            end_time: optional stream-id style upper bound ("<ms>-<seq>" or '+').
            context: 'overview' (fixed last hour, high message cap) or
                'detail' (caller-driven range, capped at 10000 messages).

        Returns:
            Dict with the zero-filled timeline plus range/limit metadata,
            or a dict carrying an 'error' key on failure.
        """
        try:
            interval_seconds = self._parse_time_duration(interval)

            # Decide the fetch range AND the axis range in one place so the
            # returned timeline always matches the data actually fetched
            # (previously an explicit start/end in overview context skewed
            # the axis away from the fetched last-hour window).
            if context == "overview":
                # Overview page: always the most recent hour, effectively uncapped.
                now = int(datetime.now(timezone.utc).timestamp() * 1000)
                actual_start = now - 3600 * 1000
                actual_end = now
                min_id = f"{actual_start}-0"
                max_id = "+"
                max_count = 100000
            else:
                max_count = 10000  # detail page: hard cap on fetched messages
                if start_time and end_time:
                    # Caller supplied an explicit stream-id range.
                    min_id = start_time
                    max_id = end_time if end_time != '+' else '+'
                    now = int(datetime.now(timezone.utc).timestamp() * 1000)
                    # '-' / '+' sentinels fall back to "last 24h" / "now".
                    actual_start = int(start_time.split('-')[0]) if start_time != '-' else now - 86400000
                    actual_end = int(end_time.split('-')[0]) if end_time != '+' else now
                else:
                    # Derive the range from the duration parameter.
                    duration_seconds = self._parse_time_duration(duration)
                    now = int(datetime.now(timezone.utc).timestamp() * 1000)
                    actual_start = now - duration_seconds * 1000
                    actual_end = now
                    min_id = f"{actual_start}-0"
                    max_id = "+"

            # Fetch the message ids inside the requested window.
            prefixed_queue_name = self.redis_service.get_prefixed_queue_name(queue_name)
            messages = await self.redis.xrange(
                prefixed_queue_name,
                min=min_id,
                max=max_id,
                count=max_count
            )

            # Bucket counts by interval.
            bucket_size = interval_seconds * 1000  # bucket width in milliseconds
            buckets: Dict[int, int] = {}
            for msg_id, _ in messages:
                # Stream ids look like "<ms>-<seq>"; assumes the client decodes
                # responses to str (decode_responses=True) -- TODO confirm.
                timestamp = int(msg_id.split('-')[0])
                bucket_key = (timestamp // bucket_size) * bucket_size
                buckets[bucket_key] = buckets.get(bucket_key, 0) + 1

            # Emit a continuous axis with zero-filled gaps.
            timeline_data = []
            current_bucket = (actual_start // bucket_size) * bucket_size
            while current_bucket <= actual_end:
                timeline_data.append({
                    "timestamp": current_bucket,
                    "count": buckets.get(current_bucket, 0)
                })
                current_bucket += bucket_size

            total_tasks = len(messages)

            # Hitting the cap means the range probably holds more messages.
            has_more = context == "detail" and total_tasks >= max_count

            logger.info(f"Redis 时间轴: 队列={queue_name}, 任务数={total_tasks}, 数据点={len(timeline_data)}")

            return {
                "timeline": timeline_data,
                "interval": interval,
                "duration": duration,
                "start": actual_start,
                "end": actual_end,
                "total_tasks": total_tasks,
                "message_count": len(messages),
                "has_more": has_more,
                "limit": max_count if context == "detail" else None,
                "source": "redis"
            }

        except Exception as e:
            logger.error(f"获取 Redis 时间轴失败: 队列={queue_name}, 错误={e}", exc_info=True)
            return {
                "timeline": [],
                "error": str(e),
                "source": "redis"
            }

    def _parse_time_duration(self, duration_str: str) -> int:
        """Parse a duration string like '1h', '10m', '30s' into seconds.

        A trailing unit of s/m/h/d is honoured; a bare number is taken as
        seconds.

        Raises:
            ValueError: on empty or malformed input (the previous
                implementation raised IndexError on an empty string).
        """
        units = {
            's': 1,
            'm': 60,
            'h': 3600,
            'd': 86400
        }

        if not duration_str:
            raise ValueError("empty duration string")

        if duration_str[-1] in units:
            return int(duration_str[:-1]) * units[duration_str[-1]]

        # No unit suffix: interpret as plain seconds.
        return int(duration_str)
@@ -0,0 +1,467 @@
1
+ """
2
+ Worker 监控服务
3
+
4
+ 提供 Worker 相关的监控功能
5
+ """
6
+ import logging
7
+ from typing import Dict, Any, List, Optional
8
+ from datetime import datetime, timezone
9
+ import time
10
+
11
+ from .redis_monitor_service import RedisMonitorService
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class WorkerMonitorService:
    """Worker monitoring service.

    Reads per-worker hashes (``<prefix>:WORKER:<id>``) from Redis and
    aggregates heartbeat, throughput and offline-history information.
    """

    def __init__(self, redis_service: "RedisMonitorService"):
        """
        Args:
            redis_service: shared Redis monitoring base-service instance.
        """
        self.redis_service = redis_service

    @property
    def redis(self):
        """Async Redis client owned by the base service."""
        return self.redis_service.redis

    @property
    def redis_prefix(self) -> str:
        """Redis key prefix for the current namespace."""
        return self.redis_service.redis_prefix

    async def _fetch_all_worker_data(self):
        """Fetch every registered worker's hash in one pipeline round-trip.

        Shared by all public methods (they previously each duplicated this
        WorkerStateManager + pipeline boilerplate).

        Returns:
            Tuple ``(worker_keys, worker_hashes)``, index-aligned; both
            empty when no workers are registered.
        """
        # Imported lazily, mirroring the original code (presumably to avoid
        # an import cycle with the worker package -- TODO confirm).
        from jettask.worker.lifecycle import WorkerStateManager

        worker_manager = WorkerStateManager(
            redis_client=self.redis,
            redis_prefix=self.redis_prefix
        )

        worker_ids = await worker_manager.get_all_workers()
        worker_keys = [f"{self.redis_prefix}:WORKER:{wid}" for wid in worker_ids]
        if not worker_keys:
            return [], []

        # Batch all HGETALLs into a single pipeline execution.
        pipe = self.redis.pipeline()
        for key in worker_keys:
            pipe.hgetall(key)
        return worker_keys, await pipe.execute()

    @staticmethod
    def _belongs_to_queue(worker_data: Dict[str, Any], queue_name: str) -> bool:
        """True when the worker hash exists and serves *queue_name*.

        The 'queues' field is a comma-separated list of queue names.
        """
        return bool(worker_data) and queue_name in worker_data.get('queues', '').split(',')

    async def get_worker_heartbeats(self, queue_name: str) -> List[Dict[str, Any]]:
        """Return heartbeat/stat records for every worker serving a queue.

        Args:
            queue_name: queue name.

        Returns:
            List of per-worker display dicts; empty list on error.
        """
        worker_list: List[Dict[str, Any]] = []
        current_time = datetime.now(timezone.utc).timestamp()

        try:
            worker_keys, all_workers_data = await self._fetch_all_worker_data()
            if not worker_keys:
                logger.debug(f"No workers found for queue {queue_name}")
                return []

            for i, worker_data in enumerate(all_workers_data):
                if not self._belongs_to_queue(worker_data, queue_name):
                    continue

                worker_id = worker_keys[i].split(':')[-1]
                last_heartbeat = float(worker_data.get('last_heartbeat', 0))
                is_alive = worker_data.get('is_alive', 'true').lower() == 'true'
                consumer_id = worker_data.get('consumer_id', worker_id)

                display_data = {
                    'consumer_id': consumer_id,
                    'consumer_name': f"{consumer_id}-{queue_name}",
                    'host': worker_data.get('host', 'unknown'),
                    'pid': int(worker_data.get('pid', 0)),
                    'queue': queue_name,
                    'last_heartbeat': last_heartbeat,
                    # NOTE(review): fromtimestamp() renders server-local time;
                    # confirm callers do not expect UTC here.
                    'last_heartbeat_time': datetime.fromtimestamp(last_heartbeat).isoformat(),
                    'seconds_ago': int(current_time - last_heartbeat),
                    'is_alive': is_alive,
                    # Per-queue counters are stored as "<queue>:<metric>" hash fields.
                    'success_count': int(worker_data.get(f'{queue_name}:success_count', 0)),
                    'failed_count': int(worker_data.get(f'{queue_name}:failed_count', 0)),
                    'total_count': int(worker_data.get(f'{queue_name}:total_count', 0)),
                    'running_tasks': int(worker_data.get(f'{queue_name}:running_tasks', 0)),
                    'avg_processing_time': float(worker_data.get(f'{queue_name}:avg_processing_time', 0.0)),
                    'avg_latency_time': float(worker_data.get(f'{queue_name}:avg_latency_time', 0.0))
                }

                # Surface the offline timestamp when the worker has one.
                if 'offline_time' in worker_data:
                    display_data['offline_time'] = float(worker_data['offline_time'])
                    display_data['offline_time_formatted'] = datetime.fromtimestamp(
                        float(worker_data['offline_time'])
                    ).isoformat()

                worker_list.append(display_data)

            logger.info(f"Retrieved {len(worker_list)} workers for queue {queue_name}")
            return worker_list

        except Exception as e:
            logger.error(f"Error getting worker heartbeats for queue {queue_name}: {e}", exc_info=True)
            return []

    async def get_queue_worker_summary(self, queue_name: str) -> Dict[str, Any]:
        """Aggregate worker statistics for a queue, offline workers included.

        Args:
            queue_name: queue name.

        Returns:
            Summary statistics dict (see ``_calculate_worker_stats``), with
            'history_included': True; empty summary on error or no workers.
        """
        try:
            worker_keys, all_workers_data = await self._fetch_all_worker_data()
            if not worker_keys:
                return self._empty_summary()

            queue_workers_data = [
                d for d in all_workers_data if self._belongs_to_queue(d, queue_name)
            ]

            stats = self._calculate_worker_stats(queue_workers_data, queue_name, include_history=True)
            stats['history_included'] = True

            logger.debug(f"Worker summary for queue {queue_name}: {stats}")
            return stats

        except Exception as e:
            logger.error(f"Error getting queue worker summary for {queue_name}: {e}", exc_info=True)
            return self._empty_summary()

    async def get_queue_worker_summary_fast(self, queue_name: str) -> Dict[str, Any]:
        """Aggregate worker statistics for a queue, online workers only.

        Fast path: offline workers are counted but excluded from the totals.

        Args:
            queue_name: queue name.

        Returns:
            Summary statistics dict; empty summary on error or no workers.
        """
        try:
            worker_keys, all_workers_data = await self._fetch_all_worker_data()
            if not worker_keys:
                return self._empty_summary()

            worker_data_list = [
                d for d in all_workers_data if self._belongs_to_queue(d, queue_name)
            ]

            stats = self._calculate_worker_stats(worker_data_list, queue_name, include_history=False)

            logger.debug(f"Fast worker summary for queue {queue_name}: {stats}")
            return stats

        except Exception as e:
            logger.error(f"Error getting fast queue worker summary for {queue_name}: {e}", exc_info=True)
            return self._empty_summary()

    async def get_worker_offline_history(
        self,
        limit: int = 100,
        start_time: Optional[float] = None,
        end_time: Optional[float] = None
    ) -> List[Dict[str, Any]]:
        """Return records of workers that went offline, newest first.

        Args:
            limit: maximum number of records returned.
            start_time: optional lower bound on the offline timestamp.
            end_time: optional upper bound on the offline timestamp.

        Returns:
            List of offline-record dicts; empty list on error.
        """
        try:
            worker_keys, all_workers_data = await self._fetch_all_worker_data()
            if not worker_keys:
                return []

            offline_workers = []
            for worker_data in all_workers_data:
                if not worker_data:
                    continue

                # Only workers flagged dead AND carrying an offline timestamp count.
                is_alive = worker_data.get('is_alive', 'true').lower() == 'true'
                if is_alive or 'offline_time' not in worker_data:
                    continue

                offline_time = float(worker_data.get('offline_time', 0))

                # Window filtering (falsy bounds disable the respective check,
                # matching the original truthiness-based behaviour).
                if start_time and offline_time < start_time:
                    continue
                if end_time and offline_time > end_time:
                    continue

                offline_workers.append(
                    (offline_time, self._build_offline_record(worker_data, offline_time))
                )

            # Most recently offline first.
            offline_workers.sort(key=lambda item: item[0], reverse=True)

            result = [record for _, record in offline_workers[:limit]]
            logger.info(f"Retrieved {len(result)} offline worker records")
            return result

        except Exception as e:
            logger.error(f"Error getting worker offline history: {e}", exc_info=True)
            return []

    def _empty_summary(self) -> Dict[str, Any]:
        """Return an all-zero summary statistics dict."""
        return {
            'total_workers': 0,
            'online_workers': 0,
            'offline_workers': 0,
            'total_success_count': 0,
            'total_failed_count': 0,
            'total_count': 0,
            'total_running_tasks': 0,
            'avg_processing_time': 0.0,
            'avg_latency_time': 0.0
        }

    def _calculate_worker_stats(
        self,
        workers_data: List[Dict[str, Any]],
        queue_name: str,
        include_history: bool
    ) -> Dict[str, Any]:
        """Aggregate per-worker hashes into one summary dict.

        Args:
            workers_data: worker hash dicts (string-valued fields from Redis).
            queue_name: queue whose "<queue>:<metric>" fields are summed.
            include_history: when False, offline workers are counted but
                excluded from all totals and averages.

        Returns:
            Summary statistics dict (same keys as ``_empty_summary``).
        """
        total_workers = len(workers_data)
        online_workers = 0
        offline_workers = 0
        total_success_count = 0
        total_failed_count = 0
        total_count = 0
        total_running_tasks = 0
        total_processing_time = 0.0
        processing_time_count = 0
        total_latency_time = 0.0
        latency_time_count = 0

        current_time = datetime.now(timezone.utc).timestamp()

        for worker_data in workers_data:
            try:
                last_heartbeat = float(worker_data.get('last_heartbeat', 0))
                is_alive = worker_data.get('is_alive', 'true').lower() == 'true'

                # A worker is "online" only if flagged alive AND heartbeat
                # is fresher than 30 seconds.
                is_online = is_alive and (current_time - last_heartbeat) < 30

                if is_online:
                    online_workers += 1
                else:
                    offline_workers += 1
                    # Skip offline workers' numbers unless history is requested.
                    if not include_history:
                        continue

                success_count = int(worker_data.get(f'{queue_name}:success_count', 0))
                failed_count = int(worker_data.get(f'{queue_name}:failed_count', 0))
                running_tasks = int(worker_data.get(f'{queue_name}:running_tasks', 0))
                avg_processing_time = float(worker_data.get(f'{queue_name}:avg_processing_time', 0.0))
                avg_latency_time = float(worker_data.get(f'{queue_name}:avg_latency_time', 0.0))

                total_success_count += success_count
                total_failed_count += failed_count
                total_count += success_count + failed_count
                total_running_tasks += running_tasks

                # Averages are averaged only over workers reporting a value.
                if avg_processing_time > 0:
                    total_processing_time += avg_processing_time
                    processing_time_count += 1

                if avg_latency_time > 0:
                    total_latency_time += avg_latency_time
                    latency_time_count += 1

            except Exception as e:
                # A malformed worker hash must not sink the whole summary.
                logger.warning(f"Error processing worker stats: {e}")
                continue

        overall_avg_processing_time = (
            total_processing_time / processing_time_count if processing_time_count > 0 else 0.0
        )
        overall_avg_latency_time = (
            total_latency_time / latency_time_count if latency_time_count > 0 else 0.0
        )

        return {
            'total_workers': total_workers,
            'online_workers': online_workers,
            'offline_workers': offline_workers,
            'total_success_count': total_success_count,
            'total_failed_count': total_failed_count,
            'total_count': total_count,
            'total_running_tasks': total_running_tasks,
            'avg_processing_time': round(overall_avg_processing_time, 3),
            'avg_latency_time': round(overall_avg_latency_time, 3)
        }

    def _build_offline_record(self, worker_data: Dict[str, Any], offline_time: float) -> Dict[str, Any]:
        """Build a single offline-history record from a worker hash.

        Args:
            worker_data: the worker's Redis hash (string-valued fields).
            offline_time: the worker's offline timestamp (seconds).

        Returns:
            Record dict with identity, lifetime, and aggregated counters.
        """
        # Lifetime: 'created_at' marks when the worker came online; fall back
        # to offline_time (zero duration) when absent.
        online_time = float(worker_data.get('created_at', offline_time))
        duration_seconds = int(offline_time - online_time)

        record = {
            'consumer_id': worker_data.get('consumer_id', ''),
            'host': worker_data.get('host', 'unknown'),
            'pid': int(worker_data.get('pid', 0)),
            'queues': worker_data.get('queues', ''),
            'online_time': online_time,
            'offline_time': offline_time,
            'duration_seconds': duration_seconds,
            'last_heartbeat': float(worker_data.get('last_heartbeat', 0)),
            'shutdown_reason': worker_data.get('shutdown_reason', 'unknown'),
            'online_time_str': datetime.fromtimestamp(online_time).isoformat(),
            'offline_time_str': datetime.fromtimestamp(offline_time).isoformat(),
        }

        # Human-readable lifetime.
        hours = duration_seconds // 3600
        minutes = (duration_seconds % 3600) // 60
        seconds = duration_seconds % 60
        record['duration_str'] = f"{hours}h {minutes}m {seconds}s"

        # Aggregate counters across every queue this worker served.
        queues = worker_data.get('queues', '').split(',') if worker_data.get('queues') else []
        total_success = 0
        total_failed = 0
        total_count = 0
        total_processing_time = 0.0

        for queue in queues:
            if queue.strip():
                queue = queue.strip()
                total_success += int(worker_data.get(f'{queue}:success_count', 0))
                total_failed += int(worker_data.get(f'{queue}:failed_count', 0))
                count = int(worker_data.get(f'{queue}:total_count', 0))
                total_count += count

                # Weight each queue's average by its task count so the
                # combined average is task-weighted.
                avg_time = float(worker_data.get(f'{queue}:avg_processing_time', 0))
                if avg_time > 0 and count > 0:
                    total_processing_time += avg_time * count

        record['total_success_count'] = total_success
        record['total_failed_count'] = total_failed
        record['total_count'] = total_count
        record['total_running_tasks'] = 0  # an offline worker runs nothing

        record['avg_processing_time'] = (
            total_processing_time / total_count if total_count > 0 else 0.0
        )

        return record
@@ -0,0 +1,11 @@
1
+ """
2
+ 工具函数模块
3
+ """
4
+
5
+ from .time_utils import parse_iso_datetime, format_task_timestamps, task_obj_to_dict
6
+
7
+ __all__ = [
8
+ 'parse_iso_datetime',
9
+ 'format_task_timestamps',
10
+ 'task_obj_to_dict',
11
+ ]