jettask 0.2.19__py3-none-any.whl → 0.2.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jettask/__init__.py +12 -3
- jettask/cli.py +314 -228
- jettask/config/__init__.py +9 -1
- jettask/config/config.py +245 -0
- jettask/config/env_loader.py +381 -0
- jettask/config/lua_scripts.py +158 -0
- jettask/config/nacos_config.py +132 -5
- jettask/core/__init__.py +1 -1
- jettask/core/app.py +1573 -666
- jettask/core/app_importer.py +33 -16
- jettask/core/container.py +532 -0
- jettask/core/task.py +1 -4
- jettask/core/unified_manager_base.py +2 -2
- jettask/executor/__init__.py +38 -0
- jettask/executor/core.py +625 -0
- jettask/executor/executor.py +338 -0
- jettask/executor/orchestrator.py +290 -0
- jettask/executor/process_entry.py +638 -0
- jettask/executor/task_executor.py +317 -0
- jettask/messaging/__init__.py +68 -0
- jettask/messaging/event_pool.py +2188 -0
- jettask/messaging/reader.py +519 -0
- jettask/messaging/registry.py +266 -0
- jettask/messaging/scanner.py +369 -0
- jettask/messaging/sender.py +312 -0
- jettask/persistence/__init__.py +118 -0
- jettask/persistence/backlog_monitor.py +567 -0
- jettask/{backend/data_access.py → persistence/base.py} +58 -57
- jettask/persistence/consumer.py +315 -0
- jettask/{core → persistence}/db_manager.py +23 -22
- jettask/persistence/maintenance.py +81 -0
- jettask/persistence/message_consumer.py +259 -0
- jettask/{backend/namespace_data_access.py → persistence/namespace.py} +66 -98
- jettask/persistence/offline_recovery.py +196 -0
- jettask/persistence/queue_discovery.py +215 -0
- jettask/persistence/task_persistence.py +218 -0
- jettask/persistence/task_updater.py +583 -0
- jettask/scheduler/__init__.py +2 -2
- jettask/scheduler/loader.py +6 -5
- jettask/scheduler/run_scheduler.py +1 -1
- jettask/scheduler/scheduler.py +7 -7
- jettask/scheduler/{unified_scheduler_manager.py → scheduler_coordinator.py} +18 -13
- jettask/task/__init__.py +16 -0
- jettask/{router.py → task/router.py} +26 -8
- jettask/task/task_center/__init__.py +9 -0
- jettask/task/task_executor.py +318 -0
- jettask/task/task_registry.py +291 -0
- jettask/test_connection_monitor.py +73 -0
- jettask/utils/__init__.py +31 -1
- jettask/{monitor/run_backlog_collector.py → utils/backlog_collector.py} +1 -1
- jettask/utils/db_connector.py +1629 -0
- jettask/{db_init.py → utils/db_init.py} +1 -1
- jettask/utils/rate_limit/__init__.py +30 -0
- jettask/utils/rate_limit/concurrency_limiter.py +665 -0
- jettask/utils/rate_limit/config.py +145 -0
- jettask/utils/rate_limit/limiter.py +41 -0
- jettask/utils/rate_limit/manager.py +269 -0
- jettask/utils/rate_limit/qps_limiter.py +154 -0
- jettask/utils/rate_limit/task_limiter.py +384 -0
- jettask/utils/serializer.py +3 -0
- jettask/{monitor/stream_backlog_monitor.py → utils/stream_backlog.py} +14 -6
- jettask/utils/time_sync.py +173 -0
- jettask/webui/__init__.py +27 -0
- jettask/{api/v1 → webui/api}/alerts.py +1 -1
- jettask/{api/v1 → webui/api}/analytics.py +2 -2
- jettask/{api/v1 → webui/api}/namespaces.py +1 -1
- jettask/{api/v1 → webui/api}/overview.py +1 -1
- jettask/{api/v1 → webui/api}/queues.py +3 -3
- jettask/{api/v1 → webui/api}/scheduled.py +1 -1
- jettask/{api/v1 → webui/api}/settings.py +1 -1
- jettask/{api.py → webui/app.py} +253 -145
- jettask/webui/namespace_manager/__init__.py +10 -0
- jettask/{multi_namespace_consumer.py → webui/namespace_manager/multi.py} +69 -22
- jettask/{unified_consumer_manager.py → webui/namespace_manager/unified.py} +1 -1
- jettask/{run.py → webui/run.py} +2 -2
- jettask/{services → webui/services}/__init__.py +1 -3
- jettask/{services → webui/services}/overview_service.py +34 -16
- jettask/{services → webui/services}/queue_service.py +1 -1
- jettask/{backend → webui/services}/queue_stats_v2.py +1 -1
- jettask/{services → webui/services}/settings_service.py +1 -1
- jettask/worker/__init__.py +53 -0
- jettask/worker/lifecycle.py +1507 -0
- jettask/worker/manager.py +583 -0
- jettask/{core/offline_worker_recovery.py → worker/recovery.py} +268 -175
- {jettask-0.2.19.dist-info → jettask-0.2.23.dist-info}/METADATA +2 -71
- jettask-0.2.23.dist-info/RECORD +145 -0
- jettask/__main__.py +0 -140
- jettask/api/__init__.py +0 -103
- jettask/backend/__init__.py +0 -1
- jettask/backend/api/__init__.py +0 -3
- jettask/backend/api/v1/__init__.py +0 -17
- jettask/backend/api/v1/monitoring.py +0 -431
- jettask/backend/api/v1/namespaces.py +0 -504
- jettask/backend/api/v1/queues.py +0 -342
- jettask/backend/api/v1/tasks.py +0 -367
- jettask/backend/core/__init__.py +0 -3
- jettask/backend/core/cache.py +0 -221
- jettask/backend/core/database.py +0 -200
- jettask/backend/core/exceptions.py +0 -102
- jettask/backend/dependencies.py +0 -261
- jettask/backend/init_meta_db.py +0 -158
- jettask/backend/main.py +0 -1426
- jettask/backend/main_unified.py +0 -78
- jettask/backend/main_v2.py +0 -394
- jettask/backend/models/__init__.py +0 -3
- jettask/backend/models/requests.py +0 -236
- jettask/backend/models/responses.py +0 -230
- jettask/backend/namespace_api_old.py +0 -267
- jettask/backend/services/__init__.py +0 -3
- jettask/backend/start.py +0 -42
- jettask/backend/unified_api_router.py +0 -1541
- jettask/cleanup_deprecated_tables.sql +0 -16
- jettask/core/consumer_manager.py +0 -1695
- jettask/core/delay_scanner.py +0 -256
- jettask/core/event_pool.py +0 -1700
- jettask/core/heartbeat_process.py +0 -222
- jettask/core/task_batch.py +0 -153
- jettask/core/worker_scanner.py +0 -271
- jettask/executors/__init__.py +0 -5
- jettask/executors/asyncio.py +0 -876
- jettask/executors/base.py +0 -30
- jettask/executors/common.py +0 -148
- jettask/executors/multi_asyncio.py +0 -309
- jettask/gradio_app.py +0 -570
- jettask/integrated_gradio_app.py +0 -1088
- jettask/main.py +0 -0
- jettask/monitoring/__init__.py +0 -3
- jettask/pg_consumer.py +0 -1896
- jettask/run_monitor.py +0 -22
- jettask/run_webui.py +0 -148
- jettask/scheduler/multi_namespace_scheduler.py +0 -294
- jettask/scheduler/unified_manager.py +0 -450
- jettask/task_center_client.py +0 -150
- jettask/utils/serializer_optimized.py +0 -33
- jettask/webui_exceptions.py +0 -67
- jettask-0.2.19.dist-info/RECORD +0 -150
- /jettask/{constants.py → config/constants.py} +0 -0
- /jettask/{backend/config.py → config/task_center.py} +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/pg_consumer_v2.py +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/add_execution_time_field.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_new_tables.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_tables_v3.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/migrate_to_new_structure.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/modify_time_fields.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql_utils.py +0 -0
- /jettask/{models.py → persistence/models.py} +0 -0
- /jettask/scheduler/{manager.py → task_crud.py} +0 -0
- /jettask/{schema.sql → schemas/schema.sql} +0 -0
- /jettask/{task_center.py → task/task_center/client.py} +0 -0
- /jettask/{monitoring → utils}/file_watcher.py +0 -0
- /jettask/{services/redis_monitor_service.py → utils/redis_monitor.py} +0 -0
- /jettask/{api/v1 → webui/api}/__init__.py +0 -0
- /jettask/{webui_config.py → webui/config.py} +0 -0
- /jettask/{webui_models → webui/models}/__init__.py +0 -0
- /jettask/{webui_models → webui/models}/namespace.py +0 -0
- /jettask/{services → webui/services}/alert_service.py +0 -0
- /jettask/{services → webui/services}/analytics_service.py +0 -0
- /jettask/{services → webui/services}/scheduled_task_service.py +0 -0
- /jettask/{services → webui/services}/task_service.py +0 -0
- /jettask/{webui_sql → webui/sql}/batch_upsert_functions.sql +0 -0
- /jettask/{webui_sql → webui/sql}/verify_database.sql +0 -0
- {jettask-0.2.19.dist-info → jettask-0.2.23.dist-info}/WHEEL +0 -0
- {jettask-0.2.19.dist-info → jettask-0.2.23.dist-info}/entry_points.txt +0 -0
- {jettask-0.2.19.dist-info → jettask-0.2.23.dist-info}/licenses/LICENSE +0 -0
- {jettask-0.2.19.dist-info → jettask-0.2.23.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,583 @@
|
|
1
|
+
"""
|
2
|
+
Worker 管理器
|
3
|
+
|
4
|
+
整合了 Worker 相关的所有核心功能:
|
5
|
+
- Worker 状态管理(注册、查询、统计)
|
6
|
+
- Worker ID 生成和命名
|
7
|
+
- 消费者名称管理
|
8
|
+
"""
|
9
|
+
|
10
|
+
import os
|
11
|
+
import uuid
|
12
|
+
import logging
|
13
|
+
import time
|
14
|
+
from typing import Dict, Any, Optional, List, Set
|
15
|
+
|
16
|
+
logger = logging.getLogger('app')
|
17
|
+
|
18
|
+
|
19
|
+
class WorkerState:
    """
    Worker state manager.

    Manages the worker state data kept in Redis:
    - registering and unregistering workers
    - listing registered workers
    - looking up workers by task
    - counting workers

    Note: this class used to be called WorkerRegistry; it was renamed to
    WorkerState to describe its responsibility more accurately.
    """

    def __init__(self, redis_client, async_redis_client, redis_prefix: str = 'jettask'):
        """
        Initialise the worker state manager.

        Args:
            redis_client: synchronous Redis client
            async_redis_client: asynchronous Redis client
            redis_prefix: prefix applied to every Redis key built here
        """
        self.redis = redis_client
        self.async_redis = async_redis_client
        self.redis_prefix = redis_prefix
        self.workers_registry_key = f"{redis_prefix}:REGISTRY:WORKERS"

    # ========== Internal helpers ==========

    @staticmethod
    def _to_str(value):
        """Decode ``bytes`` as UTF-8; return any other value unchanged."""
        return value.decode('utf-8') if isinstance(value, bytes) else value

    @classmethod
    def _decode_hash(cls, raw) -> Dict[str, Any]:
        """Return a copy of a Redis hash with bytes keys/values decoded."""
        return {cls._to_str(k): cls._to_str(v) for k, v in raw.items()}

    @staticmethod
    def _queue_matches(queue: str, worker_queues: List[str]) -> bool:
        """Tell whether a worker owning ``worker_queues`` is responsible for ``queue``.

        An exact match is tried first so that a queue whose real name ends in
        ``:<digits>`` is still found. Otherwise a ``base:<digits>`` name is
        treated as a priority queue and matched on its base queue name.
        """
        if queue in worker_queues:
            return True
        base_queue, sep, suffix = queue.rpartition(':')
        return bool(sep) and suffix.isdigit() and base_queue in worker_queues

    # ========== Worker registration ==========

    async def register_worker(self, worker_id: str):
        """Add the worker to the global registry set."""
        await self.async_redis.sadd(self.workers_registry_key, worker_id)
        logger.debug(f"Registered worker: {worker_id}")

    async def unregister_worker(self, worker_id: str):
        """Remove the worker from the global registry set."""
        await self.async_redis.srem(self.workers_registry_key, worker_id)
        logger.debug(f"Unregistered worker: {worker_id}")

    async def get_all_workers(self) -> Set[str]:
        """Return every registered worker ID (as returned by the async client)."""
        return await self.async_redis.smembers(self.workers_registry_key)

    def get_all_workers_sync(self) -> Set[str]:
        """Return every registered worker ID using the synchronous client."""
        return self.redis.smembers(self.workers_registry_key)

    async def get_worker_count(self) -> int:
        """Return the number of registered workers."""
        return await self.async_redis.scard(self.workers_registry_key)

    async def get_offline_workers(self) -> Set[str]:
        """Return the IDs of all offline workers.

        A worker is offline when it is registered but the ``is_alive`` field
        of its hash is anything other than ``'true'`` (including a missing
        hash or a missing field).
        """
        offline_workers = set()

        for raw_id in await self.get_all_workers():
            worker_id = self._to_str(raw_id)
            worker_key = f"{self.redis_prefix}:WORKER:{worker_id}"
            is_alive = self._to_str(await self.async_redis.hget(worker_key, 'is_alive'))

            # None / empty / any value other than 'true' all count as offline.
            if is_alive != 'true':
                offline_workers.add(worker_id)

        return offline_workers

    async def get_workers_for_task(self, task_name: str, only_alive: bool = True) -> Set[str]:
        """Return the workers that execute a given task.

        A worker handles the task when its ``WORKER:*`` hash contains a
        ``group_info:{prefix}:QUEUE:...`` field whose last ``:``-separated
        segment equals ``task_name``.

        Args:
            task_name: task name to look for
            only_alive: when True (default) only workers whose ``is_alive``
                field is ``'true'`` are returned

        Returns:
            Set of matching worker IDs.
        """
        matched_workers = set()
        group_info_prefix = f"group_info:{self.redis_prefix}:QUEUE:"

        for raw_id in await self.get_all_workers():
            worker_id = self._to_str(raw_id)
            worker_key = f"{self.redis_prefix}:WORKER:{worker_id}"
            worker_info = await self.async_redis.hgetall(worker_key)

            if not worker_info:
                continue

            decoded_info = self._decode_hash(worker_info)

            if only_alive and decoded_info.get('is_alive', 'false') != 'true':
                continue

            # Field format example: group_info:test5:QUEUE:robust_bench2:benchmark_task
            for field in decoded_info:
                if not field.startswith(group_info_prefix):
                    continue
                parts = field.split(':')
                # The last segment is the task name.
                if len(parts) >= 5 and parts[-1] == task_name:
                    matched_workers.add(worker_id)
                    break

        return matched_workers

    async def get_active_worker_count_for_task(self, task_name: str) -> int:
        """Return how many online workers execute the given task.

        Args:
            task_name: task name to look for

        Returns:
            Number of online workers handling the task.
        """
        workers = await self.get_workers_for_task(task_name, only_alive=True)
        return len(workers)

    async def find_offline_workers_for_queue(
        self,
        queue: str,
        worker_prefix: str = 'WORKER',
        worker_state_manager=None
    ) -> List[tuple]:
        """Find offline workers of a queue whose messages still need recovery.

        A worker is selected when all of the following hold:
        1. it is offline (``is_alive`` is not ``'true'``)
        2. its messages were not transferred (``messages_transferred`` is not ``'true'``)
        3. it is responsible for the queue

        Args:
            queue: queue name; the priority form ``queue:priority`` is supported
            worker_prefix: key segment used for worker hashes (default 'WORKER')
            worker_state_manager: optional object exposing an awaitable
                ``get_worker_info(worker_id)``; when given it is used instead
                of reading the hash directly

        Returns:
            List of ``(worker_key, worker_data)`` tuples. Errors are logged
            and never raised; a failure on one worker skips only that worker.
        """
        offline_workers = []

        try:
            worker_ids = await self.get_all_workers()
            logger.debug(f"[Recovery] Found {len(worker_ids)} workers in registry for queue {queue}")

            for raw_id in worker_ids:
                worker_id = self._to_str(raw_id)
                worker_key = f"{self.redis_prefix}:{worker_prefix}:{worker_id}"

                try:
                    if worker_state_manager:
                        worker_data = await worker_state_manager.get_worker_info(worker_id)
                    else:
                        raw_data = await self.async_redis.hgetall(worker_key)
                        worker_data = self._decode_hash(raw_data) if raw_data else None

                    if not worker_data:
                        continue

                    is_alive = worker_data.get('is_alive', 'false') == 'true'
                    messages_transferred = worker_data.get('messages_transferred', 'false') == 'true'

                    # Only offline workers with untransferred messages qualify.
                    if is_alive or messages_transferred:
                        continue

                    queues_str = worker_data.get('queues', '')
                    worker_queues = queues_str.split(',') if queues_str else []

                    if self._queue_matches(queue, worker_queues):
                        logger.info(
                            f"Found offline worker needing recovery: {worker_id}, "
                            f"queues={worker_queues}, is_alive={is_alive}, "
                            f"messages_transferred={messages_transferred}"
                        )
                        offline_workers.append((worker_key, worker_data))
                    else:
                        logger.debug(
                            f"Worker {worker_id} is offline but not responsible for queue {queue} "
                            f"(worker_queues={worker_queues})"
                        )

                except Exception as e:
                    # Best effort: one bad worker must not abort the scan.
                    logger.error(f"Error processing worker key {worker_key}: {e}")
                    continue

        except Exception as e:
            logger.error(f"Error finding offline workers: {e}")

        return offline_workers
|
255
|
+
|
256
|
+
|
257
|
+
class WorkerNaming:
    """
    Worker ID generation and reuse.

    Responsibilities:
    - generate a unique worker ID
    - find an offline worker ID that can be reused
    """

    def generate_worker_id(self, prefix: str) -> str:
        """
        Build a brand-new worker ID.

        Format: {prefix}-{uuid}-{pid}
        Example: YYDG-a1b2c3d4-12345

        Args:
            prefix: worker ID prefix (usually the host name)

        Returns:
            The generated worker ID.
        """
        random_part = uuid.uuid4().hex[:8]  # first 8 hex chars of a random UUID
        process_id = os.getpid()
        return f"{prefix}-{random_part}-{process_id}"

    async def find_reusable_worker_id(
        self,
        prefix: str,
        worker_state: 'WorkerState'
    ) -> Optional[str]:
        """
        Look for an offline worker ID that can be reused.

        Args:
            prefix: worker ID prefix
            worker_state: worker state manager instance; its
                ``get_offline_workers()`` coroutine supplies the candidates

        Returns:
            The first offline worker ID that starts with ``prefix``, or None
            when there is none or the lookup fails (the failure is logged).
        """
        try:
            candidates = await worker_state.get_offline_workers()

            for candidate in candidates:
                name = candidate.decode('utf-8') if isinstance(candidate, bytes) else candidate
                # Plain prefix comparison: no separator is required after the prefix.
                if not name.startswith(prefix):
                    continue
                logger.debug(f"Found reusable worker ID: {name}")
                return name
        except Exception as e:
            logger.warning(f"Error finding reusable worker ID: {e}")

        return None
|
309
|
+
|
310
|
+
|
311
|
+
class ConsumerManager:
    """Consumer name manager.

    Consumer names are obtained from an injected heartbeat strategy object
    (the HEARTBEAT strategy).
    """

    def __init__(
        self,
        redis_client,
        config: Optional[Dict[str, Any]] = None,
        app=None
    ):
        """
        Initialise the consumer manager.

        Args:
            redis_client: Redis client, stored as ``self.redis_client``
            config: optional settings dict; ``redis_prefix`` is read from it
                (default ``'jettask'``). ``None`` is treated as an empty dict.
            app: optional application object; its ``async_redis`` and
                ``worker_state`` attributes are used when present
        """
        self.redis_client = redis_client
        self.config = config or {}
        self._consumer_name = None
        self.app = app

        # Read from the normalised dict so that config=None does not crash.
        self.redis_prefix = self.config.get('redis_prefix', 'jettask')

        # Heartbeat strategy instance, injected later via set_heartbeat_strategy().
        self._heartbeat_strategy = None

    def set_heartbeat_strategy(self, heartbeat_strategy):
        """Set the heartbeat strategy instance (injected from outside)."""
        self._heartbeat_strategy = heartbeat_strategy

    def get_prefixed_queue_name(self, queue: str) -> str:
        """Return the queue name with the Redis prefix applied."""
        return f"{self.redis_prefix}:QUEUE:{queue}"

    def get_consumer_name(self, queue: str) -> str:
        """Return the consumer name for ``queue`` (HEARTBEAT strategy).

        Raises:
            RuntimeError: if no heartbeat strategy has been set.
        """
        return self._get_heartbeat_name(queue)

    def _get_heartbeat_name(self, queue: str) -> str:
        """Ask the heartbeat strategy for the consumer name of ``queue``.

        Raises:
            RuntimeError: if no heartbeat strategy has been set.
        """
        if not self._heartbeat_strategy:
            raise RuntimeError("Heartbeat strategy not initialized properly")

        return self._heartbeat_strategy.get_consumer_name(queue)

    async def record_group_info_async(self, queue: str, task_name: str, group_name: str, consumer_name: str):
        """Record a task's consumer-group info in the worker hash.

        Does nothing when no heartbeat strategy is set. Any error is logged
        and swallowed (best effort).

        Args:
            queue: queue name
            task_name: task name
            group_name: consumer group name
            consumer_name: consumer name
        """
        if not self._heartbeat_strategy:
            return

        try:
            # Make sure the strategy's consumer_id is initialised before reading it.
            self._heartbeat_strategy._ensure_consumer_id()
            worker_id = self._heartbeat_strategy.consumer_id
            worker_key = f"{self.redis_prefix}:WORKER:{worker_id}"

            import json
            group_info = {
                'queue': queue,
                'task_name': task_name,
                'group_name': group_name,
                'consumer_name': consumer_name,
                'stream_key': f"{self.redis_prefix}:QUEUE:{queue}"
            }

            # Stored in the worker hash under the field group_info:{group_name}.
            field_name = f"group_info:{group_name}"

            # The write goes through the app's async Redis client.
            if self.app and hasattr(self.app, 'async_redis'):
                await self.app.async_redis.hset(
                    worker_key,
                    field_name,
                    json.dumps(group_info)
                )
                logger.debug(f"Recorded group info for task {task_name}: {group_info}")
            else:
                logger.warning("Cannot record group info: async_redis not available")

        except Exception as e:
            logger.error(f"Error recording task group info: {e}")

    async def get_workers_for_task(self, task_name: str, only_alive: bool = True) -> Set[str]:
        """Return the workers that execute a given task.

        Delegates to ``self.app.worker_state.get_workers_for_task``.

        Args:
            task_name: task name
            only_alive: whether to return only online workers (default True)

        Returns:
            Set of worker IDs handling the task; an empty set when the app or
            its ``worker_state`` attribute is not available.
        """
        if self.app and hasattr(self.app, 'worker_state'):
            worker_state = self.app.worker_state
            return await worker_state.get_workers_for_task(task_name, only_alive)

        # Fallback: no WorkerState reachable, report no workers.
        logger.debug(f"Cannot get workers for task {task_name}: WorkerState not available")
        return set()

    def cleanup(self):
        """Release resources (called on graceful shutdown)."""
        if self._heartbeat_strategy:
            self._heartbeat_strategy.cleanup()
|
424
|
+
|
425
|
+
|
426
|
+
class WorkerManager:
    """
    Worker manager - the unified entry point.

    This is the recommended way to manage workers. It offers:
    - an interface with clear responsibilities
    - a compact code structure
    - easy testing and maintenance

    Usage example:
        manager = WorkerManager(redis, async_redis, 'jettask')
        worker_id = await manager.start_worker('MyApp', ['queue1'])
        await manager.record_task_start(worker_id, 'queue1')
        await manager.record_task_finish(worker_id, 'queue1', True, 1.5)
        await manager.stop_worker(worker_id)
    """

    def __init__(
        self,
        redis_client,
        async_redis_client,
        redis_prefix: str = 'jettask'
    ):
        """
        Initialise the worker manager.

        Args:
            redis_client: synchronous Redis client
            async_redis_client: asynchronous Redis client
            redis_prefix: Redis key prefix
        """
        self.redis_client = redis_client
        self.async_redis_client = async_redis_client
        self.redis_prefix = redis_prefix

        # Build the component objects.
        self.worker_state = WorkerState(
            redis_client=redis_client,
            async_redis_client=async_redis_client,
            redis_prefix=redis_prefix
        )
        self.naming = WorkerNaming()

        # The lifecycle component is not created here; it starts as None and
        # every lifecycle-backed method raises until it is assigned.
        self.lifecycle = None

        # Backward-compatible alias: ``registry`` points at ``worker_state``.
        self.registry = self.worker_state

        logger.debug("WorkerManager initialized")

    def _require_lifecycle(self):
        """Return the lifecycle component, raising RuntimeError when it is unset."""
        lifecycle = self.lifecycle
        if not lifecycle:
            raise RuntimeError("Lifecycle manager not initialized")
        return lifecycle

    async def start_worker(
        self,
        prefix: str,
        queues: List[str],
        reuse_offline: bool = True
    ) -> str:
        """
        Start a worker.

        Args:
            prefix: worker ID prefix (application name or host name recommended)
            queues: queues the worker is responsible for
            reuse_offline: whether to reuse an offline worker ID (default True)

        Returns:
            The ID of the started worker.

        Raises:
            RuntimeError: if the lifecycle manager is not initialised.

        Example:
            worker_id = await manager.start_worker('MyApp', ['queue1', 'queue2'])
        """
        lifecycle = self._require_lifecycle()
        return await lifecycle.initialize_worker(prefix, queues, reuse_offline)

    async def stop_worker(self, worker_id: str):
        """
        Stop a worker.

        Args:
            worker_id: ID of the worker to stop

        Raises:
            RuntimeError: if the lifecycle manager is not initialised.

        Example:
            await manager.stop_worker(worker_id)
        """
        lifecycle = self._require_lifecycle()
        await lifecycle.cleanup_worker(worker_id)

    async def record_task_start(self, worker_id: str, queue: str):
        """
        Record that a task has started.

        Args:
            worker_id: worker ID
            queue: queue name

        Raises:
            RuntimeError: if the lifecycle manager is not initialised.

        Example:
            await manager.record_task_start(worker_id, 'queue1')
        """
        lifecycle = self._require_lifecycle()
        await lifecycle.record_task_start(worker_id, queue)

    async def record_task_finish(
        self,
        worker_id: str,
        queue: str,
        success: bool,
        duration: float
    ):
        """
        Record that a task has finished.

        Args:
            worker_id: worker ID
            queue: queue name
            success: whether the task succeeded
            duration: processing time in seconds

        Raises:
            RuntimeError: if the lifecycle manager is not initialised.

        Example:
            await manager.record_task_finish(worker_id, 'queue1', True, 1.5)
        """
        lifecycle = self._require_lifecycle()
        await lifecycle.record_task_finish(worker_id, queue, success, duration)

    async def get_worker_info(self, worker_id: str) -> Optional[Dict[str, Any]]:
        """
        Fetch a worker's information.

        Args:
            worker_id: worker ID

        Returns:
            Whatever the lifecycle component's ``get_worker_info`` returns
            (documented upstream as an info dict, or None when the worker
            does not exist).

        Raises:
            RuntimeError: if the lifecycle manager is not initialised.

        Example:
            info = await manager.get_worker_info(worker_id)
            print(f"Worker {worker_id} is {'online' if info['is_alive']=='true' else 'offline'}")
        """
        lifecycle = self._require_lifecycle()
        return await lifecycle.get_worker_info(worker_id)
|
576
|
+
|
577
|
+
|
578
|
+
__all__ = [
    'WorkerState',  # Worker state management (formerly WorkerRegistry)
    'WorkerNaming',  # Worker ID generation
    'ConsumerManager',  # Consumer name management
    'WorkerManager',  # Unified manager
]
|