jettask 0.2.19__py3-none-any.whl → 0.2.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jettask/__init__.py +10 -3
- jettask/cli.py +314 -228
- jettask/config/__init__.py +9 -1
- jettask/config/config.py +245 -0
- jettask/config/env_loader.py +381 -0
- jettask/config/lua_scripts.py +158 -0
- jettask/config/nacos_config.py +132 -5
- jettask/core/__init__.py +1 -1
- jettask/core/app.py +1573 -666
- jettask/core/app_importer.py +33 -16
- jettask/core/container.py +532 -0
- jettask/core/task.py +1 -4
- jettask/core/unified_manager_base.py +2 -2
- jettask/executor/__init__.py +38 -0
- jettask/executor/core.py +625 -0
- jettask/executor/executor.py +338 -0
- jettask/executor/orchestrator.py +290 -0
- jettask/executor/process_entry.py +638 -0
- jettask/executor/task_executor.py +317 -0
- jettask/messaging/__init__.py +68 -0
- jettask/messaging/event_pool.py +2188 -0
- jettask/messaging/reader.py +519 -0
- jettask/messaging/registry.py +266 -0
- jettask/messaging/scanner.py +369 -0
- jettask/messaging/sender.py +312 -0
- jettask/persistence/__init__.py +118 -0
- jettask/persistence/backlog_monitor.py +567 -0
- jettask/{backend/data_access.py → persistence/base.py} +58 -57
- jettask/persistence/consumer.py +315 -0
- jettask/{core → persistence}/db_manager.py +23 -22
- jettask/persistence/maintenance.py +81 -0
- jettask/persistence/message_consumer.py +259 -0
- jettask/{backend/namespace_data_access.py → persistence/namespace.py} +66 -98
- jettask/persistence/offline_recovery.py +196 -0
- jettask/persistence/queue_discovery.py +215 -0
- jettask/persistence/task_persistence.py +218 -0
- jettask/persistence/task_updater.py +583 -0
- jettask/scheduler/__init__.py +2 -2
- jettask/scheduler/loader.py +6 -5
- jettask/scheduler/run_scheduler.py +1 -1
- jettask/scheduler/scheduler.py +7 -7
- jettask/scheduler/{unified_scheduler_manager.py → scheduler_coordinator.py} +18 -13
- jettask/task/__init__.py +16 -0
- jettask/{router.py → task/router.py} +26 -8
- jettask/task/task_center/__init__.py +9 -0
- jettask/task/task_executor.py +318 -0
- jettask/task/task_registry.py +291 -0
- jettask/test_connection_monitor.py +73 -0
- jettask/utils/__init__.py +31 -1
- jettask/{monitor/run_backlog_collector.py → utils/backlog_collector.py} +1 -1
- jettask/utils/db_connector.py +1629 -0
- jettask/{db_init.py → utils/db_init.py} +1 -1
- jettask/utils/rate_limit/__init__.py +30 -0
- jettask/utils/rate_limit/concurrency_limiter.py +665 -0
- jettask/utils/rate_limit/config.py +145 -0
- jettask/utils/rate_limit/limiter.py +41 -0
- jettask/utils/rate_limit/manager.py +269 -0
- jettask/utils/rate_limit/qps_limiter.py +154 -0
- jettask/utils/rate_limit/task_limiter.py +384 -0
- jettask/utils/serializer.py +3 -0
- jettask/{monitor/stream_backlog_monitor.py → utils/stream_backlog.py} +14 -6
- jettask/utils/time_sync.py +173 -0
- jettask/webui/__init__.py +27 -0
- jettask/{api/v1 → webui/api}/alerts.py +1 -1
- jettask/{api/v1 → webui/api}/analytics.py +2 -2
- jettask/{api/v1 → webui/api}/namespaces.py +1 -1
- jettask/{api/v1 → webui/api}/overview.py +1 -1
- jettask/{api/v1 → webui/api}/queues.py +3 -3
- jettask/{api/v1 → webui/api}/scheduled.py +1 -1
- jettask/{api/v1 → webui/api}/settings.py +1 -1
- jettask/{api.py → webui/app.py} +253 -145
- jettask/webui/namespace_manager/__init__.py +10 -0
- jettask/{multi_namespace_consumer.py → webui/namespace_manager/multi.py} +69 -22
- jettask/{unified_consumer_manager.py → webui/namespace_manager/unified.py} +1 -1
- jettask/{run.py → webui/run.py} +2 -2
- jettask/{services → webui/services}/__init__.py +1 -3
- jettask/{services → webui/services}/overview_service.py +34 -16
- jettask/{services → webui/services}/queue_service.py +1 -1
- jettask/{backend → webui/services}/queue_stats_v2.py +1 -1
- jettask/{services → webui/services}/settings_service.py +1 -1
- jettask/worker/__init__.py +53 -0
- jettask/worker/lifecycle.py +1507 -0
- jettask/worker/manager.py +583 -0
- jettask/{core/offline_worker_recovery.py → worker/recovery.py} +268 -175
- {jettask-0.2.19.dist-info → jettask-0.2.20.dist-info}/METADATA +2 -71
- jettask-0.2.20.dist-info/RECORD +145 -0
- jettask/__main__.py +0 -140
- jettask/api/__init__.py +0 -103
- jettask/backend/__init__.py +0 -1
- jettask/backend/api/__init__.py +0 -3
- jettask/backend/api/v1/__init__.py +0 -17
- jettask/backend/api/v1/monitoring.py +0 -431
- jettask/backend/api/v1/namespaces.py +0 -504
- jettask/backend/api/v1/queues.py +0 -342
- jettask/backend/api/v1/tasks.py +0 -367
- jettask/backend/core/__init__.py +0 -3
- jettask/backend/core/cache.py +0 -221
- jettask/backend/core/database.py +0 -200
- jettask/backend/core/exceptions.py +0 -102
- jettask/backend/dependencies.py +0 -261
- jettask/backend/init_meta_db.py +0 -158
- jettask/backend/main.py +0 -1426
- jettask/backend/main_unified.py +0 -78
- jettask/backend/main_v2.py +0 -394
- jettask/backend/models/__init__.py +0 -3
- jettask/backend/models/requests.py +0 -236
- jettask/backend/models/responses.py +0 -230
- jettask/backend/namespace_api_old.py +0 -267
- jettask/backend/services/__init__.py +0 -3
- jettask/backend/start.py +0 -42
- jettask/backend/unified_api_router.py +0 -1541
- jettask/cleanup_deprecated_tables.sql +0 -16
- jettask/core/consumer_manager.py +0 -1695
- jettask/core/delay_scanner.py +0 -256
- jettask/core/event_pool.py +0 -1700
- jettask/core/heartbeat_process.py +0 -222
- jettask/core/task_batch.py +0 -153
- jettask/core/worker_scanner.py +0 -271
- jettask/executors/__init__.py +0 -5
- jettask/executors/asyncio.py +0 -876
- jettask/executors/base.py +0 -30
- jettask/executors/common.py +0 -148
- jettask/executors/multi_asyncio.py +0 -309
- jettask/gradio_app.py +0 -570
- jettask/integrated_gradio_app.py +0 -1088
- jettask/main.py +0 -0
- jettask/monitoring/__init__.py +0 -3
- jettask/pg_consumer.py +0 -1896
- jettask/run_monitor.py +0 -22
- jettask/run_webui.py +0 -148
- jettask/scheduler/multi_namespace_scheduler.py +0 -294
- jettask/scheduler/unified_manager.py +0 -450
- jettask/task_center_client.py +0 -150
- jettask/utils/serializer_optimized.py +0 -33
- jettask/webui_exceptions.py +0 -67
- jettask-0.2.19.dist-info/RECORD +0 -150
- /jettask/{constants.py → config/constants.py} +0 -0
- /jettask/{backend/config.py → config/task_center.py} +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/pg_consumer_v2.py +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/add_execution_time_field.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_new_tables.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_tables_v3.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/migrate_to_new_structure.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/modify_time_fields.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql_utils.py +0 -0
- /jettask/{models.py → persistence/models.py} +0 -0
- /jettask/scheduler/{manager.py → task_crud.py} +0 -0
- /jettask/{schema.sql → schemas/schema.sql} +0 -0
- /jettask/{task_center.py → task/task_center/client.py} +0 -0
- /jettask/{monitoring → utils}/file_watcher.py +0 -0
- /jettask/{services/redis_monitor_service.py → utils/redis_monitor.py} +0 -0
- /jettask/{api/v1 → webui/api}/__init__.py +0 -0
- /jettask/{webui_config.py → webui/config.py} +0 -0
- /jettask/{webui_models → webui/models}/__init__.py +0 -0
- /jettask/{webui_models → webui/models}/namespace.py +0 -0
- /jettask/{services → webui/services}/alert_service.py +0 -0
- /jettask/{services → webui/services}/analytics_service.py +0 -0
- /jettask/{services → webui/services}/scheduled_task_service.py +0 -0
- /jettask/{services → webui/services}/task_service.py +0 -0
- /jettask/{webui_sql → webui/sql}/batch_upsert_functions.sql +0 -0
- /jettask/{webui_sql → webui/sql}/verify_database.sql +0 -0
- {jettask-0.2.19.dist-info → jettask-0.2.20.dist-info}/WHEEL +0 -0
- {jettask-0.2.19.dist-info → jettask-0.2.20.dist-info}/entry_points.txt +0 -0
- {jettask-0.2.19.dist-info → jettask-0.2.20.dist-info}/licenses/LICENSE +0 -0
- {jettask-0.2.19.dist-info → jettask-0.2.20.dist-info}/top_level.txt +0 -0
jettask/persistence/message_consumer.py (new file)
@@ -0,0 +1,259 @@
+"""Message consumption module.
+
+Consumes messages from Redis Stream queues and persists them to PostgreSQL.
+"""
+
+import asyncio
+import logging
+import traceback
+from typing import List, Dict
+from collections import defaultdict
+
+import redis.asyncio as redis
+from redis.asyncio import Redis
+
+from .task_persistence import TaskPersistence
+
+logger = logging.getLogger(__name__)
+
+
+class MessageConsumer:
+    """Message consumer.
+
+    Responsibilities:
+    - Consume messages from Redis Stream queues
+    - Parse messages and persist them to the database
+    - Manage consumption tasks for multiple queues
+    - Handle error retries and ACKs
+    """
+
+    def __init__(
+        self,
+        redis_client: Redis,
+        redis_prefix: str,
+        consumer_group: str,
+        consumer_id: str,
+        task_persistence: TaskPersistence,
+        queue_discovery: 'QueueDiscovery'
+    ):
+        """Initialize the message consumer.
+
+        Args:
+            redis_client: async Redis client
+            redis_prefix: Redis key prefix
+            consumer_group: consumer group name
+            consumer_id: consumer ID
+            task_persistence: task persistence handler
+            queue_discovery: queue discovery helper
+        """
+        self.redis_client = redis_client
+        self.redis_prefix = redis_prefix
+        self.consumer_group = consumer_group
+        self.consumer_id = consumer_id
+        self.task_persistence = task_persistence
+        self.queue_discovery = queue_discovery
+
+        # Error counters
+        self._consecutive_errors = defaultdict(int)
+
+        # Cache of already-processed task IDs (used to avoid redundant queries)
+        self._processed_task_ids = set()
+        self._processed_ids_lock = asyncio.Lock()
+        self._processed_ids_max_size = 100000
+        self._processed_ids_cleanup_interval = 300
+
+        self._running = False
+        self._consume_task = None
+        self._queue_tasks = {}
+
+    async def start(self):
+        """Start the consumer."""
+        self._running = True
+        self._consume_task = asyncio.create_task(self._consume_queues())
+        logger.debug("MessageConsumer started")
+
+    async def stop(self):
+        """Stop the consumer."""
+        self._running = False
+
+        if self._consume_task:
+            self._consume_task.cancel()
+            try:
+                await self._consume_task
+            except asyncio.CancelledError:
+                pass
+
+        # Cancel all per-queue tasks
+        for task in self._queue_tasks.values():
+            task.cancel()
+
+        if self._queue_tasks:
+            await asyncio.gather(*self._queue_tasks.values(), return_exceptions=True)
+
+        logger.debug("MessageConsumer stopped")
+
+    async def _consume_queues(self):
+        """Start consumption tasks for all known queues."""
+        while self._running:
+            try:
+                # Get the currently known queues
+                known_queues = self.queue_discovery.get_known_queues()
+
+                # Start a consumption task for each queue
+                for queue in known_queues:
+                    if queue not in self._queue_tasks or self._queue_tasks[queue].done():
+                        self._queue_tasks[queue] = asyncio.create_task(self._consume_queue(queue))
+                        logger.debug(f"Started consumer task for queue: {queue}")
+
+                # Remove tasks for queues that no longer exist
+                for queue in list(self._queue_tasks.keys()):
+                    if queue not in known_queues:
+                        self._queue_tasks[queue].cancel()
+                        del self._queue_tasks[queue]
+                        logger.debug(f"Stopped consumer task for removed queue: {queue}")
+
+                await asyncio.sleep(10)
+
+            except Exception as e:
+                logger.error(f"Error in consume_queues manager: {e}")
+                logger.error(traceback.format_exc())
+                await asyncio.sleep(5)
+
+    async def _consume_queue(self, queue_name: str):
+        """Consume tasks from a single queue (including priority queues)."""
+        # Determine whether this is a priority queue
+        is_priority_queue = ':' in queue_name and queue_name.rsplit(':', 1)[-1].isdigit()
+
+        if is_priority_queue:
+            # Priority queue format: base_queue:priority (e.g. robust_bench2:2)
+            base_queue = queue_name.rsplit(':', 1)[0]
+            priority = queue_name.rsplit(':', 1)[1]
+            stream_key = f"{self.redis_prefix}:QUEUE:{base_queue}:{priority}"
+        else:
+            # Regular queue
+            stream_key = f"{self.redis_prefix}:QUEUE:{queue_name}"
+
+        logger.debug(f"Consuming queue: {queue_name}, stream_key: {stream_key}, is_priority: {is_priority_queue}")
+
+        check_backlog = True
+        lastid = "0-0"
+
+        # pg_consumer should use one shared consumer_id instead of creating a new one
+        # per queue: its job is to consume messages from every queue and write them to
+        # the database, it is not a real task executor, so per-queue consumers are unnecessary.
+        consumer_name = self.consumer_id
+
+        # ConsumerManager automatically recovers pending messages of offline workers,
+        # so no manual recovery is needed here.
+
+        while self._running and queue_name in self.queue_discovery.get_known_queues():
+            try:
+                myid = lastid if check_backlog else ">"
+
+                messages = await self.redis_client.xreadgroup(
+                    self.consumer_group,
+                    consumer_name,  # the consumer_name managed by ConsumerManager
+                    {stream_key: myid},
+                    count=10000,
+                    block=1000 if not check_backlog else 0
+                )
+
+                if not messages or (messages and len(messages[0][1]) == 0):
+                    check_backlog = False
+                    continue
+
+                if messages:
+                    await self._process_messages(messages)
+                    self._consecutive_errors[queue_name] = 0
+
+                    if messages[0] and messages[0][1]:
+                        lastid = messages[0][1][-1][0].decode('utf-8') if isinstance(messages[0][1][-1][0], bytes) else messages[0][1][-1][0]
+                        check_backlog = len(messages[0][1]) >= 2000
+
+            except redis.ResponseError as e:
+                if "NOGROUP" in str(e):
+                    try:
+                        await self.redis_client.xgroup_create(
+                            stream_key, self.consumer_group, id='0', mkstream=True
+                        )
+                        logger.debug(f"Recreated consumer group for queue: {queue_name}")
+                        check_backlog = True
+                        lastid = "0-0"
+                    except:
+                        pass
+                else:
+                    logger.error(f"Redis error for queue {queue_name}: {e}")
+                    logger.error(traceback.format_exc())
+                    self._consecutive_errors[queue_name] += 1
+
+                if self._consecutive_errors[queue_name] > 10:
+                    logger.debug(f"Too many errors for queue {queue_name}, will retry later")
+                    await asyncio.sleep(30)
+                    self._consecutive_errors[queue_name] = 0
+
+            except Exception as e:
+                logger.error(f"Error consuming queue {queue_name}: {e}", exc_info=True)
+                self._consecutive_errors[queue_name] += 1
+                await asyncio.sleep(1)
+
+    async def _process_messages(self, messages: List):
+        """Process messages and save them to PostgreSQL."""
+        tasks_to_insert = []
+        ack_batch = []
+
+        for stream_key, stream_messages in messages:
+            if not stream_messages:
+                continue
+
+            stream_key_str = stream_key.decode('utf-8') if isinstance(stream_key, bytes) else stream_key
+            msg_ids_to_ack = []
+
+            for msg_id, data in stream_messages:
+                try:
+                    if not msg_id or not data:
+                        continue
+
+                    msg_id_str = msg_id.decode('utf-8') if isinstance(msg_id, bytes) else str(msg_id)
+
+                    # Parse the message via TaskPersistence
+                    task_info = self.task_persistence.parse_stream_message(msg_id_str, data)
+                    if task_info:
+                        tasks_to_insert.append(task_info)
+                        msg_ids_to_ack.append(msg_id)
+
+                except Exception as e:
+                    logger.error(f"Error processing message {msg_id}: {e}")
+                    logger.error(traceback.format_exc())
+
+            if msg_ids_to_ack:
+                ack_batch.append((stream_key, msg_ids_to_ack))
+
+        if tasks_to_insert:
+            # Insert tasks via TaskPersistence
+            inserted_count = await self.task_persistence.insert_tasks(tasks_to_insert)
+
+            # Record the successfully inserted task IDs in the in-memory set
+            async with self._processed_ids_lock:
+                for task in tasks_to_insert:
+                    self._processed_task_ids.add(task['id'])
+
+                # If the set grows too large, drop the older half
+                if len(self._processed_task_ids) > self._processed_ids_max_size:
+                    # Keep only the most recent half of the IDs
+                    ids_list = list(self._processed_task_ids)
+                    keep_count = self._processed_ids_max_size // 2
+                    self._processed_task_ids = set(ids_list[-keep_count:])
+                    logger.debug(f"Cleaned processed IDs cache, kept {keep_count} most recent IDs")
+
+        # ACK all messages (even if some inserts failed, ACK to avoid reprocessing)
+        if ack_batch:
+            pipeline = self.redis_client.pipeline()
+            for stream_key, msg_ids in ack_batch:
+                pipeline.xack(stream_key, self.consumer_group, *msg_ids)
+
+            try:
+                await pipeline.execute()
+                total_acked = sum(len(msg_ids) for _, msg_ids in ack_batch)
+                logger.debug(f"Successfully ACKed {total_acked} messages")
+            except Exception as e:
+                logger.error(f"Error executing batch ACK: {e}")
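The priority-queue handling in `_consume_queue` relies on a naming convention: a queue named `base_queue:priority` (for example `robust_bench2:2`) maps to the stream key `<redis_prefix>:QUEUE:<base_queue>:<priority>`, while an ordinary queue maps to `<redis_prefix>:QUEUE:<queue>`. The following standalone Python sketch (illustration only, not part of the package) restates that mapping:

def build_stream_key(redis_prefix: str, queue_name: str) -> str:
    # A queue whose last ':'-separated segment is purely numeric is treated as a priority queue.
    is_priority = ':' in queue_name and queue_name.rsplit(':', 1)[-1].isdigit()
    if is_priority:
        base_queue, priority = queue_name.rsplit(':', 1)
        return f"{redis_prefix}:QUEUE:{base_queue}:{priority}"
    return f"{redis_prefix}:QUEUE:{queue_name}"

assert build_stream_key("jettask", "robust_bench2:2") == "jettask:QUEUE:robust_bench2:2"
assert build_stream_key("jettask", "orders") == "jettask:QUEUE:orders"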
jettask/persistence/namespace.py (renamed from jettask/backend/namespace_data_access.py)
@@ -11,152 +11,113 @@ from datetime import datetime, timedelta, timezone
 from typing import Dict, List, Optional, Tuple, Any
 import redis.asyncio as redis
 from sqlalchemy import text, bindparam
-from sqlalchemy.ext.asyncio import
-from sqlalchemy.orm import sessionmaker
+from sqlalchemy.ext.asyncio import AsyncSession
 import aiohttp
 
+# Import the unified database connection utilities
+from ..utils.db_connector import (
+    get_dual_mode_async_redis_client,
+    get_pg_engine_and_factory
+)
+
 logger = logging.getLogger(__name__)
 
 
 class NamespaceConnection:
     """Database connections for a single namespace"""
-
+
     def __init__(self, namespace_name: str, redis_config: dict, pg_config: dict):
         self.namespace_name = namespace_name
         self.redis_config = redis_config
         self.pg_config = pg_config
         self.redis_prefix = namespace_name  # use the namespace name as the Redis prefix
-
-        #
+
+        # Use the global singleton connection pools
+        self._text_redis_client: Optional[redis.Redis] = None
+        self._binary_redis_client: Optional[redis.Redis] = None
+        self._initialized = False
+
+        # PostgreSQL
         self.async_engine = None
         self.AsyncSessionLocal = None
-        self._redis_pool = None
-        self._binary_redis_pool = None
-        self._initialized = False
 
     async def initialize(self):
         """Initialize the database connections"""
         if self._initialized:
             return
-
+
         try:
-            # Initialize PostgreSQL
+            # Initialize the PostgreSQL connection (using the global singleton)
             if self.pg_config:
-
-
-                dsn = dsn.replace('postgresql://', 'postgresql+psycopg://', 1)
-
-                print(f'{dsn=}')
-                self.async_engine = create_async_engine(
-                    dsn,
+                self.async_engine, self.AsyncSessionLocal = get_pg_engine_and_factory(
+                    config=self.pg_config,
                     pool_size=10,
                     max_overflow=5,
                     pool_pre_ping=True,
                     echo=False
                 )
-
-
-                    bind=self.async_engine,
-                    class_=AsyncSession,
-                    expire_on_commit=False
-                )
-
-            # Initialize the Redis connection pools
+
+            # Initialize the Redis connections (global singleton, dual mode)
             if self.redis_config:
-
-
-
-
-
-                    redis_url,
-                    decode_responses=True,
-                    encoding='utf-8'
-                )
-
-                self._binary_redis_pool = redis.ConnectionPool.from_url(
-                    redis_url,
-                    decode_responses=False
-                )
-            else:
-                # Create connection pools from the separate config fields
-                self._redis_pool = redis.ConnectionPool(
-                    host=self.redis_config.get('host', 'localhost'),
-                    port=self.redis_config.get('port', 6379),
-                    db=self.redis_config.get('db', 0),
-                    password=self.redis_config.get('password'),
-                    decode_responses=True,
-                    encoding='utf-8'
-                )
-
-                self._binary_redis_pool = redis.ConnectionPool(
-                    host=self.redis_config.get('host', 'localhost'),
-                    port=self.redis_config.get('port', 6379),
-                    db=self.redis_config.get('db', 0),
-                    password=self.redis_config.get('password'),
-                    decode_responses=False
-                )
-
+                self._text_redis_client, self._binary_redis_client = get_dual_mode_async_redis_client(
+                    redis_url=self.redis_config.get('url') if isinstance(self.redis_config, dict) else self.redis_config,
+                    max_connections=50
+                )
+
             self._initialized = True
             logger.info(f"命名空间 {self.namespace_name} 数据库连接初始化成功")
-
+
         except Exception as e:
             logger.error(f"初始化命名空间 {self.namespace_name} 数据库连接失败: {e}")
             traceback.print_exc()
             raise
-
-    def _build_pg_dsn(self) -> str:
-        """Build the PostgreSQL DSN"""
-        config = self.pg_config
-        # Two formats are supported: a url, or separate config fields
-        if 'url' in config:
-            return config['url']
-        else:
-            return f"postgresql://{config['user']}:{config['password']}@{config['host']}:{config['port']}/{config['database']}"
-
+
     async def get_redis_client(self, decode: bool = True) -> redis.Redis:
-        """Get the Redis
+        """Get the Redis client (using the global singleton)"""
         try:
             if not self._initialized:
                 await self.initialize()
-
-
-            if
-
-
-
+
+            # Choose the text or binary client depending on the decode flag
+            client = self._text_redis_client if decode else self._binary_redis_client
+            if not client:
+                raise ValueError(f"命名空间 {self.namespace_name} 没有配置 Redis")
+
+            return client
         except Exception as e:
             # Reset the initialization flag on connection errors so re-initialization is possible
-            logger.error(f"获取Redis客户端失败: {e}")
+            logger.error(f"获取 Redis 客户端失败: {e}")
             traceback.print_exc()
             self._initialized = False
             raise
-
+
     async def get_pg_session(self) -> AsyncSession:
-        """Get the PostgreSQL
+        """Get a PostgreSQL session (using the global singleton)"""
         try:
             if not self._initialized:
                 await self.initialize()
-
+
             if not self.AsyncSessionLocal:
-                raise ValueError(f"命名空间 {self.namespace_name} 没有配置PostgreSQL")
-
+                raise ValueError(f"命名空间 {self.namespace_name} 没有配置 PostgreSQL")
+
             return self.AsyncSessionLocal()
         except Exception as e:
             # Reset the initialization flag on connection errors so re-initialization is possible
-            logger.error(f"获取PostgreSQL会话失败: {e}")
+            logger.error(f"获取 PostgreSQL 会话失败: {e}")
             traceback.print_exc()
             self._initialized = False
             raise
-
+
     async def close(self):
-        """
-
-
-
-
-
-
-
+        """Close the database connections (pools are global singletons, so only reset local state)"""
+        # Note: the connection pools are managed by global singletons; only clear the references here
+        self._text_redis_client = None
+        self._binary_redis_client = None
+
+        # The PostgreSQL engine is also a global singleton; only clear the references
+        self.async_engine = None
+        self.AsyncSessionLocal = None
+
         self._initialized = False
         logger.info(f"命名空间 {self.namespace_name} 数据库连接已关闭")
 
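The refactor above swaps per-namespace connection pools for clients obtained from global singletons in jettask.utils.db_connector, and get_redis_client(decode=...) now simply picks the text or binary client. A minimal usage sketch, assuming the import path implied by the rename above and guessing plausible config shapes (the exact dict formats accepted by get_pg_engine_and_factory are not shown in this diff):

import asyncio

from jettask.persistence.namespace import NamespaceConnection  # path assumed from the rename above


async def main():
    conn = NamespaceConnection(
        namespace_name="default",
        # 'url' is the key initialize() reads for Redis; the PostgreSQL dict shape is a guess.
        redis_config={"url": "redis://localhost:6379/0"},
        pg_config={"url": "postgresql://jettask:secret@localhost:5432/jettask"},
    )
    await conn.initialize()

    text_client = await conn.get_redis_client(decode=True)     # str responses
    binary_client = await conn.get_redis_client(decode=False)  # bytes responses
    print(await text_client.ping(), await binary_client.ping())

    session = await conn.get_pg_session()  # an SQLAlchemy AsyncSession
    try:
        pass  # run queries here
    finally:
        await session.close()

    await conn.close()


asyncio.run(main())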
@@ -331,12 +292,19 @@ class NamespaceJetTaskDataAccess:
         redis_client = await conn.get_redis_client()
 
         try:
-            #
-
-
-
-
-
+            # Use the RegistryManager to fetch all queues, avoiding a Redis SCAN
+            from jettask.messaging.registry import QueueRegistry
+            registry = QueueRegistry(
+                redis_client=None,
+                async_redis_client=redis_client,
+                redis_prefix=conn.redis_prefix
+            )
+
+            # Get all queue names
+            queue_names = await registry.get_all_queues()
+
+            # Build the full queue keys
+            queue_keys = [f"{conn.redis_prefix}:QUEUE:{queue_name}" for queue_name in queue_names]
 
             stats = []
             for queue_key in queue_keys:
jettask/persistence/offline_recovery.py (new file)
@@ -0,0 +1,196 @@
+"""Offline worker recovery module.
+
+Recovers messages left behind by offline PG_CONSUMER workers, including offline
+messages on the TASK_CHANGES stream.
+"""
+
+import asyncio
+import logging
+import msgpack
+import traceback
+from typing import Optional
+
+from redis.asyncio import Redis
+from jettask.worker.recovery import OfflineWorkerRecovery
+
+logger = logging.getLogger(__name__)
+
+
+class OfflineRecoveryHandler:
+    """Offline worker recovery handler.
+
+    Responsibilities:
+    - Start the offline worker recovery service
+    - Recover offline messages from the TASK_CHANGES stream
+    - Process recovered messages and update task state
+    """
+
+    def __init__(
+        self,
+        redis_client: Redis,
+        redis_prefix: str,
+        consumer_id: str,
+        task_updater: 'TaskUpdater'  # string type hint to avoid a circular import
+    ):
+        """Initialize the offline recovery handler.
+
+        Args:
+            redis_client: async Redis client
+            redis_prefix: Redis key prefix
+            consumer_id: consumer ID
+            task_updater: task updater instance (used to process recovered messages)
+        """
+        self.redis_client = redis_client
+        self.redis_prefix = redis_prefix
+        self.consumer_id = consumer_id
+        self.task_updater = task_updater
+
+        # Create a WorkerState instance (used to query worker state)
+        from jettask.worker.manager import WorkerState
+        self.worker_state = WorkerState(
+            redis_client=None,  # the persistence module uses the async client
+            async_redis_client=redis_client,
+            redis_prefix=redis_prefix
+        )
+
+        # The offline worker recoverer (recovers offline messages from the TASK_CHANGES stream).
+        # Note: consumer_manager is not passed here because it is initialized later, at start time.
+        self.offline_recovery = None
+
+        self._running = False
+        self._recovery_task = None
+
+    def set_consumer_manager(self, consumer_manager):
+        """Set the ConsumerManager (deferred initialization).
+
+        Args:
+            consumer_manager: ConsumerManager instance
+        """
+        self.offline_recovery = OfflineWorkerRecovery(
+            async_redis_client=self.redis_client,
+            redis_prefix=self.redis_prefix,
+            worker_prefix='PG_CONSUMER',  # use the PG_CONSUMER prefix
+            consumer_manager=consumer_manager,
+            worker_state=self.worker_state  # pass the WorkerState created in __init__
+        )
+
+    async def start(self):
+        """Start the offline recovery service."""
+        if not self.offline_recovery:
+            logger.warning("OfflineRecovery not initialized, please call set_consumer_manager first")
+            return
+
+        self._running = True
+        self._recovery_task = asyncio.create_task(self._recovery_loop())
+        logger.debug("OfflineRecoveryHandler started")
+
+    async def stop(self):
+        """Stop the offline recovery service."""
+        self._running = False
+
+        if self.offline_recovery:
+            self.offline_recovery.stop()  # stop() is not a coroutine
+
+        if self._recovery_task:
+            self._recovery_task.cancel()
+            try:
+                await self._recovery_task
+            except asyncio.CancelledError:
+                pass
+
+        logger.debug("OfflineRecoveryHandler stopped")
+
+    async def _recovery_loop(self):
+        """Offline recovery loop."""
+        while self._running:
+            try:
+                total_recovered = 0
+
+                # Recover messages from the TASK_CHANGES stream
+                recovered = await self._recover_task_changes_offline_messages()
+                if recovered > 0:
+                    logger.debug(f"Recovered {recovered} TASK_CHANGES messages")
+                    total_recovered += recovered
+
+                if total_recovered > 0:
+                    logger.debug(f"Total recovered {total_recovered} messages in this cycle")
+
+                # Scan once per second
+                await asyncio.sleep(1)
+
+            except Exception as e:
+                logger.error(f"Error in offline recovery service: {e}")
+                await asyncio.sleep(10)
+
+    async def _recover_task_changes_offline_messages(self) -> int:
+        """Recover offline messages from the TASK_CHANGES stream."""
+        # Use the standard OfflineWorkerRecovery interface
+        try:
+            # Custom queue formatter for TASK_CHANGES
+            def task_changes_formatter(queue):
+                # For TASK_CHANGES, return the stream key directly (without the QUEUE: prefix)
+                if queue == 'TASK_CHANGES':
+                    return f"{self.redis_prefix}:TASK_CHANGES"
+                else:
+                    return f"{self.redis_prefix}:QUEUE:{queue}"
+
+            # Create a recoverer dedicated to TASK_CHANGES
+            task_changes_recovery = OfflineWorkerRecovery(
+                async_redis_client=self.redis_client,
+                redis_prefix=self.redis_prefix,
+                worker_prefix='PG_CONSUMER',
+                queue_formatter=task_changes_formatter,
+                worker_state=self.worker_state  # pass the WorkerState created in __init__
+            )
+
+            # Call the standard recovery method.
+            # TASK_CHANGES is passed as the queue name and is handled correctly.
+            recovered = await task_changes_recovery.recover_offline_workers(
+                queue='TASK_CHANGES',  # this queue name is used to look up offline workers
+                current_consumer_name=self.consumer_id,
+                process_message_callback=self._process_recovered_task_change_v2
+            )
+
+            return recovered
+
+        except Exception as e:
+            logger.error(f"Error in recover_task_changes_offline_messages: {e}")
+            return 0
+
+    async def _process_recovered_task_change_v2(self, msg_id, msg_data, queue, consumer_id):
+        """Process a recovered TASK_CHANGES message (matches the OfflineWorkerRecovery callback interface)."""
+        try:
+            logger.debug(f'处理恢复的TASK_CHANGES消息 {msg_data=}')
+            # Parse the message - task_id is now used instead of event_id
+            if b'task_id' in msg_data:
+                # Unpack the task_id with msgpack
+                compressed_task_id = msg_data[b'task_id']
+                task_key = msgpack.unpackb(compressed_task_id)
+                task_key = task_key.decode('utf-8') if isinstance(task_key, bytes) else str(task_key)
+
+                # Extract the stream_id from the full task_key.
+                # Format: namespace:TASK:stream_id:queue_name
+                stream_id = None
+                if ':TASK:' in task_key:
+                    parts = task_key.split(':TASK:')
+                    if len(parts) == 2:
+                        # Extract the stream_id from the right-hand part
+                        right_parts = parts[1].split(':')
+                        if right_parts:
+                            stream_id = right_parts[0]  # the stream_id
+
+                if stream_id:
+                    logger.debug(f"Processing recovered TASK_CHANGES message: {stream_id} from offline worker {consumer_id}")
+                    # Update the task state - pass a (stream_id, task_key) tuple
+                    # through task_updater's internal method
+                    await self.task_updater._update_tasks_by_event([(stream_id, task_key)])
+                else:
+                    logger.warning(f"Cannot extract stream_id from task_key: {task_key}")
+
+            # ACK the message
+            change_stream_key = f"{self.redis_prefix}:TASK_CHANGES"
+            consumer_group = f"{self.redis_prefix}_changes_consumer"
+            await self.redis_client.xack(change_stream_key, consumer_group, msg_id)
+
+        except Exception as e:
+            logger.error(f"Error processing recovered task change {msg_id}: {e}")
+            logger.error(traceback.format_exc())