jettask 0.2.19__py3-none-any.whl → 0.2.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165)
  1. jettask/__init__.py +12 -3
  2. jettask/cli.py +314 -228
  3. jettask/config/__init__.py +9 -1
  4. jettask/config/config.py +245 -0
  5. jettask/config/env_loader.py +381 -0
  6. jettask/config/lua_scripts.py +158 -0
  7. jettask/config/nacos_config.py +132 -5
  8. jettask/core/__init__.py +1 -1
  9. jettask/core/app.py +1573 -666
  10. jettask/core/app_importer.py +33 -16
  11. jettask/core/container.py +532 -0
  12. jettask/core/task.py +1 -4
  13. jettask/core/unified_manager_base.py +2 -2
  14. jettask/executor/__init__.py +38 -0
  15. jettask/executor/core.py +625 -0
  16. jettask/executor/executor.py +338 -0
  17. jettask/executor/orchestrator.py +290 -0
  18. jettask/executor/process_entry.py +638 -0
  19. jettask/executor/task_executor.py +317 -0
  20. jettask/messaging/__init__.py +68 -0
  21. jettask/messaging/event_pool.py +2188 -0
  22. jettask/messaging/reader.py +519 -0
  23. jettask/messaging/registry.py +266 -0
  24. jettask/messaging/scanner.py +369 -0
  25. jettask/messaging/sender.py +312 -0
  26. jettask/persistence/__init__.py +118 -0
  27. jettask/persistence/backlog_monitor.py +567 -0
  28. jettask/{backend/data_access.py → persistence/base.py} +58 -57
  29. jettask/persistence/consumer.py +315 -0
  30. jettask/{core → persistence}/db_manager.py +23 -22
  31. jettask/persistence/maintenance.py +81 -0
  32. jettask/persistence/message_consumer.py +259 -0
  33. jettask/{backend/namespace_data_access.py → persistence/namespace.py} +66 -98
  34. jettask/persistence/offline_recovery.py +196 -0
  35. jettask/persistence/queue_discovery.py +215 -0
  36. jettask/persistence/task_persistence.py +218 -0
  37. jettask/persistence/task_updater.py +583 -0
  38. jettask/scheduler/__init__.py +2 -2
  39. jettask/scheduler/loader.py +6 -5
  40. jettask/scheduler/run_scheduler.py +1 -1
  41. jettask/scheduler/scheduler.py +7 -7
  42. jettask/scheduler/{unified_scheduler_manager.py → scheduler_coordinator.py} +18 -13
  43. jettask/task/__init__.py +16 -0
  44. jettask/{router.py → task/router.py} +26 -8
  45. jettask/task/task_center/__init__.py +9 -0
  46. jettask/task/task_executor.py +318 -0
  47. jettask/task/task_registry.py +291 -0
  48. jettask/test_connection_monitor.py +73 -0
  49. jettask/utils/__init__.py +31 -1
  50. jettask/{monitor/run_backlog_collector.py → utils/backlog_collector.py} +1 -1
  51. jettask/utils/db_connector.py +1629 -0
  52. jettask/{db_init.py → utils/db_init.py} +1 -1
  53. jettask/utils/rate_limit/__init__.py +30 -0
  54. jettask/utils/rate_limit/concurrency_limiter.py +665 -0
  55. jettask/utils/rate_limit/config.py +145 -0
  56. jettask/utils/rate_limit/limiter.py +41 -0
  57. jettask/utils/rate_limit/manager.py +269 -0
  58. jettask/utils/rate_limit/qps_limiter.py +154 -0
  59. jettask/utils/rate_limit/task_limiter.py +384 -0
  60. jettask/utils/serializer.py +3 -0
  61. jettask/{monitor/stream_backlog_monitor.py → utils/stream_backlog.py} +14 -6
  62. jettask/utils/time_sync.py +173 -0
  63. jettask/webui/__init__.py +27 -0
  64. jettask/{api/v1 → webui/api}/alerts.py +1 -1
  65. jettask/{api/v1 → webui/api}/analytics.py +2 -2
  66. jettask/{api/v1 → webui/api}/namespaces.py +1 -1
  67. jettask/{api/v1 → webui/api}/overview.py +1 -1
  68. jettask/{api/v1 → webui/api}/queues.py +3 -3
  69. jettask/{api/v1 → webui/api}/scheduled.py +1 -1
  70. jettask/{api/v1 → webui/api}/settings.py +1 -1
  71. jettask/{api.py → webui/app.py} +253 -145
  72. jettask/webui/namespace_manager/__init__.py +10 -0
  73. jettask/{multi_namespace_consumer.py → webui/namespace_manager/multi.py} +69 -22
  74. jettask/{unified_consumer_manager.py → webui/namespace_manager/unified.py} +1 -1
  75. jettask/{run.py → webui/run.py} +2 -2
  76. jettask/{services → webui/services}/__init__.py +1 -3
  77. jettask/{services → webui/services}/overview_service.py +34 -16
  78. jettask/{services → webui/services}/queue_service.py +1 -1
  79. jettask/{backend → webui/services}/queue_stats_v2.py +1 -1
  80. jettask/{services → webui/services}/settings_service.py +1 -1
  81. jettask/worker/__init__.py +53 -0
  82. jettask/worker/lifecycle.py +1507 -0
  83. jettask/worker/manager.py +583 -0
  84. jettask/{core/offline_worker_recovery.py → worker/recovery.py} +268 -175
  85. {jettask-0.2.19.dist-info → jettask-0.2.23.dist-info}/METADATA +2 -71
  86. jettask-0.2.23.dist-info/RECORD +145 -0
  87. jettask/__main__.py +0 -140
  88. jettask/api/__init__.py +0 -103
  89. jettask/backend/__init__.py +0 -1
  90. jettask/backend/api/__init__.py +0 -3
  91. jettask/backend/api/v1/__init__.py +0 -17
  92. jettask/backend/api/v1/monitoring.py +0 -431
  93. jettask/backend/api/v1/namespaces.py +0 -504
  94. jettask/backend/api/v1/queues.py +0 -342
  95. jettask/backend/api/v1/tasks.py +0 -367
  96. jettask/backend/core/__init__.py +0 -3
  97. jettask/backend/core/cache.py +0 -221
  98. jettask/backend/core/database.py +0 -200
  99. jettask/backend/core/exceptions.py +0 -102
  100. jettask/backend/dependencies.py +0 -261
  101. jettask/backend/init_meta_db.py +0 -158
  102. jettask/backend/main.py +0 -1426
  103. jettask/backend/main_unified.py +0 -78
  104. jettask/backend/main_v2.py +0 -394
  105. jettask/backend/models/__init__.py +0 -3
  106. jettask/backend/models/requests.py +0 -236
  107. jettask/backend/models/responses.py +0 -230
  108. jettask/backend/namespace_api_old.py +0 -267
  109. jettask/backend/services/__init__.py +0 -3
  110. jettask/backend/start.py +0 -42
  111. jettask/backend/unified_api_router.py +0 -1541
  112. jettask/cleanup_deprecated_tables.sql +0 -16
  113. jettask/core/consumer_manager.py +0 -1695
  114. jettask/core/delay_scanner.py +0 -256
  115. jettask/core/event_pool.py +0 -1700
  116. jettask/core/heartbeat_process.py +0 -222
  117. jettask/core/task_batch.py +0 -153
  118. jettask/core/worker_scanner.py +0 -271
  119. jettask/executors/__init__.py +0 -5
  120. jettask/executors/asyncio.py +0 -876
  121. jettask/executors/base.py +0 -30
  122. jettask/executors/common.py +0 -148
  123. jettask/executors/multi_asyncio.py +0 -309
  124. jettask/gradio_app.py +0 -570
  125. jettask/integrated_gradio_app.py +0 -1088
  126. jettask/main.py +0 -0
  127. jettask/monitoring/__init__.py +0 -3
  128. jettask/pg_consumer.py +0 -1896
  129. jettask/run_monitor.py +0 -22
  130. jettask/run_webui.py +0 -148
  131. jettask/scheduler/multi_namespace_scheduler.py +0 -294
  132. jettask/scheduler/unified_manager.py +0 -450
  133. jettask/task_center_client.py +0 -150
  134. jettask/utils/serializer_optimized.py +0 -33
  135. jettask/webui_exceptions.py +0 -67
  136. jettask-0.2.19.dist-info/RECORD +0 -150
  137. /jettask/{constants.py → config/constants.py} +0 -0
  138. /jettask/{backend/config.py → config/task_center.py} +0 -0
  139. /jettask/{pg_consumer → messaging/pg_consumer}/pg_consumer_v2.py +0 -0
  140. /jettask/{pg_consumer → messaging/pg_consumer}/sql/add_execution_time_field.sql +0 -0
  141. /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_new_tables.sql +0 -0
  142. /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_tables_v3.sql +0 -0
  143. /jettask/{pg_consumer → messaging/pg_consumer}/sql/migrate_to_new_structure.sql +0 -0
  144. /jettask/{pg_consumer → messaging/pg_consumer}/sql/modify_time_fields.sql +0 -0
  145. /jettask/{pg_consumer → messaging/pg_consumer}/sql_utils.py +0 -0
  146. /jettask/{models.py → persistence/models.py} +0 -0
  147. /jettask/scheduler/{manager.py → task_crud.py} +0 -0
  148. /jettask/{schema.sql → schemas/schema.sql} +0 -0
  149. /jettask/{task_center.py → task/task_center/client.py} +0 -0
  150. /jettask/{monitoring → utils}/file_watcher.py +0 -0
  151. /jettask/{services/redis_monitor_service.py → utils/redis_monitor.py} +0 -0
  152. /jettask/{api/v1 → webui/api}/__init__.py +0 -0
  153. /jettask/{webui_config.py → webui/config.py} +0 -0
  154. /jettask/{webui_models → webui/models}/__init__.py +0 -0
  155. /jettask/{webui_models → webui/models}/namespace.py +0 -0
  156. /jettask/{services → webui/services}/alert_service.py +0 -0
  157. /jettask/{services → webui/services}/analytics_service.py +0 -0
  158. /jettask/{services → webui/services}/scheduled_task_service.py +0 -0
  159. /jettask/{services → webui/services}/task_service.py +0 -0
  160. /jettask/{webui_sql → webui/sql}/batch_upsert_functions.sql +0 -0
  161. /jettask/{webui_sql → webui/sql}/verify_database.sql +0 -0
  162. {jettask-0.2.19.dist-info → jettask-0.2.23.dist-info}/WHEEL +0 -0
  163. {jettask-0.2.19.dist-info → jettask-0.2.23.dist-info}/entry_points.txt +0 -0
  164. {jettask-0.2.19.dist-info → jettask-0.2.23.dist-info}/licenses/LICENSE +0 -0
  165. {jettask-0.2.19.dist-info → jettask-0.2.23.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,259 @@
+ """Message consumption module.
+
+ Consumes messages from Redis Stream queues and persists them to PostgreSQL.
+ """
+
+ import asyncio
+ import logging
+ import traceback
+ from typing import List, Dict
+ from collections import defaultdict
+
+ import redis.asyncio as redis
+ from redis.asyncio import Redis
+
+ from .task_persistence import TaskPersistence
+
+ logger = logging.getLogger(__name__)
+
+
+ class MessageConsumer:
+     """Message consumer.
+
+     Responsibilities:
+     - Consume messages from Redis Stream queues
+     - Parse messages and persist them to the database
+     - Manage consumer tasks for multiple queues
+     - Handle error retries and ACKs
+     """
+
+     def __init__(
+         self,
+         redis_client: Redis,
+         redis_prefix: str,
+         consumer_group: str,
+         consumer_id: str,
+         task_persistence: TaskPersistence,
+         queue_discovery: 'QueueDiscovery'
+     ):
+         """Initialize the message consumer.
+
+         Args:
+             redis_client: Async Redis client
+             redis_prefix: Redis key prefix
+             consumer_group: Consumer group name
+             consumer_id: Consumer ID
+             task_persistence: Task persistence handler
+             queue_discovery: Queue discovery component
+         """
+         self.redis_client = redis_client
+         self.redis_prefix = redis_prefix
+         self.consumer_group = consumer_group
+         self.consumer_id = consumer_id
+         self.task_persistence = task_persistence
+         self.queue_discovery = queue_discovery
+
+         # Error counters
+         self._consecutive_errors = defaultdict(int)
+
+         # Cache of already-processed task IDs (used to optimize queries)
+         self._processed_task_ids = set()
+         self._processed_ids_lock = asyncio.Lock()
+         self._processed_ids_max_size = 100000
+         self._processed_ids_cleanup_interval = 300
+
+         self._running = False
+         self._consume_task = None
+         self._queue_tasks = {}
+
+     async def start(self):
+         """Start the consumer."""
+         self._running = True
+         self._consume_task = asyncio.create_task(self._consume_queues())
+         logger.debug("MessageConsumer started")
+
+     async def stop(self):
+         """Stop the consumer."""
+         self._running = False
+
+         if self._consume_task:
+             self._consume_task.cancel()
+             try:
+                 await self._consume_task
+             except asyncio.CancelledError:
+                 pass
+
+         # Cancel all queue tasks
+         for task in self._queue_tasks.values():
+             task.cancel()
+
+         if self._queue_tasks:
+             await asyncio.gather(*self._queue_tasks.values(), return_exceptions=True)
+
+         logger.debug("MessageConsumer stopped")
+
+     async def _consume_queues(self):
+         """Start consumer tasks for all queues."""
+         while self._running:
+             try:
+                 # Get the currently known queues
+                 known_queues = self.queue_discovery.get_known_queues()
+
+                 # Start a consumer task for each queue
+                 for queue in known_queues:
+                     if queue not in self._queue_tasks or self._queue_tasks[queue].done():
+                         self._queue_tasks[queue] = asyncio.create_task(self._consume_queue(queue))
+                         logger.debug(f"Started consumer task for queue: {queue}")
+
+                 # Remove tasks for queues that no longer exist
+                 for queue in list(self._queue_tasks.keys()):
+                     if queue not in known_queues:
+                         self._queue_tasks[queue].cancel()
+                         del self._queue_tasks[queue]
+                         logger.debug(f"Stopped consumer task for removed queue: {queue}")
+
+                 await asyncio.sleep(10)
+
+             except Exception as e:
+                 logger.error(f"Error in consume_queues manager: {e}")
+                 logger.error(traceback.format_exc())
+                 await asyncio.sleep(5)
+
+     async def _consume_queue(self, queue_name: str):
+         """Consume tasks from a single queue (including priority queues)."""
+         # Determine whether this is a priority queue
+         is_priority_queue = ':' in queue_name and queue_name.rsplit(':', 1)[-1].isdigit()
+
+         if is_priority_queue:
+             # Priority queue format: base_queue:priority (e.g. robust_bench2:2)
+             base_queue = queue_name.rsplit(':', 1)[0]
+             priority = queue_name.rsplit(':', 1)[1]
+             stream_key = f"{self.redis_prefix}:QUEUE:{base_queue}:{priority}"
+         else:
+             # Regular queue
+             stream_key = f"{self.redis_prefix}:QUEUE:{queue_name}"
+
+         logger.debug(f"Consuming queue: {queue_name}, stream_key: {stream_key}, is_priority: {is_priority_queue}")
+
+         check_backlog = True
+         lastid = "0-0"
+
+         # pg_consumer should use a single shared consumer_id rather than creating one per queue:
+         # its job is to consume messages from every queue and write them to the database.
+         # It is not an actual task executor, so a dedicated consumer per queue is unnecessary.
+         consumer_name = self.consumer_id
+
+         # ConsumerManager automatically recovers pending messages from offline workers,
+         # so no manual recovery is needed here.
+
+         while self._running and queue_name in self.queue_discovery.get_known_queues():
+             try:
+                 myid = lastid if check_backlog else ">"
+
+                 messages = await self.redis_client.xreadgroup(
+                     self.consumer_group,
+                     consumer_name,  # Use the consumer_name managed by ConsumerManager
+                     {stream_key: myid},
+                     count=10000,
+                     block=1000 if not check_backlog else 0
+                 )
+
+                 if not messages or (messages and len(messages[0][1]) == 0):
+                     check_backlog = False
+                     continue
+
+                 if messages:
+                     await self._process_messages(messages)
+                     self._consecutive_errors[queue_name] = 0
+
+                     if messages[0] and messages[0][1]:
+                         lastid = messages[0][1][-1][0].decode('utf-8') if isinstance(messages[0][1][-1][0], bytes) else messages[0][1][-1][0]
+                         check_backlog = len(messages[0][1]) >= 2000
+
+             except redis.ResponseError as e:
+                 if "NOGROUP" in str(e):
+                     try:
+                         await self.redis_client.xgroup_create(
+                             stream_key, self.consumer_group, id='0', mkstream=True
+                         )
+                         logger.debug(f"Recreated consumer group for queue: {queue_name}")
+                         check_backlog = True
+                         lastid = "0-0"
+                     except:
+                         pass
+                 else:
+                     logger.error(f"Redis error for queue {queue_name}: {e}")
+                     logger.error(traceback.format_exc())
+                     self._consecutive_errors[queue_name] += 1
+
+                     if self._consecutive_errors[queue_name] > 10:
+                         logger.debug(f"Too many errors for queue {queue_name}, will retry later")
+                         await asyncio.sleep(30)
+                         self._consecutive_errors[queue_name] = 0
+
+             except Exception as e:
+                 logger.error(f"Error consuming queue {queue_name}: {e}", exc_info=True)
+                 self._consecutive_errors[queue_name] += 1
+                 await asyncio.sleep(1)
+
+     async def _process_messages(self, messages: List):
+         """Process messages and persist them to PostgreSQL."""
+         tasks_to_insert = []
+         ack_batch = []
+
+         for stream_key, stream_messages in messages:
+             if not stream_messages:
+                 continue
+
+             stream_key_str = stream_key.decode('utf-8') if isinstance(stream_key, bytes) else stream_key
+             msg_ids_to_ack = []
+
+             for msg_id, data in stream_messages:
+                 try:
+                     if not msg_id or not data:
+                         continue
+
+                     msg_id_str = msg_id.decode('utf-8') if isinstance(msg_id, bytes) else str(msg_id)
+
+                     # Parse the message with TaskPersistence
+                     task_info = self.task_persistence.parse_stream_message(msg_id_str, data)
+                     if task_info:
+                         tasks_to_insert.append(task_info)
+                         msg_ids_to_ack.append(msg_id)
+
+                 except Exception as e:
+                     logger.error(f"Error processing message {msg_id}: {e}")
+                     logger.error(traceback.format_exc())
+
+             if msg_ids_to_ack:
+                 ack_batch.append((stream_key, msg_ids_to_ack))
+
+         if tasks_to_insert:
+             # Insert the tasks with TaskPersistence
+             inserted_count = await self.task_persistence.insert_tasks(tasks_to_insert)
+
+             # Record the successfully inserted task IDs in the in-memory set
+             async with self._processed_ids_lock:
+                 for task in tasks_to_insert:
+                     self._processed_task_ids.add(task['id'])
+
+                 # If the set grows too large, drop the oldest half
+                 if len(self._processed_task_ids) > self._processed_ids_max_size:
+                     # Keep only the most recent half of the IDs
+                     ids_list = list(self._processed_task_ids)
+                     keep_count = self._processed_ids_max_size // 2
+                     self._processed_task_ids = set(ids_list[-keep_count:])
+                     logger.debug(f"Cleaned processed IDs cache, kept {keep_count} most recent IDs")
+
+         # ACK all messages (even if some inserts failed) to avoid reprocessing
+         if ack_batch:
+             pipeline = self.redis_client.pipeline()
+             for stream_key, msg_ids in ack_batch:
+                 pipeline.xack(stream_key, self.consumer_group, *msg_ids)
+
+             try:
+                 await pipeline.execute()
+                 total_acked = sum(len(msg_ids) for _, msg_ids in ack_batch)
+                 logger.debug(f"Successfully ACKed {total_acked} messages")
+             except Exception as e:
+                 logger.error(f"Error executing batch ACK: {e}")
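The queue-to-stream-key mapping in `_consume_queue` above is what ties queue discovery to the actual Redis Stream keys, so it is worth seeing in isolation. Below is a small, self-contained sketch of that convention; the helper name `stream_key_for` and the `jettask` prefix are illustrative only, while the priority-suffix check mirrors the logic in the hunk above.

    # Mirrors the priority-queue check in MessageConsumer._consume_queue: a queue name
    # with a numeric suffix ("base_queue:priority") maps to a priority stream key.
    def stream_key_for(redis_prefix: str, queue_name: str) -> str:
        is_priority = ':' in queue_name and queue_name.rsplit(':', 1)[-1].isdigit()
        if is_priority:
            base_queue, priority = queue_name.rsplit(':', 1)
            return f"{redis_prefix}:QUEUE:{base_queue}:{priority}"
        return f"{redis_prefix}:QUEUE:{queue_name}"

    assert stream_key_for("jettask", "robust_bench2:2") == "jettask:QUEUE:robust_bench2:2"
    assert stream_key_for("jettask", "orders") == "jettask:QUEUE:orders"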
@@ -11,152 +11,113 @@ from datetime import datetime, timedelta, timezone
  from typing import Dict, List, Optional, Tuple, Any
  import redis.asyncio as redis
  from sqlalchemy import text, bindparam
- from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession
- from sqlalchemy.orm import sessionmaker
+ from sqlalchemy.ext.asyncio import AsyncSession
  import aiohttp

+ # Import the unified database connection helpers
+ from ..utils.db_connector import (
+     get_dual_mode_async_redis_client,
+     get_pg_engine_and_factory
+ )
+
  logger = logging.getLogger(__name__)


  class NamespaceConnection:
      """Database connections for a single namespace"""
-
+
      def __init__(self, namespace_name: str, redis_config: dict, pg_config: dict):
          self.namespace_name = namespace_name
          self.redis_config = redis_config
          self.pg_config = pg_config
          self.redis_prefix = namespace_name  # Use the namespace name as the Redis prefix
-
-         # Database connection objects
+
+         # Use the global singleton connection pools
+         self._text_redis_client: Optional[redis.Redis] = None
+         self._binary_redis_client: Optional[redis.Redis] = None
+         self._initialized = False
+
+         # PostgreSQL-related objects
          self.async_engine = None
          self.AsyncSessionLocal = None
-         self._redis_pool = None
-         self._binary_redis_pool = None
-         self._initialized = False

      async def initialize(self):
          """Initialize the database connections"""
          if self._initialized:
              return
-
+
          try:
-             # Initialize the PostgreSQL connection
+             # Initialize the PostgreSQL connection (global singleton)
              if self.pg_config:
-                 dsn = self._build_pg_dsn()
-                 if dsn.startswith('postgresql://'):
-                     dsn = dsn.replace('postgresql://', 'postgresql+psycopg://', 1)
-
-                 print(f'{dsn=}')
-                 self.async_engine = create_async_engine(
-                     dsn,
+                 self.async_engine, self.AsyncSessionLocal = get_pg_engine_and_factory(
+                     config=self.pg_config,
                      pool_size=10,
                      max_overflow=5,
                      pool_pre_ping=True,
                      echo=False
                  )
-
-                 self.AsyncSessionLocal = sessionmaker(
-                     bind=self.async_engine,
-                     class_=AsyncSession,
-                     expire_on_commit=False
-                 )
-
-             # Initialize the Redis connection pools
+
+             # Initialize the Redis connections (global singleton, dual mode)
              if self.redis_config:
-                 # Two formats are supported: a URL or separate host/port settings
-                 redis_url = self.redis_config.get('url')
-                 if redis_url:
-                     # Create the connection pools from the URL
-                     self._redis_pool = redis.ConnectionPool.from_url(
-                         redis_url,
-                         decode_responses=True,
-                         encoding='utf-8'
-                     )
-
-                     self._binary_redis_pool = redis.ConnectionPool.from_url(
-                         redis_url,
-                         decode_responses=False
-                     )
-                 else:
-                     # Create the connection pools from separate settings
-                     self._redis_pool = redis.ConnectionPool(
-                         host=self.redis_config.get('host', 'localhost'),
-                         port=self.redis_config.get('port', 6379),
-                         db=self.redis_config.get('db', 0),
-                         password=self.redis_config.get('password'),
-                         decode_responses=True,
-                         encoding='utf-8'
-                     )
-
-                     self._binary_redis_pool = redis.ConnectionPool(
-                         host=self.redis_config.get('host', 'localhost'),
-                         port=self.redis_config.get('port', 6379),
-                         db=self.redis_config.get('db', 0),
-                         password=self.redis_config.get('password'),
-                         decode_responses=False
-                     )
-
+                 self._text_redis_client, self._binary_redis_client = get_dual_mode_async_redis_client(
+                     redis_url=self.redis_config.get('url') if isinstance(self.redis_config, dict) else self.redis_config,
+                     max_connections=50
+                 )
+
              self._initialized = True
              logger.info(f"Database connections for namespace {self.namespace_name} initialized successfully")
-
+
          except Exception as e:
              logger.error(f"Failed to initialize database connections for namespace {self.namespace_name}: {e}")
              traceback.print_exc()
              raise
-
-     def _build_pg_dsn(self) -> str:
-         """Build the PostgreSQL DSN"""
-         config = self.pg_config
-         # Two formats are supported: a URL or separate settings
-         if 'url' in config:
-             return config['url']
-         else:
-             return f"postgresql://{config['user']}:{config['password']}@{config['host']}:{config['port']}/{config['database']}"
-
+
      async def get_redis_client(self, decode: bool = True) -> redis.Redis:
-         """Get a Redis client"""
+         """Get a Redis client (global singleton)"""
          try:
              if not self._initialized:
                  await self.initialize()
-
-             pool = self._redis_pool if decode else self._binary_redis_pool
-             if not pool:
-                 raise ValueError(f"Namespace {self.namespace_name} has no Redis configured")
-
-             return redis.Redis(connection_pool=pool)
+
+             # Choose the text or binary client based on the decode flag
+             client = self._text_redis_client if decode else self._binary_redis_client
+             if not client:
+                 raise ValueError(f"Namespace {self.namespace_name} has no Redis configured")
+
+             return client
          except Exception as e:
              # On connection errors, reset the initialization flag so re-initialization is possible
-             logger.error(f"Failed to get Redis client: {e}")
+             logger.error(f"Failed to get Redis client: {e}")
              traceback.print_exc()
              self._initialized = False
              raise
-
+
      async def get_pg_session(self) -> AsyncSession:
-         """Get a PostgreSQL session"""
+         """Get a PostgreSQL session (global singleton)"""
          try:
              if not self._initialized:
                  await self.initialize()
-
+
              if not self.AsyncSessionLocal:
-                 raise ValueError(f"Namespace {self.namespace_name} has no PostgreSQL configured")
-
+                 raise ValueError(f"Namespace {self.namespace_name} has no PostgreSQL configured")
+
              return self.AsyncSessionLocal()
          except Exception as e:
              # On connection errors, reset the initialization flag so re-initialization is possible
-             logger.error(f"Failed to get PostgreSQL session: {e}")
+             logger.error(f"Failed to get PostgreSQL session: {e}")
              traceback.print_exc()
              self._initialized = False
              raise
-
+
      async def close(self):
-         """Close the database connections"""
-         if self._redis_pool:
-             await self._redis_pool.aclose()
-         if self._binary_redis_pool:
-             await self._binary_redis_pool.aclose()
-         if self.async_engine:
-             await self.async_engine.dispose()
-
+         """Close the database connections (global singletons are in use, so only reset local state)"""
+         # Note: the connection pools are managed by the global singletons; only clear the references here
+         self._text_redis_client = None
+         self._binary_redis_client = None
+
+         # The PostgreSQL engine is also a global singleton; only clear the references
+         self.async_engine = None
+         self.AsyncSessionLocal = None
+
          self._initialized = False
          logger.info(f"Database connections for namespace {self.namespace_name} closed")

@@ -331,12 +292,19 @@ class NamespaceJetTaskDataAccess:
          redis_client = await conn.get_redis_client()

          try:
-             # Get all queues
-             queue_pattern = f"{conn.redis_prefix}:QUEUE:*"
-             print(f'{queue_pattern=}')
-             queue_keys = []
-             async for key in redis_client.scan_iter(match=queue_pattern):
-                 queue_keys.append(key)
+             # Use the RegistryManager to get all queues and avoid SCAN
+             from jettask.messaging.registry import QueueRegistry
+             registry = QueueRegistry(
+                 redis_client=None,
+                 async_redis_client=redis_client,
+                 redis_prefix=conn.redis_prefix
+             )
+
+             # Get all queue names
+             queue_names = await registry.get_all_queues()
+
+             # Build the full queue keys
+             queue_keys = [f"{conn.redis_prefix}:QUEUE:{queue_name}" for queue_name in queue_names]

              stats = []
              for queue_key in queue_keys:
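For context, the registry-based lookup introduced above could be exercised on its own roughly as follows. The `QueueRegistry` constructor arguments and the awaited `get_all_queues()` call are taken from the diff; the URL, prefix, and wrapper function are made up for illustration, and closing the client with `aclose()` assumes redis-py 5 or newer.

    import asyncio
    import redis.asyncio as redis
    from jettask.messaging.registry import QueueRegistry

    async def list_queue_keys(redis_url: str, prefix: str) -> list:
        client = redis.Redis.from_url(redis_url, decode_responses=True)
        try:
            registry = QueueRegistry(
                redis_client=None,
                async_redis_client=client,
                redis_prefix=prefix,
            )
            queue_names = await registry.get_all_queues()  # registry lookup instead of SCAN
            return [f"{prefix}:QUEUE:{name}" for name in queue_names]
        finally:
            await client.aclose()  # redis-py 5+

    # asyncio.run(list_queue_keys("redis://localhost:6379/0", "jettask"))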
@@ -0,0 +1,196 @@
+ """Offline worker recovery module.
+
+ Recovers messages left behind by offline PG_CONSUMER workers, including offline messages on the TASK_CHANGES stream.
+ """
+
+ import asyncio
+ import logging
+ import msgpack
+ import traceback
+ from typing import Optional
+
+ from redis.asyncio import Redis
+ from jettask.worker.recovery import OfflineWorkerRecovery
+
+ logger = logging.getLogger(__name__)
+
+
+ class OfflineRecoveryHandler:
+     """Offline worker recovery handler.
+
+     Responsibilities:
+     - Start the offline worker recovery service
+     - Recover offline messages from the TASK_CHANGES stream
+     - Process recovered messages and update task state
+     """
+
+     def __init__(
+         self,
+         redis_client: Redis,
+         redis_prefix: str,
+         consumer_id: str,
+         task_updater: 'TaskUpdater'  # String type hint to avoid a circular import
+     ):
+         """Initialize the offline recovery handler.
+
+         Args:
+             redis_client: Async Redis client
+             redis_prefix: Redis key prefix
+             consumer_id: Consumer ID
+             task_updater: TaskUpdater instance (used to process recovered messages)
+         """
+         self.redis_client = redis_client
+         self.redis_prefix = redis_prefix
+         self.consumer_id = consumer_id
+         self.task_updater = task_updater
+
+         # Create a WorkerState instance (used to query worker state)
+         from jettask.worker.manager import WorkerState
+         self.worker_state = WorkerState(
+             redis_client=None,  # The persistence module uses the async client
+             async_redis_client=redis_client,
+             redis_prefix=redis_prefix
+         )
+
+         # The offline worker recoverer (recovers offline messages from the TASK_CHANGES stream)
+         # Note: consumer_manager is not passed in here because it has to be initialized at start time
+         self.offline_recovery = None
+
+         self._running = False
+         self._recovery_task = None
+
+     def set_consumer_manager(self, consumer_manager):
+         """Set the ConsumerManager (deferred initialization).
+
+         Args:
+             consumer_manager: ConsumerManager instance
+         """
+         self.offline_recovery = OfflineWorkerRecovery(
+             async_redis_client=self.redis_client,
+             redis_prefix=self.redis_prefix,
+             worker_prefix='PG_CONSUMER',  # Use the PG_CONSUMER prefix
+             consumer_manager=consumer_manager,
+             worker_state=self.worker_state  # Pass in the WorkerState created in __init__
+         )
+
+     async def start(self):
+         """Start the offline recovery service."""
+         if not self.offline_recovery:
+             logger.warning("OfflineRecovery not initialized, please call set_consumer_manager first")
+             return
+
+         self._running = True
+         self._recovery_task = asyncio.create_task(self._recovery_loop())
+         logger.debug("OfflineRecoveryHandler started")
+
+     async def stop(self):
+         """Stop the offline recovery service."""
+         self._running = False
+
+         if self.offline_recovery:
+             self.offline_recovery.stop()  # stop() is not an async method
+
+         if self._recovery_task:
+             self._recovery_task.cancel()
+             try:
+                 await self._recovery_task
+             except asyncio.CancelledError:
+                 pass
+
+         logger.debug("OfflineRecoveryHandler stopped")
+
+     async def _recovery_loop(self):
+         """Offline recovery loop."""
+         while self._running:
+             try:
+                 total_recovered = 0
+
+                 # Recover messages from the TASK_CHANGES stream
+                 recovered = await self._recover_task_changes_offline_messages()
+                 if recovered > 0:
+                     logger.debug(f"Recovered {recovered} TASK_CHANGES messages")
+                     total_recovered += recovered
+
+                 if total_recovered > 0:
+                     logger.debug(f"Total recovered {total_recovered} messages in this cycle")
+
+                 # Scan once per second
+                 await asyncio.sleep(1)
+
+             except Exception as e:
+                 logger.error(f"Error in offline recovery service: {e}")
+                 await asyncio.sleep(10)
+
+     async def _recover_task_changes_offline_messages(self) -> int:
+         """Recover offline messages from the TASK_CHANGES stream."""
+         # Use the standard OfflineWorkerRecovery interface
+         try:
+             # Define a custom queue formatter for TASK_CHANGES
+             def task_changes_formatter(queue):
+                 # For TASK_CHANGES, return the stream key directly (without the QUEUE: prefix)
+                 if queue == 'TASK_CHANGES':
+                     return f"{self.redis_prefix}:TASK_CHANGES"
+                 else:
+                     return f"{self.redis_prefix}:QUEUE:{queue}"
+
+             # Create a recoverer dedicated to TASK_CHANGES
+             task_changes_recovery = OfflineWorkerRecovery(
+                 async_redis_client=self.redis_client,
+                 redis_prefix=self.redis_prefix,
+                 worker_prefix='PG_CONSUMER',
+                 queue_formatter=task_changes_formatter,
+                 worker_state=self.worker_state  # Pass in the WorkerState created in __init__
+             )
+
+             # Call the standard recovery method;
+             # TASK_CHANGES is passed as the queue name and is handled correctly
+             recovered = await task_changes_recovery.recover_offline_workers(
+                 queue='TASK_CHANGES',  # This queue name is used to look up offline workers
+                 current_consumer_name=self.consumer_id,
+                 process_message_callback=self._process_recovered_task_change_v2
+             )
+
+             return recovered
+
+         except Exception as e:
+             logger.error(f"Error in recover_task_changes_offline_messages: {e}")
+             return 0
+
+     async def _process_recovered_task_change_v2(self, msg_id, msg_data, queue, consumer_id):
+         """Process a recovered TASK_CHANGES message (matches the OfflineWorkerRecovery callback interface)."""
+         try:
+             logger.debug(f'Processing recovered TASK_CHANGES message {msg_data=}')
+             # Parse the message - task_id is now used instead of event_id
+             if b'task_id' in msg_data:
+                 # Unpack the task_id with msgpack
+                 compressed_task_id = msg_data[b'task_id']
+                 task_key = msgpack.unpackb(compressed_task_id)
+                 task_key = task_key.decode('utf-8') if isinstance(task_key, bytes) else str(task_key)
+
+                 # Extract the stream_id from the full task_key
+                 # Format: namespace:TASK:stream_id:queue_name
+                 stream_id = None
+                 if ':TASK:' in task_key:
+                     parts = task_key.split(':TASK:')
+                     if len(parts) == 2:
+                         # Then extract the stream_id from the right-hand part
+                         right_parts = parts[1].split(':')
+                         if right_parts:
+                             stream_id = right_parts[0]  # Extract the stream_id
+
+                 if stream_id:
+                     logger.debug(f"Processing recovered TASK_CHANGES message: {stream_id} from offline worker {consumer_id}")
+                     # Update the task state - pass a (stream_id, task_key) tuple
+                     # via TaskUpdater's internal method
+                     await self.task_updater._update_tasks_by_event([(stream_id, task_key)])
+                 else:
+                     logger.warning(f"Cannot extract stream_id from task_key: {task_key}")
+
+             # ACK the message
+             change_stream_key = f"{self.redis_prefix}:TASK_CHANGES"
+             consumer_group = f"{self.redis_prefix}_changes_consumer"
+             await self.redis_client.xack(change_stream_key, consumer_group, msg_id)
+
+         except Exception as e:
+             logger.error(f"Error processing recovered task change {msg_id}: {e}")
+             logger.error(traceback.format_exc())
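The task_key parsing in `_process_recovered_task_change_v2` is the densest part of this new module, so here is the same logic as a standalone function. The example key below is made up purely to show the documented `namespace:TASK:stream_id:queue_name` shape; only the parsing steps come from the diff.

    from typing import Optional

    def extract_stream_id(task_key: str) -> Optional[str]:
        """Standalone version of the stream_id extraction used above.

        Expected key shape: namespace:TASK:stream_id:queue_name
        """
        if ':TASK:' not in task_key:
            return None
        parts = task_key.split(':TASK:')
        if len(parts) != 2:
            return None
        right_parts = parts[1].split(':')
        return right_parts[0] if right_parts else None

    # Illustrative keys only; the stream_id is a Redis Stream entry ID.
    assert extract_stream_id("jettask:TASK:1755000000000-0:orders") == "1755000000000-0"
    assert extract_stream_id("jettask:WORKER:abc") is None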