jettask 0.2.1__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. jettask/constants.py +213 -0
  2. jettask/core/app.py +525 -205
  3. jettask/core/cli.py +193 -185
  4. jettask/core/consumer_manager.py +126 -34
  5. jettask/core/context.py +3 -0
  6. jettask/core/enums.py +137 -0
  7. jettask/core/event_pool.py +501 -168
  8. jettask/core/message.py +147 -0
  9. jettask/core/offline_worker_recovery.py +181 -114
  10. jettask/core/task.py +10 -174
  11. jettask/core/task_batch.py +153 -0
  12. jettask/core/unified_manager_base.py +243 -0
  13. jettask/core/worker_scanner.py +54 -54
  14. jettask/executors/asyncio.py +184 -64
  15. jettask/webui/backend/config.py +51 -0
  16. jettask/webui/backend/data_access.py +2083 -92
  17. jettask/webui/backend/data_api.py +3294 -0
  18. jettask/webui/backend/dependencies.py +261 -0
  19. jettask/webui/backend/init_meta_db.py +158 -0
  20. jettask/webui/backend/main.py +1358 -69
  21. jettask/webui/backend/main_unified.py +78 -0
  22. jettask/webui/backend/main_v2.py +394 -0
  23. jettask/webui/backend/namespace_api.py +295 -0
  24. jettask/webui/backend/namespace_api_old.py +294 -0
  25. jettask/webui/backend/namespace_data_access.py +611 -0
  26. jettask/webui/backend/queue_backlog_api.py +727 -0
  27. jettask/webui/backend/queue_stats_v2.py +521 -0
  28. jettask/webui/backend/redis_monitor_api.py +476 -0
  29. jettask/webui/backend/unified_api_router.py +1601 -0
  30. jettask/webui/db_init.py +204 -32
  31. jettask/webui/frontend/package-lock.json +492 -1
  32. jettask/webui/frontend/package.json +4 -1
  33. jettask/webui/frontend/src/App.css +105 -7
  34. jettask/webui/frontend/src/App.jsx +49 -20
  35. jettask/webui/frontend/src/components/NamespaceSelector.jsx +166 -0
  36. jettask/webui/frontend/src/components/QueueBacklogChart.jsx +298 -0
  37. jettask/webui/frontend/src/components/QueueBacklogTrend.jsx +638 -0
  38. jettask/webui/frontend/src/components/QueueDetailsTable.css +65 -0
  39. jettask/webui/frontend/src/components/QueueDetailsTable.jsx +487 -0
  40. jettask/webui/frontend/src/components/QueueDetailsTableV2.jsx +465 -0
  41. jettask/webui/frontend/src/components/ScheduledTaskFilter.jsx +423 -0
  42. jettask/webui/frontend/src/components/TaskFilter.jsx +425 -0
  43. jettask/webui/frontend/src/components/TimeRangeSelector.css +21 -0
  44. jettask/webui/frontend/src/components/TimeRangeSelector.jsx +160 -0
  45. jettask/webui/frontend/src/components/layout/AppLayout.css +95 -0
  46. jettask/webui/frontend/src/components/layout/AppLayout.jsx +49 -0
  47. jettask/webui/frontend/src/components/layout/Header.css +34 -10
  48. jettask/webui/frontend/src/components/layout/Header.jsx +31 -23
  49. jettask/webui/frontend/src/components/layout/SideMenu.css +137 -0
  50. jettask/webui/frontend/src/components/layout/SideMenu.jsx +209 -0
  51. jettask/webui/frontend/src/components/layout/TabsNav.css +244 -0
  52. jettask/webui/frontend/src/components/layout/TabsNav.jsx +206 -0
  53. jettask/webui/frontend/src/components/layout/UserInfo.css +197 -0
  54. jettask/webui/frontend/src/components/layout/UserInfo.jsx +197 -0
  55. jettask/webui/frontend/src/contexts/NamespaceContext.jsx +72 -0
  56. jettask/webui/frontend/src/contexts/TabsContext.backup.jsx +245 -0
  57. jettask/webui/frontend/src/main.jsx +1 -0
  58. jettask/webui/frontend/src/pages/Alerts.jsx +684 -0
  59. jettask/webui/frontend/src/pages/Dashboard.jsx +1330 -0
  60. jettask/webui/frontend/src/pages/QueueDetail.jsx +1109 -10
  61. jettask/webui/frontend/src/pages/QueueMonitor.jsx +236 -115
  62. jettask/webui/frontend/src/pages/Queues.jsx +5 -1
  63. jettask/webui/frontend/src/pages/ScheduledTasks.jsx +809 -0
  64. jettask/webui/frontend/src/pages/Settings.jsx +800 -0
  65. jettask/webui/frontend/src/services/api.js +7 -5
  66. jettask/webui/frontend/src/utils/suppressWarnings.js +22 -0
  67. jettask/webui/frontend/src/utils/userPreferences.js +154 -0
  68. jettask/webui/multi_namespace_consumer.py +543 -0
  69. jettask/webui/pg_consumer.py +983 -246
  70. jettask/webui/static/dist/assets/index-7129cfe1.css +1 -0
  71. jettask/webui/static/dist/assets/index-8d1935cc.js +774 -0
  72. jettask/webui/static/dist/index.html +2 -2
  73. jettask/webui/task_center.py +216 -0
  74. jettask/webui/task_center_client.py +150 -0
  75. jettask/webui/unified_consumer_manager.py +193 -0
  76. {jettask-0.2.1.dist-info → jettask-0.2.4.dist-info}/METADATA +1 -1
  77. jettask-0.2.4.dist-info/RECORD +134 -0
  78. jettask/webui/pg_consumer_slow.py +0 -1099
  79. jettask/webui/pg_consumer_test.py +0 -678
  80. jettask/webui/static/dist/assets/index-823408e8.css +0 -1
  81. jettask/webui/static/dist/assets/index-9968b0b8.js +0 -543
  82. jettask/webui/test_pg_consumer_recovery.py +0 -547
  83. jettask/webui/test_recovery_simple.py +0 -492
  84. jettask/webui/test_self_recovery.py +0 -467
  85. jettask-0.2.1.dist-info/RECORD +0 -91
  86. {jettask-0.2.1.dist-info → jettask-0.2.4.dist-info}/WHEEL +0 -0
  87. {jettask-0.2.1.dist-info → jettask-0.2.4.dist-info}/entry_points.txt +0 -0
  88. {jettask-0.2.1.dist-info → jettask-0.2.4.dist-info}/licenses/LICENSE +0 -0
  89. {jettask-0.2.1.dist-info → jettask-0.2.4.dist-info}/top_level.txt +0 -0
jettask/core/event_pool.py
@@ -2,7 +2,6 @@ from ..utils.serializer import dumps_str, loads_str
 import time
 import threading
 import logging
-import contextlib
 import asyncio
 from collections import defaultdict, deque, Counter
 from typing import List, Optional, TYPE_CHECKING, Union
@@ -68,6 +67,12 @@ class EventPool(object):
         # Mapping of queue names to their prefixed forms
         self.prefixed_queues = {}

+        # Priority queue management
+        self.priority_queues = {}  # {base_queue: [queue:1, queue:2, ...]}
+        self.priority_queues_lock = threading.Lock()
+        self._last_priority_scan = {}  # timestamp of the last scan per base queue
+        self._priority_scan_interval = 5  # scan every 5s so new priority queues are discovered promptly
+
         # Used to track broadcast messages
         self._broadcast_message_tracker = {}

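The new cache maps each base queue to its priority variants. The key layout it tracks follows from `get_prefixed_queue_name` and the scan pattern used by `scan_priority_queues` further down; a minimal sketch (the `jettask` prefix and the `stream_key` helper are illustrative, not part of the package):

    from typing import Optional

    def stream_key(redis_prefix: str, queue: str, priority: Optional[int] = None) -> str:
        """Redis stream key for a queue, optionally with a numeric priority suffix."""
        key = f"{redis_prefix}:QUEUE:{queue}"
        return f"{key}:{priority}" if priority is not None else key

    assert stream_key("jettask", "orders") == "jettask:QUEUE:orders"       # default queue
    assert stream_key("jettask", "orders", 1) == "jettask:QUEUE:orders:1"  # priority 1 beats 2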
@@ -117,6 +122,7 @@ class EventPool(object):
         """Add the queue-name prefix"""
         return f"{self.redis_prefix}:QUEUE:{queue}"

+
     def get_redis_client(self, asyncio: bool = False, binary: bool = False):
         """Get a Redis client

@@ -128,84 +134,62 @@ class EventPool(object):
             return self.async_binary_redis_client if asyncio else self.binary_redis_client
         return self.async_redis_client if asyncio else self.redis_client

-    def create_group(self):
-        """Create consumer groups - groups are now created dynamically in listen_event_by_task"""
-        for queue in self.queues:
-            prefixed_queue = self.get_prefixed_queue_name(queue)
-
-            # Create the default consumer group (kept for compatibility)
-            with contextlib.suppress(Exception):
-                prefixed_queue_bytes = prefixed_queue.encode() if isinstance(prefixed_queue, str) else prefixed_queue
-                self.binary_redis_client.xgroup_create(
-                    name=prefixed_queue_bytes,
-                    groupname=prefixed_queue_bytes,
-                    id=b"0",
-                    mkstream=True,
-                )
-
-    def send_event(self, queue, message: dict, asyncio: bool = False):
-        # Use the binary client for Stream operations
-        client = self.get_redis_client(asyncio, binary=True)
-        prefixed_queue = self.get_prefixed_queue_name(queue)
-        # Keys must be bytes; values need no extra encoding (msgpack already returns binary)
-        prefixed_queue_bytes = prefixed_queue.encode() if isinstance(prefixed_queue, str) else prefixed_queue
-        stream_data = {b'data': dumps_str(message)}  # bytes key
-        try:
-            event_id = client.xadd(prefixed_queue_bytes, stream_data)
-            # Convert a bytes event_id to str before returning
-            if isinstance(event_id, bytes):
-                event_id = event_id.decode('utf-8')
-            return event_id
-        except redis.exceptions.ResponseError as e:
-            # If the queue does not exist, create it
-            if "ERR" in str(e):
-                logger.warning(f'Queue {prefixed_queue} does not exist, creating...')
-                try:
-                    # Create the queue first
-                    event_id = client.xadd(prefixed_queue_bytes, stream_data)
-                    # Then create the consumer group
-                    with contextlib.suppress(Exception):
-                        client.xgroup_create(
-                            name=prefixed_queue_bytes,
-                            groupname=prefixed_queue_bytes,
-                            id=b"0"
-                        )
-                    # Convert a bytes event_id to str before returning
-                    if isinstance(event_id, bytes):
-                        event_id = event_id.decode('utf-8')
-                    return event_id
-                except Exception as create_error:
-                    logger.error(f'Failed to create queue: {create_error}')
-                    raise
-            else:
-                raise
-
-    def batch_send_event(self, queue, messages: List[dict], asyncio: bool = False):
-        # Use the binary client for Stream operations
-        client = self.get_redis_client(asyncio, binary=True)
-        pipe = client.pipeline()
-        prefixed_queue = self.get_prefixed_queue_name(queue)
-        prefixed_queue_bytes = prefixed_queue.encode() if isinstance(prefixed_queue, str) else prefixed_queue
-        if asyncio:
-            return self._batch_send_event(prefixed_queue_bytes, messages, pipe)
-        for message in messages:
-            # Normalize the message format
-            if 'data' in message:
-                binary_message = {b'data': message['data'] if isinstance(message['data'], bytes) else dumps_str(message['data'])}
-            else:
-                binary_message = message
-            pipe.xadd(prefixed_queue_bytes, binary_message)
-        return pipe.execute()

     async def _batch_send_event(self, prefixed_queue, messages: List[dict], pipe):
+        # Use a Lua script to batch-send messages and attach an auto-incrementing offset
+        lua_script = """
+        local stream_key = KEYS[1]
+        local prefix = ARGV[1]
+        local results = {}
+
+        -- One hash stores the offsets of all queues
+        local offsets_hash = prefix .. ':QUEUE_OFFSETS'
+
+        -- Derive the queue name from stream_key (strip the prefix:QUEUE: part)
+        local queue_name = string.gsub(stream_key, '^' .. prefix .. ':QUEUE:', '')
+
+        -- Starting at ARGV[2], every argument is one message's data
+        for i = 2, #ARGV do
+            local data = ARGV[i]
+
+            -- HINCRBY increments the offset atomically (creating it at 1 if absent)
+            local current_offset = redis.call('HINCRBY', offsets_hash, queue_name, 1)
+
+            -- Append the message to the Stream, including the offset field
+            local stream_id = redis.call('XADD', stream_key, '*',
+                'data', data,
+                'offset', current_offset)
+
+            table.insert(results, stream_id)
+        end
+
+        return results
+        """
+
+        # Prepare the Lua script arguments
+        lua_args = [self.redis_prefix.encode() if isinstance(self.redis_prefix, str) else self.redis_prefix]
+
         for message in messages:
             # Normalize the message format
             if 'data' in message:
-                binary_message = {b'data': message['data'] if isinstance(message['data'], bytes) else dumps_str(message['data'])}
+                data = message['data'] if isinstance(message['data'], bytes) else dumps_str(message['data'])
             else:
-                binary_message = message
-            await pipe.xadd(prefixed_queue, binary_message)
-        return await pipe.execute()
+                data = dumps_str(message)
+            lua_args.append(data)
+
+        # Get the async Redis client (use it directly instead of the pipe)
+        client = self.get_redis_client(asyncio=True, binary=True)
+
+        # Run the Lua script
+        results = await client.eval(
+            lua_script,
+            1,               # one KEY
+            prefixed_queue,  # KEYS[1]: stream key
+            *lua_args        # ARGV: prefix, data1, data2, ...
+        )
+
+        # Decode the returned Stream IDs
+        return [r.decode('utf-8') if isinstance(r, bytes) else r for r in results]

     def is_urgent(self, routing_key):
         is_urgent = self.solo_urgent_retry.get(routing_key, False)
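`_batch_send_event` now runs HINCRBY and XADD inside one Lua script, so the per-queue offset counter and the stream entry stay consistent even with concurrent producers. A standalone sketch of the same pattern, reduced to a single message (key names are illustrative; assumes a redis-py 5.x asyncio client):

    import asyncio
    import redis.asyncio as redis

    # Same HINCRBY + XADD pattern as the script above, for one message.
    OFFSET_XADD = """
    local offset = redis.call('HINCRBY', KEYS[2], ARGV[1], 1)
    return redis.call('XADD', KEYS[1], '*', 'data', ARGV[2], 'offset', offset)
    """

    async def send_with_offset(r: redis.Redis, stream: str, queue: str, data: bytes) -> str:
        # KEYS[1] = stream key, KEYS[2] = offsets hash; ARGV = queue name, payload
        entry_id = await r.eval(OFFSET_XADD, 2, stream, "demo:QUEUE_OFFSETS", queue, data)
        return entry_id.decode() if isinstance(entry_id, bytes) else entry_id

    async def main():
        r = redis.Redis()
        print(await send_with_offset(r, "demo:QUEUE:orders", "orders", b"payload"))
        await r.aclose()

    asyncio.run(main())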
@@ -213,6 +197,107 @@ class EventPool(object):
             del self.solo_urgent_retry[routing_key]
         return is_urgent

+    async def scan_priority_queues(self, base_queue: str) -> list:
+        """Scan Redis for priority queues.
+
+        Args:
+            base_queue: Base queue name (without a priority suffix)
+
+        Returns:
+            Queue names sorted by priority
+        """
+        pattern = f"{self.redis_prefix}:QUEUE:{base_queue}:*"
+
+        try:
+            # Walk the matching keys with SCAN
+            cursor = 0
+            priority_queues = set()
+
+            while True:
+                cursor, keys = await self.async_binary_redis_client.scan(
+                    cursor=cursor,
+                    match=pattern.encode() if isinstance(pattern, str) else pattern,
+                    count=100
+                )
+
+                for key in keys:
+                    # Parse the key name to extract the priority
+                    key_str = key.decode() if isinstance(key, bytes) else key
+                    # Pull out the priority suffix
+                    parts = key_str.split(':')
+                    if len(parts) >= 4:  # jettask:QUEUE:base_queue:priority
+                        queue_with_priority = ':'.join(parts[2:])  # base_queue:priority
+                        priority_queues.add(queue_with_priority)
+
+                if cursor == 0:
+                    break
+
+            # Add the base queue (no priority)
+            priority_queues.add(base_queue)
+
+            # Sort by priority (a smaller number means a higher priority)
+            sorted_queues = []
+            for q in priority_queues:
+                if ':' in q:
+                    base, priority = q.rsplit(':', 1)
+                    if base == base_queue and priority.isdigit():
+                        sorted_queues.append((int(priority), q))
+                    else:
+                        sorted_queues.append((float('inf'), q))  # non-numeric priorities go last
+                else:
+                    sorted_queues.append((float('inf'), q))  # no priority goes last
+
+            sorted_queues.sort(key=lambda x: x[0])
+            return [q[1] for q in sorted_queues]
+
+        except Exception as e:
+            logger.error(f"Error scanning priority queues for {base_queue}: {e}")
+            return [base_queue]  # fall back to the base queue
+
+    async def update_priority_queues_cache(self, base_queue: str):
+        """Refresh the priority-queue cache.
+
+        Args:
+            base_queue: Base queue name
+        """
+        current_time = time.time()
+
+        # Check whether a rescan is due
+        last_scan = self._last_priority_scan.get(base_queue, 0)
+        if current_time - last_scan < self._priority_scan_interval:
+            return  # not time to scan yet
+
+        # Scan and update the cache
+        priority_queues = await self.scan_priority_queues(base_queue)
+
+        with self.priority_queues_lock:
+            self.priority_queues[base_queue] = priority_queues
+            self._last_priority_scan[base_queue] = current_time
+
+        # if len(priority_queues) > 1:
+        #     logger.info(f"Updated priority queues for {base_queue}: {priority_queues}")
+
+    def get_priority_queues(self, base_queue: str) -> list:
+        """Get the priority queue list (from the cache).
+
+        Args:
+            base_queue: Base queue name
+
+        Returns:
+            Priority queue names, already prefixed
+        """
+        with self.priority_queues_lock:
+            queues = self.priority_queues.get(base_queue, [])
+            # Return the prefixed queue names
+            result = []
+            for q in queues:
+                if q == base_queue:
+                    continue  # skip the base queue; listen_event_by_task handles it already
+                # Only return priority queues (those with a priority suffix)
+                if ':' in q and q.rsplit(':', 1)[1].isdigit():
+                    result.append(f"{self.redis_prefix}:QUEUE:{q}")
+            return result
+
     @classmethod
     def separate_by_key(cls, lst):
         groups = {}
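The sort in `scan_priority_queues` puts numeric suffixes first in ascending order (1 before 2) and pushes the bare base queue and any non-numeric suffix to the back. The same ordering in isolation:

    def order_by_priority(base_queue: str, queues: list) -> list:
        """Numeric priority suffixes first, ascending; everything else last."""
        def key(q: str) -> float:
            if ':' in q:
                base, suffix = q.rsplit(':', 1)
                if base == base_queue and suffix.isdigit():
                    return int(suffix)
            return float('inf')
        return sorted(queues, key=key)

    # "orders:1" outranks "orders:2"; the bare base queue sorts last.
    assert order_by_priority("orders", ["orders", "orders:2", "orders:1"]) == [
        "orders:1", "orders:2", "orders",
    ]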
@@ -456,7 +541,7 @@ class EventPool(object):
                 consecutive_errors += 1
                 return False, consecutive_errors

-    def _process_message_common(self, event_id: str, event_data: dict, queue: str, event_queue, is_async: bool = False, consumer_name: str = None):
+    def _process_message_common(self, event_id: str, event_data: dict, queue: str, event_queue, is_async: bool = False, consumer_name: str = None, group_name: str = None):
         """Shared message-processing logic for the sync and async paths"""
         # Check whether the message was already claimed, to prevent duplicate processing
         if event_id in self._claimed_message_ids:
@@ -491,18 +576,17 @@

        routing = final_event_data.get("routing")

-        # Extract group_name from consumer_name
-        # consumer_name format: "YYDG-xxx:task_name"
-        # group_name format: "prefixed_queue:task_name"
-        group_name = None
-        if consumer_name and ':' in consumer_name:
-            # Pull task_name out of consumer_name
-            task_name_part = consumer_name.split(':', 1)[1]
+        # Take the actual queue name from the message body (it may carry a priority suffix)
+        # This makes sure the ACK uses the correct stream key
+        actual_queue = final_event_data.get('queue', queue)
+
+        # If no group_name was passed in, fall back to the default (prefixed_queue)
+        if not group_name:
            prefixed_queue = self.get_prefixed_queue_name(queue)
-            group_name = f"{prefixed_queue}:{task_name_part}"
+            group_name = prefixed_queue

        task_item = {
-            "queue": queue,
+            "queue": actual_queue,  # the actual queue name from the message body (may include a priority)
            "event_id": actual_event_id,
            "event_data": final_event_data,  # use the parsed data
            "consumer": consumer_name,  # attach the consumer info
@@ -573,11 +657,19 @@
        )

        # Wait for the consumer manager to initialize
+        # For priority queues, use the base queue name to look up the consumer
+        base_queue = queue
+        if ':' in queue and queue.rsplit(':', 1)[-1].isdigit():
+            base_queue = queue.rsplit(':', 1)[0]
+
        wait_times = [0.1, 0.2, 0.4, 0.8, 1.6, 3.2]
        for wait_time in wait_times:
            try:
-                current_consumer = self.consumer_manager.get_consumer_name(queue)
+                current_consumer = self.consumer_manager.get_consumer_name(base_queue)
                if current_consumer:
+                    # For a priority queue the consumer name needs the queue suffix
+                    if base_queue != queue:
+                        current_consumer = f"{current_consumer}:{queue.rsplit(':', 1)[-1]}"
                    logger.info(f"Consumer manager initialized for queue {queue}, consumer: {current_consumer}")
                    break
            except Exception as e:
@@ -587,19 +679,18 @@
        logger.info(f"Offline worker processor for queue {queue} is now active")

        # Scan interval
-        scan_interval = 1  # scan every 30 seconds
+        scan_interval = 2  # scan every 30 seconds

        while not self._stop_reading:
            try:
-                # Invoke the recovery module
-                # Don't pass process_message_callback; let it use event_queue
+                # Recover all tasks directly (each task has its own consumer group)
                recovered = await recovery.recover_offline_workers(
                    queue=queue,
-                    event_queue=event_queue
+                    event_queue=event_queue,
+                    current_consumer_name=current_consumer
                )
-
                if recovered > 0:
-                    logger.info(f"Recovered {recovered} messages for queue {queue}")
+                    logger.info(f"Recovered {recovered} messages on queue {queue}")

            except Exception as e:
                import traceback
@@ -613,9 +704,8 @@


    async def listening_event(self, event_queue: asyncio.Queue, prefetch_multiplier: int = 1):
-        """Listen for events - every task has its own consumer group, giving broadcast for free"""
+        """Listen for events - create an independent consumer group per task"""

-        # A dict storing each queue's delayed tasks - list + Lock is more efficient
        # A dict storing each queue's delayed tasks - list + Lock is more efficient
        delayed_tasks_lists = {}
        delayed_tasks_locks = {}
@@ -623,6 +713,8 @@
            delayed_tasks_lists[queue] = []
            delayed_tasks_locks[queue] = asyncio.Lock()

+        # Group info is recorded when each task starts listening
+
        async def scan_delayed_tasks_for_queue(queue: str, task_list: list, task_lock: asyncio.Lock):
            """Scan delayed tasks independently for a single queue"""
            base_interval = self.consumer_config.get('scan_interval', 0.05)  # 50ms base interval
@@ -664,26 +756,98 @@

        async def listen_event_by_task(queue, task_name):
            """Listen for events for a single task"""
-            check_backlog = True
-            lastid = "0-0"
+            # Track the check_backlog state per queue
+            check_backlog = {}  # {queue_name: bool}
+            lastid = {}  # per-queue lastid
            consecutive_errors = 0
            max_consecutive_errors = 5

-            # Use task_name as the consumer group
+            # Fetch the task object
+            task = self.app._tasks.get(task_name)
+            if not task:
+                logger.error(f"Task {task_name} not found")
+                return
+
+            # Set up the required variables
            prefixed_queue = self.get_prefixed_queue_name(queue)
+            # Use the function name as group_name for task isolation
            group_name = f"{prefixed_queue}:{task_name}"
+            print(f'{group_name=} {task_name=}')
+            consumer_name = self.consumer_manager.get_consumer_name(queue)
+
+            # Record the group info in the worker hash
+            if self.consumer_manager:
+                await self.consumer_manager.record_group_info_async(queue, task_name, group_name, consumer_name)
+
+            # Seed the priority-queue scan
+            await self.update_priority_queues_cache(queue)

-            # Get the consumer name via consumer_manager so heartbeats etc. are started
-            # print(f'listening_event {queue=}')
-            managed_consumer_name = self.consumer_manager.get_consumer_name(queue)
-            # But to keep a separate consumer per task, still append task_name
-            consumer_name = f"{managed_consumer_name}:{task_name}"
+            # Get all priority queues (plus the default queue)
+            priority_queues = self.get_priority_queues(queue)
+            all_queues = [prefixed_queue] + priority_queues  # default queue + priority queues
+
+            # Create a consumer group for every queue (if missing)
+            for q in all_queues:
+                try:
+                    await self.async_redis_client.xgroup_create(
+                        name=q,
+                        groupname=group_name,
+                        id="0",
+                        mkstream=True
+                    )
+                    logger.info(f"Created consumer group {group_name} for queue {q}")
+                except Exception as e:
+                    if "BUSYGROUP" in str(e):
+                        logger.debug(f"Consumer group {group_name} already exists for queue {q}")
+                    else:
+                        logger.warning(f"Error creating consumer group for {q}: {e}")
+
+            # Initialize lastid and check_backlog for every queue
+            for q in all_queues:
+                lastid[q] = "0-0"
+                check_backlog[q] = True  # every queue needs its backlog checked

            # Get this queue's delayed-task list and lock
            delayed_list = delayed_tasks_lists.get(queue)
            delayed_lock = delayed_tasks_locks.get(queue)

+            # Remember when the priority queues were last refreshed
+            last_priority_update = time.time()
+
            while not self._stop_reading:
+                # Periodically refresh the priority-queue cache
+                current_time = time.time()
+                if current_time - last_priority_update >= self._priority_scan_interval:
+                    await self.update_priority_queues_cache(queue)
+                    new_priority_queues = self.get_priority_queues(queue)
+
+                    # If the priority queues changed, update the local variables
+                    if new_priority_queues != priority_queues:
+                        logger.info(f"Priority queues updated for {queue}: {priority_queues} -> {new_priority_queues}")
+                        priority_queues = new_priority_queues
+                        all_queues = [prefixed_queue] + priority_queues
+
+                        # Create consumer groups for the new priority queues
+                        for q in all_queues:
+                            if q not in lastid:  # a newly discovered queue
+                                try:
+                                    await self.async_redis_client.xgroup_create(
+                                        name=q,
+                                        groupname=group_name,
+                                        id="0",
+                                        mkstream=True
+                                    )
+                                    logger.info(f"Created consumer group {group_name} for new priority queue {q}")
+                                except Exception as e:
+                                    if "BUSYGROUP" not in str(e):
+                                        logger.warning(f"Error creating consumer group for {q}: {e}")
+
+                                # Initialize the new queue's state
+                                lastid[q] = "0-0"
+                                check_backlog[q] = True
+
+                    last_priority_update = current_time
+
                # Fetch and process delayed tasks in batch (a list is more efficient)
                if delayed_list:
                    # Atomically swap out the list contents
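Group creation above is effectively idempotent: Redis replies BUSYGROUP when the group already exists, and the code treats that as success. The same pattern on its own (stream and group names are placeholders):

    import redis.asyncio as redis
    from redis.exceptions import ResponseError

    async def ensure_group(r: redis.Redis, stream: str, group: str) -> None:
        """Create a consumer group reading from ID 0; tolerate an existing group."""
        try:
            # mkstream=True also creates the stream if it does not exist yet
            await r.xgroup_create(name=stream, groupname=group, id="0", mkstream=True)
        except ResponseError as e:
            if "BUSYGROUP" not in str(e):  # already exists -> fine; anything else -> re-raise
                raise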
@@ -701,17 +865,23 @@
                    other_tasks = []  # tasks that belong to other task handlers

                    for delayed_task in tasks_to_process:
-                        # Check whether the task belongs to the current task handler
+                        # Delayed tasks don't need their name field checked:
+                        # no specific task was targeted when they were sent,
+                        # so every task listening on this queue may process them
                        task_data = delayed_task.get('data', {})
                        if isinstance(task_data, str):
                            import json
                            task_data = json.loads(task_data)

-                        msg_task_name = task_data.get('name')
-                        if msg_task_name == task_name:
-                            my_tasks.append((delayed_task, task_data))
-                        else:
+                        # Each task has its own consumer group and handles messages independently
+                        # Check whether the message names its target tasks
+                        target_tasks = task_data.get('_target_tasks', None)
+                        if target_tasks and task_name not in target_tasks:
+                            # This message is not meant for the current task
                            other_tasks.append(delayed_task)
+                        else:
+                            # The current task processes this one
+                            my_tasks.append((delayed_task, task_data))

                    # Process every task that belongs to the current handler
                    for delayed_task, task_data in my_tasks:
@@ -726,7 +896,7 @@

                        result = self._process_message_common(
                            event_id, task_data, queue, event_queue,
-                            is_async=True, consumer_name=consumer_name
+                            is_async=True, consumer_name=consumer_name, group_name=group_name
                        )
                        if isinstance(result, tuple) and result[0] == 'async_put':
                            await self._async_put_task(event_queue, result[1])
@@ -736,50 +906,183 @@
                    async with delayed_lock:
                        delayed_list.extend(other_tasks)

-                # Then handle normal Stream messages
-                if check_backlog:
-                    myid = lastid
-                else:
-                    myid = ">"
+                # Handle normal Stream messages (with priority-queue support)
+                # Real priority consumption:
+                # 1. First check whether event_queue is already full
+                # 2. Read from higher-priority queues first
+                # 3. Only read lower priorities once the higher ones are empty
+                # 4. Never exceed the prefetch_multiplier limit

-                try:
-                    # Read messages with the binary client
-                    prefixed_queue_bytes = prefixed_queue.encode() if isinstance(prefixed_queue, str) else prefixed_queue
-                    # group_name_bytes = group_name.encode() if isinstance(group_name, str) else group_name
-                    # consumer_name_bytes = consumer_name.encode() if isinstance(consumer_name, str) else consumer_name
+                # Check whether the in-memory queue is full
+                current_queue_size = event_queue.qsize() if hasattr(event_queue, 'qsize') else 0
+                if current_queue_size >= prefetch_multiplier:
+                    # The in-memory queue is full; wait for it to drain
+                    await asyncio.sleep(0.01)  # brief pause
+                    continue
+
+                messages = []
+                messages_needed = prefetch_multiplier - current_queue_size  # how many more we may read
+
+                if messages_needed <= 0:
+                    # No need to read more messages
+                    await asyncio.sleep(0.01)
+                    continue
+
+                # Read the queues in priority order
+                for q in all_queues:
+                    if messages_needed <= 0:
+                        break  # already read enough messages
+
+                    q_bytes = q.encode() if isinstance(q, str) else q
+                    # Per queue, check whether backlog (historical) messages are still due
+                    if check_backlog.get(q, True):
+                        myid = lastid.get(q, "0-0")
+                    else:
+                        myid = ">"
                    myid_bytes = myid.encode() if isinstance(myid, str) else myid
-                    # Adjust the block time dynamically: use 0 (non-blocking) while delayed tasks are pending,
-                    # otherwise block to save CPU
-                    block_time = 1000  # block for 1 second
-                    # Removed the debug print statements

-                    messages = await self.async_binary_redis_client.xreadgroup(
-                        groupname=group_name,
-                        consumername=consumer_name,
-                        streams={prefixed_queue_bytes: myid_bytes},
-                        count=prefetch_multiplier,
-                        block=block_time
-                    )
+                    try:
+                        # Use the right consumer name for priority queues:
+                        # a priority queue's consumer carries the priority suffix
+                        q_consumer_name = consumer_name
+                        if q != prefixed_queue and ':' in q:
+                            # This is a priority queue; append its suffix
+                            priority_suffix = q.rsplit(':', 1)[-1]
+                            q_consumer_name = f"{consumer_name}:{priority_suffix}"
+                        # Read from this priority queue (at most messages_needed entries)
+                        q_messages = await self.async_binary_redis_client.xreadgroup(
+                            groupname=group_name,
+                            consumername=q_consumer_name,
+                            streams={q_bytes: myid_bytes},
+                            count=messages_needed,  # only read what we still need
+                            block=1  # effectively non-blocking
+                        )
+
+                        if q_messages:
+                            # logger.debug(f"Read messages from {q}: {len(q_messages[0][1]) if q_messages else 0} messages")
+                            # if check_backlog.get(q, True):
+                            #     print(f'Handle backlog first: {q_bytes=} {group_name=} {q_messages=}')
+                            # Remember which queue these came from
+                            messages.extend(q_messages)
+                            messages_read = len(q_messages[0][1]) if q_messages else 0
+                            messages_needed -= messages_read
+
+                            # If this higher-priority queue still has messages, keep reading from it
+                            # until it is empty or the prefetch limit is hit
+                            if messages_read > 0 and messages_needed > 0:
+                                # The queue may hold more; the next iteration will prefer it again,
+                                # but first process what was already read
+                                break  # leave the for loop and process what we have
+
+                    except Exception as e:
+                        if "NOGROUP" in str(e):
+                            # Create the consumer group
+                            try:
+                                await self.async_redis_client.xgroup_create(
+                                    name=q,
+                                    groupname=group_name,
+                                    id="0",
+                                    mkstream=True
+                                )
+                                logger.debug(f"Created consumer group {group_name} for {q}")
+                                # Retry the read
+                                q_messages = await self.async_binary_redis_client.xreadgroup(
+                                    groupname=group_name,
+                                    consumername=q_consumer_name,
+                                    streams={q_bytes: myid_bytes},
+                                    count=messages_needed,
+                                    block=0
+                                )
+                                if q_messages:
+                                    messages.extend(q_messages)
+                                    messages_read = len(q_messages[0][1]) if q_messages else 0
+                                    messages_needed -= messages_read
+                            except:
+                                pass
+                        else:
+                            logger.debug(f"Error reading from queue {q}: {e}")
+                        continue
+
+                # If nothing was read, block on the highest-priority queue
+                if not messages:
+                    # Prefer waiting on a high-priority queue:
+                    # wait on the first priority queue if one exists,
+                    # otherwise wait on the default queue
+                    wait_queue = all_queues[0] if all_queues else prefixed_queue
+
+                    try:
+                        q_bytes = wait_queue.encode() if isinstance(wait_queue, str) else wait_queue
+                        if check_backlog.get(wait_queue, True):
+                            myid = lastid.get(wait_queue, "0-0")
+                        else:
+                            myid = ">"
+                        myid_bytes = myid.encode() if isinstance(myid, str) else myid
+
+                        # Use the right consumer name for the wait queue
+                        wait_consumer_name = consumer_name
+                        if wait_queue != prefixed_queue and ':' in wait_queue:
+                            # This is a priority queue; append its suffix
+                            priority_suffix = wait_queue.rsplit(':', 1)[-1]
+                            wait_consumer_name = f"{consumer_name}:{priority_suffix}"
+
+                        messages = await self.async_binary_redis_client.xreadgroup(
+                            groupname=group_name,
+                            consumername=wait_consumer_name,
+                            streams={q_bytes: myid_bytes},
+                            count=prefetch_multiplier,
+                            block=100  # block for 100ms
+                        )
+                    except Exception as e:
+                        # Ignore errors; retry on the next loop iteration
+                        pass
+
+                try:
                    # logger.info(f'{group_name=} {consumer_name=} {block_time=}')
                    consecutive_errors = 0
                    # if check_backlog and messages:
                    #     logger.info(f'Consume the earlier messages first {group_name=} ')
                    # logger.info(f'{check_backlog=} {messages=}')
-                    if not messages:
-                        check_backlog = False
-                        # In blocking mode xreadgroup already waited block_time milliseconds;
-                        # an extra sleep would just burn CPU
-                        continue

-                    check_backlog = len(messages[0][1]) > 0
+                    # Report the delivered offsets (for backlog monitoring)
+                    try:
+                        from jettask.monitor.stream_backlog_monitor import report_delivered_offset
+                        # Report the offset for each stream's messages
+                        for msg in messages:
+                            stream_name = msg[0]
+                            if isinstance(stream_name, bytes):
+                                stream_name = stream_name.decode('utf-8')
+                            # Extract the queue name (strip the prefix)
+                            queue_name = stream_name.replace(f"{self.redis_prefix}:STREAM:", "")
+                            await report_delivered_offset(
+                                self.async_redis_client,
+                                self.redis_prefix,
+                                queue_name,
+                                group_name,
+                                [msg]
+                            )
+                    except Exception as e:
+                        # Monitoring failures must not affect the main flow
+                        logger.debug(f"Failed to report delivered offset: {e}")

                    # Collect the message IDs that should be skipped
                    skip_message_ids = []

                    for message in messages:
+                        # print(f'{message=}')
+                        # message[0] is the stream name, message[1] the message list
+                        stream_name = message[0]
+                        if isinstance(stream_name, bytes):
+                            stream_name = stream_name.decode('utf-8')
+
+                        # Update this queue's check_backlog state from its message count
+                        if len(message[1]) == 0:
+                            # No more backlog on this queue; read fresh messages next time
+                            check_backlog[stream_name] = False
+
                        for event in message[1]:
                            event_id = event[0]
-                            lastid = event_id
+                            # Update the matching queue's lastid
+                            lastid[stream_name] = event_id
                            # Convert a bytes event_id to str
                            if isinstance(event_id, bytes):
                                event_id = event_id.decode('utf-8')
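Net effect of the loop above: drain streams in priority order with near-non-blocking reads, and only when every queue came back empty, block briefly on the highest-priority stream. A condensed, hypothetical sketch of that polling strategy (new-message reads only, ignoring the per-queue backlog state):

    import redis.asyncio as redis

    async def read_by_priority(r: redis.Redis, group: str, consumer: str,
                               queues: list, budget: int) -> list:
        """Non-blocking reads in priority order; block briefly only if all are empty."""
        batches = []
        for q in queues:  # sorted highest priority first
            if budget <= 0:
                break
            got = await r.xreadgroup(groupname=group, consumername=consumer,
                                     streams={q: ">"}, count=budget, block=1)
            if got:
                batches.extend(got)
                budget -= len(got[0][1])
        if not batches:  # nothing anywhere: wait up to 100ms on the top queue
            batches = await r.xreadgroup(groupname=group, consumername=consumer,
                                         streams={queues[0]: ">"}, count=budget,
                                         block=100) or []
        return batches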
@@ -795,30 +1098,30 @@

                            # Parse the binary data directly; no decoding needed
                            parsed_data = loads_str(data_field)
-
                            # Skip delayed tasks (the delayed-task scanner handles them)
                            if parsed_data.get('is_delayed') == 1:
                                should_process = False
                                continue

-                            # Check whether this is a broadcast message
-                            is_broadcast = parsed_data.get('_broadcast', False)
+                            # Every task has its own consumer group
+                            # Check whether the message names its target tasks (for precise routing)
                            target_tasks = parsed_data.get('_target_tasks', None)
-
-                            if is_broadcast:
-                                # Broadcast message: every task processes it unless excluded by target_tasks
-                                if target_tasks and task_name not in target_tasks:
-                                    should_process = False
-                            else:
-                                # Normal message: only process when the name field matches the current task
-                                msg_task_name = parsed_data.get('name')
-                                if not msg_task_name or msg_task_name != task_name:
-                                    should_process = False
+                            if target_tasks and task_name not in target_tasks:
+                                # This message is addressed to other tasks
+                                should_process = False

                            if should_process:
                                # Add task_name to the data (so the executor can identify the task)
                                parsed_data['_task_name'] = task_name

+                                # Extract the offset field (when present)
+                                offset_field = event_data.get(b'offset') or event_data.get('offset')
+                                if offset_field:
+                                    # Attach the offset to parsed_data
+                                    if isinstance(offset_field, bytes):
+                                        offset_field = offset_field.decode('utf-8')
+                                    parsed_data['offset'] = offset_field
+
                                # Update event_data
                                event_data.clear()
                                for key, value in parsed_data.items():
@@ -833,22 +1136,40 @@


                        if should_process:
                            # Process the message - it is queued, and the executor handles and ACKs it
+                            # Use the actual queue name from the message body (may include a priority)
+                            actual_queue = event_data.get('queue', queue)
+
+                            # Work out the actual consumer name (priority queues need the suffix)
+                            actual_consumer_name = consumer_name
+                            # Tell from stream_name whether this is a priority queue
+                            if stream_name != prefixed_queue and ':' in stream_name:
+                                # This is a priority queue; append its suffix
+                                priority_suffix = stream_name.rsplit(':', 1)[-1]
+                                actual_consumer_name = f"{consumer_name}:{priority_suffix}"
+
                            result = self._process_message_common(
-                                event_id, event_data, queue, event_queue,
-                                is_async=True, consumer_name=consumer_name
+                                event_id, event_data, actual_queue, event_queue,
+                                is_async=True, consumer_name=actual_consumer_name, group_name=group_name
                            )
                            if isinstance(result, tuple) and result[0] == 'async_put':
                                await self._async_put_task(event_queue, result[1])
+                                logger.debug(f"Put task {event_id} into event_queue")
                            # Note: no ACK here; the executor ACKs once processing completes
                        else:
                            # Messages not meant for this task are collected for a batch ACK
                            skip_message_ids.append(event_id)


-                    # Batch-ACK the unneeded messages
+                    # Batch-ACK the unneeded messages (grouped per queue)
                    if skip_message_ids:
+                        # Simplified: try the ACK on every queue (it only succeeds on the right one)
                        group_name_bytes = group_name.encode() if isinstance(group_name, str) else group_name
-                        await self.async_binary_redis_client.xack(prefixed_queue_bytes, group_name_bytes, *skip_message_ids)
+                        for q in all_queues:
+                            q_bytes = q.encode() if isinstance(q, str) else q
+                            try:
+                                await self.async_binary_redis_client.xack(q_bytes, group_name_bytes, *skip_message_ids)
+                            except:
+                                pass  # ignore ACK errors
                        logger.debug(f"Task {task_name} batch ACKed {len(skip_message_ids)} skipped messages")

                except Exception as e:
@@ -861,13 +1182,19 @@
                    if "NOGROUP" in error_msg:
                        logger.info(f"Detected NOGROUP error for {task_name}, attempting to recreate consumer group...")
                        try:
-                            await self.async_redis_client.xgroup_create(
-                                name=prefixed_queue,
-                                groupname=group_name,
-                                id="0",
-                                mkstream=True
-                            )
-                            logger.info(f"Successfully recreated consumer group {group_name} for task {task_name}")
+                            # Create consumer groups for all queues
+                            for q in all_queues:
+                                try:
+                                    await self.async_redis_client.xgroup_create(
+                                        name=q,
+                                        groupname=group_name,
+                                        id="0",
+                                        mkstream=True
+                                    )
+                                    logger.info(f"Successfully created consumer group {group_name} for queue {q}")
+                                except:
+                                    pass  # may already exist
+                            logger.info(f"Consumer groups created/verified for task {task_name}")
                            # Recreated successfully; reset the error counter
                            consecutive_errors = 0
                            continue
@@ -899,7 +1226,17 @@
        tasks.append(scanner_task)

        # Start the offline-worker processor for every queue (with auto-restart)
-        for queue in self.queues:
+        # Priority queues included
+        all_recovery_queues = set(self.queues)
+        for base_queue in self.queues:
+            # Scan for priority queues
+            priority_queues = await self.scan_priority_queues(base_queue)
+            for pq in priority_queues:
+                if pq != base_queue:  # don't add the base queue twice
+                    all_recovery_queues.add(pq)
+
+        # Start an offline-worker processor for every queue (priority queues included)
+        for queue in all_recovery_queues:
            logger.info(f"Starting offline worker processor for queue: {queue}")
            offline_processor_task = asyncio.create_task(
                self._start_offline_worker_processor_with_restart(queue, event_queue)
@@ -1008,7 +1345,7 @@
                if tasks_to_add:
                    async with task_lock:
                        task_list.extend(tasks_to_add)
-                    logger.debug(f"Added {len(tasks_to_add)} delayed tasks to list for queue {queue}")
+                    logger.info(f"Added {len(tasks_to_add)} delayed tasks to list for queue {queue}")

            except Exception as e:
                logger.error(f"Error scanning delayed tasks for queue {queue}: {e}")
@@ -1241,20 +1578,16 @@
                logger.error(f"Error processing delayed tasks for queue {queue}: {e}")
                # Errors must not block the main flow

    def read_pending(self, groupname: str, queue: str, asyncio: bool = False):
-        # Use the binary client for Stream operations
+        # Now uses the auto-decoding client
        client = self.get_redis_client(asyncio, binary=True)
        prefixed_queue = self.get_prefixed_queue_name(queue)
-        prefixed_queue_bytes = prefixed_queue.encode() if isinstance(prefixed_queue, str) else prefixed_queue
-        groupname_bytes = groupname.encode() if isinstance(groupname, str) else groupname
-        return client.xpending(prefixed_queue_bytes, groupname_bytes)
+        return client.xpending(prefixed_queue, groupname)

    def ack(self, queue, event_id, asyncio: bool = False):
-        # Use the binary client for Stream operations
+        # Now uses the auto-decoding client
        client = self.get_redis_client(asyncio, binary=True)
        prefixed_queue = self.get_prefixed_queue_name(queue)
-        prefixed_queue_bytes = prefixed_queue.encode() if isinstance(prefixed_queue, str) else prefixed_queue
-        event_id_bytes = event_id if isinstance(event_id, bytes) else event_id.encode() if isinstance(event_id, str) else str(event_id).encode()
-        result = client.xack(prefixed_queue_bytes, prefixed_queue_bytes, event_id_bytes)
+        result = client.xack(prefixed_queue, prefixed_queue, event_id)
        # Clean up the claimed message ID
        if event_id in self._claimed_message_ids:
            self._claimed_message_ids.remove(event_id)