jettask 0.2.23__py3-none-any.whl → 0.2.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. jettask/__init__.py +2 -0
  2. jettask/cli.py +12 -8
  3. jettask/config/lua_scripts.py +37 -0
  4. jettask/config/nacos_config.py +1 -1
  5. jettask/core/app.py +313 -340
  6. jettask/core/container.py +4 -4
  7. jettask/{persistence → core}/namespace.py +93 -27
  8. jettask/core/task.py +16 -9
  9. jettask/core/unified_manager_base.py +136 -26
  10. jettask/db/__init__.py +67 -0
  11. jettask/db/base.py +137 -0
  12. jettask/{utils/db_connector.py → db/connector.py} +130 -26
  13. jettask/db/models/__init__.py +16 -0
  14. jettask/db/models/scheduled_task.py +196 -0
  15. jettask/db/models/task.py +77 -0
  16. jettask/db/models/task_run.py +85 -0
  17. jettask/executor/__init__.py +0 -15
  18. jettask/executor/core.py +76 -31
  19. jettask/executor/process_entry.py +29 -114
  20. jettask/executor/task_executor.py +4 -0
  21. jettask/messaging/event_pool.py +928 -685
  22. jettask/messaging/scanner.py +30 -0
  23. jettask/persistence/__init__.py +28 -103
  24. jettask/persistence/buffer.py +170 -0
  25. jettask/persistence/consumer.py +330 -249
  26. jettask/persistence/manager.py +304 -0
  27. jettask/persistence/persistence.py +391 -0
  28. jettask/scheduler/__init__.py +15 -3
  29. jettask/scheduler/{task_crud.py → database.py} +61 -57
  30. jettask/scheduler/loader.py +2 -2
  31. jettask/scheduler/{scheduler_coordinator.py → manager.py} +23 -6
  32. jettask/scheduler/models.py +14 -10
  33. jettask/scheduler/schedule.py +166 -0
  34. jettask/scheduler/scheduler.py +12 -11
  35. jettask/schemas/__init__.py +50 -1
  36. jettask/schemas/backlog.py +43 -6
  37. jettask/schemas/namespace.py +70 -19
  38. jettask/schemas/queue.py +19 -3
  39. jettask/schemas/responses.py +493 -0
  40. jettask/task/__init__.py +0 -2
  41. jettask/task/router.py +3 -0
  42. jettask/test_connection_monitor.py +1 -1
  43. jettask/utils/__init__.py +7 -5
  44. jettask/utils/db_init.py +8 -4
  45. jettask/utils/namespace_dep.py +167 -0
  46. jettask/utils/queue_matcher.py +186 -0
  47. jettask/utils/rate_limit/concurrency_limiter.py +7 -1
  48. jettask/utils/stream_backlog.py +1 -1
  49. jettask/webui/__init__.py +0 -1
  50. jettask/webui/api/__init__.py +4 -4
  51. jettask/webui/api/alerts.py +806 -71
  52. jettask/webui/api/example_refactored.py +400 -0
  53. jettask/webui/api/namespaces.py +390 -45
  54. jettask/webui/api/overview.py +300 -54
  55. jettask/webui/api/queues.py +971 -267
  56. jettask/webui/api/scheduled.py +1249 -56
  57. jettask/webui/api/settings.py +129 -7
  58. jettask/webui/api/workers.py +442 -0
  59. jettask/webui/app.py +46 -2329
  60. jettask/webui/middleware/__init__.py +6 -0
  61. jettask/webui/middleware/namespace_middleware.py +135 -0
  62. jettask/webui/services/__init__.py +146 -0
  63. jettask/webui/services/heartbeat_service.py +251 -0
  64. jettask/webui/services/overview_service.py +60 -51
  65. jettask/webui/services/queue_monitor_service.py +426 -0
  66. jettask/webui/services/redis_monitor_service.py +87 -0
  67. jettask/webui/services/settings_service.py +174 -111
  68. jettask/webui/services/task_monitor_service.py +222 -0
  69. jettask/webui/services/timeline_pg_service.py +452 -0
  70. jettask/webui/services/timeline_service.py +189 -0
  71. jettask/webui/services/worker_monitor_service.py +467 -0
  72. jettask/webui/utils/__init__.py +11 -0
  73. jettask/webui/utils/time_utils.py +122 -0
  74. jettask/worker/lifecycle.py +8 -2
  75. {jettask-0.2.23.dist-info → jettask-0.2.24.dist-info}/METADATA +1 -1
  76. jettask-0.2.24.dist-info/RECORD +142 -0
  77. jettask/executor/executor.py +0 -338
  78. jettask/persistence/backlog_monitor.py +0 -567
  79. jettask/persistence/base.py +0 -2334
  80. jettask/persistence/db_manager.py +0 -516
  81. jettask/persistence/maintenance.py +0 -81
  82. jettask/persistence/message_consumer.py +0 -259
  83. jettask/persistence/models.py +0 -49
  84. jettask/persistence/offline_recovery.py +0 -196
  85. jettask/persistence/queue_discovery.py +0 -215
  86. jettask/persistence/task_persistence.py +0 -218
  87. jettask/persistence/task_updater.py +0 -583
  88. jettask/scheduler/add_execution_count.sql +0 -11
  89. jettask/scheduler/add_priority_field.sql +0 -26
  90. jettask/scheduler/add_scheduler_id.sql +0 -25
  91. jettask/scheduler/add_scheduler_id_index.sql +0 -10
  92. jettask/scheduler/make_scheduler_id_required.sql +0 -28
  93. jettask/scheduler/migrate_interval_seconds.sql +0 -9
  94. jettask/scheduler/performance_optimization.sql +0 -45
  95. jettask/scheduler/run_scheduler.py +0 -186
  96. jettask/scheduler/schema.sql +0 -84
  97. jettask/task/task_executor.py +0 -318
  98. jettask/webui/api/analytics.py +0 -323
  99. jettask/webui/config.py +0 -90
  100. jettask/webui/models/__init__.py +0 -3
  101. jettask/webui/models/namespace.py +0 -63
  102. jettask/webui/namespace_manager/__init__.py +0 -10
  103. jettask/webui/namespace_manager/multi.py +0 -593
  104. jettask/webui/namespace_manager/unified.py +0 -193
  105. jettask/webui/run.py +0 -46
  106. jettask-0.2.23.dist-info/RECORD +0 -145
  107. {jettask-0.2.23.dist-info → jettask-0.2.24.dist-info}/WHEEL +0 -0
  108. {jettask-0.2.23.dist-info → jettask-0.2.24.dist-info}/entry_points.txt +0 -0
  109. {jettask-0.2.23.dist-info → jettask-0.2.24.dist-info}/licenses/LICENSE +0 -0
  110. {jettask-0.2.23.dist-info → jettask-0.2.24.dist-info}/top_level.txt +0 -0
jettask/persistence/consumer.py
@@ -1,165 +1,317 @@
-"""PostgreSQL consumer main module
+"""PostgreSQL Consumer - new implementation based on wildcard queues
 
-Coordinates the individual submodules and provides a unified consumer interface.
+Fully replaces the old consumer.py implementation, using Jettask's wildcard queue feature.
 """
 
-import asyncio
+import time
 import logging
-import os
-import socket
-import traceback
-from typing import Optional
-
-import redis.asyncio as redis
-from redis.asyncio import Redis
-from sqlalchemy.ext.asyncio import create_async_engine
-from sqlalchemy.orm import sessionmaker
-from sqlalchemy.ext.asyncio import AsyncSession
-from sqlalchemy import text
-
-from jettask.webui.config import PostgreSQLConfig, RedisConfig
-from jettask.worker.manager import ConsumerManager
-
-from .backlog_monitor import BacklogMonitor
-from .task_updater import TaskUpdater
-from .offline_recovery import OfflineRecoveryHandler
-from .task_persistence import TaskPersistence
-from .queue_discovery import QueueDiscovery
-from .message_consumer import MessageConsumer
-from .maintenance import DatabaseMaintenance
+from datetime import datetime, timezone
+
+from jettask import Jettask
+from jettask.core.context import TaskContext
+from jettask.db.connector import get_pg_engine_and_factory, DBConfig
+from .buffer import BatchBuffer
+from .persistence import TaskPersistence
 
 logger = logging.getLogger(__name__)
 
 
 class PostgreSQLConsumer:
-    """PostgreSQL consumer that consumes tasks from Redis queues and persists them to PostgreSQL
-
-    Supports multi-tenant (namespace) isolation
-
-    Architecture:
-    - BacklogMonitor: monitors stream backlog
-    - TaskUpdater: updates task status (from the TASK_CHANGES stream)
-    - OfflineRecoveryHandler: recovers messages from offline workers
-    - TaskPersistence: parses and persists task data
-    - QueueDiscovery: discovers and manages queues
-    - MessageConsumer: consumes queue messages
-    - DatabaseMaintenance: database maintenance jobs
+    """PostgreSQL Consumer - based on wildcard queues
+
+    Core features:
+    1. Uses @app.task(queue='*') to listen on all queues
+    2. Uses @app.task(queue='TASK_CHANGES') to handle status updates
+    3. Batched INSERTs and UPDATEs
+    4. Automatic queue discovery (built into Jettask)
     """
 
     def __init__(
         self,
-        pg_config: PostgreSQLConfig,
-        redis_config: RedisConfig,
+        pg_config,  # dict or config object
+        redis_config,  # dict or config object
         prefix: str = "jettask",
-        node_id: str = None,
-        # consumer_strategy parameter removed; only the HEARTBEAT strategy is used now
         namespace_id: str = None,
         namespace_name: str = None,
-        enable_backlog_monitor: bool = True,
-        backlog_monitor_interval: int = 1
+        batch_size: int = 1000,
+        flush_interval: float = 5.0
     ):
-        """Initialize the PostgreSQL consumer
+        """Initialize the PG Consumer
 
         Args:
-            pg_config: PostgreSQL configuration
-            redis_config: Redis configuration
+            pg_config: PostgreSQL configuration (dict or object)
+            redis_config: Redis configuration (dict or object)
             prefix: Redis key prefix
-            node_id: node ID
-            consumer_strategy: consumer strategy
+            node_id: node ID (kept for interface compatibility, unused)
             namespace_id: namespace ID
            namespace_name: namespace name
-            enable_backlog_monitor: whether to enable backlog monitoring
-            backlog_monitor_interval: backlog monitoring interval (seconds)
+            enable_backlog_monitor: whether to enable backlog monitoring (kept for interface compatibility, unused)
+            backlog_monitor_interval: backlog monitoring interval (kept for interface compatibility, unused)
+            batch_size: batch size
+            flush_interval: flush interval (seconds)
        """
         self.pg_config = pg_config
         self.redis_config = redis_config
-        self.prefix = prefix
-
-        # Namespace support
+        self.redis_prefix = prefix
         self.namespace_id = namespace_id
         self.namespace_name = namespace_name or "default"
 
-        # Node identity
-        hostname = socket.gethostname()
-        self.node_id = node_id or f"{hostname}_{os.getpid()}"
+        # Build the Redis URL (accepts both dict and object configs)
+        if isinstance(redis_config, dict):
+            # Dict format - prefer the 'url' field
+            redis_url = redis_config.get('url') or redis_config.get('redis_url')
+            if not redis_url:
+                # Build from the individual fields
+                password = redis_config.get('password', '')
+                host = redis_config.get('host', 'localhost')
+                port = redis_config.get('port', 6379)
+                db = redis_config.get('db', 0)
+                redis_url = f"redis://"
+                if password:
+                    redis_url += f":{password}@"
+                redis_url += f"{host}:{port}/{db}"
+        else:
+            # Object format
+            redis_url = f"redis://"
+            if hasattr(redis_config, 'password') and redis_config.password:
+                redis_url += f":{redis_config.password}@"
+            redis_url += f"{redis_config.host}:{redis_config.port}/{redis_config.db}"
 
-        # Consumer configuration
-        # consumer_strategy removed; only the HEARTBEAT strategy is used now
-        self.consumer_group = f"{prefix}_pg_consumer"
+        self.redis_url = redis_url
+        logger.debug(f"Built Redis URL: {redis_url}")
 
-        # Redis and database connections (initialized in start)
-        self.redis_client: Optional[Redis] = None
+        # Database engine and session (initialized in start)
         self.async_engine = None
         self.AsyncSessionLocal = None
+        self.db_manager = None
+
+        # Create the Jettask application
+        self.app = Jettask(
+            redis_url=redis_url,
+            redis_prefix=prefix
+        )
 
-        # ConsumerManager (initialized in start)
-        self.consumer_manager = None
-        self.consumer_id = None
+        # Create two independent batch buffers
+        # 1. INSERT buffer (for persisting new tasks)
+        self.insert_buffer = BatchBuffer(
+            max_size=batch_size,
+            max_delay=flush_interval,
+            operation_type='insert'
+        )
 
-        # Submodules (initialized in start)
-        self.backlog_monitor: Optional[BacklogMonitor] = None
-        self.task_updater: Optional[TaskUpdater] = None
-        self.offline_recovery: Optional[OfflineRecoveryHandler] = None
-        self.task_persistence: Optional[TaskPersistence] = None
-        self.queue_discovery: Optional[QueueDiscovery] = None
-        self.message_consumer: Optional[MessageConsumer] = None
-        self.database_maintenance: Optional[DatabaseMaintenance] = None
+        # 2. UPDATE buffer (for task status updates)
+        self.update_buffer = BatchBuffer(
+            max_size=batch_size // 2,  # status updates are usually more frequent; use smaller batches
+            max_delay=flush_interval,
+            operation_type='update'
+        )
 
-        # Backlog monitor configuration
-        self.enable_backlog_monitor = enable_backlog_monitor
-        self.backlog_monitor_interval = backlog_monitor_interval
+        # Register tasks
+        self._register_tasks()
 
+        # Run control
         self._running = False
 
-    async def start(self):
-        """Start the consumer"""
-        logger.info(f"Starting PostgreSQL consumer (modular) on node: {self.node_id}")
+    def _register_tasks(self):
+        """Register the task handlers"""
+        # Use closures so the handlers can reach instance attributes
+        consumer = self  # capture the self reference
 
-        # 1. Connect to Redis (using the global client instances)
-        from jettask.utils.db_connector import get_async_redis_client, get_sync_redis_client
+        @self.app.task(queue='*', auto_ack=False)
+        async def _handle_persist_task(ctx: TaskContext, **kwargs):
+            return await consumer._do_handle_persist_task(ctx, **kwargs)
 
-        # Build the Redis URL
-        redis_url = f"redis://"
-        if self.redis_config.password:
-            redis_url += f":{self.redis_config.password}@"
-        redis_url += f"{self.redis_config.host}:{self.redis_config.port}/{self.redis_config.db}"
+        @self.app.task(queue='TASK_CHANGES', auto_ack=False)
+        async def _handle_status_update(ctx: TaskContext, **kwargs):
+            print(f'[PG Consumer] Handling status update: {ctx.event_id} {kwargs=}')
+            return await consumer._do_handle_status_update(ctx, **kwargs)
 
-        self.redis_client = get_async_redis_client(
-            redis_url=redis_url,
-            decode_responses=False  # keep binary mode
-        )
+    async def _do_handle_persist_task(self, ctx: TaskContext, **kwargs):
+        """Handle task persistence (INSERT)
 
-        # 2. Initialize the ConsumerManager (needs a synchronous Redis client)
-        sync_redis_client = get_sync_redis_client(
-            redis_url=redis_url,
-            decode_responses=True
-        )
+        Uses the wildcard queue='*' to listen on all queues.
+        Jettask discovers new queues automatically and starts consuming them.
 
-        # Configure the ConsumerManager
-        initial_queues = ['TASK_CHANGES']  # TASK_CHANGES is fixed
-        consumer_config = {
-            'redis_prefix': self.prefix,
-            'queues': initial_queues,
-            'worker_prefix': 'PG_CONSUMER',  # use a different prefix to distinguish from task workers
-        }
-
-        self.consumer_manager = ConsumerManager(
-            redis_client=sync_redis_client,
-            # strategy parameter removed; only the HEARTBEAT strategy is used now
-            config=consumer_config
-        )
+        Args:
+            ctx: task context injected by Jettask (contains queue, event_id, etc.)
+            **kwargs: the task's raw data fields
+        """
+        # Skip the TASK_CHANGES queue (handled by the other task)
+        if ctx.queue == f'{self.redis_prefix}:QUEUE:TASK_CHANGES':
+            await ctx.ack()
+            return
+
+        try:
+            # Extract the bare queue name (strip the prefix:QUEUE: part)
+            queue_name = ctx.queue.replace(f'{self.redis_prefix}:QUEUE:', '')
+
+            # Log the real queue name (used to verify the wildcard queue feature)
+            logger.info(f"[persist task] full path: {ctx.queue}, queue: {queue_name}, Stream ID: {ctx.event_id}")
+
+            # Build the task record
+            trigger_time = kwargs.get('trigger_time', time.time())
+            if isinstance(trigger_time, (str, bytes)):
+                trigger_time = float(trigger_time)
+
+            priority = kwargs.get('priority', 0)
+            if isinstance(priority, (str, bytes)):
+                priority = int(priority)
+
+            record = {
+                'stream_id': ctx.event_id,
+                'queue': ctx.queue.replace(f'{self.redis_prefix}:QUEUE:', ''),
+                'task_name': kwargs.get('task_name', 'unknown'),
+                'payload': kwargs.get('payload', {}),
+                'priority': priority,
+                'created_at': datetime.fromtimestamp(trigger_time, tz=timezone.utc),
+                'scheduled_task_id': kwargs.get('scheduled_task_id'),
+                'namespace': self.namespace_name,
+                'source': 'scheduler' if kwargs.get('scheduled_task_id') else 'redis_stream',
+            }
+
+            # Add to the buffer (no immediate processing, no immediate ACK)
+            self.insert_buffer.add(record, ctx)
+
+            # Check whether a flush is due (batch size or timeout)
+            if self.insert_buffer.should_flush():
+                await self.insert_buffer.flush(self.db_manager)
+
+            # Also check whether the UPDATE buffer needs flushing (piggyback on this call)
+            if self.update_buffer.should_flush():
+                await self.update_buffer.flush(self.db_manager)
+
+        except Exception as e:
+            logger.error(f"Failed to persist task: {e}", exc_info=True)
+            # ACK even on error, to avoid message pile-up
+            await ctx.ack()
+
+    async def _do_handle_status_update(self, ctx: TaskContext, **kwargs):
+        """Handle task status updates (UPDATE)
+
+        Consumes the TASK_CHANGES queue and batch-updates task status in the database
 
-        # Get a stable consumer_id
-        self.consumer_id = self.consumer_manager.get_consumer_name('TASK_CHANGES')
-        logger.debug(f"Using consumer_id: {self.consumer_id} with strategy: HEARTBEAT")
+        Args:
+            ctx: task context injected by Jettask
+            **kwargs: the task's raw data fields (contains task_id)
+        """
+        try:
+            # Get the task_id from the message
+            task_id = kwargs.get('task_id')
+            if not task_id:
+                logger.warning(f"TASK_CHANGES message is missing task_id: {ctx.event_id}")
+                await ctx.ack()
+                return
+
+            # Read the full task status from the Redis hash
+            # task_id format: test5:TASK:event_id:queue:task_name
+            # We have to query the Redis hash for the status information
+            redis_client = ctx.app.async_binary_redis
+            # Query the task status hash
+            task_info = await redis_client.hgetall(task_id)
+            logger.info(f"task_id={task_id!r}")
+            logger.info(f"task_info={task_info!r}")
+            if not task_info:
+                logger.warning(f"Could not find task status info: {task_id}")
+                await ctx.ack()
+                return
+
+            # Extract the event_id (stream_id) from the task_id
+            # task_id format: prefix:TASK:event_id:queue:task_name
+            parts = task_id.split(':')
+            if len(parts) >= 3:
+                event_id = parts[2]  # extract the event_id
+            else:
+                logger.error(f"Invalid task_id format: {task_id}")
+                await ctx.ack()
+                return
+
+            # Parse the individual fields (the binary Redis client returns bytes)
+            # 1. retries
+            retries = task_info.get(b'retries', 0)
+            if isinstance(retries, bytes):
+                retries = int(retries.decode('utf-8')) if retries else 0
+            elif isinstance(retries, str):
+                retries = int(retries) if retries else 0
+
+            # 2. started_at
+            started_at = task_info.get(b'started_at')
+            if started_at:
+                if isinstance(started_at, bytes):
+                    started_at = float(started_at.decode('utf-8'))
+                elif isinstance(started_at, str):
+                    started_at = float(started_at)
+
+            # 3. completed_at
+            completed_at = task_info.get(b'completed_at')
+            if completed_at:
+                if isinstance(completed_at, bytes):
+                    completed_at = float(completed_at.decode('utf-8'))
+                elif isinstance(completed_at, str):
+                    completed_at = float(completed_at)
+
+            # 4. consumer
+            consumer = task_info.get(b'consumer')
+            if consumer:
+                if isinstance(consumer, bytes):
+                    consumer = consumer.decode('utf-8')
+
+            # 5. status
+            status = task_info.get(b'status')
+            if status:
+                if isinstance(status, bytes):
+                    status = status.decode('utf-8')
+
+            # 6. result (kept as raw bytes; parsed in persistence.py)
+            result = task_info.get(b'result')
+
+            # 7. error/exception
+            error = task_info.get(b'exception') or task_info.get(b'error')
+
+            update_record = {
+                'stream_id': event_id,
+                'status': status,
+                'result': result,  # bytes; parsed later
+                'error': error,
+                'started_at': started_at,
+                'completed_at': completed_at,
+                'retries': retries,
+                'consumer': consumer,
+            }
+
+            logger.info(f"update_record={update_record!r}")
+
+            print(f'{update_record=}')
+            # Add to the status-update buffer
+            self.update_buffer.add(update_record, ctx)
+
+            # Check whether a flush is due (batch size or timeout)
+            if self.update_buffer.should_flush():
+                await self.update_buffer.flush(self.db_manager)
+
+            # Also check whether the INSERT buffer needs flushing (piggyback on this call)
+            if self.insert_buffer.should_flush():
+                await self.insert_buffer.flush(self.db_manager)
+
+        except Exception as e:
+            logger.error(f"Failed to update task status: {e}", exc_info=True)
+            # ACK even on error
+            await ctx.ack()
+
+    async def start(self, concurrency: int = 4):
+        """Start the Consumer
 
-        # 3. Create the SQLAlchemy async engine
-        if self.pg_config.dsn.startswith('postgresql://'):
-            dsn = self.pg_config.dsn.replace('postgresql://', 'postgresql+asyncpg://', 1)
-        else:
-            dsn = self.pg_config.dsn
+        Args:
+            concurrency: concurrency level
+        """
+        logger.info(f"Starting PostgreSQL consumer (wildcard queue mode)")
+        logger.info(f"Namespace: {self.namespace_name} ({self.namespace_id or 'N/A'})")
 
-        self.async_engine = create_async_engine(
+        # 1. Let connector.py manage database connections centrally
+        # Parse the PostgreSQL configuration into a standard DSN
+        dsn = DBConfig.parse_pg_config(self.pg_config)
+
+        # Use the global singleton engine and session factory
+        self.async_engine, self.AsyncSessionLocal = get_pg_engine_and_factory(
             dsn,
             pool_size=50,
@@ -168,148 +320,77 @@ class PostgreSQLConsumer:
             echo=False
         )
 
-        # Pre-warm the connection pool
-        logger.debug("Pre-warming database connection pool...")
-        async with self.async_engine.begin() as conn:
-            await conn.execute(text("SELECT 1"))
-
-        # Create the async session factory
-        self.AsyncSessionLocal = sessionmaker(
-            self.async_engine,
-            class_=AsyncSession,
-            expire_on_commit=False
-        )
+        logger.debug(f"Using the global PostgreSQL connection pool: {dsn[:50]}...")
 
-        # 4. Initialize the submodules
-        # Task persistence handler
-        self.task_persistence = TaskPersistence(
+        # 2. Initialize the task persistence manager
+        self.db_manager = TaskPersistence(
             async_session_local=self.AsyncSessionLocal,
             namespace_id=self.namespace_id,
             namespace_name=self.namespace_name
         )
 
-        # Queue discovery
-        self.queue_discovery = QueueDiscovery(
-            redis_client=self.redis_client,
-            redis_prefix=self.prefix,
-            consumer_group=self.consumer_group,
-            consumer_manager=self.consumer_manager
-        )
-
-        # Run an initial queue discovery first so the ConsumerManager has the correct queue list
-        await self.queue_discovery.initial_queue_discovery()
-
-        # Message consumer
-        self.message_consumer = MessageConsumer(
-            redis_client=self.redis_client,
-            redis_prefix=self.prefix,
-            consumer_group=self.consumer_group,
-            consumer_id=self.consumer_id,
-            task_persistence=self.task_persistence,
-            queue_discovery=self.queue_discovery
-        )
-
-        # Task status updater
-        self.task_updater = TaskUpdater(
-            redis_client=self.redis_client,
-            async_session_local=self.AsyncSessionLocal,
-            redis_prefix=self.prefix,
-            consumer_id=self.consumer_id
-        )
-
-        # Offline recovery handler
-        self.offline_recovery = OfflineRecoveryHandler(
-            redis_client=self.redis_client,
-            redis_prefix=self.prefix,
-            consumer_id=self.consumer_id,
-            task_updater=self.task_updater
-        )
-        # Deferred initialization (needs the consumer_manager)
-        self.offline_recovery.set_consumer_manager(self.consumer_manager)
-
-        # Database maintenance
-        self.database_maintenance = DatabaseMaintenance(
-            async_session_local=self.AsyncSessionLocal
-        )
-
-        # Backlog monitor
-        self.backlog_monitor = BacklogMonitor(
-            redis_client=self.redis_client,
-            async_session_local=self.AsyncSessionLocal,
-            redis_prefix=self.prefix,
-            namespace_name=self.namespace_name,
-            node_id=self.node_id,
-            enable_monitor=self.enable_backlog_monitor,
-            monitor_interval=self.backlog_monitor_interval
-        )
-
-        # 5. Start all submodules
+        # 3. Set the running state
         self._running = True
 
-        # Start queue discovery
-        await self.queue_discovery.start_discovery()
-
-        # Start message consumption
-        await self.message_consumer.start()
-
-        # Start task status updates
-        await self.task_updater.start()
-
-        # Start offline recovery
-        await self.offline_recovery.start()
-
-        # Start database maintenance
-        await self.database_maintenance.start()
-
-        # Start backlog monitoring
-        if self.enable_backlog_monitor:
-            await self.backlog_monitor.start()
-            logger.info(f"Stream backlog monitor enabled with {self.backlog_monitor_interval}s interval")
-
-        # With the HEARTBEAT strategy, the ConsumerManager manages heartbeats automatically
-        if self.consumer_manager:
-            logger.debug("Heartbeat is managed by ConsumerManager")
-
-        logger.debug("PostgreSQL consumer started successfully")
+        # Note: no periodic flush task is started in the main process, because the buffers live in the worker subprocesses
+        # The flush logic is built into the task handlers (every processed task checks whether a flush is due)
+
+        # Start the worker (using wildcard queues)
+        logger.info("=" * 60)
+        logger.info(f"Starting PG Consumer (wildcard queue mode)")
+        logger.info("=" * 60)
+        logger.info(f"Namespace: {self.namespace_name} ({self.namespace_id or 'N/A'})")
+        logger.info(f"Queues: * (all queues) + TASK_CHANGES (status updates)")
+        logger.info(f"INSERT batch size: {self.insert_buffer.max_size}")
+        logger.info(f"UPDATE batch size: {self.update_buffer.max_size}")
+        logger.info(f"Flush interval: {self.insert_buffer.max_delay} s")
+        logger.info(f"Concurrency: {concurrency}")
+        logger.info("=" * 60)
+
+        try:
+            # Start the worker
+            # Two queues have to be listened on at the same time:
+            # 1. '*' - wildcard matching all regular task queues (INSERT)
+            # 2. 'TASK_CHANGES' - the dedicated status update queue (UPDATE)
+            await self.app.start(
+                queues=['*', 'TASK_CHANGES'],  # 🎯 key: listen on all queues + the status update queue
+                concurrency=concurrency
+            )
+        finally:
+            await self.stop()
 
     async def stop(self):
-        """Stop the consumer"""
-        logger.debug("Stopping PostgreSQL consumer...")
+        """Stop the Consumer"""
+        logger.info("Stopping PG Consumer...")
         self._running = False
 
-        # Stop all submodules
-        if self.backlog_monitor:
-            await self.backlog_monitor.stop()
-
-        if self.database_maintenance:
-            await self.database_maintenance.stop()
-
-        if self.offline_recovery:
-            await self.offline_recovery.stop()
-
-        if self.task_updater:
-            await self.task_updater.stop()
-
-        if self.message_consumer:
-            await self.message_consumer.stop()
-
-        if self.queue_discovery:
-            await self.queue_discovery.stop_discovery()
-
-        # Clean up the ConsumerManager
-        if self.consumer_manager:
-            try:
-                self.consumer_manager.cleanup()
-                logger.debug(f"Cleaned up ConsumerManager for consumer: {self.consumer_id}")
-            except Exception as e:
-                logger.error(f"Error cleaning up ConsumerManager: {e}")
-                logger.error(traceback.format_exc())
-
-        # Close connections
-        if self.redis_client:
-            await self.redis_client.close()
-
-        if self.async_engine:
-            await self.async_engine.dispose()
-
-        logger.debug("PostgreSQL consumer stopped")
+        # Note: the periodic flush task is gone; flushing is integrated into task handling
+
+        # Flush the buffers one final time
+        try:
+            if self.insert_buffer.records:
+                await self.insert_buffer.flush(self.db_manager)
+            if self.update_buffer.records:
+                await self.update_buffer.flush(self.db_manager)
+        except Exception as e:
+            logger.error(f"Final flush failed: {e}")
+
+        # Note: the database engine is not disposed here because it is a global singleton managed by connector.py
+        # Multiple consumer instances may share the same engine
+
+        # Print statistics
+        insert_stats = self.insert_buffer.get_stats()
+        update_stats = self.update_buffer.get_stats()
+
+        logger.info("=" * 60)
+        logger.info("PG Consumer statistics")
+        logger.info("=" * 60)
+        logger.info(f"INSERT: {insert_stats['total_flushed']} records total, "
+                    f"{insert_stats['flush_count']} flushes, "
+                    f"avg {insert_stats['avg_per_flush']} records/flush")
+        logger.info(f"UPDATE: {update_stats['total_flushed']} records total, "
+                    f"{update_stats['flush_count']} flushes, "
+                    f"avg {update_stats['avg_per_flush']} records/flush")
+        logger.info("=" * 60)
+
+        logger.info("PG Consumer stopped")