jettask 0.2.20__py3-none-any.whl → 0.2.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jettask/__init__.py +4 -0
- jettask/cli.py +12 -8
- jettask/config/lua_scripts.py +37 -0
- jettask/config/nacos_config.py +1 -1
- jettask/core/app.py +313 -340
- jettask/core/container.py +4 -4
- jettask/{persistence → core}/namespace.py +93 -27
- jettask/core/task.py +16 -9
- jettask/core/unified_manager_base.py +136 -26
- jettask/db/__init__.py +67 -0
- jettask/db/base.py +137 -0
- jettask/{utils/db_connector.py → db/connector.py} +130 -26
- jettask/db/models/__init__.py +16 -0
- jettask/db/models/scheduled_task.py +196 -0
- jettask/db/models/task.py +77 -0
- jettask/db/models/task_run.py +85 -0
- jettask/executor/__init__.py +0 -15
- jettask/executor/core.py +76 -31
- jettask/executor/process_entry.py +29 -114
- jettask/executor/task_executor.py +4 -0
- jettask/messaging/event_pool.py +928 -685
- jettask/messaging/scanner.py +30 -0
- jettask/persistence/__init__.py +28 -103
- jettask/persistence/buffer.py +170 -0
- jettask/persistence/consumer.py +330 -249
- jettask/persistence/manager.py +304 -0
- jettask/persistence/persistence.py +391 -0
- jettask/scheduler/__init__.py +15 -3
- jettask/scheduler/{task_crud.py → database.py} +61 -57
- jettask/scheduler/loader.py +2 -2
- jettask/scheduler/{scheduler_coordinator.py → manager.py} +23 -6
- jettask/scheduler/models.py +14 -10
- jettask/scheduler/schedule.py +166 -0
- jettask/scheduler/scheduler.py +12 -11
- jettask/schemas/__init__.py +50 -1
- jettask/schemas/backlog.py +43 -6
- jettask/schemas/namespace.py +70 -19
- jettask/schemas/queue.py +19 -3
- jettask/schemas/responses.py +493 -0
- jettask/task/__init__.py +0 -2
- jettask/task/router.py +3 -0
- jettask/test_connection_monitor.py +1 -1
- jettask/utils/__init__.py +7 -5
- jettask/utils/db_init.py +8 -4
- jettask/utils/namespace_dep.py +167 -0
- jettask/utils/queue_matcher.py +186 -0
- jettask/utils/rate_limit/concurrency_limiter.py +7 -1
- jettask/utils/stream_backlog.py +1 -1
- jettask/webui/__init__.py +0 -1
- jettask/webui/api/__init__.py +4 -4
- jettask/webui/api/alerts.py +806 -71
- jettask/webui/api/example_refactored.py +400 -0
- jettask/webui/api/namespaces.py +390 -45
- jettask/webui/api/overview.py +300 -54
- jettask/webui/api/queues.py +971 -267
- jettask/webui/api/scheduled.py +1249 -56
- jettask/webui/api/settings.py +129 -7
- jettask/webui/api/workers.py +442 -0
- jettask/webui/app.py +46 -2329
- jettask/webui/middleware/__init__.py +6 -0
- jettask/webui/middleware/namespace_middleware.py +135 -0
- jettask/webui/services/__init__.py +146 -0
- jettask/webui/services/heartbeat_service.py +251 -0
- jettask/webui/services/overview_service.py +60 -51
- jettask/webui/services/queue_monitor_service.py +426 -0
- jettask/webui/services/redis_monitor_service.py +87 -0
- jettask/webui/services/settings_service.py +174 -111
- jettask/webui/services/task_monitor_service.py +222 -0
- jettask/webui/services/timeline_pg_service.py +452 -0
- jettask/webui/services/timeline_service.py +189 -0
- jettask/webui/services/worker_monitor_service.py +467 -0
- jettask/webui/utils/__init__.py +11 -0
- jettask/webui/utils/time_utils.py +122 -0
- jettask/worker/lifecycle.py +8 -2
- {jettask-0.2.20.dist-info → jettask-0.2.24.dist-info}/METADATA +1 -1
- jettask-0.2.24.dist-info/RECORD +142 -0
- jettask/executor/executor.py +0 -338
- jettask/persistence/backlog_monitor.py +0 -567
- jettask/persistence/base.py +0 -2334
- jettask/persistence/db_manager.py +0 -516
- jettask/persistence/maintenance.py +0 -81
- jettask/persistence/message_consumer.py +0 -259
- jettask/persistence/models.py +0 -49
- jettask/persistence/offline_recovery.py +0 -196
- jettask/persistence/queue_discovery.py +0 -215
- jettask/persistence/task_persistence.py +0 -218
- jettask/persistence/task_updater.py +0 -583
- jettask/scheduler/add_execution_count.sql +0 -11
- jettask/scheduler/add_priority_field.sql +0 -26
- jettask/scheduler/add_scheduler_id.sql +0 -25
- jettask/scheduler/add_scheduler_id_index.sql +0 -10
- jettask/scheduler/make_scheduler_id_required.sql +0 -28
- jettask/scheduler/migrate_interval_seconds.sql +0 -9
- jettask/scheduler/performance_optimization.sql +0 -45
- jettask/scheduler/run_scheduler.py +0 -186
- jettask/scheduler/schema.sql +0 -84
- jettask/task/task_executor.py +0 -318
- jettask/webui/api/analytics.py +0 -323
- jettask/webui/config.py +0 -90
- jettask/webui/models/__init__.py +0 -3
- jettask/webui/models/namespace.py +0 -63
- jettask/webui/namespace_manager/__init__.py +0 -10
- jettask/webui/namespace_manager/multi.py +0 -593
- jettask/webui/namespace_manager/unified.py +0 -193
- jettask/webui/run.py +0 -46
- jettask-0.2.20.dist-info/RECORD +0 -145
- {jettask-0.2.20.dist-info → jettask-0.2.24.dist-info}/WHEEL +0 -0
- {jettask-0.2.20.dist-info → jettask-0.2.24.dist-info}/entry_points.txt +0 -0
- {jettask-0.2.20.dist-info → jettask-0.2.24.dist-info}/licenses/LICENSE +0 -0
- {jettask-0.2.20.dist-info → jettask-0.2.24.dist-info}/top_level.txt +0 -0
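Several modules were moved rather than removed in this release, so code importing them from their 0.2.20 paths will break on upgrade. A minimal sketch of the path migrations implied by the renames above; only get_pg_engine_and_factory and DBConfig are confirmed by the consumer.py diff that follows, and the bare module imports are placeholders for whatever symbols you actually use:

# Import-path migrations implied by the renames listed above (0.2.20 -> 0.2.24)
from jettask.db.connector import get_pg_engine_and_factory, DBConfig  # was jettask/utils/db_connector.py
from jettask.core import namespace       # was jettask/persistence/namespace.py
from jettask.scheduler import database   # was jettask/scheduler/task_crud.py
from jettask.scheduler import manager    # was jettask/scheduler/scheduler_coordinator.py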
jettask/persistence/consumer.py
CHANGED
@@ -1,165 +1,317 @@
-"""PostgreSQL
+"""PostgreSQL Consumer - a new implementation built on wildcard queues
 
-
+Fully replaces the old consumer.py implementation, using Jettask's wildcard queue feature.
 """
 
-import
+import time
 import logging
-import
-
-import
-from
-
-
-from
-from sqlalchemy.ext.asyncio import create_async_engine
-from sqlalchemy.orm import sessionmaker
-from sqlalchemy.ext.asyncio import AsyncSession
-from sqlalchemy import text
-
-from jettask.webui.config import PostgreSQLConfig, RedisConfig
-from jettask.worker.manager import ConsumerManager
-
-from .backlog_monitor import BacklogMonitor
-from .task_updater import TaskUpdater
-from .offline_recovery import OfflineRecoveryHandler
-from .task_persistence import TaskPersistence
-from .queue_discovery import QueueDiscovery
-from .message_consumer import MessageConsumer
-from .maintenance import DatabaseMaintenance
+from datetime import datetime, timezone
+
+from jettask import Jettask
+from jettask.core.context import TaskContext
+from jettask.db.connector import get_pg_engine_and_factory, DBConfig
+from .buffer import BatchBuffer
+from .persistence import TaskPersistence
 
 logger = logging.getLogger(__name__)
 
 
 class PostgreSQLConsumer:
-    """PostgreSQL
-
-
-
-
-
-
-    - OfflineRecoveryHandler: recover messages from offline workers
-    - TaskPersistence: parse and persist task data
-    - QueueDiscovery: discover and manage queues
-    - MessageConsumer: consume queue messages
-    - DatabaseMaintenance: database maintenance tasks
+    """PostgreSQL Consumer - built on wildcard queues
+
+    Core features:
+    1. Uses @app.task(queue='*') to listen on all queues
+    2. Uses @app.task(queue='TASK_CHANGES') to handle status updates
+    3. Batched INSERTs and UPDATEs
+    4. Automatic queue discovery (built into Jettask)
     """
 
     def __init__(
         self,
-        pg_config
-        redis_config
+        pg_config,  # dict or config object
+        redis_config,  # dict or config object
         prefix: str = "jettask",
-        node_id: str = None,
-        # the consumer_strategy parameter was removed; only the HEARTBEAT strategy is used now
         namespace_id: str = None,
         namespace_name: str = None,
-
-
+        batch_size: int = 1000,
+        flush_interval: float = 5.0
     ):
-        """Initialize
+        """Initialize the PG Consumer
 
         Args:
-            pg_config: PostgreSQL
-            redis_config: Redis
+            pg_config: PostgreSQL config (dict or object)
+            redis_config: Redis config (dict or object)
             prefix: Redis key prefix
-            node_id: node ID
-            consumer_strategy: consumer strategy
+            node_id: node ID (kept for backward compatibility, unused)
             namespace_id: namespace ID
             namespace_name: namespace name
-            enable_backlog_monitor:
-            backlog_monitor_interval:
+            enable_backlog_monitor: whether to enable backlog monitoring (kept for backward compatibility, unused)
+            backlog_monitor_interval: backlog monitoring interval (kept for backward compatibility, unused)
+            batch_size: batch size
+            flush_interval: flush interval in seconds
         """
         self.pg_config = pg_config
         self.redis_config = redis_config
-        self.
-
-        # Namespace support
+        self.redis_prefix = prefix
         self.namespace_id = namespace_id
         self.namespace_name = namespace_name or "default"
 
-        #
-
-
+        # Build the Redis URL (accepts both dict and object configs)
+        if isinstance(redis_config, dict):
+            # Dict format - prefer the 'url' field
+            redis_url = redis_config.get('url') or redis_config.get('redis_url')
+            if not redis_url:
+                # Build it from the individual fields
+                password = redis_config.get('password', '')
+                host = redis_config.get('host', 'localhost')
+                port = redis_config.get('port', 6379)
+                db = redis_config.get('db', 0)
+                redis_url = f"redis://"
+                if password:
+                    redis_url += f":{password}@"
+                redis_url += f"{host}:{port}/{db}"
+        else:
+            # Object format
+            redis_url = f"redis://"
+            if hasattr(redis_config, 'password') and redis_config.password:
+                redis_url += f":{redis_config.password}@"
+            redis_url += f"{redis_config.host}:{redis_config.port}/{redis_config.db}"
 
-
-
-        self.consumer_group = f"{prefix}_pg_consumer"
+        self.redis_url = redis_url
+        logger.debug(f"Built Redis URL: {redis_url}")
 
-        #
-        self.redis_client: Optional[Redis] = None
+        # Database engine and session (initialized in start())
         self.async_engine = None
         self.AsyncSessionLocal = None
+        self.db_manager = None
+
+        # Create the Jettask application
+        self.app = Jettask(
+            redis_url=redis_url,
+            redis_prefix=prefix
+        )
 
-        #
-
-        self.
+        # Create two independent batch buffers
+        # 1. INSERT buffer (persists new tasks)
+        self.insert_buffer = BatchBuffer(
+            max_size=batch_size,
+            max_delay=flush_interval,
+            operation_type='insert'
+        )
 
-        #
-        self.
-
-
-
-
-        self.message_consumer: Optional[MessageConsumer] = None
-        self.database_maintenance: Optional[DatabaseMaintenance] = None
+        # 2. UPDATE buffer (task status updates)
+        self.update_buffer = BatchBuffer(
+            max_size=batch_size // 2,  # status updates are usually more frequent, so use smaller batches
+            max_delay=flush_interval,
+            operation_type='update'
+        )
 
-        #
-        self.
-        self.backlog_monitor_interval = backlog_monitor_interval
+        # Register the task handlers
+        self._register_tasks()
 
+        # Run state
         self._running = False
 
-
-        """
-
+    def _register_tasks(self):
+        """Register the task handlers"""
+        # Closures give the handlers access to instance attributes
+        consumer = self  # capture the self reference
 
-
-
+        @self.app.task(queue='*', auto_ack=False)
+        async def _handle_persist_task(ctx: TaskContext, **kwargs):
+            return await consumer._do_handle_persist_task(ctx, **kwargs)
 
-
-
-
-
-        redis_url += f"{self.redis_config.host}:{self.redis_config.port}/{self.redis_config.db}"
+        @self.app.task(queue='TASK_CHANGES', auto_ack=False)
+        async def _handle_status_update(ctx: TaskContext, **kwargs):
+            print(f'[PG Consumer] handling status update: {ctx.event_id} {kwargs=}')
+            return await consumer._do_handle_status_update(ctx, **kwargs)
 
-
-
-            decode_responses=False  # stay in binary mode
-        )
+    async def _do_handle_persist_task(self, ctx: TaskContext, **kwargs):
+        """Handle task persistence (INSERT)
 
-
-
-            redis_url=redis_url,
-            decode_responses=True
-        )
+        Uses the wildcard queue='*' to listen on all queues;
+        Jettask discovers new queues automatically and starts consuming them.
 
-
-
-
-
-
-
-
-
-
-        #
-
-
+        Args:
+            ctx: task context injected by Jettask (carries queue, event_id, etc.)
+            **kwargs: the task's raw data fields
+        """
+        # Skip the TASK_CHANGES queue (handled by the other task)
+        if ctx.queue == f'{self.redis_prefix}:QUEUE:TASK_CHANGES':
+            await ctx.ack()
+            return
+
+        try:
+            # Extract the bare queue name (strip the prefix:QUEUE: prefix)
+            queue_name = ctx.queue.replace(f'{self.redis_prefix}:QUEUE:', '')
+
+            # Log the real queue name (verifies the wildcard queue feature)
+            logger.info(f"[persist task] full path: {ctx.queue}, queue: {queue_name}, Stream ID: {ctx.event_id}")
+
+            # Build the task record
+            trigger_time = kwargs.get('trigger_time', time.time())
+            if isinstance(trigger_time, (str, bytes)):
+                trigger_time = float(trigger_time)
+
+            priority = kwargs.get('priority', 0)
+            if isinstance(priority, (str, bytes)):
+                priority = int(priority)
+
+            record = {
+                'stream_id': ctx.event_id,
+                'queue': ctx.queue.replace(f'{self.redis_prefix}:QUEUE:', ''),
+                'task_name': kwargs.get('task_name', 'unknown'),
+                'payload': kwargs.get('payload', {}),
+                'priority': priority,
+                'created_at': datetime.fromtimestamp(trigger_time, tz=timezone.utc),
+                'scheduled_task_id': kwargs.get('scheduled_task_id'),
+                'namespace': self.namespace_name,
+                'source': 'scheduler' if kwargs.get('scheduled_task_id') else 'redis_stream',
+            }
+
+            # Add to the buffer (no immediate processing, no immediate ACK)
+            self.insert_buffer.add(record, ctx)
+
+            # Check whether a flush is due (batch size or timeout)
+            if self.insert_buffer.should_flush():
+                await self.insert_buffer.flush(self.db_manager)
+
+            # Take the opportunity to check the UPDATE buffer as well
+            if self.update_buffer.should_flush():
+                await self.update_buffer.flush(self.db_manager)
+
+        except Exception as e:
+            logger.error(f"Failed to persist task: {e}", exc_info=True)
+            # ACK even on error so messages do not pile up
+            await ctx.ack()
+
+    async def _do_handle_status_update(self, ctx: TaskContext, **kwargs):
+        """Handle task status updates (UPDATE)
+
+        Consumes the TASK_CHANGES queue and batch-updates task statuses in the database.
 
-
-
-
+        Args:
+            ctx: task context injected by Jettask
+            **kwargs: the task's raw data fields (including task_id)
+        """
+        try:
+            # Get the task_id from the message
+            task_id = kwargs.get('task_id')
+            if not task_id:
+                logger.warning(f"TASK_CHANGES message is missing task_id: {ctx.event_id}")
+                await ctx.ack()
+                return
+
+            # Read the full task state from the Redis hash
+            # task_id format: test5:TASK:event_id:queue:task_name
+            # We query the Redis hash for the state information
+            redis_client = ctx.app.async_binary_redis
+            # Query the task state hash
+            task_info = await redis_client.hgetall(task_id)
+            logger.info(f"task_id={task_id!r}")
+            logger.info(f"task_info={task_info!r}")
+            if not task_info:
+                logger.warning(f"No task state information found: {task_id}")
+                await ctx.ack()
+                return
+
+            # Extract the event_id (stream_id) from the task_id
+            # task_id format: prefix:TASK:event_id:queue:task_name
+            parts = task_id.split(':')
+            if len(parts) >= 3:
+                event_id = parts[2]  # extract the event_id
+            else:
+                logger.error(f"Invalid task_id format: {task_id}")
+                await ctx.ack()
+                return
+
+            # Parse the individual fields (the binary Redis client returns bytes)
+            # 1. retries
+            retries = task_info.get(b'retries', 0)
+            if isinstance(retries, bytes):
+                retries = int(retries.decode('utf-8')) if retries else 0
+            elif isinstance(retries, str):
+                retries = int(retries) if retries else 0
+
+            # 2. started_at
+            started_at = task_info.get(b'started_at')
+            if started_at:
+                if isinstance(started_at, bytes):
+                    started_at = float(started_at.decode('utf-8'))
+                elif isinstance(started_at, str):
+                    started_at = float(started_at)
+
+            # 3. completed_at
+            completed_at = task_info.get(b'completed_at')
+            if completed_at:
+                if isinstance(completed_at, bytes):
+                    completed_at = float(completed_at.decode('utf-8'))
+                elif isinstance(completed_at, str):
+                    completed_at = float(completed_at)
+
+            # 4. consumer
+            consumer = task_info.get(b'consumer')
+            if consumer:
+                if isinstance(consumer, bytes):
+                    consumer = consumer.decode('utf-8')
+
+            # 5. status
+            status = task_info.get(b'status')
+            if status:
+                if isinstance(status, bytes):
+                    status = status.decode('utf-8')
+
+            # 6. result (kept as raw bytes; parsed in persistence.py)
+            result = task_info.get(b'result')
+
+            # 7. error/exception
+            error = task_info.get(b'exception') or task_info.get(b'error')
+
+            update_record = {
+                'stream_id': event_id,
+                'status': status,
+                'result': result,  # bytes; parsed later
+                'error': error,
+                'started_at': started_at,
+                'completed_at': completed_at,
+                'retries': retries,
+                'consumer': consumer,
+            }
+
+            logger.info(f"update_record={update_record!r}")
+
+            print(f'{update_record=}')
+            # Add to the status-update buffer
+            self.update_buffer.add(update_record, ctx)
+
+            # Check whether a flush is due (batch size or timeout)
+            if self.update_buffer.should_flush():
+                await self.update_buffer.flush(self.db_manager)
+
+            # Take the opportunity to check the INSERT buffer as well
+            if self.insert_buffer.should_flush():
+                await self.insert_buffer.flush(self.db_manager)
+
+        except Exception as e:
+            logger.error(f"Failed to update task status: {e}", exc_info=True)
+            # ACK even on error
+            await ctx.ack()
+
+    async def start(self, concurrency: int = 4):
+        """Start the Consumer
 
-
-
-
-
-
+        Args:
+            concurrency: concurrency level
+        """
+        logger.info(f"Starting PostgreSQL consumer (wildcard queue mode)")
+        logger.info(f"Namespace: {self.namespace_name} ({self.namespace_id or 'N/A'})")
 
-
+        # 1. Manage database connections centrally through connector.py
+        # Parse the PostgreSQL config into a standard DSN
+        dsn = DBConfig.parse_pg_config(self.pg_config)
+
+        # Use the global singleton engine and session factory
+        self.async_engine, self.AsyncSessionLocal = get_pg_engine_and_factory(
             dsn,
             pool_size=50,
             max_overflow=20,
@@ -168,148 +320,77 @@ class PostgreSQLConsumer:
             echo=False
         )
 
-
-        logger.debug("Pre-warming database connection pool...")
-        async with self.async_engine.begin() as conn:
-            await conn.execute(text("SELECT 1"))
-
-        # Create the async session factory
-        self.AsyncSessionLocal = sessionmaker(
-            self.async_engine,
-            class_=AsyncSession,
-            expire_on_commit=False
-        )
+        logger.debug(f"Using global PostgreSQL connection pool: {dsn[:50]}...")
 
-        #
-
-        self.task_persistence = TaskPersistence(
+        # 2. Initialize the task persistence manager
+        self.db_manager = TaskPersistence(
            async_session_local=self.AsyncSessionLocal,
            namespace_id=self.namespace_id,
            namespace_name=self.namespace_name
        )
 
-        #
-        self.queue_discovery = QueueDiscovery(
-            redis_client=self.redis_client,
-            redis_prefix=self.prefix,
-            consumer_group=self.consumer_group,
-            consumer_manager=self.consumer_manager
-        )
-
-        # Run queue discovery once up front so ConsumerManager has the correct queue list
-        await self.queue_discovery.initial_queue_discovery()
-
-        # Message consumer
-        self.message_consumer = MessageConsumer(
-            redis_client=self.redis_client,
-            redis_prefix=self.prefix,
-            consumer_group=self.consumer_group,
-            consumer_id=self.consumer_id,
-            task_persistence=self.task_persistence,
-            queue_discovery=self.queue_discovery
-        )
-
-        # Task status updater
-        self.task_updater = TaskUpdater(
-            redis_client=self.redis_client,
-            async_session_local=self.AsyncSessionLocal,
-            redis_prefix=self.prefix,
-            consumer_id=self.consumer_id
-        )
-
-        # Offline recovery handler
-        self.offline_recovery = OfflineRecoveryHandler(
-            redis_client=self.redis_client,
-            redis_prefix=self.prefix,
-            consumer_id=self.consumer_id,
-            task_updater=self.task_updater
-        )
-        # Deferred initialization (needs consumer_manager)
-        self.offline_recovery.set_consumer_manager(self.consumer_manager)
-
-        # Database maintenance
-        self.database_maintenance = DatabaseMaintenance(
-            async_session_local=self.AsyncSessionLocal
-        )
-
-        # Backlog monitor
-        self.backlog_monitor = BacklogMonitor(
-            redis_client=self.redis_client,
-            async_session_local=self.AsyncSessionLocal,
-            redis_prefix=self.prefix,
-            namespace_name=self.namespace_name,
-            node_id=self.node_id,
-            enable_monitor=self.enable_backlog_monitor,
-            monitor_interval=self.backlog_monitor_interval
-        )
-
-        # 5. Start all submodules
+        # 3. Mark the consumer as running
         self._running = True
 
-        #
-
-
-        #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # Note: no periodic flush task is started in the main process, because the buffers live in the worker subprocesses
+        # Flushing is built into the task handlers (every handled task checks whether a flush is due)
+
+        # Start the worker (wildcard queues)
+        logger.info("=" * 60)
+        logger.info(f"Starting PG Consumer (wildcard queue mode)")
+        logger.info("=" * 60)
+        logger.info(f"Namespace: {self.namespace_name} ({self.namespace_id or 'N/A'})")
+        logger.info(f"Queues: * (all queues) + TASK_CHANGES (status updates)")
+        logger.info(f"INSERT batch size: {self.insert_buffer.max_size}")
+        logger.info(f"UPDATE batch size: {self.update_buffer.max_size}")
+        logger.info(f"Flush interval: {self.insert_buffer.max_delay}s")
+        logger.info(f"Concurrency: {concurrency}")
+        logger.info("=" * 60)
+
+        try:
+            # Start the worker
+            # Listen on two queues at once:
+            # 1. '*' - wildcard matching every regular task queue (INSERT)
+            # 2. 'TASK_CHANGES' - the dedicated status-update queue (UPDATE)
+            await self.app.start(
+                queues=['*', 'TASK_CHANGES'],  # 🎯 key point: all queues plus the status-update queue
+                concurrency=concurrency
+            )
+        finally:
+            await self.stop()
 
     async def stop(self):
-        """
-        logger.
+        """Stop the Consumer"""
+        logger.info("Stopping PG Consumer...")
         self._running = False
 
-        #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        await self.redis_client.close()
-
-        if self.async_engine:
-            await self.async_engine.dispose()
-
-        logger.debug("PostgreSQL consumer stopped")
+        # Note: the periodic flush task is gone; flushing is integrated into task handling
+
+        # Flush the buffers one last time
+        try:
+            if self.insert_buffer.records:
+                await self.insert_buffer.flush(self.db_manager)
+            if self.update_buffer.records:
+                await self.update_buffer.flush(self.db_manager)
+        except Exception as e:
+            logger.error(f"Final flush failed: {e}")
+
+        # Note: the database engine is not disposed here; it is a global singleton managed by connector.py
+        # and may be shared by multiple consumer instances
+
+        # Print statistics
+        insert_stats = self.insert_buffer.get_stats()
+        update_stats = self.update_buffer.get_stats()
+
+        logger.info("=" * 60)
+        logger.info("PG Consumer statistics")
+        logger.info("=" * 60)
+        logger.info(f"INSERT: {insert_stats['total_flushed']} rows total, "
+                    f"{insert_stats['flush_count']} flushes, "
+                    f"avg {insert_stats['avg_per_flush']} rows/flush")
+        logger.info(f"UPDATE: {update_stats['total_flushed']} rows total, "
+                    f"{update_stats['flush_count']} flushes, "
+                    f"avg {update_stats['avg_per_flush']} rows/flush")
+        logger.info("=" * 60)
+
+        logger.info("PG Consumer stopped")