jettask 0.2.19__py3-none-any.whl → 0.2.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jettask/__init__.py +10 -3
- jettask/cli.py +314 -228
- jettask/config/__init__.py +9 -1
- jettask/config/config.py +245 -0
- jettask/config/env_loader.py +381 -0
- jettask/config/lua_scripts.py +158 -0
- jettask/config/nacos_config.py +132 -5
- jettask/core/__init__.py +1 -1
- jettask/core/app.py +1573 -666
- jettask/core/app_importer.py +33 -16
- jettask/core/container.py +532 -0
- jettask/core/task.py +1 -4
- jettask/core/unified_manager_base.py +2 -2
- jettask/executor/__init__.py +38 -0
- jettask/executor/core.py +625 -0
- jettask/executor/executor.py +338 -0
- jettask/executor/orchestrator.py +290 -0
- jettask/executor/process_entry.py +638 -0
- jettask/executor/task_executor.py +317 -0
- jettask/messaging/__init__.py +68 -0
- jettask/messaging/event_pool.py +2188 -0
- jettask/messaging/reader.py +519 -0
- jettask/messaging/registry.py +266 -0
- jettask/messaging/scanner.py +369 -0
- jettask/messaging/sender.py +312 -0
- jettask/persistence/__init__.py +118 -0
- jettask/persistence/backlog_monitor.py +567 -0
- jettask/{backend/data_access.py → persistence/base.py} +58 -57
- jettask/persistence/consumer.py +315 -0
- jettask/{core → persistence}/db_manager.py +23 -22
- jettask/persistence/maintenance.py +81 -0
- jettask/persistence/message_consumer.py +259 -0
- jettask/{backend/namespace_data_access.py → persistence/namespace.py} +66 -98
- jettask/persistence/offline_recovery.py +196 -0
- jettask/persistence/queue_discovery.py +215 -0
- jettask/persistence/task_persistence.py +218 -0
- jettask/persistence/task_updater.py +583 -0
- jettask/scheduler/__init__.py +2 -2
- jettask/scheduler/loader.py +6 -5
- jettask/scheduler/run_scheduler.py +1 -1
- jettask/scheduler/scheduler.py +7 -7
- jettask/scheduler/{unified_scheduler_manager.py → scheduler_coordinator.py} +18 -13
- jettask/task/__init__.py +16 -0
- jettask/{router.py → task/router.py} +26 -8
- jettask/task/task_center/__init__.py +9 -0
- jettask/task/task_executor.py +318 -0
- jettask/task/task_registry.py +291 -0
- jettask/test_connection_monitor.py +73 -0
- jettask/utils/__init__.py +31 -1
- jettask/{monitor/run_backlog_collector.py → utils/backlog_collector.py} +1 -1
- jettask/utils/db_connector.py +1629 -0
- jettask/{db_init.py → utils/db_init.py} +1 -1
- jettask/utils/rate_limit/__init__.py +30 -0
- jettask/utils/rate_limit/concurrency_limiter.py +665 -0
- jettask/utils/rate_limit/config.py +145 -0
- jettask/utils/rate_limit/limiter.py +41 -0
- jettask/utils/rate_limit/manager.py +269 -0
- jettask/utils/rate_limit/qps_limiter.py +154 -0
- jettask/utils/rate_limit/task_limiter.py +384 -0
- jettask/utils/serializer.py +3 -0
- jettask/{monitor/stream_backlog_monitor.py → utils/stream_backlog.py} +14 -6
- jettask/utils/time_sync.py +173 -0
- jettask/webui/__init__.py +27 -0
- jettask/{api/v1 → webui/api}/alerts.py +1 -1
- jettask/{api/v1 → webui/api}/analytics.py +2 -2
- jettask/{api/v1 → webui/api}/namespaces.py +1 -1
- jettask/{api/v1 → webui/api}/overview.py +1 -1
- jettask/{api/v1 → webui/api}/queues.py +3 -3
- jettask/{api/v1 → webui/api}/scheduled.py +1 -1
- jettask/{api/v1 → webui/api}/settings.py +1 -1
- jettask/{api.py → webui/app.py} +253 -145
- jettask/webui/namespace_manager/__init__.py +10 -0
- jettask/{multi_namespace_consumer.py → webui/namespace_manager/multi.py} +69 -22
- jettask/{unified_consumer_manager.py → webui/namespace_manager/unified.py} +1 -1
- jettask/{run.py → webui/run.py} +2 -2
- jettask/{services → webui/services}/__init__.py +1 -3
- jettask/{services → webui/services}/overview_service.py +34 -16
- jettask/{services → webui/services}/queue_service.py +1 -1
- jettask/{backend → webui/services}/queue_stats_v2.py +1 -1
- jettask/{services → webui/services}/settings_service.py +1 -1
- jettask/worker/__init__.py +53 -0
- jettask/worker/lifecycle.py +1507 -0
- jettask/worker/manager.py +583 -0
- jettask/{core/offline_worker_recovery.py → worker/recovery.py} +268 -175
- {jettask-0.2.19.dist-info → jettask-0.2.20.dist-info}/METADATA +2 -71
- jettask-0.2.20.dist-info/RECORD +145 -0
- jettask/__main__.py +0 -140
- jettask/api/__init__.py +0 -103
- jettask/backend/__init__.py +0 -1
- jettask/backend/api/__init__.py +0 -3
- jettask/backend/api/v1/__init__.py +0 -17
- jettask/backend/api/v1/monitoring.py +0 -431
- jettask/backend/api/v1/namespaces.py +0 -504
- jettask/backend/api/v1/queues.py +0 -342
- jettask/backend/api/v1/tasks.py +0 -367
- jettask/backend/core/__init__.py +0 -3
- jettask/backend/core/cache.py +0 -221
- jettask/backend/core/database.py +0 -200
- jettask/backend/core/exceptions.py +0 -102
- jettask/backend/dependencies.py +0 -261
- jettask/backend/init_meta_db.py +0 -158
- jettask/backend/main.py +0 -1426
- jettask/backend/main_unified.py +0 -78
- jettask/backend/main_v2.py +0 -394
- jettask/backend/models/__init__.py +0 -3
- jettask/backend/models/requests.py +0 -236
- jettask/backend/models/responses.py +0 -230
- jettask/backend/namespace_api_old.py +0 -267
- jettask/backend/services/__init__.py +0 -3
- jettask/backend/start.py +0 -42
- jettask/backend/unified_api_router.py +0 -1541
- jettask/cleanup_deprecated_tables.sql +0 -16
- jettask/core/consumer_manager.py +0 -1695
- jettask/core/delay_scanner.py +0 -256
- jettask/core/event_pool.py +0 -1700
- jettask/core/heartbeat_process.py +0 -222
- jettask/core/task_batch.py +0 -153
- jettask/core/worker_scanner.py +0 -271
- jettask/executors/__init__.py +0 -5
- jettask/executors/asyncio.py +0 -876
- jettask/executors/base.py +0 -30
- jettask/executors/common.py +0 -148
- jettask/executors/multi_asyncio.py +0 -309
- jettask/gradio_app.py +0 -570
- jettask/integrated_gradio_app.py +0 -1088
- jettask/main.py +0 -0
- jettask/monitoring/__init__.py +0 -3
- jettask/pg_consumer.py +0 -1896
- jettask/run_monitor.py +0 -22
- jettask/run_webui.py +0 -148
- jettask/scheduler/multi_namespace_scheduler.py +0 -294
- jettask/scheduler/unified_manager.py +0 -450
- jettask/task_center_client.py +0 -150
- jettask/utils/serializer_optimized.py +0 -33
- jettask/webui_exceptions.py +0 -67
- jettask-0.2.19.dist-info/RECORD +0 -150
- /jettask/{constants.py → config/constants.py} +0 -0
- /jettask/{backend/config.py → config/task_center.py} +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/pg_consumer_v2.py +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/add_execution_time_field.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_new_tables.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_tables_v3.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/migrate_to_new_structure.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/modify_time_fields.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql_utils.py +0 -0
- /jettask/{models.py → persistence/models.py} +0 -0
- /jettask/scheduler/{manager.py → task_crud.py} +0 -0
- /jettask/{schema.sql → schemas/schema.sql} +0 -0
- /jettask/{task_center.py → task/task_center/client.py} +0 -0
- /jettask/{monitoring → utils}/file_watcher.py +0 -0
- /jettask/{services/redis_monitor_service.py → utils/redis_monitor.py} +0 -0
- /jettask/{api/v1 → webui/api}/__init__.py +0 -0
- /jettask/{webui_config.py → webui/config.py} +0 -0
- /jettask/{webui_models → webui/models}/__init__.py +0 -0
- /jettask/{webui_models → webui/models}/namespace.py +0 -0
- /jettask/{services → webui/services}/alert_service.py +0 -0
- /jettask/{services → webui/services}/analytics_service.py +0 -0
- /jettask/{services → webui/services}/scheduled_task_service.py +0 -0
- /jettask/{services → webui/services}/task_service.py +0 -0
- /jettask/{webui_sql → webui/sql}/batch_upsert_functions.sql +0 -0
- /jettask/{webui_sql → webui/sql}/verify_database.sql +0 -0
- {jettask-0.2.19.dist-info → jettask-0.2.20.dist-info}/WHEEL +0 -0
- {jettask-0.2.19.dist-info → jettask-0.2.20.dist-info}/entry_points.txt +0 -0
- {jettask-0.2.19.dist-info → jettask-0.2.20.dist-info}/licenses/LICENSE +0 -0
- {jettask-0.2.19.dist-info → jettask-0.2.20.dist-info}/top_level.txt +0 -0
jettask/persistence/task_updater.py
ADDED
@@ -0,0 +1,583 @@
+"""Task status update module
+
+Consumes task change events from the TASK_CHANGES stream and updates task status in the database.
+"""
+
+import asyncio
+import json
+import logging
+import traceback
+from typing import Dict, List, Optional, Any, Set
+from datetime import datetime, timezone
+
+import redis.asyncio as redis
+from redis.asyncio import Redis
+from sqlalchemy import text
+from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy.orm import sessionmaker
+
+logger = logging.getLogger(__name__)
+
+
+class TaskUpdater:
+    """Task status updater
+
+    Responsibilities:
+    - Consume task change events from the TASK_CHANGES stream
+    - Parse task status update information
+    - Batch-update task status in the database
+    - Support recovery of pending messages
+    """
+
+    def __init__(
+        self,
+        redis_client: Redis,
+        async_session_local: sessionmaker,
+        redis_prefix: str,
+        consumer_id: str
+    ):
+        """Initialize the task status updater
+
+        Args:
+            redis_client: async Redis client
+            async_session_local: SQLAlchemy session factory
+            redis_prefix: Redis key prefix
+            consumer_id: consumer ID
+        """
+        self.redis_client = redis_client
+        self.AsyncSessionLocal = async_session_local
+        self.redis_prefix = redis_prefix
+        self.consumer_id = consumer_id
+
+        # Stream configuration
+        self.change_stream_key = f"{redis_prefix}:TASK_CHANGES"
+        self.consumer_group = f"{redis_prefix}_changes_consumer"
+
+        # Task updates pending retry
+        self._pending_updates = {}
+        self._pending_updates_lock = asyncio.Lock()
+        self._max_pending_updates = 10000
+        self._retry_interval = 5  # retry every 5 seconds
+
+        self._running = False
+        self._consume_task = None
+        self._retry_task = None
+
+    async def start(self):
+        """Start the updater"""
+        # Create the consumer group
+        try:
+            await self.redis_client.xgroup_create(
+                self.change_stream_key, self.consumer_group, id='0', mkstream=True
+            )
+            logger.debug(f"Created consumer group for task changes stream")
+        except redis.ResponseError:
+            pass
+
+        self._running = True
+        self._consume_task = asyncio.create_task(self._consume_task_changes())
+        self._retry_task = asyncio.create_task(self._retry_pending_updates())
+        logger.debug("TaskUpdater started")
+
+    async def stop(self):
+        """Stop the updater"""
+        self._running = False
+
+        if self._consume_task:
+            self._consume_task.cancel()
+            try:
+                await self._consume_task
+            except asyncio.CancelledError:
+                pass
+
+        if self._retry_task:
+            self._retry_task.cancel()
+            try:
+                await self._retry_task
+            except asyncio.CancelledError:
+                pass
+
+        logger.debug("TaskUpdater stopped")
+
+    async def _consume_task_changes(self):
+        """Consume the task change event stream - event-driven updates (with pending-message recovery)"""
+        # Same pattern as listen_event_by_task: handle pending messages first, then new ones
+        check_backlog = True
+        lastid = "0-0"
+        batch_size = 1000
+
+        while self._running:
+            try:
+                # Decide where to read from: with a backlog, start from lastid; otherwise read new messages
+                if check_backlog:
+                    myid = lastid
+                else:
+                    myid = ">"
+
+                messages = await self.redis_client.xreadgroup(
+                    self.consumer_group,
+                    self.consumer_id,
+                    {self.change_stream_key: myid},
+                    count=batch_size,
+                    block=1000 if not check_backlog else 0  # do not block while draining the backlog
+                )
+
+                if not messages:
+                    check_backlog = False
+                    continue
+
+                # Check whether more backlog messages remain
+                if messages and len(messages[0][1]) > 0:
+                    check_backlog = len(messages[0][1]) >= batch_size
+                else:
+                    check_backlog = False
+
+                # Collect message IDs and the corresponding task IDs
+                msg_to_task = {}  # msg_id -> (stream_id, task_key) mapping
+
+                for _, stream_messages in messages:
+                    for msg_id, data in stream_messages:
+                        try:
+                            # Update lastid (whether or not the message is processed successfully)
+                            if isinstance(msg_id, bytes):
+                                lastid = msg_id.decode('utf-8')
+                            else:
+                                lastid = str(msg_id)
+
+                            task_key = data[b'id']
+                            task_key = task_key.decode('utf-8') if isinstance(task_key, bytes) else str(task_key)
+
+                            # Extract stream_id from the full task_key
+                            # Format: namespace:TASK:stream_id:queue_name
+                            stream_id = None
+                            if ':TASK:' in task_key:
+                                parts = task_key.split(':TASK:')
+                                if len(parts) == 2:
+                                    # Extract stream_id from the right-hand part
+                                    right_parts = parts[1].split(':')
+                                    if right_parts:
+                                        stream_id = right_parts[0]  # extract stream_id
+
+                            if stream_id:
+                                # Store the tuple: (stream_id, task_key)
+                                msg_to_task[msg_id] = (stream_id, task_key)
+                            else:
+                                logger.warning(f"Cannot extract stream_id from task_key: {task_key}")
+                        except Exception as e:
+                            logger.error(f"Error processing change event {msg_id}: {e} {data=}")
+                            logger.error(traceback.format_exc())
+                            # Messages that fail to parse should also be ACKed to avoid endless retries
+                            await self.redis_client.xack(self.change_stream_key, self.consumer_group, msg_id)
+
+                if msg_to_task:
+                    # Batch-update tasks; returns the successfully updated task IDs
+                    # msg_to_task values are now (stream_id, task_key) tuples
+                    id_tuples = list(set(msg_to_task.values()))
+                    logger.info(f"Processing {len(id_tuples)} task updates from change stream")
+                    successful_tuples = await self._update_tasks_by_event(id_tuples)
+
+                    # ACK only the messages that were updated successfully
+                    ack_ids = []
+                    failed_count = 0
+                    for msg_id, id_tuple in msg_to_task.items():
+                        if successful_tuples and id_tuple in successful_tuples:
+                            ack_ids.append(msg_id)
+                        else:
+                            failed_count += 1
+
+                    if ack_ids:
+                        await self.redis_client.xack(self.change_stream_key, self.consumer_group, *ack_ids)
+                        if len(ack_ids) > 0:
+                            logger.info(f"Updated {len(ack_ids)} task statuses")
+
+                    if failed_count > 0:
+                        logger.debug(f"Failed to update {failed_count} tasks, will retry")
+
+            except redis.ResponseError as e:
+                if "NOGROUP" in str(e):
+                    # Recreate the consumer group if it no longer exists
+                    try:
+                        await self.redis_client.xgroup_create(
+                            self.change_stream_key, self.consumer_group, id='0', mkstream=True
+                        )
+                        logger.debug(f"Recreated consumer group for task changes stream")
+                        check_backlog = True
+                        lastid = "0-0"
+                    except:
+                        pass
+                else:
+                    logger.error(f"Redis error in consume_task_changes: {e}")
+                    logger.error(traceback.format_exc())
+                    await asyncio.sleep(1)
+            except Exception as e:
+                logger.error(f"Error in consume_task_changes: {e}", exc_info=True)
+                await asyncio.sleep(1)
+
+    async def _update_tasks_by_event(self, id_tuples: List[tuple]) -> Set[tuple]:
+        """Batch-update task status based on event IDs
+
+        Args:
+            id_tuples: list of tuples, each of the form (stream_id, task_key)
+
+        Returns:
+            Set of tuples that were updated successfully
+        """
+        if not id_tuples:
+            return set()
+
+        successful_tuples = set()
+
+        try:
+            pipeline = self.redis_client.pipeline()
+            for stream_id, task_key in id_tuples:
+                pipeline.hgetall(task_key)
+
+            redis_values = await pipeline.execute()
+            updates = []
+            valid_tuples = []  # record the valid tuples
+
+            if len(id_tuples) != len(redis_values):
+                logger.error(f'Mismatch: {len(id_tuples)=} {len(redis_values)=}')
+                # Do not raise; keep processing what we can
+
+            for i, (stream_id, task_key) in enumerate(id_tuples):
+                if i >= len(redis_values):
+                    logger.error(f'Missing redis value for task_key={task_key}')
+                    continue
+
+                hash_data = redis_values[i]
+
+                if not hash_data:
+                    logger.debug(f'No hash data for task_key={task_key}')
+                    continue
+
+                try:
+                    # Parse the consumer_group out of the task_key
+                    # task_key format: namespace:TASK:stream_id:group_name
+                    # where group_name is the full consumer_group (format: jettask:QUEUE:queue_name:task_name)
+                    parts = task_key.split(':', 3)  # split into at most 4 parts
+                    if len(parts) == 4:
+                        # parts[0] = namespace (e.g. 'default')
+                        # parts[1] = 'TASK'
+                        # parts[2] = stream_id
+                        # parts[3] = group_name (consumer_group)
+                        consumer_group = parts[3]  # use group_name directly as the consumer_group
+                        logger.debug(f"Extracted consumer_group from task_key: {consumer_group}")
+                    else:
+                        logger.warning(f"Cannot parse consumer_group from task_key: {task_key}")
+                        continue
+
+                    # Extract the task_name from the consumer_group
+                    # consumer_group format: prefix:QUEUE:queue:task_name (e.g. jettask:QUEUE:robust_bench2:robust_benchmark.benchmark_task)
+                    task_name = None
+                    if consumer_group:
+                        parts = consumer_group.split(':')
+                        if len(parts) >= 4:
+                            # the last part is the task_name
+                            task_name = parts[-1]
+                            logger.debug(f"Extracted task_name '{task_name}' from consumer_group '{consumer_group}'")
+
+                    # Use stream_id as the task ID
+                    update_info = self._parse_task_hash(stream_id, hash_data)
+                    if update_info:
+                        # Add consumer_group and task_name to the update info
+                        update_info['consumer_group'] = consumer_group
+                        update_info['task_name'] = task_name or 'unknown'  # fall back to 'unknown' if task_name cannot be extracted
+                        # consumer_name is the worker_id (the worker that actually executed the task)
+                        update_info['consumer_name'] = update_info.get('worker_id')
+                        updates.append(update_info)
+                        valid_tuples.append((stream_id, task_key))
+                    else:
+                        logger.debug(f'Failed to parse stream_id={stream_id} hash_data={hash_data}')
+                except Exception as e:
+                    logger.error(f'Error parsing task stream_id={stream_id}: {e}')
+                    continue
+
+            if updates:
+                logger.info(f"Attempting to update {len(updates)} tasks, first few: {[u['id'] for u in updates[:3]]}")
+                try:
+                    # _update_tasks now returns the set of successfully updated IDs
+                    batch_successful = await self._update_tasks(updates)
+                    # Map the successful stream_ids back to tuples
+                    for stream_id in batch_successful:
+                        for tuple_item in valid_tuples:
+                            if tuple_item[0] == stream_id:  # stream_id matches
+                                successful_tuples.add(tuple_item)
+                    if batch_successful:
+                        logger.info(f"Successfully updated {len(batch_successful)} tasks from change events")
+                    else:
+                        logger.warning(f"No tasks were successfully updated")
+                except Exception as e:
+                    logger.error(f"Error in batch update: {e}")
+                    # Batch update failed; try updating one by one
+                    for update, tuple_item in zip(updates, valid_tuples):
+                        try:
+                            single_successful = await self._update_tasks([update])
+                            if update['id'] in single_successful:
+                                successful_tuples.add(tuple_item)
+                        except Exception as single_error:
+                            logger.error(f"Failed to update task {tuple_item[0]}: {single_error}")
+
+        except Exception as e:
+            logger.error(f"Error updating tasks by event: {e}", exc_info=True)
+
+        logger.debug(f'{successful_tuples=}')
+        return successful_tuples
+
+    def _parse_task_hash(self, task_id: str, hash_data: dict) -> Optional[dict]:
+        """Parse Redis hash data"""
+        update_info = {
+            'id': task_id,
+            'status': None,
+            'result': None,
+            'error_message': None,
+            'started_at': None,
+            'completed_at': None,
+            'worker_id': None,
+            'execution_time': None,
+            'duration': None
+        }
+
+        try:
+            from jettask.utils.serializer import loads_str
+
+            hash_dict = {}
+            for k, v in hash_data.items():
+                key = k.decode('utf-8') if isinstance(k, bytes) else k
+                if isinstance(v, bytes):
+                    try:
+                        value = loads_str(v)
+                        if isinstance(value, (dict, list)):
+                            value = json.dumps(value, ensure_ascii=False)
+                        else:
+                            value = str(value)
+                    except:
+                        try:
+                            value = v.decode('utf-8')
+                        except:
+                            value = str(v)
+                else:
+                    value = v
+                hash_dict[key] = value
+
+            update_info['status'] = hash_dict.get('status')
+            update_info['error_message'] = hash_dict.get('error_msg') or hash_dict.get('exception')
+
+            # Convert timestamps
+            for time_field in ['started_at', 'completed_at']:
+                if hash_dict.get(time_field):
+                    try:
+                        time_str = hash_dict[time_field]
+                        if isinstance(time_str, str) and time_str.startswith("b'") and time_str.endswith("'"):
+                            time_str = time_str[2:-1]
+                        update_info[time_field] = datetime.fromtimestamp(float(time_str), tz=timezone.utc)
+                    except:
+                        pass
+
+            update_info['worker_id'] = hash_dict.get('consumer') or hash_dict.get('worker_id')
+
+            # Convert numeric values - store the raw seconds value directly
+            for num_field in ['execution_time', 'duration']:
+                if hash_dict.get(num_field):
+                    try:
+                        num_str = hash_dict[num_field]
+                        # store the float seconds value directly
+                        update_info[num_field] = float(num_str)
+                    except:
+                        pass
+
+            # Handle result
+            if 'result' in hash_dict:
+                result_str = hash_dict['result']
+                if result_str == 'null':
+                    update_info['result'] = None
+                else:
+                    update_info['result'] = result_str
+
+            # Only return updates that actually carry data
+            if any(v is not None for k, v in update_info.items() if k != 'id'):
+                return update_info
+
+        except Exception as e:
+            logger.error(f"Failed to parse hash data for task {task_id}: {e}")
+
+        return None
+
+    async def _update_tasks(self, updates: List[Dict[str, Any]]) -> Set[str]:
+        """Batch-update task status (UPSERT logic against the task_runs table)
+
+        Returns:
+            Set of stream_ids that were updated successfully
+        """
+        if not updates:
+            return set()
+
+        try:
+            async with self.AsyncSessionLocal() as session:
+                # V3 schema: use UPSERT logic against the task_runs table
+                stream_ids = [u['id'] for u in updates]
+                logger.info(f"Upserting {len(stream_ids)} task_runs records")
+
+                # For partitioned tables we need a different UPSERT strategy:
+                # try UPDATE first, and INSERT only if no row was updated
+                upsert_query = text("""
+                    WITH updated AS (
+                        UPDATE task_runs SET
+                            consumer_name = COALESCE(CAST(:consumer_name AS TEXT), consumer_name),
+                            status = CASE
+                                WHEN CAST(:status AS TEXT) IS NULL THEN status
+                                WHEN status = 'pending' THEN COALESCE(CAST(:status AS TEXT), status)
+                                WHEN status = 'running' AND CAST(:status AS TEXT) IN ('success', 'failed', 'timeout', 'skipped') THEN CAST(:status AS TEXT)
+                                WHEN status IN ('success', 'failed', 'timeout', 'skipped') THEN status
+                                ELSE COALESCE(CAST(:status AS TEXT), status)
+                            END,
+                            result = CASE
+                                WHEN status IN ('success', 'failed', 'timeout', 'skipped') AND CAST(:status AS TEXT) NOT IN ('success', 'failed', 'timeout', 'skipped') THEN result
+                                ELSE COALESCE(CAST(:result AS jsonb), result)
+                            END,
+                            error_message = CASE
+                                WHEN status IN ('success', 'failed', 'timeout', 'skipped') AND CAST(:status AS TEXT) NOT IN ('success', 'failed', 'timeout', 'skipped') THEN error_message
+                                ELSE COALESCE(CAST(:error_message AS TEXT), error_message)
+                            END,
+                            start_time = COALESCE(CAST(:started_at AS TIMESTAMPTZ), start_time),
+                            end_time = CASE
+                                WHEN status IN ('success', 'failed', 'timeout', 'skipped') AND CAST(:status AS TEXT) NOT IN ('success', 'failed', 'timeout', 'skipped') THEN end_time
+                                ELSE COALESCE(CAST(:completed_at AS TIMESTAMPTZ), end_time)
+                            END,
+                            worker_id = COALESCE(CAST(:worker_id AS TEXT), worker_id),
+                            duration = COALESCE(CAST(:duration AS DOUBLE PRECISION), duration),
+                            execution_time = COALESCE(CAST(:execution_time AS DOUBLE PRECISION), execution_time),
+                            updated_at = CURRENT_TIMESTAMP
+                        WHERE stream_id = :stream_id AND consumer_group = :consumer_group
+                        RETURNING stream_id
+                    )
+                    INSERT INTO task_runs (
+                        stream_id, task_name, consumer_group, consumer_name, status, result, error_message,
+                        start_time, end_time, worker_id, duration, execution_time,
+                        created_at, updated_at
+                    )
+                    SELECT
+                        :stream_id, :task_name, :consumer_group, :consumer_name,
+                        COALESCE(CAST(:status AS TEXT), 'pending'),
+                        CAST(:result AS jsonb),
+                        CAST(:error_message AS TEXT),
+                        CAST(:started_at AS TIMESTAMPTZ),
+                        CAST(:completed_at AS TIMESTAMPTZ),
+                        CAST(:worker_id AS TEXT),
+                        CAST(:duration AS DOUBLE PRECISION),
+                        CAST(:execution_time AS DOUBLE PRECISION),
+                        CURRENT_TIMESTAMP, CURRENT_TIMESTAMP
+                    WHERE NOT EXISTS (SELECT 1 FROM updated)
+                    RETURNING stream_id;
+                """)
+
+                # Rename the id parameter for each update (id -> stream_id)
+                run_updates = []
+                for update in updates:
+                    run_update = update.copy()
+                    run_update['stream_id'] = run_update.pop('id')  # rename id to stream_id
+                    # consumer_group is already in update_info; no extra handling needed
+                    run_updates.append(run_update)
+
+                # Execute the UPSERTs in batches - transactional batching for better performance
+                successful_count = 0
+                batch_size = 20  # process 20 records per batch
+
+                for i in range(0, len(run_updates), batch_size):
+                    batch = run_updates[i:i+batch_size]
+
+                    try:
+                        # Handle the whole batch inside one transaction
+                        for run_update in batch:
+                            result = await session.execute(upsert_query, run_update)
+                            if result.rowcount > 0:
+                                successful_count += 1
+
+                        # Commit the batch
+                        await session.commit()
+                        logger.debug(f"Batch {i//batch_size + 1} committed: {len(batch)} records")
+
+                    except Exception as e:
+                        logger.error(f"Batch {i//batch_size + 1} failed, trying individual records: {e}")
+                        await session.rollback()
+
+                        # If the batch fails, fall back to processing its records one by one
+                        for run_update in batch:
+                            try:
+                                result = await session.execute(upsert_query, run_update)
+                                await session.commit()
+                                if result.rowcount > 0:
+                                    successful_count += 1
+                            except Exception as individual_error:
+                                logger.error(f"Individual upsert failed for {run_update.get('stream_id')}: {individual_error}")
+                                await session.rollback()
+
+                # Log the number of successful updates
+                if successful_count > 0:
+                    logger.info(f"Upserted {successful_count}/{len(run_updates)} task_runs records")
+
+                # Determine which tasks are in a terminal state and should be deleted from Redis
+                completed_task_keys = []
+                for update in updates:
+                    status = update.get('status')
+                    # If the status is terminal (success, error, cancel, etc.)
+                    if status in ['success', 'error', 'failed', 'cancel', 'cancelled', 'timeout', 'skipped']:
+                        # Build the task_key
+                        # task_key format: namespace:TASK:stream_id:group_name
+                        stream_id = update['id']
+                        consumer_group = update.get('consumer_group')
+                        if consumer_group:
+                            # Extract the namespace from the consumer_group
+                            # consumer_group format: prefix:QUEUE:queue:task_name
+                            parts = consumer_group.split(':', 1)
+                            namespace = parts[0] if parts else 'default'
+                            task_key = f"{namespace}:TASK:{stream_id}:{consumer_group}"
+                            completed_task_keys.append(task_key)
+                            logger.info(f"Task {stream_id} with status {status} will be deleted from Redis: {task_key}")
+
+                # Delete the completed tasks from Redis
+                if completed_task_keys:
+                    try:
+                        pipeline = self.redis_client.pipeline()
+                        for task_key in completed_task_keys:
+                            pipeline.delete(task_key)
+                        deleted_results = await pipeline.execute()
+                        deleted_count = sum(1 for r in deleted_results if r > 0)
+                        if deleted_count > 0:
+                            logger.info(f"Deleted {deleted_count} completed tasks from Redis")
+                    except Exception as e:
+                        logger.error(f"Error deleting completed tasks from Redis: {e}")
+
+                # The UPSERT path always succeeds, so return all stream_ids
+                # No complex error handling is needed because the UPSERT keeps each operation atomic
+                return set(stream_ids)
+
+        except Exception as e:
+            logger.error(f"Error upserting task statuses: {e}")
+            logger.error(traceback.format_exc())
+            return set()  # return an empty set on error
+
+    async def _retry_pending_updates(self):
+        """Periodically retry pending task updates"""
+        while self._running:
+            try:
+                await asyncio.sleep(self._retry_interval)  # wait for a while
+
+                # Grab the updates that are pending retry
+                async with self._pending_updates_lock:
+                    if not self._pending_updates:
+                        continue
+
+                    # Take all pending updates
+                    pending_items = list(self._pending_updates.items())
+                    self._pending_updates.clear()
+
+                if pending_items:
+                    # Retry the updates
+                    updates = [update_info for _, update_info in pending_items]
+                    logger.debug(f"Retrying {len(pending_items)} pending task updates")
+                    await self._update_tasks(updates)
+
+            except Exception as e:
+                logger.error(f"Error in retry pending updates: {e}")
+                await asyncio.sleep(5)
jettask/scheduler/__init__.py
CHANGED
@@ -6,11 +6,11 @@
 from .models import ScheduledTask, TaskExecutionHistory
 from .scheduler import TaskScheduler
 from .loader import TaskLoader
-from .
+from .task_crud import ScheduledTaskManager
 
 __all__ = [
     'ScheduledTask',
-    'TaskExecutionHistory',
+    'TaskExecutionHistory',
     'TaskScheduler',
     'TaskLoader',
     'ScheduledTaskManager'
jettask/scheduler/loader.py
CHANGED
@@ -8,7 +8,7 @@ from datetime import datetime, timedelta
 import json
 
 from ..utils.task_logger import get_task_logger, LogContext
-from .
+from .task_crud import ScheduledTaskManager
 from .models import ScheduledTask
 
 
@@ -53,11 +53,12 @@ class TaskLoader:
         self.loaded_tasks: Set[str] = set()  # IDs of tasks already loaded
 
     async def connect(self):
-        """Establish Redis
+        """Establish the Redis connection (using the unified connection pool manager)"""
         if not self.redis:
-
-
-
+            from jettask.utils.db_connector import get_async_redis_client
+
+            self.redis = get_async_redis_client(
+                redis_url=self.redis_url,
                 decode_responses=False
             )
 
jettask/scheduler/run_scheduler.py
CHANGED
@@ -14,7 +14,7 @@ sys.path.insert(0, str(project_root))
 
 from jettask import Jettask
 from jettask.scheduler.scheduler import TaskScheduler
-from jettask.scheduler.
+from jettask.scheduler.task_crud import ScheduledTaskManager
 from jettask.utils.task_logger import get_task_logger
 
 logger = get_task_logger(__name__)
jettask/scheduler/scheduler.py
CHANGED
@@ -10,9 +10,8 @@ from typing import Optional, List, TYPE_CHECKING
 from datetime import datetime
 
 from ..utils.task_logger import get_task_logger, LogContext
-from .
-from .models import ScheduledTask, TaskExecutionHistory, TaskType
-from .models import TaskStatus as ScheduledTaskStatus  # status enum dedicated to scheduled tasks
+from .task_crud import ScheduledTaskManager
+from .models import ScheduledTask, TaskExecutionHistory, TaskType, TaskStatus as ScheduledTaskStatus
 from .loader import TaskLoader
 
 # Type-annotation imports (to avoid circular imports)
@@ -619,11 +618,12 @@ class TaskScheduler:
 
     async def run(self):
        """Run the scheduler main loop"""
-        # Establish Redis
+        # Establish the Redis connection (using the unified connection pool manager)
         if not self.redis:
-
-
-
+            from jettask.utils.db_connector import get_async_redis_client
+
+            self.redis = get_async_redis_client(
+                redis_url=self.redis_url,
                 decode_responses=False
             )
 
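
Both TaskLoader.connect() and TaskScheduler.run() now obtain their Redis client through jettask.utils.db_connector.get_async_redis_client instead of constructing one inline, so schedulers in the same process can share a single connection pool. The helper's implementation is not part of this excerpt; the snippet below is only an illustrative sketch of the general shape such an accessor tends to have (a module-level cache keyed by URL and decode mode), not the package's actual code.

# Illustrative only: one common shape for a pooled async-Redis accessor.
# The real jettask.utils.db_connector.get_async_redis_client may differ.
import redis.asyncio as redis

_clients: dict[tuple[str, bool], redis.Redis] = {}


def get_async_redis_client(redis_url: str, decode_responses: bool = False) -> redis.Redis:
    """Return a cached client per (url, decode_responses) so callers share one pool."""
    key = (redis_url, decode_responses)
    if key not in _clients:
        _clients[key] = redis.Redis.from_url(
            redis_url,
            decode_responses=decode_responses,
            max_connections=50,  # arbitrary cap for the shared pool
        )
    return _clients[key]
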