jettask 0.2.18__py3-none-any.whl → 0.2.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165)
  1. jettask/__init__.py +60 -2
  2. jettask/cli.py +314 -228
  3. jettask/config/__init__.py +9 -1
  4. jettask/config/config.py +245 -0
  5. jettask/config/env_loader.py +381 -0
  6. jettask/config/lua_scripts.py +158 -0
  7. jettask/config/nacos_config.py +132 -5
  8. jettask/core/__init__.py +1 -1
  9. jettask/core/app.py +1573 -666
  10. jettask/core/app_importer.py +33 -16
  11. jettask/core/container.py +532 -0
  12. jettask/core/task.py +1 -4
  13. jettask/core/unified_manager_base.py +2 -2
  14. jettask/executor/__init__.py +38 -0
  15. jettask/executor/core.py +625 -0
  16. jettask/executor/executor.py +338 -0
  17. jettask/executor/orchestrator.py +290 -0
  18. jettask/executor/process_entry.py +638 -0
  19. jettask/executor/task_executor.py +317 -0
  20. jettask/messaging/__init__.py +68 -0
  21. jettask/messaging/event_pool.py +2188 -0
  22. jettask/messaging/reader.py +519 -0
  23. jettask/messaging/registry.py +266 -0
  24. jettask/messaging/scanner.py +369 -0
  25. jettask/messaging/sender.py +312 -0
  26. jettask/persistence/__init__.py +118 -0
  27. jettask/persistence/backlog_monitor.py +567 -0
  28. jettask/{backend/data_access.py → persistence/base.py} +58 -57
  29. jettask/persistence/consumer.py +315 -0
  30. jettask/{core → persistence}/db_manager.py +23 -22
  31. jettask/persistence/maintenance.py +81 -0
  32. jettask/persistence/message_consumer.py +259 -0
  33. jettask/{backend/namespace_data_access.py → persistence/namespace.py} +66 -98
  34. jettask/persistence/offline_recovery.py +196 -0
  35. jettask/persistence/queue_discovery.py +215 -0
  36. jettask/persistence/task_persistence.py +218 -0
  37. jettask/persistence/task_updater.py +583 -0
  38. jettask/scheduler/__init__.py +2 -2
  39. jettask/scheduler/loader.py +6 -5
  40. jettask/scheduler/run_scheduler.py +1 -1
  41. jettask/scheduler/scheduler.py +7 -7
  42. jettask/scheduler/{unified_scheduler_manager.py → scheduler_coordinator.py} +18 -13
  43. jettask/task/__init__.py +16 -0
  44. jettask/{router.py → task/router.py} +26 -8
  45. jettask/task/task_center/__init__.py +9 -0
  46. jettask/task/task_executor.py +318 -0
  47. jettask/task/task_registry.py +291 -0
  48. jettask/test_connection_monitor.py +73 -0
  49. jettask/utils/__init__.py +31 -1
  50. jettask/{monitor/run_backlog_collector.py → utils/backlog_collector.py} +1 -1
  51. jettask/utils/db_connector.py +1629 -0
  52. jettask/{db_init.py → utils/db_init.py} +1 -1
  53. jettask/utils/rate_limit/__init__.py +30 -0
  54. jettask/utils/rate_limit/concurrency_limiter.py +665 -0
  55. jettask/utils/rate_limit/config.py +145 -0
  56. jettask/utils/rate_limit/limiter.py +41 -0
  57. jettask/utils/rate_limit/manager.py +269 -0
  58. jettask/utils/rate_limit/qps_limiter.py +154 -0
  59. jettask/utils/rate_limit/task_limiter.py +384 -0
  60. jettask/utils/serializer.py +3 -0
  61. jettask/{monitor/stream_backlog_monitor.py → utils/stream_backlog.py} +14 -6
  62. jettask/utils/time_sync.py +173 -0
  63. jettask/webui/__init__.py +27 -0
  64. jettask/{api/v1 → webui/api}/alerts.py +1 -1
  65. jettask/{api/v1 → webui/api}/analytics.py +2 -2
  66. jettask/{api/v1 → webui/api}/namespaces.py +1 -1
  67. jettask/{api/v1 → webui/api}/overview.py +1 -1
  68. jettask/{api/v1 → webui/api}/queues.py +3 -3
  69. jettask/{api/v1 → webui/api}/scheduled.py +1 -1
  70. jettask/{api/v1 → webui/api}/settings.py +1 -1
  71. jettask/{api.py → webui/app.py} +253 -145
  72. jettask/webui/namespace_manager/__init__.py +10 -0
  73. jettask/{multi_namespace_consumer.py → webui/namespace_manager/multi.py} +69 -22
  74. jettask/{unified_consumer_manager.py → webui/namespace_manager/unified.py} +1 -1
  75. jettask/{run.py → webui/run.py} +2 -2
  76. jettask/{services → webui/services}/__init__.py +1 -3
  77. jettask/{services → webui/services}/overview_service.py +34 -16
  78. jettask/{services → webui/services}/queue_service.py +1 -1
  79. jettask/{backend → webui/services}/queue_stats_v2.py +1 -1
  80. jettask/{services → webui/services}/settings_service.py +1 -1
  81. jettask/worker/__init__.py +53 -0
  82. jettask/worker/lifecycle.py +1507 -0
  83. jettask/worker/manager.py +583 -0
  84. jettask/{core/offline_worker_recovery.py → worker/recovery.py} +268 -175
  85. {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/METADATA +2 -71
  86. jettask-0.2.20.dist-info/RECORD +145 -0
  87. jettask/__main__.py +0 -140
  88. jettask/api/__init__.py +0 -103
  89. jettask/backend/__init__.py +0 -1
  90. jettask/backend/api/__init__.py +0 -3
  91. jettask/backend/api/v1/__init__.py +0 -17
  92. jettask/backend/api/v1/monitoring.py +0 -431
  93. jettask/backend/api/v1/namespaces.py +0 -504
  94. jettask/backend/api/v1/queues.py +0 -342
  95. jettask/backend/api/v1/tasks.py +0 -367
  96. jettask/backend/core/__init__.py +0 -3
  97. jettask/backend/core/cache.py +0 -221
  98. jettask/backend/core/database.py +0 -200
  99. jettask/backend/core/exceptions.py +0 -102
  100. jettask/backend/dependencies.py +0 -261
  101. jettask/backend/init_meta_db.py +0 -158
  102. jettask/backend/main.py +0 -1426
  103. jettask/backend/main_unified.py +0 -78
  104. jettask/backend/main_v2.py +0 -394
  105. jettask/backend/models/__init__.py +0 -3
  106. jettask/backend/models/requests.py +0 -236
  107. jettask/backend/models/responses.py +0 -230
  108. jettask/backend/namespace_api_old.py +0 -267
  109. jettask/backend/services/__init__.py +0 -3
  110. jettask/backend/start.py +0 -42
  111. jettask/backend/unified_api_router.py +0 -1541
  112. jettask/cleanup_deprecated_tables.sql +0 -16
  113. jettask/core/consumer_manager.py +0 -1695
  114. jettask/core/delay_scanner.py +0 -256
  115. jettask/core/event_pool.py +0 -1700
  116. jettask/core/heartbeat_process.py +0 -222
  117. jettask/core/task_batch.py +0 -153
  118. jettask/core/worker_scanner.py +0 -271
  119. jettask/executors/__init__.py +0 -5
  120. jettask/executors/asyncio.py +0 -876
  121. jettask/executors/base.py +0 -30
  122. jettask/executors/common.py +0 -148
  123. jettask/executors/multi_asyncio.py +0 -309
  124. jettask/gradio_app.py +0 -570
  125. jettask/integrated_gradio_app.py +0 -1088
  126. jettask/main.py +0 -0
  127. jettask/monitoring/__init__.py +0 -3
  128. jettask/pg_consumer.py +0 -1896
  129. jettask/run_monitor.py +0 -22
  130. jettask/run_webui.py +0 -148
  131. jettask/scheduler/multi_namespace_scheduler.py +0 -294
  132. jettask/scheduler/unified_manager.py +0 -450
  133. jettask/task_center_client.py +0 -150
  134. jettask/utils/serializer_optimized.py +0 -33
  135. jettask/webui_exceptions.py +0 -67
  136. jettask-0.2.18.dist-info/RECORD +0 -150
  137. /jettask/{constants.py → config/constants.py} +0 -0
  138. /jettask/{backend/config.py → config/task_center.py} +0 -0
  139. /jettask/{pg_consumer → messaging/pg_consumer}/pg_consumer_v2.py +0 -0
  140. /jettask/{pg_consumer → messaging/pg_consumer}/sql/add_execution_time_field.sql +0 -0
  141. /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_new_tables.sql +0 -0
  142. /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_tables_v3.sql +0 -0
  143. /jettask/{pg_consumer → messaging/pg_consumer}/sql/migrate_to_new_structure.sql +0 -0
  144. /jettask/{pg_consumer → messaging/pg_consumer}/sql/modify_time_fields.sql +0 -0
  145. /jettask/{pg_consumer → messaging/pg_consumer}/sql_utils.py +0 -0
  146. /jettask/{models.py → persistence/models.py} +0 -0
  147. /jettask/scheduler/{manager.py → task_crud.py} +0 -0
  148. /jettask/{schema.sql → schemas/schema.sql} +0 -0
  149. /jettask/{task_center.py → task/task_center/client.py} +0 -0
  150. /jettask/{monitoring → utils}/file_watcher.py +0 -0
  151. /jettask/{services/redis_monitor_service.py → utils/redis_monitor.py} +0 -0
  152. /jettask/{api/v1 → webui/api}/__init__.py +0 -0
  153. /jettask/{webui_config.py → webui/config.py} +0 -0
  154. /jettask/{webui_models → webui/models}/__init__.py +0 -0
  155. /jettask/{webui_models → webui/models}/namespace.py +0 -0
  156. /jettask/{services → webui/services}/alert_service.py +0 -0
  157. /jettask/{services → webui/services}/analytics_service.py +0 -0
  158. /jettask/{services → webui/services}/scheduled_task_service.py +0 -0
  159. /jettask/{services → webui/services}/task_service.py +0 -0
  160. /jettask/{webui_sql → webui/sql}/batch_upsert_functions.sql +0 -0
  161. /jettask/{webui_sql → webui/sql}/verify_database.sql +0 -0
  162. {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/WHEEL +0 -0
  163. {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/entry_points.txt +0 -0
  164. {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/licenses/LICENSE +0 -0
  165. {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/top_level.txt +0 -0
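Most of this release is a package reorganization: flat top-level modules move under config/, executor/, messaging/, persistence/, task/, utils/, webui/ and worker/ sub-packages, and several modules are renamed in place. For downstream code that imports the renamed modules directly, the rename entries above imply import-path updates along the following lines (an illustrative sketch based only on the listing; ScheduledTaskManager is the only symbol confirmed by the diffs further down, the other examples are module-level imports only):

    # jettask/scheduler/manager.py -> jettask/scheduler/task_crud.py
    from jettask.scheduler.task_crud import ScheduledTaskManager   # was: from jettask.scheduler.manager import ScheduledTaskManager

    # jettask/router.py -> jettask/task/router.py
    import jettask.task.router                 # was: import jettask.router

    # jettask/task_center.py -> jettask/task/task_center/client.py
    import jettask.task.task_center.client     # was: import jettask.task_center

    # jettask/db_init.py -> jettask/utils/db_init.py
    import jettask.utils.db_init               # was: import jettask.db_init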
jettask/persistence/task_updater.py (new file)
@@ -0,0 +1,583 @@
+ """Task status update module
+
+ Consumes task change events from the TASK_CHANGES stream and updates task status in the database.
+ """
+
+ import asyncio
+ import json
+ import logging
+ import traceback
+ from typing import Dict, List, Optional, Any, Set
+ from datetime import datetime, timezone
+
+ import redis.asyncio as redis
+ from redis.asyncio import Redis
+ from sqlalchemy import text
+ from sqlalchemy.ext.asyncio import AsyncSession
+ from sqlalchemy.orm import sessionmaker
+
+ logger = logging.getLogger(__name__)
+
+
+ class TaskUpdater:
+     """Task status updater
+
+     Responsibilities:
+     - Consume task change events from the TASK_CHANGES stream
+     - Parse task status update information
+     - Batch-update task status in the database
+     - Support recovery of pending messages
+     """
+
+     def __init__(
+         self,
+         redis_client: Redis,
+         async_session_local: sessionmaker,
+         redis_prefix: str,
+         consumer_id: str
+     ):
+         """Initialize the task status updater
+
+         Args:
+             redis_client: Async Redis client
+             async_session_local: SQLAlchemy session factory
+             redis_prefix: Redis key prefix
+             consumer_id: Consumer ID
+         """
+         self.redis_client = redis_client
+         self.AsyncSessionLocal = async_session_local
+         self.redis_prefix = redis_prefix
+         self.consumer_id = consumer_id
+
+         # Stream configuration
+         self.change_stream_key = f"{redis_prefix}:TASK_CHANGES"
+         self.consumer_group = f"{redis_prefix}_changes_consumer"
+
+         # Task updates pending retry
+         self._pending_updates = {}
+         self._pending_updates_lock = asyncio.Lock()
+         self._max_pending_updates = 10000
+         self._retry_interval = 5  # retry every 5 seconds
+
+         self._running = False
+         self._consume_task = None
+         self._retry_task = None
+
+     async def start(self):
+         """Start the updater."""
+         # Create the consumer group
+         try:
+             await self.redis_client.xgroup_create(
+                 self.change_stream_key, self.consumer_group, id='0', mkstream=True
+             )
+             logger.debug(f"Created consumer group for task changes stream")
+         except redis.ResponseError:
+             pass
+
+         self._running = True
+         self._consume_task = asyncio.create_task(self._consume_task_changes())
+         self._retry_task = asyncio.create_task(self._retry_pending_updates())
+         logger.debug("TaskUpdater started")
+
+     async def stop(self):
+         """Stop the updater."""
+         self._running = False
+
+         if self._consume_task:
+             self._consume_task.cancel()
+             try:
+                 await self._consume_task
+             except asyncio.CancelledError:
+                 pass
+
+         if self._retry_task:
+             self._retry_task.cancel()
+             try:
+                 await self._retry_task
+             except asyncio.CancelledError:
+                 pass
+
+         logger.debug("TaskUpdater stopped")
+
+     async def _consume_task_changes(self):
+         """Consume the task change event stream - event-driven updates (supports pending message recovery)."""
+         # Mirrors the listen_event_by_task pattern: handle pending messages first, then new messages
+         check_backlog = True
+         lastid = "0-0"
+         batch_size = 1000
+
+         while self._running:
+             try:
+                 # Decide where to read from: with a backlog, start from lastid; otherwise read new messages
+                 if check_backlog:
+                     myid = lastid
+                 else:
+                     myid = ">"
+
+                 messages = await self.redis_client.xreadgroup(
+                     self.consumer_group,
+                     self.consumer_id,
+                     {self.change_stream_key: myid},
+                     count=batch_size,
+                     block=1000 if not check_backlog else 0  # do not block while draining the backlog
+                 )
+
+                 if not messages:
+                     check_backlog = False
+                     continue
+
+                 # Check whether more backlog messages remain
+                 if messages and len(messages[0][1]) > 0:
+                     check_backlog = len(messages[0][1]) >= batch_size
+                 else:
+                     check_backlog = False
+
+                 # Collect message IDs and their corresponding task IDs
+                 msg_to_task = {}  # msg_id -> (stream_id, task_key) mapping
+
+                 for _, stream_messages in messages:
+                     for msg_id, data in stream_messages:
+                         try:
+                             # Update lastid (whether or not the message is processed successfully)
+                             if isinstance(msg_id, bytes):
+                                 lastid = msg_id.decode('utf-8')
+                             else:
+                                 lastid = str(msg_id)
+
+                             task_key = data[b'id']
+                             task_key = task_key.decode('utf-8') if isinstance(task_key, bytes) else str(task_key)
+
+                             # Extract stream_id from the full task_key
+                             # Format: namespace:TASK:stream_id:queue_name
+                             stream_id = None
+                             if ':TASK:' in task_key:
+                                 parts = task_key.split(':TASK:')
+                                 if len(parts) == 2:
+                                     # Take the stream_id from the right-hand part
+                                     right_parts = parts[1].split(':')
+                                     if right_parts:
+                                         stream_id = right_parts[0]  # the stream_id
+
+                             if stream_id:
+                                 # Store the tuple (stream_id, task_key)
+                                 msg_to_task[msg_id] = (stream_id, task_key)
+                             else:
+                                 logger.warning(f"Cannot extract stream_id from task_key: {task_key}")
+                         except Exception as e:
+                             logger.error(f"Error processing change event {msg_id}: {e} {data=}")
+                             logger.error(traceback.format_exc())
+                             # Messages that fail to parse should still be ACKed to avoid endless retries
+                             await self.redis_client.xack(self.change_stream_key, self.consumer_group, msg_id)
+
+                 if msg_to_task:
+                     # Batch-update tasks; returns the successfully updated task IDs
+                     # msg_to_task values are now (stream_id, task_key) tuples
+                     id_tuples = list(set(msg_to_task.values()))
+                     logger.info(f"Processing {len(id_tuples)} task updates from change stream")
+                     successful_tuples = await self._update_tasks_by_event(id_tuples)
+
+                     # Only ACK messages that were updated successfully
+                     ack_ids = []
+                     failed_count = 0
+                     for msg_id, id_tuple in msg_to_task.items():
+                         if successful_tuples and id_tuple in successful_tuples:
+                             ack_ids.append(msg_id)
+                         else:
+                             failed_count += 1
+
+                     if ack_ids:
+                         await self.redis_client.xack(self.change_stream_key, self.consumer_group, *ack_ids)
+                         if len(ack_ids) > 0:
+                             logger.info(f"Updated {len(ack_ids)} task statuses")
+
+                     if failed_count > 0:
+                         logger.debug(f"Failed to update {failed_count} tasks, will retry")
+
+             except redis.ResponseError as e:
+                 if "NOGROUP" in str(e):
+                     # Recreate the consumer group if it no longer exists
+                     try:
+                         await self.redis_client.xgroup_create(
+                             self.change_stream_key, self.consumer_group, id='0', mkstream=True
+                         )
+                         logger.debug(f"Recreated consumer group for task changes stream")
+                         check_backlog = True
+                         lastid = "0-0"
+                     except:
+                         pass
+                 else:
+                     logger.error(f"Redis error in consume_task_changes: {e}")
+                     logger.error(traceback.format_exc())
+                     await asyncio.sleep(1)
+             except Exception as e:
+                 logger.error(f"Error in consume_task_changes: {e}", exc_info=True)
+                 await asyncio.sleep(1)
+
+     async def _update_tasks_by_event(self, id_tuples: List[tuple]) -> Set[tuple]:
+         """Batch-update task status based on event IDs
+
+         Args:
+             id_tuples: List of (stream_id, task_key) tuples
+
+         Returns:
+             Set of tuples that were updated successfully
+         """
+         if not id_tuples:
+             return set()
+
+         successful_tuples = set()
+
+         try:
+             pipeline = self.redis_client.pipeline()
+             for stream_id, task_key in id_tuples:
+                 pipeline.hgetall(task_key)
+
+             redis_values = await pipeline.execute()
+             updates = []
+             valid_tuples = []  # track the tuples that are valid
+
+             if len(id_tuples) != len(redis_values):
+                 logger.error(f'Mismatch: {len(id_tuples)=} {len(redis_values)=}')
+                 # Do not raise; keep processing whatever can be processed
+
+             for i, (stream_id, task_key) in enumerate(id_tuples):
+                 if i >= len(redis_values):
+                     logger.error(f'Missing redis value for task_key={task_key}')
+                     continue
+
+                 hash_data = redis_values[i]
+
+                 if not hash_data:
+                     logger.debug(f'No hash data for task_key={task_key}')
+                     continue
+
+                 try:
+                     # Parse the consumer_group out of the task_key
+                     # task_key format: namespace:TASK:stream_id:group_name
+                     # where group_name is the full consumer_group (format: jettask:QUEUE:queue_name:task_name)
+                     parts = task_key.split(':', 3)  # split into at most 4 parts
+                     if len(parts) == 4:
+                         # parts[0] = namespace (e.g. 'default')
+                         # parts[1] = 'TASK'
+                         # parts[2] = stream_id
+                         # parts[3] = group_name (consumer_group)
+                         consumer_group = parts[3]  # use group_name directly as the consumer_group
+                         logger.debug(f"Extracted consumer_group from task_key: {consumer_group}")
+                     else:
+                         logger.warning(f"Cannot parse consumer_group from task_key: {task_key}")
+                         continue
+
+                     # Extract the task_name from the consumer_group
+                     # consumer_group format: prefix:QUEUE:queue:task_name (e.g. jettask:QUEUE:robust_bench2:robust_benchmark.benchmark_task)
+                     task_name = None
+                     if consumer_group:
+                         parts = consumer_group.split(':')
+                         if len(parts) >= 4:
+                             # The last part is the task_name
+                             task_name = parts[-1]
+                             logger.debug(f"Extracted task_name '{task_name}' from consumer_group '{consumer_group}'")
+
+                     # Use the stream_id as the task ID
+                     update_info = self._parse_task_hash(stream_id, hash_data)
+                     if update_info:
+                         # Attach consumer_group and task_name to the update info
+                         update_info['consumer_group'] = consumer_group
+                         update_info['task_name'] = task_name or 'unknown'  # fall back to 'unknown' if task_name cannot be extracted
+                         # consumer_name is the worker_id (the worker that actually executed the task)
+                         update_info['consumer_name'] = update_info.get('worker_id')
+                         updates.append(update_info)
+                         valid_tuples.append((stream_id, task_key))
+                     else:
+                         logger.debug(f'Failed to parse stream_id={stream_id} hash_data={hash_data}')
+                 except Exception as e:
+                     logger.error(f'Error parsing task stream_id={stream_id}: {e}')
+                     continue
+
+             if updates:
+                 logger.info(f"Attempting to update {len(updates)} tasks, first few: {[u['id'] for u in updates[:3]]}")
+                 try:
+                     # _update_tasks now returns the set of successfully updated IDs
+                     batch_successful = await self._update_tasks(updates)
+                     # Map the successful stream_ids back to their tuples
+                     for stream_id in batch_successful:
+                         for tuple_item in valid_tuples:
+                             if tuple_item[0] == stream_id:  # stream_id matches
+                                 successful_tuples.add(tuple_item)
+                     if batch_successful:
+                         logger.info(f"Successfully updated {len(batch_successful)} tasks from change events")
+                     else:
+                         logger.warning(f"No tasks were successfully updated")
+                 except Exception as e:
+                     logger.error(f"Error in batch update: {e}")
+                     # Batch update failed; fall back to updating one by one
+                     for update, tuple_item in zip(updates, valid_tuples):
+                         try:
+                             single_successful = await self._update_tasks([update])
+                             if update['id'] in single_successful:
+                                 successful_tuples.add(tuple_item)
+                         except Exception as single_error:
+                             logger.error(f"Failed to update task {tuple_item[0]}: {single_error}")
+
+         except Exception as e:
+             logger.error(f"Error updating tasks by event: {e}", exc_info=True)
+
+         logger.debug(f'{successful_tuples=}')
+         return successful_tuples
+
+     def _parse_task_hash(self, task_id: str, hash_data: dict) -> Optional[dict]:
+         """Parse the Redis hash data."""
+         update_info = {
+             'id': task_id,
+             'status': None,
+             'result': None,
+             'error_message': None,
+             'started_at': None,
+             'completed_at': None,
+             'worker_id': None,
+             'execution_time': None,
+             'duration': None
+         }
+
+         try:
+             from jettask.utils.serializer import loads_str
+
+             hash_dict = {}
+             for k, v in hash_data.items():
+                 key = k.decode('utf-8') if isinstance(k, bytes) else k
+                 if isinstance(v, bytes):
+                     try:
+                         value = loads_str(v)
+                         if isinstance(value, (dict, list)):
+                             value = json.dumps(value, ensure_ascii=False)
+                         else:
+                             value = str(value)
+                     except:
+                         try:
+                             value = v.decode('utf-8')
+                         except:
+                             value = str(v)
+                 else:
+                     value = v
+                 hash_dict[key] = value
+
+             update_info['status'] = hash_dict.get('status')
+             update_info['error_message'] = hash_dict.get('error_msg') or hash_dict.get('exception')
+
+             # Convert timestamps
+             for time_field in ['started_at', 'completed_at']:
+                 if hash_dict.get(time_field):
+                     try:
+                         time_str = hash_dict[time_field]
+                         if isinstance(time_str, str) and time_str.startswith("b'") and time_str.endswith("'"):
+                             time_str = time_str[2:-1]
+                         update_info[time_field] = datetime.fromtimestamp(float(time_str), tz=timezone.utc)
+                     except:
+                         pass
+
+             update_info['worker_id'] = hash_dict.get('consumer') or hash_dict.get('worker_id')
+
+             # Convert numeric fields - store the raw value in seconds
+             for num_field in ['execution_time', 'duration']:
+                 if hash_dict.get(num_field):
+                     try:
+                         num_str = hash_dict[num_field]
+                         # Store the float seconds value directly
+                         update_info[num_field] = float(num_str)
+                     except:
+                         pass
+
+             # Handle the result field
+             if 'result' in hash_dict:
+                 result_str = hash_dict['result']
+                 if result_str == 'null':
+                     update_info['result'] = None
+                 else:
+                     update_info['result'] = result_str
+
+             # Only return updates that actually carry data
+             if any(v is not None for k, v in update_info.items() if k != 'id'):
+                 return update_info
+
+         except Exception as e:
+             logger.error(f"Failed to parse hash data for task {task_id}: {e}")
+
+         return None
+
+     async def _update_tasks(self, updates: List[Dict[str, Any]]) -> Set[str]:
+         """Batch-update task status (UPSERT logic against the task_runs table)
+
+         Returns:
+             Set of stream_ids that were updated successfully
+         """
+         if not updates:
+             return set()
+
+         try:
+             async with self.AsyncSessionLocal() as session:
+                 # V3 schema: UPSERT logic against the task_runs table
+                 stream_ids = [u['id'] for u in updates]
+                 logger.info(f"Upserting {len(stream_ids)} task_runs records")
+
+                 # Partitioned tables need a different UPSERT strategy:
+                 # try UPDATE first, and INSERT only if no row was updated
+                 upsert_query = text("""
+                     WITH updated AS (
+                         UPDATE task_runs SET
+                             consumer_name = COALESCE(CAST(:consumer_name AS TEXT), consumer_name),
+                             status = CASE
+                                 WHEN CAST(:status AS TEXT) IS NULL THEN status
+                                 WHEN status = 'pending' THEN COALESCE(CAST(:status AS TEXT), status)
+                                 WHEN status = 'running' AND CAST(:status AS TEXT) IN ('success', 'failed', 'timeout', 'skipped') THEN CAST(:status AS TEXT)
+                                 WHEN status IN ('success', 'failed', 'timeout', 'skipped') THEN status
+                                 ELSE COALESCE(CAST(:status AS TEXT), status)
+                             END,
+                             result = CASE
+                                 WHEN status IN ('success', 'failed', 'timeout', 'skipped') AND CAST(:status AS TEXT) NOT IN ('success', 'failed', 'timeout', 'skipped') THEN result
+                                 ELSE COALESCE(CAST(:result AS jsonb), result)
+                             END,
+                             error_message = CASE
+                                 WHEN status IN ('success', 'failed', 'timeout', 'skipped') AND CAST(:status AS TEXT) NOT IN ('success', 'failed', 'timeout', 'skipped') THEN error_message
+                                 ELSE COALESCE(CAST(:error_message AS TEXT), error_message)
+                             END,
+                             start_time = COALESCE(CAST(:started_at AS TIMESTAMPTZ), start_time),
+                             end_time = CASE
+                                 WHEN status IN ('success', 'failed', 'timeout', 'skipped') AND CAST(:status AS TEXT) NOT IN ('success', 'failed', 'timeout', 'skipped') THEN end_time
+                                 ELSE COALESCE(CAST(:completed_at AS TIMESTAMPTZ), end_time)
+                             END,
+                             worker_id = COALESCE(CAST(:worker_id AS TEXT), worker_id),
+                             duration = COALESCE(CAST(:duration AS DOUBLE PRECISION), duration),
+                             execution_time = COALESCE(CAST(:execution_time AS DOUBLE PRECISION), execution_time),
+                             updated_at = CURRENT_TIMESTAMP
+                         WHERE stream_id = :stream_id AND consumer_group = :consumer_group
+                         RETURNING stream_id
+                     )
+                     INSERT INTO task_runs (
+                         stream_id, task_name, consumer_group, consumer_name, status, result, error_message,
+                         start_time, end_time, worker_id, duration, execution_time,
+                         created_at, updated_at
+                     )
+                     SELECT
+                         :stream_id, :task_name, :consumer_group, :consumer_name,
+                         COALESCE(CAST(:status AS TEXT), 'pending'),
+                         CAST(:result AS jsonb),
+                         CAST(:error_message AS TEXT),
+                         CAST(:started_at AS TIMESTAMPTZ),
+                         CAST(:completed_at AS TIMESTAMPTZ),
+                         CAST(:worker_id AS TEXT),
+                         CAST(:duration AS DOUBLE PRECISION),
+                         CAST(:execution_time AS DOUBLE PRECISION),
+                         CURRENT_TIMESTAMP, CURRENT_TIMESTAMP
+                     WHERE NOT EXISTS (SELECT 1 FROM updated)
+                     RETURNING stream_id;
+                 """)
+
+                 # Rename the parameter for each update (id -> stream_id)
+                 run_updates = []
+                 for update in updates:
+                     run_update = update.copy()
+                     run_update['stream_id'] = run_update.pop('id')  # rename id to stream_id
+                     # consumer_group is already present in update_info; nothing extra to do
+                     run_updates.append(run_update)
+
+                 # Execute the UPSERTs in batches - transaction batching for better performance
+                 successful_count = 0
+                 batch_size = 20  # process 20 records per batch
+
+                 for i in range(0, len(run_updates), batch_size):
+                     batch = run_updates[i:i+batch_size]
+
+                     try:
+                         # Process the whole batch within one transaction
+                         for run_update in batch:
+                             result = await session.execute(upsert_query, run_update)
+                             if result.rowcount > 0:
+                                 successful_count += 1
+
+                         # Commit the batch
+                         await session.commit()
+                         logger.debug(f"Batch {i//batch_size + 1} committed: {len(batch)} records")
+
+                     except Exception as e:
+                         logger.error(f"Batch {i//batch_size + 1} failed, trying individual records: {e}")
+                         await session.rollback()
+
+                         # If the batch fails, fall back to processing its records one by one
+                         for run_update in batch:
+                             try:
+                                 result = await session.execute(upsert_query, run_update)
+                                 await session.commit()
+                                 if result.rowcount > 0:
+                                     successful_count += 1
+                             except Exception as individual_error:
+                                 logger.error(f"Individual upsert failed for {run_update.get('stream_id')}: {individual_error}")
+                                 await session.rollback()
+
+                 # Log the number of successful updates
+                 if successful_count > 0:
+                     logger.info(f"Upserted {successful_count}/{len(run_updates)} task_runs records")
+
+                 # Work out which tasks are in a terminal state and should be removed from Redis
+                 completed_task_keys = []
+                 for update in updates:
+                     status = update.get('status')
+                     # Terminal states (success, error, cancel, etc.)
+                     if status in ['success', 'error', 'failed', 'cancel', 'cancelled', 'timeout', 'skipped']:
+                         # Build the task_key
+                         # task_key format: namespace:TASK:stream_id:group_name
+                         stream_id = update['id']
+                         consumer_group = update.get('consumer_group')
+                         if consumer_group:
+                             # Extract the namespace from the consumer_group
+                             # consumer_group format: prefix:QUEUE:queue:task_name
+                             parts = consumer_group.split(':', 1)
+                             namespace = parts[0] if parts else 'default'
+                             task_key = f"{namespace}:TASK:{stream_id}:{consumer_group}"
+                             completed_task_keys.append(task_key)
+                             logger.info(f"Task {stream_id} with status {status} will be deleted from Redis: {task_key}")
+
+                 # Delete the completed tasks from Redis
+                 if completed_task_keys:
+                     try:
+                         pipeline = self.redis_client.pipeline()
+                         for task_key in completed_task_keys:
+                             pipeline.delete(task_key)
+                         deleted_results = await pipeline.execute()
+                         deleted_count = sum(1 for r in deleted_results if r > 0)
+                         if deleted_count > 0:
+                             logger.info(f"Deleted {deleted_count} completed tasks from Redis")
+                     except Exception as e:
+                         logger.error(f"Error deleting completed tasks from Redis: {e}")
+
+                 # The UPSERT always succeeds, so return all stream_ids
+                 # No complex error handling is needed because the UPSERT keeps the operation atomic
+                 return set(stream_ids)
+
+         except Exception as e:
+             logger.error(f"Error upserting task statuses: {e}")
+             logger.error(traceback.format_exc())
+             return set()  # return an empty set on error
+
+     async def _retry_pending_updates(self):
+         """Periodically retry task updates that are still pending."""
+         while self._running:
+             try:
+                 await asyncio.sleep(self._retry_interval)  # wait before the next retry pass
+
+                 # Grab the updates that are pending retry
+                 async with self._pending_updates_lock:
+                     if not self._pending_updates:
+                         continue
+
+                     # Take all pending updates
+                     pending_items = list(self._pending_updates.items())
+                     self._pending_updates.clear()
+
+                 if pending_items:
+                     # Retry the updates
+                     updates = [update_info for _, update_info in pending_items]
+                     logger.debug(f"Retrying {len(pending_items)} pending task updates")
+                     await self._update_tasks(updates)
+
+             except Exception as e:
+                 logger.error(f"Error in retry pending updates: {e}")
+                 await asyncio.sleep(5)
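The parsing in _consume_task_changes and _update_tasks_by_event above relies on the two key formats documented in the inline comments: task_key = namespace:TASK:stream_id:group_name, where group_name is the consumer group in the form prefix:QUEUE:queue:task_name. A minimal standalone sketch of that decomposition, reusing the example queue and task names from the comments (the stream ID is an assumed placeholder):

    # task_key layout (per the comments above): namespace:TASK:stream_id:consumer_group
    task_key = "default:TASK:1757000000000-0:jettask:QUEUE:robust_bench2:robust_benchmark.benchmark_task"

    namespace, _, stream_id, consumer_group = task_key.split(':', 3)
    # namespace      -> 'default'
    # stream_id      -> '1757000000000-0'
    # consumer_group -> 'jettask:QUEUE:robust_bench2:robust_benchmark.benchmark_task'

    task_name = consumer_group.split(':')[-1]   # -> 'robust_benchmark.benchmark_task'

These are the same split(':TASK:'), split(':', 3) and split(':')[-1] steps the updater performs before handing the parsed fields to _update_tasks.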
jettask/scheduler/__init__.py
@@ -6,11 +6,11 @@
  from .models import ScheduledTask, TaskExecutionHistory
  from .scheduler import TaskScheduler
  from .loader import TaskLoader
- from .manager import ScheduledTaskManager
+ from .task_crud import ScheduledTaskManager

  __all__ = [
      'ScheduledTask',
-     'TaskExecutionHistory',
+     'TaskExecutionHistory',
      'TaskScheduler',
      'TaskLoader',
      'ScheduledTaskManager'
jettask/scheduler/loader.py
@@ -8,7 +8,7 @@ from datetime import datetime, timedelta
  import json

  from ..utils.task_logger import get_task_logger, LogContext
- from .manager import ScheduledTaskManager
+ from .task_crud import ScheduledTaskManager
  from .models import ScheduledTask


@@ -53,11 +53,12 @@ class TaskLoader:
          self.loaded_tasks: Set[str] = set()  # IDs of tasks that have already been loaded

      async def connect(self):
-         """Establish the Redis connection"""
+         """Establish the Redis connection (via the unified connection pool manager)"""
          if not self.redis:
-             self.redis = await aioredis.from_url(
-                 self.redis_url,
-                 encoding="utf-8",
+             from jettask.utils.db_connector import get_async_redis_client
+
+             self.redis = get_async_redis_client(
+                 redis_url=self.redis_url,
                  decode_responses=False
              )

jettask/scheduler/run_scheduler.py
@@ -14,7 +14,7 @@ sys.path.insert(0, str(project_root))

  from jettask import Jettask
  from jettask.scheduler.scheduler import TaskScheduler
- from jettask.scheduler.manager import ScheduledTaskManager
+ from jettask.scheduler.task_crud import ScheduledTaskManager
  from jettask.utils.task_logger import get_task_logger

  logger = get_task_logger(__name__)
jettask/scheduler/scheduler.py
@@ -10,9 +10,8 @@ from typing import Optional, List, TYPE_CHECKING
  from datetime import datetime

  from ..utils.task_logger import get_task_logger, LogContext
- from .manager import ScheduledTaskManager
- from .models import ScheduledTask, TaskExecutionHistory, TaskType
- from .models import TaskStatus as ScheduledTaskStatus  # status enum dedicated to scheduled tasks
+ from .task_crud import ScheduledTaskManager
+ from .models import ScheduledTask, TaskExecutionHistory, TaskType, TaskStatus as ScheduledTaskStatus
  from .loader import TaskLoader

  # Type-annotation imports (to avoid circular imports)
@@ -619,11 +618,12 @@ class TaskScheduler:

      async def run(self):
          """Run the scheduler main loop"""
-         # Establish the Redis connection
+         # Establish the Redis connection (via the unified connection pool manager)
          if not self.redis:
-             self.redis = await aioredis.from_url(
-                 self.redis_url,
-                 encoding="utf-8",
+             from jettask.utils.db_connector import get_async_redis_client
+
+             self.redis = get_async_redis_client(
+                 redis_url=self.redis_url,
                  decode_responses=False
              )
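Both TaskLoader.connect() and TaskScheduler.run() now obtain their Redis client from jettask.utils.db_connector.get_async_redis_client (db_connector.py is new in this release, +1629 lines) instead of calling aioredis.from_url directly. Judging purely from these call sites, the helper is called without await and takes the same keyword arguments, so a caller sketch would look like this (the URL is an assumed example; the pooling behaviour lives inside db_connector and is not shown in this diff):

    from jettask.utils.db_connector import get_async_redis_client

    # Same call shape as the two hunks above; returns an asyncio Redis client,
    # presumably backed by a shared connection pool managed by db_connector.
    redis_client = get_async_redis_client(
        redis_url="redis://localhost:6379/0",  # assumed example URL
        decode_responses=False,
    )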