jettask 0.2.20__py3-none-any.whl → 0.2.24__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (110)
  1. jettask/__init__.py +4 -0
  2. jettask/cli.py +12 -8
  3. jettask/config/lua_scripts.py +37 -0
  4. jettask/config/nacos_config.py +1 -1
  5. jettask/core/app.py +313 -340
  6. jettask/core/container.py +4 -4
  7. jettask/{persistence → core}/namespace.py +93 -27
  8. jettask/core/task.py +16 -9
  9. jettask/core/unified_manager_base.py +136 -26
  10. jettask/db/__init__.py +67 -0
  11. jettask/db/base.py +137 -0
  12. jettask/{utils/db_connector.py → db/connector.py} +130 -26
  13. jettask/db/models/__init__.py +16 -0
  14. jettask/db/models/scheduled_task.py +196 -0
  15. jettask/db/models/task.py +77 -0
  16. jettask/db/models/task_run.py +85 -0
  17. jettask/executor/__init__.py +0 -15
  18. jettask/executor/core.py +76 -31
  19. jettask/executor/process_entry.py +29 -114
  20. jettask/executor/task_executor.py +4 -0
  21. jettask/messaging/event_pool.py +928 -685
  22. jettask/messaging/scanner.py +30 -0
  23. jettask/persistence/__init__.py +28 -103
  24. jettask/persistence/buffer.py +170 -0
  25. jettask/persistence/consumer.py +330 -249
  26. jettask/persistence/manager.py +304 -0
  27. jettask/persistence/persistence.py +391 -0
  28. jettask/scheduler/__init__.py +15 -3
  29. jettask/scheduler/{task_crud.py → database.py} +61 -57
  30. jettask/scheduler/loader.py +2 -2
  31. jettask/scheduler/{scheduler_coordinator.py → manager.py} +23 -6
  32. jettask/scheduler/models.py +14 -10
  33. jettask/scheduler/schedule.py +166 -0
  34. jettask/scheduler/scheduler.py +12 -11
  35. jettask/schemas/__init__.py +50 -1
  36. jettask/schemas/backlog.py +43 -6
  37. jettask/schemas/namespace.py +70 -19
  38. jettask/schemas/queue.py +19 -3
  39. jettask/schemas/responses.py +493 -0
  40. jettask/task/__init__.py +0 -2
  41. jettask/task/router.py +3 -0
  42. jettask/test_connection_monitor.py +1 -1
  43. jettask/utils/__init__.py +7 -5
  44. jettask/utils/db_init.py +8 -4
  45. jettask/utils/namespace_dep.py +167 -0
  46. jettask/utils/queue_matcher.py +186 -0
  47. jettask/utils/rate_limit/concurrency_limiter.py +7 -1
  48. jettask/utils/stream_backlog.py +1 -1
  49. jettask/webui/__init__.py +0 -1
  50. jettask/webui/api/__init__.py +4 -4
  51. jettask/webui/api/alerts.py +806 -71
  52. jettask/webui/api/example_refactored.py +400 -0
  53. jettask/webui/api/namespaces.py +390 -45
  54. jettask/webui/api/overview.py +300 -54
  55. jettask/webui/api/queues.py +971 -267
  56. jettask/webui/api/scheduled.py +1249 -56
  57. jettask/webui/api/settings.py +129 -7
  58. jettask/webui/api/workers.py +442 -0
  59. jettask/webui/app.py +46 -2329
  60. jettask/webui/middleware/__init__.py +6 -0
  61. jettask/webui/middleware/namespace_middleware.py +135 -0
  62. jettask/webui/services/__init__.py +146 -0
  63. jettask/webui/services/heartbeat_service.py +251 -0
  64. jettask/webui/services/overview_service.py +60 -51
  65. jettask/webui/services/queue_monitor_service.py +426 -0
  66. jettask/webui/services/redis_monitor_service.py +87 -0
  67. jettask/webui/services/settings_service.py +174 -111
  68. jettask/webui/services/task_monitor_service.py +222 -0
  69. jettask/webui/services/timeline_pg_service.py +452 -0
  70. jettask/webui/services/timeline_service.py +189 -0
  71. jettask/webui/services/worker_monitor_service.py +467 -0
  72. jettask/webui/utils/__init__.py +11 -0
  73. jettask/webui/utils/time_utils.py +122 -0
  74. jettask/worker/lifecycle.py +8 -2
  75. {jettask-0.2.20.dist-info → jettask-0.2.24.dist-info}/METADATA +1 -1
  76. jettask-0.2.24.dist-info/RECORD +142 -0
  77. jettask/executor/executor.py +0 -338
  78. jettask/persistence/backlog_monitor.py +0 -567
  79. jettask/persistence/base.py +0 -2334
  80. jettask/persistence/db_manager.py +0 -516
  81. jettask/persistence/maintenance.py +0 -81
  82. jettask/persistence/message_consumer.py +0 -259
  83. jettask/persistence/models.py +0 -49
  84. jettask/persistence/offline_recovery.py +0 -196
  85. jettask/persistence/queue_discovery.py +0 -215
  86. jettask/persistence/task_persistence.py +0 -218
  87. jettask/persistence/task_updater.py +0 -583
  88. jettask/scheduler/add_execution_count.sql +0 -11
  89. jettask/scheduler/add_priority_field.sql +0 -26
  90. jettask/scheduler/add_scheduler_id.sql +0 -25
  91. jettask/scheduler/add_scheduler_id_index.sql +0 -10
  92. jettask/scheduler/make_scheduler_id_required.sql +0 -28
  93. jettask/scheduler/migrate_interval_seconds.sql +0 -9
  94. jettask/scheduler/performance_optimization.sql +0 -45
  95. jettask/scheduler/run_scheduler.py +0 -186
  96. jettask/scheduler/schema.sql +0 -84
  97. jettask/task/task_executor.py +0 -318
  98. jettask/webui/api/analytics.py +0 -323
  99. jettask/webui/config.py +0 -90
  100. jettask/webui/models/__init__.py +0 -3
  101. jettask/webui/models/namespace.py +0 -63
  102. jettask/webui/namespace_manager/__init__.py +0 -10
  103. jettask/webui/namespace_manager/multi.py +0 -593
  104. jettask/webui/namespace_manager/unified.py +0 -193
  105. jettask/webui/run.py +0 -46
  106. jettask-0.2.20.dist-info/RECORD +0 -145
  107. {jettask-0.2.20.dist-info → jettask-0.2.24.dist-info}/WHEEL +0 -0
  108. {jettask-0.2.20.dist-info → jettask-0.2.24.dist-info}/entry_points.txt +0 -0
  109. {jettask-0.2.20.dist-info → jettask-0.2.24.dist-info}/licenses/LICENSE +0 -0
  110. {jettask-0.2.20.dist-info → jettask-0.2.24.dist-info}/top_level.txt +0 -0
jettask/persistence/task_updater.py
@@ -1,583 +0,0 @@
- """Task status update module
-
- Consumes task change events from the TASK_CHANGES stream and updates task status in the database.
- """
-
- import asyncio
- import json
- import logging
- import traceback
- from typing import Dict, List, Optional, Any, Set
- from datetime import datetime, timezone
-
- import redis.asyncio as redis
- from redis.asyncio import Redis
- from sqlalchemy import text
- from sqlalchemy.ext.asyncio import AsyncSession
- from sqlalchemy.orm import sessionmaker
-
- logger = logging.getLogger(__name__)
-
-
- class TaskUpdater:
-     """Task status updater
-
-     Responsibilities:
-     - Consume task change events from the TASK_CHANGES stream
-     - Parse task status update information
-     - Batch-update task status in the database
-     - Support recovery of pending messages
-     """
-
-     def __init__(
-         self,
-         redis_client: Redis,
-         async_session_local: sessionmaker,
-         redis_prefix: str,
-         consumer_id: str
-     ):
-         """Initialize the task status updater
-
-         Args:
-             redis_client: Async Redis client
-             async_session_local: SQLAlchemy session factory
-             redis_prefix: Redis key prefix
-             consumer_id: Consumer ID
-         """
-         self.redis_client = redis_client
-         self.AsyncSessionLocal = async_session_local
-         self.redis_prefix = redis_prefix
-         self.consumer_id = consumer_id
-
-         # Stream configuration
-         self.change_stream_key = f"{redis_prefix}:TASK_CHANGES"
-         self.consumer_group = f"{redis_prefix}_changes_consumer"
-
-         # Task updates pending retry
-         self._pending_updates = {}
-         self._pending_updates_lock = asyncio.Lock()
-         self._max_pending_updates = 10000
-         self._retry_interval = 5  # retry every 5 seconds
-
-         self._running = False
-         self._consume_task = None
-         self._retry_task = None
-
-     async def start(self):
-         """Start the updater"""
-         # Create the consumer group
-         try:
-             await self.redis_client.xgroup_create(
-                 self.change_stream_key, self.consumer_group, id='0', mkstream=True
-             )
-             logger.debug(f"Created consumer group for task changes stream")
-         except redis.ResponseError:
-             pass
-
-         self._running = True
-         self._consume_task = asyncio.create_task(self._consume_task_changes())
-         self._retry_task = asyncio.create_task(self._retry_pending_updates())
-         logger.debug("TaskUpdater started")
-
-     async def stop(self):
-         """Stop the updater"""
-         self._running = False
-
-         if self._consume_task:
-             self._consume_task.cancel()
-             try:
-                 await self._consume_task
-             except asyncio.CancelledError:
-                 pass
-
-         if self._retry_task:
-             self._retry_task.cancel()
-             try:
-                 await self._retry_task
-             except asyncio.CancelledError:
-                 pass
-
-         logger.debug("TaskUpdater stopped")
-
-     async def _consume_task_changes(self):
-         """Consume the task change event stream - event-driven updates (with pending message recovery)"""
-         # Mirrors listen_event_by_task: handle pending messages first, then new messages
-         check_backlog = True
-         lastid = "0-0"
-         batch_size = 1000
-
-         while self._running:
-             try:
-                 # Decide where to read from: drain the backlog from lastid, otherwise read new messages
-                 if check_backlog:
-                     myid = lastid
-                 else:
-                     myid = ">"
-
-                 messages = await self.redis_client.xreadgroup(
-                     self.consumer_group,
-                     self.consumer_id,
-                     {self.change_stream_key: myid},
-                     count=batch_size,
-                     block=1000 if not check_backlog else 0  # do not block while draining the backlog
-                 )
-
-                 if not messages:
-                     check_backlog = False
-                     continue
-
-                 # Check whether more backlog messages remain
-                 if messages and len(messages[0][1]) > 0:
-                     check_backlog = len(messages[0][1]) >= batch_size
-                 else:
-                     check_backlog = False
-
-                 # Collect message IDs and their corresponding task IDs
-                 msg_to_task = {}  # msg_id -> (stream_id, task_key) mapping
-
-                 for _, stream_messages in messages:
-                     for msg_id, data in stream_messages:
-                         try:
-                             # Update lastid (whether or not the message is processed successfully)
-                             if isinstance(msg_id, bytes):
-                                 lastid = msg_id.decode('utf-8')
-                             else:
-                                 lastid = str(msg_id)
-
-                             task_key = data[b'id']
-                             task_key = task_key.decode('utf-8') if isinstance(task_key, bytes) else str(task_key)
-
-                             # Extract stream_id from the full task_key
-                             # Format: namespace:TASK:stream_id:queue_name
-                             stream_id = None
-                             if ':TASK:' in task_key:
-                                 parts = task_key.split(':TASK:')
-                                 if len(parts) == 2:
-                                     # Extract stream_id from the right-hand part
-                                     right_parts = parts[1].split(':')
-                                     if right_parts:
-                                         stream_id = right_parts[0]  # extract stream_id
-
-                             if stream_id:
-                                 # Store the tuple: (stream_id, task_key)
-                                 msg_to_task[msg_id] = (stream_id, task_key)
-                             else:
-                                 logger.warning(f"Cannot extract stream_id from task_key: {task_key}")
-                         except Exception as e:
-                             logger.error(f"Error processing change event {msg_id}: {e} {data=}")
-                             logger.error(traceback.format_exc())
-                             # ACK messages that fail to parse as well, to avoid retrying them forever
-                             await self.redis_client.xack(self.change_stream_key, self.consumer_group, msg_id)
-
-                 if msg_to_task:
-                     # Batch-update the tasks; returns the successfully updated IDs
-                     # msg_to_task values are now (stream_id, task_key) tuples
-                     id_tuples = list(set(msg_to_task.values()))
-                     logger.info(f"Processing {len(id_tuples)} task updates from change stream")
-                     successful_tuples = await self._update_tasks_by_event(id_tuples)
-
-                     # ACK only the messages that were updated successfully
-                     ack_ids = []
-                     failed_count = 0
-                     for msg_id, id_tuple in msg_to_task.items():
-                         if successful_tuples and id_tuple in successful_tuples:
-                             ack_ids.append(msg_id)
-                         else:
-                             failed_count += 1
-
-                     if ack_ids:
-                         await self.redis_client.xack(self.change_stream_key, self.consumer_group, *ack_ids)
-                         if len(ack_ids) > 0:
-                             logger.info(f"Updated {len(ack_ids)} task statuses")
-
-                     if failed_count > 0:
-                         logger.debug(f"Failed to update {failed_count} tasks, will retry")
-
-             except redis.ResponseError as e:
-                 if "NOGROUP" in str(e):
-                     # Recreate the consumer group if it no longer exists
-                     try:
-                         await self.redis_client.xgroup_create(
-                             self.change_stream_key, self.consumer_group, id='0', mkstream=True
-                         )
-                         logger.debug(f"Recreated consumer group for task changes stream")
-                         check_backlog = True
-                         lastid = "0-0"
-                     except:
-                         pass
-                 else:
-                     logger.error(f"Redis error in consume_task_changes: {e}")
-                     logger.error(traceback.format_exc())
-                     await asyncio.sleep(1)
-             except Exception as e:
-                 logger.error(f"Error in consume_task_changes: {e}", exc_info=True)
-                 await asyncio.sleep(1)
-
-     async def _update_tasks_by_event(self, id_tuples: List[tuple]) -> Set[tuple]:
-         """Batch-update task status based on event IDs
-
-         Args:
-             id_tuples: List of tuples, each of the form (stream_id, task_key)
-
-         Returns:
-             Set of tuples that were updated successfully
-         """
-         if not id_tuples:
-             return set()
-
-         successful_tuples = set()
-
-         try:
-             pipeline = self.redis_client.pipeline()
-             for stream_id, task_key in id_tuples:
-                 pipeline.hgetall(task_key)
-
-             redis_values = await pipeline.execute()
-             updates = []
-             valid_tuples = []  # track the valid tuples
-
-             if len(id_tuples) != len(redis_values):
-                 logger.error(f'Mismatch: {len(id_tuples)=} {len(redis_values)=}')
-                 # Do not raise; keep processing whatever can be processed
-
-             for i, (stream_id, task_key) in enumerate(id_tuples):
-                 if i >= len(redis_values):
-                     logger.error(f'Missing redis value for task_key={task_key}')
-                     continue
-
-                 hash_data = redis_values[i]
-
-                 if not hash_data:
-                     logger.debug(f'No hash data for task_key={task_key}')
-                     continue
-
-                 try:
-                     # Parse the consumer_group out of the task_key
-                     # task_key format: namespace:TASK:stream_id:group_name
-                     # where group_name is the full consumer_group (format: jettask:QUEUE:queue_name:task_name)
-                     parts = task_key.split(':', 3)  # split into at most 4 parts
-                     if len(parts) == 4:
-                         # parts[0] = namespace (e.g. 'default')
-                         # parts[1] = 'TASK'
-                         # parts[2] = stream_id
-                         # parts[3] = group_name (consumer_group)
-                         consumer_group = parts[3]  # use group_name directly as the consumer_group
-                         logger.debug(f"Extracted consumer_group from task_key: {consumer_group}")
-                     else:
-                         logger.warning(f"Cannot parse consumer_group from task_key: {task_key}")
-                         continue
-
-                     # Extract the task_name from the consumer_group
-                     # consumer_group format: prefix:QUEUE:queue:task_name (e.g. jettask:QUEUE:robust_bench2:robust_benchmark.benchmark_task)
-                     task_name = None
-                     if consumer_group:
-                         parts = consumer_group.split(':')
-                         if len(parts) >= 4:
-                             # the last part is the task_name
-                             task_name = parts[-1]
-                             logger.debug(f"Extracted task_name '{task_name}' from consumer_group '{consumer_group}'")
-
-                     # Use stream_id as the task ID
-                     update_info = self._parse_task_hash(stream_id, hash_data)
-                     if update_info:
-                         # Add consumer_group and task_name to the update info
-                         update_info['consumer_group'] = consumer_group
-                         update_info['task_name'] = task_name or 'unknown'  # fall back to 'unknown' if task_name cannot be extracted
-                         # consumer_name is the worker_id (the worker that actually executed the task)
-                         update_info['consumer_name'] = update_info.get('worker_id')
-                         updates.append(update_info)
-                         valid_tuples.append((stream_id, task_key))
-                     else:
-                         logger.debug(f'Failed to parse stream_id={stream_id} hash_data={hash_data}')
-                 except Exception as e:
-                     logger.error(f'Error parsing task stream_id={stream_id}: {e}')
-                     continue
-
-             if updates:
-                 logger.info(f"Attempting to update {len(updates)} tasks, first few: {[u['id'] for u in updates[:3]]}")
-                 try:
-                     # _update_tasks now returns the set of successfully updated IDs
-                     batch_successful = await self._update_tasks(updates)
-                     # Map the successful stream_ids back to their tuples
-                     for stream_id in batch_successful:
-                         for tuple_item in valid_tuples:
-                             if tuple_item[0] == stream_id:  # stream_id matches
-                                 successful_tuples.add(tuple_item)
-                     if batch_successful:
-                         logger.info(f"Successfully updated {len(batch_successful)} tasks from change events")
-                     else:
-                         logger.warning(f"No tasks were successfully updated")
-                 except Exception as e:
-                     logger.error(f"Error in batch update: {e}")
-                     # Batch update failed; fall back to updating one by one
-                     for update, tuple_item in zip(updates, valid_tuples):
-                         try:
-                             single_successful = await self._update_tasks([update])
-                             if update['id'] in single_successful:
-                                 successful_tuples.add(tuple_item)
-                         except Exception as single_error:
-                             logger.error(f"Failed to update task {tuple_item[0]}: {single_error}")
-
-         except Exception as e:
-             logger.error(f"Error updating tasks by event: {e}", exc_info=True)
-
-         logger.debug(f'{successful_tuples=}')
-         return successful_tuples
-
-     def _parse_task_hash(self, task_id: str, hash_data: dict) -> Optional[dict]:
-         """Parse Redis hash data"""
-         update_info = {
-             'id': task_id,
-             'status': None,
-             'result': None,
-             'error_message': None,
-             'started_at': None,
-             'completed_at': None,
-             'worker_id': None,
-             'execution_time': None,
-             'duration': None
-         }
-
-         try:
-             from jettask.utils.serializer import loads_str
-
-             hash_dict = {}
-             for k, v in hash_data.items():
-                 key = k.decode('utf-8') if isinstance(k, bytes) else k
-                 if isinstance(v, bytes):
-                     try:
-                         value = loads_str(v)
-                         if isinstance(value, (dict, list)):
-                             value = json.dumps(value, ensure_ascii=False)
-                         else:
-                             value = str(value)
-                     except:
-                         try:
-                             value = v.decode('utf-8')
-                         except:
-                             value = str(v)
-                 else:
-                     value = v
-                 hash_dict[key] = value
-
-             update_info['status'] = hash_dict.get('status')
-             update_info['error_message'] = hash_dict.get('error_msg') or hash_dict.get('exception')
-
-             # Convert timestamps
-             for time_field in ['started_at', 'completed_at']:
-                 if hash_dict.get(time_field):
-                     try:
-                         time_str = hash_dict[time_field]
-                         if isinstance(time_str, str) and time_str.startswith("b'") and time_str.endswith("'"):
-                             time_str = time_str[2:-1]
-                         update_info[time_field] = datetime.fromtimestamp(float(time_str), tz=timezone.utc)
-                     except:
-                         pass
-
-             update_info['worker_id'] = hash_dict.get('consumer') or hash_dict.get('worker_id')
-
-             # Convert numeric values - store the raw seconds directly
-             for num_field in ['execution_time', 'duration']:
-                 if hash_dict.get(num_field):
-                     try:
-                         num_str = hash_dict[num_field]
-                         # store the float seconds value directly
-                         update_info[num_field] = float(num_str)
-                     except:
-                         pass
-
-             # Handle the result field
-             if 'result' in hash_dict:
-                 result_str = hash_dict['result']
-                 if result_str == 'null':
-                     update_info['result'] = None
-                 else:
-                     update_info['result'] = result_str
-
-             # Only return updates that actually carry data
-             if any(v is not None for k, v in update_info.items() if k != 'id'):
-                 return update_info
-
-         except Exception as e:
-             logger.error(f"Failed to parse hash data for task {task_id}: {e}")
-
-         return None
-
-     async def _update_tasks(self, updates: List[Dict[str, Any]]) -> Set[str]:
-         """Batch-update task status (UPSERT logic against the task_runs table)
-
-         Returns:
-             Set of stream_ids that were updated successfully
-         """
-         if not updates:
-             return set()
-
-         try:
-             async with self.AsyncSessionLocal() as session:
-                 # V3 schema: use UPSERT logic against the task_runs table
-                 stream_ids = [u['id'] for u in updates]
-                 logger.info(f"Upserting {len(stream_ids)} task_runs records")
-
-                 # Partitioned tables need a different UPSERT strategy:
-                 # try UPDATE first, and INSERT if no row was updated
-                 upsert_query = text("""
-                     WITH updated AS (
-                         UPDATE task_runs SET
-                             consumer_name = COALESCE(CAST(:consumer_name AS TEXT), consumer_name),
-                             status = CASE
-                                 WHEN CAST(:status AS TEXT) IS NULL THEN status
-                                 WHEN status = 'pending' THEN COALESCE(CAST(:status AS TEXT), status)
-                                 WHEN status = 'running' AND CAST(:status AS TEXT) IN ('success', 'failed', 'timeout', 'skipped') THEN CAST(:status AS TEXT)
-                                 WHEN status IN ('success', 'failed', 'timeout', 'skipped') THEN status
-                                 ELSE COALESCE(CAST(:status AS TEXT), status)
-                             END,
-                             result = CASE
-                                 WHEN status IN ('success', 'failed', 'timeout', 'skipped') AND CAST(:status AS TEXT) NOT IN ('success', 'failed', 'timeout', 'skipped') THEN result
-                                 ELSE COALESCE(CAST(:result AS jsonb), result)
-                             END,
-                             error_message = CASE
-                                 WHEN status IN ('success', 'failed', 'timeout', 'skipped') AND CAST(:status AS TEXT) NOT IN ('success', 'failed', 'timeout', 'skipped') THEN error_message
-                                 ELSE COALESCE(CAST(:error_message AS TEXT), error_message)
-                             END,
-                             start_time = COALESCE(CAST(:started_at AS TIMESTAMPTZ), start_time),
-                             end_time = CASE
-                                 WHEN status IN ('success', 'failed', 'timeout', 'skipped') AND CAST(:status AS TEXT) NOT IN ('success', 'failed', 'timeout', 'skipped') THEN end_time
-                                 ELSE COALESCE(CAST(:completed_at AS TIMESTAMPTZ), end_time)
-                             END,
-                             worker_id = COALESCE(CAST(:worker_id AS TEXT), worker_id),
-                             duration = COALESCE(CAST(:duration AS DOUBLE PRECISION), duration),
-                             execution_time = COALESCE(CAST(:execution_time AS DOUBLE PRECISION), execution_time),
-                             updated_at = CURRENT_TIMESTAMP
-                         WHERE stream_id = :stream_id AND consumer_group = :consumer_group
-                         RETURNING stream_id
-                     )
-                     INSERT INTO task_runs (
-                         stream_id, task_name, consumer_group, consumer_name, status, result, error_message,
-                         start_time, end_time, worker_id, duration, execution_time,
-                         created_at, updated_at
-                     )
-                     SELECT
-                         :stream_id, :task_name, :consumer_group, :consumer_name,
-                         COALESCE(CAST(:status AS TEXT), 'pending'),
-                         CAST(:result AS jsonb),
-                         CAST(:error_message AS TEXT),
-                         CAST(:started_at AS TIMESTAMPTZ),
-                         CAST(:completed_at AS TIMESTAMPTZ),
-                         CAST(:worker_id AS TEXT),
-                         CAST(:duration AS DOUBLE PRECISION),
-                         CAST(:execution_time AS DOUBLE PRECISION),
-                         CURRENT_TIMESTAMP, CURRENT_TIMESTAMP
-                     WHERE NOT EXISTS (SELECT 1 FROM updated)
-                     RETURNING stream_id;
-                 """)
-
-                 # Rename the key for each update (id -> stream_id)
-                 run_updates = []
-                 for update in updates:
-                     run_update = update.copy()
-                     run_update['stream_id'] = run_update.pop('id')  # rename id to stream_id
-                     # consumer_group is already in update_info, no extra handling needed
-                     run_updates.append(run_update)
-
-                 # Execute the UPSERTs in batches - transactional batching for better performance
-                 successful_count = 0
-                 batch_size = 20  # 20 records per batch
-
-                 for i in range(0, len(run_updates), batch_size):
-                     batch = run_updates[i:i+batch_size]
-
-                     try:
-                         # Process the whole batch in one transaction
-                         for run_update in batch:
-                             result = await session.execute(upsert_query, run_update)
-                             if result.rowcount > 0:
-                                 successful_count += 1
-
-                         # Commit the batch
-                         await session.commit()
-                         logger.debug(f"Batch {i//batch_size + 1} committed: {len(batch)} records")
-
-                     except Exception as e:
-                         logger.error(f"Batch {i//batch_size + 1} failed, trying individual records: {e}")
-                         await session.rollback()
-
-                         # If the batch fails, fall back to processing its records one by one
-                         for run_update in batch:
-                             try:
-                                 result = await session.execute(upsert_query, run_update)
-                                 await session.commit()
-                                 if result.rowcount > 0:
-                                     successful_count += 1
-                             except Exception as individual_error:
-                                 logger.error(f"Individual upsert failed for {run_update.get('stream_id')}: {individual_error}")
-                                 await session.rollback()
-
-                 # Log the number of successful updates
-                 if successful_count > 0:
-                     logger.info(f"Upserted {successful_count}/{len(run_updates)} task_runs records")
-
-                 # Check which tasks are in a terminal state and should be deleted from Redis
-                 completed_task_keys = []
-                 for update in updates:
-                     status = update.get('status')
-                     # If the status is terminal (success, error, cancel, etc.)
-                     if status in ['success', 'error', 'failed', 'cancel', 'cancelled', 'timeout', 'skipped']:
-                         # Build the task_key
-                         # task_key format: namespace:TASK:stream_id:group_name
-                         stream_id = update['id']
-                         consumer_group = update.get('consumer_group')
-                         if consumer_group:
-                             # Extract the namespace from the consumer_group
-                             # consumer_group format: prefix:QUEUE:queue:task_name
-                             parts = consumer_group.split(':', 1)
-                             namespace = parts[0] if parts else 'default'
-                             task_key = f"{namespace}:TASK:{stream_id}:{consumer_group}"
-                             completed_task_keys.append(task_key)
-                             logger.info(f"Task {stream_id} with status {status} will be deleted from Redis: {task_key}")
-
-                 # Delete the completed tasks from Redis
-                 if completed_task_keys:
-                     try:
-                         pipeline = self.redis_client.pipeline()
-                         for task_key in completed_task_keys:
-                             pipeline.delete(task_key)
-                         deleted_results = await pipeline.execute()
-                         deleted_count = sum(1 for r in deleted_results if r > 0)
-                         if deleted_count > 0:
-                             logger.info(f"Deleted {deleted_count} completed tasks from Redis")
-                     except Exception as e:
-                         logger.error(f"Error deleting completed tasks from Redis: {e}")
-
-                 # The UPSERT always succeeds, so return every stream_id
-                 # No complex error handling is needed because the UPSERT keeps the operation atomic
-                 return set(stream_ids)
-
-         except Exception as e:
-             logger.error(f"Error upserting task statuses: {e}")
-             logger.error(traceback.format_exc())
-             return set()  # return an empty set on error
-
-     async def _retry_pending_updates(self):
-         """Periodically retry task updates that are still pending"""
-         while self._running:
-             try:
-                 await asyncio.sleep(self._retry_interval)  # wait between retry rounds
-
-                 # Grab the updates that are pending retry
-                 async with self._pending_updates_lock:
-                     if not self._pending_updates:
-                         continue
-
-                     # Take all pending updates
-                     pending_items = list(self._pending_updates.items())
-                     self._pending_updates.clear()
-
-                 if pending_items:
-                     # Retry the updates
-                     updates = [update_info for _, update_info in pending_items]
-                     logger.debug(f"Retrying {len(pending_items)} pending task updates")
-                     await self._update_tasks(updates)
-
-             except Exception as e:
-                 logger.error(f"Error in retry pending updates: {e}")
-                 await asyncio.sleep(5)
jettask/scheduler/add_execution_count.sql
@@ -1,11 +0,0 @@
- -- Add an execution-count column to the scheduled_tasks table
- ALTER TABLE scheduled_tasks
- ADD COLUMN IF NOT EXISTS execution_count INTEGER DEFAULT 0;
-
- -- Add a column comment
- COMMENT ON COLUMN scheduled_tasks.execution_count IS 'Number of times the task has been executed';
-
- -- Set an initial value for existing records (optional; estimate from historical data)
- UPDATE scheduled_tasks
- SET execution_count = 0
- WHERE execution_count IS NULL;
jettask/scheduler/add_priority_field.sql
@@ -1,26 +0,0 @@
- -- Add a priority column to the scheduled_tasks table
- -- Used to support priority settings for scheduled tasks
-
- -- Check whether the priority column already exists, to avoid adding it twice
- DO $$
- BEGIN
-     IF NOT EXISTS (
-         SELECT 1
-         FROM information_schema.columns
-         WHERE table_name = 'scheduled_tasks'
-         AND column_name = 'priority'
-     ) THEN
-         ALTER TABLE scheduled_tasks
-         ADD COLUMN priority INTEGER DEFAULT NULL;
-
-         -- Add a column comment
-         COMMENT ON COLUMN scheduled_tasks.priority IS 'Task priority (1 = highest; larger numbers mean lower priority; NULL = lowest by default)';
-
-         -- Create an index to improve query performance
-         CREATE INDEX idx_scheduled_tasks_priority ON scheduled_tasks(priority);
-
-         RAISE NOTICE 'Added priority column to scheduled_tasks table';
-     ELSE
-         RAISE NOTICE 'Priority column already exists in scheduled_tasks table';
-     END IF;
- END $$;
jettask/scheduler/add_scheduler_id.sql
@@ -1,25 +0,0 @@
- -- Migration to add scheduler_id column to scheduled_tasks table
- -- This allows unique identification and deduplication of tasks
-
- -- Add the scheduler_id column if it doesn't exist
- DO $$
- BEGIN
-     IF NOT EXISTS (
-         SELECT 1
-         FROM information_schema.columns
-         WHERE table_name = 'scheduled_tasks'
-         AND column_name = 'scheduler_id'
-     ) THEN
-         ALTER TABLE scheduled_tasks
-         ADD COLUMN scheduler_id VARCHAR(255) UNIQUE;
-
-         -- Add comment
-         COMMENT ON COLUMN scheduled_tasks.scheduler_id IS
-         'Unique identifier for the task, used for deduplication';
-     END IF;
- END $$;
-
- -- Create index for scheduler_id if it doesn't exist
- CREATE INDEX IF NOT EXISTS idx_scheduled_tasks_scheduler_id
- ON scheduled_tasks(scheduler_id)
- WHERE scheduler_id IS NOT NULL;
jettask/scheduler/add_scheduler_id_index.sql
@@ -1,10 +0,0 @@
- -- Migration to add index for scheduler_id field
- -- This index is critical for performance as we heavily rely on scheduler_id for lookups
-
- -- Create unique index on scheduler_id for fast lookups
- CREATE UNIQUE INDEX IF NOT EXISTS idx_scheduled_tasks_scheduler_id
- ON scheduled_tasks(scheduler_id);
-
- -- Also add a comment to clarify the importance
- COMMENT ON INDEX idx_scheduled_tasks_scheduler_id IS
- 'Unique index on scheduler_id for fast task lookups and deduplication';