jettask 0.2.19__py3-none-any.whl → 0.2.20__py3-none-any.whl

This diff compares publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between versions as they appear in the public registry.
Files changed (165)
  1. jettask/__init__.py +10 -3
  2. jettask/cli.py +314 -228
  3. jettask/config/__init__.py +9 -1
  4. jettask/config/config.py +245 -0
  5. jettask/config/env_loader.py +381 -0
  6. jettask/config/lua_scripts.py +158 -0
  7. jettask/config/nacos_config.py +132 -5
  8. jettask/core/__init__.py +1 -1
  9. jettask/core/app.py +1573 -666
  10. jettask/core/app_importer.py +33 -16
  11. jettask/core/container.py +532 -0
  12. jettask/core/task.py +1 -4
  13. jettask/core/unified_manager_base.py +2 -2
  14. jettask/executor/__init__.py +38 -0
  15. jettask/executor/core.py +625 -0
  16. jettask/executor/executor.py +338 -0
  17. jettask/executor/orchestrator.py +290 -0
  18. jettask/executor/process_entry.py +638 -0
  19. jettask/executor/task_executor.py +317 -0
  20. jettask/messaging/__init__.py +68 -0
  21. jettask/messaging/event_pool.py +2188 -0
  22. jettask/messaging/reader.py +519 -0
  23. jettask/messaging/registry.py +266 -0
  24. jettask/messaging/scanner.py +369 -0
  25. jettask/messaging/sender.py +312 -0
  26. jettask/persistence/__init__.py +118 -0
  27. jettask/persistence/backlog_monitor.py +567 -0
  28. jettask/{backend/data_access.py → persistence/base.py} +58 -57
  29. jettask/persistence/consumer.py +315 -0
  30. jettask/{core → persistence}/db_manager.py +23 -22
  31. jettask/persistence/maintenance.py +81 -0
  32. jettask/persistence/message_consumer.py +259 -0
  33. jettask/{backend/namespace_data_access.py → persistence/namespace.py} +66 -98
  34. jettask/persistence/offline_recovery.py +196 -0
  35. jettask/persistence/queue_discovery.py +215 -0
  36. jettask/persistence/task_persistence.py +218 -0
  37. jettask/persistence/task_updater.py +583 -0
  38. jettask/scheduler/__init__.py +2 -2
  39. jettask/scheduler/loader.py +6 -5
  40. jettask/scheduler/run_scheduler.py +1 -1
  41. jettask/scheduler/scheduler.py +7 -7
  42. jettask/scheduler/{unified_scheduler_manager.py → scheduler_coordinator.py} +18 -13
  43. jettask/task/__init__.py +16 -0
  44. jettask/{router.py → task/router.py} +26 -8
  45. jettask/task/task_center/__init__.py +9 -0
  46. jettask/task/task_executor.py +318 -0
  47. jettask/task/task_registry.py +291 -0
  48. jettask/test_connection_monitor.py +73 -0
  49. jettask/utils/__init__.py +31 -1
  50. jettask/{monitor/run_backlog_collector.py → utils/backlog_collector.py} +1 -1
  51. jettask/utils/db_connector.py +1629 -0
  52. jettask/{db_init.py → utils/db_init.py} +1 -1
  53. jettask/utils/rate_limit/__init__.py +30 -0
  54. jettask/utils/rate_limit/concurrency_limiter.py +665 -0
  55. jettask/utils/rate_limit/config.py +145 -0
  56. jettask/utils/rate_limit/limiter.py +41 -0
  57. jettask/utils/rate_limit/manager.py +269 -0
  58. jettask/utils/rate_limit/qps_limiter.py +154 -0
  59. jettask/utils/rate_limit/task_limiter.py +384 -0
  60. jettask/utils/serializer.py +3 -0
  61. jettask/{monitor/stream_backlog_monitor.py → utils/stream_backlog.py} +14 -6
  62. jettask/utils/time_sync.py +173 -0
  63. jettask/webui/__init__.py +27 -0
  64. jettask/{api/v1 → webui/api}/alerts.py +1 -1
  65. jettask/{api/v1 → webui/api}/analytics.py +2 -2
  66. jettask/{api/v1 → webui/api}/namespaces.py +1 -1
  67. jettask/{api/v1 → webui/api}/overview.py +1 -1
  68. jettask/{api/v1 → webui/api}/queues.py +3 -3
  69. jettask/{api/v1 → webui/api}/scheduled.py +1 -1
  70. jettask/{api/v1 → webui/api}/settings.py +1 -1
  71. jettask/{api.py → webui/app.py} +253 -145
  72. jettask/webui/namespace_manager/__init__.py +10 -0
  73. jettask/{multi_namespace_consumer.py → webui/namespace_manager/multi.py} +69 -22
  74. jettask/{unified_consumer_manager.py → webui/namespace_manager/unified.py} +1 -1
  75. jettask/{run.py → webui/run.py} +2 -2
  76. jettask/{services → webui/services}/__init__.py +1 -3
  77. jettask/{services → webui/services}/overview_service.py +34 -16
  78. jettask/{services → webui/services}/queue_service.py +1 -1
  79. jettask/{backend → webui/services}/queue_stats_v2.py +1 -1
  80. jettask/{services → webui/services}/settings_service.py +1 -1
  81. jettask/worker/__init__.py +53 -0
  82. jettask/worker/lifecycle.py +1507 -0
  83. jettask/worker/manager.py +583 -0
  84. jettask/{core/offline_worker_recovery.py → worker/recovery.py} +268 -175
  85. {jettask-0.2.19.dist-info → jettask-0.2.20.dist-info}/METADATA +2 -71
  86. jettask-0.2.20.dist-info/RECORD +145 -0
  87. jettask/__main__.py +0 -140
  88. jettask/api/__init__.py +0 -103
  89. jettask/backend/__init__.py +0 -1
  90. jettask/backend/api/__init__.py +0 -3
  91. jettask/backend/api/v1/__init__.py +0 -17
  92. jettask/backend/api/v1/monitoring.py +0 -431
  93. jettask/backend/api/v1/namespaces.py +0 -504
  94. jettask/backend/api/v1/queues.py +0 -342
  95. jettask/backend/api/v1/tasks.py +0 -367
  96. jettask/backend/core/__init__.py +0 -3
  97. jettask/backend/core/cache.py +0 -221
  98. jettask/backend/core/database.py +0 -200
  99. jettask/backend/core/exceptions.py +0 -102
  100. jettask/backend/dependencies.py +0 -261
  101. jettask/backend/init_meta_db.py +0 -158
  102. jettask/backend/main.py +0 -1426
  103. jettask/backend/main_unified.py +0 -78
  104. jettask/backend/main_v2.py +0 -394
  105. jettask/backend/models/__init__.py +0 -3
  106. jettask/backend/models/requests.py +0 -236
  107. jettask/backend/models/responses.py +0 -230
  108. jettask/backend/namespace_api_old.py +0 -267
  109. jettask/backend/services/__init__.py +0 -3
  110. jettask/backend/start.py +0 -42
  111. jettask/backend/unified_api_router.py +0 -1541
  112. jettask/cleanup_deprecated_tables.sql +0 -16
  113. jettask/core/consumer_manager.py +0 -1695
  114. jettask/core/delay_scanner.py +0 -256
  115. jettask/core/event_pool.py +0 -1700
  116. jettask/core/heartbeat_process.py +0 -222
  117. jettask/core/task_batch.py +0 -153
  118. jettask/core/worker_scanner.py +0 -271
  119. jettask/executors/__init__.py +0 -5
  120. jettask/executors/asyncio.py +0 -876
  121. jettask/executors/base.py +0 -30
  122. jettask/executors/common.py +0 -148
  123. jettask/executors/multi_asyncio.py +0 -309
  124. jettask/gradio_app.py +0 -570
  125. jettask/integrated_gradio_app.py +0 -1088
  126. jettask/main.py +0 -0
  127. jettask/monitoring/__init__.py +0 -3
  128. jettask/pg_consumer.py +0 -1896
  129. jettask/run_monitor.py +0 -22
  130. jettask/run_webui.py +0 -148
  131. jettask/scheduler/multi_namespace_scheduler.py +0 -294
  132. jettask/scheduler/unified_manager.py +0 -450
  133. jettask/task_center_client.py +0 -150
  134. jettask/utils/serializer_optimized.py +0 -33
  135. jettask/webui_exceptions.py +0 -67
  136. jettask-0.2.19.dist-info/RECORD +0 -150
  137. /jettask/{constants.py → config/constants.py} +0 -0
  138. /jettask/{backend/config.py → config/task_center.py} +0 -0
  139. /jettask/{pg_consumer → messaging/pg_consumer}/pg_consumer_v2.py +0 -0
  140. /jettask/{pg_consumer → messaging/pg_consumer}/sql/add_execution_time_field.sql +0 -0
  141. /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_new_tables.sql +0 -0
  142. /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_tables_v3.sql +0 -0
  143. /jettask/{pg_consumer → messaging/pg_consumer}/sql/migrate_to_new_structure.sql +0 -0
  144. /jettask/{pg_consumer → messaging/pg_consumer}/sql/modify_time_fields.sql +0 -0
  145. /jettask/{pg_consumer → messaging/pg_consumer}/sql_utils.py +0 -0
  146. /jettask/{models.py → persistence/models.py} +0 -0
  147. /jettask/scheduler/{manager.py → task_crud.py} +0 -0
  148. /jettask/{schema.sql → schemas/schema.sql} +0 -0
  149. /jettask/{task_center.py → task/task_center/client.py} +0 -0
  150. /jettask/{monitoring → utils}/file_watcher.py +0 -0
  151. /jettask/{services/redis_monitor_service.py → utils/redis_monitor.py} +0 -0
  152. /jettask/{api/v1 → webui/api}/__init__.py +0 -0
  153. /jettask/{webui_config.py → webui/config.py} +0 -0
  154. /jettask/{webui_models → webui/models}/__init__.py +0 -0
  155. /jettask/{webui_models → webui/models}/namespace.py +0 -0
  156. /jettask/{services → webui/services}/alert_service.py +0 -0
  157. /jettask/{services → webui/services}/analytics_service.py +0 -0
  158. /jettask/{services → webui/services}/scheduled_task_service.py +0 -0
  159. /jettask/{services → webui/services}/task_service.py +0 -0
  160. /jettask/{webui_sql → webui/sql}/batch_upsert_functions.sql +0 -0
  161. /jettask/{webui_sql → webui/sql}/verify_database.sql +0 -0
  162. {jettask-0.2.19.dist-info → jettask-0.2.20.dist-info}/WHEEL +0 -0
  163. {jettask-0.2.19.dist-info → jettask-0.2.20.dist-info}/entry_points.txt +0 -0
  164. {jettask-0.2.19.dist-info → jettask-0.2.20.dist-info}/licenses/LICENSE +0 -0
  165. {jettask-0.2.19.dist-info → jettask-0.2.20.dist-info}/top_level.txt +0 -0
jettask/persistence/queue_discovery.py
@@ -0,0 +1,215 @@
+ """Queue discovery module.
+
+ Dynamically discovers new queues and creates consumer groups for them.
+ Reads a queue registry instead of the Redis SCAN command for better performance.
+ """
+
+ import asyncio
+ import logging
+ import traceback
+ from typing import Set
+
+ import redis.asyncio as redis
+ from redis.asyncio import Redis
+
+ logger = logging.getLogger(__name__)
+
+
+ class QueueDiscovery:
+     """Queue discoverer.
+
+     Responsibilities:
+     - Initial queue discovery (runs once at startup)
+     - Periodic discovery of new queues
+     - Creating consumer groups for new queues
+     - Keeping the ConsumerManager's queue list up to date
+     """
+
+     def __init__(
+         self,
+         redis_client: Redis,
+         redis_prefix: str,
+         consumer_group: str,
+         consumer_manager=None
+     ):
+         """Initialize the queue discoverer.
+
+         Args:
+             redis_client: Asynchronous Redis client
+             redis_prefix: Redis key prefix
+             consumer_group: Consumer group name
+             consumer_manager: ConsumerManager instance (optional)
+         """
+         self.redis_client = redis_client
+         self.redis_prefix = redis_prefix
+         self.consumer_group = consumer_group
+         self.consumer_manager = consumer_manager
+
+         # Redis key of the queue registry
+         self.queue_registry_key = f"{redis_prefix}:QUEUE_REGISTRY"
+
+         # Set of queues discovered so far
+         self._known_queues = set()
+
+         self._running = False
+         self._discovery_task = None
+
+     async def initial_queue_discovery(self) -> Set[str]:
+         """Run the initial queue discovery once at startup, reading the queue registry instead of SCAN.
+
+         Returns:
+             The set of discovered queue names.
+         """
+         try:
+             new_queues = set()
+             logger.info(f"Starting initial queue discovery from queue registry: {self.queue_registry_key}")
+
+             # Fetch all queues from the queue registry
+             queue_members = await self.redis_client.smembers(self.queue_registry_key)
+             for member in queue_members:
+                 queue_name = member.decode('utf-8') if isinstance(member, bytes) else str(member)
+                 new_queues.add(queue_name)
+                 logger.info(f"Found registered queue: {queue_name}")
+
+             # If the registry is empty, initialize it through QueueRegistry
+             if not new_queues:
+                 logger.warning("Queue registry is empty, initializing from QueueRegistry...")
+                 from jettask.messaging.registry import QueueRegistry
+                 registry = QueueRegistry(
+                     redis_client=None,  # The synchronous client is not needed here
+                     async_redis_client=self.redis_client,
+                     redis_prefix=self.redis_prefix
+                 )
+
+                 # Populate the registry (only needed on first run)
+                 await registry.initialize_from_existing_data()
+
+                 # Fetch the queues from the registry
+                 new_queues = await registry.get_all_queues()
+                 logger.info(f"Got {len(new_queues)} queues from registry manager")
+
+             if new_queues:
+                 # Normalize bytes to str so later comparisons are consistent
+                 new_queues = {q.decode('utf-8') if isinstance(q, bytes) else str(q) for q in new_queues}
+                 logger.info(f"Initial queue discovery found {len(new_queues)} queues: {new_queues}")
+
+                 # Merge the dynamically discovered queues with TASK_CHANGES
+                 all_queues = list(new_queues) + ['TASK_CHANGES']
+
+                 # Update the ConsumerManager's configuration
+                 if self.consumer_manager:
+                     self.consumer_manager.config['queues'] = all_queues
+
+                 # Publish the worker's queue list
+                 await self._update_worker_queues(all_queues)
+
+             self._known_queues = new_queues
+
+             return new_queues
+
+         except Exception as e:
+             logger.error(f"Error in initial queue discovery: {e}")
+             logger.error(traceback.format_exc())
+             return set()
+
+     async def start_discovery(self):
+         """Start periodic queue discovery."""
+         self._running = True
+         self._discovery_task = asyncio.create_task(self._discover_queues_loop())
+         logger.debug("QueueDiscovery started")
+
+     async def stop_discovery(self):
+         """Stop queue discovery."""
+         self._running = False
+         if self._discovery_task:
+             self._discovery_task.cancel()
+             try:
+                 await self._discovery_task
+             except asyncio.CancelledError:
+                 pass
+         logger.debug("QueueDiscovery stopped")
+
+     async def _discover_queues_loop(self):
+         """Periodically discover new queues, reading the queue registry instead of SCAN."""
+         while self._running:
+             try:
+                 new_queues = set()
+
+                 # Fetch all queues from the queue registry
+                 queue_members = await self.redis_client.smembers(self.queue_registry_key)
+                 for member in queue_members:
+                     queue_name = member.decode('utf-8') if isinstance(member, bytes) else str(member)
+                     new_queues.add(queue_name)
+
+                 # Only log when the registry contents change
+                 if new_queues != self._known_queues:
+                     logger.debug(f"Queue registry contains {len(new_queues)} queues: {sorted(new_queues)}")
+
+                 # Create consumer groups for newly discovered queues
+                 # (new queues are expected to be registered by producers automatically)
+                 new_discovered = new_queues - self._known_queues
+                 for queue in new_discovered:
+                     # Build the stream key, preserving any priority suffix
+                     stream_key = f"{self.redis_prefix}:QUEUE:{queue}"
+                     try:
+                         await self.redis_client.xgroup_create(
+                             stream_key, self.consumer_group, id='0', mkstream=True
+                         )
+                         logger.info(f"Created consumer group for new queue: {queue} with stream_key: {stream_key}")
+                     except redis.ResponseError:
+                         pass  # The consumer group already exists
+
+                 # Update the ConsumerManager's queue list (synchronous operation)
+                 if new_queues != self._known_queues:
+                     logger.info(f"Queue discovery: found {len(new_queues)} queues: {new_queues}")
+                     # Merge the dynamically discovered queues with TASK_CHANGES
+                     all_queues = list(new_queues) + ['TASK_CHANGES']
+
+                     # Update the ConsumerManager's configuration
+                     if self.consumer_manager:
+                         self.consumer_manager.config['queues'] = all_queues
+
+                     # Publish the worker's queue list
+                     await self._update_worker_queues(all_queues)
+
+                 self._known_queues = new_queues
+                 await asyncio.sleep(10)  # Short polling interval so new queues are picked up promptly
+
+             except Exception as e:
+                 logger.error(f"Error discovering queues: {e}")
+                 logger.error(traceback.format_exc())
+                 await asyncio.sleep(10)
+
+     async def _update_worker_queues(self, all_queues: list):
+         """Write the worker's queue list to Redis."""
+         if not self.consumer_manager:
+             return
+         try:
+             # ConsumerStrategy has been removed; only the HEARTBEAT strategy remains.
+             # Get the actual consumer_id from the heartbeat strategy.
+             if hasattr(self.consumer_manager, '_heartbeat_strategy'):
+                 actual_consumer_id = self.consumer_manager._heartbeat_strategy.consumer_id
+             else:
+                 # Fall back to the configured value or a default
+                 actual_consumer_id = self.consumer_manager.config.get('consumer_id', 'unknown')
+
+             worker_key = f"{self.redis_prefix}:{self.consumer_manager.config.get('worker_prefix', 'PG_CONSUMER')}:{actual_consumer_id}"
+
+             # Update via the synchronous Redis client
+             self.consumer_manager.redis_client.hset(
+                 worker_key,
+                 'queues',
+                 ','.join(all_queues)
+             )
+             logger.debug(f"Updated worker queues: {all_queues}")
+
+         except Exception as e:
+             logger.error(f"Error updating worker queues: {e}")
+             logger.error(traceback.format_exc())
+
+     def get_known_queues(self) -> Set[str]:
+         """Return a copy of the set of known queues."""
+         return self._known_queues.copy()
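
A minimal sketch of how the new discoverer might be driven, assuming a redis.asyncio client and no ConsumerManager. The connection URL, key prefix, and group name here are illustrative, not values prescribed by the package:

    import asyncio

    import redis.asyncio as redis

    from jettask.persistence.queue_discovery import QueueDiscovery


    async def main():
        # Illustrative connection settings -- substitute your deployment's values.
        client = redis.from_url("redis://localhost:6379/0")
        discovery = QueueDiscovery(
            redis_client=client,
            redis_prefix="jettask",
            consumer_group="jettask_group",
        )

        queues = await discovery.initial_queue_discovery()  # one-off read of the registry
        print(f"Initial queues: {queues}")

        await discovery.start_discovery()  # background 10-second polling loop
        try:
            await asyncio.sleep(60)  # stand-in for the consumer's real work
        finally:
            await discovery.stop_discovery()
            await client.aclose()


    asyncio.run(main())

With consumer_manager left as None, the discoverer only maintains its own queue set and consumer groups; wiring in a ConsumerManager additionally propagates the merged queue list into its config.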
jettask/persistence/task_persistence.py
@@ -0,0 +1,218 @@
+ """Task persistence module.
+
+ Parses Redis Stream messages and batch-inserts task records into PostgreSQL.
+ """
+
+ import json
+ import logging
+ import traceback
+ from typing import Dict, List, Optional, Any
+ from datetime import datetime, timezone
+
+ from sqlalchemy import text
+ from sqlalchemy.ext.asyncio import AsyncSession
+ from sqlalchemy.orm import sessionmaker
+
+ logger = logging.getLogger(__name__)
+
+
+ class TaskPersistence:
+     """Task persistence handler.
+
+     Responsibilities:
+     - Parsing Stream messages into task records
+     - Batch-inserting tasks into the PostgreSQL tasks table
+     - Falling back to smaller batches when a bulk insert fails
+     """
+
+     def __init__(
+         self,
+         async_session_local: sessionmaker,
+         namespace_id: str,
+         namespace_name: str
+     ):
+         """Initialize the task persistence handler.
+
+         Args:
+             async_session_local: SQLAlchemy session factory
+             namespace_id: Namespace ID
+             namespace_name: Namespace name
+         """
+         self.AsyncSessionLocal = async_session_local
+         self.namespace_id = namespace_id
+         self.namespace_name = namespace_name
+
+     def parse_stream_message(self, task_id: str, data: dict) -> Optional[dict]:
+         """Parse a Stream message into a task record (with the full field set).
+
+         Args:
+             task_id: Task ID (Redis Stream ID)
+             data: Message payload
+
+         Returns:
+             The parsed task record as a dict, or None on failure.
+         """
+         try:
+             from jettask.utils.serializer import loads_str
+
+             if b'data' in data:
+                 task_data = loads_str(data[b'data'])
+             else:
+                 task_data = {}
+                 for k, v in data.items():
+                     key = k.decode('utf-8') if isinstance(k, bytes) else k
+                     if isinstance(v, bytes):
+                         try:
+                             value = loads_str(v)
+                         except Exception:
+                             value = str(v)
+                     else:
+                         value = v
+                     task_data[key] = value
+
+             # If a namespace is configured, check whether the message belongs to it
+             # if self.namespace_id:
+             #     msg_namespace_id = task_data.get('__namespace_id')
+             #     # Skip messages without a namespace_id unless this is the default namespace
+             #     if msg_namespace_id != self.namespace_id:
+             #         if not (msg_namespace_id is None and self.namespace_id == 'default'):
+             #             logger.debug(f"Skipping message from different namespace: {msg_namespace_id} != {self.namespace_id}")
+             #             return None
+
+             queue_name = task_data['queue']
+             task_name = task_data.get('name', task_data.get('task', 'unknown'))
+
+             created_at = None
+             if 'trigger_time' in task_data:
+                 try:
+                     timestamp = float(task_data['trigger_time'])
+                     created_at = datetime.fromtimestamp(timestamp, tz=timezone.utc)
+                 except Exception:
+                     pass
+
+             # Return the full field set, including fields that are None for new tasks
+             return {
+                 'id': task_id,
+                 'queue_name': queue_name,
+                 'task_name': task_name,
+                 'task_data': json.dumps(task_data),
+                 'priority': int(task_data.get('priority', 0)),
+                 'retry_count': int(task_data.get('retry', 0)),
+                 'max_retry': int(task_data.get('max_retry', 3)),
+                 'status': 'pending',
+                 'result': None,  # A new task has no result yet
+                 'error_message': None,  # A new task has no error yet
+                 'created_at': created_at,
+                 'started_at': None,  # A new task has not started yet
+                 'completed_at': None,  # A new task has not completed yet
+                 'scheduled_task_id': task_data.get('scheduled_task_id'),  # Scheduler task ID
+                 'metadata': json.dumps(task_data.get('metadata', {})),
+                 'worker_id': None,  # A new task has no worker assigned yet
+                 'execution_time': None,  # A new task has no execution time yet
+                 'duration': None,  # A new task has no duration yet
+                 'namespace_id': self.namespace_id  # Attach the namespace ID
+             }
+
+         except Exception as e:
+             logger.error(f"Error parsing stream message for task {task_id}: {e}")
+             logger.error(traceback.format_exc())
+             return None
+
+     async def insert_tasks(self, tasks: List[Dict[str, Any]]) -> int:
+         """Batch-insert tasks into PostgreSQL (tasks table only).
+
+         Args:
+             tasks: List of task records
+
+         Returns:
+             The number of rows actually inserted.
+         """
+         if not tasks:
+             return 0
+
+         logger.info(f"Attempting to insert {len(tasks)} tasks to tasks table")
+
+         try:
+             async with self.AsyncSessionLocal() as session:
+                 # Bulk INSERT into the tasks table, ignoring conflicts:
+                 # stream_id is unique in practice, so duplicates can simply be skipped
+                 tasks_query = text("""
+                     INSERT INTO tasks (stream_id, queue, namespace, scheduled_task_id,
+                                        payload, priority, created_at, source, metadata)
+                     VALUES (:stream_id, :queue, :namespace, :scheduled_task_id,
+                             CAST(:payload AS jsonb), :priority, :created_at, :source, CAST(:metadata AS jsonb))
+                     ON CONFLICT DO NOTHING
+                     RETURNING stream_id;
+                 """)
+
+                 # Prepare the rows for the tasks table
+                 tasks_data = []
+                 for task in tasks:
+                     task_data = json.loads(task['task_data'])
+
+                     # Read scheduled_task_id from the task payload
+                     scheduled_task_id = task_data.get('scheduled_task_id') or task.get('scheduled_task_id')
+
+                     # Infer the task source from the presence of scheduled_task_id
+                     if scheduled_task_id:
+                         source = 'scheduler'  # Scheduled task
+                     else:
+                         source = 'redis_stream'  # Regular task
+
+                     tasks_data.append({
+                         'stream_id': task['id'],  # Redis Stream ID serves as stream_id
+                         'queue': task['queue_name'],
+                         'namespace': self.namespace_name,
+                         'scheduled_task_id': str(scheduled_task_id) if scheduled_task_id else None,
+                         'payload': task['task_data'],  # The complete task payload
+                         'priority': task['priority'],
+                         'created_at': task['created_at'],
+                         'source': source,
+                         'metadata': task.get('metadata', '{}')
+                     })
+
+                 # Bulk insert using executemany for performance
+                 logger.debug(f"Executing batch insert with {len(tasks_data)} tasks")
+
+                 try:
+                     result = await session.execute(tasks_query, tasks_data)
+
+                     # Number of rows actually inserted
+                     inserted_count = result.rowcount
+
+                     await session.commit()
+                     logger.debug("Tasks table batch insert transaction completed")
+                     return inserted_count
+
+                 except Exception as e:
+                     logger.error(f"Error in batch insert, trying fallback: {e}")
+                     await session.rollback()
+
+                     # If the bulk insert fails, fall back to small batches (10 rows each)
+                     batch_size = 10
+                     total_inserted = 0
+
+                     for i in range(0, len(tasks_data), batch_size):
+                         batch = tasks_data[i:i+batch_size]
+                         try:
+                             result = await session.execute(tasks_query, batch)
+                             batch_inserted = result.rowcount
+                             if batch_inserted > 0:
+                                 total_inserted += batch_inserted
+                             await session.commit()
+                         except Exception as batch_error:
+                             logger.error(f"Batch {i//batch_size + 1} failed: {batch_error}")
+                             await session.rollback()
+
+                     if total_inserted > 0:
+                         logger.info(f"Fallback insert completed: {total_inserted} tasks inserted")
+                     else:
+                         logger.info("No new tasks inserted in fallback mode")
+
+                     return total_inserted
+
+         except Exception as e:
+             logger.error(f"Error inserting tasks to PostgreSQL: {e}")
+             logger.error(traceback.format_exc())
+             return 0
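
A minimal sketch of how a consumer might drive the new persistence handler, assuming an asyncpg-backed engine and a tasks table already provisioned by your deployment's schema. The DSN and sample record are illustrative; the module annotates its factory argument as sessionmaker, but any async session factory such as SQLAlchemy 2.0's async_sessionmaker works at runtime:

    import asyncio
    import json
    from datetime import datetime, timezone

    from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker

    from jettask.persistence.task_persistence import TaskPersistence


    async def main():
        # Illustrative DSN -- substitute your own database.
        engine = create_async_engine("postgresql+asyncpg://user:pass@localhost/jettask")
        session_factory = async_sessionmaker(engine, expire_on_commit=False)

        persistence = TaskPersistence(
            async_session_local=session_factory,
            namespace_id="default",
            namespace_name="default",
        )

        # A record shaped like parse_stream_message's output; in production this
        # would come from parse_stream_message applied to an XREADGROUP message.
        task = {
            'id': '1700000000000-0',  # Redis Stream ID
            'queue_name': 'orders',
            'task_data': json.dumps({'queue': 'orders', 'name': 'send_email'}),
            'priority': 0,
            'created_at': datetime.now(timezone.utc),
            'metadata': '{}',
        }
        inserted = await persistence.insert_tasks([task])
        print(f"Inserted {inserted} row(s)")

        await engine.dispose()


    asyncio.run(main())

Because the INSERT uses ON CONFLICT DO NOTHING keyed on the stream ID, re-running the same batch after a partial failure is safe: already-persisted rows are skipped and only new ones count toward the returned total.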