jettask 0.2.18__py3-none-any.whl → 0.2.20__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- jettask/__init__.py +60 -2
- jettask/cli.py +314 -228
- jettask/config/__init__.py +9 -1
- jettask/config/config.py +245 -0
- jettask/config/env_loader.py +381 -0
- jettask/config/lua_scripts.py +158 -0
- jettask/config/nacos_config.py +132 -5
- jettask/core/__init__.py +1 -1
- jettask/core/app.py +1573 -666
- jettask/core/app_importer.py +33 -16
- jettask/core/container.py +532 -0
- jettask/core/task.py +1 -4
- jettask/core/unified_manager_base.py +2 -2
- jettask/executor/__init__.py +38 -0
- jettask/executor/core.py +625 -0
- jettask/executor/executor.py +338 -0
- jettask/executor/orchestrator.py +290 -0
- jettask/executor/process_entry.py +638 -0
- jettask/executor/task_executor.py +317 -0
- jettask/messaging/__init__.py +68 -0
- jettask/messaging/event_pool.py +2188 -0
- jettask/messaging/reader.py +519 -0
- jettask/messaging/registry.py +266 -0
- jettask/messaging/scanner.py +369 -0
- jettask/messaging/sender.py +312 -0
- jettask/persistence/__init__.py +118 -0
- jettask/persistence/backlog_monitor.py +567 -0
- jettask/{backend/data_access.py → persistence/base.py} +58 -57
- jettask/persistence/consumer.py +315 -0
- jettask/{core → persistence}/db_manager.py +23 -22
- jettask/persistence/maintenance.py +81 -0
- jettask/persistence/message_consumer.py +259 -0
- jettask/{backend/namespace_data_access.py → persistence/namespace.py} +66 -98
- jettask/persistence/offline_recovery.py +196 -0
- jettask/persistence/queue_discovery.py +215 -0
- jettask/persistence/task_persistence.py +218 -0
- jettask/persistence/task_updater.py +583 -0
- jettask/scheduler/__init__.py +2 -2
- jettask/scheduler/loader.py +6 -5
- jettask/scheduler/run_scheduler.py +1 -1
- jettask/scheduler/scheduler.py +7 -7
- jettask/scheduler/{unified_scheduler_manager.py → scheduler_coordinator.py} +18 -13
- jettask/task/__init__.py +16 -0
- jettask/{router.py → task/router.py} +26 -8
- jettask/task/task_center/__init__.py +9 -0
- jettask/task/task_executor.py +318 -0
- jettask/task/task_registry.py +291 -0
- jettask/test_connection_monitor.py +73 -0
- jettask/utils/__init__.py +31 -1
- jettask/{monitor/run_backlog_collector.py → utils/backlog_collector.py} +1 -1
- jettask/utils/db_connector.py +1629 -0
- jettask/{db_init.py → utils/db_init.py} +1 -1
- jettask/utils/rate_limit/__init__.py +30 -0
- jettask/utils/rate_limit/concurrency_limiter.py +665 -0
- jettask/utils/rate_limit/config.py +145 -0
- jettask/utils/rate_limit/limiter.py +41 -0
- jettask/utils/rate_limit/manager.py +269 -0
- jettask/utils/rate_limit/qps_limiter.py +154 -0
- jettask/utils/rate_limit/task_limiter.py +384 -0
- jettask/utils/serializer.py +3 -0
- jettask/{monitor/stream_backlog_monitor.py → utils/stream_backlog.py} +14 -6
- jettask/utils/time_sync.py +173 -0
- jettask/webui/__init__.py +27 -0
- jettask/{api/v1 → webui/api}/alerts.py +1 -1
- jettask/{api/v1 → webui/api}/analytics.py +2 -2
- jettask/{api/v1 → webui/api}/namespaces.py +1 -1
- jettask/{api/v1 → webui/api}/overview.py +1 -1
- jettask/{api/v1 → webui/api}/queues.py +3 -3
- jettask/{api/v1 → webui/api}/scheduled.py +1 -1
- jettask/{api/v1 → webui/api}/settings.py +1 -1
- jettask/{api.py → webui/app.py} +253 -145
- jettask/webui/namespace_manager/__init__.py +10 -0
- jettask/{multi_namespace_consumer.py → webui/namespace_manager/multi.py} +69 -22
- jettask/{unified_consumer_manager.py → webui/namespace_manager/unified.py} +1 -1
- jettask/{run.py → webui/run.py} +2 -2
- jettask/{services → webui/services}/__init__.py +1 -3
- jettask/{services → webui/services}/overview_service.py +34 -16
- jettask/{services → webui/services}/queue_service.py +1 -1
- jettask/{backend → webui/services}/queue_stats_v2.py +1 -1
- jettask/{services → webui/services}/settings_service.py +1 -1
- jettask/worker/__init__.py +53 -0
- jettask/worker/lifecycle.py +1507 -0
- jettask/worker/manager.py +583 -0
- jettask/{core/offline_worker_recovery.py → worker/recovery.py} +268 -175
- {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/METADATA +2 -71
- jettask-0.2.20.dist-info/RECORD +145 -0
- jettask/__main__.py +0 -140
- jettask/api/__init__.py +0 -103
- jettask/backend/__init__.py +0 -1
- jettask/backend/api/__init__.py +0 -3
- jettask/backend/api/v1/__init__.py +0 -17
- jettask/backend/api/v1/monitoring.py +0 -431
- jettask/backend/api/v1/namespaces.py +0 -504
- jettask/backend/api/v1/queues.py +0 -342
- jettask/backend/api/v1/tasks.py +0 -367
- jettask/backend/core/__init__.py +0 -3
- jettask/backend/core/cache.py +0 -221
- jettask/backend/core/database.py +0 -200
- jettask/backend/core/exceptions.py +0 -102
- jettask/backend/dependencies.py +0 -261
- jettask/backend/init_meta_db.py +0 -158
- jettask/backend/main.py +0 -1426
- jettask/backend/main_unified.py +0 -78
- jettask/backend/main_v2.py +0 -394
- jettask/backend/models/__init__.py +0 -3
- jettask/backend/models/requests.py +0 -236
- jettask/backend/models/responses.py +0 -230
- jettask/backend/namespace_api_old.py +0 -267
- jettask/backend/services/__init__.py +0 -3
- jettask/backend/start.py +0 -42
- jettask/backend/unified_api_router.py +0 -1541
- jettask/cleanup_deprecated_tables.sql +0 -16
- jettask/core/consumer_manager.py +0 -1695
- jettask/core/delay_scanner.py +0 -256
- jettask/core/event_pool.py +0 -1700
- jettask/core/heartbeat_process.py +0 -222
- jettask/core/task_batch.py +0 -153
- jettask/core/worker_scanner.py +0 -271
- jettask/executors/__init__.py +0 -5
- jettask/executors/asyncio.py +0 -876
- jettask/executors/base.py +0 -30
- jettask/executors/common.py +0 -148
- jettask/executors/multi_asyncio.py +0 -309
- jettask/gradio_app.py +0 -570
- jettask/integrated_gradio_app.py +0 -1088
- jettask/main.py +0 -0
- jettask/monitoring/__init__.py +0 -3
- jettask/pg_consumer.py +0 -1896
- jettask/run_monitor.py +0 -22
- jettask/run_webui.py +0 -148
- jettask/scheduler/multi_namespace_scheduler.py +0 -294
- jettask/scheduler/unified_manager.py +0 -450
- jettask/task_center_client.py +0 -150
- jettask/utils/serializer_optimized.py +0 -33
- jettask/webui_exceptions.py +0 -67
- jettask-0.2.18.dist-info/RECORD +0 -150
- /jettask/{constants.py → config/constants.py} +0 -0
- /jettask/{backend/config.py → config/task_center.py} +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/pg_consumer_v2.py +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/add_execution_time_field.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_new_tables.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_tables_v3.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/migrate_to_new_structure.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/modify_time_fields.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql_utils.py +0 -0
- /jettask/{models.py → persistence/models.py} +0 -0
- /jettask/scheduler/{manager.py → task_crud.py} +0 -0
- /jettask/{schema.sql → schemas/schema.sql} +0 -0
- /jettask/{task_center.py → task/task_center/client.py} +0 -0
- /jettask/{monitoring → utils}/file_watcher.py +0 -0
- /jettask/{services/redis_monitor_service.py → utils/redis_monitor.py} +0 -0
- /jettask/{api/v1 → webui/api}/__init__.py +0 -0
- /jettask/{webui_config.py → webui/config.py} +0 -0
- /jettask/{webui_models → webui/models}/__init__.py +0 -0
- /jettask/{webui_models → webui/models}/namespace.py +0 -0
- /jettask/{services → webui/services}/alert_service.py +0 -0
- /jettask/{services → webui/services}/analytics_service.py +0 -0
- /jettask/{services → webui/services}/scheduled_task_service.py +0 -0
- /jettask/{services → webui/services}/task_service.py +0 -0
- /jettask/{webui_sql → webui/sql}/batch_upsert_functions.sql +0 -0
- /jettask/{webui_sql → webui/sql}/verify_database.sql +0 -0
- {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/WHEEL +0 -0
- {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/entry_points.txt +0 -0
- {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/licenses/LICENSE +0 -0
- {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/top_level.txt +0 -0
jettask/persistence/queue_discovery.py (new file)
@@ -0,0 +1,215 @@
+"""Queue discovery module
+
+Responsible for dynamically discovering new queues and creating consumer groups for them.
+Uses a queue registry instead of the SCAN command for better performance.
+"""
+
+import asyncio
+import logging
+import traceback
+from typing import Set
+
+import redis.asyncio as redis
+from redis.asyncio import Redis
+
+logger = logging.getLogger(__name__)
+
+
+class QueueDiscovery:
+    """Queue discoverer
+
+    Responsibilities:
+    - Initial queue discovery (runs once at startup)
+    - Periodically discovering new queues
+    - Creating consumer groups for new queues
+    - Updating the ConsumerManager's queue list
+    """
+
+    def __init__(
+        self,
+        redis_client: Redis,
+        redis_prefix: str,
+        consumer_group: str,
+        consumer_manager=None
+    ):
+        """Initialize the queue discoverer
+
+        Args:
+            redis_client: Async Redis client
+            redis_prefix: Redis key prefix
+            consumer_group: Consumer group name
+            consumer_manager: ConsumerManager instance (optional)
+        """
+        self.redis_client = redis_client
+        self.redis_prefix = redis_prefix
+        self.consumer_group = consumer_group
+        self.consumer_manager = consumer_manager
+
+        # Redis key of the queue registry
+        self.queue_registry_key = f"{redis_prefix}:QUEUE_REGISTRY"
+
+        # Set of known queues
+        self._known_queues = set()
+
+        self._running = False
+        self._discovery_task = None
+
+    async def initial_queue_discovery(self) -> Set[str]:
+        """Initial queue discovery, executed once at startup - uses the queue registry instead of SCAN
+
+        Returns:
+            The set of discovered queues
+        """
+        try:
+            new_queues = set()
+            logger.info(f"Starting initial queue discovery from queue registry: {self.queue_registry_key}")
+
+            # Fetch all queues from the queue registry
+            queue_members = await self.redis_client.smembers(self.queue_registry_key.encode())
+            for queue_name_bytes in queue_members:
+                queue_name = queue_name_bytes.decode('utf-8') if isinstance(queue_name_bytes, bytes) else str(queue_name_bytes)
+                new_queues.add(queue_name)
+                logger.info(f"Found registered queue: {queue_name}")
+
+            # If the registry is empty, initialize it via the RegistryManager
+            if not new_queues:
+                logger.warning(f"Queue registry is empty, initializing from RegistryManager...")
+                from jettask.messaging.registry import QueueRegistry
+                registry = QueueRegistry(
+                    redis_client=None,  # sync client, not needed here
+                    async_redis_client=self.redis_client,
+                    redis_prefix=self.redis_prefix
+                )
+
+                # Initialize the registry (only needed on first run)
+                await registry.initialize_from_existing_data()
+
+                # Fetch the queues from the registry
+                new_queues = await registry.get_all_queues()
+                logger.info(f"Got {len(new_queues)} queues from registry manager")
+
+            if new_queues:
+                logger.info(f"Initial queue discovery found {len(new_queues)} queues: {new_queues}")
+                # Merge all queues: TASK_CHANGES + dynamically discovered queues
+                # Convert bytes to strings
+                string_queues = []
+                for q in new_queues:
+                    if isinstance(q, bytes):
+                        string_queues.append(q.decode('utf-8'))
+                    else:
+                        string_queues.append(str(q))
+                all_queues = string_queues + ['TASK_CHANGES']
+
+                # Update the ConsumerManager's configuration
+                if self.consumer_manager:
+                    self.consumer_manager.config['queues'] = all_queues
+
+                    # Update the worker's queue information
+                    await self._update_worker_queues(all_queues)
+
+            self._known_queues = new_queues
+
+            return new_queues
+
+        except Exception as e:
+            logger.error(f"Error in initial queue discovery: {e}")
+            logger.error(traceback.format_exc())
+            return set()
+
+    async def start_discovery(self):
+        """Start periodic queue discovery"""
+        self._running = True
+        self._discovery_task = asyncio.create_task(self._discover_queues_loop())
+        logger.debug("QueueDiscovery started")
+
+    async def stop_discovery(self):
+        """Stop queue discovery"""
+        self._running = False
+        if self._discovery_task:
+            self._discovery_task.cancel()
+            try:
+                await self._discovery_task
+            except asyncio.CancelledError:
+                pass
+        logger.debug("QueueDiscovery stopped")
+
+    async def _discover_queues_loop(self):
+        """Periodically discover new queues - uses the queue registry instead of SCAN"""
+        while self._running:
+            try:
+                new_queues = set()
+
+                # Fetch all queues from the queue registry
+                queue_members = await self.redis_client.smembers(self.queue_registry_key)
+                for queue_name_bytes in queue_members:
+                    queue_name = queue_name_bytes.decode('utf-8') if isinstance(queue_name_bytes, bytes) else str(queue_name_bytes)
+                    new_queues.add(queue_name)
+
+                # Optimization: only log when the queue count or contents change
+                if len(new_queues) != len(self._known_queues) or new_queues != self._known_queues:
+                    logger.debug(f"Queue registry contains {len(new_queues)} queues: {sorted(new_queues)}")
+
+                # Create consumer groups for newly discovered queues (note: new queues should be registered automatically by producers)
+                new_discovered = new_queues - self._known_queues
+                if new_discovered:
+                    for queue in new_discovered:
+                        # Build the stream_key correctly, preserving the priority part
+                        stream_key = f"{self.redis_prefix}:QUEUE:{queue}"
+                        try:
+                            await self.redis_client.xgroup_create(
+                                stream_key, self.consumer_group, id='0', mkstream=True
+                            )
+                            logger.info(f"Created consumer group for new queue: {queue} with stream_key: {stream_key}")
+                        except redis.ResponseError:
+                            pass
+
+                # Update the ConsumerManager's queue list (synchronous operation)
+                if new_queues != self._known_queues:
+                    logger.info(f"Queue discovery: found {len(new_queues)} queues: {new_queues}")
+                    # Merge all queues: TASK_CHANGES + dynamically discovered queues
+                    all_queues = list(new_queues) + ['TASK_CHANGES']
+
+                    # Update the ConsumerManager's configuration
+                    if self.consumer_manager:
+                        self.consumer_manager.config['queues'] = all_queues
+
+                        # Update the worker's queue information
+                        await self._update_worker_queues(all_queues)
+
+                    self._known_queues = new_queues
+                await asyncio.sleep(10)  # keep the check interval short so new queues are discovered promptly
+
+            except Exception as e:
+                logger.error(f"Error discovering queues: {e}")
+                logger.error(traceback.format_exc())
+                await asyncio.sleep(10)
+
+    async def _update_worker_queues(self, all_queues: list):
+        """Update the worker's queue information in Redis"""
+        try:
+            # ConsumerStrategy has been removed; only the HEARTBEAT strategy is used now
+
+            # Get the actual consumer_id (from the heartbeat strategy)
+            if hasattr(self.consumer_manager, '_heartbeat_strategy'):
+                actual_consumer_id = self.consumer_manager._heartbeat_strategy.consumer_id
+            else:
+                # Get it from the config, or fall back to a default
+                actual_consumer_id = self.consumer_manager.config.get('consumer_id', 'unknown')
+
+            worker_key = f"{self.redis_prefix}:{self.consumer_manager.config.get('worker_prefix', 'PG_CONSUMER')}:{actual_consumer_id}"
+
+            # Update via the synchronous Redis client
+            self.consumer_manager.redis_client.hset(
+                worker_key,
+                'queues',
+                ','.join(all_queues)
+            )
+            logger.debug(f"Updated worker queues: {all_queues}")
+
+        except Exception as e:
+            logger.error(f"Error updating worker queues: {e}")
+            logger.error(traceback.format_exc())
+
+    def get_known_queues(self) -> Set[str]:
+        """Get the set of known queues"""
+        return self._known_queues.copy()
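The intended flow of the module above is: call initial_queue_discovery() once at startup, then start_discovery() to launch a background loop that polls the registry roughly every 10 seconds and creates a consumer group for each new queue's stream. A minimal usage sketch follows; the Redis URL, prefix, and group name are illustrative placeholders, not values shipped by the package:

import asyncio
import redis.asyncio as redis

from jettask.persistence.queue_discovery import QueueDiscovery


async def main():
    client = redis.from_url("redis://localhost:6379/0")  # hypothetical connection
    discovery = QueueDiscovery(
        redis_client=client,
        redis_prefix="jettask",           # registry key becomes jettask:QUEUE_REGISTRY
        consumer_group="task_consumers",  # group created on each jettask:QUEUE:<name> stream
        consumer_manager=None,            # optional; worker bookkeeping is skipped without it
    )
    queues = await discovery.initial_queue_discovery()  # one-shot discovery at startup
    print(f"initial queues: {queues}")
    await discovery.start_discovery()  # background loop, polls the registry every ~10s
    await asyncio.sleep(30)
    await discovery.stop_discovery()
    await client.aclose()


asyncio.run(main())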
jettask/persistence/task_persistence.py (new file)
@@ -0,0 +1,218 @@
+"""Task persistence module
+
+Responsible for parsing Redis Stream messages and batch-inserting task data into PostgreSQL.
+"""
+
+import json
+import logging
+import traceback
+from typing import Dict, List, Optional, Any
+from datetime import datetime, timezone
+
+from sqlalchemy import text
+from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy.orm import sessionmaker
+
+logger = logging.getLogger(__name__)
+
+
+class TaskPersistence:
+    """Task persistence handler
+
+    Responsibilities:
+    - Parse Stream messages into task records
+    - Batch-insert tasks into the PostgreSQL tasks table
+    - Handle the fallback strategy when inserts fail
+    """
+
+    def __init__(
+        self,
+        async_session_local: sessionmaker,
+        namespace_id: str,
+        namespace_name: str
+    ):
+        """Initialize the task persistence handler
+
+        Args:
+            async_session_local: SQLAlchemy session factory
+            namespace_id: Namespace ID
+            namespace_name: Namespace name
+        """
+        self.AsyncSessionLocal = async_session_local
+        self.namespace_id = namespace_id
+        self.namespace_name = namespace_name
+
+    def parse_stream_message(self, task_id: str, data: dict) -> Optional[dict]:
+        """Parse a Stream message into task information (returns the full set of fields)
+
+        Args:
+            task_id: Task ID (Redis Stream ID)
+            data: Message data
+
+        Returns:
+            The parsed task info dict, or None on failure
+        """
+        try:
+            from jettask.utils.serializer import loads_str
+
+            if b'data' in data:
+                task_data = loads_str(data[b'data'])
+            else:
+                task_data = {}
+                for k, v in data.items():
+                    key = k.decode('utf-8') if isinstance(k, bytes) else k
+                    if isinstance(v, bytes):
+                        try:
+                            value = loads_str(v)
+                        except:
+                            value = str(v)
+                    else:
+                        value = v
+                    task_data[key] = value
+
+            # If a namespace is configured, check whether the message belongs to it
+            # if self.namespace_id:
+            #     msg_namespace_id = task_data.get('__namespace_id')
+            #     # If the message has no namespace_id and this is not the default namespace, skip it
+            #     if msg_namespace_id != self.namespace_id:
+            #         if not (msg_namespace_id is None and self.namespace_id == 'default'):
+            #             logger.debug(f"Skipping message from different namespace: {msg_namespace_id} != {self.namespace_id}")
+            #             return None
+
+            queue_name = task_data['queue']
+            task_name = task_data.get('name', task_data.get('task', 'unknown'))
+
+            created_at = None
+            if 'trigger_time' in task_data:
+                try:
+                    timestamp = float(task_data['trigger_time'])
+                    created_at = datetime.fromtimestamp(timestamp, tz=timezone.utc)
+                except:
+                    pass
+
+            # Return the full set of fields, including all fields that may be None
+            return {
+                'id': task_id,
+                'queue_name': queue_name,
+                'task_name': task_name,
+                'task_data': json.dumps(task_data),
+                'priority': int(task_data.get('priority', 0)),
+                'retry_count': int(task_data.get('retry', 0)),
+                'max_retry': int(task_data.get('max_retry', 3)),
+                'status': 'pending',
+                'result': None,  # a new task has no result
+                'error_message': None,  # a new task has no error message
+                'created_at': created_at,
+                'started_at': None,  # a new task has not started yet
+                'completed_at': None,  # a new task has not completed yet
+                'scheduled_task_id': task_data.get('scheduled_task_id'),  # scheduled task ID
+                'metadata': json.dumps(task_data.get('metadata', {})),
+                'worker_id': None,  # a new task has no assigned worker yet
+                'execution_time': None,  # a new task has no execution time yet
+                'duration': None,  # a new task has no duration yet
+                'namespace_id': self.namespace_id  # attach the namespace ID
+            }
+
+        except Exception as e:
+            logger.error(f"Error parsing stream message for task {task_id}: {e}")
+            logger.error(traceback.format_exc())
+            return None
+
+    async def insert_tasks(self, tasks: List[Dict[str, Any]]) -> int:
+        """Batch-insert tasks into PostgreSQL (tasks table only)
+
+        Args:
+            tasks: List of task info dicts
+
+        Returns:
+            The number of records actually inserted
+        """
+        if not tasks:
+            return 0
+
+        logger.info(f"Attempting to insert {len(tasks)} tasks to tasks table")
+
+        try:
+            async with self.AsyncSessionLocal() as session:
+                # Insert into the tasks table - batch INSERT that ignores conflicts
+                # Since stream_id is unique in practice, duplicates can simply be ignored
+                tasks_query = text("""
+                    INSERT INTO tasks (stream_id, queue, namespace, scheduled_task_id,
+                                       payload, priority, created_at, source, metadata)
+                    VALUES (:stream_id, :queue, :namespace, :scheduled_task_id,
+                            CAST(:payload AS jsonb), :priority, :created_at, :source, CAST(:metadata AS jsonb))
+                    ON CONFLICT DO NOTHING
+                    RETURNING stream_id;
+                """)
+
+                # Prepare the rows for the tasks table
+                tasks_data = []
+                for task in tasks:
+                    task_data = json.loads(task['task_data'])
+
+                    # Get scheduled_task_id from task_data
+                    scheduled_task_id = task_data.get('scheduled_task_id') or task.get('scheduled_task_id')
+
+                    # Determine the task source by whether scheduled_task_id is present
+                    if scheduled_task_id:
+                        source = 'scheduler'  # scheduled task
+                    else:
+                        source = 'redis_stream'  # regular task
+
+                    tasks_data.append({
+                        'stream_id': task['id'],  # the Redis Stream ID serves as stream_id
+                        'queue': task['queue_name'],
+                        'namespace': self.namespace_name,
+                        'scheduled_task_id': str(scheduled_task_id) if scheduled_task_id else None,
+                        'payload': task['task_data'],  # the complete task data
+                        'priority': task['priority'],
+                        'created_at': task['created_at'],
+                        'source': source,
+                        'metadata': task.get('metadata', '{}')
+                    })
+
+                # Batch insert - use executemany for better performance
+                logger.debug(f"Executing batch insert with {len(tasks_data)} tasks")
+
+                try:
+                    # Batch insert via executemany
+                    result = await session.execute(tasks_query, tasks_data)
+
+                    # Get the number of records actually inserted
+                    inserted_count = result.rowcount
+
+                    await session.commit()
+                    logger.debug("Tasks table batch insert transaction completed")
+                    return inserted_count
+
+                except Exception as e:
+                    logger.error(f"Error in batch insert, trying fallback: {e}")
+                    await session.rollback()
+
+                    # If the batch insert fails, fall back to small batches (10 rows each)
+                    batch_size = 10
+                    total_inserted = 0
+
+                    for i in range(0, len(tasks_data), batch_size):
+                        batch = tasks_data[i:i+batch_size]
+                        try:
+                            result = await session.execute(tasks_query, batch)
+                            batch_inserted = result.rowcount
+                            if batch_inserted > 0:
+                                total_inserted += batch_inserted
+                            await session.commit()
+                        except Exception as batch_error:
+                            logger.error(f"Batch {i//batch_size + 1} failed: {batch_error}")
+                            await session.rollback()
+
+                    if total_inserted > 0:
+                        logger.info(f"Fallback insert completed: {total_inserted} tasks inserted")
+                    else:
+                        logger.info(f"No new tasks inserted in fallback mode")
+
+                    return total_inserted
+
+        except Exception as e:
+            logger.error(f"Error inserting tasks to PostgreSQL: {e}")
+            logger.error(traceback.format_exc())
+            return 0
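Putting the two halves of this module together: a consumer reads raw Stream entries, parses each with parse_stream_message(), and flushes batches through insert_tasks(). A minimal sketch under stated assumptions: the DSN and namespace values are placeholders, the tasks table from the package's schema must already exist, and the payload encoding must match whatever jettask.utils.serializer.loads_str expects (JSON is assumed here):

import asyncio
from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine

from jettask.persistence.task_persistence import TaskPersistence


async def main():
    engine = create_async_engine("postgresql+asyncpg://user:pass@localhost/jettask")  # hypothetical DSN
    session_factory = async_sessionmaker(engine, expire_on_commit=False)

    persistence = TaskPersistence(
        async_session_local=session_factory,
        namespace_id="default",    # placeholder namespace
        namespace_name="default",
    )

    # Raw bytes shaped like a redis-py Stream entry; assumes loads_str decodes JSON.
    raw = {b"data": b'{"queue": "orders", "name": "send_email", "trigger_time": 1700000000.0}'}
    task = persistence.parse_stream_message("1700000000000-0", raw)
    if task:
        inserted = await persistence.insert_tasks([task])
        print(f"inserted {inserted} task(s)")
    await engine.dispose()


asyncio.run(main())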