jettask 0.2.1__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jettask/constants.py +213 -0
- jettask/core/app.py +525 -205
- jettask/core/cli.py +193 -185
- jettask/core/consumer_manager.py +126 -34
- jettask/core/context.py +3 -0
- jettask/core/enums.py +137 -0
- jettask/core/event_pool.py +501 -168
- jettask/core/message.py +147 -0
- jettask/core/offline_worker_recovery.py +181 -114
- jettask/core/task.py +10 -174
- jettask/core/task_batch.py +153 -0
- jettask/core/unified_manager_base.py +243 -0
- jettask/core/worker_scanner.py +54 -54
- jettask/executors/asyncio.py +184 -64
- jettask/webui/backend/config.py +51 -0
- jettask/webui/backend/data_access.py +2083 -92
- jettask/webui/backend/data_api.py +3294 -0
- jettask/webui/backend/dependencies.py +261 -0
- jettask/webui/backend/init_meta_db.py +158 -0
- jettask/webui/backend/main.py +1358 -69
- jettask/webui/backend/main_unified.py +78 -0
- jettask/webui/backend/main_v2.py +394 -0
- jettask/webui/backend/namespace_api.py +295 -0
- jettask/webui/backend/namespace_api_old.py +294 -0
- jettask/webui/backend/namespace_data_access.py +611 -0
- jettask/webui/backend/queue_backlog_api.py +727 -0
- jettask/webui/backend/queue_stats_v2.py +521 -0
- jettask/webui/backend/redis_monitor_api.py +476 -0
- jettask/webui/backend/unified_api_router.py +1601 -0
- jettask/webui/db_init.py +204 -32
- jettask/webui/frontend/package-lock.json +492 -1
- jettask/webui/frontend/package.json +4 -1
- jettask/webui/frontend/src/App.css +105 -7
- jettask/webui/frontend/src/App.jsx +49 -20
- jettask/webui/frontend/src/components/NamespaceSelector.jsx +166 -0
- jettask/webui/frontend/src/components/QueueBacklogChart.jsx +298 -0
- jettask/webui/frontend/src/components/QueueBacklogTrend.jsx +638 -0
- jettask/webui/frontend/src/components/QueueDetailsTable.css +65 -0
- jettask/webui/frontend/src/components/QueueDetailsTable.jsx +487 -0
- jettask/webui/frontend/src/components/QueueDetailsTableV2.jsx +465 -0
- jettask/webui/frontend/src/components/ScheduledTaskFilter.jsx +423 -0
- jettask/webui/frontend/src/components/TaskFilter.jsx +425 -0
- jettask/webui/frontend/src/components/TimeRangeSelector.css +21 -0
- jettask/webui/frontend/src/components/TimeRangeSelector.jsx +160 -0
- jettask/webui/frontend/src/components/layout/AppLayout.css +95 -0
- jettask/webui/frontend/src/components/layout/AppLayout.jsx +49 -0
- jettask/webui/frontend/src/components/layout/Header.css +34 -10
- jettask/webui/frontend/src/components/layout/Header.jsx +31 -23
- jettask/webui/frontend/src/components/layout/SideMenu.css +137 -0
- jettask/webui/frontend/src/components/layout/SideMenu.jsx +209 -0
- jettask/webui/frontend/src/components/layout/TabsNav.css +244 -0
- jettask/webui/frontend/src/components/layout/TabsNav.jsx +206 -0
- jettask/webui/frontend/src/components/layout/UserInfo.css +197 -0
- jettask/webui/frontend/src/components/layout/UserInfo.jsx +197 -0
- jettask/webui/frontend/src/contexts/NamespaceContext.jsx +72 -0
- jettask/webui/frontend/src/contexts/TabsContext.backup.jsx +245 -0
- jettask/webui/frontend/src/main.jsx +1 -0
- jettask/webui/frontend/src/pages/Alerts.jsx +684 -0
- jettask/webui/frontend/src/pages/Dashboard.jsx +1330 -0
- jettask/webui/frontend/src/pages/QueueDetail.jsx +1109 -10
- jettask/webui/frontend/src/pages/QueueMonitor.jsx +236 -115
- jettask/webui/frontend/src/pages/Queues.jsx +5 -1
- jettask/webui/frontend/src/pages/ScheduledTasks.jsx +809 -0
- jettask/webui/frontend/src/pages/Settings.jsx +800 -0
- jettask/webui/frontend/src/services/api.js +7 -5
- jettask/webui/frontend/src/utils/suppressWarnings.js +22 -0
- jettask/webui/frontend/src/utils/userPreferences.js +154 -0
- jettask/webui/multi_namespace_consumer.py +543 -0
- jettask/webui/pg_consumer.py +983 -246
- jettask/webui/static/dist/assets/index-7129cfe1.css +1 -0
- jettask/webui/static/dist/assets/index-8d1935cc.js +774 -0
- jettask/webui/static/dist/index.html +2 -2
- jettask/webui/task_center.py +216 -0
- jettask/webui/task_center_client.py +150 -0
- jettask/webui/unified_consumer_manager.py +193 -0
- {jettask-0.2.1.dist-info → jettask-0.2.4.dist-info}/METADATA +1 -1
- jettask-0.2.4.dist-info/RECORD +134 -0
- jettask/webui/pg_consumer_slow.py +0 -1099
- jettask/webui/pg_consumer_test.py +0 -678
- jettask/webui/static/dist/assets/index-823408e8.css +0 -1
- jettask/webui/static/dist/assets/index-9968b0b8.js +0 -543
- jettask/webui/test_pg_consumer_recovery.py +0 -547
- jettask/webui/test_recovery_simple.py +0 -492
- jettask/webui/test_self_recovery.py +0 -467
- jettask-0.2.1.dist-info/RECORD +0 -91
- {jettask-0.2.1.dist-info → jettask-0.2.4.dist-info}/WHEEL +0 -0
- {jettask-0.2.1.dist-info → jettask-0.2.4.dist-info}/entry_points.txt +0 -0
- {jettask-0.2.1.dist-info → jettask-0.2.4.dist-info}/licenses/LICENSE +0 -0
- {jettask-0.2.1.dist-info → jettask-0.2.4.dist-info}/top_level.txt +0 -0
jettask/core/consumer_manager.py
CHANGED
@@ -155,7 +155,7 @@ class ConsumerManager:
|
|
155
155
|
# 由于已经在_validate_strategy_configuration中验证过,
|
156
156
|
# 这里应该只会在MainProcess中执行
|
157
157
|
self._consumer_name = pod_name
|
158
|
-
logger.
|
158
|
+
logger.debug(f"使用Pod策略的consumer名称: {self._consumer_name}")
|
159
159
|
|
160
160
|
return f"{self._consumer_name}-{queue}"
|
161
161
|
|
@@ -195,6 +195,16 @@ class ConsumerManager:
|
|
195
195
|
return self._heartbeat_strategy.is_heartbeat_timeout()
|
196
196
|
return False
|
197
197
|
|
198
|
+
def record_group_info(self, queue: str, task_name: str, group_name: str, consumer_name: str):
    """Forward a task's consumer-group details to the heartbeat strategy.

    Only takes effect when the manager uses the HEARTBEAT strategy and a
    heartbeat strategy object is set; otherwise the call is a no-op.

    Args:
        queue: Queue name.
        task_name: Task name.
        group_name: Consumer group name.
        consumer_name: Consumer name.
    """
    uses_heartbeat = self.strategy == ConsumerStrategy.HEARTBEAT
    if not uses_heartbeat or not self._heartbeat_strategy:
        return
    self._heartbeat_strategy.record_group_info(queue, task_name, group_name, consumer_name)
|
202
|
+
|
203
|
+
async def record_group_info_async(self, queue: str, task_name: str, group_name: str, consumer_name: str):
    """Asynchronously forward a task's consumer-group details to the heartbeat strategy.

    Only takes effect when the manager uses the HEARTBEAT strategy and a
    heartbeat strategy object is set; otherwise the call is a no-op.

    Args:
        queue: Queue name.
        task_name: Task name.
        group_name: Consumer group name.
        consumer_name: Consumer name.
    """
    uses_heartbeat = self.strategy == ConsumerStrategy.HEARTBEAT
    if not uses_heartbeat or not self._heartbeat_strategy:
        return
    await self._heartbeat_strategy.record_group_info_async(queue, task_name, group_name, consumer_name)
|
207
|
+
|
198
208
|
def cleanup_expired_consumers(self, queue: str):
|
199
209
|
"""清理过期的消费者(可选功能)"""
|
200
210
|
try:
|
@@ -380,10 +390,10 @@ class HeartbeatConsumerStrategy:
|
|
380
390
|
break
|
381
391
|
|
382
392
|
if not worker_keys:
|
383
|
-
logger.
|
393
|
+
logger.debug("No worker keys found during scan")
|
384
394
|
return None
|
385
395
|
else:
|
386
|
-
logger.
|
396
|
+
logger.debug(f"Found {len(worker_keys)} worker keys to check")
|
387
397
|
|
388
398
|
# 查找符合条件的离线worker
|
389
399
|
offline_workers = []
|
@@ -392,7 +402,7 @@ class HeartbeatConsumerStrategy:
|
|
392
402
|
try:
|
393
403
|
# 获取worker数据
|
394
404
|
worker_data = self.redis.hgetall(worker_key)
|
395
|
-
# logger.
|
405
|
+
# logger.debug(f'{worker_key=} {worker_data=}')
|
396
406
|
if not worker_data:
|
397
407
|
continue
|
398
408
|
# 获取worker的状态信息
|
@@ -420,15 +430,15 @@ class HeartbeatConsumerStrategy:
|
|
420
430
|
# 1. is_alive标记为false,或者
|
421
431
|
# 2. 最后心跳时间超过了heartbeat_timeout
|
422
432
|
is_truly_offline = (not is_alive) or (current_time - last_heartbeat > self.heartbeat_timeout)
|
423
|
-
# logger.
|
433
|
+
# logger.debug(f'{is_truly_offline=} {worker_data=}')
|
424
434
|
if not is_truly_offline:
|
425
|
-
logger.
|
435
|
+
logger.debug(f"Worker {is_alive=} {current_time - last_heartbeat} {self.heartbeat_timeout} {worker_data.get('consumer_id')} is still active (last_heartbeat: {current_time - last_heartbeat:.1f}s ago)")
|
426
436
|
continue
|
427
437
|
|
428
438
|
# 需要离线超过heartbeat_timeout才能被复用(与离线检测保持一致)
|
429
439
|
# min_offline_duration = self.heartbeat_timeout
|
430
440
|
# if offline_time > 0 and (current_time - offline_time) < min_offline_duration:
|
431
|
-
# logger.
|
441
|
+
# logger.debug(f"Worker {worker_data.get('consumer_id')} offline for only {current_time - offline_time:.1f}s, need {min_offline_duration}s")
|
432
442
|
# continue
|
433
443
|
|
434
444
|
# 获取consumer_id
|
@@ -484,7 +494,7 @@ class HeartbeatConsumerStrategy:
|
|
484
494
|
|
485
495
|
pipeline.execute()
|
486
496
|
|
487
|
-
logger.
|
497
|
+
logger.debug(f"Found reusable worker: {selected_consumer_id}, offline since {time.time() - selected_offline_time:.1f}s ago")
|
488
498
|
return selected_consumer_id
|
489
499
|
|
490
500
|
except Exception as e:
|
@@ -734,9 +744,9 @@ class HeartbeatConsumerStrategy:
|
|
734
744
|
if not self.consumer_id:
|
735
745
|
# 如果没有可复用的,生成新的consumer ID
|
736
746
|
self.consumer_id = f"{self.hostname_prefix}-{uuid.uuid4().hex[:8]}-{os.getpid()}"
|
737
|
-
logger.
|
747
|
+
logger.debug(f"Created new consumer ID: {self.consumer_id}")
|
738
748
|
else:
|
739
|
-
logger.
|
749
|
+
logger.debug(f"Reusing offline worker ID: {self.consumer_id}")
|
740
750
|
|
741
751
|
# 更新worker_key
|
742
752
|
self._worker_key = f'{self.redis_prefix}:{self.worker_prefix}:{self.consumer_id}'
|
@@ -771,9 +781,91 @@ class HeartbeatConsumerStrategy:
|
|
771
781
|
if queue not in self._heartbeat_processes:
|
772
782
|
self._start_heartbeat_process_for_queue(queue)
|
773
783
|
|
774
|
-
logger.
|
784
|
+
logger.debug(f"Created consumer name for queue {queue}: {self.consumer_names[queue]}")
|
775
785
|
return self.consumer_names[queue]
|
776
786
|
|
787
|
+
def record_group_info(self, queue: str, task_name: str, group_name: str, consumer_name: str):
    """Store a task's consumer-group details in this worker's Redis hash.

    The details are JSON-encoded and written under the hash field
    ``group_info:{group_name}`` of ``self._worker_key``. Any exception is
    logged and swallowed, so the call never raises to the caller.

    Args:
        queue: Queue name.
        task_name: Task name.
        group_name: Consumer group name.
        consumer_name: Consumer name.
    """
    try:
        import json

        # Make sure the worker key exists; _ensure_consumer_id() is expected
        # to populate it (its body is not visible here).
        if not self._worker_key:
            self._ensure_consumer_id()
        if not self._worker_key:
            logger.warning("Cannot record group info: worker_key not initialized")
            return

        # Assemble the group details.
        stream_key = f"{self.redis_prefix}:QUEUE:{queue}"
        group_info = dict(
            queue=queue,
            task_name=task_name,
            group_name=group_name,
            consumer_name=consumer_name,
            stream_key=stream_key,
        )

        # One hash field per consumer group: group_info:{group_name}
        hash_field = f"group_info:{group_name}"
        self.redis.hset(self._worker_key, hash_field, json.dumps(group_info))

        logger.debug(f"Recorded group info for task {task_name}: {group_info}")

    except Exception as e:
        logger.error(f"Error recording task group info: {e}")
|
827
|
+
|
828
|
+
async def record_group_info_async(self, queue: str, task_name: str, group_name: str, consumer_name: str):
    """Asynchronously store a task's consumer-group details in this worker's Redis hash.

    The details are JSON-encoded and written through ``self.async_redis``
    under the hash field ``group_info:{group_name}`` of ``self._worker_key``.
    Any exception is logged and swallowed, so the call never raises to the
    caller.

    Args:
        queue: Queue name.
        task_name: Task name.
        group_name: Consumer group name.
        consumer_name: Consumer name.
    """
    try:
        import json

        # Make sure the worker key exists; _ensure_consumer_id() is expected
        # to populate it (its body is not visible here).
        if not self._worker_key:
            self._ensure_consumer_id()
        if not self._worker_key:
            logger.warning("Cannot record group info: worker_key not initialized")
            return

        # Assemble the group details.
        stream_key = f"{self.redis_prefix}:QUEUE:{queue}"
        group_info = dict(
            queue=queue,
            task_name=task_name,
            group_name=group_name,
            consumer_name=consumer_name,
            stream_key=stream_key,
        )

        # One hash field per consumer group: group_info:{group_name}
        hash_field = f"group_info:{group_name}"
        await self.async_redis.hset(self._worker_key, hash_field, json.dumps(group_info))

        logger.debug(f"Recorded group info for task {task_name}: {group_info}")

    except Exception as e:
        logger.error(f"Error recording task group info: {e}")
|
868
|
+
|
777
869
|
def _ensure_worker_initialized(self):
|
778
870
|
"""确保worker已初始化"""
|
779
871
|
if self.consumer_id is None:
|
@@ -788,7 +880,7 @@ class HeartbeatConsumerStrategy:
|
|
788
880
|
# 心跳进程已经在运行,只需要记录这个队列
|
789
881
|
self._heartbeat_processes[queue] = True
|
790
882
|
return
|
791
|
-
logger.
|
883
|
+
logger.debug('启动心跳进程')
|
792
884
|
# 第一次调用时创建心跳进程管理器
|
793
885
|
if self._heartbeat_process_manager is None:
|
794
886
|
# 获取Redis URL
|
@@ -845,11 +937,11 @@ class HeartbeatConsumerStrategy:
|
|
845
937
|
self.redis.hset(self._worker_key, mapping=worker_info)
|
846
938
|
# 同时添加到 sorted set
|
847
939
|
self.redis.zadd(f"{self.redis_prefix}:ACTIVE_WORKERS", {self.consumer_id: current_time})
|
848
|
-
logger.
|
940
|
+
logger.debug(f"Initialized worker {self.consumer_id} with key {self._worker_key}")
|
849
941
|
|
850
942
|
self._heartbeat_process_manager.add_queue(queue, self._worker_key)
|
851
943
|
self._heartbeat_processes[queue] = True
|
852
|
-
# logger.
|
944
|
+
# logger.debug(f"Started heartbeat process for queue {queue}")
|
853
945
|
|
854
946
|
def _start_scanner(self):
|
855
947
|
"""启动扫描器协程"""
|
@@ -858,7 +950,7 @@ class HeartbeatConsumerStrategy:
|
|
858
950
|
self._scanner_task = loop.create_task(self._scanner_loop())
|
859
951
|
# 立即执行一次扫描,清理可能存在的死亡worker
|
860
952
|
loop.create_task(self._immediate_scan())
|
861
|
-
# logger.
|
953
|
+
# logger.debug("Started heartbeat scanner coroutine")
|
862
954
|
except RuntimeError:
|
863
955
|
# 没有运行中的事件循环,标记为需要启动
|
864
956
|
logger.debug("No running event loop, scanner will be started when async context is available")
|
@@ -867,9 +959,9 @@ class HeartbeatConsumerStrategy:
|
|
867
959
|
async def _immediate_scan(self):
|
868
960
|
"""启动时立即执行一次扫描(协程版本)"""
|
869
961
|
try:
|
870
|
-
# logger.
|
962
|
+
# logger.debug("Performing immediate scan for dead workers...")
|
871
963
|
await self._perform_scan()
|
872
|
-
# logger.
|
964
|
+
# logger.debug("Immediate scan completed")
|
873
965
|
except Exception as e:
|
874
966
|
logger.error(f"Error in immediate scan: {e}")
|
875
967
|
|
@@ -955,7 +1047,7 @@ class HeartbeatConsumerStrategy:
|
|
955
1047
|
|
956
1048
|
if is_alive and last_heartbeat < worker_timeout_threshold:
|
957
1049
|
# 心跳超时的活跃worker
|
958
|
-
logger.
|
1050
|
+
logger.debug(f"Worker {consumer_id} timeout detected: "
|
959
1051
|
f"last_heartbeat={last_heartbeat}, "
|
960
1052
|
f"timeout={worker_heartbeat_timeout}s, "
|
961
1053
|
f"threshold={worker_timeout_threshold}")
|
@@ -969,7 +1061,7 @@ class HeartbeatConsumerStrategy:
|
|
969
1061
|
continue
|
970
1062
|
|
971
1063
|
if timeout_workers:
|
972
|
-
logger.
|
1064
|
+
logger.debug(f"Found {len(timeout_workers)} timeout workers")
|
973
1065
|
|
974
1066
|
for worker_key, worker_data in timeout_workers:
|
975
1067
|
consumer_id = worker_data.get('consumer_id')
|
@@ -996,10 +1088,10 @@ class HeartbeatConsumerStrategy:
|
|
996
1088
|
# 再次检查worker是否真的超时(避免竞态条件)
|
997
1089
|
current_heartbeat = await self.async_redis.hget(worker_key, 'last_heartbeat')
|
998
1090
|
if current_heartbeat and float(current_heartbeat) >= timeout_threshold:
|
999
|
-
logger.
|
1091
|
+
logger.debug(f"Worker {consumer_id} is now alive, skipping")
|
1000
1092
|
continue
|
1001
1093
|
|
1002
|
-
logger.
|
1094
|
+
logger.debug(f"Processing timeout worker: {consumer_id}")
|
1003
1095
|
# 只标记worker为离线
|
1004
1096
|
await self._mark_worker_offline(worker_key, worker_data)
|
1005
1097
|
|
@@ -1029,7 +1121,7 @@ class HeartbeatConsumerStrategy:
|
|
1029
1121
|
'shutdown_reason': 'heartbeat_timeout',
|
1030
1122
|
'messages_transferred': 'false' # 初始状态:消息未转移
|
1031
1123
|
})
|
1032
|
-
logger.
|
1124
|
+
logger.debug(f"Marked worker {consumer_id} as offline with messages_transferred=false")
|
1033
1125
|
else:
|
1034
1126
|
# 已经是离线状态的worker,只更新离线时间
|
1035
1127
|
await self.async_redis.hset(worker_key, 'offline_time', str(current_time))
|
@@ -1055,7 +1147,7 @@ class HeartbeatConsumerStrategy:
|
|
1055
1147
|
try:
|
1056
1148
|
loop = asyncio.get_running_loop()
|
1057
1149
|
self._stats_flusher_task = loop.create_task(self._stats_flusher_loop())
|
1058
|
-
logger.
|
1150
|
+
logger.debug("Started stats flusher coroutine")
|
1059
1151
|
except RuntimeError:
|
1060
1152
|
# 没有运行中的事件循环,标记为需要启动
|
1061
1153
|
logger.debug("No running event loop for stats flusher, will be started when async context is available")
|
@@ -1085,7 +1177,7 @@ class HeartbeatConsumerStrategy:
|
|
1085
1177
|
prefixed_queue = self.get_prefixed_queue_name(queue)
|
1086
1178
|
result = self.redis.execute_command('XGROUP', 'DELCONSUMER', prefixed_queue, prefixed_queue, consumer_name)
|
1087
1179
|
if result > 0:
|
1088
|
-
logger.
|
1180
|
+
logger.debug(f"Deleted stream consumer {consumer_name} from group {queue}")
|
1089
1181
|
else:
|
1090
1182
|
logger.debug(f"Stream consumer {consumer_name} was not found in group {queue}")
|
1091
1183
|
except Exception as e:
|
@@ -1118,13 +1210,13 @@ class HeartbeatConsumerStrategy:
|
|
1118
1210
|
# 再次检查worker是否真的超时(避免竞态条件)
|
1119
1211
|
current_score = await self.async_redis.zscore(heartbeat_key, worker_data)
|
1120
1212
|
if current_score and time.time() - current_score < self.heartbeat_timeout:
|
1121
|
-
logger.
|
1213
|
+
logger.debug(f"Worker {consumer_name} is now alive, skipping")
|
1122
1214
|
return
|
1123
1215
|
|
1124
1216
|
# 从有序集合中删除死亡的worker(使用原始的worker_data)
|
1125
1217
|
removed = await self.async_redis.zrem(heartbeat_key, worker_data)
|
1126
1218
|
if removed:
|
1127
|
-
logger.
|
1219
|
+
logger.debug(f"Removed dead worker {consumer_name} from heartbeat set for queue {queue}")
|
1128
1220
|
|
1129
1221
|
# 重置该consumer的pending消息
|
1130
1222
|
await self._reset_consumer_pending_messages(queue, consumer_name)
|
@@ -1201,7 +1293,7 @@ class HeartbeatConsumerStrategy:
|
|
1201
1293
|
pass
|
1202
1294
|
return
|
1203
1295
|
|
1204
|
-
logger.
|
1296
|
+
logger.debug(f"Found {len(consumer_messages)} pending messages for dead consumer {consumer_name}")
|
1205
1297
|
|
1206
1298
|
# 获取消息ID列表
|
1207
1299
|
message_ids = [msg['message_id'] for msg in consumer_messages]
|
@@ -1295,7 +1387,7 @@ class HeartbeatConsumerStrategy:
|
|
1295
1387
|
'status': 'completed' if not failed_messages else 'completed_with_errors'
|
1296
1388
|
})
|
1297
1389
|
|
1298
|
-
logger.
|
1390
|
+
logger.debug(f"Recovery completed: {recovered_count}/{len(message_ids)} messages recovered from {consumer_name}")
|
1299
1391
|
|
1300
1392
|
if failed_messages:
|
1301
1393
|
logger.error(f"Failed to recover {len(failed_messages)} messages: {failed_messages[:10]}...")
|
@@ -1463,7 +1555,7 @@ class HeartbeatConsumerStrategy:
|
|
1463
1555
|
# 没有pending消息,直接删除
|
1464
1556
|
try:
|
1465
1557
|
await self.async_redis.execute_command('XGROUP', 'DELCONSUMER', queue, queue, consumer_name)
|
1466
|
-
logger.
|
1558
|
+
logger.debug(f"Cleaned up stale recovery consumer {consumer_name}")
|
1467
1559
|
cleaned_count += 1
|
1468
1560
|
except Exception as e:
|
1469
1561
|
logger.error(f"Failed to delete recovery consumer {consumer_name}: {e}")
|
@@ -1474,7 +1566,7 @@ class HeartbeatConsumerStrategy:
|
|
1474
1566
|
logger.error(f"Traceback:\n{traceback.format_exc()}")
|
1475
1567
|
|
1476
1568
|
if cleaned_count > 0:
|
1477
|
-
logger.
|
1569
|
+
logger.debug(f"Cleaned up {cleaned_count} stale recovery consumers")
|
1478
1570
|
|
1479
1571
|
except Exception as e:
|
1480
1572
|
logger.error(f"Error in cleanup_stale_recovery_consumers: {e}")
|
@@ -1548,7 +1640,7 @@ class HeartbeatConsumerStrategy:
|
|
1548
1640
|
|
1549
1641
|
# 如果worker从未运行过(没有数据),则不需要处理
|
1550
1642
|
if not worker_data:
|
1551
|
-
logger.
|
1643
|
+
logger.debug(f"Worker {self.consumer_id} never started, skipping cleanup")
|
1552
1644
|
return
|
1553
1645
|
|
1554
1646
|
# 更新worker状态为离线(保留所有现有数据)
|
@@ -1573,14 +1665,14 @@ class HeartbeatConsumerStrategy:
|
|
1573
1665
|
|
1574
1666
|
# 不再保存历史记录,WORKER键本身就包含了所有信息
|
1575
1667
|
|
1576
|
-
logger.
|
1668
|
+
logger.debug(f"Marked worker {self.consumer_id} as offline immediately")
|
1577
1669
|
|
1578
1670
|
except Exception as e:
|
1579
1671
|
logger.error(f"Failed to mark worker as offline during cleanup: {e}")
|
1580
1672
|
|
1581
1673
|
# 如果从未成功运行过,直接返回
|
1582
1674
|
if not worker_data:
|
1583
|
-
logger.
|
1675
|
+
logger.debug(f"Heartbeat consumer {self.consumer_id} stopped gracefully (never started)")
|
1584
1676
|
return
|
1585
1677
|
|
1586
1678
|
# 等待扫描线程结束(非阻塞)
|
@@ -1600,4 +1692,4 @@ class HeartbeatConsumerStrategy:
|
|
1600
1692
|
# 重要:不删除心跳记录!
|
1601
1693
|
# 心跳记录必须保留,让scanner能够检测到worker离线并恢复pending消息
|
1602
1694
|
# 心跳会因为超时自动被scanner清理
|
1603
|
-
logger.
|
1695
|
+
logger.debug(f"Heartbeat consumer {self.consumer_id} stopped")
|
jettask/core/context.py
CHANGED
@@ -20,6 +20,8 @@ class TaskContext:
|
|
20
20
|
async def my_task(ctx: TaskContext, data: dict):
|
21
21
|
print(f"Task ID: {ctx.event_id}")
|
22
22
|
print(f"Task Name: {ctx.name}")
|
23
|
+
if ctx.scheduled_task_id:
|
24
|
+
print(f"Triggered by scheduled task: {ctx.scheduled_task_id}")
|
23
25
|
return data
|
24
26
|
"""
|
25
27
|
event_id: str
|
@@ -29,6 +31,7 @@ class TaskContext:
|
|
29
31
|
queue: Optional[str] = None
|
30
32
|
worker_id: Optional[str] = None
|
31
33
|
retry_count: int = 0
|
34
|
+
scheduled_task_id: Optional[int] = None # 定时任务ID(如果由定时任务触发)
|
32
35
|
|
33
36
|
def __repr__(self) -> str:
|
34
37
|
return f"TaskContext(event_id={self.event_id}, name={self.name}, queue={self.queue})"
|
jettask/core/enums.py
ADDED
@@ -0,0 +1,137 @@
|
|
1
|
+
"""
|
2
|
+
任务状态和结果定义
|
3
|
+
"""
|
4
|
+
from enum import Enum
|
5
|
+
from dataclasses import dataclass
|
6
|
+
from typing import Any, Optional, Dict
|
7
|
+
import time
|
8
|
+
|
9
|
+
|
10
|
+
class TaskStatus(Enum):
    """Task status enumeration.

    The classification helpers accept either a member or a raw status value
    (for example the string ``"success"``). A value that names no member is
    reported as neither terminal nor active.
    """
    # Basic states
    PENDING = "pending"      # task created, waiting to run
    RUNNING = "running"      # task is executing
    SUCCESS = "success"      # task finished successfully
    ERROR = "error"          # task failed

    # Special states
    DELAYED = "delayed"      # delayed task, waiting to be triggered
    REJECTED = "rejected"    # task rejected (on_before returned reject)
    RETRY = "retry"          # task waiting to be retried
    TIMEOUT = "timeout"      # task execution timed out
    CANCELLED = "cancelled"  # task was cancelled

    # Scheduled-task states
    SCHEDULED = "scheduled"  # scheduled (scheduled tasks only)

    @classmethod
    def _coerce(cls, status):
        """Return the member matching *status*, or None when there is none."""
        if isinstance(status, cls):
            return status
        try:
            return cls(status)
        except ValueError:
            # Not a known status value; callers treat it as unclassified.
            return None

    @classmethod
    def is_terminal(cls, status: 'TaskStatus') -> bool:
        """Return True if *status* is a final state that will not change again.

        Args:
            status: A ``TaskStatus`` member or its raw value.
        """
        return cls._coerce(status) in {
            cls.SUCCESS, cls.ERROR, cls.REJECTED, cls.TIMEOUT, cls.CANCELLED,
        }

    @classmethod
    def is_active(cls, status: 'TaskStatus') -> bool:
        """Return True if *status* is a state that is still being processed.

        Args:
            status: A ``TaskStatus`` member or its raw value.
        """
        return cls._coerce(status) in {
            cls.PENDING, cls.RUNNING, cls.DELAYED, cls.RETRY, cls.SCHEDULED,
        }
|
37
|
+
|
38
|
+
|
39
|
+
@dataclass
class TaskResult:
    """Result handle returned by ``apply_async``, carrying the task's details.

    Fields left as ``None`` are filled in by ``__post_init__``:
    ``created_at`` becomes the current time, ``trigger_time`` becomes
    ``created_at``, and ``args``/``kwargs``/``metadata`` become empty
    containers. A ``status`` given as a raw value (e.g. ``"success"``) is
    converted to the matching ``TaskStatus`` member.
    """
    id: str                                  # task ID (event_id)
    name: str                                # task name
    queue: str                               # queue name
    status: TaskStatus = TaskStatus.PENDING  # task status
    created_at: Optional[float] = None       # creation time
    trigger_time: Optional[float] = None     # trigger time
    scheduled_task_id: Optional[int] = None  # scheduled-task ID (if triggered by one)
    args: Optional[tuple] = None             # positional arguments
    kwargs: Optional[dict] = None            # keyword arguments
    metadata: Optional[Dict[str, Any]] = None  # other metadata

    def __post_init__(self):
        """Fill in default values and normalise ``status``."""
        if not isinstance(self.status, TaskStatus):
            try:
                self.status = TaskStatus(self.status)
            except ValueError:
                # Unknown value: keep it untouched so construction still
                # succeeds exactly as it did before coercion was added.
                pass
        if self.created_at is None:
            self.created_at = time.time()
        if self.trigger_time is None:
            self.trigger_time = self.created_at
        if self.args is None:
            self.args = ()
        if self.kwargs is None:
            self.kwargs = {}
        if self.metadata is None:
            self.metadata = {}

    def __str__(self) -> str:
        """Short string form."""
        return f"TaskResult(id={self.id}, name={self.name}, status={self.status.value})"

    def __repr__(self) -> str:
        """Detailed string form."""
        return (f"TaskResult(id={self.id}, name={self.name}, queue={self.queue}, "
                f"status={self.status.value}, created_at={self.created_at})")

    async def wait(self, timeout: Optional[float] = None):
        """Wait for the task to finish.

        Args:
            timeout: Timeout in seconds.

        Returns:
            The task's execution result.

        Raises:
            NotImplementedError: Always; waiting is not implemented yet.
        """
        # TODO: implement waiting
        raise NotImplementedError("wait method not implemented yet")

    async def get_result(self, timeout: Optional[float] = None):
        """Fetch the task's result.

        Args:
            timeout: Timeout in seconds.

        Returns:
            The task's execution result.

        Raises:
            NotImplementedError: Always; result retrieval is not implemented yet.
        """
        # TODO: implement result retrieval
        raise NotImplementedError("get_result method not implemented yet")

    async def cancel(self) -> bool:
        """Cancel the task.

        Returns:
            Whether the task was cancelled.

        Raises:
            NotImplementedError: Always; cancellation is not implemented yet.
        """
        # TODO: implement cancellation
        raise NotImplementedError("cancel method not implemented yet")

    async def get_status(self) -> TaskStatus:
        """Fetch the task's current status.

        Returns:
            The task status.

        Raises:
            NotImplementedError: Always; status lookup is not implemented yet.
        """
        # TODO: implement status lookup
        raise NotImplementedError("get_status method not implemented yet")

    @property
    def is_ready(self) -> bool:
        """Whether the task has reached a terminal state."""
        return TaskStatus.is_terminal(self.status)

    @property
    def is_successful(self) -> bool:
        """Whether the task succeeded."""
        return self.status == TaskStatus.SUCCESS

    @property
    def is_failed(self) -> bool:
        """Whether the task failed (ERROR, TIMEOUT or REJECTED)."""
        return self.status in {TaskStatus.ERROR, TaskStatus.TIMEOUT, TaskStatus.REJECTED}
|