jettask 0.2.19__py3-none-any.whl → 0.2.23__py3-none-any.whl
This diff compares publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
- jettask/__init__.py +12 -3
- jettask/cli.py +314 -228
- jettask/config/__init__.py +9 -1
- jettask/config/config.py +245 -0
- jettask/config/env_loader.py +381 -0
- jettask/config/lua_scripts.py +158 -0
- jettask/config/nacos_config.py +132 -5
- jettask/core/__init__.py +1 -1
- jettask/core/app.py +1573 -666
- jettask/core/app_importer.py +33 -16
- jettask/core/container.py +532 -0
- jettask/core/task.py +1 -4
- jettask/core/unified_manager_base.py +2 -2
- jettask/executor/__init__.py +38 -0
- jettask/executor/core.py +625 -0
- jettask/executor/executor.py +338 -0
- jettask/executor/orchestrator.py +290 -0
- jettask/executor/process_entry.py +638 -0
- jettask/executor/task_executor.py +317 -0
- jettask/messaging/__init__.py +68 -0
- jettask/messaging/event_pool.py +2188 -0
- jettask/messaging/reader.py +519 -0
- jettask/messaging/registry.py +266 -0
- jettask/messaging/scanner.py +369 -0
- jettask/messaging/sender.py +312 -0
- jettask/persistence/__init__.py +118 -0
- jettask/persistence/backlog_monitor.py +567 -0
- jettask/{backend/data_access.py → persistence/base.py} +58 -57
- jettask/persistence/consumer.py +315 -0
- jettask/{core → persistence}/db_manager.py +23 -22
- jettask/persistence/maintenance.py +81 -0
- jettask/persistence/message_consumer.py +259 -0
- jettask/{backend/namespace_data_access.py → persistence/namespace.py} +66 -98
- jettask/persistence/offline_recovery.py +196 -0
- jettask/persistence/queue_discovery.py +215 -0
- jettask/persistence/task_persistence.py +218 -0
- jettask/persistence/task_updater.py +583 -0
- jettask/scheduler/__init__.py +2 -2
- jettask/scheduler/loader.py +6 -5
- jettask/scheduler/run_scheduler.py +1 -1
- jettask/scheduler/scheduler.py +7 -7
- jettask/scheduler/{unified_scheduler_manager.py → scheduler_coordinator.py} +18 -13
- jettask/task/__init__.py +16 -0
- jettask/{router.py → task/router.py} +26 -8
- jettask/task/task_center/__init__.py +9 -0
- jettask/task/task_executor.py +318 -0
- jettask/task/task_registry.py +291 -0
- jettask/test_connection_monitor.py +73 -0
- jettask/utils/__init__.py +31 -1
- jettask/{monitor/run_backlog_collector.py → utils/backlog_collector.py} +1 -1
- jettask/utils/db_connector.py +1629 -0
- jettask/{db_init.py → utils/db_init.py} +1 -1
- jettask/utils/rate_limit/__init__.py +30 -0
- jettask/utils/rate_limit/concurrency_limiter.py +665 -0
- jettask/utils/rate_limit/config.py +145 -0
- jettask/utils/rate_limit/limiter.py +41 -0
- jettask/utils/rate_limit/manager.py +269 -0
- jettask/utils/rate_limit/qps_limiter.py +154 -0
- jettask/utils/rate_limit/task_limiter.py +384 -0
- jettask/utils/serializer.py +3 -0
- jettask/{monitor/stream_backlog_monitor.py → utils/stream_backlog.py} +14 -6
- jettask/utils/time_sync.py +173 -0
- jettask/webui/__init__.py +27 -0
- jettask/{api/v1 → webui/api}/alerts.py +1 -1
- jettask/{api/v1 → webui/api}/analytics.py +2 -2
- jettask/{api/v1 → webui/api}/namespaces.py +1 -1
- jettask/{api/v1 → webui/api}/overview.py +1 -1
- jettask/{api/v1 → webui/api}/queues.py +3 -3
- jettask/{api/v1 → webui/api}/scheduled.py +1 -1
- jettask/{api/v1 → webui/api}/settings.py +1 -1
- jettask/{api.py → webui/app.py} +253 -145
- jettask/webui/namespace_manager/__init__.py +10 -0
- jettask/{multi_namespace_consumer.py → webui/namespace_manager/multi.py} +69 -22
- jettask/{unified_consumer_manager.py → webui/namespace_manager/unified.py} +1 -1
- jettask/{run.py → webui/run.py} +2 -2
- jettask/{services → webui/services}/__init__.py +1 -3
- jettask/{services → webui/services}/overview_service.py +34 -16
- jettask/{services → webui/services}/queue_service.py +1 -1
- jettask/{backend → webui/services}/queue_stats_v2.py +1 -1
- jettask/{services → webui/services}/settings_service.py +1 -1
- jettask/worker/__init__.py +53 -0
- jettask/worker/lifecycle.py +1507 -0
- jettask/worker/manager.py +583 -0
- jettask/{core/offline_worker_recovery.py → worker/recovery.py} +268 -175
- {jettask-0.2.19.dist-info → jettask-0.2.23.dist-info}/METADATA +2 -71
- jettask-0.2.23.dist-info/RECORD +145 -0
- jettask/__main__.py +0 -140
- jettask/api/__init__.py +0 -103
- jettask/backend/__init__.py +0 -1
- jettask/backend/api/__init__.py +0 -3
- jettask/backend/api/v1/__init__.py +0 -17
- jettask/backend/api/v1/monitoring.py +0 -431
- jettask/backend/api/v1/namespaces.py +0 -504
- jettask/backend/api/v1/queues.py +0 -342
- jettask/backend/api/v1/tasks.py +0 -367
- jettask/backend/core/__init__.py +0 -3
- jettask/backend/core/cache.py +0 -221
- jettask/backend/core/database.py +0 -200
- jettask/backend/core/exceptions.py +0 -102
- jettask/backend/dependencies.py +0 -261
- jettask/backend/init_meta_db.py +0 -158
- jettask/backend/main.py +0 -1426
- jettask/backend/main_unified.py +0 -78
- jettask/backend/main_v2.py +0 -394
- jettask/backend/models/__init__.py +0 -3
- jettask/backend/models/requests.py +0 -236
- jettask/backend/models/responses.py +0 -230
- jettask/backend/namespace_api_old.py +0 -267
- jettask/backend/services/__init__.py +0 -3
- jettask/backend/start.py +0 -42
- jettask/backend/unified_api_router.py +0 -1541
- jettask/cleanup_deprecated_tables.sql +0 -16
- jettask/core/consumer_manager.py +0 -1695
- jettask/core/delay_scanner.py +0 -256
- jettask/core/event_pool.py +0 -1700
- jettask/core/heartbeat_process.py +0 -222
- jettask/core/task_batch.py +0 -153
- jettask/core/worker_scanner.py +0 -271
- jettask/executors/__init__.py +0 -5
- jettask/executors/asyncio.py +0 -876
- jettask/executors/base.py +0 -30
- jettask/executors/common.py +0 -148
- jettask/executors/multi_asyncio.py +0 -309
- jettask/gradio_app.py +0 -570
- jettask/integrated_gradio_app.py +0 -1088
- jettask/main.py +0 -0
- jettask/monitoring/__init__.py +0 -3
- jettask/pg_consumer.py +0 -1896
- jettask/run_monitor.py +0 -22
- jettask/run_webui.py +0 -148
- jettask/scheduler/multi_namespace_scheduler.py +0 -294
- jettask/scheduler/unified_manager.py +0 -450
- jettask/task_center_client.py +0 -150
- jettask/utils/serializer_optimized.py +0 -33
- jettask/webui_exceptions.py +0 -67
- jettask-0.2.19.dist-info/RECORD +0 -150
- /jettask/{constants.py → config/constants.py} +0 -0
- /jettask/{backend/config.py → config/task_center.py} +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/pg_consumer_v2.py +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/add_execution_time_field.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_new_tables.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_tables_v3.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/migrate_to_new_structure.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/modify_time_fields.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql_utils.py +0 -0
- /jettask/{models.py → persistence/models.py} +0 -0
- /jettask/scheduler/{manager.py → task_crud.py} +0 -0
- /jettask/{schema.sql → schemas/schema.sql} +0 -0
- /jettask/{task_center.py → task/task_center/client.py} +0 -0
- /jettask/{monitoring → utils}/file_watcher.py +0 -0
- /jettask/{services/redis_monitor_service.py → utils/redis_monitor.py} +0 -0
- /jettask/{api/v1 → webui/api}/__init__.py +0 -0
- /jettask/{webui_config.py → webui/config.py} +0 -0
- /jettask/{webui_models → webui/models}/__init__.py +0 -0
- /jettask/{webui_models → webui/models}/namespace.py +0 -0
- /jettask/{services → webui/services}/alert_service.py +0 -0
- /jettask/{services → webui/services}/analytics_service.py +0 -0
- /jettask/{services → webui/services}/scheduled_task_service.py +0 -0
- /jettask/{services → webui/services}/task_service.py +0 -0
- /jettask/{webui_sql → webui/sql}/batch_upsert_functions.sql +0 -0
- /jettask/{webui_sql → webui/sql}/verify_database.sql +0 -0
- {jettask-0.2.19.dist-info → jettask-0.2.23.dist-info}/WHEEL +0 -0
- {jettask-0.2.19.dist-info → jettask-0.2.23.dist-info}/entry_points.txt +0 -0
- {jettask-0.2.19.dist-info → jettask-0.2.23.dist-info}/licenses/LICENSE +0 -0
- {jettask-0.2.19.dist-info → jettask-0.2.23.dist-info}/top_level.txt +0 -0
jettask/core/heartbeat_process.py DELETED
@@ -1,222 +0,0 @@
-"""
-Standalone heartbeat reporting process, kept separate so CPU-intensive tasks cannot block it
-"""
-import multiprocessing
-import time
-import os
-import logging
-import signal
-import redis
-from typing import Dict, Set, Optional
-
-logger = logging.getLogger(__name__)
-
-
-class HeartbeatProcess:
-    """Standalone heartbeat reporting process"""
-
-    def __init__(self, redis_url: str, worker_key: str, consumer_id: str,
-                 heartbeat_interval: float = 5.0, heartbeat_timeout: float = 15.0):
-        self.redis_url = redis_url
-        self.worker_key = worker_key
-        self.consumer_id = consumer_id
-        self.heartbeat_interval = heartbeat_interval
-        self.heartbeat_timeout = heartbeat_timeout
-        self.process: Optional[multiprocessing.Process] = None
-        self.stop_event = multiprocessing.Event()
-        # Track the last heartbeat time in shared memory
-        self.last_heartbeat_time = multiprocessing.Value('d', time.time())
-
-    def start(self):
-        """Start the heartbeat process"""
-        if self.process and self.process.is_alive():
-            logger.warning("Heartbeat process already running")
-            return
-
-        self.stop_event.clear()
-        self.process = multiprocessing.Process(
-            target=self._heartbeat_loop,
-            args=(self.redis_url, self.worker_key, self.consumer_id,
-                  self.heartbeat_interval, self.heartbeat_timeout, self.stop_event,
-                  self.last_heartbeat_time),
-            daemon=True,
-            name=f"heartbeat-{self.consumer_id}"
-        )
-        self.process.start()
-        logger.debug(f"Started heartbeat process for {self.consumer_id}, PID: {self.process.pid}")
-
-    def stop(self):
-        """Stop the heartbeat process"""
-        if not self.process:
-            return
-
-        try:
-            # Check that the process object has the attributes we need
-            if hasattr(self.process, 'is_alive') and callable(self.process.is_alive):
-                if self.process.is_alive():
-                    self.stop_event.set()
-                    if hasattr(self.process, 'terminate'):
-                        self.process.terminate()
-                    self.process.join(timeout=5)
-                    if self.process.is_alive():
-                        logger.warning("Heartbeat process did not stop gracefully, forcing kill")
-                        if hasattr(self.process, 'kill'):
-                            self.process.kill()
-                        self.process.join()
-                logger.debug(f"Stopped heartbeat process for {self.consumer_id}")
-            else:
-                logger.debug(f"Heartbeat process for {self.consumer_id} is not a valid process object")
-        except AttributeError as e:
-            logger.debug(f"Heartbeat process attributes error: {e}")
-        except Exception as e:
-            logger.warning(f"Error stopping heartbeat process: {e}")
-        finally:
-            self.process = None
-
-    def get_last_heartbeat_time(self) -> float:
-        """Return the time of the most recent heartbeat"""
-        with self.last_heartbeat_time.get_lock():
-            return self.last_heartbeat_time.value
-
-    @staticmethod
-    def _heartbeat_loop(redis_url: str, worker_key: str, consumer_id: str,
-                        heartbeat_interval: float, heartbeat_timeout: float,
-                        stop_event: multiprocessing.Event,
-                        last_heartbeat_time: multiprocessing.Value):
-        """Heartbeat loop - runs in the child process"""
-        # Ignore interrupt signals; the main process handles them
-        signal.signal(signal.SIGINT, signal.SIG_IGN)
-
-        # Create a dedicated Redis connection
-        redis_client = redis.from_url(redis_url, decode_responses=True)
-
-        # Resolve host information
-        import socket
-        try:
-            hostname = socket.gethostname()
-            if not hostname or hostname == 'localhost':
-                hostname = socket.gethostbyname(socket.gethostname())
-        except:
-            hostname = os.environ.get('HOSTNAME', 'unknown')
-
-        logger.debug(f"Heartbeat process started for {consumer_id} in PID {os.getpid()}")
-
-        heartbeat_count = 0
-        last_log_time = time.time()
-
-        while not stop_event.is_set():
-            try:
-                current_time = time.time()
-
-                # Refresh the heartbeat fields
-                redis_client.hset(worker_key, mapping={
-                    'last_heartbeat': str(current_time),
-                    'heartbeat_pid': str(os.getpid()),  # Record the heartbeat process PID
-                    'is_alive': 'true'
-                })
-
-                # Also update the sorted set
-                redis_prefix = worker_key.split(':')[0]  # Extract the prefix (e.g. 'jettask')
-                worker_id = worker_key.split(':')[-1]  # Extract the worker ID
-                redis_client.zadd(f"{redis_prefix}:ACTIVE_WORKERS", {worker_id: current_time})
-
-                # Update the local heartbeat timestamp
-                with last_heartbeat_time.get_lock():
-                    last_heartbeat_time.value = current_time
-
-                heartbeat_count += 1
-
-                # Log once every 30 seconds
-                if current_time - last_log_time >= 30:
-                    logger.debug(f"Heartbeat process for {consumer_id} sent {heartbeat_count} heartbeats")
-                    last_log_time = current_time
-                    heartbeat_count = 0
-
-                # Wait for the next heartbeat
-                stop_event.wait(heartbeat_interval)
-
-            except Exception as e:
-                logger.error(f"Error in heartbeat process for {consumer_id}: {e}")
-                time.sleep(1)  # Back off briefly on error
-
-        logger.debug(f"Heartbeat process for {consumer_id} exiting")
-
-
-class HeartbeatProcessManager:
-    """Manages the worker's heartbeat process (only one process is needed)"""
-
-    def __init__(self, redis_url: str, consumer_id: str, heartbeat_interval: float = 5.0,
-                 heartbeat_timeout: float = 15.0):
-        self.redis_url = redis_url
-        self.consumer_id = consumer_id
-        self.heartbeat_interval = heartbeat_interval
-        self.heartbeat_timeout = heartbeat_timeout
-        self.heartbeat_process: Optional[HeartbeatProcess] = None
-        self.worker_key: Optional[str] = None
-        self.queues: Set[str] = set()  # Queues handled by this worker
-        self.redis_client = redis.from_url(redis_url, decode_responses=True)
-
-    def add_queue(self, queue: str, worker_key: str):
-        """Add a queue (the heartbeat process is started only on the first call)"""
-        self.queues.add(queue)
-
-        # The heartbeat process only needs to be started once
-        if self.heartbeat_process is not None:
-            logger.debug(f"Heartbeat process already running, added queue {queue}")
-            return
-
-        # First call: start the heartbeat process
-        self.worker_key = worker_key
-        self.heartbeat_process = HeartbeatProcess(
-            redis_url=self.redis_url,
-            worker_key=worker_key,
-            consumer_id=self.consumer_id,
-            heartbeat_interval=self.heartbeat_interval,
-            heartbeat_timeout=self.heartbeat_timeout
-        )
-        self.heartbeat_process.start()
-        logger.debug(f"Started single heartbeat process for worker {self.consumer_id}")
-
-    def remove_queue(self, queue: str):
-        """Remove a queue"""
-        if queue in self.queues:
-            self.queues.remove(queue)
-            logger.debug(f"Removed queue {queue} from heartbeat monitoring")
-
-        # Stop the heartbeat process once no queues remain
-        if not self.queues and self.heartbeat_process:
-            self.heartbeat_process.stop()
-            self.heartbeat_process = None
-            logger.debug("No more queues, stopped heartbeat process")
-
-    def stop_all(self):
-        """Stop the heartbeat process"""
-        if self.heartbeat_process:
-            self.heartbeat_process.stop()
-            self.heartbeat_process = None
-        self.queues.clear()
-
-    def is_healthy(self) -> bool:
-        """Check whether the heartbeat process is healthy"""
-        if not self.heartbeat_process:
-            return len(self.queues) == 0  # Healthy if there are no queues
-
-        if not self.heartbeat_process.process or not self.heartbeat_process.process.is_alive():
-            logger.error(f"Heartbeat process for worker {self.consumer_id} is not alive")
-            return False
-        return True
-
-    def get_last_heartbeat_time(self) -> Optional[float]:
-        """Return the time of the most recent heartbeat"""
-        if self.heartbeat_process:
-            return self.heartbeat_process.get_last_heartbeat_time()
-        return None
-
-    def is_heartbeat_timeout(self) -> bool:
-        """Check whether the heartbeat has timed out"""
-        last_heartbeat = self.get_last_heartbeat_time()
-        if last_heartbeat is None:
-            return False  # Heartbeat process not started
-
-        current_time = time.time()
-        return (current_time - last_heartbeat) > self.heartbeat_timeout
jettask/core/task_batch.py DELETED
@@ -1,153 +0,0 @@
-"""
-Builder for sending tasks in batches
-Provides a type-safe interface for batch task submission
-"""
-from typing import List, Dict, Any, Optional, Union, TYPE_CHECKING
-import asyncio
-from dataclasses import dataclass, field
-
-if TYPE_CHECKING:
-    from .task import Task
-    from .app import JetTaskApp
-
-
-@dataclass
-class TaskMessage:
-    """A single task message"""
-    task_name: str
-    queue: str
-    args: tuple = field(default_factory=tuple)
-    kwargs: dict = field(default_factory=dict)
-    delay: Optional[int] = None
-    timeout: Optional[int] = None
-    max_retries: Optional[int] = None
-    retry_delay: Optional[int] = None
-    scheduled_task_id: Optional[int] = None
-    routing: Optional[dict] = None
-
-    def to_dict(self) -> dict:
-        """Convert to dict form"""
-        data = {
-            'task_name': self.task_name,
-            'args': self.args,
-            'kwargs': self.kwargs,
-        }
-
-        # Only include optional fields that are not None
-        optional_fields = ['delay', 'timeout', 'max_retries', 'retry_delay',
-                           'scheduled_task_id', 'routing']
-        for field in optional_fields:
-            value = getattr(self, field)
-            if value is not None:
-                data[field] = value
-
-        return data
-
-
-class TaskBatch:
-    """
-    Batch builder for tasks
-
-    Usage:
-        batch = task.batch()
-        batch.add(args=(1,), kwargs={'user': 'alice'})
-        batch.add(args=(2,), kwargs={'user': 'bob'}, delay=5)
-        results = await batch.send()
-    """
-
-    def __init__(self, task: 'Task', app: 'JetTaskApp'):
-        self.task = task
-        self.app = app
-        self.messages: List[TaskMessage] = []
-        self._queue = task.queue
-
-    def add(
-        self,
-        args: tuple = None,
-        kwargs: dict = None,
-        queue: str = None,
-        delay: int = None,
-        timeout: int = None,
-        max_retries: int = None,
-        retry_delay: int = None,
-        scheduled_task_id: int = None,
-        routing: dict = None,
-    ) -> 'TaskBatch':
-        """
-        Add a task to the batch
-
-        The signature matches apply_async exactly, so IDE hints still work
-
-        Args:
-            args: Positional arguments
-            kwargs: Keyword arguments
-            queue: Target queue (defaults to the task's queue)
-            delay: Execution delay in seconds
-            timeout: Task timeout in seconds
-            max_retries: Maximum number of retries
-            retry_delay: Delay between retries in seconds
-            scheduled_task_id: Scheduled task ID
-            routing: Routing information
-
-        Returns:
-            self: Supports call chaining
-        """
-        message = TaskMessage(
-            task_name=self.task.name,
-            queue=queue or self._queue,
-            args=args or (),
-            kwargs=kwargs or {},
-            delay=delay,
-            timeout=timeout,
-            max_retries=max_retries,
-            retry_delay=retry_delay,
-            scheduled_task_id=scheduled_task_id,
-            routing=routing,
-        )
-        self.messages.append(message)
-        return self
-
-    async def send(self) -> List[str]:
-        """
-        Send all queued tasks in one batch
-
-        Returns:
-            List[str]: List of task IDs
-        """
-        if not self.messages:
-            return []
-
-        # Convert to the bulk-write format
-        batch_data = []
-        for msg in self.messages:
-            batch_data.append({
-                'queue': msg.queue,
-                'data': msg.to_dict()
-            })
-
-        # Call the app's bulk-write method
-        # (assumes the app exposes an internal bulk-write method)
-        result = await self.app._bulk_write_messages(batch_data)
-
-        # Clear the message list so the batch can be reused
-        self.messages.clear()
-
-        return result
-
-    def send_sync(self) -> List[str]:
-        """
-        Synchronous version of the batch send
-
-        Returns:
-            List[str]: List of task IDs
-        """
-        loop = asyncio.get_event_loop()
-        return loop.run_until_complete(self.send())
-
-    def __len__(self) -> int:
-        """Return the number of tasks currently in the batch"""
-        return len(self.messages)
-
-    def clear(self) -> None:
-        """Clear the batch task list"""
-        self.messages.clear()
jettask/core/worker_scanner.py DELETED
@@ -1,271 +0,0 @@
-"""
-Worker scanner - efficient detection of timed-out workers
-Uses a Redis Sorted Set for O(log N) complexity
-"""
-
-import time
-import logging
-import asyncio
-from typing import Dict, List, Optional
-
-logger = logging.getLogger('app')
-
-
-class WorkerScanner:
-    """
-    Worker scanner optimized with a Redis Sorted Set
-
-    Key optimizations:
-    1. O(log N) timeout detection
-    2. Automatic consistency maintenance
-    3. Atomic operations keep the data consistent
-    """
-
-    def __init__(self, sync_redis, async_redis, redis_prefix: str = 'jettask',
-                 heartbeat_timeout: float = 3.0):
-        self.redis = sync_redis
-        self.async_redis = async_redis
-        self.redis_prefix = redis_prefix
-        self.heartbeat_timeout = heartbeat_timeout
-        self.active_workers_key = f"{redis_prefix}:ACTIVE_WORKERS"
-
-        # Consistency maintenance
-        self._initialized = False
-        self._last_full_sync = 0
-        self._full_sync_interval = 60  # Full sync every 60 seconds
-        self._scan_counter = 0
-        self._partial_check_interval = 10  # Partial check every 10 scans
-
-    # async def ensure_initialized(self):
-    #     """Ensure the Sorted Set is initialized and stays consistent"""
-    #     current_time = time.time()
-
-    #     # First initialization or periodic full sync
-    #     if not self._initialized or (current_time - self._last_full_sync > self._full_sync_interval):
-    #         await self._full_sync()
-    #         self._initialized = True
-    #         self._last_full_sync = current_time
-
-    # async def _full_sync(self):
-    #     """Fully synchronize the Hash and the Sorted Set"""
-    #     try:
-    #         logger.debug("Starting full synchronization of ACTIVE_WORKERS")
-
-    #         # 1. Collect Hash data for all active workers
-    #         pattern = f"{self.redis_prefix}:WORKER:*"
-    #         hash_workers = {}
-
-    #         async for key in self.async_redis.scan_iter(match=pattern, count=100):
-    #             if ':HISTORY:' not in key and ':REUSE:' not in key:
-    #                 worker_id = key.split(':')[-1]
-    #                 worker_data = await self.async_redis.hgetall(key)
-
-    #                 if worker_data and worker_data.get('is_alive', 'true').lower() == 'true':
-    #                     try:
-    #                         heartbeat = float(worker_data.get('last_heartbeat', 0))
-    #                         hash_workers[worker_id] = heartbeat
-    #                     except (ValueError, TypeError):
-    #                         continue
-
-    #         # 2. Read the data in the Sorted Set
-    #         zset_workers = await self.async_redis.zrange(
-    #             self.active_workers_key, 0, -1, withscores=True
-    #         )
-    #         zset_dict = {worker_id: score for worker_id, score in zset_workers}
-
-    #         # 3. Compute the differences
-    #         hash_ids = set(hash_workers.keys())
-    #         zset_ids = set(zset_dict.keys())
-
-    #         to_add = hash_ids - zset_ids  # In the Hash but not the ZSet
-    #         to_remove = zset_ids - hash_ids  # In the ZSet but not the Hash
-    #         to_update = {}  # Mismatched timestamps
-
-    #         for worker_id in hash_ids & zset_ids:
-    #             if abs(hash_workers[worker_id] - zset_dict[worker_id]) > 0.1:
-    #                 to_update[worker_id] = hash_workers[worker_id]
-
-    #         # 4. Repair in bulk
-    #         if to_add or to_remove or to_update:
-    #             pipeline = self.async_redis.pipeline()
-
-    #             if to_add:
-    #                 members = {worker_id: hash_workers[worker_id] for worker_id in to_add}
-    #                 pipeline.zadd(self.active_workers_key, members)
-
-    #             if to_remove:
-    #                 pipeline.zrem(self.active_workers_key, *to_remove)
-
-    #             if to_update:
-    #                 pipeline.zadd(self.active_workers_key, to_update)
-
-    #             await pipeline.execute()
-    #             logger.info(f"Full sync: +{len(to_add)}, -{len(to_remove)}, ~{len(to_update)}")
-
-    #     except Exception as e:
-    #         logger.error(f"Full sync failed: {e}")
-
-    async def scan_timeout_workers(self) -> List[Dict]:
-        """
-        Fast scan for timed-out workers - O(log N) complexity
-        Note: each worker's own heartbeat_timeout must be taken into account
-        """
-        # await self.ensure_initialized()
-
-        # Periodic partial check
-        self._scan_counter += 1
-        if self._scan_counter >= self._partial_check_interval:
-            self._scan_counter = 0
-            asyncio.create_task(self._partial_check())
-
-        current_time = time.time()
-        # Use the largest possible timeout for the initial filter (to avoid misses)
-        # The actual timeout decision below uses each worker's own timeout
-        max_possible_timeout = 300  # 5 minutes, enough to cover most cases
-        cutoff_time = current_time - max_possible_timeout
-
-        # O(log N) fetch of potentially timed-out workers (loose filter)
-        potential_timeout_worker_ids = await self.async_redis.zrangebyscore(
-            self.active_workers_key,
-            min=0,
-            max=current_time - 1  # Not updated for at least 1 second
-        )
-
-        if not potential_timeout_worker_ids:
-            return []
-
-        # Fetch worker details in bulk
-        pipeline = self.async_redis.pipeline()
-        for worker_id in potential_timeout_worker_ids:
-            worker_key = f"{self.redis_prefix}:WORKER:{worker_id}"
-            pipeline.hgetall(worker_key)
-
-        workers_data = await pipeline.execute()
-
-        # Validate and build the result
-        result = []
-        cleanup_pipeline = self.async_redis.pipeline()
-        need_cleanup = False
-
-        for worker_id, worker_data in zip(potential_timeout_worker_ids, workers_data):
-            if not worker_data:
-                # Hash missing, clean up the ZSet
-                cleanup_pipeline.zrem(self.active_workers_key, worker_id)
-                need_cleanup = True
-                continue
-
-            # Get this worker's own heartbeat_timeout
-            worker_heartbeat_timeout = float(worker_data.get('heartbeat_timeout', self.heartbeat_timeout))
-
-            # Double-check the actual heartbeat time (using the worker's own timeout)
-            last_heartbeat = float(worker_data.get('last_heartbeat', 0))
-            worker_cutoff_time = current_time - worker_heartbeat_timeout
-
-            if last_heartbeat >= worker_cutoff_time:
-                # Not timed out: refresh the ZSet and skip
-                cleanup_pipeline.zadd(self.active_workers_key, {worker_id: last_heartbeat})
-                need_cleanup = True
-                continue
-
-            # Check liveness
-            is_alive = worker_data.get('is_alive', 'true').lower() == 'true'
-            if not is_alive:
-                # Already offline, clean up the ZSet
-                cleanup_pipeline.zrem(self.active_workers_key, worker_id)
-                need_cleanup = True
-                continue
-
-            # Confirmed timeout (judged by the worker's own timeout)
-            logger.debug(f"Worker {worker_id} timeout: last_heartbeat={last_heartbeat}, "
-                         f"timeout={worker_heartbeat_timeout}s, cutoff={worker_cutoff_time}")
-            worker_key = f"{self.redis_prefix}:WORKER:{worker_id}"
-            result.append({
-                'worker_key': worker_key,
-                'worker_data': worker_data,
-                'worker_id': worker_id
-            })
-
-        if need_cleanup:
-            await cleanup_pipeline.execute()
-
-        if result:
-            logger.info(f"Found {len(result)} timeout workers")
-
-        return result
-
-    async def update_heartbeat(self, worker_id: str, heartbeat_time: Optional[float] = None):
-        """Atomically update the heartbeat (both Hash and ZSet)"""
-        if heartbeat_time is None:
-            heartbeat_time = time.time()
-
-        pipeline = self.async_redis.pipeline()
-        worker_key = f"{self.redis_prefix}:WORKER:{worker_id}"
-
-        # Update both data structures together
-        pipeline.hset(worker_key, 'last_heartbeat', str(heartbeat_time))
-        pipeline.zadd(self.active_workers_key, {worker_id: heartbeat_time})
-
-        await pipeline.execute()
-
-    async def add_worker(self, worker_id: str, worker_data: Dict):
-        """Add a new worker (to both Hash and ZSet)"""
-        heartbeat_time = float(worker_data.get('last_heartbeat', time.time()))
-
-        pipeline = self.async_redis.pipeline()
-        worker_key = f"{self.redis_prefix}:WORKER:{worker_id}"
-
-        # Create the Hash
-        pipeline.hset(worker_key, mapping=worker_data)
-        # Add to the ZSet
-        pipeline.zadd(self.active_workers_key, {worker_id: heartbeat_time})
-
-        await pipeline.execute()
-        logger.debug(f"Added worker {worker_id} to system")
-
-    async def remove_worker(self, worker_id: str):
-        """Remove a worker (updating both Hash and ZSet)"""
-        pipeline = self.async_redis.pipeline()
-        worker_key = f"{self.redis_prefix}:WORKER:{worker_id}"
-
-        # Mark the Hash as offline
-        pipeline.hset(worker_key, 'is_alive', 'false')
-        # Remove from the ZSet
-        pipeline.zrem(self.active_workers_key, worker_id)
-
-        await pipeline.execute()
-        logger.debug(f"Removed worker {worker_id} from active set")
-
-    async def _partial_check(self):
-        """Partial consistency check"""
-        try:
-            # Randomly check 10 workers
-            sample_size = min(10, await self.async_redis.zcard(self.active_workers_key))
-            if sample_size == 0:
-                return
-
-            # Random sampling
-            random_workers = await self.async_redis.zrandmember(
-                self.active_workers_key, sample_size, withscores=True
-            )
-
-            for worker_id, zset_score in random_workers:
-                worker_key = f"{self.redis_prefix}:WORKER:{worker_id}"
-                hash_heartbeat = await self.async_redis.hget(worker_key, 'last_heartbeat')
-
-                if not hash_heartbeat:
-                    # Hash missing, delete the ZSet entry
-                    await self.async_redis.zrem(self.active_workers_key, worker_id)
-                    logger.debug(f"Partial check: removed {worker_id}")
-                else:
-                    # Check timestamp consistency
-                    hash_time = float(hash_heartbeat)
-                    if abs(hash_time - zset_score) > 1.0:
-                        await self.async_redis.zadd(self.active_workers_key, {worker_id: hash_time})
-                        logger.debug(f"Partial check: synced {worker_id}")
-
-        except Exception as e:
-            logger.debug(f"Partial check error: {e}")
-
-    async def get_active_count(self) -> int:
-        """Get the number of active workers - O(1)"""
-        return await self.async_redis.zcard(self.active_workers_key)