jettask 0.2.18__py3-none-any.whl → 0.2.20__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in their public registries.
- jettask/__init__.py +60 -2
- jettask/cli.py +314 -228
- jettask/config/__init__.py +9 -1
- jettask/config/config.py +245 -0
- jettask/config/env_loader.py +381 -0
- jettask/config/lua_scripts.py +158 -0
- jettask/config/nacos_config.py +132 -5
- jettask/core/__init__.py +1 -1
- jettask/core/app.py +1573 -666
- jettask/core/app_importer.py +33 -16
- jettask/core/container.py +532 -0
- jettask/core/task.py +1 -4
- jettask/core/unified_manager_base.py +2 -2
- jettask/executor/__init__.py +38 -0
- jettask/executor/core.py +625 -0
- jettask/executor/executor.py +338 -0
- jettask/executor/orchestrator.py +290 -0
- jettask/executor/process_entry.py +638 -0
- jettask/executor/task_executor.py +317 -0
- jettask/messaging/__init__.py +68 -0
- jettask/messaging/event_pool.py +2188 -0
- jettask/messaging/reader.py +519 -0
- jettask/messaging/registry.py +266 -0
- jettask/messaging/scanner.py +369 -0
- jettask/messaging/sender.py +312 -0
- jettask/persistence/__init__.py +118 -0
- jettask/persistence/backlog_monitor.py +567 -0
- jettask/{backend/data_access.py → persistence/base.py} +58 -57
- jettask/persistence/consumer.py +315 -0
- jettask/{core → persistence}/db_manager.py +23 -22
- jettask/persistence/maintenance.py +81 -0
- jettask/persistence/message_consumer.py +259 -0
- jettask/{backend/namespace_data_access.py → persistence/namespace.py} +66 -98
- jettask/persistence/offline_recovery.py +196 -0
- jettask/persistence/queue_discovery.py +215 -0
- jettask/persistence/task_persistence.py +218 -0
- jettask/persistence/task_updater.py +583 -0
- jettask/scheduler/__init__.py +2 -2
- jettask/scheduler/loader.py +6 -5
- jettask/scheduler/run_scheduler.py +1 -1
- jettask/scheduler/scheduler.py +7 -7
- jettask/scheduler/{unified_scheduler_manager.py → scheduler_coordinator.py} +18 -13
- jettask/task/__init__.py +16 -0
- jettask/{router.py → task/router.py} +26 -8
- jettask/task/task_center/__init__.py +9 -0
- jettask/task/task_executor.py +318 -0
- jettask/task/task_registry.py +291 -0
- jettask/test_connection_monitor.py +73 -0
- jettask/utils/__init__.py +31 -1
- jettask/{monitor/run_backlog_collector.py → utils/backlog_collector.py} +1 -1
- jettask/utils/db_connector.py +1629 -0
- jettask/{db_init.py → utils/db_init.py} +1 -1
- jettask/utils/rate_limit/__init__.py +30 -0
- jettask/utils/rate_limit/concurrency_limiter.py +665 -0
- jettask/utils/rate_limit/config.py +145 -0
- jettask/utils/rate_limit/limiter.py +41 -0
- jettask/utils/rate_limit/manager.py +269 -0
- jettask/utils/rate_limit/qps_limiter.py +154 -0
- jettask/utils/rate_limit/task_limiter.py +384 -0
- jettask/utils/serializer.py +3 -0
- jettask/{monitor/stream_backlog_monitor.py → utils/stream_backlog.py} +14 -6
- jettask/utils/time_sync.py +173 -0
- jettask/webui/__init__.py +27 -0
- jettask/{api/v1 → webui/api}/alerts.py +1 -1
- jettask/{api/v1 → webui/api}/analytics.py +2 -2
- jettask/{api/v1 → webui/api}/namespaces.py +1 -1
- jettask/{api/v1 → webui/api}/overview.py +1 -1
- jettask/{api/v1 → webui/api}/queues.py +3 -3
- jettask/{api/v1 → webui/api}/scheduled.py +1 -1
- jettask/{api/v1 → webui/api}/settings.py +1 -1
- jettask/{api.py → webui/app.py} +253 -145
- jettask/webui/namespace_manager/__init__.py +10 -0
- jettask/{multi_namespace_consumer.py → webui/namespace_manager/multi.py} +69 -22
- jettask/{unified_consumer_manager.py → webui/namespace_manager/unified.py} +1 -1
- jettask/{run.py → webui/run.py} +2 -2
- jettask/{services → webui/services}/__init__.py +1 -3
- jettask/{services → webui/services}/overview_service.py +34 -16
- jettask/{services → webui/services}/queue_service.py +1 -1
- jettask/{backend → webui/services}/queue_stats_v2.py +1 -1
- jettask/{services → webui/services}/settings_service.py +1 -1
- jettask/worker/__init__.py +53 -0
- jettask/worker/lifecycle.py +1507 -0
- jettask/worker/manager.py +583 -0
- jettask/{core/offline_worker_recovery.py → worker/recovery.py} +268 -175
- {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/METADATA +2 -71
- jettask-0.2.20.dist-info/RECORD +145 -0
- jettask/__main__.py +0 -140
- jettask/api/__init__.py +0 -103
- jettask/backend/__init__.py +0 -1
- jettask/backend/api/__init__.py +0 -3
- jettask/backend/api/v1/__init__.py +0 -17
- jettask/backend/api/v1/monitoring.py +0 -431
- jettask/backend/api/v1/namespaces.py +0 -504
- jettask/backend/api/v1/queues.py +0 -342
- jettask/backend/api/v1/tasks.py +0 -367
- jettask/backend/core/__init__.py +0 -3
- jettask/backend/core/cache.py +0 -221
- jettask/backend/core/database.py +0 -200
- jettask/backend/core/exceptions.py +0 -102
- jettask/backend/dependencies.py +0 -261
- jettask/backend/init_meta_db.py +0 -158
- jettask/backend/main.py +0 -1426
- jettask/backend/main_unified.py +0 -78
- jettask/backend/main_v2.py +0 -394
- jettask/backend/models/__init__.py +0 -3
- jettask/backend/models/requests.py +0 -236
- jettask/backend/models/responses.py +0 -230
- jettask/backend/namespace_api_old.py +0 -267
- jettask/backend/services/__init__.py +0 -3
- jettask/backend/start.py +0 -42
- jettask/backend/unified_api_router.py +0 -1541
- jettask/cleanup_deprecated_tables.sql +0 -16
- jettask/core/consumer_manager.py +0 -1695
- jettask/core/delay_scanner.py +0 -256
- jettask/core/event_pool.py +0 -1700
- jettask/core/heartbeat_process.py +0 -222
- jettask/core/task_batch.py +0 -153
- jettask/core/worker_scanner.py +0 -271
- jettask/executors/__init__.py +0 -5
- jettask/executors/asyncio.py +0 -876
- jettask/executors/base.py +0 -30
- jettask/executors/common.py +0 -148
- jettask/executors/multi_asyncio.py +0 -309
- jettask/gradio_app.py +0 -570
- jettask/integrated_gradio_app.py +0 -1088
- jettask/main.py +0 -0
- jettask/monitoring/__init__.py +0 -3
- jettask/pg_consumer.py +0 -1896
- jettask/run_monitor.py +0 -22
- jettask/run_webui.py +0 -148
- jettask/scheduler/multi_namespace_scheduler.py +0 -294
- jettask/scheduler/unified_manager.py +0 -450
- jettask/task_center_client.py +0 -150
- jettask/utils/serializer_optimized.py +0 -33
- jettask/webui_exceptions.py +0 -67
- jettask-0.2.18.dist-info/RECORD +0 -150
- /jettask/{constants.py → config/constants.py} +0 -0
- /jettask/{backend/config.py → config/task_center.py} +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/pg_consumer_v2.py +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/add_execution_time_field.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_new_tables.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_tables_v3.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/migrate_to_new_structure.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/modify_time_fields.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql_utils.py +0 -0
- /jettask/{models.py → persistence/models.py} +0 -0
- /jettask/scheduler/{manager.py → task_crud.py} +0 -0
- /jettask/{schema.sql → schemas/schema.sql} +0 -0
- /jettask/{task_center.py → task/task_center/client.py} +0 -0
- /jettask/{monitoring → utils}/file_watcher.py +0 -0
- /jettask/{services/redis_monitor_service.py → utils/redis_monitor.py} +0 -0
- /jettask/{api/v1 → webui/api}/__init__.py +0 -0
- /jettask/{webui_config.py → webui/config.py} +0 -0
- /jettask/{webui_models → webui/models}/__init__.py +0 -0
- /jettask/{webui_models → webui/models}/namespace.py +0 -0
- /jettask/{services → webui/services}/alert_service.py +0 -0
- /jettask/{services → webui/services}/analytics_service.py +0 -0
- /jettask/{services → webui/services}/scheduled_task_service.py +0 -0
- /jettask/{services → webui/services}/task_service.py +0 -0
- /jettask/{webui_sql → webui/sql}/batch_upsert_functions.sql +0 -0
- /jettask/{webui_sql → webui/sql}/verify_database.sql +0 -0
- {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/WHEEL +0 -0
- {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/entry_points.txt +0 -0
- {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/licenses/LICENSE +0 -0
- {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/top_level.txt +0 -0
jettask/executor/executor.py
@@ -0,0 +1,338 @@
+"""
+Unified executor
+
+A unified interface integrating the single-process and multi-process execution modes
+"""
+
+import asyncio
+import multiprocessing
+import logging
+import os
+import time
+from collections import deque
+from typing import List, Optional
+
+from .core import ExecutionMode, ExecutorCore
+from .orchestrator import ProcessConfig, ProcessOrchestrator
+from ..worker.lifecycle import WorkerStateManager
+from ..utils.rate_limit.manager import RateLimiterManager
+
+logger = logging.getLogger('app')
+
+# Try to use uvloop for better performance
+try:
+    import uvloop
+    uvloop.install()
+    logger.debug("Using uvloop for better performance")
+except ImportError:
+    pass
+
+
+class UnifiedExecutor:
+    """
+    Unified executor
+
+    Consolidates the functionality of AsyncioExecutor and MultiAsyncioExecutor
+    Supports both single-process and multi-process execution modes
+
+    Responsibilities:
+    1. Provide a unified executor interface
+    2. Select ExecutorCore or ProcessOrchestrator based on the mode
+    3. Manage the event queue and task dispatch
+    """
+
+    def __init__(self, event_queue, app, concurrency=100,
+                 mode: ExecutionMode = ExecutionMode.SINGLE_PROCESS,
+                 task_name: str = None):
+        """
+        Initialize the unified executor
+
+        Args:
+            event_queue: Event queue
+            app: Application instance
+            concurrency: Concurrency level
+            mode: Execution mode
+            task_name: Task name (required in single-process mode)
+        """
+        self.event_queue = event_queue
+        self.app = app
+        self.concurrency = concurrency
+        self.mode = mode
+        self.task_name = task_name
+
+        # Initialize the core component according to the mode
+        if mode == ExecutionMode.SINGLE_PROCESS:
+            if not task_name:
+                raise ValueError("task_name is required for SINGLE_PROCESS mode")
+
+            self.executor_core = ExecutorCore(
+                app=app,
+                task_name=task_name,
+                concurrency=concurrency
+            )
+            self.orchestrator = None
+            logger.debug(f"UnifiedExecutor initialized in SINGLE_PROCESS mode for task {task_name}")
+
+        elif mode == ExecutionMode.MULTI_PROCESS:
+            self.executor_core = None
+            self.orchestrator = ProcessOrchestrator(
+                app=app,
+                num_processes=concurrency
+            )
+            logger.debug(f"UnifiedExecutor initialized in MULTI_PROCESS mode with {concurrency} processes")
+
+        else:
+            raise ValueError(f"Unsupported execution mode: {mode}")
+
+        # Set of active tasks (used in single-process mode)
+        self._active_tasks = set()
+
+    def logic(self, *args, **kwargs):
+        """
+        BaseExecutor interface method
+        Not used in single-process mode; multi-process mode delegates to ProcessOrchestrator
+        """
+        pass
+
+    async def loop(self):
+        """Main loop - single-process mode"""
+        if self.mode != ExecutionMode.SINGLE_PROCESS:
+            raise RuntimeError("loop() is only for SINGLE_PROCESS mode")
+
+        # Initialize the rate limiter
+        self.app.consumer_manager._heartbeat_strategy._ensure_consumer_id()
+        worker_id = self.app.consumer_manager._heartbeat_strategy.consumer_id
+        registry_manager = self.app.consumer_manager
+
+        if not self.app.worker_state_manager:
+            self.app.worker_state_manager = WorkerStateManager(
+                redis_client=self.app.ep.async_redis_client,
+                redis_prefix=self.executor_core.prefix,
+                event_pool=self.app.ep  # Pass the EventPool instance to enable event-driven message recovery
+            )
+            await self.app.worker_state_manager.start_listener()
+            logger.debug(f"WorkerStateManager started for worker {worker_id}")
+
+        # Initialize time synchronization
+        from jettask.utils.time_sync import init_time_sync
+        time_sync = await init_time_sync(self.app.ep.async_redis_client)
+        logger.debug(f"TimeSync initialized, offset={time_sync.get_offset():.6f}s")
+
+        self.executor_core.rate_limiter_manager = RateLimiterManager(
+            redis_client=self.app.ep.async_redis_client,
+            worker_id=worker_id,
+            redis_prefix=self.executor_core.prefix,
+            registry_manager=registry_manager,
+            worker_state_manager=self.app.worker_state_manager
+        )
+        logger.debug(f"RateLimiterManager initialized for worker {worker_id}")
+
+        await self.executor_core.rate_limiter_manager.load_config_from_redis()
+
+        tasks_batch = []
+        max_buffer_size = 5000
+
+        try:
+            while True:
+                # Check for a shutdown signal
+                if hasattr(self.app, '_should_exit') and self.app._should_exit:
+                    logger.debug("UnifiedExecutor detected shutdown signal")
+                    break
+
+                # Check the parent process
+                if hasattr(os, 'getppid') and os.getppid() == 1:
+                    logger.debug("Parent process died, exiting...")
+                    break
+
+                current_time = time.time()
+
+                # Fetch an event
+                event = None
+                try:
+                    event = await asyncio.wait_for(self.event_queue.get(), timeout=0.1)
+                except asyncio.TimeoutError:
+                    event = None
+
+                if event:
+                    event.pop("execute_time", None)
+                    tasks_batch.append(event)
+                    logger.debug(f"[EVENT] Got event: {event.get('event_id', 'unknown')}, task_name={event.get('event_data', {}).get('_task_name')}")
+
+                # Create tasks in batch
+                if tasks_batch:
+                    for event in tasks_batch:
+                        event_data = event.get('event_data', {})
+                        event_task_name = event_data.get("_task_name") or event_data.get("name")
+
+                        if not event_task_name:
+                            logger.error(f"No task_name in event {event.get('event_id')}")
+                            continue
+
+                        # Verify that the task name matches
+                        if event_task_name != self.task_name:
+                            logger.error(f"Task name mismatch: {event_task_name} != {self.task_name}")
+                            continue
+
+                        # Rate limiting
+                        logger.debug(f"[TASK] Attempting to acquire rate limit for {self.task_name}, event_id={event.get('event_id')}")
+                        rate_limit_token = await self.executor_core.rate_limiter_manager.acquire(
+                            task_name=self.task_name,
+                            timeout=None
+                        )
+                        print(f'{rate_limit_token=}')
+                        if not rate_limit_token:
+                            logger.error(f"Failed to acquire token for {self.task_name}")
+                            continue
+                        logger.debug(f"[TASK] Successfully acquired rate limit for {self.task_name}, token={rate_limit_token}, starting execution")
+
+                        self.executor_core.batch_counter += 1
+
+                        # Create a task wrapper that automatically releases the rate-limit permit when the task finishes
+                        async def execute_with_release(event_data, token):
+                            try:
+                                await self.executor_core.execute_task(**event_data)
+                            finally:
+                                # Release the concurrency permit whether the task succeeded or failed
+                                await self.executor_core.rate_limiter_manager.release(self.task_name, task_id=token)
+
+                        task = asyncio.create_task(execute_with_release(event, rate_limit_token))
+                        self._active_tasks.add(task)
+                        task.add_done_callback(self._active_tasks.discard)
+
+                    tasks_batch.clear()
+
+                # Smart buffer management
+                buffer_full = (
+                    len(self.executor_core.pending_acks) >= max_buffer_size or
+                    len(self.executor_core.status_updates) >= max_buffer_size or
+                    len(self.executor_core.data_updates) >= max_buffer_size or
+                    len(self.executor_core.task_info_updates) >= max_buffer_size
+                )
+
+                should_flush_periodic = False
+                has_pending_data = (
+                    self.executor_core.pending_acks or
+                    self.executor_core.status_updates or
+                    self.executor_core.data_updates or
+                    self.executor_core.task_info_updates
+                )
+
+                if has_pending_data:
+                    for data_type, config in self.executor_core.pipeline_config.items():
+                        time_since_flush = current_time - self.executor_core.last_pipeline_flush[data_type]
+
+                        if data_type == 'ack' and self.executor_core.pending_acks:
+                            if time_since_flush >= config['max_delay']:
+                                should_flush_periodic = True
+                                break
+                        elif data_type == 'task_info' and self.executor_core.task_info_updates:
+                            if time_since_flush >= config['max_delay']:
+                                should_flush_periodic = True
+                                break
+                        elif data_type == 'status' and self.executor_core.status_updates:
+                            if time_since_flush >= config['max_delay']:
+                                should_flush_periodic = True
+                                break
+                        elif data_type == 'data' and self.executor_core.data_updates:
+                            if time_since_flush >= config['max_delay']:
+                                should_flush_periodic = True
+                                break
+
+                if buffer_full or should_flush_periodic:
+                    asyncio.create_task(self.executor_core._flush_all_buffers())
+
+                # Smart sleep
+                has_events = False
+                if isinstance(self.event_queue, deque):
+                    has_events = bool(self.event_queue)
+                elif isinstance(self.event_queue, asyncio.Queue):
+                    has_events = not self.event_queue.empty()
+
+                if has_events:
+                    await asyncio.sleep(0)
+                else:
+                    if has_pending_data:
+                        await self.executor_core._flush_all_buffers()
+                    await asyncio.sleep(0.001)
+
+        except KeyboardInterrupt:
+            logger.debug("UnifiedExecutor received KeyboardInterrupt")
+        except Exception as e:
+            logger.error(f"UnifiedExecutor loop error: {e}")
+        finally:
+            await self._cleanup_single_process()
+
+    async def _cleanup_single_process(self):
+        """Clean up single-process mode resources"""
+        logger.debug("UnifiedExecutor cleaning up...")
+
+        # Set the stop flag
+        if hasattr(self.app.ep, '_stop_reading'):
+            self.app.ep._stop_reading = True
+
+        # Cancel active tasks
+        if self._active_tasks:
+            logger.debug(f"Cancelling {len(self._active_tasks)} active tasks...")
+            for task in self._active_tasks:
+                if not task.done():
+                    task.cancel()
+
+            if self._active_tasks:
+                try:
+                    await asyncio.wait_for(
+                        asyncio.gather(*self._active_tasks, return_exceptions=True),
+                        timeout=0.2
+                    )
+                except asyncio.TimeoutError:
+                    logger.debug("Some tasks did not complete in time")
+
+        # Clean up the ExecutorCore
+        await self.executor_core.cleanup()
+
+        # Clean up the event pool
+        if hasattr(self.app.ep, 'cleanup'):
+            try:
+                self.app.ep.cleanup()
+            except Exception as e:
+                logger.error(f"Error cleaning up EventPool: {e}")
+
+        # Mark the worker as offline
+        if self.app.consumer_manager:
+            try:
+                self.app.consumer_manager.cleanup()
+                logger.debug("Worker marked as offline")
+            except Exception as e:
+                logger.error(f"Error marking worker offline: {e}")
+
+        logger.debug("UnifiedExecutor stopped")
+
+    def start_multi_process(self, queues: List[str], prefetch_multiplier: int = 100, worker_id: str = None, worker_key: str = None):
+        """Start multi-process mode
+
+        Args:
+            queues: List of queues
+            prefetch_multiplier: Prefetch multiplier
+            worker_id: Worker ID (generated by the main process, reused by subprocesses)
+            worker_key: Worker key (generated by the main process, reused by subprocesses)
+        """
+        if self.mode != ExecutionMode.MULTI_PROCESS:
+            raise RuntimeError("start_multi_process() is only for MULTI_PROCESS mode")
+
+        self.orchestrator.start(queues, prefetch_multiplier, worker_id, worker_key)
+
+    def shutdown(self):
+        """
+        Shut down the executor
+
+        Calls the shutdown method appropriate for the execution mode
+        """
+        if self.mode == ExecutionMode.MULTI_PROCESS:
+            if self.orchestrator:
+                self.orchestrator.shutdown()
+        elif self.mode == ExecutionMode.SINGLE_PROCESS:
+            # Cleanup for single-process mode is handled in _cleanup_single_process;
+            # this is only a placeholder, the actual cleanup is done by the event loop
+            logger.debug("UnifiedExecutor shutdown called in SINGLE_PROCESS mode")


__all__ = ['UnifiedExecutor']
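One constructor serves both modes: SINGLE_PROCESS wraps an ExecutorCore bound to a single task and is driven by loop(), while MULTI_PROCESS reinterprets concurrency as a process count and delegates to ProcessOrchestrator. A minimal usage sketch follows; the app parameter, queue name, and task name are illustrative assumptions, not taken from this diff:

    import asyncio

    from jettask.executor.core import ExecutionMode
    from jettask.executor.executor import UnifiedExecutor

    def run_single_process(app, task_name="send_email"):
        # Single-process mode: loop() polls the event queue with a 0.1s timeout
        # and fans events out to rate-limited asyncio tasks.
        executor = UnifiedExecutor(
            event_queue=asyncio.Queue(),
            app=app,                 # assumed: a configured jettask Application
            concurrency=100,
            mode=ExecutionMode.SINGLE_PROCESS,
            task_name=task_name,     # required in this mode, else ValueError
        )
        asyncio.run(executor.loop())

    def run_multi_process(app):
        # Multi-process mode: concurrency is reinterpreted as the process count.
        executor = UnifiedExecutor(
            event_queue=None,        # stored but unused; subprocesses read their own queues
            app=app,
            concurrency=4,
            mode=ExecutionMode.MULTI_PROCESS,
        )
        executor.start_multi_process(queues=["default"], prefetch_multiplier=100)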
jettask/executor/orchestrator.py
@@ -0,0 +1,290 @@
+"""
+Multi-process orchestrator
+
+Responsibilities:
+1. Start and stop subprocesses
+2. Monitor subprocess health
+3. Automatically restart on failure
+
+Note: uses the system default multiprocessing start method (fork on Linux, spawn on Windows/macOS).
+- fork: subprocesses inherit the parent process's state, which must be cleaned up via cleanup_inherited_state()
+- spawn: subprocesses are fresh Python interpreters that inherit no parent state, but all passed objects must be picklable
+"""
+import time
+import os
+import signal
+import logging
+import multiprocessing
+from dataclasses import dataclass
+from typing import Dict, List
+
+logger = logging.getLogger('app')
+
+
+@dataclass
+class ProcessConfig:
+    """Process configuration"""
+    process_id: int
+    redis_url: str
+    redis_prefix: str
+    queues: List[str]
+    tasks: Dict
+    concurrency: int
+    prefetch_multiplier: int
+    max_connections: int
+    consumer_strategy: str
+    consumer_config: Dict
+    worker_id: str
+    worker_key: str
+
+
+class ProcessOrchestrator:
+    """
+    Process orchestrator - responsible only for process lifecycle management
+
+    Responsibilities:
+    1. Start and stop subprocesses
+    2. Monitor subprocess health
+    3. Automatically restart failed processes
+    """
+
+    def __init__(self, app, num_processes: int = 2):
+        """
+        Initialize the process orchestrator
+
+        Args:
+            app: Application instance
+            num_processes: Number of processes
+        """
+        self.app = app
+        self.num_processes = num_processes
+        self.processes: Dict[int, multiprocessing.Process] = {}
+        self.process_configs: Dict[int, ProcessConfig] = {}
+        self.shutdown_event = multiprocessing.Event()
+
+        # Monitoring configuration
+        self._monitor_interval = 1.0
+        self._restart_delay = 2.0
+        self._max_restart_attempts = 3
+        self._restart_counts: Dict[int, int] = {}
+        self._main_received_signal = False
+        self._shutdown_called = False
+
+        logger.debug(f"ProcessOrchestrator initialized with {num_processes} processes")
+
+    def _create_process_config(
+        self,
+        process_id: int,
+        queues: List[str],
+        prefetch_multiplier: int
+    ) -> ProcessConfig:
+        """Create a process configuration"""
+        # Copy consumer_config and set the disable_heartbeat_process flag,
+        # because in subprocesses the heartbeat is already managed by the main process's HeartbeatThreadManager
+        consumer_config = dict(self.app.consumer_config or {})
+        consumer_config['disable_heartbeat_process'] = True
+
+        return ProcessConfig(
+            process_id=process_id,
+            redis_url=self.app.redis_url,
+            redis_prefix=self.app.redis_prefix,
+            queues=queues,
+            tasks=self.app._tasks,
+            concurrency=10000,  # Internal concurrency within each subprocess
+            prefetch_multiplier=prefetch_multiplier,
+            max_connections=self.app.max_connections,
+            consumer_strategy=self.app.consumer_strategy,
+            consumer_config=consumer_config,
+            worker_id=getattr(self.app, 'worker_id', None),
+            worker_key=getattr(self.app, 'worker_key', None)
+        )
+
+    def _start_process(self, process_id: int, config: ProcessConfig) -> multiprocessing.Process:
+        """Start a single subprocess
+
+        Note: in fork mode, the subprocess resets the logging locks in the first line of subprocess_main.
+        """
+        from .process_entry import subprocess_main
+        process = multiprocessing.Process(
+            target=subprocess_main,
+            args=(
+                config.process_id,
+                config.redis_url,
+                config.redis_prefix,
+                config.queues,
+                config.tasks,
+                config.concurrency,
+                config.prefetch_multiplier,
+                config.max_connections,
+                config.consumer_strategy,
+                config.consumer_config,
+                config.worker_id,
+                config.worker_key,
+                self.shutdown_event
+            ),
+            name=f"JetTask-Worker-{process_id}"
+        )
+        process.start()
+
+
+        logger.info(f"Started process #{process_id} (PID: {process.pid})")
+        return process
+
+    def _restart_process(self, process_id: int):
+        """Restart a failed process"""
+        if self.shutdown_event.is_set():
+            return
+
+        restart_count = self._restart_counts.get(process_id, 0)
+        if restart_count >= self._max_restart_attempts:
+            logger.error(f"Process #{process_id} exceeded max restart attempts")
+            return
+
+        self._restart_counts[process_id] = restart_count + 1
+        delay = self._restart_delay * (2 ** restart_count)
+
+        logger.info(
+            f"Restarting process #{process_id} (attempt {restart_count + 1}) after {delay}s"
+        )
+        time.sleep(delay)
+
+        config = self.process_configs[process_id]
+        process = self._start_process(process_id, config)
+        self.processes[process_id] = process
+
+    def _monitor_processes(self) -> int:
+        """Monitor process health"""
+        alive_count = 0
+
+        for process_id, process in list(self.processes.items()):
+            if process.is_alive():
+                alive_count += 1
+                self._restart_counts[process_id] = 0  # Reset the restart counter
+            else:
+                exit_code = process.exitcode
+
+                if self.shutdown_event.is_set():
+                    logger.debug(f"Process #{process_id} stopped during shutdown")
+                elif exit_code in (-15, -2):  # SIGTERM, SIGINT
+                    logger.info(f"Process #{process_id} received termination signal")
+                    self.shutdown_event.set()
+                elif exit_code == 0:
+                    if not self._main_received_signal:
+                        # Check whether all processes have stopped
+                        all_stopped = all(not p.is_alive() for p in self.processes.values())
+                        if all_stopped:
+                            logger.info("All processes stopped simultaneously")
+                            self.shutdown_event.set()
+                        else:
+                            logger.warning(f"Process #{process_id} stopped unexpectedly")
+                            self._restart_process(process_id)
+                else:
+                    logger.error(f"Process #{process_id} exited with code {exit_code}")
+                    self._restart_process(process_id)
+
+        return alive_count
+
+    def start(
+        self,
+        queues: List[str],
+        prefetch_multiplier: int = 100,
+        worker_ids: list = None,
+    ):
+        """Start all processes
+
+        Args:
+            queues: List of queues
+            prefetch_multiplier: Prefetch multiplier
+        """
+        logger.info(f"Starting {self.num_processes} worker processes")
+
+        # Set up signal handling in the main process
+        def signal_handler(signum, frame):
+            logger.info(f"Main process received signal {signum}")
+            self._main_received_signal = True
+            self.shutdown_event.set()
+
+        signal.signal(signal.SIGINT, signal_handler)
+        signal.signal(signal.SIGTERM, signal_handler)
+
+        try:
+            # Start all processes
+            for i, (worker_id, worker_key) in enumerate(worker_ids):
+                config = self._create_process_config(i, queues, prefetch_multiplier)
+                if worker_id:
+                    config.worker_id = worker_id
+                    config.worker_key = worker_key
+
+                self.process_configs[i] = config
+                process = self._start_process(i, config)
+                self.processes[i] = process
+
+                time.sleep(0.1)  # Stagger process startup
+
+            logger.info(f"All {self.num_processes} processes started")
+
+            # Monitoring loop
+            while not self.shutdown_event.is_set():
+                alive_count = self._monitor_processes()
+
+                if alive_count == 0:
+                    if self._main_received_signal or self.shutdown_event.is_set():
+                        logger.info("All processes stopped during shutdown")
+                    else:
+                        logger.error("All processes stopped unexpectedly")
+                    break
+
+                time.sleep(self._monitor_interval)
+
+        except KeyboardInterrupt:
+            logger.info("Main process received KeyboardInterrupt")
+            self._main_received_signal = True
+            self.shutdown_event.set()
+        except Exception as e:
+            logger.error(f"ProcessOrchestrator error: {e}", exc_info=True)
+        finally:
+            self.shutdown()
+
+    def shutdown(self):
+        """Shut down all processes"""
+        if self._shutdown_called:
+            return
+        self._shutdown_called = True
+
+        logger.info("Shutting down ProcessOrchestrator")
+
+        self.shutdown_event.set()
+
+        # Force-kill all processes
+        for process_id, process in self.processes.items():
+            if process.is_alive():
+                logger.info(f"Force killing process #{process_id} (PID: {process.pid})")
+                try:
+                    os.kill(process.pid, signal.SIGKILL)
+                except Exception as e:
+                    logger.warning(f"Error killing process #{process_id}: {e}")
+                    try:
+                        process.kill()
+                    except:
+                        pass
+
+        # Wait for processes to exit (at most 1 second)
+        start_time = time.time()
+        for process_id, process in self.processes.items():
+            remaining = max(0, 1.0 - (time.time() - start_time))
+            try:
+                process.join(timeout=remaining)
+            except:
+                pass
+
+            if process.is_alive():
+                logger.warning(f"Process #{process_id} still alive after SIGKILL")
+
+        self.processes.clear()
+        self.process_configs.clear()
+        self._restart_counts.clear()
+
+        logger.info("ProcessOrchestrator shutdown complete")
+
+
+__all__ = ['ProcessOrchestrator', 'ProcessConfig']
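_restart_process applies exponential backoff: with _restart_delay = 2.0 and _max_restart_attempts = 3, a crashing process is restarted after 2 s, 4 s, and 8 s before the orchestrator gives up. A usage sketch under the same assumptions as above; the tuple unpacking in start() implies worker_ids is a list of (worker_id, worker_key) pairs, and the pairs shown here are made up:

    from jettask.executor.orchestrator import ProcessOrchestrator

    def run_orchestrator(app):
        # app assumed: a configured jettask Application, as in the earlier sketch
        orchestrator = ProcessOrchestrator(app=app, num_processes=2)
        # start() blocks in the monitoring loop and calls shutdown() from its
        # finally block; SIGINT/SIGTERM set shutdown_event via the signal handler.
        orchestrator.start(
            queues=["default"],
            prefetch_multiplier=100,
            worker_ids=[
                ("worker-1", "jettask:worker:1"),  # hypothetical (worker_id, worker_key)
                ("worker-2", "jettask:worker:2"),  # pairs, normally from the main process
            ],
        )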