jettask 0.2.18__py3-none-any.whl → 0.2.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jettask/__init__.py +60 -2
- jettask/cli.py +314 -228
- jettask/config/__init__.py +9 -1
- jettask/config/config.py +245 -0
- jettask/config/env_loader.py +381 -0
- jettask/config/lua_scripts.py +158 -0
- jettask/config/nacos_config.py +132 -5
- jettask/core/__init__.py +1 -1
- jettask/core/app.py +1573 -666
- jettask/core/app_importer.py +33 -16
- jettask/core/container.py +532 -0
- jettask/core/task.py +1 -4
- jettask/core/unified_manager_base.py +2 -2
- jettask/executor/__init__.py +38 -0
- jettask/executor/core.py +625 -0
- jettask/executor/executor.py +338 -0
- jettask/executor/orchestrator.py +290 -0
- jettask/executor/process_entry.py +638 -0
- jettask/executor/task_executor.py +317 -0
- jettask/messaging/__init__.py +68 -0
- jettask/messaging/event_pool.py +2188 -0
- jettask/messaging/reader.py +519 -0
- jettask/messaging/registry.py +266 -0
- jettask/messaging/scanner.py +369 -0
- jettask/messaging/sender.py +312 -0
- jettask/persistence/__init__.py +118 -0
- jettask/persistence/backlog_monitor.py +567 -0
- jettask/{backend/data_access.py → persistence/base.py} +58 -57
- jettask/persistence/consumer.py +315 -0
- jettask/{core → persistence}/db_manager.py +23 -22
- jettask/persistence/maintenance.py +81 -0
- jettask/persistence/message_consumer.py +259 -0
- jettask/{backend/namespace_data_access.py → persistence/namespace.py} +66 -98
- jettask/persistence/offline_recovery.py +196 -0
- jettask/persistence/queue_discovery.py +215 -0
- jettask/persistence/task_persistence.py +218 -0
- jettask/persistence/task_updater.py +583 -0
- jettask/scheduler/__init__.py +2 -2
- jettask/scheduler/loader.py +6 -5
- jettask/scheduler/run_scheduler.py +1 -1
- jettask/scheduler/scheduler.py +7 -7
- jettask/scheduler/{unified_scheduler_manager.py → scheduler_coordinator.py} +18 -13
- jettask/task/__init__.py +16 -0
- jettask/{router.py → task/router.py} +26 -8
- jettask/task/task_center/__init__.py +9 -0
- jettask/task/task_executor.py +318 -0
- jettask/task/task_registry.py +291 -0
- jettask/test_connection_monitor.py +73 -0
- jettask/utils/__init__.py +31 -1
- jettask/{monitor/run_backlog_collector.py → utils/backlog_collector.py} +1 -1
- jettask/utils/db_connector.py +1629 -0
- jettask/{db_init.py → utils/db_init.py} +1 -1
- jettask/utils/rate_limit/__init__.py +30 -0
- jettask/utils/rate_limit/concurrency_limiter.py +665 -0
- jettask/utils/rate_limit/config.py +145 -0
- jettask/utils/rate_limit/limiter.py +41 -0
- jettask/utils/rate_limit/manager.py +269 -0
- jettask/utils/rate_limit/qps_limiter.py +154 -0
- jettask/utils/rate_limit/task_limiter.py +384 -0
- jettask/utils/serializer.py +3 -0
- jettask/{monitor/stream_backlog_monitor.py → utils/stream_backlog.py} +14 -6
- jettask/utils/time_sync.py +173 -0
- jettask/webui/__init__.py +27 -0
- jettask/{api/v1 → webui/api}/alerts.py +1 -1
- jettask/{api/v1 → webui/api}/analytics.py +2 -2
- jettask/{api/v1 → webui/api}/namespaces.py +1 -1
- jettask/{api/v1 → webui/api}/overview.py +1 -1
- jettask/{api/v1 → webui/api}/queues.py +3 -3
- jettask/{api/v1 → webui/api}/scheduled.py +1 -1
- jettask/{api/v1 → webui/api}/settings.py +1 -1
- jettask/{api.py → webui/app.py} +253 -145
- jettask/webui/namespace_manager/__init__.py +10 -0
- jettask/{multi_namespace_consumer.py → webui/namespace_manager/multi.py} +69 -22
- jettask/{unified_consumer_manager.py → webui/namespace_manager/unified.py} +1 -1
- jettask/{run.py → webui/run.py} +2 -2
- jettask/{services → webui/services}/__init__.py +1 -3
- jettask/{services → webui/services}/overview_service.py +34 -16
- jettask/{services → webui/services}/queue_service.py +1 -1
- jettask/{backend → webui/services}/queue_stats_v2.py +1 -1
- jettask/{services → webui/services}/settings_service.py +1 -1
- jettask/worker/__init__.py +53 -0
- jettask/worker/lifecycle.py +1507 -0
- jettask/worker/manager.py +583 -0
- jettask/{core/offline_worker_recovery.py → worker/recovery.py} +268 -175
- {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/METADATA +2 -71
- jettask-0.2.20.dist-info/RECORD +145 -0
- jettask/__main__.py +0 -140
- jettask/api/__init__.py +0 -103
- jettask/backend/__init__.py +0 -1
- jettask/backend/api/__init__.py +0 -3
- jettask/backend/api/v1/__init__.py +0 -17
- jettask/backend/api/v1/monitoring.py +0 -431
- jettask/backend/api/v1/namespaces.py +0 -504
- jettask/backend/api/v1/queues.py +0 -342
- jettask/backend/api/v1/tasks.py +0 -367
- jettask/backend/core/__init__.py +0 -3
- jettask/backend/core/cache.py +0 -221
- jettask/backend/core/database.py +0 -200
- jettask/backend/core/exceptions.py +0 -102
- jettask/backend/dependencies.py +0 -261
- jettask/backend/init_meta_db.py +0 -158
- jettask/backend/main.py +0 -1426
- jettask/backend/main_unified.py +0 -78
- jettask/backend/main_v2.py +0 -394
- jettask/backend/models/__init__.py +0 -3
- jettask/backend/models/requests.py +0 -236
- jettask/backend/models/responses.py +0 -230
- jettask/backend/namespace_api_old.py +0 -267
- jettask/backend/services/__init__.py +0 -3
- jettask/backend/start.py +0 -42
- jettask/backend/unified_api_router.py +0 -1541
- jettask/cleanup_deprecated_tables.sql +0 -16
- jettask/core/consumer_manager.py +0 -1695
- jettask/core/delay_scanner.py +0 -256
- jettask/core/event_pool.py +0 -1700
- jettask/core/heartbeat_process.py +0 -222
- jettask/core/task_batch.py +0 -153
- jettask/core/worker_scanner.py +0 -271
- jettask/executors/__init__.py +0 -5
- jettask/executors/asyncio.py +0 -876
- jettask/executors/base.py +0 -30
- jettask/executors/common.py +0 -148
- jettask/executors/multi_asyncio.py +0 -309
- jettask/gradio_app.py +0 -570
- jettask/integrated_gradio_app.py +0 -1088
- jettask/main.py +0 -0
- jettask/monitoring/__init__.py +0 -3
- jettask/pg_consumer.py +0 -1896
- jettask/run_monitor.py +0 -22
- jettask/run_webui.py +0 -148
- jettask/scheduler/multi_namespace_scheduler.py +0 -294
- jettask/scheduler/unified_manager.py +0 -450
- jettask/task_center_client.py +0 -150
- jettask/utils/serializer_optimized.py +0 -33
- jettask/webui_exceptions.py +0 -67
- jettask-0.2.18.dist-info/RECORD +0 -150
- /jettask/{constants.py → config/constants.py} +0 -0
- /jettask/{backend/config.py → config/task_center.py} +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/pg_consumer_v2.py +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/add_execution_time_field.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_new_tables.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_tables_v3.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/migrate_to_new_structure.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/modify_time_fields.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql_utils.py +0 -0
- /jettask/{models.py → persistence/models.py} +0 -0
- /jettask/scheduler/{manager.py → task_crud.py} +0 -0
- /jettask/{schema.sql → schemas/schema.sql} +0 -0
- /jettask/{task_center.py → task/task_center/client.py} +0 -0
- /jettask/{monitoring → utils}/file_watcher.py +0 -0
- /jettask/{services/redis_monitor_service.py → utils/redis_monitor.py} +0 -0
- /jettask/{api/v1 → webui/api}/__init__.py +0 -0
- /jettask/{webui_config.py → webui/config.py} +0 -0
- /jettask/{webui_models → webui/models}/__init__.py +0 -0
- /jettask/{webui_models → webui/models}/namespace.py +0 -0
- /jettask/{services → webui/services}/alert_service.py +0 -0
- /jettask/{services → webui/services}/analytics_service.py +0 -0
- /jettask/{services → webui/services}/scheduled_task_service.py +0 -0
- /jettask/{services → webui/services}/task_service.py +0 -0
- /jettask/{webui_sql → webui/sql}/batch_upsert_functions.sql +0 -0
- /jettask/{webui_sql → webui/sql}/verify_database.sql +0 -0
- {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/WHEEL +0 -0
- {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/entry_points.txt +0 -0
- {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/licenses/LICENSE +0 -0
- {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/top_level.txt +0 -0
jettask/executors/base.py
DELETED
@@ -1,30 +0,0 @@
-from collections import deque
-from abc import ABC, abstractmethod
-import time
-from typing import TYPE_CHECKING
-
-from .common import CommonExecutorMixin
-
-if TYPE_CHECKING:
-    from ..core.app import Jettask
-
-
-class BaseExecutor(CommonExecutorMixin, ABC):
-    """Base class for all executors"""
-
-    def __init__(self, event_queue: deque, app: "Jettask", concurrency: int = 1) -> None:
-        self.event_queue = event_queue
-        self.app = app
-        self.concurrency = concurrency
-        self.last_refresh_pending_time = 0
-        self.pedding_count = 0
-        self.batch_counter = 0
-
-    def logic(self, *args, **kwargs):
-        """Process a single task"""
-        pass
-
-    @abstractmethod
-    def loop(self):
-        """Main loop for the executor"""
-        pass
jettask/executors/common.py
DELETED
@@ -1,148 +0,0 @@
-import time
-import traceback
-from ..utils.serializer import dumps_str, loads_str
-from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple
-from ..utils.traceback_filter import filter_framework_traceback
-
-if TYPE_CHECKING:
-    from ..core.app import Jettask
-    from ..core.task import Task
-
-
-class CommonExecutorMixin:
-    """Mixin class containing common functionality for all executors"""
-
-    app: "Jettask"
-    last_refresh_pending_time: float
-    pedding_count: int
-
-    def parse_task_data(self, data: bytes) -> Tuple[str, str, str, list, dict]:
-        """Parse task data from bytes"""
-        data_str = data.decode("utf-8")
-        parts = data_str.split("_____", 4)
-        event_id = parts[0]
-        task_name = parts[1]
-        trigger_time = parts[2]
-        args = loads_str(parts[3]) if parts[3] else []
-        kwargs = loads_str(parts[4]) if parts[4] else {}
-        return event_id, task_name, trigger_time, args, kwargs
-
-    def get_task(self, task_name: str) -> Optional["Task"]:
-        """Get task by name"""
-        return self.app.get_task_by_name(task_name)
-
-    def handle_task_error(self, event_id: str, task_name: str, error: Exception) -> Dict[str, Any]:
-        """Handle task execution error"""
-        # Use the filtered traceback
-        error_msg = filter_framework_traceback()
-        result = {
-            "status": "error",
-            "task_name": task_name,
-            "message": str(error),
-            "traceback": error_msg
-        }
-        self.app.set_task_status(event_id, "error")
-        self.app.set_data(event_id, dumps_str(result))
-        return result
-
-    def handle_task_success(self, event_id: str, task_name: str, result: Any) -> Dict[str, Any]:
-        """Handle successful task execution"""
-        result_data = {
-            "status": "success",
-            "task_name": task_name,
-            "result": result
-        }
-        self.app.set_task_status(event_id, "success")
-        self.app.set_data(event_id, dumps_str(result_data))
-        return result_data
-
-    def ack_message(self, data: bytes) -> None:
-        """Acknowledge message processing"""
-        self.app.ep.ack(data)
-
-    def get_routing_data(self, kwargs: dict) -> Tuple[Optional[str], Optional[str]]:
-        """Extract routing data from kwargs"""
-        routing_key = kwargs.pop("routing_key", None)
-        aggregation_key = kwargs.pop("aggregation_key", None)
-        return routing_key, aggregation_key
-
-    def should_process_routing(self, routing_key: Optional[str], aggregation_key: Optional[str]) -> bool:
-        """Check if task should be processed based on routing"""
-        if routing_key:
-            # Check solo running state
-            if self.app.is_solo_running_by_aggregation_key(aggregation_key):
-                return False
-            # Set solo running state
-            self.app.set_solo_running_by_aggregation_key(aggregation_key)
-        return True
-
-    def clear_routing_state(self, routing_key: Optional[str], aggregation_key: Optional[str]) -> None:
-        """Clear routing state after task completion"""
-        if routing_key:
-            self.app.clear_solo_running_by_aggregation_key(aggregation_key)
-
-    def handle_urgent_retry(self, kwargs: dict, event_id: str, task_name: str,
-                            trigger_time: str, args: list) -> bool:
-        """Handle urgent retry logic"""
-        urgent_retry = kwargs.pop("urgent_retry", None)
-        if urgent_retry:
-            delay = kwargs.pop("delay", 0)
-            self.app.delay(
-                task_name=task_name,
-                delay=delay,
-                args=args,
-                kwargs=kwargs,
-                task_id=event_id,
-                trigger_time=trigger_time,
-                producer_type="urgent_retry",
-            )
-            self.app.set_task_status(event_id, "retry")
-            return True
-        return False
-
-    def get_pending_count(self) -> int:
-        """Get pending count with caching"""
-        current_time = time.time()
-        if current_time - self.last_refresh_pending_time > 1:
-            self.pedding_count = self.app.ep.get_pending_count()
-            self.last_refresh_pending_time = current_time
-        return self.pedding_count
-
-    def execute_task_lifecycle(self, task: "Task", event_id: str, trigger_time: str,
-                               args: list, kwargs: dict, is_async: bool = False):
-        """Execute task with lifecycle methods"""
-        # This method will be implemented differently for sync/async executors
-        raise NotImplementedError("Subclasses must implement execute_task_lifecycle")
-
-    def format_batch_data(self, data: bytes) -> Tuple[str, str, list, dict]:
-        """Parse batch task data"""
-        data_str = data.decode("utf-8")
-        parts = data_str.split("_____", 3)
-        batch_id = parts[0]
-        task_name = parts[1]
-        args = loads_str(parts[2]) if parts[2] else []
-        kwargs = loads_str(parts[3]) if parts[3] else {}
-        return batch_id, task_name, args, kwargs
-
-    def extract_batch_params(self, kwargs: dict) -> Tuple[list, list, str, Optional[str], Optional[str]]:
-        """Extract batch-specific parameters from kwargs"""
-        event_ids = kwargs.pop("event_ids", [])
-        trigger_times = kwargs.pop("trigger_times", [])
-        producer_type = kwargs.pop("producer_type", "normal")
-        routing_key = kwargs.pop("routing_key", None)
-        aggregation_key = kwargs.pop("aggregation_key", None)
-        return event_ids, trigger_times, producer_type, routing_key, aggregation_key
-
-    def ack_batch_events(self, event_ids: list) -> None:
-        """Acknowledge multiple events in batch"""
-        for event_id in event_ids:
-            data = f"{event_id}_____batch_ack".encode()
-            self.ack_message(data)
-
-    def update_batch_status(self, event_ids: list, status: str, result: Any = None) -> None:
-        """Update status for multiple events in batch"""
-        result_str = dumps_str(result) if result is not None else None
-        for event_id in event_ids:
-            self.app.set_task_status(event_id, status)
-            if result_str:
-                self.app.set_data(event_id, result_str)
jettask/executors/multi_asyncio.py
DELETED
@@ -1,309 +0,0 @@
-import multiprocessing
-import logging
-import time
-import os
-import signal
-from typing import Dict, List
-from multiprocessing import Process, Event
-from dataclasses import dataclass
-
-from .base import BaseExecutor
-
-logger = logging.getLogger('app')
-
-
-@dataclass
-class ProcessConfig:
-    """Configuration for AsyncIO executor process"""
-    executor_id: int
-    redis_url: str
-    queues: List[str]
-    app_tasks: Dict
-    consumer_strategy: str
-    consumer_config: Dict
-    max_connections: int
-    prefetch_multiplier: int
-    concurrency_per_process: int = 10000
-
-
-class MultiAsyncioExecutor(BaseExecutor):
-    """
-    Multi-asyncio executor that manages multiple AsyncioExecutor instances
-    Each instance runs in its own process with its own event loop
-
-    Features:
-    - Automatic process restart on failure
-    - Graceful shutdown with timeout
-    - Process health monitoring
-    - Configurable concurrency per process
-    """
-
-    def __init__(self, event_queue, app, concurrency=3):
-        super().__init__(event_queue, app, concurrency)
-        self.processes: Dict[int, Process] = {}
-        self.process_configs: Dict[int, ProcessConfig] = {}
-        self.shutdown_event = Event()
-        self._monitor_interval = 1  # seconds
-        self._status_log_interval = 30  # seconds
-        self._restart_delay = 2  # seconds
-        self._max_restart_attempts = 3
-        self._restart_counts: Dict[int, int] = {}
-        self._main_received_signal = False  # Track if main process received signal
-
-    def logic(self):
-        """
-        Logic method for BaseExecutor interface.
-        Not used in MultiAsyncioExecutor as it delegates to AsyncioExecutor instances.
-        """
-        pass
-
-    @staticmethod
-    def _run_asyncio_executor(config: ProcessConfig, shutdown_event):
-        """Run an AsyncioExecutor in a separate process"""
-        # Set process name for debugging
-        multiprocessing.current_process().name = f"AsyncioExecutor-{config.executor_id}"
-
-        # Configure logging for subprocess
-        logging.basicConfig(
-            level=logging.INFO,
-            format=f"%(asctime)s - %(levelname)s - [Executor-{config.executor_id}] - %(message)s",
-            datefmt="%Y-%m-%d %H:%M:%S",
-        )
-        logger = logging.getLogger('app')
-
-        # Handle signals gracefully
-        def signal_handler(signum, _frame):
-            logger.info(f"AsyncioExecutor #{config.executor_id} received signal {signum}")
-            shutdown_event.set()
-
-        signal.signal(signal.SIGINT, signal_handler)
-        signal.signal(signal.SIGTERM, signal_handler)
-
-        try:
-            # Import inside process to avoid pickle issues
-            from ..core.app import Jettask
-
-            # Create app instance for this process
-            app = Jettask(
-                redis_url=config.redis_url,
-                max_connections=config.max_connections,
-                consumer_strategy=config.consumer_strategy,
-                consumer_config=config.consumer_config,
-                tasks=config.app_tasks
-            )
-
-            logger.info(f"Started AsyncioExecutor #{config.executor_id} in process {os.getpid()}")
-
-            # Check shutdown event before starting
-            if shutdown_event.is_set():
-                logger.info(f"AsyncioExecutor #{config.executor_id} shutdown before start")
-                return
-
-            # Start the executor
-            app._start(
-                execute_type="asyncio",
-                queues=config.queues,
-                concurrency=config.concurrency_per_process,
-                prefetch_multiplier=config.prefetch_multiplier
-            )
-
-        except KeyboardInterrupt:
-            logger.info(f"AsyncioExecutor #{config.executor_id} received interrupt")
-        except Exception as e:
-            logger.error(f"AsyncioExecutor #{config.executor_id} error: {e}")
-            import traceback
-            traceback.print_exc()
-            raise  # Re-raise to trigger restart mechanism
-        finally:
-            logger.info(f"AsyncioExecutor #{config.executor_id} stopped")
-
-    def _create_process_config(self, executor_id: int, queues: List[str],
-                               prefetch_multiplier: int) -> ProcessConfig:
-        """Create configuration for a process"""
-        return ProcessConfig(
-            executor_id=executor_id,
-            redis_url=self.app.redis_url,
-            queues=queues,
-            app_tasks=self.app._tasks,
-            consumer_strategy=self.app.consumer_strategy,
-            consumer_config=self.app.consumer_config,
-            max_connections=self.app.max_connections,
-            prefetch_multiplier=prefetch_multiplier,
-            concurrency_per_process=10000
-        )
-
-    def _start_process(self, executor_id: int, config: ProcessConfig) -> Process:
-        """Start a single AsyncioExecutor process"""
-        process = Process(
-            target=self._run_asyncio_executor,
-            args=(config, self.shutdown_event),
-            name=f"AsyncioExecutor-{executor_id}"
-        )
-        process.start()
-        logger.info(f"Started AsyncioExecutor process #{executor_id} (PID: {process.pid})")
-        return process
-
-    def _restart_process(self, executor_id: int):
-        """Restart a failed process with exponential backoff"""
-        if self.shutdown_event.is_set():
-            return
-
-        restart_count = self._restart_counts.get(executor_id, 0)
-        if restart_count >= self._max_restart_attempts:
-            logger.error(f"Process #{executor_id} exceeded max restart attempts ({self._max_restart_attempts})")
-            return
-
-        self._restart_counts[executor_id] = restart_count + 1
-        delay = self._restart_delay * (2 ** restart_count)  # Exponential backoff
-
-        logger.info(f"Restarting process #{executor_id} (attempt {restart_count + 1}/{self._max_restart_attempts}) "
-                    f"after {delay}s delay")
-        time.sleep(delay)
-
-        config = self.process_configs[executor_id]
-        process = self._start_process(executor_id, config)
-        self.processes[executor_id] = process
-
-    def _monitor_processes(self):
-        """Monitor process health and restart failed processes"""
-        alive_count = 0
-        for executor_id, process in list(self.processes.items()):
-            if process.is_alive():
-                alive_count += 1
-                # Reset restart count for healthy processes
-                self._restart_counts[executor_id] = 0
-            else:
-                exit_code = process.exitcode
-                # If shutting down, do not restart the process
-                if self.shutdown_event.is_set():
-                    logger.info(f"Process #{executor_id} stopped during shutdown")
-                # If the process received SIGTERM (exit_code == -15) or SIGINT (exit_code == -2), treat it as a normal shutdown
-                elif exit_code == -15 or exit_code == -2:
-                    logger.info(f"Process #{executor_id} received termination signal, marking as shutdown")
-                    # Set the shutdown event to avoid restarting other processes
-                    self.shutdown_event.set()
-                # If all processes stopped at the same time (clean exit), assume a shutdown signal was received
-                elif exit_code == 0 or exit_code is None:
-                    # If the main process already received a signal, do not restart
-                    if self._main_received_signal:
-                        logger.info(f"Process #{executor_id} stopped after main received signal")
-                    else:
-                        # Check whether all processes have stopped
-                        all_stopped = True
-                        for _, p in self.processes.items():
-                            if p.is_alive():
-                                all_stopped = False
-                                break
-
-                        if all_stopped:
-                            logger.info(f"All processes stopped simultaneously, marking as shutdown")
-                            self.shutdown_event.set()
-                        else:
-                            logger.warning(f"Process #{executor_id} stopped unexpectedly with exit code {exit_code}")
-                            self._restart_process(executor_id)
-                else:
-                    logger.warning(f"Process #{executor_id} exited with code {exit_code}")
-                    self._restart_process(executor_id)
-
-        return alive_count
-
-    def loop(self):
-        """Main loop that starts and monitors AsyncioExecutor processes"""
-        logger.info(f"Starting MultiAsyncioExecutor with {self.concurrency} asyncio processes")
-
-        # Set up signal handler to track when main process receives signal
-        def signal_handler(signum, frame):
-            logger.info(f"MultiAsyncioExecutor received signal {signum}")
-            self._main_received_signal = True
-            self.shutdown_event.set()
-
-        # Register signal handlers
-        signal.signal(signal.SIGINT, signal_handler)
-        signal.signal(signal.SIGTERM, signal_handler)
-
-        try:
-            # Get configuration
-            queues = getattr(self.app.ep, 'queues', ['robust_bench'])
-            prefetch_multiplier = getattr(self, 'prefetch_multiplier', 100)
-
-            # Start AsyncioExecutor processes
-            for i in range(self.concurrency):
-                config = self._create_process_config(i, queues, prefetch_multiplier)
-                self.process_configs[i] = config
-
-                process = self._start_process(i, config)
-                self.processes[i] = process
-
-                # Small delay to avoid thundering herd
-                time.sleep(0.1)
-
-            logger.info(f"All {self.concurrency} AsyncioExecutor processes started")
-
-            # Monitor executor processes
-            last_status_log = time.time()
-
-            while not self.shutdown_event.is_set():
-                # Monitor process health
-                alive_count = self._monitor_processes()
-
-                if alive_count == 0:
-                    if self._main_received_signal or self.shutdown_event.is_set():
-                        logger.info("All AsyncioExecutor processes have stopped during shutdown")
-                    else:
-                        logger.error("All AsyncioExecutor processes have stopped unexpectedly")
-                    break
-
-                # Log status periodically
-                current_time = time.time()
-                if current_time - last_status_log > self._status_log_interval:
-                    dead_count = self.concurrency - alive_count
-                    logger.info(f"MultiAsyncioExecutor status - Active: {alive_count}/{self.concurrency}, "
-                                f"Dead: {dead_count}, Restart attempts: {sum(self._restart_counts.values())}")
-                    last_status_log = current_time
-
-                time.sleep(self._monitor_interval)
-
-        except KeyboardInterrupt:
-            logger.info("MultiAsyncioExecutor received KeyboardInterrupt")
-            self._main_received_signal = True
-            self.shutdown_event.set()
-        except Exception as e:
-            logger.error(f"MultiAsyncioExecutor error: {e}")
-            import traceback
-            traceback.print_exc()
-        finally:
-            self.shutdown()
-
-    def shutdown(self):
-        """Gracefully shutdown all executor processes"""
-        logger.info("Shutting down MultiAsyncioExecutor...")
-
-        # Signal shutdown - this prevents any restart attempts
-        self.shutdown_event.set()
-
-        # Fast shutdown mode - keep the waits short
-        shutdown_timeout = 1.0  # reduced to 1 second
-        terminate_timeout = 0.5  # reduced to 0.5 seconds
-
-        # Send a terminate signal to all processes first
-        for executor_id, process in self.processes.items():
-            if process.is_alive():
-                logger.info(f"Sending TERM signal to process #{executor_id} (PID: {process.pid})")
-                process.terminate()
-
-        # Briefly wait for all processes to exit on their own
-        time.sleep(shutdown_timeout)
-
-        # Force-kill any processes that are still running
-        for executor_id, process in self.processes.items():
-            if process.is_alive():
-                logger.warning(f"Process #{executor_id} did not terminate, killing...")
-                process.kill()
-                # Don't wait for join; let the OS clean up
-
-        # Clear process tracking
-        self.processes.clear()
-        self.process_configs.clear()
-        self._restart_counts.clear()
-
-        logger.info("MultiAsyncioExecutor shutdown complete")