digitalkin 0.3.0rc1__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- digitalkin/__version__.py +1 -1
- digitalkin/core/common/__init__.py +9 -0
- digitalkin/core/common/factories.py +156 -0
- digitalkin/core/job_manager/base_job_manager.py +128 -28
- digitalkin/core/job_manager/single_job_manager.py +80 -25
- digitalkin/core/job_manager/taskiq_broker.py +114 -19
- digitalkin/core/job_manager/taskiq_job_manager.py +291 -39
- digitalkin/core/task_manager/base_task_manager.py +539 -0
- digitalkin/core/task_manager/local_task_manager.py +108 -0
- digitalkin/core/task_manager/remote_task_manager.py +87 -0
- digitalkin/core/task_manager/surrealdb_repository.py +43 -4
- digitalkin/core/task_manager/task_executor.py +249 -0
- digitalkin/core/task_manager/task_session.py +107 -19
- digitalkin/grpc_servers/module_server.py +2 -2
- digitalkin/grpc_servers/module_servicer.py +21 -12
- digitalkin/grpc_servers/registry_server.py +1 -1
- digitalkin/grpc_servers/registry_servicer.py +4 -4
- digitalkin/grpc_servers/utils/grpc_error_handler.py +53 -0
- digitalkin/models/core/task_monitor.py +17 -0
- digitalkin/models/grpc_servers/models.py +4 -4
- digitalkin/models/module/module_context.py +5 -0
- digitalkin/models/module/module_types.py +304 -16
- digitalkin/modules/_base_module.py +66 -28
- digitalkin/services/cost/grpc_cost.py +8 -41
- digitalkin/services/filesystem/grpc_filesystem.py +9 -38
- digitalkin/services/services_config.py +11 -0
- digitalkin/services/services_models.py +3 -1
- digitalkin/services/setup/default_setup.py +5 -6
- digitalkin/services/setup/grpc_setup.py +51 -14
- digitalkin/services/storage/grpc_storage.py +2 -2
- digitalkin/services/user_profile/__init__.py +12 -0
- digitalkin/services/user_profile/default_user_profile.py +55 -0
- digitalkin/services/user_profile/grpc_user_profile.py +69 -0
- digitalkin/services/user_profile/user_profile_strategy.py +40 -0
- digitalkin/utils/__init__.py +28 -0
- digitalkin/utils/dynamic_schema.py +483 -0
- {digitalkin-0.3.0rc1.dist-info → digitalkin-0.3.1.dist-info}/METADATA +9 -29
- {digitalkin-0.3.0rc1.dist-info → digitalkin-0.3.1.dist-info}/RECORD +42 -30
- modules/dynamic_setup_module.py +362 -0
- digitalkin/core/task_manager/task_manager.py +0 -439
- {digitalkin-0.3.0rc1.dist-info → digitalkin-0.3.1.dist-info}/WHEEL +0 -0
- {digitalkin-0.3.0rc1.dist-info → digitalkin-0.3.1.dist-info}/licenses/LICENSE +0 -0
- {digitalkin-0.3.0rc1.dist-info → digitalkin-0.3.1.dist-info}/top_level.txt +0 -0
|
@@ -1,439 +0,0 @@
|
|
|
1
|
-
"""Task manager with comprehensive lifecycle management."""
|
|
2
|
-
|
|
3
|
-
import asyncio
|
|
4
|
-
import contextlib
|
|
5
|
-
import datetime
|
|
6
|
-
from collections.abc import Coroutine
|
|
7
|
-
from typing import Any
|
|
8
|
-
|
|
9
|
-
from digitalkin.core.task_manager.surrealdb_repository import SurrealDBConnection
|
|
10
|
-
from digitalkin.core.task_manager.task_session import TaskSession
|
|
11
|
-
from digitalkin.logger import logger
|
|
12
|
-
from digitalkin.models.core.task_monitor import SignalMessage, SignalType, TaskStatus
|
|
13
|
-
from digitalkin.modules._base_module import BaseModule
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
class TaskManager:
    """Task manager with comprehensive lifecycle management.

    Handle the tasks creation, execution, monitoring, signaling, and cancellation.

    Each managed task is identified by a ``task_id`` and owns a ``TaskSession``.
    ``create_task`` registers both the session and an asyncio task, while
    ``cancel_task`` / ``clean_session`` / ``shutdown`` tear them down.
    """

    # task_id -> asyncio task registered by ``create_task``.
    tasks: dict[str, asyncio.Task]
    # task_id -> session registered by ``create_task`` (dropped by ``clean_session``).
    tasks_sessions: dict[str, TaskSession]
    # NOTE(review): ``channel`` is declared here but never assigned inside this class,
    # although ``_task_wrapper`` and ``send_signal`` read ``self.channel``. Presumably
    # it is injected by the owner of the manager - confirm, otherwise those calls
    # raise AttributeError.
    channel: SurrealDBConnection
    # Fallback timeout (seconds) used when a cancellation is requested without one.
    default_timeout: float
    # Upper bound on the number of entries allowed in ``tasks``.
    max_concurrent_tasks: int
    # Set by ``shutdown``; nothing in this class waits on it.
    _shutdown_event: asyncio.Event

    def __init__(self, default_timeout: float = 10.0, max_concurrent_tasks: int = 100) -> None:
        """Defining task manager properties.

        Args:
            default_timeout (float): Timeout in seconds applied to cancellations
                when the caller does not provide one.
            max_concurrent_tasks (int): Maximum number of registered tasks accepted
                by ``create_task``.
        """
        self.tasks = {}
        self.tasks_sessions = {}
        self.default_timeout = default_timeout
        self.max_concurrent_tasks = max_concurrent_tasks
        self._shutdown_event = asyncio.Event()

        logger.info(
            "TaskManager initialized with max_concurrent_tasks: %d, default_timeout: %.1f",
            max_concurrent_tasks,
            default_timeout,
            extra={
                "max_concurrent_tasks": max_concurrent_tasks,
                "default_timeout": default_timeout,
            },
        )

    @property
    def task_count(self) -> int:
        """Number of managed tasks (counted from the registered sessions)."""
        return len(self.tasks_sessions)

    @property
    def running_tasks(self) -> set[str]:
        """Get IDs of currently running tasks (registered tasks that are not done)."""
        return {task_id for task_id, task in self.tasks.items() if not task.done()}

    async def _cleanup_task(self, task_id: str, mission_id: str) -> None:
        """Clean up task resources.

        Closes the database handle of the task's session when a session is
        registered for ``task_id``.

        Args:
            task_id (str): The ID of the task to clean up.
            mission_id (str): The ID of the mission associated with the task.
        """
        logger.debug(
            "Cleaning up resources for task: '%s'", task_id, extra={"mission_id": mission_id, "task_id": task_id}
        )
        if task_id in self.tasks_sessions:
            await self.tasks_sessions[task_id].db.close()
        # NOTE(review): the original carried a "Remove from collections" marker here but
        # never removed anything; the session is only dropped by ``clean_session``.
        # Confirm whether ``tasks`` / ``tasks_sessions`` entries should be popped here.

    async def _task_wrapper(  # noqa: C901, PLR0915
        self,
        task_id: str,
        mission_id: str,
        coro: Coroutine[Any, Any, None],
        session: TaskSession,
    ) -> asyncio.Task[None]:
        """Task wrapper that runs main, heartbeat, and listener concurrently.

        The first to finish determines the outcome. Returns a Task that the
        caller can await externally.

        Args:
            task_id (str): The ID of the task being supervised.
            mission_id (str): The ID of the mission associated with the task.
            coro (Coroutine[Any, Any, None]): The main workload of the task.
            session (TaskSession): Session providing the signal listener, the
                heartbeat generator and the status fields updated here.

        Returns:
            asyncio.Task[None]: The supervisor task managing the lifecycle.
        """

        async def signal_wrapper() -> None:
            # Publish a START record, listen for signals, and always publish a STOP
            # record when the listener ends (normally or through cancellation).
            try:
                await self.channel.create(
                    "tasks",
                    SignalMessage(
                        task_id=task_id,
                        mission_id=mission_id,
                        status=session.status,
                        action=SignalType.START,
                    ).model_dump(),
                )
                await session.listen_signals()
            except asyncio.CancelledError:
                logger.debug("Signal listener cancelled", extra={"mission_id": mission_id, "task_id": task_id})
            finally:
                await self.channel.create(
                    "tasks",
                    SignalMessage(
                        task_id=task_id,
                        mission_id=mission_id,
                        status=session.status,
                        action=SignalType.STOP,
                    ).model_dump(),
                )
                logger.info("Signal listener ended", extra={"mission_id": mission_id, "task_id": task_id})

        async def heartbeat_wrapper() -> None:
            try:
                await session.generate_heartbeats()
            except asyncio.CancelledError:
                # Fixed: this used to log the listener's message ("Signal listener cancelled").
                logger.debug("Heartbeat task cancelled", extra={"mission_id": mission_id, "task_id": task_id})
            finally:
                logger.info("Heartbeat task ended", extra={"mission_id": mission_id, "task_id": task_id})

        async def supervisor() -> None:
            session.started_at = datetime.datetime.now(datetime.timezone.utc)
            session.status = TaskStatus.RUNNING

            main_task = asyncio.create_task(coro, name=f"{task_id}_main")
            hb_task = asyncio.create_task(heartbeat_wrapper(), name=f"{task_id}_heartbeat")
            sig_task = asyncio.create_task(signal_wrapper(), name=f"{task_id}_listener")

            try:
                done, pending = await asyncio.wait(
                    [main_task, sig_task, hb_task],
                    return_when=asyncio.FIRST_COMPLETED,
                )

                # One task completed -> cancel the others
                for t in pending:
                    t.cancel()

                # Propagate exception/result from the finished task
                completed = next(iter(done))
                await completed

                if completed is main_task:
                    session.status = TaskStatus.COMPLETED
                elif completed is sig_task or (completed is hb_task and sig_task.done()):
                    # Lazy formatting; renders exactly like the former f"{sig_task=}".
                    logger.debug("Task cancelled due to signal sig_task=%r", sig_task)
                    session.status = TaskStatus.CANCELLED
                elif completed is hb_task:
                    # The heartbeat ended on its own while the listener was still alive.
                    session.status = TaskStatus.FAILED
                    msg = f"Heartbeat stopped for {task_id}"
                    raise RuntimeError(msg)  # noqa: TRY301

            except asyncio.CancelledError:
                session.status = TaskStatus.CANCELLED
                raise
            except Exception:
                session.status = TaskStatus.FAILED
                raise
            finally:
                session.completed_at = datetime.datetime.now(datetime.timezone.utc)
                # Ensure all tasks are cleaned up
                for t in [main_task, hb_task, sig_task]:
                    if not t.done():
                        t.cancel()
                await asyncio.gather(main_task, hb_task, sig_task, return_exceptions=True)

        # Return the supervisor task to be awaited outside
        return asyncio.create_task(supervisor(), name=f"{task_id}_supervisor")

    async def create_task(
        self,
        task_id: str,
        mission_id: str,
        module: BaseModule,
        coro: Coroutine[Any, Any, None],
        heartbeat_interval: datetime.timedelta = datetime.timedelta(seconds=2),
        connection_timeout: datetime.timedelta = datetime.timedelta(seconds=5),
    ) -> None:
        """Create and start a new managed task.

        Args:
            task_id (str): Unique ID of the task to create.
            mission_id (str): The ID of the mission associated with the task.
            module (BaseModule): Module handed to the task's session.
            coro (Coroutine[Any, Any, None]): The main workload of the task. It is
                closed when the task cannot be scheduled.
            heartbeat_interval (datetime.timedelta): Interval handed to the session.
            connection_timeout (datetime.timedelta): Timeout handed to the database
                connection.

        Raises:
            ValueError: task_id duplicated
            RuntimeError: task overload
        """
        if task_id in self.tasks:
            # close Coroutine during runtime
            coro.close()
            logger.warning(
                "Task creation failed - task already exists: '%s'",
                task_id,
                extra={"mission_id": mission_id, "task_id": task_id},
            )
            msg = f"Task {task_id} already exists"
            raise ValueError(msg)

        if len(self.tasks) >= self.max_concurrent_tasks:
            coro.close()
            logger.error(
                "Task creation failed - max concurrent tasks reached: %d",
                self.max_concurrent_tasks,
                extra={
                    "mission_id": mission_id,
                    "task_id": task_id,
                    "current_count": len(self.tasks),
                    "max_concurrent": self.max_concurrent_tasks,
                },
            )
            msg = f"Maximum concurrent tasks ({self.max_concurrent_tasks}) reached"
            raise RuntimeError(msg)

        logger.info(
            "Creating new task: '%s'",
            task_id,
            extra={
                "mission_id": mission_id,
                "task_id": task_id,
                "heartbeat_interval": heartbeat_interval,
                "connection_timeout": connection_timeout,
            },
        )

        try:
            # Initialize components
            channel: SurrealDBConnection = SurrealDBConnection("task_manager", connection_timeout)
            await channel.init_surreal_instance()
            session = TaskSession(task_id, mission_id, channel, module, heartbeat_interval)

            self.tasks_sessions[task_id] = session

            # Create wrapper task
            # NOTE(review): ``_task_wrapper`` itself returns the supervisor Task, so the
            # task stored here finishes as soon as the supervisor has been scheduled and
            # its result is the supervisor Task. Confirm that callers expect this.
            self.tasks[task_id] = asyncio.create_task(
                self._task_wrapper(
                    task_id,
                    mission_id,
                    coro,
                    session,
                ),
                name=task_id,
            )

            logger.info(
                "Task created successfully: '%s'",
                task_id,
                extra={
                    "mission_id": mission_id,
                    "task_id": task_id,
                    "total_tasks": len(self.tasks),
                },
            )

        except Exception as e:
            logger.error(
                "Failed to create task: '%s'",
                task_id,
                extra={"mission_id": mission_id, "task_id": task_id, "error": str(e)},
                exc_info=True,
            )
            if task_id not in self.tasks:
                # The wrapper task was never scheduled, so nothing will ever run
                # ``coro``: close it, as the rejection paths above do.
                coro.close()
            # Cleanup on failure
            await self._cleanup_task(task_id, mission_id=mission_id)
            raise

    async def send_signal(self, task_id: str, mission_id: str, signal_type: str, payload: dict) -> bool:
        """Send signal to a specific task.

        Args:
            task_id (str): The ID of the task to signal.
            mission_id (str): The ID of the mission associated with the task.
            signal_type (str): Signal name forwarded to the channel update.
            payload (dict): Data forwarded with the signal.

        Returns:
            bool: True if the signal was written to the channel, False when no
                session is registered for ``task_id``.
        """
        if task_id not in self.tasks_sessions:
            logger.warning(
                "Cannot send signal - task not found: '%s'",
                task_id,
                extra={"mission_id": mission_id, "task_id": task_id, "signal_type": signal_type},
            )
            return False

        logger.info(
            "Sending signal '%s' to task: '%s'",
            signal_type,
            task_id,
            extra={"mission_id": mission_id, "task_id": task_id, "signal_type": signal_type, "payload": payload},
        )

        await self.channel.update("tasks", signal_type, payload)
        return True

    async def cancel_task(self, task_id: str, mission_id: str, timeout: float | None = None) -> bool:
        """Cancel a task with graceful shutdown and fallback.

        Waits up to ``timeout`` seconds for the registered task to finish, then
        force-cancels it.

        Args:
            task_id (str): The ID of the task to cancel.
            mission_id (str): The ID of the mission associated with the task.
            timeout (float | None): Seconds to wait before forcing the cancellation;
                ``None`` selects ``default_timeout``.

        Returns:
            bool: True if the task finished or was cancelled (also when the task is
                unknown), False if an unexpected error occurred.
        """
        if task_id not in self.tasks:
            logger.warning(
                "Cannot cancel - task not found: '%s'", task_id, extra={"mission_id": mission_id, "task_id": task_id}
            )
            return True

        # Only ``None`` selects the default: an explicit 0 must not be replaced by it.
        if timeout is None:
            timeout = self.default_timeout
        task = self.tasks[task_id]

        logger.info(
            "Initiating task cancellation: '%s', timeout: %.1fs",
            task_id,
            timeout,
            extra={"mission_id": mission_id, "task_id": task_id, "timeout": timeout},
        )

        try:
            # Phase 1: Cooperative cancellation
            # await self.send_signal(task_id, mission_id, "cancel")  # noqa: ERA001

            # Wait for graceful shutdown
            await asyncio.wait_for(task, timeout=timeout)

            logger.info(
                "Task cancelled gracefully: '%s'", task_id, extra={"mission_id": mission_id, "task_id": task_id}
            )

        except asyncio.TimeoutError:
            logger.warning(
                "Graceful cancellation timed out for task: '%s', forcing cancellation",
                task_id,
                extra={"mission_id": mission_id, "task_id": task_id, "timeout": timeout},
            )

            # Phase 2: Force cancellation
            task.cancel()
            with contextlib.suppress(asyncio.CancelledError):
                await task

            logger.warning("Task force-cancelled: '%s'", task_id, extra={"mission_id": mission_id, "task_id": task_id})
            return True

        except Exception as e:
            logger.error(
                "Error during task cancellation: '%s'",
                task_id,
                extra={"mission_id": mission_id, "task_id": task_id, "error": str(e)},
                exc_info=True,
            )
            return False
        return True

    async def clean_session(self, task_id: str, mission_id: str) -> bool:
        """Stop the task's module, cancel the task and drop its session.

        Args:
            task_id (str): The ID of the task whose session is cleaned.
            mission_id (str): The ID of the mission associated with the task.

        Returns:
            bool: True if the task was cleaned successfully, False when no session
                is registered for ``task_id``.
        """
        if task_id not in self.tasks_sessions:
            logger.warning(
                "Cannot clean session - task not found: '%s'",
                task_id,
                extra={"mission_id": mission_id, "task_id": task_id},
            )
            return False

        await self.tasks_sessions[task_id].module.stop()
        # Fixed: the arguments were passed as (mission_id, task_id), so the lookup
        # used the mission id and the real task was never cancelled.
        await self.cancel_task(task_id, mission_id)

        logger.info("Cleaning up session for task: '%s'", task_id, extra={"mission_id": mission_id, "task_id": task_id})
        self.tasks_sessions.pop(task_id, None)
        return True

    async def pause_task(self, task_id: str, mission_id: str) -> bool:
        """Pause a running task by sending it a ``pause`` signal.

        Returns:
            bool: True if the pause signal was sent, False otherwise.
        """
        return await self.send_signal(task_id, mission_id, "pause", {})

    async def resume_task(self, task_id: str, mission_id: str) -> bool:
        """Resume a paused task by sending it a ``resume`` signal.

        Returns:
            bool: True if the resume signal was sent, False otherwise.
        """
        return await self.send_signal(task_id, mission_id, "resume", {})

    async def get_task_status(self, task_id: str, mission_id: str) -> bool:
        """Request status from a task by sending it a ``status`` signal.

        Returns:
            bool: True if the status request was sent, False otherwise.
        """
        return await self.send_signal(task_id, mission_id, "status", {})

    async def cancel_all_tasks(self, mission_id: str, timeout: float | None = None) -> dict[str, bool]:
        """Cancel all running tasks, one after the other.

        Args:
            mission_id (str): The ID of the mission used for logging and cancellation.
            timeout (float | None): Per-task timeout in seconds; ``None`` selects
                ``default_timeout``.

        Returns:
            dict[str, bool]: For each running task ID, the result of ``cancel_task``.
        """
        # Only ``None`` selects the default: an explicit 0 must not be replaced by it.
        if timeout is None:
            timeout = self.default_timeout
        task_ids = list(self.running_tasks)

        logger.info(
            "Cancelling all tasks: %d tasks",
            len(task_ids),
            extra={"mission_id": mission_id, "task_count": len(task_ids), "timeout": timeout},
        )

        results = {}
        for task_id in task_ids:
            results[task_id] = await self.cancel_task(task_id, mission_id, timeout)

        return results

    async def shutdown(self, mission_id: str, timeout: float = 30.0) -> None:
        """Graceful shutdown of all tasks.

        Sets the shutdown event, cancels every running task and logs a summary
        of the cancellations that succeeded and failed.

        Args:
            mission_id (str): The ID of the mission used for logging and cancellation.
            timeout (float): Per-task timeout in seconds handed to ``cancel_all_tasks``.
        """
        logger.info(
            "TaskManager shutdown initiated, timeout: %.1fs",
            timeout,
            extra={"mission_id": mission_id, "timeout": timeout, "active_tasks": len(self.running_tasks)},
        )

        self._shutdown_event.set()
        results = await self.cancel_all_tasks(mission_id, timeout)

        failed_tasks = [task_id for task_id, success in results.items() if not success]
        if failed_tasks:
            logger.error(
                "Failed to cancel %d tasks during shutdown: %s",
                len(failed_tasks),
                failed_tasks,
                extra={"mission_id": mission_id, "failed_tasks": failed_tasks, "failed_count": len(failed_tasks)},
            )

        logger.info(
            "TaskManager shutdown completed, cancelled: %d, failed: %d",
            len(results) - len(failed_tasks),
            len(failed_tasks),
            extra={
                "mission_id": mission_id,
                "cancelled_count": len(results) - len(failed_tasks),
                "failed_count": len(failed_tasks),
            },
        )
|
|
File without changes
|
|
File without changes
|
|
File without changes
|