digitalkin-0.3.0rc2-py3-none-any.whl → digitalkin-0.3.1.dev0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. digitalkin/__version__.py +1 -1
  2. digitalkin/core/common/__init__.py +9 -0
  3. digitalkin/core/common/factories.py +156 -0
  4. digitalkin/core/job_manager/base_job_manager.py +128 -28
  5. digitalkin/core/job_manager/single_job_manager.py +80 -25
  6. digitalkin/core/job_manager/taskiq_broker.py +114 -19
  7. digitalkin/core/job_manager/taskiq_job_manager.py +292 -39
  8. digitalkin/core/task_manager/base_task_manager.py +464 -0
  9. digitalkin/core/task_manager/local_task_manager.py +108 -0
  10. digitalkin/core/task_manager/remote_task_manager.py +87 -0
  11. digitalkin/core/task_manager/surrealdb_repository.py +43 -4
  12. digitalkin/core/task_manager/task_executor.py +173 -0
  13. digitalkin/core/task_manager/task_session.py +34 -12
  14. digitalkin/grpc_servers/module_server.py +2 -2
  15. digitalkin/grpc_servers/module_servicer.py +4 -3
  16. digitalkin/grpc_servers/registry_server.py +1 -1
  17. digitalkin/grpc_servers/registry_servicer.py +4 -4
  18. digitalkin/grpc_servers/utils/grpc_error_handler.py +53 -0
  19. digitalkin/models/grpc_servers/models.py +4 -4
  20. digitalkin/services/cost/grpc_cost.py +8 -41
  21. digitalkin/services/filesystem/grpc_filesystem.py +9 -38
  22. digitalkin/services/setup/default_setup.py +5 -6
  23. digitalkin/services/setup/grpc_setup.py +51 -14
  24. digitalkin/services/storage/grpc_storage.py +2 -2
  25. digitalkin/services/user_profile/__init__.py +1 -0
  26. digitalkin/services/user_profile/default_user_profile.py +55 -0
  27. digitalkin/services/user_profile/grpc_user_profile.py +69 -0
  28. digitalkin/services/user_profile/user_profile_strategy.py +40 -0
  29. {digitalkin-0.3.0rc2.dist-info → digitalkin-0.3.1.dev0.dist-info}/METADATA +7 -7
  30. {digitalkin-0.3.0rc2.dist-info → digitalkin-0.3.1.dev0.dist-info}/RECORD +33 -23
  31. digitalkin/core/task_manager/task_manager.py +0 -442
  32. {digitalkin-0.3.0rc2.dist-info → digitalkin-0.3.1.dev0.dist-info}/WHEEL +0 -0
  33. {digitalkin-0.3.0rc2.dist-info → digitalkin-0.3.1.dev0.dist-info}/licenses/LICENSE +0 -0
  34. {digitalkin-0.3.0rc2.dist-info → digitalkin-0.3.1.dev0.dist-info}/top_level.txt +0 -0
digitalkin/core/task_manager/base_task_manager.py
@@ -0,0 +1,464 @@
+ """Base task manager with common lifecycle management."""
+
+ import asyncio
+ import contextlib
+ import datetime
+ import types
+ from abc import ABC, abstractmethod
+ from collections.abc import Coroutine
+ from typing import Any
+
+ from digitalkin.core.task_manager.surrealdb_repository import SurrealDBConnection
+ from digitalkin.core.task_manager.task_session import TaskSession
+ from digitalkin.logger import logger
+ from digitalkin.modules._base_module import BaseModule
+
+
+ class BaseTaskManager(ABC):
+     """Base task manager with common lifecycle management.
+
+     Provides shared functionality for task orchestration, monitoring, signaling, and cancellation.
+     Subclasses implement specific execution strategies (local or remote).
+
+     Supports the async context manager protocol for automatic resource cleanup:
+         async with LocalTaskManager() as manager:
+             await manager.create_task(...)
+         # Resources automatically cleaned up on exit
+     """
+
+     tasks: dict[str, asyncio.Task]
+     tasks_sessions: dict[str, TaskSession]
+     default_timeout: float
+     max_concurrent_tasks: int
+     _shutdown_event: asyncio.Event
+
+     def __init__(
+         self,
+         default_timeout: float = 10.0,
+         max_concurrent_tasks: int = 100,
+     ) -> None:
+         """Initialize task manager properties.
+
+         Args:
+             default_timeout: Default timeout for task operations in seconds
+             max_concurrent_tasks: Maximum number of concurrent tasks
+         """
+         self.tasks = {}
+         self.tasks_sessions = {}
+         self.default_timeout = default_timeout
+         self.max_concurrent_tasks = max_concurrent_tasks
+         self._shutdown_event = asyncio.Event()
+
+         logger.info(
+             "%s initialized with max_concurrent_tasks: %d, default_timeout: %.1f",
+             self.__class__.__name__,
+             max_concurrent_tasks,
+             default_timeout,
+             extra={
+                 "max_concurrent_tasks": max_concurrent_tasks,
+                 "default_timeout": default_timeout,
+             },
+         )
+
+     @property
+     def task_count(self) -> int:
+         """Number of managed tasks."""
+         return len(self.tasks_sessions)
+
+     @property
+     def running_tasks(self) -> set[str]:
+         """Get IDs of currently running tasks."""
+         return {task_id for task_id, task in self.tasks.items() if not task.done()}
+
+     async def _cleanup_task(self, task_id: str, mission_id: str) -> None:
+         """Clean up task resources.
+
+         Delegates cleanup to the TaskSession, which handles:
+         - Clearing queue items to free memory
+         - Stopping the module (if not already stopped)
+         - Closing the database connection (which kills live queries)
+
+         Then removes the task from the tracking dictionaries.
+
+         Args:
+             task_id: The ID of the task to clean up
+             mission_id: The ID of the mission associated with the task
+         """
+         logger.debug(
+             "Cleaning up resources for task: '%s'", task_id, extra={"mission_id": mission_id, "task_id": task_id}
+         )
+         if task_id in self.tasks_sessions:
+             session = self.tasks_sessions[task_id]
+             await session.cleanup()
+             self.tasks_sessions.pop(task_id, None)
+
+         self.tasks.pop(task_id, None)
+
+     async def _validate_task_creation(self, task_id: str, mission_id: str, coro: Coroutine[Any, Any, None]) -> None:
+         """Validate task creation preconditions.
+
+         Args:
+             task_id: The ID of the task to create
+             mission_id: The ID of the mission associated with the task
+             coro: The coroutine to execute
+
+         Raises:
+             ValueError: If task_id already exists
+             RuntimeError: If the maximum number of concurrent tasks is reached
+         """
+         if task_id in self.tasks_sessions:
+             coro.close()
+             logger.warning(
+                 "Task creation failed - task already exists: '%s'",
+                 task_id,
+                 extra={"mission_id": mission_id, "task_id": task_id},
+             )
+             msg = f"Task {task_id} already exists"
+             raise ValueError(msg)
+
+         if len(self.tasks_sessions) >= self.max_concurrent_tasks:
+             coro.close()
+             logger.error(
+                 "Task creation failed - max concurrent tasks reached: %d",
+                 self.max_concurrent_tasks,
+                 extra={
+                     "mission_id": mission_id,
+                     "task_id": task_id,
+                     "current_count": len(self.tasks_sessions),
+                     "max_concurrent": self.max_concurrent_tasks,
+                 },
+             )
+             msg = f"Maximum concurrent tasks ({self.max_concurrent_tasks}) reached"
+             raise RuntimeError(msg)
+
+     async def _create_session(
+         self,
+         task_id: str,
+         mission_id: str,
+         module: BaseModule,
+         heartbeat_interval: datetime.timedelta,
+         connection_timeout: datetime.timedelta,
+     ) -> tuple[SurrealDBConnection, TaskSession]:
+         """Create a SurrealDB connection and task session.
+
+         Args:
+             task_id: The ID of the task
+             mission_id: The ID of the mission
+             module: The module instance
+             heartbeat_interval: Interval between heartbeats
+             connection_timeout: Connection timeout for SurrealDB
+
+         Returns:
+             Tuple of (channel, session)
+         """
+         channel: SurrealDBConnection = SurrealDBConnection("task_manager", connection_timeout)
+         await channel.init_surreal_instance()
+         session = TaskSession(
+             task_id=task_id,
+             mission_id=mission_id,
+             db=channel,
+             module=module,
+             heartbeat_interval=heartbeat_interval,
+         )
+         self.tasks_sessions[task_id] = session
+         return channel, session
+
+     @abstractmethod
+     async def create_task(
+         self,
+         task_id: str,
+         mission_id: str,
+         module: BaseModule,
+         coro: Coroutine[Any, Any, None],
+         heartbeat_interval: datetime.timedelta = datetime.timedelta(seconds=2),
+         connection_timeout: datetime.timedelta = datetime.timedelta(seconds=5),
+     ) -> None:
+         """Create and manage a new task.
+
+         Subclasses implement specific execution strategies.
+
+         Args:
+             task_id: Unique identifier for the task
+             mission_id: Mission identifier
+             module: Module instance to execute
+             coro: Coroutine to execute
+             heartbeat_interval: Interval between heartbeats
+             connection_timeout: Connection timeout for SurrealDB
+
+         Raises:
+             ValueError: If task_id is duplicated
+             RuntimeError: If the maximum number of concurrent tasks is reached
+         """
+         ...
+
+     async def send_signal(self, task_id: str, mission_id: str, signal_type: str, payload: dict) -> bool:
+         """Send a signal to a specific task.
+
+         Args:
+             task_id: The ID of the task
+             mission_id: The ID of the mission
+             signal_type: Type of signal to send
+             payload: Signal payload
+
+         Returns:
+             True if the signal was sent successfully, False otherwise
+         """
+         if task_id not in self.tasks_sessions:
+             logger.warning(
+                 "Cannot send signal - task not found: '%s'",
+                 task_id,
+                 extra={"mission_id": mission_id, "task_id": task_id, "signal_type": signal_type},
+             )
+             return False
+
+         logger.info(
+             "Sending signal '%s' to task: '%s'",
+             signal_type,
+             task_id,
+             extra={"mission_id": mission_id, "task_id": task_id, "signal_type": signal_type, "payload": payload},
+         )
+
+         # Use the task session's db connection to send the signal
+         session = self.tasks_sessions[task_id]
+         await session.db.update("signals", task_id, {"type": signal_type, "payload": payload})
+         return True
+
+     async def cancel_task(self, task_id: str, mission_id: str, timeout: float | None = None) -> bool:
+         """Cancel a task with graceful shutdown and a forced fallback.
+
+         Args:
+             task_id: The ID of the task to cancel
+             mission_id: The ID of the mission
+             timeout: Optional timeout for cancellation
+
+         Returns:
+             True if the task was cancelled successfully, False otherwise
+         """
+         if task_id not in self.tasks:
+             logger.warning(
+                 "Cannot cancel - task not found: '%s'", task_id, extra={"mission_id": mission_id, "task_id": task_id}
+             )
+             # Still cleanup any orphaned session
+             await self._cleanup_task(task_id, mission_id)
+             return True
+
+         timeout = timeout or self.default_timeout
+         task = self.tasks[task_id]
+
+         logger.info(
+             "Initiating task cancellation: '%s', timeout: %.1fs",
+             task_id,
+             timeout,
+             extra={"mission_id": mission_id, "task_id": task_id, "timeout": timeout},
+         )
+
+         try:
+             # Phase 1: wait for graceful shutdown
+             await asyncio.wait_for(task, timeout=timeout)
+
+             logger.info(
+                 "Task cancelled gracefully: '%s'", task_id, extra={"mission_id": mission_id, "task_id": task_id}
+             )
+
+         except asyncio.TimeoutError:
+             logger.warning(
+                 "Graceful cancellation timed out for task: '%s', forcing cancellation",
+                 task_id,
+                 extra={"mission_id": mission_id, "task_id": task_id, "timeout": timeout},
+             )
+
+             # Phase 2: force cancellation
+             task.cancel()
+             with contextlib.suppress(asyncio.CancelledError):
+                 await task
+
+             logger.warning("Task force-cancelled: '%s'", task_id, extra={"mission_id": mission_id, "task_id": task_id})
+             await self._cleanup_task(task_id, mission_id)
+             return True
+
+         except Exception as e:
+             logger.error(
+                 "Error during task cancellation: '%s'",
+                 task_id,
+                 extra={"mission_id": mission_id, "task_id": task_id, "error": str(e)},
+                 exc_info=True,
+             )
+             await self._cleanup_task(task_id, mission_id)
+             return False
+
+         await self._cleanup_task(task_id, mission_id)
+         return True
+
+     async def clean_session(self, task_id: str, mission_id: str) -> bool:
+         """Clean up a task session.
+
+         Args:
+             task_id: The ID of the task
+             mission_id: The ID of the mission
+
+         Returns:
+             True if the task session was cleaned successfully, False otherwise.
+         """
+         if task_id not in self.tasks_sessions:
+             logger.warning(
+                 "Cannot clean session - task not found: '%s'",
+                 task_id,
+                 extra={"mission_id": mission_id, "task_id": task_id},
+             )
+             return False
+
+         await self.cancel_task(mission_id=mission_id, task_id=task_id)
+
+         logger.info("Cleaning up session for task: '%s'", task_id, extra={"mission_id": mission_id, "task_id": task_id})
+         return True
+
+     async def pause_task(self, task_id: str, mission_id: str) -> bool:
+         """Pause a running task.
+
+         Args:
+             task_id: The ID of the task
+             mission_id: The ID of the mission
+
+         Returns:
+             True if the task was paused successfully, False otherwise
+         """
+         return await self.send_signal(task_id=task_id, mission_id=mission_id, signal_type="pause", payload={})
+
+     async def resume_task(self, task_id: str, mission_id: str) -> bool:
+         """Resume a paused task.
+
+         Args:
+             task_id: The ID of the task
+             mission_id: The ID of the mission
+
+         Returns:
+             True if the task was resumed successfully, False otherwise
+         """
+         return await self.send_signal(task_id=task_id, mission_id=mission_id, signal_type="resume", payload={})
+
+     async def get_task_status(self, task_id: str, mission_id: str) -> bool:
+         """Request status from a task.
+
+         Args:
+             task_id: The ID of the task
+             mission_id: The ID of the mission
+
+         Returns:
+             True if the status request was sent successfully, False otherwise
+         """
+         return await self.send_signal(task_id=task_id, mission_id=mission_id, signal_type="status", payload={})
+
+     async def cancel_all_tasks(self, mission_id: str, timeout: float | None = None) -> dict[str, bool | BaseException]:
+         """Cancel all running tasks.
+
+         Args:
+             mission_id: The ID of the mission
+             timeout: Optional timeout for cancellation
+
+         Returns:
+             Dictionary mapping task_id to cancellation success status
+         """
+         timeout = timeout or self.default_timeout
+         task_ids = list(self.running_tasks)
+
+         logger.info(
+             "Cancelling all tasks in parallel: %d tasks",
+             len(task_ids),
+             extra={"mission_id": mission_id, "task_count": len(task_ids), "timeout": timeout},
+         )
+
+         # Cancel all tasks in parallel to reduce latency
+         cancel_coros = [
+             self.cancel_task(
+                 task_id=task_id,
+                 mission_id=mission_id,
+                 timeout=timeout,
+             )
+             for task_id in task_ids
+         ]
+         results_list = await asyncio.gather(*cancel_coros, return_exceptions=True)
+
+         # Build results dictionary
+         results: dict[str, bool | BaseException] = {}
+         for task_id, result in zip(task_ids, results_list):
+             if isinstance(result, Exception):
+                 logger.error("Exception cancelling task %s: %s", task_id, result)
+                 results[task_id] = False
+             else:
+                 results[task_id] = result
+
+         return results
+
+     async def shutdown(self, mission_id: str, timeout: float = 30.0) -> None:
+         """Gracefully shut down all tasks.
+
+         Args:
+             mission_id: The ID of the mission
+             timeout: Timeout for shutdown operations
+         """
+         logger.info(
+             "TaskManager shutdown initiated, timeout: %.1fs",
+             timeout,
+             extra={"mission_id": mission_id, "timeout": timeout, "active_tasks": len(self.running_tasks)},
+         )
+
+         self._shutdown_event.set()
+         results = await self.cancel_all_tasks(mission_id, timeout)
+
+         failed_tasks = [task_id for task_id, success in results.items() if not success]
+         if failed_tasks:
+             logger.error(
+                 "Failed to cancel %d tasks during shutdown: %s",
+                 len(failed_tasks),
+                 failed_tasks,
+                 extra={"mission_id": mission_id, "failed_tasks": failed_tasks, "failed_count": len(failed_tasks)},
+             )
+
+         # Clean up any remaining sessions (in case cancellation didn't clean them)
+         remaining_sessions = list(self.tasks_sessions.keys())
+         if remaining_sessions:
+             logger.info("Cleaning up %d remaining task sessions", len(remaining_sessions))
+             cleanup_coros = [self._cleanup_task(task_id, mission_id) for task_id in remaining_sessions]
+             await asyncio.gather(*cleanup_coros, return_exceptions=True)
+
+         logger.info(
+             "TaskManager shutdown completed, cancelled: %d, failed: %d",
+             len(results) - len(failed_tasks),
+             len(failed_tasks),
+             extra={
+                 "mission_id": mission_id,
+                 "cancelled_count": len(results) - len(failed_tasks),
+                 "failed_count": len(failed_tasks),
+             },
+         )
+
+     async def __aenter__(self) -> "BaseTaskManager":
+         """Enter the async context manager.
+
+         Returns:
+             Self, for use in async with statements
+         """
+         logger.debug("Entering %s context", self.__class__.__name__)
+         return self
+
+     async def __aexit__(
+         self,
+         exc_type: type[BaseException] | None,
+         exc_val: BaseException | None,
+         exc_tb: types.TracebackType | None,
+     ) -> None:
+         """Exit the async context manager and clean up resources.
+
+         Args:
+             exc_type: Exception type if an exception occurred
+             exc_val: Exception value if an exception occurred
+             exc_tb: Exception traceback if an exception occurred
+         """
+         logger.debug(
+             "Exiting %s context, exception: %s",
+             self.__class__.__name__,
+             exc_type,
+             extra={"exc_type": exc_type, "exc_val": exc_val},
+         )
+         # Shutdown with default mission_id for context manager usage
+         await self.shutdown(mission_id="context_manager_cleanup")
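
The two-phase cancellation in cancel_task above is a standard asyncio pattern: first await the task under asyncio.wait_for to give it a chance to finish gracefully, then force-cancel once the timeout expires. A minimal standalone sketch of that pattern, using plain asyncio and none of the digitalkin types:

    import asyncio
    import contextlib

    async def cancel_with_grace(task: asyncio.Task, timeout: float) -> None:
        # Phase 1: give the task `timeout` seconds to finish on its own.
        try:
            await asyncio.wait_for(task, timeout=timeout)
        except asyncio.TimeoutError:
            # Phase 2: force cancellation and swallow the CancelledError.
            task.cancel()
            with contextlib.suppress(asyncio.CancelledError):
                await task

    async def main() -> None:
        task = asyncio.create_task(asyncio.sleep(60))
        await cancel_with_grace(task, timeout=0.1)
        print(task.done())  # True

    asyncio.run(main())

Note that recent Python versions already request cancellation of the awaited task when asyncio.wait_for times out, so the explicit phase-2 cancel acts mainly as a safety net; the manager's version additionally logs each phase and cleans up the session afterwards.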
digitalkin/core/task_manager/local_task_manager.py
@@ -0,0 +1,108 @@
+ """Local task manager for single-process execution."""
+
+ import datetime
+ from collections.abc import Coroutine
+ from typing import Any
+
+ from digitalkin.core.task_manager.base_task_manager import BaseTaskManager
+ from digitalkin.core.task_manager.task_executor import TaskExecutor
+ from digitalkin.logger import logger
+ from digitalkin.modules._base_module import BaseModule
+
+
+ class LocalTaskManager(BaseTaskManager):
+     """Task manager for local execution in the same process.
+
+     Executes tasks locally using TaskExecutor with the supervisor pattern.
+     Suitable for single-server deployments and development.
+     """
+
+     _executor: TaskExecutor
+
+     def __init__(
+         self,
+         default_timeout: float = 10.0,
+         max_concurrent_tasks: int = 100,
+     ) -> None:
+         """Initialize the local task manager.
+
+         Args:
+             default_timeout: Default timeout for task operations in seconds
+             max_concurrent_tasks: Maximum number of concurrent tasks
+         """
+         super().__init__(default_timeout, max_concurrent_tasks)
+         self._executor = TaskExecutor()
+
+     async def create_task(
+         self,
+         task_id: str,
+         mission_id: str,
+         module: BaseModule,
+         coro: Coroutine[Any, Any, None],
+         heartbeat_interval: datetime.timedelta = datetime.timedelta(seconds=2),
+         connection_timeout: datetime.timedelta = datetime.timedelta(seconds=5),
+     ) -> None:
+         """Create and execute a task locally using TaskExecutor.
+
+         Args:
+             task_id: Unique identifier for the task
+             mission_id: Mission identifier
+             module: Module instance to execute
+             coro: Coroutine to execute
+             heartbeat_interval: Interval between heartbeats
+             connection_timeout: Connection timeout for SurrealDB
+
+         Raises:
+             ValueError: If task_id is duplicated
+             RuntimeError: If the maximum number of concurrent tasks is reached
+         """
+         # Validation
+         await self._validate_task_creation(task_id, mission_id, coro)
+
+         logger.info(
+             "Creating local task: '%s'",
+             task_id,
+             extra={
+                 "mission_id": mission_id,
+                 "task_id": task_id,
+                 "heartbeat_interval": heartbeat_interval,
+                 "connection_timeout": connection_timeout,
+             },
+         )
+
+         try:
+             # Create session
+             channel, session = await self._create_session(
+                 task_id, mission_id, module, heartbeat_interval, connection_timeout
+             )
+
+             # Execute task using TaskExecutor
+             supervisor_task = await self._executor.execute_task(
+                 task_id,
+                 mission_id,
+                 coro,
+                 session,
+                 channel,
+             )
+             self.tasks[task_id] = supervisor_task
+
+             logger.info(
+                 "Local task created and started: '%s'",
+                 task_id,
+                 extra={
+                     "mission_id": mission_id,
+                     "task_id": task_id,
+                     "total_tasks": len(self.tasks),
+                 },
+             )
+
+         except Exception as e:
+             logger.error(
+                 "Failed to create local task: '%s'",
+                 task_id,
+                 extra={"mission_id": mission_id, "task_id": task_id, "error": str(e)},
+                 exc_info=True,
+             )
+             # Cleanup on failure
+             await self._cleanup_task(task_id, mission_id=mission_id)
+             raise
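
As a usage sketch of the context-manager flow described in BaseTaskManager's docstring, applied to LocalTaskManager: EchoModule and its run() coroutine below are hypothetical stand-ins for a concrete BaseModule subclass and its entry point, and a reachable SurrealDB instance is assumed.

    import asyncio

    from digitalkin.core.task_manager.local_task_manager import LocalTaskManager

    async def main() -> None:
        module = EchoModule()  # hypothetical BaseModule subclass
        async with LocalTaskManager(default_timeout=10.0, max_concurrent_tasks=10) as manager:
            await manager.create_task(
                task_id="task-1",
                mission_id="mission-1",
                module=module,
                coro=module.run(),  # hypothetical entry-point coroutine
            )
            print(manager.task_count, manager.running_tasks)
        # __aexit__ calls shutdown(), which cancels remaining tasks and cleans up sessions.

    asyncio.run(main())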
digitalkin/core/task_manager/remote_task_manager.py
@@ -0,0 +1,87 @@
+ """Remote task manager for distributed execution."""
+
+ import datetime
+ from collections.abc import Coroutine
+ from typing import Any
+
+ from digitalkin.core.task_manager.base_task_manager import BaseTaskManager
+ from digitalkin.logger import logger
+ from digitalkin.modules._base_module import BaseModule
+
+
+ class RemoteTaskManager(BaseTaskManager):
+     """Task manager for distributed/remote execution.
+
+     Only manages task metadata and signals - actual execution happens in remote workers.
+     Suitable for horizontally scaled deployments with Taskiq/Celery workers.
+     """
+
+     async def create_task(
+         self,
+         task_id: str,
+         mission_id: str,
+         module: BaseModule,
+         coro: Coroutine[Any, Any, None],
+         heartbeat_interval: datetime.timedelta = datetime.timedelta(seconds=2),
+         connection_timeout: datetime.timedelta = datetime.timedelta(seconds=5),
+     ) -> None:
+         """Register a task for remote execution (metadata only).
+
+         Creates a TaskSession for signal handling and monitoring, but doesn't execute the coroutine.
+         The coroutine will be recreated and executed by a remote worker.
+
+         Args:
+             task_id: Unique identifier for the task
+             mission_id: Mission identifier
+             module: Module instance for metadata (not executed here)
+             coro: Coroutine (will be closed - execution happens in a worker)
+             heartbeat_interval: Interval between heartbeats
+             connection_timeout: Connection timeout for SurrealDB
+
+         Raises:
+             ValueError: If task_id is duplicated
+             RuntimeError: If the maximum number of concurrent tasks is reached
+         """
+         # Validation
+         await self._validate_task_creation(task_id, mission_id, coro)
+
+         logger.info(
+             "Registering remote task: '%s'",
+             task_id,
+             extra={
+                 "mission_id": mission_id,
+                 "task_id": task_id,
+                 "heartbeat_interval": heartbeat_interval,
+                 "connection_timeout": connection_timeout,
+             },
+         )
+
+         try:
+             # Create session for metadata and signal handling
+             _channel, _session = await self._create_session(
+                 task_id, mission_id, module, heartbeat_interval, connection_timeout
+             )
+
+             # Close coroutine - worker will recreate and execute it
+             coro.close()
+
+             logger.info(
+                 "Remote task registered: '%s'",
+                 task_id,
+                 extra={
+                     "mission_id": mission_id,
+                     "task_id": task_id,
+                     "total_sessions": len(self.tasks_sessions),
+                 },
+             )
+
+         except Exception as e:
+             logger.error(
+                 "Failed to register remote task: '%s'",
+                 task_id,
+                 extra={"mission_id": mission_id, "task_id": task_id, "error": str(e)},
+                 exc_info=True,
+             )
+             # Cleanup on failure
+             await self._cleanup_task(task_id, mission_id=mission_id)
+             raise
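
Since RemoteTaskManager only registers metadata, all control flows through signals: pause_task, resume_task, and get_task_status are thin wrappers around send_signal, which writes a {"type": ..., "payload": ...} record to the "signals" table under the task's ID. A hedged sketch of that control path, assuming a task already registered as "task-1" and a remote worker that watches the signals table (the worker side is not shown in this diff):

    from digitalkin.core.task_manager.remote_task_manager import RemoteTaskManager

    async def control_remote_task(manager: RemoteTaskManager) -> None:
        # Each helper resolves to send_signal(), which updates the "signals"
        # record for "task-1"; the worker owning the task is expected to react.
        paused = await manager.pause_task(task_id="task-1", mission_id="mission-1")
        if paused:
            await manager.resume_task(task_id="task-1", mission_id="mission-1")
        # Custom signal types can go through send_signal directly.
        await manager.send_signal("task-1", "mission-1", "status", {})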