digitalkin 0.3.1.dev1__py3-none-any.whl → 0.3.2a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. base_server/server_async_insecure.py +6 -5
  2. base_server/server_async_secure.py +6 -5
  3. base_server/server_sync_insecure.py +5 -4
  4. base_server/server_sync_secure.py +5 -4
  5. digitalkin/__version__.py +1 -1
  6. digitalkin/core/job_manager/base_job_manager.py +1 -1
  7. digitalkin/core/job_manager/single_job_manager.py +78 -36
  8. digitalkin/core/job_manager/taskiq_broker.py +8 -7
  9. digitalkin/core/job_manager/taskiq_job_manager.py +9 -5
  10. digitalkin/core/task_manager/base_task_manager.py +3 -1
  11. digitalkin/core/task_manager/surrealdb_repository.py +13 -7
  12. digitalkin/core/task_manager/task_executor.py +27 -10
  13. digitalkin/core/task_manager/task_session.py +133 -101
  14. digitalkin/grpc_servers/module_server.py +95 -171
  15. digitalkin/grpc_servers/module_servicer.py +133 -27
  16. digitalkin/grpc_servers/utils/grpc_client_wrapper.py +36 -10
  17. digitalkin/grpc_servers/utils/utility_schema_extender.py +106 -0
  18. digitalkin/models/__init__.py +1 -1
  19. digitalkin/models/core/job_manager_models.py +0 -8
  20. digitalkin/models/core/task_monitor.py +23 -1
  21. digitalkin/models/grpc_servers/models.py +95 -8
  22. digitalkin/models/module/__init__.py +26 -13
  23. digitalkin/models/module/base_types.py +61 -0
  24. digitalkin/models/module/module_context.py +279 -13
  25. digitalkin/models/module/module_types.py +29 -109
  26. digitalkin/models/module/setup_types.py +547 -0
  27. digitalkin/models/module/tool_cache.py +230 -0
  28. digitalkin/models/module/tool_reference.py +160 -0
  29. digitalkin/models/module/utility.py +167 -0
  30. digitalkin/models/services/cost.py +22 -1
  31. digitalkin/models/services/registry.py +77 -0
  32. digitalkin/modules/__init__.py +5 -1
  33. digitalkin/modules/_base_module.py +253 -90
  34. digitalkin/modules/archetype_module.py +6 -1
  35. digitalkin/modules/tool_module.py +6 -1
  36. digitalkin/modules/triggers/__init__.py +8 -0
  37. digitalkin/modules/triggers/healthcheck_ping_trigger.py +45 -0
  38. digitalkin/modules/triggers/healthcheck_services_trigger.py +63 -0
  39. digitalkin/modules/triggers/healthcheck_status_trigger.py +52 -0
  40. digitalkin/services/__init__.py +4 -0
  41. digitalkin/services/communication/__init__.py +7 -0
  42. digitalkin/services/communication/communication_strategy.py +87 -0
  43. digitalkin/services/communication/default_communication.py +104 -0
  44. digitalkin/services/communication/grpc_communication.py +264 -0
  45. digitalkin/services/cost/cost_strategy.py +36 -14
  46. digitalkin/services/cost/default_cost.py +61 -1
  47. digitalkin/services/cost/grpc_cost.py +98 -2
  48. digitalkin/services/filesystem/grpc_filesystem.py +9 -2
  49. digitalkin/services/registry/__init__.py +22 -1
  50. digitalkin/services/registry/default_registry.py +156 -4
  51. digitalkin/services/registry/exceptions.py +47 -0
  52. digitalkin/services/registry/grpc_registry.py +382 -0
  53. digitalkin/services/registry/registry_models.py +15 -0
  54. digitalkin/services/registry/registry_strategy.py +106 -4
  55. digitalkin/services/services_config.py +25 -3
  56. digitalkin/services/services_models.py +5 -1
  57. digitalkin/services/setup/default_setup.py +1 -1
  58. digitalkin/services/setup/grpc_setup.py +1 -1
  59. digitalkin/services/storage/grpc_storage.py +1 -1
  60. digitalkin/services/user_profile/__init__.py +11 -0
  61. digitalkin/services/user_profile/grpc_user_profile.py +2 -2
  62. digitalkin/services/user_profile/user_profile_strategy.py +0 -15
  63. digitalkin/utils/__init__.py +40 -0
  64. digitalkin/utils/conditional_schema.py +260 -0
  65. digitalkin/utils/dynamic_schema.py +487 -0
  66. digitalkin/utils/schema_splitter.py +290 -0
  67. {digitalkin-0.3.1.dev1.dist-info → digitalkin-0.3.2a2.dist-info}/METADATA +13 -13
  68. digitalkin-0.3.2a2.dist-info/RECORD +144 -0
  69. {digitalkin-0.3.1.dev1.dist-info → digitalkin-0.3.2a2.dist-info}/WHEEL +1 -1
  70. {digitalkin-0.3.1.dev1.dist-info → digitalkin-0.3.2a2.dist-info}/top_level.txt +1 -0
  71. modules/archetype_with_tools_module.py +232 -0
  72. modules/cpu_intensive_module.py +1 -1
  73. modules/dynamic_setup_module.py +338 -0
  74. modules/minimal_llm_module.py +1 -1
  75. modules/text_transform_module.py +1 -1
  76. monitoring/digitalkin_observability/__init__.py +46 -0
  77. monitoring/digitalkin_observability/http_server.py +150 -0
  78. monitoring/digitalkin_observability/interceptors.py +176 -0
  79. monitoring/digitalkin_observability/metrics.py +201 -0
  80. monitoring/digitalkin_observability/prometheus.py +137 -0
  81. monitoring/tests/test_metrics.py +172 -0
  82. services/filesystem_module.py +7 -5
  83. services/storage_module.py +4 -2
  84. digitalkin/grpc_servers/registry_server.py +0 -65
  85. digitalkin/grpc_servers/registry_servicer.py +0 -456
  86. digitalkin-0.3.1.dev1.dist-info/RECORD +0 -117
  87. {digitalkin-0.3.1.dev1.dist-info → digitalkin-0.3.2a2.dist-info}/licenses/LICENSE +0 -0
digitalkin/core/task_manager/task_executor.py

@@ -1,6 +1,7 @@
 """Task executor for running tasks with full lifecycle management."""

 import asyncio
+import contextlib
 import datetime
 from collections.abc import Coroutine
 from typing import Any
@@ -59,6 +60,8 @@ class TaskExecutor:
                     SignalMessage(
                         task_id=task_id,
                         mission_id=mission_id,
+                        setup_id=session.setup_id,
+                        setup_version_id=session.setup_version_id,
                         status=session.status,
                         action=SignalType.START,
                     ).model_dump(),
@@ -67,15 +70,21 @@
            except asyncio.CancelledError:
                logger.debug("Signal listener cancelled", extra={"mission_id": mission_id, "task_id": task_id})
            finally:
-                await channel.create(
-                    "tasks",
-                    SignalMessage(
-                        task_id=task_id,
-                        mission_id=mission_id,
-                        status=session.status,
-                        action=SignalType.STOP,
-                    ).model_dump(),
-                )
+                with contextlib.suppress(Exception):  # Connection may already be closed
+                    await channel.create(
+                        "tasks",
+                        SignalMessage(
+                            task_id=task_id,
+                            mission_id=mission_id,
+                            setup_id=session.setup_id,
+                            setup_version_id=session.setup_version_id,
+                            status=session.status,
+                            action=SignalType.STOP,
+                            cancellation_reason=session.cancellation_reason,
+                            error_message=session._last_exception,  # noqa: SLF001
+                            exception_traceback=session._last_traceback,  # noqa: SLF001
+                        ).model_dump(),
+                    )
                logger.info("Signal listener ended", extra={"mission_id": mission_id, "task_id": task_id})

        async def heartbeat_wrapper() -> None:
@@ -125,8 +134,14 @@
            # Heartbeat stopped - failure cleanup
            cleanup_reason = CancellationReason.FAILURE_CLEANUP

+        # Signal stream to close FIRST before any cleanup
+        session.close_stream()
+
        # Cancel pending tasks with proper reason logging
        if pending:
+            # Give stream time to see the signal and exit gracefully
+            await asyncio.sleep(0.01)  # Allow one event loop cycle
+
            pending_names = [t.get_name() for t in pending]
            logger.debug(
                "Cancelling pending tasks: %s, reason: %s",
@@ -148,6 +163,7 @@
        # Determine final status based on which task completed
        if completed is main_task:
            session.status = TaskStatus.COMPLETED
+            session.cancellation_reason = CancellationReason.COMPLETED
            logger.info(
                "Main task completed successfully",
                extra={"mission_id": mission_id, "task_id": task_id},
@@ -193,9 +209,10 @@
            )
            cleanup_reason = CancellationReason.FAILURE_CLEANUP
            raise
-        except Exception:
+        except Exception as e:
            session.status = TaskStatus.FAILED
            cleanup_reason = CancellationReason.FAILURE_CLEANUP
+            session.record_exception(e)
            logger.exception(
                "Task failed with exception: '%s'",
                task_id,
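Taken together, the task_executor.py hunks establish a shutdown order: the session's stream-close flag is set before any cleanup, pending tasks get one event-loop cycle to observe it, whatever is still pending is then cancelled, and the final STOP signal is published on a best-effort basis because the channel may already be closed. A minimal sketch of that ordering follows; run_with_graceful_stop and publish_stop are hypothetical names for illustration, not part of digitalkin.

import asyncio
import contextlib
from collections.abc import Awaitable, Callable, Coroutine
from typing import Any


async def run_with_graceful_stop(
    main_coro: Coroutine[Any, Any, None],
    stop_event: asyncio.Event,
    publish_stop: Callable[[], Awaitable[None]],
) -> None:
    """Run a main coroutine beside a helper and shut both down in a fixed order."""
    main_task = asyncio.create_task(main_coro, name="main")

    async def helper() -> None:
        # Stand-in for a long-lived listener that polls the stop flag.
        while not stop_event.is_set():
            await asyncio.sleep(0.05)

    helper_task = asyncio.create_task(helper(), name="helper")
    _done, pending = await asyncio.wait({main_task, helper_task}, return_when=asyncio.FIRST_COMPLETED)

    stop_event.set()            # 1. signal consumers to close FIRST
    await asyncio.sleep(0.01)   # 2. give them one event loop cycle to notice
    for task in pending:        # 3. cancel whatever is still running
        task.cancel()
    await asyncio.gather(*pending, return_exceptions=True)
    with contextlib.suppress(Exception):  # 4. best effort: the transport may be gone
        await publish_stop()


async def _demo() -> None:
    async def main() -> None:
        await asyncio.sleep(0.1)

    async def publish_stop() -> None:
        print("STOP signal published")

    await run_with_graceful_stop(main(), asyncio.Event(), publish_stop)


if __name__ == "__main__":
    asyncio.run(_demo())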
digitalkin/core/task_manager/task_session.py

@@ -1,7 +1,9 @@
 """Task session easing task lifecycle management."""

 import asyncio
+import contextlib
 import datetime
+import traceback
 from collections.abc import AsyncGenerator

 from digitalkin.core.task_manager.surrealdb_repository import SurrealDBConnection
@@ -39,9 +41,17 @@ class TaskSession:
     is_cancelled: asyncio.Event
     cancellation_reason: CancellationReason
     _paused: asyncio.Event
+    _stream_closed: asyncio.Event
     _heartbeat_interval: datetime.timedelta
     _last_heartbeat: datetime.datetime

+    # Exception tracking for enhanced DB logging
+    _last_exception: str | None
+    _last_traceback: str | None
+
+    # Cleanup guard for idempotent cleanup
+    _cleanup_done: bool
+
     def __init__(
         self,
         task_id: str,
@@ -49,14 +59,24 @@
         db: SurrealDBConnection,
         module: BaseModule,
         heartbeat_interval: datetime.timedelta = datetime.timedelta(seconds=2),
+        queue_maxsize: int = 1000,
     ) -> None:
-        """Initialize Task Session."""
+        """Initialize Task Session.
+
+        Args:
+            task_id: Unique task identifier
+            mission_id: Mission identifier
+            db: SurrealDB connection
+            module: Module instance
+            heartbeat_interval: Interval between heartbeats
+            queue_maxsize: Maximum size for the queue (0 = unlimited)
+        """
         self.db = db
         self.module = module

         self.status = TaskStatus.PENDING
         # Bounded queue to prevent unbounded memory growth (max 1000 items)
-        self.queue: asyncio.Queue = asyncio.Queue(maxsize=1000)
+        self.queue: asyncio.Queue = asyncio.Queue(maxsize=queue_maxsize)

         self.task_id = task_id
         self.mission_id = mission_id
@@ -71,12 +91,23 @@
         self.is_cancelled = asyncio.Event()
         self.cancellation_reason = CancellationReason.UNKNOWN
         self._paused = asyncio.Event()
+        self._stream_closed = asyncio.Event()
         self._heartbeat_interval = heartbeat_interval

+        # Exception tracking
+        self._last_exception = None
+        self._last_traceback = None
+
+        # Cleanup guard
+        self._cleanup_done = False
+
         logger.info(
-            "TaskContext initialized for task: '%s'",
-            task_id,
-            extra={"task_id": task_id, "mission_id": mission_id, "heartbeat_interval": heartbeat_interval},
+            "TaskSession initialized",
+            extra={
+                "task_id": task_id,
+                "mission_id": mission_id,
+                "heartbeat_interval": str(heartbeat_interval),
+            },
         )

     @property
@@ -89,6 +120,39 @@
         """Task paused status."""
         return self._paused.is_set()

+    @property
+    def stream_closed(self) -> bool:
+        """Check if stream termination was signaled."""
+        return self._stream_closed.is_set()
+
+    def close_stream(self) -> None:
+        """Signal that the stream should terminate."""
+        self._stream_closed.set()
+
+    @property
+    def setup_id(self) -> str:
+        """Get setup_id from module context."""
+        return self.module.context.session.setup_id
+
+    @property
+    def setup_version_id(self) -> str:
+        """Get setup_version_id from module context."""
+        return self.module.context.session.setup_version_id
+
+    @property
+    def session_ids(self) -> dict[str, str]:
+        """Get all session IDs from module context for structured logging."""
+        return self.module.context.session.current_ids()
+
+    def record_exception(self, exc: Exception) -> None:
+        """Record exception details for DB logging.
+
+        Args:
+            exc: The exception that caused the task to fail.
+        """
+        self._last_exception = str(exc)
+        self._last_traceback = traceback.format_exc()
+
     async def send_heartbeat(self) -> bool:
         """Rate-limited heartbeat with connection resilience.

@@ -98,6 +162,8 @@
         heartbeat = HeartbeatMessage(
             task_id=self.task_id,
             mission_id=self.mission_id,
+            setup_id=self.setup_id,
+            setup_version_id=self.setup_version_id,
             timestamp=datetime.datetime.now(datetime.timezone.utc),
         )

@@ -110,23 +176,17 @@
                return True
            except Exception as e:
                logger.error(
-                    "Heartbeat exception for task: '%s'",
-                    self.task_id,
-                    extra={"task_id": self.task_id, "error": str(e)},
+                    "Heartbeat exception",
+                    extra={**self.session_ids, "error": str(e)},
                    exc_info=True,
                )
-                logger.error(
-                    "Initial heartbeat failed for task: '%s'",
-                    self.task_id,
-                    extra={"task_id": self.task_id},
-                )
+                logger.error("Initial heartbeat failed", extra=self.session_ids)
                return False

        if (heartbeat.timestamp - self._last_heartbeat) < self._heartbeat_interval:
            logger.debug(
-                "Heartbeat skipped due to rate limiting for task: '%s' | delta=%s",
-                self.task_id,
-                heartbeat.timestamp - self._last_heartbeat,
+                "Heartbeat skipped due to rate limiting",
+                extra={**self.session_ids, "delta": str(heartbeat.timestamp - self._last_heartbeat)},
            )
            return True

@@ -137,39 +197,24 @@
            return True
        except Exception as e:
            logger.error(
-                "Heartbeat exception for task: '%s'",
-                self.task_id,
-                extra={"task_id": self.task_id, "error": str(e)},
+                "Heartbeat exception",
+                extra={**self.session_ids, "error": str(e)},
                exc_info=True,
            )
-            logger.warning(
-                "Heartbeat failed for task: '%s'",
-                self.task_id,
-                extra={"task_id": self.task_id},
-            )
+            logger.warning("Heartbeat failed", extra=self.session_ids)
            return False

    async def generate_heartbeats(self) -> None:
        """Periodic heartbeat generator with cancellation support."""
-        logger.debug(
-            "Heartbeat generator started for task: '%s'",
-            self.task_id,
-            extra={"task_id": self.task_id, "mission_id": self.mission_id},
-        )
+        logger.debug("Heartbeat generator started", extra=self.session_ids)
        while not self.cancelled:
            logger.debug(
-                "Heartbeat tick for task: '%s', cancelled=%s",
-                self.task_id,
-                self.cancelled,
-                extra={"task_id": self.task_id, "mission_id": self.mission_id},
+                "Heartbeat tick",
+                extra={**self.session_ids, "cancelled": self.cancelled},
            )
            success = await self.send_heartbeat()
            if not success:
-                logger.error(
-                    "Heartbeat failed, cancelling task: '%s'",
-                    self.task_id,
-                    extra={"task_id": self.task_id, "mission_id": self.mission_id},
-                )
+                logger.error("Heartbeat failed, cancelling task", extra=self.session_ids)
                await self._handle_cancel(CancellationReason.HEARTBEAT_FAILURE)
                break
            await asyncio.sleep(self._heartbeat_interval.total_seconds())
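The heartbeat hunks keep the existing contract and change only the log fields: a heartbeat is skipped when less than _heartbeat_interval has elapsed since the last one, and a failed send returns False so generate_heartbeats can cancel the task with CancellationReason.HEARTBEAT_FAILURE. A small sketch of that rate-limiting contract; HeartbeatThrottle and publish are hypothetical names, not the digitalkin API.

import asyncio
import datetime
from collections.abc import Awaitable, Callable


class HeartbeatThrottle:
    """Illustrative rate limiter: publish at most one heartbeat per interval."""

    def __init__(self, interval: datetime.timedelta) -> None:
        self._interval = interval
        self._last: datetime.datetime | None = None

    async def send(self, publish: Callable[[datetime.datetime], Awaitable[None]]) -> bool:
        now = datetime.datetime.now(datetime.timezone.utc)
        if self._last is not None and (now - self._last) < self._interval:
            return True  # within the rate limit window: skip, but report success
        try:
            await publish(now)
        except Exception:
            return False  # caller treats a failed heartbeat as grounds for cancellation
        self._last = now
        return True


async def _demo() -> None:
    async def publish(ts: datetime.datetime) -> None:
        print("heartbeat", ts.isoformat())

    throttle = HeartbeatThrottle(datetime.timedelta(seconds=2))
    for _ in range(3):
        await throttle.send(publish)
        await asyncio.sleep(0.5)  # only the first iteration actually publishes


if __name__ == "__main__":
    asyncio.run(_demo())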
@@ -177,11 +222,7 @@
     async def wait_if_paused(self) -> None:
         """Block execution if task is paused."""
         if self._paused.is_set():
-            logger.info(
-                "Task paused, waiting for resume: '%s'",
-                self.task_id,
-                extra={"task_id": self.task_id},
-            )
+            logger.info("Task paused, waiting for resume", extra=self.session_ids)
             await self._paused.wait()

     async def listen_signals(self) -> None:  # noqa: C901
@@ -190,18 +231,14 @@
         Raises:
             CancelledError: Asyncio when task cancelling
         """
-        logger.info(
-            "Signal listener started for task: '%s'",
-            self.task_id,
-            extra={"task_id": self.task_id},
-        )
+        logger.info("Signal listener started", extra=self.session_ids)
        if self.signal_record_id is None:
            self.signal_record_id = (await self.db.select_by_task_id("tasks", self.task_id)).get("id")

        live_id, live_signals = await self.db.start_live("tasks")
        try:
            async for signal in live_signals:
-                logger.debug("Signal received for task '%s': %s", self.task_id, signal)
+                logger.debug("Signal received", extra={**self.session_ids, "signal": signal})
                if self.cancelled:
                    break

@@ -218,26 +255,18 @@
                    await self._handle_status_request()

        except asyncio.CancelledError:
-            logger.debug(
-                "Signal listener cancelled for task: '%s'",
-                self.task_id,
-                extra={"task_id": self.task_id},
-            )
+            logger.debug("Signal listener cancelled", extra=self.session_ids)
            raise
        except Exception as e:
            logger.error(
-                "Signal listener fatal error for task: '%s'",
-                self.task_id,
-                extra={"task_id": self.task_id, "error": str(e)},
+                "Signal listener fatal error",
+                extra={**self.session_ids, "error": str(e)},
                exc_info=True,
            )
        finally:
-            await self.db.stop_live(live_id)
-            logger.info(
-                "Signal listener stopped for task: '%s'",
-                self.task_id,
-                extra={"task_id": self.task_id},
-            )
+            with contextlib.suppress(Exception):  # Connection may already be closed
+                await self.db.stop_live(live_id)
+            logger.info("Signal listener stopped", extra=self.session_ids)

    async def _handle_cancel(self, reason: CancellationReason = CancellationReason.UNKNOWN) -> None:
        """Idempotent cancellation with acknowledgment and reason tracking.
@@ -247,13 +276,9 @@
        """
        if self.is_cancelled.is_set():
            logger.debug(
-                "Cancel ignored - task already cancelled: '%s' (existing reason: %s, new reason: %s)",
-                self.task_id,
-                self.cancellation_reason.value,
-                reason.value,
+                "Cancel ignored - already cancelled",
                extra={
-                    "task_id": self.task_id,
-                    "mission_id": self.mission_id,
+                    **self.session_ids,
                    "existing_reason": self.cancellation_reason.value,
                    "new_reason": reason.value,
                },
@@ -267,25 +292,13 @@
        # Log with appropriate level based on reason
        if reason in {CancellationReason.SUCCESS_CLEANUP, CancellationReason.FAILURE_CLEANUP}:
            logger.debug(
-                "Task cancelled (cleanup): '%s', reason: %s",
-                self.task_id,
-                reason.value,
-                extra={
-                    "task_id": self.task_id,
-                    "mission_id": self.mission_id,
-                    "cancellation_reason": reason.value,
-                },
+                "Task cancelled (cleanup)",
+                extra={**self.session_ids, "cancellation_reason": reason.value},
            )
        else:
            logger.info(
-                "Task cancelled: '%s', reason: %s",
-                self.task_id,
-                reason.value,
-                extra={
-                    "task_id": self.task_id,
-                    "mission_id": self.mission_id,
-                    "cancellation_reason": reason.value,
-                },
+                "Task cancelled",
+                extra={**self.session_ids, "cancellation_reason": reason.value},
            )

        # Resume if paused so cancellation can proceed
@@ -298,19 +311,18 @@
            SignalMessage(
                task_id=self.task_id,
                mission_id=self.mission_id,
+                setup_id=self.setup_id,
+                setup_version_id=self.setup_version_id,
                action=SignalType.ACK_CANCEL,
                status=self.status,
+                cancellation_reason=reason,
            ).model_dump(),
        )

    async def _handle_pause(self) -> None:
        """Pause task execution."""
        if not self._paused.is_set():
-            logger.info(
-                "Pausing task: '%s'",
-                self.task_id,
-                extra={"task_id": self.task_id},
-            )
+            logger.info("Task paused", extra=self.session_ids)
            self._paused.set()

            await self.db.update(
@@ -319,6 +331,8 @@
                SignalMessage(
                    task_id=self.task_id,
                    mission_id=self.mission_id,
+                    setup_id=self.setup_id,
+                    setup_version_id=self.setup_version_id,
                    action=SignalType.ACK_PAUSE,
                    status=self.status,
                ).model_dump(),
@@ -327,11 +341,7 @@
    async def _handle_resume(self) -> None:
        """Resume paused task."""
        if self._paused.is_set():
-            logger.info(
-                "Resuming task: '%s'",
-                self.task_id,
-                extra={"task_id": self.task_id},
-            )
+            logger.info("Task resumed", extra=self.session_ids)
            self._paused.clear()

            await self.db.update(
@@ -340,6 +350,8 @@
                SignalMessage(
                    task_id=self.task_id,
                    mission_id=self.mission_id,
+                    setup_id=self.setup_id,
+                    setup_version_id=self.setup_version_id,
                    action=SignalType.ACK_RESUME,
                    status=self.status,
                ).model_dump(),
@@ -351,28 +363,38 @@
            "tasks",
            self.signal_record_id,  # type: ignore
            SignalMessage(
-                mission_id=self.mission_id,
                task_id=self.task_id,
+                mission_id=self.mission_id,
+                setup_id=self.setup_id,
+                setup_version_id=self.setup_version_id,
                status=self.status,
                action=SignalType.ACK_STATUS,
            ).model_dump(),
        )

-        logger.debug(
-            "Status report sent for task: '%s'",
-            self.task_id,
-            extra={"task_id": self.task_id},
-        )
+        logger.debug("Status report sent", extra=self.session_ids)

    async def cleanup(self) -> None:
        """Clean up task session resources.

+        This method is idempotent - safe to call multiple times.
+        Second and subsequent calls are no-ops.
+
        This includes:
        - Clearing queue to free memory
+        - Cleaning up module context services
        - Stopping module
        - Closing database connection
        - Clearing module reference
        """
+        if self._cleanup_done:
+            logger.debug(
+                "Cleanup already done, skipping",
+                extra={"task_id": self.task_id, "mission_id": self.mission_id},
+            )
+            return
+        self._cleanup_done = True
+
        # Clear queue to free memory
        try:
            while not self.queue.empty():
@@ -380,6 +402,16 @@
        except asyncio.QueueEmpty:
            pass

+        # Clean up module context services (e.g., gRPC channel pool)
+        if self.module is not None and self.module.context is not None:
+            try:
+                await self.module.context.cleanup()
+            except Exception:
+                logger.exception(
+                    "Error cleaning up module context",
+                    extra={"mission_id": self.mission_id, "task_id": self.task_id},
+                )
+
        # Stop module
        try:
            await self.module.stop()
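The closing hunks make cleanup() idempotent behind a _cleanup_done guard and release each resource independently, so a failure while tearing down the module context cannot prevent the module stop or the DB close that follow. A simplified sketch of that shape, with hypothetical names standing in for the digitalkin internals:

import asyncio
import logging

logger = logging.getLogger(__name__)


class SessionLike:
    """Illustrative idempotent cleanup: only the first call does any work."""

    def __init__(self) -> None:
        self._cleanup_done = False
        self.queue: asyncio.Queue = asyncio.Queue()

    async def cleanup(self) -> None:
        if self._cleanup_done:
            return  # second and later calls are no-ops
        self._cleanup_done = True

        # Drain the queue to free memory.
        while not self.queue.empty():
            self.queue.get_nowait()

        # Release each resource in its own try block so one failure
        # does not stop the remaining cleanup steps.
        try:
            await self._close_external_resources()
        except Exception:
            logger.exception("Error releasing external resources")

    async def _close_external_resources(self) -> None:
        # Placeholder for e.g. a gRPC channel pool or a DB connection.
        await asyncio.sleep(0)


async def _demo() -> None:
    session = SessionLike()
    await session.cleanup()
    await session.cleanup()  # safe: the guard makes this a no-op


if __name__ == "__main__":
    asyncio.run(_demo())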