digitalkin 0.2.23__py3-none-any.whl → 0.3.1.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- digitalkin/__version__.py +1 -1
- digitalkin/core/__init__.py +1 -0
- digitalkin/core/common/__init__.py +9 -0
- digitalkin/core/common/factories.py +156 -0
- digitalkin/core/job_manager/__init__.py +1 -0
- digitalkin/{modules → core}/job_manager/base_job_manager.py +137 -31
- digitalkin/core/job_manager/single_job_manager.py +354 -0
- digitalkin/{modules → core}/job_manager/taskiq_broker.py +116 -22
- digitalkin/core/job_manager/taskiq_job_manager.py +541 -0
- digitalkin/core/task_manager/__init__.py +1 -0
- digitalkin/core/task_manager/base_task_manager.py +539 -0
- digitalkin/core/task_manager/local_task_manager.py +108 -0
- digitalkin/core/task_manager/remote_task_manager.py +87 -0
- digitalkin/core/task_manager/surrealdb_repository.py +266 -0
- digitalkin/core/task_manager/task_executor.py +249 -0
- digitalkin/core/task_manager/task_session.py +406 -0
- digitalkin/grpc_servers/__init__.py +1 -19
- digitalkin/grpc_servers/_base_server.py +3 -3
- digitalkin/grpc_servers/module_server.py +27 -43
- digitalkin/grpc_servers/module_servicer.py +51 -36
- digitalkin/grpc_servers/registry_server.py +2 -2
- digitalkin/grpc_servers/registry_servicer.py +4 -4
- digitalkin/grpc_servers/utils/__init__.py +1 -0
- digitalkin/grpc_servers/utils/exceptions.py +0 -8
- digitalkin/grpc_servers/utils/grpc_client_wrapper.py +4 -4
- digitalkin/grpc_servers/utils/grpc_error_handler.py +53 -0
- digitalkin/logger.py +73 -24
- digitalkin/mixins/__init__.py +19 -0
- digitalkin/mixins/base_mixin.py +10 -0
- digitalkin/mixins/callback_mixin.py +24 -0
- digitalkin/mixins/chat_history_mixin.py +110 -0
- digitalkin/mixins/cost_mixin.py +76 -0
- digitalkin/mixins/file_history_mixin.py +93 -0
- digitalkin/mixins/filesystem_mixin.py +46 -0
- digitalkin/mixins/logger_mixin.py +51 -0
- digitalkin/mixins/storage_mixin.py +79 -0
- digitalkin/models/core/__init__.py +1 -0
- digitalkin/{modules/job_manager → models/core}/job_manager_models.py +3 -3
- digitalkin/models/core/task_monitor.py +70 -0
- digitalkin/models/grpc_servers/__init__.py +1 -0
- digitalkin/{grpc_servers/utils → models/grpc_servers}/models.py +5 -5
- digitalkin/models/module/__init__.py +2 -0
- digitalkin/models/module/module.py +9 -1
- digitalkin/models/module/module_context.py +122 -6
- digitalkin/models/module/module_types.py +307 -19
- digitalkin/models/services/__init__.py +9 -0
- digitalkin/models/services/cost.py +1 -0
- digitalkin/models/services/storage.py +39 -5
- digitalkin/modules/_base_module.py +123 -118
- digitalkin/modules/tool_module.py +10 -2
- digitalkin/modules/trigger_handler.py +7 -6
- digitalkin/services/cost/__init__.py +9 -2
- digitalkin/services/cost/grpc_cost.py +9 -42
- digitalkin/services/filesystem/default_filesystem.py +0 -2
- digitalkin/services/filesystem/grpc_filesystem.py +10 -39
- digitalkin/services/setup/default_setup.py +5 -6
- digitalkin/services/setup/grpc_setup.py +52 -15
- digitalkin/services/storage/grpc_storage.py +4 -4
- digitalkin/services/user_profile/__init__.py +1 -0
- digitalkin/services/user_profile/default_user_profile.py +55 -0
- digitalkin/services/user_profile/grpc_user_profile.py +69 -0
- digitalkin/services/user_profile/user_profile_strategy.py +40 -0
- digitalkin/utils/__init__.py +28 -0
- digitalkin/utils/arg_parser.py +1 -1
- digitalkin/utils/development_mode_action.py +2 -2
- digitalkin/utils/dynamic_schema.py +483 -0
- digitalkin/utils/package_discover.py +1 -2
- {digitalkin-0.2.23.dist-info → digitalkin-0.3.1.dev2.dist-info}/METADATA +11 -30
- digitalkin-0.3.1.dev2.dist-info/RECORD +119 -0
- modules/dynamic_setup_module.py +362 -0
- digitalkin/grpc_servers/utils/factory.py +0 -180
- digitalkin/modules/job_manager/single_job_manager.py +0 -294
- digitalkin/modules/job_manager/taskiq_job_manager.py +0 -290
- digitalkin-0.2.23.dist-info/RECORD +0 -89
- /digitalkin/{grpc_servers/utils → models/grpc_servers}/types.py +0 -0
- {digitalkin-0.2.23.dist-info → digitalkin-0.3.1.dev2.dist-info}/WHEEL +0 -0
- {digitalkin-0.2.23.dist-info → digitalkin-0.3.1.dev2.dist-info}/licenses/LICENSE +0 -0
- {digitalkin-0.2.23.dist-info → digitalkin-0.3.1.dev2.dist-info}/top_level.txt +0 -0
digitalkin/core/task_manager/task_session.py
@@ -0,0 +1,406 @@
+"""Task session easing task lifecycle management."""
+
+import asyncio
+import datetime
+from collections.abc import AsyncGenerator
+
+from digitalkin.core.task_manager.surrealdb_repository import SurrealDBConnection
+from digitalkin.logger import logger
+from digitalkin.models.core.task_monitor import (
+    CancellationReason,
+    HeartbeatMessage,
+    SignalMessage,
+    SignalType,
+    TaskStatus,
+)
+from digitalkin.modules._base_module import BaseModule
+
+
+class TaskSession:
+    """Task Session with lifecycle management.
+
+    The Session defined the whole lifecycle of a task as an epheneral context.
+    """
+
+    db: SurrealDBConnection
+    module: BaseModule
+
+    status: TaskStatus
+    signal_queue: AsyncGenerator | None
+
+    task_id: str
+    mission_id: str
+    signal_record_id: str | None
+    heartbeat_record_id: str | None
+
+    started_at: datetime.datetime | None
+    completed_at: datetime.datetime | None
+
+    is_cancelled: asyncio.Event
+    cancellation_reason: CancellationReason
+    _paused: asyncio.Event
+    _heartbeat_interval: datetime.timedelta
+    _last_heartbeat: datetime.datetime
+
+    def __init__(
+        self,
+        task_id: str,
+        mission_id: str,
+        db: SurrealDBConnection,
+        module: BaseModule,
+        heartbeat_interval: datetime.timedelta = datetime.timedelta(seconds=2),
+        queue_maxsize: int = 1000,
+    ) -> None:
+        """Initialize Task Session.
+
+        Args:
+            task_id: Unique task identifier
+            mission_id: Mission identifier
+            db: SurrealDB connection
+            module: Module instance
+            heartbeat_interval: Interval between heartbeats
+            queue_maxsize: Maximum size for the queue (0 = unlimited)
+        """
+        self.db = db
+        self.module = module
+
+        self.status = TaskStatus.PENDING
+        # Bounded queue to prevent unbounded memory growth (max 1000 items)
+        self.queue: asyncio.Queue = asyncio.Queue(maxsize=queue_maxsize)
+
+        self.task_id = task_id
+        self.mission_id = mission_id
+
+        self.heartbeat = None
+        self.started_at = None
+        self.completed_at = None
+
+        self.signal_record_id = None
+        self.heartbeat_record_id = None
+
+        self.is_cancelled = asyncio.Event()
+        self.cancellation_reason = CancellationReason.UNKNOWN
+        self._paused = asyncio.Event()
+        self._heartbeat_interval = heartbeat_interval
+
+        logger.info(
+            "TaskContext initialized for task: '%s'",
+            task_id,
+            extra={"task_id": task_id, "mission_id": mission_id, "heartbeat_interval": heartbeat_interval},
+        )
+
+    @property
+    def cancelled(self) -> bool:
+        """Task cancellation status."""
+        return self.is_cancelled.is_set()
+
+    @property
+    def paused(self) -> bool:
+        """Task paused status."""
+        return self._paused.is_set()
+
+    async def send_heartbeat(self) -> bool:
+        """Rate-limited heartbeat with connection resilience.
+
+        Returns:
+            bool: True if heartbeat was successful, False otherwise
+        """
+        heartbeat = HeartbeatMessage(
+            task_id=self.task_id,
+            mission_id=self.mission_id,
+            timestamp=datetime.datetime.now(datetime.timezone.utc),
+        )
+
+        if self.heartbeat_record_id is None:
+            try:
+                success = await self.db.create("heartbeats", heartbeat.model_dump())
+                if "code" not in success:
+                    self.heartbeat_record_id = success.get("id")  # type: ignore
+                    self._last_heartbeat = heartbeat.timestamp
+                    return True
+            except Exception as e:
+                logger.error(
+                    "Heartbeat exception for task: '%s'",
+                    self.task_id,
+                    extra={"task_id": self.task_id, "error": str(e)},
+                    exc_info=True,
+                )
+            logger.error(
+                "Initial heartbeat failed for task: '%s'",
+                self.task_id,
+                extra={"task_id": self.task_id},
+            )
+            return False
+
+        if (heartbeat.timestamp - self._last_heartbeat) < self._heartbeat_interval:
+            logger.debug(
+                "Heartbeat skipped due to rate limiting for task: '%s' | delta=%s",
+                self.task_id,
+                heartbeat.timestamp - self._last_heartbeat,
+            )
+            return True
+
+        try:
+            success = await self.db.merge("heartbeats", self.heartbeat_record_id, heartbeat.model_dump())
+            if "code" not in success:
+                self._last_heartbeat = heartbeat.timestamp
+                return True
+        except Exception as e:
+            logger.error(
+                "Heartbeat exception for task: '%s'",
+                self.task_id,
+                extra={"task_id": self.task_id, "error": str(e)},
+                exc_info=True,
+            )
+        logger.warning(
+            "Heartbeat failed for task: '%s'",
+            self.task_id,
+            extra={"task_id": self.task_id},
+        )
+        return False
+
+    async def generate_heartbeats(self) -> None:
+        """Periodic heartbeat generator with cancellation support."""
+        logger.debug(
+            "Heartbeat generator started for task: '%s'",
+            self.task_id,
+            extra={"task_id": self.task_id, "mission_id": self.mission_id},
+        )
+        while not self.cancelled:
+            logger.debug(
+                "Heartbeat tick for task: '%s', cancelled=%s",
+                self.task_id,
+                self.cancelled,
+                extra={"task_id": self.task_id, "mission_id": self.mission_id},
+            )
+            success = await self.send_heartbeat()
+            if not success:
+                logger.error(
+                    "Heartbeat failed, cancelling task: '%s'",
+                    self.task_id,
+                    extra={"task_id": self.task_id, "mission_id": self.mission_id},
+                )
+                await self._handle_cancel(CancellationReason.HEARTBEAT_FAILURE)
+                break
+            await asyncio.sleep(self._heartbeat_interval.total_seconds())
+
+    async def wait_if_paused(self) -> None:
+        """Block execution if task is paused."""
+        if self._paused.is_set():
+            logger.info(
+                "Task paused, waiting for resume: '%s'",
+                self.task_id,
+                extra={"task_id": self.task_id},
+            )
+            await self._paused.wait()
+
+    async def listen_signals(self) -> None:  # noqa: C901
+        """Enhanced signal listener with comprehensive handling.
+
+        Raises:
+            CancelledError: Asyncio when task cancelling
+        """
+        logger.info(
+            "Signal listener started for task: '%s'",
+            self.task_id,
+            extra={"task_id": self.task_id},
+        )
+        if self.signal_record_id is None:
+            self.signal_record_id = (await self.db.select_by_task_id("tasks", self.task_id)).get("id")
+
+        live_id, live_signals = await self.db.start_live("tasks")
+        try:
+            async for signal in live_signals:
+                logger.debug("Signal received for task '%s': %s", self.task_id, signal)
+                if self.cancelled:
+                    break
+
+                if signal is None or signal["id"] == self.signal_record_id or "payload" not in signal:
+                    continue
+
+                if signal["action"] == "cancel":
+                    await self._handle_cancel(CancellationReason.SIGNAL)
+                elif signal["action"] == "pause":
+                    await self._handle_pause()
+                elif signal["action"] == "resume":
+                    await self._handle_resume()
+                elif signal["action"] == "status":
+                    await self._handle_status_request()
+
+        except asyncio.CancelledError:
+            logger.debug(
+                "Signal listener cancelled for task: '%s'",
+                self.task_id,
+                extra={"task_id": self.task_id},
+            )
+            raise
+        except Exception as e:
+            logger.error(
+                "Signal listener fatal error for task: '%s'",
+                self.task_id,
+                extra={"task_id": self.task_id, "error": str(e)},
+                exc_info=True,
+            )
+        finally:
+            await self.db.stop_live(live_id)
+            logger.info(
+                "Signal listener stopped for task: '%s'",
+                self.task_id,
+                extra={"task_id": self.task_id},
+            )
+
+    async def _handle_cancel(self, reason: CancellationReason = CancellationReason.UNKNOWN) -> None:
+        """Idempotent cancellation with acknowledgment and reason tracking.
+
+        Args:
+            reason: The reason for cancellation (signal, heartbeat failure, cleanup, etc.)
+        """
+        if self.is_cancelled.is_set():
+            logger.debug(
+                "Cancel ignored - task already cancelled: '%s' (existing reason: %s, new reason: %s)",
+                self.task_id,
+                self.cancellation_reason.value,
+                reason.value,
+                extra={
+                    "task_id": self.task_id,
+                    "mission_id": self.mission_id,
+                    "existing_reason": self.cancellation_reason.value,
+                    "new_reason": reason.value,
+                },
+            )
+            return
+
+        self.cancellation_reason = reason
+        self.status = TaskStatus.CANCELLED
+        self.is_cancelled.set()
+
+        # Log with appropriate level based on reason
+        if reason in {CancellationReason.SUCCESS_CLEANUP, CancellationReason.FAILURE_CLEANUP}:
+            logger.debug(
+                "Task cancelled (cleanup): '%s', reason: %s",
+                self.task_id,
+                reason.value,
+                extra={
+                    "task_id": self.task_id,
+                    "mission_id": self.mission_id,
+                    "cancellation_reason": reason.value,
+                },
+            )
+        else:
+            logger.info(
+                "Task cancelled: '%s', reason: %s",
+                self.task_id,
+                reason.value,
+                extra={
+                    "task_id": self.task_id,
+                    "mission_id": self.mission_id,
+                    "cancellation_reason": reason.value,
+                },
+            )
+
+        # Resume if paused so cancellation can proceed
+        if self._paused.is_set():
+            self._paused.set()
+
+        await self.db.update(
+            "tasks",
+            self.signal_record_id,  # type: ignore
+            SignalMessage(
+                task_id=self.task_id,
+                mission_id=self.mission_id,
+                action=SignalType.ACK_CANCEL,
+                status=self.status,
+            ).model_dump(),
+        )
+
+    async def _handle_pause(self) -> None:
+        """Pause task execution."""
+        if not self._paused.is_set():
+            logger.info(
+                "Pausing task: '%s'",
+                self.task_id,
+                extra={"task_id": self.task_id},
+            )
+            self._paused.set()
+
+            await self.db.update(
+                "tasks",
+                self.signal_record_id,  # type: ignore
+                SignalMessage(
+                    task_id=self.task_id,
+                    mission_id=self.mission_id,
+                    action=SignalType.ACK_PAUSE,
+                    status=self.status,
+                ).model_dump(),
+            )
+
+    async def _handle_resume(self) -> None:
+        """Resume paused task."""
+        if self._paused.is_set():
+            logger.info(
+                "Resuming task: '%s'",
+                self.task_id,
+                extra={"task_id": self.task_id},
+            )
+            self._paused.clear()
+
+            await self.db.update(
+                "tasks",
+                self.signal_record_id,  # type: ignore
+                SignalMessage(
+                    task_id=self.task_id,
+                    mission_id=self.mission_id,
+                    action=SignalType.ACK_RESUME,
+                    status=self.status,
+                ).model_dump(),
+            )
+
+    async def _handle_status_request(self) -> None:
+        """Send current task status."""
+        await self.db.update(
+            "tasks",
+            self.signal_record_id,  # type: ignore
+            SignalMessage(
+                mission_id=self.mission_id,
+                task_id=self.task_id,
+                status=self.status,
+                action=SignalType.ACK_STATUS,
+            ).model_dump(),
+        )
+
+        logger.debug(
+            "Status report sent for task: '%s'",
+            self.task_id,
+            extra={"task_id": self.task_id},
+        )
+
+    async def cleanup(self) -> None:
+        """Clean up task session resources.
+
+        This includes:
+        - Clearing queue to free memory
+        - Stopping module
+        - Closing database connection
+        - Clearing module reference
+        """
+        # Clear queue to free memory
+        try:
+            while not self.queue.empty():
+                self.queue.get_nowait()
+        except asyncio.QueueEmpty:
+            pass
+
+        # Stop module
+        try:
+            await self.module.stop()
+        except Exception:
+            logger.exception(
+                "Error stopping module during cleanup",
+                extra={"mission_id": self.mission_id, "task_id": self.task_id},
+            )
+
+        # Close DB connection (kills all live queries)
+        await self.db.close()
+
+        # Clear module reference to allow garbage collection
+        self.module = None  # type: ignore
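The new `TaskSession` ties a running module to a SurrealDB-backed heartbeat record and a live signal channel (cancel, pause, resume, status), and `cleanup()` tears all of that down. A minimal sketch of how a task manager might drive one session, assuming an existing `SurrealDBConnection` and `BaseModule` instance; the driver function, ids, and the work placeholder are illustrative and not part of the package:

```python
import asyncio
import datetime

from digitalkin.core.task_manager.surrealdb_repository import SurrealDBConnection
from digitalkin.core.task_manager.task_session import TaskSession
from digitalkin.modules._base_module import BaseModule


async def run_with_session(db: SurrealDBConnection, module: BaseModule) -> None:
    # Hypothetical driver: task/mission ids would normally come from the task manager.
    session = TaskSession(
        task_id="task-123",
        mission_id="mission-abc",
        db=db,
        module=module,
        heartbeat_interval=datetime.timedelta(seconds=2),
    )
    # Heartbeats and the signal listener run alongside the actual work.
    monitors = [
        asyncio.create_task(session.generate_heartbeats()),
        asyncio.create_task(session.listen_signals()),
    ]
    try:
        while not session.cancelled:
            await session.wait_if_paused()  # documented to block while the task is paused
            break  # placeholder for one unit of module work
    finally:
        for monitor in monitors:
            monitor.cancel()
        # Drains the queue, stops the module, and closes the DB connection.
        await session.cleanup()
```

Signals arrive through the `tasks` live query and are acknowledged with the `ACK_*` signal types; a failed heartbeat cancels the task with `CancellationReason.HEARTBEAT_FAILURE`.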
digitalkin/grpc_servers/__init__.py
@@ -1,19 +1 @@
-"""This package contains the gRPC server and client implementations.
-
-```shell
-digitalkin/grpc/
-├── __init__.py
-├── base_server.py # Base server implementation with common functionality
-├── module_server.py # Module-specific server implementation
-├── registry_server.py # Registry-specific server implementation
-├── module_servicer.py # gRPC servicer for Module service
-├── registry_servicer.py # gRPC servicer for Registry service
-├── client/ # Client libraries for connecting to servers
-│   ├── __init__.py
-│   ├── module_client.py
-│   └── registry_client.py
-└── utils/ # Utility functions
-    ├── __init__.py
-    └── server_utils.py # Common server utilities
-```
-"""
+"""This package contains the gRPC server and client implementations."""
digitalkin/grpc_servers/_base_server.py
@@ -17,9 +17,9 @@ from digitalkin.grpc_servers.utils.exceptions import (
     ServerStateError,
     ServicerError,
 )
-from digitalkin.grpc_servers.utils.models import SecurityMode, ServerConfig, ServerMode
-from digitalkin.grpc_servers.utils.types import GrpcServer, ServiceDescriptor, T
 from digitalkin.logger import logger
+from digitalkin.models.grpc_servers.models import SecurityMode, ServerConfig, ServerMode
+from digitalkin.models.grpc_servers.types import GrpcServer, ServiceDescriptor, T
 
 
 class BaseServer(abc.ABC):
@@ -299,7 +299,7 @@ class BaseServer(abc.ABC):
         self._add_reflection()
 
         # Start the server
-        logger.debug("Starting gRPC server on %s", self.config.address)
+        logger.debug("Starting gRPC server on %s", self.config.address, extra={"config": self.config})
         try:
             if self.config.mode == ServerMode.ASYNC:
                 # For async server, use the event loop
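The import changes above reflect the move of the server configuration models and gRPC typing helpers from `digitalkin.grpc_servers.utils` to `digitalkin.models.grpc_servers` (see the renamed `models.py` and `types.py` entries in the file list). Downstream code that imported these names directly would update as sketched below; the class names themselves are unchanged:

```python
# 0.2.23 import paths (removed in this release):
# from digitalkin.grpc_servers.utils.models import SecurityMode, ServerConfig, ServerMode
# from digitalkin.grpc_servers.utils.types import GrpcServer, ServiceDescriptor, T

# 0.3.1.dev2 import paths:
from digitalkin.models.grpc_servers.models import SecurityMode, ServerConfig, ServerMode
from digitalkin.models.grpc_servers.types import GrpcServer, ServiceDescriptor, T
```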
digitalkin/grpc_servers/module_server.py
@@ -1,31 +1,30 @@
 """Module gRPC server implementation for DigitalKin."""
 
-from pathlib import Path
 import uuid
+from pathlib import Path
 
 import grpc
+from digitalkin_proto.agentic_mesh_protocol.module.v1 import (
+    module_service_pb2,
+    module_service_pb2_grpc,
+)
+from digitalkin_proto.agentic_mesh_protocol.module_registry.v1 import (
+    metadata_pb2,
+    module_registry_service_pb2_grpc,
+    registration_pb2,
+)
 
 from digitalkin.grpc_servers._base_server import BaseServer
 from digitalkin.grpc_servers.module_servicer import ModuleServicer
 from digitalkin.grpc_servers.utils.exceptions import ServerError
-from digitalkin.grpc_servers.utils.models import (
+from digitalkin.logger import logger
+from digitalkin.models.grpc_servers.models import (
     ClientConfig,
     ModuleServerConfig,
     SecurityMode,
 )
 from digitalkin.modules._base_module import BaseModule
 
-from digitalkin_proto.digitalkin.module.v2 import (
-    module_service_pb2,
-    module_service_pb2_grpc,
-)
-from digitalkin_proto.digitalkin.module_registry.v2 import (
-    metadata_pb2,
-    module_registry_service_pb2_grpc,
-    registration_pb2,
-)
-from digitalkin.logger import logger
-
 
 class ModuleServer(BaseServer):
     """gRPC server for a DigitalKin module.
@@ -50,7 +49,8 @@ class ModuleServer(BaseServer):
 
         Args:
             module_class: The module instance to be served.
-
+            server_config: Server configuration including registry address if auto-registration is desired.
+            client_config: Client configuration used by services.
         """
         super().__init__(server_config)
         self.module_class = module_class
@@ -79,10 +79,9 @@ class ModuleServer(BaseServer):
 
     def start(self) -> None:
         """Start the module server and register with the registry if configured."""
-        logger.info("Starting module server",extra={"server_config": self.server_config})
+        logger.info("Starting module server", extra={"server_config": self.server_config})
         super().start()
 
-        logger.debug("Starting module server",extra={"server_config": self.server_config})
         # If a registry address is provided, register the module
         if self.server_config.registry_address:
             try:
@@ -91,14 +90,12 @@ class ModuleServer(BaseServer):
                 logger.exception("Failed to register with registry")
 
         if self.module_servicer is not None:
-            logger.debug(
-                "Setup post init started",extra={"client_config": self.client_config}
-            )
+            logger.debug("Setup post init started", extra={"client_config": self.client_config})
            self.module_servicer.setup.__post_init__(self.client_config)
 
     async def start_async(self) -> None:
         """Start the module server and register with the registry if configured."""
-        logger.info("Starting module server",extra={"server_config": self.server_config})
+        logger.info("Starting module server", extra={"server_config": self.server_config})
         await super().start_async()
         # If a registry address is provided, register the module
         if self.server_config.registry_address:
@@ -108,10 +105,8 @@ class ModuleServer(BaseServer):
                 logger.exception("Failed to register with registry")
 
         if self.module_servicer is not None:
-            logger.info(
-
-            )
-            await self.module_servicer.job_manager._start()
+            logger.info("Setup post init started", extra={"client_config": self.client_config})
+            await self.module_servicer.job_manager.start()
             self.module_servicer.setup.__post_init__(self.client_config)
 
     def stop(self, grace: float | None = None) -> None:
@@ -134,6 +129,7 @@ class ModuleServer(BaseServer):
         logger.debug(
             "Registering module with registry at %s",
             self.server_config.registry_address,
+            extra={"server_config": self.server_config},
         )
 
         # Create appropriate channel based on security mode
@@ -148,16 +144,11 @@ class ModuleServer(BaseServer):
 
         metadata = metadata_pb2.Metadata(
             name=self.module_class.metadata["name"],
-            tags=[
-                metadata_pb2.Tag(tag=tag)
-                for tag in self.module_class.metadata["tags"]
-            ],
+            tags=[metadata_pb2.Tag(tag=tag) for tag in self.module_class.metadata["tags"]],
             description=self.module_class.metadata["description"],
         )
 
-        self.module_class.metadata["module_id"] = (
-            f"{self.module_class.metadata['name']}:{uuid.uuid4()}"
-        )
+        self.module_class.metadata["module_id"] = f"{self.module_class.metadata['name']}:{uuid.uuid4()}"
         # Create registration request
         request = registration_pb2.RegisterRequest(
             module_id=self.module_class.metadata["module_id"],
@@ -173,6 +164,7 @@ class ModuleServer(BaseServer):
             "Request sent to registry for module: %s:%s",
             self.module_class.metadata["name"],
             self.module_class.metadata["module_id"],
+            extra={"module_info": self.module_class.metadata},
         )
         response = stub.RegisterModule(request)
 
@@ -234,9 +226,7 @@ class ModuleServer(BaseServer):
         ):
             # Secure channel
             # Secure channel
-            root_certificates = Path(
-                self.client_config.credentials.root_cert_path
-            ).read_bytes()
+            root_certificates = Path(self.client_config.credentials.root_cert_path).read_bytes()
 
             # mTLS channel
             private_key = None
@@ -245,12 +235,8 @@ class ModuleServer(BaseServer):
                 self.client_config.credentials.client_cert_path is not None
                 and self.client_config.credentials.client_key_path is not None
             ):
-                private_key = Path(
-                    self.client_config.credentials.client_key_path
-                ).read_bytes()
-                certificate_chain = Path(
-                    self.client_config.credentials.client_cert_path
-                ).read_bytes()
+                private_key = Path(self.client_config.credentials.client_key_path).read_bytes()
+                certificate_chain = Path(self.client_config.credentials.client_cert_path).read_bytes()
 
             # Create channel credentials
             channel_credentials = grpc.ssl_channel_credentials(
@@ -258,9 +244,7 @@ class ModuleServer(BaseServer):
                 certificate_chain=certificate_chain,
                 private_key=private_key,
             )
-            return grpc.secure_channel(
-                self.server_config.registry_address, channel_credentials
-            )
+            return grpc.secure_channel(self.server_config.registry_address, channel_credentials)
         # Insecure channel
         return grpc.insecure_channel(self.server_config.registry_address)
 
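For `module_server.py`, the generated proto stubs move from `digitalkin_proto.digitalkin.*.v2` to `digitalkin_proto.agentic_mesh_protocol.*.v1`, the job manager is now started through the public `start()` instead of `_start()`, and the constructor arguments are documented as `module_class`, `server_config` and `client_config`. A hedged sketch of serving a module based only on those documented arguments; the `serve` helper is hypothetical, and the config objects are assumed to be built by the caller since their fields are not shown in this diff:

```python
from digitalkin.grpc_servers.module_server import ModuleServer
from digitalkin.models.grpc_servers.models import ClientConfig, ModuleServerConfig
from digitalkin.modules._base_module import BaseModule


def serve(module: BaseModule, server_config: ModuleServerConfig, client_config: ClientConfig) -> None:
    # Hypothetical helper: the config objects are constructed by the caller,
    # since their fields are not part of this hunk.
    server = ModuleServer(
        module_class=module,  # despite the name, the docstring says this is the module instance
        server_config=server_config,
        client_config=client_config,
    )
    # start() registers with the registry when server_config.registry_address is set,
    # then runs __post_init__ on the servicer's setup service with client_config.
    server.start()
```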