digitalkin 0.3.1.dev2__py3-none-any.whl → 0.3.2.dev14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- base_server/server_async_insecure.py +6 -5
- base_server/server_async_secure.py +6 -5
- base_server/server_sync_insecure.py +5 -4
- base_server/server_sync_secure.py +5 -4
- digitalkin/__version__.py +1 -1
- digitalkin/core/job_manager/base_job_manager.py +1 -1
- digitalkin/core/job_manager/single_job_manager.py +28 -9
- digitalkin/core/job_manager/taskiq_broker.py +7 -6
- digitalkin/core/job_manager/taskiq_job_manager.py +1 -1
- digitalkin/core/task_manager/surrealdb_repository.py +7 -7
- digitalkin/core/task_manager/task_session.py +60 -98
- digitalkin/grpc_servers/module_server.py +109 -168
- digitalkin/grpc_servers/module_servicer.py +38 -16
- digitalkin/grpc_servers/utils/grpc_client_wrapper.py +24 -8
- digitalkin/grpc_servers/utils/utility_schema_extender.py +100 -0
- digitalkin/models/__init__.py +1 -1
- digitalkin/models/core/job_manager_models.py +0 -8
- digitalkin/models/core/task_monitor.py +4 -0
- digitalkin/models/grpc_servers/models.py +91 -6
- digitalkin/models/module/__init__.py +18 -13
- digitalkin/models/module/base_types.py +61 -0
- digitalkin/models/module/module_context.py +173 -13
- digitalkin/models/module/module_types.py +28 -392
- digitalkin/models/module/setup_types.py +490 -0
- digitalkin/models/module/tool_cache.py +68 -0
- digitalkin/models/module/tool_reference.py +117 -0
- digitalkin/models/module/utility.py +167 -0
- digitalkin/models/services/registry.py +35 -0
- digitalkin/modules/__init__.py +5 -1
- digitalkin/modules/_base_module.py +154 -61
- digitalkin/modules/archetype_module.py +6 -1
- digitalkin/modules/tool_module.py +6 -1
- digitalkin/modules/triggers/__init__.py +8 -0
- digitalkin/modules/triggers/healthcheck_ping_trigger.py +45 -0
- digitalkin/modules/triggers/healthcheck_services_trigger.py +63 -0
- digitalkin/modules/triggers/healthcheck_status_trigger.py +52 -0
- digitalkin/services/__init__.py +4 -0
- digitalkin/services/communication/__init__.py +7 -0
- digitalkin/services/communication/communication_strategy.py +76 -0
- digitalkin/services/communication/default_communication.py +101 -0
- digitalkin/services/communication/grpc_communication.py +234 -0
- digitalkin/services/cost/grpc_cost.py +1 -1
- digitalkin/services/filesystem/grpc_filesystem.py +1 -1
- digitalkin/services/registry/__init__.py +22 -1
- digitalkin/services/registry/default_registry.py +135 -4
- digitalkin/services/registry/exceptions.py +47 -0
- digitalkin/services/registry/grpc_registry.py +306 -0
- digitalkin/services/registry/registry_models.py +15 -0
- digitalkin/services/registry/registry_strategy.py +88 -4
- digitalkin/services/services_config.py +25 -3
- digitalkin/services/services_models.py +5 -1
- digitalkin/services/setup/default_setup.py +1 -1
- digitalkin/services/setup/grpc_setup.py +1 -1
- digitalkin/services/storage/grpc_storage.py +1 -1
- digitalkin/services/user_profile/__init__.py +11 -0
- digitalkin/services/user_profile/grpc_user_profile.py +2 -2
- digitalkin/services/user_profile/user_profile_strategy.py +0 -15
- digitalkin/utils/schema_splitter.py +207 -0
- {digitalkin-0.3.1.dev2.dist-info → digitalkin-0.3.2.dev14.dist-info}/METADATA +5 -5
- digitalkin-0.3.2.dev14.dist-info/RECORD +143 -0
- {digitalkin-0.3.1.dev2.dist-info → digitalkin-0.3.2.dev14.dist-info}/top_level.txt +1 -0
- modules/archetype_with_tools_module.py +244 -0
- modules/cpu_intensive_module.py +1 -1
- modules/dynamic_setup_module.py +5 -29
- modules/minimal_llm_module.py +1 -1
- modules/text_transform_module.py +1 -1
- monitoring/digitalkin_observability/__init__.py +46 -0
- monitoring/digitalkin_observability/http_server.py +150 -0
- monitoring/digitalkin_observability/interceptors.py +176 -0
- monitoring/digitalkin_observability/metrics.py +201 -0
- monitoring/digitalkin_observability/prometheus.py +137 -0
- monitoring/tests/test_metrics.py +172 -0
- services/filesystem_module.py +7 -5
- services/storage_module.py +4 -2
- digitalkin/grpc_servers/registry_server.py +0 -65
- digitalkin/grpc_servers/registry_servicer.py +0 -456
- digitalkin-0.3.1.dev2.dist-info/RECORD +0 -119
- {digitalkin-0.3.1.dev2.dist-info → digitalkin-0.3.2.dev14.dist-info}/WHEEL +0 -0
- {digitalkin-0.3.1.dev2.dist-info → digitalkin-0.3.2.dev14.dist-info}/licenses/LICENSE +0 -0
|
@@ -84,9 +84,12 @@ class TaskSession:
|
|
|
84
84
|
self._heartbeat_interval = heartbeat_interval
|
|
85
85
|
|
|
86
86
|
logger.info(
|
|
87
|
-
"
|
|
88
|
-
|
|
89
|
-
|
|
87
|
+
"TaskSession initialized",
|
|
88
|
+
extra={
|
|
89
|
+
"task_id": task_id,
|
|
90
|
+
"mission_id": mission_id,
|
|
91
|
+
"heartbeat_interval": str(heartbeat_interval),
|
|
92
|
+
},
|
|
90
93
|
)
|
|
91
94
|
|
|
92
95
|
@property
|
|
@@ -99,6 +102,21 @@ class TaskSession:
|
|
|
99
102
|
"""Task paused status."""
|
|
100
103
|
return self._paused.is_set()
|
|
101
104
|
|
|
105
|
+
@property
|
|
106
|
+
def setup_id(self) -> str:
|
|
107
|
+
"""Get setup_id from module context."""
|
|
108
|
+
return self.module.context.session.setup_id
|
|
109
|
+
|
|
110
|
+
@property
|
|
111
|
+
def setup_version_id(self) -> str:
|
|
112
|
+
"""Get setup_version_id from module context."""
|
|
113
|
+
return self.module.context.session.setup_version_id
|
|
114
|
+
|
|
115
|
+
@property
|
|
116
|
+
def session_ids(self) -> dict[str, str]:
|
|
117
|
+
"""Get all session IDs from module context for structured logging."""
|
|
118
|
+
return self.module.context.session.current_ids()
|
|
119
|
+
|
|
102
120
|
async def send_heartbeat(self) -> bool:
|
|
103
121
|
"""Rate-limited heartbeat with connection resilience.
|
|
104
122
|
|
|
@@ -108,6 +126,8 @@ class TaskSession:
|
|
|
108
126
|
heartbeat = HeartbeatMessage(
|
|
109
127
|
task_id=self.task_id,
|
|
110
128
|
mission_id=self.mission_id,
|
|
129
|
+
setup_id=self.setup_id,
|
|
130
|
+
setup_version_id=self.setup_version_id,
|
|
111
131
|
timestamp=datetime.datetime.now(datetime.timezone.utc),
|
|
112
132
|
)
|
|
113
133
|
|
|
@@ -120,23 +140,17 @@ class TaskSession:
|
|
|
120
140
|
return True
|
|
121
141
|
except Exception as e:
|
|
122
142
|
logger.error(
|
|
123
|
-
"Heartbeat exception
|
|
124
|
-
self.
|
|
125
|
-
extra={"task_id": self.task_id, "error": str(e)},
|
|
143
|
+
"Heartbeat exception",
|
|
144
|
+
extra={**self.session_ids, "error": str(e)},
|
|
126
145
|
exc_info=True,
|
|
127
146
|
)
|
|
128
|
-
logger.error(
|
|
129
|
-
"Initial heartbeat failed for task: '%s'",
|
|
130
|
-
self.task_id,
|
|
131
|
-
extra={"task_id": self.task_id},
|
|
132
|
-
)
|
|
147
|
+
logger.error("Initial heartbeat failed", extra=self.session_ids)
|
|
133
148
|
return False
|
|
134
149
|
|
|
135
150
|
if (heartbeat.timestamp - self._last_heartbeat) < self._heartbeat_interval:
|
|
136
151
|
logger.debug(
|
|
137
|
-
"Heartbeat skipped due to rate limiting
|
|
138
|
-
self.
|
|
139
|
-
heartbeat.timestamp - self._last_heartbeat,
|
|
152
|
+
"Heartbeat skipped due to rate limiting",
|
|
153
|
+
extra={**self.session_ids, "delta": str(heartbeat.timestamp - self._last_heartbeat)},
|
|
140
154
|
)
|
|
141
155
|
return True
|
|
142
156
|
|
|
@@ -147,39 +161,24 @@ class TaskSession:
|
|
|
147
161
|
return True
|
|
148
162
|
except Exception as e:
|
|
149
163
|
logger.error(
|
|
150
|
-
"Heartbeat exception
|
|
151
|
-
self.
|
|
152
|
-
extra={"task_id": self.task_id, "error": str(e)},
|
|
164
|
+
"Heartbeat exception",
|
|
165
|
+
extra={**self.session_ids, "error": str(e)},
|
|
153
166
|
exc_info=True,
|
|
154
167
|
)
|
|
155
|
-
logger.warning(
|
|
156
|
-
"Heartbeat failed for task: '%s'",
|
|
157
|
-
self.task_id,
|
|
158
|
-
extra={"task_id": self.task_id},
|
|
159
|
-
)
|
|
168
|
+
logger.warning("Heartbeat failed", extra=self.session_ids)
|
|
160
169
|
return False
|
|
161
170
|
|
|
162
171
|
async def generate_heartbeats(self) -> None:
|
|
163
172
|
"""Periodic heartbeat generator with cancellation support."""
|
|
164
|
-
logger.debug(
|
|
165
|
-
"Heartbeat generator started for task: '%s'",
|
|
166
|
-
self.task_id,
|
|
167
|
-
extra={"task_id": self.task_id, "mission_id": self.mission_id},
|
|
168
|
-
)
|
|
173
|
+
logger.debug("Heartbeat generator started", extra=self.session_ids)
|
|
169
174
|
while not self.cancelled:
|
|
170
175
|
logger.debug(
|
|
171
|
-
"Heartbeat tick
|
|
172
|
-
self.
|
|
173
|
-
self.cancelled,
|
|
174
|
-
extra={"task_id": self.task_id, "mission_id": self.mission_id},
|
|
176
|
+
"Heartbeat tick",
|
|
177
|
+
extra={**self.session_ids, "cancelled": self.cancelled},
|
|
175
178
|
)
|
|
176
179
|
success = await self.send_heartbeat()
|
|
177
180
|
if not success:
|
|
178
|
-
logger.error(
|
|
179
|
-
"Heartbeat failed, cancelling task: '%s'",
|
|
180
|
-
self.task_id,
|
|
181
|
-
extra={"task_id": self.task_id, "mission_id": self.mission_id},
|
|
182
|
-
)
|
|
181
|
+
logger.error("Heartbeat failed, cancelling task", extra=self.session_ids)
|
|
183
182
|
await self._handle_cancel(CancellationReason.HEARTBEAT_FAILURE)
|
|
184
183
|
break
|
|
185
184
|
await asyncio.sleep(self._heartbeat_interval.total_seconds())
|
|
@@ -187,11 +186,7 @@ class TaskSession:
|
|
|
187
186
|
async def wait_if_paused(self) -> None:
|
|
188
187
|
"""Block execution if task is paused."""
|
|
189
188
|
if self._paused.is_set():
|
|
190
|
-
logger.info(
|
|
191
|
-
"Task paused, waiting for resume: '%s'",
|
|
192
|
-
self.task_id,
|
|
193
|
-
extra={"task_id": self.task_id},
|
|
194
|
-
)
|
|
189
|
+
logger.info("Task paused, waiting for resume", extra=self.session_ids)
|
|
195
190
|
await self._paused.wait()
|
|
196
191
|
|
|
197
192
|
async def listen_signals(self) -> None: # noqa: C901
|
|
@@ -200,18 +195,14 @@ class TaskSession:
|
|
|
200
195
|
Raises:
|
|
201
196
|
CancelledError: Asyncio when task cancelling
|
|
202
197
|
"""
|
|
203
|
-
logger.info(
|
|
204
|
-
"Signal listener started for task: '%s'",
|
|
205
|
-
self.task_id,
|
|
206
|
-
extra={"task_id": self.task_id},
|
|
207
|
-
)
|
|
198
|
+
logger.info("Signal listener started", extra=self.session_ids)
|
|
208
199
|
if self.signal_record_id is None:
|
|
209
200
|
self.signal_record_id = (await self.db.select_by_task_id("tasks", self.task_id)).get("id")
|
|
210
201
|
|
|
211
202
|
live_id, live_signals = await self.db.start_live("tasks")
|
|
212
203
|
try:
|
|
213
204
|
async for signal in live_signals:
|
|
214
|
-
logger.debug("Signal received
|
|
205
|
+
logger.debug("Signal received", extra={**self.session_ids, "signal": signal})
|
|
215
206
|
if self.cancelled:
|
|
216
207
|
break
|
|
217
208
|
|
|
@@ -228,26 +219,17 @@ class TaskSession:
|
|
|
228
219
|
await self._handle_status_request()
|
|
229
220
|
|
|
230
221
|
except asyncio.CancelledError:
|
|
231
|
-
logger.debug(
|
|
232
|
-
"Signal listener cancelled for task: '%s'",
|
|
233
|
-
self.task_id,
|
|
234
|
-
extra={"task_id": self.task_id},
|
|
235
|
-
)
|
|
222
|
+
logger.debug("Signal listener cancelled", extra=self.session_ids)
|
|
236
223
|
raise
|
|
237
224
|
except Exception as e:
|
|
238
225
|
logger.error(
|
|
239
|
-
"Signal listener fatal error
|
|
240
|
-
self.
|
|
241
|
-
extra={"task_id": self.task_id, "error": str(e)},
|
|
226
|
+
"Signal listener fatal error",
|
|
227
|
+
extra={**self.session_ids, "error": str(e)},
|
|
242
228
|
exc_info=True,
|
|
243
229
|
)
|
|
244
230
|
finally:
|
|
245
231
|
await self.db.stop_live(live_id)
|
|
246
|
-
logger.info(
|
|
247
|
-
"Signal listener stopped for task: '%s'",
|
|
248
|
-
self.task_id,
|
|
249
|
-
extra={"task_id": self.task_id},
|
|
250
|
-
)
|
|
232
|
+
logger.info("Signal listener stopped", extra=self.session_ids)
|
|
251
233
|
|
|
252
234
|
async def _handle_cancel(self, reason: CancellationReason = CancellationReason.UNKNOWN) -> None:
|
|
253
235
|
"""Idempotent cancellation with acknowledgment and reason tracking.
|
|
@@ -257,13 +239,9 @@ class TaskSession:
|
|
|
257
239
|
"""
|
|
258
240
|
if self.is_cancelled.is_set():
|
|
259
241
|
logger.debug(
|
|
260
|
-
"Cancel ignored -
|
|
261
|
-
self.task_id,
|
|
262
|
-
self.cancellation_reason.value,
|
|
263
|
-
reason.value,
|
|
242
|
+
"Cancel ignored - already cancelled",
|
|
264
243
|
extra={
|
|
265
|
-
|
|
266
|
-
"mission_id": self.mission_id,
|
|
244
|
+
**self.session_ids,
|
|
267
245
|
"existing_reason": self.cancellation_reason.value,
|
|
268
246
|
"new_reason": reason.value,
|
|
269
247
|
},
|
|
@@ -277,25 +255,13 @@ class TaskSession:
|
|
|
277
255
|
# Log with appropriate level based on reason
|
|
278
256
|
if reason in {CancellationReason.SUCCESS_CLEANUP, CancellationReason.FAILURE_CLEANUP}:
|
|
279
257
|
logger.debug(
|
|
280
|
-
"Task cancelled (cleanup)
|
|
281
|
-
self.
|
|
282
|
-
reason.value,
|
|
283
|
-
extra={
|
|
284
|
-
"task_id": self.task_id,
|
|
285
|
-
"mission_id": self.mission_id,
|
|
286
|
-
"cancellation_reason": reason.value,
|
|
287
|
-
},
|
|
258
|
+
"Task cancelled (cleanup)",
|
|
259
|
+
extra={**self.session_ids, "cancellation_reason": reason.value},
|
|
288
260
|
)
|
|
289
261
|
else:
|
|
290
262
|
logger.info(
|
|
291
|
-
"Task cancelled
|
|
292
|
-
self.
|
|
293
|
-
reason.value,
|
|
294
|
-
extra={
|
|
295
|
-
"task_id": self.task_id,
|
|
296
|
-
"mission_id": self.mission_id,
|
|
297
|
-
"cancellation_reason": reason.value,
|
|
298
|
-
},
|
|
263
|
+
"Task cancelled",
|
|
264
|
+
extra={**self.session_ids, "cancellation_reason": reason.value},
|
|
299
265
|
)
|
|
300
266
|
|
|
301
267
|
# Resume if paused so cancellation can proceed
|
|
@@ -308,6 +274,8 @@ class TaskSession:
|
|
|
308
274
|
SignalMessage(
|
|
309
275
|
task_id=self.task_id,
|
|
310
276
|
mission_id=self.mission_id,
|
|
277
|
+
setup_id=self.setup_id,
|
|
278
|
+
setup_version_id=self.setup_version_id,
|
|
311
279
|
action=SignalType.ACK_CANCEL,
|
|
312
280
|
status=self.status,
|
|
313
281
|
).model_dump(),
|
|
@@ -316,11 +284,7 @@ class TaskSession:
|
|
|
316
284
|
async def _handle_pause(self) -> None:
|
|
317
285
|
"""Pause task execution."""
|
|
318
286
|
if not self._paused.is_set():
|
|
319
|
-
logger.info(
|
|
320
|
-
"Pausing task: '%s'",
|
|
321
|
-
self.task_id,
|
|
322
|
-
extra={"task_id": self.task_id},
|
|
323
|
-
)
|
|
287
|
+
logger.info("Task paused", extra=self.session_ids)
|
|
324
288
|
self._paused.set()
|
|
325
289
|
|
|
326
290
|
await self.db.update(
|
|
@@ -329,6 +293,8 @@ class TaskSession:
|
|
|
329
293
|
SignalMessage(
|
|
330
294
|
task_id=self.task_id,
|
|
331
295
|
mission_id=self.mission_id,
|
|
296
|
+
setup_id=self.setup_id,
|
|
297
|
+
setup_version_id=self.setup_version_id,
|
|
332
298
|
action=SignalType.ACK_PAUSE,
|
|
333
299
|
status=self.status,
|
|
334
300
|
).model_dump(),
|
|
@@ -337,11 +303,7 @@ class TaskSession:
|
|
|
337
303
|
async def _handle_resume(self) -> None:
|
|
338
304
|
"""Resume paused task."""
|
|
339
305
|
if self._paused.is_set():
|
|
340
|
-
logger.info(
|
|
341
|
-
"Resuming task: '%s'",
|
|
342
|
-
self.task_id,
|
|
343
|
-
extra={"task_id": self.task_id},
|
|
344
|
-
)
|
|
306
|
+
logger.info("Task resumed", extra=self.session_ids)
|
|
345
307
|
self._paused.clear()
|
|
346
308
|
|
|
347
309
|
await self.db.update(
|
|
@@ -350,6 +312,8 @@ class TaskSession:
|
|
|
350
312
|
SignalMessage(
|
|
351
313
|
task_id=self.task_id,
|
|
352
314
|
mission_id=self.mission_id,
|
|
315
|
+
setup_id=self.setup_id,
|
|
316
|
+
setup_version_id=self.setup_version_id,
|
|
353
317
|
action=SignalType.ACK_RESUME,
|
|
354
318
|
status=self.status,
|
|
355
319
|
).model_dump(),
|
|
@@ -361,18 +325,16 @@ class TaskSession:
|
|
|
361
325
|
"tasks",
|
|
362
326
|
self.signal_record_id, # type: ignore
|
|
363
327
|
SignalMessage(
|
|
364
|
-
mission_id=self.mission_id,
|
|
365
328
|
task_id=self.task_id,
|
|
329
|
+
mission_id=self.mission_id,
|
|
330
|
+
setup_id=self.setup_id,
|
|
331
|
+
setup_version_id=self.setup_version_id,
|
|
366
332
|
status=self.status,
|
|
367
333
|
action=SignalType.ACK_STATUS,
|
|
368
334
|
).model_dump(),
|
|
369
335
|
)
|
|
370
336
|
|
|
371
|
-
logger.debug(
|
|
372
|
-
"Status report sent for task: '%s'",
|
|
373
|
-
self.task_id,
|
|
374
|
-
extra={"task_id": self.task_id},
|
|
375
|
-
)
|
|
337
|
+
logger.debug("Status report sent", extra=self.session_ids)
|
|
376
338
|
|
|
377
339
|
async def cleanup(self) -> None:
|
|
378
340
|
"""Clean up task session resources.
|
|
@@ -1,22 +1,14 @@
|
|
|
1
1
|
"""Module gRPC server implementation for DigitalKin."""
|
|
2
2
|
|
|
3
|
-
import
|
|
4
|
-
from pathlib import Path
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
5
4
|
|
|
6
|
-
import
|
|
7
|
-
from digitalkin_proto.agentic_mesh_protocol.module.v1 import (
|
|
5
|
+
from agentic_mesh_protocol.module.v1 import (
|
|
8
6
|
module_service_pb2,
|
|
9
7
|
module_service_pb2_grpc,
|
|
10
8
|
)
|
|
11
|
-
from digitalkin_proto.agentic_mesh_protocol.module_registry.v1 import (
|
|
12
|
-
metadata_pb2,
|
|
13
|
-
module_registry_service_pb2_grpc,
|
|
14
|
-
registration_pb2,
|
|
15
|
-
)
|
|
16
9
|
|
|
17
10
|
from digitalkin.grpc_servers._base_server import BaseServer
|
|
18
11
|
from digitalkin.grpc_servers.module_servicer import ModuleServicer
|
|
19
|
-
from digitalkin.grpc_servers.utils.exceptions import ServerError
|
|
20
12
|
from digitalkin.logger import logger
|
|
21
13
|
from digitalkin.models.grpc_servers.models import (
|
|
22
14
|
ClientConfig,
|
|
@@ -24,13 +16,17 @@ from digitalkin.models.grpc_servers.models import (
|
|
|
24
16
|
SecurityMode,
|
|
25
17
|
)
|
|
26
18
|
from digitalkin.modules._base_module import BaseModule
|
|
19
|
+
from digitalkin.services.registry import GrpcRegistry
|
|
20
|
+
|
|
21
|
+
if TYPE_CHECKING:
|
|
22
|
+
from digitalkin.services.registry import RegistryStrategy
|
|
27
23
|
|
|
28
24
|
|
|
29
25
|
class ModuleServer(BaseServer):
|
|
30
26
|
"""gRPC server for a DigitalKin module.
|
|
31
27
|
|
|
32
28
|
This server exposes the module's functionality through the ModuleService gRPC interface.
|
|
33
|
-
It can optionally register itself with a
|
|
29
|
+
It can optionally register itself with a Registry server.
|
|
34
30
|
|
|
35
31
|
Attributes:
|
|
36
32
|
module: The module instance being served.
|
|
@@ -57,6 +53,12 @@ class ModuleServer(BaseServer):
|
|
|
57
53
|
self.server_config = server_config
|
|
58
54
|
self.client_config = client_config
|
|
59
55
|
self.module_servicer: ModuleServicer | None = None
|
|
56
|
+
self.registry: RegistryStrategy | None = None
|
|
57
|
+
|
|
58
|
+
self._registry_client_config: ClientConfig | None = None
|
|
59
|
+
if self.server_config.registry_address:
|
|
60
|
+
self._registry_client_config = self._build_registry_client_config()
|
|
61
|
+
self._prepare_registry_config()
|
|
60
62
|
|
|
61
63
|
def _register_servicers(self) -> None:
|
|
62
64
|
"""Register the module servicer with the gRPC server.
|
|
@@ -77,17 +79,53 @@ class ModuleServer(BaseServer):
|
|
|
77
79
|
)
|
|
78
80
|
logger.debug("Registered Module servicer")
|
|
79
81
|
|
|
82
|
+
def _prepare_registry_config(self) -> None:
|
|
83
|
+
"""Prepare registry client config on module_class before server starts.
|
|
84
|
+
|
|
85
|
+
This ensures ServicesConfig created by JobManager will have registry config,
|
|
86
|
+
allowing spawned module instances to inherit the registry configuration.
|
|
87
|
+
"""
|
|
88
|
+
if not self._registry_client_config:
|
|
89
|
+
return
|
|
90
|
+
|
|
91
|
+
self.module_class.services_config_params["registry"] = {"client_config": self._registry_client_config}
|
|
92
|
+
|
|
93
|
+
def _build_registry_client_config(self) -> ClientConfig:
|
|
94
|
+
"""Build ClientConfig for registry from server_config.registry_address.
|
|
95
|
+
|
|
96
|
+
Returns:
|
|
97
|
+
ClientConfig configured for registry connection.
|
|
98
|
+
"""
|
|
99
|
+
host, port = self.server_config.registry_address.rsplit(":", 1)
|
|
100
|
+
return ClientConfig(
|
|
101
|
+
host=host,
|
|
102
|
+
port=int(port),
|
|
103
|
+
mode=self.server_config.mode,
|
|
104
|
+
security=self.client_config.security if self.client_config else SecurityMode.INSECURE,
|
|
105
|
+
credentials=self.client_config.credentials if self.client_config else None,
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
def _init_registry(self) -> None:
|
|
109
|
+
"""Initialize server-level registry client for registration.
|
|
110
|
+
|
|
111
|
+
Note: services_config_params["registry"] is already set in _prepare_registry_config()
|
|
112
|
+
which runs in __init__(). This method only creates the server-level client instance.
|
|
113
|
+
"""
|
|
114
|
+
if not self._registry_client_config:
|
|
115
|
+
return
|
|
116
|
+
|
|
117
|
+
self.registry = GrpcRegistry("", "", "", self._registry_client_config)
|
|
118
|
+
|
|
80
119
|
def start(self) -> None:
|
|
81
120
|
"""Start the module server and register with the registry if configured."""
|
|
82
121
|
logger.info("Starting module server", extra={"server_config": self.server_config})
|
|
83
122
|
super().start()
|
|
84
123
|
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
logger.exception("Failed to register with registry")
|
|
124
|
+
try:
|
|
125
|
+
self._init_registry()
|
|
126
|
+
self._register_with_registry()
|
|
127
|
+
except Exception:
|
|
128
|
+
logger.exception("Failed to register with registry")
|
|
91
129
|
|
|
92
130
|
if self.module_servicer is not None:
|
|
93
131
|
logger.debug("Setup post init started", extra={"client_config": self.client_config})
|
|
@@ -97,12 +135,12 @@ class ModuleServer(BaseServer):
|
|
|
97
135
|
"""Start the module server and register with the registry if configured."""
|
|
98
136
|
logger.info("Starting module server", extra={"server_config": self.server_config})
|
|
99
137
|
await super().start_async()
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
138
|
+
|
|
139
|
+
try:
|
|
140
|
+
self._init_registry()
|
|
141
|
+
self._register_with_registry()
|
|
142
|
+
except Exception:
|
|
143
|
+
logger.exception("Failed to register with registry")
|
|
106
144
|
|
|
107
145
|
if self.module_servicer is not None:
|
|
108
146
|
logger.info("Setup post init started", extra={"client_config": self.client_config})
|
|
@@ -110,158 +148,61 @@ class ModuleServer(BaseServer):
|
|
|
110
148
|
self.module_servicer.setup.__post_init__(self.client_config)
|
|
111
149
|
|
|
112
150
|
def stop(self, grace: float | None = None) -> None:
|
|
113
|
-
"""Stop the module server
|
|
114
|
-
# If registered with a registry, deregister
|
|
115
|
-
if self.server_config.registry_address:
|
|
116
|
-
try:
|
|
117
|
-
self._deregister_from_registry()
|
|
118
|
-
except ServerError:
|
|
119
|
-
logger.exception("Failed to deregister from registry")
|
|
151
|
+
"""Stop the module server.
|
|
120
152
|
|
|
153
|
+
Modules become inactive when they stop sending heartbeats
|
|
154
|
+
"""
|
|
121
155
|
super().stop(grace)
|
|
122
156
|
|
|
123
157
|
def _register_with_registry(self) -> None:
|
|
124
|
-
"""Register this module with the registry server.
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
channel = self._create_registry_channel()
|
|
137
|
-
|
|
138
|
-
with channel:
|
|
139
|
-
# Create a stub (client)
|
|
140
|
-
stub = module_registry_service_pb2_grpc.ModuleRegistryServiceStub(channel)
|
|
141
|
-
|
|
142
|
-
# Determine module type
|
|
143
|
-
module_type = self._determine_module_type()
|
|
144
|
-
|
|
145
|
-
metadata = metadata_pb2.Metadata(
|
|
146
|
-
name=self.module_class.metadata["name"],
|
|
147
|
-
tags=[metadata_pb2.Tag(tag=tag) for tag in self.module_class.metadata["tags"]],
|
|
148
|
-
description=self.module_class.metadata["description"],
|
|
149
|
-
)
|
|
150
|
-
|
|
151
|
-
self.module_class.metadata["module_id"] = f"{self.module_class.metadata['name']}:{uuid.uuid4()}"
|
|
152
|
-
# Create registration request
|
|
153
|
-
request = registration_pb2.RegisterRequest(
|
|
154
|
-
module_id=self.module_class.metadata["module_id"],
|
|
155
|
-
version=self.module_class.metadata["version"],
|
|
156
|
-
module_type=module_type,
|
|
157
|
-
address=self.server_config.address,
|
|
158
|
-
metadata=metadata,
|
|
158
|
+
"""Register this module with the registry server."""
|
|
159
|
+
if not self.registry:
|
|
160
|
+
logger.debug("No registry configured, skipping registration")
|
|
161
|
+
return
|
|
162
|
+
|
|
163
|
+
module_id = self.module_class.get_module_id()
|
|
164
|
+
version = self.module_class.metadata.get("version", "0.0.0")
|
|
165
|
+
|
|
166
|
+
if not module_id or module_id == "unknown":
|
|
167
|
+
logger.warning(
|
|
168
|
+
"Module has no valid module_id, skipping registration",
|
|
169
|
+
extra={"module_class": self.module_class.__name__},
|
|
159
170
|
)
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
if response.success:
|
|
172
|
-
logger.debug("Module registered successfully")
|
|
173
|
-
else:
|
|
174
|
-
logger.error("Module registration failed")
|
|
175
|
-
except grpc.RpcError:
|
|
176
|
-
logger.exception("RPC error during registration:")
|
|
177
|
-
raise ServerError
|
|
178
|
-
|
|
179
|
-
def _deregister_from_registry(self) -> None:
|
|
180
|
-
"""Deregister this module from the registry server.
|
|
181
|
-
|
|
182
|
-
Raises:
|
|
183
|
-
ServerError: If communication with the registry server fails.
|
|
184
|
-
"""
|
|
185
|
-
logger.debug(
|
|
186
|
-
"Deregistering module from registry at %s",
|
|
187
|
-
self.server_config.registry_address,
|
|
171
|
+
return
|
|
172
|
+
|
|
173
|
+
logger.info(
|
|
174
|
+
"Attempting to register module with registry",
|
|
175
|
+
extra={
|
|
176
|
+
"module_id": module_id,
|
|
177
|
+
"address": self.server_config.host,
|
|
178
|
+
"port": self.server_config.port,
|
|
179
|
+
"version": version,
|
|
180
|
+
"registry_address": self.server_config.registry_address,
|
|
181
|
+
},
|
|
188
182
|
)
|
|
189
183
|
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
184
|
+
result = self.registry.register(
|
|
185
|
+
module_id=module_id,
|
|
186
|
+
address=self.server_config.host,
|
|
187
|
+
port=self.server_config.port,
|
|
188
|
+
version=version,
|
|
189
|
+
)
|
|
196
190
|
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
191
|
+
if result:
|
|
192
|
+
logger.info(
|
|
193
|
+
"Module registered successfully",
|
|
194
|
+
extra={
|
|
195
|
+
"module_id": result.module_id,
|
|
196
|
+
"address": self.server_config.host,
|
|
197
|
+
"port": self.server_config.port,
|
|
198
|
+
"registry_address": self.server_config.registry_address,
|
|
199
|
+
},
|
|
200
200
|
)
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
logger.error("Module deregistration failed")
|
|
209
|
-
except grpc.RpcError:
|
|
210
|
-
logger.exception("RPC error during deregistration")
|
|
211
|
-
raise ServerError
|
|
212
|
-
|
|
213
|
-
def _create_registry_channel(self) -> grpc.Channel:
|
|
214
|
-
"""Create an appropriate channel to the registry server.
|
|
215
|
-
|
|
216
|
-
Returns:
|
|
217
|
-
A gRPC channel for communication with the registry.
|
|
218
|
-
|
|
219
|
-
Raises:
|
|
220
|
-
ValueError: If credentials are required but not provided.
|
|
221
|
-
"""
|
|
222
|
-
if (
|
|
223
|
-
self.client_config is not None
|
|
224
|
-
and self.client_config.security == SecurityMode.SECURE
|
|
225
|
-
and self.client_config.credentials
|
|
226
|
-
):
|
|
227
|
-
# Secure channel
|
|
228
|
-
# Secure channel
|
|
229
|
-
root_certificates = Path(self.client_config.credentials.root_cert_path).read_bytes()
|
|
230
|
-
|
|
231
|
-
# mTLS channel
|
|
232
|
-
private_key = None
|
|
233
|
-
certificate_chain = None
|
|
234
|
-
if (
|
|
235
|
-
self.client_config.credentials.client_cert_path is not None
|
|
236
|
-
and self.client_config.credentials.client_key_path is not None
|
|
237
|
-
):
|
|
238
|
-
private_key = Path(self.client_config.credentials.client_key_path).read_bytes()
|
|
239
|
-
certificate_chain = Path(self.client_config.credentials.client_cert_path).read_bytes()
|
|
240
|
-
|
|
241
|
-
# Create channel credentials
|
|
242
|
-
channel_credentials = grpc.ssl_channel_credentials(
|
|
243
|
-
root_certificates=root_certificates,
|
|
244
|
-
certificate_chain=certificate_chain,
|
|
245
|
-
private_key=private_key,
|
|
201
|
+
else:
|
|
202
|
+
logger.warning(
|
|
203
|
+
"Module registration returned None (module may not exist in registry)",
|
|
204
|
+
extra={
|
|
205
|
+
"module_id": module_id,
|
|
206
|
+
"registry_address": self.server_config.registry_address,
|
|
207
|
+
},
|
|
246
208
|
)
|
|
247
|
-
return grpc.secure_channel(self.server_config.registry_address, channel_credentials)
|
|
248
|
-
# Insecure channel
|
|
249
|
-
return grpc.insecure_channel(self.server_config.registry_address)
|
|
250
|
-
|
|
251
|
-
def _determine_module_type(self) -> str:
|
|
252
|
-
"""Determine the module type based on its class.
|
|
253
|
-
|
|
254
|
-
Returns:
|
|
255
|
-
A string representing the module type.
|
|
256
|
-
"""
|
|
257
|
-
module_type = "UNKNOWN"
|
|
258
|
-
class_name = self.module_class.__name__
|
|
259
|
-
|
|
260
|
-
if class_name == "ToolModule":
|
|
261
|
-
module_type = "TOOL"
|
|
262
|
-
elif class_name == "TriggerModule":
|
|
263
|
-
module_type = "TRIGGER"
|
|
264
|
-
elif class_name == "ArchetypeModule":
|
|
265
|
-
module_type = "KIN"
|
|
266
|
-
|
|
267
|
-
return module_type
|