digitalkin 0.3.1.dev2__py3-none-any.whl → 0.3.2.dev14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- base_server/server_async_insecure.py +6 -5
- base_server/server_async_secure.py +6 -5
- base_server/server_sync_insecure.py +5 -4
- base_server/server_sync_secure.py +5 -4
- digitalkin/__version__.py +1 -1
- digitalkin/core/job_manager/base_job_manager.py +1 -1
- digitalkin/core/job_manager/single_job_manager.py +28 -9
- digitalkin/core/job_manager/taskiq_broker.py +7 -6
- digitalkin/core/job_manager/taskiq_job_manager.py +1 -1
- digitalkin/core/task_manager/surrealdb_repository.py +7 -7
- digitalkin/core/task_manager/task_session.py +60 -98
- digitalkin/grpc_servers/module_server.py +109 -168
- digitalkin/grpc_servers/module_servicer.py +38 -16
- digitalkin/grpc_servers/utils/grpc_client_wrapper.py +24 -8
- digitalkin/grpc_servers/utils/utility_schema_extender.py +100 -0
- digitalkin/models/__init__.py +1 -1
- digitalkin/models/core/job_manager_models.py +0 -8
- digitalkin/models/core/task_monitor.py +4 -0
- digitalkin/models/grpc_servers/models.py +91 -6
- digitalkin/models/module/__init__.py +18 -13
- digitalkin/models/module/base_types.py +61 -0
- digitalkin/models/module/module_context.py +173 -13
- digitalkin/models/module/module_types.py +28 -392
- digitalkin/models/module/setup_types.py +490 -0
- digitalkin/models/module/tool_cache.py +68 -0
- digitalkin/models/module/tool_reference.py +117 -0
- digitalkin/models/module/utility.py +167 -0
- digitalkin/models/services/registry.py +35 -0
- digitalkin/modules/__init__.py +5 -1
- digitalkin/modules/_base_module.py +154 -61
- digitalkin/modules/archetype_module.py +6 -1
- digitalkin/modules/tool_module.py +6 -1
- digitalkin/modules/triggers/__init__.py +8 -0
- digitalkin/modules/triggers/healthcheck_ping_trigger.py +45 -0
- digitalkin/modules/triggers/healthcheck_services_trigger.py +63 -0
- digitalkin/modules/triggers/healthcheck_status_trigger.py +52 -0
- digitalkin/services/__init__.py +4 -0
- digitalkin/services/communication/__init__.py +7 -0
- digitalkin/services/communication/communication_strategy.py +76 -0
- digitalkin/services/communication/default_communication.py +101 -0
- digitalkin/services/communication/grpc_communication.py +234 -0
- digitalkin/services/cost/grpc_cost.py +1 -1
- digitalkin/services/filesystem/grpc_filesystem.py +1 -1
- digitalkin/services/registry/__init__.py +22 -1
- digitalkin/services/registry/default_registry.py +135 -4
- digitalkin/services/registry/exceptions.py +47 -0
- digitalkin/services/registry/grpc_registry.py +306 -0
- digitalkin/services/registry/registry_models.py +15 -0
- digitalkin/services/registry/registry_strategy.py +88 -4
- digitalkin/services/services_config.py +25 -3
- digitalkin/services/services_models.py +5 -1
- digitalkin/services/setup/default_setup.py +1 -1
- digitalkin/services/setup/grpc_setup.py +1 -1
- digitalkin/services/storage/grpc_storage.py +1 -1
- digitalkin/services/user_profile/__init__.py +11 -0
- digitalkin/services/user_profile/grpc_user_profile.py +2 -2
- digitalkin/services/user_profile/user_profile_strategy.py +0 -15
- digitalkin/utils/schema_splitter.py +207 -0
- {digitalkin-0.3.1.dev2.dist-info → digitalkin-0.3.2.dev14.dist-info}/METADATA +5 -5
- digitalkin-0.3.2.dev14.dist-info/RECORD +143 -0
- {digitalkin-0.3.1.dev2.dist-info → digitalkin-0.3.2.dev14.dist-info}/top_level.txt +1 -0
- modules/archetype_with_tools_module.py +244 -0
- modules/cpu_intensive_module.py +1 -1
- modules/dynamic_setup_module.py +5 -29
- modules/minimal_llm_module.py +1 -1
- modules/text_transform_module.py +1 -1
- monitoring/digitalkin_observability/__init__.py +46 -0
- monitoring/digitalkin_observability/http_server.py +150 -0
- monitoring/digitalkin_observability/interceptors.py +176 -0
- monitoring/digitalkin_observability/metrics.py +201 -0
- monitoring/digitalkin_observability/prometheus.py +137 -0
- monitoring/tests/test_metrics.py +172 -0
- services/filesystem_module.py +7 -5
- services/storage_module.py +4 -2
- digitalkin/grpc_servers/registry_server.py +0 -65
- digitalkin/grpc_servers/registry_servicer.py +0 -456
- digitalkin-0.3.1.dev2.dist-info/RECORD +0 -119
- {digitalkin-0.3.1.dev2.dist-info → digitalkin-0.3.2.dev14.dist-info}/WHEEL +0 -0
- {digitalkin-0.3.1.dev2.dist-info → digitalkin-0.3.2.dev14.dist-info}/licenses/LICENSE +0 -0
|
@@ -5,7 +5,7 @@ from collections.abc import AsyncGenerator
|
|
|
5
5
|
from typing import Any
|
|
6
6
|
|
|
7
7
|
import grpc
|
|
8
|
-
from
|
|
8
|
+
from agentic_mesh_protocol.module.v1 import (
|
|
9
9
|
information_pb2,
|
|
10
10
|
lifecycle_pb2,
|
|
11
11
|
module_service_pb2_grpc,
|
|
@@ -19,6 +19,7 @@ from digitalkin.logger import logger
|
|
|
19
19
|
from digitalkin.models.core.job_manager_models import JobManagerMode
|
|
20
20
|
from digitalkin.models.module.module import ModuleStatus
|
|
21
21
|
from digitalkin.modules._base_module import BaseModule
|
|
22
|
+
from digitalkin.services.registry import GrpcRegistry, RegistryStrategy
|
|
22
23
|
from digitalkin.services.services_models import ServicesMode
|
|
23
24
|
from digitalkin.services.setup.default_setup import DefaultSetup
|
|
24
25
|
from digitalkin.services.setup.grpc_setup import GrpcSetup
|
|
@@ -40,6 +41,7 @@ class ModuleServicer(module_service_pb2_grpc.ModuleServiceServicer, ArgParser):
|
|
|
40
41
|
args: Namespace
|
|
41
42
|
setup: SetupStrategy
|
|
42
43
|
job_manager: BaseJobManager
|
|
44
|
+
_registry_cache: RegistryStrategy | None = None
|
|
43
45
|
|
|
44
46
|
def _add_parser_args(self, parser: ArgumentParser) -> None:
|
|
45
47
|
super()._add_parser_args(parser)
|
|
@@ -82,6 +84,26 @@ class ModuleServicer(module_service_pb2_grpc.ModuleServiceServicer, ArgParser):
|
|
|
82
84
|
)
|
|
83
85
|
self.setup = GrpcSetup() if self.args.services_mode == ServicesMode.REMOTE else DefaultSetup()
|
|
84
86
|
|
|
87
|
+
def _get_registry(self) -> RegistryStrategy | None:
|
|
88
|
+
"""Get a cached registry instance if configured.
|
|
89
|
+
|
|
90
|
+
Returns:
|
|
91
|
+
Cached GrpcRegistry instance if registry config exists, None otherwise.
|
|
92
|
+
"""
|
|
93
|
+
if self._registry_cache is not None:
|
|
94
|
+
return self._registry_cache
|
|
95
|
+
|
|
96
|
+
registry_config = self.module_class.services_config_params.get("registry")
|
|
97
|
+
if not registry_config:
|
|
98
|
+
return None
|
|
99
|
+
|
|
100
|
+
client_config = registry_config.get("client_config")
|
|
101
|
+
if not client_config:
|
|
102
|
+
return None
|
|
103
|
+
|
|
104
|
+
self._registry_cache = GrpcRegistry("", "", "", client_config)
|
|
105
|
+
return self._registry_cache
|
|
106
|
+
|
|
85
107
|
async def ConfigSetupModule( # noqa: N802
|
|
86
108
|
self,
|
|
87
109
|
request: lifecycle_pb2.ConfigSetupModuleRequest,
|
|
@@ -108,8 +130,6 @@ class ModuleServicer(module_service_pb2_grpc.ModuleServiceServicer, ArgParser):
|
|
|
108
130
|
"mission_id": request.mission_id,
|
|
109
131
|
},
|
|
110
132
|
)
|
|
111
|
-
# Process the module input
|
|
112
|
-
# TODO: Secret should be used here as well
|
|
113
133
|
setup_version = request.setup_version
|
|
114
134
|
config_setup_data = self.module_class.create_config_setup_model(json_format.MessageToDict(request.content))
|
|
115
135
|
setup_version_data = await self.module_class.create_setup_model(
|
|
@@ -139,8 +159,8 @@ class ModuleServicer(module_service_pb2_grpc.ModuleServiceServicer, ArgParser):
|
|
|
139
159
|
return lifecycle_pb2.ConfigSetupModuleResponse(success=False)
|
|
140
160
|
|
|
141
161
|
updated_setup_data = await self.job_manager.generate_config_setup_module_response(job_id)
|
|
142
|
-
logger.info("Setup updated")
|
|
143
|
-
logger.debug(
|
|
162
|
+
logger.info("Setup updated", extra={"job_id": job_id})
|
|
163
|
+
logger.debug("Updated setup data", extra={"job_id": job_id, "setup_data": updated_setup_data})
|
|
144
164
|
setup_version.content = json_format.ParseDict(
|
|
145
165
|
updated_setup_data,
|
|
146
166
|
struct_pb2.Struct(),
|
|
@@ -219,16 +239,16 @@ class ModuleServicer(module_service_pb2_grpc.ModuleServiceServicer, ArgParser):
|
|
|
219
239
|
yield lifecycle_pb2.StartModuleResponse(success=False, job_id=job_id)
|
|
220
240
|
break
|
|
221
241
|
|
|
222
|
-
|
|
242
|
+
logger.info("Yielding message from job %s: %s", job_id, message)
|
|
243
|
+
proto = json_format.ParseDict(message, struct_pb2.Struct(), ignore_unknown_fields=True)
|
|
244
|
+
yield lifecycle_pb2.StartModuleResponse(success=True, output=proto, job_id=job_id)
|
|
245
|
+
|
|
246
|
+
if message.get("root", {}).get("protocol") == "end_of_stream":
|
|
223
247
|
logger.info(
|
|
224
|
-
"End of stream
|
|
248
|
+
"End of stream signal received",
|
|
225
249
|
extra={"job_id": job_id, "mission_id": request.mission_id},
|
|
226
250
|
)
|
|
227
251
|
break
|
|
228
|
-
|
|
229
|
-
logger.info("Yielding message from job %s: %s", job_id, message)
|
|
230
|
-
proto = json_format.ParseDict(message, struct_pb2.Struct(), ignore_unknown_fields=True)
|
|
231
|
-
yield lifecycle_pb2.StartModuleResponse(success=True, output=proto, job_id=job_id)
|
|
232
252
|
finally:
|
|
233
253
|
await self.job_manager.wait_for_completion(job_id)
|
|
234
254
|
await self.job_manager.clean_session(job_id, mission_id=request.mission_id)
|
|
@@ -249,17 +269,19 @@ class ModuleServicer(module_service_pb2_grpc.ModuleServiceServicer, ArgParser):
|
|
|
249
269
|
Returns:
|
|
250
270
|
A response indicating success or failure.
|
|
251
271
|
"""
|
|
252
|
-
logger.debug(
|
|
272
|
+
logger.debug(
|
|
273
|
+
"StopModule called",
|
|
274
|
+
extra={"module_class": self.module_class.__name__, "job_id": request.job_id},
|
|
275
|
+
)
|
|
253
276
|
|
|
254
277
|
response: bool = await self.job_manager.stop_module(request.job_id)
|
|
255
278
|
if not response:
|
|
256
|
-
|
|
257
|
-
logger.warning(message)
|
|
279
|
+
logger.warning("Job not found for stop request", extra={"job_id": request.job_id})
|
|
258
280
|
context.set_code(grpc.StatusCode.NOT_FOUND)
|
|
259
|
-
context.set_details(
|
|
281
|
+
context.set_details(f"Job {request.job_id} not found")
|
|
260
282
|
return lifecycle_pb2.StopModuleResponse(success=False)
|
|
261
283
|
|
|
262
|
-
logger.debug("Job
|
|
284
|
+
logger.debug("Job stopped successfully", extra={"job_id": request.job_id})
|
|
263
285
|
return lifecycle_pb2.StopModuleResponse(success=True)
|
|
264
286
|
|
|
265
287
|
async def GetModuleStatus( # noqa: N802
|
|
@@ -43,9 +43,9 @@ class GrpcClientWrapper:
|
|
|
43
43
|
private_key=private_key,
|
|
44
44
|
)
|
|
45
45
|
|
|
46
|
-
return grpc.secure_channel(config.address, channel_credentials, options=config.
|
|
46
|
+
return grpc.secure_channel(config.address, channel_credentials, options=config.grpc_options)
|
|
47
47
|
# Insecure channel
|
|
48
|
-
return grpc.insecure_channel(config.address, options=config.
|
|
48
|
+
return grpc.insecure_channel(config.address, options=config.grpc_options)
|
|
49
49
|
|
|
50
50
|
def exec_grpc_query(self, query_endpoint: str, request: Any) -> Any: # noqa: ANN401
|
|
51
51
|
"""Execute a gRPC query from the query's rpc endpoint name.
|
|
@@ -58,15 +58,31 @@ class GrpcClientWrapper:
|
|
|
58
58
|
corresponding gRPC response.
|
|
59
59
|
|
|
60
60
|
Raises:
|
|
61
|
-
ServerError: gRPC error catching
|
|
61
|
+
ServerError: gRPC error catching with status code and details
|
|
62
62
|
"""
|
|
63
|
+
service_name = getattr(self, "service_name", "unknown")
|
|
63
64
|
try:
|
|
64
|
-
|
|
65
|
-
|
|
65
|
+
logger.debug(
|
|
66
|
+
"Sending gRPC request to %s",
|
|
67
|
+
query_endpoint,
|
|
68
|
+
extra={"request": str(request), "service": service_name},
|
|
69
|
+
)
|
|
66
70
|
response = getattr(self.stub, query_endpoint)(request)
|
|
67
|
-
logger.debug(
|
|
71
|
+
logger.debug(
|
|
72
|
+
"Received gRPC response from %s",
|
|
73
|
+
query_endpoint,
|
|
74
|
+
extra={"response": str(response), "service": service_name},
|
|
75
|
+
)
|
|
68
76
|
except grpc.RpcError as e:
|
|
69
|
-
|
|
70
|
-
|
|
77
|
+
status_code = e.code().name if hasattr(e, "code") else "UNKNOWN"
|
|
78
|
+
details = e.details() if hasattr(e, "details") else str(e)
|
|
79
|
+
msg = f"[{status_code}] {details}"
|
|
80
|
+
logger.error(
|
|
81
|
+
"gRPC %s failed: %s",
|
|
82
|
+
query_endpoint,
|
|
83
|
+
msg,
|
|
84
|
+
extra={"service": service_name},
|
|
85
|
+
)
|
|
86
|
+
raise ServerError(msg) from e
|
|
71
87
|
else:
|
|
72
88
|
return response
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"""Utility schema extender for gRPC API responses.
|
|
2
|
+
|
|
3
|
+
This module extends module schemas with SDK utility protocols for API responses.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import types
|
|
7
|
+
from typing import Annotated, Union, get_args, get_origin
|
|
8
|
+
|
|
9
|
+
from pydantic import Field, create_model
|
|
10
|
+
|
|
11
|
+
from digitalkin.models.module.module_types import DataModel
|
|
12
|
+
from digitalkin.models.module.utility import (
|
|
13
|
+
EndOfStreamOutput,
|
|
14
|
+
HealthcheckPingInput,
|
|
15
|
+
HealthcheckPingOutput,
|
|
16
|
+
HealthcheckServicesInput,
|
|
17
|
+
HealthcheckServicesOutput,
|
|
18
|
+
HealthcheckStatusInput,
|
|
19
|
+
HealthcheckStatusOutput,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class UtilitySchemaExtender:
|
|
24
|
+
"""Extends module schemas with SDK utility protocols for API responses.
|
|
25
|
+
|
|
26
|
+
This class provides methods to create extended Pydantic models that include
|
|
27
|
+
both user-defined protocols and SDK utility protocols in their schemas.
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
_output_protocols = (
|
|
31
|
+
EndOfStreamOutput,
|
|
32
|
+
HealthcheckPingOutput,
|
|
33
|
+
HealthcheckServicesOutput,
|
|
34
|
+
HealthcheckStatusOutput,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
_input_protocols = (
|
|
38
|
+
HealthcheckPingInput,
|
|
39
|
+
HealthcheckServicesInput,
|
|
40
|
+
HealthcheckStatusInput,
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
@classmethod
|
|
44
|
+
def _extract_union_types(cls, annotation: type) -> tuple:
|
|
45
|
+
"""Extract individual types from a Union or Annotated[Union, ...] annotation.
|
|
46
|
+
|
|
47
|
+
Returns:
|
|
48
|
+
A tuple of individual types contained in the Union.
|
|
49
|
+
"""
|
|
50
|
+
if get_origin(annotation) is Annotated:
|
|
51
|
+
inner_args = get_args(annotation)
|
|
52
|
+
if inner_args:
|
|
53
|
+
return cls._extract_union_types(inner_args[0])
|
|
54
|
+
if get_origin(annotation) is Union or isinstance(annotation, types.UnionType):
|
|
55
|
+
return get_args(annotation)
|
|
56
|
+
return (annotation,)
|
|
57
|
+
|
|
58
|
+
@classmethod
|
|
59
|
+
def create_extended_output_model(cls, base_model: type[DataModel]) -> type[DataModel]:
|
|
60
|
+
"""Create an extended output model that includes utility output protocols.
|
|
61
|
+
|
|
62
|
+
Args:
|
|
63
|
+
base_model: The module's output_format class (a DataModel subclass).
|
|
64
|
+
|
|
65
|
+
Returns:
|
|
66
|
+
A new DataModel subclass with root typed as Union[original_types, utility_types].
|
|
67
|
+
"""
|
|
68
|
+
original_annotation = base_model.model_fields["root"].annotation
|
|
69
|
+
original_types = cls._extract_union_types(original_annotation)
|
|
70
|
+
extended_types = (*original_types, *cls._output_protocols)
|
|
71
|
+
union_type = Union[extended_types] # type: ignore[valid-type] # noqa: UP007
|
|
72
|
+
extended_root = Annotated[union_type, Field(discriminator="protocol")] # type: ignore[valid-type]
|
|
73
|
+
return create_model(
|
|
74
|
+
f"{base_model.__name__}Utilities",
|
|
75
|
+
__base__=DataModel,
|
|
76
|
+
root=(extended_root, ...),
|
|
77
|
+
annotations=(dict[str, str], Field(default={})),
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
@classmethod
|
|
81
|
+
def create_extended_input_model(cls, base_model: type[DataModel]) -> type[DataModel]:
|
|
82
|
+
"""Create an extended input model that includes utility input protocols.
|
|
83
|
+
|
|
84
|
+
Args:
|
|
85
|
+
base_model: The module's input_format class (a DataModel subclass).
|
|
86
|
+
|
|
87
|
+
Returns:
|
|
88
|
+
A new DataModel subclass with root typed as Union[original_types, utility_types].
|
|
89
|
+
"""
|
|
90
|
+
original_annotation = base_model.model_fields["root"].annotation
|
|
91
|
+
original_types = cls._extract_union_types(original_annotation)
|
|
92
|
+
extended_types = (*original_types, *cls._input_protocols)
|
|
93
|
+
union_type = Union[extended_types] # type: ignore[valid-type] # noqa: UP007
|
|
94
|
+
extended_root = Annotated[union_type, Field(discriminator="protocol")] # type: ignore[valid-type]
|
|
95
|
+
return create_model(
|
|
96
|
+
f"{base_model.__name__}Utilities",
|
|
97
|
+
__base__=DataModel,
|
|
98
|
+
root=(extended_root, ...),
|
|
99
|
+
annotations=(dict[str, str], Field(default={})),
|
|
100
|
+
)
|
digitalkin/models/__init__.py
CHANGED
|
@@ -2,17 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
from enum import Enum
|
|
4
4
|
|
|
5
|
-
from pydantic import BaseModel
|
|
6
|
-
|
|
7
5
|
from digitalkin.core.job_manager.base_job_manager import BaseJobManager
|
|
8
6
|
|
|
9
7
|
|
|
10
|
-
class StreamCodeModel(BaseModel):
|
|
11
|
-
"""Typed error/code model."""
|
|
12
|
-
|
|
13
|
-
code: str
|
|
14
|
-
|
|
15
|
-
|
|
16
8
|
class JobManagerMode(Enum):
|
|
17
9
|
"""Job manager mode."""
|
|
18
10
|
|
|
@@ -55,6 +55,8 @@ class SignalMessage(BaseModel):
|
|
|
55
55
|
|
|
56
56
|
task_id: str = Field(..., description="Unique identifier for the task")
|
|
57
57
|
mission_id: str = Field(..., description="Identifier for the mission")
|
|
58
|
+
setup_id: str = Field(default="", description="Identifier for the setup")
|
|
59
|
+
setup_version_id: str = Field(default="", description="Identifier for the setup version")
|
|
58
60
|
status: TaskStatus = Field(..., description="Current status of the task")
|
|
59
61
|
action: SignalType = Field(..., description="Type of signal action")
|
|
60
62
|
timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
|
|
@@ -67,4 +69,6 @@ class HeartbeatMessage(BaseModel):
|
|
|
67
69
|
|
|
68
70
|
task_id: str = Field(..., description="Unique identifier for the task")
|
|
69
71
|
mission_id: str = Field(..., description="Identifier for the mission")
|
|
72
|
+
setup_id: str = Field(default="", description="Identifier for the setup")
|
|
73
|
+
setup_version_id: str = Field(default="", description="Identifier for the setup version")
|
|
70
74
|
timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
|
|
@@ -65,6 +65,42 @@ class ServerCredentials(BaseModel):
|
|
|
65
65
|
return v
|
|
66
66
|
|
|
67
67
|
|
|
68
|
+
class RetryPolicy(BaseModel):
|
|
69
|
+
"""gRPC retry policy configuration for resilient connections.
|
|
70
|
+
|
|
71
|
+
Attributes:
|
|
72
|
+
max_attempts: Maximum retry attempts including the original call
|
|
73
|
+
initial_backoff: Initial backoff duration (e.g., "0.1s")
|
|
74
|
+
max_backoff: Maximum backoff duration (e.g., "10s")
|
|
75
|
+
backoff_multiplier: Multiplier for exponential backoff
|
|
76
|
+
retryable_status_codes: gRPC status codes that trigger retry
|
|
77
|
+
"""
|
|
78
|
+
|
|
79
|
+
max_attempts: int = Field(default=5, ge=1, le=10, description="Maximum retry attempts including the original call")
|
|
80
|
+
initial_backoff: str = Field(default="0.1s", description="Initial backoff duration (e.g., '0.1s')")
|
|
81
|
+
max_backoff: str = Field(default="10s", description="Maximum backoff duration (e.g., '10s')")
|
|
82
|
+
backoff_multiplier: float = Field(default=2.0, ge=1.0, description="Multiplier for exponential backoff")
|
|
83
|
+
retryable_status_codes: list[str] = Field(
|
|
84
|
+
default_factory=lambda: ["UNAVAILABLE", "RESOURCE_EXHAUSTED"],
|
|
85
|
+
description="gRPC status codes that trigger retry",
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
model_config = {"extra": "forbid", "frozen": True}
|
|
89
|
+
|
|
90
|
+
def to_service_config_json(self) -> str:
|
|
91
|
+
"""Serialize to gRPC service config JSON string.
|
|
92
|
+
|
|
93
|
+
Returns:
|
|
94
|
+
JSON string for grpc.service_config channel option.
|
|
95
|
+
"""
|
|
96
|
+
codes = "[" + ",".join(f'"{c}"' for c in self.retryable_status_codes) + "]"
|
|
97
|
+
return (
|
|
98
|
+
f'{{"methodConfig":[{{"name":[{{}}],"retryPolicy":{{"maxAttempts":{self.max_attempts},'
|
|
99
|
+
f'"initialBackoff":"{self.initial_backoff}","maxBackoff":"{self.max_backoff}",'
|
|
100
|
+
f'"backoffMultiplier":{self.backoff_multiplier},"retryableStatusCodes":{codes}}}}}]}}'
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
|
|
68
104
|
class ClientCredentials(BaseModel):
|
|
69
105
|
"""Model for client credentials in secure mode.
|
|
70
106
|
|
|
@@ -170,15 +206,47 @@ class ClientConfig(ChannelConfig):
|
|
|
170
206
|
security: Security mode (secure/insecure)
|
|
171
207
|
credentials: Client credentials for secure mode
|
|
172
208
|
channel_options: Additional channel options
|
|
209
|
+
retry_policy: Retry policy for failed RPCs
|
|
173
210
|
"""
|
|
174
211
|
|
|
175
212
|
credentials: ClientCredentials | None = Field(None, description="Client credentials for secure mode")
|
|
213
|
+
retry_policy: RetryPolicy = Field(default_factory=lambda: RetryPolicy(), description="Retry policy for failed RPCs") # noqa: PLW0108
|
|
176
214
|
channel_options: list[tuple[str, Any]] = Field(
|
|
177
215
|
default_factory=lambda: [
|
|
178
|
-
("grpc.max_receive_message_length", 100 * 1024 * 1024),
|
|
179
|
-
("grpc.max_send_message_length", 100 * 1024 * 1024),
|
|
216
|
+
("grpc.max_receive_message_length", 100 * 1024 * 1024),
|
|
217
|
+
("grpc.max_send_message_length", 100 * 1024 * 1024),
|
|
218
|
+
# === DNS Re-resolution (Critical for Container Environments) ===
|
|
219
|
+
# Minimum milliseconds between DNS re-resolution attempts (500 ms)
|
|
220
|
+
# When connection fails, gRPC will re-query DNS after this interval
|
|
221
|
+
# Solves: Container restarts with new IPs causing "No route to host"
|
|
222
|
+
("grpc.dns_min_time_between_resolutions_ms", 500),
|
|
223
|
+
# Initial delay before first reconnection attempt (1 second)
|
|
224
|
+
("grpc.initial_reconnect_backoff_ms", 1000),
|
|
225
|
+
# Maximum delay between reconnection attempts (10 seconds)
|
|
226
|
+
# Prevents overwhelming the network during extended outages
|
|
227
|
+
("grpc.max_reconnect_backoff_ms", 10000),
|
|
228
|
+
# Minimum delay between reconnection attempts (500ms)
|
|
229
|
+
# Ensures rapid recovery for brief network glitches
|
|
230
|
+
("grpc.min_reconnect_backoff_ms", 500),
|
|
231
|
+
# === Keepalive Settings (Detect Dead Connections) ===
|
|
232
|
+
# Send keepalive ping every 60 seconds when connection is idle
|
|
233
|
+
# Proactively detects dead connections before RPC calls fail
|
|
234
|
+
("grpc.keepalive_time_ms", 60000),
|
|
235
|
+
# Wait 20 seconds for keepalive response before declaring connection dead
|
|
236
|
+
# Triggers reconnection (with DNS re-resolution) if pong not received
|
|
237
|
+
("grpc.keepalive_timeout_ms", 20000),
|
|
238
|
+
# Send keepalive pings even when no RPCs are in flight
|
|
239
|
+
# Essential for long-lived connections that may sit idle
|
|
240
|
+
("grpc.keepalive_permit_without_calls", True),
|
|
241
|
+
# Minimum interval between HTTP/2 pings (30 seconds)
|
|
242
|
+
# Must be >= server's grpc.http2.min_ping_interval_without_data_ms (10s)
|
|
243
|
+
("grpc.http2.min_time_between_pings_ms", 30000),
|
|
244
|
+
# === Retry Configuration ===
|
|
245
|
+
# Enable automatic retry for failed RPCs (1 = enabled)
|
|
246
|
+
# Works with retryable status codes: UNAVAILABLE, RESOURCE_EXHAUSTED
|
|
247
|
+
("grpc.enable_retries", 1),
|
|
180
248
|
],
|
|
181
|
-
description="
|
|
249
|
+
description="Resilient gRPC channel options with DNS re-resolution, keepalive, and retries",
|
|
182
250
|
)
|
|
183
251
|
|
|
184
252
|
@field_validator("credentials")
|
|
@@ -204,6 +272,15 @@ class ClientConfig(ChannelConfig):
|
|
|
204
272
|
raise ConfigurationError(msg)
|
|
205
273
|
return v
|
|
206
274
|
|
|
275
|
+
@property
|
|
276
|
+
def grpc_options(self) -> list[tuple[str, Any]]:
|
|
277
|
+
"""Get channel options with retry policy service config.
|
|
278
|
+
|
|
279
|
+
Returns:
|
|
280
|
+
Full list of gRPC channel options.
|
|
281
|
+
"""
|
|
282
|
+
return [*self.channel_options, ("grpc.service_config", self.retry_policy.to_service_config_json())]
|
|
283
|
+
|
|
207
284
|
|
|
208
285
|
class ServerConfig(ChannelConfig):
|
|
209
286
|
"""Base configuration for gRPC servers.
|
|
@@ -223,10 +300,18 @@ class ServerConfig(ChannelConfig):
|
|
|
223
300
|
credentials: ServerCredentials | None = Field(None, description="Server credentials for secure mode")
|
|
224
301
|
server_options: list[tuple[str, Any]] = Field(
|
|
225
302
|
default_factory=lambda: [
|
|
226
|
-
("grpc.max_receive_message_length", 100 * 1024 * 1024),
|
|
227
|
-
("grpc.max_send_message_length", 100 * 1024 * 1024),
|
|
303
|
+
("grpc.max_receive_message_length", 100 * 1024 * 1024),
|
|
304
|
+
("grpc.max_send_message_length", 100 * 1024 * 1024),
|
|
305
|
+
# === Keepalive Permission (Required for Client Keepalive) ===
|
|
306
|
+
# Allow clients to send keepalive pings without active RPCs
|
|
307
|
+
# Without this, server rejects client keepalives with GOAWAY
|
|
308
|
+
("grpc.keepalive_permit_without_calls", True),
|
|
309
|
+
# Minimum interval server allows between client pings (10 seconds)
|
|
310
|
+
# Prevents "too_many_pings" GOAWAY errors
|
|
311
|
+
# Must match or be less than client's http2.min_time_between_pings_ms
|
|
312
|
+
("grpc.http2.min_ping_interval_without_data_ms", 10000),
|
|
228
313
|
],
|
|
229
|
-
description="
|
|
314
|
+
description="gRPC server options with keepalive support",
|
|
230
315
|
)
|
|
231
316
|
enable_reflection: bool = Field(default=True, description="Enable reflection for the server")
|
|
232
317
|
enable_health_check: bool = Field(default=True, description="Enable health check service")
|
|
@@ -1,28 +1,33 @@
|
|
|
1
1
|
"""This module contains the models for the modules."""
|
|
2
2
|
|
|
3
|
-
from digitalkin.models.module.module import Module, ModuleStatus
|
|
4
3
|
from digitalkin.models.module.module_context import ModuleContext
|
|
5
4
|
from digitalkin.models.module.module_types import (
|
|
6
5
|
DataModel,
|
|
7
6
|
DataTrigger,
|
|
8
|
-
DataTriggerT,
|
|
9
|
-
InputModelT,
|
|
10
|
-
OutputModelT,
|
|
11
|
-
SecretModelT,
|
|
12
7
|
SetupModel,
|
|
13
|
-
|
|
8
|
+
)
|
|
9
|
+
from digitalkin.models.module.tool_reference import (
|
|
10
|
+
ToolReference,
|
|
11
|
+
ToolReferenceConfig,
|
|
12
|
+
ToolSelectionMode,
|
|
13
|
+
)
|
|
14
|
+
from digitalkin.models.module.utility import (
|
|
15
|
+
EndOfStreamOutput,
|
|
16
|
+
ModuleStartInfoOutput,
|
|
17
|
+
UtilityProtocol,
|
|
18
|
+
UtilityRegistry,
|
|
14
19
|
)
|
|
15
20
|
|
|
16
21
|
__all__ = [
|
|
17
22
|
"DataModel",
|
|
18
23
|
"DataTrigger",
|
|
19
|
-
"
|
|
20
|
-
"InputModelT",
|
|
21
|
-
"Module",
|
|
24
|
+
"EndOfStreamOutput",
|
|
22
25
|
"ModuleContext",
|
|
23
|
-
"
|
|
24
|
-
"OutputModelT",
|
|
25
|
-
"SecretModelT",
|
|
26
|
+
"ModuleStartInfoOutput",
|
|
26
27
|
"SetupModel",
|
|
27
|
-
"
|
|
28
|
+
"ToolReference",
|
|
29
|
+
"ToolReferenceConfig",
|
|
30
|
+
"ToolSelectionMode",
|
|
31
|
+
"UtilityProtocol",
|
|
32
|
+
"UtilityRegistry",
|
|
28
33
|
]
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""Base types for module models."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
|
+
from typing import TYPE_CHECKING, ClassVar, Generic, TypeVar
|
|
7
|
+
|
|
8
|
+
from pydantic import BaseModel, Field
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from digitalkin.models.module.setup_types import SetupModel
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class DataTrigger(BaseModel):
|
|
15
|
+
"""Defines the root input/output model exposing the protocol.
|
|
16
|
+
|
|
17
|
+
The mandatory protocol is important to define the module behavior following the user or agent input/output.
|
|
18
|
+
|
|
19
|
+
Example:
|
|
20
|
+
class MyInput(DataModel):
|
|
21
|
+
root: DataTrigger
|
|
22
|
+
user_define_data: Any
|
|
23
|
+
|
|
24
|
+
# Usage
|
|
25
|
+
my_input = MyInput(root=DataTrigger(protocol="message"))
|
|
26
|
+
print(my_input.root.protocol) # Output: message
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
protocol: ClassVar[str]
|
|
30
|
+
created_at: str = Field(
|
|
31
|
+
default_factory=lambda: datetime.now(tz=timezone.utc).isoformat(),
|
|
32
|
+
title="Created At",
|
|
33
|
+
description="Timestamp when the payload was created.",
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
DataTriggerT = TypeVar("DataTriggerT", bound=DataTrigger)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class DataModel(BaseModel, Generic[DataTriggerT]):
|
|
41
|
+
"""Base definition of input/output model showing mandatory root fields.
|
|
42
|
+
|
|
43
|
+
The model defines the module input/output, usually referring to multiple input/output types defined by a union.
|
|
44
|
+
|
|
45
|
+
Example:
|
|
46
|
+
class ModuleInput(DataModel):
|
|
47
|
+
root: FileInput | MessageInput
|
|
48
|
+
"""
|
|
49
|
+
|
|
50
|
+
root: DataTriggerT
|
|
51
|
+
annotations: dict[str, str] = Field(
|
|
52
|
+
default={},
|
|
53
|
+
title="Annotations",
|
|
54
|
+
description="Additional metadata or annotations related to the output. ex {'role': 'user'}",
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
InputModelT = TypeVar("InputModelT", bound=DataModel)
|
|
59
|
+
OutputModelT = TypeVar("OutputModelT", bound=DataModel)
|
|
60
|
+
SecretModelT = TypeVar("SecretModelT", bound=BaseModel)
|
|
61
|
+
SetupModelT = TypeVar("SetupModelT", bound="SetupModel")
|