digitalkin 0.3.1.dev2__py3-none-any.whl → 0.3.2.dev14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. base_server/server_async_insecure.py +6 -5
  2. base_server/server_async_secure.py +6 -5
  3. base_server/server_sync_insecure.py +5 -4
  4. base_server/server_sync_secure.py +5 -4
  5. digitalkin/__version__.py +1 -1
  6. digitalkin/core/job_manager/base_job_manager.py +1 -1
  7. digitalkin/core/job_manager/single_job_manager.py +28 -9
  8. digitalkin/core/job_manager/taskiq_broker.py +7 -6
  9. digitalkin/core/job_manager/taskiq_job_manager.py +1 -1
  10. digitalkin/core/task_manager/surrealdb_repository.py +7 -7
  11. digitalkin/core/task_manager/task_session.py +60 -98
  12. digitalkin/grpc_servers/module_server.py +109 -168
  13. digitalkin/grpc_servers/module_servicer.py +38 -16
  14. digitalkin/grpc_servers/utils/grpc_client_wrapper.py +24 -8
  15. digitalkin/grpc_servers/utils/utility_schema_extender.py +100 -0
  16. digitalkin/models/__init__.py +1 -1
  17. digitalkin/models/core/job_manager_models.py +0 -8
  18. digitalkin/models/core/task_monitor.py +4 -0
  19. digitalkin/models/grpc_servers/models.py +91 -6
  20. digitalkin/models/module/__init__.py +18 -13
  21. digitalkin/models/module/base_types.py +61 -0
  22. digitalkin/models/module/module_context.py +173 -13
  23. digitalkin/models/module/module_types.py +28 -392
  24. digitalkin/models/module/setup_types.py +490 -0
  25. digitalkin/models/module/tool_cache.py +68 -0
  26. digitalkin/models/module/tool_reference.py +117 -0
  27. digitalkin/models/module/utility.py +167 -0
  28. digitalkin/models/services/registry.py +35 -0
  29. digitalkin/modules/__init__.py +5 -1
  30. digitalkin/modules/_base_module.py +154 -61
  31. digitalkin/modules/archetype_module.py +6 -1
  32. digitalkin/modules/tool_module.py +6 -1
  33. digitalkin/modules/triggers/__init__.py +8 -0
  34. digitalkin/modules/triggers/healthcheck_ping_trigger.py +45 -0
  35. digitalkin/modules/triggers/healthcheck_services_trigger.py +63 -0
  36. digitalkin/modules/triggers/healthcheck_status_trigger.py +52 -0
  37. digitalkin/services/__init__.py +4 -0
  38. digitalkin/services/communication/__init__.py +7 -0
  39. digitalkin/services/communication/communication_strategy.py +76 -0
  40. digitalkin/services/communication/default_communication.py +101 -0
  41. digitalkin/services/communication/grpc_communication.py +234 -0
  42. digitalkin/services/cost/grpc_cost.py +1 -1
  43. digitalkin/services/filesystem/grpc_filesystem.py +1 -1
  44. digitalkin/services/registry/__init__.py +22 -1
  45. digitalkin/services/registry/default_registry.py +135 -4
  46. digitalkin/services/registry/exceptions.py +47 -0
  47. digitalkin/services/registry/grpc_registry.py +306 -0
  48. digitalkin/services/registry/registry_models.py +15 -0
  49. digitalkin/services/registry/registry_strategy.py +88 -4
  50. digitalkin/services/services_config.py +25 -3
  51. digitalkin/services/services_models.py +5 -1
  52. digitalkin/services/setup/default_setup.py +1 -1
  53. digitalkin/services/setup/grpc_setup.py +1 -1
  54. digitalkin/services/storage/grpc_storage.py +1 -1
  55. digitalkin/services/user_profile/__init__.py +11 -0
  56. digitalkin/services/user_profile/grpc_user_profile.py +2 -2
  57. digitalkin/services/user_profile/user_profile_strategy.py +0 -15
  58. digitalkin/utils/schema_splitter.py +207 -0
  59. {digitalkin-0.3.1.dev2.dist-info → digitalkin-0.3.2.dev14.dist-info}/METADATA +5 -5
  60. digitalkin-0.3.2.dev14.dist-info/RECORD +143 -0
  61. {digitalkin-0.3.1.dev2.dist-info → digitalkin-0.3.2.dev14.dist-info}/top_level.txt +1 -0
  62. modules/archetype_with_tools_module.py +244 -0
  63. modules/cpu_intensive_module.py +1 -1
  64. modules/dynamic_setup_module.py +5 -29
  65. modules/minimal_llm_module.py +1 -1
  66. modules/text_transform_module.py +1 -1
  67. monitoring/digitalkin_observability/__init__.py +46 -0
  68. monitoring/digitalkin_observability/http_server.py +150 -0
  69. monitoring/digitalkin_observability/interceptors.py +176 -0
  70. monitoring/digitalkin_observability/metrics.py +201 -0
  71. monitoring/digitalkin_observability/prometheus.py +137 -0
  72. monitoring/tests/test_metrics.py +172 -0
  73. services/filesystem_module.py +7 -5
  74. services/storage_module.py +4 -2
  75. digitalkin/grpc_servers/registry_server.py +0 -65
  76. digitalkin/grpc_servers/registry_servicer.py +0 -456
  77. digitalkin-0.3.1.dev2.dist-info/RECORD +0 -119
  78. {digitalkin-0.3.1.dev2.dist-info → digitalkin-0.3.2.dev14.dist-info}/WHEEL +0 -0
  79. {digitalkin-0.3.1.dev2.dist-info → digitalkin-0.3.2.dev14.dist-info}/licenses/LICENSE +0 -0
@@ -5,7 +5,7 @@ from collections.abc import AsyncGenerator
5
5
  from typing import Any
6
6
 
7
7
  import grpc
8
- from digitalkin_proto.agentic_mesh_protocol.module.v1 import (
8
+ from agentic_mesh_protocol.module.v1 import (
9
9
  information_pb2,
10
10
  lifecycle_pb2,
11
11
  module_service_pb2_grpc,
@@ -19,6 +19,7 @@ from digitalkin.logger import logger
19
19
  from digitalkin.models.core.job_manager_models import JobManagerMode
20
20
  from digitalkin.models.module.module import ModuleStatus
21
21
  from digitalkin.modules._base_module import BaseModule
22
+ from digitalkin.services.registry import GrpcRegistry, RegistryStrategy
22
23
  from digitalkin.services.services_models import ServicesMode
23
24
  from digitalkin.services.setup.default_setup import DefaultSetup
24
25
  from digitalkin.services.setup.grpc_setup import GrpcSetup
@@ -40,6 +41,7 @@ class ModuleServicer(module_service_pb2_grpc.ModuleServiceServicer, ArgParser):
40
41
  args: Namespace
41
42
  setup: SetupStrategy
42
43
  job_manager: BaseJobManager
44
+ _registry_cache: RegistryStrategy | None = None
43
45
 
44
46
  def _add_parser_args(self, parser: ArgumentParser) -> None:
45
47
  super()._add_parser_args(parser)
@@ -82,6 +84,26 @@ class ModuleServicer(module_service_pb2_grpc.ModuleServiceServicer, ArgParser):
82
84
  )
83
85
  self.setup = GrpcSetup() if self.args.services_mode == ServicesMode.REMOTE else DefaultSetup()
84
86
 
87
+ def _get_registry(self) -> RegistryStrategy | None:
88
+ """Get a cached registry instance if configured.
89
+
90
+ Returns:
91
+ Cached GrpcRegistry instance if registry config exists, None otherwise.
92
+ """
93
+ if self._registry_cache is not None:
94
+ return self._registry_cache
95
+
96
+ registry_config = self.module_class.services_config_params.get("registry")
97
+ if not registry_config:
98
+ return None
99
+
100
+ client_config = registry_config.get("client_config")
101
+ if not client_config:
102
+ return None
103
+
104
+ self._registry_cache = GrpcRegistry("", "", "", client_config)
105
+ return self._registry_cache
106
+
85
107
  async def ConfigSetupModule( # noqa: N802
86
108
  self,
87
109
  request: lifecycle_pb2.ConfigSetupModuleRequest,
@@ -108,8 +130,6 @@ class ModuleServicer(module_service_pb2_grpc.ModuleServiceServicer, ArgParser):
108
130
  "mission_id": request.mission_id,
109
131
  },
110
132
  )
111
- # Process the module input
112
- # TODO: Secret should be used here as well
113
133
  setup_version = request.setup_version
114
134
  config_setup_data = self.module_class.create_config_setup_model(json_format.MessageToDict(request.content))
115
135
  setup_version_data = await self.module_class.create_setup_model(
@@ -139,8 +159,8 @@ class ModuleServicer(module_service_pb2_grpc.ModuleServiceServicer, ArgParser):
139
159
  return lifecycle_pb2.ConfigSetupModuleResponse(success=False)
140
160
 
141
161
  updated_setup_data = await self.job_manager.generate_config_setup_module_response(job_id)
142
- logger.info("Setup updated")
143
- logger.debug(f"Updated setup data: {updated_setup_data=}")
162
+ logger.info("Setup updated", extra={"job_id": job_id})
163
+ logger.debug("Updated setup data", extra={"job_id": job_id, "setup_data": updated_setup_data})
144
164
  setup_version.content = json_format.ParseDict(
145
165
  updated_setup_data,
146
166
  struct_pb2.Struct(),
@@ -219,16 +239,16 @@ class ModuleServicer(module_service_pb2_grpc.ModuleServiceServicer, ArgParser):
219
239
  yield lifecycle_pb2.StartModuleResponse(success=False, job_id=job_id)
220
240
  break
221
241
 
222
- if message.get("code", None) is not None and message.get("code") == "__END_OF_STREAM__":
242
+ logger.info("Yielding message from job %s: %s", job_id, message)
243
+ proto = json_format.ParseDict(message, struct_pb2.Struct(), ignore_unknown_fields=True)
244
+ yield lifecycle_pb2.StartModuleResponse(success=True, output=proto, job_id=job_id)
245
+
246
+ if message.get("root", {}).get("protocol") == "end_of_stream":
223
247
  logger.info(
224
- "End of stream via __END_OF_STREAM__",
248
+ "End of stream signal received",
225
249
  extra={"job_id": job_id, "mission_id": request.mission_id},
226
250
  )
227
251
  break
228
-
229
- logger.info("Yielding message from job %s: %s", job_id, message)
230
- proto = json_format.ParseDict(message, struct_pb2.Struct(), ignore_unknown_fields=True)
231
- yield lifecycle_pb2.StartModuleResponse(success=True, output=proto, job_id=job_id)
232
252
  finally:
233
253
  await self.job_manager.wait_for_completion(job_id)
234
254
  await self.job_manager.clean_session(job_id, mission_id=request.mission_id)
@@ -249,17 +269,19 @@ class ModuleServicer(module_service_pb2_grpc.ModuleServiceServicer, ArgParser):
249
269
  Returns:
250
270
  A response indicating success or failure.
251
271
  """
252
- logger.debug("StopModule called for module: '%s'", self.module_class.__name__)
272
+ logger.debug(
273
+ "StopModule called",
274
+ extra={"module_class": self.module_class.__name__, "job_id": request.job_id},
275
+ )
253
276
 
254
277
  response: bool = await self.job_manager.stop_module(request.job_id)
255
278
  if not response:
256
- message = f"Job {request.job_id} not found"
257
- logger.warning(message)
279
+ logger.warning("Job not found for stop request", extra={"job_id": request.job_id})
258
280
  context.set_code(grpc.StatusCode.NOT_FOUND)
259
- context.set_details(message)
281
+ context.set_details(f"Job {request.job_id} not found")
260
282
  return lifecycle_pb2.StopModuleResponse(success=False)
261
283
 
262
- logger.debug("Job %s stopped successfully", request.job_id, extra={"job_id": request.job_id})
284
+ logger.debug("Job stopped successfully", extra={"job_id": request.job_id})
263
285
  return lifecycle_pb2.StopModuleResponse(success=True)
264
286
 
265
287
  async def GetModuleStatus( # noqa: N802
@@ -43,9 +43,9 @@ class GrpcClientWrapper:
43
43
  private_key=private_key,
44
44
  )
45
45
 
46
- return grpc.secure_channel(config.address, channel_credentials, options=config.channel_options)
46
+ return grpc.secure_channel(config.address, channel_credentials, options=config.grpc_options)
47
47
  # Insecure channel
48
- return grpc.insecure_channel(config.address, options=config.channel_options)
48
+ return grpc.insecure_channel(config.address, options=config.grpc_options)
49
49
 
50
50
  def exec_grpc_query(self, query_endpoint: str, request: Any) -> Any: # noqa: ANN401
51
51
  """Execute a gRPC query with from the query's rpc endpoint name.
@@ -58,15 +58,31 @@ class GrpcClientWrapper:
58
58
  corresponding gRPC reponse.
59
59
 
60
60
  Raises:
61
- ServerError: gRPC error catching
61
+ ServerError: gRPC error catching with status code and details
62
62
  """
63
+ service_name = getattr(self, "service_name", "unknown")
63
64
  try:
64
- # Call the register method
65
- logger.debug("send request to %s", query_endpoint, extra={"request": request})
65
+ logger.debug(
66
+ "Sending gRPC request to %s",
67
+ query_endpoint,
68
+ extra={"request": str(request), "service": service_name},
69
+ )
66
70
  response = getattr(self.stub, query_endpoint)(request)
67
- logger.debug("receive response from request to %s", query_endpoint, extra={"response": response})
71
+ logger.debug(
72
+ "Received gRPC response from %s",
73
+ query_endpoint,
74
+ extra={"response": str(response), "service": service_name},
75
+ )
68
76
  except grpc.RpcError as e:
69
- logger.exception("RPC error during %s", query_endpoint, extra={"error": e.details()})
70
- raise ServerError
77
+ status_code = e.code().name if hasattr(e, "code") else "UNKNOWN"
78
+ details = e.details() if hasattr(e, "details") else str(e)
79
+ msg = f"[{status_code}] {details}"
80
+ logger.error(
81
+ "gRPC %s failed: %s",
82
+ query_endpoint,
83
+ msg,
84
+ extra={"service": service_name},
85
+ )
86
+ raise ServerError(msg) from e
71
87
  else:
72
88
  return response
@@ -0,0 +1,100 @@
1
+ """Utility schema extender for gRPC API responses.
2
+
3
+ This module extends module schemas with SDK utility protocols for API responses.
4
+ """
5
+
6
+ import types
7
+ from typing import Annotated, Union, get_args, get_origin
8
+
9
+ from pydantic import Field, create_model
10
+
11
+ from digitalkin.models.module.module_types import DataModel
12
+ from digitalkin.models.module.utility import (
13
+ EndOfStreamOutput,
14
+ HealthcheckPingInput,
15
+ HealthcheckPingOutput,
16
+ HealthcheckServicesInput,
17
+ HealthcheckServicesOutput,
18
+ HealthcheckStatusInput,
19
+ HealthcheckStatusOutput,
20
+ )
21
+
22
+
23
class UtilitySchemaExtender:
    """Extends module schemas with SDK utility protocols for API responses.

    This class provides methods to create extended Pydantic models that include
    both user-defined protocols and SDK utility protocols in their schemas.
    """

    # Utility protocols appended to every module's output schema.
    _output_protocols = (
        EndOfStreamOutput,
        HealthcheckPingOutput,
        HealthcheckServicesOutput,
        HealthcheckStatusOutput,
    )

    # Utility protocols appended to every module's input schema.
    _input_protocols = (
        HealthcheckPingInput,
        HealthcheckServicesInput,
        HealthcheckStatusInput,
    )

    @classmethod
    def _extract_union_types(cls, annotation: type) -> tuple:
        """Extract individual types from a Union or Annotated[Union, ...] annotation.

        Args:
            annotation: The annotation to inspect; may be Annotated, a Union
                (typing.Union or PEP 604 `X | Y`), or a plain type.

        Returns:
            A tuple of individual types contained in the Union.
        """
        # Annotated[X, ...] carries the real annotation as its first argument.
        if get_origin(annotation) is Annotated:
            inner_args = get_args(annotation)
            if inner_args:
                return cls._extract_union_types(inner_args[0])
        if get_origin(annotation) is Union or isinstance(annotation, types.UnionType):
            return get_args(annotation)
        return (annotation,)

    @classmethod
    def _create_extended_model(
        cls,
        base_model: type[DataModel],
        utility_protocols: tuple,
    ) -> type[DataModel]:
        """Build a DataModel subclass whose root union also includes utility protocols.

        Shared implementation behind the input/output extension entry points
        (previously duplicated in both public methods).

        Args:
            base_model: The module's format class (a DataModel subclass).
            utility_protocols: Utility protocol types appended to the root union.

        Returns:
            A new DataModel subclass with root typed as Union[original_types, utility_types].
        """
        original_annotation = base_model.model_fields["root"].annotation
        original_types = cls._extract_union_types(original_annotation)
        extended_types = (*original_types, *utility_protocols)
        union_type = Union[extended_types]  # type: ignore[valid-type]  # noqa: UP007
        # Discriminated union: Pydantic selects the concrete model via its
        # `protocol` field instead of trying each union member in order.
        extended_root = Annotated[union_type, Field(discriminator="protocol")]  # type: ignore[valid-type]
        return create_model(
            f"{base_model.__name__}Utilities",
            __base__=DataModel,
            root=(extended_root, ...),
            annotations=(dict[str, str], Field(default={})),
        )

    @classmethod
    def create_extended_output_model(cls, base_model: type[DataModel]) -> type[DataModel]:
        """Create an extended output model that includes utility output protocols.

        Args:
            base_model: The module's output_format class (a DataModel subclass).

        Returns:
            A new DataModel subclass with root typed as Union[original_types, utility_types].
        """
        return cls._create_extended_model(base_model, cls._output_protocols)

    @classmethod
    def create_extended_input_model(cls, base_model: type[DataModel]) -> type[DataModel]:
        """Create an extended input model that includes utility input protocols.

        Args:
            base_model: The module's input_format class (a DataModel subclass).

        Returns:
            A new DataModel subclass with root typed as Union[original_types, utility_types].
        """
        return cls._create_extended_model(base_model, cls._input_protocols)
@@ -1,6 +1,6 @@
1
1
  """This package contains the models for DigitalKin."""
2
2
 
3
- from digitalkin.models.module import Module, ModuleStatus
3
+ from digitalkin.models.module.module import Module, ModuleStatus
4
4
 
5
5
  __all__ = [
6
6
  "Module",
@@ -2,17 +2,9 @@
2
2
 
3
3
  from enum import Enum
4
4
 
5
- from pydantic import BaseModel
6
-
7
5
  from digitalkin.core.job_manager.base_job_manager import BaseJobManager
8
6
 
9
7
 
10
- class StreamCodeModel(BaseModel):
11
- """Typed error/code model."""
12
-
13
- code: str
14
-
15
-
16
8
  class JobManagerMode(Enum):
17
9
  """Job manager mode."""
18
10
 
@@ -55,6 +55,8 @@ class SignalMessage(BaseModel):
55
55
 
56
56
  task_id: str = Field(..., description="Unique identifier for the task")
57
57
  mission_id: str = Field(..., description="Identifier for the mission")
58
+ setup_id: str = Field(default="", description="Identifier for the setup")
59
+ setup_version_id: str = Field(default="", description="Identifier for the setup version")
58
60
  status: TaskStatus = Field(..., description="Current status of the task")
59
61
  action: SignalType = Field(..., description="Type of signal action")
60
62
  timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
@@ -67,4 +69,6 @@ class HeartbeatMessage(BaseModel):
67
69
 
68
70
  task_id: str = Field(..., description="Unique identifier for the task")
69
71
  mission_id: str = Field(..., description="Identifier for the mission")
72
+ setup_id: str = Field(default="", description="Identifier for the setup")
73
+ setup_version_id: str = Field(default="", description="Identifier for the setup version")
70
74
  timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
@@ -65,6 +65,42 @@ class ServerCredentials(BaseModel):
65
65
  return v
66
66
 
67
67
 
68
class RetryPolicy(BaseModel):
    """gRPC retry policy configuration for resilient connections.

    Attributes:
        max_attempts: Maximum retry attempts including the original call
        initial_backoff: Initial backoff duration (e.g., "0.1s")
        max_backoff: Maximum backoff duration (e.g., "10s")
        backoff_multiplier: Multiplier for exponential backoff
        retryable_status_codes: gRPC status codes that trigger retry
    """

    max_attempts: int = Field(default=5, ge=1, le=10, description="Maximum retry attempts including the original call")
    initial_backoff: str = Field(default="0.1s", description="Initial backoff duration (e.g., '0.1s')")
    max_backoff: str = Field(default="10s", description="Maximum backoff duration (e.g., '10s')")
    backoff_multiplier: float = Field(default=2.0, ge=1.0, description="Multiplier for exponential backoff")
    retryable_status_codes: list[str] = Field(
        default_factory=lambda: ["UNAVAILABLE", "RESOURCE_EXHAUSTED"],
        description="gRPC status codes that trigger retry",
    )

    model_config = {"extra": "forbid", "frozen": True}

    def to_service_config_json(self) -> str:
        """Serialize to gRPC service config JSON string.

        Uses json.dumps instead of hand-built f-string JSON so that arbitrary
        status-code strings are escaped correctly; output is byte-identical to
        the previous hand-rolled format for the default values.

        Returns:
            JSON string for grpc.service_config channel option.
        """
        import json  # local import: this hunk does not show the file's import block

        service_config = {
            "methodConfig": [
                {
                    "name": [{}],
                    "retryPolicy": {
                        "maxAttempts": self.max_attempts,
                        "initialBackoff": self.initial_backoff,
                        "maxBackoff": self.max_backoff,
                        "backoffMultiplier": self.backoff_multiplier,
                        "retryableStatusCodes": list(self.retryable_status_codes),
                    },
                }
            ]
        }
        # Compact separators match gRPC's expected service-config formatting.
        return json.dumps(service_config, separators=(",", ":"))
+ )
102
+
103
+
68
104
  class ClientCredentials(BaseModel):
69
105
  """Model for client credentials in secure mode.
70
106
 
@@ -170,15 +206,47 @@ class ClientConfig(ChannelConfig):
170
206
  security: Security mode (secure/insecure)
171
207
  credentials: Client credentials for secure mode
172
208
  channel_options: Additional channel options
209
+ retry_policy: Retry policy for failed RPCs
173
210
  """
174
211
 
175
212
  credentials: ClientCredentials | None = Field(None, description="Client credentials for secure mode")
213
+ retry_policy: RetryPolicy = Field(default_factory=lambda: RetryPolicy(), description="Retry policy for failed RPCs") # noqa: PLW0108
176
214
  channel_options: list[tuple[str, Any]] = Field(
177
215
  default_factory=lambda: [
178
- ("grpc.max_receive_message_length", 100 * 1024 * 1024), # 100MB
179
- ("grpc.max_send_message_length", 100 * 1024 * 1024), # 100MB
216
+ ("grpc.max_receive_message_length", 100 * 1024 * 1024),
217
+ ("grpc.max_send_message_length", 100 * 1024 * 1024),
218
+ # === DNS Re-resolution (Critical for Container Environments) ===
219
+ # Minimum milliseconds between DNS re-resolution attempts (500 ms)
220
+ # When connection fails, gRPC will re-query DNS after this interval
221
+ # Solves: Container restarts with new IPs causing "No route to host"
222
+ ("grpc.dns_min_time_between_resolutions_ms", 500),
223
+ # Initial delay before first reconnection attempt (1 second)
224
+ ("grpc.initial_reconnect_backoff_ms", 1000),
225
+ # Maximum delay between reconnection attempts (10 seconds)
226
+ # Prevents overwhelming the network during extended outages
227
+ ("grpc.max_reconnect_backoff_ms", 10000),
228
+ # Minimum delay between reconnection attempts (500ms)
229
+ # Ensures rapid recovery for brief network glitches
230
+ ("grpc.min_reconnect_backoff_ms", 500),
231
+ # === Keepalive Settings (Detect Dead Connections) ===
232
+ # Send keepalive ping every 60 seconds when connection is idle
233
+ # Proactively detects dead connections before RPC calls fail
234
+ ("grpc.keepalive_time_ms", 60000),
235
+ # Wait 20 seconds for keepalive response before declaring connection dead
236
+ # Triggers reconnection (with DNS re-resolution) if pong not received
237
+ ("grpc.keepalive_timeout_ms", 20000),
238
+ # Send keepalive pings even when no RPCs are in flight
239
+ # Essential for long-lived connections that may sit idle
240
+ ("grpc.keepalive_permit_without_calls", True),
241
+ # Minimum interval between HTTP/2 pings (30 seconds)
242
+ # Must be >= server's grpc.http2.min_ping_interval_without_data_ms (10s)
243
+ ("grpc.http2.min_time_between_pings_ms", 30000),
244
+ # === Retry Configuration ===
245
+ # Enable automatic retry for failed RPCs (1 = enabled)
246
+ # Works with retryable status codes: UNAVAILABLE, RESOURCE_EXHAUSTED
247
+ ("grpc.enable_retries", 1),
180
248
  ],
181
- description="Additional channel options",
249
+ description="Resilient gRPC channel options with DNS re-resolution, keepalive, and retries",
182
250
  )
183
251
 
184
252
  @field_validator("credentials")
@@ -204,6 +272,15 @@ class ClientConfig(ChannelConfig):
204
272
  raise ConfigurationError(msg)
205
273
  return v
206
274
 
275
+ @property
276
+ def grpc_options(self) -> list[tuple[str, Any]]:
277
+ """Get channel options with retry policy service config.
278
+
279
+ Returns:
280
+ Full list of gRPC channel options.
281
+ """
282
+ return [*self.channel_options, ("grpc.service_config", self.retry_policy.to_service_config_json())]
283
+
207
284
 
208
285
  class ServerConfig(ChannelConfig):
209
286
  """Base configuration for gRPC servers.
@@ -223,10 +300,18 @@ class ServerConfig(ChannelConfig):
223
300
  credentials: ServerCredentials | None = Field(None, description="Server credentials for secure mode")
224
301
  server_options: list[tuple[str, Any]] = Field(
225
302
  default_factory=lambda: [
226
- ("grpc.max_receive_message_length", 100 * 1024 * 1024), # 100MB
227
- ("grpc.max_send_message_length", 100 * 1024 * 1024), # 100MB
303
+ ("grpc.max_receive_message_length", 100 * 1024 * 1024),
304
+ ("grpc.max_send_message_length", 100 * 1024 * 1024),
305
+ # === Keepalive Permission (Required for Client Keepalive) ===
306
+ # Allow clients to send keepalive pings without active RPCs
307
+ # Without this, server rejects client keepalives with GOAWAY
308
+ ("grpc.keepalive_permit_without_calls", True),
309
+ # Minimum interval server allows between client pings (10 seconds)
310
+ # Prevents "too_many_pings" GOAWAY errors
311
+ # Must match or be less than client's http2.min_time_between_pings_ms
312
+ ("grpc.http2.min_ping_interval_without_data_ms", 10000),
228
313
  ],
229
- description="Additional server options",
314
+ description="gRPC server options with keepalive support",
230
315
  )
231
316
  enable_reflection: bool = Field(default=True, description="Enable reflection for the server")
232
317
  enable_health_check: bool = Field(default=True, description="Enable health check service")
@@ -1,28 +1,33 @@
1
1
  """This module contains the models for the modules."""
2
2
 
3
- from digitalkin.models.module.module import Module, ModuleStatus
4
3
  from digitalkin.models.module.module_context import ModuleContext
5
4
  from digitalkin.models.module.module_types import (
6
5
  DataModel,
7
6
  DataTrigger,
8
- DataTriggerT,
9
- InputModelT,
10
- OutputModelT,
11
- SecretModelT,
12
7
  SetupModel,
13
- SetupModelT,
8
+ )
9
+ from digitalkin.models.module.tool_reference import (
10
+ ToolReference,
11
+ ToolReferenceConfig,
12
+ ToolSelectionMode,
13
+ )
14
+ from digitalkin.models.module.utility import (
15
+ EndOfStreamOutput,
16
+ ModuleStartInfoOutput,
17
+ UtilityProtocol,
18
+ UtilityRegistry,
14
19
  )
15
20
 
16
21
  __all__ = [
17
22
  "DataModel",
18
23
  "DataTrigger",
19
- "DataTriggerT",
20
- "InputModelT",
21
- "Module",
24
+ "EndOfStreamOutput",
22
25
  "ModuleContext",
23
- "ModuleStatus",
24
- "OutputModelT",
25
- "SecretModelT",
26
+ "ModuleStartInfoOutput",
26
27
  "SetupModel",
27
- "SetupModelT",
28
+ "ToolReference",
29
+ "ToolReferenceConfig",
30
+ "ToolSelectionMode",
31
+ "UtilityProtocol",
32
+ "UtilityRegistry",
28
33
  ]
@@ -0,0 +1,61 @@
1
+ """Base types for module models."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from datetime import datetime, timezone
6
+ from typing import TYPE_CHECKING, ClassVar, Generic, TypeVar
7
+
8
+ from pydantic import BaseModel, Field
9
+
10
+ if TYPE_CHECKING:
11
+ from digitalkin.models.module.setup_types import SetupModel
12
+
13
+
14
class DataTrigger(BaseModel):
    """Defines the root input/output model exposing the protocol.

    The mandatory protocol is important to define the module behavior following
    the user or agent input/output. `protocol` is a ClassVar, not a Pydantic
    field: each concrete trigger subclass declares its fixed protocol
    identifier at class level rather than receiving it at construction time.

    Example:
        class MessageTrigger(DataTrigger):
            protocol: ClassVar[str] = "message"

        class MyInput(DataModel):
            root: MessageTrigger

        # Usage
        my_input = MyInput(root=MessageTrigger())
        print(my_input.root.protocol)  # Output: message
    """

    # Per-subclass protocol identifier; ClassVar so Pydantic excludes it
    # from validation/serialization of individual instances.
    protocol: ClassVar[str]
    created_at: str = Field(
        default_factory=lambda: datetime.now(tz=timezone.utc).isoformat(),
        title="Created At",
        description="Timestamp when the payload was created.",
    )
36
+
37
+ DataTriggerT = TypeVar("DataTriggerT", bound=DataTrigger)
38
+
39
+
40
class DataModel(BaseModel, Generic[DataTriggerT]):
    """Base definition of input/output model showing mandatory root fields.

    The model defines the module input/output, usually referring to multiple
    input/output types defined by a union.

    Example:
        class ModuleInput(DataModel):
            root: FileInput | MessageInput
    """

    root: DataTriggerT
    annotations: dict[str, str] = Field(
        # default_factory is the idiomatic spelling for a mutable default and
        # guarantees a fresh dict per instance.
        default_factory=dict,
        title="Annotations",
        description="Additional metadata or annotations related to the output. ex {'role': 'user'}",
    )
56
+
57
+
58
+ InputModelT = TypeVar("InputModelT", bound=DataModel)
59
+ OutputModelT = TypeVar("OutputModelT", bound=DataModel)
60
+ SecretModelT = TypeVar("SecretModelT", bound=BaseModel)
61
+ SetupModelT = TypeVar("SetupModelT", bound="SetupModel")