digitalkin 0.2.25rc0__py3-none-any.whl → 0.3.2.dev14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. base_server/server_async_insecure.py +6 -5
  2. base_server/server_async_secure.py +6 -5
  3. base_server/server_sync_insecure.py +5 -4
  4. base_server/server_sync_secure.py +5 -4
  5. digitalkin/__version__.py +1 -1
  6. digitalkin/core/__init__.py +1 -0
  7. digitalkin/core/common/__init__.py +9 -0
  8. digitalkin/core/common/factories.py +156 -0
  9. digitalkin/core/job_manager/__init__.py +1 -0
  10. digitalkin/{modules → core}/job_manager/base_job_manager.py +138 -32
  11. digitalkin/core/job_manager/single_job_manager.py +373 -0
  12. digitalkin/{modules → core}/job_manager/taskiq_broker.py +121 -26
  13. digitalkin/core/job_manager/taskiq_job_manager.py +541 -0
  14. digitalkin/core/task_manager/__init__.py +1 -0
  15. digitalkin/core/task_manager/base_task_manager.py +539 -0
  16. digitalkin/core/task_manager/local_task_manager.py +108 -0
  17. digitalkin/core/task_manager/remote_task_manager.py +87 -0
  18. digitalkin/core/task_manager/surrealdb_repository.py +266 -0
  19. digitalkin/core/task_manager/task_executor.py +249 -0
  20. digitalkin/core/task_manager/task_session.py +368 -0
  21. digitalkin/grpc_servers/__init__.py +1 -19
  22. digitalkin/grpc_servers/_base_server.py +3 -3
  23. digitalkin/grpc_servers/module_server.py +120 -195
  24. digitalkin/grpc_servers/module_servicer.py +81 -44
  25. digitalkin/grpc_servers/utils/__init__.py +1 -0
  26. digitalkin/grpc_servers/utils/exceptions.py +0 -8
  27. digitalkin/grpc_servers/utils/grpc_client_wrapper.py +25 -9
  28. digitalkin/grpc_servers/utils/grpc_error_handler.py +53 -0
  29. digitalkin/grpc_servers/utils/utility_schema_extender.py +100 -0
  30. digitalkin/logger.py +64 -27
  31. digitalkin/mixins/__init__.py +19 -0
  32. digitalkin/mixins/base_mixin.py +10 -0
  33. digitalkin/mixins/callback_mixin.py +24 -0
  34. digitalkin/mixins/chat_history_mixin.py +110 -0
  35. digitalkin/mixins/cost_mixin.py +76 -0
  36. digitalkin/mixins/file_history_mixin.py +93 -0
  37. digitalkin/mixins/filesystem_mixin.py +46 -0
  38. digitalkin/mixins/logger_mixin.py +51 -0
  39. digitalkin/mixins/storage_mixin.py +79 -0
  40. digitalkin/models/__init__.py +1 -1
  41. digitalkin/models/core/__init__.py +1 -0
  42. digitalkin/{modules/job_manager → models/core}/job_manager_models.py +3 -11
  43. digitalkin/models/core/task_monitor.py +74 -0
  44. digitalkin/models/grpc_servers/__init__.py +1 -0
  45. digitalkin/{grpc_servers/utils → models/grpc_servers}/models.py +92 -7
  46. digitalkin/models/module/__init__.py +18 -11
  47. digitalkin/models/module/base_types.py +61 -0
  48. digitalkin/models/module/module.py +9 -1
  49. digitalkin/models/module/module_context.py +282 -6
  50. digitalkin/models/module/module_types.py +29 -105
  51. digitalkin/models/module/setup_types.py +490 -0
  52. digitalkin/models/module/tool_cache.py +68 -0
  53. digitalkin/models/module/tool_reference.py +117 -0
  54. digitalkin/models/module/utility.py +167 -0
  55. digitalkin/models/services/__init__.py +9 -0
  56. digitalkin/models/services/cost.py +1 -0
  57. digitalkin/models/services/registry.py +35 -0
  58. digitalkin/models/services/storage.py +39 -5
  59. digitalkin/modules/__init__.py +5 -1
  60. digitalkin/modules/_base_module.py +265 -167
  61. digitalkin/modules/archetype_module.py +6 -1
  62. digitalkin/modules/tool_module.py +16 -3
  63. digitalkin/modules/trigger_handler.py +7 -6
  64. digitalkin/modules/triggers/__init__.py +8 -0
  65. digitalkin/modules/triggers/healthcheck_ping_trigger.py +45 -0
  66. digitalkin/modules/triggers/healthcheck_services_trigger.py +63 -0
  67. digitalkin/modules/triggers/healthcheck_status_trigger.py +52 -0
  68. digitalkin/services/__init__.py +4 -0
  69. digitalkin/services/communication/__init__.py +7 -0
  70. digitalkin/services/communication/communication_strategy.py +76 -0
  71. digitalkin/services/communication/default_communication.py +101 -0
  72. digitalkin/services/communication/grpc_communication.py +234 -0
  73. digitalkin/services/cost/__init__.py +9 -2
  74. digitalkin/services/cost/grpc_cost.py +9 -42
  75. digitalkin/services/filesystem/default_filesystem.py +0 -2
  76. digitalkin/services/filesystem/grpc_filesystem.py +10 -39
  77. digitalkin/services/registry/__init__.py +22 -1
  78. digitalkin/services/registry/default_registry.py +135 -4
  79. digitalkin/services/registry/exceptions.py +47 -0
  80. digitalkin/services/registry/grpc_registry.py +306 -0
  81. digitalkin/services/registry/registry_models.py +15 -0
  82. digitalkin/services/registry/registry_strategy.py +88 -4
  83. digitalkin/services/services_config.py +25 -3
  84. digitalkin/services/services_models.py +5 -1
  85. digitalkin/services/setup/default_setup.py +6 -7
  86. digitalkin/services/setup/grpc_setup.py +52 -15
  87. digitalkin/services/storage/grpc_storage.py +4 -4
  88. digitalkin/services/user_profile/__init__.py +12 -0
  89. digitalkin/services/user_profile/default_user_profile.py +55 -0
  90. digitalkin/services/user_profile/grpc_user_profile.py +69 -0
  91. digitalkin/services/user_profile/user_profile_strategy.py +25 -0
  92. digitalkin/utils/__init__.py +28 -0
  93. digitalkin/utils/arg_parser.py +1 -1
  94. digitalkin/utils/development_mode_action.py +2 -2
  95. digitalkin/utils/dynamic_schema.py +483 -0
  96. digitalkin/utils/package_discover.py +1 -2
  97. digitalkin/utils/schema_splitter.py +207 -0
  98. {digitalkin-0.2.25rc0.dist-info → digitalkin-0.3.2.dev14.dist-info}/METADATA +11 -30
  99. digitalkin-0.3.2.dev14.dist-info/RECORD +143 -0
  100. {digitalkin-0.2.25rc0.dist-info → digitalkin-0.3.2.dev14.dist-info}/top_level.txt +1 -0
  101. modules/archetype_with_tools_module.py +244 -0
  102. modules/cpu_intensive_module.py +1 -1
  103. modules/dynamic_setup_module.py +338 -0
  104. modules/minimal_llm_module.py +1 -1
  105. modules/text_transform_module.py +1 -1
  106. monitoring/digitalkin_observability/__init__.py +46 -0
  107. monitoring/digitalkin_observability/http_server.py +150 -0
  108. monitoring/digitalkin_observability/interceptors.py +176 -0
  109. monitoring/digitalkin_observability/metrics.py +201 -0
  110. monitoring/digitalkin_observability/prometheus.py +137 -0
  111. monitoring/tests/test_metrics.py +172 -0
  112. services/filesystem_module.py +7 -5
  113. services/storage_module.py +4 -2
  114. digitalkin/grpc_servers/registry_server.py +0 -65
  115. digitalkin/grpc_servers/registry_servicer.py +0 -456
  116. digitalkin/grpc_servers/utils/factory.py +0 -180
  117. digitalkin/modules/job_manager/single_job_manager.py +0 -294
  118. digitalkin/modules/job_manager/taskiq_job_manager.py +0 -290
  119. digitalkin-0.2.25rc0.dist-info/RECORD +0 -89
  120. /digitalkin/{grpc_servers/utils → models/grpc_servers}/types.py +0 -0
  121. {digitalkin-0.2.25rc0.dist-info → digitalkin-0.3.2.dev14.dist-info}/WHEEL +0 -0
  122. {digitalkin-0.2.25rc0.dist-info → digitalkin-0.3.2.dev14.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,46 @@
1
+ """Filesystem Mixin to ease filesystem use."""
2
+
3
+ from typing import Any
4
+
5
+ from digitalkin.models.module.module_context import ModuleContext
6
+ from digitalkin.services.filesystem.filesystem_strategy import FilesystemRecord
7
+
8
+
9
class FilesystemMixin:
    """Convenience helpers that forward file operations to ``context.filesystem``.

    Trigger handlers can inherit from this mixin to reach the filesystem
    strategy held by the module context through short static helpers.
    """

    @staticmethod
    def upload_files(context: ModuleContext, files: list[Any]) -> tuple[list[FilesystemRecord], int, int]:
        """Send a batch of files to the filesystem strategy.

        Args:
            context: Module context exposing the ``filesystem`` strategy.
            files: Files to upload, passed to the strategy unchanged.

        Returns:
            The strategy's result unchanged: a list of records followed by two
            integers (presumably the succeeded and failed counts -- confirm
            against the filesystem strategy).

        Raises:
            FilesystemServiceError: Propagated from the strategy when the upload fails.
        """
        filesystem = context.filesystem
        return filesystem.upload_files(files)

    @staticmethod
    def get_file(context: ModuleContext, file_id: str) -> FilesystemRecord:
        """Fetch one file, asking the strategy to include its content.

        Args:
            context: Module context exposing the ``filesystem`` strategy.
            file_id: Identifier of the file to fetch.

        Returns:
            The record returned by the strategy for ``file_id``.

        Raises:
            FilesystemServiceError: Propagated from the strategy when retrieval fails.
        """
        filesystem = context.filesystem
        # Content is always requested; callers of this helper cannot opt out.
        return filesystem.get_file(file_id, include_content=True)
@@ -0,0 +1,51 @@
1
+ """Logger Mixin to ease logging and merge all logs."""
2
+
3
+ from digitalkin.models.module.module_context import ModuleContext
4
+
5
+
6
class LoggerMixin:
    """Logging helpers that write through the logger held by ``context.callbacks``.

    Every helper attaches the identifiers returned by
    ``context.session.current_ids()`` as the ``extra`` argument of the log call.
    """

    @staticmethod
    def log_debug(context: ModuleContext, message: str) -> None:
        """Emit ``message`` at debug level.

        Args:
            context: Module context exposing ``callbacks.logger`` and ``session``.
            message: Text to log.
        """
        logger = context.callbacks.logger
        return logger.debug(message, extra=context.session.current_ids())

    @staticmethod
    def log_info(context: ModuleContext, message: str) -> None:
        """Emit ``message`` at info level.

        Args:
            context: Module context exposing ``callbacks.logger`` and ``session``.
            message: Text to log.
        """
        logger = context.callbacks.logger
        return logger.info(message, extra=context.session.current_ids())

    @staticmethod
    def log_warning(context: ModuleContext, message: str) -> None:
        """Emit ``message`` at warning level.

        Args:
            context: Module context exposing ``callbacks.logger`` and ``session``.
            message: Text to log.
        """
        logger = context.callbacks.logger
        return logger.warning(message, extra=context.session.current_ids())

    @staticmethod
    def log_error(context: ModuleContext, message: str) -> None:
        """Emit ``message`` at error level.

        Args:
            context: Module context exposing ``callbacks.logger`` and ``session``.
            message: Text to log.
        """
        logger = context.callbacks.logger
        return logger.error(message, extra=context.session.current_ids())
@@ -0,0 +1,79 @@
1
+ """Storage Mixin to ease storage access in Triggers."""
2
+
3
+ from typing import Any, Literal
4
+
5
+ from digitalkin.models.module.module_context import ModuleContext
6
+ from digitalkin.services.storage.storage_strategy import StorageRecord
7
+
8
+
9
class StorageMixin:
    """Convenience helpers that forward record operations to ``context.storage``.

    Trigger handlers can inherit from this mixin to store, read and update
    records without reaching into the module context themselves.
    """

    @staticmethod
    def store_storage(
        context: ModuleContext,
        collection: str,
        record_id: str | None,
        data: dict[str, Any],
        data_type: Literal["OUTPUT", "VIEW", "LOGS", "OTHER"] = "OUTPUT",
    ) -> StorageRecord:
        """Create a record through the storage strategy.

        Args:
            context: Module context exposing the ``storage`` strategy.
            collection: Name of the target collection.
            record_id: Identifier for the record, or ``None`` when none is supplied.
            data: Payload to persist.
            data_type: Category of the payload; defaults to ``"OUTPUT"``.

        Returns:
            The record returned by the storage strategy.

        Raises:
            StorageServiceError: Propagated from the strategy when the store fails.
        """
        storage = context.storage
        return storage.store(collection, record_id, data, data_type=data_type)

    @staticmethod
    def read_storage(context: ModuleContext, collection: str, record_id: str) -> StorageRecord | None:
        """Look up a single record.

        Args:
            context: Module context exposing the ``storage`` strategy.
            collection: Name of the collection to read from.
            record_id: Identifier of the record to fetch.

        Returns:
            The strategy's result for the lookup, which may be ``None``.

        Raises:
            StorageServiceError: Propagated from the strategy when the read fails.
        """
        storage = context.storage
        return storage.read(collection, record_id)

    @staticmethod
    def update_storage(
        context: ModuleContext,
        collection: str,
        record_id: str,
        data: dict[str, Any],
    ) -> StorageRecord | None:
        """Update an existing record with new data.

        Args:
            context: Module context exposing the ``storage`` strategy.
            collection: Name of the collection holding the record.
            record_id: Identifier of the record to update.
            data: New payload passed to the strategy.

        Returns:
            The strategy's result for the update, which may be ``None``.

        Raises:
            StorageServiceError: Propagated from the strategy when the update fails.
        """
        storage = context.storage
        return storage.update(collection, record_id, data)
@@ -1,6 +1,6 @@
1
1
  """This package contains the models for DigitalKin."""
2
2
 
3
- from digitalkin.models.module import Module, ModuleStatus
3
+ from digitalkin.models.module.module import Module, ModuleStatus
4
4
 
5
5
  __all__ = [
6
6
  "Module",
@@ -0,0 +1 @@
1
+ """Core models."""
@@ -2,15 +2,7 @@
2
2
 
3
3
  from enum import Enum
4
4
 
5
- from pydantic import BaseModel
6
-
7
- from digitalkin.modules.job_manager.base_job_manager import BaseJobManager
8
-
9
-
10
- class StreamCodeModel(BaseModel):
11
- """Typed error/code model."""
12
-
13
- code: str
5
+ from digitalkin.core.job_manager.base_job_manager import BaseJobManager
14
6
 
15
7
 
16
8
  class JobManagerMode(Enum):
@@ -35,10 +27,10 @@ class JobManagerMode(Enum):
35
27
  """
36
28
  match self:
37
29
  case JobManagerMode.SINGLE:
38
- from digitalkin.modules.job_manager.single_job_manager import SingleJobManager # noqa: PLC0415
30
+ from digitalkin.core.job_manager.single_job_manager import SingleJobManager # noqa: PLC0415
39
31
 
40
32
  return SingleJobManager
41
33
  case JobManagerMode.TASKIQ:
42
- from digitalkin.modules.job_manager.taskiq_job_manager import TaskiqJobManager # noqa: PLC0415
34
+ from digitalkin.core.job_manager.taskiq_job_manager import TaskiqJobManager # noqa: PLC0415
43
35
 
44
36
  return TaskiqJobManager
@@ -0,0 +1,74 @@
1
+ """Task monitoring models for signaling and heartbeat messages."""
2
+
3
+ from datetime import datetime, timezone
4
+ from enum import Enum
5
+ from typing import Any
6
+
7
+ from pydantic import BaseModel, Field
8
+
9
+
10
class TaskStatus(Enum):
    """Lifecycle states a task can be in; each member's value is its lowercase name."""

    PENDING = "pending"
    RUNNING = "running"
    CANCELLED = "cancelled"
    COMPLETED = "completed"
    FAILED = "failed"
18
+
19
+
20
class CancellationReason(Enum):
    """Why a task was cancelled, separating routine cleanup from real cancellation."""

    # --- Cleanup cancellations (not errors) ---
    # Main task completed; helper tasks are being cleaned up.
    SUCCESS_CLEANUP = "success_cleanup"
    # Main task failed; helper tasks are being cleaned up.
    FAILURE_CLEANUP = "failure_cleanup"

    # --- Real cancellations ---
    # An external signal requested cancellation.
    SIGNAL = "signal"
    # The heartbeat stopped working.
    HEARTBEAT_FAILURE = "heartbeat_failure"
    # The task timed out.
    TIMEOUT = "timeout"
    # The manager is shutting down.
    SHUTDOWN = "shutdown"

    # --- Unknown / unset ---
    # The reason was not determined.
    UNKNOWN = "unknown"
35
+
36
+
37
class SignalType(Enum):
    """Kinds of signal exchanged for a task: requests and their ``ACK_*`` counterparts."""

    # Request signals.
    START = "start"
    STOP = "stop"
    CANCEL = "cancel"
    PAUSE = "pause"
    RESUME = "resume"
    STATUS = "status"

    # Acknowledgement signals.
    ACK_CANCEL = "ack_cancel"
    ACK_PAUSE = "ack_pause"
    ACK_RESUME = "ack_resume"
    ACK_STATUS = "ack_status"
51
+
52
+
53
class SignalMessage(BaseModel):
    """Signal exchanged about a task, carrying its status and the requested action.

    ``use_enum_values`` is enabled, so ``status`` and ``action`` hold the enum
    members' values rather than the members themselves.

    Attributes:
        task_id: Unique identifier for the task (required).
        mission_id: Identifier for the mission (required).
        setup_id: Identifier for the setup; empty string by default.
        setup_version_id: Identifier for the setup version; empty string by default.
        status: Current status of the task (required).
        action: Type of signal action (required).
        timestamp: Creation time; defaults to the current UTC time.
        payload: Optional payload for the signal; empty dict by default.
    """

    model_config = {"use_enum_values": True}

    task_id: str = Field(..., description="Unique identifier for the task")
    mission_id: str = Field(..., description="Identifier for the mission")
    setup_id: str = Field(default="", description="Identifier for the setup")
    setup_version_id: str = Field(default="", description="Identifier for the setup version")
    status: TaskStatus = Field(..., description="Current status of the task")
    action: SignalType = Field(..., description="Type of signal action")
    timestamp: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc))
    # A literal default is kept on purpose so the generated schema still
    # advertises ``{}`` as the default value.
    payload: dict[str, Any] = Field(default={}, description="Optional payload for the signal")
65
+
66
+
67
class HeartbeatMessage(BaseModel):
    """Heartbeat sent for a task, identifying it and recording when it was emitted.

    Attributes:
        task_id: Unique identifier for the task (required).
        mission_id: Identifier for the mission (required).
        setup_id: Identifier for the setup; empty string by default.
        setup_version_id: Identifier for the setup version; empty string by default.
        timestamp: Creation time; defaults to the current UTC time.
    """

    task_id: str = Field(..., description="Unique identifier for the task")
    mission_id: str = Field(..., description="Identifier for the mission")
    setup_id: str = Field(default="", description="Identifier for the setup")
    setup_version_id: str = Field(default="", description="Identifier for the setup version")
    timestamp: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc))
@@ -0,0 +1 @@
1
+ """Base gRPC server and client models."""
@@ -65,6 +65,42 @@ class ServerCredentials(BaseModel):
65
65
  return v
66
66
 
67
67
 
68
class RetryPolicy(BaseModel):
    """gRPC retry policy configuration for resilient connections.

    Instances are frozen and reject unknown fields (see ``model_config``).

    Attributes:
        max_attempts: Maximum retry attempts including the original call (1 to 10)
        initial_backoff: Initial backoff duration (e.g., "0.1s")
        max_backoff: Maximum backoff duration (e.g., "10s")
        backoff_multiplier: Multiplier for exponential backoff (at least 1.0)
        retryable_status_codes: gRPC status codes that trigger retry
    """

    max_attempts: int = Field(default=5, ge=1, le=10, description="Maximum retry attempts including the original call")
    initial_backoff: str = Field(default="0.1s", description="Initial backoff duration (e.g., '0.1s')")
    max_backoff: str = Field(default="10s", description="Maximum backoff duration (e.g., '10s')")
    backoff_multiplier: float = Field(default=2.0, ge=1.0, description="Multiplier for exponential backoff")
    retryable_status_codes: list[str] = Field(
        default_factory=lambda: ["UNAVAILABLE", "RESOURCE_EXHAUSTED"],
        description="gRPC status codes that trigger retry",
    )

    model_config = {"extra": "forbid", "frozen": True}

    def to_service_config_json(self) -> str:
        """Serialize to gRPC service config JSON string.

        The document is produced with ``json.dumps`` so that string fields are
        always escaped correctly; compact separators keep the output free of
        whitespace.

        Returns:
            JSON string for grpc.service_config channel option.
        """
        import json  # noqa: PLC0415

        service_config = {
            "methodConfig": [
                {
                    # Single empty name entry; per the gRPC service config
                    # format this is the catch-all for every method.
                    "name": [{}],
                    "retryPolicy": {
                        "maxAttempts": self.max_attempts,
                        "initialBackoff": self.initial_backoff,
                        "maxBackoff": self.max_backoff,
                        "backoffMultiplier": self.backoff_multiplier,
                        "retryableStatusCodes": list(self.retryable_status_codes),
                    },
                },
            ],
        }
        return json.dumps(service_config, separators=(",", ":"))
102
+
103
+
68
104
  class ClientCredentials(BaseModel):
69
105
  """Model for client credentials in secure mode.
70
106
 
@@ -170,15 +206,47 @@ class ClientConfig(ChannelConfig):
170
206
  security: Security mode (secure/insecure)
171
207
  credentials: Client credentials for secure mode
172
208
  channel_options: Additional channel options
209
+ retry_policy: Retry policy for failed RPCs
173
210
  """
174
211
 
175
212
  credentials: ClientCredentials | None = Field(None, description="Client credentials for secure mode")
213
+ retry_policy: RetryPolicy = Field(default_factory=lambda: RetryPolicy(), description="Retry policy for failed RPCs") # noqa: PLW0108
176
214
  channel_options: list[tuple[str, Any]] = Field(
177
215
  default_factory=lambda: [
178
- ("grpc.max_receive_message_length", 50 * 1024 * 1024), # 50MB
179
- ("grpc.max_send_message_length", 50 * 1024 * 1024), # 50MB
216
+ ("grpc.max_receive_message_length", 100 * 1024 * 1024),
217
+ ("grpc.max_send_message_length", 100 * 1024 * 1024),
218
+ # === DNS Re-resolution (Critical for Container Environments) ===
219
+ # Minimum milliseconds between DNS re-resolution attempts (500 ms)
220
+ # When connection fails, gRPC will re-query DNS after this interval
221
+ # Solves: Container restarts with new IPs causing "No route to host"
222
+ ("grpc.dns_min_time_between_resolutions_ms", 500),
223
+ # Initial delay before first reconnection attempt (1 second)
224
+ ("grpc.initial_reconnect_backoff_ms", 1000),
225
+ # Maximum delay between reconnection attempts (10 seconds)
226
+ # Prevents overwhelming the network during extended outages
227
+ ("grpc.max_reconnect_backoff_ms", 10000),
228
+ # Minimum delay between reconnection attempts (500ms)
229
+ # Ensures rapid recovery for brief network glitches
230
+ ("grpc.min_reconnect_backoff_ms", 500),
231
+ # === Keepalive Settings (Detect Dead Connections) ===
232
+ # Send keepalive ping every 60 seconds when connection is idle
233
+ # Proactively detects dead connections before RPC calls fail
234
+ ("grpc.keepalive_time_ms", 60000),
235
+ # Wait 20 seconds for keepalive response before declaring connection dead
236
+ # Triggers reconnection (with DNS re-resolution) if pong not received
237
+ ("grpc.keepalive_timeout_ms", 20000),
238
+ # Send keepalive pings even when no RPCs are in flight
239
+ # Essential for long-lived connections that may sit idle
240
+ ("grpc.keepalive_permit_without_calls", True),
241
+ # Minimum interval between HTTP/2 pings (30 seconds)
242
+ # Must be >= server's grpc.http2.min_ping_interval_without_data_ms (10s)
243
+ ("grpc.http2.min_time_between_pings_ms", 30000),
244
+ # === Retry Configuration ===
245
+ # Enable automatic retry for failed RPCs (1 = enabled)
246
+ # Works with retryable status codes: UNAVAILABLE, RESOURCE_EXHAUSTED
247
+ ("grpc.enable_retries", 1),
180
248
  ],
181
- description="Additional channel options",
249
+ description="Resilient gRPC channel options with DNS re-resolution, keepalive, and retries",
182
250
  )
183
251
 
184
252
  @field_validator("credentials")
@@ -204,6 +272,15 @@ class ClientConfig(ChannelConfig):
204
272
  raise ConfigurationError(msg)
205
273
  return v
206
274
 
275
+ @property
276
+ def grpc_options(self) -> list[tuple[str, Any]]:
277
+ """Get channel options with retry policy service config.
278
+
279
+ Returns:
280
+ Full list of gRPC channel options.
281
+ """
282
+ return [*self.channel_options, ("grpc.service_config", self.retry_policy.to_service_config_json())]
283
+
207
284
 
208
285
  class ServerConfig(ChannelConfig):
209
286
  """Base configuration for gRPC servers.
@@ -223,10 +300,18 @@ class ServerConfig(ChannelConfig):
223
300
  credentials: ServerCredentials | None = Field(None, description="Server credentials for secure mode")
224
301
  server_options: list[tuple[str, Any]] = Field(
225
302
  default_factory=lambda: [
226
- ("grpc.max_receive_message_length", 50 * 1024 * 1024), # 50MB
227
- ("grpc.max_send_message_length", 50 * 1024 * 1024), # 50MB
303
+ ("grpc.max_receive_message_length", 100 * 1024 * 1024),
304
+ ("grpc.max_send_message_length", 100 * 1024 * 1024),
305
+ # === Keepalive Permission (Required for Client Keepalive) ===
306
+ # Allow clients to send keepalive pings without active RPCs
307
+ # Without this, server rejects client keepalives with GOAWAY
308
+ ("grpc.keepalive_permit_without_calls", True),
309
+ # Minimum interval server allows between client pings (10 seconds)
310
+ # Prevents "too_many_pings" GOAWAY errors
311
+ # Must match or be less than client's http2.min_time_between_pings_ms
312
+ ("grpc.http2.min_ping_interval_without_data_ms", 10000),
228
313
  ],
229
- description="Additional server options",
314
+ description="gRPC server options with keepalive support",
230
315
  )
231
316
  enable_reflection: bool = Field(default=True, description="Enable reflection for the server")
232
317
  enable_health_check: bool = Field(default=True, description="Enable health check service")
@@ -262,7 +347,7 @@ class ModuleServerConfig(ServerConfig):
262
347
  registry_address: Address of the registry server
263
348
  """
264
349
 
265
- registry_address: str | None = Field(None, description="Address of the registry server")
350
+ registry_address: str = Field(..., description="Address of the registry server")
266
351
 
267
352
 
268
353
  class RegistryServerConfig(ServerConfig):
@@ -1,26 +1,33 @@
1
1
  """This module contains the models for the modules."""
2
2
 
3
- from digitalkin.models.module.module import Module, ModuleStatus
4
3
  from digitalkin.models.module.module_context import ModuleContext
5
4
  from digitalkin.models.module.module_types import (
6
5
  DataModel,
7
6
  DataTrigger,
8
- InputModelT,
9
- OutputModelT,
10
- SecretModelT,
11
7
  SetupModel,
12
- SetupModelT,
8
+ )
9
+ from digitalkin.models.module.tool_reference import (
10
+ ToolReference,
11
+ ToolReferenceConfig,
12
+ ToolSelectionMode,
13
+ )
14
+ from digitalkin.models.module.utility import (
15
+ EndOfStreamOutput,
16
+ ModuleStartInfoOutput,
17
+ UtilityProtocol,
18
+ UtilityRegistry,
13
19
  )
14
20
 
15
21
  __all__ = [
16
22
  "DataModel",
17
23
  "DataTrigger",
18
- "InputModelT",
19
- "Module",
24
+ "EndOfStreamOutput",
20
25
  "ModuleContext",
21
- "ModuleStatus",
22
- "OutputModelT",
23
- "SecretModelT",
26
+ "ModuleStartInfoOutput",
24
27
  "SetupModel",
25
- "SetupModelT",
28
+ "ToolReference",
29
+ "ToolReferenceConfig",
30
+ "ToolSelectionMode",
31
+ "UtilityProtocol",
32
+ "UtilityRegistry",
26
33
  ]
@@ -0,0 +1,61 @@
1
+ """Base types for module models."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from datetime import datetime, timezone
6
+ from typing import TYPE_CHECKING, ClassVar, Generic, TypeVar
7
+
8
+ from pydantic import BaseModel, Field
9
+
10
+ if TYPE_CHECKING:
11
+ from digitalkin.models.module.setup_types import SetupModel
12
+
13
+
14
class DataTrigger(BaseModel):
    """Root input/output model that exposes the protocol.

    The protocol is mandatory: it defines the module behavior to follow for a
    given user or agent input/output. It is declared here as a class variable
    without a value, so concrete triggers are expected to assign it.

    Example:
        class MessageTrigger(DataTrigger):
            protocol: ClassVar[str] = "message"

        class MyInput(DataModel[MessageTrigger]):
            pass

        # Usage
        my_input = MyInput(root=MessageTrigger())
        print(my_input.root.protocol)  # Output: message
    """

    protocol: ClassVar[str]
    # ISO-8601 string of the UTC time at which the instance was built.
    created_at: str = Field(
        title="Created At",
        description="Timestamp when the payload was created.",
        default_factory=lambda: datetime.now(timezone.utc).isoformat(),
    )
35
+
36
+
37
+ DataTriggerT = TypeVar("DataTriggerT", bound=DataTrigger)
38
+
39
+
40
class DataModel(BaseModel, Generic[DataTriggerT]):
    """Base input/output model holding the mandatory ``root`` field.

    A module's input or output is described by a subclass whose ``root`` is
    usually a union of several trigger types.

    Example:
        class ModuleInput(DataModel):
            root: FileInput | MessageInput
    """

    # Trigger payload; its concrete type is supplied by the generic parameter.
    root: DataTriggerT
    # Free-form string metadata attached to the payload; empty by default.
    annotations: dict[str, str] = Field(
        title="Annotations",
        description="Additional metadata or annotations related to the output. ex {'role': 'user'}",
        default={},
    )
56
+
57
+
58
+ InputModelT = TypeVar("InputModelT", bound=DataModel)
59
+ OutputModelT = TypeVar("OutputModelT", bound=DataModel)
60
+ SecretModelT = TypeVar("SecretModelT", bound=BaseModel)
61
+ SetupModelT = TypeVar("SetupModelT", bound="SetupModel")
@@ -2,7 +2,15 @@
2
2
 
3
3
  from enum import Enum, auto
4
4
 
5
- from pydantic import BaseModel
5
+ from pydantic import BaseModel, Field
6
+
7
+
8
class ModuleCodeModel(BaseModel):
    """Typed error/code model.

    Attributes:
        code: Required code string.
        message: Optional message; ``None`` when not provided.
        short_description: Optional short description; ``None`` when not provided.
    """

    code: str = Field(...)
    message: str | None = Field(None)
    short_description: str | None = Field(None)
6
14
 
7
15
 
8
16
  class ModuleStatus(Enum):