digitalkin 0.2.25rc0__py3-none-any.whl → 0.3.2.dev14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- base_server/server_async_insecure.py +6 -5
- base_server/server_async_secure.py +6 -5
- base_server/server_sync_insecure.py +5 -4
- base_server/server_sync_secure.py +5 -4
- digitalkin/__version__.py +1 -1
- digitalkin/core/__init__.py +1 -0
- digitalkin/core/common/__init__.py +9 -0
- digitalkin/core/common/factories.py +156 -0
- digitalkin/core/job_manager/__init__.py +1 -0
- digitalkin/{modules → core}/job_manager/base_job_manager.py +138 -32
- digitalkin/core/job_manager/single_job_manager.py +373 -0
- digitalkin/{modules → core}/job_manager/taskiq_broker.py +121 -26
- digitalkin/core/job_manager/taskiq_job_manager.py +541 -0
- digitalkin/core/task_manager/__init__.py +1 -0
- digitalkin/core/task_manager/base_task_manager.py +539 -0
- digitalkin/core/task_manager/local_task_manager.py +108 -0
- digitalkin/core/task_manager/remote_task_manager.py +87 -0
- digitalkin/core/task_manager/surrealdb_repository.py +266 -0
- digitalkin/core/task_manager/task_executor.py +249 -0
- digitalkin/core/task_manager/task_session.py +368 -0
- digitalkin/grpc_servers/__init__.py +1 -19
- digitalkin/grpc_servers/_base_server.py +3 -3
- digitalkin/grpc_servers/module_server.py +120 -195
- digitalkin/grpc_servers/module_servicer.py +81 -44
- digitalkin/grpc_servers/utils/__init__.py +1 -0
- digitalkin/grpc_servers/utils/exceptions.py +0 -8
- digitalkin/grpc_servers/utils/grpc_client_wrapper.py +25 -9
- digitalkin/grpc_servers/utils/grpc_error_handler.py +53 -0
- digitalkin/grpc_servers/utils/utility_schema_extender.py +100 -0
- digitalkin/logger.py +64 -27
- digitalkin/mixins/__init__.py +19 -0
- digitalkin/mixins/base_mixin.py +10 -0
- digitalkin/mixins/callback_mixin.py +24 -0
- digitalkin/mixins/chat_history_mixin.py +110 -0
- digitalkin/mixins/cost_mixin.py +76 -0
- digitalkin/mixins/file_history_mixin.py +93 -0
- digitalkin/mixins/filesystem_mixin.py +46 -0
- digitalkin/mixins/logger_mixin.py +51 -0
- digitalkin/mixins/storage_mixin.py +79 -0
- digitalkin/models/__init__.py +1 -1
- digitalkin/models/core/__init__.py +1 -0
- digitalkin/{modules/job_manager → models/core}/job_manager_models.py +3 -11
- digitalkin/models/core/task_monitor.py +74 -0
- digitalkin/models/grpc_servers/__init__.py +1 -0
- digitalkin/{grpc_servers/utils → models/grpc_servers}/models.py +92 -7
- digitalkin/models/module/__init__.py +18 -11
- digitalkin/models/module/base_types.py +61 -0
- digitalkin/models/module/module.py +9 -1
- digitalkin/models/module/module_context.py +282 -6
- digitalkin/models/module/module_types.py +29 -105
- digitalkin/models/module/setup_types.py +490 -0
- digitalkin/models/module/tool_cache.py +68 -0
- digitalkin/models/module/tool_reference.py +117 -0
- digitalkin/models/module/utility.py +167 -0
- digitalkin/models/services/__init__.py +9 -0
- digitalkin/models/services/cost.py +1 -0
- digitalkin/models/services/registry.py +35 -0
- digitalkin/models/services/storage.py +39 -5
- digitalkin/modules/__init__.py +5 -1
- digitalkin/modules/_base_module.py +265 -167
- digitalkin/modules/archetype_module.py +6 -1
- digitalkin/modules/tool_module.py +16 -3
- digitalkin/modules/trigger_handler.py +7 -6
- digitalkin/modules/triggers/__init__.py +8 -0
- digitalkin/modules/triggers/healthcheck_ping_trigger.py +45 -0
- digitalkin/modules/triggers/healthcheck_services_trigger.py +63 -0
- digitalkin/modules/triggers/healthcheck_status_trigger.py +52 -0
- digitalkin/services/__init__.py +4 -0
- digitalkin/services/communication/__init__.py +7 -0
- digitalkin/services/communication/communication_strategy.py +76 -0
- digitalkin/services/communication/default_communication.py +101 -0
- digitalkin/services/communication/grpc_communication.py +234 -0
- digitalkin/services/cost/__init__.py +9 -2
- digitalkin/services/cost/grpc_cost.py +9 -42
- digitalkin/services/filesystem/default_filesystem.py +0 -2
- digitalkin/services/filesystem/grpc_filesystem.py +10 -39
- digitalkin/services/registry/__init__.py +22 -1
- digitalkin/services/registry/default_registry.py +135 -4
- digitalkin/services/registry/exceptions.py +47 -0
- digitalkin/services/registry/grpc_registry.py +306 -0
- digitalkin/services/registry/registry_models.py +15 -0
- digitalkin/services/registry/registry_strategy.py +88 -4
- digitalkin/services/services_config.py +25 -3
- digitalkin/services/services_models.py +5 -1
- digitalkin/services/setup/default_setup.py +6 -7
- digitalkin/services/setup/grpc_setup.py +52 -15
- digitalkin/services/storage/grpc_storage.py +4 -4
- digitalkin/services/user_profile/__init__.py +12 -0
- digitalkin/services/user_profile/default_user_profile.py +55 -0
- digitalkin/services/user_profile/grpc_user_profile.py +69 -0
- digitalkin/services/user_profile/user_profile_strategy.py +25 -0
- digitalkin/utils/__init__.py +28 -0
- digitalkin/utils/arg_parser.py +1 -1
- digitalkin/utils/development_mode_action.py +2 -2
- digitalkin/utils/dynamic_schema.py +483 -0
- digitalkin/utils/package_discover.py +1 -2
- digitalkin/utils/schema_splitter.py +207 -0
- {digitalkin-0.2.25rc0.dist-info → digitalkin-0.3.2.dev14.dist-info}/METADATA +11 -30
- digitalkin-0.3.2.dev14.dist-info/RECORD +143 -0
- {digitalkin-0.2.25rc0.dist-info → digitalkin-0.3.2.dev14.dist-info}/top_level.txt +1 -0
- modules/archetype_with_tools_module.py +244 -0
- modules/cpu_intensive_module.py +1 -1
- modules/dynamic_setup_module.py +338 -0
- modules/minimal_llm_module.py +1 -1
- modules/text_transform_module.py +1 -1
- monitoring/digitalkin_observability/__init__.py +46 -0
- monitoring/digitalkin_observability/http_server.py +150 -0
- monitoring/digitalkin_observability/interceptors.py +176 -0
- monitoring/digitalkin_observability/metrics.py +201 -0
- monitoring/digitalkin_observability/prometheus.py +137 -0
- monitoring/tests/test_metrics.py +172 -0
- services/filesystem_module.py +7 -5
- services/storage_module.py +4 -2
- digitalkin/grpc_servers/registry_server.py +0 -65
- digitalkin/grpc_servers/registry_servicer.py +0 -456
- digitalkin/grpc_servers/utils/factory.py +0 -180
- digitalkin/modules/job_manager/single_job_manager.py +0 -294
- digitalkin/modules/job_manager/taskiq_job_manager.py +0 -290
- digitalkin-0.2.25rc0.dist-info/RECORD +0 -89
- /digitalkin/{grpc_servers/utils → models/grpc_servers}/types.py +0 -0
- {digitalkin-0.2.25rc0.dist-info → digitalkin-0.3.2.dev14.dist-info}/WHEEL +0 -0
- {digitalkin-0.2.25rc0.dist-info → digitalkin-0.3.2.dev14.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""Filesystem Mixin to ease filesystem use."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from digitalkin.models.module.module_context import ModuleContext
|
|
6
|
+
from digitalkin.services.filesystem.filesystem_strategy import FilesystemRecord
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class FilesystemMixin:
    """Convenience wrappers around the filesystem strategy of a module context.

    Every helper forwards to ``context.filesystem`` so trigger handlers can
    work with files without reaching into the context themselves.
    """

    @staticmethod
    def upload_files(context: ModuleContext, files: list[Any]) -> tuple[list[FilesystemRecord], int, int]:
        """Upload a batch of files through ``context.filesystem``.

        Args:
            context: Module context exposing the filesystem strategy.
            files: Files to upload; handed to the strategy unchanged.

        Returns:
            The strategy's result, returned as-is. Per the annotation this is
            a list of records followed by two integers (presumably the
            succeeded and failed counts; confirm against the strategy).

        Raises:
            FilesystemServiceError: If upload operation fails (propagated from
                the strategy; nothing is raised by this wrapper itself).
        """
        uploader = context.filesystem.upload_files
        return uploader(files)

    @staticmethod
    def get_file(context: ModuleContext, file_id: str) -> FilesystemRecord:
        """Fetch a single file by its identifier, always requesting its content.

        Args:
            context: Module context exposing the filesystem strategy.
            file_id: Unique identifier of the file to fetch.

        Returns:
            The record returned by the strategy; the call is always made with
            ``include_content=True``.

        Raises:
            FilesystemServiceError: If file retrieval fails (propagated from
                the strategy; nothing is raised by this wrapper itself).
        """
        fetch = context.filesystem.get_file
        return fetch(file_id, include_content=True)
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""Logger Mixin to ease and merge every logs."""
|
|
2
|
+
|
|
3
|
+
from digitalkin.models.module.module_context import ModuleContext
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class LoggerMixin:
    """Logging helpers that route messages through the context's callback logger.

    Each helper forwards to ``context.callbacks.logger`` and attaches the
    value of ``context.session.current_ids()`` as the ``extra`` argument, so
    every record carries the identifiers of the current session.
    """

    @staticmethod
    def log_debug(context: ModuleContext, message: str) -> None:
        """Emit ``message`` at debug level.

        Args:
            context: Module context providing the callback logger and session.
            message: Text to log.
        """
        emit = context.callbacks.logger.debug
        return emit(message, extra=context.session.current_ids())

    @staticmethod
    def log_info(context: ModuleContext, message: str) -> None:
        """Emit ``message`` at info level.

        Args:
            context: Module context providing the callback logger and session.
            message: Text to log.
        """
        emit = context.callbacks.logger.info
        return emit(message, extra=context.session.current_ids())

    @staticmethod
    def log_warning(context: ModuleContext, message: str) -> None:
        """Emit ``message`` at warning level.

        Args:
            context: Module context providing the callback logger and session.
            message: Text to log.
        """
        emit = context.callbacks.logger.warning
        return emit(message, extra=context.session.current_ids())

    @staticmethod
    def log_error(context: ModuleContext, message: str) -> None:
        """Emit ``message`` at error level.

        Args:
            context: Module context providing the callback logger and session.
            message: Text to log.
        """
        emit = context.callbacks.logger.error
        return emit(message, extra=context.session.current_ids())
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""Storage Mixin to ease storage access in Triggers."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Literal
|
|
4
|
+
|
|
5
|
+
from digitalkin.models.module.module_context import ModuleContext
|
|
6
|
+
from digitalkin.services.storage.storage_strategy import StorageRecord
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class StorageMixin:
    """Convenience wrappers around the storage strategy of a module context.

    Every helper forwards to ``context.storage`` so trigger handlers get a
    compact API for storing, reading and updating records.
    """

    @staticmethod
    def store_storage(
        context: ModuleContext,
        collection: str,
        record_id: str | None,
        data: dict[str, Any],
        data_type: Literal["OUTPUT", "VIEW", "LOGS", "OTHER"] = "OUTPUT",
    ) -> StorageRecord:
        """Persist ``data`` in ``collection`` through ``context.storage.store``.

        Args:
            context: Module context exposing the storage strategy.
            collection: Name of the target collection.
            record_id: Identifier of the record, or ``None``.
            data: Payload to persist.
            data_type: Category forwarded as the ``data_type`` keyword.

        Returns:
            The record returned by the storage strategy.

        Raises:
            StorageServiceError: If storage operation fails (propagated from
                the strategy; nothing is raised by this wrapper itself).
        """
        backend = context.storage
        return backend.store(collection, record_id, data, data_type=data_type)

    @staticmethod
    def read_storage(context: ModuleContext, collection: str, record_id: str) -> StorageRecord | None:
        """Look up one record through ``context.storage.read``.

        Args:
            context: Module context exposing the storage strategy.
            collection: Name of the collection to read from.
            record_id: Identifier of the record to fetch.

        Returns:
            The strategy's result; the annotation allows ``None``.

        Raises:
            StorageServiceError: If read operation fails (propagated from the
                strategy; nothing is raised by this wrapper itself).
        """
        backend = context.storage
        return backend.read(collection, record_id)

    @staticmethod
    def update_storage(
        context: ModuleContext,
        collection: str,
        record_id: str,
        data: dict[str, Any],
    ) -> StorageRecord | None:
        """Update an existing record through ``context.storage.update``.

        Args:
            context: Module context exposing the storage strategy.
            collection: Name of the collection holding the record.
            record_id: Identifier of the record to update.
            data: New payload for the record.

        Returns:
            The strategy's result; the annotation allows ``None``.

        Raises:
            StorageServiceError: If update operation fails (propagated from
                the strategy; nothing is raised by this wrapper itself).
        """
        backend = context.storage
        return backend.update(collection, record_id, data)
|
digitalkin/models/__init__.py
CHANGED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Core models."""
|
|
@@ -2,15 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from enum import Enum
|
|
4
4
|
|
|
5
|
-
from
|
|
6
|
-
|
|
7
|
-
from digitalkin.modules.job_manager.base_job_manager import BaseJobManager
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class StreamCodeModel(BaseModel):
|
|
11
|
-
"""Typed error/code model."""
|
|
12
|
-
|
|
13
|
-
code: str
|
|
5
|
+
from digitalkin.core.job_manager.base_job_manager import BaseJobManager
|
|
14
6
|
|
|
15
7
|
|
|
16
8
|
class JobManagerMode(Enum):
|
|
@@ -35,10 +27,10 @@ class JobManagerMode(Enum):
|
|
|
35
27
|
"""
|
|
36
28
|
match self:
|
|
37
29
|
case JobManagerMode.SINGLE:
|
|
38
|
-
from digitalkin.
|
|
30
|
+
from digitalkin.core.job_manager.single_job_manager import SingleJobManager # noqa: PLC0415
|
|
39
31
|
|
|
40
32
|
return SingleJobManager
|
|
41
33
|
case JobManagerMode.TASKIQ:
|
|
42
|
-
from digitalkin.
|
|
34
|
+
from digitalkin.core.job_manager.taskiq_job_manager import TaskiqJobManager # noqa: PLC0415
|
|
43
35
|
|
|
44
36
|
return TaskiqJobManager
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""Task monitoring models for signaling and heartbeat messages."""
|
|
2
|
+
|
|
3
|
+
from datetime import datetime, timezone
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel, Field
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TaskStatus(Enum):
    """Task status enumeration."""

    # String-valued members; SignalMessage.status is typed with this enum.
    PENDING = "pending"
    RUNNING = "running"
    CANCELLED = "cancelled"
    COMPLETED = "completed"
    FAILED = "failed"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class CancellationReason(Enum):
    """Reason for task cancellation - helps distinguish cleanup vs real cancellation."""

    # Members fall into three groups: cleanup cancellations, real
    # cancellations, and a fallback for when no reason was determined.

    # Cleanup cancellations (not errors)
    SUCCESS_CLEANUP = "success_cleanup"  # Main task completed, cleaning up helper tasks
    FAILURE_CLEANUP = "failure_cleanup"  # Main task failed, cleaning up helper tasks

    # Real cancellations
    SIGNAL = "signal"  # External signal requested cancellation
    HEARTBEAT_FAILURE = "heartbeat_failure"  # Heartbeat stopped working
    TIMEOUT = "timeout"  # Task timed out
    SHUTDOWN = "shutdown"  # Manager is shutting down

    # Unknown/unset
    UNKNOWN = "unknown"  # Reason not determined
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class SignalType(Enum):
    """Signal type enumeration."""

    # Signal actions; SignalMessage.action is typed with this enum.
    START = "start"
    STOP = "stop"
    CANCEL = "cancel"
    PAUSE = "pause"
    RESUME = "resume"
    STATUS = "status"

    # Presumably acknowledgements of the matching signals above; note that
    # no ACK member is defined for START or STOP.
    ACK_CANCEL = "ack_cancel"
    ACK_PAUSE = "ack_pause"
    ACK_RESUME = "ack_resume"
    ACK_STATUS = "ack_status"
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class SignalMessage(BaseModel):
    """Signal message model for task monitoring."""

    # task_id, mission_id, status and action are required (``...``); the
    # setup identifiers default to an empty string.
    task_id: str = Field(..., description="Unique identifier for the task")
    mission_id: str = Field(..., description="Identifier for the mission")
    setup_id: str = Field(default="", description="Identifier for the setup")
    setup_version_id: str = Field(default="", description="Identifier for the setup version")
    status: TaskStatus = Field(..., description="Current status of the task")
    action: SignalType = Field(..., description="Type of signal action")
    # Defaults to the current time in UTC, computed per instance.
    timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    # NOTE(review): literal ``{}`` default; other models in this package use
    # ``default_factory`` for mutable defaults - consider aligning (this would
    # change the generated JSON schema default, so confirm before switching).
    payload: dict[str, Any] = Field(default={}, description="Optional payload for the signal")
    # With ``use_enum_values`` pydantic stores the enum *values* on the model,
    # so ``status`` / ``action`` are presumably plain strings after validation.
    model_config = {"use_enum_values": True}
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class HeartbeatMessage(BaseModel):
    """Heartbeat message model for task monitoring."""

    # Same identifier fields as SignalMessage: task_id and mission_id are
    # required, the setup identifiers default to an empty string.
    task_id: str = Field(..., description="Unique identifier for the task")
    mission_id: str = Field(..., description="Identifier for the mission")
    setup_id: str = Field(default="", description="Identifier for the setup")
    setup_version_id: str = Field(default="", description="Identifier for the setup version")
    # Defaults to the current time in UTC, computed per instance.
    timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Base gRPC server and client models."""
|
|
@@ -65,6 +65,42 @@ class ServerCredentials(BaseModel):
|
|
|
65
65
|
return v
|
|
66
66
|
|
|
67
67
|
|
|
68
|
+
class RetryPolicy(BaseModel):
    """gRPC retry policy configuration for resilient connections.

    Attributes:
        max_attempts: Maximum retry attempts including the original call
        initial_backoff: Initial backoff duration (e.g., "0.1s")
        max_backoff: Maximum backoff duration (e.g., "10s")
        backoff_multiplier: Multiplier for exponential backoff
        retryable_status_codes: gRPC status codes that trigger retry
    """

    max_attempts: int = Field(default=5, ge=1, le=10, description="Maximum retry attempts including the original call")
    initial_backoff: str = Field(default="0.1s", description="Initial backoff duration (e.g., '0.1s')")
    max_backoff: str = Field(default="10s", description="Maximum backoff duration (e.g., '10s')")
    backoff_multiplier: float = Field(default=2.0, ge=1.0, description="Multiplier for exponential backoff")
    retryable_status_codes: list[str] = Field(
        default_factory=lambda: ["UNAVAILABLE", "RESOURCE_EXHAUSTED"],
        description="gRPC status codes that trigger retry",
    )

    # Unknown fields are rejected and instances are immutable.
    model_config = {"extra": "forbid", "frozen": True}

    def to_service_config_json(self) -> str:
        """Serialize to gRPC service config JSON string.

        The policy is attached to a single ``methodConfig`` entry whose
        ``name`` list contains one empty object. The document is produced
        with ``json.dumps`` so that string values (backoff durations, status
        codes) are always escaped correctly; compact separators keep the
        output identical to the previous hand-built string for well-formed
        values.

        Returns:
            JSON string for grpc.service_config channel option.
        """
        import json  # noqa: PLC0415

        service_config = {
            "methodConfig": [
                {
                    "name": [{}],
                    "retryPolicy": {
                        "maxAttempts": self.max_attempts,
                        "initialBackoff": self.initial_backoff,
                        "maxBackoff": self.max_backoff,
                        "backoffMultiplier": self.backoff_multiplier,
                        "retryableStatusCodes": list(self.retryable_status_codes),
                    },
                },
            ],
        }
        # No whitespace between tokens, matching the original compact layout.
        return json.dumps(service_config, separators=(",", ":"))
|
|
102
|
+
|
|
103
|
+
|
|
68
104
|
class ClientCredentials(BaseModel):
|
|
69
105
|
"""Model for client credentials in secure mode.
|
|
70
106
|
|
|
@@ -170,15 +206,47 @@ class ClientConfig(ChannelConfig):
|
|
|
170
206
|
security: Security mode (secure/insecure)
|
|
171
207
|
credentials: Client credentials for secure mode
|
|
172
208
|
channel_options: Additional channel options
|
|
209
|
+
retry_policy: Retry policy for failed RPCs
|
|
173
210
|
"""
|
|
174
211
|
|
|
175
212
|
credentials: ClientCredentials | None = Field(None, description="Client credentials for secure mode")
|
|
213
|
+
retry_policy: RetryPolicy = Field(default_factory=lambda: RetryPolicy(), description="Retry policy for failed RPCs") # noqa: PLW0108
|
|
176
214
|
channel_options: list[tuple[str, Any]] = Field(
|
|
177
215
|
default_factory=lambda: [
|
|
178
|
-
("grpc.max_receive_message_length",
|
|
179
|
-
("grpc.max_send_message_length",
|
|
216
|
+
("grpc.max_receive_message_length", 100 * 1024 * 1024),
|
|
217
|
+
("grpc.max_send_message_length", 100 * 1024 * 1024),
|
|
218
|
+
# === DNS Re-resolution (Critical for Container Environments) ===
|
|
219
|
+
# Minimum milliseconds between DNS re-resolution attempts (500 ms)
|
|
220
|
+
# When connection fails, gRPC will re-query DNS after this interval
|
|
221
|
+
# Solves: Container restarts with new IPs causing "No route to host"
|
|
222
|
+
("grpc.dns_min_time_between_resolutions_ms", 500),
|
|
223
|
+
# Initial delay before first reconnection attempt (1 second)
|
|
224
|
+
("grpc.initial_reconnect_backoff_ms", 1000),
|
|
225
|
+
# Maximum delay between reconnection attempts (10 seconds)
|
|
226
|
+
# Prevents overwhelming the network during extended outages
|
|
227
|
+
("grpc.max_reconnect_backoff_ms", 10000),
|
|
228
|
+
# Minimum delay between reconnection attempts (500ms)
|
|
229
|
+
# Ensures rapid recovery for brief network glitches
|
|
230
|
+
("grpc.min_reconnect_backoff_ms", 500),
|
|
231
|
+
# === Keepalive Settings (Detect Dead Connections) ===
|
|
232
|
+
# Send keepalive ping every 60 seconds when connection is idle
|
|
233
|
+
# Proactively detects dead connections before RPC calls fail
|
|
234
|
+
("grpc.keepalive_time_ms", 60000),
|
|
235
|
+
# Wait 20 seconds for keepalive response before declaring connection dead
|
|
236
|
+
# Triggers reconnection (with DNS re-resolution) if pong not received
|
|
237
|
+
("grpc.keepalive_timeout_ms", 20000),
|
|
238
|
+
# Send keepalive pings even when no RPCs are in flight
|
|
239
|
+
# Essential for long-lived connections that may sit idle
|
|
240
|
+
("grpc.keepalive_permit_without_calls", True),
|
|
241
|
+
# Minimum interval between HTTP/2 pings (30 seconds)
|
|
242
|
+
# Must be >= server's grpc.http2.min_ping_interval_without_data_ms (10s)
|
|
243
|
+
("grpc.http2.min_time_between_pings_ms", 30000),
|
|
244
|
+
# === Retry Configuration ===
|
|
245
|
+
# Enable automatic retry for failed RPCs (1 = enabled)
|
|
246
|
+
# Works with retryable status codes: UNAVAILABLE, RESOURCE_EXHAUSTED
|
|
247
|
+
("grpc.enable_retries", 1),
|
|
180
248
|
],
|
|
181
|
-
description="
|
|
249
|
+
description="Resilient gRPC channel options with DNS re-resolution, keepalive, and retries",
|
|
182
250
|
)
|
|
183
251
|
|
|
184
252
|
@field_validator("credentials")
|
|
@@ -204,6 +272,15 @@ class ClientConfig(ChannelConfig):
|
|
|
204
272
|
raise ConfigurationError(msg)
|
|
205
273
|
return v
|
|
206
274
|
|
|
275
|
+
@property
|
|
276
|
+
def grpc_options(self) -> list[tuple[str, Any]]:
|
|
277
|
+
"""Get channel options with retry policy service config.
|
|
278
|
+
|
|
279
|
+
Returns:
|
|
280
|
+
Full list of gRPC channel options.
|
|
281
|
+
"""
|
|
282
|
+
return [*self.channel_options, ("grpc.service_config", self.retry_policy.to_service_config_json())]
|
|
283
|
+
|
|
207
284
|
|
|
208
285
|
class ServerConfig(ChannelConfig):
|
|
209
286
|
"""Base configuration for gRPC servers.
|
|
@@ -223,10 +300,18 @@ class ServerConfig(ChannelConfig):
|
|
|
223
300
|
credentials: ServerCredentials | None = Field(None, description="Server credentials for secure mode")
|
|
224
301
|
server_options: list[tuple[str, Any]] = Field(
|
|
225
302
|
default_factory=lambda: [
|
|
226
|
-
("grpc.max_receive_message_length",
|
|
227
|
-
("grpc.max_send_message_length",
|
|
303
|
+
("grpc.max_receive_message_length", 100 * 1024 * 1024),
|
|
304
|
+
("grpc.max_send_message_length", 100 * 1024 * 1024),
|
|
305
|
+
# === Keepalive Permission (Required for Client Keepalive) ===
|
|
306
|
+
# Allow clients to send keepalive pings without active RPCs
|
|
307
|
+
# Without this, server rejects client keepalives with GOAWAY
|
|
308
|
+
("grpc.keepalive_permit_without_calls", True),
|
|
309
|
+
# Minimum interval server allows between client pings (10 seconds)
|
|
310
|
+
# Prevents "too_many_pings" GOAWAY errors
|
|
311
|
+
# Must match or be less than client's http2.min_time_between_pings_ms
|
|
312
|
+
("grpc.http2.min_ping_interval_without_data_ms", 10000),
|
|
228
313
|
],
|
|
229
|
-
description="
|
|
314
|
+
description="gRPC server options with keepalive support",
|
|
230
315
|
)
|
|
231
316
|
enable_reflection: bool = Field(default=True, description="Enable reflection for the server")
|
|
232
317
|
enable_health_check: bool = Field(default=True, description="Enable health check service")
|
|
@@ -262,7 +347,7 @@ class ModuleServerConfig(ServerConfig):
|
|
|
262
347
|
registry_address: Address of the registry server
|
|
263
348
|
"""
|
|
264
349
|
|
|
265
|
-
registry_address: str
|
|
350
|
+
registry_address: str = Field(..., description="Address of the registry server")
|
|
266
351
|
|
|
267
352
|
|
|
268
353
|
class RegistryServerConfig(ServerConfig):
|
|
@@ -1,26 +1,33 @@
|
|
|
1
1
|
"""This module contains the models for the modules."""
|
|
2
2
|
|
|
3
|
-
from digitalkin.models.module.module import Module, ModuleStatus
|
|
4
3
|
from digitalkin.models.module.module_context import ModuleContext
|
|
5
4
|
from digitalkin.models.module.module_types import (
|
|
6
5
|
DataModel,
|
|
7
6
|
DataTrigger,
|
|
8
|
-
InputModelT,
|
|
9
|
-
OutputModelT,
|
|
10
|
-
SecretModelT,
|
|
11
7
|
SetupModel,
|
|
12
|
-
|
|
8
|
+
)
|
|
9
|
+
from digitalkin.models.module.tool_reference import (
|
|
10
|
+
ToolReference,
|
|
11
|
+
ToolReferenceConfig,
|
|
12
|
+
ToolSelectionMode,
|
|
13
|
+
)
|
|
14
|
+
from digitalkin.models.module.utility import (
|
|
15
|
+
EndOfStreamOutput,
|
|
16
|
+
ModuleStartInfoOutput,
|
|
17
|
+
UtilityProtocol,
|
|
18
|
+
UtilityRegistry,
|
|
13
19
|
)
|
|
14
20
|
|
|
15
21
|
__all__ = [
|
|
16
22
|
"DataModel",
|
|
17
23
|
"DataTrigger",
|
|
18
|
-
"
|
|
19
|
-
"Module",
|
|
24
|
+
"EndOfStreamOutput",
|
|
20
25
|
"ModuleContext",
|
|
21
|
-
"
|
|
22
|
-
"OutputModelT",
|
|
23
|
-
"SecretModelT",
|
|
26
|
+
"ModuleStartInfoOutput",
|
|
24
27
|
"SetupModel",
|
|
25
|
-
"
|
|
28
|
+
"ToolReference",
|
|
29
|
+
"ToolReferenceConfig",
|
|
30
|
+
"ToolSelectionMode",
|
|
31
|
+
"UtilityProtocol",
|
|
32
|
+
"UtilityRegistry",
|
|
26
33
|
]
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""Base types for module models."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
|
+
from typing import TYPE_CHECKING, ClassVar, Generic, TypeVar
|
|
7
|
+
|
|
8
|
+
from pydantic import BaseModel, Field
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from digitalkin.models.module.setup_types import SetupModel
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class DataTrigger(BaseModel):
    """Defines the root input/output model exposing the protocol.

    The mandatory protocol is important to define the module behavior following the user or agent input/output.

    Example:
        class MyInput(DataModel):
            root: DataTrigger
            user_define_data: Any

        # Usage
        my_input = MyInput(root=DataTrigger(protocol="message"))
        print(my_input.root.protocol)  # Output: message
    """

    # Declared as a class-level variable without a value here, so it is not a
    # per-instance model field; subclasses are presumably expected to set it.
    # NOTE(review): the docstring example passes ``protocol=`` to the
    # constructor, which does not match a ClassVar declaration - confirm.
    protocol: ClassVar[str]
    # ISO-8601 string of the current UTC time, computed per instance.
    created_at: str = Field(
        default_factory=lambda: datetime.now(tz=timezone.utc).isoformat(),
        title="Created At",
        description="Timestamp when the payload was created.",
    )
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# Type variable for the ``root`` payload of a DataModel; restricted to
# DataTrigger subclasses.
DataTriggerT = TypeVar("DataTriggerT", bound=DataTrigger)


class DataModel(BaseModel, Generic[DataTriggerT]):
    """Base definition of input/output model showing mandatory root fields.

    The Model defines the Module Input/output, usually referring to multiple input/output type defined by an union.

    Example:
        class ModuleInput(DataModel):
            root: FileInput | MessageInput
    """

    # Required payload, typed by the generic parameter.
    root: DataTriggerT
    # Free-form string-to-string metadata; defaults to an empty dict.
    annotations: dict[str, str] = Field(
        default={},
        title="Annotations",
        description="Additional metadata or annotations related to the output. ex {'role': 'user'}",
    )
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# Type variables for the models a module is parameterized with.
# Input and output models must derive from DataModel; secrets only need to be
# pydantic models.
InputModelT = TypeVar("InputModelT", bound=DataModel)
OutputModelT = TypeVar("OutputModelT", bound=DataModel)
SecretModelT = TypeVar("SecretModelT", bound=BaseModel)
# Bound is given as a string because SetupModel is imported only under
# TYPE_CHECKING in this module.
SetupModelT = TypeVar("SetupModelT", bound="SetupModel")
|
|
@@ -2,7 +2,15 @@
|
|
|
2
2
|
|
|
3
3
|
from enum import Enum, auto
|
|
4
4
|
|
|
5
|
-
from pydantic import BaseModel
|
|
5
|
+
from pydantic import BaseModel, Field
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ModuleCodeModel(BaseModel):
    """Typed error/code model."""

    # ``code`` is required; ``message`` and ``short_description`` are optional
    # and default to None.
    code: str = Field(...)
    message: str | None = Field(default=None)
    short_description: str | None = Field(default=None)
|
|
6
14
|
|
|
7
15
|
|
|
8
16
|
class ModuleStatus(Enum):
|