omnibase_infra 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- omnibase_infra/__init__.py +1 -1
- omnibase_infra/adapters/adapter_onex_tool_execution.py +451 -0
- omnibase_infra/capabilities/__init__.py +15 -0
- omnibase_infra/capabilities/capability_inference_rules.py +211 -0
- omnibase_infra/capabilities/contract_capability_extractor.py +221 -0
- omnibase_infra/capabilities/intent_type_extractor.py +160 -0
- omnibase_infra/cli/commands.py +1 -1
- omnibase_infra/configs/widget_mapping.yaml +176 -0
- omnibase_infra/contracts/handlers/filesystem/handler_contract.yaml +5 -2
- omnibase_infra/contracts/handlers/mcp/handler_contract.yaml +5 -2
- omnibase_infra/enums/__init__.py +6 -0
- omnibase_infra/enums/enum_handler_error_type.py +10 -0
- omnibase_infra/enums/enum_handler_source_mode.py +72 -0
- omnibase_infra/enums/enum_kafka_acks.py +99 -0
- omnibase_infra/errors/error_compute_registry.py +4 -1
- omnibase_infra/errors/error_event_bus_registry.py +4 -1
- omnibase_infra/errors/error_infra.py +3 -1
- omnibase_infra/errors/error_policy_registry.py +4 -1
- omnibase_infra/event_bus/event_bus_kafka.py +1 -1
- omnibase_infra/event_bus/models/config/model_kafka_event_bus_config.py +59 -10
- omnibase_infra/handlers/__init__.py +8 -1
- omnibase_infra/handlers/handler_consul.py +7 -1
- omnibase_infra/handlers/handler_db.py +10 -3
- omnibase_infra/handlers/handler_graph.py +10 -5
- omnibase_infra/handlers/handler_http.py +8 -2
- omnibase_infra/handlers/handler_intent.py +387 -0
- omnibase_infra/handlers/handler_mcp.py +745 -63
- omnibase_infra/handlers/handler_vault.py +11 -5
- omnibase_infra/handlers/mixins/mixin_consul_kv.py +4 -3
- omnibase_infra/handlers/mixins/mixin_consul_service.py +2 -1
- omnibase_infra/handlers/registration_storage/handler_registration_storage_postgres.py +7 -0
- omnibase_infra/handlers/service_discovery/handler_service_discovery_consul.py +308 -4
- omnibase_infra/handlers/service_discovery/models/model_service_info.py +10 -0
- omnibase_infra/mixins/mixin_async_circuit_breaker.py +3 -2
- omnibase_infra/mixins/mixin_node_introspection.py +42 -7
- omnibase_infra/mixins/mixin_retry_execution.py +1 -1
- omnibase_infra/models/discovery/model_introspection_config.py +11 -0
- omnibase_infra/models/handlers/__init__.py +48 -5
- omnibase_infra/models/handlers/model_bootstrap_handler_descriptor.py +162 -0
- omnibase_infra/models/handlers/model_contract_discovery_result.py +6 -4
- omnibase_infra/models/handlers/model_handler_descriptor.py +15 -0
- omnibase_infra/models/handlers/model_handler_source_config.py +220 -0
- omnibase_infra/models/mcp/__init__.py +15 -0
- omnibase_infra/models/mcp/model_mcp_contract_config.py +80 -0
- omnibase_infra/models/mcp/model_mcp_server_config.py +67 -0
- omnibase_infra/models/mcp/model_mcp_tool_definition.py +73 -0
- omnibase_infra/models/mcp/model_mcp_tool_parameter.py +35 -0
- omnibase_infra/models/registration/model_node_capabilities.py +11 -0
- omnibase_infra/models/registration/model_node_introspection_event.py +9 -0
- omnibase_infra/models/runtime/model_handler_contract.py +25 -9
- omnibase_infra/models/runtime/model_loaded_handler.py +9 -0
- omnibase_infra/nodes/architecture_validator/contract_architecture_validator.yaml +0 -5
- omnibase_infra/nodes/architecture_validator/registry/registry_infra_architecture_validator.py +17 -10
- omnibase_infra/nodes/effects/contract.yaml +0 -5
- omnibase_infra/nodes/node_registration_orchestrator/contract.yaml +7 -0
- omnibase_infra/nodes/node_registration_orchestrator/handlers/handler_node_introspected.py +86 -1
- omnibase_infra/nodes/node_registration_orchestrator/introspection_event_router.py +3 -3
- omnibase_infra/nodes/node_registration_orchestrator/plugin.py +1 -1
- omnibase_infra/nodes/node_registration_orchestrator/registry/registry_infra_node_registration_orchestrator.py +9 -8
- omnibase_infra/nodes/node_registration_orchestrator/timeout_coordinator.py +4 -3
- omnibase_infra/nodes/node_registration_orchestrator/wiring.py +14 -13
- omnibase_infra/nodes/node_registration_storage_effect/contract.yaml +0 -5
- omnibase_infra/nodes/node_registration_storage_effect/node.py +4 -1
- omnibase_infra/nodes/node_registration_storage_effect/registry/registry_infra_registration_storage.py +47 -26
- omnibase_infra/nodes/node_registry_effect/contract.yaml +0 -5
- omnibase_infra/nodes/node_registry_effect/handlers/handler_partial_retry.py +2 -1
- omnibase_infra/nodes/node_service_discovery_effect/registry/registry_infra_service_discovery.py +28 -20
- omnibase_infra/plugins/examples/plugin_json_normalizer.py +2 -2
- omnibase_infra/plugins/examples/plugin_json_normalizer_error_handling.py +2 -2
- omnibase_infra/plugins/plugin_compute_base.py +16 -2
- omnibase_infra/protocols/__init__.py +2 -0
- omnibase_infra/protocols/protocol_container_aware.py +200 -0
- omnibase_infra/protocols/protocol_event_projector.py +1 -1
- omnibase_infra/runtime/__init__.py +90 -1
- omnibase_infra/runtime/binding_config_resolver.py +102 -37
- omnibase_infra/runtime/constants_notification.py +75 -0
- omnibase_infra/runtime/contract_handler_discovery.py +6 -1
- omnibase_infra/runtime/handler_bootstrap_source.py +507 -0
- omnibase_infra/runtime/handler_contract_config_loader.py +603 -0
- omnibase_infra/runtime/handler_contract_source.py +267 -186
- omnibase_infra/runtime/handler_identity.py +81 -0
- omnibase_infra/runtime/handler_plugin_loader.py +19 -2
- omnibase_infra/runtime/handler_registry.py +11 -3
- omnibase_infra/runtime/handler_source_resolver.py +326 -0
- omnibase_infra/runtime/mixin_semver_cache.py +25 -1
- omnibase_infra/runtime/mixins/__init__.py +7 -0
- omnibase_infra/runtime/mixins/mixin_projector_notification_publishing.py +566 -0
- omnibase_infra/runtime/mixins/mixin_projector_sql_operations.py +31 -10
- omnibase_infra/runtime/models/__init__.py +24 -0
- omnibase_infra/runtime/models/model_health_check_result.py +2 -1
- omnibase_infra/runtime/models/model_projector_notification_config.py +171 -0
- omnibase_infra/runtime/models/model_transition_notification_outbox_config.py +112 -0
- omnibase_infra/runtime/models/model_transition_notification_outbox_metrics.py +140 -0
- omnibase_infra/runtime/models/model_transition_notification_publisher_metrics.py +357 -0
- omnibase_infra/runtime/projector_plugin_loader.py +1 -1
- omnibase_infra/runtime/projector_shell.py +229 -1
- omnibase_infra/runtime/protocol_lifecycle_executor.py +6 -6
- omnibase_infra/runtime/protocols/__init__.py +10 -0
- omnibase_infra/runtime/registry/registry_protocol_binding.py +16 -15
- omnibase_infra/runtime/registry_contract_source.py +693 -0
- omnibase_infra/runtime/registry_policy.py +9 -326
- omnibase_infra/runtime/secret_resolver.py +4 -2
- omnibase_infra/runtime/service_kernel.py +11 -3
- omnibase_infra/runtime/service_message_dispatch_engine.py +4 -2
- omnibase_infra/runtime/service_runtime_host_process.py +589 -106
- omnibase_infra/runtime/transition_notification_outbox.py +1190 -0
- omnibase_infra/runtime/transition_notification_publisher.py +764 -0
- omnibase_infra/runtime/util_container_wiring.py +6 -5
- omnibase_infra/runtime/util_wiring.py +17 -4
- omnibase_infra/schemas/schema_transition_notification_outbox.sql +245 -0
- omnibase_infra/services/__init__.py +21 -0
- omnibase_infra/services/corpus_capture.py +7 -1
- omnibase_infra/services/mcp/__init__.py +31 -0
- omnibase_infra/services/mcp/mcp_server_lifecycle.py +449 -0
- omnibase_infra/services/mcp/service_mcp_tool_discovery.py +411 -0
- omnibase_infra/services/mcp/service_mcp_tool_registry.py +329 -0
- omnibase_infra/services/mcp/service_mcp_tool_sync.py +547 -0
- omnibase_infra/services/registry_api/__init__.py +40 -0
- omnibase_infra/services/registry_api/main.py +261 -0
- omnibase_infra/services/registry_api/models/__init__.py +66 -0
- omnibase_infra/services/registry_api/models/model_capability_widget_mapping.py +38 -0
- omnibase_infra/services/registry_api/models/model_pagination_info.py +48 -0
- omnibase_infra/services/registry_api/models/model_registry_discovery_response.py +73 -0
- omnibase_infra/services/registry_api/models/model_registry_health_response.py +49 -0
- omnibase_infra/services/registry_api/models/model_registry_instance_view.py +88 -0
- omnibase_infra/services/registry_api/models/model_registry_node_view.py +88 -0
- omnibase_infra/services/registry_api/models/model_registry_summary.py +60 -0
- omnibase_infra/services/registry_api/models/model_response_list_instances.py +43 -0
- omnibase_infra/services/registry_api/models/model_response_list_nodes.py +51 -0
- omnibase_infra/services/registry_api/models/model_warning.py +49 -0
- omnibase_infra/services/registry_api/models/model_widget_defaults.py +28 -0
- omnibase_infra/services/registry_api/models/model_widget_mapping.py +51 -0
- omnibase_infra/services/registry_api/routes.py +371 -0
- omnibase_infra/services/registry_api/service.py +837 -0
- omnibase_infra/services/service_capability_query.py +4 -4
- omnibase_infra/services/service_health.py +3 -2
- omnibase_infra/services/service_timeout_emitter.py +20 -3
- omnibase_infra/services/service_timeout_scanner.py +7 -3
- omnibase_infra/services/session/__init__.py +56 -0
- omnibase_infra/services/session/config_consumer.py +120 -0
- omnibase_infra/services/session/config_store.py +139 -0
- omnibase_infra/services/session/consumer.py +1007 -0
- omnibase_infra/services/session/protocol_session_aggregator.py +117 -0
- omnibase_infra/services/session/store.py +997 -0
- omnibase_infra/utils/__init__.py +19 -0
- omnibase_infra/utils/util_atomic_file.py +261 -0
- omnibase_infra/utils/util_db_transaction.py +239 -0
- omnibase_infra/utils/util_dsn_validation.py +1 -1
- omnibase_infra/utils/util_retry_optimistic.py +281 -0
- omnibase_infra/validation/__init__.py +3 -19
- omnibase_infra/validation/contracts/security.validation.yaml +114 -0
- omnibase_infra/validation/infra_validators.py +35 -24
- omnibase_infra/validation/validation_exemptions.yaml +140 -9
- omnibase_infra/validation/validator_chain_propagation.py +2 -2
- omnibase_infra/validation/validator_runtime_shape.py +1 -1
- omnibase_infra/validation/validator_security.py +473 -370
- {omnibase_infra-0.2.1.dist-info → omnibase_infra-0.2.3.dist-info}/METADATA +3 -3
- {omnibase_infra-0.2.1.dist-info → omnibase_infra-0.2.3.dist-info}/RECORD +161 -98
- {omnibase_infra-0.2.1.dist-info → omnibase_infra-0.2.3.dist-info}/WHEEL +0 -0
- {omnibase_infra-0.2.1.dist-info → omnibase_infra-0.2.3.dist-info}/entry_points.txt +0 -0
- {omnibase_infra-0.2.1.dist-info → omnibase_infra-0.2.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -13,6 +13,7 @@ Key Features:
|
|
|
13
13
|
- Dynamic tool discovery from ONEX node registry
|
|
14
14
|
- Contract-to-MCP schema generation
|
|
15
15
|
- Request/response correlation for observability
|
|
16
|
+
- Internal uvicorn server lifecycle management (OMN-1282)
|
|
16
17
|
|
|
17
18
|
Note:
|
|
18
19
|
This handler requires the `mcp` package (anthropic-ai/mcp-python-sdk).
|
|
@@ -21,12 +22,18 @@ Note:
|
|
|
21
22
|
|
|
22
23
|
from __future__ import annotations
|
|
23
24
|
|
|
25
|
+
import asyncio
|
|
24
26
|
import logging
|
|
25
27
|
import time
|
|
26
28
|
from typing import TYPE_CHECKING
|
|
27
29
|
from uuid import UUID, uuid4
|
|
28
30
|
|
|
31
|
+
import uvicorn
|
|
29
32
|
from pydantic import ValidationError
|
|
33
|
+
from starlette.applications import Starlette
|
|
34
|
+
from starlette.requests import Request
|
|
35
|
+
from starlette.responses import JSONResponse
|
|
36
|
+
from starlette.routing import Route
|
|
30
37
|
|
|
31
38
|
from omnibase_core.models.dispatch import ModelHandlerOutput
|
|
32
39
|
from omnibase_infra.enums import (
|
|
@@ -46,11 +53,18 @@ from omnibase_infra.handlers.models.mcp import (
|
|
|
46
53
|
ModelMcpToolResult,
|
|
47
54
|
)
|
|
48
55
|
from omnibase_infra.mixins import MixinAsyncCircuitBreaker, MixinEnvelopeExtraction
|
|
56
|
+
from omnibase_infra.services.mcp import MCPServerLifecycle, ModelMCPServerConfig
|
|
49
57
|
|
|
50
58
|
if TYPE_CHECKING:
|
|
51
|
-
from collections.abc import Sequence
|
|
59
|
+
from collections.abc import Callable, Coroutine, Sequence
|
|
52
60
|
|
|
53
61
|
from omnibase_core.models.container.model_onex_container import ModelONEXContainer
|
|
62
|
+
from omnibase_infra.adapters.adapter_onex_tool_execution import (
|
|
63
|
+
AdapterONEXToolExecution,
|
|
64
|
+
)
|
|
65
|
+
from omnibase_infra.services.mcp.service_mcp_tool_registry import (
|
|
66
|
+
ServiceMCPToolRegistry,
|
|
67
|
+
)
|
|
54
68
|
from omnibase_spi.protocols.types.protocol_mcp_tool_types import (
|
|
55
69
|
ProtocolMCPToolDefinition,
|
|
56
70
|
)
|
|
@@ -60,6 +74,64 @@ logger = logging.getLogger(__name__)
|
|
|
60
74
|
# Handler ID for ModelHandlerOutput
|
|
61
75
|
HANDLER_ID_MCP: str = "mcp-handler"
|
|
62
76
|
|
|
77
|
+
# Shutdown timeout constants (can be overridden via class attributes)
|
|
78
|
+
_DEFAULT_SHUTDOWN_TIMEOUT: float = 5.0
|
|
79
|
+
_DEFAULT_CANCEL_TIMEOUT: float = 1.0
|
|
80
|
+
_DEFAULT_STARTUP_TIMEOUT: float = 2.0
|
|
81
|
+
|
|
82
|
+
# Error message truncation limit for health check responses
|
|
83
|
+
_ERROR_MESSAGE_MAX_LENGTH: int = 200
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _require_config_value[T](
|
|
87
|
+
config: dict[str, object],
|
|
88
|
+
key: str,
|
|
89
|
+
expected_type: type[T],
|
|
90
|
+
correlation_id: UUID,
|
|
91
|
+
) -> T:
|
|
92
|
+
"""Extract required config value or raise ProtocolConfigurationError.
|
|
93
|
+
|
|
94
|
+
Per CLAUDE.md configuration rules, the `.env` file is the SINGLE SOURCE OF TRUTH.
|
|
95
|
+
There should be ZERO hardcoded fallbacks - all configuration must be explicitly
|
|
96
|
+
provided. If missing, this function raises an error rather than using defaults.
|
|
97
|
+
|
|
98
|
+
Args:
|
|
99
|
+
config: Configuration dictionary to extract value from.
|
|
100
|
+
key: Configuration key to look up.
|
|
101
|
+
expected_type: Expected Python type for the value.
|
|
102
|
+
correlation_id: Correlation ID for error context.
|
|
103
|
+
|
|
104
|
+
Returns:
|
|
105
|
+
The validated configuration value.
|
|
106
|
+
|
|
107
|
+
Raises:
|
|
108
|
+
ProtocolConfigurationError: If value is missing or has wrong type.
|
|
109
|
+
"""
|
|
110
|
+
value = config.get(key)
|
|
111
|
+
if value is None:
|
|
112
|
+
raise ProtocolConfigurationError(
|
|
113
|
+
f"Missing required config: '{key}'. Must be set in .env or runtime config.",
|
|
114
|
+
context=ModelInfraErrorContext.with_correlation(
|
|
115
|
+
correlation_id=correlation_id,
|
|
116
|
+
transport_type=EnumInfraTransportType.MCP,
|
|
117
|
+
operation="initialize",
|
|
118
|
+
target_name="handler_mcp",
|
|
119
|
+
),
|
|
120
|
+
)
|
|
121
|
+
if not isinstance(value, expected_type):
|
|
122
|
+
raise ProtocolConfigurationError(
|
|
123
|
+
f"Invalid config type for '{key}': expected {expected_type.__name__}, "
|
|
124
|
+
f"got {type(value).__name__}",
|
|
125
|
+
context=ModelInfraErrorContext.with_correlation(
|
|
126
|
+
correlation_id=correlation_id,
|
|
127
|
+
transport_type=EnumInfraTransportType.MCP,
|
|
128
|
+
operation="initialize",
|
|
129
|
+
target_name="handler_mcp",
|
|
130
|
+
),
|
|
131
|
+
)
|
|
132
|
+
return value
|
|
133
|
+
|
|
134
|
+
|
|
63
135
|
# Supported operations
|
|
64
136
|
_SUPPORTED_OPERATIONS: frozenset[str] = frozenset(
|
|
65
137
|
{op.value for op in EnumMcpOperationType}
|
|
@@ -108,27 +180,75 @@ class HandlerMCP(MixinEnvelopeExtraction, MixinAsyncCircuitBreaker):
|
|
|
108
180
|
- Timeout enforcement via asyncio.wait_for()
|
|
109
181
|
- Full observability through the ONEX runtime
|
|
110
182
|
See: TODO(OMN-1288) for dispatcher integration tracking
|
|
183
|
+
|
|
184
|
+
Class Attributes:
|
|
185
|
+
shutdown_timeout: Timeout for graceful server shutdown (default: 5.0s).
|
|
186
|
+
cancel_timeout: Timeout for forced cancellation after graceful fails (default: 1.0s).
|
|
187
|
+
startup_timeout: Timeout for server readiness check during startup (default: 2.0s).
|
|
111
188
|
"""
|
|
112
189
|
|
|
113
|
-
|
|
114
|
-
|
|
190
|
+
# Configurable timeout attributes (can be overridden on subclasses or instances)
|
|
191
|
+
shutdown_timeout: float = _DEFAULT_SHUTDOWN_TIMEOUT
|
|
192
|
+
cancel_timeout: float = _DEFAULT_CANCEL_TIMEOUT
|
|
193
|
+
startup_timeout: float = _DEFAULT_STARTUP_TIMEOUT
|
|
194
|
+
|
|
195
|
+
def __init__(
    self,
    container: ModelONEXContainer | None = None,
    registry: ServiceMCPToolRegistry | None = None,
    executor: AdapterONEXToolExecution | None = None,
) -> None:
    """Create an uninitialized handler; every collaborator is optional.

    The constructor only stores its arguments and resets internal state.
    No server is started and no configuration is parsed here.

    Args:
        container: Optional ONEX container for dependency injection. May be
            omitted so the handler can be built with a no-argument call.
        registry: Optional MCP tool registry. It is stored separately from
            the handler's own local ``_tool_registry`` dict.
        executor: Optional tool execution adapter.
    """
    # Injected collaborators (any of them may be None).
    self._container = container
    self._mcp_registry: ServiceMCPToolRegistry | None = registry
    self._mcp_executor: AdapterONEXToolExecution | None = executor

    # Handler state: no parsed config, not initialized, empty local registry.
    self._config: ModelMcpHandlerConfig | None = None
    self._initialized: bool = False
    self._tool_registry: dict[str, ProtocolMCPToolDefinition] = {}

    # Server lifecycle state: nothing is running after construction.
    self._server_task: asyncio.Task[None] | None = None
    self._server: uvicorn.Server | None = None
    self._lifecycle: MCPServerLifecycle | None = None
    self._server_started_at: float | None = None  # timestamp for uptime tracking
    self._skip_server: bool = False  # set when server start is intentionally skipped
|
|
251
|
+
|
|
132
252
|
@property
|
|
133
253
|
def handler_type(self) -> EnumHandlerType:
|
|
134
254
|
"""Return the architectural role of this handler.
|
|
@@ -158,8 +278,194 @@ class HandlerMCP(MixinEnvelopeExtraction, MixinAsyncCircuitBreaker):
|
|
|
158
278
|
"""
|
|
159
279
|
return EnumInfraTransportType.MCP
|
|
160
280
|
|
|
281
|
+
def _create_json_endpoint(
    self,
    response_factory: Callable[[], Coroutine[object, object, dict[str, object]]],
) -> Callable[[Request], Coroutine[object, object, JSONResponse]]:
    """Adapt an async data factory into an async route handler.

    Args:
        response_factory: Zero-argument async callable producing the payload
            dict. It is awaited once per request.

    Returns:
        An async function that takes a request and returns the factory's
        payload wrapped in a ``JSONResponse``.
    """

    async def endpoint(_request: Request) -> JSONResponse:
        # The incoming request is not inspected; only the factory output matters.
        return JSONResponse(await response_factory())

    return endpoint
|
|
304
|
+
|
|
305
|
+
def _create_health_endpoint(
    self,
) -> Callable[[Request], Coroutine[object, object, JSONResponse]]:
    """Build the health route handler bound to this handler instance.

    Returns:
        Async route handler produced by ``_create_json_endpoint``.
    """

    async def get_health_data() -> dict[str, object]:
        """Assemble the health payload from the handler's current state."""
        lifecycle = self._lifecycle
        registry = lifecycle.registry if lifecycle else None
        return {
            "status": "healthy",
            # Reported as 0 when there is no lifecycle or no registry.
            "tool_count": registry.tool_count if registry else 0,
            "initialized": self._initialized,
        }

    return self._create_json_endpoint(get_health_data)
|
|
331
|
+
|
|
332
|
+
def _create_tools_list_endpoint(
    self,
) -> Callable[[Request], Coroutine[object, object, JSONResponse]]:
    """Build the tool-listing route handler bound to this handler instance.

    Returns:
        Async route handler produced by ``_create_json_endpoint``.
    """

    async def get_tools_data() -> dict[str, object]:
        """List the registry's tools as name/description/endpoint entries."""
        lifecycle = self._lifecycle
        # Without a lifecycle or a registry there is nothing to list.
        if not (lifecycle and lifecycle.registry):
            return {"tools": []}

        tools = await lifecycle.registry.list_tools()
        summaries = [
            {
                "name": tool.name,
                "description": tool.description,
                "endpoint": tool.endpoint,
            }
            for tool in tools
        ]
        return {"tools": summaries}

    return self._create_json_endpoint(get_tools_data)
|
|
363
|
+
|
|
364
|
+
async def _wait_for_server_ready(
    self,
    host: str,
    port: int,
    timeout: float = 2.0,
    poll_interval: float = 0.05,
) -> None:
    """Poll a TCP connect until the server accepts connections or time runs out.

    Each iteration first checks whether the background server task has
    already finished, then attempts one TCP connection, then sleeps for
    ``poll_interval``.

    Args:
        host: Host the server was asked to bind to.
        port: Port the server was asked to bind to.
        timeout: Maximum number of seconds to keep polling.
        poll_interval: Seconds between attempts; also used as the
            per-attempt socket timeout.

    Raises:
        ProtocolConfigurationError: If the server task failed or was
            cancelled, or if no connection succeeded within ``timeout``.

    Note:
        Connect failures here are deliberately not recorded against the
        circuit breaker. They are expected while the server is still
        starting, are bounded by ``timeout``, and would otherwise count
        as failures before the service has ever been healthy.
    """
    import os
    import socket

    def _startup_error(message: str) -> ProtocolConfigurationError:
        # Every failure path reports the same startup error context.
        return ProtocolConfigurationError(
            message,
            context=ModelInfraErrorContext.with_correlation(
                transport_type=EnumInfraTransportType.MCP,
                operation="server_startup",
                target_name="mcp_handler",
            ),
        )

    # TCP PROTOCOL REQUIREMENT (NOT a config fallback): a server bound to
    # 0.0.0.0 listens on all interfaces, but 0.0.0.0 is not a connectable
    # address, so the probe goes through loopback instead.
    effective_host = "127.0.0.1" if host == "0.0.0.0" else host  # noqa: S104

    start_time = time.perf_counter()
    last_error: Exception | None = None

    while time.perf_counter() - start_time < timeout:
        # Fail fast if the background server task has already finished badly.
        server_task = self._server_task
        if server_task is not None and server_task.done():
            # Task.exception() raises CancelledError for a cancelled task;
            # report that as a startup failure rather than letting it look
            # like the caller itself was cancelled.
            if server_task.cancelled():
                raise _startup_error(
                    "Server failed to start: server task was cancelled"
                )
            exc = server_task.exception()
            if exc:
                raise _startup_error(f"Server failed to start: {exc}") from exc

        try:
            # The context manager closes the socket even if connect raises.
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
                sock.settimeout(poll_interval)
                result = sock.connect_ex((effective_host, port))
        except Exception as e:
            last_error = e
        else:
            if result == 0:
                return  # Server is ready
            # Keep the errno so the timeout message explains the failure.
            last_error = OSError(result, os.strerror(result))

        await asyncio.sleep(poll_interval)

    # Timeout reached
    raise _startup_error(
        f"Server failed to start within {timeout}s. Last error: {last_error}"
    )
|
|
460
|
+
|
|
161
461
|
async def initialize(self, config: dict[str, object]) -> None:
|
|
162
|
-
"""Initialize MCP handler with configuration.
|
|
462
|
+
"""Initialize MCP handler with configuration and optionally start uvicorn server.
|
|
463
|
+
|
|
464
|
+
This method performs the following steps:
|
|
465
|
+
1. Parse and validate handler configuration
|
|
466
|
+
2. Initialize MCPServerLifecycle for tool discovery (unless skip_server=True)
|
|
467
|
+
3. Create Starlette app with /health and /mcp/tools endpoints
|
|
468
|
+
4. Start uvicorn server in a background task (unless skip_server=True)
|
|
163
469
|
|
|
164
470
|
Args:
|
|
165
471
|
config: Configuration dict containing:
|
|
@@ -170,9 +476,19 @@ class HandlerMCP(MixinEnvelopeExtraction, MixinAsyncCircuitBreaker):
|
|
|
170
476
|
- json_response: Return JSON responses (default: True)
|
|
171
477
|
- timeout_seconds: Tool execution timeout (default: 30.0)
|
|
172
478
|
- max_tools: Maximum tools to expose (default: 100)
|
|
479
|
+
- consul_host: Consul server hostname (REQUIRED - no default)
|
|
480
|
+
- consul_port: Consul server port (REQUIRED - no default)
|
|
481
|
+
- kafka_enabled: Whether to enable Kafka hot reload (REQUIRED - no default)
|
|
482
|
+
- dev_mode: Whether to run in development mode (REQUIRED - no default)
|
|
483
|
+
- contracts_dir: Directory for contract scanning in dev mode (optional)
|
|
484
|
+
- skip_server: Skip starting uvicorn server (default: False).
|
|
485
|
+
Use for unit testing to avoid port binding.
|
|
173
486
|
|
|
174
487
|
Raises:
|
|
175
|
-
ProtocolConfigurationError: If configuration is invalid
|
|
488
|
+
ProtocolConfigurationError: If configuration is invalid or required
|
|
489
|
+
config values (consul_host, consul_port, kafka_enabled, dev_mode)
|
|
490
|
+
are missing. Per CLAUDE.md, .env is the single source of truth -
|
|
491
|
+
no hardcoded fallbacks are used.
|
|
176
492
|
"""
|
|
177
493
|
init_correlation_id = uuid4()
|
|
178
494
|
|
|
@@ -203,22 +519,175 @@ class HandlerMCP(MixinEnvelopeExtraction, MixinAsyncCircuitBreaker):
|
|
|
203
519
|
transport_type=EnumInfraTransportType.MCP,
|
|
204
520
|
)
|
|
205
521
|
|
|
206
|
-
#
|
|
207
|
-
|
|
522
|
+
# Check if server startup should be skipped (for unit testing)
|
|
523
|
+
skip_server_val = config.get("skip_server")
|
|
524
|
+
skip_server: bool = (
|
|
525
|
+
skip_server_val if isinstance(skip_server_val, bool) else False
|
|
526
|
+
)
|
|
527
|
+
self._skip_server = skip_server
|
|
528
|
+
|
|
529
|
+
if not skip_server:
|
|
530
|
+
# Build MCPServerConfig from handler config (OMN-1282)
|
|
531
|
+
# Map handler config fields to lifecycle config fields
|
|
532
|
+
#
|
|
533
|
+
# Per CLAUDE.md: .env is the SINGLE SOURCE OF TRUTH.
|
|
534
|
+
# No hardcoded fallbacks - all required config must be explicit.
|
|
535
|
+
# The _require_config_value helper validates type, cast() is for mypy.
|
|
536
|
+
consul_host = _require_config_value(
|
|
537
|
+
config, "consul_host", str, init_correlation_id
|
|
538
|
+
)
|
|
539
|
+
consul_port = _require_config_value(
|
|
540
|
+
config, "consul_port", int, init_correlation_id
|
|
541
|
+
)
|
|
542
|
+
kafka_enabled = _require_config_value(
|
|
543
|
+
config, "kafka_enabled", bool, init_correlation_id
|
|
544
|
+
)
|
|
545
|
+
dev_mode = _require_config_value(
|
|
546
|
+
config, "dev_mode", bool, init_correlation_id
|
|
547
|
+
)
|
|
548
|
+
# contracts_dir is optional - only used when dev_mode=True
|
|
549
|
+
contracts_dir_val = config.get("contracts_dir")
|
|
550
|
+
contracts_dir: str | None = (
|
|
551
|
+
contracts_dir_val if isinstance(contracts_dir_val, str) else None
|
|
552
|
+
)
|
|
553
|
+
|
|
554
|
+
server_config = ModelMCPServerConfig(
|
|
555
|
+
consul_host=consul_host,
|
|
556
|
+
consul_port=consul_port,
|
|
557
|
+
kafka_enabled=kafka_enabled,
|
|
558
|
+
http_host=self._config.host,
|
|
559
|
+
http_port=self._config.port,
|
|
560
|
+
default_timeout=self._config.timeout_seconds,
|
|
561
|
+
dev_mode=dev_mode,
|
|
562
|
+
contracts_dir=contracts_dir,
|
|
563
|
+
)
|
|
564
|
+
|
|
565
|
+
# Wrap entire server startup in try/except to ensure cleanup
|
|
566
|
+
# if ANY step fails after lifecycle starts. This prevents:
|
|
567
|
+
# - Orphan lifecycle resources (registry, executor, sync)
|
|
568
|
+
# - Orphan server tasks
|
|
569
|
+
# - Resource leaks from partial initialization
|
|
570
|
+
try:
|
|
571
|
+
# Create and start MCPServerLifecycle for tool discovery
|
|
572
|
+
# Container is required for lifecycle initialization
|
|
573
|
+
if self._container is None:
|
|
574
|
+
raise ValueError(
|
|
575
|
+
"Container required for MCPServerLifecycle initialization"
|
|
576
|
+
)
|
|
577
|
+
self._lifecycle = MCPServerLifecycle(
|
|
578
|
+
container=self._container,
|
|
579
|
+
config=server_config,
|
|
580
|
+
bus=None,
|
|
581
|
+
)
|
|
582
|
+
await self._lifecycle.start()
|
|
583
|
+
|
|
584
|
+
# Update MCP registry and executor references from lifecycle
|
|
585
|
+
if self._lifecycle.registry is not None:
|
|
586
|
+
self._mcp_registry = self._lifecycle.registry
|
|
587
|
+
if self._lifecycle.executor is not None:
|
|
588
|
+
self._mcp_executor = self._lifecycle.executor
|
|
589
|
+
|
|
590
|
+
# Create Starlette app with HTTP endpoints (OMN-1282)
|
|
591
|
+
# Use factory methods for explicit handler reference binding
|
|
592
|
+
health_endpoint = self._create_health_endpoint()
|
|
593
|
+
tools_list_endpoint = self._create_tools_list_endpoint()
|
|
594
|
+
|
|
595
|
+
app = Starlette(
|
|
596
|
+
routes=[
|
|
597
|
+
Route("/health", health_endpoint, methods=["GET"]),
|
|
598
|
+
Route("/mcp/tools", tools_list_endpoint, methods=["GET"]),
|
|
599
|
+
],
|
|
600
|
+
)
|
|
601
|
+
|
|
602
|
+
# Create uvicorn server config and server
|
|
603
|
+
uvicorn_config = uvicorn.Config(
|
|
604
|
+
app=app,
|
|
605
|
+
host=self._config.host,
|
|
606
|
+
port=self._config.port,
|
|
607
|
+
log_level="info",
|
|
608
|
+
)
|
|
609
|
+
self._server = uvicorn.Server(uvicorn_config)
|
|
610
|
+
|
|
611
|
+
# Start server in background task
|
|
612
|
+
self._server_task = asyncio.create_task(self._server.serve())
|
|
613
|
+
|
|
614
|
+
# Wait for server to be ready before marking as initialized
|
|
615
|
+
await self._wait_for_server_ready(
|
|
616
|
+
self._config.host,
|
|
617
|
+
self._config.port,
|
|
618
|
+
timeout=self.startup_timeout,
|
|
619
|
+
)
|
|
620
|
+
self._server_started_at = time.time()
|
|
621
|
+
|
|
622
|
+
except Exception as startup_error:
|
|
623
|
+
# Any failure during server startup - clean up all resources
|
|
624
|
+
# This handles failures in:
|
|
625
|
+
# - lifecycle.start() (Consul/contract discovery)
|
|
626
|
+
# - Starlette app creation
|
|
627
|
+
# - uvicorn config/server creation
|
|
628
|
+
# - server task creation
|
|
629
|
+
# - server readiness check
|
|
630
|
+
logger.exception(
|
|
631
|
+
"MCP server startup failed, cleaning up resources",
|
|
632
|
+
extra={
|
|
633
|
+
"host": self._config.host,
|
|
634
|
+
"port": self._config.port,
|
|
635
|
+
"lifecycle_created": self._lifecycle is not None,
|
|
636
|
+
"server_created": self._server is not None,
|
|
637
|
+
"server_task_created": self._server_task is not None,
|
|
638
|
+
"correlation_id": str(init_correlation_id),
|
|
639
|
+
},
|
|
640
|
+
)
|
|
641
|
+
# shutdown() safely handles partially initialized state:
|
|
642
|
+
# - Checks each component before cleanup
|
|
643
|
+
# - Safe to call even if components weren't created
|
|
644
|
+
await self.shutdown()
|
|
645
|
+
ctx = ModelInfraErrorContext(
|
|
646
|
+
transport_type=EnumInfraTransportType.MCP,
|
|
647
|
+
operation="initialize",
|
|
648
|
+
target_name="mcp_handler",
|
|
649
|
+
correlation_id=init_correlation_id,
|
|
650
|
+
)
|
|
651
|
+
raise ProtocolConfigurationError(
|
|
652
|
+
f"MCP server startup failed: {startup_error}",
|
|
653
|
+
context=ctx,
|
|
654
|
+
) from startup_error
|
|
655
|
+
|
|
208
656
|
self._initialized = True
|
|
209
657
|
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
self.
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
"
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
658
|
+
tool_count = 0
|
|
659
|
+
if self._lifecycle and self._lifecycle.registry:
|
|
660
|
+
tool_count = self._lifecycle.registry.tool_count
|
|
661
|
+
|
|
662
|
+
if skip_server:
|
|
663
|
+
logger.info(
|
|
664
|
+
"%s initialized successfully (server skipped)",
|
|
665
|
+
self.__class__.__name__,
|
|
666
|
+
extra={
|
|
667
|
+
"handler": self.__class__.__name__,
|
|
668
|
+
"host": self._config.host,
|
|
669
|
+
"port": self._config.port,
|
|
670
|
+
"path": self._config.path,
|
|
671
|
+
"stateless": self._config.stateless,
|
|
672
|
+
"skip_server": True,
|
|
673
|
+
"correlation_id": str(init_correlation_id),
|
|
674
|
+
},
|
|
675
|
+
)
|
|
676
|
+
else:
|
|
677
|
+
logger.info(
|
|
678
|
+
"%s initialized successfully - uvicorn server running",
|
|
679
|
+
self.__class__.__name__,
|
|
680
|
+
extra={
|
|
681
|
+
"handler": self.__class__.__name__,
|
|
682
|
+
"host": self._config.host,
|
|
683
|
+
"port": self._config.port,
|
|
684
|
+
"path": self._config.path,
|
|
685
|
+
"stateless": self._config.stateless,
|
|
686
|
+
"tool_count": tool_count,
|
|
687
|
+
"url": f"http://{self._config.host}:{self._config.port}",
|
|
688
|
+
"correlation_id": str(init_correlation_id),
|
|
689
|
+
},
|
|
690
|
+
)
|
|
222
691
|
|
|
223
692
|
except ValidationError as e:
|
|
224
693
|
ctx = ModelInfraErrorContext(
|
|
@@ -242,11 +711,102 @@ class HandlerMCP(MixinEnvelopeExtraction, MixinAsyncCircuitBreaker):
|
|
|
242
711
|
) from e
|
|
243
712
|
|
|
244
713
|
async def shutdown(self) -> None:
|
|
245
|
-
"""Shutdown MCP handler
|
|
714
|
+
"""Shutdown MCP handler with timeout protection.
|
|
715
|
+
|
|
716
|
+
This method performs graceful shutdown with timeout protection:
|
|
717
|
+
1. Signal uvicorn server to stop
|
|
718
|
+
2. Wait for server task with timeout (max 5s graceful, 1s forced)
|
|
719
|
+
3. Shutdown MCPServerLifecycle (registry, discovery, sync)
|
|
720
|
+
4. Clear tool registry and reset state
|
|
721
|
+
|
|
722
|
+
Safe to call multiple times. Never hangs indefinitely (max ~6s with defaults).
|
|
723
|
+
|
|
724
|
+
Note:
|
|
725
|
+
Timeouts are configurable via class attributes:
|
|
726
|
+
- shutdown_timeout: Graceful shutdown timeout (default: 5.0s)
|
|
727
|
+
- cancel_timeout: Forced cancellation timeout (default: 1.0s)
|
|
728
|
+
"""
|
|
729
|
+
shutdown_correlation_id = uuid4()
|
|
730
|
+
|
|
731
|
+
logger.info(
|
|
732
|
+
"Shutting down %s",
|
|
733
|
+
self.__class__.__name__,
|
|
734
|
+
extra={
|
|
735
|
+
"handler": self.__class__.__name__,
|
|
736
|
+
"correlation_id": str(shutdown_correlation_id),
|
|
737
|
+
},
|
|
738
|
+
)
|
|
739
|
+
|
|
740
|
+
# Stop uvicorn server with timeout protection (OMN-1282)
|
|
741
|
+
if (
|
|
742
|
+
self._server is not None
|
|
743
|
+
and self._server_task is not None
|
|
744
|
+
and not self._skip_server
|
|
745
|
+
):
|
|
746
|
+
# Signal server to stop
|
|
747
|
+
self._server.should_exit = True
|
|
748
|
+
|
|
749
|
+
try:
|
|
750
|
+
# Wait for graceful shutdown with timeout
|
|
751
|
+
logger.debug(
|
|
752
|
+
"Waiting for server task to complete",
|
|
753
|
+
extra={
|
|
754
|
+
"timeout_seconds": self.shutdown_timeout,
|
|
755
|
+
"correlation_id": str(shutdown_correlation_id),
|
|
756
|
+
},
|
|
757
|
+
)
|
|
758
|
+
await asyncio.wait_for(self._server_task, timeout=self.shutdown_timeout)
|
|
759
|
+
except TimeoutError:
|
|
760
|
+
logger.warning(
|
|
761
|
+
"Server shutdown timed out, forcing cancellation",
|
|
762
|
+
extra={
|
|
763
|
+
"timeout_seconds": self.shutdown_timeout,
|
|
764
|
+
"correlation_id": str(shutdown_correlation_id),
|
|
765
|
+
},
|
|
766
|
+
)
|
|
767
|
+
self._server_task.cancel()
|
|
768
|
+
try:
|
|
769
|
+
await asyncio.wait_for(
|
|
770
|
+
self._server_task, timeout=self.cancel_timeout
|
|
771
|
+
)
|
|
772
|
+
except (TimeoutError, asyncio.CancelledError):
|
|
773
|
+
pass # Best effort
|
|
774
|
+
except asyncio.CancelledError:
|
|
775
|
+
logger.debug(
|
|
776
|
+
"Server task was cancelled",
|
|
777
|
+
extra={"correlation_id": str(shutdown_correlation_id)},
|
|
778
|
+
)
|
|
779
|
+
|
|
780
|
+
# Shutdown lifecycle (registry, discovery, sync)
|
|
781
|
+
if self._lifecycle is not None:
|
|
782
|
+
logger.debug(
|
|
783
|
+
"Shutting down MCPServerLifecycle",
|
|
784
|
+
extra={"correlation_id": str(shutdown_correlation_id)},
|
|
785
|
+
)
|
|
786
|
+
await self._lifecycle.shutdown()
|
|
787
|
+
self._lifecycle = None
|
|
788
|
+
|
|
789
|
+
# Clear registry and executor references
|
|
790
|
+
self._mcp_registry = None
|
|
791
|
+
self._mcp_executor = None
|
|
792
|
+
|
|
793
|
+
# Clear all state
|
|
246
794
|
self._tool_registry.clear()
|
|
247
795
|
self._config = None
|
|
248
796
|
self._initialized = False
|
|
249
|
-
|
|
797
|
+
self._server = None
|
|
798
|
+
self._server_task = None
|
|
799
|
+
self._skip_server = False
|
|
800
|
+
self._server_started_at = None
|
|
801
|
+
|
|
802
|
+
logger.info(
|
|
803
|
+
"%s shutdown complete",
|
|
804
|
+
self.__class__.__name__,
|
|
805
|
+
extra={
|
|
806
|
+
"handler": self.__class__.__name__,
|
|
807
|
+
"correlation_id": str(shutdown_correlation_id),
|
|
808
|
+
},
|
|
809
|
+
)
|
|
250
810
|
|
|
251
811
|
async def execute(
|
|
252
812
|
self, envelope: dict[str, object]
|
|
@@ -566,23 +1126,23 @@ class HandlerMCP(MixinEnvelopeExtraction, MixinAsyncCircuitBreaker):
|
|
|
566
1126
|
) -> dict[str, object]:
|
|
567
1127
|
"""Execute a registered tool.
|
|
568
1128
|
|
|
569
|
-
This method delegates to the ONEX
|
|
570
|
-
|
|
1129
|
+
This method delegates to the ONEX orchestrator that provides this tool.
|
|
1130
|
+
When operating in integrated mode (with registry and executor), the tool
|
|
1131
|
+
is looked up from the MCP registry and executed via the execution adapter.
|
|
571
1132
|
|
|
572
1133
|
Circuit breaker protection is applied to prevent cascading failures
|
|
573
1134
|
when tool execution repeatedly fails.
|
|
574
1135
|
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
1136
|
+
Integration Mode (OMN-1281):
|
|
1137
|
+
When _mcp_registry and _mcp_executor are configured:
|
|
1138
|
+
1. Look up the tool definition from the MCP registry
|
|
1139
|
+
2. Delegate execution to AdapterONEXToolExecution
|
|
1140
|
+
3. The adapter dispatches to the orchestrator endpoint
|
|
1141
|
+
4. Timeout is enforced by the adapter using the tool's timeout_seconds
|
|
579
1142
|
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
- HTTP client timeouts on the caller side
|
|
584
|
-
|
|
585
|
-
See: TODO(OMN-1288) for dispatcher timeout integration
|
|
1143
|
+
Legacy Mode:
|
|
1144
|
+
When registry/executor are not configured, returns placeholder response
|
|
1145
|
+
for backward compatibility.
|
|
586
1146
|
|
|
587
1147
|
Args:
|
|
588
1148
|
tool_name: Name of the tool to execute.
|
|
@@ -593,26 +1153,54 @@ class HandlerMCP(MixinEnvelopeExtraction, MixinAsyncCircuitBreaker):
|
|
|
593
1153
|
Tool execution result.
|
|
594
1154
|
|
|
595
1155
|
Raises:
|
|
596
|
-
InfraUnavailableError: If tool
|
|
1156
|
+
InfraUnavailableError: If tool not found or execution fails.
|
|
597
1157
|
"""
|
|
598
1158
|
# Check circuit breaker before tool execution
|
|
599
1159
|
async with self._circuit_breaker_lock:
|
|
600
1160
|
await self._check_circuit_breaker("execute_tool", correlation_id)
|
|
601
1161
|
|
|
602
1162
|
try:
|
|
603
|
-
#
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
1163
|
+
# Integrated mode: use MCP registry and executor (OMN-1281)
|
|
1164
|
+
if self._mcp_registry is not None and self._mcp_executor is not None:
|
|
1165
|
+
# Look up tool from registry
|
|
1166
|
+
tool = await self._mcp_registry.get_tool(tool_name)
|
|
1167
|
+
if tool is None:
|
|
1168
|
+
ctx = ModelInfraErrorContext.with_correlation(
|
|
1169
|
+
correlation_id=correlation_id,
|
|
1170
|
+
transport_type=self.transport_type,
|
|
1171
|
+
operation="execute_tool",
|
|
1172
|
+
target_name=tool_name,
|
|
1173
|
+
)
|
|
1174
|
+
raise InfraUnavailableError(
|
|
1175
|
+
f"Tool not found: {tool_name}",
|
|
1176
|
+
context=ctx,
|
|
1177
|
+
)
|
|
1178
|
+
|
|
1179
|
+
logger.info(
|
|
1180
|
+
"Executing MCP tool via adapter",
|
|
1181
|
+
extra={
|
|
1182
|
+
"tool_name": tool_name,
|
|
1183
|
+
"argument_count": len(arguments),
|
|
1184
|
+
"correlation_id": str(correlation_id),
|
|
1185
|
+
},
|
|
1186
|
+
)
|
|
1187
|
+
|
|
1188
|
+
# Execute via adapter
|
|
1189
|
+
result = await self._mcp_executor.execute(
|
|
1190
|
+
tool=tool,
|
|
1191
|
+
arguments=arguments,
|
|
1192
|
+
correlation_id=correlation_id,
|
|
1193
|
+
)
|
|
1194
|
+
|
|
1195
|
+
# Reset circuit breaker on success
|
|
1196
|
+
async with self._circuit_breaker_lock:
|
|
1197
|
+
await self._reset_circuit_breaker()
|
|
1198
|
+
|
|
1199
|
+
return result
|
|
1200
|
+
|
|
1201
|
+
# Legacy mode: placeholder response for backward compatibility
|
|
614
1202
|
logger.info(
|
|
615
|
-
"Tool execution requested",
|
|
1203
|
+
"Tool execution requested (placeholder mode)",
|
|
616
1204
|
extra={
|
|
617
1205
|
"tool_name": tool_name,
|
|
618
1206
|
"argument_count": len(arguments),
|
|
@@ -620,7 +1208,7 @@ class HandlerMCP(MixinEnvelopeExtraction, MixinAsyncCircuitBreaker):
|
|
|
620
1208
|
},
|
|
621
1209
|
)
|
|
622
1210
|
|
|
623
|
-
|
|
1211
|
+
placeholder_result: dict[str, object] = {
|
|
624
1212
|
"message": f"Tool '{tool_name}' executed successfully",
|
|
625
1213
|
"arguments_received": list(arguments.keys()),
|
|
626
1214
|
}
|
|
@@ -629,7 +1217,13 @@ class HandlerMCP(MixinEnvelopeExtraction, MixinAsyncCircuitBreaker):
|
|
|
629
1217
|
async with self._circuit_breaker_lock:
|
|
630
1218
|
await self._reset_circuit_breaker()
|
|
631
1219
|
|
|
632
|
-
return
|
|
1220
|
+
return placeholder_result
|
|
1221
|
+
|
|
1222
|
+
except InfraUnavailableError:
|
|
1223
|
+
# Record failure in circuit breaker and re-raise
|
|
1224
|
+
async with self._circuit_breaker_lock:
|
|
1225
|
+
await self._record_circuit_failure("execute_tool", correlation_id)
|
|
1226
|
+
raise
|
|
633
1227
|
|
|
634
1228
|
except Exception:
|
|
635
1229
|
# Record failure in circuit breaker
|
|
@@ -706,7 +1300,7 @@ class HandlerMCP(MixinEnvelopeExtraction, MixinAsyncCircuitBreaker):
|
|
|
706
1300
|
|
|
707
1301
|
Returns:
|
|
708
1302
|
dict containing handler type, category, transport type,
|
|
709
|
-
supported operations, configuration, and
|
|
1303
|
+
supported operations, configuration, tool count, and server state.
|
|
710
1304
|
"""
|
|
711
1305
|
config_dict: dict[str, object] = {}
|
|
712
1306
|
if self._config:
|
|
@@ -720,28 +1314,116 @@ class HandlerMCP(MixinEnvelopeExtraction, MixinAsyncCircuitBreaker):
|
|
|
720
1314
|
"max_tools": self._config.max_tools,
|
|
721
1315
|
}
|
|
722
1316
|
|
|
1317
|
+
# Include lifecycle tool count if available (OMN-1282)
|
|
1318
|
+
tool_count = len(self._tool_registry)
|
|
1319
|
+
if self._lifecycle and self._lifecycle.registry:
|
|
1320
|
+
tool_count = self._lifecycle.registry.tool_count
|
|
1321
|
+
|
|
723
1322
|
return {
|
|
724
1323
|
"handler_type": self.handler_type.value,
|
|
725
1324
|
"handler_category": self.handler_category.value,
|
|
726
1325
|
"transport_type": self.transport_type.value,
|
|
727
1326
|
"supported_operations": sorted(_SUPPORTED_OPERATIONS),
|
|
728
|
-
"tool_count":
|
|
1327
|
+
"tool_count": tool_count,
|
|
729
1328
|
"config": config_dict,
|
|
730
1329
|
"initialized": self._initialized,
|
|
1330
|
+
"server_running": self._server is not None
|
|
1331
|
+
and self._server_task is not None,
|
|
1332
|
+
"lifecycle_running": self._lifecycle is not None
|
|
1333
|
+
and self._lifecycle.is_running,
|
|
731
1334
|
"version": "0.1.0-mvp",
|
|
732
1335
|
}
|
|
733
1336
|
|
|
734
1337
|
async def health_check(self) -> dict[str, object]:
|
|
735
|
-
"""Check handler health and
|
|
1338
|
+
"""Check handler health and server status.
|
|
736
1339
|
|
|
737
|
-
Returns:
|
|
738
|
-
|
|
1340
|
+
Returns unhealthy if:
|
|
1341
|
+
- Not initialized
|
|
1342
|
+
- Server task has crashed/completed unexpectedly
|
|
1343
|
+
- Server task was cancelled
|
|
1344
|
+
|
|
1345
|
+
Note:
|
|
1346
|
+
When skip_server=True was used during initialization, the handler is
|
|
1347
|
+
considered healthy if initialized, even without a running server.
|
|
1348
|
+
This enables unit testing without actual port binding.
|
|
739
1349
|
"""
|
|
1350
|
+
if not self._initialized:
|
|
1351
|
+
return {
|
|
1352
|
+
"healthy": False,
|
|
1353
|
+
"reason": "not_initialized",
|
|
1354
|
+
"transport_type": self.transport_type.value,
|
|
1355
|
+
}
|
|
1356
|
+
|
|
1357
|
+
if self._skip_server:
|
|
1358
|
+
return {
|
|
1359
|
+
"healthy": True,
|
|
1360
|
+
"skip_server": True,
|
|
1361
|
+
"transport_type": self.transport_type.value,
|
|
1362
|
+
"initialized": True,
|
|
1363
|
+
}
|
|
1364
|
+
|
|
1365
|
+
# Capture server task reference once to avoid TOCTOU race conditions.
|
|
1366
|
+
# If _server_task is reassigned (e.g., by concurrent shutdown()),
|
|
1367
|
+
# we work with the captured reference consistently.
|
|
1368
|
+
server_task = self._server_task
|
|
1369
|
+
|
|
1370
|
+
# Check server task state
|
|
1371
|
+
if server_task is None:
|
|
1372
|
+
return {
|
|
1373
|
+
"healthy": False,
|
|
1374
|
+
"reason": "server_task_missing",
|
|
1375
|
+
"transport_type": self.transport_type.value,
|
|
1376
|
+
"initialized": True,
|
|
1377
|
+
}
|
|
1378
|
+
|
|
1379
|
+
if server_task.done():
|
|
1380
|
+
# Task completed - check why
|
|
1381
|
+
if server_task.cancelled():
|
|
1382
|
+
return {
|
|
1383
|
+
"healthy": False,
|
|
1384
|
+
"reason": "server_cancelled",
|
|
1385
|
+
"transport_type": self.transport_type.value,
|
|
1386
|
+
"initialized": True,
|
|
1387
|
+
}
|
|
1388
|
+
|
|
1389
|
+
exc = server_task.exception()
|
|
1390
|
+
if exc is not None:
|
|
1391
|
+
return {
|
|
1392
|
+
"healthy": False,
|
|
1393
|
+
"reason": "server_crashed",
|
|
1394
|
+
"error": str(exc)[:_ERROR_MESSAGE_MAX_LENGTH],
|
|
1395
|
+
"transport_type": self.transport_type.value,
|
|
1396
|
+
"initialized": True,
|
|
1397
|
+
}
|
|
1398
|
+
|
|
1399
|
+
# Exited cleanly but unexpectedly
|
|
1400
|
+
return {
|
|
1401
|
+
"healthy": False,
|
|
1402
|
+
"reason": "server_exited",
|
|
1403
|
+
"transport_type": self.transport_type.value,
|
|
1404
|
+
"initialized": True,
|
|
1405
|
+
}
|
|
1406
|
+
|
|
1407
|
+
# Task is still running - healthy
|
|
1408
|
+
# Include lifecycle tool count if available (OMN-1282)
|
|
1409
|
+
tool_count = len(self._tool_registry)
|
|
1410
|
+
if self._lifecycle and self._lifecycle.registry:
|
|
1411
|
+
tool_count = self._lifecycle.registry.tool_count
|
|
1412
|
+
|
|
1413
|
+
lifecycle_running = self._lifecycle is not None and self._lifecycle.is_running
|
|
1414
|
+
|
|
740
1415
|
return {
|
|
741
|
-
"healthy":
|
|
742
|
-
"initialized":
|
|
743
|
-
"
|
|
1416
|
+
"healthy": True,
|
|
1417
|
+
"initialized": True,
|
|
1418
|
+
"server_running": True,
|
|
1419
|
+
"tool_count": tool_count,
|
|
744
1420
|
"transport_type": self.transport_type.value,
|
|
1421
|
+
"lifecycle_running": lifecycle_running,
|
|
1422
|
+
"uptime_seconds": (
|
|
1423
|
+
time.time() - self._server_started_at
|
|
1424
|
+
if self._server_started_at is not None
|
|
1425
|
+
else None
|
|
1426
|
+
),
|
|
745
1427
|
}
|
|
746
1428
|
|
|
747
1429
|
|