omnibase_infra 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- omnibase_infra/__init__.py +1 -1
- omnibase_infra/adapters/adapter_onex_tool_execution.py +446 -0
- omnibase_infra/cli/commands.py +1 -1
- omnibase_infra/configs/widget_mapping.yaml +176 -0
- omnibase_infra/contracts/handlers/filesystem/handler_contract.yaml +4 -1
- omnibase_infra/contracts/handlers/mcp/handler_contract.yaml +4 -1
- omnibase_infra/errors/error_compute_registry.py +4 -1
- omnibase_infra/errors/error_event_bus_registry.py +4 -1
- omnibase_infra/errors/error_infra.py +3 -1
- omnibase_infra/errors/error_policy_registry.py +4 -1
- omnibase_infra/handlers/handler_db.py +2 -1
- omnibase_infra/handlers/handler_graph.py +10 -5
- omnibase_infra/handlers/handler_mcp.py +736 -63
- omnibase_infra/handlers/mixins/mixin_consul_kv.py +4 -3
- omnibase_infra/handlers/mixins/mixin_consul_service.py +2 -1
- omnibase_infra/handlers/service_discovery/handler_service_discovery_consul.py +301 -4
- omnibase_infra/handlers/service_discovery/models/model_service_info.py +10 -0
- omnibase_infra/mixins/mixin_async_circuit_breaker.py +3 -2
- omnibase_infra/mixins/mixin_node_introspection.py +24 -7
- omnibase_infra/mixins/mixin_retry_execution.py +1 -1
- omnibase_infra/models/handlers/__init__.py +10 -0
- omnibase_infra/models/handlers/model_bootstrap_handler_descriptor.py +162 -0
- omnibase_infra/models/handlers/model_handler_descriptor.py +15 -0
- omnibase_infra/models/mcp/__init__.py +15 -0
- omnibase_infra/models/mcp/model_mcp_contract_config.py +80 -0
- omnibase_infra/models/mcp/model_mcp_server_config.py +67 -0
- omnibase_infra/models/mcp/model_mcp_tool_definition.py +73 -0
- omnibase_infra/models/mcp/model_mcp_tool_parameter.py +35 -0
- omnibase_infra/models/registration/model_node_capabilities.py +11 -0
- omnibase_infra/nodes/architecture_validator/contract_architecture_validator.yaml +0 -5
- omnibase_infra/nodes/architecture_validator/registry/registry_infra_architecture_validator.py +17 -10
- omnibase_infra/nodes/effects/contract.yaml +0 -5
- omnibase_infra/nodes/node_registration_orchestrator/contract.yaml +7 -0
- omnibase_infra/nodes/node_registration_orchestrator/handlers/handler_node_introspected.py +86 -1
- omnibase_infra/nodes/node_registration_orchestrator/introspection_event_router.py +3 -3
- omnibase_infra/nodes/node_registration_orchestrator/registry/registry_infra_node_registration_orchestrator.py +9 -8
- omnibase_infra/nodes/node_registration_orchestrator/wiring.py +14 -13
- omnibase_infra/nodes/node_registration_storage_effect/contract.yaml +0 -5
- omnibase_infra/nodes/node_registration_storage_effect/registry/registry_infra_registration_storage.py +46 -25
- omnibase_infra/nodes/node_registry_effect/contract.yaml +0 -5
- omnibase_infra/nodes/node_registry_effect/handlers/handler_partial_retry.py +2 -1
- omnibase_infra/nodes/node_service_discovery_effect/registry/registry_infra_service_discovery.py +24 -19
- omnibase_infra/plugins/examples/plugin_json_normalizer.py +2 -2
- omnibase_infra/plugins/examples/plugin_json_normalizer_error_handling.py +2 -2
- omnibase_infra/plugins/plugin_compute_base.py +16 -2
- omnibase_infra/protocols/protocol_event_projector.py +1 -1
- omnibase_infra/runtime/__init__.py +51 -1
- omnibase_infra/runtime/binding_config_resolver.py +102 -37
- omnibase_infra/runtime/constants_notification.py +75 -0
- omnibase_infra/runtime/contract_handler_discovery.py +6 -1
- omnibase_infra/runtime/handler_bootstrap_source.py +514 -0
- omnibase_infra/runtime/handler_contract_config_loader.py +603 -0
- omnibase_infra/runtime/handler_contract_source.py +289 -167
- omnibase_infra/runtime/handler_plugin_loader.py +4 -2
- omnibase_infra/runtime/mixin_semver_cache.py +25 -1
- omnibase_infra/runtime/mixins/__init__.py +7 -0
- omnibase_infra/runtime/mixins/mixin_projector_notification_publishing.py +566 -0
- omnibase_infra/runtime/mixins/mixin_projector_sql_operations.py +31 -10
- omnibase_infra/runtime/models/__init__.py +24 -0
- omnibase_infra/runtime/models/model_health_check_result.py +2 -1
- omnibase_infra/runtime/models/model_projector_notification_config.py +171 -0
- omnibase_infra/runtime/models/model_transition_notification_outbox_config.py +112 -0
- omnibase_infra/runtime/models/model_transition_notification_outbox_metrics.py +140 -0
- omnibase_infra/runtime/models/model_transition_notification_publisher_metrics.py +357 -0
- omnibase_infra/runtime/projector_plugin_loader.py +1 -1
- omnibase_infra/runtime/projector_shell.py +229 -1
- omnibase_infra/runtime/protocols/__init__.py +10 -0
- omnibase_infra/runtime/registry/registry_protocol_binding.py +3 -2
- omnibase_infra/runtime/registry_policy.py +9 -326
- omnibase_infra/runtime/secret_resolver.py +4 -2
- omnibase_infra/runtime/service_kernel.py +10 -2
- omnibase_infra/runtime/service_message_dispatch_engine.py +4 -2
- omnibase_infra/runtime/service_runtime_host_process.py +225 -15
- omnibase_infra/runtime/transition_notification_outbox.py +1190 -0
- omnibase_infra/runtime/transition_notification_publisher.py +764 -0
- omnibase_infra/runtime/util_container_wiring.py +6 -5
- omnibase_infra/runtime/util_wiring.py +5 -1
- omnibase_infra/schemas/schema_transition_notification_outbox.sql +245 -0
- omnibase_infra/services/mcp/__init__.py +31 -0
- omnibase_infra/services/mcp/mcp_server_lifecycle.py +443 -0
- omnibase_infra/services/mcp/service_mcp_tool_discovery.py +411 -0
- omnibase_infra/services/mcp/service_mcp_tool_registry.py +329 -0
- omnibase_infra/services/mcp/service_mcp_tool_sync.py +547 -0
- omnibase_infra/services/registry_api/__init__.py +40 -0
- omnibase_infra/services/registry_api/main.py +243 -0
- omnibase_infra/services/registry_api/models/__init__.py +66 -0
- omnibase_infra/services/registry_api/models/model_capability_widget_mapping.py +38 -0
- omnibase_infra/services/registry_api/models/model_pagination_info.py +48 -0
- omnibase_infra/services/registry_api/models/model_registry_discovery_response.py +73 -0
- omnibase_infra/services/registry_api/models/model_registry_health_response.py +49 -0
- omnibase_infra/services/registry_api/models/model_registry_instance_view.py +88 -0
- omnibase_infra/services/registry_api/models/model_registry_node_view.py +88 -0
- omnibase_infra/services/registry_api/models/model_registry_summary.py +60 -0
- omnibase_infra/services/registry_api/models/model_response_list_instances.py +43 -0
- omnibase_infra/services/registry_api/models/model_response_list_nodes.py +51 -0
- omnibase_infra/services/registry_api/models/model_warning.py +49 -0
- omnibase_infra/services/registry_api/models/model_widget_defaults.py +28 -0
- omnibase_infra/services/registry_api/models/model_widget_mapping.py +51 -0
- omnibase_infra/services/registry_api/routes.py +371 -0
- omnibase_infra/services/registry_api/service.py +846 -0
- omnibase_infra/services/service_capability_query.py +4 -4
- omnibase_infra/services/service_health.py +3 -2
- omnibase_infra/services/service_timeout_emitter.py +13 -2
- omnibase_infra/utils/util_dsn_validation.py +1 -1
- omnibase_infra/validation/__init__.py +3 -19
- omnibase_infra/validation/contracts/security.validation.yaml +114 -0
- omnibase_infra/validation/infra_validators.py +35 -24
- omnibase_infra/validation/validation_exemptions.yaml +113 -9
- omnibase_infra/validation/validator_chain_propagation.py +2 -2
- omnibase_infra/validation/validator_runtime_shape.py +1 -1
- omnibase_infra/validation/validator_security.py +473 -370
- {omnibase_infra-0.2.1.dist-info → omnibase_infra-0.2.2.dist-info}/METADATA +2 -2
- {omnibase_infra-0.2.1.dist-info → omnibase_infra-0.2.2.dist-info}/RECORD +116 -74
- {omnibase_infra-0.2.1.dist-info → omnibase_infra-0.2.2.dist-info}/WHEEL +0 -0
- {omnibase_infra-0.2.1.dist-info → omnibase_infra-0.2.2.dist-info}/entry_points.txt +0 -0
- {omnibase_infra-0.2.1.dist-info → omnibase_infra-0.2.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -13,6 +13,7 @@ Key Features:
|
|
|
13
13
|
- Dynamic tool discovery from ONEX node registry
|
|
14
14
|
- Contract-to-MCP schema generation
|
|
15
15
|
- Request/response correlation for observability
|
|
16
|
+
- Internal uvicorn server lifecycle management (OMN-1282)
|
|
16
17
|
|
|
17
18
|
Note:
|
|
18
19
|
This handler requires the `mcp` package (anthropic-ai/mcp-python-sdk).
|
|
@@ -21,12 +22,18 @@ Note:
|
|
|
21
22
|
|
|
22
23
|
from __future__ import annotations
|
|
23
24
|
|
|
25
|
+
import asyncio
|
|
24
26
|
import logging
|
|
25
27
|
import time
|
|
26
28
|
from typing import TYPE_CHECKING
|
|
27
29
|
from uuid import UUID, uuid4
|
|
28
30
|
|
|
31
|
+
import uvicorn
|
|
29
32
|
from pydantic import ValidationError
|
|
33
|
+
from starlette.applications import Starlette
|
|
34
|
+
from starlette.requests import Request
|
|
35
|
+
from starlette.responses import JSONResponse
|
|
36
|
+
from starlette.routing import Route
|
|
30
37
|
|
|
31
38
|
from omnibase_core.models.dispatch import ModelHandlerOutput
|
|
32
39
|
from omnibase_infra.enums import (
|
|
@@ -46,11 +53,18 @@ from omnibase_infra.handlers.models.mcp import (
|
|
|
46
53
|
ModelMcpToolResult,
|
|
47
54
|
)
|
|
48
55
|
from omnibase_infra.mixins import MixinAsyncCircuitBreaker, MixinEnvelopeExtraction
|
|
56
|
+
from omnibase_infra.services.mcp import MCPServerLifecycle, ModelMCPServerConfig
|
|
49
57
|
|
|
50
58
|
if TYPE_CHECKING:
|
|
51
|
-
from collections.abc import Sequence
|
|
59
|
+
from collections.abc import Callable, Coroutine, Sequence
|
|
52
60
|
|
|
53
61
|
from omnibase_core.models.container.model_onex_container import ModelONEXContainer
|
|
62
|
+
from omnibase_infra.adapters.adapter_onex_tool_execution import (
|
|
63
|
+
AdapterONEXToolExecution,
|
|
64
|
+
)
|
|
65
|
+
from omnibase_infra.services.mcp.service_mcp_tool_registry import (
|
|
66
|
+
ServiceMCPToolRegistry,
|
|
67
|
+
)
|
|
54
68
|
from omnibase_spi.protocols.types.protocol_mcp_tool_types import (
|
|
55
69
|
ProtocolMCPToolDefinition,
|
|
56
70
|
)
|
|
@@ -60,6 +74,64 @@ logger = logging.getLogger(__name__)
|
|
|
60
74
|
# Handler ID for ModelHandlerOutput
|
|
61
75
|
HANDLER_ID_MCP: str = "mcp-handler"
|
|
62
76
|
|
|
77
|
+
# Shutdown timeout constants (can be overridden via class attributes)
|
|
78
|
+
_DEFAULT_SHUTDOWN_TIMEOUT: float = 5.0
|
|
79
|
+
_DEFAULT_CANCEL_TIMEOUT: float = 1.0
|
|
80
|
+
_DEFAULT_STARTUP_TIMEOUT: float = 2.0
|
|
81
|
+
|
|
82
|
+
# Error message truncation limit for health check responses
|
|
83
|
+
_ERROR_MESSAGE_MAX_LENGTH: int = 200
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _require_config_value[T](
|
|
87
|
+
config: dict[str, object],
|
|
88
|
+
key: str,
|
|
89
|
+
expected_type: type[T],
|
|
90
|
+
correlation_id: UUID,
|
|
91
|
+
) -> T:
|
|
92
|
+
"""Extract required config value or raise ProtocolConfigurationError.
|
|
93
|
+
|
|
94
|
+
Per CLAUDE.md configuration rules, the `.env` file is the SINGLE SOURCE OF TRUTH.
|
|
95
|
+
There should be ZERO hardcoded fallbacks - all configuration must be explicitly
|
|
96
|
+
provided. If missing, this function raises an error rather than using defaults.
|
|
97
|
+
|
|
98
|
+
Args:
|
|
99
|
+
config: Configuration dictionary to extract value from.
|
|
100
|
+
key: Configuration key to look up.
|
|
101
|
+
expected_type: Expected Python type for the value.
|
|
102
|
+
correlation_id: Correlation ID for error context.
|
|
103
|
+
|
|
104
|
+
Returns:
|
|
105
|
+
The validated configuration value.
|
|
106
|
+
|
|
107
|
+
Raises:
|
|
108
|
+
ProtocolConfigurationError: If value is missing or has wrong type.
|
|
109
|
+
"""
|
|
110
|
+
value = config.get(key)
|
|
111
|
+
if value is None:
|
|
112
|
+
raise ProtocolConfigurationError(
|
|
113
|
+
f"Missing required config: '{key}'. Must be set in .env or runtime config.",
|
|
114
|
+
context=ModelInfraErrorContext.with_correlation(
|
|
115
|
+
correlation_id=correlation_id,
|
|
116
|
+
transport_type=EnumInfraTransportType.MCP,
|
|
117
|
+
operation="initialize",
|
|
118
|
+
target_name="handler_mcp",
|
|
119
|
+
),
|
|
120
|
+
)
|
|
121
|
+
if not isinstance(value, expected_type):
|
|
122
|
+
raise ProtocolConfigurationError(
|
|
123
|
+
f"Invalid config type for '{key}': expected {expected_type.__name__}, "
|
|
124
|
+
f"got {type(value).__name__}",
|
|
125
|
+
context=ModelInfraErrorContext.with_correlation(
|
|
126
|
+
correlation_id=correlation_id,
|
|
127
|
+
transport_type=EnumInfraTransportType.MCP,
|
|
128
|
+
operation="initialize",
|
|
129
|
+
target_name="handler_mcp",
|
|
130
|
+
),
|
|
131
|
+
)
|
|
132
|
+
return value
|
|
133
|
+
|
|
134
|
+
|
|
63
135
|
# Supported operations
|
|
64
136
|
_SUPPORTED_OPERATIONS: frozenset[str] = frozenset(
|
|
65
137
|
{op.value for op in EnumMcpOperationType}
|
|
@@ -108,27 +180,75 @@ class HandlerMCP(MixinEnvelopeExtraction, MixinAsyncCircuitBreaker):
|
|
|
108
180
|
- Timeout enforcement via asyncio.wait_for()
|
|
109
181
|
- Full observability through the ONEX runtime
|
|
110
182
|
See: TODO(OMN-1288) for dispatcher integration tracking
|
|
183
|
+
|
|
184
|
+
Class Attributes:
|
|
185
|
+
shutdown_timeout: Timeout for graceful server shutdown (default: 5.0s).
|
|
186
|
+
cancel_timeout: Timeout for forced cancellation after graceful fails (default: 1.0s).
|
|
187
|
+
startup_timeout: Timeout for server readiness check during startup (default: 2.0s).
|
|
111
188
|
"""
|
|
112
189
|
|
|
113
|
-
|
|
114
|
-
|
|
190
|
+
# Configurable timeout attributes (can be overridden on subclasses or instances)
|
|
191
|
+
shutdown_timeout: float = _DEFAULT_SHUTDOWN_TIMEOUT
|
|
192
|
+
cancel_timeout: float = _DEFAULT_CANCEL_TIMEOUT
|
|
193
|
+
startup_timeout: float = _DEFAULT_STARTUP_TIMEOUT
|
|
194
|
+
|
|
195
|
+
def __init__(
    self,
    container: ModelONEXContainer | None = None,
    registry: ServiceMCPToolRegistry | None = None,
    executor: AdapterONEXToolExecution | None = None,
) -> None:
    """Set up an uninitialized HandlerMCP; every collaborator is optional.

    Args:
        container: ONEX container used for dependency injection. May be
            None, in which case the handler runs standalone without
            container-based DI.
        registry: MCP tool registry for dynamic tool discovery. Stored as
            given; when omitted the handler falls back to its local
            ``_tool_registry`` dict.
        executor: Tool execution adapter that dispatches calls to ONEX
            orchestrators. Stored as given; when omitted placeholder
            execution is used.

    Note:
        All arguments default to None so the handler can be built in two
        ways:
            1. Registry-based: RuntimeHostProcess looks the handler up and
               calls the constructor with no arguments.
            2. DI-based: the caller injects a container explicitly for
               full ONEX integration (e.g. future dispatcher routing or
               metrics integration).

        MCP Integration (OMN-1281):
            Supplying both registry and executor puts the handler in
            "integrated mode": tools are discovered from Consul via
            ServiceMCPToolDiscovery, cached in ServiceMCPToolRegistry,
            executed through AdapterONEXToolExecution, and hot-reloaded
            via ServiceMCPToolSync.

        Server Lifecycle (OMN-1282):
            The handler owns its uvicorn server. initialize() starts it in
            a background task and shutdown() stops it gracefully; nothing
            is started here.
    """
    # Injected collaborators.
    self._container = container
    # MCP integration components (OMN-1281)
    self._mcp_registry: ServiceMCPToolRegistry | None = registry
    self._mcp_executor: AdapterONEXToolExecution | None = executor

    # Handler state, populated by initialize().
    self._config: ModelMcpHandlerConfig | None = None
    self._initialized: bool = False
    self._tool_registry: dict[str, ProtocolMCPToolDefinition] = {}

    # Server lifecycle components (OMN-1282)
    self._lifecycle: MCPServerLifecycle | None = None
    self._server: uvicorn.Server | None = None
    self._server_task: asyncio.Task[None] | None = None
    # True when server startup was intentionally skipped.
    self._skip_server: bool = False
    # Start timestamp used for uptime tracking.
    self._server_started_at: float | None = None
|
|
251
|
+
|
|
132
252
|
@property
|
|
133
253
|
def handler_type(self) -> EnumHandlerType:
|
|
134
254
|
"""Return the architectural role of this handler.
|
|
@@ -158,8 +278,194 @@ class HandlerMCP(MixinEnvelopeExtraction, MixinAsyncCircuitBreaker):
|
|
|
158
278
|
"""
|
|
159
279
|
return EnumInfraTransportType.MCP
|
|
160
280
|
|
|
281
|
+
def _create_json_endpoint(
    self,
    response_factory: Callable[[], Coroutine[object, object, dict[str, object]]],
) -> Callable[[Request], Coroutine[object, object, JSONResponse]]:
    """Wrap an async data factory in a Starlette-compatible route handler.

    Args:
        response_factory: Zero-argument async callable producing the
            response payload dict. It is awaited once per request, so each
            response carries freshly generated data.

    Returns:
        Async function accepting a Request and returning the factory's
        payload wrapped in a JSONResponse.
    """

    async def endpoint(_request: Request) -> JSONResponse:
        # The incoming request is ignored; the payload comes solely from
        # the factory.
        return JSONResponse(await response_factory())

    return endpoint
|
|
304
|
+
|
|
305
|
+
def _create_health_endpoint(
    self,
) -> Callable[[Request], Coroutine[object, object, JSONResponse]]:
    """Create the health route handler bound to this handler instance.

    The returned endpoint reads the handler's state at request time, so it
    reflects the current tool count and initialization flag rather than
    the values at the moment the endpoint was created.

    Returns:
        Async function suitable for a Starlette Route.
    """

    async def get_health_data() -> dict[str, object]:
        """Return health status data for the MCP server."""
        lifecycle = self._lifecycle
        registry = lifecycle.registry if lifecycle is not None else None
        # Compare against None explicitly: a registry object that evaluates
        # as falsy (e.g. because it is empty) is still a registry.
        tool_count = registry.tool_count if registry is not None else 0
        return {
            "status": "healthy",
            "tool_count": tool_count,
            "initialized": self._initialized,
        }

    return self._create_json_endpoint(get_health_data)
|
|
331
|
+
|
|
332
|
+
def _create_tools_list_endpoint(
    self,
) -> Callable[[Request], Coroutine[object, object, JSONResponse]]:
    """Create the tools-list route handler bound to this handler instance.

    The returned endpoint looks up the lifecycle's registry at request
    time and reports each tool's name, description and endpoint. When no
    lifecycle or registry is available it reports an empty list.

    Returns:
        Async function suitable for a Starlette Route.
    """

    async def get_tools_data() -> dict[str, object]:
        """Return list of available MCP tools."""
        lifecycle = self._lifecycle
        registry = lifecycle.registry if lifecycle is not None else None
        # Compare against None explicitly: a registry object that evaluates
        # as falsy (e.g. because it is empty) is still a registry.
        if registry is None:
            return {"tools": []}

        tools = await registry.list_tools()
        return {
            "tools": [
                {
                    "name": tool.name,
                    "description": tool.description,
                    "endpoint": tool.endpoint,
                }
                for tool in tools
            ]
        }

    return self._create_json_endpoint(get_tools_data)
|
|
363
|
+
|
|
364
|
+
async def _wait_for_server_ready(
    self,
    host: str,
    port: int,
    timeout: float = 2.0,
    poll_interval: float = 0.05,
) -> None:
    """Wait for the server to accept TCP connections, polling until timeout.

    Each iteration first checks whether the background server task has
    already finished (which means the server can no longer become ready)
    and then attempts a TCP connect to the server address.

    Args:
        host: Host the server was asked to bind to.
        port: Port the server was asked to bind to.
        timeout: Maximum time to wait, in seconds.
        poll_interval: Per-attempt connect timeout and the pause between
            attempts, in seconds.

    Raises:
        ProtocolConfigurationError: If the server task fails, is cancelled
            or exits before the port accepts connections, or if the port
            does not accept connections within ``timeout``.

    Note:
        Circuit breaker failures are NOT recorded here. This method is for
        startup verification, not runtime health checking:

        1. Startup retries are bounded and transient - the method either
           succeeds within the timeout or raises, ending startup.
        2. Recording expected startup failures would pollute circuit
           breaker metrics and could open the circuit before the server
           has even started.
        3. Circuit breakers exist to detect a previously healthy service
           becoming unhealthy; during startup failures are expected until
           the first success.
        4. If the server does not come up in time we fail fast instead of
           entering a degraded state.
    """
    import socket

    # TCP PROTOCOL REQUIREMENT (NOT a config fallback):
    # A server bound to 0.0.0.0 (INADDR_ANY) listens on all interfaces, but
    # 0.0.0.0 is not a routable address to connect() to. Loopback is the
    # correct way to reach a local server that bound to the wildcard.
    effective_host = "127.0.0.1" if host == "0.0.0.0" else host  # noqa: S104

    start_time = time.perf_counter()
    last_error: Exception | None = None

    while time.perf_counter() - start_time < timeout:
        # A finished server task can never become ready: fail immediately
        # instead of polling until the timeout (or, worse, reporting
        # success because something else is listening on the port).
        task = self._server_task
        if task is not None and task.done():
            # Task.exception() raises CancelledError for a cancelled task,
            # so only ask for the exception when the task was not cancelled.
            exc = None if task.cancelled() else task.exception()
            ctx = ModelInfraErrorContext.with_correlation(
                transport_type=EnumInfraTransportType.MCP,
                operation="server_startup",
                target_name="mcp_handler",
            )
            if exc is not None:
                raise ProtocolConfigurationError(
                    f"Server failed to start: {exc}",
                    context=ctx,
                ) from exc
            raise ProtocolConfigurationError(
                "Server task exited before the server became ready",
                context=ctx,
            )

        # Try TCP connect. Failures here are expected while the server is
        # still starting, so they are remembered for the timeout message
        # rather than raised.
        try:
            # The context manager closes the socket on every path,
            # including when settimeout()/connect_ex() raise.
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
                sock.settimeout(poll_interval)
                if sock.connect_ex((effective_host, port)) == 0:
                    return  # Server is ready
        except Exception as e:
            last_error = e

        await asyncio.sleep(poll_interval)

    # Timeout reached
    ctx = ModelInfraErrorContext.with_correlation(
        transport_type=EnumInfraTransportType.MCP,
        operation="server_startup",
        target_name="mcp_handler",
    )
    raise ProtocolConfigurationError(
        f"Server failed to start within {timeout}s. Last error: {last_error}",
        context=ctx,
    )
|
|
460
|
+
|
|
161
461
|
async def initialize(self, config: dict[str, object]) -> None:
|
|
162
|
-
"""Initialize MCP handler with configuration.
|
|
462
|
+
"""Initialize MCP handler with configuration and optionally start uvicorn server.
|
|
463
|
+
|
|
464
|
+
This method performs the following steps:
|
|
465
|
+
1. Parse and validate handler configuration
|
|
466
|
+
2. Initialize MCPServerLifecycle for tool discovery (unless skip_server=True)
|
|
467
|
+
3. Create Starlette app with /health and /mcp/tools endpoints
|
|
468
|
+
4. Start uvicorn server in a background task (unless skip_server=True)
|
|
163
469
|
|
|
164
470
|
Args:
|
|
165
471
|
config: Configuration dict containing:
|
|
@@ -170,9 +476,19 @@ class HandlerMCP(MixinEnvelopeExtraction, MixinAsyncCircuitBreaker):
|
|
|
170
476
|
- json_response: Return JSON responses (default: True)
|
|
171
477
|
- timeout_seconds: Tool execution timeout (default: 30.0)
|
|
172
478
|
- max_tools: Maximum tools to expose (default: 100)
|
|
479
|
+
- consul_host: Consul server hostname (REQUIRED - no default)
|
|
480
|
+
- consul_port: Consul server port (REQUIRED - no default)
|
|
481
|
+
- kafka_enabled: Whether to enable Kafka hot reload (REQUIRED - no default)
|
|
482
|
+
- dev_mode: Whether to run in development mode (REQUIRED - no default)
|
|
483
|
+
- contracts_dir: Directory for contract scanning in dev mode (optional)
|
|
484
|
+
- skip_server: Skip starting uvicorn server (default: False).
|
|
485
|
+
Use for unit testing to avoid port binding.
|
|
173
486
|
|
|
174
487
|
Raises:
|
|
175
|
-
ProtocolConfigurationError: If configuration is invalid
|
|
488
|
+
ProtocolConfigurationError: If configuration is invalid or required
|
|
489
|
+
config values (consul_host, consul_port, kafka_enabled, dev_mode)
|
|
490
|
+
are missing. Per CLAUDE.md, .env is the single source of truth -
|
|
491
|
+
no hardcoded fallbacks are used.
|
|
176
492
|
"""
|
|
177
493
|
init_correlation_id = uuid4()
|
|
178
494
|
|
|
@@ -203,22 +519,166 @@ class HandlerMCP(MixinEnvelopeExtraction, MixinAsyncCircuitBreaker):
|
|
|
203
519
|
transport_type=EnumInfraTransportType.MCP,
|
|
204
520
|
)
|
|
205
521
|
|
|
206
|
-
#
|
|
207
|
-
|
|
522
|
+
# Check if server startup should be skipped (for unit testing)
|
|
523
|
+
skip_server_val = config.get("skip_server")
|
|
524
|
+
skip_server: bool = (
|
|
525
|
+
skip_server_val if isinstance(skip_server_val, bool) else False
|
|
526
|
+
)
|
|
527
|
+
self._skip_server = skip_server
|
|
528
|
+
|
|
529
|
+
if not skip_server:
|
|
530
|
+
# Build MCPServerConfig from handler config (OMN-1282)
|
|
531
|
+
# Map handler config fields to lifecycle config fields
|
|
532
|
+
#
|
|
533
|
+
# Per CLAUDE.md: .env is the SINGLE SOURCE OF TRUTH.
|
|
534
|
+
# No hardcoded fallbacks - all required config must be explicit.
|
|
535
|
+
# The _require_config_value helper validates both presence and type.
|
|
536
|
+
consul_host = _require_config_value(
|
|
537
|
+
config, "consul_host", str, init_correlation_id
|
|
538
|
+
)
|
|
539
|
+
consul_port = _require_config_value(
|
|
540
|
+
config, "consul_port", int, init_correlation_id
|
|
541
|
+
)
|
|
542
|
+
kafka_enabled = _require_config_value(
|
|
543
|
+
config, "kafka_enabled", bool, init_correlation_id
|
|
544
|
+
)
|
|
545
|
+
dev_mode = _require_config_value(
|
|
546
|
+
config, "dev_mode", bool, init_correlation_id
|
|
547
|
+
)
|
|
548
|
+
# contracts_dir is optional - only used when dev_mode=True
|
|
549
|
+
contracts_dir_val = config.get("contracts_dir")
|
|
550
|
+
contracts_dir: str | None = (
|
|
551
|
+
contracts_dir_val if isinstance(contracts_dir_val, str) else None
|
|
552
|
+
)
|
|
553
|
+
|
|
554
|
+
server_config = ModelMCPServerConfig(
|
|
555
|
+
consul_host=consul_host,
|
|
556
|
+
consul_port=consul_port,
|
|
557
|
+
kafka_enabled=kafka_enabled,
|
|
558
|
+
http_host=self._config.host,
|
|
559
|
+
http_port=self._config.port,
|
|
560
|
+
default_timeout=self._config.timeout_seconds,
|
|
561
|
+
dev_mode=dev_mode,
|
|
562
|
+
contracts_dir=contracts_dir,
|
|
563
|
+
)
|
|
564
|
+
|
|
565
|
+
# Wrap entire server startup in try/except to ensure cleanup
|
|
566
|
+
# if ANY step fails after lifecycle starts. This prevents:
|
|
567
|
+
# - Orphan lifecycle resources (registry, executor, sync)
|
|
568
|
+
# - Orphan server tasks
|
|
569
|
+
# - Resource leaks from partial initialization
|
|
570
|
+
try:
|
|
571
|
+
# Create and start MCPServerLifecycle for tool discovery
|
|
572
|
+
self._lifecycle = MCPServerLifecycle(config=server_config, bus=None)
|
|
573
|
+
await self._lifecycle.start()
|
|
574
|
+
|
|
575
|
+
# Update MCP registry and executor references from lifecycle
|
|
576
|
+
if self._lifecycle.registry is not None:
|
|
577
|
+
self._mcp_registry = self._lifecycle.registry
|
|
578
|
+
if self._lifecycle.executor is not None:
|
|
579
|
+
self._mcp_executor = self._lifecycle.executor
|
|
580
|
+
|
|
581
|
+
# Create Starlette app with HTTP endpoints (OMN-1282)
|
|
582
|
+
# Use factory methods for explicit handler reference binding
|
|
583
|
+
health_endpoint = self._create_health_endpoint()
|
|
584
|
+
tools_list_endpoint = self._create_tools_list_endpoint()
|
|
585
|
+
|
|
586
|
+
app = Starlette(
|
|
587
|
+
routes=[
|
|
588
|
+
Route("/health", health_endpoint, methods=["GET"]),
|
|
589
|
+
Route("/mcp/tools", tools_list_endpoint, methods=["GET"]),
|
|
590
|
+
],
|
|
591
|
+
)
|
|
592
|
+
|
|
593
|
+
# Create uvicorn server config and server
|
|
594
|
+
uvicorn_config = uvicorn.Config(
|
|
595
|
+
app=app,
|
|
596
|
+
host=self._config.host,
|
|
597
|
+
port=self._config.port,
|
|
598
|
+
log_level="info",
|
|
599
|
+
)
|
|
600
|
+
self._server = uvicorn.Server(uvicorn_config)
|
|
601
|
+
|
|
602
|
+
# Start server in background task
|
|
603
|
+
self._server_task = asyncio.create_task(self._server.serve())
|
|
604
|
+
|
|
605
|
+
# Wait for server to be ready before marking as initialized
|
|
606
|
+
await self._wait_for_server_ready(
|
|
607
|
+
self._config.host,
|
|
608
|
+
self._config.port,
|
|
609
|
+
timeout=self.startup_timeout,
|
|
610
|
+
)
|
|
611
|
+
self._server_started_at = time.time()
|
|
612
|
+
|
|
613
|
+
except Exception as startup_error:
|
|
614
|
+
# Any failure during server startup - clean up all resources
|
|
615
|
+
# This handles failures in:
|
|
616
|
+
# - lifecycle.start() (Consul/contract discovery)
|
|
617
|
+
# - Starlette app creation
|
|
618
|
+
# - uvicorn config/server creation
|
|
619
|
+
# - server task creation
|
|
620
|
+
# - server readiness check
|
|
621
|
+
logger.exception(
|
|
622
|
+
"MCP server startup failed, cleaning up resources",
|
|
623
|
+
extra={
|
|
624
|
+
"host": self._config.host,
|
|
625
|
+
"port": self._config.port,
|
|
626
|
+
"lifecycle_created": self._lifecycle is not None,
|
|
627
|
+
"server_created": self._server is not None,
|
|
628
|
+
"server_task_created": self._server_task is not None,
|
|
629
|
+
"correlation_id": str(init_correlation_id),
|
|
630
|
+
},
|
|
631
|
+
)
|
|
632
|
+
# shutdown() safely handles partially initialized state:
|
|
633
|
+
# - Checks each component before cleanup
|
|
634
|
+
# - Safe to call even if components weren't created
|
|
635
|
+
await self.shutdown()
|
|
636
|
+
ctx = ModelInfraErrorContext(
|
|
637
|
+
transport_type=EnumInfraTransportType.MCP,
|
|
638
|
+
operation="initialize",
|
|
639
|
+
target_name="mcp_handler",
|
|
640
|
+
correlation_id=init_correlation_id,
|
|
641
|
+
)
|
|
642
|
+
raise ProtocolConfigurationError(
|
|
643
|
+
f"MCP server startup failed: {startup_error}",
|
|
644
|
+
context=ctx,
|
|
645
|
+
) from startup_error
|
|
646
|
+
|
|
208
647
|
self._initialized = True
|
|
209
648
|
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
self.
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
"
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
649
|
+
tool_count = 0
|
|
650
|
+
if self._lifecycle and self._lifecycle.registry:
|
|
651
|
+
tool_count = self._lifecycle.registry.tool_count
|
|
652
|
+
|
|
653
|
+
if skip_server:
|
|
654
|
+
logger.info(
|
|
655
|
+
"%s initialized successfully (server skipped)",
|
|
656
|
+
self.__class__.__name__,
|
|
657
|
+
extra={
|
|
658
|
+
"handler": self.__class__.__name__,
|
|
659
|
+
"host": self._config.host,
|
|
660
|
+
"port": self._config.port,
|
|
661
|
+
"path": self._config.path,
|
|
662
|
+
"stateless": self._config.stateless,
|
|
663
|
+
"skip_server": True,
|
|
664
|
+
"correlation_id": str(init_correlation_id),
|
|
665
|
+
},
|
|
666
|
+
)
|
|
667
|
+
else:
|
|
668
|
+
logger.info(
|
|
669
|
+
"%s initialized successfully - uvicorn server running",
|
|
670
|
+
self.__class__.__name__,
|
|
671
|
+
extra={
|
|
672
|
+
"handler": self.__class__.__name__,
|
|
673
|
+
"host": self._config.host,
|
|
674
|
+
"port": self._config.port,
|
|
675
|
+
"path": self._config.path,
|
|
676
|
+
"stateless": self._config.stateless,
|
|
677
|
+
"tool_count": tool_count,
|
|
678
|
+
"url": f"http://{self._config.host}:{self._config.port}",
|
|
679
|
+
"correlation_id": str(init_correlation_id),
|
|
680
|
+
},
|
|
681
|
+
)
|
|
222
682
|
|
|
223
683
|
except ValidationError as e:
|
|
224
684
|
ctx = ModelInfraErrorContext(
|
|
@@ -242,11 +702,102 @@ class HandlerMCP(MixinEnvelopeExtraction, MixinAsyncCircuitBreaker):
|
|
|
242
702
|
) from e
|
|
243
703
|
|
|
244
704
|
async def shutdown(self) -> None:
|
|
245
|
-
"""Shutdown MCP handler
|
|
705
|
+
"""Shutdown MCP handler with timeout protection.
|
|
706
|
+
|
|
707
|
+
This method performs graceful shutdown with timeout protection:
|
|
708
|
+
1. Signal uvicorn server to stop
|
|
709
|
+
2. Wait for server task with timeout (max 5s graceful, 1s forced)
|
|
710
|
+
3. Shutdown MCPServerLifecycle (registry, discovery, sync)
|
|
711
|
+
4. Clear tool registry and reset state
|
|
712
|
+
|
|
713
|
+
Safe to call multiple times. Never hangs indefinitely (max ~6s with defaults).
|
|
714
|
+
|
|
715
|
+
Note:
|
|
716
|
+
Timeouts are configurable via class attributes:
|
|
717
|
+
- shutdown_timeout: Graceful shutdown timeout (default: 5.0s)
|
|
718
|
+
- cancel_timeout: Forced cancellation timeout (default: 1.0s)
|
|
719
|
+
"""
|
|
720
|
+
shutdown_correlation_id = uuid4()
|
|
721
|
+
|
|
722
|
+
logger.info(
|
|
723
|
+
"Shutting down %s",
|
|
724
|
+
self.__class__.__name__,
|
|
725
|
+
extra={
|
|
726
|
+
"handler": self.__class__.__name__,
|
|
727
|
+
"correlation_id": str(shutdown_correlation_id),
|
|
728
|
+
},
|
|
729
|
+
)
|
|
730
|
+
|
|
731
|
+
# Stop uvicorn server with timeout protection (OMN-1282)
|
|
732
|
+
if (
|
|
733
|
+
self._server is not None
|
|
734
|
+
and self._server_task is not None
|
|
735
|
+
and not self._skip_server
|
|
736
|
+
):
|
|
737
|
+
# Signal server to stop
|
|
738
|
+
self._server.should_exit = True
|
|
739
|
+
|
|
740
|
+
try:
|
|
741
|
+
# Wait for graceful shutdown with timeout
|
|
742
|
+
logger.debug(
|
|
743
|
+
"Waiting for server task to complete",
|
|
744
|
+
extra={
|
|
745
|
+
"timeout_seconds": self.shutdown_timeout,
|
|
746
|
+
"correlation_id": str(shutdown_correlation_id),
|
|
747
|
+
},
|
|
748
|
+
)
|
|
749
|
+
await asyncio.wait_for(self._server_task, timeout=self.shutdown_timeout)
|
|
750
|
+
except TimeoutError:
|
|
751
|
+
logger.warning(
|
|
752
|
+
"Server shutdown timed out, forcing cancellation",
|
|
753
|
+
extra={
|
|
754
|
+
"timeout_seconds": self.shutdown_timeout,
|
|
755
|
+
"correlation_id": str(shutdown_correlation_id),
|
|
756
|
+
},
|
|
757
|
+
)
|
|
758
|
+
self._server_task.cancel()
|
|
759
|
+
try:
|
|
760
|
+
await asyncio.wait_for(
|
|
761
|
+
self._server_task, timeout=self.cancel_timeout
|
|
762
|
+
)
|
|
763
|
+
except (TimeoutError, asyncio.CancelledError):
|
|
764
|
+
pass # Best effort
|
|
765
|
+
except asyncio.CancelledError:
|
|
766
|
+
logger.debug(
|
|
767
|
+
"Server task was cancelled",
|
|
768
|
+
extra={"correlation_id": str(shutdown_correlation_id)},
|
|
769
|
+
)
|
|
770
|
+
|
|
771
|
+
# Shutdown lifecycle (registry, discovery, sync)
|
|
772
|
+
if self._lifecycle is not None:
|
|
773
|
+
logger.debug(
|
|
774
|
+
"Shutting down MCPServerLifecycle",
|
|
775
|
+
extra={"correlation_id": str(shutdown_correlation_id)},
|
|
776
|
+
)
|
|
777
|
+
await self._lifecycle.shutdown()
|
|
778
|
+
self._lifecycle = None
|
|
779
|
+
|
|
780
|
+
# Clear registry and executor references
|
|
781
|
+
self._mcp_registry = None
|
|
782
|
+
self._mcp_executor = None
|
|
783
|
+
|
|
784
|
+
# Clear all state
|
|
246
785
|
self._tool_registry.clear()
|
|
247
786
|
self._config = None
|
|
248
787
|
self._initialized = False
|
|
249
|
-
|
|
788
|
+
self._server = None
|
|
789
|
+
self._server_task = None
|
|
790
|
+
self._skip_server = False
|
|
791
|
+
self._server_started_at = None
|
|
792
|
+
|
|
793
|
+
logger.info(
|
|
794
|
+
"%s shutdown complete",
|
|
795
|
+
self.__class__.__name__,
|
|
796
|
+
extra={
|
|
797
|
+
"handler": self.__class__.__name__,
|
|
798
|
+
"correlation_id": str(shutdown_correlation_id),
|
|
799
|
+
},
|
|
800
|
+
)
|
|
250
801
|
|
|
251
802
|
async def execute(
|
|
252
803
|
self, envelope: dict[str, object]
|
|
@@ -566,23 +1117,23 @@ class HandlerMCP(MixinEnvelopeExtraction, MixinAsyncCircuitBreaker):
|
|
|
566
1117
|
) -> dict[str, object]:
|
|
567
1118
|
"""Execute a registered tool.
|
|
568
1119
|
|
|
569
|
-
This method delegates to the ONEX
|
|
570
|
-
|
|
1120
|
+
This method delegates to the ONEX orchestrator that provides this tool.
|
|
1121
|
+
When operating in integrated mode (with registry and executor), the tool
|
|
1122
|
+
is looked up from the MCP registry and executed via the execution adapter.
|
|
571
1123
|
|
|
572
1124
|
Circuit breaker protection is applied to prevent cascading failures
|
|
573
1125
|
when tool execution repeatedly fails.
|
|
574
1126
|
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
1127
|
+
Integration Mode (OMN-1281):
|
|
1128
|
+
When _mcp_registry and _mcp_executor are configured:
|
|
1129
|
+
1. Look up the tool definition from the MCP registry
|
|
1130
|
+
2. Delegate execution to AdapterONEXToolExecution
|
|
1131
|
+
3. The adapter dispatches to the orchestrator endpoint
|
|
1132
|
+
4. Timeout is enforced by the adapter using the tool's timeout_seconds
|
|
579
1133
|
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
- HTTP client timeouts on the caller side
|
|
584
|
-
|
|
585
|
-
See: TODO(OMN-1288) for dispatcher timeout integration
|
|
1134
|
+
Legacy Mode:
|
|
1135
|
+
When registry/executor are not configured, returns placeholder response
|
|
1136
|
+
for backward compatibility.
|
|
586
1137
|
|
|
587
1138
|
Args:
|
|
588
1139
|
tool_name: Name of the tool to execute.
|
|
@@ -593,26 +1144,54 @@ class HandlerMCP(MixinEnvelopeExtraction, MixinAsyncCircuitBreaker):
|
|
|
593
1144
|
Tool execution result.
|
|
594
1145
|
|
|
595
1146
|
Raises:
|
|
596
|
-
InfraUnavailableError: If tool
|
|
1147
|
+
InfraUnavailableError: If tool not found or execution fails.
|
|
597
1148
|
"""
|
|
598
1149
|
# Check circuit breaker before tool execution
|
|
599
1150
|
async with self._circuit_breaker_lock:
|
|
600
1151
|
await self._check_circuit_breaker("execute_tool", correlation_id)
|
|
601
1152
|
|
|
602
1153
|
try:
|
|
603
|
-
#
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
1154
|
+
# Integrated mode: use MCP registry and executor (OMN-1281)
|
|
1155
|
+
if self._mcp_registry is not None and self._mcp_executor is not None:
|
|
1156
|
+
# Look up tool from registry
|
|
1157
|
+
tool = await self._mcp_registry.get_tool(tool_name)
|
|
1158
|
+
if tool is None:
|
|
1159
|
+
ctx = ModelInfraErrorContext.with_correlation(
|
|
1160
|
+
correlation_id=correlation_id,
|
|
1161
|
+
transport_type=self.transport_type,
|
|
1162
|
+
operation="execute_tool",
|
|
1163
|
+
target_name=tool_name,
|
|
1164
|
+
)
|
|
1165
|
+
raise InfraUnavailableError(
|
|
1166
|
+
f"Tool not found: {tool_name}",
|
|
1167
|
+
context=ctx,
|
|
1168
|
+
)
|
|
1169
|
+
|
|
1170
|
+
logger.info(
|
|
1171
|
+
"Executing MCP tool via adapter",
|
|
1172
|
+
extra={
|
|
1173
|
+
"tool_name": tool_name,
|
|
1174
|
+
"argument_count": len(arguments),
|
|
1175
|
+
"correlation_id": str(correlation_id),
|
|
1176
|
+
},
|
|
1177
|
+
)
|
|
1178
|
+
|
|
1179
|
+
# Execute via adapter
|
|
1180
|
+
result = await self._mcp_executor.execute(
|
|
1181
|
+
tool=tool,
|
|
1182
|
+
arguments=arguments,
|
|
1183
|
+
correlation_id=correlation_id,
|
|
1184
|
+
)
|
|
1185
|
+
|
|
1186
|
+
# Reset circuit breaker on success
|
|
1187
|
+
async with self._circuit_breaker_lock:
|
|
1188
|
+
await self._reset_circuit_breaker()
|
|
1189
|
+
|
|
1190
|
+
return result
|
|
1191
|
+
|
|
1192
|
+
# Legacy mode: placeholder response for backward compatibility
|
|
614
1193
|
logger.info(
|
|
615
|
-
"Tool execution requested",
|
|
1194
|
+
"Tool execution requested (placeholder mode)",
|
|
616
1195
|
extra={
|
|
617
1196
|
"tool_name": tool_name,
|
|
618
1197
|
"argument_count": len(arguments),
|
|
@@ -620,7 +1199,7 @@ class HandlerMCP(MixinEnvelopeExtraction, MixinAsyncCircuitBreaker):
|
|
|
620
1199
|
},
|
|
621
1200
|
)
|
|
622
1201
|
|
|
623
|
-
|
|
1202
|
+
placeholder_result: dict[str, object] = {
|
|
624
1203
|
"message": f"Tool '{tool_name}' executed successfully",
|
|
625
1204
|
"arguments_received": list(arguments.keys()),
|
|
626
1205
|
}
|
|
@@ -629,7 +1208,13 @@ class HandlerMCP(MixinEnvelopeExtraction, MixinAsyncCircuitBreaker):
|
|
|
629
1208
|
async with self._circuit_breaker_lock:
|
|
630
1209
|
await self._reset_circuit_breaker()
|
|
631
1210
|
|
|
632
|
-
return
|
|
1211
|
+
return placeholder_result
|
|
1212
|
+
|
|
1213
|
+
except InfraUnavailableError:
|
|
1214
|
+
# Record failure in circuit breaker and re-raise
|
|
1215
|
+
async with self._circuit_breaker_lock:
|
|
1216
|
+
await self._record_circuit_failure("execute_tool", correlation_id)
|
|
1217
|
+
raise
|
|
633
1218
|
|
|
634
1219
|
except Exception:
|
|
635
1220
|
# Record failure in circuit breaker
|
|
@@ -706,7 +1291,7 @@ class HandlerMCP(MixinEnvelopeExtraction, MixinAsyncCircuitBreaker):
|
|
|
706
1291
|
|
|
707
1292
|
Returns:
|
|
708
1293
|
dict containing handler type, category, transport type,
|
|
709
|
-
supported operations, configuration, and
|
|
1294
|
+
supported operations, configuration, tool count, and server state.
|
|
710
1295
|
"""
|
|
711
1296
|
config_dict: dict[str, object] = {}
|
|
712
1297
|
if self._config:
|
|
@@ -720,28 +1305,116 @@ class HandlerMCP(MixinEnvelopeExtraction, MixinAsyncCircuitBreaker):
|
|
|
720
1305
|
"max_tools": self._config.max_tools,
|
|
721
1306
|
}
|
|
722
1307
|
|
|
1308
|
+
# Include lifecycle tool count if available (OMN-1282)
|
|
1309
|
+
tool_count = len(self._tool_registry)
|
|
1310
|
+
if self._lifecycle and self._lifecycle.registry:
|
|
1311
|
+
tool_count = self._lifecycle.registry.tool_count
|
|
1312
|
+
|
|
723
1313
|
return {
|
|
724
1314
|
"handler_type": self.handler_type.value,
|
|
725
1315
|
"handler_category": self.handler_category.value,
|
|
726
1316
|
"transport_type": self.transport_type.value,
|
|
727
1317
|
"supported_operations": sorted(_SUPPORTED_OPERATIONS),
|
|
728
|
-
"tool_count":
|
|
1318
|
+
"tool_count": tool_count,
|
|
729
1319
|
"config": config_dict,
|
|
730
1320
|
"initialized": self._initialized,
|
|
1321
|
+
"server_running": self._server is not None
|
|
1322
|
+
and self._server_task is not None,
|
|
1323
|
+
"lifecycle_running": self._lifecycle is not None
|
|
1324
|
+
and self._lifecycle.is_running,
|
|
731
1325
|
"version": "0.1.0-mvp",
|
|
732
1326
|
}
|
|
733
1327
|
|
|
734
1328
|
async def health_check(self) -> dict[str, object]:
|
|
735
|
-
"""Check handler health and
|
|
1329
|
+
"""Check handler health and server status.
|
|
736
1330
|
|
|
737
|
-
Returns:
|
|
738
|
-
|
|
1331
|
+
Returns unhealthy if:
|
|
1332
|
+
- Not initialized
|
|
1333
|
+
- Server task has crashed/completed unexpectedly
|
|
1334
|
+
- Server task was cancelled
|
|
1335
|
+
|
|
1336
|
+
Note:
|
|
1337
|
+
When skip_server=True was used during initialization, the handler is
|
|
1338
|
+
considered healthy if initialized, even without a running server.
|
|
1339
|
+
This enables unit testing without actual port binding.
|
|
739
1340
|
"""
|
|
1341
|
+
if not self._initialized:
|
|
1342
|
+
return {
|
|
1343
|
+
"healthy": False,
|
|
1344
|
+
"reason": "not_initialized",
|
|
1345
|
+
"transport_type": self.transport_type.value,
|
|
1346
|
+
}
|
|
1347
|
+
|
|
1348
|
+
if self._skip_server:
|
|
1349
|
+
return {
|
|
1350
|
+
"healthy": True,
|
|
1351
|
+
"skip_server": True,
|
|
1352
|
+
"transport_type": self.transport_type.value,
|
|
1353
|
+
"initialized": True,
|
|
1354
|
+
}
|
|
1355
|
+
|
|
1356
|
+
# Capture server task reference once to avoid TOCTOU race conditions.
|
|
1357
|
+
# If _server_task is reassigned (e.g., by concurrent shutdown()),
|
|
1358
|
+
# we work with the captured reference consistently.
|
|
1359
|
+
server_task = self._server_task
|
|
1360
|
+
|
|
1361
|
+
# Check server task state
|
|
1362
|
+
if server_task is None:
|
|
1363
|
+
return {
|
|
1364
|
+
"healthy": False,
|
|
1365
|
+
"reason": "server_task_missing",
|
|
1366
|
+
"transport_type": self.transport_type.value,
|
|
1367
|
+
"initialized": True,
|
|
1368
|
+
}
|
|
1369
|
+
|
|
1370
|
+
if server_task.done():
|
|
1371
|
+
# Task completed - check why
|
|
1372
|
+
if server_task.cancelled():
|
|
1373
|
+
return {
|
|
1374
|
+
"healthy": False,
|
|
1375
|
+
"reason": "server_cancelled",
|
|
1376
|
+
"transport_type": self.transport_type.value,
|
|
1377
|
+
"initialized": True,
|
|
1378
|
+
}
|
|
1379
|
+
|
|
1380
|
+
exc = server_task.exception()
|
|
1381
|
+
if exc is not None:
|
|
1382
|
+
return {
|
|
1383
|
+
"healthy": False,
|
|
1384
|
+
"reason": "server_crashed",
|
|
1385
|
+
"error": str(exc)[:_ERROR_MESSAGE_MAX_LENGTH],
|
|
1386
|
+
"transport_type": self.transport_type.value,
|
|
1387
|
+
"initialized": True,
|
|
1388
|
+
}
|
|
1389
|
+
|
|
1390
|
+
# Exited cleanly but unexpectedly
|
|
1391
|
+
return {
|
|
1392
|
+
"healthy": False,
|
|
1393
|
+
"reason": "server_exited",
|
|
1394
|
+
"transport_type": self.transport_type.value,
|
|
1395
|
+
"initialized": True,
|
|
1396
|
+
}
|
|
1397
|
+
|
|
1398
|
+
# Task is still running - healthy
|
|
1399
|
+
# Include lifecycle tool count if available (OMN-1282)
|
|
1400
|
+
tool_count = len(self._tool_registry)
|
|
1401
|
+
if self._lifecycle and self._lifecycle.registry:
|
|
1402
|
+
tool_count = self._lifecycle.registry.tool_count
|
|
1403
|
+
|
|
1404
|
+
lifecycle_running = self._lifecycle is not None and self._lifecycle.is_running
|
|
1405
|
+
|
|
740
1406
|
return {
|
|
741
|
-
"healthy":
|
|
742
|
-
"initialized":
|
|
743
|
-
"
|
|
1407
|
+
"healthy": True,
|
|
1408
|
+
"initialized": True,
|
|
1409
|
+
"server_running": True,
|
|
1410
|
+
"tool_count": tool_count,
|
|
744
1411
|
"transport_type": self.transport_type.value,
|
|
1412
|
+
"lifecycle_running": lifecycle_running,
|
|
1413
|
+
"uptime_seconds": (
|
|
1414
|
+
time.time() - self._server_started_at
|
|
1415
|
+
if self._server_started_at is not None
|
|
1416
|
+
else None
|
|
1417
|
+
),
|
|
745
1418
|
}
|
|
746
1419
|
|
|
747
1420
|
|