mcp-mesh 0.5.7__py3-none-any.whl → 0.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _mcp_mesh/__init__.py +1 -1
- _mcp_mesh/engine/base_injector.py +171 -0
- _mcp_mesh/engine/decorator_registry.py +162 -35
- _mcp_mesh/engine/dependency_injector.py +105 -19
- _mcp_mesh/engine/http_wrapper.py +5 -22
- _mcp_mesh/engine/llm_config.py +45 -0
- _mcp_mesh/engine/llm_errors.py +115 -0
- _mcp_mesh/engine/mesh_llm_agent.py +626 -0
- _mcp_mesh/engine/mesh_llm_agent_injector.py +617 -0
- _mcp_mesh/engine/provider_handlers/__init__.py +20 -0
- _mcp_mesh/engine/provider_handlers/base_provider_handler.py +122 -0
- _mcp_mesh/engine/provider_handlers/claude_handler.py +138 -0
- _mcp_mesh/engine/provider_handlers/generic_handler.py +156 -0
- _mcp_mesh/engine/provider_handlers/openai_handler.py +163 -0
- _mcp_mesh/engine/provider_handlers/provider_handler_registry.py +167 -0
- _mcp_mesh/engine/response_parser.py +205 -0
- _mcp_mesh/engine/signature_analyzer.py +229 -99
- _mcp_mesh/engine/tool_executor.py +169 -0
- _mcp_mesh/engine/tool_schema_builder.py +126 -0
- _mcp_mesh/engine/unified_mcp_proxy.py +14 -12
- _mcp_mesh/generated/.openapi-generator/FILES +7 -0
- _mcp_mesh/generated/.openapi-generator-ignore +0 -1
- _mcp_mesh/generated/mcp_mesh_registry_client/__init__.py +7 -16
- _mcp_mesh/generated/mcp_mesh_registry_client/models/__init__.py +7 -0
- _mcp_mesh/generated/mcp_mesh_registry_client/models/agent_info.py +11 -1
- _mcp_mesh/generated/mcp_mesh_registry_client/models/dependency_resolution_info.py +108 -0
- _mcp_mesh/generated/mcp_mesh_registry_client/models/llm_provider.py +95 -0
- _mcp_mesh/generated/mcp_mesh_registry_client/models/llm_tool_filter.py +111 -0
- _mcp_mesh/generated/mcp_mesh_registry_client/models/llm_tool_filter_filter_inner.py +141 -0
- _mcp_mesh/generated/mcp_mesh_registry_client/models/llm_tool_filter_filter_inner_one_of.py +93 -0
- _mcp_mesh/generated/mcp_mesh_registry_client/models/llm_tool_info.py +103 -0
- _mcp_mesh/generated/mcp_mesh_registry_client/models/mesh_agent_registration.py +1 -1
- _mcp_mesh/generated/mcp_mesh_registry_client/models/mesh_registration_response.py +35 -1
- _mcp_mesh/generated/mcp_mesh_registry_client/models/mesh_tool_registration.py +11 -1
- _mcp_mesh/generated/mcp_mesh_registry_client/models/resolved_llm_provider.py +112 -0
- _mcp_mesh/pipeline/api_heartbeat/api_dependency_resolution.py +9 -72
- _mcp_mesh/pipeline/mcp_heartbeat/fast_heartbeat_check.py +3 -3
- _mcp_mesh/pipeline/mcp_heartbeat/heartbeat_orchestrator.py +35 -10
- _mcp_mesh/pipeline/mcp_heartbeat/heartbeat_pipeline.py +7 -4
- _mcp_mesh/pipeline/mcp_heartbeat/llm_tools_resolution.py +260 -0
- _mcp_mesh/pipeline/mcp_startup/fastapiserver_setup.py +118 -35
- _mcp_mesh/pipeline/mcp_startup/fastmcpserver_discovery.py +8 -1
- _mcp_mesh/pipeline/mcp_startup/heartbeat_preparation.py +111 -5
- _mcp_mesh/pipeline/mcp_startup/server_discovery.py +77 -48
- _mcp_mesh/pipeline/mcp_startup/startup_orchestrator.py +2 -2
- _mcp_mesh/pipeline/mcp_startup/startup_pipeline.py +2 -2
- _mcp_mesh/shared/health_check_cache.py +246 -0
- _mcp_mesh/shared/registry_client_wrapper.py +87 -4
- _mcp_mesh/utils/fastmcp_schema_extractor.py +476 -0
- {mcp_mesh-0.5.7.dist-info → mcp_mesh-0.6.1.dist-info}/METADATA +1 -1
- {mcp_mesh-0.5.7.dist-info → mcp_mesh-0.6.1.dist-info}/RECORD +57 -32
- mesh/__init__.py +18 -4
- mesh/decorators.py +439 -31
- mesh/helpers.py +259 -0
- mesh/types.py +197 -97
- {mcp_mesh-0.5.7.dist-info → mcp_mesh-0.6.1.dist-info}/WHEEL +0 -0
- {mcp_mesh-0.5.7.dist-info → mcp_mesh-0.6.1.dist-info}/licenses/LICENSE +0 -0
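Most of the new modules above are LLM integration (mesh_llm_agent, provider handlers, tool filtering, response parsing). As an orientation sketch only — this code is not from the diff, and the exact decorator signature is an assumption — a @mesh.llm function might be declared with the config keys the heartbeat-preparation hunk below reads ("filter", "filter_mode", "provider"):

    # Hypothetical sketch: assumes @mesh.llm accepts these keyword arguments;
    # only the key names and the provider-dict fields are taken from the diff.
    import mesh

    @mesh.llm(
        filter=["weather-*"],        # str/dict/list; normalized to a list at heartbeat time
        filter_mode="all",           # "all" is the default seen in the diff
        provider={                   # dict form triggers mesh delegation (v0.6.1)
            "capability": "llm",
            "tags": ["claude"],
            "version": "",
            "namespace": "default",
        },
    )
    def ask_assistant(prompt: str) -> str:
        ...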
--- a/_mcp_mesh/pipeline/mcp_startup/fastmcpserver_discovery.py
+++ b/_mcp_mesh/pipeline/mcp_startup/fastmcpserver_discovery.py
@@ -46,7 +46,7 @@ class FastMCPServerDiscoveryStep(PipelineStep):
                 server_info.append(info)
                 total_registered_functions += info.get("function_count", 0)
 
-                self.logger.
+                self.logger.debug(
                     f"📡 Discovered FastMCP server '{server_name}': "
                     f"{info.get('function_count', 0)} functions"
                 )
@@ -57,6 +57,13 @@ class FastMCPServerDiscoveryStep(PipelineStep):
         result.add_context("fastmcp_server_count", len(discovered_servers))
         result.add_context("fastmcp_total_functions", total_registered_functions)
 
+        # Store server info in DecoratorRegistry for heartbeat schema extraction (Phase 2)
+        from ...engine.decorator_registry import DecoratorRegistry
+
+        # Convert server_info list to dict for easier lookup
+        server_info_dict = {info["server_name"]: info for info in server_info}
+        DecoratorRegistry.store_fastmcp_server_info(server_info_dict)
+
         result.message = (
             f"Discovered {len(discovered_servers)} FastMCP servers "
             f"with {total_registered_functions} total functions"
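The conversion above keys each discovered server's info dict by its server name so later pipeline steps can look servers up directly instead of scanning a list. A minimal sketch of that comprehension; the sample entries are invented for illustration:

    # Illustrative only: sample server entries are made up.
    server_info = [
        {"server_name": "weather", "function_count": 3},
        {"server_name": "math", "function_count": 5},
    ]
    server_info_dict = {info["server_name"]: info for info in server_info}
    assert server_info_dict["math"]["function_count"] == 5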
--- a/_mcp_mesh/pipeline/mcp_startup/heartbeat_preparation.py
+++ b/_mcp_mesh/pipeline/mcp_startup/heartbeat_preparation.py
@@ -9,6 +9,7 @@ from ...engine.decorator_registry import DecoratorRegistry
 from ...engine.signature_analyzer import validate_mesh_dependencies
 from ...shared.config_resolver import ValidationRule, get_config_value
 from ...shared.support_types import HealthStatus, HealthStatusType
+from ...utils.fastmcp_schema_extractor import FastMCPSchemaExtractor
 from ..shared import PipelineResult, PipelineStatus, PipelineStep
 
 
@@ -39,8 +40,17 @@ class HeartbeatPreparationStep(PipelineStep):
         agent_config = DecoratorRegistry.get_resolved_agent_config()
         agent_id = agent_config["agent_id"]
 
-        #
-
+        # Get FastMCP server info from context (set by fastmcp-server-discovery step)
+        fastmcp_server_info = context.get("fastmcp_server_info", [])
+
+        # Convert server_info list to dict for schema extractor
+        fastmcp_servers = {}
+        for server_info in fastmcp_server_info:
+            server_name = server_info.get("server_name", "unknown")
+            fastmcp_servers[server_name] = server_info
+
+        # Build tools list for registration (with FastMCP schemas)
+        tools_list = self._build_tools_list(mesh_tools, fastmcp_servers)
 
         # Build agent registration payload
         registration_data = self._build_registration_payload(
@@ -71,8 +81,10 @@ class HeartbeatPreparationStep(PipelineStep):
 
         return result
 
-    def _build_tools_list(
-
+    def _build_tools_list(
+        self, mesh_tools: dict[str, Any], fastmcp_servers: dict[str, Any] = None
+    ) -> list[dict[str, Any]]:
+        """Build tools list from mesh_tools, validating function signatures and extracting schemas."""
         tools_list = []
         skipped_tools = []
 
@@ -93,14 +105,108 @@ class HeartbeatPreparationStep(PipelineStep):
                 skipped_tools.append(func_name)
                 continue
 
+            # Extract inputSchema from FastMCP tool (if available)
+            # First try matching with FastMCP servers, then fallback to direct attribute
+            input_schema = FastMCPSchemaExtractor.extract_from_fastmcp_servers(
+                current_function, fastmcp_servers
+            )
+            if input_schema is None:
+                input_schema = FastMCPSchemaExtractor.extract_input_schema(
+                    current_function
+                )
+
+            # Check if this function has @mesh.llm decorator (Phase 3)
+            llm_filter_data = None
+            llm_provider_data = None
+            llm_agents = DecoratorRegistry.get_mesh_llm_agents()
+            self.logger.debug(
+                f"🤖 Checking for LLM filter: function={func_name}, total_llm_agents_registered={len(llm_agents)}"
+            )
+
+            for llm_agent_id, llm_metadata in llm_agents.items():
+                if llm_metadata.function.__name__ == func_name:
+                    # Found matching LLM agent - extract filter config
+                    raw_filter = llm_metadata.config.get("filter")
+                    filter_mode = llm_metadata.config.get("filter_mode", "all")
+
+                    # Normalize filter to array format (OpenAPI schema requirement)
+                    if raw_filter is None:
+                        normalized_filter = []
+                    elif isinstance(raw_filter, str):
+                        normalized_filter = [raw_filter]
+                    elif isinstance(raw_filter, dict):
+                        normalized_filter = [raw_filter]
+                    elif isinstance(raw_filter, list):
+                        normalized_filter = raw_filter
+                    else:
+                        self.logger.warning(
+                            f"⚠️ Invalid filter type for {func_name}: {type(raw_filter)}"
+                        )
+                        normalized_filter = []
+
+                    llm_filter_data = {
+                        "filter": normalized_filter,
+                        "filter_mode": filter_mode,
+                    }
+                    self.logger.debug(
+                        f"🤖 LLM filter found for {func_name}: {len(normalized_filter)} filters, mode={filter_mode}, raw_filter={raw_filter}"
+                    )
+
+                    # Check if provider is a dict (mesh delegation mode - v0.6.1)
+                    # If so, add it as llm_provider field (NOT in dependencies array)
+                    provider = llm_metadata.config.get("provider")
+                    if isinstance(provider, dict):
+                        self.logger.debug(
+                            f"🔌 LLM provider is dict (mesh delegation) for {func_name}: {provider}"
+                        )
+                        # Set llm_provider field (separate from dependencies)
+                        # Registry will resolve this to an actual provider agent
+                        llm_provider_data = {
+                            "capability": provider.get("capability", "llm"),
+                            "tags": provider.get("tags", []),
+                            "version": provider.get("version", ""),
+                            "namespace": provider.get("namespace", "default"),
+                        }
+                        self.logger.debug(
+                            f"✅ LLM provider spec prepared for {func_name}: {llm_provider_data}"
+                        )
+
+                    break
+
             # Build tool registration data
+            self.logger.debug(
+                f"Building tool_data for {func_name}, dependencies={dependencies}"
+            )
+            processed_deps = self._process_dependencies(dependencies)
+            self.logger.debug(
+                f"Processed dependencies for {func_name}: {processed_deps}"
+            )
+
+            # Extract kwargs (any extra fields not in standard set)
+            standard_fields = {
+                "capability",
+                "tags",
+                "version",
+                "description",
+                "dependencies",
+            }
+            kwargs_data = {
+                k: v for k, v in metadata.items() if k not in standard_fields
+            }
+
             tool_data = {
                 "function_name": func_name,
                 "capability": metadata.get("capability"),
                 "tags": metadata.get("tags", []),
                 "version": metadata.get("version", "1.0.0"),
                 "description": metadata.get("description"),
-                "dependencies":
+                "dependencies": processed_deps,
+                "input_schema": input_schema,  # Add inputSchema for LLM integration (Phase 2)
+                "llm_filter": llm_filter_data,  # Add LLM filter for LLM integration (Phase 3)
+                "llm_provider": llm_provider_data,  # Add LLM provider for mesh delegation (v0.6.1)
+                "kwargs": (
+                    kwargs_data if kwargs_data else None
+                ),  # Add kwargs for vendor and other metadata
             }
 
             # Add debug pointer information only if debug flag is enabled
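The normalization branch above accepts a string, dict, list, or None from the decorator config and always emits a list, which is what the registry's OpenAPI schema expects. A standalone restatement of that logic, runnable in isolation; the sample inputs are illustrative:

    def normalize_filter(raw_filter):
        # None -> no filter; str/dict -> single-element list; list passes through
        if raw_filter is None:
            return []
        if isinstance(raw_filter, (str, dict)):
            return [raw_filter]
        if isinstance(raw_filter, list):
            return raw_filter
        return []  # invalid type: the real step logs a warning and falls back

    assert normalize_filter(None) == []
    assert normalize_filter("tools") == ["tools"]
    assert normalize_filter({"capability": "weather"}) == [{"capability": "weather"}]
    assert normalize_filter(["a", "b"]) == ["a", "b"]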
--- a/_mcp_mesh/pipeline/mcp_startup/server_discovery.py
+++ b/_mcp_mesh/pipeline/mcp_startup/server_discovery.py
@@ -8,14 +8,14 @@ in @mesh.agent decorators to prevent Python interpreter shutdown.
 import logging
 from typing import Any, Dict, Optional
 
-from ..shared import PipelineResult, PipelineStatus, PipelineStep
 from ...shared.server_discovery import ServerDiscoveryUtil
+from ..shared import PipelineResult, PipelineStatus, PipelineStep
 
 
 class ServerDiscoveryStep(PipelineStep):
     """
     Discovers existing uvicorn servers that may be running.
-
+
     This step checks if there's already a uvicorn server running on the target port,
     which could happen when @mesh.agent(auto_run=True) starts an immediate uvicorn
     server to prevent Python interpreter shutdown.
@@ -39,77 +39,100 @@ class ServerDiscoveryStep(PipelineStep):
             agent_config = context.get("agent_config", {})
             target_port = agent_config.get("http_port", 8080)
             target_host = agent_config.get("http_host", "0.0.0.0")
-
-            self.logger.
+
+            self.logger.debug(
+                "🔍 DISCOVERY: Looking for immediate uvicorn server from DecoratorRegistry"
+            )
 
             # Check DecoratorRegistry for immediate uvicorn server (much more reliable)
             from ...engine.decorator_registry import DecoratorRegistry
+
             existing_server = DecoratorRegistry.get_immediate_uvicorn_server()
 
             # Debug: Show what we found
             if existing_server:
                 server_status = existing_server.get("status", "unknown")
                 server_type = existing_server.get("type", "unknown")
-                self.logger.
+                self.logger.debug(
+                    f"🔍 DISCOVERY: Found server - status='{server_status}', type='{server_type}'"
+                )
             else:
-                self.logger.
-
+                self.logger.debug(
+                    "🔍 DISCOVERY: No immediate uvicorn server found in registry"
+                )
+
             if existing_server:
                 # Found existing immediate uvicorn server
-                server_host = existing_server.get(
-                server_port = existing_server.get(
-
+                server_host = existing_server.get("host", "unknown")
+                server_port = existing_server.get("port", 0)
+
                 result.add_context("existing_server", existing_server)
                 result.add_context("server_reuse", True)
-
+
                 # Get the FastAPI app directly from server info
-                existing_app = existing_server.get(
+                existing_app = existing_server.get("app")
                 if existing_app:
                     app_info = {
-
-
-
-
-
+                        "instance": existing_app,
+                        "title": getattr(
+                            existing_app, "title", "MCP Mesh Agent (Starting)"
+                        ),
+                        "version": getattr(existing_app, "version", "unknown"),
+                        "object_id": id(existing_app),
+                        "type": "immediate_uvicorn",
                     }
                     result.add_context("existing_fastapi_app", app_info)
                     result.message = (
                         f"Found immediate uvicorn server on {server_host}:{server_port} "
                         f"with FastAPI app '{app_info.get('title', 'Unknown')}'"
                     )
-                    self.logger.
+                    self.logger.debug(
                         f"✅ DISCOVERY: Found immediate uvicorn server on {server_host}:{server_port} "
                         f"with FastAPI app '{app_info.get('title', 'Unknown')}'"
                     )
                 else:
                     result.message = f"Found immediate uvicorn server on {server_host}:{server_port} (no FastAPI app reference)"
-                    self.logger.warning(
-
+                    self.logger.warning(
+                        "⚠️ DISCOVERY: Found immediate uvicorn server but no FastAPI app reference"
+                    )
+
             else:
                 # No existing server found
                 result.add_context("existing_server", None)
                 result.add_context("server_reuse", False)
-                result.message =
-
+                result.message = (
+                    "No immediate uvicorn server found in DecoratorRegistry"
+                )
+                self.logger.info(
+                    "🔍 DISCOVERY: No immediate uvicorn server found - pipeline will start new server"
+                )
 
             # Only discover FastAPI apps if no immediate uvicorn server was found
             if not existing_server:
-                self.logger.debug(
+                self.logger.debug(
+                    "🔍 DISCOVERY: No immediate uvicorn server found, discovering FastAPI apps via garbage collection"
+                )
                 fastapi_apps = ServerDiscoveryUtil.discover_fastapi_instances()
                 result.add_context("discovered_fastapi_apps", fastapi_apps)
-
+
                 if fastapi_apps:
                     app_count = len(fastapi_apps)
                     result.message += f" | Discovered {app_count} FastAPI app(s)"
-                    self.logger.info(
-
+                    self.logger.info(
+                        f"📦 DISCOVERY: Discovered {app_count} FastAPI application(s) for potential mounting"
+                    )
+
                     # Log details about discovered apps
                     for app_id, app_info in fastapi_apps.items():
                         app_title = app_info.get("title", "Unknown")
                         route_count = len(app_info.get("routes", []))
-                        self.logger.debug(
+                        self.logger.debug(
+                            f"  📦 App '{app_title}' ({app_id}): {route_count} routes"
+                        )
             else:
-                self.logger.debug(
+                self.logger.debug(
+                    "🔍 DISCOVERY: Using FastAPI app from immediate uvicorn server, skipping garbage collection discovery"
+                )
 
         except Exception as e:
             result.status = PipelineStatus.FAILED
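The app_info dict above is built defensively with getattr defaults, so a partially initialized app still yields usable metadata. A minimal sketch of that pattern using a stand-in object invented for illustration, not a real FastAPI app:

    class FakeApp:  # stand-in for a FastAPI instance; invented for this sketch
        title = "MCP Mesh Agent (Starting)"
        version = "0.6.1"

    app = FakeApp()
    app_info = {
        "instance": app,
        "title": getattr(app, "title", "MCP Mesh Agent (Starting)"),
        "version": getattr(app, "version", "unknown"),
        "object_id": id(app),
        "type": "immediate_uvicorn",
    }
    assert app_info["title"] == "MCP Mesh Agent (Starting)"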
@@ -119,46 +142,52 @@ class ServerDiscoveryStep(PipelineStep):
 
         return result
 
-    def _find_associated_fastapi_app(
+    def _find_associated_fastapi_app(
+        self, server_info: dict[str, Any]
+    ) -> Optional[dict[str, Any]]:
         """
         Try to find the FastAPI app associated with the existing server.
-
+
         Args:
             server_info: Server information from discovery
-
+
         Returns:
             FastAPI app info if found, None otherwise
         """
         try:
             # Check if server info already has an app
-            if
-                app = server_info[
+            if "app" in server_info:
+                app = server_info["app"]
                 return {
-
-
-
-
-
+                    "instance": app,
+                    "title": getattr(app, "title", "Unknown"),
+                    "version": getattr(app, "version", "unknown"),
+                    "routes": ServerDiscoveryUtil._extract_route_info(app),
+                    "object_id": id(app),
                 }
-
+
             # If not, discover all FastAPI apps and try to match
             fastapi_apps = ServerDiscoveryUtil.discover_fastapi_instances()
-
+
             # For immediate uvicorn servers, look for apps with specific titles
             for app_id, app_info in fastapi_apps.items():
-                app_title = app_info.get(
-                if
+                app_title = app_info.get("title", "")
+                if "MCP Mesh Agent" in app_title and "Starting" in app_title:
                     # This looks like our immediate uvicorn app
-                    self.logger.debug(
+                    self.logger.debug(
+                        f"🔍 DISCOVERY: Found immediate uvicorn FastAPI app: {app_title}"
+                    )
                     return app_info
-
+
             # If no immediate uvicorn app found, return the first available app
             if fastapi_apps:
                 first_app = next(iter(fastapi_apps.values()))
-                self.logger.debug(
+                self.logger.debug(
+                    f"🔍 DISCOVERY: Using first available FastAPI app: {first_app.get('title', 'Unknown')}"
+                )
                 return first_app
-
+
         except Exception as e:
             self.logger.warning(f"Error finding associated FastAPI app: {e}")
-
-        return None
+
+        return None
--- a/_mcp_mesh/pipeline/mcp_startup/startup_orchestrator.py
+++ b/_mcp_mesh/pipeline/mcp_startup/startup_orchestrator.py
@@ -228,7 +228,7 @@ class DebounceCoordinator:
                     fastapi_app, binding_config
                 )
             elif server_status == "running":
-                self.logger.
+                self.logger.debug(
                     "🔄 RUNNING SERVER: Server already running with proper lifecycle, pipeline skipping uvicorn.run()"
                 )
                 self.logger.info(
@@ -493,7 +493,7 @@ class MeshOrchestrator:
 
         This replaces the background polling with explicit execution.
         """
-        self.logger.
+        self.logger.debug(f"🚀 Starting single pipeline execution: {self.name}")
 
         result = await self.pipeline.execute()
 
--- a/_mcp_mesh/pipeline/mcp_startup/startup_pipeline.py
+++ b/_mcp_mesh/pipeline/mcp_startup/startup_pipeline.py
@@ -49,9 +49,9 @@ class StartupPipeline(MeshPipeline):
         steps = [
             DecoratorCollectionStep(),
             ConfigurationStep(),
-
+            FastMCPServerDiscoveryStep(),  # Discover user's FastMCP instances (MOVED UP for Phase 2)
+            HeartbeatPreparationStep(),  # Prepare heartbeat payload structure (can now access FastMCP schemas)
             ServerDiscoveryStep(),  # Discover existing uvicorn servers from immediate startup
-            FastMCPServerDiscoveryStep(),  # Discover user's FastMCP instances
             HeartbeatLoopStep(),  # Setup background heartbeat config (handles no registry gracefully)
             FastAPIServerSetupStep(),  # Setup FastAPI app with background heartbeat
             # Note: Registry connection is handled in heartbeat pipeline for retry behavior
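The reordering matters because HeartbeatPreparationStep now reads fastmcp_server_info from the shared pipeline context that FastMCPServerDiscoveryStep populates, so discovery must run first. A toy illustration of that data dependency (this is not the real MeshPipeline API; the step functions and keys are invented):

    def discovery(ctx):
        # writes context that a later step depends on
        ctx["fastmcp_server_info"] = [{"server_name": "weather", "function_count": 3}]

    def heartbeat_prep(ctx):
        # reads what discovery wrote; sees [] if discovery never ran
        servers = ctx.get("fastmcp_server_info", [])
        ctx["tools"] = [s["server_name"] for s in servers]

    ctx = {}
    for step in (discovery, heartbeat_prep):  # discovery must come first
        step(ctx)
    assert ctx["tools"] == ["weather"]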
--- /dev/null
+++ b/_mcp_mesh/shared/health_check_cache.py
@@ -0,0 +1,246 @@
+"""
+Health check caching with TTL support.
+
+Provides a TTL-based cache for health check results to avoid expensive
+health check operations on every heartbeat and /health endpoint call.
+"""
+
+import logging
+import time
+from collections.abc import Awaitable, Callable
+from datetime import UTC, datetime
+from typing import Any, Optional
+
+from .support_types import HealthStatus, HealthStatusType
+
+logger = logging.getLogger(__name__)
+
+# Global cache instance for health status
+# Stores tuples of (health_status, expiry_timestamp) for per-key TTL support
+# Format: {"health:agent_id": (HealthStatus, expiry_timestamp)}
+_health_cache: dict[str, tuple[HealthStatus, float]] = {}
+_max_cache_size = 100
+
+
+async def get_health_status_with_cache(
+    agent_id: str,
+    health_check_fn: Optional[Callable[[], Awaitable[Any]]],
+    agent_config: dict[str, Any],
+    startup_context: dict[str, Any],
+    ttl: int = 15,
+) -> HealthStatus:
+    """
+    Get health status with TTL caching.
+
+    This function synchronously returns from cache if available, otherwise
+    calls the user's health check function and caches the result.
+
+    User health check can return:
+    - bool: True = HEALTHY, False = UNHEALTHY
+    - dict: {"status": "healthy/degraded/unhealthy", "checks": {...}, "errors": [...]}
+    - HealthStatus: Full object (fields will be overridden with correct values)
+
+    Args:
+        agent_id: Unique identifier for the agent
+        health_check_fn: Optional async function that returns bool, dict, or HealthStatus
+        agent_config: Agent configuration dict for building default health status
+        startup_context: Full startup context with capabilities
+        ttl: Cache TTL in seconds (default: 15)
+
+    Returns:
+        HealthStatus: Current health status (from cache or fresh check)
+
+    Note:
+        - Cache key is based on agent_id
+        - If health_check_fn is None, returns default HEALTHY status
+        - If health_check_fn raises an exception, returns DEGRADED status
+        - TTL is enforced per-key with manual expiry tracking
+    """
+    cache_key = f"health:{agent_id}"
+    current_time = time.time()
+
+    # Try to get from cache and check if expired
+    if cache_key in _health_cache:
+        cached_status, expiry_time = _health_cache[cache_key]
+        if current_time < expiry_time:
+            logger.debug(f"✅ Health check cache HIT for agent '{agent_id}'")
+            return cached_status
+        else:
+            # Cache entry expired, remove it
+            logger.debug(
+                f"⏰ Health check cache EXPIRED for agent '{agent_id}' (TTL exceeded)"
+            )
+            del _health_cache[cache_key]
+
+    logger.debug(f"❌ Health check cache MISS for agent '{agent_id}'")
+
+    # Cache miss - call user's health check if provided
+    if health_check_fn:
+        try:
+            logger.debug(
+                f"🔍 Executing health check function for agent '{agent_id}'..."
+            )
+            user_result = await health_check_fn()
+
+            # Parse user result into status, checks, and errors
+            status_type = HealthStatusType.HEALTHY
+            checks = {}
+            errors = []
+
+            if isinstance(user_result, bool):
+                # Simple boolean: True = HEALTHY, False = UNHEALTHY
+                status_type = (
+                    HealthStatusType.HEALTHY
+                    if user_result
+                    else HealthStatusType.UNHEALTHY
+                )
+                checks["health_check"] = user_result
+                if not user_result:
+                    errors.append("Health check returned False")
+
+            elif isinstance(user_result, dict):
+                # Dictionary with status, checks, errors
+                status_str = user_result.get("status", "healthy").lower()
+                if status_str == "healthy":
+                    status_type = HealthStatusType.HEALTHY
+                elif status_str == "degraded":
+                    status_type = HealthStatusType.DEGRADED
+                elif status_str == "unhealthy":
+                    status_type = HealthStatusType.UNHEALTHY
+                else:
+                    status_type = HealthStatusType.UNKNOWN
+
+                checks = user_result.get("checks", {})
+                errors = user_result.get("errors", [])
+
+            elif isinstance(user_result, HealthStatus):
+                # Full HealthStatus object - extract status, checks, errors
+                status_type = user_result.status
+                checks = user_result.checks
+                errors = user_result.errors
+
+            else:
+                logger.warning(
+                    f"⚠️ Health check for '{agent_id}' returned unexpected type {type(user_result)}, treating as unhealthy"
+                )
+                status_type = HealthStatusType.UNHEALTHY
+                checks = {"health_check_return_type": False}
+                errors = [f"Invalid return type: {type(user_result)}"]
+
+            # Build complete HealthStatus with resolved values
+            # Get capabilities from startup_context (from registered tools)
+            capabilities = startup_context.get("capabilities", [])
+            if not capabilities:
+                # Fallback: try to get from agent_config
+                capabilities = agent_config.get("capabilities", [])
+                if not capabilities:
+                    # Last resort: use a default to satisfy validation
+                    capabilities = ["default"]
+
+            health_status = HealthStatus(
+                agent_name=agent_id,
+                status=status_type,
+                capabilities=capabilities,
+                checks=checks,
+                errors=errors,
+                timestamp=datetime.now(UTC),
+                version=agent_config.get("version", "1.0.0"),
+                metadata=agent_config,
+                uptime_seconds=0,
+            )
+
+            logger.info(
+                f"💚 Health check function executed successfully for '{agent_id}': {health_status.status.value}"
+            )
+
+        except Exception as e:
+            # Health check function failed - return DEGRADED
+            logger.warning(
+                f"⚠️ Health check function failed for agent '{agent_id}': {e}"
+            )
+
+            # Get capabilities from startup_context
+            capabilities = startup_context.get("capabilities", [])
+            if not capabilities:
+                capabilities = agent_config.get("capabilities", ["default"])
+
+            health_status = HealthStatus(
+                agent_name=agent_id,
+                status=HealthStatusType.DEGRADED,
+                capabilities=capabilities,
+                checks={"health_check_execution": False},
+                errors=[f"Health check failed: {str(e)}"],
+                timestamp=datetime.now(UTC),
+                version=agent_config.get("version", "1.0.0"),
+                metadata=agent_config,
+                uptime_seconds=0,
+            )
+    else:
+        # No health check provided - default to HEALTHY
+        logger.debug(
+            f"ℹ️ No health check function provided for '{agent_id}', using default HEALTHY status"
+        )
+
+        # Get capabilities from startup_context
+        capabilities = startup_context.get("capabilities", [])
+        if not capabilities:
+            capabilities = agent_config.get("capabilities", ["default"])
+
+        health_status = HealthStatus(
+            agent_name=agent_id,
+            status=HealthStatusType.HEALTHY,
+            capabilities=capabilities,
+            timestamp=datetime.now(UTC),
+            version=agent_config.get("version", "1.0.0"),
+            metadata=agent_config,
+            uptime_seconds=0,
+        )
+
+    # Store in cache with TTL (manual expiry tracking)
+    expiry_time = current_time + ttl
+    _health_cache[cache_key] = (health_status, expiry_time)
+    logger.debug(f"💾 Cached health status for '{agent_id}' with TTL={ttl}s")
+
+    # Enforce max cache size by removing oldest entry if needed
+    if len(_health_cache) > _max_cache_size:
+        # Remove the entry with earliest expiry time
+        oldest_key = min(_health_cache.keys(), key=lambda k: _health_cache[k][1])
+        del _health_cache[oldest_key]
+        logger.debug("🗑️ Evicted oldest cache entry to maintain max size")
+
+    return health_status
+
+
+def clear_health_cache(agent_id: Optional[str] = None) -> None:
+    """
+    Clear health cache for a specific agent or all agents.
+
+    Args:
+        agent_id: Optional agent ID to clear. If None, clears entire cache.
+
+    Note:
+        This is useful for testing or forcing a fresh health check.
+    """
+    if agent_id:
+        cache_key = f"health:{agent_id}"
+        if cache_key in _health_cache:
+            del _health_cache[cache_key]
+            logger.debug(f"🗑️ Cleared health cache for agent '{agent_id}'")
+    else:
+        _health_cache.clear()
+        logger.debug("🗑️ Cleared entire health cache")
+
+
+def get_cache_stats() -> dict[str, Any]:
+    """
+    Get cache statistics for monitoring and debugging.
+
+    Returns:
+        dict: Cache statistics including size, maxsize, and current keys
+    """
+    return {
+        "size": len(_health_cache),
+        "maxsize": _max_cache_size,
+        "ttl": 15,  # Default TTL (for backward compatibility)
+        "cached_agents": [key.replace("health:", "") for key in _health_cache.keys()],
+    }