mcp-mesh 0.5.7__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. _mcp_mesh/__init__.py +1 -1
  2. _mcp_mesh/engine/base_injector.py +171 -0
  3. _mcp_mesh/engine/decorator_registry.py +162 -35
  4. _mcp_mesh/engine/dependency_injector.py +105 -19
  5. _mcp_mesh/engine/http_wrapper.py +5 -22
  6. _mcp_mesh/engine/llm_config.py +45 -0
  7. _mcp_mesh/engine/llm_errors.py +115 -0
  8. _mcp_mesh/engine/mesh_llm_agent.py +626 -0
  9. _mcp_mesh/engine/mesh_llm_agent_injector.py +617 -0
  10. _mcp_mesh/engine/provider_handlers/__init__.py +20 -0
  11. _mcp_mesh/engine/provider_handlers/base_provider_handler.py +122 -0
  12. _mcp_mesh/engine/provider_handlers/claude_handler.py +138 -0
  13. _mcp_mesh/engine/provider_handlers/generic_handler.py +156 -0
  14. _mcp_mesh/engine/provider_handlers/openai_handler.py +163 -0
  15. _mcp_mesh/engine/provider_handlers/provider_handler_registry.py +167 -0
  16. _mcp_mesh/engine/response_parser.py +205 -0
  17. _mcp_mesh/engine/signature_analyzer.py +229 -99
  18. _mcp_mesh/engine/tool_executor.py +169 -0
  19. _mcp_mesh/engine/tool_schema_builder.py +126 -0
  20. _mcp_mesh/engine/unified_mcp_proxy.py +14 -12
  21. _mcp_mesh/generated/.openapi-generator/FILES +7 -0
  22. _mcp_mesh/generated/.openapi-generator-ignore +0 -1
  23. _mcp_mesh/generated/mcp_mesh_registry_client/__init__.py +7 -16
  24. _mcp_mesh/generated/mcp_mesh_registry_client/models/__init__.py +7 -0
  25. _mcp_mesh/generated/mcp_mesh_registry_client/models/agent_info.py +11 -1
  26. _mcp_mesh/generated/mcp_mesh_registry_client/models/dependency_resolution_info.py +108 -0
  27. _mcp_mesh/generated/mcp_mesh_registry_client/models/llm_provider.py +95 -0
  28. _mcp_mesh/generated/mcp_mesh_registry_client/models/llm_tool_filter.py +111 -0
  29. _mcp_mesh/generated/mcp_mesh_registry_client/models/llm_tool_filter_filter_inner.py +141 -0
  30. _mcp_mesh/generated/mcp_mesh_registry_client/models/llm_tool_filter_filter_inner_one_of.py +93 -0
  31. _mcp_mesh/generated/mcp_mesh_registry_client/models/llm_tool_info.py +103 -0
  32. _mcp_mesh/generated/mcp_mesh_registry_client/models/mesh_agent_registration.py +1 -1
  33. _mcp_mesh/generated/mcp_mesh_registry_client/models/mesh_registration_response.py +35 -1
  34. _mcp_mesh/generated/mcp_mesh_registry_client/models/mesh_tool_registration.py +11 -1
  35. _mcp_mesh/generated/mcp_mesh_registry_client/models/resolved_llm_provider.py +112 -0
  36. _mcp_mesh/pipeline/api_heartbeat/api_dependency_resolution.py +9 -72
  37. _mcp_mesh/pipeline/mcp_heartbeat/fast_heartbeat_check.py +3 -3
  38. _mcp_mesh/pipeline/mcp_heartbeat/heartbeat_orchestrator.py +35 -10
  39. _mcp_mesh/pipeline/mcp_heartbeat/heartbeat_pipeline.py +7 -4
  40. _mcp_mesh/pipeline/mcp_heartbeat/llm_tools_resolution.py +260 -0
  41. _mcp_mesh/pipeline/mcp_startup/fastapiserver_setup.py +118 -35
  42. _mcp_mesh/pipeline/mcp_startup/fastmcpserver_discovery.py +8 -1
  43. _mcp_mesh/pipeline/mcp_startup/heartbeat_preparation.py +111 -5
  44. _mcp_mesh/pipeline/mcp_startup/server_discovery.py +77 -48
  45. _mcp_mesh/pipeline/mcp_startup/startup_orchestrator.py +2 -2
  46. _mcp_mesh/pipeline/mcp_startup/startup_pipeline.py +2 -2
  47. _mcp_mesh/shared/health_check_cache.py +246 -0
  48. _mcp_mesh/shared/registry_client_wrapper.py +87 -4
  49. _mcp_mesh/utils/fastmcp_schema_extractor.py +476 -0
  50. {mcp_mesh-0.5.7.dist-info → mcp_mesh-0.6.1.dist-info}/METADATA +1 -1
  51. {mcp_mesh-0.5.7.dist-info → mcp_mesh-0.6.1.dist-info}/RECORD +57 -32
  52. mesh/__init__.py +18 -4
  53. mesh/decorators.py +439 -31
  54. mesh/helpers.py +259 -0
  55. mesh/types.py +197 -97
  56. {mcp_mesh-0.5.7.dist-info → mcp_mesh-0.6.1.dist-info}/WHEEL +0 -0
  57. {mcp_mesh-0.5.7.dist-info → mcp_mesh-0.6.1.dist-info}/licenses/LICENSE +0 -0
@@ -46,7 +46,7 @@ class FastMCPServerDiscoveryStep(PipelineStep):
 server_info.append(info)
 total_registered_functions += info.get("function_count", 0)

-self.logger.info(
+self.logger.debug(
     f"📡 Discovered FastMCP server '{server_name}': "
     f"{info.get('function_count', 0)} functions"
 )
@@ -57,6 +57,13 @@ class FastMCPServerDiscoveryStep(PipelineStep):
 result.add_context("fastmcp_server_count", len(discovered_servers))
 result.add_context("fastmcp_total_functions", total_registered_functions)

+# Store server info in DecoratorRegistry for heartbeat schema extraction (Phase 2)
+from ...engine.decorator_registry import DecoratorRegistry
+
+# Convert server_info list to dict for easier lookup
+server_info_dict = {info["server_name"]: info for info in server_info}
+DecoratorRegistry.store_fastmcp_server_info(server_info_dict)
+
 result.message = (
     f"Discovered {len(discovered_servers)} FastMCP servers "
     f"with {total_registered_functions} total functions"
@@ -9,6 +9,7 @@ from ...engine.decorator_registry import DecoratorRegistry
 from ...engine.signature_analyzer import validate_mesh_dependencies
 from ...shared.config_resolver import ValidationRule, get_config_value
 from ...shared.support_types import HealthStatus, HealthStatusType
+from ...utils.fastmcp_schema_extractor import FastMCPSchemaExtractor
 from ..shared import PipelineResult, PipelineStatus, PipelineStep


@@ -39,8 +40,17 @@ class HeartbeatPreparationStep(PipelineStep):
 agent_config = DecoratorRegistry.get_resolved_agent_config()
 agent_id = agent_config["agent_id"]

-# Build tools list for registration
-tools_list = self._build_tools_list(mesh_tools)
+# Get FastMCP server info from context (set by fastmcp-server-discovery step)
+fastmcp_server_info = context.get("fastmcp_server_info", [])
+
+# Convert server_info list to dict for schema extractor
+fastmcp_servers = {}
+for server_info in fastmcp_server_info:
+    server_name = server_info.get("server_name", "unknown")
+    fastmcp_servers[server_name] = server_info
+
+# Build tools list for registration (with FastMCP schemas)
+tools_list = self._build_tools_list(mesh_tools, fastmcp_servers)

 # Build agent registration payload
 registration_data = self._build_registration_payload(
@@ -71,8 +81,10 @@ class HeartbeatPreparationStep(PipelineStep):

     return result

-def _build_tools_list(self, mesh_tools: dict[str, Any]) -> list[dict[str, Any]]:
-    """Build tools list from mesh_tools, validating function signatures."""
+def _build_tools_list(
+    self, mesh_tools: dict[str, Any], fastmcp_servers: dict[str, Any] = None
+) -> list[dict[str, Any]]:
+    """Build tools list from mesh_tools, validating function signatures and extracting schemas."""
     tools_list = []
     skipped_tools = []

@@ -93,14 +105,108 @@ class HeartbeatPreparationStep(PipelineStep):
     skipped_tools.append(func_name)
     continue

+# Extract inputSchema from FastMCP tool (if available)
+# First try matching with FastMCP servers, then fallback to direct attribute
+input_schema = FastMCPSchemaExtractor.extract_from_fastmcp_servers(
+    current_function, fastmcp_servers
+)
+if input_schema is None:
+    input_schema = FastMCPSchemaExtractor.extract_input_schema(
+        current_function
+    )
+
+# Check if this function has @mesh.llm decorator (Phase 3)
+llm_filter_data = None
+llm_provider_data = None
+llm_agents = DecoratorRegistry.get_mesh_llm_agents()
+self.logger.debug(
+    f"🤖 Checking for LLM filter: function={func_name}, total_llm_agents_registered={len(llm_agents)}"
+)
+
+for llm_agent_id, llm_metadata in llm_agents.items():
+    if llm_metadata.function.__name__ == func_name:
+        # Found matching LLM agent - extract filter config
+        raw_filter = llm_metadata.config.get("filter")
+        filter_mode = llm_metadata.config.get("filter_mode", "all")
+
+        # Normalize filter to array format (OpenAPI schema requirement)
+        if raw_filter is None:
+            normalized_filter = []
+        elif isinstance(raw_filter, str):
+            normalized_filter = [raw_filter]
+        elif isinstance(raw_filter, dict):
+            normalized_filter = [raw_filter]
+        elif isinstance(raw_filter, list):
+            normalized_filter = raw_filter
+        else:
+            self.logger.warning(
+                f"⚠️ Invalid filter type for {func_name}: {type(raw_filter)}"
+            )
+            normalized_filter = []
+
+        llm_filter_data = {
+            "filter": normalized_filter,
+            "filter_mode": filter_mode,
+        }
+        self.logger.debug(
+            f"🤖 LLM filter found for {func_name}: {len(normalized_filter)} filters, mode={filter_mode}, raw_filter={raw_filter}"
+        )
+
+        # Check if provider is a dict (mesh delegation mode - v0.6.1)
+        # If so, add it as llm_provider field (NOT in dependencies array)
+        provider = llm_metadata.config.get("provider")
+        if isinstance(provider, dict):
+            self.logger.debug(
+                f"🔌 LLM provider is dict (mesh delegation) for {func_name}: {provider}"
+            )
+            # Set llm_provider field (separate from dependencies)
+            # Registry will resolve this to an actual provider agent
+            llm_provider_data = {
+                "capability": provider.get("capability", "llm"),
+                "tags": provider.get("tags", []),
+                "version": provider.get("version", ""),
+                "namespace": provider.get("namespace", "default"),
+            }
+            self.logger.debug(
+                f"✅ LLM provider spec prepared for {func_name}: {llm_provider_data}"
+            )
+
+        break
+
 # Build tool registration data
+self.logger.debug(
+    f"Building tool_data for {func_name}, dependencies={dependencies}"
+)
+processed_deps = self._process_dependencies(dependencies)
+self.logger.debug(
+    f"Processed dependencies for {func_name}: {processed_deps}"
+)
+
+# Extract kwargs (any extra fields not in standard set)
+standard_fields = {
+    "capability",
+    "tags",
+    "version",
+    "description",
+    "dependencies",
+}
+kwargs_data = {
+    k: v for k, v in metadata.items() if k not in standard_fields
+}
+
 tool_data = {
     "function_name": func_name,
     "capability": metadata.get("capability"),
     "tags": metadata.get("tags", []),
     "version": metadata.get("version", "1.0.0"),
     "description": metadata.get("description"),
-    "dependencies": self._process_dependencies(dependencies),
+    "dependencies": processed_deps,
+    "input_schema": input_schema,  # Add inputSchema for LLM integration (Phase 2)
+    "llm_filter": llm_filter_data,  # Add LLM filter for LLM integration (Phase 3)
+    "llm_provider": llm_provider_data,  # Add LLM provider for mesh delegation (v0.6.1)
+    "kwargs": (
+        kwargs_data if kwargs_data else None
+    ),  # Add kwargs for vendor and other metadata
 }

 # Add debug pointer information only if debug flag is enabled
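
The normalization branch above is easier to see in isolation. A minimal sketch of the same coercion rules as a standalone helper (normalize_filter is a hypothetical name, not part of the mcp-mesh API); every @mesh.llm filter value ends up as a list so it satisfies the OpenAPI array schema:

    def normalize_filter(raw_filter):
        """Coerce a filter value into the array form the registry expects."""
        if raw_filter is None:
            return []                # no filter configured
        if isinstance(raw_filter, (str, dict)):
            return [raw_filter]      # single tag or matcher -> one-element list
        if isinstance(raw_filter, list):
            return raw_filter        # already in array form
        return []                    # unsupported type (logged as a warning upstream)

    assert normalize_filter(None) == []
    assert normalize_filter("math") == ["math"]
    assert normalize_filter({"capability": "search"}) == [{"capability": "search"}]
    assert normalize_filter(["a", "b"]) == ["a", "b"]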
@@ -8,14 +8,14 @@ in @mesh.agent decorators to prevent Python interpreter shutdown.
 import logging
 from typing import Any, Dict, Optional

-from ..shared import PipelineResult, PipelineStatus, PipelineStep
 from ...shared.server_discovery import ServerDiscoveryUtil
+from ..shared import PipelineResult, PipelineStatus, PipelineStep


 class ServerDiscoveryStep(PipelineStep):
     """
     Discovers existing uvicorn servers that may be running.
-
+
     This step checks if there's already a uvicorn server running on the target port,
     which could happen when @mesh.agent(auto_run=True) starts an immediate uvicorn
     server to prevent Python interpreter shutdown.
@@ -39,77 +39,100 @@ class ServerDiscoveryStep(PipelineStep):
     agent_config = context.get("agent_config", {})
     target_port = agent_config.get("http_port", 8080)
     target_host = agent_config.get("http_host", "0.0.0.0")
-
-    self.logger.info(f"🔍 DISCOVERY: Looking for immediate uvicorn server from DecoratorRegistry")
+
+    self.logger.debug(
+        "🔍 DISCOVERY: Looking for immediate uvicorn server from DecoratorRegistry"
+    )

     # Check DecoratorRegistry for immediate uvicorn server (much more reliable)
     from ...engine.decorator_registry import DecoratorRegistry
+
     existing_server = DecoratorRegistry.get_immediate_uvicorn_server()

     # Debug: Show what we found
     if existing_server:
         server_status = existing_server.get("status", "unknown")
         server_type = existing_server.get("type", "unknown")
-        self.logger.info(f"🔍 DISCOVERY: Found server - status='{server_status}', type='{server_type}'")
+        self.logger.debug(
+            f"🔍 DISCOVERY: Found server - status='{server_status}', type='{server_type}'"
+        )
     else:
-        self.logger.info(f"🔍 DISCOVERY: No immediate uvicorn server found in registry")
-
+        self.logger.debug(
+            "🔍 DISCOVERY: No immediate uvicorn server found in registry"
+        )
+
     if existing_server:
         # Found existing immediate uvicorn server
-        server_host = existing_server.get('host', 'unknown')
-        server_port = existing_server.get('port', 0)
-
+        server_host = existing_server.get("host", "unknown")
+        server_port = existing_server.get("port", 0)
+
         result.add_context("existing_server", existing_server)
         result.add_context("server_reuse", True)
-
+
         # Get the FastAPI app directly from server info
-        existing_app = existing_server.get('app')
+        existing_app = existing_server.get("app")
         if existing_app:
             app_info = {
-                'instance': existing_app,
-                'title': getattr(existing_app, 'title', 'MCP Mesh Agent (Starting)'),
-                'version': getattr(existing_app, 'version', 'unknown'),
-                'object_id': id(existing_app),
-                'type': 'immediate_uvicorn'
+                "instance": existing_app,
+                "title": getattr(
+                    existing_app, "title", "MCP Mesh Agent (Starting)"
+                ),
+                "version": getattr(existing_app, "version", "unknown"),
+                "object_id": id(existing_app),
+                "type": "immediate_uvicorn",
             }
             result.add_context("existing_fastapi_app", app_info)
             result.message = (
                 f"Found immediate uvicorn server on {server_host}:{server_port} "
                 f"with FastAPI app '{app_info.get('title', 'Unknown')}'"
             )
-            self.logger.info(
+            self.logger.debug(
                 f"✅ DISCOVERY: Found immediate uvicorn server on {server_host}:{server_port} "
                 f"with FastAPI app '{app_info.get('title', 'Unknown')}'"
             )
         else:
             result.message = f"Found immediate uvicorn server on {server_host}:{server_port} (no FastAPI app reference)"
-            self.logger.warning(f"⚠️ DISCOVERY: Found immediate uvicorn server but no FastAPI app reference")
-
+            self.logger.warning(
+                "⚠️ DISCOVERY: Found immediate uvicorn server but no FastAPI app reference"
+            )
+
     else:
         # No existing server found
         result.add_context("existing_server", None)
         result.add_context("server_reuse", False)
-        result.message = f"No immediate uvicorn server found in DecoratorRegistry"
-        self.logger.info(f"🔍 DISCOVERY: No immediate uvicorn server found - pipeline will start new server")
+        result.message = (
+            "No immediate uvicorn server found in DecoratorRegistry"
+        )
+        self.logger.info(
+            "🔍 DISCOVERY: No immediate uvicorn server found - pipeline will start new server"
+        )

     # Only discover FastAPI apps if no immediate uvicorn server was found
     if not existing_server:
-        self.logger.debug("🔍 DISCOVERY: No immediate uvicorn server found, discovering FastAPI apps via garbage collection")
+        self.logger.debug(
+            "🔍 DISCOVERY: No immediate uvicorn server found, discovering FastAPI apps via garbage collection"
+        )
         fastapi_apps = ServerDiscoveryUtil.discover_fastapi_instances()
         result.add_context("discovered_fastapi_apps", fastapi_apps)
-
+
         if fastapi_apps:
             app_count = len(fastapi_apps)
             result.message += f" | Discovered {app_count} FastAPI app(s)"
-            self.logger.info(f"📦 DISCOVERY: Discovered {app_count} FastAPI application(s) for potential mounting")
-
+            self.logger.info(
+                f"📦 DISCOVERY: Discovered {app_count} FastAPI application(s) for potential mounting"
+            )
+
             # Log details about discovered apps
             for app_id, app_info in fastapi_apps.items():
                 app_title = app_info.get("title", "Unknown")
                 route_count = len(app_info.get("routes", []))
-                self.logger.debug(f" 📦 App '{app_title}' ({app_id}): {route_count} routes")
+                self.logger.debug(
+                    f" 📦 App '{app_title}' ({app_id}): {route_count} routes"
+                )
     else:
-        self.logger.debug("🔍 DISCOVERY: Using FastAPI app from immediate uvicorn server, skipping garbage collection discovery")
+        self.logger.debug(
+            "🔍 DISCOVERY: Using FastAPI app from immediate uvicorn server, skipping garbage collection discovery"
+        )

 except Exception as e:
     result.status = PipelineStatus.FAILED
@@ -119,46 +142,52 @@ class ServerDiscoveryStep(PipelineStep):

     return result

-def _find_associated_fastapi_app(self, server_info: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+def _find_associated_fastapi_app(
+    self, server_info: dict[str, Any]
+) -> Optional[dict[str, Any]]:
     """
     Try to find the FastAPI app associated with the existing server.
-
+
     Args:
         server_info: Server information from discovery
-
+
     Returns:
         FastAPI app info if found, None otherwise
     """
     try:
         # Check if server info already has an app
-        if 'app' in server_info:
-            app = server_info['app']
+        if "app" in server_info:
+            app = server_info["app"]
             return {
-                'instance': app,
-                'title': getattr(app, 'title', 'Unknown'),
-                'version': getattr(app, 'version', 'unknown'),
-                'routes': ServerDiscoveryUtil._extract_route_info(app),
-                'object_id': id(app),
+                "instance": app,
+                "title": getattr(app, "title", "Unknown"),
+                "version": getattr(app, "version", "unknown"),
+                "routes": ServerDiscoveryUtil._extract_route_info(app),
+                "object_id": id(app),
             }
-
+
         # If not, discover all FastAPI apps and try to match
        fastapi_apps = ServerDiscoveryUtil.discover_fastapi_instances()
-
+
         # For immediate uvicorn servers, look for apps with specific titles
         for app_id, app_info in fastapi_apps.items():
-            app_title = app_info.get('title', '')
-            if 'MCP Mesh Agent' in app_title and 'Starting' in app_title:
+            app_title = app_info.get("title", "")
+            if "MCP Mesh Agent" in app_title and "Starting" in app_title:
                 # This looks like our immediate uvicorn app
-                self.logger.debug(f"🔍 DISCOVERY: Found immediate uvicorn FastAPI app: {app_title}")
+                self.logger.debug(
+                    f"🔍 DISCOVERY: Found immediate uvicorn FastAPI app: {app_title}"
+                )
                 return app_info
-
+
         # If no immediate uvicorn app found, return the first available app
         if fastapi_apps:
             first_app = next(iter(fastapi_apps.values()))
-            self.logger.debug(f"🔍 DISCOVERY: Using first available FastAPI app: {first_app.get('title', 'Unknown')}")
+            self.logger.debug(
+                f"🔍 DISCOVERY: Using first available FastAPI app: {first_app.get('title', 'Unknown')}"
+            )
             return first_app
-
+
     except Exception as e:
         self.logger.warning(f"Error finding associated FastAPI app: {e}")
-
-    return None
+
+    return None
@@ -228,7 +228,7 @@ class DebounceCoordinator:
         fastapi_app, binding_config
     )
 elif server_status == "running":
-    self.logger.info(
+    self.logger.debug(
         "🔄 RUNNING SERVER: Server already running with proper lifecycle, pipeline skipping uvicorn.run()"
     )
     self.logger.info(
@@ -493,7 +493,7 @@ class MeshOrchestrator:

 This replaces the background polling with explicit execution.
 """
-self.logger.info(f"🚀 Starting single pipeline execution: {self.name}")
+self.logger.debug(f"🚀 Starting single pipeline execution: {self.name}")

 result = await self.pipeline.execute()

@@ -49,9 +49,9 @@ class StartupPipeline(MeshPipeline):
 steps = [
     DecoratorCollectionStep(),
     ConfigurationStep(),
-    HeartbeatPreparationStep(),  # Prepare heartbeat payload structure
+    FastMCPServerDiscoveryStep(),  # Discover user's FastMCP instances (MOVED UP for Phase 2)
+    HeartbeatPreparationStep(),  # Prepare heartbeat payload structure (can now access FastMCP schemas)
     ServerDiscoveryStep(),  # Discover existing uvicorn servers from immediate startup
-    FastMCPServerDiscoveryStep(),  # Discover user's FastMCP instances
     HeartbeatLoopStep(),  # Setup background heartbeat config (handles no registry gracefully)
     FastAPIServerSetupStep(),  # Setup FastAPI app with background heartbeat
     # Note: Registry connection is handled in heartbeat pipeline for retry behavior
@@ -0,0 +1,246 @@
+"""
+Health check caching with TTL support.
+
+Provides a TTL-based cache for health check results to avoid expensive
+health check operations on every heartbeat and /health endpoint call.
+"""
+
+import logging
+import time
+from collections.abc import Awaitable, Callable
+from datetime import UTC, datetime
+from typing import Any, Optional
+
+from .support_types import HealthStatus, HealthStatusType
+
+logger = logging.getLogger(__name__)
+
+# Global cache instance for health status
+# Stores tuples of (health_status, expiry_timestamp) for per-key TTL support
+# Format: {"health:agent_id": (HealthStatus, expiry_timestamp)}
+_health_cache: dict[str, tuple[HealthStatus, float]] = {}
+_max_cache_size = 100
+
+
+async def get_health_status_with_cache(
+    agent_id: str,
+    health_check_fn: Optional[Callable[[], Awaitable[Any]]],
+    agent_config: dict[str, Any],
+    startup_context: dict[str, Any],
+    ttl: int = 15,
+) -> HealthStatus:
+    """
+    Get health status with TTL caching.
+
+    This function synchronously returns from cache if available, otherwise
+    calls the user's health check function and caches the result.
+
+    User health check can return:
+    - bool: True = HEALTHY, False = UNHEALTHY
+    - dict: {"status": "healthy/degraded/unhealthy", "checks": {...}, "errors": [...]}
+    - HealthStatus: Full object (fields will be overridden with correct values)
+
+    Args:
+        agent_id: Unique identifier for the agent
+        health_check_fn: Optional async function that returns bool, dict, or HealthStatus
+        agent_config: Agent configuration dict for building default health status
+        startup_context: Full startup context with capabilities
+        ttl: Cache TTL in seconds (default: 15)
+
+    Returns:
+        HealthStatus: Current health status (from cache or fresh check)
+
+    Note:
+        - Cache key is based on agent_id
+        - If health_check_fn is None, returns default HEALTHY status
+        - If health_check_fn raises an exception, returns DEGRADED status
+        - TTL is enforced per-key with manual expiry tracking
+    """
+    cache_key = f"health:{agent_id}"
+    current_time = time.time()
+
+    # Try to get from cache and check if expired
+    if cache_key in _health_cache:
+        cached_status, expiry_time = _health_cache[cache_key]
+        if current_time < expiry_time:
+            logger.debug(f"✅ Health check cache HIT for agent '{agent_id}'")
+            return cached_status
+        else:
+            # Cache entry expired, remove it
+            logger.debug(
+                f"⏰ Health check cache EXPIRED for agent '{agent_id}' (TTL exceeded)"
+            )
+            del _health_cache[cache_key]
+
+    logger.debug(f"❌ Health check cache MISS for agent '{agent_id}'")
+
+    # Cache miss - call user's health check if provided
+    if health_check_fn:
+        try:
+            logger.debug(
+                f"🔍 Executing health check function for agent '{agent_id}'..."
+            )
+            user_result = await health_check_fn()
+
+            # Parse user result into status, checks, and errors
+            status_type = HealthStatusType.HEALTHY
+            checks = {}
+            errors = []
+
+            if isinstance(user_result, bool):
+                # Simple boolean: True = HEALTHY, False = UNHEALTHY
+                status_type = (
+                    HealthStatusType.HEALTHY
+                    if user_result
+                    else HealthStatusType.UNHEALTHY
+                )
+                checks["health_check"] = user_result
+                if not user_result:
+                    errors.append("Health check returned False")
+
+            elif isinstance(user_result, dict):
+                # Dictionary with status, checks, errors
+                status_str = user_result.get("status", "healthy").lower()
+                if status_str == "healthy":
+                    status_type = HealthStatusType.HEALTHY
+                elif status_str == "degraded":
+                    status_type = HealthStatusType.DEGRADED
+                elif status_str == "unhealthy":
+                    status_type = HealthStatusType.UNHEALTHY
+                else:
+                    status_type = HealthStatusType.UNKNOWN
+
+                checks = user_result.get("checks", {})
+                errors = user_result.get("errors", [])
+
+            elif isinstance(user_result, HealthStatus):
+                # Full HealthStatus object - extract status, checks, errors
+                status_type = user_result.status
+                checks = user_result.checks
+                errors = user_result.errors
+
+            else:
+                logger.warning(
+                    f"⚠️ Health check for '{agent_id}' returned unexpected type {type(user_result)}, treating as unhealthy"
+                )
+                status_type = HealthStatusType.UNHEALTHY
+                checks = {"health_check_return_type": False}
+                errors = [f"Invalid return type: {type(user_result)}"]
+
+            # Build complete HealthStatus with resolved values
+            # Get capabilities from startup_context (from registered tools)
+            capabilities = startup_context.get("capabilities", [])
+            if not capabilities:
+                # Fallback: try to get from agent_config
+                capabilities = agent_config.get("capabilities", [])
+                if not capabilities:
+                    # Last resort: use a default to satisfy validation
+                    capabilities = ["default"]
+
+            health_status = HealthStatus(
+                agent_name=agent_id,
+                status=status_type,
+                capabilities=capabilities,
+                checks=checks,
+                errors=errors,
+                timestamp=datetime.now(UTC),
+                version=agent_config.get("version", "1.0.0"),
+                metadata=agent_config,
+                uptime_seconds=0,
+            )
+
+            logger.info(
+                f"💚 Health check function executed successfully for '{agent_id}': {health_status.status.value}"
+            )
+
+        except Exception as e:
+            # Health check function failed - return DEGRADED
+            logger.warning(
+                f"⚠️ Health check function failed for agent '{agent_id}': {e}"
+            )
+
+            # Get capabilities from startup_context
+            capabilities = startup_context.get("capabilities", [])
+            if not capabilities:
+                capabilities = agent_config.get("capabilities", ["default"])
+
+            health_status = HealthStatus(
+                agent_name=agent_id,
+                status=HealthStatusType.DEGRADED,
+                capabilities=capabilities,
+                checks={"health_check_execution": False},
+                errors=[f"Health check failed: {str(e)}"],
+                timestamp=datetime.now(UTC),
+                version=agent_config.get("version", "1.0.0"),
+                metadata=agent_config,
+                uptime_seconds=0,
+            )
+    else:
+        # No health check provided - default to HEALTHY
+        logger.debug(
+            f"ℹ️ No health check function provided for '{agent_id}', using default HEALTHY status"
+        )
+
+        # Get capabilities from startup_context
+        capabilities = startup_context.get("capabilities", [])
+        if not capabilities:
+            capabilities = agent_config.get("capabilities", ["default"])
+
+        health_status = HealthStatus(
+            agent_name=agent_id,
+            status=HealthStatusType.HEALTHY,
+            capabilities=capabilities,
+            timestamp=datetime.now(UTC),
+            version=agent_config.get("version", "1.0.0"),
+            metadata=agent_config,
+            uptime_seconds=0,
+        )
+
+    # Store in cache with TTL (manual expiry tracking)
+    expiry_time = current_time + ttl
+    _health_cache[cache_key] = (health_status, expiry_time)
+    logger.debug(f"💾 Cached health status for '{agent_id}' with TTL={ttl}s")
+
+    # Enforce max cache size by removing oldest entry if needed
+    if len(_health_cache) > _max_cache_size:
+        # Remove the entry with earliest expiry time
+        oldest_key = min(_health_cache.keys(), key=lambda k: _health_cache[k][1])
+        del _health_cache[oldest_key]
+        logger.debug("🗑️ Evicted oldest cache entry to maintain max size")
+
+    return health_status
+
+
+def clear_health_cache(agent_id: Optional[str] = None) -> None:
+    """
+    Clear health cache for a specific agent or all agents.
+
+    Args:
+        agent_id: Optional agent ID to clear. If None, clears entire cache.
+
+    Note:
+        This is useful for testing or forcing a fresh health check.
+    """
+    if agent_id:
+        cache_key = f"health:{agent_id}"
+        if cache_key in _health_cache:
+            del _health_cache[cache_key]
+            logger.debug(f"🗑️ Cleared health cache for agent '{agent_id}'")
+    else:
+        _health_cache.clear()
+        logger.debug("🗑️ Cleared entire health cache")
+
+
+def get_cache_stats() -> dict[str, Any]:
+    """
+    Get cache statistics for monitoring and debugging.
+
+    Returns:
+        dict: Cache statistics including size, maxsize, and current keys
+    """
+    return {
+        "size": len(_health_cache),
+        "maxsize": _max_cache_size,
+        "ttl": 15,  # Default TTL (for backward compatibility)
+        "cached_agents": [key.replace("health:", "") for key in _health_cache.keys()],
+    }