mcp-mesh 0.7.12__py3-none-any.whl → 0.7.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. _mcp_mesh/__init__.py +1 -1
  2. _mcp_mesh/engine/__init__.py +1 -22
  3. _mcp_mesh/engine/async_mcp_client.py +88 -25
  4. _mcp_mesh/engine/decorator_registry.py +10 -9
  5. _mcp_mesh/engine/dependency_injector.py +64 -53
  6. _mcp_mesh/engine/mesh_llm_agent.py +119 -5
  7. _mcp_mesh/engine/mesh_llm_agent_injector.py +30 -0
  8. _mcp_mesh/engine/session_aware_client.py +3 -3
  9. _mcp_mesh/engine/unified_mcp_proxy.py +82 -90
  10. _mcp_mesh/pipeline/api_heartbeat/api_dependency_resolution.py +0 -89
  11. _mcp_mesh/pipeline/api_heartbeat/api_fast_heartbeat_check.py +3 -3
  12. _mcp_mesh/pipeline/api_heartbeat/api_heartbeat_pipeline.py +30 -28
  13. _mcp_mesh/pipeline/mcp_heartbeat/dependency_resolution.py +16 -18
  14. _mcp_mesh/pipeline/mcp_heartbeat/fast_heartbeat_check.py +5 -5
  15. _mcp_mesh/pipeline/mcp_heartbeat/heartbeat_orchestrator.py +3 -3
  16. _mcp_mesh/pipeline/mcp_heartbeat/heartbeat_pipeline.py +6 -6
  17. _mcp_mesh/pipeline/mcp_heartbeat/heartbeat_send.py +1 -1
  18. _mcp_mesh/pipeline/mcp_heartbeat/llm_tools_resolution.py +15 -11
  19. _mcp_mesh/pipeline/mcp_heartbeat/registry_connection.py +3 -3
  20. _mcp_mesh/pipeline/mcp_startup/fastapiserver_setup.py +37 -268
  21. _mcp_mesh/pipeline/mcp_startup/lifespan_factory.py +142 -0
  22. _mcp_mesh/pipeline/mcp_startup/startup_orchestrator.py +57 -93
  23. _mcp_mesh/pipeline/shared/registry_connection.py +1 -1
  24. _mcp_mesh/shared/health_check_manager.py +313 -0
  25. _mcp_mesh/shared/logging_config.py +190 -7
  26. _mcp_mesh/shared/registry_client_wrapper.py +8 -8
  27. _mcp_mesh/shared/sse_parser.py +19 -17
  28. _mcp_mesh/tracing/execution_tracer.py +26 -1
  29. _mcp_mesh/tracing/fastapi_tracing_middleware.py +3 -4
  30. _mcp_mesh/tracing/trace_context_helper.py +25 -6
  31. {mcp_mesh-0.7.12.dist-info → mcp_mesh-0.7.14.dist-info}/METADATA +1 -1
  32. {mcp_mesh-0.7.12.dist-info → mcp_mesh-0.7.14.dist-info}/RECORD +38 -39
  33. mesh/__init__.py +3 -1
  34. mesh/decorators.py +81 -43
  35. mesh/helpers.py +72 -4
  36. mesh/types.py +48 -4
  37. _mcp_mesh/engine/full_mcp_proxy.py +0 -641
  38. _mcp_mesh/engine/mcp_client_proxy.py +0 -457
  39. _mcp_mesh/shared/health_check_cache.py +0 -246
  40. {mcp_mesh-0.7.12.dist-info → mcp_mesh-0.7.14.dist-info}/WHEEL +0 -0
  41. {mcp_mesh-0.7.12.dist-info → mcp_mesh-0.7.14.dist-info}/licenses/LICENSE +0 -0

_mcp_mesh/engine/mcp_client_proxy.py
@@ -1,457 +0,0 @@
-"""MCP Client Proxy using HTTP JSON-RPC for MCP protocol compliance."""
-
-import asyncio
-import json
-import logging
-import os
-import urllib.error
-import urllib.request
-import uuid
-from typing import Any, Optional
-
-from ..shared.content_extractor import ContentExtractor
-from ..shared.sse_parser import SSEParser
-from .async_mcp_client import AsyncMCPClient
-
-logger = logging.getLogger(__name__)
-
-
-class MCPClientProxy:
-    """Synchronous MCP client proxy for dependency injection.
-
-    Replaces SyncHttpClient with official MCP SDK integration while
-    maintaining the same callable interface for dependency injection.
-
-    NO CONNECTION POOLING - Creates new connection per request for K8s load balancing.
-    """
-
-    def __init__(
-        self, endpoint: str, function_name: str, kwargs_config: Optional[dict] = None
-    ):
-        """Initialize MCP client proxy.
-
-        Args:
-            endpoint: Base URL of the remote MCP service
-            function_name: Specific tool function to call
-            kwargs_config: Optional kwargs configuration from @mesh.tool decorator
-        """
-        self.endpoint = endpoint.rstrip("/")
-        self.function_name = function_name
-        self.kwargs_config = kwargs_config or {}
-        self.logger = logger.getChild(f"proxy.{function_name}")
-
-        # Log kwargs configuration if provided
-        if self.kwargs_config:
-            self.logger.debug(
-                f"🔧 MCPClientProxy initialized with kwargs: {self.kwargs_config}"
-            )
-
-    def _run_async(self, coro):
-        """Convert async coroutine to sync call."""
-
-        try:
-            # Try to get existing event loop
-            loop = asyncio.get_event_loop()
-            if loop.is_running():
-                # We're in an async context, need to run in thread
-                import concurrent.futures
-
-                with concurrent.futures.ThreadPoolExecutor() as executor:
-                    future = executor.submit(asyncio.run, coro)
-                    return future.result()
-            else:
-                # No running loop, safe to use loop.run_until_complete
-                return loop.run_until_complete(coro)
-        except RuntimeError:
-            # No event loop exists, create new one
-            return asyncio.run(coro)
-
-    def __call__(self, **kwargs) -> Any:
-        """Callable interface for dependency injection.
-
-        Makes HTTP MCP calls to remote services. This proxy is only used
-        for cross-service dependencies - self-dependencies use SelfDependencyProxy.
-        """
-        self.logger.debug(f"🔌 MCP call to '{self.function_name}' with args: {kwargs}")
-
-        try:
-            result = self._sync_call(**kwargs)
-            self.logger.debug(f"✅ MCP call to '{self.function_name}' succeeded")
-            return result
-        except Exception as e:
-            self.logger.error(f"❌ MCP call to '{self.function_name}' failed: {e}")
-            raise
-
-    def _sync_call(self, **kwargs) -> Any:
-        """Make synchronous MCP tool call to remote service."""
-        try:
-            # Prepare JSON-RPC payload
-            payload = {
-                "jsonrpc": "2.0",
-                "id": 1,
-                "method": "tools/call",
-                "params": {"name": self.function_name, "arguments": kwargs},
-            }
-
-            url = f"{self.endpoint}/mcp"  # Remove trailing slash to avoid 307 redirect
-            data = json.dumps(payload).encode("utf-8")
-
-            # Build headers with trace context injection
-            headers = {
-                "Content-Type": "application/json",
-                "Accept": "application/json, text/event-stream",  # FastMCP requires both
-            }
-
-            # Inject trace headers for distributed tracing
-            from ..tracing.trace_context_helper import TraceContextHelper
-
-            TraceContextHelper.inject_trace_headers_to_request(
-                headers, url, self.logger
-            )
-
-            req = urllib.request.Request(url, data=data, headers=headers)
-
-            with urllib.request.urlopen(req, timeout=30.0) as response:
-                response_data = response.read().decode("utf-8")
-
-            # Use shared SSE parser
-            data = SSEParser.parse_sse_response(
-                response_data, f"MCPClientProxy.{self.function_name}"
-            )
-
-            # Check for JSON-RPC error
-            if "error" in data:
-                error = data["error"]
-                error_msg = error.get("message", "Unknown error")
-                raise RuntimeError(f"Tool call error: {error_msg}")
-
-            # Return the result
-            if "result" in data:
-                result = data["result"]
-                return ContentExtractor.extract_content(result)
-            return None
-
-        except Exception as e:
-            self.logger.error(f"Failed to call {self.function_name}: {e}")
-            raise RuntimeError(f"Error calling {self.function_name}: {e}")
-
-    async def _async_call(self, **kwargs) -> Any:
-        """Make async MCP tool call with fresh connection."""
-        client = None
-        try:
-            # Create new client for each request (K8s load balancing)
-            client = AsyncMCPClient(self.endpoint)
-            result = await client.call_tool(self.function_name, kwargs)
-            return ContentExtractor.extract_content(result)
-        except Exception as e:
-            self.logger.error(f"Failed to call {self.function_name}: {e}")
-            raise RuntimeError(f"Error calling {self.function_name}: {e}")
-        finally:
-            # Always clean up connection
-            if client:
-                await client.close()
-
-
-class EnhancedMCPClientProxy(MCPClientProxy):
-    """Enhanced MCP client proxy with kwargs-based auto-configuration.
-
-    Auto-configures based on kwargs from @mesh.tool decorator:
-    - timeout: Request timeout in seconds
-    - retry_count: Number of retries for failed requests
-    - retry_delay: Base delay between retries (seconds)
-    - retry_backoff: Backoff multiplier for retry delays
-    - custom_headers: Dict of additional headers to send
-    - auth_required: Whether authentication is required
-    - accepts: List of accepted content types
-    - content_type: Default content type for requests
-    - max_response_size: Maximum allowed response size
-    """
-
-    def __init__(
-        self, endpoint: str, function_name: str, kwargs_config: Optional[dict] = None
-    ):
-        """Initialize Enhanced MCP Client Proxy.
-
-        Args:
-            endpoint: Base URL of the remote MCP service
-            function_name: Specific tool function to call
-            kwargs_config: Optional kwargs configuration from @mesh.tool decorator
-        """
-        super().__init__(endpoint, function_name, kwargs_config)
-
-        # Auto-configure from kwargs
-        self._configure_from_kwargs()
-
-        self.logger = logger.getChild(f"enhanced_proxy.{function_name}")
-
-    def _configure_from_kwargs(self):
-        """Auto-configure proxy settings from kwargs."""
-        # Timeout configuration
-        self.timeout = self.kwargs_config.get("timeout", 30)
-
-        # Retry configuration
-        self.retry_count = self.kwargs_config.get("retry_count", 1)
-        self.max_retries = self.retry_count
-        self.retry_delay = self.kwargs_config.get("retry_delay", 1.0)
-        self.retry_backoff = self.kwargs_config.get("retry_backoff", 2.0)
-
-        # Header configuration
-        self.custom_headers = self.kwargs_config.get("custom_headers", {})
-        self.auth_required = self.kwargs_config.get("auth_required", False)
-
-        # Content type configuration
-        self.accepted_content_types = self.kwargs_config.get(
-            "accepts", ["application/json"]
-        )
-        self.default_content_type = self.kwargs_config.get(
-            "content_type", "application/json"
-        )
-        self.max_response_size = self.kwargs_config.get(
-            "max_response_size", 10 * 1024 * 1024
-        )  # 10MB default
-
-        # Streaming configuration
-        self.streaming_capable = self.kwargs_config.get("streaming", False)
-
-        self.logger.info(
-            f"🔧 Enhanced proxy configured - timeout: {self.timeout}s, "
-            f"retries: {self.retry_count}, streaming: {self.streaming_capable}"
-        )
-
-    def __call__(self, **kwargs) -> Any:
-        """Enhanced callable interface with retry logic and custom configuration."""
-        self.logger.debug(
-            f"🔌 Enhanced MCP call to '{self.function_name}' with args: {kwargs}"
-        )
-
-        try:
-            result = self._sync_call_with_retries(**kwargs)
-            self.logger.debug(
-                f"✅ Enhanced MCP call to '{self.function_name}' succeeded"
-            )
-            return result
-        except Exception as e:
-            self.logger.error(
-                f"❌ Enhanced MCP call to '{self.function_name}' failed: {e}"
-            )
-            raise
-
-    def _sync_call_with_retries(self, **kwargs) -> Any:
-        """Make synchronous MCP request with automatic retry logic."""
-        last_exception = None
-
-        for attempt in range(self.max_retries + 1):
-            try:
-                return self._enhanced_sync_call(**kwargs)
-
-            except Exception as e:
-                last_exception = e
-
-                if attempt < self.max_retries:
-                    # Calculate retry delay with backoff
-                    delay = self.retry_delay * (self.retry_backoff**attempt)
-
-                    self.logger.warning(
-                        f"🔄 Request failed (attempt {attempt + 1}/{self.max_retries + 1}), "
-                        f"retrying in {delay:.1f}s: {str(e)}"
-                    )
-
-                    import time
-
-                    time.sleep(delay)
-                else:
-                    self.logger.error(
-                        f"❌ All {self.max_retries + 1} attempts failed for {self.function_name}"
-                    )
-
-        raise last_exception
-
-    def _enhanced_sync_call(self, **kwargs) -> Any:
-        """Make enhanced synchronous MCP request with custom headers and configuration."""
-        try:
-            # Prepare JSON-RPC payload
-            payload = {
-                "jsonrpc": "2.0",
-                "id": str(uuid.uuid4()),
-                "method": "tools/call",
-                "params": {"name": self.function_name, "arguments": kwargs},
-            }
-
-            url = f"{self.endpoint}/mcp/"
-            data = json.dumps(payload).encode("utf-8")
-
-            # Build headers with custom configuration
-            headers = {
-                "Content-Type": self.default_content_type,
-                "Accept": ", ".join(self.accepted_content_types),
-            }
-
-            # Add custom headers
-            headers.update(self.custom_headers)
-
-            # Inject trace headers for distributed tracing
-            from ..tracing.trace_context_helper import TraceContextHelper
-
-            TraceContextHelper.inject_trace_headers_to_request(
-                headers, url, self.logger
-            )
-
-            # Add authentication headers if required
-            if self.auth_required:
-                auth_token = os.getenv("MCP_MESH_AUTH_TOKEN")
-                if auth_token:
-                    headers["Authorization"] = f"Bearer {auth_token}"
-                else:
-                    self.logger.warning(
-                        "⚠️ Authentication required but no token available"
-                    )
-
-            req = urllib.request.Request(url, data=data, headers=headers)
-
-            with urllib.request.urlopen(req, timeout=self.timeout) as response:
-                # Check response size
-                content_length = response.headers.get("content-length")
-                if content_length and int(content_length) > self.max_response_size:
-                    raise ValueError(
-                        f"Response too large: {content_length} bytes > {self.max_response_size}"
-                    )
-
-                response_data = response.read().decode("utf-8")
-
-            # Handle Server-Sent Events format from FastMCP
-            if response_data.startswith("event:"):
-                # Parse SSE format: extract JSON from "data:" lines
-                json_data = None
-                for line in response_data.split("\n"):
-                    if line.startswith("data:"):
-                        json_str = line[5:].strip()  # Remove 'data:' prefix
-                        try:
-                            json_data = json.loads(json_str)
-                            break
-                        except json.JSONDecodeError:
-                            continue
-
-                if json_data is None:
-                    raise RuntimeError("Could not parse SSE response from FastMCP")
-                data = json_data
-            else:
-                # Plain JSON response
-                data = json.loads(response_data)
-
-            # Check for JSON-RPC error
-            if "error" in data:
-                error = data["error"]
-                error_msg = error.get("message", "Unknown error")
-                raise RuntimeError(f"Tool call error: {error_msg}")
-
-            # Return the result
-            if "result" in data:
-                result = data["result"]
-                return ContentExtractor.extract_content(result)
-            return None
-
-        except urllib.error.HTTPError as e:
-            if e.code == 404:
-                raise RuntimeError(f"MCP endpoint not found at {url}")
-            raise RuntimeError(f"HTTP error {e.code}: {e.reason}")
-        except urllib.error.URLError as e:
-            raise RuntimeError(f"Connection error to {url}: {e.reason}")
-        except Exception as e:
-            self.logger.error(f"Enhanced sync call failed: {e}")
-            raise RuntimeError(f"Error calling {self.function_name}: {e}")
-
-    async def _enhanced_async_call(self, **kwargs) -> Any:
-        """Make enhanced async MCP tool call with retry logic."""
-        last_exception = None
-
-        for attempt in range(self.max_retries + 1):
-            try:
-                return await self._make_enhanced_async_request(**kwargs)
-
-            except Exception as e:
-                last_exception = e
-
-                if attempt < self.max_retries:
-                    # Calculate retry delay with backoff
-                    delay = self.retry_delay * (self.retry_backoff**attempt)
-
-                    self.logger.warning(
-                        f"🔄 Async request failed (attempt {attempt + 1}/{self.max_retries + 1}), "
-                        f"retrying in {delay:.1f}s: {str(e)}"
-                    )
-
-                    await asyncio.sleep(delay)
-                else:
-                    self.logger.error(
-                        f"❌ All {self.max_retries + 1} async attempts failed for {self.function_name}"
-                    )
-
-        raise last_exception
-
-    async def _make_enhanced_async_request(self, **kwargs) -> Any:
-        """Make enhanced async HTTP request with custom configuration."""
-        try:
-            # Try to use httpx for better async support
-            import httpx
-
-            payload = {
-                "jsonrpc": "2.0",
-                "id": str(uuid.uuid4()),
-                "method": "tools/call",
-                "params": {"name": self.function_name, "arguments": kwargs},
-            }
-
-            # Build headers with custom configuration
-            headers = {
-                "Content-Type": self.default_content_type,
-                "Accept": ", ".join(self.accepted_content_types),
-            }
-
-            # Add custom headers
-            headers.update(self.custom_headers)
-
-            # Inject trace headers for distributed tracing
-            from ..tracing.trace_context_helper import TraceContextHelper
-
-            TraceContextHelper.inject_trace_headers_to_request(
-                headers, url, self.logger
-            )
-
-            # Add authentication headers if required
-            if self.auth_required:
-                auth_token = os.getenv("MCP_MESH_AUTH_TOKEN")
-                if auth_token:
-                    headers["Authorization"] = f"Bearer {auth_token}"
-
-            url = f"{self.endpoint}/mcp/"
-
-            async with httpx.AsyncClient(timeout=self.timeout) as client:
-                response = await client.post(url, json=payload, headers=headers)
-
-                # Check response size
-                content_length = response.headers.get("content-length")
-                if content_length and int(content_length) > self.max_response_size:
-                    raise ValueError(
-                        f"Response too large: {content_length} bytes > {self.max_response_size}"
-                    )
-
-                response.raise_for_status()
-                result = response.json()
-
-                if "error" in result:
-                    raise Exception(f"MCP request failed: {result['error']}")
-
-                # Apply existing content extraction
-                return ContentExtractor.extract_content(result.get("result"))

-        except ImportError:
-            # Fallback to using AsyncMCPClient
-            client = AsyncMCPClient(self.endpoint, timeout=self.timeout)
-            try:
-                result = await client.call_tool(self.function_name, kwargs)
-                return ContentExtractor.extract_content(result)
-            finally:
-                await client.close()
-        except Exception as e:
-            self.logger.error(f"Enhanced async request failed: {e}")
-            raise
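
The removed EnhancedMCPClientProxy retries failed calls with exponential backoff: the wait before retry N is retry_delay * retry_backoff**N. A minimal standalone sketch of the same pattern follows; the function name and defaults are illustrative, not part of the package:

import time


def call_with_retries(fn, max_retries=1, retry_delay=1.0, retry_backoff=2.0):
    """Call fn(); on failure, sleep retry_delay * retry_backoff**attempt and retry."""
    last_exception = None
    for attempt in range(max_retries + 1):
        try:
            return fn()
        except Exception as e:
            last_exception = e
            if attempt < max_retries:
                # Geometric backoff between attempts
                time.sleep(retry_delay * (retry_backoff**attempt))
    raise last_exception


# With max_retries=3 and the defaults above, the waits between the
# four attempts are 1.0s, 2.0s, and 4.0s.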

_mcp_mesh/shared/health_check_cache.py
@@ -1,246 +0,0 @@
-"""
-Health check caching with TTL support.
-
-Provides a TTL-based cache for health check results to avoid expensive
-health check operations on every heartbeat and /health endpoint call.
-"""
-
-import logging
-import time
-from collections.abc import Awaitable, Callable
-from datetime import UTC, datetime
-from typing import Any, Optional
-
-from .support_types import HealthStatus, HealthStatusType
-
-logger = logging.getLogger(__name__)
-
-# Global cache instance for health status
-# Stores tuples of (health_status, expiry_timestamp) for per-key TTL support
-# Format: {"health:agent_id": (HealthStatus, expiry_timestamp)}
-_health_cache: dict[str, tuple[HealthStatus, float]] = {}
-_max_cache_size = 100
-
-
-async def get_health_status_with_cache(
-    agent_id: str,
-    health_check_fn: Optional[Callable[[], Awaitable[Any]]],
-    agent_config: dict[str, Any],
-    startup_context: dict[str, Any],
-    ttl: int = 15,
-) -> HealthStatus:
-    """
-    Get health status with TTL caching.
-
-    This function synchronously returns from cache if available, otherwise
-    calls the user's health check function and caches the result.
-
-    User health check can return:
-    - bool: True = HEALTHY, False = UNHEALTHY
-    - dict: {"status": "healthy/degraded/unhealthy", "checks": {...}, "errors": [...]}
-    - HealthStatus: Full object (fields will be overridden with correct values)
-
-    Args:
-        agent_id: Unique identifier for the agent
-        health_check_fn: Optional async function that returns bool, dict, or HealthStatus
-        agent_config: Agent configuration dict for building default health status
-        startup_context: Full startup context with capabilities
-        ttl: Cache TTL in seconds (default: 15)
-
-    Returns:
-        HealthStatus: Current health status (from cache or fresh check)
-
-    Note:
-        - Cache key is based on agent_id
-        - If health_check_fn is None, returns default HEALTHY status
-        - If health_check_fn raises an exception, returns DEGRADED status
-        - TTL is enforced per-key with manual expiry tracking
-    """
-    cache_key = f"health:{agent_id}"
-    current_time = time.time()
-
-    # Try to get from cache and check if expired
-    if cache_key in _health_cache:
-        cached_status, expiry_time = _health_cache[cache_key]
-        if current_time < expiry_time:
-            logger.debug(f"✅ Health check cache HIT for agent '{agent_id}'")
-            return cached_status
-        else:
-            # Cache entry expired, remove it
-            logger.debug(
-                f"⏰ Health check cache EXPIRED for agent '{agent_id}' (TTL exceeded)"
-            )
-            del _health_cache[cache_key]
-
-    logger.debug(f"❌ Health check cache MISS for agent '{agent_id}'")
-
-    # Cache miss - call user's health check if provided
-    if health_check_fn:
-        try:
-            logger.debug(
-                f"🔍 Executing health check function for agent '{agent_id}'..."
-            )
-            user_result = await health_check_fn()
-
-            # Parse user result into status, checks, and errors
-            status_type = HealthStatusType.HEALTHY
-            checks = {}
-            errors = []
-
-            if isinstance(user_result, bool):
-                # Simple boolean: True = HEALTHY, False = UNHEALTHY
-                status_type = (
-                    HealthStatusType.HEALTHY
-                    if user_result
-                    else HealthStatusType.UNHEALTHY
-                )
-                checks["health_check"] = user_result
-                if not user_result:
-                    errors.append("Health check returned False")
-
-            elif isinstance(user_result, dict):
-                # Dictionary with status, checks, errors
-                status_str = user_result.get("status", "healthy").lower()
-                if status_str == "healthy":
-                    status_type = HealthStatusType.HEALTHY
-                elif status_str == "degraded":
-                    status_type = HealthStatusType.DEGRADED
-                elif status_str == "unhealthy":
-                    status_type = HealthStatusType.UNHEALTHY
-                else:
-                    status_type = HealthStatusType.UNKNOWN
-
-                checks = user_result.get("checks", {})
-                errors = user_result.get("errors", [])
-
-            elif isinstance(user_result, HealthStatus):
-                # Full HealthStatus object - extract status, checks, errors
-                status_type = user_result.status
-                checks = user_result.checks
-                errors = user_result.errors
-
-            else:
-                logger.warning(
-                    f"⚠️ Health check for '{agent_id}' returned unexpected type {type(user_result)}, treating as unhealthy"
-                )
-                status_type = HealthStatusType.UNHEALTHY
-                checks = {"health_check_return_type": False}
-                errors = [f"Invalid return type: {type(user_result)}"]
-
-            # Build complete HealthStatus with resolved values
-            # Get capabilities from startup_context (from registered tools)
-            capabilities = startup_context.get("capabilities", [])
-            if not capabilities:
-                # Fallback: try to get from agent_config
-                capabilities = agent_config.get("capabilities", [])
-                if not capabilities:
-                    # Last resort: use a default to satisfy validation
-                    capabilities = ["default"]
-
-            health_status = HealthStatus(
-                agent_name=agent_id,
-                status=status_type,
-                capabilities=capabilities,
-                checks=checks,
-                errors=errors,
-                timestamp=datetime.now(UTC),
-                version=agent_config.get("version", "1.0.0"),
-                metadata=agent_config,
-                uptime_seconds=0,
-            )
-
-            logger.info(
-                f"💚 Health check function executed successfully for '{agent_id}': {health_status.status.value}"
-            )
-
-        except Exception as e:
-            # Health check function failed - return DEGRADED
-            logger.warning(
-                f"⚠️ Health check function failed for agent '{agent_id}': {e}"
-            )
-
-            # Get capabilities from startup_context
-            capabilities = startup_context.get("capabilities", [])
-            if not capabilities:
-                capabilities = agent_config.get("capabilities", ["default"])
-
-            health_status = HealthStatus(
-                agent_name=agent_id,
-                status=HealthStatusType.DEGRADED,
-                capabilities=capabilities,
-                checks={"health_check_execution": False},
-                errors=[f"Health check failed: {str(e)}"],
-                timestamp=datetime.now(UTC),
-                version=agent_config.get("version", "1.0.0"),
-                metadata=agent_config,
-                uptime_seconds=0,
-            )
-    else:
-        # No health check provided - default to HEALTHY
-        logger.debug(
-            f"ℹ️ No health check function provided for '{agent_id}', using default HEALTHY status"
-        )
-
-        # Get capabilities from startup_context
-        capabilities = startup_context.get("capabilities", [])
-        if not capabilities:
-            capabilities = agent_config.get("capabilities", ["default"])
-
-        health_status = HealthStatus(
-            agent_name=agent_id,
-            status=HealthStatusType.HEALTHY,
-            capabilities=capabilities,
-            timestamp=datetime.now(UTC),
-            version=agent_config.get("version", "1.0.0"),
-            metadata=agent_config,
-            uptime_seconds=0,
-        )
-
-    # Store in cache with TTL (manual expiry tracking)
-    expiry_time = current_time + ttl
-    _health_cache[cache_key] = (health_status, expiry_time)
-    logger.debug(f"💾 Cached health status for '{agent_id}' with TTL={ttl}s")
-
-    # Enforce max cache size by removing oldest entry if needed
-    if len(_health_cache) > _max_cache_size:
-        # Remove the entry with earliest expiry time
-        oldest_key = min(_health_cache.keys(), key=lambda k: _health_cache[k][1])
-        del _health_cache[oldest_key]
-        logger.debug("🗑️ Evicted oldest cache entry to maintain max size")
-
-    return health_status
-
-
-def clear_health_cache(agent_id: Optional[str] = None) -> None:
-    """
-    Clear health cache for a specific agent or all agents.
-
-    Args:
-        agent_id: Optional agent ID to clear. If None, clears entire cache.
-
-    Note:
-        This is useful for testing or forcing a fresh health check.
-    """
-    if agent_id:
-        cache_key = f"health:{agent_id}"
-        if cache_key in _health_cache:
-            del _health_cache[cache_key]
-            logger.debug(f"🗑️ Cleared health cache for agent '{agent_id}'")
-    else:
-        _health_cache.clear()
-        logger.debug("🗑️ Cleared entire health cache")
-
-
-def get_cache_stats() -> dict[str, Any]:
-    """
-    Get cache statistics for monitoring and debugging.
-
-    Returns:
-        dict: Cache statistics including size, maxsize, and current keys
-    """
-    return {
-        "size": len(_health_cache),
-        "maxsize": _max_cache_size,
-        "ttl": 15,  # Default TTL (for backward compatibility)
-        "cached_agents": [key.replace("health:", "") for key in _health_cache.keys()],
-    }
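
Per the get_health_status_with_cache docstring above, the removed module accepted user health checks returning a bool, a dict, or a HealthStatus. A sketch of a dict-returning check under that contract; the function name and its stand-in probe are illustrative, not package API:

import asyncio


async def my_health_check() -> dict:
    # Matches the documented dict shape: status plus per-check booleans and errors.
    db_ok = True  # stand-in for a real dependency probe
    return {
        "status": "healthy" if db_ok else "degraded",
        "checks": {"database": db_ok},
        "errors": [] if db_ok else ["database unreachable"],
    }


# A bare bool is also accepted: True maps to HEALTHY, False to UNHEALTHY.
print(asyncio.run(my_health_check()))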