mcp-hangar 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160) hide show
  1. mcp_hangar/__init__.py +139 -0
  2. mcp_hangar/application/__init__.py +1 -0
  3. mcp_hangar/application/commands/__init__.py +67 -0
  4. mcp_hangar/application/commands/auth_commands.py +118 -0
  5. mcp_hangar/application/commands/auth_handlers.py +296 -0
  6. mcp_hangar/application/commands/commands.py +59 -0
  7. mcp_hangar/application/commands/handlers.py +189 -0
  8. mcp_hangar/application/discovery/__init__.py +21 -0
  9. mcp_hangar/application/discovery/discovery_metrics.py +283 -0
  10. mcp_hangar/application/discovery/discovery_orchestrator.py +497 -0
  11. mcp_hangar/application/discovery/lifecycle_manager.py +315 -0
  12. mcp_hangar/application/discovery/security_validator.py +414 -0
  13. mcp_hangar/application/event_handlers/__init__.py +50 -0
  14. mcp_hangar/application/event_handlers/alert_handler.py +191 -0
  15. mcp_hangar/application/event_handlers/audit_handler.py +203 -0
  16. mcp_hangar/application/event_handlers/knowledge_base_handler.py +120 -0
  17. mcp_hangar/application/event_handlers/logging_handler.py +69 -0
  18. mcp_hangar/application/event_handlers/metrics_handler.py +152 -0
  19. mcp_hangar/application/event_handlers/persistent_audit_store.py +217 -0
  20. mcp_hangar/application/event_handlers/security_handler.py +604 -0
  21. mcp_hangar/application/mcp/tooling.py +158 -0
  22. mcp_hangar/application/ports/__init__.py +9 -0
  23. mcp_hangar/application/ports/observability.py +237 -0
  24. mcp_hangar/application/queries/__init__.py +52 -0
  25. mcp_hangar/application/queries/auth_handlers.py +237 -0
  26. mcp_hangar/application/queries/auth_queries.py +118 -0
  27. mcp_hangar/application/queries/handlers.py +227 -0
  28. mcp_hangar/application/read_models/__init__.py +11 -0
  29. mcp_hangar/application/read_models/provider_views.py +139 -0
  30. mcp_hangar/application/sagas/__init__.py +11 -0
  31. mcp_hangar/application/sagas/group_rebalance_saga.py +137 -0
  32. mcp_hangar/application/sagas/provider_failover_saga.py +266 -0
  33. mcp_hangar/application/sagas/provider_recovery_saga.py +172 -0
  34. mcp_hangar/application/services/__init__.py +9 -0
  35. mcp_hangar/application/services/provider_service.py +208 -0
  36. mcp_hangar/application/services/traced_provider_service.py +211 -0
  37. mcp_hangar/bootstrap/runtime.py +328 -0
  38. mcp_hangar/context.py +178 -0
  39. mcp_hangar/domain/__init__.py +117 -0
  40. mcp_hangar/domain/contracts/__init__.py +57 -0
  41. mcp_hangar/domain/contracts/authentication.py +225 -0
  42. mcp_hangar/domain/contracts/authorization.py +229 -0
  43. mcp_hangar/domain/contracts/event_store.py +178 -0
  44. mcp_hangar/domain/contracts/metrics_publisher.py +59 -0
  45. mcp_hangar/domain/contracts/persistence.py +383 -0
  46. mcp_hangar/domain/contracts/provider_runtime.py +146 -0
  47. mcp_hangar/domain/discovery/__init__.py +20 -0
  48. mcp_hangar/domain/discovery/conflict_resolver.py +267 -0
  49. mcp_hangar/domain/discovery/discovered_provider.py +185 -0
  50. mcp_hangar/domain/discovery/discovery_service.py +412 -0
  51. mcp_hangar/domain/discovery/discovery_source.py +192 -0
  52. mcp_hangar/domain/events.py +433 -0
  53. mcp_hangar/domain/exceptions.py +525 -0
  54. mcp_hangar/domain/model/__init__.py +70 -0
  55. mcp_hangar/domain/model/aggregate.py +58 -0
  56. mcp_hangar/domain/model/circuit_breaker.py +152 -0
  57. mcp_hangar/domain/model/event_sourced_api_key.py +413 -0
  58. mcp_hangar/domain/model/event_sourced_provider.py +423 -0
  59. mcp_hangar/domain/model/event_sourced_role_assignment.py +268 -0
  60. mcp_hangar/domain/model/health_tracker.py +183 -0
  61. mcp_hangar/domain/model/load_balancer.py +185 -0
  62. mcp_hangar/domain/model/provider.py +810 -0
  63. mcp_hangar/domain/model/provider_group.py +656 -0
  64. mcp_hangar/domain/model/tool_catalog.py +105 -0
  65. mcp_hangar/domain/policies/__init__.py +19 -0
  66. mcp_hangar/domain/policies/provider_health.py +187 -0
  67. mcp_hangar/domain/repository.py +249 -0
  68. mcp_hangar/domain/security/__init__.py +85 -0
  69. mcp_hangar/domain/security/input_validator.py +710 -0
  70. mcp_hangar/domain/security/rate_limiter.py +387 -0
  71. mcp_hangar/domain/security/roles.py +237 -0
  72. mcp_hangar/domain/security/sanitizer.py +387 -0
  73. mcp_hangar/domain/security/secrets.py +501 -0
  74. mcp_hangar/domain/services/__init__.py +20 -0
  75. mcp_hangar/domain/services/audit_service.py +376 -0
  76. mcp_hangar/domain/services/image_builder.py +328 -0
  77. mcp_hangar/domain/services/provider_launcher.py +1046 -0
  78. mcp_hangar/domain/value_objects.py +1138 -0
  79. mcp_hangar/errors.py +818 -0
  80. mcp_hangar/fastmcp_server.py +1105 -0
  81. mcp_hangar/gc.py +134 -0
  82. mcp_hangar/infrastructure/__init__.py +79 -0
  83. mcp_hangar/infrastructure/async_executor.py +133 -0
  84. mcp_hangar/infrastructure/auth/__init__.py +37 -0
  85. mcp_hangar/infrastructure/auth/api_key_authenticator.py +388 -0
  86. mcp_hangar/infrastructure/auth/event_sourced_store.py +567 -0
  87. mcp_hangar/infrastructure/auth/jwt_authenticator.py +360 -0
  88. mcp_hangar/infrastructure/auth/middleware.py +340 -0
  89. mcp_hangar/infrastructure/auth/opa_authorizer.py +243 -0
  90. mcp_hangar/infrastructure/auth/postgres_store.py +659 -0
  91. mcp_hangar/infrastructure/auth/projections.py +366 -0
  92. mcp_hangar/infrastructure/auth/rate_limiter.py +311 -0
  93. mcp_hangar/infrastructure/auth/rbac_authorizer.py +323 -0
  94. mcp_hangar/infrastructure/auth/sqlite_store.py +624 -0
  95. mcp_hangar/infrastructure/command_bus.py +112 -0
  96. mcp_hangar/infrastructure/discovery/__init__.py +110 -0
  97. mcp_hangar/infrastructure/discovery/docker_source.py +289 -0
  98. mcp_hangar/infrastructure/discovery/entrypoint_source.py +249 -0
  99. mcp_hangar/infrastructure/discovery/filesystem_source.py +383 -0
  100. mcp_hangar/infrastructure/discovery/kubernetes_source.py +247 -0
  101. mcp_hangar/infrastructure/event_bus.py +260 -0
  102. mcp_hangar/infrastructure/event_sourced_repository.py +443 -0
  103. mcp_hangar/infrastructure/event_store.py +396 -0
  104. mcp_hangar/infrastructure/knowledge_base/__init__.py +259 -0
  105. mcp_hangar/infrastructure/knowledge_base/contracts.py +202 -0
  106. mcp_hangar/infrastructure/knowledge_base/memory.py +177 -0
  107. mcp_hangar/infrastructure/knowledge_base/postgres.py +545 -0
  108. mcp_hangar/infrastructure/knowledge_base/sqlite.py +513 -0
  109. mcp_hangar/infrastructure/metrics_publisher.py +36 -0
  110. mcp_hangar/infrastructure/observability/__init__.py +10 -0
  111. mcp_hangar/infrastructure/observability/langfuse_adapter.py +534 -0
  112. mcp_hangar/infrastructure/persistence/__init__.py +33 -0
  113. mcp_hangar/infrastructure/persistence/audit_repository.py +371 -0
  114. mcp_hangar/infrastructure/persistence/config_repository.py +398 -0
  115. mcp_hangar/infrastructure/persistence/database.py +333 -0
  116. mcp_hangar/infrastructure/persistence/database_common.py +330 -0
  117. mcp_hangar/infrastructure/persistence/event_serializer.py +280 -0
  118. mcp_hangar/infrastructure/persistence/event_upcaster.py +166 -0
  119. mcp_hangar/infrastructure/persistence/in_memory_event_store.py +150 -0
  120. mcp_hangar/infrastructure/persistence/recovery_service.py +312 -0
  121. mcp_hangar/infrastructure/persistence/sqlite_event_store.py +386 -0
  122. mcp_hangar/infrastructure/persistence/unit_of_work.py +409 -0
  123. mcp_hangar/infrastructure/persistence/upcasters/README.md +13 -0
  124. mcp_hangar/infrastructure/persistence/upcasters/__init__.py +7 -0
  125. mcp_hangar/infrastructure/query_bus.py +153 -0
  126. mcp_hangar/infrastructure/saga_manager.py +401 -0
  127. mcp_hangar/logging_config.py +209 -0
  128. mcp_hangar/metrics.py +1007 -0
  129. mcp_hangar/models.py +31 -0
  130. mcp_hangar/observability/__init__.py +54 -0
  131. mcp_hangar/observability/health.py +487 -0
  132. mcp_hangar/observability/metrics.py +319 -0
  133. mcp_hangar/observability/tracing.py +433 -0
  134. mcp_hangar/progress.py +542 -0
  135. mcp_hangar/retry.py +613 -0
  136. mcp_hangar/server/__init__.py +120 -0
  137. mcp_hangar/server/__main__.py +6 -0
  138. mcp_hangar/server/auth_bootstrap.py +340 -0
  139. mcp_hangar/server/auth_cli.py +335 -0
  140. mcp_hangar/server/auth_config.py +305 -0
  141. mcp_hangar/server/bootstrap.py +735 -0
  142. mcp_hangar/server/cli.py +161 -0
  143. mcp_hangar/server/config.py +224 -0
  144. mcp_hangar/server/context.py +215 -0
  145. mcp_hangar/server/http_auth_middleware.py +165 -0
  146. mcp_hangar/server/lifecycle.py +467 -0
  147. mcp_hangar/server/state.py +117 -0
  148. mcp_hangar/server/tools/__init__.py +16 -0
  149. mcp_hangar/server/tools/discovery.py +186 -0
  150. mcp_hangar/server/tools/groups.py +75 -0
  151. mcp_hangar/server/tools/health.py +301 -0
  152. mcp_hangar/server/tools/provider.py +939 -0
  153. mcp_hangar/server/tools/registry.py +320 -0
  154. mcp_hangar/server/validation.py +113 -0
  155. mcp_hangar/stdio_client.py +229 -0
  156. mcp_hangar-0.2.0.dist-info/METADATA +347 -0
  157. mcp_hangar-0.2.0.dist-info/RECORD +160 -0
  158. mcp_hangar-0.2.0.dist-info/WHEEL +4 -0
  159. mcp_hangar-0.2.0.dist-info/entry_points.txt +2 -0
  160. mcp_hangar-0.2.0.dist-info/licenses/LICENSE +21 -0
mcp_hangar/models.py ADDED
@@ -0,0 +1,31 @@
1
+ """Core data models for the MCP registry with explicit state management.
2
+
3
+ This module provides backward compatibility imports for legacy code.
4
+ New code should import directly from the domain layer.
5
+
6
+ Deprecated imports (use domain layer instead):
7
+ - ProviderState -> from mcp_hangar.domain.value_objects import ProviderState
8
+ - MCPError, ProviderStartError, etc. -> from mcp_hangar.domain.exceptions import ...
9
+ - ToolSchema -> from mcp_hangar.domain.model import ToolSchema
10
+ """
11
+
12
+ # Re-export all exceptions from the canonical location for backward compatibility
13
+ from .domain.exceptions import (
14
+ ClientError,
15
+ MCPError,
16
+ ProviderDegradedError,
17
+ ProviderError,
18
+ ProviderNotFoundError,
19
+ ProviderNotReadyError,
20
+ ProviderStartError,
21
+ ToolError,
22
+ ToolInvocationError,
23
+ ToolNotFoundError,
24
+ ValidationError,
25
+ )
26
+
27
+ # Re-export ToolSchema from the canonical location
28
+ from .domain.model import ToolSchema
29
+
30
+ # Re-export ProviderState from the canonical location
31
+ from .domain.value_objects import ProviderState
mcp_hangar/observability/__init__.py ADDED
@@ -0,0 +1,54 @@
1
+ """Observability module for MCP Hangar.
2
+
3
+ Provides unified observability stack:
4
+ - OpenTelemetry tracing
5
+ - Extended metrics
6
+ - Health endpoints
7
+ - Log correlation
8
+
9
+ Usage:
10
+ from mcp_hangar.observability import init_tracing, get_tracer
11
+
12
+ init_tracing(service_name="mcp-hangar")
13
+ tracer = get_tracer(__name__)
14
+
15
+ with tracer.start_as_current_span("operation") as span:
16
+ span.set_attribute("provider.id", provider_id)
17
+ # ... do work
18
+ """
19
+
20
+ from mcp_hangar.observability.health import get_health_endpoint, HealthCheck, HealthEndpoint, HealthStatus
21
+ from mcp_hangar.observability.metrics import CircuitState, get_observability_metrics, ObservabilityMetrics
22
+ from mcp_hangar.observability.tracing import (
23
+ extract_trace_context,
24
+ get_current_span_id,
25
+ get_current_trace_id,
26
+ get_tracer,
27
+ init_tracing,
28
+ inject_trace_context,
29
+ shutdown_tracing,
30
+ trace_span,
31
+ trace_tool_invocation,
32
+ )
33
+
34
+ __all__ = [
35
+ # Tracing
36
+ "init_tracing",
37
+ "shutdown_tracing",
38
+ "get_tracer",
39
+ "trace_tool_invocation",
40
+ "trace_span",
41
+ "inject_trace_context",
42
+ "extract_trace_context",
43
+ "get_current_trace_id",
44
+ "get_current_span_id",
45
+ # Metrics
46
+ "ObservabilityMetrics",
47
+ "get_observability_metrics",
48
+ "CircuitState",
49
+ # Health
50
+ "HealthStatus",
51
+ "HealthCheck",
52
+ "HealthEndpoint",
53
+ "get_health_endpoint",
54
+ ]
mcp_hangar/observability/health.py ADDED
@@ -0,0 +1,487 @@
1
+ """Health check endpoints and status for MCP Hangar.
2
+
3
+ Provides Kubernetes-compatible health endpoints:
4
+ - /health/live - Liveness probe (is the process alive?)
5
+ - /health/ready - Readiness probe (can it serve traffic?)
6
+ - /health/startup - Startup probe (has it finished initializing?)
7
+
8
+ Also provides detailed health status for dashboards.
9
+ """
10
+
11
+ import asyncio
12
+ from dataclasses import dataclass, field
13
+ from enum import Enum
14
+ import threading
15
+ import time
16
+ from typing import Any, Callable, Dict, List, Optional
17
+
18
+ from mcp_hangar.logging_config import get_logger
19
+
20
+ logger = get_logger(__name__)
21
+
22
+
23
class HealthStatus(Enum):
    """Health check result status.

    The member values are the lowercase strings written into serialized
    health payloads (the ``to_dict`` methods in this module emit
    ``status.value``).
    """

    # Check passed.
    HEALTHY = "healthy"
    # A non-critical check failed, timed out or raised.
    DEGRADED = "degraded"
    # A critical check failed, timed out or raised.
    UNHEALTHY = "unhealthy"
    # Not assigned by any code in this module.
    UNKNOWN = "unknown"
30
+
31
+
32
@dataclass
class HealthCheckResult:
    """Outcome of one execution of a health check.

    Attributes are documented inline; ``details`` and ``timestamp`` get a
    fresh value per instance via ``default_factory``.
    """

    # Name of the check that produced this result.
    name: str
    # Status assigned to this run.
    status: HealthStatus
    # Human-readable explanation of the status.
    message: str = ""
    # How long the check took, in milliseconds.
    duration_ms: float = 0.0
    # Free-form extra data (e.g. the exception type on failure).
    details: Dict[str, Any] = field(default_factory=dict)
    # Wall-clock creation time of the result (``time.time()``).
    timestamp: float = field(default_factory=time.time)

    def to_dict(self) -> Dict[str, Any]:
        """Build the JSON-serializable representation of this result.

        Returns:
            Dictionary with the status as its string value and the duration
            rounded to two decimal places.
        """
        payload: Dict[str, Any] = {"name": self.name}
        payload["status"] = self.status.value
        payload["message"] = self.message
        payload["duration_ms"] = round(self.duration_ms, 2)
        payload["details"] = self.details
        payload["timestamp"] = self.timestamp
        return payload
53
+
54
+
55
@dataclass
class HealthCheck:
    """Health check definition.

    Wraps a zero-argument callable (sync or ``async def``) together with a
    timeout and a criticality flag, and turns its outcome into a
    ``HealthCheckResult``.
    """

    # Unique name of the check; used as the result name.
    name: str
    # Zero-argument callable; a truthy return value means the check passed.
    # Coroutine functions are awaited, plain functions run in the default executor.
    check_fn: Callable[[], bool]
    description: str = ""
    # Maximum time to wait for check_fn before reporting a timeout.
    timeout_seconds: float = 5.0
    critical: bool = True  # If False, failure degrades but doesn't make unhealthy

    def _failure_result(
        self,
        start: float,
        message: str,
        details: Optional[Dict[str, Any]] = None,
    ) -> "HealthCheckResult":
        """Build the result for a failed run, honouring ``critical``."""
        return HealthCheckResult(
            name=self.name,
            status=HealthStatus.UNHEALTHY if self.critical else HealthStatus.DEGRADED,
            message=message,
            duration_ms=(time.perf_counter() - start) * 1000,
            details=dict(details) if details else {},
        )

    async def execute(self) -> HealthCheckResult:
        """Execute the health check.

        Never raises for ordinary failures: a falsy return value, a timeout
        and any ``Exception`` raised by ``check_fn`` are all converted into a
        failing result (UNHEALTHY when ``critical`` is true, DEGRADED
        otherwise).

        Returns:
            HealthCheckResult with status and timing.
        """
        start = time.perf_counter()
        try:
            # Run check with timeout
            if asyncio.iscoroutinefunction(self.check_fn):
                result = await asyncio.wait_for(self.check_fn(), timeout=self.timeout_seconds)
            else:
                # Run sync function in thread pool. We are inside a coroutine, so
                # the running loop is the right one; get_event_loop() is
                # deprecated for this purpose.
                # NOTE: on timeout the worker thread is not interrupted; only
                # the wait is abandoned.
                loop = asyncio.get_running_loop()
                result = await asyncio.wait_for(
                    loop.run_in_executor(None, self.check_fn),
                    timeout=self.timeout_seconds,
                )
        except asyncio.TimeoutError:
            return self._failure_result(start, f"Check timed out after {self.timeout_seconds}s")
        except Exception as e:
            return self._failure_result(
                start,
                f"Check failed: {str(e)}",
                {"error_type": type(e).__name__},
            )

        if result:
            return HealthCheckResult(
                name=self.name,
                status=HealthStatus.HEALTHY,
                message="Check passed",
                duration_ms=(time.perf_counter() - start) * 1000,
            )
        return self._failure_result(start, "Check returned false")
121
+
122
+
123
@dataclass
class HealthResponse:
    """Aggregated answer returned by a health probe."""

    # Overall status for the probe.
    status: HealthStatus
    # Individual check results that make up the response.
    checks: List[HealthCheckResult]
    # Version string reported alongside the status.
    version: str = "unknown"
    # Seconds elapsed since the endpoint was created.
    uptime_seconds: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        """Build the JSON-serializable representation of this response.

        Returns:
            Dictionary with serialized checks, the uptime rounded to one
            decimal place and a ``timestamp`` taken at serialization time.
        """
        serialized_checks = []
        for check_result in self.checks:
            serialized_checks.append(check_result.to_dict())

        payload: Dict[str, Any] = {"status": self.status.value}
        payload["checks"] = serialized_checks
        payload["version"] = self.version
        payload["uptime_seconds"] = round(self.uptime_seconds, 1)
        payload["timestamp"] = time.time()
        return payload
141
+
142
+
143
class HealthEndpoint:
    """Health endpoint manager.

    Holds the registered health checks and produces the liveness, readiness
    and startup responses used for Kubernetes-compatible probes, plus a
    detailed status dictionary for dashboards.
    """

    def __init__(self):
        self._checks: List[HealthCheck] = []
        self._startup_complete = False
        self._start_time = time.time()
        # Guards _checks and _last_results.
        self._lock = threading.Lock()
        # Most recent result per check name, refreshed on every run.
        self._last_results: Dict[str, HealthCheckResult] = {}

    def register_check(self, check: HealthCheck) -> None:
        """Register a health check.

        A check whose name is already registered is ignored.

        Args:
            check: HealthCheck to register.
        """
        with self._lock:
            # Avoid duplicates
            existing_names = {c.name for c in self._checks}
            if check.name not in existing_names:
                self._checks.append(check)
                logger.debug("health_check_registered", name=check.name)

    def unregister_check(self, name: str) -> None:
        """Unregister a health check by name.

        Also drops the cached last result for that check. Unknown names are
        ignored.

        Args:
            name: Name of check to remove.
        """
        with self._lock:
            self._checks = [c for c in self._checks if c.name != name]
            self._last_results.pop(name, None)

    def mark_startup_complete(self) -> None:
        """Mark that startup is complete."""
        self._startup_complete = True
        logger.info("startup_marked_complete")

    @property
    def uptime_seconds(self) -> float:
        """Get server uptime in seconds (time since this object was created)."""
        return time.time() - self._start_time

    async def check_liveness(self) -> HealthResponse:
        """Liveness probe - is the process alive?

        Always returns healthy; no registered checks are run.
        Used by Kubernetes to restart the container.

        Returns:
            HealthResponse with liveness status.
        """
        # Simple liveness - just verify we can respond
        return HealthResponse(
            status=HealthStatus.HEALTHY,
            checks=[
                HealthCheckResult(
                    name="liveness",
                    status=HealthStatus.HEALTHY,
                    message="Process is alive",
                )
            ],
            version=self._get_version(),
            uptime_seconds=self.uptime_seconds,
        )

    async def check_readiness(self) -> HealthResponse:
        """Readiness probe - can we serve traffic?

        Runs all registered health checks and aggregates them: any unhealthy
        result makes the response unhealthy, otherwise any degraded result
        makes it degraded, otherwise (including no checks) it is healthy.
        Used by Kubernetes to route traffic.

        Returns:
            HealthResponse with aggregated status.
        """
        results = await self._run_all_checks()

        return HealthResponse(
            status=self._aggregate_status(results),
            checks=results,
            version=self._get_version(),
            uptime_seconds=self.uptime_seconds,
        )

    async def check_startup(self) -> HealthResponse:
        """Startup probe - has initialization completed?

        Returns unhealthy until mark_startup_complete() is called.
        Used by Kubernetes to delay liveness/readiness probes.

        Returns:
            HealthResponse with startup status.
        """
        if self._startup_complete:
            status = HealthStatus.HEALTHY
            message = "Startup complete"
        else:
            status = HealthStatus.UNHEALTHY
            message = "Startup in progress"

        return HealthResponse(
            status=status,
            checks=[
                HealthCheckResult(
                    name="startup",
                    status=status,
                    message=message,
                )
            ],
            version=self._get_version(),
            uptime_seconds=self.uptime_seconds,
        )

    async def get_detailed_status(self) -> Dict[str, Any]:
        """Get detailed health status for dashboards.

        Runs all registered checks and reports per-status counts and the
        average check duration alongside the individual results.

        Returns:
            Detailed status dictionary.
        """
        results = await self._run_all_checks()

        # Calculate statistics
        total = len(results)
        healthy = sum(1 for r in results if r.status == HealthStatus.HEALTHY)
        degraded = sum(1 for r in results if r.status == HealthStatus.DEGRADED)
        unhealthy = sum(1 for r in results if r.status == HealthStatus.UNHEALTHY)

        avg_duration = sum(r.duration_ms for r in results) / total if total > 0 else 0

        return {
            "status": self._aggregate_status(results).value,
            "summary": {
                "total_checks": total,
                "healthy": healthy,
                "degraded": degraded,
                "unhealthy": unhealthy,
                "avg_check_duration_ms": round(avg_duration, 2),
            },
            "checks": [r.to_dict() for r in results],
            "version": self._get_version(),
            "uptime_seconds": round(self.uptime_seconds, 1),
            "startup_complete": self._startup_complete,
            "timestamp": time.time(),
        }

    async def _run_all_checks(self) -> List[HealthCheckResult]:
        """Run all registered health checks.

        Checks run concurrently. The results are cached by check name for
        get_last_result().

        Returns:
            List of check results, in registration order.
        """
        # Snapshot under the lock so checks can be (un)registered while we run.
        with self._lock:
            checks = list(self._checks)

        if not checks:
            return []

        # Run checks concurrently
        tasks = [check.execute() for check in checks]
        results = await asyncio.gather(*tasks, return_exceptions=True)

        # Process results
        processed: List[HealthCheckResult] = []
        for check, result in zip(checks, results):
            # BaseException (not just Exception): a cancelled check comes back
            # from gather() as a CancelledError instance, which must not be
            # mistaken for a HealthCheckResult.
            if isinstance(result, BaseException):
                processed.append(
                    HealthCheckResult(
                        name=check.name,
                        status=HealthStatus.UNHEALTHY,
                        message=f"Check execution failed: {result}",
                    )
                )
            else:
                processed.append(result)

        # Cache results
        with self._lock:
            for r in processed:
                self._last_results[r.name] = r

        return processed

    def _aggregate_status(self, results: List[HealthCheckResult]) -> HealthStatus:
        """Aggregate check results into overall status.

        Unhealthy wins over degraded, degraded wins over healthy; an empty
        result list counts as healthy.
        """
        if not results:
            return HealthStatus.HEALTHY

        has_unhealthy = any(r.status == HealthStatus.UNHEALTHY for r in results)
        has_degraded = any(r.status == HealthStatus.DEGRADED for r in results)

        if has_unhealthy:
            return HealthStatus.UNHEALTHY
        elif has_degraded:
            return HealthStatus.DEGRADED
        else:
            return HealthStatus.HEALTHY

    def _get_version(self) -> str:
        """Get MCP Hangar version, or "unknown" if it cannot be imported."""
        try:
            # Imported lazily, at call time rather than at module import.
            from mcp_hangar import __version__

            return __version__
        except (ImportError, AttributeError):
            return "unknown"

    def get_last_result(self, name: str) -> Optional[HealthCheckResult]:
        """Get the last result for a specific check.

        Args:
            name: Check name.

        Returns:
            Last result or None.
        """
        with self._lock:
            return self._last_results.get(name)
376
+
377
+
378
# Global singleton: created lazily by get_health_endpoint() and cleared by
# reset_health_endpoint(); None until first requested.
_health_endpoint: Optional[HealthEndpoint] = None
380
+
381
+
382
def get_health_endpoint() -> HealthEndpoint:
    """Return the shared HealthEndpoint, creating it on first use.

    Returns:
        The module-level HealthEndpoint instance.
    """
    global _health_endpoint
    if _health_endpoint is not None:
        return _health_endpoint

    # First request: build the instance and remember it for later callers.
    _health_endpoint = HealthEndpoint()
    return _health_endpoint
392
+
393
+
394
def reset_health_endpoint() -> None:
    """Reset health endpoint singleton (for testing).

    Clears the module-level instance so the next get_health_endpoint() call
    builds a fresh HealthEndpoint.
    """
    global _health_endpoint
    _health_endpoint = None
398
+
399
+
400
+ # Built-in health checks
401
def create_provider_health_check(providers_dict: Any) -> HealthCheck:
    """Build the non-critical "providers" health check.

    The check passes when there are no providers, or when at least half of
    them report a ready state.

    Args:
        providers_dict: Providers dictionary or dict-like object; it is read
            each time the check runs, not copied at creation time.

    Returns:
        HealthCheck instance.
    """

    def check() -> bool:
        # Nothing registered: vacuously healthy.
        if not providers_dict:
            return True

        provider_count = len(providers_dict)

        # NOTE(review): relies on str(state) rendering exactly as "ready" -
        # confirm the state type's __str__ produces that.
        ready_count = 0
        for provider in providers_dict.values():
            if hasattr(provider, "state") and str(provider.state) == "ready":
                ready_count += 1

        # Healthy when at least 50% of providers are ready.
        return provider_count == 0 or (ready_count / provider_count) >= 0.5

    provider_check = HealthCheck(
        name="providers",
        check_fn=check,
        description="Check that at least 50% of providers are ready",
        critical=False,  # Failure degrades overall status instead of failing it
    )
    return provider_check
427
+
428
+
429
def create_memory_health_check(
    threshold_mb: int = 1024,
) -> HealthCheck:
    """Create health check for memory usage.

    The check compares the process's peak resident set size
    (``ru_maxrss``) against the threshold. Because this is a high-water
    mark rather than current usage, the check keeps failing once the
    threshold has been exceeded, even if memory is later released.

    Args:
        threshold_mb: Memory threshold in MB.

    Returns:
        HealthCheck instance (non-critical: failure degrades, not unhealthy).
    """

    def check() -> bool:
        try:
            import platform
            import resource

            # Peak RSS of this process; the unit is platform dependent.
            max_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

            # macOS reports in bytes, Linux in KB
            if platform.system() == "Darwin":
                rss_mb = max_rss / (1024 * 1024)
            else:
                rss_mb = max_rss / 1024

            return rss_mb < threshold_mb
        except (ImportError, AttributeError):
            return True  # Can't check (e.g. no `resource` module), assume healthy

    return HealthCheck(
        name="memory",
        check_fn=check,
        description=f"Check memory usage is below {threshold_mb}MB",
        critical=False,
    )
467
+
468
+
469
def create_event_loop_health_check() -> HealthCheck:
    """Build the critical "event_loop" health check.

    The check awaits a one-millisecond sleep and must finish within its
    one-second timeout.

    Returns:
        HealthCheck instance.
    """

    async def check() -> bool:
        # Yield to the loop briefly; completing at all is the success signal.
        await asyncio.sleep(0.001)
        return True

    event_loop_check = HealthCheck(
        name="event_loop",
        check_fn=check,
        description="Check event loop is responsive",
        timeout_seconds=1.0,
        critical=True,
    )
    return event_loop_check