foundry-mcp 0.8.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of foundry-mcp might be problematic. Click here for more details.

Files changed (153)
  1. foundry_mcp/__init__.py +13 -0
  2. foundry_mcp/cli/__init__.py +67 -0
  3. foundry_mcp/cli/__main__.py +9 -0
  4. foundry_mcp/cli/agent.py +96 -0
  5. foundry_mcp/cli/commands/__init__.py +37 -0
  6. foundry_mcp/cli/commands/cache.py +137 -0
  7. foundry_mcp/cli/commands/dashboard.py +148 -0
  8. foundry_mcp/cli/commands/dev.py +446 -0
  9. foundry_mcp/cli/commands/journal.py +377 -0
  10. foundry_mcp/cli/commands/lifecycle.py +274 -0
  11. foundry_mcp/cli/commands/modify.py +824 -0
  12. foundry_mcp/cli/commands/plan.py +640 -0
  13. foundry_mcp/cli/commands/pr.py +393 -0
  14. foundry_mcp/cli/commands/review.py +667 -0
  15. foundry_mcp/cli/commands/session.py +472 -0
  16. foundry_mcp/cli/commands/specs.py +686 -0
  17. foundry_mcp/cli/commands/tasks.py +807 -0
  18. foundry_mcp/cli/commands/testing.py +676 -0
  19. foundry_mcp/cli/commands/validate.py +982 -0
  20. foundry_mcp/cli/config.py +98 -0
  21. foundry_mcp/cli/context.py +298 -0
  22. foundry_mcp/cli/logging.py +212 -0
  23. foundry_mcp/cli/main.py +44 -0
  24. foundry_mcp/cli/output.py +122 -0
  25. foundry_mcp/cli/registry.py +110 -0
  26. foundry_mcp/cli/resilience.py +178 -0
  27. foundry_mcp/cli/transcript.py +217 -0
  28. foundry_mcp/config.py +1454 -0
  29. foundry_mcp/core/__init__.py +144 -0
  30. foundry_mcp/core/ai_consultation.py +1773 -0
  31. foundry_mcp/core/batch_operations.py +1202 -0
  32. foundry_mcp/core/cache.py +195 -0
  33. foundry_mcp/core/capabilities.py +446 -0
  34. foundry_mcp/core/concurrency.py +898 -0
  35. foundry_mcp/core/context.py +540 -0
  36. foundry_mcp/core/discovery.py +1603 -0
  37. foundry_mcp/core/error_collection.py +728 -0
  38. foundry_mcp/core/error_store.py +592 -0
  39. foundry_mcp/core/health.py +749 -0
  40. foundry_mcp/core/intake.py +933 -0
  41. foundry_mcp/core/journal.py +700 -0
  42. foundry_mcp/core/lifecycle.py +412 -0
  43. foundry_mcp/core/llm_config.py +1376 -0
  44. foundry_mcp/core/llm_patterns.py +510 -0
  45. foundry_mcp/core/llm_provider.py +1569 -0
  46. foundry_mcp/core/logging_config.py +374 -0
  47. foundry_mcp/core/metrics_persistence.py +584 -0
  48. foundry_mcp/core/metrics_registry.py +327 -0
  49. foundry_mcp/core/metrics_store.py +641 -0
  50. foundry_mcp/core/modifications.py +224 -0
  51. foundry_mcp/core/naming.py +146 -0
  52. foundry_mcp/core/observability.py +1216 -0
  53. foundry_mcp/core/otel.py +452 -0
  54. foundry_mcp/core/otel_stubs.py +264 -0
  55. foundry_mcp/core/pagination.py +255 -0
  56. foundry_mcp/core/progress.py +387 -0
  57. foundry_mcp/core/prometheus.py +564 -0
  58. foundry_mcp/core/prompts/__init__.py +464 -0
  59. foundry_mcp/core/prompts/fidelity_review.py +691 -0
  60. foundry_mcp/core/prompts/markdown_plan_review.py +515 -0
  61. foundry_mcp/core/prompts/plan_review.py +627 -0
  62. foundry_mcp/core/providers/__init__.py +237 -0
  63. foundry_mcp/core/providers/base.py +515 -0
  64. foundry_mcp/core/providers/claude.py +472 -0
  65. foundry_mcp/core/providers/codex.py +637 -0
  66. foundry_mcp/core/providers/cursor_agent.py +630 -0
  67. foundry_mcp/core/providers/detectors.py +515 -0
  68. foundry_mcp/core/providers/gemini.py +426 -0
  69. foundry_mcp/core/providers/opencode.py +718 -0
  70. foundry_mcp/core/providers/opencode_wrapper.js +308 -0
  71. foundry_mcp/core/providers/package-lock.json +24 -0
  72. foundry_mcp/core/providers/package.json +25 -0
  73. foundry_mcp/core/providers/registry.py +607 -0
  74. foundry_mcp/core/providers/test_provider.py +171 -0
  75. foundry_mcp/core/providers/validation.py +857 -0
  76. foundry_mcp/core/rate_limit.py +427 -0
  77. foundry_mcp/core/research/__init__.py +68 -0
  78. foundry_mcp/core/research/memory.py +528 -0
  79. foundry_mcp/core/research/models.py +1234 -0
  80. foundry_mcp/core/research/providers/__init__.py +40 -0
  81. foundry_mcp/core/research/providers/base.py +242 -0
  82. foundry_mcp/core/research/providers/google.py +507 -0
  83. foundry_mcp/core/research/providers/perplexity.py +442 -0
  84. foundry_mcp/core/research/providers/semantic_scholar.py +544 -0
  85. foundry_mcp/core/research/providers/tavily.py +383 -0
  86. foundry_mcp/core/research/workflows/__init__.py +25 -0
  87. foundry_mcp/core/research/workflows/base.py +298 -0
  88. foundry_mcp/core/research/workflows/chat.py +271 -0
  89. foundry_mcp/core/research/workflows/consensus.py +539 -0
  90. foundry_mcp/core/research/workflows/deep_research.py +4142 -0
  91. foundry_mcp/core/research/workflows/ideate.py +682 -0
  92. foundry_mcp/core/research/workflows/thinkdeep.py +405 -0
  93. foundry_mcp/core/resilience.py +600 -0
  94. foundry_mcp/core/responses.py +1624 -0
  95. foundry_mcp/core/review.py +366 -0
  96. foundry_mcp/core/security.py +438 -0
  97. foundry_mcp/core/spec.py +4119 -0
  98. foundry_mcp/core/task.py +2463 -0
  99. foundry_mcp/core/testing.py +839 -0
  100. foundry_mcp/core/validation.py +2357 -0
  101. foundry_mcp/dashboard/__init__.py +32 -0
  102. foundry_mcp/dashboard/app.py +119 -0
  103. foundry_mcp/dashboard/components/__init__.py +17 -0
  104. foundry_mcp/dashboard/components/cards.py +88 -0
  105. foundry_mcp/dashboard/components/charts.py +177 -0
  106. foundry_mcp/dashboard/components/filters.py +136 -0
  107. foundry_mcp/dashboard/components/tables.py +195 -0
  108. foundry_mcp/dashboard/data/__init__.py +11 -0
  109. foundry_mcp/dashboard/data/stores.py +433 -0
  110. foundry_mcp/dashboard/launcher.py +300 -0
  111. foundry_mcp/dashboard/views/__init__.py +12 -0
  112. foundry_mcp/dashboard/views/errors.py +217 -0
  113. foundry_mcp/dashboard/views/metrics.py +164 -0
  114. foundry_mcp/dashboard/views/overview.py +96 -0
  115. foundry_mcp/dashboard/views/providers.py +83 -0
  116. foundry_mcp/dashboard/views/sdd_workflow.py +255 -0
  117. foundry_mcp/dashboard/views/tool_usage.py +139 -0
  118. foundry_mcp/prompts/__init__.py +9 -0
  119. foundry_mcp/prompts/workflows.py +525 -0
  120. foundry_mcp/resources/__init__.py +9 -0
  121. foundry_mcp/resources/specs.py +591 -0
  122. foundry_mcp/schemas/__init__.py +38 -0
  123. foundry_mcp/schemas/intake-schema.json +89 -0
  124. foundry_mcp/schemas/sdd-spec-schema.json +414 -0
  125. foundry_mcp/server.py +150 -0
  126. foundry_mcp/tools/__init__.py +10 -0
  127. foundry_mcp/tools/unified/__init__.py +92 -0
  128. foundry_mcp/tools/unified/authoring.py +3620 -0
  129. foundry_mcp/tools/unified/context_helpers.py +98 -0
  130. foundry_mcp/tools/unified/documentation_helpers.py +268 -0
  131. foundry_mcp/tools/unified/environment.py +1341 -0
  132. foundry_mcp/tools/unified/error.py +479 -0
  133. foundry_mcp/tools/unified/health.py +225 -0
  134. foundry_mcp/tools/unified/journal.py +841 -0
  135. foundry_mcp/tools/unified/lifecycle.py +640 -0
  136. foundry_mcp/tools/unified/metrics.py +777 -0
  137. foundry_mcp/tools/unified/plan.py +876 -0
  138. foundry_mcp/tools/unified/pr.py +294 -0
  139. foundry_mcp/tools/unified/provider.py +589 -0
  140. foundry_mcp/tools/unified/research.py +1283 -0
  141. foundry_mcp/tools/unified/review.py +1042 -0
  142. foundry_mcp/tools/unified/review_helpers.py +314 -0
  143. foundry_mcp/tools/unified/router.py +102 -0
  144. foundry_mcp/tools/unified/server.py +565 -0
  145. foundry_mcp/tools/unified/spec.py +1283 -0
  146. foundry_mcp/tools/unified/task.py +3846 -0
  147. foundry_mcp/tools/unified/test.py +431 -0
  148. foundry_mcp/tools/unified/verification.py +520 -0
  149. foundry_mcp-0.8.22.dist-info/METADATA +344 -0
  150. foundry_mcp-0.8.22.dist-info/RECORD +153 -0
  151. foundry_mcp-0.8.22.dist-info/WHEEL +4 -0
  152. foundry_mcp-0.8.22.dist-info/entry_points.txt +3 -0
  153. foundry_mcp-0.8.22.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,749 @@
1
+ """Health check system for foundry-mcp.
2
+
3
+ Provides Kubernetes-style health probes (liveness, readiness, health)
4
+ with pluggable dependency checkers and configurable thresholds.
5
+
6
+ Usage:
7
+ from foundry_mcp.core.health import (
8
+ get_health_manager,
9
+ HealthStatus,
10
+ check_liveness,
11
+ check_readiness,
12
+ check_health,
13
+ )
14
+
15
+ # Quick checks
16
+ if check_liveness().is_healthy:
17
+ print("Server is alive")
18
+
19
+ # Full health check with details
20
+ result = check_health()
21
+ print(f"Status: {result.status.value}")
22
+ print(f"Dependencies: {result.dependencies}")
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
import logging
import shutil
import threading
import time
from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional, Protocol
34
+
35
+ logger = logging.getLogger(__name__)
36
+
37
+
38
class HealthStatus(str, Enum):
    """Condition reported for a probe or for a single dependency.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value and ``.value`` yields that string for serialization.
    """

    # Everything that was checked is working normally.
    HEALTHY = "healthy"
    # Usable, but at least one check produced a warning.
    DEGRADED = "degraded"
    # At least one check failed.
    UNHEALTHY = "unhealthy"
44
+
45
+
46
@dataclass
class DependencyHealth:
    """Outcome of probing one dependency.

    Attributes:
        name: Identifier of the dependency that was checked.
        healthy: Whether the dependency passed its check.
        status: Finer-grained status for the dependency.
        message: Optional human-readable explanation.
        latency_ms: Duration of the check in milliseconds, if measured.
        details: Optional extra data included in the serialized output.
    """

    name: str
    healthy: bool
    status: "HealthStatus"
    message: str = ""
    latency_ms: Optional[float] = None
    details: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain ``dict`` suitable for JSON serialization.

        ``name``, ``healthy`` and ``status`` are always present. ``message``
        and ``details`` are emitted only when non-empty, and ``latency_ms``
        only when it was recorded (rounded to two decimal places).
        """
        payload: Dict[str, Any] = dict(
            name=self.name,
            healthy=self.healthy,
            status=self.status.value,
        )
        if self.message:
            payload["message"] = self.message
        # A latency of 0.0 is a real measurement, so test against None.
        if self.latency_ms is not None:
            payload["latency_ms"] = round(self.latency_ms, 2)
        if self.details:
            payload["details"] = self.details
        return payload
71
+
72
+
73
@dataclass
class HealthResult:
    """Aggregated outcome of a liveness, readiness or full health probe.

    Attributes:
        status: Overall status of the probe.
        is_healthy: Whether the probe is considered passing.
        message: Optional human-readable summary.
        timestamp: Creation time from ``time.time()`` unless supplied.
        dependencies: Per-dependency results gathered by the probe.
        details: Optional extra data included in the serialized output.
    """

    status: "HealthStatus"
    is_healthy: bool
    message: str = ""
    timestamp: float = field(default_factory=time.time)
    dependencies: List["DependencyHealth"] = field(default_factory=list)
    details: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain ``dict`` suitable for JSON serialization.

        ``status``, ``is_healthy`` and ``timestamp`` are always present;
        ``message``, ``dependencies`` and ``details`` only when non-empty.
        Each dependency is serialized through its own ``to_dict``.
        """
        payload: Dict[str, Any] = dict(
            status=self.status.value,
            is_healthy=self.is_healthy,
            timestamp=self.timestamp,
        )
        if self.message:
            payload["message"] = self.message
        if self.dependencies:
            payload["dependencies"] = [dep.to_dict() for dep in self.dependencies]
        if self.details:
            payload["details"] = self.details
        return payload
98
+
99
+
100
class DependencyChecker(Protocol):
    """Structural interface that a dependency checker must satisfy.

    Any object exposing a ``name`` and a ``check`` method of this shape can
    be used as a checker; inheriting from this class is not required.
    """

    @property
    def name(self) -> str:
        """Unique identifier of the dependency being checked."""
        ...

    def check(self, timeout: float = 5.0) -> "DependencyHealth":
        """Probe the dependency and report its health.

        Args:
            timeout: Time budget offered to the probe.

        Returns:
            A ``DependencyHealth`` describing the outcome.
        """
        ...
111
+
112
+
113
+ # =============================================================================
114
+ # Built-in Dependency Checkers
115
+ # =============================================================================
116
+
117
+
118
class SpecsDirectoryChecker:
    """Verify that the specs directory exists, is a directory and is readable."""

    name = "specs_directory"

    def __init__(self, specs_dir: Optional[Path] = None):
        """Create the checker.

        Args:
            specs_dir: Directory to verify. When ``None`` it is looked up via
                ``foundry_mcp.config.get_config()`` the first time ``check``
                runs, and the value found is kept on the instance.
        """
        self.specs_dir = specs_dir

    def _report(
        self,
        started: float,
        healthy: bool,
        message: str,
        details: Optional[Dict[str, Any]] = None,
    ) -> "DependencyHealth":
        """Build a result stamped with the time elapsed since ``started``."""
        return DependencyHealth(
            name=self.name,
            healthy=healthy,
            status=HealthStatus.HEALTHY if healthy else HealthStatus.UNHEALTHY,
            message=message,
            latency_ms=(time.perf_counter() - started) * 1000,
            details=details or {},
        )

    def check(self, timeout: float = 5.0) -> "DependencyHealth":
        """Check that the specs directory is usable.

        Args:
            timeout: Accepted for interface compatibility with other
                checkers; this checker does not enforce it.

        Returns:
            A healthy result when the directory exists, is a directory and
            can be listed; otherwise an unhealthy result whose message names
            the first problem found. Unexpected errors are reported as
            unhealthy rather than raised.
        """
        start = time.perf_counter()
        try:
            # Fall back to the configured location when none was supplied.
            if self.specs_dir is None:
                from foundry_mcp.config import get_config

                config = get_config()
                self.specs_dir = config.specs_dir if config else None

            if self.specs_dir is None:
                return self._report(start, False, "specs_dir not configured")

            # Tolerate a plain string path; Path(Path) is a no-op.
            specs_dir = Path(self.specs_dir)

            if not specs_dir.exists():
                return self._report(
                    start, False, f"specs_dir does not exist: {specs_dir}"
                )

            if not specs_dir.is_dir():
                return self._report(
                    start, False, f"specs_dir is not a directory: {specs_dir}"
                )

            # Readability probe: asking for one entry is enough, so avoid
            # building a list of every child path.
            try:
                next(specs_dir.iterdir(), None)
            except PermissionError:
                return self._report(
                    start, False, f"specs_dir not readable: {specs_dir}"
                )

            return self._report(
                start,
                True,
                "specs_dir accessible",
                {"path": str(specs_dir)},
            )

        except Exception as e:
            return self._report(start, False, f"Error checking specs_dir: {e}")
192
+
193
+
194
class DiskSpaceChecker:
    """Report whether free disk space is above the configured limits."""

    name = "disk_space"

    def __init__(
        self,
        path: Optional[Path] = None,
        threshold_mb: int = 100,
        warning_mb: int = 500,
    ):
        """Create the checker.

        Args:
            path: Location whose filesystem is measured; defaults to ``"."``.
            threshold_mb: Free space (MB) below which the result is unhealthy.
            warning_mb: Free space (MB) below which the result is degraded.
        """
        self.path = path if path else Path(".")
        self.threshold_mb = threshold_mb
        self.warning_mb = warning_mb

    def check(self, timeout: float = 5.0) -> "DependencyHealth":
        """Measure free space at ``self.path`` and classify it.

        Args:
            timeout: Accepted for interface compatibility; not used here.

        Returns:
            Unhealthy below ``threshold_mb``, degraded (but still healthy)
            below ``warning_mb``, healthy otherwise. Any error while
            measuring is reported as unhealthy rather than raised.
        """
        started = time.perf_counter()
        bytes_per_mb = 1024 * 1024
        try:
            usage = shutil.disk_usage(self.path)
            free_mb = usage.free / bytes_per_mb

            info = {
                "path": str(self.path),
                "free_mb": round(free_mb, 2),
                "total_mb": round(usage.total / bytes_per_mb, 2),
                "threshold_mb": self.threshold_mb,
            }

            # Pick the classification first, then build one result.
            if free_mb < self.threshold_mb:
                healthy, status = False, HealthStatus.UNHEALTHY
                message = f"Disk space critically low: {free_mb:.1f}MB free"
            elif free_mb < self.warning_mb:
                healthy, status = True, HealthStatus.DEGRADED
                message = f"Disk space low: {free_mb:.1f}MB free"
            else:
                healthy, status = True, HealthStatus.HEALTHY
                message = f"Disk space OK: {free_mb:.1f}MB free"

            return DependencyHealth(
                name=self.name,
                healthy=healthy,
                status=status,
                message=message,
                latency_ms=(time.perf_counter() - started) * 1000,
                details=info,
            )

        except Exception as exc:
            return DependencyHealth(
                name=self.name,
                healthy=False,
                status=HealthStatus.UNHEALTHY,
                message=f"Error checking disk space: {exc}",
                latency_ms=(time.perf_counter() - started) * 1000,
            )
259
+
260
+
261
class OpenTelemetryChecker:
    """Report whether OpenTelemetry tracing is enabled."""

    name = "opentelemetry"

    def check(self, timeout: float = 5.0) -> "DependencyHealth":
        """Query the observability manager for the tracing state.

        Tracing is optional, so both the enabled and the disabled state are
        reported as healthy. A failure while querying is reported as
        degraded, with ``healthy`` still true, instead of being raised.

        Args:
            timeout: Accepted for interface compatibility; not used here.
        """
        started = time.perf_counter()
        try:
            from foundry_mcp.core.observability import get_observability_manager

            tracing_on = bool(get_observability_manager().is_tracing_enabled())
            if tracing_on:
                message = "OpenTelemetry tracing enabled"
            else:
                message = "OpenTelemetry tracing disabled (optional)"

            return DependencyHealth(
                name=self.name,
                healthy=True,
                status=HealthStatus.HEALTHY,
                message=message,
                latency_ms=(time.perf_counter() - started) * 1000,
                details={"enabled": tracing_on},
            )

        except Exception as exc:
            # Problems with optional telemetry must not fail the probe.
            return DependencyHealth(
                name=self.name,
                healthy=True,
                status=HealthStatus.DEGRADED,
                message=f"OpenTelemetry check failed: {exc}",
                latency_ms=(time.perf_counter() - started) * 1000,
            )
302
+
303
+
304
class PrometheusChecker:
    """Report whether Prometheus metrics are enabled."""

    name = "prometheus"

    def check(self, timeout: float = 5.0) -> "DependencyHealth":
        """Query the observability manager for the metrics state.

        Metrics are optional, so both the enabled and the disabled state are
        reported as healthy. A failure while querying is reported as
        degraded, with ``healthy`` still true, instead of being raised.

        Args:
            timeout: Accepted for interface compatibility; not used here.
        """
        started = time.perf_counter()
        try:
            from foundry_mcp.core.observability import get_observability_manager

            metrics_on = bool(get_observability_manager().is_metrics_enabled())
            if metrics_on:
                message = "Prometheus metrics enabled"
            else:
                message = "Prometheus metrics disabled (optional)"

            return DependencyHealth(
                name=self.name,
                healthy=True,
                status=HealthStatus.HEALTHY,
                message=message,
                latency_ms=(time.perf_counter() - started) * 1000,
                details={"enabled": metrics_on},
            )

        except Exception as exc:
            # Problems with optional metrics must not fail the probe.
            return DependencyHealth(
                name=self.name,
                healthy=True,
                status=HealthStatus.DEGRADED,
                message=f"Prometheus check failed: {exc}",
                latency_ms=(time.perf_counter() - started) * 1000,
            )
345
+
346
+
347
class AIProviderChecker:
    """Report which AI providers are currently available."""

    name = "ai_provider"

    def check(self, timeout: float = 5.0) -> "DependencyHealth":
        """Ask the provider package what is available and summarize it.

        AI providers are optional, so ``healthy`` is true in every outcome:
        providers present is HEALTHY, none present is DEGRADED, a missing
        provider module is HEALTHY, and any other error is DEGRADED.

        Args:
            timeout: Accepted for interface compatibility; not used here.
        """
        started = time.perf_counter()

        def elapsed_ms() -> float:
            # Milliseconds since this check began.
            return (time.perf_counter() - started) * 1000

        try:
            from foundry_mcp.core.providers import (
                available_providers,
                get_provider_statuses,
            )

            available = available_providers()
            statuses = get_provider_statuses()

            if not available:
                return DependencyHealth(
                    name=self.name,
                    healthy=True,
                    status=HealthStatus.DEGRADED,
                    message="No AI providers available (optional)",
                    latency_ms=elapsed_ms(),
                    details={"available": [], "statuses": {}},
                )

            return DependencyHealth(
                name=self.name,
                healthy=True,
                status=HealthStatus.HEALTHY,
                message=f"AI providers available: {', '.join(available)}",
                latency_ms=elapsed_ms(),
                # statuses is passed through exactly as the provider package returned it.
                details={"available": available, "statuses": statuses},
            )

        except ImportError:
            return DependencyHealth(
                name=self.name,
                healthy=True,
                status=HealthStatus.HEALTHY,
                message="AI provider module not available (optional)",
                latency_ms=elapsed_ms(),
            )
        except Exception as exc:
            return DependencyHealth(
                name=self.name,
                healthy=True,
                status=HealthStatus.DEGRADED,
                message=f"AI provider check failed: {exc}",
                latency_ms=elapsed_ms(),
            )
403
+
404
+
405
+ # =============================================================================
406
+ # Health Manager
407
+ # =============================================================================
408
+
409
+
410
@dataclass
class HealthConfig:
    """Tunable settings for the health check system.

    Attributes:
        enabled: Whether health checks are enabled.
        liveness_timeout: Timeout for liveness checks (seconds).
        readiness_timeout: Timeout for readiness checks (seconds).
        health_timeout: Timeout for full health checks (seconds).
        disk_space_threshold_mb: Minimum free disk space before unhealthy.
        disk_space_warning_mb: Minimum free disk space before degraded.
    """

    enabled: bool = True
    liveness_timeout: float = 1.0
    readiness_timeout: float = 5.0
    health_timeout: float = 10.0
    disk_space_threshold_mb: int = 100
    disk_space_warning_mb: int = 500

    @classmethod
    def from_toml_dict(cls, data: Dict[str, Any]) -> "HealthConfig":
        """Build a config from a parsed TOML table.

        Keys missing from ``data`` fall back to the values listed below;
        keys that are not recognized settings are ignored. Values are taken
        as-is, without type validation.
        """
        fallbacks: Dict[str, Any] = {
            "enabled": True,
            "liveness_timeout": 1.0,
            "readiness_timeout": 5.0,
            "health_timeout": 10.0,
            "disk_space_threshold_mb": 100,
            "disk_space_warning_mb": 500,
        }
        settings = {
            option: data.get(option, fallback)
            for option, fallback in fallbacks.items()
        }
        return cls(**settings)
441
+
442
+
443
class HealthManager:
    """Manages health checks for the foundry-mcp server.

    Provides three levels of health checks:
    - Liveness: Is the process running? (no checkers by default)
    - Readiness: Can the server handle requests? (checks critical deps)
    - Health: Full health status (all dependencies)

    When ``config.enabled`` is false every probe reports healthy without
    running any checker.
    """

    def __init__(self, config: Optional["HealthConfig"] = None):
        """Create a manager and install the default checkers.

        Args:
            config: Settings to use; a default ``HealthConfig`` when omitted.
        """
        self.config = config or HealthConfig()
        self._liveness_checkers: List[DependencyChecker] = []
        self._readiness_checkers: List[DependencyChecker] = []
        self._health_checkers: List[DependencyChecker] = []
        self._setup_default_checkers()

    def _setup_default_checkers(self) -> None:
        """Set up default dependency checkers."""
        # Readiness checks - critical for serving requests
        specs_checker = SpecsDirectoryChecker()
        disk_checker = DiskSpaceChecker(
            threshold_mb=self.config.disk_space_threshold_mb,
            warning_mb=self.config.disk_space_warning_mb,
        )

        self._readiness_checkers = [specs_checker, disk_checker]

        # Health checks - full system status. The same two instances are
        # shared with the readiness list.
        self._health_checkers = [
            specs_checker,
            disk_checker,
            OpenTelemetryChecker(),
            PrometheusChecker(),
            AIProviderChecker(),
        ]

    def register_checker(
        self,
        checker: "DependencyChecker",
        *,
        liveness: bool = False,
        readiness: bool = False,
        health: bool = True,
    ) -> None:
        """Register a custom dependency checker.

        Args:
            checker: The dependency checker to register
            liveness: Include in liveness checks
            readiness: Include in readiness checks
            health: Include in full health checks (default True)
        """
        if liveness:
            self._liveness_checkers.append(checker)
        if readiness:
            self._readiness_checkers.append(checker)
        if health:
            self._health_checkers.append(checker)

    @staticmethod
    def _disabled_result() -> "HealthResult":
        """Result returned by every probe when health checks are disabled."""
        return HealthResult(
            status=HealthStatus.HEALTHY,
            is_healthy=True,
            message="Health checks disabled",
        )

    @staticmethod
    def _run_checkers(checkers, timeout: float) -> List["DependencyHealth"]:
        """Run each checker, turning a raised exception into an unhealthy result."""
        results: List[DependencyHealth] = []
        for checker in checkers:
            try:
                results.append(checker.check(timeout=timeout))
            except Exception as e:
                results.append(
                    DependencyHealth(
                        # A broken checker may not even expose ``name``; fall
                        # back to its class name so the probe still reports.
                        name=getattr(checker, "name", type(checker).__name__),
                        healthy=False,
                        status=HealthStatus.UNHEALTHY,
                        message=f"Check failed: {e}",
                    )
                )
        return results

    @staticmethod
    def _partition(dependencies):
        """Split results into disjoint ``(unhealthy, degraded)`` lists.

        A result with ``healthy`` false always counts as unhealthy, even if
        its status says DEGRADED, so no result is counted twice.
        """
        unhealthy = [d for d in dependencies if not d.healthy]
        degraded = [
            d
            for d in dependencies
            if d.healthy and d.status == HealthStatus.DEGRADED
        ]
        return unhealthy, degraded

    def check_liveness(self) -> "HealthResult":
        """Check if the server is alive.

        Liveness checks are intentionally minimal - if this code runs,
        we're alive. Custom checkers can be added for process-level health.

        Returns:
            HealthResult indicating liveness status
        """
        if not self.config.enabled:
            return self._disabled_result()

        dependencies = self._run_checkers(
            self._liveness_checkers, self.config.liveness_timeout
        )

        unhealthy = [d for d in dependencies if not d.healthy]
        if unhealthy:
            return HealthResult(
                status=HealthStatus.UNHEALTHY,
                is_healthy=False,
                message=f"Liveness check failed: {unhealthy[0].message}",
                dependencies=dependencies,
            )

        # Also covers the case of no liveness checkers: we're alive.
        return HealthResult(
            status=HealthStatus.HEALTHY,
            is_healthy=True,
            message="Server is alive",
            dependencies=dependencies,
        )

    def check_readiness(self) -> "HealthResult":
        """Check if the server is ready to handle requests.

        Readiness checks verify critical dependencies are available.

        Returns:
            HealthResult indicating readiness status
        """
        if not self.config.enabled:
            return self._disabled_result()

        dependencies = self._run_checkers(
            self._readiness_checkers, self.config.readiness_timeout
        )
        unhealthy, degraded = self._partition(dependencies)

        if unhealthy:
            return HealthResult(
                status=HealthStatus.UNHEALTHY,
                is_healthy=False,
                message=f"Not ready: {unhealthy[0].message}",
                dependencies=dependencies,
            )

        if degraded:
            return HealthResult(
                status=HealthStatus.DEGRADED,
                is_healthy=True,  # Still ready, but degraded
                message=f"Ready with warnings: {degraded[0].message}",
                dependencies=dependencies,
            )

        return HealthResult(
            status=HealthStatus.HEALTHY,
            is_healthy=True,
            message="Server is ready",
            dependencies=dependencies,
        )

    def check_health(self) -> "HealthResult":
        """Perform a full health check of all dependencies.

        Returns:
            HealthResult with complete system health status. ``details``
            holds ``unhealthy_count``, ``degraded_count`` and
            ``healthy_count``, which always sum to the number of checks run.
        """
        if not self.config.enabled:
            return self._disabled_result()

        dependencies = self._run_checkers(
            self._health_checkers, self.config.health_timeout
        )
        unhealthy, degraded = self._partition(dependencies)

        counts = {
            "unhealthy_count": len(unhealthy),
            "degraded_count": len(degraded),
            "healthy_count": len(dependencies) - len(unhealthy) - len(degraded),
        }

        if unhealthy:
            status, is_healthy = HealthStatus.UNHEALTHY, False
            message = f"Unhealthy: {len(unhealthy)} failed check(s)"
        elif degraded:
            status, is_healthy = HealthStatus.DEGRADED, True
            message = f"Degraded: {len(degraded)} warning(s)"
        else:
            status, is_healthy = HealthStatus.HEALTHY, True
            message = "All systems healthy"

        return HealthResult(
            status=status,
            is_healthy=is_healthy,
            message=message,
            dependencies=dependencies,
            details=counts,
        )
686
+
687
+
688
+ # =============================================================================
689
+ # Global Manager Instance
690
+ # =============================================================================
691
+
692
# Process-wide HealthManager instance; created lazily by get_health_manager()
# and cleared by reset_health_manager().
_health_manager: Optional["HealthManager"] = None
# Serializes creation and reset of the instance above.
_manager_lock = threading.Lock()
694
+
695
+
696
def get_health_manager(config: Optional["HealthConfig"] = None) -> "HealthManager":
    """Return the shared health manager, creating it on first use.

    Args:
        config: Settings for the manager. Only honored by the call that
            actually creates the instance; ignored afterwards.

    Returns:
        The global ``HealthManager`` instance.
    """
    global _health_manager
    # Fast path: already created, no lock needed.
    if _health_manager is not None:
        return _health_manager
    with _manager_lock:
        # Re-check under the lock in case another caller created it first.
        if _health_manager is None:
            _health_manager = HealthManager(config)
        return _health_manager
711
+
712
+
713
def reset_health_manager() -> None:
    """Discard the global health manager (intended for tests).

    The next ``get_health_manager()`` call builds a fresh instance.
    """
    global _health_manager
    # Take the same lock used for creation so a reset cannot interleave with it.
    with _manager_lock:
        _health_manager = None
718
+
719
+
720
+ # =============================================================================
721
+ # Convenience Functions
722
+ # =============================================================================
723
+
724
+
725
def check_liveness() -> "HealthResult":
    """Run the liveness probe on the global health manager.

    Returns:
        HealthResult indicating if server is alive
    """
    manager = get_health_manager()
    return manager.check_liveness()
732
+
733
+
734
def check_readiness() -> "HealthResult":
    """Run the readiness probe on the global health manager.

    Returns:
        HealthResult indicating if server is ready
    """
    manager = get_health_manager()
    return manager.check_readiness()
741
+
742
+
743
def check_health() -> "HealthResult":
    """Run the full health check on the global health manager.

    Returns:
        HealthResult with complete system status
    """
    manager = get_health_manager()
    return manager.check_health()