foundry-mcp 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. foundry_mcp/__init__.py +7 -0
  2. foundry_mcp/cli/__init__.py +80 -0
  3. foundry_mcp/cli/__main__.py +9 -0
  4. foundry_mcp/cli/agent.py +96 -0
  5. foundry_mcp/cli/commands/__init__.py +37 -0
  6. foundry_mcp/cli/commands/cache.py +137 -0
  7. foundry_mcp/cli/commands/dashboard.py +148 -0
  8. foundry_mcp/cli/commands/dev.py +446 -0
  9. foundry_mcp/cli/commands/journal.py +377 -0
  10. foundry_mcp/cli/commands/lifecycle.py +274 -0
  11. foundry_mcp/cli/commands/modify.py +824 -0
  12. foundry_mcp/cli/commands/plan.py +633 -0
  13. foundry_mcp/cli/commands/pr.py +393 -0
  14. foundry_mcp/cli/commands/review.py +652 -0
  15. foundry_mcp/cli/commands/session.py +479 -0
  16. foundry_mcp/cli/commands/specs.py +856 -0
  17. foundry_mcp/cli/commands/tasks.py +807 -0
  18. foundry_mcp/cli/commands/testing.py +676 -0
  19. foundry_mcp/cli/commands/validate.py +982 -0
  20. foundry_mcp/cli/config.py +98 -0
  21. foundry_mcp/cli/context.py +259 -0
  22. foundry_mcp/cli/flags.py +266 -0
  23. foundry_mcp/cli/logging.py +212 -0
  24. foundry_mcp/cli/main.py +44 -0
  25. foundry_mcp/cli/output.py +122 -0
  26. foundry_mcp/cli/registry.py +110 -0
  27. foundry_mcp/cli/resilience.py +178 -0
  28. foundry_mcp/cli/transcript.py +217 -0
  29. foundry_mcp/config.py +850 -0
  30. foundry_mcp/core/__init__.py +144 -0
  31. foundry_mcp/core/ai_consultation.py +1636 -0
  32. foundry_mcp/core/cache.py +195 -0
  33. foundry_mcp/core/capabilities.py +446 -0
  34. foundry_mcp/core/concurrency.py +898 -0
  35. foundry_mcp/core/context.py +540 -0
  36. foundry_mcp/core/discovery.py +1603 -0
  37. foundry_mcp/core/error_collection.py +728 -0
  38. foundry_mcp/core/error_store.py +592 -0
  39. foundry_mcp/core/feature_flags.py +592 -0
  40. foundry_mcp/core/health.py +749 -0
  41. foundry_mcp/core/journal.py +694 -0
  42. foundry_mcp/core/lifecycle.py +412 -0
  43. foundry_mcp/core/llm_config.py +1350 -0
  44. foundry_mcp/core/llm_patterns.py +510 -0
  45. foundry_mcp/core/llm_provider.py +1569 -0
  46. foundry_mcp/core/logging_config.py +374 -0
  47. foundry_mcp/core/metrics_persistence.py +584 -0
  48. foundry_mcp/core/metrics_registry.py +327 -0
  49. foundry_mcp/core/metrics_store.py +641 -0
  50. foundry_mcp/core/modifications.py +224 -0
  51. foundry_mcp/core/naming.py +123 -0
  52. foundry_mcp/core/observability.py +1216 -0
  53. foundry_mcp/core/otel.py +452 -0
  54. foundry_mcp/core/otel_stubs.py +264 -0
  55. foundry_mcp/core/pagination.py +255 -0
  56. foundry_mcp/core/progress.py +317 -0
  57. foundry_mcp/core/prometheus.py +577 -0
  58. foundry_mcp/core/prompts/__init__.py +464 -0
  59. foundry_mcp/core/prompts/fidelity_review.py +546 -0
  60. foundry_mcp/core/prompts/markdown_plan_review.py +511 -0
  61. foundry_mcp/core/prompts/plan_review.py +623 -0
  62. foundry_mcp/core/providers/__init__.py +225 -0
  63. foundry_mcp/core/providers/base.py +476 -0
  64. foundry_mcp/core/providers/claude.py +460 -0
  65. foundry_mcp/core/providers/codex.py +619 -0
  66. foundry_mcp/core/providers/cursor_agent.py +642 -0
  67. foundry_mcp/core/providers/detectors.py +488 -0
  68. foundry_mcp/core/providers/gemini.py +405 -0
  69. foundry_mcp/core/providers/opencode.py +616 -0
  70. foundry_mcp/core/providers/opencode_wrapper.js +302 -0
  71. foundry_mcp/core/providers/package-lock.json +24 -0
  72. foundry_mcp/core/providers/package.json +25 -0
  73. foundry_mcp/core/providers/registry.py +607 -0
  74. foundry_mcp/core/providers/test_provider.py +171 -0
  75. foundry_mcp/core/providers/validation.py +729 -0
  76. foundry_mcp/core/rate_limit.py +427 -0
  77. foundry_mcp/core/resilience.py +600 -0
  78. foundry_mcp/core/responses.py +934 -0
  79. foundry_mcp/core/review.py +366 -0
  80. foundry_mcp/core/security.py +438 -0
  81. foundry_mcp/core/spec.py +1650 -0
  82. foundry_mcp/core/task.py +1289 -0
  83. foundry_mcp/core/testing.py +450 -0
  84. foundry_mcp/core/validation.py +2081 -0
  85. foundry_mcp/dashboard/__init__.py +32 -0
  86. foundry_mcp/dashboard/app.py +119 -0
  87. foundry_mcp/dashboard/components/__init__.py +17 -0
  88. foundry_mcp/dashboard/components/cards.py +88 -0
  89. foundry_mcp/dashboard/components/charts.py +234 -0
  90. foundry_mcp/dashboard/components/filters.py +136 -0
  91. foundry_mcp/dashboard/components/tables.py +195 -0
  92. foundry_mcp/dashboard/data/__init__.py +11 -0
  93. foundry_mcp/dashboard/data/stores.py +433 -0
  94. foundry_mcp/dashboard/launcher.py +289 -0
  95. foundry_mcp/dashboard/views/__init__.py +12 -0
  96. foundry_mcp/dashboard/views/errors.py +217 -0
  97. foundry_mcp/dashboard/views/metrics.py +174 -0
  98. foundry_mcp/dashboard/views/overview.py +160 -0
  99. foundry_mcp/dashboard/views/providers.py +83 -0
  100. foundry_mcp/dashboard/views/sdd_workflow.py +255 -0
  101. foundry_mcp/dashboard/views/tool_usage.py +139 -0
  102. foundry_mcp/prompts/__init__.py +9 -0
  103. foundry_mcp/prompts/workflows.py +525 -0
  104. foundry_mcp/resources/__init__.py +9 -0
  105. foundry_mcp/resources/specs.py +591 -0
  106. foundry_mcp/schemas/__init__.py +38 -0
  107. foundry_mcp/schemas/sdd-spec-schema.json +386 -0
  108. foundry_mcp/server.py +164 -0
  109. foundry_mcp/tools/__init__.py +10 -0
  110. foundry_mcp/tools/unified/__init__.py +71 -0
  111. foundry_mcp/tools/unified/authoring.py +1487 -0
  112. foundry_mcp/tools/unified/context_helpers.py +98 -0
  113. foundry_mcp/tools/unified/documentation_helpers.py +198 -0
  114. foundry_mcp/tools/unified/environment.py +939 -0
  115. foundry_mcp/tools/unified/error.py +462 -0
  116. foundry_mcp/tools/unified/health.py +225 -0
  117. foundry_mcp/tools/unified/journal.py +841 -0
  118. foundry_mcp/tools/unified/lifecycle.py +632 -0
  119. foundry_mcp/tools/unified/metrics.py +777 -0
  120. foundry_mcp/tools/unified/plan.py +745 -0
  121. foundry_mcp/tools/unified/pr.py +294 -0
  122. foundry_mcp/tools/unified/provider.py +629 -0
  123. foundry_mcp/tools/unified/review.py +685 -0
  124. foundry_mcp/tools/unified/review_helpers.py +299 -0
  125. foundry_mcp/tools/unified/router.py +102 -0
  126. foundry_mcp/tools/unified/server.py +580 -0
  127. foundry_mcp/tools/unified/spec.py +808 -0
  128. foundry_mcp/tools/unified/task.py +2202 -0
  129. foundry_mcp/tools/unified/test.py +370 -0
  130. foundry_mcp/tools/unified/verification.py +520 -0
  131. foundry_mcp-0.3.3.dist-info/METADATA +337 -0
  132. foundry_mcp-0.3.3.dist-info/RECORD +135 -0
  133. foundry_mcp-0.3.3.dist-info/WHEEL +4 -0
  134. foundry_mcp-0.3.3.dist-info/entry_points.txt +3 -0
  135. foundry_mcp-0.3.3.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,600 @@
1
+ """
2
+ Resilience primitives for MCP tool operations.
3
+
4
+ Provides timeout budgets, retry patterns, circuit breakers, and health checks
5
+ for building robust MCP tools that handle failures gracefully.
6
+
7
+ Timeout Budget Categories
8
+ =========================
9
+
10
+ Use the appropriate timeout category based on operation type:
11
+
12
+ FAST_TIMEOUT (5s) - Cache lookups, simple queries
13
+ MEDIUM_TIMEOUT (30s) - Database operations, API calls
14
+ SLOW_TIMEOUT (120s) - File processing, complex operations
15
+ BACKGROUND_TIMEOUT (600s) - Batch jobs, large transfers
16
+
17
+ Example usage:
18
+
19
+ from foundry_mcp.core.resilience import (
20
+ MEDIUM_TIMEOUT,
21
+ with_timeout,
22
+ retry_with_backoff,
23
+ CircuitBreaker,
24
+ )
25
+
26
+ @mcp.tool()
27
+ @with_timeout(MEDIUM_TIMEOUT, "Database query timed out")
28
+ async def query_database(query: str) -> dict:
29
+ result = await db.execute(query)
30
+ return asdict(success_response(data={"result": result}))
31
+ """
32
+
33
+ from dataclasses import dataclass, field
34
+ from datetime import datetime
35
+ from enum import Enum
36
+ from functools import wraps
37
+ from threading import Lock
38
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Type, TypeVar
39
+ import asyncio
40
+ import random
41
+ import time
42
+
43
+
44
+ # ---------------------------------------------------------------------------
45
+ # Timeout Budget Constants
46
+ # ---------------------------------------------------------------------------
47
+
48
# Each tier pairs a default budget with a ``*_MAX`` upper bound (in seconds).
# NOTE(review): nothing in this module enforces the ``*_MAX`` values; callers
# are presumably expected to clamp against them - confirm at the call sites.

#: Fast tier: cache lookups, simple queries (default 5s, max 10s).
FAST_TIMEOUT: float = 5.0
FAST_TIMEOUT_MAX: float = 10.0

#: Medium tier: database operations, API calls (default 30s, max 60s).
MEDIUM_TIMEOUT: float = 30.0
MEDIUM_TIMEOUT_MAX: float = 60.0

#: Slow tier: file processing, complex operations (default 120s, max 300s).
SLOW_TIMEOUT: float = 120.0
SLOW_TIMEOUT_MAX: float = 300.0

#: Background tier: batch jobs, large transfers (default 600s, max 3600s).
BACKGROUND_TIMEOUT: float = 600.0
BACKGROUND_TIMEOUT_MAX: float = 3600.0


#: Generic return type shared by the decorators and retry helper below.
T = TypeVar("T")
66
+
67
+
68
+ # ---------------------------------------------------------------------------
69
+ # Timeout Error
70
+ # ---------------------------------------------------------------------------
71
+
72
+
73
class TimeoutException(Exception):
    """Raised when an operation fails to complete within its time limit.

    Attributes:
        timeout_seconds: The limit (in seconds) that was exceeded, or None
            when the raiser did not supply it.
        operation: Name of the operation that timed out, or None when the
            raiser did not supply it.
    """

    def __init__(
        self,
        message: str,
        timeout_seconds: Optional[float] = None,
        operation: Optional[str] = None,
    ):
        # Attach the diagnostic details, then let Exception store the message.
        self.timeout_seconds = timeout_seconds
        self.operation = operation
        super().__init__(message)
90
+
91
+
92
+ # ---------------------------------------------------------------------------
93
+ # Timeout Decorator
94
+ # ---------------------------------------------------------------------------
95
+
96
+
97
def with_timeout(
    seconds: float,
    error_message: Optional[str] = None,
) -> Callable[[Callable[..., T]], Callable[..., T]]:
    """Build a decorator that enforces a time limit on an async function.

    The decorated coroutine function is awaited through ``asyncio.wait_for``.
    When the limit is exceeded, the wrapper raises ``TimeoutException``
    explicitly chained (``raise ... from``) to the underlying
    ``asyncio.TimeoutError`` so the original cause stays on the traceback.

    Args:
        seconds: Timeout duration in seconds.
        error_message: Message for the raised exception. When omitted (or
            empty), ``"<function name> timed out after <seconds>s"`` is used.

    Returns:
        A decorator that turns an async function into an async wrapper with
        timeout enforcement. The wrapper keeps the original's metadata via
        ``functools.wraps``.

    Raises:
        TimeoutException: Raised by the wrapper when the operation exceeds
            the timeout. Its ``timeout_seconds`` is ``seconds`` and its
            ``operation`` is the wrapped function's ``__name__``.

    Note:
        Any ``asyncio.TimeoutError`` that escapes the wrapped function itself
        (for example from a nested ``wait_for``) is caught by the same
        handler and therefore reported as this decorator's timeout.

    Example:
        >>> @with_timeout(30, "Database query timed out")
        ... async def query_database(query: str):
        ...     return await db.execute(query)
    """

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        @wraps(func)
        async def wrapper(*args: Any, **kwargs: Any) -> T:
            try:
                return await asyncio.wait_for(
                    func(*args, **kwargs),
                    timeout=seconds,
                )
            except asyncio.TimeoutError as exc:
                msg = error_message or f"{func.__name__} timed out after {seconds}s"
                # Explicit chaining marks the asyncio error as the direct
                # cause instead of an unrelated "during handling" context.
                raise TimeoutException(
                    msg,
                    timeout_seconds=seconds,
                    operation=func.__name__,
                ) from exc

        return wrapper

    return decorator
141
+
142
+
143
+ # ---------------------------------------------------------------------------
144
+ # Retry with Backoff
145
+ # ---------------------------------------------------------------------------
146
+
147
+
148
def retry_with_backoff(
    func: Callable[..., T],
    *,
    max_retries: int = 3,
    base_delay: float = 1.0,
    max_delay: float = 60.0,
    exponential_base: float = 2.0,
    jitter: bool = True,
    retryable_exceptions: Optional[List[Type[Exception]]] = None,
) -> T:
    """Call ``func`` and retry it with exponential backoff on failure.

    ``func`` is attempted up to ``max_retries + 1`` times. Between attempts
    the calling thread sleeps (``time.sleep``) for
    ``base_delay * exponential_base ** attempt`` seconds, capped at
    ``max_delay``. With ``jitter`` enabled the delay is scaled by a random
    factor in ``[0.5, 1.5)`` and the cap is applied again, so no single sleep
    ever exceeds ``max_delay``.

    Args:
        func: Function to retry (should take no arguments; use lambda for args).
        max_retries: Maximum number of retry attempts after the first call
            (default 3).
        base_delay: Initial delay in seconds (default 1.0).
        max_delay: Maximum delay cap in seconds (default 60.0).
        exponential_base: Multiplier for each retry (default 2.0).
        jitter: Add randomness to delay (default True).
        retryable_exceptions: Exception types to retry on. ``None`` or an
            empty list means every ``Exception`` is retried.

    Returns:
        Result from the function on success.

    Raises:
        Exception: The last retryable exception once all attempts are used.
            Exceptions not matching ``retryable_exceptions`` propagate
            immediately without a retry.
        RuntimeError: If no attempt was made at all, which happens when
            ``max_retries`` is negative.

    Example:
        >>> result = retry_with_backoff(
        ...     lambda: http_client.get(url),
        ...     max_retries=3,
        ...     retryable_exceptions=[ConnectionError, TimeoutException],
        ... )
    """
    # Named ``retry_on`` so the module-level ``retryable`` decorator is not
    # shadowed inside this function.
    retry_on = tuple(retryable_exceptions or [Exception])
    last_exception: Optional[Exception] = None

    for attempt in range(max_retries + 1):
        try:
            return func()
        except retry_on as exc:
            last_exception = exc

            # Final attempt failed: skip the pointless sleep and re-raise.
            if attempt == max_retries:
                break

            # Exponential backoff, capped at max_delay.
            delay = min(base_delay * (exponential_base**attempt), max_delay)

            if jitter:
                # Jitter spreads out simultaneous retries; re-apply the cap so
                # the upward half of the jitter range cannot exceed max_delay.
                delay = min(delay * (0.5 + random.random()), max_delay)

            time.sleep(delay)

    # Identity check: an exception type defining __len__/__bool__ could be
    # falsy and would otherwise be mistaken for "no exception recorded".
    if last_exception is not None:
        raise last_exception
    raise RuntimeError("retry_with_backoff: unexpected state")
210
+
211
+
212
def retryable(
    max_retries: int = 3,
    delay: float = 1.0,
    exceptions: Tuple[Type[Exception], ...] = (Exception,),
) -> Callable[[Callable[..., T]], Callable[..., T]]:
    """Decorator form of ``retry_with_backoff`` for synchronous functions.

    Every call to the decorated function is routed through
    ``retry_with_backoff``; the remaining backoff settings (delay cap,
    multiplier, jitter) are left at that helper's defaults.

    Args:
        max_retries: Maximum retry attempts (default 3).
        delay: Base delay in seconds (default 1.0).
        exceptions: Tuple of exceptions to retry on.

    Returns:
        A decorator that adds retry logic to the function it wraps.

    Example:
        >>> @retryable(max_retries=3, exceptions=(ConnectionError,))
        ... def call_api(endpoint: str):
        ...     return http_client.get(endpoint)
    """

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> T:
            # Bind this call's arguments into a zero-argument callable, which
            # is the shape retry_with_backoff expects.
            def attempt() -> T:
                return func(*args, **kwargs)

            return retry_with_backoff(
                attempt,
                max_retries=max_retries,
                base_delay=delay,
                retryable_exceptions=[*exceptions],
            )

        return wrapper

    return decorator
246
+
247
+
248
+ # ---------------------------------------------------------------------------
249
+ # Circuit Breaker
250
+ # ---------------------------------------------------------------------------
251
+
252
+
253
class CircuitState(Enum):
    """The three states a circuit breaker can be in.

    Members:
        CLOSED: Normal operation; requests flow through.
        OPEN: Failure threshold exceeded; requests are rejected.
        HALF_OPEN: Recovery is being tested; a limited number of requests
            is allowed.
    """

    CLOSED = "closed"
    OPEN = "open"
    HALF_OPEN = "half_open"
264
+
265
+
266
class CircuitBreakerError(Exception):
    """Raised when a circuit breaker rejects a request.

    Attributes:
        breaker_name: Name of the circuit breaker that rejected the call.
        state: State of the breaker supplied by the raiser.
        retry_after: Seconds until the recovery timeout elapses, when known.
    """

    def __init__(
        self,
        message: str,
        breaker_name: Optional[str] = None,
        state: Optional[CircuitState] = None,
        retry_after: Optional[float] = None,
    ):
        # Record the breaker details, then let Exception store the message.
        self.breaker_name = breaker_name
        self.state = state
        self.retry_after = retry_after
        super().__init__(message)
286
+
287
+
288
@dataclass
class CircuitBreaker:
    """Circuit breaker for external dependencies.

    Prevents cascade failures by tracking failures and temporarily
    blocking requests when a dependency is unhealthy. All state reads and
    writes performed by the methods below happen while holding an internal
    ``threading.Lock``.

    States:
        CLOSED: Normal operation, requests pass through.
        OPEN: Too many failures, requests rejected immediately.
        HALF_OPEN: Testing recovery, limited requests allowed.

    Attributes:
        name: Identifier for this circuit breaker.
        failure_threshold: Failures before opening circuit (default 5).
        recovery_timeout: Seconds before testing recovery (default 30).
        half_open_max_calls: Test calls allowed in half-open (default 3).
            The call that moves the breaker from OPEN to HALF_OPEN counts as
            the first of these.
        state: Current ``CircuitState`` (not an ``__init__`` argument).
        failure_count: Failures recorded since the last reset.
        last_failure_time: ``time.time()`` value of the most recent failure,
            or 0.0 if none has been recorded.
        half_open_calls: Trial calls admitted in the current half-open phase.

    Example:
        >>> breaker = CircuitBreaker(name="database")
        >>>
        >>> if breaker.can_execute():
        ...     try:
        ...         result = db.query()
        ...         breaker.record_success()
        ...     except Exception:
        ...         breaker.record_failure()
        ...         raise
        ... else:
        ...     raise CircuitBreakerError("Database circuit open")
    """

    name: str = "default"
    failure_threshold: int = 5
    recovery_timeout: float = 30.0
    half_open_max_calls: int = 3

    # Internal runtime state: excluded from __init__ (init=False) and seeded
    # from the field defaults below.
    state: CircuitState = field(default=CircuitState.CLOSED, init=False)
    failure_count: int = field(default=0, init=False)
    last_failure_time: float = field(default=0.0, init=False)
    half_open_calls: int = field(default=0, init=False)
    _lock: Lock = field(default_factory=Lock, init=False)

    def can_execute(self) -> bool:
        """Check if request should proceed.

        A CLOSED breaker always admits the call. An OPEN breaker rejects it
        until ``recovery_timeout`` seconds have passed since the last
        recorded failure; the first call after that moves the breaker to
        HALF_OPEN and is admitted as trial call number one. A HALF_OPEN
        breaker admits calls until ``half_open_max_calls`` trial calls have
        been handed out.

        Returns:
            True if request can proceed, False if it must be rejected.
        """
        with self._lock:
            if self.state == CircuitState.CLOSED:
                return True

            if self.state == CircuitState.OPEN:
                # Check if recovery timeout has elapsed
                if time.time() - self.last_failure_time >= self.recovery_timeout:
                    self.state = CircuitState.HALF_OPEN
                    # The call admitted here is itself a trial call, so it
                    # must count toward half_open_max_calls. Starting at 0
                    # would admit one call more than configured.
                    self.half_open_calls = 1
                    return True
                return False

            if self.state == CircuitState.HALF_OPEN:
                if self.half_open_calls < self.half_open_max_calls:
                    self.half_open_calls += 1
                    return True
                return False

            return False

    def record_success(self) -> None:
        """Record successful call.

        In HALF_OPEN state the circuit closes (and the failure count is
        cleared) once the number of admitted trial calls has reached
        ``half_open_max_calls``; ``half_open_calls`` itself is only ever
        incremented by ``can_execute()``. In any other state the failure
        count is reset to zero and the state is left unchanged.
        """
        with self._lock:
            if self.state == CircuitState.HALF_OPEN:
                # Counter was already advanced when the call was admitted.
                if self.half_open_calls >= self.half_open_max_calls:
                    # Recovery successful
                    self.state = CircuitState.CLOSED
                    self.failure_count = 0
            else:
                # Reset failure count on success
                self.failure_count = 0

    def record_failure(self) -> None:
        """Record failed call.

        Always increments ``failure_count`` and stamps ``last_failure_time``
        with the current ``time.time()``. In HALF_OPEN state any failure
        returns the breaker to OPEN; otherwise the breaker opens once
        ``failure_count`` reaches ``failure_threshold``.
        """
        with self._lock:
            self.failure_count += 1
            self.last_failure_time = time.time()

            if self.state == CircuitState.HALF_OPEN:
                # Recovery failed, back to open
                self.state = CircuitState.OPEN

            elif self.failure_count >= self.failure_threshold:
                self.state = CircuitState.OPEN

    def reset(self) -> None:
        """Reset circuit breaker to closed state and clear all counters."""
        with self._lock:
            self.state = CircuitState.CLOSED
            self.failure_count = 0
            self.half_open_calls = 0
            self.last_failure_time = 0.0

    def get_status(self) -> Dict[str, Any]:
        """Get current circuit breaker status.

        Returns:
            Dict with the keys ``name``, ``state`` (the enum's string value),
            ``failure_count``, ``failure_threshold``, ``recovery_timeout``
            and ``retry_after_seconds``. The last one is the non-negative
            time left until the recovery timeout elapses while the breaker
            is OPEN, and None in every other state.
        """
        with self._lock:
            retry_after = None
            if self.state == CircuitState.OPEN:
                elapsed = time.time() - self.last_failure_time
                # Clamp at zero once the recovery window has already passed.
                retry_after = max(0.0, self.recovery_timeout - elapsed)

            return {
                "name": self.name,
                "state": self.state.value,
                "failure_count": self.failure_count,
                "failure_threshold": self.failure_threshold,
                "recovery_timeout": self.recovery_timeout,
                "retry_after_seconds": retry_after,
            }
422
+
423
+
424
def with_circuit_breaker(
    breaker: CircuitBreaker,
) -> Callable[[Callable[..., T]], Callable[..., T]]:
    """Build a decorator that guards a function with ``breaker``.

    Each call first asks the breaker for permission. Admitted calls run the
    wrapped function and report the outcome back to the breaker: success
    when it returns, failure when it raises an ``Exception`` (which is then
    re-raised unchanged). Rejected calls never reach the wrapped function.

    Args:
        breaker: CircuitBreaker instance to use.

    Returns:
        A decorator whose wrapper checks the circuit before execution.

    Raises:
        CircuitBreakerError: Raised by the wrapper when the breaker rejects
            the call.

    Example:
        >>> db_breaker = CircuitBreaker(name="database", failure_threshold=3)
        >>>
        >>> @with_circuit_breaker(db_breaker)
        ... def query_database(sql: str):
        ...     return db.execute(sql)
    """

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> T:
            if breaker.can_execute():
                try:
                    outcome = func(*args, **kwargs)
                    breaker.record_success()
                    return outcome
                except Exception:
                    breaker.record_failure()
                    raise

            # Rejected: snapshot the breaker so the error can carry the
            # remaining wait time.
            snapshot = breaker.get_status()
            raise CircuitBreakerError(
                f"Circuit breaker '{breaker.name}' is open",
                breaker_name=breaker.name,
                state=breaker.state,
                retry_after=snapshot.get("retry_after_seconds"),
            )

        return wrapper

    return decorator
469
+
470
+
471
+ # ---------------------------------------------------------------------------
472
+ # Health Check Utilities
473
+ # ---------------------------------------------------------------------------
474
+
475
+
476
@dataclass
class HealthStatus:
    """Outcome of a single dependency health check.

    Attributes:
        name: Identifier of the dependency that was checked.
        healthy: True when the check succeeded, False otherwise.
        latency_ms: How long the check took, in milliseconds.
        last_check: Timestamp recorded for the check.
        error: Failure description for an unhealthy result; None by default.
    """

    name: str
    healthy: bool
    latency_ms: float
    last_check: datetime
    error: Optional[str] = None
493
+
494
+
495
async def health_check(
    name: str,
    check_func: Callable[[], Any],
    timeout: float = FAST_TIMEOUT,
) -> HealthStatus:
    """Check health of a dependency with timeout.

    ``check_func`` is called with no arguments. If it returns an awaitable
    (coroutine, ``asyncio`` Future/Task, or any object implementing
    ``__await__``), that awaitable is awaited under ``asyncio.wait_for`` with
    the given ``timeout``. A plain return value counts as an immediate
    success. This function never raises for a failing check: any
    ``Exception`` is converted into an unhealthy ``HealthStatus``.

    Args:
        name: Identifier for the dependency.
        check_func: Sync or async callable that tests dependency health.
        timeout: Maximum time to wait for an awaitable check result
            (default FAST_TIMEOUT).

    Returns:
        HealthStatus with ``healthy``, the measured ``latency_ms``, the check
        timestamp and, for failures, an ``error`` message (a timeout notice
        or ``str()`` of the raised exception).

    Note:
        A synchronous ``check_func`` runs inline inside this coroutine, so
        ``timeout`` is not applied to it; only awaitable results are bounded.

    Example:
        >>> status = await health_check(
        ...     "database",
        ...     lambda: db.execute("SELECT 1"),
        ... )
        >>> if not status.healthy:
        ...     logger.warning(f"DB unhealthy: {status.error}")
    """
    # Function-scope import: the module's import block does not pull in inspect.
    import inspect

    start = time.perf_counter()
    error: Optional[str] = None
    try:
        result = check_func()
        # isawaitable (rather than iscoroutine) also covers Futures/Tasks and
        # custom awaitables, which would otherwise be dropped un-awaited and
        # reported as healthy.
        if inspect.isawaitable(result):
            await asyncio.wait_for(result, timeout=timeout)
    except asyncio.TimeoutError:
        error = f"Health check timed out after {timeout}s"
    except Exception as exc:
        error = str(exc)

    latency = (time.perf_counter() - start) * 1000
    return HealthStatus(
        name=name,
        # Compare against None: str(exc) may legitimately be an empty string.
        healthy=error is None,
        latency_ms=latency,
        last_check=datetime.utcnow(),
        error=error,
    )
550
+
551
+
552
async def check_dependencies(
    checks: Dict[str, Callable[[], Any]],
    timeout_per_check: float = FAST_TIMEOUT,
) -> Dict[str, Any]:
    """Run several dependency health checks concurrently and summarize them.

    Args:
        checks: Dict mapping dependency names to check functions.
        timeout_per_check: Timeout handed to each individual ``health_check``.

    Returns:
        Dict with four keys: ``status`` (``"healthy"`` when every check
        passed, otherwise ``"degraded"``), ``dependencies`` (per-name dict of
        ``healthy``, ``latency_ms`` rounded to two decimals, and ``error``),
        ``unhealthy`` (names of the failing dependencies) and ``checked_at``
        (ISO-formatted ``datetime.utcnow()`` timestamp).

    Example:
        >>> results = await check_dependencies({
        ...     "database": lambda: db.execute("SELECT 1"),
        ...     "cache": lambda: cache.ping(),
        ...     "api": lambda: http.get(health_url),
        ... })
        >>> if results["status"] == "degraded":
        ...     logger.warning(f"Unhealthy: {results['unhealthy']}")
    """
    # One health_check coroutine per dependency, awaited together.
    pending = [
        health_check(dep_name, probe, timeout_per_check)
        for dep_name, probe in checks.items()
    ]
    statuses = await asyncio.gather(*pending, return_exceptions=False)

    report: Dict[str, Dict[str, Any]] = {
        outcome.name: {
            "healthy": outcome.healthy,
            "latency_ms": round(outcome.latency_ms, 2),
            "error": outcome.error,
        }
        for outcome in statuses
    }
    failing: List[str] = [
        outcome.name for outcome in statuses if not outcome.healthy
    ]

    return {
        "status": "degraded" if failing else "healthy",
        "dependencies": report,
        "unhealthy": failing,
        "checked_at": datetime.utcnow().isoformat(),
    }