mcp-hangar 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160):
  1. mcp_hangar/__init__.py +139 -0
  2. mcp_hangar/application/__init__.py +1 -0
  3. mcp_hangar/application/commands/__init__.py +67 -0
  4. mcp_hangar/application/commands/auth_commands.py +118 -0
  5. mcp_hangar/application/commands/auth_handlers.py +296 -0
  6. mcp_hangar/application/commands/commands.py +59 -0
  7. mcp_hangar/application/commands/handlers.py +189 -0
  8. mcp_hangar/application/discovery/__init__.py +21 -0
  9. mcp_hangar/application/discovery/discovery_metrics.py +283 -0
  10. mcp_hangar/application/discovery/discovery_orchestrator.py +497 -0
  11. mcp_hangar/application/discovery/lifecycle_manager.py +315 -0
  12. mcp_hangar/application/discovery/security_validator.py +414 -0
  13. mcp_hangar/application/event_handlers/__init__.py +50 -0
  14. mcp_hangar/application/event_handlers/alert_handler.py +191 -0
  15. mcp_hangar/application/event_handlers/audit_handler.py +203 -0
  16. mcp_hangar/application/event_handlers/knowledge_base_handler.py +120 -0
  17. mcp_hangar/application/event_handlers/logging_handler.py +69 -0
  18. mcp_hangar/application/event_handlers/metrics_handler.py +152 -0
  19. mcp_hangar/application/event_handlers/persistent_audit_store.py +217 -0
  20. mcp_hangar/application/event_handlers/security_handler.py +604 -0
  21. mcp_hangar/application/mcp/tooling.py +158 -0
  22. mcp_hangar/application/ports/__init__.py +9 -0
  23. mcp_hangar/application/ports/observability.py +237 -0
  24. mcp_hangar/application/queries/__init__.py +52 -0
  25. mcp_hangar/application/queries/auth_handlers.py +237 -0
  26. mcp_hangar/application/queries/auth_queries.py +118 -0
  27. mcp_hangar/application/queries/handlers.py +227 -0
  28. mcp_hangar/application/read_models/__init__.py +11 -0
  29. mcp_hangar/application/read_models/provider_views.py +139 -0
  30. mcp_hangar/application/sagas/__init__.py +11 -0
  31. mcp_hangar/application/sagas/group_rebalance_saga.py +137 -0
  32. mcp_hangar/application/sagas/provider_failover_saga.py +266 -0
  33. mcp_hangar/application/sagas/provider_recovery_saga.py +172 -0
  34. mcp_hangar/application/services/__init__.py +9 -0
  35. mcp_hangar/application/services/provider_service.py +208 -0
  36. mcp_hangar/application/services/traced_provider_service.py +211 -0
  37. mcp_hangar/bootstrap/runtime.py +328 -0
  38. mcp_hangar/context.py +178 -0
  39. mcp_hangar/domain/__init__.py +117 -0
  40. mcp_hangar/domain/contracts/__init__.py +57 -0
  41. mcp_hangar/domain/contracts/authentication.py +225 -0
  42. mcp_hangar/domain/contracts/authorization.py +229 -0
  43. mcp_hangar/domain/contracts/event_store.py +178 -0
  44. mcp_hangar/domain/contracts/metrics_publisher.py +59 -0
  45. mcp_hangar/domain/contracts/persistence.py +383 -0
  46. mcp_hangar/domain/contracts/provider_runtime.py +146 -0
  47. mcp_hangar/domain/discovery/__init__.py +20 -0
  48. mcp_hangar/domain/discovery/conflict_resolver.py +267 -0
  49. mcp_hangar/domain/discovery/discovered_provider.py +185 -0
  50. mcp_hangar/domain/discovery/discovery_service.py +412 -0
  51. mcp_hangar/domain/discovery/discovery_source.py +192 -0
  52. mcp_hangar/domain/events.py +433 -0
  53. mcp_hangar/domain/exceptions.py +525 -0
  54. mcp_hangar/domain/model/__init__.py +70 -0
  55. mcp_hangar/domain/model/aggregate.py +58 -0
  56. mcp_hangar/domain/model/circuit_breaker.py +152 -0
  57. mcp_hangar/domain/model/event_sourced_api_key.py +413 -0
  58. mcp_hangar/domain/model/event_sourced_provider.py +423 -0
  59. mcp_hangar/domain/model/event_sourced_role_assignment.py +268 -0
  60. mcp_hangar/domain/model/health_tracker.py +183 -0
  61. mcp_hangar/domain/model/load_balancer.py +185 -0
  62. mcp_hangar/domain/model/provider.py +810 -0
  63. mcp_hangar/domain/model/provider_group.py +656 -0
  64. mcp_hangar/domain/model/tool_catalog.py +105 -0
  65. mcp_hangar/domain/policies/__init__.py +19 -0
  66. mcp_hangar/domain/policies/provider_health.py +187 -0
  67. mcp_hangar/domain/repository.py +249 -0
  68. mcp_hangar/domain/security/__init__.py +85 -0
  69. mcp_hangar/domain/security/input_validator.py +710 -0
  70. mcp_hangar/domain/security/rate_limiter.py +387 -0
  71. mcp_hangar/domain/security/roles.py +237 -0
  72. mcp_hangar/domain/security/sanitizer.py +387 -0
  73. mcp_hangar/domain/security/secrets.py +501 -0
  74. mcp_hangar/domain/services/__init__.py +20 -0
  75. mcp_hangar/domain/services/audit_service.py +376 -0
  76. mcp_hangar/domain/services/image_builder.py +328 -0
  77. mcp_hangar/domain/services/provider_launcher.py +1046 -0
  78. mcp_hangar/domain/value_objects.py +1138 -0
  79. mcp_hangar/errors.py +818 -0
  80. mcp_hangar/fastmcp_server.py +1105 -0
  81. mcp_hangar/gc.py +134 -0
  82. mcp_hangar/infrastructure/__init__.py +79 -0
  83. mcp_hangar/infrastructure/async_executor.py +133 -0
  84. mcp_hangar/infrastructure/auth/__init__.py +37 -0
  85. mcp_hangar/infrastructure/auth/api_key_authenticator.py +388 -0
  86. mcp_hangar/infrastructure/auth/event_sourced_store.py +567 -0
  87. mcp_hangar/infrastructure/auth/jwt_authenticator.py +360 -0
  88. mcp_hangar/infrastructure/auth/middleware.py +340 -0
  89. mcp_hangar/infrastructure/auth/opa_authorizer.py +243 -0
  90. mcp_hangar/infrastructure/auth/postgres_store.py +659 -0
  91. mcp_hangar/infrastructure/auth/projections.py +366 -0
  92. mcp_hangar/infrastructure/auth/rate_limiter.py +311 -0
  93. mcp_hangar/infrastructure/auth/rbac_authorizer.py +323 -0
  94. mcp_hangar/infrastructure/auth/sqlite_store.py +624 -0
  95. mcp_hangar/infrastructure/command_bus.py +112 -0
  96. mcp_hangar/infrastructure/discovery/__init__.py +110 -0
  97. mcp_hangar/infrastructure/discovery/docker_source.py +289 -0
  98. mcp_hangar/infrastructure/discovery/entrypoint_source.py +249 -0
  99. mcp_hangar/infrastructure/discovery/filesystem_source.py +383 -0
  100. mcp_hangar/infrastructure/discovery/kubernetes_source.py +247 -0
  101. mcp_hangar/infrastructure/event_bus.py +260 -0
  102. mcp_hangar/infrastructure/event_sourced_repository.py +443 -0
  103. mcp_hangar/infrastructure/event_store.py +396 -0
  104. mcp_hangar/infrastructure/knowledge_base/__init__.py +259 -0
  105. mcp_hangar/infrastructure/knowledge_base/contracts.py +202 -0
  106. mcp_hangar/infrastructure/knowledge_base/memory.py +177 -0
  107. mcp_hangar/infrastructure/knowledge_base/postgres.py +545 -0
  108. mcp_hangar/infrastructure/knowledge_base/sqlite.py +513 -0
  109. mcp_hangar/infrastructure/metrics_publisher.py +36 -0
  110. mcp_hangar/infrastructure/observability/__init__.py +10 -0
  111. mcp_hangar/infrastructure/observability/langfuse_adapter.py +534 -0
  112. mcp_hangar/infrastructure/persistence/__init__.py +33 -0
  113. mcp_hangar/infrastructure/persistence/audit_repository.py +371 -0
  114. mcp_hangar/infrastructure/persistence/config_repository.py +398 -0
  115. mcp_hangar/infrastructure/persistence/database.py +333 -0
  116. mcp_hangar/infrastructure/persistence/database_common.py +330 -0
  117. mcp_hangar/infrastructure/persistence/event_serializer.py +280 -0
  118. mcp_hangar/infrastructure/persistence/event_upcaster.py +166 -0
  119. mcp_hangar/infrastructure/persistence/in_memory_event_store.py +150 -0
  120. mcp_hangar/infrastructure/persistence/recovery_service.py +312 -0
  121. mcp_hangar/infrastructure/persistence/sqlite_event_store.py +386 -0
  122. mcp_hangar/infrastructure/persistence/unit_of_work.py +409 -0
  123. mcp_hangar/infrastructure/persistence/upcasters/README.md +13 -0
  124. mcp_hangar/infrastructure/persistence/upcasters/__init__.py +7 -0
  125. mcp_hangar/infrastructure/query_bus.py +153 -0
  126. mcp_hangar/infrastructure/saga_manager.py +401 -0
  127. mcp_hangar/logging_config.py +209 -0
  128. mcp_hangar/metrics.py +1007 -0
  129. mcp_hangar/models.py +31 -0
  130. mcp_hangar/observability/__init__.py +54 -0
  131. mcp_hangar/observability/health.py +487 -0
  132. mcp_hangar/observability/metrics.py +319 -0
  133. mcp_hangar/observability/tracing.py +433 -0
  134. mcp_hangar/progress.py +542 -0
  135. mcp_hangar/retry.py +613 -0
  136. mcp_hangar/server/__init__.py +120 -0
  137. mcp_hangar/server/__main__.py +6 -0
  138. mcp_hangar/server/auth_bootstrap.py +340 -0
  139. mcp_hangar/server/auth_cli.py +335 -0
  140. mcp_hangar/server/auth_config.py +305 -0
  141. mcp_hangar/server/bootstrap.py +735 -0
  142. mcp_hangar/server/cli.py +161 -0
  143. mcp_hangar/server/config.py +224 -0
  144. mcp_hangar/server/context.py +215 -0
  145. mcp_hangar/server/http_auth_middleware.py +165 -0
  146. mcp_hangar/server/lifecycle.py +467 -0
  147. mcp_hangar/server/state.py +117 -0
  148. mcp_hangar/server/tools/__init__.py +16 -0
  149. mcp_hangar/server/tools/discovery.py +186 -0
  150. mcp_hangar/server/tools/groups.py +75 -0
  151. mcp_hangar/server/tools/health.py +301 -0
  152. mcp_hangar/server/tools/provider.py +939 -0
  153. mcp_hangar/server/tools/registry.py +320 -0
  154. mcp_hangar/server/validation.py +113 -0
  155. mcp_hangar/stdio_client.py +229 -0
  156. mcp_hangar-0.2.0.dist-info/METADATA +347 -0
  157. mcp_hangar-0.2.0.dist-info/RECORD +160 -0
  158. mcp_hangar-0.2.0.dist-info/WHEEL +4 -0
  159. mcp_hangar-0.2.0.dist-info/entry_points.txt +2 -0
  160. mcp_hangar-0.2.0.dist-info/licenses/LICENSE +21 -0
mcp_hangar/retry.py ADDED
@@ -0,0 +1,613 @@
1
+ """Automatic retry with exponential backoff.
2
+
3
+ This module provides retry functionality for transient failures,
4
+ including:
5
+
6
+ - Configurable retry policies
7
+ - Exponential, linear, and constant backoff strategies
8
+ - Per-provider retry configuration
9
+ - Circuit breaker integration
10
+
11
+ Usage example::
12
+
13
+ from mcp_hangar import RetryPolicy, BackoffStrategy, with_retry
14
+
15
+ policy = RetryPolicy(
16
+ max_attempts=3,
17
+ backoff=BackoffStrategy.EXPONENTIAL
18
+ )
19
+
20
+ @with_retry(policy)
21
+ def call_provider():
22
+ return risky_operation()
23
+
24
+ See docs/guides/UX_IMPROVEMENTS.md for more examples.
25
+ """
26
+
27
+ import asyncio
28
+ from dataclasses import dataclass, field
29
+ from enum import Enum
30
+ import time
31
+ from typing import Any, Callable, Dict, List, Optional, TypeVar
32
+
33
+ from .errors import is_retryable
34
+ from .logging_config import get_logger
35
+
36
# Module-level logger obtained from the package's logging_config helper; the
# retry functions in this module emit their events through it using keyword
# arguments for the event fields.
logger = get_logger(__name__)

# Generic type variable used to annotate the return type of the callable
# passed to retry_sync.
T = TypeVar("T")
39
+
40
+
41
class BackoffStrategy(str, Enum):
    """How the wait between consecutive retry attempts evolves.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value and can be built from it, e.g.
    ``BackoffStrategy("linear")``.
    """

    # Delay doubles with every attempt.
    EXPONENTIAL = "exponential"
    # Delay grows by one base delay per attempt.
    LINEAR = "linear"
    # Delay stays at the base delay.
    CONSTANT = "constant"
47
+
48
+
49
@dataclass
class RetryPolicy:
    """Configuration for automatic retry behavior.

    Attributes:
        max_attempts: Maximum number of attempts (including initial)
        backoff: Backoff strategy (exponential, linear, constant)
        initial_delay: Initial delay in seconds before first retry
        max_delay: Maximum delay cap in seconds
        retry_on: Patterns naming the errors to retry on. ``should_retry``
            matches each one case-insensitively as a substring of the
            exception's type name or message.
        jitter: Whether to add random jitter to delays
        jitter_factor: Jitter factor (0.0 to 1.0)
    """

    # Single source of truth for the default patterns. Deliberately left
    # un-annotated so the dataclass machinery does not turn it into a field,
    # and kept as a tuple so it cannot be mutated through an instance.
    DEFAULT_RETRY_ON = (
        "MalformedJSON",
        "JSONDecodeError",
        "Timeout",
        "TimeoutError",
        "ConnectionError",
        "ProviderNotResponding",
        "TransientError",
        "ProviderProtocolError",
        "NetworkError",
    )

    max_attempts: int = 3
    backoff: BackoffStrategy = BackoffStrategy.EXPONENTIAL
    initial_delay: float = 1.0
    max_delay: float = 30.0
    # Every instance gets its own list, so editing one policy never leaks
    # into another.
    retry_on: List[str] = field(default_factory=lambda: list(RetryPolicy.DEFAULT_RETRY_ON))
    jitter: bool = True
    jitter_factor: float = 0.25

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "RetryPolicy":
        """Create RetryPolicy from dictionary (e.g., from config.yaml).

        Missing keys fall back to the same defaults as the constructor.

        Args:
            data: Mapping with any subset of the policy field names.

        Returns:
            A new policy owning an independent copy of ``retry_on``.

        Raises:
            ValueError: If ``backoff`` is a string that names no strategy.
        """
        backoff = data.get("backoff", "exponential")
        if isinstance(backoff, str):
            # Tolerate "Exponential" / " linear " coming from hand-written
            # config. Enum members are str too and normalise to themselves.
            backoff = BackoffStrategy(backoff.strip().lower())

        retry_on = data.get("retry_on")
        if retry_on is None:
            # Key absent or explicitly null: use the defaults.
            retry_on = cls.DEFAULT_RETRY_ON
        elif isinstance(retry_on, str):
            # A scalar pattern must stay one pattern; iterating the string
            # would turn it into single-character patterns.
            retry_on = (retry_on,)

        return cls(
            max_attempts=data.get("max_attempts", 3),
            backoff=backoff,
            initial_delay=data.get("initial_delay", 1.0),
            max_delay=data.get("max_delay", 30.0),
            # Copy so the policy never aliases the caller's list.
            retry_on=list(retry_on),
            jitter=data.get("jitter", True),
            jitter_factor=data.get("jitter_factor", 0.25),
        )

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for serialization.

        Returns:
            A dict with one entry per field. ``backoff`` is emitted as its
            string value and ``retry_on`` as a fresh list, so mutating the
            result does not affect this policy.
        """
        return {
            "max_attempts": self.max_attempts,
            "backoff": self.backoff.value if isinstance(self.backoff, BackoffStrategy) else self.backoff,
            "initial_delay": self.initial_delay,
            "max_delay": self.max_delay,
            "retry_on": list(self.retry_on),
            "jitter": self.jitter,
            "jitter_factor": self.jitter_factor,
        }
123
+
124
+
125
@dataclass
class RetryAttempt:
    """One failed attempt that was followed by a retry.

    Attributes:
        attempt_number: Number of the attempt that failed.
        error_type: Class name of the exception that was raised.
        error_message: String form of that exception.
        delay_before: Seconds waited after this failure before retrying.
        timestamp: ``time.time()`` value taken when the record is created,
            unless one is passed explicitly.
    """

    attempt_number: int
    error_type: str
    error_message: str
    delay_before: float
    timestamp: float = field(default_factory=time.time)
134
+
135
+
136
@dataclass
class RetryResult:
    """Result of a retry operation.

    Attributes:
        success: Whether the operation eventually succeeded.
        result: Value returned by the operation on success.
        final_error: Exception raised by the last attempt on failure.
        attempts: Records of the failed attempts that were followed by a
            retry. The last attempt (successful or not) is not in this list.
        total_time_s: Elapsed time of the whole operation in seconds.
    """

    success: bool
    result: Any = None
    final_error: Optional[Exception] = None
    # Forward reference: only the records' attributes are used here.
    attempts: List["RetryAttempt"] = field(default_factory=list)
    total_time_s: float = 0.0

    @property
    def attempt_count(self) -> int:
        """Total number of attempts made.

        ``attempts`` only holds failures that were retried, so the concluding
        attempt is added on top whenever one took place: either it succeeded
        or it left its exception in ``final_error``. A result with neither
        (e.g. a policy allowing zero attempts) counts only the records.
        """
        concluding_attempt_ran = self.success or self.final_error is not None
        return len(self.attempts) + (1 if concluding_attempt_ran else 0)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for logging/reporting.

        Returns:
            A dict with the outcome, the attempt count, the elapsed time, one
            entry per recorded attempt (message truncated to 100 characters)
            and the final error as a string, or ``None`` when there is none.
        """
        recorded = [
            {
                "attempt": a.attempt_number,
                "error_type": a.error_type,
                "error_message": a.error_message[:100],
                "delay_before": a.delay_before,
            }
            for a in self.attempts
        ]
        return {
            "success": self.success,
            "attempt_count": self.attempt_count,
            "total_time_s": self.total_time_s,
            "attempts": recorded,
            "final_error": str(self.final_error) if self.final_error else None,
        }
168
+
169
+
170
def calculate_backoff(
    attempt: int,
    strategy: BackoffStrategy,
    initial_delay: float,
    max_delay: float,
    jitter: bool = True,
    jitter_factor: float = 0.25,
) -> float:
    """Calculate delay before next retry.

    Args:
        attempt: Current attempt number (0-indexed)
        strategy: Backoff strategy
        initial_delay: Base delay in seconds
        max_delay: Maximum delay cap
        jitter: Whether to add random jitter
        jitter_factor: Jitter range (e.g., 0.25 = ±25%)

    Returns:
        Delay in seconds, always within ``[0, max_delay]`` for every
        strategy, including after jitter has been applied.
    """
    if strategy == BackoffStrategy.EXPONENTIAL:
        try:
            delay = initial_delay * (2**attempt)
        except OverflowError:
            # 2**attempt no longer fits in a float, so the uncapped value is
            # far beyond any cap (or zero when there is no base delay).
            delay = max_delay if initial_delay > 0 else 0.0
    elif strategy == BackoffStrategy.LINEAR:
        delay = initial_delay * (attempt + 1)
    else:  # CONSTANT
        delay = initial_delay

    # The cap applies to every strategy, not only the growing ones.
    delay = min(delay, max_delay)

    if jitter and jitter_factor > 0:
        import random

        jitter_range = delay * jitter_factor
        delay += random.uniform(-jitter_range, jitter_range)

    # Clamp last: jitter may push past the cap, and callers hand the value
    # to sleep(), which must not receive a negative number.
    return max(0, min(delay, max_delay))
208
+
209
+
210
def should_retry(error: Exception, policy: RetryPolicy) -> bool:
    """Decide whether ``error`` qualifies for another attempt.

    Args:
        error: The exception that occurred
        policy: The retry policy whose ``retry_on`` patterns are consulted

    Returns:
        True when ``is_retryable`` accepts the error, or when any pattern in
        ``policy.retry_on`` occurs (case-insensitively) inside the exception's
        class name or its message; False otherwise.
    """
    # Errors the errors module already classifies as retryable win outright.
    if is_retryable(error):
        return True

    type_name = type(error).__name__.lower()
    message = str(error).lower()

    # Substring match against both the class name and the message text.
    needles = (pattern.lower() for pattern in policy.retry_on)
    return any(needle in type_name or needle in message for needle in needles)
236
+
237
+
238
async def retry_async(
    operation: Callable[[], Any],
    policy: RetryPolicy,
    provider: str = "",
    operation_name: str = "",
    on_retry: Optional[Callable[[int, Exception, float], None]] = None,
) -> RetryResult:
    """Execute an async operation with retry logic.

    The operation is called with no arguments. If what it returns is
    awaitable, it is awaited inside the retry loop, so failures raised while
    awaiting are retried as well. This covers coroutine functions as well as
    lambdas or partials that merely return a coroutine. Plain synchronous
    callables are supported too.

    Args:
        operation: Async callable to execute
        policy: Retry policy to use
        provider: Provider name for logging
        operation_name: Operation name for logging
        on_retry: Optional callback(attempt, error, delay) called before each
            retry. Exceptions raised by the callback are logged at debug
            level and otherwise ignored.

    Returns:
        RetryResult with success status, result, and attempt history
    """
    import inspect

    start_time = time.time()
    attempts: List[RetryAttempt] = []
    last_error: Optional[Exception] = None

    for attempt in range(policy.max_attempts):
        try:
            # Decide on the returned object rather than on the callable:
            # iscoroutinefunction() is False for a lambda wrapping a
            # coroutine function, which would leave the coroutine un-awaited.
            result = operation()
            if inspect.isawaitable(result):
                result = await result

            # Success!
            total_time = time.time() - start_time

            if attempts:  # Had retries
                logger.info(
                    "retry_succeeded",
                    provider=provider,
                    operation=operation_name,
                    attempt=attempt + 1,
                    total_attempts=len(attempts) + 1,
                    total_time_s=round(total_time, 3),
                )

            return RetryResult(
                success=True,
                result=result,
                attempts=attempts,
                total_time_s=total_time,
            )

        except Exception as e:
            last_error = e
            error_type = type(e).__name__

            # Check if we should retry
            if attempt < policy.max_attempts - 1 and should_retry(e, policy):
                delay = calculate_backoff(
                    attempt=attempt,
                    strategy=policy.backoff,
                    initial_delay=policy.initial_delay,
                    max_delay=policy.max_delay,
                    jitter=policy.jitter,
                    jitter_factor=policy.jitter_factor,
                )

                # Record attempt
                attempts.append(
                    RetryAttempt(
                        attempt_number=attempt + 1,
                        error_type=error_type,
                        error_message=str(e),
                        delay_before=delay,
                    )
                )

                # Log retry
                logger.info(
                    "retry_attempt_failed",
                    provider=provider,
                    operation=operation_name,
                    attempt=attempt + 1,
                    max_attempts=policy.max_attempts,
                    error_type=error_type,
                    error_preview=str(e)[:100],
                    retry_in_s=round(delay, 2),
                )

                # The callback is best-effort: a faulty observer must not
                # abort the retry loop, but its failure is still logged.
                if on_retry:
                    try:
                        on_retry(attempt + 1, e, delay)
                    except Exception as callback_err:
                        logger.debug("retry_callback_error", error=str(callback_err))

                # Wait before retry
                await asyncio.sleep(delay)

            else:
                # No more retries or non-retryable error
                if attempts:
                    logger.warning(
                        "retry_exhausted",
                        provider=provider,
                        operation=operation_name,
                        total_attempts=len(attempts) + 1,
                        final_error_type=error_type,
                        final_error=str(e)[:200],
                    )
                break

    # Failed: attempts exhausted, error not retryable, or zero attempts allowed
    return RetryResult(
        success=False,
        final_error=last_error,
        attempts=attempts,
        total_time_s=time.time() - start_time,
    )
356
+
357
+
358
def retry_sync(
    operation: Callable[[], T],
    policy: RetryPolicy,
    provider: str = "",
    operation_name: str = "",
    on_retry: Optional[Callable[[int, Exception, float], None]] = None,
) -> RetryResult:
    """Execute a sync operation with retry logic.

    Failures of ``operation`` are never raised to the caller; they are
    reported through the returned ``RetryResult``.

    Args:
        operation: Callable to execute
        policy: Retry policy to use
        provider: Provider name for logging
        operation_name: Operation name for logging
        on_retry: Optional callback(attempt, error, delay) called before each
            retry. Exceptions raised by the callback are logged at debug
            level and otherwise ignored.

    Returns:
        RetryResult with success status, result, and attempt history
    """
    start_time = time.time()
    attempts: List[RetryAttempt] = []
    last_error: Optional[Exception] = None

    for attempt in range(policy.max_attempts):
        try:
            result = operation()

            # Success!
            total_time = time.time() - start_time

            if attempts:  # Had retries
                logger.info(
                    "retry_succeeded",
                    provider=provider,
                    operation=operation_name,
                    attempt=attempt + 1,
                    total_attempts=len(attempts) + 1,
                    total_time_s=round(total_time, 3),
                )

            return RetryResult(
                success=True,
                result=result,
                attempts=attempts,
                total_time_s=total_time,
            )

        except Exception as e:
            last_error = e
            error_type = type(e).__name__

            if attempt < policy.max_attempts - 1 and should_retry(e, policy):
                delay = calculate_backoff(
                    attempt=attempt,
                    strategy=policy.backoff,
                    initial_delay=policy.initial_delay,
                    max_delay=policy.max_delay,
                    jitter=policy.jitter,
                    jitter_factor=policy.jitter_factor,
                )

                # Record the failure together with the wait that follows it
                attempts.append(
                    RetryAttempt(
                        attempt_number=attempt + 1,
                        error_type=error_type,
                        error_message=str(e),
                        delay_before=delay,
                    )
                )

                logger.info(
                    "retry_attempt_failed",
                    provider=provider,
                    operation=operation_name,
                    attempt=attempt + 1,
                    max_attempts=policy.max_attempts,
                    error_type=error_type,
                    error_preview=str(e)[:100],
                    retry_in_s=round(delay, 2),
                )

                # The callback is best-effort, as in retry_async: whatever it
                # raises must not abort the loop or escape this function.
                if on_retry:
                    try:
                        on_retry(attempt + 1, e, delay)
                    except Exception as callback_err:
                        logger.debug("retry_callback_error", error=str(callback_err))

                # Blocking wait before the next attempt
                time.sleep(delay)

            else:
                # No more retries or non-retryable error
                if attempts:
                    logger.warning(
                        "retry_exhausted",
                        provider=provider,
                        operation=operation_name,
                        total_attempts=len(attempts) + 1,
                        final_error_type=error_type,
                        final_error=str(e)[:200],
                    )
                break

    # Failed: attempts exhausted, error not retryable, or zero attempts allowed
    return RetryResult(
        success=False,
        final_error=last_error,
        attempts=attempts,
        total_time_s=time.time() - start_time,
    )
465
+
466
+
467
+ # =============================================================================
468
+ # Retry Configuration Store
469
+ # =============================================================================
470
+
471
+
472
class RetryConfigStore:
    """Stores retry configurations per provider.

    Allows loading retry policies from config.yaml and
    retrieving them for specific providers.
    """

    _default_policy: RetryPolicy
    _provider_policies: Dict[str, RetryPolicy]

    def __init__(self) -> None:
        """Start with a stock ``RetryPolicy`` and no provider overrides."""
        self._default_policy = RetryPolicy()
        self._provider_policies = {}

    def set_default(self, policy: RetryPolicy) -> None:
        """Set the default retry policy."""
        self._default_policy = policy

    def set_provider_policy(self, provider_id: str, policy: RetryPolicy) -> None:
        """Set retry policy for a specific provider."""
        self._provider_policies[provider_id] = policy

    def get_policy(self, provider_id: str) -> RetryPolicy:
        """Get retry policy for a provider.

        Returns provider-specific policy if configured,
        otherwise returns default policy.
        """
        return self._provider_policies.get(provider_id, self._default_policy)

    def load_from_config(self, config: Dict[str, Any]) -> None:
        """Load retry configuration from config dictionary.

        Sections that are missing or null (as produced by an empty YAML key)
        are treated as empty. Provider entries are layered on top of the
        default policy in effect after the ``default_policy`` section has
        been applied.

        Expected format:
            retry:
              default_policy:
                max_attempts: 3
                backoff: exponential
                ...
              per_provider:
                sqlite:
                  max_attempts: 5
                fetch:
                  max_attempts: 2

        Args:
            config: Parsed configuration mapping (may lack a ``retry`` key).
        """
        # "key:" with no value parses to None, so fall back with "or {}"
        # instead of relying on .get() defaults.
        retry_config = (config or {}).get("retry") or {}

        # Load default policy
        default_config = retry_config.get("default_policy") or {}
        if default_config:
            self._default_policy = RetryPolicy.from_dict(default_config)
            logger.info(
                "retry_default_policy_loaded",
                max_attempts=self._default_policy.max_attempts,
                backoff=self._default_policy.backoff.value,
            )

        # Load per-provider policies
        per_provider = retry_config.get("per_provider") or {}
        for provider_id, provider_config in per_provider.items():
            # Merge with default
            merged = self._default_policy.to_dict()
            # Give the provider its own pattern list so later edits to one
            # policy cannot leak into the default or into other providers.
            if isinstance(merged.get("retry_on"), list):
                merged["retry_on"] = list(merged["retry_on"])
            merged.update(provider_config or {})

            provider_policy = RetryPolicy.from_dict(merged)
            self._provider_policies[provider_id] = provider_policy
            logger.info(
                "retry_provider_policy_loaded",
                provider=provider_id,
                max_attempts=provider_policy.max_attempts,
            )
541
+
542
+
543
# Global store instance
# Created once when the module is imported; get_retry_store(),
# get_retry_policy() and the with_retry decorator all read from this object.
_retry_store = RetryConfigStore()


def get_retry_store() -> RetryConfigStore:
    """Get the global retry configuration store.

    Returns:
        The module-level ``RetryConfigStore`` instance. Every call returns
        the same object, so policies set or loaded through it are seen by
        later ``get_retry_policy`` lookups.
    """
    return _retry_store
550
+
551
+
552
def get_retry_policy(provider_id: str) -> RetryPolicy:
    """Look up the retry policy that applies to ``provider_id``.

    Delegates to the global store, which returns the provider-specific
    policy when one is configured and the default policy otherwise.
    """
    store = _retry_store
    return store.get_policy(provider_id)
555
+
556
+
557
+ # =============================================================================
558
+ # Decorator
559
+ # =============================================================================
560
+
561
+
562
def with_retry(
    policy: Optional[RetryPolicy] = None,
    provider: str = "",
    operation: str = "",
):
    """Decorator to add retry logic to a function.

    Works on both plain and ``async def`` functions. The decorated function
    returns the wrapped function's result on success and re-raises the last
    error once the retry logic gives up.

    Args:
        policy: Retry policy. When None, the policy is looked up in the
            global store at call time: the one configured for ``provider``
            if there is one, otherwise the store's default.
        provider: Provider name for logging and for the policy lookup
        operation: Operation name for logging (defaults to the function name)

    Usage:
        @with_retry(RetryPolicy(max_attempts=5))
        async def risky_operation():
            ...
    """

    def decorator(func: Callable) -> Callable:
        import functools

        def resolve_policy() -> RetryPolicy:
            # Resolved per call so configuration loaded after decoration is
            # honoured. Uses the store's public lookup, not its private field.
            if policy is not None:
                return policy
            return _retry_store.get_policy(provider)

        @functools.wraps(func)
        async def async_wrapper(*args, **kwargs):
            # A real coroutine function, so the retry loop awaits the call.
            # A bare lambda would hand back an un-awaited coroutine that is
            # reported as an immediate success and never retried.
            async def invoke():
                return await func(*args, **kwargs)

            outcome = await retry_async(
                invoke,
                policy=resolve_policy(),
                provider=provider,
                operation_name=operation or func.__name__,
            )
            if outcome.success:
                return outcome.result
            raise outcome.final_error or Exception("Retry failed")

        @functools.wraps(func)
        def sync_wrapper(*args, **kwargs):
            outcome = retry_sync(
                lambda: func(*args, **kwargs),
                policy=resolve_policy(),
                provider=provider,
                operation_name=operation or func.__name__,
            )
            if outcome.success:
                return outcome.result
            raise outcome.final_error or Exception("Retry failed")

        # Pick the wrapper that matches how the function must be called
        if asyncio.iscoroutinefunction(func):
            return async_wrapper
        return sync_wrapper

    return decorator