mcp-hangar 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_hangar/__init__.py +139 -0
- mcp_hangar/application/__init__.py +1 -0
- mcp_hangar/application/commands/__init__.py +67 -0
- mcp_hangar/application/commands/auth_commands.py +118 -0
- mcp_hangar/application/commands/auth_handlers.py +296 -0
- mcp_hangar/application/commands/commands.py +59 -0
- mcp_hangar/application/commands/handlers.py +189 -0
- mcp_hangar/application/discovery/__init__.py +21 -0
- mcp_hangar/application/discovery/discovery_metrics.py +283 -0
- mcp_hangar/application/discovery/discovery_orchestrator.py +497 -0
- mcp_hangar/application/discovery/lifecycle_manager.py +315 -0
- mcp_hangar/application/discovery/security_validator.py +414 -0
- mcp_hangar/application/event_handlers/__init__.py +50 -0
- mcp_hangar/application/event_handlers/alert_handler.py +191 -0
- mcp_hangar/application/event_handlers/audit_handler.py +203 -0
- mcp_hangar/application/event_handlers/knowledge_base_handler.py +120 -0
- mcp_hangar/application/event_handlers/logging_handler.py +69 -0
- mcp_hangar/application/event_handlers/metrics_handler.py +152 -0
- mcp_hangar/application/event_handlers/persistent_audit_store.py +217 -0
- mcp_hangar/application/event_handlers/security_handler.py +604 -0
- mcp_hangar/application/mcp/tooling.py +158 -0
- mcp_hangar/application/ports/__init__.py +9 -0
- mcp_hangar/application/ports/observability.py +237 -0
- mcp_hangar/application/queries/__init__.py +52 -0
- mcp_hangar/application/queries/auth_handlers.py +237 -0
- mcp_hangar/application/queries/auth_queries.py +118 -0
- mcp_hangar/application/queries/handlers.py +227 -0
- mcp_hangar/application/read_models/__init__.py +11 -0
- mcp_hangar/application/read_models/provider_views.py +139 -0
- mcp_hangar/application/sagas/__init__.py +11 -0
- mcp_hangar/application/sagas/group_rebalance_saga.py +137 -0
- mcp_hangar/application/sagas/provider_failover_saga.py +266 -0
- mcp_hangar/application/sagas/provider_recovery_saga.py +172 -0
- mcp_hangar/application/services/__init__.py +9 -0
- mcp_hangar/application/services/provider_service.py +208 -0
- mcp_hangar/application/services/traced_provider_service.py +211 -0
- mcp_hangar/bootstrap/runtime.py +328 -0
- mcp_hangar/context.py +178 -0
- mcp_hangar/domain/__init__.py +117 -0
- mcp_hangar/domain/contracts/__init__.py +57 -0
- mcp_hangar/domain/contracts/authentication.py +225 -0
- mcp_hangar/domain/contracts/authorization.py +229 -0
- mcp_hangar/domain/contracts/event_store.py +178 -0
- mcp_hangar/domain/contracts/metrics_publisher.py +59 -0
- mcp_hangar/domain/contracts/persistence.py +383 -0
- mcp_hangar/domain/contracts/provider_runtime.py +146 -0
- mcp_hangar/domain/discovery/__init__.py +20 -0
- mcp_hangar/domain/discovery/conflict_resolver.py +267 -0
- mcp_hangar/domain/discovery/discovered_provider.py +185 -0
- mcp_hangar/domain/discovery/discovery_service.py +412 -0
- mcp_hangar/domain/discovery/discovery_source.py +192 -0
- mcp_hangar/domain/events.py +433 -0
- mcp_hangar/domain/exceptions.py +525 -0
- mcp_hangar/domain/model/__init__.py +70 -0
- mcp_hangar/domain/model/aggregate.py +58 -0
- mcp_hangar/domain/model/circuit_breaker.py +152 -0
- mcp_hangar/domain/model/event_sourced_api_key.py +413 -0
- mcp_hangar/domain/model/event_sourced_provider.py +423 -0
- mcp_hangar/domain/model/event_sourced_role_assignment.py +268 -0
- mcp_hangar/domain/model/health_tracker.py +183 -0
- mcp_hangar/domain/model/load_balancer.py +185 -0
- mcp_hangar/domain/model/provider.py +810 -0
- mcp_hangar/domain/model/provider_group.py +656 -0
- mcp_hangar/domain/model/tool_catalog.py +105 -0
- mcp_hangar/domain/policies/__init__.py +19 -0
- mcp_hangar/domain/policies/provider_health.py +187 -0
- mcp_hangar/domain/repository.py +249 -0
- mcp_hangar/domain/security/__init__.py +85 -0
- mcp_hangar/domain/security/input_validator.py +710 -0
- mcp_hangar/domain/security/rate_limiter.py +387 -0
- mcp_hangar/domain/security/roles.py +237 -0
- mcp_hangar/domain/security/sanitizer.py +387 -0
- mcp_hangar/domain/security/secrets.py +501 -0
- mcp_hangar/domain/services/__init__.py +20 -0
- mcp_hangar/domain/services/audit_service.py +376 -0
- mcp_hangar/domain/services/image_builder.py +328 -0
- mcp_hangar/domain/services/provider_launcher.py +1046 -0
- mcp_hangar/domain/value_objects.py +1138 -0
- mcp_hangar/errors.py +818 -0
- mcp_hangar/fastmcp_server.py +1105 -0
- mcp_hangar/gc.py +134 -0
- mcp_hangar/infrastructure/__init__.py +79 -0
- mcp_hangar/infrastructure/async_executor.py +133 -0
- mcp_hangar/infrastructure/auth/__init__.py +37 -0
- mcp_hangar/infrastructure/auth/api_key_authenticator.py +388 -0
- mcp_hangar/infrastructure/auth/event_sourced_store.py +567 -0
- mcp_hangar/infrastructure/auth/jwt_authenticator.py +360 -0
- mcp_hangar/infrastructure/auth/middleware.py +340 -0
- mcp_hangar/infrastructure/auth/opa_authorizer.py +243 -0
- mcp_hangar/infrastructure/auth/postgres_store.py +659 -0
- mcp_hangar/infrastructure/auth/projections.py +366 -0
- mcp_hangar/infrastructure/auth/rate_limiter.py +311 -0
- mcp_hangar/infrastructure/auth/rbac_authorizer.py +323 -0
- mcp_hangar/infrastructure/auth/sqlite_store.py +624 -0
- mcp_hangar/infrastructure/command_bus.py +112 -0
- mcp_hangar/infrastructure/discovery/__init__.py +110 -0
- mcp_hangar/infrastructure/discovery/docker_source.py +289 -0
- mcp_hangar/infrastructure/discovery/entrypoint_source.py +249 -0
- mcp_hangar/infrastructure/discovery/filesystem_source.py +383 -0
- mcp_hangar/infrastructure/discovery/kubernetes_source.py +247 -0
- mcp_hangar/infrastructure/event_bus.py +260 -0
- mcp_hangar/infrastructure/event_sourced_repository.py +443 -0
- mcp_hangar/infrastructure/event_store.py +396 -0
- mcp_hangar/infrastructure/knowledge_base/__init__.py +259 -0
- mcp_hangar/infrastructure/knowledge_base/contracts.py +202 -0
- mcp_hangar/infrastructure/knowledge_base/memory.py +177 -0
- mcp_hangar/infrastructure/knowledge_base/postgres.py +545 -0
- mcp_hangar/infrastructure/knowledge_base/sqlite.py +513 -0
- mcp_hangar/infrastructure/metrics_publisher.py +36 -0
- mcp_hangar/infrastructure/observability/__init__.py +10 -0
- mcp_hangar/infrastructure/observability/langfuse_adapter.py +534 -0
- mcp_hangar/infrastructure/persistence/__init__.py +33 -0
- mcp_hangar/infrastructure/persistence/audit_repository.py +371 -0
- mcp_hangar/infrastructure/persistence/config_repository.py +398 -0
- mcp_hangar/infrastructure/persistence/database.py +333 -0
- mcp_hangar/infrastructure/persistence/database_common.py +330 -0
- mcp_hangar/infrastructure/persistence/event_serializer.py +280 -0
- mcp_hangar/infrastructure/persistence/event_upcaster.py +166 -0
- mcp_hangar/infrastructure/persistence/in_memory_event_store.py +150 -0
- mcp_hangar/infrastructure/persistence/recovery_service.py +312 -0
- mcp_hangar/infrastructure/persistence/sqlite_event_store.py +386 -0
- mcp_hangar/infrastructure/persistence/unit_of_work.py +409 -0
- mcp_hangar/infrastructure/persistence/upcasters/README.md +13 -0
- mcp_hangar/infrastructure/persistence/upcasters/__init__.py +7 -0
- mcp_hangar/infrastructure/query_bus.py +153 -0
- mcp_hangar/infrastructure/saga_manager.py +401 -0
- mcp_hangar/logging_config.py +209 -0
- mcp_hangar/metrics.py +1007 -0
- mcp_hangar/models.py +31 -0
- mcp_hangar/observability/__init__.py +54 -0
- mcp_hangar/observability/health.py +487 -0
- mcp_hangar/observability/metrics.py +319 -0
- mcp_hangar/observability/tracing.py +433 -0
- mcp_hangar/progress.py +542 -0
- mcp_hangar/retry.py +613 -0
- mcp_hangar/server/__init__.py +120 -0
- mcp_hangar/server/__main__.py +6 -0
- mcp_hangar/server/auth_bootstrap.py +340 -0
- mcp_hangar/server/auth_cli.py +335 -0
- mcp_hangar/server/auth_config.py +305 -0
- mcp_hangar/server/bootstrap.py +735 -0
- mcp_hangar/server/cli.py +161 -0
- mcp_hangar/server/config.py +224 -0
- mcp_hangar/server/context.py +215 -0
- mcp_hangar/server/http_auth_middleware.py +165 -0
- mcp_hangar/server/lifecycle.py +467 -0
- mcp_hangar/server/state.py +117 -0
- mcp_hangar/server/tools/__init__.py +16 -0
- mcp_hangar/server/tools/discovery.py +186 -0
- mcp_hangar/server/tools/groups.py +75 -0
- mcp_hangar/server/tools/health.py +301 -0
- mcp_hangar/server/tools/provider.py +939 -0
- mcp_hangar/server/tools/registry.py +320 -0
- mcp_hangar/server/validation.py +113 -0
- mcp_hangar/stdio_client.py +229 -0
- mcp_hangar-0.2.0.dist-info/METADATA +347 -0
- mcp_hangar-0.2.0.dist-info/RECORD +160 -0
- mcp_hangar-0.2.0.dist-info/WHEEL +4 -0
- mcp_hangar-0.2.0.dist-info/entry_points.txt +2 -0
- mcp_hangar-0.2.0.dist-info/licenses/LICENSE +21 -0
mcp_hangar/retry.py
ADDED
|
@@ -0,0 +1,613 @@
|
|
|
1
|
+
"""Automatic retry with exponential backoff.
|
|
2
|
+
|
|
3
|
+
This module provides retry functionality for transient failures,
|
|
4
|
+
including:
|
|
5
|
+
|
|
6
|
+
- Configurable retry policies
|
|
7
|
+
- Exponential, linear, and constant backoff strategies
|
|
8
|
+
- Per-provider retry configuration
|
|
9
|
+
- Circuit breaker integration
|
|
10
|
+
|
|
11
|
+
Usage example::
|
|
12
|
+
|
|
13
|
+
from mcp_hangar import RetryPolicy, BackoffStrategy, with_retry
|
|
14
|
+
|
|
15
|
+
policy = RetryPolicy(
|
|
16
|
+
max_attempts=3,
|
|
17
|
+
backoff=BackoffStrategy.EXPONENTIAL
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
@with_retry(policy)
|
|
21
|
+
def call_provider():
|
|
22
|
+
return risky_operation()
|
|
23
|
+
|
|
24
|
+
See docs/guides/UX_IMPROVEMENTS.md for more examples.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
import asyncio
|
|
28
|
+
from dataclasses import dataclass, field
|
|
29
|
+
from enum import Enum
|
|
30
|
+
import time
|
|
31
|
+
from typing import Any, Callable, Dict, List, Optional, TypeVar
|
|
32
|
+
|
|
33
|
+
from .errors import is_retryable
|
|
34
|
+
from .logging_config import get_logger
|
|
35
|
+
|
|
36
|
+
logger = get_logger(__name__)
|
|
37
|
+
|
|
38
|
+
T = TypeVar("T")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class BackoffStrategy(str, Enum):
    """How the wait between consecutive retry attempts grows.

    Members are also ``str`` instances, so each one compares equal to its
    plain-string value (for example ``BackoffStrategy.LINEAR == "linear"``).
    """

    # Delay doubles with every attempt.
    EXPONENTIAL = "exponential"
    # Delay grows by one ``initial_delay`` per attempt.
    LINEAR = "linear"
    # Delay stays at ``initial_delay`` for every attempt.
    CONSTANT = "constant"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@dataclass
class RetryPolicy:
    """Configuration for automatic retry behavior.

    Attributes:
        max_attempts: Maximum number of attempts (including the initial one).
        backoff: Backoff strategy (exponential, linear, constant).
        initial_delay: Base delay in seconds before the first retry.
        max_delay: Maximum delay cap in seconds.
        retry_on: Patterns that mark an error as retryable.
        jitter: Whether to add random jitter to delays.
        jitter_factor: Jitter factor (0.0 to 1.0).
    """

    max_attempts: int = 3
    backoff: BackoffStrategy = BackoffStrategy.EXPONENTIAL
    initial_delay: float = 1.0
    max_delay: float = 30.0
    retry_on: List[str] = field(
        default_factory=lambda: [
            "MalformedJSON",
            "JSONDecodeError",
            "Timeout",
            "TimeoutError",
            "ConnectionError",
            "ProviderNotResponding",
            "TransientError",
            "ProviderProtocolError",
            "NetworkError",
        ]
    )
    jitter: bool = True
    jitter_factor: float = 0.25

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "RetryPolicy":
        """Create a RetryPolicy from a dictionary (e.g., from config.yaml).

        Keys that are absent fall back to the dataclass defaults, so the
        defaults are declared in exactly one place.

        Args:
            data: Mapping with any subset of the policy's field names.

        Returns:
            A new policy that owns its own ``retry_on`` list.

        Raises:
            ValueError: If ``backoff`` is a string that names no strategy.
        """
        defaults = cls()

        backoff = data.get("backoff", defaults.backoff)
        if isinstance(backoff, str):
            backoff = BackoffStrategy(backoff)

        retry_on = data.get("retry_on")
        if retry_on is None:
            # Missing key or an empty YAML value: use the default patterns.
            retry_on = defaults.retry_on
        elif isinstance(retry_on, str):
            # A bare string would otherwise be iterated character by character.
            retry_on = [retry_on]
        else:
            # Copy so that the policy never aliases the caller's list.
            retry_on = list(retry_on)

        return cls(
            max_attempts=data.get("max_attempts", defaults.max_attempts),
            backoff=backoff,
            initial_delay=data.get("initial_delay", defaults.initial_delay),
            max_delay=data.get("max_delay", defaults.max_delay),
            retry_on=retry_on,
            jitter=data.get("jitter", defaults.jitter),
            jitter_factor=data.get("jitter_factor", defaults.jitter_factor),
        )

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a dictionary for serialization.

        The returned ``retry_on`` list is a copy; mutating it does not
        affect this policy.
        """
        return {
            "max_attempts": self.max_attempts,
            "backoff": self.backoff.value if isinstance(self.backoff, BackoffStrategy) else self.backoff,
            "initial_delay": self.initial_delay,
            "max_delay": self.max_delay,
            "retry_on": list(self.retry_on),
            "jitter": self.jitter,
            "jitter_factor": self.jitter_factor,
        }
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
@dataclass
class RetryAttempt:
    """Snapshot of one failed attempt that was followed by a retry."""

    # Position of the failed attempt in the sequence.
    attempt_number: int
    # Class name of the exception that was raised.
    error_type: str
    # Text of the exception that was raised.
    error_message: str
    # Delay in seconds associated with this attempt.
    delay_before: float
    # Creation time of the record; defaults to time.time().
    timestamp: float = field(default_factory=time.time)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
@dataclass
class RetryResult:
    """Outcome of running an operation under a retry policy.

    Attributes:
        success: True when an attempt completed without raising.
        result: Value produced by the successful attempt (None on failure).
        final_error: Exception raised by the last attempt when it failed.
        attempts: Records of failed attempts that were followed by a retry.
            The last attempt, whether it succeeded or failed, has no record
            here.
        total_time_s: Elapsed time for the whole operation, in seconds.
    """

    success: bool
    result: Any = None
    final_error: Optional[Exception] = None
    attempts: List["RetryAttempt"] = field(default_factory=list)
    total_time_s: float = 0.0

    @property
    def attempt_count(self) -> int:
        """Total number of attempts made.

        ``attempts`` only holds the failures that led to another try, so the
        last attempt has to be counted separately. It happened whenever the
        operation either succeeded or left a final error behind.
        """
        ran_final_attempt = self.success or self.final_error is not None
        return len(self.attempts) + (1 if ran_final_attempt else 0)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a dictionary for logging/reporting.

        Error messages of recorded attempts are truncated to 100 characters.
        """
        history = [
            {
                "attempt": a.attempt_number,
                "error_type": a.error_type,
                "error_message": a.error_message[:100],
                "delay_before": a.delay_before,
            }
            for a in self.attempts
        ]
        return {
            "success": self.success,
            "attempt_count": self.attempt_count,
            "total_time_s": self.total_time_s,
            "attempts": history,
            # Identity check: an exception type may define a falsy __bool__/__len__.
            "final_error": str(self.final_error) if self.final_error is not None else None,
        }
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def calculate_backoff(
    attempt: int,
    strategy: BackoffStrategy,
    initial_delay: float,
    max_delay: float,
    jitter: bool = True,
    jitter_factor: float = 0.25,
) -> float:
    """Calculate the delay before the next retry.

    Args:
        attempt: Current attempt number (0-indexed).
        strategy: Backoff strategy.
        initial_delay: Base delay in seconds.
        max_delay: Cap applied to the exponential and linear delays before
            jitter is added. The constant strategy is not capped.
        jitter: Whether to add random jitter.
        jitter_factor: Jitter range (e.g., 0.25 = +/-25% of the delay).

    Returns:
        Delay in seconds, never negative. With jitter enabled the value can
        exceed ``max_delay`` by up to ``jitter_factor`` of the capped delay.
    """
    if strategy == BackoffStrategy.EXPONENTIAL:
        # Use float exponentiation with a clamped exponent. Multiplying a
        # float by the int 2**attempt raises OverflowError as soon as
        # 2**attempt no longer fits in a float (attempt >= 1024). 2.0**1023
        # is still finite, and a product that overflows becomes inf, which
        # min() then caps at max_delay.
        delay = min(initial_delay * 2.0 ** min(attempt, 1023), max_delay)
    elif strategy == BackoffStrategy.LINEAR:
        # initial_delay * (attempt + 1), capped at max_delay
        delay = min(initial_delay * (attempt + 1), max_delay)
    else:  # CONSTANT
        delay = initial_delay

    if jitter and jitter_factor > 0:
        # Imported here because the module does not import random at file level.
        import random

        jitter_range = delay * jitter_factor
        delay += random.uniform(-jitter_range, jitter_range)
        delay = max(0, delay)  # Ensure non-negative

    return delay
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def should_retry(error: Exception, policy: RetryPolicy) -> bool:
    """Determine if an error should trigger a retry.

    An error is retryable when ``is_retryable`` accepts it, or when any
    pattern in ``policy.retry_on`` occurs (case-insensitively, as a
    substring) in the exception's class name or in its message.

    Args:
        error: The exception that occurred.
        policy: The retry policy.

    Returns:
        True if the error matches retry criteria.
    """
    if is_retryable(error):
        return True

    # Lower-case the haystacks once instead of once per pattern.
    type_name = type(error).__name__.lower()
    message = str(error).lower()

    for pattern in policy.retry_on:
        needle = pattern.lower()
        if not needle:
            # "" is a substring of every string; an empty entry would
            # silently make every error retryable.
            continue
        if needle in type_name or needle in message:
            return True

    return False
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
async def retry_async(
    operation: Callable[[], Any],
    policy: RetryPolicy,
    provider: str = "",
    operation_name: str = "",
    on_retry: Optional[Callable[[int, Exception, float], None]] = None,
) -> RetryResult:
    """Execute an operation with retry logic, awaiting it when needed.

    Args:
        operation: Zero-argument callable. It may be a coroutine function or
            a plain callable (lambda, ``functools.partial``) that returns an
            awaitable; in both cases the awaitable is awaited inside the
            retry loop so that its exceptions are seen here.
        policy: Retry policy to use.
        provider: Provider name for logging.
        operation_name: Operation name for logging.
        on_retry: Optional callback(attempt, error, delay) called before each
            retry. Exceptions raised by the callback are logged and ignored.

    Returns:
        RetryResult with success status, result, and attempt history.
    """
    # Imported here because the module does not import inspect at file level.
    import inspect

    # The operation always runs at least once, even if the policy carries a
    # zero or negative max_attempts.
    max_attempts = max(1, policy.max_attempts)
    # Monotonic clock: elapsed time must not jump with wall-clock changes.
    started = time.monotonic()
    attempts: List[RetryAttempt] = []
    last_error: Optional[Exception] = None

    for attempt in range(max_attempts):
        try:
            result = operation()
            # Checking the returned object (rather than the callable with
            # iscoroutinefunction) also covers lambdas and partials that
            # wrap a coroutine function.
            if inspect.isawaitable(result):
                result = await result
        except Exception as e:
            last_error = e
            error_type = type(e).__name__

            out_of_attempts = attempt >= max_attempts - 1
            if out_of_attempts or not should_retry(e, policy):
                # Only report exhaustion when at least one retry happened.
                if attempts:
                    logger.warning(
                        "retry_exhausted",
                        provider=provider,
                        operation=operation_name,
                        total_attempts=len(attempts) + 1,
                        final_error_type=error_type,
                        final_error=str(e)[:200],
                    )
                break

            delay = calculate_backoff(
                attempt=attempt,
                strategy=policy.backoff,
                initial_delay=policy.initial_delay,
                max_delay=policy.max_delay,
                jitter=policy.jitter,
                jitter_factor=policy.jitter_factor,
            )

            attempts.append(
                RetryAttempt(
                    attempt_number=attempt + 1,
                    error_type=error_type,
                    error_message=str(e),
                    delay_before=delay,
                )
            )

            logger.info(
                "retry_attempt_failed",
                provider=provider,
                operation=operation_name,
                attempt=attempt + 1,
                max_attempts=max_attempts,
                error_type=error_type,
                error_preview=str(e)[:100],
                retry_in_s=round(delay, 2),
            )

            if on_retry is not None:
                try:
                    on_retry(attempt + 1, e, delay)
                except Exception as callback_err:
                    # A faulty observer must not abort the retry loop, but
                    # the failure should still be visible in the logs.
                    logger.debug("retry_callback_error", error=str(callback_err))

            await asyncio.sleep(delay)
        else:
            total_time = time.monotonic() - started

            if attempts:  # Had retries
                logger.info(
                    "retry_succeeded",
                    provider=provider,
                    operation=operation_name,
                    attempt=attempt + 1,
                    total_attempts=len(attempts) + 1,
                    total_time_s=round(total_time, 3),
                )

            return RetryResult(
                success=True,
                result=result,
                attempts=attempts,
                total_time_s=total_time,
            )

    # Out of attempts, or the last error was not retryable.
    return RetryResult(
        success=False,
        final_error=last_error,
        attempts=attempts,
        total_time_s=time.monotonic() - started,
    )
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def retry_sync(
    operation: Callable[[], T],
    policy: RetryPolicy,
    provider: str = "",
    operation_name: str = "",
    on_retry: Optional[Callable[[int, Exception, float], None]] = None,
) -> RetryResult:
    """Execute a sync operation with retry logic.

    The calling thread is blocked with ``time.sleep`` between attempts.

    Args:
        operation: Zero-argument callable to execute.
        policy: Retry policy to use.
        provider: Provider name for logging.
        operation_name: Operation name for logging.
        on_retry: Optional callback(attempt, error, delay) called before each
            retry. Exceptions raised by the callback are logged and ignored.

    Returns:
        RetryResult with success status, result, and attempt history.
    """
    # The operation always runs at least once, even if the policy carries a
    # zero or negative max_attempts.
    max_attempts = max(1, policy.max_attempts)
    # Monotonic clock: elapsed time must not jump with wall-clock changes.
    started = time.monotonic()
    attempts: List[RetryAttempt] = []
    last_error: Optional[Exception] = None

    for attempt in range(max_attempts):
        try:
            result = operation()
        except Exception as e:
            last_error = e
            error_type = type(e).__name__

            out_of_attempts = attempt >= max_attempts - 1
            if out_of_attempts or not should_retry(e, policy):
                # Only report exhaustion when at least one retry happened.
                if attempts:
                    logger.warning(
                        "retry_exhausted",
                        provider=provider,
                        operation=operation_name,
                        total_attempts=len(attempts) + 1,
                        final_error_type=error_type,
                        final_error=str(e)[:200],
                    )
                break

            delay = calculate_backoff(
                attempt=attempt,
                strategy=policy.backoff,
                initial_delay=policy.initial_delay,
                max_delay=policy.max_delay,
                jitter=policy.jitter,
                jitter_factor=policy.jitter_factor,
            )

            attempts.append(
                RetryAttempt(
                    attempt_number=attempt + 1,
                    error_type=error_type,
                    error_message=str(e),
                    delay_before=delay,
                )
            )

            logger.info(
                "retry_attempt_failed",
                provider=provider,
                operation=operation_name,
                attempt=attempt + 1,
                max_attempts=max_attempts,
                error_type=error_type,
                error_preview=str(e)[:100],
                retry_in_s=round(delay, 2),
            )

            if on_retry is not None:
                try:
                    on_retry(attempt + 1, e, delay)
                except Exception as callback_err:
                    # Catch every callback failure, not just a few types:
                    # anything escaping here would abort the retry loop and
                    # replace the operation's own error.
                    logger.debug("retry_callback_error", error=str(callback_err))

            time.sleep(delay)
        else:
            total_time = time.monotonic() - started

            if attempts:  # Had retries
                logger.info(
                    "retry_succeeded",
                    provider=provider,
                    operation=operation_name,
                    attempt=attempt + 1,
                    total_attempts=len(attempts) + 1,
                    total_time_s=round(total_time, 3),
                )

            return RetryResult(
                success=True,
                result=result,
                attempts=attempts,
                total_time_s=total_time,
            )

    # Out of attempts, or the last error was not retryable.
    return RetryResult(
        success=False,
        final_error=last_error,
        attempts=attempts,
        total_time_s=time.monotonic() - started,
    )
|
|
465
|
+
|
|
466
|
+
|
|
467
|
+
# =============================================================================
|
|
468
|
+
# Retry Configuration Store
|
|
469
|
+
# =============================================================================
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
class RetryConfigStore:
    """Stores retry configurations per provider.

    Allows loading retry policies from config.yaml and
    retrieving them for specific providers.
    """

    _default_policy: RetryPolicy
    _provider_policies: Dict[str, RetryPolicy]

    def __init__(self) -> None:
        self._default_policy = RetryPolicy()
        self._provider_policies = {}

    def set_default(self, policy: RetryPolicy) -> None:
        """Set the default retry policy."""
        self._default_policy = policy

    def set_provider_policy(self, provider_id: str, policy: RetryPolicy) -> None:
        """Set retry policy for a specific provider."""
        self._provider_policies[provider_id] = policy

    def get_policy(self, provider_id: str) -> RetryPolicy:
        """Get retry policy for a provider.

        Returns provider-specific policy if configured,
        otherwise returns default policy.
        """
        return self._provider_policies.get(provider_id, self._default_policy)

    def load_from_config(self, config: Dict[str, Any]) -> None:
        """Load retry configuration from config dictionary.

        Expected format:
            retry:
              default_policy:
                max_attempts: 3
                backoff: exponential
                ...
              per_provider:
                sqlite:
                  max_attempts: 5
                fetch:
                  max_attempts: 2

        Sections that are missing or empty (a YAML key with no value loads
        as None) are treated as empty mappings. Each provider entry is
        layered on top of the default policy in effect at that point.
        """
        # "or {}" also covers keys that are present with a None value.
        retry_config = config.get("retry") or {}

        # Load default policy
        default_config = retry_config.get("default_policy") or {}
        if default_config:
            self._default_policy = RetryPolicy.from_dict(default_config)
            logger.info(
                "retry_default_policy_loaded",
                max_attempts=self._default_policy.max_attempts,
                backoff=self._default_policy.backoff.value,
            )

        # Load per-provider policies
        per_provider = retry_config.get("per_provider") or {}
        for provider_id, provider_config in per_provider.items():
            # Merge with default
            merged = self._default_policy.to_dict()
            inherited = merged.get("retry_on")
            if isinstance(inherited, list):
                # to_dict() may return the default policy's own list; copy it
                # so provider policies never share mutable state with it.
                merged["retry_on"] = list(inherited)
            merged.update(provider_config or {})

            provider_policy = RetryPolicy.from_dict(merged)
            self._provider_policies[provider_id] = provider_policy
            logger.info(
                "retry_provider_policy_loaded",
                provider=provider_id,
                max_attempts=provider_policy.max_attempts,
            )
|
|
541
|
+
|
|
542
|
+
|
|
543
|
+
# Module-level store shared by get_retry_store(), get_retry_policy() and
# the with_retry decorator.
_retry_store: RetryConfigStore = RetryConfigStore()
|
|
545
|
+
|
|
546
|
+
|
|
547
|
+
def get_retry_store() -> RetryConfigStore:
    """Return the module-level retry configuration store."""
    store = _retry_store
    return store
|
|
550
|
+
|
|
551
|
+
|
|
552
|
+
def get_retry_policy(provider_id: str) -> RetryPolicy:
    """Look up the retry policy for ``provider_id`` in the module-level store.

    The lookup is delegated to the store's ``get_policy`` method.
    """
    store = _retry_store
    return store.get_policy(provider_id)
|
|
555
|
+
|
|
556
|
+
|
|
557
|
+
# =============================================================================
|
|
558
|
+
# Decorator
|
|
559
|
+
# =============================================================================
|
|
560
|
+
|
|
561
|
+
|
|
562
|
+
def with_retry(
    policy: Optional[RetryPolicy] = None,
    provider: str = "",
    operation: str = "",
):
    """Decorator to add retry logic to a function.

    Works on both plain functions and coroutine functions. When every
    attempt fails, the last exception raised by the function is re-raised.

    Args:
        policy: Retry policy. When None, the global store's default policy
            is looked up on every call, so configuration loaded after
            decoration is honoured.
        provider: Provider name for logging.
        operation: Operation name for logging (defaults to the function name).

    Usage:
        @with_retry(RetryPolicy(max_attempts=5))
        async def risky_operation():
            ...
    """

    def decorator(func: Callable) -> Callable:
        # Imported here because the module does not import functools at file level.
        import functools

        def resolve_policy() -> RetryPolicy:
            # Identity check rather than truthiness, evaluated per call.
            return policy if policy is not None else _retry_store._default_policy

        def unwrap(outcome) -> Any:
            # Hand back the value on success, otherwise surface the failure.
            if outcome.success:
                return outcome.result
            raise outcome.final_error or Exception("Retry failed")

        @functools.wraps(func)
        async def async_wrapper(*args, **kwargs):
            # A real coroutine function, not a lambda: a lambda is not
            # recognised by asyncio.iscoroutinefunction, so the coroutine it
            # returns would be handed back un-awaited and never retried.
            async def attempt_once():
                return await func(*args, **kwargs)

            outcome = await retry_async(
                attempt_once,
                policy=resolve_policy(),
                provider=provider,
                operation_name=operation or func.__name__,
            )
            return unwrap(outcome)

        @functools.wraps(func)
        def sync_wrapper(*args, **kwargs):
            outcome = retry_sync(
                lambda: func(*args, **kwargs),
                policy=resolve_policy(),
                provider=provider,
                operation_name=operation or func.__name__,
            )
            return unwrap(outcome)

        if asyncio.iscoroutinefunction(func):
            return async_wrapper
        return sync_wrapper

    return decorator
|