agent-tool-resilience 0.1.0 (agent_tool_resilience-0.1.0-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_tool_resilience/__init__.py +32 -0
- agent_tool_resilience/circuit_breaker.py +246 -0
- agent_tool_resilience/fallback.py +188 -0
- agent_tool_resilience/rate_limit.py +261 -0
- agent_tool_resilience/resilient_tool.py +393 -0
- agent_tool_resilience/retry.py +215 -0
- agent_tool_resilience/tracer.py +319 -0
- agent_tool_resilience/validator.py +217 -0
- agent_tool_resilience-0.1.0.dist-info/METADATA +184 -0
- agent_tool_resilience-0.1.0.dist-info/RECORD +12 -0
- agent_tool_resilience-0.1.0.dist-info/WHEEL +5 -0
- agent_tool_resilience-0.1.0.dist-info/top_level.txt +1 -0
agent_tool_resilience/rate_limit.py
@@ -0,0 +1,261 @@
"""
Rate limit handling for API calls.
"""

import asyncio
import threading
import time
from collections import deque
from dataclasses import dataclass, field
from typing import Any, Callable, Optional


class RateLimitExceeded(Exception):
    """Raised when rate limit is exceeded and blocking is disabled."""

    def __init__(self, message: str, retry_after: Optional[float] = None):
        super().__init__(message)
        self.retry_after = retry_after


@dataclass
class RateLimitHandler:
    """
    Handles rate limiting for API calls.

    Supports:
    - Request counting with sliding window
    - Automatic throttling
    - Respecting Retry-After headers
    - Multiple rate limit tiers

    Attributes:
        requests_per_minute: Maximum requests per minute (0 = unlimited)
        requests_per_second: Maximum requests per second (0 = unlimited)
        auto_throttle: Whether to automatically delay requests
        respect_retry_after: Whether to respect Retry-After from exceptions
        on_throttle: Callback when throttling occurs
    """
    requests_per_minute: int = 0
    requests_per_second: int = 0
    auto_throttle: bool = True
    respect_retry_after: bool = True
    on_throttle: Optional[Callable[[float], None]] = None

    _minute_window: deque = field(default_factory=lambda: deque(), init=False)
    _second_window: deque = field(default_factory=lambda: deque(), init=False)
    _lock: threading.Lock = field(default_factory=threading.Lock, init=False)
    _retry_after_until: Optional[float] = field(default=None, init=False)

    def _clean_windows(self, now: float) -> None:
        """Remove old entries from sliding windows."""
        # Clean minute window
        while self._minute_window and now - self._minute_window[0] > 60:
            self._minute_window.popleft()

        # Clean second window
        while self._second_window and now - self._second_window[0] > 1:
            self._second_window.popleft()

    def _get_wait_time(self, now: float) -> float:
        """Calculate how long to wait before the next request."""
        wait_time = 0.0

        # Check retry-after
        if self._retry_after_until and now < self._retry_after_until:
            wait_time = max(wait_time, self._retry_after_until - now)

        # Check per-minute limit
        if self.requests_per_minute > 0:
            if len(self._minute_window) >= self.requests_per_minute:
                oldest = self._minute_window[0]
                wait_time = max(wait_time, 60 - (now - oldest))

        # Check per-second limit
        if self.requests_per_second > 0:
            if len(self._second_window) >= self.requests_per_second:
                oldest = self._second_window[0]
                wait_time = max(wait_time, 1 - (now - oldest))

        return wait_time

    def _record_request(self, now: float) -> None:
        """Record a request in the sliding windows."""
        if self.requests_per_minute > 0:
            self._minute_window.append(now)
        if self.requests_per_second > 0:
            self._second_window.append(now)

    def set_retry_after(self, seconds: float) -> None:
        """Set a retry-after delay from an API response."""
        with self._lock:
            self._retry_after_until = time.time() + seconds

    def check_rate_limit(self) -> float:
        """
        Check if we're within rate limits.

        Returns:
            Wait time in seconds (0 if no wait needed)
        """
        with self._lock:
            now = time.time()
            self._clean_windows(now)
            return self._get_wait_time(now)

    def acquire(self) -> None:
        """
        Acquire permission to make a request.

        Blocks if auto_throttle is True, otherwise raises RateLimitExceeded.
        """
        with self._lock:
            now = time.time()
            self._clean_windows(now)
            wait_time = self._get_wait_time(now)

            if wait_time > 0:
                if self.auto_throttle:
                    if self.on_throttle:
                        self.on_throttle(wait_time)
                    time.sleep(wait_time)
                    now = time.time()
                else:
                    raise RateLimitExceeded(
                        f"Rate limit exceeded, retry after {wait_time:.2f}s",
                        retry_after=wait_time
                    )

            self._record_request(now)

    async def acquire_async(self) -> None:
        """
        Async version of acquire.

        Blocks if auto_throttle is True, otherwise raises RateLimitExceeded.
        """
        with self._lock:
            now = time.time()
            self._clean_windows(now)
            wait_time = self._get_wait_time(now)

        if wait_time > 0:
            if self.auto_throttle:
                if self.on_throttle:
                    self.on_throttle(wait_time)
                await asyncio.sleep(wait_time)
                now = time.time()
            else:
                raise RateLimitExceeded(
                    f"Rate limit exceeded, retry after {wait_time:.2f}s",
                    retry_after=wait_time
                )

        with self._lock:
            self._record_request(time.time())

    def execute(
        self,
        func: Callable[..., Any],
        *args: Any,
        **kwargs: Any
    ) -> Any:
        """
        Execute a function with rate limiting.

        Args:
            func: Function to execute
            *args: Positional arguments
            **kwargs: Keyword arguments

        Returns:
            Function's return value
        """
        self.acquire()

        try:
            return func(*args, **kwargs)
        except Exception as e:
            # Check for rate limit response
            if self.respect_retry_after:
                retry_after = self._extract_retry_after(e)
                if retry_after:
                    self.set_retry_after(retry_after)
            raise

    async def execute_async(
        self,
        func: Callable[..., Any],
        *args: Any,
        **kwargs: Any
    ) -> Any:
        """
        Execute an async function with rate limiting.

        Args:
            func: Async function to execute
            *args: Positional arguments
            **kwargs: Keyword arguments

        Returns:
            Function's return value
        """
        await self.acquire_async()

        try:
            return await func(*args, **kwargs)
        except Exception as e:
            # Check for rate limit response
            if self.respect_retry_after:
                retry_after = self._extract_retry_after(e)
                if retry_after:
                    self.set_retry_after(retry_after)
            raise

    def _extract_retry_after(self, exception: Exception) -> Optional[float]:
        """Try to extract retry-after from an exception."""
        # Check for retry_after attribute
        if hasattr(exception, "retry_after"):
            return float(exception.retry_after)

        # Check for response with Retry-After header
        if hasattr(exception, "response"):
            response = exception.response
            if hasattr(response, "headers"):
                retry_after = response.headers.get("Retry-After")
                if retry_after:
                    try:
                        return float(retry_after)
                    except ValueError:
                        pass

        return None

    def get_stats(self) -> dict:
        """Get rate limiter statistics."""
        with self._lock:
            now = time.time()
            self._clean_windows(now)

            return {
                "requests_per_minute_limit": self.requests_per_minute,
                "requests_per_second_limit": self.requests_per_second,
                "current_minute_count": len(self._minute_window),
                "current_second_count": len(self._second_window),
                "retry_after_active": (
                    self._retry_after_until is not None and
                    now < self._retry_after_until
                ),
                "retry_after_remaining": (
                    max(0, self._retry_after_until - now)
                    if self._retry_after_until
                    else 0
                ),
            }

    def reset(self) -> None:
        """Reset all rate limit state."""
        with self._lock:
            self._minute_window.clear()
            self._second_window.clear()
            self._retry_after_until = None
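For orientation, a minimal usage sketch of the handler above (not part of the package diff; call_search_api and limiter are hypothetical names introduced for illustration):

from agent_tool_resilience.rate_limit import RateLimitHandler
import time

# Stand-in for a real API call; purely illustrative.
def call_search_api(query: str) -> dict:
    return {"query": query, "ts": time.time()}

limiter = RateLimitHandler(
    requests_per_minute=60,
    requests_per_second=2,
    auto_throttle=True,  # sleep when over the limit instead of raising RateLimitExceeded
    on_throttle=lambda wait: print(f"throttled for {wait:.2f}s"),
)

for q in ["a", "b", "c"]:
    # execute() calls acquire() first; if the wrapped call raises, any
    # Retry-After hint on the exception is fed back via set_retry_after().
    result = limiter.execute(call_search_api, q)

print(limiter.get_stats())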
agent_tool_resilience/resilient_tool.py
@@ -0,0 +1,393 @@
"""
Main decorator and class for resilient tool execution.
"""

import asyncio
import functools
import inspect
from dataclasses import dataclass, field
from typing import Any, Callable, Optional, Sequence, Type, Union

from .retry import RetryPolicy, RetryError
from .circuit_breaker import CircuitBreaker, CircuitBreakerOpen
from .fallback import FallbackChain, FallbackError
from .validator import ResultValidator, ValidationError
from .tracer import ToolExecutionTracer
from .rate_limit import RateLimitHandler


@dataclass
class ResilientTool:
    """
    Decorator/wrapper for making tool calls resilient.

    Combines retry, circuit breaker, fallback, validation, and rate limiting
    into a single easy-to-use interface.

    Attributes:
        retry: Retry policy for transient failures
        circuit_breaker: Circuit breaker for cascade prevention
        fallback: Fallback function or chain for graceful degradation
        validator: Result validator for output verification
        rate_limit: Rate limit handler for API throttling
        tracer: Execution tracer for observability
        name: Name for this tool (used in tracing)
        timeout: Timeout in seconds (None = no timeout)
        on_error: Callback for errors (before retry/fallback)
    """
    retry: Optional[RetryPolicy] = None
    circuit_breaker: Optional[CircuitBreaker] = None
    fallback: Optional[Union[Callable[..., Any], FallbackChain]] = None
    validator: Optional[ResultValidator] = None
    rate_limit: Optional[RateLimitHandler] = None
    tracer: Optional[ToolExecutionTracer] = None
    name: Optional[str] = None
    timeout: Optional[float] = None
    on_error: Optional[Callable[[Exception], None]] = None

    def __call__(self, func: Callable[..., Any]) -> Callable[..., Any]:
        """Use as decorator."""
        tool_name = self.name or func.__name__

        if asyncio.iscoroutinefunction(func):
            @functools.wraps(func)
            async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
                return await self._execute_async(func, tool_name, *args, **kwargs)
            return async_wrapper
        else:
            @functools.wraps(func)
            def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
                return self._execute(func, tool_name, *args, **kwargs)
            return sync_wrapper

    def _execute(
        self,
        func: Callable[..., Any],
        tool_name: str,
        *args: Any,
        **kwargs: Any
    ) -> Any:
        """Execute the function with all resilience features."""
        attempt = 0
        last_exception: Optional[Exception] = None
        exec_id: Optional[str] = None

        # Check circuit breaker first
        if self.circuit_breaker:
            if self.circuit_breaker.is_open:
                if self.tracer:
                    self.tracer.record_circuit_open(tool_name)

                # Try fallback
                if self.fallback:
                    return self._execute_fallback(
                        tool_name, args, kwargs,
                        CircuitBreakerOpen("Circuit is open")
                    )

                raise CircuitBreakerOpen("Circuit breaker is open")

        max_attempts = self.retry.max_attempts if self.retry else 1

        while attempt < max_attempts:
            attempt += 1

            try:
                # Rate limiting
                if self.rate_limit:
                    self.rate_limit.acquire()

                # Start tracing
                if self.tracer:
                    exec_id = self.tracer.start_execution(tool_name, attempt)

                # Execute with timeout
                if self.timeout:
                    import signal

                    def timeout_handler(signum, frame):
                        raise TimeoutError(f"Tool {tool_name} timed out after {self.timeout}s")

                    old_handler = signal.signal(signal.SIGALRM, timeout_handler)
                    signal.alarm(int(self.timeout))
                    try:
                        result = func(*args, **kwargs)
                    finally:
                        signal.alarm(0)
                        signal.signal(signal.SIGALRM, old_handler)
                else:
                    result = func(*args, **kwargs)

                # Validate result
                if self.validator:
                    result = self.validator.validate(result)

                # Record success
                if self.circuit_breaker:
                    self.circuit_breaker._record_success()

                if self.tracer:
                    self.tracer.record_success(exec_id, tool_name, attempt, result)

                return result

            except Exception as e:
                last_exception = e

                if self.on_error:
                    self.on_error(e)

                # Record failure in circuit breaker
                if self.circuit_breaker:
                    self.circuit_breaker._record_failure(e)

                # Check if we should retry
                should_retry = (
                    self.retry and
                    attempt < max_attempts and
                    self.retry.should_retry(e)
                )

                # Record in tracer
                if self.tracer:
                    self.tracer.record_failure(
                        exec_id, tool_name, attempt, e,
                        will_retry=should_retry
                    )

                if should_retry:
                    # Calculate delay
                    delay = self.retry.calculate_delay(attempt - 1)

                    if self.retry.on_retry:
                        self.retry.on_retry(attempt, e, delay)

                    import time
                    time.sleep(delay)
                else:
                    break

        # All retries exhausted, try fallback
        if self.fallback:
            return self._execute_fallback(tool_name, args, kwargs, last_exception)

        # No fallback, raise the error
        if self.retry and last_exception:
            raise RetryError(
                f"All {max_attempts} attempts failed for {tool_name}",
                attempts=max_attempts,
                last_exception=last_exception
            )

        raise last_exception

    async def _execute_async(
        self,
        func: Callable[..., Any],
        tool_name: str,
        *args: Any,
        **kwargs: Any
    ) -> Any:
        """Execute an async function with all resilience features."""
        attempt = 0
        last_exception: Optional[Exception] = None
        exec_id: Optional[str] = None

        # Check circuit breaker first
        if self.circuit_breaker:
            if self.circuit_breaker.is_open:
                if self.tracer:
                    self.tracer.record_circuit_open(tool_name)

                # Try fallback
                if self.fallback:
                    return await self._execute_fallback_async(
                        tool_name, args, kwargs,
                        CircuitBreakerOpen("Circuit is open")
                    )

                raise CircuitBreakerOpen("Circuit breaker is open")

        max_attempts = self.retry.max_attempts if self.retry else 1

        while attempt < max_attempts:
            attempt += 1

            try:
                # Rate limiting
                if self.rate_limit:
                    await self.rate_limit.acquire_async()

                # Start tracing
                if self.tracer:
                    exec_id = self.tracer.start_execution(tool_name, attempt)

                # Execute with timeout
                if self.timeout:
                    result = await asyncio.wait_for(
                        func(*args, **kwargs),
                        timeout=self.timeout
                    )
                else:
                    result = await func(*args, **kwargs)

                # Validate result
                if self.validator:
                    result = self.validator.validate(result)

                # Record success
                if self.circuit_breaker:
                    self.circuit_breaker._record_success()

                if self.tracer:
                    self.tracer.record_success(exec_id, tool_name, attempt, result)

                return result

            except Exception as e:
                last_exception = e

                if self.on_error:
                    self.on_error(e)

                # Record failure in circuit breaker
                if self.circuit_breaker:
                    self.circuit_breaker._record_failure(e)

                # Check if we should retry
                should_retry = (
                    self.retry and
                    attempt < max_attempts and
                    self.retry.should_retry(e)
                )

                # Record in tracer
                if self.tracer:
                    self.tracer.record_failure(
                        exec_id, tool_name, attempt, e,
                        will_retry=should_retry
                    )

                if should_retry:
                    # Calculate delay
                    delay = self.retry.calculate_delay(attempt - 1)

                    if self.retry.on_retry:
                        self.retry.on_retry(attempt, e, delay)

                    await asyncio.sleep(delay)
                else:
                    break

        # All retries exhausted, try fallback
        if self.fallback:
            return await self._execute_fallback_async(
                tool_name, args, kwargs, last_exception
            )

        # No fallback, raise the error
        if self.retry and last_exception:
            raise RetryError(
                f"All {max_attempts} attempts failed for {tool_name}",
                attempts=max_attempts,
                last_exception=last_exception
            )

        raise last_exception

    def _execute_fallback(
        self,
        tool_name: str,
        args: tuple,
        kwargs: dict,
        primary_exception: Exception
    ) -> Any:
        """Execute the fallback."""
        if isinstance(self.fallback, FallbackChain):
            result = self.fallback.execute(
                *args,
                primary_exception=primary_exception,
                **kwargs
            )
        else:
            result = self.fallback(*args, **kwargs)

        if self.tracer:
            fallback_name = (
                self.fallback.__name__
                if callable(self.fallback) and hasattr(self.fallback, "__name__")
                else "fallback"
            )
            self.tracer.record_fallback(tool_name, fallback_name, result)

        return result

    async def _execute_fallback_async(
        self,
        tool_name: str,
        args: tuple,
        kwargs: dict,
        primary_exception: Exception
    ) -> Any:
        """Execute the fallback asynchronously."""
        if isinstance(self.fallback, FallbackChain):
            if asyncio.iscoroutinefunction(self.fallback.fallbacks[0]):
                result = await self.fallback.execute_async(
                    *args,
                    primary_exception=primary_exception,
                    **kwargs
                )
            else:
                result = self.fallback.execute(
                    *args,
                    primary_exception=primary_exception,
                    **kwargs
                )
        else:
            if asyncio.iscoroutinefunction(self.fallback):
                result = await self.fallback(*args, **kwargs)
            else:
                result = self.fallback(*args, **kwargs)

        if self.tracer:
            fallback_name = (
                self.fallback.__name__
                if callable(self.fallback) and hasattr(self.fallback, "__name__")
                else "fallback"
            )
            self.tracer.record_fallback(tool_name, fallback_name, result)

        return result


def resilient_tool(
    retry: Optional[RetryPolicy] = None,
    circuit_breaker: Optional[CircuitBreaker] = None,
    fallback: Optional[Union[Callable[..., Any], FallbackChain]] = None,
    validator: Optional[ResultValidator] = None,
    rate_limit: Optional[RateLimitHandler] = None,
    tracer: Optional[ToolExecutionTracer] = None,
    name: Optional[str] = None,
    timeout: Optional[float] = None,
    on_error: Optional[Callable[[Exception], None]] = None
) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
    """
    Decorator factory for resilient tools.

    Usage:
        @resilient_tool(retry=RetryPolicy(max_attempts=3))
        def my_tool():
            ...
    """
    wrapper = ResilientTool(
        retry=retry,
        circuit_breaker=circuit_breaker,
        fallback=fallback,
        validator=validator,
        rate_limit=rate_limit,
        tracer=tracer,
        name=name,
        timeout=timeout,
        on_error=on_error
    )
    return wrapper.__call__
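For orientation, a minimal sketch of composing these pieces with the resilient_tool decorator (not part of the package diff; get_weather and cached_weather are hypothetical, and whether a given exception is retried depends on the RetryPolicy defaults defined in retry.py, which is not shown here). Note that the synchronous timeout path uses SIGALRM, so it applies only on the main thread of Unix-like systems.

from agent_tool_resilience.resilient_tool import resilient_tool
from agent_tool_resilience.retry import RetryPolicy
from agent_tool_resilience.rate_limit import RateLimitHandler

# Hypothetical fallback used once the primary tool gives up.
def cached_weather(city: str) -> dict:
    return {"city": city, "temp_c": None, "source": "cache"}

@resilient_tool(
    retry=RetryPolicy(max_attempts=3),               # max_attempts appears in the docstring above
    rate_limit=RateLimitHandler(requests_per_minute=30),
    fallback=cached_weather,                         # plain callables are accepted alongside FallbackChain
    timeout=5.0,
)
def get_weather(city: str) -> dict:
    # Hypothetical flaky tool call; the wrapper rate-limits, retries, then falls back.
    raise ConnectionError("upstream unavailable")

print(get_weather("Lisbon"))  # returns cached_weather's result after the attempts fail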