agent-tool-resilience 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,261 @@
1
+ """
2
+ Rate limit handling for API calls.
3
+ """
4
+
5
+ import asyncio
6
+ import threading
7
+ import time
8
+ from collections import deque
9
+ from dataclasses import dataclass, field
10
+ from typing import Any, Callable, Optional
11
+
12
+
13
class RateLimitExceeded(Exception):
    """Raised when rate limit is exceeded and blocking is disabled.

    Attributes:
        retry_after: Value supplied by the raiser describing how long to
            wait before trying again, or None when no hint was given.
    """

    def __init__(self, message: str, retry_after: Optional[float] = None):
        # Store the hint first; the base class keeps ``message`` in ``args``.
        self.retry_after = retry_after
        super().__init__(message)
19
+
20
+
21
@dataclass
class RateLimitHandler:
    """
    Handles rate limiting for API calls.

    Supports:
    - Request counting with sliding window
    - Automatic throttling
    - Respecting Retry-After headers
    - Multiple rate limit tiers

    Attributes:
        requests_per_minute: Maximum requests per minute (0 = unlimited)
        requests_per_second: Maximum requests per second (0 = unlimited)
        auto_throttle: Whether to automatically delay requests
        respect_retry_after: Whether to respect Retry-After from exceptions
        on_throttle: Callback when throttling occurs

    Concurrency:
        The internal lock is held only while the sliding windows are
        inspected or updated. It is never held while sleeping or while
        ``on_throttle`` runs, so the callback may safely call back into
        this handler (for example ``get_stats``).
    """
    requests_per_minute: int = 0
    requests_per_second: int = 0
    auto_throttle: bool = True
    respect_retry_after: bool = True
    on_throttle: Optional[Callable[[float], None]] = None

    _minute_window: deque = field(default_factory=deque, init=False)
    _second_window: deque = field(default_factory=deque, init=False)
    _lock: threading.Lock = field(default_factory=threading.Lock, init=False)
    _retry_after_until: Optional[float] = field(default=None, init=False)

    def _clean_windows(self, now: float) -> None:
        """Remove old entries from sliding windows.

        An entry that is exactly 60s (or 1s) old has left the window: the
        wait computed for it in ``_get_wait_time`` is zero, so it must not
        keep occupying a slot.
        """
        while self._minute_window and now - self._minute_window[0] >= 60:
            self._minute_window.popleft()

        while self._second_window and now - self._second_window[0] >= 1:
            self._second_window.popleft()

    def _get_wait_time(self, now: float) -> float:
        """Calculate how long to wait before the next request.

        The result is the largest of the retry-after delay and the delays
        needed for the oldest entry of each full window to expire.
        """
        wait_time = 0.0

        # Check retry-after
        if self._retry_after_until and now < self._retry_after_until:
            wait_time = max(wait_time, self._retry_after_until - now)

        # Check per-minute limit
        if self.requests_per_minute > 0:
            if len(self._minute_window) >= self.requests_per_minute:
                wait_time = max(wait_time, 60 - (now - self._minute_window[0]))

        # Check per-second limit
        if self.requests_per_second > 0:
            if len(self._second_window) >= self.requests_per_second:
                wait_time = max(wait_time, 1 - (now - self._second_window[0]))

        return wait_time

    def _record_request(self, now: float) -> None:
        """Record a request in the sliding windows."""
        if self.requests_per_minute > 0:
            self._minute_window.append(now)
        if self.requests_per_second > 0:
            self._second_window.append(now)

    def _reserve_slot(self) -> float:
        """Atomically claim a request slot if one is free.

        Returns:
            0 (or less) when the request was recorded, otherwise the number
            of seconds the caller has to wait before trying again.
        """
        with self._lock:
            now = time.time()
            self._clean_windows(now)
            wait_time = self._get_wait_time(now)
            if wait_time <= 0:
                # Check and record under one lock acquisition so that two
                # concurrent callers cannot both take the last free slot.
                self._record_request(now)
            return wait_time

    def _before_wait(self, wait_time: float) -> None:
        """Raise or notify, depending on ``auto_throttle``, before a wait.

        Raises:
            RateLimitExceeded: If ``auto_throttle`` is False.
        """
        if not self.auto_throttle:
            raise RateLimitExceeded(
                f"Rate limit exceeded, retry after {wait_time:.2f}s",
                retry_after=wait_time
            )
        if self.on_throttle:
            self.on_throttle(wait_time)

    def set_retry_after(self, seconds: float) -> None:
        """Set a retry-after delay from an API response."""
        with self._lock:
            self._retry_after_until = time.time() + seconds

    def check_rate_limit(self) -> float:
        """
        Check if we're within rate limits.

        Returns:
            Wait time in seconds (0 if no wait needed)
        """
        with self._lock:
            now = time.time()
            self._clean_windows(now)
            return self._get_wait_time(now)

    def acquire(self) -> None:
        """
        Acquire permission to make a request.

        Blocks if auto_throttle is True, otherwise raises RateLimitExceeded.

        The limits are re-checked after every wait, so ``on_throttle`` may be
        invoked more than once when several callers compete for a slot.

        Raises:
            RateLimitExceeded: If a wait is required and ``auto_throttle``
                is False.
        """
        while True:
            wait_time = self._reserve_slot()
            if wait_time <= 0:
                return
            self._before_wait(wait_time)
            # Sleep without holding the lock so other threads stay responsive.
            time.sleep(wait_time)

    async def acquire_async(self) -> None:
        """
        Async version of acquire.

        Blocks if auto_throttle is True, otherwise raises RateLimitExceeded.

        Raises:
            RateLimitExceeded: If a wait is required and ``auto_throttle``
                is False.
        """
        while True:
            wait_time = self._reserve_slot()
            if wait_time <= 0:
                return
            self._before_wait(wait_time)
            # Re-check after waking: other coroutines that slept for the same
            # duration may already have taken the slots that became free.
            await asyncio.sleep(wait_time)

    def _remember_retry_after(self, exception: Exception) -> None:
        """Store a retry-after hint carried by ``exception``, if enabled."""
        if not self.respect_retry_after:
            return
        retry_after = self._extract_retry_after(exception)
        if retry_after:
            self.set_retry_after(retry_after)

    def execute(
        self,
        func: Callable[..., Any],
        *args: Any,
        **kwargs: Any
    ) -> Any:
        """
        Execute a function with rate limiting.

        Args:
            func: Function to execute
            *args: Positional arguments
            **kwargs: Keyword arguments

        Returns:
            Function's return value

        Raises:
            RateLimitExceeded: From ``acquire`` when throttling is disabled.
            Exception: Whatever ``func`` raises, re-raised unchanged after
                any retry-after hint on it has been stored.
        """
        self.acquire()

        try:
            return func(*args, **kwargs)
        except Exception as e:
            # Check for rate limit response
            self._remember_retry_after(e)
            raise

    async def execute_async(
        self,
        func: Callable[..., Any],
        *args: Any,
        **kwargs: Any
    ) -> Any:
        """
        Execute an async function with rate limiting.

        Args:
            func: Async function to execute
            *args: Positional arguments
            **kwargs: Keyword arguments

        Returns:
            Function's return value

        Raises:
            RateLimitExceeded: From ``acquire_async`` when throttling is
                disabled.
            Exception: Whatever ``func`` raises, re-raised unchanged after
                any retry-after hint on it has been stored.
        """
        await self.acquire_async()

        try:
            return await func(*args, **kwargs)
        except Exception as e:
            # Check for rate limit response
            self._remember_retry_after(e)
            raise

    @staticmethod
    def _parse_delay(value: Any) -> Optional[float]:
        """Convert ``value`` to a finite, non-negative number of seconds.

        Returns None for missing, non-numeric, negative, NaN or infinite
        values instead of raising.
        """
        if value is None:
            return None
        try:
            seconds = float(value)
        except (TypeError, ValueError):
            return None
        # The chained comparison is False for NaN as well as out-of-range.
        if not (0 <= seconds < float("inf")):
            return None
        return seconds

    def _extract_retry_after(self, exception: Exception) -> Optional[float]:
        """Try to extract retry-after from an exception.

        Looks at a ``retry_after`` attribute first and falls back to a
        ``Retry-After`` entry in ``exception.response.headers``. Never
        raises: an unusable value is treated as absent, so the exception
        being inspected is not masked by a conversion error.
        """
        # Check for retry_after attribute
        delay = self._parse_delay(getattr(exception, "retry_after", None))
        if delay is not None:
            return delay

        # Check for response with Retry-After header
        response = getattr(exception, "response", None)
        header_lookup = getattr(getattr(response, "headers", None), "get", None)
        if callable(header_lookup):
            return self._parse_delay(header_lookup("Retry-After"))

        return None

    def get_stats(self) -> dict:
        """Get rate limiter statistics.

        Returns:
            A dict with the configured limits, the current window counts and
            the retry-after state, all taken under a single lock acquisition.
        """
        with self._lock:
            now = time.time()
            self._clean_windows(now)
            until = self._retry_after_until

            return {
                "requests_per_minute_limit": self.requests_per_minute,
                "requests_per_second_limit": self.requests_per_second,
                "current_minute_count": len(self._minute_window),
                "current_second_count": len(self._second_window),
                "retry_after_active": until is not None and now < until,
                "retry_after_remaining": max(0, until - now) if until else 0,
            }

    def reset(self) -> None:
        """Reset all rate limit state."""
        with self._lock:
            self._minute_window.clear()
            self._second_window.clear()
            self._retry_after_until = None
@@ -0,0 +1,393 @@
1
+ """
2
+ Main decorator and class for resilient tool execution.
3
+ """
4
+
5
+ import asyncio
6
+ import functools
7
+ import inspect
8
+ from dataclasses import dataclass, field
9
+ from typing import Any, Callable, Optional, Sequence, Type, Union
10
+
11
+ from .retry import RetryPolicy, RetryError
12
+ from .circuit_breaker import CircuitBreaker, CircuitBreakerOpen
13
+ from .fallback import FallbackChain, FallbackError
14
+ from .validator import ResultValidator, ValidationError
15
+ from .tracer import ToolExecutionTracer
16
+ from .rate_limit import RateLimitHandler
17
+
18
+
19
@dataclass
class ResilientTool:
    """
    Decorator/wrapper for making tool calls resilient.

    Combines retry, circuit breaker, fallback, validation, and rate limiting
    into a single easy-to-use interface.

    Attributes:
        retry: Retry policy for transient failures
        circuit_breaker: Circuit breaker for cascade prevention
        fallback: Fallback function or chain for graceful degradation
        validator: Result validator for output verification
        rate_limit: Rate limit handler for API throttling
        tracer: Execution tracer for observability
        name: Name for this tool (used in tracing)
        timeout: Timeout in seconds (None = no timeout)
        on_error: Callback for errors (before retry/fallback)
    """
    retry: Optional[RetryPolicy] = None
    circuit_breaker: Optional[CircuitBreaker] = None
    fallback: Optional[Union[Callable[..., Any], FallbackChain]] = None
    validator: Optional[ResultValidator] = None
    rate_limit: Optional[RateLimitHandler] = None
    tracer: Optional[ToolExecutionTracer] = None
    name: Optional[str] = None
    timeout: Optional[float] = None
    on_error: Optional[Callable[[Exception], None]] = None

    def __call__(self, func: Callable[..., Any]) -> Callable[..., Any]:
        """Use as decorator.

        Returns an ``async`` wrapper for coroutine functions and a plain
        wrapper otherwise; both route the call through the resilience
        pipeline of this instance.
        """
        # Callables such as functools.partial objects have no __name__.
        tool_name = self.name or getattr(func, "__name__", type(func).__name__)

        # inspect.iscoroutinefunction replaces the deprecated asyncio variant.
        if inspect.iscoroutinefunction(func):
            @functools.wraps(func)
            async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
                return await self._execute_async(func, tool_name, *args, **kwargs)
            return async_wrapper

        @functools.wraps(func)
        def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
            return self._execute(func, tool_name, *args, **kwargs)
        return sync_wrapper

    # ------------------------------------------------------------------
    # Helpers shared by the sync and async pipelines
    # ------------------------------------------------------------------

    def _max_attempts(self) -> int:
        """Return the number of attempts to make, never less than one."""
        if not self.retry:
            return 1
        # A policy with max_attempts <= 0 would otherwise skip the call
        # entirely and end in ``raise None``.
        return max(1, self.retry.max_attempts)

    def _circuit_is_open(self, tool_name: str) -> bool:
        """Report (and trace) whether the circuit breaker blocks the call."""
        if self.circuit_breaker and self.circuit_breaker.is_open:
            if self.tracer:
                self.tracer.record_circuit_open(tool_name)
            return True
        return False

    def _finish_success(
        self,
        exec_id: Optional[str],
        tool_name: str,
        attempt: int,
        result: Any
    ) -> Any:
        """Validate ``result`` and record the success; return the result."""
        # Validate result
        if self.validator:
            result = self.validator.validate(result)

        # Record success
        if self.circuit_breaker:
            self.circuit_breaker._record_success()

        if self.tracer:
            self.tracer.record_success(exec_id, tool_name, attempt, result)

        return result

    def _handle_failure(
        self,
        exec_id: Optional[str],
        tool_name: str,
        attempt: int,
        max_attempts: int,
        error: Exception
    ) -> bool:
        """Run the failure bookkeeping and decide whether to retry.

        Invokes ``on_error``, records the failure in the circuit breaker and
        the tracer, and returns True when another attempt should be made.
        """
        if self.on_error:
            self.on_error(error)

        # Record failure in circuit breaker
        if self.circuit_breaker:
            self.circuit_breaker._record_failure(error)

        # Check if we should retry (always a real bool for the tracer)
        should_retry = bool(
            self.retry and
            attempt < max_attempts and
            self.retry.should_retry(error)
        )

        # Record in tracer
        if self.tracer:
            self.tracer.record_failure(
                exec_id, tool_name, attempt, error,
                will_retry=should_retry
            )

        return should_retry

    def _retry_delay(self, attempt: int, error: Exception) -> float:
        """Compute the delay before the next attempt and notify ``on_retry``."""
        delay = self.retry.calculate_delay(attempt - 1)

        if self.retry.on_retry:
            self.retry.on_retry(attempt, error, delay)

        return delay

    def _raise_failure(
        self,
        tool_name: str,
        attempts: int,
        last_exception: Optional[Exception]
    ) -> None:
        """Raise the terminal error once no attempt succeeded.

        Raises:
            RetryError: When a retry policy is configured; carries the number
                of attempts actually made and chains the last exception.
            Exception: The last exception itself when no policy is set.
        """
        if self.retry and last_exception is not None:
            raise RetryError(
                f"All {attempts} attempts failed for {tool_name}",
                attempts=attempts,
                last_exception=last_exception
            ) from last_exception

        raise last_exception

    def _call_with_timeout(
        self,
        func: Callable[..., Any],
        tool_name: str,
        args: tuple,
        kwargs: dict
    ) -> Any:
        """Call ``func`` and raise ``TimeoutError`` after ``self.timeout`` seconds.

        Uses the SIGALRM interval timer, which accepts fractional seconds;
        ``signal.alarm`` only takes whole seconds, and a value truncated to 0
        would disable the timeout altogether.

        NOTE: SIGALRM handlers can only be installed from the main thread and
        the signal is not available on every platform.
        """
        import signal

        def timeout_handler(signum, frame):
            raise TimeoutError(f"Tool {tool_name} timed out after {self.timeout}s")

        old_handler = signal.signal(signal.SIGALRM, timeout_handler)
        try:
            signal.setitimer(signal.ITIMER_REAL, self.timeout)
            return func(*args, **kwargs)
        finally:
            # Cancel the timer before restoring the previous handler.
            signal.setitimer(signal.ITIMER_REAL, 0)
            signal.signal(signal.SIGALRM, old_handler)

    def _record_fallback(self, tool_name: str, result: Any) -> None:
        """Report a fallback result to the tracer, if one is configured."""
        if self.tracer:
            fallback_name = (
                self.fallback.__name__
                if callable(self.fallback) and hasattr(self.fallback, "__name__")
                else "fallback"
            )
            self.tracer.record_fallback(tool_name, fallback_name, result)

    # ------------------------------------------------------------------
    # Pipelines
    # ------------------------------------------------------------------

    def _execute(
        self,
        func: Callable[..., Any],
        tool_name: str,
        *args: Any,
        **kwargs: Any
    ) -> Any:
        """Execute the function with all resilience features.

        Order per attempt: rate limit, tracing start, call (with optional
        timeout), validation, success bookkeeping. On failure the error is
        reported, then either retried after the policy's delay or handed to
        the fallback.

        Raises:
            CircuitBreakerOpen: If the circuit is open and no fallback is set.
            RetryError: If a retry policy is set and no attempt succeeded.
            Exception: The last error when neither retry nor fallback is set.
        """
        import time

        # Check circuit breaker first
        if self._circuit_is_open(tool_name):
            # Try fallback
            if self.fallback:
                return self._execute_fallback(
                    tool_name, args, kwargs,
                    CircuitBreakerOpen("Circuit is open")
                )
            raise CircuitBreakerOpen("Circuit breaker is open")

        max_attempts = self._max_attempts()
        attempt = 0
        last_exception: Optional[Exception] = None

        while attempt < max_attempts:
            attempt += 1
            # Reset per attempt so a failure before tracing starts is not
            # attributed to the previous attempt's execution id.
            exec_id: Optional[str] = None

            try:
                # Rate limiting
                if self.rate_limit:
                    self.rate_limit.acquire()

                # Start tracing
                if self.tracer:
                    exec_id = self.tracer.start_execution(tool_name, attempt)

                # Execute with timeout
                if self.timeout:
                    result = self._call_with_timeout(func, tool_name, args, kwargs)
                else:
                    result = func(*args, **kwargs)

                return self._finish_success(exec_id, tool_name, attempt, result)

            except Exception as e:
                last_exception = e

                if not self._handle_failure(
                    exec_id, tool_name, attempt, max_attempts, e
                ):
                    break

                time.sleep(self._retry_delay(attempt, e))

        # All retries exhausted, try fallback
        if self.fallback:
            return self._execute_fallback(tool_name, args, kwargs, last_exception)

        # No fallback, raise the error
        self._raise_failure(tool_name, attempt, last_exception)

    async def _execute_async(
        self,
        func: Callable[..., Any],
        tool_name: str,
        *args: Any,
        **kwargs: Any
    ) -> Any:
        """Execute an async function with all resilience features.

        Mirrors ``_execute`` but awaits the rate limiter, the call (bounded
        by ``asyncio.wait_for`` when a timeout is set), the retry delay and
        the fallback.

        Raises:
            CircuitBreakerOpen: If the circuit is open and no fallback is set.
            RetryError: If a retry policy is set and no attempt succeeded.
            Exception: The last error when neither retry nor fallback is set.
        """
        # Check circuit breaker first
        if self._circuit_is_open(tool_name):
            # Try fallback
            if self.fallback:
                return await self._execute_fallback_async(
                    tool_name, args, kwargs,
                    CircuitBreakerOpen("Circuit is open")
                )
            raise CircuitBreakerOpen("Circuit breaker is open")

        max_attempts = self._max_attempts()
        attempt = 0
        last_exception: Optional[Exception] = None

        while attempt < max_attempts:
            attempt += 1
            # See _execute: never reuse a previous attempt's execution id.
            exec_id: Optional[str] = None

            try:
                # Rate limiting
                if self.rate_limit:
                    await self.rate_limit.acquire_async()

                # Start tracing
                if self.tracer:
                    exec_id = self.tracer.start_execution(tool_name, attempt)

                # Execute with timeout
                if self.timeout:
                    result = await asyncio.wait_for(
                        func(*args, **kwargs),
                        timeout=self.timeout
                    )
                else:
                    result = await func(*args, **kwargs)

                return self._finish_success(exec_id, tool_name, attempt, result)

            except Exception as e:
                last_exception = e

                if not self._handle_failure(
                    exec_id, tool_name, attempt, max_attempts, e
                ):
                    break

                await asyncio.sleep(self._retry_delay(attempt, e))

        # All retries exhausted, try fallback
        if self.fallback:
            return await self._execute_fallback_async(
                tool_name, args, kwargs, last_exception
            )

        # No fallback, raise the error
        self._raise_failure(tool_name, attempt, last_exception)

    def _execute_fallback(
        self,
        tool_name: str,
        args: tuple,
        kwargs: dict,
        primary_exception: Exception
    ) -> Any:
        """Execute the fallback.

        A ``FallbackChain`` receives the original arguments plus the
        ``primary_exception``; a plain callable receives only the original
        arguments.
        """
        if isinstance(self.fallback, FallbackChain):
            result = self.fallback.execute(
                *args,
                primary_exception=primary_exception,
                **kwargs
            )
        else:
            result = self.fallback(*args, **kwargs)

        self._record_fallback(tool_name, result)
        return result

    async def _execute_fallback_async(
        self,
        tool_name: str,
        args: tuple,
        kwargs: dict,
        primary_exception: Exception
    ) -> Any:
        """Execute the fallback asynchronously.

        For a ``FallbackChain`` the async entry point is used when its first
        fallback is a coroutine function, otherwise the sync one. For a
        plain callable the result is awaited whenever it is awaitable, which
        also covers partials and callable objects wrapping coroutines.
        """
        if isinstance(self.fallback, FallbackChain):
            # An empty chain has no first entry to inspect; let the chain's
            # own ``execute`` decide how to report that.
            chain_members = getattr(self.fallback, "fallbacks", None)
            if chain_members and inspect.iscoroutinefunction(chain_members[0]):
                result = await self.fallback.execute_async(
                    *args,
                    primary_exception=primary_exception,
                    **kwargs
                )
            else:
                result = self.fallback.execute(
                    *args,
                    primary_exception=primary_exception,
                    **kwargs
                )
        else:
            result = self.fallback(*args, **kwargs)
            if inspect.isawaitable(result):
                result = await result

        self._record_fallback(tool_name, result)
        return result
361
+
362
+
363
+ def resilient_tool(
364
+ retry: Optional[RetryPolicy] = None,
365
+ circuit_breaker: Optional[CircuitBreaker] = None,
366
+ fallback: Optional[Union[Callable[..., Any], FallbackChain]] = None,
367
+ validator: Optional[ResultValidator] = None,
368
+ rate_limit: Optional[RateLimitHandler] = None,
369
+ tracer: Optional[ToolExecutionTracer] = None,
370
+ name: Optional[str] = None,
371
+ timeout: Optional[float] = None,
372
+ on_error: Optional[Callable[[Exception], None]] = None
373
+ ) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
374
+ """
375
+ Decorator factory for resilient tools.
376
+
377
+ Usage:
378
+ @resilient_tool(retry=RetryPolicy(max_attempts=3))
379
+ def my_tool():
380
+ ...
381
+ """
382
+ wrapper = ResilientTool(
383
+ retry=retry,
384
+ circuit_breaker=circuit_breaker,
385
+ fallback=fallback,
386
+ validator=validator,
387
+ rate_limit=rate_limit,
388
+ tracer=tracer,
389
+ name=name,
390
+ timeout=timeout,
391
+ on_error=on_error
392
+ )
393
+ return wrapper.__call__