agent-tool-resilience 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_tool_resilience-0.1.0/PKG-INFO +184 -0
- agent_tool_resilience-0.1.0/README.md +152 -0
- agent_tool_resilience-0.1.0/agent_tool_resilience/__init__.py +32 -0
- agent_tool_resilience-0.1.0/agent_tool_resilience/circuit_breaker.py +246 -0
- agent_tool_resilience-0.1.0/agent_tool_resilience/fallback.py +188 -0
- agent_tool_resilience-0.1.0/agent_tool_resilience/rate_limit.py +261 -0
- agent_tool_resilience-0.1.0/agent_tool_resilience/resilient_tool.py +393 -0
- agent_tool_resilience-0.1.0/agent_tool_resilience/retry.py +215 -0
- agent_tool_resilience-0.1.0/agent_tool_resilience/tracer.py +319 -0
- agent_tool_resilience-0.1.0/agent_tool_resilience/validator.py +217 -0
- agent_tool_resilience-0.1.0/agent_tool_resilience.egg-info/PKG-INFO +184 -0
- agent_tool_resilience-0.1.0/agent_tool_resilience.egg-info/SOURCES.txt +22 -0
- agent_tool_resilience-0.1.0/agent_tool_resilience.egg-info/dependency_links.txt +1 -0
- agent_tool_resilience-0.1.0/agent_tool_resilience.egg-info/requires.txt +11 -0
- agent_tool_resilience-0.1.0/agent_tool_resilience.egg-info/top_level.txt +1 -0
- agent_tool_resilience-0.1.0/pyproject.toml +64 -0
- agent_tool_resilience-0.1.0/setup.cfg +4 -0
- agent_tool_resilience-0.1.0/tests/test_circuit_breaker.py +272 -0
- agent_tool_resilience-0.1.0/tests/test_fallback.py +238 -0
- agent_tool_resilience-0.1.0/tests/test_rate_limit.py +279 -0
- agent_tool_resilience-0.1.0/tests/test_resilient_tool.py +383 -0
- agent_tool_resilience-0.1.0/tests/test_retry.py +237 -0
- agent_tool_resilience-0.1.0/tests/test_tracer.py +243 -0
- agent_tool_resilience-0.1.0/tests/test_validator.py +257 -0
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: agent-tool-resilience
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Production-grade resilience for AI agent tool calls: retries, fallbacks, circuit breakers, and validation
|
|
5
|
+
Author-email: Korah Stone <korahcomm@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/KorahStone/agent-tool-resilience
|
|
8
|
+
Project-URL: Documentation, https://github.com/KorahStone/agent-tool-resilience#readme
|
|
9
|
+
Project-URL: Repository, https://github.com/KorahStone/agent-tool-resilience.git
|
|
10
|
+
Project-URL: Issues, https://github.com/KorahStone/agent-tool-resilience/issues
|
|
11
|
+
Keywords: ai,agents,llm,resilience,retry,circuit-breaker,fallback,tools,langchain,openai
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
Provides-Extra: jsonschema
|
|
25
|
+
Requires-Dist: jsonschema>=4.0.0; extra == "jsonschema"
|
|
26
|
+
Provides-Extra: dev
|
|
27
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
28
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
|
|
29
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
30
|
+
Provides-Extra: all
|
|
31
|
+
Requires-Dist: agent-tool-resilience[dev,jsonschema]; extra == "all"
|
|
32
|
+
|
|
33
|
+
# agent-tool-resilience
|
|
34
|
+
|
|
35
|
+
A Python library for making AI agent tool calls resilient, with smart retries, fallbacks, circuit breakers, and result validation.
|
|
36
|
+
|
|
37
|
+
## Why?
|
|
38
|
+
|
|
39
|
+
AI agents fail silently when tools break. This library provides production-grade resilience patterns:
|
|
40
|
+
|
|
41
|
+
- **Smart Retries**: Exponential backoff with jitter, respects rate limits
|
|
42
|
+
- **Fallbacks**: Graceful degradation when primary tools fail
|
|
43
|
+
- **Circuit Breakers**: Prevent cascade failures by stopping calls to broken services
|
|
44
|
+
- **Result Validation**: Ensure tool outputs meet expected schemas/conditions
|
|
45
|
+
- **Observability**: Full visibility into what happened during tool execution
|
|
46
|
+
|
|
47
|
+
## Installation
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
pip install agent-tool-resilience
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Quick Start
|
|
54
|
+
|
|
55
|
+
```python
|
|
56
|
+
from agent_tool_resilience import ResilientTool, RetryPolicy, CircuitBreaker
|
|
57
|
+
|
|
58
|
+
# Wrap any tool with resilience
|
|
59
|
+
@ResilientTool(
|
|
60
|
+
retry=RetryPolicy(max_attempts=3, backoff="exponential"),
|
|
61
|
+
circuit_breaker=CircuitBreaker(failure_threshold=5, reset_timeout=60),
|
|
62
|
+
fallback=lambda *args, **kwargs: {"error": "service unavailable", "cached": True}
|
|
63
|
+
)
|
|
64
|
+
def call_weather_api(location: str) -> dict:
|
|
65
|
+
return requests.get(f"https://api.weather.com/{location}").json()
|
|
66
|
+
|
|
67
|
+
# Use it normally - resilience is automatic
|
|
68
|
+
result = call_weather_api("NYC")
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## Features
|
|
72
|
+
|
|
73
|
+
### Retry Policies
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
from agent_tool_resilience import RetryPolicy
|
|
77
|
+
|
|
78
|
+
# Exponential backoff with jitter
|
|
79
|
+
policy = RetryPolicy(
|
|
80
|
+
max_attempts=5,
|
|
81
|
+
backoff="exponential",
|
|
82
|
+
base_delay=1.0,
|
|
83
|
+
max_delay=60.0,
|
|
84
|
+
jitter=True,
|
|
85
|
+
retry_on=[TimeoutError, ConnectionError, RateLimitError]
|
|
86
|
+
)
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### Circuit Breakers
|
|
90
|
+
|
|
91
|
+
Prevent cascade failures by temporarily stopping calls to failing services:
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
from agent_tool_resilience import CircuitBreaker
|
|
95
|
+
|
|
96
|
+
breaker = CircuitBreaker(
|
|
97
|
+
failure_threshold=5, # Open after 5 failures
|
|
98
|
+
success_threshold=2, # Close after 2 successes
|
|
99
|
+
reset_timeout=60, # Try again after 60 seconds
|
|
100
|
+
half_open_max_calls=3 # Limited calls in half-open state
|
|
101
|
+
)
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### Fallback Strategies
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
from agent_tool_resilience import FallbackChain
|
|
108
|
+
|
|
109
|
+
fallbacks = FallbackChain([
|
|
110
|
+
lambda loc: call_backup_weather_api(loc),
|
|
111
|
+
lambda loc: get_cached_weather(loc),
|
|
112
|
+
lambda loc: {"status": "unavailable", "location": loc}
|
|
113
|
+
])
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### Result Validation
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
from agent_tool_resilience import ResultValidator
|
|
120
|
+
|
|
121
|
+
validator = ResultValidator(
|
|
122
|
+
schema={"type": "object", "required": ["temperature", "humidity"]},
|
|
123
|
+
conditions=[
|
|
124
|
+
lambda r: r.get("temperature") is not None,
|
|
125
|
+
lambda r: -100 < r.get("temperature", 0) < 150
|
|
126
|
+
]
|
|
127
|
+
)
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
### Observability
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
from agent_tool_resilience import ResilientTool, ToolExecutionTracer
|
|
134
|
+
|
|
135
|
+
tracer = ToolExecutionTracer()
|
|
136
|
+
|
|
137
|
+
@ResilientTool(tracer=tracer)
|
|
138
|
+
def my_tool():
|
|
139
|
+
...
|
|
140
|
+
|
|
141
|
+
# After execution
|
|
142
|
+
print(tracer.get_execution_log())
|
|
143
|
+
# [
|
|
144
|
+
# {"tool": "my_tool", "attempt": 1, "status": "failed", "error": "timeout", "duration_ms": 5023},
|
|
145
|
+
# {"tool": "my_tool", "attempt": 2, "status": "success", "duration_ms": 234}
|
|
146
|
+
# ]
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### Rate Limit Awareness
|
|
150
|
+
|
|
151
|
+
```python
|
|
152
|
+
from agent_tool_resilience import RateLimitHandler
|
|
153
|
+
|
|
154
|
+
handler = RateLimitHandler(
|
|
155
|
+
requests_per_minute=60,
|
|
156
|
+
respect_retry_after=True,
|
|
157
|
+
auto_throttle=True
|
|
158
|
+
)
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
## Integration with Agent Frameworks
|
|
162
|
+
|
|
163
|
+
Works with any Python agent framework:
|
|
164
|
+
|
|
165
|
+
```python
|
|
166
|
+
# LangChain
|
|
167
|
+
from langchain.tools import Tool
|
|
168
|
+
from agent_tool_resilience import CircuitBreaker, ResilientTool, RetryPolicy
|
|
169
|
+
|
|
170
|
+
@ResilientTool(retry=RetryPolicy(max_attempts=3))
|
|
171
|
+
def search(query: str) -> str:
|
|
172
|
+
...
|
|
173
|
+
|
|
174
|
+
langchain_tool = Tool(name="search", func=search, description="Search the web")
|
|
175
|
+
|
|
176
|
+
# OpenAI Function Calling
|
|
177
|
+
@ResilientTool(circuit_breaker=CircuitBreaker(failure_threshold=3))
|
|
178
|
+
def get_stock_price(symbol: str) -> dict:
|
|
179
|
+
...
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
## License
|
|
183
|
+
|
|
184
|
+
MIT
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
# agent-tool-resilience
|
|
2
|
+
|
|
3
|
+
A Python library for making AI agent tool calls resilient, with smart retries, fallbacks, circuit breakers, and result validation.
|
|
4
|
+
|
|
5
|
+
## Why?
|
|
6
|
+
|
|
7
|
+
AI agents fail silently when tools break. This library provides production-grade resilience patterns:
|
|
8
|
+
|
|
9
|
+
- **Smart Retries**: Exponential backoff with jitter, respects rate limits
|
|
10
|
+
- **Fallbacks**: Graceful degradation when primary tools fail
|
|
11
|
+
- **Circuit Breakers**: Prevent cascade failures by stopping calls to broken services
|
|
12
|
+
- **Result Validation**: Ensure tool outputs meet expected schemas/conditions
|
|
13
|
+
- **Observability**: Full visibility into what happened during tool execution
|
|
14
|
+
|
|
15
|
+
## Installation
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
pip install agent-tool-resilience
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Quick Start
|
|
22
|
+
|
|
23
|
+
```python
|
|
24
|
+
from agent_tool_resilience import ResilientTool, RetryPolicy, CircuitBreaker
|
|
25
|
+
|
|
26
|
+
# Wrap any tool with resilience
|
|
27
|
+
@ResilientTool(
|
|
28
|
+
retry=RetryPolicy(max_attempts=3, backoff="exponential"),
|
|
29
|
+
circuit_breaker=CircuitBreaker(failure_threshold=5, reset_timeout=60),
|
|
30
|
+
fallback=lambda *args, **kwargs: {"error": "service unavailable", "cached": True}
|
|
31
|
+
)
|
|
32
|
+
def call_weather_api(location: str) -> dict:
|
|
33
|
+
return requests.get(f"https://api.weather.com/{location}").json()
|
|
34
|
+
|
|
35
|
+
# Use it normally - resilience is automatic
|
|
36
|
+
result = call_weather_api("NYC")
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Features
|
|
40
|
+
|
|
41
|
+
### Retry Policies
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
from agent_tool_resilience import RetryPolicy
|
|
45
|
+
|
|
46
|
+
# Exponential backoff with jitter
|
|
47
|
+
policy = RetryPolicy(
|
|
48
|
+
max_attempts=5,
|
|
49
|
+
backoff="exponential",
|
|
50
|
+
base_delay=1.0,
|
|
51
|
+
max_delay=60.0,
|
|
52
|
+
jitter=True,
|
|
53
|
+
retry_on=[TimeoutError, ConnectionError, RateLimitError]
|
|
54
|
+
)
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### Circuit Breakers
|
|
58
|
+
|
|
59
|
+
Prevent cascade failures by temporarily stopping calls to failing services:
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
from agent_tool_resilience import CircuitBreaker
|
|
63
|
+
|
|
64
|
+
breaker = CircuitBreaker(
|
|
65
|
+
failure_threshold=5, # Open after 5 failures
|
|
66
|
+
success_threshold=2, # Close after 2 successes
|
|
67
|
+
reset_timeout=60, # Try again after 60 seconds
|
|
68
|
+
half_open_max_calls=3 # Limited calls in half-open state
|
|
69
|
+
)
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### Fallback Strategies
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
from agent_tool_resilience import FallbackChain
|
|
76
|
+
|
|
77
|
+
fallbacks = FallbackChain([
|
|
78
|
+
lambda loc: call_backup_weather_api(loc),
|
|
79
|
+
lambda loc: get_cached_weather(loc),
|
|
80
|
+
lambda loc: {"status": "unavailable", "location": loc}
|
|
81
|
+
])
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
### Result Validation
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
from agent_tool_resilience import ResultValidator
|
|
88
|
+
|
|
89
|
+
validator = ResultValidator(
|
|
90
|
+
schema={"type": "object", "required": ["temperature", "humidity"]},
|
|
91
|
+
conditions=[
|
|
92
|
+
lambda r: r.get("temperature") is not None,
|
|
93
|
+
lambda r: -100 < r.get("temperature", 0) < 150
|
|
94
|
+
]
|
|
95
|
+
)
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
### Observability
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
from agent_tool_resilience import ResilientTool, ToolExecutionTracer
|
|
102
|
+
|
|
103
|
+
tracer = ToolExecutionTracer()
|
|
104
|
+
|
|
105
|
+
@ResilientTool(tracer=tracer)
|
|
106
|
+
def my_tool():
|
|
107
|
+
...
|
|
108
|
+
|
|
109
|
+
# After execution
|
|
110
|
+
print(tracer.get_execution_log())
|
|
111
|
+
# [
|
|
112
|
+
# {"tool": "my_tool", "attempt": 1, "status": "failed", "error": "timeout", "duration_ms": 5023},
|
|
113
|
+
# {"tool": "my_tool", "attempt": 2, "status": "success", "duration_ms": 234}
|
|
114
|
+
# ]
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
### Rate Limit Awareness
|
|
118
|
+
|
|
119
|
+
```python
|
|
120
|
+
from agent_tool_resilience import RateLimitHandler
|
|
121
|
+
|
|
122
|
+
handler = RateLimitHandler(
|
|
123
|
+
requests_per_minute=60,
|
|
124
|
+
respect_retry_after=True,
|
|
125
|
+
auto_throttle=True
|
|
126
|
+
)
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
## Integration with Agent Frameworks
|
|
130
|
+
|
|
131
|
+
Works with any Python agent framework:
|
|
132
|
+
|
|
133
|
+
```python
|
|
134
|
+
# LangChain
|
|
135
|
+
from langchain.tools import Tool
|
|
136
|
+
from agent_tool_resilience import CircuitBreaker, ResilientTool, RetryPolicy
|
|
137
|
+
|
|
138
|
+
@ResilientTool(retry=RetryPolicy(max_attempts=3))
|
|
139
|
+
def search(query: str) -> str:
|
|
140
|
+
...
|
|
141
|
+
|
|
142
|
+
langchain_tool = Tool(name="search", func=search, description="Search the web")
|
|
143
|
+
|
|
144
|
+
# OpenAI Function Calling
|
|
145
|
+
@ResilientTool(circuit_breaker=CircuitBreaker(failure_threshold=3))
|
|
146
|
+
def get_stock_price(symbol: str) -> dict:
|
|
147
|
+
...
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
## License
|
|
151
|
+
|
|
152
|
+
MIT
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""
|
|
2
|
+
agent-tool-resilience: Production-grade resilience for AI agent tool calls.
|
|
3
|
+
|
|
4
|
+
Provides smart retries, fallbacks, circuit breakers, and result validation
|
|
5
|
+
to prevent silent failures in AI agent workflows.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from .retry import RetryPolicy, RetryError
|
|
9
|
+
from .circuit_breaker import CircuitBreaker, CircuitBreakerOpen
|
|
10
|
+
from .fallback import FallbackChain, FallbackError
|
|
11
|
+
from .validator import ResultValidator, ValidationError
|
|
12
|
+
from .tracer import ToolExecutionTracer, ExecutionEvent
|
|
13
|
+
from .rate_limit import RateLimitHandler, RateLimitExceeded
|
|
14
|
+
from .resilient_tool import ResilientTool, resilient_tool
|
|
15
|
+
|
|
16
|
+
__version__ = "0.1.0"
|
|
17
|
+
__all__ = [
|
|
18
|
+
"ResilientTool",
|
|
19
|
+
"resilient_tool",
|
|
20
|
+
"RetryPolicy",
|
|
21
|
+
"RetryError",
|
|
22
|
+
"CircuitBreaker",
|
|
23
|
+
"CircuitBreakerOpen",
|
|
24
|
+
"FallbackChain",
|
|
25
|
+
"FallbackError",
|
|
26
|
+
"ResultValidator",
|
|
27
|
+
"ValidationError",
|
|
28
|
+
"ToolExecutionTracer",
|
|
29
|
+
"ExecutionEvent",
|
|
30
|
+
"RateLimitHandler",
|
|
31
|
+
"RateLimitExceeded",
|
|
32
|
+
]
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Circuit breaker pattern to prevent cascade failures.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import threading
|
|
7
|
+
import time
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
from enum import Enum
|
|
10
|
+
from typing import Any, Callable, Optional
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class CircuitState(Enum):
    """Circuit breaker states.

    Each member's value is the lowercase form of its name.
    """

    # Normal operation, requests pass through.
    CLOSED = "closed"
    # Circuit is open, requests fail immediately.
    OPEN = "open"
    # Testing if the service has recovered.
    HALF_OPEN = "half_open"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class CircuitBreakerOpen(Exception):
    """Error signalling that a circuit breaker refused to run a call.

    Attributes:
        reset_time: Optional float supplied by whoever raises the error, or
            ``None`` when no value was given.
    """

    def __init__(self, message: str, reset_time: Optional[float] = None):
        # Store the extra attribute first; the base class only needs the message.
        self.reset_time = reset_time
        super().__init__(message)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class CircuitBreaker:
    """
    Circuit breaker to prevent cascade failures.

    The circuit breaker has three states:
    - CLOSED: Normal operation, requests pass through
    - OPEN: Too many failures, requests fail immediately
    - HALF_OPEN: Testing recovery, limited requests allowed

    Attributes:
        failure_threshold: Number of recorded failures that opens a closed
            circuit (the count is reset to zero by any success while closed)
        success_threshold: Number of successes in half-open to close circuit
        reset_timeout: Seconds to wait before transitioning from open to half-open
        half_open_max_calls: Maximum number of calls admitted during one
            half-open period; the slot counter is only reset when the circuit
            closes or re-enters half-open, it is not decremented when a call
            finishes
        exclude_exceptions: Exception types that don't count as failures.
            A single class, any iterable of classes, or None is accepted and
            normalised to a tuple.
        on_state_change: Callback ``(old_state, new_state)`` invoked on every
            actual state change. It runs while the breaker's internal lock is
            held; the lock is re-entrant, so the callback may safely read
            ``state``, ``failure_count`` or ``get_stats()``.
    """
    failure_threshold: int = 5
    success_threshold: int = 2
    reset_timeout: float = 60.0
    half_open_max_calls: int = 3
    exclude_exceptions: tuple[type, ...] = field(default_factory=tuple)
    on_state_change: Optional[Callable[[CircuitState, CircuitState], None]] = None

    # Internal state (not part of config)
    _state: CircuitState = field(default=CircuitState.CLOSED, init=False)
    _failure_count: int = field(default=0, init=False)
    _success_count: int = field(default=0, init=False)
    _last_failure_time: Optional[float] = field(default=None, init=False)
    _half_open_calls: int = field(default=0, init=False)
    # Re-entrant on purpose: on_state_change is called with the lock held, and
    # a callback that inspects the breaker would deadlock on a plain Lock.
    _lock: Any = field(default_factory=threading.RLock, init=False)

    def __post_init__(self) -> None:
        """Normalise ``exclude_exceptions`` so ``isinstance`` always accepts it."""
        excluded = self.exclude_exceptions
        if excluded is None:
            excluded = ()
        elif isinstance(excluded, type):
            excluded = (excluded,)
        # isinstance() rejects lists/sets; without this a list would raise
        # TypeError inside the failure handler and mask the tool's own error.
        self.exclude_exceptions = tuple(excluded)

    @property
    def state(self) -> CircuitState:
        """Get current circuit state, applying a due open -> half-open transition."""
        with self._lock:
            self._maybe_transition_to_half_open()
            return self._state

    @property
    def failure_count(self) -> int:
        """Get current failure count."""
        with self._lock:
            return self._failure_count

    @property
    def is_closed(self) -> bool:
        """Check if circuit is closed (normal operation)."""
        return self.state == CircuitState.CLOSED

    @property
    def is_open(self) -> bool:
        """Check if circuit is open (blocking requests)."""
        return self.state == CircuitState.OPEN

    @property
    def is_half_open(self) -> bool:
        """Check if circuit is half-open (testing recovery)."""
        return self.state == CircuitState.HALF_OPEN

    def _maybe_transition_to_half_open(self) -> None:
        """Move from open to half-open once ``reset_timeout`` has elapsed.

        Must be called with ``self._lock`` held.
        """
        if self._state != CircuitState.OPEN:
            return
        # Compare against None explicitly: a timestamp of 0.0 is still a timestamp.
        if self._last_failure_time is None:
            return
        elapsed = time.time() - self._last_failure_time
        if elapsed >= self.reset_timeout:
            self._transition_to(CircuitState.HALF_OPEN)

    def _transition_to(self, new_state: CircuitState) -> None:
        """Switch to ``new_state``, reset the relevant counters and notify.

        Must be called with ``self._lock`` held. The ``on_state_change``
        callback is only invoked when the state actually changes.
        """
        old_state = self._state
        self._state = new_state

        if new_state == CircuitState.CLOSED:
            self._failure_count = 0
            self._success_count = 0
            self._half_open_calls = 0
        elif new_state == CircuitState.HALF_OPEN:
            self._success_count = 0
            self._half_open_calls = 0
        elif new_state == CircuitState.OPEN:
            # Start (or restart) the reset_timeout clock.
            self._last_failure_time = time.time()

        if self.on_state_change and old_state != new_state:
            self.on_state_change(old_state, new_state)

    def _record_success(self) -> None:
        """Record a successful call."""
        with self._lock:
            if self._state == CircuitState.HALF_OPEN:
                self._success_count += 1
                if self._success_count >= self.success_threshold:
                    self._transition_to(CircuitState.CLOSED)
            elif self._state == CircuitState.CLOSED:
                # Reset failure count on success
                self._failure_count = 0

    def _record_failure(self, exception: Exception) -> None:
        """Record a failed call unless its exception type is excluded."""
        # Check if this exception type is excluded
        if isinstance(exception, self.exclude_exceptions):
            return

        with self._lock:
            self._failure_count += 1

            if self._state == CircuitState.HALF_OPEN:
                # Any failure in half-open immediately opens the circuit
                self._transition_to(CircuitState.OPEN)
            elif self._state == CircuitState.CLOSED:
                if self._failure_count >= self.failure_threshold:
                    self._transition_to(CircuitState.OPEN)

    def _allow_request(self) -> bool:
        """Check if a request should be allowed, claiming a half-open slot if so."""
        with self._lock:
            self._maybe_transition_to_half_open()

            if self._state == CircuitState.CLOSED:
                return True
            if self._state == CircuitState.HALF_OPEN:
                # NOTE(review): slots are never handed back. If the admitted
                # calls neither close nor re-open the circuit (e.g. they end
                # with an excluded exception), later calls keep being rejected
                # until reset() is called.
                if self._half_open_calls < self.half_open_max_calls:
                    self._half_open_calls += 1
                    return True
                return False

            # OPEN (or any unexpected state): reject.
            return False

    def _rejection_error(self) -> CircuitBreakerOpen:
        """Build the error raised for a rejected call from one consistent snapshot."""
        with self._lock:
            current_state = self._state
            last_failure = self._last_failure_time

        reset_time = None
        if last_failure is not None:
            reset_time = last_failure + self.reset_timeout
        return CircuitBreakerOpen(
            f"Circuit breaker is {current_state.value}",
            reset_time=reset_time
        )

    def execute(
        self,
        func: Callable[..., Any],
        *args: Any,
        **kwargs: Any
    ) -> Any:
        """
        Execute a function with circuit breaker protection.

        Args:
            func: Function to execute
            *args: Positional arguments
            **kwargs: Keyword arguments

        Returns:
            Function's return value

        Raises:
            CircuitBreakerOpen: If the call is rejected (circuit open, or
                half-open with no slots left)
            Exception: Whatever ``func`` raises is recorded and re-raised
        """
        if not self._allow_request():
            raise self._rejection_error()

        try:
            result = func(*args, **kwargs)
        except Exception as e:
            self._record_failure(e)
            raise
        else:
            # Outside the try body so an error raised by the state-change
            # callback is not miscounted as a failure of the wrapped call.
            self._record_success()
            return result

    async def execute_async(
        self,
        func: Callable[..., Any],
        *args: Any,
        **kwargs: Any
    ) -> Any:
        """
        Execute an async function with circuit breaker protection.

        Args:
            func: Async function to execute
            *args: Positional arguments
            **kwargs: Keyword arguments

        Returns:
            Function's return value

        Raises:
            CircuitBreakerOpen: If the call is rejected (circuit open, or
                half-open with no slots left)
            Exception: Whatever ``func`` raises is recorded and re-raised
        """
        if not self._allow_request():
            raise self._rejection_error()

        try:
            result = await func(*args, **kwargs)
        except Exception as e:
            self._record_failure(e)
            raise
        else:
            # See execute(): keep callback errors out of the failure count.
            self._record_success()
            return result

    def reset(self) -> None:
        """Manually reset the circuit breaker to closed state."""
        with self._lock:
            self._transition_to(CircuitState.CLOSED)

    def get_stats(self) -> dict:
        """Get circuit breaker statistics.

        Reports the stored state as-is; unlike ``state`` it does not apply a
        pending open -> half-open transition.
        """
        with self._lock:
            return {
                "state": self._state.value,
                "failure_count": self._failure_count,
                "success_count": self._success_count,
                "last_failure_time": self._last_failure_time,
                "half_open_calls": self._half_open_calls,
            }
|