agentreplay 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentreplay/__init__.py +81 -0
- agentreplay/auto_instrument/__init__.py +237 -0
- agentreplay/auto_instrument/openai.py +431 -0
- agentreplay/batching.py +270 -0
- agentreplay/bootstrap.py +202 -0
- agentreplay/circuit_breaker.py +300 -0
- agentreplay/client.py +1560 -0
- agentreplay/config.py +215 -0
- agentreplay/context.py +168 -0
- agentreplay/env_config.py +327 -0
- agentreplay/env_init.py +128 -0
- agentreplay/exceptions.py +92 -0
- agentreplay/genai.py +510 -0
- agentreplay/genai_conventions.py +502 -0
- agentreplay/install_pth.py +159 -0
- agentreplay/langchain_tracer.py +385 -0
- agentreplay/models.py +120 -0
- agentreplay/otel_bridge.py +281 -0
- agentreplay/patch.py +308 -0
- agentreplay/propagation.py +328 -0
- agentreplay/py.typed +3 -0
- agentreplay/retry.py +151 -0
- agentreplay/sampling.py +298 -0
- agentreplay/session.py +164 -0
- agentreplay/sitecustomize.py +73 -0
- agentreplay/span.py +270 -0
- agentreplay/unified.py +465 -0
- agentreplay-0.1.2.dist-info/METADATA +285 -0
- agentreplay-0.1.2.dist-info/RECORD +33 -0
- agentreplay-0.1.2.dist-info/WHEEL +5 -0
- agentreplay-0.1.2.dist-info/entry_points.txt +2 -0
- agentreplay-0.1.2.dist-info/licenses/LICENSE +190 -0
- agentreplay-0.1.2.dist-info/top_level.txt +1 -0
agentreplay/bootstrap.py
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
# Copyright 2025 Sushanth (https://github.com/sushanthpy)
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
"""Bootstrap module for zero-code auto-instrumentation.
|
|
16
|
+
|
|
17
|
+
This module is called by the .pth file on Python startup when AGENTREPLAY_ENABLED=true.
|
|
18
|
+
It initializes OpenTelemetry instrumentation with minimal overhead.
|
|
19
|
+
|
|
20
|
+
Environment Variables:
|
|
21
|
+
AGENTREPLAY_ENABLED: Set to 'true' to enable auto-instrumentation
|
|
22
|
+
AGENTREPLAY_SERVICE_NAME: Service name for traces (default: 'agentreplay-app')
|
|
23
|
+
AGENTREPLAY_OTLP_ENDPOINT: OTLP gRPC endpoint (default: 'localhost:47117')
|
|
24
|
+
AGENTREPLAY_PROJECT_ID: Project ID for traces
|
|
25
|
+
AGENTREPLAY_TENANT_ID: Tenant ID for traces (default: 1)
|
|
26
|
+
AGENTREPLAY_DEBUG: Enable debug logging (default: false)
|
|
27
|
+
AGENTREPLAY_CAPTURE_CONTENT: Capture LLM request/response content (default: true)
|
|
28
|
+
OTEL_EXPORTER_OTLP_ENDPOINT: Standard OTEL endpoint override
|
|
29
|
+
|
|
30
|
+
Example:
|
|
31
|
+
# Option 1: Automatic via .pth file
|
|
32
|
+
$ export AGENTREPLAY_ENABLED=true
|
|
33
|
+
$ export AGENTREPLAY_PROJECT_ID=27986
|
|
34
|
+
$ python my_app.py # Auto-instrumented!
|
|
35
|
+
|
|
36
|
+
# Option 2: Manual initialization
|
|
37
|
+
>>> from agentreplay.bootstrap import init_otel_instrumentation
|
|
38
|
+
>>> init_otel_instrumentation()
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
import os
|
|
42
|
+
import logging
|
|
43
|
+
from typing import Optional
|
|
44
|
+
|
|
45
|
+
logger = logging.getLogger(__name__)
|
|
46
|
+
|
|
47
|
+
# Global flag to prevent double-initialization
|
|
48
|
+
_initialized = False
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _env_flag(name: str, default: str) -> bool:
    """Return True when environment variable *name* holds '1', 'true' or 'yes'.

    The comparison is case-insensitive and ignores surrounding whitespace.
    *default* is the string used when the variable is not set.
    """
    return os.getenv(name, default).strip().lower() in {"1", "true", "yes"}


def _env_int(name: str, default: int) -> int:
    """Parse environment variable *name* as an int.

    Falls back to *default* (and logs a warning) when the value is not a
    valid integer; returns *default* silently when the variable is unset.
    """
    raw = os.getenv(name, str(default))
    try:
        return int(raw)
    except ValueError:
        logger.warning("Invalid %s: %s, using %s", name, raw, default)
        return default


def init_otel_instrumentation(
    service_name: Optional[str] = None,
    otlp_endpoint: Optional[str] = None,
    project_id: Optional[int] = None,
    tenant_id: Optional[int] = None,
    capture_content: Optional[bool] = None,
    debug: Optional[bool] = None,
) -> bool:
    """Initialize Agentreplay instrumentation once per process.

    Resolves every setting (explicit argument first, then environment
    variable, then built-in default) and hands the result to
    ``agentreplay.auto_instrument.setup_instrumentation``.

    Args:
        service_name: Service name. Falls back to AGENTREPLAY_SERVICE_NAME,
            then 'agentreplay-app'.
        otlp_endpoint: OTLP endpoint. Falls back to AGENTREPLAY_OTLP_ENDPOINT,
            then OTEL_EXPORTER_OTLP_ENDPOINT, then 'localhost:47117'.
        project_id: Project ID. Falls back to AGENTREPLAY_PROJECT_ID, then 0.
        tenant_id: Tenant ID. Falls back to AGENTREPLAY_TENANT_ID, then 1.
        capture_content: Capture LLM content. Falls back to
            AGENTREPLAY_CAPTURE_CONTENT, then True.
        debug: Enable debug logging. Falls back to AGENTREPLAY_DEBUG,
            then False.

    Returns:
        True if initialization succeeded. False if the module was already
        initialized, or if setup raised (the error is logged, never
        propagated, so the host application keeps running).

    Example:
        >>> from agentreplay.bootstrap import init_otel_instrumentation
        >>> init_otel_instrumentation(
        ...     service_name="my-agent",
        ...     project_id=27986
        ... )
    """
    global _initialized

    if _initialized:
        logger.debug("Agentreplay already initialized, skipping")
        return False

    # `or` chains (rather than getenv defaults) so that a variable that is
    # set but empty falls through to the next candidate instead of yielding "".
    service_name = (
        service_name
        or os.getenv("AGENTREPLAY_SERVICE_NAME")
        or "agentreplay-app"
    )
    otlp_endpoint = (
        otlp_endpoint
        or os.getenv("AGENTREPLAY_OTLP_ENDPOINT")
        or os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT")
        or "localhost:47117"
    )

    # Explicit arguments win; `is None` keeps a caller-supplied 0/False intact.
    if project_id is None:
        project_id = _env_int("AGENTREPLAY_PROJECT_ID", 0)
    if tenant_id is None:
        tenant_id = _env_int("AGENTREPLAY_TENANT_ID", 1)
    if capture_content is None:
        capture_content = _env_flag("AGENTREPLAY_CAPTURE_CONTENT", "true")
    if debug is None:
        debug = _env_flag("AGENTREPLAY_DEBUG", "false")

    if debug:
        # NOTE: basicConfig configures the *root* logger, so this affects
        # logging for the whole process, not only this module.
        logging.basicConfig(level=logging.DEBUG)
        logger.setLevel(logging.DEBUG)

    try:
        # Import here to avoid loading OTEL on every Python startup
        from agentreplay.auto_instrument import setup_instrumentation

        logger.info("🚀 Initializing Agentreplay for service: %s", service_name)
        logger.debug(" OTLP Endpoint: %s", otlp_endpoint)
        logger.debug(" Project ID: %s", project_id)
        logger.debug(" Tenant ID: %s", tenant_id)
        logger.debug(" Capture Content: %s", capture_content)

        setup_instrumentation(
            service_name=service_name,
            otlp_endpoint=otlp_endpoint,
            tenant_id=tenant_id,
            project_id=project_id,
            capture_content=capture_content,
            debug=debug,
        )

        _initialized = True
        logger.info("✅ Agentreplay initialization complete")
        return True

    except Exception as e:
        # Don't crash the user's app - fail open. The traceback is only
        # attached when debug is on.
        logger.error("❌ Failed to initialize Agentreplay: %s", e, exc_info=debug)
        return False
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _auto_init():
|
|
155
|
+
"""Called by the .pth file on Python startup.
|
|
156
|
+
|
|
157
|
+
Only initializes if AGENTREPLAY_ENABLED=true to avoid overhead.
|
|
158
|
+
This is the entry point for zero-code auto-instrumentation.
|
|
159
|
+
|
|
160
|
+
Automatically loads .env file if present for developer convenience.
|
|
161
|
+
"""
|
|
162
|
+
# Try to load .env file first (if python-dotenv is available)
|
|
163
|
+
if os.path.exists('.env'):
|
|
164
|
+
try:
|
|
165
|
+
from dotenv import load_dotenv
|
|
166
|
+
load_dotenv('.env', override=False) # Don't override existing env vars
|
|
167
|
+
except ImportError:
|
|
168
|
+
pass # python-dotenv not installed, no problem
|
|
169
|
+
except Exception as e:
|
|
170
|
+
pass # Any other error, fail silently
|
|
171
|
+
|
|
172
|
+
if not os.getenv("AGENTREPLAY_ENABLED", "").lower() in {"1", "true", "yes"}:
|
|
173
|
+
# Not enabled, skip silently
|
|
174
|
+
return
|
|
175
|
+
|
|
176
|
+
try:
|
|
177
|
+
init_otel_instrumentation(debug=True) # Enable debug to see what's happening
|
|
178
|
+
except Exception as e:
|
|
179
|
+
# Fail open - don't break user's app if SDK has issues
|
|
180
|
+
import sys
|
|
181
|
+
print(f"Agentreplay auto-init failed: {e}", file=sys.stderr)
|
|
182
|
+
pass
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def is_initialized() -> bool:
    """Report whether the module-level initialization flag is set.

    Returns:
        The current value of ``_initialized``: True once
        ``init_otel_instrumentation`` has completed successfully and the
        flag has not been reset since, False otherwise.
    """
    return _initialized
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def reset_initialization():
    """Clear the module-level initialization flag (intended for tests).

    Warning:
        Only the flag is reset. Whatever instrumentation was installed
        by a previous initialization is left in place, so this is not a
        teardown. Use only in tests.
    """
    global _initialized
    _initialized = False
|
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
# Copyright 2025 Sushanth (https://github.com/sushanthpy)
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
"""Circuit Breaker pattern for Agentreplay backend resilience.
|
|
16
|
+
|
|
17
|
+
This module implements a circuit breaker to prevent cascading failures when
|
|
18
|
+
the Agentreplay backend is unavailable. The circuit breaker has three states:
|
|
19
|
+
|
|
20
|
+
- CLOSED: Normal operation, requests pass through
|
|
21
|
+
- OPEN: Backend is failing, requests are rejected immediately
|
|
22
|
+
- HALF_OPEN: Testing recovery, allowing limited requests through
|
|
23
|
+
|
|
24
|
+
The circuit breaker helps maintain application responsiveness during backend
|
|
25
|
+
outages by failing fast rather than blocking on retries.
|
|
26
|
+
|
|
27
|
+
Usage:
|
|
28
|
+
>>> from agentreplay.circuit_breaker import CircuitBreaker, CircuitBreakerOpen
|
|
29
|
+
>>>
|
|
30
|
+
>>> breaker = CircuitBreaker()
|
|
31
|
+
>>>
|
|
32
|
+
>>> try:
|
|
33
|
+
... with breaker:
|
|
34
|
+
... send_spans_to_backend()
|
|
35
|
+
... except CircuitBreakerOpen:
|
|
36
|
+
... logger.warning("Agentreplay backend unavailable, dropping spans")
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
import time
|
|
40
|
+
import threading
|
|
41
|
+
import logging
|
|
42
|
+
from enum import Enum
|
|
43
|
+
from typing import Optional, Callable, Any
|
|
44
|
+
from functools import wraps
|
|
45
|
+
|
|
46
|
+
logger = logging.getLogger(__name__)


class CircuitState(Enum):
    """Circuit breaker states."""

    CLOSED = "closed"  # Normal operation
    OPEN = "open"  # Failing, reject requests
    HALF_OPEN = "half_open"  # Testing recovery


class CircuitBreakerOpen(Exception):
    """Exception raised when circuit breaker is open."""


class CircuitBreaker:
    """Circuit breaker for backend resilience.

    All state reads and writes go through an internal ``threading.Lock``.

    State machine:
      * CLOSED -> OPEN once ``failure_threshold`` failures fall inside the
        last ``failure_window`` seconds.
      * OPEN -> HALF_OPEN once ``recovery_timeout`` seconds have passed
        since the circuit opened (evaluated lazily, on the next
        ``allow_request()`` / ``state`` access).
      * HALF_OPEN -> CLOSED after ``success_threshold`` recorded successes;
        HALF_OPEN -> OPEN on any recorded failure. While HALF_OPEN every
        request is let through; the number of probes is not capped.

    Args:
        failure_threshold: Number of failures before opening circuit (default: 5)
        recovery_timeout: Seconds before attempting recovery (default: 30)
        success_threshold: Successes needed to close circuit from half-open (default: 3)
        failure_window: Time window in seconds for counting failures (default: 60)

    Example:
        >>> breaker = CircuitBreaker(failure_threshold=3, recovery_timeout=60)
        >>>
        >>> @breaker.protect
        ... def send_to_backend():
        ...     response = requests.post(...)
        ...     response.raise_for_status()
        ...     return response
    """

    def __init__(
        self,
        failure_threshold: int = 5,
        recovery_timeout: float = 30.0,
        success_threshold: int = 3,
        failure_window: float = 60.0,
    ):
        self.failure_threshold = failure_threshold
        self.recovery_timeout = recovery_timeout
        self.success_threshold = success_threshold
        self.failure_window = failure_window

        self._state = CircuitState.CLOSED
        self._failure_count = 0
        self._success_count = 0
        self._last_failure_time: Optional[float] = None
        self._last_state_change: float = time.time()
        self._lock = threading.Lock()

        # Timestamps of failures seen while CLOSED, for windowed counting
        self._failure_times: list[float] = []

    @property
    def state(self) -> CircuitState:
        """Get current circuit state (applying any due transition first)."""
        with self._lock:
            self._check_state_transition()
            return self._state

    @property
    def is_closed(self) -> bool:
        """Check if circuit is closed (normal operation)."""
        return self.state == CircuitState.CLOSED

    @property
    def is_open(self) -> bool:
        """Check if circuit is open (rejecting requests)."""
        return self.state == CircuitState.OPEN

    def _prune_failures(self, now: float) -> None:
        """Drop failures older than the window and refresh the count (lock held)."""
        cutoff = now - self.failure_window
        self._failure_times = [t for t in self._failure_times if t > cutoff]
        self._failure_count = len(self._failure_times)

    def _check_state_transition(self) -> None:
        """Check if state should transition (called with lock held)."""
        now = time.time()

        if self._state == CircuitState.OPEN:
            # Move to HALF_OPEN once the recovery timeout has passed
            if now - self._last_state_change >= self.recovery_timeout:
                self._transition_to(CircuitState.HALF_OPEN)

        elif self._state == CircuitState.CLOSED:
            # Clean up old failures outside the window
            self._prune_failures(now)

    def _transition_to(self, new_state: CircuitState) -> None:
        """Transition to a new state (called with lock held)."""
        old_state = self._state
        self._state = new_state
        self._last_state_change = time.time()

        if new_state == CircuitState.CLOSED:
            # A closed circuit starts with a clean slate
            self._failure_count = 0
            self._failure_times.clear()
            self._success_count = 0
        elif new_state == CircuitState.HALF_OPEN:
            self._success_count = 0

        logger.info(
            "Circuit breaker state change: %s -> %s",
            old_state.value,
            new_state.value,
        )

    def record_success(self) -> None:
        """Record a successful request.

        Only has an effect while HALF_OPEN, where it counts toward
        ``success_threshold`` and closes the circuit once reached.
        """
        with self._lock:
            if self._state == CircuitState.HALF_OPEN:
                self._success_count += 1
                if self._success_count >= self.success_threshold:
                    self._transition_to(CircuitState.CLOSED)

    def record_failure(self, error: Optional[Exception] = None) -> None:
        """Record a failed request.

        Args:
            error: The exception that caused the failure. Accepted for
                API symmetry; it is not inspected.
        """
        now = time.time()

        with self._lock:
            self._last_failure_time = now

            if self._state == CircuitState.CLOSED:
                # Prune first so failures older than failure_window never
                # count toward the threshold, even when the caller records
                # failures without going through allow_request()/state.
                self._prune_failures(now)
                self._failure_times.append(now)
                self._failure_count = len(self._failure_times)

                if self._failure_count >= self.failure_threshold:
                    self._transition_to(CircuitState.OPEN)
                    logger.warning(
                        "Circuit breaker opened after %s failures. "
                        "Will retry after %ss.",
                        self._failure_count,
                        self.recovery_timeout,
                    )

            elif self._state == CircuitState.HALF_OPEN:
                # Any failure in half-open state reopens the circuit
                self._transition_to(CircuitState.OPEN)
                logger.warning("Circuit breaker reopened after recovery test failure.")

    def allow_request(self) -> bool:
        """Check if a request should be allowed through.

        Returns:
            True if request is allowed (CLOSED or HALF_OPEN), False if
            circuit is open
        """
        with self._lock:
            self._check_state_transition()
            # HALF_OPEN lets test requests through, same as CLOSED
            return self._state != CircuitState.OPEN

    def __enter__(self) -> "CircuitBreaker":
        """Context manager entry - check if request is allowed.

        Raises:
            CircuitBreakerOpen: If the circuit is currently open.
        """
        if not self.allow_request():
            with self._lock:
                remaining = self.recovery_timeout - (
                    time.time() - self._last_state_change
                )
            # Clamp: the timeout may elapse between the check and this read
            raise CircuitBreakerOpen(
                f"Circuit breaker is open. Recovery in "
                f"{max(remaining, 0.0):.1f}s"
            )
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> bool:
        """Context manager exit - record success or failure."""
        if exc_type is None:
            self.record_success()
        else:
            self.record_failure(exc_val)
        return False  # Don't suppress exceptions

    def protect(self, func: Callable) -> Callable:
        """Decorator to protect a function with the circuit breaker.

        Args:
            func: Function to protect

        Returns:
            Wrapped function that runs *func* inside ``with self:``, so it
            raises CircuitBreakerOpen while the circuit is open and records
            the call's outcome otherwise

        Example:
            >>> @breaker.protect
            ... def send_spans():
            ...     pass
        """
        @wraps(func)
        def wrapper(*args, **kwargs) -> Any:
            with self:
                return func(*args, **kwargs)
        return wrapper

    def reset(self) -> None:
        """Manually reset the circuit breaker to closed state."""
        with self._lock:
            self._transition_to(CircuitState.CLOSED)
            logger.info("Circuit breaker manually reset")

    def stats(self) -> dict:
        """Get circuit breaker statistics.

        This is a raw snapshot: unlike ``state`` it does not apply pending
        transitions or prune old failures first.

        Returns:
            Dictionary with current state and counters
        """
        with self._lock:
            return {
                "state": self._state.value,
                "failure_count": self._failure_count,
                "success_count": self._success_count,
                "last_failure_time": self._last_failure_time,
                "last_state_change": self._last_state_change,
                "seconds_in_state": time.time() - self._last_state_change,
            }
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
# Global circuit breaker instance for the Agentreplay backend
|
|
259
|
+
_default_breaker: Optional[CircuitBreaker] = None
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def get_circuit_breaker() -> CircuitBreaker:
    """Return the module-wide default circuit breaker.

    The instance is built lazily: the first call constructs a
    ``CircuitBreaker`` with its default arguments and stores it in
    ``_default_breaker``; later calls return that same object.

    Returns:
        Default CircuitBreaker instance
    """
    global _default_breaker
    breaker = _default_breaker
    if breaker is None:
        # First use: create and remember the shared instance
        breaker = CircuitBreaker()
        _default_breaker = breaker
    return breaker
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def configure_circuit_breaker(
    failure_threshold: int = 5,
    recovery_timeout: float = 30.0,
    success_threshold: int = 3,
    failure_window: float = 60.0,
) -> CircuitBreaker:
    """Replace the default circuit breaker with a freshly configured one.

    A new ``CircuitBreaker`` is always created and stored in
    ``_default_breaker``; the previous default, if any, is discarded
    along with its state.

    Args:
        failure_threshold: Number of failures before opening circuit
        recovery_timeout: Seconds before attempting recovery
        success_threshold: Successes needed to close circuit from half-open
        failure_window: Time window in seconds for counting failures

    Returns:
        Configured CircuitBreaker instance
    """
    global _default_breaker
    settings = {
        "failure_threshold": failure_threshold,
        "recovery_timeout": recovery_timeout,
        "success_threshold": success_threshold,
        "failure_window": failure_window,
    }
    replacement = CircuitBreaker(**settings)
    _default_breaker = replacement
    return replacement
|