asap-protocol 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- asap/__init__.py +1 -1
- asap/errors.py +167 -0
- asap/examples/README.md +3 -0
- asap/examples/run_demo.py +9 -2
- asap/models/__init__.py +4 -0
- asap/models/constants.py +73 -0
- asap/models/entities.py +38 -2
- asap/models/envelope.py +7 -1
- asap/transport/__init__.py +3 -0
- asap/transport/circuit_breaker.py +193 -0
- asap/transport/client.py +588 -53
- asap/transport/middleware.py +6 -5
- asap/transport/server.py +80 -3
- asap/transport/validators.py +324 -0
- asap/utils/__init__.py +7 -0
- asap/utils/sanitization.py +139 -0
- {asap_protocol-0.3.0.dist-info → asap_protocol-0.5.0.dist-info}/METADATA +22 -5
- {asap_protocol-0.3.0.dist-info → asap_protocol-0.5.0.dist-info}/RECORD +21 -17
- {asap_protocol-0.3.0.dist-info → asap_protocol-0.5.0.dist-info}/WHEEL +0 -0
- {asap_protocol-0.3.0.dist-info → asap_protocol-0.5.0.dist-info}/entry_points.txt +0 -0
- {asap_protocol-0.3.0.dist-info → asap_protocol-0.5.0.dist-info}/licenses/LICENSE +0 -0
asap/__init__.py
CHANGED
asap/errors.py
CHANGED
|
@@ -190,3 +190,170 @@ class ThreadPoolExhaustedError(ASAPError):
|
|
|
190
190
|
)
|
|
191
191
|
self.max_threads = max_threads
|
|
192
192
|
self.active_threads = active_threads
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
class InvalidTimestampError(ASAPError):
    """Error for an envelope timestamp that falls outside the accepted window.

    Used by timestamp validation (replay attack prevention) to reject
    envelopes that are either too old or dated too far in the future.

    Attributes:
        timestamp: The rejected timestamp value.
        age_seconds: Envelope age in seconds, when the envelope was too old.
        future_offset_seconds: Seconds ahead of the current time, when the
            envelope was dated too far in the future.
    """

    def __init__(
        self,
        timestamp: str,
        message: str,
        age_seconds: float | None = None,
        future_offset_seconds: float | None = None,
        details: dict[str, Any] | None = None,
    ) -> None:
        """Create the error and assemble its structured details.

        Args:
            timestamp: The rejected timestamp value.
            message: Human-readable error description.
            age_seconds: Envelope age in seconds (only for stale envelopes).
            future_offset_seconds: Offset from the current time in seconds
                (only for future-dated envelopes).
            details: Extra context; its keys override the generated ones.
        """
        # Optional measurements are only reported when the caller supplied them.
        optional_fields = {
            "age_seconds": age_seconds,
            "future_offset_seconds": future_offset_seconds,
        }
        payload: dict[str, Any] = {"timestamp": timestamp}
        payload.update(
            {key: value for key, value in optional_fields.items() if value is not None}
        )
        # Caller-provided details are merged last so they take precedence.
        payload.update(details or {})

        super().__init__(
            code="asap:protocol/invalid_timestamp",
            message=message,
            details=payload,
        )
        self.timestamp = timestamp
        self.age_seconds = age_seconds
        self.future_offset_seconds = future_offset_seconds
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
class InvalidNonceError(ASAPError):
    """Error for an envelope nonce that is a duplicate or is malformed.

    Used by nonce validation (replay attack prevention): a nonce that was
    already seen within the TTL window is rejected.

    Attributes:
        nonce: The rejected nonce value.
    """

    def __init__(
        self,
        nonce: str,
        message: str,
        details: dict[str, Any] | None = None,
    ) -> None:
        """Create the error and record the offending nonce.

        Args:
            nonce: The rejected nonce value.
            message: Human-readable error description.
            details: Extra context; its keys override the generated ones.
        """
        # Start from the generated field, then let caller details win on clashes.
        payload: dict[str, Any] = {"nonce": nonce}
        payload.update(details or {})

        super().__init__(
            code="asap:protocol/invalid_nonce",
            message=message,
            details=payload,
        )
        self.nonce = nonce
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
class CircuitOpenError(ASAPError):
    """Error for a request rejected because the circuit breaker is open.

    Signals that too many consecutive failures were observed for a target
    and further requests are being refused to avoid cascading failures.

    Attributes:
        base_url: The URL whose circuit is open.
        consecutive_failures: Failure count reported when the request was rejected.
    """

    def __init__(
        self,
        base_url: str,
        consecutive_failures: int,
        details: dict[str, Any] | None = None,
    ) -> None:
        """Create the error with a generated message.

        Args:
            base_url: The URL whose circuit is open.
            consecutive_failures: Number of consecutive failures.
            details: Extra context; its keys override the generated ones.
        """
        sentences = (
            f"Circuit breaker is OPEN for {base_url}.",
            f"Too many consecutive failures ({consecutive_failures}).",
            "Service temporarily unavailable.",
        )
        payload: dict[str, Any] = {
            "base_url": base_url,
            "consecutive_failures": consecutive_failures,
        }
        # Caller-provided details are merged last so they take precedence.
        payload.update(details or {})

        super().__init__(
            code="asap:transport/circuit_open",
            message=" ".join(sentences),
            details=payload,
        )
        self.base_url = base_url
        self.consecutive_failures = consecutive_failures
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
class UnsupportedAuthSchemeError(ASAPError):
    """Error for a Manifest that names an authentication scheme we do not support.

    Attributes:
        scheme: The unsupported scheme name, as supplied.
        supported_schemes: The set of supported schemes passed by the caller
            (stored as given, not copied).
    """

    def __init__(
        self,
        scheme: str,
        supported_schemes: set[str] | frozenset[str],
        details: dict[str, Any] | None = None,
    ) -> None:
        """Create the error with a generated message listing valid schemes.

        Args:
            scheme: The unsupported scheme name.
            supported_schemes: Set of supported schemes.
            details: Extra context; its keys override the generated ones.
        """
        # Sort once so both the message and the details are deterministic.
        ordered = sorted(supported_schemes)
        joined = ", ".join(ordered)

        payload: dict[str, Any] = {
            "scheme": scheme,
            "supported_schemes": ordered,
        }
        payload.update(details or {})

        super().__init__(
            code="asap:auth/unsupported_scheme",
            message=f"Unsupported authentication scheme '{scheme}'. Supported schemes: {joined}",
            details=payload,
        )
        self.scheme = scheme
        self.supported_schemes = supported_schemes
|
asap/examples/README.md
CHANGED
|
@@ -23,3 +23,6 @@ You can run the agents separately if needed:
|
|
|
23
23
|
|
|
24
24
|
- The echo agent exposes `/.well-known/asap/manifest.json` for readiness checks.
|
|
25
25
|
- Update ports in `asap.examples.run_demo` if you change the defaults.
|
|
26
|
+
- These examples use the basic ASAP API without authentication or advanced security features.
|
|
27
|
+
For production use, consider adding authentication via `manifest.auth` and enabling
|
|
28
|
+
additional security features (see `docs/security.md`).
|
asap/examples/run_demo.py
CHANGED
|
@@ -6,7 +6,7 @@ communication by sending a task request from the coordinator logic.
|
|
|
6
6
|
|
|
7
7
|
import asyncio
|
|
8
8
|
import signal
|
|
9
|
-
import subprocess
|
|
9
|
+
import subprocess # nosec B404
|
|
10
10
|
import sys
|
|
11
11
|
import time
|
|
12
12
|
from typing import Sequence
|
|
@@ -34,8 +34,15 @@ def start_process(command: Sequence[str]) -> subprocess.Popen[str]:
|
|
|
34
34
|
|
|
35
35
|
Returns:
|
|
36
36
|
Started subprocess handle.
|
|
37
|
+
|
|
38
|
+
Note:
|
|
39
|
+
This is example/demo code that only executes trusted commands
|
|
40
|
+
(sys.executable with known modules). The command is controlled
|
|
41
|
+
and not user input.
|
|
37
42
|
"""
|
|
38
|
-
|
|
43
|
+
# nosec B404, B603: This is example code executing trusted commands only
|
|
44
|
+
# (sys.executable with known Python modules, not user input)
|
|
45
|
+
return subprocess.Popen(command, text=True) # nosec B404, B603
|
|
39
46
|
|
|
40
47
|
|
|
41
48
|
def wait_for_ready(url: str, timeout_seconds: float) -> None:
|
asap/models/__init__.py
CHANGED
|
@@ -12,6 +12,8 @@ from asap.models.constants import (
|
|
|
12
12
|
AGENT_URN_PATTERN,
|
|
13
13
|
ASAP_PROTOCOL_VERSION,
|
|
14
14
|
DEFAULT_TIMEOUT_SECONDS,
|
|
15
|
+
MAX_ENVELOPE_AGE_SECONDS,
|
|
16
|
+
MAX_FUTURE_TOLERANCE_SECONDS,
|
|
15
17
|
MAX_TASK_DEPTH,
|
|
16
18
|
)
|
|
17
19
|
|
|
@@ -88,6 +90,8 @@ __all__ = [
|
|
|
88
90
|
"AGENT_URN_PATTERN",
|
|
89
91
|
"ASAP_PROTOCOL_VERSION",
|
|
90
92
|
"DEFAULT_TIMEOUT_SECONDS",
|
|
93
|
+
"MAX_ENVELOPE_AGE_SECONDS",
|
|
94
|
+
"MAX_FUTURE_TOLERANCE_SECONDS",
|
|
91
95
|
"MAX_TASK_DEPTH",
|
|
92
96
|
# Enums
|
|
93
97
|
"MessageRole",
|
asap/models/constants.py
CHANGED
|
@@ -11,5 +11,78 @@ DEFAULT_TIMEOUT_SECONDS = 600
|
|
|
11
11
|
MAX_TASK_DEPTH = 10 # Maximum nesting level for subtasks
|
|
12
12
|
MAX_REQUEST_SIZE = 10 * 1024 * 1024 # 10MB maximum request size
|
|
13
13
|
|
|
14
|
+
# Timestamp validation constants for replay attack prevention
|
|
15
|
+
MAX_ENVELOPE_AGE_SECONDS = 300 # 5 minutes
|
|
16
|
+
"""Maximum age of an envelope timestamp before it is considered stale.
|
|
17
|
+
|
|
18
|
+
This prevents replay attacks by rejecting envelopes that are too old.
|
|
19
|
+
The 5-minute window balances security (preventing old message replays)
|
|
20
|
+
with practical network latency and clock skew tolerance.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
MAX_FUTURE_TOLERANCE_SECONDS = 30 # 30 seconds
|
|
24
|
+
"""Maximum future timestamp tolerance to account for clock skew.
|
|
25
|
+
|
|
26
|
+
Envelopes with timestamps more than 30 seconds in the future are rejected
|
|
27
|
+
to prevent attacks using artificially future-dated messages. This tolerance
|
|
28
|
+
accounts for reasonable clock synchronization differences between systems.
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
NONCE_TTL_SECONDS = MAX_ENVELOPE_AGE_SECONDS * 2 # 10 minutes by default
|
|
32
|
+
"""Time-to-live for nonce values in seconds.
|
|
33
|
+
|
|
34
|
+
Nonces are stored with a TTL of 2x the maximum envelope age to ensure they
|
|
35
|
+
expire after the envelope would have been rejected anyway. This provides a
|
|
36
|
+
safety margin for edge cases where an envelope might be processed near the
|
|
37
|
+
age limit, while preventing the nonce store from growing unbounded.
|
|
38
|
+
|
|
39
|
+
The 2x multiplier ensures that:
|
|
40
|
+
- Nonces remain valid for the full envelope validation window
|
|
41
|
+
- Nonces expire shortly after envelopes would be rejected, preventing unbounded growth
|
|
42
|
+
- There's a buffer for clock skew and processing delays
|
|
43
|
+
"""
|
|
44
|
+
|
|
14
45
|
# URN patterns
|
|
15
46
|
AGENT_URN_PATTERN = r"^urn:asap:agent:[a-z0-9-]+(?::[a-z0-9-]+)?$"
|
|
47
|
+
|
|
48
|
+
# Authentication schemes
|
|
49
|
+
SUPPORTED_AUTH_SCHEMES = frozenset({"bearer", "basic"})
|
|
50
|
+
"""Supported authentication schemes for agent access.
|
|
51
|
+
|
|
52
|
+
Currently supports:
|
|
53
|
+
- bearer: Bearer token authentication (RFC 6750)
|
|
54
|
+
- basic: HTTP Basic authentication (RFC 7617)
|
|
55
|
+
|
|
56
|
+
Future support planned:
|
|
57
|
+
- oauth2: OAuth 2.0 authentication flow
|
|
58
|
+
- hmac: HMAC-based authentication
|
|
59
|
+
"""
|
|
60
|
+
|
|
61
|
+
# Retry and backoff constants
|
|
62
|
+
DEFAULT_BASE_DELAY = 1.0
|
|
63
|
+
"""Default base delay in seconds for exponential backoff.
|
|
64
|
+
|
|
65
|
+
This is the initial delay before the first retry attempt. Subsequent retries
|
|
66
|
+
will use exponential backoff: base_delay * (2 ** attempt) + jitter.
|
|
67
|
+
"""
|
|
68
|
+
|
|
69
|
+
DEFAULT_MAX_DELAY = 60.0
|
|
70
|
+
"""Maximum delay in seconds for exponential backoff.
|
|
71
|
+
|
|
72
|
+
This caps the maximum delay between retry attempts, preventing excessively
|
|
73
|
+
long waits while still providing exponential backoff for transient failures.
|
|
74
|
+
"""
|
|
75
|
+
|
|
76
|
+
DEFAULT_CIRCUIT_BREAKER_THRESHOLD = 5
|
|
77
|
+
"""Default threshold for circuit breaker pattern.
|
|
78
|
+
|
|
79
|
+
Number of consecutive failures required before opening the circuit breaker
|
|
80
|
+
and preventing further requests to a failing endpoint.
|
|
81
|
+
"""
|
|
82
|
+
|
|
83
|
+
DEFAULT_CIRCUIT_BREAKER_TIMEOUT = 60.0
|
|
84
|
+
"""Default timeout in seconds before circuit breaker transitions from OPEN to HALF_OPEN.
|
|
85
|
+
|
|
86
|
+
After this timeout, the circuit breaker will allow a test request to determine
|
|
87
|
+
if the service has recovered before closing the circuit.
|
|
88
|
+
"""
|
asap/models/entities.py
CHANGED
|
@@ -19,10 +19,11 @@ from datetime import datetime
|
|
|
19
19
|
from typing import Any
|
|
20
20
|
|
|
21
21
|
from packaging.version import InvalidVersion, Version
|
|
22
|
-
from pydantic import Field, field_validator
|
|
22
|
+
from pydantic import Field, field_validator, model_validator
|
|
23
23
|
|
|
24
|
+
from asap.errors import UnsupportedAuthSchemeError
|
|
24
25
|
from asap.models.base import ASAPBaseModel
|
|
25
|
-
from asap.models.constants import AGENT_URN_PATTERN, ASAP_PROTOCOL_VERSION
|
|
26
|
+
from asap.models.constants import AGENT_URN_PATTERN, ASAP_PROTOCOL_VERSION, SUPPORTED_AUTH_SCHEMES
|
|
26
27
|
from asap.models.enums import MessageRole, TaskStatus
|
|
27
28
|
from asap.models.types import (
|
|
28
29
|
AgentURN,
|
|
@@ -36,6 +37,27 @@ from asap.models.types import (
|
|
|
36
37
|
)
|
|
37
38
|
|
|
38
39
|
|
|
40
|
+
def _validate_auth_scheme(auth: "AuthScheme") -> None:
    """Reject an AuthScheme that lists any scheme outside SUPPORTED_AUTH_SCHEMES.

    Scheme names are compared case-insensitively (lower-cased before the
    membership test). Validation stops at the first unsupported entry.

    Args:
        auth: AuthScheme instance whose ``schemes`` are checked.

    Raises:
        UnsupportedAuthSchemeError: For the first scheme that is not supported;
            the error carries the scheme exactly as it was declared.
    """
    for declared in auth.schemes:
        if declared.lower() in SUPPORTED_AUTH_SCHEMES:
            continue
        raise UnsupportedAuthSchemeError(
            scheme=declared,
            supported_schemes=SUPPORTED_AUTH_SCHEMES,
        )
|
|
59
|
+
|
|
60
|
+
|
|
39
61
|
class Skill(ASAPBaseModel):
|
|
40
62
|
"""A specific capability that an agent can perform.
|
|
41
63
|
|
|
@@ -231,6 +253,20 @@ class Manifest(ASAPBaseModel):
|
|
|
231
253
|
raise ValueError(f"Invalid semantic version '{v}': {e}") from e
|
|
232
254
|
return v
|
|
233
255
|
|
|
256
|
+
@model_validator(mode="after")
def validate_auth_schemes(self) -> "Manifest":
    """Check the manifest's auth schemes once the model has been built.

    A manifest without an ``auth`` section is accepted as-is.

    Raises:
        UnsupportedAuthSchemeError: If any scheme in auth.schemes is not supported.

    Returns:
        The manifest itself, as required of an "after" model validator.
    """
    if self.auth is None:
        return self
    _validate_auth_scheme(self.auth)
    return self
|
|
269
|
+
|
|
234
270
|
|
|
235
271
|
class Conversation(ASAPBaseModel):
|
|
236
272
|
"""A context for related interactions between agents.
|
asap/models/envelope.py
CHANGED
|
@@ -65,7 +65,13 @@ class Envelope(ASAPBaseModel):
|
|
|
65
65
|
default=None, description="Optional trace ID for distributed tracing"
|
|
66
66
|
)
|
|
67
67
|
extensions: dict[str, Any] | None = Field(
|
|
68
|
-
default=None,
|
|
68
|
+
default=None,
|
|
69
|
+
description=(
|
|
70
|
+
"Optional custom extensions. "
|
|
71
|
+
"Can include a 'nonce' field (string) for replay attack prevention. "
|
|
72
|
+
"If provided, the nonce must be unique within the TTL window (typically 10 minutes). "
|
|
73
|
+
"Duplicate nonces will be rejected by the validation layer."
|
|
74
|
+
),
|
|
69
75
|
)
|
|
70
76
|
|
|
71
77
|
@field_validator("id", mode="before")
|
asap/transport/__init__.py
CHANGED
|
@@ -18,6 +18,7 @@ Public exports:
|
|
|
18
18
|
create_echo_handler: Factory for echo handler
|
|
19
19
|
create_default_registry: Factory for default registry
|
|
20
20
|
ASAPClient: Async HTTP client for agent communication
|
|
21
|
+
RetryConfig: Configuration dataclass for retry logic and circuit breaker
|
|
21
22
|
ASAPConnectionError: Connection error exception
|
|
22
23
|
ASAPTimeoutError: Timeout error exception
|
|
23
24
|
ASAPRemoteError: Remote error exception
|
|
@@ -45,6 +46,7 @@ from asap.transport.client import (
|
|
|
45
46
|
ASAPConnectionError,
|
|
46
47
|
ASAPRemoteError,
|
|
47
48
|
ASAPTimeoutError,
|
|
49
|
+
RetryConfig,
|
|
48
50
|
)
|
|
49
51
|
from asap.transport.handlers import (
|
|
50
52
|
Handler,
|
|
@@ -81,4 +83,5 @@ __all__ = [
|
|
|
81
83
|
"ASAPConnectionError",
|
|
82
84
|
"ASAPTimeoutError",
|
|
83
85
|
"ASAPRemoteError",
|
|
86
|
+
"RetryConfig",
|
|
84
87
|
]
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
"""Circuit breaker implementation for resilient request handling.
|
|
2
|
+
|
|
3
|
+
This module provides the CircuitBreaker pattern implementation and a registry
|
|
4
|
+
for sharing circuit breaker state across multiple client instances.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import threading
|
|
8
|
+
import time
|
|
9
|
+
from enum import Enum
|
|
10
|
+
from typing import Dict
|
|
11
|
+
|
|
12
|
+
from asap.models.constants import (
|
|
13
|
+
DEFAULT_CIRCUIT_BREAKER_THRESHOLD,
|
|
14
|
+
DEFAULT_CIRCUIT_BREAKER_TIMEOUT,
|
|
15
|
+
)
|
|
16
|
+
from asap.observability import get_logger
|
|
17
|
+
|
|
18
|
+
logger = get_logger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class CircuitState(str, Enum):
    """The three states a circuit breaker can be in.

    Members are also ``str`` instances, so they compare equal to their
    string values and serialize naturally.
    """

    # Normal operation: requests are allowed.
    CLOSED = "closed"
    # Tripped: requests are rejected immediately.
    OPEN = "open"
    # Recovery probe: requests are allowed again to test whether the service recovered.
    HALF_OPEN = "half_open"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class CircuitBreaker:
    """Circuit breaker for resilient request handling.

    The breaker opens after ``threshold`` consecutive failures and rejects
    requests until ``timeout`` seconds have passed since the last recorded
    failure, at which point it moves to HALF_OPEN and lets requests through
    again to probe the service.

    States:
        - CLOSED: Normal operation, all requests allowed.
        - OPEN: All requests rejected until the timeout elapses.
        - HALF_OPEN: Probing; a success closes the circuit, a failure
          re-opens it immediately.

    All state reads and transitions are guarded by a re-entrant lock.
    """

    def __init__(
        self,
        threshold: int = DEFAULT_CIRCUIT_BREAKER_THRESHOLD,
        timeout: float = DEFAULT_CIRCUIT_BREAKER_TIMEOUT,
    ) -> None:
        """Initialize circuit breaker.

        Args:
            threshold: Number of consecutive failures before opening.
            timeout: Seconds to wait in OPEN before transitioning to HALF_OPEN.
        """
        self.threshold = threshold
        self.timeout = timeout
        self._state = CircuitState.CLOSED
        self._consecutive_failures = 0
        # Monotonic-clock reading of the most recent failure (None if none pending).
        self._last_failure_time: float | None = None
        self._lock = threading.RLock()

    def record_success(self) -> None:
        """Record a successful request.

        Resets the failure count. A success while HALF_OPEN closes the
        circuit. A success while OPEN (e.g. a request that was already in
        flight when the circuit tripped) leaves the circuit OPEN.
        """
        with self._lock:
            self._consecutive_failures = 0
            if self._state == CircuitState.OPEN:
                # Keep the failure timestamp: can_attempt() needs it to measure
                # the cool-down. Clearing it here would leave the breaker OPEN
                # forever, since no request could run to record a new failure.
                return
            self._state = CircuitState.CLOSED
            self._last_failure_time = None

    def record_failure(self) -> None:
        """Record a failed request.

        Increments the failure count and refreshes the failure timestamp.
        Opens the circuit when the threshold is reached while CLOSED, or
        immediately when the failure happens during a HALF_OPEN probe.
        """
        with self._lock:
            self._consecutive_failures += 1
            # Monotonic clock: elapsed-time checks must not be affected by
            # wall-clock adjustments.
            self._last_failure_time = time.monotonic()

            if self._state == CircuitState.HALF_OPEN:
                # The recovery probe failed: go back to rejecting requests
                # for another full timeout instead of staying half-open.
                self._state = CircuitState.OPEN
            elif (
                self._state == CircuitState.CLOSED
                and self._consecutive_failures >= self.threshold
            ):
                self._state = CircuitState.OPEN

    def can_attempt(self) -> bool:
        """Check if a request can be attempted.

        When the circuit is OPEN and the timeout has elapsed since the last
        failure, this call also performs the OPEN -> HALF_OPEN transition.

        Returns:
            True if request can be attempted, False if circuit is open.
        """
        with self._lock:
            if self._state != CircuitState.OPEN:
                # CLOSED or HALF_OPEN: allow request
                return True

            if self._last_failure_time is None:
                # No timestamp to measure the cool-down from; stay rejected.
                return False

            elapsed = time.monotonic() - self._last_failure_time
            if elapsed >= self.timeout:
                # Cool-down is over: let requests through to test recovery.
                self._state = CircuitState.HALF_OPEN
                return True

            # Still cooling down.
            return False

    def get_state(self) -> CircuitState:
        """Get current circuit state.

        Returns:
            Current circuit state (does not trigger the OPEN -> HALF_OPEN
            transition; only can_attempt() does).
        """
        with self._lock:
            return self._state

    def get_consecutive_failures(self) -> int:
        """Get number of consecutive failures.

        Returns:
            Failures recorded since the last recorded success.
        """
        with self._lock:
            return self._consecutive_failures
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
class CircuitBreakerRegistry:
    """Keeps one CircuitBreaker per base URL.

    Callers that ask for the same ``base_url`` receive the same breaker
    instance and therefore share its state. Access to the internal mapping
    is guarded by a re-entrant lock.
    """

    def __init__(self) -> None:
        """Create an empty registry."""
        self._lock = threading.RLock()
        self._breakers: Dict[str, CircuitBreaker] = {}

    def get_or_create(
        self,
        base_url: str,
        threshold: int = DEFAULT_CIRCUIT_BREAKER_THRESHOLD,
        timeout: float = DEFAULT_CIRCUIT_BREAKER_TIMEOUT,
    ) -> CircuitBreaker:
        """Return the breaker registered for ``base_url``, creating it on first use.

        Args:
            base_url: The target URL; used verbatim as the registry key.
            threshold: Failure threshold for a newly created breaker
                (ignored when a breaker already exists).
            timeout: Open timeout for a newly created breaker
                (ignored when a breaker already exists).

        Returns:
            The shared CircuitBreaker instance for ``base_url``.
        """
        with self._lock:
            existing = self._breakers.get(base_url)
            if existing is not None:
                return existing

            logger.info(
                "asap.circuit_breaker.created",
                base_url=base_url,
                threshold=threshold,
                timeout=timeout,
                message=f"Created shared circuit breaker for {base_url}",
            )
            created = CircuitBreaker(threshold=threshold, timeout=timeout)
            self._breakers[base_url] = created
            return created

    def clear(self) -> None:
        """Drop every registered breaker (mainly useful in tests)."""
        with self._lock:
            self._breakers.clear()
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
# Global registry instance
|
|
177
|
+
# In a more complex app, this might be injected, but a module-level singleton
|
|
178
|
+
# is standard for this pattern in Python clients.
|
|
179
|
+
_registry = CircuitBreakerRegistry()
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def get_circuit_breaker(
    base_url: str,
    threshold: int = DEFAULT_CIRCUIT_BREAKER_THRESHOLD,
    timeout: float = DEFAULT_CIRCUIT_BREAKER_TIMEOUT,
) -> CircuitBreaker:
    """Fetch (or lazily create) the breaker for ``base_url`` from the global registry.

    Args:
        base_url: The target URL identifying the breaker.
        threshold: Failure threshold, used only if the breaker is created now.
        timeout: Open timeout in seconds, used only if the breaker is created now.

    Returns:
        The shared CircuitBreaker for ``base_url``.
    """
    breaker = _registry.get_or_create(base_url, threshold, timeout)
    return breaker
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def get_registry() -> CircuitBreakerRegistry:
    """Return the module-level CircuitBreakerRegistry singleton."""
    registry = _registry
    return registry
|