asap-protocol 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- asap/__init__.py +1 -1
- asap/errors.py +167 -0
- asap/examples/README.md +3 -0
- asap/examples/run_demo.py +9 -2
- asap/models/__init__.py +4 -0
- asap/models/constants.py +73 -0
- asap/models/entities.py +38 -2
- asap/models/envelope.py +7 -1
- asap/transport/__init__.py +3 -0
- asap/transport/circuit_breaker.py +193 -0
- asap/transport/client.py +588 -53
- asap/transport/middleware.py +6 -5
- asap/transport/server.py +80 -3
- asap/transport/validators.py +324 -0
- asap/utils/__init__.py +7 -0
- asap/utils/sanitization.py +139 -0
- {asap_protocol-0.3.0.dist-info → asap_protocol-0.5.0.dist-info}/METADATA +22 -5
- {asap_protocol-0.3.0.dist-info → asap_protocol-0.5.0.dist-info}/RECORD +21 -17
- {asap_protocol-0.3.0.dist-info → asap_protocol-0.5.0.dist-info}/WHEEL +0 -0
- {asap_protocol-0.3.0.dist-info → asap_protocol-0.5.0.dist-info}/entry_points.txt +0 -0
- {asap_protocol-0.3.0.dist-info → asap_protocol-0.5.0.dist-info}/licenses/LICENSE +0 -0
asap/transport/client.py
CHANGED
|
@@ -20,15 +20,30 @@ Example:
|
|
|
20
20
|
... print(response.payload_type)
|
|
21
21
|
"""
|
|
22
22
|
|
|
23
|
+
import asyncio
|
|
24
|
+
import itertools
|
|
25
|
+
import random
|
|
23
26
|
import time
|
|
24
|
-
from
|
|
27
|
+
from dataclasses import dataclass
|
|
28
|
+
from email.utils import parsedate_to_datetime
|
|
29
|
+
from typing import Any, Optional
|
|
30
|
+
from urllib.parse import ParseResult
|
|
25
31
|
|
|
26
32
|
import httpx
|
|
27
33
|
|
|
34
|
+
from asap.errors import CircuitOpenError
|
|
35
|
+
from asap.models.constants import (
|
|
36
|
+
DEFAULT_BASE_DELAY,
|
|
37
|
+
DEFAULT_CIRCUIT_BREAKER_THRESHOLD,
|
|
38
|
+
DEFAULT_CIRCUIT_BREAKER_TIMEOUT,
|
|
39
|
+
DEFAULT_MAX_DELAY,
|
|
40
|
+
)
|
|
28
41
|
from asap.models.envelope import Envelope
|
|
29
42
|
from asap.models.ids import generate_id
|
|
30
43
|
from asap.observability import get_logger
|
|
44
|
+
from asap.transport.circuit_breaker import CircuitBreaker, CircuitState, get_registry
|
|
31
45
|
from asap.transport.jsonrpc import ASAP_METHOD
|
|
46
|
+
from asap.utils.sanitization import sanitize_url
|
|
32
47
|
|
|
33
48
|
# Module logger
|
|
34
49
|
logger = get_logger(__name__)
|
|
@@ -40,6 +55,32 @@ DEFAULT_TIMEOUT = 60.0
|
|
|
40
55
|
DEFAULT_MAX_RETRIES = 3
|
|
41
56
|
|
|
42
57
|
|
|
58
|
+
@dataclass
|
|
59
|
+
class RetryConfig:
|
|
60
|
+
"""Configuration for retry logic and circuit breaker.
|
|
61
|
+
|
|
62
|
+
Groups retry and circuit breaker parameters to simplify client initialization
|
|
63
|
+
and avoid boolean trap issues.
|
|
64
|
+
|
|
65
|
+
Attributes:
|
|
66
|
+
max_retries: Maximum retry attempts for transient failures (default: 3)
|
|
67
|
+
base_delay: Base delay in seconds for exponential backoff (default: 1.0)
|
|
68
|
+
max_delay: Maximum delay in seconds for exponential backoff (default: 60.0)
|
|
69
|
+
jitter: Whether to add random jitter to backoff delays (default: True)
|
|
70
|
+
circuit_breaker_enabled: Enable circuit breaker pattern (default: False)
|
|
71
|
+
circuit_breaker_threshold: Number of consecutive failures before opening circuit (default: 5)
|
|
72
|
+
circuit_breaker_timeout: Seconds before transitioning OPEN -> HALF_OPEN (default: 60.0)
|
|
73
|
+
"""
|
|
74
|
+
|
|
75
|
+
max_retries: int = DEFAULT_MAX_RETRIES
|
|
76
|
+
base_delay: float = DEFAULT_BASE_DELAY
|
|
77
|
+
max_delay: float = DEFAULT_MAX_DELAY
|
|
78
|
+
jitter: bool = True
|
|
79
|
+
circuit_breaker_enabled: bool = False
|
|
80
|
+
circuit_breaker_threshold: int = DEFAULT_CIRCUIT_BREAKER_THRESHOLD
|
|
81
|
+
circuit_breaker_timeout: float = DEFAULT_CIRCUIT_BREAKER_TIMEOUT
|
|
82
|
+
|
|
83
|
+
|
|
43
84
|
class ASAPConnectionError(Exception):
|
|
44
85
|
"""Raised when connection to remote agent fails.
|
|
45
86
|
|
|
@@ -47,20 +88,36 @@ class ASAPConnectionError(Exception):
|
|
|
47
88
|
or when the remote server returns an HTTP error status.
|
|
48
89
|
|
|
49
90
|
Attributes:
|
|
50
|
-
message: Error description
|
|
91
|
+
message: Error description with troubleshooting suggestions
|
|
51
92
|
cause: Original exception that caused this error
|
|
93
|
+
url: URL that failed to connect (if available)
|
|
52
94
|
"""
|
|
53
95
|
|
|
54
|
-
def __init__(
|
|
96
|
+
def __init__(
|
|
97
|
+
self, message: str, cause: Exception | None = None, url: str | None = None
|
|
98
|
+
) -> None:
|
|
55
99
|
"""Initialize connection error.
|
|
56
100
|
|
|
57
101
|
Args:
|
|
58
102
|
message: Error description
|
|
59
103
|
cause: Original exception that caused this error
|
|
104
|
+
url: URL that failed to connect (for better error messages)
|
|
60
105
|
"""
|
|
61
|
-
|
|
62
|
-
|
|
106
|
+
# Enhance message with troubleshooting suggestions if URL is provided
|
|
107
|
+
if url and "Verify" not in message and "troubleshooting" not in message.lower():
|
|
108
|
+
enhanced_message = (
|
|
109
|
+
f"{message}\n"
|
|
110
|
+
f"Troubleshooting: Connection failed to {url}. "
|
|
111
|
+
"Verify the agent is running and accessible. "
|
|
112
|
+
"Check the URL format, network connectivity, and firewall settings."
|
|
113
|
+
)
|
|
114
|
+
else:
|
|
115
|
+
enhanced_message = message
|
|
116
|
+
|
|
117
|
+
super().__init__(enhanced_message)
|
|
118
|
+
self.message = enhanced_message
|
|
63
119
|
self.cause = cause
|
|
120
|
+
self.url = url
|
|
64
121
|
|
|
65
122
|
|
|
66
123
|
class ASAPTimeoutError(Exception):
|
|
@@ -125,28 +182,101 @@ class ASAPClient:
|
|
|
125
182
|
base_url: Base URL of the remote agent
|
|
126
183
|
timeout: Request timeout in seconds
|
|
127
184
|
max_retries: Maximum retry attempts for transient failures
|
|
185
|
+
require_https: Whether HTTPS is required for non-localhost connections
|
|
128
186
|
is_connected: Whether the client has an active connection
|
|
187
|
+
_circuit_breaker: Optional circuit breaker instance
|
|
129
188
|
|
|
130
189
|
Example:
|
|
131
190
|
>>> async with ASAPClient("http://localhost:8000") as client:
|
|
132
191
|
... response = await client.send(envelope)
|
|
133
192
|
"""
|
|
134
193
|
|
|
194
|
+
_circuit_breaker: Optional[CircuitBreaker]
|
|
195
|
+
|
|
135
196
|
def __init__(
|
|
136
197
|
self,
|
|
137
198
|
base_url: str,
|
|
138
199
|
timeout: float = DEFAULT_TIMEOUT,
|
|
139
|
-
|
|
140
|
-
|
|
200
|
+
transport: httpx.AsyncBaseTransport | None = None,
|
|
201
|
+
require_https: bool = True,
|
|
202
|
+
retry_config: Optional[RetryConfig] = None,
|
|
203
|
+
# Individual retry parameters (for backward compatibility)
|
|
204
|
+
# If retry_config is provided, these are ignored
|
|
205
|
+
max_retries: int | None = None,
|
|
206
|
+
base_delay: float | None = None,
|
|
207
|
+
max_delay: float | None = None,
|
|
208
|
+
jitter: bool | None = None,
|
|
209
|
+
circuit_breaker_enabled: bool | None = None,
|
|
210
|
+
circuit_breaker_threshold: int | None = None,
|
|
211
|
+
circuit_breaker_timeout: float | None = None,
|
|
141
212
|
) -> None:
|
|
142
213
|
"""Initialize ASAP client.
|
|
143
214
|
|
|
144
215
|
Args:
|
|
145
216
|
base_url: Base URL of the remote agent (e.g., "http://localhost:8000")
|
|
146
217
|
timeout: Request timeout in seconds (default: 60)
|
|
147
|
-
|
|
148
|
-
|
|
218
|
+
transport: Optional custom async transport (for testing). Must be an instance
|
|
219
|
+
of httpx.AsyncBaseTransport (e.g., httpx.MockTransport).
|
|
220
|
+
require_https: If True, enforces HTTPS for non-localhost connections (default: True).
|
|
221
|
+
HTTP connections to localhost are allowed with a warning for development.
|
|
222
|
+
retry_config: Optional RetryConfig dataclass to group retry and circuit breaker parameters.
|
|
223
|
+
If provided, individual retry parameters are ignored.
|
|
224
|
+
max_retries: Maximum retry attempts for transient failures (default: 3).
|
|
225
|
+
Ignored if retry_config is provided.
|
|
226
|
+
base_delay: Base delay in seconds for exponential backoff (default: 1.0).
|
|
227
|
+
Ignored if retry_config is provided.
|
|
228
|
+
max_delay: Maximum delay in seconds for exponential backoff (default: 60.0).
|
|
229
|
+
Ignored if retry_config is provided.
|
|
230
|
+
jitter: Whether to add random jitter to backoff delays (default: True).
|
|
231
|
+
Ignored if retry_config is provided.
|
|
232
|
+
circuit_breaker_enabled: Enable circuit breaker pattern (default: False).
|
|
233
|
+
Ignored if retry_config is provided.
|
|
234
|
+
circuit_breaker_threshold: Number of consecutive failures before opening circuit (default: 5).
|
|
235
|
+
Ignored if retry_config is provided.
|
|
236
|
+
circuit_breaker_timeout: Seconds before transitioning OPEN -> HALF_OPEN (default: 60.0).
|
|
237
|
+
Ignored if retry_config is provided.
|
|
238
|
+
|
|
239
|
+
Raises:
|
|
240
|
+
ValueError: If URL format is invalid, scheme is not HTTP/HTTPS, or HTTPS is
|
|
241
|
+
required but URL uses HTTP for non-localhost connections.
|
|
242
|
+
|
|
243
|
+
Example:
|
|
244
|
+
>>> # Using individual parameters (backward compatible)
|
|
245
|
+
>>> client = ASAPClient("http://localhost:8000", max_retries=5)
|
|
246
|
+
>>>
|
|
247
|
+
>>> # Using RetryConfig (recommended)
|
|
248
|
+
>>> config = RetryConfig(max_retries=5, circuit_breaker_enabled=True)
|
|
249
|
+
>>> client = ASAPClient("http://localhost:8000", retry_config=config)
|
|
149
250
|
"""
|
|
251
|
+
# Extract retry config values
|
|
252
|
+
if retry_config is not None:
|
|
253
|
+
# Use retry_config values
|
|
254
|
+
max_retries_val = retry_config.max_retries
|
|
255
|
+
base_delay_val = retry_config.base_delay
|
|
256
|
+
max_delay_val = retry_config.max_delay
|
|
257
|
+
jitter_val = retry_config.jitter
|
|
258
|
+
circuit_breaker_enabled_val = retry_config.circuit_breaker_enabled
|
|
259
|
+
circuit_breaker_threshold_val = retry_config.circuit_breaker_threshold
|
|
260
|
+
circuit_breaker_timeout_val = retry_config.circuit_breaker_timeout
|
|
261
|
+
else:
|
|
262
|
+
# Use individual parameters with defaults
|
|
263
|
+
max_retries_val = max_retries if max_retries is not None else DEFAULT_MAX_RETRIES
|
|
264
|
+
base_delay_val = base_delay if base_delay is not None else DEFAULT_BASE_DELAY
|
|
265
|
+
max_delay_val = max_delay if max_delay is not None else DEFAULT_MAX_DELAY
|
|
266
|
+
jitter_val = jitter if jitter is not None else True
|
|
267
|
+
circuit_breaker_enabled_val = (
|
|
268
|
+
circuit_breaker_enabled if circuit_breaker_enabled is not None else False
|
|
269
|
+
)
|
|
270
|
+
circuit_breaker_threshold_val = (
|
|
271
|
+
circuit_breaker_threshold
|
|
272
|
+
if circuit_breaker_threshold is not None
|
|
273
|
+
else DEFAULT_CIRCUIT_BREAKER_THRESHOLD
|
|
274
|
+
)
|
|
275
|
+
circuit_breaker_timeout_val = (
|
|
276
|
+
circuit_breaker_timeout
|
|
277
|
+
if circuit_breaker_timeout is not None
|
|
278
|
+
else DEFAULT_CIRCUIT_BREAKER_TIMEOUT
|
|
279
|
+
)
|
|
150
280
|
# Validate URL format and scheme
|
|
151
281
|
from urllib.parse import urlparse
|
|
152
282
|
|
|
@@ -163,12 +293,184 @@ class ASAPClient:
|
|
|
163
293
|
f"Received: {base_url}"
|
|
164
294
|
)
|
|
165
295
|
|
|
296
|
+
# Validate HTTPS requirement
|
|
297
|
+
is_https = parsed.scheme.lower() == "https"
|
|
298
|
+
is_local = self._is_localhost(parsed)
|
|
299
|
+
|
|
300
|
+
if require_https and not is_https:
|
|
301
|
+
if is_local:
|
|
302
|
+
# Allow HTTP for localhost with warning
|
|
303
|
+
logger.warning(
|
|
304
|
+
"asap.client.http_localhost",
|
|
305
|
+
url=base_url,
|
|
306
|
+
message=(
|
|
307
|
+
"Using HTTP for localhost connection. "
|
|
308
|
+
"For production, use HTTPS. "
|
|
309
|
+
"To disable this warning, set require_https=False."
|
|
310
|
+
),
|
|
311
|
+
)
|
|
312
|
+
else:
|
|
313
|
+
# Reject HTTP for non-localhost
|
|
314
|
+
raise ValueError(
|
|
315
|
+
f"HTTPS is required for non-localhost connections. "
|
|
316
|
+
f"Received HTTP URL: {base_url}. "
|
|
317
|
+
f"Please use HTTPS or set require_https=False to override "
|
|
318
|
+
f"(not recommended for production)."
|
|
319
|
+
)
|
|
320
|
+
|
|
166
321
|
self.base_url = base_url.rstrip("/")
|
|
167
322
|
self.timeout = timeout
|
|
168
|
-
self.max_retries =
|
|
323
|
+
self.max_retries = max_retries_val
|
|
324
|
+
self.require_https = require_https
|
|
325
|
+
self.base_delay = base_delay_val
|
|
326
|
+
self.max_delay = max_delay_val
|
|
327
|
+
self.jitter = jitter_val
|
|
328
|
+
self.circuit_breaker_enabled = circuit_breaker_enabled_val
|
|
169
329
|
self._transport = transport
|
|
170
330
|
self._client: httpx.AsyncClient | None = None
|
|
171
|
-
|
|
331
|
+
# Thread-safe counter using itertools.count
|
|
332
|
+
self._request_counter = itertools.count(1)
|
|
333
|
+
|
|
334
|
+
# Initialize circuit breaker if enabled
|
|
335
|
+
# Use registry to ensure state is shared across multiple client instances
|
|
336
|
+
# for the same base_url
|
|
337
|
+
if circuit_breaker_enabled_val:
|
|
338
|
+
registry = get_registry()
|
|
339
|
+
self._circuit_breaker = registry.get_or_create(
|
|
340
|
+
base_url=sanitize_url(self.base_url),
|
|
341
|
+
threshold=circuit_breaker_threshold_val,
|
|
342
|
+
timeout=circuit_breaker_timeout_val,
|
|
343
|
+
)
|
|
344
|
+
else:
|
|
345
|
+
self._circuit_breaker = None
|
|
346
|
+
|
|
347
|
+
@staticmethod
|
|
348
|
+
def _is_localhost(parsed_url: ParseResult) -> bool:
|
|
349
|
+
"""Check if URL points to localhost.
|
|
350
|
+
|
|
351
|
+
Detects localhost, 127.0.0.1, and ::1 (IPv6 localhost).
|
|
352
|
+
|
|
353
|
+
Args:
|
|
354
|
+
parsed_url: Parsed URL from urlparse
|
|
355
|
+
|
|
356
|
+
Returns:
|
|
357
|
+
True if URL points to localhost, False otherwise
|
|
358
|
+
"""
|
|
359
|
+
hostname = parsed_url.hostname
|
|
360
|
+
if not hostname:
|
|
361
|
+
return False
|
|
362
|
+
|
|
363
|
+
hostname_lower = hostname.lower()
|
|
364
|
+
# Handle both ::1 and [::1] (bracket notation from URL parsing)
|
|
365
|
+
return hostname_lower in ("localhost", "127.0.0.1", "::1", "[::1]")
|
|
366
|
+
|
|
367
|
+
def _calculate_backoff(self, attempt: int) -> float:
|
|
368
|
+
"""Calculate exponential backoff delay for retry attempt.
|
|
369
|
+
|
|
370
|
+
Implements exponential backoff with optional jitter:
|
|
371
|
+
delay = base_delay * (2 ** attempt) + jitter
|
|
372
|
+
|
|
373
|
+
The delay is capped at max_delay to prevent excessively long waits.
|
|
374
|
+
|
|
375
|
+
Args:
|
|
376
|
+
attempt: Zero-based attempt number (0 = first retry)
|
|
377
|
+
|
|
378
|
+
Returns:
|
|
379
|
+
Delay in seconds before next retry attempt
|
|
380
|
+
"""
|
|
381
|
+
# Calculate exponential delay: base_delay * (2 ** attempt)
|
|
382
|
+
delay = self.base_delay * (2**attempt)
|
|
383
|
+
|
|
384
|
+
# Cap at max_delay
|
|
385
|
+
delay = min(delay, self.max_delay)
|
|
386
|
+
|
|
387
|
+
# Add jitter if enabled (random value between 0 and 10% of delay)
|
|
388
|
+
# Note: random.uniform is appropriate here - jitter for retry backoff
|
|
389
|
+
# does not require cryptographic security, only statistical distribution
|
|
390
|
+
if self.jitter:
|
|
391
|
+
jitter_amount: float = random.uniform(0, delay * 0.1) # nosec B311
|
|
392
|
+
delay += jitter_amount
|
|
393
|
+
|
|
394
|
+
return float(delay)
|
|
395
|
+
|
|
396
|
+
async def _validate_connection(self) -> bool:
|
|
397
|
+
"""Validate that the agent endpoint is accessible.
|
|
398
|
+
|
|
399
|
+
Performs a pre-flight check by attempting to access the agent's
|
|
400
|
+
manifest endpoint. This can be used to detect connection issues
|
|
401
|
+
before sending actual requests.
|
|
402
|
+
|
|
403
|
+
Note: This is an optional validation step that can be disabled
|
|
404
|
+
for performance reasons in production environments.
|
|
405
|
+
|
|
406
|
+
Returns:
|
|
407
|
+
True if connection is valid, False otherwise
|
|
408
|
+
|
|
409
|
+
Raises:
|
|
410
|
+
ASAPConnectionError: If connection validation fails
|
|
411
|
+
"""
|
|
412
|
+
if not self._client:
|
|
413
|
+
raise ASAPConnectionError(
|
|
414
|
+
"Client not connected. Use 'async with' context.",
|
|
415
|
+
url=sanitize_url(self.base_url),
|
|
416
|
+
)
|
|
417
|
+
|
|
418
|
+
try:
|
|
419
|
+
# Try to access a lightweight endpoint (manifest or health check)
|
|
420
|
+
# Using HEAD request to minimize bandwidth
|
|
421
|
+
response = await self._client.head(
|
|
422
|
+
f"{self.base_url}/.well-known/asap/manifest.json",
|
|
423
|
+
timeout=min(self.timeout, 5.0), # Shorter timeout for validation
|
|
424
|
+
)
|
|
425
|
+
# Any 2xx or 3xx response indicates the server is reachable
|
|
426
|
+
is_valid = 200 <= response.status_code < 400
|
|
427
|
+
if not is_valid:
|
|
428
|
+
logger.warning(
|
|
429
|
+
"asap.client.connection_validation_failed",
|
|
430
|
+
target_url=sanitize_url(self.base_url),
|
|
431
|
+
status_code=response.status_code,
|
|
432
|
+
message=(
|
|
433
|
+
f"Connection validation failed for {self.base_url}. "
|
|
434
|
+
f"Server returned status {response.status_code}. "
|
|
435
|
+
f"Verify the agent is running and the URL is correct."
|
|
436
|
+
),
|
|
437
|
+
)
|
|
438
|
+
return is_valid
|
|
439
|
+
except httpx.ConnectError as e:
|
|
440
|
+
logger.warning(
|
|
441
|
+
"asap.client.connection_validation_failed",
|
|
442
|
+
target_url=sanitize_url(self.base_url),
|
|
443
|
+
error=str(e),
|
|
444
|
+
message=(
|
|
445
|
+
f"Connection validation failed for {self.base_url}. "
|
|
446
|
+
f"Cannot reach the agent. Verify the agent is running and accessible. "
|
|
447
|
+
f"Error: {str(e)[:200]}"
|
|
448
|
+
),
|
|
449
|
+
)
|
|
450
|
+
return False
|
|
451
|
+
except httpx.TimeoutException:
|
|
452
|
+
logger.warning(
|
|
453
|
+
"asap.client.connection_validation_timeout",
|
|
454
|
+
target_url=sanitize_url(self.base_url),
|
|
455
|
+
timeout=self.timeout,
|
|
456
|
+
message=(
|
|
457
|
+
f"Connection validation timed out for {self.base_url}. "
|
|
458
|
+
f"Check network connectivity and firewall settings."
|
|
459
|
+
),
|
|
460
|
+
)
|
|
461
|
+
return False
|
|
462
|
+
except Exception as e:
|
|
463
|
+
logger.warning(
|
|
464
|
+
"asap.client.connection_validation_error",
|
|
465
|
+
target_url=sanitize_url(self.base_url),
|
|
466
|
+
error=str(e),
|
|
467
|
+
error_type=type(e).__name__,
|
|
468
|
+
message=(
|
|
469
|
+
f"Connection validation encountered an error for {self.base_url}: {e}. "
|
|
470
|
+
f"Verify the agent is running and accessible."
|
|
471
|
+
),
|
|
472
|
+
)
|
|
473
|
+
return False
|
|
172
474
|
|
|
173
475
|
@property
|
|
174
476
|
def is_connected(self) -> bool:
|
|
@@ -179,10 +481,8 @@ class ASAPClient:
|
|
|
179
481
|
"""Enter async context and open connection."""
|
|
180
482
|
# Create the async client
|
|
181
483
|
if self._transport:
|
|
182
|
-
# MockTransport works for both sync and async, so we cast it
|
|
183
|
-
# This is safe because httpx.MockTransport is compatible with async usage
|
|
184
484
|
self._client = httpx.AsyncClient(
|
|
185
|
-
transport=self._transport,
|
|
485
|
+
transport=self._transport,
|
|
186
486
|
timeout=self.timeout,
|
|
187
487
|
)
|
|
188
488
|
else:
|
|
@@ -215,35 +515,56 @@ class ASAPClient:
|
|
|
215
515
|
Response envelope from the remote agent
|
|
216
516
|
|
|
217
517
|
Raises:
|
|
518
|
+
ValueError: If envelope is None
|
|
218
519
|
ASAPConnectionError: If connection fails or HTTP error occurs
|
|
219
520
|
ASAPTimeoutError: If request times out
|
|
220
521
|
ASAPRemoteError: If remote agent returns JSON-RPC error
|
|
522
|
+
CircuitOpenError: If circuit breaker is open and request is rejected
|
|
221
523
|
|
|
222
524
|
Example:
|
|
223
525
|
>>> async with ASAPClient("http://localhost:8000") as client:
|
|
224
526
|
... response = await client.send(envelope)
|
|
225
527
|
... response.payload_type
|
|
226
528
|
"""
|
|
529
|
+
if envelope is None:
|
|
530
|
+
raise ValueError("envelope cannot be None")
|
|
531
|
+
|
|
227
532
|
if not self._client:
|
|
228
|
-
raise ASAPConnectionError(
|
|
533
|
+
raise ASAPConnectionError(
|
|
534
|
+
"Client not connected. Use 'async with' context.",
|
|
535
|
+
url=sanitize_url(self.base_url),
|
|
536
|
+
)
|
|
537
|
+
|
|
538
|
+
# Check circuit breaker state before attempting request
|
|
539
|
+
if self._circuit_breaker is not None and not self._circuit_breaker.can_attempt():
|
|
540
|
+
consecutive_failures = self._circuit_breaker.get_consecutive_failures()
|
|
541
|
+
raise CircuitOpenError(
|
|
542
|
+
base_url=sanitize_url(self.base_url),
|
|
543
|
+
consecutive_failures=consecutive_failures,
|
|
544
|
+
)
|
|
229
545
|
|
|
230
546
|
start_time = time.perf_counter()
|
|
231
547
|
|
|
232
548
|
# Generate idempotency key for retries
|
|
233
549
|
idempotency_key = generate_id()
|
|
234
550
|
|
|
235
|
-
#
|
|
236
|
-
self._request_counter
|
|
237
|
-
request_id = f"req-{self._request_counter}"
|
|
551
|
+
# Get next request counter value (thread-safe)
|
|
552
|
+
request_id = f"req-{next(self._request_counter)}"
|
|
238
553
|
|
|
239
|
-
# Log send attempt
|
|
554
|
+
# Log send attempt with context (sanitize URL to hide credentials)
|
|
555
|
+
sanitized_url = sanitize_url(self.base_url)
|
|
240
556
|
logger.info(
|
|
241
557
|
"asap.client.send",
|
|
242
|
-
target_url=
|
|
558
|
+
target_url=sanitized_url,
|
|
243
559
|
envelope_id=envelope.id,
|
|
244
560
|
trace_id=envelope.trace_id,
|
|
245
561
|
payload_type=envelope.payload_type,
|
|
246
562
|
idempotency_key=idempotency_key,
|
|
563
|
+
max_retries=self.max_retries,
|
|
564
|
+
message=(
|
|
565
|
+
f"Sending envelope {envelope.id} to {sanitized_url} "
|
|
566
|
+
f"(payload: {envelope.payload_type}, max_retries: {self.max_retries})"
|
|
567
|
+
),
|
|
247
568
|
)
|
|
248
569
|
|
|
249
570
|
# Build JSON-RPC request
|
|
@@ -273,24 +594,166 @@ class ASAPClient:
|
|
|
273
594
|
# Check HTTP status
|
|
274
595
|
if response.status_code >= 500:
|
|
275
596
|
# Server errors (5xx) are retriable
|
|
597
|
+
error_msg = (
|
|
598
|
+
f"HTTP server error {response.status_code} from {self.base_url}. "
|
|
599
|
+
f"Server returned: {response.text[:200]}"
|
|
600
|
+
)
|
|
276
601
|
if attempt < self.max_retries - 1:
|
|
602
|
+
delay = self._calculate_backoff(attempt)
|
|
277
603
|
logger.warning(
|
|
278
604
|
"asap.client.server_error",
|
|
279
605
|
status_code=response.status_code,
|
|
280
606
|
attempt=attempt + 1,
|
|
281
607
|
max_retries=self.max_retries,
|
|
608
|
+
delay_seconds=round(delay, 2),
|
|
609
|
+
target_url=sanitize_url(self.base_url),
|
|
610
|
+
message=f"Server error {response.status_code}, retrying in {delay:.2f}s (attempt {attempt + 1}/{self.max_retries})",
|
|
611
|
+
)
|
|
612
|
+
logger.info(
|
|
613
|
+
"asap.client.retry",
|
|
614
|
+
target_url=sanitize_url(self.base_url),
|
|
615
|
+
envelope_id=envelope.id,
|
|
616
|
+
attempt=attempt + 1,
|
|
617
|
+
max_retries=self.max_retries,
|
|
618
|
+
delay_seconds=round(delay, 2),
|
|
619
|
+
)
|
|
620
|
+
await asyncio.sleep(delay)
|
|
621
|
+
last_exception = ASAPConnectionError(error_msg, url=self.base_url)
|
|
622
|
+
continue
|
|
623
|
+
# All retries exhausted, record failure in circuit breaker
|
|
624
|
+
if self._circuit_breaker is not None:
|
|
625
|
+
previous_state = self._circuit_breaker.get_state()
|
|
626
|
+
self._circuit_breaker.record_failure()
|
|
627
|
+
current_state = self._circuit_breaker.get_state()
|
|
628
|
+
consecutive_failures = self._circuit_breaker.get_consecutive_failures()
|
|
629
|
+
# Log state change if circuit opened
|
|
630
|
+
if previous_state != current_state and current_state == CircuitState.OPEN:
|
|
631
|
+
logger.warning(
|
|
632
|
+
"asap.client.circuit_opened",
|
|
633
|
+
target_url=sanitize_url(self.base_url),
|
|
634
|
+
consecutive_failures=consecutive_failures,
|
|
635
|
+
threshold=self._circuit_breaker.threshold,
|
|
636
|
+
message=f"Circuit breaker opened after {consecutive_failures} consecutive failures",
|
|
637
|
+
)
|
|
638
|
+
raise ASAPConnectionError(error_msg, url=self.base_url)
|
|
639
|
+
if response.status_code == 429:
|
|
640
|
+
# Rate limit (429) is retriable, respect Retry-After header
|
|
641
|
+
if attempt < self.max_retries - 1:
|
|
642
|
+
# Check for Retry-After header
|
|
643
|
+
retry_after = response.headers.get("Retry-After")
|
|
644
|
+
if retry_after:
|
|
645
|
+
retry_delay: Optional[float] = None
|
|
646
|
+
# Retry-After can be seconds (int/float) or HTTP date
|
|
647
|
+
# First, try to parse as seconds (numeric)
|
|
648
|
+
if retry_after.replace(".", "", 1).isdigit():
|
|
649
|
+
try:
|
|
650
|
+
retry_delay = float(retry_after)
|
|
651
|
+
logger.info(
|
|
652
|
+
"asap.client.retry_after",
|
|
653
|
+
target_url=sanitize_url(self.base_url),
|
|
654
|
+
envelope_id=envelope.id,
|
|
655
|
+
attempt=attempt + 1,
|
|
656
|
+
retry_after_seconds=retry_delay,
|
|
657
|
+
message=f"Respecting server Retry-After: {retry_delay}s",
|
|
658
|
+
)
|
|
659
|
+
except ValueError:
|
|
660
|
+
pass # Fall through to date parsing
|
|
661
|
+
else:
|
|
662
|
+
# Try to parse as HTTP date
|
|
663
|
+
try:
|
|
664
|
+
retry_date = parsedate_to_datetime(retry_after)
|
|
665
|
+
if retry_date:
|
|
666
|
+
# Calculate delay in seconds from now until retry_date
|
|
667
|
+
now_timestamp = time.time()
|
|
668
|
+
retry_timestamp = retry_date.timestamp()
|
|
669
|
+
calculated_delay = retry_timestamp - now_timestamp
|
|
670
|
+
# If date is in the past or delay is invalid, fall back to calculated backoff
|
|
671
|
+
if calculated_delay <= 0:
|
|
672
|
+
retry_delay = None # Will trigger fallback
|
|
673
|
+
else:
|
|
674
|
+
retry_delay = calculated_delay
|
|
675
|
+
logger.info(
|
|
676
|
+
"asap.client.retry_after",
|
|
677
|
+
target_url=sanitize_url(self.base_url),
|
|
678
|
+
envelope_id=envelope.id,
|
|
679
|
+
attempt=attempt + 1,
|
|
680
|
+
retry_after_seconds=round(retry_delay, 2),
|
|
681
|
+
retry_after_date=retry_after,
|
|
682
|
+
message=f"Respecting server Retry-After date: {retry_after} ({retry_delay:.2f}s)",
|
|
683
|
+
)
|
|
684
|
+
except (ValueError, TypeError, AttributeError, OSError):
|
|
685
|
+
# Invalid date format or timestamp conversion error, fall back to calculated backoff
|
|
686
|
+
pass
|
|
687
|
+
|
|
688
|
+
# If parsing failed or delay is invalid (None or <= 0), use calculated backoff
|
|
689
|
+
if retry_delay is None or retry_delay <= 0:
|
|
690
|
+
retry_delay = self._calculate_backoff(attempt)
|
|
691
|
+
logger.warning(
|
|
692
|
+
"asap.client.retry_after_invalid",
|
|
693
|
+
target_url=sanitize_url(self.base_url),
|
|
694
|
+
envelope_id=envelope.id,
|
|
695
|
+
retry_after_header=retry_after,
|
|
696
|
+
fallback_delay=round(retry_delay, 2),
|
|
697
|
+
message="Invalid Retry-After format, using calculated backoff",
|
|
698
|
+
)
|
|
699
|
+
delay = retry_delay
|
|
700
|
+
else:
|
|
701
|
+
# No Retry-After header, use calculated backoff
|
|
702
|
+
delay = self._calculate_backoff(attempt)
|
|
703
|
+
logger.warning(
|
|
704
|
+
"asap.client.rate_limited",
|
|
705
|
+
status_code=429,
|
|
706
|
+
attempt=attempt + 1,
|
|
707
|
+
max_retries=self.max_retries,
|
|
708
|
+
delay_seconds=round(delay, 2),
|
|
709
|
+
)
|
|
710
|
+
logger.info(
|
|
711
|
+
"asap.client.retry",
|
|
712
|
+
target_url=sanitize_url(self.base_url),
|
|
713
|
+
envelope_id=envelope.id,
|
|
714
|
+
attempt=attempt + 1,
|
|
715
|
+
max_retries=self.max_retries,
|
|
716
|
+
delay_seconds=round(delay, 2),
|
|
282
717
|
)
|
|
718
|
+
await asyncio.sleep(delay)
|
|
283
719
|
last_exception = ASAPConnectionError(
|
|
284
|
-
f"HTTP
|
|
720
|
+
f"HTTP rate limit error 429 from {self.base_url}. "
|
|
721
|
+
f"Server response: {response.text[:200]}",
|
|
722
|
+
url=sanitize_url(self.base_url),
|
|
285
723
|
)
|
|
286
724
|
continue
|
|
725
|
+
# All retries exhausted, record failure in circuit breaker
|
|
726
|
+
if self._circuit_breaker is not None:
|
|
727
|
+
previous_state = self._circuit_breaker.get_state()
|
|
728
|
+
self._circuit_breaker.record_failure()
|
|
729
|
+
current_state = self._circuit_breaker.get_state()
|
|
730
|
+
consecutive_failures = self._circuit_breaker.get_consecutive_failures()
|
|
731
|
+
# Log state change if circuit opened
|
|
732
|
+
if previous_state != current_state and current_state == CircuitState.OPEN:
|
|
733
|
+
logger.warning(
|
|
734
|
+
"asap.client.circuit_opened",
|
|
735
|
+
target_url=sanitize_url(self.base_url),
|
|
736
|
+
consecutive_failures=consecutive_failures,
|
|
737
|
+
threshold=self._circuit_breaker.threshold,
|
|
738
|
+
message=f"Circuit breaker opened after {consecutive_failures} consecutive failures (rate limited)",
|
|
739
|
+
)
|
|
287
740
|
raise ASAPConnectionError(
|
|
288
|
-
f"HTTP
|
|
741
|
+
f"HTTP rate limit error 429 from {self.base_url} after {self.max_retries} attempts. "
|
|
742
|
+
f"Server response: {response.text[:200]}",
|
|
743
|
+
url=sanitize_url(self.base_url),
|
|
289
744
|
)
|
|
290
745
|
if response.status_code >= 400:
|
|
291
|
-
# Client errors (4xx) are not retriable
|
|
746
|
+
# Client errors (4xx) are not retriable (except 429 handled above)
|
|
747
|
+
# We record a failure in the circuit breaker here because persistent 4xx
|
|
748
|
+
# (like 401/403) can indicate an unhealthy configuration or system state.
|
|
749
|
+
if self._circuit_breaker is not None:
|
|
750
|
+
self._circuit_breaker.record_failure()
|
|
751
|
+
|
|
292
752
|
raise ASAPConnectionError(
|
|
293
|
-
f"HTTP client error {response.status_code}
|
|
753
|
+
f"HTTP client error {response.status_code} from {self.base_url}. "
|
|
754
|
+
f"This indicates a problem with the request. "
|
|
755
|
+
f"Server response: {response.text[:200]}",
|
|
756
|
+
url=sanitize_url(self.base_url),
|
|
294
757
|
)
|
|
295
758
|
|
|
296
759
|
# Parse JSON response
|
|
@@ -301,6 +764,11 @@ class ASAPClient:
|
|
|
301
764
|
|
|
302
765
|
# Check for JSON-RPC error
|
|
303
766
|
if "error" in json_response:
|
|
767
|
+
# Record success pattern (service is reachable)
|
|
768
|
+
# A valid JSON-RPC error means the connection and transport are healthy
|
|
769
|
+
if self._circuit_breaker is not None:
|
|
770
|
+
self._circuit_breaker.record_success()
|
|
771
|
+
|
|
304
772
|
error = json_response["error"]
|
|
305
773
|
raise ASAPRemoteError(
|
|
306
774
|
error.get("code", -32603),
|
|
@@ -316,11 +784,24 @@ class ASAPClient:
|
|
|
316
784
|
|
|
317
785
|
response_envelope = Envelope(**envelope_data)
|
|
318
786
|
|
|
787
|
+
# Record success in circuit breaker
|
|
788
|
+
if self._circuit_breaker is not None:
|
|
789
|
+
previous_state = self._circuit_breaker.get_state()
|
|
790
|
+
self._circuit_breaker.record_success()
|
|
791
|
+
current_state = self._circuit_breaker.get_state()
|
|
792
|
+
# Log state change if circuit was closed
|
|
793
|
+
if previous_state != current_state and current_state == CircuitState.CLOSED:
|
|
794
|
+
logger.info(
|
|
795
|
+
"asap.client.circuit_closed",
|
|
796
|
+
target_url=sanitize_url(self.base_url),
|
|
797
|
+
message="Circuit breaker closed after successful request",
|
|
798
|
+
)
|
|
799
|
+
|
|
319
800
|
# Calculate duration and log success
|
|
320
801
|
duration_ms = (time.perf_counter() - start_time) * 1000
|
|
321
802
|
logger.info(
|
|
322
803
|
"asap.client.response",
|
|
323
|
-
target_url=self.base_url,
|
|
804
|
+
target_url=sanitize_url(self.base_url),
|
|
324
805
|
envelope_id=envelope.id,
|
|
325
806
|
response_id=response_envelope.id,
|
|
326
807
|
trace_id=envelope.trace_id,
|
|
@@ -330,70 +811,124 @@ class ASAPClient:
|
|
|
330
811
|
|
|
331
812
|
return response_envelope
|
|
332
813
|
|
|
333
|
-
except httpx.ConnectError as e:
|
|
334
|
-
|
|
814
|
+
except (httpx.ConnectError, httpx.TimeoutException) as e:
|
|
815
|
+
is_timeout = isinstance(e, httpx.TimeoutException)
|
|
816
|
+
error_type = "Timeout" if is_timeout else "Connection error"
|
|
817
|
+
error_msg = (
|
|
818
|
+
f"{error_type} to {self.base_url}: {e}. "
|
|
819
|
+
f"Verify the agent is running and accessible."
|
|
820
|
+
)
|
|
821
|
+
if is_timeout:
|
|
822
|
+
last_exception = ASAPTimeoutError(
|
|
823
|
+
f"Request timeout after {self.timeout}s", timeout=self.timeout
|
|
824
|
+
)
|
|
825
|
+
else:
|
|
826
|
+
last_exception = ASAPConnectionError(error_msg, cause=e, url=self.base_url)
|
|
827
|
+
|
|
335
828
|
# Log retry attempt
|
|
336
829
|
if attempt < self.max_retries - 1:
|
|
830
|
+
delay = self._calculate_backoff(attempt)
|
|
337
831
|
logger.warning(
|
|
338
832
|
"asap.client.retry",
|
|
339
|
-
target_url=self.base_url,
|
|
833
|
+
target_url=sanitize_url(self.base_url),
|
|
340
834
|
envelope_id=envelope.id,
|
|
341
835
|
attempt=attempt + 1,
|
|
342
836
|
max_retries=self.max_retries,
|
|
343
837
|
error=str(e),
|
|
838
|
+
delay_seconds=round(delay, 2),
|
|
839
|
+
message=(
|
|
840
|
+
f"{error_type} to {self.base_url} (attempt {attempt + 1}/{self.max_retries}). "
|
|
841
|
+
f"Retrying in {delay:.2f}s. "
|
|
842
|
+
f"Error: {str(e)[:100]}"
|
|
843
|
+
),
|
|
344
844
|
)
|
|
845
|
+
await asyncio.sleep(delay)
|
|
345
846
|
continue
|
|
346
|
-
# Log final failure
|
|
347
|
-
duration_ms = (time.perf_counter() - start_time) * 1000
|
|
348
|
-
logger.error(
|
|
349
|
-
"asap.client.error",
|
|
350
|
-
target_url=self.base_url,
|
|
351
|
-
envelope_id=envelope.id,
|
|
352
|
-
error="Connection failed after retries",
|
|
353
|
-
error_type="ASAPConnectionError",
|
|
354
|
-
duration_ms=round(duration_ms, 2),
|
|
355
|
-
attempts=attempt + 1,
|
|
356
|
-
)
|
|
357
|
-
raise last_exception from e
|
|
358
847
|
|
|
359
|
-
|
|
848
|
+
# All retries exhausted, record failure in circuit breaker
|
|
849
|
+
if self._circuit_breaker is not None:
|
|
850
|
+
previous_state = self._circuit_breaker.get_state()
|
|
851
|
+
self._circuit_breaker.record_failure()
|
|
852
|
+
current_state = self._circuit_breaker.get_state()
|
|
853
|
+
consecutive_failures = self._circuit_breaker.get_consecutive_failures()
|
|
854
|
+
# Log state change if circuit opened
|
|
855
|
+
if previous_state != current_state and current_state == CircuitState.OPEN:
|
|
856
|
+
logger.warning(
|
|
857
|
+
"asap.client.circuit_opened",
|
|
858
|
+
target_url=sanitize_url(self.base_url),
|
|
859
|
+
consecutive_failures=consecutive_failures,
|
|
860
|
+
threshold=self._circuit_breaker.threshold,
|
|
861
|
+
message=f"Circuit breaker opened after {consecutive_failures} consecutive failures",
|
|
862
|
+
)
|
|
863
|
+
|
|
864
|
+
# Log final failure with detailed context
|
|
360
865
|
duration_ms = (time.perf_counter() - start_time) * 1000
|
|
361
|
-
|
|
362
|
-
f"Request timeout after {self.timeout}s", timeout=self.timeout
|
|
363
|
-
)
|
|
364
|
-
# Log timeout (don't retry)
|
|
866
|
+
error_type_name = "ASAPTimeoutError" if is_timeout else "ASAPConnectionError"
|
|
365
867
|
logger.error(
|
|
366
868
|
"asap.client.error",
|
|
367
|
-
target_url=self.base_url,
|
|
869
|
+
target_url=sanitize_url(self.base_url),
|
|
368
870
|
envelope_id=envelope.id,
|
|
369
|
-
error="
|
|
370
|
-
error_type=
|
|
371
|
-
timeout=self.timeout,
|
|
871
|
+
error=f"{error_type} after retries",
|
|
872
|
+
error_type=error_type_name,
|
|
372
873
|
duration_ms=round(duration_ms, 2),
|
|
874
|
+
attempts=attempt + 1,
|
|
875
|
+
max_retries=self.max_retries,
|
|
876
|
+
timeout=self.timeout if is_timeout else None,
|
|
877
|
+
message=(
|
|
878
|
+
f"{error_type} to {self.base_url} failed after {attempt + 1} attempts. "
|
|
879
|
+
f"Total duration: {duration_ms:.2f}ms. "
|
|
880
|
+
f"Troubleshooting: Verify the agent is running, check network connectivity, "
|
|
881
|
+
f"and ensure the URL is correct. Original error: {str(e)[:200]}"
|
|
882
|
+
),
|
|
373
883
|
)
|
|
374
884
|
raise last_exception from e
|
|
375
885
|
|
|
376
886
|
except (ASAPConnectionError, ASAPRemoteError, ASAPTimeoutError):
|
|
377
|
-
# Re-raise our custom errors
|
|
887
|
+
# Re-raise our custom errors without recording failure again
|
|
888
|
+
# (failures are already recorded before these exceptions are raised)
|
|
378
889
|
raise
|
|
379
890
|
|
|
380
891
|
except Exception as e:
|
|
892
|
+
# Record failure in circuit breaker
|
|
893
|
+
if self._circuit_breaker is not None:
|
|
894
|
+
previous_state = self._circuit_breaker.get_state()
|
|
895
|
+
self._circuit_breaker.record_failure()
|
|
896
|
+
current_state = self._circuit_breaker.get_state()
|
|
897
|
+
consecutive_failures = self._circuit_breaker.get_consecutive_failures()
|
|
898
|
+
# Log state change if circuit opened
|
|
899
|
+
if previous_state != current_state and current_state == CircuitState.OPEN:
|
|
900
|
+
logger.warning(
|
|
901
|
+
"asap.client.circuit_opened",
|
|
902
|
+
target_url=sanitize_url(self.base_url),
|
|
903
|
+
consecutive_failures=consecutive_failures,
|
|
904
|
+
threshold=self._circuit_breaker.threshold,
|
|
905
|
+
message=f"Circuit breaker opened after {consecutive_failures} consecutive failures",
|
|
906
|
+
)
|
|
381
907
|
# Log unexpected error
|
|
382
908
|
duration_ms = (time.perf_counter() - start_time) * 1000
|
|
383
909
|
logger.exception(
|
|
384
910
|
"asap.client.error",
|
|
385
|
-
target_url=self.base_url,
|
|
911
|
+
target_url=sanitize_url(self.base_url),
|
|
386
912
|
envelope_id=envelope.id,
|
|
387
913
|
error=str(e),
|
|
388
914
|
error_type=type(e).__name__,
|
|
389
915
|
duration_ms=round(duration_ms, 2),
|
|
390
916
|
)
|
|
391
917
|
# Wrap unexpected errors
|
|
392
|
-
raise ASAPConnectionError(
|
|
918
|
+
raise ASAPConnectionError(
|
|
919
|
+
f"Unexpected error connecting to {self.base_url}: {e}. "
|
|
920
|
+
f"Verify the agent is running and accessible.",
|
|
921
|
+
cause=e,
|
|
922
|
+
url=sanitize_url(self.base_url),
|
|
923
|
+
) from e
|
|
393
924
|
|
|
394
925
|
# Defensive code: This should never be reached because the loop above
|
|
395
926
|
# always either returns successfully or raises an exception.
|
|
396
927
|
# Kept as a safety net for future code changes.
|
|
397
928
|
if last_exception: # pragma: no cover
|
|
398
929
|
raise last_exception
|
|
399
|
-
raise ASAPConnectionError(
|
|
930
|
+
raise ASAPConnectionError(
|
|
931
|
+
f"Max retries ({self.max_retries}) exceeded for {self.base_url}. "
|
|
932
|
+
f"Verify the agent is running and accessible.",
|
|
933
|
+
url=sanitize_url(self.base_url),
|
|
934
|
+
) # pragma: no cover
|