asap-protocol 0.5.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- asap/__init__.py +1 -1
- asap/cli.py +137 -2
- asap/examples/README.md +81 -13
- asap/examples/auth_patterns.py +212 -0
- asap/examples/error_recovery.py +248 -0
- asap/examples/long_running.py +287 -0
- asap/examples/mcp_integration.py +240 -0
- asap/examples/multi_step_workflow.py +134 -0
- asap/examples/orchestration.py +293 -0
- asap/examples/rate_limiting.py +137 -0
- asap/examples/run_demo.py +0 -2
- asap/examples/secure_handler.py +84 -0
- asap/examples/state_migration.py +240 -0
- asap/examples/streaming_response.py +108 -0
- asap/examples/websocket_concept.py +129 -0
- asap/mcp/__init__.py +43 -0
- asap/mcp/client.py +224 -0
- asap/mcp/protocol.py +179 -0
- asap/mcp/server.py +333 -0
- asap/mcp/server_runner.py +40 -0
- asap/models/base.py +0 -3
- asap/models/constants.py +3 -1
- asap/models/entities.py +21 -6
- asap/models/envelope.py +7 -0
- asap/models/ids.py +8 -4
- asap/models/parts.py +33 -3
- asap/models/validators.py +16 -0
- asap/observability/__init__.py +6 -0
- asap/observability/dashboards/README.md +24 -0
- asap/observability/dashboards/asap-detailed.json +131 -0
- asap/observability/dashboards/asap-red.json +129 -0
- asap/observability/logging.py +81 -1
- asap/observability/metrics.py +15 -1
- asap/observability/trace_parser.py +238 -0
- asap/observability/trace_ui.py +218 -0
- asap/observability/tracing.py +293 -0
- asap/state/machine.py +15 -2
- asap/state/snapshot.py +0 -9
- asap/testing/__init__.py +31 -0
- asap/testing/assertions.py +108 -0
- asap/testing/fixtures.py +113 -0
- asap/testing/mocks.py +152 -0
- asap/transport/__init__.py +28 -0
- asap/transport/cache.py +180 -0
- asap/transport/circuit_breaker.py +9 -8
- asap/transport/client.py +418 -36
- asap/transport/compression.py +389 -0
- asap/transport/handlers.py +106 -53
- asap/transport/middleware.py +58 -34
- asap/transport/server.py +429 -139
- asap/transport/validators.py +0 -4
- asap/utils/sanitization.py +0 -5
- asap_protocol-1.0.0.dist-info/METADATA +264 -0
- asap_protocol-1.0.0.dist-info/RECORD +70 -0
- asap_protocol-0.5.0.dist-info/METADATA +0 -244
- asap_protocol-0.5.0.dist-info/RECORD +0 -41
- {asap_protocol-0.5.0.dist-info → asap_protocol-1.0.0.dist-info}/WHEEL +0 -0
- {asap_protocol-0.5.0.dist-info → asap_protocol-1.0.0.dist-info}/entry_points.txt +0 -0
- {asap_protocol-0.5.0.dist-info → asap_protocol-1.0.0.dist-info}/licenses/LICENSE +0 -0
asap/transport/client.py
CHANGED
|
@@ -10,6 +10,7 @@ The ASAPClient provides:
|
|
|
10
10
|
- Retry logic with idempotency keys
|
|
11
11
|
- Proper error handling and timeouts
|
|
12
12
|
- Structured logging for observability
|
|
13
|
+
- Compression support (gzip/brotli) for bandwidth reduction
|
|
13
14
|
|
|
14
15
|
Example:
|
|
15
16
|
>>> from asap.transport.client import ASAPClient
|
|
@@ -18,10 +19,15 @@ Example:
|
|
|
18
19
|
>>> async with ASAPClient("http://agent.example.com") as client:
|
|
19
20
|
... response = await client.send(request_envelope)
|
|
20
21
|
... print(response.payload_type)
|
|
22
|
+
>>>
|
|
23
|
+
>>> # With compression enabled (default for payloads > 1KB)
|
|
24
|
+
>>> async with ASAPClient("http://agent.example.com", compression=True) as client:
|
|
25
|
+
... response = await client.send(large_envelope) # Compressed automatically
|
|
21
26
|
"""
|
|
22
27
|
|
|
23
28
|
import asyncio
|
|
24
29
|
import itertools
|
|
30
|
+
import json
|
|
25
31
|
import random
|
|
26
32
|
import time
|
|
27
33
|
from dataclasses import dataclass
|
|
@@ -38,10 +44,18 @@ from asap.models.constants import (
|
|
|
38
44
|
DEFAULT_CIRCUIT_BREAKER_TIMEOUT,
|
|
39
45
|
DEFAULT_MAX_DELAY,
|
|
40
46
|
)
|
|
47
|
+
from asap.models.entities import Manifest
|
|
41
48
|
from asap.models.envelope import Envelope
|
|
42
49
|
from asap.models.ids import generate_id
|
|
43
|
-
from asap.observability import get_logger
|
|
50
|
+
from asap.observability import get_logger, get_metrics
|
|
51
|
+
from asap.transport.cache import DEFAULT_MAX_SIZE, ManifestCache
|
|
44
52
|
from asap.transport.circuit_breaker import CircuitBreaker, CircuitState, get_registry
|
|
53
|
+
from asap.transport.compression import (
|
|
54
|
+
COMPRESSION_THRESHOLD,
|
|
55
|
+
CompressionAlgorithm,
|
|
56
|
+
compress_payload,
|
|
57
|
+
get_accept_encoding_header,
|
|
58
|
+
)
|
|
45
59
|
from asap.transport.jsonrpc import ASAP_METHOD
|
|
46
60
|
from asap.utils.sanitization import sanitize_url
|
|
47
61
|
|
|
@@ -54,6 +68,30 @@ DEFAULT_TIMEOUT = 60.0
|
|
|
54
68
|
# Default maximum retries
|
|
55
69
|
DEFAULT_MAX_RETRIES = 3
|
|
56
70
|
|
|
71
|
+
# Connection pool defaults (support 1000+ concurrent via reuse)
|
|
72
|
+
DEFAULT_POOL_CONNECTIONS = 100
|
|
73
|
+
DEFAULT_POOL_MAXSIZE = 100
|
|
74
|
+
# Timeout for acquiring a connection from the pool (distinct from request timeout)
|
|
75
|
+
DEFAULT_POOL_TIMEOUT = 5.0
|
|
76
|
+
# Maximum time to wait for manifest retrieval
|
|
77
|
+
MANIFEST_REQUEST_TIMEOUT = 10.0
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _record_send_error_metrics(start_time: float, error: BaseException) -> None:
    """Emit the failure metrics for a single transport send.

    Increments the overall send counter with ``status=error``, increments
    the per-reason error counter, and observes the elapsed time in the
    send-duration histogram.

    Args:
        start_time: ``time.perf_counter()`` reading taken when the send began.
        error: Exception that ended the send; its class name is used as the
            ``reason`` label.
    """
    elapsed = time.perf_counter() - start_time
    registry = get_metrics()

    registry.increment_counter("asap_transport_send_total", {"status": "error"})

    # Label by exception class so failures can be broken down by cause.
    reason_labels = {"reason": type(error).__name__}
    registry.increment_counter("asap_transport_send_errors_total", reason_labels)

    registry.observe_histogram(
        "asap_transport_send_duration_seconds",
        elapsed,
        {"status": "error"},
    )
|
|
94
|
+
|
|
57
95
|
|
|
58
96
|
@dataclass
|
|
59
97
|
class RetryConfig:
|
|
@@ -178,17 +216,49 @@ class ASAPClient:
|
|
|
178
216
|
The client should be used as an async context manager to ensure
|
|
179
217
|
proper connection lifecycle management.
|
|
180
218
|
|
|
219
|
+
Features:
|
|
220
|
+
- HTTP/2 multiplexing (enabled by default) for improved batch performance
|
|
221
|
+
- Connection pooling supporting 1000+ concurrent requests
|
|
222
|
+
- Automatic retry with exponential backoff
|
|
223
|
+
- Circuit breaker pattern for fault tolerance
|
|
224
|
+
- Batch operations via send_batch() method
|
|
225
|
+
- Compression support (gzip/brotli) for bandwidth reduction
|
|
226
|
+
|
|
181
227
|
Attributes:
|
|
182
228
|
base_url: Base URL of the remote agent
|
|
183
229
|
timeout: Request timeout in seconds
|
|
184
230
|
max_retries: Maximum retry attempts for transient failures
|
|
185
231
|
require_https: Whether HTTPS is required for non-localhost connections
|
|
186
232
|
is_connected: Whether the client has an active connection
|
|
233
|
+
compression: Whether compression is enabled for requests
|
|
234
|
+
compression_threshold: Minimum payload size to trigger compression
|
|
187
235
|
_circuit_breaker: Optional circuit breaker instance
|
|
188
236
|
|
|
237
|
+
Pool sizing (pool_connections / pool_maxsize):
|
|
238
|
+
Single-agent: 100 (default). Small cluster: 200–500. Large cluster: 500–1000.
|
|
239
|
+
Supports 1000+ concurrent requests via connection reuse when pool_maxsize < concurrency.
|
|
240
|
+
|
|
241
|
+
HTTP/2 Multiplexing:
|
|
242
|
+
HTTP/2 is enabled by default (http2=True) and provides request multiplexing over
|
|
243
|
+
a single TCP connection, reducing latency for batch operations. If the server
|
|
244
|
+
doesn't support HTTP/2, the client automatically falls back to HTTP/1.1.
|
|
245
|
+
|
|
246
|
+
Compression:
|
|
247
|
+
Compression is enabled by default (compression=True) for payloads exceeding
|
|
248
|
+
1KB. Supports gzip (standard) and brotli (optional, requires brotli package).
|
|
249
|
+
Brotli provides ~20% better compression than gzip for JSON payloads.
|
|
250
|
+
|
|
189
251
|
Example:
|
|
190
252
|
>>> async with ASAPClient("http://localhost:8000") as client:
|
|
191
253
|
... response = await client.send(envelope)
|
|
254
|
+
>>>
|
|
255
|
+
>>> # Batch operations with HTTP/2 multiplexing
|
|
256
|
+
>>> async with ASAPClient("https://agent.example.com") as client:
|
|
257
|
+
... responses = await client.send_batch([env1, env2, env3])
|
|
258
|
+
>>>
|
|
259
|
+
>>> # Disable compression for specific client
|
|
260
|
+
>>> async with ASAPClient("http://localhost:8000", compression=False) as client:
|
|
261
|
+
... response = await client.send(envelope) # No compression
|
|
192
262
|
"""
|
|
193
263
|
|
|
194
264
|
_circuit_breaker: Optional[CircuitBreaker]
|
|
@@ -200,6 +270,15 @@ class ASAPClient:
|
|
|
200
270
|
transport: httpx.AsyncBaseTransport | None = None,
|
|
201
271
|
require_https: bool = True,
|
|
202
272
|
retry_config: Optional[RetryConfig] = None,
|
|
273
|
+
# Connection pool (httpx.Limits); enables 1000+ concurrent via reuse
|
|
274
|
+
pool_connections: int | None = None,
|
|
275
|
+
pool_maxsize: int | None = None,
|
|
276
|
+
pool_timeout: float | None = None,
|
|
277
|
+
# HTTP/2 multiplexing for improved batch performance
|
|
278
|
+
http2: bool = True,
|
|
279
|
+
# Compression settings for bandwidth reduction
|
|
280
|
+
compression: bool = True,
|
|
281
|
+
compression_threshold: int = COMPRESSION_THRESHOLD,
|
|
203
282
|
# Individual retry parameters (for backward compatibility)
|
|
204
283
|
# If retry_config is provided, these are ignored
|
|
205
284
|
max_retries: int | None = None,
|
|
@@ -209,6 +288,7 @@ class ASAPClient:
|
|
|
209
288
|
circuit_breaker_enabled: bool | None = None,
|
|
210
289
|
circuit_breaker_threshold: int | None = None,
|
|
211
290
|
circuit_breaker_timeout: float | None = None,
|
|
291
|
+
manifest_cache_size: int | None = None,
|
|
212
292
|
) -> None:
|
|
213
293
|
"""Initialize ASAP client.
|
|
214
294
|
|
|
@@ -218,7 +298,28 @@ class ASAPClient:
|
|
|
218
298
|
transport: Optional custom async transport (for testing). Must be an instance
|
|
219
299
|
of httpx.AsyncBaseTransport (e.g., httpx.MockTransport).
|
|
220
300
|
require_https: If True, enforces HTTPS for non-localhost connections (default: True).
|
|
301
|
+
pool_connections: Max keep-alive connections in pool. Default: DEFAULT_POOL_CONNECTIONS (100).
|
|
302
|
+
Controls how many idle connections are kept open.
|
|
303
|
+
pool_maxsize: Max total connections in pool. Default: DEFAULT_POOL_MAXSIZE (100).
|
|
304
|
+
Controls maximum number of concurrent connections.
|
|
305
|
+
Tuning:
|
|
306
|
+
- Single agent: 100 (default)
|
|
307
|
+
- Small cluster: 200-500
|
|
308
|
+
- Large cluster: 500-1000
|
|
309
|
+
Safe to increase if OS file descriptor limits allow.
|
|
310
|
+
pool_timeout: Seconds to wait for connection from pool. Default: DEFAULT_POOL_TIMEOUT (5.0).
|
|
311
|
+
Increase if you see PoolTimeout exceptions under high load.
|
|
221
312
|
HTTP connections to localhost are allowed with a warning for development.
|
|
313
|
+
http2: Enable HTTP/2 multiplexing for improved batch performance (default: True).
|
|
314
|
+
HTTP/2 allows multiple concurrent requests over a single TCP connection,
|
|
315
|
+
reducing latency for batch operations. Falls back to HTTP/1.1 if server
|
|
316
|
+
doesn't support HTTP/2.
|
|
317
|
+
compression: Enable request compression for bandwidth reduction (default: True).
|
|
318
|
+
When enabled, payloads exceeding compression_threshold are compressed
|
|
319
|
+
using gzip or brotli (if available). The server must support the
|
|
320
|
+
Content-Encoding header to decompress requests.
|
|
321
|
+
compression_threshold: Minimum payload size in bytes to trigger compression
|
|
322
|
+
(default: 1024 = 1KB). Payloads smaller than this are sent uncompressed.
|
|
222
323
|
retry_config: Optional RetryConfig dataclass to group retry and circuit breaker parameters.
|
|
223
324
|
If provided, individual retry parameters are ignored.
|
|
224
325
|
max_retries: Maximum retry attempts for transient failures (default: 3).
|
|
@@ -235,6 +336,9 @@ class ASAPClient:
|
|
|
235
336
|
Ignored if retry_config is provided.
|
|
236
337
|
circuit_breaker_timeout: Seconds before transitioning OPEN -> HALF_OPEN (default: 60.0).
|
|
237
338
|
Ignored if retry_config is provided.
|
|
339
|
+
manifest_cache_size: Maximum number of manifests to cache (default: 1000).
|
|
340
|
+
Increase for high-cardinality environments (e.g. thousands of agents).
|
|
341
|
+
Set to 0 for unlimited. See ManifestCache for cleanup latency notes.
|
|
238
342
|
|
|
239
343
|
Raises:
|
|
240
344
|
ValueError: If URL format is invalid, scheme is not HTTP/HTTPS, or HTTPS is
|
|
@@ -247,6 +351,9 @@ class ASAPClient:
|
|
|
247
351
|
>>> # Using RetryConfig (recommended)
|
|
248
352
|
>>> config = RetryConfig(max_retries=5, circuit_breaker_enabled=True)
|
|
249
353
|
>>> client = ASAPClient("http://localhost:8000", retry_config=config)
|
|
354
|
+
>>>
|
|
355
|
+
>>> # With compression disabled
|
|
356
|
+
>>> client = ASAPClient("http://localhost:8000", compression=False)
|
|
250
357
|
"""
|
|
251
358
|
# Extract retry config values
|
|
252
359
|
if retry_config is not None:
|
|
@@ -277,7 +384,6 @@ class ASAPClient:
|
|
|
277
384
|
if circuit_breaker_timeout is not None
|
|
278
385
|
else DEFAULT_CIRCUIT_BREAKER_TIMEOUT
|
|
279
386
|
)
|
|
280
|
-
# Validate URL format and scheme
|
|
281
387
|
from urllib.parse import urlparse
|
|
282
388
|
|
|
283
389
|
parsed = urlparse(base_url)
|
|
@@ -293,7 +399,6 @@ class ASAPClient:
|
|
|
293
399
|
f"Received: {base_url}"
|
|
294
400
|
)
|
|
295
401
|
|
|
296
|
-
# Validate HTTPS requirement
|
|
297
402
|
is_https = parsed.scheme.lower() == "https"
|
|
298
403
|
is_local = self._is_localhost(parsed)
|
|
299
404
|
|
|
@@ -320,6 +425,11 @@ class ASAPClient:
|
|
|
320
425
|
|
|
321
426
|
self.base_url = base_url.rstrip("/")
|
|
322
427
|
self.timeout = timeout
|
|
428
|
+
self._pool_connections = (
|
|
429
|
+
pool_connections if pool_connections is not None else DEFAULT_POOL_CONNECTIONS
|
|
430
|
+
)
|
|
431
|
+
self._pool_maxsize = pool_maxsize if pool_maxsize is not None else DEFAULT_POOL_MAXSIZE
|
|
432
|
+
self._pool_timeout = pool_timeout if pool_timeout is not None else DEFAULT_POOL_TIMEOUT
|
|
323
433
|
self.max_retries = max_retries_val
|
|
324
434
|
self.require_https = require_https
|
|
325
435
|
self.base_delay = base_delay_val
|
|
@@ -327,6 +437,9 @@ class ASAPClient:
|
|
|
327
437
|
self.jitter = jitter_val
|
|
328
438
|
self.circuit_breaker_enabled = circuit_breaker_enabled_val
|
|
329
439
|
self._transport = transport
|
|
440
|
+
self._http2 = http2
|
|
441
|
+
self._compression = compression
|
|
442
|
+
self._compression_threshold = compression_threshold
|
|
330
443
|
self._client: httpx.AsyncClient | None = None
|
|
331
444
|
# Thread-safe counter using itertools.count
|
|
332
445
|
self._request_counter = itertools.count(1)
|
|
@@ -344,6 +457,10 @@ class ASAPClient:
|
|
|
344
457
|
else:
|
|
345
458
|
self._circuit_breaker = None
|
|
346
459
|
|
|
460
|
+
# Per-client manifest cache (not shared like circuit breaker).
|
|
461
|
+
cache_max = manifest_cache_size if manifest_cache_size is not None else DEFAULT_MAX_SIZE
|
|
462
|
+
self._manifest_cache = ManifestCache(max_size=cache_max)
|
|
463
|
+
|
|
347
464
|
@staticmethod
|
|
348
465
|
def _is_localhost(parsed_url: ParseResult) -> bool:
|
|
349
466
|
"""Check if URL points to localhost.
|
|
@@ -384,9 +501,6 @@ class ASAPClient:
|
|
|
384
501
|
# Cap at max_delay
|
|
385
502
|
delay = min(delay, self.max_delay)
|
|
386
503
|
|
|
387
|
-
# Add jitter if enabled (random value between 0 and 10% of delay)
|
|
388
|
-
# Note: random.uniform is appropriate here - jitter for retry backoff
|
|
389
|
-
# does not require cryptographic security, only statistical distribution
|
|
390
504
|
if self.jitter:
|
|
391
505
|
jitter_amount: float = random.uniform(0, delay * 0.1) # nosec B311
|
|
392
506
|
delay += jitter_amount
|
|
@@ -478,16 +592,30 @@ class ASAPClient:
|
|
|
478
592
|
return self._client is not None
|
|
479
593
|
|
|
480
594
|
async def __aenter__(self) -> "ASAPClient":
    """Open the underlying HTTP client and return this instance.

    Builds an ``httpx.AsyncClient`` from the pool limits and timeouts stored
    on the instance. When a custom transport was supplied it is used as-is
    and ``http2`` is not forwarded; otherwise the client is created with the
    configured ``http2`` flag.

    Returns:
        This client, ready for use inside the ``async with`` body.
    """
    pool_limits = httpx.Limits(
        max_keepalive_connections=self._pool_connections,
        max_connections=self._pool_maxsize,
        # NOTE(review): idle keep-alive expiry reuses the pool-acquisition
        # default constant; confirm the two are meant to share one value.
        keepalive_expiry=DEFAULT_POOL_TIMEOUT,
    )
    # Request timeout applies to all phases; pool acquisition has its own limit.
    timeouts = httpx.Timeout(self.timeout, pool=self._pool_timeout)

    if not self._transport:
        # No custom transport: regular client, HTTP/2 as configured.
        self._client = httpx.AsyncClient(
            timeout=timeouts,
            limits=pool_limits,
            http2=self._http2,
        )
        return self

    # Custom transport (for testing) - http2 is not passed in this case.
    self._client = httpx.AsyncClient(
        transport=self._transport,
        timeout=timeouts,
        limits=pool_limits,
    )
    return self
|
|
493
621
|
|
|
@@ -535,7 +663,6 @@ class ASAPClient:
|
|
|
535
663
|
url=sanitize_url(self.base_url),
|
|
536
664
|
)
|
|
537
665
|
|
|
538
|
-
# Check circuit breaker state before attempting request
|
|
539
666
|
if self._circuit_breaker is not None and not self._circuit_breaker.can_attempt():
|
|
540
667
|
consecutive_failures = self._circuit_breaker.get_consecutive_failures()
|
|
541
668
|
raise CircuitOpenError(
|
|
@@ -578,20 +705,59 @@ class ASAPClient:
|
|
|
578
705
|
"id": request_id,
|
|
579
706
|
}
|
|
580
707
|
|
|
708
|
+
# Serialize to bytes for compression
|
|
709
|
+
request_body = json.dumps(json_rpc_request).encode("utf-8")
|
|
710
|
+
|
|
711
|
+
# Apply compression if enabled and payload exceeds threshold
|
|
712
|
+
content_encoding: str | None = None
|
|
713
|
+
if self._compression:
|
|
714
|
+
compressed_body, algorithm = compress_payload(
|
|
715
|
+
request_body,
|
|
716
|
+
threshold=self._compression_threshold,
|
|
717
|
+
)
|
|
718
|
+
if algorithm != CompressionAlgorithm.IDENTITY:
|
|
719
|
+
request_body = compressed_body
|
|
720
|
+
content_encoding = algorithm.value
|
|
721
|
+
logger.debug(
|
|
722
|
+
"asap.client.compression_applied",
|
|
723
|
+
target_url=sanitized_url,
|
|
724
|
+
envelope_id=envelope.id,
|
|
725
|
+
algorithm=content_encoding,
|
|
726
|
+
original_size=len(json.dumps(json_rpc_request).encode("utf-8")),
|
|
727
|
+
compressed_size=len(request_body),
|
|
728
|
+
)
|
|
729
|
+
|
|
581
730
|
# Attempt with retries
|
|
582
731
|
last_exception: Exception | None = None
|
|
583
732
|
for attempt in range(self.max_retries):
|
|
733
|
+
if attempt > 0:
|
|
734
|
+
get_metrics().increment_counter("asap_transport_retries_total")
|
|
584
735
|
try:
|
|
736
|
+
# Build headers
|
|
737
|
+
headers = {
|
|
738
|
+
"Content-Type": "application/json",
|
|
739
|
+
"X-Idempotency-Key": idempotency_key,
|
|
740
|
+
"Accept-Encoding": get_accept_encoding_header(),
|
|
741
|
+
}
|
|
742
|
+
if content_encoding:
|
|
743
|
+
headers["Content-Encoding"] = content_encoding
|
|
744
|
+
|
|
585
745
|
response = await self._client.post(
|
|
586
746
|
f"{self.base_url}/asap",
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
"Content-Type": "application/json",
|
|
590
|
-
"X-Idempotency-Key": idempotency_key,
|
|
591
|
-
},
|
|
747
|
+
headers=headers,
|
|
748
|
+
content=request_body,
|
|
592
749
|
)
|
|
593
750
|
|
|
594
|
-
#
|
|
751
|
+
# Log HTTP protocol version for debugging fallback behavior
|
|
752
|
+
if self._http2 and response.http_version != "HTTP/2":
|
|
753
|
+
logger.debug(
|
|
754
|
+
"asap.client.http_fallback",
|
|
755
|
+
target_url=sanitize_url(self.base_url),
|
|
756
|
+
requested="HTTP/2",
|
|
757
|
+
actual=response.http_version,
|
|
758
|
+
message=f"HTTP/2 requested but used {response.http_version}",
|
|
759
|
+
)
|
|
760
|
+
|
|
595
761
|
if response.status_code >= 500:
|
|
596
762
|
# Server errors (5xx) are retriable
|
|
597
763
|
error_msg = (
|
|
@@ -601,21 +767,18 @@ class ASAPClient:
|
|
|
601
767
|
if attempt < self.max_retries - 1:
|
|
602
768
|
delay = self._calculate_backoff(attempt)
|
|
603
769
|
logger.warning(
|
|
604
|
-
"asap.client.
|
|
770
|
+
"asap.client.retry_server_error",
|
|
605
771
|
status_code=response.status_code,
|
|
606
772
|
attempt=attempt + 1,
|
|
607
773
|
max_retries=self.max_retries,
|
|
608
774
|
delay_seconds=round(delay, 2),
|
|
609
775
|
target_url=sanitize_url(self.base_url),
|
|
610
|
-
message=f"Server error {response.status_code}, retrying in {delay:.2f}s (attempt {attempt + 1}/{self.max_retries})",
|
|
611
|
-
)
|
|
612
|
-
logger.info(
|
|
613
|
-
"asap.client.retry",
|
|
614
|
-
target_url=sanitize_url(self.base_url),
|
|
615
776
|
envelope_id=envelope.id,
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
777
|
+
message=(
|
|
778
|
+
f"Server error {response.status_code}, "
|
|
779
|
+
f"retrying in {delay:.2f}s "
|
|
780
|
+
f"(attempt {attempt + 1}/{self.max_retries})"
|
|
781
|
+
),
|
|
619
782
|
)
|
|
620
783
|
await asyncio.sleep(delay)
|
|
621
784
|
last_exception = ASAPConnectionError(error_msg, url=self.base_url)
|
|
@@ -637,9 +800,7 @@ class ASAPClient:
|
|
|
637
800
|
)
|
|
638
801
|
raise ASAPConnectionError(error_msg, url=self.base_url)
|
|
639
802
|
if response.status_code == 429:
|
|
640
|
-
# Rate limit (429) is retriable, respect Retry-After header
|
|
641
803
|
if attempt < self.max_retries - 1:
|
|
642
|
-
# Check for Retry-After header
|
|
643
804
|
retry_after = response.headers.get("Retry-After")
|
|
644
805
|
if retry_after:
|
|
645
806
|
retry_delay: Optional[float] = None
|
|
@@ -762,10 +923,7 @@ class ASAPClient:
|
|
|
762
923
|
except Exception as e:
|
|
763
924
|
raise ASAPRemoteError(-32700, f"Invalid JSON response: {e}") from e
|
|
764
925
|
|
|
765
|
-
# Check for JSON-RPC error
|
|
766
926
|
if "error" in json_response:
|
|
767
|
-
# Record success pattern (service is reachable)
|
|
768
|
-
# A valid JSON-RPC error means the connection and transport are healthy
|
|
769
927
|
if self._circuit_breaker is not None:
|
|
770
928
|
self._circuit_breaker.record_success()
|
|
771
929
|
|
|
@@ -798,7 +956,8 @@ class ASAPClient:
|
|
|
798
956
|
)
|
|
799
957
|
|
|
800
958
|
# Calculate duration and log success
|
|
801
|
-
|
|
959
|
+
duration_seconds = time.perf_counter() - start_time
|
|
960
|
+
duration_ms = duration_seconds * 1000
|
|
802
961
|
logger.info(
|
|
803
962
|
"asap.client.response",
|
|
804
963
|
target_url=sanitize_url(self.base_url),
|
|
@@ -808,7 +967,13 @@ class ASAPClient:
|
|
|
808
967
|
duration_ms=round(duration_ms, 2),
|
|
809
968
|
attempts=attempt + 1,
|
|
810
969
|
)
|
|
811
|
-
|
|
970
|
+
metrics = get_metrics()
|
|
971
|
+
metrics.increment_counter("asap_transport_send_total", {"status": "success"})
|
|
972
|
+
metrics.observe_histogram(
|
|
973
|
+
"asap_transport_send_duration_seconds",
|
|
974
|
+
duration_seconds,
|
|
975
|
+
{"status": "success"},
|
|
976
|
+
)
|
|
812
977
|
return response_envelope
|
|
813
978
|
|
|
814
979
|
except (httpx.ConnectError, httpx.TimeoutException) as e:
|
|
@@ -914,6 +1079,7 @@ class ASAPClient:
|
|
|
914
1079
|
error_type=type(e).__name__,
|
|
915
1080
|
duration_ms=round(duration_ms, 2),
|
|
916
1081
|
)
|
|
1082
|
+
_record_send_error_metrics(start_time, e)
|
|
917
1083
|
# Wrap unexpected errors
|
|
918
1084
|
raise ASAPConnectionError(
|
|
919
1085
|
f"Unexpected error connecting to {self.base_url}: {e}. "
|
|
@@ -922,13 +1088,229 @@ class ASAPClient:
|
|
|
922
1088
|
url=sanitize_url(self.base_url),
|
|
923
1089
|
) from e
|
|
924
1090
|
|
|
925
|
-
# Defensive code: This should never be reached because the loop above
|
|
926
|
-
# always either returns successfully or raises an exception.
|
|
927
|
-
# Kept as a safety net for future code changes.
|
|
928
1091
|
if last_exception: # pragma: no cover
|
|
1092
|
+
_record_send_error_metrics(start_time, last_exception)
|
|
929
1093
|
raise last_exception
|
|
930
1094
|
raise ASAPConnectionError(
|
|
931
1095
|
f"Max retries ({self.max_retries}) exceeded for {self.base_url}. "
|
|
932
1096
|
f"Verify the agent is running and accessible.",
|
|
933
1097
|
url=sanitize_url(self.base_url),
|
|
934
1098
|
) # pragma: no cover
|
|
1099
|
+
|
|
1100
|
+
async def get_manifest(self, url: str | None = None) -> Manifest:
    """Get agent manifest from cache or HTTP endpoint.

    Checks the per-client manifest cache first and returns the cached entry
    when one is available. Otherwise fetches the manifest over HTTP, parses
    it into a ``Manifest`` and stores it in the cache (expiry is handled by
    ``ManifestCache``). Any failure invalidates the cache entry for the URL.

    The HTTP request timeout is the smaller of the client timeout and
    ``MANIFEST_REQUEST_TIMEOUT``; that effective value is what a timeout
    error reports.

    Args:
        url: Manifest URL (defaults to {base_url}/.well-known/asap/manifest.json)

    Returns:
        Manifest object

    Raises:
        ASAPConnectionError: If the client is not connected, the server
            answers with status >= 400, the connection fails, or an
            unexpected error occurs while fetching.
        ASAPTimeoutError: If request times out
        ValueError: If the response is not valid JSON or does not form a
            valid manifest

    Example:
        >>> async with ASAPClient("http://agent.example.com") as client:
        ...     manifest = await client.get_manifest()
        ...     print(manifest.id, manifest.name)
    """
    if url is None:
        url = f"{self.base_url}/.well-known/asap/manifest.json"

    # Sanitized once; used for every log field/message and error ``url`` attribute.
    safe_url = sanitize_url(url)

    if not self._client:
        raise ASAPConnectionError(
            "Client not connected. Use 'async with' context.",
            url=safe_url,
        )

    cached = self._manifest_cache.get(url)
    if cached is not None:
        logger.debug(
            "asap.client.manifest_cache_hit",
            url=safe_url,
            manifest_id=cached.id,
            message=f"Manifest cache hit for {safe_url}",
        )
        return cached

    # Cache miss - fetch from HTTP
    logger.debug(
        "asap.client.manifest_cache_miss",
        url=safe_url,
        message=f"Manifest cache miss for {safe_url}, fetching from HTTP",
    )

    try:
        # Cap timeout for manifest; kept in a variable so the timeout error
        # below reports the value that was actually applied.
        request_timeout = min(self.timeout, MANIFEST_REQUEST_TIMEOUT)
        response = await self._client.get(url, timeout=request_timeout)

        if response.status_code >= 400:
            # HTTP error - invalidate cache if entry exists
            self._manifest_cache.invalidate(url)
            raise ASAPConnectionError(
                f"HTTP error {response.status_code} fetching manifest from {url}. "
                f"Server response: {response.text[:200]}",
                url=safe_url,
            )

        # Parse JSON response
        try:
            manifest_data = response.json()
        except Exception as e:
            self._manifest_cache.invalidate(url)
            raise ValueError(f"Invalid JSON in manifest response: {e}") from e

        # Parse Manifest object
        try:
            manifest = Manifest(**manifest_data)
        except Exception as e:
            self._manifest_cache.invalidate(url)
            raise ValueError(f"Invalid manifest format: {e}") from e

        # Cache successful response
        self._manifest_cache.set(url, manifest)
        logger.info(
            "asap.client.manifest_fetched",
            url=safe_url,
            manifest_id=manifest.id,
            message=f"Manifest fetched and cached for {safe_url}",
        )

        return manifest

    except httpx.TimeoutException as e:
        self._manifest_cache.invalidate(url)
        # Report the effective (capped) timeout, not the client-wide one.
        raise ASAPTimeoutError(
            f"Manifest request timeout after {request_timeout}s", timeout=request_timeout
        ) from e
    except httpx.ConnectError as e:
        self._manifest_cache.invalidate(url)
        raise ASAPConnectionError(
            f"Connection error fetching manifest from {url}: {e}. "
            f"Verify the agent is running and accessible.",
            cause=e,
            url=safe_url,
        ) from e
    except (ASAPConnectionError, ASAPTimeoutError, ValueError):
        # Re-raise our custom errors (cache already invalidated above)
        raise
    except Exception as e:
        # Unexpected error - invalidate cache
        self._manifest_cache.invalidate(url)
        logger.exception(
            "asap.client.manifest_error",
            url=safe_url,
            error=str(e),
            error_type=type(e).__name__,
            # Use the sanitized URL here, like every other log message in this method.
            message=f"Unexpected error fetching manifest from {safe_url}: {e}",
        )
        raise ASAPConnectionError(
            f"Unexpected error fetching manifest from {url}: {e}. "
            f"Verify the agent is running and accessible.",
            cause=e,
            url=safe_url,
        ) from e
|
|
1221
|
+
|
|
1222
|
+
async def send_batch(
    self,
    envelopes: list[Envelope],
    return_exceptions: bool = False,
) -> list[Envelope | BaseException]:
    """Send several envelopes concurrently and collect their responses.

    Every envelope is dispatched through ``send`` and the calls are awaited
    together with ``asyncio.gather``. A log record is written before the
    batch starts and another after it completes, the latter carrying
    success/failure counts, total duration and throughput.

    Args:
        envelopes: List of ASAP envelopes to send
        return_exceptions: Forwarded to ``asyncio.gather``. If True, a failed
            send contributes its exception to the result list instead of
            raising. If False (default), the first exception is raised.

    Returns:
        Results in the same order as ``envelopes``. With
        ``return_exceptions=True`` a failed position holds the exception
        rather than an Envelope.

    Raises:
        ValueError: If envelopes list is empty
        ASAPConnectionError: If the client is not connected, or if any send
            fails (when return_exceptions=False)
        ASAPTimeoutError: If any send times out (when return_exceptions=False)
        ASAPRemoteError: If any remote agent returns error (when return_exceptions=False)
        CircuitOpenError: If circuit breaker is open (when return_exceptions=False)

    Example:
        >>> async with ASAPClient("http://localhost:8000") as client:
        ...     results = await client.send_batch(envelopes, return_exceptions=True)
        ...     for i, result in enumerate(results):
        ...         if isinstance(result, BaseException):
        ...             print(f"Envelope {i} failed: {result}")
        ...         else:
        ...             print(f"Envelope {i} succeeded: {result.id}")
    """
    # Guard clauses: reject empty batches and unopened clients up front.
    if not envelopes:
        raise ValueError("envelopes list cannot be empty")
    if not self._client:
        raise ASAPConnectionError(
            "Client not connected. Use 'async with' context.",
            url=sanitize_url(self.base_url),
        )

    total = len(envelopes)
    logger.info(
        "asap.client.send_batch",
        target_url=sanitize_url(self.base_url),
        batch_size=total,
        message=f"Sending batch of {total} envelopes to {sanitize_url(self.base_url)}",
    )

    started = time.perf_counter()
    pending = [self.send(item) for item in envelopes]
    outcomes = await asyncio.gather(*pending, return_exceptions=return_exceptions)
    elapsed_ms = (time.perf_counter() - started) * 1000

    # Reaching this point with return_exceptions=False means nothing raised,
    # so every send succeeded; otherwise count the Envelope results.
    if not return_exceptions:
        succeeded, failed = total, 0
    else:
        succeeded = len([item for item in outcomes if isinstance(item, Envelope)])
        failed = total - succeeded

    # Avoid dividing by zero when the timer registers no elapsed time.
    if elapsed_ms > 0:
        throughput = round(total / (elapsed_ms / 1000), 2)
    else:
        throughput = 0

    logger.info(
        "asap.client.send_batch_complete",
        target_url=sanitize_url(self.base_url),
        batch_size=total,
        success_count=succeeded,
        failure_count=failed,
        duration_ms=round(elapsed_ms, 2),
        throughput_per_second=throughput,
        message=(
            f"Batch of {total} envelopes completed in {elapsed_ms:.2f}ms "
            f"({succeeded} succeeded, {failed} failed)"
        ),
    )

    return outcomes
|