asap-protocol 0.5.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (59)
  1. asap/__init__.py +1 -1
  2. asap/cli.py +137 -2
  3. asap/examples/README.md +81 -13
  4. asap/examples/auth_patterns.py +212 -0
  5. asap/examples/error_recovery.py +248 -0
  6. asap/examples/long_running.py +287 -0
  7. asap/examples/mcp_integration.py +240 -0
  8. asap/examples/multi_step_workflow.py +134 -0
  9. asap/examples/orchestration.py +293 -0
  10. asap/examples/rate_limiting.py +137 -0
  11. asap/examples/run_demo.py +0 -2
  12. asap/examples/secure_handler.py +84 -0
  13. asap/examples/state_migration.py +240 -0
  14. asap/examples/streaming_response.py +108 -0
  15. asap/examples/websocket_concept.py +129 -0
  16. asap/mcp/__init__.py +43 -0
  17. asap/mcp/client.py +224 -0
  18. asap/mcp/protocol.py +179 -0
  19. asap/mcp/server.py +333 -0
  20. asap/mcp/server_runner.py +40 -0
  21. asap/models/base.py +0 -3
  22. asap/models/constants.py +3 -1
  23. asap/models/entities.py +21 -6
  24. asap/models/envelope.py +7 -0
  25. asap/models/ids.py +8 -4
  26. asap/models/parts.py +33 -3
  27. asap/models/validators.py +16 -0
  28. asap/observability/__init__.py +6 -0
  29. asap/observability/dashboards/README.md +24 -0
  30. asap/observability/dashboards/asap-detailed.json +131 -0
  31. asap/observability/dashboards/asap-red.json +129 -0
  32. asap/observability/logging.py +81 -1
  33. asap/observability/metrics.py +15 -1
  34. asap/observability/trace_parser.py +238 -0
  35. asap/observability/trace_ui.py +218 -0
  36. asap/observability/tracing.py +293 -0
  37. asap/state/machine.py +15 -2
  38. asap/state/snapshot.py +0 -9
  39. asap/testing/__init__.py +31 -0
  40. asap/testing/assertions.py +108 -0
  41. asap/testing/fixtures.py +113 -0
  42. asap/testing/mocks.py +152 -0
  43. asap/transport/__init__.py +28 -0
  44. asap/transport/cache.py +180 -0
  45. asap/transport/circuit_breaker.py +9 -8
  46. asap/transport/client.py +418 -36
  47. asap/transport/compression.py +389 -0
  48. asap/transport/handlers.py +106 -53
  49. asap/transport/middleware.py +58 -34
  50. asap/transport/server.py +429 -139
  51. asap/transport/validators.py +0 -4
  52. asap/utils/sanitization.py +0 -5
  53. asap_protocol-1.0.0.dist-info/METADATA +264 -0
  54. asap_protocol-1.0.0.dist-info/RECORD +70 -0
  55. asap_protocol-0.5.0.dist-info/METADATA +0 -244
  56. asap_protocol-0.5.0.dist-info/RECORD +0 -41
  57. {asap_protocol-0.5.0.dist-info → asap_protocol-1.0.0.dist-info}/WHEEL +0 -0
  58. {asap_protocol-0.5.0.dist-info → asap_protocol-1.0.0.dist-info}/entry_points.txt +0 -0
  59. {asap_protocol-0.5.0.dist-info → asap_protocol-1.0.0.dist-info}/licenses/LICENSE +0 -0
asap/transport/client.py CHANGED
@@ -10,6 +10,7 @@ The ASAPClient provides:
10
10
  - Retry logic with idempotency keys
11
11
  - Proper error handling and timeouts
12
12
  - Structured logging for observability
13
+ - Compression support (gzip/brotli) for bandwidth reduction
13
14
 
14
15
  Example:
15
16
  >>> from asap.transport.client import ASAPClient
@@ -18,10 +19,15 @@ Example:
18
19
  >>> async with ASAPClient("http://agent.example.com") as client:
19
20
  ... response = await client.send(request_envelope)
20
21
  ... print(response.payload_type)
22
+ >>>
23
+ >>> # With compression enabled (default for payloads > 1KB)
24
+ >>> async with ASAPClient("http://agent.example.com", compression=True) as client:
25
+ ... response = await client.send(large_envelope) # Compressed automatically
21
26
  """
22
27
 
23
28
  import asyncio
24
29
  import itertools
30
+ import json
25
31
  import random
26
32
  import time
27
33
  from dataclasses import dataclass
@@ -38,10 +44,18 @@ from asap.models.constants import (
38
44
  DEFAULT_CIRCUIT_BREAKER_TIMEOUT,
39
45
  DEFAULT_MAX_DELAY,
40
46
  )
47
+ from asap.models.entities import Manifest
41
48
  from asap.models.envelope import Envelope
42
49
  from asap.models.ids import generate_id
43
- from asap.observability import get_logger
50
+ from asap.observability import get_logger, get_metrics
51
+ from asap.transport.cache import DEFAULT_MAX_SIZE, ManifestCache
44
52
  from asap.transport.circuit_breaker import CircuitBreaker, CircuitState, get_registry
53
+ from asap.transport.compression import (
54
+ COMPRESSION_THRESHOLD,
55
+ CompressionAlgorithm,
56
+ compress_payload,
57
+ get_accept_encoding_header,
58
+ )
45
59
  from asap.transport.jsonrpc import ASAP_METHOD
46
60
  from asap.utils.sanitization import sanitize_url
47
61
 
@@ -54,6 +68,30 @@ DEFAULT_TIMEOUT = 60.0
54
68
  # Default maximum retries
55
69
  DEFAULT_MAX_RETRIES = 3
56
70
 
71
+ # Connection pool defaults (support 1000+ concurrent via reuse)
72
+ DEFAULT_POOL_CONNECTIONS = 100
73
+ DEFAULT_POOL_MAXSIZE = 100
74
+ # Timeout for acquiring a connection from the pool (distinct from request timeout)
75
+ DEFAULT_POOL_TIMEOUT = 5.0
76
+ # Maximum time to wait for manifest retrieval
77
+ MANIFEST_REQUEST_TIMEOUT = 10.0
78
+
79
+
80
+ def _record_send_error_metrics(start_time: float, error: BaseException) -> None:
81
+ """Record transport send error metrics (status=error, duration, reason)."""
82
+ duration_seconds = time.perf_counter() - start_time
83
+ metrics = get_metrics()
84
+ metrics.increment_counter("asap_transport_send_total", {"status": "error"})
85
+ metrics.increment_counter(
86
+ "asap_transport_send_errors_total",
87
+ {"reason": type(error).__name__},
88
+ )
89
+ metrics.observe_histogram(
90
+ "asap_transport_send_duration_seconds",
91
+ duration_seconds,
92
+ {"status": "error"},
93
+ )
94
+
57
95
 
58
96
  @dataclass
59
97
  class RetryConfig:
@@ -178,17 +216,49 @@ class ASAPClient:
178
216
  The client should be used as an async context manager to ensure
179
217
  proper connection lifecycle management.
180
218
 
219
+ Features:
220
+ - HTTP/2 multiplexing (enabled by default) for improved batch performance
221
+ - Connection pooling supporting 1000+ concurrent requests
222
+ - Automatic retry with exponential backoff
223
+ - Circuit breaker pattern for fault tolerance
224
+ - Batch operations via send_batch() method
225
+ - Compression support (gzip/brotli) for bandwidth reduction
226
+
181
227
  Attributes:
182
228
  base_url: Base URL of the remote agent
183
229
  timeout: Request timeout in seconds
184
230
  max_retries: Maximum retry attempts for transient failures
185
231
  require_https: Whether HTTPS is required for non-localhost connections
186
232
  is_connected: Whether the client has an active connection
233
+ compression: Whether compression is enabled for requests
234
+ compression_threshold: Minimum payload size to trigger compression
187
235
  _circuit_breaker: Optional circuit breaker instance
188
236
 
237
+ Pool sizing (pool_connections / pool_maxsize):
238
+ Single-agent: 100 (default). Small cluster: 200–500. Large cluster: 500–1000.
239
+ Supports 1000+ concurrent requests via connection reuse when pool_maxsize < concurrency.
240
+
241
+ HTTP/2 Multiplexing:
242
+ HTTP/2 is enabled by default (http2=True) and provides request multiplexing over
243
+ a single TCP connection, reducing latency for batch operations. If the server
244
+ doesn't support HTTP/2, the client automatically falls back to HTTP/1.1.
245
+
246
+ Compression:
247
+ Compression is enabled by default (compression=True) for payloads exceeding
248
+ 1KB. Supports gzip (standard) and brotli (optional, requires brotli package).
249
+ Brotli provides ~20% better compression than gzip for JSON payloads.
250
+
189
251
  Example:
190
252
  >>> async with ASAPClient("http://localhost:8000") as client:
191
253
  ... response = await client.send(envelope)
254
+ >>>
255
+ >>> # Batch operations with HTTP/2 multiplexing
256
+ >>> async with ASAPClient("https://agent.example.com") as client:
257
+ ... responses = await client.send_batch([env1, env2, env3])
258
+ >>>
259
+ >>> # Disable compression for specific client
260
+ >>> async with ASAPClient("http://localhost:8000", compression=False) as client:
261
+ ... response = await client.send(envelope) # No compression
192
262
  """
193
263
 
194
264
  _circuit_breaker: Optional[CircuitBreaker]
@@ -200,6 +270,15 @@ class ASAPClient:
200
270
  transport: httpx.AsyncBaseTransport | None = None,
201
271
  require_https: bool = True,
202
272
  retry_config: Optional[RetryConfig] = None,
273
+ # Connection pool (httpx.Limits); enables 1000+ concurrent via reuse
274
+ pool_connections: int | None = None,
275
+ pool_maxsize: int | None = None,
276
+ pool_timeout: float | None = None,
277
+ # HTTP/2 multiplexing for improved batch performance
278
+ http2: bool = True,
279
+ # Compression settings for bandwidth reduction
280
+ compression: bool = True,
281
+ compression_threshold: int = COMPRESSION_THRESHOLD,
203
282
  # Individual retry parameters (for backward compatibility)
204
283
  # If retry_config is provided, these are ignored
205
284
  max_retries: int | None = None,
@@ -209,6 +288,7 @@ class ASAPClient:
209
288
  circuit_breaker_enabled: bool | None = None,
210
289
  circuit_breaker_threshold: int | None = None,
211
290
  circuit_breaker_timeout: float | None = None,
291
+ manifest_cache_size: int | None = None,
212
292
  ) -> None:
213
293
  """Initialize ASAP client.
214
294
 
@@ -218,7 +298,28 @@ class ASAPClient:
218
298
  transport: Optional custom async transport (for testing). Must be an instance
219
299
  of httpx.AsyncBaseTransport (e.g., httpx.MockTransport).
220
300
  require_https: If True, enforces HTTPS for non-localhost connections (default: True).
301
+ pool_connections: Max keep-alive connections in pool. Default: DEFAULT_POOL_CONNECTIONS (100).
302
+ Controls how many idle connections are kept open.
303
+ pool_maxsize: Max total connections in pool. Default: DEFAULT_POOL_MAXSIZE (100).
304
+ Controls maximum number of concurrent connections.
305
+ Tuning:
306
+ - Single agent: 100 (default)
307
+ - Small cluster: 200-500
308
+ - Large cluster: 500-1000
309
+ Safe to increase if OS file descriptor limits allow.
310
+ pool_timeout: Seconds to wait for connection from pool. Default: DEFAULT_POOL_TIMEOUT (5.0).
311
+ Increase if you see PoolTimeout exceptions under high load.
221
312
  HTTP connections to localhost are allowed with a warning for development.
313
+ http2: Enable HTTP/2 multiplexing for improved batch performance (default: True).
314
+ HTTP/2 allows multiple concurrent requests over a single TCP connection,
315
+ reducing latency for batch operations. Falls back to HTTP/1.1 if server
316
+ doesn't support HTTP/2.
317
+ compression: Enable request compression for bandwidth reduction (default: True).
318
+ When enabled, payloads exceeding compression_threshold are compressed
319
+ using gzip or brotli (if available). The server must support the
320
+ Content-Encoding header to decompress requests.
321
+ compression_threshold: Minimum payload size in bytes to trigger compression
322
+ (default: 1024 = 1KB). Payloads smaller than this are sent uncompressed.
222
323
  retry_config: Optional RetryConfig dataclass to group retry and circuit breaker parameters.
223
324
  If provided, individual retry parameters are ignored.
224
325
  max_retries: Maximum retry attempts for transient failures (default: 3).
@@ -235,6 +336,9 @@ class ASAPClient:
235
336
  Ignored if retry_config is provided.
236
337
  circuit_breaker_timeout: Seconds before transitioning OPEN -> HALF_OPEN (default: 60.0).
237
338
  Ignored if retry_config is provided.
339
+ manifest_cache_size: Maximum number of manifests to cache (default: 1000).
340
+ Increase for high-cardinality environments (e.g. thousands of agents).
341
+ Set to 0 for unlimited. See ManifestCache for cleanup latency notes.
238
342
 
239
343
  Raises:
240
344
  ValueError: If URL format is invalid, scheme is not HTTP/HTTPS, or HTTPS is
@@ -247,6 +351,9 @@ class ASAPClient:
247
351
  >>> # Using RetryConfig (recommended)
248
352
  >>> config = RetryConfig(max_retries=5, circuit_breaker_enabled=True)
249
353
  >>> client = ASAPClient("http://localhost:8000", retry_config=config)
354
+ >>>
355
+ >>> # With compression disabled
356
+ >>> client = ASAPClient("http://localhost:8000", compression=False)
250
357
  """
251
358
  # Extract retry config values
252
359
  if retry_config is not None:
@@ -277,7 +384,6 @@ class ASAPClient:
277
384
  if circuit_breaker_timeout is not None
278
385
  else DEFAULT_CIRCUIT_BREAKER_TIMEOUT
279
386
  )
280
- # Validate URL format and scheme
281
387
  from urllib.parse import urlparse
282
388
 
283
389
  parsed = urlparse(base_url)
@@ -293,7 +399,6 @@ class ASAPClient:
293
399
  f"Received: {base_url}"
294
400
  )
295
401
 
296
- # Validate HTTPS requirement
297
402
  is_https = parsed.scheme.lower() == "https"
298
403
  is_local = self._is_localhost(parsed)
299
404
 
@@ -320,6 +425,11 @@ class ASAPClient:
320
425
 
321
426
  self.base_url = base_url.rstrip("/")
322
427
  self.timeout = timeout
428
+ self._pool_connections = (
429
+ pool_connections if pool_connections is not None else DEFAULT_POOL_CONNECTIONS
430
+ )
431
+ self._pool_maxsize = pool_maxsize if pool_maxsize is not None else DEFAULT_POOL_MAXSIZE
432
+ self._pool_timeout = pool_timeout if pool_timeout is not None else DEFAULT_POOL_TIMEOUT
323
433
  self.max_retries = max_retries_val
324
434
  self.require_https = require_https
325
435
  self.base_delay = base_delay_val
@@ -327,6 +437,9 @@ class ASAPClient:
327
437
  self.jitter = jitter_val
328
438
  self.circuit_breaker_enabled = circuit_breaker_enabled_val
329
439
  self._transport = transport
440
+ self._http2 = http2
441
+ self._compression = compression
442
+ self._compression_threshold = compression_threshold
330
443
  self._client: httpx.AsyncClient | None = None
331
444
  # Thread-safe counter using itertools.count
332
445
  self._request_counter = itertools.count(1)
@@ -344,6 +457,10 @@ class ASAPClient:
344
457
  else:
345
458
  self._circuit_breaker = None
346
459
 
460
+ # Per-client manifest cache (not shared like circuit breaker).
461
+ cache_max = manifest_cache_size if manifest_cache_size is not None else DEFAULT_MAX_SIZE
462
+ self._manifest_cache = ManifestCache(max_size=cache_max)
463
+
347
464
  @staticmethod
348
465
  def _is_localhost(parsed_url: ParseResult) -> bool:
349
466
  """Check if URL points to localhost.
@@ -384,9 +501,6 @@ class ASAPClient:
384
501
  # Cap at max_delay
385
502
  delay = min(delay, self.max_delay)
386
503
 
387
- # Add jitter if enabled (random value between 0 and 10% of delay)
388
- # Note: random.uniform is appropriate here - jitter for retry backoff
389
- # does not require cryptographic security, only statistical distribution
390
504
  if self.jitter:
391
505
  jitter_amount: float = random.uniform(0, delay * 0.1) # nosec B311
392
506
  delay += jitter_amount
@@ -478,16 +592,30 @@ class ASAPClient:
478
592
  return self._client is not None
479
593
 
480
594
  async def __aenter__(self) -> "ASAPClient":
481
- """Enter async context and open connection."""
482
- # Create the async client
595
+ """Enter async context and open connection.
596
+
597
+ Creates an httpx.AsyncClient with configured pool limits and HTTP/2 support.
598
+ HTTP/2 enables multiplexing for improved batch performance.
599
+ """
600
+ limits = httpx.Limits(
601
+ max_keepalive_connections=self._pool_connections,
602
+ max_connections=self._pool_maxsize,
603
+ keepalive_expiry=DEFAULT_POOL_TIMEOUT,
604
+ )
605
+ timeout_config = httpx.Timeout(self.timeout, pool=self._pool_timeout)
483
606
  if self._transport:
607
+ # Custom transport (for testing) - http2 not applicable with mock transports
484
608
  self._client = httpx.AsyncClient(
485
609
  transport=self._transport,
486
- timeout=self.timeout,
610
+ timeout=timeout_config,
611
+ limits=limits,
487
612
  )
488
613
  else:
614
+ # Production client with HTTP/2 multiplexing support
489
615
  self._client = httpx.AsyncClient(
490
- timeout=self.timeout,
616
+ timeout=timeout_config,
617
+ limits=limits,
618
+ http2=self._http2,
491
619
  )
492
620
  return self
493
621
 
@@ -535,7 +663,6 @@ class ASAPClient:
535
663
  url=sanitize_url(self.base_url),
536
664
  )
537
665
 
538
- # Check circuit breaker state before attempting request
539
666
  if self._circuit_breaker is not None and not self._circuit_breaker.can_attempt():
540
667
  consecutive_failures = self._circuit_breaker.get_consecutive_failures()
541
668
  raise CircuitOpenError(
@@ -578,20 +705,59 @@ class ASAPClient:
578
705
  "id": request_id,
579
706
  }
580
707
 
708
+ # Serialize to bytes for compression
709
+ request_body = json.dumps(json_rpc_request).encode("utf-8")
710
+
711
+ # Apply compression if enabled and payload exceeds threshold
712
+ content_encoding: str | None = None
713
+ if self._compression:
714
+ compressed_body, algorithm = compress_payload(
715
+ request_body,
716
+ threshold=self._compression_threshold,
717
+ )
718
+ if algorithm != CompressionAlgorithm.IDENTITY:
719
+ request_body = compressed_body
720
+ content_encoding = algorithm.value
721
+ logger.debug(
722
+ "asap.client.compression_applied",
723
+ target_url=sanitized_url,
724
+ envelope_id=envelope.id,
725
+ algorithm=content_encoding,
726
+ original_size=len(json.dumps(json_rpc_request).encode("utf-8")),
727
+ compressed_size=len(request_body),
728
+ )
729
+
581
730
  # Attempt with retries
582
731
  last_exception: Exception | None = None
583
732
  for attempt in range(self.max_retries):
733
+ if attempt > 0:
734
+ get_metrics().increment_counter("asap_transport_retries_total")
584
735
  try:
736
+ # Build headers
737
+ headers = {
738
+ "Content-Type": "application/json",
739
+ "X-Idempotency-Key": idempotency_key,
740
+ "Accept-Encoding": get_accept_encoding_header(),
741
+ }
742
+ if content_encoding:
743
+ headers["Content-Encoding"] = content_encoding
744
+
585
745
  response = await self._client.post(
586
746
  f"{self.base_url}/asap",
587
- json=json_rpc_request,
588
- headers={
589
- "Content-Type": "application/json",
590
- "X-Idempotency-Key": idempotency_key,
591
- },
747
+ headers=headers,
748
+ content=request_body,
592
749
  )
593
750
 
594
- # Check HTTP status
751
+ # Log HTTP protocol version for debugging fallback behavior
752
+ if self._http2 and response.http_version != "HTTP/2":
753
+ logger.debug(
754
+ "asap.client.http_fallback",
755
+ target_url=sanitize_url(self.base_url),
756
+ requested="HTTP/2",
757
+ actual=response.http_version,
758
+ message=f"HTTP/2 requested but used {response.http_version}",
759
+ )
760
+
595
761
  if response.status_code >= 500:
596
762
  # Server errors (5xx) are retriable
597
763
  error_msg = (
@@ -601,21 +767,18 @@ class ASAPClient:
601
767
  if attempt < self.max_retries - 1:
602
768
  delay = self._calculate_backoff(attempt)
603
769
  logger.warning(
604
- "asap.client.server_error",
770
+ "asap.client.retry_server_error",
605
771
  status_code=response.status_code,
606
772
  attempt=attempt + 1,
607
773
  max_retries=self.max_retries,
608
774
  delay_seconds=round(delay, 2),
609
775
  target_url=sanitize_url(self.base_url),
610
- message=f"Server error {response.status_code}, retrying in {delay:.2f}s (attempt {attempt + 1}/{self.max_retries})",
611
- )
612
- logger.info(
613
- "asap.client.retry",
614
- target_url=sanitize_url(self.base_url),
615
776
  envelope_id=envelope.id,
616
- attempt=attempt + 1,
617
- max_retries=self.max_retries,
618
- delay_seconds=round(delay, 2),
777
+ message=(
778
+ f"Server error {response.status_code}, "
779
+ f"retrying in {delay:.2f}s "
780
+ f"(attempt {attempt + 1}/{self.max_retries})"
781
+ ),
619
782
  )
620
783
  await asyncio.sleep(delay)
621
784
  last_exception = ASAPConnectionError(error_msg, url=self.base_url)
@@ -637,9 +800,7 @@ class ASAPClient:
637
800
  )
638
801
  raise ASAPConnectionError(error_msg, url=self.base_url)
639
802
  if response.status_code == 429:
640
- # Rate limit (429) is retriable, respect Retry-After header
641
803
  if attempt < self.max_retries - 1:
642
- # Check for Retry-After header
643
804
  retry_after = response.headers.get("Retry-After")
644
805
  if retry_after:
645
806
  retry_delay: Optional[float] = None
@@ -762,10 +923,7 @@ class ASAPClient:
762
923
  except Exception as e:
763
924
  raise ASAPRemoteError(-32700, f"Invalid JSON response: {e}") from e
764
925
 
765
- # Check for JSON-RPC error
766
926
  if "error" in json_response:
767
- # Record success pattern (service is reachable)
768
- # A valid JSON-RPC error means the connection and transport are healthy
769
927
  if self._circuit_breaker is not None:
770
928
  self._circuit_breaker.record_success()
771
929
 
@@ -798,7 +956,8 @@ class ASAPClient:
798
956
  )
799
957
 
800
958
  # Calculate duration and log success
801
- duration_ms = (time.perf_counter() - start_time) * 1000
959
+ duration_seconds = time.perf_counter() - start_time
960
+ duration_ms = duration_seconds * 1000
802
961
  logger.info(
803
962
  "asap.client.response",
804
963
  target_url=sanitize_url(self.base_url),
@@ -808,7 +967,13 @@ class ASAPClient:
808
967
  duration_ms=round(duration_ms, 2),
809
968
  attempts=attempt + 1,
810
969
  )
811
-
970
+ metrics = get_metrics()
971
+ metrics.increment_counter("asap_transport_send_total", {"status": "success"})
972
+ metrics.observe_histogram(
973
+ "asap_transport_send_duration_seconds",
974
+ duration_seconds,
975
+ {"status": "success"},
976
+ )
812
977
  return response_envelope
813
978
 
814
979
  except (httpx.ConnectError, httpx.TimeoutException) as e:
@@ -914,6 +1079,7 @@ class ASAPClient:
914
1079
  error_type=type(e).__name__,
915
1080
  duration_ms=round(duration_ms, 2),
916
1081
  )
1082
+ _record_send_error_metrics(start_time, e)
917
1083
  # Wrap unexpected errors
918
1084
  raise ASAPConnectionError(
919
1085
  f"Unexpected error connecting to {self.base_url}: {e}. "
@@ -922,13 +1088,229 @@ class ASAPClient:
922
1088
  url=sanitize_url(self.base_url),
923
1089
  ) from e
924
1090
 
925
- # Defensive code: This should never be reached because the loop above
926
- # always either returns successfully or raises an exception.
927
- # Kept as a safety net for future code changes.
928
1091
  if last_exception: # pragma: no cover
1092
+ _record_send_error_metrics(start_time, last_exception)
929
1093
  raise last_exception
930
1094
  raise ASAPConnectionError(
931
1095
  f"Max retries ({self.max_retries}) exceeded for {self.base_url}. "
932
1096
  f"Verify the agent is running and accessible.",
933
1097
  url=sanitize_url(self.base_url),
934
1098
  ) # pragma: no cover
1099
+
1100
+ async def get_manifest(self, url: str | None = None) -> Manifest:
1101
+ """Get agent manifest from cache or HTTP endpoint.
1102
+
1103
+ Checks cache first, then fetches from HTTP if not cached or expired.
1104
+ Caches successful responses with TTL (default: 5 minutes).
1105
+ Invalidates cache entry on error.
1106
+
1107
+ Args:
1108
+ url: Manifest URL (defaults to {base_url}/.well-known/asap/manifest.json)
1109
+
1110
+ Returns:
1111
+ Manifest object
1112
+
1113
+ Raises:
1114
+ ASAPConnectionError: If HTTP request fails
1115
+ ASAPTimeoutError: If request times out
1116
+ ValueError: If manifest JSON is invalid
1117
+
1118
+ Example:
1119
+ >>> async with ASAPClient("http://agent.example.com") as client:
1120
+ ... manifest = await client.get_manifest()
1121
+ ... print(manifest.id, manifest.name)
1122
+ """
1123
+ if url is None:
1124
+ url = f"{self.base_url}/.well-known/asap/manifest.json"
1125
+
1126
+ if not self._client:
1127
+ raise ASAPConnectionError(
1128
+ "Client not connected. Use 'async with' context.",
1129
+ url=sanitize_url(url),
1130
+ )
1131
+
1132
+ cached = self._manifest_cache.get(url)
1133
+ if cached is not None:
1134
+ logger.debug(
1135
+ "asap.client.manifest_cache_hit",
1136
+ url=sanitize_url(url),
1137
+ manifest_id=cached.id,
1138
+ message=f"Manifest cache hit for {sanitize_url(url)}",
1139
+ )
1140
+ return cached
1141
+
1142
+ # Cache miss - fetch from HTTP
1143
+ logger.debug(
1144
+ "asap.client.manifest_cache_miss",
1145
+ url=sanitize_url(url),
1146
+ message=f"Manifest cache miss for {sanitize_url(url)}, fetching from HTTP",
1147
+ )
1148
+
1149
+ try:
1150
+ response = await self._client.get(
1151
+ url,
1152
+ timeout=min(self.timeout, MANIFEST_REQUEST_TIMEOUT), # Cap timeout for manifest
1153
+ )
1154
+
1155
+ if response.status_code >= 400:
1156
+ # HTTP error - invalidate cache if entry exists
1157
+ self._manifest_cache.invalidate(url)
1158
+ raise ASAPConnectionError(
1159
+ f"HTTP error {response.status_code} fetching manifest from {url}. "
1160
+ f"Server response: {response.text[:200]}",
1161
+ url=sanitize_url(url),
1162
+ )
1163
+
1164
+ # Parse JSON response
1165
+ try:
1166
+ manifest_data = response.json()
1167
+ except Exception as e:
1168
+ self._manifest_cache.invalidate(url)
1169
+ raise ValueError(f"Invalid JSON in manifest response: {e}") from e
1170
+
1171
+ # Parse Manifest object
1172
+ try:
1173
+ manifest = Manifest(**manifest_data)
1174
+ except Exception as e:
1175
+ self._manifest_cache.invalidate(url)
1176
+ raise ValueError(f"Invalid manifest format: {e}") from e
1177
+
1178
+ # Cache successful response
1179
+ self._manifest_cache.set(url, manifest)
1180
+ logger.info(
1181
+ "asap.client.manifest_fetched",
1182
+ url=sanitize_url(url),
1183
+ manifest_id=manifest.id,
1184
+ message=f"Manifest fetched and cached for {sanitize_url(url)}",
1185
+ )
1186
+
1187
+ return manifest
1188
+
1189
+ except httpx.TimeoutException as e:
1190
+ self._manifest_cache.invalidate(url)
1191
+ raise ASAPTimeoutError(
1192
+ f"Manifest request timeout after {self.timeout}s", timeout=self.timeout
1193
+ ) from e
1194
+ except httpx.ConnectError as e:
1195
+ self._manifest_cache.invalidate(url)
1196
+ raise ASAPConnectionError(
1197
+ f"Connection error fetching manifest from {url}: {e}. "
1198
+ f"Verify the agent is running and accessible.",
1199
+ cause=e,
1200
+ url=sanitize_url(url),
1201
+ ) from e
1202
+ except (ASAPConnectionError, ASAPTimeoutError, ValueError):
1203
+ # Re-raise our custom errors (cache already invalidated above)
1204
+ raise
1205
+ except Exception as e:
1206
+ # Unexpected error - invalidate cache
1207
+ self._manifest_cache.invalidate(url)
1208
+ logger.exception(
1209
+ "asap.client.manifest_error",
1210
+ url=sanitize_url(url),
1211
+ error=str(e),
1212
+ error_type=type(e).__name__,
1213
+ message=f"Unexpected error fetching manifest from {url}: {e}",
1214
+ )
1215
+ raise ASAPConnectionError(
1216
+ f"Unexpected error fetching manifest from {url}: {e}. "
1217
+ f"Verify the agent is running and accessible.",
1218
+ cause=e,
1219
+ url=sanitize_url(url),
1220
+ ) from e
1221
+
1222
+ async def send_batch(
1223
+ self,
1224
+ envelopes: list[Envelope],
1225
+ return_exceptions: bool = False,
1226
+ ) -> list[Envelope | BaseException]:
1227
+ """Send multiple envelopes in parallel using asyncio.gather.
1228
+
1229
+ Uses asyncio.gather to send all envelopes concurrently, leveraging
1230
+ connection pooling and HTTP/2 multiplexing for optimal throughput.
1231
+
1232
+ Args:
1233
+ envelopes: List of ASAP envelopes to send
1234
+ return_exceptions: If True, exceptions are returned in the result list
1235
+ instead of being raised. If False (default), the first exception
1236
+ encountered will be raised.
1237
+
1238
+ Returns:
1239
+ List of response envelopes in the same order as input envelopes.
1240
+ If return_exceptions=True, failed sends will have the exception
1241
+ in their position instead of an Envelope.
1242
+
1243
+ Raises:
1244
+ ValueError: If envelopes list is empty
1245
+ ASAPConnectionError: If any send fails (when return_exceptions=False)
1246
+ ASAPTimeoutError: If any send times out (when return_exceptions=False)
1247
+ ASAPRemoteError: If any remote agent returns error (when return_exceptions=False)
1248
+ CircuitOpenError: If circuit breaker is open (when return_exceptions=False)
1249
+
1250
+ Example:
1251
+ >>> async with ASAPClient("http://localhost:8000") as client:
1252
+ ... responses = await client.send_batch([env1, env2, env3])
1253
+ ... for response in responses:
1254
+ ... print(response.payload_type)
1255
+ >>>
1256
+ >>> # With error handling
1257
+ >>> async with ASAPClient("http://localhost:8000") as client:
1258
+ ... results = await client.send_batch(envelopes, return_exceptions=True)
1259
+ ... for i, result in enumerate(results):
1260
+ ... if isinstance(result, BaseException):
1261
+ ... print(f"Envelope {i} failed: {result}")
1262
+ ... else:
1263
+ ... print(f"Envelope {i} succeeded: {result.id}")
1264
+ """
1265
+ if not envelopes:
1266
+ raise ValueError("envelopes list cannot be empty")
1267
+
1268
+ if not self._client:
1269
+ raise ASAPConnectionError(
1270
+ "Client not connected. Use 'async with' context.",
1271
+ url=sanitize_url(self.base_url),
1272
+ )
1273
+
1274
+ batch_size = len(envelopes)
1275
+ logger.info(
1276
+ "asap.client.send_batch",
1277
+ target_url=sanitize_url(self.base_url),
1278
+ batch_size=batch_size,
1279
+ message=f"Sending batch of {batch_size} envelopes to {sanitize_url(self.base_url)}",
1280
+ )
1281
+
1282
+ start_time = time.perf_counter()
1283
+
1284
+ # Create send tasks for all envelopes
1285
+ tasks = [self.send(envelope) for envelope in envelopes]
1286
+
1287
+ # Execute all tasks concurrently
1288
+ results = await asyncio.gather(*tasks, return_exceptions=return_exceptions)
1289
+
1290
+ duration_ms = (time.perf_counter() - start_time) * 1000
1291
+
1292
+ # Count successes and failures
1293
+ if return_exceptions:
1294
+ success_count = sum(1 for r in results if isinstance(r, Envelope))
1295
+ failure_count = batch_size - success_count
1296
+ else:
1297
+ success_count = batch_size
1298
+ failure_count = 0
1299
+
1300
+ logger.info(
1301
+ "asap.client.send_batch_complete",
1302
+ target_url=sanitize_url(self.base_url),
1303
+ batch_size=batch_size,
1304
+ success_count=success_count,
1305
+ failure_count=failure_count,
1306
+ duration_ms=round(duration_ms, 2),
1307
+ throughput_per_second=round(batch_size / (duration_ms / 1000), 2)
1308
+ if duration_ms > 0
1309
+ else 0,
1310
+ message=(
1311
+ f"Batch of {batch_size} envelopes completed in {duration_ms:.2f}ms "
1312
+ f"({success_count} succeeded, {failure_count} failed)"
1313
+ ),
1314
+ )
1315
+
1316
+ return results