agent-tool-resilience 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,215 @@
+ """
+ Retry policies with exponential backoff, jitter, and configurable conditions.
+ """
+
+ import asyncio
+ import random
+ import time
+ from dataclasses import dataclass, field
+ from typing import Any, Callable, Optional, Sequence, Type, Union
+
+
+ class RetryError(Exception):
+     """Raised when all retry attempts have been exhausted."""
+
+     def __init__(self, message: str, attempts: int, last_exception: Optional[Exception] = None):
+         super().__init__(message)
+         self.attempts = attempts
+         self.last_exception = last_exception
+
+
+ @dataclass
+ class RetryPolicy:
+     """
+     Configurable retry policy with various backoff strategies.
+
+     Attributes:
+         max_attempts: Maximum number of attempts (including initial call)
+         backoff: Backoff strategy - "constant", "linear", "exponential"
+         base_delay: Base delay in seconds
+         max_delay: Maximum delay cap in seconds
+         jitter: Whether to add random jitter to delays
+         jitter_range: Range for jitter as (min_factor, max_factor)
+         retry_on: Exception types to retry on
+         retry_if: Optional predicate function for custom retry conditions
+         on_retry: Optional callback called before each retry
+     """
+     max_attempts: int = 3
+     backoff: str = "exponential"
+     base_delay: float = 1.0
+     max_delay: float = 60.0
+     jitter: bool = True
+     jitter_range: tuple[float, float] = (0.5, 1.5)
+     retry_on: Sequence[Type[Exception]] = field(default_factory=lambda: [Exception])
+     retry_if: Optional[Callable[[Exception], bool]] = None
+     on_retry: Optional[Callable[[int, Exception, float], None]] = None
+
+     def calculate_delay(self, attempt: int) -> float:
+         """Calculate delay for the given attempt number (0-indexed)."""
+         if self.backoff == "constant":
+             delay = self.base_delay
+         elif self.backoff == "linear":
+             delay = self.base_delay * (attempt + 1)
+         elif self.backoff == "exponential":
+             delay = self.base_delay * (2 ** attempt)
+         else:
+             raise ValueError(f"Unknown backoff strategy: {self.backoff}")
+
+         # Apply max delay cap
+         delay = min(delay, self.max_delay)
+
+         # Apply jitter
+         if self.jitter:
+             min_factor, max_factor = self.jitter_range
+             delay *= random.uniform(min_factor, max_factor)
+
+         return delay
+
+     def should_retry(self, exception: Exception) -> bool:
+         """Check if the exception should trigger a retry."""
+         # Check exception type
+         if not isinstance(exception, tuple(self.retry_on)):
+             return False
+
+         # Check custom predicate
+         if self.retry_if is not None:
+             return self.retry_if(exception)
+
+         return True
+
+     def execute(
+         self,
+         func: Callable[..., Any],
+         *args: Any,
+         **kwargs: Any
+     ) -> Any:
+         """
+         Execute a function with retry logic.
+
+         Args:
+             func: Function to execute
+             *args: Positional arguments for the function
+             **kwargs: Keyword arguments for the function
+
+         Returns:
+             The function's return value
+
+         Raises:
+             RetryError: If all retry attempts are exhausted
+         """
+         last_exception: Optional[Exception] = None
+
+         for attempt in range(self.max_attempts):
+             try:
+                 return func(*args, **kwargs)
+             except Exception as e:
+                 last_exception = e
+
+                 # Check if we should retry
+                 if attempt >= self.max_attempts - 1:
+                     break
+
+                 if not self.should_retry(e):
+                     raise
+
+                 # Calculate and apply delay
+                 delay = self.calculate_delay(attempt)
+
+                 # Call retry callback if provided
+                 if self.on_retry:
+                     self.on_retry(attempt + 1, e, delay)
+
+                 time.sleep(delay)
+
+         raise RetryError(
+             f"All {self.max_attempts} retry attempts exhausted",
+             attempts=self.max_attempts,
+             last_exception=last_exception
+         )
+
+     async def execute_async(
+         self,
+         func: Callable[..., Any],
+         *args: Any,
+         **kwargs: Any
+     ) -> Any:
+         """
+         Execute an async function with retry logic.
+
+         Args:
+             func: Async function to execute
+             *args: Positional arguments for the function
+             **kwargs: Keyword arguments for the function
+
+         Returns:
+             The function's return value
+
+         Raises:
+             RetryError: If all retry attempts are exhausted
+         """
+         last_exception: Optional[Exception] = None
+
+         for attempt in range(self.max_attempts):
+             try:
+                 return await func(*args, **kwargs)
+             except Exception as e:
+                 last_exception = e
+
+                 # Check if we should retry
+                 if attempt >= self.max_attempts - 1:
+                     break
+
+                 if not self.should_retry(e):
+                     raise
+
+                 # Calculate and apply delay
+                 delay = self.calculate_delay(attempt)
+
+                 # Call retry callback if provided
+                 if self.on_retry:
+                     self.on_retry(attempt + 1, e, delay)
+
+                 await asyncio.sleep(delay)
+
+         raise RetryError(
+             f"All {self.max_attempts} retry attempts exhausted",
+             attempts=self.max_attempts,
+             last_exception=last_exception
+         )
+
+
+ # Convenience functions for common retry policies
+ def no_retry() -> RetryPolicy:
+     """Create a policy that doesn't retry."""
+     return RetryPolicy(max_attempts=1)
+
+
+ def retry_with_backoff(
+     max_attempts: int = 3,
+     base_delay: float = 1.0,
+     max_delay: float = 60.0
+ ) -> RetryPolicy:
+     """Create an exponential backoff retry policy."""
+     return RetryPolicy(
+         max_attempts=max_attempts,
+         backoff="exponential",
+         base_delay=base_delay,
+         max_delay=max_delay,
+         jitter=True
+     )
+
+
+ def retry_on_network_errors(max_attempts: int = 5) -> RetryPolicy:
+     """Create a retry policy for common network errors."""
+     return RetryPolicy(
+         max_attempts=max_attempts,
+         backoff="exponential",
+         base_delay=1.0,
+         max_delay=30.0,
+         jitter=True,
+         retry_on=[
+             ConnectionError,
+             TimeoutError,
+             OSError,
+         ]
+     )
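
For orientation, here is a minimal usage sketch of the retry policy added above. It is illustrative only: the import path `agent_tool_resilience.retry` and the `flaky_call` helper are assumptions, since the diff does not show file names.

    # Illustrative sketch; the module path and flaky_call are assumed, not taken from the package.
    import random

    from agent_tool_resilience.retry import RetryError, RetryPolicy

    def flaky_call() -> str:
        # Stand-in for a tool call that fails transiently about half the time.
        if random.random() < 0.5:
            raise ConnectionError("simulated transient failure")
        return "ok"

    policy = RetryPolicy(
        max_attempts=4,
        backoff="exponential",
        base_delay=0.5,
        max_delay=5.0,
        retry_on=[ConnectionError],
        on_retry=lambda attempt, exc, delay: print(f"retry {attempt} after {exc!r}; sleeping {delay:.2f}s"),
    )

    try:
        print(policy.execute(flaky_call))
    except RetryError as err:
        print(f"gave up after {err.attempts} attempts: {err.last_exception!r}")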
@@ -0,0 +1,319 @@
+ """
+ Execution tracing for observability.
+ """
+
+ import json
+ import threading
+ import time
+ from dataclasses import dataclass, field
+ from datetime import datetime
+ from enum import Enum
+ from typing import Any, Callable, Optional
+
+
+ class ExecutionStatus(Enum):
+     """Status of a tool execution."""
+     PENDING = "pending"
+     RUNNING = "running"
+     SUCCESS = "success"
+     FAILED = "failed"
+     RETRYING = "retrying"
+     CIRCUIT_OPEN = "circuit_open"
+     FALLBACK = "fallback"
+
+
+ @dataclass
+ class ExecutionEvent:
+     """
+     A single execution event in the trace.
+
+     Attributes:
+         tool_name: Name of the tool being executed
+         attempt: Attempt number (1-indexed)
+         status: Status of this execution
+         timestamp: When this event occurred
+         duration_ms: Duration in milliseconds (if completed)
+         error: Error message if failed
+         error_type: Type of exception if failed
+         result_summary: Brief summary of result (if successful)
+         metadata: Additional context
+     """
+     tool_name: str
+     attempt: int
+     status: ExecutionStatus
+     timestamp: datetime = field(default_factory=datetime.now)
+     duration_ms: Optional[float] = None
+     error: Optional[str] = None
+     error_type: Optional[str] = None
+     result_summary: Optional[str] = None
+     metadata: dict = field(default_factory=dict)
+
+     def to_dict(self) -> dict:
+         """Convert to dictionary for serialization."""
+         return {
+             "tool_name": self.tool_name,
+             "attempt": self.attempt,
+             "status": self.status.value,
+             "timestamp": self.timestamp.isoformat(),
+             "duration_ms": self.duration_ms,
+             "error": self.error,
+             "error_type": self.error_type,
+             "result_summary": self.result_summary,
+             "metadata": self.metadata,
+         }
+
+
+ @dataclass
+ class ToolExecutionTracer:
+     """
+     Traces tool executions for observability.
+
+     Provides full visibility into what happened during tool execution,
+     including retries, fallbacks, and errors.
+
+     Attributes:
+         max_events: Maximum events to keep in memory
+         summarize_results: Whether to capture result summaries
+         result_max_length: Max length for result summaries
+         on_event: Callback for each event
+     """
+     max_events: int = 1000
+     summarize_results: bool = True
+     result_max_length: int = 200
+     on_event: Optional[Callable[[ExecutionEvent], None]] = None
+
+     _events: list[ExecutionEvent] = field(default_factory=list, init=False)
+     _lock: threading.Lock = field(default_factory=threading.Lock, init=False)
+     _active_executions: dict[str, tuple[int, float]] = field(
+         default_factory=dict, init=False
+     )
+
+     def _add_event(self, event: ExecutionEvent) -> None:
+         """Add an event to the trace."""
+         with self._lock:
+             self._events.append(event)
+
+             # Trim if over max
+             if len(self._events) > self.max_events:
+                 self._events = self._events[-self.max_events:]
+
+         if self.on_event:
+             self.on_event(event)
+
+     def _summarize_result(self, result: Any) -> Optional[str]:
+         """Create a brief summary of a result."""
+         if not self.summarize_results:
+             return None
+
+         try:
+             if result is None:
+                 return "None"
+             elif isinstance(result, (str, int, float, bool)):
+                 summary = str(result)
+             elif isinstance(result, dict):
+                 summary = f"dict({len(result)} keys)"
+             elif isinstance(result, (list, tuple)):
+                 summary = f"list({len(result)} items)"
+             else:
+                 summary = f"{type(result).__name__}"
+
+             if len(summary) > self.result_max_length:
+                 summary = summary[:self.result_max_length - 3] + "..."
+
+             return summary
+         except Exception:
+             return "unknown"
+
+     def start_execution(
+         self,
+         tool_name: str,
+         attempt: int = 1,
+         metadata: Optional[dict] = None
+     ) -> str:
+         """
+         Record the start of a tool execution.
+
+         Returns an execution ID for tracking.
+         """
+         exec_id = f"{tool_name}_{id(self)}_{time.time()}"
+
+         with self._lock:
+             self._active_executions[exec_id] = (attempt, time.time())
+
+         event = ExecutionEvent(
+             tool_name=tool_name,
+             attempt=attempt,
+             status=ExecutionStatus.RUNNING,
+             metadata=metadata or {},
+         )
+         self._add_event(event)
+
+         return exec_id
+
+     def record_success(
+         self,
+         exec_id: str,
+         tool_name: str,
+         attempt: int,
+         result: Any,
+         metadata: Optional[dict] = None
+     ) -> None:
+         """Record a successful execution."""
+         duration_ms = None
+
+         with self._lock:
+             if exec_id in self._active_executions:
+                 _, start_time = self._active_executions.pop(exec_id)
+                 duration_ms = (time.time() - start_time) * 1000
+
+         event = ExecutionEvent(
+             tool_name=tool_name,
+             attempt=attempt,
+             status=ExecutionStatus.SUCCESS,
+             duration_ms=duration_ms,
+             result_summary=self._summarize_result(result),
+             metadata=metadata or {},
+         )
+         self._add_event(event)
+
+     def record_failure(
+         self,
+         exec_id: str,
+         tool_name: str,
+         attempt: int,
+         error: Exception,
+         will_retry: bool = False,
+         metadata: Optional[dict] = None
+     ) -> None:
+         """Record a failed execution."""
+         duration_ms = None
+
+         with self._lock:
+             if exec_id in self._active_executions:
+                 _, start_time = self._active_executions.pop(exec_id)
+                 duration_ms = (time.time() - start_time) * 1000
+
+         status = ExecutionStatus.RETRYING if will_retry else ExecutionStatus.FAILED
+
+         event = ExecutionEvent(
+             tool_name=tool_name,
+             attempt=attempt,
+             status=status,
+             duration_ms=duration_ms,
+             error=str(error),
+             error_type=type(error).__name__,
+             metadata=metadata or {},
+         )
+         self._add_event(event)
+
+     def record_circuit_open(
+         self,
+         tool_name: str,
+         metadata: Optional[dict] = None
+     ) -> None:
+         """Record that execution was blocked by circuit breaker."""
+         event = ExecutionEvent(
+             tool_name=tool_name,
+             attempt=0,
+             status=ExecutionStatus.CIRCUIT_OPEN,
+             metadata=metadata or {},
+         )
+         self._add_event(event)
+
+     def record_fallback(
+         self,
+         tool_name: str,
+         fallback_name: str,
+         result: Any,
+         metadata: Optional[dict] = None
+     ) -> None:
+         """Record that a fallback was used."""
+         event = ExecutionEvent(
+             tool_name=tool_name,
+             attempt=0,
+             status=ExecutionStatus.FALLBACK,
+             result_summary=self._summarize_result(result),
+             metadata={"fallback_name": fallback_name, **(metadata or {})},
+         )
+         self._add_event(event)
+
+     def get_events(
+         self,
+         tool_name: Optional[str] = None,
+         status: Optional[ExecutionStatus] = None,
+         limit: Optional[int] = None
+     ) -> list[ExecutionEvent]:
+         """Get events with optional filtering."""
+         with self._lock:
+             events = self._events.copy()
+
+         if tool_name:
+             events = [e for e in events if e.tool_name == tool_name]
+
+         if status:
+             events = [e for e in events if e.status == status]
+
+         if limit:
+             events = events[-limit:]
+
+         return events
+
+     def get_execution_log(
+         self,
+         tool_name: Optional[str] = None,
+         limit: Optional[int] = None
+     ) -> list[dict]:
+         """Get execution log as list of dicts."""
+         events = self.get_events(tool_name=tool_name, limit=limit)
+         return [e.to_dict() for e in events]
+
+     def get_stats(self, tool_name: Optional[str] = None) -> dict:
+         """Get execution statistics."""
+         events = self.get_events(tool_name=tool_name)
+
+         if not events:
+             return {
+                 "total_executions": 0,
+                 "success_count": 0,
+                 "failure_count": 0,
+                 "retry_count": 0,
+                 "circuit_open_count": 0,
+                 "fallback_count": 0,
+                 "success_rate": 0.0,
+                 "avg_duration_ms": 0.0,
+             }
+
+         success_count = len([e for e in events if e.status == ExecutionStatus.SUCCESS])
+         failure_count = len([e for e in events if e.status == ExecutionStatus.FAILED])
+         retry_count = len([e for e in events if e.status == ExecutionStatus.RETRYING])
+         circuit_open_count = len([e for e in events if e.status == ExecutionStatus.CIRCUIT_OPEN])
+         fallback_count = len([e for e in events if e.status == ExecutionStatus.FALLBACK])
+
+         durations = [e.duration_ms for e in events if e.duration_ms is not None]
+         avg_duration = sum(durations) / len(durations) if durations else 0.0
+
+         total = success_count + failure_count
+         success_rate = success_count / total if total > 0 else 0.0
+
+         return {
+             "total_executions": total,
+             "success_count": success_count,
+             "failure_count": failure_count,
+             "retry_count": retry_count,
+             "circuit_open_count": circuit_open_count,
+             "fallback_count": fallback_count,
+             "success_rate": success_rate,
+             "avg_duration_ms": avg_duration,
+         }
+
+     def clear(self) -> None:
+         """Clear all events."""
+         with self._lock:
+             self._events.clear()
+             self._active_executions.clear()
+
+     def export_json(self, filepath: str) -> None:
+         """Export events to a JSON file."""
+         log = self.get_execution_log()
+         with open(filepath, "w") as f:
+             json.dump(log, f, indent=2)
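
Similarly, a minimal usage sketch of the tracer added above. The import path `agent_tool_resilience.tracing` and the `search_web` tool name are assumptions for illustration; only the methods shown in the diff are used.

    # Illustrative sketch; the module path and tool name are assumed, not taken from the package.
    from agent_tool_resilience.tracing import ToolExecutionTracer

    tracer = ToolExecutionTracer(on_event=lambda event: print(event.to_dict()))

    exec_id = tracer.start_execution("search_web", attempt=1)
    try:
        result = {"hits": 3}  # stand-in for the real tool call
        tracer.record_success(exec_id, "search_web", attempt=1, result=result)
    except Exception as err:
        tracer.record_failure(exec_id, "search_web", attempt=1, error=err, will_retry=False)

    print(tracer.get_stats("search_web"))  # aggregate counts and average duration
    tracer.export_json("trace.json")       # dump the full event log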