auditi-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
auditi/evaluator.py ADDED
@@ -0,0 +1,38 @@
1
+ """
2
+ Base evaluator class for implementing custom evaluation logic.
3
+ """
4
+ from abc import ABC, abstractmethod
5
+ from .types import TraceInput, EvaluationResult
6
+
7
+
8
class BaseEvaluator(ABC):
    """
    Abstract base class for trace evaluators.

    Subclasses supply the evaluation logic applied to each completed trace
    before it is sent to the Auditi platform.

    Example:
        >>> class QualityEvaluator(BaseEvaluator):
        ...     def evaluate(self, trace: TraceInput) -> EvaluationResult:
        ...         score = calculate_quality(trace.assistant_output)
        ...         return EvaluationResult(
        ...             status="pass" if score > 0.7 else "fail",
        ...             score=score,
        ...             reason="Quality check"
        ...         )
    """

    @abstractmethod
    def evaluate(self, trace: TraceInput) -> EvaluationResult:
        """
        Evaluate a single trace.

        Args:
            trace: Complete trace data (input, output, and spans).

        Returns:
            EvaluationResult carrying status, score, and optional details.
        """
        ...
auditi/events.py ADDED
@@ -0,0 +1,194 @@
1
+ """
2
+ Standardized event types for streaming agent responses.
3
+
4
+ This module provides a clear contract for all event types used in the tracing system.
5
+ """
6
+
7
+ from enum import Enum
8
+ from typing import Any, Dict, Optional, List
9
+ from dataclasses import dataclass, field
10
+
11
+
12
class EventType(str, Enum):
    """Canonical event types emitted while streaming agent responses."""

    # -- content --------------------------------------------------------
    TOKEN = "token"              # incremental text fragment
    COMPLETE = "complete"        # final assembled response

    # -- agent lifecycle ------------------------------------------------
    PHASE_START = "phase_start"
    PHASE_END = "phase_end"

    # -- tool execution -------------------------------------------------
    TOOL_EXEC_START = "tool_exec_start"
    TOOL_EXEC_END = "tool_exec_end"

    # -- metadata -------------------------------------------------------
    TURN_METADATA = "turn_metadata"  # turn-level metadata (usage, tool_calls, ...)
    USAGE = "usage"                  # standalone usage statistics

    # -- failures -------------------------------------------------------
    ERROR = "error"


# Lifecycle/metadata events: never accumulated into span outputs.
INTERNAL_EVENTS = frozenset(
    (
        EventType.PHASE_START,
        EventType.PHASE_END,
        EventType.TOOL_EXEC_START,
        EventType.TOOL_EXEC_END,
        EventType.TURN_METADATA,
        EventType.USAGE,
    )
)

# Events whose payload text is accumulated into the output.
CONTENT_EVENTS = frozenset((EventType.TOKEN, EventType.COMPLETE))
54
+
55
+
56
@dataclass
class StreamEvent:
    """
    Standardized streaming event.

    A uniform wrapper for every event yielded during agent streaming, with
    dict round-tripping (`to_dict`/`from_dict`) for backward compatibility
    with code that passes raw dictionaries.
    """

    type: EventType
    content: Optional[str] = None
    metadata: Optional[Dict[str, Any]] = None
    usage: Optional[Dict[str, Any]] = None
    error: Optional[str] = None
    tool_calls: Optional[List[Any]] = None
    # Event-specific extras
    phase: Optional[str] = None            # PHASE_START / PHASE_END
    tool: Optional[str] = None             # TOOL_EXEC_START / TOOL_EXEC_END
    history: Optional[List[Any]] = None    # COMPLETE
    messages: Optional[List[Any]] = None   # COMPLETE

    # Optional attributes serialized by to_dict / read by from_dict, in
    # the order the resulting dict keys should appear.
    _OPTIONAL_FIELDS = (
        "content",
        "metadata",
        "usage",
        "error",
        "tool_calls",
        "phase",
        "tool",
        "history",
        "messages",
    )

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict, omitting unset (None) attributes."""
        payload: Dict[str, Any] = {"type": self.type.value}
        for attr in self._OPTIONAL_FIELDS:
            value = getattr(self, attr)
            if value is not None:
                payload[attr] = value
        return payload

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "StreamEvent":
        """
        Build a StreamEvent from a raw dict (backward compatibility).

        Args:
            data: Dictionary; the 'type' key may be a string or EventType.
                  Missing/unknown types fall back to TOKEN.

        Returns:
            StreamEvent instance.
        """
        raw_type = data.get("type", "token")

        if isinstance(raw_type, EventType):
            resolved = raw_type
        elif not isinstance(raw_type, str):
            # Non-string, non-enum type value: treat as a plain token.
            resolved = EventType.TOKEN
        else:
            try:
                resolved = EventType(raw_type)
            except ValueError:
                # Unknown type string, default to token.
                resolved = EventType.TOKEN

        kwargs = {name: data.get(name) for name in cls._OPTIONAL_FIELDS}
        return cls(resolved, **kwargs)

    def is_internal(self) -> bool:
        """True if this event must be filtered from span outputs."""
        return self.type in INTERNAL_EVENTS

    def is_content(self) -> bool:
        """True if this event carries content to accumulate."""
        return self.type in CONTENT_EVENTS
145
+
146
+
147
+ # Helper factory functions for common events
148
def token_event(content: str) -> Dict[str, Any]:
    """Build a TOKEN event dict carrying one streamed text fragment."""
    return {"type": EventType.TOKEN.value, "content": content}
151
+
152
+
153
def complete_event(
    content: str,
    history: Optional[List[Any]] = None,
    messages: Optional[List[Any]] = None,
) -> Dict[str, Any]:
    """Build a COMPLETE event dict; history/messages only included when given."""
    event: Dict[str, Any] = {"type": EventType.COMPLETE.value, "content": content}
    for key, value in (("history", history), ("messages", messages)):
        if value is not None:
            event[key] = value
    return event
165
+
166
+
167
def turn_metadata_event(
    tool_calls: Optional[List[Any]] = None,
    usage: Optional[Dict[str, Any]] = None,
    perplexity: Optional[float] = None,
    confidence_level: Optional[str] = None,
    total_tokens: Optional[int] = None,
) -> Dict[str, Any]:
    """Build a TURN_METADATA event dict (None values are kept as keys)."""
    payload: Dict[str, Any] = {"type": EventType.TURN_METADATA.value}
    payload["tool_calls"] = tool_calls or []  # never None in the payload
    payload["usage"] = usage
    payload["perplexity"] = perplexity
    payload["confidence_level"] = confidence_level
    payload["total_tokens"] = total_tokens
    return payload
183
+
184
+
185
def phase_event(phase: str, start: bool = True) -> Dict[str, Any]:
    """Build a PHASE_START (start=True) or PHASE_END (start=False) event dict."""
    marker = EventType.PHASE_START if start else EventType.PHASE_END
    return {"type": marker.value, "phase": phase}
189
+
190
+
191
def tool_exec_event(tool: str, start: bool = True) -> Dict[str, Any]:
    """Build a TOOL_EXEC_START (start=True) or TOOL_EXEC_END event dict."""
    marker = EventType.TOOL_EXEC_START if start else EventType.TOOL_EXEC_END
    return {"type": marker.value, "tool": tool}
@@ -0,0 +1,41 @@
1
+ """
2
+ Provider abstraction layer for LLM usage extraction and cost calculation.
3
+
4
+ This module provides a clean, extensible way to handle different LLM providers
5
+ (OpenAI, Anthropic, Google, etc.) with automatic detection and provider-specific
6
+ pricing and usage extraction.
7
+
8
+ Usage:
9
+ >>> from auditi.providers import detect_provider
10
+ >>>
11
+ >>> # Auto-detect from model name
12
+ >>> provider = detect_provider(model="gpt-4o")
13
+ >>> input_tokens, output_tokens, total = provider.extract_usage(response.usage)
14
+ >>> cost = provider.calculate_cost("gpt-4o", input_tokens, output_tokens)
15
+ >>>
16
+ >>> # Or detect from response structure
17
+ >>> provider = detect_provider(response=api_response)
18
+ >>> model = provider.extract_model(api_response)
19
+
20
+ Adding a new provider:
21
+ 1. Create a new file in auditi/providers/ (e.g., cohere.py)
22
+ 2. Subclass BaseProvider and implement all abstract methods
23
+ 3. Register it in registry.py's __init__ method
24
+ 4. That's it! It will automatically be used for detection
25
+ """
26
+
27
+ from .base import BaseProvider
28
+ from .registry import get_registry, detect_provider, ProviderRegistry
29
+ from .openai import OpenAIProvider
30
+ from .anthropic import AnthropicProvider
31
+ from .google import GoogleProvider
32
+
33
# Public API of the providers package, re-exported from the submodules above.
__all__ = [
    "BaseProvider",
    "ProviderRegistry",
    "get_registry",
    "detect_provider",
    "OpenAIProvider",
    "AnthropicProvider",
    "GoogleProvider",
]
@@ -0,0 +1,141 @@
1
+ """
2
+ Anthropic provider implementation for usage extraction and cost calculation.
3
+ """
4
+
5
+ from typing import Optional, Any, Dict, Tuple
6
+ from .base import BaseProvider
7
+
8
+
9
+ def _coerce_int(value: Any) -> Optional[int]:
10
+ """Helper to safely convert values to int."""
11
+ if value is None:
12
+ return None
13
+ try:
14
+ return int(value)
15
+ except (TypeError, ValueError):
16
+ return None
17
+
18
+
19
class AnthropicProvider(BaseProvider):
    """Provider implementation for Anthropic Claude models."""

    @property
    def name(self) -> str:
        return "anthropic"

    @property
    def model_pricing(self) -> Dict[str, Tuple[float, float]]:
        """
        Anthropic pricing per 1M tokens as (input, output) USD.
        Figures current as of January 2025.
        """
        return {
            # Claude 4.5 family (newest)
            "claude-opus-4-5-20251101": (15.00, 75.00),
            "claude-sonnet-4-5-20250929": (3.00, 15.00),
            "claude-haiku-4-5-20251001": (0.80, 4.00),
            # Claude 3.5 family
            "claude-3-5-sonnet-20241022": (3.00, 15.00),
            "claude-3-5-sonnet-20240620": (3.00, 15.00),
            "claude-3-5-sonnet-latest": (3.00, 15.00),
            "claude-3-5-haiku-20241022": (0.80, 4.00),
            "claude-3-5-haiku-latest": (0.80, 4.00),
            # Claude 3 family
            "claude-3-opus-20240229": (15.00, 75.00),
            "claude-3-opus-latest": (15.00, 75.00),
            "claude-3-sonnet-20240229": (3.00, 15.00),
            "claude-3-haiku-20240307": (0.25, 1.25),
            # Legacy models
            "claude-2.1": (8.00, 24.00),
            "claude-2.0": (8.00, 24.00),
            "claude-instant-1.2": (0.80, 2.40),
        }

    def get_default_pricing(self) -> Tuple[float, float]:
        """Sonnet-class fallback pricing for Anthropic models not in the table."""
        return (3.00, 15.00)

    def get_model_prefixes(self) -> list[str]:
        return ["claude-"]

    def extract_usage(self, usage: Any) -> Tuple[Optional[int], Optional[int], Optional[int]]:
        """
        Extract token counts from an Anthropic usage object or dict.

        Anthropic shape: {"usage": {"input_tokens": 100, "output_tokens": 50}}.
        Anthropic does NOT report total_tokens, so the total is derived here
        from the input/output counts.
        """
        if usage is None:
            return None, None, None

        if isinstance(usage, dict):
            prompt = _coerce_int(usage.get("input_tokens"))
            completion = _coerce_int(usage.get("output_tokens"))
        else:
            # Object-style usage (SDK response objects).
            prompt = _coerce_int(getattr(usage, "input_tokens", None))
            completion = _coerce_int(getattr(usage, "output_tokens", None))

        combined: Optional[int] = None
        if prompt is not None or completion is not None:
            # Derive the total Anthropic omits; missing sides count as 0.
            combined = (prompt or 0) + (completion or 0)

        return prompt, completion, combined

    def extract_model(self, response: Any) -> Optional[str]:
        """Pull the model name off an Anthropic response (dict or object)."""
        if response is None:
            return None
        if isinstance(response, dict):
            return response.get("model")
        if hasattr(response, "model"):
            return str(response.model)
        return None

    def matches_response(self, response: Any) -> bool:
        """
        Structural detection of Anthropic responses.

        Signals checked: a 'usage' block keyed by 'input_tokens' (Anthropic
        uses input/output_tokens, not prompt_tokens), or a 'stop_reason'
        field. Falls back to model-prefix matching otherwise.
        """
        if response is None:
            return False

        if isinstance(response, dict):
            usage_block = response.get("usage")
            if isinstance(usage_block, dict) and "input_tokens" in usage_block:
                return True
            if "stop_reason" in response:
                return True
        else:
            usage_obj = getattr(response, "usage", None)
            # 'input_tokens' is the Anthropic-specific usage field name.
            if usage_obj is not None and hasattr(usage_obj, "input_tokens"):
                return True

        return super().matches_response(response)
@@ -0,0 +1,156 @@
1
+ """
2
+ Base provider interface for LLM usage extraction and cost calculation.
3
+ """
4
+
5
+ from abc import ABC, abstractmethod
6
+ from typing import Optional, Any, Dict, Tuple
7
+
8
+
9
class BaseProvider(ABC):
    """
    Abstract base class for LLM provider-specific logic.

    A provider is responsible for:
    1. Extracting token usage from API responses
    2. Looking up model pricing
    3. Computing request cost
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Provider identifier, e.g. 'openai', 'anthropic', 'google'."""
        ...

    @property
    @abstractmethod
    def model_pricing(self) -> Dict[str, Tuple[float, float]]:
        """
        Per-model pricing table.

        Returns:
            Mapping of model name -> (input_price, output_price), both
            expressed per 1M tokens in USD. Example:
                {"gpt-4o": (2.50, 10.00),
                 "claude-3-5-sonnet-20241022": (3.00, 15.00)}
        """
        ...

    @abstractmethod
    def extract_usage(self, usage: Any) -> Tuple[Optional[int], Optional[int], Optional[int]]:
        """
        Pull token counts out of a provider-specific usage object.

        Args:
            usage: Raw usage object/dict from the API response.

        Returns:
            (input_tokens, output_tokens, total_tokens); all None when
            extraction fails.
        """
        ...

    @abstractmethod
    def extract_model(self, response: Any) -> Optional[str]:
        """
        Pull the model name out of an API response.

        Args:
            response: Raw API response object.

        Returns:
            Model name string, or None when not found.
        """
        ...

    def calculate_cost(
        self, model: Optional[str], input_tokens: Optional[int], output_tokens: Optional[int]
    ) -> float:
        """
        Compute the USD cost of a request using this provider's pricing.

        Args:
            model: Model name (None falls back to default pricing).
            input_tokens: Prompt token count.
            output_tokens: Completion token count.

        Returns:
            Total cost in USD (0.0 when no token counts are available).
        """
        if input_tokens is None and output_tokens is None:
            return 0.0

        prompt = input_tokens or 0
        completion = output_tokens or 0

        # Exact-match lookup; unknown models use the provider's fallback rate.
        rates = self.model_pricing.get(model)
        if rates is None:
            rates = self.get_default_pricing()
        per_input, per_output = rates

        # Prices are quoted per 1M tokens.
        input_cost = (prompt / 1_000_000) * per_input
        output_cost = (completion / 1_000_000) * per_output
        return input_cost + output_cost

    @abstractmethod
    def get_default_pricing(self) -> Tuple[float, float]:
        """
        Fallback pricing for models absent from the pricing table.

        Returns:
            (input_price, output_price) per 1M tokens in USD.
        """
        ...

    def matches_model(self, model: Optional[str]) -> bool:
        """
        Decide whether a model name belongs to this provider.

        Default implementation: case-insensitive prefix match against
        get_model_prefixes(). Override for custom logic.

        Args:
            model: Model name string (None/empty never matches).

        Returns:
            True when this provider handles the model.
        """
        if not model:
            return False

        lowered = model.lower()
        for prefix in self.get_model_prefixes():
            if lowered.startswith(prefix):
                return True
        return False

    @abstractmethod
    def get_model_prefixes(self) -> list[str]:
        """
        Model-name prefixes claimed by this provider.

        Returns:
            Lowercase prefixes, e.g. ['gpt-', 'o1-'].
        """
        ...

    def matches_response(self, response: Any) -> bool:
        """
        Decide whether a raw response comes from this provider.

        Default implementation extracts the model name and defers to
        matches_model(); override for structural detection.

        Args:
            response: Raw API response object.

        Returns:
            True when this provider handles the response.
        """
        return self.matches_model(self.extract_model(response))