llm_cost_guard-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. llm_cost_guard/__init__.py +39 -0
  2. llm_cost_guard/backends/__init__.py +52 -0
  3. llm_cost_guard/backends/base.py +121 -0
  4. llm_cost_guard/backends/memory.py +265 -0
  5. llm_cost_guard/backends/sqlite.py +425 -0
  6. llm_cost_guard/budget.py +306 -0
  7. llm_cost_guard/cli.py +464 -0
  8. llm_cost_guard/clients/__init__.py +11 -0
  9. llm_cost_guard/clients/anthropic.py +231 -0
  10. llm_cost_guard/clients/openai.py +262 -0
  11. llm_cost_guard/exceptions.py +71 -0
  12. llm_cost_guard/integrations/__init__.py +12 -0
  13. llm_cost_guard/integrations/cache.py +189 -0
  14. llm_cost_guard/integrations/langchain.py +257 -0
  15. llm_cost_guard/models.py +123 -0
  16. llm_cost_guard/pricing/__init__.py +7 -0
  17. llm_cost_guard/pricing/anthropic.yaml +88 -0
  18. llm_cost_guard/pricing/bedrock.yaml +215 -0
  19. llm_cost_guard/pricing/loader.py +221 -0
  20. llm_cost_guard/pricing/openai.yaml +148 -0
  21. llm_cost_guard/pricing/vertex.yaml +133 -0
  22. llm_cost_guard/providers/__init__.py +69 -0
  23. llm_cost_guard/providers/anthropic.py +115 -0
  24. llm_cost_guard/providers/base.py +72 -0
  25. llm_cost_guard/providers/bedrock.py +135 -0
  26. llm_cost_guard/providers/openai.py +110 -0
  27. llm_cost_guard/rate_limit.py +233 -0
  28. llm_cost_guard/span.py +143 -0
  29. llm_cost_guard/tokenizers/__init__.py +7 -0
  30. llm_cost_guard/tokenizers/base.py +207 -0
  31. llm_cost_guard/tracker.py +718 -0
  32. llm_cost_guard-0.1.0.dist-info/METADATA +357 -0
  33. llm_cost_guard-0.1.0.dist-info/RECORD +36 -0
  34. llm_cost_guard-0.1.0.dist-info/WHEEL +4 -0
  35. llm_cost_guard-0.1.0.dist-info/entry_points.txt +2 -0
  36. llm_cost_guard-0.1.0.dist-info/licenses/LICENSE +21 -0
llm_cost_guard/clients/openai.py
@@ -0,0 +1,262 @@
+"""
+Wrapped OpenAI client with automatic cost tracking.
+"""
+
+import time
+from typing import Any, Dict, Optional, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from llm_cost_guard import CostTracker
+
+
+class TrackedOpenAI:
+    """
+    OpenAI client wrapper with automatic cost tracking.
+
+    Usage:
+        from llm_cost_guard import CostTracker
+        from llm_cost_guard.clients import TrackedOpenAI
+
+        tracker = CostTracker()
+        client = TrackedOpenAI(tracker=tracker)
+
+        response = client.chat.completions.create(
+            model="gpt-4o",
+            messages=[{"role": "user", "content": "Hello!"}]
+        )
+        # Cost is automatically tracked
+    """
+
+    def __init__(
+        self,
+        tracker: "CostTracker",
+        client: Optional[Any] = None,
+        tags: Optional[Dict[str, str]] = None,
+        **openai_kwargs: Any,
+    ):
+        """
+        Initialize the tracked OpenAI client.
+
+        Args:
+            tracker: CostTracker instance
+            client: Optional existing OpenAI client to wrap
+            tags: Default tags for all calls
+            **openai_kwargs: Arguments passed to the OpenAI client
+        """
+        try:
+            from openai import OpenAI
+        except ImportError:
+            raise ImportError(
+                "OpenAI is required for this client. Install with: pip install openai"
+            )
+
+        self._tracker = tracker
+        self._default_tags = tags or {}
+        self._client = client or OpenAI(**openai_kwargs)
+
+        # Create wrapped interface
+        self.chat = _TrackedChat(self._client.chat, self._tracker, self._default_tags)
+        self.completions = _TrackedCompletions(
+            self._client.completions, self._tracker, self._default_tags
+        )
+        self.embeddings = _TrackedEmbeddings(
+            self._client.embeddings, self._tracker, self._default_tags
+        )
+
+    @property
+    def models(self):
+        """Access the models API."""
+        return self._client.models
+
+    def close(self) -> None:
+        """Close the underlying client."""
+        self._client.close()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *args):
+        self.close()
+
+
+class _TrackedChat:
+    """Wrapped chat API namespace."""
+
+    def __init__(self, chat, tracker: "CostTracker", default_tags: Dict[str, str]):
+        self._chat = chat
+        self._tracker = tracker
+        self._default_tags = default_tags
+        self.completions = _TrackedChatCompletions(
+            chat.completions, tracker, default_tags
+        )
+
+
+class _TrackedChatCompletions:
+    """Wrapped chat.completions API."""
+
+    def __init__(self, completions, tracker: "CostTracker", default_tags: Dict[str, str]):
+        self._completions = completions
+        self._tracker = tracker
+        self._default_tags = default_tags
+
+    def create(
+        self,
+        *,
+        tags: Optional[Dict[str, str]] = None,
+        **kwargs: Any,
+    ) -> Any:
+        """Create a chat completion with tracking."""
+        start_time = time.time()
+        success = True
+        error_type = None
+        response = None
+
+        try:
+            response = self._completions.create(**kwargs)
+            return response
+        except Exception as e:
+            success = False
+            error_type = type(e).__name__
+            raise
+        finally:
+            latency_ms = int((time.time() - start_time) * 1000)
+
+            if response is not None:
+                self._record_response(response, tags, success, error_type, latency_ms)
+
+    def _record_response(
+        self,
+        response: Any,
+        tags: Optional[Dict[str, str]],
+        success: bool,
+        error_type: Optional[str],
+        latency_ms: int,
+    ) -> None:
+        """Record the response with the tracker."""
+        from llm_cost_guard.providers.openai import OpenAIProvider
+
+        provider = OpenAIProvider()
+        usage = provider.extract_usage(response)
+        model = provider.extract_model(response)
+
+        all_tags = dict(self._default_tags)
+        if tags:
+            all_tags.update(tags)
+
+        self._tracker.record(
+            provider="openai",
+            model=model,
+            input_tokens=usage.input_tokens,
+            output_tokens=usage.output_tokens,
+            tags=all_tags,
+            success=success,
+            error_type=error_type,
+            latency_ms=latency_ms,
+            cached_tokens=usage.cached_tokens,
+        )
+
+
+class _TrackedCompletions:
+    """Wrapped completions API (legacy)."""
+
+    def __init__(self, completions, tracker: "CostTracker", default_tags: Dict[str, str]):
+        self._completions = completions
+        self._tracker = tracker
+        self._default_tags = default_tags
+
+    def create(
+        self,
+        *,
+        tags: Optional[Dict[str, str]] = None,
+        **kwargs: Any,
+    ) -> Any:
+        """Create a completion with tracking."""
+        start_time = time.time()
+        success = True
+        error_type = None
+        response = None
+
+        try:
+            response = self._completions.create(**kwargs)
+            return response
+        except Exception as e:
+            success = False
+            error_type = type(e).__name__
+            raise
+        finally:
+            latency_ms = int((time.time() - start_time) * 1000)
+
+            if response is not None:
+                from llm_cost_guard.providers.openai import OpenAIProvider
+
+                provider = OpenAIProvider()
+                usage = provider.extract_usage(response)
+                model = provider.extract_model(response)
+
+                all_tags = dict(self._default_tags)
+                if tags:
+                    all_tags.update(tags)
+
+                self._tracker.record(
+                    provider="openai",
+                    model=model,
+                    input_tokens=usage.input_tokens,
+                    output_tokens=usage.output_tokens,
+                    tags=all_tags,
+                    success=success,
+                    error_type=error_type,
+                    latency_ms=latency_ms,
+                )
+
+
+class _TrackedEmbeddings:
+    """Wrapped embeddings API."""
+
+    def __init__(self, embeddings, tracker: "CostTracker", default_tags: Dict[str, str]):
+        self._embeddings = embeddings
+        self._tracker = tracker
+        self._default_tags = default_tags
+
+    def create(
+        self,
+        *,
+        tags: Optional[Dict[str, str]] = None,
+        **kwargs: Any,
+    ) -> Any:
+        """Create embeddings with tracking."""
+        start_time = time.time()
+        success = True
+        error_type = None
+        response = None
+
+        try:
+            response = self._embeddings.create(**kwargs)
+            return response
+        except Exception as e:
+            success = False
+            error_type = type(e).__name__
+            raise
+        finally:
+            latency_ms = int((time.time() - start_time) * 1000)
+
+            if response is not None:
+                from llm_cost_guard.providers.openai import OpenAIProvider
+
+                provider = OpenAIProvider()
+                usage = provider.extract_usage(response)
+                model = kwargs.get("model", "unknown")
+
+                all_tags = dict(self._default_tags)
+                if tags:
+                    all_tags.update(tags)
+
+                self._tracker.record(
+                    provider="openai",
+                    model=model,
+                    input_tokens=usage.input_tokens,
+                    output_tokens=0,  # Embeddings don't have output tokens
+                    tags=all_tags,
+                    success=success,
+                    error_type=error_type,
+                    latency_ms=latency_ms,
+                )
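A minimal usage sketch for the wrapper above, based only on the API visible in this diff (CostTracker and the per-call tags keyword come from the docstrings; the tag names and the embedding model are illustrative):

    from llm_cost_guard import CostTracker
    from llm_cost_guard.clients import TrackedOpenAI

    tracker = CostTracker()

    # Default tags apply to every call; per-call tags are merged on top.
    client = TrackedOpenAI(tracker=tracker, tags={"team": "search"})

    with client:  # __enter__/__exit__ close the underlying client
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": "Hello!"}],
            tags={"feature": "autocomplete"},  # merged with {"team": "search"}
        )

        # Embeddings are tracked too, with output_tokens recorded as 0.
        client.embeddings.create(
            model="text-embedding-3-small",
            input="Hello!",
        )

Note the design choice in create(): a call is recorded only when a response was obtained, so a call that raises leaves response as None and writes nothing to the tracker, even though success and error_type are computed.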
llm_cost_guard/exceptions.py
@@ -0,0 +1,71 @@
+"""
+Custom exceptions for LLM Cost Guard.
+"""
+
+from typing import TYPE_CHECKING, Optional
+
+if TYPE_CHECKING:
+    from llm_cost_guard.budget import Budget
+
+
+class LLMCostGuardError(Exception):
+    """Base exception for LLM Cost Guard."""
+
+    pass
+
+
+class BudgetExceededError(LLMCostGuardError):
+    """Raised when a budget limit is exceeded."""
+
+    def __init__(
+        self,
+        message: str,
+        budget: Optional["Budget"] = None,
+        current: float = 0.0,
+        limit: float = 0.0,
+    ):
+        super().__init__(message)
+        self.budget = budget
+        self.current = current
+        self.limit = limit
+
+
+class PricingNotFoundError(LLMCostGuardError):
+    """Raised when pricing information for a model is not found."""
+
+    def __init__(self, message: str, provider: str = "", model: str = ""):
+        super().__init__(message)
+        self.provider = provider
+        self.model = model
+
+
+class TokenCountError(LLMCostGuardError):
+    """Raised when token counting fails."""
+
+    pass
+
+
+class TrackingUnavailableError(LLMCostGuardError):
+    """Raised when the tracking backend is unavailable."""
+
+    def __init__(self, message: str, backend: str = ""):
+        super().__init__(message)
+        self.backend = backend
+
+
+class RateLimitExceededError(LLMCostGuardError):
+    """Raised when a rate limit is exceeded."""
+
+    def __init__(
+        self,
+        message: str,
+        limit_name: str = "",
+        current: int = 0,
+        limit: int = 0,
+        retry_after_seconds: Optional[float] = None,
+    ):
+        super().__init__(message)
+        self.limit_name = limit_name
+        self.current = current
+        self.limit = limit
+        self.retry_after_seconds = retry_after_seconds
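A short sketch of how the exception metadata is meant to be consumed. This hunk does not show where the package raises these, so the raise below is purely illustrative:

    import time

    from llm_cost_guard.exceptions import RateLimitExceededError

    try:
        raise RateLimitExceededError(
            "requests-per-minute limit hit",
            limit_name="rpm",
            current=61,
            limit=60,
            retry_after_seconds=2.5,
        )
    except RateLimitExceededError as e:
        # retry_after_seconds is Optional; check before sleeping.
        if e.retry_after_seconds is not None:
            time.sleep(e.retry_after_seconds)

Because every class derives from LLMCostGuardError, callers can also catch the base type to handle budget, pricing, token-counting, and rate-limit failures uniformly.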
llm_cost_guard/integrations/__init__.py
@@ -0,0 +1,12 @@
+"""
+Integrations with external tools and frameworks.
+"""
+
+from llm_cost_guard.integrations.langchain import CostTrackingCallback, track_chain
+from llm_cost_guard.integrations.cache import CacheTracker
+
+__all__ = [
+    "CostTrackingCallback",
+    "track_chain",
+    "CacheTracker",
+]
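Because this __init__ imports the langchain integration eagerly, importing anything from llm_cost_guard.integrations pulls that module in as well. Assuming those imports succeed, the re-exported and direct paths resolve to the same object:

    from llm_cost_guard.integrations import CacheTracker
    from llm_cost_guard.integrations.cache import CacheTracker as Direct

    assert CacheTracker is Direct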
llm_cost_guard/integrations/cache.py
@@ -0,0 +1,189 @@
+"""
+Cache integration for LLM Cost Guard.
+"""
+
+import functools
+import logging
+from typing import Any, Callable, Dict, Optional, TypeVar
+
+logger = logging.getLogger(__name__)
+
+F = TypeVar("F", bound=Callable[..., Any])
+
+
+class CacheTracker:
+    """
+    Tracks cache hits and savings for cached LLM calls.
+
+    Usage:
+        from llm_cost_guard import CostTracker
+        from llm_cost_guard.integrations.cache import CacheTracker
+
+        tracker = CostTracker()
+        cache_tracker = CacheTracker(tracker)
+
+        @cache_tracker.track
+        @your_cache_decorator
+        def cached_llm_call(prompt):
+            return llm.invoke(prompt)
+    """
+
+    def __init__(
+        self,
+        tracker: Any,  # CostTracker
+        default_tags: Optional[Dict[str, str]] = None,
+    ):
+        """
+        Initialize the cache tracker.
+
+        Args:
+            tracker: CostTracker instance
+            default_tags: Default tags to apply to all tracked calls
+        """
+        self._tracker = tracker
+        self._default_tags = default_tags or {}
+        self._cache_hits = 0
+        self._cache_misses = 0
+        self._estimated_savings = 0.0
+
+    def track(
+        self,
+        func: Optional[F] = None,
+        *,
+        tags: Optional[Dict[str, str]] = None,
+        cache_indicator: str = "_from_cache",
+    ) -> F:
+        """
+        Decorator to track cache hits and savings.
+
+        The decorated function should set a `_from_cache` attribute on
+        the result if it came from cache, or return a tuple (result, from_cache).
+
+        Args:
+            func: Function to decorate
+            tags: Additional tags
+            cache_indicator: Attribute name to check for a cache hit
+
+        Returns:
+            Decorated function
+        """
+
+        def decorator(f: F) -> F:
+            @functools.wraps(f)
+            def wrapper(*args: Any, **kwargs: Any) -> Any:
+                result = f(*args, **kwargs)
+
+                # Check whether the result came from cache
+                from_cache = False
+
+                # Check for a (result, from_cache) tuple return
+                if isinstance(result, tuple) and len(result) == 2:
+                    actual_result, from_cache = result
+                    result = actual_result
+
+                # Check for the indicator attribute on the result
+                elif hasattr(result, cache_indicator):
+                    from_cache = getattr(result, cache_indicator, False)
+
+                # Update cache stats
+                if from_cache:
+                    self._cache_hits += 1
+                else:
+                    self._cache_misses += 1
+
+                return result
+
+            return wrapper  # type: ignore
+
+        if func is not None:
+            return decorator(func)
+        return decorator  # type: ignore
+
+    def record_cache_hit(
+        self,
+        estimated_cost: float,
+        provider: str = "unknown",
+        model: str = "unknown",
+        tags: Optional[Dict[str, str]] = None,
+    ) -> None:
+        """
+        Manually record a cache hit with estimated savings.
+
+        Args:
+            estimated_cost: Estimated cost that was saved
+            provider: Provider name
+            model: Model name
+            tags: Attribution tags
+        """
+        self._cache_hits += 1
+        self._estimated_savings += estimated_cost
+
+        # Record in the main tracker as a zero-cost call
+        all_tags = dict(self._default_tags)
+        if tags:
+            all_tags.update(tags)
+        all_tags["cache_hit"] = "true"
+
+        self._tracker.record(
+            provider=provider,
+            model=model,
+            input_tokens=0,
+            output_tokens=0,
+            tags=all_tags,
+            success=True,
+            metadata={"cache_savings": estimated_cost},
+        )
+
+    def record_cache_miss(
+        self,
+        provider: str = "unknown",
+        model: str = "unknown",
+        tags: Optional[Dict[str, str]] = None,
+    ) -> None:
+        """
+        Manually record a cache miss.
+
+        Args:
+            provider: Provider name
+            model: Model name
+            tags: Attribution tags
+        """
+        self._cache_misses += 1
+
+    @property
+    def cache_hits(self) -> int:
+        """Get total cache hits."""
+        return self._cache_hits
+
+    @property
+    def cache_misses(self) -> int:
+        """Get total cache misses."""
+        return self._cache_misses
+
+    @property
+    def cache_hit_rate(self) -> float:
+        """Get cache hit rate (0.0 to 1.0)."""
+        total = self._cache_hits + self._cache_misses
+        if total == 0:
+            return 0.0
+        return self._cache_hits / total
+
+    @property
+    def estimated_savings(self) -> float:
+        """Get estimated cost savings from cache hits."""
+        return self._estimated_savings
+
+    def reset(self) -> None:
+        """Reset cache statistics."""
+        self._cache_hits = 0
+        self._cache_misses = 0
+        self._estimated_savings = 0.0
+
+    def get_stats(self) -> Dict[str, Any]:
+        """Get cache statistics."""
+        return {
+            "cache_hits": self._cache_hits,
+            "cache_misses": self._cache_misses,
+            "cache_hit_rate": self.cache_hit_rate,
+            "estimated_savings": self._estimated_savings,
+        }
+ }