llm-cost-guard 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_cost_guard/__init__.py +39 -0
- llm_cost_guard/backends/__init__.py +52 -0
- llm_cost_guard/backends/base.py +121 -0
- llm_cost_guard/backends/memory.py +265 -0
- llm_cost_guard/backends/sqlite.py +425 -0
- llm_cost_guard/budget.py +306 -0
- llm_cost_guard/cli.py +464 -0
- llm_cost_guard/clients/__init__.py +11 -0
- llm_cost_guard/clients/anthropic.py +231 -0
- llm_cost_guard/clients/openai.py +262 -0
- llm_cost_guard/exceptions.py +71 -0
- llm_cost_guard/integrations/__init__.py +12 -0
- llm_cost_guard/integrations/cache.py +189 -0
- llm_cost_guard/integrations/langchain.py +257 -0
- llm_cost_guard/models.py +123 -0
- llm_cost_guard/pricing/__init__.py +7 -0
- llm_cost_guard/pricing/anthropic.yaml +88 -0
- llm_cost_guard/pricing/bedrock.yaml +215 -0
- llm_cost_guard/pricing/loader.py +221 -0
- llm_cost_guard/pricing/openai.yaml +148 -0
- llm_cost_guard/pricing/vertex.yaml +133 -0
- llm_cost_guard/providers/__init__.py +69 -0
- llm_cost_guard/providers/anthropic.py +115 -0
- llm_cost_guard/providers/base.py +72 -0
- llm_cost_guard/providers/bedrock.py +135 -0
- llm_cost_guard/providers/openai.py +110 -0
- llm_cost_guard/rate_limit.py +233 -0
- llm_cost_guard/span.py +143 -0
- llm_cost_guard/tokenizers/__init__.py +7 -0
- llm_cost_guard/tokenizers/base.py +207 -0
- llm_cost_guard/tracker.py +718 -0
- llm_cost_guard-0.1.0.dist-info/METADATA +357 -0
- llm_cost_guard-0.1.0.dist-info/RECORD +36 -0
- llm_cost_guard-0.1.0.dist-info/WHEEL +4 -0
- llm_cost_guard-0.1.0.dist-info/entry_points.txt +2 -0
- llm_cost_guard-0.1.0.dist-info/licenses/LICENSE +21 -0
llm_cost_guard/cli.py
ADDED
|
@@ -0,0 +1,464 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Command-line interface for LLM Cost Guard.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import json
|
|
7
|
+
import sys
|
|
8
|
+
from datetime import datetime, timedelta
|
|
9
|
+
from typing import Any, Dict, Optional
|
|
10
|
+
|
|
11
|
+
from llm_cost_guard import CostTracker
|
|
12
|
+
from llm_cost_guard.pricing.loader import PricingLoader
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def create_parser() -> argparse.ArgumentParser:
    """Create the top-level argument parser for the llm-cost-guard CLI.

    Returns:
        An ``argparse.ArgumentParser`` with one subcommand per CLI action
        (status, health, report, pricing-status, update-pricing, export,
        validate-config, models).
    """
    parser = argparse.ArgumentParser(
        prog="llm-cost-guard",
        description="LLM Cost Guard - Real-time cost tracking and budget enforcement for LLM applications",
    )

    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    def _add_backend_option(p: argparse.ArgumentParser) -> None:
        # Several subcommands share the identical backend selector; define
        # it once so the flag stays consistent across commands.
        p.add_argument(
            "--backend",
            default="memory",
            help="Backend URL (default: memory)",
        )

    # status command: today's costs plus a brief health summary
    status_parser = subparsers.add_parser("status", help="View current costs and budget status")
    _add_backend_option(status_parser)

    # health command: backend/pricing health check only
    health_parser = subparsers.add_parser("health", help="Check tracker health")
    _add_backend_option(health_parser)

    # report command: aggregated costs over a day/week/month window
    report_parser = subparsers.add_parser("report", help="Generate cost report")
    report_parser.add_argument(
        "--period",
        choices=["day", "week", "month"],
        default="day",
        help="Report period (default: day)",
    )
    report_parser.add_argument(
        "--group-by",
        nargs="+",
        help="Group by fields (e.g., --group-by model provider)",
    )
    report_parser.add_argument(
        "--format",
        choices=["text", "json", "csv"],
        default="text",
        help="Output format (default: text)",
    )
    _add_backend_option(report_parser)

    # pricing-status / update-pricing commands take no extra options,
    # so the returned sub-parser objects need not be kept.
    subparsers.add_parser("pricing-status", help="Check pricing data status")
    subparsers.add_parser("update-pricing", help="Update pricing data")

    # export command: dump raw cost records as JSON or CSV
    export_parser = subparsers.add_parser("export", help="Export cost data")
    export_parser.add_argument(
        "--format",
        choices=["json", "csv"],
        default="json",
        help="Export format (default: json)",
    )
    export_parser.add_argument(
        "--output",
        "-o",
        help="Output file path (default: stdout)",
    )
    export_parser.add_argument(
        "--start-date",
        help="Start date (ISO format)",
    )
    export_parser.add_argument(
        "--end-date",
        help="End date (ISO format)",
    )
    _add_backend_option(export_parser)

    # validate-config command: static checks on a YAML configuration file
    validate_parser = subparsers.add_parser("validate-config", help="Validate configuration")
    validate_parser.add_argument(
        "--config",
        "-c",
        help="Configuration file path",
    )

    # models command: list known models and their per-1K-token pricing
    models_parser = subparsers.add_parser("models", help="List supported models and pricing")
    models_parser.add_argument(
        "--provider",
        help="Filter by provider (e.g., openai, anthropic, bedrock)",
    )

    return parser
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def cmd_status(args: argparse.Namespace) -> int:
    """Print today's cost summary and a brief health overview.

    Args:
        args: Parsed CLI arguments; ``args.backend`` selects the backend URL.

    Returns:
        0 always; the tracker is closed even if printing fails.
    """
    tracker = CostTracker(backend=args.backend)

    try:
        # Today's aggregate numbers.
        report = tracker.daily_report()

        banner = "=" * 50
        print(banner)
        print("LLM Cost Guard - Status")
        print(banner)
        print()
        print(f"Today's Summary ({datetime.now().strftime('%Y-%m-%d')}):")
        print(f"  Total Cost: ${report.total_cost:.4f}")
        print(f"  Total Calls: {report.total_calls}")
        print(f"  Input Tokens: {report.total_input_tokens:,}")
        print(f"  Output Tokens: {report.total_output_tokens:,}")
        # max(1, ...) guards the division when no calls were made today.
        success_pct = report.successful_calls / max(1, report.total_calls) * 100
        print(f"  Success Rate: {success_pct:.1f}%")

        if report.cache_hits > 0:
            print(f"  Cache Hits: {report.cache_hits}")
            print(f"  Cache Savings: ${report.cache_savings:.4f}")

        print()

        # Quick health overview appended below the cost summary.
        health = tracker.health_check()
        print("Health Status:")
        print(f"  Backend: {'✓ Connected' if health.backend_connected else '✗ Disconnected'}")
        print(f"  Pricing: {'✓ Fresh' if health.pricing_fresh else '⚠ Stale'}")

        if health.errors:
            print("  Errors:")
            for error in health.errors:
                print(f"    - {error}")

        return 0

    finally:
        tracker.close()
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def cmd_health(args: argparse.Namespace) -> int:
    """Run a tracker health check and print the result.

    Args:
        args: Parsed CLI arguments; ``args.backend`` selects the backend URL.

    Returns:
        0 when healthy, 1 otherwise (usable directly as a process exit code).
    """
    tracker = CostTracker(backend=args.backend)

    try:
        health = tracker.health_check()

        print("LLM Cost Guard - Health Check")
        print("-" * 40)

        # Fixed label/value rows for the three core indicators.
        rows = [
            ("Overall", "✓ Healthy" if health.healthy else "✗ Unhealthy"),
            ("Backend", "✓ Connected" if health.backend_connected else "✗ Disconnected"),
            ("Pricing", "✓ Fresh" if health.pricing_fresh else "⚠ Stale"),
        ]
        for label, value in rows:
            print(f"{label}: {value}")

        # Optional timestamps, shown only when available.
        if health.last_record_time:
            print(f"Last Record: {health.last_record_time.isoformat()}")
        if health.pricing_last_updated:
            print(f"Pricing Updated: {health.pricing_last_updated.isoformat()}")

        if health.errors:
            print("\nErrors:")
            for err in health.errors:
                print(f"  - {err}")

        return 0 if health.healthy else 1

    finally:
        tracker.close()
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def cmd_report(args: argparse.Namespace) -> int:
    """Generate and print a cost report for the selected period.

    Args:
        args: Parsed CLI arguments: ``period`` (day/week/month),
            ``group_by`` (optional list of fields), ``format``
            (text/json/csv) and ``backend``.

    Returns:
        0 on success.
    """
    tracker = CostTracker(backend=args.backend)

    try:
        # Compute the start of the reporting window (local time).
        now = datetime.now()
        midnight = now.replace(hour=0, minute=0, second=0, microsecond=0)
        if args.period == "day":
            start = midnight
        elif args.period == "week":
            # Weeks start on Monday (weekday() == 0).
            start = midnight - timedelta(days=now.weekday())
        else:  # month
            start = midnight.replace(day=1)

        report = tracker.get_costs(
            start_date=start.isoformat(),
            group_by=args.group_by,
        )

        if args.format == "json":
            output: Dict[str, Any] = {
                "period": args.period,
                "start_date": start.isoformat(),
                "end_date": now.isoformat(),
                "total_cost": report.total_cost,
                "total_calls": report.total_calls,
                "total_input_tokens": report.total_input_tokens,
                "total_output_tokens": report.total_output_tokens,
                "successful_calls": report.successful_calls,
                "failed_calls": report.failed_calls,
            }
            if report.grouped_data:
                output["groups"] = report.grouped_data.get("groups", [])
            print(json.dumps(output, indent=2))

        elif args.format == "csv":
            # Use the csv module so fields containing commas or quotes are
            # escaped correctly (a plain ",".join produced broken rows for
            # such values).
            import csv

            writer = csv.writer(sys.stdout, lineterminator="\n")
            groups = (report.grouped_data or {}).get("groups") or []
            if groups:
                # Header is taken from the first group's keys.
                headers = list(groups[0].keys())
                writer.writerow(headers)
                for group in groups:
                    writer.writerow([group.get(h, "") for h in headers])
            else:
                writer.writerow(["date", "cost", "calls", "input_tokens", "output_tokens"])
                writer.writerow(
                    [
                        start.strftime("%Y-%m-%d"),
                        f"{report.total_cost:.4f}",
                        report.total_calls,
                        report.total_input_tokens,
                        report.total_output_tokens,
                    ]
                )

        else:  # text
            print(f"\nCost Report - {args.period.title()}")
            print(f"Period: {start.strftime('%Y-%m-%d')} to {now.strftime('%Y-%m-%d')}")
            print("=" * 60)
            print(f"Total Cost: ${report.total_cost:.4f}")
            print(f"Total Calls: {report.total_calls}")
            print(f"Input Tokens: {report.total_input_tokens:,}")
            print(f"Output Tokens: {report.total_output_tokens:,}")
            print(f"Successful: {report.successful_calls}")
            print(f"Failed: {report.failed_calls}")

            if report.grouped_data and report.grouped_data.get("groups"):
                print(f"\nBy {', '.join(args.group_by or [])}:")
                print("-" * 60)
                for group in report.grouped_data["groups"]:
                    # Label each row with its grouping-key values.
                    label = ", ".join(
                        f"{key}={group[key]}"
                        for key in (args.group_by or [])
                        if key in group
                    )
                    cost = group.get("cost", 0)
                    calls = group.get("calls", 0)
                    print(f"  {label}: ${cost:.4f} ({calls} calls)")

        return 0

    finally:
        tracker.close()
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def cmd_pricing_status(args: argparse.Namespace) -> int:
    """Report how fresh the loaded pricing data is, per provider.

    Args:
        args: Parsed CLI arguments (unused by this command).

    Returns:
        0 always.
    """
    loader = PricingLoader()

    print("LLM Cost Guard - Pricing Status")
    print("-" * 40)

    # Show the last-update timestamp, or "Never" when none is recorded.
    last = loader.last_updated
    print(f"Last Updated: {last.isoformat() if last else 'Never'}")

    print(f"Stale: {'Yes ⚠' if loader.is_stale else 'No ✓'}")
    print(f"Very Stale: {'Yes ✗' if loader.is_very_stale else 'No ✓'}")

    print("\nProvider Versions:")
    for provider, version in loader.pricing_version.items():
        print(f"  {provider}: {version}")

    return 0
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def cmd_update_pricing(args: argparse.Namespace) -> int:
    """Reload pricing data from the bundled local files.

    Args:
        args: Parsed CLI arguments (unused by this command).

    Returns:
        0 always.
    """
    print("Refreshing pricing data from local files...")

    loader = PricingLoader()
    loader.refresh()

    updated = loader.last_updated
    print("✓ Pricing data refreshed")
    print(f"  Last updated: {updated.isoformat() if updated else 'Never'}")

    return 0
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def cmd_export(args: argparse.Namespace) -> int:
    """Export raw cost records as JSON or CSV.

    Args:
        args: Parsed CLI arguments: ``format`` (json/csv), ``output``
            (file path, or None for stdout), optional ``start_date`` /
            ``end_date`` (ISO strings) and ``backend``.

    Returns:
        0 on success.

    Raises:
        ValueError: If a supplied date is not in ISO format
            (``datetime.fromisoformat``).
    """
    tracker = CostTracker(backend=args.backend)

    try:
        start = datetime.fromisoformat(args.start_date) if args.start_date else None
        end = datetime.fromisoformat(args.end_date) if args.end_date else None

        # NOTE(review): reaches into the tracker's private backend attribute;
        # consider exposing a public record-export API on CostTracker.
        records = tracker._backend.get_records(start_date=start, end_date=end)

        if args.format == "json":
            data = [
                {
                    "timestamp": r.timestamp.isoformat(),
                    "provider": r.provider,
                    "model": r.model,
                    "input_tokens": r.input_tokens,
                    "output_tokens": r.output_tokens,
                    "input_cost": r.input_cost,
                    "output_cost": r.output_cost,
                    "total_cost": r.total_cost,
                    "latency_ms": r.latency_ms,
                    "success": r.success,
                    "error_type": r.error_type,
                    "cached": r.cached,
                    "tags": r.tags,
                }
                for r in records
            ]
            output = json.dumps(data, indent=2)

        else:  # csv
            # Use the csv module so provider/model values containing commas
            # or quotes are escaped correctly (a plain f-string join produced
            # broken rows for such values).
            import csv
            import io

            buf = io.StringIO()
            writer = csv.writer(buf, lineterminator="\n")
            writer.writerow(
                [
                    "timestamp",
                    "provider",
                    "model",
                    "input_tokens",
                    "output_tokens",
                    "total_cost",
                    "latency_ms",
                    "success",
                ]
            )
            for r in records:
                writer.writerow(
                    [
                        r.timestamp.isoformat(),
                        r.provider,
                        r.model,
                        r.input_tokens,
                        r.output_tokens,
                        f"{r.total_cost:.6f}",
                        r.latency_ms,
                        r.success,
                    ]
                )
            # Drop the trailing newline so file output matches the previous
            # "\n".join behavior.
            output = buf.getvalue().rstrip("\n")

        if args.output:
            with open(args.output, "w") as f:
                f.write(output)
            print(f"Exported {len(records)} records to {args.output}")
        else:
            print(output)

        return 0

    finally:
        tracker.close()
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def cmd_validate_config(args: argparse.Namespace) -> int:
    """Validate a YAML configuration file's structure.

    Checks that each entry under ``budgets`` has ``name`` and ``limit``
    fields, and reports how many budgets/rate limits were found.

    Args:
        args: Parsed CLI arguments; ``args.config`` is the file path.

    Returns:
        0 when the file exists, parses, and passes all checks; 1 otherwise.
    """
    if not args.config:
        print("No configuration file specified.")
        print("Use --config to specify a configuration file.")
        return 1

    # Import PyYAML separately: if it is missing, the previous combined
    # try/except raised a NameError while evaluating `yaml.YAMLError` in
    # the handler chain instead of giving a clear message.
    try:
        import yaml
    except ImportError:
        print("✗ PyYAML is required for config validation (pip install pyyaml)")
        return 1

    try:
        with open(args.config, "r") as f:
            config = yaml.safe_load(f)

        print(f"Configuration file: {args.config}")
        print("-" * 40)

        problems = 0

        # Validate budgets
        if "budgets" in config:
            print(f"✓ Found {len(config['budgets'])} budget(s)")
            for budget in config["budgets"]:
                if "name" not in budget:
                    print("  ✗ Budget missing 'name' field")
                    problems += 1
                if "limit" not in budget:
                    print(f"  ✗ Budget '{budget.get('name', 'unknown')}' missing 'limit' field")
                    problems += 1

        # Validate rate limits
        if "rate_limits" in config:
            print(f"✓ Found {len(config['rate_limits'])} rate limit(s)")

        # Previously the command printed "valid" and returned 0 even when
        # ✗ problems had been reported above; fail explicitly instead.
        if problems:
            print(f"\n✗ Configuration has {problems} problem(s)")
            return 1

        print("\n✓ Configuration is valid")
        return 0

    except FileNotFoundError:
        print(f"✗ Configuration file not found: {args.config}")
        return 1
    except yaml.YAMLError as e:
        print(f"✗ Invalid YAML: {e}")
        return 1
    except Exception as e:
        print(f"✗ Error validating configuration: {e}")
        return 1
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
def cmd_models(args: argparse.Namespace) -> int:
    """List every known model with its per-1K-token pricing.

    Args:
        args: Parsed CLI arguments; ``args.provider`` optionally restricts
            the listing to one provider.

    Returns:
        0 always.
    """
    loader = PricingLoader()
    models = loader.get_all_models(args.provider)

    print("Supported Models and Pricing")
    print("=" * 70)

    for provider_name, model_list in sorted(models.items()):
        print(f"\n{provider_name.upper()}")
        print("-" * 70)

        for model_name in sorted(model_list):
            try:
                pricing = loader.get_pricing(provider_name, model_name)
                line = (
                    f"  {model_name:40} "
                    f"Input: ${pricing.input_cost_per_1k:.6f}/1K "
                    f"Output: ${pricing.output_cost_per_1k:.6f}/1K"
                )
            except Exception:
                # Pricing lookup failed for this model; still list it.
                line = f"  {model_name:40} (pricing unavailable)"
            print(line)

    return 0
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
def main() -> int:
    """CLI entry point: parse arguments and dispatch to a command handler.

    Returns:
        The selected handler's exit code; 0 when no command was given
        (help is printed); 1 for an unrecognized command.
    """
    parser = create_parser()
    args = parser.parse_args()

    # No subcommand: show usage and exit cleanly.
    if not args.command:
        parser.print_help()
        return 0

    # Command-name → handler dispatch table.
    dispatch = {
        "status": cmd_status,
        "health": cmd_health,
        "report": cmd_report,
        "pricing-status": cmd_pricing_status,
        "update-pricing": cmd_update_pricing,
        "export": cmd_export,
        "validate-config": cmd_validate_config,
        "models": cmd_models,
    }

    handler = dispatch.get(args.command)
    if handler is None:
        print(f"Unknown command: {args.command}")
        return 1
    return handler(args)
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
# Script entry point: exit with main()'s return code so shells and CI
# can observe command success/failure.
if __name__ == "__main__":
    sys.exit(main())
|
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Wrapped Anthropic client with automatic cost tracking.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import time
|
|
6
|
+
from typing import Any, Dict, Optional, TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from llm_cost_guard import CostTracker
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class TrackedAnthropic:
    """
    Anthropic client wrapper that records cost/usage for every call.

    The wrapper exposes a ``messages`` attribute mirroring the Anthropic
    SDK's messages API; each call is forwarded to the real client and its
    token usage is recorded with the supplied tracker.

    Usage:
        from llm_cost_guard import CostTracker
        from llm_cost_guard.clients import TrackedAnthropic

        tracker = CostTracker()
        client = TrackedAnthropic(tracker=tracker)

        response = client.messages.create(
            model="claude-3-5-sonnet-20241022",
            messages=[{"role": "user", "content": "Hello!"}]
        )
        # Cost is automatically tracked
    """

    def __init__(
        self,
        tracker: "CostTracker",
        client: Optional[Any] = None,
        tags: Optional[Dict[str, str]] = None,
        **anthropic_kwargs: Any,
    ):
        """
        Initialize the tracked Anthropic client.

        Args:
            tracker: CostTracker instance that receives usage records.
            client: Optional existing Anthropic client to wrap; when omitted
                a new one is created from ``anthropic_kwargs``.
            tags: Default tags applied to every call.
            **anthropic_kwargs: Forwarded to the Anthropic constructor.

        Raises:
            ImportError: If the ``anthropic`` package is not installed.
        """
        # Imported lazily so the package works without the anthropic extra.
        try:
            from anthropic import Anthropic
        except ImportError:
            raise ImportError(
                "Anthropic is required for this client. "
                "Install with: pip install llm-cost-guard[anthropic]"
            )

        self._tracker = tracker
        self._default_tags = tags or {}
        if client is None:
            client = Anthropic(**anthropic_kwargs)
        self._client = client

        # Expose the tracked messages surface.
        self.messages = _TrackedMessages(self._client.messages, self._tracker, self._default_tags)

    def close(self) -> None:
        """Close the underlying Anthropic client."""
        self._client.close()

    def __enter__(self) -> "TrackedAnthropic":
        return self

    def __exit__(self, *exc_info: Any) -> None:
        self.close()
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class _TrackedMessages:
    """Wrapped ``messages`` API that records cost/usage for each call."""

    def __init__(self, messages: Any, tracker: "CostTracker", default_tags: Dict[str, str]):
        self._messages = messages          # underlying Anthropic messages resource
        self._tracker = tracker            # destination for usage records
        self._default_tags = default_tags  # merged into every call's tags

    def create(
        self,
        *,
        tags: Optional[Dict[str, str]] = None,
        **kwargs: Any,
    ) -> Any:
        """Create a message and record its cost.

        Args:
            tags: Per-call tags merged over the client's default tags.
            **kwargs: Forwarded verbatim to ``messages.create``.

        Returns:
            The provider response, unchanged.

        Raises:
            Whatever the underlying client raises; the failure is recorded
            (with zero tokens) before the exception propagates.
        """
        start_time = time.time()
        success = True
        error_type = None
        response = None

        try:
            response = self._messages.create(**kwargs)
            return response
        except Exception as e:
            success = False
            error_type = type(e).__name__
            raise
        finally:
            latency_ms = int((time.time() - start_time) * 1000)

            if response is not None:
                # Successful call: usage/model come from the response itself.
                self._record_response(
                    response, kwargs.get("model"), tags, success, error_type, latency_ms
                )
            elif not success:
                # Previously failed calls were silently dropped even though
                # success/error_type were computed; record them with zero
                # tokens so failure counts and latency appear in reports.
                all_tags = dict(self._default_tags)
                if tags:
                    all_tags.update(tags)
                self._tracker.record(
                    provider="anthropic",
                    model=kwargs.get("model") or "unknown",
                    input_tokens=0,
                    output_tokens=0,
                    tags=all_tags,
                    success=False,
                    error_type=error_type,
                    latency_ms=latency_ms,
                )

    def _record_response(
        self,
        response: Any,
        model_hint: Optional[str],
        tags: Optional[Dict[str, str]],
        success: bool,
        error_type: Optional[str],
        latency_ms: int,
    ) -> None:
        """Extract usage from *response* and record it with the tracker.

        Args:
            response: The raw Anthropic response object.
            model_hint: The ``model`` kwarg from the request, used when the
                response does not identify its model.
            tags: Per-call tags merged over the default tags.
            success: Whether the API call succeeded.
            error_type: Exception class name on failure, else None.
            latency_ms: Wall-clock call duration in milliseconds.
        """
        # Imported here to avoid a module-level import cycle with providers.
        from llm_cost_guard.providers.anthropic import AnthropicProvider

        provider = AnthropicProvider()
        usage = provider.extract_usage(response)
        model = provider.extract_model(response)

        # Fall back to the request's model kwarg when extraction failed.
        if model == "unknown" and model_hint:
            model = model_hint

        all_tags = dict(self._default_tags)
        if tags:
            all_tags.update(tags)

        self._tracker.record(
            provider="anthropic",
            model=model,
            input_tokens=usage.input_tokens,
            output_tokens=usage.output_tokens,
            tags=all_tags,
            success=success,
            error_type=error_type,
            latency_ms=latency_ms,
            cached_tokens=usage.cached_tokens,
        )

    def stream(
        self,
        *,
        tags: Optional[Dict[str, str]] = None,
        **kwargs: Any,
    ) -> Any:
        """Create a streaming message whose usage is recorded on close.

        Args:
            tags: Per-call tags merged over the client's default tags.
            **kwargs: Forwarded verbatim to ``messages.stream``.

        Returns:
            A ``_TrackedStream`` wrapping the SDK stream; usage is recorded
            when the stream's context manager exits.
        """
        start_time = time.time()

        stream = self._messages.stream(**kwargs)

        # Wrap in a tracking context so token counts harvested from events
        # are recorded once the stream completes.
        return _TrackedStream(
            stream,
            self._tracker,
            self._default_tags,
            tags,
            kwargs.get("model"),
            start_time,
        )
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
class _TrackedStream:
    """Wrapper for streaming responses that records usage when the stream closes.

    Token counts are harvested from streaming events (``message_start`` for
    the model name and input tokens, ``message_delta`` for output tokens),
    and a single usage record is written when the ``with`` block exits.
    """

    def __init__(
        self,
        stream: Any,
        tracker: "CostTracker",
        default_tags: Dict[str, str],
        tags: Optional[Dict[str, str]],
        model_hint: Optional[str],
        start_time: float,
    ):
        self._stream = stream            # underlying SDK stream manager
        self._tracker = tracker          # destination for the usage record
        self._default_tags = default_tags
        self._tags = tags                # per-call tags (override defaults)
        self._model_hint = model_hint    # request's model kwarg, fallback name
        self._start_time = start_time    # request start, for latency
        self._input_tokens = 0
        self._output_tokens = 0
        self._model = "unknown"

    def __enter__(self) -> "_TrackedStream":
        self._stream.__enter__()
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> Any:
        # Propagate the inner __exit__ result so any SDK-side exception
        # suppression still works (previously it was discarded).
        result = self._stream.__exit__(exc_type, exc_val, exc_tb)

        # Record the call. Previously every stream was recorded with
        # success=True even when the with-block raised; reflect the actual
        # outcome instead.
        latency_ms = int((time.time() - self._start_time) * 1000)

        all_tags = dict(self._default_tags)
        if self._tags:
            all_tags.update(self._tags)

        model = self._model if self._model != "unknown" else (self._model_hint or "unknown")

        self._tracker.record(
            provider="anthropic",
            model=model,
            input_tokens=self._input_tokens,
            output_tokens=self._output_tokens,
            tags=all_tags,
            success=exc_type is None,
            error_type=exc_type.__name__ if exc_type is not None else None,
            latency_ms=latency_ms,
        )
        return result

    def __iter__(self):
        # NOTE(review): this iterates the wrapped stream object directly;
        # confirm the SDK object returned by messages.stream() is iterable
        # in this form (some stream managers must be entered first).
        for event in self._stream:
            self._handle_event(event)
            yield event

    def _handle_event(self, event: Any) -> None:
        """Harvest usage info from a single streaming event."""
        event_type = getattr(event, "type", "")

        if event_type == "message_start":
            # message_start carries the model name and input token count.
            if hasattr(event, "message"):
                self._model = getattr(event.message, "model", self._model)
                if hasattr(event.message, "usage"):
                    self._input_tokens = getattr(event.message.usage, "input_tokens", 0)
        elif event_type == "message_delta":
            # message_delta reports output tokens; keep the latest value.
            if hasattr(event, "usage"):
                self._output_tokens = getattr(event.usage, "output_tokens", 0)