llm-cost-guard 0.1.2__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llm_cost_guard/tracker.py CHANGED
@@ -23,6 +23,7 @@ from llm_cost_guard.pricing.loader import PricingLoader, get_pricing_loader
23
23
  from llm_cost_guard.providers import detect_provider, get_provider
24
24
  from llm_cost_guard.rate_limit import RateLimit, RateLimiter
25
25
  from llm_cost_guard.span import Span, get_current_span
26
+ from llm_cost_guard.audit import AuditLogger, AuditBackend, LoggingAuditBackend
26
27
 
27
28
  logger = logging.getLogger(__name__)
28
29
 
@@ -52,6 +53,8 @@ class CostTracker:
52
53
  budget_mode: Literal["local", "distributed"] = "local",
53
54
  streaming_budget_mode: Literal["estimate", "actual"] = "actual",
54
55
  streaming_max_output_estimate: int = 4096,
56
+ audit_enabled: bool = True,
57
+ audit_backend: Optional[AuditBackend] = None,
55
58
  **backend_kwargs: Any,
56
59
  ):
57
60
  """
@@ -72,6 +75,8 @@ class CostTracker:
72
75
  budget_mode: Budget enforcement mode (local or distributed)
73
76
  streaming_budget_mode: How to handle streaming budgets
74
77
  streaming_max_output_estimate: Max output tokens to estimate for streaming
78
+ audit_enabled: Enable audit logging for compliance
79
+ audit_backend: Custom audit backend (defaults to logging)
75
80
  """
76
81
  self._auto_detect_provider = auto_detect_provider
77
82
  self._on_tracking_failure = on_tracking_failure
@@ -83,27 +88,59 @@ class CostTracker:
83
88
  self._streaming_budget_mode = streaming_budget_mode
84
89
  self._streaming_max_output_estimate = streaming_max_output_estimate
85
90
 
91
+ # Graceful degradation metrics
92
+ self._metrics = {
93
+ "backend_failures": 0,
94
+ "fallback_activations": 0,
95
+ "budget_checks": 0,
96
+ "budget_exceeded_count": 0,
97
+ "rate_limit_exceeded_count": 0,
98
+ "tracking_errors": 0,
99
+ }
100
+ self._metrics_lock = threading.Lock()
101
+
102
+ # Initialize audit logging
103
+ self._audit = AuditLogger(
104
+ backend=audit_backend or LoggingAuditBackend(),
105
+ enabled=audit_enabled,
106
+ )
107
+
86
108
  # Initialize backend
87
109
  self._backend_url = backend
88
110
  self._fallback_backend: Optional[MemoryBackend] = None
111
+ self._using_fallback = False
89
112
  try:
90
113
  self._backend: Backend = get_backend(backend, **backend_kwargs)
91
114
  except Exception as e:
115
+ self._increment_metric("backend_failures")
92
116
  if on_tracking_failure == "block":
93
117
  raise TrackingUnavailableError(f"Failed to initialize backend: {e}", backend)
94
118
  elif on_tracking_failure == "fallback":
95
119
  logger.warning(f"Failed to initialize backend {backend}, using memory fallback: {e}")
96
120
  self._backend = MemoryBackend()
97
121
  self._fallback_backend = self._backend
122
+ self._using_fallback = True
123
+ self._increment_metric("fallback_activations")
124
+ self._audit.log_fallback_activated(backend, "memory", str(e))
98
125
  else:
99
126
  logger.warning(f"Failed to initialize backend {backend}: {e}")
100
127
  self._backend = MemoryBackend()
128
+ self._audit.log_tracking_failure(str(e), backend, "allow")
101
129
 
102
130
  # Initialize pricing
103
131
  self._pricing = PricingLoader(pricing_overrides=pricing_overrides)
104
132
 
105
133
  # Initialize budget tracking
106
134
  self._budget_tracker = BudgetTracker(budgets)
135
+
136
+ # Log budget creation for audit
137
+ for budget in (budgets or []):
138
+ self._audit.log_budget_created(
139
+ budget.name,
140
+ budget.limit,
141
+ budget.period,
142
+ budget.action.value,
143
+ )
107
144
 
108
145
  # Initialize rate limiting
109
146
  self._rate_limiter = RateLimiter(rate_limits)
@@ -115,6 +152,11 @@ class CostTracker:
115
152
  # Last call tracking
116
153
  self._last_record: Optional[CostRecord] = None
117
154
  self._lock = threading.Lock()
155
+
156
+ def _increment_metric(self, metric: str, amount: int = 1) -> None:
157
+ """Thread-safe metric increment."""
158
+ with self._metrics_lock:
159
+ self._metrics[metric] = self._metrics.get(metric, 0) + amount
118
160
 
119
161
  def track(
120
162
  self,
@@ -344,13 +386,31 @@ class CostTracker:
344
386
  )
345
387
 
346
388
  # Check budgets
389
+ self._increment_metric("budget_checks")
347
390
  exceeded = self._budget_tracker.check_budget(total_cost, tags)
348
391
  for budget, action in exceeded:
349
- if action == BudgetAction.BLOCK:
392
+ current_spending = self._budget_tracker.get_spending(budget.name)
393
+
394
+ # Log to audit
395
+ if action == BudgetAction.WARN:
396
+ self._audit.log_budget_warning(
397
+ budget.name,
398
+ current_spending,
399
+ budget.limit,
400
+ current_spending / budget.limit,
401
+ )
402
+ elif action == BudgetAction.BLOCK:
403
+ self._increment_metric("budget_exceeded_count")
404
+ self._audit.log_budget_exceeded(
405
+ budget.name,
406
+ current_spending,
407
+ budget.limit,
408
+ "blocked",
409
+ )
350
410
  raise BudgetExceededError(
351
411
  f"Budget '{budget.name}' would be exceeded",
352
412
  budget=budget,
353
- current=self._budget_tracker.get_spending(budget.name),
413
+ current=current_spending,
354
414
  limit=budget.limit,
355
415
  )
356
416
 
@@ -358,6 +418,13 @@ class CostTracker:
358
418
  rate_exceeded = self._rate_limiter.check(model=model, provider=provider, tags=tags)
359
419
  if rate_exceeded:
360
420
  limit, current, retry_after = rate_exceeded[0]
421
+ self._increment_metric("rate_limit_exceeded_count")
422
+ self._audit.log_rate_limit_exceeded(
423
+ limit.name,
424
+ current,
425
+ limit.limit,
426
+ retry_after,
427
+ )
361
428
  raise RateLimitExceededError(
362
429
  f"Rate limit '{limit.name}' exceeded",
363
430
  limit_name=limit.name,
@@ -481,14 +548,24 @@ class CostTracker:
481
548
 
482
549
  def _handle_tracking_error(self, error: Exception) -> None:
483
550
  """Handle errors during tracking based on configuration."""
551
+ self._increment_metric("tracking_errors")
552
+ self._increment_metric("backend_failures")
553
+
484
554
  if self._on_tracking_failure == "block":
555
+ self._audit.log_tracking_failure(str(error), self._backend_url, "blocked")
485
556
  raise TrackingUnavailableError(str(error), self._backend_url)
486
557
  elif self._on_tracking_failure == "fallback":
487
558
  logger.warning(f"Tracking error, using fallback: {error}")
488
559
  if self._fallback_backend is None:
489
560
  self._fallback_backend = MemoryBackend()
561
+ self._using_fallback = True
562
+ self._increment_metric("fallback_activations")
563
+ self._audit.log_fallback_activated(
564
+ self._backend_url, "memory", str(error)
565
+ )
490
566
  else:
491
567
  logger.warning(f"Tracking error (allowing): {error}")
568
+ self._audit.log_tracking_failure(str(error), self._backend_url, "allowed")
492
569
 
493
570
  def _check_tag_cardinality(self, tags: Dict[str, str]) -> None:
494
571
  """Check and track tag cardinality."""
@@ -648,6 +725,10 @@ class CostTracker:
648
725
  except Exception as e:
649
726
  errors.append(f"Backend health check failed: {e}")
650
727
 
728
+ # Check if using fallback
729
+ if self._using_fallback:
730
+ errors.append("Using fallback backend (primary unavailable)")
731
+
651
732
  # Check pricing freshness
652
733
  pricing_fresh = not self._pricing.is_stale
653
734
  if self._pricing.is_stale:
@@ -660,7 +741,7 @@ class CostTracker:
660
741
  last_record_time = self._last_record.timestamp
661
742
 
662
743
  return HealthStatus(
663
- healthy=backend_connected and pricing_fresh and len(errors) == 0,
744
+ healthy=backend_connected and pricing_fresh and len(errors) == 0 and not self._using_fallback,
664
745
  backend_connected=backend_connected,
665
746
  pricing_fresh=pricing_fresh,
666
747
  last_record_time=last_record_time,
@@ -670,6 +751,36 @@ class CostTracker:
670
751
  pricing_last_updated=self._pricing.last_updated,
671
752
  )
672
753
 
754
+ def get_metrics(self) -> Dict[str, Any]:
755
+ """
756
+ Get tracker metrics for observability.
757
+
758
+ Returns metrics for:
759
+ - backend_failures: Number of backend operation failures
760
+ - fallback_activations: Number of times fallback was activated
761
+ - budget_checks: Total budget checks performed
762
+ - budget_exceeded_count: Number of budget exceeded events
763
+ - rate_limit_exceeded_count: Number of rate limit exceeded events
764
+ - tracking_errors: Total tracking errors
765
+ - using_fallback: Whether currently using fallback backend
766
+ """
767
+ with self._metrics_lock:
768
+ metrics = dict(self._metrics)
769
+
770
+ metrics["using_fallback"] = self._using_fallback
771
+ metrics["backend_url"] = self._backend_url
772
+
773
+ # Add backend-specific metrics if available
774
+ if hasattr(self._backend, "get_metrics"):
775
+ metrics["backend_metrics"] = self._backend.get_metrics()
776
+
777
+ return metrics
778
+
779
+ @property
780
+ def audit(self) -> AuditLogger:
781
+ """Get the audit logger for querying audit events."""
782
+ return self._audit
783
+
673
784
  def on_budget_warning(self, callback: Callable[[Budget, float], None]) -> None:
674
785
  """Register a callback for budget warnings."""
675
786
  self._budget_tracker.on_warning(callback)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: llm-cost-guard
3
- Version: 0.1.2
3
+ Version: 0.2.0
4
4
  Summary: Real-time cost tracking, budget enforcement, and usage analytics for LLM applications
5
5
  Project-URL: Homepage, https://github.com/prashantdudami/llm-cost-guard
6
6
  Project-URL: Documentation, https://github.com/prashantdudami/llm-cost-guard#readme
@@ -334,6 +334,57 @@ tracker = CostTracker(
334
334
  )
335
335
  ```
336
336
 
337
+ ## Audit Logging (v0.2.0+)
338
+
339
+ Enterprise-ready audit trails for compliance:
340
+
341
+ ```python
342
+ from llm_cost_guard import AuditEventType, CostTracker, FileAuditBackend
343
+
344
+ # Enable audit logging
345
+ tracker = CostTracker(
346
+ audit_enabled=True,
347
+ audit_backend=FileAuditBackend("audit.log"),
348
+ )
349
+
350
+ # Query audit history
351
+ events = tracker.audit.query(
352
+ event_type=AuditEventType.BUDGET_EXCEEDED,
353
+ start_date="2024-01-01",
354
+ )
355
+
356
+ # Get budget-specific history
357
+ history = tracker.audit.get_budget_history("daily")
358
+ ```
359
+
360
+ Audit events include:
361
+ - Budget created/modified/deleted
362
+ - Budget warnings and exceeded events
363
+ - Rate limit exceeded events
364
+ - Tracking failures and fallback activations
365
+
366
+ ## Observability Metrics (v0.2.0+)
367
+
368
+ Track health and degradation:
369
+
370
+ ```python
371
+ # Get tracker metrics
372
+ metrics = tracker.get_metrics()
373
+ print(metrics)
374
+ # {
375
+ # "backend_failures": 0,
376
+ # "fallback_activations": 0,
377
+ # "budget_exceeded_count": 3,
378
+ # "tracking_errors": 0,
379
+ # "using_fallback": False,
380
+ # }
381
+
382
+ # Health check
383
+ health = tracker.health_check()
384
+ print(health.healthy) # True/False
385
+ print(health.errors) # List of issues
386
+ ```
387
+
337
388
  ## Custom Pricing
338
389
 
339
390
  For negotiated enterprise rates:
@@ -349,6 +400,30 @@ tracker = CostTracker(
349
400
  )
350
401
  ```
351
402
 
403
+ ## Current Limitations
404
+
405
+ Being transparent about what is and isn't production-ready yet:
406
+
407
+ | Feature | Status | Notes |
408
+ |---------|--------|-------|
409
+ | Distributed budgets (Redis) | ✅ v0.2.0 | Atomic operations with Lua scripts |
410
+ | Audit logging | ✅ v0.2.0 | File and logging backends |
411
+ | Graceful degradation metrics | ✅ v0.2.0 | Track failures and fallbacks |
412
+ | PostgreSQL backend | 🚧 Planned | Use SQLite or Redis for now |
413
+ | DynamoDB backend | 🚧 Planned | Use SQLite or Redis for now |
414
+ | Encryption at rest | 🚧 Planned | Use encrypted volumes as workaround |
415
+ | Multi-tenancy optimization | 🚧 Planned | Use tag-scoped budgets for now |
416
+ | Streaming cost estimation | ⚠️ Limited | Actual cost tracked on completion |
417
+ | Fine-tuning cost tracking | ❌ Not supported | |
418
+
419
+ ### Recommended for Production
420
+
421
+ | Deployment Size | Backend | Notes |
422
+ |-----------------|---------|-------|
423
+ | Single instance | SQLite | Simple, no setup |
424
+ | Multiple instances | Redis | Distributed budget enforcement |
425
+ | High-volume (>1k req/s) | Redis | With sampling (coming soon) |
426
+
352
427
  ## Contributing
353
428
 
354
429
  Contributions are welcome! Please read our contributing guidelines and submit pull requests.
@@ -1,14 +1,16 @@
1
- llm_cost_guard/__init__.py,sha256=XTXp-hAHbHD36YrJsZzWEQ-POPzcOq1WVGvSoUGrhCY,948
1
+ llm_cost_guard/__init__.py,sha256=o-AIhQ_c-Zr0XqH7KRhG6U4RjpXi8fSZbEzTJ3qKsEE,1245
2
+ llm_cost_guard/audit.py,sha256=WKJ-Q6arfhsP0y_Ibjkrd6-DBde7UWCIkQe3rGpJ5es,14280
2
3
  llm_cost_guard/budget.py,sha256=oT8m_7ir5hXfgxjfaaKDIAscRMuuk4PwseVz43IVAoA,10944
3
4
  llm_cost_guard/cli.py,sha256=GJZYwFkUHZ5txQhzBQkbnqQiZDr96Hf_zZ_hQNMvr1E,15166
4
5
  llm_cost_guard/exceptions.py,sha256=-WayjSAXLAY51FLdsFgGf0W8fMqGswq1zdnfY6ZUAhs,1727
5
6
  llm_cost_guard/models.py,sha256=ioH8-ZFt3PIx0AuDHEmPWtpOiW5HWw9tzI05B3bGI4U,3304
6
7
  llm_cost_guard/rate_limit.py,sha256=a3tG8RYSMAqirnmzSvNsJYOT7nDFAMoOU56OeUGuTFQ,7702
7
8
  llm_cost_guard/span.py,sha256=uXAIBVVLAfe7TDJwNl1TvG4Ay8RNNYSjJD2ZvLN7vYc,4828
8
- llm_cost_guard/tracker.py,sha256=RPOxXuocwISG-vycsPKZ8qZ2OuMxSN2tyGFG0Iqjffg,25322
9
- llm_cost_guard/backends/__init__.py,sha256=OSCbQRP_UHCyeOcHdRGEqBgA3o0TQGBqFEX8wE6DgP4,1594
9
+ llm_cost_guard/tracker.py,sha256=39p1LOQ0LoA4N003AhyA9c3gtemyPlIcYK7fuwcvM0g,29748
10
+ llm_cost_guard/backends/__init__.py,sha256=Jf6j8NWC4qapSU7o2Qt49-7tek20aQOmJQpsgFscM4o,1602
10
11
  llm_cost_guard/backends/base.py,sha256=8r9mm4mLNJ4jW9zhaDpvLpfFNg-uhdHCqWMp2L7K-SE,3289
11
12
  llm_cost_guard/backends/memory.py,sha256=O7FB2m_3qEoFG7cyQGbA1NanO-CalUkJCk8T666AkVE,9176
13
+ llm_cost_guard/backends/redis_backend.py,sha256=QbYzOfD8uL1XifYC5ZK8hONOzwAs02A2FWAMGUKudu8,19361
12
14
  llm_cost_guard/backends/sqlite.py,sha256=Qps7opmUm07zDYl1YPC12uLGWhwNQrBZymtaVWOaePU,14411
13
15
  llm_cost_guard/clients/__init__.py,sha256=yY-8_u7nW03gZVrnNiAyvwjarCcP4exo2WfnfgwuUNU,237
14
16
  llm_cost_guard/clients/anthropic.py,sha256=pG9wqCCKdMGu8igIlFVSb96ndifUm9N8CseaDcXTuaU,6639
@@ -29,8 +31,8 @@ llm_cost_guard/providers/bedrock.py,sha256=WglOxb4a5so3kecsdUMzxIdS5LrvjRnH8rQvD
29
31
  llm_cost_guard/providers/openai.py,sha256=dPYwYpwsYLdGHg0O6UYExl9-NFb6JN3m6xgrqN14gGM,4073
30
32
  llm_cost_guard/tokenizers/__init__.py,sha256=qCdf9onm6uDeCrVIH6oLTTBte1IsmbCeyef5gm09dWc,178
31
33
  llm_cost_guard/tokenizers/base.py,sha256=roDyOMPuuGR2WCdR1Vns45UThrEB8fVYV-4iE6mu89Q,6901
32
- llm_cost_guard-0.1.2.dist-info/METADATA,sha256=mRBH7gLKgj_hAeVKF53-i-TnKPlyFqs7xf9pFVXKtGM,9970
33
- llm_cost_guard-0.1.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
34
- llm_cost_guard-0.1.2.dist-info/entry_points.txt,sha256=k383VtNlqTw4JKJOPzQaUKXnTxDdpRzafvhKlzrk9jw,59
35
- llm_cost_guard-0.1.2.dist-info/licenses/LICENSE,sha256=nIvGzmGHr6xbKpf-EbwVZPpQDx65K1A8JJw50dREvps,1072
36
- llm_cost_guard-0.1.2.dist-info/RECORD,,
34
+ llm_cost_guard-0.2.0.dist-info/METADATA,sha256=7L25b6dFUUDRtQCQAT_5K0_tW9BFvj1hjTse4zSGFKI,12120
35
+ llm_cost_guard-0.2.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
36
+ llm_cost_guard-0.2.0.dist-info/entry_points.txt,sha256=k383VtNlqTw4JKJOPzQaUKXnTxDdpRzafvhKlzrk9jw,59
37
+ llm_cost_guard-0.2.0.dist-info/licenses/LICENSE,sha256=nIvGzmGHr6xbKpf-EbwVZPpQDx65K1A8JJw50dREvps,1072
38
+ llm_cost_guard-0.2.0.dist-info/RECORD,,