llm-cost-guard 0.1.2__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_cost_guard/__init__.py +16 -1
- llm_cost_guard/audit.py +480 -0
- llm_cost_guard/backends/__init__.py +1 -1
- llm_cost_guard/backends/redis_backend.py +557 -0
- llm_cost_guard/tracker.py +114 -3
- {llm_cost_guard-0.1.2.dist-info → llm_cost_guard-0.2.0.dist-info}/METADATA +76 -1
- {llm_cost_guard-0.1.2.dist-info → llm_cost_guard-0.2.0.dist-info}/RECORD +10 -8
- {llm_cost_guard-0.1.2.dist-info → llm_cost_guard-0.2.0.dist-info}/WHEEL +0 -0
- {llm_cost_guard-0.1.2.dist-info → llm_cost_guard-0.2.0.dist-info}/entry_points.txt +0 -0
- {llm_cost_guard-0.1.2.dist-info → llm_cost_guard-0.2.0.dist-info}/licenses/LICENSE +0 -0
llm_cost_guard/tracker.py
CHANGED
|
@@ -23,6 +23,7 @@ from llm_cost_guard.pricing.loader import PricingLoader, get_pricing_loader
|
|
|
23
23
|
from llm_cost_guard.providers import detect_provider, get_provider
|
|
24
24
|
from llm_cost_guard.rate_limit import RateLimit, RateLimiter
|
|
25
25
|
from llm_cost_guard.span import Span, get_current_span
|
|
26
|
+
from llm_cost_guard.audit import AuditLogger, AuditBackend, LoggingAuditBackend
|
|
26
27
|
|
|
27
28
|
logger = logging.getLogger(__name__)
|
|
28
29
|
|
|
@@ -52,6 +53,8 @@ class CostTracker:
|
|
|
52
53
|
budget_mode: Literal["local", "distributed"] = "local",
|
|
53
54
|
streaming_budget_mode: Literal["estimate", "actual"] = "actual",
|
|
54
55
|
streaming_max_output_estimate: int = 4096,
|
|
56
|
+
audit_enabled: bool = True,
|
|
57
|
+
audit_backend: Optional[AuditBackend] = None,
|
|
55
58
|
**backend_kwargs: Any,
|
|
56
59
|
):
|
|
57
60
|
"""
|
|
@@ -72,6 +75,8 @@ class CostTracker:
|
|
|
72
75
|
budget_mode: Budget enforcement mode (local or distributed)
|
|
73
76
|
streaming_budget_mode: How to handle streaming budgets
|
|
74
77
|
streaming_max_output_estimate: Max output tokens to estimate for streaming
|
|
78
|
+
audit_enabled: Enable audit logging for compliance
|
|
79
|
+
audit_backend: Custom audit backend (defaults to logging)
|
|
75
80
|
"""
|
|
76
81
|
self._auto_detect_provider = auto_detect_provider
|
|
77
82
|
self._on_tracking_failure = on_tracking_failure
|
|
@@ -83,27 +88,59 @@ class CostTracker:
|
|
|
83
88
|
self._streaming_budget_mode = streaming_budget_mode
|
|
84
89
|
self._streaming_max_output_estimate = streaming_max_output_estimate
|
|
85
90
|
|
|
91
|
+
# Graceful degradation metrics
|
|
92
|
+
self._metrics = {
|
|
93
|
+
"backend_failures": 0,
|
|
94
|
+
"fallback_activations": 0,
|
|
95
|
+
"budget_checks": 0,
|
|
96
|
+
"budget_exceeded_count": 0,
|
|
97
|
+
"rate_limit_exceeded_count": 0,
|
|
98
|
+
"tracking_errors": 0,
|
|
99
|
+
}
|
|
100
|
+
self._metrics_lock = threading.Lock()
|
|
101
|
+
|
|
102
|
+
# Initialize audit logging
|
|
103
|
+
self._audit = AuditLogger(
|
|
104
|
+
backend=audit_backend or LoggingAuditBackend(),
|
|
105
|
+
enabled=audit_enabled,
|
|
106
|
+
)
|
|
107
|
+
|
|
86
108
|
# Initialize backend
|
|
87
109
|
self._backend_url = backend
|
|
88
110
|
self._fallback_backend: Optional[MemoryBackend] = None
|
|
111
|
+
self._using_fallback = False
|
|
89
112
|
try:
|
|
90
113
|
self._backend: Backend = get_backend(backend, **backend_kwargs)
|
|
91
114
|
except Exception as e:
|
|
115
|
+
self._increment_metric("backend_failures")
|
|
92
116
|
if on_tracking_failure == "block":
|
|
93
117
|
raise TrackingUnavailableError(f"Failed to initialize backend: {e}", backend)
|
|
94
118
|
elif on_tracking_failure == "fallback":
|
|
95
119
|
logger.warning(f"Failed to initialize backend {backend}, using memory fallback: {e}")
|
|
96
120
|
self._backend = MemoryBackend()
|
|
97
121
|
self._fallback_backend = self._backend
|
|
122
|
+
self._using_fallback = True
|
|
123
|
+
self._increment_metric("fallback_activations")
|
|
124
|
+
self._audit.log_fallback_activated(backend, "memory", str(e))
|
|
98
125
|
else:
|
|
99
126
|
logger.warning(f"Failed to initialize backend {backend}: {e}")
|
|
100
127
|
self._backend = MemoryBackend()
|
|
128
|
+
self._audit.log_tracking_failure(str(e), backend, "allow")
|
|
101
129
|
|
|
102
130
|
# Initialize pricing
|
|
103
131
|
self._pricing = PricingLoader(pricing_overrides=pricing_overrides)
|
|
104
132
|
|
|
105
133
|
# Initialize budget tracking
|
|
106
134
|
self._budget_tracker = BudgetTracker(budgets)
|
|
135
|
+
|
|
136
|
+
# Log budget creation for audit
|
|
137
|
+
for budget in (budgets or []):
|
|
138
|
+
self._audit.log_budget_created(
|
|
139
|
+
budget.name,
|
|
140
|
+
budget.limit,
|
|
141
|
+
budget.period,
|
|
142
|
+
budget.action.value,
|
|
143
|
+
)
|
|
107
144
|
|
|
108
145
|
# Initialize rate limiting
|
|
109
146
|
self._rate_limiter = RateLimiter(rate_limits)
|
|
@@ -115,6 +152,11 @@ class CostTracker:
|
|
|
115
152
|
# Last call tracking
|
|
116
153
|
self._last_record: Optional[CostRecord] = None
|
|
117
154
|
self._lock = threading.Lock()
|
|
155
|
+
|
|
156
|
+
def _increment_metric(self, metric: str, amount: int = 1) -> None:
|
|
157
|
+
"""Thread-safe metric increment."""
|
|
158
|
+
with self._metrics_lock:
|
|
159
|
+
self._metrics[metric] = self._metrics.get(metric, 0) + amount
|
|
118
160
|
|
|
119
161
|
def track(
|
|
120
162
|
self,
|
|
@@ -344,13 +386,31 @@ class CostTracker:
|
|
|
344
386
|
)
|
|
345
387
|
|
|
346
388
|
# Check budgets
|
|
389
|
+
self._increment_metric("budget_checks")
|
|
347
390
|
exceeded = self._budget_tracker.check_budget(total_cost, tags)
|
|
348
391
|
for budget, action in exceeded:
|
|
349
|
-
|
|
392
|
+
current_spending = self._budget_tracker.get_spending(budget.name)
|
|
393
|
+
|
|
394
|
+
# Log to audit
|
|
395
|
+
if action == BudgetAction.WARN:
|
|
396
|
+
self._audit.log_budget_warning(
|
|
397
|
+
budget.name,
|
|
398
|
+
current_spending,
|
|
399
|
+
budget.limit,
|
|
400
|
+
current_spending / budget.limit,
|
|
401
|
+
)
|
|
402
|
+
elif action == BudgetAction.BLOCK:
|
|
403
|
+
self._increment_metric("budget_exceeded_count")
|
|
404
|
+
self._audit.log_budget_exceeded(
|
|
405
|
+
budget.name,
|
|
406
|
+
current_spending,
|
|
407
|
+
budget.limit,
|
|
408
|
+
"blocked",
|
|
409
|
+
)
|
|
350
410
|
raise BudgetExceededError(
|
|
351
411
|
f"Budget '{budget.name}' would be exceeded",
|
|
352
412
|
budget=budget,
|
|
353
|
-
current=
|
|
413
|
+
current=current_spending,
|
|
354
414
|
limit=budget.limit,
|
|
355
415
|
)
|
|
356
416
|
|
|
@@ -358,6 +418,13 @@ class CostTracker:
|
|
|
358
418
|
rate_exceeded = self._rate_limiter.check(model=model, provider=provider, tags=tags)
|
|
359
419
|
if rate_exceeded:
|
|
360
420
|
limit, current, retry_after = rate_exceeded[0]
|
|
421
|
+
self._increment_metric("rate_limit_exceeded_count")
|
|
422
|
+
self._audit.log_rate_limit_exceeded(
|
|
423
|
+
limit.name,
|
|
424
|
+
current,
|
|
425
|
+
limit.limit,
|
|
426
|
+
retry_after,
|
|
427
|
+
)
|
|
361
428
|
raise RateLimitExceededError(
|
|
362
429
|
f"Rate limit '{limit.name}' exceeded",
|
|
363
430
|
limit_name=limit.name,
|
|
@@ -481,14 +548,24 @@ class CostTracker:
|
|
|
481
548
|
|
|
482
549
|
def _handle_tracking_error(self, error: Exception) -> None:
|
|
483
550
|
"""Handle errors during tracking based on configuration."""
|
|
551
|
+
self._increment_metric("tracking_errors")
|
|
552
|
+
self._increment_metric("backend_failures")
|
|
553
|
+
|
|
484
554
|
if self._on_tracking_failure == "block":
|
|
555
|
+
self._audit.log_tracking_failure(str(error), self._backend_url, "blocked")
|
|
485
556
|
raise TrackingUnavailableError(str(error), self._backend_url)
|
|
486
557
|
elif self._on_tracking_failure == "fallback":
|
|
487
558
|
logger.warning(f"Tracking error, using fallback: {error}")
|
|
488
559
|
if self._fallback_backend is None:
|
|
489
560
|
self._fallback_backend = MemoryBackend()
|
|
561
|
+
self._using_fallback = True
|
|
562
|
+
self._increment_metric("fallback_activations")
|
|
563
|
+
self._audit.log_fallback_activated(
|
|
564
|
+
self._backend_url, "memory", str(error)
|
|
565
|
+
)
|
|
490
566
|
else:
|
|
491
567
|
logger.warning(f"Tracking error (allowing): {error}")
|
|
568
|
+
self._audit.log_tracking_failure(str(error), self._backend_url, "allowed")
|
|
492
569
|
|
|
493
570
|
def _check_tag_cardinality(self, tags: Dict[str, str]) -> None:
|
|
494
571
|
"""Check and track tag cardinality."""
|
|
@@ -648,6 +725,10 @@ class CostTracker:
|
|
|
648
725
|
except Exception as e:
|
|
649
726
|
errors.append(f"Backend health check failed: {e}")
|
|
650
727
|
|
|
728
|
+
# Check if using fallback
|
|
729
|
+
if self._using_fallback:
|
|
730
|
+
errors.append("Using fallback backend (primary unavailable)")
|
|
731
|
+
|
|
651
732
|
# Check pricing freshness
|
|
652
733
|
pricing_fresh = not self._pricing.is_stale
|
|
653
734
|
if self._pricing.is_stale:
|
|
@@ -660,7 +741,7 @@ class CostTracker:
|
|
|
660
741
|
last_record_time = self._last_record.timestamp
|
|
661
742
|
|
|
662
743
|
return HealthStatus(
|
|
663
|
-
healthy=backend_connected and pricing_fresh and len(errors) == 0,
|
|
744
|
+
healthy=backend_connected and pricing_fresh and len(errors) == 0 and not self._using_fallback,
|
|
664
745
|
backend_connected=backend_connected,
|
|
665
746
|
pricing_fresh=pricing_fresh,
|
|
666
747
|
last_record_time=last_record_time,
|
|
@@ -670,6 +751,36 @@ class CostTracker:
|
|
|
670
751
|
pricing_last_updated=self._pricing.last_updated,
|
|
671
752
|
)
|
|
672
753
|
|
|
754
|
+
def get_metrics(self) -> Dict[str, Any]:
|
|
755
|
+
"""
|
|
756
|
+
Get tracker metrics for observability.
|
|
757
|
+
|
|
758
|
+
Returns metrics for:
|
|
759
|
+
- backend_failures: Number of backend operation failures
|
|
760
|
+
- fallback_activations: Number of times fallback was activated
|
|
761
|
+
- budget_checks: Total budget checks performed
|
|
762
|
+
- budget_exceeded_count: Number of budget exceeded events
|
|
763
|
+
- rate_limit_exceeded_count: Number of rate limit exceeded events
|
|
764
|
+
- tracking_errors: Total tracking errors
|
|
765
|
+
- using_fallback: Whether currently using fallback backend
|
|
766
|
+
"""
|
|
767
|
+
with self._metrics_lock:
|
|
768
|
+
metrics = dict(self._metrics)
|
|
769
|
+
|
|
770
|
+
metrics["using_fallback"] = self._using_fallback
|
|
771
|
+
metrics["backend_url"] = self._backend_url
|
|
772
|
+
|
|
773
|
+
# Add backend-specific metrics if available
|
|
774
|
+
if hasattr(self._backend, "get_metrics"):
|
|
775
|
+
metrics["backend_metrics"] = self._backend.get_metrics()
|
|
776
|
+
|
|
777
|
+
return metrics
|
|
778
|
+
|
|
779
|
+
@property
|
|
780
|
+
def audit(self) -> AuditLogger:
|
|
781
|
+
"""Get the audit logger for querying audit events."""
|
|
782
|
+
return self._audit
|
|
783
|
+
|
|
673
784
|
def on_budget_warning(self, callback: Callable[[Budget, float], None]) -> None:
|
|
674
785
|
"""Register a callback for budget warnings."""
|
|
675
786
|
self._budget_tracker.on_warning(callback)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: llm-cost-guard
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Real-time cost tracking, budget enforcement, and usage analytics for LLM applications
|
|
5
5
|
Project-URL: Homepage, https://github.com/prashantdudami/llm-cost-guard
|
|
6
6
|
Project-URL: Documentation, https://github.com/prashantdudami/llm-cost-guard#readme
|
|
@@ -334,6 +334,57 @@ tracker = CostTracker(
|
|
|
334
334
|
)
|
|
335
335
|
```
|
|
336
336
|
|
|
337
|
+
## Audit Logging (v0.2.0+)
|
|
338
|
+
|
|
339
|
+
Enterprise-ready audit trails for compliance:
|
|
340
|
+
|
|
341
|
+
```python
|
|
342
|
+
from llm_cost_guard import CostTracker, FileAuditBackend
|
|
343
|
+
|
|
344
|
+
# Enable audit logging
|
|
345
|
+
tracker = CostTracker(
|
|
346
|
+
audit_enabled=True,
|
|
347
|
+
audit_backend=FileAuditBackend("audit.log"),
|
|
348
|
+
)
|
|
349
|
+
|
|
350
|
+
# Query audit history
|
|
351
|
+
events = tracker.audit.query(
|
|
352
|
+
event_type=AuditEventType.BUDGET_EXCEEDED,
|
|
353
|
+
start_date="2024-01-01",
|
|
354
|
+
)
|
|
355
|
+
|
|
356
|
+
# Get budget-specific history
|
|
357
|
+
history = tracker.audit.get_budget_history("daily")
|
|
358
|
+
```
|
|
359
|
+
|
|
360
|
+
Audit events include:
|
|
361
|
+
- Budget created/modified/deleted
|
|
362
|
+
- Budget warnings and exceeded events
|
|
363
|
+
- Rate limit exceeded events
|
|
364
|
+
- Tracking failures and fallback activations
|
|
365
|
+
|
|
366
|
+
## Observability Metrics (v0.2.0+)
|
|
367
|
+
|
|
368
|
+
Track health and degradation:
|
|
369
|
+
|
|
370
|
+
```python
|
|
371
|
+
# Get tracker metrics
|
|
372
|
+
metrics = tracker.get_metrics()
|
|
373
|
+
print(metrics)
|
|
374
|
+
# {
|
|
375
|
+
# "backend_failures": 0,
|
|
376
|
+
# "fallback_activations": 0,
|
|
377
|
+
# "budget_exceeded_count": 3,
|
|
378
|
+
# "tracking_errors": 0,
|
|
379
|
+
# "using_fallback": False,
|
|
380
|
+
# }
|
|
381
|
+
|
|
382
|
+
# Health check
|
|
383
|
+
health = tracker.health_check()
|
|
384
|
+
print(health.healthy) # True/False
|
|
385
|
+
print(health.errors) # List of issues
|
|
386
|
+
```
|
|
387
|
+
|
|
337
388
|
## Custom Pricing
|
|
338
389
|
|
|
339
390
|
For negotiated enterprise rates:
|
|
@@ -349,6 +400,30 @@ tracker = CostTracker(
|
|
|
349
400
|
)
|
|
350
401
|
```
|
|
351
402
|
|
|
403
|
+
## Current Limitations
|
|
404
|
+
|
|
405
|
+
Being transparent about what's not yet production-ready:
|
|
406
|
+
|
|
407
|
+
| Feature | Status | Notes |
|
|
408
|
+
|---------|--------|-------|
|
|
409
|
+
| Distributed budgets (Redis) | ✅ v0.2.0 | Atomic operations with Lua scripts |
|
|
410
|
+
| Audit logging | ✅ v0.2.0 | File and logging backends |
|
|
411
|
+
| Graceful degradation metrics | ✅ v0.2.0 | Track failures and fallbacks |
|
|
412
|
+
| PostgreSQL backend | 🚧 Planned | Use SQLite or Redis for now |
|
|
413
|
+
| DynamoDB backend | 🚧 Planned | Use SQLite or Redis for now |
|
|
414
|
+
| Encryption at rest | 🚧 Planned | Use encrypted volumes as workaround |
|
|
415
|
+
| Multi-tenancy optimization | 🚧 Planned | Use tag-scoped budgets for now |
|
|
416
|
+
| Streaming cost estimation | ⚠️ Limited | Actual cost tracked on completion |
|
|
417
|
+
| Fine-tuning cost tracking | ❌ Not supported | |
|
|
418
|
+
|
|
419
|
+
### Recommended for Production
|
|
420
|
+
|
|
421
|
+
| Deployment Size | Backend | Notes |
|
|
422
|
+
|-----------------|---------|-------|
|
|
423
|
+
| Single instance | SQLite | Simple, no setup |
|
|
424
|
+
| Multiple instances | Redis | Distributed budget enforcement |
|
|
425
|
+
| High-volume (>1k req/s) | Redis | With sampling (coming soon) |
|
|
426
|
+
|
|
352
427
|
## Contributing
|
|
353
428
|
|
|
354
429
|
Contributions are welcome! Please read our contributing guidelines and submit pull requests.
|
|
@@ -1,14 +1,16 @@
|
|
|
1
|
-
llm_cost_guard/__init__.py,sha256=
|
|
1
|
+
llm_cost_guard/__init__.py,sha256=o-AIhQ_c-Zr0XqH7KRhG6U4RjpXi8fSZbEzTJ3qKsEE,1245
|
|
2
|
+
llm_cost_guard/audit.py,sha256=WKJ-Q6arfhsP0y_Ibjkrd6-DBde7UWCIkQe3rGpJ5es,14280
|
|
2
3
|
llm_cost_guard/budget.py,sha256=oT8m_7ir5hXfgxjfaaKDIAscRMuuk4PwseVz43IVAoA,10944
|
|
3
4
|
llm_cost_guard/cli.py,sha256=GJZYwFkUHZ5txQhzBQkbnqQiZDr96Hf_zZ_hQNMvr1E,15166
|
|
4
5
|
llm_cost_guard/exceptions.py,sha256=-WayjSAXLAY51FLdsFgGf0W8fMqGswq1zdnfY6ZUAhs,1727
|
|
5
6
|
llm_cost_guard/models.py,sha256=ioH8-ZFt3PIx0AuDHEmPWtpOiW5HWw9tzI05B3bGI4U,3304
|
|
6
7
|
llm_cost_guard/rate_limit.py,sha256=a3tG8RYSMAqirnmzSvNsJYOT7nDFAMoOU56OeUGuTFQ,7702
|
|
7
8
|
llm_cost_guard/span.py,sha256=uXAIBVVLAfe7TDJwNl1TvG4Ay8RNNYSjJD2ZvLN7vYc,4828
|
|
8
|
-
llm_cost_guard/tracker.py,sha256=
|
|
9
|
-
llm_cost_guard/backends/__init__.py,sha256=
|
|
9
|
+
llm_cost_guard/tracker.py,sha256=39p1LOQ0LoA4N003AhyA9c3gtemyPlIcYK7fuwcvM0g,29748
|
|
10
|
+
llm_cost_guard/backends/__init__.py,sha256=Jf6j8NWC4qapSU7o2Qt49-7tek20aQOmJQpsgFscM4o,1602
|
|
10
11
|
llm_cost_guard/backends/base.py,sha256=8r9mm4mLNJ4jW9zhaDpvLpfFNg-uhdHCqWMp2L7K-SE,3289
|
|
11
12
|
llm_cost_guard/backends/memory.py,sha256=O7FB2m_3qEoFG7cyQGbA1NanO-CalUkJCk8T666AkVE,9176
|
|
13
|
+
llm_cost_guard/backends/redis_backend.py,sha256=QbYzOfD8uL1XifYC5ZK8hONOzwAs02A2FWAMGUKudu8,19361
|
|
12
14
|
llm_cost_guard/backends/sqlite.py,sha256=Qps7opmUm07zDYl1YPC12uLGWhwNQrBZymtaVWOaePU,14411
|
|
13
15
|
llm_cost_guard/clients/__init__.py,sha256=yY-8_u7nW03gZVrnNiAyvwjarCcP4exo2WfnfgwuUNU,237
|
|
14
16
|
llm_cost_guard/clients/anthropic.py,sha256=pG9wqCCKdMGu8igIlFVSb96ndifUm9N8CseaDcXTuaU,6639
|
|
@@ -29,8 +31,8 @@ llm_cost_guard/providers/bedrock.py,sha256=WglOxb4a5so3kecsdUMzxIdS5LrvjRnH8rQvD
|
|
|
29
31
|
llm_cost_guard/providers/openai.py,sha256=dPYwYpwsYLdGHg0O6UYExl9-NFb6JN3m6xgrqN14gGM,4073
|
|
30
32
|
llm_cost_guard/tokenizers/__init__.py,sha256=qCdf9onm6uDeCrVIH6oLTTBte1IsmbCeyef5gm09dWc,178
|
|
31
33
|
llm_cost_guard/tokenizers/base.py,sha256=roDyOMPuuGR2WCdR1Vns45UThrEB8fVYV-4iE6mu89Q,6901
|
|
32
|
-
llm_cost_guard-0.
|
|
33
|
-
llm_cost_guard-0.
|
|
34
|
-
llm_cost_guard-0.
|
|
35
|
-
llm_cost_guard-0.
|
|
36
|
-
llm_cost_guard-0.
|
|
34
|
+
llm_cost_guard-0.2.0.dist-info/METADATA,sha256=7L25b6dFUUDRtQCQAT_5K0_tW9BFvj1hjTse4zSGFKI,12120
|
|
35
|
+
llm_cost_guard-0.2.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
36
|
+
llm_cost_guard-0.2.0.dist-info/entry_points.txt,sha256=k383VtNlqTw4JKJOPzQaUKXnTxDdpRzafvhKlzrk9jw,59
|
|
37
|
+
llm_cost_guard-0.2.0.dist-info/licenses/LICENSE,sha256=nIvGzmGHr6xbKpf-EbwVZPpQDx65K1A8JJw50dREvps,1072
|
|
38
|
+
llm_cost_guard-0.2.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|