proxilion 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94):
  1. proxilion/__init__.py +136 -0
  2. proxilion/audit/__init__.py +133 -0
  3. proxilion/audit/base_exporters.py +527 -0
  4. proxilion/audit/compliance/__init__.py +130 -0
  5. proxilion/audit/compliance/base.py +457 -0
  6. proxilion/audit/compliance/eu_ai_act.py +603 -0
  7. proxilion/audit/compliance/iso27001.py +544 -0
  8. proxilion/audit/compliance/soc2.py +491 -0
  9. proxilion/audit/events.py +493 -0
  10. proxilion/audit/explainability.py +1173 -0
  11. proxilion/audit/exporters/__init__.py +58 -0
  12. proxilion/audit/exporters/aws_s3.py +636 -0
  13. proxilion/audit/exporters/azure_storage.py +608 -0
  14. proxilion/audit/exporters/cloud_base.py +468 -0
  15. proxilion/audit/exporters/gcp_storage.py +570 -0
  16. proxilion/audit/exporters/multi_exporter.py +498 -0
  17. proxilion/audit/hash_chain.py +652 -0
  18. proxilion/audit/logger.py +543 -0
  19. proxilion/caching/__init__.py +49 -0
  20. proxilion/caching/tool_cache.py +633 -0
  21. proxilion/context/__init__.py +73 -0
  22. proxilion/context/context_window.py +556 -0
  23. proxilion/context/message_history.py +505 -0
  24. proxilion/context/session.py +735 -0
  25. proxilion/contrib/__init__.py +51 -0
  26. proxilion/contrib/anthropic.py +609 -0
  27. proxilion/contrib/google.py +1012 -0
  28. proxilion/contrib/langchain.py +641 -0
  29. proxilion/contrib/mcp.py +893 -0
  30. proxilion/contrib/openai.py +646 -0
  31. proxilion/core.py +3058 -0
  32. proxilion/decorators.py +966 -0
  33. proxilion/engines/__init__.py +287 -0
  34. proxilion/engines/base.py +266 -0
  35. proxilion/engines/casbin_engine.py +412 -0
  36. proxilion/engines/opa_engine.py +493 -0
  37. proxilion/engines/simple.py +437 -0
  38. proxilion/exceptions.py +887 -0
  39. proxilion/guards/__init__.py +54 -0
  40. proxilion/guards/input_guard.py +522 -0
  41. proxilion/guards/output_guard.py +634 -0
  42. proxilion/observability/__init__.py +198 -0
  43. proxilion/observability/cost_tracker.py +866 -0
  44. proxilion/observability/hooks.py +683 -0
  45. proxilion/observability/metrics.py +798 -0
  46. proxilion/observability/session_cost_tracker.py +1063 -0
  47. proxilion/policies/__init__.py +67 -0
  48. proxilion/policies/base.py +304 -0
  49. proxilion/policies/builtin.py +486 -0
  50. proxilion/policies/registry.py +376 -0
  51. proxilion/providers/__init__.py +201 -0
  52. proxilion/providers/adapter.py +468 -0
  53. proxilion/providers/anthropic_adapter.py +330 -0
  54. proxilion/providers/gemini_adapter.py +391 -0
  55. proxilion/providers/openai_adapter.py +294 -0
  56. proxilion/py.typed +0 -0
  57. proxilion/resilience/__init__.py +81 -0
  58. proxilion/resilience/degradation.py +615 -0
  59. proxilion/resilience/fallback.py +555 -0
  60. proxilion/resilience/retry.py +554 -0
  61. proxilion/scheduling/__init__.py +57 -0
  62. proxilion/scheduling/priority_queue.py +419 -0
  63. proxilion/scheduling/scheduler.py +459 -0
  64. proxilion/security/__init__.py +244 -0
  65. proxilion/security/agent_trust.py +968 -0
  66. proxilion/security/behavioral_drift.py +794 -0
  67. proxilion/security/cascade_protection.py +869 -0
  68. proxilion/security/circuit_breaker.py +428 -0
  69. proxilion/security/cost_limiter.py +690 -0
  70. proxilion/security/idor_protection.py +460 -0
  71. proxilion/security/intent_capsule.py +849 -0
  72. proxilion/security/intent_validator.py +495 -0
  73. proxilion/security/memory_integrity.py +767 -0
  74. proxilion/security/rate_limiter.py +509 -0
  75. proxilion/security/scope_enforcer.py +680 -0
  76. proxilion/security/sequence_validator.py +636 -0
  77. proxilion/security/trust_boundaries.py +784 -0
  78. proxilion/streaming/__init__.py +70 -0
  79. proxilion/streaming/detector.py +761 -0
  80. proxilion/streaming/transformer.py +674 -0
  81. proxilion/timeouts/__init__.py +55 -0
  82. proxilion/timeouts/decorators.py +477 -0
  83. proxilion/timeouts/manager.py +545 -0
  84. proxilion/tools/__init__.py +69 -0
  85. proxilion/tools/decorators.py +493 -0
  86. proxilion/tools/registry.py +732 -0
  87. proxilion/types.py +339 -0
  88. proxilion/validation/__init__.py +93 -0
  89. proxilion/validation/pydantic_schema.py +351 -0
  90. proxilion/validation/schema.py +651 -0
  91. proxilion-0.0.1.dist-info/METADATA +872 -0
  92. proxilion-0.0.1.dist-info/RECORD +94 -0
  93. proxilion-0.0.1.dist-info/WHEEL +4 -0
  94. proxilion-0.0.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,866 @@
1
+ """
2
+ Token usage and cost tracking for Proxilion.
3
+
4
+ Tracks token usage and costs for every tool call and LLM interaction.
5
+ Essential for budgeting, chargebacks, and understanding agent costs.
6
+
7
+ Example:
8
+ >>> from proxilion.observability.cost_tracker import (
9
+ ... CostTracker, BudgetPolicy, ModelPricing
10
+ ... )
11
+ >>>
12
+ >>> # Create tracker with budget limits
13
+ >>> tracker = CostTracker(
14
+ ... budget_policy=BudgetPolicy(
15
+ ... max_cost_per_request=1.00,
16
+ ... max_cost_per_user_per_day=50.00,
17
+ ... )
18
+ ... )
19
+ >>>
20
+ >>> # Record usage
21
+ >>> record = tracker.record_usage(
22
+ ... model="claude-sonnet-4-20250514",
23
+ ... input_tokens=1000,
24
+ ... output_tokens=500,
25
+ ... user_id="user_123",
26
+ ... tool_name="database_query",
27
+ ... )
28
+ >>> print(f"Cost: ${record.cost_usd:.4f}")
29
+ >>>
30
+ >>> # Get summary
31
+ >>> summary = tracker.get_summary(user_id="user_123")
32
+ >>> print(f"Total cost: ${summary.total_cost:.4f}")
33
+ """
34
+
35
+ from __future__ import annotations
36
+
37
+ import json
38
+ import logging
39
+ import threading
40
+ from collections import defaultdict
41
+ from dataclasses import asdict, dataclass, field
42
+ from datetime import datetime, timedelta, timezone
43
+ from typing import Any
44
+
45
+ logger = logging.getLogger(__name__)
46
+
47
+
48
@dataclass
class ModelPricing:
    """
    Price sheet for a single LLM model.

    Every price is expressed in USD per 1,000 tokens.

    Attributes:
        model_name: Human-readable name of the model.
        input_price_per_1k: Price of 1,000 input tokens.
        output_price_per_1k: Price of 1,000 output tokens.
        cache_read_price_per_1k: Price of 1,000 input tokens served from
            cache (defaults to 0.0).
        cache_write_price_per_1k: Price of 1,000 tokens written to cache
            (defaults to 0.0).
    """

    model_name: str
    input_price_per_1k: float
    output_price_per_1k: float
    cache_read_price_per_1k: float = 0.0
    cache_write_price_per_1k: float = 0.0

    def calculate_cost(
        self,
        input_tokens: int,
        output_tokens: int,
        cache_read_tokens: int = 0,
        cache_write_tokens: int = 0,
    ) -> float:
        """
        Price a usage event against this sheet.

        Args:
            input_tokens: Input tokens consumed.
            output_tokens: Output tokens produced.
            cache_read_tokens: Input tokens read from cache.
            cache_write_tokens: Tokens written to cache.

        Returns:
            The summed cost of all four token categories, in USD.
        """
        per_thousand = 1000

        # Each category is billed independently at its own per-1k rate.
        billed_input, billed_output, billed_cache_read, billed_cache_write = (
            (input_tokens / per_thousand) * self.input_price_per_1k,
            (output_tokens / per_thousand) * self.output_price_per_1k,
            (cache_read_tokens / per_thousand) * self.cache_read_price_per_1k,
            (cache_write_tokens / per_thousand) * self.cache_write_price_per_1k,
        )

        return billed_input + billed_output + billed_cache_read + billed_cache_write
94
+
95
+
96
+ # Default pricing for popular models (as of early 2026)
97
DEFAULT_PRICING: dict[str, ModelPricing] = {
    model_id: ModelPricing(
        model_name=display_name,
        input_price_per_1k=input_price,
        output_price_per_1k=output_price,
        cache_read_price_per_1k=cache_read_price,
        cache_write_price_per_1k=cache_write_price,
    )
    # Row layout: model id, display name, then USD per 1,000 tokens for
    # input, output, cache read and cache write. A 0.0 cache price means the
    # table carries no price for that category.
    # NOTE(review): these figures are a point-in-time snapshot; confirm them
    # against the providers' published price lists before relying on them.
    for (
        model_id,
        display_name,
        input_price,
        output_price,
        cache_read_price,
        cache_write_price,
    ) in (
        # Anthropic Claude models
        ("claude-opus-4-5-20251101", "Claude Opus 4.5", 0.015, 0.075, 0.00375, 0.01875),
        ("claude-sonnet-4-20250514", "Claude Sonnet 4", 0.003, 0.015, 0.0006, 0.00375),
        ("claude-3-5-sonnet-20241022", "Claude 3.5 Sonnet", 0.003, 0.015, 0.0003, 0.00375),
        ("claude-3-5-haiku-20241022", "Claude 3.5 Haiku", 0.001, 0.005, 0.0001, 0.00125),
        # OpenAI models
        ("gpt-4o", "GPT-4o", 0.0025, 0.01, 0.00125, 0.0),
        ("gpt-4o-mini", "GPT-4o Mini", 0.00015, 0.0006, 0.000075, 0.0),
        ("gpt-4-turbo", "GPT-4 Turbo", 0.01, 0.03, 0.0, 0.0),
        ("gpt-3.5-turbo", "GPT-3.5 Turbo", 0.0005, 0.0015, 0.0, 0.0),
        # Google models
        ("gemini-1.5-pro", "Gemini 1.5 Pro", 0.00125, 0.005, 0.000315, 0.0),
        ("gemini-1.5-flash", "Gemini 1.5 Flash", 0.000075, 0.0003, 0.00001875, 0.0),
        ("gemini-2.0-flash", "Gemini 2.0 Flash", 0.0001, 0.0004, 0.0, 0.0),
    )
}
169
+
170
+
171
@dataclass
class UsageRecord:
    """
    One recorded usage event.

    Attributes:
        timestamp: Moment the usage happened.
        model: Identifier of the model that was used.
        input_tokens: Input tokens consumed.
        output_tokens: Output tokens produced.
        cache_read_tokens: Tokens read from cache.
        cache_write_tokens: Tokens written to cache.
        tool_name: Tool that caused the usage, if any.
        user_id: User the usage is attributed to, if any.
        cost_usd: Cost of the event in USD.
        request_id: Request identifier, if any.
        metadata: Free-form extra data attached to the event.
    """

    timestamp: datetime
    model: str
    input_tokens: int
    output_tokens: int
    cache_read_tokens: int = 0
    cache_write_tokens: int = 0
    tool_name: str | None = None
    user_id: str | None = None
    cost_usd: float = 0.0
    request_id: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)

    @property
    def total_tokens(self) -> int:
        """Sum of input and output tokens (cache tokens are not included)."""
        token_total = self.input_tokens + self.output_tokens
        return token_total

    def to_dict(self) -> dict[str, Any]:
        """Return the record as a plain dict with an ISO-8601 timestamp string."""
        # asdict() leaves the datetime object in place, so it is overwritten
        # with its ISO string; the key keeps its original (first) position.
        return {**asdict(self), "timestamp": self.timestamp.isoformat()}

    def to_json(self) -> str:
        """Serialize the dict form of the record to a JSON string."""
        serializable = self.to_dict()
        return json.dumps(serializable)
216
+
217
+
218
@dataclass
class CostSummary:
    """
    Aggregated cost figures for a set of usage records.

    Attributes:
        total_cost: Summed cost in USD.
        total_input_tokens: Summed input tokens.
        total_output_tokens: Summed output tokens.
        total_cache_tokens: Summed cache-read tokens.
        record_count: How many usage records were aggregated.
        by_model: Cost per model identifier.
        by_user: Cost per user identifier.
        by_tool: Cost per tool name.
        start_time: Beginning of the summarized period, if bounded.
        end_time: End of the summarized period, if bounded.
    """

    total_cost: float = 0.0
    total_input_tokens: int = 0
    total_output_tokens: int = 0
    total_cache_tokens: int = 0
    record_count: int = 0
    by_model: dict[str, float] = field(default_factory=dict)
    by_user: dict[str, float] = field(default_factory=dict)
    by_tool: dict[str, float] = field(default_factory=dict)
    start_time: datetime | None = None
    end_time: datetime | None = None

    def to_dict(self) -> dict[str, Any]:
        """Return the summary as a dict, with period bounds as ISO strings or None."""
        passthrough_fields = (
            "total_cost",
            "total_input_tokens",
            "total_output_tokens",
            "total_cache_tokens",
            "record_count",
            "by_model",
            "by_user",
            "by_tool",
        )
        # Breakdown dicts are handed out as-is (not copied).
        exported: dict[str, Any] = {
            name: getattr(self, name) for name in passthrough_fields
        }

        for bound_name in ("start_time", "end_time"):
            moment = getattr(self, bound_name)
            exported[bound_name] = moment.isoformat() if moment else None

        return exported
261
+
262
+
263
@dataclass
class BudgetPolicy:
    """
    Spending and token ceilings used for cost control.

    Every limit is optional; a limit left as ``None`` is not set.

    Attributes:
        max_cost_per_request: Ceiling on the cost of one request (USD).
        max_cost_per_user_per_day: Ceiling on one user's daily cost (USD).
        max_cost_per_user_per_hour: Ceiling on one user's hourly cost (USD).
        max_tokens_per_request: Ceiling on the tokens of one request.
        max_org_cost_per_day: Ceiling on the organization's daily cost (USD).
        max_org_cost_per_month: Ceiling on the organization's monthly cost (USD).
        warn_at_percentage: Fraction of a limit (0.0 to 1.0) at which to warn.
    """

    # Per-request ceilings.
    max_cost_per_request: float | None = None
    # Per-user ceilings.
    max_cost_per_user_per_day: float | None = None
    max_cost_per_user_per_hour: float | None = None
    max_tokens_per_request: int | None = None
    # Organization-wide ceilings.
    max_org_cost_per_day: float | None = None
    max_org_cost_per_month: float | None = None
    # NOTE(review): nothing visible in this module reads this value; confirm
    # whether another module consumes it.
    warn_at_percentage: float = 0.8
285
+
286
+
287
class CostTracker:
    """
    Tracks token usage and costs across all operations.

    The tracker keeps an in-memory history of usage records (capped at
    ``max_records``), prices each record from a per-model pricing table,
    checks requests against an optional budget policy, and produces
    summaries and exports.

    Per-user budget windows are rolling windows computed from the stored
    records. Organization-wide daily/monthly budgets use running totals
    keyed by the calendar day/month of each record's timestamp.

    Example:
        >>> tracker = CostTracker()
        >>>
        >>> # Record usage
        >>> record = tracker.record_usage(
        ...     model="claude-sonnet-4-20250514",
        ...     input_tokens=1000,
        ...     output_tokens=500,
        ...     user_id="user_123",
        ... )
        >>>
        >>> # Check budget before expensive operation
        >>> allowed, reason = tracker.check_budget("user_123", estimated_tokens=10000)
        >>> if not allowed:
        ...     print(f"Budget issue: {reason}")
    """

    def __init__(
        self,
        pricing: dict[str, ModelPricing] | None = None,
        budget_policy: BudgetPolicy | None = None,
        max_records: int = 100000,
        default_model: str = "claude-sonnet-4-20250514",
    ) -> None:
        """
        Initialize the cost tracker.

        Args:
            pricing: Extra or overriding model pricing, merged on top of
                DEFAULT_PRICING.
            budget_policy: Budget limits to enforce.
            max_records: Maximum records to keep in memory.
            default_model: Model whose pricing is used for estimates and as
                the fallback for unknown models.
        """
        self._lock = threading.RLock()
        self._pricing = dict(DEFAULT_PRICING)
        if pricing:
            self._pricing.update(pricing)

        self._budget_policy = budget_policy
        self._max_records = max_records
        self._default_model = default_model

        # Storage
        self._records: list[UsageRecord] = []
        # NOTE(review): the two per-user maps below are written by
        # record_usage() but never read or pruned in this class, so they grow
        # with every new (user, day/hour) pair until a full clear_records().
        self._user_daily_spend: dict[str, dict[str, float]] = defaultdict(
            lambda: defaultdict(float)
        )
        self._user_hourly_spend: dict[str, dict[str, float]] = defaultdict(
            lambda: defaultdict(float)
        )
        self._org_daily_spend: dict[str, float] = defaultdict(float)
        self._org_monthly_spend: dict[str, float] = defaultdict(float)

    def set_pricing(self, model: str, pricing: ModelPricing) -> None:
        """
        Set or update pricing for a model.

        Args:
            model: Model identifier.
            pricing: Pricing information.
        """
        with self._lock:
            self._pricing[model] = pricing

    def get_pricing(self, model: str) -> ModelPricing | None:
        """
        Get pricing for a model.

        Args:
            model: Model identifier.

        Returns:
            ModelPricing if found, None otherwise.
        """
        with self._lock:
            return self._pricing.get(model)

    def _resolve_pricing(self, model: str) -> tuple[ModelPricing | None, bool]:
        """
        Find the pricing to bill ``model`` with.

        Returns:
            ``(pricing, used_fallback)``. ``pricing`` is the model's own entry
            when present, otherwise the default model's entry, otherwise None.
            ``used_fallback`` is True when the model's own entry was missing.
        """
        with self._lock:
            own_pricing = self._pricing.get(model)
            if own_pricing:
                return own_pricing, False
            return self._pricing.get(self._default_model) or None, True

    def record_usage(
        self,
        model: str,
        input_tokens: int,
        output_tokens: int,
        cache_read_tokens: int = 0,
        cache_write_tokens: int = 0,
        tool_name: str | None = None,
        user_id: str | None = None,
        request_id: str | None = None,
        metadata: dict[str, Any] | None = None,
        timestamp: datetime | None = None,
    ) -> UsageRecord:
        """
        Record a usage event and calculate cost.

        Unknown models are billed at the default model's price (with a
        warning); if that is missing too, the cost is recorded as 0.0.

        Args:
            model: Model identifier.
            input_tokens: Number of input tokens.
            output_tokens: Number of output tokens.
            cache_read_tokens: Number of cached tokens read.
            cache_write_tokens: Number of tokens written to cache.
            tool_name: Tool that triggered the usage.
            user_id: User who incurred the usage.
            request_id: Request identifier.
            metadata: Additional metadata.
            timestamp: Event timestamp (defaults to the current UTC time).
                NOTE(review): spend queries compare it with timezone-aware
                UTC datetimes, so a naive timestamp would fail there.

        Returns:
            The created UsageRecord.
        """
        if timestamp is None:
            timestamp = datetime.now(timezone.utc)

        # Calculate cost
        pricing, used_fallback = self._resolve_pricing(model)
        if pricing is None:
            cost = 0.0
            logger.warning(
                "Unknown model '%s' and no default pricing available", model
            )
        else:
            cost = pricing.calculate_cost(
                input_tokens, output_tokens, cache_read_tokens, cache_write_tokens
            )
            if used_fallback:
                logger.warning(
                    "Unknown model '%s', using default pricing from '%s'",
                    model,
                    self._default_model,
                )

        record = UsageRecord(
            timestamp=timestamp,
            model=model,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cache_read_tokens=cache_read_tokens,
            cache_write_tokens=cache_write_tokens,
            tool_name=tool_name,
            user_id=user_id,
            cost_usd=cost,
            request_id=request_id,
            metadata=metadata or {},
        )

        with self._lock:
            self._records.append(record)

            # Drop the oldest insertions once over capacity. Deleting in
            # place avoids copying the whole history on every insert and
            # also behaves correctly when max_records is 0.
            excess = len(self._records) - self._max_records
            if excess > 0:
                del self._records[:excess]

            day_key = timestamp.strftime("%Y-%m-%d")

            # Update per-user spend tracking
            if user_id:
                hour_key = timestamp.strftime("%Y-%m-%d-%H")
                self._user_daily_spend[user_id][day_key] += cost
                self._user_hourly_spend[user_id][hour_key] += cost

            # Track org-wide spend (not reduced when records are trimmed)
            month_key = timestamp.strftime("%Y-%m")
            self._org_daily_spend[day_key] += cost
            self._org_monthly_spend[month_key] += cost

        logger.debug(
            "Recorded usage: model=%s, tokens=%s+%s, cost=$%.4f, user=%s",
            model,
            input_tokens,
            output_tokens,
            cost,
            user_id,
        )

        return record

    def estimate_cost(
        self,
        model: str | None = None,
        input_tokens: int = 0,
        output_tokens: int = 0,
        cache_read_tokens: int = 0,
        cache_write_tokens: int = 0,
    ) -> float:
        """
        Estimate cost for a potential request.

        Uses the same pricing lookup as record_usage(): a model without its
        own pricing is estimated at the default model's price, so the
        estimate matches what would later be recorded.

        Args:
            model: Model to use (defaults to default_model).
            input_tokens: Expected input tokens.
            output_tokens: Expected output tokens.
            cache_read_tokens: Expected cached tokens read.
            cache_write_tokens: Expected tokens written to cache.

        Returns:
            Estimated cost in USD, or 0.0 when no pricing is available.
        """
        pricing, _ = self._resolve_pricing(model or self._default_model)
        if pricing is None:
            return 0.0

        return pricing.calculate_cost(
            input_tokens, output_tokens, cache_read_tokens, cache_write_tokens
        )

    def check_budget(
        self,
        user_id: str,
        estimated_cost: float = 0.0,
        estimated_tokens: int = 0,
    ) -> tuple[bool, str | None]:
        """
        Check if a request would exceed budget limits.

        Limits are checked in this order and the first violation is
        reported: per-request cost, per-request tokens, user daily, user
        hourly, organization daily, organization monthly.

        Args:
            user_id: User making the request.
            estimated_cost: Estimated cost of the request.
            estimated_tokens: Estimated tokens for the request. Used to
                derive a cost (default model, even input/output split) when
                ``estimated_cost`` is 0.0.

        Returns:
            Tuple of (allowed, reason). If not allowed, reason explains why.
        """
        policy = self._budget_policy
        if policy is None:
            return True, None

        now = datetime.now(timezone.utc)

        # If estimated_cost not provided, estimate from tokens
        if estimated_cost == 0.0 and estimated_tokens > 0:
            # Assume an even input/output split; the odd token (if any) goes
            # to the output side so no token is dropped from the estimate.
            assumed_input = estimated_tokens // 2
            estimated_cost = self.estimate_cost(
                input_tokens=assumed_input,
                output_tokens=estimated_tokens - assumed_input,
            )

        # Check per-request cost limit
        if policy.max_cost_per_request is not None:
            if estimated_cost > policy.max_cost_per_request:
                return False, (
                    f"Request would exceed per-request budget: "
                    f"${estimated_cost:.4f} > ${policy.max_cost_per_request:.4f}"
                )

        # Check per-request token limit
        if policy.max_tokens_per_request is not None:
            if estimated_tokens > policy.max_tokens_per_request:
                return False, (
                    f"Request would exceed token limit: "
                    f"{estimated_tokens} > {policy.max_tokens_per_request}"
                )

        # Check user daily limit (rolling 24 hours)
        if policy.max_cost_per_user_per_day is not None:
            daily_spend = self.get_user_spend(user_id, timedelta(days=1))
            if daily_spend + estimated_cost > policy.max_cost_per_user_per_day:
                return False, (
                    f"User daily budget exceeded: "
                    f"${daily_spend + estimated_cost:.4f} > ${policy.max_cost_per_user_per_day:.4f}"
                )

        # Check user hourly limit (rolling 60 minutes)
        if policy.max_cost_per_user_per_hour is not None:
            hourly_spend = self.get_user_spend(user_id, timedelta(hours=1))
            if hourly_spend + estimated_cost > policy.max_cost_per_user_per_hour:
                return False, (
                    f"User hourly budget exceeded: ${hourly_spend + estimated_cost:.4f}"
                    f" > ${policy.max_cost_per_user_per_hour:.4f}"
                )

        # Check org daily limit (current UTC calendar day)
        if policy.max_org_cost_per_day is not None:
            with self._lock:
                org_daily = self._org_daily_spend.get(now.strftime("%Y-%m-%d"), 0.0)
            if org_daily + estimated_cost > policy.max_org_cost_per_day:
                return False, (
                    f"Organization daily budget exceeded: "
                    f"${org_daily + estimated_cost:.4f} > ${policy.max_org_cost_per_day:.4f}"
                )

        # Check org monthly limit (current UTC calendar month)
        if policy.max_org_cost_per_month is not None:
            with self._lock:
                org_monthly = self._org_monthly_spend.get(now.strftime("%Y-%m"), 0.0)
            if org_monthly + estimated_cost > policy.max_org_cost_per_month:
                return False, (
                    f"Organization monthly budget exceeded: "
                    f"${org_monthly + estimated_cost:.4f} > ${policy.max_org_cost_per_month:.4f}"
                )

        return True, None

    def get_user_spend(self, user_id: str, period: timedelta) -> float:
        """
        Get total spend for a user over a period.

        Args:
            user_id: User to check.
            period: How far back from now to look.

        Returns:
            Total spend in USD of the user's stored records whose timestamp
            falls within the period.
        """
        cutoff = datetime.now(timezone.utc) - period

        with self._lock:
            # Scan the whole history: record_usage() accepts caller-supplied
            # timestamps, so insertion order is not guaranteed to be
            # chronological and stopping at the first old record could hide
            # newer spend.
            return sum(
                (
                    record.cost_usd
                    for record in self._records
                    if record.user_id == user_id and record.timestamp >= cutoff
                ),
                0.0,
            )

    def get_org_spend(self, period: timedelta) -> float:
        """
        Get total organization spend over a period.

        Args:
            period: How far back from now to look.

        Returns:
            Total spend in USD of all stored records whose timestamp falls
            within the period.
        """
        cutoff = datetime.now(timezone.utc) - period

        with self._lock:
            # Full scan for the same reason as get_user_spend(): records may
            # not be stored in chronological order.
            return sum(
                (
                    record.cost_usd
                    for record in self._records
                    if record.timestamp >= cutoff
                ),
                0.0,
            )

    def get_summary(
        self,
        start: datetime | None = None,
        end: datetime | None = None,
        user_id: str | None = None,
        model: str | None = None,
        tool_name: str | None = None,
    ) -> CostSummary:
        """
        Get a cost summary for the specified period and filters.

        Args:
            start: Start of period, inclusive (defaults to all time).
            end: End of period, inclusive (defaults to now).
            user_id: Filter by user.
            model: Filter by model.
            tool_name: Filter by tool.

        Returns:
            CostSummary with aggregated data.
        """
        summary = CostSummary(
            start_time=start,
            end_time=end or datetime.now(timezone.utc),
        )

        by_model: dict[str, float] = defaultdict(float)
        by_user: dict[str, float] = defaultdict(float)
        by_tool: dict[str, float] = defaultdict(float)

        with self._lock:
            for record in self._records:
                # Apply time filters
                if start and record.timestamp < start:
                    continue
                if end and record.timestamp > end:
                    continue

                # Apply entity filters
                if user_id and record.user_id != user_id:
                    continue
                if model and record.model != model:
                    continue
                if tool_name and record.tool_name != tool_name:
                    continue

                # Aggregate
                summary.total_cost += record.cost_usd
                summary.total_input_tokens += record.input_tokens
                summary.total_output_tokens += record.output_tokens
                summary.total_cache_tokens += record.cache_read_tokens
                summary.record_count += 1

                by_model[record.model] += record.cost_usd
                if record.user_id:
                    by_user[record.user_id] += record.cost_usd
                if record.tool_name:
                    by_tool[record.tool_name] += record.cost_usd

        summary.by_model = dict(by_model)
        summary.by_user = dict(by_user)
        summary.by_tool = dict(by_tool)

        return summary

    def get_records(
        self,
        start: datetime | None = None,
        end: datetime | None = None,
        user_id: str | None = None,
        limit: int | None = None,
    ) -> list[UsageRecord]:
        """
        Get usage records with optional filters.

        Args:
            start: Start of period, inclusive.
            end: End of period, inclusive.
            user_id: Filter by user.
            limit: Maximum records to return; None means no limit and a
                value of 0 or less returns an empty list.

        Returns:
            List of UsageRecords, most recently recorded first.
        """
        if limit is not None and limit <= 0:
            return []

        matches: list[UsageRecord] = []

        with self._lock:
            for record in reversed(self._records):
                if start and record.timestamp < start:
                    continue
                if end and record.timestamp > end:
                    continue
                if user_id and record.user_id != user_id:
                    continue

                matches.append(record)

                if limit is not None and len(matches) >= limit:
                    break

        return matches

    def export_records(
        self,
        format: str = "json",
        start: datetime | None = None,
        end: datetime | None = None,
    ) -> str:
        """
        Export records to a string format.

        Args:
            format: Output format: "jsonl" for one JSON object per line; any
                other value produces a single indented JSON array.
            start: Start of period.
            end: End of period.

        Returns:
            Exported records as string, most recently recorded first.
        """
        records = self.get_records(start=start, end=end)

        if format == "jsonl":
            return "\n".join(r.to_json() for r in records)

        return json.dumps([r.to_dict() for r in records], indent=2)

    def clear_records(
        self,
        before: datetime | None = None,
        user_id: str | None = None,
    ) -> int:
        """
        Clear usage records.

        With no arguments everything is cleared, including the running
        per-user and organization spend totals. With a filter, only the
        matching records are removed and the running totals are left as
        they are.

        Args:
            before: Clear records with a timestamp strictly before this time.
            user_id: Clear only records for this user.

        Returns:
            Number of records cleared.
        """
        with self._lock:
            original_count = len(self._records)

            if before is None and user_id is None:
                self._records.clear()
                self._user_daily_spend.clear()
                self._user_hourly_spend.clear()
                self._org_daily_spend.clear()
                self._org_monthly_spend.clear()
                return original_count

            def should_clear(record: UsageRecord) -> bool:
                # A record is removed only when it satisfies every filter
                # that was supplied.
                is_old_enough = before is None or record.timestamp < before
                is_for_user = user_id is None or record.user_id == user_id
                return is_old_enough and is_for_user

            self._records = [r for r in self._records if not should_clear(r)]

            return original_count - len(self._records)

    def set_budget_policy(self, policy: BudgetPolicy | None) -> None:
        """
        Set or update the budget policy.

        Args:
            policy: New budget policy, or None to disable.
        """
        self._budget_policy = policy

    def get_budget_policy(self) -> BudgetPolicy | None:
        """Get the current budget policy."""
        return self._budget_policy

    @staticmethod
    def _budget_entry(spent: float, limit: float) -> dict[str, float]:
        """
        Build one status entry for a (spent, limit) pair.

        A limit of zero or less is reported as fully consumed (percentage
        1.0) instead of dividing by it.
        """
        return {
            "spent": spent,
            "limit": limit,
            "remaining": max(0.0, limit - spent),
            "percentage": spent / limit if limit > 0 else 1.0,
        }

    def get_budget_status(self, user_id: str) -> dict[str, Any]:
        """
        Get current budget status for a user.

        Args:
            user_id: User to check.

        Returns:
            Dictionary with ``policy_active`` plus one entry per configured
            limit (``daily``, ``hourly``, ``org_daily``, ``org_monthly``),
            each holding ``spent``, ``limit``, ``remaining`` and
            ``percentage``.
        """
        policy = self._budget_policy
        if policy is None:
            return {"policy_active": False}

        now = datetime.now(timezone.utc)
        status: dict[str, Any] = {"policy_active": True}

        if policy.max_cost_per_user_per_day is not None:
            status["daily"] = self._budget_entry(
                self.get_user_spend(user_id, timedelta(days=1)),
                policy.max_cost_per_user_per_day,
            )

        if policy.max_cost_per_user_per_hour is not None:
            status["hourly"] = self._budget_entry(
                self.get_user_spend(user_id, timedelta(hours=1)),
                policy.max_cost_per_user_per_hour,
            )

        if policy.max_org_cost_per_day is not None:
            with self._lock:
                org_daily = self._org_daily_spend.get(now.strftime("%Y-%m-%d"), 0.0)
            status["org_daily"] = self._budget_entry(
                org_daily, policy.max_org_cost_per_day
            )

        # Reported alongside the daily figure because check_budget() enforces
        # the monthly organization limit as well.
        if policy.max_org_cost_per_month is not None:
            with self._lock:
                org_monthly = self._org_monthly_spend.get(now.strftime("%Y-%m"), 0.0)
            status["org_monthly"] = self._budget_entry(
                org_monthly, policy.max_org_cost_per_month
            )

        return status
847
+
848
+
849
def create_cost_tracker(
    budget_policy: BudgetPolicy | None = None,
    custom_pricing: dict[str, ModelPricing] | None = None,
) -> CostTracker:
    """
    Build a CostTracker from optional budget limits and extra pricing.

    Args:
        budget_policy: Budget limits for the tracker, if any.
        custom_pricing: Model pricing passed to the tracker as its
            ``pricing`` argument, if any.

    Returns:
        A new CostTracker; its remaining constructor arguments keep their
        defaults.
    """
    tracker_options = {
        "pricing": custom_pricing,
        "budget_policy": budget_policy,
    }
    return CostTracker(**tracker_options)