llm_cost_guard-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. llm_cost_guard/__init__.py +39 -0
  2. llm_cost_guard/backends/__init__.py +52 -0
  3. llm_cost_guard/backends/base.py +121 -0
  4. llm_cost_guard/backends/memory.py +265 -0
  5. llm_cost_guard/backends/sqlite.py +425 -0
  6. llm_cost_guard/budget.py +306 -0
  7. llm_cost_guard/cli.py +464 -0
  8. llm_cost_guard/clients/__init__.py +11 -0
  9. llm_cost_guard/clients/anthropic.py +231 -0
  10. llm_cost_guard/clients/openai.py +262 -0
  11. llm_cost_guard/exceptions.py +71 -0
  12. llm_cost_guard/integrations/__init__.py +12 -0
  13. llm_cost_guard/integrations/cache.py +189 -0
  14. llm_cost_guard/integrations/langchain.py +257 -0
  15. llm_cost_guard/models.py +123 -0
  16. llm_cost_guard/pricing/__init__.py +7 -0
  17. llm_cost_guard/pricing/anthropic.yaml +88 -0
  18. llm_cost_guard/pricing/bedrock.yaml +215 -0
  19. llm_cost_guard/pricing/loader.py +221 -0
  20. llm_cost_guard/pricing/openai.yaml +148 -0
  21. llm_cost_guard/pricing/vertex.yaml +133 -0
  22. llm_cost_guard/providers/__init__.py +69 -0
  23. llm_cost_guard/providers/anthropic.py +115 -0
  24. llm_cost_guard/providers/base.py +72 -0
  25. llm_cost_guard/providers/bedrock.py +135 -0
  26. llm_cost_guard/providers/openai.py +110 -0
  27. llm_cost_guard/rate_limit.py +233 -0
  28. llm_cost_guard/span.py +143 -0
  29. llm_cost_guard/tokenizers/__init__.py +7 -0
  30. llm_cost_guard/tokenizers/base.py +207 -0
  31. llm_cost_guard/tracker.py +718 -0
  32. llm_cost_guard-0.1.0.dist-info/METADATA +357 -0
  33. llm_cost_guard-0.1.0.dist-info/RECORD +36 -0
  34. llm_cost_guard-0.1.0.dist-info/WHEEL +4 -0
  35. llm_cost_guard-0.1.0.dist-info/entry_points.txt +2 -0
  36. llm_cost_guard-0.1.0.dist-info/licenses/LICENSE +21 -0
llm_cost_guard/pricing/loader.py
@@ -0,0 +1,221 @@
+ """
+ Pricing data loader for LLM Cost Guard.
+ """
+ 
+ import logging
+ from datetime import datetime
+ from pathlib import Path
+ from typing import Any, Dict, Optional
+ 
+ import yaml
+ 
+ from llm_cost_guard.models import ModelPricing, ModelType
+ from llm_cost_guard.exceptions import PricingNotFoundError
+ 
+ logger = logging.getLogger(__name__)
+ 
+ # Default pricing data directory
+ PRICING_DIR = Path(__file__).parent
+ 
+ 
+ class PricingLoader:
+     """Loads and manages pricing data for LLM providers."""
+ 
+     def __init__(
+         self,
+         pricing_overrides: Optional[Dict[str, Dict[str, Any]]] = None,
+         pricing_stale_warning_days: int = 7,
+         pricing_stale_error_days: int = 30,
+         bedrock_region: str = "us-east-1",
+     ):
+         self._pricing_data: Dict[str, Dict[str, ModelPricing]] = {}
+         self._pricing_versions: Dict[str, str] = {}
+         self._pricing_overrides = pricing_overrides or {}
+         self._stale_warning_days = pricing_stale_warning_days
+         self._stale_error_days = pricing_stale_error_days
+         self._bedrock_region = bedrock_region
+         self._last_loaded: Optional[datetime] = None
+ 
+         self._load_all_pricing()
+ 
+     def _load_all_pricing(self) -> None:
+         """Load pricing from all YAML files."""
+         for yaml_file in PRICING_DIR.glob("*.yaml"):
+             provider = yaml_file.stem
+             self._load_provider_pricing(provider, yaml_file)
+ 
+         self._last_loaded = datetime.now()
+ 
+     def _load_provider_pricing(self, provider: str, yaml_path: Path) -> None:
+         """Load pricing for a specific provider."""
+         try:
+             with open(yaml_path, "r") as f:
+                 data = yaml.safe_load(f)
+ 
+             if not data:
+                 return
+ 
+             self._pricing_versions[provider] = data.get("version", "unknown")
+             self._pricing_data[provider] = {}
+ 
+             models = data.get("models", {})
+             for model_name, model_data in models.items():
+                 model_type_str = model_data.get("model_type", "chat")
+                 model_type = ModelType(model_type_str) if model_type_str else ModelType.CHAT
+ 
+                 pricing = ModelPricing(
+                     input_cost_per_1k=model_data.get("input_cost_per_1k", 0.0),
+                     output_cost_per_1k=model_data.get("output_cost_per_1k", 0.0),
+                     cached_input_cost_per_1k=model_data.get("cached_input_cost_per_1k"),
+                     context_window=model_data.get("context_window", 128000),
+                     model_type=model_type,
+                     image_cost_per_image=model_data.get("image_cost_per_image"),
+                     audio_cost_per_minute=model_data.get("audio_cost_per_minute"),
+                     embedding_dimensions=model_data.get("embedding_dimensions"),
+                 )
+                 self._pricing_data[provider][model_name] = pricing
+ 
+         except Exception as e:
+             logger.warning(f"Failed to load pricing for {provider}: {e}")
+ 
+     def get_pricing(self, provider: str, model: str) -> ModelPricing:
+         """Get pricing for a specific model."""
+         # Normalize provider and model names
+         provider = provider.lower()
+         model_lower = model.lower()
+ 
+         # Check overrides first
+         override_key = f"{provider}/{model}"
+         if override_key in self._pricing_overrides:
+             override = self._pricing_overrides[override_key]
+             return ModelPricing(
+                 input_cost_per_1k=override.get("input_cost_per_1k", 0.0),
+                 output_cost_per_1k=override.get("output_cost_per_1k", 0.0),
+                 cached_input_cost_per_1k=override.get("cached_input_cost_per_1k"),
+                 context_window=override.get("context_window", 128000),
+             )
+ 
+         # Check loaded pricing
+         if provider in self._pricing_data:
+             provider_pricing = self._pricing_data[provider]
+ 
+             # Try exact match
+             if model in provider_pricing:
+                 return provider_pricing[model]
+ 
+             # Try lowercase match
+             if model_lower in provider_pricing:
+                 return provider_pricing[model_lower]
+ 
+             # Try prefix match (for versioned models like gpt-4-0613)
+             for known_model in provider_pricing:
+                 if model_lower.startswith(known_model) or known_model.startswith(model_lower):
+                     return provider_pricing[known_model]
+ 
+         raise PricingNotFoundError(
+             f"Pricing not found for {provider}/{model}", provider=provider, model=model
+         )
+ 
+     def calculate_cost(
+         self,
+         provider: str,
+         model: str,
+         input_tokens: int,
+         output_tokens: int,
+         cached_tokens: int = 0,
+     ) -> tuple[float, float, float]:
+         """
+         Calculate cost for a call.
+         Returns (input_cost, output_cost, total_cost).
+         """
+         pricing = self.get_pricing(provider, model)
+ 
133
+ # Calculate input cost (considering cache)
134
+ regular_input_tokens = input_tokens - cached_tokens
135
+ input_cost = (regular_input_tokens / 1000) * pricing.input_cost_per_1k
136
+
137
+ # Add cached token cost if applicable
138
+ if cached_tokens > 0 and pricing.cached_input_cost_per_1k is not None:
139
+ input_cost += (cached_tokens / 1000) * pricing.cached_input_cost_per_1k
140
+ 
+         # Calculate output cost
+         output_cost = (output_tokens / 1000) * pricing.output_cost_per_1k
+ 
+         total_cost = input_cost + output_cost
+ 
+         return input_cost, output_cost, total_cost
+ 
+     def estimate_cost(
+         self,
+         provider: str,
+         model: str,
+         input_tokens: int,
+         max_output_tokens: int = 4096,
+     ) -> float:
+         """Estimate maximum cost for a call (for budget reservation)."""
+         pricing = self.get_pricing(provider, model)
+ 
+         input_cost = (input_tokens / 1000) * pricing.input_cost_per_1k
+         output_cost = (max_output_tokens / 1000) * pricing.output_cost_per_1k
+ 
+         return input_cost + output_cost
+ 
+     @property
+     def last_updated(self) -> Optional[datetime]:
+         """Get when pricing was last loaded."""
+         return self._last_loaded
+ 
+     @property
+     def pricing_version(self) -> Dict[str, str]:
+         """Get pricing versions for all providers."""
+         return dict(self._pricing_versions)
+ 
+     @property
+     def is_stale(self) -> bool:
+         """Check if pricing data is stale (beyond warning threshold)."""
+         if self._last_loaded is None:
+             return True
+ 
+         age_days = (datetime.now() - self._last_loaded).days
+         return age_days >= self._stale_warning_days
+ 
+     @property
+     def is_very_stale(self) -> bool:
+         """Check if pricing data is very stale (beyond error threshold)."""
+         if self._last_loaded is None:
+             return True
+ 
+         age_days = (datetime.now() - self._last_loaded).days
+         return age_days >= self._stale_error_days
+ 
+     def get_all_models(self, provider: Optional[str] = None) -> Dict[str, list[str]]:
+         """Get all known models, optionally filtered by provider."""
+         if provider:
+             return {provider: list(self._pricing_data.get(provider, {}).keys())}
+         return {p: list(models.keys()) for p, models in self._pricing_data.items()}
+ 
+     def refresh(self) -> None:
+         """Reload pricing data from files."""
+         self._pricing_data.clear()
+         self._pricing_versions.clear()
+         self._load_all_pricing()
+ 
+ 
+ # Global pricing loader instance
+ _global_loader: Optional[PricingLoader] = None
+ 
+ 
+ def get_pricing(provider: str, model: str) -> ModelPricing:
+     """Get pricing for a model using the global loader."""
+     global _global_loader
+     if _global_loader is None:
+         _global_loader = PricingLoader()
+     return _global_loader.get_pricing(provider, model)
+ 
+ 
+ def get_pricing_loader() -> PricingLoader:
+     """Get the global pricing loader instance."""
+     global _global_loader
+     if _global_loader is None:
+         _global_loader = PricingLoader()
+     return _global_loader
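
The loader resolves pricing in override → exact → lowercase → prefix order, then derives costs from the per-1k rates. A minimal usage sketch of the API shown above (the override values here are hypothetical, not shipped defaults):

```python
from llm_cost_guard.pricing.loader import PricingLoader, get_pricing_loader

# Hypothetical override: bill gpt-4o at negotiated rates instead of the bundled table
loader = PricingLoader(
    pricing_overrides={
        "openai/gpt-4o": {"input_cost_per_1k": 0.002, "output_cost_per_1k": 0.008}
    }
)

# calculate_cost returns (input_cost, output_cost, total_cost)
input_cost, output_cost, total = loader.calculate_cost(
    "openai", "gpt-4o-mini", input_tokens=12_000, output_tokens=1_500, cached_tokens=4_000
)

# Module-level helpers lazily create and share one process-wide loader
shared = get_pricing_loader()
assert shared.get_pricing("openai", "gpt-4o").input_cost_per_1k == 0.0025
```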
llm_cost_guard/pricing/openai.yaml
@@ -0,0 +1,148 @@
+ version: "2026-01-15"
+ models:
+   # GPT-4o models
+   gpt-4o:
+     input_cost_per_1k: 0.0025
+     output_cost_per_1k: 0.01
+     cached_input_cost_per_1k: 0.00125
+     context_window: 128000
+     model_type: chat
+ 
+   gpt-4o-2024-11-20:
+     input_cost_per_1k: 0.0025
+     output_cost_per_1k: 0.01
+     cached_input_cost_per_1k: 0.00125
+     context_window: 128000
+     model_type: chat
+ 
+   gpt-4o-mini:
+     input_cost_per_1k: 0.00015
+     output_cost_per_1k: 0.0006
+     cached_input_cost_per_1k: 0.000075
+     context_window: 128000
+     model_type: chat
+ 
+   gpt-4o-mini-2024-07-18:
+     input_cost_per_1k: 0.00015
+     output_cost_per_1k: 0.0006
+     cached_input_cost_per_1k: 0.000075
+     context_window: 128000
+     model_type: chat
+ 
+   # GPT-4 Turbo
+   gpt-4-turbo:
+     input_cost_per_1k: 0.01
+     output_cost_per_1k: 0.03
+     context_window: 128000
+     model_type: chat
+ 
+   gpt-4-turbo-preview:
+     input_cost_per_1k: 0.01
+     output_cost_per_1k: 0.03
+     context_window: 128000
+     model_type: chat
+ 
+   # GPT-4
+   gpt-4:
+     input_cost_per_1k: 0.03
+     output_cost_per_1k: 0.06
+     context_window: 8192
+     model_type: chat
+ 
+   gpt-4-32k:
+     input_cost_per_1k: 0.06
+     output_cost_per_1k: 0.12
+     context_window: 32768
+     model_type: chat
+ 
+   # GPT-3.5 Turbo
+   gpt-3.5-turbo:
+     input_cost_per_1k: 0.0005
+     output_cost_per_1k: 0.0015
+     context_window: 16385
+     model_type: chat
+ 
+   gpt-3.5-turbo-0125:
+     input_cost_per_1k: 0.0005
+     output_cost_per_1k: 0.0015
+     context_window: 16385
+     model_type: chat
+ 
+   gpt-3.5-turbo-instruct:
+     input_cost_per_1k: 0.0015
+     output_cost_per_1k: 0.002
+     context_window: 4096
+     model_type: completion
+ 
+   # o1 reasoning models
+   o1:
+     input_cost_per_1k: 0.015
+     output_cost_per_1k: 0.06
+     cached_input_cost_per_1k: 0.0075
+     context_window: 200000
+     model_type: chat
+ 
+   o1-preview:
+     input_cost_per_1k: 0.015
+     output_cost_per_1k: 0.06
+     context_window: 128000
+     model_type: chat
+ 
+   o1-mini:
+     input_cost_per_1k: 0.003
+     output_cost_per_1k: 0.012
+     cached_input_cost_per_1k: 0.0015
+     context_window: 128000
+     model_type: chat
+ 
+   # Embedding models
+   text-embedding-3-small:
+     input_cost_per_1k: 0.00002
+     output_cost_per_1k: 0.0
+     context_window: 8191
+     model_type: embedding
+     embedding_dimensions: 1536
+ 
+   text-embedding-3-large:
+     input_cost_per_1k: 0.00013
+     output_cost_per_1k: 0.0
+     context_window: 8191
+     model_type: embedding
+     embedding_dimensions: 3072
+ 
+   text-embedding-ada-002:
+     input_cost_per_1k: 0.0001
+     output_cost_per_1k: 0.0
+     context_window: 8191
+     model_type: embedding
+     embedding_dimensions: 1536
+ 
+   # Image models (DALL-E)
+   dall-e-3:
+     input_cost_per_1k: 0.0
+     output_cost_per_1k: 0.0
+     image_cost_per_image: 0.04  # standard quality 1024x1024
+     model_type: image
+ 
+   dall-e-2:
+     input_cost_per_1k: 0.0
+     output_cost_per_1k: 0.0
+     image_cost_per_image: 0.02
+     model_type: image
+ 
+   # Audio models
+   whisper-1:
+     input_cost_per_1k: 0.0
+     output_cost_per_1k: 0.0
+     audio_cost_per_minute: 0.006
+     model_type: audio
+ 
+   tts-1:
+     input_cost_per_1k: 0.015
+     output_cost_per_1k: 0.0
+     model_type: audio
+ 
+   tts-1-hd:
+     input_cost_per_1k: 0.03
+     output_cost_per_1k: 0.0
+     model_type: audio
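
As a sanity check on these rates, here is the arithmetic calculate_cost performs for a hypothetical gpt-4o-mini call (10,000 input tokens, of which 2,000 are cache reads, plus 1,000 output tokens):

```python
# Illustrative arithmetic using the gpt-4o-mini rates above
regular_input = (10_000 - 2_000) / 1000 * 0.00015  # $0.00120
cached_input = 2_000 / 1000 * 0.000075             # $0.00015
output = 1_000 / 1000 * 0.0006                     # $0.00060
total = regular_input + cached_input + output      # $0.00195
```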
llm_cost_guard/pricing/vertex.yaml
@@ -0,0 +1,133 @@
+ version: "2026-01-15"
+ # Google Vertex AI / Gemini pricing
+ models:
+   # Gemini 1.5 Pro
+   gemini-1.5-pro:
+     input_cost_per_1k: 0.00125
+     output_cost_per_1k: 0.005
+     context_window: 2000000
+     model_type: chat
+ 
+   gemini-1.5-pro-001:
+     input_cost_per_1k: 0.00125
+     output_cost_per_1k: 0.005
+     context_window: 2000000
+     model_type: chat
+ 
+   gemini-1.5-pro-002:
+     input_cost_per_1k: 0.00125
+     output_cost_per_1k: 0.005
+     context_window: 2000000
+     model_type: chat
+ 
+   # Gemini 1.5 Flash
+   gemini-1.5-flash:
+     input_cost_per_1k: 0.000075
+     output_cost_per_1k: 0.0003
+     context_window: 1000000
+     model_type: chat
+ 
+   gemini-1.5-flash-001:
+     input_cost_per_1k: 0.000075
+     output_cost_per_1k: 0.0003
+     context_window: 1000000
+     model_type: chat
+ 
+   gemini-1.5-flash-002:
+     input_cost_per_1k: 0.000075
+     output_cost_per_1k: 0.0003
+     context_window: 1000000
+     model_type: chat
+ 
+   # Gemini 1.0 Pro
+   gemini-1.0-pro:
+     input_cost_per_1k: 0.0005
+     output_cost_per_1k: 0.0015
+     context_window: 32760
+     model_type: chat
+ 
+   gemini-1.0-pro-001:
+     input_cost_per_1k: 0.0005
+     output_cost_per_1k: 0.0015
+     context_window: 32760
+     model_type: chat
+ 
+   gemini-1.0-pro-002:
+     input_cost_per_1k: 0.0005
+     output_cost_per_1k: 0.0015
+     context_window: 32760
+     model_type: chat
+ 
+   # Gemini 2.0 Flash
+   gemini-2.0-flash-exp:
+     input_cost_per_1k: 0.0
+     output_cost_per_1k: 0.0
+     context_window: 1000000
+     model_type: chat
+ 
+   # PaLM 2 (legacy)
+   text-bison:
+     input_cost_per_1k: 0.00025
+     output_cost_per_1k: 0.0005
+     context_window: 8192
+     model_type: chat
+ 
+   text-bison-32k:
+     input_cost_per_1k: 0.00025
+     output_cost_per_1k: 0.0005
+     context_window: 32000
+     model_type: chat
+ 
+   chat-bison:
+     input_cost_per_1k: 0.00025
+     output_cost_per_1k: 0.0005
+     context_window: 8192
+     model_type: chat
+ 
+   chat-bison-32k:
+     input_cost_per_1k: 0.00025
+     output_cost_per_1k: 0.0005
+     context_window: 32000
+     model_type: chat
+ 
+   # Embeddings
+   textembedding-gecko:
+     input_cost_per_1k: 0.00001
+     output_cost_per_1k: 0.0
+     context_window: 3072
+     model_type: embedding
+     embedding_dimensions: 768
+ 
+   textembedding-gecko-multilingual:
+     input_cost_per_1k: 0.00001
+     output_cost_per_1k: 0.0
+     context_window: 3072
+     model_type: embedding
+     embedding_dimensions: 768
+ 
+   text-embedding-004:
+     input_cost_per_1k: 0.00001
+     output_cost_per_1k: 0.0
+     context_window: 2048
+     model_type: embedding
+     embedding_dimensions: 768
+ 
+   text-multilingual-embedding-002:
+     input_cost_per_1k: 0.00001
+     output_cost_per_1k: 0.0
+     context_window: 2048
+     model_type: embedding
+     embedding_dimensions: 768
+ 
+   # Image models
+   imagen-3.0-generate-001:
+     input_cost_per_1k: 0.0
+     output_cost_per_1k: 0.0
+     image_cost_per_image: 0.04
+     model_type: image
+ 
+   imagen-3.0-fast-generate-001:
+     input_cost_per_1k: 0.0
+     output_cost_per_1k: 0.0
+     image_cost_per_image: 0.02
+     model_type: image
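
Models missing from this table (and not caught by the loader's prefix fallback) raise PricingNotFoundError; a pricing_overrides entry fills the gap. A sketch, with a hypothetical model name and rates:

```python
from llm_cost_guard.pricing.loader import PricingLoader

loader = PricingLoader(
    pricing_overrides={
        # Hypothetical rates for a Gemini variant the bundled table doesn't cover
        "vertex/gemini-example-preview": {
            "input_cost_per_1k": 0.001,
            "output_cost_per_1k": 0.004,
            "context_window": 1000000,
        }
    }
)

# Overrides win before any table or prefix lookup
pricing = loader.get_pricing("vertex", "gemini-example-preview")
assert pricing.input_cost_per_1k == 0.001
```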
llm_cost_guard/providers/__init__.py
@@ -0,0 +1,69 @@
+ """
+ LLM provider integrations for LLM Cost Guard.
+ """
+ 
+ from llm_cost_guard.providers.base import Provider
+ from llm_cost_guard.providers.openai import OpenAIProvider
+ from llm_cost_guard.providers.anthropic import AnthropicProvider
+ from llm_cost_guard.providers.bedrock import BedrockProvider
+ 
+ __all__ = [
+     "Provider",
+     "OpenAIProvider",
+     "AnthropicProvider",
+     "BedrockProvider",
+     "get_provider",
+     "detect_provider",
+ ]
+ 
+ 
+ def get_provider(name: str) -> Provider:
+     """Get a provider by name."""
+     providers = {
+         "openai": OpenAIProvider,
+         "anthropic": AnthropicProvider,
+         "bedrock": BedrockProvider,
+     }
+ 
+     name = name.lower()
+     if name not in providers:
+         raise ValueError(f"Unknown provider: {name}. Available: {list(providers.keys())}")
+ 
+     return providers[name]()
+ 
+ 
+ def detect_provider(model: str) -> str:
+     """Detect the provider from a model name."""
+     model_lower = model.lower()
+ 
+     # OpenAI models
+     if any(
+         prefix in model_lower
+         for prefix in ["gpt-", "o1", "text-embedding", "dall-e", "whisper", "tts-"]
+     ):
+         return "openai"
+ 
+     # Anthropic models
+     if "claude" in model_lower and not model_lower.startswith("anthropic."):
+         return "anthropic"
+ 
+     # AWS Bedrock models (have provider prefix)
+     if any(
+         model_lower.startswith(prefix)
+         for prefix in [
+             "anthropic.",
+             "amazon.",
+             "meta.",
+             "mistral.",
+             "cohere.",
+             "ai21.",
+         ]
+     ):
+         return "bedrock"
+ 
+     # Google Vertex AI
+     if any(prefix in model_lower for prefix in ["gemini", "palm", "text-bison", "chat-bison"]):
+         return "vertex"
+ 
+     # Default to OpenAI
+     return "openai"
llm_cost_guard/providers/anthropic.py
@@ -0,0 +1,115 @@
+ """
+ Anthropic provider for LLM Cost Guard.
+ """
+ 
+ from typing import Any
+ 
+ from llm_cost_guard.models import UsageData
+ from llm_cost_guard.providers.base import Provider
+ 
+ 
+ class AnthropicProvider(Provider):
+     """Anthropic API provider."""
+ 
+     @property
+     def name(self) -> str:
+         return "anthropic"
+ 
+     def extract_usage(self, response: Any) -> UsageData:
+         """Extract token usage from an Anthropic API response."""
+         usage = UsageData()
+ 
+         # Handle dictionary response
+         if isinstance(response, dict):
+             usage_data = response.get("usage", {})
+             usage.input_tokens = usage_data.get("input_tokens", 0)
+             usage.output_tokens = usage_data.get("output_tokens", 0)
+ 
+             # Check for cached tokens (cache_creation_input_tokens is read
+             # here but not yet reflected in totals or pricing)
+             usage.cached_tokens = usage_data.get("cache_read_input_tokens", 0)
+             _cache_creation = usage_data.get("cache_creation_input_tokens", 0)
+ 
+             # Total tokens
+             usage.total_tokens = usage.input_tokens + usage.output_tokens
+ 
+             return usage
+ 
+         # Handle Anthropic client response object
+         if hasattr(response, "usage") and response.usage is not None:
+             usage.input_tokens = getattr(response.usage, "input_tokens", 0) or 0
+             usage.output_tokens = getattr(response.usage, "output_tokens", 0) or 0
+ 
+             # Anthropic prompt caching
+             if hasattr(response.usage, "cache_read_input_tokens"):
+                 usage.cached_tokens = response.usage.cache_read_input_tokens or 0
+ 
+             usage.total_tokens = usage.input_tokens + usage.output_tokens
+ 
+         return usage
+ 
+     def extract_model(self, response: Any) -> str:
+         """Extract the model name from an Anthropic API response."""
+         if isinstance(response, dict):
+             return response.get("model", "unknown")
+ 
+         if hasattr(response, "model"):
+             return response.model or "unknown"
+ 
+         return "unknown"
+ 
+     def extract_cached_tokens(self, response: Any) -> int:
+         """Extract cached token count from an Anthropic API response."""
+         usage = self.extract_usage(response)
+         return usage.cached_tokens
+ 
+     def normalize_model_name(self, model: str) -> str:
+         """Normalize Anthropic model name."""
+         # Anthropic model names are usually already normalized
+         return model
+ 
+ 
+ class AnthropicStreamingHandler:
+     """Handler for streaming Anthropic responses."""
+ 
+     def __init__(self):
+         self.input_tokens = 0
+         self.output_tokens = 0
+         self.model = "unknown"
+         self._started = False
+ 
+     def handle_event(self, event: Any) -> None:
+         """Process a streaming event."""
+         if isinstance(event, dict):
+             event_type = event.get("type", "")
+ 
+             if event_type == "message_start":
+                 message = event.get("message", {})
+                 self.model = message.get("model", self.model)
+                 usage = message.get("usage", {})
+                 self.input_tokens = usage.get("input_tokens", 0)
+ 
+             elif event_type == "message_delta":
+                 usage = event.get("usage", {})
+                 self.output_tokens = usage.get("output_tokens", 0)
+ 
+         else:
+             # Handle event objects
+             event_type = getattr(event, "type", "")
+ 
+             if event_type == "message_start":
+                 if hasattr(event, "message"):
+                     self.model = getattr(event.message, "model", self.model)
+                     if hasattr(event.message, "usage"):
+                         self.input_tokens = getattr(event.message.usage, "input_tokens", 0)
+ 
+             elif event_type == "message_delta":
+                 if hasattr(event, "usage"):
+                     self.output_tokens = getattr(event.usage, "output_tokens", 0)
+ 
+     def get_usage(self) -> UsageData:
+         """Get final usage data."""
+         return UsageData(
+             input_tokens=self.input_tokens,
+             output_tokens=self.output_tokens,
+             total_tokens=self.input_tokens + self.output_tokens,
+         )
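
A sketch of wiring the streaming handler around the official Anthropic SDK's event stream; the client setup and model name are assumptions, not part of this package:

```python
import anthropic

from llm_cost_guard.providers.anthropic import AnthropicStreamingHandler

client = anthropic.Anthropic()  # assumes ANTHROPIC_API_KEY in the environment
handler = AnthropicStreamingHandler()

with client.messages.stream(
    model="claude-3-5-sonnet-20241022",
    max_tokens=1024,
    messages=[{"role": "user", "content": "Hello"}],
) as stream:
    # message_start events carry input tokens; message_delta carries output tokens
    for event in stream:
        handler.handle_event(event)

usage = handler.get_usage()
print(usage.input_tokens, usage.output_tokens, usage.total_tokens)
```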