llm-cost-guard 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_cost_guard/__init__.py +39 -0
- llm_cost_guard/backends/__init__.py +52 -0
- llm_cost_guard/backends/base.py +121 -0
- llm_cost_guard/backends/memory.py +265 -0
- llm_cost_guard/backends/sqlite.py +425 -0
- llm_cost_guard/budget.py +306 -0
- llm_cost_guard/cli.py +464 -0
- llm_cost_guard/clients/__init__.py +11 -0
- llm_cost_guard/clients/anthropic.py +231 -0
- llm_cost_guard/clients/openai.py +262 -0
- llm_cost_guard/exceptions.py +71 -0
- llm_cost_guard/integrations/__init__.py +12 -0
- llm_cost_guard/integrations/cache.py +189 -0
- llm_cost_guard/integrations/langchain.py +257 -0
- llm_cost_guard/models.py +123 -0
- llm_cost_guard/pricing/__init__.py +7 -0
- llm_cost_guard/pricing/anthropic.yaml +88 -0
- llm_cost_guard/pricing/bedrock.yaml +215 -0
- llm_cost_guard/pricing/loader.py +221 -0
- llm_cost_guard/pricing/openai.yaml +148 -0
- llm_cost_guard/pricing/vertex.yaml +133 -0
- llm_cost_guard/providers/__init__.py +69 -0
- llm_cost_guard/providers/anthropic.py +115 -0
- llm_cost_guard/providers/base.py +72 -0
- llm_cost_guard/providers/bedrock.py +135 -0
- llm_cost_guard/providers/openai.py +110 -0
- llm_cost_guard/rate_limit.py +233 -0
- llm_cost_guard/span.py +143 -0
- llm_cost_guard/tokenizers/__init__.py +7 -0
- llm_cost_guard/tokenizers/base.py +207 -0
- llm_cost_guard/tracker.py +718 -0
- llm_cost_guard-0.1.0.dist-info/METADATA +357 -0
- llm_cost_guard-0.1.0.dist-info/RECORD +36 -0
- llm_cost_guard-0.1.0.dist-info/WHEEL +4 -0
- llm_cost_guard-0.1.0.dist-info/entry_points.txt +2 -0
- llm_cost_guard-0.1.0.dist-info/licenses/LICENSE +21 -0
llm_cost_guard/tokenizers/base.py
@@ -0,0 +1,207 @@
+"""
+Token counting utilities for LLM Cost Guard.
+"""
+
+import logging
+from typing import Any, Dict, List, Optional, Union
+
+logger = logging.getLogger(__name__)
+
+
+class TokenCounter:
+    """Token counter with support for multiple providers."""
+
+    def __init__(self):
+        self._tiktoken_encodings: Dict[str, Any] = {}
+        self._tiktoken_available = False
+        self._check_tiktoken()
+
+    def _check_tiktoken(self) -> None:
+        """Check if tiktoken is available."""
+        try:
+            import tiktoken  # noqa: F401
+
+            self._tiktoken_available = True
+        except ImportError:
+            self._tiktoken_available = False
+            logger.warning("tiktoken not available, using estimation for OpenAI models")
+
+    def _get_tiktoken_encoding(self, model: str) -> Optional[Any]:
+        """Get or create a cached tiktoken encoding for a model."""
+        if not self._tiktoken_available:
+            return None
+
+        if model in self._tiktoken_encodings:
+            return self._tiktoken_encodings[model]
+
+        import tiktoken
+
+        try:
+            # Try the model-specific encoding first
+            encoding = tiktoken.encoding_for_model(model)
+        except KeyError:
+            # Fall back to cl100k_base, used by most GPT-3.5/GPT-4-era models
+            try:
+                encoding = tiktoken.get_encoding("cl100k_base")
+            except Exception:
+                encoding = tiktoken.get_encoding("gpt2")
+
+        self._tiktoken_encodings[model] = encoding
+        return encoding
+
+    def count_tokens(
+        self,
+        text: Union[str, List[Dict[str, str]]],
+        model: str,
+        provider: str = "openai",
+    ) -> int:
+        """
+        Count tokens in text or messages.
+
+        Args:
+            text: Either a string or a list of message dicts
+            model: Model name
+            provider: Provider name (openai, anthropic, etc.)
+
+        Returns:
+            Number of tokens
+        """
+        provider = provider.lower()
+
+        if isinstance(text, list):
+            # It's a list of messages
+            return self._count_message_tokens(text, model, provider)
+
+        # It's a string
+        return self._count_string_tokens(text, model, provider)
+
+    def _count_string_tokens(self, text: str, model: str, provider: str) -> int:
+        """Count tokens in a string."""
+        if provider == "openai" and self._tiktoken_available:
+            encoding = self._get_tiktoken_encoding(model)
+            if encoding:
+                return len(encoding.encode(text))
+
+        # Fallback estimation: ~4 characters per token
+        return self._estimate_tokens(text)
+
+    def _count_message_tokens(
+        self, messages: List[Dict[str, str]], model: str, provider: str
+    ) -> int:
+        """Count tokens in a list of messages."""
+        if provider == "openai" and self._tiktoken_available:
+            return self._count_openai_message_tokens(messages, model)
+
+        if provider == "anthropic":
+            return self._count_anthropic_message_tokens(messages)
+
+        # Fallback: count tokens in all message content
+        total = 0
+        for msg in messages:
+            content = msg.get("content", "")
+            if isinstance(content, str):
+                total += self._count_string_tokens(content, model, provider)
+            elif isinstance(content, list):
+                # Handle the text parts of multi-modal content
+                for item in content:
+                    if isinstance(item, dict) and "text" in item:
+                        total += self._count_string_tokens(item["text"], model, provider)
+        return total
+
+    def _count_openai_message_tokens(
+        self, messages: List[Dict[str, str]], model: str
+    ) -> int:
+        """Count tokens for OpenAI chat messages."""
+        encoding = self._get_tiktoken_encoding(model)
+        if not encoding:
+            return self._estimate_message_tokens(messages)
+
+        # Per-message token overhead (model-dependent; values follow the OpenAI cookbook)
+        if model.startswith("gpt-4o") or model.startswith("gpt-4-"):
+            tokens_per_message = 3
+            tokens_per_name = 1
+        elif model.startswith("gpt-3.5"):
+            tokens_per_message = 4
+            tokens_per_name = -1  # role is omitted when a name is present
+        else:
+            tokens_per_message = 3
+            tokens_per_name = 1
+
+        num_tokens = 0
+        for message in messages:
+            num_tokens += tokens_per_message
+            for key, value in message.items():
+                if isinstance(value, str):
+                    num_tokens += len(encoding.encode(value))
+                if key == "name":
+                    num_tokens += tokens_per_name
+
+        num_tokens += 3  # Every reply is primed with <|start|>assistant<|message|>
+        return num_tokens
+
+    def _count_anthropic_message_tokens(self, messages: List[Dict[str, str]]) -> int:
+        """Estimate tokens for Anthropic messages."""
+        # Anthropic does not ship a local tokenizer for its current models,
+        # so fall back to a character-count estimate
+        total = 0
+        for msg in messages:
+            content = msg.get("content", "")
+            if isinstance(content, str):
+                total += self._estimate_tokens(content)
+            elif isinstance(content, list):
+                for item in content:
+                    if isinstance(item, dict) and "text" in item:
+                        total += self._estimate_tokens(item["text"])
+            # Add overhead for the role
+            total += 4
+
+        return total
+
+    def _estimate_tokens(self, text: str) -> int:
+        """Estimate token count based on text length."""
+        if not text:
+            return 0
+        # Rough estimate: ~4 characters per token for English text.
+        # This is a common approximation when no tokenizer is available.
+        return max(1, len(text) // 4)
+
+    def _estimate_message_tokens(self, messages: List[Dict[str, str]]) -> int:
+        """Estimate tokens for a list of messages."""
+        total = 0
+        for msg in messages:
+            content = msg.get("content", "")
+            if isinstance(content, str):
+                total += self._estimate_tokens(content)
+            total += 4  # Overhead per message
+        return total
+
+
+# Global token counter instance
+_global_counter: Optional[TokenCounter] = None
+
+
+def get_token_counter() -> TokenCounter:
+    """Get the global token counter instance."""
+    global _global_counter
+    if _global_counter is None:
+        _global_counter = TokenCounter()
+    return _global_counter
+
+
+def count_tokens(
+    text: Union[str, List[Dict[str, str]]],
+    model: str,
+    provider: str = "openai",
+) -> int:
+    """
+    Count tokens in text or messages using the global counter.
+
+    Args:
+        text: Either a string or a list of message dicts
+        model: Model name
+        provider: Provider name
+
+    Returns:
+        Number of tokens
+    """
+    return get_token_counter().count_tokens(text, model, provider)
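
For reference, a minimal usage sketch of the two public helpers this file defines, count_tokens and get_token_counter (the hunk above matches the +207 line count listed for llm_cost_guard/tokenizers/base.py in the manifest). The model names and messages below are illustrative examples, not values taken from the package:

    from llm_cost_guard.tokenizers.base import count_tokens, get_token_counter

    # Plain string: with tiktoken installed this uses the model's encoding;
    # otherwise it falls back to the ~4-characters-per-token estimate
    # (e.g. a 100-character string estimates to 25 tokens).
    print(count_tokens("Hello, world!", model="gpt-4o", provider="openai"))

    # Chat messages: per-message overhead (+3) and the reply primer (+3)
    # are added on top of the encoded content.
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Summarize this document."},
    ]
    print(count_tokens(messages, model="gpt-4o", provider="openai"))

    # Anthropic messages are estimated from character count, +4 per message.
    print(count_tokens(messages, model="claude-3-5-sonnet", provider="anthropic"))

    # The module-level helper reuses one shared TokenCounter, so tiktoken
    # encodings are cached across calls.
    assert get_token_counter() is get_token_counter()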