headroom_ai-0.2.13-py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- headroom/__init__.py +212 -0
- headroom/cache/__init__.py +76 -0
- headroom/cache/anthropic.py +517 -0
- headroom/cache/base.py +342 -0
- headroom/cache/compression_feedback.py +613 -0
- headroom/cache/compression_store.py +814 -0
- headroom/cache/dynamic_detector.py +1026 -0
- headroom/cache/google.py +884 -0
- headroom/cache/openai.py +584 -0
- headroom/cache/registry.py +175 -0
- headroom/cache/semantic.py +451 -0
- headroom/ccr/__init__.py +77 -0
- headroom/ccr/context_tracker.py +582 -0
- headroom/ccr/mcp_server.py +319 -0
- headroom/ccr/response_handler.py +772 -0
- headroom/ccr/tool_injection.py +415 -0
- headroom/cli.py +219 -0
- headroom/client.py +977 -0
- headroom/compression/__init__.py +42 -0
- headroom/compression/detector.py +424 -0
- headroom/compression/handlers/__init__.py +22 -0
- headroom/compression/handlers/base.py +219 -0
- headroom/compression/handlers/code_handler.py +506 -0
- headroom/compression/handlers/json_handler.py +418 -0
- headroom/compression/masks.py +345 -0
- headroom/compression/universal.py +465 -0
- headroom/config.py +474 -0
- headroom/exceptions.py +192 -0
- headroom/integrations/__init__.py +159 -0
- headroom/integrations/agno/__init__.py +53 -0
- headroom/integrations/agno/hooks.py +345 -0
- headroom/integrations/agno/model.py +625 -0
- headroom/integrations/agno/providers.py +154 -0
- headroom/integrations/langchain/__init__.py +106 -0
- headroom/integrations/langchain/agents.py +326 -0
- headroom/integrations/langchain/chat_model.py +1002 -0
- headroom/integrations/langchain/langsmith.py +324 -0
- headroom/integrations/langchain/memory.py +319 -0
- headroom/integrations/langchain/providers.py +200 -0
- headroom/integrations/langchain/retriever.py +371 -0
- headroom/integrations/langchain/streaming.py +341 -0
- headroom/integrations/mcp/__init__.py +37 -0
- headroom/integrations/mcp/server.py +533 -0
- headroom/memory/__init__.py +37 -0
- headroom/memory/extractor.py +390 -0
- headroom/memory/fast_store.py +621 -0
- headroom/memory/fast_wrapper.py +311 -0
- headroom/memory/inline_extractor.py +229 -0
- headroom/memory/store.py +434 -0
- headroom/memory/worker.py +260 -0
- headroom/memory/wrapper.py +321 -0
- headroom/models/__init__.py +39 -0
- headroom/models/registry.py +687 -0
- headroom/parser.py +293 -0
- headroom/pricing/__init__.py +51 -0
- headroom/pricing/anthropic_prices.py +81 -0
- headroom/pricing/litellm_pricing.py +113 -0
- headroom/pricing/openai_prices.py +91 -0
- headroom/pricing/registry.py +188 -0
- headroom/providers/__init__.py +61 -0
- headroom/providers/anthropic.py +621 -0
- headroom/providers/base.py +131 -0
- headroom/providers/cohere.py +362 -0
- headroom/providers/google.py +427 -0
- headroom/providers/litellm.py +297 -0
- headroom/providers/openai.py +566 -0
- headroom/providers/openai_compatible.py +521 -0
- headroom/proxy/__init__.py +19 -0
- headroom/proxy/server.py +2683 -0
- headroom/py.typed +0 -0
- headroom/relevance/__init__.py +124 -0
- headroom/relevance/base.py +106 -0
- headroom/relevance/bm25.py +255 -0
- headroom/relevance/embedding.py +255 -0
- headroom/relevance/hybrid.py +259 -0
- headroom/reporting/__init__.py +5 -0
- headroom/reporting/generator.py +549 -0
- headroom/storage/__init__.py +41 -0
- headroom/storage/base.py +125 -0
- headroom/storage/jsonl.py +220 -0
- headroom/storage/sqlite.py +289 -0
- headroom/telemetry/__init__.py +91 -0
- headroom/telemetry/collector.py +764 -0
- headroom/telemetry/models.py +880 -0
- headroom/telemetry/toin.py +1579 -0
- headroom/tokenizer.py +80 -0
- headroom/tokenizers/__init__.py +75 -0
- headroom/tokenizers/base.py +210 -0
- headroom/tokenizers/estimator.py +198 -0
- headroom/tokenizers/huggingface.py +317 -0
- headroom/tokenizers/mistral.py +245 -0
- headroom/tokenizers/registry.py +398 -0
- headroom/tokenizers/tiktoken_counter.py +248 -0
- headroom/transforms/__init__.py +106 -0
- headroom/transforms/base.py +57 -0
- headroom/transforms/cache_aligner.py +357 -0
- headroom/transforms/code_compressor.py +1313 -0
- headroom/transforms/content_detector.py +335 -0
- headroom/transforms/content_router.py +1158 -0
- headroom/transforms/llmlingua_compressor.py +638 -0
- headroom/transforms/log_compressor.py +529 -0
- headroom/transforms/pipeline.py +297 -0
- headroom/transforms/rolling_window.py +350 -0
- headroom/transforms/search_compressor.py +365 -0
- headroom/transforms/smart_crusher.py +2682 -0
- headroom/transforms/text_compressor.py +259 -0
- headroom/transforms/tool_crusher.py +338 -0
- headroom/utils.py +215 -0
- headroom_ai-0.2.13.dist-info/METADATA +315 -0
- headroom_ai-0.2.13.dist-info/RECORD +114 -0
- headroom_ai-0.2.13.dist-info/WHEEL +4 -0
- headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
- headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
- headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
headroom/providers/litellm.py
@@ -0,0 +1,297 @@
"""LiteLLM provider for universal LLM support.

LiteLLM provides a unified interface to 100+ LLM providers:
- OpenAI, Azure OpenAI
- Anthropic
- Google (Vertex AI, AI Studio)
- AWS Bedrock
- Cohere
- Replicate
- Hugging Face
- Ollama
- Together AI
- Groq
- And many more...

This integration allows Headroom to work with any LiteLLM-supported
model without needing provider-specific implementations.

Requires: pip install litellm
"""

from __future__ import annotations

import logging
from typing import Any

from headroom.tokenizers import EstimatingTokenCounter

from .base import Provider, TokenCounter

logger = logging.getLogger(__name__)

# Check if litellm is available
try:
    import litellm
    from litellm import get_model_info as litellm_get_model_info
    from litellm import model_cost as litellm_model_cost
    from litellm import token_counter as litellm_token_counter

    LITELLM_AVAILABLE = True
except ImportError:
    LITELLM_AVAILABLE = False
    litellm = None  # type: ignore[assignment]
    litellm_token_counter = None  # type: ignore[assignment]
    litellm_model_cost = None  # type: ignore[assignment]
    litellm_get_model_info = None  # type: ignore[assignment]


def is_litellm_available() -> bool:
    """Check if LiteLLM is installed.

    Returns:
        True if litellm is available.
    """
    return LITELLM_AVAILABLE


class LiteLLMTokenCounter:
    """Token counter using LiteLLM's token counting.

    LiteLLM provides accurate token counting for most providers
    by using the appropriate tokenizer for each model.
    """

    def __init__(self, model: str):
        """Initialize LiteLLM token counter.

        Args:
            model: Model name in LiteLLM format (e.g., 'gpt-4o', 'claude-3-sonnet').
        """
        if not LITELLM_AVAILABLE:
            raise RuntimeError(
                "LiteLLM is required for LiteLLMProvider. Install with: pip install litellm"
            )
        self.model = model
        # Fallback estimator for when litellm counting fails
        self._fallback = EstimatingTokenCounter()

    def count_text(self, text: str) -> int:
        """Count tokens in text using LiteLLM."""
        if not text:
            return 0
        try:
            # LiteLLM's token_counter expects messages format
            # We wrap text in a simple message
            return litellm_token_counter(
                model=self.model,
                messages=[{"role": "user", "content": text}],
            )
        except Exception as e:
            logger.debug(f"LiteLLM token count failed for {self.model}: {e}")
            return self._fallback.count_text(text)

    def count_message(self, message: dict[str, Any]) -> int:
        """Count tokens in a single message."""
        try:
            return litellm_token_counter(
                model=self.model,
                messages=[message],
            )
        except Exception as e:
            logger.debug(f"LiteLLM message count failed for {self.model}: {e}")
            # Fallback to estimation
            tokens = 4  # Base overhead
            content = message.get("content", "")
            if isinstance(content, str):
                tokens += self._fallback.count_text(content)
            return tokens

    def count_messages(self, messages: list[dict[str, Any]]) -> int:
        """Count tokens in messages using LiteLLM."""
        if not messages:
            return 0
        try:
            return litellm_token_counter(
                model=self.model,
                messages=messages,
            )
        except Exception as e:
            logger.debug(f"LiteLLM messages count failed for {self.model}: {e}")
            # Fallback to estimation
            total = sum(self.count_message(msg) for msg in messages)
            total += 3  # Priming
            return total


class LiteLLMProvider(Provider):
    """Provider using LiteLLM for universal model support.

    LiteLLM supports 100+ LLM providers with a unified interface.
    This provider leverages LiteLLM's:
    - Token counting (accurate for most providers)
    - Model info (context limits, capabilities)
    - Cost estimation (from LiteLLM's model database)

    Example:
        from headroom.providers import LiteLLMProvider

        provider = LiteLLMProvider()

        # Works with any LiteLLM-supported model
        counter = provider.get_token_counter("gpt-4o")
        counter = provider.get_token_counter("claude-3-5-sonnet-20241022")
        counter = provider.get_token_counter("gemini/gemini-1.5-pro")
        counter = provider.get_token_counter("bedrock/anthropic.claude-v2")
        counter = provider.get_token_counter("ollama/llama3")

    Model Format:
        LiteLLM uses a provider/model format for some providers:
        - OpenAI: "gpt-4o" or "openai/gpt-4o"
        - Anthropic: "claude-3-sonnet" or "anthropic/claude-3-sonnet"
        - Google: "gemini/gemini-1.5-pro"
        - Azure: "azure/gpt-4"
        - Bedrock: "bedrock/anthropic.claude-v2"
        - Ollama: "ollama/llama3"

    See LiteLLM docs for full model list:
        https://docs.litellm.ai/docs/providers
    """

    def __init__(self):
        """Initialize LiteLLM provider."""
        if not LITELLM_AVAILABLE:
            raise RuntimeError(
                "LiteLLM is required for LiteLLMProvider. Install with: pip install litellm"
            )

    @property
    def name(self) -> str:
        return "litellm"

    def supports_model(self, model: str) -> bool:
        """Check if LiteLLM supports this model.

        LiteLLM supports most models, so this returns True
        for any model. Actual support depends on credentials.
        """
        return True  # LiteLLM handles validation

    def get_token_counter(self, model: str) -> TokenCounter:
        """Get token counter for a model."""
        return LiteLLMTokenCounter(model)

    def get_context_limit(self, model: str) -> int:
        """Get context limit using LiteLLM's model info."""
        try:
            if litellm_get_model_info is not None:
                info = litellm_get_model_info(model)
                if info and "max_input_tokens" in info:
                    result = info["max_input_tokens"]
                    return result if result is not None else 128000
                if info and "max_tokens" in info:
                    result = info["max_tokens"]
                    return result if result is not None else 128000
        except Exception as e:
            logger.debug(f"LiteLLM get_model_info failed for {model}: {e}")

        # Fallback to reasonable default
        return 128000

    def get_output_buffer(self, model: str, default: int = 4000) -> int:
        """Get recommended output buffer."""
        try:
            if litellm_get_model_info is not None:
                info = litellm_get_model_info(model)
                if info and "max_output_tokens" in info:
                    max_output = info["max_output_tokens"]
                    if max_output is not None:
                        return min(max_output, default)
        except Exception:
            pass
        return default

    def estimate_cost(
        self,
        input_tokens: int,
        output_tokens: int,
        model: str,
        cached_tokens: int = 0,
    ) -> float | None:
        """Estimate cost using LiteLLM's cost database.

        Args:
            input_tokens: Number of input tokens.
            output_tokens: Number of output tokens.
            model: Model name.
            cached_tokens: Cached tokens (may not be supported by all providers).

        Returns:
            Estimated cost in USD, or None if pricing unknown.
        """
        try:
            # LiteLLM's cost calculation
            cost = litellm.completion_cost(
                model=model,
                prompt="",  # We're using token counts directly
                completion="",
                prompt_tokens=input_tokens,
                completion_tokens=output_tokens,
            )
            return cost
        except Exception as e:
            logger.debug(f"LiteLLM cost estimation failed for {model}: {e}")
            return None

    @classmethod
    def list_supported_providers(cls) -> list[str]:
        """List providers supported by LiteLLM.

        Returns:
            List of provider names.
        """
        if not LITELLM_AVAILABLE:
            return []

        # Major providers supported by LiteLLM
        return [
            "openai",
            "anthropic",
            "azure",
            "google",
            "vertex_ai",
            "bedrock",
            "cohere",
            "replicate",
            "huggingface",
            "ollama",
            "together_ai",
            "groq",
            "fireworks_ai",
            "anyscale",
            "deepinfra",
            "perplexity",
            "mistral",
            "cloudflare",
            "ai21",
            "nlp_cloud",
            "aleph_alpha",
            "petals",
            "baseten",
            "openrouter",
            "vllm",
            "xinference",
            "text-generation-inference",
        ]


def create_litellm_provider() -> LiteLLMProvider:
    """Create a LiteLLM provider.

    Returns:
        Configured LiteLLMProvider.

    Raises:
        RuntimeError: If LiteLLM is not installed.
    """
    return LiteLLMProvider()
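For orientation, a minimal usage sketch of the provider added above, adapted from the module's own docstring example. It is not part of the package diff; the model name, message content, and token figures are illustrative placeholders, and the wider Headroom wiring (how this provider is registered) is not shown.

    from headroom.providers import LiteLLMProvider

    provider = LiteLLMProvider()  # raises RuntimeError if litellm is not installed

    # Token counting works for any LiteLLM-style model identifier.
    counter = provider.get_token_counter("gpt-4o")
    prompt_tokens = counter.count_messages([{"role": "user", "content": "Hello!"}])

    # Context limit and cost both come from LiteLLM's model database,
    # with conservative fallbacks (128000 tokens, None) when the lookup fails.
    limit = provider.get_context_limit("gpt-4o")
    cost = provider.estimate_cost(
        input_tokens=prompt_tokens,
        output_tokens=200,
        model="gpt-4o",
    )
    print(prompt_tokens, limit, cost)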