headroom-ai 0.2.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- headroom/__init__.py +212 -0
- headroom/cache/__init__.py +76 -0
- headroom/cache/anthropic.py +517 -0
- headroom/cache/base.py +342 -0
- headroom/cache/compression_feedback.py +613 -0
- headroom/cache/compression_store.py +814 -0
- headroom/cache/dynamic_detector.py +1026 -0
- headroom/cache/google.py +884 -0
- headroom/cache/openai.py +584 -0
- headroom/cache/registry.py +175 -0
- headroom/cache/semantic.py +451 -0
- headroom/ccr/__init__.py +77 -0
- headroom/ccr/context_tracker.py +582 -0
- headroom/ccr/mcp_server.py +319 -0
- headroom/ccr/response_handler.py +772 -0
- headroom/ccr/tool_injection.py +415 -0
- headroom/cli.py +219 -0
- headroom/client.py +977 -0
- headroom/compression/__init__.py +42 -0
- headroom/compression/detector.py +424 -0
- headroom/compression/handlers/__init__.py +22 -0
- headroom/compression/handlers/base.py +219 -0
- headroom/compression/handlers/code_handler.py +506 -0
- headroom/compression/handlers/json_handler.py +418 -0
- headroom/compression/masks.py +345 -0
- headroom/compression/universal.py +465 -0
- headroom/config.py +474 -0
- headroom/exceptions.py +192 -0
- headroom/integrations/__init__.py +159 -0
- headroom/integrations/agno/__init__.py +53 -0
- headroom/integrations/agno/hooks.py +345 -0
- headroom/integrations/agno/model.py +625 -0
- headroom/integrations/agno/providers.py +154 -0
- headroom/integrations/langchain/__init__.py +106 -0
- headroom/integrations/langchain/agents.py +326 -0
- headroom/integrations/langchain/chat_model.py +1002 -0
- headroom/integrations/langchain/langsmith.py +324 -0
- headroom/integrations/langchain/memory.py +319 -0
- headroom/integrations/langchain/providers.py +200 -0
- headroom/integrations/langchain/retriever.py +371 -0
- headroom/integrations/langchain/streaming.py +341 -0
- headroom/integrations/mcp/__init__.py +37 -0
- headroom/integrations/mcp/server.py +533 -0
- headroom/memory/__init__.py +37 -0
- headroom/memory/extractor.py +390 -0
- headroom/memory/fast_store.py +621 -0
- headroom/memory/fast_wrapper.py +311 -0
- headroom/memory/inline_extractor.py +229 -0
- headroom/memory/store.py +434 -0
- headroom/memory/worker.py +260 -0
- headroom/memory/wrapper.py +321 -0
- headroom/models/__init__.py +39 -0
- headroom/models/registry.py +687 -0
- headroom/parser.py +293 -0
- headroom/pricing/__init__.py +51 -0
- headroom/pricing/anthropic_prices.py +81 -0
- headroom/pricing/litellm_pricing.py +113 -0
- headroom/pricing/openai_prices.py +91 -0
- headroom/pricing/registry.py +188 -0
- headroom/providers/__init__.py +61 -0
- headroom/providers/anthropic.py +621 -0
- headroom/providers/base.py +131 -0
- headroom/providers/cohere.py +362 -0
- headroom/providers/google.py +427 -0
- headroom/providers/litellm.py +297 -0
- headroom/providers/openai.py +566 -0
- headroom/providers/openai_compatible.py +521 -0
- headroom/proxy/__init__.py +19 -0
- headroom/proxy/server.py +2683 -0
- headroom/py.typed +0 -0
- headroom/relevance/__init__.py +124 -0
- headroom/relevance/base.py +106 -0
- headroom/relevance/bm25.py +255 -0
- headroom/relevance/embedding.py +255 -0
- headroom/relevance/hybrid.py +259 -0
- headroom/reporting/__init__.py +5 -0
- headroom/reporting/generator.py +549 -0
- headroom/storage/__init__.py +41 -0
- headroom/storage/base.py +125 -0
- headroom/storage/jsonl.py +220 -0
- headroom/storage/sqlite.py +289 -0
- headroom/telemetry/__init__.py +91 -0
- headroom/telemetry/collector.py +764 -0
- headroom/telemetry/models.py +880 -0
- headroom/telemetry/toin.py +1579 -0
- headroom/tokenizer.py +80 -0
- headroom/tokenizers/__init__.py +75 -0
- headroom/tokenizers/base.py +210 -0
- headroom/tokenizers/estimator.py +198 -0
- headroom/tokenizers/huggingface.py +317 -0
- headroom/tokenizers/mistral.py +245 -0
- headroom/tokenizers/registry.py +398 -0
- headroom/tokenizers/tiktoken_counter.py +248 -0
- headroom/transforms/__init__.py +106 -0
- headroom/transforms/base.py +57 -0
- headroom/transforms/cache_aligner.py +357 -0
- headroom/transforms/code_compressor.py +1313 -0
- headroom/transforms/content_detector.py +335 -0
- headroom/transforms/content_router.py +1158 -0
- headroom/transforms/llmlingua_compressor.py +638 -0
- headroom/transforms/log_compressor.py +529 -0
- headroom/transforms/pipeline.py +297 -0
- headroom/transforms/rolling_window.py +350 -0
- headroom/transforms/search_compressor.py +365 -0
- headroom/transforms/smart_crusher.py +2682 -0
- headroom/transforms/text_compressor.py +259 -0
- headroom/transforms/tool_crusher.py +338 -0
- headroom/utils.py +215 -0
- headroom_ai-0.2.13.dist-info/METADATA +315 -0
- headroom_ai-0.2.13.dist-info/RECORD +114 -0
- headroom_ai-0.2.13.dist-info/WHEEL +4 -0
- headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
- headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
- headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
"""Pricing registry for LLM model cost estimation."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from datetime import date, timedelta
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass(frozen=True)
|
|
8
|
+
class ModelPricing:
|
|
9
|
+
"""Immutable pricing information for a specific model.
|
|
10
|
+
|
|
11
|
+
All prices are in USD per 1 million tokens.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
model: str
|
|
15
|
+
provider: str
|
|
16
|
+
input_per_1m: float
|
|
17
|
+
output_per_1m: float
|
|
18
|
+
cached_input_per_1m: float | None = None
|
|
19
|
+
batch_input_per_1m: float | None = None
|
|
20
|
+
batch_output_per_1m: float | None = None
|
|
21
|
+
context_window: int | None = None
|
|
22
|
+
notes: str | None = None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
|
|
26
|
+
class CostEstimate:
|
|
27
|
+
"""Result of a cost estimation calculation."""
|
|
28
|
+
|
|
29
|
+
cost_usd: float
|
|
30
|
+
breakdown: dict = field(default_factory=dict)
|
|
31
|
+
pricing_date: date | None = None
|
|
32
|
+
is_stale: bool = False
|
|
33
|
+
warning: str | None = None
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class PricingRegistry:
    """Registry of model pricing information with cost estimation capabilities.

    Holds a mapping of model name to ModelPricing together with the date the
    prices were last verified, so that estimates can be flagged once the data
    is older than STALENESS_THRESHOLD_DAYS.
    """

    # Pricing is considered stale after this many days
    STALENESS_THRESHOLD_DAYS = 30

    def __init__(
        self,
        last_updated: date,
        source_url: str | None = None,
        prices: dict[str, ModelPricing] | None = None,
    ):
        """Initialize the pricing registry.

        Args:
            last_updated: Date when pricing information was last verified.
            source_url: URL to the official pricing page.
            prices: Dictionary mapping model names to ModelPricing objects.
                The dictionary is stored as given (not copied); when omitted,
                the registry starts empty.
        """
        self.last_updated = last_updated
        self.source_url = source_url
        # Test against None rather than truthiness: an empty dict is falsy, and
        # replacing it would detach the registry from a mapping the caller
        # intends to populate afterwards.
        self.prices: dict[str, ModelPricing] = {} if prices is None else prices

    def get_price(self, model: str) -> ModelPricing | None:
        """Get pricing for a specific model.

        Args:
            model: The model name/identifier (exact-match lookup key).

        Returns:
            ModelPricing if found, None otherwise.
        """
        return self.prices.get(model)

    def is_stale(self) -> bool:
        """Check if pricing information is potentially outdated.

        Returns:
            True if pricing data is older than STALENESS_THRESHOLD_DAYS.
        """
        age = date.today() - self.last_updated
        return age > timedelta(days=self.STALENESS_THRESHOLD_DAYS)

    def staleness_warning(self) -> str | None:
        """Get a warning message if pricing is stale.

        Returns:
            Warning message if stale, None otherwise. The message includes
            the source URL when one was provided.
        """
        if not self.is_stale():
            return None

        age_days = (date.today() - self.last_updated).days
        msg = f"Pricing data is {age_days} days old (last updated: {self.last_updated})."
        if self.source_url:
            msg += f" Please verify at: {self.source_url}"
        return msg

    def estimate_cost(
        self,
        model: str,
        input_tokens: int = 0,
        output_tokens: int = 0,
        cached_input_tokens: int = 0,
        batch_input_tokens: int = 0,
        batch_output_tokens: int = 0,
    ) -> CostEstimate:
        """Estimate the cost for a given token usage.

        Categories whose token count is not positive are skipped and do not
        appear in the breakdown.

        Args:
            model: The model name/identifier.
            input_tokens: Number of regular input tokens.
            output_tokens: Number of regular output tokens.
            cached_input_tokens: Number of cached input tokens.
            batch_input_tokens: Number of batch API input tokens.
            batch_output_tokens: Number of batch API output tokens.

        Returns:
            CostEstimate with calculated cost, per-category breakdown, the
            registry's last-updated date, and staleness information.

        Raises:
            ValueError: If model is not found in registry, or if tokens are
                supplied for a category the model has no rate for.
        """
        pricing = self.get_price(model)
        if pricing is None:
            raise ValueError(f"Model '{model}' not found in registry")

        # (breakdown key, token count, USD rate per 1M tokens). The order
        # fixes both the breakdown key order and the order costs are summed.
        line_items = (
            ("input", input_tokens, pricing.input_per_1m),
            ("output", output_tokens, pricing.output_per_1m),
            ("cached_input", cached_input_tokens, pricing.cached_input_per_1m),
            ("batch_input", batch_input_tokens, pricing.batch_input_per_1m),
            ("batch_output", batch_output_tokens, pricing.batch_output_per_1m),
        )

        breakdown: dict[str, dict[str, float]] = {}
        total_cost = 0.0

        for category, tokens, rate in line_items:
            if tokens > 0:
                if rate is None:
                    # "cached_input" -> "cached input", and so on.
                    label = category.replace("_", " ")
                    raise ValueError(f"Model '{model}' does not have {label} pricing")
                cost = (tokens / 1_000_000) * rate
                breakdown[category] = {
                    "tokens": tokens,
                    "rate_per_1m": rate,
                    "cost_usd": cost,
                }
                total_cost += cost

        return CostEstimate(
            cost_usd=total_cost,
            breakdown=breakdown,
            pricing_date=self.last_updated,
            is_stale=self.is_stale(),
            warning=self.staleness_warning(),
        )
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""Provider abstractions for Headroom SDK.
|
|
2
|
+
|
|
3
|
+
Providers encapsulate model-specific behavior like tokenization,
|
|
4
|
+
context limits, and cost estimation.
|
|
5
|
+
|
|
6
|
+
Supported Providers:
|
|
7
|
+
- OpenAIProvider: Native OpenAI models (GPT-4o, o1, etc.)
|
|
8
|
+
- AnthropicProvider: Claude models
|
|
9
|
+
- GoogleProvider: Google Gemini models
|
|
10
|
+
- CohereProvider: Cohere Command models
|
|
11
|
+
- OpenAICompatibleProvider: Universal provider for any OpenAI-compatible API
|
|
12
|
+
(Ollama, vLLM, Together, Groq, Fireworks, LM Studio, etc.)
|
|
13
|
+
- LiteLLMProvider: Universal provider via LiteLLM (100+ providers)
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from .anthropic import AnthropicProvider
|
|
17
|
+
from .base import Provider, TokenCounter
|
|
18
|
+
from .cohere import CohereProvider
|
|
19
|
+
from .google import GoogleProvider
|
|
20
|
+
from .litellm import (
|
|
21
|
+
LiteLLMProvider,
|
|
22
|
+
create_litellm_provider,
|
|
23
|
+
is_litellm_available,
|
|
24
|
+
)
|
|
25
|
+
from .openai import OpenAIProvider
|
|
26
|
+
from .openai_compatible import (
|
|
27
|
+
ModelCapabilities,
|
|
28
|
+
OpenAICompatibleProvider,
|
|
29
|
+
create_anyscale_provider,
|
|
30
|
+
create_fireworks_provider,
|
|
31
|
+
create_groq_provider,
|
|
32
|
+
create_lmstudio_provider,
|
|
33
|
+
create_ollama_provider,
|
|
34
|
+
create_together_provider,
|
|
35
|
+
create_vllm_provider,
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
# Public names exported by this package.
__all__ = [
    # Core abstractions
    "Provider",
    "TokenCounter",
    # Vendor-specific providers
    "OpenAIProvider",
    "AnthropicProvider",
    "GoogleProvider",
    "CohereProvider",
    # Providers that cover many backends
    "OpenAICompatibleProvider",
    "ModelCapabilities",
    "LiteLLMProvider",
    "is_litellm_available",
    # Convenience constructors
    "create_ollama_provider",
    "create_together_provider",
    "create_groq_provider",
    "create_fireworks_provider",
    "create_anyscale_provider",
    "create_vllm_provider",
    "create_lmstudio_provider",
    "create_litellm_provider",
]
|