headroom-ai 0.2.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- headroom/__init__.py +212 -0
- headroom/cache/__init__.py +76 -0
- headroom/cache/anthropic.py +517 -0
- headroom/cache/base.py +342 -0
- headroom/cache/compression_feedback.py +613 -0
- headroom/cache/compression_store.py +814 -0
- headroom/cache/dynamic_detector.py +1026 -0
- headroom/cache/google.py +884 -0
- headroom/cache/openai.py +584 -0
- headroom/cache/registry.py +175 -0
- headroom/cache/semantic.py +451 -0
- headroom/ccr/__init__.py +77 -0
- headroom/ccr/context_tracker.py +582 -0
- headroom/ccr/mcp_server.py +319 -0
- headroom/ccr/response_handler.py +772 -0
- headroom/ccr/tool_injection.py +415 -0
- headroom/cli.py +219 -0
- headroom/client.py +977 -0
- headroom/compression/__init__.py +42 -0
- headroom/compression/detector.py +424 -0
- headroom/compression/handlers/__init__.py +22 -0
- headroom/compression/handlers/base.py +219 -0
- headroom/compression/handlers/code_handler.py +506 -0
- headroom/compression/handlers/json_handler.py +418 -0
- headroom/compression/masks.py +345 -0
- headroom/compression/universal.py +465 -0
- headroom/config.py +474 -0
- headroom/exceptions.py +192 -0
- headroom/integrations/__init__.py +159 -0
- headroom/integrations/agno/__init__.py +53 -0
- headroom/integrations/agno/hooks.py +345 -0
- headroom/integrations/agno/model.py +625 -0
- headroom/integrations/agno/providers.py +154 -0
- headroom/integrations/langchain/__init__.py +106 -0
- headroom/integrations/langchain/agents.py +326 -0
- headroom/integrations/langchain/chat_model.py +1002 -0
- headroom/integrations/langchain/langsmith.py +324 -0
- headroom/integrations/langchain/memory.py +319 -0
- headroom/integrations/langchain/providers.py +200 -0
- headroom/integrations/langchain/retriever.py +371 -0
- headroom/integrations/langchain/streaming.py +341 -0
- headroom/integrations/mcp/__init__.py +37 -0
- headroom/integrations/mcp/server.py +533 -0
- headroom/memory/__init__.py +37 -0
- headroom/memory/extractor.py +390 -0
- headroom/memory/fast_store.py +621 -0
- headroom/memory/fast_wrapper.py +311 -0
- headroom/memory/inline_extractor.py +229 -0
- headroom/memory/store.py +434 -0
- headroom/memory/worker.py +260 -0
- headroom/memory/wrapper.py +321 -0
- headroom/models/__init__.py +39 -0
- headroom/models/registry.py +687 -0
- headroom/parser.py +293 -0
- headroom/pricing/__init__.py +51 -0
- headroom/pricing/anthropic_prices.py +81 -0
- headroom/pricing/litellm_pricing.py +113 -0
- headroom/pricing/openai_prices.py +91 -0
- headroom/pricing/registry.py +188 -0
- headroom/providers/__init__.py +61 -0
- headroom/providers/anthropic.py +621 -0
- headroom/providers/base.py +131 -0
- headroom/providers/cohere.py +362 -0
- headroom/providers/google.py +427 -0
- headroom/providers/litellm.py +297 -0
- headroom/providers/openai.py +566 -0
- headroom/providers/openai_compatible.py +521 -0
- headroom/proxy/__init__.py +19 -0
- headroom/proxy/server.py +2683 -0
- headroom/py.typed +0 -0
- headroom/relevance/__init__.py +124 -0
- headroom/relevance/base.py +106 -0
- headroom/relevance/bm25.py +255 -0
- headroom/relevance/embedding.py +255 -0
- headroom/relevance/hybrid.py +259 -0
- headroom/reporting/__init__.py +5 -0
- headroom/reporting/generator.py +549 -0
- headroom/storage/__init__.py +41 -0
- headroom/storage/base.py +125 -0
- headroom/storage/jsonl.py +220 -0
- headroom/storage/sqlite.py +289 -0
- headroom/telemetry/__init__.py +91 -0
- headroom/telemetry/collector.py +764 -0
- headroom/telemetry/models.py +880 -0
- headroom/telemetry/toin.py +1579 -0
- headroom/tokenizer.py +80 -0
- headroom/tokenizers/__init__.py +75 -0
- headroom/tokenizers/base.py +210 -0
- headroom/tokenizers/estimator.py +198 -0
- headroom/tokenizers/huggingface.py +317 -0
- headroom/tokenizers/mistral.py +245 -0
- headroom/tokenizers/registry.py +398 -0
- headroom/tokenizers/tiktoken_counter.py +248 -0
- headroom/transforms/__init__.py +106 -0
- headroom/transforms/base.py +57 -0
- headroom/transforms/cache_aligner.py +357 -0
- headroom/transforms/code_compressor.py +1313 -0
- headroom/transforms/content_detector.py +335 -0
- headroom/transforms/content_router.py +1158 -0
- headroom/transforms/llmlingua_compressor.py +638 -0
- headroom/transforms/log_compressor.py +529 -0
- headroom/transforms/pipeline.py +297 -0
- headroom/transforms/rolling_window.py +350 -0
- headroom/transforms/search_compressor.py +365 -0
- headroom/transforms/smart_crusher.py +2682 -0
- headroom/transforms/text_compressor.py +259 -0
- headroom/transforms/tool_crusher.py +338 -0
- headroom/utils.py +215 -0
- headroom_ai-0.2.13.dist-info/METADATA +315 -0
- headroom_ai-0.2.13.dist-info/RECORD +114 -0
- headroom_ai-0.2.13.dist-info/WHEEL +4 -0
- headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
- headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
- headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
"""
Headroom - The Context Optimization Layer for LLM Applications.

Cut your LLM costs by 50-90% without losing accuracy.

Headroom wraps LLM clients to provide:
- Smart compression of tool outputs (keeps errors, anomalies, relevant items)
- Cache-aligned prefix optimization for better provider cache hits
- Rolling window token management for long conversations
- Full streaming support with zero accuracy loss

Quick Start:

    from headroom import HeadroomClient, OpenAIProvider
    from openai import OpenAI

    # Wrap your existing client
    client = HeadroomClient(
        original_client=OpenAI(),
        provider=OpenAIProvider(),
        default_mode="optimize",
    )

    # Use exactly like the original client
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "user", "content": "Hello!"},
        ],
    )

    # Check savings
    stats = client.get_stats()
    print(f"Tokens saved: {stats['session']['tokens_saved_total']}")

Verify It's Working:

    # Validate configuration
    result = client.validate_setup()
    if not result["valid"]:
        print("Issues:", result)

    # Enable logging to see what's happening
    import logging
    logging.basicConfig(level=logging.INFO)
    # INFO:headroom.transforms.pipeline:Pipeline complete: 45000 -> 4500 tokens

Simulate Before Sending:

    plan = client.chat.completions.simulate(
        model="gpt-4o",
        messages=large_messages,
    )
    print(f"Would save {plan.tokens_saved} tokens")
    print(f"Transforms: {plan.transforms}")

Error Handling:

    from headroom import HeadroomError, ConfigurationError, ProviderError

    try:
        response = client.chat.completions.create(...)
    except ConfigurationError as e:
        print(f"Config issue: {e.details}")
    except HeadroomError as e:
        print(f"Headroom error: {e}")

For more examples, see https://github.com/headroom-sdk/headroom/tree/main/examples
"""

# Re-export the public API from the subpackages so users can do
# `from headroom import X` instead of reaching into internal modules.

# Cache optimization: provider-specific cache optimizers plus the
# semantic-similarity caching layer.
from .cache import (
    AnthropicCacheOptimizer,
    BaseCacheOptimizer,
    CacheConfig,
    CacheMetrics,
    CacheOptimizerRegistry,
    CacheResult,
    CacheStrategy,
    GoogleCacheOptimizer,
    OpenAICacheOptimizer,
    OptimizationContext,
    SemanticCache,
    SemanticCacheLayer,
)
from .client import HeadroomClient
from .config import (
    Block,
    CacheAlignerConfig,
    CacheOptimizerConfig,
    CachePrefixMetrics,
    DiffArtifact,
    HeadroomConfig,
    HeadroomMode,
    RelevanceScorerConfig,
    RequestMetrics,
    RollingWindowConfig,
    SimulationResult,
    SmartCrusherConfig,
    ToolCrusherConfig,
    TransformDiff,
    TransformResult,
    WasteSignals,
)
from .exceptions import (
    CacheError,
    CompressionError,
    ConfigurationError,
    HeadroomError,
    ProviderError,
    StorageError,
    TokenizationError,
    TransformError,
    ValidationError,
)

# Memory module - simple, LLM-driven memory
from .memory import Memory, SQLiteMemoryStore, with_memory
from .providers import AnthropicProvider, OpenAIProvider, Provider, TokenCounter
from .relevance import (
    BM25Scorer,
    EmbeddingScorer,
    HybridScorer,
    RelevanceScore,
    RelevanceScorer,
    create_scorer,
    embedding_available,
)
from .reporting import generate_report
from .tokenizer import Tokenizer, count_tokens_messages, count_tokens_text
from .transforms import (
    CacheAligner,
    RollingWindow,
    SmartCrusher,
    ToolCrusher,
    TransformPipeline,
)

# Package version. FIX: this was hard-coded to "0.2.0" while the built
# distribution ships as headroom_ai-0.2.13 — keep it in sync with the
# release version so `headroom.__version__` is trustworthy.
# TODO(review): consider single-sourcing this from package metadata
# (importlib.metadata.version) to prevent future drift.
__version__ = "0.2.13"

__all__ = [
    # Main client
    "HeadroomClient",
    # Providers
    "Provider",
    "TokenCounter",
    "OpenAIProvider",
    "AnthropicProvider",
    # Exceptions
    "HeadroomError",
    "ConfigurationError",
    "ProviderError",
    "StorageError",
    "CompressionError",
    "TokenizationError",
    "CacheError",
    "ValidationError",
    "TransformError",
    # Config
    "HeadroomConfig",
    "HeadroomMode",
    "ToolCrusherConfig",
    "SmartCrusherConfig",
    "CacheAlignerConfig",
    "CacheOptimizerConfig",
    "RollingWindowConfig",
    "RelevanceScorerConfig",
    # Data models
    "Block",
    "CachePrefixMetrics",
    "DiffArtifact",
    "RequestMetrics",
    "SimulationResult",
    "TransformDiff",
    "TransformResult",
    "WasteSignals",
    # Transforms
    "ToolCrusher",
    "SmartCrusher",
    "CacheAligner",
    "RollingWindow",
    "TransformPipeline",
    # Cache optimizers
    "BaseCacheOptimizer",
    "CacheConfig",
    "CacheMetrics",
    "CacheResult",
    "CacheStrategy",
    "OptimizationContext",
    "CacheOptimizerRegistry",
    "AnthropicCacheOptimizer",
    "OpenAICacheOptimizer",
    "GoogleCacheOptimizer",
    "SemanticCache",
    "SemanticCacheLayer",
    # Relevance scoring
    "RelevanceScore",
    "RelevanceScorer",
    "BM25Scorer",
    "EmbeddingScorer",
    "HybridScorer",
    "create_scorer",
    "embedding_available",
    # Utilities
    "Tokenizer",
    "count_tokens_text",
    "count_tokens_messages",
    "generate_report",
    # Memory - simple, LLM-driven memory
    "with_memory",
    "Memory",
    "SQLiteMemoryStore",
]
"""
Headroom Cache Optimization Module.

This module provides a plugin-based architecture for cache optimization
across different LLM providers. Each provider has different caching
mechanisms and this module abstracts those differences.

Provider Caching Differences:
- Anthropic: Explicit cache_control blocks, 90% savings, 5-min TTL
- OpenAI: Automatic prefix caching, 50% savings, no user control
- Google: Separate CachedContent API, 75% savings + storage costs

Usage:
    from headroom.cache import CacheOptimizerRegistry, SemanticCacheLayer

    # Get provider-specific optimizer
    optimizer = CacheOptimizerRegistry.get("anthropic")
    result = optimizer.optimize(messages, context)

    # With semantic caching layer
    semantic = SemanticCacheLayer(optimizer, similarity_threshold=0.95)
    result = semantic.process(messages, context)

    # Register custom optimizer
    CacheOptimizerRegistry.register("my-provider", MyOptimizer)
"""

# Provider-specific optimizer implementations.
from .anthropic import AnthropicCacheOptimizer

# Shared abstractions: configuration, strategy/result types, and the
# optimizer base classes that every provider implementation builds on.
from .base import (
    BaseCacheOptimizer,
    CacheBreakpoint,
    CacheConfig,
    CacheMetrics,
    CacheOptimizer,
    CacheResult,
    CacheStrategy,
    OptimizationContext,
)

# Detection of dynamic (non-cacheable) spans inside message content.
from .dynamic_detector import (
    DetectorConfig,
    DynamicCategory,
    DynamicContentDetector,
    DynamicSpan,
    detect_dynamic_content,
)
from .google import GoogleCacheOptimizer
from .openai import OpenAICacheOptimizer

# Lookup table mapping provider names to optimizer classes.
from .registry import CacheOptimizerRegistry

# Similarity-based response caching layered on top of any optimizer.
from .semantic import SemanticCache, SemanticCacheLayer

__all__ = [
    # Base types
    "BaseCacheOptimizer",
    "CacheBreakpoint",
    "CacheConfig",
    "CacheMetrics",
    "CacheOptimizer",
    "CacheResult",
    "CacheStrategy",
    "OptimizationContext",
    # Dynamic content detection
    "DetectorConfig",
    "DynamicCategory",
    "DynamicContentDetector",
    "DynamicSpan",
    "detect_dynamic_content",
    # Registry
    "CacheOptimizerRegistry",
    # Provider implementations
    "AnthropicCacheOptimizer",
    "OpenAICacheOptimizer",
    "GoogleCacheOptimizer",
    # Semantic caching
    "SemanticCacheLayer",
    "SemanticCache",
]