headroom-ai 0.2.13__py3-none-any.whl
This diff shows the content of a publicly available package version as released to its public registry. It is provided for informational purposes only and reflects the package as it appears in that registry.
- headroom/__init__.py +212 -0
- headroom/cache/__init__.py +76 -0
- headroom/cache/anthropic.py +517 -0
- headroom/cache/base.py +342 -0
- headroom/cache/compression_feedback.py +613 -0
- headroom/cache/compression_store.py +814 -0
- headroom/cache/dynamic_detector.py +1026 -0
- headroom/cache/google.py +884 -0
- headroom/cache/openai.py +584 -0
- headroom/cache/registry.py +175 -0
- headroom/cache/semantic.py +451 -0
- headroom/ccr/__init__.py +77 -0
- headroom/ccr/context_tracker.py +582 -0
- headroom/ccr/mcp_server.py +319 -0
- headroom/ccr/response_handler.py +772 -0
- headroom/ccr/tool_injection.py +415 -0
- headroom/cli.py +219 -0
- headroom/client.py +977 -0
- headroom/compression/__init__.py +42 -0
- headroom/compression/detector.py +424 -0
- headroom/compression/handlers/__init__.py +22 -0
- headroom/compression/handlers/base.py +219 -0
- headroom/compression/handlers/code_handler.py +506 -0
- headroom/compression/handlers/json_handler.py +418 -0
- headroom/compression/masks.py +345 -0
- headroom/compression/universal.py +465 -0
- headroom/config.py +474 -0
- headroom/exceptions.py +192 -0
- headroom/integrations/__init__.py +159 -0
- headroom/integrations/agno/__init__.py +53 -0
- headroom/integrations/agno/hooks.py +345 -0
- headroom/integrations/agno/model.py +625 -0
- headroom/integrations/agno/providers.py +154 -0
- headroom/integrations/langchain/__init__.py +106 -0
- headroom/integrations/langchain/agents.py +326 -0
- headroom/integrations/langchain/chat_model.py +1002 -0
- headroom/integrations/langchain/langsmith.py +324 -0
- headroom/integrations/langchain/memory.py +319 -0
- headroom/integrations/langchain/providers.py +200 -0
- headroom/integrations/langchain/retriever.py +371 -0
- headroom/integrations/langchain/streaming.py +341 -0
- headroom/integrations/mcp/__init__.py +37 -0
- headroom/integrations/mcp/server.py +533 -0
- headroom/memory/__init__.py +37 -0
- headroom/memory/extractor.py +390 -0
- headroom/memory/fast_store.py +621 -0
- headroom/memory/fast_wrapper.py +311 -0
- headroom/memory/inline_extractor.py +229 -0
- headroom/memory/store.py +434 -0
- headroom/memory/worker.py +260 -0
- headroom/memory/wrapper.py +321 -0
- headroom/models/__init__.py +39 -0
- headroom/models/registry.py +687 -0
- headroom/parser.py +293 -0
- headroom/pricing/__init__.py +51 -0
- headroom/pricing/anthropic_prices.py +81 -0
- headroom/pricing/litellm_pricing.py +113 -0
- headroom/pricing/openai_prices.py +91 -0
- headroom/pricing/registry.py +188 -0
- headroom/providers/__init__.py +61 -0
- headroom/providers/anthropic.py +621 -0
- headroom/providers/base.py +131 -0
- headroom/providers/cohere.py +362 -0
- headroom/providers/google.py +427 -0
- headroom/providers/litellm.py +297 -0
- headroom/providers/openai.py +566 -0
- headroom/providers/openai_compatible.py +521 -0
- headroom/proxy/__init__.py +19 -0
- headroom/proxy/server.py +2683 -0
- headroom/py.typed +0 -0
- headroom/relevance/__init__.py +124 -0
- headroom/relevance/base.py +106 -0
- headroom/relevance/bm25.py +255 -0
- headroom/relevance/embedding.py +255 -0
- headroom/relevance/hybrid.py +259 -0
- headroom/reporting/__init__.py +5 -0
- headroom/reporting/generator.py +549 -0
- headroom/storage/__init__.py +41 -0
- headroom/storage/base.py +125 -0
- headroom/storage/jsonl.py +220 -0
- headroom/storage/sqlite.py +289 -0
- headroom/telemetry/__init__.py +91 -0
- headroom/telemetry/collector.py +764 -0
- headroom/telemetry/models.py +880 -0
- headroom/telemetry/toin.py +1579 -0
- headroom/tokenizer.py +80 -0
- headroom/tokenizers/__init__.py +75 -0
- headroom/tokenizers/base.py +210 -0
- headroom/tokenizers/estimator.py +198 -0
- headroom/tokenizers/huggingface.py +317 -0
- headroom/tokenizers/mistral.py +245 -0
- headroom/tokenizers/registry.py +398 -0
- headroom/tokenizers/tiktoken_counter.py +248 -0
- headroom/transforms/__init__.py +106 -0
- headroom/transforms/base.py +57 -0
- headroom/transforms/cache_aligner.py +357 -0
- headroom/transforms/code_compressor.py +1313 -0
- headroom/transforms/content_detector.py +335 -0
- headroom/transforms/content_router.py +1158 -0
- headroom/transforms/llmlingua_compressor.py +638 -0
- headroom/transforms/log_compressor.py +529 -0
- headroom/transforms/pipeline.py +297 -0
- headroom/transforms/rolling_window.py +350 -0
- headroom/transforms/search_compressor.py +365 -0
- headroom/transforms/smart_crusher.py +2682 -0
- headroom/transforms/text_compressor.py +259 -0
- headroom/transforms/tool_crusher.py +338 -0
- headroom/utils.py +215 -0
- headroom_ai-0.2.13.dist-info/METADATA +315 -0
- headroom_ai-0.2.13.dist-info/RECORD +114 -0
- headroom_ai-0.2.13.dist-info/WHEEL +4 -0
- headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
- headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
- headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
headroom/cache/base.py
ADDED
@@ -0,0 +1,342 @@
"""
Base types and interfaces for cache optimization.

This module defines the core abstractions that all cache optimizers implement.
The design allows for provider-specific implementations while maintaining a
consistent interface for users.
"""

from __future__ import annotations

from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Any, Literal, Protocol, runtime_checkable


class CacheStrategy(Enum):
    """Cache optimization strategy."""

    # Just stabilize prefix (move dates, normalize whitespace)
    PREFIX_STABILIZATION = "prefix_stabilization"

    # Insert explicit cache breakpoints (Anthropic)
    EXPLICIT_BREAKPOINTS = "explicit_breakpoints"

    # Manage separate cached content objects (Google)
    CACHED_CONTENT = "cached_content"

    # No optimization possible (provider doesn't support caching)
    NONE = "none"


class BreakpointLocation(Enum):
    """Where to insert cache breakpoints."""

    AFTER_SYSTEM = "after_system"
    AFTER_TOOLS = "after_tools"
    AFTER_EXAMPLES = "after_examples"
    CUSTOM = "custom"


@dataclass
class CacheBreakpoint:
    """
    Represents a cache breakpoint location.

    For Anthropic, this maps to cache_control blocks.
    For other providers, this is informational.
    """

    # Message index where breakpoint should be inserted
    message_index: int

    # Location type
    location: BreakpointLocation

    # For content arrays, index within the content
    content_index: int | None = None

    # Token count at this breakpoint
    tokens_at_breakpoint: int = 0

    # Reason for this breakpoint
    reason: str = ""


@dataclass
class CacheConfig:
    """Configuration for cache optimization."""

    # Whether to optimize at all
    enabled: bool = True

    # Strategy to use (auto-detected if None)
    strategy: CacheStrategy | None = None

    # Minimum tokens before caching makes sense
    min_cacheable_tokens: int = 1024

    # Maximum number of breakpoints (Anthropic limit is 4)
    max_breakpoints: int = 4

    # Patterns to extract and move to dynamic section
    date_patterns: list[str] = field(
        default_factory=lambda: [
            r"Today is \w+ \d{1,2},? \d{4}\.?",
            r"Current date: \d{4}-\d{2}-\d{2}",
            r"The current time is .+\.",
        ]
    )

    # Whether to normalize whitespace
    normalize_whitespace: bool = True

    # Collapse multiple blank lines
    collapse_blank_lines: bool = True

    # Separator between static and dynamic content
    dynamic_separator: str = "\n\n---\n\n"

    # Dynamic content detection tiers (for OpenAI prefix stabilization)
    # - "regex": Fast pattern matching (~0ms) - always recommended
    # - "ner": Named Entity Recognition via spaCy (~5-10ms) - catches names, money, etc.
    # - "semantic": Embedding similarity (~20-50ms) - catches volatile patterns
    # Default is regex-only for speed. Add tiers for better detection at cost of latency.
    dynamic_detection_tiers: list[Literal["regex", "ner", "semantic"]] = field(
        default_factory=lambda: ["regex"]
    )

    # For semantic caching
    semantic_cache_enabled: bool = False
    semantic_similarity_threshold: float = 0.95
    semantic_cache_ttl_seconds: int = 300


@dataclass
class CacheMetrics:
    """Metrics about cache optimization."""

    # Prefix analysis
    stable_prefix_tokens: int = 0
    stable_prefix_hash: str = ""

    # Breakpoint info
    breakpoints_inserted: int = 0
    breakpoint_locations: list[CacheBreakpoint] = field(default_factory=list)

    # Cache hit estimation
    prefix_changed_from_previous: bool = False
    previous_prefix_hash: str | None = None
    estimated_cache_hit: bool = False

    # Savings estimation
    estimated_savings_percent: float = 0.0
    cacheable_tokens: int = 0
    non_cacheable_tokens: int = 0

    # Provider-specific
    provider_cache_id: str | None = None  # For Google's CachedContent
    cache_ttl_remaining_seconds: int | None = None


@dataclass
class OptimizationContext:
    """Context for optimization request."""

    # Request tracking
    request_id: str = ""
    timestamp: datetime = field(default_factory=datetime.now)

    # Provider info
    provider: str = ""
    model: str = ""

    # Query for relevance (used by semantic cache)
    query: str | None = None

    # Previous request info (for cache hit detection)
    previous_prefix_hash: str | None = None

    # Additional metadata
    metadata: dict[str, Any] = field(default_factory=dict)


@dataclass
class CacheResult:
    """Result of cache optimization."""

    # Optimized messages
    messages: list[dict[str, Any]]

    # Whether this was a semantic cache hit
    semantic_cache_hit: bool = False

    # Cached response (if semantic cache hit)
    cached_response: Any | None = None

    # Optimization metrics
    metrics: CacheMetrics = field(default_factory=CacheMetrics)

    # Tokens before/after
    tokens_before: int = 0
    tokens_after: int = 0

    # Transforms applied
    transforms_applied: list[str] = field(default_factory=list)

    # Warnings
    warnings: list[str] = field(default_factory=list)


@runtime_checkable
class CacheOptimizer(Protocol):
    """
    Protocol for cache optimizers.

    All provider-specific optimizers must implement this interface.
    This allows for easy swapping of implementations and plugin registration.
    """

    @property
    def name(self) -> str:
        """Name of this optimizer."""
        ...

    @property
    def provider(self) -> str:
        """Provider this optimizer is for."""
        ...

    @property
    def strategy(self) -> CacheStrategy:
        """The caching strategy this optimizer uses."""
        ...

    def optimize(
        self,
        messages: list[dict[str, Any]],
        context: OptimizationContext,
        config: CacheConfig | None = None,
    ) -> CacheResult:
        """
        Optimize messages for caching.

        Args:
            messages: The messages to optimize.
            context: Optimization context with request info.
            config: Optional configuration override.

        Returns:
            CacheResult with optimized messages and metrics.
        """
        ...

    def get_metrics(self) -> CacheMetrics:
        """Get aggregated metrics from this optimizer."""
        ...

    def estimate_savings(
        self,
        messages: list[dict[str, Any]],
        context: OptimizationContext,
    ) -> float:
        """
        Estimate potential savings from optimization.

        Returns:
            Estimated savings as a percentage (0-100).
        """
        ...


class BaseCacheOptimizer(ABC):
    """
    Abstract base class for cache optimizers.

    Provides common functionality for all optimizers.
    """

    def __init__(self, config: CacheConfig | None = None):
        self.config = config or CacheConfig()
        self._metrics_history: list[CacheMetrics] = []
        self._previous_prefix_hash: str | None = None

    @property
    @abstractmethod
    def name(self) -> str:
        """Name of this optimizer."""
        ...

    @property
    @abstractmethod
    def provider(self) -> str:
        """Provider this optimizer is for."""
        ...

    @property
    @abstractmethod
    def strategy(self) -> CacheStrategy:
        """The caching strategy this optimizer uses."""
        ...

    @abstractmethod
    def optimize(
        self,
        messages: list[dict[str, Any]],
        context: OptimizationContext,
        config: CacheConfig | None = None,
    ) -> CacheResult:
        """Optimize messages for caching."""
        ...

    def get_metrics(self) -> CacheMetrics:
        """Get aggregated metrics."""
        if not self._metrics_history:
            return CacheMetrics()

        # Return most recent metrics
        return self._metrics_history[-1]

    def estimate_savings(
        self,
        messages: list[dict[str, Any]],
        context: OptimizationContext,
    ) -> float:
        """Estimate potential savings."""
        # Default implementation - subclasses can override
        result = self.optimize(messages, context)
        return result.metrics.estimated_savings_percent

    def _record_metrics(self, metrics: CacheMetrics) -> None:
        """Record metrics for history."""
        self._metrics_history.append(metrics)
        # Keep only last 100 entries
        if len(self._metrics_history) > 100:
            self._metrics_history = self._metrics_history[-100:]

    def _compute_prefix_hash(self, content: str) -> str:
        """Compute a short hash of content."""
        import hashlib

        return hashlib.sha256(content.encode()).hexdigest()[:12]

    def _extract_system_content(self, messages: list[dict[str, Any]]) -> str:
        """Extract content from system messages."""
        parts = []
        for msg in messages:
            if msg.get("role") == "system":
                content = msg.get("content", "")
                if isinstance(content, str):
                    parts.append(content)
                elif isinstance(content, list):
                    # Handle content blocks
                    for block in content:
                        if isinstance(block, dict) and block.get("type") == "text":
                            parts.append(block.get("text", ""))
        return "\n".join(parts)

    def _count_tokens_estimate(self, text: str) -> int:
        """Rough token count estimate (4 chars per token)."""
        return len(text) // 4