headroom_ai-0.2.13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. headroom/__init__.py +212 -0
  2. headroom/cache/__init__.py +76 -0
  3. headroom/cache/anthropic.py +517 -0
  4. headroom/cache/base.py +342 -0
  5. headroom/cache/compression_feedback.py +613 -0
  6. headroom/cache/compression_store.py +814 -0
  7. headroom/cache/dynamic_detector.py +1026 -0
  8. headroom/cache/google.py +884 -0
  9. headroom/cache/openai.py +584 -0
  10. headroom/cache/registry.py +175 -0
  11. headroom/cache/semantic.py +451 -0
  12. headroom/ccr/__init__.py +77 -0
  13. headroom/ccr/context_tracker.py +582 -0
  14. headroom/ccr/mcp_server.py +319 -0
  15. headroom/ccr/response_handler.py +772 -0
  16. headroom/ccr/tool_injection.py +415 -0
  17. headroom/cli.py +219 -0
  18. headroom/client.py +977 -0
  19. headroom/compression/__init__.py +42 -0
  20. headroom/compression/detector.py +424 -0
  21. headroom/compression/handlers/__init__.py +22 -0
  22. headroom/compression/handlers/base.py +219 -0
  23. headroom/compression/handlers/code_handler.py +506 -0
  24. headroom/compression/handlers/json_handler.py +418 -0
  25. headroom/compression/masks.py +345 -0
  26. headroom/compression/universal.py +465 -0
  27. headroom/config.py +474 -0
  28. headroom/exceptions.py +192 -0
  29. headroom/integrations/__init__.py +159 -0
  30. headroom/integrations/agno/__init__.py +53 -0
  31. headroom/integrations/agno/hooks.py +345 -0
  32. headroom/integrations/agno/model.py +625 -0
  33. headroom/integrations/agno/providers.py +154 -0
  34. headroom/integrations/langchain/__init__.py +106 -0
  35. headroom/integrations/langchain/agents.py +326 -0
  36. headroom/integrations/langchain/chat_model.py +1002 -0
  37. headroom/integrations/langchain/langsmith.py +324 -0
  38. headroom/integrations/langchain/memory.py +319 -0
  39. headroom/integrations/langchain/providers.py +200 -0
  40. headroom/integrations/langchain/retriever.py +371 -0
  41. headroom/integrations/langchain/streaming.py +341 -0
  42. headroom/integrations/mcp/__init__.py +37 -0
  43. headroom/integrations/mcp/server.py +533 -0
  44. headroom/memory/__init__.py +37 -0
  45. headroom/memory/extractor.py +390 -0
  46. headroom/memory/fast_store.py +621 -0
  47. headroom/memory/fast_wrapper.py +311 -0
  48. headroom/memory/inline_extractor.py +229 -0
  49. headroom/memory/store.py +434 -0
  50. headroom/memory/worker.py +260 -0
  51. headroom/memory/wrapper.py +321 -0
  52. headroom/models/__init__.py +39 -0
  53. headroom/models/registry.py +687 -0
  54. headroom/parser.py +293 -0
  55. headroom/pricing/__init__.py +51 -0
  56. headroom/pricing/anthropic_prices.py +81 -0
  57. headroom/pricing/litellm_pricing.py +113 -0
  58. headroom/pricing/openai_prices.py +91 -0
  59. headroom/pricing/registry.py +188 -0
  60. headroom/providers/__init__.py +61 -0
  61. headroom/providers/anthropic.py +621 -0
  62. headroom/providers/base.py +131 -0
  63. headroom/providers/cohere.py +362 -0
  64. headroom/providers/google.py +427 -0
  65. headroom/providers/litellm.py +297 -0
  66. headroom/providers/openai.py +566 -0
  67. headroom/providers/openai_compatible.py +521 -0
  68. headroom/proxy/__init__.py +19 -0
  69. headroom/proxy/server.py +2683 -0
  70. headroom/py.typed +0 -0
  71. headroom/relevance/__init__.py +124 -0
  72. headroom/relevance/base.py +106 -0
  73. headroom/relevance/bm25.py +255 -0
  74. headroom/relevance/embedding.py +255 -0
  75. headroom/relevance/hybrid.py +259 -0
  76. headroom/reporting/__init__.py +5 -0
  77. headroom/reporting/generator.py +549 -0
  78. headroom/storage/__init__.py +41 -0
  79. headroom/storage/base.py +125 -0
  80. headroom/storage/jsonl.py +220 -0
  81. headroom/storage/sqlite.py +289 -0
  82. headroom/telemetry/__init__.py +91 -0
  83. headroom/telemetry/collector.py +764 -0
  84. headroom/telemetry/models.py +880 -0
  85. headroom/telemetry/toin.py +1579 -0
  86. headroom/tokenizer.py +80 -0
  87. headroom/tokenizers/__init__.py +75 -0
  88. headroom/tokenizers/base.py +210 -0
  89. headroom/tokenizers/estimator.py +198 -0
  90. headroom/tokenizers/huggingface.py +317 -0
  91. headroom/tokenizers/mistral.py +245 -0
  92. headroom/tokenizers/registry.py +398 -0
  93. headroom/tokenizers/tiktoken_counter.py +248 -0
  94. headroom/transforms/__init__.py +106 -0
  95. headroom/transforms/base.py +57 -0
  96. headroom/transforms/cache_aligner.py +357 -0
  97. headroom/transforms/code_compressor.py +1313 -0
  98. headroom/transforms/content_detector.py +335 -0
  99. headroom/transforms/content_router.py +1158 -0
  100. headroom/transforms/llmlingua_compressor.py +638 -0
  101. headroom/transforms/log_compressor.py +529 -0
  102. headroom/transforms/pipeline.py +297 -0
  103. headroom/transforms/rolling_window.py +350 -0
  104. headroom/transforms/search_compressor.py +365 -0
  105. headroom/transforms/smart_crusher.py +2682 -0
  106. headroom/transforms/text_compressor.py +259 -0
  107. headroom/transforms/tool_crusher.py +338 -0
  108. headroom/utils.py +215 -0
  109. headroom_ai-0.2.13.dist-info/METADATA +315 -0
  110. headroom_ai-0.2.13.dist-info/RECORD +114 -0
  111. headroom_ai-0.2.13.dist-info/WHEEL +4 -0
  112. headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
  113. headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
  114. headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
headroom/cache/registry.py
@@ -0,0 +1,175 @@
+"""
+Cache Optimizer Registry.
+
+Provides a plugin system for registering and retrieving cache optimizers.
+This allows users to swap implementations and register custom optimizers.
+"""
+
+from __future__ import annotations
+
+from .base import BaseCacheOptimizer, CacheConfig
+
+
+class CacheOptimizerRegistry:
+    """
+    Registry for cache optimizer plugins.
+
+    This registry allows:
+    - Registration of custom optimizers
+    - Retrieval by provider name
+    - Tier-based selection (oss vs enterprise)
+
+    Usage:
+        # Get default optimizer for provider
+        optimizer = CacheOptimizerRegistry.get("anthropic")
+
+        # Get enterprise version if available
+        optimizer = CacheOptimizerRegistry.get("anthropic", tier="enterprise")
+
+        # Register custom optimizer
+        CacheOptimizerRegistry.register("my-provider", MyOptimizer)
+    """
+
+    _optimizers: dict[str, type[BaseCacheOptimizer]] = {}
+    _instances: dict[str, BaseCacheOptimizer] = {}
+
+    @classmethod
+    def register(
+        cls,
+        name: str,
+        optimizer_class: type[BaseCacheOptimizer],
+        *,
+        override: bool = False,
+    ) -> None:
+        """
+        Register a cache optimizer.
+
+        Args:
+            name: Name to register under (e.g., "anthropic", "anthropic-enterprise")
+            optimizer_class: The optimizer class to register
+            override: Whether to override existing registration
+
+        Raises:
+            ValueError: If name already registered and override=False
+        """
+        if name in cls._optimizers and not override:
+            raise ValueError(
+                f"Optimizer '{name}' already registered. Use override=True to replace."
+            )
+        cls._optimizers[name] = optimizer_class
+        # Clear cached instance if exists
+        cls._instances.pop(name, None)
+
+    @classmethod
+    def unregister(cls, name: str) -> None:
+        """
+        Unregister a cache optimizer.
+
+        Args:
+            name: Name to unregister
+        """
+        cls._optimizers.pop(name, None)
+        cls._instances.pop(name, None)
+
+    @classmethod
+    def get(
+        cls,
+        provider: str,
+        tier: str = "oss",
+        config: CacheConfig | None = None,
+        *,
+        cached: bool = True,
+    ) -> BaseCacheOptimizer:
+        """
+        Get a cache optimizer for a provider.
+
+        Args:
+            provider: Provider name (e.g., "anthropic", "openai", "google")
+            tier: Tier to get ("oss" or "enterprise")
+            config: Optional configuration
+            cached: Whether to return cached instance
+
+        Returns:
+            Cache optimizer instance
+
+        Raises:
+            KeyError: If no optimizer registered for provider/tier
+        """
+        # Build the lookup key
+        if tier != "oss":
+            key = f"{provider}-{tier}"
+            # Fall back to OSS if enterprise not available
+            if key not in cls._optimizers:
+                key = provider
+        else:
+            key = provider
+
+        if key not in cls._optimizers:
+            available = list(cls._optimizers.keys())
+            raise KeyError(f"No optimizer registered for '{key}'. Available: {available}")
+
+        # Return cached instance if requested
+        cache_key = f"{key}:{id(config)}" if config else key
+        if cached and cache_key in cls._instances:
+            return cls._instances[cache_key]
+
+        # Create new instance
+        optimizer_class = cls._optimizers[key]
+        instance = optimizer_class(config)
+
+        if cached:
+            cls._instances[cache_key] = instance
+
+        return instance
+
+    @classmethod
+    def list_providers(cls) -> list[str]:
+        """List all registered provider names (excluding tier suffixes)."""
+        providers = set()
+        for name in cls._optimizers:
+            # Remove tier suffix if present
+            base_name = name.split("-")[0]
+            providers.add(base_name)
+        return sorted(providers)
+
+    @classmethod
+    def list_all(cls) -> list[str]:
+        """List all registered optimizer names."""
+        return sorted(cls._optimizers.keys())
+
+    @classmethod
+    def is_registered(cls, name: str) -> bool:
+        """Check if an optimizer is registered."""
+        return name in cls._optimizers
+
+    @classmethod
+    def clear(cls) -> None:
+        """Clear all registrations. Mainly for testing."""
+        cls._optimizers.clear()
+        cls._instances.clear()
+
+    @classmethod
+    def reset_to_defaults(cls) -> None:
+        """Reset to default registrations."""
+        cls.clear()
+        _register_defaults()
+
+
+def _register_defaults() -> None:
+    """Register default optimizers."""
+    # Import here to avoid circular imports
+    from .anthropic import AnthropicCacheOptimizer
+    from .google import GoogleCacheOptimizer
+    from .openai import OpenAICacheOptimizer
+
+    CacheOptimizerRegistry.register("anthropic", AnthropicCacheOptimizer)
+    CacheOptimizerRegistry.register("openai", OpenAICacheOptimizer)
+    CacheOptimizerRegistry.register("google", GoogleCacheOptimizer)
+
+
+# Auto-register defaults on module import
+# Wrapped in try/except to allow partial imports during development
try:
+    _register_defaults()
+except ImportError:
+    pass
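
To make the registration flow concrete, here is a minimal usage sketch exercising only methods defined in this file. The MyOptimizer class is hypothetical; it subclasses the shipped Anthropic optimizer purely so the example is self-contained, but any BaseCacheOptimizer subclass works.

from headroom.cache.anthropic import AnthropicCacheOptimizer
from headroom.cache.registry import CacheOptimizerRegistry


class MyOptimizer(AnthropicCacheOptimizer):
    """Hypothetical custom optimizer; reuses a shipped class for brevity."""


# Register under a new name; re-registering the same name raises
# ValueError unless override=True is passed.
CacheOptimizerRegistry.register("mycloud", MyOptimizer)

# tier="enterprise" looks up "mycloud-enterprise" first and silently
# falls back to the base "mycloud" registration, per get() above.
optimizer = CacheOptimizerRegistry.get("mycloud", tier="enterprise")
assert isinstance(optimizer, MyOptimizer)

# Repeated get() calls return the memoized instance; pass cached=False
# to force a fresh one.
assert CacheOptimizerRegistry.get("mycloud") is optimizer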
headroom/cache/semantic.py
@@ -0,0 +1,451 @@
+"""
+Semantic Cache Layer.
+
+Provides query-level semantic caching using embedding similarity.
+This is COMPLEMENTARY to provider prompt caching - it caches complete
+responses for semantically similar queries.
+
+How it works:
+1. When a query comes in, compute its embedding
+2. Search for similar queries in the cache (cosine similarity)
+3. If similarity > threshold, return cached response
+4. Otherwise, proceed with normal optimization
+
+Key difference from Prompt Caching:
+- Prompt Caching: Provider caches KV-cache for prefix (same prompt = faster)
+- Semantic Caching: We cache responses for similar queries (similar query = cached answer)
+
+Usage:
+    from headroom.cache import SemanticCacheLayer, CacheOptimizerRegistry
+
+    # Get provider optimizer
+    provider_optimizer = CacheOptimizerRegistry.get("anthropic")
+
+    # Wrap with semantic layer
+    semantic = SemanticCacheLayer(
+        provider_optimizer,
+        similarity_threshold=0.95,
+    )
+
+    result = semantic.process(messages, context)
+    if result.semantic_cache_hit:
+        # Use result.cached_response directly
+        pass
+"""
+
+from __future__ import annotations
+
+import hashlib
+import time
+from collections import OrderedDict
+from collections.abc import Callable
+from dataclasses import dataclass
+from typing import Any
+
+from .base import (
+    BaseCacheOptimizer,
+    CacheConfig,
+    CacheMetrics,
+    CacheResult,
+    OptimizationContext,
+)
+
+
+@dataclass
+class CacheEntry:
+    """Entry in the semantic cache."""
+
+    # Query embedding
+    embedding: list[float]
+
+    # Original query text
+    query: str
+
+    # Cached response
+    response: Any
+
+    # Metadata
+    created_at: float
+    last_accessed: float
+    access_count: int = 1
+
+    # Hash of the full messages for exact matching
+    messages_hash: str = ""
+
+
+@dataclass
+class SemanticCacheConfig:
+    """Configuration for semantic caching."""
+
+    # Similarity threshold for cache hit (0.0 - 1.0)
+    similarity_threshold: float = 0.95
+
+    # Maximum entries in cache
+    max_entries: int = 1000
+
+    # TTL in seconds (0 = no expiry)
+    ttl_seconds: int = 300
+
+    # Whether to use exact hash matching as fallback
+    use_exact_matching: bool = True
+
+    # Embedding model (if using embeddings)
+    embedding_model: str = "all-MiniLM-L6-v2"
+
+
+class SemanticCache:
+    """
+    In-memory semantic cache with LRU eviction.
+
+    Stores query embeddings and responses, supporting both
+    semantic similarity search and exact hash matching.
+    """
+
+    def __init__(
+        self,
+        config: SemanticCacheConfig | None = None,
+        embedding_fn: Callable[[str], list[float]] | None = None,
+    ):
+        """
+        Initialize the semantic cache.
+
+        Args:
+            config: Cache configuration
+            embedding_fn: Optional custom embedding function.
+                If not provided, uses simple hash-based matching.
+        """
+        self.config = config or SemanticCacheConfig()
+        self._embedding_fn = embedding_fn
+
+        # LRU cache: key -> CacheEntry
+        self._cache: OrderedDict[str, CacheEntry] = OrderedDict()
+
+        # Exact hash index: messages_hash -> key
+        self._hash_index: dict[str, str] = {}
+
+        # Statistics
+        self._hits = 0
+        self._misses = 0
+        self._evictions = 0
+
+    def get(
+        self,
+        query: str,
+        messages_hash: str | None = None,
+    ) -> CacheEntry | None:
+        """
+        Look up a cached entry.
+
+        Args:
+            query: Query text to search for
+            messages_hash: Optional exact hash for fast lookup
+
+        Returns:
+            CacheEntry if found, None otherwise
+        """
+        self._cleanup_expired()
+
+        # Try exact hash match first
+        if messages_hash and self.config.use_exact_matching:
+            key = self._hash_index.get(messages_hash)
+            if key and key in self._cache:
+                entry = self._cache[key]
+                self._touch(key)
+                self._hits += 1
+                return entry
+
+        # Try semantic similarity if we have embedding function
+        if self._embedding_fn:
+            query_embedding = self._embedding_fn(query)
+            best_match, best_similarity = self._find_similar(query_embedding)
+
+            if best_similarity >= self.config.similarity_threshold:
+                self._touch(best_match)
+                self._hits += 1
+                return self._cache[best_match]
+
+        self._misses += 1
+        return None
+
+    def put(
+        self,
+        query: str,
+        response: Any,
+        messages_hash: str | None = None,
+    ) -> str:
+        """
+        Store a response in the cache.
+
+        Args:
+            query: Query text
+            response: Response to cache
+            messages_hash: Optional exact hash for fast lookup
+
+        Returns:
+            Cache key for the entry
+        """
+        self._cleanup_expired()
+
+        # Evict if at capacity
+        while len(self._cache) >= self.config.max_entries:
+            self._evict_oldest()
+
+        # Generate embedding if available
+        embedding: list[float] = []
+        if self._embedding_fn:
+            embedding = self._embedding_fn(query)
+
+        # Create cache key
+        key = self._generate_key(query)
+
+        now = time.time()
+        entry = CacheEntry(
+            embedding=embedding,
+            query=query,
+            response=response,
+            created_at=now,
+            last_accessed=now,
+            messages_hash=messages_hash or "",
+        )
+
+        self._cache[key] = entry
+
+        # Index by hash for fast exact matching
+        if messages_hash:
+            self._hash_index[messages_hash] = key
+
+        return key
+
+    def invalidate(self, key: str) -> bool:
+        """Invalidate a cache entry by key."""
+        if key in self._cache:
+            entry = self._cache.pop(key)
+            if entry.messages_hash:
+                self._hash_index.pop(entry.messages_hash, None)
+            return True
+        return False
+
+    def clear(self) -> None:
+        """Clear all cache entries."""
+        self._cache.clear()
+        self._hash_index.clear()
+
+    def get_stats(self) -> dict[str, Any]:
+        """Get cache statistics."""
+        total = self._hits + self._misses
+        hit_rate = self._hits / total if total > 0 else 0.0
+
+        return {
+            "entries": len(self._cache),
+            "max_entries": self.config.max_entries,
+            "hits": self._hits,
+            "misses": self._misses,
+            "hit_rate": hit_rate,
+            "evictions": self._evictions,
+        }
+
+    def _find_similar(
+        self,
+        query_embedding: list[float],
+    ) -> tuple[str, float]:
+        """Find the most similar cached entry."""
+        best_key = ""
+        best_similarity = -1.0
+
+        for key, entry in self._cache.items():
+            if not entry.embedding:
+                continue
+
+            similarity = self._cosine_similarity(query_embedding, entry.embedding)
+            if similarity > best_similarity:
+                best_similarity = similarity
+                best_key = key
+
+        return best_key, best_similarity
+
+    def _cosine_similarity(
+        self,
+        a: list[float],
+        b: list[float],
+    ) -> float:
+        """Compute cosine similarity between two vectors."""
+        if len(a) != len(b) or not a:
+            return 0.0
+
+        dot_product = sum(x * y for x, y in zip(a, b))
+        norm_a = sum(x * x for x in a) ** 0.5
+        norm_b = sum(x * x for x in b) ** 0.5
+
+        if norm_a == 0 or norm_b == 0:
+            return 0.0
+
+        return float(dot_product / (norm_a * norm_b))
+
+    def _touch(self, key: str) -> None:
+        """Update access time and move to end of LRU."""
+        if key in self._cache:
+            entry = self._cache.pop(key)
+            entry.last_accessed = time.time()
+            entry.access_count += 1
+            self._cache[key] = entry
+
+    def _evict_oldest(self) -> None:
+        """Evict the oldest (least recently used) entry."""
+        if self._cache:
+            key, entry = self._cache.popitem(last=False)
+            if entry.messages_hash:
+                self._hash_index.pop(entry.messages_hash, None)
+            self._evictions += 1
+
+    def _cleanup_expired(self) -> None:
+        """Remove expired entries."""
+        if self.config.ttl_seconds <= 0:
+            return
+
+        now = time.time()
+        expired = [
+            key
+            for key, entry in self._cache.items()
+            if now - entry.created_at > self.config.ttl_seconds
+        ]
+
+        for key in expired:
+            entry = self._cache.pop(key)
+            if entry.messages_hash:
+                self._hash_index.pop(entry.messages_hash, None)
+
+    def _generate_key(self, query: str) -> str:
+        """Generate a cache key for a query."""
+        return hashlib.sha256(query.encode()).hexdigest()[:16]
+
+
+class SemanticCacheLayer:
+    """
+    Layer that adds semantic caching on top of provider optimizers.
+
+    This layer checks for semantically similar queries before
+    delegating to the underlying provider optimizer.
+    """
+
+    def __init__(
+        self,
+        provider_optimizer: BaseCacheOptimizer,
+        similarity_threshold: float = 0.95,
+        max_entries: int = 1000,
+        ttl_seconds: int = 300,
+        embedding_fn: Callable[[str], list[float]] | None = None,
+    ):
+        """
+        Initialize the semantic cache layer.
+
+        Args:
+            provider_optimizer: Underlying provider optimizer
+            similarity_threshold: Similarity threshold for cache hits
+            max_entries: Maximum cache entries
+            ttl_seconds: Cache TTL in seconds
+            embedding_fn: Optional embedding function
+        """
+        self.provider_optimizer = provider_optimizer
+
+        cache_config = SemanticCacheConfig(
+            similarity_threshold=similarity_threshold,
+            max_entries=max_entries,
+            ttl_seconds=ttl_seconds,
+        )
+        self.cache = SemanticCache(cache_config, embedding_fn)
+
+    def process(
+        self,
+        messages: list[dict[str, Any]],
+        context: OptimizationContext,
+        config: CacheConfig | None = None,
+    ) -> CacheResult:
+        """
+        Process messages through semantic cache and provider optimizer.
+
+        Args:
+            messages: Messages to process
+            context: Optimization context
+            config: Optional configuration override
+
+        Returns:
+            CacheResult with semantic_cache_hit=True if cache hit
+        """
+        # Extract query for semantic matching
+        query = context.query or self._extract_query(messages)
+        messages_hash = self._compute_messages_hash(messages)
+
+        # Check semantic cache
+        cached = self.cache.get(query, messages_hash)
+        if cached:
+            return CacheResult(
+                messages=messages,
+                semantic_cache_hit=True,
+                cached_response=cached.response,
+                metrics=CacheMetrics(
+                    estimated_cache_hit=True,
+                    estimated_savings_percent=100.0,
+                ),
+                transforms_applied=["semantic_cache_hit"],
+            )
+
+        # Delegate to provider optimizer
+        result = self.provider_optimizer.optimize(messages, context, config)
+
+        return result
+
+    def store_response(
+        self,
+        messages: list[dict[str, Any]],
+        response: Any,
+        context: OptimizationContext | None = None,
+    ) -> str:
+        """
+        Store a response in the semantic cache.
+
+        Call this after receiving a response from the LLM to enable
+        future cache hits.
+
+        Args:
+            messages: Original messages
+            response: Response from LLM
+            context: Optional context with query
+
+        Returns:
+            Cache key
+        """
+        query = (context.query if context else None) or self._extract_query(messages)
+        messages_hash = self._compute_messages_hash(messages)
+
+        return self.cache.put(query, response, messages_hash)
+
+    def get_stats(self) -> dict[str, Any]:
+        """Get combined statistics."""
+        return {
+            "semantic_cache": self.cache.get_stats(),
+            "provider_optimizer": self.provider_optimizer.name,
+        }
+
+    def _extract_query(self, messages: list[dict[str, Any]]) -> str:
+        """Extract the last user query from messages."""
+        for msg in reversed(messages):
+            if msg.get("role") == "user":
+                content = msg.get("content", "")
+                if isinstance(content, str):
+                    return content
+                elif isinstance(content, list):
+                    for block in content:
+                        if isinstance(block, dict) and block.get("type") == "text":
+                            text_val = block.get("text", "")
+                            return str(text_val) if text_val else ""
+        return ""
+
+    def _compute_messages_hash(self, messages: list[dict[str, Any]]) -> str:
+        """Compute a hash of all messages."""
+        import json
+
+        try:
+            content = json.dumps(messages, sort_keys=True)
+            return hashlib.sha256(content.encode()).hexdigest()[:24]
+        except (TypeError, ValueError):
+            return ""
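
The cosine-similarity path above only engages when an embedding_fn is supplied; without one the cache degrades to exact hash matching. A self-contained sketch of the lookup flow follows; the letter-frequency embedding is a deliberately crude stand-in for a real model such as the all-MiniLM-L6-v2 default named in SemanticCacheConfig.

from headroom.cache.semantic import SemanticCache, SemanticCacheConfig


def toy_embedding(text: str) -> list[float]:
    """Crude stand-in embedding: letter frequencies over a-z."""
    lowered = text.lower()
    return [float(lowered.count(ch)) for ch in "abcdefghijklmnopqrstuvwxyz"]


cache = SemanticCache(
    SemanticCacheConfig(similarity_threshold=0.95, max_entries=100, ttl_seconds=60),
    embedding_fn=toy_embedding,
)

cache.put("What is prompt caching?", response="Prompt caching reuses ...")

# A near-identical rephrasing lands above the similarity threshold and
# returns the stored entry instead of triggering a new LLM call.
entry = cache.get("what is prompt caching")
assert entry is not None and entry.response.startswith("Prompt caching")

print(cache.get_stats())  # {'entries': 1, 'hits': 1, 'misses': 0, ...}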
headroom/ccr/__init__.py
@@ -0,0 +1,77 @@
+"""CCR (Compress-Cache-Retrieve) module for reversible compression.
+
+This module provides tool injection and retrieval handling for the CCR architecture.
+When tool outputs are compressed, the LLM can retrieve more data if needed.
+
+Three key components:
+1. Tool Injection: Proxy injects headroom_retrieve tool into requests
+2. Response Handler: Intercepts responses, handles CCR tool calls automatically
+3. Context Tracker: Tracks compressed content across turns, enables proactive expansion
+
+Two distribution channels for the retrieval tool:
+1. Tool Injection: Proxy injects tool into request when compression occurs
+2. MCP Server: Standalone server exposes tool via MCP protocol
+
+When MCP is configured, tool injection is skipped to avoid duplicates.
+"""
+
+from .context_tracker import (
+    CompressedContext,
+    ContextTracker,
+    ContextTrackerConfig,
+    ExpansionRecommendation,
+    get_context_tracker,
+    reset_context_tracker,
+)
+from .response_handler import (
+    CCRResponseHandler,
+    CCRToolCall,
+    CCRToolResult,
+    ResponseHandlerConfig,
+    StreamingCCRBuffer,
+    StreamingCCRHandler,
+)
+from .tool_injection import (
+    CCR_TOOL_NAME,
+    CCRToolInjector,
+    create_ccr_tool_definition,
+    create_system_instructions,
+    parse_tool_call,
+)
+
+# MCP server is optional (requires mcp package)
+try:
+    from .mcp_server import CCRMCPServer, create_ccr_mcp_server
+
+    MCP_SERVER_AVAILABLE = True
+except ImportError:
+    CCRMCPServer = None  # type: ignore
+    create_ccr_mcp_server = None  # type: ignore
+    MCP_SERVER_AVAILABLE = False
+
+__all__ = [
+    # Tool injection
+    "CCR_TOOL_NAME",
+    "CCRToolInjector",
+    "create_ccr_tool_definition",
+    "create_system_instructions",
+    "parse_tool_call",
+    # Response handling
+    "CCRResponseHandler",
+    "CCRToolCall",
+    "CCRToolResult",
+    "ResponseHandlerConfig",
+    "StreamingCCRBuffer",
+    "StreamingCCRHandler",
+    # Context tracking
+    "CompressedContext",
+    "ContextTracker",
+    "ContextTrackerConfig",
+    "ExpansionRecommendation",
+    "get_context_tracker",
+    "reset_context_tracker",
+    # MCP server
+    "CCRMCPServer",
+    "create_ccr_mcp_server",
+    "MCP_SERVER_AVAILABLE",
+]
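
Because the MCP server import is optional, downstream code can branch on the exported MCP_SERVER_AVAILABLE flag rather than wrapping its own try/except. A small sketch, with the caveat that the zero-argument create_ccr_mcp_server() call is an assumption here; its actual signature lives in headroom/ccr/mcp_server.py.

from headroom.ccr import CCR_TOOL_NAME, MCP_SERVER_AVAILABLE

if MCP_SERVER_AVAILABLE:
    # The mcp extra is installed: expose retrieval via a standalone server.
    # Zero-arg construction is assumed; see headroom/ccr/mcp_server.py.
    from headroom.ccr import create_ccr_mcp_server

    server = create_ccr_mcp_server()
else:
    # No MCP: the proxy injects the retrieval tool into requests instead
    # (and, per the module docstring, injection is skipped when MCP is
    # configured so the tool is never exposed twice).
    print(f"mcp package not installed; relying on injected {CCR_TOOL_NAME}")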