headroom_ai-0.2.13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. headroom/__init__.py +212 -0
  2. headroom/cache/__init__.py +76 -0
  3. headroom/cache/anthropic.py +517 -0
  4. headroom/cache/base.py +342 -0
  5. headroom/cache/compression_feedback.py +613 -0
  6. headroom/cache/compression_store.py +814 -0
  7. headroom/cache/dynamic_detector.py +1026 -0
  8. headroom/cache/google.py +884 -0
  9. headroom/cache/openai.py +584 -0
  10. headroom/cache/registry.py +175 -0
  11. headroom/cache/semantic.py +451 -0
  12. headroom/ccr/__init__.py +77 -0
  13. headroom/ccr/context_tracker.py +582 -0
  14. headroom/ccr/mcp_server.py +319 -0
  15. headroom/ccr/response_handler.py +772 -0
  16. headroom/ccr/tool_injection.py +415 -0
  17. headroom/cli.py +219 -0
  18. headroom/client.py +977 -0
  19. headroom/compression/__init__.py +42 -0
  20. headroom/compression/detector.py +424 -0
  21. headroom/compression/handlers/__init__.py +22 -0
  22. headroom/compression/handlers/base.py +219 -0
  23. headroom/compression/handlers/code_handler.py +506 -0
  24. headroom/compression/handlers/json_handler.py +418 -0
  25. headroom/compression/masks.py +345 -0
  26. headroom/compression/universal.py +465 -0
  27. headroom/config.py +474 -0
  28. headroom/exceptions.py +192 -0
  29. headroom/integrations/__init__.py +159 -0
  30. headroom/integrations/agno/__init__.py +53 -0
  31. headroom/integrations/agno/hooks.py +345 -0
  32. headroom/integrations/agno/model.py +625 -0
  33. headroom/integrations/agno/providers.py +154 -0
  34. headroom/integrations/langchain/__init__.py +106 -0
  35. headroom/integrations/langchain/agents.py +326 -0
  36. headroom/integrations/langchain/chat_model.py +1002 -0
  37. headroom/integrations/langchain/langsmith.py +324 -0
  38. headroom/integrations/langchain/memory.py +319 -0
  39. headroom/integrations/langchain/providers.py +200 -0
  40. headroom/integrations/langchain/retriever.py +371 -0
  41. headroom/integrations/langchain/streaming.py +341 -0
  42. headroom/integrations/mcp/__init__.py +37 -0
  43. headroom/integrations/mcp/server.py +533 -0
  44. headroom/memory/__init__.py +37 -0
  45. headroom/memory/extractor.py +390 -0
  46. headroom/memory/fast_store.py +621 -0
  47. headroom/memory/fast_wrapper.py +311 -0
  48. headroom/memory/inline_extractor.py +229 -0
  49. headroom/memory/store.py +434 -0
  50. headroom/memory/worker.py +260 -0
  51. headroom/memory/wrapper.py +321 -0
  52. headroom/models/__init__.py +39 -0
  53. headroom/models/registry.py +687 -0
  54. headroom/parser.py +293 -0
  55. headroom/pricing/__init__.py +51 -0
  56. headroom/pricing/anthropic_prices.py +81 -0
  57. headroom/pricing/litellm_pricing.py +113 -0
  58. headroom/pricing/openai_prices.py +91 -0
  59. headroom/pricing/registry.py +188 -0
  60. headroom/providers/__init__.py +61 -0
  61. headroom/providers/anthropic.py +621 -0
  62. headroom/providers/base.py +131 -0
  63. headroom/providers/cohere.py +362 -0
  64. headroom/providers/google.py +427 -0
  65. headroom/providers/litellm.py +297 -0
  66. headroom/providers/openai.py +566 -0
  67. headroom/providers/openai_compatible.py +521 -0
  68. headroom/proxy/__init__.py +19 -0
  69. headroom/proxy/server.py +2683 -0
  70. headroom/py.typed +0 -0
  71. headroom/relevance/__init__.py +124 -0
  72. headroom/relevance/base.py +106 -0
  73. headroom/relevance/bm25.py +255 -0
  74. headroom/relevance/embedding.py +255 -0
  75. headroom/relevance/hybrid.py +259 -0
  76. headroom/reporting/__init__.py +5 -0
  77. headroom/reporting/generator.py +549 -0
  78. headroom/storage/__init__.py +41 -0
  79. headroom/storage/base.py +125 -0
  80. headroom/storage/jsonl.py +220 -0
  81. headroom/storage/sqlite.py +289 -0
  82. headroom/telemetry/__init__.py +91 -0
  83. headroom/telemetry/collector.py +764 -0
  84. headroom/telemetry/models.py +880 -0
  85. headroom/telemetry/toin.py +1579 -0
  86. headroom/tokenizer.py +80 -0
  87. headroom/tokenizers/__init__.py +75 -0
  88. headroom/tokenizers/base.py +210 -0
  89. headroom/tokenizers/estimator.py +198 -0
  90. headroom/tokenizers/huggingface.py +317 -0
  91. headroom/tokenizers/mistral.py +245 -0
  92. headroom/tokenizers/registry.py +398 -0
  93. headroom/tokenizers/tiktoken_counter.py +248 -0
  94. headroom/transforms/__init__.py +106 -0
  95. headroom/transforms/base.py +57 -0
  96. headroom/transforms/cache_aligner.py +357 -0
  97. headroom/transforms/code_compressor.py +1313 -0
  98. headroom/transforms/content_detector.py +335 -0
  99. headroom/transforms/content_router.py +1158 -0
  100. headroom/transforms/llmlingua_compressor.py +638 -0
  101. headroom/transforms/log_compressor.py +529 -0
  102. headroom/transforms/pipeline.py +297 -0
  103. headroom/transforms/rolling_window.py +350 -0
  104. headroom/transforms/search_compressor.py +365 -0
  105. headroom/transforms/smart_crusher.py +2682 -0
  106. headroom/transforms/text_compressor.py +259 -0
  107. headroom/transforms/tool_crusher.py +338 -0
  108. headroom/utils.py +215 -0
  109. headroom_ai-0.2.13.dist-info/METADATA +315 -0
  110. headroom_ai-0.2.13.dist-info/RECORD +114 -0
  111. headroom_ai-0.2.13.dist-info/WHEEL +4 -0
  112. headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
  113. headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
  114. headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
headroom/cache/base.py ADDED
@@ -0,0 +1,342 @@
+ """
+ Base types and interfaces for cache optimization.
+
+ This module defines the core abstractions that all cache optimizers implement.
+ The design allows for provider-specific implementations while maintaining a
+ consistent interface for users.
+ """
+
+ from __future__ import annotations
+
+ from abc import ABC, abstractmethod
+ from dataclasses import dataclass, field
+ from datetime import datetime
+ from enum import Enum
+ from typing import Any, Literal, Protocol, runtime_checkable
+
+
+ class CacheStrategy(Enum):
+     """Cache optimization strategy."""
+
+     # Just stabilize prefix (move dates, normalize whitespace)
+     PREFIX_STABILIZATION = "prefix_stabilization"
+
+     # Insert explicit cache breakpoints (Anthropic)
+     EXPLICIT_BREAKPOINTS = "explicit_breakpoints"
+
+     # Manage separate cached content objects (Google)
+     CACHED_CONTENT = "cached_content"
+
+     # No optimization possible (provider doesn't support caching)
+     NONE = "none"
+
+
+ class BreakpointLocation(Enum):
+     """Where to insert cache breakpoints."""
+
+     AFTER_SYSTEM = "after_system"
+     AFTER_TOOLS = "after_tools"
+     AFTER_EXAMPLES = "after_examples"
+     CUSTOM = "custom"
+
+
+ @dataclass
+ class CacheBreakpoint:
+     """
+     Represents a cache breakpoint location.
+
+     For Anthropic, this maps to cache_control blocks.
+     For other providers, this is informational.
+     """
+
+     # Message index where breakpoint should be inserted
+     message_index: int
+
+     # Location type
+     location: BreakpointLocation
+
+     # For content arrays, index within the content
+     content_index: int | None = None
+
+     # Token count at this breakpoint
+     tokens_at_breakpoint: int = 0
+
+     # Reason for this breakpoint
+     reason: str = ""
+
+
+ @dataclass
+ class CacheConfig:
+     """Configuration for cache optimization."""
+
+     # Whether to optimize at all
+     enabled: bool = True
+
+     # Strategy to use (auto-detected if None)
+     strategy: CacheStrategy | None = None
+
+     # Minimum tokens before caching makes sense
+     min_cacheable_tokens: int = 1024
+
+     # Maximum number of breakpoints (Anthropic limit is 4)
+     max_breakpoints: int = 4
+
+     # Patterns to extract and move to dynamic section
+     date_patterns: list[str] = field(
+         default_factory=lambda: [
+             r"Today is \w+ \d{1,2},? \d{4}\.?",
+             r"Current date: \d{4}-\d{2}-\d{2}",
+             r"The current time is .+\.",
+         ]
+     )
+
+     # Whether to normalize whitespace
+     normalize_whitespace: bool = True
+
+     # Collapse multiple blank lines
+     collapse_blank_lines: bool = True
+
+     # Separator between static and dynamic content
+     dynamic_separator: str = "\n\n---\n\n"
+
+     # Dynamic content detection tiers (for OpenAI prefix stabilization)
+     # - "regex": Fast pattern matching (~0ms) - always recommended
+     # - "ner": Named Entity Recognition via spaCy (~5-10ms) - catches names, money, etc.
+     # - "semantic": Embedding similarity (~20-50ms) - catches volatile patterns
+     # Default is regex-only for speed. Add tiers for better detection at cost of latency.
+     dynamic_detection_tiers: list[Literal["regex", "ner", "semantic"]] = field(
+         default_factory=lambda: ["regex"]
+     )
+
+     # For semantic caching
+     semantic_cache_enabled: bool = False
+     semantic_similarity_threshold: float = 0.95
+     semantic_cache_ttl_seconds: int = 300
+
+
+ @dataclass
+ class CacheMetrics:
+     """Metrics about cache optimization."""
+
+     # Prefix analysis
+     stable_prefix_tokens: int = 0
+     stable_prefix_hash: str = ""
+
+     # Breakpoint info
+     breakpoints_inserted: int = 0
+     breakpoint_locations: list[CacheBreakpoint] = field(default_factory=list)
+
+     # Cache hit estimation
+     prefix_changed_from_previous: bool = False
+     previous_prefix_hash: str | None = None
+     estimated_cache_hit: bool = False
+
+     # Savings estimation
+     estimated_savings_percent: float = 0.0
+     cacheable_tokens: int = 0
+     non_cacheable_tokens: int = 0
+
+     # Provider-specific
+     provider_cache_id: str | None = None  # For Google's CachedContent
+     cache_ttl_remaining_seconds: int | None = None
+
+
+ @dataclass
+ class OptimizationContext:
+     """Context for optimization request."""
+
+     # Request tracking
+     request_id: str = ""
+     timestamp: datetime = field(default_factory=datetime.now)
+
+     # Provider info
+     provider: str = ""
+     model: str = ""
+
+     # Query for relevance (used by semantic cache)
+     query: str | None = None
+
+     # Previous request info (for cache hit detection)
+     previous_prefix_hash: str | None = None
+
+     # Additional metadata
+     metadata: dict[str, Any] = field(default_factory=dict)
+
+
+ @dataclass
+ class CacheResult:
+     """Result of cache optimization."""
+
+     # Optimized messages
+     messages: list[dict[str, Any]]
+
+     # Whether this was a semantic cache hit
+     semantic_cache_hit: bool = False
+
+     # Cached response (if semantic cache hit)
+     cached_response: Any | None = None
+
+     # Optimization metrics
+     metrics: CacheMetrics = field(default_factory=CacheMetrics)
+
+     # Tokens before/after
+     tokens_before: int = 0
+     tokens_after: int = 0
+
+     # Transforms applied
+     transforms_applied: list[str] = field(default_factory=list)
+
+     # Warnings
+     warnings: list[str] = field(default_factory=list)
+
+
+ @runtime_checkable
+ class CacheOptimizer(Protocol):
+     """
+     Protocol for cache optimizers.
+
+     All provider-specific optimizers must implement this interface.
+     This allows for easy swapping of implementations and plugin registration.
+     """
+
+     @property
+     def name(self) -> str:
+         """Name of this optimizer."""
+         ...
+
+     @property
+     def provider(self) -> str:
+         """Provider this optimizer is for."""
+         ...
+
+     @property
+     def strategy(self) -> CacheStrategy:
+         """The caching strategy this optimizer uses."""
+         ...
+
+     def optimize(
+         self,
+         messages: list[dict[str, Any]],
+         context: OptimizationContext,
+         config: CacheConfig | None = None,
+     ) -> CacheResult:
+         """
+         Optimize messages for caching.
+
+         Args:
+             messages: The messages to optimize.
+             context: Optimization context with request info.
+             config: Optional configuration override.
+
+         Returns:
+             CacheResult with optimized messages and metrics.
+         """
+         ...
+
+     def get_metrics(self) -> CacheMetrics:
+         """Get aggregated metrics from this optimizer."""
+         ...
+
+     def estimate_savings(
+         self,
+         messages: list[dict[str, Any]],
+         context: OptimizationContext,
+     ) -> float:
+         """
+         Estimate potential savings from optimization.
+
+         Returns:
+             Estimated savings as a percentage (0-100).
+         """
+         ...
+
+
+ class BaseCacheOptimizer(ABC):
+     """
+     Abstract base class for cache optimizers.
+
+     Provides common functionality for all optimizers.
+     """
+
+     def __init__(self, config: CacheConfig | None = None):
+         self.config = config or CacheConfig()
+         self._metrics_history: list[CacheMetrics] = []
+         self._previous_prefix_hash: str | None = None
+
+     @property
+     @abstractmethod
+     def name(self) -> str:
+         """Name of this optimizer."""
+         ...
+
+     @property
+     @abstractmethod
+     def provider(self) -> str:
+         """Provider this optimizer is for."""
+         ...
+
+     @property
+     @abstractmethod
+     def strategy(self) -> CacheStrategy:
+         """The caching strategy this optimizer uses."""
+         ...
+
+     @abstractmethod
+     def optimize(
+         self,
+         messages: list[dict[str, Any]],
+         context: OptimizationContext,
+         config: CacheConfig | None = None,
+     ) -> CacheResult:
+         """Optimize messages for caching."""
+         ...
+
+     def get_metrics(self) -> CacheMetrics:
+         """Get aggregated metrics."""
+         if not self._metrics_history:
+             return CacheMetrics()
+
+         # Return most recent metrics
+         return self._metrics_history[-1]
+
+     def estimate_savings(
+         self,
+         messages: list[dict[str, Any]],
+         context: OptimizationContext,
+     ) -> float:
+         """Estimate potential savings."""
+         # Default implementation - subclasses can override
+         result = self.optimize(messages, context)
+         return result.metrics.estimated_savings_percent
+
+     def _record_metrics(self, metrics: CacheMetrics) -> None:
+         """Record metrics for history."""
+         self._metrics_history.append(metrics)
+         # Keep only last 100 entries
+         if len(self._metrics_history) > 100:
+             self._metrics_history = self._metrics_history[-100:]
+
+     def _compute_prefix_hash(self, content: str) -> str:
+         """Compute a short hash of content."""
+         import hashlib
+
+         return hashlib.sha256(content.encode()).hexdigest()[:12]
+
+     def _extract_system_content(self, messages: list[dict[str, Any]]) -> str:
+         """Extract content from system messages."""
+         parts = []
+         for msg in messages:
+             if msg.get("role") == "system":
+                 content = msg.get("content", "")
+                 if isinstance(content, str):
+                     parts.append(content)
+                 elif isinstance(content, list):
+                     # Handle content blocks
+                     for block in content:
+                         if isinstance(block, dict) and block.get("type") == "text":
+                             parts.append(block.get("text", ""))
+         return "\n".join(parts)
+
+     def _count_tokens_estimate(self, text: str) -> int:
+         """Rough token count estimate (4 chars per token)."""
+         return len(text) // 4
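
For orientation, here is a minimal sketch of how a provider-specific optimizer might subclass BaseCacheOptimizer from the file above. The PassthroughOptimizer class, its provider string, and the sample messages are hypothetical illustrations, not part of the package; only the imported names and helper methods come from headroom/cache/base.py as shown in this diff.

# Illustrative sketch only; class name and values are hypothetical.
from headroom.cache.base import (
    BaseCacheOptimizer,
    CacheConfig,
    CacheMetrics,
    CacheResult,
    CacheStrategy,
    OptimizationContext,
)
from typing import Any


class PassthroughOptimizer(BaseCacheOptimizer):
    """Hypothetical optimizer: leaves messages untouched, records prefix metrics."""

    @property
    def name(self) -> str:
        return "passthrough"

    @property
    def provider(self) -> str:
        return "generic"

    @property
    def strategy(self) -> CacheStrategy:
        return CacheStrategy.PREFIX_STABILIZATION

    def optimize(
        self,
        messages: list[dict[str, Any]],
        context: OptimizationContext,
        config: CacheConfig | None = None,
    ) -> CacheResult:
        cfg = config or self.config
        # Hash and size the system prompt using the base-class helpers.
        system_text = self._extract_system_content(messages)
        tokens = self._count_tokens_estimate(system_text)
        metrics = CacheMetrics(
            stable_prefix_tokens=tokens,
            stable_prefix_hash=self._compute_prefix_hash(system_text),
            cacheable_tokens=tokens if tokens >= cfg.min_cacheable_tokens else 0,
        )
        self._record_metrics(metrics)
        return CacheResult(
            messages=messages,
            metrics=metrics,
            tokens_before=tokens,
            tokens_after=tokens,
        )


optimizer = PassthroughOptimizer(CacheConfig(min_cacheable_tokens=512))
result = optimizer.optimize(
    [{"role": "system", "content": "You are a helpful assistant."}],
    OptimizationContext(provider="openai", model="gpt-4o"),
)
print(result.metrics.stable_prefix_hash, result.metrics.stable_prefix_tokens)

Because CacheOptimizer is a runtime_checkable Protocol, an instance like this would also satisfy isinstance(optimizer, CacheOptimizer) checks without inheriting from the protocol itself.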