headroom-ai 0.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. headroom/__init__.py +212 -0
  2. headroom/cache/__init__.py +76 -0
  3. headroom/cache/anthropic.py +517 -0
  4. headroom/cache/base.py +342 -0
  5. headroom/cache/compression_feedback.py +613 -0
  6. headroom/cache/compression_store.py +814 -0
  7. headroom/cache/dynamic_detector.py +1026 -0
  8. headroom/cache/google.py +884 -0
  9. headroom/cache/openai.py +584 -0
  10. headroom/cache/registry.py +175 -0
  11. headroom/cache/semantic.py +451 -0
  12. headroom/ccr/__init__.py +77 -0
  13. headroom/ccr/context_tracker.py +582 -0
  14. headroom/ccr/mcp_server.py +319 -0
  15. headroom/ccr/response_handler.py +772 -0
  16. headroom/ccr/tool_injection.py +415 -0
  17. headroom/cli.py +219 -0
  18. headroom/client.py +977 -0
  19. headroom/compression/__init__.py +42 -0
  20. headroom/compression/detector.py +424 -0
  21. headroom/compression/handlers/__init__.py +22 -0
  22. headroom/compression/handlers/base.py +219 -0
  23. headroom/compression/handlers/code_handler.py +506 -0
  24. headroom/compression/handlers/json_handler.py +418 -0
  25. headroom/compression/masks.py +345 -0
  26. headroom/compression/universal.py +465 -0
  27. headroom/config.py +474 -0
  28. headroom/exceptions.py +192 -0
  29. headroom/integrations/__init__.py +159 -0
  30. headroom/integrations/agno/__init__.py +53 -0
  31. headroom/integrations/agno/hooks.py +345 -0
  32. headroom/integrations/agno/model.py +625 -0
  33. headroom/integrations/agno/providers.py +154 -0
  34. headroom/integrations/langchain/__init__.py +106 -0
  35. headroom/integrations/langchain/agents.py +326 -0
  36. headroom/integrations/langchain/chat_model.py +1002 -0
  37. headroom/integrations/langchain/langsmith.py +324 -0
  38. headroom/integrations/langchain/memory.py +319 -0
  39. headroom/integrations/langchain/providers.py +200 -0
  40. headroom/integrations/langchain/retriever.py +371 -0
  41. headroom/integrations/langchain/streaming.py +341 -0
  42. headroom/integrations/mcp/__init__.py +37 -0
  43. headroom/integrations/mcp/server.py +533 -0
  44. headroom/memory/__init__.py +37 -0
  45. headroom/memory/extractor.py +390 -0
  46. headroom/memory/fast_store.py +621 -0
  47. headroom/memory/fast_wrapper.py +311 -0
  48. headroom/memory/inline_extractor.py +229 -0
  49. headroom/memory/store.py +434 -0
  50. headroom/memory/worker.py +260 -0
  51. headroom/memory/wrapper.py +321 -0
  52. headroom/models/__init__.py +39 -0
  53. headroom/models/registry.py +687 -0
  54. headroom/parser.py +293 -0
  55. headroom/pricing/__init__.py +51 -0
  56. headroom/pricing/anthropic_prices.py +81 -0
  57. headroom/pricing/litellm_pricing.py +113 -0
  58. headroom/pricing/openai_prices.py +91 -0
  59. headroom/pricing/registry.py +188 -0
  60. headroom/providers/__init__.py +61 -0
  61. headroom/providers/anthropic.py +621 -0
  62. headroom/providers/base.py +131 -0
  63. headroom/providers/cohere.py +362 -0
  64. headroom/providers/google.py +427 -0
  65. headroom/providers/litellm.py +297 -0
  66. headroom/providers/openai.py +566 -0
  67. headroom/providers/openai_compatible.py +521 -0
  68. headroom/proxy/__init__.py +19 -0
  69. headroom/proxy/server.py +2683 -0
  70. headroom/py.typed +0 -0
  71. headroom/relevance/__init__.py +124 -0
  72. headroom/relevance/base.py +106 -0
  73. headroom/relevance/bm25.py +255 -0
  74. headroom/relevance/embedding.py +255 -0
  75. headroom/relevance/hybrid.py +259 -0
  76. headroom/reporting/__init__.py +5 -0
  77. headroom/reporting/generator.py +549 -0
  78. headroom/storage/__init__.py +41 -0
  79. headroom/storage/base.py +125 -0
  80. headroom/storage/jsonl.py +220 -0
  81. headroom/storage/sqlite.py +289 -0
  82. headroom/telemetry/__init__.py +91 -0
  83. headroom/telemetry/collector.py +764 -0
  84. headroom/telemetry/models.py +880 -0
  85. headroom/telemetry/toin.py +1579 -0
  86. headroom/tokenizer.py +80 -0
  87. headroom/tokenizers/__init__.py +75 -0
  88. headroom/tokenizers/base.py +210 -0
  89. headroom/tokenizers/estimator.py +198 -0
  90. headroom/tokenizers/huggingface.py +317 -0
  91. headroom/tokenizers/mistral.py +245 -0
  92. headroom/tokenizers/registry.py +398 -0
  93. headroom/tokenizers/tiktoken_counter.py +248 -0
  94. headroom/transforms/__init__.py +106 -0
  95. headroom/transforms/base.py +57 -0
  96. headroom/transforms/cache_aligner.py +357 -0
  97. headroom/transforms/code_compressor.py +1313 -0
  98. headroom/transforms/content_detector.py +335 -0
  99. headroom/transforms/content_router.py +1158 -0
  100. headroom/transforms/llmlingua_compressor.py +638 -0
  101. headroom/transforms/log_compressor.py +529 -0
  102. headroom/transforms/pipeline.py +297 -0
  103. headroom/transforms/rolling_window.py +350 -0
  104. headroom/transforms/search_compressor.py +365 -0
  105. headroom/transforms/smart_crusher.py +2682 -0
  106. headroom/transforms/text_compressor.py +259 -0
  107. headroom/transforms/tool_crusher.py +338 -0
  108. headroom/utils.py +215 -0
  109. headroom_ai-0.2.13.dist-info/METADATA +315 -0
  110. headroom_ai-0.2.13.dist-info/RECORD +114 -0
  111. headroom_ai-0.2.13.dist-info/WHEEL +4 -0
  112. headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
  113. headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
  114. headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
headroom/config.py ADDED
@@ -0,0 +1,474 @@
1
+ """Configuration models for Headroom SDK."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from datetime import datetime
7
+ from enum import Enum
8
+ from typing import Any, Literal
9
+
10
+
11
class HeadroomMode(str, Enum):
    """Enumeration of the ways Headroom can operate on a request.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase string value.
    """

    # Observation only; nothing is modified.
    AUDIT = "audit"
    # Deterministic transforms are applied.
    OPTIMIZE = "optimize"
    # The transform plan is returned and no API call is made.
    SIMULATE = "simulate"
17
+
18
+
19
# User-supplied overrides only: the Provider is expected to supply the real
# per-model context limits, so this mapping starts out empty.
DEFAULT_MODEL_CONTEXT_LIMITS: dict[str, int] = dict()
22
+
23
+
24
@dataclass
class ToolCrusherConfig:
    """Settings for the naive, fixed-rule compressor of tool outputs.

    Known limitations:
        - Arrays are cut down to their FIRST N items, so important data
          further in is dropped (with max_array_items=10, a spike at
          index 50 is lost).
        - Strings are cut at a fixed length, possibly mid-word or
          mid-sentence.
        - Data patterns and importance are not taken into account.

    SmartCrusherConfig, which relies on statistical analysis, is the
    suggested alternative.
    """

    # Off by default because SmartCrusher is preferred.
    enabled: bool = False
    # Crushing only happens above this many tokens.
    min_tokens_to_crush: int = 500
    # Number of leading array items that are kept.
    max_array_items: int = 10
    # Strings longer than this many characters are truncated.
    max_string_length: int = 1000
    # Structure is preserved down to this nesting depth.
    max_depth: int = 5
    # Keys to preserve; every instance gets its own fresh set.
    preserve_keys: set[str] = field(
        default_factory=lambda: {
            "error",
            "status",
            "code",
            "id",
            "message",
            "name",
            "type",
        }
    )
    # Per-tool settings keyed by name; empty unless supplied.
    tool_profiles: dict[str, dict[str, Any]] = field(default_factory=dict)
46
+
47
+
48
@dataclass
class CacheAlignerConfig:
    """Settings for the cache-alignment transform.

    Known limitations:
        - The date regexes can match text that is not a date (for example
          version numbers like "2024-01-15").
        - Relocating dates to the end of the system prompt may confuse a
          model when the date mattered semantically where it stood.
        - Whitespace normalization may damage code blocks with significant
          indentation, ASCII art or formatted tables, and Markdown that
          relies on specific spacing.
        - ISO timestamps in tool outputs may be wrongly flagged as
          "dynamic dates".

    SAFE: applied to SYSTEM messages only, never to user, assistant or
    tool content.
    """

    enabled: bool = True
    # Regular expressions used to spot dynamic dates.
    date_patterns: list[str] = field(
        default_factory=lambda: [
            r"Current [Dd]ate:?\s*\d{4}-\d{2}-\d{2}",
            r"Today is \w+,?\s+\w+ \d+",
            r"Today's date:?\s*\d{4}-\d{2}-\d{2}",
            r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}",
        ]
    )
    normalize_whitespace: bool = True
    collapse_blank_lines: bool = True
    # Marks where dynamic content starts in the system message: text before
    # the separator is cached, text after it is dynamic.
    dynamic_tail_separator: str = "\n\n---\n[Dynamic Context]\n"
79
+
80
+
81
@dataclass
class RollingWindowConfig:
    """Settings for the rolling-window token cap.

    Known limitations:
        - Dropped old turns can hold context the model still needs:
          "As I mentioned earlier..." loses what was mentioned, a turn
          described by "The user asked about X" may be gone, and implicit
          references to earlier conversation become orphaned.
        - Tool call/result pairs stay atomic (correct), but assistant text
          that refers to a dropped tool result becomes confusing, e.g.
          "Based on the search results..." once those results are gone.
        - keep_last_turns=2 may be too small for complex multi-step
          reasoning.
        - There is no semantic analysis: the oldest content is dropped
          first regardless of importance.

    Safer alternatives:
        - Raise keep_last_turns for agentic workloads.
        - Summarize old context (not implemented; would add latency).
        - Use enabled=False for short conversations.
    """

    enabled: bool = True
    # The system prompt is never dropped.
    keep_system: bool = True
    # The last N turns are never dropped.
    keep_last_turns: int = 2
    # Tokens reserved for the output.
    output_buffer_tokens: int = 4000
106
+
107
+
108
@dataclass
class RelevanceScorerConfig:
    """Settings for the relevance scoring used by SmartCrusher.

    Relevance scoring decides which items survive when tool outputs are
    compressed, following the pattern relevance(item, context) -> [0, 1].

    Tiers:
        - "bm25": BM25 keyword matching (zero dependencies, fast)
        - "embedding": semantic similarity via sentence-transformers
        - "hybrid": BM25 + embedding with adaptive fusion (RECOMMENDED)

    The default is "hybrid", which pairs exact matching (UUIDs, IDs) with
    semantic understanding and falls back to BM25 when
    sentence-transformers is not installed. Full hybrid support is
    installed with: pip install headroom[relevance]

    Why hybrid is the default:
        - Missing important items during compression is catastrophic.
        - BM25 alone scores single-term matches low (e.g., "Alice" = 0.07).
        - Semantic matching catches "errors" -> "failed", "issues", etc.
        - 5-10ms of latency is acceptable compared with losing critical
          data.
    """

    tier: Literal["bm25", "embedding", "hybrid"] = "hybrid"

    # --- BM25 ---
    # Term frequency saturation.
    bm25_k1: float = 1.5
    # Length normalization.
    bm25_b: float = 0.75

    # --- Embedding ---
    # Lightweight model.
    embedding_model: str = "all-MiniLM-L6-v2"

    # --- Hybrid ---
    # Weight given to BM25; the embedding weight is 1 - alpha.
    hybrid_alpha: float = 0.5
    # Adjust alpha according to the query type.
    adaptive_alpha: bool = True

    # --- Scoring threshold ---
    # Items scoring above this value are kept. Semantic scores from
    # hybrid/embedding are meaningful (0.3-0.5 for good matches), and the
    # value remains reasonable for multi-term matches under the BM25
    # fallback. Lower is safer (keeps more items); higher compresses more
    # aggressively.
    relevance_threshold: float = 0.25
150
+
151
+
152
@dataclass
class SmartCrusherConfig:
    """Settings for the smart statistical crusher (DEFAULT).

    Tool outputs are compressed using statistical analysis while the
    ORIGINAL JSON SCHEMA IS PRESERVED: the output holds only items taken
    from the original array, with no wrappers, generated text or metadata.

    Safe V1 compression recipe - always kept:
        - First K items (default 3)
        - Last K items (default 2)
        - Error items (containing 'error', 'exception', 'failed',
          'critical')
        - Anomalous numeric items (> 2 std from mean)
        - Top-K by score when a score field is present
        - Items matching the query context via RelevanceScorer

    Known limitations:
        - Statistical analysis adds ~5-10ms of overhead per tool output.
        - Change point detection uses a fixed window (5 items), so it may
          miss very gradual changes and patterns in smaller arrays.
        - TOP_N for search results assumes a higher score means more
          relevant, which may not hold for every API.

    Safer settings:
        - Raise max_items_after_crush for critical data.
        - Lower variance_threshold (1.5) to catch more change points.
    """

    # On by default (preferred over ToolCrusher).
    enabled: bool = True
    # Tiny arrays are not analyzed.
    min_items_to_analyze: int = 5
    # Crushing only happens above this many tokens.
    min_tokens_to_crush: int = 200
    # Standard deviations used for change point detection.
    variance_threshold: float = 2.0
    # Below this value a field counts as nearly constant.
    uniqueness_threshold: float = 0.1
    # Used when clustering similar strings.
    similarity_threshold: float = 0.8
    # Target maximum number of items in the output.
    max_items_after_crush: int = 15
    preserve_change_points: bool = True
    # Disabled so that the original schema is preserved.
    factor_out_constants: bool = False
    # Disabled so that no text is generated.
    include_summaries: bool = False

    # Feedback loop integration (TOIN - Tool Output Intelligence Network):
    # use learned patterns to adjust compression.
    use_feedback_hints: bool = True

    # Configurable minimum confidence required before TOIN recommendations
    # are applied.
    toin_confidence_threshold: float = 0.5

    # Relevance scoring settings; each instance gets its own config object.
    relevance: RelevanceScorerConfig = field(default_factory=RelevanceScorerConfig)
201
+
202
+
203
@dataclass
class CacheOptimizerConfig:
    """Settings for provider-specific cache optimization.

    The CacheOptimizer system offers one caching strategy per provider:
        - Anthropic: explicit cache_control breakpoints for prompt caching
        - OpenAI: prefix stabilization for automatic prefix caching
        - Google: CachedContent API lifecycle management

    It is COMPLEMENTARY to the CacheAligner transform: CacheAligner does
    the basic prefix stabilization (date extraction, whitespace
    normalization) and CacheOptimizer layers the provider-specific
    optimizations on top.

    Enable it for maximum cache hit rates when the provider is known.
    """

    # Turns provider-specific cache optimization on.
    enabled: bool = True
    # Detect the provider automatically from the HeadroomClient provider.
    auto_detect_provider: bool = True
    # Minimum token count for caching (a provider may override it).
    min_cacheable_tokens: int = 1024
    # Turns query-level semantic caching on.
    enable_semantic_cache: bool = False
    # Similarity threshold for the semantic cache.
    semantic_cache_similarity: float = 0.95
    # Maximum number of semantic cache entries.
    semantic_cache_max_entries: int = 1000
    # Semantic cache TTL, in seconds.
    semantic_cache_ttl_seconds: int = 300
226
+
227
+
228
@dataclass
class CCRConfig:
    """Settings for the Compress-Cache-Retrieve (CCR) architecture.

    CCR makes compression REVERSIBLE: whenever SmartCrusher compresses a
    tool output, the original data is cached, and the LLM can retrieve it
    if it needs more.

    Key insight from research: REVERSIBLE compression beats irreversible
    compression.
        - Phil Schmid: "Prefer raw > Compaction > Summarization"
        - Factory.ai: "Cutting context too aggressively can backfire"

    How it works:
        1. COMPRESS: SmartCrusher compresses an array from 1000 to 20 items
        2. CACHE: the original 1000 items are stored in CompressionStore
        3. INJECT: a marker tells the LLM how to retrieve more
        4. RETRIEVE: if needed, the LLM calls headroom_retrieve(hash, query)

    Benefits:
        - Zero-risk compression: worst case, the LLM retrieves what it
          needs
        - Feedback loop: what gets retrieved is tracked to improve
          compression
        - Network effect: retrieval patterns improve compression for all
          users

    Known limitations:
        - The cache has a TTL (default 5 min); retrieval fails after
          expiration
        - Memory usage: ~1KB per cached entry
        - Only works with array compression (not string truncation)
    """

    # Turns CCR on (cache + retrieval markers).
    enabled: bool = True
    # Maximum number of entries in the compression store.
    store_max_entries: int = 1000
    # Cache TTL in seconds (5 minutes).
    store_ttl_seconds: int = 300
    # Append a retrieval hint to compressed output.
    inject_retrieval_marker: bool = True
    # Track retrieval events for learning.
    feedback_enabled: bool = True
    # Cache only when the original had at least this many items.
    min_items_to_cache: int = 20

    # Tool injection (Phase 3)
    # Inject the headroom_retrieve tool into the tools array.
    inject_tool: bool = True
    # Add retrieval instructions to the system message.
    inject_system_instructions: bool = False

    # Retrieval marker format: inserted at the end of compressed content to
    # tell the LLM how to get more.
    marker_template: str = (
        "\n[{original_count} items compressed to {compressed_count}. "
        "Retrieve more: hash={hash}]"
    )
272
+
273
+
274
@dataclass
class HeadroomConfig:
    """Main configuration for HeadroomClient.

    Groups the storage URL, the default operating mode, user overrides for
    model context limits, and one sub-configuration per transform or
    subsystem. Every mutable default is built per instance through
    ``default_factory``.
    """

    store_url: str = "sqlite:///headroom.db"
    default_mode: HeadroomMode = HeadroomMode.AUDIT
    # Starts as a copy of DEFAULT_MODEL_CONTEXT_LIMITS so that edits made on
    # one config never leak into the module-level mapping.
    model_context_limits: dict[str, int] = field(
        default_factory=lambda: DEFAULT_MODEL_CONTEXT_LIMITS.copy()
    )
    tool_crusher: ToolCrusherConfig = field(default_factory=ToolCrusherConfig)
    smart_crusher: SmartCrusherConfig = field(default_factory=SmartCrusherConfig)
    cache_aligner: CacheAlignerConfig = field(default_factory=CacheAlignerConfig)
    rolling_window: RollingWindowConfig = field(default_factory=RollingWindowConfig)
    cache_optimizer: CacheOptimizerConfig = field(default_factory=CacheOptimizerConfig)
    ccr: CCRConfig = field(default_factory=CCRConfig)  # Compress-Cache-Retrieve

    # Debugging - opt-in diff artifact generation
    generate_diff_artifact: bool = False  # Enable to get detailed transform diffs

    def get_context_limit(self, model: str) -> int | None:
        """
        Get context limit for a model from user overrides.

        An exact match on the model name is tried first. Otherwise the
        configured name that is the LONGEST prefix of ``model`` is used, so
        the result does not depend on the order in which overrides were
        inserted (with both "gpt-4" and "gpt-4-turbo" configured,
        "gpt-4-turbo-2024" resolves to the "gpt-4-turbo" limit).

        Args:
            model: Model name.

        Returns:
            Context limit if configured, None otherwise.
            Provider should be consulted if None is returned.
        """
        limits = self.model_context_limits
        if model in limits:
            return limits[model]
        # Prefix matching for versioned model names. Two different prefixes
        # of the same string can never have equal length, so the longest one
        # is unique.
        matching_prefixes = [known for known in limits if model.startswith(known)]
        if not matching_prefixes:
            return None
        return limits[max(matching_prefixes, key=len)]
311
+
312
+
313
@dataclass
class Block:
    """The atomic unit used in context analysis."""

    kind: Literal[
        "system",
        "user",
        "assistant",
        "tool_call",
        "tool_result",
        "rag",
        "unknown",
    ]
    text: str
    tokens_est: int
    content_hash: str
    # Position in the original messages.
    source_index: int
    # A fresh dict is created for each instance.
    flags: dict[str, Any] = field(default_factory=dict)
323
+
324
+
325
@dataclass
class WasteSignals:
    """Token counts for each kind of waste detected in a request."""

    # JSON blocks > 500 tokens.
    json_bloat_tokens: int = 0
    # HTML tags/comments.
    html_noise_tokens: int = 0
    # Base64 encoded blobs.
    base64_tokens: int = 0
    # Repeated whitespace.
    whitespace_tokens: int = 0
    # Dynamic dates in the system prompt.
    dynamic_date_tokens: int = 0
    # Repeated content.
    repetition_tokens: int = 0

    def total(self) -> int:
        """Return the sum of all six waste counters."""
        counters = (
            self.json_bloat_tokens,
            self.html_noise_tokens,
            self.base64_tokens,
            self.whitespace_tokens,
            self.dynamic_date_tokens,
            self.repetition_tokens,
        )
        return sum(counters)

    def to_dict(self) -> dict[str, int]:
        """Return the counters as a dict for storage.

        The keys are the field names without their ``_tokens`` suffix.
        """
        as_mapping: dict[str, int] = {}
        as_mapping["json_bloat"] = self.json_bloat_tokens
        as_mapping["html_noise"] = self.html_noise_tokens
        as_mapping["base64"] = self.base64_tokens
        as_mapping["whitespace"] = self.whitespace_tokens
        as_mapping["dynamic_date"] = self.dynamic_date_tokens
        as_mapping["repetition"] = self.repetition_tokens
        return as_mapping
357
+
358
+
359
@dataclass
class CachePrefixMetrics:
    """Cache prefix details intended for debugging cache misses.

    Log them on every request to see why caching does or does not work.
    When stable_prefix_hash changes between requests, that is a cache miss.
    """

    # Byte length of the static prefix.
    stable_prefix_bytes: int
    # Estimated token count of the static prefix.
    stable_prefix_tokens_est: int
    # Hash of the canonicalized prefix (16 chars).
    stable_prefix_hash: str
    # True when the hash differs from the previous request in the session.
    prefix_changed: bool
    # Previous hash, for comparison; None means this is the first request.
    previous_hash: str | None = None
372
+
373
+
374
@dataclass
class TransformResult:
    """What a transform operation produces."""

    messages: list[dict[str, Any]]
    tokens_before: int
    tokens_after: int
    transforms_applied: list[str]
    # The two lists below default to a fresh empty list per instance.
    markers_inserted: list[str] = field(default_factory=list)
    warnings: list[str] = field(default_factory=list)
    # Populated when generate_diff_artifact=True.
    diff_artifact: DiffArtifact | None = None
    # Populated by CacheAligner.
    cache_metrics: CachePrefixMetrics | None = None
386
+
387
+
388
@dataclass
class TransformDiff:
    """Per-transform diff information, meant for debugging."""

    transform_name: str
    tokens_before: int
    tokens_after: int
    tokens_saved: int
    items_removed: int = 0
    items_kept: int = 0
    # Human-readable description of what changed.
    details: str = ""
399
+
400
+
401
@dataclass
class DiffArtifact:
    """Full diff artifact for debugging the transform pipeline.

    It is opt-in: set HeadroomConfig.generate_diff_artifact = True.
    It helps in understanding what each transform did to the messages.
    """

    request_id: str
    original_tokens: int
    optimized_tokens: int
    total_tokens_saved: int
    # One entry per transform; a fresh empty list per instance by default.
    transforms: list[TransformDiff] = field(default_factory=list)
414
+
415
+
416
@dataclass
class SimulationResult:
    """Outcome of a simulation, i.e. a dry run."""

    tokens_before: int
    tokens_after: int
    tokens_saved: int
    transforms: list[str]
    # Human-readable cost estimate.
    estimated_savings: str
    messages_optimized: list[dict[str, Any]]
    block_breakdown: dict[str, int]
    waste_signals: dict[str, int]
    stable_prefix_hash: str
    cache_alignment_score: float
430
+
431
+
432
@dataclass
class RequestMetrics:
    """Comprehensive set of metrics recorded for one request."""

    request_id: str
    timestamp: datetime
    model: str
    stream: bool
    # One of: audit | optimize | simulate
    mode: str

    # --- Token breakdown ---
    tokens_input_before: int
    tokens_input_after: int
    # None if streaming.
    tokens_output: int | None = None

    # --- Block breakdown ---
    block_breakdown: dict[str, int] = field(default_factory=dict)

    # --- Waste signals ---
    waste_signals: dict[str, int] = field(default_factory=dict)

    # --- Cache metrics (basic) ---
    stable_prefix_hash: str = ""
    cache_alignment_score: float = 0.0
    # Taken from the API response if available.
    cached_tokens: int | None = None

    # --- Cache optimizer metrics (provider-specific) ---
    # e.g., "anthropic-cache-optimizer"
    cache_optimizer_used: str | None = None
    # e.g., "explicit_breakpoints"
    cache_optimizer_strategy: str | None = None
    # Tokens eligible for caching.
    cacheable_tokens: int = 0
    # Cache breakpoints added (Anthropic).
    breakpoints_inserted: int = 0
    # Whether the prefix matches the previous one.
    estimated_cache_hit: bool = False
    # Estimated savings if cached.
    estimated_savings_percent: float = 0.0
    # Whether the semantic cache was hit.
    semantic_cache_hit: bool = False

    # --- Transform details ---
    transforms_applied: list[str] = field(default_factory=list)
    tool_units_dropped: int = 0
    turns_dropped: int = 0

    # --- For debugging ---
    messages_hash: str = ""
    error: str | None = None
headroom/exceptions.py ADDED
@@ -0,0 +1,192 @@
1
+ """Custom exceptions for Headroom.
2
+
3
+ This module provides explicit exception classes for better error handling
4
+ and debugging. All exceptions inherit from HeadroomError, making it easy
5
+ to catch all Headroom-related errors.
6
+
7
+ Example:
8
+ from headroom import HeadroomClient, HeadroomError, ConfigurationError
9
+
10
+ try:
11
+ client = HeadroomClient(...)
12
+ client.validate_setup()
13
+ except ConfigurationError as e:
14
+ print(f"Configuration problem: {e}")
15
+ except HeadroomError as e:
16
+ print(f"Headroom error: {e}")
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ from typing import Any
22
+
23
+
24
class HeadroomError(Exception):
    """Root of the Headroom exception hierarchy.

    Every Headroom exception derives from this class, so one handler can
    catch any Headroom-related error:

        try:
            client.chat.completions.create(...)
        except HeadroomError as e:
            # Handle any Headroom error
            pass
    """

    def __init__(self, message: str, details: dict[str, Any] | None = None):
        """Store the message and the optional details mapping.

        Args:
            message: Description of the error.
            details: Extra key/value context; a missing or empty value
                becomes a new empty dict.
        """
        self.message = message
        self.details = details if details else {}
        super().__init__(message)

    def __str__(self) -> str:
        """Return the message, followed by ``(k=v, ...)`` when details exist."""
        if not self.details:
            return self.message
        rendered = ", ".join(f"{key}={value}" for key, value in self.details.items())
        return f"{self.message} ({rendered})"
47
+
48
+
49
class ConfigurationError(HeadroomError):
    """Signals that Headroom is misconfigured.

    Covers:
        - Invalid mode values
        - Missing required configuration
        - Incompatible configuration combinations

    Example:
        ConfigurationError(
            "Invalid mode 'foo'",
            details={"valid_modes": ["audit", "optimize"]}
        )
    """
65
+
66
+
67
class ProviderError(HeadroomError):
    """Signals a problem with the LLM provider.

    Covers:
        - Provider not recognized
        - Provider-specific configuration issues
        - Token counter errors

    Example:
        ProviderError(
            "Unknown provider",
            details={"provider": "foo", "known_providers": ["openai", "anthropic"]}
        )
    """
83
+
84
+
85
class StorageError(HeadroomError):
    """Signals a problem with metrics storage.

    Covers:
        - Database connection failures
        - Invalid storage URL
        - Write failures

    Example:
        StorageError(
            "Cannot connect to database",
            details={"url": "sqlite:///foo.db", "error": "Permission denied"}
        )
    """
101
+
102
+
103
class CompressionError(HeadroomError):
    """Signals that compression failed.

    Covers:
        - Parse errors in tool outputs
        - Invalid JSON structures
        - Compression strategy failures

    Example:
        CompressionError(
            "Failed to parse tool output",
            details={"tool_name": "search_api", "content_preview": "..."}
        )
    """
119
+
120
+
121
class TokenizationError(HeadroomError):
    """Signals that token counting failed.

    Covers:
        - Unknown model for tokenization
        - Encoding errors
        - Tiktoken/tokenizer loading failures

    Example:
        TokenizationError(
            "Unknown model for tokenization",
            details={"model": "gpt-99", "fallback_used": True}
        )
    """
137
+
138
+
139
class CacheError(HeadroomError):
    """Signals that a caching operation failed.

    Covers:
        - Cache store errors
        - Retrieval failures
        - CCR (Compress-Cache-Retrieve) errors

    Example:
        CacheError(
            "Cache entry expired",
            details={"hash": "abc123", "ttl": 300}
        )
    """
155
+
156
+
157
class ValidationError(HeadroomError):
    """Signals that setup validation failed.

    validate_setup() raises it when the configuration or the environment
    is not properly set up.

    Example:
        ValidationError(
            "Setup validation failed",
            details={
                "provider_ok": True,
                "storage_ok": False,
                "storage_error": "Cannot write to database"
            }
        )
    """
175
+
176
+
177
class TransformError(HeadroomError):
    """Signals that a transform could not be applied.

    Covers:
        - SmartCrusher failures
        - RollingWindow errors
        - Pipeline errors

    Example:
        TransformError(
            "Transform failed",
            details={"transform": "smart_crusher", "reason": "..."}
        )
    """