headroom-ai 0.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. headroom/__init__.py +212 -0
  2. headroom/cache/__init__.py +76 -0
  3. headroom/cache/anthropic.py +517 -0
  4. headroom/cache/base.py +342 -0
  5. headroom/cache/compression_feedback.py +613 -0
  6. headroom/cache/compression_store.py +814 -0
  7. headroom/cache/dynamic_detector.py +1026 -0
  8. headroom/cache/google.py +884 -0
  9. headroom/cache/openai.py +584 -0
  10. headroom/cache/registry.py +175 -0
  11. headroom/cache/semantic.py +451 -0
  12. headroom/ccr/__init__.py +77 -0
  13. headroom/ccr/context_tracker.py +582 -0
  14. headroom/ccr/mcp_server.py +319 -0
  15. headroom/ccr/response_handler.py +772 -0
  16. headroom/ccr/tool_injection.py +415 -0
  17. headroom/cli.py +219 -0
  18. headroom/client.py +977 -0
  19. headroom/compression/__init__.py +42 -0
  20. headroom/compression/detector.py +424 -0
  21. headroom/compression/handlers/__init__.py +22 -0
  22. headroom/compression/handlers/base.py +219 -0
  23. headroom/compression/handlers/code_handler.py +506 -0
  24. headroom/compression/handlers/json_handler.py +418 -0
  25. headroom/compression/masks.py +345 -0
  26. headroom/compression/universal.py +465 -0
  27. headroom/config.py +474 -0
  28. headroom/exceptions.py +192 -0
  29. headroom/integrations/__init__.py +159 -0
  30. headroom/integrations/agno/__init__.py +53 -0
  31. headroom/integrations/agno/hooks.py +345 -0
  32. headroom/integrations/agno/model.py +625 -0
  33. headroom/integrations/agno/providers.py +154 -0
  34. headroom/integrations/langchain/__init__.py +106 -0
  35. headroom/integrations/langchain/agents.py +326 -0
  36. headroom/integrations/langchain/chat_model.py +1002 -0
  37. headroom/integrations/langchain/langsmith.py +324 -0
  38. headroom/integrations/langchain/memory.py +319 -0
  39. headroom/integrations/langchain/providers.py +200 -0
  40. headroom/integrations/langchain/retriever.py +371 -0
  41. headroom/integrations/langchain/streaming.py +341 -0
  42. headroom/integrations/mcp/__init__.py +37 -0
  43. headroom/integrations/mcp/server.py +533 -0
  44. headroom/memory/__init__.py +37 -0
  45. headroom/memory/extractor.py +390 -0
  46. headroom/memory/fast_store.py +621 -0
  47. headroom/memory/fast_wrapper.py +311 -0
  48. headroom/memory/inline_extractor.py +229 -0
  49. headroom/memory/store.py +434 -0
  50. headroom/memory/worker.py +260 -0
  51. headroom/memory/wrapper.py +321 -0
  52. headroom/models/__init__.py +39 -0
  53. headroom/models/registry.py +687 -0
  54. headroom/parser.py +293 -0
  55. headroom/pricing/__init__.py +51 -0
  56. headroom/pricing/anthropic_prices.py +81 -0
  57. headroom/pricing/litellm_pricing.py +113 -0
  58. headroom/pricing/openai_prices.py +91 -0
  59. headroom/pricing/registry.py +188 -0
  60. headroom/providers/__init__.py +61 -0
  61. headroom/providers/anthropic.py +621 -0
  62. headroom/providers/base.py +131 -0
  63. headroom/providers/cohere.py +362 -0
  64. headroom/providers/google.py +427 -0
  65. headroom/providers/litellm.py +297 -0
  66. headroom/providers/openai.py +566 -0
  67. headroom/providers/openai_compatible.py +521 -0
  68. headroom/proxy/__init__.py +19 -0
  69. headroom/proxy/server.py +2683 -0
  70. headroom/py.typed +0 -0
  71. headroom/relevance/__init__.py +124 -0
  72. headroom/relevance/base.py +106 -0
  73. headroom/relevance/bm25.py +255 -0
  74. headroom/relevance/embedding.py +255 -0
  75. headroom/relevance/hybrid.py +259 -0
  76. headroom/reporting/__init__.py +5 -0
  77. headroom/reporting/generator.py +549 -0
  78. headroom/storage/__init__.py +41 -0
  79. headroom/storage/base.py +125 -0
  80. headroom/storage/jsonl.py +220 -0
  81. headroom/storage/sqlite.py +289 -0
  82. headroom/telemetry/__init__.py +91 -0
  83. headroom/telemetry/collector.py +764 -0
  84. headroom/telemetry/models.py +880 -0
  85. headroom/telemetry/toin.py +1579 -0
  86. headroom/tokenizer.py +80 -0
  87. headroom/tokenizers/__init__.py +75 -0
  88. headroom/tokenizers/base.py +210 -0
  89. headroom/tokenizers/estimator.py +198 -0
  90. headroom/tokenizers/huggingface.py +317 -0
  91. headroom/tokenizers/mistral.py +245 -0
  92. headroom/tokenizers/registry.py +398 -0
  93. headroom/tokenizers/tiktoken_counter.py +248 -0
  94. headroom/transforms/__init__.py +106 -0
  95. headroom/transforms/base.py +57 -0
  96. headroom/transforms/cache_aligner.py +357 -0
  97. headroom/transforms/code_compressor.py +1313 -0
  98. headroom/transforms/content_detector.py +335 -0
  99. headroom/transforms/content_router.py +1158 -0
  100. headroom/transforms/llmlingua_compressor.py +638 -0
  101. headroom/transforms/log_compressor.py +529 -0
  102. headroom/transforms/pipeline.py +297 -0
  103. headroom/transforms/rolling_window.py +350 -0
  104. headroom/transforms/search_compressor.py +365 -0
  105. headroom/transforms/smart_crusher.py +2682 -0
  106. headroom/transforms/text_compressor.py +259 -0
  107. headroom/transforms/tool_crusher.py +338 -0
  108. headroom/utils.py +215 -0
  109. headroom_ai-0.2.13.dist-info/METADATA +315 -0
  110. headroom_ai-0.2.13.dist-info/RECORD +114 -0
  111. headroom_ai-0.2.13.dist-info/WHEEL +4 -0
  112. headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
  113. headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
  114. headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
@@ -0,0 +1,884 @@
1
+ """
2
+ Google Cache Optimizer for CachedContent API.
3
+
4
+ Google's Gemini API offers explicit cached content management through
5
+ the `genai.caching.CachedContent` API. Key characteristics:
6
+
7
+ - Minimum 32K tokens required for caching
8
+ - 75% discount on cached input tokens
9
+ - Storage costs (pay per hour for cached content)
10
+ - User-defined TTL (default 1 hour)
11
+ - Returns cache_id for subsequent requests
12
+
13
+ This optimizer provides cache lifecycle management utilities without
14
+ making actual API calls - users integrate with the google-generativeai
15
+ package themselves.
16
+
17
+ Usage:
18
+ optimizer = GoogleCacheOptimizer()
19
+
20
+ # Check if content is cacheable
21
+ analysis = optimizer.analyze_cacheability(messages, context)
22
+
23
+ # Optimize and get cache recommendation
24
+ result = optimizer.optimize(messages, context)
25
+
26
+ # After user creates cache via Google API, register it
27
+ optimizer.register_cache(
28
+ cache_id="cached-content-xyz",
29
+ content_hash=result.metrics.stable_prefix_hash,
30
+ token_count=50000,
31
+ expires_at=datetime.now() + timedelta(hours=1),
32
+ )
33
+
34
+ # Check if existing cache can be reused
35
+ cache_info = optimizer.get_reusable_cache(content_hash)
36
+
37
+ # Extend cache TTL
38
+ optimizer.extend_cache_ttl(cache_id, new_expires_at=datetime.now() + timedelta(hours=2))
39
+
40
+ # Clean up expired caches
41
+ optimizer.cleanup_expired_caches()
42
+ """
43
+
44
+ from __future__ import annotations
45
+
46
+ import logging
47
+ from dataclasses import dataclass, field
48
+ from datetime import datetime, timedelta
49
+ from typing import Any
50
+
51
+ from .base import (
52
+ BaseCacheOptimizer,
53
+ CacheConfig,
54
+ CacheMetrics,
55
+ CacheResult,
56
+ CacheStrategy,
57
+ OptimizationContext,
58
+ )
59
+
60
+ logger = logging.getLogger(__name__)
61
+
62
+
63
# Limits and pricing factors specific to Google's CachedContent API.
GOOGLE_MIN_CACHE_TOKENS = 32 * 1024  # smallest cacheable payload (32K tokens)
GOOGLE_CACHE_DISCOUNT = 0.75  # fraction saved on cached input tokens (75%)
GOOGLE_DEFAULT_TTL_SECONDS = 60 * 60  # default lifetime: one hour
GOOGLE_MAX_TTL_SECONDS = 7 * 24 * 60 * 60  # longest lifetime used here: seven days
68
+
69
+
70
@dataclass
class CachedContentInfo:
    """
    Information about a cached content object.

    Tracks the lifecycle of a Google CachedContent resource. Timestamps may
    be naive or timezone-aware; every time-based property evaluates "now" in
    the same timezone as the stored timestamp, so either flavour works.
    """

    # Google's cache identifier
    cache_id: str

    # Hash of the content for matching
    content_hash: str

    # Timestamps
    created_at: datetime
    expires_at: datetime

    # Token count in the cached content
    token_count: int

    # Optional model used (some caches are model-specific)
    model: str | None = None

    # Display name for the cached content
    display_name: str | None = None

    # Metadata for tracking
    metadata: dict[str, Any] = field(default_factory=dict)

    @staticmethod
    def _now_for(reference: datetime) -> datetime:
        """Return the current time in the same timezone flavour as *reference*.

        ``datetime.now(None)`` yields a naive local timestamp, so naive
        references keep their previous behaviour, while aware references get
        an aware "now" and no longer raise ``TypeError`` when compared.
        """
        return datetime.now(reference.tzinfo)

    @property
    def is_expired(self) -> bool:
        """Check if cache has expired."""
        return self._now_for(self.expires_at) >= self.expires_at

    @property
    def ttl_remaining_seconds(self) -> int:
        """Seconds remaining until expiry (never negative)."""
        remaining = (self.expires_at - self._now_for(self.expires_at)).total_seconds()
        return max(0, int(remaining))

    @property
    def age_seconds(self) -> int:
        """Age of the cache in seconds."""
        return int((self._now_for(self.created_at) - self.created_at).total_seconds())

    def to_dict(self) -> dict[str, Any]:
        """Serialize to dictionary (timestamps become ISO-8601 strings)."""
        return {
            "cache_id": self.cache_id,
            "content_hash": self.content_hash,
            "created_at": self.created_at.isoformat(),
            "expires_at": self.expires_at.isoformat(),
            "token_count": self.token_count,
            "model": self.model,
            "display_name": self.display_name,
            "metadata": self.metadata,
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> CachedContentInfo:
        """Deserialize from dictionary.

        Args:
            data: Mapping in the shape produced by :meth:`to_dict`.

        Returns:
            A new CachedContentInfo.

        Raises:
            KeyError: If a required key is missing.
            ValueError: If a timestamp is not a valid ISO-8601 string.
            TypeError: If a timestamp is not a string.
        """
        return cls(
            cache_id=data["cache_id"],
            content_hash=data["content_hash"],
            created_at=datetime.fromisoformat(data["created_at"]),
            expires_at=datetime.fromisoformat(data["expires_at"]),
            token_count=data["token_count"],
            model=data.get("model"),
            display_name=data.get("display_name"),
            # A persisted null must not leak through as metadata=None.
            metadata=data.get("metadata") or {},
        )
142
+
143
+
144
@dataclass
class CacheabilityAnalysis:
    """
    Outcome of checking content against Google's caching requirements.

    Carries the verdict, the token figures behind it, rough cost/savings
    estimates, and human-readable advice.
    """

    # True when the cacheable portion reaches the minimum token threshold
    is_cacheable: bool

    # Token estimate for the whole input and for its cacheable portion
    total_tokens: int
    cacheable_tokens: int

    # How many tokens short of the minimum the content is (0 when cacheable)
    tokens_below_minimum: int = 0

    # Rough economics of caching this content
    estimated_hourly_storage_cost_usd: float = 0.0
    estimated_savings_per_request_percent: float = 0.0

    # Free-form advice for the caller
    recommendations: list[str] = field(default_factory=list)

    # Hash identifying the cacheable content, used to match existing caches
    content_hash: str = ""
172
+
173
+
174
class GoogleCacheOptimizer(BaseCacheOptimizer):
    """
    Cache optimizer for Google's Gemini CachedContent API.

    This optimizer provides:
    1. Analysis of whether content meets Google's caching requirements
    2. Cache lifecycle management (register, lookup, extend, delete)
    3. Optimization recommendations
    4. Integration utilities for the google-generativeai SDK

    The optimizer does NOT make actual API calls - it provides the
    infrastructure for users to manage caches themselves.

    Two in-memory indexes are kept in step with each other: one keyed by
    content hash (for reuse detection) and one keyed by Google's cache id
    (for direct lookup).

    Example workflow:
        optimizer = GoogleCacheOptimizer()

        # Analyze content
        result = optimizer.optimize(messages, context)

        if result.metrics.cacheable_tokens >= GOOGLE_MIN_CACHE_TOKENS:
            # User creates cache via Google SDK
            cached_content = genai.caching.CachedContent.create(
                model="gemini-1.5-pro",
                contents=contents,
                ttl=timedelta(hours=1),
            )

            # Register with optimizer for tracking
            optimizer.register_cache(
                cache_id=cached_content.name,
                content_hash=result.metrics.stable_prefix_hash,
                token_count=result.metrics.cacheable_tokens,
                expires_at=datetime.now() + timedelta(hours=1),
            )

        # Later, check for reusable cache
        cache = optimizer.get_reusable_cache(content_hash)
        if cache:
            # Use cache.cache_id in API call
            pass
    """

    def __init__(self, config: CacheConfig | None = None):
        """
        Initialize Google cache optimizer.

        Args:
            config: Optional cache configuration
        """
        super().__init__(config)

        # Override minimum tokens for Google's requirements
        if self.config.min_cacheable_tokens < GOOGLE_MIN_CACHE_TOKENS:
            self.config.min_cacheable_tokens = GOOGLE_MIN_CACHE_TOKENS

        # Cache registry: content_hash -> CachedContentInfo
        self._cache_registry: dict[str, CachedContentInfo] = {}

        # Also index by cache_id for direct lookup
        self._cache_by_id: dict[str, CachedContentInfo] = {}

        # Statistics
        self._caches_created: int = 0
        self._caches_reused: int = 0
        self._caches_expired: int = 0

    @property
    def name(self) -> str:
        """Name of this optimizer."""
        return "google-cached-content"

    @property
    def provider(self) -> str:
        """Provider this optimizer is for."""
        return "google"

    @property
    def strategy(self) -> CacheStrategy:
        """The caching strategy this optimizer uses."""
        return CacheStrategy.CACHED_CONTENT

    def optimize(
        self,
        messages: list[dict[str, Any]],
        context: OptimizationContext,
        config: CacheConfig | None = None,
    ) -> CacheResult:
        """
        Optimize messages for Google caching.

        This method:
        1. Analyzes content for cacheability
        2. Checks for existing reusable caches
        3. Returns optimization metrics and recommendations

        The messages themselves are returned unchanged; caching happens
        out of band through Google's API.

        Args:
            messages: The messages to optimize
            context: Optimization context
            config: Optional configuration override (accepted for interface
                compatibility; this implementation does not read it)

        Returns:
            CacheResult with analysis and cache information
        """
        # Extract cacheable content (system messages + static context)
        cacheable_content = self._extract_cacheable_content(messages)
        content_hash = self._compute_prefix_hash(cacheable_content)

        # Estimate tokens
        total_tokens = self._count_tokens_estimate(self._messages_to_text(messages))
        cacheable_tokens = self._count_tokens_estimate(cacheable_content)

        # Check for existing cache
        existing_cache = self.get_reusable_cache(content_hash)

        # Build metrics
        metrics = CacheMetrics(
            stable_prefix_tokens=cacheable_tokens,
            stable_prefix_hash=content_hash,
            prefix_changed_from_previous=(
                context.previous_prefix_hash != content_hash
                if context.previous_prefix_hash
                else False
            ),
            previous_prefix_hash=context.previous_prefix_hash,
            cacheable_tokens=cacheable_tokens,
            non_cacheable_tokens=total_tokens - cacheable_tokens,
        )

        # Calculate estimated savings
        if cacheable_tokens >= GOOGLE_MIN_CACHE_TOKENS:
            metrics.estimated_savings_percent = GOOGLE_CACHE_DISCOUNT * 100
            metrics.estimated_cache_hit = existing_cache is not None

        # Add cache info if available
        if existing_cache:
            metrics.provider_cache_id = existing_cache.cache_id
            metrics.cache_ttl_remaining_seconds = existing_cache.ttl_remaining_seconds
            self._caches_reused += 1

        # Build warnings
        warnings: list[str] = []
        if cacheable_tokens < GOOGLE_MIN_CACHE_TOKENS:
            shortfall = GOOGLE_MIN_CACHE_TOKENS - cacheable_tokens
            warnings.append(
                f"Content has {cacheable_tokens:,} tokens, needs {shortfall:,} more "
                f"to meet Google's 32K minimum for caching"
            )

        if existing_cache and existing_cache.ttl_remaining_seconds < 300:
            warnings.append(
                f"Existing cache expires in {existing_cache.ttl_remaining_seconds}s - "
                f"consider extending TTL"
            )

        # Record metrics
        self._record_metrics(metrics)
        self._previous_prefix_hash = content_hash

        # Build transforms applied list
        transforms: list[str] = ["content_analysis"]
        if existing_cache:
            transforms.append("cache_lookup")

        return CacheResult(
            messages=messages,  # Messages unchanged - caching is separate
            semantic_cache_hit=False,
            metrics=metrics,
            tokens_before=total_tokens,
            tokens_after=total_tokens,  # Token count doesn't change
            transforms_applied=transforms,
            warnings=warnings,
        )

    def analyze_cacheability(
        self,
        messages: list[dict[str, Any]],
        context: OptimizationContext,
    ) -> CacheabilityAnalysis:
        """
        Analyze content for Google cache suitability.

        Provides detailed analysis including:
        - Whether content meets minimum requirements
        - Estimated costs and savings
        - Recommendations for improving cacheability

        Args:
            messages: Messages to analyze
            context: Optimization context (not read by this method)

        Returns:
            CacheabilityAnalysis with detailed information
        """
        import math  # local import: only needed for the break-even rounding

        cacheable_content = self._extract_cacheable_content(messages)
        content_hash = self._compute_prefix_hash(cacheable_content)

        total_tokens = self._count_tokens_estimate(self._messages_to_text(messages))
        cacheable_tokens = self._count_tokens_estimate(cacheable_content)

        is_cacheable = cacheable_tokens >= GOOGLE_MIN_CACHE_TOKENS
        tokens_below_minimum = max(0, GOOGLE_MIN_CACHE_TOKENS - cacheable_tokens)

        # Build recommendations
        recommendations: list[str] = []
        hourly_cost = 0.0  # stays 0 when the content cannot be cached at all

        if not is_cacheable:
            recommendations.append(
                f"Add {tokens_below_minimum:,} more tokens to static content to enable caching"
            )
            recommendations.append(
                "Consider adding detailed examples or documentation to system prompt"
            )
        else:
            recommendations.append(
                "Content is cacheable. Create cache with google-generativeai SDK"
            )

            # Storage cost estimation (rough - actual pricing varies)
            # Assuming ~$0.001 per 1000 tokens per hour (simplified)
            hourly_cost = (cacheable_tokens / 1000) * 0.001
            recommendations.append(f"Estimated storage cost: ~${hourly_cost:.4f}/hour")

            # Break-even analysis
            # Assuming $0.01 per 1000 input tokens base price
            base_cost_per_request = (cacheable_tokens / 1000) * 0.01
            savings_per_request = base_cost_per_request * GOOGLE_CACHE_DISCOUNT
            if hourly_cost > 0 and savings_per_request > 0:
                # Round up: a fractional request count still needs one whole
                # request per hour before the storage cost is recovered.
                break_even_requests = math.ceil(hourly_cost / savings_per_request)
                recommendations.append(f"Break-even: ~{int(break_even_requests)} requests/hour")

        return CacheabilityAnalysis(
            is_cacheable=is_cacheable,
            total_tokens=total_tokens,
            cacheable_tokens=cacheable_tokens,
            tokens_below_minimum=tokens_below_minimum,
            estimated_hourly_storage_cost_usd=hourly_cost,
            estimated_savings_per_request_percent=(
                GOOGLE_CACHE_DISCOUNT * 100 if is_cacheable else 0.0
            ),
            recommendations=recommendations,
            content_hash=content_hash,
        )

    # -------------------------------------------------------------------------
    # Cache Registry Management
    # -------------------------------------------------------------------------

    def _index_cache(self, cache_info: CachedContentInfo) -> None:
        """Store cache_info in both indexes, evicting entries it supersedes."""
        same_hash = self._cache_registry.get(cache_info.content_hash)
        if same_hash is not None and same_hash.cache_id != cache_info.cache_id:
            # The content is now served by a different cache id.
            self._cache_by_id.pop(same_hash.cache_id, None)

        same_id = self._cache_by_id.get(cache_info.cache_id)
        if same_id is not None and same_id.content_hash != cache_info.content_hash:
            # The cache id was previously registered for other content.
            if self._cache_registry.get(same_id.content_hash) is same_id:
                self._cache_registry.pop(same_id.content_hash, None)

        self._cache_registry[cache_info.content_hash] = cache_info
        self._cache_by_id[cache_info.cache_id] = cache_info

    def register_cache(
        self,
        cache_id: str,
        content_hash: str,
        token_count: int,
        expires_at: datetime,
        *,
        model: str | None = None,
        display_name: str | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> CachedContentInfo:
        """
        Register a cache after creating it via Google's API.

        Call this after successfully creating a CachedContent resource
        to enable cache reuse detection. An existing registration for the
        same content hash or the same cache id is replaced.

        Args:
            cache_id: Google's cache identifier (e.g., "cachedContents/xyz")
            content_hash: Hash of cached content (from optimize() metrics)
            token_count: Number of tokens in cached content
            expires_at: When the cache expires
            model: Optional model the cache was created for
            display_name: Optional display name
            metadata: Optional additional metadata

        Returns:
            CachedContentInfo for the registered cache

        Example:
            # After creating cache via Google SDK
            cached_content = genai.caching.CachedContent.create(...)

            info = optimizer.register_cache(
                cache_id=cached_content.name,
                content_hash=result.metrics.stable_prefix_hash,
                token_count=result.metrics.cacheable_tokens,
                expires_at=datetime.now() + timedelta(hours=1),
            )
        """
        old_cache = self._cache_registry.get(content_hash)
        if old_cache is not None:
            logger.debug(
                f"Replacing existing cache for hash {content_hash}: "
                f"{old_cache.cache_id} -> {cache_id}"
            )

        cache_info = CachedContentInfo(
            cache_id=cache_id,
            content_hash=content_hash,
            # Match the timezone flavour of expires_at so both timestamps of
            # one record are either naive or aware.
            created_at=datetime.now(expires_at.tzinfo),
            expires_at=expires_at,
            token_count=token_count,
            model=model,
            display_name=display_name,
            metadata=metadata or {},
        )

        self._index_cache(cache_info)
        self._caches_created += 1

        logger.info(
            f"Registered cache {cache_id} with {token_count:,} tokens, "
            f"expires in {cache_info.ttl_remaining_seconds}s"
        )

        return cache_info

    def get_reusable_cache(
        self,
        content_hash: str,
        *,
        min_ttl_seconds: int = 60,
    ) -> CachedContentInfo | None:
        """
        Check if a reusable cache exists for the given content.

        An expired entry found during the lookup is dropped from the
        registry and counted as expired.

        Args:
            content_hash: Hash of the content to look up
            min_ttl_seconds: Minimum remaining TTL to consider reusable

        Returns:
            CachedContentInfo if reusable cache exists, None otherwise
        """
        cache_info = self._cache_registry.get(content_hash)

        if cache_info is None:
            return None

        if cache_info.is_expired:
            self._remove_cache_internal(content_hash)
            return None

        if cache_info.ttl_remaining_seconds < min_ttl_seconds:
            logger.debug(
                f"Cache {cache_info.cache_id} has insufficient TTL "
                f"({cache_info.ttl_remaining_seconds}s < {min_ttl_seconds}s)"
            )
            return None

        return cache_info

    def get_cache_by_id(self, cache_id: str) -> CachedContentInfo | None:
        """
        Look up cache information by cache ID.

        Args:
            cache_id: Google's cache identifier

        Returns:
            CachedContentInfo if found, None otherwise
        """
        return self._cache_by_id.get(cache_id)

    def extend_cache_ttl(
        self,
        cache_id: str,
        new_expires_at: datetime,
    ) -> CachedContentInfo | None:
        """
        Update the expiry time for a cache after extending via Google API.

        Call this after successfully calling update() on the CachedContent
        to extend its TTL.

        Args:
            cache_id: Google's cache identifier
            new_expires_at: New expiry time

        Returns:
            Updated CachedContentInfo or None if not found

        Example:
            # After extending via Google SDK
            cached_content.update(ttl=timedelta(hours=2))

            optimizer.extend_cache_ttl(
                cache_id=cached_content.name,
                new_expires_at=datetime.now() + timedelta(hours=2),
            )
        """
        cache_info = self._cache_by_id.get(cache_id)
        if cache_info is None:
            logger.warning(f"Cannot extend unknown cache: {cache_id}")
            return None

        old_expires = cache_info.expires_at
        cache_info.expires_at = new_expires_at

        logger.info(f"Extended cache {cache_id} TTL from {old_expires} to {new_expires_at}")

        return cache_info

    def remove_cache(self, cache_id: str) -> bool:
        """
        Remove a cache from the registry.

        Call this after deleting the cache via Google API.

        Args:
            cache_id: Google's cache identifier

        Returns:
            True if cache was removed, False if not found
        """
        cache_info = self._cache_by_id.pop(cache_id, None)
        if cache_info is None:
            return False

        # Only drop the hash entry when it still points at this very cache.
        if self._cache_registry.get(cache_info.content_hash) is cache_info:
            self._cache_registry.pop(cache_info.content_hash, None)

        logger.info(f"Removed cache {cache_id} from registry")
        return True

    def _remove_cache_internal(self, content_hash: str) -> None:
        """Remove cache by content hash and count it as expired (internal use)."""
        cache_info = self._cache_registry.pop(content_hash, None)
        if cache_info is None:
            return

        # Only drop the id entry when it still points at this very cache.
        if self._cache_by_id.get(cache_info.cache_id) is cache_info:
            self._cache_by_id.pop(cache_info.cache_id, None)
        self._caches_expired += 1

    def cleanup_expired_caches(self) -> list[str]:
        """
        Remove all expired caches from the registry.

        Returns:
            List of removed cache IDs (for user to delete via Google API)

        Example:
            expired_ids = optimizer.cleanup_expired_caches()
            for cache_id in expired_ids:
                # User deletes via Google SDK
                genai.caching.CachedContent.get(cache_id).delete()
        """
        expired_ids: list[str] = []

        # Iterate over a snapshot because entries are removed inside the loop.
        for content_hash, cache_info in list(self._cache_registry.items()):
            if cache_info.is_expired:
                expired_ids.append(cache_info.cache_id)
                self._remove_cache_internal(content_hash)

        if expired_ids:
            logger.info(f"Cleaned up {len(expired_ids)} expired caches")

        return expired_ids

    def list_caches(
        self,
        *,
        include_expired: bool = False,
    ) -> list[CachedContentInfo]:
        """
        List all registered caches.

        Args:
            include_expired: Whether to include expired caches

        Returns:
            List of CachedContentInfo objects, soonest expiry first
        """
        caches = list(self._cache_registry.values())

        if not include_expired:
            caches = [c for c in caches if not c.is_expired]

        # Sort by expiry time
        caches.sort(key=lambda c: c.expires_at)

        return caches

    def get_statistics(self) -> dict[str, Any]:
        """
        Get cache usage statistics.

        Returns:
            Dictionary with cache statistics. ``cache_hit_rate`` is
            reused / (reused + created), or 0.0 before any activity.
        """
        active_caches = [c for c in self._cache_registry.values() if not c.is_expired]
        total_cached_tokens = sum(c.token_count for c in active_caches)
        lookups = self._caches_reused + self._caches_created

        return {
            "active_caches": len(active_caches),
            "total_cached_tokens": total_cached_tokens,
            "caches_created": self._caches_created,
            "caches_reused": self._caches_reused,
            "caches_expired": self._caches_expired,
            "cache_hit_rate": self._caches_reused / lookups if lookups > 0 else 0.0,
        }

    # -------------------------------------------------------------------------
    # Cache Creation Helpers
    # -------------------------------------------------------------------------

    def prepare_cache_creation(
        self,
        messages: list[dict[str, Any]],
        context: OptimizationContext,
        ttl_seconds: int = GOOGLE_DEFAULT_TTL_SECONDS,
    ) -> dict[str, Any] | None:
        """
        Prepare parameters for creating a Google cache.

        Returns a dictionary with suggested parameters for
        genai.caching.CachedContent.create(). The TTL is capped at
        GOOGLE_MAX_TTL_SECONDS.

        Args:
            messages: Messages to cache
            context: Optimization context
            ttl_seconds: Desired TTL in seconds

        Returns:
            Dictionary with cache creation parameters, or None if not cacheable

        Example:
            params = optimizer.prepare_cache_creation(messages, context)
            if params:
                cached_content = genai.caching.CachedContent.create(**params)
        """
        analysis = self.analyze_cacheability(messages, context)

        if not analysis.is_cacheable:
            logger.debug(
                f"Content not cacheable: {analysis.tokens_below_minimum} tokens below minimum"
            )
            return None

        cacheable_content = self._extract_cacheable_content(messages)

        return {
            "contents": cacheable_content,
            "ttl": timedelta(seconds=min(ttl_seconds, GOOGLE_MAX_TTL_SECONDS)),
            "display_name": f"headroom-cache-{analysis.content_hash[:8]}",
            "_headroom_metadata": {
                "content_hash": analysis.content_hash,
                "token_count": analysis.cacheable_tokens,
                "created_by": "headroom",
            },
        }

    def build_request_with_cache(
        self,
        messages: list[dict[str, Any]],
        cache_id: str,
    ) -> dict[str, Any]:
        """
        Build request parameters using an existing cache.

        Returns a dictionary suggesting how to structure the API call
        when using cached content.

        Args:
            messages: Full message list
            cache_id: Cache ID to use

        Returns:
            Dictionary with suggested request structure
        """
        # Extract only the non-cached (dynamic) content
        dynamic_messages = self._extract_dynamic_messages(messages)

        return {
            "cached_content": cache_id,
            "contents": dynamic_messages,
            "_headroom_note": (
                "Use cached_content parameter with GenerativeModel to leverage the cache"
            ),
        }

    # -------------------------------------------------------------------------
    # Content Extraction Helpers
    # -------------------------------------------------------------------------

    def _extract_cacheable_content(self, messages: list[dict[str, Any]]) -> str:
        """
        Extract content suitable for caching.

        Only system messages are included; their text is joined with blank
        lines. User/assistant turns are deliberately left out (conservative
        default), even though early example turns might be cacheable.
        """
        cacheable_parts: list[str] = []

        for msg in messages:
            if msg.get("role", "") != "system":
                continue
            content = self._extract_message_content(msg)
            if content:
                cacheable_parts.append(content)

        return "\n\n".join(cacheable_parts)

    def _extract_dynamic_messages(
        self,
        messages: list[dict[str, Any]],
    ) -> list[dict[str, Any]]:
        """
        Extract messages that should NOT be cached.

        These are all messages whose role is not "system".
        """
        return [msg for msg in messages if msg.get("role") != "system"]

    def _extract_message_content(self, message: dict[str, Any]) -> str:
        """Extract text content from a message.

        Handles plain-string content and list content made of
        ``{"type": "text", "text": ...}`` blocks and/or bare strings; any
        other shape yields an empty string.
        """
        content = message.get("content", "")

        if isinstance(content, str):
            return content

        if isinstance(content, list):
            parts: list[str] = []
            for block in content:
                if isinstance(block, dict):
                    if block.get("type") == "text":
                        # A null "text" value would otherwise break the join.
                        parts.append(block.get("text") or "")
                elif isinstance(block, str):
                    parts.append(block)
            return "\n".join(parts)

        return ""

    def _messages_to_text(self, messages: list[dict[str, Any]]) -> str:
        """Convert all messages to text for token counting."""
        parts: list[str] = []
        for msg in messages:
            content = self._extract_message_content(msg)
            if content:
                parts.append(f"{msg.get('role', 'unknown')}: {content}")
        return "\n\n".join(parts)

    # -------------------------------------------------------------------------
    # Serialization for Persistence
    # -------------------------------------------------------------------------

    def export_cache_registry(self) -> list[dict[str, Any]]:
        """
        Export cache registry for persistence.

        Returns:
            List of cache info dictionaries
        """
        return [info.to_dict() for info in self._cache_registry.values()]

    def import_cache_registry(
        self,
        cache_data: list[dict[str, Any]],
        *,
        skip_expired: bool = True,
    ) -> int:
        """
        Import caches from persisted data.

        Malformed entries are logged and skipped rather than aborting the
        whole import.

        Args:
            cache_data: List of cache info dictionaries
            skip_expired: Whether to skip already-expired caches

        Returns:
            Number of caches imported
        """
        imported = 0

        for data in cache_data:
            try:
                cache_info = CachedContentInfo.from_dict(data)
                expired = cache_info.is_expired
            except (KeyError, ValueError, TypeError) as e:
                # TypeError covers non-string timestamps and non-mapping entries.
                logger.warning(f"Failed to import cache entry: {e}")
                continue

            if skip_expired and expired:
                continue

            self._index_cache(cache_info)
            imported += 1

        logger.info(f"Imported {imported} caches from persisted data")
        return imported