headroom-ai 0.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. headroom/__init__.py +212 -0
  2. headroom/cache/__init__.py +76 -0
  3. headroom/cache/anthropic.py +517 -0
  4. headroom/cache/base.py +342 -0
  5. headroom/cache/compression_feedback.py +613 -0
  6. headroom/cache/compression_store.py +814 -0
  7. headroom/cache/dynamic_detector.py +1026 -0
  8. headroom/cache/google.py +884 -0
  9. headroom/cache/openai.py +584 -0
  10. headroom/cache/registry.py +175 -0
  11. headroom/cache/semantic.py +451 -0
  12. headroom/ccr/__init__.py +77 -0
  13. headroom/ccr/context_tracker.py +582 -0
  14. headroom/ccr/mcp_server.py +319 -0
  15. headroom/ccr/response_handler.py +772 -0
  16. headroom/ccr/tool_injection.py +415 -0
  17. headroom/cli.py +219 -0
  18. headroom/client.py +977 -0
  19. headroom/compression/__init__.py +42 -0
  20. headroom/compression/detector.py +424 -0
  21. headroom/compression/handlers/__init__.py +22 -0
  22. headroom/compression/handlers/base.py +219 -0
  23. headroom/compression/handlers/code_handler.py +506 -0
  24. headroom/compression/handlers/json_handler.py +418 -0
  25. headroom/compression/masks.py +345 -0
  26. headroom/compression/universal.py +465 -0
  27. headroom/config.py +474 -0
  28. headroom/exceptions.py +192 -0
  29. headroom/integrations/__init__.py +159 -0
  30. headroom/integrations/agno/__init__.py +53 -0
  31. headroom/integrations/agno/hooks.py +345 -0
  32. headroom/integrations/agno/model.py +625 -0
  33. headroom/integrations/agno/providers.py +154 -0
  34. headroom/integrations/langchain/__init__.py +106 -0
  35. headroom/integrations/langchain/agents.py +326 -0
  36. headroom/integrations/langchain/chat_model.py +1002 -0
  37. headroom/integrations/langchain/langsmith.py +324 -0
  38. headroom/integrations/langchain/memory.py +319 -0
  39. headroom/integrations/langchain/providers.py +200 -0
  40. headroom/integrations/langchain/retriever.py +371 -0
  41. headroom/integrations/langchain/streaming.py +341 -0
  42. headroom/integrations/mcp/__init__.py +37 -0
  43. headroom/integrations/mcp/server.py +533 -0
  44. headroom/memory/__init__.py +37 -0
  45. headroom/memory/extractor.py +390 -0
  46. headroom/memory/fast_store.py +621 -0
  47. headroom/memory/fast_wrapper.py +311 -0
  48. headroom/memory/inline_extractor.py +229 -0
  49. headroom/memory/store.py +434 -0
  50. headroom/memory/worker.py +260 -0
  51. headroom/memory/wrapper.py +321 -0
  52. headroom/models/__init__.py +39 -0
  53. headroom/models/registry.py +687 -0
  54. headroom/parser.py +293 -0
  55. headroom/pricing/__init__.py +51 -0
  56. headroom/pricing/anthropic_prices.py +81 -0
  57. headroom/pricing/litellm_pricing.py +113 -0
  58. headroom/pricing/openai_prices.py +91 -0
  59. headroom/pricing/registry.py +188 -0
  60. headroom/providers/__init__.py +61 -0
  61. headroom/providers/anthropic.py +621 -0
  62. headroom/providers/base.py +131 -0
  63. headroom/providers/cohere.py +362 -0
  64. headroom/providers/google.py +427 -0
  65. headroom/providers/litellm.py +297 -0
  66. headroom/providers/openai.py +566 -0
  67. headroom/providers/openai_compatible.py +521 -0
  68. headroom/proxy/__init__.py +19 -0
  69. headroom/proxy/server.py +2683 -0
  70. headroom/py.typed +0 -0
  71. headroom/relevance/__init__.py +124 -0
  72. headroom/relevance/base.py +106 -0
  73. headroom/relevance/bm25.py +255 -0
  74. headroom/relevance/embedding.py +255 -0
  75. headroom/relevance/hybrid.py +259 -0
  76. headroom/reporting/__init__.py +5 -0
  77. headroom/reporting/generator.py +549 -0
  78. headroom/storage/__init__.py +41 -0
  79. headroom/storage/base.py +125 -0
  80. headroom/storage/jsonl.py +220 -0
  81. headroom/storage/sqlite.py +289 -0
  82. headroom/telemetry/__init__.py +91 -0
  83. headroom/telemetry/collector.py +764 -0
  84. headroom/telemetry/models.py +880 -0
  85. headroom/telemetry/toin.py +1579 -0
  86. headroom/tokenizer.py +80 -0
  87. headroom/tokenizers/__init__.py +75 -0
  88. headroom/tokenizers/base.py +210 -0
  89. headroom/tokenizers/estimator.py +198 -0
  90. headroom/tokenizers/huggingface.py +317 -0
  91. headroom/tokenizers/mistral.py +245 -0
  92. headroom/tokenizers/registry.py +398 -0
  93. headroom/tokenizers/tiktoken_counter.py +248 -0
  94. headroom/transforms/__init__.py +106 -0
  95. headroom/transforms/base.py +57 -0
  96. headroom/transforms/cache_aligner.py +357 -0
  97. headroom/transforms/code_compressor.py +1313 -0
  98. headroom/transforms/content_detector.py +335 -0
  99. headroom/transforms/content_router.py +1158 -0
  100. headroom/transforms/llmlingua_compressor.py +638 -0
  101. headroom/transforms/log_compressor.py +529 -0
  102. headroom/transforms/pipeline.py +297 -0
  103. headroom/transforms/rolling_window.py +350 -0
  104. headroom/transforms/search_compressor.py +365 -0
  105. headroom/transforms/smart_crusher.py +2682 -0
  106. headroom/transforms/text_compressor.py +259 -0
  107. headroom/transforms/tool_crusher.py +338 -0
  108. headroom/utils.py +215 -0
  109. headroom_ai-0.2.13.dist-info/METADATA +315 -0
  110. headroom_ai-0.2.13.dist-info/RECORD +114 -0
  111. headroom_ai-0.2.13.dist-info/WHEEL +4 -0
  112. headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
  113. headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
  114. headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
@@ -0,0 +1,517 @@
1
+ """
2
+ Anthropic Cache Optimizer.
3
+
4
+ Implements cache optimization for Anthropic's explicit cache_control mechanism.
5
+ Anthropic uses ephemeral cache breakpoints to mark content that should be cached.
6
+
7
+ Anthropic Caching Characteristics:
8
+ - Explicit cache_control: {"type": "ephemeral"} blocks
9
+ - Minimum 1024 tokens for caching to be effective
10
+ - Maximum 4 cache breakpoints per request
11
+ - 5-minute TTL (extended on cache hit)
12
+ - Cost: 25% MORE to write to cache, 90% LESS to read
13
+
14
+ Usage:
15
+ from headroom.cache import AnthropicCacheOptimizer, OptimizationContext
16
+
17
+ optimizer = AnthropicCacheOptimizer()
18
+ context = OptimizationContext(provider="anthropic", model="claude-3-opus")
19
+
20
+ result = optimizer.optimize(messages, context)
21
+ # result.messages now contains cache_control blocks
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import copy
27
+ import re
28
+ from dataclasses import dataclass, field
29
+ from typing import Any
30
+
31
+ from .base import (
32
+ BaseCacheOptimizer,
33
+ BreakpointLocation,
34
+ CacheBreakpoint,
35
+ CacheConfig,
36
+ CacheMetrics,
37
+ CacheResult,
38
+ CacheStrategy,
39
+ OptimizationContext,
40
+ )
41
+
42
# Limits and pricing factors for Anthropic prompt caching.
ANTHROPIC_MIN_CACHEABLE_TOKENS = 1024  # floor applied to config.min_cacheable_tokens
ANTHROPIC_MAX_BREAKPOINTS = 4  # ceiling applied to config.max_breakpoints
ANTHROPIC_CACHE_TTL_SECONDS = 5 * 60  # 5 minutes
ANTHROPIC_WRITE_COST_MULTIPLIER = 1.25  # cache writes cost 25% more
ANTHROPIC_READ_COST_MULTIPLIER = 0.10  # cache reads cost 90% less
48
+
49
+
50
@dataclass
class ContentSection:
    """One piece of message content plus the result of its caching assessment."""

    # Raw text, or the structured block / tool list the section was built from.
    content: str | list[dict[str, Any]]
    # Category label, e.g. "system", "tools", "examples", "user", "assistant".
    section_type: str
    # Index of the owning message in the analysed message list.
    message_index: int
    # Index of the block inside list-valued content; None when not applicable.
    content_index: int | None = None
    # Estimated number of tokens in this section.
    token_count: int = 0
    # Outcome of the cacheability check and the human-readable explanation.
    is_cacheable: bool = False
    reason: str = ""
61
+
62
+
63
@dataclass
class BreakpointPlan:
    """Outcome of breakpoint planning: chosen breakpoints, totals and warnings."""

    # Breakpoints selected for insertion (each instance gets its own list).
    breakpoints: list[CacheBreakpoint] = field(default_factory=list)
    # Sum of the token counts of the selected sections.
    total_cacheable_tokens: int = 0
    # Savings estimate in percent.
    estimated_savings_percent: float = 0.0
    # Human-readable notes produced while planning.
    warnings: list[str] = field(default_factory=list)
71
+
72
+
73
+ class AnthropicCacheOptimizer(BaseCacheOptimizer):
74
+ """
75
+ Cache optimizer for Anthropic's explicit cache_control mechanism.
76
+
77
+ This optimizer analyzes messages and inserts cache_control blocks at
78
+ optimal positions to maximize cache hit rates and minimize costs.
79
+
80
+ Key features:
81
+ - Detects cacheable sections (system prompt, tools, few-shot examples)
82
+ - Respects Anthropic's 1024 token minimum and 4 breakpoint maximum
83
+ - Stabilizes prefixes by moving dates and normalizing whitespace
84
+ - Tracks metrics for monitoring and debugging
85
+ """
86
+
87
def __init__(self, config: CacheConfig | None = None):
    """Initialise the optimizer and clamp the config to Anthropic's limits.

    After the base class has set up ``self.config``, the minimum cacheable
    token count is raised to at least ANTHROPIC_MIN_CACHEABLE_TOKENS and the
    breakpoint budget is lowered to at most ANTHROPIC_MAX_BREAKPOINTS.
    """
    super().__init__(config)
    active = self.config
    # Never allow a threshold below the provider minimum.
    if active.min_cacheable_tokens < ANTHROPIC_MIN_CACHEABLE_TOKENS:
        active.min_cacheable_tokens = ANTHROPIC_MIN_CACHEABLE_TOKENS
    # Never allow more breakpoints than the provider maximum.
    if active.max_breakpoints > ANTHROPIC_MAX_BREAKPOINTS:
        active.max_breakpoints = ANTHROPIC_MAX_BREAKPOINTS
93
+
94
@property
def name(self) -> str:
    """Identifier string of this optimizer."""
    return "anthropic-cache-optimizer"
97
+
98
@property
def provider(self) -> str:
    """Name of the provider this optimizer targets."""
    return "anthropic"
101
+
102
@property
def strategy(self) -> CacheStrategy:
    """Caching strategy used by this optimizer: explicit breakpoints."""
    return CacheStrategy.EXPLICIT_BREAKPOINTS
105
+
106
def optimize(
    self,
    messages: list[dict[str, Any]],
    context: OptimizationContext,
    config: CacheConfig | None = None,
) -> CacheResult:
    """
    Optimize messages for Anthropic's cache.

    Works on a deep copy of ``messages`` and performs, in order:
    section analysis, system-prompt stabilization, breakpoint planning,
    cache_control insertion, and metrics computation.

    Args:
        messages: Chat messages to optimize; not mutated.
        context: Supplies ``previous_prefix_hash`` for hit estimation.
        config: Optional per-call config; falls back to ``self.config``.

    Returns:
        A CacheResult with the optimized messages, metrics, applied
        transforms and planner warnings. When caching is disabled the
        original ``messages`` object is returned with empty metrics.
    """
    active_config = config or self.config

    # Disabled: hand the caller's messages back untouched.
    if not active_config.enabled:
        return CacheResult(
            messages=messages,
            metrics=CacheMetrics(),
            transforms_applied=[],
        )

    working = copy.deepcopy(messages)

    # Step 1: analyze content sections (done before stabilization).
    sections = self._analyze_sections(working)

    # Step 2: stabilize the system prefix.
    working, stabilization_applied = self._stabilize_prefix(working, active_config)
    transforms_applied: list[str] = list(stabilization_applied)

    # Step 3: decide where breakpoints go.
    plan = self._plan_breakpoints(sections, active_config)
    warnings: list[str] = list(plan.warnings)

    # Step 4: write the cache_control markers.
    working = self._insert_breakpoints(working, plan.breakpoints)
    breakpoint_count = len(plan.breakpoints)
    if breakpoint_count:
        transforms_applied.append(f"inserted_{breakpoint_count}_cache_breakpoints")

    # Step 5: hash the cacheable prefix and compare with the previous one.
    prefix_hash = self._compute_prefix_hash(self._extract_cacheable_content(working))

    # The hash from the context wins; the optimizer's own memory is the fallback.
    reference_hash = context.previous_prefix_hash or self._previous_prefix_hash
    cache_hit = bool(reference_hash) and prefix_hash == reference_hash

    total_tokens = sum(section.token_count for section in sections)
    cacheable_tokens = plan.total_cacheable_tokens

    metrics = CacheMetrics(
        stable_prefix_tokens=cacheable_tokens,
        stable_prefix_hash=prefix_hash,
        breakpoints_inserted=breakpoint_count,
        breakpoint_locations=plan.breakpoints,
        prefix_changed_from_previous=not cache_hit,
        previous_prefix_hash=self._previous_prefix_hash,
        estimated_cache_hit=cache_hit,
        estimated_savings_percent=plan.estimated_savings_percent if cache_hit else 0.0,
        cacheable_tokens=cacheable_tokens,
        non_cacheable_tokens=total_tokens - cacheable_tokens,
        cache_ttl_remaining_seconds=ANTHROPIC_CACHE_TTL_SECONDS if cache_hit else None,
    )

    # Remember this prefix for the next call, then record the metrics.
    self._previous_prefix_hash = prefix_hash
    self._record_metrics(metrics)

    # Token totals are identical before and after: nothing is compressed here.
    return CacheResult(
        messages=working,
        metrics=metrics,
        tokens_before=total_tokens,
        tokens_after=total_tokens,
        transforms_applied=transforms_applied,
        warnings=warnings,
    )
191
+
192
+ def _analyze_sections(self, messages: list[dict[str, Any]]) -> list[ContentSection]:
193
+ """Analyze messages to identify distinct content sections."""
194
+ sections: list[ContentSection] = []
195
+
196
+ for idx, message in enumerate(messages):
197
+ role = message.get("role", "")
198
+ content = message.get("content", "")
199
+
200
+ if role == "system":
201
+ section_type = "system"
202
+ elif role == "user":
203
+ section_type = (
204
+ "examples" if self._looks_like_example(message, messages, idx) else "user"
205
+ )
206
+ elif role == "assistant":
207
+ section_type = (
208
+ "examples" if self._looks_like_example(message, messages, idx) else "assistant"
209
+ )
210
+ else:
211
+ section_type = role
212
+
213
+ # Handle tools
214
+ if "tools" in message:
215
+ tool_section = ContentSection(
216
+ content=message["tools"],
217
+ section_type="tools",
218
+ message_index=idx,
219
+ token_count=self._estimate_tools_tokens(message["tools"]),
220
+ is_cacheable=True,
221
+ reason="Tool definitions are static and cacheable",
222
+ )
223
+ sections.append(tool_section)
224
+
225
+ if isinstance(content, str):
226
+ token_count = self._count_tokens_estimate(content)
227
+ is_cacheable, reason = self._assess_cacheability(section_type, token_count, content)
228
+ sections.append(
229
+ ContentSection(
230
+ content=content,
231
+ section_type=section_type,
232
+ message_index=idx,
233
+ token_count=token_count,
234
+ is_cacheable=is_cacheable,
235
+ reason=reason,
236
+ )
237
+ )
238
+
239
+ elif isinstance(content, list):
240
+ for block_idx, block in enumerate(content):
241
+ if isinstance(block, dict) and block.get("type") == "text":
242
+ text = block.get("text", "")
243
+ token_count = self._count_tokens_estimate(text)
244
+ is_cacheable, reason = self._assess_cacheability(
245
+ section_type, token_count, text
246
+ )
247
+ sections.append(
248
+ ContentSection(
249
+ content=block, # type: ignore[arg-type]
250
+ section_type=section_type,
251
+ message_index=idx,
252
+ content_index=block_idx,
253
+ token_count=token_count,
254
+ is_cacheable=is_cacheable,
255
+ reason=reason,
256
+ )
257
+ )
258
+
259
+ return sections
260
+
261
+ def _assess_cacheability(
262
+ self, section_type: str, token_count: int, content: str
263
+ ) -> tuple[bool, str]:
264
+ """Assess whether a section is cacheable."""
265
+ if token_count < self.config.min_cacheable_tokens:
266
+ return (
267
+ False,
268
+ f"Below minimum tokens ({token_count} < {self.config.min_cacheable_tokens})",
269
+ )
270
+
271
+ if section_type == "system":
272
+ return True, "System prompts are highly cacheable"
273
+ if section_type == "tools":
274
+ return True, "Tool definitions are static and cacheable"
275
+ if section_type == "examples":
276
+ return True, "Few-shot examples are typically static"
277
+ if self._has_dynamic_content(content):
278
+ return False, "Contains dynamic content (dates, times, etc.)"
279
+ if section_type == "user":
280
+ return False, "User messages are typically dynamic"
281
+
282
+ return True, "Content is large enough for caching"
283
+
284
+ def _has_dynamic_content(self, content: str) -> bool:
285
+ """Check if content has dynamic elements."""
286
+ for pattern in self.config.date_patterns:
287
+ if re.search(pattern, content):
288
+ return True
289
+ return False
290
+
291
+ def _looks_like_example(
292
+ self,
293
+ message: dict[str, Any],
294
+ messages: list[dict[str, Any]],
295
+ idx: int,
296
+ ) -> bool:
297
+ """Determine if a message looks like a few-shot example."""
298
+ system_idx = -1
299
+ for i, msg in enumerate(messages):
300
+ if msg.get("role") == "system":
301
+ system_idx = i
302
+ break
303
+
304
+ if system_idx >= 0 and idx <= system_idx + 4:
305
+ role = message.get("role")
306
+ if role == "user" and idx + 1 < len(messages):
307
+ if messages[idx + 1].get("role") == "assistant":
308
+ return True
309
+ elif role == "assistant" and idx > 0:
310
+ if messages[idx - 1].get("role") == "user":
311
+ return True
312
+
313
+ content = message.get("content", "")
314
+ if isinstance(content, str):
315
+ example_markers = ["example:", "for example", "e.g.", "sample:"]
316
+ return any(marker in content.lower() for marker in example_markers)
317
+
318
+ return False
319
+
320
+ def _estimate_tools_tokens(self, tools: Any) -> int:
321
+ """Estimate token count for tool definitions."""
322
+ import json
323
+
324
+ try:
325
+ return self._count_tokens_estimate(json.dumps(tools))
326
+ except (TypeError, ValueError):
327
+ return 0
328
+
329
+ def _stabilize_prefix(
330
+ self,
331
+ messages: list[dict[str, Any]],
332
+ config: CacheConfig,
333
+ ) -> tuple[list[dict[str, Any]], list[str]]:
334
+ """Stabilize the prefix by moving dynamic content."""
335
+ transforms: list[str] = []
336
+
337
+ for message in messages:
338
+ if message.get("role") != "system":
339
+ continue
340
+
341
+ content = message.get("content", "")
342
+ if isinstance(content, str):
343
+ new_content, applied = self._stabilize_text(content, config)
344
+ if new_content != content:
345
+ message["content"] = new_content
346
+ transforms.extend(applied)
347
+
348
+ elif isinstance(content, list):
349
+ for block in content:
350
+ if isinstance(block, dict) and block.get("type") == "text":
351
+ text = block.get("text", "")
352
+ new_text, applied = self._stabilize_text(text, config)
353
+ if new_text != text:
354
+ block["text"] = new_text
355
+ transforms.extend(applied)
356
+
357
+ return messages, transforms
358
+
359
+ def _stabilize_text(self, text: str, config: CacheConfig) -> tuple[str, list[str]]:
360
+ """Stabilize a text string."""
361
+ transforms: list[str] = []
362
+ result = text
363
+
364
+ extracted_dates: list[str] = []
365
+ for pattern in config.date_patterns:
366
+ matches = re.findall(pattern, result)
367
+ if matches:
368
+ extracted_dates.extend(matches)
369
+ result = re.sub(pattern, "", result)
370
+ transforms.append("extracted_dates")
371
+
372
+ if config.normalize_whitespace:
373
+ new_result = re.sub(r"[ \t]+", " ", result)
374
+ if new_result != result:
375
+ result = new_result
376
+ transforms.append("normalized_spaces")
377
+
378
+ if config.collapse_blank_lines:
379
+ new_result = re.sub(r"\n{3,}", "\n\n", result)
380
+ if new_result != result:
381
+ result = new_result
382
+ transforms.append("collapsed_blank_lines")
383
+
384
+ result = result.strip()
385
+
386
+ if extracted_dates:
387
+ result = result + config.dynamic_separator + " ".join(extracted_dates)
388
+
389
+ return result, list(set(transforms))
390
+
391
def _plan_breakpoints(
    self,
    sections: list[ContentSection],
    config: CacheConfig,
) -> BreakpointPlan:
    """Choose which cacheable sections receive a cache breakpoint.

    Cacheable sections are ranked system, tools, examples, then everything
    else; at most ``config.max_breakpoints`` of them are selected. A warning
    is added when no section is cacheable or when the limit cuts the list.

    Returns:
        A BreakpointPlan holding one CacheBreakpoint per selected section,
        the sum of their token counts, and a 90% savings estimate when that
        sum is positive.
    """
    plan = BreakpointPlan()

    candidates = [section for section in sections if section.is_cacheable]
    if not candidates:
        plan.warnings.append("No sections meet caching requirements")
        return plan

    # Stable sort: sections of equal rank keep their original order.
    rank = {"system": 0, "tools": 1, "examples": 2}
    ordered = sorted(candidates, key=lambda section: rank.get(section.section_type, 3))

    chosen: list[ContentSection] = []
    for taken, section in enumerate(ordered):
        if taken >= config.max_breakpoints:
            plan.warnings.append(f"Reached maximum breakpoints ({config.max_breakpoints})")
            break
        chosen.append(section)

    for section in chosen:
        plan.breakpoints.append(
            CacheBreakpoint(
                message_index=section.message_index,
                location=self._section_type_to_location(section.section_type),
                content_index=section.content_index,
                tokens_at_breakpoint=section.token_count,
                reason=section.reason,
            )
        )

    cached_tokens = sum(section.token_count for section in chosen)
    plan.total_cacheable_tokens = cached_tokens
    if cached_tokens > 0:
        plan.estimated_savings_percent = 90.0

    return plan
434
+
435
def _section_type_to_location(self, section_type: str) -> BreakpointLocation:
    """Translate a section type into its BreakpointLocation member.

    "system", "tools" and "examples" map to the matching AFTER_* member;
    any other value maps to BreakpointLocation.CUSTOM.
    """
    known_locations = {
        "system": BreakpointLocation.AFTER_SYSTEM,
        "tools": BreakpointLocation.AFTER_TOOLS,
        "examples": BreakpointLocation.AFTER_EXAMPLES,
    }
    try:
        return known_locations[section_type]
    except KeyError:
        return BreakpointLocation.CUSTOM
443
+
444
+ def _insert_breakpoints(
445
+ self,
446
+ messages: list[dict[str, Any]],
447
+ breakpoints: list[CacheBreakpoint],
448
+ ) -> list[dict[str, Any]]:
449
+ """Insert cache_control blocks at specified positions."""
450
+ for bp in breakpoints:
451
+ if bp.message_index >= len(messages):
452
+ continue
453
+
454
+ message = messages[bp.message_index]
455
+ content = message.get("content", "")
456
+
457
+ if isinstance(content, str):
458
+ message["content"] = [
459
+ {
460
+ "type": "text",
461
+ "text": content,
462
+ "cache_control": {"type": "ephemeral"},
463
+ }
464
+ ]
465
+ elif isinstance(content, list):
466
+ if bp.content_index is not None and bp.content_index < len(content):
467
+ block = content[bp.content_index]
468
+ if isinstance(block, dict):
469
+ block["cache_control"] = {"type": "ephemeral"}
470
+ elif content:
471
+ last_block = content[-1]
472
+ if isinstance(last_block, dict):
473
+ last_block["cache_control"] = {"type": "ephemeral"}
474
+
475
+ return messages
476
+
477
+ def _extract_cacheable_content(self, messages: list[dict[str, Any]]) -> str:
478
+ """Extract content that has cache_control markers for hashing."""
479
+ parts: list[str] = []
480
+
481
+ for message in messages:
482
+ content = message.get("content", "")
483
+ if isinstance(content, list):
484
+ for block in content:
485
+ if isinstance(block, dict) and "cache_control" in block:
486
+ text = block.get("text", "")
487
+ if text:
488
+ parts.append(text)
489
+ elif isinstance(content, str) and message.get("role") == "system":
490
+ parts.append(content)
491
+
492
+ return "\n".join(parts)
493
+
494
def estimate_savings(
    self,
    messages: list[dict[str, Any]],
    context: OptimizationContext,
) -> float:
    """Estimate the percentage saved if the planned sections are cached.

    The result is 90% scaled by the share of tokens that the breakpoint
    plan (built with ``self.config``) marks as cacheable; 0.0 when the plan
    caches nothing. ``context`` is accepted but not used.
    """
    sections = self._analyze_sections(messages)
    cacheable_tokens = self._plan_breakpoints(sections, self.config).total_cacheable_tokens

    if cacheable_tokens == 0:
        return 0.0

    all_tokens = sum(section.token_count for section in sections)
    return 90.0 * (cacheable_tokens / all_tokens)
509
+
510
def get_cache_write_cost_multiplier(self) -> float:
    """Return the cost multiplier for writing to the cache (ANTHROPIC_WRITE_COST_MULTIPLIER)."""
    return ANTHROPIC_WRITE_COST_MULTIPLIER
512
+
513
def get_cache_read_cost_multiplier(self) -> float:
    """Return the cost multiplier for reading from the cache (ANTHROPIC_READ_COST_MULTIPLIER)."""
    return ANTHROPIC_READ_COST_MULTIPLIER
515
+
516
def get_cache_ttl_seconds(self) -> int:
    """Return the cache time-to-live in seconds (ANTHROPIC_CACHE_TTL_SECONDS)."""
    return ANTHROPIC_CACHE_TTL_SECONDS