headroom_ai-0.2.13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. headroom/__init__.py +212 -0
  2. headroom/cache/__init__.py +76 -0
  3. headroom/cache/anthropic.py +517 -0
  4. headroom/cache/base.py +342 -0
  5. headroom/cache/compression_feedback.py +613 -0
  6. headroom/cache/compression_store.py +814 -0
  7. headroom/cache/dynamic_detector.py +1026 -0
  8. headroom/cache/google.py +884 -0
  9. headroom/cache/openai.py +584 -0
  10. headroom/cache/registry.py +175 -0
  11. headroom/cache/semantic.py +451 -0
  12. headroom/ccr/__init__.py +77 -0
  13. headroom/ccr/context_tracker.py +582 -0
  14. headroom/ccr/mcp_server.py +319 -0
  15. headroom/ccr/response_handler.py +772 -0
  16. headroom/ccr/tool_injection.py +415 -0
  17. headroom/cli.py +219 -0
  18. headroom/client.py +977 -0
  19. headroom/compression/__init__.py +42 -0
  20. headroom/compression/detector.py +424 -0
  21. headroom/compression/handlers/__init__.py +22 -0
  22. headroom/compression/handlers/base.py +219 -0
  23. headroom/compression/handlers/code_handler.py +506 -0
  24. headroom/compression/handlers/json_handler.py +418 -0
  25. headroom/compression/masks.py +345 -0
  26. headroom/compression/universal.py +465 -0
  27. headroom/config.py +474 -0
  28. headroom/exceptions.py +192 -0
  29. headroom/integrations/__init__.py +159 -0
  30. headroom/integrations/agno/__init__.py +53 -0
  31. headroom/integrations/agno/hooks.py +345 -0
  32. headroom/integrations/agno/model.py +625 -0
  33. headroom/integrations/agno/providers.py +154 -0
  34. headroom/integrations/langchain/__init__.py +106 -0
  35. headroom/integrations/langchain/agents.py +326 -0
  36. headroom/integrations/langchain/chat_model.py +1002 -0
  37. headroom/integrations/langchain/langsmith.py +324 -0
  38. headroom/integrations/langchain/memory.py +319 -0
  39. headroom/integrations/langchain/providers.py +200 -0
  40. headroom/integrations/langchain/retriever.py +371 -0
  41. headroom/integrations/langchain/streaming.py +341 -0
  42. headroom/integrations/mcp/__init__.py +37 -0
  43. headroom/integrations/mcp/server.py +533 -0
  44. headroom/memory/__init__.py +37 -0
  45. headroom/memory/extractor.py +390 -0
  46. headroom/memory/fast_store.py +621 -0
  47. headroom/memory/fast_wrapper.py +311 -0
  48. headroom/memory/inline_extractor.py +229 -0
  49. headroom/memory/store.py +434 -0
  50. headroom/memory/worker.py +260 -0
  51. headroom/memory/wrapper.py +321 -0
  52. headroom/models/__init__.py +39 -0
  53. headroom/models/registry.py +687 -0
  54. headroom/parser.py +293 -0
  55. headroom/pricing/__init__.py +51 -0
  56. headroom/pricing/anthropic_prices.py +81 -0
  57. headroom/pricing/litellm_pricing.py +113 -0
  58. headroom/pricing/openai_prices.py +91 -0
  59. headroom/pricing/registry.py +188 -0
  60. headroom/providers/__init__.py +61 -0
  61. headroom/providers/anthropic.py +621 -0
  62. headroom/providers/base.py +131 -0
  63. headroom/providers/cohere.py +362 -0
  64. headroom/providers/google.py +427 -0
  65. headroom/providers/litellm.py +297 -0
  66. headroom/providers/openai.py +566 -0
  67. headroom/providers/openai_compatible.py +521 -0
  68. headroom/proxy/__init__.py +19 -0
  69. headroom/proxy/server.py +2683 -0
  70. headroom/py.typed +0 -0
  71. headroom/relevance/__init__.py +124 -0
  72. headroom/relevance/base.py +106 -0
  73. headroom/relevance/bm25.py +255 -0
  74. headroom/relevance/embedding.py +255 -0
  75. headroom/relevance/hybrid.py +259 -0
  76. headroom/reporting/__init__.py +5 -0
  77. headroom/reporting/generator.py +549 -0
  78. headroom/storage/__init__.py +41 -0
  79. headroom/storage/base.py +125 -0
  80. headroom/storage/jsonl.py +220 -0
  81. headroom/storage/sqlite.py +289 -0
  82. headroom/telemetry/__init__.py +91 -0
  83. headroom/telemetry/collector.py +764 -0
  84. headroom/telemetry/models.py +880 -0
  85. headroom/telemetry/toin.py +1579 -0
  86. headroom/tokenizer.py +80 -0
  87. headroom/tokenizers/__init__.py +75 -0
  88. headroom/tokenizers/base.py +210 -0
  89. headroom/tokenizers/estimator.py +198 -0
  90. headroom/tokenizers/huggingface.py +317 -0
  91. headroom/tokenizers/mistral.py +245 -0
  92. headroom/tokenizers/registry.py +398 -0
  93. headroom/tokenizers/tiktoken_counter.py +248 -0
  94. headroom/transforms/__init__.py +106 -0
  95. headroom/transforms/base.py +57 -0
  96. headroom/transforms/cache_aligner.py +357 -0
  97. headroom/transforms/code_compressor.py +1313 -0
  98. headroom/transforms/content_detector.py +335 -0
  99. headroom/transforms/content_router.py +1158 -0
  100. headroom/transforms/llmlingua_compressor.py +638 -0
  101. headroom/transforms/log_compressor.py +529 -0
  102. headroom/transforms/pipeline.py +297 -0
  103. headroom/transforms/rolling_window.py +350 -0
  104. headroom/transforms/search_compressor.py +365 -0
  105. headroom/transforms/smart_crusher.py +2682 -0
  106. headroom/transforms/text_compressor.py +259 -0
  107. headroom/transforms/tool_crusher.py +338 -0
  108. headroom/utils.py +215 -0
  109. headroom_ai-0.2.13.dist-info/METADATA +315 -0
  110. headroom_ai-0.2.13.dist-info/RECORD +114 -0
  111. headroom_ai-0.2.13.dist-info/WHEEL +4 -0
  112. headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
  113. headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
  114. headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
headroom/providers/anthropic.py
@@ -0,0 +1,621 @@
+"""Anthropic provider implementation for Headroom SDK.
+
+Token counting uses Anthropic's official Token Count API when a client
+is provided. This gives accurate counts for all content types including
+JSON, non-English text, and tool definitions.
+
+Usage:
+    from anthropic import Anthropic
+    from headroom import AnthropicProvider
+
+    client = Anthropic()  # Uses ANTHROPIC_API_KEY env var
+    provider = AnthropicProvider(client=client)  # Accurate counting via API
+
+    # Or without client (uses tiktoken approximation - less accurate)
+    provider = AnthropicProvider()  # Warning: approximate counting
+"""
+
+import json
+import logging
+import os
+import warnings
+from pathlib import Path
+from typing import Any, cast
+
+from .base import Provider, TokenCounter
+
+# Check if LiteLLM is available for pricing and context limits
+try:
+    import litellm
+    from litellm import get_model_info as litellm_get_model_info
+
+    LITELLM_AVAILABLE = True
+except ImportError:
+    LITELLM_AVAILABLE = False
+    litellm = None  # type: ignore[assignment]
+    litellm_get_model_info = None  # type: ignore[assignment]
+
+logger = logging.getLogger(__name__)
+
+# Warning flags
+_FALLBACK_WARNING_SHOWN = False
+_UNKNOWN_MODEL_WARNINGS: set[str] = set()
+
+
+# Anthropic model context limits
+# All Claude 3+ models have 200K context
+ANTHROPIC_CONTEXT_LIMITS: dict[str, int] = {
+    # Claude 4.5 (Opus 4.5)
+    "claude-opus-4-5-20251101": 200000,
+    # Claude 4 (Sonnet 4, Haiku 4)
+    "claude-sonnet-4-20250514": 200000,
+    "claude-haiku-4-5-20251001": 200000,
+    # Claude 3.5
+    "claude-3-5-sonnet-20241022": 200000,
+    "claude-3-5-sonnet-latest": 200000,
+    "claude-3-5-haiku-20241022": 200000,
+    "claude-3-5-haiku-latest": 200000,
+    # Claude 3
+    "claude-3-opus-20240229": 200000,
+    "claude-3-opus-latest": 200000,
+    "claude-3-sonnet-20240229": 200000,
+    "claude-3-haiku-20240307": 200000,
+    # Claude 2
+    "claude-2.1": 200000,
+    "claude-2.0": 100000,
+    "claude-instant-1.2": 100000,
+}
+
+# Fallback pricing - LiteLLM is preferred source
+# NOTE: These are ESTIMATES. Always verify against actual Anthropic billing.
+# Last updated: 2025-01-14
+ANTHROPIC_PRICING: dict[str, dict[str, float]] = {
+    # Claude 4.5 (Opus tier pricing)
+    "claude-opus-4-5-20251101": {"input": 15.00, "output": 75.00, "cached_input": 1.50},
+    # Claude 4 (Sonnet/Haiku tier pricing)
+    "claude-sonnet-4-20250514": {"input": 3.00, "output": 15.00, "cached_input": 0.30},
+    "claude-haiku-4-5-20251001": {"input": 0.80, "output": 4.00, "cached_input": 0.08},
+    # Claude 3.5
+    "claude-3-5-sonnet-20241022": {"input": 3.00, "output": 15.00, "cached_input": 0.30},
+    "claude-3-5-sonnet-latest": {"input": 3.00, "output": 15.00, "cached_input": 0.30},
+    "claude-3-5-haiku-20241022": {"input": 0.80, "output": 4.00, "cached_input": 0.08},
+    "claude-3-5-haiku-latest": {"input": 0.80, "output": 4.00, "cached_input": 0.08},
+    # Claude 3
+    "claude-3-opus-20240229": {"input": 15.00, "output": 75.00, "cached_input": 1.50},
+    "claude-3-opus-latest": {"input": 15.00, "output": 75.00, "cached_input": 1.50},
+    "claude-3-sonnet-20240229": {"input": 3.00, "output": 15.00, "cached_input": 0.30},
+    "claude-3-haiku-20240307": {"input": 0.25, "output": 1.25, "cached_input": 0.03},
+}
+
+# Default limits for pattern-based inference
+# Used when a model isn't in the explicit list but matches a known pattern
+_PATTERN_DEFAULTS = {
+    "opus": {"context": 200000, "pricing": {"input": 15.00, "output": 75.00, "cached_input": 1.50}},
+    "sonnet": {
+        "context": 200000,
+        "pricing": {"input": 3.00, "output": 15.00, "cached_input": 0.30},
+    },
+    "haiku": {"context": 200000, "pricing": {"input": 0.80, "output": 4.00, "cached_input": 0.08}},
+}
+
+# Fallback for completely unknown Claude models
+_UNKNOWN_CLAUDE_DEFAULT = {
+    "context": 200000,  # Safe assumption for Claude 3+
+    "pricing": {"input": 3.00, "output": 15.00, "cached_input": 0.30},  # Sonnet-tier pricing
+}
+
+
+def _load_custom_model_config() -> dict[str, Any]:
+    """Load custom model configuration from environment or config file.
+
+    Checks (in order):
+    1. HEADROOM_MODEL_LIMITS environment variable (JSON string or file path)
+    2. ~/.headroom/models.json config file
+
+    Returns:
+        Dict with 'context_limits' and 'pricing' keys.
+    """
+    config: dict[str, Any] = {"context_limits": {}, "pricing": {}}
+
+    # Check environment variable
+    env_config = os.environ.get("HEADROOM_MODEL_LIMITS", "")
+    if env_config:
+        try:
+            # Check if it's a file path
+            if os.path.isfile(env_config):
+                with open(env_config) as f:
+                    loaded = json.load(f)
+            else:
+                # Try to parse as JSON string
+                loaded = json.loads(env_config)
+
+            # Check for anthropic-specific config, fall back to root level
+            anthropic_config = loaded.get("anthropic", loaded)
+            if "context_limits" in anthropic_config:
+                config["context_limits"].update(anthropic_config["context_limits"])
+            if "pricing" in anthropic_config:
+                config["pricing"].update(anthropic_config["pricing"])
+
+            logger.debug(f"Loaded custom model config from HEADROOM_MODEL_LIMITS: {loaded}")
+        except (json.JSONDecodeError, OSError) as e:
+            logger.warning(f"Failed to load HEADROOM_MODEL_LIMITS: {e}")
+
+    # Check config file
+    config_file = Path.home() / ".headroom" / "models.json"
+    if config_file.exists():
+        try:
+            with open(config_file) as f:
+                loaded = json.load(f)
+
+            # Only load anthropic-specific config
+            anthropic_config = loaded.get("anthropic", loaded)
+            if "context_limits" in anthropic_config:
+                # Don't override env var settings
+                for model, limit in anthropic_config["context_limits"].items():
+                    if model not in config["context_limits"]:
+                        config["context_limits"][model] = limit
+            if "pricing" in anthropic_config:
+                for model, pricing in anthropic_config["pricing"].items():
+                    if model not in config["pricing"]:
+                        config["pricing"][model] = pricing
+
+            logger.debug(f"Loaded custom model config from {config_file}")
+        except (json.JSONDecodeError, OSError) as e:
+            logger.warning(f"Failed to load {config_file}: {e}")
+
+    return config
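A minimal sketch of a config this loader would pick up, assuming you write ~/.headroom/models.json yourself; the model name is a hypothetical placeholder, and (per the merge logic above) values from HEADROOM_MODEL_LIMITS take precedence over this file:

    import json
    from pathlib import Path

    custom = {
        "anthropic": {
            "context_limits": {"claude-example-1": 300000},
            "pricing": {"claude-example-1": {"input": 3.0, "output": 15.0, "cached_input": 0.3}},
        }
    }
    path = Path.home() / ".headroom" / "models.json"
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(json.dumps(custom, indent=2))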
+
+
+def _infer_model_tier(model: str) -> str | None:
+    """Infer the model tier (opus/sonnet/haiku) from model name.
+
+    Uses pattern matching to handle future model releases.
+    """
+    model_lower = model.lower()
+
+    # Check for tier keywords in model name
+    if "opus" in model_lower:
+        return "opus"
+    elif "sonnet" in model_lower:
+        return "sonnet"
+    elif "haiku" in model_lower:
+        return "haiku"
+
+    return None
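A quick illustration of the tier inference (these calls are not in the package; the results follow from the substring checks above):

    assert _infer_model_tier("claude-sonnet-4-20250514") == "sonnet"
    assert _infer_model_tier("claude-opus-4-5-20251101") == "opus"
    assert _infer_model_tier("gpt-4o") is None  # no opus/sonnet/haiku keyword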
+
+
+class AnthropicTokenCounter(TokenCounter):
+    """Token counter for Anthropic models.
+
+    When an Anthropic client is provided, uses the official Token Count API
+    (/v1/messages/count_tokens) for accurate counting. This handles:
+    - JSON-heavy tool payloads
+    - Non-English text
+    - Tool definitions and structured content
+
+    Falls back to tiktoken approximation only when no client is available.
+    """
+
+    def __init__(self, model: str, client: Any = None):
+        """Initialize token counter.
+
+        Args:
+            model: Anthropic model name.
+            client: Optional anthropic.Anthropic client for API-based counting.
+                If not provided, falls back to tiktoken approximation.
+        """
+        global _FALLBACK_WARNING_SHOWN
+
+        self.model = model
+        self._client = client
+        self._encoding: Any = None
+        self._use_api = client is not None
+
+        if not self._use_api and not _FALLBACK_WARNING_SHOWN:
+            warnings.warn(
+                "AnthropicProvider: No client provided, using tiktoken approximation. "
+                "For accurate counting, pass an Anthropic client: "
+                "AnthropicProvider(client=Anthropic())",
+                UserWarning,
+                stacklevel=4,
+            )
+            _FALLBACK_WARNING_SHOWN = True
+
+        # Load tiktoken as fallback
+        try:
+            import tiktoken
+
+            self._encoding = tiktoken.get_encoding("cl100k_base")
+        except ImportError:
+            if not self._use_api:
+                warnings.warn(
+                    "tiktoken not installed - token counting will be very approximate. "
+                    "Install tiktoken or provide an Anthropic client.",
+                    UserWarning,
+                    stacklevel=4,
+                )
+
+    def count_text(self, text: str) -> int:
+        """Count tokens in text.
+
+        Note: For single text strings, uses tiktoken approximation even when
+        API is available (API only supports full message counting).
+        """
+        if not text:
+            return 0
+
+        if self._encoding:
+            # tiktoken with ~1.1x multiplier for Claude
+            base_count = len(self._encoding.encode(text))
+            return int(base_count * 1.1)
+
+        # Character-based fallback
+        return max(1, len(text) // 3)
+
+    def count_message(self, message: dict[str, Any]) -> int:
+        """Count tokens in a single message.
+
+        Uses API if available, otherwise falls back to estimation.
+        """
+        if self._use_api:
+            return self._count_message_via_api(message)
+        return self._count_message_estimated(message)
+
+    def _count_message_via_api(self, message: dict[str, Any]) -> int:
+        """Count tokens using Anthropic Token Count API."""
+        try:
+            # Convert to Anthropic message format if needed
+            messages = [self._normalize_message(message)]
+            response = self._client.messages.count_tokens(
+                model=self.model,
+                messages=messages,
+            )
+            return int(response.input_tokens)
+        except Exception:
+            # Fall back to estimation on API error
+            return self._count_message_estimated(message)
+
+    def _count_message_estimated(self, message: dict[str, Any]) -> int:
+        """Estimate token count without API."""
+        tokens = 4  # Role overhead
+
+        content = message.get("content")
+        if isinstance(content, str):
+            tokens += self.count_text(content)
+        elif isinstance(content, list):
+            for block in content:
+                if isinstance(block, dict):
+                    if block.get("type") == "text":
+                        tokens += self.count_text(block.get("text", ""))
+                    elif block.get("type") == "tool_use":
+                        tokens += self.count_text(block.get("name", ""))
+                        tokens += self.count_text(str(block.get("input", {})))
+                    elif block.get("type") == "tool_result":
+                        tokens += self.count_text(str(block.get("content", "")))
+
+        # OpenAI format tool calls
+        if "tool_calls" in message:
+            for tool_call in message.get("tool_calls", []):
+                if isinstance(tool_call, dict):
+                    func = tool_call.get("function", {})
+                    tokens += self.count_text(func.get("name", ""))
+                    tokens += self.count_text(func.get("arguments", ""))
+
+        return tokens
+
+    def _normalize_message(self, message: dict[str, Any]) -> dict[str, Any]:
+        """Normalize message to Anthropic format."""
+        role = message.get("role", "user")
+
+        # Map OpenAI roles to Anthropic
+        if role == "system":
+            # System messages need special handling - count as user for API
+            return {"role": "user", "content": message.get("content", "")}
+        elif role == "tool":
+            # Tool results in OpenAI format
+            return {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "tool_result",
+                        "tool_use_id": message.get("tool_call_id", ""),
+                        "content": message.get("content", ""),
+                    }
+                ],
+            }
+
+        return {"role": role, "content": message.get("content", "")}
+
+    def count_messages(self, messages: list[dict[str, Any]]) -> int:
+        """Count tokens in a list of messages.
+
+        Uses the Token Count API for accurate counting when available.
+        """
+        if self._use_api:
+            return self._count_messages_via_api(messages)
+        return self._count_messages_estimated(messages)
+
+    def _count_messages_via_api(self, messages: list[dict[str, Any]]) -> int:
+        """Count tokens using Anthropic Token Count API."""
+        try:
+            # Separate system message (Anthropic handles it differently)
+            system_content = None
+            api_messages = []
+
+            for msg in messages:
+                if msg.get("role") == "system":
+                    system_content = msg.get("content", "")
+                else:
+                    api_messages.append(self._normalize_message(msg))
+
+            # Ensure we have at least one message
+            if not api_messages:
+                api_messages = [{"role": "user", "content": ""}]
+
+            kwargs: dict[str, Any] = {
+                "model": self.model,
+                "messages": api_messages,
+            }
+            if system_content:
+                kwargs["system"] = system_content
+
+            response = self._client.messages.count_tokens(**kwargs)
+            return int(response.input_tokens)
+
+        except Exception as e:
+            # Fall back to estimation on API error
+            warnings.warn(
+                f"Token Count API failed ({e}), using estimation", UserWarning, stacklevel=3
+            )
+            return self._count_messages_estimated(messages)
+
+    def _count_messages_estimated(self, messages: list[dict[str, Any]]) -> int:
+        """Estimate token count without API."""
+        total = sum(self._count_message_estimated(msg) for msg in messages)
+        return total + 3  # Base overhead
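A minimal sketch of the two counting paths, assuming the anthropic package is installed and ANTHROPIC_API_KEY is set (illustrative; not part of the diff):

    from anthropic import Anthropic

    messages = [
        {"role": "system", "content": "You are terse."},
        {"role": "user", "content": "Ping?"},
    ]

    # API path: the system message is split out and passed via the `system` kwarg.
    api_counter = AnthropicTokenCounter("claude-3-5-sonnet-20241022", client=Anthropic())
    print(api_counter.count_messages(messages))  # exact count from count_tokens

    # Offline path: tiktoken cl100k_base * 1.1 per text, +4 per message, +3 base;
    # also emits the one-time UserWarning about approximate counting.
    est_counter = AnthropicTokenCounter("claude-3-5-sonnet-20241022")
    print(est_counter.count_messages(messages))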
+
+
+class AnthropicProvider(Provider):
+    """Provider implementation for Anthropic Claude models.
+
+    For accurate token counting, provide an Anthropic client:
+
+        from anthropic import Anthropic
+        provider = AnthropicProvider(client=Anthropic())
+
+    This uses Anthropic's official Token Count API which accurately handles:
+    - JSON-heavy tool payloads
+    - Non-English text
+    - Long system prompts
+    - Tool definitions and structured content
+
+    Without a client, falls back to tiktoken approximation (less accurate).
+
+    Custom Model Configuration:
+        You can configure custom models via environment variable or config file:
+
+        1. Environment variable (JSON string):
+           export HEADROOM_MODEL_LIMITS='{"context_limits": {"my-model": 200000}}'
+
+        2. Environment variable (file path):
+           export HEADROOM_MODEL_LIMITS=/path/to/models.json
+
+        3. Config file (~/.headroom/models.json):
+           {
+               "anthropic": {
+                   "context_limits": {"my-model": 200000},
+                   "pricing": {"my-model": {"input": 3.0, "output": 15.0}}
+               }
+           }
+    """
+
+    def __init__(
+        self,
+        client: Any = None,
+        context_limits: dict[str, int] | None = None,
+    ):
+        """Initialize Anthropic provider.
+
+        Args:
+            client: Optional anthropic.Anthropic client for accurate token counting.
+                If not provided, uses tiktoken approximation.
+            context_limits: Optional override for model context limits.
+
+        Example:
+            from anthropic import Anthropic
+            provider = AnthropicProvider(client=Anthropic())
+        """
+        self._client = client
+        self._token_counters: dict[str, AnthropicTokenCounter] = {}
+
+        # Build context limits: defaults -> config file -> env var -> explicit
+        self._context_limits = {**ANTHROPIC_CONTEXT_LIMITS}
+        self._pricing = {**ANTHROPIC_PRICING}
+
+        # Load from config file and env var
+        custom_config = _load_custom_model_config()
+        self._context_limits.update(custom_config["context_limits"])
+        self._pricing.update(custom_config["pricing"])
+
+        # Explicit overrides take precedence
+        if context_limits:
+            self._context_limits.update(context_limits)
+
+    @property
+    def name(self) -> str:
+        return "anthropic"
+
+    def get_token_counter(self, model: str) -> TokenCounter:
+        """Get token counter for a model.
+
+        If a client was provided to the provider, uses the Token Count API.
+        Otherwise falls back to tiktoken approximation.
+        """
+        if model not in self._token_counters:
+            self._token_counters[model] = AnthropicTokenCounter(
+                model=model,
+                client=self._client,
+            )
+        return self._token_counters[model]
+
+    def get_context_limit(self, model: str) -> int:
+        """Get context window limit for a model.
+
+        Resolution order:
+        1. Explicit context_limits passed to constructor
+        2. HEADROOM_MODEL_LIMITS environment variable
+        3. ~/.headroom/models.json config file
+        4. LiteLLM model info (if available)
+        5. Built-in ANTHROPIC_CONTEXT_LIMITS
+        6. Pattern-based inference (opus/sonnet/haiku)
+        7. Default fallback (200K for any Claude model)
+
+        Never raises an exception - uses sensible defaults for unknown models.
+        """
+        # Check explicit and loaded limits
+        if model in self._context_limits:
+            return self._context_limits[model]
+
+        # Check for partial matches (e.g., "claude-3-5-sonnet" matches "claude-3-5-sonnet-20241022")
+        for known_model, limit in self._context_limits.items():
+            if model in known_model or known_model in model:
+                return limit
+
+        # Try LiteLLM for context limit
+        if LITELLM_AVAILABLE and litellm_get_model_info is not None:
+            try:
+                info = litellm_get_model_info(model)
+                if info:
+                    if "max_input_tokens" in info and info["max_input_tokens"] is not None:
+                        limit = info["max_input_tokens"]
+                        self._context_limits[model] = limit
+                        return limit
+                    if "max_tokens" in info and info["max_tokens"] is not None:
+                        limit = info["max_tokens"]
+                        self._context_limits[model] = limit
+                        return limit
+            except Exception as e:
+                logger.debug(f"LiteLLM get_model_info failed for {model}: {e}")
+
+        # Pattern-based inference for new models
+        tier = _infer_model_tier(model)
+        if tier and tier in _PATTERN_DEFAULTS:
+            limit = cast(int, _PATTERN_DEFAULTS[tier]["context"])
+            self._warn_unknown_model(model, limit, f"inferred from '{tier}' tier")
+            # Cache for future calls
+            self._context_limits[model] = limit
+            return limit
+
+        # Fallback for unknown Claude models
+        if model.startswith("claude"):
+            limit = cast(int, _UNKNOWN_CLAUDE_DEFAULT["context"])
+            self._warn_unknown_model(model, limit, "using default Claude limit")
+            self._context_limits[model] = limit
+            return limit
+
+        # Non-Claude model - use conservative default
+        limit = 128000
+        self._warn_unknown_model(model, limit, "unknown provider, using conservative default")
+        self._context_limits[model] = limit
+        return limit
+
+    def _warn_unknown_model(self, model: str, limit: int, reason: str) -> None:
+        """Warn about unknown model (once per model)."""
+        global _UNKNOWN_MODEL_WARNINGS
+        if model not in _UNKNOWN_MODEL_WARNINGS:
+            _UNKNOWN_MODEL_WARNINGS.add(model)
+            logger.warning(
+                f"Unknown Anthropic model '{model}': {reason} ({limit:,} tokens). "
+                f"To configure explicitly, set HEADROOM_MODEL_LIMITS env var or "
+                f"add to ~/.headroom/models.json"
+            )
+
+    def supports_model(self, model: str) -> bool:
+        """Check if this provider supports the given model."""
+        if model in self._context_limits:
+            return True
+        # Check prefix matches - support all Claude models
+        return model.startswith("claude")
+
+    def estimate_cost(
+        self,
+        input_tokens: int,
+        output_tokens: int,
+        model: str,
+        cached_tokens: int = 0,
+    ) -> float | None:
+        """Estimate cost for a request.
+
+        Tries LiteLLM first for up-to-date pricing, falls back to manual pricing.
+        """
+        # Try LiteLLM first for cost estimation
+        if LITELLM_AVAILABLE and litellm is not None:
+            try:
+                cost = litellm.completion_cost(
+                    model=model,
+                    prompt="",
+                    completion="",
+                    prompt_tokens=input_tokens - cached_tokens,
+                    completion_tokens=output_tokens,
+                )
+                # Add cached token cost if applicable
+                if cached_tokens > 0:
+                    try:
+                        # Get cached input pricing from LiteLLM model info
+                        info = (
+                            litellm_get_model_info(model)
+                            if litellm_get_model_info is not None
+                            else None
+                        )
+                        if info and "input_cost_per_token" in info:
+                            # LiteLLM typically applies 90% discount for cached tokens
+                            cached_cost = cached_tokens * info["input_cost_per_token"] * 0.1
+                            cost += cached_cost
+                    except Exception:
+                        # Fall back to manual cached pricing
+                        pricing = self._get_pricing(model)
+                        if pricing:
+                            cached_cost = (cached_tokens / 1_000_000) * pricing.get(
+                                "cached_input", pricing["input"]
+                            )
+                            cost += cached_cost
+                return cost
+            except Exception as e:
+                logger.debug(f"LiteLLM cost estimation failed for {model}: {e}")
+
+        # Fall back to manual pricing
+        pricing = self._get_pricing(model)
+        if not pricing:
+            return None
+
+        # Calculate cost
+        non_cached_input = input_tokens - cached_tokens
+        cost = (
+            (non_cached_input / 1_000_000) * pricing["input"]
+            + (cached_tokens / 1_000_000) * pricing.get("cached_input", pricing["input"])
+            + (output_tokens / 1_000_000) * pricing["output"]
+        )
+
+        return cost
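Working the manual fallback arithmetic above on a concrete request, assuming LiteLLM is not installed so the built-in table applies (illustrative; not part of the diff): 10,000 input tokens of which 8,000 are cached, 500 output tokens, on claude-3-5-sonnet-20241022:

    provider = AnthropicProvider()  # no client needed for pricing math
    cost = provider.estimate_cost(
        input_tokens=10_000,
        output_tokens=500,
        model="claude-3-5-sonnet-20241022",
        cached_tokens=8_000,
    )
    # (2_000 / 1e6) * 3.00   = 0.0060  non-cached input
    # (8_000 / 1e6) * 0.30   = 0.0024  cached input
    # (  500 / 1e6) * 15.00  = 0.0075  output
    assert cost is not None and abs(cost - 0.0159) < 1e-9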
+
+    def _get_pricing(self, model: str) -> dict[str, float] | None:
+        """Get pricing for a model with fallback logic."""
+        # Direct match
+        if model in self._pricing:
+            return self._pricing[model]
+
+        # Partial match
+        for known_model, prices in self._pricing.items():
+            if model in known_model or known_model in model:
+                return prices
+
+        # Pattern-based inference
+        tier = _infer_model_tier(model)
+        if tier and tier in _PATTERN_DEFAULTS:
+            return cast(dict[str, float], _PATTERN_DEFAULTS[tier]["pricing"])
+
+        # Default for unknown Claude models
+        if model.startswith("claude"):
+            return cast(dict[str, float], _UNKNOWN_CLAUDE_DEFAULT["pricing"])
+
+        return None
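Finally, a sketch of the resolution ladder in get_context_limit, using hypothetical model names; the results shown assume those names are also unknown to LiteLLM, so the pattern and default branches fire:

    provider = AnthropicProvider()
    provider.get_context_limit("claude-3-5-haiku-20241022")  # 200000, exact table hit
    provider.get_context_limit("claude-sonnet-99")           # 200000 via 'sonnet' tier, logs a warning
    provider.get_context_limit("totally-unknown-model")      # 128000 conservative default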