headroom-ai 0.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. headroom/__init__.py +212 -0
  2. headroom/cache/__init__.py +76 -0
  3. headroom/cache/anthropic.py +517 -0
  4. headroom/cache/base.py +342 -0
  5. headroom/cache/compression_feedback.py +613 -0
  6. headroom/cache/compression_store.py +814 -0
  7. headroom/cache/dynamic_detector.py +1026 -0
  8. headroom/cache/google.py +884 -0
  9. headroom/cache/openai.py +584 -0
  10. headroom/cache/registry.py +175 -0
  11. headroom/cache/semantic.py +451 -0
  12. headroom/ccr/__init__.py +77 -0
  13. headroom/ccr/context_tracker.py +582 -0
  14. headroom/ccr/mcp_server.py +319 -0
  15. headroom/ccr/response_handler.py +772 -0
  16. headroom/ccr/tool_injection.py +415 -0
  17. headroom/cli.py +219 -0
  18. headroom/client.py +977 -0
  19. headroom/compression/__init__.py +42 -0
  20. headroom/compression/detector.py +424 -0
  21. headroom/compression/handlers/__init__.py +22 -0
  22. headroom/compression/handlers/base.py +219 -0
  23. headroom/compression/handlers/code_handler.py +506 -0
  24. headroom/compression/handlers/json_handler.py +418 -0
  25. headroom/compression/masks.py +345 -0
  26. headroom/compression/universal.py +465 -0
  27. headroom/config.py +474 -0
  28. headroom/exceptions.py +192 -0
  29. headroom/integrations/__init__.py +159 -0
  30. headroom/integrations/agno/__init__.py +53 -0
  31. headroom/integrations/agno/hooks.py +345 -0
  32. headroom/integrations/agno/model.py +625 -0
  33. headroom/integrations/agno/providers.py +154 -0
  34. headroom/integrations/langchain/__init__.py +106 -0
  35. headroom/integrations/langchain/agents.py +326 -0
  36. headroom/integrations/langchain/chat_model.py +1002 -0
  37. headroom/integrations/langchain/langsmith.py +324 -0
  38. headroom/integrations/langchain/memory.py +319 -0
  39. headroom/integrations/langchain/providers.py +200 -0
  40. headroom/integrations/langchain/retriever.py +371 -0
  41. headroom/integrations/langchain/streaming.py +341 -0
  42. headroom/integrations/mcp/__init__.py +37 -0
  43. headroom/integrations/mcp/server.py +533 -0
  44. headroom/memory/__init__.py +37 -0
  45. headroom/memory/extractor.py +390 -0
  46. headroom/memory/fast_store.py +621 -0
  47. headroom/memory/fast_wrapper.py +311 -0
  48. headroom/memory/inline_extractor.py +229 -0
  49. headroom/memory/store.py +434 -0
  50. headroom/memory/worker.py +260 -0
  51. headroom/memory/wrapper.py +321 -0
  52. headroom/models/__init__.py +39 -0
  53. headroom/models/registry.py +687 -0
  54. headroom/parser.py +293 -0
  55. headroom/pricing/__init__.py +51 -0
  56. headroom/pricing/anthropic_prices.py +81 -0
  57. headroom/pricing/litellm_pricing.py +113 -0
  58. headroom/pricing/openai_prices.py +91 -0
  59. headroom/pricing/registry.py +188 -0
  60. headroom/providers/__init__.py +61 -0
  61. headroom/providers/anthropic.py +621 -0
  62. headroom/providers/base.py +131 -0
  63. headroom/providers/cohere.py +362 -0
  64. headroom/providers/google.py +427 -0
  65. headroom/providers/litellm.py +297 -0
  66. headroom/providers/openai.py +566 -0
  67. headroom/providers/openai_compatible.py +521 -0
  68. headroom/proxy/__init__.py +19 -0
  69. headroom/proxy/server.py +2683 -0
  70. headroom/py.typed +0 -0
  71. headroom/relevance/__init__.py +124 -0
  72. headroom/relevance/base.py +106 -0
  73. headroom/relevance/bm25.py +255 -0
  74. headroom/relevance/embedding.py +255 -0
  75. headroom/relevance/hybrid.py +259 -0
  76. headroom/reporting/__init__.py +5 -0
  77. headroom/reporting/generator.py +549 -0
  78. headroom/storage/__init__.py +41 -0
  79. headroom/storage/base.py +125 -0
  80. headroom/storage/jsonl.py +220 -0
  81. headroom/storage/sqlite.py +289 -0
  82. headroom/telemetry/__init__.py +91 -0
  83. headroom/telemetry/collector.py +764 -0
  84. headroom/telemetry/models.py +880 -0
  85. headroom/telemetry/toin.py +1579 -0
  86. headroom/tokenizer.py +80 -0
  87. headroom/tokenizers/__init__.py +75 -0
  88. headroom/tokenizers/base.py +210 -0
  89. headroom/tokenizers/estimator.py +198 -0
  90. headroom/tokenizers/huggingface.py +317 -0
  91. headroom/tokenizers/mistral.py +245 -0
  92. headroom/tokenizers/registry.py +398 -0
  93. headroom/tokenizers/tiktoken_counter.py +248 -0
  94. headroom/transforms/__init__.py +106 -0
  95. headroom/transforms/base.py +57 -0
  96. headroom/transforms/cache_aligner.py +357 -0
  97. headroom/transforms/code_compressor.py +1313 -0
  98. headroom/transforms/content_detector.py +335 -0
  99. headroom/transforms/content_router.py +1158 -0
  100. headroom/transforms/llmlingua_compressor.py +638 -0
  101. headroom/transforms/log_compressor.py +529 -0
  102. headroom/transforms/pipeline.py +297 -0
  103. headroom/transforms/rolling_window.py +350 -0
  104. headroom/transforms/search_compressor.py +365 -0
  105. headroom/transforms/smart_crusher.py +2682 -0
  106. headroom/transforms/text_compressor.py +259 -0
  107. headroom/transforms/tool_crusher.py +338 -0
  108. headroom/utils.py +215 -0
  109. headroom_ai-0.2.13.dist-info/METADATA +315 -0
  110. headroom_ai-0.2.13.dist-info/RECORD +114 -0
  111. headroom_ai-0.2.13.dist-info/WHEEL +4 -0
  112. headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
  113. headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
  114. headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
headroom/models/registry.py
@@ -0,0 +1,687 @@
+"""Model registry with capabilities database.
+
+Centralized database of LLM models with their capabilities, context limits,
+and provider information. Supports dynamic registration of custom models
+and automatic provider detection.
+
+Pricing is fetched dynamically from LiteLLM's community-maintained database.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any
+
+from headroom.pricing.litellm_pricing import estimate_cost as litellm_estimate_cost
+from headroom.pricing.litellm_pricing import get_model_pricing
+
+
+@dataclass(frozen=True)
+class ModelInfo:
+    """Information about an LLM model.
+
+    Attributes:
+        name: Model identifier.
+        provider: Provider name (openai, anthropic, etc.).
+        context_window: Maximum context window in tokens.
+        max_output_tokens: Maximum output tokens.
+        supports_tools: Whether model supports tool/function calling.
+        supports_vision: Whether model supports image inputs.
+        supports_streaming: Whether model supports streaming responses.
+        supports_json_mode: Whether model supports JSON output mode.
+        tokenizer_backend: Tokenizer backend to use.
+        aliases: Alternative names for the model.
+        notes: Additional notes about the model.
+
+    Note:
+        Pricing is fetched dynamically from LiteLLM's database.
+        Use ModelRegistry.estimate_cost() to get current pricing.
+    """
+
+    name: str
+    provider: str
+    context_window: int = 128000
+    max_output_tokens: int = 4096
+    supports_tools: bool = True
+    supports_vision: bool = False
+    supports_streaming: bool = True
+    supports_json_mode: bool = True
+    tokenizer_backend: str | None = None
+    aliases: tuple[str, ...] = ()
+    notes: str = ""
+
+
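ModelInfo is declared with @dataclass(frozen=True), so entries are immutable once registered. A minimal sketch of what that implies for callers (the model name below is hypothetical, not one of the package's entries):

    from dataclasses import replace

    info = ModelInfo(name="example-model", provider="custom", context_window=32000)
    info.supports_tools  # True - field defaults apply
    # frozen=True means plain attribute assignment raises FrozenInstanceError;
    # derive modified copies with dataclasses.replace instead.
    wider = replace(info, context_window=65536)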
+# Built-in model database.
+# Pricing is not stored here; it is resolved at call time from LiteLLM's database.
+_MODELS: dict[str, ModelInfo] = {}
+
+
+def _register_builtin_models() -> None:
+    """Register built-in models.
+
+    Note: Pricing is fetched dynamically from LiteLLM's database.
+    """
+
+    # ============================================================
+    # OpenAI Models
+    # ============================================================
+
+    # GPT-4o family
+    _MODELS["gpt-4o"] = ModelInfo(
+        name="gpt-4o",
+        provider="openai",
+        context_window=128000,
+        max_output_tokens=16384,
+        supports_tools=True,
+        supports_vision=True,
+        supports_streaming=True,
+        tokenizer_backend="tiktoken",
+        aliases=("gpt-4o-2024-11-20", "gpt-4o-2024-08-06", "gpt-4o-2024-05-13"),
+        notes="Latest GPT-4o with vision and tools",
+    )
+
+    _MODELS["gpt-4o-mini"] = ModelInfo(
+        name="gpt-4o-mini",
+        provider="openai",
+        context_window=128000,
+        max_output_tokens=16384,
+        supports_tools=True,
+        supports_vision=True,
+        supports_streaming=True,
+        tokenizer_backend="tiktoken",
+        aliases=("gpt-4o-mini-2024-07-18",),
+        notes="Cost-effective GPT-4o variant",
+    )
+
+    # o1 reasoning models
+    _MODELS["o1"] = ModelInfo(
+        name="o1",
+        provider="openai",
+        context_window=200000,
+        max_output_tokens=100000,
+        supports_tools=True,
+        supports_vision=True,
+        supports_streaming=True,
+        tokenizer_backend="tiktoken",
+        notes="Full reasoning model with extended thinking",
+    )
+
+    _MODELS["o1-mini"] = ModelInfo(
+        name="o1-mini",
+        provider="openai",
+        context_window=128000,
+        max_output_tokens=65536,
+        supports_tools=True,
+        supports_vision=False,
+        supports_streaming=True,
+        tokenizer_backend="tiktoken",
+        notes="Fast reasoning model",
+    )
+
+    _MODELS["o3-mini"] = ModelInfo(
+        name="o3-mini",
+        provider="openai",
+        context_window=200000,
+        max_output_tokens=100000,
+        supports_tools=True,
+        supports_vision=True,
+        supports_streaming=True,
+        tokenizer_backend="tiktoken",
+        notes="Latest reasoning model",
+    )
+
+    # GPT-4 Turbo
+    _MODELS["gpt-4-turbo"] = ModelInfo(
+        name="gpt-4-turbo",
+        provider="openai",
+        context_window=128000,
+        max_output_tokens=4096,
+        supports_tools=True,
+        supports_vision=True,
+        supports_streaming=True,
+        tokenizer_backend="tiktoken",
+        aliases=("gpt-4-turbo-preview", "gpt-4-turbo-2024-04-09"),
+        notes="GPT-4 Turbo with vision",
+    )
+
+    # GPT-4
+    _MODELS["gpt-4"] = ModelInfo(
+        name="gpt-4",
+        provider="openai",
+        context_window=8192,
+        max_output_tokens=4096,
+        supports_tools=True,
+        supports_vision=False,
+        supports_streaming=True,
+        tokenizer_backend="tiktoken",
+        aliases=("gpt-4-0613",),
+        notes="Original GPT-4",
+    )
+
+    _MODELS["gpt-4-32k"] = ModelInfo(
+        name="gpt-4-32k",
+        provider="openai",
+        context_window=32768,
+        max_output_tokens=4096,
+        supports_tools=True,
+        supports_vision=False,
+        supports_streaming=True,
+        tokenizer_backend="tiktoken",
+        notes="Extended context GPT-4",
+    )
+
+    # GPT-3.5
+    _MODELS["gpt-3.5-turbo"] = ModelInfo(
+        name="gpt-3.5-turbo",
+        provider="openai",
+        context_window=16385,
+        max_output_tokens=4096,
+        supports_tools=True,
+        supports_vision=False,
+        supports_streaming=True,
+        tokenizer_backend="tiktoken",
+        aliases=("gpt-3.5-turbo-0125", "gpt-3.5-turbo-1106"),
+        notes="Fast and cost-effective",
+    )
+
+    # ============================================================
+    # Anthropic Models
+    # ============================================================
+
+    _MODELS["claude-3-5-sonnet-20241022"] = ModelInfo(
+        name="claude-3-5-sonnet-20241022",
+        provider="anthropic",
+        context_window=200000,
+        max_output_tokens=8192,
+        supports_tools=True,
+        supports_vision=True,
+        supports_streaming=True,
+        tokenizer_backend="anthropic",
+        aliases=("claude-3-5-sonnet-latest", "claude-sonnet-4-20250514"),
+        notes="Claude 3.5 Sonnet - Best balance of speed and capability",
+    )
+
+    _MODELS["claude-3-5-haiku-20241022"] = ModelInfo(
+        name="claude-3-5-haiku-20241022",
+        provider="anthropic",
+        context_window=200000,
+        max_output_tokens=8192,
+        supports_tools=True,
+        supports_vision=True,
+        supports_streaming=True,
+        tokenizer_backend="anthropic",
+        aliases=("claude-3-5-haiku-latest",),
+        notes="Claude 3.5 Haiku - Fast and cost-effective",
+    )
+
+    _MODELS["claude-3-opus-20240229"] = ModelInfo(
+        name="claude-3-opus-20240229",
+        provider="anthropic",
+        context_window=200000,
+        max_output_tokens=4096,
+        supports_tools=True,
+        supports_vision=True,
+        supports_streaming=True,
+        tokenizer_backend="anthropic",
+        aliases=("claude-3-opus-latest",),
+        notes="Claude 3 Opus - Most capable",
+    )
+
+    _MODELS["claude-3-haiku-20240307"] = ModelInfo(
+        name="claude-3-haiku-20240307",
+        provider="anthropic",
+        context_window=200000,
+        max_output_tokens=4096,
+        supports_tools=True,
+        supports_vision=True,
+        supports_streaming=True,
+        tokenizer_backend="anthropic",
+        notes="Claude 3 Haiku - Legacy fast model",
+    )
+
+    # ============================================================
+    # Google Models
+    # ============================================================
+
+    _MODELS["gemini-2.0-flash"] = ModelInfo(
+        name="gemini-2.0-flash",
+        provider="google",
+        context_window=1000000,
+        max_output_tokens=8192,
+        supports_tools=True,
+        supports_vision=True,
+        supports_streaming=True,
+        tokenizer_backend="google",
+        aliases=("gemini-2.0-flash-exp",),
+        notes="Gemini 2.0 Flash - Fast multimodal",
+    )
+
+    _MODELS["gemini-1.5-pro"] = ModelInfo(
+        name="gemini-1.5-pro",
+        provider="google",
+        context_window=2000000,
+        max_output_tokens=8192,
+        supports_tools=True,
+        supports_vision=True,
+        supports_streaming=True,
+        tokenizer_backend="google",
+        aliases=("gemini-1.5-pro-latest",),
+        notes="Gemini 1.5 Pro - 2M context window",
+    )
+
+    _MODELS["gemini-1.5-flash"] = ModelInfo(
+        name="gemini-1.5-flash",
+        provider="google",
+        context_window=1000000,
+        max_output_tokens=8192,
+        supports_tools=True,
+        supports_vision=True,
+        supports_streaming=True,
+        tokenizer_backend="google",
+        aliases=("gemini-1.5-flash-latest",),
+        notes="Gemini 1.5 Flash - Cost-effective",
+    )
+
+    # ============================================================
+    # Meta Llama Models (open source)
+    # ============================================================
+
+    _MODELS["llama-3.3-70b"] = ModelInfo(
+        name="llama-3.3-70b",
+        provider="meta",
+        context_window=128000,
+        max_output_tokens=4096,
+        supports_tools=True,
+        supports_vision=False,
+        supports_streaming=True,
+        tokenizer_backend="huggingface",
+        aliases=("llama-3.3-70b-instruct", "meta-llama/Llama-3.3-70B-Instruct"),
+        notes="Llama 3.3 70B - Open source",
+    )
+
+    _MODELS["llama-3.1-405b"] = ModelInfo(
+        name="llama-3.1-405b",
+        provider="meta",
+        context_window=128000,
+        max_output_tokens=4096,
+        supports_tools=True,
+        supports_vision=False,
+        supports_streaming=True,
+        tokenizer_backend="huggingface",
+        aliases=("llama-3.1-405b-instruct", "meta-llama/Llama-3.1-405B-Instruct"),
+        notes="Llama 3.1 405B - Largest open source",
+    )
+
+    _MODELS["llama-3.1-70b"] = ModelInfo(
+        name="llama-3.1-70b",
+        provider="meta",
+        context_window=128000,
+        max_output_tokens=4096,
+        supports_tools=True,
+        supports_vision=False,
+        supports_streaming=True,
+        tokenizer_backend="huggingface",
+        aliases=("llama-3.1-70b-instruct", "meta-llama/Llama-3.1-70B-Instruct"),
+        notes="Llama 3.1 70B",
+    )
+
+    _MODELS["llama-3.1-8b"] = ModelInfo(
+        name="llama-3.1-8b",
+        provider="meta",
+        context_window=128000,
+        max_output_tokens=4096,
+        supports_tools=True,
+        supports_vision=False,
+        supports_streaming=True,
+        tokenizer_backend="huggingface",
+        aliases=("llama-3.1-8b-instruct", "meta-llama/Llama-3.1-8B-Instruct"),
+        notes="Llama 3.1 8B - Fast and efficient",
+    )
+
+    # ============================================================
+    # Mistral Models
+    # ============================================================
+
+    _MODELS["mistral-large"] = ModelInfo(
+        name="mistral-large",
+        provider="mistral",
+        context_window=128000,
+        max_output_tokens=4096,
+        supports_tools=True,
+        supports_vision=False,
+        supports_streaming=True,
+        tokenizer_backend="huggingface",
+        aliases=("mistral-large-latest",),
+        notes="Mistral Large - Best capability",
+    )
+
+    _MODELS["mistral-small"] = ModelInfo(
+        name="mistral-small",
+        provider="mistral",
+        context_window=32768,
+        max_output_tokens=4096,
+        supports_tools=True,
+        supports_vision=False,
+        supports_streaming=True,
+        tokenizer_backend="huggingface",
+        aliases=("mistral-small-latest",),
+        notes="Mistral Small - Cost-effective",
+    )
+
+    _MODELS["mixtral-8x7b"] = ModelInfo(
+        name="mixtral-8x7b",
+        provider="mistral",
+        context_window=32768,
+        max_output_tokens=4096,
+        supports_tools=True,
+        supports_vision=False,
+        supports_streaming=True,
+        tokenizer_backend="huggingface",
+        aliases=("mixtral-8x7b-instruct",),
+        notes="Mixtral 8x7B - MoE architecture",
+    )
+
+    _MODELS["mistral-7b"] = ModelInfo(
+        name="mistral-7b",
+        provider="mistral",
+        context_window=32768,
+        max_output_tokens=4096,
+        supports_tools=False,
+        supports_vision=False,
+        supports_streaming=True,
+        tokenizer_backend="huggingface",
+        aliases=("mistral-7b-instruct",),
+        notes="Mistral 7B - Open source",
+    )
+
+    # ============================================================
+    # DeepSeek Models
+    # ============================================================
+
+    _MODELS["deepseek-v3"] = ModelInfo(
+        name="deepseek-v3",
+        provider="deepseek",
+        context_window=128000,
+        max_output_tokens=8192,
+        supports_tools=True,
+        supports_vision=False,
+        supports_streaming=True,
+        tokenizer_backend="huggingface",
+        notes="DeepSeek V3 - High performance, low cost",
+    )
+
+    _MODELS["deepseek-coder"] = ModelInfo(
+        name="deepseek-coder",
+        provider="deepseek",
+        context_window=16384,
+        max_output_tokens=4096,
+        supports_tools=False,
+        supports_vision=False,
+        supports_streaming=True,
+        tokenizer_backend="huggingface",
+        notes="DeepSeek Coder - Specialized for code",
+    )
+
+    # ============================================================
+    # Qwen Models
+    # ============================================================
+
+    _MODELS["qwen2.5-72b"] = ModelInfo(
+        name="qwen2.5-72b",
+        provider="alibaba",
+        context_window=131072,
+        max_output_tokens=8192,
+        supports_tools=True,
+        supports_vision=False,
+        supports_streaming=True,
+        tokenizer_backend="huggingface",
+        aliases=("qwen2.5-72b-instruct",),
+        notes="Qwen 2.5 72B - Strong multilingual",
+    )
+
+    _MODELS["qwen2.5-7b"] = ModelInfo(
+        name="qwen2.5-7b",
+        provider="alibaba",
+        context_window=131072,
+        max_output_tokens=8192,
+        supports_tools=True,
+        supports_vision=False,
+        supports_streaming=True,
+        tokenizer_backend="huggingface",
+        aliases=("qwen2.5-7b-instruct",),
+        notes="Qwen 2.5 7B - Efficient",
+    )
+
+
+# Initialize built-in models
+_register_builtin_models()
+
+# Build alias lookup
+_ALIASES: dict[str, str] = {}
+for model_name, info in _MODELS.items():
+    for alias in info.aliases:
+        _ALIASES[alias.lower()] = model_name
+
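This loop flattens every declared alias into one case-insensitive table mapping lowercased alias to canonical registry key. Given the built-in entries above, the table would contain mappings along these lines:

    _ALIASES["gpt-4o-2024-08-06"]         # -> "gpt-4o"
    _ALIASES["claude-3-5-sonnet-latest"]  # -> "claude-3-5-sonnet-20241022"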
+
+class ModelRegistry:
+    """Registry of LLM models and their capabilities.
+
+    Singleton registry providing access to model information.
+    Supports built-in models and custom registration.
+
+    Example:
+        # Get model info
+        info = ModelRegistry.get("gpt-4o")
+        print(f"Context: {info.context_window}")
+
+        # Register custom model
+        ModelRegistry.register(
+            "my-model",
+            provider="custom",
+            context_window=32000,
+        )
+
+        # List models by provider
+        openai_models = ModelRegistry.list_models(provider="openai")
+    """
+
+    @classmethod
+    def get(cls, model: str) -> ModelInfo | None:
+        """Get model information.
+
+        Args:
+            model: Model name or alias.
+
+        Returns:
+            ModelInfo if found, None otherwise.
+        """
+        model_lower = model.lower()
+
+        # Direct lookup
+        if model_lower in _MODELS:
+            return _MODELS[model_lower]
+
+        # Alias lookup
+        if model_lower in _ALIASES:
+            return _MODELS[_ALIASES[model_lower]]
+
+        # Prefix matching
+        for name, info in _MODELS.items():
+            if model_lower.startswith(name):
+                return info
+
+        return None
+
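get() resolves a name in three passes: exact key, alias table, then a startswith prefix scan in registration order, so a dated or suffixed variant falls back to its base entry. A sketch of the behavior implied by the code above (the prefix-scan example uses a hypothetical unregistered name):

    ModelRegistry.get("gpt-4o")                # pass 1: exact key in _MODELS
    ModelRegistry.get("GPT-4o-2024-08-06")     # pass 2: lowercased, resolved via _ALIASES
    ModelRegistry.get("gpt-4o-audio-preview")  # pass 3: prefix match on the "gpt-4o" entry
    ModelRegistry.get("totally-unknown")       # no pass matches -> None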
+    @classmethod
+    def register(
+        cls,
+        model: str,
+        provider: str,
+        context_window: int = 128000,
+        **kwargs: Any,
+    ) -> ModelInfo:
+        """Register a custom model.
+
+        Args:
+            model: Model name.
+            provider: Provider name.
+            context_window: Maximum context window.
+            **kwargs: Additional ModelInfo fields.
+
+        Returns:
+            Registered ModelInfo.
+        """
+        info = ModelInfo(
+            name=model,
+            provider=provider,
+            context_window=context_window,
+            **kwargs,
+        )
+        _MODELS[model.lower()] = info
+
+        # Register aliases
+        for alias in info.aliases:
+            _ALIASES[alias.lower()] = model.lower()
+
+        return info
+
+    @classmethod
+    def list_models(
+        cls,
+        provider: str | None = None,
+        supports_tools: bool | None = None,
+        supports_vision: bool | None = None,
+        min_context: int | None = None,
+    ) -> list[ModelInfo]:
+        """List models matching criteria.
+
+        Args:
+            provider: Filter by provider.
+            supports_tools: Filter by tool support.
+            supports_vision: Filter by vision support.
+            min_context: Minimum context window.
+
+        Returns:
+            List of matching ModelInfo.
+        """
+        results = []
+        for info in _MODELS.values():
+            if provider and info.provider != provider:
+                continue
+            if supports_tools is not None and info.supports_tools != supports_tools:
+                continue
+            if supports_vision is not None and info.supports_vision != supports_vision:
+                continue
+            if min_context and info.context_window < min_context:
+                continue
+            results.append(info)
+        return results
+
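One behavioral detail worth noting in list_models(): the boolean filters compare with `is not None`, so supports_vision=False is an effective filter, while provider and min_context are truthiness checks (min_context=0 is treated as "no filter"). A hypothetical query against the registry:

    # Vision-capable models with at least a 200k-token context window
    big_vision = ModelRegistry.list_models(supports_vision=True, min_context=200000)
    names = sorted(m.name for m in big_vision)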
+    @classmethod
+    def list_providers(cls) -> list[str]:
+        """List all known providers.
+
+        Returns:
+            List of provider names.
+        """
+        return list({info.provider for info in _MODELS.values()})
+
+    @classmethod
+    def get_context_limit(cls, model: str, default: int = 128000) -> int:
+        """Get context limit for a model.
+
+        Args:
+            model: Model name.
+            default: Default if model not found.
+
+        Returns:
+            Context window size.
+        """
+        info = cls.get(model)
+        return info.context_window if info else default
+
+    @classmethod
+    def estimate_cost(
+        cls,
+        model: str,
+        input_tokens: int,
+        output_tokens: int,
+        cached_tokens: int = 0,
+    ) -> float | None:
+        """Estimate API cost for a model using LiteLLM's pricing database.
+
+        Args:
+            model: Model name.
+            input_tokens: Number of input tokens.
+            output_tokens: Number of output tokens.
+            cached_tokens: Number of cached input tokens (not currently used).
+
+        Returns:
+            Estimated cost in USD, or None if pricing unknown.
+        """
+        # Use LiteLLM's pricing database
+        return litellm_estimate_cost(model, input_tokens, output_tokens)
+
+    @classmethod
+    def get_pricing(cls, model: str) -> tuple[float, float] | None:
+        """Get pricing for a model from LiteLLM's database.
+
+        Args:
+            model: Model name.
+
+        Returns:
+            Tuple of (input_cost_per_1m, output_cost_per_1m) or None if not found.
+        """
+        pricing = get_model_pricing(model)
+        if pricing is None:
+            return None
+        return (pricing.input_cost_per_1m, pricing.output_cost_per_1m)
+
+
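Both cost methods delegate to headroom.pricing.litellm_pricing, so no prices are hardcoded in this file; note also that cached_tokens is accepted but not forwarded. A hedged sketch of typical use (actual numbers depend entirely on LiteLLM's live pricing data):

    cost = ModelRegistry.estimate_cost("gpt-4o-mini", input_tokens=12_000, output_tokens=800)
    if cost is not None:
        print(f"estimated ~${cost:.4f}")

    rates = ModelRegistry.get_pricing("gpt-4o-mini")  # (input $/1M tokens, output $/1M tokens) or None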
+# Convenience functions
+def get_model_info(model: str) -> ModelInfo | None:
+    """Get information about a model.
+
+    Args:
+        model: Model name or alias.
+
+    Returns:
+        ModelInfo if found, None otherwise.
+    """
+    return ModelRegistry.get(model)
+
+
+def list_models(
+    provider: str | None = None,
+    **kwargs: Any,
+) -> list[ModelInfo]:
+    """List models matching criteria.
+
+    Args:
+        provider: Filter by provider.
+        **kwargs: Additional filter criteria.
+
+    Returns:
+        List of matching ModelInfo.
+    """
+    return ModelRegistry.list_models(provider=provider, **kwargs)
+
+
+def register_model(
+    model: str,
+    provider: str,
+    context_window: int = 128000,
+    **kwargs: Any,
+) -> ModelInfo:
+    """Register a custom model.
+
+    Args:
+        model: Model name.
+        provider: Provider name.
+        context_window: Maximum context window.
+        **kwargs: Additional ModelInfo fields.
+
+    Returns:
+        Registered ModelInfo.
+    """
+    return ModelRegistry.register(model, provider, context_window, **kwargs)
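The module-level helpers mirror the classmethods one-to-one, so callers can avoid touching ModelRegistry directly. A hypothetical end-to-end use of this file's public surface (the custom model and its alias are invented for illustration):

    from headroom.models.registry import get_model_info, list_models, register_model

    register_model(
        "acme-chat-1",                 # hypothetical custom model
        provider="acme",
        context_window=64000,
        aliases=("acme-chat-latest",),
    )

    info = get_model_info("acme-chat-latest")                 # resolves through the alias table
    acme_tools = list_models(provider="acme", supports_tools=True)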