headroom_ai-0.2.13-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- headroom/__init__.py +212 -0
- headroom/cache/__init__.py +76 -0
- headroom/cache/anthropic.py +517 -0
- headroom/cache/base.py +342 -0
- headroom/cache/compression_feedback.py +613 -0
- headroom/cache/compression_store.py +814 -0
- headroom/cache/dynamic_detector.py +1026 -0
- headroom/cache/google.py +884 -0
- headroom/cache/openai.py +584 -0
- headroom/cache/registry.py +175 -0
- headroom/cache/semantic.py +451 -0
- headroom/ccr/__init__.py +77 -0
- headroom/ccr/context_tracker.py +582 -0
- headroom/ccr/mcp_server.py +319 -0
- headroom/ccr/response_handler.py +772 -0
- headroom/ccr/tool_injection.py +415 -0
- headroom/cli.py +219 -0
- headroom/client.py +977 -0
- headroom/compression/__init__.py +42 -0
- headroom/compression/detector.py +424 -0
- headroom/compression/handlers/__init__.py +22 -0
- headroom/compression/handlers/base.py +219 -0
- headroom/compression/handlers/code_handler.py +506 -0
- headroom/compression/handlers/json_handler.py +418 -0
- headroom/compression/masks.py +345 -0
- headroom/compression/universal.py +465 -0
- headroom/config.py +474 -0
- headroom/exceptions.py +192 -0
- headroom/integrations/__init__.py +159 -0
- headroom/integrations/agno/__init__.py +53 -0
- headroom/integrations/agno/hooks.py +345 -0
- headroom/integrations/agno/model.py +625 -0
- headroom/integrations/agno/providers.py +154 -0
- headroom/integrations/langchain/__init__.py +106 -0
- headroom/integrations/langchain/agents.py +326 -0
- headroom/integrations/langchain/chat_model.py +1002 -0
- headroom/integrations/langchain/langsmith.py +324 -0
- headroom/integrations/langchain/memory.py +319 -0
- headroom/integrations/langchain/providers.py +200 -0
- headroom/integrations/langchain/retriever.py +371 -0
- headroom/integrations/langchain/streaming.py +341 -0
- headroom/integrations/mcp/__init__.py +37 -0
- headroom/integrations/mcp/server.py +533 -0
- headroom/memory/__init__.py +37 -0
- headroom/memory/extractor.py +390 -0
- headroom/memory/fast_store.py +621 -0
- headroom/memory/fast_wrapper.py +311 -0
- headroom/memory/inline_extractor.py +229 -0
- headroom/memory/store.py +434 -0
- headroom/memory/worker.py +260 -0
- headroom/memory/wrapper.py +321 -0
- headroom/models/__init__.py +39 -0
- headroom/models/registry.py +687 -0
- headroom/parser.py +293 -0
- headroom/pricing/__init__.py +51 -0
- headroom/pricing/anthropic_prices.py +81 -0
- headroom/pricing/litellm_pricing.py +113 -0
- headroom/pricing/openai_prices.py +91 -0
- headroom/pricing/registry.py +188 -0
- headroom/providers/__init__.py +61 -0
- headroom/providers/anthropic.py +621 -0
- headroom/providers/base.py +131 -0
- headroom/providers/cohere.py +362 -0
- headroom/providers/google.py +427 -0
- headroom/providers/litellm.py +297 -0
- headroom/providers/openai.py +566 -0
- headroom/providers/openai_compatible.py +521 -0
- headroom/proxy/__init__.py +19 -0
- headroom/proxy/server.py +2683 -0
- headroom/py.typed +0 -0
- headroom/relevance/__init__.py +124 -0
- headroom/relevance/base.py +106 -0
- headroom/relevance/bm25.py +255 -0
- headroom/relevance/embedding.py +255 -0
- headroom/relevance/hybrid.py +259 -0
- headroom/reporting/__init__.py +5 -0
- headroom/reporting/generator.py +549 -0
- headroom/storage/__init__.py +41 -0
- headroom/storage/base.py +125 -0
- headroom/storage/jsonl.py +220 -0
- headroom/storage/sqlite.py +289 -0
- headroom/telemetry/__init__.py +91 -0
- headroom/telemetry/collector.py +764 -0
- headroom/telemetry/models.py +880 -0
- headroom/telemetry/toin.py +1579 -0
- headroom/tokenizer.py +80 -0
- headroom/tokenizers/__init__.py +75 -0
- headroom/tokenizers/base.py +210 -0
- headroom/tokenizers/estimator.py +198 -0
- headroom/tokenizers/huggingface.py +317 -0
- headroom/tokenizers/mistral.py +245 -0
- headroom/tokenizers/registry.py +398 -0
- headroom/tokenizers/tiktoken_counter.py +248 -0
- headroom/transforms/__init__.py +106 -0
- headroom/transforms/base.py +57 -0
- headroom/transforms/cache_aligner.py +357 -0
- headroom/transforms/code_compressor.py +1313 -0
- headroom/transforms/content_detector.py +335 -0
- headroom/transforms/content_router.py +1158 -0
- headroom/transforms/llmlingua_compressor.py +638 -0
- headroom/transforms/log_compressor.py +529 -0
- headroom/transforms/pipeline.py +297 -0
- headroom/transforms/rolling_window.py +350 -0
- headroom/transforms/search_compressor.py +365 -0
- headroom/transforms/smart_crusher.py +2682 -0
- headroom/transforms/text_compressor.py +259 -0
- headroom/transforms/tool_crusher.py +338 -0
- headroom/utils.py +215 -0
- headroom_ai-0.2.13.dist-info/METADATA +315 -0
- headroom_ai-0.2.13.dist-info/RECORD +114 -0
- headroom_ai-0.2.13.dist-info/WHEEL +4 -0
- headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
- headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
- headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
headroom/models/registry.py
@@ -0,0 +1,687 @@
+"""Model registry with capabilities database.
+
+Centralized database of LLM models with their capabilities, context limits,
+and provider information. Supports dynamic registration of custom models
+and automatic provider detection.
+
+Pricing is fetched dynamically from LiteLLM's community-maintained database.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any
+
+from headroom.pricing.litellm_pricing import estimate_cost as litellm_estimate_cost
+from headroom.pricing.litellm_pricing import get_model_pricing
+
+
+@dataclass(frozen=True)
+class ModelInfo:
+    """Information about an LLM model.
+
+    Attributes:
+        name: Model identifier.
+        provider: Provider name (openai, anthropic, etc.).
+        context_window: Maximum context window in tokens.
+        max_output_tokens: Maximum output tokens.
+        supports_tools: Whether model supports tool/function calling.
+        supports_vision: Whether model supports image inputs.
+        supports_streaming: Whether model supports streaming responses.
+        supports_json_mode: Whether model supports JSON output mode.
+        tokenizer_backend: Tokenizer backend to use.
+        aliases: Alternative names for the model.
+        notes: Additional notes about the model.
+
+    Note:
+        Pricing is fetched dynamically from LiteLLM's database.
+        Use ModelRegistry.estimate_cost() to get current pricing.
+    """
+
+    name: str
+    provider: str
+    context_window: int = 128000
+    max_output_tokens: int = 4096
+    supports_tools: bool = True
+    supports_vision: bool = False
+    supports_streaming: bool = True
+    supports_json_mode: bool = True
+    tokenizer_backend: str | None = None
+    aliases: tuple[str, ...] = ()
+    notes: str = ""
+
+
+# Built-in model database
+# Pricing as of January 2025 - verify current rates
+_MODELS: dict[str, ModelInfo] = {}
+
+
+def _register_builtin_models() -> None:
+    """Register built-in models.
+
+    Note: Pricing is fetched dynamically from LiteLLM's database.
+    """
+
+    # ============================================================
+    # OpenAI Models
+    # ============================================================
+
+    # GPT-4o family
+    _MODELS["gpt-4o"] = ModelInfo(
+        name="gpt-4o",
+        provider="openai",
+        context_window=128000,
+        max_output_tokens=16384,
+        supports_tools=True,
+        supports_vision=True,
+        supports_streaming=True,
+        tokenizer_backend="tiktoken",
+        aliases=("gpt-4o-2024-11-20", "gpt-4o-2024-08-06", "gpt-4o-2024-05-13"),
+        notes="Latest GPT-4o with vision and tools",
+    )
+
+    _MODELS["gpt-4o-mini"] = ModelInfo(
+        name="gpt-4o-mini",
+        provider="openai",
+        context_window=128000,
+        max_output_tokens=16384,
+        supports_tools=True,
+        supports_vision=True,
+        supports_streaming=True,
+        tokenizer_backend="tiktoken",
+        aliases=("gpt-4o-mini-2024-07-18",),
+        notes="Cost-effective GPT-4o variant",
+    )
+
+    # o1 reasoning models
+    _MODELS["o1"] = ModelInfo(
+        name="o1",
+        provider="openai",
+        context_window=200000,
+        max_output_tokens=100000,
+        supports_tools=True,
+        supports_vision=True,
+        supports_streaming=True,
+        tokenizer_backend="tiktoken",
+        notes="Full reasoning model with extended thinking",
+    )
+
+    _MODELS["o1-mini"] = ModelInfo(
+        name="o1-mini",
+        provider="openai",
+        context_window=128000,
+        max_output_tokens=65536,
+        supports_tools=True,
+        supports_vision=False,
+        supports_streaming=True,
+        tokenizer_backend="tiktoken",
+        notes="Fast reasoning model",
+    )
+
+    _MODELS["o3-mini"] = ModelInfo(
+        name="o3-mini",
+        provider="openai",
+        context_window=200000,
+        max_output_tokens=100000,
+        supports_tools=True,
+        supports_vision=True,
+        supports_streaming=True,
+        tokenizer_backend="tiktoken",
+        notes="Latest reasoning model",
+    )
+
+    # GPT-4 Turbo
+    _MODELS["gpt-4-turbo"] = ModelInfo(
+        name="gpt-4-turbo",
+        provider="openai",
+        context_window=128000,
+        max_output_tokens=4096,
+        supports_tools=True,
+        supports_vision=True,
+        supports_streaming=True,
+        tokenizer_backend="tiktoken",
+        aliases=("gpt-4-turbo-preview", "gpt-4-turbo-2024-04-09"),
+        notes="GPT-4 Turbo with vision",
+    )
+
+    # GPT-4
+    _MODELS["gpt-4"] = ModelInfo(
+        name="gpt-4",
+        provider="openai",
+        context_window=8192,
+        max_output_tokens=4096,
+        supports_tools=True,
+        supports_vision=False,
+        supports_streaming=True,
+        tokenizer_backend="tiktoken",
+        aliases=("gpt-4-0613",),
+        notes="Original GPT-4",
+    )
+
+    _MODELS["gpt-4-32k"] = ModelInfo(
+        name="gpt-4-32k",
+        provider="openai",
+        context_window=32768,
+        max_output_tokens=4096,
+        supports_tools=True,
+        supports_vision=False,
+        supports_streaming=True,
+        tokenizer_backend="tiktoken",
+        notes="Extended context GPT-4",
+    )
+
+    # GPT-3.5
+    _MODELS["gpt-3.5-turbo"] = ModelInfo(
+        name="gpt-3.5-turbo",
+        provider="openai",
+        context_window=16385,
+        max_output_tokens=4096,
+        supports_tools=True,
+        supports_vision=False,
+        supports_streaming=True,
+        tokenizer_backend="tiktoken",
+        aliases=("gpt-3.5-turbo-0125", "gpt-3.5-turbo-1106"),
+        notes="Fast and cost-effective",
+    )
+
+    # ============================================================
+    # Anthropic Models
+    # ============================================================
+
+    _MODELS["claude-3-5-sonnet-20241022"] = ModelInfo(
+        name="claude-3-5-sonnet-20241022",
+        provider="anthropic",
+        context_window=200000,
+        max_output_tokens=8192,
+        supports_tools=True,
+        supports_vision=True,
+        supports_streaming=True,
+        tokenizer_backend="anthropic",
+        aliases=("claude-3-5-sonnet-latest", "claude-sonnet-4-20250514"),
+        notes="Claude 3.5 Sonnet - Best balance of speed and capability",
+    )
+
+    _MODELS["claude-3-5-haiku-20241022"] = ModelInfo(
+        name="claude-3-5-haiku-20241022",
+        provider="anthropic",
+        context_window=200000,
+        max_output_tokens=8192,
+        supports_tools=True,
+        supports_vision=True,
+        supports_streaming=True,
+        tokenizer_backend="anthropic",
+        aliases=("claude-3-5-haiku-latest",),
+        notes="Claude 3.5 Haiku - Fast and cost-effective",
+    )
+
+    _MODELS["claude-3-opus-20240229"] = ModelInfo(
+        name="claude-3-opus-20240229",
+        provider="anthropic",
+        context_window=200000,
+        max_output_tokens=4096,
+        supports_tools=True,
+        supports_vision=True,
+        supports_streaming=True,
+        tokenizer_backend="anthropic",
+        aliases=("claude-3-opus-latest",),
+        notes="Claude 3 Opus - Most capable",
+    )
+
+    _MODELS["claude-3-haiku-20240307"] = ModelInfo(
+        name="claude-3-haiku-20240307",
+        provider="anthropic",
+        context_window=200000,
+        max_output_tokens=4096,
+        supports_tools=True,
+        supports_vision=True,
+        supports_streaming=True,
+        tokenizer_backend="anthropic",
+        notes="Claude 3 Haiku - Legacy fast model",
+    )
+
+    # ============================================================
+    # Google Models
+    # ============================================================
+
+    _MODELS["gemini-2.0-flash"] = ModelInfo(
+        name="gemini-2.0-flash",
+        provider="google",
+        context_window=1000000,
+        max_output_tokens=8192,
+        supports_tools=True,
+        supports_vision=True,
+        supports_streaming=True,
+        tokenizer_backend="google",
+        aliases=("gemini-2.0-flash-exp",),
+        notes="Gemini 2.0 Flash - Fast multimodal",
+    )
+
+    _MODELS["gemini-1.5-pro"] = ModelInfo(
+        name="gemini-1.5-pro",
+        provider="google",
+        context_window=2000000,
+        max_output_tokens=8192,
+        supports_tools=True,
+        supports_vision=True,
+        supports_streaming=True,
+        tokenizer_backend="google",
+        aliases=("gemini-1.5-pro-latest",),
+        notes="Gemini 1.5 Pro - 2M context window",
+    )
+
+    _MODELS["gemini-1.5-flash"] = ModelInfo(
+        name="gemini-1.5-flash",
+        provider="google",
+        context_window=1000000,
+        max_output_tokens=8192,
+        supports_tools=True,
+        supports_vision=True,
+        supports_streaming=True,
+        tokenizer_backend="google",
+        aliases=("gemini-1.5-flash-latest",),
+        notes="Gemini 1.5 Flash - Cost-effective",
+    )
+
+    # ============================================================
+    # Meta Llama Models (open source)
+    # ============================================================
+
+    _MODELS["llama-3.3-70b"] = ModelInfo(
+        name="llama-3.3-70b",
+        provider="meta",
+        context_window=128000,
+        max_output_tokens=4096,
+        supports_tools=True,
+        supports_vision=False,
+        supports_streaming=True,
+        tokenizer_backend="huggingface",
+        aliases=("llama-3.3-70b-instruct", "meta-llama/Llama-3.3-70B-Instruct"),
+        notes="Llama 3.3 70B - Open source",
+    )
+
+    _MODELS["llama-3.1-405b"] = ModelInfo(
+        name="llama-3.1-405b",
+        provider="meta",
+        context_window=128000,
+        max_output_tokens=4096,
+        supports_tools=True,
+        supports_vision=False,
+        supports_streaming=True,
+        tokenizer_backend="huggingface",
+        aliases=("llama-3.1-405b-instruct", "meta-llama/Llama-3.1-405B-Instruct"),
+        notes="Llama 3.1 405B - Largest open source",
+    )
+
+    _MODELS["llama-3.1-70b"] = ModelInfo(
+        name="llama-3.1-70b",
+        provider="meta",
+        context_window=128000,
+        max_output_tokens=4096,
+        supports_tools=True,
+        supports_vision=False,
+        supports_streaming=True,
+        tokenizer_backend="huggingface",
+        aliases=("llama-3.1-70b-instruct", "meta-llama/Llama-3.1-70B-Instruct"),
+        notes="Llama 3.1 70B",
+    )
+
+    _MODELS["llama-3.1-8b"] = ModelInfo(
+        name="llama-3.1-8b",
+        provider="meta",
+        context_window=128000,
+        max_output_tokens=4096,
+        supports_tools=True,
+        supports_vision=False,
+        supports_streaming=True,
+        tokenizer_backend="huggingface",
+        aliases=("llama-3.1-8b-instruct", "meta-llama/Llama-3.1-8B-Instruct"),
+        notes="Llama 3.1 8B - Fast and efficient",
+    )
+
+    # ============================================================
+    # Mistral Models
+    # ============================================================
+
+    _MODELS["mistral-large"] = ModelInfo(
+        name="mistral-large",
+        provider="mistral",
+        context_window=128000,
+        max_output_tokens=4096,
+        supports_tools=True,
+        supports_vision=False,
+        supports_streaming=True,
+        tokenizer_backend="huggingface",
+        aliases=("mistral-large-latest",),
+        notes="Mistral Large - Best capability",
+    )
+
+    _MODELS["mistral-small"] = ModelInfo(
+        name="mistral-small",
+        provider="mistral",
+        context_window=32768,
+        max_output_tokens=4096,
+        supports_tools=True,
+        supports_vision=False,
+        supports_streaming=True,
+        tokenizer_backend="huggingface",
+        aliases=("mistral-small-latest",),
+        notes="Mistral Small - Cost-effective",
+    )
+
+    _MODELS["mixtral-8x7b"] = ModelInfo(
+        name="mixtral-8x7b",
+        provider="mistral",
+        context_window=32768,
+        max_output_tokens=4096,
+        supports_tools=True,
+        supports_vision=False,
+        supports_streaming=True,
+        tokenizer_backend="huggingface",
+        aliases=("mixtral-8x7b-instruct",),
+        notes="Mixtral 8x7B - MoE architecture",
+    )
+
+    _MODELS["mistral-7b"] = ModelInfo(
+        name="mistral-7b",
+        provider="mistral",
+        context_window=32768,
+        max_output_tokens=4096,
+        supports_tools=False,
+        supports_vision=False,
+        supports_streaming=True,
+        tokenizer_backend="huggingface",
+        aliases=("mistral-7b-instruct",),
+        notes="Mistral 7B - Open source",
+    )
+
+    # ============================================================
+    # DeepSeek Models
+    # ============================================================
+
+    _MODELS["deepseek-v3"] = ModelInfo(
+        name="deepseek-v3",
+        provider="deepseek",
+        context_window=128000,
+        max_output_tokens=8192,
+        supports_tools=True,
+        supports_vision=False,
+        supports_streaming=True,
+        tokenizer_backend="huggingface",
+        notes="DeepSeek V3 - High performance, low cost",
+    )
+
+    _MODELS["deepseek-coder"] = ModelInfo(
+        name="deepseek-coder",
+        provider="deepseek",
+        context_window=16384,
+        max_output_tokens=4096,
+        supports_tools=False,
+        supports_vision=False,
+        supports_streaming=True,
+        tokenizer_backend="huggingface",
+        notes="DeepSeek Coder - Specialized for code",
+    )
+
+    # ============================================================
+    # Qwen Models
+    # ============================================================
+
+    _MODELS["qwen2.5-72b"] = ModelInfo(
+        name="qwen2.5-72b",
+        provider="alibaba",
+        context_window=131072,
+        max_output_tokens=8192,
+        supports_tools=True,
+        supports_vision=False,
+        supports_streaming=True,
+        tokenizer_backend="huggingface",
+        aliases=("qwen2.5-72b-instruct",),
+        notes="Qwen 2.5 72B - Strong multilingual",
+    )
+
+    _MODELS["qwen2.5-7b"] = ModelInfo(
+        name="qwen2.5-7b",
+        provider="alibaba",
+        context_window=131072,
+        max_output_tokens=8192,
+        supports_tools=True,
+        supports_vision=False,
+        supports_streaming=True,
+        tokenizer_backend="huggingface",
+        aliases=("qwen2.5-7b-instruct",),
+        notes="Qwen 2.5 7B - Efficient",
+    )
+
+
+# Initialize built-in models
+_register_builtin_models()
+
+# Build alias lookup
+_ALIASES: dict[str, str] = {}
+for model_name, info in _MODELS.items():
+    for alias in info.aliases:
+        _ALIASES[alias.lower()] = model_name
+
+
+class ModelRegistry:
+    """Registry of LLM models and their capabilities.
+
+    Singleton registry providing access to model information.
+    Supports built-in models and custom registration.
+
+    Example:
+        # Get model info
+        info = ModelRegistry.get("gpt-4o")
+        print(f"Context: {info.context_window}")
+
+        # Register custom model
+        ModelRegistry.register(
+            "my-model",
+            provider="custom",
+            context_window=32000,
+        )
+
+        # List models by provider
+        openai_models = ModelRegistry.list_models(provider="openai")
+    """
+
+    @classmethod
+    def get(cls, model: str) -> ModelInfo | None:
+        """Get model information.
+
+        Args:
+            model: Model name or alias.
+
+        Returns:
+            ModelInfo if found, None otherwise.
+        """
+        model_lower = model.lower()
+
+        # Direct lookup
+        if model_lower in _MODELS:
+            return _MODELS[model_lower]
+
+        # Alias lookup
+        if model_lower in _ALIASES:
+            return _MODELS[_ALIASES[model_lower]]
+
+        # Prefix matching
+        for name, info in _MODELS.items():
+            if model_lower.startswith(name):
+                return info
+
+        return None
+
+    @classmethod
+    def register(
+        cls,
+        model: str,
+        provider: str,
+        context_window: int = 128000,
+        **kwargs: Any,
+    ) -> ModelInfo:
+        """Register a custom model.
+
+        Args:
+            model: Model name.
+            provider: Provider name.
+            context_window: Maximum context window.
+            **kwargs: Additional ModelInfo fields.
+
+        Returns:
+            Registered ModelInfo.
+        """
+        info = ModelInfo(
+            name=model,
+            provider=provider,
+            context_window=context_window,
+            **kwargs,
+        )
+        _MODELS[model.lower()] = info
+
+        # Register aliases
+        for alias in info.aliases:
+            _ALIASES[alias.lower()] = model.lower()
+
+        return info
+
+    @classmethod
+    def list_models(
+        cls,
+        provider: str | None = None,
+        supports_tools: bool | None = None,
+        supports_vision: bool | None = None,
+        min_context: int | None = None,
+    ) -> list[ModelInfo]:
+        """List models matching criteria.
+
+        Args:
+            provider: Filter by provider.
+            supports_tools: Filter by tool support.
+            supports_vision: Filter by vision support.
+            min_context: Minimum context window.
+
+        Returns:
+            List of matching ModelInfo.
+        """
+        results = []
+        for info in _MODELS.values():
+            if provider and info.provider != provider:
+                continue
+            if supports_tools is not None and info.supports_tools != supports_tools:
+                continue
+            if supports_vision is not None and info.supports_vision != supports_vision:
+                continue
+            if min_context and info.context_window < min_context:
+                continue
+            results.append(info)
+        return results
+
+    @classmethod
+    def list_providers(cls) -> list[str]:
+        """List all known providers.
+
+        Returns:
+            List of provider names.
+        """
+        return list({info.provider for info in _MODELS.values()})
+
+    @classmethod
+    def get_context_limit(cls, model: str, default: int = 128000) -> int:
+        """Get context limit for a model.
+
+        Args:
+            model: Model name.
+            default: Default if model not found.
+
+        Returns:
+            Context window size.
+        """
+        info = cls.get(model)
+        return info.context_window if info else default
+
+    @classmethod
+    def estimate_cost(
+        cls,
+        model: str,
+        input_tokens: int,
+        output_tokens: int,
+        cached_tokens: int = 0,
+    ) -> float | None:
+        """Estimate API cost for a model using LiteLLM's pricing database.
+
+        Args:
+            model: Model name.
+            input_tokens: Number of input tokens.
+            output_tokens: Number of output tokens.
+            cached_tokens: Number of cached input tokens (not currently used).
+
+        Returns:
+            Estimated cost in USD, or None if pricing unknown.
+        """
+        # Use LiteLLM's pricing database
+        return litellm_estimate_cost(model, input_tokens, output_tokens)
+
+    @classmethod
+    def get_pricing(cls, model: str) -> tuple[float, float] | None:
+        """Get pricing for a model from LiteLLM's database.
+
+        Args:
+            model: Model name.
+
+        Returns:
+            Tuple of (input_cost_per_1m, output_cost_per_1m) or None if not found.
+        """
+        pricing = get_model_pricing(model)
+        if pricing is None:
+            return None
+        return (pricing.input_cost_per_1m, pricing.output_cost_per_1m)
+
+
+# Convenience functions
+def get_model_info(model: str) -> ModelInfo | None:
+    """Get information about a model.
+
+    Args:
+        model: Model name or alias.
+
+    Returns:
+        ModelInfo if found, None otherwise.
+    """
+    return ModelRegistry.get(model)
+
+
+def list_models(
+    provider: str | None = None,
+    **kwargs: Any,
+) -> list[ModelInfo]:
+    """List models matching criteria.
+
+    Args:
+        provider: Filter by provider.
+        **kwargs: Additional filter criteria.
+
+    Returns:
+        List of matching ModelInfo.
+    """
+    return ModelRegistry.list_models(provider=provider, **kwargs)
+
+
+def register_model(
+    model: str,
+    provider: str,
+    context_window: int = 128000,
+    **kwargs: Any,
+) -> ModelInfo:
+    """Register a custom model.
+
+    Args:
+        model: Model name.
+        provider: Provider name.
+        context_window: Maximum context window.
+        **kwargs: Additional ModelInfo fields.
+
+    Returns:
+        Registered ModelInfo.
+    """
+    return ModelRegistry.register(model, provider, context_window, **kwargs)
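
For orientation, a minimal usage sketch of the registry module added above. It assumes the module is importable as headroom.models.registry (matching the file path in this wheel; the package's headroom/models/__init__.py may also re-export these names), and "my-internal-llm" is a hypothetical model name used only for illustration:

from headroom.models.registry import ModelRegistry, get_model_info, register_model

# Look up a built-in model by name or alias; aliases and prefix matching
# both resolve to the canonical entry.
info = get_model_info("gpt-4o-2024-08-06")
if info is not None:
    print(info.name, info.provider, info.context_window)

# Estimate cost via LiteLLM's pricing database; returns None when the
# model is unknown to that database.
cost = ModelRegistry.estimate_cost("gpt-4o", input_tokens=12_000, output_tokens=800)
print(f"~${cost:.4f}" if cost is not None else "pricing unavailable")

# Register a custom or self-hosted model so context-limit lookups work for it.
register_model(
    "my-internal-llm",  # hypothetical model name
    provider="custom",
    context_window=32_000,
    supports_tools=False,
)
print(ModelRegistry.get_context_limit("my-internal-llm"))  # 32000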