langroid 0.58.2__py3-none-any.whl → 0.59.0b1__py3-none-any.whl
This diff shows the changes between two package versions publicly released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- langroid/agent/base.py +39 -17
- langroid/agent/base.py-e +2216 -0
- langroid/agent/callbacks/chainlit.py +2 -1
- langroid/agent/chat_agent.py +73 -55
- langroid/agent/chat_agent.py-e +2086 -0
- langroid/agent/chat_document.py +7 -7
- langroid/agent/chat_document.py-e +513 -0
- langroid/agent/openai_assistant.py +9 -9
- langroid/agent/openai_assistant.py-e +882 -0
- langroid/agent/special/arangodb/arangodb_agent.py +10 -18
- langroid/agent/special/arangodb/arangodb_agent.py-e +648 -0
- langroid/agent/special/arangodb/tools.py +3 -3
- langroid/agent/special/doc_chat_agent.py +16 -14
- langroid/agent/special/lance_rag/critic_agent.py +2 -2
- langroid/agent/special/lance_rag/query_planner_agent.py +4 -4
- langroid/agent/special/lance_tools.py +6 -5
- langroid/agent/special/lance_tools.py-e +61 -0
- langroid/agent/special/neo4j/neo4j_chat_agent.py +3 -7
- langroid/agent/special/neo4j/neo4j_chat_agent.py-e +430 -0
- langroid/agent/special/relevance_extractor_agent.py +1 -1
- langroid/agent/special/sql/sql_chat_agent.py +11 -3
- langroid/agent/task.py +9 -87
- langroid/agent/task.py-e +2418 -0
- langroid/agent/tool_message.py +33 -17
- langroid/agent/tool_message.py-e +400 -0
- langroid/agent/tools/file_tools.py +4 -2
- langroid/agent/tools/file_tools.py-e +234 -0
- langroid/agent/tools/mcp/fastmcp_client.py +19 -6
- langroid/agent/tools/mcp/fastmcp_client.py-e +584 -0
- langroid/agent/tools/orchestration.py +22 -17
- langroid/agent/tools/orchestration.py-e +301 -0
- langroid/agent/tools/recipient_tool.py +3 -3
- langroid/agent/tools/task_tool.py +22 -16
- langroid/agent/tools/task_tool.py-e +249 -0
- langroid/agent/xml_tool_message.py +90 -35
- langroid/agent/xml_tool_message.py-e +392 -0
- langroid/cachedb/base.py +1 -1
- langroid/embedding_models/base.py +2 -2
- langroid/embedding_models/models.py +3 -7
- langroid/embedding_models/models.py-e +563 -0
- langroid/exceptions.py +4 -1
- langroid/language_models/azure_openai.py +2 -2
- langroid/language_models/azure_openai.py-e +134 -0
- langroid/language_models/base.py +6 -4
- langroid/language_models/base.py-e +812 -0
- langroid/language_models/client_cache.py +64 -0
- langroid/language_models/config.py +2 -4
- langroid/language_models/config.py-e +18 -0
- langroid/language_models/model_info.py +9 -1
- langroid/language_models/model_info.py-e +483 -0
- langroid/language_models/openai_gpt.py +119 -20
- langroid/language_models/openai_gpt.py-e +2280 -0
- langroid/language_models/provider_params.py +3 -22
- langroid/language_models/provider_params.py-e +153 -0
- langroid/mytypes.py +11 -4
- langroid/mytypes.py-e +132 -0
- langroid/parsing/code_parser.py +1 -1
- langroid/parsing/file_attachment.py +1 -1
- langroid/parsing/file_attachment.py-e +246 -0
- langroid/parsing/md_parser.py +14 -4
- langroid/parsing/md_parser.py-e +574 -0
- langroid/parsing/parser.py +22 -7
- langroid/parsing/parser.py-e +410 -0
- langroid/parsing/repo_loader.py +3 -1
- langroid/parsing/repo_loader.py-e +812 -0
- langroid/parsing/search.py +1 -1
- langroid/parsing/url_loader.py +17 -51
- langroid/parsing/url_loader.py-e +683 -0
- langroid/parsing/urls.py +5 -4
- langroid/parsing/urls.py-e +279 -0
- langroid/prompts/prompts_config.py +1 -1
- langroid/pydantic_v1/__init__.py +45 -6
- langroid/pydantic_v1/__init__.py-e +36 -0
- langroid/pydantic_v1/main.py +11 -4
- langroid/pydantic_v1/main.py-e +11 -0
- langroid/utils/configuration.py +13 -11
- langroid/utils/configuration.py-e +141 -0
- langroid/utils/constants.py +1 -1
- langroid/utils/constants.py-e +32 -0
- langroid/utils/globals.py +21 -5
- langroid/utils/globals.py-e +49 -0
- langroid/utils/html_logger.py +2 -1
- langroid/utils/html_logger.py-e +825 -0
- langroid/utils/object_registry.py +1 -1
- langroid/utils/object_registry.py-e +66 -0
- langroid/utils/pydantic_utils.py +55 -28
- langroid/utils/pydantic_utils.py-e +602 -0
- langroid/utils/types.py +2 -2
- langroid/utils/types.py-e +113 -0
- langroid/vector_store/base.py +3 -3
- langroid/vector_store/lancedb.py +5 -5
- langroid/vector_store/lancedb.py-e +404 -0
- langroid/vector_store/meilisearch.py +2 -2
- langroid/vector_store/pineconedb.py +4 -4
- langroid/vector_store/pineconedb.py-e +427 -0
- langroid/vector_store/postgres.py +1 -1
- langroid/vector_store/qdrantdb.py +3 -3
- langroid/vector_store/weaviatedb.py +1 -1
- {langroid-0.58.2.dist-info → langroid-0.59.0b1.dist-info}/METADATA +3 -2
- langroid-0.59.0b1.dist-info/RECORD +181 -0
- langroid/agent/special/doc_chat_task.py +0 -0
- langroid/mcp/__init__.py +0 -1
- langroid/mcp/server/__init__.py +0 -1
- langroid-0.58.2.dist-info/RECORD +0 -145
- {langroid-0.58.2.dist-info → langroid-0.59.0b1.dist-info}/WHEEL +0 -0
- {langroid-0.58.2.dist-info → langroid-0.59.0b1.dist-info}/licenses/LICENSE +0 -0
langroid/language_models/model_info.py-e
@@ -0,0 +1,483 @@
+from enum import Enum
+from typing import Dict, List, Optional
+
+from pydantic import BaseModel
+
+
+class ModelProvider(str, Enum):
+    """Enum for model providers"""
+
+    OPENAI = "openai"
+    ANTHROPIC = "anthropic"
+    DEEPSEEK = "deepseek"
+    GOOGLE = "google"
+    UNKNOWN = "unknown"
+
+
+class ModelName(str, Enum):
+    """Parent class for all model name enums"""
+
+    pass
+
+
+class OpenAIChatModel(ModelName):
+    """Enum for OpenAI Chat models"""
+
+    GPT3_5_TURBO = "gpt-3.5-turbo"
+    GPT4 = "gpt-4o"  # avoid deprecated gpt-4
+    GPT4_TURBO = "gpt-4-turbo"
+    GPT4o = "gpt-4o"
+    GPT4o_MINI = "gpt-4o-mini"
+    O1 = "o1"
+    O1_MINI = "o1-mini"
+    O3_MINI = "o3-mini"
+    O3 = "o3"
+    O4_MINI = "o4-mini"
+    GPT4_1 = "gpt-4.1"
+    GPT4_1_MINI = "gpt-4.1-mini"
+    GPT4_1_NANO = "gpt-4.1-nano"
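All of these enums mix in `str`, so members compare equal to their raw string values, and standard enum aliasing applies when two members share a value (as `GPT4` and `GPT4o` do above). A minimal sketch of that behavior, not part of the diff (the import path is assumed from this file's location in the wheel):

```python
# Sketch only: import path assumed from the file's location in the wheel.
from langroid.language_models.model_info import OpenAIChatModel

# str mix-in: members compare equal to, and can stand in for, plain strings
assert OpenAIChatModel.GPT4o == "gpt-4o"

# GPT4 is also assigned "gpt-4o", so GPT4o is an alias of GPT4 (same member object)
assert OpenAIChatModel.GPT4 is OpenAIChatModel.GPT4o
assert OpenAIChatModel.GPT4o.name == "GPT4"
```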
+
+
+class OpenAICompletionModel(str, Enum):
+    """Enum for OpenAI Completion models"""
+
+    DAVINCI = "davinci-002"
+    BABBAGE = "babbage-002"
+
+
+class AnthropicModel(ModelName):
+    """Enum for Anthropic models"""
+
+    CLAUDE_3_5_SONNET = "claude-3-5-sonnet-latest"
+    CLAUDE_3_7_SONNET = "claude-3-7-sonnet-latest"
+    CLAUDE_3_OPUS = "claude-3-opus-latest"
+    CLAUDE_3_SONNET = "claude-3-sonnet-20240229"
+    CLAUDE_3_HAIKU = "claude-3-haiku-20240307"
+
+
+class DeepSeekModel(ModelName):
+    """Enum for DeepSeek models direct from DeepSeek API"""
+
+    DEEPSEEK = "deepseek/deepseek-chat"
+    DEEPSEEK_R1 = "deepseek/deepseek-reasoner"
+    OPENROUTER_DEEPSEEK_R1 = "openrouter/deepseek/deepseek-r1"
+
+
+class GeminiModel(ModelName):
+    """Enum for Gemini models"""
+
+    GEMINI_1_5_FLASH = "gemini-1.5-flash"
+    GEMINI_1_5_FLASH_8B = "gemini-1.5-flash-8b"
+    GEMINI_1_5_PRO = "gemini-1.5-pro"
+    GEMINI_2_5_PRO = "gemini-2.5-pro"
+    GEMINI_2_5_FLASH = "gemini-2.5-flash"
+    GEMINI_2_5_FLASH_LITE_PREVIEW = "gemini-2.5-flash-lite-preview-06-17"
+    GEMINI_2_PRO = "gemini-2.0-pro-exp-02-05"
+    GEMINI_2_FLASH = "gemini-2.0-flash"
+    GEMINI_2_FLASH_LITE = "gemini-2.0-flash-lite-preview"
+    GEMINI_2_FLASH_THINKING = "gemini-2.0-flash-thinking-exp"
+
+
+class OpenAI_API_ParamInfo(BaseModel):
+    """
+    Parameters exclusive to some models, when using OpenAI API
+    """
+
+    # model-specific params at top level
+    params: Dict[str, List[str]] = dict(
+        reasoning_effort=[
+            OpenAIChatModel.O3_MINI.value,
+        ],
+    )
+    # model-specific params in extra_body
+    extra_parameters: Dict[str, List[str]] = dict(
+        include_reasoning=[
+            DeepSeekModel.OPENROUTER_DEEPSEEK_R1.value,
+        ]
+    )
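`OpenAI_API_ParamInfo` is a whitelist keyed by parameter name: a parameter applies only to the models listed under it. A hedged sketch of how a caller might consult it (the helper below is illustrative, not langroid's actual request-building code):

```python
from langroid.language_models.model_info import OpenAI_API_ParamInfo

param_info = OpenAI_API_ParamInfo()

def params_for(model: str) -> list[str]:
    """Illustrative helper: top-level params whitelisted for `model`."""
    return [name for name, models in param_info.params.items() if model in models]

print(params_for("o3-mini"))  # -> ['reasoning_effort']
print(params_for("gpt-4o"))   # -> []
```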
+
+
+class ModelInfo(BaseModel):
+    """
+    Consolidated information about LLM, related to capacity, cost and API
+    idiosyncrasies. Reasonable defaults for all params in case there's no
+    specific info available.
+    """
+
+    name: str = "unknown"
+    provider: ModelProvider = ModelProvider.UNKNOWN
+    context_length: int = 16_000
+    max_cot_tokens: int = 0  # max chain of thought (thinking) tokens where applicable
+    max_output_tokens: int = 8192  # Maximum number of output tokens - model dependent
+    input_cost_per_million: float = 0.0  # Cost in USD per million input tokens
+    cached_cost_per_million: float = 0.0  # Cost in USD per million cached tokens
+    output_cost_per_million: float = 0.0  # Cost in USD per million output tokens
+    allows_streaming: bool = True  # Whether model supports streaming output
+    allows_system_message: bool = True  # Whether model supports system messages
+    rename_params: Dict[str, str] = {}  # Rename parameters for OpenAI API
+    unsupported_params: List[str] = []
+    has_structured_output: bool = False  # Does model API support structured output?
+    has_tools: bool = True  # Does model API support tools/function-calling?
+    needs_first_user_message: bool = False  # Does API need first msg to be from user?
+    description: Optional[str] = None
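The three `*_cost_per_million` fields make per-request cost estimates a one-liner. A sketch, with one labeled assumption: cached input tokens are billed at the cached rate *instead of* the normal input rate (this file does not pin down that semantics):

```python
from langroid.language_models.model_info import ModelInfo

def estimate_cost_usd(
    info: ModelInfo, input_toks: int, cached_toks: int, output_toks: int
) -> float:
    # Assumption: cached tokens replace (not add to) normally billed input tokens.
    return (
        (input_toks - cached_toks) * info.input_cost_per_million
        + cached_toks * info.cached_cost_per_million
        + output_toks * info.output_cost_per_million
    ) / 1_000_000

# With gpt-4o's rates from the registry below (2.50 / 1.25 / 10.00 per million):
gpt4o = ModelInfo(
    input_cost_per_million=2.5,
    cached_cost_per_million=1.25,
    output_cost_per_million=10.0,
)
print(estimate_cost_usd(gpt4o, 10_000, 2_000, 1_000))  # 0.0325
```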
+
+
+# Model information registry
+MODEL_INFO: Dict[str, ModelInfo] = {
+    # OpenAI Models
+    OpenAICompletionModel.DAVINCI.value: ModelInfo(
+        name=OpenAICompletionModel.DAVINCI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=4096,
+        max_output_tokens=4096,
+        input_cost_per_million=2.0,
+        output_cost_per_million=2.0,
+        description="Davinci-002",
+    ),
+    OpenAICompletionModel.BABBAGE.value: ModelInfo(
+        name=OpenAICompletionModel.BABBAGE.value,
+        provider=ModelProvider.OPENAI,
+        context_length=4096,
+        max_output_tokens=4096,
+        input_cost_per_million=0.40,
+        output_cost_per_million=0.40,
+        description="Babbage-002",
+    ),
+    OpenAIChatModel.GPT3_5_TURBO.value: ModelInfo(
+        name=OpenAIChatModel.GPT3_5_TURBO.value,
+        provider=ModelProvider.OPENAI,
+        context_length=16_385,
+        max_output_tokens=4096,
+        input_cost_per_million=0.50,
+        output_cost_per_million=1.50,
+        description="GPT-3.5 Turbo",
+    ),
+    OpenAIChatModel.GPT4.value: ModelInfo(
+        name=OpenAIChatModel.GPT4.value,
+        provider=ModelProvider.OPENAI,
+        context_length=8192,
+        max_output_tokens=8192,
+        input_cost_per_million=30.0,
+        output_cost_per_million=60.0,
+        description="GPT-4 (8K context)",
+    ),
+    OpenAIChatModel.GPT4_TURBO.value: ModelInfo(
+        name=OpenAIChatModel.GPT4_TURBO.value,
+        provider=ModelProvider.OPENAI,
+        context_length=128_000,
+        max_output_tokens=4096,
+        input_cost_per_million=10.0,
+        output_cost_per_million=30.0,
+        description="GPT-4 Turbo",
+    ),
+    OpenAIChatModel.GPT4_1_NANO.value: ModelInfo(
+        name=OpenAIChatModel.GPT4_1_NANO.value,
+        provider=ModelProvider.OPENAI,
+        context_length=1_047_576,
+        max_output_tokens=32_768,
+        input_cost_per_million=0.10,
+        cached_cost_per_million=0.025,
+        output_cost_per_million=0.40,
+        description="GPT-4.1 Nano",
+    ),
+    OpenAIChatModel.GPT4_1_MINI.value: ModelInfo(
+        name=OpenAIChatModel.GPT4_1_MINI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=1_047_576,
+        max_output_tokens=32_768,
+        input_cost_per_million=0.40,
+        cached_cost_per_million=0.10,
+        output_cost_per_million=1.60,
+        description="GPT-4.1 Mini",
+    ),
+    OpenAIChatModel.GPT4_1.value: ModelInfo(
+        name=OpenAIChatModel.GPT4_1.value,
+        provider=ModelProvider.OPENAI,
+        context_length=1_047_576,
+        max_output_tokens=32_768,
+        input_cost_per_million=2.00,
+        cached_cost_per_million=0.50,
+        output_cost_per_million=8.00,
+        description="GPT-4.1",
+    ),
+    OpenAIChatModel.GPT4o.value: ModelInfo(
+        name=OpenAIChatModel.GPT4o.value,
+        provider=ModelProvider.OPENAI,
+        context_length=128_000,
+        max_output_tokens=16_384,
+        input_cost_per_million=2.5,
+        cached_cost_per_million=1.25,
+        output_cost_per_million=10.0,
+        has_structured_output=True,
+        description="GPT-4o (128K context)",
+    ),
+    OpenAIChatModel.GPT4o_MINI.value: ModelInfo(
+        name=OpenAIChatModel.GPT4o_MINI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=128_000,
+        max_output_tokens=16_384,
+        input_cost_per_million=0.15,
+        cached_cost_per_million=0.075,
+        output_cost_per_million=0.60,
+        has_structured_output=True,
+        description="GPT-4o Mini",
+    ),
+    OpenAIChatModel.O1.value: ModelInfo(
+        name=OpenAIChatModel.O1.value,
+        provider=ModelProvider.OPENAI,
+        context_length=200_000,
+        max_output_tokens=100_000,
+        input_cost_per_million=15.0,
+        cached_cost_per_million=7.50,
+        output_cost_per_million=60.0,
+        allows_streaming=True,
+        allows_system_message=False,
+        unsupported_params=["temperature"],
+        rename_params={"max_tokens": "max_completion_tokens"},
+        has_tools=False,
+        description="O1 Reasoning LM",
+    ),
+    OpenAIChatModel.O3.value: ModelInfo(
+        name=OpenAIChatModel.O3.value,
+        provider=ModelProvider.OPENAI,
+        context_length=200_000,
+        max_output_tokens=100_000,
+        input_cost_per_million=2.0,
+        cached_cost_per_million=0.50,
+        output_cost_per_million=8.0,
+        allows_streaming=True,
+        allows_system_message=False,
+        unsupported_params=["temperature"],
+        rename_params={"max_tokens": "max_completion_tokens"},
+        has_tools=False,
+        description="O3 Reasoning LM",
+    ),
+    OpenAIChatModel.O1_MINI.value: ModelInfo(
+        name=OpenAIChatModel.O1_MINI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=128_000,
+        max_output_tokens=65_536,
+        input_cost_per_million=1.1,
+        cached_cost_per_million=0.55,
+        output_cost_per_million=4.4,
+        allows_streaming=False,
+        allows_system_message=False,
+        unsupported_params=["temperature", "stream"],
+        rename_params={"max_tokens": "max_completion_tokens"},
+        has_tools=False,
+        description="O1 Mini Reasoning LM",
+    ),
+    OpenAIChatModel.O3_MINI.value: ModelInfo(
+        name=OpenAIChatModel.O3_MINI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=200_000,
+        max_output_tokens=100_000,
+        input_cost_per_million=1.1,
+        cached_cost_per_million=0.55,
+        output_cost_per_million=4.4,
+        allows_streaming=False,
+        allows_system_message=False,
+        unsupported_params=["temperature", "stream"],
+        rename_params={"max_tokens": "max_completion_tokens"},
+        has_tools=False,
+        description="O3 Mini Reasoning LM",
+    ),
+    OpenAIChatModel.O4_MINI.value: ModelInfo(
+        name=OpenAIChatModel.O4_MINI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=200_000,
+        max_output_tokens=100_000,
+        input_cost_per_million=1.10,
+        cached_cost_per_million=0.275,
+        output_cost_per_million=4.40,
+        allows_streaming=False,
+        allows_system_message=False,
+        unsupported_params=["temperature", "stream"],
+        rename_params={"max_tokens": "max_completion_tokens"},
+        has_tools=False,
+        description="O4 Mini Reasoning LM",
+    ),
+    # Anthropic Models
+    AnthropicModel.CLAUDE_3_5_SONNET.value: ModelInfo(
+        name=AnthropicModel.CLAUDE_3_5_SONNET.value,
+        provider=ModelProvider.ANTHROPIC,
+        context_length=200_000,
+        max_output_tokens=8192,
+        input_cost_per_million=3.0,
+        cached_cost_per_million=0.30,
+        output_cost_per_million=15.0,
+        description="Claude 3.5 Sonnet",
+    ),
+    AnthropicModel.CLAUDE_3_OPUS.value: ModelInfo(
+        name=AnthropicModel.CLAUDE_3_OPUS.value,
+        provider=ModelProvider.ANTHROPIC,
+        context_length=200_000,
+        max_output_tokens=4096,
+        input_cost_per_million=15.0,
+        cached_cost_per_million=1.50,
+        output_cost_per_million=75.0,
+        description="Claude 3 Opus",
+    ),
+    AnthropicModel.CLAUDE_3_SONNET.value: ModelInfo(
+        name=AnthropicModel.CLAUDE_3_SONNET.value,
+        provider=ModelProvider.ANTHROPIC,
+        context_length=200_000,
+        max_output_tokens=4096,
+        input_cost_per_million=3.0,
+        cached_cost_per_million=0.30,
+        output_cost_per_million=15.0,
+        description="Claude 3 Sonnet",
+    ),
+    AnthropicModel.CLAUDE_3_HAIKU.value: ModelInfo(
+        name=AnthropicModel.CLAUDE_3_HAIKU.value,
+        provider=ModelProvider.ANTHROPIC,
+        context_length=200_000,
+        max_output_tokens=4096,
+        input_cost_per_million=0.25,
+        cached_cost_per_million=0.03,
+        output_cost_per_million=1.25,
+        description="Claude 3 Haiku",
+    ),
+    # DeepSeek Models
+    DeepSeekModel.DEEPSEEK.value: ModelInfo(
+        name=DeepSeekModel.DEEPSEEK.value,
+        provider=ModelProvider.DEEPSEEK,
+        context_length=64_000,
+        max_output_tokens=8_000,
+        input_cost_per_million=0.27,
+        cached_cost_per_million=0.07,
+        output_cost_per_million=1.10,
+        description="DeepSeek Chat",
+    ),
+    DeepSeekModel.DEEPSEEK_R1.value: ModelInfo(
+        name=DeepSeekModel.DEEPSEEK_R1.value,
+        provider=ModelProvider.DEEPSEEK,
+        context_length=64_000,
+        max_output_tokens=8_000,
+        input_cost_per_million=0.55,
+        cached_cost_per_million=0.14,
+        output_cost_per_million=2.19,
+        description="DeepSeek-R1 Reasoning LM",
+    ),
+    # Gemini Models
+    GeminiModel.GEMINI_2_FLASH.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_FLASH.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_056_768,
+        max_output_tokens=8192,
+        input_cost_per_million=0.10,
+        cached_cost_per_million=0.025,
+        output_cost_per_million=0.40,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2.0 Flash",
+    ),
+    GeminiModel.GEMINI_2_FLASH_LITE.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_FLASH_LITE.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_056_768,
+        max_output_tokens=8192,
+        input_cost_per_million=0.075,
+        output_cost_per_million=0.30,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2.0 Flash Lite Preview",
+    ),
+    GeminiModel.GEMINI_1_5_FLASH.value: ModelInfo(
+        name=GeminiModel.GEMINI_1_5_FLASH.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_056_768,
+        max_output_tokens=8192,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 1.5 Flash",
+    ),
+    GeminiModel.GEMINI_1_5_FLASH_8B.value: ModelInfo(
+        name=GeminiModel.GEMINI_1_5_FLASH_8B.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_000_000,
+        max_output_tokens=8192,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 1.5 Flash 8B",
+    ),
+    GeminiModel.GEMINI_1_5_PRO.value: ModelInfo(
+        name=GeminiModel.GEMINI_1_5_PRO.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=2_000_000,
+        max_output_tokens=8192,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 1.5 Pro",
+    ),
+    GeminiModel.GEMINI_2_PRO.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_PRO.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=2_000_000,
+        max_output_tokens=8192,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2 Pro Exp 02-05",
+    ),
+    GeminiModel.GEMINI_2_FLASH_THINKING.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_FLASH_THINKING.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_000_000,
+        max_output_tokens=64_000,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2.0 Flash Thinking",
+    ),
+    # Gemini 2.5 Models
+    GeminiModel.GEMINI_2_5_PRO.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_5_PRO.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_048_576,
+        max_output_tokens=65_536,
+        input_cost_per_million=1.25,
+        cached_cost_per_million=0.31,
+        output_cost_per_million=10.0,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2.5 Pro",
+    ),
+    GeminiModel.GEMINI_2_5_FLASH.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_5_FLASH.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_048_576,
+        max_output_tokens=65_536,
+        input_cost_per_million=0.30,
+        cached_cost_per_million=0.075,
+        output_cost_per_million=2.50,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2.5 Flash",
+    ),
+    GeminiModel.GEMINI_2_5_FLASH_LITE_PREVIEW.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_5_FLASH_LITE_PREVIEW.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=65_536,
+        max_output_tokens=65_536,
+        input_cost_per_million=0.10,
+        cached_cost_per_million=0.025,
+        output_cost_per_million=0.40,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2.5 Flash Lite Preview",
+    ),
+}
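Entries like O1's encode API idiosyncrasies declaratively: `rename_params` maps an OpenAI parameter name to the variant a model expects, and `unsupported_params` lists parameters to drop. A sketch of how such an entry might be consumed (illustrative only; the actual adaptation logic lives elsewhere in langroid, not in this file):

```python
from langroid.language_models.model_info import MODEL_INFO

def adapt_request_kwargs(model: str, kwargs: dict) -> dict:
    """Illustrative: drop unsupported params, rename the rest per the registry."""
    info = MODEL_INFO.get(model)
    if info is None:
        return dict(kwargs)
    return {
        info.rename_params.get(k, k): v
        for k, v in kwargs.items()
        if k not in info.unsupported_params
    }

print(adapt_request_kwargs("o1", {"max_tokens": 256, "temperature": 0.2}))
# -> {'max_completion_tokens': 256}   (temperature dropped, max_tokens renamed)
```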
+
+
+def get_model_info(
+    model: str | ModelName,
+    fallback_models: List[str] = [],
+) -> ModelInfo:
+    """Get model information by name or enum value"""
+    # Sequence of models to try, starting with the primary model
+    models_to_try = [model] + fallback_models
+
+    # Find the first model in the sequence that has info defined using next()
+    # on a generator expression that filters out None results from _get_model_info
+    found_info = next(
+        (info for m in models_to_try if (info := _get_model_info(m)) is not None),
+        None,  # Default value if the iterator is exhausted (no valid info found)
+    )
+
+    # Return the found info, or a default ModelInfo if none was found
+    return found_info or ModelInfo()
+
+
+def _get_model_info(model: str | ModelName) -> ModelInfo | None:
+    if isinstance(model, str):
+        return MODEL_INFO.get(model)
+    return MODEL_INFO.get(model.value)
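A usage sketch for the lookup above (not part of the diff). Note that because `OpenAIChatModel.GPT4` and `GPT4o` share the value `"gpt-4o"`, the later registry entry wins for that key, and a model with no entry anywhere falls back to an all-default `ModelInfo()`:

```python
from langroid.language_models.model_info import get_model_info

info = get_model_info("gpt-4o")
print(info.context_length, info.output_cost_per_million)  # 128000 10.0

# Unknown primary model: the first fallback with a registry entry is used.
info = get_model_info("my-local-finetune", fallback_models=["gpt-4o-mini"])
print(info.name)  # gpt-4o-mini

# No match at all: default ModelInfo (name="unknown", context_length=16_000).
print(get_model_info("nonexistent").name)  # unknown
```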