langroid 0.59.0b2__py3-none-any.whl → 0.59.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/agent/done_sequence_parser.py +46 -11
- langroid/agent/special/doc_chat_task.py +0 -0
- langroid/agent/task.py +44 -7
- langroid/language_models/model_info.py +51 -0
- langroid/mcp/__init__.py +1 -0
- langroid/mcp/server/__init__.py +1 -0
- langroid/pydantic_v1/__init__.py +7 -0
- {langroid-0.59.0b2.dist-info → langroid-0.59.1.dist-info}/METADATA +4 -1
- {langroid-0.59.0b2.dist-info → langroid-0.59.1.dist-info}/RECORD +11 -47
- langroid/agent/base.py-e +0 -2216
- langroid/agent/chat_agent.py-e +0 -2086
- langroid/agent/chat_document.py-e +0 -513
- langroid/agent/openai_assistant.py-e +0 -882
- langroid/agent/special/arangodb/arangodb_agent.py-e +0 -648
- langroid/agent/special/lance_tools.py-e +0 -61
- langroid/agent/special/neo4j/neo4j_chat_agent.py-e +0 -430
- langroid/agent/task.py-e +0 -2418
- langroid/agent/tool_message.py-e +0 -400
- langroid/agent/tools/file_tools.py-e +0 -234
- langroid/agent/tools/mcp/fastmcp_client.py-e +0 -584
- langroid/agent/tools/orchestration.py-e +0 -301
- langroid/agent/tools/task_tool.py-e +0 -249
- langroid/agent/xml_tool_message.py-e +0 -392
- langroid/embedding_models/models.py-e +0 -563
- langroid/language_models/azure_openai.py-e +0 -134
- langroid/language_models/base.py-e +0 -812
- langroid/language_models/config.py-e +0 -18
- langroid/language_models/model_info.py-e +0 -483
- langroid/language_models/openai_gpt.py-e +0 -2280
- langroid/language_models/provider_params.py-e +0 -153
- langroid/mytypes.py-e +0 -132
- langroid/parsing/file_attachment.py-e +0 -246
- langroid/parsing/md_parser.py-e +0 -574
- langroid/parsing/parser.py-e +0 -410
- langroid/parsing/repo_loader.py-e +0 -812
- langroid/parsing/url_loader.py-e +0 -683
- langroid/parsing/urls.py-e +0 -279
- langroid/pydantic_v1/__init__.py-e +0 -36
- langroid/pydantic_v1/main.py-e +0 -11
- langroid/utils/configuration.py-e +0 -141
- langroid/utils/constants.py-e +0 -32
- langroid/utils/globals.py-e +0 -49
- langroid/utils/html_logger.py-e +0 -825
- langroid/utils/object_registry.py-e +0 -66
- langroid/utils/pydantic_utils.py-e +0 -602
- langroid/utils/types.py-e +0 -113
- langroid/vector_store/lancedb.py-e +0 -404
- langroid/vector_store/pineconedb.py-e +0 -427
- {langroid-0.59.0b2.dist-info → langroid-0.59.1.dist-info}/WHEEL +0 -0
- {langroid-0.59.0b2.dist-info → langroid-0.59.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,18 +0,0 @@
|
|
1
|
-
from pydantic_settings import BaseSettings
|
2
|
-
|
3
|
-
from pydantic import ConfigDict
|
4
|
-
|
5
|
-
|
6
|
-
class PromptFormatterConfig(BaseSettings):
    """Base settings shared by the prompt-formatter configurations.

    Values can come from environment variables that carry the ``FORMAT_``
    prefix; variable names are matched case-insensitively.
    """

    # Settings-source options: env-var prefix and case-insensitive matching.
    model_config = ConfigDict(env_prefix="FORMAT_", case_sensitive=False)

    # Identifier of the formatter flavour.
    type: str = "llama2"
|
10
|
-
|
11
|
-
|
12
|
-
class Llama2FormatterConfig(PromptFormatterConfig):
    """Formatter settings for the Llama-2 prompt style."""

    # Off by default.
    # NOTE(review): presumably toggles BOS/EOS token wrapping -- confirm
    # against the formatter that consumes this setting.
    use_bos_eos: bool = False
|
14
|
-
|
15
|
-
|
16
|
-
class HFPromptFormatterConfig(PromptFormatterConfig):
    """Formatter settings for the ``"hf"`` formatter type."""

    # Replaces the default ``type`` inherited from the base settings.
    type: str = "hf"
    # Required: there is no default, so a value must always be supplied.
    model_name: str
|
@@ -1,483 +0,0 @@
|
|
1
|
-
from enum import Enum
|
2
|
-
from typing import Dict, List, Optional
|
3
|
-
|
4
|
-
from pydantic import BaseModel
|
5
|
-
|
6
|
-
|
7
|
-
class ModelProvider(str, Enum):
    """Providers a model can be attributed to.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase string value.
    """

    # Named providers.
    OPENAI = "openai"
    ANTHROPIC = "anthropic"
    DEEPSEEK = "deepseek"
    GOOGLE = "google"

    # Catch-all for models whose provider is not known.
    UNKNOWN = "unknown"
|
15
|
-
|
16
|
-
|
17
|
-
class ModelName(str, Enum):
    """Common parent of the per-provider model-name enums.

    It declares no members itself; it only gives the concrete enums a shared
    base type that is both a ``str`` and an ``Enum``.
    """
|
21
|
-
|
22
|
-
|
23
|
-
class OpenAIChatModel(ModelName):
    """Names of OpenAI chat models.

    ``GPT4`` and ``GPT4o`` share the value ``"gpt-4o"``. ``Enum`` therefore
    treats ``GPT4o`` as an alias of ``GPT4``: both names resolve to the same
    member, and iterating the enum yields it only once.
    """

    # GPT-3.5 / GPT-4 family
    GPT3_5_TURBO = "gpt-3.5-turbo"
    GPT4 = "gpt-4o"  # avoid deprecated gpt-4
    GPT4_TURBO = "gpt-4-turbo"
    GPT4o = "gpt-4o"
    GPT4o_MINI = "gpt-4o-mini"

    # o-series
    O1 = "o1"
    O1_MINI = "o1-mini"
    O3_MINI = "o3-mini"
    O3 = "o3"
    O4_MINI = "o4-mini"

    # GPT-4.1 family
    GPT4_1 = "gpt-4.1"
    GPT4_1_MINI = "gpt-4.1-mini"
    GPT4_1_NANO = "gpt-4.1-nano"
|
39
|
-
|
40
|
-
|
41
|
-
class OpenAICompletionModel(str, Enum):
    """Names of OpenAI completion models.

    Unlike the chat-model enums, this one derives directly from
    ``str`` and ``Enum``.
    """

    DAVINCI = "davinci-002"
    BABBAGE = "babbage-002"
|
46
|
-
|
47
|
-
|
48
|
-
class AnthropicModel(ModelName):
    """Names of Anthropic models."""

    # "-latest" aliases
    CLAUDE_3_5_SONNET = "claude-3-5-sonnet-latest"
    CLAUDE_3_7_SONNET = "claude-3-7-sonnet-latest"
    CLAUDE_3_OPUS = "claude-3-opus-latest"

    # Date-pinned names
    CLAUDE_3_SONNET = "claude-3-sonnet-20240229"
    CLAUDE_3_HAIKU = "claude-3-haiku-20240307"
|
56
|
-
|
57
|
-
|
58
|
-
class DeepSeekModel(ModelName):
    """Names of DeepSeek models.

    Values are provider-prefixed: ``deepseek/...`` for the DeepSeek API and
    ``openrouter/deepseek/...`` for the OpenRouter-hosted R1 model.
    """

    # "deepseek/" prefix
    DEEPSEEK = "deepseek/deepseek-chat"
    DEEPSEEK_R1 = "deepseek/deepseek-reasoner"

    # "openrouter/" prefix
    OPENROUTER_DEEPSEEK_R1 = "openrouter/deepseek/deepseek-r1"
|
64
|
-
|
65
|
-
|
66
|
-
class GeminiModel(ModelName):
    """Names of Gemini models."""

    # Gemini 1.5
    GEMINI_1_5_FLASH = "gemini-1.5-flash"
    GEMINI_1_5_FLASH_8B = "gemini-1.5-flash-8b"
    GEMINI_1_5_PRO = "gemini-1.5-pro"

    # Gemini 2.5
    GEMINI_2_5_PRO = "gemini-2.5-pro"
    GEMINI_2_5_FLASH = "gemini-2.5-flash"
    GEMINI_2_5_FLASH_LITE_PREVIEW = "gemini-2.5-flash-lite-preview-06-17"

    # Gemini 2.0
    GEMINI_2_PRO = "gemini-2.0-pro-exp-02-05"
    GEMINI_2_FLASH = "gemini-2.0-flash"
    GEMINI_2_FLASH_LITE = "gemini-2.0-flash-lite-preview"
    GEMINI_2_FLASH_THINKING = "gemini-2.0-flash-thinking-exp"
|
79
|
-
|
80
|
-
|
81
|
-
class OpenAI_API_ParamInfo(BaseModel):
    """
    Parameters that are exclusive to some models when using the OpenAI API.

    Both fields map a parameter name to the list of model names that the
    parameter is associated with.
    """

    # model-specific params at top level
    params: Dict[str, List[str]] = {
        "reasoning_effort": [OpenAIChatModel.O3_MINI.value],
    }

    # model-specific params in extra_body
    extra_parameters: Dict[str, List[str]] = {
        "include_reasoning": [DeepSeekModel.OPENROUTER_DEEPSEEK_R1.value],
    }
|
98
|
-
|
99
|
-
|
100
|
-
class ModelInfo(BaseModel):
    """
    Per-model record of capacity limits, pricing and API idiosyncrasies.

    Every field has a default, so a bare ``ModelInfo()`` can stand in for a
    model about which no specific information is available.
    """

    # --- identity ---
    name: str = "unknown"
    provider: ModelProvider = ModelProvider.UNKNOWN

    # --- capacity, in tokens ---
    context_length: int = 16_000
    # max chain of thought (thinking) tokens where applicable
    max_cot_tokens: int = 0
    # maximum number of output tokens - model dependent
    max_output_tokens: int = 8192

    # --- pricing, in USD per million tokens ---
    input_cost_per_million: float = 0.0
    cached_cost_per_million: float = 0.0
    output_cost_per_million: float = 0.0

    # --- API behaviour ---
    # whether the model supports streaming output
    allows_streaming: bool = True
    # whether the model supports system messages
    allows_system_message: bool = True
    # parameters to rename for the OpenAI API (old name -> new name)
    rename_params: Dict[str, str] = {}
    # parameter names listed as unsupported for this model
    unsupported_params: List[str] = []
    # does the model API support structured output?
    has_structured_output: bool = False
    # does the model API support tools/function-calling?
    has_tools: bool = True
    # does the API need the first message to be from the user?
    needs_first_user_message: bool = False

    # --- free-form label ---
    description: Optional[str] = None
|
123
|
-
|
124
|
-
|
125
|
-
# Model information registry
|
126
|
-
# Maps a model-name string to its ModelInfo record. Keys are the ``.value``
# of the model-name enums, except where noted below.
MODEL_INFO: Dict[str, ModelInfo] = {
    # ----------------------------------------------------------------------
    # OpenAI completion models
    # ----------------------------------------------------------------------
    OpenAICompletionModel.DAVINCI.value: ModelInfo(
        name=OpenAICompletionModel.DAVINCI.value,
        provider=ModelProvider.OPENAI,
        context_length=4096,
        max_output_tokens=4096,
        input_cost_per_million=2.0,
        output_cost_per_million=2.0,
        description="Davinci-002",
    ),
    OpenAICompletionModel.BABBAGE.value: ModelInfo(
        name=OpenAICompletionModel.BABBAGE.value,
        provider=ModelProvider.OPENAI,
        context_length=4096,
        max_output_tokens=4096,
        input_cost_per_million=0.40,
        output_cost_per_million=0.40,
        description="Babbage-002",
    ),
    # ----------------------------------------------------------------------
    # OpenAI chat models
    # ----------------------------------------------------------------------
    OpenAIChatModel.GPT3_5_TURBO.value: ModelInfo(
        name=OpenAIChatModel.GPT3_5_TURBO.value,
        provider=ModelProvider.OPENAI,
        context_length=16_385,
        max_output_tokens=4096,
        input_cost_per_million=0.50,
        output_cost_per_million=1.50,
        description="GPT-3.5 Turbo",
    ),
    # Legacy 8K GPT-4. This record is keyed by the literal "gpt-4" because
    # OpenAIChatModel.GPT4.value is "gpt-4o": keyed by the enum value it
    # collided with the GPT-4o record further down, which silently replaced
    # it and left this data unreachable.
    "gpt-4": ModelInfo(
        name="gpt-4",
        provider=ModelProvider.OPENAI,
        context_length=8192,
        max_output_tokens=8192,
        input_cost_per_million=30.0,
        output_cost_per_million=60.0,
        description="GPT-4 (8K context)",
    ),
    OpenAIChatModel.GPT4_TURBO.value: ModelInfo(
        name=OpenAIChatModel.GPT4_TURBO.value,
        provider=ModelProvider.OPENAI,
        context_length=128_000,
        max_output_tokens=4096,
        input_cost_per_million=10.0,
        output_cost_per_million=30.0,
        description="GPT-4 Turbo",
    ),
    OpenAIChatModel.GPT4_1_NANO.value: ModelInfo(
        name=OpenAIChatModel.GPT4_1_NANO.value,
        provider=ModelProvider.OPENAI,
        context_length=1_047_576,
        max_output_tokens=32_768,
        input_cost_per_million=0.10,
        cached_cost_per_million=0.025,
        output_cost_per_million=0.40,
        description="GPT-4.1 Nano",
    ),
    OpenAIChatModel.GPT4_1_MINI.value: ModelInfo(
        name=OpenAIChatModel.GPT4_1_MINI.value,
        provider=ModelProvider.OPENAI,
        context_length=1_047_576,
        max_output_tokens=32_768,
        input_cost_per_million=0.40,
        cached_cost_per_million=0.10,
        output_cost_per_million=1.60,
        description="GPT-4.1 Mini",
    ),
    OpenAIChatModel.GPT4_1.value: ModelInfo(
        name=OpenAIChatModel.GPT4_1.value,
        provider=ModelProvider.OPENAI,
        context_length=1_047_576,
        max_output_tokens=32_768,
        input_cost_per_million=2.00,
        cached_cost_per_million=0.50,
        output_cost_per_million=8.00,
        description="GPT-4.1",
    ),
    OpenAIChatModel.GPT4o.value: ModelInfo(
        name=OpenAIChatModel.GPT4o.value,
        provider=ModelProvider.OPENAI,
        context_length=128_000,
        max_output_tokens=16_384,
        input_cost_per_million=2.5,
        cached_cost_per_million=1.25,
        output_cost_per_million=10.0,
        has_structured_output=True,
        description="GPT-4o (128K context)",
    ),
    OpenAIChatModel.GPT4o_MINI.value: ModelInfo(
        name=OpenAIChatModel.GPT4o_MINI.value,
        provider=ModelProvider.OPENAI,
        context_length=128_000,
        max_output_tokens=16_384,
        input_cost_per_million=0.15,
        cached_cost_per_million=0.075,
        output_cost_per_million=0.60,
        has_structured_output=True,
        description="GPT-4o Mini",
    ),
    # ----------------------------------------------------------------------
    # OpenAI o-series reasoning models
    # ----------------------------------------------------------------------
    OpenAIChatModel.O1.value: ModelInfo(
        name=OpenAIChatModel.O1.value,
        provider=ModelProvider.OPENAI,
        context_length=200_000,
        max_output_tokens=100_000,
        input_cost_per_million=15.0,
        cached_cost_per_million=7.50,
        output_cost_per_million=60.0,
        allows_streaming=True,
        allows_system_message=False,
        unsupported_params=["temperature"],
        rename_params={"max_tokens": "max_completion_tokens"},
        has_tools=False,
        description="O1 Reasoning LM",
    ),
    OpenAIChatModel.O3.value: ModelInfo(
        name=OpenAIChatModel.O3.value,
        provider=ModelProvider.OPENAI,
        context_length=200_000,
        max_output_tokens=100_000,
        input_cost_per_million=2.0,
        cached_cost_per_million=0.50,
        output_cost_per_million=8.0,
        allows_streaming=True,
        allows_system_message=False,
        unsupported_params=["temperature"],
        rename_params={"max_tokens": "max_completion_tokens"},
        has_tools=False,
        description="O3 Reasoning LM",
    ),
    OpenAIChatModel.O1_MINI.value: ModelInfo(
        name=OpenAIChatModel.O1_MINI.value,
        provider=ModelProvider.OPENAI,
        context_length=128_000,
        max_output_tokens=65_536,
        input_cost_per_million=1.1,
        cached_cost_per_million=0.55,
        output_cost_per_million=4.4,
        allows_streaming=False,
        allows_system_message=False,
        unsupported_params=["temperature", "stream"],
        rename_params={"max_tokens": "max_completion_tokens"},
        has_tools=False,
        description="O1 Mini Reasoning LM",
    ),
    OpenAIChatModel.O3_MINI.value: ModelInfo(
        name=OpenAIChatModel.O3_MINI.value,
        provider=ModelProvider.OPENAI,
        context_length=200_000,
        max_output_tokens=100_000,
        input_cost_per_million=1.1,
        cached_cost_per_million=0.55,
        output_cost_per_million=4.4,
        allows_streaming=False,
        allows_system_message=False,
        unsupported_params=["temperature", "stream"],
        rename_params={"max_tokens": "max_completion_tokens"},
        has_tools=False,
        description="O3 Mini Reasoning LM",
    ),
    OpenAIChatModel.O4_MINI.value: ModelInfo(
        name=OpenAIChatModel.O4_MINI.value,
        provider=ModelProvider.OPENAI,
        context_length=200_000,
        max_output_tokens=100_000,
        input_cost_per_million=1.10,
        cached_cost_per_million=0.275,
        output_cost_per_million=4.40,
        allows_streaming=False,
        allows_system_message=False,
        unsupported_params=["temperature", "stream"],
        rename_params={"max_tokens": "max_completion_tokens"},
        has_tools=False,
        description="O4 Mini Reasoning LM",
    ),
    # ----------------------------------------------------------------------
    # Anthropic models
    # NOTE(review): AnthropicModel.CLAUDE_3_7_SONNET has no record here, so
    # lookups for it fall back to the ModelInfo defaults -- confirm intended.
    # ----------------------------------------------------------------------
    AnthropicModel.CLAUDE_3_5_SONNET.value: ModelInfo(
        name=AnthropicModel.CLAUDE_3_5_SONNET.value,
        provider=ModelProvider.ANTHROPIC,
        context_length=200_000,
        max_output_tokens=8192,
        input_cost_per_million=3.0,
        cached_cost_per_million=0.30,
        output_cost_per_million=15.0,
        description="Claude 3.5 Sonnet",
    ),
    AnthropicModel.CLAUDE_3_OPUS.value: ModelInfo(
        name=AnthropicModel.CLAUDE_3_OPUS.value,
        provider=ModelProvider.ANTHROPIC,
        context_length=200_000,
        max_output_tokens=4096,
        input_cost_per_million=15.0,
        cached_cost_per_million=1.50,
        output_cost_per_million=75.0,
        description="Claude 3 Opus",
    ),
    AnthropicModel.CLAUDE_3_SONNET.value: ModelInfo(
        name=AnthropicModel.CLAUDE_3_SONNET.value,
        provider=ModelProvider.ANTHROPIC,
        context_length=200_000,
        max_output_tokens=4096,
        input_cost_per_million=3.0,
        cached_cost_per_million=0.30,
        output_cost_per_million=15.0,
        description="Claude 3 Sonnet",
    ),
    AnthropicModel.CLAUDE_3_HAIKU.value: ModelInfo(
        name=AnthropicModel.CLAUDE_3_HAIKU.value,
        provider=ModelProvider.ANTHROPIC,
        context_length=200_000,
        max_output_tokens=4096,
        input_cost_per_million=0.25,
        cached_cost_per_million=0.03,
        output_cost_per_million=1.25,
        description="Claude 3 Haiku",
    ),
    # ----------------------------------------------------------------------
    # DeepSeek models
    # ----------------------------------------------------------------------
    DeepSeekModel.DEEPSEEK.value: ModelInfo(
        name=DeepSeekModel.DEEPSEEK.value,
        provider=ModelProvider.DEEPSEEK,
        context_length=64_000,
        max_output_tokens=8_000,
        input_cost_per_million=0.27,
        cached_cost_per_million=0.07,
        output_cost_per_million=1.10,
        description="DeepSeek Chat",
    ),
    DeepSeekModel.DEEPSEEK_R1.value: ModelInfo(
        name=DeepSeekModel.DEEPSEEK_R1.value,
        provider=ModelProvider.DEEPSEEK,
        context_length=64_000,
        max_output_tokens=8_000,
        input_cost_per_million=0.55,
        cached_cost_per_million=0.14,
        output_cost_per_million=2.19,
        description="DeepSeek-R1 Reasoning LM",
    ),
    # ----------------------------------------------------------------------
    # Gemini 1.5 / 2.0 models
    # ----------------------------------------------------------------------
    GeminiModel.GEMINI_2_FLASH.value: ModelInfo(
        name=GeminiModel.GEMINI_2_FLASH.value,
        provider=ModelProvider.GOOGLE,
        context_length=1_056_768,
        max_output_tokens=8192,
        input_cost_per_million=0.10,
        cached_cost_per_million=0.025,
        output_cost_per_million=0.40,
        rename_params={"max_tokens": "max_completion_tokens"},
        description="Gemini 2.0 Flash",
    ),
    GeminiModel.GEMINI_2_FLASH_LITE.value: ModelInfo(
        name=GeminiModel.GEMINI_2_FLASH_LITE.value,
        provider=ModelProvider.GOOGLE,
        context_length=1_056_768,
        max_output_tokens=8192,
        input_cost_per_million=0.075,
        output_cost_per_million=0.30,
        rename_params={"max_tokens": "max_completion_tokens"},
        description="Gemini 2.0 Flash Lite Preview",
    ),
    GeminiModel.GEMINI_1_5_FLASH.value: ModelInfo(
        name=GeminiModel.GEMINI_1_5_FLASH.value,
        provider=ModelProvider.GOOGLE,
        context_length=1_056_768,
        max_output_tokens=8192,
        rename_params={"max_tokens": "max_completion_tokens"},
        description="Gemini 1.5 Flash",
    ),
    GeminiModel.GEMINI_1_5_FLASH_8B.value: ModelInfo(
        name=GeminiModel.GEMINI_1_5_FLASH_8B.value,
        provider=ModelProvider.GOOGLE,
        context_length=1_000_000,
        max_output_tokens=8192,
        rename_params={"max_tokens": "max_completion_tokens"},
        description="Gemini 1.5 Flash 8B",
    ),
    GeminiModel.GEMINI_1_5_PRO.value: ModelInfo(
        name=GeminiModel.GEMINI_1_5_PRO.value,
        provider=ModelProvider.GOOGLE,
        context_length=2_000_000,
        max_output_tokens=8192,
        rename_params={"max_tokens": "max_completion_tokens"},
        description="Gemini 1.5 Pro",
    ),
    GeminiModel.GEMINI_2_PRO.value: ModelInfo(
        name=GeminiModel.GEMINI_2_PRO.value,
        provider=ModelProvider.GOOGLE,
        context_length=2_000_000,
        max_output_tokens=8192,
        rename_params={"max_tokens": "max_completion_tokens"},
        description="Gemini 2 Pro Exp 02-05",
    ),
    GeminiModel.GEMINI_2_FLASH_THINKING.value: ModelInfo(
        name=GeminiModel.GEMINI_2_FLASH_THINKING.value,
        provider=ModelProvider.GOOGLE,
        context_length=1_000_000,
        max_output_tokens=64_000,
        rename_params={"max_tokens": "max_completion_tokens"},
        description="Gemini 2.0 Flash Thinking",
    ),
    # ----------------------------------------------------------------------
    # Gemini 2.5 models
    # ----------------------------------------------------------------------
    GeminiModel.GEMINI_2_5_PRO.value: ModelInfo(
        name=GeminiModel.GEMINI_2_5_PRO.value,
        provider=ModelProvider.GOOGLE,
        context_length=1_048_576,
        max_output_tokens=65_536,
        input_cost_per_million=1.25,
        cached_cost_per_million=0.31,
        output_cost_per_million=10.0,
        rename_params={"max_tokens": "max_completion_tokens"},
        description="Gemini 2.5 Pro",
    ),
    GeminiModel.GEMINI_2_5_FLASH.value: ModelInfo(
        name=GeminiModel.GEMINI_2_5_FLASH.value,
        provider=ModelProvider.GOOGLE,
        context_length=1_048_576,
        max_output_tokens=65_536,
        input_cost_per_million=0.30,
        cached_cost_per_million=0.075,
        output_cost_per_million=2.50,
        rename_params={"max_tokens": "max_completion_tokens"},
        description="Gemini 2.5 Flash",
    ),
    GeminiModel.GEMINI_2_5_FLASH_LITE_PREVIEW.value: ModelInfo(
        name=GeminiModel.GEMINI_2_5_FLASH_LITE_PREVIEW.value,
        provider=ModelProvider.GOOGLE,
        context_length=65_536,
        max_output_tokens=65_536,
        input_cost_per_million=0.10,
        cached_cost_per_million=0.025,
        output_cost_per_million=0.40,
        rename_params={"max_tokens": "max_completion_tokens"},
        description="Gemini 2.5 Flash Lite Preview",
    ),
}
|
459
|
-
|
460
|
-
|
461
|
-
def get_model_info(
    model: str | ModelName,
    fallback_models: Optional[List[str]] = None,
) -> ModelInfo:
    """Get model information by name or enum value, with optional fallbacks.

    Args:
        model: Primary model to look up, as a plain string or a
            ``ModelName`` member.
        fallback_models: Names to try, in order, when ``model`` has no
            registry entry. ``None`` (the default) means no fallbacks.

    Returns:
        The info of the first candidate that has a registry entry, or a
        default-constructed ``ModelInfo`` when none of them does.
    """
    # ``None`` replaces the former mutable ``[]`` default; treating it as
    # "no fallbacks" keeps every existing call working unchanged.
    candidates = [model, *(fallback_models or [])]

    # The primary model comes first, so it wins whenever it is known.
    for candidate in candidates:
        info = _get_model_info(candidate)
        if info is not None:
            return info

    # Nothing matched: fall back to the all-defaults record.
    return ModelInfo()
|
478
|
-
|
479
|
-
|
480
|
-
def _get_model_info(model: str | ModelName) -> ModelInfo | None:
    """Look up ``model`` in ``MODEL_INFO``; return ``None`` if it is absent.

    Anything that is a ``str`` is used as the key directly. That includes
    ``ModelName`` members, because ``ModelName`` derives from ``str``. Any
    other object is looked up by its ``.value`` attribute.
    """
    key = model if isinstance(model, str) else model.value
    return MODEL_INFO.get(key)
|