langroid 0.58.3__py3-none-any.whl → 0.59.0b2__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the registry.
- langroid/agent/base.py +39 -17
- langroid/agent/base.py-e +2216 -0
- langroid/agent/callbacks/chainlit.py +2 -1
- langroid/agent/chat_agent.py +73 -55
- langroid/agent/chat_agent.py-e +2086 -0
- langroid/agent/chat_document.py +7 -7
- langroid/agent/chat_document.py-e +513 -0
- langroid/agent/openai_assistant.py +9 -9
- langroid/agent/openai_assistant.py-e +882 -0
- langroid/agent/special/arangodb/arangodb_agent.py +10 -18
- langroid/agent/special/arangodb/arangodb_agent.py-e +648 -0
- langroid/agent/special/arangodb/tools.py +3 -3
- langroid/agent/special/doc_chat_agent.py +16 -14
- langroid/agent/special/lance_rag/critic_agent.py +2 -2
- langroid/agent/special/lance_rag/query_planner_agent.py +4 -4
- langroid/agent/special/lance_tools.py +6 -5
- langroid/agent/special/lance_tools.py-e +61 -0
- langroid/agent/special/neo4j/neo4j_chat_agent.py +3 -7
- langroid/agent/special/neo4j/neo4j_chat_agent.py-e +430 -0
- langroid/agent/special/relevance_extractor_agent.py +1 -1
- langroid/agent/special/sql/sql_chat_agent.py +11 -3
- langroid/agent/task.py +9 -87
- langroid/agent/task.py-e +2418 -0
- langroid/agent/tool_message.py +33 -17
- langroid/agent/tool_message.py-e +400 -0
- langroid/agent/tools/file_tools.py +4 -2
- langroid/agent/tools/file_tools.py-e +234 -0
- langroid/agent/tools/mcp/fastmcp_client.py +19 -6
- langroid/agent/tools/mcp/fastmcp_client.py-e +584 -0
- langroid/agent/tools/orchestration.py +22 -17
- langroid/agent/tools/orchestration.py-e +301 -0
- langroid/agent/tools/recipient_tool.py +3 -3
- langroid/agent/tools/task_tool.py +22 -16
- langroid/agent/tools/task_tool.py-e +249 -0
- langroid/agent/xml_tool_message.py +90 -35
- langroid/agent/xml_tool_message.py-e +392 -0
- langroid/cachedb/base.py +1 -1
- langroid/embedding_models/base.py +2 -2
- langroid/embedding_models/models.py +3 -7
- langroid/embedding_models/models.py-e +563 -0
- langroid/exceptions.py +4 -1
- langroid/language_models/azure_openai.py +2 -2
- langroid/language_models/azure_openai.py-e +134 -0
- langroid/language_models/base.py +6 -4
- langroid/language_models/base.py-e +812 -0
- langroid/language_models/config.py +2 -4
- langroid/language_models/config.py-e +18 -0
- langroid/language_models/model_info.py +9 -1
- langroid/language_models/model_info.py-e +483 -0
- langroid/language_models/openai_gpt.py +53 -18
- langroid/language_models/openai_gpt.py-e +2280 -0
- langroid/language_models/provider_params.py +3 -22
- langroid/language_models/provider_params.py-e +153 -0
- langroid/mytypes.py +11 -4
- langroid/mytypes.py-e +132 -0
- langroid/parsing/code_parser.py +1 -1
- langroid/parsing/file_attachment.py +1 -1
- langroid/parsing/file_attachment.py-e +246 -0
- langroid/parsing/md_parser.py +14 -4
- langroid/parsing/md_parser.py-e +574 -0
- langroid/parsing/parser.py +22 -7
- langroid/parsing/parser.py-e +410 -0
- langroid/parsing/repo_loader.py +3 -1
- langroid/parsing/repo_loader.py-e +812 -0
- langroid/parsing/search.py +1 -1
- langroid/parsing/url_loader.py +17 -51
- langroid/parsing/url_loader.py-e +683 -0
- langroid/parsing/urls.py +5 -4
- langroid/parsing/urls.py-e +279 -0
- langroid/prompts/prompts_config.py +1 -1
- langroid/pydantic_v1/__init__.py +56 -6
- langroid/pydantic_v1/__init__.py-e +36 -0
- langroid/pydantic_v1/main.py +10 -4
- langroid/pydantic_v1/main.py-e +11 -0
- langroid/utils/configuration.py +13 -11
- langroid/utils/configuration.py-e +141 -0
- langroid/utils/constants.py +1 -1
- langroid/utils/constants.py-e +32 -0
- langroid/utils/globals.py +21 -5
- langroid/utils/globals.py-e +49 -0
- langroid/utils/html_logger.py +2 -1
- langroid/utils/html_logger.py-e +825 -0
- langroid/utils/object_registry.py +1 -1
- langroid/utils/object_registry.py-e +66 -0
- langroid/utils/pydantic_utils.py +55 -28
- langroid/utils/pydantic_utils.py-e +602 -0
- langroid/utils/types.py +2 -2
- langroid/utils/types.py-e +113 -0
- langroid/vector_store/base.py +3 -3
- langroid/vector_store/lancedb.py +5 -5
- langroid/vector_store/lancedb.py-e +404 -0
- langroid/vector_store/meilisearch.py +2 -2
- langroid/vector_store/pineconedb.py +4 -4
- langroid/vector_store/pineconedb.py-e +427 -0
- langroid/vector_store/postgres.py +1 -1
- langroid/vector_store/qdrantdb.py +3 -3
- langroid/vector_store/weaviatedb.py +1 -1
- {langroid-0.58.3.dist-info → langroid-0.59.0b2.dist-info}/METADATA +3 -2
- langroid-0.59.0b2.dist-info/RECORD +181 -0
- langroid/agent/special/doc_chat_task.py +0 -0
- langroid/mcp/__init__.py +0 -1
- langroid/mcp/server/__init__.py +0 -1
- langroid-0.58.3.dist-info/RECORD +0 -145
- {langroid-0.58.3.dist-info → langroid-0.59.0b2.dist-info}/WHEEL +0 -0
- {langroid-0.58.3.dist-info → langroid-0.59.0b2.dist-info}/licenses/LICENSE +0 -0
--- a/langroid/language_models/config.py
+++ b/langroid/language_models/config.py
@@ -1,12 +1,10 @@
-from
+from pydantic_settings import BaseSettings, SettingsConfigDict
 
 
 class PromptFormatterConfig(BaseSettings):
     type: str = "llama2"
 
-    class Config:
-        env_prefix = "FORMAT_"
-        case_sensitive = False
+    model_config = SettingsConfigDict(env_prefix="FORMAT_", case_sensitive=False)
 
 
 class Llama2FormatterConfig(PromptFormatterConfig):
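The hunk above is the standard Pydantic v1 → v2 migration pattern: settings that used to live on an inner `Config` class move into a `model_config = SettingsConfigDict(...)` declaration. A minimal sketch of how the migrated class behaves; the `FORMAT_TYPE` environment variable is an illustrative assumption derived from the `FORMAT_` prefix:

```python
import os

from pydantic_settings import BaseSettings, SettingsConfigDict


class PromptFormatterConfig(BaseSettings):
    type: str = "llama2"

    model_config = SettingsConfigDict(env_prefix="FORMAT_", case_sensitive=False)


# env_prefix="FORMAT_" means the `type` field is read from FORMAT_TYPE;
# case_sensitive=False lets any casing of the variable name match.
os.environ["FORMAT_TYPE"] = "hf"
print(PromptFormatterConfig().type)  # -> "hf"
```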
--- /dev/null
+++ b/langroid/language_models/config.py-e
@@ -0,0 +1,18 @@
+from pydantic_settings import BaseSettings
+
+from pydantic import ConfigDict
+
+
+class PromptFormatterConfig(BaseSettings):
+    type: str = "llama2"
+
+    model_config = ConfigDict(env_prefix="FORMAT_", case_sensitive=False)
+
+
+class Llama2FormatterConfig(PromptFormatterConfig):
+    use_bos_eos: bool = False
+
+
+class HFPromptFormatterConfig(PromptFormatterConfig):
+    type: str = "hf"
+    model_name: str
--- a/langroid/language_models/model_info.py
+++ b/langroid/language_models/model_info.py
@@ -1,7 +1,7 @@
 from enum import Enum
 from typing import Dict, List, Optional
 
-from
+from pydantic import BaseModel
 
 
 class ModelProvider(str, Enum):
@@ -173,6 +173,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
     OpenAIChatModel.GPT4_1_NANO.value: ModelInfo(
         name=OpenAIChatModel.GPT4_1_NANO.value,
         provider=ModelProvider.OPENAI,
+        has_structured_output=True,
         context_length=1_047_576,
         max_output_tokens=32_768,
         input_cost_per_million=0.10,
@@ -183,6 +184,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
     OpenAIChatModel.GPT4_1_MINI.value: ModelInfo(
         name=OpenAIChatModel.GPT4_1_MINI.value,
         provider=ModelProvider.OPENAI,
+        has_structured_output=True,
         context_length=1_047_576,
         max_output_tokens=32_768,
         input_cost_per_million=0.40,
@@ -193,6 +195,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
     OpenAIChatModel.GPT4_1.value: ModelInfo(
         name=OpenAIChatModel.GPT4_1.value,
         provider=ModelProvider.OPENAI,
+        has_structured_output=True,
         context_length=1_047_576,
         max_output_tokens=32_768,
         input_cost_per_million=2.00,
@@ -232,6 +235,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
         output_cost_per_million=60.0,
         allows_streaming=True,
         allows_system_message=False,
+        has_structured_output=True,
         unsupported_params=["temperature"],
         rename_params={"max_tokens": "max_completion_tokens"},
         has_tools=False,
@@ -247,6 +251,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
         output_cost_per_million=8.0,
         allows_streaming=True,
         allows_system_message=False,
+        has_structured_output=True,
         unsupported_params=["temperature"],
         rename_params={"max_tokens": "max_completion_tokens"},
         has_tools=False,
@@ -262,6 +267,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
         output_cost_per_million=4.4,
         allows_streaming=False,
         allows_system_message=False,
+        has_structured_output=True,
         unsupported_params=["temperature", "stream"],
         rename_params={"max_tokens": "max_completion_tokens"},
         has_tools=False,
@@ -277,6 +283,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
         output_cost_per_million=4.4,
         allows_streaming=False,
         allows_system_message=False,
+        has_structured_output=True,
         unsupported_params=["temperature", "stream"],
         rename_params={"max_tokens": "max_completion_tokens"},
         has_tools=False,
@@ -292,6 +299,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
         output_cost_per_million=4.40,
         allows_streaming=False,
         allows_system_message=False,
+        has_structured_output=True,
         unsupported_params=["temperature", "stream"],
         rename_params={"max_tokens": "max_completion_tokens"},
         has_tools=False,
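The eight hunks above all make the same change: `has_structured_output=True` is added to the registry entries for the GPT-4.1 family (nano, mini, full) and, judging by the hunk positions against the full listing below, the o-series reasoning models (o1, o3, o1-mini, o3-mini, o4-mini). A short sketch of how a caller can consult the flag through `get_model_info` (defined in the new-file listing that follows):

```python
from langroid.language_models.model_info import OpenAIChatModel, get_model_info

info = get_model_info(OpenAIChatModel.GPT4_1_MINI)
if info.has_structured_output:
    # Safe to ask this model for schema-constrained (structured) output.
    print(f"{info.name} supports structured output")
```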
--- /dev/null
+++ b/langroid/language_models/model_info.py-e
@@ -0,0 +1,483 @@
+from enum import Enum
+from typing import Dict, List, Optional
+
+from pydantic import BaseModel
+
+
+class ModelProvider(str, Enum):
+    """Enum for model providers"""
+
+    OPENAI = "openai"
+    ANTHROPIC = "anthropic"
+    DEEPSEEK = "deepseek"
+    GOOGLE = "google"
+    UNKNOWN = "unknown"
+
+
+class ModelName(str, Enum):
+    """Parent class for all model name enums"""
+
+    pass
+
+
+class OpenAIChatModel(ModelName):
+    """Enum for OpenAI Chat models"""
+
+    GPT3_5_TURBO = "gpt-3.5-turbo"
+    GPT4 = "gpt-4o"  # avoid deprecated gpt-4
+    GPT4_TURBO = "gpt-4-turbo"
+    GPT4o = "gpt-4o"
+    GPT4o_MINI = "gpt-4o-mini"
+    O1 = "o1"
+    O1_MINI = "o1-mini"
+    O3_MINI = "o3-mini"
+    O3 = "o3"
+    O4_MINI = "o4-mini"
+    GPT4_1 = "gpt-4.1"
+    GPT4_1_MINI = "gpt-4.1-mini"
+    GPT4_1_NANO = "gpt-4.1-nano"
+
+
+class OpenAICompletionModel(str, Enum):
+    """Enum for OpenAI Completion models"""
+
+    DAVINCI = "davinci-002"
+    BABBAGE = "babbage-002"
+
+
+class AnthropicModel(ModelName):
+    """Enum for Anthropic models"""
+
+    CLAUDE_3_5_SONNET = "claude-3-5-sonnet-latest"
+    CLAUDE_3_7_SONNET = "claude-3-7-sonnet-latest"
+    CLAUDE_3_OPUS = "claude-3-opus-latest"
+    CLAUDE_3_SONNET = "claude-3-sonnet-20240229"
+    CLAUDE_3_HAIKU = "claude-3-haiku-20240307"
+
+
+class DeepSeekModel(ModelName):
+    """Enum for DeepSeek models direct from DeepSeek API"""
+
+    DEEPSEEK = "deepseek/deepseek-chat"
+    DEEPSEEK_R1 = "deepseek/deepseek-reasoner"
+    OPENROUTER_DEEPSEEK_R1 = "openrouter/deepseek/deepseek-r1"
+
+
+class GeminiModel(ModelName):
+    """Enum for Gemini models"""
+
+    GEMINI_1_5_FLASH = "gemini-1.5-flash"
+    GEMINI_1_5_FLASH_8B = "gemini-1.5-flash-8b"
+    GEMINI_1_5_PRO = "gemini-1.5-pro"
+    GEMINI_2_5_PRO = "gemini-2.5-pro"
+    GEMINI_2_5_FLASH = "gemini-2.5-flash"
+    GEMINI_2_5_FLASH_LITE_PREVIEW = "gemini-2.5-flash-lite-preview-06-17"
+    GEMINI_2_PRO = "gemini-2.0-pro-exp-02-05"
+    GEMINI_2_FLASH = "gemini-2.0-flash"
+    GEMINI_2_FLASH_LITE = "gemini-2.0-flash-lite-preview"
+    GEMINI_2_FLASH_THINKING = "gemini-2.0-flash-thinking-exp"
+
+
+class OpenAI_API_ParamInfo(BaseModel):
+    """
+    Parameters exclusive to some models, when using OpenAI API
+    """
+
+    # model-specific params at top level
+    params: Dict[str, List[str]] = dict(
+        reasoning_effort=[
+            OpenAIChatModel.O3_MINI.value,
+        ],
+    )
+    # model-specific params in extra_body
+    extra_parameters: Dict[str, List[str]] = dict(
+        include_reasoning=[
+            DeepSeekModel.OPENROUTER_DEEPSEEK_R1.value,
+        ]
+    )
+
+
+class ModelInfo(BaseModel):
+    """
+    Consolidated information about LLM, related to capacity, cost and API
+    idiosyncrasies. Reasonable defaults for all params in case there's no
+    specific info available.
+    """
+
+    name: str = "unknown"
+    provider: ModelProvider = ModelProvider.UNKNOWN
+    context_length: int = 16_000
+    max_cot_tokens: int = 0  # max chain of thought (thinking) tokens where applicable
+    max_output_tokens: int = 8192  # Maximum number of output tokens - model dependent
+    input_cost_per_million: float = 0.0  # Cost in USD per million input tokens
+    cached_cost_per_million: float = 0.0  # Cost in USD per million cached tokens
+    output_cost_per_million: float = 0.0  # Cost in USD per million output tokens
+    allows_streaming: bool = True  # Whether model supports streaming output
+    allows_system_message: bool = True  # Whether model supports system messages
+    rename_params: Dict[str, str] = {}  # Rename parameters for OpenAI API
+    unsupported_params: List[str] = []
+    has_structured_output: bool = False  # Does model API support structured output?
+    has_tools: bool = True  # Does model API support tools/function-calling?
+    needs_first_user_message: bool = False  # Does API need first msg to be from user?
+    description: Optional[str] = None
+
+
+# Model information registry
+MODEL_INFO: Dict[str, ModelInfo] = {
+    # OpenAI Models
+    OpenAICompletionModel.DAVINCI.value: ModelInfo(
+        name=OpenAICompletionModel.DAVINCI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=4096,
+        max_output_tokens=4096,
+        input_cost_per_million=2.0,
+        output_cost_per_million=2.0,
+        description="Davinci-002",
+    ),
+    OpenAICompletionModel.BABBAGE.value: ModelInfo(
+        name=OpenAICompletionModel.BABBAGE.value,
+        provider=ModelProvider.OPENAI,
+        context_length=4096,
+        max_output_tokens=4096,
+        input_cost_per_million=0.40,
+        output_cost_per_million=0.40,
+        description="Babbage-002",
+    ),
+    OpenAIChatModel.GPT3_5_TURBO.value: ModelInfo(
+        name=OpenAIChatModel.GPT3_5_TURBO.value,
+        provider=ModelProvider.OPENAI,
+        context_length=16_385,
+        max_output_tokens=4096,
+        input_cost_per_million=0.50,
+        output_cost_per_million=1.50,
+        description="GPT-3.5 Turbo",
+    ),
+    OpenAIChatModel.GPT4.value: ModelInfo(
+        name=OpenAIChatModel.GPT4.value,
+        provider=ModelProvider.OPENAI,
+        context_length=8192,
+        max_output_tokens=8192,
+        input_cost_per_million=30.0,
+        output_cost_per_million=60.0,
+        description="GPT-4 (8K context)",
+    ),
+    OpenAIChatModel.GPT4_TURBO.value: ModelInfo(
+        name=OpenAIChatModel.GPT4_TURBO.value,
+        provider=ModelProvider.OPENAI,
+        context_length=128_000,
+        max_output_tokens=4096,
+        input_cost_per_million=10.0,
+        output_cost_per_million=30.0,
+        description="GPT-4 Turbo",
+    ),
+    OpenAIChatModel.GPT4_1_NANO.value: ModelInfo(
+        name=OpenAIChatModel.GPT4_1_NANO.value,
+        provider=ModelProvider.OPENAI,
+        context_length=1_047_576,
+        max_output_tokens=32_768,
+        input_cost_per_million=0.10,
+        cached_cost_per_million=0.025,
+        output_cost_per_million=0.40,
+        description="GPT-4.1",
+    ),
+    OpenAIChatModel.GPT4_1_MINI.value: ModelInfo(
+        name=OpenAIChatModel.GPT4_1_MINI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=1_047_576,
+        max_output_tokens=32_768,
+        input_cost_per_million=0.40,
+        cached_cost_per_million=0.10,
+        output_cost_per_million=1.60,
+        description="GPT-4.1 Mini",
+    ),
+    OpenAIChatModel.GPT4_1.value: ModelInfo(
+        name=OpenAIChatModel.GPT4_1.value,
+        provider=ModelProvider.OPENAI,
+        context_length=1_047_576,
+        max_output_tokens=32_768,
+        input_cost_per_million=2.00,
+        cached_cost_per_million=0.50,
+        output_cost_per_million=8.00,
+        description="GPT-4.1",
+    ),
+    OpenAIChatModel.GPT4o.value: ModelInfo(
+        name=OpenAIChatModel.GPT4o.value,
+        provider=ModelProvider.OPENAI,
+        context_length=128_000,
+        max_output_tokens=16_384,
+        input_cost_per_million=2.5,
+        cached_cost_per_million=1.25,
+        output_cost_per_million=10.0,
+        has_structured_output=True,
+        description="GPT-4o (128K context)",
+    ),
+    OpenAIChatModel.GPT4o_MINI.value: ModelInfo(
+        name=OpenAIChatModel.GPT4o_MINI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=128_000,
+        max_output_tokens=16_384,
+        input_cost_per_million=0.15,
+        cached_cost_per_million=0.075,
+        output_cost_per_million=0.60,
+        has_structured_output=True,
+        description="GPT-4o Mini",
+    ),
+    OpenAIChatModel.O1.value: ModelInfo(
+        name=OpenAIChatModel.O1.value,
+        provider=ModelProvider.OPENAI,
+        context_length=200_000,
+        max_output_tokens=100_000,
+        input_cost_per_million=15.0,
+        cached_cost_per_million=7.50,
+        output_cost_per_million=60.0,
+        allows_streaming=True,
+        allows_system_message=False,
+        unsupported_params=["temperature"],
+        rename_params={"max_tokens": "max_completion_tokens"},
+        has_tools=False,
+        description="O1 Reasoning LM",
+    ),
+    OpenAIChatModel.O3.value: ModelInfo(
+        name=OpenAIChatModel.O3.value,
+        provider=ModelProvider.OPENAI,
+        context_length=200_000,
+        max_output_tokens=100_000,
+        input_cost_per_million=2.0,
+        cached_cost_per_million=0.50,
+        output_cost_per_million=8.0,
+        allows_streaming=True,
+        allows_system_message=False,
+        unsupported_params=["temperature"],
+        rename_params={"max_tokens": "max_completion_tokens"},
+        has_tools=False,
+        description="O1 Reasoning LM",
+    ),
+    OpenAIChatModel.O1_MINI.value: ModelInfo(
+        name=OpenAIChatModel.O1_MINI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=128_000,
+        max_output_tokens=65_536,
+        input_cost_per_million=1.1,
+        cached_cost_per_million=0.55,
+        output_cost_per_million=4.4,
+        allows_streaming=False,
+        allows_system_message=False,
+        unsupported_params=["temperature", "stream"],
+        rename_params={"max_tokens": "max_completion_tokens"},
+        has_tools=False,
+        description="O1 Mini Reasoning LM",
+    ),
+    OpenAIChatModel.O3_MINI.value: ModelInfo(
+        name=OpenAIChatModel.O3_MINI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=200_000,
+        max_output_tokens=100_000,
+        input_cost_per_million=1.1,
+        cached_cost_per_million=0.55,
+        output_cost_per_million=4.4,
+        allows_streaming=False,
+        allows_system_message=False,
+        unsupported_params=["temperature", "stream"],
+        rename_params={"max_tokens": "max_completion_tokens"},
+        has_tools=False,
+        description="O3 Mini Reasoning LM",
+    ),
+    OpenAIChatModel.O4_MINI.value: ModelInfo(
+        name=OpenAIChatModel.O4_MINI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=200_000,
+        max_output_tokens=100_000,
+        input_cost_per_million=1.10,
+        cached_cost_per_million=0.275,
+        output_cost_per_million=4.40,
+        allows_streaming=False,
+        allows_system_message=False,
+        unsupported_params=["temperature", "stream"],
+        rename_params={"max_tokens": "max_completion_tokens"},
+        has_tools=False,
+        description="O3 Mini Reasoning LM",
+    ),
+    # Anthropic Models
+    AnthropicModel.CLAUDE_3_5_SONNET.value: ModelInfo(
+        name=AnthropicModel.CLAUDE_3_5_SONNET.value,
+        provider=ModelProvider.ANTHROPIC,
+        context_length=200_000,
+        max_output_tokens=8192,
+        input_cost_per_million=3.0,
+        cached_cost_per_million=0.30,
+        output_cost_per_million=15.0,
+        description="Claude 3.5 Sonnet",
+    ),
+    AnthropicModel.CLAUDE_3_OPUS.value: ModelInfo(
+        name=AnthropicModel.CLAUDE_3_OPUS.value,
+        provider=ModelProvider.ANTHROPIC,
+        context_length=200_000,
+        max_output_tokens=4096,
+        input_cost_per_million=15.0,
+        cached_cost_per_million=1.50,
+        output_cost_per_million=75.0,
+        description="Claude 3 Opus",
+    ),
+    AnthropicModel.CLAUDE_3_SONNET.value: ModelInfo(
+        name=AnthropicModel.CLAUDE_3_SONNET.value,
+        provider=ModelProvider.ANTHROPIC,
+        context_length=200_000,
+        max_output_tokens=4096,
+        input_cost_per_million=3.0,
+        cached_cost_per_million=0.30,
+        output_cost_per_million=15.0,
+        description="Claude 3 Sonnet",
+    ),
+    AnthropicModel.CLAUDE_3_HAIKU.value: ModelInfo(
+        name=AnthropicModel.CLAUDE_3_HAIKU.value,
+        provider=ModelProvider.ANTHROPIC,
+        context_length=200_000,
+        max_output_tokens=4096,
+        input_cost_per_million=0.25,
+        cached_cost_per_million=0.03,
+        output_cost_per_million=1.25,
+        description="Claude 3 Haiku",
+    ),
+    # DeepSeek Models
+    DeepSeekModel.DEEPSEEK.value: ModelInfo(
+        name=DeepSeekModel.DEEPSEEK.value,
+        provider=ModelProvider.DEEPSEEK,
+        context_length=64_000,
+        max_output_tokens=8_000,
+        input_cost_per_million=0.27,
+        cached_cost_per_million=0.07,
+        output_cost_per_million=1.10,
+        description="DeepSeek Chat",
+    ),
+    DeepSeekModel.DEEPSEEK_R1.value: ModelInfo(
+        name=DeepSeekModel.DEEPSEEK_R1.value,
+        provider=ModelProvider.DEEPSEEK,
+        context_length=64_000,
+        max_output_tokens=8_000,
+        input_cost_per_million=0.55,
+        cached_cost_per_million=0.14,
+        output_cost_per_million=2.19,
+        description="DeepSeek-R1 Reasoning LM",
+    ),
+    # Gemini Models
+    GeminiModel.GEMINI_2_FLASH.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_FLASH.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_056_768,
+        max_output_tokens=8192,
+        input_cost_per_million=0.10,
+        cached_cost_per_million=0.025,
+        output_cost_per_million=0.40,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2.0 Flash",
+    ),
+    GeminiModel.GEMINI_2_FLASH_LITE.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_FLASH_LITE.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_056_768,
+        max_output_tokens=8192,
+        input_cost_per_million=0.075,
+        output_cost_per_million=0.30,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2.0 Flash Lite Preview",
+    ),
+    GeminiModel.GEMINI_1_5_FLASH.value: ModelInfo(
+        name=GeminiModel.GEMINI_1_5_FLASH.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_056_768,
+        max_output_tokens=8192,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 1.5 Flash",
+    ),
+    GeminiModel.GEMINI_1_5_FLASH_8B.value: ModelInfo(
+        name=GeminiModel.GEMINI_1_5_FLASH_8B.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_000_000,
+        max_output_tokens=8192,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 1.5 Flash 8B",
+    ),
+    GeminiModel.GEMINI_1_5_PRO.value: ModelInfo(
+        name=GeminiModel.GEMINI_1_5_PRO.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=2_000_000,
+        max_output_tokens=8192,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 1.5 Pro",
+    ),
+    GeminiModel.GEMINI_2_PRO.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_PRO.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=2_000_000,
+        max_output_tokens=8192,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2 Pro Exp 02-05",
+    ),
+    GeminiModel.GEMINI_2_FLASH_THINKING.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_FLASH_THINKING.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_000_000,
+        max_output_tokens=64_000,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2.0 Flash Thinking",
+    ),
+    # Gemini 2.5 Models
+    GeminiModel.GEMINI_2_5_PRO.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_5_PRO.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_048_576,
+        max_output_tokens=65_536,
+        input_cost_per_million=1.25,
+        cached_cost_per_million=0.31,
+        output_cost_per_million=10.0,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2.5 Pro",
+    ),
+    GeminiModel.GEMINI_2_5_FLASH.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_5_FLASH.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_048_576,
+        max_output_tokens=65_536,
+        input_cost_per_million=0.30,
+        cached_cost_per_million=0.075,
+        output_cost_per_million=2.50,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2.5 Flash",
+    ),
+    GeminiModel.GEMINI_2_5_FLASH_LITE_PREVIEW.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_5_FLASH_LITE_PREVIEW.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=65_536,
+        max_output_tokens=65_536,
+        input_cost_per_million=0.10,
+        cached_cost_per_million=0.025,
+        output_cost_per_million=0.40,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2.5 Flash Lite Preview",
+    ),
+}
+
+
+def get_model_info(
+    model: str | ModelName,
+    fallback_models: List[str] = [],
+) -> ModelInfo:
+    """Get model information by name or enum value"""
+    # Sequence of models to try, starting with the primary model
+    models_to_try = [model] + fallback_models
+
+    # Find the first model in the sequence that has info defined using next()
+    # on a generator expression that filters out None results from _get_model_info
+    found_info = next(
+        (info for m in models_to_try if (info := _get_model_info(m)) is not None),
+        None,  # Default value if the iterator is exhausted (no valid info found)
+    )
+
+    # Return the found info, or a default ModelInfo if none was found
+    return found_info or ModelInfo()
+
+
+def _get_model_info(model: str | ModelName) -> ModelInfo | None:
+    if isinstance(model, str):
+        return MODEL_INFO.get(model)
+    return MODEL_INFO.get(model.value)
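As a usage note on the listing above: `get_model_info` tries the primary model first, then each entry of `fallback_models`, and returns an all-default `ModelInfo` when nothing matches. A small sketch (the fine-tuned model name is hypothetical):

```python
from langroid.language_models.model_info import ModelProvider, get_model_info

# Unknown primary name: falls through to the gpt-4o registry entry.
info = get_model_info("my-finetuned-model", fallback_models=["gpt-4o"])
print(info.name, info.context_length)  # -> gpt-4o 128000

# No match anywhere: a default ModelInfo is returned.
assert get_model_info("no-such-model").provider == ModelProvider.UNKNOWN
```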