langroid 0.58.3__py3-none-any.whl → 0.59.0b2__py3-none-any.whl

This diff compares publicly available package versions as published to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
Files changed (105)
  1. langroid/agent/base.py +39 -17
  2. langroid/agent/base.py-e +2216 -0
  3. langroid/agent/callbacks/chainlit.py +2 -1
  4. langroid/agent/chat_agent.py +73 -55
  5. langroid/agent/chat_agent.py-e +2086 -0
  6. langroid/agent/chat_document.py +7 -7
  7. langroid/agent/chat_document.py-e +513 -0
  8. langroid/agent/openai_assistant.py +9 -9
  9. langroid/agent/openai_assistant.py-e +882 -0
  10. langroid/agent/special/arangodb/arangodb_agent.py +10 -18
  11. langroid/agent/special/arangodb/arangodb_agent.py-e +648 -0
  12. langroid/agent/special/arangodb/tools.py +3 -3
  13. langroid/agent/special/doc_chat_agent.py +16 -14
  14. langroid/agent/special/lance_rag/critic_agent.py +2 -2
  15. langroid/agent/special/lance_rag/query_planner_agent.py +4 -4
  16. langroid/agent/special/lance_tools.py +6 -5
  17. langroid/agent/special/lance_tools.py-e +61 -0
  18. langroid/agent/special/neo4j/neo4j_chat_agent.py +3 -7
  19. langroid/agent/special/neo4j/neo4j_chat_agent.py-e +430 -0
  20. langroid/agent/special/relevance_extractor_agent.py +1 -1
  21. langroid/agent/special/sql/sql_chat_agent.py +11 -3
  22. langroid/agent/task.py +9 -87
  23. langroid/agent/task.py-e +2418 -0
  24. langroid/agent/tool_message.py +33 -17
  25. langroid/agent/tool_message.py-e +400 -0
  26. langroid/agent/tools/file_tools.py +4 -2
  27. langroid/agent/tools/file_tools.py-e +234 -0
  28. langroid/agent/tools/mcp/fastmcp_client.py +19 -6
  29. langroid/agent/tools/mcp/fastmcp_client.py-e +584 -0
  30. langroid/agent/tools/orchestration.py +22 -17
  31. langroid/agent/tools/orchestration.py-e +301 -0
  32. langroid/agent/tools/recipient_tool.py +3 -3
  33. langroid/agent/tools/task_tool.py +22 -16
  34. langroid/agent/tools/task_tool.py-e +249 -0
  35. langroid/agent/xml_tool_message.py +90 -35
  36. langroid/agent/xml_tool_message.py-e +392 -0
  37. langroid/cachedb/base.py +1 -1
  38. langroid/embedding_models/base.py +2 -2
  39. langroid/embedding_models/models.py +3 -7
  40. langroid/embedding_models/models.py-e +563 -0
  41. langroid/exceptions.py +4 -1
  42. langroid/language_models/azure_openai.py +2 -2
  43. langroid/language_models/azure_openai.py-e +134 -0
  44. langroid/language_models/base.py +6 -4
  45. langroid/language_models/base.py-e +812 -0
  46. langroid/language_models/config.py +2 -4
  47. langroid/language_models/config.py-e +18 -0
  48. langroid/language_models/model_info.py +9 -1
  49. langroid/language_models/model_info.py-e +483 -0
  50. langroid/language_models/openai_gpt.py +53 -18
  51. langroid/language_models/openai_gpt.py-e +2280 -0
  52. langroid/language_models/provider_params.py +3 -22
  53. langroid/language_models/provider_params.py-e +153 -0
  54. langroid/mytypes.py +11 -4
  55. langroid/mytypes.py-e +132 -0
  56. langroid/parsing/code_parser.py +1 -1
  57. langroid/parsing/file_attachment.py +1 -1
  58. langroid/parsing/file_attachment.py-e +246 -0
  59. langroid/parsing/md_parser.py +14 -4
  60. langroid/parsing/md_parser.py-e +574 -0
  61. langroid/parsing/parser.py +22 -7
  62. langroid/parsing/parser.py-e +410 -0
  63. langroid/parsing/repo_loader.py +3 -1
  64. langroid/parsing/repo_loader.py-e +812 -0
  65. langroid/parsing/search.py +1 -1
  66. langroid/parsing/url_loader.py +17 -51
  67. langroid/parsing/url_loader.py-e +683 -0
  68. langroid/parsing/urls.py +5 -4
  69. langroid/parsing/urls.py-e +279 -0
  70. langroid/prompts/prompts_config.py +1 -1
  71. langroid/pydantic_v1/__init__.py +56 -6
  72. langroid/pydantic_v1/__init__.py-e +36 -0
  73. langroid/pydantic_v1/main.py +10 -4
  74. langroid/pydantic_v1/main.py-e +11 -0
  75. langroid/utils/configuration.py +13 -11
  76. langroid/utils/configuration.py-e +141 -0
  77. langroid/utils/constants.py +1 -1
  78. langroid/utils/constants.py-e +32 -0
  79. langroid/utils/globals.py +21 -5
  80. langroid/utils/globals.py-e +49 -0
  81. langroid/utils/html_logger.py +2 -1
  82. langroid/utils/html_logger.py-e +825 -0
  83. langroid/utils/object_registry.py +1 -1
  84. langroid/utils/object_registry.py-e +66 -0
  85. langroid/utils/pydantic_utils.py +55 -28
  86. langroid/utils/pydantic_utils.py-e +602 -0
  87. langroid/utils/types.py +2 -2
  88. langroid/utils/types.py-e +113 -0
  89. langroid/vector_store/base.py +3 -3
  90. langroid/vector_store/lancedb.py +5 -5
  91. langroid/vector_store/lancedb.py-e +404 -0
  92. langroid/vector_store/meilisearch.py +2 -2
  93. langroid/vector_store/pineconedb.py +4 -4
  94. langroid/vector_store/pineconedb.py-e +427 -0
  95. langroid/vector_store/postgres.py +1 -1
  96. langroid/vector_store/qdrantdb.py +3 -3
  97. langroid/vector_store/weaviatedb.py +1 -1
  98. {langroid-0.58.3.dist-info → langroid-0.59.0b2.dist-info}/METADATA +3 -2
  99. langroid-0.59.0b2.dist-info/RECORD +181 -0
  100. langroid/agent/special/doc_chat_task.py +0 -0
  101. langroid/mcp/__init__.py +0 -1
  102. langroid/mcp/server/__init__.py +0 -1
  103. langroid-0.58.3.dist-info/RECORD +0 -145
  104. {langroid-0.58.3.dist-info → langroid-0.59.0b2.dist-info}/WHEEL +0 -0
  105. {langroid-0.58.3.dist-info → langroid-0.59.0b2.dist-info}/licenses/LICENSE +0 -0
langroid/language_models/config.py
@@ -1,12 +1,10 @@
-from langroid.pydantic_v1 import BaseSettings
+from pydantic_settings import BaseSettings, SettingsConfigDict
 
 
 class PromptFormatterConfig(BaseSettings):
     type: str = "llama2"
 
-    class Config:
-        env_prefix = "FORMAT_"
-        case_sensitive = False
+    model_config = SettingsConfigDict(env_prefix="FORMAT_", case_sensitive=False)
 
 
 class Llama2FormatterConfig(PromptFormatterConfig):
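
Note: in pydantic-settings v2 the inner class Config is replaced by a model_config = SettingsConfigDict(...) declaration, as in the hunk above. A minimal standalone sketch of the migrated behavior, assuming pydantic-settings v2 is installed (the environment-variable override shown is illustrative):

    import os

    from pydantic_settings import BaseSettings, SettingsConfigDict


    class PromptFormatterConfig(BaseSettings):
        type: str = "llama2"

        model_config = SettingsConfigDict(env_prefix="FORMAT_", case_sensitive=False)


    # case_sensitive=False means FORMAT_TYPE and format_type both match.
    os.environ["FORMAT_TYPE"] = "hf"
    print(PromptFormatterConfig().type)  # -> "hf"
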
langroid/language_models/config.py-e
@@ -0,0 +1,18 @@
+from pydantic_settings import BaseSettings
+
+from pydantic import ConfigDict
+
+
+class PromptFormatterConfig(BaseSettings):
+    type: str = "llama2"
+
+    model_config = ConfigDict(env_prefix="FORMAT_", case_sensitive=False)
+
+
+class Llama2FormatterConfig(PromptFormatterConfig):
+    use_bos_eos: bool = False
+
+
+class HFPromptFormatterConfig(PromptFormatterConfig):
+    type: str = "hf"
+    model_name: str
langroid/language_models/model_info.py
@@ -1,7 +1,7 @@
 from enum import Enum
 from typing import Dict, List, Optional
 
-from langroid.pydantic_v1 import BaseModel
+from pydantic import BaseModel
 
 
 class ModelProvider(str, Enum):
@@ -173,6 +173,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
     OpenAIChatModel.GPT4_1_NANO.value: ModelInfo(
         name=OpenAIChatModel.GPT4_1_NANO.value,
         provider=ModelProvider.OPENAI,
+        has_structured_output=True,
         context_length=1_047_576,
         max_output_tokens=32_768,
         input_cost_per_million=0.10,
@@ -183,6 +184,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
     OpenAIChatModel.GPT4_1_MINI.value: ModelInfo(
         name=OpenAIChatModel.GPT4_1_MINI.value,
         provider=ModelProvider.OPENAI,
+        has_structured_output=True,
         context_length=1_047_576,
         max_output_tokens=32_768,
         input_cost_per_million=0.40,
@@ -193,6 +195,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
     OpenAIChatModel.GPT4_1.value: ModelInfo(
         name=OpenAIChatModel.GPT4_1.value,
         provider=ModelProvider.OPENAI,
+        has_structured_output=True,
        context_length=1_047_576,
         max_output_tokens=32_768,
         input_cost_per_million=2.00,
@@ -232,6 +235,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
         output_cost_per_million=60.0,
         allows_streaming=True,
         allows_system_message=False,
+        has_structured_output=True,
         unsupported_params=["temperature"],
         rename_params={"max_tokens": "max_completion_tokens"},
         has_tools=False,
@@ -247,6 +251,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
         output_cost_per_million=8.0,
         allows_streaming=True,
         allows_system_message=False,
+        has_structured_output=True,
         unsupported_params=["temperature"],
         rename_params={"max_tokens": "max_completion_tokens"},
         has_tools=False,
@@ -262,6 +267,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
         output_cost_per_million=4.4,
         allows_streaming=False,
         allows_system_message=False,
+        has_structured_output=True,
         unsupported_params=["temperature", "stream"],
         rename_params={"max_tokens": "max_completion_tokens"},
         has_tools=False,
@@ -277,6 +283,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
         output_cost_per_million=4.4,
         allows_streaming=False,
         allows_system_message=False,
+        has_structured_output=True,
         unsupported_params=["temperature", "stream"],
         rename_params={"max_tokens": "max_completion_tokens"},
         has_tools=False,
@@ -292,6 +299,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
         output_cost_per_million=4.40,
         allows_streaming=False,
         allows_system_message=False,
+        has_structured_output=True,
         unsupported_params=["temperature", "stream"],
         rename_params={"max_tokens": "max_completion_tokens"},
         has_tools=False,
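
These capability flags are read back through the registry lookup defined in the same module (shown in full in the new file below). A minimal sketch of checking the newly added flag, assuming langroid 0.59.0b2 is installed:

    from langroid.language_models.model_info import get_model_info

    info = get_model_info("gpt-4.1-mini")
    print(info.has_structured_output)  # True after this change
    print(info.context_length, info.max_output_tokens)  # 1047576 32768
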
langroid/language_models/model_info.py-e
@@ -0,0 +1,483 @@
+from enum import Enum
+from typing import Dict, List, Optional
+
+from pydantic import BaseModel
+
+
+class ModelProvider(str, Enum):
+    """Enum for model providers"""
+
+    OPENAI = "openai"
+    ANTHROPIC = "anthropic"
+    DEEPSEEK = "deepseek"
+    GOOGLE = "google"
+    UNKNOWN = "unknown"
+
+
+class ModelName(str, Enum):
+    """Parent class for all model name enums"""
+
+    pass
+
+
+class OpenAIChatModel(ModelName):
+    """Enum for OpenAI Chat models"""
+
+    GPT3_5_TURBO = "gpt-3.5-turbo"
+    GPT4 = "gpt-4o"  # avoid deprecated gpt-4
+    GPT4_TURBO = "gpt-4-turbo"
+    GPT4o = "gpt-4o"
+    GPT4o_MINI = "gpt-4o-mini"
+    O1 = "o1"
+    O1_MINI = "o1-mini"
+    O3_MINI = "o3-mini"
+    O3 = "o3"
+    O4_MINI = "o4-mini"
+    GPT4_1 = "gpt-4.1"
+    GPT4_1_MINI = "gpt-4.1-mini"
+    GPT4_1_NANO = "gpt-4.1-nano"
+
+
+class OpenAICompletionModel(str, Enum):
+    """Enum for OpenAI Completion models"""
+
+    DAVINCI = "davinci-002"
+    BABBAGE = "babbage-002"
+
+
+class AnthropicModel(ModelName):
+    """Enum for Anthropic models"""
+
+    CLAUDE_3_5_SONNET = "claude-3-5-sonnet-latest"
+    CLAUDE_3_7_SONNET = "claude-3-7-sonnet-latest"
+    CLAUDE_3_OPUS = "claude-3-opus-latest"
+    CLAUDE_3_SONNET = "claude-3-sonnet-20240229"
+    CLAUDE_3_HAIKU = "claude-3-haiku-20240307"
+
+
+class DeepSeekModel(ModelName):
+    """Enum for DeepSeek models direct from DeepSeek API"""
+
+    DEEPSEEK = "deepseek/deepseek-chat"
+    DEEPSEEK_R1 = "deepseek/deepseek-reasoner"
+    OPENROUTER_DEEPSEEK_R1 = "openrouter/deepseek/deepseek-r1"
+
+
+class GeminiModel(ModelName):
+    """Enum for Gemini models"""
+
+    GEMINI_1_5_FLASH = "gemini-1.5-flash"
+    GEMINI_1_5_FLASH_8B = "gemini-1.5-flash-8b"
+    GEMINI_1_5_PRO = "gemini-1.5-pro"
+    GEMINI_2_5_PRO = "gemini-2.5-pro"
+    GEMINI_2_5_FLASH = "gemini-2.5-flash"
+    GEMINI_2_5_FLASH_LITE_PREVIEW = "gemini-2.5-flash-lite-preview-06-17"
+    GEMINI_2_PRO = "gemini-2.0-pro-exp-02-05"
+    GEMINI_2_FLASH = "gemini-2.0-flash"
+    GEMINI_2_FLASH_LITE = "gemini-2.0-flash-lite-preview"
+    GEMINI_2_FLASH_THINKING = "gemini-2.0-flash-thinking-exp"
+
+
+class OpenAI_API_ParamInfo(BaseModel):
+    """
+    Parameters exclusive to some models, when using OpenAI API
+    """
+
+    # model-specific params at top level
+    params: Dict[str, List[str]] = dict(
+        reasoning_effort=[
+            OpenAIChatModel.O3_MINI.value,
+        ],
+    )
+    # model-specific params in extra_body
+    extra_parameters: Dict[str, List[str]] = dict(
+        include_reasoning=[
+            DeepSeekModel.OPENROUTER_DEEPSEEK_R1.value,
+        ]
+    )
+
+
+class ModelInfo(BaseModel):
+    """
+    Consolidated information about LLM, related to capacity, cost and API
+    idiosyncrasies. Reasonable defaults for all params in case there's no
+    specific info available.
+    """
+
+    name: str = "unknown"
+    provider: ModelProvider = ModelProvider.UNKNOWN
+    context_length: int = 16_000
+    max_cot_tokens: int = 0  # max chain of thought (thinking) tokens where applicable
+    max_output_tokens: int = 8192  # Maximum number of output tokens - model dependent
+    input_cost_per_million: float = 0.0  # Cost in USD per million input tokens
+    cached_cost_per_million: float = 0.0  # Cost in USD per million cached tokens
+    output_cost_per_million: float = 0.0  # Cost in USD per million output tokens
+    allows_streaming: bool = True  # Whether model supports streaming output
+    allows_system_message: bool = True  # Whether model supports system messages
+    rename_params: Dict[str, str] = {}  # Rename parameters for OpenAI API
+    unsupported_params: List[str] = []
+    has_structured_output: bool = False  # Does model API support structured output?
+    has_tools: bool = True  # Does model API support tools/function-calling?
+    needs_first_user_message: bool = False  # Does API need first msg to be from user?
+    description: Optional[str] = None
+
+
+# Model information registry
+MODEL_INFO: Dict[str, ModelInfo] = {
+    # OpenAI Models
+    OpenAICompletionModel.DAVINCI.value: ModelInfo(
+        name=OpenAICompletionModel.DAVINCI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=4096,
+        max_output_tokens=4096,
+        input_cost_per_million=2.0,
+        output_cost_per_million=2.0,
+        description="Davinci-002",
+    ),
+    OpenAICompletionModel.BABBAGE.value: ModelInfo(
+        name=OpenAICompletionModel.BABBAGE.value,
+        provider=ModelProvider.OPENAI,
+        context_length=4096,
+        max_output_tokens=4096,
+        input_cost_per_million=0.40,
+        output_cost_per_million=0.40,
+        description="Babbage-002",
+    ),
+    OpenAIChatModel.GPT3_5_TURBO.value: ModelInfo(
+        name=OpenAIChatModel.GPT3_5_TURBO.value,
+        provider=ModelProvider.OPENAI,
+        context_length=16_385,
+        max_output_tokens=4096,
+        input_cost_per_million=0.50,
+        output_cost_per_million=1.50,
+        description="GPT-3.5 Turbo",
+    ),
+    OpenAIChatModel.GPT4.value: ModelInfo(
+        name=OpenAIChatModel.GPT4.value,
+        provider=ModelProvider.OPENAI,
+        context_length=8192,
+        max_output_tokens=8192,
+        input_cost_per_million=30.0,
+        output_cost_per_million=60.0,
+        description="GPT-4 (8K context)",
+    ),
+    OpenAIChatModel.GPT4_TURBO.value: ModelInfo(
+        name=OpenAIChatModel.GPT4_TURBO.value,
+        provider=ModelProvider.OPENAI,
+        context_length=128_000,
+        max_output_tokens=4096,
+        input_cost_per_million=10.0,
+        output_cost_per_million=30.0,
+        description="GPT-4 Turbo",
+    ),
+    OpenAIChatModel.GPT4_1_NANO.value: ModelInfo(
+        name=OpenAIChatModel.GPT4_1_NANO.value,
+        provider=ModelProvider.OPENAI,
+        context_length=1_047_576,
+        max_output_tokens=32_768,
+        input_cost_per_million=0.10,
+        cached_cost_per_million=0.025,
+        output_cost_per_million=0.40,
+        description="GPT-4.1",
+    ),
+    OpenAIChatModel.GPT4_1_MINI.value: ModelInfo(
+        name=OpenAIChatModel.GPT4_1_MINI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=1_047_576,
+        max_output_tokens=32_768,
+        input_cost_per_million=0.40,
+        cached_cost_per_million=0.10,
+        output_cost_per_million=1.60,
+        description="GPT-4.1 Mini",
+    ),
+    OpenAIChatModel.GPT4_1.value: ModelInfo(
+        name=OpenAIChatModel.GPT4_1.value,
+        provider=ModelProvider.OPENAI,
+        context_length=1_047_576,
+        max_output_tokens=32_768,
+        input_cost_per_million=2.00,
+        cached_cost_per_million=0.50,
+        output_cost_per_million=8.00,
+        description="GPT-4.1",
+    ),
+    OpenAIChatModel.GPT4o.value: ModelInfo(
+        name=OpenAIChatModel.GPT4o.value,
+        provider=ModelProvider.OPENAI,
+        context_length=128_000,
+        max_output_tokens=16_384,
+        input_cost_per_million=2.5,
+        cached_cost_per_million=1.25,
+        output_cost_per_million=10.0,
+        has_structured_output=True,
+        description="GPT-4o (128K context)",
+    ),
+    OpenAIChatModel.GPT4o_MINI.value: ModelInfo(
+        name=OpenAIChatModel.GPT4o_MINI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=128_000,
+        max_output_tokens=16_384,
+        input_cost_per_million=0.15,
+        cached_cost_per_million=0.075,
+        output_cost_per_million=0.60,
+        has_structured_output=True,
+        description="GPT-4o Mini",
+    ),
+    OpenAIChatModel.O1.value: ModelInfo(
+        name=OpenAIChatModel.O1.value,
+        provider=ModelProvider.OPENAI,
+        context_length=200_000,
+        max_output_tokens=100_000,
+        input_cost_per_million=15.0,
+        cached_cost_per_million=7.50,
+        output_cost_per_million=60.0,
+        allows_streaming=True,
+        allows_system_message=False,
+        unsupported_params=["temperature"],
+        rename_params={"max_tokens": "max_completion_tokens"},
+        has_tools=False,
+        description="O1 Reasoning LM",
+    ),
+    OpenAIChatModel.O3.value: ModelInfo(
+        name=OpenAIChatModel.O3.value,
+        provider=ModelProvider.OPENAI,
+        context_length=200_000,
+        max_output_tokens=100_000,
+        input_cost_per_million=2.0,
+        cached_cost_per_million=0.50,
+        output_cost_per_million=8.0,
+        allows_streaming=True,
+        allows_system_message=False,
+        unsupported_params=["temperature"],
+        rename_params={"max_tokens": "max_completion_tokens"},
+        has_tools=False,
+        description="O1 Reasoning LM",
+    ),
+    OpenAIChatModel.O1_MINI.value: ModelInfo(
+        name=OpenAIChatModel.O1_MINI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=128_000,
+        max_output_tokens=65_536,
+        input_cost_per_million=1.1,
+        cached_cost_per_million=0.55,
+        output_cost_per_million=4.4,
+        allows_streaming=False,
+        allows_system_message=False,
+        unsupported_params=["temperature", "stream"],
+        rename_params={"max_tokens": "max_completion_tokens"},
+        has_tools=False,
+        description="O1 Mini Reasoning LM",
+    ),
+    OpenAIChatModel.O3_MINI.value: ModelInfo(
+        name=OpenAIChatModel.O3_MINI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=200_000,
+        max_output_tokens=100_000,
+        input_cost_per_million=1.1,
+        cached_cost_per_million=0.55,
+        output_cost_per_million=4.4,
+        allows_streaming=False,
+        allows_system_message=False,
+        unsupported_params=["temperature", "stream"],
+        rename_params={"max_tokens": "max_completion_tokens"},
+        has_tools=False,
+        description="O3 Mini Reasoning LM",
+    ),
+    OpenAIChatModel.O4_MINI.value: ModelInfo(
+        name=OpenAIChatModel.O4_MINI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=200_000,
+        max_output_tokens=100_000,
+        input_cost_per_million=1.10,
+        cached_cost_per_million=0.275,
+        output_cost_per_million=4.40,
+        allows_streaming=False,
+        allows_system_message=False,
+        unsupported_params=["temperature", "stream"],
+        rename_params={"max_tokens": "max_completion_tokens"},
+        has_tools=False,
+        description="O3 Mini Reasoning LM",
+    ),
+    # Anthropic Models
+    AnthropicModel.CLAUDE_3_5_SONNET.value: ModelInfo(
+        name=AnthropicModel.CLAUDE_3_5_SONNET.value,
+        provider=ModelProvider.ANTHROPIC,
+        context_length=200_000,
+        max_output_tokens=8192,
+        input_cost_per_million=3.0,
+        cached_cost_per_million=0.30,
+        output_cost_per_million=15.0,
+        description="Claude 3.5 Sonnet",
+    ),
+    AnthropicModel.CLAUDE_3_OPUS.value: ModelInfo(
+        name=AnthropicModel.CLAUDE_3_OPUS.value,
+        provider=ModelProvider.ANTHROPIC,
+        context_length=200_000,
+        max_output_tokens=4096,
+        input_cost_per_million=15.0,
+        cached_cost_per_million=1.50,
+        output_cost_per_million=75.0,
+        description="Claude 3 Opus",
+    ),
+    AnthropicModel.CLAUDE_3_SONNET.value: ModelInfo(
+        name=AnthropicModel.CLAUDE_3_SONNET.value,
+        provider=ModelProvider.ANTHROPIC,
+        context_length=200_000,
+        max_output_tokens=4096,
+        input_cost_per_million=3.0,
+        cached_cost_per_million=0.30,
+        output_cost_per_million=15.0,
+        description="Claude 3 Sonnet",
+    ),
+    AnthropicModel.CLAUDE_3_HAIKU.value: ModelInfo(
+        name=AnthropicModel.CLAUDE_3_HAIKU.value,
+        provider=ModelProvider.ANTHROPIC,
+        context_length=200_000,
+        max_output_tokens=4096,
+        input_cost_per_million=0.25,
+        cached_cost_per_million=0.03,
+        output_cost_per_million=1.25,
+        description="Claude 3 Haiku",
+    ),
+    # DeepSeek Models
+    DeepSeekModel.DEEPSEEK.value: ModelInfo(
+        name=DeepSeekModel.DEEPSEEK.value,
+        provider=ModelProvider.DEEPSEEK,
+        context_length=64_000,
+        max_output_tokens=8_000,
+        input_cost_per_million=0.27,
+        cached_cost_per_million=0.07,
+        output_cost_per_million=1.10,
+        description="DeepSeek Chat",
+    ),
+    DeepSeekModel.DEEPSEEK_R1.value: ModelInfo(
+        name=DeepSeekModel.DEEPSEEK_R1.value,
+        provider=ModelProvider.DEEPSEEK,
+        context_length=64_000,
+        max_output_tokens=8_000,
+        input_cost_per_million=0.55,
+        cached_cost_per_million=0.14,
+        output_cost_per_million=2.19,
+        description="DeepSeek-R1 Reasoning LM",
+    ),
+    # Gemini Models
+    GeminiModel.GEMINI_2_FLASH.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_FLASH.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_056_768,
+        max_output_tokens=8192,
+        input_cost_per_million=0.10,
+        cached_cost_per_million=0.025,
+        output_cost_per_million=0.40,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2.0 Flash",
+    ),
+    GeminiModel.GEMINI_2_FLASH_LITE.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_FLASH_LITE.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_056_768,
+        max_output_tokens=8192,
+        input_cost_per_million=0.075,
+        output_cost_per_million=0.30,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2.0 Flash Lite Preview",
+    ),
+    GeminiModel.GEMINI_1_5_FLASH.value: ModelInfo(
+        name=GeminiModel.GEMINI_1_5_FLASH.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_056_768,
+        max_output_tokens=8192,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 1.5 Flash",
+    ),
+    GeminiModel.GEMINI_1_5_FLASH_8B.value: ModelInfo(
+        name=GeminiModel.GEMINI_1_5_FLASH_8B.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_000_000,
+        max_output_tokens=8192,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 1.5 Flash 8B",
+    ),
+    GeminiModel.GEMINI_1_5_PRO.value: ModelInfo(
+        name=GeminiModel.GEMINI_1_5_PRO.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=2_000_000,
+        max_output_tokens=8192,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 1.5 Pro",
+    ),
+    GeminiModel.GEMINI_2_PRO.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_PRO.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=2_000_000,
+        max_output_tokens=8192,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2 Pro Exp 02-05",
+    ),
+    GeminiModel.GEMINI_2_FLASH_THINKING.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_FLASH_THINKING.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_000_000,
+        max_output_tokens=64_000,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2.0 Flash Thinking",
+    ),
+    # Gemini 2.5 Models
+    GeminiModel.GEMINI_2_5_PRO.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_5_PRO.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_048_576,
+        max_output_tokens=65_536,
+        input_cost_per_million=1.25,
+        cached_cost_per_million=0.31,
+        output_cost_per_million=10.0,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2.5 Pro",
+    ),
+    GeminiModel.GEMINI_2_5_FLASH.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_5_FLASH.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_048_576,
+        max_output_tokens=65_536,
+        input_cost_per_million=0.30,
+        cached_cost_per_million=0.075,
+        output_cost_per_million=2.50,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2.5 Flash",
+    ),
+    GeminiModel.GEMINI_2_5_FLASH_LITE_PREVIEW.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_5_FLASH_LITE_PREVIEW.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=65_536,
+        max_output_tokens=65_536,
+        input_cost_per_million=0.10,
+        cached_cost_per_million=0.025,
+        output_cost_per_million=0.40,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2.5 Flash Lite Preview",
+    ),
+}
+
+
+def get_model_info(
+    model: str | ModelName,
+    fallback_models: List[str] = [],
+) -> ModelInfo:
+    """Get model information by name or enum value"""
+    # Sequence of models to try, starting with the primary model
+    models_to_try = [model] + fallback_models
+
+    # Find the first model in the sequence that has info defined using next()
+    # on a generator expression that filters out None results from _get_model_info
+    found_info = next(
+        (info for m in models_to_try if (info := _get_model_info(m)) is not None),
+        None,  # Default value if the iterator is exhausted (no valid info found)
+    )
+
+    # Return the found info, or a default ModelInfo if none was found
+    return found_info or ModelInfo()
+
+
+def _get_model_info(model: str | ModelName) -> ModelInfo | None:
+    if isinstance(model, str):
+        return MODEL_INFO.get(model)
+    return MODEL_INFO.get(model.value)
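
get_model_info tries the primary model name first and then each fallback before returning an all-defaults ModelInfo. A minimal sketch of the fallback behavior (the unregistered deployment name is illustrative):

    from langroid.language_models.model_info import get_model_info

    # The custom name is not in MODEL_INFO, so the lookup falls back to "gpt-4o".
    info = get_model_info("my-azure-gpt4o-deployment", fallback_models=["gpt-4o"])
    print(info.name, info.input_cost_per_million)  # -> gpt-4o 2.5

    # With no match at all, the defaults from ModelInfo() are returned.
    print(get_model_info("totally-unknown").context_length)  # -> 16000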