langroid 0.58.2__py3-none-any.whl → 0.59.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. langroid/agent/base.py +39 -17
  2. langroid/agent/base.py-e +2216 -0
  3. langroid/agent/callbacks/chainlit.py +2 -1
  4. langroid/agent/chat_agent.py +73 -55
  5. langroid/agent/chat_agent.py-e +2086 -0
  6. langroid/agent/chat_document.py +7 -7
  7. langroid/agent/chat_document.py-e +513 -0
  8. langroid/agent/openai_assistant.py +9 -9
  9. langroid/agent/openai_assistant.py-e +882 -0
  10. langroid/agent/special/arangodb/arangodb_agent.py +10 -18
  11. langroid/agent/special/arangodb/arangodb_agent.py-e +648 -0
  12. langroid/agent/special/arangodb/tools.py +3 -3
  13. langroid/agent/special/doc_chat_agent.py +16 -14
  14. langroid/agent/special/lance_rag/critic_agent.py +2 -2
  15. langroid/agent/special/lance_rag/query_planner_agent.py +4 -4
  16. langroid/agent/special/lance_tools.py +6 -5
  17. langroid/agent/special/lance_tools.py-e +61 -0
  18. langroid/agent/special/neo4j/neo4j_chat_agent.py +3 -7
  19. langroid/agent/special/neo4j/neo4j_chat_agent.py-e +430 -0
  20. langroid/agent/special/relevance_extractor_agent.py +1 -1
  21. langroid/agent/special/sql/sql_chat_agent.py +11 -3
  22. langroid/agent/task.py +9 -87
  23. langroid/agent/task.py-e +2418 -0
  24. langroid/agent/tool_message.py +33 -17
  25. langroid/agent/tool_message.py-e +400 -0
  26. langroid/agent/tools/file_tools.py +4 -2
  27. langroid/agent/tools/file_tools.py-e +234 -0
  28. langroid/agent/tools/mcp/fastmcp_client.py +19 -6
  29. langroid/agent/tools/mcp/fastmcp_client.py-e +584 -0
  30. langroid/agent/tools/orchestration.py +22 -17
  31. langroid/agent/tools/orchestration.py-e +301 -0
  32. langroid/agent/tools/recipient_tool.py +3 -3
  33. langroid/agent/tools/task_tool.py +22 -16
  34. langroid/agent/tools/task_tool.py-e +249 -0
  35. langroid/agent/xml_tool_message.py +90 -35
  36. langroid/agent/xml_tool_message.py-e +392 -0
  37. langroid/cachedb/base.py +1 -1
  38. langroid/embedding_models/base.py +2 -2
  39. langroid/embedding_models/models.py +3 -7
  40. langroid/embedding_models/models.py-e +563 -0
  41. langroid/exceptions.py +4 -1
  42. langroid/language_models/azure_openai.py +2 -2
  43. langroid/language_models/azure_openai.py-e +134 -0
  44. langroid/language_models/base.py +6 -4
  45. langroid/language_models/base.py-e +812 -0
  46. langroid/language_models/client_cache.py +64 -0
  47. langroid/language_models/config.py +2 -4
  48. langroid/language_models/config.py-e +18 -0
  49. langroid/language_models/model_info.py +9 -1
  50. langroid/language_models/model_info.py-e +483 -0
  51. langroid/language_models/openai_gpt.py +119 -20
  52. langroid/language_models/openai_gpt.py-e +2280 -0
  53. langroid/language_models/provider_params.py +3 -22
  54. langroid/language_models/provider_params.py-e +153 -0
  55. langroid/mytypes.py +11 -4
  56. langroid/mytypes.py-e +132 -0
  57. langroid/parsing/code_parser.py +1 -1
  58. langroid/parsing/file_attachment.py +1 -1
  59. langroid/parsing/file_attachment.py-e +246 -0
  60. langroid/parsing/md_parser.py +14 -4
  61. langroid/parsing/md_parser.py-e +574 -0
  62. langroid/parsing/parser.py +22 -7
  63. langroid/parsing/parser.py-e +410 -0
  64. langroid/parsing/repo_loader.py +3 -1
  65. langroid/parsing/repo_loader.py-e +812 -0
  66. langroid/parsing/search.py +1 -1
  67. langroid/parsing/url_loader.py +17 -51
  68. langroid/parsing/url_loader.py-e +683 -0
  69. langroid/parsing/urls.py +5 -4
  70. langroid/parsing/urls.py-e +279 -0
  71. langroid/prompts/prompts_config.py +1 -1
  72. langroid/pydantic_v1/__init__.py +45 -6
  73. langroid/pydantic_v1/__init__.py-e +36 -0
  74. langroid/pydantic_v1/main.py +11 -4
  75. langroid/pydantic_v1/main.py-e +11 -0
  76. langroid/utils/configuration.py +13 -11
  77. langroid/utils/configuration.py-e +141 -0
  78. langroid/utils/constants.py +1 -1
  79. langroid/utils/constants.py-e +32 -0
  80. langroid/utils/globals.py +21 -5
  81. langroid/utils/globals.py-e +49 -0
  82. langroid/utils/html_logger.py +2 -1
  83. langroid/utils/html_logger.py-e +825 -0
  84. langroid/utils/object_registry.py +1 -1
  85. langroid/utils/object_registry.py-e +66 -0
  86. langroid/utils/pydantic_utils.py +55 -28
  87. langroid/utils/pydantic_utils.py-e +602 -0
  88. langroid/utils/types.py +2 -2
  89. langroid/utils/types.py-e +113 -0
  90. langroid/vector_store/base.py +3 -3
  91. langroid/vector_store/lancedb.py +5 -5
  92. langroid/vector_store/lancedb.py-e +404 -0
  93. langroid/vector_store/meilisearch.py +2 -2
  94. langroid/vector_store/pineconedb.py +4 -4
  95. langroid/vector_store/pineconedb.py-e +427 -0
  96. langroid/vector_store/postgres.py +1 -1
  97. langroid/vector_store/qdrantdb.py +3 -3
  98. langroid/vector_store/weaviatedb.py +1 -1
  99. {langroid-0.58.2.dist-info → langroid-0.59.0b1.dist-info}/METADATA +3 -2
  100. langroid-0.59.0b1.dist-info/RECORD +181 -0
  101. langroid/agent/special/doc_chat_task.py +0 -0
  102. langroid/mcp/__init__.py +0 -1
  103. langroid/mcp/server/__init__.py +0 -1
  104. langroid-0.58.2.dist-info/RECORD +0 -145
  105. {langroid-0.58.2.dist-info → langroid-0.59.0b1.dist-info}/WHEEL +0 -0
  106. {langroid-0.58.2.dist-info → langroid-0.59.0b1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,483 @@
1
+ from enum import Enum
2
+ from typing import Dict, List, Optional
3
+
4
+ from pydantic import BaseModel
5
+
6
+
7
class ModelProvider(str, Enum):
    """Identifies the vendor behind a model.

    Members subclass ``str``, so each one compares equal to its raw value.
    """

    OPENAI = "openai"
    ANTHROPIC = "anthropic"
    DEEPSEEK = "deepseek"
    GOOGLE = "google"
    # Catch-all member for a vendor that is not one of the above.
    UNKNOWN = "unknown"
15
+
16
+
17
class ModelName(str, Enum):
    """Common base for the model-name enums; it declares no members itself."""
21
+
22
+
23
class OpenAIChatModel(ModelName):
    """Names of OpenAI chat models."""

    GPT3_5_TURBO = "gpt-3.5-turbo"
    # Deliberately points at gpt-4o to avoid the deprecated gpt-4. Because
    # GPT4o below carries the same value, Enum treats GPT4o as an alias of
    # this member rather than a distinct one.
    GPT4 = "gpt-4o"
    GPT4_TURBO = "gpt-4-turbo"
    GPT4o = "gpt-4o"
    GPT4o_MINI = "gpt-4o-mini"

    # o-series models
    O1 = "o1"
    O1_MINI = "o1-mini"
    O3_MINI = "o3-mini"
    O3 = "o3"
    O4_MINI = "o4-mini"

    # GPT-4.1 family
    GPT4_1 = "gpt-4.1"
    GPT4_1_MINI = "gpt-4.1-mini"
    GPT4_1_NANO = "gpt-4.1-nano"
39
+
40
+
41
class OpenAICompletionModel(str, Enum):
    """Names of OpenAI completion (non-chat) models."""

    DAVINCI = "davinci-002"
    BABBAGE = "babbage-002"
46
+
47
+
48
class AnthropicModel(ModelName):
    """Names of Anthropic Claude models."""

    # Names ending in "-latest"
    CLAUDE_3_5_SONNET = "claude-3-5-sonnet-latest"
    CLAUDE_3_7_SONNET = "claude-3-7-sonnet-latest"
    CLAUDE_3_OPUS = "claude-3-opus-latest"

    # Names carrying a date suffix
    CLAUDE_3_SONNET = "claude-3-sonnet-20240229"
    CLAUDE_3_HAIKU = "claude-3-haiku-20240307"
56
+
57
+
58
class DeepSeekModel(ModelName):
    """Names of DeepSeek models.

    Two names carry the ``deepseek/`` prefix; the third is the R1 model
    addressed through an ``openrouter/`` prefix.
    """

    DEEPSEEK = "deepseek/deepseek-chat"
    DEEPSEEK_R1 = "deepseek/deepseek-reasoner"
    OPENROUTER_DEEPSEEK_R1 = "openrouter/deepseek/deepseek-r1"
64
+
65
+
66
class GeminiModel(ModelName):
    """Names of Google Gemini models."""

    # 1.5 generation
    GEMINI_1_5_FLASH = "gemini-1.5-flash"
    GEMINI_1_5_FLASH_8B = "gemini-1.5-flash-8b"
    GEMINI_1_5_PRO = "gemini-1.5-pro"

    # 2.5 generation
    GEMINI_2_5_PRO = "gemini-2.5-pro"
    GEMINI_2_5_FLASH = "gemini-2.5-flash"
    GEMINI_2_5_FLASH_LITE_PREVIEW = "gemini-2.5-flash-lite-preview-06-17"

    # 2.0 generation
    GEMINI_2_PRO = "gemini-2.0-pro-exp-02-05"
    GEMINI_2_FLASH = "gemini-2.0-flash"
    GEMINI_2_FLASH_LITE = "gemini-2.0-flash-lite-preview"
    GEMINI_2_FLASH_THINKING = "gemini-2.0-flash-thinking-exp"
79
+
80
+
81
class OpenAI_API_ParamInfo(BaseModel):
    """Parameters that only some models accept when called via the OpenAI API.

    Both fields map a parameter name to the list of model names it applies to.
    """

    # Model-specific parameters passed at the top level of the request.
    params: Dict[str, List[str]] = {
        "reasoning_effort": [
            OpenAIChatModel.O3_MINI.value,
        ],
    }

    # Model-specific parameters passed inside `extra_body`.
    extra_parameters: Dict[str, List[str]] = {
        "include_reasoning": [
            DeepSeekModel.OPENROUTER_DEEPSEEK_R1.value,
        ],
    }
98
+
99
+
100
class ModelInfo(BaseModel):
    """Capacity, cost and API quirks of a single LLM.

    Every field has a default, so a bare ``ModelInfo()`` can stand in for a
    model about which nothing specific is known.
    """

    name: str = "unknown"
    provider: ModelProvider = ModelProvider.UNKNOWN
    context_length: int = 16_000
    # Max chain-of-thought (thinking) tokens, where applicable.
    max_cot_tokens: int = 0
    # Maximum number of output tokens; model dependent.
    max_output_tokens: int = 8192

    # Costs, in USD per million tokens.
    input_cost_per_million: float = 0.0
    cached_cost_per_million: float = 0.0
    output_cost_per_million: float = 0.0

    # Whether the model supports streaming output.
    allows_streaming: bool = True
    # Whether the model supports system messages.
    allows_system_message: bool = True
    # Parameter renames to apply for the OpenAI API.
    rename_params: Dict[str, str] = {}
    unsupported_params: List[str] = []
    # Does the model API support structured output?
    has_structured_output: bool = False
    # Does the model API support tools/function-calling?
    has_tools: bool = True
    # Does the API need the first message to be from the user?
    needs_first_user_message: bool = False
    description: Optional[str] = None
123
+
124
+
125
+ # Model information registry
126
# Registry mapping a model-name string to its ModelInfo. Keys are the
# `.value` of the model-name enum members, so lookups work with plain strings.
MODEL_INFO: Dict[str, ModelInfo] = {
    # OpenAI Models
    OpenAICompletionModel.DAVINCI.value: ModelInfo(
        name=OpenAICompletionModel.DAVINCI.value,
        provider=ModelProvider.OPENAI,
        context_length=4096,
        max_output_tokens=4096,
        input_cost_per_million=2.0,
        output_cost_per_million=2.0,
        description="Davinci-002",
    ),
    OpenAICompletionModel.BABBAGE.value: ModelInfo(
        name=OpenAICompletionModel.BABBAGE.value,
        provider=ModelProvider.OPENAI,
        context_length=4096,
        max_output_tokens=4096,
        input_cost_per_million=0.40,
        output_cost_per_million=0.40,
        description="Babbage-002",
    ),
    OpenAIChatModel.GPT3_5_TURBO.value: ModelInfo(
        name=OpenAIChatModel.GPT3_5_TURBO.value,
        provider=ModelProvider.OPENAI,
        context_length=16_385,
        max_output_tokens=4096,
        input_cost_per_million=0.50,
        output_cost_per_million=1.50,
        description="GPT-3.5 Turbo",
    ),
    # NOTE(review): OpenAIChatModel.GPT4 and OpenAIChatModel.GPT4o both have
    # the value "gpt-4o", so this entry shares its key with the GPT4o entry
    # further down and is overwritten by it when the dict is built. It is
    # kept so the key's position in iteration order does not change; confirm
    # whether it should be removed or keyed differently.
    OpenAIChatModel.GPT4.value: ModelInfo(
        name=OpenAIChatModel.GPT4.value,
        provider=ModelProvider.OPENAI,
        context_length=8192,
        max_output_tokens=8192,
        input_cost_per_million=30.0,
        output_cost_per_million=60.0,
        description="GPT-4 (8K context)",
    ),
    OpenAIChatModel.GPT4_TURBO.value: ModelInfo(
        name=OpenAIChatModel.GPT4_TURBO.value,
        provider=ModelProvider.OPENAI,
        context_length=128_000,
        max_output_tokens=4096,
        input_cost_per_million=10.0,
        output_cost_per_million=30.0,
        description="GPT-4 Turbo",
    ),
    OpenAIChatModel.GPT4_1_NANO.value: ModelInfo(
        name=OpenAIChatModel.GPT4_1_NANO.value,
        provider=ModelProvider.OPENAI,
        context_length=1_047_576,
        max_output_tokens=32_768,
        input_cost_per_million=0.10,
        cached_cost_per_million=0.025,
        output_cost_per_million=0.40,
        description="GPT-4.1 Nano",
    ),
    OpenAIChatModel.GPT4_1_MINI.value: ModelInfo(
        name=OpenAIChatModel.GPT4_1_MINI.value,
        provider=ModelProvider.OPENAI,
        context_length=1_047_576,
        max_output_tokens=32_768,
        input_cost_per_million=0.40,
        cached_cost_per_million=0.10,
        output_cost_per_million=1.60,
        description="GPT-4.1 Mini",
    ),
    OpenAIChatModel.GPT4_1.value: ModelInfo(
        name=OpenAIChatModel.GPT4_1.value,
        provider=ModelProvider.OPENAI,
        context_length=1_047_576,
        max_output_tokens=32_768,
        input_cost_per_million=2.00,
        cached_cost_per_million=0.50,
        output_cost_per_million=8.00,
        description="GPT-4.1",
    ),
    OpenAIChatModel.GPT4o.value: ModelInfo(
        name=OpenAIChatModel.GPT4o.value,
        provider=ModelProvider.OPENAI,
        context_length=128_000,
        max_output_tokens=16_384,
        input_cost_per_million=2.5,
        cached_cost_per_million=1.25,
        output_cost_per_million=10.0,
        has_structured_output=True,
        description="GPT-4o (128K context)",
    ),
    OpenAIChatModel.GPT4o_MINI.value: ModelInfo(
        name=OpenAIChatModel.GPT4o_MINI.value,
        provider=ModelProvider.OPENAI,
        context_length=128_000,
        max_output_tokens=16_384,
        input_cost_per_million=0.15,
        cached_cost_per_million=0.075,
        output_cost_per_million=0.60,
        has_structured_output=True,
        description="GPT-4o Mini",
    ),
    OpenAIChatModel.O1.value: ModelInfo(
        name=OpenAIChatModel.O1.value,
        provider=ModelProvider.OPENAI,
        context_length=200_000,
        max_output_tokens=100_000,
        input_cost_per_million=15.0,
        cached_cost_per_million=7.50,
        output_cost_per_million=60.0,
        allows_streaming=True,
        allows_system_message=False,
        unsupported_params=["temperature"],
        rename_params={"max_tokens": "max_completion_tokens"},
        has_tools=False,
        description="O1 Reasoning LM",
    ),
    OpenAIChatModel.O3.value: ModelInfo(
        name=OpenAIChatModel.O3.value,
        provider=ModelProvider.OPENAI,
        context_length=200_000,
        max_output_tokens=100_000,
        input_cost_per_million=2.0,
        cached_cost_per_million=0.50,
        output_cost_per_million=8.0,
        allows_streaming=True,
        allows_system_message=False,
        unsupported_params=["temperature"],
        rename_params={"max_tokens": "max_completion_tokens"},
        has_tools=False,
        description="O3 Reasoning LM",
    ),
    OpenAIChatModel.O1_MINI.value: ModelInfo(
        name=OpenAIChatModel.O1_MINI.value,
        provider=ModelProvider.OPENAI,
        context_length=128_000,
        max_output_tokens=65_536,
        input_cost_per_million=1.1,
        cached_cost_per_million=0.55,
        output_cost_per_million=4.4,
        allows_streaming=False,
        allows_system_message=False,
        unsupported_params=["temperature", "stream"],
        rename_params={"max_tokens": "max_completion_tokens"},
        has_tools=False,
        description="O1 Mini Reasoning LM",
    ),
    OpenAIChatModel.O3_MINI.value: ModelInfo(
        name=OpenAIChatModel.O3_MINI.value,
        provider=ModelProvider.OPENAI,
        context_length=200_000,
        max_output_tokens=100_000,
        input_cost_per_million=1.1,
        cached_cost_per_million=0.55,
        output_cost_per_million=4.4,
        allows_streaming=False,
        allows_system_message=False,
        unsupported_params=["temperature", "stream"],
        rename_params={"max_tokens": "max_completion_tokens"},
        has_tools=False,
        description="O3 Mini Reasoning LM",
    ),
    OpenAIChatModel.O4_MINI.value: ModelInfo(
        name=OpenAIChatModel.O4_MINI.value,
        provider=ModelProvider.OPENAI,
        context_length=200_000,
        max_output_tokens=100_000,
        input_cost_per_million=1.10,
        cached_cost_per_million=0.275,
        output_cost_per_million=4.40,
        allows_streaming=False,
        allows_system_message=False,
        unsupported_params=["temperature", "stream"],
        rename_params={"max_tokens": "max_completion_tokens"},
        has_tools=False,
        description="O4 Mini Reasoning LM",
    ),
    # Anthropic Models
    AnthropicModel.CLAUDE_3_5_SONNET.value: ModelInfo(
        name=AnthropicModel.CLAUDE_3_5_SONNET.value,
        provider=ModelProvider.ANTHROPIC,
        context_length=200_000,
        max_output_tokens=8192,
        input_cost_per_million=3.0,
        cached_cost_per_million=0.30,
        output_cost_per_million=15.0,
        description="Claude 3.5 Sonnet",
    ),
    AnthropicModel.CLAUDE_3_OPUS.value: ModelInfo(
        name=AnthropicModel.CLAUDE_3_OPUS.value,
        provider=ModelProvider.ANTHROPIC,
        context_length=200_000,
        max_output_tokens=4096,
        input_cost_per_million=15.0,
        cached_cost_per_million=1.50,
        output_cost_per_million=75.0,
        description="Claude 3 Opus",
    ),
    AnthropicModel.CLAUDE_3_SONNET.value: ModelInfo(
        name=AnthropicModel.CLAUDE_3_SONNET.value,
        provider=ModelProvider.ANTHROPIC,
        context_length=200_000,
        max_output_tokens=4096,
        input_cost_per_million=3.0,
        cached_cost_per_million=0.30,
        output_cost_per_million=15.0,
        description="Claude 3 Sonnet",
    ),
    AnthropicModel.CLAUDE_3_HAIKU.value: ModelInfo(
        name=AnthropicModel.CLAUDE_3_HAIKU.value,
        provider=ModelProvider.ANTHROPIC,
        context_length=200_000,
        max_output_tokens=4096,
        input_cost_per_million=0.25,
        cached_cost_per_million=0.03,
        output_cost_per_million=1.25,
        description="Claude 3 Haiku",
    ),
    # DeepSeek Models
    DeepSeekModel.DEEPSEEK.value: ModelInfo(
        name=DeepSeekModel.DEEPSEEK.value,
        provider=ModelProvider.DEEPSEEK,
        context_length=64_000,
        max_output_tokens=8_000,
        input_cost_per_million=0.27,
        cached_cost_per_million=0.07,
        output_cost_per_million=1.10,
        description="DeepSeek Chat",
    ),
    DeepSeekModel.DEEPSEEK_R1.value: ModelInfo(
        name=DeepSeekModel.DEEPSEEK_R1.value,
        provider=ModelProvider.DEEPSEEK,
        context_length=64_000,
        max_output_tokens=8_000,
        input_cost_per_million=0.55,
        cached_cost_per_million=0.14,
        output_cost_per_million=2.19,
        description="DeepSeek-R1 Reasoning LM",
    ),
    # Gemini Models
    GeminiModel.GEMINI_2_FLASH.value: ModelInfo(
        name=GeminiModel.GEMINI_2_FLASH.value,
        provider=ModelProvider.GOOGLE,
        context_length=1_056_768,
        max_output_tokens=8192,
        input_cost_per_million=0.10,
        cached_cost_per_million=0.025,
        output_cost_per_million=0.40,
        rename_params={"max_tokens": "max_completion_tokens"},
        description="Gemini 2.0 Flash",
    ),
    GeminiModel.GEMINI_2_FLASH_LITE.value: ModelInfo(
        name=GeminiModel.GEMINI_2_FLASH_LITE.value,
        provider=ModelProvider.GOOGLE,
        context_length=1_056_768,
        max_output_tokens=8192,
        input_cost_per_million=0.075,
        output_cost_per_million=0.30,
        rename_params={"max_tokens": "max_completion_tokens"},
        description="Gemini 2.0 Flash Lite Preview",
    ),
    GeminiModel.GEMINI_1_5_FLASH.value: ModelInfo(
        name=GeminiModel.GEMINI_1_5_FLASH.value,
        provider=ModelProvider.GOOGLE,
        context_length=1_056_768,
        max_output_tokens=8192,
        rename_params={"max_tokens": "max_completion_tokens"},
        description="Gemini 1.5 Flash",
    ),
    GeminiModel.GEMINI_1_5_FLASH_8B.value: ModelInfo(
        name=GeminiModel.GEMINI_1_5_FLASH_8B.value,
        provider=ModelProvider.GOOGLE,
        context_length=1_000_000,
        max_output_tokens=8192,
        rename_params={"max_tokens": "max_completion_tokens"},
        description="Gemini 1.5 Flash 8B",
    ),
    GeminiModel.GEMINI_1_5_PRO.value: ModelInfo(
        name=GeminiModel.GEMINI_1_5_PRO.value,
        provider=ModelProvider.GOOGLE,
        context_length=2_000_000,
        max_output_tokens=8192,
        rename_params={"max_tokens": "max_completion_tokens"},
        description="Gemini 1.5 Pro",
    ),
    GeminiModel.GEMINI_2_PRO.value: ModelInfo(
        name=GeminiModel.GEMINI_2_PRO.value,
        provider=ModelProvider.GOOGLE,
        context_length=2_000_000,
        max_output_tokens=8192,
        rename_params={"max_tokens": "max_completion_tokens"},
        description="Gemini 2 Pro Exp 02-05",
    ),
    GeminiModel.GEMINI_2_FLASH_THINKING.value: ModelInfo(
        name=GeminiModel.GEMINI_2_FLASH_THINKING.value,
        provider=ModelProvider.GOOGLE,
        context_length=1_000_000,
        max_output_tokens=64_000,
        rename_params={"max_tokens": "max_completion_tokens"},
        description="Gemini 2.0 Flash Thinking",
    ),
    # Gemini 2.5 Models
    GeminiModel.GEMINI_2_5_PRO.value: ModelInfo(
        name=GeminiModel.GEMINI_2_5_PRO.value,
        provider=ModelProvider.GOOGLE,
        context_length=1_048_576,
        max_output_tokens=65_536,
        input_cost_per_million=1.25,
        cached_cost_per_million=0.31,
        output_cost_per_million=10.0,
        rename_params={"max_tokens": "max_completion_tokens"},
        description="Gemini 2.5 Pro",
    ),
    GeminiModel.GEMINI_2_5_FLASH.value: ModelInfo(
        name=GeminiModel.GEMINI_2_5_FLASH.value,
        provider=ModelProvider.GOOGLE,
        context_length=1_048_576,
        max_output_tokens=65_536,
        input_cost_per_million=0.30,
        cached_cost_per_million=0.075,
        output_cost_per_million=2.50,
        rename_params={"max_tokens": "max_completion_tokens"},
        description="Gemini 2.5 Flash",
    ),
    GeminiModel.GEMINI_2_5_FLASH_LITE_PREVIEW.value: ModelInfo(
        name=GeminiModel.GEMINI_2_5_FLASH_LITE_PREVIEW.value,
        provider=ModelProvider.GOOGLE,
        context_length=65_536,
        max_output_tokens=65_536,
        input_cost_per_million=0.10,
        cached_cost_per_million=0.025,
        output_cost_per_million=0.40,
        rename_params={"max_tokens": "max_completion_tokens"},
        description="Gemini 2.5 Flash Lite Preview",
    ),
}
459
+
460
+
461
def get_model_info(
    model: str | ModelName,
    fallback_models: List[str] | None = None,
) -> ModelInfo:
    """Return the registry info for `model`, trying fallbacks in order.

    Args:
        model: Model name, as a plain string or a `ModelName` member.
        fallback_models: Names to try, in order, when `model` has no
            registry entry. `None` (the default) or an empty sequence means
            there are no fallbacks.

    Returns:
        The `ModelInfo` of the first candidate that has a registry entry, or
        a default-constructed `ModelInfo()` when none of them does.
    """
    # `None` replaces the former shared mutable `[]` default. Unpacking builds
    # a fresh list and also accepts tuples or other iterables of names.
    candidates = [model, *(fallback_models or ())]

    for candidate in candidates:
        info = _get_model_info(candidate)
        # Explicit None check: "no entry" is the only miss condition.
        if info is not None:
            return info

    # Nothing matched: fall back to the all-defaults ModelInfo.
    return ModelInfo()
478
+
479
+
480
def _get_model_info(model: str | ModelName) -> ModelInfo | None:
    """Look up `model` in MODEL_INFO; return None when it has no entry."""
    # A str instance is used as the key directly; anything else is expected
    # to expose its key through `.value`.
    key = model if isinstance(model, str) else model.value
    return MODEL_INFO.get(key)