ragaai-catalyst 2.0.7.2b1__py3-none-any.whl → 2.1b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. ragaai_catalyst/dataset.py +0 -3
  2. ragaai_catalyst/evaluation.py +1 -2
  3. ragaai_catalyst/tracers/__init__.py +1 -1
  4. ragaai_catalyst/tracers/agentic_tracing/agent_tracer.py +217 -106
  5. ragaai_catalyst/tracers/agentic_tracing/agentic_tracing.py +27 -41
  6. ragaai_catalyst/tracers/agentic_tracing/base.py +127 -21
  7. ragaai_catalyst/tracers/agentic_tracing/data_structure.py +88 -79
  8. ragaai_catalyst/tracers/agentic_tracing/examples/FinancialAnalysisSystem.ipynb +536 -0
  9. ragaai_catalyst/tracers/agentic_tracing/examples/GameActivityEventPlanner.ipynb +134 -0
  10. ragaai_catalyst/tracers/agentic_tracing/examples/TravelPlanner.ipynb +563 -0
  11. ragaai_catalyst/tracers/agentic_tracing/file_name_tracker.py +46 -0
  12. ragaai_catalyst/tracers/agentic_tracing/llm_tracer.py +258 -356
  13. ragaai_catalyst/tracers/agentic_tracing/tool_tracer.py +31 -19
  14. ragaai_catalyst/tracers/agentic_tracing/unique_decorator.py +61 -117
  15. ragaai_catalyst/tracers/agentic_tracing/upload_agentic_traces.py +187 -0
  16. ragaai_catalyst/tracers/agentic_tracing/upload_code.py +115 -0
  17. ragaai_catalyst/tracers/agentic_tracing/user_interaction_tracer.py +35 -59
  18. ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +0 -4
  19. ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +2201 -324
  20. ragaai_catalyst/tracers/agentic_tracing/zip_list_of_unique_files.py +342 -0
  21. ragaai_catalyst/tracers/exporters/raga_exporter.py +1 -7
  22. ragaai_catalyst/tracers/llamaindex_callback.py +56 -60
  23. ragaai_catalyst/tracers/tracer.py +6 -2
  24. ragaai_catalyst/tracers/upload_traces.py +46 -57
  25. {ragaai_catalyst-2.0.7.2b1.dist-info → ragaai_catalyst-2.1b1.dist-info}/METADATA +6 -2
  26. {ragaai_catalyst-2.0.7.2b1.dist-info → ragaai_catalyst-2.1b1.dist-info}/RECORD +28 -22
  27. ragaai_catalyst/tracers/agentic_tracing/Untitled-1.json +0 -660
  28. {ragaai_catalyst-2.0.7.2b1.dist-info → ragaai_catalyst-2.1b1.dist-info}/WHEEL +0 -0
  29. {ragaai_catalyst-2.0.7.2b1.dist-info → ragaai_catalyst-2.1b1.dist-info}/top_level.txt +0 -0
@@ -9,7 +9,82 @@
9
9
  "mode": "one of chat, embedding, completion, image_generation, audio_transcription, audio_speech",
10
10
  "supports_function_calling": true,
11
11
  "supports_parallel_function_calling": true,
12
- "supports_vision": true
12
+ "supports_vision": true,
13
+ "supports_audio_input": true,
14
+ "supports_audio_output": true,
15
+ "supports_prompt_caching": true,
16
+ "supports_response_schema": true,
17
+ "supports_system_messages": true
18
+ },
19
+ "sambanova/Meta-Llama-3.1-8B-Instruct": {
20
+ "max_tokens": 16000,
21
+ "max_input_tokens": 16000,
22
+ "max_output_tokens": 16000,
23
+ "input_cost_per_token": 0.0000001,
24
+ "output_cost_per_token": 0.0000002,
25
+ "litellm_provider": "sambanova",
26
+ "supports_function_calling": true,
27
+ "mode": "chat"
28
+ },
29
+ "sambanova/Meta-Llama-3.1-70B-Instruct": {
30
+ "max_tokens": 128000,
31
+ "max_input_tokens": 128000,
32
+ "max_output_tokens": 128000,
33
+ "input_cost_per_token": 0.0000006,
34
+ "output_cost_per_token": 0.0000012,
35
+ "litellm_provider": "sambanova",
36
+ "supports_function_calling": true,
37
+ "mode": "chat"
38
+ },
39
+ "sambanova/Meta-Llama-3.1-405B-Instruct": {
40
+ "max_tokens": 16000,
41
+ "max_input_tokens": 16000,
42
+ "max_output_tokens": 16000,
43
+ "input_cost_per_token": 0.000005,
44
+ "output_cost_per_token": 0.000010,
45
+ "litellm_provider": "sambanova",
46
+ "supports_function_calling": true,
47
+ "mode": "chat"
48
+ },
49
+ "sambanova/Meta-Llama-3.2-1B-Instruct": {
50
+ "max_tokens": 16000,
51
+ "max_input_tokens": 16000,
52
+ "max_output_tokens": 16000,
53
+ "input_cost_per_token": 0.0000004,
54
+ "output_cost_per_token": 0.0000008,
55
+ "litellm_provider": "sambanova",
56
+ "supports_function_calling": true,
57
+ "mode": "chat"
58
+ },
59
+ "sambanova/Meta-Llama-3.2-3B-Instruct": {
60
+ "max_tokens": 4000,
61
+ "max_input_tokens": 4000,
62
+ "max_output_tokens": 4000,
63
+ "input_cost_per_token": 0.0000008,
64
+ "output_cost_per_token": 0.0000016,
65
+ "litellm_provider": "sambanova",
66
+ "supports_function_calling": true,
67
+ "mode": "chat"
68
+ },
69
+ "sambanova/Qwen2.5-Coder-32B-Instruct": {
70
+ "max_tokens": 8000,
71
+ "max_input_tokens": 8000,
72
+ "max_output_tokens": 8000,
73
+ "input_cost_per_token": 0.0000015,
74
+ "output_cost_per_token": 0.000003,
75
+ "litellm_provider": "sambanova",
76
+ "supports_function_calling": true,
77
+ "mode": "chat"
78
+ },
79
+ "sambanova/Qwen2.5-72B-Instruct": {
80
+ "max_tokens": 8000,
81
+ "max_input_tokens": 8000,
82
+ "max_output_tokens": 8000,
83
+ "input_cost_per_token": 0.000002,
84
+ "output_cost_per_token": 0.000004,
85
+ "litellm_provider": "sambanova",
86
+ "supports_function_calling": true,
87
+ "mode": "chat"
13
88
  },
14
89
  "gpt-4": {
15
90
  "max_tokens": 4096,
@@ -19,19 +94,91 @@
19
94
  "output_cost_per_token": 0.00006,
20
95
  "litellm_provider": "openai",
21
96
  "mode": "chat",
22
- "supports_function_calling": true
97
+ "supports_function_calling": true,
98
+ "supports_prompt_caching": true,
99
+ "supports_system_messages": true
23
100
  },
24
101
  "gpt-4o": {
25
- "max_tokens": 4096,
102
+ "max_tokens": 16384,
26
103
  "max_input_tokens": 128000,
27
- "max_output_tokens": 4096,
28
- "input_cost_per_token": 0.000005,
29
- "output_cost_per_token": 0.000015,
104
+ "max_output_tokens": 16384,
105
+ "input_cost_per_token": 0.0000025,
106
+ "output_cost_per_token": 0.000010,
107
+ "input_cost_per_token_batches": 0.00000125,
108
+ "output_cost_per_token_batches": 0.00000500,
109
+ "cache_read_input_token_cost": 0.00000125,
30
110
  "litellm_provider": "openai",
31
111
  "mode": "chat",
32
112
  "supports_function_calling": true,
33
113
  "supports_parallel_function_calling": true,
34
- "supports_vision": true
114
+ "supports_response_schema": true,
115
+ "supports_vision": true,
116
+ "supports_prompt_caching": true,
117
+ "supports_system_messages": true
118
+ },
119
+ "gpt-4o-audio-preview": {
120
+ "max_tokens": 16384,
121
+ "max_input_tokens": 128000,
122
+ "max_output_tokens": 16384,
123
+ "input_cost_per_token": 0.0000025,
124
+ "input_cost_per_audio_token": 0.0001,
125
+ "output_cost_per_token": 0.000010,
126
+ "output_cost_per_audio_token": 0.0002,
127
+ "litellm_provider": "openai",
128
+ "mode": "chat",
129
+ "supports_function_calling": true,
130
+ "supports_parallel_function_calling": true,
131
+ "supports_audio_input": true,
132
+ "supports_audio_output": true,
133
+ "supports_system_messages": true
134
+ },
135
+ "gpt-4o-audio-preview-2024-12-17": {
136
+ "max_tokens": 16384,
137
+ "max_input_tokens": 128000,
138
+ "max_output_tokens": 16384,
139
+ "input_cost_per_token": 0.0000025,
140
+ "input_cost_per_audio_token": 0.00004,
141
+ "output_cost_per_token": 0.000010,
142
+ "output_cost_per_audio_token": 0.00008,
143
+ "litellm_provider": "openai",
144
+ "mode": "chat",
145
+ "supports_function_calling": true,
146
+ "supports_parallel_function_calling": true,
147
+ "supports_audio_input": true,
148
+ "supports_audio_output": true,
149
+ "supports_system_messages": true
150
+ },
151
+ "gpt-4o-audio-preview-2024-10-01": {
152
+ "max_tokens": 16384,
153
+ "max_input_tokens": 128000,
154
+ "max_output_tokens": 16384,
155
+ "input_cost_per_token": 0.0000025,
156
+ "input_cost_per_audio_token": 0.0001,
157
+ "output_cost_per_token": 0.000010,
158
+ "output_cost_per_audio_token": 0.0002,
159
+ "litellm_provider": "openai",
160
+ "mode": "chat",
161
+ "supports_function_calling": true,
162
+ "supports_parallel_function_calling": true,
163
+ "supports_audio_input": true,
164
+ "supports_audio_output": true,
165
+ "supports_system_messages": true
166
+ },
167
+ "gpt-4o-mini-audio-preview-2024-12-17": {
168
+ "max_tokens": 16384,
169
+ "max_input_tokens": 128000,
170
+ "max_output_tokens": 16384,
171
+ "input_cost_per_token": 0.00000015,
172
+ "input_cost_per_audio_token": 0.00001,
173
+ "output_cost_per_token": 0.0000006,
174
+ "output_cost_per_audio_token": 0.00002,
175
+ "litellm_provider": "openai",
176
+ "mode": "chat",
177
+ "supports_function_calling": true,
178
+ "supports_parallel_function_calling": true,
179
+ "supports_audio_input": true,
180
+ "supports_audio_output": true,
181
+ "supports_system_messages": true
35
182
  },
36
183
  "gpt-4o-mini": {
37
184
  "max_tokens": 16384,
@@ -39,11 +186,17 @@
39
186
  "max_output_tokens": 16384,
40
187
  "input_cost_per_token": 0.00000015,
41
188
  "output_cost_per_token": 0.00000060,
189
+ "input_cost_per_token_batches": 0.000000075,
190
+ "output_cost_per_token_batches": 0.00000030,
191
+ "cache_read_input_token_cost": 0.000000075,
42
192
  "litellm_provider": "openai",
43
193
  "mode": "chat",
44
194
  "supports_function_calling": true,
45
195
  "supports_parallel_function_calling": true,
46
- "supports_vision": true
196
+ "supports_response_schema": true,
197
+ "supports_vision": true,
198
+ "supports_prompt_caching": true,
199
+ "supports_system_messages": true
47
200
  },
48
201
  "gpt-4o-mini-2024-07-18": {
49
202
  "max_tokens": 16384,
@@ -51,11 +204,33 @@
51
204
  "max_output_tokens": 16384,
52
205
  "input_cost_per_token": 0.00000015,
53
206
  "output_cost_per_token": 0.00000060,
207
+ "input_cost_per_token_batches": 0.000000075,
208
+ "output_cost_per_token_batches": 0.00000030,
209
+ "cache_read_input_token_cost": 0.000000075,
54
210
  "litellm_provider": "openai",
55
211
  "mode": "chat",
56
212
  "supports_function_calling": true,
57
213
  "supports_parallel_function_calling": true,
58
- "supports_vision": true
214
+ "supports_response_schema": true,
215
+ "supports_vision": true,
216
+ "supports_prompt_caching": true,
217
+ "supports_system_messages": true
218
+ },
219
+ "o1": {
220
+ "max_tokens": 100000,
221
+ "max_input_tokens": 200000,
222
+ "max_output_tokens": 100000,
223
+ "input_cost_per_token": 0.000015,
224
+ "output_cost_per_token": 0.00006,
225
+ "cache_read_input_token_cost": 0.0000075,
226
+ "litellm_provider": "openai",
227
+ "mode": "chat",
228
+ "supports_function_calling": true,
229
+ "supports_parallel_function_calling": true,
230
+ "supports_vision": true,
231
+ "supports_prompt_caching": true,
232
+ "supports_system_messages": true,
233
+ "supports_response_schema": true
59
234
  },
60
235
  "o1-mini": {
61
236
  "max_tokens": 65536,
@@ -63,11 +238,11 @@
63
238
  "max_output_tokens": 65536,
64
239
  "input_cost_per_token": 0.000003,
65
240
  "output_cost_per_token": 0.000012,
241
+ "cache_read_input_token_cost": 0.0000015,
66
242
  "litellm_provider": "openai",
67
243
  "mode": "chat",
68
- "supports_function_calling": true,
69
- "supports_parallel_function_calling": true,
70
- "supports_vision": true
244
+ "supports_vision": true,
245
+ "supports_prompt_caching": true
71
246
  },
72
247
  "o1-mini-2024-09-12": {
73
248
  "max_tokens": 65536,
@@ -75,11 +250,11 @@
75
250
  "max_output_tokens": 65536,
76
251
  "input_cost_per_token": 0.000003,
77
252
  "output_cost_per_token": 0.000012,
253
+ "cache_read_input_token_cost": 0.0000015,
78
254
  "litellm_provider": "openai",
79
255
  "mode": "chat",
80
- "supports_function_calling": true,
81
- "supports_parallel_function_calling": true,
82
- "supports_vision": true
256
+ "supports_vision": true,
257
+ "supports_prompt_caching": true
83
258
  },
84
259
  "o1-preview": {
85
260
  "max_tokens": 32768,
@@ -87,11 +262,11 @@
87
262
  "max_output_tokens": 32768,
88
263
  "input_cost_per_token": 0.000015,
89
264
  "output_cost_per_token": 0.000060,
265
+ "cache_read_input_token_cost": 0.0000075,
90
266
  "litellm_provider": "openai",
91
267
  "mode": "chat",
92
- "supports_function_calling": true,
93
- "supports_parallel_function_calling": true,
94
- "supports_vision": true
268
+ "supports_vision": true,
269
+ "supports_prompt_caching": true
95
270
  },
96
271
  "o1-preview-2024-09-12": {
97
272
  "max_tokens": 32768,
@@ -99,11 +274,27 @@
99
274
  "max_output_tokens": 32768,
100
275
  "input_cost_per_token": 0.000015,
101
276
  "output_cost_per_token": 0.000060,
277
+ "cache_read_input_token_cost": 0.0000075,
278
+ "litellm_provider": "openai",
279
+ "mode": "chat",
280
+ "supports_vision": true,
281
+ "supports_prompt_caching": true
282
+ },
283
+ "o1-2024-12-17": {
284
+ "max_tokens": 100000,
285
+ "max_input_tokens": 200000,
286
+ "max_output_tokens": 100000,
287
+ "input_cost_per_token": 0.000015,
288
+ "output_cost_per_token": 0.000060,
289
+ "cache_read_input_token_cost": 0.0000075,
102
290
  "litellm_provider": "openai",
103
291
  "mode": "chat",
104
292
  "supports_function_calling": true,
105
293
  "supports_parallel_function_calling": true,
106
- "supports_vision": true
294
+ "supports_vision": true,
295
+ "supports_prompt_caching": true,
296
+ "supports_system_messages": true,
297
+ "supports_response_schema": true
107
298
  },
108
299
  "chatgpt-4o-latest": {
109
300
  "max_tokens": 4096,
@@ -115,7 +306,9 @@
115
306
  "mode": "chat",
116
307
  "supports_function_calling": true,
117
308
  "supports_parallel_function_calling": true,
118
- "supports_vision": true
309
+ "supports_vision": true,
310
+ "supports_prompt_caching": true,
311
+ "supports_system_messages": true
119
312
  },
120
313
  "gpt-4o-2024-05-13": {
121
314
  "max_tokens": 4096,
@@ -123,11 +316,15 @@
123
316
  "max_output_tokens": 4096,
124
317
  "input_cost_per_token": 0.000005,
125
318
  "output_cost_per_token": 0.000015,
319
+ "input_cost_per_token_batches": 0.0000025,
320
+ "output_cost_per_token_batches": 0.0000075,
126
321
  "litellm_provider": "openai",
127
322
  "mode": "chat",
128
323
  "supports_function_calling": true,
129
324
  "supports_parallel_function_calling": true,
130
- "supports_vision": true
325
+ "supports_vision": true,
326
+ "supports_prompt_caching": true,
327
+ "supports_system_messages": true
131
328
  },
132
329
  "gpt-4o-2024-08-06": {
133
330
  "max_tokens": 16384,
@@ -135,11 +332,123 @@
135
332
  "max_output_tokens": 16384,
136
333
  "input_cost_per_token": 0.0000025,
137
334
  "output_cost_per_token": 0.000010,
335
+ "input_cost_per_token_batches": 0.00000125,
336
+ "output_cost_per_token_batches": 0.0000050,
337
+ "cache_read_input_token_cost": 0.00000125,
138
338
  "litellm_provider": "openai",
139
339
  "mode": "chat",
140
340
  "supports_function_calling": true,
141
341
  "supports_parallel_function_calling": true,
142
- "supports_vision": true
342
+ "supports_response_schema": true,
343
+ "supports_vision": true,
344
+ "supports_prompt_caching": true,
345
+ "supports_system_messages": true
346
+ },
347
+ "gpt-4o-2024-11-20": {
348
+ "max_tokens": 16384,
349
+ "max_input_tokens": 128000,
350
+ "max_output_tokens": 16384,
351
+ "input_cost_per_token": 0.0000025,
352
+ "output_cost_per_token": 0.000010,
353
+ "input_cost_per_token_batches": 0.00000125,
354
+ "output_cost_per_token_batches": 0.0000050,
355
+ "cache_read_input_token_cost": 0.00000125,
356
+ "litellm_provider": "openai",
357
+ "mode": "chat",
358
+ "supports_function_calling": true,
359
+ "supports_parallel_function_calling": true,
360
+ "supports_response_schema": true,
361
+ "supports_vision": true,
362
+ "supports_prompt_caching": true,
363
+ "supports_system_messages": true
364
+ },
365
+ "gpt-4o-realtime-preview-2024-10-01": {
366
+ "max_tokens": 4096,
367
+ "max_input_tokens": 128000,
368
+ "max_output_tokens": 4096,
369
+ "input_cost_per_token": 0.000005,
370
+ "input_cost_per_audio_token": 0.0001,
371
+ "cache_read_input_token_cost": 0.0000025,
372
+ "cache_creation_input_audio_token_cost": 0.00002,
373
+ "output_cost_per_token": 0.00002,
374
+ "output_cost_per_audio_token": 0.0002,
375
+ "litellm_provider": "openai",
376
+ "mode": "chat",
377
+ "supports_function_calling": true,
378
+ "supports_parallel_function_calling": true,
379
+ "supports_audio_input": true,
380
+ "supports_audio_output": true,
381
+ "supports_system_messages": true
382
+ },
383
+ "gpt-4o-realtime-preview": {
384
+ "max_tokens": 4096,
385
+ "max_input_tokens": 128000,
386
+ "max_output_tokens": 4096,
387
+ "input_cost_per_token": 0.000005,
388
+ "input_cost_per_audio_token": 0.00004,
389
+ "cache_read_input_token_cost": 0.0000025,
390
+ "output_cost_per_token": 0.00002,
391
+ "output_cost_per_audio_token": 0.00008,
392
+ "litellm_provider": "openai",
393
+ "mode": "chat",
394
+ "supports_function_calling": true,
395
+ "supports_parallel_function_calling": true,
396
+ "supports_audio_input": true,
397
+ "supports_audio_output": true,
398
+ "supports_system_messages": true
399
+ },
400
+ "gpt-4o-realtime-preview-2024-12-17": {
401
+ "max_tokens": 4096,
402
+ "max_input_tokens": 128000,
403
+ "max_output_tokens": 4096,
404
+ "input_cost_per_token": 0.000005,
405
+ "input_cost_per_audio_token": 0.00004,
406
+ "cache_read_input_token_cost": 0.0000025,
407
+ "output_cost_per_token": 0.00002,
408
+ "output_cost_per_audio_token": 0.00008,
409
+ "litellm_provider": "openai",
410
+ "mode": "chat",
411
+ "supports_function_calling": true,
412
+ "supports_parallel_function_calling": true,
413
+ "supports_audio_input": true,
414
+ "supports_audio_output": true,
415
+ "supports_system_messages": true
416
+ },
417
+ "gpt-4o-mini-realtime-preview": {
418
+ "max_tokens": 4096,
419
+ "max_input_tokens": 128000,
420
+ "max_output_tokens": 4096,
421
+ "input_cost_per_token": 0.0000006,
422
+ "input_cost_per_audio_token": 0.00001,
423
+ "cache_read_input_token_cost": 0.0000003,
424
+ "cache_creation_input_audio_token_cost": 0.0000003,
425
+ "output_cost_per_token": 0.0000024,
426
+ "output_cost_per_audio_token": 0.00002,
427
+ "litellm_provider": "openai",
428
+ "mode": "chat",
429
+ "supports_function_calling": true,
430
+ "supports_parallel_function_calling": true,
431
+ "supports_audio_input": true,
432
+ "supports_audio_output": true,
433
+ "supports_system_messages": true
434
+ },
435
+ "gpt-4o-mini-realtime-preview-2024-12-17": {
436
+ "max_tokens": 4096,
437
+ "max_input_tokens": 128000,
438
+ "max_output_tokens": 4096,
439
+ "input_cost_per_token": 0.0000006,
440
+ "input_cost_per_audio_token": 0.00001,
441
+ "cache_read_input_token_cost": 0.0000003,
442
+ "cache_creation_input_audio_token_cost": 0.0000003,
443
+ "output_cost_per_token": 0.0000024,
444
+ "output_cost_per_audio_token": 0.00002,
445
+ "litellm_provider": "openai",
446
+ "mode": "chat",
447
+ "supports_function_calling": true,
448
+ "supports_parallel_function_calling": true,
449
+ "supports_audio_input": true,
450
+ "supports_audio_output": true,
451
+ "supports_system_messages": true
143
452
  },
144
453
  "gpt-4-turbo-preview": {
145
454
  "max_tokens": 4096,
@@ -150,7 +459,9 @@
150
459
  "litellm_provider": "openai",
151
460
  "mode": "chat",
152
461
  "supports_function_calling": true,
153
- "supports_parallel_function_calling": true
462
+ "supports_parallel_function_calling": true,
463
+ "supports_prompt_caching": true,
464
+ "supports_system_messages": true
154
465
  },
155
466
  "gpt-4-0314": {
156
467
  "max_tokens": 4096,
@@ -159,7 +470,9 @@
159
470
  "input_cost_per_token": 0.00003,
160
471
  "output_cost_per_token": 0.00006,
161
472
  "litellm_provider": "openai",
162
- "mode": "chat"
473
+ "mode": "chat",
474
+ "supports_prompt_caching": true,
475
+ "supports_system_messages": true
163
476
  },
164
477
  "gpt-4-0613": {
165
478
  "max_tokens": 4096,
@@ -169,7 +482,9 @@
169
482
  "output_cost_per_token": 0.00006,
170
483
  "litellm_provider": "openai",
171
484
  "mode": "chat",
172
- "supports_function_calling": true
485
+ "supports_function_calling": true,
486
+ "supports_prompt_caching": true,
487
+ "supports_system_messages": true
173
488
  },
174
489
  "gpt-4-32k": {
175
490
  "max_tokens": 4096,
@@ -178,7 +493,9 @@
178
493
  "input_cost_per_token": 0.00006,
179
494
  "output_cost_per_token": 0.00012,
180
495
  "litellm_provider": "openai",
181
- "mode": "chat"
496
+ "mode": "chat",
497
+ "supports_prompt_caching": true,
498
+ "supports_system_messages": true
182
499
  },
183
500
  "gpt-4-32k-0314": {
184
501
  "max_tokens": 4096,
@@ -187,7 +504,9 @@
187
504
  "input_cost_per_token": 0.00006,
188
505
  "output_cost_per_token": 0.00012,
189
506
  "litellm_provider": "openai",
190
- "mode": "chat"
507
+ "mode": "chat",
508
+ "supports_prompt_caching": true,
509
+ "supports_system_messages": true
191
510
  },
192
511
  "gpt-4-32k-0613": {
193
512
  "max_tokens": 4096,
@@ -196,7 +515,9 @@
196
515
  "input_cost_per_token": 0.00006,
197
516
  "output_cost_per_token": 0.00012,
198
517
  "litellm_provider": "openai",
199
- "mode": "chat"
518
+ "mode": "chat",
519
+ "supports_prompt_caching": true,
520
+ "supports_system_messages": true
200
521
  },
201
522
  "gpt-4-turbo": {
202
523
  "max_tokens": 4096,
@@ -208,7 +529,9 @@
208
529
  "mode": "chat",
209
530
  "supports_function_calling": true,
210
531
  "supports_parallel_function_calling": true,
211
- "supports_vision": true
532
+ "supports_vision": true,
533
+ "supports_prompt_caching": true,
534
+ "supports_system_messages": true
212
535
  },
213
536
  "gpt-4-turbo-2024-04-09": {
214
537
  "max_tokens": 4096,
@@ -220,7 +543,9 @@
220
543
  "mode": "chat",
221
544
  "supports_function_calling": true,
222
545
  "supports_parallel_function_calling": true,
223
- "supports_vision": true
546
+ "supports_vision": true,
547
+ "supports_prompt_caching": true,
548
+ "supports_system_messages": true
224
549
  },
225
550
  "gpt-4-1106-preview": {
226
551
  "max_tokens": 4096,
@@ -231,7 +556,9 @@
231
556
  "litellm_provider": "openai",
232
557
  "mode": "chat",
233
558
  "supports_function_calling": true,
234
- "supports_parallel_function_calling": true
559
+ "supports_parallel_function_calling": true,
560
+ "supports_prompt_caching": true,
561
+ "supports_system_messages": true
235
562
  },
236
563
  "gpt-4-0125-preview": {
237
564
  "max_tokens": 4096,
@@ -242,7 +569,9 @@
242
569
  "litellm_provider": "openai",
243
570
  "mode": "chat",
244
571
  "supports_function_calling": true,
245
- "supports_parallel_function_calling": true
572
+ "supports_parallel_function_calling": true,
573
+ "supports_prompt_caching": true,
574
+ "supports_system_messages": true
246
575
  },
247
576
  "gpt-4-vision-preview": {
248
577
  "max_tokens": 4096,
@@ -252,7 +581,9 @@
252
581
  "output_cost_per_token": 0.00003,
253
582
  "litellm_provider": "openai",
254
583
  "mode": "chat",
255
- "supports_vision": true
584
+ "supports_vision": true,
585
+ "supports_prompt_caching": true,
586
+ "supports_system_messages": true
256
587
  },
257
588
  "gpt-4-1106-vision-preview": {
258
589
  "max_tokens": 4096,
@@ -262,7 +593,9 @@
262
593
  "output_cost_per_token": 0.00003,
263
594
  "litellm_provider": "openai",
264
595
  "mode": "chat",
265
- "supports_vision": true
596
+ "supports_vision": true,
597
+ "supports_prompt_caching": true,
598
+ "supports_system_messages": true
266
599
  },
267
600
  "gpt-3.5-turbo": {
268
601
  "max_tokens": 4097,
@@ -272,7 +605,9 @@
272
605
  "output_cost_per_token": 0.000002,
273
606
  "litellm_provider": "openai",
274
607
  "mode": "chat",
275
- "supports_function_calling": true
608
+ "supports_function_calling": true,
609
+ "supports_prompt_caching": true,
610
+ "supports_system_messages": true
276
611
  },
277
612
  "gpt-3.5-turbo-0301": {
278
613
  "max_tokens": 4097,
@@ -281,7 +616,9 @@
281
616
  "input_cost_per_token": 0.0000015,
282
617
  "output_cost_per_token": 0.000002,
283
618
  "litellm_provider": "openai",
284
- "mode": "chat"
619
+ "mode": "chat",
620
+ "supports_prompt_caching": true,
621
+ "supports_system_messages": true
285
622
  },
286
623
  "gpt-3.5-turbo-0613": {
287
624
  "max_tokens": 4097,
@@ -291,7 +628,9 @@
291
628
  "output_cost_per_token": 0.000002,
292
629
  "litellm_provider": "openai",
293
630
  "mode": "chat",
294
- "supports_function_calling": true
631
+ "supports_function_calling": true,
632
+ "supports_prompt_caching": true,
633
+ "supports_system_messages": true
295
634
  },
296
635
  "gpt-3.5-turbo-1106": {
297
636
  "max_tokens": 16385,
@@ -302,7 +641,9 @@
302
641
  "litellm_provider": "openai",
303
642
  "mode": "chat",
304
643
  "supports_function_calling": true,
305
- "supports_parallel_function_calling": true
644
+ "supports_parallel_function_calling": true,
645
+ "supports_prompt_caching": true,
646
+ "supports_system_messages": true
306
647
  },
307
648
  "gpt-3.5-turbo-0125": {
308
649
  "max_tokens": 16385,
@@ -313,7 +654,9 @@
313
654
  "litellm_provider": "openai",
314
655
  "mode": "chat",
315
656
  "supports_function_calling": true,
316
- "supports_parallel_function_calling": true
657
+ "supports_parallel_function_calling": true,
658
+ "supports_prompt_caching": true,
659
+ "supports_system_messages": true
317
660
  },
318
661
  "gpt-3.5-turbo-16k": {
319
662
  "max_tokens": 16385,
@@ -322,7 +665,9 @@
322
665
  "input_cost_per_token": 0.000003,
323
666
  "output_cost_per_token": 0.000004,
324
667
  "litellm_provider": "openai",
325
- "mode": "chat"
668
+ "mode": "chat",
669
+ "supports_prompt_caching": true,
670
+ "supports_system_messages": true
326
671
  },
327
672
  "gpt-3.5-turbo-16k-0613": {
328
673
  "max_tokens": 16385,
@@ -331,7 +676,9 @@
331
676
  "input_cost_per_token": 0.000003,
332
677
  "output_cost_per_token": 0.000004,
333
678
  "litellm_provider": "openai",
334
- "mode": "chat"
679
+ "mode": "chat",
680
+ "supports_prompt_caching": true,
681
+ "supports_system_messages": true
335
682
  },
336
683
  "ft:gpt-3.5-turbo": {
337
684
  "max_tokens": 4096,
@@ -339,8 +686,11 @@
339
686
  "max_output_tokens": 4096,
340
687
  "input_cost_per_token": 0.000003,
341
688
  "output_cost_per_token": 0.000006,
689
+ "input_cost_per_token_batches": 0.0000015,
690
+ "output_cost_per_token_batches": 0.000003,
342
691
  "litellm_provider": "openai",
343
- "mode": "chat"
692
+ "mode": "chat",
693
+ "supports_system_messages": true
344
694
  },
345
695
  "ft:gpt-3.5-turbo-0125": {
346
696
  "max_tokens": 4096,
@@ -349,7 +699,8 @@
349
699
  "input_cost_per_token": 0.000003,
350
700
  "output_cost_per_token": 0.000006,
351
701
  "litellm_provider": "openai",
352
- "mode": "chat"
702
+ "mode": "chat",
703
+ "supports_system_messages": true
353
704
  },
354
705
  "ft:gpt-3.5-turbo-1106": {
355
706
  "max_tokens": 4096,
@@ -358,7 +709,8 @@
358
709
  "input_cost_per_token": 0.000003,
359
710
  "output_cost_per_token": 0.000006,
360
711
  "litellm_provider": "openai",
361
- "mode": "chat"
712
+ "mode": "chat",
713
+ "supports_system_messages": true
362
714
  },
363
715
  "ft:gpt-3.5-turbo-0613": {
364
716
  "max_tokens": 4096,
@@ -367,7 +719,8 @@
367
719
  "input_cost_per_token": 0.000003,
368
720
  "output_cost_per_token": 0.000006,
369
721
  "litellm_provider": "openai",
370
- "mode": "chat"
722
+ "mode": "chat",
723
+ "supports_system_messages": true
371
724
  },
372
725
  "ft:gpt-4-0613": {
373
726
  "max_tokens": 4096,
@@ -378,7 +731,8 @@
378
731
  "litellm_provider": "openai",
379
732
  "mode": "chat",
380
733
  "supports_function_calling": true,
381
- "source": "OpenAI needs to add pricing for this ft model, will be updated when added by OpenAI. Defaulting to base model pricing"
734
+ "source": "OpenAI needs to add pricing for this ft model, will be updated when added by OpenAI. Defaulting to base model pricing",
735
+ "supports_system_messages": true
382
736
  },
383
737
  "ft:gpt-4o-2024-08-06": {
384
738
  "max_tokens": 16384,
@@ -386,11 +740,31 @@
386
740
  "max_output_tokens": 16384,
387
741
  "input_cost_per_token": 0.00000375,
388
742
  "output_cost_per_token": 0.000015,
743
+ "input_cost_per_token_batches": 0.000001875,
744
+ "output_cost_per_token_batches": 0.000007500,
389
745
  "litellm_provider": "openai",
390
746
  "mode": "chat",
391
747
  "supports_function_calling": true,
392
748
  "supports_parallel_function_calling": true,
393
- "supports_vision": true
749
+ "supports_response_schema": true,
750
+ "supports_vision": true,
751
+ "supports_system_messages": true
752
+ },
753
+ "ft:gpt-4o-2024-11-20": {
754
+ "max_tokens": 16384,
755
+ "max_input_tokens": 128000,
756
+ "max_output_tokens": 16384,
757
+ "input_cost_per_token": 0.00000375,
758
+ "cache_creation_input_token_cost": 0.000001875,
759
+ "output_cost_per_token": 0.000015,
760
+ "litellm_provider": "openai",
761
+ "mode": "chat",
762
+ "supports_function_calling": true,
763
+ "supports_parallel_function_calling": true,
764
+ "supports_response_schema": true,
765
+ "supports_vision": true,
766
+ "supports_prompt_caching": true,
767
+ "supports_system_messages": true
394
768
  },
395
769
  "ft:gpt-4o-mini-2024-07-18": {
396
770
  "max_tokens": 16384,
@@ -398,11 +772,17 @@
398
772
  "max_output_tokens": 16384,
399
773
  "input_cost_per_token": 0.0000003,
400
774
  "output_cost_per_token": 0.0000012,
775
+ "input_cost_per_token_batches": 0.000000150,
776
+ "output_cost_per_token_batches": 0.000000600,
777
+ "cache_read_input_token_cost": 0.00000015,
401
778
  "litellm_provider": "openai",
402
779
  "mode": "chat",
403
780
  "supports_function_calling": true,
404
781
  "supports_parallel_function_calling": true,
405
- "supports_vision": true
782
+ "supports_response_schema": true,
783
+ "supports_vision": true,
784
+ "supports_prompt_caching": true,
785
+ "supports_system_messages": true
406
786
  },
407
787
  "ft:davinci-002": {
408
788
  "max_tokens": 16384,
@@ -410,6 +790,8 @@
410
790
  "max_output_tokens": 4096,
411
791
  "input_cost_per_token": 0.000002,
412
792
  "output_cost_per_token": 0.000002,
793
+ "input_cost_per_token_batches": 0.000001,
794
+ "output_cost_per_token_batches": 0.000001,
413
795
  "litellm_provider": "text-completion-openai",
414
796
  "mode": "completion"
415
797
  },
@@ -419,6 +801,8 @@
419
801
  "max_output_tokens": 4096,
420
802
  "input_cost_per_token": 0.0000004,
421
803
  "output_cost_per_token": 0.0000004,
804
+ "input_cost_per_token_batches": 0.0000002,
805
+ "output_cost_per_token_batches": 0.0000002,
422
806
  "litellm_provider": "text-completion-openai",
423
807
  "mode": "completion"
424
808
  },
@@ -428,6 +812,8 @@
428
812
  "output_vector_size": 3072,
429
813
  "input_cost_per_token": 0.00000013,
430
814
  "output_cost_per_token": 0.000000,
815
+ "input_cost_per_token_batches": 0.000000065,
816
+ "output_cost_per_token_batches": 0.000000000,
431
817
  "litellm_provider": "openai",
432
818
  "mode": "embedding"
433
819
  },
@@ -437,6 +823,8 @@
437
823
  "output_vector_size": 1536,
438
824
  "input_cost_per_token": 0.00000002,
439
825
  "output_cost_per_token": 0.000000,
826
+ "input_cost_per_token_batches": 0.000000010,
827
+ "output_cost_per_token_batches": 0.000000000,
440
828
  "litellm_provider": "openai",
441
829
  "mode": "embedding"
442
830
  },
@@ -454,6 +842,8 @@
454
842
  "max_input_tokens": 8191,
455
843
  "input_cost_per_token": 0.0000001,
456
844
  "output_cost_per_token": 0.000000,
845
+ "input_cost_per_token_batches": 0.000000050,
846
+ "output_cost_per_token_batches": 0.000000000,
457
847
  "litellm_provider": "openai",
458
848
  "mode": "embedding"
459
849
  },
@@ -570,17 +960,75 @@
570
960
  "output_cost_per_second": 0.0001,
571
961
  "litellm_provider": "azure"
572
962
  },
963
+ "azure/o1-mini": {
964
+ "max_tokens": 65536,
965
+ "max_input_tokens": 128000,
966
+ "max_output_tokens": 65536,
967
+ "input_cost_per_token": 0.000003,
968
+ "output_cost_per_token": 0.000012,
969
+ "cache_read_input_token_cost": 0.0000015,
970
+ "litellm_provider": "azure",
971
+ "mode": "chat",
972
+ "supports_function_calling": true,
973
+ "supports_parallel_function_calling": true,
974
+ "supports_vision": false,
975
+ "supports_prompt_caching": true
976
+ },
977
+ "azure/o1-mini-2024-09-12": {
978
+ "max_tokens": 65536,
979
+ "max_input_tokens": 128000,
980
+ "max_output_tokens": 65536,
981
+ "input_cost_per_token": 0.000003,
982
+ "output_cost_per_token": 0.000012,
983
+ "cache_read_input_token_cost": 0.0000015,
984
+ "litellm_provider": "azure",
985
+ "mode": "chat",
986
+ "supports_function_calling": true,
987
+ "supports_parallel_function_calling": true,
988
+ "supports_vision": false,
989
+ "supports_prompt_caching": true
990
+ },
991
+ "azure/o1-preview": {
992
+ "max_tokens": 32768,
993
+ "max_input_tokens": 128000,
994
+ "max_output_tokens": 32768,
995
+ "input_cost_per_token": 0.000015,
996
+ "output_cost_per_token": 0.000060,
997
+ "cache_read_input_token_cost": 0.0000075,
998
+ "litellm_provider": "azure",
999
+ "mode": "chat",
1000
+ "supports_function_calling": true,
1001
+ "supports_parallel_function_calling": true,
1002
+ "supports_vision": false,
1003
+ "supports_prompt_caching": true
1004
+ },
1005
+ "azure/o1-preview-2024-09-12": {
1006
+ "max_tokens": 32768,
1007
+ "max_input_tokens": 128000,
1008
+ "max_output_tokens": 32768,
1009
+ "input_cost_per_token": 0.000015,
1010
+ "output_cost_per_token": 0.000060,
1011
+ "cache_read_input_token_cost": 0.0000075,
1012
+ "litellm_provider": "azure",
1013
+ "mode": "chat",
1014
+ "supports_function_calling": true,
1015
+ "supports_parallel_function_calling": true,
1016
+ "supports_vision": false,
1017
+ "supports_prompt_caching": true
1018
+ },
573
1019
  "azure/gpt-4o": {
574
1020
  "max_tokens": 4096,
575
1021
  "max_input_tokens": 128000,
576
1022
  "max_output_tokens": 4096,
577
1023
  "input_cost_per_token": 0.000005,
578
1024
  "output_cost_per_token": 0.000015,
1025
+ "cache_read_input_token_cost": 0.00000125,
579
1026
  "litellm_provider": "azure",
580
1027
  "mode": "chat",
581
1028
  "supports_function_calling": true,
582
1029
  "supports_parallel_function_calling": true,
583
- "supports_vision": true
1030
+ "supports_vision": true,
1031
+ "supports_prompt_caching": true
584
1032
  },
585
1033
  "azure/gpt-4o-2024-08-06": {
586
1034
  "max_tokens": 16384,
@@ -592,8 +1040,36 @@
592
1040
  "mode": "chat",
593
1041
  "supports_function_calling": true,
594
1042
  "supports_parallel_function_calling": true,
1043
+ "supports_response_schema": true,
1044
+ "supports_vision": true,
1045
+ "supports_prompt_caching": true
1046
+ },
1047
+ "azure/gpt-4o-2024-11-20": {
1048
+ "max_tokens": 16384,
1049
+ "max_input_tokens": 128000,
1050
+ "max_output_tokens": 16384,
1051
+ "input_cost_per_token": 0.00000275,
1052
+ "output_cost_per_token": 0.000011,
1053
+ "litellm_provider": "azure",
1054
+ "mode": "chat",
1055
+ "supports_function_calling": true,
1056
+ "supports_parallel_function_calling": true,
1057
+ "supports_response_schema": true,
595
1058
  "supports_vision": true
596
1059
  },
1060
+ "azure/gpt-4o-2024-05-13": {
1061
+ "max_tokens": 4096,
1062
+ "max_input_tokens": 128000,
1063
+ "max_output_tokens": 4096,
1064
+ "input_cost_per_token": 0.000005,
1065
+ "output_cost_per_token": 0.000015,
1066
+ "litellm_provider": "azure",
1067
+ "mode": "chat",
1068
+ "supports_function_calling": true,
1069
+ "supports_parallel_function_calling": true,
1070
+ "supports_vision": true,
1071
+ "supports_prompt_caching": true
1072
+ },
597
1073
  "azure/global-standard/gpt-4o-2024-08-06": {
598
1074
  "max_tokens": 16384,
599
1075
  "max_input_tokens": 128000,
@@ -604,6 +1080,21 @@
604
1080
  "mode": "chat",
605
1081
  "supports_function_calling": true,
606
1082
  "supports_parallel_function_calling": true,
1083
+ "supports_response_schema": true,
1084
+ "supports_vision": true,
1085
+ "supports_prompt_caching": true
1086
+ },
1087
+ "azure/global-standard/gpt-4o-2024-11-20": {
1088
+ "max_tokens": 16384,
1089
+ "max_input_tokens": 128000,
1090
+ "max_output_tokens": 16384,
1091
+ "input_cost_per_token": 0.0000025,
1092
+ "output_cost_per_token": 0.000010,
1093
+ "litellm_provider": "azure",
1094
+ "mode": "chat",
1095
+ "supports_function_calling": true,
1096
+ "supports_parallel_function_calling": true,
1097
+ "supports_response_schema": true,
607
1098
  "supports_vision": true
608
1099
  },
609
1100
  "azure/global-standard/gpt-4o-mini": {
@@ -616,6 +1107,7 @@
616
1107
  "mode": "chat",
617
1108
  "supports_function_calling": true,
618
1109
  "supports_parallel_function_calling": true,
1110
+ "supports_response_schema": true,
619
1111
  "supports_vision": true
620
1112
  },
621
1113
  "azure/gpt-4o-mini": {
@@ -624,11 +1116,29 @@
624
1116
  "max_output_tokens": 16384,
625
1117
  "input_cost_per_token": 0.000000165,
626
1118
  "output_cost_per_token": 0.00000066,
1119
+ "cache_read_input_token_cost": 0.000000075,
627
1120
  "litellm_provider": "azure",
628
1121
  "mode": "chat",
629
1122
  "supports_function_calling": true,
630
1123
  "supports_parallel_function_calling": true,
631
- "supports_vision": true
1124
+ "supports_response_schema": true,
1125
+ "supports_vision": true,
1126
+ "supports_prompt_caching": true
1127
+ },
1128
+ "azure/gpt-4o-mini-2024-07-18": {
1129
+ "max_tokens": 16384,
1130
+ "max_input_tokens": 128000,
1131
+ "max_output_tokens": 16384,
1132
+ "input_cost_per_token": 0.000000165,
1133
+ "output_cost_per_token": 0.00000066,
1134
+ "cache_read_input_token_cost": 0.000000075,
1135
+ "litellm_provider": "azure",
1136
+ "mode": "chat",
1137
+ "supports_function_calling": true,
1138
+ "supports_parallel_function_calling": true,
1139
+ "supports_response_schema": true,
1140
+ "supports_vision": true,
1141
+ "supports_prompt_caching": true
632
1142
  },
633
1143
  "azure/gpt-4-turbo-2024-04-09": {
634
1144
  "max_tokens": 4096,
@@ -801,7 +1311,7 @@
801
1311
  "max_input_tokens": 4097,
802
1312
  "input_cost_per_token": 0.0000015,
803
1313
  "output_cost_per_token": 0.000002,
804
- "litellm_provider": "text-completion-openai",
1314
+ "litellm_provider": "azure_text",
805
1315
  "mode": "completion"
806
1316
  },
807
1317
  "azure/gpt-35-turbo-instruct": {
@@ -809,7 +1319,7 @@
809
1319
  "max_input_tokens": 4097,
810
1320
  "input_cost_per_token": 0.0000015,
811
1321
  "output_cost_per_token": 0.000002,
812
- "litellm_provider": "text-completion-openai",
1322
+ "litellm_provider": "azure_text",
813
1323
  "mode": "completion"
814
1324
  },
815
1325
  "azure/gpt-35-turbo-instruct-0914": {
@@ -817,7 +1327,7 @@
817
1327
  "max_input_tokens": 4097,
818
1328
  "input_cost_per_token": 0.0000015,
819
1329
  "output_cost_per_token": 0.000002,
820
- "litellm_provider": "text-completion-openai",
1330
+ "litellm_provider": "azure_text",
821
1331
  "mode": "completion"
822
1332
  },
823
1333
  "azure/mistral-large-latest": {
@@ -949,47 +1459,245 @@
949
1459
  "output_cost_per_token": 0.000003,
950
1460
  "litellm_provider": "azure_ai",
951
1461
  "supports_function_calling": true,
952
- "mode": "chat"
1462
+ "mode": "chat"
1463
+ },
1464
+ "azure_ai/mistral-large-2407": {
1465
+ "max_tokens": 4096,
1466
+ "max_input_tokens": 128000,
1467
+ "max_output_tokens": 4096,
1468
+ "input_cost_per_token": 0.000002,
1469
+ "output_cost_per_token": 0.000006,
1470
+ "litellm_provider": "azure_ai",
1471
+ "supports_function_calling": true,
1472
+ "mode": "chat",
1473
+ "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/000-000.mistral-ai-large-2407-offer?tab=Overview"
1474
+ },
1475
+ "azure_ai/ministral-3b": {
1476
+ "max_tokens": 4096,
1477
+ "max_input_tokens": 128000,
1478
+ "max_output_tokens": 4096,
1479
+ "input_cost_per_token": 0.00000004,
1480
+ "output_cost_per_token": 0.00000004,
1481
+ "litellm_provider": "azure_ai",
1482
+ "supports_function_calling": true,
1483
+ "mode": "chat",
1484
+ "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/000-000.ministral-3b-2410-offer?tab=Overview"
1485
+ },
1486
+ "azure_ai/Llama-3.2-11B-Vision-Instruct": {
1487
+ "max_tokens": 2048,
1488
+ "max_input_tokens": 128000,
1489
+ "max_output_tokens": 2048,
1490
+ "input_cost_per_token": 0.00000037,
1491
+ "output_cost_per_token": 0.00000037,
1492
+ "litellm_provider": "azure_ai",
1493
+ "supports_function_calling": true,
1494
+ "supports_vision": true,
1495
+ "mode": "chat",
1496
+ "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/metagenai.meta-llama-3-2-11b-vision-instruct-offer?tab=Overview"
1497
+ },
1498
+ "azure_ai/Llama-3.3-70B-Instruct": {
1499
+ "max_tokens": 2048,
1500
+ "max_input_tokens": 128000,
1501
+ "max_output_tokens": 2048,
1502
+ "input_cost_per_token": 0.00000071,
1503
+ "output_cost_per_token": 0.00000071,
1504
+ "litellm_provider": "azure_ai",
1505
+ "supports_function_calling": true,
1506
+ "mode": "chat",
1507
+ "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/metagenai.llama-3-3-70b-instruct-offer?tab=Overview"
1508
+ },
1509
+ "azure_ai/Llama-3.2-90B-Vision-Instruct": {
1510
+ "max_tokens": 2048,
1511
+ "max_input_tokens": 128000,
1512
+ "max_output_tokens": 2048,
1513
+ "input_cost_per_token": 0.00000204,
1514
+ "output_cost_per_token": 0.00000204,
1515
+ "litellm_provider": "azure_ai",
1516
+ "supports_function_calling": true,
1517
+ "supports_vision": true,
1518
+ "mode": "chat",
1519
+ "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/metagenai.meta-llama-3-2-90b-vision-instruct-offer?tab=Overview"
953
1520
  },
954
1521
  "azure_ai/Meta-Llama-3-70B-Instruct": {
955
- "max_tokens": 8192,
1522
+ "max_tokens": 2048,
956
1523
  "max_input_tokens": 8192,
957
- "max_output_tokens": 8192,
1524
+ "max_output_tokens": 2048,
958
1525
  "input_cost_per_token": 0.0000011,
959
1526
  "output_cost_per_token": 0.00000037,
960
1527
  "litellm_provider": "azure_ai",
961
1528
  "mode": "chat"
962
1529
  },
963
- "azure_ai/Meta-Llama-31-8B-Instruct": {
964
- "max_tokens": 128000,
1530
+ "azure_ai/Meta-Llama-3.1-8B-Instruct": {
1531
+ "max_tokens": 2048,
965
1532
  "max_input_tokens": 128000,
966
- "max_output_tokens": 128000,
1533
+ "max_output_tokens": 2048,
967
1534
  "input_cost_per_token": 0.0000003,
968
1535
  "output_cost_per_token": 0.00000061,
969
1536
  "litellm_provider": "azure_ai",
970
1537
  "mode": "chat",
971
1538
  "source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-8b-instruct-offer?tab=PlansAndPrice"
972
1539
  },
973
- "azure_ai/Meta-Llama-31-70B-Instruct": {
974
- "max_tokens": 128000,
1540
+ "azure_ai/Meta-Llama-3.1-70B-Instruct": {
1541
+ "max_tokens": 2048,
975
1542
  "max_input_tokens": 128000,
976
- "max_output_tokens": 128000,
1543
+ "max_output_tokens": 2048,
977
1544
  "input_cost_per_token": 0.00000268,
978
1545
  "output_cost_per_token": 0.00000354,
979
1546
  "litellm_provider": "azure_ai",
980
1547
  "mode": "chat",
981
1548
  "source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-70b-instruct-offer?tab=PlansAndPrice"
982
1549
  },
983
- "azure_ai/Meta-Llama-31-405B-Instruct": {
984
- "max_tokens": 128000,
1550
+ "azure_ai/Meta-Llama-3.1-405B-Instruct": {
1551
+ "max_tokens": 2048,
985
1552
  "max_input_tokens": 128000,
986
- "max_output_tokens": 128000,
1553
+ "max_output_tokens": 2048,
987
1554
  "input_cost_per_token": 0.00000533,
988
1555
  "output_cost_per_token": 0.000016,
989
1556
  "litellm_provider": "azure_ai",
990
1557
  "mode": "chat",
991
1558
  "source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-405b-instruct-offer?tab=PlansAndPrice"
992
1559
  },
1560
+ "azure_ai/Phi-3.5-mini-instruct": {
1561
+ "max_tokens": 4096,
1562
+ "max_input_tokens": 128000,
1563
+ "max_output_tokens": 4096,
1564
+ "input_cost_per_token": 0.00000013,
1565
+ "output_cost_per_token": 0.00000052,
1566
+ "litellm_provider": "azure_ai",
1567
+ "mode": "chat",
1568
+ "supports_vision": false,
1569
+ "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/"
1570
+ },
1571
+ "azure_ai/Phi-3.5-vision-instruct": {
1572
+ "max_tokens": 4096,
1573
+ "max_input_tokens": 128000,
1574
+ "max_output_tokens": 4096,
1575
+ "input_cost_per_token": 0.00000013,
1576
+ "output_cost_per_token": 0.00000052,
1577
+ "litellm_provider": "azure_ai",
1578
+ "mode": "chat",
1579
+ "supports_vision": true,
1580
+ "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/"
1581
+ },
1582
+ "azure_ai/Phi-3.5-MoE-instruct": {
1583
+ "max_tokens": 4096,
1584
+ "max_input_tokens": 128000,
1585
+ "max_output_tokens": 4096,
1586
+ "input_cost_per_token": 0.00000016,
1587
+ "output_cost_per_token": 0.00000064,
1588
+ "litellm_provider": "azure_ai",
1589
+ "mode": "chat",
1590
+ "supports_vision": false,
1591
+ "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/"
1592
+ },
1593
+ "azure_ai/Phi-3-mini-4k-instruct": {
1594
+ "max_tokens": 4096,
1595
+ "max_input_tokens": 4096,
1596
+ "max_output_tokens": 4096,
1597
+ "input_cost_per_token": 0.00000013,
1598
+ "output_cost_per_token": 0.00000052,
1599
+ "litellm_provider": "azure_ai",
1600
+ "mode": "chat",
1601
+ "supports_vision": false,
1602
+ "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/"
1603
+ },
1604
+ "azure_ai/Phi-3-mini-128k-instruct": {
1605
+ "max_tokens": 4096,
1606
+ "max_input_tokens": 128000,
1607
+ "max_output_tokens": 4096,
1608
+ "input_cost_per_token": 0.00000013,
1609
+ "output_cost_per_token": 0.00000052,
1610
+ "litellm_provider": "azure_ai",
1611
+ "mode": "chat",
1612
+ "supports_vision": false,
1613
+ "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/"
1614
+ },
1615
+ "azure_ai/Phi-3-small-8k-instruct": {
1616
+ "max_tokens": 4096,
1617
+ "max_input_tokens": 8192,
1618
+ "max_output_tokens": 4096,
1619
+ "input_cost_per_token": 0.00000015,
1620
+ "output_cost_per_token": 0.0000006,
1621
+ "litellm_provider": "azure_ai",
1622
+ "mode": "chat",
1623
+ "supports_vision": false,
1624
+ "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/"
1625
+ },
1626
+ "azure_ai/Phi-3-small-128k-instruct": {
1627
+ "max_tokens": 4096,
1628
+ "max_input_tokens": 128000,
1629
+ "max_output_tokens": 4096,
1630
+ "input_cost_per_token": 0.00000015,
1631
+ "output_cost_per_token": 0.0000006,
1632
+ "litellm_provider": "azure_ai",
1633
+ "mode": "chat",
1634
+ "supports_vision": false,
1635
+ "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/"
1636
+ },
1637
+ "azure_ai/Phi-3-medium-4k-instruct": {
1638
+ "max_tokens": 4096,
1639
+ "max_input_tokens": 4096,
1640
+ "max_output_tokens": 4096,
1641
+ "input_cost_per_token": 0.00000017,
1642
+ "output_cost_per_token": 0.00000068,
1643
+ "litellm_provider": "azure_ai",
1644
+ "mode": "chat",
1645
+ "supports_vision": false,
1646
+ "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/"
1647
+ },
1648
+ "azure_ai/Phi-3-medium-128k-instruct": {
1649
+ "max_tokens": 4096,
1650
+ "max_input_tokens": 128000,
1651
+ "max_output_tokens": 4096,
1652
+ "input_cost_per_token": 0.00000017,
1653
+ "output_cost_per_token": 0.00000068,
1654
+ "litellm_provider": "azure_ai",
1655
+ "mode": "chat",
1656
+ "supports_vision": false,
1657
+ "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/"
1658
+ },
1659
+ "azure_ai/cohere-rerank-v3-multilingual": {
1660
+ "max_tokens": 4096,
1661
+ "max_input_tokens": 4096,
1662
+ "max_output_tokens": 4096,
1663
+ "max_query_tokens": 2048,
1664
+ "input_cost_per_token": 0.0,
1665
+ "input_cost_per_query": 0.002,
1666
+ "output_cost_per_token": 0.0,
1667
+ "litellm_provider": "azure_ai",
1668
+ "mode": "rerank"
1669
+ },
1670
+ "azure_ai/cohere-rerank-v3-english": {
1671
+ "max_tokens": 4096,
1672
+ "max_input_tokens": 4096,
1673
+ "max_output_tokens": 4096,
1674
+ "max_query_tokens": 2048,
1675
+ "input_cost_per_token": 0.0,
1676
+ "input_cost_per_query": 0.002,
1677
+ "output_cost_per_token": 0.0,
1678
+ "litellm_provider": "azure_ai",
1679
+ "mode": "rerank"
1680
+ },
1681
+ "azure_ai/Cohere-embed-v3-english": {
1682
+ "max_tokens": 512,
1683
+ "max_input_tokens": 512,
1684
+ "output_vector_size": 1024,
1685
+ "input_cost_per_token": 0.0000001,
1686
+ "output_cost_per_token": 0.0,
1687
+ "litellm_provider": "azure_ai",
1688
+ "mode": "embedding",
1689
+ "source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/cohere.cohere-embed-v3-english-offer?tab=PlansAndPrice"
1690
+ },
1691
+ "azure_ai/Cohere-embed-v3-multilingual": {
1692
+ "max_tokens": 512,
1693
+ "max_input_tokens": 512,
1694
+ "output_vector_size": 1024,
1695
+ "input_cost_per_token": 0.0000001,
1696
+ "output_cost_per_token": 0.0,
1697
+ "litellm_provider": "azure_ai",
1698
+ "mode": "embedding",
1699
+ "source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/cohere.cohere-embed-v3-english-offer?tab=PlansAndPrice"
1700
+ },
993
1701
  "babbage-002": {
994
1702
  "max_tokens": 16384,
995
1703
  "max_input_tokens": 16384,
@@ -1131,6 +1839,18 @@
1131
1839
  "supports_function_calling": true,
1132
1840
  "supports_assistant_prefill": true
1133
1841
  },
1842
+ "mistral/pixtral-12b-2409": {
1843
+ "max_tokens": 128000,
1844
+ "max_input_tokens": 128000,
1845
+ "max_output_tokens": 128000,
1846
+ "input_cost_per_token": 0.00000015,
1847
+ "output_cost_per_token": 0.00000015,
1848
+ "litellm_provider": "mistral",
1849
+ "mode": "chat",
1850
+ "supports_function_calling": true,
1851
+ "supports_assistant_prefill": true,
1852
+ "supports_vision": true
1853
+ },
1134
1854
  "mistral/open-mistral-7b": {
1135
1855
  "max_tokens": 8191,
1136
1856
  "max_input_tokens": 32000,
@@ -1240,6 +1960,8 @@
1240
1960
  "max_output_tokens": 4096,
1241
1961
  "input_cost_per_token": 0.00000014,
1242
1962
  "input_cost_per_token_cache_hit": 0.000000014,
1963
+ "cache_read_input_token_cost": 0.000000014,
1964
+ "cache_creation_input_token_cost": 0.0,
1243
1965
  "output_cost_per_token": 0.00000028,
1244
1966
  "litellm_provider": "deepseek",
1245
1967
  "mode": "chat",
@@ -1290,6 +2012,17 @@
1290
2012
  "mode": "completion",
1291
2013
  "source": "https://docs.mistral.ai/capabilities/code_generation/"
1292
2014
  },
2015
+ "xai/grok-beta": {
2016
+ "max_tokens": 131072,
2017
+ "max_input_tokens": 131072,
2018
+ "max_output_tokens": 131072,
2019
+ "input_cost_per_token": 0.000005,
2020
+ "output_cost_per_token": 0.000015,
2021
+ "litellm_provider": "xai",
2022
+ "mode": "chat",
2023
+ "supports_function_calling": true,
2024
+ "supports_vision": true
2025
+ },
1293
2026
  "deepseek-coder": {
1294
2027
  "max_tokens": 4096,
1295
2028
  "max_input_tokens": 128000,
@@ -1304,6 +2037,24 @@
1304
2037
  "supports_tool_choice": true,
1305
2038
  "supports_prompt_caching": true
1306
2039
  },
2040
+ "groq/llama-3.3-70b-versatile": {
2041
+ "max_tokens": 8192,
2042
+ "max_input_tokens": 128000,
2043
+ "max_output_tokens": 8192,
2044
+ "input_cost_per_token": 0.00000059,
2045
+ "output_cost_per_token": 0.00000079,
2046
+ "litellm_provider": "groq",
2047
+ "mode": "chat"
2048
+ },
2049
+ "groq/llama-3.3-70b-specdec": {
2050
+ "max_tokens": 8192,
2051
+ "max_input_tokens": 8192,
2052
+ "max_output_tokens": 8192,
2053
+ "input_cost_per_token": 0.00000059,
2054
+ "output_cost_per_token": 0.00000099,
2055
+ "litellm_provider": "groq",
2056
+ "mode": "chat"
2057
+ },
1307
2058
  "groq/llama2-70b-4096": {
1308
2059
  "max_tokens": 4096,
1309
2060
  "max_input_tokens": 4096,
@@ -1312,7 +2063,8 @@
1312
2063
  "output_cost_per_token": 0.00000080,
1313
2064
  "litellm_provider": "groq",
1314
2065
  "mode": "chat",
1315
- "supports_function_calling": true
2066
+ "supports_function_calling": true,
2067
+ "supports_response_schema": true
1316
2068
  },
1317
2069
  "groq/llama3-8b-8192": {
1318
2070
  "max_tokens": 8192,
@@ -1322,7 +2074,76 @@
1322
2074
  "output_cost_per_token": 0.00000008,
1323
2075
  "litellm_provider": "groq",
1324
2076
  "mode": "chat",
1325
- "supports_function_calling": true
2077
+ "supports_function_calling": true,
2078
+ "supports_response_schema": true
2079
+ },
2080
+ "groq/llama-3.2-1b-preview": {
2081
+ "max_tokens": 8192,
2082
+ "max_input_tokens": 8192,
2083
+ "max_output_tokens": 8192,
2084
+ "input_cost_per_token": 0.00000004,
2085
+ "output_cost_per_token": 0.00000004,
2086
+ "litellm_provider": "groq",
2087
+ "mode": "chat",
2088
+ "supports_function_calling": true,
2089
+ "supports_response_schema": true
2090
+ },
2091
+ "groq/llama-3.2-3b-preview": {
2092
+ "max_tokens": 8192,
2093
+ "max_input_tokens": 8192,
2094
+ "max_output_tokens": 8192,
2095
+ "input_cost_per_token": 0.00000006,
2096
+ "output_cost_per_token": 0.00000006,
2097
+ "litellm_provider": "groq",
2098
+ "mode": "chat",
2099
+ "supports_function_calling": true,
2100
+ "supports_response_schema": true
2101
+ },
2102
+ "groq/llama-3.2-11b-text-preview": {
2103
+ "max_tokens": 8192,
2104
+ "max_input_tokens": 8192,
2105
+ "max_output_tokens": 8192,
2106
+ "input_cost_per_token": 0.00000018,
2107
+ "output_cost_per_token": 0.00000018,
2108
+ "litellm_provider": "groq",
2109
+ "mode": "chat",
2110
+ "supports_function_calling": true,
2111
+ "supports_response_schema": true
2112
+ },
2113
+ "groq/llama-3.2-11b-vision-preview": {
2114
+ "max_tokens": 8192,
2115
+ "max_input_tokens": 8192,
2116
+ "max_output_tokens": 8192,
2117
+ "input_cost_per_token": 0.00000018,
2118
+ "output_cost_per_token": 0.00000018,
2119
+ "litellm_provider": "groq",
2120
+ "mode": "chat",
2121
+ "supports_function_calling": true,
2122
+ "supports_response_schema": true,
2123
+ "supports_vision": true
2124
+ },
2125
+ "groq/llama-3.2-90b-text-preview": {
2126
+ "max_tokens": 8192,
2127
+ "max_input_tokens": 8192,
2128
+ "max_output_tokens": 8192,
2129
+ "input_cost_per_token": 0.0000009,
2130
+ "output_cost_per_token": 0.0000009,
2131
+ "litellm_provider": "groq",
2132
+ "mode": "chat",
2133
+ "supports_function_calling": true,
2134
+ "supports_response_schema": true
2135
+ },
2136
+ "groq/llama-3.2-90b-vision-preview": {
2137
+ "max_tokens": 8192,
2138
+ "max_input_tokens": 8192,
2139
+ "max_output_tokens": 8192,
2140
+ "input_cost_per_token": 0.0000009,
2141
+ "output_cost_per_token": 0.0000009,
2142
+ "litellm_provider": "groq",
2143
+ "mode": "chat",
2144
+ "supports_function_calling": true,
2145
+ "supports_response_schema": true,
2146
+ "supports_vision": true
1326
2147
  },
1327
2148
  "groq/llama3-70b-8192": {
1328
2149
  "max_tokens": 8192,
@@ -1332,17 +2153,19 @@
1332
2153
  "output_cost_per_token": 0.00000079,
1333
2154
  "litellm_provider": "groq",
1334
2155
  "mode": "chat",
1335
- "supports_function_calling": true
2156
+ "supports_function_calling": true,
2157
+ "supports_response_schema": true
1336
2158
  },
1337
2159
  "groq/llama-3.1-8b-instant": {
1338
2160
  "max_tokens": 8192,
1339
2161
  "max_input_tokens": 8192,
1340
2162
  "max_output_tokens": 8192,
1341
- "input_cost_per_token": 0.00000059,
1342
- "output_cost_per_token": 0.00000079,
2163
+ "input_cost_per_token": 0.00000005,
2164
+ "output_cost_per_token": 0.00000008,
1343
2165
  "litellm_provider": "groq",
1344
2166
  "mode": "chat",
1345
- "supports_function_calling": true
2167
+ "supports_function_calling": true,
2168
+ "supports_response_schema": true
1346
2169
  },
1347
2170
  "groq/llama-3.1-70b-versatile": {
1348
2171
  "max_tokens": 8192,
@@ -1352,7 +2175,8 @@
1352
2175
  "output_cost_per_token": 0.00000079,
1353
2176
  "litellm_provider": "groq",
1354
2177
  "mode": "chat",
1355
- "supports_function_calling": true
2178
+ "supports_function_calling": true,
2179
+ "supports_response_schema": true
1356
2180
  },
1357
2181
  "groq/llama-3.1-405b-reasoning": {
1358
2182
  "max_tokens": 8192,
@@ -1362,7 +2186,8 @@
1362
2186
  "output_cost_per_token": 0.00000079,
1363
2187
  "litellm_provider": "groq",
1364
2188
  "mode": "chat",
1365
- "supports_function_calling": true
2189
+ "supports_function_calling": true,
2190
+ "supports_response_schema": true
1366
2191
  },
1367
2192
  "groq/mixtral-8x7b-32768": {
1368
2193
  "max_tokens": 32768,
@@ -1372,7 +2197,8 @@
1372
2197
  "output_cost_per_token": 0.00000024,
1373
2198
  "litellm_provider": "groq",
1374
2199
  "mode": "chat",
1375
- "supports_function_calling": true
2200
+ "supports_function_calling": true,
2201
+ "supports_response_schema": true
1376
2202
  },
1377
2203
  "groq/gemma-7b-it": {
1378
2204
  "max_tokens": 8192,
@@ -1382,7 +2208,8 @@
1382
2208
  "output_cost_per_token": 0.00000007,
1383
2209
  "litellm_provider": "groq",
1384
2210
  "mode": "chat",
1385
- "supports_function_calling": true
2211
+ "supports_function_calling": true,
2212
+ "supports_response_schema": true
1386
2213
  },
1387
2214
  "groq/gemma2-9b-it": {
1388
2215
  "max_tokens": 8192,
@@ -1392,7 +2219,8 @@
1392
2219
  "output_cost_per_token": 0.00000020,
1393
2220
  "litellm_provider": "groq",
1394
2221
  "mode": "chat",
1395
- "supports_function_calling": true
2222
+ "supports_function_calling": true,
2223
+ "supports_response_schema": true
1396
2224
  },
1397
2225
  "groq/llama3-groq-70b-8192-tool-use-preview": {
1398
2226
  "max_tokens": 8192,
@@ -1402,7 +2230,8 @@
1402
2230
  "output_cost_per_token": 0.00000089,
1403
2231
  "litellm_provider": "groq",
1404
2232
  "mode": "chat",
1405
- "supports_function_calling": true
2233
+ "supports_function_calling": true,
2234
+ "supports_response_schema": true
1406
2235
  },
1407
2236
  "groq/llama3-groq-8b-8192-tool-use-preview": {
1408
2237
  "max_tokens": 8192,
@@ -1412,7 +2241,8 @@
1412
2241
  "output_cost_per_token": 0.00000019,
1413
2242
  "litellm_provider": "groq",
1414
2243
  "mode": "chat",
1415
- "supports_function_calling": true
2244
+ "supports_function_calling": true,
2245
+ "supports_response_schema": true
1416
2246
  },
1417
2247
  "cerebras/llama3.1-8b": {
1418
2248
  "max_tokens": 128000,
@@ -1505,7 +2335,24 @@
1505
2335
  "supports_vision": true,
1506
2336
  "tool_use_system_prompt_tokens": 264,
1507
2337
  "supports_assistant_prefill": true,
1508
- "supports_prompt_caching": true
2338
+ "supports_prompt_caching": true,
2339
+ "supports_response_schema": true
2340
+ },
2341
+ "claude-3-5-haiku-20241022": {
2342
+ "max_tokens": 8192,
2343
+ "max_input_tokens": 200000,
2344
+ "max_output_tokens": 8192,
2345
+ "input_cost_per_token": 0.000001,
2346
+ "output_cost_per_token": 0.000005,
2347
+ "cache_creation_input_token_cost": 0.00000125,
2348
+ "cache_read_input_token_cost": 0.0000001,
2349
+ "litellm_provider": "anthropic",
2350
+ "mode": "chat",
2351
+ "supports_function_calling": true,
2352
+ "tool_use_system_prompt_tokens": 264,
2353
+ "supports_assistant_prefill": true,
2354
+ "supports_prompt_caching": true,
2355
+ "supports_response_schema": true
1509
2356
  },
1510
2357
  "claude-3-opus-20240229": {
1511
2358
  "max_tokens": 4096,
@@ -1521,23 +2368,42 @@
1521
2368
  "supports_vision": true,
1522
2369
  "tool_use_system_prompt_tokens": 395,
1523
2370
  "supports_assistant_prefill": true,
1524
- "supports_prompt_caching": true
2371
+ "supports_prompt_caching": true,
2372
+ "supports_response_schema": true
2373
+ },
2374
+ "claude-3-sonnet-20240229": {
2375
+ "max_tokens": 4096,
2376
+ "max_input_tokens": 200000,
2377
+ "max_output_tokens": 4096,
2378
+ "input_cost_per_token": 0.000003,
2379
+ "output_cost_per_token": 0.000015,
2380
+ "litellm_provider": "anthropic",
2381
+ "mode": "chat",
2382
+ "supports_function_calling": true,
2383
+ "supports_vision": true,
2384
+ "tool_use_system_prompt_tokens": 159,
2385
+ "supports_assistant_prefill": true,
2386
+ "supports_prompt_caching": true,
2387
+ "supports_response_schema": true
1525
2388
  },
1526
- "claude-3-sonnet-20240229": {
1527
- "max_tokens": 4096,
2389
+ "claude-3-5-sonnet-20240620": {
2390
+ "max_tokens": 8192,
1528
2391
  "max_input_tokens": 200000,
1529
- "max_output_tokens": 4096,
2392
+ "max_output_tokens": 8192,
1530
2393
  "input_cost_per_token": 0.000003,
1531
2394
  "output_cost_per_token": 0.000015,
2395
+ "cache_creation_input_token_cost": 0.00000375,
2396
+ "cache_read_input_token_cost": 0.0000003,
1532
2397
  "litellm_provider": "anthropic",
1533
2398
  "mode": "chat",
1534
2399
  "supports_function_calling": true,
1535
2400
  "supports_vision": true,
1536
2401
  "tool_use_system_prompt_tokens": 159,
1537
2402
  "supports_assistant_prefill": true,
1538
- "supports_prompt_caching": true
2403
+ "supports_prompt_caching": true,
2404
+ "supports_response_schema": true
1539
2405
  },
1540
- "claude-3-5-sonnet-20240620": {
2406
+ "claude-3-5-sonnet-20241022": {
1541
2407
  "max_tokens": 8192,
1542
2408
  "max_input_tokens": 200000,
1543
2409
  "max_output_tokens": 8192,
@@ -1551,7 +2417,9 @@
1551
2417
  "supports_vision": true,
1552
2418
  "tool_use_system_prompt_tokens": 159,
1553
2419
  "supports_assistant_prefill": true,
1554
- "supports_prompt_caching": true
2420
+ "supports_pdf_input": true,
2421
+ "supports_prompt_caching": true,
2422
+ "supports_response_schema": true
1555
2423
  },
1556
2424
  "text-bison": {
1557
2425
  "max_tokens": 2048,
@@ -1953,20 +2821,20 @@
1953
2821
  "max_tokens": 8192,
1954
2822
  "max_input_tokens": 2097152,
1955
2823
  "max_output_tokens": 8192,
1956
- "input_cost_per_image": 0.001315,
1957
- "input_cost_per_audio_per_second": 0.000125,
1958
- "input_cost_per_video_per_second": 0.001315,
1959
- "input_cost_per_token": 0.000005,
1960
- "input_cost_per_character": 0.00000125,
1961
- "input_cost_per_token_above_128k_tokens": 0.00001,
1962
- "input_cost_per_character_above_128k_tokens": 0.0000025,
1963
- "output_cost_per_token": 0.000015,
1964
- "output_cost_per_character": 0.00000375,
1965
- "output_cost_per_token_above_128k_tokens": 0.00003,
1966
- "output_cost_per_character_above_128k_tokens": 0.0000075,
1967
- "output_cost_per_image": 0.00263,
1968
- "output_cost_per_video_per_second": 0.00263,
1969
- "output_cost_per_audio_per_second": 0.00025,
2824
+ "input_cost_per_image": 0.00032875,
2825
+ "input_cost_per_audio_per_second": 0.00003125,
2826
+ "input_cost_per_video_per_second": 0.00032875,
2827
+ "input_cost_per_token": 0.00000125,
2828
+ "input_cost_per_character": 0.0000003125,
2829
+ "input_cost_per_image_above_128k_tokens": 0.0006575,
2830
+ "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
2831
+ "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
2832
+ "input_cost_per_token_above_128k_tokens": 0.0000025,
2833
+ "input_cost_per_character_above_128k_tokens": 0.000000625,
2834
+ "output_cost_per_token": 0.000005,
2835
+ "output_cost_per_character": 0.00000125,
2836
+ "output_cost_per_token_above_128k_tokens": 0.00001,
2837
+ "output_cost_per_character_above_128k_tokens": 0.0000025,
1970
2838
  "litellm_provider": "vertex_ai-language-models",
1971
2839
  "mode": "chat",
1972
2840
  "supports_system_messages": true,
@@ -1975,24 +2843,50 @@
1975
2843
  "supports_response_schema": true,
1976
2844
  "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
1977
2845
  },
2846
+ "gemini-1.5-pro-002": {
2847
+ "max_tokens": 8192,
2848
+ "max_input_tokens": 2097152,
2849
+ "max_output_tokens": 8192,
2850
+ "input_cost_per_image": 0.00032875,
2851
+ "input_cost_per_audio_per_second": 0.00003125,
2852
+ "input_cost_per_video_per_second": 0.00032875,
2853
+ "input_cost_per_token": 0.00000125,
2854
+ "input_cost_per_character": 0.0000003125,
2855
+ "input_cost_per_image_above_128k_tokens": 0.0006575,
2856
+ "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
2857
+ "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
2858
+ "input_cost_per_token_above_128k_tokens": 0.0000025,
2859
+ "input_cost_per_character_above_128k_tokens": 0.000000625,
2860
+ "output_cost_per_token": 0.000005,
2861
+ "output_cost_per_character": 0.00000125,
2862
+ "output_cost_per_token_above_128k_tokens": 0.00001,
2863
+ "output_cost_per_character_above_128k_tokens": 0.0000025,
2864
+ "litellm_provider": "vertex_ai-language-models",
2865
+ "mode": "chat",
2866
+ "supports_system_messages": true,
2867
+ "supports_function_calling": true,
2868
+ "supports_tool_choice": true,
2869
+ "supports_response_schema": true,
2870
+ "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-1.5-pro"
2871
+ },
1978
2872
  "gemini-1.5-pro-001": {
1979
2873
  "max_tokens": 8192,
1980
2874
  "max_input_tokens": 1000000,
1981
2875
  "max_output_tokens": 8192,
1982
- "input_cost_per_image": 0.001315,
1983
- "input_cost_per_audio_per_second": 0.000125,
1984
- "input_cost_per_video_per_second": 0.001315,
1985
- "input_cost_per_token": 0.000005,
1986
- "input_cost_per_character": 0.00000125,
1987
- "input_cost_per_token_above_128k_tokens": 0.00001,
1988
- "input_cost_per_character_above_128k_tokens": 0.0000025,
1989
- "output_cost_per_token": 0.000015,
1990
- "output_cost_per_character": 0.00000375,
1991
- "output_cost_per_token_above_128k_tokens": 0.00003,
1992
- "output_cost_per_character_above_128k_tokens": 0.0000075,
1993
- "output_cost_per_image": 0.00263,
1994
- "output_cost_per_video_per_second": 0.00263,
1995
- "output_cost_per_audio_per_second": 0.00025,
2876
+ "input_cost_per_image": 0.00032875,
2877
+ "input_cost_per_audio_per_second": 0.00003125,
2878
+ "input_cost_per_video_per_second": 0.00032875,
2879
+ "input_cost_per_token": 0.00000125,
2880
+ "input_cost_per_character": 0.0000003125,
2881
+ "input_cost_per_image_above_128k_tokens": 0.0006575,
2882
+ "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
2883
+ "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
2884
+ "input_cost_per_token_above_128k_tokens": 0.0000025,
2885
+ "input_cost_per_character_above_128k_tokens": 0.000000625,
2886
+ "output_cost_per_token": 0.000005,
2887
+ "output_cost_per_character": 0.00000125,
2888
+ "output_cost_per_token_above_128k_tokens": 0.00001,
2889
+ "output_cost_per_character_above_128k_tokens": 0.0000025,
1996
2890
  "litellm_provider": "vertex_ai-language-models",
1997
2891
  "mode": "chat",
1998
2892
  "supports_system_messages": true,
@@ -2005,20 +2899,20 @@
2005
2899
  "max_tokens": 8192,
2006
2900
  "max_input_tokens": 1000000,
2007
2901
  "max_output_tokens": 8192,
2008
- "input_cost_per_image": 0.001315,
2009
- "input_cost_per_audio_per_second": 0.000125,
2010
- "input_cost_per_video_per_second": 0.001315,
2011
- "input_cost_per_token": 0.000005,
2012
- "input_cost_per_character": 0.00000125,
2013
- "input_cost_per_token_above_128k_tokens": 0.00001,
2014
- "input_cost_per_character_above_128k_tokens": 0.0000025,
2015
- "output_cost_per_token": 0.000015,
2016
- "output_cost_per_character": 0.00000375,
2017
- "output_cost_per_token_above_128k_tokens": 0.00003,
2018
- "output_cost_per_character_above_128k_tokens": 0.0000075,
2019
- "output_cost_per_image": 0.00263,
2020
- "output_cost_per_video_per_second": 0.00263,
2021
- "output_cost_per_audio_per_second": 0.00025,
2902
+ "input_cost_per_image": 0.00032875,
2903
+ "input_cost_per_audio_per_second": 0.00003125,
2904
+ "input_cost_per_video_per_second": 0.00032875,
2905
+ "input_cost_per_token": 0.000000078125,
2906
+ "input_cost_per_character": 0.0000003125,
2907
+ "input_cost_per_image_above_128k_tokens": 0.0006575,
2908
+ "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
2909
+ "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
2910
+ "input_cost_per_token_above_128k_tokens": 0.00000015625,
2911
+ "input_cost_per_character_above_128k_tokens": 0.000000625,
2912
+ "output_cost_per_token": 0.0000003125,
2913
+ "output_cost_per_character": 0.00000125,
2914
+ "output_cost_per_token_above_128k_tokens": 0.000000625,
2915
+ "output_cost_per_character_above_128k_tokens": 0.0000025,
2022
2916
  "litellm_provider": "vertex_ai-language-models",
2023
2917
  "mode": "chat",
2024
2918
  "supports_system_messages": true,
@@ -2031,20 +2925,20 @@
2031
2925
  "max_tokens": 8192,
2032
2926
  "max_input_tokens": 1000000,
2033
2927
  "max_output_tokens": 8192,
2034
- "input_cost_per_image": 0.001315,
2035
- "input_cost_per_audio_per_second": 0.000125,
2036
- "input_cost_per_video_per_second": 0.001315,
2037
- "input_cost_per_token": 0.000005,
2038
- "input_cost_per_character": 0.00000125,
2039
- "input_cost_per_token_above_128k_tokens": 0.00001,
2040
- "input_cost_per_character_above_128k_tokens": 0.0000025,
2041
- "output_cost_per_token": 0.000015,
2042
- "output_cost_per_character": 0.00000375,
2043
- "output_cost_per_token_above_128k_tokens": 0.00003,
2044
- "output_cost_per_character_above_128k_tokens": 0.0000075,
2045
- "output_cost_per_image": 0.00263,
2046
- "output_cost_per_video_per_second": 0.00263,
2047
- "output_cost_per_audio_per_second": 0.00025,
2928
+ "input_cost_per_image": 0.00032875,
2929
+ "input_cost_per_audio_per_second": 0.00003125,
2930
+ "input_cost_per_video_per_second": 0.00032875,
2931
+ "input_cost_per_token": 0.000000078125,
2932
+ "input_cost_per_character": 0.0000003125,
2933
+ "input_cost_per_image_above_128k_tokens": 0.0006575,
2934
+ "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
2935
+ "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
2936
+ "input_cost_per_token_above_128k_tokens": 0.00000015625,
2937
+ "input_cost_per_character_above_128k_tokens": 0.000000625,
2938
+ "output_cost_per_token": 0.0000003125,
2939
+ "output_cost_per_character": 0.00000125,
2940
+ "output_cost_per_token_above_128k_tokens": 0.000000625,
2941
+ "output_cost_per_character_above_128k_tokens": 0.0000025,
2048
2942
  "litellm_provider": "vertex_ai-language-models",
2049
2943
  "mode": "chat",
2050
2944
  "supports_system_messages": true,
@@ -2057,20 +2951,20 @@
2057
2951
  "max_tokens": 8192,
2058
2952
  "max_input_tokens": 1000000,
2059
2953
  "max_output_tokens": 8192,
2060
- "input_cost_per_image": 0.001315,
2061
- "input_cost_per_audio_per_second": 0.000125,
2062
- "input_cost_per_video_per_second": 0.001315,
2063
- "input_cost_per_token": 0.000005,
2064
- "input_cost_per_character": 0.00000125,
2065
- "input_cost_per_token_above_128k_tokens": 0.00001,
2066
- "input_cost_per_character_above_128k_tokens": 0.0000025,
2067
- "output_cost_per_token": 0.000015,
2068
- "output_cost_per_character": 0.00000375,
2069
- "output_cost_per_token_above_128k_tokens": 0.00003,
2070
- "output_cost_per_character_above_128k_tokens": 0.0000075,
2071
- "output_cost_per_image": 0.00263,
2072
- "output_cost_per_video_per_second": 0.00263,
2073
- "output_cost_per_audio_per_second": 0.00025,
2954
+ "input_cost_per_image": 0.00032875,
2955
+ "input_cost_per_audio_per_second": 0.00003125,
2956
+ "input_cost_per_video_per_second": 0.00032875,
2957
+ "input_cost_per_token": 0.000000078125,
2958
+ "input_cost_per_character": 0.0000003125,
2959
+ "input_cost_per_image_above_128k_tokens": 0.0006575,
2960
+ "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
2961
+ "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
2962
+ "input_cost_per_token_above_128k_tokens": 0.00000015625,
2963
+ "input_cost_per_character_above_128k_tokens": 0.000000625,
2964
+ "output_cost_per_token": 0.0000003125,
2965
+ "output_cost_per_character": 0.00000125,
2966
+ "output_cost_per_token_above_128k_tokens": 0.000000625,
2967
+ "output_cost_per_character_above_128k_tokens": 0.0000025,
2074
2968
  "litellm_provider": "vertex_ai-language-models",
2075
2969
  "mode": "chat",
2076
2970
  "supports_function_calling": true,
@@ -2088,20 +2982,20 @@
2088
2982
  "max_audio_length_hours": 8.4,
2089
2983
  "max_audio_per_prompt": 1,
2090
2984
  "max_pdf_size_mb": 30,
2091
- "input_cost_per_image": 0.0001315,
2092
- "input_cost_per_video_per_second": 0.0001315,
2093
- "input_cost_per_audio_per_second": 0.000125,
2094
- "input_cost_per_token": 0.0000005,
2095
- "input_cost_per_character": 0.000000125,
2985
+ "input_cost_per_image": 0.00002,
2986
+ "input_cost_per_video_per_second": 0.00002,
2987
+ "input_cost_per_audio_per_second": 0.000002,
2988
+ "input_cost_per_token": 0.000000075,
2989
+ "input_cost_per_character": 0.00000001875,
2096
2990
  "input_cost_per_token_above_128k_tokens": 0.000001,
2097
2991
  "input_cost_per_character_above_128k_tokens": 0.00000025,
2098
- "output_cost_per_token": 0.0000015,
2099
- "output_cost_per_character": 0.000000375,
2100
- "output_cost_per_token_above_128k_tokens": 0.000003,
2101
- "output_cost_per_character_above_128k_tokens": 0.00000075,
2102
- "output_cost_per_image": 0.000263,
2103
- "output_cost_per_video_per_second": 0.000263,
2104
- "output_cost_per_audio_per_second": 0.00025,
2992
+ "input_cost_per_image_above_128k_tokens": 0.00004,
2993
+ "input_cost_per_video_per_second_above_128k_tokens": 0.00004,
2994
+ "input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
2995
+ "output_cost_per_token": 0.0000003,
2996
+ "output_cost_per_character": 0.000000075,
2997
+ "output_cost_per_token_above_128k_tokens": 0.0000006,
2998
+ "output_cost_per_character_above_128k_tokens": 0.00000015,
2105
2999
  "litellm_provider": "vertex_ai-language-models",
2106
3000
  "mode": "chat",
2107
3001
  "supports_system_messages": true,
@@ -2120,20 +3014,20 @@
2120
3014
  "max_audio_length_hours": 8.4,
2121
3015
  "max_audio_per_prompt": 1,
2122
3016
  "max_pdf_size_mb": 30,
2123
- "input_cost_per_image": 0.0001315,
2124
- "input_cost_per_video_per_second": 0.0001315,
2125
- "input_cost_per_audio_per_second": 0.000125,
2126
- "input_cost_per_token": 0.0000005,
2127
- "input_cost_per_character": 0.000000125,
3017
+ "input_cost_per_image": 0.00002,
3018
+ "input_cost_per_video_per_second": 0.00002,
3019
+ "input_cost_per_audio_per_second": 0.000002,
3020
+ "input_cost_per_token": 0.000000004688,
3021
+ "input_cost_per_character": 0.00000001875,
2128
3022
  "input_cost_per_token_above_128k_tokens": 0.000001,
2129
3023
  "input_cost_per_character_above_128k_tokens": 0.00000025,
2130
- "output_cost_per_token": 0.0000015,
2131
- "output_cost_per_character": 0.000000375,
2132
- "output_cost_per_token_above_128k_tokens": 0.000003,
2133
- "output_cost_per_character_above_128k_tokens": 0.00000075,
2134
- "output_cost_per_image": 0.000263,
2135
- "output_cost_per_video_per_second": 0.000263,
2136
- "output_cost_per_audio_per_second": 0.00025,
3024
+ "input_cost_per_image_above_128k_tokens": 0.00004,
3025
+ "input_cost_per_video_per_second_above_128k_tokens": 0.00004,
3026
+ "input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
3027
+ "output_cost_per_token": 0.0000000046875,
3028
+ "output_cost_per_character": 0.00000001875,
3029
+ "output_cost_per_token_above_128k_tokens": 0.000000009375,
3030
+ "output_cost_per_character_above_128k_tokens": 0.0000000375,
2137
3031
  "litellm_provider": "vertex_ai-language-models",
2138
3032
  "mode": "chat",
2139
3033
  "supports_system_messages": true,
@@ -2142,6 +3036,38 @@
2142
3036
  "supports_response_schema": true,
2143
3037
  "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
2144
3038
  },
3039
+ "gemini-1.5-flash-002": {
3040
+ "max_tokens": 8192,
3041
+ "max_input_tokens": 1048576,
3042
+ "max_output_tokens": 8192,
3043
+ "max_images_per_prompt": 3000,
3044
+ "max_videos_per_prompt": 10,
3045
+ "max_video_length": 1,
3046
+ "max_audio_length_hours": 8.4,
3047
+ "max_audio_per_prompt": 1,
3048
+ "max_pdf_size_mb": 30,
3049
+ "input_cost_per_image": 0.00002,
3050
+ "input_cost_per_video_per_second": 0.00002,
3051
+ "input_cost_per_audio_per_second": 0.000002,
3052
+ "input_cost_per_token": 0.000000075,
3053
+ "input_cost_per_character": 0.00000001875,
3054
+ "input_cost_per_token_above_128k_tokens": 0.000001,
3055
+ "input_cost_per_character_above_128k_tokens": 0.00000025,
3056
+ "input_cost_per_image_above_128k_tokens": 0.00004,
3057
+ "input_cost_per_video_per_second_above_128k_tokens": 0.00004,
3058
+ "input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
3059
+ "output_cost_per_token": 0.0000003,
3060
+ "output_cost_per_character": 0.000000075,
3061
+ "output_cost_per_token_above_128k_tokens": 0.0000006,
3062
+ "output_cost_per_character_above_128k_tokens": 0.00000015,
3063
+ "litellm_provider": "vertex_ai-language-models",
3064
+ "mode": "chat",
3065
+ "supports_system_messages": true,
3066
+ "supports_function_calling": true,
3067
+ "supports_vision": true,
3068
+ "supports_response_schema": true,
3069
+ "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-1.5-flash"
3070
+ },
2145
3071
  "gemini-1.5-flash-001": {
2146
3072
  "max_tokens": 8192,
2147
3073
  "max_input_tokens": 1000000,
@@ -2152,20 +3078,20 @@
2152
3078
  "max_audio_length_hours": 8.4,
2153
3079
  "max_audio_per_prompt": 1,
2154
3080
  "max_pdf_size_mb": 30,
2155
- "input_cost_per_image": 0.0001315,
2156
- "input_cost_per_video_per_second": 0.0001315,
2157
- "input_cost_per_audio_per_second": 0.000125,
2158
- "input_cost_per_token": 0.0000005,
2159
- "input_cost_per_character": 0.000000125,
3081
+ "input_cost_per_image": 0.00002,
3082
+ "input_cost_per_video_per_second": 0.00002,
3083
+ "input_cost_per_audio_per_second": 0.000002,
3084
+ "input_cost_per_token": 0.000000075,
3085
+ "input_cost_per_character": 0.00000001875,
2160
3086
  "input_cost_per_token_above_128k_tokens": 0.000001,
2161
3087
  "input_cost_per_character_above_128k_tokens": 0.00000025,
2162
- "output_cost_per_token": 0.0000015,
2163
- "output_cost_per_character": 0.000000375,
2164
- "output_cost_per_token_above_128k_tokens": 0.000003,
2165
- "output_cost_per_character_above_128k_tokens": 0.00000075,
2166
- "output_cost_per_image": 0.000263,
2167
- "output_cost_per_video_per_second": 0.000263,
2168
- "output_cost_per_audio_per_second": 0.00025,
3088
+ "input_cost_per_image_above_128k_tokens": 0.00004,
3089
+ "input_cost_per_video_per_second_above_128k_tokens": 0.00004,
3090
+ "input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
3091
+ "output_cost_per_token": 0.0000003,
3092
+ "output_cost_per_character": 0.000000075,
3093
+ "output_cost_per_token_above_128k_tokens": 0.0000006,
3094
+ "output_cost_per_character_above_128k_tokens": 0.00000015,
2169
3095
  "litellm_provider": "vertex_ai-language-models",
2170
3096
  "mode": "chat",
2171
3097
  "supports_system_messages": true,
@@ -2184,20 +3110,20 @@
2184
3110
  "max_audio_length_hours": 8.4,
2185
3111
  "max_audio_per_prompt": 1,
2186
3112
  "max_pdf_size_mb": 30,
2187
- "input_cost_per_image": 0.0001315,
2188
- "input_cost_per_video_per_second": 0.0001315,
2189
- "input_cost_per_audio_per_second": 0.000125,
2190
- "input_cost_per_token": 0.0000005,
2191
- "input_cost_per_character": 0.000000125,
3113
+ "input_cost_per_image": 0.00002,
3114
+ "input_cost_per_video_per_second": 0.00002,
3115
+ "input_cost_per_audio_per_second": 0.000002,
3116
+ "input_cost_per_token": 0.000000075,
3117
+ "input_cost_per_character": 0.00000001875,
2192
3118
  "input_cost_per_token_above_128k_tokens": 0.000001,
2193
3119
  "input_cost_per_character_above_128k_tokens": 0.00000025,
2194
- "output_cost_per_token": 0.0000015,
2195
- "output_cost_per_character": 0.000000375,
2196
- "output_cost_per_token_above_128k_tokens": 0.000003,
2197
- "output_cost_per_character_above_128k_tokens": 0.00000075,
2198
- "output_cost_per_image": 0.000263,
2199
- "output_cost_per_video_per_second": 0.000263,
2200
- "output_cost_per_audio_per_second": 0.00025,
3120
+ "input_cost_per_image_above_128k_tokens": 0.00004,
3121
+ "input_cost_per_video_per_second_above_128k_tokens": 0.00004,
3122
+ "input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
3123
+ "output_cost_per_token": 0.0000000046875,
3124
+ "output_cost_per_character": 0.00000001875,
3125
+ "output_cost_per_token_above_128k_tokens": 0.000000009375,
3126
+ "output_cost_per_character_above_128k_tokens": 0.0000000375,
2201
3127
  "litellm_provider": "vertex_ai-language-models",
2202
3128
  "mode": "chat",
2203
3129
  "supports_system_messages": true,
@@ -2219,7 +3145,7 @@
2219
3145
  "supports_tool_choice": true,
2220
3146
  "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/gemini-experimental"
2221
3147
  },
2222
- "gemini-pro-flash": {
3148
+ "gemini-flash-experimental": {
2223
3149
  "max_tokens": 8192,
2224
3150
  "max_input_tokens": 1000000,
2225
3151
  "max_output_tokens": 8192,
@@ -2298,6 +3224,86 @@
2298
3224
  "mode": "chat",
2299
3225
  "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
2300
3226
  },
3227
+ "gemini-2.0-flash-exp": {
3228
+ "max_tokens": 8192,
3229
+ "max_input_tokens": 1048576,
3230
+ "max_output_tokens": 8192,
3231
+ "max_images_per_prompt": 3000,
3232
+ "max_videos_per_prompt": 10,
3233
+ "max_video_length": 1,
3234
+ "max_audio_length_hours": 8.4,
3235
+ "max_audio_per_prompt": 1,
3236
+ "max_pdf_size_mb": 30,
3237
+ "input_cost_per_image": 0,
3238
+ "input_cost_per_video_per_second": 0,
3239
+ "input_cost_per_audio_per_second": 0,
3240
+ "input_cost_per_token": 0,
3241
+ "input_cost_per_character": 0,
3242
+ "input_cost_per_token_above_128k_tokens": 0,
3243
+ "input_cost_per_character_above_128k_tokens": 0,
3244
+ "input_cost_per_image_above_128k_tokens": 0,
3245
+ "input_cost_per_video_per_second_above_128k_tokens": 0,
3246
+ "input_cost_per_audio_per_second_above_128k_tokens": 0,
3247
+ "output_cost_per_token": 0,
3248
+ "output_cost_per_character": 0,
3249
+ "output_cost_per_token_above_128k_tokens": 0,
3250
+ "output_cost_per_character_above_128k_tokens": 0,
3251
+ "litellm_provider": "vertex_ai-language-models",
3252
+ "mode": "chat",
3253
+ "supports_system_messages": true,
3254
+ "supports_function_calling": true,
3255
+ "supports_vision": true,
3256
+ "supports_response_schema": true,
3257
+ "supports_audio_output": true,
3258
+ "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash"
3259
+ },
3260
+ "gemini/gemini-2.0-flash-exp": {
3261
+ "max_tokens": 8192,
3262
+ "max_input_tokens": 1048576,
3263
+ "max_output_tokens": 8192,
3264
+ "max_images_per_prompt": 3000,
3265
+ "max_videos_per_prompt": 10,
3266
+ "max_video_length": 1,
3267
+ "max_audio_length_hours": 8.4,
3268
+ "max_audio_per_prompt": 1,
3269
+ "max_pdf_size_mb": 30,
3270
+ "input_cost_per_image": 0,
3271
+ "input_cost_per_video_per_second": 0,
3272
+ "input_cost_per_audio_per_second": 0,
3273
+ "input_cost_per_token": 0,
3274
+ "input_cost_per_character": 0,
3275
+ "input_cost_per_token_above_128k_tokens": 0,
3276
+ "input_cost_per_character_above_128k_tokens": 0,
3277
+ "input_cost_per_image_above_128k_tokens": 0,
3278
+ "input_cost_per_video_per_second_above_128k_tokens": 0,
3279
+ "input_cost_per_audio_per_second_above_128k_tokens": 0,
3280
+ "output_cost_per_token": 0,
3281
+ "output_cost_per_character": 0,
3282
+ "output_cost_per_token_above_128k_tokens": 0,
3283
+ "output_cost_per_character_above_128k_tokens": 0,
3284
+ "litellm_provider": "gemini",
3285
+ "mode": "chat",
3286
+ "supports_system_messages": true,
3287
+ "supports_function_calling": true,
3288
+ "supports_vision": true,
3289
+ "supports_response_schema": true,
3290
+ "supports_audio_output": true,
3291
+ "tpm": 4000000,
3292
+ "rpm": 10,
3293
+ "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash"
3294
+ },
3295
+ "vertex_ai/claude-3-sonnet": {
3296
+ "max_tokens": 4096,
3297
+ "max_input_tokens": 200000,
3298
+ "max_output_tokens": 4096,
3299
+ "input_cost_per_token": 0.000003,
3300
+ "output_cost_per_token": 0.000015,
3301
+ "litellm_provider": "vertex_ai-anthropic_models",
3302
+ "mode": "chat",
3303
+ "supports_function_calling": true,
3304
+ "supports_vision": true,
3305
+ "supports_assistant_prefill": true
3306
+ },
2301
3307
  "vertex_ai/claude-3-sonnet@20240229": {
2302
3308
  "max_tokens": 4096,
2303
3309
  "max_input_tokens": 200000,
@@ -2310,6 +3316,18 @@
2310
3316
  "supports_vision": true,
2311
3317
  "supports_assistant_prefill": true
2312
3318
  },
3319
+ "vertex_ai/claude-3-5-sonnet": {
3320
+ "max_tokens": 8192,
3321
+ "max_input_tokens": 200000,
3322
+ "max_output_tokens": 8192,
3323
+ "input_cost_per_token": 0.000003,
3324
+ "output_cost_per_token": 0.000015,
3325
+ "litellm_provider": "vertex_ai-anthropic_models",
3326
+ "mode": "chat",
3327
+ "supports_function_calling": true,
3328
+ "supports_vision": true,
3329
+ "supports_assistant_prefill": true
3330
+ },
2313
3331
  "vertex_ai/claude-3-5-sonnet@20240620": {
2314
3332
  "max_tokens": 8192,
2315
3333
  "max_input_tokens": 200000,
@@ -2322,6 +3340,42 @@
2322
3340
  "supports_vision": true,
2323
3341
  "supports_assistant_prefill": true
2324
3342
  },
3343
+ "vertex_ai/claude-3-5-sonnet-v2": {
3344
+ "max_tokens": 8192,
3345
+ "max_input_tokens": 200000,
3346
+ "max_output_tokens": 8192,
3347
+ "input_cost_per_token": 0.000003,
3348
+ "output_cost_per_token": 0.000015,
3349
+ "litellm_provider": "vertex_ai-anthropic_models",
3350
+ "mode": "chat",
3351
+ "supports_function_calling": true,
3352
+ "supports_vision": true,
3353
+ "supports_assistant_prefill": true
3354
+ },
3355
+ "vertex_ai/claude-3-5-sonnet-v2@20241022": {
3356
+ "max_tokens": 8192,
3357
+ "max_input_tokens": 200000,
3358
+ "max_output_tokens": 8192,
3359
+ "input_cost_per_token": 0.000003,
3360
+ "output_cost_per_token": 0.000015,
3361
+ "litellm_provider": "vertex_ai-anthropic_models",
3362
+ "mode": "chat",
3363
+ "supports_function_calling": true,
3364
+ "supports_vision": true,
3365
+ "supports_assistant_prefill": true
3366
+ },
3367
+ "vertex_ai/claude-3-haiku": {
3368
+ "max_tokens": 4096,
3369
+ "max_input_tokens": 200000,
3370
+ "max_output_tokens": 4096,
3371
+ "input_cost_per_token": 0.00000025,
3372
+ "output_cost_per_token": 0.00000125,
3373
+ "litellm_provider": "vertex_ai-anthropic_models",
3374
+ "mode": "chat",
3375
+ "supports_function_calling": true,
3376
+ "supports_vision": true,
3377
+ "supports_assistant_prefill": true
3378
+ },
2325
3379
  "vertex_ai/claude-3-haiku@20240307": {
2326
3380
  "max_tokens": 4096,
2327
3381
  "max_input_tokens": 200000,
@@ -2334,6 +3388,40 @@
2334
3388
  "supports_vision": true,
2335
3389
  "supports_assistant_prefill": true
2336
3390
  },
3391
+ "vertex_ai/claude-3-5-haiku": {
3392
+ "max_tokens": 8192,
3393
+ "max_input_tokens": 200000,
3394
+ "max_output_tokens": 8192,
3395
+ "input_cost_per_token": 0.000001,
3396
+ "output_cost_per_token": 0.000005,
3397
+ "litellm_provider": "vertex_ai-anthropic_models",
3398
+ "mode": "chat",
3399
+ "supports_function_calling": true,
3400
+ "supports_assistant_prefill": true
3401
+ },
3402
+ "vertex_ai/claude-3-5-haiku@20241022": {
3403
+ "max_tokens": 8192,
3404
+ "max_input_tokens": 200000,
3405
+ "max_output_tokens": 8192,
3406
+ "input_cost_per_token": 0.000001,
3407
+ "output_cost_per_token": 0.000005,
3408
+ "litellm_provider": "vertex_ai-anthropic_models",
3409
+ "mode": "chat",
3410
+ "supports_function_calling": true,
3411
+ "supports_assistant_prefill": true
3412
+ },
3413
+ "vertex_ai/claude-3-opus": {
3414
+ "max_tokens": 4096,
3415
+ "max_input_tokens": 200000,
3416
+ "max_output_tokens": 4096,
3417
+ "input_cost_per_token": 0.000015,
3418
+ "output_cost_per_token": 0.000075,
3419
+ "litellm_provider": "vertex_ai-anthropic_models",
3420
+ "mode": "chat",
3421
+ "supports_function_calling": true,
3422
+ "supports_vision": true,
3423
+ "supports_assistant_prefill": true
3424
+ },
2337
3425
  "vertex_ai/claude-3-opus@20240229": {
2338
3426
  "max_tokens": 4096,
2339
3427
  "max_input_tokens": 200000,
@@ -2376,12 +3464,44 @@
2376
3464
  "mode": "chat",
2377
3465
  "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models"
2378
3466
  },
3467
+ "vertex_ai/meta/llama-3.2-90b-vision-instruct-maas": {
3468
+ "max_tokens": 128000,
3469
+ "max_input_tokens": 128000,
3470
+ "max_output_tokens": 2048,
3471
+ "input_cost_per_token": 0.0,
3472
+ "output_cost_per_token": 0.0,
3473
+ "litellm_provider": "vertex_ai-llama_models",
3474
+ "mode": "chat",
3475
+ "supports_system_messages": true,
3476
+ "supports_vision": true,
3477
+ "source": "https://console.cloud.google.com/vertex-ai/publishers/meta/model-garden/llama-3.2-90b-vision-instruct-maas"
3478
+ },
2379
3479
  "vertex_ai/mistral-large@latest": {
2380
3480
  "max_tokens": 8191,
2381
3481
  "max_input_tokens": 128000,
2382
3482
  "max_output_tokens": 8191,
2383
- "input_cost_per_token": 0.000003,
2384
- "output_cost_per_token": 0.000009,
3483
+ "input_cost_per_token": 0.000002,
3484
+ "output_cost_per_token": 0.000006,
3485
+ "litellm_provider": "vertex_ai-mistral_models",
3486
+ "mode": "chat",
3487
+ "supports_function_calling": true
3488
+ },
3489
+ "vertex_ai/mistral-large@2411-001": {
3490
+ "max_tokens": 8191,
3491
+ "max_input_tokens": 128000,
3492
+ "max_output_tokens": 8191,
3493
+ "input_cost_per_token": 0.000002,
3494
+ "output_cost_per_token": 0.000006,
3495
+ "litellm_provider": "vertex_ai-mistral_models",
3496
+ "mode": "chat",
3497
+ "supports_function_calling": true
3498
+ },
3499
+ "vertex_ai/mistral-large-2411": {
3500
+ "max_tokens": 8191,
3501
+ "max_input_tokens": 128000,
3502
+ "max_output_tokens": 8191,
3503
+ "input_cost_per_token": 0.000002,
3504
+ "output_cost_per_token": 0.000006,
2385
3505
  "litellm_provider": "vertex_ai-mistral_models",
2386
3506
  "mode": "chat",
2387
3507
  "supports_function_calling": true
@@ -2390,8 +3510,8 @@
2390
3510
  "max_tokens": 8191,
2391
3511
  "max_input_tokens": 128000,
2392
3512
  "max_output_tokens": 8191,
2393
- "input_cost_per_token": 0.000003,
2394
- "output_cost_per_token": 0.000009,
3513
+ "input_cost_per_token": 0.000002,
3514
+ "output_cost_per_token": 0.000006,
2395
3515
  "litellm_provider": "vertex_ai-mistral_models",
2396
3516
  "mode": "chat",
2397
3517
  "supports_function_calling": true
@@ -2400,8 +3520,8 @@
2400
3520
  "max_tokens": 128000,
2401
3521
  "max_input_tokens": 128000,
2402
3522
  "max_output_tokens": 128000,
2403
- "input_cost_per_token": 0.000003,
2404
- "output_cost_per_token": 0.000003,
3523
+ "input_cost_per_token": 0.00000015,
3524
+ "output_cost_per_token": 0.00000015,
2405
3525
  "litellm_provider": "vertex_ai-mistral_models",
2406
3526
  "mode": "chat",
2407
3527
  "supports_function_calling": true
@@ -2465,8 +3585,8 @@
2465
3585
  "max_tokens": 128000,
2466
3586
  "max_input_tokens": 128000,
2467
3587
  "max_output_tokens": 128000,
2468
- "input_cost_per_token": 0.000001,
2469
- "output_cost_per_token": 0.000003,
3588
+ "input_cost_per_token": 0.0000002,
3589
+ "output_cost_per_token": 0.0000006,
2470
3590
  "litellm_provider": "vertex_ai-mistral_models",
2471
3591
  "mode": "chat",
2472
3592
  "supports_function_calling": true
@@ -2475,35 +3595,47 @@
2475
3595
  "max_tokens": 128000,
2476
3596
  "max_input_tokens": 128000,
2477
3597
  "max_output_tokens": 128000,
2478
- "input_cost_per_token": 0.000001,
2479
- "output_cost_per_token": 0.000003,
3598
+ "input_cost_per_token": 0.0000002,
3599
+ "output_cost_per_token": 0.0000006,
2480
3600
  "litellm_provider": "vertex_ai-mistral_models",
2481
3601
  "mode": "chat",
2482
3602
  "supports_function_calling": true
2483
3603
  },
2484
3604
  "vertex_ai/imagegeneration@006": {
2485
- "cost_per_image": 0.020,
3605
+ "output_cost_per_image": 0.020,
2486
3606
  "litellm_provider": "vertex_ai-image-models",
2487
3607
  "mode": "image_generation",
2488
3608
  "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
2489
3609
  },
2490
3610
  "vertex_ai/imagen-3.0-generate-001": {
2491
- "cost_per_image": 0.04,
3611
+ "output_cost_per_image": 0.04,
2492
3612
  "litellm_provider": "vertex_ai-image-models",
2493
3613
  "mode": "image_generation",
2494
3614
  "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
2495
3615
  },
2496
3616
  "vertex_ai/imagen-3.0-fast-generate-001": {
2497
- "cost_per_image": 0.02,
3617
+ "output_cost_per_image": 0.02,
2498
3618
  "litellm_provider": "vertex_ai-image-models",
2499
3619
  "mode": "image_generation",
2500
3620
  "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
2501
3621
  },
2502
3622
  "text-embedding-004": {
2503
- "max_tokens": 3072,
2504
- "max_input_tokens": 3072,
3623
+ "max_tokens": 2048,
3624
+ "max_input_tokens": 2048,
2505
3625
  "output_vector_size": 768,
2506
- "input_cost_per_token": 0.00000000625,
3626
+ "input_cost_per_character": 0.000000025,
3627
+ "input_cost_per_token": 0.0000001,
3628
+ "output_cost_per_token": 0,
3629
+ "litellm_provider": "vertex_ai-embedding-models",
3630
+ "mode": "embedding",
3631
+ "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models"
3632
+ },
3633
+ "text-embedding-005": {
3634
+ "max_tokens": 2048,
3635
+ "max_input_tokens": 2048,
3636
+ "output_vector_size": 768,
3637
+ "input_cost_per_character": 0.000000025,
3638
+ "input_cost_per_token": 0.0000001,
2507
3639
  "output_cost_per_token": 0,
2508
3640
  "litellm_provider": "vertex_ai-embedding-models",
2509
3641
  "mode": "embedding",
@@ -2513,7 +3645,8 @@
2513
3645
  "max_tokens": 2048,
2514
3646
  "max_input_tokens": 2048,
2515
3647
  "output_vector_size": 768,
2516
- "input_cost_per_token": 0.00000000625,
3648
+ "input_cost_per_character": 0.000000025,
3649
+ "input_cost_per_token": 0.0000001,
2517
3650
  "output_cost_per_token": 0,
2518
3651
  "litellm_provider": "vertex_ai-embedding-models",
2519
3652
  "mode": "embedding",
@@ -2523,7 +3656,8 @@
2523
3656
  "max_tokens": 3072,
2524
3657
  "max_input_tokens": 3072,
2525
3658
  "output_vector_size": 768,
2526
- "input_cost_per_token": 0.00000000625,
3659
+ "input_cost_per_character": 0.000000025,
3660
+ "input_cost_per_token": 0.0000001,
2527
3661
  "output_cost_per_token": 0,
2528
3662
  "litellm_provider": "vertex_ai-embedding-models",
2529
3663
  "mode": "embedding",
@@ -2533,7 +3667,8 @@
2533
3667
  "max_tokens": 3072,
2534
3668
  "max_input_tokens": 3072,
2535
3669
  "output_vector_size": 768,
2536
- "input_cost_per_token": 0.00000000625,
3670
+ "input_cost_per_character": 0.000000025,
3671
+ "input_cost_per_token": 0.0000001,
2537
3672
  "output_cost_per_token": 0,
2538
3673
  "litellm_provider": "vertex_ai-embedding-models",
2539
3674
  "mode": "embedding",
@@ -2543,7 +3678,8 @@
2543
3678
  "max_tokens": 3072,
2544
3679
  "max_input_tokens": 3072,
2545
3680
  "output_vector_size": 768,
2546
- "input_cost_per_token": 0.00000000625,
3681
+ "input_cost_per_character": 0.000000025,
3682
+ "input_cost_per_token": 0.0000001,
2547
3683
  "output_cost_per_token": 0,
2548
3684
  "litellm_provider": "vertex_ai-embedding-models",
2549
3685
  "mode": "embedding",
@@ -2553,7 +3689,8 @@
2553
3689
  "max_tokens": 3072,
2554
3690
  "max_input_tokens": 3072,
2555
3691
  "output_vector_size": 768,
2556
- "input_cost_per_token": 0.00000000625,
3692
+ "input_cost_per_character": 0.000000025,
3693
+ "input_cost_per_token": 0.0000001,
2557
3694
  "output_cost_per_token": 0,
2558
3695
  "litellm_provider": "vertex_ai-embedding-models",
2559
3696
  "mode": "embedding",
@@ -2563,7 +3700,8 @@
2563
3700
  "max_tokens": 3072,
2564
3701
  "max_input_tokens": 3072,
2565
3702
  "output_vector_size": 768,
2566
- "input_cost_per_token": 0.00000000625,
3703
+ "input_cost_per_character": 0.000000025,
3704
+ "input_cost_per_token": 0.0000001,
2567
3705
  "output_cost_per_token": 0,
2568
3706
  "litellm_provider": "vertex_ai-embedding-models",
2569
3707
  "mode": "embedding",
@@ -2650,9 +3788,36 @@
2650
3788
  "mode": "completion",
2651
3789
  "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
2652
3790
  },
3791
+ "gemini/gemini-1.5-flash-002": {
3792
+ "max_tokens": 8192,
3793
+ "max_input_tokens": 1048576,
3794
+ "max_output_tokens": 8192,
3795
+ "max_images_per_prompt": 3000,
3796
+ "max_videos_per_prompt": 10,
3797
+ "max_video_length": 1,
3798
+ "max_audio_length_hours": 8.4,
3799
+ "max_audio_per_prompt": 1,
3800
+ "max_pdf_size_mb": 30,
3801
+ "cache_read_input_token_cost": 0.00000001875,
3802
+ "cache_creation_input_token_cost": 0.000001,
3803
+ "input_cost_per_token": 0.000000075,
3804
+ "input_cost_per_token_above_128k_tokens": 0.00000015,
3805
+ "output_cost_per_token": 0.0000003,
3806
+ "output_cost_per_token_above_128k_tokens": 0.0000006,
3807
+ "litellm_provider": "gemini",
3808
+ "mode": "chat",
3809
+ "supports_system_messages": true,
3810
+ "supports_function_calling": true,
3811
+ "supports_vision": true,
3812
+ "supports_response_schema": true,
3813
+ "supports_prompt_caching": true,
3814
+ "tpm": 4000000,
3815
+ "rpm": 2000,
3816
+ "source": "https://ai.google.dev/pricing"
3817
+ },
2653
3818
  "gemini/gemini-1.5-flash-001": {
2654
3819
  "max_tokens": 8192,
2655
- "max_input_tokens": 1000000,
3820
+ "max_input_tokens": 1048576,
2656
3821
  "max_output_tokens": 8192,
2657
3822
  "max_images_per_prompt": 3000,
2658
3823
  "max_videos_per_prompt": 10,
@@ -2660,6 +3825,8 @@
2660
3825
  "max_audio_length_hours": 8.4,
2661
3826
  "max_audio_per_prompt": 1,
2662
3827
  "max_pdf_size_mb": 30,
3828
+ "cache_read_input_token_cost": 0.00000001875,
3829
+ "cache_creation_input_token_cost": 0.000001,
2663
3830
  "input_cost_per_token": 0.000000075,
2664
3831
  "input_cost_per_token_above_128k_tokens": 0.00000015,
2665
3832
  "output_cost_per_token": 0.0000003,
@@ -2671,11 +3838,13 @@
2671
3838
  "supports_vision": true,
2672
3839
  "supports_response_schema": true,
2673
3840
  "supports_prompt_caching": true,
3841
+ "tpm": 4000000,
3842
+ "rpm": 2000,
2674
3843
  "source": "https://ai.google.dev/pricing"
2675
3844
  },
2676
3845
  "gemini/gemini-1.5-flash": {
2677
3846
  "max_tokens": 8192,
2678
- "max_input_tokens": 1000000,
3847
+ "max_input_tokens": 1048576,
2679
3848
  "max_output_tokens": 8192,
2680
3849
  "max_images_per_prompt": 3000,
2681
3850
  "max_videos_per_prompt": 10,
@@ -2693,11 +3862,13 @@
2693
3862
  "supports_function_calling": true,
2694
3863
  "supports_vision": true,
2695
3864
  "supports_response_schema": true,
3865
+ "tpm": 4000000,
3866
+ "rpm": 2000,
2696
3867
  "source": "https://ai.google.dev/pricing"
2697
3868
  },
2698
3869
  "gemini/gemini-1.5-flash-latest": {
2699
3870
  "max_tokens": 8192,
2700
- "max_input_tokens": 1000000,
3871
+ "max_input_tokens": 1048576,
2701
3872
  "max_output_tokens": 8192,
2702
3873
  "max_images_per_prompt": 3000,
2703
3874
  "max_videos_per_prompt": 10,
@@ -2715,11 +3886,118 @@
2715
3886
  "supports_function_calling": true,
2716
3887
  "supports_vision": true,
2717
3888
  "supports_response_schema": true,
3889
+ "supports_prompt_caching": true,
3890
+ "tpm": 4000000,
3891
+ "rpm": 2000,
3892
+ "source": "https://ai.google.dev/pricing"
3893
+ },
3894
+ "gemini/gemini-1.5-flash-8b": {
3895
+ "max_tokens": 8192,
3896
+ "max_input_tokens": 1048576,
3897
+ "max_output_tokens": 8192,
3898
+ "max_images_per_prompt": 3000,
3899
+ "max_videos_per_prompt": 10,
3900
+ "max_video_length": 1,
3901
+ "max_audio_length_hours": 8.4,
3902
+ "max_audio_per_prompt": 1,
3903
+ "max_pdf_size_mb": 30,
3904
+ "input_cost_per_token": 0,
3905
+ "input_cost_per_token_above_128k_tokens": 0,
3906
+ "output_cost_per_token": 0,
3907
+ "output_cost_per_token_above_128k_tokens": 0,
3908
+ "litellm_provider": "gemini",
3909
+ "mode": "chat",
3910
+ "supports_system_messages": true,
3911
+ "supports_function_calling": true,
3912
+ "supports_vision": true,
3913
+ "supports_response_schema": true,
3914
+ "supports_prompt_caching": true,
3915
+ "tpm": 4000000,
3916
+ "rpm": 4000,
3917
+ "source": "https://ai.google.dev/pricing"
3918
+ },
3919
+ "gemini/gemini-1.5-flash-8b-exp-0924": {
3920
+ "max_tokens": 8192,
3921
+ "max_input_tokens": 1048576,
3922
+ "max_output_tokens": 8192,
3923
+ "max_images_per_prompt": 3000,
3924
+ "max_videos_per_prompt": 10,
3925
+ "max_video_length": 1,
3926
+ "max_audio_length_hours": 8.4,
3927
+ "max_audio_per_prompt": 1,
3928
+ "max_pdf_size_mb": 30,
3929
+ "input_cost_per_token": 0,
3930
+ "input_cost_per_token_above_128k_tokens": 0,
3931
+ "output_cost_per_token": 0,
3932
+ "output_cost_per_token_above_128k_tokens": 0,
3933
+ "litellm_provider": "gemini",
3934
+ "mode": "chat",
3935
+ "supports_system_messages": true,
3936
+ "supports_function_calling": true,
3937
+ "supports_vision": true,
3938
+ "supports_response_schema": true,
3939
+ "supports_prompt_caching": true,
3940
+ "tpm": 4000000,
3941
+ "rpm": 4000,
2718
3942
  "source": "https://ai.google.dev/pricing"
2719
3943
  },
3944
+ "gemini/gemini-exp-1114": {
3945
+ "max_tokens": 8192,
3946
+ "max_input_tokens": 1048576,
3947
+ "max_output_tokens": 8192,
3948
+ "max_images_per_prompt": 3000,
3949
+ "max_videos_per_prompt": 10,
3950
+ "max_video_length": 1,
3951
+ "max_audio_length_hours": 8.4,
3952
+ "max_audio_per_prompt": 1,
3953
+ "max_pdf_size_mb": 30,
3954
+ "input_cost_per_token": 0,
3955
+ "input_cost_per_token_above_128k_tokens": 0,
3956
+ "output_cost_per_token": 0,
3957
+ "output_cost_per_token_above_128k_tokens": 0,
3958
+ "litellm_provider": "gemini",
3959
+ "mode": "chat",
3960
+ "supports_system_messages": true,
3961
+ "supports_function_calling": true,
3962
+ "supports_vision": true,
3963
+ "supports_response_schema": true,
3964
+ "tpm": 4000000,
3965
+ "rpm": 1000,
3966
+ "source": "https://ai.google.dev/pricing",
3967
+ "metadata": {
3968
+ "notes": "Rate limits not documented for gemini-exp-1114. Assuming same as gemini-1.5-pro."
3969
+ }
3970
+ },
3971
+ "gemini/gemini-exp-1206": {
3972
+ "max_tokens": 8192,
3973
+ "max_input_tokens": 2097152,
3974
+ "max_output_tokens": 8192,
3975
+ "max_images_per_prompt": 3000,
3976
+ "max_videos_per_prompt": 10,
3977
+ "max_video_length": 1,
3978
+ "max_audio_length_hours": 8.4,
3979
+ "max_audio_per_prompt": 1,
3980
+ "max_pdf_size_mb": 30,
3981
+ "input_cost_per_token": 0,
3982
+ "input_cost_per_token_above_128k_tokens": 0,
3983
+ "output_cost_per_token": 0,
3984
+ "output_cost_per_token_above_128k_tokens": 0,
3985
+ "litellm_provider": "gemini",
3986
+ "mode": "chat",
3987
+ "supports_system_messages": true,
3988
+ "supports_function_calling": true,
3989
+ "supports_vision": true,
3990
+ "supports_response_schema": true,
3991
+ "tpm": 4000000,
3992
+ "rpm": 1000,
3993
+ "source": "https://ai.google.dev/pricing",
3994
+ "metadata": {
3995
+ "notes": "Rate limits not documented for gemini-exp-1206. Assuming same as gemini-1.5-pro."
3996
+ }
3997
+ },
2720
3998
  "gemini/gemini-1.5-flash-exp-0827": {
2721
3999
  "max_tokens": 8192,
2722
- "max_input_tokens": 1000000,
4000
+ "max_input_tokens": 1048576,
2723
4001
  "max_output_tokens": 8192,
2724
4002
  "max_images_per_prompt": 3000,
2725
4003
  "max_videos_per_prompt": 10,
@@ -2737,6 +4015,8 @@
2737
4015
  "supports_function_calling": true,
2738
4016
  "supports_vision": true,
2739
4017
  "supports_response_schema": true,
4018
+ "tpm": 4000000,
4019
+ "rpm": 2000,
2740
4020
  "source": "https://ai.google.dev/pricing"
2741
4021
  },
2742
4022
  "gemini/gemini-1.5-flash-8b-exp-0827": {
@@ -2758,6 +4038,9 @@
2758
4038
  "supports_system_messages": true,
2759
4039
  "supports_function_calling": true,
2760
4040
  "supports_vision": true,
4041
+ "supports_response_schema": true,
4042
+ "tpm": 4000000,
4043
+ "rpm": 4000,
2761
4044
  "source": "https://ai.google.dev/pricing"
2762
4045
  },
2763
4046
  "gemini/gemini-pro": {
@@ -2771,7 +4054,10 @@
2771
4054
  "litellm_provider": "gemini",
2772
4055
  "mode": "chat",
2773
4056
  "supports_function_calling": true,
2774
- "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
4057
+ "rpd": 30000,
4058
+ "tpm": 120000,
4059
+ "rpm": 360,
4060
+ "source": "https://ai.google.dev/gemini-api/docs/models/gemini"
2775
4061
  },
2776
4062
  "gemini/gemini-1.5-pro": {
2777
4063
  "max_tokens": 8192,
@@ -2788,6 +4074,28 @@
2788
4074
  "supports_vision": true,
2789
4075
  "supports_tool_choice": true,
2790
4076
  "supports_response_schema": true,
4077
+ "tpm": 4000000,
4078
+ "rpm": 1000,
4079
+ "source": "https://ai.google.dev/pricing"
4080
+ },
4081
+ "gemini/gemini-1.5-pro-002": {
4082
+ "max_tokens": 8192,
4083
+ "max_input_tokens": 2097152,
4084
+ "max_output_tokens": 8192,
4085
+ "input_cost_per_token": 0.0000035,
4086
+ "input_cost_per_token_above_128k_tokens": 0.000007,
4087
+ "output_cost_per_token": 0.0000105,
4088
+ "output_cost_per_token_above_128k_tokens": 0.000021,
4089
+ "litellm_provider": "gemini",
4090
+ "mode": "chat",
4091
+ "supports_system_messages": true,
4092
+ "supports_function_calling": true,
4093
+ "supports_vision": true,
4094
+ "supports_tool_choice": true,
4095
+ "supports_response_schema": true,
4096
+ "supports_prompt_caching": true,
4097
+ "tpm": 4000000,
4098
+ "rpm": 1000,
2791
4099
  "source": "https://ai.google.dev/pricing"
2792
4100
  },
2793
4101
  "gemini/gemini-1.5-pro-001": {
@@ -2806,6 +4114,8 @@
2806
4114
  "supports_tool_choice": true,
2807
4115
  "supports_response_schema": true,
2808
4116
  "supports_prompt_caching": true,
4117
+ "tpm": 4000000,
4118
+ "rpm": 1000,
2809
4119
  "source": "https://ai.google.dev/pricing"
2810
4120
  },
2811
4121
  "gemini/gemini-1.5-pro-exp-0801": {
@@ -2823,6 +4133,8 @@
2823
4133
  "supports_vision": true,
2824
4134
  "supports_tool_choice": true,
2825
4135
  "supports_response_schema": true,
4136
+ "tpm": 4000000,
4137
+ "rpm": 1000,
2826
4138
  "source": "https://ai.google.dev/pricing"
2827
4139
  },
2828
4140
  "gemini/gemini-1.5-pro-exp-0827": {
@@ -2840,6 +4152,8 @@
2840
4152
  "supports_vision": true,
2841
4153
  "supports_tool_choice": true,
2842
4154
  "supports_response_schema": true,
4155
+ "tpm": 4000000,
4156
+ "rpm": 1000,
2843
4157
  "source": "https://ai.google.dev/pricing"
2844
4158
  },
2845
4159
  "gemini/gemini-1.5-pro-latest": {
@@ -2857,6 +4171,8 @@
2857
4171
  "supports_vision": true,
2858
4172
  "supports_tool_choice": true,
2859
4173
  "supports_response_schema": true,
4174
+ "tpm": 4000000,
4175
+ "rpm": 1000,
2860
4176
  "source": "https://ai.google.dev/pricing"
2861
4177
  },
2862
4178
  "gemini/gemini-pro-vision": {
@@ -2871,6 +4187,9 @@
2871
4187
  "mode": "chat",
2872
4188
  "supports_function_calling": true,
2873
4189
  "supports_vision": true,
4190
+ "rpd": 30000,
4191
+ "tpm": 120000,
4192
+ "rpm": 360,
2874
4193
  "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
2875
4194
  },
2876
4195
  "gemini/gemini-gemma-2-27b-it": {
@@ -2962,54 +4281,115 @@
2962
4281
  "litellm_provider": "cohere",
2963
4282
  "mode": "completion"
2964
4283
  },
2965
- "embed-english-v3.0": {
2966
- "max_tokens": 512,
2967
- "max_input_tokens": 512,
2968
- "input_cost_per_token": 0.00000010,
2969
- "output_cost_per_token": 0.00000,
4284
+ "rerank-v3.5": {
4285
+ "max_tokens": 4096,
4286
+ "max_input_tokens": 4096,
4287
+ "max_output_tokens": 4096,
4288
+ "max_query_tokens": 2048,
4289
+ "input_cost_per_token": 0.0,
4290
+ "input_cost_per_query": 0.002,
4291
+ "output_cost_per_token": 0.0,
2970
4292
  "litellm_provider": "cohere",
2971
- "mode": "embedding"
4293
+ "mode": "rerank"
4294
+ },
4295
+ "rerank-english-v3.0": {
4296
+ "max_tokens": 4096,
4297
+ "max_input_tokens": 4096,
4298
+ "max_output_tokens": 4096,
4299
+ "max_query_tokens": 2048,
4300
+ "input_cost_per_token": 0.0,
4301
+ "input_cost_per_query": 0.002,
4302
+ "output_cost_per_token": 0.0,
4303
+ "litellm_provider": "cohere",
4304
+ "mode": "rerank"
4305
+ },
4306
+ "rerank-multilingual-v3.0": {
4307
+ "max_tokens": 4096,
4308
+ "max_input_tokens": 4096,
4309
+ "max_output_tokens": 4096,
4310
+ "max_query_tokens": 2048,
4311
+ "input_cost_per_token": 0.0,
4312
+ "input_cost_per_query": 0.002,
4313
+ "output_cost_per_token": 0.0,
4314
+ "litellm_provider": "cohere",
4315
+ "mode": "rerank"
4316
+ },
4317
+ "rerank-english-v2.0": {
4318
+ "max_tokens": 4096,
4319
+ "max_input_tokens": 4096,
4320
+ "max_output_tokens": 4096,
4321
+ "max_query_tokens": 2048,
4322
+ "input_cost_per_token": 0.0,
4323
+ "input_cost_per_query": 0.002,
4324
+ "output_cost_per_token": 0.0,
4325
+ "litellm_provider": "cohere",
4326
+ "mode": "rerank"
4327
+ },
4328
+ "rerank-multilingual-v2.0": {
4329
+ "max_tokens": 4096,
4330
+ "max_input_tokens": 4096,
4331
+ "max_output_tokens": 4096,
4332
+ "max_query_tokens": 2048,
4333
+ "input_cost_per_token": 0.0,
4334
+ "input_cost_per_query": 0.002,
4335
+ "output_cost_per_token": 0.0,
4336
+ "litellm_provider": "cohere",
4337
+ "mode": "rerank"
2972
4338
  },
2973
4339
  "embed-english-light-v3.0": {
2974
- "max_tokens": 512,
2975
- "max_input_tokens": 512,
4340
+ "max_tokens": 1024,
4341
+ "max_input_tokens": 1024,
2976
4342
  "input_cost_per_token": 0.00000010,
2977
4343
  "output_cost_per_token": 0.00000,
2978
4344
  "litellm_provider": "cohere",
2979
4345
  "mode": "embedding"
2980
4346
  },
2981
4347
  "embed-multilingual-v3.0": {
2982
- "max_tokens": 512,
2983
- "max_input_tokens": 512,
4348
+ "max_tokens": 1024,
4349
+ "max_input_tokens": 1024,
2984
4350
  "input_cost_per_token": 0.00000010,
2985
4351
  "output_cost_per_token": 0.00000,
2986
4352
  "litellm_provider": "cohere",
2987
4353
  "mode": "embedding"
2988
4354
  },
2989
4355
  "embed-english-v2.0": {
2990
- "max_tokens": 512,
2991
- "max_input_tokens": 512,
4356
+ "max_tokens": 4096,
4357
+ "max_input_tokens": 4096,
2992
4358
  "input_cost_per_token": 0.00000010,
2993
4359
  "output_cost_per_token": 0.00000,
2994
4360
  "litellm_provider": "cohere",
2995
4361
  "mode": "embedding"
2996
4362
  },
2997
4363
  "embed-english-light-v2.0": {
2998
- "max_tokens": 512,
2999
- "max_input_tokens": 512,
4364
+ "max_tokens": 1024,
4365
+ "max_input_tokens": 1024,
3000
4366
  "input_cost_per_token": 0.00000010,
3001
4367
  "output_cost_per_token": 0.00000,
3002
4368
  "litellm_provider": "cohere",
3003
4369
  "mode": "embedding"
3004
4370
  },
3005
4371
  "embed-multilingual-v2.0": {
3006
- "max_tokens": 256,
3007
- "max_input_tokens": 256,
4372
+ "max_tokens": 768,
4373
+ "max_input_tokens": 768,
3008
4374
  "input_cost_per_token": 0.00000010,
3009
4375
  "output_cost_per_token": 0.00000,
3010
4376
  "litellm_provider": "cohere",
3011
4377
  "mode": "embedding"
3012
4378
  },
4379
+ "embed-english-v3.0": {
4380
+ "max_tokens": 1024,
4381
+ "max_input_tokens": 1024,
4382
+ "input_cost_per_token": 0.00000010,
4383
+ "input_cost_per_image": 0.0001,
4384
+ "output_cost_per_token": 0.00000,
4385
+ "litellm_provider": "cohere",
4386
+ "mode": "embedding",
4387
+ "supports_image_input": true,
4388
+ "supports_embedding_image_input": true,
4389
+ "metadata": {
4390
+ "notes": "'supports_image_input' is a deprecated field. Use 'supports_embedding_image_input' instead."
4391
+ }
4392
+ },
3013
4393
  "replicate/meta/llama-2-13b": {
3014
4394
  "max_tokens": 4096,
3015
4395
  "max_input_tokens": 4096,
@@ -3127,13 +4507,24 @@
3127
4507
  "litellm_provider": "replicate",
3128
4508
  "mode": "chat"
3129
4509
  },
4510
+ "openrouter/deepseek/deepseek-chat": {
4511
+ "max_tokens": 8192,
4512
+ "max_input_tokens": 66000,
4513
+ "max_output_tokens": 4096,
4514
+ "input_cost_per_token": 0.00000014,
4515
+ "output_cost_per_token": 0.00000028,
4516
+ "litellm_provider": "openrouter",
4517
+ "supports_prompt_caching": true,
4518
+ "mode": "chat"
4519
+ },
3130
4520
  "openrouter/deepseek/deepseek-coder": {
3131
- "max_tokens": 4096,
3132
- "max_input_tokens": 32000,
4521
+ "max_tokens": 8192,
4522
+ "max_input_tokens": 66000,
3133
4523
  "max_output_tokens": 4096,
3134
4524
  "input_cost_per_token": 0.00000014,
3135
4525
  "output_cost_per_token": 0.00000028,
3136
4526
  "litellm_provider": "openrouter",
4527
+ "supports_prompt_caching": true,
3137
4528
  "mode": "chat"
3138
4529
  },
3139
4530
  "openrouter/microsoft/wizardlm-2-8x22b:nitro": {
@@ -3186,6 +4577,14 @@
3186
4577
  "supports_function_calling": true,
3187
4578
  "supports_vision": true
3188
4579
  },
4580
+ "openrouter/anthropic/claude-3-5-haiku": {
4581
+ "max_tokens": 200000,
4582
+ "input_cost_per_token": 0.000001,
4583
+ "output_cost_per_token": 0.000005,
4584
+ "litellm_provider": "openrouter",
4585
+ "mode": "chat",
4586
+ "supports_function_calling": true
4587
+ },
3189
4588
  "openrouter/anthropic/claude-3-haiku-20240307": {
3190
4589
  "max_tokens": 4096,
3191
4590
  "max_input_tokens": 200000,
@@ -3198,6 +4597,17 @@
3198
4597
  "supports_vision": true,
3199
4598
  "tool_use_system_prompt_tokens": 264
3200
4599
  },
4600
+ "openrouter/anthropic/claude-3-5-haiku-20241022": {
4601
+ "max_tokens": 8192,
4602
+ "max_input_tokens": 200000,
4603
+ "max_output_tokens": 8192,
4604
+ "input_cost_per_token": 0.000001,
4605
+ "output_cost_per_token": 0.000005,
4606
+ "litellm_provider": "openrouter",
4607
+ "mode": "chat",
4608
+ "supports_function_calling": true,
4609
+ "tool_use_system_prompt_tokens": 264
4610
+ },
3201
4611
  "openrouter/anthropic/claude-3.5-sonnet": {
3202
4612
  "max_tokens": 8192,
3203
4613
  "max_input_tokens": 200000,
@@ -3292,17 +4702,33 @@
3292
4702
  "litellm_provider": "openrouter",
3293
4703
  "mode": "chat"
3294
4704
  },
4705
+ "openrouter/openai/o1": {
4706
+ "max_tokens": 100000,
4707
+ "max_input_tokens": 200000,
4708
+ "max_output_tokens": 100000,
4709
+ "input_cost_per_token": 0.000015,
4710
+ "output_cost_per_token": 0.00006,
4711
+ "cache_read_input_token_cost": 0.0000075,
4712
+ "litellm_provider": "openrouter",
4713
+ "mode": "chat",
4714
+ "supports_function_calling": true,
4715
+ "supports_parallel_function_calling": true,
4716
+ "supports_vision": true,
4717
+ "supports_prompt_caching": true,
4718
+ "supports_system_messages": true,
4719
+ "supports_response_schema": true
4720
+ },
3295
4721
  "openrouter/openai/o1-mini": {
3296
4722
  "max_tokens": 65536,
3297
4723
  "max_input_tokens": 128000,
3298
4724
  "max_output_tokens": 65536,
3299
4725
  "input_cost_per_token": 0.000003,
3300
4726
  "output_cost_per_token": 0.000012,
3301
- "litellm_provider": "openai",
4727
+ "litellm_provider": "openrouter",
3302
4728
  "mode": "chat",
3303
4729
  "supports_function_calling": true,
3304
4730
  "supports_parallel_function_calling": true,
3305
- "supports_vision": true
4731
+ "supports_vision": false
3306
4732
  },
3307
4733
  "openrouter/openai/o1-mini-2024-09-12": {
3308
4734
  "max_tokens": 65536,
@@ -3310,11 +4736,11 @@
3310
4736
  "max_output_tokens": 65536,
3311
4737
  "input_cost_per_token": 0.000003,
3312
4738
  "output_cost_per_token": 0.000012,
3313
- "litellm_provider": "openai",
4739
+ "litellm_provider": "openrouter",
3314
4740
  "mode": "chat",
3315
4741
  "supports_function_calling": true,
3316
4742
  "supports_parallel_function_calling": true,
3317
- "supports_vision": true
4743
+ "supports_vision": false
3318
4744
  },
3319
4745
  "openrouter/openai/o1-preview": {
3320
4746
  "max_tokens": 32768,
@@ -3322,11 +4748,11 @@
3322
4748
  "max_output_tokens": 32768,
3323
4749
  "input_cost_per_token": 0.000015,
3324
4750
  "output_cost_per_token": 0.000060,
3325
- "litellm_provider": "openai",
4751
+ "litellm_provider": "openrouter",
3326
4752
  "mode": "chat",
3327
4753
  "supports_function_calling": true,
3328
4754
  "supports_parallel_function_calling": true,
3329
- "supports_vision": true
4755
+ "supports_vision": false
3330
4756
  },
3331
4757
  "openrouter/openai/o1-preview-2024-09-12": {
3332
4758
  "max_tokens": 32768,
@@ -3334,11 +4760,11 @@
3334
4760
  "max_output_tokens": 32768,
3335
4761
  "input_cost_per_token": 0.000015,
3336
4762
  "output_cost_per_token": 0.000060,
3337
- "litellm_provider": "openai",
4763
+ "litellm_provider": "openrouter",
3338
4764
  "mode": "chat",
3339
4765
  "supports_function_calling": true,
3340
4766
  "supports_parallel_function_calling": true,
3341
- "supports_vision": true
4767
+ "supports_vision": false
3342
4768
  },
3343
4769
  "openrouter/openai/gpt-4o": {
3344
4770
  "max_tokens": 4096,
@@ -3514,6 +4940,15 @@
3514
4940
  "litellm_provider": "openrouter",
3515
4941
  "mode": "chat"
3516
4942
  },
4943
+ "openrouter/qwen/qwen-2.5-coder-32b-instruct": {
4944
+ "max_tokens": 33792,
4945
+ "max_input_tokens": 33792,
4946
+ "max_output_tokens": 33792,
4947
+ "input_cost_per_token": 0.00000018,
4948
+ "output_cost_per_token": 0.00000018,
4949
+ "litellm_provider": "openrouter",
4950
+ "mode": "chat"
4951
+ },
3517
4952
  "j2-ultra": {
3518
4953
  "max_tokens": 8192,
3519
4954
  "max_input_tokens": 8192,
@@ -3719,6 +5154,22 @@
3719
5154
  "litellm_provider": "bedrock",
3720
5155
  "mode": "embedding"
3721
5156
  },
5157
+ "amazon.titan-embed-image-v1": {
5158
+ "max_tokens": 128,
5159
+ "max_input_tokens": 128,
5160
+ "output_vector_size": 1024,
5161
+ "input_cost_per_token": 0.0000008,
5162
+ "input_cost_per_image": 0.00006,
5163
+ "output_cost_per_token": 0.0,
5164
+ "litellm_provider": "bedrock",
5165
+ "supports_image_input": true,
5166
+ "supports_embedding_image_input": true,
5167
+ "mode": "embedding",
5168
+ "source": "https://us-east-1.console.aws.amazon.com/bedrock/home?region=us-east-1#/providers?model=amazon.titan-image-generator-v1",
5169
+ "metadata": {
5170
+ "notes": "'supports_image_input' is a deprecated field. Use 'supports_embedding_image_input' instead."
5171
+ }
5172
+ },
3722
5173
  "mistral.mistral-7b-instruct-v0:2": {
3723
5174
  "max_tokens": 8191,
3724
5175
  "max_input_tokens": 32000,
@@ -3850,6 +5301,43 @@
3850
5301
  "mode": "chat",
3851
5302
  "supports_function_calling": true
3852
5303
  },
5304
+ "amazon.nova-micro-v1:0": {
5305
+ "max_tokens": 4096,
5306
+ "max_input_tokens": 300000,
5307
+ "max_output_tokens": 4096,
5308
+ "input_cost_per_token": 0.000000035,
5309
+ "output_cost_per_token": 0.00000014,
5310
+ "litellm_provider": "bedrock_converse",
5311
+ "mode": "chat",
5312
+ "supports_function_calling": true,
5313
+ "supports_prompt_caching": true
5314
+ },
5315
+ "amazon.nova-lite-v1:0": {
5316
+ "max_tokens": 4096,
5317
+ "max_input_tokens": 128000,
5318
+ "max_output_tokens": 4096,
5319
+ "input_cost_per_token": 0.00000006,
5320
+ "output_cost_per_token": 0.00000024,
5321
+ "litellm_provider": "bedrock_converse",
5322
+ "mode": "chat",
5323
+ "supports_function_calling": true,
5324
+ "supports_vision": true,
5325
+ "supports_pdf_input": true,
5326
+ "supports_prompt_caching": true
5327
+ },
5328
+ "amazon.nova-pro-v1:0": {
5329
+ "max_tokens": 4096,
5330
+ "max_input_tokens": 300000,
5331
+ "max_output_tokens": 4096,
5332
+ "input_cost_per_token": 0.0000008,
5333
+ "output_cost_per_token": 0.0000032,
5334
+ "litellm_provider": "bedrock_converse",
5335
+ "mode": "chat",
5336
+ "supports_function_calling": true,
5337
+ "supports_vision": true,
5338
+ "supports_pdf_input": true,
5339
+ "supports_prompt_caching": true
5340
+ },
3853
5341
  "anthropic.claude-3-sonnet-20240229-v1:0": {
3854
5342
  "max_tokens": 4096,
3855
5343
  "max_input_tokens": 200000,
@@ -3862,9 +5350,9 @@
3862
5350
  "supports_vision": true
3863
5351
  },
3864
5352
  "anthropic.claude-3-5-sonnet-20240620-v1:0": {
3865
- "max_tokens": 8192,
5353
+ "max_tokens": 4096,
3866
5354
  "max_input_tokens": 200000,
3867
- "max_output_tokens": 8192,
5355
+ "max_output_tokens": 4096,
3868
5356
  "input_cost_per_token": 0.000003,
3869
5357
  "output_cost_per_token": 0.000015,
3870
5358
  "litellm_provider": "bedrock",
@@ -3872,6 +5360,19 @@
3872
5360
  "supports_function_calling": true,
3873
5361
  "supports_vision": true
3874
5362
  },
5363
+ "anthropic.claude-3-5-sonnet-20241022-v2:0": {
5364
+ "max_tokens": 8192,
5365
+ "max_input_tokens": 200000,
5366
+ "max_output_tokens": 8192,
5367
+ "input_cost_per_token": 0.000003,
5368
+ "output_cost_per_token": 0.000015,
5369
+ "litellm_provider": "bedrock",
5370
+ "mode": "chat",
5371
+ "supports_function_calling": true,
5372
+ "supports_vision": true,
5373
+ "supports_assistant_prefill": true,
5374
+ "supports_prompt_caching": true
5375
+ },
3875
5376
  "anthropic.claude-3-haiku-20240307-v1:0": {
3876
5377
  "max_tokens": 4096,
3877
5378
  "max_input_tokens": 200000,
@@ -3883,6 +5384,18 @@
3883
5384
  "supports_function_calling": true,
3884
5385
  "supports_vision": true
3885
5386
  },
5387
+ "anthropic.claude-3-5-haiku-20241022-v1:0": {
5388
+ "max_tokens": 4096,
5389
+ "max_input_tokens": 200000,
5390
+ "max_output_tokens": 4096,
5391
+ "input_cost_per_token": 0.000001,
5392
+ "output_cost_per_token": 0.000005,
5393
+ "litellm_provider": "bedrock",
5394
+ "mode": "chat",
5395
+ "supports_assistant_prefill": true,
5396
+ "supports_function_calling": true,
5397
+ "supports_prompt_caching": true
5398
+ },
3886
5399
  "anthropic.claude-3-opus-20240229-v1:0": {
3887
5400
  "max_tokens": 4096,
3888
5401
  "max_input_tokens": 200000,
@@ -3906,6 +5419,17 @@
3906
5419
  "supports_vision": true
3907
5420
  },
3908
5421
  "us.anthropic.claude-3-5-sonnet-20240620-v1:0": {
5422
+ "max_tokens": 4096,
5423
+ "max_input_tokens": 200000,
5424
+ "max_output_tokens": 4096,
5425
+ "input_cost_per_token": 0.000003,
5426
+ "output_cost_per_token": 0.000015,
5427
+ "litellm_provider": "bedrock",
5428
+ "mode": "chat",
5429
+ "supports_function_calling": true,
5430
+ "supports_vision": true
5431
+ },
5432
+ "us.anthropic.claude-3-5-sonnet-20241022-v2:0": {
3909
5433
  "max_tokens": 8192,
3910
5434
  "max_input_tokens": 200000,
3911
5435
  "max_output_tokens": 8192,
@@ -3914,7 +5438,8 @@
3914
5438
  "litellm_provider": "bedrock",
3915
5439
  "mode": "chat",
3916
5440
  "supports_function_calling": true,
3917
- "supports_vision": true
5441
+ "supports_vision": true,
5442
+ "supports_assistant_prefill": true
3918
5443
  },
3919
5444
  "us.anthropic.claude-3-haiku-20240307-v1:0": {
3920
5445
  "max_tokens": 4096,
@@ -3927,6 +5452,17 @@
3927
5452
  "supports_function_calling": true,
3928
5453
  "supports_vision": true
3929
5454
  },
5455
+ "us.anthropic.claude-3-5-haiku-20241022-v1:0": {
5456
+ "max_tokens": 4096,
5457
+ "max_input_tokens": 200000,
5458
+ "max_output_tokens": 4096,
5459
+ "input_cost_per_token": 0.000001,
5460
+ "output_cost_per_token": 0.000005,
5461
+ "litellm_provider": "bedrock",
5462
+ "mode": "chat",
5463
+ "supports_assistant_prefill": true,
5464
+ "supports_function_calling": true
5465
+ },
3930
5466
  "us.anthropic.claude-3-opus-20240229-v1:0": {
3931
5467
  "max_tokens": 4096,
3932
5468
  "max_input_tokens": 200000,
@@ -3939,9 +5475,9 @@
3939
5475
  "supports_vision": true
3940
5476
  },
3941
5477
  "eu.anthropic.claude-3-sonnet-20240229-v1:0": {
3942
- "max_tokens": 8192,
5478
+ "max_tokens": 4096,
3943
5479
  "max_input_tokens": 200000,
3944
- "max_output_tokens": 8192,
5480
+ "max_output_tokens": 4096,
3945
5481
  "input_cost_per_token": 0.000003,
3946
5482
  "output_cost_per_token": 0.000015,
3947
5483
  "litellm_provider": "bedrock",
@@ -3950,6 +5486,17 @@
3950
5486
  "supports_vision": true
3951
5487
  },
3952
5488
  "eu.anthropic.claude-3-5-sonnet-20240620-v1:0": {
5489
+ "max_tokens": 4096,
5490
+ "max_input_tokens": 200000,
5491
+ "max_output_tokens": 4096,
5492
+ "input_cost_per_token": 0.000003,
5493
+ "output_cost_per_token": 0.000015,
5494
+ "litellm_provider": "bedrock",
5495
+ "mode": "chat",
5496
+ "supports_function_calling": true,
5497
+ "supports_vision": true
5498
+ },
5499
+ "eu.anthropic.claude-3-5-sonnet-20241022-v2:0": {
3953
5500
  "max_tokens": 8192,
3954
5501
  "max_input_tokens": 200000,
3955
5502
  "max_output_tokens": 8192,
@@ -3958,7 +5505,8 @@
3958
5505
  "litellm_provider": "bedrock",
3959
5506
  "mode": "chat",
3960
5507
  "supports_function_calling": true,
3961
- "supports_vision": true
5508
+ "supports_vision": true,
5509
+ "supports_assistant_prefill": true
3962
5510
  },
3963
5511
  "eu.anthropic.claude-3-haiku-20240307-v1:0": {
3964
5512
  "max_tokens": 4096,
@@ -3971,6 +5519,16 @@
3971
5519
  "supports_function_calling": true,
3972
5520
  "supports_vision": true
3973
5521
  },
5522
+ "eu.anthropic.claude-3-5-haiku-20241022-v1:0": {
5523
+ "max_tokens": 4096,
5524
+ "max_input_tokens": 200000,
5525
+ "max_output_tokens": 4096,
5526
+ "input_cost_per_token": 0.000001,
5527
+ "output_cost_per_token": 0.000005,
5528
+ "litellm_provider": "bedrock",
5529
+ "mode": "chat",
5530
+ "supports_function_calling": true
5531
+ },
3974
5532
  "eu.anthropic.claude-3-opus-20240229-v1:0": {
3975
5533
  "max_tokens": 4096,
3976
5534
  "max_input_tokens": 200000,
@@ -4538,6 +6096,15 @@
4538
6096
  "litellm_provider": "bedrock",
4539
6097
  "mode": "embedding"
4540
6098
  },
6099
+ "meta.llama3-3-70b-instruct-v1:0": {
6100
+ "max_tokens": 4096,
6101
+ "max_input_tokens": 128000,
6102
+ "max_output_tokens": 4096,
6103
+ "input_cost_per_token": 0.00000072,
6104
+ "output_cost_per_token": 0.00000072,
6105
+ "litellm_provider": "bedrock_converse",
6106
+ "mode": "chat"
6107
+ },
4541
6108
  "meta.llama2-13b-chat-v1": {
4542
6109
  "max_tokens": 4096,
4543
6110
  "max_input_tokens": 4096,
@@ -4655,79 +6222,222 @@
4655
6222
  "litellm_provider": "bedrock",
4656
6223
  "mode": "chat"
4657
6224
  },
4658
- "bedrock/ap-south-1/meta.llama3-70b-instruct-v1:0": {
4659
- "max_tokens": 8192,
4660
- "max_input_tokens": 8192,
4661
- "max_output_tokens": 8192,
4662
- "input_cost_per_token": 0.00000318,
4663
- "output_cost_per_token": 0.0000042,
6225
+ "bedrock/ap-south-1/meta.llama3-70b-instruct-v1:0": {
6226
+ "max_tokens": 8192,
6227
+ "max_input_tokens": 8192,
6228
+ "max_output_tokens": 8192,
6229
+ "input_cost_per_token": 0.00000318,
6230
+ "output_cost_per_token": 0.0000042,
6231
+ "litellm_provider": "bedrock",
6232
+ "mode": "chat"
6233
+ },
6234
+ "bedrock/ca-central-1/meta.llama3-70b-instruct-v1:0": {
6235
+ "max_tokens": 8192,
6236
+ "max_input_tokens": 8192,
6237
+ "max_output_tokens": 8192,
6238
+ "input_cost_per_token": 0.00000305,
6239
+ "output_cost_per_token": 0.00000403,
6240
+ "litellm_provider": "bedrock",
6241
+ "mode": "chat"
6242
+ },
6243
+ "bedrock/eu-west-1/meta.llama3-70b-instruct-v1:0": {
6244
+ "max_tokens": 8192,
6245
+ "max_input_tokens": 8192,
6246
+ "max_output_tokens": 8192,
6247
+ "input_cost_per_token": 0.00000286,
6248
+ "output_cost_per_token": 0.00000378,
6249
+ "litellm_provider": "bedrock",
6250
+ "mode": "chat"
6251
+ },
6252
+ "bedrock/eu-west-2/meta.llama3-70b-instruct-v1:0": {
6253
+ "max_tokens": 8192,
6254
+ "max_input_tokens": 8192,
6255
+ "max_output_tokens": 8192,
6256
+ "input_cost_per_token": 0.00000345,
6257
+ "output_cost_per_token": 0.00000455,
6258
+ "litellm_provider": "bedrock",
6259
+ "mode": "chat"
6260
+ },
6261
+ "bedrock/sa-east-1/meta.llama3-70b-instruct-v1:0": {
6262
+ "max_tokens": 8192,
6263
+ "max_input_tokens": 8192,
6264
+ "max_output_tokens": 8192,
6265
+ "input_cost_per_token": 0.00000445,
6266
+ "output_cost_per_token": 0.00000588,
6267
+ "litellm_provider": "bedrock",
6268
+ "mode": "chat"
6269
+ },
6270
+ "meta.llama3-1-8b-instruct-v1:0": {
6271
+ "max_tokens": 128000,
6272
+ "max_input_tokens": 128000,
6273
+ "max_output_tokens": 2048,
6274
+ "input_cost_per_token": 0.00000022,
6275
+ "output_cost_per_token": 0.00000022,
6276
+ "litellm_provider": "bedrock",
6277
+ "mode": "chat",
6278
+ "supports_function_calling": true,
6279
+ "supports_tool_choice": false
6280
+ },
6281
+ "us.meta.llama3-1-8b-instruct-v1:0": {
6282
+ "max_tokens": 128000,
6283
+ "max_input_tokens": 128000,
6284
+ "max_output_tokens": 2048,
6285
+ "input_cost_per_token": 0.00000022,
6286
+ "output_cost_per_token": 0.00000022,
6287
+ "litellm_provider": "bedrock",
6288
+ "mode": "chat",
6289
+ "supports_function_calling": true,
6290
+ "supports_tool_choice": false
6291
+ },
6292
+ "meta.llama3-1-70b-instruct-v1:0": {
6293
+ "max_tokens": 128000,
6294
+ "max_input_tokens": 128000,
6295
+ "max_output_tokens": 2048,
6296
+ "input_cost_per_token": 0.00000099,
6297
+ "output_cost_per_token": 0.00000099,
6298
+ "litellm_provider": "bedrock",
6299
+ "mode": "chat",
6300
+ "supports_function_calling": true,
6301
+ "supports_tool_choice": false
6302
+ },
6303
+ "us.meta.llama3-1-70b-instruct-v1:0": {
6304
+ "max_tokens": 128000,
6305
+ "max_input_tokens": 128000,
6306
+ "max_output_tokens": 2048,
6307
+ "input_cost_per_token": 0.00000099,
6308
+ "output_cost_per_token": 0.00000099,
6309
+ "litellm_provider": "bedrock",
6310
+ "mode": "chat",
6311
+ "supports_function_calling": true,
6312
+ "supports_tool_choice": false
6313
+ },
6314
+ "meta.llama3-1-405b-instruct-v1:0": {
6315
+ "max_tokens": 128000,
6316
+ "max_input_tokens": 128000,
6317
+ "max_output_tokens": 4096,
6318
+ "input_cost_per_token": 0.00000532,
6319
+ "output_cost_per_token": 0.000016,
6320
+ "litellm_provider": "bedrock",
6321
+ "mode": "chat",
6322
+ "supports_function_calling": true,
6323
+ "supports_tool_choice": false
6324
+ },
6325
+ "us.meta.llama3-1-405b-instruct-v1:0": {
6326
+ "max_tokens": 128000,
6327
+ "max_input_tokens": 128000,
6328
+ "max_output_tokens": 4096,
6329
+ "input_cost_per_token": 0.00000532,
6330
+ "output_cost_per_token": 0.000016,
6331
+ "litellm_provider": "bedrock",
6332
+ "mode": "chat",
6333
+ "supports_function_calling": true,
6334
+ "supports_tool_choice": false
6335
+ },
6336
+ "meta.llama3-2-1b-instruct-v1:0": {
6337
+ "max_tokens": 128000,
6338
+ "max_input_tokens": 128000,
6339
+ "max_output_tokens": 4096,
6340
+ "input_cost_per_token": 0.0000001,
6341
+ "output_cost_per_token": 0.0000001,
6342
+ "litellm_provider": "bedrock",
6343
+ "mode": "chat",
6344
+ "supports_function_calling": true,
6345
+ "supports_tool_choice": false
6346
+ },
6347
+ "us.meta.llama3-2-1b-instruct-v1:0": {
6348
+ "max_tokens": 128000,
6349
+ "max_input_tokens": 128000,
6350
+ "max_output_tokens": 4096,
6351
+ "input_cost_per_token": 0.0000001,
6352
+ "output_cost_per_token": 0.0000001,
6353
+ "litellm_provider": "bedrock",
6354
+ "mode": "chat",
6355
+ "supports_function_calling": true,
6356
+ "supports_tool_choice": false
6357
+ },
6358
+ "eu.meta.llama3-2-1b-instruct-v1:0": {
6359
+ "max_tokens": 128000,
6360
+ "max_input_tokens": 128000,
6361
+ "max_output_tokens": 4096,
6362
+ "input_cost_per_token": 0.00000013,
6363
+ "output_cost_per_token": 0.00000013,
4664
6364
  "litellm_provider": "bedrock",
4665
- "mode": "chat"
6365
+ "mode": "chat",
6366
+ "supports_function_calling": true,
6367
+ "supports_tool_choice": false
4666
6368
  },
4667
- "bedrock/ca-central-1/meta.llama3-70b-instruct-v1:0": {
4668
- "max_tokens": 8192,
4669
- "max_input_tokens": 8192,
4670
- "max_output_tokens": 8192,
4671
- "input_cost_per_token": 0.00000305,
4672
- "output_cost_per_token": 0.00000403,
6369
+ "meta.llama3-2-3b-instruct-v1:0": {
6370
+ "max_tokens": 128000,
6371
+ "max_input_tokens": 128000,
6372
+ "max_output_tokens": 4096,
6373
+ "input_cost_per_token": 0.00000015,
6374
+ "output_cost_per_token": 0.00000015,
4673
6375
  "litellm_provider": "bedrock",
4674
- "mode": "chat"
6376
+ "mode": "chat",
6377
+ "supports_function_calling": true,
6378
+ "supports_tool_choice": false
4675
6379
  },
4676
- "bedrock/eu-west-1/meta.llama3-70b-instruct-v1:0": {
4677
- "max_tokens": 8192,
4678
- "max_input_tokens": 8192,
4679
- "max_output_tokens": 8192,
4680
- "input_cost_per_token": 0.00000286,
4681
- "output_cost_per_token": 0.00000378,
6380
+ "us.meta.llama3-2-3b-instruct-v1:0": {
6381
+ "max_tokens": 128000,
6382
+ "max_input_tokens": 128000,
6383
+ "max_output_tokens": 4096,
6384
+ "input_cost_per_token": 0.00000015,
6385
+ "output_cost_per_token": 0.00000015,
4682
6386
  "litellm_provider": "bedrock",
4683
- "mode": "chat"
6387
+ "mode": "chat",
6388
+ "supports_function_calling": true,
6389
+ "supports_tool_choice": false
4684
6390
  },
4685
- "bedrock/eu-west-2/meta.llama3-70b-instruct-v1:0": {
4686
- "max_tokens": 8192,
4687
- "max_input_tokens": 8192,
4688
- "max_output_tokens": 8192,
4689
- "input_cost_per_token": 0.00000345,
4690
- "output_cost_per_token": 0.00000455,
6391
+ "eu.meta.llama3-2-3b-instruct-v1:0": {
6392
+ "max_tokens": 128000,
6393
+ "max_input_tokens": 128000,
6394
+ "max_output_tokens": 4096,
6395
+ "input_cost_per_token": 0.00000019,
6396
+ "output_cost_per_token": 0.00000019,
4691
6397
  "litellm_provider": "bedrock",
4692
- "mode": "chat"
6398
+ "mode": "chat",
6399
+ "supports_function_calling": true,
6400
+ "supports_tool_choice": false
4693
6401
  },
4694
- "bedrock/sa-east-1/meta.llama3-70b-instruct-v1:0": {
4695
- "max_tokens": 8192,
4696
- "max_input_tokens": 8192,
4697
- "max_output_tokens": 8192,
4698
- "input_cost_per_token": 0.00000445,
4699
- "output_cost_per_token": 0.00000588,
6402
+ "meta.llama3-2-11b-instruct-v1:0": {
6403
+ "max_tokens": 128000,
6404
+ "max_input_tokens": 128000,
6405
+ "max_output_tokens": 4096,
6406
+ "input_cost_per_token": 0.00000035,
6407
+ "output_cost_per_token": 0.00000035,
4700
6408
  "litellm_provider": "bedrock",
4701
- "mode": "chat"
6409
+ "mode": "chat",
6410
+ "supports_function_calling": true,
6411
+ "supports_tool_choice": false
4702
6412
  },
4703
- "meta.llama3-1-8b-instruct-v1:0": {
6413
+ "us.meta.llama3-2-11b-instruct-v1:0": {
4704
6414
  "max_tokens": 128000,
4705
6415
  "max_input_tokens": 128000,
4706
- "max_output_tokens": 2048,
4707
- "input_cost_per_token": 0.00000022,
4708
- "output_cost_per_token": 0.00000022,
6416
+ "max_output_tokens": 4096,
6417
+ "input_cost_per_token": 0.00000035,
6418
+ "output_cost_per_token": 0.00000035,
4709
6419
  "litellm_provider": "bedrock",
4710
6420
  "mode": "chat",
4711
6421
  "supports_function_calling": true,
4712
6422
  "supports_tool_choice": false
4713
6423
  },
4714
- "meta.llama3-1-70b-instruct-v1:0": {
6424
+ "meta.llama3-2-90b-instruct-v1:0": {
4715
6425
  "max_tokens": 128000,
4716
6426
  "max_input_tokens": 128000,
4717
- "max_output_tokens": 2048,
4718
- "input_cost_per_token": 0.00000099,
4719
- "output_cost_per_token": 0.00000099,
6427
+ "max_output_tokens": 4096,
6428
+ "input_cost_per_token": 0.000002,
6429
+ "output_cost_per_token": 0.000002,
4720
6430
  "litellm_provider": "bedrock",
4721
6431
  "mode": "chat",
4722
6432
  "supports_function_calling": true,
4723
6433
  "supports_tool_choice": false
4724
6434
  },
4725
- "meta.llama3-1-405b-instruct-v1:0": {
6435
+ "us.meta.llama3-2-90b-instruct-v1:0": {
4726
6436
  "max_tokens": 128000,
4727
6437
  "max_input_tokens": 128000,
4728
6438
  "max_output_tokens": 4096,
4729
- "input_cost_per_token": 0.00000532,
4730
- "output_cost_per_token": 0.000016,
6439
+ "input_cost_per_token": 0.000002,
6440
+ "output_cost_per_token": 0.000002,
4731
6441
  "litellm_provider": "bedrock",
4732
6442
  "mode": "chat",
4733
6443
  "supports_function_calling": true,
@@ -4775,6 +6485,20 @@
4775
6485
  "litellm_provider": "bedrock",
4776
6486
  "mode": "image_generation"
4777
6487
  },
6488
+ "stability.sd3-large-v1:0": {
6489
+ "max_tokens": 77,
6490
+ "max_input_tokens": 77,
6491
+ "output_cost_per_image": 0.08,
6492
+ "litellm_provider": "bedrock",
6493
+ "mode": "image_generation"
6494
+ },
6495
+ "stability.stable-image-ultra-v1:0": {
6496
+ "max_tokens": 77,
6497
+ "max_input_tokens": 77,
6498
+ "output_cost_per_image": 0.14,
6499
+ "litellm_provider": "bedrock",
6500
+ "mode": "image_generation"
6501
+ },
4778
6502
  "sagemaker/meta-textgeneration-llama-2-7b": {
4779
6503
  "max_tokens": 4096,
4780
6504
  "max_input_tokens": 4096,
@@ -4832,50 +6556,99 @@
4832
6556
  "together-ai-up-to-4b": {
4833
6557
  "input_cost_per_token": 0.0000001,
4834
6558
  "output_cost_per_token": 0.0000001,
4835
- "litellm_provider": "together_ai"
6559
+ "litellm_provider": "together_ai",
6560
+ "mode": "chat"
4836
6561
  },
4837
6562
  "together-ai-4.1b-8b": {
4838
6563
  "input_cost_per_token": 0.0000002,
4839
6564
  "output_cost_per_token": 0.0000002,
4840
- "litellm_provider": "together_ai"
6565
+ "litellm_provider": "together_ai",
6566
+ "mode": "chat"
4841
6567
  },
4842
6568
  "together-ai-8.1b-21b": {
4843
6569
  "max_tokens": 1000,
4844
6570
  "input_cost_per_token": 0.0000003,
4845
6571
  "output_cost_per_token": 0.0000003,
4846
- "litellm_provider": "together_ai"
6572
+ "litellm_provider": "together_ai",
6573
+ "mode": "chat"
4847
6574
  },
4848
6575
  "together-ai-21.1b-41b": {
4849
6576
  "input_cost_per_token": 0.0000008,
4850
6577
  "output_cost_per_token": 0.0000008,
4851
- "litellm_provider": "together_ai"
6578
+ "litellm_provider": "together_ai",
6579
+ "mode": "chat"
4852
6580
  },
4853
6581
  "together-ai-41.1b-80b": {
4854
6582
  "input_cost_per_token": 0.0000009,
4855
6583
  "output_cost_per_token": 0.0000009,
4856
- "litellm_provider": "together_ai"
6584
+ "litellm_provider": "together_ai",
6585
+ "mode": "chat"
4857
6586
  },
4858
6587
  "together-ai-81.1b-110b": {
4859
6588
  "input_cost_per_token": 0.0000018,
4860
6589
  "output_cost_per_token": 0.0000018,
4861
- "litellm_provider": "together_ai"
6590
+ "litellm_provider": "together_ai",
6591
+ "mode": "chat"
6592
+ },
6593
+ "together-ai-embedding-up-to-150m": {
6594
+ "input_cost_per_token": 0.000000008,
6595
+ "output_cost_per_token": 0.0,
6596
+ "litellm_provider": "together_ai",
6597
+ "mode": "embedding"
6598
+ },
6599
+ "together-ai-embedding-151m-to-350m": {
6600
+ "input_cost_per_token": 0.000000016,
6601
+ "output_cost_per_token": 0.0,
6602
+ "litellm_provider": "together_ai",
6603
+ "mode": "embedding"
6604
+ },
6605
+ "together_ai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": {
6606
+ "input_cost_per_token": 0.00000018,
6607
+ "output_cost_per_token": 0.00000018,
6608
+ "litellm_provider": "together_ai",
6609
+ "supports_function_calling": true,
6610
+ "supports_parallel_function_calling": true,
6611
+ "supports_response_schema": true,
6612
+ "mode": "chat"
6613
+ },
6614
+ "together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": {
6615
+ "input_cost_per_token": 0.00000088,
6616
+ "output_cost_per_token": 0.00000088,
6617
+ "litellm_provider": "together_ai",
6618
+ "supports_function_calling": true,
6619
+ "supports_parallel_function_calling": true,
6620
+ "supports_response_schema": true,
6621
+ "mode": "chat"
6622
+ },
6623
+ "together_ai/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": {
6624
+ "input_cost_per_token": 0.0000035,
6625
+ "output_cost_per_token": 0.0000035,
6626
+ "litellm_provider": "together_ai",
6627
+ "supports_function_calling": true,
6628
+ "supports_parallel_function_calling": true,
6629
+ "mode": "chat"
4862
6630
  },
4863
6631
  "together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1": {
4864
6632
  "input_cost_per_token": 0.0000006,
4865
6633
  "output_cost_per_token": 0.0000006,
4866
6634
  "litellm_provider": "together_ai",
4867
6635
  "supports_function_calling": true,
4868
- "supports_parallel_function_calling": true
6636
+ "supports_parallel_function_calling": true,
6637
+ "supports_response_schema": true,
6638
+ "mode": "chat"
4869
6639
  },
4870
6640
  "together_ai/mistralai/Mistral-7B-Instruct-v0.1": {
4871
6641
  "litellm_provider": "together_ai",
4872
6642
  "supports_function_calling": true,
4873
- "supports_parallel_function_calling": true
6643
+ "supports_parallel_function_calling": true,
6644
+ "supports_response_schema": true,
6645
+ "mode": "chat"
4874
6646
  },
4875
6647
  "together_ai/togethercomputer/CodeLlama-34b-Instruct": {
4876
6648
  "litellm_provider": "together_ai",
4877
6649
  "supports_function_calling": true,
4878
- "supports_parallel_function_calling": true
6650
+ "supports_parallel_function_calling": true,
6651
+ "mode": "chat"
4879
6652
  },
4880
6653
  "ollama/codegemma": {
4881
6654
  "max_tokens": 8192,
@@ -4953,7 +6726,7 @@
4953
6726
  "input_cost_per_token": 0.0,
4954
6727
  "output_cost_per_token": 0.0,
4955
6728
  "litellm_provider": "ollama",
4956
- "mode": "completion"
6729
+ "mode": "chat"
4957
6730
  },
4958
6731
  "ollama/llama2:7b": {
4959
6732
  "max_tokens": 4096,
@@ -4962,7 +6735,7 @@
4962
6735
  "input_cost_per_token": 0.0,
4963
6736
  "output_cost_per_token": 0.0,
4964
6737
  "litellm_provider": "ollama",
4965
- "mode": "completion"
6738
+ "mode": "chat"
4966
6739
  },
4967
6740
  "ollama/llama2:13b": {
4968
6741
  "max_tokens": 4096,
@@ -4971,7 +6744,7 @@
4971
6744
  "input_cost_per_token": 0.0,
4972
6745
  "output_cost_per_token": 0.0,
4973
6746
  "litellm_provider": "ollama",
4974
- "mode": "completion"
6747
+ "mode": "chat"
4975
6748
  },
4976
6749
  "ollama/llama2:70b": {
4977
6750
  "max_tokens": 4096,
@@ -4980,7 +6753,7 @@
4980
6753
  "input_cost_per_token": 0.0,
4981
6754
  "output_cost_per_token": 0.0,
4982
6755
  "litellm_provider": "ollama",
4983
- "mode": "completion"
6756
+ "mode": "chat"
4984
6757
  },
4985
6758
  "ollama/llama2-uncensored": {
4986
6759
  "max_tokens": 4096,
@@ -5271,6 +7044,17 @@
5271
7044
  "litellm_provider": "deepinfra",
5272
7045
  "mode": "chat"
5273
7046
  },
7047
+ "deepinfra/meta-llama/Meta-Llama-3.1-405B-Instruct": {
7048
+ "max_tokens": 32768,
7049
+ "max_input_tokens": 32768,
7050
+ "max_output_tokens": 32768,
7051
+ "input_cost_per_token": 0.0000009,
7052
+ "output_cost_per_token": 0.0000009,
7053
+ "litellm_provider": "deepinfra",
7054
+ "mode": "chat",
7055
+ "supports_function_calling": true,
7056
+ "supports_parallel_function_calling": true
7057
+ },
5274
7058
  "deepinfra/01-ai/Yi-34B-200K": {
5275
7059
  "max_tokens": 4096,
5276
7060
  "max_input_tokens": 200000,
@@ -5473,6 +7257,56 @@
5473
7257
  "litellm_provider": "perplexity",
5474
7258
  "mode": "chat"
5475
7259
  },
7260
+ "fireworks_ai/accounts/fireworks/models/llama-v3p2-1b-instruct": {
7261
+ "max_tokens": 16384,
7262
+ "max_input_tokens": 16384,
7263
+ "max_output_tokens": 16384,
7264
+ "input_cost_per_token": 0.0000001,
7265
+ "output_cost_per_token": 0.0000001,
7266
+ "litellm_provider": "fireworks_ai",
7267
+ "mode": "chat",
7268
+ "supports_function_calling": true,
7269
+ "supports_response_schema": true,
7270
+ "source": "https://fireworks.ai/pricing"
7271
+ },
7272
+ "fireworks_ai/accounts/fireworks/models/llama-v3p2-3b-instruct": {
7273
+ "max_tokens": 16384,
7274
+ "max_input_tokens": 16384,
7275
+ "max_output_tokens": 16384,
7276
+ "input_cost_per_token": 0.0000001,
7277
+ "output_cost_per_token": 0.0000001,
7278
+ "litellm_provider": "fireworks_ai",
7279
+ "mode": "chat",
7280
+ "supports_function_calling": true,
7281
+ "supports_response_schema": true,
7282
+ "source": "https://fireworks.ai/pricing"
7283
+ },
7284
+ "fireworks_ai/accounts/fireworks/models/llama-v3p2-11b-vision-instruct": {
7285
+ "max_tokens": 16384,
7286
+ "max_input_tokens": 16384,
7287
+ "max_output_tokens": 16384,
7288
+ "input_cost_per_token": 0.0000002,
7289
+ "output_cost_per_token": 0.0000002,
7290
+ "litellm_provider": "fireworks_ai",
7291
+ "mode": "chat",
7292
+ "supports_function_calling": true,
7293
+ "supports_vision": true,
7294
+ "supports_response_schema": true,
7295
+ "source": "https://fireworks.ai/pricing"
7296
+ },
7297
+ "accounts/fireworks/models/llama-v3p2-90b-vision-instruct": {
7298
+ "max_tokens": 16384,
7299
+ "max_input_tokens": 16384,
7300
+ "max_output_tokens": 16384,
7301
+ "input_cost_per_token": 0.0000009,
7302
+ "output_cost_per_token": 0.0000009,
7303
+ "litellm_provider": "fireworks_ai",
7304
+ "mode": "chat",
7305
+ "supports_function_calling": true,
7306
+ "supports_vision": true,
7307
+ "supports_response_schema": true,
7308
+ "source": "https://fireworks.ai/pricing"
7309
+ },
5476
7310
  "fireworks_ai/accounts/fireworks/models/firefunction-v2": {
5477
7311
  "max_tokens": 8192,
5478
7312
  "max_input_tokens": 8192,
@@ -5482,6 +7316,7 @@
5482
7316
  "litellm_provider": "fireworks_ai",
5483
7317
  "mode": "chat",
5484
7318
  "supports_function_calling": true,
7319
+ "supports_response_schema": true,
5485
7320
  "source": "https://fireworks.ai/pricing"
5486
7321
  },
5487
7322
  "fireworks_ai/accounts/fireworks/models/mixtral-8x22b-instruct-hf": {
@@ -5493,6 +7328,7 @@
5493
7328
  "litellm_provider": "fireworks_ai",
5494
7329
  "mode": "chat",
5495
7330
  "supports_function_calling": true,
7331
+ "supports_response_schema": true,
5496
7332
  "source": "https://fireworks.ai/pricing"
5497
7333
  },
5498
7334
  "fireworks_ai/accounts/fireworks/models/qwen2-72b-instruct": {
@@ -5504,6 +7340,19 @@
5504
7340
  "litellm_provider": "fireworks_ai",
5505
7341
  "mode": "chat",
5506
7342
  "supports_function_calling": true,
7343
+ "supports_response_schema": true,
7344
+ "source": "https://fireworks.ai/pricing"
7345
+ },
7346
+ "fireworks_ai/accounts/fireworks/models/qwen2p5-coder-32b-instruct": {
7347
+ "max_tokens": 4096,
7348
+ "max_input_tokens": 4096,
7349
+ "max_output_tokens": 4096,
7350
+ "input_cost_per_token": 0.0000009,
7351
+ "output_cost_per_token": 0.0000009,
7352
+ "litellm_provider": "fireworks_ai",
7353
+ "mode": "chat",
7354
+ "supports_function_calling": true,
7355
+ "supports_response_schema": true,
5507
7356
  "source": "https://fireworks.ai/pricing"
5508
7357
  },
5509
7358
  "fireworks_ai/accounts/fireworks/models/yi-large": {
@@ -5515,6 +7364,7 @@
5515
7364
  "litellm_provider": "fireworks_ai",
5516
7365
  "mode": "chat",
5517
7366
  "supports_function_calling": true,
7367
+ "supports_response_schema": true,
5518
7368
  "source": "https://fireworks.ai/pricing"
5519
7369
  },
5520
7370
  "fireworks_ai/accounts/fireworks/models/deepseek-coder-v2-instruct": {
@@ -5526,6 +7376,7 @@
5526
7376
  "litellm_provider": "fireworks_ai",
5527
7377
  "mode": "chat",
5528
7378
  "supports_function_calling": true,
7379
+ "supports_response_schema": true,
5529
7380
  "source": "https://fireworks.ai/pricing"
5530
7381
  },
5531
7382
  "fireworks_ai/nomic-ai/nomic-embed-text-v1.5": {
@@ -5593,6 +7444,11 @@
5593
7444
  "output_cost_per_token": 0.0000012,
5594
7445
  "litellm_provider": "fireworks_ai"
5595
7446
  },
7447
+ "fireworks-ai-default": {
7448
+ "input_cost_per_token": 0.0,
7449
+ "output_cost_per_token": 0.0,
7450
+ "litellm_provider": "fireworks_ai"
7451
+ },
5596
7452
  "fireworks-ai-embedding-up-to-150m": {
5597
7453
  "input_cost_per_token": 0.000000008,
5598
7454
  "output_cost_per_token": 0.000000,
@@ -5813,6 +7669,14 @@
5813
7669
  "litellm_provider": "voyage",
5814
7670
  "mode": "embedding"
5815
7671
  },
7672
+ "voyage/voyage-finance-2": {
7673
+ "max_tokens": 4000,
7674
+ "max_input_tokens": 4000,
7675
+ "input_cost_per_token": 0.00000012,
7676
+ "output_cost_per_token": 0.000000,
7677
+ "litellm_provider": "voyage",
7678
+ "mode": "embedding"
7679
+ },
5816
7680
  "databricks/databricks-meta-llama-3-1-405b-instruct": {
5817
7681
  "max_tokens": 128000,
5818
7682
  "max_input_tokens": 128000,
@@ -5839,6 +7703,19 @@
5839
7703
  "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
5840
7704
  "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
5841
7705
  },
7706
+ "databricks/meta-llama-3.3-70b-instruct": {
7707
+ "max_tokens": 128000,
7708
+ "max_input_tokens": 128000,
7709
+ "max_output_tokens": 128000,
7710
+ "input_cost_per_token": 0.00000100002,
7711
+ "input_dbu_cost_per_token": 0.000014286,
7712
+ "output_cost_per_token": 0.00000299999,
7713
+ "output_dbu_cost_per_token": 0.000042857,
7714
+ "litellm_provider": "databricks",
7715
+ "mode": "chat",
7716
+ "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
7717
+ "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
7718
+ },
5842
7719
  "databricks/databricks-dbrx-instruct": {
5843
7720
  "max_tokens": 32768,
5844
7721
  "max_input_tokens": 32768,
@@ -5943,4 +7820,4 @@
5943
7820
  "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
5944
7821
  "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
5945
7822
  }
5946
- }
7823
+ }