ragaai-catalyst 2.0.7.2b1__py3-none-any.whl → 2.1b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragaai_catalyst/dataset.py +0 -3
- ragaai_catalyst/evaluation.py +1 -2
- ragaai_catalyst/tracers/__init__.py +1 -1
- ragaai_catalyst/tracers/agentic_tracing/agent_tracer.py +217 -106
- ragaai_catalyst/tracers/agentic_tracing/agentic_tracing.py +27 -41
- ragaai_catalyst/tracers/agentic_tracing/base.py +127 -21
- ragaai_catalyst/tracers/agentic_tracing/data_structure.py +88 -79
- ragaai_catalyst/tracers/agentic_tracing/examples/FinancialAnalysisSystem.ipynb +536 -0
- ragaai_catalyst/tracers/agentic_tracing/examples/GameActivityEventPlanner.ipynb +134 -0
- ragaai_catalyst/tracers/agentic_tracing/examples/TravelPlanner.ipynb +563 -0
- ragaai_catalyst/tracers/agentic_tracing/file_name_tracker.py +46 -0
- ragaai_catalyst/tracers/agentic_tracing/llm_tracer.py +258 -356
- ragaai_catalyst/tracers/agentic_tracing/tool_tracer.py +31 -19
- ragaai_catalyst/tracers/agentic_tracing/unique_decorator.py +61 -117
- ragaai_catalyst/tracers/agentic_tracing/upload_agentic_traces.py +187 -0
- ragaai_catalyst/tracers/agentic_tracing/upload_code.py +115 -0
- ragaai_catalyst/tracers/agentic_tracing/user_interaction_tracer.py +35 -59
- ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +0 -4
- ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +2201 -324
- ragaai_catalyst/tracers/agentic_tracing/zip_list_of_unique_files.py +342 -0
- ragaai_catalyst/tracers/exporters/raga_exporter.py +1 -7
- ragaai_catalyst/tracers/llamaindex_callback.py +56 -60
- ragaai_catalyst/tracers/tracer.py +6 -2
- ragaai_catalyst/tracers/upload_traces.py +46 -57
- {ragaai_catalyst-2.0.7.2b1.dist-info → ragaai_catalyst-2.1b1.dist-info}/METADATA +6 -2
- {ragaai_catalyst-2.0.7.2b1.dist-info → ragaai_catalyst-2.1b1.dist-info}/RECORD +28 -22
- ragaai_catalyst/tracers/agentic_tracing/Untitled-1.json +0 -660
- {ragaai_catalyst-2.0.7.2b1.dist-info → ragaai_catalyst-2.1b1.dist-info}/WHEEL +0 -0
- {ragaai_catalyst-2.0.7.2b1.dist-info → ragaai_catalyst-2.1b1.dist-info}/top_level.txt +0 -0
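The largest single change is the expanded model-pricing table, ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json, whose diff follows. Each entry pairs per-token prices with capability flags. As a rough, hypothetical sketch only (this helper is not part of ragaai-catalyst; the file path and function name are assumptions), an entry of this shape could be used to estimate the cost of a single LLM call:

```python
import json

def estimate_cost(costs_path: str, model: str, prompt_tokens: int, completion_tokens: int) -> float:
    """Estimate the cost of one LLM call from a model_costs.json-style entry (hypothetical helper)."""
    with open(costs_path) as f:
        costs = json.load(f)
    # e.g. "gpt-4o-mini" -> {"input_cost_per_token": ..., "output_cost_per_token": ..., ...}
    entry = costs[model]
    return (prompt_tokens * entry.get("input_cost_per_token", 0.0)
            + completion_tokens * entry.get("output_cost_per_token", 0.0))

# With the gpt-4o-mini prices shown in the diff (0.00000015 in / 0.00000060 out per token):
# estimate_cost("model_costs.json", "gpt-4o-mini", 1000, 200) -> 0.00027
```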
@@ -9,7 +9,82 @@
 "mode": "one of chat, embedding, completion, image_generation, audio_transcription, audio_speech",
 "supports_function_calling": true,
 "supports_parallel_function_calling": true,
-"supports_vision": true
+"supports_vision": true,
+"supports_audio_input": true,
+"supports_audio_output": true,
+"supports_prompt_caching": true,
+"supports_response_schema": true,
+"supports_system_messages": true
+},
+"sambanova/Meta-Llama-3.1-8B-Instruct": {
+"max_tokens": 16000,
+"max_input_tokens": 16000,
+"max_output_tokens": 16000,
+"input_cost_per_token": 0.0000001,
+"output_cost_per_token": 0.0000002,
+"litellm_provider": "sambanova",
+"supports_function_calling": true,
+"mode": "chat"
+},
+"sambanova/Meta-Llama-3.1-70B-Instruct": {
+"max_tokens": 128000,
+"max_input_tokens": 128000,
+"max_output_tokens": 128000,
+"input_cost_per_token": 0.0000006,
+"output_cost_per_token": 0.0000012,
+"litellm_provider": "sambanova",
+"supports_function_calling": true,
+"mode": "chat"
+},
+"sambanova/Meta-Llama-3.1-405B-Instruct": {
+"max_tokens": 16000,
+"max_input_tokens": 16000,
+"max_output_tokens": 16000,
+"input_cost_per_token": 0.000005,
+"output_cost_per_token": 0.000010,
+"litellm_provider": "sambanova",
+"supports_function_calling": true,
+"mode": "chat"
+},
+"sambanova/Meta-Llama-3.2-1B-Instruct": {
+"max_tokens": 16000,
+"max_input_tokens": 16000,
+"max_output_tokens": 16000,
+"input_cost_per_token": 0.0000004,
+"output_cost_per_token": 0.0000008,
+"litellm_provider": "sambanova",
+"supports_function_calling": true,
+"mode": "chat"
+},
+"sambanova/Meta-Llama-3.2-3B-Instruct": {
+"max_tokens": 4000,
+"max_input_tokens": 4000,
+"max_output_tokens": 4000,
+"input_cost_per_token": 0.0000008,
+"output_cost_per_token": 0.0000016,
+"litellm_provider": "sambanova",
+"supports_function_calling": true,
+"mode": "chat"
+},
+"sambanova/Qwen2.5-Coder-32B-Instruct": {
+"max_tokens": 8000,
+"max_input_tokens": 8000,
+"max_output_tokens": 8000,
+"input_cost_per_token": 0.0000015,
+"output_cost_per_token": 0.000003,
+"litellm_provider": "sambanova",
+"supports_function_calling": true,
+"mode": "chat"
+},
+"sambanova/Qwen2.5-72B-Instruct": {
+"max_tokens": 8000,
+"max_input_tokens": 8000,
+"max_output_tokens": 8000,
+"input_cost_per_token": 0.000002,
+"output_cost_per_token": 0.000004,
+"litellm_provider": "sambanova",
+"supports_function_calling": true,
+"mode": "chat"
 },
 "gpt-4": {
 "max_tokens": 4096,
@@ -19,19 +94,91 @@
 "output_cost_per_token": 0.00006,
 "litellm_provider": "openai",
 "mode": "chat",
-"supports_function_calling": true
+"supports_function_calling": true,
+"supports_prompt_caching": true,
+"supports_system_messages": true
 },
 "gpt-4o": {
-"max_tokens":
+"max_tokens": 16384,
 "max_input_tokens": 128000,
-"max_output_tokens":
-"input_cost_per_token": 0.
-"output_cost_per_token": 0.
+"max_output_tokens": 16384,
+"input_cost_per_token": 0.0000025,
+"output_cost_per_token": 0.000010,
+"input_cost_per_token_batches": 0.00000125,
+"output_cost_per_token_batches": 0.00000500,
+"cache_read_input_token_cost": 0.00000125,
 "litellm_provider": "openai",
 "mode": "chat",
 "supports_function_calling": true,
 "supports_parallel_function_calling": true,
-"
+"supports_response_schema": true,
+"supports_vision": true,
+"supports_prompt_caching": true,
+"supports_system_messages": true
+},
+"gpt-4o-audio-preview": {
+"max_tokens": 16384,
+"max_input_tokens": 128000,
+"max_output_tokens": 16384,
+"input_cost_per_token": 0.0000025,
+"input_cost_per_audio_token": 0.0001,
+"output_cost_per_token": 0.000010,
+"output_cost_per_audio_token": 0.0002,
+"litellm_provider": "openai",
+"mode": "chat",
+"supports_function_calling": true,
+"supports_parallel_function_calling": true,
+"supports_audio_input": true,
+"supports_audio_output": true,
+"supports_system_messages": true
+},
+"gpt-4o-audio-preview-2024-12-17": {
+"max_tokens": 16384,
+"max_input_tokens": 128000,
+"max_output_tokens": 16384,
+"input_cost_per_token": 0.0000025,
+"input_cost_per_audio_token": 0.00004,
+"output_cost_per_token": 0.000010,
+"output_cost_per_audio_token": 0.00008,
+"litellm_provider": "openai",
+"mode": "chat",
+"supports_function_calling": true,
+"supports_parallel_function_calling": true,
+"supports_audio_input": true,
+"supports_audio_output": true,
+"supports_system_messages": true
+},
+"gpt-4o-audio-preview-2024-10-01": {
+"max_tokens": 16384,
+"max_input_tokens": 128000,
+"max_output_tokens": 16384,
+"input_cost_per_token": 0.0000025,
+"input_cost_per_audio_token": 0.0001,
+"output_cost_per_token": 0.000010,
+"output_cost_per_audio_token": 0.0002,
+"litellm_provider": "openai",
+"mode": "chat",
+"supports_function_calling": true,
+"supports_parallel_function_calling": true,
+"supports_audio_input": true,
+"supports_audio_output": true,
+"supports_system_messages": true
+},
+"gpt-4o-mini-audio-preview-2024-12-17": {
+"max_tokens": 16384,
+"max_input_tokens": 128000,
+"max_output_tokens": 16384,
+"input_cost_per_token": 0.00000015,
+"input_cost_per_audio_token": 0.00001,
+"output_cost_per_token": 0.0000006,
+"output_cost_per_audio_token": 0.00002,
+"litellm_provider": "openai",
+"mode": "chat",
+"supports_function_calling": true,
+"supports_parallel_function_calling": true,
+"supports_audio_input": true,
+"supports_audio_output": true,
+"supports_system_messages": true
 },
 "gpt-4o-mini": {
 "max_tokens": 16384,
@@ -39,11 +186,17 @@
 "max_output_tokens": 16384,
 "input_cost_per_token": 0.00000015,
 "output_cost_per_token": 0.00000060,
+"input_cost_per_token_batches": 0.000000075,
+"output_cost_per_token_batches": 0.00000030,
+"cache_read_input_token_cost": 0.000000075,
 "litellm_provider": "openai",
 "mode": "chat",
 "supports_function_calling": true,
 "supports_parallel_function_calling": true,
-"
+"supports_response_schema": true,
+"supports_vision": true,
+"supports_prompt_caching": true,
+"supports_system_messages": true
 },
 "gpt-4o-mini-2024-07-18": {
 "max_tokens": 16384,
@@ -51,11 +204,33 @@
 "max_output_tokens": 16384,
 "input_cost_per_token": 0.00000015,
 "output_cost_per_token": 0.00000060,
+"input_cost_per_token_batches": 0.000000075,
+"output_cost_per_token_batches": 0.00000030,
+"cache_read_input_token_cost": 0.000000075,
 "litellm_provider": "openai",
 "mode": "chat",
 "supports_function_calling": true,
 "supports_parallel_function_calling": true,
-"
+"supports_response_schema": true,
+"supports_vision": true,
+"supports_prompt_caching": true,
+"supports_system_messages": true
+},
+"o1": {
+"max_tokens": 100000,
+"max_input_tokens": 200000,
+"max_output_tokens": 100000,
+"input_cost_per_token": 0.000015,
+"output_cost_per_token": 0.00006,
+"cache_read_input_token_cost": 0.0000075,
+"litellm_provider": "openai",
+"mode": "chat",
+"supports_function_calling": true,
+"supports_parallel_function_calling": true,
+"supports_vision": true,
+"supports_prompt_caching": true,
+"supports_system_messages": true,
+"supports_response_schema": true
 },
 "o1-mini": {
 "max_tokens": 65536,
@@ -63,11 +238,11 @@
 "max_output_tokens": 65536,
 "input_cost_per_token": 0.000003,
 "output_cost_per_token": 0.000012,
+"cache_read_input_token_cost": 0.0000015,
 "litellm_provider": "openai",
 "mode": "chat",
-"
-"
-"supports_vision": true
+"supports_vision": true,
+"supports_prompt_caching": true
 },
 "o1-mini-2024-09-12": {
 "max_tokens": 65536,
@@ -75,11 +250,11 @@
 "max_output_tokens": 65536,
 "input_cost_per_token": 0.000003,
 "output_cost_per_token": 0.000012,
+"cache_read_input_token_cost": 0.0000015,
 "litellm_provider": "openai",
 "mode": "chat",
-"
-"
-"supports_vision": true
+"supports_vision": true,
+"supports_prompt_caching": true
 },
 "o1-preview": {
 "max_tokens": 32768,
@@ -87,11 +262,11 @@
 "max_output_tokens": 32768,
 "input_cost_per_token": 0.000015,
 "output_cost_per_token": 0.000060,
+"cache_read_input_token_cost": 0.0000075,
 "litellm_provider": "openai",
 "mode": "chat",
-"
-"
-"supports_vision": true
+"supports_vision": true,
+"supports_prompt_caching": true
 },
 "o1-preview-2024-09-12": {
 "max_tokens": 32768,
@@ -99,11 +274,27 @@
 "max_output_tokens": 32768,
 "input_cost_per_token": 0.000015,
 "output_cost_per_token": 0.000060,
+"cache_read_input_token_cost": 0.0000075,
+"litellm_provider": "openai",
+"mode": "chat",
+"supports_vision": true,
+"supports_prompt_caching": true
+},
+"o1-2024-12-17": {
+"max_tokens": 100000,
+"max_input_tokens": 200000,
+"max_output_tokens": 100000,
+"input_cost_per_token": 0.000015,
+"output_cost_per_token": 0.000060,
+"cache_read_input_token_cost": 0.0000075,
 "litellm_provider": "openai",
 "mode": "chat",
 "supports_function_calling": true,
 "supports_parallel_function_calling": true,
-"supports_vision": true
+"supports_vision": true,
+"supports_prompt_caching": true,
+"supports_system_messages": true,
+"supports_response_schema": true
 },
 "chatgpt-4o-latest": {
 "max_tokens": 4096,
@@ -115,7 +306,9 @@
 "mode": "chat",
 "supports_function_calling": true,
 "supports_parallel_function_calling": true,
-"supports_vision": true
+"supports_vision": true,
+"supports_prompt_caching": true,
+"supports_system_messages": true
 },
 "gpt-4o-2024-05-13": {
 "max_tokens": 4096,
@@ -123,11 +316,15 @@
 "max_output_tokens": 4096,
 "input_cost_per_token": 0.000005,
 "output_cost_per_token": 0.000015,
+"input_cost_per_token_batches": 0.0000025,
+"output_cost_per_token_batches": 0.0000075,
 "litellm_provider": "openai",
 "mode": "chat",
 "supports_function_calling": true,
 "supports_parallel_function_calling": true,
-"supports_vision": true
+"supports_vision": true,
+"supports_prompt_caching": true,
+"supports_system_messages": true
 },
 "gpt-4o-2024-08-06": {
 "max_tokens": 16384,
@@ -135,11 +332,123 @@
 "max_output_tokens": 16384,
 "input_cost_per_token": 0.0000025,
 "output_cost_per_token": 0.000010,
+"input_cost_per_token_batches": 0.00000125,
+"output_cost_per_token_batches": 0.0000050,
+"cache_read_input_token_cost": 0.00000125,
 "litellm_provider": "openai",
 "mode": "chat",
 "supports_function_calling": true,
 "supports_parallel_function_calling": true,
-"
+"supports_response_schema": true,
+"supports_vision": true,
+"supports_prompt_caching": true,
+"supports_system_messages": true
+},
+"gpt-4o-2024-11-20": {
+"max_tokens": 16384,
+"max_input_tokens": 128000,
+"max_output_tokens": 16384,
+"input_cost_per_token": 0.0000025,
+"output_cost_per_token": 0.000010,
+"input_cost_per_token_batches": 0.00000125,
+"output_cost_per_token_batches": 0.0000050,
+"cache_read_input_token_cost": 0.00000125,
+"litellm_provider": "openai",
+"mode": "chat",
+"supports_function_calling": true,
+"supports_parallel_function_calling": true,
+"supports_response_schema": true,
+"supports_vision": true,
+"supports_prompt_caching": true,
+"supports_system_messages": true
+},
+"gpt-4o-realtime-preview-2024-10-01": {
+"max_tokens": 4096,
+"max_input_tokens": 128000,
+"max_output_tokens": 4096,
+"input_cost_per_token": 0.000005,
+"input_cost_per_audio_token": 0.0001,
+"cache_read_input_token_cost": 0.0000025,
+"cache_creation_input_audio_token_cost": 0.00002,
+"output_cost_per_token": 0.00002,
+"output_cost_per_audio_token": 0.0002,
+"litellm_provider": "openai",
+"mode": "chat",
+"supports_function_calling": true,
+"supports_parallel_function_calling": true,
+"supports_audio_input": true,
+"supports_audio_output": true,
+"supports_system_messages": true
+},
+"gpt-4o-realtime-preview": {
+"max_tokens": 4096,
+"max_input_tokens": 128000,
+"max_output_tokens": 4096,
+"input_cost_per_token": 0.000005,
+"input_cost_per_audio_token": 0.00004,
+"cache_read_input_token_cost": 0.0000025,
+"output_cost_per_token": 0.00002,
+"output_cost_per_audio_token": 0.00008,
+"litellm_provider": "openai",
+"mode": "chat",
+"supports_function_calling": true,
+"supports_parallel_function_calling": true,
+"supports_audio_input": true,
+"supports_audio_output": true,
+"supports_system_messages": true
+},
+"gpt-4o-realtime-preview-2024-12-17": {
+"max_tokens": 4096,
+"max_input_tokens": 128000,
+"max_output_tokens": 4096,
+"input_cost_per_token": 0.000005,
+"input_cost_per_audio_token": 0.00004,
+"cache_read_input_token_cost": 0.0000025,
+"output_cost_per_token": 0.00002,
+"output_cost_per_audio_token": 0.00008,
+"litellm_provider": "openai",
+"mode": "chat",
+"supports_function_calling": true,
+"supports_parallel_function_calling": true,
+"supports_audio_input": true,
+"supports_audio_output": true,
+"supports_system_messages": true
+},
+"gpt-4o-mini-realtime-preview": {
+"max_tokens": 4096,
+"max_input_tokens": 128000,
+"max_output_tokens": 4096,
+"input_cost_per_token": 0.0000006,
+"input_cost_per_audio_token": 0.00001,
+"cache_read_input_token_cost": 0.0000003,
+"cache_creation_input_audio_token_cost": 0.0000003,
+"output_cost_per_token": 0.0000024,
+"output_cost_per_audio_token": 0.00002,
+"litellm_provider": "openai",
+"mode": "chat",
+"supports_function_calling": true,
+"supports_parallel_function_calling": true,
+"supports_audio_input": true,
+"supports_audio_output": true,
+"supports_system_messages": true
+},
+"gpt-4o-mini-realtime-preview-2024-12-17": {
+"max_tokens": 4096,
+"max_input_tokens": 128000,
+"max_output_tokens": 4096,
+"input_cost_per_token": 0.0000006,
+"input_cost_per_audio_token": 0.00001,
+"cache_read_input_token_cost": 0.0000003,
+"cache_creation_input_audio_token_cost": 0.0000003,
+"output_cost_per_token": 0.0000024,
+"output_cost_per_audio_token": 0.00002,
+"litellm_provider": "openai",
+"mode": "chat",
+"supports_function_calling": true,
+"supports_parallel_function_calling": true,
+"supports_audio_input": true,
+"supports_audio_output": true,
+"supports_system_messages": true
 },
 "gpt-4-turbo-preview": {
 "max_tokens": 4096,
@@ -150,7 +459,9 @@
 "litellm_provider": "openai",
 "mode": "chat",
 "supports_function_calling": true,
-"supports_parallel_function_calling": true
+"supports_parallel_function_calling": true,
+"supports_prompt_caching": true,
+"supports_system_messages": true
 },
 "gpt-4-0314": {
 "max_tokens": 4096,
@@ -159,7 +470,9 @@
 "input_cost_per_token": 0.00003,
 "output_cost_per_token": 0.00006,
 "litellm_provider": "openai",
-"mode": "chat"
+"mode": "chat",
+"supports_prompt_caching": true,
+"supports_system_messages": true
 },
 "gpt-4-0613": {
 "max_tokens": 4096,
@@ -169,7 +482,9 @@
 "output_cost_per_token": 0.00006,
 "litellm_provider": "openai",
 "mode": "chat",
-"supports_function_calling": true
+"supports_function_calling": true,
+"supports_prompt_caching": true,
+"supports_system_messages": true
 },
 "gpt-4-32k": {
 "max_tokens": 4096,
@@ -178,7 +493,9 @@
 "input_cost_per_token": 0.00006,
 "output_cost_per_token": 0.00012,
 "litellm_provider": "openai",
-"mode": "chat"
+"mode": "chat",
+"supports_prompt_caching": true,
+"supports_system_messages": true
 },
 "gpt-4-32k-0314": {
 "max_tokens": 4096,
@@ -187,7 +504,9 @@
 "input_cost_per_token": 0.00006,
 "output_cost_per_token": 0.00012,
 "litellm_provider": "openai",
-"mode": "chat"
+"mode": "chat",
+"supports_prompt_caching": true,
+"supports_system_messages": true
 },
 "gpt-4-32k-0613": {
 "max_tokens": 4096,
@@ -196,7 +515,9 @@
 "input_cost_per_token": 0.00006,
 "output_cost_per_token": 0.00012,
 "litellm_provider": "openai",
-"mode": "chat"
+"mode": "chat",
+"supports_prompt_caching": true,
+"supports_system_messages": true
 },
 "gpt-4-turbo": {
 "max_tokens": 4096,
@@ -208,7 +529,9 @@
 "mode": "chat",
 "supports_function_calling": true,
 "supports_parallel_function_calling": true,
-"supports_vision": true
+"supports_vision": true,
+"supports_prompt_caching": true,
+"supports_system_messages": true
 },
 "gpt-4-turbo-2024-04-09": {
 "max_tokens": 4096,
@@ -220,7 +543,9 @@
 "mode": "chat",
 "supports_function_calling": true,
 "supports_parallel_function_calling": true,
-"supports_vision": true
+"supports_vision": true,
+"supports_prompt_caching": true,
+"supports_system_messages": true
 },
 "gpt-4-1106-preview": {
 "max_tokens": 4096,
@@ -231,7 +556,9 @@
 "litellm_provider": "openai",
 "mode": "chat",
 "supports_function_calling": true,
-"supports_parallel_function_calling": true
+"supports_parallel_function_calling": true,
+"supports_prompt_caching": true,
+"supports_system_messages": true
 },
 "gpt-4-0125-preview": {
 "max_tokens": 4096,
@@ -242,7 +569,9 @@
 "litellm_provider": "openai",
 "mode": "chat",
 "supports_function_calling": true,
-"supports_parallel_function_calling": true
+"supports_parallel_function_calling": true,
+"supports_prompt_caching": true,
+"supports_system_messages": true
 },
 "gpt-4-vision-preview": {
 "max_tokens": 4096,
@@ -252,7 +581,9 @@
 "output_cost_per_token": 0.00003,
 "litellm_provider": "openai",
 "mode": "chat",
-"supports_vision": true
+"supports_vision": true,
+"supports_prompt_caching": true,
+"supports_system_messages": true
 },
 "gpt-4-1106-vision-preview": {
 "max_tokens": 4096,
@@ -262,7 +593,9 @@
 "output_cost_per_token": 0.00003,
 "litellm_provider": "openai",
 "mode": "chat",
-"supports_vision": true
+"supports_vision": true,
+"supports_prompt_caching": true,
+"supports_system_messages": true
 },
 "gpt-3.5-turbo": {
 "max_tokens": 4097,
@@ -272,7 +605,9 @@
 "output_cost_per_token": 0.000002,
 "litellm_provider": "openai",
 "mode": "chat",
-"supports_function_calling": true
+"supports_function_calling": true,
+"supports_prompt_caching": true,
+"supports_system_messages": true
 },
 "gpt-3.5-turbo-0301": {
 "max_tokens": 4097,
@@ -281,7 +616,9 @@
 "input_cost_per_token": 0.0000015,
 "output_cost_per_token": 0.000002,
 "litellm_provider": "openai",
-"mode": "chat"
+"mode": "chat",
+"supports_prompt_caching": true,
+"supports_system_messages": true
 },
 "gpt-3.5-turbo-0613": {
 "max_tokens": 4097,
@@ -291,7 +628,9 @@
 "output_cost_per_token": 0.000002,
 "litellm_provider": "openai",
 "mode": "chat",
-"supports_function_calling": true
+"supports_function_calling": true,
+"supports_prompt_caching": true,
+"supports_system_messages": true
 },
 "gpt-3.5-turbo-1106": {
 "max_tokens": 16385,
@@ -302,7 +641,9 @@
 "litellm_provider": "openai",
 "mode": "chat",
 "supports_function_calling": true,
-"supports_parallel_function_calling": true
+"supports_parallel_function_calling": true,
+"supports_prompt_caching": true,
+"supports_system_messages": true
 },
 "gpt-3.5-turbo-0125": {
 "max_tokens": 16385,
@@ -313,7 +654,9 @@
 "litellm_provider": "openai",
 "mode": "chat",
 "supports_function_calling": true,
-"supports_parallel_function_calling": true
+"supports_parallel_function_calling": true,
+"supports_prompt_caching": true,
+"supports_system_messages": true
 },
 "gpt-3.5-turbo-16k": {
 "max_tokens": 16385,
@@ -322,7 +665,9 @@
 "input_cost_per_token": 0.000003,
 "output_cost_per_token": 0.000004,
 "litellm_provider": "openai",
-"mode": "chat"
+"mode": "chat",
+"supports_prompt_caching": true,
+"supports_system_messages": true
 },
 "gpt-3.5-turbo-16k-0613": {
 "max_tokens": 16385,
@@ -331,7 +676,9 @@
 "input_cost_per_token": 0.000003,
 "output_cost_per_token": 0.000004,
 "litellm_provider": "openai",
-"mode": "chat"
+"mode": "chat",
+"supports_prompt_caching": true,
+"supports_system_messages": true
 },
 "ft:gpt-3.5-turbo": {
 "max_tokens": 4096,
@@ -339,8 +686,11 @@
 "max_output_tokens": 4096,
 "input_cost_per_token": 0.000003,
 "output_cost_per_token": 0.000006,
+"input_cost_per_token_batches": 0.0000015,
+"output_cost_per_token_batches": 0.000003,
 "litellm_provider": "openai",
-"mode": "chat"
+"mode": "chat",
+"supports_system_messages": true
 },
 "ft:gpt-3.5-turbo-0125": {
 "max_tokens": 4096,
@@ -349,7 +699,8 @@
 "input_cost_per_token": 0.000003,
 "output_cost_per_token": 0.000006,
 "litellm_provider": "openai",
-"mode": "chat"
+"mode": "chat",
+"supports_system_messages": true
 },
 "ft:gpt-3.5-turbo-1106": {
 "max_tokens": 4096,
@@ -358,7 +709,8 @@
 "input_cost_per_token": 0.000003,
 "output_cost_per_token": 0.000006,
 "litellm_provider": "openai",
-"mode": "chat"
+"mode": "chat",
+"supports_system_messages": true
 },
 "ft:gpt-3.5-turbo-0613": {
 "max_tokens": 4096,
@@ -367,7 +719,8 @@
 "input_cost_per_token": 0.000003,
 "output_cost_per_token": 0.000006,
 "litellm_provider": "openai",
-"mode": "chat"
+"mode": "chat",
+"supports_system_messages": true
 },
 "ft:gpt-4-0613": {
 "max_tokens": 4096,
@@ -378,7 +731,8 @@
 "litellm_provider": "openai",
 "mode": "chat",
 "supports_function_calling": true,
-"source": "OpenAI needs to add pricing for this ft model, will be updated when added by OpenAI. Defaulting to base model pricing"
+"source": "OpenAI needs to add pricing for this ft model, will be updated when added by OpenAI. Defaulting to base model pricing",
+"supports_system_messages": true
 },
 "ft:gpt-4o-2024-08-06": {
 "max_tokens": 16384,
@@ -386,11 +740,31 @@
 "max_output_tokens": 16384,
 "input_cost_per_token": 0.00000375,
 "output_cost_per_token": 0.000015,
+"input_cost_per_token_batches": 0.000001875,
+"output_cost_per_token_batches": 0.000007500,
 "litellm_provider": "openai",
 "mode": "chat",
 "supports_function_calling": true,
 "supports_parallel_function_calling": true,
-"
+"supports_response_schema": true,
+"supports_vision": true,
+"supports_system_messages": true
+},
+"ft:gpt-4o-2024-11-20": {
+"max_tokens": 16384,
+"max_input_tokens": 128000,
+"max_output_tokens": 16384,
+"input_cost_per_token": 0.00000375,
+"cache_creation_input_token_cost": 0.000001875,
+"output_cost_per_token": 0.000015,
+"litellm_provider": "openai",
+"mode": "chat",
+"supports_function_calling": true,
+"supports_parallel_function_calling": true,
+"supports_response_schema": true,
+"supports_vision": true,
+"supports_prompt_caching": true,
+"supports_system_messages": true
 },
 "ft:gpt-4o-mini-2024-07-18": {
 "max_tokens": 16384,
@@ -398,11 +772,17 @@
 "max_output_tokens": 16384,
 "input_cost_per_token": 0.0000003,
 "output_cost_per_token": 0.0000012,
+"input_cost_per_token_batches": 0.000000150,
+"output_cost_per_token_batches": 0.000000600,
+"cache_read_input_token_cost": 0.00000015,
 "litellm_provider": "openai",
 "mode": "chat",
 "supports_function_calling": true,
 "supports_parallel_function_calling": true,
-"
+"supports_response_schema": true,
+"supports_vision": true,
+"supports_prompt_caching": true,
+"supports_system_messages": true
 },
 "ft:davinci-002": {
 "max_tokens": 16384,
@@ -410,6 +790,8 @@
 "max_output_tokens": 4096,
 "input_cost_per_token": 0.000002,
 "output_cost_per_token": 0.000002,
+"input_cost_per_token_batches": 0.000001,
+"output_cost_per_token_batches": 0.000001,
 "litellm_provider": "text-completion-openai",
 "mode": "completion"
 },
@@ -419,6 +801,8 @@
 "max_output_tokens": 4096,
 "input_cost_per_token": 0.0000004,
 "output_cost_per_token": 0.0000004,
+"input_cost_per_token_batches": 0.0000002,
+"output_cost_per_token_batches": 0.0000002,
 "litellm_provider": "text-completion-openai",
 "mode": "completion"
 },
@@ -428,6 +812,8 @@
 "output_vector_size": 3072,
 "input_cost_per_token": 0.00000013,
 "output_cost_per_token": 0.000000,
+"input_cost_per_token_batches": 0.000000065,
+"output_cost_per_token_batches": 0.000000000,
 "litellm_provider": "openai",
 "mode": "embedding"
 },
@@ -437,6 +823,8 @@
 "output_vector_size": 1536,
 "input_cost_per_token": 0.00000002,
 "output_cost_per_token": 0.000000,
+"input_cost_per_token_batches": 0.000000010,
+"output_cost_per_token_batches": 0.000000000,
 "litellm_provider": "openai",
 "mode": "embedding"
 },
@@ -454,6 +842,8 @@
 "max_input_tokens": 8191,
 "input_cost_per_token": 0.0000001,
 "output_cost_per_token": 0.000000,
+"input_cost_per_token_batches": 0.000000050,
+"output_cost_per_token_batches": 0.000000000,
 "litellm_provider": "openai",
 "mode": "embedding"
 },
@@ -570,17 +960,75 @@
 "output_cost_per_second": 0.0001,
 "litellm_provider": "azure"
 },
+"azure/o1-mini": {
+"max_tokens": 65536,
+"max_input_tokens": 128000,
+"max_output_tokens": 65536,
+"input_cost_per_token": 0.000003,
+"output_cost_per_token": 0.000012,
+"cache_read_input_token_cost": 0.0000015,
+"litellm_provider": "azure",
+"mode": "chat",
+"supports_function_calling": true,
+"supports_parallel_function_calling": true,
+"supports_vision": false,
+"supports_prompt_caching": true
+},
+"azure/o1-mini-2024-09-12": {
+"max_tokens": 65536,
+"max_input_tokens": 128000,
+"max_output_tokens": 65536,
+"input_cost_per_token": 0.000003,
+"output_cost_per_token": 0.000012,
+"cache_read_input_token_cost": 0.0000015,
+"litellm_provider": "azure",
+"mode": "chat",
+"supports_function_calling": true,
+"supports_parallel_function_calling": true,
+"supports_vision": false,
+"supports_prompt_caching": true
+},
+"azure/o1-preview": {
+"max_tokens": 32768,
+"max_input_tokens": 128000,
+"max_output_tokens": 32768,
+"input_cost_per_token": 0.000015,
+"output_cost_per_token": 0.000060,
+"cache_read_input_token_cost": 0.0000075,
+"litellm_provider": "azure",
+"mode": "chat",
+"supports_function_calling": true,
+"supports_parallel_function_calling": true,
+"supports_vision": false,
+"supports_prompt_caching": true
+},
+"azure/o1-preview-2024-09-12": {
+"max_tokens": 32768,
+"max_input_tokens": 128000,
+"max_output_tokens": 32768,
+"input_cost_per_token": 0.000015,
+"output_cost_per_token": 0.000060,
+"cache_read_input_token_cost": 0.0000075,
+"litellm_provider": "azure",
+"mode": "chat",
+"supports_function_calling": true,
+"supports_parallel_function_calling": true,
+"supports_vision": false,
+"supports_prompt_caching": true
+},
 "azure/gpt-4o": {
 "max_tokens": 4096,
 "max_input_tokens": 128000,
 "max_output_tokens": 4096,
 "input_cost_per_token": 0.000005,
 "output_cost_per_token": 0.000015,
+"cache_read_input_token_cost": 0.00000125,
 "litellm_provider": "azure",
 "mode": "chat",
 "supports_function_calling": true,
 "supports_parallel_function_calling": true,
-"supports_vision": true
+"supports_vision": true,
+"supports_prompt_caching": true
 },
 "azure/gpt-4o-2024-08-06": {
 "max_tokens": 16384,
@@ -592,8 +1040,36 @@
 "mode": "chat",
 "supports_function_calling": true,
 "supports_parallel_function_calling": true,
+"supports_response_schema": true,
+"supports_vision": true,
+"supports_prompt_caching": true
+},
+"azure/gpt-4o-2024-11-20": {
+"max_tokens": 16384,
+"max_input_tokens": 128000,
+"max_output_tokens": 16384,
+"input_cost_per_token": 0.00000275,
+"output_cost_per_token": 0.000011,
+"litellm_provider": "azure",
+"mode": "chat",
+"supports_function_calling": true,
+"supports_parallel_function_calling": true,
+"supports_response_schema": true,
 "supports_vision": true
 },
+"azure/gpt-4o-2024-05-13": {
+"max_tokens": 4096,
+"max_input_tokens": 128000,
+"max_output_tokens": 4096,
+"input_cost_per_token": 0.000005,
+"output_cost_per_token": 0.000015,
+"litellm_provider": "azure",
+"mode": "chat",
+"supports_function_calling": true,
+"supports_parallel_function_calling": true,
+"supports_vision": true,
+"supports_prompt_caching": true
+},
 "azure/global-standard/gpt-4o-2024-08-06": {
 "max_tokens": 16384,
 "max_input_tokens": 128000,
@@ -604,6 +1080,21 @@
 "mode": "chat",
 "supports_function_calling": true,
 "supports_parallel_function_calling": true,
+"supports_response_schema": true,
+"supports_vision": true,
+"supports_prompt_caching": true
+},
+"azure/global-standard/gpt-4o-2024-11-20": {
+"max_tokens": 16384,
+"max_input_tokens": 128000,
+"max_output_tokens": 16384,
+"input_cost_per_token": 0.0000025,
+"output_cost_per_token": 0.000010,
+"litellm_provider": "azure",
+"mode": "chat",
+"supports_function_calling": true,
+"supports_parallel_function_calling": true,
+"supports_response_schema": true,
 "supports_vision": true
 },
 "azure/global-standard/gpt-4o-mini": {
@@ -616,6 +1107,7 @@
 "mode": "chat",
 "supports_function_calling": true,
 "supports_parallel_function_calling": true,
+"supports_response_schema": true,
 "supports_vision": true
 },
 "azure/gpt-4o-mini": {
@@ -624,11 +1116,29 @@
 "max_output_tokens": 16384,
 "input_cost_per_token": 0.000000165,
 "output_cost_per_token": 0.00000066,
+"cache_read_input_token_cost": 0.000000075,
 "litellm_provider": "azure",
 "mode": "chat",
 "supports_function_calling": true,
 "supports_parallel_function_calling": true,
-"
+"supports_response_schema": true,
+"supports_vision": true,
+"supports_prompt_caching": true
+},
+"azure/gpt-4o-mini-2024-07-18": {
+"max_tokens": 16384,
+"max_input_tokens": 128000,
+"max_output_tokens": 16384,
+"input_cost_per_token": 0.000000165,
+"output_cost_per_token": 0.00000066,
+"cache_read_input_token_cost": 0.000000075,
+"litellm_provider": "azure",
+"mode": "chat",
+"supports_function_calling": true,
+"supports_parallel_function_calling": true,
+"supports_response_schema": true,
+"supports_vision": true,
+"supports_prompt_caching": true
 },
 "azure/gpt-4-turbo-2024-04-09": {
 "max_tokens": 4096,
@@ -801,7 +1311,7 @@
 "max_input_tokens": 4097,
 "input_cost_per_token": 0.0000015,
 "output_cost_per_token": 0.000002,
-"litellm_provider": "
+"litellm_provider": "azure_text",
 "mode": "completion"
 },
 "azure/gpt-35-turbo-instruct": {
@@ -809,7 +1319,7 @@
 "max_input_tokens": 4097,
 "input_cost_per_token": 0.0000015,
 "output_cost_per_token": 0.000002,
-"litellm_provider": "
+"litellm_provider": "azure_text",
 "mode": "completion"
 },
 "azure/gpt-35-turbo-instruct-0914": {
@@ -817,7 +1327,7 @@
 "max_input_tokens": 4097,
 "input_cost_per_token": 0.0000015,
 "output_cost_per_token": 0.000002,
-"litellm_provider": "
+"litellm_provider": "azure_text",
 "mode": "completion"
 },
 "azure/mistral-large-latest": {
@@ -949,47 +1459,245 @@
 "output_cost_per_token": 0.000003,
 "litellm_provider": "azure_ai",
 "supports_function_calling": true,
-"mode": "chat"
+"mode": "chat"
+},
+"azure_ai/mistral-large-2407": {
+"max_tokens": 4096,
+"max_input_tokens": 128000,
+"max_output_tokens": 4096,
+"input_cost_per_token": 0.000002,
+"output_cost_per_token": 0.000006,
+"litellm_provider": "azure_ai",
+"supports_function_calling": true,
+"mode": "chat",
+"source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/000-000.mistral-ai-large-2407-offer?tab=Overview"
+},
+"azure_ai/ministral-3b": {
+"max_tokens": 4096,
+"max_input_tokens": 128000,
+"max_output_tokens": 4096,
+"input_cost_per_token": 0.00000004,
+"output_cost_per_token": 0.00000004,
+"litellm_provider": "azure_ai",
+"supports_function_calling": true,
+"mode": "chat",
+"source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/000-000.ministral-3b-2410-offer?tab=Overview"
+},
+"azure_ai/Llama-3.2-11B-Vision-Instruct": {
+"max_tokens": 2048,
+"max_input_tokens": 128000,
+"max_output_tokens": 2048,
+"input_cost_per_token": 0.00000037,
+"output_cost_per_token": 0.00000037,
+"litellm_provider": "azure_ai",
+"supports_function_calling": true,
+"supports_vision": true,
+"mode": "chat",
+"source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/metagenai.meta-llama-3-2-11b-vision-instruct-offer?tab=Overview"
+},
+"azure_ai/Llama-3.3-70B-Instruct": {
+"max_tokens": 2048,
+"max_input_tokens": 128000,
+"max_output_tokens": 2048,
+"input_cost_per_token": 0.00000071,
+"output_cost_per_token": 0.00000071,
+"litellm_provider": "azure_ai",
+"supports_function_calling": true,
+"mode": "chat",
+"source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/metagenai.llama-3-3-70b-instruct-offer?tab=Overview"
+},
+"azure_ai/Llama-3.2-90B-Vision-Instruct": {
+"max_tokens": 2048,
+"max_input_tokens": 128000,
+"max_output_tokens": 2048,
+"input_cost_per_token": 0.00000204,
+"output_cost_per_token": 0.00000204,
+"litellm_provider": "azure_ai",
+"supports_function_calling": true,
+"supports_vision": true,
+"mode": "chat",
+"source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/metagenai.meta-llama-3-2-90b-vision-instruct-offer?tab=Overview"
 },
 "azure_ai/Meta-Llama-3-70B-Instruct": {
-"max_tokens":
+"max_tokens": 2048,
 "max_input_tokens": 8192,
-"max_output_tokens":
+"max_output_tokens": 2048,
 "input_cost_per_token": 0.0000011,
 "output_cost_per_token": 0.00000037,
 "litellm_provider": "azure_ai",
 "mode": "chat"
 },
-"azure_ai/Meta-Llama-
-"max_tokens":
+"azure_ai/Meta-Llama-3.1-8B-Instruct": {
+"max_tokens": 2048,
 "max_input_tokens": 128000,
-"max_output_tokens":
+"max_output_tokens": 2048,
 "input_cost_per_token": 0.0000003,
 "output_cost_per_token": 0.00000061,
 "litellm_provider": "azure_ai",
 "mode": "chat",
 "source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-8b-instruct-offer?tab=PlansAndPrice"
 },
-"azure_ai/Meta-Llama-
-"max_tokens":
+"azure_ai/Meta-Llama-3.1-70B-Instruct": {
+"max_tokens": 2048,
 "max_input_tokens": 128000,
-"max_output_tokens":
+"max_output_tokens": 2048,
 "input_cost_per_token": 0.00000268,
 "output_cost_per_token": 0.00000354,
 "litellm_provider": "azure_ai",
 "mode": "chat",
 "source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-70b-instruct-offer?tab=PlansAndPrice"
 },
-"azure_ai/Meta-Llama-
-"max_tokens":
+"azure_ai/Meta-Llama-3.1-405B-Instruct": {
+"max_tokens": 2048,
 "max_input_tokens": 128000,
-"max_output_tokens":
+"max_output_tokens": 2048,
 "input_cost_per_token": 0.00000533,
 "output_cost_per_token": 0.000016,
 "litellm_provider": "azure_ai",
 "mode": "chat",
 "source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-405b-instruct-offer?tab=PlansAndPrice"
 },
+"azure_ai/Phi-3.5-mini-instruct": {
+"max_tokens": 4096,
+"max_input_tokens": 128000,
+"max_output_tokens": 4096,
+"input_cost_per_token": 0.00000013,
+"output_cost_per_token": 0.00000052,
+"litellm_provider": "azure_ai",
+"mode": "chat",
+"supports_vision": false,
+"source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/"
+},
+"azure_ai/Phi-3.5-vision-instruct": {
+"max_tokens": 4096,
+"max_input_tokens": 128000,
+"max_output_tokens": 4096,
+"input_cost_per_token": 0.00000013,
+"output_cost_per_token": 0.00000052,
+"litellm_provider": "azure_ai",
+"mode": "chat",
+"supports_vision": true,
+"source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/"
+},
+"azure_ai/Phi-3.5-MoE-instruct": {
+"max_tokens": 4096,
+"max_input_tokens": 128000,
+"max_output_tokens": 4096,
+"input_cost_per_token": 0.00000016,
+"output_cost_per_token": 0.00000064,
+"litellm_provider": "azure_ai",
+"mode": "chat",
+"supports_vision": false,
+"source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/"
+},
+"azure_ai/Phi-3-mini-4k-instruct": {
+"max_tokens": 4096,
+"max_input_tokens": 4096,
+"max_output_tokens": 4096,
+"input_cost_per_token": 0.00000013,
+"output_cost_per_token": 0.00000052,
+"litellm_provider": "azure_ai",
+"mode": "chat",
+"supports_vision": false,
+"source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/"
+},
+"azure_ai/Phi-3-mini-128k-instruct": {
+"max_tokens": 4096,
+"max_input_tokens": 128000,
+"max_output_tokens": 4096,
+"input_cost_per_token": 0.00000013,
+"output_cost_per_token": 0.00000052,
+"litellm_provider": "azure_ai",
+"mode": "chat",
+"supports_vision": false,
+"source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/"
+},
+"azure_ai/Phi-3-small-8k-instruct": {
+"max_tokens": 4096,
+"max_input_tokens": 8192,
+"max_output_tokens": 4096,
+"input_cost_per_token": 0.00000015,
+"output_cost_per_token": 0.0000006,
+"litellm_provider": "azure_ai",
+"mode": "chat",
+"supports_vision": false,
+"source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/"
+},
+"azure_ai/Phi-3-small-128k-instruct": {
+"max_tokens": 4096,
+"max_input_tokens": 128000,
+"max_output_tokens": 4096,
+"input_cost_per_token": 0.00000015,
+"output_cost_per_token": 0.0000006,
+"litellm_provider": "azure_ai",
+"mode": "chat",
+"supports_vision": false,
+"source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/"
+},
+"azure_ai/Phi-3-medium-4k-instruct": {
+"max_tokens": 4096,
+"max_input_tokens": 4096,
+"max_output_tokens": 4096,
+"input_cost_per_token": 0.00000017,
+"output_cost_per_token": 0.00000068,
+"litellm_provider": "azure_ai",
+"mode": "chat",
+"supports_vision": false,
+"source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/"
+},
+"azure_ai/Phi-3-medium-128k-instruct": {
+"max_tokens": 4096,
+"max_input_tokens": 128000,
+"max_output_tokens": 4096,
+"input_cost_per_token": 0.00000017,
+"output_cost_per_token": 0.00000068,
+"litellm_provider": "azure_ai",
+"mode": "chat",
+"supports_vision": false,
+"source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/"
+},
+"azure_ai/cohere-rerank-v3-multilingual": {
+"max_tokens": 4096,
+"max_input_tokens": 4096,
+"max_output_tokens": 4096,
+"max_query_tokens": 2048,
+"input_cost_per_token": 0.0,
+"input_cost_per_query": 0.002,
+"output_cost_per_token": 0.0,
+"litellm_provider": "azure_ai",
+"mode": "rerank"
+},
+"azure_ai/cohere-rerank-v3-english": {
+"max_tokens": 4096,
+"max_input_tokens": 4096,
+"max_output_tokens": 4096,
+"max_query_tokens": 2048,
+"input_cost_per_token": 0.0,
+"input_cost_per_query": 0.002,
+"output_cost_per_token": 0.0,
+"litellm_provider": "azure_ai",
+"mode": "rerank"
+},
+"azure_ai/Cohere-embed-v3-english": {
+"max_tokens": 512,
+"max_input_tokens": 512,
+"output_vector_size": 1024,
+"input_cost_per_token": 0.0000001,
+"output_cost_per_token": 0.0,
+"litellm_provider": "azure_ai",
+"mode": "embedding",
+"source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/cohere.cohere-embed-v3-english-offer?tab=PlansAndPrice"
+},
+"azure_ai/Cohere-embed-v3-multilingual": {
+"max_tokens": 512,
+"max_input_tokens": 512,
+"output_vector_size": 1024,
+"input_cost_per_token": 0.0000001,
+"output_cost_per_token": 0.0,
+"litellm_provider": "azure_ai",
+"mode": "embedding",
+"source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/cohere.cohere-embed-v3-english-offer?tab=PlansAndPrice"
+},
 "babbage-002": {
 "max_tokens": 16384,
 "max_input_tokens": 16384,
@@ -1131,6 +1839,18 @@
|
|
1131
1839
|
"supports_function_calling": true,
|
1132
1840
|
"supports_assistant_prefill": true
|
1133
1841
|
},
|
1842
|
+
"mistral/pixtral-12b-2409": {
|
1843
|
+
"max_tokens": 128000,
|
1844
|
+
"max_input_tokens": 128000,
|
1845
|
+
"max_output_tokens": 128000,
|
1846
|
+
"input_cost_per_token": 0.00000015,
|
1847
|
+
"output_cost_per_token": 0.00000015,
|
1848
|
+
"litellm_provider": "mistral",
|
1849
|
+
"mode": "chat",
|
1850
|
+
"supports_function_calling": true,
|
1851
|
+
"supports_assistant_prefill": true,
|
1852
|
+
"supports_vision": true
|
1853
|
+
},
|
1134
1854
|
"mistral/open-mistral-7b": {
|
1135
1855
|
"max_tokens": 8191,
|
1136
1856
|
"max_input_tokens": 32000,
|
@@ -1240,6 +1960,8 @@
|
|
1240
1960
|
"max_output_tokens": 4096,
|
1241
1961
|
"input_cost_per_token": 0.00000014,
|
1242
1962
|
"input_cost_per_token_cache_hit": 0.000000014,
|
1963
|
+
"cache_read_input_token_cost": 0.000000014,
|
1964
|
+
"cache_creation_input_token_cost": 0.0,
|
1243
1965
|
"output_cost_per_token": 0.00000028,
|
1244
1966
|
"litellm_provider": "deepseek",
|
1245
1967
|
"mode": "chat",
|
@@ -1290,6 +2012,17 @@
  "mode": "completion",
  "source": "https://docs.mistral.ai/capabilities/code_generation/"
  },
+ "xai/grok-beta": {
+ "max_tokens": 131072,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 131072,
+ "input_cost_per_token": 0.000005,
+ "output_cost_per_token": 0.000015,
+ "litellm_provider": "xai",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_vision": true
+ },
  "deepseek-coder": {
  "max_tokens": 4096,
  "max_input_tokens": 128000,
@@ -1304,6 +2037,24 @@
  "supports_tool_choice": true,
  "supports_prompt_caching": true
  },
+ "groq/llama-3.3-70b-versatile": {
+ "max_tokens": 8192,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 8192,
+ "input_cost_per_token": 0.00000059,
+ "output_cost_per_token": 0.00000079,
+ "litellm_provider": "groq",
+ "mode": "chat"
+ },
+ "groq/llama-3.3-70b-specdec": {
+ "max_tokens": 8192,
+ "max_input_tokens": 8192,
+ "max_output_tokens": 8192,
+ "input_cost_per_token": 0.00000059,
+ "output_cost_per_token": 0.00000099,
+ "litellm_provider": "groq",
+ "mode": "chat"
+ },
  "groq/llama2-70b-4096": {
  "max_tokens": 4096,
  "max_input_tokens": 4096,
@@ -1312,7 +2063,8 @@
  "output_cost_per_token": 0.00000080,
  "litellm_provider": "groq",
  "mode": "chat",
- "supports_function_calling": true
+ "supports_function_calling": true,
+ "supports_response_schema": true
  },
  "groq/llama3-8b-8192": {
  "max_tokens": 8192,
@@ -1322,7 +2074,76 @@
  "output_cost_per_token": 0.00000008,
  "litellm_provider": "groq",
  "mode": "chat",
- "supports_function_calling": true
+ "supports_function_calling": true,
+ "supports_response_schema": true
+ },
+ "groq/llama-3.2-1b-preview": {
+ "max_tokens": 8192,
+ "max_input_tokens": 8192,
+ "max_output_tokens": 8192,
+ "input_cost_per_token": 0.00000004,
+ "output_cost_per_token": 0.00000004,
+ "litellm_provider": "groq",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_response_schema": true
+ },
+ "groq/llama-3.2-3b-preview": {
+ "max_tokens": 8192,
+ "max_input_tokens": 8192,
+ "max_output_tokens": 8192,
+ "input_cost_per_token": 0.00000006,
+ "output_cost_per_token": 0.00000006,
+ "litellm_provider": "groq",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_response_schema": true
+ },
+ "groq/llama-3.2-11b-text-preview": {
+ "max_tokens": 8192,
+ "max_input_tokens": 8192,
+ "max_output_tokens": 8192,
+ "input_cost_per_token": 0.00000018,
+ "output_cost_per_token": 0.00000018,
+ "litellm_provider": "groq",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_response_schema": true
+ },
+ "groq/llama-3.2-11b-vision-preview": {
+ "max_tokens": 8192,
+ "max_input_tokens": 8192,
+ "max_output_tokens": 8192,
+ "input_cost_per_token": 0.00000018,
+ "output_cost_per_token": 0.00000018,
+ "litellm_provider": "groq",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_response_schema": true,
+ "supports_vision": true
+ },
+ "groq/llama-3.2-90b-text-preview": {
+ "max_tokens": 8192,
+ "max_input_tokens": 8192,
+ "max_output_tokens": 8192,
+ "input_cost_per_token": 0.0000009,
+ "output_cost_per_token": 0.0000009,
+ "litellm_provider": "groq",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_response_schema": true
+ },
+ "groq/llama-3.2-90b-vision-preview": {
+ "max_tokens": 8192,
+ "max_input_tokens": 8192,
+ "max_output_tokens": 8192,
+ "input_cost_per_token": 0.0000009,
+ "output_cost_per_token": 0.0000009,
+ "litellm_provider": "groq",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_response_schema": true,
+ "supports_vision": true
  },
  "groq/llama3-70b-8192": {
  "max_tokens": 8192,
@@ -1332,17 +2153,19 @@
  "output_cost_per_token": 0.00000079,
  "litellm_provider": "groq",
  "mode": "chat",
- "supports_function_calling": true
+ "supports_function_calling": true,
+ "supports_response_schema": true
  },
  "groq/llama-3.1-8b-instant": {
  "max_tokens": 8192,
  "max_input_tokens": 8192,
  "max_output_tokens": 8192,
- "input_cost_per_token": 0.
- "output_cost_per_token": 0.
+ "input_cost_per_token": 0.00000005,
+ "output_cost_per_token": 0.00000008,
  "litellm_provider": "groq",
  "mode": "chat",
- "supports_function_calling": true
+ "supports_function_calling": true,
+ "supports_response_schema": true
  },
  "groq/llama-3.1-70b-versatile": {
  "max_tokens": 8192,
@@ -1352,7 +2175,8 @@
  "output_cost_per_token": 0.00000079,
  "litellm_provider": "groq",
  "mode": "chat",
- "supports_function_calling": true
+ "supports_function_calling": true,
+ "supports_response_schema": true
  },
  "groq/llama-3.1-405b-reasoning": {
  "max_tokens": 8192,
@@ -1362,7 +2186,8 @@
  "output_cost_per_token": 0.00000079,
  "litellm_provider": "groq",
  "mode": "chat",
- "supports_function_calling": true
+ "supports_function_calling": true,
+ "supports_response_schema": true
  },
  "groq/mixtral-8x7b-32768": {
  "max_tokens": 32768,
@@ -1372,7 +2197,8 @@
  "output_cost_per_token": 0.00000024,
  "litellm_provider": "groq",
  "mode": "chat",
- "supports_function_calling": true
+ "supports_function_calling": true,
+ "supports_response_schema": true
  },
  "groq/gemma-7b-it": {
  "max_tokens": 8192,
@@ -1382,7 +2208,8 @@
  "output_cost_per_token": 0.00000007,
  "litellm_provider": "groq",
  "mode": "chat",
- "supports_function_calling": true
+ "supports_function_calling": true,
+ "supports_response_schema": true
  },
  "groq/gemma2-9b-it": {
  "max_tokens": 8192,
@@ -1392,7 +2219,8 @@
  "output_cost_per_token": 0.00000020,
  "litellm_provider": "groq",
  "mode": "chat",
- "supports_function_calling": true
+ "supports_function_calling": true,
+ "supports_response_schema": true
  },
  "groq/llama3-groq-70b-8192-tool-use-preview": {
  "max_tokens": 8192,
@@ -1402,7 +2230,8 @@
  "output_cost_per_token": 0.00000089,
  "litellm_provider": "groq",
  "mode": "chat",
- "supports_function_calling": true
+ "supports_function_calling": true,
+ "supports_response_schema": true
  },
  "groq/llama3-groq-8b-8192-tool-use-preview": {
  "max_tokens": 8192,
@@ -1412,7 +2241,8 @@
  "output_cost_per_token": 0.00000019,
  "litellm_provider": "groq",
  "mode": "chat",
- "supports_function_calling": true
+ "supports_function_calling": true,
+ "supports_response_schema": true
  },
  "cerebras/llama3.1-8b": {
  "max_tokens": 128000,
@@ -1505,7 +2335,24 @@
  "supports_vision": true,
  "tool_use_system_prompt_tokens": 264,
  "supports_assistant_prefill": true,
- "supports_prompt_caching": true
+ "supports_prompt_caching": true,
+ "supports_response_schema": true
+ },
+ "claude-3-5-haiku-20241022": {
+ "max_tokens": 8192,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 8192,
+ "input_cost_per_token": 0.000001,
+ "output_cost_per_token": 0.000005,
+ "cache_creation_input_token_cost": 0.00000125,
+ "cache_read_input_token_cost": 0.0000001,
+ "litellm_provider": "anthropic",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "tool_use_system_prompt_tokens": 264,
+ "supports_assistant_prefill": true,
+ "supports_prompt_caching": true,
+ "supports_response_schema": true
  },
  "claude-3-opus-20240229": {
  "max_tokens": 4096,
@@ -1521,23 +2368,42 @@
  "supports_vision": true,
  "tool_use_system_prompt_tokens": 395,
  "supports_assistant_prefill": true,
- "supports_prompt_caching": true
+ "supports_prompt_caching": true,
+ "supports_response_schema": true
+ },
+ "claude-3-sonnet-20240229": {
+ "max_tokens": 4096,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 4096,
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "litellm_provider": "anthropic",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_vision": true,
+ "tool_use_system_prompt_tokens": 159,
+ "supports_assistant_prefill": true,
+ "supports_prompt_caching": true,
+ "supports_response_schema": true
  },
- "claude-3-sonnet-
- "max_tokens":
+ "claude-3-5-sonnet-20240620": {
+ "max_tokens": 8192,
  "max_input_tokens": 200000,
- "max_output_tokens":
+ "max_output_tokens": 8192,
  "input_cost_per_token": 0.000003,
  "output_cost_per_token": 0.000015,
+ "cache_creation_input_token_cost": 0.00000375,
+ "cache_read_input_token_cost": 0.0000003,
  "litellm_provider": "anthropic",
  "mode": "chat",
  "supports_function_calling": true,
  "supports_vision": true,
  "tool_use_system_prompt_tokens": 159,
  "supports_assistant_prefill": true,
- "supports_prompt_caching": true
+ "supports_prompt_caching": true,
+ "supports_response_schema": true
  },
- "claude-3-5-sonnet-
+ "claude-3-5-sonnet-20241022": {
  "max_tokens": 8192,
  "max_input_tokens": 200000,
  "max_output_tokens": 8192,
@@ -1551,7 +2417,9 @@
  "supports_vision": true,
  "tool_use_system_prompt_tokens": 159,
  "supports_assistant_prefill": true,
- "
+ "supports_pdf_input": true,
+ "supports_prompt_caching": true,
+ "supports_response_schema": true
  },
  "text-bison": {
  "max_tokens": 2048,
@@ -1953,20 +2821,20 @@
  "max_tokens": 8192,
  "max_input_tokens": 2097152,
  "max_output_tokens": 8192,
- "input_cost_per_image": 0.
- "input_cost_per_audio_per_second": 0.
- "input_cost_per_video_per_second": 0.
- "input_cost_per_token": 0.
- "input_cost_per_character": 0.
- "
- "
- "
- "
- "
- "
- "
- "
- "
+ "input_cost_per_image": 0.00032875,
+ "input_cost_per_audio_per_second": 0.00003125,
+ "input_cost_per_video_per_second": 0.00032875,
+ "input_cost_per_token": 0.00000125,
+ "input_cost_per_character": 0.0000003125,
+ "input_cost_per_image_above_128k_tokens": 0.0006575,
+ "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
+ "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
+ "input_cost_per_token_above_128k_tokens": 0.0000025,
+ "input_cost_per_character_above_128k_tokens": 0.000000625,
+ "output_cost_per_token": 0.000005,
+ "output_cost_per_character": 0.00000125,
+ "output_cost_per_token_above_128k_tokens": 0.00001,
+ "output_cost_per_character_above_128k_tokens": 0.0000025,
  "litellm_provider": "vertex_ai-language-models",
  "mode": "chat",
  "supports_system_messages": true,
@@ -1975,24 +2843,50 @@
  "supports_response_schema": true,
  "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
  },
+ "gemini-1.5-pro-002": {
+ "max_tokens": 8192,
+ "max_input_tokens": 2097152,
+ "max_output_tokens": 8192,
+ "input_cost_per_image": 0.00032875,
+ "input_cost_per_audio_per_second": 0.00003125,
+ "input_cost_per_video_per_second": 0.00032875,
+ "input_cost_per_token": 0.00000125,
+ "input_cost_per_character": 0.0000003125,
+ "input_cost_per_image_above_128k_tokens": 0.0006575,
+ "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
+ "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
+ "input_cost_per_token_above_128k_tokens": 0.0000025,
+ "input_cost_per_character_above_128k_tokens": 0.000000625,
+ "output_cost_per_token": 0.000005,
+ "output_cost_per_character": 0.00000125,
+ "output_cost_per_token_above_128k_tokens": 0.00001,
+ "output_cost_per_character_above_128k_tokens": 0.0000025,
+ "litellm_provider": "vertex_ai-language-models",
+ "mode": "chat",
+ "supports_system_messages": true,
+ "supports_function_calling": true,
+ "supports_tool_choice": true,
+ "supports_response_schema": true,
+ "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-1.5-pro"
+ },
  "gemini-1.5-pro-001": {
  "max_tokens": 8192,
  "max_input_tokens": 1000000,
  "max_output_tokens": 8192,
- "input_cost_per_image": 0.
- "input_cost_per_audio_per_second": 0.
- "input_cost_per_video_per_second": 0.
- "input_cost_per_token": 0.
- "input_cost_per_character": 0.
- "
- "
- "
- "
- "
- "
- "
- "
- "
+ "input_cost_per_image": 0.00032875,
+ "input_cost_per_audio_per_second": 0.00003125,
+ "input_cost_per_video_per_second": 0.00032875,
+ "input_cost_per_token": 0.00000125,
+ "input_cost_per_character": 0.0000003125,
+ "input_cost_per_image_above_128k_tokens": 0.0006575,
+ "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
+ "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
+ "input_cost_per_token_above_128k_tokens": 0.0000025,
+ "input_cost_per_character_above_128k_tokens": 0.000000625,
+ "output_cost_per_token": 0.000005,
+ "output_cost_per_character": 0.00000125,
+ "output_cost_per_token_above_128k_tokens": 0.00001,
+ "output_cost_per_character_above_128k_tokens": 0.0000025,
  "litellm_provider": "vertex_ai-language-models",
  "mode": "chat",
  "supports_system_messages": true,
@@ -2005,20 +2899,20 @@
  "max_tokens": 8192,
  "max_input_tokens": 1000000,
  "max_output_tokens": 8192,
- "input_cost_per_image": 0.
- "input_cost_per_audio_per_second": 0.
- "input_cost_per_video_per_second": 0.
- "input_cost_per_token": 0.
- "input_cost_per_character": 0.
- "
- "
- "
- "
- "
- "
- "
- "
- "
+ "input_cost_per_image": 0.00032875,
+ "input_cost_per_audio_per_second": 0.00003125,
+ "input_cost_per_video_per_second": 0.00032875,
+ "input_cost_per_token": 0.000000078125,
+ "input_cost_per_character": 0.0000003125,
+ "input_cost_per_image_above_128k_tokens": 0.0006575,
+ "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
+ "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
+ "input_cost_per_token_above_128k_tokens": 0.00000015625,
+ "input_cost_per_character_above_128k_tokens": 0.000000625,
+ "output_cost_per_token": 0.0000003125,
+ "output_cost_per_character": 0.00000125,
+ "output_cost_per_token_above_128k_tokens": 0.000000625,
+ "output_cost_per_character_above_128k_tokens": 0.0000025,
  "litellm_provider": "vertex_ai-language-models",
  "mode": "chat",
  "supports_system_messages": true,
@@ -2031,20 +2925,20 @@
  "max_tokens": 8192,
  "max_input_tokens": 1000000,
  "max_output_tokens": 8192,
- "input_cost_per_image": 0.
- "input_cost_per_audio_per_second": 0.
- "input_cost_per_video_per_second": 0.
- "input_cost_per_token": 0.
- "input_cost_per_character": 0.
- "
- "
- "
- "
- "
- "
- "
- "
- "
+ "input_cost_per_image": 0.00032875,
+ "input_cost_per_audio_per_second": 0.00003125,
+ "input_cost_per_video_per_second": 0.00032875,
+ "input_cost_per_token": 0.000000078125,
+ "input_cost_per_character": 0.0000003125,
+ "input_cost_per_image_above_128k_tokens": 0.0006575,
+ "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
+ "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
+ "input_cost_per_token_above_128k_tokens": 0.00000015625,
+ "input_cost_per_character_above_128k_tokens": 0.000000625,
+ "output_cost_per_token": 0.0000003125,
+ "output_cost_per_character": 0.00000125,
+ "output_cost_per_token_above_128k_tokens": 0.000000625,
+ "output_cost_per_character_above_128k_tokens": 0.0000025,
  "litellm_provider": "vertex_ai-language-models",
  "mode": "chat",
  "supports_system_messages": true,
@@ -2057,20 +2951,20 @@
  "max_tokens": 8192,
  "max_input_tokens": 1000000,
  "max_output_tokens": 8192,
- "input_cost_per_image": 0.
- "input_cost_per_audio_per_second": 0.
- "input_cost_per_video_per_second": 0.
- "input_cost_per_token": 0.
- "input_cost_per_character": 0.
- "
- "
- "
- "
- "
- "
- "
- "
- "
+ "input_cost_per_image": 0.00032875,
+ "input_cost_per_audio_per_second": 0.00003125,
+ "input_cost_per_video_per_second": 0.00032875,
+ "input_cost_per_token": 0.000000078125,
+ "input_cost_per_character": 0.0000003125,
+ "input_cost_per_image_above_128k_tokens": 0.0006575,
+ "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
+ "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
+ "input_cost_per_token_above_128k_tokens": 0.00000015625,
+ "input_cost_per_character_above_128k_tokens": 0.000000625,
+ "output_cost_per_token": 0.0000003125,
+ "output_cost_per_character": 0.00000125,
+ "output_cost_per_token_above_128k_tokens": 0.000000625,
+ "output_cost_per_character_above_128k_tokens": 0.0000025,
  "litellm_provider": "vertex_ai-language-models",
  "mode": "chat",
  "supports_function_calling": true,
@@ -2088,20 +2982,20 @@
  "max_audio_length_hours": 8.4,
  "max_audio_per_prompt": 1,
  "max_pdf_size_mb": 30,
- "input_cost_per_image": 0.
- "input_cost_per_video_per_second": 0.
- "input_cost_per_audio_per_second": 0.
- "input_cost_per_token": 0.
- "input_cost_per_character": 0.
+ "input_cost_per_image": 0.00002,
+ "input_cost_per_video_per_second": 0.00002,
+ "input_cost_per_audio_per_second": 0.000002,
+ "input_cost_per_token": 0.000000075,
+ "input_cost_per_character": 0.00000001875,
  "input_cost_per_token_above_128k_tokens": 0.000001,
  "input_cost_per_character_above_128k_tokens": 0.00000025,
- "
- "
- "
- "
- "
- "
- "
+ "input_cost_per_image_above_128k_tokens": 0.00004,
+ "input_cost_per_video_per_second_above_128k_tokens": 0.00004,
+ "input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
+ "output_cost_per_token": 0.0000003,
+ "output_cost_per_character": 0.000000075,
+ "output_cost_per_token_above_128k_tokens": 0.0000006,
+ "output_cost_per_character_above_128k_tokens": 0.00000015,
  "litellm_provider": "vertex_ai-language-models",
  "mode": "chat",
  "supports_system_messages": true,
@@ -2120,20 +3014,20 @@
  "max_audio_length_hours": 8.4,
  "max_audio_per_prompt": 1,
  "max_pdf_size_mb": 30,
- "input_cost_per_image": 0.
- "input_cost_per_video_per_second": 0.
- "input_cost_per_audio_per_second": 0.
- "input_cost_per_token": 0.
- "input_cost_per_character": 0.
+ "input_cost_per_image": 0.00002,
+ "input_cost_per_video_per_second": 0.00002,
+ "input_cost_per_audio_per_second": 0.000002,
+ "input_cost_per_token": 0.000000004688,
+ "input_cost_per_character": 0.00000001875,
  "input_cost_per_token_above_128k_tokens": 0.000001,
  "input_cost_per_character_above_128k_tokens": 0.00000025,
- "
- "
- "
- "
- "
- "
- "
+ "input_cost_per_image_above_128k_tokens": 0.00004,
+ "input_cost_per_video_per_second_above_128k_tokens": 0.00004,
+ "input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
+ "output_cost_per_token": 0.0000000046875,
+ "output_cost_per_character": 0.00000001875,
+ "output_cost_per_token_above_128k_tokens": 0.000000009375,
+ "output_cost_per_character_above_128k_tokens": 0.0000000375,
  "litellm_provider": "vertex_ai-language-models",
  "mode": "chat",
  "supports_system_messages": true,
@@ -2142,6 +3036,38 @@
  "supports_response_schema": true,
  "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
  },
+ "gemini-1.5-flash-002": {
+ "max_tokens": 8192,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 8192,
+ "max_images_per_prompt": 3000,
+ "max_videos_per_prompt": 10,
+ "max_video_length": 1,
+ "max_audio_length_hours": 8.4,
+ "max_audio_per_prompt": 1,
+ "max_pdf_size_mb": 30,
+ "input_cost_per_image": 0.00002,
+ "input_cost_per_video_per_second": 0.00002,
+ "input_cost_per_audio_per_second": 0.000002,
+ "input_cost_per_token": 0.000000075,
+ "input_cost_per_character": 0.00000001875,
+ "input_cost_per_token_above_128k_tokens": 0.000001,
+ "input_cost_per_character_above_128k_tokens": 0.00000025,
+ "input_cost_per_image_above_128k_tokens": 0.00004,
+ "input_cost_per_video_per_second_above_128k_tokens": 0.00004,
+ "input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
+ "output_cost_per_token": 0.0000003,
+ "output_cost_per_character": 0.000000075,
+ "output_cost_per_token_above_128k_tokens": 0.0000006,
+ "output_cost_per_character_above_128k_tokens": 0.00000015,
+ "litellm_provider": "vertex_ai-language-models",
+ "mode": "chat",
+ "supports_system_messages": true,
+ "supports_function_calling": true,
+ "supports_vision": true,
+ "supports_response_schema": true,
+ "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-1.5-flash"
+ },
  "gemini-1.5-flash-001": {
  "max_tokens": 8192,
  "max_input_tokens": 1000000,
@@ -2152,20 +3078,20 @@
  "max_audio_length_hours": 8.4,
  "max_audio_per_prompt": 1,
  "max_pdf_size_mb": 30,
- "input_cost_per_image": 0.
- "input_cost_per_video_per_second": 0.
- "input_cost_per_audio_per_second": 0.
- "input_cost_per_token": 0.
- "input_cost_per_character": 0.
+ "input_cost_per_image": 0.00002,
+ "input_cost_per_video_per_second": 0.00002,
+ "input_cost_per_audio_per_second": 0.000002,
+ "input_cost_per_token": 0.000000075,
+ "input_cost_per_character": 0.00000001875,
  "input_cost_per_token_above_128k_tokens": 0.000001,
  "input_cost_per_character_above_128k_tokens": 0.00000025,
- "
- "
- "
- "
- "
- "
- "
+ "input_cost_per_image_above_128k_tokens": 0.00004,
+ "input_cost_per_video_per_second_above_128k_tokens": 0.00004,
+ "input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
+ "output_cost_per_token": 0.0000003,
+ "output_cost_per_character": 0.000000075,
+ "output_cost_per_token_above_128k_tokens": 0.0000006,
+ "output_cost_per_character_above_128k_tokens": 0.00000015,
  "litellm_provider": "vertex_ai-language-models",
  "mode": "chat",
  "supports_system_messages": true,
@@ -2184,20 +3110,20 @@
  "max_audio_length_hours": 8.4,
  "max_audio_per_prompt": 1,
  "max_pdf_size_mb": 30,
- "input_cost_per_image": 0.
- "input_cost_per_video_per_second": 0.
- "input_cost_per_audio_per_second": 0.
- "input_cost_per_token": 0.
- "input_cost_per_character": 0.
+ "input_cost_per_image": 0.00002,
+ "input_cost_per_video_per_second": 0.00002,
+ "input_cost_per_audio_per_second": 0.000002,
+ "input_cost_per_token": 0.000000075,
+ "input_cost_per_character": 0.00000001875,
  "input_cost_per_token_above_128k_tokens": 0.000001,
  "input_cost_per_character_above_128k_tokens": 0.00000025,
- "
- "
- "
- "
- "
- "
- "
+ "input_cost_per_image_above_128k_tokens": 0.00004,
+ "input_cost_per_video_per_second_above_128k_tokens": 0.00004,
+ "input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
+ "output_cost_per_token": 0.0000000046875,
+ "output_cost_per_character": 0.00000001875,
+ "output_cost_per_token_above_128k_tokens": 0.000000009375,
+ "output_cost_per_character_above_128k_tokens": 0.0000000375,
  "litellm_provider": "vertex_ai-language-models",
  "mode": "chat",
  "supports_system_messages": true,
@@ -2219,7 +3145,7 @@
  "supports_tool_choice": true,
  "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/gemini-experimental"
  },
- "gemini-
+ "gemini-flash-experimental": {
  "max_tokens": 8192,
  "max_input_tokens": 1000000,
  "max_output_tokens": 8192,
@@ -2298,6 +3224,86 @@
  "mode": "chat",
  "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
  },
+ "gemini-2.0-flash-exp": {
+ "max_tokens": 8192,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 8192,
+ "max_images_per_prompt": 3000,
+ "max_videos_per_prompt": 10,
+ "max_video_length": 1,
+ "max_audio_length_hours": 8.4,
+ "max_audio_per_prompt": 1,
+ "max_pdf_size_mb": 30,
+ "input_cost_per_image": 0,
+ "input_cost_per_video_per_second": 0,
+ "input_cost_per_audio_per_second": 0,
+ "input_cost_per_token": 0,
+ "input_cost_per_character": 0,
+ "input_cost_per_token_above_128k_tokens": 0,
+ "input_cost_per_character_above_128k_tokens": 0,
+ "input_cost_per_image_above_128k_tokens": 0,
+ "input_cost_per_video_per_second_above_128k_tokens": 0,
+ "input_cost_per_audio_per_second_above_128k_tokens": 0,
+ "output_cost_per_token": 0,
+ "output_cost_per_character": 0,
+ "output_cost_per_token_above_128k_tokens": 0,
+ "output_cost_per_character_above_128k_tokens": 0,
+ "litellm_provider": "vertex_ai-language-models",
+ "mode": "chat",
+ "supports_system_messages": true,
+ "supports_function_calling": true,
+ "supports_vision": true,
+ "supports_response_schema": true,
+ "supports_audio_output": true,
+ "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash"
+ },
+ "gemini/gemini-2.0-flash-exp": {
+ "max_tokens": 8192,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 8192,
+ "max_images_per_prompt": 3000,
+ "max_videos_per_prompt": 10,
+ "max_video_length": 1,
+ "max_audio_length_hours": 8.4,
+ "max_audio_per_prompt": 1,
+ "max_pdf_size_mb": 30,
+ "input_cost_per_image": 0,
+ "input_cost_per_video_per_second": 0,
+ "input_cost_per_audio_per_second": 0,
+ "input_cost_per_token": 0,
+ "input_cost_per_character": 0,
+ "input_cost_per_token_above_128k_tokens": 0,
+ "input_cost_per_character_above_128k_tokens": 0,
+ "input_cost_per_image_above_128k_tokens": 0,
+ "input_cost_per_video_per_second_above_128k_tokens": 0,
+ "input_cost_per_audio_per_second_above_128k_tokens": 0,
+ "output_cost_per_token": 0,
+ "output_cost_per_character": 0,
+ "output_cost_per_token_above_128k_tokens": 0,
+ "output_cost_per_character_above_128k_tokens": 0,
+ "litellm_provider": "gemini",
+ "mode": "chat",
+ "supports_system_messages": true,
+ "supports_function_calling": true,
+ "supports_vision": true,
+ "supports_response_schema": true,
+ "supports_audio_output": true,
+ "tpm": 4000000,
+ "rpm": 10,
+ "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash"
+ },
+ "vertex_ai/claude-3-sonnet": {
+ "max_tokens": 4096,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 4096,
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "litellm_provider": "vertex_ai-anthropic_models",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_vision": true,
+ "supports_assistant_prefill": true
+ },
  "vertex_ai/claude-3-sonnet@20240229": {
  "max_tokens": 4096,
  "max_input_tokens": 200000,
@@ -2310,6 +3316,18 @@
  "supports_vision": true,
  "supports_assistant_prefill": true
  },
+ "vertex_ai/claude-3-5-sonnet": {
+ "max_tokens": 8192,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 8192,
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "litellm_provider": "vertex_ai-anthropic_models",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_vision": true,
+ "supports_assistant_prefill": true
+ },
  "vertex_ai/claude-3-5-sonnet@20240620": {
  "max_tokens": 8192,
  "max_input_tokens": 200000,
@@ -2322,6 +3340,42 @@
  "supports_vision": true,
  "supports_assistant_prefill": true
  },
+ "vertex_ai/claude-3-5-sonnet-v2": {
+ "max_tokens": 8192,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 8192,
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "litellm_provider": "vertex_ai-anthropic_models",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_vision": true,
+ "supports_assistant_prefill": true
+ },
+ "vertex_ai/claude-3-5-sonnet-v2@20241022": {
+ "max_tokens": 8192,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 8192,
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "litellm_provider": "vertex_ai-anthropic_models",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_vision": true,
+ "supports_assistant_prefill": true
+ },
+ "vertex_ai/claude-3-haiku": {
+ "max_tokens": 4096,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 4096,
+ "input_cost_per_token": 0.00000025,
+ "output_cost_per_token": 0.00000125,
+ "litellm_provider": "vertex_ai-anthropic_models",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_vision": true,
+ "supports_assistant_prefill": true
+ },
  "vertex_ai/claude-3-haiku@20240307": {
  "max_tokens": 4096,
  "max_input_tokens": 200000,
@@ -2334,6 +3388,40 @@
  "supports_vision": true,
  "supports_assistant_prefill": true
  },
+ "vertex_ai/claude-3-5-haiku": {
+ "max_tokens": 8192,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 8192,
+ "input_cost_per_token": 0.000001,
+ "output_cost_per_token": 0.000005,
+ "litellm_provider": "vertex_ai-anthropic_models",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_assistant_prefill": true
+ },
+ "vertex_ai/claude-3-5-haiku@20241022": {
+ "max_tokens": 8192,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 8192,
+ "input_cost_per_token": 0.000001,
+ "output_cost_per_token": 0.000005,
+ "litellm_provider": "vertex_ai-anthropic_models",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_assistant_prefill": true
+ },
+ "vertex_ai/claude-3-opus": {
+ "max_tokens": 4096,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 4096,
+ "input_cost_per_token": 0.000015,
+ "output_cost_per_token": 0.000075,
+ "litellm_provider": "vertex_ai-anthropic_models",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_vision": true,
+ "supports_assistant_prefill": true
+ },
  "vertex_ai/claude-3-opus@20240229": {
  "max_tokens": 4096,
  "max_input_tokens": 200000,
@@ -2376,12 +3464,44 @@
  "mode": "chat",
  "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models"
  },
+ "vertex_ai/meta/llama-3.2-90b-vision-instruct-maas": {
+ "max_tokens": 128000,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 2048,
+ "input_cost_per_token": 0.0,
+ "output_cost_per_token": 0.0,
+ "litellm_provider": "vertex_ai-llama_models",
+ "mode": "chat",
+ "supports_system_messages": true,
+ "supports_vision": true,
+ "source": "https://console.cloud.google.com/vertex-ai/publishers/meta/model-garden/llama-3.2-90b-vision-instruct-maas"
+ },
  "vertex_ai/mistral-large@latest": {
  "max_tokens": 8191,
  "max_input_tokens": 128000,
  "max_output_tokens": 8191,
- "input_cost_per_token": 0.
- "output_cost_per_token": 0.
+ "input_cost_per_token": 0.000002,
+ "output_cost_per_token": 0.000006,
+ "litellm_provider": "vertex_ai-mistral_models",
+ "mode": "chat",
+ "supports_function_calling": true
+ },
+ "vertex_ai/mistral-large@2411-001": {
+ "max_tokens": 8191,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 8191,
+ "input_cost_per_token": 0.000002,
+ "output_cost_per_token": 0.000006,
+ "litellm_provider": "vertex_ai-mistral_models",
+ "mode": "chat",
+ "supports_function_calling": true
+ },
+ "vertex_ai/mistral-large-2411": {
+ "max_tokens": 8191,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 8191,
+ "input_cost_per_token": 0.000002,
+ "output_cost_per_token": 0.000006,
  "litellm_provider": "vertex_ai-mistral_models",
  "mode": "chat",
  "supports_function_calling": true
@@ -2390,8 +3510,8 @@
  "max_tokens": 8191,
  "max_input_tokens": 128000,
  "max_output_tokens": 8191,
- "input_cost_per_token": 0.
- "output_cost_per_token": 0.
+ "input_cost_per_token": 0.000002,
+ "output_cost_per_token": 0.000006,
  "litellm_provider": "vertex_ai-mistral_models",
  "mode": "chat",
  "supports_function_calling": true
@@ -2400,8 +3520,8 @@
  "max_tokens": 128000,
  "max_input_tokens": 128000,
  "max_output_tokens": 128000,
- "input_cost_per_token": 0.
- "output_cost_per_token": 0.
+ "input_cost_per_token": 0.00000015,
+ "output_cost_per_token": 0.00000015,
  "litellm_provider": "vertex_ai-mistral_models",
  "mode": "chat",
  "supports_function_calling": true
@@ -2465,8 +3585,8 @@
  "max_tokens": 128000,
  "max_input_tokens": 128000,
  "max_output_tokens": 128000,
- "input_cost_per_token": 0.
- "output_cost_per_token": 0.
+ "input_cost_per_token": 0.0000002,
+ "output_cost_per_token": 0.0000006,
  "litellm_provider": "vertex_ai-mistral_models",
  "mode": "chat",
  "supports_function_calling": true
@@ -2475,35 +3595,47 @@
  "max_tokens": 128000,
  "max_input_tokens": 128000,
  "max_output_tokens": 128000,
- "input_cost_per_token": 0.
- "output_cost_per_token": 0.
+ "input_cost_per_token": 0.0000002,
+ "output_cost_per_token": 0.0000006,
  "litellm_provider": "vertex_ai-mistral_models",
  "mode": "chat",
  "supports_function_calling": true
  },
  "vertex_ai/imagegeneration@006": {
- "
+ "output_cost_per_image": 0.020,
  "litellm_provider": "vertex_ai-image-models",
  "mode": "image_generation",
  "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
  },
  "vertex_ai/imagen-3.0-generate-001": {
- "
+ "output_cost_per_image": 0.04,
  "litellm_provider": "vertex_ai-image-models",
  "mode": "image_generation",
  "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
  },
  "vertex_ai/imagen-3.0-fast-generate-001": {
- "
+ "output_cost_per_image": 0.02,
  "litellm_provider": "vertex_ai-image-models",
  "mode": "image_generation",
  "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
  },
  "text-embedding-004": {
- "max_tokens":
- "max_input_tokens":
+ "max_tokens": 2048,
+ "max_input_tokens": 2048,
  "output_vector_size": 768,
- "
+ "input_cost_per_character": 0.000000025,
+ "input_cost_per_token": 0.0000001,
+ "output_cost_per_token": 0,
+ "litellm_provider": "vertex_ai-embedding-models",
+ "mode": "embedding",
+ "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models"
+ },
+ "text-embedding-005": {
+ "max_tokens": 2048,
+ "max_input_tokens": 2048,
+ "output_vector_size": 768,
+ "input_cost_per_character": 0.000000025,
+ "input_cost_per_token": 0.0000001,
  "output_cost_per_token": 0,
  "litellm_provider": "vertex_ai-embedding-models",
  "mode": "embedding",
@@ -2513,7 +3645,8 @@
  "max_tokens": 2048,
  "max_input_tokens": 2048,
  "output_vector_size": 768,
- "
+ "input_cost_per_character": 0.000000025,
+ "input_cost_per_token": 0.0000001,
  "output_cost_per_token": 0,
  "litellm_provider": "vertex_ai-embedding-models",
  "mode": "embedding",
@@ -2523,7 +3656,8 @@
  "max_tokens": 3072,
  "max_input_tokens": 3072,
  "output_vector_size": 768,
- "
+ "input_cost_per_character": 0.000000025,
+ "input_cost_per_token": 0.0000001,
  "output_cost_per_token": 0,
  "litellm_provider": "vertex_ai-embedding-models",
  "mode": "embedding",
@@ -2533,7 +3667,8 @@
  "max_tokens": 3072,
  "max_input_tokens": 3072,
  "output_vector_size": 768,
- "
+ "input_cost_per_character": 0.000000025,
+ "input_cost_per_token": 0.0000001,
  "output_cost_per_token": 0,
  "litellm_provider": "vertex_ai-embedding-models",
  "mode": "embedding",
@@ -2543,7 +3678,8 @@
  "max_tokens": 3072,
  "max_input_tokens": 3072,
  "output_vector_size": 768,
- "
+ "input_cost_per_character": 0.000000025,
+ "input_cost_per_token": 0.0000001,
  "output_cost_per_token": 0,
  "litellm_provider": "vertex_ai-embedding-models",
  "mode": "embedding",
@@ -2553,7 +3689,8 @@
  "max_tokens": 3072,
  "max_input_tokens": 3072,
  "output_vector_size": 768,
- "
+ "input_cost_per_character": 0.000000025,
+ "input_cost_per_token": 0.0000001,
  "output_cost_per_token": 0,
  "litellm_provider": "vertex_ai-embedding-models",
  "mode": "embedding",
@@ -2563,7 +3700,8 @@
  "max_tokens": 3072,
  "max_input_tokens": 3072,
  "output_vector_size": 768,
- "
+ "input_cost_per_character": 0.000000025,
+ "input_cost_per_token": 0.0000001,
  "output_cost_per_token": 0,
  "litellm_provider": "vertex_ai-embedding-models",
  "mode": "embedding",
@@ -2650,9 +3788,36 @@
  "mode": "completion",
  "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
  },
+ "gemini/gemini-1.5-flash-002": {
+ "max_tokens": 8192,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 8192,
+ "max_images_per_prompt": 3000,
+ "max_videos_per_prompt": 10,
+ "max_video_length": 1,
+ "max_audio_length_hours": 8.4,
+ "max_audio_per_prompt": 1,
+ "max_pdf_size_mb": 30,
+ "cache_read_input_token_cost": 0.00000001875,
+ "cache_creation_input_token_cost": 0.000001,
+ "input_cost_per_token": 0.000000075,
+ "input_cost_per_token_above_128k_tokens": 0.00000015,
+ "output_cost_per_token": 0.0000003,
+ "output_cost_per_token_above_128k_tokens": 0.0000006,
+ "litellm_provider": "gemini",
+ "mode": "chat",
+ "supports_system_messages": true,
+ "supports_function_calling": true,
+ "supports_vision": true,
+ "supports_response_schema": true,
+ "supports_prompt_caching": true,
+ "tpm": 4000000,
+ "rpm": 2000,
+ "source": "https://ai.google.dev/pricing"
+ },
  "gemini/gemini-1.5-flash-001": {
  "max_tokens": 8192,
- "max_input_tokens":
+ "max_input_tokens": 1048576,
  "max_output_tokens": 8192,
  "max_images_per_prompt": 3000,
  "max_videos_per_prompt": 10,
@@ -2660,6 +3825,8 @@
  "max_audio_length_hours": 8.4,
  "max_audio_per_prompt": 1,
  "max_pdf_size_mb": 30,
+ "cache_read_input_token_cost": 0.00000001875,
+ "cache_creation_input_token_cost": 0.000001,
  "input_cost_per_token": 0.000000075,
  "input_cost_per_token_above_128k_tokens": 0.00000015,
  "output_cost_per_token": 0.0000003,
@@ -2671,11 +3838,13 @@
  "supports_vision": true,
  "supports_response_schema": true,
  "supports_prompt_caching": true,
+ "tpm": 4000000,
+ "rpm": 2000,
  "source": "https://ai.google.dev/pricing"
  },
  "gemini/gemini-1.5-flash": {
  "max_tokens": 8192,
- "max_input_tokens":
+ "max_input_tokens": 1048576,
  "max_output_tokens": 8192,
  "max_images_per_prompt": 3000,
  "max_videos_per_prompt": 10,
@@ -2693,11 +3862,13 @@
  "supports_function_calling": true,
  "supports_vision": true,
  "supports_response_schema": true,
+ "tpm": 4000000,
+ "rpm": 2000,
  "source": "https://ai.google.dev/pricing"
  },
  "gemini/gemini-1.5-flash-latest": {
  "max_tokens": 8192,
- "max_input_tokens":
+ "max_input_tokens": 1048576,
  "max_output_tokens": 8192,
  "max_images_per_prompt": 3000,
  "max_videos_per_prompt": 10,
@@ -2715,11 +3886,118 @@
  "supports_function_calling": true,
  "supports_vision": true,
  "supports_response_schema": true,
+ "supports_prompt_caching": true,
+ "tpm": 4000000,
+ "rpm": 2000,
+ "source": "https://ai.google.dev/pricing"
+ },
+ "gemini/gemini-1.5-flash-8b": {
+ "max_tokens": 8192,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 8192,
+ "max_images_per_prompt": 3000,
+ "max_videos_per_prompt": 10,
+ "max_video_length": 1,
+ "max_audio_length_hours": 8.4,
+ "max_audio_per_prompt": 1,
+ "max_pdf_size_mb": 30,
+ "input_cost_per_token": 0,
+ "input_cost_per_token_above_128k_tokens": 0,
+ "output_cost_per_token": 0,
+ "output_cost_per_token_above_128k_tokens": 0,
+ "litellm_provider": "gemini",
+ "mode": "chat",
+ "supports_system_messages": true,
+ "supports_function_calling": true,
+ "supports_vision": true,
+ "supports_response_schema": true,
+ "supports_prompt_caching": true,
+ "tpm": 4000000,
+ "rpm": 4000,
+ "source": "https://ai.google.dev/pricing"
+ },
+ "gemini/gemini-1.5-flash-8b-exp-0924": {
+ "max_tokens": 8192,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 8192,
+ "max_images_per_prompt": 3000,
+ "max_videos_per_prompt": 10,
+ "max_video_length": 1,
+ "max_audio_length_hours": 8.4,
+ "max_audio_per_prompt": 1,
+ "max_pdf_size_mb": 30,
+ "input_cost_per_token": 0,
+ "input_cost_per_token_above_128k_tokens": 0,
+ "output_cost_per_token": 0,
+ "output_cost_per_token_above_128k_tokens": 0,
+ "litellm_provider": "gemini",
+ "mode": "chat",
+ "supports_system_messages": true,
+ "supports_function_calling": true,
+ "supports_vision": true,
+ "supports_response_schema": true,
+ "supports_prompt_caching": true,
+ "tpm": 4000000,
+ "rpm": 4000,
  "source": "https://ai.google.dev/pricing"
  },
+ "gemini/gemini-exp-1114": {
+ "max_tokens": 8192,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 8192,
+ "max_images_per_prompt": 3000,
+ "max_videos_per_prompt": 10,
+ "max_video_length": 1,
+ "max_audio_length_hours": 8.4,
+ "max_audio_per_prompt": 1,
+ "max_pdf_size_mb": 30,
+ "input_cost_per_token": 0,
+ "input_cost_per_token_above_128k_tokens": 0,
+ "output_cost_per_token": 0,
+ "output_cost_per_token_above_128k_tokens": 0,
+ "litellm_provider": "gemini",
+ "mode": "chat",
+ "supports_system_messages": true,
+ "supports_function_calling": true,
+ "supports_vision": true,
+ "supports_response_schema": true,
+ "tpm": 4000000,
+ "rpm": 1000,
+ "source": "https://ai.google.dev/pricing",
+ "metadata": {
+ "notes": "Rate limits not documented for gemini-exp-1114. Assuming same as gemini-1.5-pro."
+ }
+ },
+ "gemini/gemini-exp-1206": {
+ "max_tokens": 8192,
+ "max_input_tokens": 2097152,
+ "max_output_tokens": 8192,
+ "max_images_per_prompt": 3000,
+ "max_videos_per_prompt": 10,
+ "max_video_length": 1,
+ "max_audio_length_hours": 8.4,
+ "max_audio_per_prompt": 1,
+ "max_pdf_size_mb": 30,
+ "input_cost_per_token": 0,
+ "input_cost_per_token_above_128k_tokens": 0,
+ "output_cost_per_token": 0,
+ "output_cost_per_token_above_128k_tokens": 0,
+ "litellm_provider": "gemini",
+ "mode": "chat",
+ "supports_system_messages": true,
+ "supports_function_calling": true,
+ "supports_vision": true,
+ "supports_response_schema": true,
+ "tpm": 4000000,
+ "rpm": 1000,
+ "source": "https://ai.google.dev/pricing",
+ "metadata": {
+ "notes": "Rate limits not documented for gemini-exp-1206. Assuming same as gemini-1.5-pro."
+ }
+ },
  "gemini/gemini-1.5-flash-exp-0827": {
  "max_tokens": 8192,
- "max_input_tokens":
+ "max_input_tokens": 1048576,
  "max_output_tokens": 8192,
  "max_images_per_prompt": 3000,
  "max_videos_per_prompt": 10,
@@ -2737,6 +4015,8 @@
  "supports_function_calling": true,
  "supports_vision": true,
  "supports_response_schema": true,
+ "tpm": 4000000,
+ "rpm": 2000,
  "source": "https://ai.google.dev/pricing"
  },
  "gemini/gemini-1.5-flash-8b-exp-0827": {
@@ -2758,6 +4038,9 @@
  "supports_system_messages": true,
  "supports_function_calling": true,
  "supports_vision": true,
+ "supports_response_schema": true,
+ "tpm": 4000000,
+ "rpm": 4000,
  "source": "https://ai.google.dev/pricing"
  },
  "gemini/gemini-pro": {
@@ -2771,7 +4054,10 @@
  "litellm_provider": "gemini",
  "mode": "chat",
  "supports_function_calling": true,
- "
+ "rpd": 30000,
+ "tpm": 120000,
+ "rpm": 360,
+ "source": "https://ai.google.dev/gemini-api/docs/models/gemini"
  },
  "gemini/gemini-1.5-pro": {
  "max_tokens": 8192,
@@ -2788,6 +4074,28 @@
  "supports_vision": true,
  "supports_tool_choice": true,
  "supports_response_schema": true,
+ "tpm": 4000000,
+ "rpm": 1000,
+ "source": "https://ai.google.dev/pricing"
+ },
+ "gemini/gemini-1.5-pro-002": {
+ "max_tokens": 8192,
+ "max_input_tokens": 2097152,
+ "max_output_tokens": 8192,
+ "input_cost_per_token": 0.0000035,
+ "input_cost_per_token_above_128k_tokens": 0.000007,
+ "output_cost_per_token": 0.0000105,
+ "output_cost_per_token_above_128k_tokens": 0.000021,
+ "litellm_provider": "gemini",
+ "mode": "chat",
+ "supports_system_messages": true,
+ "supports_function_calling": true,
+ "supports_vision": true,
+ "supports_tool_choice": true,
+ "supports_response_schema": true,
+ "supports_prompt_caching": true,
+ "tpm": 4000000,
+ "rpm": 1000,
  "source": "https://ai.google.dev/pricing"
  },
  "gemini/gemini-1.5-pro-001": {
@@ -2806,6 +4114,8 @@
  "supports_tool_choice": true,
  "supports_response_schema": true,
  "supports_prompt_caching": true,
+ "tpm": 4000000,
+ "rpm": 1000,
  "source": "https://ai.google.dev/pricing"
  },
  "gemini/gemini-1.5-pro-exp-0801": {
@@ -2823,6 +4133,8 @@
  "supports_vision": true,
  "supports_tool_choice": true,
  "supports_response_schema": true,
+ "tpm": 4000000,
+ "rpm": 1000,
  "source": "https://ai.google.dev/pricing"
  },
  "gemini/gemini-1.5-pro-exp-0827": {
@@ -2840,6 +4152,8 @@
  "supports_vision": true,
  "supports_tool_choice": true,
  "supports_response_schema": true,
+ "tpm": 4000000,
+ "rpm": 1000,
  "source": "https://ai.google.dev/pricing"
  },
  "gemini/gemini-1.5-pro-latest": {
@@ -2857,6 +4171,8 @@
  "supports_vision": true,
  "supports_tool_choice": true,
  "supports_response_schema": true,
+ "tpm": 4000000,
+ "rpm": 1000,
  "source": "https://ai.google.dev/pricing"
  },
  "gemini/gemini-pro-vision": {
@@ -2871,6 +4187,9 @@
  "mode": "chat",
  "supports_function_calling": true,
  "supports_vision": true,
+ "rpd": 30000,
+ "tpm": 120000,
+ "rpm": 360,
  "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
  },
  "gemini/gemini-gemma-2-27b-it": {
@@ -2962,54 +4281,115 @@
  "litellm_provider": "cohere",
  "mode": "completion"
  },
- "
- "max_tokens":
- "max_input_tokens":
- "
- "
+ "rerank-v3.5": {
+ "max_tokens": 4096,
+ "max_input_tokens": 4096,
+ "max_output_tokens": 4096,
+ "max_query_tokens": 2048,
+ "input_cost_per_token": 0.0,
+ "input_cost_per_query": 0.002,
+ "output_cost_per_token": 0.0,
  "litellm_provider": "cohere",
- "mode": "
+ "mode": "rerank"
+ },
+ "rerank-english-v3.0": {
+ "max_tokens": 4096,
+ "max_input_tokens": 4096,
+ "max_output_tokens": 4096,
+ "max_query_tokens": 2048,
+ "input_cost_per_token": 0.0,
+ "input_cost_per_query": 0.002,
+ "output_cost_per_token": 0.0,
+ "litellm_provider": "cohere",
+ "mode": "rerank"
+ },
+ "rerank-multilingual-v3.0": {
+ "max_tokens": 4096,
+ "max_input_tokens": 4096,
+ "max_output_tokens": 4096,
+ "max_query_tokens": 2048,
+ "input_cost_per_token": 0.0,
+ "input_cost_per_query": 0.002,
+ "output_cost_per_token": 0.0,
+ "litellm_provider": "cohere",
+ "mode": "rerank"
+ },
+ "rerank-english-v2.0": {
+ "max_tokens": 4096,
+ "max_input_tokens": 4096,
+ "max_output_tokens": 4096,
+ "max_query_tokens": 2048,
+ "input_cost_per_token": 0.0,
+ "input_cost_per_query": 0.002,
+ "output_cost_per_token": 0.0,
+ "litellm_provider": "cohere",
+ "mode": "rerank"
+ },
+ "rerank-multilingual-v2.0": {
+ "max_tokens": 4096,
+ "max_input_tokens": 4096,
+ "max_output_tokens": 4096,
+ "max_query_tokens": 2048,
+ "input_cost_per_token": 0.0,
+ "input_cost_per_query": 0.002,
+ "output_cost_per_token": 0.0,
+ "litellm_provider": "cohere",
+ "mode": "rerank"
  },
  "embed-english-light-v3.0": {
- "max_tokens":
- "max_input_tokens":
+ "max_tokens": 1024,
+ "max_input_tokens": 1024,
  "input_cost_per_token": 0.00000010,
  "output_cost_per_token": 0.00000,
  "litellm_provider": "cohere",
  "mode": "embedding"
  },
  "embed-multilingual-v3.0": {
- "max_tokens":
- "max_input_tokens":
+ "max_tokens": 1024,
+ "max_input_tokens": 1024,
  "input_cost_per_token": 0.00000010,
  "output_cost_per_token": 0.00000,
  "litellm_provider": "cohere",
  "mode": "embedding"
  },
  "embed-english-v2.0": {
- "max_tokens":
- "max_input_tokens":
+ "max_tokens": 4096,
+ "max_input_tokens": 4096,
  "input_cost_per_token": 0.00000010,
  "output_cost_per_token": 0.00000,
  "litellm_provider": "cohere",
  "mode": "embedding"
  },
  "embed-english-light-v2.0": {
- "max_tokens":
- "max_input_tokens":
+ "max_tokens": 1024,
+ "max_input_tokens": 1024,
  "input_cost_per_token": 0.00000010,
  "output_cost_per_token": 0.00000,
  "litellm_provider": "cohere",
  "mode": "embedding"
  },
  "embed-multilingual-v2.0": {
- "max_tokens":
- "max_input_tokens":
+ "max_tokens": 768,
+ "max_input_tokens": 768,
  "input_cost_per_token": 0.00000010,
  "output_cost_per_token": 0.00000,
  "litellm_provider": "cohere",
  "mode": "embedding"
  },
+ "embed-english-v3.0": {
+ "max_tokens": 1024,
+ "max_input_tokens": 1024,
+ "input_cost_per_token": 0.00000010,
+ "input_cost_per_image": 0.0001,
+ "output_cost_per_token": 0.00000,
+ "litellm_provider": "cohere",
+ "mode": "embedding",
+ "supports_image_input": true,
+ "supports_embedding_image_input": true,
+ "metadata": {
+ "notes": "'supports_image_input' is a deprecated field. Use 'supports_embedding_image_input' instead."
+ }
+ },
  "replicate/meta/llama-2-13b": {
  "max_tokens": 4096,
  "max_input_tokens": 4096,
@@ -3127,13 +4507,24 @@
         "litellm_provider": "replicate",
         "mode": "chat"
     },
+    "openrouter/deepseek/deepseek-chat": {
+        "max_tokens": 8192,
+        "max_input_tokens": 66000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00000014,
+        "output_cost_per_token": 0.00000028,
+        "litellm_provider": "openrouter",
+        "supports_prompt_caching": true,
+        "mode": "chat"
+    },
     "openrouter/deepseek/deepseek-coder": {
-        "max_tokens":
-        "max_input_tokens":
+        "max_tokens": 8192,
+        "max_input_tokens": 66000,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.00000014,
         "output_cost_per_token": 0.00000028,
         "litellm_provider": "openrouter",
+        "supports_prompt_caching": true,
         "mode": "chat"
     },
     "openrouter/microsoft/wizardlm-2-8x22b:nitro": {
@@ -3186,6 +4577,14 @@
         "supports_function_calling": true,
         "supports_vision": true
     },
+    "openrouter/anthropic/claude-3-5-haiku": {
+        "max_tokens": 200000,
+        "input_cost_per_token": 0.000001,
+        "output_cost_per_token": 0.000005,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
     "openrouter/anthropic/claude-3-haiku-20240307": {
         "max_tokens": 4096,
         "max_input_tokens": 200000,
@@ -3198,6 +4597,17 @@
         "supports_vision": true,
         "tool_use_system_prompt_tokens": 264
     },
+    "openrouter/anthropic/claude-3-5-haiku-20241022": {
+        "max_tokens": 8192,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.000001,
+        "output_cost_per_token": 0.000005,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "tool_use_system_prompt_tokens": 264
+    },
     "openrouter/anthropic/claude-3.5-sonnet": {
         "max_tokens": 8192,
         "max_input_tokens": 200000,
@@ -3292,17 +4702,33 @@
         "litellm_provider": "openrouter",
         "mode": "chat"
     },
+    "openrouter/openai/o1": {
+        "max_tokens": 100000,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 100000,
+        "input_cost_per_token": 0.000015,
+        "output_cost_per_token": 0.00006,
+        "cache_read_input_token_cost": 0.0000075,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true,
+        "supports_system_messages": true,
+        "supports_response_schema": true
+    },
     "openrouter/openai/o1-mini": {
         "max_tokens": 65536,
         "max_input_tokens": 128000,
         "max_output_tokens": 65536,
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000012,
-        "litellm_provider": "
+        "litellm_provider": "openrouter",
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision":
+        "supports_vision": false
     },
     "openrouter/openai/o1-mini-2024-09-12": {
         "max_tokens": 65536,
@@ -3310,11 +4736,11 @@
         "max_output_tokens": 65536,
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000012,
-        "litellm_provider": "
+        "litellm_provider": "openrouter",
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision":
+        "supports_vision": false
     },
     "openrouter/openai/o1-preview": {
         "max_tokens": 32768,
@@ -3322,11 +4748,11 @@
         "max_output_tokens": 32768,
         "input_cost_per_token": 0.000015,
         "output_cost_per_token": 0.000060,
-        "litellm_provider": "
+        "litellm_provider": "openrouter",
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision":
+        "supports_vision": false
     },
     "openrouter/openai/o1-preview-2024-09-12": {
         "max_tokens": 32768,
@@ -3334,11 +4760,11 @@
         "max_output_tokens": 32768,
         "input_cost_per_token": 0.000015,
         "output_cost_per_token": 0.000060,
-        "litellm_provider": "
+        "litellm_provider": "openrouter",
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision":
+        "supports_vision": false
     },
     "openrouter/openai/gpt-4o": {
         "max_tokens": 4096,
@@ -3514,6 +4940,15 @@
         "litellm_provider": "openrouter",
         "mode": "chat"
     },
+    "openrouter/qwen/qwen-2.5-coder-32b-instruct": {
+        "max_tokens": 33792,
+        "max_input_tokens": 33792,
+        "max_output_tokens": 33792,
+        "input_cost_per_token": 0.00000018,
+        "output_cost_per_token": 0.00000018,
+        "litellm_provider": "openrouter",
+        "mode": "chat"
+    },
     "j2-ultra": {
         "max_tokens": 8192,
         "max_input_tokens": 8192,
@@ -3719,6 +5154,22 @@
         "litellm_provider": "bedrock",
         "mode": "embedding"
     },
+    "amazon.titan-embed-image-v1": {
+        "max_tokens": 128,
+        "max_input_tokens": 128,
+        "output_vector_size": 1024,
+        "input_cost_per_token": 0.0000008,
+        "input_cost_per_image": 0.00006,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "bedrock",
+        "supports_image_input": true,
+        "supports_embedding_image_input": true,
+        "mode": "embedding",
+        "source": "https://us-east-1.console.aws.amazon.com/bedrock/home?region=us-east-1#/providers?model=amazon.titan-image-generator-v1",
+        "metadata": {
+            "notes": "'supports_image_input' is a deprecated field. Use 'supports_embedding_image_input' instead."
+        }
+    },
     "mistral.mistral-7b-instruct-v0:2": {
         "max_tokens": 8191,
         "max_input_tokens": 32000,
@@ -3850,6 +5301,43 @@
         "mode": "chat",
         "supports_function_calling": true
     },
+    "amazon.nova-micro-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 300000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000000035,
+        "output_cost_per_token": 0.00000014,
+        "litellm_provider": "bedrock_converse",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_prompt_caching": true
+    },
+    "amazon.nova-lite-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00000006,
+        "output_cost_per_token": 0.00000024,
+        "litellm_provider": "bedrock_converse",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_pdf_input": true,
+        "supports_prompt_caching": true
+    },
+    "amazon.nova-pro-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 300000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.0000008,
+        "output_cost_per_token": 0.0000032,
+        "litellm_provider": "bedrock_converse",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_pdf_input": true,
+        "supports_prompt_caching": true
+    },
     "anthropic.claude-3-sonnet-20240229-v1:0": {
         "max_tokens": 4096,
         "max_input_tokens": 200000,
@@ -3862,9 +5350,9 @@
         "supports_vision": true
     },
     "anthropic.claude-3-5-sonnet-20240620-v1:0": {
-        "max_tokens":
+        "max_tokens": 4096,
         "max_input_tokens": 200000,
-        "max_output_tokens":
+        "max_output_tokens": 4096,
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000015,
         "litellm_provider": "bedrock",
@@ -3872,6 +5360,19 @@
         "supports_function_calling": true,
         "supports_vision": true
     },
+    "anthropic.claude-3-5-sonnet-20241022-v2:0": {
+        "max_tokens": 8192,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000015,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_assistant_prefill": true,
+        "supports_prompt_caching": true
+    },
     "anthropic.claude-3-haiku-20240307-v1:0": {
         "max_tokens": 4096,
         "max_input_tokens": 200000,
@@ -3883,6 +5384,18 @@
         "supports_function_calling": true,
         "supports_vision": true
     },
+    "anthropic.claude-3-5-haiku-20241022-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000001,
+        "output_cost_per_token": 0.000005,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_assistant_prefill": true,
+        "supports_function_calling": true,
+        "supports_prompt_caching": true
+    },
     "anthropic.claude-3-opus-20240229-v1:0": {
         "max_tokens": 4096,
         "max_input_tokens": 200000,
@@ -3906,6 +5419,17 @@
         "supports_vision": true
     },
     "us.anthropic.claude-3-5-sonnet-20240620-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000015,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true
+    },
+    "us.anthropic.claude-3-5-sonnet-20241022-v2:0": {
         "max_tokens": 8192,
         "max_input_tokens": 200000,
         "max_output_tokens": 8192,
@@ -3914,7 +5438,8 @@
         "litellm_provider": "bedrock",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_assistant_prefill": true
     },
     "us.anthropic.claude-3-haiku-20240307-v1:0": {
         "max_tokens": 4096,
@@ -3927,6 +5452,17 @@
         "supports_function_calling": true,
         "supports_vision": true
     },
+    "us.anthropic.claude-3-5-haiku-20241022-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000001,
+        "output_cost_per_token": 0.000005,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_assistant_prefill": true,
+        "supports_function_calling": true
+    },
     "us.anthropic.claude-3-opus-20240229-v1:0": {
         "max_tokens": 4096,
         "max_input_tokens": 200000,
@@ -3939,9 +5475,9 @@
         "supports_vision": true
     },
     "eu.anthropic.claude-3-sonnet-20240229-v1:0": {
-        "max_tokens":
+        "max_tokens": 4096,
         "max_input_tokens": 200000,
-        "max_output_tokens":
+        "max_output_tokens": 4096,
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000015,
         "litellm_provider": "bedrock",
@@ -3950,6 +5486,17 @@
         "supports_vision": true
     },
     "eu.anthropic.claude-3-5-sonnet-20240620-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000015,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true
+    },
+    "eu.anthropic.claude-3-5-sonnet-20241022-v2:0": {
         "max_tokens": 8192,
         "max_input_tokens": 200000,
         "max_output_tokens": 8192,
@@ -3958,7 +5505,8 @@
         "litellm_provider": "bedrock",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_assistant_prefill": true
     },
     "eu.anthropic.claude-3-haiku-20240307-v1:0": {
         "max_tokens": 4096,
@@ -3971,6 +5519,16 @@
         "supports_function_calling": true,
         "supports_vision": true
     },
+    "eu.anthropic.claude-3-5-haiku-20241022-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000001,
+        "output_cost_per_token": 0.000005,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
     "eu.anthropic.claude-3-opus-20240229-v1:0": {
         "max_tokens": 4096,
         "max_input_tokens": 200000,
@@ -4538,6 +6096,15 @@
         "litellm_provider": "bedrock",
         "mode": "embedding"
     },
+    "meta.llama3-3-70b-instruct-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00000072,
+        "output_cost_per_token": 0.00000072,
+        "litellm_provider": "bedrock_converse",
+        "mode": "chat"
+    },
     "meta.llama2-13b-chat-v1": {
         "max_tokens": 4096,
         "max_input_tokens": 4096,
@@ -4655,79 +6222,222 @@
         "litellm_provider": "bedrock",
         "mode": "chat"
     },
-    "bedrock/ap-south-1/meta.llama3-70b-instruct-v1:0": {
-        "max_tokens": 8192,
-        "max_input_tokens": 8192,
-        "max_output_tokens": 8192,
-        "input_cost_per_token": 0.00000318,
-        "output_cost_per_token": 0.0000042,
+    "bedrock/ap-south-1/meta.llama3-70b-instruct-v1:0": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.00000318,
+        "output_cost_per_token": 0.0000042,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/ca-central-1/meta.llama3-70b-instruct-v1:0": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.00000305,
+        "output_cost_per_token": 0.00000403,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/eu-west-1/meta.llama3-70b-instruct-v1:0": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.00000286,
+        "output_cost_per_token": 0.00000378,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/eu-west-2/meta.llama3-70b-instruct-v1:0": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.00000345,
+        "output_cost_per_token": 0.00000455,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "bedrock/sa-east-1/meta.llama3-70b-instruct-v1:0": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.00000445,
+        "output_cost_per_token": 0.00000588,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "meta.llama3-1-8b-instruct-v1:0": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 2048,
+        "input_cost_per_token": 0.00000022,
+        "output_cost_per_token": 0.00000022,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false
+    },
+    "us.meta.llama3-1-8b-instruct-v1:0": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 2048,
+        "input_cost_per_token": 0.00000022,
+        "output_cost_per_token": 0.00000022,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false
+    },
+    "meta.llama3-1-70b-instruct-v1:0": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 2048,
+        "input_cost_per_token": 0.00000099,
+        "output_cost_per_token": 0.00000099,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false
+    },
+    "us.meta.llama3-1-70b-instruct-v1:0": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 2048,
+        "input_cost_per_token": 0.00000099,
+        "output_cost_per_token": 0.00000099,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false
+    },
+    "meta.llama3-1-405b-instruct-v1:0": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00000532,
+        "output_cost_per_token": 0.000016,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false
+    },
+    "us.meta.llama3-1-405b-instruct-v1:0": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00000532,
+        "output_cost_per_token": 0.000016,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false
+    },
+    "meta.llama3-2-1b-instruct-v1:0": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.0000001,
+        "output_cost_per_token": 0.0000001,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false
+    },
+    "us.meta.llama3-2-1b-instruct-v1:0": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.0000001,
+        "output_cost_per_token": 0.0000001,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false
+    },
+    "eu.meta.llama3-2-1b-instruct-v1:0": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00000013,
+        "output_cost_per_token": 0.00000013,
         "litellm_provider": "bedrock",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false
     },
-    "
-        "max_tokens":
-        "max_input_tokens":
-        "max_output_tokens":
-        "input_cost_per_token": 0.
-        "output_cost_per_token": 0.
+    "meta.llama3-2-3b-instruct-v1:0": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00000015,
+        "output_cost_per_token": 0.00000015,
         "litellm_provider": "bedrock",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false
     },
-    "
-        "max_tokens":
-        "max_input_tokens":
-        "max_output_tokens":
-        "input_cost_per_token": 0.
-        "output_cost_per_token": 0.
+    "us.meta.llama3-2-3b-instruct-v1:0": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00000015,
+        "output_cost_per_token": 0.00000015,
         "litellm_provider": "bedrock",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false
     },
-    "
-        "max_tokens":
-        "max_input_tokens":
-        "max_output_tokens":
-        "input_cost_per_token": 0.
-        "output_cost_per_token": 0.
+    "eu.meta.llama3-2-3b-instruct-v1:0": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00000019,
+        "output_cost_per_token": 0.00000019,
         "litellm_provider": "bedrock",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false
     },
-    "
-        "max_tokens":
-        "max_input_tokens":
-        "max_output_tokens":
-        "input_cost_per_token": 0.
-        "output_cost_per_token": 0.
+    "meta.llama3-2-11b-instruct-v1:0": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00000035,
+        "output_cost_per_token": 0.00000035,
         "litellm_provider": "bedrock",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false
     },
-    "meta.llama3-
+    "us.meta.llama3-2-11b-instruct-v1:0": {
         "max_tokens": 128000,
         "max_input_tokens": 128000,
-        "max_output_tokens":
-        "input_cost_per_token": 0.
-        "output_cost_per_token": 0.
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00000035,
+        "output_cost_per_token": 0.00000035,
         "litellm_provider": "bedrock",
         "mode": "chat",
         "supports_function_calling": true,
         "supports_tool_choice": false
     },
-    "meta.llama3-
+    "meta.llama3-2-90b-instruct-v1:0": {
         "max_tokens": 128000,
         "max_input_tokens": 128000,
-        "max_output_tokens":
-        "input_cost_per_token": 0.
-        "output_cost_per_token": 0.
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000002,
+        "output_cost_per_token": 0.000002,
         "litellm_provider": "bedrock",
         "mode": "chat",
         "supports_function_calling": true,
         "supports_tool_choice": false
     },
-    "meta.llama3-
+    "us.meta.llama3-2-90b-instruct-v1:0": {
         "max_tokens": 128000,
         "max_input_tokens": 128000,
         "max_output_tokens": 4096,
-        "input_cost_per_token": 0.
-        "output_cost_per_token": 0.
+        "input_cost_per_token": 0.000002,
+        "output_cost_per_token": 0.000002,
         "litellm_provider": "bedrock",
         "mode": "chat",
         "supports_function_calling": true,
@@ -4775,6 +6485,20 @@
         "litellm_provider": "bedrock",
         "mode": "image_generation"
     },
+    "stability.sd3-large-v1:0": {
+        "max_tokens": 77,
+        "max_input_tokens": 77,
+        "output_cost_per_image": 0.08,
+        "litellm_provider": "bedrock",
+        "mode": "image_generation"
+    },
+    "stability.stable-image-ultra-v1:0": {
+        "max_tokens": 77,
+        "max_input_tokens": 77,
+        "output_cost_per_image": 0.14,
+        "litellm_provider": "bedrock",
+        "mode": "image_generation"
+    },
     "sagemaker/meta-textgeneration-llama-2-7b": {
         "max_tokens": 4096,
         "max_input_tokens": 4096,
@@ -4832,50 +6556,99 @@
     "together-ai-up-to-4b": {
         "input_cost_per_token": 0.0000001,
         "output_cost_per_token": 0.0000001,
-        "litellm_provider": "together_ai"
+        "litellm_provider": "together_ai",
+        "mode": "chat"
     },
     "together-ai-4.1b-8b": {
         "input_cost_per_token": 0.0000002,
         "output_cost_per_token": 0.0000002,
-        "litellm_provider": "together_ai"
+        "litellm_provider": "together_ai",
+        "mode": "chat"
     },
     "together-ai-8.1b-21b": {
         "max_tokens": 1000,
         "input_cost_per_token": 0.0000003,
         "output_cost_per_token": 0.0000003,
-        "litellm_provider": "together_ai"
+        "litellm_provider": "together_ai",
+        "mode": "chat"
     },
     "together-ai-21.1b-41b": {
         "input_cost_per_token": 0.0000008,
         "output_cost_per_token": 0.0000008,
-        "litellm_provider": "together_ai"
+        "litellm_provider": "together_ai",
+        "mode": "chat"
     },
     "together-ai-41.1b-80b": {
         "input_cost_per_token": 0.0000009,
         "output_cost_per_token": 0.0000009,
-        "litellm_provider": "together_ai"
+        "litellm_provider": "together_ai",
+        "mode": "chat"
     },
     "together-ai-81.1b-110b": {
         "input_cost_per_token": 0.0000018,
         "output_cost_per_token": 0.0000018,
-        "litellm_provider": "together_ai"
+        "litellm_provider": "together_ai",
+        "mode": "chat"
+    },
+    "together-ai-embedding-up-to-150m": {
+        "input_cost_per_token": 0.000000008,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "together_ai",
+        "mode": "embedding"
+    },
+    "together-ai-embedding-151m-to-350m": {
+        "input_cost_per_token": 0.000000016,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "together_ai",
+        "mode": "embedding"
+    },
+    "together_ai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": {
+        "input_cost_per_token": 0.00000018,
+        "output_cost_per_token": 0.00000018,
+        "litellm_provider": "together_ai",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_response_schema": true,
+        "mode": "chat"
+    },
+    "together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": {
+        "input_cost_per_token": 0.00000088,
+        "output_cost_per_token": 0.00000088,
+        "litellm_provider": "together_ai",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_response_schema": true,
+        "mode": "chat"
+    },
+    "together_ai/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": {
+        "input_cost_per_token": 0.0000035,
+        "output_cost_per_token": 0.0000035,
+        "litellm_provider": "together_ai",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "mode": "chat"
     },
     "together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1": {
         "input_cost_per_token": 0.0000006,
         "output_cost_per_token": 0.0000006,
         "litellm_provider": "together_ai",
         "supports_function_calling": true,
-        "supports_parallel_function_calling": true
+        "supports_parallel_function_calling": true,
+        "supports_response_schema": true,
+        "mode": "chat"
     },
     "together_ai/mistralai/Mistral-7B-Instruct-v0.1": {
         "litellm_provider": "together_ai",
         "supports_function_calling": true,
-        "supports_parallel_function_calling": true
+        "supports_parallel_function_calling": true,
+        "supports_response_schema": true,
+        "mode": "chat"
     },
     "together_ai/togethercomputer/CodeLlama-34b-Instruct": {
         "litellm_provider": "together_ai",
         "supports_function_calling": true,
-        "supports_parallel_function_calling": true
+        "supports_parallel_function_calling": true,
+        "mode": "chat"
     },
     "ollama/codegemma": {
         "max_tokens": 8192,
@@ -4953,7 +6726,7 @@
         "input_cost_per_token": 0.0,
         "output_cost_per_token": 0.0,
         "litellm_provider": "ollama",
-        "mode": "
+        "mode": "chat"
     },
     "ollama/llama2:7b": {
         "max_tokens": 4096,
@@ -4962,7 +6735,7 @@
         "input_cost_per_token": 0.0,
         "output_cost_per_token": 0.0,
         "litellm_provider": "ollama",
-        "mode": "
+        "mode": "chat"
     },
     "ollama/llama2:13b": {
         "max_tokens": 4096,
@@ -4971,7 +6744,7 @@
         "input_cost_per_token": 0.0,
         "output_cost_per_token": 0.0,
         "litellm_provider": "ollama",
-        "mode": "
+        "mode": "chat"
     },
     "ollama/llama2:70b": {
         "max_tokens": 4096,
@@ -4980,7 +6753,7 @@
         "input_cost_per_token": 0.0,
         "output_cost_per_token": 0.0,
         "litellm_provider": "ollama",
-        "mode": "
+        "mode": "chat"
     },
     "ollama/llama2-uncensored": {
         "max_tokens": 4096,
@@ -5271,6 +7044,17 @@
         "litellm_provider": "deepinfra",
         "mode": "chat"
     },
+    "deepinfra/meta-llama/Meta-Llama-3.1-405B-Instruct": {
+        "max_tokens": 32768,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 32768,
+        "input_cost_per_token": 0.0000009,
+        "output_cost_per_token": 0.0000009,
+        "litellm_provider": "deepinfra",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true
+    },
     "deepinfra/01-ai/Yi-34B-200K": {
         "max_tokens": 4096,
         "max_input_tokens": 200000,
@@ -5473,6 +7257,56 @@
         "litellm_provider": "perplexity",
         "mode": "chat"
     },
+    "fireworks_ai/accounts/fireworks/models/llama-v3p2-1b-instruct": {
+        "max_tokens": 16384,
+        "max_input_tokens": 16384,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 0.0000001,
+        "output_cost_per_token": 0.0000001,
+        "litellm_provider": "fireworks_ai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_response_schema": true,
+        "source": "https://fireworks.ai/pricing"
+    },
+    "fireworks_ai/accounts/fireworks/models/llama-v3p2-3b-instruct": {
+        "max_tokens": 16384,
+        "max_input_tokens": 16384,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 0.0000001,
+        "output_cost_per_token": 0.0000001,
+        "litellm_provider": "fireworks_ai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_response_schema": true,
+        "source": "https://fireworks.ai/pricing"
+    },
+    "fireworks_ai/accounts/fireworks/models/llama-v3p2-11b-vision-instruct": {
+        "max_tokens": 16384,
+        "max_input_tokens": 16384,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 0.0000002,
+        "output_cost_per_token": 0.0000002,
+        "litellm_provider": "fireworks_ai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "source": "https://fireworks.ai/pricing"
+    },
+    "accounts/fireworks/models/llama-v3p2-90b-vision-instruct": {
+        "max_tokens": 16384,
+        "max_input_tokens": 16384,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 0.0000009,
+        "output_cost_per_token": 0.0000009,
+        "litellm_provider": "fireworks_ai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "source": "https://fireworks.ai/pricing"
+    },
     "fireworks_ai/accounts/fireworks/models/firefunction-v2": {
         "max_tokens": 8192,
         "max_input_tokens": 8192,
@@ -5482,6 +7316,7 @@
         "litellm_provider": "fireworks_ai",
         "mode": "chat",
         "supports_function_calling": true,
+        "supports_response_schema": true,
         "source": "https://fireworks.ai/pricing"
     },
     "fireworks_ai/accounts/fireworks/models/mixtral-8x22b-instruct-hf": {
@@ -5493,6 +7328,7 @@
         "litellm_provider": "fireworks_ai",
         "mode": "chat",
         "supports_function_calling": true,
+        "supports_response_schema": true,
         "source": "https://fireworks.ai/pricing"
     },
     "fireworks_ai/accounts/fireworks/models/qwen2-72b-instruct": {
@@ -5504,6 +7340,19 @@
         "litellm_provider": "fireworks_ai",
         "mode": "chat",
         "supports_function_calling": true,
+        "supports_response_schema": true,
+        "source": "https://fireworks.ai/pricing"
+    },
+    "fireworks_ai/accounts/fireworks/models/qwen2p5-coder-32b-instruct": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.0000009,
+        "output_cost_per_token": 0.0000009,
+        "litellm_provider": "fireworks_ai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_response_schema": true,
         "source": "https://fireworks.ai/pricing"
     },
     "fireworks_ai/accounts/fireworks/models/yi-large": {
@@ -5515,6 +7364,7 @@
         "litellm_provider": "fireworks_ai",
         "mode": "chat",
         "supports_function_calling": true,
+        "supports_response_schema": true,
         "source": "https://fireworks.ai/pricing"
     },
     "fireworks_ai/accounts/fireworks/models/deepseek-coder-v2-instruct": {
@@ -5526,6 +7376,7 @@
         "litellm_provider": "fireworks_ai",
         "mode": "chat",
         "supports_function_calling": true,
+        "supports_response_schema": true,
         "source": "https://fireworks.ai/pricing"
     },
     "fireworks_ai/nomic-ai/nomic-embed-text-v1.5": {
@@ -5593,6 +7444,11 @@
         "output_cost_per_token": 0.0000012,
         "litellm_provider": "fireworks_ai"
     },
+    "fireworks-ai-default": {
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "fireworks_ai"
+    },
     "fireworks-ai-embedding-up-to-150m": {
         "input_cost_per_token": 0.000000008,
         "output_cost_per_token": 0.000000,
@@ -5813,6 +7669,14 @@
         "litellm_provider": "voyage",
         "mode": "embedding"
     },
+    "voyage/voyage-finance-2": {
+        "max_tokens": 4000,
+        "max_input_tokens": 4000,
+        "input_cost_per_token": 0.00000012,
+        "output_cost_per_token": 0.000000,
+        "litellm_provider": "voyage",
+        "mode": "embedding"
+    },
     "databricks/databricks-meta-llama-3-1-405b-instruct": {
         "max_tokens": 128000,
         "max_input_tokens": 128000,
@@ -5839,6 +7703,19 @@
         "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
         "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
+    "databricks/meta-llama-3.3-70b-instruct": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 0.00000100002,
+        "input_dbu_cost_per_token": 0.000014286,
+        "output_cost_per_token": 0.00000299999,
+        "output_dbu_cost_per_token": 0.000042857,
+        "litellm_provider": "databricks",
+        "mode": "chat",
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
+    },
     "databricks/databricks-dbrx-instruct": {
         "max_tokens": 32768,
         "max_input_tokens": 32768,
@@ -5943,4 +7820,4 @@
         "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
         "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     }
-}
+}