cost-katana-cli 2.2.2 → 2.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,7 +18,146 @@ function getModelProvider(model) {
18
18
  }
19
19
  exports.AVAILABLE_MODELS = [
20
20
  // === OpenAI Models ===
21
- // === GPT-5 Models (Latest) ===
21
+ // === GPT-5.2 Models (Latest) ===
22
+ {
23
+ id: 'gpt-5.2',
24
+ name: 'GPT-5.2',
25
+ provider: 'OpenAI',
26
+ available: true,
27
+ maxTokens: 128000,
28
+ contextLength: 128000,
29
+ pricing: { input: 1.75, output: 14.0 },
30
+ capabilities: ['text', 'reasoning', 'analysis', 'coding', 'agents'],
31
+ category: 'text',
32
+ isLatest: true,
33
+ notes: 'Latest GPT-5 model. Standard: $1.75/$14.00 per 1M tokens. Batch: $0.875/$7.00. Flex: $0.875/$7.00. Priority: $3.50/$28.00. Cached input: $0.175 (standard), $0.0875 (batch/flex), $0.35 (priority)',
34
+ },
35
+ {
36
+ id: 'gpt-5.2-pro',
37
+ name: 'GPT-5.2 Pro',
38
+ provider: 'OpenAI',
39
+ available: true,
40
+ maxTokens: 128000,
41
+ contextLength: 128000,
42
+ pricing: { input: 21.0, output: 168.0 },
43
+ capabilities: [
44
+ 'text',
45
+ 'reasoning',
46
+ 'analysis',
47
+ 'coding',
48
+ 'agents',
49
+ 'premium',
50
+ ],
51
+ category: 'text',
52
+ isLatest: true,
53
+ notes: 'Premium version of GPT-5.2. Standard: $21.00/$168.00 per 1M tokens. Batch: $10.50/$84.00',
54
+ },
55
+ {
56
+ id: 'gpt-5.2-codex',
57
+ name: 'GPT-5.2-Codex',
58
+ provider: 'OpenAI',
59
+ available: true,
60
+ maxTokens: 128000,
61
+ contextLength: 128000,
62
+ pricing: { input: 1.75, output: 14.0 },
63
+ capabilities: ['code', 'programming', 'agents', 'coding'],
64
+ category: 'code',
65
+ isLatest: true,
66
+ notes: 'GPT-5.2 optimized for agentic coding in Codex. Standard: $1.75/$14.00 per 1M tokens. Priority: $3.50/$28.00. Cached input: $0.175 (standard), $0.35 (priority)',
67
+ },
68
+ {
69
+ id: 'gpt-5.2-chat-latest',
70
+ name: 'GPT-5.2 Chat',
71
+ provider: 'OpenAI',
72
+ available: true,
73
+ maxTokens: 128000,
74
+ contextLength: 128000,
75
+ pricing: { input: 1.75, output: 14.0 },
76
+ capabilities: ['text', 'chat', 'reasoning', 'analysis'],
77
+ category: 'text',
78
+ isLatest: true,
79
+ notes: 'GPT-5.2 model used in ChatGPT. Standard: $1.75/$14.00 per 1M tokens. Cached input: $0.175',
80
+ },
81
+ // === GPT-5.1 Models ===
82
+ {
83
+ id: 'gpt-5.1',
84
+ name: 'GPT-5.1',
85
+ provider: 'OpenAI',
86
+ available: true,
87
+ maxTokens: 128000,
88
+ contextLength: 128000,
89
+ pricing: { input: 1.25, output: 10.0 },
90
+ capabilities: ['text', 'reasoning', 'analysis', 'coding', 'agents'],
91
+ category: 'text',
92
+ isLatest: true,
93
+ notes: 'GPT-5.1 model. Standard: $1.25/$10.00 per 1M tokens. Batch: $0.625/$5.00. Flex: $0.625/$5.00. Priority: $2.50/$20.00. Cached input: $0.125 (standard), $0.0625 (batch/flex), $0.25 (priority)',
94
+ },
95
+ {
96
+ id: 'gpt-5.1-codex-max',
97
+ name: 'GPT-5.1-Codex Max',
98
+ provider: 'OpenAI',
99
+ available: true,
100
+ maxTokens: 128000,
101
+ contextLength: 128000,
102
+ pricing: { input: 1.25, output: 10.0 },
103
+ capabilities: ['code', 'programming', 'agents', 'coding'],
104
+ category: 'code',
105
+ isLatest: true,
106
+ notes: 'GPT-5.1 optimized for agentic coding. Standard: $1.25/$10.00 per 1M tokens. Priority: $2.50/$20.00. Cached input: $0.125 (standard), $0.25 (priority)',
107
+ },
108
+ {
109
+ id: 'gpt-5.1-codex',
110
+ name: 'GPT-5.1-Codex',
111
+ provider: 'OpenAI',
112
+ available: true,
113
+ maxTokens: 128000,
114
+ contextLength: 128000,
115
+ pricing: { input: 1.25, output: 10.0 },
116
+ capabilities: ['code', 'programming', 'agents', 'coding'],
117
+ category: 'code',
118
+ isLatest: true,
119
+ notes: 'GPT-5.1 optimized for agentic coding in Codex. Standard: $1.25/$10.00 per 1M tokens. Priority: $2.50/$20.00. Cached input: $0.125 (standard), $0.25 (priority)',
120
+ },
121
+ {
122
+ id: 'gpt-5.1-codex-mini',
123
+ name: 'GPT-5.1-Codex Mini',
124
+ provider: 'OpenAI',
125
+ available: true,
126
+ maxTokens: 128000,
127
+ contextLength: 128000,
128
+ pricing: { input: 0.25, output: 2.0 },
129
+ capabilities: ['code', 'programming', 'agents', 'coding', 'efficient'],
130
+ category: 'code',
131
+ isLatest: true,
132
+ notes: 'Cost-efficient GPT-5.1 Codex variant. Standard: $0.25/$2.00 per 1M tokens. Cached input: $0.025',
133
+ },
134
+ {
135
+ id: 'gpt-5.1-chat-latest',
136
+ name: 'GPT-5.1 Chat',
137
+ provider: 'OpenAI',
138
+ available: true,
139
+ maxTokens: 128000,
140
+ contextLength: 128000,
141
+ pricing: { input: 1.25, output: 10.0 },
142
+ capabilities: ['text', 'chat', 'reasoning', 'analysis'],
143
+ category: 'text',
144
+ isLatest: true,
145
+ notes: 'GPT-5.1 model used in ChatGPT. Standard: $1.25/$10.00 per 1M tokens. Cached input: $0.125',
146
+ },
147
+ {
148
+ id: 'gpt-5.1-search-api',
149
+ name: 'GPT-5.1 Search API',
150
+ provider: 'OpenAI',
151
+ available: true,
152
+ maxTokens: 128000,
153
+ contextLength: 128000,
154
+ pricing: { input: 1.25, output: 10.0 },
155
+ capabilities: ['text', 'search', 'multimodal', 'reasoning'],
156
+ category: 'search',
157
+ isLatest: true,
158
+ notes: 'GPT-5.1 with search capabilities. Standard: $1.25/$10.00 per 1M tokens. Cached input: $0.125. Web search: $10.00/1k calls + search content tokens billed at model rates',
159
+ },
160
+ // === GPT-5 Models ===
22
161
  {
23
162
  id: 'gpt-5',
24
163
  name: 'GPT-5',
@@ -30,158 +169,835 @@ exports.AVAILABLE_MODELS = [
30
169
  capabilities: ['text', 'reasoning', 'analysis', 'coding', 'agents'],
31
170
  category: 'text',
32
171
  isLatest: true,
33
- notes: 'The best model for coding and agentic tasks across domains',
172
+ notes: 'The best model for coding and agentic tasks across domains. Standard: $1.25/$10.00 per 1M tokens. Batch: $0.625/$5.00. Flex: $0.625/$5.00. Priority: $2.50/$20.00. Cached input: $0.125 (standard), $0.0625 (batch/flex), $0.25 (priority)',
173
+ },
174
+ {
175
+ id: 'gpt-5-mini',
176
+ name: 'GPT-5 mini',
177
+ provider: 'OpenAI',
178
+ available: true,
179
+ maxTokens: 128000,
180
+ contextLength: 128000,
181
+ pricing: { input: 0.25, output: 2.0 },
182
+ capabilities: ['text', 'reasoning', 'analysis', 'efficient'],
183
+ category: 'text',
184
+ isLatest: true,
185
+ notes: 'A faster, cost-efficient version of GPT-5 for well-defined tasks. Standard: $0.25/$2.00 per 1M tokens. Batch: $0.125/$1.00. Flex: $0.125/$1.00. Priority: $0.45/$3.60. Cached input: $0.025 (standard), $0.0125 (batch/flex), $0.045 (priority)',
186
+ },
187
+ {
188
+ id: 'gpt-5-nano',
189
+ name: 'GPT-5 nano',
190
+ provider: 'OpenAI',
191
+ available: true,
192
+ maxTokens: 128000,
193
+ contextLength: 128000,
194
+ pricing: { input: 0.05, output: 0.4 },
195
+ capabilities: ['text', 'fast', 'cost-effective'],
196
+ category: 'text',
197
+ isLatest: true,
198
+ notes: 'Fastest, most cost-efficient version of GPT-5. Standard: $0.05/$0.40 per 1M tokens. Batch: $0.025/$0.20. Flex: $0.025/$0.20. Cached input: $0.005 (standard), $0.0025 (batch/flex)',
199
+ },
200
+ {
201
+ id: 'gpt-5-pro',
202
+ name: 'GPT-5 pro',
203
+ provider: 'OpenAI',
204
+ available: true,
205
+ maxTokens: 128000,
206
+ contextLength: 128000,
207
+ pricing: { input: 15.0, output: 120.0 },
208
+ capabilities: [
209
+ 'text',
210
+ 'reasoning',
211
+ 'analysis',
212
+ 'coding',
213
+ 'agents',
214
+ 'premium',
215
+ ],
216
+ category: 'text',
217
+ isLatest: true,
218
+ notes: 'Version of GPT-5 that produces smarter and more precise responses. Standard: $15.00/$120.00 per 1M tokens. Batch: $7.50/$60.00',
219
+ },
220
+ {
221
+ id: 'gpt-5-codex',
222
+ name: 'GPT-5-Codex',
223
+ provider: 'OpenAI',
224
+ available: true,
225
+ maxTokens: 128000,
226
+ contextLength: 128000,
227
+ pricing: { input: 1.25, output: 10.0 },
228
+ capabilities: ['code', 'programming', 'agents', 'coding'],
229
+ category: 'code',
230
+ isLatest: true,
231
+ notes: 'A version of GPT-5 optimized for agentic coding in Codex. Standard: $1.25/$10.00 per 1M tokens. Priority: $2.50/$20.00. Cached input: $0.125 (standard), $0.25 (priority)',
232
+ },
233
+ {
234
+ id: 'gpt-5-chat-latest',
235
+ name: 'GPT-5 Chat',
236
+ provider: 'OpenAI',
237
+ available: true,
238
+ maxTokens: 128000,
239
+ contextLength: 128000,
240
+ pricing: { input: 1.25, output: 10.0 },
241
+ capabilities: ['text', 'chat', 'reasoning', 'analysis'],
242
+ category: 'text',
243
+ isLatest: true,
244
+ notes: 'GPT-5 model used in ChatGPT. Standard: $1.25/$10.00 per 1M tokens. Cached input: $0.125',
245
+ },
246
+ // === GPT-4.1 Series (Latest) ===
247
+ {
248
+ id: 'gpt-4.1',
249
+ name: 'GPT-4.1',
250
+ provider: 'OpenAI',
251
+ available: true,
252
+ maxTokens: 128000,
253
+ contextLength: 128000,
254
+ pricing: { input: 2.0, output: 8.0 },
255
+ capabilities: ['text', 'analysis', 'enhanced'],
256
+ category: 'text',
257
+ isLatest: true,
258
+ notes: 'Smartest non-reasoning model. Standard: $2.00/$8.00 per 1M tokens. Batch: $1.00/$4.00. Priority: $3.50/$14.00. Cached input: $0.50 (standard), $0.875 (priority). Fine-tuning: $25.00 training, $3.00/$0.75/$12.00 (standard), $1.50/$0.50/$6.00 (batch)',
259
+ },
260
+ {
261
+ id: 'gpt-4.1-mini',
262
+ name: 'GPT-4.1 mini',
263
+ provider: 'OpenAI',
264
+ available: true,
265
+ maxTokens: 128000,
266
+ contextLength: 128000,
267
+ pricing: { input: 0.4, output: 1.6 },
268
+ capabilities: ['text', 'analysis', 'efficient'],
269
+ category: 'text',
270
+ isLatest: true,
271
+ notes: 'Smaller, faster version of GPT-4.1. Standard: $0.40/$1.60 per 1M tokens. Batch: $0.20/$0.80. Priority: $0.70/$2.80. Cached input: $0.10 (standard), $0.175 (priority). Fine-tuning: $5.00 training, $0.80/$0.20/$3.20 (standard), $0.40/$0.10/$1.60 (batch)',
272
+ },
273
+ {
274
+ id: 'gpt-4.1-nano',
275
+ name: 'GPT-4.1 nano',
276
+ provider: 'OpenAI',
277
+ available: true,
278
+ maxTokens: 128000,
279
+ contextLength: 128000,
280
+ pricing: { input: 0.1, output: 0.4 },
281
+ capabilities: ['text', 'fast', 'cost-effective'],
282
+ category: 'text',
283
+ isLatest: true,
284
+ notes: 'Fastest, most cost-efficient version of GPT-4.1. Standard: $0.10/$0.40 per 1M tokens. Batch: $0.05/$0.20. Priority: $0.20/$0.80. Cached input: $0.025 (standard), $0.05 (priority). Fine-tuning: $1.50 training, $0.20/$0.05/$0.80 (standard), $0.10/$0.025/$0.40 (batch)',
285
+ },
286
+ // === GPT-4o Series (Latest) ===
287
+ {
288
+ id: 'gpt-4o-2024-08-06',
289
+ name: 'GPT-4o',
290
+ provider: 'OpenAI',
291
+ available: true,
292
+ maxTokens: 128000,
293
+ contextLength: 128000,
294
+ pricing: { input: 2.5, output: 10.0 },
295
+ capabilities: ['text', 'vision', 'multimodal', 'analysis'],
296
+ category: 'multimodal',
297
+ isLatest: true,
298
+ notes: 'Latest GPT-4o model with multimodal capabilities. Standard: $2.50/$10.00 per 1M tokens. Batch: $1.25/$5.00. Priority: $4.25/$17.00. Cached input: $1.25 (standard), $2.125 (priority). Fine-tuning: $25.00 training, $3.75/$1.875/$15.00 (standard), $2.225/$0.90/$12.50 (batch)',
299
+ },
300
+ {
301
+ id: 'gpt-4o-2024-05-13',
302
+ name: 'GPT-4o (2024-05-13)',
303
+ provider: 'OpenAI',
304
+ available: true,
305
+ maxTokens: 128000,
306
+ contextLength: 128000,
307
+ pricing: { input: 5.0, output: 15.0 },
308
+ capabilities: ['text', 'vision', 'multimodal'],
309
+ category: 'multimodal',
310
+ isLatest: false,
311
+ notes: 'GPT-4o model from May 2024. Standard: $5.00/$15.00 per 1M tokens. Priority: $8.75/$26.25',
312
+ },
313
+ {
314
+ id: 'gpt-4o-audio-preview',
315
+ name: 'GPT-4o Audio Preview',
316
+ provider: 'OpenAI',
317
+ available: true,
318
+ maxTokens: 128000,
319
+ contextLength: 128000,
320
+ pricing: { input: 2.5, output: 10.0 },
321
+ capabilities: ['text', 'audio', 'multimodal'],
322
+ category: 'audio',
323
+ isLatest: true,
324
+ notes: 'GPT-4o with audio input capabilities. Text: $2.50/$10.00 per 1M tokens. Audio: $40.00/$80.00 per 1M tokens',
325
+ },
326
+ {
327
+ id: 'gpt-4o-realtime-preview',
328
+ name: 'GPT-4o Realtime Preview',
329
+ provider: 'OpenAI',
330
+ available: true,
331
+ maxTokens: 128000,
332
+ contextLength: 128000,
333
+ pricing: { input: 5.0, output: 20.0 },
334
+ capabilities: ['text', 'realtime', 'multimodal'],
335
+ category: 'realtime',
336
+ isLatest: true,
337
+ notes: 'GPT-4o with realtime processing capabilities. Text: $5.00/$20.00 per 1M tokens. Cached: $2.50. Audio: $40.00/$80.00 per 1M tokens. Image: $5.00/$0.50 per 1M tokens',
338
+ },
339
+ {
340
+ id: 'gpt-4o-mini-2024-07-18',
341
+ name: 'GPT-4o Mini',
342
+ provider: 'OpenAI',
343
+ available: true,
344
+ maxTokens: 128000,
345
+ contextLength: 128000,
346
+ pricing: { input: 0.15, output: 0.6 },
347
+ capabilities: ['text', 'vision', 'multimodal', 'efficient'],
348
+ category: 'multimodal',
349
+ isLatest: true,
350
+ notes: 'Efficient GPT-4o variant with multimodal capabilities. Standard: $0.15/$0.60 per 1M tokens. Batch: $0.075/$0.30. Priority: $0.25/$1.00. Cached input: $0.075 (standard), $0.125 (priority). Fine-tuning: $3.00 training, $0.30/$0.15/$1.20 (standard), $0.15/$0.075/$0.60 (batch)',
351
+ },
352
+ {
353
+ id: 'gpt-4o-mini-audio-preview',
354
+ name: 'GPT-4o Mini Audio Preview',
355
+ provider: 'OpenAI',
356
+ available: true,
357
+ maxTokens: 128000,
358
+ contextLength: 128000,
359
+ pricing: { input: 0.15, output: 0.6 },
360
+ capabilities: ['text', 'audio', 'efficient'],
361
+ category: 'audio',
362
+ isLatest: true,
363
+ notes: 'GPT-4o Mini with audio input capabilities. Text: $0.15/$0.60 per 1M tokens. Audio: $10.00/$20.00 per 1M tokens',
364
+ },
365
+ {
366
+ id: 'gpt-4o-mini-realtime-preview',
367
+ name: 'GPT-4o Mini Realtime Preview',
368
+ provider: 'OpenAI',
369
+ available: true,
370
+ maxTokens: 128000,
371
+ contextLength: 128000,
372
+ pricing: { input: 0.6, output: 2.4 },
373
+ capabilities: ['text', 'realtime', 'efficient'],
374
+ category: 'realtime',
375
+ isLatest: true,
376
+ notes: 'GPT-4o Mini with realtime processing capabilities. Text: $0.60/$2.40 per 1M tokens. Cached: $0.30. Audio: $10.00/$20.00 per 1M tokens. Image: $0.80/$0.08 per 1M tokens',
377
+ },
378
+ {
379
+ id: 'gpt-4o-mini',
380
+ name: 'GPT-4o mini',
381
+ provider: 'OpenAI',
382
+ available: true,
383
+ maxTokens: 128000,
384
+ contextLength: 128000,
385
+ pricing: { input: 0.15, output: 0.6 },
386
+ capabilities: ['text', 'vision', 'multimodal'],
387
+ category: 'multimodal',
388
+ isLatest: false,
389
+ notes: 'Fast, affordable small model for focused tasks. Standard: $0.15/$0.60 per 1M tokens. Batch: $0.075/$0.30',
390
+ },
391
+ {
392
+ id: 'gpt-4o',
393
+ name: 'GPT-4o',
394
+ provider: 'OpenAI',
395
+ available: true,
396
+ maxTokens: 128000,
397
+ contextLength: 128000,
398
+ pricing: { input: 2.5, output: 10.0 },
399
+ capabilities: ['text', 'vision', 'multimodal'],
400
+ category: 'multimodal',
401
+ isLatest: false,
402
+ notes: 'Fast, intelligent, flexible GPT model. Standard: $2.50/$10.00 per 1M tokens. Batch: $1.25/$5.00',
403
+ },
404
+ {
405
+ id: 'gpt-4-turbo',
406
+ name: 'GPT-4 Turbo',
407
+ provider: 'OpenAI',
408
+ available: true,
409
+ maxTokens: 128000,
410
+ contextLength: 128000,
411
+ pricing: { input: 10.0, output: 30.0 },
412
+ capabilities: ['text', 'vision', 'multimodal'],
413
+ category: 'multimodal',
414
+ isLatest: false,
415
+ notes: 'Legacy GPT-4 Turbo model. Standard: $10.00/$30.00 per 1M tokens. Batch: $5.00/$15.00',
416
+ },
417
+ {
418
+ id: 'gpt-4',
419
+ name: 'GPT-4',
420
+ provider: 'OpenAI',
421
+ available: true,
422
+ maxTokens: 8192,
423
+ contextLength: 8192,
424
+ pricing: { input: 30.0, output: 60.0 },
425
+ capabilities: ['text'],
426
+ category: 'text',
427
+ isLatest: false,
428
+ notes: 'Legacy GPT-4 base model. Standard: $30.00/$60.00 per 1M tokens. Batch: $15.00/$30.00',
429
+ },
430
+ {
431
+ id: 'gpt-3.5-turbo',
432
+ name: 'GPT-3.5 Turbo',
433
+ provider: 'OpenAI',
434
+ available: true,
435
+ maxTokens: 16385,
436
+ contextLength: 16385,
437
+ pricing: { input: 0.5, output: 1.5 },
438
+ capabilities: ['text'],
439
+ category: 'text',
440
+ isLatest: false,
441
+ notes: 'Legacy GPT-3.5 Turbo model. Standard: $0.50/$1.50 per 1M tokens. Batch: $0.25/$0.75. Fine-tuning: $8.00 training, $3.00/$6.00 (standard), $1.50/$3.00 (batch)',
442
+ },
443
+ // === Audio and Realtime Models ===
444
+ {
445
+ id: 'gpt-realtime',
446
+ name: 'gpt-realtime',
447
+ provider: 'OpenAI',
448
+ available: true,
449
+ maxTokens: 128000,
450
+ contextLength: 128000,
451
+ pricing: { input: 4.0, output: 16.0 },
452
+ capabilities: ['text', 'audio', 'realtime', 'multimodal'],
453
+ category: 'realtime',
454
+ isLatest: true,
455
+ notes: 'Model capable of realtime text and audio inputs and outputs. Text: $4.00/$16.00 per 1M tokens (standard). Cached: $0.40. Audio: $32.00/$64.00 per 1M tokens. Image: $5.00/$0.50 per 1M tokens',
456
+ },
457
+ {
458
+ id: 'gpt-realtime-mini',
459
+ name: 'gpt-realtime-mini',
460
+ provider: 'OpenAI',
461
+ available: true,
462
+ maxTokens: 128000,
463
+ contextLength: 128000,
464
+ pricing: { input: 0.6, output: 2.4 },
465
+ capabilities: ['text', 'audio', 'realtime', 'efficient'],
466
+ category: 'realtime',
467
+ isLatest: true,
468
+ notes: 'A cost-efficient version of GPT Realtime. Text: $0.60/$2.40 per 1M tokens (standard). Cached: $0.06. Audio: $10.00/$20.00 per 1M tokens. Image: $0.80/$0.08 per 1M tokens',
469
+ },
470
+ {
471
+ id: 'gpt-audio',
472
+ name: 'gpt-audio',
473
+ provider: 'OpenAI',
474
+ available: true,
475
+ maxTokens: 128000,
476
+ contextLength: 128000,
477
+ pricing: { input: 2.5, output: 10.0 },
478
+ capabilities: ['text', 'audio', 'multimodal'],
479
+ category: 'audio',
480
+ isLatest: true,
481
+ notes: 'For audio inputs and outputs with Chat Completions API. Text: $2.50/$10.00 per 1M tokens. Audio: $32.00/$64.00 per 1M tokens',
482
+ },
483
+ {
484
+ id: 'gpt-audio-mini',
485
+ name: 'gpt-audio-mini',
486
+ provider: 'OpenAI',
487
+ available: true,
488
+ maxTokens: 128000,
489
+ contextLength: 128000,
490
+ pricing: { input: 0.6, output: 2.4 },
491
+ capabilities: ['text', 'audio', 'efficient'],
492
+ category: 'audio',
493
+ isLatest: true,
494
+ notes: 'A cost-efficient version of GPT Audio. Text: $0.60/$2.40 per 1M tokens. Audio: $10.00/$20.00 per 1M tokens',
495
+ },
496
+ // === Transcription Models ===
497
+ {
498
+ id: 'gpt-4o-transcribe',
499
+ name: 'GPT-4o Transcribe',
500
+ provider: 'OpenAI',
501
+ available: true,
502
+ maxTokens: 0,
503
+ contextLength: 0,
504
+ pricing: { input: 2.5, output: 10.0 },
505
+ capabilities: ['audio', 'transcription', 'speech-to-text'],
506
+ category: 'audio',
507
+ isLatest: true,
508
+ notes: 'Speech-to-text model powered by GPT-4o. Text tokens: $2.50/$10.00 per 1M tokens. Audio tokens: $6.00 per 1M tokens. Estimated cost: $0.006 per minute',
509
+ },
510
+ {
511
+ id: 'gpt-4o-transcribe-diarize',
512
+ name: 'GPT-4o Transcribe Diarize',
513
+ provider: 'OpenAI',
514
+ available: true,
515
+ maxTokens: 0,
516
+ contextLength: 0,
517
+ pricing: { input: 2.5, output: 10.0 },
518
+ capabilities: ['audio', 'transcription', 'speech-to-text', 'diarization'],
519
+ category: 'audio',
520
+ isLatest: true,
521
+ notes: "Transcription model that identifies who's speaking when. Text tokens: $2.50/$10.00 per 1M tokens. Audio tokens: $6.00 per 1M tokens. Estimated cost: $0.006 per minute",
522
+ },
523
+ {
524
+ id: 'gpt-4o-mini-transcribe',
525
+ name: 'GPT-4o mini Transcribe',
526
+ provider: 'OpenAI',
527
+ available: true,
528
+ maxTokens: 0,
529
+ contextLength: 0,
530
+ pricing: { input: 1.25, output: 5.0 },
531
+ capabilities: ['audio', 'transcription', 'speech-to-text', 'efficient'],
532
+ category: 'audio',
533
+ isLatest: true,
534
+ notes: 'Speech-to-text model powered by GPT-4o mini. Text tokens: $1.25/$5.00 per 1M tokens. Audio tokens: $3.00 per 1M tokens. Estimated cost: $0.003 per minute',
535
+ },
536
+ {
537
+ id: 'whisper-1',
538
+ name: 'Whisper',
539
+ provider: 'OpenAI',
540
+ available: true,
541
+ maxTokens: 0,
542
+ contextLength: 0,
543
+ pricing: { input: 0.006, output: 0.006 },
544
+ capabilities: [
545
+ 'audio',
546
+ 'transcription',
547
+ 'speech-to-text',
548
+ 'general-purpose',
549
+ ],
550
+ category: 'audio',
551
+ isLatest: true,
552
+ notes: 'General-purpose speech recognition model. Priced at $0.006 per minute',
553
+ },
554
+ // === Text-to-Speech Models ===
555
+ {
556
+ id: 'gpt-4o-mini-tts',
557
+ name: 'GPT-4o mini TTS',
558
+ provider: 'OpenAI',
559
+ available: true,
560
+ maxTokens: 0,
561
+ contextLength: 0,
562
+ pricing: { input: 0.6, output: 12.0 },
563
+ capabilities: ['audio', 'text-to-speech', 'tts'],
564
+ category: 'audio',
565
+ isLatest: true,
566
+ notes: 'Text-to-speech model powered by GPT-4o mini. Text tokens: $0.60 per 1M tokens. Audio tokens: $12.00 per 1M tokens. Estimated cost: $0.015 per minute',
567
+ },
568
+ {
569
+ id: 'tts-1',
570
+ name: 'TTS-1',
571
+ provider: 'OpenAI',
572
+ available: true,
573
+ maxTokens: 0,
574
+ contextLength: 0,
575
+ pricing: { input: 0.015, output: 0.015 },
576
+ capabilities: ['audio', 'text-to-speech', 'tts', 'fast'],
577
+ category: 'audio',
578
+ isLatest: true,
579
+ notes: 'Text-to-speech model optimized for speed. Priced at $15.00 per 1M characters',
580
+ },
581
+ {
582
+ id: 'tts-1-hd',
583
+ name: 'TTS-1 HD',
584
+ provider: 'OpenAI',
585
+ available: true,
586
+ maxTokens: 0,
587
+ contextLength: 0,
588
+ pricing: { input: 0.03, output: 0.03 },
589
+ capabilities: ['audio', 'text-to-speech', 'tts', 'high-quality'],
590
+ category: 'audio',
591
+ isLatest: true,
592
+ notes: 'Text-to-speech model optimized for quality. Priced at $30.00 per 1M characters',
593
+ },
594
+ // === Open-Weight Models ===
595
+ {
596
+ id: 'gpt-oss-120b',
597
+ name: 'gpt-oss-120b',
598
+ provider: 'OpenAI',
599
+ available: true,
600
+ maxTokens: 131072,
601
+ contextLength: 131072,
602
+ pricing: { input: 2.0, output: 8.0 },
603
+ capabilities: ['text', 'open-source', 'open-weight'],
604
+ category: 'text',
605
+ isLatest: true,
606
+ notes: 'Most powerful open-weight model, fits into an H100 GPU. Licensed under Apache 2.0',
607
+ },
608
+ {
609
+ id: 'gpt-oss-20b',
610
+ name: 'gpt-oss-20b',
611
+ provider: 'OpenAI',
612
+ available: true,
613
+ maxTokens: 131072,
614
+ contextLength: 131072,
615
+ pricing: { input: 0.5, output: 2.0 },
616
+ capabilities: ['text', 'open-source', 'open-weight', 'low-latency'],
617
+ category: 'text',
618
+ isLatest: true,
619
+ notes: 'Medium-sized open-weight model for low latency. Licensed under Apache 2.0',
620
+ },
621
+ // === Specialized Models ===
622
+ {
623
+ id: 'codex-mini-latest',
624
+ name: 'codex-mini-latest',
625
+ provider: 'OpenAI',
626
+ available: true,
627
+ maxTokens: 128000,
628
+ contextLength: 128000,
629
+ pricing: { input: 1.5, output: 6.0 },
630
+ capabilities: ['code', 'programming', 'reasoning'],
631
+ category: 'code',
632
+ isLatest: true,
633
+ notes: 'Fast reasoning model optimized for the Codex CLI. Standard: $1.50/$6.00 per 1M tokens. Cached input: $0.375',
634
+ },
635
+ {
636
+ id: 'omni-moderation-latest',
637
+ name: 'omni-moderation',
638
+ provider: 'OpenAI',
639
+ available: true,
640
+ maxTokens: 32768,
641
+ contextLength: 32768,
642
+ pricing: { input: 0.0, output: 0.0 },
643
+ capabilities: ['moderation', 'text', 'image', 'harmful-content-detection'],
644
+ category: 'moderation',
645
+ isLatest: true,
646
+ notes: 'Identify potentially harmful content in text and images. Made available free of charge. Built-in tools: Code Interpreter ($0.03-$1.92/container based on memory), File search storage ($0.10/GB per day, 1GB free), File search tool calls ($2.50/1k calls), Web search ($10.00/1k calls for reasoning models, $25.00/1k calls for non-reasoning models) + search content tokens billed at model rates',
647
+ },
648
+ {
649
+ id: 'gpt-4o-mini-search-preview-2025-03-11',
650
+ name: 'GPT-4o Mini Search Preview',
651
+ provider: 'OpenAI',
652
+ available: true,
653
+ maxTokens: 128000,
654
+ contextLength: 128000,
655
+ pricing: { input: 0.15, output: 0.6 },
656
+ capabilities: ['text', 'search', 'multimodal'],
657
+ category: 'search',
658
+ isLatest: true,
659
+ notes: 'GPT-4o Mini with search capabilities for enhanced information retrieval. Text: $0.15/$0.60 per 1M tokens. Web search: $10.00/1k calls + search content tokens billed at model rates (non-reasoning models: $25.00/1k calls, search content tokens are free)',
660
+ },
661
+ {
662
+ id: 'gpt-4o-search-preview-2025-03-11',
663
+ name: 'GPT-4o Search Preview',
664
+ provider: 'OpenAI',
665
+ available: true,
666
+ maxTokens: 128000,
667
+ contextLength: 128000,
668
+ pricing: { input: 2.5, output: 10.0 },
669
+ capabilities: ['text', 'search', 'multimodal'],
670
+ category: 'search',
671
+ isLatest: true,
672
+ notes: 'GPT-4o with search capabilities for enhanced information retrieval. Text: $2.50/$10.00 per 1M tokens. Web search: $10.00/1k calls + search content tokens billed at model rates (non-reasoning models: $25.00/1k calls, search content tokens are free)',
673
+ },
674
+ {
675
+ id: 'computer-use-preview-2025-03-11',
676
+ name: 'Computer Use Preview',
677
+ provider: 'OpenAI',
678
+ available: true,
679
+ maxTokens: 128000,
680
+ contextLength: 128000,
681
+ pricing: { input: 3.0, output: 12.0 },
682
+ capabilities: ['text', 'computer-use', 'automation'],
683
+ category: 'computer-use',
684
+ isLatest: true,
685
+ notes: 'Model optimized for computer use and automation tasks. Standard: $3.00/$12.00 per 1M tokens. Batch: $1.50/$6.00',
686
+ },
687
+ // === Embedding Models ===
688
+ {
689
+ id: 'text-embedding-3-small',
690
+ name: 'Text Embedding 3 Small',
691
+ provider: 'OpenAI',
692
+ available: true,
693
+ maxTokens: 8191,
694
+ contextLength: 8191,
695
+ pricing: { input: 0.02, output: 0.0 },
696
+ capabilities: ['embedding', 'semantic-search'],
697
+ category: 'embedding',
698
+ isLatest: true,
699
+ notes: 'Latest small embedding model for semantic search and analysis. Online: $0.02 per 1M tokens. Batch: $0.01 per 1M tokens',
700
+ },
701
+ {
702
+ id: 'text-embedding-3-large',
703
+ name: 'Text Embedding 3 Large',
704
+ provider: 'OpenAI',
705
+ available: true,
706
+ maxTokens: 8191,
707
+ contextLength: 8191,
708
+ pricing: { input: 0.13, output: 0.0 },
709
+ capabilities: ['embedding', 'semantic-search', 'high-quality'],
710
+ category: 'embedding',
711
+ isLatest: true,
712
+ notes: 'Latest large embedding model with highest quality for semantic search. Online: $0.13 per 1M tokens. Batch: $0.065 per 1M tokens',
713
+ },
714
+ {
715
+ id: 'text-embedding-ada-002',
716
+ name: 'Text Embedding Ada 002',
717
+ provider: 'OpenAI',
718
+ available: true,
719
+ maxTokens: 8191,
720
+ contextLength: 8191,
721
+ pricing: { input: 0.1, output: 0.0 },
722
+ capabilities: ['embedding', 'semantic-search'],
723
+ category: 'embedding',
724
+ isLatest: false,
725
+ notes: 'Previous generation embedding model for semantic search. Online: $0.10 per 1M tokens. Batch: $0.05 per 1M tokens',
726
+ },
727
+ // === ChatGPT Models ===
728
+ {
729
+ id: 'chatgpt-4o-latest',
730
+ name: 'ChatGPT-4o',
731
+ provider: 'OpenAI',
732
+ available: true,
733
+ maxTokens: 128000,
734
+ contextLength: 128000,
735
+ pricing: { input: 5.0, output: 15.0 },
736
+ capabilities: ['text', 'vision', 'multimodal', 'chat'],
737
+ category: 'multimodal',
738
+ isLatest: false,
739
+ notes: 'GPT-4o model used in ChatGPT (not recommended for API use). Standard: $5.00/$15.00 per 1M tokens. Batch: $5.00/$15.00',
740
+ },
741
+ // === Legacy and Deprecated Models ===
742
+ {
743
+ id: 'gpt-4.5-preview',
744
+ name: 'GPT-4.5 Preview',
745
+ provider: 'OpenAI',
746
+ available: true,
747
+ maxTokens: 128000,
748
+ contextLength: 128000,
749
+ pricing: { input: 10.0, output: 30.0 },
750
+ capabilities: ['text', 'vision', 'multimodal'],
751
+ category: 'multimodal',
752
+ isLatest: false,
753
+ notes: 'Deprecated - Large model. Standard: $10.00/$30.00 per 1M tokens. Batch: $5.00/$15.00',
754
+ },
755
+ {
756
+ id: 'o1-preview',
757
+ name: 'o1 Preview',
758
+ provider: 'OpenAI',
759
+ available: true,
760
+ maxTokens: 128000,
761
+ contextLength: 128000,
762
+ pricing: { input: 15.0, output: 60.0 },
763
+ capabilities: ['text', 'reasoning', 'analysis'],
764
+ category: 'reasoning',
765
+ isLatest: false,
766
+ notes: 'Deprecated - Preview of our first o-series reasoning model. Standard: $15.00/$60.00 per 1M tokens. Batch: $7.50/$30.00',
767
+ },
768
+ {
769
+ id: 'text-moderation-latest',
770
+ name: 'text-moderation',
771
+ provider: 'OpenAI',
772
+ available: true,
773
+ maxTokens: 32768,
774
+ contextLength: 32768,
775
+ pricing: { input: 0.1, output: 0.1 },
776
+ capabilities: ['moderation', 'text', 'harmful-content-detection'],
777
+ category: 'moderation',
778
+ isLatest: false,
779
+ notes: 'Deprecated - Previous generation text-only moderation model',
780
+ },
781
+ {
782
+ id: 'text-moderation-stable',
783
+ name: 'text-moderation-stable',
784
+ provider: 'OpenAI',
785
+ available: true,
786
+ maxTokens: 32768,
787
+ contextLength: 32768,
788
+ pricing: { input: 0.1, output: 0.1 },
789
+ capabilities: ['moderation', 'text', 'harmful-content-detection'],
790
+ category: 'moderation',
791
+ isLatest: false,
792
+ notes: 'Deprecated - Previous generation text-only moderation model',
793
+ },
794
+ {
795
+ id: 'babbage-002',
796
+ name: 'babbage-002',
797
+ provider: 'OpenAI',
798
+ available: true,
799
+ maxTokens: 16384,
800
+ contextLength: 16384,
801
+ pricing: { input: 0.4, output: 0.4 },
802
+ capabilities: ['text'],
803
+ category: 'text',
804
+ isLatest: false,
805
+ notes: 'Deprecated - Replacement for the GPT-3 ada and babbage base models. Standard: $0.40/$0.40 per 1M tokens. Batch: $0.20/$0.20. Fine-tuning: $0.40 training, $1.60/$1.60 (standard), $0.80/$0.90 (batch)',
806
+ },
807
+ {
808
+ id: 'davinci-002',
809
+ name: 'davinci-002',
810
+ provider: 'OpenAI',
811
+ available: true,
812
+ maxTokens: 16384,
813
+ contextLength: 16384,
814
+ pricing: { input: 2.0, output: 2.0 },
815
+ capabilities: ['text'],
816
+ category: 'text',
817
+ isLatest: false,
818
+ notes: 'Deprecated - Replacement for the GPT-3 curie and davinci base models. Standard: $2.00/$2.00 per 1M tokens. Batch: $1.00/$1.00. Fine-tuning: $6.00 training, $12.00/$12.00 (standard), $6.00/$6.00 (batch)',
819
+ },
820
+ {
821
+ id: 'gpt-4-turbo-2024-04-09',
822
+ name: 'GPT-4 Turbo (2024-04-09)',
823
+ provider: 'OpenAI',
824
+ available: true,
825
+ maxTokens: 128000,
826
+ contextLength: 128000,
827
+ pricing: { input: 10.0, output: 30.0 },
828
+ capabilities: ['text', 'vision', 'multimodal'],
829
+ category: 'multimodal',
830
+ isLatest: false,
831
+ notes: 'Legacy GPT-4 Turbo model. Standard: $10.00/$30.00 per 1M tokens. Batch: $5.00/$15.00',
832
+ },
833
+ {
834
+ id: 'gpt-4-0125-preview',
835
+ name: 'GPT-4 (0125 Preview)',
836
+ provider: 'OpenAI',
837
+ available: true,
838
+ maxTokens: 128000,
839
+ contextLength: 128000,
840
+ pricing: { input: 10.0, output: 30.0 },
841
+ capabilities: ['text', 'vision', 'multimodal'],
842
+ category: 'multimodal',
843
+ isLatest: false,
844
+ notes: 'Legacy GPT-4 preview model. Standard: $10.00/$30.00 per 1M tokens. Batch: $5.00/$15.00',
845
+ },
846
+ {
847
+ id: 'gpt-4-1106-preview',
848
+ name: 'GPT-4 (1106 Preview)',
849
+ provider: 'OpenAI',
850
+ available: true,
851
+ maxTokens: 128000,
852
+ contextLength: 128000,
853
+ pricing: { input: 10.0, output: 30.0 },
854
+ capabilities: ['text', 'vision', 'multimodal'],
855
+ category: 'multimodal',
856
+ isLatest: false,
857
+ notes: 'Legacy GPT-4 preview model. Standard: $10.00/$30.00 per 1M tokens. Batch: $5.00/$15.00',
34
858
  },
35
859
  {
36
- id: 'gpt-5-mini',
37
- name: 'GPT-5 mini',
860
+ id: 'gpt-4-1106-vision-preview',
861
+ name: 'GPT-4 Vision (1106 Preview)',
38
862
  provider: 'OpenAI',
39
863
  available: true,
40
864
  maxTokens: 128000,
41
865
  contextLength: 128000,
42
- pricing: { input: 0.25, output: 2.0 },
43
- capabilities: ['text', 'reasoning', 'analysis', 'efficient'],
44
- category: 'text',
45
- isLatest: true,
46
- notes: 'A faster, cost-efficient version of GPT-5 for well-defined tasks',
866
+ pricing: { input: 10.0, output: 30.0 },
867
+ capabilities: ['text', 'vision', 'multimodal'],
868
+ category: 'multimodal',
869
+ isLatest: false,
870
+ notes: 'Legacy GPT-4 vision preview model. Standard: $10.00/$30.00 per 1M tokens. Batch: $5.00/$15.00',
47
871
  },
48
872
  {
49
- id: 'gpt-5-nano',
50
- name: 'GPT-5 nano',
873
+ id: 'gpt-4-0613',
874
+ name: 'GPT-4 (0613)',
51
875
  provider: 'OpenAI',
52
876
  available: true,
53
- maxTokens: 128000,
54
- contextLength: 128000,
55
- pricing: { input: 0.05, output: 0.4 },
56
- capabilities: ['text', 'fast', 'cost-effective'],
877
+ maxTokens: 8192,
878
+ contextLength: 8192,
879
+ pricing: { input: 30.0, output: 60.0 },
880
+ capabilities: ['text'],
57
881
  category: 'text',
58
- isLatest: true,
59
- notes: 'Fastest, most cost-efficient version of GPT-5',
882
+ isLatest: false,
883
+ notes: 'Legacy GPT-4 model. Standard: $30.00/$60.00 per 1M tokens. Batch: $15.00/$30.00',
60
884
  },
61
885
  {
62
- id: 'gpt-5-pro',
63
- name: 'GPT-5 pro',
886
+ id: 'gpt-4-0314',
887
+ name: 'GPT-4 (0314)',
64
888
  provider: 'OpenAI',
65
889
  available: true,
66
- maxTokens: 128000,
67
- contextLength: 128000,
68
- pricing: { input: 2.5, output: 20.0 },
69
- capabilities: [
70
- 'text',
71
- 'reasoning',
72
- 'analysis',
73
- 'coding',
74
- 'agents',
75
- 'premium',
76
- ],
890
+ maxTokens: 8192,
891
+ contextLength: 8192,
892
+ pricing: { input: 30.0, output: 60.0 },
893
+ capabilities: ['text'],
77
894
  category: 'text',
78
- isLatest: true,
79
- notes: 'Version of GPT-5 that produces smarter and more precise responses',
895
+ isLatest: false,
896
+ notes: 'Legacy GPT-4 model. Standard: $30.00/$60.00 per 1M tokens. Batch: $15.00/$30.00',
80
897
  },
81
898
  {
82
- id: 'gpt-5-codex',
83
- name: 'GPT-5-Codex',
899
+ id: 'gpt-4-32k',
900
+ name: 'GPT-4 32K',
84
901
  provider: 'OpenAI',
85
902
  available: true,
86
- maxTokens: 128000,
87
- contextLength: 128000,
88
- pricing: { input: 1.25, output: 10.0 },
89
- capabilities: ['code', 'programming', 'agents', 'coding'],
90
- category: 'code',
91
- isLatest: true,
92
- notes: 'A version of GPT-5 optimized for agentic coding in Codex',
903
+ maxTokens: 32768,
904
+ contextLength: 32768,
905
+ pricing: { input: 60.0, output: 120.0 },
906
+ capabilities: ['text'],
907
+ category: 'text',
908
+ isLatest: false,
909
+ notes: 'Legacy GPT-4 model with 32K context. Standard: $60.00/$120.00 per 1M tokens. Batch: $30.00/$60.00',
93
910
  },
94
911
  {
95
- id: 'gpt-5-chat-latest',
96
- name: 'GPT-5 Chat',
912
+ id: 'gpt-4-turbo',
913
+ name: 'GPT-4 Turbo',
97
914
  provider: 'OpenAI',
98
915
  available: true,
99
916
  maxTokens: 128000,
100
917
  contextLength: 128000,
101
- pricing: { input: 1.25, output: 10.0 },
102
- capabilities: ['text', 'chat', 'reasoning', 'analysis'],
103
- category: 'text',
104
- isLatest: true,
105
- notes: 'GPT-5 model used in ChatGPT (not recommended for API use)',
918
+ pricing: { input: 10.0, output: 30.0 },
919
+ capabilities: ['text', 'vision', 'multimodal'],
920
+ category: 'multimodal',
921
+ isLatest: false,
922
+ notes: 'Legacy GPT-4 Turbo model. Standard: $10.00/$30.00 per 1M tokens. Batch: $5.00/$15.00',
106
923
  },
107
- // === GPT-4o Models ===
108
924
  {
109
- id: 'gpt-4o-mini-2024-07-18',
110
- name: 'GPT-4o Mini',
925
+ id: 'gpt-3.5-turbo-0125',
926
+ name: 'GPT-3.5 Turbo (0125)',
111
927
  provider: 'OpenAI',
112
928
  available: true,
113
- maxTokens: 128000,
114
- contextLength: 128000,
115
- pricing: { input: 0.15, output: 0.6 },
116
- capabilities: ['text', 'vision', 'multimodal'],
929
+ maxTokens: 16385,
930
+ contextLength: 16385,
931
+ pricing: { input: 0.5, output: 1.5 },
932
+ capabilities: ['text'],
117
933
  category: 'text',
118
- isLatest: true,
119
- notes: 'Latest GPT-4o Mini model with vision capabilities',
934
+ isLatest: false,
935
+ notes: 'Legacy GPT-3.5 Turbo model. Standard: $0.50/$1.50 per 1M tokens. Batch: $0.25/$0.75. Fine-tuning: $8.00 training, $3.00/$6.00 (standard), $1.50/$3.00 (batch)',
120
936
  },
121
937
  {
122
- id: 'gpt-4o',
123
- name: 'GPT-4o',
938
+ id: 'gpt-3.5-turbo-1106',
939
+ name: 'GPT-3.5 Turbo (1106)',
124
940
  provider: 'OpenAI',
125
941
  available: true,
126
- maxTokens: 128000,
127
- contextLength: 128000,
128
- pricing: { input: 2.5, output: 10.0 },
129
- capabilities: ['text', 'vision', 'multimodal'],
942
+ maxTokens: 16385,
943
+ contextLength: 16385,
944
+ pricing: { input: 1.0, output: 2.0 },
945
+ capabilities: ['text'],
130
946
  category: 'text',
131
- isLatest: true,
132
- notes: 'Latest GPT-4o model with enhanced capabilities',
947
+ isLatest: false,
948
+ notes: 'Legacy GPT-3.5 Turbo model. Standard: $1.00/$2.00 per 1M tokens. Batch: $1.00/$2.00',
133
949
  },
134
950
  {
135
- id: 'gpt-4o-mini',
136
- name: 'GPT-4o Mini',
951
+ id: 'gpt-3.5-turbo-0613',
952
+ name: 'GPT-3.5 Turbo (0613)',
137
953
  provider: 'OpenAI',
138
954
  available: true,
139
- maxTokens: 128000,
140
- contextLength: 128000,
141
- pricing: { input: 0.15, output: 0.6 },
142
- capabilities: ['text', 'vision', 'multimodal'],
955
+ maxTokens: 16385,
956
+ contextLength: 16385,
957
+ pricing: { input: 1.5, output: 2.0 },
958
+ capabilities: ['text'],
143
959
  category: 'text',
144
- isLatest: true,
145
- notes: 'GPT-4o Mini model with vision capabilities',
960
+ isLatest: false,
961
+ notes: 'Legacy GPT-3.5 Turbo model. Standard: $1.50/$2.00 per 1M tokens. Batch: $1.50/$2.00',
146
962
  },
147
963
  {
148
- id: 'gpt-4-turbo',
149
- name: 'GPT-4 Turbo',
964
+ id: 'gpt-3.5-0301',
965
+ name: 'GPT-3.5 (0301)',
150
966
  provider: 'OpenAI',
151
967
  available: true,
152
- maxTokens: 128000,
153
- contextLength: 128000,
154
- pricing: { input: 10.0, output: 30.0 },
155
- capabilities: ['text', 'vision', 'multimodal'],
968
+ maxTokens: 16385,
969
+ contextLength: 16385,
970
+ pricing: { input: 1.5, output: 2.0 },
971
+ capabilities: ['text'],
156
972
  category: 'text',
157
973
  isLatest: false,
158
- notes: 'GPT-4 Turbo with vision capabilities',
974
+ notes: 'Legacy GPT-3.5 model. Standard: $1.50/$2.00 per 1M tokens. Batch: $1.50/$2.00',
159
975
  },
160
976
  {
161
- id: 'gpt-4',
162
- name: 'GPT-4',
977
+ id: 'gpt-3.5-turbo-16k-0613',
978
+ name: 'GPT-3.5 Turbo 16K (0613)',
163
979
  provider: 'OpenAI',
164
980
  available: true,
165
- maxTokens: 8192,
166
- contextLength: 8192,
167
- pricing: { input: 30.0, output: 60.0 },
981
+ maxTokens: 16385,
982
+ contextLength: 16385,
983
+ pricing: { input: 3.0, output: 4.0 },
168
984
  capabilities: ['text'],
169
985
  category: 'text',
170
986
  isLatest: false,
171
- notes: 'GPT-4 base model',
987
+ notes: 'Legacy GPT-3.5 Turbo model with 16K context. Standard: $3.00/$4.00 per 1M tokens. Batch: $1.50/$2.00',
172
988
  },
173
989
  {
174
- id: 'gpt-3.5-turbo',
175
- name: 'GPT-3.5 Turbo',
990
+ id: 'gpt-3.5-turbo-instruct',
991
+ name: 'GPT-3.5 Turbo Instruct',
176
992
  provider: 'OpenAI',
177
993
  available: true,
178
994
  maxTokens: 16385,
179
995
  contextLength: 16385,
180
- pricing: { input: 0.5, output: 1.5 },
996
+ pricing: { input: 1.5, output: 2.0 },
181
997
  capabilities: ['text'],
182
998
  category: 'text',
183
999
  isLatest: false,
184
- notes: 'GPT-3.5 Turbo model',
1000
+ notes: 'Legacy GPT-3.5 Turbo Instruct model. Standard: $1.50/$2.00 per 1M tokens',
185
1001
  },
186
1002
  // === O-Series Models (Latest) ===
187
1003
  {
@@ -195,7 +1011,7 @@ exports.AVAILABLE_MODELS = [
195
1011
  capabilities: ['text', 'reasoning', 'analysis', 'pro'],
196
1012
  category: 'reasoning',
197
1013
  isLatest: true,
198
- notes: 'Version of o3 with more compute for better responses',
1014
+ notes: 'Version of o3 with more compute for better responses. Standard: $20.00/$80.00 per 1M tokens. Batch: $10.00/$40.00',
199
1015
  },
200
1016
  {
201
1017
  id: 'o3-deep-research',
@@ -208,7 +1024,7 @@ exports.AVAILABLE_MODELS = [
208
1024
  capabilities: ['text', 'research', 'analysis', 'deep'],
209
1025
  category: 'research',
210
1026
  isLatest: true,
211
- notes: 'Our most powerful deep research model',
1027
+ notes: 'Our most powerful deep research model. Standard: $10.00/$40.00 per 1M tokens. Batch: $5.00/$20.00. Cached input: $2.50 (standard)',
212
1028
  },
213
1029
  {
214
1030
  id: 'o4-mini',
@@ -221,7 +1037,7 @@ exports.AVAILABLE_MODELS = [
221
1037
  capabilities: ['text', 'reasoning', 'efficient'],
222
1038
  category: 'reasoning',
223
1039
  isLatest: true,
224
- notes: 'Fast, cost-efficient reasoning model, succeeded by GPT-5 mini',
1040
+ notes: 'Fast, cost-efficient reasoning model, succeeded by GPT-5 mini. Standard: $1.10/$4.40 per 1M tokens. Batch: $0.55/$2.20. Flex: $0.55/$2.20. Priority: $2.00/$8.00. Cached input: $0.275 (standard), $0.138 (flex), $0.50 (priority). Fine-tuning: $100.00/hour training, $4.00/$1.00/$16.00 (standard), $2.00/$0.50/$8.00 (batch), $2.00/$0.50/$8.00 (with data sharing)',
225
1041
  },
226
1042
  {
227
1043
  id: 'o4-mini-deep-research',
@@ -234,7 +1050,7 @@ exports.AVAILABLE_MODELS = [
234
1050
  capabilities: ['text', 'research', 'analysis', 'efficient'],
235
1051
  category: 'research',
236
1052
  isLatest: true,
237
- notes: 'Faster, more affordable deep research model',
1053
+ notes: 'Faster, more affordable deep research model. Standard: $2.00/$8.00 per 1M tokens. Batch: $1.00/$4.00. Cached input: $0.50 (standard)',
238
1054
  },
239
1055
  {
240
1056
  id: 'o3',
@@ -247,7 +1063,7 @@ exports.AVAILABLE_MODELS = [
247
1063
  capabilities: ['text', 'reasoning', 'analysis'],
248
1064
  category: 'reasoning',
249
1065
  isLatest: true,
250
- notes: 'Reasoning model for complex tasks, succeeded by GPT-5',
1066
+ notes: 'Reasoning model for complex tasks, succeeded by GPT-5. Standard: $2.00/$8.00 per 1M tokens. Batch: $1.00/$4.00. Flex: $1.00/$4.00. Priority: $3.50/$14.00. Cached input: $0.50 (standard), $0.25 (flex), $0.875 (priority)',
251
1067
  },
252
1068
  {
253
1069
  id: 'o1-pro',
@@ -260,7 +1076,7 @@ exports.AVAILABLE_MODELS = [
260
1076
  capabilities: ['text', 'reasoning', 'analysis', 'premium'],
261
1077
  category: 'reasoning',
262
1078
  isLatest: true,
263
- notes: 'Version of o1 with more compute for better responses',
1079
+ notes: 'Version of o1 with more compute for better responses. Standard: $150.00/$600.00 per 1M tokens. Batch: $75.00/$300.00',
264
1080
  },
265
1081
  {
266
1082
  id: 'o1',
@@ -273,7 +1089,33 @@ exports.AVAILABLE_MODELS = [
273
1089
  capabilities: ['text', 'reasoning', 'analysis', 'advanced'],
274
1090
  category: 'reasoning',
275
1091
  isLatest: false,
276
- notes: 'Previous full o-series reasoning model',
1092
+ notes: 'Previous full o-series reasoning model. Standard: $15.00/$60.00 per 1M tokens. Batch: $7.50/$30.00. Cached input: $7.50 (standard)',
1093
+ },
1094
+ {
1095
+ id: 'o3-mini',
1096
+ name: 'o3-mini',
1097
+ provider: 'OpenAI',
1098
+ available: true,
1099
+ maxTokens: 128000,
1100
+ contextLength: 128000,
1101
+ pricing: { input: 1.1, output: 4.4 },
1102
+ capabilities: ['text', 'reasoning', 'efficient'],
1103
+ category: 'reasoning',
1104
+ isLatest: false,
1105
+ notes: 'A small model alternative to o3. Standard: $1.10/$4.40 per 1M tokens. Batch: $0.55/$2.20. Cached input: $0.55 (standard)',
1106
+ },
1107
+ {
1108
+ id: 'o1-mini',
1109
+ name: 'o1-mini',
1110
+ provider: 'OpenAI',
1111
+ available: true,
1112
+ maxTokens: 128000,
1113
+ contextLength: 128000,
1114
+ pricing: { input: 1.1, output: 4.4 },
1115
+ capabilities: ['text', 'reasoning', 'efficient'],
1116
+ category: 'reasoning',
1117
+ isLatest: false,
1118
+ notes: 'Deprecated - A small model alternative to o1. Standard: $1.10/$4.40 per 1M tokens. Batch: $0.55/$2.20. Cached input: $0.55 (standard)',
277
1119
  },
278
1120
  // === Video Generation Models ===
279
1121
  {
@@ -283,11 +1125,11 @@ exports.AVAILABLE_MODELS = [
283
1125
  available: true,
284
1126
  maxTokens: 0,
285
1127
  contextLength: 0,
286
- pricing: { input: 0.05, output: 0.05 },
1128
+ pricing: { input: 0.1, output: 0.1 },
287
1129
  capabilities: ['video-generation', 'audio', 'synced-audio'],
288
1130
  category: 'video',
289
1131
  isLatest: true,
290
- notes: 'Flagship video generation with synced audio (priced per second)',
1132
+ notes: 'Flagship video generation with synced audio. $0.10/sec (720p)',
291
1133
  },
292
1134
  {
293
1135
  id: 'sora-2-pro',
@@ -296,257 +1138,233 @@ exports.AVAILABLE_MODELS = [
296
1138
  available: true,
297
1139
  maxTokens: 0,
298
1140
  contextLength: 0,
299
- pricing: { input: 0.1, output: 0.1 },
1141
+ pricing: { input: 0.3, output: 0.3 },
300
1142
  capabilities: ['video-generation', 'audio', 'synced-audio', 'advanced'],
301
1143
  category: 'video',
302
1144
  isLatest: true,
303
- notes: 'Most advanced synced-audio video generation (priced per second)',
1145
+ notes: 'Most advanced synced-audio video generation. $0.30/sec (720p), $0.50/sec (1024p)',
304
1146
  },
305
1147
  // === Image Generation Models ===
306
1148
  {
307
- id: 'gpt-image-1',
308
- name: 'GPT Image 1',
1149
+ id: 'gpt-image-1.5',
1150
+ name: 'GPT Image 1.5',
309
1151
  provider: 'OpenAI',
310
1152
  available: true,
311
1153
  maxTokens: 0,
312
1154
  contextLength: 0,
313
- pricing: { input: 0.04, output: 0.04 },
1155
+ pricing: { input: 5.0, output: 10.0 },
314
1156
  capabilities: ['image-generation', 'text-to-image'],
315
1157
  category: 'image',
316
1158
  isLatest: true,
317
- notes: 'State-of-the-art image generation model',
1159
+ notes: 'Latest state-of-the-art image generation model. Text tokens: $5.00/$10.00 per 1M tokens (standard), $1.25 cached input. Image tokens: $8.00/$32.00 per 1M tokens (standard), $2.00 cached input. Per image: Low $0.009 (1024x1024), $0.013 (1024x1536/1536x1024); Medium $0.034 (1024x1024), $0.05 (1024x1536/1536x1024); High $0.133 (1024x1024), $0.2 (1024x1536/1536x1024). Text output tokens include model reasoning tokens',
318
1160
  },
319
1161
  {
320
- id: 'gpt-image-1-mini',
321
- name: 'gpt-image-1-mini',
1162
+ id: 'chatgpt-image-latest',
1163
+ name: 'ChatGPT Image Latest',
322
1164
  provider: 'OpenAI',
323
1165
  available: true,
324
1166
  maxTokens: 0,
325
1167
  contextLength: 0,
326
- pricing: { input: 0.02, output: 0.02 },
327
- capabilities: ['image-generation', 'text-to-image', 'cost-efficient'],
1168
+ pricing: { input: 5.0, output: 10.0 },
1169
+ capabilities: ['image-generation', 'text-to-image'],
328
1170
  category: 'image',
329
1171
  isLatest: true,
330
- notes: 'A cost-efficient version of GPT Image 1',
331
- },
332
- // === Audio and Realtime Models ===
333
- {
334
- id: 'gpt-realtime',
335
- name: 'gpt-realtime',
336
- provider: 'OpenAI',
337
- available: true,
338
- maxTokens: 128000,
339
- contextLength: 128000,
340
- pricing: { input: 5.0, output: 20.0 },
341
- capabilities: ['text', 'audio', 'realtime', 'multimodal'],
342
- category: 'realtime',
343
- isLatest: true,
344
- notes: 'Model capable of realtime text and audio inputs and outputs',
345
- },
346
- {
347
- id: 'gpt-realtime-mini',
348
- name: 'gpt-realtime-mini',
349
- provider: 'OpenAI',
350
- available: true,
351
- maxTokens: 128000,
352
- contextLength: 128000,
353
- pricing: { input: 0.6, output: 2.4 },
354
- capabilities: ['text', 'audio', 'realtime', 'efficient'],
355
- category: 'realtime',
356
- isLatest: true,
357
- notes: 'A cost-efficient version of GPT Realtime',
358
- },
359
- {
360
- id: 'gpt-audio',
361
- name: 'gpt-audio',
362
- provider: 'OpenAI',
363
- available: true,
364
- maxTokens: 128000,
365
- contextLength: 128000,
366
- pricing: { input: 2.5, output: 10.0 },
367
- capabilities: ['text', 'audio', 'multimodal'],
368
- category: 'audio',
369
- isLatest: true,
370
- notes: 'For audio inputs and outputs with Chat Completions API',
371
- },
372
- {
373
- id: 'gpt-audio-mini',
374
- name: 'gpt-audio-mini',
375
- provider: 'OpenAI',
376
- available: true,
377
- maxTokens: 128000,
378
- contextLength: 128000,
379
- pricing: { input: 0.15, output: 0.6 },
380
- capabilities: ['text', 'audio', 'efficient'],
381
- category: 'audio',
382
- isLatest: true,
383
- notes: 'A cost-efficient version of GPT Audio',
384
- },
385
- // === Transcription Models ===
386
- {
387
- id: 'gpt-4o-transcribe',
388
- name: 'GPT-4o Transcribe',
389
- provider: 'OpenAI',
390
- available: true,
391
- maxTokens: 0,
392
- contextLength: 0,
393
- pricing: { input: 0.15, output: 0.15 },
394
- capabilities: ['audio', 'transcription', 'speech-to-text'],
395
- category: 'audio',
396
- isLatest: true,
397
- notes: 'Speech-to-text model powered by GPT-4o',
1172
+ notes: 'GPT Image model used in ChatGPT. Text tokens: $5.00/$10.00 per 1M tokens (standard), $1.25 cached input. Image tokens: $8.00/$32.00 per 1M tokens (standard), $2.00 cached input. Per image: Low $0.009 (1024x1024), $0.013 (1024x1536/1536x1024); Medium $0.034 (1024x1024), $0.05 (1024x1536/1536x1024); High $0.133 (1024x1024), $0.2 (1024x1536/1536x1024)',
398
1173
  },
399
1174
  {
400
- id: 'gpt-4o-transcribe-diarize',
401
- name: 'GPT-4o Transcribe Diarize',
1175
+ id: 'gpt-image-1',
1176
+ name: 'GPT Image 1',
402
1177
  provider: 'OpenAI',
403
1178
  available: true,
404
1179
  maxTokens: 0,
405
1180
  contextLength: 0,
406
- pricing: { input: 0.2, output: 0.2 },
407
- capabilities: ['audio', 'transcription', 'speech-to-text', 'diarization'],
408
- category: 'audio',
409
- isLatest: true,
410
- notes: "Transcription model that identifies who's speaking when",
1181
+ pricing: { input: 5.0, output: 0.0 },
1182
+ capabilities: ['image-generation', 'text-to-image'],
1183
+ category: 'image',
1184
+ isLatest: false,
1185
+ notes: 'State-of-the-art image generation model. Text tokens: $5.00 per 1M tokens (standard), $1.25 cached input. Image tokens: $10.00/$40.00 per 1M tokens (standard), $2.50 cached input. Per image: Low $0.011 (1024x1024), $0.016 (1024x1536/1536x1024); Medium $0.042 (1024x1024), $0.063 (1024x1536/1536x1024); High $0.167 (1024x1024), $0.25 (1024x1536/1536x1024)',
411
1186
  },
412
1187
  {
413
- id: 'gpt-4o-mini-transcribe',
414
- name: 'GPT-4o mini Transcribe',
1188
+ id: 'gpt-image-1-mini',
1189
+ name: 'gpt-image-1-mini',
415
1190
  provider: 'OpenAI',
416
1191
  available: true,
417
1192
  maxTokens: 0,
418
1193
  contextLength: 0,
419
- pricing: { input: 0.1, output: 0.1 },
420
- capabilities: ['audio', 'transcription', 'speech-to-text', 'efficient'],
421
- category: 'audio',
422
- isLatest: true,
423
- notes: 'Speech-to-text model powered by GPT-4o mini',
1194
+ pricing: { input: 2.0, output: 0.0 },
1195
+ capabilities: ['image-generation', 'text-to-image', 'cost-efficient'],
1196
+ category: 'image',
1197
+ isLatest: false,
1198
+ notes: 'A cost-efficient version of GPT Image 1. Text tokens: $2.00 per 1M tokens (standard), $0.20 cached input. Image tokens: $2.50/$8.00 per 1M tokens (standard), $0.25 cached input. Per image: Low $0.005 (1024x1024), $0.006 (1024x1536/1536x1024); Medium $0.011 (1024x1024), $0.015 (1024x1536/1536x1024); High $0.036 (1024x1024), $0.052 (1024x1536/1536x1024)',
424
1199
  },
425
1200
  {
426
- id: 'whisper-1',
427
- name: 'Whisper',
1201
+ id: 'dall-e-3',
1202
+ name: 'DALL·E 3',
428
1203
  provider: 'OpenAI',
429
1204
  available: true,
430
1205
  maxTokens: 0,
431
1206
  contextLength: 0,
432
- pricing: { input: 0.006, output: 0.006 },
433
- capabilities: [
434
- 'audio',
435
- 'transcription',
436
- 'speech-to-text',
437
- 'general-purpose',
438
- ],
439
- category: 'audio',
440
- isLatest: true,
441
- notes: 'General-purpose speech recognition model (priced per minute)',
1207
+ pricing: { input: 0.04, output: 0.08 },
1208
+ capabilities: ['image-generation', 'text-to-image'],
1209
+ category: 'image',
1210
+ isLatest: false,
1211
+ notes: 'Previous generation image generation model. Standard: $0.04 (1024x1024), $0.08 (1024x1536/1536x1024). HD: $0.08 (1024x1024), $0.12 (1024x1536/1536x1024)',
442
1212
  },
443
- // === Text-to-Speech Models ===
444
1213
  {
445
- id: 'gpt-4o-mini-tts',
446
- name: 'GPT-4o mini TTS',
1214
+ id: 'dall-e-2',
1215
+ name: 'DALL·E 2',
447
1216
  provider: 'OpenAI',
448
1217
  available: true,
449
1218
  maxTokens: 0,
450
1219
  contextLength: 0,
451
- pricing: { input: 0.15, output: 0.15 },
452
- capabilities: ['audio', 'text-to-speech', 'tts'],
453
- category: 'audio',
454
- isLatest: true,
455
- notes: 'Text-to-speech model powered by GPT-4o mini',
1220
+ pricing: { input: 0.016, output: 0.02 },
1221
+ capabilities: ['image-generation', 'text-to-image'],
1222
+ category: 'image',
1223
+ isLatest: false,
1224
+ notes: 'Our first image generation model. Standard: $0.016 (1024x1024), $0.018 (1024x1536), $0.02 (1536x1024)',
456
1225
  },
457
1226
  {
458
- id: 'tts-1',
459
- name: 'TTS-1',
1227
+ id: 'gpt-image-1-mini',
1228
+ name: 'gpt-image-1-mini',
460
1229
  provider: 'OpenAI',
461
1230
  available: true,
462
1231
  maxTokens: 0,
463
1232
  contextLength: 0,
464
- pricing: { input: 0.015, output: 0.015 },
465
- capabilities: ['audio', 'text-to-speech', 'tts', 'fast'],
466
- category: 'audio',
1233
+ pricing: { input: 0.02, output: 0.02 },
1234
+ capabilities: ['image-generation', 'text-to-image', 'cost-efficient'],
1235
+ category: 'image',
467
1236
  isLatest: true,
468
- notes: 'Text-to-speech model optimized for speed (priced per 1K characters)',
1237
+ notes: 'A cost-efficient version of GPT Image 1',
469
1238
  },
1239
+ // === Anthropic Models ===
1240
+ // === Claude 4.5 Series (Latest) ===
470
1241
  {
471
- id: 'tts-1-hd',
472
- name: 'TTS-1 HD',
473
- provider: 'OpenAI',
1242
+ id: 'claude-sonnet-4-5-20250929',
1243
+ name: 'Claude Sonnet 4.5',
1244
+ provider: 'Anthropic',
474
1245
  available: true,
475
- maxTokens: 0,
476
- contextLength: 0,
477
- pricing: { input: 0.03, output: 0.03 },
478
- capabilities: ['audio', 'text-to-speech', 'tts', 'high-quality'],
479
- category: 'audio',
1246
+ maxTokens: 64000,
1247
+ contextLength: 200000,
1248
+ pricing: { input: 3.0, output: 15.0 },
1249
+ capabilities: [
1250
+ 'text',
1251
+ 'vision',
1252
+ 'multimodal',
1253
+ 'reasoning',
1254
+ 'coding',
1255
+ 'agents',
1256
+ 'extended-thinking',
1257
+ 'multilingual',
1258
+ ],
1259
+ category: 'multimodal',
480
1260
  isLatest: true,
481
- notes: 'Text-to-speech model optimized for quality (priced per 1K characters)',
1261
+ notes: 'Latest Claude Sonnet model with enhanced capabilities and 1M context window support (beta). Reliable knowledge cutoff: Jan 2025. Training data cutoff: Jul 2025. Max output: 64K tokens',
482
1262
  },
483
- // === Open-Weight Models ===
484
1263
  {
485
- id: 'gpt-oss-120b',
486
- name: 'gpt-oss-120b',
487
- provider: 'OpenAI',
1264
+ id: 'claude-sonnet-4-5',
1265
+ name: 'Claude Sonnet 4.5 (Alias)',
1266
+ provider: 'Anthropic',
488
1267
  available: true,
489
- maxTokens: 131072,
490
- contextLength: 131072,
491
- pricing: { input: 0.0, output: 0.0 },
492
- capabilities: ['text', 'open-source', 'open-weight'],
493
- category: 'text',
1268
+ maxTokens: 64000,
1269
+ contextLength: 200000,
1270
+ pricing: { input: 3.0, output: 15.0 },
1271
+ capabilities: [
1272
+ 'text',
1273
+ 'vision',
1274
+ 'multimodal',
1275
+ 'reasoning',
1276
+ 'coding',
1277
+ 'agents',
1278
+ 'extended-thinking',
1279
+ 'multilingual',
1280
+ ],
1281
+ category: 'multimodal',
494
1282
  isLatest: true,
495
- notes: 'Most powerful open-weight model, fits into an H100 GPU. Licensed under Apache 2.0',
1283
+ notes: 'Alias for claude-sonnet-4-5-20250929 - automatically points to latest snapshot',
496
1284
  },
497
1285
  {
498
- id: 'gpt-oss-20b',
499
- name: 'gpt-oss-20b',
500
- provider: 'OpenAI',
1286
+ id: 'claude-haiku-4-5-20251001',
1287
+ name: 'Claude Haiku 4.5',
1288
+ provider: 'Anthropic',
501
1289
  available: true,
502
- maxTokens: 131072,
503
- contextLength: 131072,
504
- pricing: { input: 0.0, output: 0.0 },
505
- capabilities: ['text', 'open-source', 'open-weight', 'low-latency'],
506
- category: 'text',
1290
+ maxTokens: 64000,
1291
+ contextLength: 200000,
1292
+ pricing: { input: 1.0, output: 5.0 },
1293
+ capabilities: [
1294
+ 'text',
1295
+ 'vision',
1296
+ 'multimodal',
1297
+ 'fast',
1298
+ 'extended-thinking',
1299
+ 'multilingual',
1300
+ ],
1301
+ category: 'multimodal',
507
1302
  isLatest: true,
508
- notes: 'Medium-sized open-weight model for low latency. Licensed under Apache 2.0',
1303
+ notes: 'Latest Claude Haiku model with improved performance and capabilities. Reliable knowledge cutoff: Feb 2025. Training data cutoff: Jul 2025. Max output: 64K tokens',
509
1304
  },
510
1305
  {
511
- id: 'gpt-4.1',
512
- name: 'GPT-4.1',
513
- provider: 'OpenAI',
1306
+ id: 'claude-haiku-4-5',
1307
+ name: 'Claude Haiku 4.5 (Alias)',
1308
+ provider: 'Anthropic',
514
1309
  available: true,
515
- maxTokens: 128000,
516
- contextLength: 128000,
517
- pricing: { input: 2.0, output: 8.0 },
518
- capabilities: ['text', 'analysis', 'enhanced'],
519
- category: 'text',
1310
+ maxTokens: 64000,
1311
+ contextLength: 200000,
1312
+ pricing: { input: 1.0, output: 5.0 },
1313
+ capabilities: [
1314
+ 'text',
1315
+ 'vision',
1316
+ 'multimodal',
1317
+ 'fast',
1318
+ 'extended-thinking',
1319
+ 'multilingual',
1320
+ ],
1321
+ category: 'multimodal',
520
1322
  isLatest: true,
521
- notes: 'Smartest non-reasoning model',
1323
+ notes: 'Alias for claude-haiku-4-5-20251001 - automatically points to latest snapshot',
522
1324
  },
523
1325
  {
524
- id: 'gpt-4.1-mini',
525
- name: 'GPT-4.1 mini',
526
- provider: 'OpenAI',
1326
+ id: 'claude-opus-4-5-20251101',
1327
+ name: 'Claude Opus 4.5',
1328
+ provider: 'Anthropic',
527
1329
  available: true,
528
- maxTokens: 128000,
529
- contextLength: 128000,
530
- pricing: { input: 0.4, output: 1.6 },
531
- capabilities: ['text', 'analysis', 'efficient'],
532
- category: 'text',
1330
+ maxTokens: 64000,
1331
+ contextLength: 200000,
1332
+ pricing: { input: 5.0, output: 25.0 },
1333
+ capabilities: [
1334
+ 'text',
1335
+ 'vision',
1336
+ 'multimodal',
1337
+ 'reasoning',
1338
+ 'premium',
1339
+ 'extended-thinking',
1340
+ 'multilingual',
1341
+ ],
1342
+ category: 'multimodal',
533
1343
  isLatest: true,
534
- notes: 'Smaller, faster version of GPT-4.1',
1344
+ notes: 'Latest Claude Opus model with enhanced capabilities. Reliable knowledge cutoff: May 2025. Training data cutoff: Aug 2025. Max output: 64K tokens',
535
1345
  },
536
1346
  {
537
- id: 'gpt-4.1-nano',
538
- name: 'GPT-4.1 nano',
539
- provider: 'OpenAI',
1347
+ id: 'claude-opus-4-5',
1348
+ name: 'Claude Opus 4.5 (Alias)',
1349
+ provider: 'Anthropic',
540
1350
  available: true,
541
- maxTokens: 128000,
542
- contextLength: 128000,
543
- pricing: { input: 0.1, output: 0.4 },
544
- capabilities: ['text', 'fast', 'cost-effective'],
545
- category: 'text',
1351
+ maxTokens: 64000,
1352
+ contextLength: 200000,
1353
+ pricing: { input: 5.0, output: 25.0 },
1354
+ capabilities: [
1355
+ 'text',
1356
+ 'vision',
1357
+ 'multimodal',
1358
+ 'reasoning',
1359
+ 'premium',
1360
+ 'extended-thinking',
1361
+ 'multilingual',
1362
+ ],
1363
+ category: 'multimodal',
546
1364
  isLatest: true,
547
- notes: 'Fastest, most cost-efficient version of GPT-4.1',
1365
+ notes: 'Alias for claude-opus-4-5-20251101 - automatically points to latest snapshot',
548
1366
  },
549
- // === Anthropic Models ===
1367
+ // === Claude 4 Series (Legacy) ===
550
1368
  {
551
1369
  id: 'claude-opus-4-1-20250805',
552
1370
  name: 'Claude Opus 4.1',
@@ -564,8 +1382,8 @@ exports.AVAILABLE_MODELS = [
564
1382
  'multilingual',
565
1383
  ],
566
1384
  category: 'multimodal',
567
- isLatest: true,
568
- notes: 'Most capable and intelligent Claude model yet - superior reasoning and advanced coding (Mar 2025 cutoff)',
1385
+ isLatest: false,
1386
+ notes: 'Legacy model - migrate to Claude Opus 4.5. Reliable knowledge cutoff: Jan 2025. Training data cutoff: Mar 2025. Max output: 32K tokens',
569
1387
  },
570
1388
  {
571
1389
  id: 'claude-opus-4-20250514',
@@ -584,8 +1402,8 @@ exports.AVAILABLE_MODELS = [
584
1402
  'multilingual',
585
1403
  ],
586
1404
  category: 'multimodal',
587
- isLatest: true,
588
- notes: 'Previous flagship model with very high intelligence and capability (Mar 2025 cutoff)',
1405
+ isLatest: false,
1406
+ notes: 'Legacy model - migrate to Claude Opus 4.5. Reliable knowledge cutoff: Jan 2025. Training data cutoff: Mar 2025. Max output: 32K tokens',
589
1407
  },
590
1408
  {
591
1409
  id: 'claude-sonnet-4-20250514',
@@ -600,13 +1418,15 @@ exports.AVAILABLE_MODELS = [
600
1418
  'vision',
601
1419
  'multimodal',
602
1420
  'reasoning',
1421
+ 'coding',
603
1422
  'extended-thinking',
604
1423
  'multilingual',
605
1424
  ],
606
1425
  category: 'multimodal',
607
1426
  isLatest: false,
608
- notes: 'High-performance model with exceptional reasoning (Mar 2025 cutoff, 1M context beta available). Use Claude Sonnet 4.5 for latest version',
1427
+ notes: 'Legacy model - migrate to Claude Sonnet 4.5. Reliable knowledge cutoff: Jan 2025. Training data cutoff: Mar 2025. Max output: 64K tokens. 1M context beta available',
609
1428
  },
1429
+ // === Claude 3.7 Series (Deprecated) ===
610
1430
  {
611
1431
  id: 'claude-3-7-sonnet-20250219',
612
1432
  name: 'Claude Sonnet 3.7',
@@ -620,13 +1440,15 @@ exports.AVAILABLE_MODELS = [
620
1440
  'vision',
621
1441
  'multimodal',
622
1442
  'reasoning',
1443
+ 'coding',
623
1444
  'extended-thinking',
624
1445
  'multilingual',
625
1446
  ],
626
1447
  category: 'multimodal',
627
1448
  isLatest: false,
628
- notes: 'High-performance model with early extended thinking (Oct 2024 cutoff, 64k output). Deprecated - use Claude Sonnet 4.5 instead',
1449
+ notes: 'DEPRECATED - migrate to Claude Sonnet 4.5. Reliable knowledge cutoff: Oct 2024. Training data cutoff: Nov 2024. Max output: 64K tokens',
629
1450
  },
1451
+ // === Claude 3.5 Series ===
630
1452
  {
631
1453
  id: 'claude-3-5-sonnet-20241022',
632
1454
  name: 'Claude Sonnet 3.5 v2',
@@ -641,52 +1463,66 @@ exports.AVAILABLE_MODELS = [
641
1463
  notes: 'Upgraded Claude 3.5 Sonnet (Apr 2024 cutoff, 8k output)',
642
1464
  },
643
1465
  {
644
- id: 'claude-sonnet-4-5',
645
- name: 'Claude Sonnet 4.5',
1466
+ id: 'claude-3-5-haiku-20241022',
1467
+ name: 'Claude Haiku 3.5',
646
1468
  provider: 'Anthropic',
647
1469
  available: true,
648
- maxTokens: 200000,
1470
+ maxTokens: 8192,
649
1471
  contextLength: 200000,
650
- pricing: { input: 3.0, output: 15.0 },
1472
+ pricing: { input: 0.8, output: 4.0 },
1473
+ capabilities: ['text', 'vision', 'multimodal', 'fast', 'multilingual'],
1474
+ category: 'multimodal',
1475
+ isLatest: false,
1476
+ notes: 'Legacy model - migrate to Claude Haiku 4.5. Training data cutoff: July 2024. Max output: 8K tokens',
1477
+ },
1478
+ // === Google AI Models ===
1479
+ // === Gemini 3 Models (Latest) ===
1480
+ {
1481
+ id: 'gemini-3-pro-preview',
1482
+ name: 'Gemini 3 Pro Preview',
1483
+ provider: 'Google AI',
1484
+ available: true,
1485
+ maxTokens: 2000000,
1486
+ contextLength: 2000000,
1487
+ pricing: { input: 2.0, output: 12.0 },
651
1488
  capabilities: [
652
1489
  'text',
653
1490
  'vision',
654
- 'multimodal',
655
1491
  'reasoning',
656
- 'extended-thinking',
657
- 'multilingual',
1492
+ 'coding',
1493
+ 'agents',
1494
+ 'multimodal',
658
1495
  ],
659
1496
  category: 'multimodal',
660
1497
  isLatest: true,
661
- notes: 'Latest Claude Sonnet model with enhanced capabilities and 1M context window support (beta)',
1498
+ notes: 'Latest Gemini 3 Pro preview model. Input (text, image, video, audio): $2.00/1M tokens (<=200K), $4.00/1M tokens (>200K). Text output: $12.00/1M tokens (<=200K), $18.00/1M tokens (>200K). Cached input: $0.20/1M tokens (<=200K), $0.40/1M tokens (>200K). Batch API: $1.00/$6.00 (<=200K), $2.00/$9.00 (>200K). Image output: $120/1M tokens (1K/2K image = 1120 tokens = $0.134/image, 4K image = 2000 tokens = $0.24/image)',
662
1499
  },
663
1500
  {
664
- id: 'claude-haiku-4-5',
665
- name: 'Claude Haiku 4.5',
666
- provider: 'Anthropic',
1501
+ id: 'gemini-3-pro-image-preview',
1502
+ name: 'Gemini 3 Pro Image Preview',
1503
+ provider: 'Google AI',
667
1504
  available: true,
668
- maxTokens: 200000,
669
- contextLength: 200000,
670
- pricing: { input: 1.0, output: 5.0 },
671
- capabilities: ['text', 'vision', 'multimodal', 'multilingual'],
1505
+ maxTokens: 2000000,
1506
+ contextLength: 2000000,
1507
+ pricing: { input: 2.0, output: 12.0 },
1508
+ capabilities: ['text', 'image', 'vision', 'multimodal'],
672
1509
  category: 'multimodal',
673
1510
  isLatest: true,
674
- notes: 'Latest Claude Haiku model with improved performance and capabilities',
1511
+ notes: 'Latest Gemini 3 Pro Image preview model with image generation capabilities. Image output: $120/1M tokens (1K/2K image = 1120 tokens = $0.134/image, 4K image = 2000 tokens = $0.24/image)',
675
1512
  },
676
1513
  {
677
- id: 'claude-3-5-haiku-20241022',
678
- name: 'Claude Haiku 3.5',
679
- provider: 'Anthropic',
1514
+ id: 'gemini-3-flash-preview',
1515
+ name: 'Gemini 3 Flash Preview',
1516
+ provider: 'Google AI',
680
1517
  available: true,
681
- maxTokens: 8192,
682
- contextLength: 200000,
683
- pricing: { input: 0.8, output: 4.0 },
684
- capabilities: ['text', 'vision', 'multimodal', 'multilingual'],
1518
+ maxTokens: 2000000,
1519
+ contextLength: 2000000,
1520
+ pricing: { input: 0.5, output: 3.0 },
1521
+ capabilities: ['text', 'vision', 'audio', 'fast', 'multimodal'],
685
1522
  category: 'multimodal',
686
- isLatest: false,
687
- notes: 'Fastest Claude model (July 2024 cutoff, 8k output)',
1523
+ isLatest: true,
1524
+ notes: 'Latest Gemini 3 Flash preview model. Input (text, image, video): $0.50/1M tokens. Input (audio): $1.00/1M tokens. Text output: $3.00/1M tokens. Cached input: $0.05/1M tokens (text/image/video), $0.10/1M tokens (audio). Batch API: $0.25/1M tokens (text/image/video), $0.50/1M tokens (audio) input, $1.50/1M tokens output',
688
1525
  },
689
- // === Google AI Models ===
690
1526
  // === Gemini 2.5 Models (Latest) ===
691
1527
  {
692
1528
  id: 'gemini-2.5-pro',
@@ -696,17 +1532,30 @@ exports.AVAILABLE_MODELS = [
696
1532
  maxTokens: 2000000,
697
1533
  contextLength: 2000000,
698
1534
  pricing: { input: 1.25, output: 10.0 },
1535
+ capabilities: ['text', 'vision', 'reasoning', 'coding', 'multimodal'],
1536
+ category: 'multimodal',
1537
+ isLatest: true,
1538
+ notes: 'Our state-of-the-art thinking model, capable of reasoning over complex problems in code, math, and STEM, as well as analyzing large datasets, codebases, and documents using long context. Best for multimodal understanding, coding (web development), and complex prompts. Input (text, image, video, audio): $1.25/1M tokens (<=200K), $2.50/1M tokens (>200K). Text output: $10.00/1M tokens (<=200K), $15.00/1M tokens (>200K). Cached: $0.125/1M tokens (<=200K), $0.250/1M tokens (>200K). Batch API: $0.625/$5.00 (<=200K), $1.25/$7.50 (>200K)',
1539
+ },
1540
+ {
1541
+ id: 'gemini-2.5-pro-computer-use-preview',
1542
+ name: 'Gemini 2.5 Pro Computer Use-Preview',
1543
+ provider: 'Google AI',
1544
+ available: true,
1545
+ maxTokens: 2000000,
1546
+ contextLength: 2000000,
1547
+ pricing: { input: 1.25, output: 10.0 },
699
1548
  capabilities: [
700
1549
  'text',
701
- 'multimodal',
1550
+ 'vision',
702
1551
  'reasoning',
703
1552
  'coding',
704
- 'complex-problems',
705
- 'thinking',
1553
+ 'computer-use',
1554
+ 'multimodal',
706
1555
  ],
707
1556
  category: 'multimodal',
708
1557
  isLatest: true,
709
- notes: 'Our state-of-the-art thinking model, capable of reasoning over complex problems in code, math, and STEM, as well as analyzing large datasets, codebases, and documents using long context',
1558
+ notes: 'Gemini 2.5 Pro with Computer Use capabilities. Input (text, image, video, audio): $1.25/1M tokens (<=200K), $2.50/1M tokens (>200K). Text output: $10.00/1M tokens (<=200K), $15.00/1M tokens (>200K). Computer Use billing uses the Gemini 2.5 Pro SKU',
710
1559
  },
711
1560
  {
712
1561
  id: 'gemini-2.5-flash',
@@ -718,17 +1567,28 @@ exports.AVAILABLE_MODELS = [
718
1567
  pricing: { input: 0.3, output: 2.5 },
719
1568
  capabilities: [
720
1569
  'text',
721
- 'image',
722
- 'video',
1570
+ 'vision',
1571
+ 'audio',
1572
+ 'fast',
723
1573
  'multimodal',
724
- 'reasoning',
725
- 'thinking',
726
- 'live-api',
727
- 'agents',
1574
+ 'image-generation',
728
1575
  ],
729
1576
  category: 'multimodal',
730
1577
  isLatest: true,
731
- notes: 'Our best model in terms of price-performance, offering well-rounded capabilities. Best for large scale processing, low-latency, high volume tasks that require thinking, and agentic use cases',
1578
+ notes: "Our best model in terms of price-performance, offering well-rounded capabilities. Best for large scale processing, low-latency, high volume tasks that require thinking, and agentic use cases. Support for Live API included for some endpoints. See the model's thinking process as part of the response. Input (text, image, video): $0.30/1M tokens. Audio input: $1.00/1M tokens. Text output: $2.50/1M tokens. Image output: $30/1M tokens (1024x1024 image = 1290 tokens). Cached: $0.030/1M tokens. Batch API: $0.15/$1.25 (text/image/video), $0.50/$1.25 (audio)",
1579
+ },
1580
+ {
1581
+ id: 'gemini-2.5-flash-preview-09-2025',
1582
+ name: 'Gemini 2.5 Flash Preview',
1583
+ provider: 'Google AI',
1584
+ available: true,
1585
+ maxTokens: 1000000,
1586
+ contextLength: 1000000,
1587
+ pricing: { input: 0.3, output: 2.5 },
1588
+ capabilities: ['text', 'vision', 'audio', 'fast'],
1589
+ category: 'multimodal',
1590
+ isLatest: true,
1591
+ notes: 'Gemini 2.5 Flash preview model. Cached: $0.03',
732
1592
  },
733
1593
  {
734
1594
  id: 'gemini-2.5-flash-lite-preview',
@@ -749,7 +1609,7 @@ exports.AVAILABLE_MODELS = [
749
1609
  ],
750
1610
  category: 'multimodal',
751
1611
  isLatest: true,
752
- notes: 'Our fastest flash model optimized for cost-efficiency and high throughput. Features 1M token context window and multimodal input',
1612
+ notes: 'Our fastest flash model optimized for cost-efficiency and high throughput. Features 1M token context window and multimodal input. Outperforms 2.0 Flash on most evaluation benchmarks. Audio input: $0.50',
753
1613
  },
754
1614
  {
755
1615
  id: 'gemini-2.5-flash-lite',
@@ -759,18 +1619,10 @@ exports.AVAILABLE_MODELS = [
759
1619
  maxTokens: 1000000,
760
1620
  contextLength: 1000000,
761
1621
  pricing: { input: 0.1, output: 0.4 },
762
- capabilities: [
763
- 'text',
764
- 'image',
765
- 'video',
766
- 'multimodal',
767
- 'reasoning',
768
- 'thinking',
769
- 'high-throughput',
770
- ],
1622
+ capabilities: ['text', 'vision', 'fast', 'multimodal'],
771
1623
  category: 'multimodal',
772
1624
  isLatest: true,
773
- notes: 'Our fastest flash model optimized for cost-efficiency and high throughput (stable version)',
1625
+ notes: 'Our fastest flash model optimized for cost-efficiency and high throughput (stable version). Features 1M token context window and multimodal input. Input (text, image, video): $0.10/1M tokens. Audio input: $0.30/1M tokens. Text output: $0.40/1M tokens. Cached: $0.010/1M tokens (text/image/video), $0.030/1M tokens (audio). Batch API: $0.05/$0.20 (text/image/video), $0.15/$0.20 (audio)',
774
1626
  },
775
1627
  {
776
1628
  id: 'gemini-2.5-flash-audio',
@@ -792,11 +1644,11 @@ exports.AVAILABLE_MODELS = [
792
1644
  available: true,
793
1645
  maxTokens: 1000000,
794
1646
  contextLength: 1000000,
795
- pricing: { input: 0.5, output: 0.4 },
1647
+ pricing: { input: 0.3, output: 0.4 },
796
1648
  capabilities: ['audio', 'multimodal', 'audio-input', 'high-throughput'],
797
1649
  category: 'audio',
798
1650
  isLatest: true,
799
- notes: 'Gemini 2.5 Flash-Lite with audio input capabilities',
1651
+ notes: 'Gemini 2.5 Flash-Lite with audio input capabilities. Audio input: $0.30/1M tokens. Text output: $0.40/1M tokens',
800
1652
  },
801
1653
  {
802
1654
  id: 'gemini-2.5-flash-native-audio',
@@ -809,7 +1661,7 @@ exports.AVAILABLE_MODELS = [
809
1661
  capabilities: ['audio', 'multimodal', 'native-audio'],
810
1662
  category: 'audio',
811
1663
  isLatest: true,
812
- notes: 'Native audio model optimized for higher quality audio outputs',
1664
+ notes: 'Native audio model optimized for higher quality audio outputs. Audio/video input: $3.00, Audio output: $12.00',
813
1665
  },
814
1666
  {
815
1667
  id: 'gemini-2.5-flash-native-audio-output',
@@ -858,18 +1710,24 @@ exports.AVAILABLE_MODELS = [
858
1710
  available: true,
859
1711
  maxTokens: 1000000,
860
1712
  contextLength: 1000000,
861
- pricing: { input: 0.1, output: 0.4 },
862
- capabilities: [
863
- 'text',
864
- 'image',
865
- 'video',
866
- 'multimodal',
867
- 'agents',
868
- 'next-generation',
869
- ],
1713
+ pricing: { input: 0.15, output: 0.6 },
1714
+ capabilities: ['text', 'vision', 'audio', 'multimodal'],
1715
+ category: 'multimodal',
1716
+ isLatest: false,
1717
+ notes: 'Our second generation workhorse model, with a 1 million token context window. Most balanced multimodal model built for the era of Agents. Input (text, image, video): $0.15/1M tokens. Audio input: $1.00/1M tokens. Text output: $0.60/1M tokens. Batch API: $0.075/$0.30. Tuning: $3.00/1M training tokens',
1718
+ },
1719
+ {
1720
+ id: 'gemini-2.0-flash-image-generation',
1721
+ name: 'Gemini 2.0 Flash Image Generation',
1722
+ provider: 'Google AI',
1723
+ available: true,
1724
+ maxTokens: 1000000,
1725
+ contextLength: 1000000,
1726
+ pricing: { input: 0.15, output: 30.0 },
1727
+ capabilities: ['text', 'vision', 'audio', 'image-generation', 'multimodal'],
870
1728
  category: 'multimodal',
871
1729
  isLatest: false,
872
- notes: 'Our second generation workhorse model, with a 1 million token context window',
1730
+ notes: 'Gemini 2.0 Flash with image generation capabilities. Input (text, image, video): $0.15/1M tokens. Audio input: $1.00/1M tokens. Video input: $3.00/1M tokens. Text output: $0.60/1M tokens. Image output: $30.00/1M tokens',
873
1731
  },
874
1732
  {
875
1733
  id: 'gemini-2.0-flash-lite',
@@ -879,10 +1737,10 @@ exports.AVAILABLE_MODELS = [
879
1737
  maxTokens: 1000000,
880
1738
  contextLength: 1000000,
881
1739
  pricing: { input: 0.075, output: 0.3 },
882
- capabilities: ['text', 'multimodal', 'cost-efficient', 'low-latency'],
1740
+ capabilities: ['text', 'fast', 'multimodal'],
883
1741
  category: 'multimodal',
884
1742
  isLatest: false,
885
- notes: 'Our second generation small workhorse model, with a 1 million token context window',
1743
+ notes: 'Our second generation small workhorse model, with a 1 million token context window. Optimized for cost efficiency and low latency, built for at scale usage. Input (text, image, video, audio): $0.075/1M tokens. Text output: $0.30/1M tokens. Batch API: $0.0375/$0.15. Tuning: $1.00/1M training tokens',
886
1744
  },
887
1745
  {
888
1746
  id: 'gemini-2.0-flash-audio',
@@ -909,7 +1767,7 @@ exports.AVAILABLE_MODELS = [
909
1767
  capabilities: ['text', 'image', 'video', 'multimodal'],
910
1768
  category: 'multimodal',
911
1769
  isLatest: false,
912
- notes: 'Fastest multimodal model for diverse, repetitive tasks',
1770
+ notes: 'Fastest multimodal model for diverse, repetitive tasks. $0.15/$0.60 for prompts > 128k tokens',
913
1771
  },
914
1772
  {
915
1773
  id: 'gemini-1.5-flash-large-context',
@@ -935,7 +1793,7 @@ exports.AVAILABLE_MODELS = [
935
1793
  capabilities: ['text', 'image', 'video', 'multimodal', 'efficient'],
936
1794
  category: 'multimodal',
937
1795
  isLatest: false,
938
- notes: 'Smallest model for lower intelligence use cases',
1796
+ notes: 'Smallest model for lower intelligence use cases. $0.075/$0.30 for prompts > 128k tokens',
939
1797
  },
940
1798
  {
941
1799
  id: 'gemini-1.5-flash-8b-large-context',
@@ -968,7 +1826,7 @@ exports.AVAILABLE_MODELS = [
968
1826
  capabilities: ['text', 'code', 'reasoning', 'multimodal'],
969
1827
  category: 'text',
970
1828
  isLatest: false,
971
- notes: 'Highest intelligence Gemini 1.5 series model with 2M context',
1829
+ notes: 'Highest intelligence Gemini 1.5 series model with 2M context. $2.50/$10.00 for prompts > 128k tokens',
972
1830
  },
973
1831
  {
974
1832
  id: 'gemini-1.5-pro-large-context',
@@ -1001,7 +1859,7 @@ exports.AVAILABLE_MODELS = [
1001
1859
  ],
1002
1860
  category: 'text',
1003
1861
  isLatest: true,
1004
- notes: 'The latest open models, designed for efficient execution on low-resource devices, capable of multimodal input (text, image, video, audio), and trained with data in over 140 spoken languages',
1862
+ notes: 'The latest open models, designed for efficient execution on low-resource devices, capable of multimodal input (text, image, video, audio), and trained with data in over 140 spoken languages. Open model built for efficient performance on everyday devices (free tier only)',
1005
1863
  },
1006
1864
  {
1007
1865
  id: 'gemma-3',
@@ -1020,7 +1878,7 @@ exports.AVAILABLE_MODELS = [
1020
1878
  ],
1021
1879
  category: 'text',
1022
1880
  isLatest: true,
1023
- notes: 'The third generation of our open models, featuring the ability to solve a wide variety of tasks with text and image input, support for over 140 languages, and long 128K context window',
1881
+ notes: 'The third generation of our open models, featuring the ability to solve a wide variety of tasks with text and image input, support for over 140 languages, and long 128K context window (free tier only)',
1024
1882
  },
1025
1883
  {
1026
1884
  id: 'gemma-2',
@@ -1039,7 +1897,7 @@ exports.AVAILABLE_MODELS = [
1039
1897
  ],
1040
1898
  category: 'text',
1041
1899
  isLatest: false,
1042
- notes: 'The second generation of our open models featuring text generation, summarization, and extraction',
1900
+ notes: 'The second generation of our open models featuring text generation, summarization, and extraction (free tier only)',
1043
1901
  },
1044
1902
  {
1045
1903
  id: 'gemma',
@@ -1059,7 +1917,7 @@ exports.AVAILABLE_MODELS = [
1059
1917
  ],
1060
1918
  category: 'text',
1061
1919
  isLatest: false,
1062
- notes: 'A small-sized, lightweight open model supporting text generation, summarization, and extraction',
1920
+ notes: 'A small-sized, lightweight open model supporting text generation, summarization, and extraction (free tier only)',
1063
1921
  },
1064
1922
  // === Specialized Gemma Models ===
1065
1923
  {
@@ -1078,7 +1936,7 @@ exports.AVAILABLE_MODELS = [
1078
1936
  ],
1079
1937
  category: 'safety',
1080
1938
  isLatest: true,
1081
- notes: 'Instruction tuned models for evaluating the safety of text and images against a set of defined safety policies',
1939
+ notes: 'Instruction tuned models for evaluating the safety of text and images against a set of defined safety policies (free tier only)',
1082
1940
  },
1083
1941
  {
1084
1942
  id: 'paligemma',
@@ -1091,7 +1949,7 @@ exports.AVAILABLE_MODELS = [
1091
1949
  capabilities: ['text', 'open-source', 'vision-language', 'siglip', 'gemma'],
1092
1950
  category: 'vision-language',
1093
1951
  isLatest: true,
1094
- notes: 'Our open vision-language model that combines SigLIP and Gemma',
1952
+ notes: 'Our open vision-language model that combines SigLIP and Gemma (free tier only)',
1095
1953
  },
1096
1954
  {
1097
1955
  id: 'codegemma',
@@ -1111,7 +1969,7 @@ exports.AVAILABLE_MODELS = [
1111
1969
  ],
1112
1970
  category: 'coding',
1113
1971
  isLatest: true,
1114
- notes: 'Powerful, lightweight open model that can perform a variety of coding tasks like fill-in-the-middle code completion, code generation, natural language understanding, mathematical reasoning, and instruction following',
1972
+ notes: 'Powerful, lightweight open model that can perform a variety of coding tasks like fill-in-the-middle code completion, code generation, natural language understanding, mathematical reasoning, and instruction following (free tier only)',
1115
1973
  },
1116
1974
  {
1117
1975
  id: 'txgemma',
@@ -1131,7 +1989,7 @@ exports.AVAILABLE_MODELS = [
1131
1989
  ],
1132
1990
  category: 'therapeutic',
1133
1991
  isLatest: true,
1134
- notes: 'Generates predictions, classifications or text based on therapeutic related data and can be used to efficiently build AI models for therapeutic-related tasks with less data and less compute',
1992
+ notes: 'Generates predictions, classifications or text based on therapeutic related data and can be used to efficiently build AI models for therapeutic-related tasks with less data and less compute (free tier only)',
1135
1993
  },
1136
1994
  {
1137
1995
  id: 'medgemma',
@@ -1150,7 +2008,7 @@ exports.AVAILABLE_MODELS = [
1150
2008
  ],
1151
2009
  category: 'medical',
1152
2010
  isLatest: true,
1153
- notes: 'Collection of Gemma 3 variants that are trained for performance on medical text and image comprehension',
2011
+ notes: 'Collection of Gemma 3 variants that are trained for performance on medical text and image comprehension (free tier only)',
1154
2012
  },
1155
2013
  {
1156
2014
  id: 'medsiglip',
@@ -1169,7 +2027,7 @@ exports.AVAILABLE_MODELS = [
1169
2027
  ],
1170
2028
  category: 'medical',
1171
2029
  isLatest: true,
1172
- notes: 'SigLIP variant that is trained to encode medical images and text into a common embedding space',
2030
+ notes: 'SigLIP variant that is trained to encode medical images and text into a common embedding space (free tier only)',
1173
2031
  },
1174
2032
  {
1175
2033
  id: 't5gemma',
@@ -1189,9 +2047,22 @@ exports.AVAILABLE_MODELS = [
1189
2047
  ],
1190
2048
  category: 'research',
1191
2049
  isLatest: true,
1192
- notes: 'A family of lightweight yet powerful encoder-decoder research models from Google',
2050
+ notes: 'A family of lightweight yet powerful encoder-decoder research models from Google (free tier only)',
1193
2051
  },
1194
2052
  // === Embeddings Models ===
2053
+ {
2054
+ id: 'gemini-embedding-001',
2055
+ name: 'Gemini Embedding',
2056
+ provider: 'Google AI',
2057
+ available: true,
2058
+ maxTokens: 2048,
2059
+ contextLength: 2048,
2060
+ pricing: { input: 0.15, output: 0.0 },
2061
+ capabilities: ['embeddings', 'semantic-search'],
2062
+ category: 'embedding',
2063
+ isLatest: true,
2064
+ notes: 'Gemini embedding model for semantic search and similarity tasks. Online requests: $0.00015 per 1,000 input tokens. Batch requests: $0.00012 per 1,000 input tokens. Output: No charge',
2065
+ },
1195
2066
  {
1196
2067
  id: 'text-embedding-004',
1197
2068
  name: 'Text Embedding 004',
@@ -1206,170 +2077,503 @@ exports.AVAILABLE_MODELS = [
1206
2077
  'classification',
1207
2078
  'clustering',
1208
2079
  ],
1209
- category: 'embedding',
2080
+ category: 'embedding',
2081
+ isLatest: true,
2082
+ notes: 'State-of-the-art text embedding model for semantic search, classification, clustering, and similar tasks (free tier only)',
2083
+ },
2084
+ {
2085
+ id: 'multimodal-embeddings',
2086
+ name: 'Multimodal Embeddings',
2087
+ provider: 'Google AI',
2088
+ available: true,
2089
+ maxTokens: 2048,
2090
+ contextLength: 2048,
2091
+ pricing: { input: 0.0, output: 0.0 },
2092
+ capabilities: [
2093
+ 'embedding',
2094
+ 'multimodal',
2095
+ 'image-classification',
2096
+ 'image-search',
2097
+ ],
2098
+ category: 'embedding',
2099
+ isLatest: true,
2100
+ notes: 'Generates vectors based on images, which can be used for downstream tasks like image classification, image search, and more (free tier only)',
2101
+ },
2102
+ // === Imagen Models (Image Generation) ===
2103
+ {
2104
+ id: 'imagen-4-generation',
2105
+ name: 'Imagen 4 for Generation',
2106
+ provider: 'Google AI',
2107
+ available: true,
2108
+ maxTokens: 0,
2109
+ contextLength: 0,
2110
+ pricing: { input: 0.04, output: 0.04 },
2111
+ capabilities: ['image-generation', 'text-to-image', 'higher-quality'],
2112
+ category: 'image',
2113
+ isLatest: true,
2114
+ notes: 'Use text prompts to generate novel images with higher quality than our previous image generation models. Priced at $0.04 per image',
2115
+ },
2116
+ {
2117
+ id: 'imagen-4-fast-generation',
2118
+ name: 'Imagen 4 for Fast Generation',
2119
+ provider: 'Google AI',
2120
+ available: true,
2121
+ maxTokens: 0,
2122
+ contextLength: 0,
2123
+ pricing: { input: 0.02, output: 0.02 },
2124
+ capabilities: [
2125
+ 'image-generation',
2126
+ 'text-to-image',
2127
+ 'higher-quality',
2128
+ 'lower-latency',
2129
+ ],
2130
+ category: 'image',
2131
+ isLatest: true,
2132
+ notes: 'Use text prompts to generate novel images with higher quality and lower latency than our previous image generation models. Priced at $0.02 per image',
2133
+ },
2134
+ {
2135
+ id: 'imagen-4-ultra-generation',
2136
+ name: 'Imagen 4 for Ultra Generation',
2137
+ provider: 'Google AI',
2138
+ available: true,
2139
+ maxTokens: 0,
2140
+ contextLength: 0,
2141
+ pricing: { input: 0.06, output: 0.06 },
2142
+ capabilities: [
2143
+ 'image-generation',
2144
+ 'text-to-image',
2145
+ 'higher-quality',
2146
+ 'better-prompt-adherence',
2147
+ ],
2148
+ category: 'image',
2149
+ isLatest: true,
2150
+ notes: 'Use text prompts to generate novel images with higher quality and better prompt adherence than our previous image generation models. Priced at $0.06 per image',
2151
+ },
2152
+ {
2153
+ id: 'imagen-4-upscaling',
2154
+ name: 'Imagen 4 for Upscaling',
2155
+ provider: 'Google AI',
2156
+ available: true,
2157
+ maxTokens: 0,
2158
+ contextLength: 0,
2159
+ pricing: { input: 0.06, output: 0.06 },
2160
+ capabilities: ['image-generation', 'upscaling', 'image-enhancement'],
2161
+ category: 'image',
2162
+ isLatest: true,
2163
+ notes: 'Increase resolution of a generated image to 2K, 3K, and 4K. Priced at $0.06 per image',
2164
+ },
2165
+ {
2166
+ id: 'imagen-3-generation',
2167
+ name: 'Imagen 3 for Generation',
2168
+ provider: 'Google AI',
2169
+ available: true,
2170
+ maxTokens: 0,
2171
+ contextLength: 0,
2172
+ pricing: { input: 0.04, output: 0.0 },
2173
+ capabilities: [
2174
+ 'image-generation',
2175
+ 'text-to-image',
2176
+ 'image-editing',
2177
+ 'customization',
2178
+ ],
2179
+ category: 'image',
2180
+ isLatest: false,
2181
+ notes: 'Use text prompts to generate novel images, edit an image, or customize an image. Priced at $0.04 per image',
2182
+ },
2183
+ {
2184
+ id: 'imagen-3-editing-customization',
2185
+ name: 'Imagen 3 for Editing and Customization',
2186
+ provider: 'Google AI',
2187
+ available: true,
2188
+ maxTokens: 0,
2189
+ contextLength: 0,
2190
+ pricing: { input: 0.03, output: 0.03 },
2191
+ capabilities: [
2192
+ 'image-generation',
2193
+ 'text-to-image',
2194
+ 'image-editing',
2195
+ 'customization',
2196
+ 'mask-editing',
2197
+ ],
2198
+ category: 'image',
2199
+ isLatest: false,
2200
+ notes: 'Use text prompts to edit existing input images, or parts of an image with a mask or generate new images based upon the context provided by input reference images',
2201
+ },
2202
+ {
2203
+ id: 'imagen-3-fast-generation',
2204
+ name: 'Imagen 3 for Fast Generation',
2205
+ provider: 'Google AI',
2206
+ available: true,
2207
+ maxTokens: 0,
2208
+ contextLength: 0,
2209
+ pricing: { input: 0.02, output: 0.02 },
2210
+ capabilities: ['image-generation', 'text-to-image', 'lower-latency'],
2211
+ category: 'image',
2212
+ isLatest: false,
2213
+ notes: 'Use text prompts to generate novel images with lower latency than our other image generation models. Priced at $0.02 per image',
2214
+ },
2215
+ {
2216
+ id: 'imagen-2-generation',
2217
+ name: 'Imagen 2 for Generation',
2218
+ provider: 'Google AI',
2219
+ available: true,
2220
+ maxTokens: 0,
2221
+ contextLength: 0,
2222
+ pricing: { input: 0.02, output: 0.02 },
2223
+ capabilities: ['image-generation', 'text-to-image'],
2224
+ category: 'image',
2225
+ isLatest: false,
2226
+ notes: 'Use text prompts to generate novel images. Priced at $0.020 per image',
2227
+ },
2228
+ {
2229
+ id: 'imagen-2-editing',
2230
+ name: 'Imagen 2 for Editing',
2231
+ provider: 'Google AI',
2232
+ available: true,
2233
+ maxTokens: 0,
2234
+ contextLength: 0,
2235
+ pricing: { input: 0.02, output: 0.02 },
2236
+ capabilities: ['image-generation', 'image-editing', 'mask-editing'],
2237
+ category: 'image',
2238
+ isLatest: false,
2239
+ notes: 'Edit an image using mask free or mask approach. Priced at $0.020 per image',
2240
+ },
2241
+ {
2242
+ id: 'imagen-1-generation',
2243
+ name: 'Imagen 1 for Generation',
2244
+ provider: 'Google AI',
2245
+ available: true,
2246
+ maxTokens: 0,
2247
+ contextLength: 0,
2248
+ pricing: { input: 0.02, output: 0.02 },
2249
+ capabilities: ['image-generation', 'text-to-image'],
2250
+ category: 'image',
2251
+ isLatest: false,
2252
+ notes: 'Use text prompts to generate novel images. Priced at $0.020 per image',
2253
+ },
2254
+ {
2255
+ id: 'imagen-1-editing',
2256
+ name: 'Imagen 1 for Editing',
2257
+ provider: 'Google AI',
2258
+ available: true,
2259
+ maxTokens: 0,
2260
+ contextLength: 0,
2261
+ pricing: { input: 0.02, output: 0.02 },
2262
+ capabilities: ['image-generation', 'image-editing', 'mask-editing'],
2263
+ category: 'image',
2264
+ isLatest: false,
2265
+ notes: 'Edit an image using mask free or mask approach. Priced at $0.020 per image',
2266
+ },
2267
+ {
2268
+ id: 'imagen-1-upscaling',
2269
+ name: 'Imagen 1 for Upscaling',
2270
+ provider: 'Google AI',
2271
+ available: true,
2272
+ maxTokens: 0,
2273
+ contextLength: 0,
2274
+ pricing: { input: 0.003, output: 0.003 },
2275
+ capabilities: ['image-generation', 'upscaling', 'image-enhancement'],
2276
+ category: 'image',
2277
+ isLatest: false,
2278
+ notes: 'Increase resolution of a generated image to 2k and 4k. Priced at $0.003 per image',
2279
+ },
2280
+ {
2281
+ id: 'imagen-visual-captioning',
2282
+ name: 'Imagen Visual Captioning',
2283
+ provider: 'Google AI',
2284
+ available: true,
2285
+ maxTokens: 0,
2286
+ contextLength: 0,
2287
+ pricing: { input: 0.0015, output: 0.0 },
2288
+ capabilities: ['image-generation', 'captioning', 'image-analysis'],
2289
+ category: 'image',
2290
+ isLatest: true,
2291
+ notes: 'Generate a short or long text caption for an image. Priced at $0.0015 per image',
2292
+ },
2293
+ {
2294
+ id: 'imagen-visual-qa',
2295
+ name: 'Imagen Visual Q&A',
2296
+ provider: 'Google AI',
2297
+ available: true,
2298
+ maxTokens: 0,
2299
+ contextLength: 0,
2300
+ pricing: { input: 0.0015, output: 0.0 },
2301
+ capabilities: ['image-generation', 'vqa', 'image-analysis'],
2302
+ category: 'image',
2303
+ isLatest: true,
2304
+ notes: 'Provide an answer based on a question referencing an image. Priced at $0.0015 per image',
2305
+ },
2306
+ {
2307
+ id: 'imagen-product-recontext',
2308
+ name: 'Imagen Product Recontext',
2309
+ provider: 'Google AI',
2310
+ available: true,
2311
+ maxTokens: 0,
2312
+ contextLength: 0,
2313
+ pricing: { input: 0.12, output: 0.0 },
2314
+ capabilities: ['image-generation', 'product-recontext', 'scene-generation'],
2315
+ category: 'image',
2316
+ isLatest: true,
2317
+ notes: 'Re-imagine products in a new scene. Requires 1-3 images of the same product and a text prompt describing desired scene. Priced at $0.12 per image',
2318
+ },
2319
+ {
2320
+ id: 'imagen-captioning-vqa',
2321
+ name: 'Imagen for Captioning & VQA',
2322
+ provider: 'Google AI',
2323
+ available: true,
2324
+ maxTokens: 0,
2325
+ contextLength: 0,
2326
+ pricing: { input: 0.03, output: 0.03 },
2327
+ capabilities: [
2328
+ 'image-generation',
2329
+ 'text-to-image',
2330
+ 'image-editing',
2331
+ 'mask-editing',
2332
+ 'captioning',
2333
+ 'vqa',
2334
+ ],
2335
+ category: 'image',
2336
+ isLatest: false,
2337
+ notes: 'Use text prompts to generate novel images, edit existing ones, edit parts of an image with a mask and more',
2338
+ },
2339
+ // === Veo Models (Video Generation) ===
2340
+ // === Veo 3.1 Series (Latest) ===
2341
+ {
2342
+ id: 'veo-3.1-video-audio-720p-1080p',
2343
+ name: 'Veo 3.1 Video + Audio (720p/1080p)',
2344
+ provider: 'Google AI',
2345
+ available: true,
2346
+ maxTokens: 0,
2347
+ contextLength: 0,
2348
+ pricing: { input: 0.4, output: 0.0 },
2349
+ capabilities: [
2350
+ 'video-generation',
2351
+ 'text-to-video',
2352
+ 'image-to-video',
2353
+ 'audio-generation',
2354
+ 'higher-quality',
2355
+ ],
2356
+ category: 'video',
2357
+ isLatest: true,
2358
+ notes: 'Generate high-quality videos with synchronized speech/sound effects from a text prompt or reference image. Output resolution: 720p, 1080p. Priced at $0.40 per second',
2359
+ },
2360
+ {
2361
+ id: 'veo-3.1-video-audio-4k',
2362
+ name: 'Veo 3.1 Video + Audio (4K)',
2363
+ provider: 'Google AI',
2364
+ available: true,
2365
+ maxTokens: 0,
2366
+ contextLength: 0,
2367
+ pricing: { input: 0.6, output: 0.0 },
2368
+ capabilities: [
2369
+ 'video-generation',
2370
+ 'text-to-video',
2371
+ 'image-to-video',
2372
+ 'audio-generation',
2373
+ 'higher-quality',
2374
+ '4k',
2375
+ ],
2376
+ category: 'video',
2377
+ isLatest: true,
2378
+ notes: 'Generate high-quality videos with synchronized speech/sound effects from a text prompt or reference image. Output resolution: 4K. Priced at $0.60 per second',
2379
+ },
2380
+ {
2381
+ id: 'veo-3.1-video-720p-1080p',
2382
+ name: 'Veo 3.1 Video (720p/1080p)',
2383
+ provider: 'Google AI',
2384
+ available: true,
2385
+ maxTokens: 0,
2386
+ contextLength: 0,
2387
+ pricing: { input: 0.2, output: 0.0 },
2388
+ capabilities: [
2389
+ 'video-generation',
2390
+ 'text-to-video',
2391
+ 'image-to-video',
2392
+ 'higher-quality',
2393
+ ],
2394
+ category: 'video',
1210
2395
  isLatest: true,
1211
- notes: 'State-of-the-art text embedding model for semantic search, classification, clustering, and similar tasks',
2396
+ notes: 'Generate high-quality videos from a text prompt or reference image. Output resolution: 720p, 1080p. Priced at $0.20 per second',
1212
2397
  },
1213
2398
  {
1214
- id: 'multimodal-embeddings',
1215
- name: 'Multimodal Embeddings',
2399
+ id: 'veo-3.1-video-4k',
2400
+ name: 'Veo 3.1 Video (4K)',
1216
2401
  provider: 'Google AI',
1217
2402
  available: true,
1218
- maxTokens: 2048,
1219
- contextLength: 2048,
1220
- pricing: { input: 0.0, output: 0.0 },
2403
+ maxTokens: 0,
2404
+ contextLength: 0,
2405
+ pricing: { input: 0.4, output: 0.0 },
1221
2406
  capabilities: [
1222
- 'embedding',
1223
- 'multimodal',
1224
- 'image-classification',
1225
- 'image-search',
2407
+ 'video-generation',
2408
+ 'text-to-video',
2409
+ 'image-to-video',
2410
+ 'higher-quality',
2411
+ '4k',
1226
2412
  ],
1227
- category: 'embedding',
2413
+ category: 'video',
1228
2414
  isLatest: true,
1229
- notes: 'Generates vectors based on images, which can be used for downstream tasks like image classification, image search, and more',
2415
+ notes: 'Generate high-quality videos from a text prompt or reference image. Output resolution: 4K. Priced at $0.40 per second',
1230
2416
  },
1231
- // === Imagen Models (Image Generation) ===
1232
2417
  {
1233
- id: 'imagen-4-generation',
1234
- name: 'Imagen 4 for Generation',
2418
+ id: 'veo-3.1-fast-video-audio-720p-1080p',
2419
+ name: 'Veo 3.1 Fast Video + Audio (720p/1080p)',
1235
2420
  provider: 'Google AI',
1236
2421
  available: true,
1237
2422
  maxTokens: 0,
1238
2423
  contextLength: 0,
1239
- pricing: { input: 0.04, output: 0.04 },
1240
- capabilities: ['image-generation', 'text-to-image', 'higher-quality'],
1241
- category: 'image',
2424
+ pricing: { input: 0.15, output: 0.0 },
2425
+ capabilities: [
2426
+ 'video-generation',
2427
+ 'text-to-video',
2428
+ 'image-to-video',
2429
+ 'audio-generation',
2430
+ 'lower-latency',
2431
+ ],
2432
+ category: 'video',
1242
2433
  isLatest: true,
1243
- notes: 'Use text prompts to generate novel images with higher quality than our previous image generation models',
2434
+ notes: 'Generate videos with synchronized speech/sound effects from a text prompt or reference image faster. Output resolution: 720p, 1080p. Priced at $0.15 per second',
1244
2435
  },
1245
2436
  {
1246
- id: 'imagen-4-fast-generation',
1247
- name: 'Imagen 4 for Fast Generation',
2437
+ id: 'veo-3.1-fast-video-audio-4k',
2438
+ name: 'Veo 3.1 Fast Video + Audio (4K)',
1248
2439
  provider: 'Google AI',
1249
2440
  available: true,
1250
2441
  maxTokens: 0,
1251
2442
  contextLength: 0,
1252
- pricing: { input: 0.04, output: 0.04 },
2443
+ pricing: { input: 0.35, output: 0.0 },
1253
2444
  capabilities: [
1254
- 'image-generation',
1255
- 'text-to-image',
1256
- 'higher-quality',
2445
+ 'video-generation',
2446
+ 'text-to-video',
2447
+ 'image-to-video',
2448
+ 'audio-generation',
1257
2449
  'lower-latency',
2450
+ '4k',
1258
2451
  ],
1259
- category: 'image',
2452
+ category: 'video',
1260
2453
  isLatest: true,
1261
- notes: 'Use text prompts to generate novel images with higher quality and lower latency than our previous image generation models',
2454
+ notes: 'Generate videos with synchronized speech/sound effects from a text prompt or reference image faster. Output resolution: 4K. Priced at $0.35 per second',
1262
2455
  },
1263
2456
  {
1264
- id: 'imagen-4-ultra-generation',
1265
- name: 'Imagen 4 for Ultra Generation',
2457
+ id: 'veo-3.1-fast-video-720p-1080p',
2458
+ name: 'Veo 3.1 Fast Video (720p/1080p)',
1266
2459
  provider: 'Google AI',
1267
2460
  available: true,
1268
2461
  maxTokens: 0,
1269
2462
  contextLength: 0,
1270
- pricing: { input: 0.06, output: 0.06 },
2463
+ pricing: { input: 0.1, output: 0.0 },
1271
2464
  capabilities: [
1272
- 'image-generation',
1273
- 'text-to-image',
1274
- 'higher-quality',
1275
- 'better-prompt-adherence',
2465
+ 'video-generation',
2466
+ 'text-to-video',
2467
+ 'image-to-video',
2468
+ 'lower-latency',
1276
2469
  ],
1277
- category: 'image',
2470
+ category: 'video',
1278
2471
  isLatest: true,
1279
- notes: 'Use text prompts to generate novel images with higher quality and better prompt adherence than our previous image generation models',
2472
+ notes: 'Generate videos from a text prompt or reference image faster. Output resolution: 720p, 1080p. Priced at $0.10 per second',
1280
2473
  },
1281
2474
  {
1282
- id: 'imagen-3-generation',
1283
- name: 'Imagen 3 for Generation',
2475
+ id: 'veo-3.1-fast-video-4k',
2476
+ name: 'Veo 3.1 Fast Video (4K)',
1284
2477
  provider: 'Google AI',
1285
2478
  available: true,
1286
2479
  maxTokens: 0,
1287
2480
  contextLength: 0,
1288
- pricing: { input: 0.03, output: 0.03 },
1289
- capabilities: ['image-generation', 'text-to-image'],
1290
- category: 'image',
1291
- isLatest: false,
1292
- notes: 'Use text prompts to generate novel images',
2481
+ pricing: { input: 0.3, output: 0.0 },
2482
+ capabilities: [
2483
+ 'video-generation',
2484
+ 'text-to-video',
2485
+ 'image-to-video',
2486
+ 'lower-latency',
2487
+ '4k',
2488
+ ],
2489
+ category: 'video',
2490
+ isLatest: true,
2491
+ notes: 'Generate videos from a text prompt or reference image faster. Output resolution: 4K. Priced at $0.30 per second',
1293
2492
  },
2493
+ // === Veo 3 Series ===
1294
2494
  {
1295
- id: 'imagen-3-editing-customization',
1296
- name: 'Imagen 3 for Editing and Customization',
2495
+ id: 'veo-3-video-audio',
2496
+ name: 'Veo 3 Video + Audio',
1297
2497
  provider: 'Google AI',
1298
2498
  available: true,
1299
2499
  maxTokens: 0,
1300
2500
  contextLength: 0,
1301
- pricing: { input: 0.03, output: 0.03 },
2501
+ pricing: { input: 0.4, output: 0.0 },
1302
2502
  capabilities: [
1303
- 'image-generation',
1304
- 'text-to-image',
1305
- 'image-editing',
1306
- 'customization',
1307
- 'mask-editing',
2503
+ 'video-generation',
2504
+ 'text-to-video',
2505
+ 'image-to-video',
2506
+ 'audio-generation',
2507
+ 'higher-quality',
1308
2508
  ],
1309
- category: 'image',
1310
- isLatest: false,
1311
- notes: 'Use text prompts to edit existing input images, or parts of an image with a mask or generate new images based upon the context provided by input reference images',
2509
+ category: 'video',
2510
+ isLatest: true,
2511
+ notes: 'Generate high-quality videos with synchronized speech/sound effects from a text prompt or reference image. Output resolution: 720p, 1080p. Priced at $0.40 per second',
1312
2512
  },
1313
2513
  {
1314
- id: 'imagen-3-fast-generation',
1315
- name: 'Imagen 3 for Fast Generation',
2514
+ id: 'veo-3-video',
2515
+ name: 'Veo 3 Video',
1316
2516
  provider: 'Google AI',
1317
2517
  available: true,
1318
2518
  maxTokens: 0,
1319
2519
  contextLength: 0,
1320
- pricing: { input: 0.03, output: 0.03 },
1321
- capabilities: ['image-generation', 'text-to-image', 'lower-latency'],
1322
- category: 'image',
1323
- isLatest: false,
1324
- notes: 'Use text prompts to generate novel images with lower latency than our other image generation models',
2520
+ pricing: { input: 0.2, output: 0.0 },
2521
+ capabilities: [
2522
+ 'video-generation',
2523
+ 'text-to-video',
2524
+ 'image-to-video',
2525
+ 'higher-quality',
2526
+ ],
2527
+ category: 'video',
2528
+ isLatest: true,
2529
+ notes: 'Generate high-quality videos from a text prompt or reference image. Output resolution: 720p, 1080p. Priced at $0.20 per second',
1325
2530
  },
1326
2531
  {
1327
- id: 'imagen-captioning-vqa',
1328
- name: 'Imagen for Captioning & VQA',
2532
+ id: 'veo-3-fast-video-audio',
2533
+ name: 'Veo 3 Fast Video + Audio',
1329
2534
  provider: 'Google AI',
1330
2535
  available: true,
1331
2536
  maxTokens: 0,
1332
2537
  contextLength: 0,
1333
- pricing: { input: 0.03, output: 0.03 },
2538
+ pricing: { input: 0.15, output: 0.0 },
1334
2539
  capabilities: [
1335
- 'image-generation',
1336
- 'text-to-image',
1337
- 'image-editing',
1338
- 'mask-editing',
1339
- 'captioning',
1340
- 'vqa',
2540
+ 'video-generation',
2541
+ 'text-to-video',
2542
+ 'image-to-video',
2543
+ 'audio-generation',
2544
+ 'lower-latency',
1341
2545
  ],
1342
- category: 'image',
1343
- isLatest: false,
1344
- notes: 'Use text prompts to generate novel images, edit existing ones, edit parts of an image with a mask and more',
2546
+ category: 'video',
2547
+ isLatest: true,
2548
+ notes: 'Generate videos with synchronized speech/sound effects from a text prompt or reference image faster. Output resolution: 720p, 1080p. Priced at $0.15 per second',
1345
2549
  },
1346
- // === Veo Models (Video Generation) ===
1347
2550
  {
1348
- id: 'veo-2',
1349
- name: 'Veo 2',
2551
+ id: 'veo-3-fast-video',
2552
+ name: 'Veo 3 Fast Video',
1350
2553
  provider: 'Google AI',
1351
2554
  available: true,
1352
2555
  maxTokens: 0,
1353
2556
  contextLength: 0,
1354
- pricing: { input: 0.35, output: 0.35 },
2557
+ pricing: { input: 0.1, output: 0.0 },
1355
2558
  capabilities: [
1356
2559
  'video-generation',
1357
2560
  'text-to-video',
1358
2561
  'image-to-video',
1359
- 'higher-quality',
2562
+ 'lower-latency',
1360
2563
  ],
1361
2564
  category: 'video',
1362
2565
  isLatest: true,
1363
- notes: 'Use text prompts and images to generate novel videos with higher quality than our previous video generation model (priced per second)',
2566
+ notes: 'Generate videos from a text prompt or reference image faster. Output resolution: 720p, 1080p. Priced at $0.10 per second',
1364
2567
  },
2568
+ // === Veo 2 Series ===
1365
2569
  {
1366
- id: 'veo-3',
1367
- name: 'Veo 3',
2570
+ id: 'veo-2-video',
2571
+ name: 'Veo 2 Video',
1368
2572
  provider: 'Google AI',
1369
2573
  available: true,
1370
2574
  maxTokens: 0,
1371
2575
  contextLength: 0,
1372
- pricing: { input: 0.35, output: 0.35 },
2576
+ pricing: { input: 0.5, output: 0.0 },
1373
2577
  capabilities: [
1374
2578
  'video-generation',
1375
2579
  'text-to-video',
@@ -1377,27 +2581,27 @@ exports.AVAILABLE_MODELS = [
1377
2581
  'higher-quality',
1378
2582
  ],
1379
2583
  category: 'video',
1380
- isLatest: true,
1381
- notes: 'Use text prompts and images to generate novel videos with higher quality than our previous video generation model (priced per second)',
2584
+ isLatest: false,
2585
+ notes: 'Generate videos from a text prompt or reference image. Output resolution: 720p. Priced at $0.50 per second',
1382
2586
  },
1383
2587
  {
1384
- id: 'veo-3-fast',
1385
- name: 'Veo 3 Fast',
2588
+ id: 'veo-2-advanced-controls',
2589
+ name: 'Veo 2 Advanced Controls',
1386
2590
  provider: 'Google AI',
1387
2591
  available: true,
1388
2592
  maxTokens: 0,
1389
2593
  contextLength: 0,
1390
- pricing: { input: 0.35, output: 0.35 },
2594
+ pricing: { input: 0.5, output: 0.0 },
1391
2595
  capabilities: [
1392
2596
  'video-generation',
1393
2597
  'text-to-video',
1394
2598
  'image-to-video',
1395
- 'higher-quality',
1396
- 'lower-latency',
2599
+ 'interpolation',
2600
+ 'camera-controls',
1397
2601
  ],
1398
2602
  category: 'video',
1399
- isLatest: true,
1400
- notes: 'Use text prompts and images to generate novel videos with higher quality and lower latency than our previous video generation model (priced per second)',
2603
+ isLatest: false,
2604
+ notes: 'Generate videos through start and end frame interpolation, extend generated videos, and apply camera controls. Output resolution: 720p. Priced at $0.50 per second',
1401
2605
  },
1402
2606
  // === Preview Models ===
1403
2607
  {
@@ -1407,11 +2611,11 @@ exports.AVAILABLE_MODELS = [
1407
2611
  available: true,
1408
2612
  maxTokens: 0,
1409
2613
  contextLength: 0,
1410
- pricing: { input: 0.0, output: 0.0 },
2614
+ pricing: { input: 0.06, output: 0.0 },
1411
2615
  capabilities: ['image-generation', 'virtual-try-on', 'clothing'],
1412
2616
  category: 'image',
1413
2617
  isLatest: true,
1414
- notes: 'Generate images of people wearing clothing products (preview model, free tier only)',
2618
+ notes: 'Create images of people wearing different clothes. Requires 1 image of a person and 1 image of clothing. Priced at $0.06 per image',
1415
2619
  },
1416
2620
  {
1417
2621
  id: 'veo-3-preview',
@@ -1420,7 +2624,7 @@ exports.AVAILABLE_MODELS = [
1420
2624
  available: true,
1421
2625
  maxTokens: 0,
1422
2626
  contextLength: 0,
1423
- pricing: { input: 0.35, output: 0.35 },
2627
+ pricing: { input: 0.2, output: 0.0 },
1424
2628
  capabilities: [
1425
2629
  'video-generation',
1426
2630
  'text-to-video',
@@ -1430,7 +2634,7 @@ exports.AVAILABLE_MODELS = [
1430
2634
  ],
1431
2635
  category: 'video',
1432
2636
  isLatest: true,
1433
- notes: 'Use text prompts and images to generate novel videos with higher quality than our previous video generation model (preview model, priced per second)',
2637
+ notes: 'Use text prompts and images to generate novel videos with higher quality than our previous video generation model (preview model). Output resolution: 720p, 1080p. Priced at $0.20 per second',
1434
2638
  },
1435
2639
  {
1436
2640
  id: 'veo-3-fast-preview',
@@ -1439,7 +2643,7 @@ exports.AVAILABLE_MODELS = [
1439
2643
  available: true,
1440
2644
  maxTokens: 0,
1441
2645
  contextLength: 0,
1442
- pricing: { input: 0.35, output: 0.35 },
2646
+ pricing: { input: 0.1, output: 0.0 },
1443
2647
  capabilities: [
1444
2648
  'video-generation',
1445
2649
  'text-to-video',
@@ -1450,7 +2654,21 @@ exports.AVAILABLE_MODELS = [
1450
2654
  ],
1451
2655
  category: 'video',
1452
2656
  isLatest: true,
1453
- notes: 'Use text prompts and images to generate novel videos with higher quality and lower latency than our previous video generation model (preview model, priced per second)',
2657
+ notes: 'Use text prompts and images to generate novel videos with higher quality and lower latency than our previous video generation model (preview model). Output resolution: 720p, 1080p. Priced at $0.10 per second',
2658
+ },
2659
+ // === Lyria Models (Music Generation) ===
2660
+ {
2661
+ id: 'lyria-2',
2662
+ name: 'Lyria 2',
2663
+ provider: 'Google AI',
2664
+ available: true,
2665
+ maxTokens: 0,
2666
+ contextLength: 0,
2667
+ pricing: { input: 0.06, output: 0.0 },
2668
+ capabilities: ['music-generation', 'text-to-music', 'instrumental'],
2669
+ category: 'audio',
2670
+ isLatest: true,
2671
+ notes: 'High-quality instrumental music generation ideal for sophisticated composition and detailed creative exploration. Priced at $0.06 per 30 seconds',
1454
2672
  },
1455
2673
  // === Legacy Models for Backward Compatibility ===
1456
2674
  {
@@ -1480,6 +2698,7 @@ exports.AVAILABLE_MODELS = [
1480
2698
  notes: 'Earlier generation Gemini model with vision capabilities (legacy)',
1481
2699
  },
1482
2700
  // === AWS Bedrock Models ===
2701
+ // === AI21 Labs Models ===
1483
2702
  {
1484
2703
  id: 'ai21.jamba-1-5-large-v1:0',
1485
2704
  name: 'Jamba 1.5 Large (Bedrock)',
@@ -1491,7 +2710,7 @@ exports.AVAILABLE_MODELS = [
1491
2710
  capabilities: ['text', 'long-context'],
1492
2711
  category: 'text',
1493
2712
  isLatest: true,
1494
- notes: 'AI21 Labs Jamba 1.5 Large via AWS Bedrock',
2713
+ notes: 'AI21 Labs Jamba 1.5 Large - advanced AI model for text generation and chat',
1495
2714
  },
1496
2715
  {
1497
2716
  id: 'ai21.jamba-1-5-mini-v1:0',
@@ -1504,98 +2723,232 @@ exports.AVAILABLE_MODELS = [
1504
2723
  capabilities: ['text', 'long-context', 'efficient'],
1505
2724
  category: 'text',
1506
2725
  isLatest: true,
1507
- notes: 'AI21 Labs Jamba 1.5 Mini via AWS Bedrock',
2726
+ notes: 'AI21 Labs Jamba 1.5 Mini - advanced AI model for text generation and chat',
2727
+ },
2728
+ {
2729
+ id: 'ai21.jamba-instruct-v1:0',
2730
+ name: 'Jamba-Instruct (Bedrock)',
2731
+ provider: 'AWS Bedrock',
2732
+ available: true,
2733
+ maxTokens: 256000,
2734
+ contextLength: 256000,
2735
+ pricing: { input: 0.5, output: 0.7 },
2736
+ capabilities: ['text', 'instruct', 'long-context'],
2737
+ category: 'text',
2738
+ isLatest: true,
2739
+ notes: 'AI21 Labs Jamba-Instruct - hybrid architecture for long context tasks',
2740
+ },
2741
+ {
2742
+ id: 'ai21.j2-mid-v1',
2743
+ name: 'Jurassic-2 Mid (Bedrock)',
2744
+ provider: 'AWS Bedrock',
2745
+ available: true,
2746
+ maxTokens: 8192,
2747
+ contextLength: 8192,
2748
+ pricing: { input: 12.5, output: 12.5 },
2749
+ capabilities: ['text'],
2750
+ category: 'text',
2751
+ isLatest: true,
2752
+ notes: 'AI21 Labs Jurassic-2 Mid - advanced AI model for text generation and chat',
2753
+ },
2754
+ {
2755
+ id: 'ai21.j2-ultra-v1',
2756
+ name: 'Jurassic-2 Ultra (Bedrock)',
2757
+ provider: 'AWS Bedrock',
2758
+ available: true,
2759
+ maxTokens: 8192,
2760
+ contextLength: 8192,
2761
+ pricing: { input: 18.8, output: 18.8 },
2762
+ capabilities: ['text'],
2763
+ category: 'text',
2764
+ isLatest: true,
2765
+ notes: 'AI21 Labs Jurassic-2 Ultra - advanced AI model for text generation and chat',
2766
+ },
2767
+ // === Amazon Nova 2.0 Series (Latest) ===
2768
+ {
2769
+ id: 'amazon.nova-2-lite-v1:0',
2770
+ name: 'Nova 2 Lite (Bedrock)',
2771
+ provider: 'AWS Bedrock',
2772
+ available: true,
2773
+ maxTokens: 300000,
2774
+ contextLength: 300000,
2775
+ pricing: { input: 0.3, output: 2.5 },
2776
+ capabilities: ['text', 'vision', 'video', 'multimodal', 'cache-read'],
2777
+ category: 'text',
2778
+ isLatest: true,
2779
+ notes: 'Amazon Nova 2 Lite - Text/Image/Video input: $0.3/1M tokens, Output: $2.5/1M tokens. Cache read: $0.075/1M tokens (75% discount)',
2780
+ },
2781
+ {
2782
+ id: 'amazon.nova-2-omni-v1:0',
2783
+ name: 'Nova 2 Omni Preview (Bedrock)',
2784
+ provider: 'AWS Bedrock',
2785
+ available: true,
2786
+ maxTokens: 300000,
2787
+ contextLength: 300000,
2788
+ pricing: { input: 0.3, output: 2.5 },
2789
+ capabilities: [
2790
+ 'text',
2791
+ 'vision',
2792
+ 'video',
2793
+ 'audio',
2794
+ 'multimodal',
2795
+ 'cache-read',
2796
+ ],
2797
+ category: 'multimodal',
2798
+ isLatest: true,
2799
+ notes: 'Amazon Nova 2 Omni (Preview) - Text/Image/Video input: $0.3/1M tokens, Audio input: $1.0/1M tokens, Text output: $2.5/1M tokens, Image output: $40/1M tokens. Cache read: 75% discount',
2800
+ },
2801
+ {
2802
+ id: 'amazon.nova-2-pro-v1:0',
2803
+ name: 'Nova 2 Pro Preview (Bedrock)',
2804
+ provider: 'AWS Bedrock',
2805
+ available: true,
2806
+ maxTokens: 300000,
2807
+ contextLength: 300000,
2808
+ pricing: { input: 1.25, output: 10.0 },
2809
+ capabilities: [
2810
+ 'text',
2811
+ 'vision',
2812
+ 'video',
2813
+ 'audio',
2814
+ 'multimodal',
2815
+ 'cache-read',
2816
+ ],
2817
+ category: 'multimodal',
2818
+ isLatest: true,
2819
+ notes: 'Amazon Nova 2 Pro (Preview) - Text/Image/Video/Audio input: $1.25/1M tokens, Text output: $10/1M tokens. Cache read: 75% discount',
2820
+ },
2821
+ {
2822
+ id: 'amazon.nova-2-sonic-v1:0',
2823
+ name: 'Nova 2 Sonic (Bedrock)',
2824
+ provider: 'AWS Bedrock',
2825
+ available: true,
2826
+ maxTokens: 300000,
2827
+ contextLength: 300000,
2828
+ pricing: { input: 3.0, output: 12.0 },
2829
+ capabilities: ['speech', 'text', 'multimodal', 'native-audio'],
2830
+ category: 'audio',
2831
+ isLatest: true,
2832
+ notes: 'Amazon Nova 2 Sonic - Speech input: $3.0/1M tokens, Speech output: $12.0/1M tokens. Text input: $0.33/1M tokens, Text output: $2.75/1M tokens',
1508
2833
  },
2834
+ // === Amazon Nova 1.0 Series ===
1509
2835
  {
1510
2836
  id: 'amazon.nova-micro-v1:0',
1511
- name: 'Amazon Nova Micro (Bedrock)',
2837
+ name: 'Nova Micro (Bedrock)',
1512
2838
  provider: 'AWS Bedrock',
1513
2839
  available: true,
1514
2840
  maxTokens: 128000,
1515
2841
  contextLength: 128000,
1516
2842
  pricing: { input: 0.035, output: 0.14 },
1517
- capabilities: ['text', 'ultra-fast', 'cost-effective'],
2843
+ capabilities: ['text', 'efficient', 'cache-read'],
1518
2844
  category: 'text',
1519
- isLatest: true,
1520
- notes: 'Amazon Nova Micro via AWS Bedrock',
2845
+ isLatest: false,
2846
+ notes: 'Amazon Nova Micro - Cache read: $0.00875/1M tokens (75% discount), Batch: $0.0175/$0.07 per 1M tokens',
1521
2847
  },
1522
2848
  {
1523
2849
  id: 'amazon.nova-lite-v1:0',
1524
- name: 'Amazon Nova Lite (Bedrock)',
2850
+ name: 'Nova Lite (Bedrock)',
1525
2851
  provider: 'AWS Bedrock',
1526
2852
  available: true,
1527
2853
  maxTokens: 300000,
1528
2854
  contextLength: 300000,
1529
2855
  pricing: { input: 0.06, output: 0.24 },
1530
- capabilities: ['text', 'multimodal', 'fast'],
1531
- category: 'multimodal',
1532
- isLatest: true,
1533
- notes: 'Amazon Nova Lite via AWS Bedrock',
2856
+ capabilities: ['text', 'multimodal', 'cache-read'],
2857
+ category: 'text',
2858
+ isLatest: false,
2859
+ notes: 'Amazon Nova Lite - Cache read: $0.015/1M tokens (75% discount), Batch: $0.03/$0.12 per 1M tokens',
1534
2860
  },
1535
2861
  {
1536
2862
  id: 'amazon.nova-pro-v1:0',
1537
- name: 'Amazon Nova Pro (Bedrock)',
2863
+ name: 'Nova Pro (Bedrock)',
1538
2864
  provider: 'AWS Bedrock',
1539
2865
  available: true,
1540
2866
  maxTokens: 300000,
1541
2867
  contextLength: 300000,
1542
2868
  pricing: { input: 0.8, output: 3.2 },
1543
- capabilities: ['text', 'multimodal', 'reasoning'],
1544
- category: 'multimodal',
1545
- isLatest: true,
1546
- notes: 'Amazon Nova Pro via AWS Bedrock',
2869
+ capabilities: ['text', 'multimodal', 'cache-read'],
2870
+ category: 'text',
2871
+ isLatest: false,
2872
+ notes: 'Amazon Nova Pro - Cache read: $0.2/1M tokens (75% discount), Batch: $0.4/$1.6 per 1M tokens',
1547
2873
  },
1548
2874
  {
1549
2875
  id: 'amazon.nova-premier-v1:0',
1550
- name: 'Amazon Nova Premier (Bedrock)',
2876
+ name: 'Nova Premier (Bedrock)',
1551
2877
  provider: 'AWS Bedrock',
1552
2878
  available: true,
1553
2879
  maxTokens: 300000,
1554
2880
  contextLength: 300000,
1555
2881
  pricing: { input: 2.5, output: 12.5 },
1556
2882
  capabilities: ['text', 'multimodal', 'advanced-reasoning'],
1557
- category: 'multimodal',
1558
- isLatest: true,
1559
- notes: 'Amazon Nova Premier via AWS Bedrock',
2883
+ category: 'text',
2884
+ isLatest: false,
2885
+ notes: 'Amazon Nova Premier - Advanced reasoning capabilities. Batch: $1.25/$6.25 per 1M tokens',
2886
+ },
2887
+ {
2888
+ id: 'amazon.nova-sonic-v1:0',
2889
+ name: 'Nova Sonic (Bedrock)',
2890
+ provider: 'AWS Bedrock',
2891
+ available: true,
2892
+ maxTokens: 300000,
2893
+ contextLength: 300000,
2894
+ pricing: { input: 3.4, output: 13.6 },
2895
+ capabilities: ['speech', 'text', 'multimodal', 'native-audio'],
2896
+ category: 'audio',
2897
+ isLatest: false,
2898
+ notes: 'Amazon Nova Sonic - Speech input: $3.4/1M tokens, Speech output: $13.6/1M tokens. Text input: $0.06/1M tokens, Text output: $0.24/1M tokens',
1560
2899
  },
2900
+ // === Nova Creative Generation Models ===
1561
2901
  {
1562
2902
  id: 'amazon.nova-canvas-v1:0',
1563
- name: 'Amazon Nova Canvas (Bedrock)',
2903
+ name: 'Nova Canvas (Bedrock)',
1564
2904
  provider: 'AWS Bedrock',
1565
2905
  available: true,
1566
- maxTokens: 4096,
1567
- contextLength: 4096,
2906
+ maxTokens: 0,
2907
+ contextLength: 0,
1568
2908
  pricing: { input: 0.04, output: 0.04 },
1569
2909
  capabilities: ['image-generation'],
1570
2910
  category: 'image',
1571
2911
  isLatest: true,
1572
- notes: 'Amazon Nova Canvas via AWS Bedrock - image generation model',
2912
+ notes: 'Amazon Nova Canvas - Standard quality (up to 1024x1024): $0.04/image, Premium quality (up to 1024x1024): $0.06/image, Standard quality (up to 2048x2048): $0.06/image, Premium quality (up to 2048x2048): $0.08/image',
2913
+ },
2914
+ {
2915
+ id: 'amazon.nova-reel-v1:0',
2916
+ name: 'Nova Reel (Bedrock)',
2917
+ provider: 'AWS Bedrock',
2918
+ available: true,
2919
+ maxTokens: 0,
2920
+ contextLength: 0,
2921
+ pricing: { input: 0.08, output: 0.08 },
2922
+ capabilities: ['video-generation'],
2923
+ category: 'video',
2924
+ isLatest: true,
2925
+ notes: 'Amazon Nova Reel - $0.08 per second of video generated (720p, 24 fps)',
1573
2926
  },
1574
2927
  {
1575
- id: 'amazon.nova-reel-v1:0',
1576
- name: 'Amazon Nova Reel (Bedrock)',
2928
+ id: 'amazon.nova-multimodal-embeddings-v1:0',
2929
+ name: 'Nova Multimodal Embeddings (Bedrock)',
1577
2930
  provider: 'AWS Bedrock',
1578
2931
  available: true,
1579
- maxTokens: 4096,
1580
- contextLength: 4096,
1581
- pricing: { input: 0.08, output: 0.08 },
1582
- capabilities: ['video-generation'],
1583
- category: 'video',
2932
+ maxTokens: 0,
2933
+ contextLength: 0,
2934
+ pricing: { input: 0.135, output: 0.0 },
2935
+ capabilities: ['embedding', 'multimodal'],
2936
+ category: 'embedding',
1584
2937
  isLatest: true,
1585
- notes: 'Amazon Nova Reel via AWS Bedrock - video generation model',
2938
+ notes: 'Amazon Nova Multimodal Embeddings (On-demand) - Text: $0.135/1M tokens, Standard image: $0.06/image, Document image: $0.6/image, Video: $0.7/second, Audio: $0.14/second. Batch: Text $0.0675/1M tokens, Standard image $0.03/image, Document image $0.48/image, Video $0.56/second, Audio $0.112/second',
1586
2939
  },
1587
2940
  {
1588
- id: 'amazon.nova-sonic-v1:0',
1589
- name: 'Amazon Nova Sonic (Bedrock)',
2941
+ id: 'amazon-rerank-v1.0',
2942
+ name: 'Amazon Rerank v1.0 (Bedrock)',
1590
2943
  provider: 'AWS Bedrock',
1591
2944
  available: true,
1592
- maxTokens: 300000,
1593
- contextLength: 300000,
1594
- pricing: { input: 3.4, output: 13.6 },
1595
- capabilities: ['speech', 'multimodal', 'native-audio'],
1596
- category: 'audio',
2945
+ maxTokens: 0,
2946
+ contextLength: 0,
2947
+ pricing: { input: 0.001, output: 0.0 },
2948
+ capabilities: ['rerank', 'semantic-search', 'retrieval'],
2949
+ category: 'retrieval',
1597
2950
  isLatest: true,
1598
- notes: 'Amazon Nova Sonic via AWS Bedrock - speech model',
2951
+ notes: 'Amazon Rerank v1.0 - $1.00 per 1,000 queries. A query can contain up to 100 document chunks. If a query contains more than 100 document chunks, it is counted as multiple queries (e.g., 350 documents = 4 queries). Each document can contain up to 512 tokens (inclusive of query and document total tokens). If token length exceeds 512 tokens, it is broken down into multiple documents.',
1599
2952
  },
1600
2953
  {
1601
2954
  id: 'amazon.titan-text-express-v1',
@@ -1637,13 +2990,13 @@ exports.AVAILABLE_MODELS = [
1637
2990
  notes: 'Amazon Titan Text Embeddings V2 via AWS Bedrock',
1638
2991
  },
1639
2992
  {
1640
- id: 'anthropic.claude-opus-4-1-20250805-v1:0',
1641
- name: 'Claude Opus 4.1 (Bedrock)',
2993
+ id: 'anthropic.claude-sonnet-4-5-v1:0',
2994
+ name: 'Claude Sonnet 4.5 (Bedrock)',
1642
2995
  provider: 'AWS Bedrock',
1643
2996
  available: true,
1644
- maxTokens: 32000,
2997
+ maxTokens: 64000,
1645
2998
  contextLength: 200000,
1646
- pricing: { input: 15.0, output: 75.0 },
2999
+ pricing: { input: 3.3, output: 16.5 },
1647
3000
  capabilities: [
1648
3001
  'text',
1649
3002
  'vision',
@@ -1654,16 +3007,29 @@ exports.AVAILABLE_MODELS = [
1654
3007
  ],
1655
3008
  category: 'multimodal',
1656
3009
  isLatest: true,
1657
- notes: 'Claude Opus 4.1 via AWS Bedrock - most capable and intelligent Claude model yet',
3010
+ notes: 'Claude Sonnet 4.5 on AWS Bedrock (Geo/In-region) - Input: $3.3/1M, Output: $16.5/1M. Batch: $1.65/$8.25 per 1M. Cache (5m write): $4.125/1M, Cache (1h write): $6.6/1M, Cache read: $0.33/1M. Global: $3/$15 per 1M',
1658
3011
  },
1659
3012
  {
1660
- id: 'anthropic.claude-opus-4-20250514-v1:0',
1661
- name: 'Claude Opus 4 (Bedrock)',
3013
+ id: 'anthropic.claude-haiku-4-5-v1:0',
3014
+ name: 'Claude Haiku 4.5 (Bedrock)',
1662
3015
  provider: 'AWS Bedrock',
1663
3016
  available: true,
1664
- maxTokens: 32000,
3017
+ maxTokens: 64000,
1665
3018
  contextLength: 200000,
1666
- pricing: { input: 15.0, output: 75.0 },
3019
+ pricing: { input: 1.1, output: 5.5 },
3020
+ capabilities: ['text', 'vision', 'multimodal', 'multilingual'],
3021
+ category: 'multimodal',
3022
+ isLatest: true,
3023
+ notes: 'Claude Haiku 4.5 on AWS Bedrock (Geo/In-region) - Input: $1.1/1M, Output: $5.5/1M. Batch: $0.55/$2.75 per 1M. Cache (5m write): $1.375/1M, Cache (1h write): $2.2/1M, Cache read: $0.11/1M. Global: $1/$5 per 1M',
3024
+ },
3025
+ {
3026
+ id: 'anthropic.claude-opus-4-5-20251101-v1:0',
3027
+ name: 'Claude Opus 4.5 (Bedrock)',
3028
+ provider: 'AWS Bedrock',
3029
+ available: true,
3030
+ maxTokens: 64000,
3031
+ contextLength: 200000,
3032
+ pricing: { input: 5.0, output: 25.0 },
1667
3033
  capabilities: [
1668
3034
  'text',
1669
3035
  'vision',
@@ -1674,16 +3040,16 @@ exports.AVAILABLE_MODELS = [
1674
3040
  ],
1675
3041
  category: 'multimodal',
1676
3042
  isLatest: true,
1677
- notes: 'Claude Opus 4 via AWS Bedrock - previous flagship model',
3043
+ notes: 'Claude Opus 4.5 via AWS Bedrock - premium model combining maximum intelligence with practical performance. Reliable knowledge cutoff: May 2025. Training data cutoff: Aug 2025. Max output: 64K tokens',
1678
3044
  },
1679
3045
  {
1680
- id: 'anthropic.claude-sonnet-4-5-v1:0',
1681
- name: 'Claude Sonnet 4.5 (Bedrock)',
3046
+ id: 'anthropic.claude-opus-4-1-20250805-v1:0',
3047
+ name: 'Claude Opus 4.1 (Bedrock)',
1682
3048
  provider: 'AWS Bedrock',
1683
3049
  available: true,
1684
- maxTokens: 200000,
3050
+ maxTokens: 32000,
1685
3051
  contextLength: 200000,
1686
- pricing: { input: 3.0, output: 15.0 },
3052
+ pricing: { input: 15.0, output: 75.0 },
1687
3053
  capabilities: [
1688
3054
  'text',
1689
3055
  'vision',
@@ -1693,21 +3059,28 @@ exports.AVAILABLE_MODELS = [
1693
3059
  'multilingual',
1694
3060
  ],
1695
3061
  category: 'multimodal',
1696
- isLatest: true,
1697
- notes: 'Claude Sonnet 4.5 on AWS Bedrock - latest Claude Sonnet model with enhanced capabilities and 1M context window support (beta)',
3062
+ isLatest: false,
3063
+ notes: 'Claude Opus 4.1 via AWS Bedrock - legacy model, migrate to Claude Opus 4.5. Reliable knowledge cutoff: Jan 2025. Training data cutoff: Mar 2025. Max output: 32K tokens',
1698
3064
  },
1699
3065
  {
1700
- id: 'anthropic.claude-haiku-4-5-v1:0',
1701
- name: 'Claude Haiku 4.5 (Bedrock)',
3066
+ id: 'anthropic.claude-opus-4-20250514-v1:0',
3067
+ name: 'Claude Opus 4 (Bedrock)',
1702
3068
  provider: 'AWS Bedrock',
1703
3069
  available: true,
1704
- maxTokens: 200000,
3070
+ maxTokens: 32000,
1705
3071
  contextLength: 200000,
1706
- pricing: { input: 1.0, output: 5.0 },
1707
- capabilities: ['text', 'vision', 'multimodal', 'multilingual'],
3072
+ pricing: { input: 15.0, output: 75.0 },
3073
+ capabilities: [
3074
+ 'text',
3075
+ 'vision',
3076
+ 'multimodal',
3077
+ 'reasoning',
3078
+ 'extended-thinking',
3079
+ 'multilingual',
3080
+ ],
1708
3081
  category: 'multimodal',
1709
- isLatest: true,
1710
- notes: 'Claude Haiku 4.5 on AWS Bedrock - latest Claude Haiku model with improved performance',
3082
+ isLatest: false,
3083
+ notes: 'Claude Opus 4 via AWS Bedrock - legacy model, migrate to Claude Opus 4.5. Reliable knowledge cutoff: Jan 2025. Training data cutoff: Mar 2025. Max output: 32K tokens',
1711
3084
  },
1712
3085
  {
1713
3086
  id: 'anthropic.claude-sonnet-4-20250514-v1:0',
@@ -1727,7 +3100,7 @@ exports.AVAILABLE_MODELS = [
1727
3100
  ],
1728
3101
  category: 'multimodal',
1729
3102
  isLatest: false,
1730
- notes: 'Claude Sonnet 4 via AWS Bedrock - high-performance model with exceptional reasoning',
3103
+ notes: 'Claude Sonnet 4 via AWS Bedrock - legacy model, migrate to Claude Sonnet 4.5. Reliable knowledge cutoff: Jan 2025. Training data cutoff: Mar 2025. Max output: 64K tokens. 1M context beta available',
1731
3104
  },
1732
3105
  {
1733
3106
  id: 'anthropic.claude-3-7-sonnet-20250219-v1:0',
@@ -1747,7 +3120,7 @@ exports.AVAILABLE_MODELS = [
1747
3120
  ],
1748
3121
  category: 'multimodal',
1749
3122
  isLatest: false,
1750
- notes: 'Claude Sonnet 3.7 via AWS Bedrock - high-performance model with early extended thinking. Deprecated - use Claude Sonnet 4.5 instead',
3123
+ notes: 'Claude Sonnet 3.7 via AWS Bedrock - DEPRECATED, migrate to Claude Sonnet 4.5. Reliable knowledge cutoff: Oct 2024. Training data cutoff: Nov 2024. Max output: 64K tokens',
1751
3124
  },
1752
3125
  {
1753
3126
  id: 'anthropic.claude-3-5-sonnet-20241022-v1:0',
@@ -1772,38 +3145,38 @@ exports.AVAILABLE_MODELS = [
1772
3145
  pricing: { input: 0.8, output: 4.0 },
1773
3146
  capabilities: ['text', 'vision', 'multimodal', 'multilingual'],
1774
3147
  category: 'multimodal',
1775
- isLatest: true,
1776
- notes: 'Claude Haiku 3.5 via AWS Bedrock - fastest Claude model',
3148
+ isLatest: false,
3149
+ notes: 'Claude Haiku 3.5 via AWS Bedrock - legacy model, migrate to Claude Haiku 4.5. Training data cutoff: July 2024. Max output: 8K tokens',
1777
3150
  },
1778
3151
  {
1779
3152
  id: 'meta.llama3-70b-instruct-v1:0',
1780
- name: 'Llama 3 70B Instruct (Bedrock)',
3153
+ name: 'Llama 3 Instruct 70B (Bedrock)',
1781
3154
  provider: 'AWS Bedrock',
1782
3155
  available: true,
1783
3156
  maxTokens: 8192,
1784
3157
  contextLength: 8192,
1785
- pricing: { input: 0.59, output: 0.79 },
3158
+ pricing: { input: 2.65, output: 3.5 },
1786
3159
  capabilities: ['text', 'instruction-following'],
1787
3160
  category: 'text',
1788
- isLatest: true,
1789
- notes: 'Meta Llama 3 70B Instruct via AWS Bedrock',
3161
+ isLatest: false,
3162
+ notes: 'Meta Llama 3 Instruct (70B) on AWS Bedrock (US East N. Virginia) - Input: $2.65/1M, Output: $3.5/1M',
1790
3163
  },
1791
3164
  {
1792
3165
  id: 'meta.llama3-8b-instruct-v1:0',
1793
- name: 'Llama 3 8B Instruct (Bedrock)',
3166
+ name: 'Llama 3 Instruct 8B (Bedrock)',
1794
3167
  provider: 'AWS Bedrock',
1795
3168
  available: true,
1796
3169
  maxTokens: 8192,
1797
3170
  contextLength: 8192,
1798
- pricing: { input: 0.05, output: 0.1 },
3171
+ pricing: { input: 0.3, output: 0.6 },
1799
3172
  capabilities: ['text', 'instruction-following', 'fast'],
1800
3173
  category: 'text',
1801
- isLatest: true,
1802
- notes: 'Meta Llama 3 8B Instruct via AWS Bedrock',
3174
+ isLatest: false,
3175
+ notes: 'Meta Llama 3 Instruct (8B) on AWS Bedrock (US East N. Virginia) - Input: $0.3/1M, Output: $0.6/1M',
1803
3176
  },
1804
3177
  {
1805
3178
  id: 'meta.llama3-2-11b-instruct-v1:0',
1806
- name: 'Llama 3.2 11B Instruct (Bedrock)',
3179
+ name: 'Llama 3.2 Instruct 11B (Bedrock)',
1807
3180
  provider: 'AWS Bedrock',
1808
3181
  available: true,
1809
3182
  maxTokens: 128000,
@@ -1811,12 +3184,12 @@ exports.AVAILABLE_MODELS = [
1811
3184
  pricing: { input: 0.16, output: 0.16 },
1812
3185
  capabilities: ['text', 'instruction-following', 'vision'],
1813
3186
  category: 'multimodal',
1814
- isLatest: true,
1815
- notes: 'Meta Llama 3.2 11B Instruct via AWS Bedrock',
3187
+ isLatest: false,
3188
+ notes: 'Meta Llama 3.2 Instruct (11B) on AWS Bedrock (US East Ohio) - Input: $0.16/1M, Output: $0.16/1M. Batch not available',
1816
3189
  },
1817
3190
  {
1818
3191
  id: 'meta.llama3-2-90b-instruct-v1:0',
1819
- name: 'Llama 3.2 90B Instruct (Bedrock)',
3192
+ name: 'Llama 3.2 Instruct 90B (Bedrock)',
1820
3193
  provider: 'AWS Bedrock',
1821
3194
  available: true,
1822
3195
  maxTokens: 128000,
@@ -1824,21 +3197,21 @@ exports.AVAILABLE_MODELS = [
1824
3197
  pricing: { input: 0.72, output: 0.72 },
1825
3198
  capabilities: ['text', 'instruction-following', 'vision'],
1826
3199
  category: 'multimodal',
1827
- isLatest: true,
1828
- notes: 'Meta Llama 3.2 90B Instruct via AWS Bedrock',
3200
+ isLatest: false,
3201
+ notes: 'Meta Llama 3.2 Instruct (90B) on AWS Bedrock (US East Ohio) - Input: $0.72/1M, Output: $0.72/1M. Batch not available',
1829
3202
  },
1830
3203
  {
1831
3204
  id: 'meta.llama4-scout-17b-instruct-v1:0',
1832
- name: 'Llama 4 Scout 17B Instruct (Bedrock)',
3205
+ name: 'Llama 4 Scout 17B (Bedrock)',
1833
3206
  provider: 'AWS Bedrock',
1834
3207
  available: true,
1835
3208
  maxTokens: 128000,
1836
3209
  contextLength: 128000,
1837
3210
  pricing: { input: 0.17, output: 0.66 },
1838
- capabilities: ['text', 'instruction-following', 'vision'],
3211
+ capabilities: ['text', 'instruct', 'vision'],
1839
3212
  category: 'multimodal',
1840
3213
  isLatest: true,
1841
- notes: 'Meta Llama 4 Scout 17B Instruct via AWS Bedrock',
3214
+ notes: 'Meta Llama 4 Scout 17B on AWS Bedrock (US East Ohio) - Input: $0.17/1M, Output: $0.66/1M. Batch: $0.085/$0.33 per 1M tokens',
1842
3215
  },
1843
3216
  {
1844
3217
  id: 'meta.llama4-maverick-17b-instruct-v1:0',
@@ -1851,8 +3224,219 @@ exports.AVAILABLE_MODELS = [
1851
3224
  capabilities: ['text', 'instruction-following', 'vision'],
1852
3225
  category: 'multimodal',
1853
3226
  isLatest: true,
1854
- notes: 'Meta Llama 4 Maverick 17B Instruct via AWS Bedrock',
3227
+ notes: 'Meta Llama 4 Maverick 17B on AWS Bedrock (US East Ohio) - Input: $0.24/1M, Output: $0.97/1M. Batch: $0.12/$0.485 per 1M tokens',
3228
+ },
3229
+ {
3230
+ id: 'meta.llama3-3-70b-instruct-v1:0',
3231
+ name: 'Llama 3.3 Instruct 70B (Bedrock)',
3232
+ provider: 'AWS Bedrock',
3233
+ available: true,
3234
+ maxTokens: 131072,
3235
+ contextLength: 131072,
3236
+ pricing: { input: 0.72, output: 0.72 },
3237
+ capabilities: ['text', 'instruct'],
3238
+ category: 'text',
3239
+ isLatest: true,
3240
+ notes: 'Meta Llama 3.3 Instruct (70B) on AWS Bedrock (US East Ohio) - Input: $0.72/1M, Output: $0.72/1M. Batch: $0.36/$0.36 per 1M tokens',
3241
+ },
3242
+ {
3243
+ id: 'meta.llama3-2-1b-instruct-v1:0',
3244
+ name: 'Llama 3.2 Instruct 1B (Bedrock)',
3245
+ provider: 'AWS Bedrock',
3246
+ available: true,
3247
+ maxTokens: 128000,
3248
+ contextLength: 128000,
3249
+ pricing: { input: 0.1, output: 0.1 },
3250
+ capabilities: ['text', 'instruct'],
3251
+ category: 'text',
3252
+ isLatest: false,
3253
+ notes: 'Meta Llama 3.2 Instruct (1B) on AWS Bedrock (US East Ohio) - Input: $0.1/1M, Output: $0.1/1M. Batch not available',
3254
+ },
3255
+ {
3256
+ id: 'meta.llama3-2-3b-instruct-v1:0',
3257
+ name: 'Llama 3.2 Instruct 3B (Bedrock)',
3258
+ provider: 'AWS Bedrock',
3259
+ available: true,
3260
+ maxTokens: 128000,
3261
+ contextLength: 128000,
3262
+ pricing: { input: 0.15, output: 0.15 },
3263
+ capabilities: ['text', 'instruct'],
3264
+ category: 'text',
3265
+ isLatest: false,
3266
+ notes: 'Meta Llama 3.2 Instruct (3B) on AWS Bedrock (US East Ohio) - Input: $0.15/1M, Output: $0.15/1M. Batch not available',
3267
+ },
3268
+ {
3269
+ id: 'meta.llama3-1-8b-instruct-v1:0',
3270
+ name: 'Llama 3.1 Instruct 8B (Bedrock)',
3271
+ provider: 'AWS Bedrock',
3272
+ available: true,
3273
+ maxTokens: 131072,
3274
+ contextLength: 131072,
3275
+ pricing: { input: 0.22, output: 0.22 },
3276
+ capabilities: ['text', 'instruct'],
3277
+ category: 'text',
3278
+ isLatest: false,
3279
+ notes: 'Meta Llama 3.1 Instruct (8B) on AWS Bedrock (US East Ohio) - Input: $0.22/1M, Output: $0.22/1M. Batch: $0.11/$0.11 per 1M tokens',
3280
+ },
3281
+ {
3282
+ id: 'meta.llama3-1-70b-instruct-v1:0',
3283
+ name: 'Llama 3.1 Instruct 70B (Bedrock)',
3284
+ provider: 'AWS Bedrock',
3285
+ available: true,
3286
+ maxTokens: 131072,
3287
+ contextLength: 131072,
3288
+ pricing: { input: 0.72, output: 0.72 },
3289
+ capabilities: ['text', 'instruct'],
3290
+ category: 'text',
3291
+ isLatest: false,
3292
+ notes: 'Meta Llama 3.1 Instruct (70B) on AWS Bedrock (US East Ohio) - Input: $0.72/1M, Output: $0.72/1M. Batch: $0.36/$0.36 per 1M tokens. Latency optimized: $0.9/$0.9 per 1M tokens',
3293
+ },
3294
+ {
3295
+ id: 'meta.llama3-1-405b-instruct-v1:0',
3296
+ name: 'Llama 3.1 Instruct 405B (Bedrock)',
3297
+ provider: 'AWS Bedrock',
3298
+ available: true,
3299
+ maxTokens: 131072,
3300
+ contextLength: 131072,
3301
+ pricing: { input: 2.4, output: 2.4 },
3302
+ capabilities: ['text', 'instruct'],
3303
+ category: 'text',
3304
+ isLatest: false,
3305
+ notes: 'Meta Llama 3.1 Instruct (405B) on AWS Bedrock (US East Ohio) - Input: $2.4/1M, Output: $2.4/1M. Batch: $1.2/$1.2 per 1M tokens. Latency optimized: $3/$3 per 1M tokens',
3306
+ },
3307
+ {
3308
+ id: 'meta.llama2-13b-chat-v1',
3309
+ name: 'Llama 2 Chat 13B (Bedrock)',
3310
+ provider: 'AWS Bedrock',
3311
+ available: true,
3312
+ maxTokens: 4096,
3313
+ contextLength: 4096,
3314
+ pricing: { input: 0.75, output: 1.0 },
3315
+ capabilities: ['text', 'chat'],
3316
+ category: 'text',
3317
+ isLatest: false,
3318
+ notes: 'Meta Llama 2 Chat (13B) on AWS Bedrock (US East N. Virginia, US West Oregon) - Input: $0.75/1M, Output: $1.0/1M',
3319
+ },
3320
+ {
3321
+ id: 'meta.llama2-70b-chat-v1',
3322
+ name: 'Llama 2 Chat 70B (Bedrock)',
3323
+ provider: 'AWS Bedrock',
3324
+ available: true,
3325
+ maxTokens: 4096,
3326
+ contextLength: 4096,
3327
+ pricing: { input: 1.95, output: 2.56 },
3328
+ capabilities: ['text', 'chat'],
3329
+ category: 'text',
3330
+ isLatest: false,
3331
+ notes: 'Meta Llama 2 Chat (70B) on AWS Bedrock (US East N. Virginia, US West Oregon) - Input: $1.95/1M, Output: $2.56/1M',
3332
+ },
3333
+ // === Mistral AI Models on AWS Bedrock ===
3334
+ // Latest Models
3335
+ {
3336
+ id: 'mistral.pixtral-large-2502-v1:0',
3337
+ name: 'Pixtral Large (25.02) (Bedrock)',
3338
+ provider: 'AWS Bedrock',
3339
+ available: true,
3340
+ maxTokens: 128000,
3341
+ contextLength: 128000,
3342
+ pricing: { input: 2.0, output: 6.0 },
3343
+ capabilities: ['vision', 'multimodal', 'reasoning'],
3344
+ category: 'multimodal',
3345
+ isLatest: true,
3346
+ notes: 'Mistral Pixtral Large (25.02) on AWS Bedrock (US East Ohio) - Input: $2/1M, Output: $6/1M. Batch not available',
3347
+ },
3348
+ {
3349
+ id: 'mistral.magistral-small-1-2-v1:0',
3350
+ name: 'Magistral Small 1.2 (Bedrock)',
3351
+ provider: 'AWS Bedrock',
3352
+ available: true,
3353
+ maxTokens: 40000,
3354
+ contextLength: 40000,
3355
+ pricing: { input: 0.5, output: 1.5 },
3356
+ capabilities: ['text'],
3357
+ category: 'text',
3358
+ isLatest: true,
3359
+ notes: 'Mistral Magistral Small 1.2 on AWS Bedrock (US East N. Virginia, US East Ohio, US West Oregon) - Standard: $0.50/$1.50 per 1M tokens. Asia Pacific (Mumbai), Europe (Ireland), Europe (Milan): $0.59/$1.76. South America (Sao Paulo), Asia Pacific (Tokyo): $0.61/$1.82. Europe (London): $0.78/$2.33. Priority tier: 75% premium. Flex tier: 50% discount',
3360
+ },
3361
+ {
3362
+ id: 'mistral.voxtral-mini-1-0-v1:0',
3363
+ name: 'Voxtral Mini 1.0 (Bedrock)',
3364
+ provider: 'AWS Bedrock',
3365
+ available: true,
3366
+ maxTokens: 0,
3367
+ contextLength: 0,
3368
+ pricing: { input: 0.04, output: 0.04 },
3369
+ capabilities: ['text', 'audio'],
3370
+ category: 'multimodal',
3371
+ isLatest: true,
3372
+ notes: 'Mistral Voxtral Mini 1.0 on AWS Bedrock (US East N. Virginia, US East Ohio, US West Oregon) - Standard: $0.04/$0.04 per 1M tokens. Asia Pacific (Mumbai), Europe (Ireland), Europe (Milan): $0.05/$0.05. South America (Sao Paulo), Asia Pacific (Tokyo): $0.05/$0.05. Europe (London): $0.06/$0.06. Priority tier: 75% premium. Flex tier: 50% discount',
3373
+ },
3374
+ {
3375
+ id: 'mistral.voxtral-small-1-0-v1:0',
3376
+ name: 'Voxtral Small 1.0 (Bedrock)',
3377
+ provider: 'AWS Bedrock',
3378
+ available: true,
3379
+ maxTokens: 0,
3380
+ contextLength: 0,
3381
+ pricing: { input: 0.1, output: 0.3 },
3382
+ capabilities: ['text', 'audio'],
3383
+ category: 'multimodal',
3384
+ isLatest: true,
3385
+ notes: 'Mistral Voxtral Small 1.0 on AWS Bedrock (US East N. Virginia, US East Ohio, US West Oregon) - Standard: $0.10/$0.30 per 1M tokens. Asia Pacific (Mumbai), Europe (Ireland), Europe (Milan): $0.12/$0.35. South America (Sao Paulo), Asia Pacific (Tokyo): $0.12/$0.36. Europe (London): $0.16/$0.47. Priority tier: 75% premium. Flex tier: 50% discount',
3386
+ },
3387
+ {
3388
+ id: 'mistral.ministral-3b-3-0-v1:0',
3389
+ name: 'Ministral 3B 3.0 (Bedrock)',
3390
+ provider: 'AWS Bedrock',
3391
+ available: true,
3392
+ maxTokens: 128000,
3393
+ contextLength: 128000,
3394
+ pricing: { input: 0.1, output: 0.1 },
3395
+ capabilities: ['text'],
3396
+ category: 'text',
3397
+ isLatest: true,
3398
+ notes: 'Mistral Ministral 3B 3.0 on AWS Bedrock (US East N. Virginia, US East Ohio, US West Oregon) - Standard: $0.10/$0.10 per 1M tokens. Asia Pacific (Mumbai), Europe (Ireland), Europe (Milan): $0.12/$0.12. South America (Sao Paulo), Asia Pacific (Tokyo): $0.12/$0.12. Europe (London): $0.16/$0.16. Priority tier: 75% premium. Flex tier: 50% discount',
3399
+ },
3400
+ {
3401
+ id: 'mistral.ministral-8b-3-0-v1:0',
3402
+ name: 'Ministral 8B 3.0 (Bedrock)',
3403
+ provider: 'AWS Bedrock',
3404
+ available: true,
3405
+ maxTokens: 128000,
3406
+ contextLength: 128000,
3407
+ pricing: { input: 0.15, output: 0.15 },
3408
+ capabilities: ['text'],
3409
+ category: 'text',
3410
+ isLatest: true,
3411
+ notes: 'Mistral Ministral 8B 3.0 on AWS Bedrock (US East N. Virginia, US East Ohio, US West Oregon) - Standard: $0.15/$0.15 per 1M tokens. Asia Pacific (Mumbai), Europe (Ireland), Europe (Milan): $0.18/$0.18. South America (Sao Paulo), Asia Pacific (Tokyo): $0.18/$0.18. Europe (London): $0.23/$0.23. Priority tier: 75% premium. Flex tier: 50% discount',
3412
+ },
3413
+ {
3414
+ id: 'mistral.ministral-14b-3-0-v1:0',
3415
+ name: 'Ministral 14B 3.0 (Bedrock)',
3416
+ provider: 'AWS Bedrock',
3417
+ available: true,
3418
+ maxTokens: 128000,
3419
+ contextLength: 128000,
3420
+ pricing: { input: 0.2, output: 0.2 },
3421
+ capabilities: ['text'],
3422
+ category: 'text',
3423
+ isLatest: true,
3424
+ notes: 'Mistral Ministral 14B 3.0 on AWS Bedrock (US East N. Virginia, US East Ohio, US West Oregon) - Standard: $0.20/$0.20 per 1M tokens. Asia Pacific (Mumbai), Europe (Ireland), Europe (Milan): $0.24/$0.24. South America (Sao Paulo), Asia Pacific (Tokyo): $0.24/$0.24. Europe (London): $0.31/$0.31. Priority tier: 75% premium. Flex tier: 50% discount',
3425
+ },
3426
+ {
3427
+ id: 'mistral.mistral-large-3-v1:0',
3428
+ name: 'Mistral Large 3 (Bedrock)',
3429
+ provider: 'AWS Bedrock',
3430
+ available: true,
3431
+ maxTokens: 128000,
3432
+ contextLength: 128000,
3433
+ pricing: { input: 0.5, output: 1.5 },
3434
+ capabilities: ['text'],
3435
+ category: 'text',
3436
+ isLatest: true,
3437
+ notes: 'Mistral Large 3 on AWS Bedrock (US East N. Virginia, US East Ohio, US West Oregon) - Standard: $0.50/$1.50 per 1M tokens. Asia Pacific (Mumbai), Europe (Ireland), Europe (Milan): $0.59/$1.76. South America (Sao Paulo), Asia Pacific (Tokyo): $0.61/$1.82. Europe (London): $0.78/$2.33. Priority tier: 75% premium. Flex tier: 50% discount',
1855
3438
  },
3439
+ // Legacy Models
1856
3440
  {
1857
3441
  id: 'mistral.mistral-7b-instruct-v0:2',
1858
3442
  name: 'Mistral 7B Instruct (Bedrock)',
@@ -1860,11 +3444,11 @@ exports.AVAILABLE_MODELS = [
1860
3444
  available: true,
1861
3445
  maxTokens: 32768,
1862
3446
  contextLength: 32768,
1863
- pricing: { input: 0.15, output: 0.45 },
1864
- capabilities: ['text', 'instruction-following', 'fast'],
3447
+ pricing: { input: 0.14, output: 0.42 },
3448
+ capabilities: ['text', 'instruct'],
1865
3449
  category: 'text',
1866
- isLatest: true,
1867
- notes: 'Mistral 7B Instruct via AWS Bedrock',
3450
+ isLatest: false,
3451
+ notes: 'Mistral 7B Instruct on AWS Bedrock (legacy)',
1868
3452
  },
1869
3453
  {
1870
3454
  id: 'mistral.mixtral-8x7b-instruct-v0:1',
@@ -1873,11 +3457,11 @@ exports.AVAILABLE_MODELS = [
1873
3457
  available: true,
1874
3458
  maxTokens: 32768,
1875
3459
  contextLength: 32768,
1876
- pricing: { input: 0.24, output: 0.72 },
1877
- capabilities: ['text', 'instruction-following', 'mixture-of-experts'],
3460
+ pricing: { input: 0.14, output: 0.42 },
3461
+ capabilities: ['text', 'instruct'],
1878
3462
  category: 'text',
1879
- isLatest: true,
1880
- notes: 'Mistral Mixtral 8x7B Instruct via AWS Bedrock',
3463
+ isLatest: false,
3464
+ notes: 'Mistral Mixtral 8x7B Instruct on AWS Bedrock (legacy)',
1881
3465
  },
1882
3466
  {
1883
3467
  id: 'mistral.mistral-large-2402-v1:0',
@@ -1887,10 +3471,10 @@ exports.AVAILABLE_MODELS = [
1887
3471
  maxTokens: 32768,
1888
3472
  contextLength: 32768,
1889
3473
  pricing: { input: 6.5, output: 25.0 },
1890
- capabilities: ['text', 'instruction-following'],
3474
+ capabilities: ['text', 'instruct'],
1891
3475
  category: 'text',
1892
- isLatest: true,
1893
- notes: 'Mistral Large via AWS Bedrock',
3476
+ isLatest: false,
3477
+ notes: 'Mistral Large (24.02) on AWS Bedrock (legacy)',
1894
3478
  },
1895
3479
  {
1896
3480
  id: 'mistral.mistral-small-2402-v1:0',
@@ -1899,24 +3483,11 @@ exports.AVAILABLE_MODELS = [
1899
3483
  available: true,
1900
3484
  maxTokens: 32768,
1901
3485
  contextLength: 32768,
1902
- pricing: { input: 2.0, output: 6.0 },
1903
- capabilities: ['text', 'instruction-following'],
1904
- category: 'text',
1905
- isLatest: true,
1906
- notes: 'Mistral Small via AWS Bedrock',
1907
- },
1908
- {
1909
- id: 'mistral.pixtral-large-2502-v1:0',
1910
- name: 'Pixtral Large (25.02) (Bedrock)',
1911
- provider: 'AWS Bedrock',
1912
- available: true,
1913
- maxTokens: 128000,
1914
- contextLength: 128000,
1915
- pricing: { input: 2.0, output: 6.0 },
1916
- capabilities: ['vision', 'multimodal', 'reasoning'],
1917
- category: 'multimodal',
1918
- isLatest: true,
1919
- notes: 'Mistral Pixtral Large via AWS Bedrock',
3486
+ pricing: { input: 2.0, output: 6.0 },
3487
+ capabilities: ['text', 'instruct'],
3488
+ category: 'text',
3489
+ isLatest: false,
3490
+ notes: 'Mistral Small (24.02) on AWS Bedrock (legacy)',
1920
3491
  },
1921
3492
  {
1922
3493
  id: 'cohere.command-r-plus-v1:0',
@@ -1929,7 +3500,7 @@ exports.AVAILABLE_MODELS = [
1929
3500
  capabilities: ['text', 'multilingual', 'enterprise'],
1930
3501
  category: 'text',
1931
3502
  isLatest: true,
1932
- notes: 'Cohere Command R+ via AWS Bedrock - updated pricing',
3503
+ notes: 'Cohere Command R+ via AWS Bedrock - advanced AI model for text generation and chat',
1933
3504
  },
1934
3505
  {
1935
3506
  id: 'cohere.command-r-v1:0',
@@ -1942,7 +3513,7 @@ exports.AVAILABLE_MODELS = [
1942
3513
  capabilities: ['text', 'multilingual', 'rag', 'tools'],
1943
3514
  category: 'text',
1944
3515
  isLatest: true,
1945
- notes: 'Cohere Command R via AWS Bedrock - updated pricing',
3516
+ notes: 'Cohere Command R via AWS Bedrock - advanced AI model for text generation and chat',
1946
3517
  },
1947
3518
  {
1948
3519
  id: 'cohere.embed-english-v3',
@@ -1951,11 +3522,11 @@ exports.AVAILABLE_MODELS = [
1951
3522
  available: true,
1952
3523
  maxTokens: 512,
1953
3524
  contextLength: 512,
1954
- pricing: { input: 0.1, output: 0.1 },
3525
+ pricing: { input: 0.1, output: 0.0 },
1955
3526
  capabilities: ['embedding'],
1956
3527
  category: 'embedding',
1957
3528
  isLatest: true,
1958
- notes: 'Cohere Embed English v3 via AWS Bedrock',
3529
+ notes: 'Cohere Embed English v3 via AWS Bedrock - advanced AI model for text generation and chat',
1959
3530
  },
1960
3531
  {
1961
3532
  id: 'cohere.embed-multilingual-v3',
@@ -1964,13 +3535,39 @@ exports.AVAILABLE_MODELS = [
1964
3535
  available: true,
1965
3536
  maxTokens: 512,
1966
3537
  contextLength: 512,
1967
- pricing: { input: 0.1, output: 0.1 },
3538
+ pricing: { input: 0.1, output: 0.0 },
1968
3539
  capabilities: ['embedding', 'multilingual'],
1969
3540
  category: 'embedding',
1970
3541
  isLatest: true,
1971
- notes: 'Cohere Embed Multilingual v3 via AWS Bedrock',
3542
+ notes: 'Cohere Embed Multilingual v3 via AWS Bedrock - advanced AI model for text generation and chat',
3543
+ },
3544
+ {
3545
+ id: 'cohere.embed-4-v1:0',
3546
+ name: 'Embed 4 (Bedrock)',
3547
+ provider: 'AWS Bedrock',
3548
+ available: true,
3549
+ maxTokens: 0,
3550
+ contextLength: 0,
3551
+ pricing: { input: 0.12, output: 0.0 },
3552
+ capabilities: ['embedding'],
3553
+ category: 'embedding',
3554
+ isLatest: true,
3555
+ notes: 'Cohere Embed 4 on AWS Bedrock - $0.12 per 1M input tokens. Latest embeddings model (no output tokens)',
3556
+ },
3557
+ {
3558
+ id: 'cohere.rerank-3-5-v1:0',
3559
+ name: 'Rerank 3.5 (Bedrock)',
3560
+ provider: 'AWS Bedrock',
3561
+ available: true,
3562
+ maxTokens: 0,
3563
+ contextLength: 0,
3564
+ pricing: { input: 0.002, output: 0.0 },
3565
+ capabilities: ['rerank', 'semantic-search', 'retrieval'],
3566
+ category: 'retrieval',
3567
+ isLatest: true,
3568
+ notes: "Cohere Rerank 3.5 on AWS Bedrock - $2.00 per 1,000 queries ($0.002 per query). A query can contain up to 100 document chunks. If a query contains more than 100 document chunks, it is counted as multiple queries (e.g., 350 documents = 4 queries). Each document can only contain up to 500 tokens (inclusive of the query and document's total tokens), and if the token length is higher than 512 tokens, it is broken down into multiple documents.",
1972
3569
  },
1973
- // Latest Cohere Models
3570
+ // === Cohere Models ===
1974
3571
  {
1975
3572
  id: 'command-a-03-2025',
1976
3573
  name: 'Command A',
@@ -1979,10 +3576,10 @@ exports.AVAILABLE_MODELS = [
1979
3576
  maxTokens: 256000,
1980
3577
  contextLength: 256000,
1981
3578
  pricing: { input: 2.5, output: 10.0 },
1982
- capabilities: ['text', 'agentic', 'multilingual', 'human-evaluations'],
3579
+ capabilities: ['text', 'reasoning', 'enterprise', 'agents', 'multilingual'],
1983
3580
  category: 'text',
1984
3581
  isLatest: true,
1985
- notes: 'Most performant model to date, excelling at tool use, agents, RAG, and multilingual use cases',
3582
+ notes: 'Advanced generative model for advanced agents and multilingual tasks. Most performant model to date, excelling at tool use, agents, RAG, and multilingual use cases. Priced at $2.50 per 1M input tokens and $10.00 per 1M output tokens.',
1986
3583
  },
1987
3584
  {
1988
3585
  id: 'command-r7b-12-2024',
@@ -1992,10 +3589,17 @@ exports.AVAILABLE_MODELS = [
1992
3589
  maxTokens: 128000,
1993
3590
  contextLength: 128000,
1994
3591
  pricing: { input: 0.0375, output: 0.15 },
1995
- capabilities: ['text', 'rag', 'tool-use', 'agents'],
3592
+ capabilities: [
3593
+ 'text',
3594
+ 'edge',
3595
+ 'commodity-gpu',
3596
+ 'rag',
3597
+ 'tools',
3598
+ 'reasoning',
3599
+ ],
1996
3600
  category: 'text',
1997
3601
  isLatest: true,
1998
- notes: 'Small, fast update delivered in December 2024, excels at RAG, tool use, and complex reasoning',
3602
+ notes: 'Cost-effective generative model for simpler tasks. Small, fast update delivered in December 2024, excels at RAG, tool use, and complex reasoning. Priced at $0.0375 per 1M input tokens and $0.15 per 1M output tokens.',
1999
3603
  },
2000
3604
  {
2001
3605
  id: 'command-a-reasoning-08-2025',
@@ -2005,10 +3609,16 @@ exports.AVAILABLE_MODELS = [
2005
3609
  maxTokens: 256000,
2006
3610
  contextLength: 256000,
2007
3611
  pricing: { input: 2.5, output: 10.0 },
2008
- capabilities: ['text', 'reasoning', 'agentic', 'multilingual'],
3612
+ capabilities: [
3613
+ 'text',
3614
+ 'reasoning',
3615
+ 'agentic',
3616
+ 'multilingual',
3617
+ 'enterprise',
3618
+ ],
2009
3619
  category: 'text',
2010
3620
  isLatest: true,
2011
- notes: 'First reasoning model, able to think before generating output for nuanced problem-solving and agent-based tasks in 23 languages',
3621
+ notes: 'Advanced generative model for reasoning tasks. First reasoning model, able to think before generating output for nuanced problem-solving and agent-based tasks in 23 languages. Priced at $2.50 per 1M input tokens and $10.00 per 1M output tokens.',
2012
3622
  },
2013
3623
  {
2014
3624
  id: 'command-a-vision-07-2025',
@@ -2021,7 +3631,7 @@ exports.AVAILABLE_MODELS = [
2021
3631
  capabilities: ['text', 'vision', 'multimodal', 'enterprise'],
2022
3632
  category: 'multimodal',
2023
3633
  isLatest: true,
2024
- notes: 'First model capable of processing images, excelling in enterprise use cases like charts, graphs, diagrams, table understanding, OCR, and object detection',
3634
+ notes: 'Advanced generative model for vision tasks. First model capable of processing images, excelling in enterprise use cases like charts, graphs, diagrams, table understanding, OCR, and object detection. Priced at $2.50 per 1M input tokens and $10.00 per 1M output tokens.',
2025
3635
  },
2026
3636
  {
2027
3637
  id: 'command-r-plus-04-2024',
@@ -2031,23 +3641,30 @@ exports.AVAILABLE_MODELS = [
2031
3641
  maxTokens: 128000,
2032
3642
  contextLength: 128000,
2033
3643
  pricing: { input: 2.5, output: 10.0 },
2034
- capabilities: ['text', 'enterprise', 'rag', 'tools', 'multilingual'],
3644
+ capabilities: ['text', 'reasoning', 'rag', 'tools', 'enterprise'],
2035
3645
  category: 'text',
2036
3646
  isLatest: true,
2037
- notes: 'Instruction-following conversational model for complex RAG workflows and multi-step tool use',
3647
+ notes: 'High-performance generative model for complex tasks. Instruction-following conversational model for complex RAG workflows and multi-step tool use. Priced at $2.50 per 1M input tokens and $10.00 per 1M output tokens.',
2038
3648
  },
2039
3649
  {
2040
3650
  id: 'command-r-08-2024',
2041
- name: 'Command R (08-2024)',
3651
+ name: 'Command R',
2042
3652
  provider: 'Cohere',
2043
3653
  available: true,
2044
3654
  maxTokens: 128000,
2045
3655
  contextLength: 128000,
2046
3656
  pricing: { input: 0.15, output: 0.6 },
2047
- capabilities: ['text', 'rag', 'tools', 'agents'],
3657
+ capabilities: [
3658
+ 'text',
3659
+ 'efficiency',
3660
+ 'performance',
3661
+ 'rag',
3662
+ 'tools',
3663
+ 'agents',
3664
+ ],
2048
3665
  category: 'text',
2049
3666
  isLatest: true,
2050
- notes: 'Update of Command R model delivered in August 2024',
3667
+ notes: 'Balanced generative model for Retrieval-Augmented Generation (RAG) and tool use. Update of Command R model delivered in August 2024. Priced at $0.15 per 1M input tokens and $0.60 per 1M output tokens.',
2051
3668
  },
2052
3669
  {
2053
3670
  id: 'command-r-03-2024',
@@ -2095,8 +3712,8 @@ exports.AVAILABLE_MODELS = [
2095
3712
  available: true,
2096
3713
  maxTokens: 4096,
2097
3714
  contextLength: 4096,
2098
- pricing: { input: 0.15, output: 0.6 },
2099
- capabilities: ['text', 'lightweight', 'fast'],
3715
+ pricing: { input: 0.3, output: 0.6 },
3716
+ capabilities: ['text', 'fast'],
2100
3717
  category: 'text',
2101
3718
  isLatest: false,
2102
3719
  notes: 'Smaller, faster version of command, almost as capable but much faster',
@@ -2122,10 +3739,10 @@ exports.AVAILABLE_MODELS = [
2122
3739
  maxTokens: 4096,
2123
3740
  contextLength: 4096,
2124
3741
  pricing: { input: 2.0, output: 2.0 },
2125
- capabilities: ['rerank', 'semantic-search', 'retrieval'],
3742
+ capabilities: ['search', 'reranking', 'multilingual', 'semantic-search'],
2126
3743
  category: 'rerank',
2127
3744
  isLatest: true,
2128
- notes: 'Provides powerful semantic boost to search quality of any keyword or vector search system, $2.00 per 1K searches',
3745
+ notes: 'Rerank model used to improve search result relevance. Provides powerful semantic boost to search quality of any keyword or vector search system. Priced at $2.00 per 1,000 searches (usage-based pricing).',
2129
3746
  },
2130
3747
  {
2131
3748
  id: 'rerank-english-v3.0',
@@ -2135,10 +3752,10 @@ exports.AVAILABLE_MODELS = [
2135
3752
  maxTokens: 4096,
2136
3753
  contextLength: 4096,
2137
3754
  pricing: { input: 2.0, output: 2.0 },
2138
- capabilities: ['rerank', 'semantic-search', 'english'],
3755
+ capabilities: ['rerank', 'semantic-search', 'english', 'search'],
2139
3756
  category: 'rerank',
2140
3757
  isLatest: true,
2141
- notes: 'English language document and semi-structured data reranking model',
3758
+ notes: 'Rerank model used to improve search result relevance. English language document and semi-structured data reranking model. Priced at $2.00 per 1,000 searches (usage-based pricing).',
2142
3759
  },
2143
3760
  {
2144
3761
  id: 'rerank-multilingual-v3.0',
@@ -2148,10 +3765,10 @@ exports.AVAILABLE_MODELS = [
2148
3765
  maxTokens: 4096,
2149
3766
  contextLength: 4096,
2150
3767
  pricing: { input: 2.0, output: 2.0 },
2151
- capabilities: ['rerank', 'semantic-search', 'multilingual'],
3768
+ capabilities: ['rerank', 'semantic-search', 'multilingual', 'search'],
2152
3769
  category: 'rerank',
2153
3770
  isLatest: true,
2154
- notes: 'Multilingual document and semi-structured data reranking model',
3771
+ notes: 'Rerank model used to improve search result relevance. Multilingual document and semi-structured data reranking model. Priced at $2.00 per 1,000 searches (usage-based pricing).',
2155
3772
  },
2156
3773
  {
2157
3774
  id: 'embed-v4.0',
@@ -2160,11 +3777,17 @@ exports.AVAILABLE_MODELS = [
2160
3777
  available: true,
2161
3778
  maxTokens: 128000,
2162
3779
  contextLength: 128000,
2163
- pricing: { input: 0.12, output: 0.12 },
2164
- capabilities: ['embedding', 'multimodal', 'semantic-search', 'rag'],
3780
+ pricing: { input: 0.12, output: 0.0 },
3781
+ capabilities: [
3782
+ 'embedding',
3783
+ 'multimodal',
3784
+ 'multilingual',
3785
+ 'semantic-search',
3786
+ 'rag',
3787
+ ],
2165
3788
  category: 'embedding',
2166
3789
  isLatest: true,
2167
- notes: 'Leading multimodal embedding model for text and images, acts as intelligent retrieval engine for semantic search and RAG systems',
3790
+ notes: 'Embed model for converting text/images to vectors for semantic search. Leading multimodal embedding model for text and images, acts as intelligent retrieval engine for semantic search and RAG systems. Priced at $0.12 per 1M tokens processed.',
2168
3791
  },
2169
3792
  {
2170
3793
  id: 'embed-english-v3.0',
@@ -2173,11 +3796,11 @@ exports.AVAILABLE_MODELS = [
2173
3796
  available: true,
2174
3797
  maxTokens: 512,
2175
3798
  contextLength: 512,
2176
- pricing: { input: 0.1, output: 0.1 },
2177
- capabilities: ['embedding', 'english'],
3799
+ pricing: { input: 0.1, output: 0.0 },
3800
+ capabilities: ['embedding', 'english', 'semantic-search', 'rag'],
2178
3801
  category: 'embedding',
2179
3802
  isLatest: true,
2180
- notes: 'English-only embedding model for text classification and embeddings',
3803
+ notes: 'Embed model for converting text to vectors for semantic search. English-only embedding model for text classification and embeddings. Priced at $0.10 per 1M tokens processed (usage-based pricing).',
2181
3804
  },
2182
3805
  {
2183
3806
  id: 'embed-english-light-v3.0',
@@ -2186,11 +3809,17 @@ exports.AVAILABLE_MODELS = [
2186
3809
  available: true,
2187
3810
  maxTokens: 512,
2188
3811
  contextLength: 512,
2189
- pricing: { input: 0.1, output: 0.1 },
2190
- capabilities: ['embedding', 'english', 'lightweight'],
3812
+ pricing: { input: 0.1, output: 0.0 },
3813
+ capabilities: [
3814
+ 'embedding',
3815
+ 'english',
3816
+ 'lightweight',
3817
+ 'semantic-search',
3818
+ 'rag',
3819
+ ],
2191
3820
  category: 'embedding',
2192
3821
  isLatest: true,
2193
- notes: 'Smaller, faster version of embed-english-v3.0, almost as capable but much faster',
3822
+ notes: 'Embed model for converting text to vectors for semantic search. Smaller, faster version of embed-english-v3.0, almost as capable but much faster. Priced at $0.10 per 1M tokens processed (usage-based pricing).',
2194
3823
  },
2195
3824
  {
2196
3825
  id: 'embed-multilingual-v3.0',
@@ -2199,11 +3828,11 @@ exports.AVAILABLE_MODELS = [
2199
3828
  available: true,
2200
3829
  maxTokens: 512,
2201
3830
  contextLength: 512,
2202
- pricing: { input: 0.1, output: 0.1 },
2203
- capabilities: ['embedding', 'multilingual'],
3831
+ pricing: { input: 0.1, output: 0.0 },
3832
+ capabilities: ['embedding', 'multilingual', 'semantic-search', 'rag'],
2204
3833
  category: 'embedding',
2205
3834
  isLatest: true,
2206
- notes: 'Multilingual embedding model for classification and embeddings in multiple languages',
3835
+ notes: 'Embed model for converting text to vectors for semantic search. Multilingual embedding model for classification and embeddings in multiple languages. Priced at $0.10 per 1M tokens processed (usage-based pricing).',
2207
3836
  },
2208
3837
  {
2209
3838
  id: 'embed-multilingual-light-v3.0',
@@ -2212,11 +3841,17 @@ exports.AVAILABLE_MODELS = [
2212
3841
  available: true,
2213
3842
  maxTokens: 512,
2214
3843
  contextLength: 512,
2215
- pricing: { input: 0.1, output: 0.1 },
2216
- capabilities: ['embedding', 'multilingual', 'lightweight'],
3844
+ pricing: { input: 0.1, output: 0.0 },
3845
+ capabilities: [
3846
+ 'embedding',
3847
+ 'multilingual',
3848
+ 'lightweight',
3849
+ 'semantic-search',
3850
+ 'rag',
3851
+ ],
2217
3852
  category: 'embedding',
2218
3853
  isLatest: true,
2219
- notes: 'Smaller, faster version of embed-multilingual-v3.0, almost as capable but much faster',
3854
+ notes: 'Embed model for converting text to vectors for semantic search. Smaller, faster version of embed-multilingual-v3.0, almost as capable but much faster. Priced at $0.10 per 1M tokens processed (usage-based pricing).',
2220
3855
  },
2221
3856
  {
2222
3857
  id: 'c4ai-aya-expanse-8b',
@@ -2398,14 +4033,7 @@ exports.AVAILABLE_MODELS = [
2398
4033
  maxTokens: 40000,
2399
4034
  contextLength: 40000,
2400
4035
  pricing: { input: 2.0, output: 5.0 },
2401
- capabilities: [
2402
- 'text',
2403
- 'reasoning',
2404
- 'thinking',
2405
- 'domain-specific',
2406
- 'multilingual',
2407
- 'multimodal',
2408
- ],
4036
+ capabilities: ['text', 'reasoning'],
2409
4037
  category: 'reasoning',
2410
4038
  isLatest: true,
2411
4039
  notes: 'Our frontier-class multimodal reasoning model released September 2025 (v25.09).',
@@ -2436,7 +4064,7 @@ exports.AVAILABLE_MODELS = [
2436
4064
  available: true,
2437
4065
  maxTokens: 256000,
2438
4066
  contextLength: 256000,
2439
- pricing: { input: 0.3, output: 0.9 },
4067
+ pricing: { input: 0.2, output: 0.6 },
2440
4068
  capabilities: [
2441
4069
  'code',
2442
4070
  'programming',
@@ -2447,7 +4075,7 @@ exports.AVAILABLE_MODELS = [
2447
4075
  ],
2448
4076
  category: 'code',
2449
4077
  isLatest: true,
2450
- notes: 'Our cutting-edge language model for coding released end of July 2025, specializes in low-latency, high-frequency tasks.',
4078
+ notes: 'Coding-focused model. Priced at $0.20 per 1M input tokens and $0.60 per 1M output tokens. Our cutting-edge language model for coding released end of July 2025, specializes in low-latency, high-frequency tasks. Available on La Plateforme API with up to 256K context window. Fine-tuning available.',
2451
4079
  },
2452
4080
  {
2453
4081
  id: 'codestral-latest',
@@ -2456,7 +4084,7 @@ exports.AVAILABLE_MODELS = [
2456
4084
  available: true,
2457
4085
  maxTokens: 256000,
2458
4086
  contextLength: 256000,
2459
- pricing: { input: 0.3, output: 0.9 },
4087
+ pricing: { input: 0.2, output: 0.6 },
2460
4088
  capabilities: [
2461
4089
  'code',
2462
4090
  'programming',
@@ -2467,7 +4095,7 @@ exports.AVAILABLE_MODELS = [
2467
4095
  ],
2468
4096
  category: 'code',
2469
4097
  isLatest: true,
2470
- notes: 'Our cutting-edge language model for coding released end of July 2025, specializes in low-latency, high-frequency tasks.',
4098
+ notes: 'Coding-focused model. Priced at $0.20 per 1M input tokens and $0.60 per 1M output tokens. Our cutting-edge language model for coding released end of July 2025, specializes in low-latency, high-frequency tasks. Available on La Plateforme API with up to 256K context window. Fine-tuning available.',
2471
4099
  },
2472
4100
  {
2473
4101
  id: 'voxtral-mini-2507',
@@ -2497,7 +4125,7 @@ exports.AVAILABLE_MODELS = [
2497
4125
  },
2498
4126
  {
2499
4127
  id: 'devstral-medium-2507',
2500
- name: 'Devstral Medium',
4128
+ name: 'Devstral Medium 1.0',
2501
4129
  provider: 'Mistral AI',
2502
4130
  available: true,
2503
4131
  maxTokens: 128000,
@@ -2509,14 +4137,15 @@ exports.AVAILABLE_MODELS = [
2509
4137
  'advanced-coding',
2510
4138
  'codebase-exploration',
2511
4139
  'multi-file-editing',
4140
+ 'swe',
2512
4141
  ],
2513
4142
  category: 'code',
2514
4143
  isLatest: true,
2515
- notes: 'An enterprise grade text model that excels at using tools to explore codebases, editing multiple files and power software engineering agents.',
4144
+ notes: 'An enterprise grade text model that excels at SWE use cases, released July 2025 (v25.07).',
2516
4145
  },
2517
4146
  {
2518
4147
  id: 'devstral-medium-latest',
2519
- name: 'Devstral Medium',
4148
+ name: 'Devstral Medium 1.0',
2520
4149
  provider: 'Mistral AI',
2521
4150
  available: true,
2522
4151
  maxTokens: 128000,
@@ -2528,10 +4157,11 @@ exports.AVAILABLE_MODELS = [
2528
4157
  'advanced-coding',
2529
4158
  'codebase-exploration',
2530
4159
  'multi-file-editing',
4160
+ 'swe',
2531
4161
  ],
2532
4162
  category: 'code',
2533
4163
  isLatest: true,
2534
- notes: 'An enterprise grade text model that excels at using tools to explore codebases, editing multiple files and power software engineering agents.',
4164
+ notes: 'An enterprise grade text model that excels at SWE use cases, released July 2025 (v25.07).',
2535
4165
  },
2536
4166
  {
2537
4167
  id: 'mistral-ocr-2505',
@@ -2549,7 +4179,7 @@ exports.AVAILABLE_MODELS = [
2549
4179
  ],
2550
4180
  category: 'document',
2551
4181
  isLatest: true,
2552
- notes: 'Our OCR service powering our Document AI stack that enables our users to extract interleaved text and images.',
4182
+ notes: 'OCR service powering Document AI stack for extracting interleaved text and images. Priced at $2.00 per 1,000 pages (50% discount with Batch-API). Available on La Plateforme API.',
2553
4183
  },
2554
4184
  {
2555
4185
  id: 'mistral-ocr-latest',
@@ -2559,15 +4189,10 @@ exports.AVAILABLE_MODELS = [
2559
4189
  maxTokens: 0,
2560
4190
  contextLength: 0,
2561
4191
  pricing: { input: 1.0, output: 3.0 },
2562
- capabilities: [
2563
- 'ocr',
2564
- 'document-understanding',
2565
- 'annotations',
2566
- 'text-extraction',
2567
- ],
4192
+ capabilities: ['ocr', 'multimodal', 'text'],
2568
4193
  category: 'document',
2569
4194
  isLatest: true,
2570
- notes: 'Our OCR service powering our Document AI stack that enables our users to extract interleaved text and images.',
4195
+ notes: 'OCR service powering Document AI stack for extracting interleaved text and images. Priced at $2.00 per 1,000 pages (50% discount with Batch-API). Available on La Plateforme API.',
2571
4196
  },
2572
4197
  {
2573
4198
  id: 'mistral-large-2411',
@@ -2577,10 +4202,16 @@ exports.AVAILABLE_MODELS = [
2577
4202
  maxTokens: 128000,
2578
4203
  contextLength: 128000,
2579
4204
  pricing: { input: 2.0, output: 6.0 },
2580
- capabilities: ['text', 'reasoning', 'complex-tasks', 'high-complexity'],
4205
+ capabilities: [
4206
+ 'text',
4207
+ 'reasoning',
4208
+ 'complex-tasks',
4209
+ 'high-complexity',
4210
+ 'function-calling',
4211
+ ],
2581
4212
  category: 'text',
2582
4213
  isLatest: true,
2583
- notes: 'Our top-tier large model for high-complexity tasks with the latest version released November 2024.',
4214
+ notes: 'Top-tier large model for high-complexity tasks. Priced at $2.00 per 1M input tokens and $6.00 per 1M output tokens. Latest version released November 2024 (v24.11). Available on La Plateforme API with up to 128K context window. Supports function calling. Fine-tuning available ($9/1M tokens + storage for Fine-tuned Large 2). Also available on Azure AI Foundry.',
2584
4215
  },
2585
4216
  {
2586
4217
  id: 'mistral-large-latest',
@@ -2590,10 +4221,16 @@ exports.AVAILABLE_MODELS = [
2590
4221
  maxTokens: 128000,
2591
4222
  contextLength: 128000,
2592
4223
  pricing: { input: 2.0, output: 6.0 },
2593
- capabilities: ['text', 'reasoning', 'complex-tasks', 'high-complexity'],
4224
+ capabilities: [
4225
+ 'text',
4226
+ 'agentic',
4227
+ 'multimodal',
4228
+ 'reasoning',
4229
+ 'function-calling',
4230
+ ],
2594
4231
  category: 'text',
2595
4232
  isLatest: true,
2596
- notes: 'Our top-tier large model for high-complexity tasks with the latest version released November 2024.',
4233
+ notes: 'Top-tier large model for high-complexity tasks. Priced at $2.00 per 1M input tokens and $6.00 per 1M output tokens. Latest version released November 2024. Available on La Plateforme API with up to 128K context window. Supports function calling. Fine-tuning available ($9/1M tokens + storage for Fine-tuned Large 2). Also available on Azure AI Foundry.',
2597
4234
  },
2598
4235
  {
2599
4236
  id: 'pixtral-large-2411',
@@ -2621,6 +4258,19 @@ exports.AVAILABLE_MODELS = [
2621
4258
  isLatest: true,
2622
4259
  notes: 'Our first frontier-class multimodal model released November 2024.',
2623
4260
  },
4261
+ {
4262
+ id: 'mistral-small-2409',
4263
+ name: 'Mistral Small 2.0',
4264
+ provider: 'Mistral AI',
4265
+ available: true,
4266
+ maxTokens: 32000,
4267
+ contextLength: 32000,
4268
+ pricing: { input: 0.1, output: 0.3 },
4269
+ capabilities: ['text', 'multimodal', 'multilingual', 'open-source'],
4270
+ category: 'multimodal',
4271
+ isLatest: false,
4272
+ notes: 'An update to our efficient small model with open weights under an MRL license, released September 2024 (v24.09).',
4273
+ },
2624
4274
  {
2625
4275
  id: 'mistral-small-2407',
2626
4276
  name: 'Mistral Small 2',
@@ -2632,7 +4282,7 @@ exports.AVAILABLE_MODELS = [
2632
4282
  capabilities: ['text', 'multimodal', 'multilingual', 'open-source'],
2633
4283
  category: 'multimodal',
2634
4284
  isLatest: false,
2635
- notes: 'Our updated small version, released September 2024.',
4285
+ notes: 'Our updated small version, released September 2024 (v24.07).',
2636
4286
  },
2637
4287
  {
2638
4288
  id: 'mistral-embed',
@@ -2641,11 +4291,11 @@ exports.AVAILABLE_MODELS = [
2641
4291
  available: true,
2642
4292
  maxTokens: 8192,
2643
4293
  contextLength: 8192,
2644
- pricing: { input: 0.1, output: 0.1 },
2645
- capabilities: ['embedding', 'text', 'semantic'],
4294
+ pricing: { input: 0.01, output: 0.0 },
4295
+ capabilities: ['text', 'embedding', 'semantic-search'],
2646
4296
  category: 'embedding',
2647
4297
  isLatest: true,
2648
- notes: 'Our state-of-the-art semantic for extracting representation of text extracts.',
4298
+ notes: 'Embedding model for extracting semantic representations of text. Priced at $0.01 per 1M tokens. Our state-of-the-art semantic for extracting representation of text extracts. Available on La Plateforme API.',
2649
4299
  },
2650
4300
  {
2651
4301
  id: 'codestral-embed-2505',
@@ -2654,8 +4304,8 @@ exports.AVAILABLE_MODELS = [
2654
4304
  available: true,
2655
4305
  maxTokens: 8192,
2656
4306
  contextLength: 8192,
2657
- pricing: { input: 0.15, output: 0.15 },
2658
- capabilities: ['embedding', 'code', 'semantic'],
4307
+ pricing: { input: 0.15, output: 0.0 },
4308
+ capabilities: ['coding', 'embedding'],
2659
4309
  category: 'embedding',
2660
4310
  isLatest: true,
2661
4311
  notes: 'Our state-of-the-art semantic for extracting representation of code extracts.',
@@ -2715,21 +4365,33 @@ exports.AVAILABLE_MODELS = [
2715
4365
  maxTokens: 40000,
2716
4366
  contextLength: 40000,
2717
4367
  pricing: { input: 0.5, output: 1.5 },
4368
+ capabilities: ['text', 'reasoning', 'lightweight'],
4369
+ category: 'reasoning',
4370
+ isLatest: true,
4371
+ notes: 'Our small multimodal reasoning model released September 2025 (v25.09).',
4372
+ },
4373
+ {
4374
+ id: 'magistral-small-2507',
4375
+ name: 'Magistral Small 1.1',
4376
+ provider: 'Mistral AI',
4377
+ available: true,
4378
+ maxTokens: 40000,
4379
+ contextLength: 40000,
4380
+ pricing: { input: 0.5, output: 1.5 },
2718
4381
  capabilities: [
2719
4382
  'text',
2720
4383
  'reasoning',
2721
4384
  'thinking',
2722
4385
  'domain-specific',
2723
4386
  'multilingual',
2724
- 'multimodal',
2725
4387
  ],
2726
4388
  category: 'reasoning',
2727
- isLatest: true,
2728
- notes: 'Our small multimodal reasoning model released September 2025 (v25.09).',
4389
+ isLatest: false,
4390
+ notes: 'Our small reasoning model released July 2025 (v25.07). Deprecated October 31, 2025, retirement November 30, 2025. Use Magistral Small 1.2 instead.',
2729
4391
  },
2730
4392
  {
2731
- id: 'magistral-small-2507',
2732
- name: 'Magistral Small 1.1',
4393
+ id: 'magistral-small-2506',
4394
+ name: 'Magistral Small 1.0',
2733
4395
  provider: 'Mistral AI',
2734
4396
  available: true,
2735
4397
  maxTokens: 40000,
@@ -2744,7 +4406,7 @@ exports.AVAILABLE_MODELS = [
2744
4406
  ],
2745
4407
  category: 'reasoning',
2746
4408
  isLatest: false,
2747
- notes: 'Our small reasoning model released July 2025 (v25.07). Deprecated October 31, 2025, retirement November 30, 2025. Use Magistral Small 1.2 instead.',
4409
+ notes: 'Our first small reasoning model released June 2025 (v25.06). Deprecated October 31, 2025, retirement November 30, 2025. Use Magistral Small 1.2 instead.',
2748
4410
  },
2749
4411
  {
2750
4412
  id: 'voxtral-small-2507',
@@ -2767,11 +4429,37 @@ exports.AVAILABLE_MODELS = [
2767
4429
  maxTokens: 32000,
2768
4430
  contextLength: 32000,
2769
4431
  pricing: { input: 0.1, output: 0.1 },
2770
- capabilities: ['audio', 'instruct', 'multimodal'],
4432
+ capabilities: ['voice', 'text'],
2771
4433
  category: 'audio',
2772
4434
  isLatest: true,
2773
4435
  notes: 'Our first model with audio input capabilities for instruct use cases.',
2774
4436
  },
4437
+ {
4438
+ id: 'voxtral-mini-2507',
4439
+ name: 'Voxtral Mini',
4440
+ provider: 'Mistral AI',
4441
+ available: true,
4442
+ maxTokens: 32000,
4443
+ contextLength: 32000,
4444
+ pricing: { input: 0.1, output: 0.1 },
4445
+ capabilities: ['audio', 'instruct', 'mini'],
4446
+ category: 'audio',
4447
+ isLatest: true,
4448
+ notes: 'A mini version of our first audio input model.',
4449
+ },
4450
+ {
4451
+ id: 'voxtral-mini-latest',
4452
+ name: 'Voxtral Mini',
4453
+ provider: 'Mistral AI',
4454
+ available: true,
4455
+ maxTokens: 32000,
4456
+ contextLength: 32000,
4457
+ pricing: { input: 0.1, output: 0.1 },
4458
+ capabilities: ['voice', 'text'],
4459
+ category: 'audio',
4460
+ isLatest: true,
4461
+ notes: 'A mini version of our first audio input model.',
4462
+ },
2775
4463
  {
2776
4464
  id: 'mistral-small-2506',
2777
4465
  name: 'Mistral Small 3.2',
@@ -2783,10 +4471,49 @@ exports.AVAILABLE_MODELS = [
2783
4471
  capabilities: ['text', 'multimodal', 'multilingual', 'open-source'],
2784
4472
  category: 'multimodal',
2785
4473
  isLatest: true,
2786
- notes: 'An update to our previous small model, released June 2025.',
4474
+ notes: 'An update to our previous small model, released June 2025 (v25.06).',
4475
+ },
4476
+ {
4477
+ id: 'mistral-small-latest',
4478
+ name: 'Mistral Small 3.2',
4479
+ provider: 'Mistral AI',
4480
+ available: true,
4481
+ maxTokens: 128000,
4482
+ contextLength: 128000,
4483
+ pricing: { input: 0.1, output: 0.3 },
4484
+ capabilities: [
4485
+ 'text',
4486
+ 'agentic',
4487
+ 'multimodal',
4488
+ 'lightweight',
4489
+ 'function-calling',
4490
+ ],
4491
+ category: 'multimodal',
4492
+ isLatest: true,
4493
+ notes: 'An update to our previous small model, released June 2025. Priced at $0.10 per 1M input tokens and $0.30 per 1M output tokens. Available on La Plateforme API with up to 128K context window. Supports function calling. Fine-tuning available. Also available on Azure AI Foundry.',
2787
4494
  },
2788
4495
  {
2789
4496
  id: 'mistral-small-2503',
4497
+ name: 'Mistral Small 3.1',
4498
+ provider: 'Mistral AI',
4499
+ available: true,
4500
+ maxTokens: 128000,
4501
+ contextLength: 128000,
4502
+ pricing: { input: 0.1, output: 0.3 },
4503
+ capabilities: [
4504
+ 'text',
4505
+ 'multimodal',
4506
+ 'multilingual',
4507
+ 'open-source',
4508
+ 'image-understanding',
4509
+ 'function-calling',
4510
+ ],
4511
+ category: 'multimodal',
4512
+ isLatest: false,
4513
+ notes: 'A new leader in the small models category with image understanding capabilities, released March 2025 (v25.03). Priced at $0.10 per 1M input tokens and $0.30 per 1M output tokens. Available on La Plateforme API with up to 128K context window. Supports function calling. Fine-tuning available. Also available on Azure AI Foundry.',
4514
+ },
4515
+ {
4516
+ id: 'mistral-small-2501',
2790
4517
  name: 'Mistral Small 3.0',
2791
4518
  provider: 'Mistral AI',
2792
4519
  available: true,
@@ -2796,33 +4523,33 @@ exports.AVAILABLE_MODELS = [
2796
4523
  capabilities: ['text', 'multimodal', 'multilingual', 'open-source'],
2797
4524
  category: 'multimodal',
2798
4525
  isLatest: false,
2799
- notes: 'Mistral Small 3.0 model released March 2025.',
4526
+ notes: 'A small efficient and powerful 24B open model for personal or commercial use, released January 2025 (v25.01).',
2800
4527
  },
2801
4528
  {
2802
4529
  id: 'ministral-3b',
2803
4530
  name: 'Ministral 3B',
2804
4531
  provider: 'Mistral AI',
2805
4532
  available: true,
2806
- maxTokens: 128000,
2807
- contextLength: 128000,
2808
- pricing: { input: 0.05, output: 0.15 },
2809
- capabilities: ['text', 'lightweight', 'edge', 'open-source'],
4533
+ maxTokens: 32000,
4534
+ contextLength: 32000,
4535
+ pricing: { input: 0.0, output: 0.0 },
4536
+ capabilities: ['text', 'edge', 'mobile', 'open-source'],
2810
4537
  category: 'text',
2811
4538
  isLatest: true,
2812
- notes: "World's best edge model. 3B parameter model optimized for edge devices.",
4539
+ notes: "World's best edge model released October 2024 (v24.1).",
2813
4540
  },
2814
4541
  {
2815
4542
  id: 'ministral-8b',
2816
4543
  name: 'Ministral 8B',
2817
4544
  provider: 'Mistral AI',
2818
4545
  available: true,
2819
- maxTokens: 128000,
2820
- contextLength: 128000,
2821
- pricing: { input: 0.1, output: 0.3 },
2822
- capabilities: ['text', 'edge', 'open-source', 'high-performance'],
4546
+ maxTokens: 32000,
4547
+ contextLength: 32000,
4548
+ pricing: { input: 0.0, output: 0.0 },
4549
+ capabilities: ['text', 'edge', 'mobile', 'open-source', 'high-performance'],
2823
4550
  category: 'text',
2824
4551
  isLatest: true,
2825
- notes: 'Powerful edge model with extremely high performance/price ratio. 8B parameter model.',
4552
+ notes: 'Powerful edge model with extremely high performance/price ratio, released October 2024 (v24.1).',
2826
4553
  },
2827
4554
  {
2828
4555
  id: 'devstral-small-2507',
@@ -2851,13 +4578,7 @@ exports.AVAILABLE_MODELS = [
2851
4578
  maxTokens: 128000,
2852
4579
  contextLength: 128000,
2853
4580
  pricing: { input: 0.1, output: 0.3 },
2854
- capabilities: [
2855
- 'code',
2856
- 'agents',
2857
- 'open-source',
2858
- 'codebase-exploration',
2859
- 'multi-file-editing',
2860
- ],
4581
+ capabilities: ['coding', 'agentic', 'text', 'lightweight'],
2861
4582
  category: 'code',
2862
4583
  isLatest: true,
2863
4584
  notes: 'An update to our open source model that excels at using tools to explore codebases, editing multiple files and power software engineering agents.',
@@ -2896,16 +4617,16 @@ exports.AVAILABLE_MODELS = [
2896
4617
  },
2897
4618
  {
2898
4619
  id: 'devstral-small-2505',
2899
- name: 'Devstral Small 1',
4620
+ name: 'Devstral Small 1.0',
2900
4621
  provider: 'Mistral AI',
2901
4622
  available: true,
2902
4623
  maxTokens: 128000,
2903
4624
  contextLength: 128000,
2904
4625
  pricing: { input: 0.1, output: 0.3 },
2905
- capabilities: ['code', 'agents', 'open-source', '24b-parameter'],
4626
+ capabilities: ['code', 'agents', 'open-source', '24b-parameter', 'swe'],
2906
4627
  category: 'code',
2907
4628
  isLatest: false,
2908
- notes: 'A 24B text model, open source model that excels at using tools to explore codebases, editing multiple files and power software engineering agents.',
4629
+ notes: 'A 24B text model, open source model that excels at SWE use cases, released May 2025 (v25.05). Deprecated October 31, 2025, retirement November 30, 2025. Use Devstral Small 1.1 instead.',
2909
4630
  },
2910
4631
  {
2911
4632
  id: 'pixtral-12b-2409',
@@ -2918,7 +4639,7 @@ exports.AVAILABLE_MODELS = [
2918
4639
  capabilities: ['vision', 'multimodal', 'small', 'image-understanding'],
2919
4640
  category: 'multimodal',
2920
4641
  isLatest: true,
2921
- notes: 'A 12B model with image understanding capabilities in addition to text.',
4642
+ notes: 'Multimodal model with image understanding capabilities. Priced at $0.15 per 1M tokens (input/output). A 12B model with image understanding capabilities in addition to text. Available on La Plateforme API with up to 128K context window. Also available on Azure AI Foundry.',
2922
4643
  },
2923
4644
  {
2924
4645
  id: 'pixtral-12b',
@@ -2931,7 +4652,7 @@ exports.AVAILABLE_MODELS = [
2931
4652
  capabilities: ['vision', 'multimodal', 'small', 'image-understanding'],
2932
4653
  category: 'multimodal',
2933
4654
  isLatest: true,
2934
- notes: 'A 12B model with image understanding capabilities in addition to text.',
4655
+ notes: 'Multimodal model with image understanding capabilities. Priced at $0.15 per 1M tokens (input/output). A 12B model with image understanding capabilities in addition to text. Available on La Plateforme API with up to 128K context window. Also available on Azure AI Foundry.',
2935
4656
  },
2936
4657
  {
2937
4658
  id: 'open-mistral-nemo-2407',
@@ -2944,7 +4665,7 @@ exports.AVAILABLE_MODELS = [
2944
4665
  capabilities: ['text', 'multilingual', 'open-source', 'best-multilingual'],
2945
4666
  category: 'text',
2946
4667
  isLatest: true,
2947
- notes: 'Our best multilingual open source model released July 2024.',
4668
+ notes: 'Our best multilingual open source model released July 2024. Priced at $0.15 per 1M tokens (input/output). Available on La Plateforme API with up to 128K context window. Also available on Azure AI Foundry.',
2948
4669
  },
2949
4670
  {
2950
4671
  id: 'open-mistral-nemo',
@@ -2957,7 +4678,7 @@ exports.AVAILABLE_MODELS = [
2957
4678
  capabilities: ['text', 'multilingual', 'open-source', 'best-multilingual'],
2958
4679
  category: 'text',
2959
4680
  isLatest: true,
2960
- notes: 'Our best multilingual open source model released July 2024.',
4681
+ notes: 'Our best multilingual open source model released July 2024. Priced at $0.15 per 1M tokens (input/output). Available on La Plateforme API with up to 128K context window. Also available on Azure AI Foundry.',
2961
4682
  },
2962
4683
  {
2963
4684
  id: 'mistral-nemo',
@@ -2983,7 +4704,7 @@ exports.AVAILABLE_MODELS = [
2983
4704
  capabilities: ['text', 'open-source', 'fast'],
2984
4705
  category: 'text',
2985
4706
  isLatest: false,
2986
- notes: 'A 7B transformer model, fast-deployed and easily customisable.',
4707
+ notes: 'A 7B transformer model, fast-deployed and easily customisable. Open-source/open-weights model available for self-hosting (free to use, but requires own infrastructure). Also available on La Plateforme API and Azure AI Foundry.',
2987
4708
  },
2988
4709
  {
2989
4710
  id: 'open-mixtral-8x7b',
@@ -2996,7 +4717,7 @@ exports.AVAILABLE_MODELS = [
2996
4717
  capabilities: ['text', 'mixture-of-experts', 'open-source'],
2997
4718
  category: 'text',
2998
4719
  isLatest: false,
2999
- notes: 'A 7B sparse Mixture-of-Experts (SMoE). Uses 12.9B active parameters out of 45B total.',
4720
+ notes: 'A 7B sparse Mixture-of-Experts (SMoE). Uses 12.9B active parameters out of 45B total. Open-source/open-weights model available for self-hosting (free to use, but requires own infrastructure). Also available on La Plateforme API and Azure AI Foundry.',
3000
4721
  },
3001
4722
  {
3002
4723
  id: 'open-mixtral-8x22b',
@@ -3017,7 +4738,34 @@ exports.AVAILABLE_MODELS = [
3017
4738
  notes: 'Most performant open model. A 22B sparse Mixture-of-Experts (SMoE). Uses only 39B active parameters out of 141B.',
3018
4739
  },
3019
4740
  // === Grok AI Models ===
3020
- // === Grok 4 Fast Series (Latest) ===
4741
+ // === Grok 4.1 Fast Series (Latest) ===
4742
+ {
4743
+ id: 'grok-4-1-fast-reasoning',
4744
+ name: 'Grok 4.1 Fast Reasoning',
4745
+ provider: 'xAI',
4746
+ available: true,
4747
+ maxTokens: 2000000,
4748
+ contextLength: 2000000,
4749
+ pricing: { input: 0.2, output: 0.5 },
4750
+ capabilities: ['text', 'vision', 'reasoning', 'agents', 'tools'],
4751
+ category: 'text',
4752
+ isLatest: true,
4753
+ notes: 'Latest cost-efficient reasoning model with 2M context window. Lightning fast, low cost. Priced at $0.20 per 1M input tokens and $0.50 per 1M output tokens. Rate limits: 4M TPM (tokens per minute), 480 RPM (requests per minute)',
4754
+ },
4755
+ {
4756
+ id: 'grok-4-1-fast-non-reasoning',
4757
+ name: 'Grok 4.1 Fast Non-Reasoning',
4758
+ provider: 'xAI',
4759
+ available: true,
4760
+ maxTokens: 2000000,
4761
+ contextLength: 2000000,
4762
+ pricing: { input: 0.2, output: 0.5 },
4763
+ capabilities: ['text', 'vision', 'fast'],
4764
+ category: 'text',
4765
+ isLatest: true,
4766
+ notes: 'Latest cost-efficient non-reasoning model with 2M context window. Lightning fast, low cost. Priced at $0.20 per 1M input tokens and $0.50 per 1M output tokens. Rate limits: 4M TPM (tokens per minute), 480 RPM (requests per minute)',
4767
+ },
4768
+ // === Grok 4 Fast Series ===
3021
4769
  {
3022
4770
  id: 'grok-4-fast-reasoning',
3023
4771
  name: 'Grok 4 Fast Reasoning',
@@ -3026,15 +4774,10 @@ exports.AVAILABLE_MODELS = [
3026
4774
  maxTokens: 2000000,
3027
4775
  contextLength: 2000000,
3028
4776
  pricing: { input: 0.2, output: 0.5 },
3029
- capabilities: [
3030
- 'text',
3031
- 'reasoning',
3032
- 'function-calling',
3033
- 'structured-outputs',
3034
- ],
4777
+ capabilities: ['text', 'vision', 'reasoning', 'agents'],
3035
4778
  category: 'text',
3036
4779
  isLatest: true,
3037
- notes: 'Latest cost-efficient reasoning model with 2M context window. Lightning fast, low cost. 4M TPM, 480 RPM rate limits',
4780
+ notes: 'Cost-efficient reasoning model with 2M context window. Lightning fast, low cost. Priced at $0.20 per 1M input tokens and $0.50 per 1M output tokens. Rate limits: 4M TPM (tokens per minute), 480 RPM (requests per minute)',
3038
4781
  },
3039
4782
  {
3040
4783
  id: 'grok-4-fast-non-reasoning',
@@ -3044,10 +4787,10 @@ exports.AVAILABLE_MODELS = [
3044
4787
  maxTokens: 2000000,
3045
4788
  contextLength: 2000000,
3046
4789
  pricing: { input: 0.2, output: 0.5 },
3047
- capabilities: ['text', 'function-calling', 'structured-outputs'],
4790
+ capabilities: ['text', 'vision', 'fast'],
3048
4791
  category: 'text',
3049
4792
  isLatest: true,
3050
- notes: 'Latest cost-efficient non-reasoning model with 2M context window. Lightning fast, low cost. 4M TPM, 480 RPM rate limits',
4793
+ notes: 'Cost-efficient non-reasoning model with 2M context window. Lightning fast, low cost. Priced at $0.20 per 1M input tokens and $0.50 per 1M output tokens. Rate limits: 4M TPM (tokens per minute), 480 RPM (requests per minute)',
3051
4794
  },
3052
4795
  {
3053
4796
  id: 'grok-code-fast-1',
@@ -3057,15 +4800,10 @@ exports.AVAILABLE_MODELS = [
3057
4800
  maxTokens: 256000,
3058
4801
  contextLength: 256000,
3059
4802
  pricing: { input: 0.2, output: 1.5 },
3060
- capabilities: [
3061
- 'code',
3062
- 'programming',
3063
- 'function-calling',
3064
- 'structured-outputs',
3065
- ],
4803
+ capabilities: ['text', 'coding', 'function-calling', 'structured-outputs'],
3066
4804
  category: 'code',
3067
4805
  isLatest: true,
3068
- notes: 'Cost-efficient coding model optimized for code generation and programming tasks. 2M TPM, 480 RPM rate limits',
4806
+ notes: 'Cost-efficient coding model optimized for code generation and programming tasks. Priced at $0.20 per 1M input tokens and $1.50 per 1M output tokens. Context window: 256K tokens. Rate limits: 2M TPM (tokens per minute), 480 RPM (requests per minute)',
3069
4807
  },
3070
4808
  // === Grok 4 Series ===
3071
4809
  {
@@ -3084,7 +4822,7 @@ exports.AVAILABLE_MODELS = [
3084
4822
  ],
3085
4823
  category: 'text',
3086
4824
  isLatest: true,
3087
- notes: 'Latest Grok 4 reasoning model. Note: Grok 4 is always a reasoning model with no non-reasoning mode. 2M TPM, 480 RPM rate limits. Knowledge cutoff: November 2024',
4825
+ notes: 'Latest Grok 4 reasoning model. Note: Grok 4 is always a reasoning model with no non-reasoning mode. Priced at $3.00 per 1M input tokens and $15.00 per 1M output tokens. Context window: 256K tokens. Rate limits: 2M TPM (tokens per minute), 480 RPM (requests per minute). Knowledge cutoff: November 2024',
3088
4826
  },
3089
4827
  {
3090
4828
  id: 'grok-4',
@@ -3102,7 +4840,7 @@ exports.AVAILABLE_MODELS = [
3102
4840
  ],
3103
4841
  category: 'text',
3104
4842
  isLatest: true,
3105
- notes: 'Alias for latest stable Grok 4 version. Points to grok-4-0709. 2M TPM, 480 RPM rate limits',
4843
+ notes: 'Alias for latest stable Grok 4 version. Points to grok-4-0709. Priced at $3.00 per 1M input tokens and $15.00 per 1M output tokens. Context window: 256K tokens. Rate limits: 2M TPM (tokens per minute), 480 RPM (requests per minute)',
3106
4844
  },
3107
4845
  {
3108
4846
  id: 'grok-4-latest',
@@ -3120,7 +4858,7 @@ exports.AVAILABLE_MODELS = [
3120
4858
  ],
3121
4859
  category: 'text',
3122
4860
  isLatest: true,
3123
- notes: 'Alias for latest Grok 4 version (may include preview features). Auto-updates with new releases. 2M TPM, 480 RPM rate limits',
4861
+ notes: 'Alias for latest Grok 4 version (may include preview features). Auto-updates with new releases. Priced at $3.00 per 1M input tokens and $15.00 per 1M output tokens. Context window: 256K tokens. Rate limits: 2M TPM (tokens per minute), 480 RPM (requests per minute)',
3124
4862
  },
3125
4863
  // === Grok 3 Series ===
3126
4864
  {
@@ -3134,7 +4872,7 @@ exports.AVAILABLE_MODELS = [
3134
4872
  capabilities: ['text', 'vision', 'function-calling', 'structured-outputs'],
3135
4873
  category: 'multimodal',
3136
4874
  isLatest: false,
3137
- notes: 'Standard Grok 3 model. 600 RPM rate limits. Knowledge cutoff: November 2024',
4875
+ notes: 'Standard Grok 3 model. Priced at $3.00 per 1M input tokens and $15.00 per 1M output tokens. Context window: 131K tokens. Rate limits: 600 RPM (requests per minute). Knowledge cutoff: November 2024',
3138
4876
  },
3139
4877
  {
3140
4878
  id: 'grok-3-mini',
@@ -3147,7 +4885,7 @@ exports.AVAILABLE_MODELS = [
3147
4885
  capabilities: ['text', 'vision', 'function-calling', 'structured-outputs'],
3148
4886
  category: 'multimodal',
3149
4887
  isLatest: false,
3150
- notes: 'Cost-effective Grok 3 Mini model. 480 RPM rate limits. Knowledge cutoff: November 2024',
4888
+ notes: 'Cost-effective Grok 3 Mini model. Priced at $0.30 per 1M input tokens and $0.50 per 1M output tokens. Context window: 131K tokens. Rate limits: 480 RPM (requests per minute). Knowledge cutoff: November 2024',
3151
4889
  },
3152
4890
  // === Grok 2 Vision Series ===
3153
4891
  {
@@ -3161,7 +4899,7 @@ exports.AVAILABLE_MODELS = [
3161
4899
  capabilities: ['text', 'vision', 'image-understanding'],
3162
4900
  category: 'multimodal',
3163
4901
  isLatest: false,
3164
- notes: 'Grok 2 Vision model for image understanding. 600 RPM rate limits (us-east-1) or 50 RPS (eu-west-1)',
4902
+ notes: 'Grok 2 Vision model for image understanding. Priced at $2.00 per 1M input tokens and $10.00 per 1M output tokens. Context window: 32K tokens. Rate limits: 600 RPM (requests per minute) for us-east-1 region, 50 RPS (requests per second) for eu-west-1 region',
3165
4903
  },
3166
4904
  {
3167
4905
  id: 'grok-2-vision-1212-us-east-1',
@@ -3174,7 +4912,7 @@ exports.AVAILABLE_MODELS = [
3174
4912
  capabilities: ['text', 'vision', 'image-understanding'],
3175
4913
  category: 'multimodal',
3176
4914
  isLatest: false,
3177
- notes: 'Grok 2 Vision model for us-east-1 region. 600 RPM rate limits',
4915
+ notes: 'Grok 2 Vision model for us-east-1 region. Priced at $2.00 per 1M input tokens and $10.00 per 1M output tokens. Context window: 32K tokens. Rate limits: 600 RPM (requests per minute)',
3178
4916
  },
3179
4917
  {
3180
4918
  id: 'grok-2-vision-1212-eu-west-1',
@@ -3187,7 +4925,7 @@ exports.AVAILABLE_MODELS = [
3187
4925
  capabilities: ['text', 'vision', 'image-understanding'],
3188
4926
  category: 'multimodal',
3189
4927
  isLatest: false,
3190
- notes: 'Grok 2 Vision model for eu-west-1 region. 50 RPS rate limits',
4928
+ notes: 'Grok 2 Vision model for eu-west-1 region. Priced at $2.00 per 1M input tokens and $10.00 per 1M output tokens. Context window: 32K tokens. Rate limits: 50 RPS (requests per second)',
3191
4929
  },
3192
4930
  // === Grok 2 Image Generation ===
3193
4931
  {
@@ -3197,11 +4935,11 @@ exports.AVAILABLE_MODELS = [
3197
4935
  available: true,
3198
4936
  maxTokens: 0,
3199
4937
  contextLength: 0,
3200
- pricing: { input: 0.07, output: 0.07 },
3201
- capabilities: ['image-generation'],
4938
+ pricing: { input: 0.07, output: 0.0 },
4939
+ capabilities: ['image-generation', 'text-to-image'],
3202
4940
  category: 'image',
3203
4941
  isLatest: true,
3204
- notes: 'Grok 2 image generation model. $0.07 per image output, 300 RPM rate limits',
4942
+ notes: 'Grok 2 image generation model. Priced at $0.07 per image output. Rate limits: 300 RPM (requests per minute)',
3205
4943
  },
3206
4944
  {
3207
4945
  id: 'grok-2-image',
@@ -3210,11 +4948,11 @@ exports.AVAILABLE_MODELS = [
3210
4948
  available: true,
3211
4949
  maxTokens: 0,
3212
4950
  contextLength: 0,
3213
- pricing: { input: 0.07, output: 0.07 },
3214
- capabilities: ['image-generation'],
4951
+ pricing: { input: 0.07, output: 0.0 },
4952
+ capabilities: ['image-generation', 'text-to-image'],
3215
4953
  category: 'image',
3216
4954
  isLatest: true,
3217
- notes: 'Alias for latest stable Grok 2 Image version. Points to grok-2-image-1212. $0.07 per image, 300 RPM rate limits',
4955
+ notes: 'Alias for latest stable Grok 2 Image version. Points to grok-2-image-1212. Priced at $0.07 per image output. Rate limits: 300 RPM (requests per minute)',
3218
4956
  },
3219
4957
  {
3220
4958
  id: 'grok-2-image-latest',
@@ -3223,11 +4961,11 @@ exports.AVAILABLE_MODELS = [
3223
4961
  available: true,
3224
4962
  maxTokens: 0,
3225
4963
  contextLength: 0,
3226
- pricing: { input: 0.07, output: 0.07 },
3227
- capabilities: ['image-generation'],
4964
+ pricing: { input: 0.07, output: 0.0 },
4965
+ capabilities: ['image-generation', 'text-to-image'],
3228
4966
  category: 'image',
3229
4967
  isLatest: true,
3230
- notes: 'Alias for latest Grok 2 Image version. Auto-updates with new releases. $0.07 per image, 300 RPM rate limits',
4968
+ notes: 'Alias for latest Grok 2 Image version. Auto-updates with new releases. Priced at $0.07 per image output. Rate limits: 300 RPM (requests per minute)',
3231
4969
  },
3232
4970
  // === Meta Llama Models ===
3233
4971
  // === Llama 4 Series (Latest) ===
@@ -3238,18 +4976,11 @@ exports.AVAILABLE_MODELS = [
3238
4976
  available: true,
3239
4977
  maxTokens: 10000000,
3240
4978
  contextLength: 10000000,
3241
- pricing: { input: 0.19, output: 0.49 },
3242
- capabilities: [
3243
- 'text',
3244
- 'vision',
3245
- 'multimodal',
3246
- 'long-context',
3247
- 'multilingual',
3248
- 'image-grounding',
3249
- ],
4979
+ pricing: { input: 0.15, output: 0.45 },
4980
+ capabilities: ['text', 'vision', 'coding', 'reasoning'],
3250
4981
  category: 'multimodal',
3251
4982
  isLatest: true,
3252
- notes: 'Class-leading natively multimodal model with superior text and visual intelligence. 17B active params x 16 experts, 109B total params. Includes Llama Guard 4 12B, Llama Prompt Guard 2 22M and 86M. Licensed under Llama 4 Community License Agreement',
4983
+ notes: 'Class-leading natively multimodal model with superior text and visual intelligence. 17B active params x 16 experts, 109B total params. Includes Llama Guard 4 12B, Llama Prompt Guard 2 22M and 86M. Features 10M context window and improved multimodal capabilities. Pricing varies by provider: ~$0.08-$0.18 per 1M input tokens, ~$0.30-$0.59 per 1M output tokens. Available on Azure, AWS Bedrock, Together AI, and DeepInfra. Licensed under Llama 4 Community License Agreement',
3253
4984
  },
3254
4985
  {
3255
4986
  id: 'llama-4-maverick',
@@ -3258,19 +4989,18 @@ exports.AVAILABLE_MODELS = [
3258
4989
  available: true,
3259
4990
  maxTokens: 10000000,
3260
4991
  contextLength: 10000000,
3261
- pricing: { input: 0.19, output: 0.49 },
4992
+ pricing: { input: 0.25, output: 0.75 },
3262
4993
  capabilities: [
3263
4994
  'text',
3264
4995
  'vision',
3265
- 'multimodal',
3266
- 'long-context',
4996
+ 'coding',
4997
+ 'reasoning',
3267
4998
  'multilingual',
3268
- 'image-grounding',
3269
- 'fast-responses',
4999
+ 'long-context',
3270
5000
  ],
3271
5001
  category: 'multimodal',
3272
5002
  isLatest: true,
3273
- notes: 'Industry-leading natively multimodal model with groundbreaking intelligence and fast responses at a low cost. 17B active params x 128 experts, 400B total params. Includes Llama Guard 4 12B, Llama Prompt Guard 2 22M and 86M. Licensed under Llama 4 Community License Agreement',
5003
+ notes: 'Industry-leading natively multimodal model with groundbreaking intelligence and fast responses at a low cost. 17B active params x 128 experts, 400B total params. Includes Llama Guard 4 12B, Llama Prompt Guard 2 22M and 86M. Features 10M context window and improved multimodal capabilities. Pricing varies by provider: ~$0.15-$0.27 per 1M input tokens, ~$0.60-$0.85 per 1M output tokens. Available on Azure, AWS Bedrock, Together AI, and DeepInfra. Licensed under Llama 4 Community License Agreement',
3274
5004
  },
3275
5005
  {
3276
5006
  id: 'llama-4-behemoth-preview',
@@ -3279,16 +5009,8 @@ exports.AVAILABLE_MODELS = [
3279
5009
  available: true,
3280
5010
  maxTokens: 10000000,
3281
5011
  contextLength: 10000000,
3282
- pricing: { input: 0.19, output: 0.49 },
3283
- capabilities: [
3284
- 'text',
3285
- 'vision',
3286
- 'multimodal',
3287
- 'long-context',
3288
- 'multilingual',
3289
- 'image-grounding',
3290
- 'teacher-model',
3291
- ],
5012
+ pricing: { input: 0.3, output: 0.3 },
5013
+ capabilities: ['text', 'vision', 'coding', 'reasoning', 'multilingual'],
3292
5014
  category: 'multimodal',
3293
5015
  isLatest: true,
3294
5016
  notes: 'Early preview of the Llama 4 teacher model used to distill Llama 4 Scout and Llama 4 Maverick. Still in training phase. Licensed under Llama 4 Community License Agreement',
@@ -3301,11 +5023,11 @@ exports.AVAILABLE_MODELS = [
3301
5023
  available: true,
3302
5024
  maxTokens: 131072,
3303
5025
  contextLength: 131072,
3304
- pricing: { input: 0.59, output: 0.79 },
3305
- capabilities: ['text', 'multilingual', 'open-source'],
5026
+ pricing: { input: 0.1, output: 0.1 },
5027
+ capabilities: ['text', 'coding'],
3306
5028
  category: 'text',
3307
5029
  isLatest: true,
3308
- notes: 'Multilingual open source large language model. Experience 405B performance and quality at a fraction of the cost. Licensed under Llama 3.3 Community License Agreement',
5030
+ notes: 'Multilingual open source large language model. Experience 405B performance and quality at a fraction of the cost. Highly cost-efficient model delivering performance comparable to larger models at significantly lower cost. Pricing varies by provider: ~$0.07-$0.88 per 1M tokens (input/output). Available on Azure AI Foundry (~$0.70/1M tokens), AWS Bedrock, Together AI, and DeepInfra. Licensed under Llama 3.3 Community License Agreement',
3309
5031
  },
3310
5032
  // === Llama 3.2 Series ===
3311
5033
  {
@@ -3315,11 +5037,11 @@ exports.AVAILABLE_MODELS = [
3315
5037
  available: true,
3316
5038
  maxTokens: 128000,
3317
5039
  contextLength: 128000,
3318
- pricing: { input: 0.16, output: 0.16 },
3319
- capabilities: ['text', 'vision', 'multimodal', 'open-source'],
5040
+ pricing: { input: 0.08, output: 0.08 },
5041
+ capabilities: ['text', 'vision'],
3320
5042
  category: 'multimodal',
3321
5043
  isLatest: true,
3322
- notes: 'Open multimodal model that is flexible and can reason on high resolution images and output text. Includes Llama Guard 3 11B Vision. Licensed under Llama 3.2 Community License Agreement',
5044
+ notes: 'Open multimodal model that is flexible and can reason on high resolution images and output text. Includes Llama Guard 3 11B Vision. Pricing varies by provider: ~$0.02-$0.18 per 1M tokens (input/output). Available on Azure, AWS Bedrock, Together AI, and DeepInfra. Licensed under Llama 3.2 Community License Agreement',
3323
5045
  },
3324
5046
  {
3325
5047
  id: 'llama-3.2-90b',
@@ -3332,7 +5054,7 @@ exports.AVAILABLE_MODELS = [
3332
5054
  capabilities: ['text', 'vision', 'multimodal', 'open-source'],
3333
5055
  category: 'multimodal',
3334
5056
  isLatest: true,
3335
- notes: 'Open multimodal model that is flexible and can reason on high resolution images and output text. Includes Llama Guard 3 11B Vision. Licensed under Llama 3.2 Community License Agreement',
5057
+ notes: 'Open multimodal model that is flexible and can reason on high resolution images and output text. Includes Llama Guard 3 11B Vision. Pricing varies by provider. Available on Azure, AWS Bedrock, Together AI, and DeepInfra. Licensed under Llama 3.2 Community License Agreement',
3336
5058
  },
3337
5059
  {
3338
5060
  id: 'llama-3.2-3b',
@@ -3345,7 +5067,7 @@ exports.AVAILABLE_MODELS = [
3345
5067
  capabilities: ['text', 'lightweight', 'mobile', 'edge', 'open-source'],
3346
5068
  category: 'text',
3347
5069
  isLatest: true,
3348
- notes: 'Lightweight and most cost-efficient model you can run anywhere on mobile and on edge devices. Includes Llama Guard 3 1B. Quantized models available. Licensed under Llama 3.2 Community License Agreement',
5070
+ notes: 'Lightweight and most cost-efficient model you can run anywhere on mobile and on edge devices. Includes Llama Guard 3 1B. Quantized models available. Pricing varies by provider: ~$0.02-$0.18 per 1M tokens (input/output). Available on Azure, AWS Bedrock, Together AI, and DeepInfra. Licensed under Llama 3.2 Community License Agreement',
3349
5071
  },
3350
5072
  {
3351
5073
  id: 'llama-3.2-1b',
@@ -3358,7 +5080,7 @@ exports.AVAILABLE_MODELS = [
3358
5080
  capabilities: ['text', 'lightweight', 'mobile', 'edge', 'open-source'],
3359
5081
  category: 'text',
3360
5082
  isLatest: true,
3361
- notes: 'Lightweight and most cost-efficient model you can run anywhere on mobile and on edge devices. Includes Llama Guard 3 1B. Quantized models available. Licensed under Llama 3.2 Community License Agreement',
5083
+ notes: 'Lightweight and most cost-efficient model you can run anywhere on mobile and on edge devices. Includes Llama Guard 3 1B. Quantized models available. Pricing varies by provider: ~$0.02-$0.18 per 1M tokens (input/output). Available on Azure, AWS Bedrock, Together AI, and DeepInfra. Licensed under Llama 3.2 Community License Agreement',
3362
5084
  },
3363
5085
  // === Llama 3.1 Series ===
3364
5086
  {
@@ -3368,11 +5090,11 @@ exports.AVAILABLE_MODELS = [
3368
5090
  available: true,
3369
5091
  maxTokens: 131072,
3370
5092
  contextLength: 131072,
3371
- pricing: { input: 0.0, output: 0.0 },
3372
- capabilities: ['text', 'multilingual', 'open-source'],
5093
+ pricing: { input: 2.25, output: 2.25 },
5094
+ capabilities: ['text', 'reasoning'],
3373
5095
  category: 'text',
3374
5096
  isLatest: false,
3375
- notes: 'Multilingual open source large language model. Includes Llama Guard 3 8B and Llama Prompt Guard 2. Licensed under Llama 3.1 Community License Agreement',
5097
+ notes: 'Multilingual open source large language model. Includes Llama Guard 3 8B and Llama Prompt Guard 2. Pricing varies by provider: ~$1.00-$3.50 per 1M tokens (input/output). Together AI offers at $3.50/1M tokens. Available on Azure, AWS Bedrock, Together AI, and DeepInfra. Licensed under Llama 3.1 Community License Agreement',
3376
5098
  },
3377
5099
  {
3378
5100
  id: 'llama-3.1-8b',
@@ -3385,7 +5107,7 @@ exports.AVAILABLE_MODELS = [
3385
5107
  capabilities: ['text', 'multilingual', 'open-source'],
3386
5108
  category: 'text',
3387
5109
  isLatest: false,
3388
- notes: 'Multilingual open source large language model. Includes Llama Guard 3 8B and Llama Prompt Guard 2. Licensed under Llama 3.1 Community License Agreement',
5110
+ notes: 'Multilingual open source large language model. Includes Llama Guard 3 8B and Llama Prompt Guard 2. Pricing varies by provider: ~$0.02-$0.18 per 1M tokens (input/output). Together AI offers at $0.18/1M tokens. DeepInfra provides Llama-3.1-8B-Instruct-Turbo at $0.02/$0.03 (input/output) per million tokens. Available on Azure, AWS Bedrock, Together AI, and DeepInfra. Licensed under Llama 3.1 Community License Agreement',
3389
5111
  },
3390
5112
  // === Llama 3 Series (Legacy) ===
3391
5113
  {
@@ -3399,7 +5121,7 @@ exports.AVAILABLE_MODELS = [
3399
5121
  capabilities: ['text', 'open-source'],
3400
5122
  category: 'text',
3401
5123
  isLatest: false,
3402
- notes: 'Legacy Llama 3 70B model. Licensed under Llama 3 Community License Agreement',
5124
+ notes: 'Legacy Llama 3 70B model. Available on Azure, AWS Bedrock, Together AI, and DeepInfra. Self-hosting requires substantial GPU resources (e.g., H100s). Licensed under Llama 3 Community License Agreement',
3403
5125
  },
3404
5126
  {
3405
5127
  id: 'llama-3-8b',
@@ -3412,7 +5134,7 @@ exports.AVAILABLE_MODELS = [
3412
5134
  capabilities: ['text', 'open-source'],
3413
5135
  category: 'text',
3414
5136
  isLatest: false,
3415
- notes: 'Legacy Llama 3 8B model. Licensed under Llama 3 Community License Agreement',
5137
+ notes: 'Legacy Llama 3 8B model. Available on Azure, AWS Bedrock, Together AI, and DeepInfra. Models are open-source and can be run for free if you have your own hardware. Licensed under Llama 3 Community License Agreement',
3416
5138
  },
3417
5139
  ];
3418
5140
  const getModelsByProvider = (provider) => {