ruby_llm 1.7.0 → 1.8.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
Files changed (29)
  1. checksums.yaml +4 -4
  2. data/README.md +3 -2
  3. data/lib/generators/ruby_llm/chat_ui/chat_ui_generator.rb +30 -18
  4. data/lib/generators/ruby_llm/generator_helpers.rb +129 -0
  5. data/lib/generators/ruby_llm/install/install_generator.rb +110 -0
  6. data/lib/generators/ruby_llm/install/templates/add_references_to_chats_tool_calls_and_messages_migration.rb.tt +9 -0
  7. data/lib/generators/ruby_llm/install/templates/create_chats_migration.rb.tt +2 -3
  8. data/lib/generators/ruby_llm/install/templates/create_messages_migration.rb.tt +3 -6
  9. data/lib/generators/ruby_llm/install/templates/create_models_migration.rb.tt +3 -6
  10. data/lib/generators/ruby_llm/install/templates/create_tool_calls_migration.rb.tt +4 -5
  11. data/lib/generators/ruby_llm/upgrade_to_v1_7/templates/migration.rb.tt +21 -13
  12. data/lib/generators/ruby_llm/upgrade_to_v1_7/upgrade_to_v1_7_generator.rb +121 -0
  13. data/lib/ruby_llm/attachment.rb +5 -0
  14. data/lib/ruby_llm/configuration.rb +2 -0
  15. data/lib/ruby_llm/mime_type.rb +4 -0
  16. data/lib/ruby_llm/model/info.rb +4 -0
  17. data/lib/ruby_llm/models.json +780 -511
  18. data/lib/ruby_llm/models.rb +7 -3
  19. data/lib/ruby_llm/moderation.rb +56 -0 (new moderation API; see the sketch after this list)
  20. data/lib/ruby_llm/provider.rb +6 -0
  21. data/lib/ruby_llm/providers/gemini/capabilities.rb +5 -0
  22. data/lib/ruby_llm/providers/openai/moderation.rb +34 -0
  23. data/lib/ruby_llm/providers/openai.rb +1 -0
  24. data/lib/ruby_llm/railtie.rb +1 -1
  25. data/lib/ruby_llm/version.rb +1 -1
  26. data/lib/ruby_llm.rb +4 -0
  27. metadata +7 -3
  28. data/lib/generators/ruby_llm/install_generator.rb +0 -217
  29. data/lib/generators/ruby_llm/upgrade_to_v1_7_generator.rb +0 -160
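Release highlight: besides the models.json refresh rendered below, 1.8.0 reorganizes the install and upgrade generators into per-task directories (items 3-12, 28-29) and adds moderation support (items 19 and 22). As a minimal sketch of the new moderation surface, assuming a module-level RubyLLM.moderate entry point implied by the new moderation.rb and a result object with flagged?/categories readers (neither is confirmed by this page), usage might look like:

    # Hypothetical sketch only: RubyLLM.moderate, #flagged?, and #categories
    # are assumptions inferred from the new files, not an API this diff confirms.
    require "ruby_llm"

    RubyLLM.configure do |config|
      # Moderation ships via the OpenAI provider (item 22), so an OpenAI key is assumed.
      config.openai_api_key = ENV["OPENAI_API_KEY"]
    end

    result = RubyLLM.moderate("user-supplied text to screen")
    puts result.flagged?    # assumed boolean predicate
    puts result.categories  # assumed list of flagged category names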
@@ -3,7 +3,7 @@
3
3
  "id": "claude-3-5-haiku-20241022",
4
4
  "name": "Claude Haiku 3.5",
5
5
  "provider": "anthropic",
6
- "family": "claude-3-5-haiku-latest",
6
+ "family": "claude-3-5-haiku",
7
7
  "created_at": null,
8
8
  "context_window": 200000,
9
9
  "max_output_tokens": 8192,
@@ -14,10 +14,12 @@
14
14
  "text"
15
15
  ],
16
16
  "output": [
17
+ "embeddings",
17
18
  "text"
18
19
  ]
19
20
  },
20
21
  "capabilities": [
22
+ "batch",
21
23
  "function_calling"
22
24
  ],
23
25
  "pricing": {
@@ -26,6 +28,10 @@
26
28
  "input_per_million": 0.8,
27
29
  "cached_input_per_million": 1.0,
28
30
  "output_per_million": 4.0
31
+ },
32
+ "batch": {
33
+ "input_per_million": 0.4,
34
+ "output_per_million": 2.0
29
35
  }
30
36
  }
31
37
  },
@@ -111,7 +117,7 @@
111
117
  "id": "claude-3-7-sonnet-20250219",
112
118
  "name": "Claude Sonnet 3.7",
113
119
  "provider": "anthropic",
114
- "family": "claude-3-7-sonnet-latest",
120
+ "family": "claude-3-7-sonnet",
115
121
  "created_at": null,
116
122
  "context_window": 200000,
117
123
  "max_output_tokens": 64000,
@@ -122,10 +128,12 @@
122
128
  "text"
123
129
  ],
124
130
  "output": [
131
+ "embeddings",
125
132
  "text"
126
133
  ]
127
134
  },
128
135
  "capabilities": [
136
+ "batch",
129
137
  "function_calling"
130
138
  ],
131
139
  "pricing": {
@@ -134,6 +142,10 @@
134
142
  "input_per_million": 3.0,
135
143
  "cached_input_per_million": 3.75,
136
144
  "output_per_million": 15.0
145
+ },
146
+ "batch": {
147
+ "input_per_million": 1.5,
148
+ "output_per_million": 7.5
137
149
  }
138
150
  }
139
151
  },
@@ -154,10 +166,12 @@
154
166
  "text"
155
167
  ],
156
168
  "output": [
169
+ "embeddings",
157
170
  "text"
158
171
  ]
159
172
  },
160
173
  "capabilities": [
174
+ "batch",
161
175
  "function_calling"
162
176
  ],
163
177
  "pricing": {
@@ -166,6 +180,10 @@
166
180
  "input_per_million": 0.25,
167
181
  "cached_input_per_million": 0.3,
168
182
  "output_per_million": 1.25
183
+ },
184
+ "batch": {
185
+ "input_per_million": 0.125,
186
+ "output_per_million": 0.625
169
187
  }
170
188
  }
171
189
  },
@@ -224,10 +242,12 @@
224
242
  "text"
225
243
  ],
226
244
  "output": [
245
+ "embeddings",
227
246
  "text"
228
247
  ]
229
248
  },
230
249
  "capabilities": [
250
+ "batch",
231
251
  "function_calling"
232
252
  ],
233
253
  "pricing": {
@@ -236,6 +256,10 @@
236
256
  "input_per_million": 15.0,
237
257
  "cached_input_per_million": 18.75,
238
258
  "output_per_million": 75.0
259
+ },
260
+ "batch": {
261
+ "input_per_million": 7.5,
262
+ "output_per_million": 37.5
239
263
  }
240
264
  }
241
265
  },
@@ -245,7 +269,7 @@
245
269
  "id": "claude-opus-4-20250514",
246
270
  "name": "Claude Opus 4",
247
271
  "provider": "anthropic",
248
- "family": "claude-opus-4-0",
272
+ "family": "claude-opus-4",
249
273
  "created_at": null,
250
274
  "context_window": 200000,
251
275
  "max_output_tokens": 32000,
@@ -256,10 +280,12 @@
256
280
  "text"
257
281
  ],
258
282
  "output": [
283
+ "embeddings",
259
284
  "text"
260
285
  ]
261
286
  },
262
287
  "capabilities": [
288
+ "batch",
263
289
  "function_calling"
264
290
  ],
265
291
  "pricing": {
@@ -268,6 +294,10 @@
268
294
  "input_per_million": 15.0,
269
295
  "cached_input_per_million": 18.75,
270
296
  "output_per_million": 75.0
297
+ },
298
+ "batch": {
299
+ "input_per_million": 7.5,
300
+ "output_per_million": 37.5
271
301
  }
272
302
  }
273
303
  },
@@ -277,7 +307,7 @@
277
307
  "id": "claude-sonnet-4-20250514",
278
308
  "name": "Claude Sonnet 4",
279
309
  "provider": "anthropic",
280
- "family": "claude-sonnet-4-0",
310
+ "family": "claude-sonnet-4",
281
311
  "created_at": null,
282
312
  "context_window": 200000,
283
313
  "max_output_tokens": 64000,
@@ -288,10 +318,12 @@
288
318
  "text"
289
319
  ],
290
320
  "output": [
321
+ "embeddings",
291
322
  "text"
292
323
  ]
293
324
  },
294
325
  "capabilities": [
326
+ "batch",
295
327
  "function_calling"
296
328
  ],
297
329
  "pricing": {
@@ -300,6 +332,10 @@
300
332
  "input_per_million": 3.0,
301
333
  "cached_input_per_million": 3.75,
302
334
  "output_per_million": 15.0
335
+ },
336
+ "batch": {
337
+ "input_per_million": 1.5,
338
+ "output_per_million": 7.5
303
339
  }
304
340
  }
305
341
  },
@@ -1249,55 +1285,6 @@
1249
1285
  ]
1250
1286
  }
1251
1287
  },
1252
- {
1253
- "id": "anthropic.claude-instant-v1",
1254
- "name": "Claude Instant",
1255
- "provider": "bedrock",
1256
- "family": "claude_instant",
1257
- "created_at": null,
1258
- "context_window": 200000,
1259
- "max_output_tokens": 4096,
1260
- "knowledge_cutoff": null,
1261
- "modalities": {
1262
- "input": [
1263
- "text",
1264
- "image",
1265
- "pdf"
1266
- ],
1267
- "output": [
1268
- "text"
1269
- ]
1270
- },
1271
- "capabilities": [
1272
- "streaming",
1273
- "function_calling"
1274
- ],
1275
- "pricing": {
1276
- "text_tokens": {
1277
- "standard": {
1278
- "input_per_million": 0.8,
1279
- "output_per_million": 2.4
1280
- },
1281
- "batch": {
1282
- "input_per_million": 0.4,
1283
- "output_per_million": 1.2
1284
- }
1285
- }
1286
- },
1287
- "metadata": {
1288
- "provider_name": "Anthropic",
1289
- "inference_types": [
1290
- "ON_DEMAND"
1291
- ],
1292
- "streaming_supported": true,
1293
- "input_modalities": [
1294
- "TEXT"
1295
- ],
1296
- "output_modalities": [
1297
- "TEXT"
1298
- ]
1299
- }
1300
- },
1301
1288
  {
1302
1289
  "id": "anthropic.claude-instant-v1:2:100k",
1303
1290
  "name": "Claude Instant",
@@ -1347,55 +1334,6 @@
1347
1334
  ]
1348
1335
  }
1349
1336
  },
1350
- {
1351
- "id": "anthropic.claude-v2",
1352
- "name": "Claude",
1353
- "provider": "bedrock",
1354
- "family": "claude2",
1355
- "created_at": null,
1356
- "context_window": 200000,
1357
- "max_output_tokens": 4096,
1358
- "knowledge_cutoff": null,
1359
- "modalities": {
1360
- "input": [
1361
- "text",
1362
- "image",
1363
- "pdf"
1364
- ],
1365
- "output": [
1366
- "text"
1367
- ]
1368
- },
1369
- "capabilities": [
1370
- "streaming",
1371
- "function_calling"
1372
- ],
1373
- "pricing": {
1374
- "text_tokens": {
1375
- "standard": {
1376
- "input_per_million": 8.0,
1377
- "output_per_million": 24.0
1378
- },
1379
- "batch": {
1380
- "input_per_million": 4.0,
1381
- "output_per_million": 12.0
1382
- }
1383
- }
1384
- },
1385
- "metadata": {
1386
- "provider_name": "Anthropic",
1387
- "inference_types": [
1388
- "ON_DEMAND"
1389
- ],
1390
- "streaming_supported": true,
1391
- "input_modalities": [
1392
- "TEXT"
1393
- ],
1394
- "output_modalities": [
1395
- "TEXT"
1396
- ]
1397
- }
1398
- },
1399
1337
  {
1400
1338
  "id": "anthropic.claude-v2:0:100k",
1401
1339
  "name": "Claude",
@@ -1494,55 +1432,6 @@
1494
1432
  ]
1495
1433
  }
1496
1434
  },
1497
- {
1498
- "id": "anthropic.claude-v2:1",
1499
- "name": "Claude",
1500
- "provider": "bedrock",
1501
- "family": "claude2",
1502
- "created_at": null,
1503
- "context_window": 200000,
1504
- "max_output_tokens": 4096,
1505
- "knowledge_cutoff": null,
1506
- "modalities": {
1507
- "input": [
1508
- "text",
1509
- "image",
1510
- "pdf"
1511
- ],
1512
- "output": [
1513
- "text"
1514
- ]
1515
- },
1516
- "capabilities": [
1517
- "streaming",
1518
- "function_calling"
1519
- ],
1520
- "pricing": {
1521
- "text_tokens": {
1522
- "standard": {
1523
- "input_per_million": 8.0,
1524
- "output_per_million": 24.0
1525
- },
1526
- "batch": {
1527
- "input_per_million": 4.0,
1528
- "output_per_million": 12.0
1529
- }
1530
- }
1531
- },
1532
- "metadata": {
1533
- "provider_name": "Anthropic",
1534
- "inference_types": [
1535
- "ON_DEMAND"
1536
- ],
1537
- "streaming_supported": true,
1538
- "input_modalities": [
1539
- "TEXT"
1540
- ],
1541
- "output_modalities": [
1542
- "TEXT"
1543
- ]
1544
- }
1545
- },
1546
1435
  {
1547
1436
  "id": "anthropic.claude-v2:1:18k",
1548
1437
  "name": "Claude",
@@ -2014,7 +1903,7 @@
2014
1903
  "id": "gemini-1.5-flash",
2015
1904
  "name": "Gemini 1.5 Flash",
2016
1905
  "provider": "gemini",
2017
- "family": "gemini-1.5-flash",
1906
+ "family": "models/gemini-1.5-flash",
2018
1907
  "created_at": null,
2019
1908
  "context_window": 1048576,
2020
1909
  "max_output_tokens": 8192,
@@ -2055,7 +1944,7 @@
2055
1944
  "id": "gemini-1.5-flash-001",
2056
1945
  "name": "Gemini 1.5 Flash",
2057
1946
  "provider": "gemini",
2058
- "family": "gemini-1.5-flash",
1947
+ "family": "models/gemini-1.5-flash",
2059
1948
  "created_at": null,
2060
1949
  "context_window": 1048576,
2061
1950
  "max_output_tokens": 8192,
@@ -2089,7 +1978,7 @@
2089
1978
  "id": "gemini-1.5-flash-002",
2090
1979
  "name": "Gemini 1.5 Flash",
2091
1980
  "provider": "gemini",
2092
- "family": "gemini-1.5-flash",
1981
+ "family": "models/gemini-1.5-flash",
2093
1982
  "created_at": null,
2094
1983
  "context_window": 1048576,
2095
1984
  "max_output_tokens": 8192,
@@ -2131,7 +2020,7 @@
2131
2020
  "id": "gemini-1.5-flash-8b",
2132
2021
  "name": "Gemini 1.5 Flash-8B",
2133
2022
  "provider": "gemini",
2134
- "family": "gemini-1.5-flash-8b",
2023
+ "family": "models/gemini-1.5-flash-8b",
2135
2024
  "created_at": null,
2136
2025
  "context_window": 1048576,
2137
2026
  "max_output_tokens": 8192,
@@ -2173,7 +2062,7 @@
2173
2062
  "id": "gemini-1.5-flash-8b-001",
2174
2063
  "name": "Gemini 1.5 Flash-8B",
2175
2064
  "provider": "gemini",
2176
- "family": "gemini-1.5-flash-8b",
2065
+ "family": "models/gemini-1.5-flash-8b",
2177
2066
  "created_at": null,
2178
2067
  "context_window": 1048576,
2179
2068
  "max_output_tokens": 8192,
@@ -2215,7 +2104,7 @@
2215
2104
  "id": "gemini-1.5-flash-8b-latest",
2216
2105
  "name": "Gemini 1.5 Flash-8B",
2217
2106
  "provider": "gemini",
2218
- "family": "gemini-1.5-flash-8b",
2107
+ "family": "models/gemini-1.5-flash-8b",
2219
2108
  "created_at": null,
2220
2109
  "context_window": 1048576,
2221
2110
  "max_output_tokens": 8192,
@@ -2257,7 +2146,7 @@
2257
2146
  "id": "gemini-1.5-flash-latest",
2258
2147
  "name": "Gemini 1.5 Flash",
2259
2148
  "provider": "gemini",
2260
- "family": "gemini-1.5-flash",
2149
+ "family": "models/gemini-1.5-flash",
2261
2150
  "created_at": null,
2262
2151
  "context_window": 1048576,
2263
2152
  "max_output_tokens": 8192,
@@ -2298,7 +2187,7 @@
2298
2187
  "id": "gemini-1.5-pro",
2299
2188
  "name": "Gemini 1.5 Pro",
2300
2189
  "provider": "gemini",
2301
- "family": "gemini-1.5-pro",
2190
+ "family": "models/gemini-1.5-pro",
2302
2191
  "created_at": null,
2303
2192
  "context_window": 2097152,
2304
2193
  "max_output_tokens": 8192,
@@ -2339,7 +2228,7 @@
2339
2228
  "id": "gemini-1.5-pro-001",
2340
2229
  "name": "Gemini 1.5 Pro",
2341
2230
  "provider": "gemini",
2342
- "family": "gemini-1.5-pro",
2231
+ "family": "models/gemini-1.5-pro",
2343
2232
  "created_at": null,
2344
2233
  "context_window": 2097152,
2345
2234
  "max_output_tokens": 8192,
@@ -2373,7 +2262,7 @@
2373
2262
  "id": "gemini-1.5-pro-002",
2374
2263
  "name": "Gemini 1.5 Pro",
2375
2264
  "provider": "gemini",
2376
- "family": "gemini-1.5-pro",
2265
+ "family": "models/gemini-1.5-pro",
2377
2266
  "created_at": null,
2378
2267
  "context_window": 2097152,
2379
2268
  "max_output_tokens": 8192,
@@ -2415,7 +2304,7 @@
2415
2304
  "id": "gemini-1.5-pro-latest",
2416
2305
  "name": "Gemini 1.5 Pro",
2417
2306
  "provider": "gemini",
2418
- "family": "gemini-1.5-pro",
2307
+ "family": "models/gemini-1.5-pro",
2419
2308
  "created_at": null,
2420
2309
  "context_window": 2097152,
2421
2310
  "max_output_tokens": 8192,
@@ -2456,7 +2345,7 @@
2456
2345
  "id": "gemini-2.0-flash",
2457
2346
  "name": "Gemini 2.0 Flash",
2458
2347
  "provider": "gemini",
2459
- "family": "gemini-2.0-flash",
2348
+ "family": "models/gemini-2.0-flash",
2460
2349
  "created_at": null,
2461
2350
  "context_window": 1048576,
2462
2351
  "max_output_tokens": 8192,
@@ -2504,7 +2393,7 @@
2504
2393
  "id": "gemini-2.0-flash-001",
2505
2394
  "name": "Gemini 2.0 Flash",
2506
2395
  "provider": "gemini",
2507
- "family": "gemini-2.0-flash",
2396
+ "family": "models/gemini-2.0-flash",
2508
2397
  "created_at": null,
2509
2398
  "context_window": 1048576,
2510
2399
  "max_output_tokens": 8192,
@@ -2552,7 +2441,7 @@
2552
2441
  "id": "gemini-2.0-flash-exp",
2553
2442
  "name": "Gemini 2.0 Flash",
2554
2443
  "provider": "gemini",
2555
- "family": "gemini-2.0-flash",
2444
+ "family": "models/gemini-2.0-flash",
2556
2445
  "created_at": null,
2557
2446
  "context_window": 1048576,
2558
2447
  "max_output_tokens": 8192,
@@ -2599,7 +2488,7 @@
2599
2488
  "id": "gemini-2.0-flash-lite",
2600
2489
  "name": "Gemini 2.0 Flash-Lite",
2601
2490
  "provider": "gemini",
2602
- "family": "gemini-2.0-flash-lite",
2491
+ "family": "models/gemini-2.0-flash-lite",
2603
2492
  "created_at": null,
2604
2493
  "context_window": 1048576,
2605
2494
  "max_output_tokens": 8192,
@@ -2647,7 +2536,7 @@
2647
2536
  "id": "gemini-2.0-flash-lite-001",
2648
2537
  "name": "Gemini 2.0 Flash-Lite",
2649
2538
  "provider": "gemini",
2650
- "family": "gemini-2.0-flash-lite",
2539
+ "family": "models/gemini-2.0-flash-lite",
2651
2540
  "created_at": null,
2652
2541
  "context_window": 1048576,
2653
2542
  "max_output_tokens": 8192,
@@ -2704,7 +2593,8 @@
2704
2593
  "input": [
2705
2594
  "text",
2706
2595
  "image",
2707
- "pdf"
2596
+ "pdf",
2597
+ "video"
2708
2598
  ],
2709
2599
  "output": [
2710
2600
  "text"
@@ -2750,7 +2640,8 @@
2750
2640
  "input": [
2751
2641
  "text",
2752
2642
  "image",
2753
- "pdf"
2643
+ "pdf",
2644
+ "video"
2754
2645
  ],
2755
2646
  "output": [
2756
2647
  "text"
@@ -2787,7 +2678,7 @@
2787
2678
  "id": "gemini-2.0-flash-live-001",
2788
2679
  "name": "Gemini 2.0 Flash Live",
2789
2680
  "provider": "gemini",
2790
- "family": "gemini-2.0-flash-live-001",
2681
+ "family": "models/gemini-2.0-flash-live-001",
2791
2682
  "created_at": null,
2792
2683
  "context_window": 1048576,
2793
2684
  "max_output_tokens": 8192,
@@ -2821,7 +2712,7 @@
2821
2712
  "id": "gemini-2.0-flash-preview-image-generation",
2822
2713
  "name": "Gemini 2.0 Flash Preview Image Generation",
2823
2714
  "provider": "gemini",
2824
- "family": "gemini-2.0-flash-preview-image-generation",
2715
+ "family": "models/gemini-2.0-flash-preview-image-generation",
2825
2716
  "created_at": null,
2826
2717
  "context_window": 32000,
2827
2718
  "max_output_tokens": 8192,
@@ -2869,7 +2760,8 @@
2869
2760
  "input": [
2870
2761
  "text",
2871
2762
  "image",
2872
- "pdf"
2763
+ "pdf",
2764
+ "video"
2873
2765
  ],
2874
2766
  "output": [
2875
2767
  "text"
@@ -2918,7 +2810,8 @@
2918
2810
  "input": [
2919
2811
  "text",
2920
2812
  "image",
2921
- "pdf"
2813
+ "pdf",
2814
+ "video"
2922
2815
  ],
2923
2816
  "output": [
2924
2817
  "text"
@@ -2967,7 +2860,8 @@
2967
2860
  "input": [
2968
2861
  "text",
2969
2862
  "image",
2970
- "pdf"
2863
+ "pdf",
2864
+ "video"
2971
2865
  ],
2972
2866
  "output": [
2973
2867
  "text"
@@ -3016,7 +2910,8 @@
3016
2910
  "input": [
3017
2911
  "text",
3018
2912
  "image",
3019
- "pdf"
2913
+ "pdf",
2914
+ "video"
3020
2915
  ],
3021
2916
  "output": [
3022
2917
  "text"
@@ -3064,7 +2959,8 @@
3064
2959
  "input": [
3065
2960
  "text",
3066
2961
  "image",
3067
- "pdf"
2962
+ "pdf",
2963
+ "video"
3068
2964
  ],
3069
2965
  "output": [
3070
2966
  "text"
@@ -3103,7 +2999,7 @@
3103
2999
  "id": "gemini-2.5-flash",
3104
3000
  "name": "Gemini 2.5 Flash",
3105
3001
  "provider": "gemini",
3106
- "family": "gemini-2.5-flash",
3002
+ "family": "models/gemini-2.5-flash",
3107
3003
  "created_at": null,
3108
3004
  "context_window": 1048576,
3109
3005
  "max_output_tokens": 65536,
@@ -3151,7 +3047,7 @@
3151
3047
  "id": "gemini-2.5-flash-exp-native-audio-thinking-dialog",
3152
3048
  "name": "Gemini 2.5 Flash Native Audio",
3153
3049
  "provider": "gemini",
3154
- "family": "gemini-2.5-flash-preview-native-audio-dialog",
3050
+ "family": "models/gemini-2.5-flash-preview-native-audio-dialog",
3155
3051
  "created_at": null,
3156
3052
  "context_window": 128000,
3157
3053
  "max_output_tokens": 8000,
@@ -3184,7 +3080,7 @@
3184
3080
  "id": "gemini-2.5-flash-image-preview",
3185
3081
  "name": "Gemini 2.5 Flash Image Preview",
3186
3082
  "provider": "gemini",
3187
- "family": "gemini-2.5-flash-image-preview",
3083
+ "family": "models/gemini-2.5-flash-image-preview",
3188
3084
  "created_at": null,
3189
3085
  "context_window": 32768,
3190
3086
  "max_output_tokens": 32768,
@@ -3229,7 +3125,7 @@
3229
3125
  "id": "gemini-2.5-flash-lite",
3230
3126
  "name": "Gemini 2.5 Flash-Lite",
3231
3127
  "provider": "gemini",
3232
- "family": "gemini-2.5-flash-lite",
3128
+ "family": "models/gemini-2.5-flash-lite",
3233
3129
  "created_at": null,
3234
3130
  "context_window": 1048576,
3235
3131
  "max_output_tokens": 65536,
@@ -3277,7 +3173,7 @@
3277
3173
  "id": "gemini-2.5-flash-lite-06-17",
3278
3174
  "name": "Gemini 2.5 Flash-Lite",
3279
3175
  "provider": "gemini",
3280
- "family": "gemini-2.5-flash-lite",
3176
+ "family": "models/gemini-2.5-flash-lite",
3281
3177
  "created_at": null,
3282
3178
  "context_window": 1048576,
3283
3179
  "max_output_tokens": 65536,
@@ -3325,7 +3221,8 @@
3325
3221
  "input": [
3326
3222
  "text",
3327
3223
  "image",
3328
- "pdf"
3224
+ "pdf",
3225
+ "video"
3329
3226
  ],
3330
3227
  "output": [
3331
3228
  "text"
@@ -3363,7 +3260,7 @@
3363
3260
  "id": "gemini-2.5-flash-preview-05-20",
3364
3261
  "name": "Gemini 2.5 Flash",
3365
3262
  "provider": "gemini",
3366
- "family": "gemini-2.5-flash",
3263
+ "family": "models/gemini-2.5-flash",
3367
3264
  "created_at": null,
3368
3265
  "context_window": 1048576,
3369
3266
  "max_output_tokens": 65536,
@@ -3411,7 +3308,7 @@
3411
3308
  "id": "gemini-2.5-flash-preview-native-audio-dialog",
3412
3309
  "name": "Gemini 2.5 Flash Native Audio",
3413
3310
  "provider": "gemini",
3414
- "family": "gemini-2.5-flash-preview-native-audio-dialog",
3311
+ "family": "models/gemini-2.5-flash-preview-native-audio-dialog",
3415
3312
  "created_at": null,
3416
3313
  "context_window": 128000,
3417
3314
  "max_output_tokens": 8000,
@@ -3444,7 +3341,7 @@
3444
3341
  "id": "gemini-2.5-flash-preview-tts",
3445
3342
  "name": "Gemini 2.5 Flash Preview TTS",
3446
3343
  "provider": "gemini",
3447
- "family": "gemini-2.5-flash-preview-tts",
3344
+ "family": "models/gemini-2.5-flash-preview-tts",
3448
3345
  "created_at": null,
3449
3346
  "context_window": 8000,
3450
3347
  "max_output_tokens": 16000,
@@ -3486,7 +3383,7 @@
3486
3383
  "id": "gemini-2.5-pro",
3487
3384
  "name": "Gemini 2.5 Pro",
3488
3385
  "provider": "gemini",
3489
- "family": "gemini-2.5-pro",
3386
+ "family": "models/gemini-2.5-pro",
3490
3387
  "created_at": null,
3491
3388
  "context_window": 1048576,
3492
3389
  "max_output_tokens": 65536,
@@ -3543,7 +3440,8 @@
3543
3440
  "input": [
3544
3441
  "text",
3545
3442
  "image",
3546
- "pdf"
3443
+ "pdf",
3444
+ "video"
3547
3445
  ],
3548
3446
  "output": [
3549
3447
  "text"
@@ -3591,7 +3489,8 @@
3591
3489
  "input": [
3592
3490
  "text",
3593
3491
  "image",
3594
- "pdf"
3492
+ "pdf",
3493
+ "video"
3595
3494
  ],
3596
3495
  "output": [
3597
3496
  "text"
@@ -3639,7 +3538,8 @@
3639
3538
  "input": [
3640
3539
  "text",
3641
3540
  "image",
3642
- "pdf"
3541
+ "pdf",
3542
+ "video"
3643
3543
  ],
3644
3544
  "output": [
3645
3545
  "text"
@@ -3678,7 +3578,7 @@
3678
3578
  "id": "gemini-2.5-pro-preview-tts",
3679
3579
  "name": "Gemini 2.5 Pro Preview TTS",
3680
3580
  "provider": "gemini",
3681
- "family": "gemini-2.5-pro-preview-tts",
3581
+ "family": "models/gemini-2.5-pro-preview-tts",
3682
3582
  "created_at": null,
3683
3583
  "context_window": 8000,
3684
3584
  "max_output_tokens": 16000,
@@ -3727,7 +3627,8 @@
3727
3627
  "knowledge_cutoff": null,
3728
3628
  "modalities": {
3729
3629
  "input": [
3730
- "text"
3630
+ "text",
3631
+ "video"
3731
3632
  ],
3732
3633
  "output": [
3733
3634
  "text",
@@ -3769,7 +3670,8 @@
3769
3670
  "input": [
3770
3671
  "text",
3771
3672
  "image",
3772
- "pdf"
3673
+ "pdf",
3674
+ "video"
3773
3675
  ],
3774
3676
  "output": [
3775
3677
  "text",
@@ -3823,7 +3725,8 @@
3823
3725
  "input": [
3824
3726
  "text",
3825
3727
  "image",
3826
- "pdf"
3728
+ "pdf",
3729
+ "video"
3827
3730
  ],
3828
3731
  "output": [
3829
3732
  "text",
@@ -3877,7 +3780,8 @@
3877
3780
  "input": [
3878
3781
  "text",
3879
3782
  "image",
3880
- "pdf"
3783
+ "pdf",
3784
+ "video"
3881
3785
  ],
3882
3786
  "output": [
3883
3787
  "text"
@@ -3916,7 +3820,7 @@
3916
3820
  "id": "gemini-live-2.5-flash-preview",
3917
3821
  "name": "Gemini 2.5 Flash Live",
3918
3822
  "provider": "gemini",
3919
- "family": "gemini-live-2.5-flash-preview",
3823
+ "family": "models/gemini-live-2.5-flash-preview",
3920
3824
  "created_at": null,
3921
3825
  "context_window": 1048576,
3922
3826
  "max_output_tokens": 8192,
@@ -5045,6 +4949,30 @@
5045
4949
  "owned_by": "mistralai"
5046
4950
  }
5047
4951
  },
4952
+ {
4953
+ "id": "mistral-embed-2312",
4954
+ "name": "Mistral Embed",
4955
+ "provider": "mistral",
4956
+ "family": "mistral-embed",
4957
+ "created_at": null,
4958
+ "context_window": 32768,
4959
+ "max_output_tokens": 8192,
4960
+ "knowledge_cutoff": null,
4961
+ "modalities": {
4962
+ "input": [
4963
+ "text"
4964
+ ],
4965
+ "output": [
4966
+ "embeddings"
4967
+ ]
4968
+ },
4969
+ "capabilities": [],
4970
+ "pricing": {},
4971
+ "metadata": {
4972
+ "object": "model",
4973
+ "owned_by": "mistralai"
4974
+ }
4975
+ },
5048
4976
  {
5049
4977
  "id": "mistral-large-2407",
5050
4978
  "name": "Mistral Large",
@@ -9815,8 +9743,8 @@
9815
9743
  "provider": "openrouter",
9816
9744
  "family": "alfredpros",
9817
9745
  "created_at": "2025-04-14 16:44:34 +0200",
9818
- "context_window": 8192,
9819
- "max_output_tokens": 8192,
9746
+ "context_window": 4096,
9747
+ "max_output_tokens": 4096,
9820
9748
  "knowledge_cutoff": null,
9821
9749
  "modalities": {
9822
9750
  "input": [
@@ -9827,14 +9755,13 @@
9827
9755
  ]
9828
9756
  },
9829
9757
  "capabilities": [
9830
- "streaming",
9831
- "predicted_outputs"
9758
+ "streaming"
9832
9759
  ],
9833
9760
  "pricing": {
9834
9761
  "text_tokens": {
9835
9762
  "standard": {
9836
- "input_per_million": 0.7,
9837
- "output_per_million": 1.1
9763
+ "input_per_million": 0.7999999999999999,
9764
+ "output_per_million": 1.2
9838
9765
  }
9839
9766
  }
9840
9767
  },
@@ -9852,14 +9779,13 @@
9852
9779
  "instruct_type": "alpaca"
9853
9780
  },
9854
9781
  "top_provider": {
9855
- "context_length": 8192,
9856
- "max_completion_tokens": 8192,
9782
+ "context_length": 4096,
9783
+ "max_completion_tokens": 4096,
9857
9784
  "is_moderated": false
9858
9785
  },
9859
9786
  "per_request_limits": null,
9860
9787
  "supported_parameters": [
9861
9788
  "frequency_penalty",
9862
- "logit_bias",
9863
9789
  "max_tokens",
9864
9790
  "min_p",
9865
9791
  "presence_penalty",
@@ -9873,17 +9799,18 @@
9873
9799
  }
9874
9800
  },
9875
9801
  {
9876
- "id": "alpindale/goliath-120b",
9877
- "name": "Goliath 120B",
9802
+ "id": "allenai/molmo-7b-d",
9803
+ "name": "AllenAI: Molmo 7B D",
9878
9804
  "provider": "openrouter",
9879
- "family": "alpindale",
9880
- "created_at": "2023-11-10 01:00:00 +0100",
9881
- "context_window": 6144,
9882
- "max_output_tokens": 512,
9805
+ "family": "allenai",
9806
+ "created_at": "2025-03-26 22:07:27 +0100",
9807
+ "context_window": 4096,
9808
+ "max_output_tokens": 4096,
9883
9809
  "knowledge_cutoff": null,
9884
9810
  "modalities": {
9885
9811
  "input": [
9886
- "text"
9812
+ "text",
9813
+ "image"
9887
9814
  ],
9888
9815
  "output": [
9889
9816
  "text"
@@ -9891,32 +9818,159 @@
9891
9818
  },
9892
9819
  "capabilities": [
9893
9820
  "streaming",
9894
- "structured_output",
9895
9821
  "predicted_outputs"
9896
9822
  ],
9897
9823
  "pricing": {
9898
9824
  "text_tokens": {
9899
9825
  "standard": {
9900
- "input_per_million": 4.0,
9901
- "output_per_million": 5.5
9826
+ "input_per_million": 0.09999999999999999,
9827
+ "output_per_million": 0.19999999999999998
9902
9828
  }
9903
9829
  }
9904
9830
  },
9905
9831
  "metadata": {
9906
- "description": "A large LLM created by combining two fine-tuned Llama 70B models into one 120B model. Combines Xwin and Euryale.\n\nCredits to\n- [@chargoddard](https://huggingface.co/chargoddard) for developing the framework used to merge the model - [mergekit](https://github.com/cg123/mergekit).\n- [@Undi95](https://huggingface.co/Undi95) for helping with the merge ratios.\n\n#merge",
9832
+ "description": "Molmo is a family of open vision-language models developed by the Allen Institute for AI. Molmo models are trained on PixMo, a dataset of 1 million, highly-curated image-text pairs. It has state-of-the-art performance among multimodal models with a similar size while being fully open-source. You can find all models in the Molmo family [here](https://huggingface.co/collections/allenai/molmo-66f379e6fe3b8ef090a8ca19). Learn more about the Molmo family [in the announcement blog post](https://molmo.allenai.org/blog) or the [paper](https://huggingface.co/papers/2409.17146).\n\nMolmo 7B-D is based on [Qwen2-7B](https://huggingface.co/Qwen/Qwen2-7B) and uses [OpenAI CLIP](https://huggingface.co/openai/clip-vit-large-patch14-336) as vision backbone. It performs comfortably between GPT-4V and GPT-4o on both academic benchmarks and human evaluation.\n\nThis checkpoint is a preview of the Molmo release. All artifacts used in creating Molmo (PixMo dataset, training code, evaluations, intermediate checkpoints) will be made available at a later date, furthering our commitment to open-source AI development and reproducibility.",
9907
9833
  "architecture": {
9908
- "modality": "text->text",
9834
+ "modality": "text+image->text",
9909
9835
  "input_modalities": [
9910
- "text"
9836
+ "text",
9837
+ "image"
9911
9838
  ],
9912
9839
  "output_modalities": [
9913
9840
  "text"
9914
9841
  ],
9915
- "tokenizer": "Llama2",
9916
- "instruct_type": "airoboros"
9842
+ "tokenizer": "Other",
9843
+ "instruct_type": null
9917
9844
  },
9918
9845
  "top_provider": {
9919
- "context_length": 6144,
9846
+ "context_length": 4096,
9847
+ "max_completion_tokens": 4096,
9848
+ "is_moderated": false
9849
+ },
9850
+ "per_request_limits": null,
9851
+ "supported_parameters": [
9852
+ "frequency_penalty",
9853
+ "logit_bias",
9854
+ "max_tokens",
9855
+ "min_p",
9856
+ "presence_penalty",
9857
+ "repetition_penalty",
9858
+ "seed",
9859
+ "stop",
9860
+ "temperature",
9861
+ "top_k",
9862
+ "top_p"
9863
+ ]
9864
+ }
9865
+ },
9866
+ {
9867
+ "id": "allenai/olmo-2-0325-32b-instruct",
9868
+ "name": "AllenAI: Olmo 2 32B Instruct",
9869
+ "provider": "openrouter",
9870
+ "family": "allenai",
9871
+ "created_at": "2025-03-14 22:42:36 +0100",
9872
+ "context_window": 4096,
9873
+ "max_output_tokens": 4096,
9874
+ "knowledge_cutoff": null,
9875
+ "modalities": {
9876
+ "input": [
9877
+ "text"
9878
+ ],
9879
+ "output": [
9880
+ "text"
9881
+ ]
9882
+ },
9883
+ "capabilities": [
9884
+ "streaming",
9885
+ "predicted_outputs"
9886
+ ],
9887
+ "pricing": {
9888
+ "text_tokens": {
9889
+ "standard": {
9890
+ "input_per_million": 1.0,
9891
+ "output_per_million": 1.5
9892
+ }
9893
+ }
9894
+ },
9895
+ "metadata": {
9896
+ "description": "OLMo-2 32B Instruct is a supervised instruction-finetuned variant of the OLMo-2 32B March 2025 base model. It excels in complex reasoning and instruction-following tasks across diverse benchmarks such as GSM8K, MATH, IFEval, and general NLP evaluation. Developed by AI2, OLMo-2 32B is part of an open, research-oriented initiative, trained primarily on English-language datasets to advance the understanding and development of open-source language models.",
9897
+ "architecture": {
9898
+ "modality": "text->text",
9899
+ "input_modalities": [
9900
+ "text"
9901
+ ],
9902
+ "output_modalities": [
9903
+ "text"
9904
+ ],
9905
+ "tokenizer": "Other",
9906
+ "instruct_type": null
9907
+ },
9908
+ "top_provider": {
9909
+ "context_length": 4096,
9910
+ "max_completion_tokens": 4096,
9911
+ "is_moderated": false
9912
+ },
9913
+ "per_request_limits": null,
9914
+ "supported_parameters": [
9915
+ "frequency_penalty",
9916
+ "logit_bias",
9917
+ "max_tokens",
9918
+ "min_p",
9919
+ "presence_penalty",
9920
+ "repetition_penalty",
9921
+ "seed",
9922
+ "stop",
9923
+ "temperature",
9924
+ "top_k",
9925
+ "top_p"
9926
+ ]
9927
+ }
9928
+ },
9929
+ {
9930
+ "id": "alpindale/goliath-120b",
9931
+ "name": "Goliath 120B",
9932
+ "provider": "openrouter",
9933
+ "family": "alpindale",
9934
+ "created_at": "2023-11-10 01:00:00 +0100",
9935
+ "context_window": 6144,
9936
+ "max_output_tokens": 512,
9937
+ "knowledge_cutoff": null,
9938
+ "modalities": {
9939
+ "input": [
9940
+ "text"
9941
+ ],
9942
+ "output": [
9943
+ "text"
9944
+ ]
9945
+ },
9946
+ "capabilities": [
9947
+ "streaming",
9948
+ "structured_output",
9949
+ "predicted_outputs"
9950
+ ],
9951
+ "pricing": {
9952
+ "text_tokens": {
9953
+ "standard": {
9954
+ "input_per_million": 4.0,
9955
+ "output_per_million": 5.5
9956
+ }
9957
+ }
9958
+ },
9959
+ "metadata": {
9960
+ "description": "A large LLM created by combining two fine-tuned Llama 70B models into one 120B model. Combines Xwin and Euryale.\n\nCredits to\n- [@chargoddard](https://huggingface.co/chargoddard) for developing the framework used to merge the model - [mergekit](https://github.com/cg123/mergekit).\n- [@Undi95](https://huggingface.co/Undi95) for helping with the merge ratios.\n\n#merge",
9961
+ "architecture": {
9962
+ "modality": "text->text",
9963
+ "input_modalities": [
9964
+ "text"
9965
+ ],
9966
+ "output_modalities": [
9967
+ "text"
9968
+ ],
9969
+ "tokenizer": "Llama2",
9970
+ "instruct_type": "airoboros"
9971
+ },
9972
+ "top_provider": {
9973
+ "context_length": 6144,
9920
9974
  "max_completion_tokens": 512,
9921
9975
  "is_moderated": false
9922
9976
  },
@@ -10802,7 +10856,7 @@
10802
10856
  "top_provider": {
10803
10857
  "context_length": 200000,
10804
10858
  "max_completion_tokens": 32000,
10805
- "is_moderated": false
10859
+ "is_moderated": true
10806
10860
  },
10807
10861
  "per_request_limits": null,
10808
10862
  "supported_parameters": [
@@ -10868,7 +10922,7 @@
10868
10922
  "top_provider": {
10869
10923
  "context_length": 200000,
10870
10924
  "max_completion_tokens": 32000,
10871
- "is_moderated": false
10925
+ "is_moderated": true
10872
10926
  },
10873
10927
  "per_request_limits": null,
10874
10928
  "supported_parameters": [
@@ -11225,8 +11279,8 @@
11225
11279
  "pricing": {
11226
11280
  "text_tokens": {
11227
11281
  "standard": {
11228
- "input_per_million": 0.01,
11229
- "output_per_million": 0.0400032
11282
+ "input_per_million": 0.017934774,
11283
+ "output_per_million": 0.07173912240000001
11230
11284
  }
11231
11285
  }
11232
11286
  },
@@ -11614,8 +11668,8 @@
11614
11668
  "pricing": {
11615
11669
  "text_tokens": {
11616
11670
  "standard": {
11617
- "input_per_million": 0.10366159999999999,
11618
- "output_per_million": 0.414848
11671
+ "input_per_million": 0.2006688,
11672
+ "output_per_million": 0.80267549538462
11619
11673
  }
11620
11674
  }
11621
11675
  },
@@ -11863,8 +11917,8 @@
11863
11917
  "pricing": {
11864
11918
  "text_tokens": {
11865
11919
  "standard": {
11866
- "input_per_million": 0.037022,
11867
- "output_per_million": 0.14816
11920
+ "input_per_million": 0.0271739,
11921
+ "output_per_million": 0.10869564
11868
11922
  }
11869
11923
  }
11870
11924
  },
@@ -11986,8 +12040,8 @@
11986
12040
  "pricing": {
11987
12041
  "text_tokens": {
11988
12042
  "standard": {
11989
- "input_per_million": 0.01,
11990
- "output_per_million": 0.0340768
12043
+ "input_per_million": 0.01333333333333,
12044
+ "output_per_million": 0.0347826048
11991
12045
  }
11992
12046
  }
11993
12047
  },
@@ -12818,8 +12872,8 @@
12818
12872
  "pricing": {
12819
12873
  "text_tokens": {
12820
12874
  "standard": {
12821
- "input_per_million": 0.1999188,
12822
- "output_per_million": 0.800064
12875
+ "input_per_million": 0.24999987999999998,
12876
+ "output_per_million": 0.999999888
12823
12877
  }
12824
12878
  }
12825
12879
  },
@@ -12889,8 +12943,8 @@
12889
12943
  "pricing": {
12890
12944
  "text_tokens": {
12891
12945
  "standard": {
12892
- "input_per_million": 0.1999188,
12893
- "output_per_million": 0.800064
12946
+ "input_per_million": 0.24999987999999998,
12947
+ "output_per_million": 0.999999888
12894
12948
  }
12895
12949
  }
12896
12950
  },
@@ -13021,8 +13075,8 @@
13021
13075
  "pricing": {
13022
13076
  "text_tokens": {
13023
13077
  "standard": {
13024
- "input_per_million": 0.19999999999999998,
13025
- "output_per_million": 0.7999999999999999
13078
+ "input_per_million": 0.24999987999999998,
13079
+ "output_per_million": 0.999999888
13026
13080
  }
13027
13081
  }
13028
13082
  },
@@ -13291,8 +13345,8 @@
13291
13345
  "pricing": {
13292
13346
  "text_tokens": {
13293
13347
  "standard": {
13294
- "input_per_million": 0.1999188,
13295
- "output_per_million": 0.800064
13348
+ "input_per_million": 0.24999987999999998,
13349
+ "output_per_million": 0.999999888
13296
13350
  }
13297
13351
  }
13298
13352
  },
@@ -13362,8 +13416,8 @@
13362
13416
  "pricing": {
13363
13417
  "text_tokens": {
13364
13418
  "standard": {
13365
- "input_per_million": 0.01703012,
13366
- "output_per_million": 0.0681536
13419
+ "input_per_million": 0.013043472,
13420
+ "output_per_million": 0.0521739072
13367
13421
  }
13368
13422
  }
13369
13423
  },
@@ -13551,8 +13605,8 @@
13551
13605
  "pricing": {
13552
13606
  "text_tokens": {
13553
13607
  "standard": {
13554
- "input_per_million": 0.025915399999999998,
13555
- "output_per_million": 0.103712
13608
+ "input_per_million": 0.03260868,
13609
+ "output_per_million": 0.130434768
13556
13610
  }
13557
13611
  }
13558
13612
  },
@@ -13872,8 +13926,8 @@
13872
13926
  "pricing": {
13873
13927
  "text_tokens": {
13874
13928
  "standard": {
13875
- "input_per_million": 0.075,
13876
- "output_per_million": 0.15
13929
+ "input_per_million": 0.27,
13930
+ "output_per_million": 0.27
13877
13931
  }
13878
13932
  }
13879
13933
  },
@@ -13986,8 +14040,8 @@
13986
14040
  "pricing": {
13987
14041
  "text_tokens": {
13988
14042
  "standard": {
13989
- "input_per_million": 0.19999999999999998,
13990
- "output_per_million": 0.7999999999999999
14043
+ "input_per_million": 0.24999987999999998,
14044
+ "output_per_million": 0.999999888
13991
14045
  }
13992
14046
  }
13993
14047
  },
@@ -14384,7 +14438,7 @@
14384
14438
  }
14385
14439
  },
14386
14440
  "metadata": {
14387
- "description": "Gemini 2.5 Flash Image Preview is a state of the art image generation model with contextual understanding. It is capable of image generation, edits, and multi-turn conversations.",
14441
+ "description": "Gemini 2.5 Flash Image Preview, AKA Nano Banana is a state of the art image generation model with contextual understanding. It is capable of image generation, edits, and multi-turn conversations.",
14388
14442
  "architecture": {
14389
14443
  "modality": "text+image->text+image",
14390
14444
  "input_modalities": [
@@ -15054,8 +15108,8 @@
15054
15108
  "pricing": {
15055
15109
  "text_tokens": {
15056
15110
  "standard": {
15057
- "input_per_million": 0.01,
15058
- "output_per_million": 0.0100008
15111
+ "input_per_million": 0.02,
15112
+ "output_per_million": 0.035869561200000004
15059
15113
  }
15060
15114
  }
15061
15115
  },
@@ -15180,8 +15234,8 @@
15180
15234
  "pricing": {
15181
15235
  "text_tokens": {
15182
15236
  "standard": {
15183
- "input_per_million": 0.0481286,
15184
- "output_per_million": 0.192608
15237
+ "input_per_million": 0.035326069999999994,
15238
+ "output_per_million": 0.141304332
15185
15239
  }
15186
15240
  }
15187
15241
  },
@@ -15309,8 +15363,8 @@
15309
15363
  "pricing": {
15310
15364
  "text_tokens": {
15311
15365
  "standard": {
15312
- "input_per_million": 0.0666396,
15313
- "output_per_million": 0.26668800000000004
15366
+ "input_per_million": 0.06521736,
15367
+ "output_per_million": 0.260869536
15314
15368
  }
15315
15369
  }
15316
15370
  },
@@ -15897,69 +15951,6 @@
15897
15951
  ]
15898
15952
  }
15899
15953
  },
15900
- {
15901
- "id": "infermatic/mn-inferor-12b",
15902
- "name": "Infermatic: Mistral Nemo Inferor 12B",
15903
- "provider": "openrouter",
15904
- "family": "infermatic",
15905
- "created_at": "2024-11-13 03:20:28 +0100",
15906
- "context_window": 8192,
15907
- "max_output_tokens": 8192,
15908
- "knowledge_cutoff": null,
15909
- "modalities": {
15910
- "input": [
15911
- "text"
15912
- ],
15913
- "output": [
15914
- "text"
15915
- ]
15916
- },
15917
- "capabilities": [
15918
- "streaming",
15919
- "predicted_outputs"
15920
- ],
15921
- "pricing": {
15922
- "text_tokens": {
15923
- "standard": {
15924
- "input_per_million": 0.6,
15925
- "output_per_million": 1.0
15926
- }
15927
- }
15928
- },
15929
- "metadata": {
15930
- "description": "Inferor 12B is a merge of top roleplay models, expert on immersive narratives and storytelling.\n\nThis model was merged using the [Model Stock](https://arxiv.org/abs/2403.19522) merge method using [anthracite-org/magnum-v4-12b](https://openrouter.ai/anthracite-org/magnum-v4-72b) as a base.\n",
15931
- "architecture": {
15932
- "modality": "text->text",
15933
- "input_modalities": [
15934
- "text"
15935
- ],
15936
- "output_modalities": [
15937
- "text"
15938
- ],
15939
- "tokenizer": "Mistral",
15940
- "instruct_type": "mistral"
15941
- },
15942
- "top_provider": {
15943
- "context_length": 8192,
15944
- "max_completion_tokens": 8192,
15945
- "is_moderated": false
15946
- },
15947
- "per_request_limits": null,
15948
- "supported_parameters": [
15949
- "frequency_penalty",
15950
- "logit_bias",
15951
- "max_tokens",
15952
- "min_p",
15953
- "presence_penalty",
15954
- "repetition_penalty",
15955
- "seed",
15956
- "stop",
15957
- "temperature",
15958
- "top_k",
15959
- "top_p"
15960
- ]
15961
- }
15962
- },
15963
15954
  {
15964
15955
  "id": "inflection/inflection-3-pi",
15965
15956
  "name": "Inflection: Inflection 3 Pi",
@@ -16263,13 +16254,13 @@
16263
16254
  }
16264
16255
  },
16265
16256
  {
16266
- "id": "meta-llama/llama-3-70b-instruct",
16267
- "name": "Meta: Llama 3 70B Instruct",
16257
+ "id": "meituan/longcat-flash-chat",
16258
+ "name": "Meituan: LongCat Flash Chat",
16268
16259
  "provider": "openrouter",
16269
- "family": "meta-llama",
16270
- "created_at": "2024-04-18 02:00:00 +0200",
16271
- "context_window": 8192,
16272
- "max_output_tokens": 16384,
16260
+ "family": "meituan",
16261
+ "created_at": "2025-09-09 16:20:58 +0200",
16262
+ "context_window": 131072,
16263
+ "max_output_tokens": 131072,
16273
16264
  "knowledge_cutoff": null,
16274
16265
  "modalities": {
16275
16266
  "input": [
@@ -16282,19 +16273,18 @@
16282
16273
  "capabilities": [
16283
16274
  "streaming",
16284
16275
  "function_calling",
16285
- "structured_output",
16286
16276
  "predicted_outputs"
16287
16277
  ],
16288
16278
  "pricing": {
16289
16279
  "text_tokens": {
16290
16280
  "standard": {
16291
- "input_per_million": 0.3,
16292
- "output_per_million": 0.39999999999999997
16281
+ "input_per_million": 0.15,
16282
+ "output_per_million": 0.75
16293
16283
  }
16294
16284
  }
16295
16285
  },
16296
16286
  "metadata": {
16297
- "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 70B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
16287
+ "description": "LongCat-Flash-Chat is a large-scale Mixture-of-Experts (MoE) model with 560B total parameters, of which 18.6B–31.3B (≈27B on average) are dynamically activated per input. It introduces a shortcut-connected MoE design to reduce communication overhead and achieve high throughput while maintaining training stability through advanced scaling strategies such as hyperparameter transfer, deterministic computation, and multi-stage optimization.\n\nThis release, LongCat-Flash-Chat, is a non-thinking foundation model optimized for conversational and agentic tasks. It supports long context windows up to 128K tokens and shows competitive performance across reasoning, coding, instruction following, and domain benchmarks, with particular strengths in tool use and complex multi-step interactions.",
16298
16288
  "architecture": {
16299
16289
  "modality": "text->text",
16300
16290
  "input_modalities": [
@@ -16303,12 +16293,12 @@
16303
16293
  "output_modalities": [
16304
16294
  "text"
16305
16295
  ],
16306
- "tokenizer": "Llama3",
16307
- "instruct_type": "llama3"
16296
+ "tokenizer": "Other",
16297
+ "instruct_type": null
16308
16298
  },
16309
16299
  "top_provider": {
16310
- "context_length": 8192,
16311
- "max_completion_tokens": 16384,
16300
+ "context_length": 131072,
16301
+ "max_completion_tokens": 131072,
16312
16302
  "is_moderated": false
16313
16303
  },
16314
16304
  "per_request_limits": null,
@@ -16320,7 +16310,6 @@
16320
16310
  "min_p",
16321
16311
  "presence_penalty",
16322
16312
  "repetition_penalty",
16323
- "response_format",
16324
16313
  "seed",
16325
16314
  "stop",
16326
16315
  "temperature",
@@ -16333,8 +16322,8 @@
16333
16322
  }
16334
16323
  },
16335
16324
  {
16336
- "id": "meta-llama/llama-3-8b-instruct",
16337
- "name": "Meta: Llama 3 8B Instruct",
16325
+ "id": "meta-llama/llama-3-70b-instruct",
16326
+ "name": "Meta: Llama 3 70B Instruct",
16338
16327
  "provider": "openrouter",
16339
16328
  "family": "meta-llama",
16340
16329
  "created_at": "2024-04-18 02:00:00 +0200",
@@ -16358,13 +16347,13 @@
16358
16347
  "pricing": {
16359
16348
  "text_tokens": {
16360
16349
  "standard": {
16361
- "input_per_million": 0.03,
16362
- "output_per_million": 0.06
16350
+ "input_per_million": 0.3,
16351
+ "output_per_million": 0.39999999999999997
16363
16352
  }
16364
16353
  }
16365
16354
  },
16366
16355
  "metadata": {
16367
- "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
16356
+ "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 70B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
16368
16357
  "architecture": {
16369
16358
  "modality": "text->text",
16370
16359
  "input_modalities": [
@@ -16385,6 +16374,7 @@
16385
16374
  "supported_parameters": [
16386
16375
  "frequency_penalty",
16387
16376
  "logit_bias",
16377
+ "logprobs",
16388
16378
  "max_tokens",
16389
16379
  "min_p",
16390
16380
  "presence_penalty",
@@ -16396,18 +16386,19 @@
16396
16386
  "tool_choice",
16397
16387
  "tools",
16398
16388
  "top_k",
16389
+ "top_logprobs",
16399
16390
  "top_p"
16400
16391
  ]
16401
16392
  }
16402
16393
  },
16403
16394
  {
16404
- "id": "meta-llama/llama-3.1-405b",
16405
- "name": "Meta: Llama 3.1 405B (base)",
16395
+ "id": "meta-llama/llama-3-8b-instruct",
16396
+ "name": "Meta: Llama 3 8B Instruct",
16406
16397
  "provider": "openrouter",
16407
16398
  "family": "meta-llama",
16408
- "created_at": "2024-08-02 02:00:00 +0200",
16409
- "context_window": 32768,
16410
- "max_output_tokens": null,
16399
+ "created_at": "2024-04-18 02:00:00 +0200",
16400
+ "context_window": 8192,
16401
+ "max_output_tokens": 16384,
16411
16402
  "knowledge_cutoff": null,
16412
16403
  "modalities": {
16413
16404
  "input": [
@@ -16419,18 +16410,20 @@
16419
16410
  },
16420
16411
  "capabilities": [
16421
16412
  "streaming",
16413
+ "function_calling",
16414
+ "structured_output",
16422
16415
  "predicted_outputs"
16423
16416
  ],
16424
16417
  "pricing": {
16425
16418
  "text_tokens": {
16426
16419
  "standard": {
16427
- "input_per_million": 2.0,
16428
- "output_per_million": 2.0
16420
+ "input_per_million": 0.03,
16421
+ "output_per_million": 0.06
16429
16422
  }
16430
16423
  }
16431
16424
  },
16432
16425
  "metadata": {
16433
- "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This is the base 405B pre-trained version.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
16426
+ "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
16434
16427
  "architecture": {
16435
16428
  "modality": "text->text",
16436
16429
  "input_modalities": [
@@ -16440,18 +16433,84 @@
16440
16433
  "text"
16441
16434
  ],
16442
16435
  "tokenizer": "Llama3",
16443
- "instruct_type": "none"
16436
+ "instruct_type": "llama3"
16444
16437
  },
16445
16438
  "top_provider": {
16446
- "context_length": 32768,
16447
- "max_completion_tokens": null,
16439
+ "context_length": 8192,
16440
+ "max_completion_tokens": 16384,
16448
16441
  "is_moderated": false
16449
16442
  },
16450
16443
  "per_request_limits": null,
16451
16444
  "supported_parameters": [
16452
16445
  "frequency_penalty",
16453
16446
  "logit_bias",
16454
- "logprobs",
16447
+ "max_tokens",
16448
+ "min_p",
16449
+ "presence_penalty",
16450
+ "repetition_penalty",
16451
+ "response_format",
16452
+ "seed",
16453
+ "stop",
16454
+ "temperature",
16455
+ "tool_choice",
16456
+ "tools",
16457
+ "top_k",
16458
+ "top_p"
16459
+ ]
16460
+ }
16461
+ },
16462
+ {
16463
+ "id": "meta-llama/llama-3.1-405b",
16464
+ "name": "Meta: Llama 3.1 405B (base)",
16465
+ "provider": "openrouter",
16466
+ "family": "meta-llama",
16467
+ "created_at": "2024-08-02 02:00:00 +0200",
16468
+ "context_window": 32768,
16469
+ "max_output_tokens": null,
16470
+ "knowledge_cutoff": null,
16471
+ "modalities": {
16472
+ "input": [
16473
+ "text"
16474
+ ],
16475
+ "output": [
16476
+ "text"
16477
+ ]
16478
+ },
16479
+ "capabilities": [
16480
+ "streaming",
16481
+ "predicted_outputs"
16482
+ ],
16483
+ "pricing": {
16484
+ "text_tokens": {
16485
+ "standard": {
16486
+ "input_per_million": 2.0,
16487
+ "output_per_million": 2.0
16488
+ }
16489
+ }
16490
+ },
16491
+ "metadata": {
16492
+ "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This is the base 405B pre-trained version.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
16493
+ "architecture": {
16494
+ "modality": "text->text",
16495
+ "input_modalities": [
16496
+ "text"
16497
+ ],
16498
+ "output_modalities": [
16499
+ "text"
16500
+ ],
16501
+ "tokenizer": "Llama3",
16502
+ "instruct_type": "none"
16503
+ },
16504
+ "top_provider": {
16505
+ "context_length": 32768,
16506
+ "max_completion_tokens": null,
16507
+ "is_moderated": false
16508
+ },
16509
+ "per_request_limits": null,
16510
+ "supported_parameters": [
16511
+ "frequency_penalty",
16512
+ "logit_bias",
16513
+ "logprobs",
16455
16514
  "max_tokens",
16456
16515
  "min_p",
16457
16516
  "presence_penalty",
@@ -17719,8 +17778,8 @@
17719
17778
  "pricing": {
17720
17779
  "text_tokens": {
17721
17780
  "standard": {
17722
- "input_per_million": 0.1999188,
17723
- "output_per_million": 0.800064
17781
+ "input_per_million": 0.24999987999999998,
17782
+ "output_per_million": 0.999999888
17724
17783
  }
17725
17784
  }
17726
17785
  },
@@ -18012,8 +18071,7 @@
18012
18071
  },
18013
18072
  "capabilities": [
18014
18073
  "streaming",
18015
- "structured_output",
18016
- "predicted_outputs"
18074
+ "structured_output"
18017
18075
  ],
18018
18076
  "pricing": {
18019
18077
  "text_tokens": {
@@ -18044,8 +18102,6 @@
18044
18102
  "per_request_limits": null,
18045
18103
  "supported_parameters": [
18046
18104
  "frequency_penalty",
18047
- "logit_bias",
18048
- "logprobs",
18049
18105
  "max_tokens",
18050
18106
  "min_p",
18051
18107
  "presence_penalty",
@@ -18056,7 +18112,6 @@
18056
18112
  "structured_outputs",
18057
18113
  "temperature",
18058
18114
  "top_k",
18059
- "top_logprobs",
18060
18115
  "top_p"
18061
18116
  ]
18062
18117
  }
@@ -18666,8 +18721,8 @@
18666
18721
  "pricing": {
18667
18722
  "text_tokens": {
18668
18723
  "standard": {
18669
- "input_per_million": 0.01999188,
18670
- "output_per_million": 0.0800064
18724
+ "input_per_million": 0.035869548,
18725
+ "output_per_million": 0.14347824480000002
18671
18726
  }
18672
18727
  }
18673
18728
  },
@@ -19706,8 +19761,8 @@
19706
19761
  "pricing": {
19707
19762
  "text_tokens": {
19708
19763
  "standard": {
19709
- "input_per_million": 0.01,
19710
- "output_per_million": 0.0400032
19764
+ "input_per_million": 0.017934774,
19765
+ "output_per_million": 0.07173912240000001
19711
19766
  }
19712
19767
  }
19713
19768
  },
@@ -19963,8 +20018,8 @@
19963
20018
  "pricing": {
19964
20019
  "text_tokens": {
19965
20020
  "standard": {
19966
- "input_per_million": 0.01999188,
19967
- "output_per_million": 0.0800064
20021
+ "input_per_million": 0.03804346,
20022
+ "output_per_million": 0.152173896
19968
20023
  }
19969
20024
  }
19970
20025
  },
@@ -20093,8 +20148,8 @@
20093
20148
  "pricing": {
20094
20149
  "text_tokens": {
20095
20150
  "standard": {
20096
- "input_per_million": 0.01999188,
20097
- "output_per_million": 0.0800064
20151
+ "input_per_million": 0.03804346,
20152
+ "output_per_million": 0.152173896
20098
20153
  }
20099
20154
  }
20100
20155
  },
@@ -20899,8 +20954,8 @@
20899
20954
  "pricing": {
20900
20955
  "text_tokens": {
20901
20956
  "standard": {
20902
- "input_per_million": 0.2962,
20903
- "output_per_million": 1.1852999999999998
20957
+ "input_per_million": 0.38043459999999996,
20958
+ "output_per_million": 1.52173896
20904
20959
  }
20905
20960
  }
20906
20961
  },
@@ -21030,8 +21085,8 @@
21030
21085
  "pricing": {
21031
21086
  "text_tokens": {
21032
21087
  "standard": {
21033
- "input_per_million": 0.02498985,
21034
- "output_per_million": 0.100008
21088
+ "input_per_million": 0.062499969999999995,
21089
+ "output_per_million": 0.249999972
21035
21090
  }
21036
21091
  }
21037
21092
  },
@@ -21521,8 +21576,8 @@
21521
21576
  "pricing": {
21522
21577
  "text_tokens": {
21523
21578
  "standard": {
21524
- "input_per_million": 0.09329544,
21525
- "output_per_million": 0.3733632
21579
+ "input_per_million": 0.127173852,
21580
+ "output_per_million": 0.5086955952000001
21526
21581
  }
21527
21582
  }
21528
21583
  },
@@ -21639,7 +21694,7 @@
21639
21694
  "family": "nousresearch",
21640
21695
  "created_at": "2024-08-16 02:00:00 +0200",
21641
21696
  "context_window": 131072,
21642
- "max_output_tokens": 16384,
21697
+ "max_output_tokens": 131072,
21643
21698
  "knowledge_cutoff": null,
21644
21699
  "modalities": {
21645
21700
  "input": [
@@ -21657,7 +21712,7 @@
21657
21712
  "pricing": {
21658
21713
  "text_tokens": {
21659
21714
  "standard": {
21660
- "input_per_million": 0.7,
21715
+ "input_per_million": 0.7999999999999999,
21661
21716
  "output_per_million": 0.7999999999999999
21662
21717
  }
21663
21718
  }
@@ -21677,7 +21732,7 @@
21677
21732
  },
21678
21733
  "top_provider": {
21679
21734
  "context_length": 131072,
21680
- "max_completion_tokens": 16384,
21735
+ "max_completion_tokens": 131072,
21681
21736
  "is_moderated": false
21682
21737
  },
21683
21738
  "per_request_limits": null,
@@ -21725,8 +21780,8 @@
21725
21780
  "pricing": {
21726
21781
  "text_tokens": {
21727
21782
  "standard": {
21728
- "input_per_million": 0.09999999999999999,
21729
- "output_per_million": 0.28
21783
+ "input_per_million": 0.12,
21784
+ "output_per_million": 0.3
21730
21785
  }
21731
21786
  }
21732
21787
  },
@@ -21795,8 +21850,8 @@
21795
21850
  "pricing": {
21796
21851
  "text_tokens": {
21797
21852
  "standard": {
21798
- "input_per_million": 0.1999188,
21799
- "output_per_million": 0.800064
21853
+ "input_per_million": 0.24999987999999998,
21854
+ "output_per_million": 0.999999888
21800
21855
  }
21801
21856
  }
21802
21857
  },
@@ -21865,8 +21920,8 @@
21865
21920
  "pricing": {
21866
21921
  "text_tokens": {
21867
21922
  "standard": {
21868
- "input_per_million": 0.09329544,
21869
- "output_per_million": 0.3733632
21923
+ "input_per_million": 0.127173852,
21924
+ "output_per_million": 0.5086955952000001
21870
21925
  }
21871
21926
  }
21872
21927
  },
@@ -22102,11 +22157,11 @@
22102
22157
  }
22103
22158
  },
22104
22159
  {
22105
- "id": "nvidia/llama-3.3-nemotron-super-49b-v1",
22106
- "name": "NVIDIA: Llama 3.3 Nemotron Super 49B v1",
22160
+ "id": "nvidia/nemotron-nano-9b-v2",
22161
+ "name": "NVIDIA: Nemotron Nano 9B V2",
22107
22162
  "provider": "openrouter",
22108
22163
  "family": "nvidia",
22109
- "created_at": "2025-04-08 15:38:14 +0200",
22164
+ "created_at": "2025-09-05 23:13:27 +0200",
22110
22165
  "context_window": 131072,
22111
22166
  "max_output_tokens": null,
22112
22167
  "knowledge_cutoff": null,
@@ -22120,18 +22175,19 @@
  },
  "capabilities": [
  "streaming",
- "predicted_outputs"
+ "function_calling",
+ "structured_output"
  ],
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.13,
- "output_per_million": 0.39999999999999997
+ "input_per_million": 0.04,
+ "output_per_million": 0.16
  }
  }
  },
  "metadata": {
- "description": "Llama-3.3-Nemotron-Super-49B-v1 is a large language model (LLM) optimized for advanced reasoning, conversational interactions, retrieval-augmented generation (RAG), and tool-calling tasks. Derived from Meta's Llama-3.3-70B-Instruct, it employs a Neural Architecture Search (NAS) approach, significantly enhancing efficiency and reducing memory requirements. This allows the model to support a context length of up to 128K tokens and fit efficiently on single high-performance GPUs, such as NVIDIA H200.\n\nNote: you must include `detailed thinking on` in the system prompt to enable reasoning. Please see [Usage Recommendations](https://huggingface.co/nvidia/Llama-3_1-Nemotron-Ultra-253B-v1#quick-start-and-usage-recommendations) for more.",
+ "description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and tasks by first generating a reasoning trace and then concluding with a final response. \n\nThe model's reasoning capabilities can be controlled via a system prompt. If the user prefers the model to provide its final answer without intermediate reasoning traces, it can be configured to do so.",
  "architecture": {
  "modality": "text->text",
  "input_modalities": [
@@ -22151,22 +22207,26 @@
  "per_request_limits": null,
  "supported_parameters": [
  "frequency_penalty",
- "logit_bias",
- "logprobs",
+ "include_reasoning",
  "max_tokens",
+ "min_p",
  "presence_penalty",
+ "reasoning",
+ "repetition_penalty",
+ "response_format",
  "seed",
  "stop",
  "temperature",
+ "tool_choice",
+ "tools",
  "top_k",
- "top_logprobs",
  "top_p"
  ]
  }
  },
  {
- "id": "nvidia/nemotron-nano-9b-v2",
- "name": "NVIDIA: Nemotron Nano 9B V2",
+ "id": "nvidia/nemotron-nano-9b-v2:free",
+ "name": "NVIDIA: Nemotron Nano 9B V2 (free)",
  "provider": "openrouter",
  "family": "nvidia",
  "created_at": "2025-09-05 23:13:27 +0200",
@@ -24674,7 +24734,8 @@
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
- "text"
+ "text",
+ "file"
  ],
  "output": [
  "text"
@@ -24699,7 +24760,8 @@
  "architecture": {
  "modality": "text->text",
  "input_modalities": [
- "text"
+ "text",
+ "file"
  ],
  "output_modalities": [
  "text"
@@ -24734,7 +24796,8 @@
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
- "text"
+ "text",
+ "file"
  ],
  "output": [
  "text"
@@ -24759,12 +24822,13 @@
  "architecture": {
  "modality": "text->text",
  "input_modalities": [
- "text"
+ "text",
+ "file"
  ],
  "output_modalities": [
  "text"
  ],
- "tokenizer": "Other",
+ "tokenizer": "GPT",
  "instruct_type": null
  },
  "top_provider": {
@@ -25493,69 +25557,6 @@
  ]
  }
  },
- {
- "id": "pygmalionai/mythalion-13b",
- "name": "Pygmalion: Mythalion 13B",
- "provider": "openrouter",
- "family": "pygmalionai",
- "created_at": "2023-09-02 02:00:00 +0200",
- "context_window": 4096,
- "max_output_tokens": 4096,
- "knowledge_cutoff": null,
- "modalities": {
- "input": [
- "text"
- ],
- "output": [
- "text"
- ]
- },
- "capabilities": [
- "streaming",
- "predicted_outputs"
- ],
- "pricing": {
- "text_tokens": {
- "standard": {
- "input_per_million": 0.7,
- "output_per_million": 1.1
- }
- }
- },
- "metadata": {
- "description": "A blend of the new Pygmalion-13b and MythoMax. #merge",
- "architecture": {
- "modality": "text->text",
- "input_modalities": [
- "text"
- ],
- "output_modalities": [
- "text"
- ],
- "tokenizer": "Llama2",
- "instruct_type": "alpaca"
- },
- "top_provider": {
- "context_length": 4096,
- "max_completion_tokens": 4096,
- "is_moderated": false
- },
- "per_request_limits": null,
- "supported_parameters": [
- "frequency_penalty",
- "logit_bias",
- "max_tokens",
- "min_p",
- "presence_penalty",
- "repetition_penalty",
- "seed",
- "stop",
- "temperature",
- "top_k",
- "top_p"
- ]
- }
- },
  {
  "id": "qwen/qwen-2.5-72b-instruct",
  "name": "Qwen2.5 72B Instruct",
@@ -25582,8 +25583,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.051830799999999996,
- "output_per_million": 0.207424
+ "input_per_million": 0.06521736,
+ "output_per_million": 0.260869536
  }
  }
  },
@@ -25757,7 +25758,7 @@
  "family": "qwen",
  "created_at": "2024-11-12 00:40:00 +0100",
  "context_window": 32768,
- "max_output_tokens": null,
+ "max_output_tokens": 16384,
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
@@ -25775,8 +25776,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.0499797,
- "output_per_million": 0.200016
+ "input_per_million": 0.06,
+ "output_per_million": 0.15
  }
  }
  },
@@ -25795,7 +25796,7 @@
  },
  "top_provider": {
  "context_length": 32768,
- "max_completion_tokens": null,
+ "max_completion_tokens": 16384,
  "is_moderated": false
  },
  "per_request_limits": null,
@@ -26069,6 +26070,132 @@
  ]
  }
  },
+ {
+ "id": "qwen/qwen-plus-2025-07-28",
+ "name": "Qwen: Qwen Plus 0728",
+ "provider": "openrouter",
+ "family": "qwen",
+ "created_at": "2025-09-08 18:06:39 +0200",
+ "context_window": 1000000,
+ "max_output_tokens": 32768,
+ "knowledge_cutoff": null,
+ "modalities": {
+ "input": [
+ "text"
+ ],
+ "output": [
+ "text"
+ ]
+ },
+ "capabilities": [
+ "streaming",
+ "function_calling",
+ "structured_output"
+ ],
+ "pricing": {
+ "text_tokens": {
+ "standard": {
+ "input_per_million": 0.39999999999999997,
+ "output_per_million": 1.2
+ }
+ }
+ },
+ "metadata": {
+ "description": "Qwen Plus 0728, based on the Qwen3 foundation model, is a 1 million context hybrid reasoning model with a balanced performance, speed, and cost combination.",
+ "architecture": {
+ "modality": "text->text",
+ "input_modalities": [
+ "text"
+ ],
+ "output_modalities": [
+ "text"
+ ],
+ "tokenizer": "Qwen3",
+ "instruct_type": null
+ },
+ "top_provider": {
+ "context_length": 1000000,
+ "max_completion_tokens": 32768,
+ "is_moderated": false
+ },
+ "per_request_limits": null,
+ "supported_parameters": [
+ "max_tokens",
+ "presence_penalty",
+ "response_format",
+ "seed",
+ "structured_outputs",
+ "temperature",
+ "tool_choice",
+ "tools",
+ "top_p"
+ ]
+ }
+ },
+ {
+ "id": "qwen/qwen-plus-2025-07-28:thinking",
+ "name": "Qwen: Qwen Plus 0728 (thinking)",
+ "provider": "openrouter",
+ "family": "qwen",
+ "created_at": "2025-09-08 18:06:39 +0200",
+ "context_window": 1000000,
+ "max_output_tokens": 32768,
+ "knowledge_cutoff": null,
+ "modalities": {
+ "input": [
+ "text"
+ ],
+ "output": [
+ "text"
+ ]
+ },
+ "capabilities": [
+ "streaming",
+ "function_calling",
+ "structured_output"
+ ],
+ "pricing": {
+ "text_tokens": {
+ "standard": {
+ "input_per_million": 0.39999999999999997,
+ "output_per_million": 4.0
+ }
+ }
+ },
+ "metadata": {
+ "description": "Qwen Plus 0728, based on the Qwen3 foundation model, is a 1 million context hybrid reasoning model with a balanced performance, speed, and cost combination.",
+ "architecture": {
+ "modality": "text->text",
+ "input_modalities": [
+ "text"
+ ],
+ "output_modalities": [
+ "text"
+ ],
+ "tokenizer": "Qwen3",
+ "instruct_type": null
+ },
+ "top_provider": {
+ "context_length": 1000000,
+ "max_completion_tokens": 32768,
+ "is_moderated": false
+ },
+ "per_request_limits": null,
+ "supported_parameters": [
+ "include_reasoning",
+ "max_tokens",
+ "presence_penalty",
+ "reasoning",
+ "response_format",
+ "seed",
+ "structured_outputs",
+ "temperature",
+ "tool_choice",
+ "tools",
+ "top_p"
+ ]
+ }
+ },
  {
  "id": "qwen/qwen-turbo",
  "name": "Qwen: Qwen-Turbo",
@@ -26277,8 +26404,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.01999188,
- "output_per_million": 0.0800064
+ "input_per_million": 0.035869548,
+ "output_per_million": 0.14347824480000002
  }
  }
  },
@@ -26408,8 +26535,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.0999594,
- "output_per_million": 0.400032
+ "input_per_million": 0.24999987999999998,
+ "output_per_million": 0.999999888
  }
  }
  },
@@ -26739,8 +26866,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.077968332,
- "output_per_million": 0.31202496
+ "input_per_million": 0.0974999532,
+ "output_per_million": 0.38999995632
  }
  }
  },
@@ -26810,8 +26937,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.077968332,
- "output_per_million": 0.31202496
+ "input_per_million": 0.0974999532,
+ "output_per_million": 0.38999995632
  }
  }
  },
@@ -26948,8 +27075,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.01999188,
- "output_per_million": 0.0800064
+ "input_per_million": 0.035869548,
+ "output_per_million": 0.14347824480000002
  }
  }
  },
@@ -27021,8 +27148,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.051830799999999996,
- "output_per_million": 0.207424
+ "input_per_million": 0.07065213999999999,
+ "output_per_million": 0.282608664
  }
  }
  },
@@ -27091,8 +27218,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.0713,
- "output_per_million": 0.2852
+ "input_per_million": 0.08967387,
+ "output_per_million": 0.358695612
  }
  }
  },
@@ -27223,8 +27350,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.017992691999999998,
- "output_per_million": 0.07200576
+ "input_per_million": 0.0322825932,
+ "output_per_million": 0.12913042032
  }
  }
  },
@@ -27480,8 +27607,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.19999999999999998,
- "output_per_million": 0.7999999999999999
+ "input_per_million": 0.24999987999999998,
+ "output_per_million": 0.999999888
  }
  }
  },
@@ -27551,8 +27678,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.051830799999999996,
- "output_per_million": 0.207424
+ "input_per_million": 0.07065213999999999,
+ "output_per_million": 0.282608664
  }
  }
  },
@@ -27718,6 +27845,148 @@
  ]
  }
  },
+ {
+ "id": "qwen/qwen3-next-80b-a3b-instruct",
+ "name": "Qwen: Qwen3 Next 80B A3B Instruct",
+ "provider": "openrouter",
+ "family": "qwen",
+ "created_at": "2025-09-11 19:36:53 +0200",
+ "context_window": 262144,
+ "max_output_tokens": null,
+ "knowledge_cutoff": null,
+ "modalities": {
+ "input": [
+ "text"
+ ],
+ "output": [
+ "text"
+ ]
+ },
+ "capabilities": [
+ "streaming",
+ "function_calling",
+ "structured_output",
+ "predicted_outputs"
+ ],
+ "pricing": {
+ "text_tokens": {
+ "standard": {
+ "input_per_million": 0.09782604,
+ "output_per_million": 0.391304304
+ }
+ }
+ },
+ "metadata": {
+ "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\n\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.",
+ "architecture": {
+ "modality": "text->text",
+ "input_modalities": [
+ "text"
+ ],
+ "output_modalities": [
+ "text"
+ ],
+ "tokenizer": "Qwen3",
+ "instruct_type": null
+ },
+ "top_provider": {
+ "context_length": 262144,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null,
+ "supported_parameters": [
+ "frequency_penalty",
+ "logit_bias",
+ "logprobs",
+ "max_tokens",
+ "min_p",
+ "presence_penalty",
+ "repetition_penalty",
+ "response_format",
+ "seed",
+ "stop",
+ "temperature",
+ "tool_choice",
+ "tools",
+ "top_k",
+ "top_logprobs",
+ "top_p"
+ ]
+ }
+ },
+ {
+ "id": "qwen/qwen3-next-80b-a3b-thinking",
+ "name": "Qwen: Qwen3 Next 80B A3B Thinking",
+ "provider": "openrouter",
+ "family": "qwen",
+ "created_at": "2025-09-11 19:38:04 +0200",
+ "context_window": 262144,
+ "max_output_tokens": null,
+ "knowledge_cutoff": null,
+ "modalities": {
+ "input": [
+ "text"
+ ],
+ "output": [
+ "text"
+ ]
+ },
+ "capabilities": [
+ "streaming",
+ "function_calling",
+ "structured_output",
+ "predicted_outputs"
+ ],
+ "pricing": {
+ "text_tokens": {
+ "standard": {
+ "input_per_million": 0.09782604,
+ "output_per_million": 0.391304304
+ }
+ }
+ },
+ "metadata": {
+ "description": "Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that outputs structured “thinking” traces by default. It’s designed for hard multi-step problems; math proofs, code synthesis/debugging, logic, and agentic planning, and reports strong results across knowledge, reasoning, coding, alignment, and multilingual evaluations. Compared with prior Qwen3 variants, it emphasizes stability under long chains of thought and efficient scaling during inference, and it is tuned to follow complex instructions while reducing repetitive or off-task behavior.\n\nThe model is suitable for agent frameworks and tool use (function calling), retrieval-heavy workflows, and standardized benchmarking where step-by-step solutions are required. It supports long, detailed completions and leverages throughput-oriented techniques (e.g., multi-token prediction) for faster generation. Note that it operates in thinking-only mode.",
+ "architecture": {
+ "modality": "text->text",
+ "input_modalities": [
+ "text"
+ ],
+ "output_modalities": [
+ "text"
+ ],
+ "tokenizer": "Qwen3",
+ "instruct_type": null
+ },
+ "top_provider": {
+ "context_length": 262144,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null,
+ "supported_parameters": [
+ "frequency_penalty",
+ "include_reasoning",
+ "logit_bias",
+ "logprobs",
+ "max_tokens",
+ "min_p",
+ "presence_penalty",
+ "reasoning",
+ "repetition_penalty",
+ "response_format",
+ "seed",
+ "stop",
+ "temperature",
+ "tool_choice",
+ "tools",
+ "top_k",
+ "top_logprobs",
+ "top_p"
+ ]
+ }
+ },
  {
  "id": "qwen/qwq-32b",
  "name": "Qwen: QwQ 32B",
@@ -28315,8 +28584,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.01999188,
- "output_per_million": 0.0800064
+ "input_per_million": 0.035869548,
+ "output_per_million": 0.14347824480000002
  }
  }
  },
@@ -28952,8 +29221,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.0481286,
- "output_per_million": 0.192608
+ "input_per_million": 0.039130416,
+ "output_per_million": 0.1565217216
  }
  }
  },
@@ -29217,8 +29486,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.01999188,
- "output_per_million": 0.0800064
+ "input_per_million": 0.035869548,
+ "output_per_million": 0.14347824480000002
  }
  }
  },
@@ -29284,8 +29553,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.1999188,
- "output_per_million": 0.800064
+ "input_per_million": 0.24999987999999998,
+ "output_per_million": 0.999999888
  }
  }
  },
@@ -30127,8 +30396,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.32986602,
- "output_per_million": 1.3201056
+ "input_per_million": 0.41249980199999997,
+ "output_per_million": 1.6499998152000002
  }
  }
  },
@@ -30375,7 +30644,7 @@
  "name": "Sonar",
  "provider": "perplexity",
  "family": "sonar",
- "created_at": "2025-09-09 20:41:26 +0200",
+ "created_at": "2025-09-14 11:15:24 +0200",
  "context_window": 128000,
  "max_output_tokens": 4096,
  "knowledge_cutoff": null,
@@ -30407,7 +30676,7 @@
  "name": "Sonar Deep Research",
  "provider": "perplexity",
  "family": "sonar_deep_research",
- "created_at": "2025-09-09 20:41:26 +0200",
+ "created_at": "2025-09-14 11:15:24 +0200",
  "context_window": 128000,
  "max_output_tokens": 4096,
  "knowledge_cutoff": null,
@@ -30442,7 +30711,7 @@
  "name": "Sonar Pro",
  "provider": "perplexity",
  "family": "sonar_pro",
- "created_at": "2025-09-09 20:41:26 +0200",
+ "created_at": "2025-09-14 11:15:24 +0200",
  "context_window": 200000,
  "max_output_tokens": 8192,
  "knowledge_cutoff": null,
@@ -30474,7 +30743,7 @@
  "name": "Sonar Reasoning",
  "provider": "perplexity",
  "family": "sonar_reasoning",
- "created_at": "2025-09-09 20:41:26 +0200",
+ "created_at": "2025-09-14 11:15:24 +0200",
  "context_window": 128000,
  "max_output_tokens": 4096,
  "knowledge_cutoff": null,
@@ -30506,7 +30775,7 @@
  "name": "Sonar Reasoning Pro",
  "provider": "perplexity",
  "family": "sonar_reasoning_pro",
- "created_at": "2025-09-09 20:41:26 +0200",
+ "created_at": "2025-09-14 11:15:24 +0200",
  "context_window": 128000,
  "max_output_tokens": 8192,
  "knowledge_cutoff": null,
@@ -30646,7 +30915,7 @@
  "id": "gemini-1.5-flash",
  "name": "Gemini 1.5 Flash",
  "provider": "vertexai",
- "family": "gemini-1.5-flash",
+ "family": "models/gemini-1.5-flash",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -30682,7 +30951,7 @@
  "id": "gemini-1.5-flash-002",
  "name": "Gemini 1.5 Flash",
  "provider": "vertexai",
- "family": "gemini-1.5-flash",
+ "family": "models/gemini-1.5-flash",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -30718,7 +30987,7 @@
  "id": "gemini-1.5-flash-8b",
  "name": "Gemini 1.5 Flash-8B",
  "provider": "vertexai",
- "family": "gemini-1.5-flash-8b",
+ "family": "models/gemini-1.5-flash-8b",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -30754,7 +31023,7 @@
  "id": "gemini-1.5-pro",
  "name": "Gemini 1.5 Pro",
  "provider": "vertexai",
- "family": "gemini-1.5-pro",
+ "family": "models/gemini-1.5-pro",
  "created_at": null,
  "context_window": 2097152,
  "max_output_tokens": 8192,
@@ -30790,7 +31059,7 @@
  "id": "gemini-1.5-pro-002",
  "name": "Gemini 1.5 Pro",
  "provider": "vertexai",
- "family": "gemini-1.5-pro",
+ "family": "models/gemini-1.5-pro",
  "created_at": null,
  "context_window": 2097152,
  "max_output_tokens": 8192,
@@ -30826,7 +31095,7 @@
  "id": "gemini-2.0-flash",
  "name": "Gemini 2.0 Flash",
  "provider": "vertexai",
- "family": "gemini-2.0-flash",
+ "family": "models/gemini-2.0-flash",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -30867,7 +31136,7 @@
  "id": "gemini-2.0-flash-001",
  "name": "Gemini 2.0 Flash",
  "provider": "vertexai",
- "family": "gemini-2.0-flash",
+ "family": "models/gemini-2.0-flash",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -30908,7 +31177,7 @@
  "id": "gemini-2.0-flash-exp",
  "name": "Gemini 2.0 Flash",
  "provider": "vertexai",
- "family": "gemini-2.0-flash",
+ "family": "models/gemini-2.0-flash",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -30949,7 +31218,7 @@
  "id": "gemini-2.0-flash-lite-001",
  "name": "Gemini 2.0 Flash-Lite",
  "provider": "vertexai",
- "family": "gemini-2.0-flash-lite",
+ "family": "models/gemini-2.0-flash-lite",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -30990,7 +31259,7 @@
  "id": "gemini-2.5-flash",
  "name": "Gemini 2.5 Flash",
  "provider": "vertexai",
- "family": "gemini-2.5-flash",
+ "family": "models/gemini-2.5-flash",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 65536,
@@ -31031,7 +31300,7 @@
  "id": "gemini-2.5-flash-lite",
  "name": "Gemini 2.5 Flash-Lite",
  "provider": "vertexai",
- "family": "gemini-2.5-flash-lite",
+ "family": "models/gemini-2.5-flash-lite",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 65536,
@@ -31072,7 +31341,7 @@
  "id": "gemini-2.5-pro",
  "name": "Gemini 2.5 Pro",
  "provider": "vertexai",
- "family": "gemini-2.5-pro",
+ "family": "models/gemini-2.5-pro",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 65536,