ruby_llm 1.8.0 → 1.8.1

This diff shows the changes between publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
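Most of what follows is a diff of the gem's bundled model registry, a JSON array of model entries. As a reading aid, here is a minimal sketch of pulling one entry's fields out of that registry using only the Ruby standard library; the `lib/ruby_llm/models.json` path and the top-level array layout are assumptions inferred from the entries below:

```ruby
require "json"

# Load the bundled registry (path assumed; adjust to your checkout).
models = JSON.parse(File.read("lib/ruby_llm/models.json"))

# Each entry is a Hash with "id", "provider", "context_window",
# "modalities", "capabilities", and a nested "pricing" structure.
model = models.find { |m| m["id"] == "gpt-5" }

puts model["context_window"]          # 128000 as of 1.8.1 (was 4096 in 1.8.0)
puts model["capabilities"].join(", ") # streaming, function_calling, structured_output, reasoning

# Prices in this registry are expressed in USD per million tokens.
pricing = model.dig("pricing", "text_tokens", "standard")
puts pricing["input_per_million"]     # 1.25 as of 1.8.1
puts pricing["output_per_million"]    # 10.0 as of 1.8.1
```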
@@ -14,12 +14,10 @@
  "text"
  ],
  "output": [
- "embeddings",
  "text"
  ]
  },
  "capabilities": [
- "batch",
  "function_calling"
  ],
  "pricing": {
@@ -28,10 +26,6 @@
  "input_per_million": 0.8,
  "cached_input_per_million": 1.0,
  "output_per_million": 4.0
- },
- "batch": {
- "input_per_million": 0.4,
- "output_per_million": 2.0
  }
  }
  },
@@ -128,12 +122,10 @@
  "text"
  ],
  "output": [
- "embeddings",
  "text"
  ]
  },
  "capabilities": [
- "batch",
  "function_calling"
  ],
  "pricing": {
@@ -142,10 +134,6 @@
  "input_per_million": 3.0,
  "cached_input_per_million": 3.75,
  "output_per_million": 15.0
- },
- "batch": {
- "input_per_million": 1.5,
- "output_per_million": 7.5
  }
  }
  },
@@ -166,12 +154,10 @@
  "text"
  ],
  "output": [
- "embeddings",
  "text"
  ]
  },
  "capabilities": [
- "batch",
  "function_calling"
  ],
  "pricing": {
@@ -180,10 +166,6 @@
  "input_per_million": 0.25,
  "cached_input_per_million": 0.3,
  "output_per_million": 1.25
- },
- "batch": {
- "input_per_million": 0.125,
- "output_per_million": 0.625
  }
  }
  },
@@ -242,12 +224,10 @@
  "text"
  ],
  "output": [
- "embeddings",
  "text"
  ]
  },
  "capabilities": [
- "batch",
  "function_calling"
  ],
  "pricing": {
@@ -256,10 +236,6 @@
  "input_per_million": 15.0,
  "cached_input_per_million": 18.75,
  "output_per_million": 75.0
- },
- "batch": {
- "input_per_million": 7.5,
- "output_per_million": 37.5
  }
  }
  },
@@ -280,12 +256,10 @@
  "text"
  ],
  "output": [
- "embeddings",
  "text"
  ]
  },
  "capabilities": [
- "batch",
  "function_calling"
  ],
  "pricing": {
@@ -294,10 +268,6 @@
  "input_per_million": 15.0,
  "cached_input_per_million": 18.75,
  "output_per_million": 75.0
- },
- "batch": {
- "input_per_million": 7.5,
- "output_per_million": 37.5
  }
  }
  },
@@ -318,12 +288,10 @@
  "text"
  ],
  "output": [
- "embeddings",
  "text"
  ]
  },
  "capabilities": [
- "batch",
  "function_calling"
  ],
  "pricing": {
@@ -332,10 +300,6 @@
  "input_per_million": 3.0,
  "cached_input_per_million": 3.75,
  "output_per_million": 15.0
- },
- "batch": {
- "input_per_million": 1.5,
- "output_per_million": 7.5
  }
  }
  },
@@ -1903,7 +1867,7 @@
  "id": "gemini-1.5-flash",
  "name": "Gemini 1.5 Flash",
  "provider": "gemini",
- "family": "models/gemini-1.5-flash",
+ "family": "gemini-1.5-flash",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -1944,7 +1908,7 @@
  "id": "gemini-1.5-flash-001",
  "name": "Gemini 1.5 Flash",
  "provider": "gemini",
- "family": "models/gemini-1.5-flash",
+ "family": "gemini-1.5-flash",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -1978,7 +1942,7 @@
  "id": "gemini-1.5-flash-002",
  "name": "Gemini 1.5 Flash",
  "provider": "gemini",
- "family": "models/gemini-1.5-flash",
+ "family": "gemini-1.5-flash",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -2020,7 +1984,7 @@
  "id": "gemini-1.5-flash-8b",
  "name": "Gemini 1.5 Flash-8B",
  "provider": "gemini",
- "family": "models/gemini-1.5-flash-8b",
+ "family": "gemini-1.5-flash-8b",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -2062,7 +2026,7 @@
  "id": "gemini-1.5-flash-8b-001",
  "name": "Gemini 1.5 Flash-8B",
  "provider": "gemini",
- "family": "models/gemini-1.5-flash-8b",
+ "family": "gemini-1.5-flash-8b",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -2104,7 +2068,7 @@
  "id": "gemini-1.5-flash-8b-latest",
  "name": "Gemini 1.5 Flash-8B",
  "provider": "gemini",
- "family": "models/gemini-1.5-flash-8b",
+ "family": "gemini-1.5-flash-8b",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -2146,7 +2110,7 @@
  "id": "gemini-1.5-flash-latest",
  "name": "Gemini 1.5 Flash",
  "provider": "gemini",
- "family": "models/gemini-1.5-flash",
+ "family": "gemini-1.5-flash",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -2187,7 +2151,7 @@
  "id": "gemini-1.5-pro",
  "name": "Gemini 1.5 Pro",
  "provider": "gemini",
- "family": "models/gemini-1.5-pro",
+ "family": "gemini-1.5-pro",
  "created_at": null,
  "context_window": 2097152,
  "max_output_tokens": 8192,
@@ -2228,7 +2192,7 @@
  "id": "gemini-1.5-pro-001",
  "name": "Gemini 1.5 Pro",
  "provider": "gemini",
- "family": "models/gemini-1.5-pro",
+ "family": "gemini-1.5-pro",
  "created_at": null,
  "context_window": 2097152,
  "max_output_tokens": 8192,
@@ -2262,7 +2226,7 @@
  "id": "gemini-1.5-pro-002",
  "name": "Gemini 1.5 Pro",
  "provider": "gemini",
- "family": "models/gemini-1.5-pro",
+ "family": "gemini-1.5-pro",
  "created_at": null,
  "context_window": 2097152,
  "max_output_tokens": 8192,
@@ -2304,7 +2268,7 @@
  "id": "gemini-1.5-pro-latest",
  "name": "Gemini 1.5 Pro",
  "provider": "gemini",
- "family": "models/gemini-1.5-pro",
+ "family": "gemini-1.5-pro",
  "created_at": null,
  "context_window": 2097152,
  "max_output_tokens": 8192,
@@ -2345,7 +2309,7 @@
  "id": "gemini-2.0-flash",
  "name": "Gemini 2.0 Flash",
  "provider": "gemini",
- "family": "models/gemini-2.0-flash",
+ "family": "gemini-2.0-flash",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -2393,7 +2357,7 @@
  "id": "gemini-2.0-flash-001",
  "name": "Gemini 2.0 Flash",
  "provider": "gemini",
- "family": "models/gemini-2.0-flash",
+ "family": "gemini-2.0-flash",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -2441,7 +2405,7 @@
  "id": "gemini-2.0-flash-exp",
  "name": "Gemini 2.0 Flash",
  "provider": "gemini",
- "family": "models/gemini-2.0-flash",
+ "family": "gemini-2.0-flash",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -2488,7 +2452,7 @@
  "id": "gemini-2.0-flash-lite",
  "name": "Gemini 2.0 Flash-Lite",
  "provider": "gemini",
- "family": "models/gemini-2.0-flash-lite",
+ "family": "gemini-2.0-flash-lite",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -2536,7 +2500,7 @@
  "id": "gemini-2.0-flash-lite-001",
  "name": "Gemini 2.0 Flash-Lite",
  "provider": "gemini",
- "family": "models/gemini-2.0-flash-lite",
+ "family": "gemini-2.0-flash-lite",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -2678,7 +2642,7 @@
  "id": "gemini-2.0-flash-live-001",
  "name": "Gemini 2.0 Flash Live",
  "provider": "gemini",
- "family": "models/gemini-2.0-flash-live-001",
+ "family": "gemini-2.0-flash-live-001",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -2712,7 +2676,7 @@
  "id": "gemini-2.0-flash-preview-image-generation",
  "name": "Gemini 2.0 Flash Preview Image Generation",
  "provider": "gemini",
- "family": "models/gemini-2.0-flash-preview-image-generation",
+ "family": "gemini-2.0-flash-preview-image-generation",
  "created_at": null,
  "context_window": 32000,
  "max_output_tokens": 8192,
@@ -2999,7 +2963,7 @@
  "id": "gemini-2.5-flash",
  "name": "Gemini 2.5 Flash",
  "provider": "gemini",
- "family": "models/gemini-2.5-flash",
+ "family": "gemini-2.5-flash",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 65536,
@@ -3047,7 +3011,7 @@
  "id": "gemini-2.5-flash-exp-native-audio-thinking-dialog",
  "name": "Gemini 2.5 Flash Native Audio",
  "provider": "gemini",
- "family": "models/gemini-2.5-flash-preview-native-audio-dialog",
+ "family": "gemini-2.5-flash-preview-native-audio-dialog",
  "created_at": null,
  "context_window": 128000,
  "max_output_tokens": 8000,
@@ -3080,7 +3044,7 @@
  "id": "gemini-2.5-flash-image-preview",
  "name": "Gemini 2.5 Flash Image Preview",
  "provider": "gemini",
- "family": "models/gemini-2.5-flash-image-preview",
+ "family": "gemini-2.5-flash-image-preview",
  "created_at": null,
  "context_window": 32768,
  "max_output_tokens": 32768,
@@ -3125,7 +3089,7 @@
  "id": "gemini-2.5-flash-lite",
  "name": "Gemini 2.5 Flash-Lite",
  "provider": "gemini",
- "family": "models/gemini-2.5-flash-lite",
+ "family": "gemini-2.5-flash-lite",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 65536,
@@ -3173,7 +3137,7 @@
  "id": "gemini-2.5-flash-lite-06-17",
  "name": "Gemini 2.5 Flash-Lite",
  "provider": "gemini",
- "family": "models/gemini-2.5-flash-lite",
+ "family": "gemini-2.5-flash-lite",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 65536,
@@ -3260,7 +3224,7 @@
  "id": "gemini-2.5-flash-preview-05-20",
  "name": "Gemini 2.5 Flash",
  "provider": "gemini",
- "family": "models/gemini-2.5-flash",
+ "family": "gemini-2.5-flash",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 65536,
@@ -3308,7 +3272,7 @@
  "id": "gemini-2.5-flash-preview-native-audio-dialog",
  "name": "Gemini 2.5 Flash Native Audio",
  "provider": "gemini",
- "family": "models/gemini-2.5-flash-preview-native-audio-dialog",
+ "family": "gemini-2.5-flash-preview-native-audio-dialog",
  "created_at": null,
  "context_window": 128000,
  "max_output_tokens": 8000,
@@ -3341,7 +3305,7 @@
  "id": "gemini-2.5-flash-preview-tts",
  "name": "Gemini 2.5 Flash Preview TTS",
  "provider": "gemini",
- "family": "models/gemini-2.5-flash-preview-tts",
+ "family": "gemini-2.5-flash-preview-tts",
  "created_at": null,
  "context_window": 8000,
  "max_output_tokens": 16000,
@@ -3383,7 +3347,7 @@
  "id": "gemini-2.5-pro",
  "name": "Gemini 2.5 Pro",
  "provider": "gemini",
- "family": "models/gemini-2.5-pro",
+ "family": "gemini-2.5-pro",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 65536,
@@ -3578,7 +3542,7 @@
  "id": "gemini-2.5-pro-preview-tts",
  "name": "Gemini 2.5 Pro Preview TTS",
  "provider": "gemini",
- "family": "models/gemini-2.5-pro-preview-tts",
+ "family": "gemini-2.5-pro-preview-tts",
  "created_at": null,
  "context_window": 8000,
  "max_output_tokens": 16000,
@@ -3820,7 +3784,7 @@
  "id": "gemini-live-2.5-flash-preview",
  "name": "Gemini 2.5 Flash Live",
  "provider": "gemini",
- "family": "models/gemini-live-2.5-flash-preview",
+ "family": "gemini-live-2.5-flash-preview",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -4685,6 +4649,36 @@
  "owned_by": "mistralai"
  }
  },
+ {
+ "id": "magistral-medium-2509",
+ "name": "Magistral Medium 2509",
+ "provider": "mistral",
+ "family": "mistral",
+ "created_at": null,
+ "context_window": 32768,
+ "max_output_tokens": 8192,
+ "knowledge_cutoff": null,
+ "modalities": {
+ "input": [
+ "text"
+ ],
+ "output": [
+ "text"
+ ]
+ },
+ "capabilities": [
+ "streaming",
+ "function_calling",
+ "structured_output",
+ "reasoning",
+ "batch"
+ ],
+ "pricing": {},
+ "metadata": {
+ "object": "model",
+ "owned_by": "mistralai"
+ }
+ },
  {
  "id": "magistral-medium-latest",
  "name": "Magistral Medium Latest",
@@ -4775,6 +4769,36 @@
  "owned_by": "mistralai"
  }
  },
+ {
+ "id": "magistral-small-2509",
+ "name": "Magistral Small 2509",
+ "provider": "mistral",
+ "family": "mistral",
+ "created_at": null,
+ "context_window": 32768,
+ "max_output_tokens": 8192,
+ "knowledge_cutoff": null,
+ "modalities": {
+ "input": [
+ "text"
+ ],
+ "output": [
+ "text"
+ ]
+ },
+ "capabilities": [
+ "streaming",
+ "function_calling",
+ "structured_output",
+ "reasoning",
+ "batch"
+ ],
+ "pricing": {},
+ "metadata": {
+ "object": "model",
+ "owned_by": "mistralai"
+ }
+ },
  {
  "id": "magistral-small-latest",
  "name": "Magistral Small Latest",
@@ -7946,14 +7970,16 @@
  "id": "gpt-5",
  "name": "GPT-5",
  "provider": "openai",
- "family": "other",
+ "family": "gpt5",
  "created_at": "2025-08-05 22:29:37 +0200",
- "context_window": 4096,
- "max_output_tokens": 16384,
+ "context_window": 128000,
+ "max_output_tokens": 400000,
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
- "text"
+ "text",
+ "image",
+ "pdf"
  ],
  "output": [
  "text"
@@ -7961,13 +7987,16 @@
  },
  "capabilities": [
  "streaming",
+ "function_calling",
+ "structured_output",
  "reasoning"
  ],
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.5,
- "output_per_million": 1.5
+ "input_per_million": 1.25,
+ "output_per_million": 10.0,
+ "cached_input_per_million": 0.125
  }
  }
  },
@@ -7980,14 +8009,16 @@
  "id": "gpt-5-2025-08-07",
  "name": "GPT-5 20250807",
  "provider": "openai",
- "family": "other",
+ "family": "gpt5",
  "created_at": "2025-08-01 21:09:20 +0200",
- "context_window": 4096,
- "max_output_tokens": 16384,
+ "context_window": 128000,
+ "max_output_tokens": 400000,
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
- "text"
+ "text",
+ "image",
+ "pdf"
  ],
  "output": [
  "text"
@@ -7995,13 +8026,16 @@
  },
  "capabilities": [
  "streaming",
+ "function_calling",
+ "structured_output",
  "reasoning"
  ],
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.5,
- "output_per_million": 1.5
+ "input_per_million": 1.25,
+ "output_per_million": 10.0,
+ "cached_input_per_million": 0.125
  }
  }
  },
@@ -8014,14 +8048,16 @@
  "id": "gpt-5-chat-latest",
  "name": "GPT-5 Chat Latest",
  "provider": "openai",
- "family": "other",
+ "family": "gpt5",
  "created_at": "2025-08-01 20:35:06 +0200",
- "context_window": 4096,
- "max_output_tokens": 16384,
+ "context_window": 128000,
+ "max_output_tokens": 400000,
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
- "text"
+ "text",
+ "image",
+ "pdf"
  ],
  "output": [
  "text"
@@ -8029,13 +8065,16 @@
  },
  "capabilities": [
  "streaming",
+ "function_calling",
+ "structured_output",
  "reasoning"
  ],
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.5,
- "output_per_million": 1.5
+ "input_per_million": 1.25,
+ "output_per_million": 10.0,
+ "cached_input_per_million": 0.125
  }
  }
  },
@@ -8048,14 +8087,16 @@
  "id": "gpt-5-mini",
  "name": "GPT-5 Mini",
  "provider": "openai",
- "family": "other",
+ "family": "gpt5",
  "created_at": "2025-08-05 22:32:08 +0200",
- "context_window": 4096,
- "max_output_tokens": 16384,
+ "context_window": 128000,
+ "max_output_tokens": 400000,
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
- "text"
+ "text",
+ "image",
+ "pdf"
  ],
  "output": [
  "text"
@@ -8063,13 +8104,16 @@
  },
  "capabilities": [
  "streaming",
+ "function_calling",
+ "structured_output",
  "reasoning"
  ],
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.5,
- "output_per_million": 1.5
+ "input_per_million": 1.25,
+ "output_per_million": 10.0,
+ "cached_input_per_million": 0.125
  }
  }
  },
@@ -8082,14 +8126,16 @@
  "id": "gpt-5-mini-2025-08-07",
  "name": "GPT-5 Mini 20250807",
  "provider": "openai",
- "family": "other",
+ "family": "gpt5",
  "created_at": "2025-08-05 22:31:07 +0200",
- "context_window": 4096,
- "max_output_tokens": 16384,
+ "context_window": 128000,
+ "max_output_tokens": 400000,
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
- "text"
+ "text",
+ "image",
+ "pdf"
  ],
  "output": [
  "text"
@@ -8097,13 +8143,16 @@
  },
  "capabilities": [
  "streaming",
+ "function_calling",
+ "structured_output",
  "reasoning"
  ],
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.5,
- "output_per_million": 1.5
+ "input_per_million": 1.25,
+ "output_per_million": 10.0,
+ "cached_input_per_million": 0.125
  }
  }
  },
@@ -8116,14 +8165,16 @@
  "id": "gpt-5-nano",
  "name": "GPT-5 Nano",
  "provider": "openai",
- "family": "other",
+ "family": "gpt5",
  "created_at": "2025-08-05 22:39:44 +0200",
- "context_window": 4096,
- "max_output_tokens": 16384,
+ "context_window": 128000,
+ "max_output_tokens": 400000,
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
- "text"
+ "text",
+ "image",
+ "pdf"
  ],
  "output": [
  "text"
@@ -8131,13 +8182,16 @@
  },
  "capabilities": [
  "streaming",
+ "function_calling",
+ "structured_output",
  "reasoning"
  ],
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.5,
- "output_per_million": 1.5
+ "input_per_million": 1.25,
+ "output_per_million": 10.0,
+ "cached_input_per_million": 0.125
  }
  }
  },
@@ -8150,14 +8204,16 @@
  "id": "gpt-5-nano-2025-08-07",
  "name": "GPT-5 Nano 20250807",
  "provider": "openai",
- "family": "other",
+ "family": "gpt5",
  "created_at": "2025-08-05 22:38:23 +0200",
- "context_window": 4096,
- "max_output_tokens": 16384,
+ "context_window": 128000,
+ "max_output_tokens": 400000,
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
- "text"
+ "text",
+ "image",
+ "pdf"
  ],
  "output": [
  "text"
@@ -8165,13 +8221,16 @@
  },
  "capabilities": [
  "streaming",
+ "function_calling",
+ "structured_output",
  "reasoning"
  ],
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.5,
- "output_per_million": 1.5
+ "input_per_million": 1.25,
+ "output_per_million": 10.0,
+ "cached_input_per_million": 0.125
  }
  }
  },
@@ -9798,6 +9857,68 @@
  ]
  }
  },
+ {
+ "id": "alibaba/tongyi-deepresearch-30b-a3b",
+ "name": "Tongyi DeepResearch 30B A3B",
+ "provider": "openrouter",
+ "family": "alibaba",
+ "created_at": "2025-09-18 17:53:24 +0200",
+ "context_window": 131072,
+ "max_output_tokens": 131072,
+ "knowledge_cutoff": null,
+ "modalities": {
+ "input": [
+ "text"
+ ],
+ "output": [
+ "text"
+ ]
+ },
+ "capabilities": [
+ "streaming",
+ "function_calling",
+ "structured_output"
+ ],
+ "pricing": {
+ "text_tokens": {
+ "standard": {
+ "input_per_million": 0.09,
+ "output_per_million": 0.44999999999999996
+ }
+ }
+ },
+ "metadata": {
+ "description": "Tongyi DeepResearch is an agentic large language model developed by Tongyi Lab, with 30 billion total parameters activating only 3 billion per token. It's optimized for long-horizon, deep information-seeking tasks and delivers state-of-the-art performance on benchmarks like Humanity's Last Exam, BrowserComp, BrowserComp-ZH, WebWalkerQA, GAIA, xbench-DeepSearch, and FRAMES. This makes it superior for complex agentic search, reasoning, and multi-step problem-solving compared to prior models.\n\nThe model includes a fully automated synthetic data pipeline for scalable pre-training, fine-tuning, and reinforcement learning. It uses large-scale continual pre-training on diverse agentic data to boost reasoning and stay fresh. It also features end-to-end on-policy RL with a customized Group Relative Policy Optimization, including token-level gradients and negative sample filtering for stable training. The model supports ReAct for core ability checks and an IterResearch-based 'Heavy' mode for max performance through test-time scaling. It's ideal for advanced research agents, tool use, and heavy inference workflows.",
+ "architecture": {
+ "modality": "text->text",
+ "input_modalities": [
+ "text"
+ ],
+ "output_modalities": [
+ "text"
+ ],
+ "tokenizer": "Other",
+ "instruct_type": null
+ },
+ "top_provider": {
+ "context_length": 131072,
+ "max_completion_tokens": 131072,
+ "is_moderated": false
+ },
+ "per_request_limits": null,
+ "supported_parameters": [
+ "include_reasoning",
+ "max_tokens",
+ "reasoning",
+ "response_format",
+ "structured_outputs",
+ "temperature",
+ "tool_choice",
+ "tools",
+ "top_p"
+ ]
+ }
+ },
  {
  "id": "allenai/molmo-7b-d",
  "name": "AllenAI: Molmo 7B D",
@@ -10932,7 +11053,9 @@
  "stop",
  "temperature",
  "tool_choice",
- "tools"
+ "tools",
+ "top_k",
+ "top_p"
  ]
  }
  },
@@ -11003,12 +11126,77 @@
  }
  },
  {
- "id": "arcee-ai/coder-large",
- "name": "Arcee AI: Coder Large",
+ "id": "arcee-ai/afm-4.5b",
+ "name": "Arcee AI: AFM 4.5B",
  "provider": "openrouter",
  "family": "arcee-ai",
- "created_at": "2025-05-05 22:57:43 +0200",
- "context_window": 32768,
+ "created_at": "2025-09-16 18:34:44 +0200",
+ "context_window": 65536,
+ "max_output_tokens": null,
+ "knowledge_cutoff": null,
+ "modalities": {
+ "input": [
+ "text"
+ ],
+ "output": [
+ "text"
+ ]
+ },
+ "capabilities": [
+ "streaming",
+ "structured_output",
+ "predicted_outputs"
+ ],
+ "pricing": {
+ "text_tokens": {
+ "standard": {
+ "input_per_million": 0.09999999999999999,
+ "output_per_million": 0.39999999999999997
+ }
+ }
+ },
+ "metadata": {
+ "description": "AFM-4.5B is a 4.5 billion parameter instruction-tuned language model developed by Arcee AI. The model was pretrained on approximately 8 trillion tokens, including 6.5 trillion tokens of general data and 1.5 trillion tokens with an emphasis on mathematical reasoning and code generation. ",
+ "architecture": {
+ "modality": "text->text",
+ "input_modalities": [
+ "text"
+ ],
+ "output_modalities": [
+ "text"
+ ],
+ "tokenizer": "Other",
+ "instruct_type": null
+ },
+ "top_provider": {
+ "context_length": 65536,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null,
+ "supported_parameters": [
+ "frequency_penalty",
+ "logit_bias",
+ "max_tokens",
+ "min_p",
+ "presence_penalty",
+ "repetition_penalty",
+ "response_format",
+ "stop",
+ "structured_outputs",
+ "temperature",
+ "top_k",
+ "top_p"
+ ]
+ }
+ },
+ {
+ "id": "arcee-ai/coder-large",
+ "name": "Arcee AI: Coder Large",
+ "provider": "openrouter",
+ "family": "arcee-ai",
+ "created_at": "2025-05-05 22:57:43 +0200",
+ "context_window": 32768,
  "max_output_tokens": null,
  "knowledge_cutoff": null,
  "modalities": {
@@ -11279,8 +11467,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.017934774,
- "output_per_million": 0.07173912240000001
+ "input_per_million": 0.02,
+ "output_per_million": 0.07
  }
  }
  },
@@ -11504,6 +11692,7 @@
  "response_format",
  "seed",
  "stop",
+ "structured_outputs",
  "temperature",
  "top_k",
  "top_p"
@@ -11668,8 +11857,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.2006688,
- "output_per_million": 0.80267549538462
+ "input_per_million": 0.16,
+ "output_per_million": 0.65
  }
  }
  },
@@ -11830,69 +12019,6 @@
  ]
  }
  },
- {
- "id": "cognitivecomputations/dolphin-mixtral-8x22b",
- "name": "Dolphin 2.9.2 Mixtral 8x22B 🐬",
- "provider": "openrouter",
- "family": "cognitivecomputations",
- "created_at": "2024-06-08 02:00:00 +0200",
- "context_window": 16000,
- "max_output_tokens": 8192,
- "knowledge_cutoff": null,
- "modalities": {
- "input": [
- "text"
- ],
- "output": [
- "text"
- ]
- },
- "capabilities": [
- "streaming",
- "predicted_outputs"
- ],
- "pricing": {
- "text_tokens": {
- "standard": {
- "input_per_million": 0.8999999999999999,
- "output_per_million": 0.8999999999999999
- }
- }
- },
- "metadata": {
- "description": "Dolphin 2.9 is designed for instruction following, conversational, and coding. This model is a finetune of [Mixtral 8x22B Instruct](/models/mistralai/mixtral-8x22b-instruct). It features a 64k context length and was fine-tuned with a 16k sequence length using ChatML templates.\n\nThis model is a successor to [Dolphin Mixtral 8x7B](/models/cognitivecomputations/dolphin-mixtral-8x7b).\n\nThe model is uncensored and is stripped of alignment and bias. It requires an external alignment layer for ethical use. Users are cautioned to use this highly compliant model responsibly, as detailed in a blog post about uncensored models at [erichartford.com/uncensored-models](https://erichartford.com/uncensored-models).\n\n#moe #uncensored",
- "architecture": {
- "modality": "text->text",
- "input_modalities": [
- "text"
- ],
- "output_modalities": [
- "text"
- ],
- "tokenizer": "Mistral",
- "instruct_type": "chatml"
- },
- "top_provider": {
- "context_length": 16000,
- "max_completion_tokens": 8192,
- "is_moderated": false
- },
- "per_request_limits": null,
- "supported_parameters": [
- "frequency_penalty",
- "logit_bias",
- "max_tokens",
- "min_p",
- "presence_penalty",
- "repetition_penalty",
- "seed",
- "stop",
- "temperature",
- "top_k",
- "top_p"
- ]
- }
- },
  {
  "id": "cognitivecomputations/dolphin3.0-mistral-24b",
  "name": "Dolphin3.0 Mistral 24B",
@@ -11917,8 +12043,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.0271739,
- "output_per_million": 0.10869564
+ "input_per_million": 0.03,
+ "output_per_million": 0.11
  }
  }
  },
@@ -12040,8 +12166,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.01333333333333,
- "output_per_million": 0.0347826048
+ "input_per_million": 0.01,
+ "output_per_million": 0.03
  }
  }
  },
@@ -13128,7 +13254,7 @@
  "provider": "openrouter",
  "family": "deepseek",
  "created_at": "2025-08-21 14:33:48 +0200",
- "context_window": 64000,
+ "context_window": 163840,
  "max_output_tokens": null,
  "knowledge_cutoff": null,
  "modalities": {
@@ -13159,9 +13285,9 @@
  "instruct_type": "deepseek-v3.1"
  },
  "top_provider": {
- "context_length": 64000,
+ "context_length": 163840,
  "max_completion_tokens": null,
- "is_moderated": true
+ "is_moderated": false
  },
  "per_request_limits": null,
  "supported_parameters": [
@@ -13345,8 +13471,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.24999987999999998,
- "output_per_million": 0.999999888
+ "input_per_million": 0.39999999999999997,
+ "output_per_million": 1.75
  }
  }
  },
@@ -13416,8 +13542,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.013043472,
- "output_per_million": 0.0521739072
+ "input_per_million": 0.01,
+ "output_per_million": 0.049999999999999996
  }
  }
  },
@@ -13605,8 +13731,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.03260868,
- "output_per_million": 0.130434768
+ "input_per_million": 0.03,
+ "output_per_million": 0.13
  }
  }
  },
@@ -13782,8 +13908,8 @@
  "provider": "openrouter",
  "family": "deepseek",
  "created_at": "2025-01-30 00:39:00 +0100",
- "context_window": 64000,
- "max_output_tokens": 32000,
+ "context_window": 32768,
+ "max_output_tokens": 16384,
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
@@ -13819,66 +13945,8 @@
  "instruct_type": "deepseek-r1"
  },
  "top_provider": {
- "context_length": 64000,
- "max_completion_tokens": 32000,
- "is_moderated": false
- },
- "per_request_limits": null,
- "supported_parameters": [
- "frequency_penalty",
- "include_reasoning",
- "logit_bias",
- "max_tokens",
- "min_p",
- "presence_penalty",
- "reasoning",
- "repetition_penalty",
- "seed",
- "stop",
- "temperature",
- "top_k",
- "top_p"
- ]
- }
- },
- {
- "id": "deepseek/deepseek-r1-distill-qwen-14b:free",
- "name": "DeepSeek: R1 Distill Qwen 14B (free)",
- "provider": "openrouter",
- "family": "deepseek",
- "created_at": "2025-01-30 00:39:00 +0100",
- "context_window": 64000,
- "max_output_tokens": null,
- "knowledge_cutoff": null,
- "modalities": {
- "input": [
- "text"
- ],
- "output": [
- "text"
- ]
- },
- "capabilities": [
- "streaming",
- "predicted_outputs"
- ],
- "pricing": {},
- "metadata": {
- "description": "DeepSeek R1 Distill Qwen 14B is a distilled large language model based on [Qwen 2.5 14B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). It outperforms OpenAI's o1-mini across various benchmarks, achieving new state-of-the-art results for dense models.\n\nOther benchmark results include:\n\n- AIME 2024 pass@1: 69.7\n- MATH-500 pass@1: 93.9\n- CodeForces Rating: 1481\n\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.",
- "architecture": {
- "modality": "text->text",
- "input_modalities": [
- "text"
- ],
- "output_modalities": [
- "text"
- ],
- "tokenizer": "Qwen",
- "instruct_type": "deepseek-r1"
- },
- "top_provider": {
- "context_length": 64000,
- "max_completion_tokens": null,
+ "context_length": 32768,
+ "max_completion_tokens": 16384,
  "is_moderated": false
  },
  "per_request_limits": null,
@@ -13886,7 +13954,6 @@
  "frequency_penalty",
  "include_reasoning",
  "logit_bias",
- "logprobs",
  "max_tokens",
  "min_p",
  "presence_penalty",
@@ -13896,7 +13963,6 @@
  "stop",
  "temperature",
  "top_k",
- "top_logprobs",
  "top_p"
  ]
  }
@@ -15108,8 +15174,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.02,
- "output_per_million": 0.035869561200000004
+ "input_per_million": 0.01,
+ "output_per_million": 0.02
  }
  }
  },
@@ -15234,8 +15300,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.035326069999999994,
- "output_per_million": 0.141304332
+ "input_per_million": 0.04,
+ "output_per_million": 0.14
  }
  }
  },
@@ -15363,8 +15429,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.06521736,
- "output_per_million": 0.260869536
+ "input_per_million": 0.07,
+ "output_per_million": 0.26
  }
  }
  },
@@ -16260,7 +16326,7 @@
  "family": "meituan",
  "created_at": "2025-09-09 16:20:58 +0200",
  "context_window": 131072,
- "max_output_tokens": 131072,
+ "max_output_tokens": null,
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
@@ -16278,8 +16344,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.15,
- "output_per_million": 0.75
+ "input_per_million": 0.12,
+ "output_per_million": 0.6
  }
  }
  },
@@ -16298,7 +16364,7 @@
  },
  "top_provider": {
  "context_length": 131072,
- "max_completion_tokens": 131072,
+ "max_completion_tokens": null,
  "is_moderated": false
  },
  "per_request_limits": null,
@@ -16726,7 +16792,7 @@
  "provider": "openrouter",
  "family": "meta-llama",
  "created_at": "2024-07-23 02:00:00 +0200",
- "context_window": 131072,
+ "context_window": 16384,
  "max_output_tokens": 16384,
  "knowledge_cutoff": null,
  "modalities": {
@@ -16746,8 +16812,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.015,
- "output_per_million": 0.02
+ "input_per_million": 0.02,
+ "output_per_million": 0.03
  }
  }
  },
@@ -16765,7 +16831,7 @@
  "instruct_type": "llama3"
  },
  "top_provider": {
- "context_length": 131072,
+ "context_length": 16384,
  "max_completion_tokens": 16384,
  "is_moderated": false
  },
@@ -16933,7 +16999,7 @@
  "provider": "openrouter",
  "family": "meta-llama",
  "created_at": "2024-09-25 02:00:00 +0200",
- "context_window": 131072,
+ "context_window": 16384,
  "max_output_tokens": 16384,
  "knowledge_cutoff": null,
  "modalities": {
@@ -16953,8 +17019,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.012,
- "output_per_million": 0.024
+ "input_per_million": 0.02,
+ "output_per_million": 0.02
  }
  }
  },
@@ -16972,7 +17038,7 @@
  "instruct_type": "llama3"
  },
  "top_provider": {
- "context_length": 131072,
+ "context_length": 16384,
  "max_completion_tokens": 16384,
  "is_moderated": false
  },
@@ -17121,7 +17187,7 @@
  "family": "meta-llama",
  "created_at": "2024-12-06 18:28:57 +0100",
  "context_window": 131072,
- "max_output_tokens": 16384,
+ "max_output_tokens": 131072,
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
@@ -17140,8 +17206,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.038000000000000006,
- "output_per_million": 0.12
+ "input_per_million": 0.012,
+ "output_per_million": 0.036
  }
  }
  },
@@ -17160,7 +17226,7 @@
  },
  "top_provider": {
  "context_length": 131072,
- "max_completion_tokens": 16384,
+ "max_completion_tokens": 131072,
  "is_moderated": false
  },
  "per_request_limits": null,
@@ -18136,6 +18202,7 @@
  },
  "capabilities": [
  "streaming",
+ "function_calling",
  "structured_output"
  ],
  "pricing": {
@@ -18175,7 +18242,10 @@
  "response_format",
  "seed",
  "stop",
+ "structured_outputs",
  "temperature",
+ "tool_choice",
+ "tools",
  "top_k",
  "top_p"
  ]
@@ -18721,8 +18791,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.035869548,
- "output_per_million": 0.14347824480000002
+ "input_per_million": 0.04,
+ "output_per_million": 0.14
  }
  }
  },
@@ -19742,7 +19812,7 @@
  "family": "mistralai",
  "created_at": "2024-07-19 02:00:00 +0200",
  "context_window": 131072,
- "max_output_tokens": 128000,
+ "max_output_tokens": 16384,
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
@@ -19761,8 +19831,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.017934774,
- "output_per_million": 0.07173912240000001
+ "input_per_million": 0.02,
+ "output_per_million": 0.04
  }
  }
  },
@@ -19781,7 +19851,7 @@
  },
  "top_provider": {
  "context_length": 131072,
- "max_completion_tokens": 128000,
+ "max_completion_tokens": 16384,
  "is_moderated": false
  },
  "per_request_limits": null,
@@ -20018,8 +20088,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.03804346,
- "output_per_million": 0.152173896
+ "input_per_million": 0.04,
+ "output_per_million": 0.15
  }
  }
  },
@@ -20148,8 +20218,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.03804346,
- "output_per_million": 0.152173896
+ "input_per_million": 0.04,
+ "output_per_million": 0.15
  }
  }
  },
@@ -20287,9 +20357,9 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.049999999999999996,
- "output_per_million": 0.09999999999999999
- }
+ "input_per_million": 0.075,
+ "output_per_million": 0.19999999999999998
+ }
  }
  },
  "metadata": {
@@ -20557,8 +20627,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.08,
- "output_per_million": 0.24
+ "input_per_million": 0.39999999999999997,
+ "output_per_million": 0.39999999999999997
  }
  }
  },
@@ -20791,6 +20861,7 @@
  "include_reasoning",
  "reasoning",
  "response_format",
+ "structured_outputs",
  "temperature",
  "top_k",
  "top_p"
@@ -20954,8 +21025,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.38043459999999996,
- "output_per_million": 1.52173896
+ "input_per_million": 0.38,
+ "output_per_million": 1.52
  }
  }
  },
@@ -21085,8 +21156,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.062499969999999995,
- "output_per_million": 0.249999972
+ "input_per_million": 0.02,
+ "output_per_million": 0.07
  }
  }
  },
@@ -21576,8 +21647,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.127173852,
- "output_per_million": 0.5086955952000001
+ "input_per_million": 0.13,
+ "output_per_million": 0.51
  }
  }
  },
@@ -21761,7 +21832,7 @@
  "family": "nousresearch",
  "created_at": "2024-08-18 02:00:00 +0200",
  "context_window": 131072,
- "max_output_tokens": null,
+ "max_output_tokens": 131072,
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
@@ -21800,7 +21871,7 @@
  },
  "top_provider": {
  "context_length": 131072,
- "max_completion_tokens": null,
+ "max_completion_tokens": 131072,
  "is_moderated": false
  },
  "per_request_limits": null,
@@ -21920,8 +21991,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.127173852,
- "output_per_million": 0.5086955952000001
+ "input_per_million": 0.11,
+ "output_per_million": 0.38
  }
  }
  },
@@ -21991,8 +22062,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.12,
- "output_per_million": 0.3
+ "input_per_million": 0.6,
+ "output_per_million": 0.6
  }
  }
  },
@@ -22098,64 +22169,6 @@
  ]
  }
  },
- {
- "id": "nvidia/llama-3.1-nemotron-ultra-253b-v1:free",
- "name": "NVIDIA: Llama 3.1 Nemotron Ultra 253B v1 (free)",
- "provider": "openrouter",
- "family": "nvidia",
- "created_at": "2025-04-08 14:24:19 +0200",
- "context_window": 131072,
- "max_output_tokens": null,
- "knowledge_cutoff": null,
- "modalities": {
- "input": [
- "text"
- ],
- "output": [
- "text"
- ]
- },
- "capabilities": [
- "streaming",
- "predicted_outputs"
- ],
- "pricing": {},
- "metadata": {
- "description": "Llama-3.1-Nemotron-Ultra-253B-v1 is a large language model (LLM) optimized for advanced reasoning, human-interactive chat, retrieval-augmented generation (RAG), and tool-calling tasks. Derived from Meta’s Llama-3.1-405B-Instruct, it has been significantly customized using Neural Architecture Search (NAS), resulting in enhanced efficiency, reduced memory usage, and improved inference latency. The model supports a context length of up to 128K tokens and can operate efficiently on an 8x NVIDIA H100 node.\n\nNote: you must include `detailed thinking on` in the system prompt to enable reasoning. Please see [Usage Recommendations](https://huggingface.co/nvidia/Llama-3_1-Nemotron-Ultra-253B-v1#quick-start-and-usage-recommendations) for more.",
- "architecture": {
- "modality": "text->text",
- "input_modalities": [
- "text"
- ],
- "output_modalities": [
- "text"
- ],
- "tokenizer": "Llama3",
- "instruct_type": null
- },
- "top_provider": {
- "context_length": 131072,
- "max_completion_tokens": null,
- "is_moderated": false
- },
- "per_request_limits": null,
- "supported_parameters": [
- "frequency_penalty",
- "logit_bias",
- "logprobs",
- "max_tokens",
- "min_p",
- "presence_penalty",
- "repetition_penalty",
- "seed",
- "stop",
- "temperature",
- "top_k",
- "top_logprobs",
- "top_p"
- ]
- }
- },
  {
  "id": "nvidia/nemotron-nano-9b-v2",
  "name": "NVIDIA: Nemotron Nano 9B V2",
@@ -23943,9 +23956,9 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 1.25,
- "output_per_million": 10.0,
- "cached_input_per_million": 0.125
+ "input_per_million": 0.625,
+ "output_per_million": 5.0,
+ "cached_input_per_million": 0.0625
  }
  }
  },
@@ -24181,8 +24194,8 @@
  "provider": "openrouter",
  "family": "openai",
  "created_at": "2025-08-05 19:17:11 +0200",
- "context_window": 131000,
- "max_output_tokens": 131000,
+ "context_window": 131072,
+ "max_output_tokens": null,
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
@@ -24201,8 +24214,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.072,
- "output_per_million": 0.28
+ "input_per_million": 0.049999999999999996,
+ "output_per_million": 0.25
  }
  }
  },
@@ -24220,8 +24233,8 @@
  "instruct_type": null
  },
  "top_provider": {
- "context_length": 131000,
- "max_completion_tokens": 131000,
+ "context_length": 131072,
+ "max_completion_tokens": null,
  "is_moderated": false
  },
  "per_request_limits": null,
@@ -24304,8 +24317,8 @@
  "provider": "openrouter",
  "family": "openai",
  "created_at": "2025-08-05 19:17:09 +0200",
- "context_window": 131000,
- "max_output_tokens": 131000,
+ "context_window": 131072,
+ "max_output_tokens": 32768,
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
@@ -24324,7 +24337,7 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.04,
+ "input_per_million": 0.03,
  "output_per_million": 0.15
  }
  }
@@ -24343,8 +24356,8 @@
  "instruct_type": null
  },
  "top_provider": {
- "context_length": 131000,
- "max_completion_tokens": 131000,
+ "context_length": 131072,
+ "max_completion_tokens": 32768,
  "is_moderated": false
  },
  "per_request_limits": null,
@@ -25045,79 +25058,43 @@
  }
  },
  {
- "id": "openrouter/auto",
- "name": "Auto Router",
+ "id": "opengvlab/internvl3-78b",
+ "name": "OpenGVLab: InternVL3 78B",
  "provider": "openrouter",
- "family": "openrouter",
- "created_at": "2023-11-08 01:00:00 +0100",
- "context_window": 2000000,
+ "family": "opengvlab",
+ "created_at": "2025-09-15 20:55:55 +0200",
+ "context_window": 32768,
  "max_output_tokens": null,
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
+ "image",
  "text"
  ],
  "output": [
  "text"
  ]
  },
- "capabilities": [
- "streaming"
- ],
- "pricing": {},
- "metadata": {
- "description": "Your prompt will be processed by a meta-model and routed to one of dozens of models (see below), optimizing for the best possible output.\n\nTo see which model was used, visit [Activity](/activity), or read the `model` attribute of the response. Your response will be priced at the same rate as the routed model.\n\nThe meta-model is powered by [Not Diamond](https://docs.notdiamond.ai/docs/how-not-diamond-works). Learn more in our [docs](/docs/model-routing).\n\nRequests will be routed to the following models:\n- [openai/gpt-4o-2024-08-06](/openai/gpt-4o-2024-08-06)\n- [openai/gpt-4o-2024-05-13](/openai/gpt-4o-2024-05-13)\n- [openai/gpt-4o-mini-2024-07-18](/openai/gpt-4o-mini-2024-07-18)\n- [openai/chatgpt-4o-latest](/openai/chatgpt-4o-latest)\n- [openai/o1-preview-2024-09-12](/openai/o1-preview-2024-09-12)\n- [openai/o1-mini-2024-09-12](/openai/o1-mini-2024-09-12)\n- [anthropic/claude-3.5-sonnet](/anthropic/claude-3.5-sonnet)\n- [anthropic/claude-3.5-haiku](/anthropic/claude-3.5-haiku)\n- [anthropic/claude-3-opus](/anthropic/claude-3-opus)\n- [anthropic/claude-2.1](/anthropic/claude-2.1)\n- [google/gemini-pro-1.5](/google/gemini-pro-1.5)\n- [google/gemini-flash-1.5](/google/gemini-flash-1.5)\n- [mistralai/mistral-large-2407](/mistralai/mistral-large-2407)\n- [mistralai/mistral-nemo](/mistralai/mistral-nemo)\n- [deepseek/deepseek-r1](/deepseek/deepseek-r1)\n- [meta-llama/llama-3.1-70b-instruct](/meta-llama/llama-3.1-70b-instruct)\n- [meta-llama/llama-3.1-405b-instruct](/meta-llama/llama-3.1-405b-instruct)\n- [mistralai/mixtral-8x22b-instruct](/mistralai/mixtral-8x22b-instruct)\n- [cohere/command-r-plus](/cohere/command-r-plus)\n- [cohere/command-r](/cohere/command-r)",
- "architecture": {
- "modality": "text->text",
- "input_modalities": [
- "text"
- ],
- "output_modalities": [
- "text"
- ],
- "tokenizer": "Router",
- "instruct_type": null
- },
- "top_provider": {
- "context_length": null,
- "max_completion_tokens": null,
- "is_moderated": false
- },
- "per_request_limits": null,
- "supported_parameters": []
- }
- },
- {
- "id": "openrouter/sonoma-dusk-alpha",
- "name": "Sonoma Dusk Alpha",
- "provider": "openrouter",
- "family": "openrouter",
- "created_at": "2025-09-05 19:27:27 +0200",
- "context_window": 2000000,
- "max_output_tokens": null,
- "knowledge_cutoff": null,
- "modalities": {
- "input": [
- "text",
- "image"
- ],
- "output": [
- "text"
- ]
- },
  "capabilities": [
  "streaming",
- "function_calling",
- "structured_output"
+ "structured_output",
+ "predicted_outputs"
  ],
- "pricing": {},
+ "pricing": {
+ "text_tokens": {
+ "standard": {
+ "input_per_million": 0.03,
+ "output_per_million": 0.13
+ }
+ }
+ },
  "metadata": {
- "description": "This is a cloaked model provided to the community to gather feedback. A fast and intelligent general-purpose frontier model with a 2 million token context window. Supports image inputs and parallel tool calling.\n\nNote: It’s free to use during this testing period, and prompts and completions are logged by the model creator for feedback and training.",
+ "description": "The InternVL3 series is an advanced multimodal large language model (MLLM). Compared to InternVL 2.5, InternVL3 demonstrates stronger multimodal perception and reasoning capabilities. \n\nIn addition, InternVL3 is benchmarked against the Qwen2.5 Chat models, whose pre-trained base models serve as the initialization for its language component. Benefiting from Native Multimodal Pre-Training, the InternVL3 series surpasses the Qwen2.5 series in overall text performance.",
  "architecture": {
  "modality": "text+image->text",
  "input_modalities": [
- "text",
- "image"
+ "image",
+ "text"
  ],
  "output_modalities": [
  "text"
@@ -25126,73 +25103,71 @@
  "instruct_type": null
  },
  "top_provider": {
- "context_length": 2000000,
+ "context_length": 32768,
  "max_completion_tokens": null,
  "is_moderated": false
  },
  "per_request_limits": null,
  "supported_parameters": [
+ "frequency_penalty",
+ "logit_bias",
+ "logprobs",
  "max_tokens",
+ "min_p",
+ "presence_penalty",
+ "repetition_penalty",
  "response_format",
+ "seed",
+ "stop",
  "structured_outputs",
- "tool_choice",
- "tools"
+ "temperature",
+ "top_k",
+ "top_logprobs",
+ "top_p"
  ]
  }
  },
  {
- "id": "openrouter/sonoma-sky-alpha",
- "name": "Sonoma Sky Alpha",
+ "id": "openrouter/auto",
+ "name": "Auto Router",
  "provider": "openrouter",
  "family": "openrouter",
- "created_at": "2025-09-05 19:23:21 +0200",
+ "created_at": "2023-11-08 01:00:00 +0100",
  "context_window": 2000000,
  "max_output_tokens": null,
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
- "text",
- "image"
+ "text"
  ],
  "output": [
  "text"
  ]
  },
  "capabilities": [
- "streaming",
- "function_calling",
- "structured_output"
+ "streaming"
  ],
  "pricing": {},
  "metadata": {
- "description": "This is a cloaked model provided to the community to gather feedback. A maximally intelligent general-purpose frontier model with a 2 million token context window. Supports image inputs and parallel tool calling.\n\nNote: It’s free to use during this testing period, and prompts and completions are logged by the model creator for feedback and training.",
+ "description": "Your prompt will be processed by a meta-model and routed to one of dozens of models (see below), optimizing for the best possible output.\n\nTo see which model was used, visit [Activity](/activity), or read the `model` attribute of the response. Your response will be priced at the same rate as the routed model.\n\nThe meta-model is powered by [Not Diamond](https://docs.notdiamond.ai/docs/how-not-diamond-works). Learn more in our [docs](/docs/model-routing).\n\nRequests will be routed to the following models:\n- [openai/gpt-4o-2024-08-06](/openai/gpt-4o-2024-08-06)\n- [openai/gpt-4o-2024-05-13](/openai/gpt-4o-2024-05-13)\n- [openai/gpt-4o-mini-2024-07-18](/openai/gpt-4o-mini-2024-07-18)\n- [openai/chatgpt-4o-latest](/openai/chatgpt-4o-latest)\n- [openai/o1-preview-2024-09-12](/openai/o1-preview-2024-09-12)\n- [openai/o1-mini-2024-09-12](/openai/o1-mini-2024-09-12)\n- [anthropic/claude-3.5-sonnet](/anthropic/claude-3.5-sonnet)\n- [anthropic/claude-3.5-haiku](/anthropic/claude-3.5-haiku)\n- [anthropic/claude-3-opus](/anthropic/claude-3-opus)\n- [anthropic/claude-2.1](/anthropic/claude-2.1)\n- [google/gemini-pro-1.5](/google/gemini-pro-1.5)\n- [google/gemini-flash-1.5](/google/gemini-flash-1.5)\n- [mistralai/mistral-large-2407](/mistralai/mistral-large-2407)\n- [mistralai/mistral-nemo](/mistralai/mistral-nemo)\n- [deepseek/deepseek-r1](/deepseek/deepseek-r1)\n- [meta-llama/llama-3.1-70b-instruct](/meta-llama/llama-3.1-70b-instruct)\n- [meta-llama/llama-3.1-405b-instruct](/meta-llama/llama-3.1-405b-instruct)\n- [mistralai/mixtral-8x22b-instruct](/mistralai/mixtral-8x22b-instruct)\n- [cohere/command-r-plus](/cohere/command-r-plus)\n- [cohere/command-r](/cohere/command-r)",
  "architecture": {
25170
- "modality": "text+image->text",
25154
+ "modality": "text->text",
25171
25155
  "input_modalities": [
25172
- "text",
25173
- "image"
25156
+ "text"
25174
25157
  ],
25175
25158
  "output_modalities": [
25176
25159
  "text"
25177
25160
  ],
25178
- "tokenizer": "Other",
25161
+ "tokenizer": "Router",
25179
25162
  "instruct_type": null
25180
25163
  },
25181
25164
  "top_provider": {
25182
- "context_length": 2000000,
25165
+ "context_length": null,
25183
25166
  "max_completion_tokens": null,
25184
25167
  "is_moderated": false
25185
25168
  },
25186
25169
  "per_request_limits": null,
25187
- "supported_parameters": [
25188
- "include_reasoning",
25189
- "max_tokens",
25190
- "reasoning",
25191
- "response_format",
25192
- "structured_outputs",
25193
- "tool_choice",
25194
- "tools"
25195
- ]
25170
+ "supported_parameters": []
25196
25171
  }
25197
25172
  },
25198
25173
  {
@@ -25583,8 +25558,8 @@
25583
25558
  "pricing": {
25584
25559
  "text_tokens": {
25585
25560
  "standard": {
25586
- "input_per_million": 0.06521736,
25587
- "output_per_million": 0.260869536
25561
+ "input_per_million": 0.07,
25562
+ "output_per_million": 0.26
25588
25563
  }
25589
25564
  }
25590
25565
  },
@@ -26404,8 +26379,8 @@
26404
26379
  "pricing": {
26405
26380
  "text_tokens": {
26406
26381
  "standard": {
26407
- "input_per_million": 0.035869548,
26408
- "output_per_million": 0.14347824480000002
26382
+ "input_per_million": 0.04,
26383
+ "output_per_million": 0.14
26409
26384
  }
26410
26385
  }
26411
26386
  },
@@ -26535,8 +26510,8 @@
26535
26510
  "pricing": {
26536
26511
  "text_tokens": {
26537
26512
  "standard": {
26538
- "input_per_million": 0.24999987999999998,
26539
- "output_per_million": 0.999999888
26513
+ "input_per_million": 0.07,
26514
+ "output_per_million": 0.28
26540
26515
  }
26541
26516
  }
26542
26517
  },
@@ -26793,8 +26768,8 @@
26793
26768
  "pricing": {
26794
26769
  "text_tokens": {
26795
26770
  "standard": {
26796
- "input_per_million": 0.13,
26797
- "output_per_million": 0.6
26771
+ "input_per_million": 0.18,
26772
+ "output_per_million": 0.54
26798
26773
  }
26799
26774
  }
26800
26775
  },
@@ -26847,7 +26822,7 @@
26847
26822
  "family": "qwen",
26848
26823
  "created_at": "2025-07-21 19:39:15 +0200",
26849
26824
  "context_window": 262144,
26850
- "max_output_tokens": null,
26825
+ "max_output_tokens": 262144,
26851
26826
  "knowledge_cutoff": null,
26852
26827
  "modalities": {
26853
26828
  "input": [
@@ -26866,8 +26841,8 @@
26866
26841
  "pricing": {
26867
26842
  "text_tokens": {
26868
26843
  "standard": {
26869
- "input_per_million": 0.0974999532,
26870
- "output_per_million": 0.38999995632
26844
+ "input_per_million": 0.09999999999999999,
26845
+ "output_per_million": 0.09999999999999999
26871
26846
  }
26872
26847
  }
26873
26848
  },
@@ -26886,7 +26861,7 @@
26886
26861
  },
26887
26862
  "top_provider": {
26888
26863
  "context_length": 262144,
26889
- "max_completion_tokens": null,
26864
+ "max_completion_tokens": 262144,
26890
26865
  "is_moderated": false
26891
26866
  },
26892
26867
  "per_request_limits": null,
@@ -26937,8 +26912,8 @@
26937
26912
  "pricing": {
26938
26913
  "text_tokens": {
26939
26914
  "standard": {
26940
- "input_per_million": 0.0974999532,
26941
- "output_per_million": 0.38999995632
26915
+ "input_per_million": 0.09999999999999999,
26916
+ "output_per_million": 0.39
26942
26917
  }
26943
26918
  }
26944
26919
  },
@@ -26974,6 +26949,7 @@
26974
26949
  "response_format",
26975
26950
  "seed",
26976
26951
  "stop",
26952
+ "structured_outputs",
26977
26953
  "temperature",
26978
26954
  "tool_choice",
26979
26955
  "tools",
@@ -27075,8 +27051,8 @@
27075
27051
  "pricing": {
27076
27052
  "text_tokens": {
27077
27053
  "standard": {
27078
- "input_per_million": 0.035869548,
27079
- "output_per_million": 0.14347824480000002
27054
+ "input_per_million": 0.06,
27055
+ "output_per_million": 0.22
27080
27056
  }
27081
27057
  }
27082
27058
  },
@@ -27148,8 +27124,8 @@
27148
27124
  "pricing": {
27149
27125
  "text_tokens": {
27150
27126
  "standard": {
27151
- "input_per_million": 0.07065213999999999,
27152
- "output_per_million": 0.282608664
27127
+ "input_per_million": 0.07,
27128
+ "output_per_million": 0.28
27153
27129
  }
27154
27130
  }
27155
27131
  },
@@ -27183,6 +27159,7 @@
27183
27159
  "response_format",
27184
27160
  "seed",
27185
27161
  "stop",
27162
+ "structured_outputs",
27186
27163
  "temperature",
27187
27164
  "tool_choice",
27188
27165
  "tools",
@@ -27218,8 +27195,8 @@
27218
27195
  "pricing": {
27219
27196
  "text_tokens": {
27220
27197
  "standard": {
27221
- "input_per_million": 0.08967387,
27222
- "output_per_million": 0.358695612
27198
+ "input_per_million": 0.08,
27199
+ "output_per_million": 0.29
27223
27200
  }
27224
27201
  }
27225
27202
  },
@@ -27255,6 +27232,7 @@
27255
27232
  "response_format",
27256
27233
  "seed",
27257
27234
  "stop",
27235
+ "structured_outputs",
27258
27236
  "temperature",
27259
27237
  "tool_choice",
27260
27238
  "tools",
@@ -27350,8 +27328,8 @@
27350
27328
  "pricing": {
27351
27329
  "text_tokens": {
27352
27330
  "standard": {
27353
- "input_per_million": 0.0322825932,
27354
- "output_per_million": 0.12913042032
27331
+ "input_per_million": 0.03,
27332
+ "output_per_million": 0.13
27355
27333
  }
27356
27334
  }
27357
27335
  },
@@ -27607,8 +27585,8 @@
27607
27585
  "pricing": {
27608
27586
  "text_tokens": {
27609
27587
  "standard": {
27610
- "input_per_million": 0.24999987999999998,
27611
- "output_per_million": 0.999999888
27588
+ "input_per_million": 0.22,
27589
+ "output_per_million": 0.95
27612
27590
  }
27613
27591
  }
27614
27592
  },
@@ -27678,8 +27656,8 @@
27678
27656
  "pricing": {
27679
27657
  "text_tokens": {
27680
27658
  "standard": {
27681
- "input_per_million": 0.07065213999999999,
27682
- "output_per_million": 0.282608664
27659
+ "input_per_million": 0.07,
27660
+ "output_per_million": 0.28
27683
27661
  }
27684
27662
  }
27685
27663
  },
@@ -27713,6 +27691,7 @@
27713
27691
  "response_format",
27714
27692
  "seed",
27715
27693
  "stop",
27694
+ "structured_outputs",
27716
27695
  "temperature",
27717
27696
  "tool_choice",
27718
27697
  "tools",
@@ -27723,13 +27702,13 @@
27723
27702
  }
27724
27703
  },
27725
27704
  {
27726
- "id": "qwen/qwen3-coder:free",
27727
- "name": "Qwen: Qwen3 Coder 480B A35B (free)",
27705
+ "id": "qwen/qwen3-coder-flash",
27706
+ "name": "Qwen: Qwen3 Coder Flash",
27728
27707
  "provider": "openrouter",
27729
27708
  "family": "qwen",
27730
- "created_at": "2025-07-23 02:29:06 +0200",
27731
- "context_window": 262144,
27732
- "max_output_tokens": null,
27709
+ "created_at": "2025-09-17 15:25:36 +0200",
27710
+ "context_window": 128000,
27711
+ "max_output_tokens": 65536,
27733
27712
  "knowledge_cutoff": null,
27734
27713
  "modalities": {
27735
27714
  "input": [
@@ -27742,11 +27721,19 @@
27742
27721
  "capabilities": [
27743
27722
  "streaming",
27744
27723
  "function_calling",
27745
- "predicted_outputs"
27724
+ "structured_output"
27746
27725
  ],
27747
- "pricing": {},
27726
+ "pricing": {
27727
+ "text_tokens": {
27728
+ "standard": {
27729
+ "input_per_million": 0.3,
27730
+ "output_per_million": 1.5,
27731
+ "cached_input_per_million": 0.08
27732
+ }
27733
+ }
27734
+ },
27748
27735
  "metadata": {
27749
- "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.",
27736
+ "description": "Qwen3 Coder Flash is Alibaba's fast and cost efficient version of their proprietary Qwen3 Coder Plus. It is a powerful coding agent model specializing in autonomous programming via tool calling and environment interaction, combining coding proficiency with versatile general-purpose abilities.",
27750
27737
  "architecture": {
27751
27738
  "modality": "text->text",
27752
27739
  "input_modalities": [
@@ -27759,8 +27746,125 @@
27759
27746
  "instruct_type": null
27760
27747
  },
27761
27748
  "top_provider": {
27762
- "context_length": 262144,
27763
- "max_completion_tokens": null,
27749
+ "context_length": 128000,
27750
+ "max_completion_tokens": 65536,
27751
+ "is_moderated": false
27752
+ },
27753
+ "per_request_limits": null,
27754
+ "supported_parameters": [
27755
+ "max_tokens",
27756
+ "presence_penalty",
27757
+ "response_format",
27758
+ "seed",
27759
+ "temperature",
27760
+ "tool_choice",
27761
+ "tools",
27762
+ "top_p"
27763
+ ]
27764
+ }
27765
+ },
27766
+ {
27767
+ "id": "qwen/qwen3-coder-plus",
27768
+ "name": "Qwen: Qwen3 Coder Plus",
27769
+ "provider": "openrouter",
27770
+ "family": "qwen",
27771
+ "created_at": "2025-09-17 15:19:54 +0200",
27772
+ "context_window": 128000,
27773
+ "max_output_tokens": 65536,
27774
+ "knowledge_cutoff": null,
27775
+ "modalities": {
27776
+ "input": [
27777
+ "text"
27778
+ ],
27779
+ "output": [
27780
+ "text"
27781
+ ]
27782
+ },
27783
+ "capabilities": [
27784
+ "streaming",
27785
+ "function_calling",
27786
+ "structured_output"
27787
+ ],
27788
+ "pricing": {
27789
+ "text_tokens": {
27790
+ "standard": {
27791
+ "input_per_million": 1.0,
27792
+ "output_per_million": 5.0,
27793
+ "cached_input_per_million": 0.09999999999999999
27794
+ }
27795
+ }
27796
+ },
27797
+ "metadata": {
27798
+ "description": "Qwen3 Coder Plus is Alibaba's proprietary version of the Open Source Qwen3 Coder 480B A35B. It is a powerful coding agent model specializing in autonomous programming via tool calling and environment interaction, combining coding proficiency with versatile general-purpose abilities.",
27799
+ "architecture": {
27800
+ "modality": "text->text",
27801
+ "input_modalities": [
27802
+ "text"
27803
+ ],
27804
+ "output_modalities": [
27805
+ "text"
27806
+ ],
27807
+ "tokenizer": "Qwen3",
27808
+ "instruct_type": null
27809
+ },
27810
+ "top_provider": {
27811
+ "context_length": 128000,
27812
+ "max_completion_tokens": 65536,
27813
+ "is_moderated": false
27814
+ },
27815
+ "per_request_limits": null,
27816
+ "supported_parameters": [
27817
+ "max_tokens",
27818
+ "presence_penalty",
27819
+ "response_format",
27820
+ "seed",
27821
+ "structured_outputs",
27822
+ "temperature",
27823
+ "tool_choice",
27824
+ "tools",
27825
+ "top_p"
27826
+ ]
27827
+ }
27828
+ },
27829
+ {
27830
+ "id": "qwen/qwen3-coder:free",
27831
+ "name": "Qwen: Qwen3 Coder 480B A35B (free)",
27832
+ "provider": "openrouter",
27833
+ "family": "qwen",
27834
+ "created_at": "2025-07-23 02:29:06 +0200",
27835
+ "context_window": 262144,
27836
+ "max_output_tokens": null,
27837
+ "knowledge_cutoff": null,
27838
+ "modalities": {
27839
+ "input": [
27840
+ "text"
27841
+ ],
27842
+ "output": [
27843
+ "text"
27844
+ ]
27845
+ },
27846
+ "capabilities": [
27847
+ "streaming",
27848
+ "function_calling",
27849
+ "predicted_outputs"
27850
+ ],
27851
+ "pricing": {},
27852
+ "metadata": {
27853
+ "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.",
27854
+ "architecture": {
27855
+ "modality": "text->text",
27856
+ "input_modalities": [
27857
+ "text"
27858
+ ],
27859
+ "output_modalities": [
27860
+ "text"
27861
+ ],
27862
+ "tokenizer": "Qwen3",
27863
+ "instruct_type": null
27864
+ },
27865
+ "top_provider": {
27866
+ "context_length": 262144,
27867
+ "max_completion_tokens": null,
27764
27868
  "is_moderated": false
27765
27869
  },
27766
27870
  "per_request_limits": null,
@@ -27871,8 +27975,8 @@
27871
27975
  "pricing": {
27872
27976
  "text_tokens": {
27873
27977
  "standard": {
27874
- "input_per_million": 0.09782604,
27875
- "output_per_million": 0.391304304
27978
+ "input_per_million": 0.09999999999999999,
27979
+ "output_per_million": 0.7999999999999999
27876
27980
  }
27877
27981
  }
27878
27982
  },
@@ -27906,6 +28010,7 @@
27906
28010
  "response_format",
27907
28011
  "seed",
27908
28012
  "stop",
28013
+ "structured_outputs",
27909
28014
  "temperature",
27910
28015
  "tool_choice",
27911
28016
  "tools",
@@ -27941,8 +28046,8 @@
27941
28046
  "pricing": {
27942
28047
  "text_tokens": {
27943
28048
  "standard": {
27944
- "input_per_million": 0.09782604,
27945
- "output_per_million": 0.391304304
28049
+ "input_per_million": 0.09999999999999999,
28050
+ "output_per_million": 0.7999999999999999
27946
28051
  }
27947
28052
  }
27948
28053
  },
@@ -27978,6 +28083,7 @@
27978
28083
  "response_format",
27979
28084
  "seed",
27980
28085
  "stop",
28086
+ "structured_outputs",
27981
28087
  "temperature",
27982
28088
  "tool_choice",
27983
28089
  "tools",
@@ -28242,66 +28348,6 @@
28242
28348
  ]
28243
28349
  }
28244
28350
  },
28245
- {
28246
- "id": "rekaai/reka-flash-3:free",
28247
- "name": "Reka: Flash 3 (free)",
28248
- "provider": "openrouter",
28249
- "family": "rekaai",
28250
- "created_at": "2025-03-12 21:53:33 +0100",
28251
- "context_window": 32768,
28252
- "max_output_tokens": null,
28253
- "knowledge_cutoff": null,
28254
- "modalities": {
28255
- "input": [
28256
- "text"
28257
- ],
28258
- "output": [
28259
- "text"
28260
- ]
28261
- },
28262
- "capabilities": [
28263
- "streaming",
28264
- "predicted_outputs"
28265
- ],
28266
- "pricing": {},
28267
- "metadata": {
28268
- "description": "Reka Flash 3 is a general-purpose, instruction-tuned large language model with 21 billion parameters, developed by Reka. It excels at general chat, coding tasks, instruction-following, and function calling. Featuring a 32K context length and optimized through reinforcement learning (RLOO), it provides competitive performance comparable to proprietary models within a smaller parameter footprint. Ideal for low-latency, local, or on-device deployments, Reka Flash 3 is compact, supports efficient quantization (down to 11GB at 4-bit precision), and employs explicit reasoning tags (\"<reasoning>\") to indicate its internal thought process.\n\nReka Flash 3 is primarily an English model with limited multilingual understanding capabilities. The model weights are released under the Apache 2.0 license.",
28269
- "architecture": {
28270
- "modality": "text->text",
28271
- "input_modalities": [
28272
- "text"
28273
- ],
28274
- "output_modalities": [
28275
- "text"
28276
- ],
28277
- "tokenizer": "Other",
28278
- "instruct_type": null
28279
- },
28280
- "top_provider": {
28281
- "context_length": 32768,
28282
- "max_completion_tokens": null,
28283
- "is_moderated": false
28284
- },
28285
- "per_request_limits": null,
28286
- "supported_parameters": [
28287
- "frequency_penalty",
28288
- "include_reasoning",
28289
- "logit_bias",
28290
- "logprobs",
28291
- "max_tokens",
28292
- "min_p",
28293
- "presence_penalty",
28294
- "reasoning",
28295
- "repetition_penalty",
28296
- "seed",
28297
- "stop",
28298
- "temperature",
28299
- "top_k",
28300
- "top_logprobs",
28301
- "top_p"
28302
- ]
28303
- }
28304
- },
28305
28351
  {
28306
28352
  "id": "sao10k/l3-euryale-70b",
28307
28353
  "name": "Sao10k: Llama 3 Euryale 70B v2.1",
@@ -28390,7 +28436,7 @@
28390
28436
  "pricing": {
28391
28437
  "text_tokens": {
28392
28438
  "standard": {
28393
- "input_per_million": 0.02,
28439
+ "input_per_million": 0.04,
28394
28440
  "output_per_million": 0.049999999999999996
28395
28441
  }
28396
28442
  }
@@ -28584,8 +28630,8 @@
28584
28630
  "pricing": {
28585
28631
  "text_tokens": {
28586
28632
  "standard": {
28587
- "input_per_million": 0.035869548,
28588
- "output_per_million": 0.14347824480000002
28633
+ "input_per_million": 0.04,
28634
+ "output_per_million": 0.14
28589
28635
  }
28590
28636
  }
28591
28637
  },
@@ -28683,69 +28729,6 @@
28683
28729
  ]
28684
28730
  }
28685
28731
  },
28686
- {
28687
- "id": "sophosympatheia/midnight-rose-70b",
28688
- "name": "Midnight Rose 70B",
28689
- "provider": "openrouter",
28690
- "family": "sophosympatheia",
28691
- "created_at": "2024-03-22 01:00:00 +0100",
28692
- "context_window": 4096,
28693
- "max_output_tokens": 2048,
28694
- "knowledge_cutoff": null,
28695
- "modalities": {
28696
- "input": [
28697
- "text"
28698
- ],
28699
- "output": [
28700
- "text"
28701
- ]
28702
- },
28703
- "capabilities": [
28704
- "streaming",
28705
- "predicted_outputs"
28706
- ],
28707
- "pricing": {
28708
- "text_tokens": {
28709
- "standard": {
28710
- "input_per_million": 0.7999999999999999,
28711
- "output_per_million": 0.7999999999999999
28712
- }
28713
- }
28714
- },
28715
- "metadata": {
28716
- "description": "A merge with a complex family tree, this model was crafted for roleplaying and storytelling. Midnight Rose is a successor to Rogue Rose and Aurora Nights and improves upon them both. It wants to produce lengthy output by default and is the best creative writing merge produced so far by sophosympatheia.\n\nDescending from earlier versions of Midnight Rose and [Wizard Tulu Dolphin 70B](https://huggingface.co/sophosympatheia/Wizard-Tulu-Dolphin-70B-v1.0), it inherits the best qualities of each.",
28717
- "architecture": {
28718
- "modality": "text->text",
28719
- "input_modalities": [
28720
- "text"
28721
- ],
28722
- "output_modalities": [
28723
- "text"
28724
- ],
28725
- "tokenizer": "Llama2",
28726
- "instruct_type": "airoboros"
28727
- },
28728
- "top_provider": {
28729
- "context_length": 4096,
28730
- "max_completion_tokens": 2048,
28731
- "is_moderated": false
28732
- },
28733
- "per_request_limits": null,
28734
- "supported_parameters": [
28735
- "frequency_penalty",
28736
- "logit_bias",
28737
- "max_tokens",
28738
- "min_p",
28739
- "presence_penalty",
28740
- "repetition_penalty",
28741
- "seed",
28742
- "stop",
28743
- "temperature",
28744
- "top_k",
28745
- "top_p"
28746
- ]
28747
- }
28748
- },
28749
28732
  {
28750
28733
  "id": "stepfun-ai/step3",
28751
28734
  "name": "StepFun: Step3",
@@ -28802,6 +28785,7 @@
28802
28785
  "include_reasoning",
28803
28786
  "reasoning",
28804
28787
  "response_format",
28788
+ "structured_outputs",
28805
28789
  "temperature",
28806
28790
  "tool_choice",
28807
28791
  "tools",
@@ -28931,6 +28915,7 @@
28931
28915
  "response_format",
28932
28916
  "seed",
28933
28917
  "stop",
28918
+ "structured_outputs",
28934
28919
  "temperature",
28935
28920
  "top_k",
28936
28921
  "top_logprobs",
@@ -29180,7 +29165,6 @@
29180
29165
  "supported_parameters": [
29181
29166
  "frequency_penalty",
29182
29167
  "logit_bias",
29183
- "logprobs",
29184
29168
  "max_tokens",
29185
29169
  "min_p",
29186
29170
  "presence_penalty",
@@ -29221,8 +29205,8 @@
29221
29205
  "pricing": {
29222
29206
  "text_tokens": {
29223
29207
  "standard": {
29224
- "input_per_million": 0.039130416,
29225
- "output_per_million": 0.1565217216
29208
+ "input_per_million": 0.04,
29209
+ "output_per_million": 0.16
29226
29210
  }
29227
29211
  }
29228
29212
  },
@@ -29282,8 +29266,7 @@
29282
29266
  "capabilities": [
29283
29267
  "streaming",
29284
29268
  "function_calling",
29285
- "structured_output",
29286
- "predicted_outputs"
29269
+ "structured_output"
29287
29270
  ],
29288
29271
  "pricing": {
29289
29272
  "text_tokens": {
@@ -29314,83 +29297,14 @@
29314
29297
  "per_request_limits": null,
29315
29298
  "supported_parameters": [
29316
29299
  "frequency_penalty",
29317
- "logit_bias",
29318
- "logprobs",
29319
29300
  "max_tokens",
29320
- "min_p",
29321
29301
  "presence_penalty",
29322
- "repetition_penalty",
29323
29302
  "response_format",
29324
- "seed",
29325
29303
  "stop",
29326
29304
  "structured_outputs",
29327
29305
  "temperature",
29328
29306
  "tool_choice",
29329
29307
  "tools",
29330
- "top_k",
29331
- "top_p"
29332
- ]
29333
- }
29334
- },
29335
- {
29336
- "id": "thudm/glm-4-32b",
29337
- "name": "THUDM: GLM 4 32B",
29338
- "provider": "openrouter",
29339
- "family": "thudm",
29340
- "created_at": "2025-04-17 22:15:15 +0200",
29341
- "context_window": 32000,
29342
- "max_output_tokens": 32000,
29343
- "knowledge_cutoff": null,
29344
- "modalities": {
29345
- "input": [
29346
- "text"
29347
- ],
29348
- "output": [
29349
- "text"
29350
- ]
29351
- },
29352
- "capabilities": [
29353
- "streaming",
29354
- "predicted_outputs"
29355
- ],
29356
- "pricing": {
29357
- "text_tokens": {
29358
- "standard": {
29359
- "input_per_million": 0.55,
29360
- "output_per_million": 1.66
29361
- }
29362
- }
29363
- },
29364
- "metadata": {
29365
- "description": "GLM-4-32B-0414 is a 32B bilingual (Chinese-English) open-weight language model optimized for code generation, function calling, and agent-style tasks. Pretrained on 15T of high-quality and reasoning-heavy data, it was further refined using human preference alignment, rejection sampling, and reinforcement learning. The model excels in complex reasoning, artifact generation, and structured output tasks, achieving performance comparable to GPT-4o and DeepSeek-V3-0324 across several benchmarks.",
29366
- "architecture": {
29367
- "modality": "text->text",
29368
- "input_modalities": [
29369
- "text"
29370
- ],
29371
- "output_modalities": [
29372
- "text"
29373
- ],
29374
- "tokenizer": "Other",
29375
- "instruct_type": null
29376
- },
29377
- "top_provider": {
29378
- "context_length": 32000,
29379
- "max_completion_tokens": 32000,
29380
- "is_moderated": false
29381
- },
29382
- "per_request_limits": null,
29383
- "supported_parameters": [
29384
- "frequency_penalty",
29385
- "logit_bias",
29386
- "max_tokens",
29387
- "min_p",
29388
- "presence_penalty",
29389
- "repetition_penalty",
29390
- "seed",
29391
- "stop",
29392
- "temperature",
29393
- "top_k",
29394
29308
  "top_p"
29395
29309
  ]
29396
29310
  }
@@ -29486,8 +29400,8 @@
29486
29400
  "pricing": {
29487
29401
  "text_tokens": {
29488
29402
  "standard": {
29489
- "input_per_million": 0.035869548,
29490
- "output_per_million": 0.14347824480000002
29403
+ "input_per_million": 0.04,
29404
+ "output_per_million": 0.14
29491
29405
  }
29492
29406
  }
29493
29407
  },
@@ -29784,11 +29698,11 @@
29784
29698
  }
29785
29699
  },
29786
29700
  {
29787
- "id": "x-ai/grok-2-1212",
29788
- "name": "xAI: Grok 2 1212",
29701
+ "id": "x-ai/grok-3",
29702
+ "name": "xAI: Grok 3",
29789
29703
  "provider": "openrouter",
29790
29704
  "family": "x-ai",
29791
- "created_at": "2024-12-15 04:20:14 +0100",
29705
+ "created_at": "2025-06-10 21:15:08 +0200",
29792
29706
  "context_window": 131072,
29793
29707
  "max_output_tokens": null,
29794
29708
  "knowledge_cutoff": null,
@@ -29808,13 +29722,14 @@
29808
29722
  "pricing": {
29809
29723
  "text_tokens": {
29810
29724
  "standard": {
29811
- "input_per_million": 2.0,
29812
- "output_per_million": 10.0
29725
+ "input_per_million": 3.0,
29726
+ "output_per_million": 15.0,
29727
+ "cached_input_per_million": 0.75
29813
29728
  }
29814
29729
  }
29815
29730
  },
29816
29731
  "metadata": {
29817
- "description": "Grok 2 1212 introduces significant enhancements to accuracy, instruction adherence, and multilingual support, making it a powerful and flexible choice for developers seeking a highly steerable, intelligent model.",
29732
+ "description": "Grok 3 is the latest model from xAI. It's their flagship model that excels at enterprise use cases like data extraction, coding, and text summarization. Possesses deep domain knowledge in finance, healthcare, law, and science.\n\n",
29818
29733
  "architecture": {
29819
29734
  "modality": "text->text",
29820
29735
  "input_modalities": [
@@ -29840,6 +29755,7 @@
29840
29755
  "response_format",
29841
29756
  "seed",
29842
29757
  "stop",
29758
+ "structured_outputs",
29843
29759
  "temperature",
29844
29760
  "tool_choice",
29845
29761
  "tools",
@@ -29849,75 +29765,11 @@
29849
29765
  }
29850
29766
  },
29851
29767
  {
29852
- "id": "x-ai/grok-2-vision-1212",
29853
- "name": "xAI: Grok 2 Vision 1212",
29854
- "provider": "openrouter",
29855
- "family": "x-ai",
29856
- "created_at": "2024-12-15 05:35:38 +0100",
29857
- "context_window": 32768,
29858
- "max_output_tokens": null,
29859
- "knowledge_cutoff": null,
29860
- "modalities": {
29861
- "input": [
29862
- "text",
29863
- "image"
29864
- ],
29865
- "output": [
29866
- "text"
29867
- ]
29868
- },
29869
- "capabilities": [
29870
- "streaming",
29871
- "structured_output"
29872
- ],
29873
- "pricing": {
29874
- "text_tokens": {
29875
- "standard": {
29876
- "input_per_million": 2.0,
29877
- "output_per_million": 10.0
29878
- }
29879
- }
29880
- },
29881
- "metadata": {
29882
- "description": "Grok 2 Vision 1212 advances image-based AI with stronger visual comprehension, refined instruction-following, and multilingual support. From object recognition to style analysis, it empowers developers to build more intuitive, visually aware applications. Its enhanced steerability and reasoning establish a robust foundation for next-generation image solutions.\n\nTo read more about this model, check out [xAI's announcement](https://x.ai/blog/grok-1212).",
29883
- "architecture": {
29884
- "modality": "text+image->text",
29885
- "input_modalities": [
29886
- "text",
29887
- "image"
29888
- ],
29889
- "output_modalities": [
29890
- "text"
29891
- ],
29892
- "tokenizer": "Grok",
29893
- "instruct_type": null
29894
- },
29895
- "top_provider": {
29896
- "context_length": 32768,
29897
- "max_completion_tokens": null,
29898
- "is_moderated": false
29899
- },
29900
- "per_request_limits": null,
29901
- "supported_parameters": [
29902
- "frequency_penalty",
29903
- "logprobs",
29904
- "max_tokens",
29905
- "presence_penalty",
29906
- "response_format",
29907
- "seed",
29908
- "stop",
29909
- "temperature",
29910
- "top_logprobs",
29911
- "top_p"
29912
- ]
29913
- }
29914
- },
29915
- {
29916
- "id": "x-ai/grok-3",
29917
- "name": "xAI: Grok 3",
29768
+ "id": "x-ai/grok-3-beta",
29769
+ "name": "xAI: Grok 3 Beta",
29918
29770
  "provider": "openrouter",
29919
29771
  "family": "x-ai",
29920
- "created_at": "2025-06-10 21:15:08 +0200",
29772
+ "created_at": "2025-04-10 01:07:48 +0200",
29921
29773
  "context_window": 131072,
29922
29774
  "max_output_tokens": null,
29923
29775
  "knowledge_cutoff": null,
@@ -29944,7 +29796,7 @@
29944
29796
  }
29945
29797
  },
29946
29798
  "metadata": {
29947
- "description": "Grok 3 is the latest model from xAI. It's their flagship model that excels at enterprise use cases like data extraction, coding, and text summarization. Possesses deep domain knowledge in finance, healthcare, law, and science.\n\n",
29799
+ "description": "Grok 3 is the latest model from xAI. It's their flagship model that excels at enterprise use cases like data extraction, coding, and text summarization. Possesses deep domain knowledge in finance, healthcare, law, and science.\n\nExcels in structured tasks and benchmarks like GPQA, LCB, and MMLU-Pro where it outperforms Grok 3 Mini even on high thinking. \n\nNote: That there are two xAI endpoints for this model. By default when using this model we will always route you to the base endpoint. If you want the fast endpoint you can add `provider: { sort: throughput}`, to sort by throughput instead. \n",
29948
29800
  "architecture": {
29949
29801
  "modality": "text->text",
29950
29802
  "input_modalities": [
@@ -29970,7 +29822,6 @@
29970
29822
  "response_format",
29971
29823
  "seed",
29972
29824
  "stop",
29973
- "structured_outputs",
29974
29825
  "temperature",
29975
29826
  "tool_choice",
29976
29827
  "tools",
@@ -29980,11 +29831,11 @@
29980
29831
  }
29981
29832
  },
29982
29833
  {
29983
- "id": "x-ai/grok-3-beta",
29984
- "name": "xAI: Grok 3 Beta",
29834
+ "id": "x-ai/grok-3-mini",
29835
+ "name": "xAI: Grok 3 Mini",
29985
29836
  "provider": "openrouter",
29986
29837
  "family": "x-ai",
29987
- "created_at": "2025-04-10 01:07:48 +0200",
29838
+ "created_at": "2025-06-10 21:20:45 +0200",
29988
29839
  "context_window": 131072,
29989
29840
  "max_output_tokens": null,
29990
29841
  "knowledge_cutoff": null,
@@ -30004,14 +29855,14 @@
30004
29855
  "pricing": {
30005
29856
  "text_tokens": {
30006
29857
  "standard": {
30007
- "input_per_million": 3.0,
30008
- "output_per_million": 15.0,
30009
- "cached_input_per_million": 0.75
29858
+ "input_per_million": 0.3,
29859
+ "output_per_million": 0.5,
29860
+ "cached_input_per_million": 0.075
30010
29861
  }
30011
29862
  }
30012
29863
  },
30013
29864
  "metadata": {
30014
- "description": "Grok 3 is the latest model from xAI. It's their flagship model that excels at enterprise use cases like data extraction, coding, and text summarization. Possesses deep domain knowledge in finance, healthcare, law, and science.\n\nExcels in structured tasks and benchmarks like GPQA, LCB, and MMLU-Pro where it outperforms Grok 3 Mini even on high thinking. \n\nNote: That there are two xAI endpoints for this model. By default when using this model we will always route you to the base endpoint. If you want the fast endpoint you can add `provider: { sort: throughput}`, to sort by throughput instead. \n",
29865
+ "description": "A lightweight model that thinks before responding. Fast, smart, and great for logic-based tasks that do not require deep domain knowledge. The raw thinking traces are accessible.",
30015
29866
  "architecture": {
30016
29867
  "modality": "text->text",
30017
29868
  "input_modalities": [
@@ -30030,13 +29881,14 @@
30030
29881
  },
30031
29882
  "per_request_limits": null,
30032
29883
  "supported_parameters": [
30033
- "frequency_penalty",
29884
+ "include_reasoning",
30034
29885
  "logprobs",
30035
29886
  "max_tokens",
30036
- "presence_penalty",
29887
+ "reasoning",
30037
29888
  "response_format",
30038
29889
  "seed",
30039
29890
  "stop",
29891
+ "structured_outputs",
30040
29892
  "temperature",
30041
29893
  "tool_choice",
30042
29894
  "tools",
@@ -30046,11 +29898,11 @@
30046
29898
  }
30047
29899
  },
30048
29900
  {
30049
- "id": "x-ai/grok-3-mini",
30050
- "name": "xAI: Grok 3 Mini",
29901
+ "id": "x-ai/grok-3-mini-beta",
29902
+ "name": "xAI: Grok 3 Mini Beta",
30051
29903
  "provider": "openrouter",
30052
29904
  "family": "x-ai",
30053
- "created_at": "2025-06-10 21:20:45 +0200",
29905
+ "created_at": "2025-04-10 01:09:55 +0200",
30054
29906
  "context_window": 131072,
30055
29907
  "max_output_tokens": null,
30056
29908
  "knowledge_cutoff": null,
@@ -30077,7 +29929,7 @@
30077
29929
  }
30078
29930
  },
30079
29931
  "metadata": {
30080
- "description": "A lightweight model that thinks before responding. Fast, smart, and great for logic-based tasks that do not require deep domain knowledge. The raw thinking traces are accessible.",
29932
+ "description": "Grok 3 Mini is a lightweight, smaller thinking model. Unlike traditional models that generate answers immediately, Grok 3 Mini thinks before responding. It’s ideal for reasoning-heavy tasks that don’t demand extensive domain knowledge, and shines in math-specific and quantitative use cases, such as solving challenging puzzles or math problems.\n\nTransparent \"thinking\" traces accessible. Defaults to low reasoning, can boost with setting `reasoning: { effort: \"high\" }`\n\nNote: That there are two xAI endpoints for this model. By default when using this model we will always route you to the base endpoint. If you want the fast endpoint you can add `provider: { sort: throughput}`, to sort by throughput instead. \n",
30081
29933
  "architecture": {
30082
29934
  "modality": "text->text",
30083
29935
  "input_modalities": [
@@ -30103,7 +29955,6 @@
30103
29955
  "response_format",
30104
29956
  "seed",
30105
29957
  "stop",
30106
- "structured_outputs",
30107
29958
  "temperature",
30108
29959
  "tool_choice",
30109
29960
  "tools",
@@ -30113,16 +29964,17 @@
30113
29964
  }
30114
29965
  },
30115
29966
  {
30116
- "id": "x-ai/grok-3-mini-beta",
30117
- "name": "xAI: Grok 3 Mini Beta",
29967
+ "id": "x-ai/grok-4",
29968
+ "name": "xAI: Grok 4",
30118
29969
  "provider": "openrouter",
30119
29970
  "family": "x-ai",
30120
- "created_at": "2025-04-10 01:09:55 +0200",
30121
- "context_window": 131072,
29971
+ "created_at": "2025-07-09 21:01:29 +0200",
29972
+ "context_window": 256000,
30122
29973
  "max_output_tokens": null,
30123
29974
  "knowledge_cutoff": null,
30124
29975
  "modalities": {
30125
29976
  "input": [
29977
+ "image",
30126
29978
  "text"
30127
29979
  ],
30128
29980
  "output": [
@@ -30137,17 +29989,18 @@
30137
29989
  "pricing": {
30138
29990
  "text_tokens": {
30139
29991
  "standard": {
30140
- "input_per_million": 0.3,
30141
- "output_per_million": 0.5,
30142
- "cached_input_per_million": 0.075
29992
+ "input_per_million": 3.0,
29993
+ "output_per_million": 15.0,
29994
+ "cached_input_per_million": 0.75
30143
29995
  }
30144
29996
  }
30145
29997
  },
30146
29998
  "metadata": {
30147
- "description": "Grok 3 Mini is a lightweight, smaller thinking model. Unlike traditional models that generate answers immediately, Grok 3 Mini thinks before responding. It’s ideal for reasoning-heavy tasks that don’t demand extensive domain knowledge, and shines in math-specific and quantitative use cases, such as solving challenging puzzles or math problems.\n\nTransparent \"thinking\" traces accessible. Defaults to low reasoning, can boost with setting `reasoning: { effort: \"high\" }`\n\nNote: That there are two xAI endpoints for this model. By default when using this model we will always route you to the base endpoint. If you want the fast endpoint you can add `provider: { sort: throughput}`, to sort by throughput instead. \n",
29999
+ "description": "Grok 4 is xAI's latest reasoning model with a 256k context window. It supports parallel tool calling, structured outputs, and both image and text inputs. Note that reasoning is not exposed, reasoning cannot be disabled, and the reasoning effort cannot be specified. Pricing increases once the total tokens in a given request is greater than 128k tokens. See more details on the [xAI docs](https://docs.x.ai/docs/models/grok-4-0709)",
30148
30000
  "architecture": {
30149
- "modality": "text->text",
30001
+ "modality": "text+image->text",
30150
30002
  "input_modalities": [
30003
+ "image",
30151
30004
  "text"
30152
30005
  ],
30153
30006
  "output_modalities": [
@@ -30157,7 +30010,7 @@
30157
30010
  "instruct_type": null
30158
30011
  },
30159
30012
  "top_provider": {
30160
- "context_length": 131072,
30013
+ "context_length": 256000,
30161
30014
  "max_completion_tokens": null,
30162
30015
  "is_moderated": false
30163
30016
  },
@@ -30169,7 +30022,7 @@
30169
30022
  "reasoning",
30170
30023
  "response_format",
30171
30024
  "seed",
30172
- "stop",
30025
+ "structured_outputs",
30173
30026
  "temperature",
30174
30027
  "tool_choice",
30175
30028
  "tools",
@@ -30179,18 +30032,18 @@
30179
30032
  }
30180
30033
  },
30181
30034
  {
30182
- "id": "x-ai/grok-4",
30183
- "name": "xAI: Grok 4",
30035
+ "id": "x-ai/grok-4-fast:free",
30036
+ "name": "xAI: Grok 4 Fast (free)",
30184
30037
  "provider": "openrouter",
30185
30038
  "family": "x-ai",
30186
- "created_at": "2025-07-09 21:01:29 +0200",
30187
- "context_window": 256000,
30188
- "max_output_tokens": null,
30039
+ "created_at": "2025-09-19 02:01:30 +0200",
30040
+ "context_window": 2000000,
30041
+ "max_output_tokens": 30000,
30189
30042
  "knowledge_cutoff": null,
30190
30043
  "modalities": {
30191
30044
  "input": [
30192
- "image",
30193
- "text"
30045
+ "text",
30046
+ "image"
30194
30047
  ],
30195
30048
  "output": [
30196
30049
  "text"
@@ -30201,22 +30054,14 @@
30201
30054
  "function_calling",
30202
30055
  "structured_output"
30203
30056
  ],
30204
- "pricing": {
30205
- "text_tokens": {
30206
- "standard": {
30207
- "input_per_million": 3.0,
30208
- "output_per_million": 15.0,
30209
- "cached_input_per_million": 0.75
30210
- }
30211
- }
30212
- },
30057
+ "pricing": {},
30213
30058
  "metadata": {
30214
- "description": "Grok 4 is xAI's latest reasoning model with a 256k context window. It supports parallel tool calling, structured outputs, and both image and text inputs. Note that reasoning is not exposed, reasoning cannot be disabled, and the reasoning effort cannot be specified. Pricing increases once the total tokens in a given request is greater than 128k tokens. See more details on the [xAI docs](https://docs.x.ai/docs/models/grok-4-0709)",
30059
+ "description": "Grok 4 Fast is xAI's latest multimodal model with SOTA cost-efficiency and a 2M token context window. It comes in two flavors: non-reasoning and reasoning. Read more about the model on xAI's [news post](http://x.ai/news/grok-4-fast). Reasoning can be enabled using the `reasoning` `enabled` parameter in the API. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#controlling-reasoning-tokens)\n\nPrompts and completions may be used by xAI or OpenRouter to improve future models.",
30215
30060
  "architecture": {
30216
30061
  "modality": "text+image->text",
30217
30062
  "input_modalities": [
30218
- "image",
30219
- "text"
30063
+ "text",
30064
+ "image"
30220
30065
  ],
30221
30066
  "output_modalities": [
30222
30067
  "text"
@@ -30225,8 +30070,8 @@
30225
30070
  "instruct_type": null
30226
30071
  },
30227
30072
  "top_provider": {
30228
- "context_length": 256000,
30229
- "max_completion_tokens": null,
30073
+ "context_length": 2000000,
30074
+ "max_completion_tokens": 30000,
30230
30075
  "is_moderated": false
30231
30076
  },
30232
30077
  "per_request_limits": null,
@@ -30396,8 +30241,8 @@
30396
30241
  "pricing": {
30397
30242
  "text_tokens": {
30398
30243
  "standard": {
30399
- "input_per_million": 0.41249980199999997,
30400
- "output_per_million": 1.6499998152000002
30244
+ "input_per_million": 0.41,
30245
+ "output_per_million": 1.6500000000000001
30401
30246
  }
30402
30247
  }
30403
30248
  },
@@ -30433,6 +30278,7 @@
30433
30278
  "response_format",
30434
30279
  "seed",
30435
30280
  "stop",
30281
+ "structured_outputs",
30436
30282
  "temperature",
30437
30283
  "tool_choice",
30438
30284
  "tools",
@@ -30644,7 +30490,7 @@
30644
30490
  "name": "Sonar",
30645
30491
  "provider": "perplexity",
30646
30492
  "family": "sonar",
30647
- "created_at": "2025-09-14 11:15:24 +0200",
30493
+ "created_at": "2025-09-21 16:12:52 +0200",
30648
30494
  "context_window": 128000,
30649
30495
  "max_output_tokens": 4096,
30650
30496
  "knowledge_cutoff": null,
@@ -30676,7 +30522,7 @@
30676
30522
  "name": "Sonar Deep Research",
30677
30523
  "provider": "perplexity",
30678
30524
  "family": "sonar_deep_research",
30679
- "created_at": "2025-09-14 11:15:24 +0200",
30525
+ "created_at": "2025-09-21 16:12:52 +0200",
30680
30526
  "context_window": 128000,
30681
30527
  "max_output_tokens": 4096,
30682
30528
  "knowledge_cutoff": null,
@@ -30711,7 +30557,7 @@
30711
30557
  "name": "Sonar Pro",
30712
30558
  "provider": "perplexity",
30713
30559
  "family": "sonar_pro",
30714
- "created_at": "2025-09-14 11:15:24 +0200",
30560
+ "created_at": "2025-09-21 16:12:52 +0200",
30715
30561
  "context_window": 200000,
30716
30562
  "max_output_tokens": 8192,
30717
30563
  "knowledge_cutoff": null,
@@ -30743,7 +30589,7 @@
30743
30589
  "name": "Sonar Reasoning",
30744
30590
  "provider": "perplexity",
30745
30591
  "family": "sonar_reasoning",
30746
- "created_at": "2025-09-14 11:15:24 +0200",
30592
+ "created_at": "2025-09-21 16:12:52 +0200",
30747
30593
  "context_window": 128000,
30748
30594
  "max_output_tokens": 4096,
30749
30595
  "knowledge_cutoff": null,
@@ -30775,7 +30621,7 @@
30775
30621
  "name": "Sonar Reasoning Pro",
30776
30622
  "provider": "perplexity",
30777
30623
  "family": "sonar_reasoning_pro",
30778
- "created_at": "2025-09-14 11:15:24 +0200",
30624
+ "created_at": "2025-09-21 16:12:52 +0200",
30779
30625
  "context_window": 128000,
30780
30626
  "max_output_tokens": 8192,
30781
30627
  "knowledge_cutoff": null,
@@ -30915,7 +30761,7 @@
30915
30761
  "id": "gemini-1.5-flash",
30916
30762
  "name": "Gemini 1.5 Flash",
30917
30763
  "provider": "vertexai",
30918
- "family": "models/gemini-1.5-flash",
30764
+ "family": "gemini-1.5-flash",
30919
30765
  "created_at": null,
30920
30766
  "context_window": 1048576,
30921
30767
  "max_output_tokens": 8192,
@@ -30951,7 +30797,7 @@
30951
30797
  "id": "gemini-1.5-flash-002",
30952
30798
  "name": "Gemini 1.5 Flash",
30953
30799
  "provider": "vertexai",
30954
- "family": "models/gemini-1.5-flash",
30800
+ "family": "gemini-1.5-flash",
30955
30801
  "created_at": null,
30956
30802
  "context_window": 1048576,
30957
30803
  "max_output_tokens": 8192,
@@ -30987,7 +30833,7 @@
30987
30833
  "id": "gemini-1.5-flash-8b",
30988
30834
  "name": "Gemini 1.5 Flash-8B",
30989
30835
  "provider": "vertexai",
30990
- "family": "models/gemini-1.5-flash-8b",
30836
+ "family": "gemini-1.5-flash-8b",
30991
30837
  "created_at": null,
30992
30838
  "context_window": 1048576,
30993
30839
  "max_output_tokens": 8192,
@@ -31023,7 +30869,7 @@
31023
30869
  "id": "gemini-1.5-pro",
31024
30870
  "name": "Gemini 1.5 Pro",
31025
30871
  "provider": "vertexai",
31026
- "family": "models/gemini-1.5-pro",
30872
+ "family": "gemini-1.5-pro",
31027
30873
  "created_at": null,
31028
30874
  "context_window": 2097152,
31029
30875
  "max_output_tokens": 8192,
@@ -31059,7 +30905,7 @@
31059
30905
  "id": "gemini-1.5-pro-002",
31060
30906
  "name": "Gemini 1.5 Pro",
31061
30907
  "provider": "vertexai",
31062
- "family": "models/gemini-1.5-pro",
30908
+ "family": "gemini-1.5-pro",
31063
30909
  "created_at": null,
31064
30910
  "context_window": 2097152,
31065
30911
  "max_output_tokens": 8192,
@@ -31095,7 +30941,7 @@
31095
30941
  "id": "gemini-2.0-flash",
31096
30942
  "name": "Gemini 2.0 Flash",
31097
30943
  "provider": "vertexai",
31098
- "family": "models/gemini-2.0-flash",
30944
+ "family": "gemini-2.0-flash",
31099
30945
  "created_at": null,
31100
30946
  "context_window": 1048576,
31101
30947
  "max_output_tokens": 8192,
@@ -31136,7 +30982,7 @@
31136
30982
  "id": "gemini-2.0-flash-001",
31137
30983
  "name": "Gemini 2.0 Flash",
31138
30984
  "provider": "vertexai",
31139
- "family": "models/gemini-2.0-flash",
30985
+ "family": "gemini-2.0-flash",
31140
30986
  "created_at": null,
31141
30987
  "context_window": 1048576,
31142
30988
  "max_output_tokens": 8192,
@@ -31177,7 +31023,7 @@
31177
31023
  "id": "gemini-2.0-flash-exp",
31178
31024
  "name": "Gemini 2.0 Flash",
31179
31025
  "provider": "vertexai",
31180
- "family": "models/gemini-2.0-flash",
31026
+ "family": "gemini-2.0-flash",
31181
31027
  "created_at": null,
31182
31028
  "context_window": 1048576,
31183
31029
  "max_output_tokens": 8192,
@@ -31218,7 +31064,7 @@
31218
31064
  "id": "gemini-2.0-flash-lite-001",
31219
31065
  "name": "Gemini 2.0 Flash-Lite",
31220
31066
  "provider": "vertexai",
31221
- "family": "models/gemini-2.0-flash-lite",
31067
+ "family": "gemini-2.0-flash-lite",
31222
31068
  "created_at": null,
31223
31069
  "context_window": 1048576,
31224
31070
  "max_output_tokens": 8192,
@@ -31259,7 +31105,7 @@
31259
31105
  "id": "gemini-2.5-flash",
31260
31106
  "name": "Gemini 2.5 Flash",
31261
31107
  "provider": "vertexai",
31262
- "family": "models/gemini-2.5-flash",
31108
+ "family": "gemini-2.5-flash",
31263
31109
  "created_at": null,
31264
31110
  "context_window": 1048576,
31265
31111
  "max_output_tokens": 65536,
@@ -31300,7 +31146,7 @@
31300
31146
  "id": "gemini-2.5-flash-lite",
31301
31147
  "name": "Gemini 2.5 Flash-Lite",
31302
31148
  "provider": "vertexai",
31303
- "family": "models/gemini-2.5-flash-lite",
31149
+ "family": "gemini-2.5-flash-lite",
31304
31150
  "created_at": null,
31305
31151
  "context_window": 1048576,
31306
31152
  "max_output_tokens": 65536,
@@ -31341,7 +31187,7 @@
31341
31187
  "id": "gemini-2.5-pro",
31342
31188
  "name": "Gemini 2.5 Pro",
31343
31189
  "provider": "vertexai",
31344
- "family": "models/gemini-2.5-pro",
31190
+ "family": "gemini-2.5-pro",
31345
31191
  "created_at": null,
31346
31192
  "context_window": 1048576,
31347
31193
  "max_output_tokens": 65536,