dify_llm 1.7.1 → 1.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,7 +3,7 @@
3
3
  "id": "claude-3-5-haiku-20241022",
4
4
  "name": "Claude Haiku 3.5",
5
5
  "provider": "anthropic",
6
- "family": "claude-3-5-haiku-latest",
6
+ "family": "claude-3-5-haiku",
7
7
  "created_at": null,
8
8
  "context_window": 200000,
9
9
  "max_output_tokens": 8192,
@@ -111,7 +111,7 @@
111
111
  "id": "claude-3-7-sonnet-20250219",
112
112
  "name": "Claude Sonnet 3.7",
113
113
  "provider": "anthropic",
114
- "family": "claude-3-7-sonnet-latest",
114
+ "family": "claude-3-7-sonnet",
115
115
  "created_at": null,
116
116
  "context_window": 200000,
117
117
  "max_output_tokens": 64000,
@@ -245,7 +245,7 @@
245
245
  "id": "claude-opus-4-20250514",
246
246
  "name": "Claude Opus 4",
247
247
  "provider": "anthropic",
248
- "family": "claude-opus-4-0",
248
+ "family": "claude-opus-4",
249
249
  "created_at": null,
250
250
  "context_window": 200000,
251
251
  "max_output_tokens": 32000,
@@ -277,7 +277,7 @@
277
277
  "id": "claude-sonnet-4-20250514",
278
278
  "name": "Claude Sonnet 4",
279
279
  "provider": "anthropic",
280
- "family": "claude-sonnet-4-0",
280
+ "family": "claude-sonnet-4",
281
281
  "created_at": null,
282
282
  "context_window": 200000,
283
283
  "max_output_tokens": 64000,
@@ -1249,55 +1249,6 @@
1249
1249
  ]
1250
1250
  }
1251
1251
  },
1252
- {
1253
- "id": "anthropic.claude-instant-v1",
1254
- "name": "Claude Instant",
1255
- "provider": "bedrock",
1256
- "family": "claude_instant",
1257
- "created_at": null,
1258
- "context_window": 200000,
1259
- "max_output_tokens": 4096,
1260
- "knowledge_cutoff": null,
1261
- "modalities": {
1262
- "input": [
1263
- "text",
1264
- "image",
1265
- "pdf"
1266
- ],
1267
- "output": [
1268
- "text"
1269
- ]
1270
- },
1271
- "capabilities": [
1272
- "streaming",
1273
- "function_calling"
1274
- ],
1275
- "pricing": {
1276
- "text_tokens": {
1277
- "standard": {
1278
- "input_per_million": 0.8,
1279
- "output_per_million": 2.4
1280
- },
1281
- "batch": {
1282
- "input_per_million": 0.4,
1283
- "output_per_million": 1.2
1284
- }
1285
- }
1286
- },
1287
- "metadata": {
1288
- "provider_name": "Anthropic",
1289
- "inference_types": [
1290
- "ON_DEMAND"
1291
- ],
1292
- "streaming_supported": true,
1293
- "input_modalities": [
1294
- "TEXT"
1295
- ],
1296
- "output_modalities": [
1297
- "TEXT"
1298
- ]
1299
- }
1300
- },
1301
1252
  {
1302
1253
  "id": "anthropic.claude-instant-v1:2:100k",
1303
1254
  "name": "Claude Instant",
@@ -1347,55 +1298,6 @@
1347
1298
  ]
1348
1299
  }
1349
1300
  },
1350
- {
1351
- "id": "anthropic.claude-v2",
1352
- "name": "Claude",
1353
- "provider": "bedrock",
1354
- "family": "claude2",
1355
- "created_at": null,
1356
- "context_window": 200000,
1357
- "max_output_tokens": 4096,
1358
- "knowledge_cutoff": null,
1359
- "modalities": {
1360
- "input": [
1361
- "text",
1362
- "image",
1363
- "pdf"
1364
- ],
1365
- "output": [
1366
- "text"
1367
- ]
1368
- },
1369
- "capabilities": [
1370
- "streaming",
1371
- "function_calling"
1372
- ],
1373
- "pricing": {
1374
- "text_tokens": {
1375
- "standard": {
1376
- "input_per_million": 8.0,
1377
- "output_per_million": 24.0
1378
- },
1379
- "batch": {
1380
- "input_per_million": 4.0,
1381
- "output_per_million": 12.0
1382
- }
1383
- }
1384
- },
1385
- "metadata": {
1386
- "provider_name": "Anthropic",
1387
- "inference_types": [
1388
- "ON_DEMAND"
1389
- ],
1390
- "streaming_supported": true,
1391
- "input_modalities": [
1392
- "TEXT"
1393
- ],
1394
- "output_modalities": [
1395
- "TEXT"
1396
- ]
1397
- }
1398
- },
1399
1301
  {
1400
1302
  "id": "anthropic.claude-v2:0:100k",
1401
1303
  "name": "Claude",
@@ -1494,55 +1396,6 @@
1494
1396
  ]
1495
1397
  }
1496
1398
  },
1497
- {
1498
- "id": "anthropic.claude-v2:1",
1499
- "name": "Claude",
1500
- "provider": "bedrock",
1501
- "family": "claude2",
1502
- "created_at": null,
1503
- "context_window": 200000,
1504
- "max_output_tokens": 4096,
1505
- "knowledge_cutoff": null,
1506
- "modalities": {
1507
- "input": [
1508
- "text",
1509
- "image",
1510
- "pdf"
1511
- ],
1512
- "output": [
1513
- "text"
1514
- ]
1515
- },
1516
- "capabilities": [
1517
- "streaming",
1518
- "function_calling"
1519
- ],
1520
- "pricing": {
1521
- "text_tokens": {
1522
- "standard": {
1523
- "input_per_million": 8.0,
1524
- "output_per_million": 24.0
1525
- },
1526
- "batch": {
1527
- "input_per_million": 4.0,
1528
- "output_per_million": 12.0
1529
- }
1530
- }
1531
- },
1532
- "metadata": {
1533
- "provider_name": "Anthropic",
1534
- "inference_types": [
1535
- "ON_DEMAND"
1536
- ],
1537
- "streaming_supported": true,
1538
- "input_modalities": [
1539
- "TEXT"
1540
- ],
1541
- "output_modalities": [
1542
- "TEXT"
1543
- ]
1544
- }
1545
- },
1546
1399
  {
1547
1400
  "id": "anthropic.claude-v2:1:18k",
1548
1401
  "name": "Claude",
@@ -2704,7 +2557,8 @@
2704
2557
  "input": [
2705
2558
  "text",
2706
2559
  "image",
2707
- "pdf"
2560
+ "pdf",
2561
+ "video"
2708
2562
  ],
2709
2563
  "output": [
2710
2564
  "text"
@@ -2750,7 +2604,8 @@
2750
2604
  "input": [
2751
2605
  "text",
2752
2606
  "image",
2753
- "pdf"
2607
+ "pdf",
2608
+ "video"
2754
2609
  ],
2755
2610
  "output": [
2756
2611
  "text"
@@ -2869,7 +2724,8 @@
2869
2724
  "input": [
2870
2725
  "text",
2871
2726
  "image",
2872
- "pdf"
2727
+ "pdf",
2728
+ "video"
2873
2729
  ],
2874
2730
  "output": [
2875
2731
  "text"
@@ -2918,7 +2774,8 @@
2918
2774
  "input": [
2919
2775
  "text",
2920
2776
  "image",
2921
- "pdf"
2777
+ "pdf",
2778
+ "video"
2922
2779
  ],
2923
2780
  "output": [
2924
2781
  "text"
@@ -2967,7 +2824,8 @@
2967
2824
  "input": [
2968
2825
  "text",
2969
2826
  "image",
2970
- "pdf"
2827
+ "pdf",
2828
+ "video"
2971
2829
  ],
2972
2830
  "output": [
2973
2831
  "text"
@@ -3016,7 +2874,8 @@
3016
2874
  "input": [
3017
2875
  "text",
3018
2876
  "image",
3019
- "pdf"
2877
+ "pdf",
2878
+ "video"
3020
2879
  ],
3021
2880
  "output": [
3022
2881
  "text"
@@ -3064,7 +2923,8 @@
3064
2923
  "input": [
3065
2924
  "text",
3066
2925
  "image",
3067
- "pdf"
2926
+ "pdf",
2927
+ "video"
3068
2928
  ],
3069
2929
  "output": [
3070
2930
  "text"
@@ -3325,7 +3185,8 @@
3325
3185
  "input": [
3326
3186
  "text",
3327
3187
  "image",
3328
- "pdf"
3188
+ "pdf",
3189
+ "video"
3329
3190
  ],
3330
3191
  "output": [
3331
3192
  "text"
@@ -3543,7 +3404,8 @@
3543
3404
  "input": [
3544
3405
  "text",
3545
3406
  "image",
3546
- "pdf"
3407
+ "pdf",
3408
+ "video"
3547
3409
  ],
3548
3410
  "output": [
3549
3411
  "text"
@@ -3591,7 +3453,8 @@
3591
3453
  "input": [
3592
3454
  "text",
3593
3455
  "image",
3594
- "pdf"
3456
+ "pdf",
3457
+ "video"
3595
3458
  ],
3596
3459
  "output": [
3597
3460
  "text"
@@ -3639,7 +3502,8 @@
3639
3502
  "input": [
3640
3503
  "text",
3641
3504
  "image",
3642
- "pdf"
3505
+ "pdf",
3506
+ "video"
3643
3507
  ],
3644
3508
  "output": [
3645
3509
  "text"
@@ -3727,7 +3591,8 @@
3727
3591
  "knowledge_cutoff": null,
3728
3592
  "modalities": {
3729
3593
  "input": [
3730
- "text"
3594
+ "text",
3595
+ "video"
3731
3596
  ],
3732
3597
  "output": [
3733
3598
  "text",
@@ -3769,7 +3634,8 @@
3769
3634
  "input": [
3770
3635
  "text",
3771
3636
  "image",
3772
- "pdf"
3637
+ "pdf",
3638
+ "video"
3773
3639
  ],
3774
3640
  "output": [
3775
3641
  "text",
@@ -3823,7 +3689,8 @@
3823
3689
  "input": [
3824
3690
  "text",
3825
3691
  "image",
3826
- "pdf"
3692
+ "pdf",
3693
+ "video"
3827
3694
  ],
3828
3695
  "output": [
3829
3696
  "text",
@@ -3877,7 +3744,8 @@
3877
3744
  "input": [
3878
3745
  "text",
3879
3746
  "image",
3880
- "pdf"
3747
+ "pdf",
3748
+ "video"
3881
3749
  ],
3882
3750
  "output": [
3883
3751
  "text"
@@ -4781,6 +4649,36 @@
4781
4649
  "owned_by": "mistralai"
4782
4650
  }
4783
4651
  },
4652
+ {
4653
+ "id": "magistral-medium-2509",
4654
+ "name": "Magistral Medium 2509",
4655
+ "provider": "mistral",
4656
+ "family": "mistral",
4657
+ "created_at": null,
4658
+ "context_window": 32768,
4659
+ "max_output_tokens": 8192,
4660
+ "knowledge_cutoff": null,
4661
+ "modalities": {
4662
+ "input": [
4663
+ "text"
4664
+ ],
4665
+ "output": [
4666
+ "text"
4667
+ ]
4668
+ },
4669
+ "capabilities": [
4670
+ "streaming",
4671
+ "function_calling",
4672
+ "structured_output",
4673
+ "reasoning",
4674
+ "batch"
4675
+ ],
4676
+ "pricing": {},
4677
+ "metadata": {
4678
+ "object": "model",
4679
+ "owned_by": "mistralai"
4680
+ }
4681
+ },
4784
4682
  {
4785
4683
  "id": "magistral-medium-latest",
4786
4684
  "name": "Magistral Medium Latest",
@@ -4871,6 +4769,36 @@
4871
4769
  "owned_by": "mistralai"
4872
4770
  }
4873
4771
  },
4772
+ {
4773
+ "id": "magistral-small-2509",
4774
+ "name": "Magistral Small 2509",
4775
+ "provider": "mistral",
4776
+ "family": "mistral",
4777
+ "created_at": null,
4778
+ "context_window": 32768,
4779
+ "max_output_tokens": 8192,
4780
+ "knowledge_cutoff": null,
4781
+ "modalities": {
4782
+ "input": [
4783
+ "text"
4784
+ ],
4785
+ "output": [
4786
+ "text"
4787
+ ]
4788
+ },
4789
+ "capabilities": [
4790
+ "streaming",
4791
+ "function_calling",
4792
+ "structured_output",
4793
+ "reasoning",
4794
+ "batch"
4795
+ ],
4796
+ "pricing": {},
4797
+ "metadata": {
4798
+ "object": "model",
4799
+ "owned_by": "mistralai"
4800
+ }
4801
+ },
4874
4802
  {
4875
4803
  "id": "magistral-small-latest",
4876
4804
  "name": "Magistral Small Latest",
@@ -5045,6 +4973,30 @@
5045
4973
  "owned_by": "mistralai"
5046
4974
  }
5047
4975
  },
4976
+ {
4977
+ "id": "mistral-embed-2312",
4978
+ "name": "Mistral Embed",
4979
+ "provider": "mistral",
4980
+ "family": "mistral-embed",
4981
+ "created_at": null,
4982
+ "context_window": 32768,
4983
+ "max_output_tokens": 8192,
4984
+ "knowledge_cutoff": null,
4985
+ "modalities": {
4986
+ "input": [
4987
+ "text"
4988
+ ],
4989
+ "output": [
4990
+ "embeddings"
4991
+ ]
4992
+ },
4993
+ "capabilities": [],
4994
+ "pricing": {},
4995
+ "metadata": {
4996
+ "object": "model",
4997
+ "owned_by": "mistralai"
4998
+ }
4999
+ },
5048
5000
  {
5049
5001
  "id": "mistral-large-2407",
5050
5002
  "name": "Mistral Large",
@@ -8018,14 +7970,16 @@
8018
7970
  "id": "gpt-5",
8019
7971
  "name": "GPT-5",
8020
7972
  "provider": "openai",
8021
- "family": "other",
7973
+ "family": "gpt5",
8022
7974
  "created_at": "2025-08-05 22:29:37 +0200",
8023
- "context_window": 4096,
8024
- "max_output_tokens": 16384,
7975
+ "context_window": 128000,
7976
+ "max_output_tokens": 400000,
8025
7977
  "knowledge_cutoff": null,
8026
7978
  "modalities": {
8027
7979
  "input": [
8028
- "text"
7980
+ "text",
7981
+ "image",
7982
+ "pdf"
8029
7983
  ],
8030
7984
  "output": [
8031
7985
  "text"
@@ -8033,13 +7987,16 @@
8033
7987
  },
8034
7988
  "capabilities": [
8035
7989
  "streaming",
7990
+ "function_calling",
7991
+ "structured_output",
8036
7992
  "reasoning"
8037
7993
  ],
8038
7994
  "pricing": {
8039
7995
  "text_tokens": {
8040
7996
  "standard": {
8041
- "input_per_million": 0.5,
8042
- "output_per_million": 1.5
7997
+ "input_per_million": 1.25,
7998
+ "output_per_million": 10.0,
7999
+ "cached_input_per_million": 0.125
8043
8000
  }
8044
8001
  }
8045
8002
  },
@@ -8052,14 +8009,16 @@
8052
8009
  "id": "gpt-5-2025-08-07",
8053
8010
  "name": "GPT-5 20250807",
8054
8011
  "provider": "openai",
8055
- "family": "other",
8012
+ "family": "gpt5",
8056
8013
  "created_at": "2025-08-01 21:09:20 +0200",
8057
- "context_window": 4096,
8058
- "max_output_tokens": 16384,
8014
+ "context_window": 128000,
8015
+ "max_output_tokens": 400000,
8059
8016
  "knowledge_cutoff": null,
8060
8017
  "modalities": {
8061
8018
  "input": [
8062
- "text"
8019
+ "text",
8020
+ "image",
8021
+ "pdf"
8063
8022
  ],
8064
8023
  "output": [
8065
8024
  "text"
@@ -8067,13 +8026,16 @@
8067
8026
  },
8068
8027
  "capabilities": [
8069
8028
  "streaming",
8029
+ "function_calling",
8030
+ "structured_output",
8070
8031
  "reasoning"
8071
8032
  ],
8072
8033
  "pricing": {
8073
8034
  "text_tokens": {
8074
8035
  "standard": {
8075
- "input_per_million": 0.5,
8076
- "output_per_million": 1.5
8036
+ "input_per_million": 1.25,
8037
+ "output_per_million": 10.0,
8038
+ "cached_input_per_million": 0.125
8077
8039
  }
8078
8040
  }
8079
8041
  },
@@ -8086,14 +8048,16 @@
8086
8048
  "id": "gpt-5-chat-latest",
8087
8049
  "name": "GPT-5 Chat Latest",
8088
8050
  "provider": "openai",
8089
- "family": "other",
8051
+ "family": "gpt5",
8090
8052
  "created_at": "2025-08-01 20:35:06 +0200",
8091
- "context_window": 4096,
8092
- "max_output_tokens": 16384,
8053
+ "context_window": 128000,
8054
+ "max_output_tokens": 400000,
8093
8055
  "knowledge_cutoff": null,
8094
8056
  "modalities": {
8095
8057
  "input": [
8096
- "text"
8058
+ "text",
8059
+ "image",
8060
+ "pdf"
8097
8061
  ],
8098
8062
  "output": [
8099
8063
  "text"
@@ -8101,13 +8065,16 @@
8101
8065
  },
8102
8066
  "capabilities": [
8103
8067
  "streaming",
8068
+ "function_calling",
8069
+ "structured_output",
8104
8070
  "reasoning"
8105
8071
  ],
8106
8072
  "pricing": {
8107
8073
  "text_tokens": {
8108
8074
  "standard": {
8109
- "input_per_million": 0.5,
8110
- "output_per_million": 1.5
8075
+ "input_per_million": 1.25,
8076
+ "output_per_million": 10.0,
8077
+ "cached_input_per_million": 0.125
8111
8078
  }
8112
8079
  }
8113
8080
  },
@@ -8120,14 +8087,16 @@
8120
8087
  "id": "gpt-5-mini",
8121
8088
  "name": "GPT-5 Mini",
8122
8089
  "provider": "openai",
8123
- "family": "other",
8090
+ "family": "gpt5",
8124
8091
  "created_at": "2025-08-05 22:32:08 +0200",
8125
- "context_window": 4096,
8126
- "max_output_tokens": 16384,
8092
+ "context_window": 128000,
8093
+ "max_output_tokens": 400000,
8127
8094
  "knowledge_cutoff": null,
8128
8095
  "modalities": {
8129
8096
  "input": [
8130
- "text"
8097
+ "text",
8098
+ "image",
8099
+ "pdf"
8131
8100
  ],
8132
8101
  "output": [
8133
8102
  "text"
@@ -8135,13 +8104,16 @@
8135
8104
  },
8136
8105
  "capabilities": [
8137
8106
  "streaming",
8107
+ "function_calling",
8108
+ "structured_output",
8138
8109
  "reasoning"
8139
8110
  ],
8140
8111
  "pricing": {
8141
8112
  "text_tokens": {
8142
8113
  "standard": {
8143
- "input_per_million": 0.5,
8144
- "output_per_million": 1.5
8114
+ "input_per_million": 1.25,
8115
+ "output_per_million": 10.0,
8116
+ "cached_input_per_million": 0.125
8145
8117
  }
8146
8118
  }
8147
8119
  },
@@ -8154,14 +8126,16 @@
8154
8126
  "id": "gpt-5-mini-2025-08-07",
8155
8127
  "name": "GPT-5 Mini 20250807",
8156
8128
  "provider": "openai",
8157
- "family": "other",
8129
+ "family": "gpt5",
8158
8130
  "created_at": "2025-08-05 22:31:07 +0200",
8159
- "context_window": 4096,
8160
- "max_output_tokens": 16384,
8131
+ "context_window": 128000,
8132
+ "max_output_tokens": 400000,
8161
8133
  "knowledge_cutoff": null,
8162
8134
  "modalities": {
8163
8135
  "input": [
8164
- "text"
8136
+ "text",
8137
+ "image",
8138
+ "pdf"
8165
8139
  ],
8166
8140
  "output": [
8167
8141
  "text"
@@ -8169,13 +8143,16 @@
8169
8143
  },
8170
8144
  "capabilities": [
8171
8145
  "streaming",
8146
+ "function_calling",
8147
+ "structured_output",
8172
8148
  "reasoning"
8173
8149
  ],
8174
8150
  "pricing": {
8175
8151
  "text_tokens": {
8176
8152
  "standard": {
8177
- "input_per_million": 0.5,
8178
- "output_per_million": 1.5
8153
+ "input_per_million": 1.25,
8154
+ "output_per_million": 10.0,
8155
+ "cached_input_per_million": 0.125
8179
8156
  }
8180
8157
  }
8181
8158
  },
@@ -8188,14 +8165,16 @@
8188
8165
  "id": "gpt-5-nano",
8189
8166
  "name": "GPT-5 Nano",
8190
8167
  "provider": "openai",
8191
- "family": "other",
8168
+ "family": "gpt5",
8192
8169
  "created_at": "2025-08-05 22:39:44 +0200",
8193
- "context_window": 4096,
8194
- "max_output_tokens": 16384,
8170
+ "context_window": 128000,
8171
+ "max_output_tokens": 400000,
8195
8172
  "knowledge_cutoff": null,
8196
8173
  "modalities": {
8197
8174
  "input": [
8198
- "text"
8175
+ "text",
8176
+ "image",
8177
+ "pdf"
8199
8178
  ],
8200
8179
  "output": [
8201
8180
  "text"
@@ -8203,13 +8182,16 @@
8203
8182
  },
8204
8183
  "capabilities": [
8205
8184
  "streaming",
8185
+ "function_calling",
8186
+ "structured_output",
8206
8187
  "reasoning"
8207
8188
  ],
8208
8189
  "pricing": {
8209
8190
  "text_tokens": {
8210
8191
  "standard": {
8211
- "input_per_million": 0.5,
8212
- "output_per_million": 1.5
8192
+ "input_per_million": 1.25,
8193
+ "output_per_million": 10.0,
8194
+ "cached_input_per_million": 0.125
8213
8195
  }
8214
8196
  }
8215
8197
  },
@@ -8222,14 +8204,16 @@
8222
8204
  "id": "gpt-5-nano-2025-08-07",
8223
8205
  "name": "GPT-5 Nano 20250807",
8224
8206
  "provider": "openai",
8225
- "family": "other",
8207
+ "family": "gpt5",
8226
8208
  "created_at": "2025-08-05 22:38:23 +0200",
8227
- "context_window": 4096,
8228
- "max_output_tokens": 16384,
8209
+ "context_window": 128000,
8210
+ "max_output_tokens": 400000,
8229
8211
  "knowledge_cutoff": null,
8230
8212
  "modalities": {
8231
8213
  "input": [
8232
- "text"
8214
+ "text",
8215
+ "image",
8216
+ "pdf"
8233
8217
  ],
8234
8218
  "output": [
8235
8219
  "text"
@@ -8237,13 +8221,16 @@
8237
8221
  },
8238
8222
  "capabilities": [
8239
8223
  "streaming",
8224
+ "function_calling",
8225
+ "structured_output",
8240
8226
  "reasoning"
8241
8227
  ],
8242
8228
  "pricing": {
8243
8229
  "text_tokens": {
8244
8230
  "standard": {
8245
- "input_per_million": 0.5,
8246
- "output_per_million": 1.5
8231
+ "input_per_million": 1.25,
8232
+ "output_per_million": 10.0,
8233
+ "cached_input_per_million": 0.125
8247
8234
  }
8248
8235
  }
8249
8236
  },
@@ -9815,8 +9802,8 @@
9815
9802
  "provider": "openrouter",
9816
9803
  "family": "alfredpros",
9817
9804
  "created_at": "2025-04-14 16:44:34 +0200",
9818
- "context_window": 8192,
9819
- "max_output_tokens": 8192,
9805
+ "context_window": 4096,
9806
+ "max_output_tokens": 4096,
9820
9807
  "knowledge_cutoff": null,
9821
9808
  "modalities": {
9822
9809
  "input": [
@@ -9827,14 +9814,13 @@
9827
9814
  ]
9828
9815
  },
9829
9816
  "capabilities": [
9830
- "streaming",
9831
- "predicted_outputs"
9817
+ "streaming"
9832
9818
  ],
9833
9819
  "pricing": {
9834
9820
  "text_tokens": {
9835
9821
  "standard": {
9836
- "input_per_million": 0.7,
9837
- "output_per_million": 1.1
9822
+ "input_per_million": 0.7999999999999999,
9823
+ "output_per_million": 1.2
9838
9824
  }
9839
9825
  }
9840
9826
  },
@@ -9852,14 +9838,13 @@
9852
9838
  "instruct_type": "alpaca"
9853
9839
  },
9854
9840
  "top_provider": {
9855
- "context_length": 8192,
9856
- "max_completion_tokens": 8192,
9841
+ "context_length": 4096,
9842
+ "max_completion_tokens": 4096,
9857
9843
  "is_moderated": false
9858
9844
  },
9859
9845
  "per_request_limits": null,
9860
9846
  "supported_parameters": [
9861
9847
  "frequency_penalty",
9862
- "logit_bias",
9863
9848
  "max_tokens",
9864
9849
  "min_p",
9865
9850
  "presence_penalty",
@@ -9873,13 +9858,13 @@
9873
9858
  }
9874
9859
  },
9875
9860
  {
9876
- "id": "alpindale/goliath-120b",
9877
- "name": "Goliath 120B",
9861
+ "id": "alibaba/tongyi-deepresearch-30b-a3b",
9862
+ "name": "Tongyi DeepResearch 30B A3B",
9878
9863
  "provider": "openrouter",
9879
- "family": "alpindale",
9880
- "created_at": "2023-11-10 01:00:00 +0100",
9881
- "context_window": 6144,
9882
- "max_output_tokens": 512,
9864
+ "family": "alibaba",
9865
+ "created_at": "2025-09-18 17:53:24 +0200",
9866
+ "context_window": 131072,
9867
+ "max_output_tokens": 131072,
9883
9868
  "knowledge_cutoff": null,
9884
9869
  "modalities": {
9885
9870
  "input": [
@@ -9891,19 +9876,19 @@
9891
9876
  },
9892
9877
  "capabilities": [
9893
9878
  "streaming",
9894
- "structured_output",
9895
- "predicted_outputs"
9879
+ "function_calling",
9880
+ "structured_output"
9896
9881
  ],
9897
9882
  "pricing": {
9898
9883
  "text_tokens": {
9899
9884
  "standard": {
9900
- "input_per_million": 4.0,
9901
- "output_per_million": 5.5
9885
+ "input_per_million": 0.09,
9886
+ "output_per_million": 0.44999999999999996
9902
9887
  }
9903
9888
  }
9904
9889
  },
9905
9890
  "metadata": {
9906
- "description": "A large LLM created by combining two fine-tuned Llama 70B models into one 120B model. Combines Xwin and Euryale.\n\nCredits to\n- [@chargoddard](https://huggingface.co/chargoddard) for developing the framework used to merge the model - [mergekit](https://github.com/cg123/mergekit).\n- [@Undi95](https://huggingface.co/Undi95) for helping with the merge ratios.\n\n#merge",
9891
+ "description": "Tongyi DeepResearch is an agentic large language model developed by Tongyi Lab, with 30 billion total parameters activating only 3 billion per token. It's optimized for long-horizon, deep information-seeking tasks and delivers state-of-the-art performance on benchmarks like Humanity's Last Exam, BrowserComp, BrowserComp-ZH, WebWalkerQA, GAIA, xbench-DeepSearch, and FRAMES. This makes it superior for complex agentic search, reasoning, and multi-step problem-solving compared to prior models.\n\nThe model includes a fully automated synthetic data pipeline for scalable pre-training, fine-tuning, and reinforcement learning. It uses large-scale continual pre-training on diverse agentic data to boost reasoning and stay fresh. It also features end-to-end on-policy RL with a customized Group Relative Policy Optimization, including token-level gradients and negative sample filtering for stable training. The model supports ReAct for core ability checks and an IterResearch-based 'Heavy' mode for max performance through test-time scaling. It's ideal for advanced research agents, tool use, and heavy inference workflows.",
9907
9892
  "architecture": {
9908
9893
  "modality": "text->text",
9909
9894
  "input_modalities": [
@@ -9912,41 +9897,36 @@
9912
9897
  "output_modalities": [
9913
9898
  "text"
9914
9899
  ],
9915
- "tokenizer": "Llama2",
9916
- "instruct_type": "airoboros"
9900
+ "tokenizer": "Other",
9901
+ "instruct_type": null
9917
9902
  },
9918
9903
  "top_provider": {
9919
- "context_length": 6144,
9920
- "max_completion_tokens": 512,
9904
+ "context_length": 131072,
9905
+ "max_completion_tokens": 131072,
9921
9906
  "is_moderated": false
9922
9907
  },
9923
9908
  "per_request_limits": null,
9924
9909
  "supported_parameters": [
9925
- "frequency_penalty",
9926
- "logit_bias",
9910
+ "include_reasoning",
9927
9911
  "max_tokens",
9928
- "min_p",
9929
- "presence_penalty",
9930
- "repetition_penalty",
9912
+ "reasoning",
9931
9913
  "response_format",
9932
- "seed",
9933
- "stop",
9934
9914
  "structured_outputs",
9935
9915
  "temperature",
9936
- "top_a",
9937
- "top_k",
9916
+ "tool_choice",
9917
+ "tools",
9938
9918
  "top_p"
9939
9919
  ]
9940
9920
  }
9941
9921
  },
9942
9922
  {
9943
- "id": "amazon/nova-lite-v1",
9944
- "name": "Amazon: Nova Lite 1.0",
9923
+ "id": "allenai/molmo-7b-d",
9924
+ "name": "AllenAI: Molmo 7B D",
9945
9925
  "provider": "openrouter",
9946
- "family": "amazon",
9947
- "created_at": "2024-12-05 23:22:43 +0100",
9948
- "context_window": 300000,
9949
- "max_output_tokens": 5120,
9926
+ "family": "allenai",
9927
+ "created_at": "2025-03-26 22:07:27 +0100",
9928
+ "context_window": 4096,
9929
+ "max_output_tokens": 4096,
9950
9930
  "knowledge_cutoff": null,
9951
9931
  "modalities": {
9952
9932
  "input": [
@@ -9959,18 +9939,18 @@
9959
9939
  },
9960
9940
  "capabilities": [
9961
9941
  "streaming",
9962
- "function_calling"
9942
+ "predicted_outputs"
9963
9943
  ],
9964
9944
  "pricing": {
9965
9945
  "text_tokens": {
9966
9946
  "standard": {
9967
- "input_per_million": 0.06,
9968
- "output_per_million": 0.24
9947
+ "input_per_million": 0.09999999999999999,
9948
+ "output_per_million": 0.19999999999999998
9969
9949
  }
9970
9950
  }
9971
9951
  },
9972
9952
  "metadata": {
9973
- "description": "Amazon Nova Lite 1.0 is a very low-cost multimodal model from Amazon that focused on fast processing of image, video, and text inputs to generate text output. Amazon Nova Lite can handle real-time customer interactions, document analysis, and visual question-answering tasks with high accuracy.\n\nWith an input context of 300K tokens, it can analyze multiple images or up to 30 minutes of video in a single input.",
9953
+ "description": "Molmo is a family of open vision-language models developed by the Allen Institute for AI. Molmo models are trained on PixMo, a dataset of 1 million, highly-curated image-text pairs. It has state-of-the-art performance among multimodal models with a similar size while being fully open-source. You can find all models in the Molmo family [here](https://huggingface.co/collections/allenai/molmo-66f379e6fe3b8ef090a8ca19). Learn more about the Molmo family [in the announcement blog post](https://molmo.allenai.org/blog) or the [paper](https://huggingface.co/papers/2409.17146).\n\nMolmo 7B-D is based on [Qwen2-7B](https://huggingface.co/Qwen/Qwen2-7B) and uses [OpenAI CLIP](https://huggingface.co/openai/clip-vit-large-patch14-336) as vision backbone. It performs comfortably between GPT-4V and GPT-4o on both academic benchmarks and human evaluation.\n\nThis checkpoint is a preview of the Molmo release. All artifacts used in creating Molmo (PixMo dataset, training code, evaluations, intermediate checkpoints) will be made available at a later date, furthering our commitment to open-source AI development and reproducibility.",
9974
9954
  "architecture": {
9975
9955
  "modality": "text+image->text",
9976
9956
  "input_modalities": [
@@ -9980,33 +9960,38 @@
9980
9960
  "output_modalities": [
9981
9961
  "text"
9982
9962
  ],
9983
- "tokenizer": "Nova",
9963
+ "tokenizer": "Other",
9984
9964
  "instruct_type": null
9985
9965
  },
9986
9966
  "top_provider": {
9987
- "context_length": 300000,
9988
- "max_completion_tokens": 5120,
9989
- "is_moderated": true
9967
+ "context_length": 4096,
9968
+ "max_completion_tokens": 4096,
9969
+ "is_moderated": false
9990
9970
  },
9991
9971
  "per_request_limits": null,
9992
9972
  "supported_parameters": [
9973
+ "frequency_penalty",
9974
+ "logit_bias",
9993
9975
  "max_tokens",
9976
+ "min_p",
9977
+ "presence_penalty",
9978
+ "repetition_penalty",
9979
+ "seed",
9994
9980
  "stop",
9995
9981
  "temperature",
9996
- "tools",
9997
9982
  "top_k",
9998
9983
  "top_p"
9999
9984
  ]
10000
9985
  }
10001
9986
  },
10002
9987
  {
10003
- "id": "amazon/nova-micro-v1",
10004
- "name": "Amazon: Nova Micro 1.0",
9988
+ "id": "allenai/olmo-2-0325-32b-instruct",
9989
+ "name": "AllenAI: Olmo 2 32B Instruct",
10005
9990
  "provider": "openrouter",
10006
- "family": "amazon",
10007
- "created_at": "2024-12-05 23:20:37 +0100",
10008
- "context_window": 128000,
10009
- "max_output_tokens": 5120,
9991
+ "family": "allenai",
9992
+ "created_at": "2025-03-14 22:42:36 +0100",
9993
+ "context_window": 4096,
9994
+ "max_output_tokens": 4096,
10010
9995
  "knowledge_cutoff": null,
10011
9996
  "modalities": {
10012
9997
  "input": [
@@ -10018,18 +10003,18 @@
10018
10003
  },
10019
10004
  "capabilities": [
10020
10005
  "streaming",
10021
- "function_calling"
10006
+ "predicted_outputs"
10022
10007
  ],
10023
10008
  "pricing": {
10024
10009
  "text_tokens": {
10025
10010
  "standard": {
10026
- "input_per_million": 0.035,
10027
- "output_per_million": 0.14
10011
+ "input_per_million": 1.0,
10012
+ "output_per_million": 1.5
10028
10013
  }
10029
10014
  }
10030
10015
  },
10031
10016
  "metadata": {
10032
- "description": "Amazon Nova Micro 1.0 is a text-only model that delivers the lowest latency responses in the Amazon Nova family of models at a very low cost. With a context length of 128K tokens and optimized for speed and cost, Amazon Nova Micro excels at tasks such as text summarization, translation, content classification, interactive chat, and brainstorming. It has simple mathematical reasoning and coding abilities.",
10017
+ "description": "OLMo-2 32B Instruct is a supervised instruction-finetuned variant of the OLMo-2 32B March 2025 base model. It excels in complex reasoning and instruction-following tasks across diverse benchmarks such as GSM8K, MATH, IFEval, and general NLP evaluation. Developed by AI2, OLMo-2 32B is part of an open, research-oriented initiative, trained primarily on English-language datasets to advance the understanding and development of open-source language models.",
10033
10018
  "architecture": {
10034
10019
  "modality": "text->text",
10035
10020
  "input_modalities": [
@@ -10038,38 +10023,42 @@
10038
10023
  "output_modalities": [
10039
10024
  "text"
10040
10025
  ],
10041
- "tokenizer": "Nova",
10026
+ "tokenizer": "Other",
10042
10027
  "instruct_type": null
10043
10028
  },
10044
10029
  "top_provider": {
10045
- "context_length": 128000,
10046
- "max_completion_tokens": 5120,
10047
- "is_moderated": true
10030
+ "context_length": 4096,
10031
+ "max_completion_tokens": 4096,
10032
+ "is_moderated": false
10048
10033
  },
10049
10034
  "per_request_limits": null,
10050
10035
  "supported_parameters": [
10036
+ "frequency_penalty",
10037
+ "logit_bias",
10051
10038
  "max_tokens",
10039
+ "min_p",
10040
+ "presence_penalty",
10041
+ "repetition_penalty",
10042
+ "seed",
10052
10043
  "stop",
10053
10044
  "temperature",
10054
- "tools",
10055
10045
  "top_k",
10056
10046
  "top_p"
10057
10047
  ]
10058
10048
  }
10059
10049
  },
10060
10050
  {
10061
- "id": "amazon/nova-pro-v1",
10062
- "name": "Amazon: Nova Pro 1.0",
10051
+ "id": "alpindale/goliath-120b",
10052
+ "name": "Goliath 120B",
10063
10053
  "provider": "openrouter",
10064
- "family": "amazon",
10065
- "created_at": "2024-12-05 23:05:03 +0100",
10066
- "context_window": 300000,
10067
- "max_output_tokens": 5120,
10054
+ "family": "alpindale",
10055
+ "created_at": "2023-11-10 01:00:00 +0100",
10056
+ "context_window": 6144,
10057
+ "max_output_tokens": 512,
10068
10058
  "knowledge_cutoff": null,
10069
10059
  "modalities": {
10070
10060
  "input": [
10071
- "text",
10072
- "image"
10061
+ "text"
10073
10062
  ],
10074
10063
  "output": [
10075
10064
  "text"
@@ -10077,52 +10066,238 @@
10077
10066
  },
10078
10067
  "capabilities": [
10079
10068
  "streaming",
10080
- "function_calling"
10069
+ "structured_output",
10070
+ "predicted_outputs"
10081
10071
  ],
10082
10072
  "pricing": {
10083
10073
  "text_tokens": {
10084
10074
  "standard": {
10085
- "input_per_million": 0.7999999999999999,
10086
- "output_per_million": 3.1999999999999997
10075
+ "input_per_million": 4.0,
10076
+ "output_per_million": 5.5
10087
10077
  }
10088
10078
  }
10089
10079
  },
10090
10080
  "metadata": {
10091
- "description": "Amazon Nova Pro 1.0 is a capable multimodal model from Amazon focused on providing a combination of accuracy, speed, and cost for a wide range of tasks. As of December 2024, it achieves state-of-the-art performance on key benchmarks including visual question answering (TextVQA) and video understanding (VATEX).\n\nAmazon Nova Pro demonstrates strong capabilities in processing both visual and textual information and at analyzing financial documents.\n\n**NOTE**: Video input is not supported at this time.",
10081
+ "description": "A large LLM created by combining two fine-tuned Llama 70B models into one 120B model. Combines Xwin and Euryale.\n\nCredits to\n- [@chargoddard](https://huggingface.co/chargoddard) for developing the framework used to merge the model - [mergekit](https://github.com/cg123/mergekit).\n- [@Undi95](https://huggingface.co/Undi95) for helping with the merge ratios.\n\n#merge",
10092
10082
  "architecture": {
10093
- "modality": "text+image->text",
10083
+ "modality": "text->text",
10094
10084
  "input_modalities": [
10095
- "text",
10096
- "image"
10085
+ "text"
10097
10086
  ],
10098
10087
  "output_modalities": [
10099
10088
  "text"
10100
10089
  ],
10101
- "tokenizer": "Nova",
10102
- "instruct_type": null
10090
+ "tokenizer": "Llama2",
10091
+ "instruct_type": "airoboros"
10103
10092
  },
10104
10093
  "top_provider": {
10105
- "context_length": 300000,
10106
- "max_completion_tokens": 5120,
10107
- "is_moderated": true
10094
+ "context_length": 6144,
10095
+ "max_completion_tokens": 512,
10096
+ "is_moderated": false
10108
10097
  },
10109
10098
  "per_request_limits": null,
10110
10099
  "supported_parameters": [
10100
+ "frequency_penalty",
10101
+ "logit_bias",
10111
10102
  "max_tokens",
10103
+ "min_p",
10104
+ "presence_penalty",
10105
+ "repetition_penalty",
10106
+ "response_format",
10107
+ "seed",
10112
10108
  "stop",
10109
+ "structured_outputs",
10113
10110
  "temperature",
10114
- "tools",
10111
+ "top_a",
10115
10112
  "top_k",
10116
10113
  "top_p"
10117
10114
  ]
10118
10115
  }
10119
10116
  },
10120
10117
  {
10121
- "id": "anthracite-org/magnum-v2-72b",
10122
- "name": "Magnum v2 72B",
10118
+ "id": "amazon/nova-lite-v1",
10119
+ "name": "Amazon: Nova Lite 1.0",
10123
10120
  "provider": "openrouter",
10124
- "family": "anthracite-org",
10125
- "created_at": "2024-09-30 02:00:00 +0200",
10121
+ "family": "amazon",
10122
+ "created_at": "2024-12-05 23:22:43 +0100",
10123
+ "context_window": 300000,
10124
+ "max_output_tokens": 5120,
10125
+ "knowledge_cutoff": null,
10126
+ "modalities": {
10127
+ "input": [
10128
+ "text",
10129
+ "image"
10130
+ ],
10131
+ "output": [
10132
+ "text"
10133
+ ]
10134
+ },
10135
+ "capabilities": [
10136
+ "streaming",
10137
+ "function_calling"
10138
+ ],
10139
+ "pricing": {
10140
+ "text_tokens": {
10141
+ "standard": {
10142
+ "input_per_million": 0.06,
10143
+ "output_per_million": 0.24
10144
+ }
10145
+ }
10146
+ },
10147
+ "metadata": {
10148
+ "description": "Amazon Nova Lite 1.0 is a very low-cost multimodal model from Amazon that focused on fast processing of image, video, and text inputs to generate text output. Amazon Nova Lite can handle real-time customer interactions, document analysis, and visual question-answering tasks with high accuracy.\n\nWith an input context of 300K tokens, it can analyze multiple images or up to 30 minutes of video in a single input.",
10149
+ "architecture": {
10150
+ "modality": "text+image->text",
10151
+ "input_modalities": [
10152
+ "text",
10153
+ "image"
10154
+ ],
10155
+ "output_modalities": [
10156
+ "text"
10157
+ ],
10158
+ "tokenizer": "Nova",
10159
+ "instruct_type": null
10160
+ },
10161
+ "top_provider": {
10162
+ "context_length": 300000,
10163
+ "max_completion_tokens": 5120,
10164
+ "is_moderated": true
10165
+ },
10166
+ "per_request_limits": null,
10167
+ "supported_parameters": [
10168
+ "max_tokens",
10169
+ "stop",
10170
+ "temperature",
10171
+ "tools",
10172
+ "top_k",
10173
+ "top_p"
10174
+ ]
10175
+ }
10176
+ },
10177
+ {
10178
+ "id": "amazon/nova-micro-v1",
10179
+ "name": "Amazon: Nova Micro 1.0",
10180
+ "provider": "openrouter",
10181
+ "family": "amazon",
10182
+ "created_at": "2024-12-05 23:20:37 +0100",
10183
+ "context_window": 128000,
10184
+ "max_output_tokens": 5120,
10185
+ "knowledge_cutoff": null,
10186
+ "modalities": {
10187
+ "input": [
10188
+ "text"
10189
+ ],
10190
+ "output": [
10191
+ "text"
10192
+ ]
10193
+ },
10194
+ "capabilities": [
10195
+ "streaming",
10196
+ "function_calling"
10197
+ ],
10198
+ "pricing": {
10199
+ "text_tokens": {
10200
+ "standard": {
10201
+ "input_per_million": 0.035,
10202
+ "output_per_million": 0.14
10203
+ }
10204
+ }
10205
+ },
10206
+ "metadata": {
10207
+ "description": "Amazon Nova Micro 1.0 is a text-only model that delivers the lowest latency responses in the Amazon Nova family of models at a very low cost. With a context length of 128K tokens and optimized for speed and cost, Amazon Nova Micro excels at tasks such as text summarization, translation, content classification, interactive chat, and brainstorming. It has simple mathematical reasoning and coding abilities.",
10208
+ "architecture": {
10209
+ "modality": "text->text",
10210
+ "input_modalities": [
10211
+ "text"
10212
+ ],
10213
+ "output_modalities": [
10214
+ "text"
10215
+ ],
10216
+ "tokenizer": "Nova",
10217
+ "instruct_type": null
10218
+ },
10219
+ "top_provider": {
10220
+ "context_length": 128000,
10221
+ "max_completion_tokens": 5120,
10222
+ "is_moderated": true
10223
+ },
10224
+ "per_request_limits": null,
10225
+ "supported_parameters": [
10226
+ "max_tokens",
10227
+ "stop",
10228
+ "temperature",
10229
+ "tools",
10230
+ "top_k",
10231
+ "top_p"
10232
+ ]
10233
+ }
10234
+ },
10235
+ {
10236
+ "id": "amazon/nova-pro-v1",
10237
+ "name": "Amazon: Nova Pro 1.0",
10238
+ "provider": "openrouter",
10239
+ "family": "amazon",
10240
+ "created_at": "2024-12-05 23:05:03 +0100",
10241
+ "context_window": 300000,
10242
+ "max_output_tokens": 5120,
10243
+ "knowledge_cutoff": null,
10244
+ "modalities": {
10245
+ "input": [
10246
+ "text",
10247
+ "image"
10248
+ ],
10249
+ "output": [
10250
+ "text"
10251
+ ]
10252
+ },
10253
+ "capabilities": [
10254
+ "streaming",
10255
+ "function_calling"
10256
+ ],
10257
+ "pricing": {
10258
+ "text_tokens": {
10259
+ "standard": {
10260
+ "input_per_million": 0.7999999999999999,
10261
+ "output_per_million": 3.1999999999999997
10262
+ }
10263
+ }
10264
+ },
10265
+ "metadata": {
10266
+ "description": "Amazon Nova Pro 1.0 is a capable multimodal model from Amazon focused on providing a combination of accuracy, speed, and cost for a wide range of tasks. As of December 2024, it achieves state-of-the-art performance on key benchmarks including visual question answering (TextVQA) and video understanding (VATEX).\n\nAmazon Nova Pro demonstrates strong capabilities in processing both visual and textual information and at analyzing financial documents.\n\n**NOTE**: Video input is not supported at this time.",
10267
+ "architecture": {
10268
+ "modality": "text+image->text",
10269
+ "input_modalities": [
10270
+ "text",
10271
+ "image"
10272
+ ],
10273
+ "output_modalities": [
10274
+ "text"
10275
+ ],
10276
+ "tokenizer": "Nova",
10277
+ "instruct_type": null
10278
+ },
10279
+ "top_provider": {
10280
+ "context_length": 300000,
10281
+ "max_completion_tokens": 5120,
10282
+ "is_moderated": true
10283
+ },
10284
+ "per_request_limits": null,
10285
+ "supported_parameters": [
10286
+ "max_tokens",
10287
+ "stop",
10288
+ "temperature",
10289
+ "tools",
10290
+ "top_k",
10291
+ "top_p"
10292
+ ]
10293
+ }
10294
+ },
10295
+ {
10296
+ "id": "anthracite-org/magnum-v2-72b",
10297
+ "name": "Magnum v2 72B",
10298
+ "provider": "openrouter",
10299
+ "family": "anthracite-org",
10300
+ "created_at": "2024-09-30 02:00:00 +0200",
10126
10301
  "context_window": 32768,
10127
10302
  "max_output_tokens": null,
10128
10303
  "knowledge_cutoff": null,
@@ -10802,7 +10977,7 @@
10802
10977
  "top_provider": {
10803
10978
  "context_length": 200000,
10804
10979
  "max_completion_tokens": 32000,
10805
- "is_moderated": false
10980
+ "is_moderated": true
10806
10981
  },
10807
10982
  "per_request_limits": null,
10808
10983
  "supported_parameters": [
@@ -10868,7 +11043,7 @@
10868
11043
  "top_provider": {
10869
11044
  "context_length": 200000,
10870
11045
  "max_completion_tokens": 32000,
10871
- "is_moderated": false
11046
+ "is_moderated": true
10872
11047
  },
10873
11048
  "per_request_limits": null,
10874
11049
  "supported_parameters": [
@@ -10878,7 +11053,9 @@
10878
11053
  "stop",
10879
11054
  "temperature",
10880
11055
  "tool_choice",
10881
- "tools"
11056
+ "tools",
11057
+ "top_k",
11058
+ "top_p"
10882
11059
  ]
10883
11060
  }
10884
11061
  },
@@ -10948,6 +11125,71 @@
10948
11125
  ]
10949
11126
  }
10950
11127
  },
11128
+ {
11129
+ "id": "arcee-ai/afm-4.5b",
11130
+ "name": "Arcee AI: AFM 4.5B",
11131
+ "provider": "openrouter",
11132
+ "family": "arcee-ai",
11133
+ "created_at": "2025-09-16 18:34:44 +0200",
11134
+ "context_window": 65536,
11135
+ "max_output_tokens": null,
11136
+ "knowledge_cutoff": null,
11137
+ "modalities": {
11138
+ "input": [
11139
+ "text"
11140
+ ],
11141
+ "output": [
11142
+ "text"
11143
+ ]
11144
+ },
11145
+ "capabilities": [
11146
+ "streaming",
11147
+ "structured_output",
11148
+ "predicted_outputs"
11149
+ ],
11150
+ "pricing": {
11151
+ "text_tokens": {
11152
+ "standard": {
11153
+ "input_per_million": 0.09999999999999999,
11154
+ "output_per_million": 0.39999999999999997
11155
+ }
11156
+ }
11157
+ },
11158
+ "metadata": {
11159
+ "description": "AFM-4.5B is a 4.5 billion parameter instruction-tuned language model developed by Arcee AI. The model was pretrained on approximately 8 trillion tokens, including 6.5 trillion tokens of general data and 1.5 trillion tokens with an emphasis on mathematical reasoning and code generation. ",
11160
+ "architecture": {
11161
+ "modality": "text->text",
11162
+ "input_modalities": [
11163
+ "text"
11164
+ ],
11165
+ "output_modalities": [
11166
+ "text"
11167
+ ],
11168
+ "tokenizer": "Other",
11169
+ "instruct_type": null
11170
+ },
11171
+ "top_provider": {
11172
+ "context_length": 65536,
11173
+ "max_completion_tokens": null,
11174
+ "is_moderated": false
11175
+ },
11176
+ "per_request_limits": null,
11177
+ "supported_parameters": [
11178
+ "frequency_penalty",
11179
+ "logit_bias",
11180
+ "max_tokens",
11181
+ "min_p",
11182
+ "presence_penalty",
11183
+ "repetition_penalty",
11184
+ "response_format",
11185
+ "stop",
11186
+ "structured_outputs",
11187
+ "temperature",
11188
+ "top_k",
11189
+ "top_p"
11190
+ ]
11191
+ }
11192
+ },
10951
11193
  {
10952
11194
  "id": "arcee-ai/coder-large",
10953
11195
  "name": "Arcee AI: Coder Large",
@@ -11225,8 +11467,8 @@
11225
11467
  "pricing": {
11226
11468
  "text_tokens": {
11227
11469
  "standard": {
11228
- "input_per_million": 0.01,
11229
- "output_per_million": 0.0400032
11470
+ "input_per_million": 0.02,
11471
+ "output_per_million": 0.07
11230
11472
  }
11231
11473
  }
11232
11474
  },
@@ -11450,6 +11692,7 @@
11450
11692
  "response_format",
11451
11693
  "seed",
11452
11694
  "stop",
11695
+ "structured_outputs",
11453
11696
  "temperature",
11454
11697
  "top_k",
11455
11698
  "top_p"
@@ -11614,8 +11857,8 @@
11614
11857
  "pricing": {
11615
11858
  "text_tokens": {
11616
11859
  "standard": {
11617
- "input_per_million": 0.10366159999999999,
11618
- "output_per_million": 0.414848
11860
+ "input_per_million": 0.16,
11861
+ "output_per_million": 0.65
11619
11862
  }
11620
11863
  }
11621
11864
  },
@@ -11777,71 +12020,8 @@
11777
12020
  }
11778
12021
  },
11779
12022
  {
11780
- "id": "cognitivecomputations/dolphin-mixtral-8x22b",
11781
- "name": "Dolphin 2.9.2 Mixtral 8x22B 🐬",
11782
- "provider": "openrouter",
11783
- "family": "cognitivecomputations",
11784
- "created_at": "2024-06-08 02:00:00 +0200",
11785
- "context_window": 16000,
11786
- "max_output_tokens": 8192,
11787
- "knowledge_cutoff": null,
11788
- "modalities": {
11789
- "input": [
11790
- "text"
11791
- ],
11792
- "output": [
11793
- "text"
11794
- ]
11795
- },
11796
- "capabilities": [
11797
- "streaming",
11798
- "predicted_outputs"
11799
- ],
11800
- "pricing": {
11801
- "text_tokens": {
11802
- "standard": {
11803
- "input_per_million": 0.8999999999999999,
11804
- "output_per_million": 0.8999999999999999
11805
- }
11806
- }
11807
- },
11808
- "metadata": {
11809
- "description": "Dolphin 2.9 is designed for instruction following, conversational, and coding. This model is a finetune of [Mixtral 8x22B Instruct](/models/mistralai/mixtral-8x22b-instruct). It features a 64k context length and was fine-tuned with a 16k sequence length using ChatML templates.\n\nThis model is a successor to [Dolphin Mixtral 8x7B](/models/cognitivecomputations/dolphin-mixtral-8x7b).\n\nThe model is uncensored and is stripped of alignment and bias. It requires an external alignment layer for ethical use. Users are cautioned to use this highly compliant model responsibly, as detailed in a blog post about uncensored models at [erichartford.com/uncensored-models](https://erichartford.com/uncensored-models).\n\n#moe #uncensored",
11810
- "architecture": {
11811
- "modality": "text->text",
11812
- "input_modalities": [
11813
- "text"
11814
- ],
11815
- "output_modalities": [
11816
- "text"
11817
- ],
11818
- "tokenizer": "Mistral",
11819
- "instruct_type": "chatml"
11820
- },
11821
- "top_provider": {
11822
- "context_length": 16000,
11823
- "max_completion_tokens": 8192,
11824
- "is_moderated": false
11825
- },
11826
- "per_request_limits": null,
11827
- "supported_parameters": [
11828
- "frequency_penalty",
11829
- "logit_bias",
11830
- "max_tokens",
11831
- "min_p",
11832
- "presence_penalty",
11833
- "repetition_penalty",
11834
- "seed",
11835
- "stop",
11836
- "temperature",
11837
- "top_k",
11838
- "top_p"
11839
- ]
11840
- }
11841
- },
11842
- {
11843
- "id": "cognitivecomputations/dolphin3.0-mistral-24b",
11844
- "name": "Dolphin3.0 Mistral 24B",
12023
+ "id": "cognitivecomputations/dolphin3.0-mistral-24b",
12024
+ "name": "Dolphin3.0 Mistral 24B",
11845
12025
  "provider": "openrouter",
11846
12026
  "family": "cognitivecomputations",
11847
12027
  "created_at": "2025-02-13 16:53:39 +0100",
@@ -11863,8 +12043,8 @@
11863
12043
  "pricing": {
11864
12044
  "text_tokens": {
11865
12045
  "standard": {
11866
- "input_per_million": 0.037022,
11867
- "output_per_million": 0.14816
12046
+ "input_per_million": 0.03,
12047
+ "output_per_million": 0.11
11868
12048
  }
11869
12049
  }
11870
12050
  },
@@ -11987,7 +12167,7 @@
11987
12167
  "text_tokens": {
11988
12168
  "standard": {
11989
12169
  "input_per_million": 0.01,
11990
- "output_per_million": 0.0340768
12170
+ "output_per_million": 0.03
11991
12171
  }
11992
12172
  }
11993
12173
  },
@@ -12818,8 +12998,8 @@
12818
12998
  "pricing": {
12819
12999
  "text_tokens": {
12820
13000
  "standard": {
12821
- "input_per_million": 0.1999188,
12822
- "output_per_million": 0.800064
13001
+ "input_per_million": 0.24999987999999998,
13002
+ "output_per_million": 0.999999888
12823
13003
  }
12824
13004
  }
12825
13005
  },
@@ -12889,8 +13069,8 @@
12889
13069
  "pricing": {
12890
13070
  "text_tokens": {
12891
13071
  "standard": {
12892
- "input_per_million": 0.1999188,
12893
- "output_per_million": 0.800064
13072
+ "input_per_million": 0.24999987999999998,
13073
+ "output_per_million": 0.999999888
12894
13074
  }
12895
13075
  }
12896
13076
  },
@@ -13021,8 +13201,8 @@
13021
13201
  "pricing": {
13022
13202
  "text_tokens": {
13023
13203
  "standard": {
13024
- "input_per_million": 0.19999999999999998,
13025
- "output_per_million": 0.7999999999999999
13204
+ "input_per_million": 0.24999987999999998,
13205
+ "output_per_million": 0.999999888
13026
13206
  }
13027
13207
  }
13028
13208
  },
@@ -13074,7 +13254,7 @@
13074
13254
  "provider": "openrouter",
13075
13255
  "family": "deepseek",
13076
13256
  "created_at": "2025-08-21 14:33:48 +0200",
13077
- "context_window": 64000,
13257
+ "context_window": 163840,
13078
13258
  "max_output_tokens": null,
13079
13259
  "knowledge_cutoff": null,
13080
13260
  "modalities": {
@@ -13105,9 +13285,9 @@
13105
13285
  "instruct_type": "deepseek-v3.1"
13106
13286
  },
13107
13287
  "top_provider": {
13108
- "context_length": 64000,
13288
+ "context_length": 163840,
13109
13289
  "max_completion_tokens": null,
13110
- "is_moderated": true
13290
+ "is_moderated": false
13111
13291
  },
13112
13292
  "per_request_limits": null,
13113
13293
  "supported_parameters": [
@@ -13291,8 +13471,8 @@
13291
13471
  "pricing": {
13292
13472
  "text_tokens": {
13293
13473
  "standard": {
13294
- "input_per_million": 0.1999188,
13295
- "output_per_million": 0.800064
13474
+ "input_per_million": 0.39999999999999997,
13475
+ "output_per_million": 1.75
13296
13476
  }
13297
13477
  }
13298
13478
  },
@@ -13362,8 +13542,8 @@
13362
13542
  "pricing": {
13363
13543
  "text_tokens": {
13364
13544
  "standard": {
13365
- "input_per_million": 0.01703012,
13366
- "output_per_million": 0.0681536
13545
+ "input_per_million": 0.01,
13546
+ "output_per_million": 0.049999999999999996
13367
13547
  }
13368
13548
  }
13369
13549
  },
@@ -13551,8 +13731,8 @@
13551
13731
  "pricing": {
13552
13732
  "text_tokens": {
13553
13733
  "standard": {
13554
- "input_per_million": 0.025915399999999998,
13555
- "output_per_million": 0.103712
13734
+ "input_per_million": 0.03,
13735
+ "output_per_million": 0.13
13556
13736
  }
13557
13737
  }
13558
13738
  },
@@ -13728,8 +13908,8 @@
13728
13908
  "provider": "openrouter",
13729
13909
  "family": "deepseek",
13730
13910
  "created_at": "2025-01-30 00:39:00 +0100",
13731
- "context_window": 64000,
13732
- "max_output_tokens": 32000,
13911
+ "context_window": 32768,
13912
+ "max_output_tokens": 16384,
13733
13913
  "knowledge_cutoff": null,
13734
13914
  "modalities": {
13735
13915
  "input": [
@@ -13765,66 +13945,8 @@
13765
13945
  "instruct_type": "deepseek-r1"
13766
13946
  },
13767
13947
  "top_provider": {
13768
- "context_length": 64000,
13769
- "max_completion_tokens": 32000,
13770
- "is_moderated": false
13771
- },
13772
- "per_request_limits": null,
13773
- "supported_parameters": [
13774
- "frequency_penalty",
13775
- "include_reasoning",
13776
- "logit_bias",
13777
- "max_tokens",
13778
- "min_p",
13779
- "presence_penalty",
13780
- "reasoning",
13781
- "repetition_penalty",
13782
- "seed",
13783
- "stop",
13784
- "temperature",
13785
- "top_k",
13786
- "top_p"
13787
- ]
13788
- }
13789
- },
13790
- {
13791
- "id": "deepseek/deepseek-r1-distill-qwen-14b:free",
13792
- "name": "DeepSeek: R1 Distill Qwen 14B (free)",
13793
- "provider": "openrouter",
13794
- "family": "deepseek",
13795
- "created_at": "2025-01-30 00:39:00 +0100",
13796
- "context_window": 64000,
13797
- "max_output_tokens": null,
13798
- "knowledge_cutoff": null,
13799
- "modalities": {
13800
- "input": [
13801
- "text"
13802
- ],
13803
- "output": [
13804
- "text"
13805
- ]
13806
- },
13807
- "capabilities": [
13808
- "streaming",
13809
- "predicted_outputs"
13810
- ],
13811
- "pricing": {},
13812
- "metadata": {
13813
- "description": "DeepSeek R1 Distill Qwen 14B is a distilled large language model based on [Qwen 2.5 14B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). It outperforms OpenAI's o1-mini across various benchmarks, achieving new state-of-the-art results for dense models.\n\nOther benchmark results include:\n\n- AIME 2024 pass@1: 69.7\n- MATH-500 pass@1: 93.9\n- CodeForces Rating: 1481\n\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.",
13814
- "architecture": {
13815
- "modality": "text->text",
13816
- "input_modalities": [
13817
- "text"
13818
- ],
13819
- "output_modalities": [
13820
- "text"
13821
- ],
13822
- "tokenizer": "Qwen",
13823
- "instruct_type": "deepseek-r1"
13824
- },
13825
- "top_provider": {
13826
- "context_length": 64000,
13827
- "max_completion_tokens": null,
13948
+ "context_length": 32768,
13949
+ "max_completion_tokens": 16384,
13828
13950
  "is_moderated": false
13829
13951
  },
13830
13952
  "per_request_limits": null,
@@ -13832,7 +13954,6 @@
13832
13954
  "frequency_penalty",
13833
13955
  "include_reasoning",
13834
13956
  "logit_bias",
13835
- "logprobs",
13836
13957
  "max_tokens",
13837
13958
  "min_p",
13838
13959
  "presence_penalty",
@@ -13842,7 +13963,6 @@
13842
13963
  "stop",
13843
13964
  "temperature",
13844
13965
  "top_k",
13845
- "top_logprobs",
13846
13966
  "top_p"
13847
13967
  ]
13848
13968
  }
@@ -13872,8 +13992,8 @@
13872
13992
  "pricing": {
13873
13993
  "text_tokens": {
13874
13994
  "standard": {
13875
- "input_per_million": 0.075,
13876
- "output_per_million": 0.15
13995
+ "input_per_million": 0.27,
13996
+ "output_per_million": 0.27
13877
13997
  }
13878
13998
  }
13879
13999
  },
@@ -13986,8 +14106,8 @@
13986
14106
  "pricing": {
13987
14107
  "text_tokens": {
13988
14108
  "standard": {
13989
- "input_per_million": 0.19999999999999998,
13990
- "output_per_million": 0.7999999999999999
14109
+ "input_per_million": 0.24999987999999998,
14110
+ "output_per_million": 0.999999888
13991
14111
  }
13992
14112
  }
13993
14113
  },
@@ -14384,7 +14504,7 @@
14384
14504
  }
14385
14505
  },
14386
14506
  "metadata": {
14387
- "description": "Gemini 2.5 Flash Image Preview is a state of the art image generation model with contextual understanding. It is capable of image generation, edits, and multi-turn conversations.",
14507
+ "description": "Gemini 2.5 Flash Image Preview, AKA Nano Banana is a state of the art image generation model with contextual understanding. It is capable of image generation, edits, and multi-turn conversations.",
14388
14508
  "architecture": {
14389
14509
  "modality": "text+image->text+image",
14390
14510
  "input_modalities": [
@@ -15055,7 +15175,7 @@
15055
15175
  "text_tokens": {
15056
15176
  "standard": {
15057
15177
  "input_per_million": 0.01,
15058
- "output_per_million": 0.0100008
15178
+ "output_per_million": 0.02
15059
15179
  }
15060
15180
  }
15061
15181
  },
@@ -15180,8 +15300,8 @@
15180
15300
  "pricing": {
15181
15301
  "text_tokens": {
15182
15302
  "standard": {
15183
- "input_per_million": 0.0481286,
15184
- "output_per_million": 0.192608
15303
+ "input_per_million": 0.04,
15304
+ "output_per_million": 0.14
15185
15305
  }
15186
15306
  }
15187
15307
  },
@@ -15309,8 +15429,8 @@
15309
15429
  "pricing": {
15310
15430
  "text_tokens": {
15311
15431
  "standard": {
15312
- "input_per_million": 0.0666396,
15313
- "output_per_million": 0.26668800000000004
15432
+ "input_per_million": 0.07,
15433
+ "output_per_million": 0.26
15314
15434
  }
15315
15435
  }
15316
15436
  },
@@ -15897,69 +16017,6 @@
15897
16017
  ]
15898
16018
  }
15899
16019
  },
15900
- {
15901
- "id": "infermatic/mn-inferor-12b",
15902
- "name": "Infermatic: Mistral Nemo Inferor 12B",
15903
- "provider": "openrouter",
15904
- "family": "infermatic",
15905
- "created_at": "2024-11-13 03:20:28 +0100",
15906
- "context_window": 8192,
15907
- "max_output_tokens": 8192,
15908
- "knowledge_cutoff": null,
15909
- "modalities": {
15910
- "input": [
15911
- "text"
15912
- ],
15913
- "output": [
15914
- "text"
15915
- ]
15916
- },
15917
- "capabilities": [
15918
- "streaming",
15919
- "predicted_outputs"
15920
- ],
15921
- "pricing": {
15922
- "text_tokens": {
15923
- "standard": {
15924
- "input_per_million": 0.6,
15925
- "output_per_million": 1.0
15926
- }
15927
- }
15928
- },
15929
- "metadata": {
15930
- "description": "Inferor 12B is a merge of top roleplay models, expert on immersive narratives and storytelling.\n\nThis model was merged using the [Model Stock](https://arxiv.org/abs/2403.19522) merge method using [anthracite-org/magnum-v4-12b](https://openrouter.ai/anthracite-org/magnum-v4-72b) as a base.\n",
15931
- "architecture": {
15932
- "modality": "text->text",
15933
- "input_modalities": [
15934
- "text"
15935
- ],
15936
- "output_modalities": [
15937
- "text"
15938
- ],
15939
- "tokenizer": "Mistral",
15940
- "instruct_type": "mistral"
15941
- },
15942
- "top_provider": {
15943
- "context_length": 8192,
15944
- "max_completion_tokens": 8192,
15945
- "is_moderated": false
15946
- },
15947
- "per_request_limits": null,
15948
- "supported_parameters": [
15949
- "frequency_penalty",
15950
- "logit_bias",
15951
- "max_tokens",
15952
- "min_p",
15953
- "presence_penalty",
15954
- "repetition_penalty",
15955
- "seed",
15956
- "stop",
15957
- "temperature",
15958
- "top_k",
15959
- "top_p"
15960
- ]
15961
- }
15962
- },
15963
16020
  {
15964
16021
  "id": "inflection/inflection-3-pi",
15965
16022
  "name": "Inflection: Inflection 3 Pi",
@@ -16263,13 +16320,13 @@
16263
16320
  }
16264
16321
  },
16265
16322
  {
16266
- "id": "meta-llama/llama-3-70b-instruct",
16267
- "name": "Meta: Llama 3 70B Instruct",
16323
+ "id": "meituan/longcat-flash-chat",
16324
+ "name": "Meituan: LongCat Flash Chat",
16268
16325
  "provider": "openrouter",
16269
- "family": "meta-llama",
16270
- "created_at": "2024-04-18 02:00:00 +0200",
16271
- "context_window": 8192,
16272
- "max_output_tokens": 16384,
16326
+ "family": "meituan",
16327
+ "created_at": "2025-09-09 16:20:58 +0200",
16328
+ "context_window": 131072,
16329
+ "max_output_tokens": null,
16273
16330
  "knowledge_cutoff": null,
16274
16331
  "modalities": {
16275
16332
  "input": [
@@ -16282,19 +16339,18 @@
16282
16339
  "capabilities": [
16283
16340
  "streaming",
16284
16341
  "function_calling",
16285
- "structured_output",
16286
16342
  "predicted_outputs"
16287
16343
  ],
16288
16344
  "pricing": {
16289
16345
  "text_tokens": {
16290
16346
  "standard": {
16291
- "input_per_million": 0.3,
16292
- "output_per_million": 0.39999999999999997
16347
+ "input_per_million": 0.12,
16348
+ "output_per_million": 0.6
16293
16349
  }
16294
16350
  }
16295
16351
  },
16296
16352
  "metadata": {
16297
- "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 70B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
16353
+ "description": "LongCat-Flash-Chat is a large-scale Mixture-of-Experts (MoE) model with 560B total parameters, of which 18.6B–31.3B (≈27B on average) are dynamically activated per input. It introduces a shortcut-connected MoE design to reduce communication overhead and achieve high throughput while maintaining training stability through advanced scaling strategies such as hyperparameter transfer, deterministic computation, and multi-stage optimization.\n\nThis release, LongCat-Flash-Chat, is a non-thinking foundation model optimized for conversational and agentic tasks. It supports long context windows up to 128K tokens and shows competitive performance across reasoning, coding, instruction following, and domain benchmarks, with particular strengths in tool use and complex multi-step interactions.",
16298
16354
  "architecture": {
16299
16355
  "modality": "text->text",
16300
16356
  "input_modalities": [
@@ -16303,12 +16359,12 @@
16303
16359
  "output_modalities": [
16304
16360
  "text"
16305
16361
  ],
16306
- "tokenizer": "Llama3",
16307
- "instruct_type": "llama3"
16362
+ "tokenizer": "Other",
16363
+ "instruct_type": null
16308
16364
  },
16309
16365
  "top_provider": {
16310
- "context_length": 8192,
16311
- "max_completion_tokens": 16384,
16366
+ "context_length": 131072,
16367
+ "max_completion_tokens": null,
16312
16368
  "is_moderated": false
16313
16369
  },
16314
16370
  "per_request_limits": null,
@@ -16320,7 +16376,6 @@
16320
16376
  "min_p",
16321
16377
  "presence_penalty",
16322
16378
  "repetition_penalty",
16323
- "response_format",
16324
16379
  "seed",
16325
16380
  "stop",
16326
16381
  "temperature",
@@ -16333,8 +16388,8 @@
16333
16388
  }
16334
16389
  },
16335
16390
  {
16336
- "id": "meta-llama/llama-3-8b-instruct",
16337
- "name": "Meta: Llama 3 8B Instruct",
16391
+ "id": "meta-llama/llama-3-70b-instruct",
16392
+ "name": "Meta: Llama 3 70B Instruct",
16338
16393
  "provider": "openrouter",
16339
16394
  "family": "meta-llama",
16340
16395
  "created_at": "2024-04-18 02:00:00 +0200",
@@ -16358,13 +16413,13 @@
16358
16413
  "pricing": {
16359
16414
  "text_tokens": {
16360
16415
  "standard": {
16361
- "input_per_million": 0.03,
16362
- "output_per_million": 0.06
16416
+ "input_per_million": 0.3,
16417
+ "output_per_million": 0.39999999999999997
16363
16418
  }
16364
16419
  }
16365
16420
  },
16366
16421
  "metadata": {
16367
- "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
16422
+ "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 70B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
16368
16423
  "architecture": {
16369
16424
  "modality": "text->text",
16370
16425
  "input_modalities": [
@@ -16385,6 +16440,7 @@
16385
16440
  "supported_parameters": [
16386
16441
  "frequency_penalty",
16387
16442
  "logit_bias",
16443
+ "logprobs",
16388
16444
  "max_tokens",
16389
16445
  "min_p",
16390
16446
  "presence_penalty",
@@ -16396,18 +16452,19 @@
16396
16452
  "tool_choice",
16397
16453
  "tools",
16398
16454
  "top_k",
16455
+ "top_logprobs",
16399
16456
  "top_p"
16400
16457
  ]
16401
16458
  }
16402
16459
  },
16403
16460
  {
16404
- "id": "meta-llama/llama-3.1-405b",
16405
- "name": "Meta: Llama 3.1 405B (base)",
16461
+ "id": "meta-llama/llama-3-8b-instruct",
16462
+ "name": "Meta: Llama 3 8B Instruct",
16406
16463
  "provider": "openrouter",
16407
16464
  "family": "meta-llama",
16408
- "created_at": "2024-08-02 02:00:00 +0200",
16409
- "context_window": 32768,
16410
- "max_output_tokens": null,
16465
+ "created_at": "2024-04-18 02:00:00 +0200",
16466
+ "context_window": 8192,
16467
+ "max_output_tokens": 16384,
16411
16468
  "knowledge_cutoff": null,
16412
16469
  "modalities": {
16413
16470
  "input": [
@@ -16419,18 +16476,20 @@
16419
16476
  },
16420
16477
  "capabilities": [
16421
16478
  "streaming",
16479
+ "function_calling",
16480
+ "structured_output",
16422
16481
  "predicted_outputs"
16423
16482
  ],
16424
16483
  "pricing": {
16425
16484
  "text_tokens": {
16426
16485
  "standard": {
16427
- "input_per_million": 2.0,
16428
- "output_per_million": 2.0
16486
+ "input_per_million": 0.03,
16487
+ "output_per_million": 0.06
16429
16488
  }
16430
16489
  }
16431
16490
  },
16432
16491
  "metadata": {
16433
- "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This is the base 405B pre-trained version.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
16492
+ "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
16434
16493
  "architecture": {
16435
16494
  "modality": "text->text",
16436
16495
  "input_modalities": [
@@ -16440,7 +16499,73 @@
16440
16499
  "text"
16441
16500
  ],
16442
16501
  "tokenizer": "Llama3",
16443
- "instruct_type": "none"
16502
+ "instruct_type": "llama3"
16503
+ },
16504
+ "top_provider": {
16505
+ "context_length": 8192,
16506
+ "max_completion_tokens": 16384,
16507
+ "is_moderated": false
16508
+ },
16509
+ "per_request_limits": null,
16510
+ "supported_parameters": [
16511
+ "frequency_penalty",
16512
+ "logit_bias",
16513
+ "max_tokens",
16514
+ "min_p",
16515
+ "presence_penalty",
16516
+ "repetition_penalty",
16517
+ "response_format",
16518
+ "seed",
16519
+ "stop",
16520
+ "temperature",
16521
+ "tool_choice",
16522
+ "tools",
16523
+ "top_k",
16524
+ "top_p"
16525
+ ]
16526
+ }
16527
+ },
16528
+ {
16529
+ "id": "meta-llama/llama-3.1-405b",
16530
+ "name": "Meta: Llama 3.1 405B (base)",
16531
+ "provider": "openrouter",
16532
+ "family": "meta-llama",
16533
+ "created_at": "2024-08-02 02:00:00 +0200",
16534
+ "context_window": 32768,
16535
+ "max_output_tokens": null,
16536
+ "knowledge_cutoff": null,
16537
+ "modalities": {
16538
+ "input": [
16539
+ "text"
16540
+ ],
16541
+ "output": [
16542
+ "text"
16543
+ ]
16544
+ },
16545
+ "capabilities": [
16546
+ "streaming",
16547
+ "predicted_outputs"
16548
+ ],
16549
+ "pricing": {
16550
+ "text_tokens": {
16551
+ "standard": {
16552
+ "input_per_million": 2.0,
16553
+ "output_per_million": 2.0
16554
+ }
16555
+ }
16556
+ },
16557
+ "metadata": {
16558
+ "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This is the base 405B pre-trained version.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
16559
+ "architecture": {
16560
+ "modality": "text->text",
16561
+ "input_modalities": [
16562
+ "text"
16563
+ ],
16564
+ "output_modalities": [
16565
+ "text"
16566
+ ],
16567
+ "tokenizer": "Llama3",
16568
+ "instruct_type": "none"
16444
16569
  },
16445
16570
  "top_provider": {
16446
16571
  "context_length": 32768,
@@ -16667,7 +16792,7 @@
16667
16792
  "provider": "openrouter",
16668
16793
  "family": "meta-llama",
16669
16794
  "created_at": "2024-07-23 02:00:00 +0200",
16670
- "context_window": 131072,
16795
+ "context_window": 16384,
16671
16796
  "max_output_tokens": 16384,
16672
16797
  "knowledge_cutoff": null,
16673
16798
  "modalities": {
@@ -16687,8 +16812,8 @@
16687
16812
  "pricing": {
16688
16813
  "text_tokens": {
16689
16814
  "standard": {
16690
- "input_per_million": 0.015,
16691
- "output_per_million": 0.02
16815
+ "input_per_million": 0.02,
16816
+ "output_per_million": 0.03
16692
16817
  }
16693
16818
  }
16694
16819
  },
@@ -16706,7 +16831,7 @@
16706
16831
  "instruct_type": "llama3"
16707
16832
  },
16708
16833
  "top_provider": {
16709
- "context_length": 131072,
16834
+ "context_length": 16384,
16710
16835
  "max_completion_tokens": 16384,
16711
16836
  "is_moderated": false
16712
16837
  },
@@ -16874,7 +16999,7 @@
16874
16999
  "provider": "openrouter",
16875
17000
  "family": "meta-llama",
16876
17001
  "created_at": "2024-09-25 02:00:00 +0200",
16877
- "context_window": 131072,
17002
+ "context_window": 16384,
16878
17003
  "max_output_tokens": 16384,
16879
17004
  "knowledge_cutoff": null,
16880
17005
  "modalities": {
@@ -16894,8 +17019,8 @@
16894
17019
  "pricing": {
16895
17020
  "text_tokens": {
16896
17021
  "standard": {
16897
- "input_per_million": 0.012,
16898
- "output_per_million": 0.024
17022
+ "input_per_million": 0.02,
17023
+ "output_per_million": 0.02
16899
17024
  }
16900
17025
  }
16901
17026
  },
@@ -16913,7 +17038,7 @@
16913
17038
  "instruct_type": "llama3"
16914
17039
  },
16915
17040
  "top_provider": {
16916
- "context_length": 131072,
17041
+ "context_length": 16384,
16917
17042
  "max_completion_tokens": 16384,
16918
17043
  "is_moderated": false
16919
17044
  },
@@ -17062,7 +17187,7 @@
17062
17187
  "family": "meta-llama",
17063
17188
  "created_at": "2024-12-06 18:28:57 +0100",
17064
17189
  "context_window": 131072,
17065
- "max_output_tokens": 16384,
17190
+ "max_output_tokens": 131072,
17066
17191
  "knowledge_cutoff": null,
17067
17192
  "modalities": {
17068
17193
  "input": [
@@ -17081,8 +17206,8 @@
17081
17206
  "pricing": {
17082
17207
  "text_tokens": {
17083
17208
  "standard": {
17084
- "input_per_million": 0.038000000000000006,
17085
- "output_per_million": 0.12
17209
+ "input_per_million": 0.012,
17210
+ "output_per_million": 0.036
17086
17211
  }
17087
17212
  }
17088
17213
  },
@@ -17101,7 +17226,7 @@
17101
17226
  },
17102
17227
  "top_provider": {
17103
17228
  "context_length": 131072,
17104
- "max_completion_tokens": 16384,
17229
+ "max_completion_tokens": 131072,
17105
17230
  "is_moderated": false
17106
17231
  },
17107
17232
  "per_request_limits": null,
@@ -17719,8 +17844,8 @@
17719
17844
  "pricing": {
17720
17845
  "text_tokens": {
17721
17846
  "standard": {
17722
- "input_per_million": 0.1999188,
17723
- "output_per_million": 0.800064
17847
+ "input_per_million": 0.24999987999999998,
17848
+ "output_per_million": 0.999999888
17724
17849
  }
17725
17850
  }
17726
17851
  },
@@ -18012,8 +18137,7 @@
18012
18137
  },
18013
18138
  "capabilities": [
18014
18139
  "streaming",
18015
- "structured_output",
18016
- "predicted_outputs"
18140
+ "structured_output"
18017
18141
  ],
18018
18142
  "pricing": {
18019
18143
  "text_tokens": {
@@ -18044,8 +18168,6 @@
18044
18168
  "per_request_limits": null,
18045
18169
  "supported_parameters": [
18046
18170
  "frequency_penalty",
18047
- "logit_bias",
18048
- "logprobs",
18049
18171
  "max_tokens",
18050
18172
  "min_p",
18051
18173
  "presence_penalty",
@@ -18056,7 +18178,6 @@
18056
18178
  "structured_outputs",
18057
18179
  "temperature",
18058
18180
  "top_k",
18059
- "top_logprobs",
18060
18181
  "top_p"
18061
18182
  ]
18062
18183
  }
@@ -18081,6 +18202,7 @@
18081
18202
  },
18082
18203
  "capabilities": [
18083
18204
  "streaming",
18205
+ "function_calling",
18084
18206
  "structured_output"
18085
18207
  ],
18086
18208
  "pricing": {
@@ -18120,7 +18242,10 @@
18120
18242
  "response_format",
18121
18243
  "seed",
18122
18244
  "stop",
18245
+ "structured_outputs",
18123
18246
  "temperature",
18247
+ "tool_choice",
18248
+ "tools",
18124
18249
  "top_k",
18125
18250
  "top_p"
18126
18251
  ]
@@ -18666,8 +18791,8 @@
18666
18791
  "pricing": {
18667
18792
  "text_tokens": {
18668
18793
  "standard": {
18669
- "input_per_million": 0.01999188,
18670
- "output_per_million": 0.0800064
18794
+ "input_per_million": 0.04,
18795
+ "output_per_million": 0.14
18671
18796
  }
18672
18797
  }
18673
18798
  },
@@ -19687,7 +19812,7 @@
19687
19812
  "family": "mistralai",
19688
19813
  "created_at": "2024-07-19 02:00:00 +0200",
19689
19814
  "context_window": 131072,
19690
- "max_output_tokens": 128000,
19815
+ "max_output_tokens": 16384,
19691
19816
  "knowledge_cutoff": null,
19692
19817
  "modalities": {
19693
19818
  "input": [
@@ -19706,8 +19831,8 @@
19706
19831
  "pricing": {
19707
19832
  "text_tokens": {
19708
19833
  "standard": {
19709
- "input_per_million": 0.01,
19710
- "output_per_million": 0.0400032
19834
+ "input_per_million": 0.02,
19835
+ "output_per_million": 0.04
19711
19836
  }
19712
19837
  }
19713
19838
  },
@@ -19726,7 +19851,7 @@
19726
19851
  },
19727
19852
  "top_provider": {
19728
19853
  "context_length": 131072,
19729
- "max_completion_tokens": 128000,
19854
+ "max_completion_tokens": 16384,
19730
19855
  "is_moderated": false
19731
19856
  },
19732
19857
  "per_request_limits": null,
@@ -19963,8 +20088,8 @@
19963
20088
  "pricing": {
19964
20089
  "text_tokens": {
19965
20090
  "standard": {
19966
- "input_per_million": 0.01999188,
19967
- "output_per_million": 0.0800064
20091
+ "input_per_million": 0.04,
20092
+ "output_per_million": 0.15
19968
20093
  }
19969
20094
  }
19970
20095
  },
@@ -20093,8 +20218,8 @@
20093
20218
  "pricing": {
20094
20219
  "text_tokens": {
20095
20220
  "standard": {
20096
- "input_per_million": 0.01999188,
20097
- "output_per_million": 0.0800064
20221
+ "input_per_million": 0.04,
20222
+ "output_per_million": 0.15
20098
20223
  }
20099
20224
  }
20100
20225
  },
@@ -20232,8 +20357,8 @@
20232
20357
  "pricing": {
20233
20358
  "text_tokens": {
20234
20359
  "standard": {
20235
- "input_per_million": 0.049999999999999996,
20236
- "output_per_million": 0.09999999999999999
20360
+ "input_per_million": 0.075,
20361
+ "output_per_million": 0.19999999999999998
20237
20362
  }
20238
20363
  }
20239
20364
  },
@@ -20502,8 +20627,8 @@
20502
20627
  "pricing": {
20503
20628
  "text_tokens": {
20504
20629
  "standard": {
20505
- "input_per_million": 0.08,
20506
- "output_per_million": 0.24
20630
+ "input_per_million": 0.39999999999999997,
20631
+ "output_per_million": 0.39999999999999997
20507
20632
  }
20508
20633
  }
20509
20634
  },
@@ -20736,6 +20861,7 @@
20736
20861
  "include_reasoning",
20737
20862
  "reasoning",
20738
20863
  "response_format",
20864
+ "structured_outputs",
20739
20865
  "temperature",
20740
20866
  "top_k",
20741
20867
  "top_p"
@@ -20899,8 +21025,8 @@
20899
21025
  "pricing": {
20900
21026
  "text_tokens": {
20901
21027
  "standard": {
20902
- "input_per_million": 0.2962,
20903
- "output_per_million": 1.1852999999999998
21028
+ "input_per_million": 0.38,
21029
+ "output_per_million": 1.52
20904
21030
  }
20905
21031
  }
20906
21032
  },
@@ -21030,8 +21156,8 @@
21030
21156
  "pricing": {
21031
21157
  "text_tokens": {
21032
21158
  "standard": {
21033
- "input_per_million": 0.02498985,
21034
- "output_per_million": 0.100008
21159
+ "input_per_million": 0.02,
21160
+ "output_per_million": 0.07
21035
21161
  }
21036
21162
  }
21037
21163
  },
@@ -21521,8 +21647,8 @@
21521
21647
  "pricing": {
21522
21648
  "text_tokens": {
21523
21649
  "standard": {
21524
- "input_per_million": 0.09329544,
21525
- "output_per_million": 0.3733632
21650
+ "input_per_million": 0.13,
21651
+ "output_per_million": 0.51
21526
21652
  }
21527
21653
  }
21528
21654
  },
@@ -21639,7 +21765,7 @@
21639
21765
  "family": "nousresearch",
21640
21766
  "created_at": "2024-08-16 02:00:00 +0200",
21641
21767
  "context_window": 131072,
21642
- "max_output_tokens": 16384,
21768
+ "max_output_tokens": 131072,
21643
21769
  "knowledge_cutoff": null,
21644
21770
  "modalities": {
21645
21771
  "input": [
@@ -21657,7 +21783,7 @@
21657
21783
  "pricing": {
21658
21784
  "text_tokens": {
21659
21785
  "standard": {
21660
- "input_per_million": 0.7,
21786
+ "input_per_million": 0.7999999999999999,
21661
21787
  "output_per_million": 0.7999999999999999
21662
21788
  }
21663
21789
  }
@@ -21677,7 +21803,7 @@
21677
21803
  },
21678
21804
  "top_provider": {
21679
21805
  "context_length": 131072,
21680
- "max_completion_tokens": 16384,
21806
+ "max_completion_tokens": 131072,
21681
21807
  "is_moderated": false
21682
21808
  },
21683
21809
  "per_request_limits": null,
@@ -21706,7 +21832,7 @@
21706
21832
  "family": "nousresearch",
21707
21833
  "created_at": "2024-08-18 02:00:00 +0200",
21708
21834
  "context_window": 131072,
21709
- "max_output_tokens": null,
21835
+ "max_output_tokens": 131072,
21710
21836
  "knowledge_cutoff": null,
21711
21837
  "modalities": {
21712
21838
  "input": [
@@ -21725,8 +21851,8 @@
21725
21851
  "pricing": {
21726
21852
  "text_tokens": {
21727
21853
  "standard": {
21728
- "input_per_million": 0.09999999999999999,
21729
- "output_per_million": 0.28
21854
+ "input_per_million": 0.12,
21855
+ "output_per_million": 0.3
21730
21856
  }
21731
21857
  }
21732
21858
  },
@@ -21745,7 +21871,7 @@
21745
21871
  },
21746
21872
  "top_provider": {
21747
21873
  "context_length": 131072,
21748
- "max_completion_tokens": null,
21874
+ "max_completion_tokens": 131072,
21749
21875
  "is_moderated": false
21750
21876
  },
21751
21877
  "per_request_limits": null,
@@ -21795,8 +21921,8 @@
21795
21921
  "pricing": {
21796
21922
  "text_tokens": {
21797
21923
  "standard": {
21798
- "input_per_million": 0.1999188,
21799
- "output_per_million": 0.800064
21924
+ "input_per_million": 0.24999987999999998,
21925
+ "output_per_million": 0.999999888
21800
21926
  }
21801
21927
  }
21802
21928
  },
@@ -21865,8 +21991,8 @@
21865
21991
  "pricing": {
21866
21992
  "text_tokens": {
21867
21993
  "standard": {
21868
- "input_per_million": 0.09329544,
21869
- "output_per_million": 0.3733632
21994
+ "input_per_million": 0.11,
21995
+ "output_per_million": 0.38
21870
21996
  }
21871
21997
  }
21872
21998
  },
@@ -21936,8 +22062,8 @@
21936
22062
  "pricing": {
21937
22063
  "text_tokens": {
21938
22064
  "standard": {
21939
- "input_per_million": 0.12,
21940
- "output_per_million": 0.3
22065
+ "input_per_million": 0.6,
22066
+ "output_per_million": 0.6
21941
22067
  }
21942
22068
  }
21943
22069
  },
@@ -22044,69 +22170,11 @@
22044
22170
  }
22045
22171
  },
22046
22172
  {
22047
- "id": "nvidia/llama-3.1-nemotron-ultra-253b-v1:free",
22048
- "name": "NVIDIA: Llama 3.1 Nemotron Ultra 253B v1 (free)",
22049
- "provider": "openrouter",
22050
- "family": "nvidia",
22051
- "created_at": "2025-04-08 14:24:19 +0200",
22052
- "context_window": 131072,
22053
- "max_output_tokens": null,
22054
- "knowledge_cutoff": null,
22055
- "modalities": {
22056
- "input": [
22057
- "text"
22058
- ],
22059
- "output": [
22060
- "text"
22061
- ]
22062
- },
22063
- "capabilities": [
22064
- "streaming",
22065
- "predicted_outputs"
22066
- ],
22067
- "pricing": {},
22068
- "metadata": {
22069
- "description": "Llama-3.1-Nemotron-Ultra-253B-v1 is a large language model (LLM) optimized for advanced reasoning, human-interactive chat, retrieval-augmented generation (RAG), and tool-calling tasks. Derived from Meta’s Llama-3.1-405B-Instruct, it has been significantly customized using Neural Architecture Search (NAS), resulting in enhanced efficiency, reduced memory usage, and improved inference latency. The model supports a context length of up to 128K tokens and can operate efficiently on an 8x NVIDIA H100 node.\n\nNote: you must include `detailed thinking on` in the system prompt to enable reasoning. Please see [Usage Recommendations](https://huggingface.co/nvidia/Llama-3_1-Nemotron-Ultra-253B-v1#quick-start-and-usage-recommendations) for more.",
22070
- "architecture": {
22071
- "modality": "text->text",
22072
- "input_modalities": [
22073
- "text"
22074
- ],
22075
- "output_modalities": [
22076
- "text"
22077
- ],
22078
- "tokenizer": "Llama3",
22079
- "instruct_type": null
22080
- },
22081
- "top_provider": {
22082
- "context_length": 131072,
22083
- "max_completion_tokens": null,
22084
- "is_moderated": false
22085
- },
22086
- "per_request_limits": null,
22087
- "supported_parameters": [
22088
- "frequency_penalty",
22089
- "logit_bias",
22090
- "logprobs",
22091
- "max_tokens",
22092
- "min_p",
22093
- "presence_penalty",
22094
- "repetition_penalty",
22095
- "seed",
22096
- "stop",
22097
- "temperature",
22098
- "top_k",
22099
- "top_logprobs",
22100
- "top_p"
22101
- ]
22102
- }
22103
- },
22104
- {
22105
- "id": "nvidia/llama-3.3-nemotron-super-49b-v1",
22106
- "name": "NVIDIA: Llama 3.3 Nemotron Super 49B v1",
22173
+ "id": "nvidia/nemotron-nano-9b-v2",
22174
+ "name": "NVIDIA: Nemotron Nano 9B V2",
22107
22175
  "provider": "openrouter",
22108
22176
  "family": "nvidia",
22109
- "created_at": "2025-04-08 15:38:14 +0200",
22177
+ "created_at": "2025-09-05 23:13:27 +0200",
22110
22178
  "context_window": 131072,
22111
22179
  "max_output_tokens": null,
22112
22180
  "knowledge_cutoff": null,
@@ -22120,18 +22188,19 @@
22120
22188
  },
22121
22189
  "capabilities": [
22122
22190
  "streaming",
22123
- "predicted_outputs"
22191
+ "function_calling",
22192
+ "structured_output"
22124
22193
  ],
22125
22194
  "pricing": {
22126
22195
  "text_tokens": {
22127
22196
  "standard": {
22128
- "input_per_million": 0.13,
22129
- "output_per_million": 0.39999999999999997
22197
+ "input_per_million": 0.04,
22198
+ "output_per_million": 0.16
22130
22199
  }
22131
22200
  }
22132
22201
  },
22133
22202
  "metadata": {
22134
- "description": "Llama-3.3-Nemotron-Super-49B-v1 is a large language model (LLM) optimized for advanced reasoning, conversational interactions, retrieval-augmented generation (RAG), and tool-calling tasks. Derived from Meta's Llama-3.3-70B-Instruct, it employs a Neural Architecture Search (NAS) approach, significantly enhancing efficiency and reducing memory requirements. This allows the model to support a context length of up to 128K tokens and fit efficiently on single high-performance GPUs, such as NVIDIA H200.\n\nNote: you must include `detailed thinking on` in the system prompt to enable reasoning. Please see [Usage Recommendations](https://huggingface.co/nvidia/Llama-3_1-Nemotron-Ultra-253B-v1#quick-start-and-usage-recommendations) for more.",
22203
+ "description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and tasks by first generating a reasoning trace and then concluding with a final response. \n\nThe model's reasoning capabilities can be controlled via a system prompt. If the user prefers the model to provide its final answer without intermediate reasoning traces, it can be configured to do so.",
22135
22204
  "architecture": {
22136
22205
  "modality": "text->text",
22137
22206
  "input_modalities": [
@@ -22151,22 +22220,26 @@
22151
22220
  "per_request_limits": null,
22152
22221
  "supported_parameters": [
22153
22222
  "frequency_penalty",
22154
- "logit_bias",
22155
- "logprobs",
22223
+ "include_reasoning",
22156
22224
  "max_tokens",
22225
+ "min_p",
22157
22226
  "presence_penalty",
22227
+ "reasoning",
22228
+ "repetition_penalty",
22229
+ "response_format",
22158
22230
  "seed",
22159
22231
  "stop",
22160
22232
  "temperature",
22233
+ "tool_choice",
22234
+ "tools",
22161
22235
  "top_k",
22162
- "top_logprobs",
22163
22236
  "top_p"
22164
22237
  ]
22165
22238
  }
22166
22239
  },
22167
22240
  {
22168
- "id": "nvidia/nemotron-nano-9b-v2",
22169
- "name": "NVIDIA: Nemotron Nano 9B V2",
22241
+ "id": "nvidia/nemotron-nano-9b-v2:free",
22242
+ "name": "NVIDIA: Nemotron Nano 9B V2 (free)",
22170
22243
  "provider": "openrouter",
22171
22244
  "family": "nvidia",
22172
22245
  "created_at": "2025-09-05 23:13:27 +0200",
@@ -23883,9 +23956,9 @@
23883
23956
  "pricing": {
23884
23957
  "text_tokens": {
23885
23958
  "standard": {
23886
- "input_per_million": 1.25,
23887
- "output_per_million": 10.0,
23888
- "cached_input_per_million": 0.125
23959
+ "input_per_million": 0.625,
23960
+ "output_per_million": 5.0,
23961
+ "cached_input_per_million": 0.0625
23889
23962
  }
23890
23963
  }
23891
23964
  },
@@ -24121,8 +24194,8 @@
24121
24194
  "provider": "openrouter",
24122
24195
  "family": "openai",
24123
24196
  "created_at": "2025-08-05 19:17:11 +0200",
24124
- "context_window": 131000,
24125
- "max_output_tokens": 131000,
24197
+ "context_window": 131072,
24198
+ "max_output_tokens": null,
24126
24199
  "knowledge_cutoff": null,
24127
24200
  "modalities": {
24128
24201
  "input": [
@@ -24141,8 +24214,8 @@
24141
24214
  "pricing": {
24142
24215
  "text_tokens": {
24143
24216
  "standard": {
24144
- "input_per_million": 0.072,
24145
- "output_per_million": 0.28
24217
+ "input_per_million": 0.049999999999999996,
24218
+ "output_per_million": 0.25
24146
24219
  }
24147
24220
  }
24148
24221
  },
@@ -24160,8 +24233,8 @@
24160
24233
  "instruct_type": null
24161
24234
  },
24162
24235
  "top_provider": {
24163
- "context_length": 131000,
24164
- "max_completion_tokens": 131000,
24236
+ "context_length": 131072,
24237
+ "max_completion_tokens": null,
24165
24238
  "is_moderated": false
24166
24239
  },
24167
24240
  "per_request_limits": null,
@@ -24244,8 +24317,8 @@
24244
24317
  "provider": "openrouter",
24245
24318
  "family": "openai",
24246
24319
  "created_at": "2025-08-05 19:17:09 +0200",
24247
- "context_window": 131000,
24248
- "max_output_tokens": 131000,
24320
+ "context_window": 131072,
24321
+ "max_output_tokens": 32768,
24249
24322
  "knowledge_cutoff": null,
24250
24323
  "modalities": {
24251
24324
  "input": [
@@ -24264,7 +24337,7 @@
24264
24337
  "pricing": {
24265
24338
  "text_tokens": {
24266
24339
  "standard": {
24267
- "input_per_million": 0.04,
24340
+ "input_per_million": 0.03,
24268
24341
  "output_per_million": 0.15
24269
24342
  }
24270
24343
  }
@@ -24283,8 +24356,8 @@
24283
24356
  "instruct_type": null
24284
24357
  },
24285
24358
  "top_provider": {
24286
- "context_length": 131000,
24287
- "max_completion_tokens": 131000,
24359
+ "context_length": 131072,
24360
+ "max_completion_tokens": 32768,
24288
24361
  "is_moderated": false
24289
24362
  },
24290
24363
  "per_request_limits": null,
@@ -24674,7 +24747,8 @@
24674
24747
  "knowledge_cutoff": null,
24675
24748
  "modalities": {
24676
24749
  "input": [
24677
- "text"
24750
+ "text",
24751
+ "file"
24678
24752
  ],
24679
24753
  "output": [
24680
24754
  "text"
@@ -24699,7 +24773,8 @@
24699
24773
  "architecture": {
24700
24774
  "modality": "text->text",
24701
24775
  "input_modalities": [
24702
- "text"
24776
+ "text",
24777
+ "file"
24703
24778
  ],
24704
24779
  "output_modalities": [
24705
24780
  "text"
@@ -24734,7 +24809,8 @@
24734
24809
  "knowledge_cutoff": null,
24735
24810
  "modalities": {
24736
24811
  "input": [
24737
- "text"
24812
+ "text",
24813
+ "file"
24738
24814
  ],
24739
24815
  "output": [
24740
24816
  "text"
@@ -24759,12 +24835,13 @@
24759
24835
  "architecture": {
24760
24836
  "modality": "text->text",
24761
24837
  "input_modalities": [
24762
- "text"
24838
+ "text",
24839
+ "file"
24763
24840
  ],
24764
24841
  "output_modalities": [
24765
24842
  "text"
24766
24843
  ],
24767
- "tokenizer": "Other",
24844
+ "tokenizer": "GPT",
24768
24845
  "instruct_type": null
24769
24846
  },
24770
24847
  "top_provider": {
@@ -24981,79 +25058,43 @@
24981
25058
  }
24982
25059
  },
24983
25060
  {
24984
- "id": "openrouter/auto",
24985
- "name": "Auto Router",
25061
+ "id": "opengvlab/internvl3-78b",
25062
+ "name": "OpenGVLab: InternVL3 78B",
24986
25063
  "provider": "openrouter",
24987
- "family": "openrouter",
24988
- "created_at": "2023-11-08 01:00:00 +0100",
24989
- "context_window": 2000000,
25064
+ "family": "opengvlab",
25065
+ "created_at": "2025-09-15 20:55:55 +0200",
25066
+ "context_window": 32768,
24990
25067
  "max_output_tokens": null,
24991
25068
  "knowledge_cutoff": null,
24992
25069
  "modalities": {
24993
25070
  "input": [
25071
+ "image",
24994
25072
  "text"
24995
25073
  ],
24996
25074
  "output": [
24997
25075
  "text"
24998
25076
  ]
24999
25077
  },
25000
- "capabilities": [
25001
- "streaming"
25002
- ],
25003
- "pricing": {},
25004
- "metadata": {
25005
- "description": "Your prompt will be processed by a meta-model and routed to one of dozens of models (see below), optimizing for the best possible output.\n\nTo see which model was used, visit [Activity](/activity), or read the `model` attribute of the response. Your response will be priced at the same rate as the routed model.\n\nThe meta-model is powered by [Not Diamond](https://docs.notdiamond.ai/docs/how-not-diamond-works). Learn more in our [docs](/docs/model-routing).\n\nRequests will be routed to the following models:\n- [openai/gpt-4o-2024-08-06](/openai/gpt-4o-2024-08-06)\n- [openai/gpt-4o-2024-05-13](/openai/gpt-4o-2024-05-13)\n- [openai/gpt-4o-mini-2024-07-18](/openai/gpt-4o-mini-2024-07-18)\n- [openai/chatgpt-4o-latest](/openai/chatgpt-4o-latest)\n- [openai/o1-preview-2024-09-12](/openai/o1-preview-2024-09-12)\n- [openai/o1-mini-2024-09-12](/openai/o1-mini-2024-09-12)\n- [anthropic/claude-3.5-sonnet](/anthropic/claude-3.5-sonnet)\n- [anthropic/claude-3.5-haiku](/anthropic/claude-3.5-haiku)\n- [anthropic/claude-3-opus](/anthropic/claude-3-opus)\n- [anthropic/claude-2.1](/anthropic/claude-2.1)\n- [google/gemini-pro-1.5](/google/gemini-pro-1.5)\n- [google/gemini-flash-1.5](/google/gemini-flash-1.5)\n- [mistralai/mistral-large-2407](/mistralai/mistral-large-2407)\n- [mistralai/mistral-nemo](/mistralai/mistral-nemo)\n- [deepseek/deepseek-r1](/deepseek/deepseek-r1)\n- [meta-llama/llama-3.1-70b-instruct](/meta-llama/llama-3.1-70b-instruct)\n- [meta-llama/llama-3.1-405b-instruct](/meta-llama/llama-3.1-405b-instruct)\n- [mistralai/mixtral-8x22b-instruct](/mistralai/mixtral-8x22b-instruct)\n- [cohere/command-r-plus](/cohere/command-r-plus)\n- [cohere/command-r](/cohere/command-r)",
25006
- "architecture": {
25007
- "modality": "text->text",
25008
- "input_modalities": [
25009
- "text"
25010
- ],
25011
- "output_modalities": [
25012
- "text"
25013
- ],
25014
- "tokenizer": "Router",
25015
- "instruct_type": null
25016
- },
25017
- "top_provider": {
25018
- "context_length": null,
25019
- "max_completion_tokens": null,
25020
- "is_moderated": false
25021
- },
25022
- "per_request_limits": null,
25023
- "supported_parameters": []
25024
- }
25025
- },
25026
- {
25027
- "id": "openrouter/sonoma-dusk-alpha",
25028
- "name": "Sonoma Dusk Alpha",
25029
- "provider": "openrouter",
25030
- "family": "openrouter",
25031
- "created_at": "2025-09-05 19:27:27 +0200",
25032
- "context_window": 2000000,
25033
- "max_output_tokens": null,
25034
- "knowledge_cutoff": null,
25035
- "modalities": {
25036
- "input": [
25037
- "text",
25038
- "image"
25039
- ],
25040
- "output": [
25041
- "text"
25042
- ]
25043
- },
25044
25078
  "capabilities": [
25045
25079
  "streaming",
25046
- "function_calling",
25047
- "structured_output"
25080
+ "structured_output",
25081
+ "predicted_outputs"
25048
25082
  ],
25049
- "pricing": {},
25083
+ "pricing": {
25084
+ "text_tokens": {
25085
+ "standard": {
25086
+ "input_per_million": 0.03,
25087
+ "output_per_million": 0.13
25088
+ }
25089
+ }
25090
+ },
25050
25091
  "metadata": {
25051
- "description": "This is a cloaked model provided to the community to gather feedback. A fast and intelligent general-purpose frontier model with a 2 million token context window. Supports image inputs and parallel tool calling.\n\nNote: It’s free to use during this testing period, and prompts and completions are logged by the model creator for feedback and training.",
25092
+ "description": "The InternVL3 series is an advanced multimodal large language model (MLLM). Compared to InternVL 2.5, InternVL3 demonstrates stronger multimodal perception and reasoning capabilities. \n\nIn addition, InternVL3 is benchmarked against the Qwen2.5 Chat models, whose pre-trained base models serve as the initialization for its language component. Benefiting from Native Multimodal Pre-Training, the InternVL3 series surpasses the Qwen2.5 series in overall text performance.",
25052
25093
  "architecture": {
25053
25094
  "modality": "text+image->text",
25054
25095
  "input_modalities": [
25055
- "text",
25056
- "image"
25096
+ "image",
25097
+ "text"
25057
25098
  ],
25058
25099
  "output_modalities": [
25059
25100
  "text"
@@ -25062,73 +25103,71 @@
25062
25103
  "instruct_type": null
25063
25104
  },
25064
25105
  "top_provider": {
25065
- "context_length": 2000000,
25106
+ "context_length": 32768,
25066
25107
  "max_completion_tokens": null,
25067
25108
  "is_moderated": false
25068
25109
  },
25069
25110
  "per_request_limits": null,
25070
25111
  "supported_parameters": [
25112
+ "frequency_penalty",
25113
+ "logit_bias",
25114
+ "logprobs",
25071
25115
  "max_tokens",
25116
+ "min_p",
25117
+ "presence_penalty",
25118
+ "repetition_penalty",
25072
25119
  "response_format",
25120
+ "seed",
25121
+ "stop",
25073
25122
  "structured_outputs",
25074
- "tool_choice",
25075
- "tools"
25123
+ "temperature",
25124
+ "top_k",
25125
+ "top_logprobs",
25126
+ "top_p"
25076
25127
  ]
25077
25128
  }
25078
25129
  },
25079
25130
  {
25080
- "id": "openrouter/sonoma-sky-alpha",
25081
- "name": "Sonoma Sky Alpha",
25131
+ "id": "openrouter/auto",
25132
+ "name": "Auto Router",
25082
25133
  "provider": "openrouter",
25083
25134
  "family": "openrouter",
25084
- "created_at": "2025-09-05 19:23:21 +0200",
25135
+ "created_at": "2023-11-08 01:00:00 +0100",
25085
25136
  "context_window": 2000000,
25086
25137
  "max_output_tokens": null,
25087
25138
  "knowledge_cutoff": null,
25088
25139
  "modalities": {
25089
25140
  "input": [
25090
- "text",
25091
- "image"
25141
+ "text"
25092
25142
  ],
25093
25143
  "output": [
25094
25144
  "text"
25095
25145
  ]
25096
25146
  },
25097
25147
  "capabilities": [
25098
- "streaming",
25099
- "function_calling",
25100
- "structured_output"
25148
+ "streaming"
25101
25149
  ],
25102
25150
  "pricing": {},
25103
25151
  "metadata": {
25104
- "description": "This is a cloaked model provided to the community to gather feedback. A maximally intelligent general-purpose frontier model with a 2 million token context window. Supports image inputs and parallel tool calling.\n\nNote: It’s free to use during this testing period, and prompts and completions are logged by the model creator for feedback and training.",
25152
+ "description": "Your prompt will be processed by a meta-model and routed to one of dozens of models (see below), optimizing for the best possible output.\n\nTo see which model was used, visit [Activity](/activity), or read the `model` attribute of the response. Your response will be priced at the same rate as the routed model.\n\nThe meta-model is powered by [Not Diamond](https://docs.notdiamond.ai/docs/how-not-diamond-works). Learn more in our [docs](/docs/model-routing).\n\nRequests will be routed to the following models:\n- [openai/gpt-4o-2024-08-06](/openai/gpt-4o-2024-08-06)\n- [openai/gpt-4o-2024-05-13](/openai/gpt-4o-2024-05-13)\n- [openai/gpt-4o-mini-2024-07-18](/openai/gpt-4o-mini-2024-07-18)\n- [openai/chatgpt-4o-latest](/openai/chatgpt-4o-latest)\n- [openai/o1-preview-2024-09-12](/openai/o1-preview-2024-09-12)\n- [openai/o1-mini-2024-09-12](/openai/o1-mini-2024-09-12)\n- [anthropic/claude-3.5-sonnet](/anthropic/claude-3.5-sonnet)\n- [anthropic/claude-3.5-haiku](/anthropic/claude-3.5-haiku)\n- [anthropic/claude-3-opus](/anthropic/claude-3-opus)\n- [anthropic/claude-2.1](/anthropic/claude-2.1)\n- [google/gemini-pro-1.5](/google/gemini-pro-1.5)\n- [google/gemini-flash-1.5](/google/gemini-flash-1.5)\n- [mistralai/mistral-large-2407](/mistralai/mistral-large-2407)\n- [mistralai/mistral-nemo](/mistralai/mistral-nemo)\n- [deepseek/deepseek-r1](/deepseek/deepseek-r1)\n- [meta-llama/llama-3.1-70b-instruct](/meta-llama/llama-3.1-70b-instruct)\n- [meta-llama/llama-3.1-405b-instruct](/meta-llama/llama-3.1-405b-instruct)\n- [mistralai/mixtral-8x22b-instruct](/mistralai/mixtral-8x22b-instruct)\n- [cohere/command-r-plus](/cohere/command-r-plus)\n- [cohere/command-r](/cohere/command-r)",
25105
25153
  "architecture": {
25106
- "modality": "text+image->text",
25154
+ "modality": "text->text",
25107
25155
  "input_modalities": [
25108
- "text",
25109
- "image"
25156
+ "text"
25110
25157
  ],
25111
25158
  "output_modalities": [
25112
25159
  "text"
25113
25160
  ],
25114
- "tokenizer": "Other",
25161
+ "tokenizer": "Router",
25115
25162
  "instruct_type": null
25116
25163
  },
25117
25164
  "top_provider": {
25118
- "context_length": 2000000,
25165
+ "context_length": null,
25119
25166
  "max_completion_tokens": null,
25120
25167
  "is_moderated": false
25121
25168
  },
25122
25169
  "per_request_limits": null,
25123
- "supported_parameters": [
25124
- "include_reasoning",
25125
- "max_tokens",
25126
- "reasoning",
25127
- "response_format",
25128
- "structured_outputs",
25129
- "tool_choice",
25130
- "tools"
25131
- ]
25170
+ "supported_parameters": []
25132
25171
  }
25133
25172
  },
25134
25173
  {
@@ -25493,69 +25532,6 @@
25493
25532
  ]
25494
25533
  }
25495
25534
  },
25496
- {
25497
- "id": "pygmalionai/mythalion-13b",
25498
- "name": "Pygmalion: Mythalion 13B",
25499
- "provider": "openrouter",
25500
- "family": "pygmalionai",
25501
- "created_at": "2023-09-02 02:00:00 +0200",
25502
- "context_window": 4096,
25503
- "max_output_tokens": 4096,
25504
- "knowledge_cutoff": null,
25505
- "modalities": {
25506
- "input": [
25507
- "text"
25508
- ],
25509
- "output": [
25510
- "text"
25511
- ]
25512
- },
25513
- "capabilities": [
25514
- "streaming",
25515
- "predicted_outputs"
25516
- ],
25517
- "pricing": {
25518
- "text_tokens": {
25519
- "standard": {
25520
- "input_per_million": 0.7,
25521
- "output_per_million": 1.1
25522
- }
25523
- }
25524
- },
25525
- "metadata": {
25526
- "description": "A blend of the new Pygmalion-13b and MythoMax. #merge",
25527
- "architecture": {
25528
- "modality": "text->text",
25529
- "input_modalities": [
25530
- "text"
25531
- ],
25532
- "output_modalities": [
25533
- "text"
25534
- ],
25535
- "tokenizer": "Llama2",
25536
- "instruct_type": "alpaca"
25537
- },
25538
- "top_provider": {
25539
- "context_length": 4096,
25540
- "max_completion_tokens": 4096,
25541
- "is_moderated": false
25542
- },
25543
- "per_request_limits": null,
25544
- "supported_parameters": [
25545
- "frequency_penalty",
25546
- "logit_bias",
25547
- "max_tokens",
25548
- "min_p",
25549
- "presence_penalty",
25550
- "repetition_penalty",
25551
- "seed",
25552
- "stop",
25553
- "temperature",
25554
- "top_k",
25555
- "top_p"
25556
- ]
25557
- }
25558
- },
25559
25535
  {
25560
25536
  "id": "qwen/qwen-2.5-72b-instruct",
25561
25537
  "name": "Qwen2.5 72B Instruct",
@@ -25582,8 +25558,8 @@
25582
25558
  "pricing": {
25583
25559
  "text_tokens": {
25584
25560
  "standard": {
25585
- "input_per_million": 0.051830799999999996,
25586
- "output_per_million": 0.207424
25561
+ "input_per_million": 0.07,
25562
+ "output_per_million": 0.26
25587
25563
  }
25588
25564
  }
25589
25565
  },
@@ -25757,7 +25733,7 @@
25757
25733
  "family": "qwen",
25758
25734
  "created_at": "2024-11-12 00:40:00 +0100",
25759
25735
  "context_window": 32768,
25760
- "max_output_tokens": null,
25736
+ "max_output_tokens": 16384,
25761
25737
  "knowledge_cutoff": null,
25762
25738
  "modalities": {
25763
25739
  "input": [
@@ -25775,8 +25751,8 @@
25775
25751
  "pricing": {
25776
25752
  "text_tokens": {
25777
25753
  "standard": {
25778
- "input_per_million": 0.0499797,
25779
- "output_per_million": 0.200016
25754
+ "input_per_million": 0.06,
25755
+ "output_per_million": 0.15
25780
25756
  }
25781
25757
  }
25782
25758
  },
@@ -25795,7 +25771,7 @@
25795
25771
  },
25796
25772
  "top_provider": {
25797
25773
  "context_length": 32768,
25798
- "max_completion_tokens": null,
25774
+ "max_completion_tokens": 16384,
25799
25775
  "is_moderated": false
25800
25776
  },
25801
25777
  "per_request_limits": null,
@@ -26070,13 +26046,13 @@
26070
26046
  }
26071
26047
  },
26072
26048
  {
26073
- "id": "qwen/qwen-turbo",
26074
- "name": "Qwen: Qwen-Turbo",
26049
+ "id": "qwen/qwen-plus-2025-07-28",
26050
+ "name": "Qwen: Qwen Plus 0728",
26075
26051
  "provider": "openrouter",
26076
26052
  "family": "qwen",
26077
- "created_at": "2025-02-01 12:56:14 +0100",
26053
+ "created_at": "2025-09-08 18:06:39 +0200",
26078
26054
  "context_window": 1000000,
26079
- "max_output_tokens": 8192,
26055
+ "max_output_tokens": 32768,
26080
26056
  "knowledge_cutoff": null,
26081
26057
  "modalities": {
26082
26058
  "input": [
@@ -26094,14 +26070,13 @@
26094
26070
  "pricing": {
26095
26071
  "text_tokens": {
26096
26072
  "standard": {
26097
- "input_per_million": 0.049999999999999996,
26098
- "output_per_million": 0.19999999999999998,
26099
- "cached_input_per_million": 0.02
26073
+ "input_per_million": 0.39999999999999997,
26074
+ "output_per_million": 1.2
26100
26075
  }
26101
26076
  }
26102
26077
  },
26103
26078
  "metadata": {
26104
- "description": "Qwen-Turbo, based on Qwen2.5, is a 1M context model that provides fast speed and low cost, suitable for simple tasks.",
26079
+ "description": "Qwen Plus 0728, based on the Qwen3 foundation model, is a 1 million context hybrid reasoning model with a balanced performance, speed, and cost combination.",
26105
26080
  "architecture": {
26106
26081
  "modality": "text->text",
26107
26082
  "input_modalities": [
@@ -26110,12 +26085,12 @@
26110
26085
  "output_modalities": [
26111
26086
  "text"
26112
26087
  ],
26113
- "tokenizer": "Qwen",
26088
+ "tokenizer": "Qwen3",
26114
26089
  "instruct_type": null
26115
26090
  },
26116
26091
  "top_provider": {
26117
26092
  "context_length": 1000000,
26118
- "max_completion_tokens": 8192,
26093
+ "max_completion_tokens": 32768,
26119
26094
  "is_moderated": false
26120
26095
  },
26121
26096
  "per_request_limits": null,
@@ -26124,6 +26099,7 @@
26124
26099
  "presence_penalty",
26125
26100
  "response_format",
26126
26101
  "seed",
26102
+ "structured_outputs",
26127
26103
  "temperature",
26128
26104
  "tool_choice",
26129
26105
  "tools",
@@ -26132,18 +26108,17 @@
26132
26108
  }
26133
26109
  },
26134
26110
  {
26135
- "id": "qwen/qwen-vl-max",
26136
- "name": "Qwen: Qwen VL Max",
26111
+ "id": "qwen/qwen-plus-2025-07-28:thinking",
26112
+ "name": "Qwen: Qwen Plus 0728 (thinking)",
26137
26113
  "provider": "openrouter",
26138
26114
  "family": "qwen",
26139
- "created_at": "2025-02-01 19:25:04 +0100",
26140
- "context_window": 7500,
26141
- "max_output_tokens": 1500,
26115
+ "created_at": "2025-09-08 18:06:39 +0200",
26116
+ "context_window": 1000000,
26117
+ "max_output_tokens": 32768,
26142
26118
  "knowledge_cutoff": null,
26143
26119
  "modalities": {
26144
26120
  "input": [
26145
- "text",
26146
- "image"
26121
+ "text"
26147
26122
  ],
26148
26123
  "output": [
26149
26124
  "text"
@@ -26151,59 +26126,63 @@
26151
26126
  },
26152
26127
  "capabilities": [
26153
26128
  "streaming",
26129
+ "function_calling",
26154
26130
  "structured_output"
26155
26131
  ],
26156
26132
  "pricing": {
26157
26133
  "text_tokens": {
26158
26134
  "standard": {
26159
- "input_per_million": 0.7999999999999999,
26160
- "output_per_million": 3.1999999999999997
26135
+ "input_per_million": 0.39999999999999997,
26136
+ "output_per_million": 4.0
26161
26137
  }
26162
26138
  }
26163
26139
  },
26164
26140
  "metadata": {
26165
- "description": "Qwen VL Max is a visual understanding model with 7500 tokens context length. It excels in delivering optimal performance for a broader spectrum of complex tasks.\n",
26141
+ "description": "Qwen Plus 0728, based on the Qwen3 foundation model, is a 1 million context hybrid reasoning model with a balanced performance, speed, and cost combination.",
26166
26142
  "architecture": {
26167
- "modality": "text+image->text",
26143
+ "modality": "text->text",
26168
26144
  "input_modalities": [
26169
- "text",
26170
- "image"
26145
+ "text"
26171
26146
  ],
26172
26147
  "output_modalities": [
26173
26148
  "text"
26174
26149
  ],
26175
- "tokenizer": "Qwen",
26150
+ "tokenizer": "Qwen3",
26176
26151
  "instruct_type": null
26177
26152
  },
26178
26153
  "top_provider": {
26179
- "context_length": 7500,
26180
- "max_completion_tokens": 1500,
26154
+ "context_length": 1000000,
26155
+ "max_completion_tokens": 32768,
26181
26156
  "is_moderated": false
26182
26157
  },
26183
26158
  "per_request_limits": null,
26184
26159
  "supported_parameters": [
26160
+ "include_reasoning",
26185
26161
  "max_tokens",
26186
26162
  "presence_penalty",
26163
+ "reasoning",
26187
26164
  "response_format",
26188
26165
  "seed",
26166
+ "structured_outputs",
26189
26167
  "temperature",
26168
+ "tool_choice",
26169
+ "tools",
26190
26170
  "top_p"
26191
26171
  ]
26192
26172
  }
26193
26173
  },
26194
26174
  {
26195
- "id": "qwen/qwen-vl-plus",
26196
- "name": "Qwen: Qwen VL Plus",
26175
+ "id": "qwen/qwen-turbo",
26176
+ "name": "Qwen: Qwen-Turbo",
26197
26177
  "provider": "openrouter",
26198
26178
  "family": "qwen",
26199
- "created_at": "2025-02-05 05:54:15 +0100",
26200
- "context_window": 7500,
26201
- "max_output_tokens": 1500,
26179
+ "created_at": "2025-02-01 12:56:14 +0100",
26180
+ "context_window": 1000000,
26181
+ "max_output_tokens": 8192,
26202
26182
  "knowledge_cutoff": null,
26203
26183
  "modalities": {
26204
26184
  "input": [
26205
- "text",
26206
- "image"
26185
+ "text"
26207
26186
  ],
26208
26187
  "output": [
26209
26188
  "text"
@@ -26211,23 +26190,24 @@
26211
26190
  },
26212
26191
  "capabilities": [
26213
26192
  "streaming",
26193
+ "function_calling",
26214
26194
  "structured_output"
26215
26195
  ],
26216
26196
  "pricing": {
26217
26197
  "text_tokens": {
26218
26198
  "standard": {
26219
- "input_per_million": 0.21,
26220
- "output_per_million": 0.63
26199
+ "input_per_million": 0.049999999999999996,
26200
+ "output_per_million": 0.19999999999999998,
26201
+ "cached_input_per_million": 0.02
26221
26202
  }
26222
26203
  }
26223
26204
  },
26224
26205
  "metadata": {
26225
- "description": "Qwen's Enhanced Large Visual Language Model. Significantly upgraded for detailed recognition capabilities and text recognition abilities, supporting ultra-high pixel resolutions up to millions of pixels and extreme aspect ratios for image input. It delivers significant performance across a broad range of visual tasks.\n",
26206
+ "description": "Qwen-Turbo, based on Qwen2.5, is a 1M context model that provides fast speed and low cost, suitable for simple tasks.",
26226
26207
  "architecture": {
26227
- "modality": "text+image->text",
26208
+ "modality": "text->text",
26228
26209
  "input_modalities": [
26229
- "text",
26230
- "image"
26210
+ "text"
26231
26211
  ],
26232
26212
  "output_modalities": [
26233
26213
  "text"
@@ -26236,8 +26216,8 @@
26236
26216
  "instruct_type": null
26237
26217
  },
26238
26218
  "top_provider": {
26239
- "context_length": 7500,
26240
- "max_completion_tokens": 1500,
26219
+ "context_length": 1000000,
26220
+ "max_completion_tokens": 8192,
26241
26221
  "is_moderated": false
26242
26222
  },
26243
26223
  "per_request_limits": null,
@@ -26247,18 +26227,140 @@
26247
26227
  "response_format",
26248
26228
  "seed",
26249
26229
  "temperature",
26230
+ "tool_choice",
26231
+ "tools",
26250
26232
  "top_p"
26251
26233
  ]
26252
26234
  }
26253
26235
  },
26254
26236
  {
26255
- "id": "qwen/qwen2.5-vl-32b-instruct",
26256
- "name": "Qwen: Qwen2.5 VL 32B Instruct",
26237
+ "id": "qwen/qwen-vl-max",
26238
+ "name": "Qwen: Qwen VL Max",
26257
26239
  "provider": "openrouter",
26258
26240
  "family": "qwen",
26259
- "created_at": "2025-03-24 19:10:38 +0100",
26260
- "context_window": 16384,
26261
- "max_output_tokens": null,
26241
+ "created_at": "2025-02-01 19:25:04 +0100",
26242
+ "context_window": 7500,
26243
+ "max_output_tokens": 1500,
26244
+ "knowledge_cutoff": null,
26245
+ "modalities": {
26246
+ "input": [
26247
+ "text",
26248
+ "image"
26249
+ ],
26250
+ "output": [
26251
+ "text"
26252
+ ]
26253
+ },
26254
+ "capabilities": [
26255
+ "streaming",
26256
+ "structured_output"
26257
+ ],
26258
+ "pricing": {
26259
+ "text_tokens": {
26260
+ "standard": {
26261
+ "input_per_million": 0.7999999999999999,
26262
+ "output_per_million": 3.1999999999999997
26263
+ }
26264
+ }
26265
+ },
26266
+ "metadata": {
26267
+ "description": "Qwen VL Max is a visual understanding model with 7500 tokens context length. It excels in delivering optimal performance for a broader spectrum of complex tasks.\n",
26268
+ "architecture": {
26269
+ "modality": "text+image->text",
26270
+ "input_modalities": [
26271
+ "text",
26272
+ "image"
26273
+ ],
26274
+ "output_modalities": [
26275
+ "text"
26276
+ ],
26277
+ "tokenizer": "Qwen",
26278
+ "instruct_type": null
26279
+ },
26280
+ "top_provider": {
26281
+ "context_length": 7500,
26282
+ "max_completion_tokens": 1500,
26283
+ "is_moderated": false
26284
+ },
26285
+ "per_request_limits": null,
26286
+ "supported_parameters": [
26287
+ "max_tokens",
26288
+ "presence_penalty",
26289
+ "response_format",
26290
+ "seed",
26291
+ "temperature",
26292
+ "top_p"
26293
+ ]
26294
+ }
26295
+ },
26296
+ {
26297
+ "id": "qwen/qwen-vl-plus",
26298
+ "name": "Qwen: Qwen VL Plus",
26299
+ "provider": "openrouter",
26300
+ "family": "qwen",
26301
+ "created_at": "2025-02-05 05:54:15 +0100",
26302
+ "context_window": 7500,
26303
+ "max_output_tokens": 1500,
26304
+ "knowledge_cutoff": null,
26305
+ "modalities": {
26306
+ "input": [
26307
+ "text",
26308
+ "image"
26309
+ ],
26310
+ "output": [
26311
+ "text"
26312
+ ]
26313
+ },
26314
+ "capabilities": [
26315
+ "streaming",
26316
+ "structured_output"
26317
+ ],
26318
+ "pricing": {
26319
+ "text_tokens": {
26320
+ "standard": {
26321
+ "input_per_million": 0.21,
26322
+ "output_per_million": 0.63
26323
+ }
26324
+ }
26325
+ },
26326
+ "metadata": {
26327
+ "description": "Qwen's Enhanced Large Visual Language Model. Significantly upgraded for detailed recognition capabilities and text recognition abilities, supporting ultra-high pixel resolutions up to millions of pixels and extreme aspect ratios for image input. It delivers significant performance across a broad range of visual tasks.\n",
26328
+ "architecture": {
26329
+ "modality": "text+image->text",
26330
+ "input_modalities": [
26331
+ "text",
26332
+ "image"
26333
+ ],
26334
+ "output_modalities": [
26335
+ "text"
26336
+ ],
26337
+ "tokenizer": "Qwen",
26338
+ "instruct_type": null
26339
+ },
26340
+ "top_provider": {
26341
+ "context_length": 7500,
26342
+ "max_completion_tokens": 1500,
26343
+ "is_moderated": false
26344
+ },
26345
+ "per_request_limits": null,
26346
+ "supported_parameters": [
26347
+ "max_tokens",
26348
+ "presence_penalty",
26349
+ "response_format",
26350
+ "seed",
26351
+ "temperature",
26352
+ "top_p"
26353
+ ]
26354
+ }
26355
+ },
26356
+ {
26357
+ "id": "qwen/qwen2.5-vl-32b-instruct",
26358
+ "name": "Qwen: Qwen2.5 VL 32B Instruct",
26359
+ "provider": "openrouter",
26360
+ "family": "qwen",
26361
+ "created_at": "2025-03-24 19:10:38 +0100",
26362
+ "context_window": 16384,
26363
+ "max_output_tokens": null,
26262
26364
  "knowledge_cutoff": null,
26263
26365
  "modalities": {
26264
26366
  "input": [
@@ -26277,8 +26379,8 @@
26277
26379
  "pricing": {
26278
26380
  "text_tokens": {
26279
26381
  "standard": {
26280
- "input_per_million": 0.01999188,
26281
- "output_per_million": 0.0800064
26382
+ "input_per_million": 0.04,
26383
+ "output_per_million": 0.14
26282
26384
  }
26283
26385
  }
26284
26386
  },
@@ -26408,8 +26510,8 @@
26408
26510
  "pricing": {
26409
26511
  "text_tokens": {
26410
26512
  "standard": {
26411
- "input_per_million": 0.0999594,
26412
- "output_per_million": 0.400032
26513
+ "input_per_million": 0.07,
26514
+ "output_per_million": 0.28
26413
26515
  }
26414
26516
  }
26415
26517
  },
@@ -26666,8 +26768,8 @@
26666
26768
  "pricing": {
26667
26769
  "text_tokens": {
26668
26770
  "standard": {
26669
- "input_per_million": 0.13,
26670
- "output_per_million": 0.6
26771
+ "input_per_million": 0.18,
26772
+ "output_per_million": 0.54
26671
26773
  }
26672
26774
  }
26673
26775
  },
@@ -26720,7 +26822,7 @@
26720
26822
  "family": "qwen",
26721
26823
  "created_at": "2025-07-21 19:39:15 +0200",
26722
26824
  "context_window": 262144,
26723
- "max_output_tokens": null,
26825
+ "max_output_tokens": 262144,
26724
26826
  "knowledge_cutoff": null,
26725
26827
  "modalities": {
26726
26828
  "input": [
@@ -26739,8 +26841,8 @@
26739
26841
  "pricing": {
26740
26842
  "text_tokens": {
26741
26843
  "standard": {
26742
- "input_per_million": 0.077968332,
26743
- "output_per_million": 0.31202496
26844
+ "input_per_million": 0.09999999999999999,
26845
+ "output_per_million": 0.09999999999999999
26744
26846
  }
26745
26847
  }
26746
26848
  },
@@ -26759,7 +26861,7 @@
26759
26861
  },
26760
26862
  "top_provider": {
26761
26863
  "context_length": 262144,
26762
- "max_completion_tokens": null,
26864
+ "max_completion_tokens": 262144,
26763
26865
  "is_moderated": false
26764
26866
  },
26765
26867
  "per_request_limits": null,
@@ -26810,8 +26912,8 @@
26810
26912
  "pricing": {
26811
26913
  "text_tokens": {
26812
26914
  "standard": {
26813
- "input_per_million": 0.077968332,
26814
- "output_per_million": 0.31202496
26915
+ "input_per_million": 0.09999999999999999,
26916
+ "output_per_million": 0.39
26815
26917
  }
26816
26918
  }
26817
26919
  },
@@ -26847,6 +26949,7 @@
26847
26949
  "response_format",
26848
26950
  "seed",
26849
26951
  "stop",
26952
+ "structured_outputs",
26850
26953
  "temperature",
26851
26954
  "tool_choice",
26852
26955
  "tools",
@@ -26948,8 +27051,8 @@
26948
27051
  "pricing": {
26949
27052
  "text_tokens": {
26950
27053
  "standard": {
26951
- "input_per_million": 0.01999188,
26952
- "output_per_million": 0.0800064
27054
+ "input_per_million": 0.06,
27055
+ "output_per_million": 0.22
26953
27056
  }
26954
27057
  }
26955
27058
  },
@@ -27021,8 +27124,8 @@
27021
27124
  "pricing": {
27022
27125
  "text_tokens": {
27023
27126
  "standard": {
27024
- "input_per_million": 0.051830799999999996,
27025
- "output_per_million": 0.207424
27127
+ "input_per_million": 0.07,
27128
+ "output_per_million": 0.28
27026
27129
  }
27027
27130
  }
27028
27131
  },
@@ -27056,6 +27159,7 @@
27056
27159
  "response_format",
27057
27160
  "seed",
27058
27161
  "stop",
27162
+ "structured_outputs",
27059
27163
  "temperature",
27060
27164
  "tool_choice",
27061
27165
  "tools",
@@ -27091,8 +27195,8 @@
27091
27195
  "pricing": {
27092
27196
  "text_tokens": {
27093
27197
  "standard": {
27094
- "input_per_million": 0.0713,
27095
- "output_per_million": 0.2852
27198
+ "input_per_million": 0.08,
27199
+ "output_per_million": 0.29
27096
27200
  }
27097
27201
  }
27098
27202
  },
@@ -27128,6 +27232,7 @@
27128
27232
  "response_format",
27129
27233
  "seed",
27130
27234
  "stop",
27235
+ "structured_outputs",
27131
27236
  "temperature",
27132
27237
  "tool_choice",
27133
27238
  "tools",
@@ -27223,8 +27328,8 @@
27223
27328
  "pricing": {
27224
27329
  "text_tokens": {
27225
27330
  "standard": {
27226
- "input_per_million": 0.017992691999999998,
27227
- "output_per_million": 0.07200576
27331
+ "input_per_million": 0.03,
27332
+ "output_per_million": 0.13
27228
27333
  }
27229
27334
  }
27230
27335
  },
@@ -27480,8 +27585,8 @@
27480
27585
  "pricing": {
27481
27586
  "text_tokens": {
27482
27587
  "standard": {
27483
- "input_per_million": 0.19999999999999998,
27484
- "output_per_million": 0.7999999999999999
27588
+ "input_per_million": 0.22,
27589
+ "output_per_million": 0.95
27485
27590
  }
27486
27591
  }
27487
27592
  },
@@ -27551,8 +27656,8 @@
27551
27656
  "pricing": {
27552
27657
  "text_tokens": {
27553
27658
  "standard": {
27554
- "input_per_million": 0.051830799999999996,
27555
- "output_per_million": 0.207424
27659
+ "input_per_million": 0.07,
27660
+ "output_per_million": 0.28
27556
27661
  }
27557
27662
  }
27558
27663
  },
@@ -27586,6 +27691,7 @@
27586
27691
  "response_format",
27587
27692
  "seed",
27588
27693
  "stop",
27694
+ "structured_outputs",
27589
27695
  "temperature",
27590
27696
  "tool_choice",
27591
27697
  "tools",
@@ -27595,6 +27701,131 @@
27595
27701
  ]
27596
27702
  }
27597
27703
  },
27704
+ {
27705
+ "id": "qwen/qwen3-coder-flash",
27706
+ "name": "Qwen: Qwen3 Coder Flash",
27707
+ "provider": "openrouter",
27708
+ "family": "qwen",
27709
+ "created_at": "2025-09-17 15:25:36 +0200",
27710
+ "context_window": 128000,
27711
+ "max_output_tokens": 65536,
27712
+ "knowledge_cutoff": null,
27713
+ "modalities": {
27714
+ "input": [
27715
+ "text"
27716
+ ],
27717
+ "output": [
27718
+ "text"
27719
+ ]
27720
+ },
27721
+ "capabilities": [
27722
+ "streaming",
27723
+ "function_calling",
27724
+ "structured_output"
27725
+ ],
27726
+ "pricing": {
27727
+ "text_tokens": {
27728
+ "standard": {
27729
+ "input_per_million": 0.3,
27730
+ "output_per_million": 1.5,
27731
+ "cached_input_per_million": 0.08
27732
+ }
27733
+ }
27734
+ },
27735
+ "metadata": {
27736
+ "description": "Qwen3 Coder Flash is Alibaba's fast and cost efficient version of their proprietary Qwen3 Coder Plus. It is a powerful coding agent model specializing in autonomous programming via tool calling and environment interaction, combining coding proficiency with versatile general-purpose abilities.",
27737
+ "architecture": {
27738
+ "modality": "text->text",
27739
+ "input_modalities": [
27740
+ "text"
27741
+ ],
27742
+ "output_modalities": [
27743
+ "text"
27744
+ ],
27745
+ "tokenizer": "Qwen3",
27746
+ "instruct_type": null
27747
+ },
27748
+ "top_provider": {
27749
+ "context_length": 128000,
27750
+ "max_completion_tokens": 65536,
27751
+ "is_moderated": false
27752
+ },
27753
+ "per_request_limits": null,
27754
+ "supported_parameters": [
27755
+ "max_tokens",
27756
+ "presence_penalty",
27757
+ "response_format",
27758
+ "seed",
27759
+ "temperature",
27760
+ "tool_choice",
27761
+ "tools",
27762
+ "top_p"
27763
+ ]
27764
+ }
27765
+ },
27766
+ {
27767
+ "id": "qwen/qwen3-coder-plus",
27768
+ "name": "Qwen: Qwen3 Coder Plus",
27769
+ "provider": "openrouter",
27770
+ "family": "qwen",
27771
+ "created_at": "2025-09-17 15:19:54 +0200",
27772
+ "context_window": 128000,
27773
+ "max_output_tokens": 65536,
27774
+ "knowledge_cutoff": null,
27775
+ "modalities": {
27776
+ "input": [
27777
+ "text"
27778
+ ],
27779
+ "output": [
27780
+ "text"
27781
+ ]
27782
+ },
27783
+ "capabilities": [
27784
+ "streaming",
27785
+ "function_calling",
27786
+ "structured_output"
27787
+ ],
27788
+ "pricing": {
27789
+ "text_tokens": {
27790
+ "standard": {
27791
+ "input_per_million": 1.0,
27792
+ "output_per_million": 5.0,
27793
+ "cached_input_per_million": 0.09999999999999999
27794
+ }
27795
+ }
27796
+ },
27797
+ "metadata": {
27798
+ "description": "Qwen3 Coder Plus is Alibaba's proprietary version of the Open Source Qwen3 Coder 480B A35B. It is a powerful coding agent model specializing in autonomous programming via tool calling and environment interaction, combining coding proficiency with versatile general-purpose abilities.",
27799
+ "architecture": {
27800
+ "modality": "text->text",
27801
+ "input_modalities": [
27802
+ "text"
27803
+ ],
27804
+ "output_modalities": [
27805
+ "text"
27806
+ ],
27807
+ "tokenizer": "Qwen3",
27808
+ "instruct_type": null
27809
+ },
27810
+ "top_provider": {
27811
+ "context_length": 128000,
27812
+ "max_completion_tokens": 65536,
27813
+ "is_moderated": false
27814
+ },
27815
+ "per_request_limits": null,
27816
+ "supported_parameters": [
27817
+ "max_tokens",
27818
+ "presence_penalty",
27819
+ "response_format",
27820
+ "seed",
27821
+ "structured_outputs",
27822
+ "temperature",
27823
+ "tool_choice",
27824
+ "tools",
27825
+ "top_p"
27826
+ ]
27827
+ }
27828
+ },
27598
27829
  {
27599
27830
  "id": "qwen/qwen3-coder:free",
27600
27831
  "name": "Qwen: Qwen3 Coder 480B A35B (free)",
@@ -27719,12 +27950,12 @@
27719
27950
  }
27720
27951
  },
27721
27952
  {
27722
- "id": "qwen/qwq-32b",
27723
- "name": "Qwen: QwQ 32B",
27953
+ "id": "qwen/qwen3-next-80b-a3b-instruct",
27954
+ "name": "Qwen: Qwen3 Next 80B A3B Instruct",
27724
27955
  "provider": "openrouter",
27725
27956
  "family": "qwen",
27726
- "created_at": "2025-03-05 22:06:54 +0100",
27727
- "context_window": 32768,
27957
+ "created_at": "2025-09-11 19:36:53 +0200",
27958
+ "context_window": 262144,
27728
27959
  "max_output_tokens": null,
27729
27960
  "knowledge_cutoff": null,
27730
27961
  "modalities": {
@@ -27744,13 +27975,13 @@
27744
27975
  "pricing": {
27745
27976
  "text_tokens": {
27746
27977
  "standard": {
27747
- "input_per_million": 0.15,
27748
- "output_per_million": 0.39999999999999997
27978
+ "input_per_million": 0.09999999999999999,
27979
+ "output_per_million": 0.7999999999999999
27749
27980
  }
27750
27981
  }
27751
27982
  },
27752
27983
  "metadata": {
27753
- "description": "QwQ is the reasoning model of the Qwen series. Compared with conventional instruction-tuned models, QwQ, which is capable of thinking and reasoning, can achieve significantly enhanced performance in downstream tasks, especially hard problems. QwQ-32B is the medium-sized reasoning model, which is capable of achieving competitive performance against state-of-the-art reasoning models, e.g., DeepSeek-R1, o1-mini.",
27984
+ "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\n\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.",
27754
27985
  "architecture": {
27755
27986
  "modality": "text->text",
27756
27987
  "input_modalities": [
@@ -27759,24 +27990,22 @@
27759
27990
  "output_modalities": [
27760
27991
  "text"
27761
27992
  ],
27762
- "tokenizer": "Qwen",
27763
- "instruct_type": "qwq"
27993
+ "tokenizer": "Qwen3",
27994
+ "instruct_type": null
27764
27995
  },
27765
27996
  "top_provider": {
27766
- "context_length": 32768,
27997
+ "context_length": 262144,
27767
27998
  "max_completion_tokens": null,
27768
27999
  "is_moderated": false
27769
28000
  },
27770
28001
  "per_request_limits": null,
27771
28002
  "supported_parameters": [
27772
28003
  "frequency_penalty",
27773
- "include_reasoning",
27774
28004
  "logit_bias",
27775
28005
  "logprobs",
27776
28006
  "max_tokens",
27777
28007
  "min_p",
27778
28008
  "presence_penalty",
27779
- "reasoning",
27780
28009
  "repetition_penalty",
27781
28010
  "response_format",
27782
28011
  "seed",
@@ -27792,12 +28021,12 @@
27792
28021
  }
27793
28022
  },
27794
28023
  {
27795
- "id": "qwen/qwq-32b-preview",
27796
- "name": "Qwen: QwQ 32B Preview",
28024
+ "id": "qwen/qwen3-next-80b-a3b-thinking",
28025
+ "name": "Qwen: Qwen3 Next 80B A3B Thinking",
27797
28026
  "provider": "openrouter",
27798
28027
  "family": "qwen",
27799
- "created_at": "2024-11-28 01:42:21 +0100",
27800
- "context_window": 32768,
28028
+ "created_at": "2025-09-11 19:38:04 +0200",
28029
+ "context_window": 262144,
27801
28030
  "max_output_tokens": null,
27802
28031
  "knowledge_cutoff": null,
27803
28032
  "modalities": {
@@ -27810,18 +28039,20 @@
27810
28039
  },
27811
28040
  "capabilities": [
27812
28041
  "streaming",
28042
+ "function_calling",
28043
+ "structured_output",
27813
28044
  "predicted_outputs"
27814
28045
  ],
27815
28046
  "pricing": {
27816
28047
  "text_tokens": {
27817
28048
  "standard": {
27818
- "input_per_million": 0.19999999999999998,
27819
- "output_per_million": 0.19999999999999998
28049
+ "input_per_million": 0.09999999999999999,
28050
+ "output_per_million": 0.7999999999999999
27820
28051
  }
27821
28052
  }
27822
28053
  },
27823
28054
  "metadata": {
27824
- "description": "QwQ-32B-Preview is an experimental research model focused on AI reasoning capabilities developed by the Qwen Team. As a preview release, it demonstrates promising analytical abilities while having several important limitations:\n\n1. **Language Mixing and Code-Switching**: The model may mix languages or switch between them unexpectedly, affecting response clarity.\n2. **Recursive Reasoning Loops**: The model may enter circular reasoning patterns, leading to lengthy responses without a conclusive answer.\n3. **Safety and Ethical Considerations**: The model requires enhanced safety measures to ensure reliable and secure performance, and users should exercise caution when deploying it.\n4. **Performance and Benchmark Limitations**: The model excels in math and coding but has room for improvement in other areas, such as common sense reasoning and nuanced language understanding.\n\n",
28055
+ "description": "Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that outputs structured “thinking” traces by default. It’s designed for hard multi-step problems; math proofs, code synthesis/debugging, logic, and agentic planning, and reports strong results across knowledge, reasoning, coding, alignment, and multilingual evaluations. Compared with prior Qwen3 variants, it emphasizes stability under long chains of thought and efficient scaling during inference, and it is tuned to follow complex instructions while reducing repetitive or off-task behavior.\n\nThe model is suitable for agent frameworks and tool use (function calling), retrieval-heavy workflows, and standardized benchmarking where step-by-step solutions are required. It supports long, detailed completions and leverages throughput-oriented techniques (e.g., multi-token prediction) for faster generation. Note that it operates in thinking-only mode.",
27825
28056
  "architecture": {
27826
28057
  "modality": "text->text",
27827
28058
  "input_modalities": [
@@ -27830,26 +28061,32 @@
27830
28061
  "output_modalities": [
27831
28062
  "text"
27832
28063
  ],
27833
- "tokenizer": "Qwen",
27834
- "instruct_type": "deepseek-r1"
28064
+ "tokenizer": "Qwen3",
28065
+ "instruct_type": null
27835
28066
  },
27836
28067
  "top_provider": {
27837
- "context_length": 32768,
28068
+ "context_length": 262144,
27838
28069
  "max_completion_tokens": null,
27839
28070
  "is_moderated": false
27840
28071
  },
27841
28072
  "per_request_limits": null,
27842
28073
  "supported_parameters": [
27843
28074
  "frequency_penalty",
28075
+ "include_reasoning",
27844
28076
  "logit_bias",
27845
28077
  "logprobs",
27846
28078
  "max_tokens",
27847
28079
  "min_p",
27848
28080
  "presence_penalty",
28081
+ "reasoning",
27849
28082
  "repetition_penalty",
28083
+ "response_format",
27850
28084
  "seed",
27851
28085
  "stop",
28086
+ "structured_outputs",
27852
28087
  "temperature",
28088
+ "tool_choice",
28089
+ "tools",
27853
28090
  "top_k",
27854
28091
  "top_logprobs",
27855
28092
  "top_p"
@@ -27857,8 +28094,8 @@
27857
28094
  }
27858
28095
  },
27859
28096
  {
27860
- "id": "qwen/qwq-32b:free",
27861
- "name": "Qwen: QwQ 32B (free)",
28097
+ "id": "qwen/qwq-32b",
28098
+ "name": "Qwen: QwQ 32B",
27862
28099
  "provider": "openrouter",
27863
28100
  "family": "qwen",
27864
28101
  "created_at": "2025-03-05 22:06:54 +0100",
@@ -27875,9 +28112,18 @@
27875
28112
  },
27876
28113
  "capabilities": [
27877
28114
  "streaming",
27878
- "structured_output"
28115
+ "function_calling",
28116
+ "structured_output",
28117
+ "predicted_outputs"
27879
28118
  ],
27880
- "pricing": {},
28119
+ "pricing": {
28120
+ "text_tokens": {
28121
+ "standard": {
28122
+ "input_per_million": 0.15,
28123
+ "output_per_million": 0.39999999999999997
28124
+ }
28125
+ }
28126
+ },
27881
28127
  "metadata": {
27882
28128
  "description": "QwQ is the reasoning model of the Qwen series. Compared with conventional instruction-tuned models, QwQ, which is capable of thinking and reasoning, can achieve significantly enhanced performance in downstream tasks, especially hard problems. QwQ-32B is the medium-sized reasoning model, which is capable of achieving competitive performance against state-of-the-art reasoning models, e.g., DeepSeek-R1, o1-mini.",
27883
28129
  "architecture": {
@@ -27899,24 +28145,34 @@
27899
28145
  "per_request_limits": null,
27900
28146
  "supported_parameters": [
27901
28147
  "frequency_penalty",
28148
+ "include_reasoning",
28149
+ "logit_bias",
28150
+ "logprobs",
27902
28151
  "max_tokens",
28152
+ "min_p",
27903
28153
  "presence_penalty",
28154
+ "reasoning",
28155
+ "repetition_penalty",
27904
28156
  "response_format",
28157
+ "seed",
27905
28158
  "stop",
27906
28159
  "structured_outputs",
27907
28160
  "temperature",
28161
+ "tool_choice",
28162
+ "tools",
27908
28163
  "top_k",
28164
+ "top_logprobs",
27909
28165
  "top_p"
27910
28166
  ]
27911
28167
  }
27912
28168
  },
27913
28169
  {
27914
- "id": "raifle/sorcererlm-8x22b",
27915
- "name": "SorcererLM 8x22B",
28170
+ "id": "qwen/qwq-32b-preview",
28171
+ "name": "Qwen: QwQ 32B Preview",
27916
28172
  "provider": "openrouter",
27917
- "family": "raifle",
27918
- "created_at": "2024-11-08 23:31:23 +0100",
27919
- "context_window": 16000,
28173
+ "family": "qwen",
28174
+ "created_at": "2024-11-28 01:42:21 +0100",
28175
+ "context_window": 32768,
27920
28176
  "max_output_tokens": null,
27921
28177
  "knowledge_cutoff": null,
27922
28178
  "modalities": {
@@ -27934,13 +28190,13 @@
27934
28190
  "pricing": {
27935
28191
  "text_tokens": {
27936
28192
  "standard": {
27937
- "input_per_million": 4.5,
27938
- "output_per_million": 4.5
28193
+ "input_per_million": 0.19999999999999998,
28194
+ "output_per_million": 0.19999999999999998
27939
28195
  }
27940
28196
  }
27941
28197
  },
27942
28198
  "metadata": {
27943
- "description": "SorcererLM is an advanced RP and storytelling model, built as a Low-rank 16-bit LoRA fine-tuned on [WizardLM-2 8x22B](/microsoft/wizardlm-2-8x22b).\n\n- Advanced reasoning and emotional intelligence for engaging and immersive interactions\n- Vivid writing capabilities enriched with spatial and contextual awareness\n- Enhanced narrative depth, promoting creative and dynamic storytelling",
28199
+ "description": "QwQ-32B-Preview is an experimental research model focused on AI reasoning capabilities developed by the Qwen Team. As a preview release, it demonstrates promising analytical abilities while having several important limitations:\n\n1. **Language Mixing and Code-Switching**: The model may mix languages or switch between them unexpectedly, affecting response clarity.\n2. **Recursive Reasoning Loops**: The model may enter circular reasoning patterns, leading to lengthy responses without a conclusive answer.\n3. **Safety and Ethical Considerations**: The model requires enhanced safety measures to ensure reliable and secure performance, and users should exercise caution when deploying it.\n4. **Performance and Benchmark Limitations**: The model excels in math and coding but has room for improvement in other areas, such as common sense reasoning and nuanced language understanding.\n\n",
27944
28200
  "architecture": {
27945
28201
  "modality": "text->text",
27946
28202
  "input_modalities": [
@@ -27949,11 +28205,11 @@
27949
28205
  "output_modalities": [
27950
28206
  "text"
27951
28207
  ],
27952
- "tokenizer": "Mistral",
27953
- "instruct_type": "vicuna"
28208
+ "tokenizer": "Qwen",
28209
+ "instruct_type": "deepseek-r1"
27954
28210
  },
27955
28211
  "top_provider": {
27956
- "context_length": 16000,
28212
+ "context_length": 32768,
27957
28213
  "max_completion_tokens": null,
27958
28214
  "is_moderated": false
27959
28215
  },
@@ -27961,6 +28217,7 @@
27961
28217
  "supported_parameters": [
27962
28218
  "frequency_penalty",
27963
28219
  "logit_bias",
28220
+ "logprobs",
27964
28221
  "max_tokens",
27965
28222
  "min_p",
27966
28223
  "presence_penalty",
@@ -27969,16 +28226,17 @@
27969
28226
  "stop",
27970
28227
  "temperature",
27971
28228
  "top_k",
28229
+ "top_logprobs",
27972
28230
  "top_p"
27973
28231
  ]
27974
28232
  }
27975
28233
  },
27976
28234
  {
27977
- "id": "rekaai/reka-flash-3:free",
27978
- "name": "Reka: Flash 3 (free)",
28235
+ "id": "qwen/qwq-32b:free",
28236
+ "name": "Qwen: QwQ 32B (free)",
27979
28237
  "provider": "openrouter",
27980
- "family": "rekaai",
27981
- "created_at": "2025-03-12 21:53:33 +0100",
28238
+ "family": "qwen",
28239
+ "created_at": "2025-03-05 22:06:54 +0100",
27982
28240
  "context_window": 32768,
27983
28241
  "max_output_tokens": null,
27984
28242
  "knowledge_cutoff": null,
@@ -27992,11 +28250,11 @@
27992
28250
  },
27993
28251
  "capabilities": [
27994
28252
  "streaming",
27995
- "predicted_outputs"
28253
+ "structured_output"
27996
28254
  ],
27997
28255
  "pricing": {},
27998
28256
  "metadata": {
27999
- "description": "Reka Flash 3 is a general-purpose, instruction-tuned large language model with 21 billion parameters, developed by Reka. It excels at general chat, coding tasks, instruction-following, and function calling. Featuring a 32K context length and optimized through reinforcement learning (RLOO), it provides competitive performance comparable to proprietary models within a smaller parameter footprint. Ideal for low-latency, local, or on-device deployments, Reka Flash 3 is compact, supports efficient quantization (down to 11GB at 4-bit precision), and employs explicit reasoning tags (\"<reasoning>\") to indicate its internal thought process.\n\nReka Flash 3 is primarily an English model with limited multilingual understanding capabilities. The model weights are released under the Apache 2.0 license.",
28257
+ "description": "QwQ is the reasoning model of the Qwen series. Compared with conventional instruction-tuned models, QwQ, which is capable of thinking and reasoning, can achieve significantly enhanced performance in downstream tasks, especially hard problems. QwQ-32B is the medium-sized reasoning model, which is capable of achieving competitive performance against state-of-the-art reasoning models, e.g., DeepSeek-R1, o1-mini.",
28000
28258
  "architecture": {
28001
28259
  "modality": "text->text",
28002
28260
  "input_modalities": [
@@ -28005,8 +28263,8 @@
28005
28263
  "output_modalities": [
28006
28264
  "text"
28007
28265
  ],
28008
- "tokenizer": "Other",
28009
- "instruct_type": null
28266
+ "tokenizer": "Qwen",
28267
+ "instruct_type": "qwq"
28010
28268
  },
28011
28269
  "top_provider": {
28012
28270
  "context_length": 32768,
@@ -28016,19 +28274,76 @@
28016
28274
  "per_request_limits": null,
28017
28275
  "supported_parameters": [
28018
28276
  "frequency_penalty",
28019
- "include_reasoning",
28277
+ "max_tokens",
28278
+ "presence_penalty",
28279
+ "response_format",
28280
+ "stop",
28281
+ "structured_outputs",
28282
+ "temperature",
28283
+ "top_k",
28284
+ "top_p"
28285
+ ]
28286
+ }
28287
+ },
28288
+ {
28289
+ "id": "raifle/sorcererlm-8x22b",
28290
+ "name": "SorcererLM 8x22B",
28291
+ "provider": "openrouter",
28292
+ "family": "raifle",
28293
+ "created_at": "2024-11-08 23:31:23 +0100",
28294
+ "context_window": 16000,
28295
+ "max_output_tokens": null,
28296
+ "knowledge_cutoff": null,
28297
+ "modalities": {
28298
+ "input": [
28299
+ "text"
28300
+ ],
28301
+ "output": [
28302
+ "text"
28303
+ ]
28304
+ },
28305
+ "capabilities": [
28306
+ "streaming",
28307
+ "predicted_outputs"
28308
+ ],
28309
+ "pricing": {
28310
+ "text_tokens": {
28311
+ "standard": {
28312
+ "input_per_million": 4.5,
28313
+ "output_per_million": 4.5
28314
+ }
28315
+ }
28316
+ },
28317
+ "metadata": {
28318
+ "description": "SorcererLM is an advanced RP and storytelling model, built as a Low-rank 16-bit LoRA fine-tuned on [WizardLM-2 8x22B](/microsoft/wizardlm-2-8x22b).\n\n- Advanced reasoning and emotional intelligence for engaging and immersive interactions\n- Vivid writing capabilities enriched with spatial and contextual awareness\n- Enhanced narrative depth, promoting creative and dynamic storytelling",
28319
+ "architecture": {
28320
+ "modality": "text->text",
28321
+ "input_modalities": [
28322
+ "text"
28323
+ ],
28324
+ "output_modalities": [
28325
+ "text"
28326
+ ],
28327
+ "tokenizer": "Mistral",
28328
+ "instruct_type": "vicuna"
28329
+ },
28330
+ "top_provider": {
28331
+ "context_length": 16000,
28332
+ "max_completion_tokens": null,
28333
+ "is_moderated": false
28334
+ },
28335
+ "per_request_limits": null,
28336
+ "supported_parameters": [
28337
+ "frequency_penalty",
28020
28338
  "logit_bias",
28021
- "logprobs",
28022
28339
  "max_tokens",
28023
28340
  "min_p",
28024
28341
  "presence_penalty",
28025
- "reasoning",
28026
28342
  "repetition_penalty",
28027
28343
  "seed",
28028
28344
  "stop",
28029
28345
  "temperature",
28030
28346
  "top_k",
28031
- "top_logprobs",
28032
28347
  "top_p"
28033
28348
  ]
28034
28349
  }
@@ -28121,7 +28436,7 @@
28121
28436
  "pricing": {
28122
28437
  "text_tokens": {
28123
28438
  "standard": {
28124
- "input_per_million": 0.02,
28439
+ "input_per_million": 0.04,
28125
28440
  "output_per_million": 0.049999999999999996
28126
28441
  }
28127
28442
  }
@@ -28315,8 +28630,8 @@
28315
28630
  "pricing": {
28316
28631
  "text_tokens": {
28317
28632
  "standard": {
28318
- "input_per_million": 0.01999188,
28319
- "output_per_million": 0.0800064
28633
+ "input_per_million": 0.04,
28634
+ "output_per_million": 0.14
28320
28635
  }
28321
28636
  }
28322
28637
  },
@@ -28361,67 +28676,9 @@
28361
28676
  "name": "Shisa AI: Shisa V2 Llama 3.3 70B (free)",
28362
28677
  "provider": "openrouter",
28363
28678
  "family": "shisa-ai",
28364
- "created_at": "2025-04-16 00:07:38 +0200",
28365
- "context_window": 32768,
28366
- "max_output_tokens": null,
28367
- "knowledge_cutoff": null,
28368
- "modalities": {
28369
- "input": [
28370
- "text"
28371
- ],
28372
- "output": [
28373
- "text"
28374
- ]
28375
- },
28376
- "capabilities": [
28377
- "streaming",
28378
- "predicted_outputs"
28379
- ],
28380
- "pricing": {},
28381
- "metadata": {
28382
- "description": "Shisa V2 Llama 3.3 70B is a bilingual Japanese-English chat model fine-tuned by Shisa.AI on Meta’s Llama-3.3-70B-Instruct base. It prioritizes Japanese language performance while retaining strong English capabilities. The model was optimized entirely through post-training, using a refined mix of supervised fine-tuning (SFT) and DPO datasets including regenerated ShareGPT-style data, translation tasks, roleplaying conversations, and instruction-following prompts. Unlike earlier Shisa releases, this version avoids tokenizer modifications or extended pretraining.\n\nShisa V2 70B achieves leading Japanese task performance across a wide range of custom and public benchmarks, including JA MT Bench, ELYZA 100, and Rakuda. It supports a 128K token context length and integrates smoothly with inference frameworks like vLLM and SGLang. While it inherits safety characteristics from its base model, no additional alignment was applied. The model is intended for high-performance bilingual chat, instruction following, and translation tasks across JA/EN.",
28383
- "architecture": {
28384
- "modality": "text->text",
28385
- "input_modalities": [
28386
- "text"
28387
- ],
28388
- "output_modalities": [
28389
- "text"
28390
- ],
28391
- "tokenizer": "Llama3",
28392
- "instruct_type": null
28393
- },
28394
- "top_provider": {
28395
- "context_length": 32768,
28396
- "max_completion_tokens": null,
28397
- "is_moderated": false
28398
- },
28399
- "per_request_limits": null,
28400
- "supported_parameters": [
28401
- "frequency_penalty",
28402
- "logit_bias",
28403
- "logprobs",
28404
- "max_tokens",
28405
- "min_p",
28406
- "presence_penalty",
28407
- "repetition_penalty",
28408
- "seed",
28409
- "stop",
28410
- "temperature",
28411
- "top_k",
28412
- "top_logprobs",
28413
- "top_p"
28414
- ]
28415
- }
28416
- },
28417
- {
28418
- "id": "sophosympatheia/midnight-rose-70b",
28419
- "name": "Midnight Rose 70B",
28420
- "provider": "openrouter",
28421
- "family": "sophosympatheia",
28422
- "created_at": "2024-03-22 01:00:00 +0100",
28423
- "context_window": 4096,
28424
- "max_output_tokens": 2048,
28679
+ "created_at": "2025-04-16 00:07:38 +0200",
28680
+ "context_window": 32768,
28681
+ "max_output_tokens": null,
28425
28682
  "knowledge_cutoff": null,
28426
28683
  "modalities": {
28427
28684
  "input": [
@@ -28435,16 +28692,9 @@
28435
28692
  "streaming",
28436
28693
  "predicted_outputs"
28437
28694
  ],
28438
- "pricing": {
28439
- "text_tokens": {
28440
- "standard": {
28441
- "input_per_million": 0.7999999999999999,
28442
- "output_per_million": 0.7999999999999999
28443
- }
28444
- }
28445
- },
28695
+ "pricing": {},
28446
28696
  "metadata": {
28447
- "description": "A merge with a complex family tree, this model was crafted for roleplaying and storytelling. Midnight Rose is a successor to Rogue Rose and Aurora Nights and improves upon them both. It wants to produce lengthy output by default and is the best creative writing merge produced so far by sophosympatheia.\n\nDescending from earlier versions of Midnight Rose and [Wizard Tulu Dolphin 70B](https://huggingface.co/sophosympatheia/Wizard-Tulu-Dolphin-70B-v1.0), it inherits the best qualities of each.",
28697
+ "description": "Shisa V2 Llama 3.3 70B is a bilingual Japanese-English chat model fine-tuned by Shisa.AI on Meta’s Llama-3.3-70B-Instruct base. It prioritizes Japanese language performance while retaining strong English capabilities. The model was optimized entirely through post-training, using a refined mix of supervised fine-tuning (SFT) and DPO datasets including regenerated ShareGPT-style data, translation tasks, roleplaying conversations, and instruction-following prompts. Unlike earlier Shisa releases, this version avoids tokenizer modifications or extended pretraining.\n\nShisa V2 70B achieves leading Japanese task performance across a wide range of custom and public benchmarks, including JA MT Bench, ELYZA 100, and Rakuda. It supports a 128K token context length and integrates smoothly with inference frameworks like vLLM and SGLang. While it inherits safety characteristics from its base model, no additional alignment was applied. The model is intended for high-performance bilingual chat, instruction following, and translation tasks across JA/EN.",
28448
28698
  "architecture": {
28449
28699
  "modality": "text->text",
28450
28700
  "input_modalities": [
@@ -28453,18 +28703,19 @@
28453
28703
  "output_modalities": [
28454
28704
  "text"
28455
28705
  ],
28456
- "tokenizer": "Llama2",
28457
- "instruct_type": "airoboros"
28706
+ "tokenizer": "Llama3",
28707
+ "instruct_type": null
28458
28708
  },
28459
28709
  "top_provider": {
28460
- "context_length": 4096,
28461
- "max_completion_tokens": 2048,
28710
+ "context_length": 32768,
28711
+ "max_completion_tokens": null,
28462
28712
  "is_moderated": false
28463
28713
  },
28464
28714
  "per_request_limits": null,
28465
28715
  "supported_parameters": [
28466
28716
  "frequency_penalty",
28467
28717
  "logit_bias",
28718
+ "logprobs",
28468
28719
  "max_tokens",
28469
28720
  "min_p",
28470
28721
  "presence_penalty",
@@ -28473,6 +28724,7 @@
28473
28724
  "stop",
28474
28725
  "temperature",
28475
28726
  "top_k",
28727
+ "top_logprobs",
28476
28728
  "top_p"
28477
28729
  ]
28478
28730
  }
@@ -28533,6 +28785,7 @@
28533
28785
  "include_reasoning",
28534
28786
  "reasoning",
28535
28787
  "response_format",
28788
+ "structured_outputs",
28536
28789
  "temperature",
28537
28790
  "tool_choice",
28538
28791
  "tools",
@@ -28662,6 +28915,7 @@
28662
28915
  "response_format",
28663
28916
  "seed",
28664
28917
  "stop",
28918
+ "structured_outputs",
28665
28919
  "temperature",
28666
28920
  "top_k",
28667
28921
  "top_logprobs",
@@ -28911,7 +29165,6 @@
28911
29165
  "supported_parameters": [
28912
29166
  "frequency_penalty",
28913
29167
  "logit_bias",
28914
- "logprobs",
28915
29168
  "max_tokens",
28916
29169
  "min_p",
28917
29170
  "presence_penalty",
@@ -28952,8 +29205,8 @@
28952
29205
  "pricing": {
28953
29206
  "text_tokens": {
28954
29207
  "standard": {
28955
- "input_per_million": 0.0481286,
28956
- "output_per_million": 0.192608
29208
+ "input_per_million": 0.04,
29209
+ "output_per_million": 0.16
28957
29210
  }
28958
29211
  }
28959
29212
  },
@@ -29013,8 +29266,7 @@
29013
29266
  "capabilities": [
29014
29267
  "streaming",
29015
29268
  "function_calling",
29016
- "structured_output",
29017
- "predicted_outputs"
29269
+ "structured_output"
29018
29270
  ],
29019
29271
  "pricing": {
29020
29272
  "text_tokens": {
@@ -29045,83 +29297,14 @@
29045
29297
  "per_request_limits": null,
29046
29298
  "supported_parameters": [
29047
29299
  "frequency_penalty",
29048
- "logit_bias",
29049
- "logprobs",
29050
29300
  "max_tokens",
29051
- "min_p",
29052
29301
  "presence_penalty",
29053
- "repetition_penalty",
29054
29302
  "response_format",
29055
- "seed",
29056
29303
  "stop",
29057
29304
  "structured_outputs",
29058
29305
  "temperature",
29059
29306
  "tool_choice",
29060
29307
  "tools",
29061
- "top_k",
29062
- "top_p"
29063
- ]
29064
- }
29065
- },
29066
- {
29067
- "id": "thudm/glm-4-32b",
29068
- "name": "THUDM: GLM 4 32B",
29069
- "provider": "openrouter",
29070
- "family": "thudm",
29071
- "created_at": "2025-04-17 22:15:15 +0200",
29072
- "context_window": 32000,
29073
- "max_output_tokens": 32000,
29074
- "knowledge_cutoff": null,
29075
- "modalities": {
29076
- "input": [
29077
- "text"
29078
- ],
29079
- "output": [
29080
- "text"
29081
- ]
29082
- },
29083
- "capabilities": [
29084
- "streaming",
29085
- "predicted_outputs"
29086
- ],
29087
- "pricing": {
29088
- "text_tokens": {
29089
- "standard": {
29090
- "input_per_million": 0.55,
29091
- "output_per_million": 1.66
29092
- }
29093
- }
29094
- },
29095
- "metadata": {
29096
- "description": "GLM-4-32B-0414 is a 32B bilingual (Chinese-English) open-weight language model optimized for code generation, function calling, and agent-style tasks. Pretrained on 15T of high-quality and reasoning-heavy data, it was further refined using human preference alignment, rejection sampling, and reinforcement learning. The model excels in complex reasoning, artifact generation, and structured output tasks, achieving performance comparable to GPT-4o and DeepSeek-V3-0324 across several benchmarks.",
29097
- "architecture": {
29098
- "modality": "text->text",
29099
- "input_modalities": [
29100
- "text"
29101
- ],
29102
- "output_modalities": [
29103
- "text"
29104
- ],
29105
- "tokenizer": "Other",
29106
- "instruct_type": null
29107
- },
29108
- "top_provider": {
29109
- "context_length": 32000,
29110
- "max_completion_tokens": 32000,
29111
- "is_moderated": false
29112
- },
29113
- "per_request_limits": null,
29114
- "supported_parameters": [
29115
- "frequency_penalty",
29116
- "logit_bias",
29117
- "max_tokens",
29118
- "min_p",
29119
- "presence_penalty",
29120
- "repetition_penalty",
29121
- "seed",
29122
- "stop",
29123
- "temperature",
29124
- "top_k",
29125
29308
  "top_p"
29126
29309
  ]
29127
29310
  }
@@ -29217,8 +29400,8 @@
29217
29400
  "pricing": {
29218
29401
  "text_tokens": {
29219
29402
  "standard": {
29220
- "input_per_million": 0.01999188,
29221
- "output_per_million": 0.0800064
29403
+ "input_per_million": 0.04,
29404
+ "output_per_million": 0.14
29222
29405
  }
29223
29406
  }
29224
29407
  },
@@ -29284,8 +29467,8 @@
29284
29467
  "pricing": {
29285
29468
  "text_tokens": {
29286
29469
  "standard": {
29287
- "input_per_million": 0.1999188,
29288
- "output_per_million": 0.800064
29470
+ "input_per_million": 0.24999987999999998,
29471
+ "output_per_million": 0.999999888
29289
29472
  }
29290
29473
  }
29291
29474
  },
@@ -29515,11 +29698,11 @@
29515
29698
  }
29516
29699
  },
29517
29700
  {
29518
- "id": "x-ai/grok-2-1212",
29519
- "name": "xAI: Grok 2 1212",
29701
+ "id": "x-ai/grok-3",
29702
+ "name": "xAI: Grok 3",
29520
29703
  "provider": "openrouter",
29521
29704
  "family": "x-ai",
29522
- "created_at": "2024-12-15 04:20:14 +0100",
29705
+ "created_at": "2025-06-10 21:15:08 +0200",
29523
29706
  "context_window": 131072,
29524
29707
  "max_output_tokens": null,
29525
29708
  "knowledge_cutoff": null,
@@ -29539,13 +29722,14 @@
29539
29722
  "pricing": {
29540
29723
  "text_tokens": {
29541
29724
  "standard": {
29542
- "input_per_million": 2.0,
29543
- "output_per_million": 10.0
29725
+ "input_per_million": 3.0,
29726
+ "output_per_million": 15.0,
29727
+ "cached_input_per_million": 0.75
29544
29728
  }
29545
29729
  }
29546
29730
  },
29547
29731
  "metadata": {
29548
- "description": "Grok 2 1212 introduces significant enhancements to accuracy, instruction adherence, and multilingual support, making it a powerful and flexible choice for developers seeking a highly steerable, intelligent model.",
29732
+ "description": "Grok 3 is the latest model from xAI. It's their flagship model that excels at enterprise use cases like data extraction, coding, and text summarization. Possesses deep domain knowledge in finance, healthcare, law, and science.\n\n",
29549
29733
  "architecture": {
29550
29734
  "modality": "text->text",
29551
29735
  "input_modalities": [
@@ -29571,6 +29755,7 @@
29571
29755
  "response_format",
29572
29756
  "seed",
29573
29757
  "stop",
29758
+ "structured_outputs",
29574
29759
  "temperature",
29575
29760
  "tool_choice",
29576
29761
  "tools",
@@ -29580,75 +29765,11 @@
29580
29765
  }
29581
29766
  },
29582
29767
  {
29583
- "id": "x-ai/grok-2-vision-1212",
29584
- "name": "xAI: Grok 2 Vision 1212",
29585
- "provider": "openrouter",
29586
- "family": "x-ai",
29587
- "created_at": "2024-12-15 05:35:38 +0100",
29588
- "context_window": 32768,
29589
- "max_output_tokens": null,
29590
- "knowledge_cutoff": null,
29591
- "modalities": {
29592
- "input": [
29593
- "text",
29594
- "image"
29595
- ],
29596
- "output": [
29597
- "text"
29598
- ]
29599
- },
29600
- "capabilities": [
29601
- "streaming",
29602
- "structured_output"
29603
- ],
29604
- "pricing": {
29605
- "text_tokens": {
29606
- "standard": {
29607
- "input_per_million": 2.0,
29608
- "output_per_million": 10.0
29609
- }
29610
- }
29611
- },
29612
- "metadata": {
29613
- "description": "Grok 2 Vision 1212 advances image-based AI with stronger visual comprehension, refined instruction-following, and multilingual support. From object recognition to style analysis, it empowers developers to build more intuitive, visually aware applications. Its enhanced steerability and reasoning establish a robust foundation for next-generation image solutions.\n\nTo read more about this model, check out [xAI's announcement](https://x.ai/blog/grok-1212).",
29614
- "architecture": {
29615
- "modality": "text+image->text",
29616
- "input_modalities": [
29617
- "text",
29618
- "image"
29619
- ],
29620
- "output_modalities": [
29621
- "text"
29622
- ],
29623
- "tokenizer": "Grok",
29624
- "instruct_type": null
29625
- },
29626
- "top_provider": {
29627
- "context_length": 32768,
29628
- "max_completion_tokens": null,
29629
- "is_moderated": false
29630
- },
29631
- "per_request_limits": null,
29632
- "supported_parameters": [
29633
- "frequency_penalty",
29634
- "logprobs",
29635
- "max_tokens",
29636
- "presence_penalty",
29637
- "response_format",
29638
- "seed",
29639
- "stop",
29640
- "temperature",
29641
- "top_logprobs",
29642
- "top_p"
29643
- ]
29644
- }
29645
- },
29646
- {
29647
- "id": "x-ai/grok-3",
29648
- "name": "xAI: Grok 3",
29768
+ "id": "x-ai/grok-3-beta",
29769
+ "name": "xAI: Grok 3 Beta",
29649
29770
  "provider": "openrouter",
29650
29771
  "family": "x-ai",
29651
- "created_at": "2025-06-10 21:15:08 +0200",
29772
+ "created_at": "2025-04-10 01:07:48 +0200",
29652
29773
  "context_window": 131072,
29653
29774
  "max_output_tokens": null,
29654
29775
  "knowledge_cutoff": null,
@@ -29675,7 +29796,7 @@
29675
29796
  }
29676
29797
  },
29677
29798
  "metadata": {
29678
- "description": "Grok 3 is the latest model from xAI. It's their flagship model that excels at enterprise use cases like data extraction, coding, and text summarization. Possesses deep domain knowledge in finance, healthcare, law, and science.\n\n",
29799
+ "description": "Grok 3 is the latest model from xAI. It's their flagship model that excels at enterprise use cases like data extraction, coding, and text summarization. Possesses deep domain knowledge in finance, healthcare, law, and science.\n\nExcels in structured tasks and benchmarks like GPQA, LCB, and MMLU-Pro where it outperforms Grok 3 Mini even on high thinking. \n\nNote: That there are two xAI endpoints for this model. By default when using this model we will always route you to the base endpoint. If you want the fast endpoint you can add `provider: { sort: throughput}`, to sort by throughput instead. \n",
29679
29800
  "architecture": {
29680
29801
  "modality": "text->text",
29681
29802
  "input_modalities": [
@@ -29701,7 +29822,6 @@
29701
29822
  "response_format",
29702
29823
  "seed",
29703
29824
  "stop",
29704
- "structured_outputs",
29705
29825
  "temperature",
29706
29826
  "tool_choice",
29707
29827
  "tools",
@@ -29711,11 +29831,11 @@
29711
29831
  }
29712
29832
  },
29713
29833
  {
29714
- "id": "x-ai/grok-3-beta",
29715
- "name": "xAI: Grok 3 Beta",
29834
+ "id": "x-ai/grok-3-mini",
29835
+ "name": "xAI: Grok 3 Mini",
29716
29836
  "provider": "openrouter",
29717
29837
  "family": "x-ai",
29718
- "created_at": "2025-04-10 01:07:48 +0200",
29838
+ "created_at": "2025-06-10 21:20:45 +0200",
29719
29839
  "context_window": 131072,
29720
29840
  "max_output_tokens": null,
29721
29841
  "knowledge_cutoff": null,
@@ -29735,14 +29855,14 @@
29735
29855
  "pricing": {
29736
29856
  "text_tokens": {
29737
29857
  "standard": {
29738
- "input_per_million": 3.0,
29739
- "output_per_million": 15.0,
29740
- "cached_input_per_million": 0.75
29858
+ "input_per_million": 0.3,
29859
+ "output_per_million": 0.5,
29860
+ "cached_input_per_million": 0.075
29741
29861
  }
29742
29862
  }
29743
29863
  },
29744
29864
  "metadata": {
29745
- "description": "Grok 3 is the latest model from xAI. It's their flagship model that excels at enterprise use cases like data extraction, coding, and text summarization. Possesses deep domain knowledge in finance, healthcare, law, and science.\n\nExcels in structured tasks and benchmarks like GPQA, LCB, and MMLU-Pro where it outperforms Grok 3 Mini even on high thinking. \n\nNote: That there are two xAI endpoints for this model. By default when using this model we will always route you to the base endpoint. If you want the fast endpoint you can add `provider: { sort: throughput}`, to sort by throughput instead. \n",
29865
+ "description": "A lightweight model that thinks before responding. Fast, smart, and great for logic-based tasks that do not require deep domain knowledge. The raw thinking traces are accessible.",
29746
29866
  "architecture": {
29747
29867
  "modality": "text->text",
29748
29868
  "input_modalities": [
@@ -29761,13 +29881,14 @@
29761
29881
  },
29762
29882
  "per_request_limits": null,
29763
29883
  "supported_parameters": [
29764
- "frequency_penalty",
29884
+ "include_reasoning",
29765
29885
  "logprobs",
29766
29886
  "max_tokens",
29767
- "presence_penalty",
29887
+ "reasoning",
29768
29888
  "response_format",
29769
29889
  "seed",
29770
29890
  "stop",
29891
+ "structured_outputs",
29771
29892
  "temperature",
29772
29893
  "tool_choice",
29773
29894
  "tools",
@@ -29777,11 +29898,11 @@
29777
29898
  }
29778
29899
  },
29779
29900
  {
29780
- "id": "x-ai/grok-3-mini",
29781
- "name": "xAI: Grok 3 Mini",
29901
+ "id": "x-ai/grok-3-mini-beta",
29902
+ "name": "xAI: Grok 3 Mini Beta",
29782
29903
  "provider": "openrouter",
29783
29904
  "family": "x-ai",
29784
- "created_at": "2025-06-10 21:20:45 +0200",
29905
+ "created_at": "2025-04-10 01:09:55 +0200",
29785
29906
  "context_window": 131072,
29786
29907
  "max_output_tokens": null,
29787
29908
  "knowledge_cutoff": null,
@@ -29808,7 +29929,7 @@
29808
29929
  }
29809
29930
  },
29810
29931
  "metadata": {
29811
- "description": "A lightweight model that thinks before responding. Fast, smart, and great for logic-based tasks that do not require deep domain knowledge. The raw thinking traces are accessible.",
29932
+ "description": "Grok 3 Mini is a lightweight, smaller thinking model. Unlike traditional models that generate answers immediately, Grok 3 Mini thinks before responding. It’s ideal for reasoning-heavy tasks that don’t demand extensive domain knowledge, and shines in math-specific and quantitative use cases, such as solving challenging puzzles or math problems.\n\nTransparent \"thinking\" traces accessible. Defaults to low reasoning, can boost with setting `reasoning: { effort: \"high\" }`\n\nNote: That there are two xAI endpoints for this model. By default when using this model we will always route you to the base endpoint. If you want the fast endpoint you can add `provider: { sort: throughput}`, to sort by throughput instead. \n",
29812
29933
  "architecture": {
29813
29934
  "modality": "text->text",
29814
29935
  "input_modalities": [
@@ -29834,7 +29955,6 @@
29834
29955
  "response_format",
29835
29956
  "seed",
29836
29957
  "stop",
29837
- "structured_outputs",
29838
29958
  "temperature",
29839
29959
  "tool_choice",
29840
29960
  "tools",
@@ -29844,16 +29964,17 @@
29844
29964
  }
29845
29965
  },
29846
29966
  {
29847
- "id": "x-ai/grok-3-mini-beta",
29848
- "name": "xAI: Grok 3 Mini Beta",
29967
+ "id": "x-ai/grok-4",
29968
+ "name": "xAI: Grok 4",
29849
29969
  "provider": "openrouter",
29850
29970
  "family": "x-ai",
29851
- "created_at": "2025-04-10 01:09:55 +0200",
29852
- "context_window": 131072,
29971
+ "created_at": "2025-07-09 21:01:29 +0200",
29972
+ "context_window": 256000,
29853
29973
  "max_output_tokens": null,
29854
29974
  "knowledge_cutoff": null,
29855
29975
  "modalities": {
29856
29976
  "input": [
29977
+ "image",
29857
29978
  "text"
29858
29979
  ],
29859
29980
  "output": [
@@ -29868,17 +29989,18 @@
29868
29989
  "pricing": {
29869
29990
  "text_tokens": {
29870
29991
  "standard": {
29871
- "input_per_million": 0.3,
29872
- "output_per_million": 0.5,
29873
- "cached_input_per_million": 0.075
29992
+ "input_per_million": 3.0,
29993
+ "output_per_million": 15.0,
29994
+ "cached_input_per_million": 0.75
29874
29995
  }
29875
29996
  }
29876
29997
  },
29877
29998
  "metadata": {
29878
- "description": "Grok 3 Mini is a lightweight, smaller thinking model. Unlike traditional models that generate answers immediately, Grok 3 Mini thinks before responding. It’s ideal for reasoning-heavy tasks that don’t demand extensive domain knowledge, and shines in math-specific and quantitative use cases, such as solving challenging puzzles or math problems.\n\nTransparent \"thinking\" traces accessible. Defaults to low reasoning, can boost with setting `reasoning: { effort: \"high\" }`\n\nNote: That there are two xAI endpoints for this model. By default when using this model we will always route you to the base endpoint. If you want the fast endpoint you can add `provider: { sort: throughput}`, to sort by throughput instead. \n",
29999
+ "description": "Grok 4 is xAI's latest reasoning model with a 256k context window. It supports parallel tool calling, structured outputs, and both image and text inputs. Note that reasoning is not exposed, reasoning cannot be disabled, and the reasoning effort cannot be specified. Pricing increases once the total tokens in a given request is greater than 128k tokens. See more details on the [xAI docs](https://docs.x.ai/docs/models/grok-4-0709)",
29879
30000
  "architecture": {
29880
- "modality": "text->text",
30001
+ "modality": "text+image->text",
29881
30002
  "input_modalities": [
30003
+ "image",
29882
30004
  "text"
29883
30005
  ],
29884
30006
  "output_modalities": [
@@ -29888,7 +30010,7 @@
29888
30010
  "instruct_type": null
29889
30011
  },
29890
30012
  "top_provider": {
29891
- "context_length": 131072,
30013
+ "context_length": 256000,
29892
30014
  "max_completion_tokens": null,
29893
30015
  "is_moderated": false
29894
30016
  },
@@ -29900,7 +30022,7 @@
29900
30022
  "reasoning",
29901
30023
  "response_format",
29902
30024
  "seed",
29903
- "stop",
30025
+ "structured_outputs",
29904
30026
  "temperature",
29905
30027
  "tool_choice",
29906
30028
  "tools",
@@ -29910,18 +30032,18 @@
29910
30032
  }
29911
30033
  },
29912
30034
  {
29913
- "id": "x-ai/grok-4",
29914
- "name": "xAI: Grok 4",
30035
+ "id": "x-ai/grok-4-fast:free",
30036
+ "name": "xAI: Grok 4 Fast (free)",
29915
30037
  "provider": "openrouter",
29916
30038
  "family": "x-ai",
29917
- "created_at": "2025-07-09 21:01:29 +0200",
29918
- "context_window": 256000,
29919
- "max_output_tokens": null,
30039
+ "created_at": "2025-09-19 02:01:30 +0200",
30040
+ "context_window": 2000000,
30041
+ "max_output_tokens": 30000,
29920
30042
  "knowledge_cutoff": null,
29921
30043
  "modalities": {
29922
30044
  "input": [
29923
- "image",
29924
- "text"
30045
+ "text",
30046
+ "image"
29925
30047
  ],
29926
30048
  "output": [
29927
30049
  "text"
@@ -29932,22 +30054,14 @@
29932
30054
  "function_calling",
29933
30055
  "structured_output"
29934
30056
  ],
29935
- "pricing": {
29936
- "text_tokens": {
29937
- "standard": {
29938
- "input_per_million": 3.0,
29939
- "output_per_million": 15.0,
29940
- "cached_input_per_million": 0.75
29941
- }
29942
- }
29943
- },
30057
+ "pricing": {},
29944
30058
  "metadata": {
29945
- "description": "Grok 4 is xAI's latest reasoning model with a 256k context window. It supports parallel tool calling, structured outputs, and both image and text inputs. Note that reasoning is not exposed, reasoning cannot be disabled, and the reasoning effort cannot be specified. Pricing increases once the total tokens in a given request is greater than 128k tokens. See more details on the [xAI docs](https://docs.x.ai/docs/models/grok-4-0709)",
30059
+ "description": "Grok 4 Fast is xAI's latest multimodal model with SOTA cost-efficiency and a 2M token context window. It comes in two flavors: non-reasoning and reasoning. Read more about the model on xAI's [news post](http://x.ai/news/grok-4-fast). Reasoning can be enabled using the `reasoning` `enabled` parameter in the API. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#controlling-reasoning-tokens)\n\nPrompts and completions may be used by xAI or OpenRouter to improve future models.",
29946
30060
  "architecture": {
29947
30061
  "modality": "text+image->text",
29948
30062
  "input_modalities": [
29949
- "image",
29950
- "text"
30063
+ "text",
30064
+ "image"
29951
30065
  ],
29952
30066
  "output_modalities": [
29953
30067
  "text"
@@ -29956,8 +30070,8 @@
29956
30070
  "instruct_type": null
29957
30071
  },
29958
30072
  "top_provider": {
29959
- "context_length": 256000,
29960
- "max_completion_tokens": null,
30073
+ "context_length": 2000000,
30074
+ "max_completion_tokens": 30000,
29961
30075
  "is_moderated": false
29962
30076
  },
29963
30077
  "per_request_limits": null,
@@ -30127,8 +30241,8 @@
30127
30241
  "pricing": {
30128
30242
  "text_tokens": {
30129
30243
  "standard": {
30130
- "input_per_million": 0.32986602,
30131
- "output_per_million": 1.3201056
30244
+ "input_per_million": 0.41,
30245
+ "output_per_million": 1.6500000000000001
30132
30246
  }
30133
30247
  }
30134
30248
  },
@@ -30164,6 +30278,7 @@
30164
30278
  "response_format",
30165
30279
  "seed",
30166
30280
  "stop",
30281
+ "structured_outputs",
30167
30282
  "temperature",
30168
30283
  "tool_choice",
30169
30284
  "tools",
@@ -30375,7 +30490,7 @@
30375
30490
  "name": "Sonar",
30376
30491
  "provider": "perplexity",
30377
30492
  "family": "sonar",
30378
- "created_at": "2025-09-09 20:41:26 +0200",
30493
+ "created_at": "2025-09-21 16:12:52 +0200",
30379
30494
  "context_window": 128000,
30380
30495
  "max_output_tokens": 4096,
30381
30496
  "knowledge_cutoff": null,
@@ -30407,7 +30522,7 @@
30407
30522
  "name": "Sonar Deep Research",
30408
30523
  "provider": "perplexity",
30409
30524
  "family": "sonar_deep_research",
30410
- "created_at": "2025-09-09 20:41:26 +0200",
30525
+ "created_at": "2025-09-21 16:12:52 +0200",
30411
30526
  "context_window": 128000,
30412
30527
  "max_output_tokens": 4096,
30413
30528
  "knowledge_cutoff": null,
@@ -30442,7 +30557,7 @@
30442
30557
  "name": "Sonar Pro",
30443
30558
  "provider": "perplexity",
30444
30559
  "family": "sonar_pro",
30445
- "created_at": "2025-09-09 20:41:26 +0200",
30560
+ "created_at": "2025-09-21 16:12:52 +0200",
30446
30561
  "context_window": 200000,
30447
30562
  "max_output_tokens": 8192,
30448
30563
  "knowledge_cutoff": null,
@@ -30474,7 +30589,7 @@
30474
30589
  "name": "Sonar Reasoning",
30475
30590
  "provider": "perplexity",
30476
30591
  "family": "sonar_reasoning",
30477
- "created_at": "2025-09-09 20:41:26 +0200",
30592
+ "created_at": "2025-09-21 16:12:52 +0200",
30478
30593
  "context_window": 128000,
30479
30594
  "max_output_tokens": 4096,
30480
30595
  "knowledge_cutoff": null,
@@ -30506,7 +30621,7 @@
30506
30621
  "name": "Sonar Reasoning Pro",
30507
30622
  "provider": "perplexity",
30508
30623
  "family": "sonar_reasoning_pro",
30509
- "created_at": "2025-09-09 20:41:26 +0200",
30624
+ "created_at": "2025-09-21 16:12:52 +0200",
30510
30625
  "context_window": 128000,
30511
30626
  "max_output_tokens": 8192,
30512
30627
  "knowledge_cutoff": null,