ruby_llm 1.13.1 → 1.13.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -547,7 +547,9 @@
547
547
  "function_calling",
548
548
  "reasoning",
549
549
  "vision",
550
- "streaming"
550
+ "streaming",
551
+ "batch",
552
+ "structured_output"
551
553
  ],
552
554
  "pricing": {
553
555
  "text_tokens": {
@@ -707,7 +709,8 @@
707
709
  "function_calling",
708
710
  "reasoning",
709
711
  "vision",
710
- "streaming"
712
+ "streaming",
713
+ "batch"
711
714
  ],
712
715
  "pricing": {
713
716
  "text_tokens": {
@@ -761,7 +764,9 @@
761
764
  "function_calling",
762
765
  "reasoning",
763
766
  "vision",
764
- "streaming"
767
+ "streaming",
768
+ "batch",
769
+ "structured_output"
765
770
  ],
766
771
  "pricing": {
767
772
  "text_tokens": {
@@ -868,7 +873,9 @@
868
873
  "function_calling",
869
874
  "reasoning",
870
875
  "vision",
871
- "streaming"
876
+ "streaming",
877
+ "batch",
878
+ "structured_output"
872
879
  ],
873
880
  "pricing": {
874
881
  "text_tokens": {
@@ -922,7 +929,9 @@
922
929
  "function_calling",
923
930
  "reasoning",
924
931
  "vision",
925
- "streaming"
932
+ "streaming",
933
+ "batch",
934
+ "structured_output"
926
935
  ],
927
936
  "pricing": {
928
937
  "text_tokens": {
@@ -1035,7 +1044,9 @@
1035
1044
  "function_calling",
1036
1045
  "reasoning",
1037
1046
  "vision",
1038
- "streaming"
1047
+ "streaming",
1048
+ "batch",
1049
+ "structured_output"
1039
1050
  ],
1040
1051
  "pricing": {
1041
1052
  "text_tokens": {
@@ -1142,7 +1153,9 @@
1142
1153
  "function_calling",
1143
1154
  "reasoning",
1144
1155
  "vision",
1145
- "streaming"
1156
+ "streaming",
1157
+ "batch",
1158
+ "structured_output"
1146
1159
  ],
1147
1160
  "pricing": {
1148
1161
  "text_tokens": {
@@ -1196,7 +1209,9 @@
1196
1209
  "function_calling",
1197
1210
  "reasoning",
1198
1211
  "vision",
1199
- "streaming"
1212
+ "streaming",
1213
+ "batch",
1214
+ "structured_output"
1200
1215
  ],
1201
1216
  "pricing": {
1202
1217
  "text_tokens": {
@@ -1926,6 +1941,39 @@
1926
1941
  "owned_by": null
1927
1942
  }
1928
1943
  },
1944
+ {
1945
+ "id": "FLUX.2-flex",
1946
+ "name": "Flux.2 Flex",
1947
+ "provider": "azure",
1948
+ "family": "other",
1949
+ "created_at": null,
1950
+ "context_window": 4096,
1951
+ "max_output_tokens": 16384,
1952
+ "knowledge_cutoff": null,
1953
+ "modalities": {
1954
+ "input": [
1955
+ "text"
1956
+ ],
1957
+ "output": [
1958
+ "text"
1959
+ ]
1960
+ },
1961
+ "capabilities": [
1962
+ "streaming"
1963
+ ],
1964
+ "pricing": {
1965
+ "text_tokens": {
1966
+ "standard": {
1967
+ "input_per_million": 0.5,
1968
+ "output_per_million": 1.5
1969
+ }
1970
+ }
1971
+ },
1972
+ "metadata": {
1973
+ "object": "model",
1974
+ "owned_by": null
1975
+ }
1976
+ },
1929
1977
  {
1930
1978
  "id": "FLUX.2-pro",
1931
1979
  "name": "Flux.2 Pro",
@@ -8079,6 +8127,41 @@
8079
8127
  "owned_by": null
8080
8128
  }
8081
8129
  },
8130
+ {
8131
+ "id": "gpt-audio-1.5-2026-02-23",
8132
+ "name": "GPT-Audio 1.5 20260223",
8133
+ "provider": "azure",
8134
+ "family": "other",
8135
+ "created_at": null,
8136
+ "context_window": 4096,
8137
+ "max_output_tokens": 16384,
8138
+ "knowledge_cutoff": null,
8139
+ "modalities": {
8140
+ "input": [
8141
+ "text",
8142
+ "audio"
8143
+ ],
8144
+ "output": [
8145
+ "text",
8146
+ "audio"
8147
+ ]
8148
+ },
8149
+ "capabilities": [
8150
+ "streaming"
8151
+ ],
8152
+ "pricing": {
8153
+ "text_tokens": {
8154
+ "standard": {
8155
+ "input_per_million": 0.5,
8156
+ "output_per_million": 1.5
8157
+ }
8158
+ }
8159
+ },
8160
+ "metadata": {
8161
+ "object": "model",
8162
+ "owned_by": null
8163
+ }
8164
+ },
8082
8165
  {
8083
8166
  "id": "gpt-audio-2025-08-28",
8084
8167
  "name": "GPT-Audio 20250828",
@@ -8215,6 +8298,39 @@
8215
8298
  "owned_by": null
8216
8299
  }
8217
8300
  },
8301
+ {
8302
+ "id": "gpt-realtime-1.5-2026-02-23",
8303
+ "name": "GPT-Realtime 1.5 20260223",
8304
+ "provider": "azure",
8305
+ "family": "other",
8306
+ "created_at": null,
8307
+ "context_window": 4096,
8308
+ "max_output_tokens": 16384,
8309
+ "knowledge_cutoff": null,
8310
+ "modalities": {
8311
+ "input": [
8312
+ "text"
8313
+ ],
8314
+ "output": [
8315
+ "text"
8316
+ ]
8317
+ },
8318
+ "capabilities": [
8319
+ "streaming"
8320
+ ],
8321
+ "pricing": {
8322
+ "text_tokens": {
8323
+ "standard": {
8324
+ "input_per_million": 0.5,
8325
+ "output_per_million": 1.5
8326
+ }
8327
+ }
8328
+ },
8329
+ "metadata": {
8330
+ "object": "model",
8331
+ "owned_by": null
8332
+ }
8333
+ },
8218
8334
  {
8219
8335
  "id": "gpt-realtime-2025-08-28",
8220
8336
  "name": "GPT-Realtime 20250828",
@@ -8413,6 +8529,39 @@
8413
8529
  "owned_by": null
8414
8530
  }
8415
8531
  },
8532
+ {
8533
+ "id": "grok-4-1-fast-non-reasoning",
8534
+ "name": "Grok 4 1 Fast Non Reasoning",
8535
+ "provider": "azure",
8536
+ "family": "other",
8537
+ "created_at": null,
8538
+ "context_window": 4096,
8539
+ "max_output_tokens": 16384,
8540
+ "knowledge_cutoff": null,
8541
+ "modalities": {
8542
+ "input": [
8543
+ "text"
8544
+ ],
8545
+ "output": [
8546
+ "text"
8547
+ ]
8548
+ },
8549
+ "capabilities": [
8550
+ "streaming"
8551
+ ],
8552
+ "pricing": {
8553
+ "text_tokens": {
8554
+ "standard": {
8555
+ "input_per_million": 0.5,
8556
+ "output_per_million": 1.5
8557
+ }
8558
+ }
8559
+ },
8560
+ "metadata": {
8561
+ "object": "model",
8562
+ "owned_by": null
8563
+ }
8564
+ },
8416
8565
  {
8417
8566
  "id": "grok-4-fast-non-reasoning",
8418
8567
  "name": "Grok 4 Fast Non Reasoning",
@@ -17354,7 +17503,8 @@
17354
17503
  "function_calling",
17355
17504
  "reasoning",
17356
17505
  "vision",
17357
- "streaming"
17506
+ "streaming",
17507
+ "structured_output"
17358
17508
  ],
17359
17509
  "pricing": {
17360
17510
  "text_tokens": {
@@ -17572,7 +17722,8 @@
17572
17722
  "function_calling",
17573
17723
  "reasoning",
17574
17724
  "vision",
17575
- "streaming"
17725
+ "streaming",
17726
+ "structured_output"
17576
17727
  ],
17577
17728
  "pricing": {
17578
17729
  "text_tokens": {
@@ -17651,7 +17802,8 @@
17651
17802
  "function_calling",
17652
17803
  "reasoning",
17653
17804
  "vision",
17654
- "streaming"
17805
+ "streaming",
17806
+ "structured_output"
17655
17807
  ],
17656
17808
  "pricing": {
17657
17809
  "text_tokens": {
@@ -17815,7 +17967,8 @@
17815
17967
  "function_calling",
17816
17968
  "reasoning",
17817
17969
  "vision",
17818
- "streaming"
17970
+ "streaming",
17971
+ "structured_output"
17819
17972
  ],
17820
17973
  "pricing": {
17821
17974
  "text_tokens": {
@@ -17894,7 +18047,8 @@
17894
18047
  "function_calling",
17895
18048
  "reasoning",
17896
18049
  "vision",
17897
- "streaming"
18050
+ "streaming",
18051
+ "structured_output"
17898
18052
  ],
17899
18053
  "pricing": {
17900
18054
  "text_tokens": {
@@ -20997,6 +21151,103 @@
20997
21151
  "knowledge": "2025-01"
20998
21152
  }
20999
21153
  },
21154
+ {
21155
+ "id": "gemini-3.1-flash-image-preview",
21156
+ "name": "Nano Banana 2",
21157
+ "provider": "gemini",
21158
+ "family": "other",
21159
+ "created_at": null,
21160
+ "context_window": 65536,
21161
+ "max_output_tokens": 65536,
21162
+ "knowledge_cutoff": null,
21163
+ "modalities": {
21164
+ "input": [
21165
+ "text",
21166
+ "image",
21167
+ "pdf",
21168
+ "video"
21169
+ ],
21170
+ "output": [
21171
+ "text"
21172
+ ]
21173
+ },
21174
+ "capabilities": [
21175
+ "streaming",
21176
+ "function_calling",
21177
+ "structured_output",
21178
+ "batch",
21179
+ "caching"
21180
+ ],
21181
+ "pricing": {
21182
+ "text_tokens": {
21183
+ "standard": {
21184
+ "input_per_million": 0.075,
21185
+ "output_per_million": 0.3
21186
+ },
21187
+ "batch": {
21188
+ "input_per_million": 0.0375,
21189
+ "output_per_million": 0.15
21190
+ }
21191
+ }
21192
+ },
21193
+ "metadata": {
21194
+ "version": "3.0",
21195
+ "description": "Gemini 3.1 Flash Image Preview.",
21196
+ "supported_generation_methods": [
21197
+ "generateContent",
21198
+ "countTokens",
21199
+ "batchGenerateContent"
21200
+ ]
21201
+ }
21202
+ },
21203
+ {
21204
+ "id": "gemini-3.1-flash-lite-preview",
21205
+ "name": "Gemini 3.1 Flash Lite Preview",
21206
+ "provider": "gemini",
21207
+ "family": "other",
21208
+ "created_at": null,
21209
+ "context_window": 1048576,
21210
+ "max_output_tokens": 65536,
21211
+ "knowledge_cutoff": null,
21212
+ "modalities": {
21213
+ "input": [
21214
+ "text",
21215
+ "image",
21216
+ "pdf",
21217
+ "video"
21218
+ ],
21219
+ "output": [
21220
+ "text"
21221
+ ]
21222
+ },
21223
+ "capabilities": [
21224
+ "streaming",
21225
+ "structured_output",
21226
+ "batch"
21227
+ ],
21228
+ "pricing": {
21229
+ "text_tokens": {
21230
+ "standard": {
21231
+ "input_per_million": 0.075,
21232
+ "output_per_million": 0.3
21233
+ },
21234
+ "batch": {
21235
+ "input_per_million": 0.0375,
21236
+ "output_per_million": 0.15
21237
+ }
21238
+ }
21239
+ },
21240
+ "metadata": {
21241
+ "version": "3.1-flash-lite-preview-03-2026",
21242
+ "description": "Gemini 3.1 Flash Lite Preview",
21243
+ "supported_generation_methods": [
21244
+ "generateContent",
21245
+ "countTokens",
21246
+ "createCachedContent",
21247
+ "batchGenerateContent"
21248
+ ]
21249
+ }
21250
+ },
21000
21251
  {
21001
21252
  "id": "gemini-3.1-pro-preview",
21002
21253
  "name": "Gemini 3.1 Pro Preview",
@@ -23722,36 +23973,6 @@
23722
23973
  "owned_by": "mistralai"
23723
23974
  }
23724
23975
  },
23725
- {
23726
- "id": "mistral-small-2501",
23727
- "name": "Mistral Small",
23728
- "provider": "mistral",
23729
- "family": "mistral-small",
23730
- "created_at": "2025-01-12 23:00:00 UTC",
23731
- "context_window": 32768,
23732
- "max_output_tokens": 8192,
23733
- "knowledge_cutoff": null,
23734
- "modalities": {
23735
- "input": [
23736
- "text"
23737
- ],
23738
- "output": [
23739
- "text"
23740
- ]
23741
- },
23742
- "capabilities": [
23743
- "streaming",
23744
- "function_calling",
23745
- "structured_output",
23746
- "batch",
23747
- "fine_tuning"
23748
- ],
23749
- "pricing": {},
23750
- "metadata": {
23751
- "object": "model",
23752
- "owned_by": "mistralai"
23753
- }
23754
- },
23755
23976
  {
23756
23977
  "id": "mistral-small-2506",
23757
23978
  "name": "Mistral Small 3.2",
@@ -27688,6 +27909,45 @@
27688
27909
  "owned_by": "system"
27689
27910
  }
27690
27911
  },
27912
+ {
27913
+ "id": "gpt-5.3-chat-latest",
27914
+ "name": "GPT-5.3 Chat Latest",
27915
+ "provider": "openai",
27916
+ "family": "gpt5",
27917
+ "created_at": "2026-02-27 23:56:11 UTC",
27918
+ "context_window": 128000,
27919
+ "max_output_tokens": 400000,
27920
+ "knowledge_cutoff": null,
27921
+ "modalities": {
27922
+ "input": [
27923
+ "text",
27924
+ "image",
27925
+ "pdf"
27926
+ ],
27927
+ "output": [
27928
+ "text"
27929
+ ]
27930
+ },
27931
+ "capabilities": [
27932
+ "streaming",
27933
+ "function_calling",
27934
+ "structured_output",
27935
+ "reasoning"
27936
+ ],
27937
+ "pricing": {
27938
+ "text_tokens": {
27939
+ "standard": {
27940
+ "input_per_million": 1.25,
27941
+ "output_per_million": 10.0,
27942
+ "cached_input_per_million": 0.125
27943
+ }
27944
+ }
27945
+ },
27946
+ "metadata": {
27947
+ "object": "model",
27948
+ "owned_by": "system"
27949
+ }
27950
+ },
27691
27951
  {
27692
27952
  "id": "gpt-5.3-codex",
27693
27953
  "name": "GPT-5.3 Codex",
@@ -29751,7 +30011,8 @@
29751
30011
  "text_tokens": {
29752
30012
  "standard": {
29753
30013
  "input_per_million": 0.7999999999999999,
29754
- "output_per_million": 1.5999999999999999
30014
+ "output_per_million": 1.5999999999999999,
30015
+ "cached_input_per_million": 0.19999999999999998
29755
30016
  }
29756
30017
  }
29757
30018
  },
@@ -29918,7 +30179,8 @@
29918
30179
  "capabilities": [
29919
30180
  "streaming",
29920
30181
  "function_calling",
29921
- "structured_output"
30182
+ "structured_output",
30183
+ "predicted_outputs"
29922
30184
  ],
29923
30185
  "pricing": {
29924
30186
  "text_tokens": {
@@ -29949,14 +30211,22 @@
29949
30211
  },
29950
30212
  "per_request_limits": null,
29951
30213
  "supported_parameters": [
30214
+ "frequency_penalty",
29952
30215
  "include_reasoning",
30216
+ "logit_bias",
29953
30217
  "max_tokens",
30218
+ "min_p",
30219
+ "presence_penalty",
29954
30220
  "reasoning",
30221
+ "repetition_penalty",
29955
30222
  "response_format",
30223
+ "seed",
30224
+ "stop",
29956
30225
  "structured_outputs",
29957
30226
  "temperature",
29958
30227
  "tool_choice",
29959
30228
  "tools",
30229
+ "top_k",
29960
30230
  "top_p"
29961
30231
  ]
29962
30232
  }
@@ -33040,6 +33310,74 @@
33040
33310
  ]
33041
33311
  }
33042
33312
  },
33313
+ {
33314
+ "id": "bytedance-seed/seed-2.0-mini",
33315
+ "name": "ByteDance Seed: Seed-2.0-Mini",
33316
+ "provider": "openrouter",
33317
+ "family": "bytedance-seed",
33318
+ "created_at": "2026-02-26 18:38:27 UTC",
33319
+ "context_window": 262144,
33320
+ "max_output_tokens": 131072,
33321
+ "knowledge_cutoff": null,
33322
+ "modalities": {
33323
+ "input": [
33324
+ "text",
33325
+ "image",
33326
+ "video"
33327
+ ],
33328
+ "output": [
33329
+ "text"
33330
+ ]
33331
+ },
33332
+ "capabilities": [
33333
+ "streaming",
33334
+ "function_calling",
33335
+ "structured_output"
33336
+ ],
33337
+ "pricing": {
33338
+ "text_tokens": {
33339
+ "standard": {
33340
+ "input_per_million": 0.09999999999999999,
33341
+ "output_per_million": 0.39999999999999997
33342
+ }
33343
+ }
33344
+ },
33345
+ "metadata": {
33346
+ "description": "Seed-2.0-mini targets latency-sensitive, high-concurrency, and cost-sensitive scenarios, emphasizing fast response and flexible inference deployment. It delivers performance comparable to ByteDance-Seed-1.6, supports 256k context, four reasoning effort modes (minimal/low/medium/high), multimodal understanding, and is optimized for lightweight tasks where cost and speed take priority.",
33347
+ "architecture": {
33348
+ "modality": "text+image+video->text",
33349
+ "input_modalities": [
33350
+ "text",
33351
+ "image",
33352
+ "video"
33353
+ ],
33354
+ "output_modalities": [
33355
+ "text"
33356
+ ],
33357
+ "tokenizer": "Other",
33358
+ "instruct_type": null
33359
+ },
33360
+ "top_provider": {
33361
+ "context_length": 262144,
33362
+ "max_completion_tokens": 131072,
33363
+ "is_moderated": false
33364
+ },
33365
+ "per_request_limits": null,
33366
+ "supported_parameters": [
33367
+ "frequency_penalty",
33368
+ "include_reasoning",
33369
+ "max_tokens",
33370
+ "reasoning",
33371
+ "response_format",
33372
+ "stop",
33373
+ "structured_outputs",
33374
+ "temperature",
33375
+ "tool_choice",
33376
+ "tools",
33377
+ "top_p"
33378
+ ]
33379
+ }
33380
+ },
33043
33381
  {
33044
33382
  "id": "bytedance-seed/seedream-4.5",
33045
33383
  "name": "Seedream 4.5",
@@ -33705,9 +34043,9 @@
33705
34043
  "pricing": {
33706
34044
  "text_tokens": {
33707
34045
  "standard": {
33708
- "input_per_million": 0.19,
33709
- "output_per_million": 0.87,
33710
- "cached_input_per_million": 0.095
34046
+ "input_per_million": 0.19999999999999998,
34047
+ "output_per_million": 0.77,
34048
+ "cached_input_per_million": 0.135
33711
34049
  }
33712
34050
  }
33713
34051
  },
@@ -33726,7 +34064,7 @@
33726
34064
  },
33727
34065
  "top_provider": {
33728
34066
  "context_length": 163840,
33729
- "max_completion_tokens": 65536,
34067
+ "max_completion_tokens": null,
33730
34068
  "is_moderated": false
33731
34069
  },
33732
34070
  "per_request_limits": null,
@@ -33947,9 +34285,9 @@
33947
34285
  "pricing": {
33948
34286
  "text_tokens": {
33949
34287
  "standard": {
33950
- "input_per_million": 0.39999999999999997,
33951
- "output_per_million": 1.75,
33952
- "cached_input_per_million": 0.19999999999999998
34288
+ "input_per_million": 0.44999999999999996,
34289
+ "output_per_million": 2.1500000000000004,
34290
+ "cached_input_per_million": 0.22499999999999998
33953
34291
  }
33954
34292
  }
33955
34293
  },
@@ -34249,6 +34587,7 @@
34249
34587
  "supported_parameters": [
34250
34588
  "frequency_penalty",
34251
34589
  "include_reasoning",
34590
+ "logprobs",
34252
34591
  "max_tokens",
34253
34592
  "presence_penalty",
34254
34593
  "reasoning",
@@ -34259,6 +34598,7 @@
34259
34598
  "structured_outputs",
34260
34599
  "temperature",
34261
34600
  "top_k",
34601
+ "top_logprobs",
34262
34602
  "top_p"
34263
34603
  ]
34264
34604
  }
@@ -34361,7 +34701,8 @@
34361
34701
  "function_calling",
34362
34702
  "structured_output",
34363
34703
  "reasoning",
34364
- "streaming"
34704
+ "streaming",
34705
+ "predicted_outputs"
34365
34706
  ],
34366
34707
  "pricing": {
34367
34708
  "text_tokens": {
@@ -34393,6 +34734,7 @@
34393
34734
  "supported_parameters": [
34394
34735
  "frequency_penalty",
34395
34736
  "include_reasoning",
34737
+ "logit_bias",
34396
34738
  "max_tokens",
34397
34739
  "min_p",
34398
34740
  "presence_penalty",
@@ -34557,7 +34899,7 @@
34557
34899
  },
34558
34900
  "top_provider": {
34559
34901
  "context_length": 163840,
34560
- "max_completion_tokens": 163840,
34902
+ "max_completion_tokens": 65536,
34561
34903
  "is_moderated": false
34562
34904
  },
34563
34905
  "per_request_limits": null,
@@ -34619,7 +34961,8 @@
34619
34961
  "capabilities": [
34620
34962
  "streaming",
34621
34963
  "function_calling",
34622
- "structured_output"
34964
+ "structured_output",
34965
+ "predicted_outputs"
34623
34966
  ],
34624
34967
  "pricing": {
34625
34968
  "text_tokens": {
@@ -34651,7 +34994,9 @@
34651
34994
  "supported_parameters": [
34652
34995
  "frequency_penalty",
34653
34996
  "include_reasoning",
34997
+ "logit_bias",
34654
34998
  "max_tokens",
34999
+ "min_p",
34655
35000
  "presence_penalty",
34656
35001
  "reasoning",
34657
35002
  "repetition_penalty",
@@ -34688,7 +35033,8 @@
34688
35033
  "function_calling",
34689
35034
  "structured_output",
34690
35035
  "reasoning",
34691
- "streaming"
35036
+ "streaming",
35037
+ "predicted_outputs"
34692
35038
  ],
34693
35039
  "pricing": {
34694
35040
  "text_tokens": {
@@ -34713,15 +35059,25 @@
34713
35059
  },
34714
35060
  "top_provider": {
34715
35061
  "context_length": 163840,
34716
- "max_completion_tokens": 65536,
35062
+ "max_completion_tokens": 163840,
34717
35063
  "is_moderated": false
34718
35064
  },
34719
35065
  "per_request_limits": null,
34720
35066
  "supported_parameters": [
35067
+ "frequency_penalty",
34721
35068
  "include_reasoning",
35069
+ "logit_bias",
34722
35070
  "max_tokens",
35071
+ "min_p",
35072
+ "presence_penalty",
34723
35073
  "reasoning",
35074
+ "repetition_penalty",
35075
+ "response_format",
35076
+ "seed",
35077
+ "stop",
35078
+ "structured_outputs",
34724
35079
  "temperature",
35080
+ "top_k",
34725
35081
  "top_p"
34726
35082
  ],
34727
35083
  "source": "models.dev",
@@ -34821,6 +35177,7 @@
34821
35177
  },
34822
35178
  "capabilities": [
34823
35179
  "streaming",
35180
+ "function_calling",
34824
35181
  "structured_output",
34825
35182
  "predicted_outputs"
34826
35183
  ],
@@ -34862,6 +35219,8 @@
34862
35219
  "stop",
34863
35220
  "structured_outputs",
34864
35221
  "temperature",
35222
+ "tool_choice",
35223
+ "tools",
34865
35224
  "top_k",
34866
35225
  "top_p"
34867
35226
  ]
@@ -35197,7 +35556,7 @@
35197
35556
  },
35198
35557
  {
35199
35558
  "id": "google/gemini-2.5-flash-image",
35200
- "name": "Google: Gemini 2.5 Flash Image (Nano Banana)",
35559
+ "name": "Google: Nano Banana (Gemini 2.5 Flash Image)",
35201
35560
  "provider": "openrouter",
35202
35561
  "family": "google",
35203
35562
  "created_at": "2025-10-07 20:53:51 UTC",
@@ -35253,6 +35612,7 @@
35253
35612
  "max_tokens",
35254
35613
  "response_format",
35255
35614
  "seed",
35615
+ "stop",
35256
35616
  "structured_outputs",
35257
35617
  "temperature",
35258
35618
  "top_p"
@@ -35406,7 +35766,7 @@
35406
35766
  },
35407
35767
  "top_provider": {
35408
35768
  "context_length": 1048576,
35409
- "max_completion_tokens": 65535,
35769
+ "max_completion_tokens": 65536,
35410
35770
  "is_moderated": false
35411
35771
  },
35412
35772
  "per_request_limits": null,
@@ -35861,7 +36221,7 @@
35861
36221
  },
35862
36222
  "top_provider": {
35863
36223
  "context_length": 1048576,
35864
- "max_completion_tokens": 65535,
36224
+ "max_completion_tokens": 65536,
35865
36225
  "is_moderated": false
35866
36226
  },
35867
36227
  "per_request_limits": null,
@@ -36059,11 +36419,76 @@
36059
36419
  }
36060
36420
  },
36061
36421
  {
36062
- "id": "google/gemini-3.1-pro-preview",
36063
- "name": "Gemini 3.1 Pro Preview",
36422
+ "id": "google/gemini-3.1-flash-image-preview",
36423
+ "name": "Google: Nano Banana 2 (Gemini 3.1 Flash Image Preview)",
36064
36424
  "provider": "openrouter",
36065
- "family": "gemini-pro",
36066
- "created_at": "2026-02-19 00:00:00 UTC",
36425
+ "family": "google",
36426
+ "created_at": "2026-02-26 15:25:58 UTC",
36427
+ "context_window": 65536,
36428
+ "max_output_tokens": 65536,
36429
+ "knowledge_cutoff": null,
36430
+ "modalities": {
36431
+ "input": [
36432
+ "image",
36433
+ "text"
36434
+ ],
36435
+ "output": [
36436
+ "image",
36437
+ "text"
36438
+ ]
36439
+ },
36440
+ "capabilities": [
36441
+ "streaming",
36442
+ "structured_output"
36443
+ ],
36444
+ "pricing": {
36445
+ "text_tokens": {
36446
+ "standard": {
36447
+ "input_per_million": 0.5,
36448
+ "output_per_million": 3.0
36449
+ }
36450
+ }
36451
+ },
36452
+ "metadata": {
36453
+ "description": "Gemini 3.1 Flash Image Preview, a.k.a. \"Nano Banana 2,\" is Google’s latest state of the art image generation and editing model, delivering Pro-level visual quality at Flash speed. It combines advanced contextual understanding with fast, cost-efficient inference, making complex image generation and iterative edits significantly more accessible. Aspect ratios can be controlled with the [image_config API Parameter](https://openrouter.ai/docs/features/multimodal/image-generation#image-aspect-ratio-configuration)",
36454
+ "architecture": {
36455
+ "modality": "text+image->text+image",
36456
+ "input_modalities": [
36457
+ "image",
36458
+ "text"
36459
+ ],
36460
+ "output_modalities": [
36461
+ "image",
36462
+ "text"
36463
+ ],
36464
+ "tokenizer": "Gemini",
36465
+ "instruct_type": null
36466
+ },
36467
+ "top_provider": {
36468
+ "context_length": 65536,
36469
+ "max_completion_tokens": 65536,
36470
+ "is_moderated": false
36471
+ },
36472
+ "per_request_limits": null,
36473
+ "supported_parameters": [
36474
+ "include_reasoning",
36475
+ "max_tokens",
36476
+ "reasoning",
36477
+ "response_format",
36478
+ "seed",
36479
+ "stop",
36480
+ "structured_outputs",
36481
+ "temperature",
36482
+ "top_p"
36483
+ ]
36484
+ }
36485
+ },
36486
+ {
36487
+ "id": "google/gemini-3.1-flash-lite-preview",
36488
+ "name": "Google: Gemini 3.1 Flash Lite Preview",
36489
+ "provider": "openrouter",
36490
+ "family": "google",
36491
+ "created_at": "2026-03-03 04:37:53 UTC",
36067
36492
  "context_window": 1048576,
36068
36493
  "max_output_tokens": 65536,
36069
36494
  "knowledge_cutoff": null,
@@ -36071,40 +36496,213 @@
36071
36496
  "input": [
36072
36497
  "text",
36073
36498
  "image",
36074
- "audio",
36075
36499
  "video",
36076
- "pdf"
36500
+ "file",
36501
+ "audio"
36077
36502
  ],
36078
36503
  "output": [
36079
36504
  "text"
36080
36505
  ]
36081
36506
  },
36082
36507
  "capabilities": [
36508
+ "streaming",
36083
36509
  "function_calling",
36084
- "structured_output",
36085
- "reasoning",
36086
- "vision",
36087
- "streaming"
36510
+ "structured_output"
36088
36511
  ],
36089
36512
  "pricing": {
36090
36513
  "text_tokens": {
36091
36514
  "standard": {
36092
- "input_per_million": 2,
36093
- "output_per_million": 12,
36094
- "reasoning_output_per_million": 12
36515
+ "input_per_million": 0.25,
36516
+ "output_per_million": 1.5,
36517
+ "cached_input_per_million": 0.024999999999999998,
36518
+ "reasoning_output_per_million": 1.5
36095
36519
  }
36096
36520
  }
36097
36521
  },
36098
36522
  "metadata": {
36099
- "description": "Gemini 3.1 Pro Preview is Googles frontier reasoning model, delivering enhanced software engineering performance, improved agentic reliability, and more efficient token usage across complex workflows. Building on the multimodal foundation of the Gemini 3 series, it combines high-precision reasoning across text, image, video, audio, and code with a 1M-token context window. Reasoning Details must be preserved when using multi-turn tool calling, see our docs here: https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning. The 3.1 update introduces measurable gains in SWE benchmarks and real-world coding environments, along with stronger autonomous task execution in structured domains such as finance and spreadsheet-based workflows.\n\nDesigned for advanced development and agentic systems, Gemini 3.1 Pro Preview improves long-horizon stability and tool orchestration while increasing token efficiency. It introduces a new medium thinking level to better balance cost, speed, and performance. The model excels in agentic coding, structured planning, multimodal analysis, and workflow automation, making it well-suited for autonomous agents, financial modeling, spreadsheet automation, and high-context enterprise tasks.",
36523
+ "description": "Gemini 3.1 Flash Lite Preview is Google's high-efficiency model optimized for high-volume use cases. It outperforms Gemini 2.5 Flash Lite on overall quality and approaches Gemini 2.5 Flash performance across key capabilities. Improvements span audio input/ASR, RAG snippet ranking, translation, data extraction, and code completion. Supports full thinking levels (minimal, low, medium, high) for fine-grained cost/performance trade-offs. Priced at half the cost of Gemini 3 Flash.",
36100
36524
  "architecture": {
36101
36525
  "modality": "text+image+file+audio+video->text",
36102
36526
  "input_modalities": [
36103
- "audio",
36104
- "file",
36105
- "image",
36106
36527
  "text",
36107
- "video"
36528
+ "image",
36529
+ "video",
36530
+ "file",
36531
+ "audio"
36532
+ ],
36533
+ "output_modalities": [
36534
+ "text"
36535
+ ],
36536
+ "tokenizer": "Gemini",
36537
+ "instruct_type": null
36538
+ },
36539
+ "top_provider": {
36540
+ "context_length": 1048576,
36541
+ "max_completion_tokens": 65536,
36542
+ "is_moderated": false
36543
+ },
36544
+ "per_request_limits": null,
36545
+ "supported_parameters": [
36546
+ "include_reasoning",
36547
+ "max_tokens",
36548
+ "reasoning",
36549
+ "response_format",
36550
+ "seed",
36551
+ "stop",
36552
+ "structured_outputs",
36553
+ "temperature",
36554
+ "tool_choice",
36555
+ "tools",
36556
+ "top_p"
36557
+ ]
36558
+ }
36559
+ },
36560
+ {
36561
+ "id": "google/gemini-3.1-pro-preview",
36562
+ "name": "Gemini 3.1 Pro Preview",
36563
+ "provider": "openrouter",
36564
+ "family": "gemini-pro",
36565
+ "created_at": "2026-02-19 00:00:00 UTC",
36566
+ "context_window": 1048576,
36567
+ "max_output_tokens": 65536,
36568
+ "knowledge_cutoff": null,
36569
+ "modalities": {
36570
+ "input": [
36571
+ "text",
36572
+ "image",
36573
+ "audio",
36574
+ "video",
36575
+ "pdf"
36576
+ ],
36577
+ "output": [
36578
+ "text"
36579
+ ]
36580
+ },
36581
+ "capabilities": [
36582
+ "function_calling",
36583
+ "structured_output",
36584
+ "reasoning",
36585
+ "vision",
36586
+ "streaming"
36587
+ ],
36588
+ "pricing": {
36589
+ "text_tokens": {
36590
+ "standard": {
36591
+ "input_per_million": 2,
36592
+ "output_per_million": 12,
36593
+ "reasoning_output_per_million": 12
36594
+ }
36595
+ }
36596
+ },
36597
+ "metadata": {
36598
+ "description": "Gemini 3.1 Pro Preview is Google’s frontier reasoning model, delivering enhanced software engineering performance, improved agentic reliability, and more efficient token usage across complex workflows. Building on the multimodal foundation of the Gemini 3 series, it combines high-precision reasoning across text, image, video, audio, and code with a 1M-token context window. Reasoning Details must be preserved when using multi-turn tool calling, see our docs here: https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning. The 3.1 update introduces measurable gains in SWE benchmarks and real-world coding environments, along with stronger autonomous task execution in structured domains such as finance and spreadsheet-based workflows.\n\nDesigned for advanced development and agentic systems, Gemini 3.1 Pro Preview improves long-horizon stability and tool orchestration while increasing token efficiency. It introduces a new medium thinking level to better balance cost, speed, and performance. The model excels in agentic coding, structured planning, multimodal analysis, and workflow automation, making it well-suited for autonomous agents, financial modeling, spreadsheet automation, and high-context enterprise tasks.",
36599
+ "architecture": {
36600
+ "modality": "text+image+file+audio+video->text",
36601
+ "input_modalities": [
36602
+ "audio",
36603
+ "file",
36604
+ "image",
36605
+ "text",
36606
+ "video"
36607
+ ],
36608
+ "output_modalities": [
36609
+ "text"
36610
+ ],
36611
+ "tokenizer": "Gemini",
36612
+ "instruct_type": null
36613
+ },
36614
+ "top_provider": {
36615
+ "context_length": 1048576,
36616
+ "max_completion_tokens": 65536,
36617
+ "is_moderated": false
36618
+ },
36619
+ "per_request_limits": null,
36620
+ "supported_parameters": [
36621
+ "include_reasoning",
36622
+ "max_tokens",
36623
+ "reasoning",
36624
+ "response_format",
36625
+ "seed",
36626
+ "stop",
36627
+ "structured_outputs",
36628
+ "temperature",
36629
+ "tool_choice",
36630
+ "tools",
36631
+ "top_p"
36632
+ ],
36633
+ "source": "models.dev",
36634
+ "provider_id": "openrouter",
36635
+ "open_weights": false,
36636
+ "attachment": true,
36637
+ "temperature": true,
36638
+ "last_updated": "2026-02-19",
36639
+ "interleaved": {
36640
+ "field": "reasoning_details"
36641
+ },
36642
+ "cost": {
36643
+ "input": 2,
36644
+ "output": 12,
36645
+ "reasoning": 12,
36646
+ "context_over_200k": {
36647
+ "input": 4,
36648
+ "output": 18,
36649
+ "cache_read": 0.4
36650
+ }
36651
+ },
36652
+ "limit": {
36653
+ "context": 1048576,
36654
+ "output": 65536
36655
+ },
36656
+ "knowledge": "2025-01"
36657
+ }
36658
+ },
36659
+ {
36660
+ "id": "google/gemini-3.1-pro-preview-customtools",
36661
+ "name": "Gemini 3.1 Pro Preview Custom Tools",
36662
+ "provider": "openrouter",
36663
+ "family": "gemini-pro",
36664
+ "created_at": "2026-02-19 00:00:00 UTC",
36665
+ "context_window": 1048576,
36666
+ "max_output_tokens": 65536,
36667
+ "knowledge_cutoff": null,
36668
+ "modalities": {
36669
+ "input": [
36670
+ "text",
36671
+ "image",
36672
+ "audio",
36673
+ "video",
36674
+ "pdf"
36675
+ ],
36676
+ "output": [
36677
+ "text"
36678
+ ]
36679
+ },
36680
+ "capabilities": [
36681
+ "function_calling",
36682
+ "structured_output",
36683
+ "reasoning",
36684
+ "vision",
36685
+ "streaming"
36686
+ ],
36687
+ "pricing": {
36688
+ "text_tokens": {
36689
+ "standard": {
36690
+ "input_per_million": 2,
36691
+ "output_per_million": 12,
36692
+ "reasoning_output_per_million": 12
36693
+ }
36694
+ }
36695
+ },
36696
+ "metadata": {
36697
+ "description": "Gemini 3.1 Pro Preview Custom Tools is a variant of Gemini 3.1 Pro that improves tool selection behavior by preventing overuse of a general bash tool when more efficient third-party or user-defined functions are available. This specialized preview endpoint significantly increases function calling reliability and ensures the model selects the most appropriate tool in coding agents and complex, multi-tool workflows.\n\nIt retains the core strengths of Gemini 3.1 Pro, including multimodal reasoning across text, image, video, audio, and code, a 1M-token context window, and strong software engineering performance.",
36698
+ "architecture": {
36699
+ "modality": "text+image+file+audio+video->text",
36700
+ "input_modalities": [
36701
+ "text",
36702
+ "audio",
36703
+ "image",
36704
+ "video",
36705
+ "file"
36108
36706
  ],
36109
36707
  "output_modalities": [
36110
36708
  "text"
@@ -36792,12 +37390,9 @@
36792
37390
  },
36793
37391
  "per_request_limits": null,
36794
37392
  "supported_parameters": [
36795
- "frequency_penalty",
36796
37393
  "max_tokens",
36797
- "presence_penalty",
36798
37394
  "response_format",
36799
37395
  "seed",
36800
- "stop",
36801
37396
  "temperature",
36802
37397
  "top_p"
36803
37398
  ],
@@ -36937,12 +37532,9 @@
36937
37532
  },
36938
37533
  "per_request_limits": null,
36939
37534
  "supported_parameters": [
36940
- "frequency_penalty",
36941
37535
  "max_tokens",
36942
- "presence_penalty",
36943
37536
  "response_format",
36944
37537
  "seed",
36945
- "stop",
36946
37538
  "temperature",
36947
37539
  "top_p"
36948
37540
  ],
@@ -37117,7 +37709,8 @@
37117
37709
  "text_tokens": {
37118
37710
  "standard": {
37119
37711
  "input_per_million": 0.25,
37120
- "output_per_million": 1.0
37712
+ "output_per_million": 0.75,
37713
+ "cached_input_per_million": 0.024999999999999998
37121
37714
  }
37122
37715
  }
37123
37716
  },
@@ -37181,7 +37774,8 @@
37181
37774
  "text_tokens": {
37182
37775
  "standard": {
37183
37776
  "input_per_million": 0.25,
37184
- "output_per_million": 1.0
37777
+ "output_per_million": 0.75,
37778
+ "cached_input_per_million": 0.024999999999999998
37185
37779
  }
37186
37780
  }
37187
37781
  },
@@ -37349,7 +37943,8 @@
37349
37943
  "capabilities": [
37350
37944
  "streaming",
37351
37945
  "function_calling",
37352
- "structured_output"
37946
+ "structured_output",
37947
+ "predicted_outputs"
37353
37948
  ],
37354
37949
  "pricing": {
37355
37950
  "text_tokens": {
@@ -37381,7 +37976,9 @@
37381
37976
  "per_request_limits": null,
37382
37977
  "supported_parameters": [
37383
37978
  "frequency_penalty",
37979
+ "logit_bias",
37384
37980
  "max_tokens",
37981
+ "min_p",
37385
37982
  "presence_penalty",
37386
37983
  "repetition_penalty",
37387
37984
  "response_format",
@@ -37436,6 +38033,68 @@
37436
38033
  "knowledge": "2025-11"
37437
38034
  }
37438
38035
  },
38036
+ {
38037
+ "id": "liquid/lfm-2-24b-a2b",
38038
+ "name": "LiquidAI: LFM2-24B-A2B",
38039
+ "provider": "openrouter",
38040
+ "family": "liquid",
38041
+ "created_at": "2026-02-25 19:45:11 UTC",
38042
+ "context_window": 32768,
38043
+ "max_output_tokens": null,
38044
+ "knowledge_cutoff": null,
38045
+ "modalities": {
38046
+ "input": [
38047
+ "text"
38048
+ ],
38049
+ "output": [
38050
+ "text"
38051
+ ]
38052
+ },
38053
+ "capabilities": [
38054
+ "streaming",
38055
+ "predicted_outputs"
38056
+ ],
38057
+ "pricing": {
38058
+ "text_tokens": {
38059
+ "standard": {
38060
+ "input_per_million": 0.03,
38061
+ "output_per_million": 0.12
38062
+ }
38063
+ }
38064
+ },
38065
+ "metadata": {
38066
+ "description": "LFM2-24B-A2B is the largest model in the LFM2 family of hybrid architectures designed for efficient on-device deployment. Built as a 24B parameter Mixture-of-Experts model with only 2B active parameters per token, it delivers high-quality generation while maintaining low inference costs. The model fits within 32 GB of RAM, making it practical to run on consumer laptops and desktops without sacrificing capability.",
38067
+ "architecture": {
38068
+ "modality": "text->text",
38069
+ "input_modalities": [
38070
+ "text"
38071
+ ],
38072
+ "output_modalities": [
38073
+ "text"
38074
+ ],
38075
+ "tokenizer": "Other",
38076
+ "instruct_type": null
38077
+ },
38078
+ "top_provider": {
38079
+ "context_length": 32768,
38080
+ "max_completion_tokens": null,
38081
+ "is_moderated": false
38082
+ },
38083
+ "per_request_limits": null,
38084
+ "supported_parameters": [
38085
+ "frequency_penalty",
38086
+ "logit_bias",
38087
+ "max_tokens",
38088
+ "min_p",
38089
+ "presence_penalty",
38090
+ "repetition_penalty",
38091
+ "stop",
38092
+ "temperature",
38093
+ "top_k",
38094
+ "top_p"
38095
+ ]
38096
+ }
38097
+ },
37439
38098
  {
37440
38099
  "id": "liquid/lfm-2.2-6b",
37441
38100
  "name": "LiquidAI: LFM2-2.6B",
@@ -37774,7 +38433,7 @@
37774
38433
  "family": "meituan",
37775
38434
  "created_at": "2025-09-09 14:20:58 UTC",
37776
38435
  "context_window": 131072,
37777
- "max_output_tokens": 32768,
38436
+ "max_output_tokens": 131072,
37778
38437
  "knowledge_cutoff": null,
37779
38438
  "modalities": {
37780
38439
  "input": [
@@ -37785,7 +38444,10 @@
37785
38444
  ]
37786
38445
  },
37787
38446
  "capabilities": [
37788
- "streaming"
38447
+ "streaming",
38448
+ "function_calling",
38449
+ "structured_output",
38450
+ "predicted_outputs"
37789
38451
  ],
37790
38452
  "pricing": {
37791
38453
  "text_tokens": {
@@ -37811,13 +38473,25 @@
37811
38473
  },
37812
38474
  "top_provider": {
37813
38475
  "context_length": 131072,
37814
- "max_completion_tokens": 32768,
38476
+ "max_completion_tokens": 131072,
37815
38477
  "is_moderated": false
37816
38478
  },
37817
38479
  "per_request_limits": null,
37818
38480
  "supported_parameters": [
38481
+ "frequency_penalty",
38482
+ "logit_bias",
37819
38483
  "max_tokens",
38484
+ "min_p",
38485
+ "presence_penalty",
38486
+ "repetition_penalty",
38487
+ "response_format",
38488
+ "seed",
38489
+ "stop",
38490
+ "structured_outputs",
37820
38491
  "temperature",
38492
+ "tool_choice",
38493
+ "tools",
38494
+ "top_k",
37821
38495
  "top_p"
37822
38496
  ]
37823
38497
  }
@@ -38284,8 +38958,7 @@
38284
38958
  "capabilities": [
38285
38959
  "vision",
38286
38960
  "streaming",
38287
- "structured_output",
38288
- "predicted_outputs"
38961
+ "structured_output"
38289
38962
  ],
38290
38963
  "pricing": {
38291
38964
  "text_tokens": {
@@ -38317,7 +38990,6 @@
38317
38990
  "per_request_limits": null,
38318
38991
  "supported_parameters": [
38319
38992
  "frequency_penalty",
38320
- "logit_bias",
38321
38993
  "max_tokens",
38322
38994
  "min_p",
38323
38995
  "presence_penalty",
@@ -38411,8 +39083,8 @@
38411
39083
  "provider": "openrouter",
38412
39084
  "family": "meta-llama",
38413
39085
  "created_at": "2024-09-25 00:00:00 UTC",
38414
- "context_window": 131072,
38415
- "max_output_tokens": 16384,
39086
+ "context_window": 80000,
39087
+ "max_output_tokens": null,
38416
39088
  "knowledge_cutoff": null,
38417
39089
  "modalities": {
38418
39090
  "input": [
@@ -38424,14 +39096,13 @@
38424
39096
  },
38425
39097
  "capabilities": [
38426
39098
  "streaming",
38427
- "structured_output",
38428
39099
  "predicted_outputs"
38429
39100
  ],
38430
39101
  "pricing": {
38431
39102
  "text_tokens": {
38432
39103
  "standard": {
38433
- "input_per_million": 0.02,
38434
- "output_per_million": 0.02
39104
+ "input_per_million": 0.051,
39105
+ "output_per_million": 0.33999999999999997
38435
39106
  }
38436
39107
  }
38437
39108
  },
@@ -38449,8 +39120,8 @@
38449
39120
  "instruct_type": "llama3"
38450
39121
  },
38451
39122
  "top_provider": {
38452
- "context_length": 131072,
38453
- "max_completion_tokens": 16384,
39123
+ "context_length": 80000,
39124
+ "max_completion_tokens": null,
38454
39125
  "is_moderated": false
38455
39126
  },
38456
39127
  "per_request_limits": null,
@@ -38461,7 +39132,6 @@
38461
39132
  "min_p",
38462
39133
  "presence_penalty",
38463
39134
  "repetition_penalty",
38464
- "response_format",
38465
39135
  "seed",
38466
39136
  "stop",
38467
39137
  "temperature",
@@ -39140,6 +39810,7 @@
39140
39810
  "per_request_limits": null,
39141
39811
  "supported_parameters": [
39142
39812
  "frequency_penalty",
39813
+ "logprobs",
39143
39814
  "max_tokens",
39144
39815
  "min_p",
39145
39816
  "presence_penalty",
@@ -39150,6 +39821,7 @@
39150
39821
  "structured_outputs",
39151
39822
  "temperature",
39152
39823
  "top_k",
39824
+ "top_logprobs",
39153
39825
  "top_p"
39154
39826
  ]
39155
39827
  }
@@ -39388,7 +40060,8 @@
39388
40060
  "function_calling",
39389
40061
  "structured_output",
39390
40062
  "reasoning",
39391
- "streaming"
40063
+ "streaming",
40064
+ "predicted_outputs"
39392
40065
  ],
39393
40066
  "pricing": {
39394
40067
  "text_tokens": {
@@ -39414,14 +40087,16 @@
39414
40087
  },
39415
40088
  "top_provider": {
39416
40089
  "context_length": 196608,
39417
- "max_completion_tokens": 65536,
40090
+ "max_completion_tokens": 196608,
39418
40091
  "is_moderated": false
39419
40092
  },
39420
40093
  "per_request_limits": null,
39421
40094
  "supported_parameters": [
39422
40095
  "frequency_penalty",
39423
40096
  "include_reasoning",
40097
+ "logit_bias",
39424
40098
  "max_tokens",
40099
+ "min_p",
39425
40100
  "presence_penalty",
39426
40101
  "reasoning",
39427
40102
  "repetition_penalty",
@@ -39650,7 +40325,7 @@
39650
40325
  },
39651
40326
  "top_provider": {
39652
40327
  "context_length": 196608,
39653
- "max_completion_tokens": 65536,
40328
+ "max_completion_tokens": 196608,
39654
40329
  "is_moderated": false
39655
40330
  },
39656
40331
  "per_request_limits": null,
@@ -40222,8 +40897,7 @@
40222
40897
  "capabilities": [
40223
40898
  "streaming",
40224
40899
  "function_calling",
40225
- "structured_output",
40226
- "predicted_outputs"
40900
+ "structured_output"
40227
40901
  ],
40228
40902
  "pricing": {
40229
40903
  "text_tokens": {
@@ -40255,11 +40929,8 @@
40255
40929
  "per_request_limits": null,
40256
40930
  "supported_parameters": [
40257
40931
  "frequency_penalty",
40258
- "logit_bias",
40259
40932
  "max_tokens",
40260
- "min_p",
40261
40933
  "presence_penalty",
40262
- "repetition_penalty",
40263
40934
  "response_format",
40264
40935
  "seed",
40265
40936
  "stop",
@@ -40267,7 +40938,6 @@
40267
40938
  "temperature",
40268
40939
  "tool_choice",
40269
40940
  "tools",
40270
- "top_k",
40271
40941
  "top_p"
40272
40942
  ]
40273
40943
  }
@@ -40405,13 +41075,13 @@
40405
41075
  }
40406
41076
  },
40407
41077
  {
40408
- "id": "mistralai/mistral-7b-instruct",
40409
- "name": "Mistral: Mistral 7B Instruct",
41078
+ "id": "mistralai/mistral-7b-instruct-v0.1",
41079
+ "name": "Mistral: Mistral 7B Instruct v0.1",
40410
41080
  "provider": "openrouter",
40411
41081
  "family": "mistralai",
40412
- "created_at": "2024-05-27 00:00:00 UTC",
40413
- "context_window": 32768,
40414
- "max_output_tokens": 4096,
41082
+ "created_at": "2023-09-28 00:00:00 UTC",
41083
+ "context_window": 2824,
41084
+ "max_output_tokens": null,
40415
41085
  "knowledge_cutoff": null,
40416
41086
  "modalities": {
40417
41087
  "input": [
@@ -40422,19 +41092,18 @@
40422
41092
  ]
40423
41093
  },
40424
41094
  "capabilities": [
40425
- "streaming",
40426
- "predicted_outputs"
41095
+ "streaming"
40427
41096
  ],
40428
41097
  "pricing": {
40429
41098
  "text_tokens": {
40430
41099
  "standard": {
40431
- "input_per_million": 0.19999999999999998,
40432
- "output_per_million": 0.19999999999999998
41100
+ "input_per_million": 0.11,
41101
+ "output_per_million": 0.19
40433
41102
  }
40434
41103
  }
40435
41104
  },
40436
41105
  "metadata": {
40437
- "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\n*Mistral 7B Instruct has multiple version variants, and this is intended to be the latest version.*",
41106
+ "description": "A 7.3B parameter model that outperforms Llama 2 13B on all benchmarks, with optimizations for speed and context length.",
40438
41107
  "architecture": {
40439
41108
  "modality": "text->text",
40440
41109
  "input_modalities": [
@@ -40447,19 +41116,17 @@
40447
41116
  "instruct_type": "mistral"
40448
41117
  },
40449
41118
  "top_provider": {
40450
- "context_length": 32768,
40451
- "max_completion_tokens": 4096,
41119
+ "context_length": 2824,
41120
+ "max_completion_tokens": null,
40452
41121
  "is_moderated": false
40453
41122
  },
40454
41123
  "per_request_limits": null,
40455
41124
  "supported_parameters": [
40456
41125
  "frequency_penalty",
40457
- "logit_bias",
40458
41126
  "max_tokens",
40459
- "min_p",
40460
41127
  "presence_penalty",
40461
41128
  "repetition_penalty",
40462
- "stop",
41129
+ "seed",
40463
41130
  "temperature",
40464
41131
  "top_k",
40465
41132
  "top_p"
@@ -40467,12 +41134,51 @@
40467
41134
  }
40468
41135
  },
40469
41136
  {
40470
- "id": "mistralai/mistral-7b-instruct-v0.1",
40471
- "name": "Mistral: Mistral 7B Instruct v0.1",
41137
+ "id": "mistralai/mistral-7b-instruct:free",
41138
+ "name": "Mistral 7B Instruct (free)",
41139
+ "provider": "openrouter",
41140
+ "family": "mistral",
41141
+ "created_at": "2024-05-27 00:00:00 UTC",
41142
+ "context_window": 32768,
41143
+ "max_output_tokens": 32768,
41144
+ "knowledge_cutoff": null,
41145
+ "modalities": {
41146
+ "input": [
41147
+ "text"
41148
+ ],
41149
+ "output": [
41150
+ "text"
41151
+ ]
41152
+ },
41153
+ "capabilities": [
41154
+ "function_calling"
41155
+ ],
41156
+ "pricing": {},
41157
+ "metadata": {
41158
+ "source": "models.dev",
41159
+ "provider_id": "openrouter",
41160
+ "open_weights": true,
41161
+ "attachment": false,
41162
+ "temperature": true,
41163
+ "last_updated": "2024-05-27",
41164
+ "cost": {
41165
+ "input": 0,
41166
+ "output": 0
41167
+ },
41168
+ "limit": {
41169
+ "context": 32768,
41170
+ "output": 32768
41171
+ },
41172
+ "knowledge": "2024-05"
41173
+ }
41174
+ },
41175
+ {
41176
+ "id": "mistralai/mistral-large",
41177
+ "name": "Mistral Large",
40472
41178
  "provider": "openrouter",
40473
41179
  "family": "mistralai",
40474
- "created_at": "2023-09-28 00:00:00 UTC",
40475
- "context_window": 2824,
41180
+ "created_at": "2024-02-26 00:00:00 UTC",
41181
+ "context_window": 128000,
40476
41182
  "max_output_tokens": null,
40477
41183
  "knowledge_cutoff": null,
40478
41184
  "modalities": {
@@ -40484,18 +41190,20 @@
40484
41190
  ]
40485
41191
  },
40486
41192
  "capabilities": [
40487
- "streaming"
41193
+ "streaming",
41194
+ "function_calling",
41195
+ "structured_output"
40488
41196
  ],
40489
41197
  "pricing": {
40490
41198
  "text_tokens": {
40491
41199
  "standard": {
40492
- "input_per_million": 0.11,
40493
- "output_per_million": 0.19
41200
+ "input_per_million": 2.0,
41201
+ "output_per_million": 6.0
40494
41202
  }
40495
41203
  }
40496
41204
  },
40497
41205
  "metadata": {
40498
- "description": "A 7.3B parameter model that outperforms Llama 2 13B on all benchmarks, with optimizations for speed and context length.",
41206
+ "description": "This is Mistral AI's flagship model, Mistral Large 2 (version `mistral-large-2407`). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/).\n\nIt supports dozens of languages including French, German, Spanish, Italian, Portuguese, Arabic, Hindi, Russian, Chinese, Japanese, and Korean, along with 80+ coding languages including Python, Java, C, C++, JavaScript, and Bash. Its long context window allows precise information recall from large documents.",
40499
41207
  "architecture": {
40500
41208
  "modality": "text->text",
40501
41209
  "input_modalities": [
@@ -40505,10 +41213,10 @@
40505
41213
  "text"
40506
41214
  ],
40507
41215
  "tokenizer": "Mistral",
40508
- "instruct_type": "mistral"
41216
+ "instruct_type": null
40509
41217
  },
40510
41218
  "top_provider": {
40511
- "context_length": 2824,
41219
+ "context_length": 128000,
40512
41220
  "max_completion_tokens": null,
40513
41221
  "is_moderated": false
40514
41222
  },
@@ -40517,21 +41225,24 @@
40517
41225
  "frequency_penalty",
40518
41226
  "max_tokens",
40519
41227
  "presence_penalty",
40520
- "repetition_penalty",
41228
+ "response_format",
40521
41229
  "seed",
41230
+ "stop",
41231
+ "structured_outputs",
40522
41232
  "temperature",
40523
- "top_k",
41233
+ "tool_choice",
41234
+ "tools",
40524
41235
  "top_p"
40525
41236
  ]
40526
41237
  }
40527
41238
  },
40528
41239
  {
40529
- "id": "mistralai/mistral-7b-instruct-v0.2",
40530
- "name": "Mistral: Mistral 7B Instruct v0.2",
41240
+ "id": "mistralai/mistral-large-2407",
41241
+ "name": "Mistral Large 2407",
40531
41242
  "provider": "openrouter",
40532
41243
  "family": "mistralai",
40533
- "created_at": "2023-12-28 00:00:00 UTC",
40534
- "context_window": 32768,
41244
+ "created_at": "2024-11-19 01:06:55 UTC",
41245
+ "context_window": 131072,
40535
41246
  "max_output_tokens": null,
40536
41247
  "knowledge_cutoff": null,
40537
41248
  "modalities": {
@@ -40544,18 +41255,19 @@
40544
41255
  },
40545
41256
  "capabilities": [
40546
41257
  "streaming",
40547
- "predicted_outputs"
41258
+ "function_calling",
41259
+ "structured_output"
40548
41260
  ],
40549
41261
  "pricing": {
40550
41262
  "text_tokens": {
40551
41263
  "standard": {
40552
- "input_per_million": 0.19999999999999998,
40553
- "output_per_million": 0.19999999999999998
41264
+ "input_per_million": 2.0,
41265
+ "output_per_million": 6.0
40554
41266
  }
40555
41267
  }
40556
41268
  },
40557
41269
  "metadata": {
40558
- "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\nAn improved version of [Mistral 7B Instruct](/modelsmistralai/mistral-7b-instruct-v0.1), with the following changes:\n\n- 32k context window (vs 8k context in v0.1)\n- Rope-theta = 1e6\n- No Sliding-Window Attention",
41270
+ "description": "This is Mistral AI's flagship model, Mistral Large 2 (version mistral-large-2407). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/).\n\nIt supports dozens of languages including French, German, Spanish, Italian, Portuguese, Arabic, Hindi, Russian, Chinese, Japanese, and Korean, along with 80+ coding languages including Python, Java, C, C++, JavaScript, and Bash. Its long context window allows precise information recall from large documents.\n",
40559
41271
  "architecture": {
40560
41272
  "modality": "text->text",
40561
41273
  "input_modalities": [
@@ -40565,36 +41277,37 @@
40565
41277
  "text"
40566
41278
  ],
40567
41279
  "tokenizer": "Mistral",
40568
- "instruct_type": "mistral"
41280
+ "instruct_type": null
40569
41281
  },
40570
41282
  "top_provider": {
40571
- "context_length": 32768,
41283
+ "context_length": 131072,
40572
41284
  "max_completion_tokens": null,
40573
41285
  "is_moderated": false
40574
41286
  },
40575
41287
  "per_request_limits": null,
40576
41288
  "supported_parameters": [
40577
41289
  "frequency_penalty",
40578
- "logit_bias",
40579
41290
  "max_tokens",
40580
- "min_p",
40581
41291
  "presence_penalty",
40582
- "repetition_penalty",
41292
+ "response_format",
41293
+ "seed",
40583
41294
  "stop",
41295
+ "structured_outputs",
40584
41296
  "temperature",
40585
- "top_k",
41297
+ "tool_choice",
41298
+ "tools",
40586
41299
  "top_p"
40587
41300
  ]
40588
41301
  }
40589
41302
  },
40590
41303
  {
40591
- "id": "mistralai/mistral-7b-instruct-v0.3",
40592
- "name": "Mistral: Mistral 7B Instruct v0.3",
41304
+ "id": "mistralai/mistral-large-2411",
41305
+ "name": "Mistral Large 2411",
40593
41306
  "provider": "openrouter",
40594
41307
  "family": "mistralai",
40595
- "created_at": "2024-05-27 00:00:00 UTC",
40596
- "context_window": 32768,
40597
- "max_output_tokens": 4096,
41308
+ "created_at": "2024-11-19 01:11:25 UTC",
41309
+ "context_window": 131072,
41310
+ "max_output_tokens": null,
40598
41311
  "knowledge_cutoff": null,
40599
41312
  "modalities": {
40600
41313
  "input": [
@@ -40606,18 +41319,19 @@
40606
41319
  },
40607
41320
  "capabilities": [
40608
41321
  "streaming",
40609
- "predicted_outputs"
41322
+ "function_calling",
41323
+ "structured_output"
40610
41324
  ],
40611
41325
  "pricing": {
40612
41326
  "text_tokens": {
40613
41327
  "standard": {
40614
- "input_per_million": 0.19999999999999998,
40615
- "output_per_million": 0.19999999999999998
41328
+ "input_per_million": 2.0,
41329
+ "output_per_million": 6.0
40616
41330
  }
40617
41331
  }
40618
41332
  },
40619
41333
  "metadata": {
40620
- "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\nAn improved version of [Mistral 7B Instruct v0.2](/models/mistralai/mistral-7b-instruct-v0.2), with the following changes:\n\n- Extended vocabulary to 32768\n- Supports v3 Tokenizer\n- Supports function calling\n\nNOTE: Support for function calling depends on the provider.",
41334
+ "description": "Mistral Large 2 2411 is an update of [Mistral Large 2](/mistralai/mistral-large) released together with [Pixtral Large 2411](/mistralai/pixtral-large-2411)\n\nIt provides a significant upgrade on the previous [Mistral Large 24.07](/mistralai/mistral-large-2407), with notable improvements in long context understanding, a new system prompt, and more accurate function calling.",
40621
41335
  "architecture": {
40622
41336
  "modality": "text->text",
40623
41337
  "input_modalities": [
@@ -40627,79 +41341,42 @@
40627
41341
  "text"
40628
41342
  ],
40629
41343
  "tokenizer": "Mistral",
40630
- "instruct_type": "mistral"
41344
+ "instruct_type": null
40631
41345
  },
40632
41346
  "top_provider": {
40633
- "context_length": 32768,
40634
- "max_completion_tokens": 4096,
41347
+ "context_length": 131072,
41348
+ "max_completion_tokens": null,
40635
41349
  "is_moderated": false
40636
41350
  },
40637
41351
  "per_request_limits": null,
40638
41352
  "supported_parameters": [
40639
41353
  "frequency_penalty",
40640
- "logit_bias",
40641
41354
  "max_tokens",
40642
- "min_p",
40643
41355
  "presence_penalty",
40644
- "repetition_penalty",
41356
+ "response_format",
41357
+ "seed",
40645
41358
  "stop",
41359
+ "structured_outputs",
40646
41360
  "temperature",
40647
- "top_k",
41361
+ "tool_choice",
41362
+ "tools",
40648
41363
  "top_p"
40649
41364
  ]
40650
41365
  }
40651
41366
  },
40652
41367
  {
40653
- "id": "mistralai/mistral-7b-instruct:free",
40654
- "name": "Mistral 7B Instruct (free)",
40655
- "provider": "openrouter",
40656
- "family": "mistral",
40657
- "created_at": "2024-05-27 00:00:00 UTC",
40658
- "context_window": 32768,
40659
- "max_output_tokens": 32768,
40660
- "knowledge_cutoff": null,
40661
- "modalities": {
40662
- "input": [
40663
- "text"
40664
- ],
40665
- "output": [
40666
- "text"
40667
- ]
40668
- },
40669
- "capabilities": [
40670
- "function_calling"
40671
- ],
40672
- "pricing": {},
40673
- "metadata": {
40674
- "source": "models.dev",
40675
- "provider_id": "openrouter",
40676
- "open_weights": true,
40677
- "attachment": false,
40678
- "temperature": true,
40679
- "last_updated": "2024-05-27",
40680
- "cost": {
40681
- "input": 0,
40682
- "output": 0
40683
- },
40684
- "limit": {
40685
- "context": 32768,
40686
- "output": 32768
40687
- },
40688
- "knowledge": "2024-05"
40689
- }
40690
- },
40691
- {
40692
- "id": "mistralai/mistral-large",
40693
- "name": "Mistral Large",
41368
+ "id": "mistralai/mistral-large-2512",
41369
+ "name": "Mistral: Mistral Large 3 2512",
40694
41370
  "provider": "openrouter",
40695
41371
  "family": "mistralai",
40696
- "created_at": "2024-02-26 00:00:00 UTC",
40697
- "context_window": 128000,
41372
+ "created_at": "2025-12-01 21:27:52 UTC",
41373
+ "context_window": 262144,
40698
41374
  "max_output_tokens": null,
40699
41375
  "knowledge_cutoff": null,
40700
41376
  "modalities": {
40701
41377
  "input": [
40702
- "text"
41378
+ "text",
41379
+ "image"
40703
41380
  ],
40704
41381
  "output": [
40705
41382
  "text"
@@ -40713,17 +41390,18 @@
40713
41390
  "pricing": {
40714
41391
  "text_tokens": {
40715
41392
  "standard": {
40716
- "input_per_million": 2.0,
40717
- "output_per_million": 6.0
41393
+ "input_per_million": 0.5,
41394
+ "output_per_million": 1.5
40718
41395
  }
40719
41396
  }
40720
41397
  },
40721
41398
  "metadata": {
40722
- "description": "This is Mistral AI's flagship model, Mistral Large 2 (version `mistral-large-2407`). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/).\n\nIt supports dozens of languages including French, German, Spanish, Italian, Portuguese, Arabic, Hindi, Russian, Chinese, Japanese, and Korean, along with 80+ coding languages including Python, Java, C, C++, JavaScript, and Bash. Its long context window allows precise information recall from large documents.",
41399
+ "description": "Mistral Large 3 2512 is Mistral’s most capable model to date, featuring a sparse mixture-of-experts architecture with 41B active parameters (675B total), and released under the Apache 2.0 license.",
40723
41400
  "architecture": {
40724
- "modality": "text->text",
41401
+ "modality": "text+image->text",
40725
41402
  "input_modalities": [
40726
- "text"
41403
+ "text",
41404
+ "image"
40727
41405
  ],
40728
41406
  "output_modalities": [
40729
41407
  "text"
@@ -40732,7 +41410,7 @@
40732
41410
  "instruct_type": null
40733
41411
  },
40734
41412
  "top_provider": {
40735
- "context_length": 128000,
41413
+ "context_length": 262144,
40736
41414
  "max_completion_tokens": null,
40737
41415
  "is_moderated": false
40738
41416
  },
@@ -40753,238 +41431,44 @@
40753
41431
  }
40754
41432
  },
40755
41433
  {
40756
- "id": "mistralai/mistral-large-2407",
40757
- "name": "Mistral Large 2407",
41434
+ "id": "mistralai/mistral-medium-3",
41435
+ "name": "Mistral Medium 3",
40758
41436
  "provider": "openrouter",
40759
- "family": "mistralai",
40760
- "created_at": "2024-11-19 01:06:55 UTC",
41437
+ "family": "mistral-medium",
41438
+ "created_at": "2025-05-07 00:00:00 UTC",
40761
41439
  "context_window": 131072,
40762
- "max_output_tokens": null,
41440
+ "max_output_tokens": 131072,
40763
41441
  "knowledge_cutoff": null,
40764
41442
  "modalities": {
40765
41443
  "input": [
40766
- "text"
41444
+ "text",
41445
+ "image"
40767
41446
  ],
40768
41447
  "output": [
40769
41448
  "text"
40770
41449
  ]
40771
41450
  },
40772
41451
  "capabilities": [
40773
- "streaming",
40774
41452
  "function_calling",
40775
- "structured_output"
41453
+ "structured_output",
41454
+ "vision",
41455
+ "streaming"
40776
41456
  ],
40777
41457
  "pricing": {
40778
41458
  "text_tokens": {
40779
41459
  "standard": {
40780
- "input_per_million": 2.0,
40781
- "output_per_million": 6.0
41460
+ "input_per_million": 0.4,
41461
+ "output_per_million": 2
40782
41462
  }
40783
41463
  }
40784
41464
  },
40785
41465
  "metadata": {
40786
- "description": "This is Mistral AI's flagship model, Mistral Large 2 (version mistral-large-2407). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/).\n\nIt supports dozens of languages including French, German, Spanish, Italian, Portuguese, Arabic, Hindi, Russian, Chinese, Japanese, and Korean, along with 80+ coding languages including Python, Java, C, C++, JavaScript, and Bash. Its long context window allows precise information recall from large documents.\n",
41466
+ "description": "Mistral Medium 3 is a high-performance enterprise-grade language model designed to deliver frontier-level capabilities at significantly reduced operational cost. It balances state-of-the-art reasoning and multimodal performance with lower cost compared to traditional large models, making it suitable for scalable deployments across professional and industrial use cases.\n\nThe model excels in domains such as coding, STEM reasoning, and enterprise adaptation. It supports hybrid, on-prem, and in-VPC deployments and is optimized for integration into custom workflows. Mistral Medium 3 offers competitive accuracy relative to larger models like Claude Sonnet 3.5/3.7, Llama 4 Maverick, and Command R+, while maintaining broad compatibility across cloud environments.",
40787
41467
  "architecture": {
40788
- "modality": "text->text",
41468
+ "modality": "text+image->text",
40789
41469
  "input_modalities": [
40790
- "text"
40791
- ],
40792
- "output_modalities": [
40793
- "text"
40794
- ],
40795
- "tokenizer": "Mistral",
40796
- "instruct_type": null
40797
- },
40798
- "top_provider": {
40799
- "context_length": 131072,
40800
- "max_completion_tokens": null,
40801
- "is_moderated": false
40802
- },
40803
- "per_request_limits": null,
40804
- "supported_parameters": [
40805
- "frequency_penalty",
40806
- "max_tokens",
40807
- "presence_penalty",
40808
- "response_format",
40809
- "seed",
40810
- "stop",
40811
- "structured_outputs",
40812
- "temperature",
40813
- "tool_choice",
40814
- "tools",
40815
- "top_p"
40816
- ]
40817
- }
40818
- },
40819
- {
40820
- "id": "mistralai/mistral-large-2411",
40821
- "name": "Mistral Large 2411",
40822
- "provider": "openrouter",
40823
- "family": "mistralai",
40824
- "created_at": "2024-11-19 01:11:25 UTC",
40825
- "context_window": 131072,
40826
- "max_output_tokens": null,
40827
- "knowledge_cutoff": null,
40828
- "modalities": {
40829
- "input": [
40830
- "text"
40831
- ],
40832
- "output": [
40833
- "text"
40834
- ]
40835
- },
40836
- "capabilities": [
40837
- "streaming",
40838
- "function_calling",
40839
- "structured_output"
40840
- ],
40841
- "pricing": {
40842
- "text_tokens": {
40843
- "standard": {
40844
- "input_per_million": 2.0,
40845
- "output_per_million": 6.0
40846
- }
40847
- }
40848
- },
40849
- "metadata": {
40850
- "description": "Mistral Large 2 2411 is an update of [Mistral Large 2](/mistralai/mistral-large) released together with [Pixtral Large 2411](/mistralai/pixtral-large-2411)\n\nIt provides a significant upgrade on the previous [Mistral Large 24.07](/mistralai/mistral-large-2407), with notable improvements in long context understanding, a new system prompt, and more accurate function calling.",
40851
- "architecture": {
40852
- "modality": "text->text",
40853
- "input_modalities": [
40854
- "text"
40855
- ],
40856
- "output_modalities": [
40857
- "text"
40858
- ],
40859
- "tokenizer": "Mistral",
40860
- "instruct_type": null
40861
- },
40862
- "top_provider": {
40863
- "context_length": 131072,
40864
- "max_completion_tokens": null,
40865
- "is_moderated": false
40866
- },
40867
- "per_request_limits": null,
40868
- "supported_parameters": [
40869
- "frequency_penalty",
40870
- "max_tokens",
40871
- "presence_penalty",
40872
- "response_format",
40873
- "seed",
40874
- "stop",
40875
- "structured_outputs",
40876
- "temperature",
40877
- "tool_choice",
40878
- "tools",
40879
- "top_p"
40880
- ]
40881
- }
40882
- },
40883
- {
40884
- "id": "mistralai/mistral-large-2512",
40885
- "name": "Mistral: Mistral Large 3 2512",
40886
- "provider": "openrouter",
40887
- "family": "mistralai",
40888
- "created_at": "2025-12-01 21:27:52 UTC",
40889
- "context_window": 262144,
40890
- "max_output_tokens": null,
40891
- "knowledge_cutoff": null,
40892
- "modalities": {
40893
- "input": [
40894
- "text",
40895
- "image"
40896
- ],
40897
- "output": [
40898
- "text"
40899
- ]
40900
- },
40901
- "capabilities": [
40902
- "streaming",
40903
- "function_calling",
40904
- "structured_output"
40905
- ],
40906
- "pricing": {
40907
- "text_tokens": {
40908
- "standard": {
40909
- "input_per_million": 0.5,
40910
- "output_per_million": 1.5
40911
- }
40912
- }
40913
- },
40914
- "metadata": {
40915
- "description": "Mistral Large 3 2512 is Mistral’s most capable model to date, featuring a sparse mixture-of-experts architecture with 41B active parameters (675B total), and released under the Apache 2.0 license.",
40916
- "architecture": {
40917
- "modality": "text+image->text",
40918
- "input_modalities": [
40919
- "text",
40920
- "image"
40921
- ],
40922
- "output_modalities": [
40923
- "text"
40924
- ],
40925
- "tokenizer": "Mistral",
40926
- "instruct_type": null
40927
- },
40928
- "top_provider": {
40929
- "context_length": 262144,
40930
- "max_completion_tokens": null,
40931
- "is_moderated": false
40932
- },
40933
- "per_request_limits": null,
40934
- "supported_parameters": [
40935
- "frequency_penalty",
40936
- "max_tokens",
40937
- "presence_penalty",
40938
- "response_format",
40939
- "seed",
40940
- "stop",
40941
- "structured_outputs",
40942
- "temperature",
40943
- "tool_choice",
40944
- "tools",
40945
- "top_p"
40946
- ]
40947
- }
40948
- },
40949
- {
40950
- "id": "mistralai/mistral-medium-3",
40951
- "name": "Mistral Medium 3",
40952
- "provider": "openrouter",
40953
- "family": "mistral-medium",
40954
- "created_at": "2025-05-07 00:00:00 UTC",
40955
- "context_window": 131072,
40956
- "max_output_tokens": 131072,
40957
- "knowledge_cutoff": null,
40958
- "modalities": {
40959
- "input": [
40960
- "text",
40961
- "image"
40962
- ],
40963
- "output": [
40964
- "text"
40965
- ]
40966
- },
40967
- "capabilities": [
40968
- "function_calling",
40969
- "structured_output",
40970
- "vision",
40971
- "streaming"
40972
- ],
40973
- "pricing": {
40974
- "text_tokens": {
40975
- "standard": {
40976
- "input_per_million": 0.4,
40977
- "output_per_million": 2
40978
- }
40979
- }
40980
- },
40981
- "metadata": {
40982
- "description": "Mistral Medium 3 is a high-performance enterprise-grade language model designed to deliver frontier-level capabilities at significantly reduced operational cost. It balances state-of-the-art reasoning and multimodal performance with 8× lower cost compared to traditional large models, making it suitable for scalable deployments across professional and industrial use cases.\n\nThe model excels in domains such as coding, STEM reasoning, and enterprise adaptation. It supports hybrid, on-prem, and in-VPC deployments and is optimized for integration into custom workflows. Mistral Medium 3 offers competitive accuracy relative to larger models like Claude Sonnet 3.5/3.7, Llama 4 Maverick, and Command R+, while maintaining broad compatibility across cloud environments.",
40983
- "architecture": {
40984
- "modality": "text+image->text",
40985
- "input_modalities": [
40986
- "text",
40987
- "image"
41470
+ "text",
41471
+ "image"
40988
41472
  ],
40989
41473
  "output_modalities": [
40990
41474
  "text"
@@ -42018,7 +42502,7 @@
42018
42502
  "instruct_type": null
42019
42503
  },
42020
42504
  "top_provider": {
42021
- "context_length": 131072,
42505
+ "context_length": 131000,
42022
42506
  "max_completion_tokens": null,
42023
42507
  "is_moderated": false
42024
42508
  },
@@ -42378,8 +42862,10 @@
42378
42862
  "logprobs",
42379
42863
  "max_tokens",
42380
42864
  "min_p",
42865
+ "parallel_tool_calls",
42381
42866
  "presence_penalty",
42382
42867
  "reasoning",
42868
+ "reasoning_effort",
42383
42869
  "repetition_penalty",
42384
42870
  "response_format",
42385
42871
  "seed",
@@ -42610,12 +43096,14 @@
42610
43096
  "per_request_limits": null,
42611
43097
  "supported_parameters": [
42612
43098
  "frequency_penalty",
43099
+ "logprobs",
42613
43100
  "max_tokens",
42614
43101
  "presence_penalty",
42615
43102
  "response_format",
42616
43103
  "stop",
42617
43104
  "structured_outputs",
42618
43105
  "temperature",
43106
+ "top_logprobs",
42619
43107
  "top_p"
42620
43108
  ]
42621
43109
  }
@@ -43024,6 +43512,7 @@
43024
43512
  "per_request_limits": null,
43025
43513
  "supported_parameters": [
43026
43514
  "frequency_penalty",
43515
+ "logprobs",
43027
43516
  "max_tokens",
43028
43517
  "min_p",
43029
43518
  "presence_penalty",
@@ -43034,6 +43523,7 @@
43034
43523
  "structured_outputs",
43035
43524
  "temperature",
43036
43525
  "top_k",
43526
+ "top_logprobs",
43037
43527
  "top_p"
43038
43528
  ]
43039
43529
  }
@@ -43361,7 +43851,7 @@
43361
43851
  }
43362
43852
  },
43363
43853
  "metadata": {
43364
- "description": "NVIDIA Nemotron 3 Nano 30B A3B is a small language MoE model with highest compute efficiency and accuracy for developers to build specialized agentic AI systems.\n\nThe model is fully open with open-weights, datasets and recipes so developers can easily\ncustomize, optimize, and deploy the model on their infrastructure for maximum privacy and\nsecurity.\n\nNote: For the free endpoint, all prompts and output are logged to improve the provider's model and its product and services. Please do not upload any personal, confidential, or otherwise sensitive information. This is a trial use only. Do not use for production or business-critical systems.",
43854
+ "description": "NVIDIA Nemotron 3 Nano 30B A3B is a small language MoE model with highest compute efficiency and accuracy for developers to build specialized agentic AI systems.\n\nThe model is fully open with open-weights, datasets and recipes so developers can easily\ncustomize, optimize, and deploy the model on their infrastructure for maximum privacy and\nsecurity.",
43365
43855
  "architecture": {
43366
43856
  "modality": "text->text",
43367
43857
  "input_modalities": [
@@ -43423,7 +43913,7 @@
43423
43913
  ],
43424
43914
  "pricing": {},
43425
43915
  "metadata": {
43426
- "description": "NVIDIA Nemotron 3 Nano 30B A3B is a small language MoE model with highest compute efficiency and accuracy for developers to build specialized agentic AI systems.\n\nThe model is fully open with open-weights, datasets and recipes so developers can easily\ncustomize, optimize, and deploy the model on their infrastructure for maximum privacy and\nsecurity.\n\nNote: For the free endpoint, all prompts and output are logged to improve the provider's model and its product and services. Please do not upload any personal, confidential, or otherwise sensitive information. This is a trial use only. Do not use for production or business-critical systems.",
43916
+ "description": "NVIDIA Nemotron 3 Nano 30B A3B is a small language MoE model with highest compute efficiency and accuracy for developers to build specialized agentic AI systems.\n\nThe model is fully open with open-weights, datasets and recipes so developers can easily\ncustomize, optimize, and deploy the model on their infrastructure for maximum privacy and\nsecurity.",
43427
43917
  "architecture": {
43428
43918
  "modality": "text->text",
43429
43919
  "input_modalities": [
@@ -43494,8 +43984,8 @@
43494
43984
  "pricing": {
43495
43985
  "text_tokens": {
43496
43986
  "standard": {
43497
- "input_per_million": 0.07,
43498
- "output_per_million": 0.19999999999999998
43987
+ "input_per_million": 0.19999999999999998,
43988
+ "output_per_million": 0.6
43499
43989
  }
43500
43990
  }
43501
43991
  },
@@ -46697,6 +47187,76 @@
46697
47187
  "knowledge": "2025-08-31"
46698
47188
  }
46699
47189
  },
47190
+ {
47191
+ "id": "openai/gpt-5.3-chat",
47192
+ "name": "OpenAI: GPT-5.3 Chat",
47193
+ "provider": "openrouter",
47194
+ "family": "openai",
47195
+ "created_at": "2026-03-03 18:54:21 UTC",
47196
+ "context_window": 128000,
47197
+ "max_output_tokens": 16384,
47198
+ "knowledge_cutoff": null,
47199
+ "modalities": {
47200
+ "input": [
47201
+ "text",
47202
+ "image",
47203
+ "file"
47204
+ ],
47205
+ "output": [
47206
+ "text"
47207
+ ]
47208
+ },
47209
+ "capabilities": [
47210
+ "streaming",
47211
+ "function_calling",
47212
+ "structured_output"
47213
+ ],
47214
+ "pricing": {
47215
+ "text_tokens": {
47216
+ "standard": {
47217
+ "input_per_million": 1.75,
47218
+ "output_per_million": 14.0,
47219
+ "cached_input_per_million": 0.175
47220
+ }
47221
+ }
47222
+ },
47223
+ "metadata": {
47224
+ "description": "GPT-5.3 Chat is an update to ChatGPT's most-used model that makes everyday conversations smoother, more useful, and more directly helpful. It delivers more accurate answers with better contextualization and significantly reduces unnecessary refusals, caveats, and overly cautious phrasing that can interrupt conversational flow.",
47225
+ "architecture": {
47226
+ "modality": "text+image+file->text",
47227
+ "input_modalities": [
47228
+ "text",
47229
+ "image",
47230
+ "file"
47231
+ ],
47232
+ "output_modalities": [
47233
+ "text"
47234
+ ],
47235
+ "tokenizer": "GPT",
47236
+ "instruct_type": null
47237
+ },
47238
+ "top_provider": {
47239
+ "context_length": 128000,
47240
+ "max_completion_tokens": 16384,
47241
+ "is_moderated": true
47242
+ },
47243
+ "per_request_limits": null,
47244
+ "supported_parameters": [
47245
+ "frequency_penalty",
47246
+ "logit_bias",
47247
+ "logprobs",
47248
+ "max_tokens",
47249
+ "presence_penalty",
47250
+ "response_format",
47251
+ "seed",
47252
+ "stop",
47253
+ "structured_outputs",
47254
+ "tool_choice",
47255
+ "tools",
47256
+ "top_logprobs"
47257
+ ]
47258
+ }
47259
+ },
46700
47260
  {
46701
47261
  "id": "openai/gpt-5.3-codex",
46702
47262
  "name": "OpenAI: GPT-5.3-Codex",
@@ -48049,72 +48609,6 @@
48049
48609
  ]
48050
48610
  }
48051
48611
  },
48052
- {
48053
- "id": "opengvlab/internvl3-78b",
48054
- "name": "OpenGVLab: InternVL3 78B",
48055
- "provider": "openrouter",
48056
- "family": "opengvlab",
48057
- "created_at": "2025-09-15 18:55:55 UTC",
48058
- "context_window": 32768,
48059
- "max_output_tokens": 32768,
48060
- "knowledge_cutoff": null,
48061
- "modalities": {
48062
- "input": [
48063
- "image",
48064
- "text"
48065
- ],
48066
- "output": [
48067
- "text"
48068
- ]
48069
- },
48070
- "capabilities": [
48071
- "streaming",
48072
- "structured_output"
48073
- ],
48074
- "pricing": {
48075
- "text_tokens": {
48076
- "standard": {
48077
- "input_per_million": 0.15,
48078
- "output_per_million": 0.6,
48079
- "cached_input_per_million": 0.075
48080
- }
48081
- }
48082
- },
48083
- "metadata": {
48084
- "description": "The InternVL3 series is an advanced multimodal large language model (MLLM). Compared to InternVL 2.5, InternVL3 demonstrates stronger multimodal perception and reasoning capabilities. \n\nIn addition, InternVL3 is benchmarked against the Qwen2.5 Chat models, whose pre-trained base models serve as the initialization for its language component. Benefiting from Native Multimodal Pre-Training, the InternVL3 series surpasses the Qwen2.5 series in overall text performance.",
48085
- "architecture": {
48086
- "modality": "text+image->text",
48087
- "input_modalities": [
48088
- "image",
48089
- "text"
48090
- ],
48091
- "output_modalities": [
48092
- "text"
48093
- ],
48094
- "tokenizer": "Other",
48095
- "instruct_type": null
48096
- },
48097
- "top_provider": {
48098
- "context_length": 32768,
48099
- "max_completion_tokens": 32768,
48100
- "is_moderated": false
48101
- },
48102
- "per_request_limits": null,
48103
- "supported_parameters": [
48104
- "frequency_penalty",
48105
- "max_tokens",
48106
- "presence_penalty",
48107
- "repetition_penalty",
48108
- "response_format",
48109
- "seed",
48110
- "stop",
48111
- "structured_outputs",
48112
- "temperature",
48113
- "top_k",
48114
- "top_p"
48115
- ]
48116
- }
48117
- },
48118
48612
  {
48119
48613
  "id": "openrouter/aurora-alpha",
48120
48614
  "name": "Aurora Alpha",
@@ -48185,7 +48679,7 @@
48185
48679
  ],
48186
48680
  "pricing": {},
48187
48681
  "metadata": {
48188
- "description": "Your prompt will be processed by a meta-model and routed to one of dozens of models (see below), optimizing for the best possible output.\n\nTo see which model was used, visit [Activity](/activity), or read the `model` attribute of the response. Your response will be priced at the same rate as the routed model.\n\nLearn more, including how to customize the models for routing, in our [docs](/docs/guides/routing/routers/auto-router).\n\nRequests will be routed to the following models:\n- [anthropic/claude-haiku-4.5](/anthropic/claude-haiku-4.5)\n- [anthropic/claude-opus-4.6](/anthropic/claude-opus-4.6)\n- [anthropic/claude-sonnet-4.5](/anthropic/claude-sonnet-4.5)\n- [deepseek/deepseek-r1](/deepseek/deepseek-r1)\n- [google/gemini-2.5-flash-lite](/google/gemini-2.5-flash-lite)\n- [google/gemini-3-flash-preview](/google/gemini-3-flash-preview)\n- [google/gemini-3-pro-preview](/google/gemini-3-pro-preview)\n- [meta-llama/llama-3.3-70b-instruct](/meta-llama/llama-3.3-70b-instruct)\n- [mistralai/codestral-2508](/mistralai/codestral-2508)\n- [mistralai/mistral-large](/mistralai/mistral-large)\n- [mistralai/mistral-medium-3.1](/mistralai/mistral-medium-3.1)\n- [mistralai/mistral-small-3.2-24b-instruct-2506](/mistralai/mistral-small-3.2-24b-instruct-2506)\n- [moonshotai/kimi-k2-thinking](/moonshotai/kimi-k2-thinking)\n- [moonshotai/kimi-k2.5](/moonshotai/kimi-k2.5)\n- [openai/gpt-5](/openai/gpt-5)\n- [openai/gpt-5-mini](/openai/gpt-5-mini)\n- [openai/gpt-5-nano](/openai/gpt-5-nano)\n- [openai/gpt-5.1](/openai/gpt-5.1)\n- [openai/gpt-5.2](/openai/gpt-5.2)\n- [openai/gpt-5.2-pro](/openai/gpt-5.2-pro)\n- [openai/gpt-oss-120b](/openai/gpt-oss-120b)\n- [perplexity/sonar](/perplexity/sonar)\n- [qwen/qwen3-235b-a22b](/qwen/qwen3-235b-a22b)\n- [x-ai/grok-3](/x-ai/grok-3)\n- [x-ai/grok-3-mini](/x-ai/grok-3-mini)\n- [x-ai/grok-4](/x-ai/grok-4)",
48682
+ "description": "Your prompt will be processed by a meta-model and routed to one of dozens of models (see below), optimizing for the best possible output.\n\nTo see which model was used, visit [Activity](/activity), or read the `model` attribute of the response. Your response will be priced at the same rate as the routed model.\n\nLearn more, including how to customize the models for routing, in our [docs](/docs/guides/routing/routers/auto-router).\n\nRequests will be routed to the following models:\n- [anthropic/claude-haiku-4.5](/anthropic/claude-haiku-4.5)\n- [anthropic/claude-opus-4.6](/anthropic/claude-opus-4.6)\n- [anthropic/claude-sonnet-4.5](/anthropic/claude-sonnet-4.5)\n- [anthropic/claude-sonnet-4.6](/anthropic/claude-sonnet-4.6)\n- [deepseek/deepseek-r1](/deepseek/deepseek-r1)\n- [google/gemini-2.5-flash-lite](/google/gemini-2.5-flash-lite)\n- [google/gemini-3-flash-preview](/google/gemini-3-flash-preview)\n- [google/gemini-3-pro-preview](/google/gemini-3-pro-preview)\n- [google/gemini-3.1-pro-preview](/google/gemini-3.1-pro-preview)\n- [meta-llama/llama-3.3-70b-instruct](/meta-llama/llama-3.3-70b-instruct)\n- [minimax/minimax-m2.5](/minimax/minimax-m2.5)\n- [mistralai/codestral-2508](/mistralai/codestral-2508)\n- [mistralai/mistral-large](/mistralai/mistral-large)\n- [mistralai/mistral-medium-3.1](/mistralai/mistral-medium-3.1)\n- [mistralai/mistral-small-3.2-24b-instruct-2506](/mistralai/mistral-small-3.2-24b-instruct-2506)\n- [moonshotai/kimi-k2-thinking](/moonshotai/kimi-k2-thinking)\n- [moonshotai/kimi-k2.5](/moonshotai/kimi-k2.5)\n- [openai/gpt-5](/openai/gpt-5)\n- [openai/gpt-5-mini](/openai/gpt-5-mini)\n- [openai/gpt-5-nano](/openai/gpt-5-nano)\n- [openai/gpt-5.1](/openai/gpt-5.1)\n- [openai/gpt-5.2](/openai/gpt-5.2)\n- [openai/gpt-5.2-pro](/openai/gpt-5.2-pro)\n- [openai/gpt-oss-120b](/openai/gpt-oss-120b)\n- [perplexity/sonar](/perplexity/sonar)\n- [qwen/qwen3-235b-a22b](/qwen/qwen3-235b-a22b)\n- [x-ai/grok-3](/x-ai/grok-3)\n- [x-ai/grok-3-mini](/x-ai/grok-3-mini)\n- [x-ai/grok-4](/x-ai/grok-4)\n- [z-ai/glm-5](/z-ai/glm-5)",
48189
48683
  "architecture": {
48190
48684
  "modality": "text+image+file+audio+video->text+image",
48191
48685
  "input_modalities": [
@@ -48902,6 +49396,7 @@
48902
49396
  "capabilities": [
48903
49397
  "streaming",
48904
49398
  "function_calling",
49399
+ "structured_output",
48905
49400
  "predicted_outputs"
48906
49401
  ],
48907
49402
  "pricing": {
@@ -48938,6 +49433,7 @@
48938
49433
  "min_p",
48939
49434
  "presence_penalty",
48940
49435
  "repetition_penalty",
49436
+ "response_format",
48941
49437
  "seed",
48942
49438
  "stop",
48943
49439
  "temperature",
@@ -48973,8 +49469,8 @@
48973
49469
  "pricing": {
48974
49470
  "text_tokens": {
48975
49471
  "standard": {
48976
- "input_per_million": 0.19999999999999998,
48977
- "output_per_million": 0.19999999999999998
49472
+ "input_per_million": 0.2,
49473
+ "output_per_million": 0.2
48978
49474
  }
48979
49475
  }
48980
49476
  },
@@ -49052,8 +49548,8 @@
49052
49548
  "pricing": {
49053
49549
  "text_tokens": {
49054
49550
  "standard": {
49055
- "input_per_million": 0.19999999999999998,
49056
- "output_per_million": 0.19999999999999998
49551
+ "input_per_million": 0.2,
49552
+ "output_per_million": 0.2
49057
49553
  }
49058
49554
  }
49059
49555
  },
@@ -49158,9 +49654,9 @@
49158
49654
  "pricing": {
49159
49655
  "text_tokens": {
49160
49656
  "standard": {
49161
- "input_per_million": 1.5999999999999999,
49162
- "output_per_million": 6.3999999999999995,
49163
- "cached_input_per_million": 0.32
49657
+ "input_per_million": 1.04,
49658
+ "output_per_million": 4.16,
49659
+ "cached_input_per_million": 0.20800000000000002
49164
49660
  }
49165
49661
  }
49166
49662
  },
@@ -49282,8 +49778,8 @@
49282
49778
  "pricing": {
49283
49779
  "text_tokens": {
49284
49780
  "standard": {
49285
- "input_per_million": 0.39999999999999997,
49286
- "output_per_million": 1.2
49781
+ "input_per_million": 0.26,
49782
+ "output_per_million": 0.78
49287
49783
  }
49288
49784
  }
49289
49785
  },
@@ -49344,8 +49840,8 @@
49344
49840
  "pricing": {
49345
49841
  "text_tokens": {
49346
49842
  "standard": {
49347
- "input_per_million": 0.39999999999999997,
49348
- "output_per_million": 1.2
49843
+ "input_per_million": 0.26,
49844
+ "output_per_million": 0.78
49349
49845
  }
49350
49846
  }
49351
49847
  },
@@ -49408,9 +49904,9 @@
49408
49904
  "pricing": {
49409
49905
  "text_tokens": {
49410
49906
  "standard": {
49411
- "input_per_million": 0.049999999999999996,
49412
- "output_per_million": 0.19999999999999998,
49413
- "cached_input_per_million": 0.01
49907
+ "input_per_million": 0.0325,
49908
+ "output_per_million": 0.13,
49909
+ "cached_input_per_million": 0.006500000000000001
49414
49910
  }
49415
49911
  }
49416
49912
  },
@@ -49533,9 +50029,9 @@
49533
50029
  "pricing": {
49534
50030
  "text_tokens": {
49535
50031
  "standard": {
49536
- "input_per_million": 0.21,
49537
- "output_per_million": 0.63,
49538
- "cached_input_per_million": 0.041999999999999996
50032
+ "input_per_million": 0.1365,
50033
+ "output_per_million": 0.40950000000000003,
50034
+ "cached_input_per_million": 0.027299999999999998
49539
50035
  }
49540
50036
  }
49541
50037
  },
@@ -49914,6 +50410,7 @@
49914
50410
  "supported_parameters": [
49915
50411
  "frequency_penalty",
49916
50412
  "include_reasoning",
50413
+ "logprobs",
49917
50414
  "max_tokens",
49918
50415
  "min_p",
49919
50416
  "presence_penalty",
@@ -49927,6 +50424,7 @@
49927
50424
  "tool_choice",
49928
50425
  "tools",
49929
50426
  "top_k",
50427
+ "top_logprobs",
49930
50428
  "top_p"
49931
50429
  ]
49932
50430
  }
@@ -50412,7 +50910,8 @@
50412
50910
  "capabilities": [
50413
50911
  "function_calling",
50414
50912
  "structured_output",
50415
- "streaming"
50913
+ "streaming",
50914
+ "predicted_outputs"
50416
50915
  ],
50417
50916
  "pricing": {
50418
50917
  "text_tokens": {
@@ -50443,11 +50942,14 @@
50443
50942
  "per_request_limits": null,
50444
50943
  "supported_parameters": [
50445
50944
  "frequency_penalty",
50945
+ "logit_bias",
50446
50946
  "max_tokens",
50947
+ "min_p",
50447
50948
  "presence_penalty",
50448
50949
  "repetition_penalty",
50449
50950
  "response_format",
50450
50951
  "seed",
50952
+ "stop",
50451
50953
  "structured_outputs",
50452
50954
  "temperature",
50453
50955
  "tool_choice",
@@ -50493,7 +50995,8 @@
50493
50995
  "function_calling",
50494
50996
  "structured_output",
50495
50997
  "reasoning",
50496
- "streaming"
50998
+ "streaming",
50999
+ "predicted_outputs"
50497
51000
  ],
50498
51001
  "pricing": {
50499
51002
  "text_tokens": {
@@ -50525,12 +51028,15 @@
50525
51028
  "supported_parameters": [
50526
51029
  "frequency_penalty",
50527
51030
  "include_reasoning",
51031
+ "logit_bias",
50528
51032
  "max_tokens",
51033
+ "min_p",
50529
51034
  "presence_penalty",
50530
51035
  "reasoning",
50531
51036
  "repetition_penalty",
50532
51037
  "response_format",
50533
51038
  "seed",
51039
+ "stop",
50534
51040
  "structured_outputs",
50535
51041
  "temperature",
50536
51042
  "tool_choice",
@@ -50788,7 +51294,7 @@
50788
51294
  "provider": "openrouter",
50789
51295
  "family": "qwen",
50790
51296
  "created_at": "2025-04-28 21:43:52 UTC",
50791
- "context_window": 32000,
51297
+ "context_window": 40960,
50792
51298
  "max_output_tokens": 8192,
50793
51299
  "knowledge_cutoff": null,
50794
51300
  "modalities": {
@@ -50802,7 +51308,8 @@
50802
51308
  "capabilities": [
50803
51309
  "streaming",
50804
51310
  "function_calling",
50805
- "structured_output"
51311
+ "structured_output",
51312
+ "predicted_outputs"
50806
51313
  ],
50807
51314
  "pricing": {
50808
51315
  "text_tokens": {
@@ -50827,22 +51334,28 @@
50827
51334
  "instruct_type": "qwen3"
50828
51335
  },
50829
51336
  "top_provider": {
50830
- "context_length": 32000,
51337
+ "context_length": 40960,
50831
51338
  "max_completion_tokens": 8192,
50832
51339
  "is_moderated": false
50833
51340
  },
50834
51341
  "per_request_limits": null,
50835
51342
  "supported_parameters": [
51343
+ "frequency_penalty",
50836
51344
  "include_reasoning",
51345
+ "logit_bias",
50837
51346
  "max_tokens",
51347
+ "min_p",
50838
51348
  "presence_penalty",
50839
51349
  "reasoning",
51350
+ "repetition_penalty",
50840
51351
  "response_format",
50841
51352
  "seed",
51353
+ "stop",
50842
51354
  "structured_outputs",
50843
51355
  "temperature",
50844
51356
  "tool_choice",
50845
51357
  "tools",
51358
+ "top_k",
50846
51359
  "top_p"
50847
51360
  ]
50848
51361
  }
@@ -51224,9 +51737,9 @@
51224
51737
  "pricing": {
51225
51738
  "text_tokens": {
51226
51739
  "standard": {
51227
- "input_per_million": 1.0,
51228
- "output_per_million": 5.0,
51229
- "cached_input_per_million": 0.19999999999999998
51740
+ "input_per_million": 0.65,
51741
+ "output_per_million": 3.25,
51742
+ "cached_input_per_million": 0.13
51230
51743
  }
51231
51744
  }
51232
51745
  },
@@ -51312,13 +51825,10 @@
51312
51825
  },
51313
51826
  "per_request_limits": null,
51314
51827
  "supported_parameters": [
51315
- "frequency_penalty",
51316
51828
  "max_tokens",
51317
- "presence_penalty",
51318
51829
  "repetition_penalty",
51319
51830
  "response_format",
51320
51831
  "seed",
51321
- "stop",
51322
51832
  "structured_outputs",
51323
51833
  "temperature",
51324
51834
  "tool_choice",
@@ -51513,8 +52023,8 @@
51513
52023
  "pricing": {
51514
52024
  "text_tokens": {
51515
52025
  "standard": {
51516
- "input_per_million": 1.2,
51517
- "output_per_million": 6.0
52026
+ "input_per_million": 0.78,
52027
+ "output_per_million": 3.9
51518
52028
  }
51519
52029
  }
51520
52030
  },
@@ -52270,6 +52780,237 @@
52270
52780
  ]
52271
52781
  }
52272
52782
  },
52783
+ {
52784
+ "id": "qwen/qwen3.5-122b-a10b",
52785
+ "name": "Qwen: Qwen3.5-122B-A10B",
52786
+ "provider": "openrouter",
52787
+ "family": "qwen",
52788
+ "created_at": "2026-02-25 21:09:49 UTC",
52789
+ "context_window": 262144,
52790
+ "max_output_tokens": 65536,
52791
+ "knowledge_cutoff": null,
52792
+ "modalities": {
52793
+ "input": [
52794
+ "text",
52795
+ "image",
52796
+ "video"
52797
+ ],
52798
+ "output": [
52799
+ "text"
52800
+ ]
52801
+ },
52802
+ "capabilities": [
52803
+ "streaming",
52804
+ "function_calling",
52805
+ "structured_output",
52806
+ "predicted_outputs"
52807
+ ],
52808
+ "pricing": {
52809
+ "text_tokens": {
52810
+ "standard": {
52811
+ "input_per_million": 0.26,
52812
+ "output_per_million": 2.08
52813
+ }
52814
+ }
52815
+ },
52816
+ "metadata": {
52817
+ "description": "The Qwen3.5 122B-A10B native vision-language model is built on a hybrid architecture that integrates a linear attention mechanism with a sparse mixture-of-experts model, achieving higher inference efficiency. In terms of overall performance, this model is second only to Qwen3.5-397B-A17B. Its text capabilities significantly outperform those of Qwen3-235B-2507, and its visual capabilities surpass those of Qwen3-VL-235B.",
52818
+ "architecture": {
52819
+ "modality": "text+image+video->text",
52820
+ "input_modalities": [
52821
+ "text",
52822
+ "image",
52823
+ "video"
52824
+ ],
52825
+ "output_modalities": [
52826
+ "text"
52827
+ ],
52828
+ "tokenizer": "Qwen3",
52829
+ "instruct_type": null
52830
+ },
52831
+ "top_provider": {
52832
+ "context_length": 262144,
52833
+ "max_completion_tokens": 65536,
52834
+ "is_moderated": false
52835
+ },
52836
+ "per_request_limits": null,
52837
+ "supported_parameters": [
52838
+ "frequency_penalty",
52839
+ "include_reasoning",
52840
+ "logit_bias",
52841
+ "logprobs",
52842
+ "max_tokens",
52843
+ "min_p",
52844
+ "presence_penalty",
52845
+ "reasoning",
52846
+ "repetition_penalty",
52847
+ "response_format",
52848
+ "seed",
52849
+ "stop",
52850
+ "structured_outputs",
52851
+ "temperature",
52852
+ "tool_choice",
52853
+ "tools",
52854
+ "top_k",
52855
+ "top_logprobs",
52856
+ "top_p"
52857
+ ]
52858
+ }
52859
+ },
52860
+ {
52861
+ "id": "qwen/qwen3.5-27b",
52862
+ "name": "Qwen: Qwen3.5-27B",
52863
+ "provider": "openrouter",
52864
+ "family": "qwen",
52865
+ "created_at": "2026-02-25 21:10:10 UTC",
52866
+ "context_window": 262144,
52867
+ "max_output_tokens": 65536,
52868
+ "knowledge_cutoff": null,
52869
+ "modalities": {
52870
+ "input": [
52871
+ "text",
52872
+ "image",
52873
+ "video"
52874
+ ],
52875
+ "output": [
52876
+ "text"
52877
+ ]
52878
+ },
52879
+ "capabilities": [
52880
+ "streaming",
52881
+ "function_calling",
52882
+ "structured_output",
52883
+ "predicted_outputs"
52884
+ ],
52885
+ "pricing": {
52886
+ "text_tokens": {
52887
+ "standard": {
52888
+ "input_per_million": 0.195,
52889
+ "output_per_million": 1.56
52890
+ }
52891
+ }
52892
+ },
52893
+ "metadata": {
52894
+ "description": "The Qwen3.5 27B native vision-language Dense model incorporates a linear attention mechanism, delivering fast response times while balancing inference speed and performance. Its overall capabilities are comparable to those of the Qwen3.5-122B-A10B.",
52895
+ "architecture": {
52896
+ "modality": "text+image+video->text",
52897
+ "input_modalities": [
52898
+ "text",
52899
+ "image",
52900
+ "video"
52901
+ ],
52902
+ "output_modalities": [
52903
+ "text"
52904
+ ],
52905
+ "tokenizer": "Qwen3",
52906
+ "instruct_type": null
52907
+ },
52908
+ "top_provider": {
52909
+ "context_length": 262144,
52910
+ "max_completion_tokens": 65536,
52911
+ "is_moderated": false
52912
+ },
52913
+ "per_request_limits": null,
52914
+ "supported_parameters": [
52915
+ "frequency_penalty",
52916
+ "include_reasoning",
52917
+ "logit_bias",
52918
+ "logprobs",
52919
+ "max_tokens",
52920
+ "min_p",
52921
+ "presence_penalty",
52922
+ "reasoning",
52923
+ "repetition_penalty",
52924
+ "response_format",
52925
+ "seed",
52926
+ "stop",
52927
+ "structured_outputs",
52928
+ "temperature",
52929
+ "tool_choice",
52930
+ "tools",
52931
+ "top_k",
52932
+ "top_logprobs",
52933
+ "top_p"
52934
+ ]
52935
+ }
52936
+ },
52937
+ {
52938
+ "id": "qwen/qwen3.5-35b-a3b",
52939
+ "name": "Qwen: Qwen3.5-35B-A3B",
52940
+ "provider": "openrouter",
52941
+ "family": "qwen",
52942
+ "created_at": "2026-02-25 21:10:22 UTC",
52943
+ "context_window": 262144,
52944
+ "max_output_tokens": 65536,
52945
+ "knowledge_cutoff": null,
52946
+ "modalities": {
52947
+ "input": [
52948
+ "text",
52949
+ "image",
52950
+ "video"
52951
+ ],
52952
+ "output": [
52953
+ "text"
52954
+ ]
52955
+ },
52956
+ "capabilities": [
52957
+ "streaming",
52958
+ "function_calling",
52959
+ "structured_output",
52960
+ "predicted_outputs"
52961
+ ],
52962
+ "pricing": {
52963
+ "text_tokens": {
52964
+ "standard": {
52965
+ "input_per_million": 0.1625,
52966
+ "output_per_million": 1.3
52967
+ }
52968
+ }
52969
+ },
52970
+ "metadata": {
52971
+ "description": "The Qwen3.5 Series 35B-A3B is a native vision-language model designed with a hybrid architecture that integrates linear attention mechanisms and a sparse mixture-of-experts model, achieving higher inference efficiency. Its overall performance is comparable to that of the Qwen3.5-27B.",
52972
+ "architecture": {
52973
+ "modality": "text+image+video->text",
52974
+ "input_modalities": [
52975
+ "text",
52976
+ "image",
52977
+ "video"
52978
+ ],
52979
+ "output_modalities": [
52980
+ "text"
52981
+ ],
52982
+ "tokenizer": "Qwen3",
52983
+ "instruct_type": null
52984
+ },
52985
+ "top_provider": {
52986
+ "context_length": 262144,
52987
+ "max_completion_tokens": 65536,
52988
+ "is_moderated": false
52989
+ },
52990
+ "per_request_limits": null,
52991
+ "supported_parameters": [
52992
+ "frequency_penalty",
52993
+ "include_reasoning",
52994
+ "logit_bias",
52995
+ "logprobs",
52996
+ "max_tokens",
52997
+ "min_p",
52998
+ "presence_penalty",
52999
+ "reasoning",
53000
+ "repetition_penalty",
53001
+ "response_format",
53002
+ "seed",
53003
+ "stop",
53004
+ "structured_outputs",
53005
+ "temperature",
53006
+ "tool_choice",
53007
+ "tools",
53008
+ "top_k",
53009
+ "top_logprobs",
53010
+ "top_p"
53011
+ ]
53012
+ }
53013
+ },
52273
53014
  {
52274
53015
  "id": "qwen/qwen3.5-397b-a17b",
52275
53016
  "name": "Qwen3.5 397B A17B",
@@ -52362,6 +53103,74 @@
52362
53103
  "knowledge": "2025-04"
52363
53104
  }
52364
53105
  },
53106
+ {
53107
+ "id": "qwen/qwen3.5-flash-02-23",
53108
+ "name": "Qwen: Qwen3.5-Flash",
53109
+ "provider": "openrouter",
53110
+ "family": "qwen",
53111
+ "created_at": "2026-02-25 21:09:36 UTC",
53112
+ "context_window": 1000000,
53113
+ "max_output_tokens": 65536,
53114
+ "knowledge_cutoff": null,
53115
+ "modalities": {
53116
+ "input": [
53117
+ "text",
53118
+ "image",
53119
+ "video"
53120
+ ],
53121
+ "output": [
53122
+ "text"
53123
+ ]
53124
+ },
53125
+ "capabilities": [
53126
+ "streaming",
53127
+ "function_calling",
53128
+ "structured_output"
53129
+ ],
53130
+ "pricing": {
53131
+ "text_tokens": {
53132
+ "standard": {
53133
+ "input_per_million": 0.09999999999999999,
53134
+ "output_per_million": 0.39999999999999997
53135
+ }
53136
+ }
53137
+ },
53138
+ "metadata": {
53139
+ "description": "The Qwen3.5 native vision-language Flash models are built on a hybrid architecture that integrates a linear attention mechanism with a sparse mixture-of-experts model, achieving higher inference efficiency. Compared to the 3 series, these models deliver a leap forward in performance for both pure text and multimodal tasks, offering fast response times while balancing inference speed and overall performance.",
53140
+ "architecture": {
53141
+ "modality": "text+image+video->text",
53142
+ "input_modalities": [
53143
+ "text",
53144
+ "image",
53145
+ "video"
53146
+ ],
53147
+ "output_modalities": [
53148
+ "text"
53149
+ ],
53150
+ "tokenizer": "Qwen3",
53151
+ "instruct_type": null
53152
+ },
53153
+ "top_provider": {
53154
+ "context_length": 1000000,
53155
+ "max_completion_tokens": 65536,
53156
+ "is_moderated": false
53157
+ },
53158
+ "per_request_limits": null,
53159
+ "supported_parameters": [
53160
+ "include_reasoning",
53161
+ "max_tokens",
53162
+ "presence_penalty",
53163
+ "reasoning",
53164
+ "response_format",
53165
+ "seed",
53166
+ "structured_outputs",
53167
+ "temperature",
53168
+ "tool_choice",
53169
+ "tools",
53170
+ "top_p"
53171
+ ]
53172
+ }
53173
+ },
52365
53174
  {
52366
53175
  "id": "qwen/qwen3.5-plus-02-15",
52367
53176
  "name": "Qwen3.5 Plus 2026-02-15",
@@ -52499,6 +53308,7 @@
52499
53308
  "supported_parameters": [
52500
53309
  "frequency_penalty",
52501
53310
  "include_reasoning",
53311
+ "logprobs",
52502
53312
  "max_tokens",
52503
53313
  "presence_penalty",
52504
53314
  "reasoning",
@@ -52509,6 +53319,7 @@
52509
53319
  "tool_choice",
52510
53320
  "tools",
52511
53321
  "top_k",
53322
+ "top_logprobs",
52512
53323
  "top_p"
52513
53324
  ]
52514
53325
  }
@@ -53011,6 +53822,7 @@
53011
53822
  "per_request_limits": null,
53012
53823
  "supported_parameters": [
53013
53824
  "frequency_penalty",
53825
+ "logprobs",
53014
53826
  "max_tokens",
53015
53827
  "min_p",
53016
53828
  "presence_penalty",
@@ -53023,6 +53835,7 @@
53023
53835
  "tool_choice",
53024
53836
  "tools",
53025
53837
  "top_k",
53838
+ "top_logprobs",
53026
53839
  "top_p"
53027
53840
  ]
53028
53841
  }
@@ -53077,6 +53890,7 @@
53077
53890
  "per_request_limits": null,
53078
53891
  "supported_parameters": [
53079
53892
  "frequency_penalty",
53893
+ "logprobs",
53080
53894
  "max_tokens",
53081
53895
  "min_p",
53082
53896
  "presence_penalty",
@@ -53087,6 +53901,7 @@
53087
53901
  "structured_outputs",
53088
53902
  "temperature",
53089
53903
  "top_k",
53904
+ "top_logprobs",
53090
53905
  "top_p"
53091
53906
  ]
53092
53907
  }
@@ -53635,6 +54450,7 @@
53635
54450
  "supported_parameters": [
53636
54451
  "frequency_penalty",
53637
54452
  "logit_bias",
54453
+ "logprobs",
53638
54454
  "max_tokens",
53639
54455
  "min_p",
53640
54456
  "presence_penalty",
@@ -53647,6 +54463,7 @@
53647
54463
  "tool_choice",
53648
54464
  "tools",
53649
54465
  "top_k",
54466
+ "top_logprobs",
53650
54467
  "top_p"
53651
54468
  ]
53652
54469
  }
@@ -53764,14 +54581,18 @@
53764
54581
  "per_request_limits": null,
53765
54582
  "supported_parameters": [
53766
54583
  "frequency_penalty",
54584
+ "logprobs",
53767
54585
  "max_tokens",
53768
54586
  "presence_penalty",
54587
+ "repetition_penalty",
53769
54588
  "response_format",
54589
+ "seed",
53770
54590
  "stop",
53771
54591
  "structured_outputs",
53772
54592
  "temperature",
53773
54593
  "tool_choice",
53774
54594
  "tools",
54595
+ "top_logprobs",
53775
54596
  "top_p"
53776
54597
  ]
53777
54598
  }
@@ -54036,8 +54857,8 @@
54036
54857
  }
54037
54858
  },
54038
54859
  {
54039
- "id": "upstage/solar-pro-3:free",
54040
- "name": "Upstage: Solar Pro 3 (free)",
54860
+ "id": "upstage/solar-pro-3",
54861
+ "name": "Upstage: Solar Pro 3",
54041
54862
  "provider": "openrouter",
54042
54863
  "family": "upstage",
54043
54864
  "created_at": "2026-01-27 02:33:20 UTC",
@@ -54057,7 +54878,15 @@
54057
54878
  "function_calling",
54058
54879
  "structured_output"
54059
54880
  ],
54060
- "pricing": {},
54881
+ "pricing": {
54882
+ "text_tokens": {
54883
+ "standard": {
54884
+ "input_per_million": 0.15,
54885
+ "output_per_million": 0.6,
54886
+ "cached_input_per_million": 0.015
54887
+ }
54888
+ }
54889
+ },
54061
54890
  "metadata": {
54062
54891
  "description": "Solar Pro 3 is Upstage's powerful Mixture-of-Experts (MoE) language model. With 102B total parameters and 12B active parameters per forward pass, it delivers exceptional performance while maintaining computational efficiency. Optimized for Korean with English and Japanese support.",
54063
54892
  "architecture": {
@@ -55765,8 +56594,8 @@
55765
56594
  "instruct_type": null
55766
56595
  },
55767
56596
  "top_provider": {
55768
- "context_length": 204800,
55769
- "max_completion_tokens": 131072,
56597
+ "context_length": 202752,
56598
+ "max_completion_tokens": null,
55770
56599
  "is_moderated": false
55771
56600
  },
55772
56601
  "per_request_limits": null,
@@ -55964,7 +56793,7 @@
55964
56793
  "name": "Sonar Reasoning",
55965
56794
  "provider": "perplexity",
55966
56795
  "family": "sonar_reasoning",
55967
- "created_at": "2026-02-25 13:11:46 UTC",
56796
+ "created_at": "2026-03-04 16:13:43 UTC",
55968
56797
  "context_window": 128000,
55969
56798
  "max_output_tokens": 4096,
55970
56799
  "knowledge_cutoff": null,
@@ -57395,6 +58224,32 @@
57395
58224
  "knowledge": "2025-01"
57396
58225
  }
57397
58226
  },
58227
+ {
58228
+ "id": "gemini-3.1-flash-image-preview",
58229
+ "name": "gemini-3.1-flash-image-preview",
58230
+ "provider": "vertexai",
58231
+ "family": "gemini",
58232
+ "created_at": null,
58233
+ "context_window": null,
58234
+ "max_output_tokens": null,
58235
+ "knowledge_cutoff": null,
58236
+ "modalities": {
58237
+ "input": [],
58238
+ "output": []
58239
+ },
58240
+ "capabilities": [
58241
+ "streaming",
58242
+ "function_calling"
58243
+ ],
58244
+ "pricing": {},
58245
+ "metadata": {
58246
+ "version_id": "default",
58247
+ "open_source_category": null,
58248
+ "launch_stage": "PUBLIC_PREVIEW",
58249
+ "supported_actions": null,
58250
+ "publisher_model_template": "projects/{project}/locations/{location}/publishers/google/models/gemini-3.1-flash-image-preview@default"
58251
+ }
58252
+ },
57398
58253
  {
57399
58254
  "id": "gemini-3.1-pro-preview",
57400
58255
  "name": "Gemini 3.1 Pro Preview",
@@ -58163,60 +59018,6 @@
58163
59018
  }
58164
59019
  }
58165
59020
  },
58166
- {
58167
- "id": "grok-2-image-1212",
58168
- "name": "Grok 2 Image 1212",
58169
- "provider": "xai",
58170
- "family": "grok",
58171
- "created_at": "2025-01-13 00:00:00 UTC",
58172
- "context_window": null,
58173
- "max_output_tokens": null,
58174
- "knowledge_cutoff": null,
58175
- "modalities": {
58176
- "input": [
58177
- "text"
58178
- ],
58179
- "output": [
58180
- "image"
58181
- ]
58182
- },
58183
- "capabilities": [],
58184
- "pricing": {},
58185
- "metadata": {
58186
- "object": "model",
58187
- "owned_by": "xai"
58188
- }
58189
- },
58190
- {
58191
- "id": "grok-2-vision-1212",
58192
- "name": "Grok 2 Vision 1212",
58193
- "provider": "xai",
58194
- "family": "grok",
58195
- "created_at": "2024-12-12 00:00:00 UTC",
58196
- "context_window": null,
58197
- "max_output_tokens": null,
58198
- "knowledge_cutoff": null,
58199
- "modalities": {
58200
- "input": [
58201
- "text",
58202
- "image"
58203
- ],
58204
- "output": [
58205
- "text"
58206
- ]
58207
- },
58208
- "capabilities": [
58209
- "streaming",
58210
- "function_calling",
58211
- "structured_output",
58212
- "vision"
58213
- ],
58214
- "pricing": {},
58215
- "metadata": {
58216
- "object": "model",
58217
- "owned_by": "xai"
58218
- }
58219
- },
58220
59021
  {
58221
59022
  "id": "grok-3",
58222
59023
  "name": "Grok 3",