ruby_llm_community 0.0.5 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +73 -91
  3. data/lib/generators/ruby_llm/install/templates/create_models_migration.rb.tt +34 -0
  4. data/lib/generators/ruby_llm/install/templates/initializer.rb.tt +5 -0
  5. data/lib/generators/ruby_llm/install/templates/model_model.rb.tt +6 -0
  6. data/lib/generators/ruby_llm/install_generator.rb +27 -2
  7. data/lib/ruby_llm/active_record/acts_as.rb +168 -24
  8. data/lib/ruby_llm/aliases.json +62 -5
  9. data/lib/ruby_llm/aliases.rb +7 -25
  10. data/lib/ruby_llm/chat.rb +10 -17
  11. data/lib/ruby_llm/configuration.rb +5 -12
  12. data/lib/ruby_llm/connection.rb +4 -4
  13. data/lib/ruby_llm/connection_multipart.rb +19 -0
  14. data/lib/ruby_llm/content.rb +5 -2
  15. data/lib/ruby_llm/embedding.rb +1 -2
  16. data/lib/ruby_llm/error.rb +0 -8
  17. data/lib/ruby_llm/image.rb +23 -8
  18. data/lib/ruby_llm/image_attachment.rb +21 -0
  19. data/lib/ruby_llm/message.rb +6 -6
  20. data/lib/ruby_llm/model/info.rb +12 -10
  21. data/lib/ruby_llm/model/pricing.rb +0 -3
  22. data/lib/ruby_llm/model/pricing_category.rb +0 -2
  23. data/lib/ruby_llm/model/pricing_tier.rb +0 -1
  24. data/lib/ruby_llm/models.json +2485 -676
  25. data/lib/ruby_llm/models.rb +65 -34
  26. data/lib/ruby_llm/provider.rb +8 -8
  27. data/lib/ruby_llm/providers/anthropic/capabilities.rb +1 -46
  28. data/lib/ruby_llm/providers/anthropic/chat.rb +2 -2
  29. data/lib/ruby_llm/providers/anthropic/media.rb +0 -1
  30. data/lib/ruby_llm/providers/anthropic/tools.rb +1 -2
  31. data/lib/ruby_llm/providers/anthropic.rb +1 -2
  32. data/lib/ruby_llm/providers/bedrock/chat.rb +2 -4
  33. data/lib/ruby_llm/providers/bedrock/media.rb +0 -1
  34. data/lib/ruby_llm/providers/bedrock/models.rb +0 -2
  35. data/lib/ruby_llm/providers/bedrock/streaming/base.rb +0 -12
  36. data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +0 -7
  37. data/lib/ruby_llm/providers/bedrock/streaming/message_processing.rb +0 -12
  38. data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +0 -12
  39. data/lib/ruby_llm/providers/bedrock/streaming/prelude_handling.rb +0 -13
  40. data/lib/ruby_llm/providers/bedrock/streaming.rb +0 -18
  41. data/lib/ruby_llm/providers/bedrock.rb +1 -2
  42. data/lib/ruby_llm/providers/deepseek/capabilities.rb +1 -2
  43. data/lib/ruby_llm/providers/deepseek/chat.rb +0 -1
  44. data/lib/ruby_llm/providers/gemini/capabilities.rb +28 -100
  45. data/lib/ruby_llm/providers/gemini/chat.rb +57 -29
  46. data/lib/ruby_llm/providers/gemini/embeddings.rb +0 -2
  47. data/lib/ruby_llm/providers/gemini/images.rb +1 -2
  48. data/lib/ruby_llm/providers/gemini/media.rb +0 -1
  49. data/lib/ruby_llm/providers/gemini/models.rb +1 -2
  50. data/lib/ruby_llm/providers/gemini/streaming.rb +15 -1
  51. data/lib/ruby_llm/providers/gemini/tools.rb +0 -5
  52. data/lib/ruby_llm/providers/gpustack/chat.rb +11 -1
  53. data/lib/ruby_llm/providers/gpustack/media.rb +45 -0
  54. data/lib/ruby_llm/providers/gpustack/models.rb +44 -9
  55. data/lib/ruby_llm/providers/gpustack.rb +1 -0
  56. data/lib/ruby_llm/providers/mistral/capabilities.rb +2 -10
  57. data/lib/ruby_llm/providers/mistral/chat.rb +0 -2
  58. data/lib/ruby_llm/providers/mistral/embeddings.rb +0 -3
  59. data/lib/ruby_llm/providers/mistral/models.rb +0 -1
  60. data/lib/ruby_llm/providers/ollama/chat.rb +0 -1
  61. data/lib/ruby_llm/providers/ollama/media.rb +1 -6
  62. data/lib/ruby_llm/providers/ollama/models.rb +36 -0
  63. data/lib/ruby_llm/providers/ollama.rb +1 -0
  64. data/lib/ruby_llm/providers/openai/capabilities.rb +3 -16
  65. data/lib/ruby_llm/providers/openai/chat.rb +1 -3
  66. data/lib/ruby_llm/providers/openai/embeddings.rb +0 -3
  67. data/lib/ruby_llm/providers/openai/images.rb +73 -3
  68. data/lib/ruby_llm/providers/openai/media.rb +0 -1
  69. data/lib/ruby_llm/providers/openai/response.rb +120 -29
  70. data/lib/ruby_llm/providers/openai/response_media.rb +2 -2
  71. data/lib/ruby_llm/providers/openai/streaming.rb +107 -47
  72. data/lib/ruby_llm/providers/openai/tools.rb +1 -1
  73. data/lib/ruby_llm/providers/openai.rb +1 -3
  74. data/lib/ruby_llm/providers/openai_base.rb +2 -2
  75. data/lib/ruby_llm/providers/openrouter/models.rb +1 -16
  76. data/lib/ruby_llm/providers/perplexity/capabilities.rb +0 -1
  77. data/lib/ruby_llm/providers/perplexity/chat.rb +0 -1
  78. data/lib/ruby_llm/providers/perplexity.rb +1 -5
  79. data/lib/ruby_llm/providers/vertexai/chat.rb +14 -0
  80. data/lib/ruby_llm/providers/vertexai/embeddings.rb +32 -0
  81. data/lib/ruby_llm/providers/vertexai/models.rb +130 -0
  82. data/lib/ruby_llm/providers/vertexai/streaming.rb +14 -0
  83. data/lib/ruby_llm/providers/vertexai.rb +55 -0
  84. data/lib/ruby_llm/railtie.rb +0 -1
  85. data/lib/ruby_llm/stream_accumulator.rb +72 -10
  86. data/lib/ruby_llm/streaming.rb +16 -25
  87. data/lib/ruby_llm/tool.rb +2 -19
  88. data/lib/ruby_llm/tool_call.rb +0 -9
  89. data/lib/ruby_llm/version.rb +1 -1
  90. data/lib/ruby_llm_community.rb +5 -3
  91. data/lib/tasks/models.rake +525 -0
  92. data/lib/tasks/release.rake +37 -2
  93. data/lib/tasks/vcr.rake +0 -7
  94. metadata +13 -4
  95. data/lib/tasks/aliases.rake +0 -235
  96. data/lib/tasks/models_docs.rake +0 -224
  97. data/lib/tasks/models_update.rake +0 -108
@@ -33,31 +33,37 @@
33
33
  },
34
34
  {
35
35
  "id": "claude-3-5-sonnet-20240620",
36
- "name": "Claude Sonnet 3.5",
36
+ "name": "Claude Sonnet 3.5 (Old)",
37
37
  "provider": "anthropic",
38
38
  "family": "claude-3-5-sonnet",
39
- "created_at": null,
39
+ "created_at": "2024-06-20 00:00:00 UTC",
40
40
  "context_window": 200000,
41
41
  "max_output_tokens": 8192,
42
42
  "knowledge_cutoff": null,
43
43
  "modalities": {
44
44
  "input": [
45
+ "text",
45
46
  "image",
46
- "text"
47
+ "pdf"
47
48
  ],
48
49
  "output": [
49
50
  "text"
50
51
  ]
51
52
  },
52
53
  "capabilities": [
53
- "function_calling"
54
+ "streaming",
55
+ "function_calling",
56
+ "batch"
54
57
  ],
55
58
  "pricing": {
56
59
  "text_tokens": {
57
60
  "standard": {
58
61
  "input_per_million": 3.0,
59
- "cached_input_per_million": 3.75,
60
62
  "output_per_million": 15.0
63
+ },
64
+ "batch": {
65
+ "input_per_million": 1.5,
66
+ "output_per_million": 7.5
61
67
  }
62
68
  }
63
69
  },
@@ -65,31 +71,37 @@
65
71
  },
66
72
  {
67
73
  "id": "claude-3-5-sonnet-20241022",
68
- "name": "Claude Sonnet 3.5",
74
+ "name": "Claude Sonnet 3.5 (New)",
69
75
  "provider": "anthropic",
70
76
  "family": "claude-3-5-sonnet",
71
- "created_at": null,
77
+ "created_at": "2024-10-22 00:00:00 UTC",
72
78
  "context_window": 200000,
73
79
  "max_output_tokens": 8192,
74
80
  "knowledge_cutoff": null,
75
81
  "modalities": {
76
82
  "input": [
83
+ "text",
77
84
  "image",
78
- "text"
85
+ "pdf"
79
86
  ],
80
87
  "output": [
81
88
  "text"
82
89
  ]
83
90
  },
84
91
  "capabilities": [
85
- "function_calling"
92
+ "streaming",
93
+ "function_calling",
94
+ "batch"
86
95
  ],
87
96
  "pricing": {
88
97
  "text_tokens": {
89
98
  "standard": {
90
99
  "input_per_million": 3.0,
91
- "cached_input_per_million": 3.75,
92
100
  "output_per_million": 15.0
101
+ },
102
+ "batch": {
103
+ "input_per_million": 1.5,
104
+ "output_per_million": 7.5
93
105
  }
94
106
  }
95
107
  },
@@ -1838,7 +1850,7 @@
1838
1850
  "provider": "deepseek",
1839
1851
  "family": "deepseek-chat",
1840
1852
  "created_at": null,
1841
- "context_window": 64000,
1853
+ "context_window": 128000,
1842
1854
  "max_output_tokens": 8000,
1843
1855
  "knowledge_cutoff": null,
1844
1856
  "modalities": {
@@ -1873,7 +1885,7 @@
1873
1885
  "provider": "deepseek",
1874
1886
  "family": "deepseek-reasoner",
1875
1887
  "created_at": null,
1876
- "context_window": 64000,
1888
+ "context_window": null,
1877
1889
  "max_output_tokens": 64000,
1878
1890
  "knowledge_cutoff": null,
1879
1891
  "modalities": {
@@ -3171,6 +3183,53 @@
3171
3183
  },
3172
3184
  "metadata": {}
3173
3185
  },
3186
+ {
3187
+ "id": "gemini-2.5-flash-image-preview",
3188
+ "name": "Gemini 2.5 Flash Image Preview",
3189
+ "provider": "gemini",
3190
+ "family": "other",
3191
+ "created_at": null,
3192
+ "context_window": 32768,
3193
+ "max_output_tokens": 8192,
3194
+ "knowledge_cutoff": null,
3195
+ "modalities": {
3196
+ "input": [
3197
+ "text",
3198
+ "image",
3199
+ "pdf"
3200
+ ],
3201
+ "output": [
3202
+ "text"
3203
+ ]
3204
+ },
3205
+ "capabilities": [
3206
+ "streaming",
3207
+ "function_calling",
3208
+ "structured_output",
3209
+ "batch",
3210
+ "caching"
3211
+ ],
3212
+ "pricing": {
3213
+ "text_tokens": {
3214
+ "standard": {
3215
+ "input_per_million": 0.075,
3216
+ "output_per_million": 0.3
3217
+ },
3218
+ "batch": {
3219
+ "input_per_million": 0.0375,
3220
+ "output_per_million": 0.15
3221
+ }
3222
+ }
3223
+ },
3224
+ "metadata": {
3225
+ "version": "2.0",
3226
+ "description": "Gemini 2.5 Flash Preview Image",
3227
+ "supported_generation_methods": [
3228
+ "generateContent",
3229
+ "countTokens"
3230
+ ]
3231
+ }
3232
+ },
3174
3233
  {
3175
3234
  "id": "gemini-2.5-flash-lite",
3176
3235
  "name": "Gemini 2.5 Flash-Lite",
@@ -3210,7 +3269,7 @@
3210
3269
  },
3211
3270
  "metadata": {
3212
3271
  "version": "001",
3213
- "description": "Stable verion of Gemini 2.5 Flash-Lite, released in July of 2025",
3272
+ "description": "Stable version of Gemini 2.5 Flash-Lite, released in July of 2025",
3214
3273
  "supported_generation_methods": [
3215
3274
  "generateContent",
3216
3275
  "countTokens",
@@ -3388,7 +3447,7 @@
3388
3447
  },
3389
3448
  {
3390
3449
  "id": "gemini-2.5-flash-preview-tts",
3391
- "name": "Gemini 2.5 Flash Preview TTS",
3450
+ "name": "Gemini 2.5 Flash Preview Text-to-Speech",
3392
3451
  "provider": "gemini",
3393
3452
  "family": "gemini-2.5-flash-preview-tts",
3394
3453
  "created_at": null,
@@ -3616,7 +3675,7 @@
3616
3675
  },
3617
3676
  {
3618
3677
  "id": "gemini-2.5-pro-preview-tts",
3619
- "name": "Gemini 2.5 Pro Preview TTS",
3678
+ "name": "Gemini 2.5 Pro Preview Text-to-Speech",
3620
3679
  "provider": "gemini",
3621
3680
  "family": "gemini-2.5-pro-preview-tts",
3622
3681
  "created_at": null,
@@ -4127,7 +4186,7 @@
4127
4186
  },
4128
4187
  {
4129
4188
  "id": "imagen-3.0-generate-002",
4130
- "name": "Imagen 3.0 002 model",
4189
+ "name": "Imagen 3.0",
4131
4190
  "provider": "gemini",
4132
4191
  "family": "imagen3",
4133
4192
  "created_at": null,
@@ -4302,46 +4361,6 @@
4302
4361
  ]
4303
4362
  }
4304
4363
  },
4305
- {
4306
- "id": "veo-2.0-generate-001",
4307
- "name": "Veo 2",
4308
- "provider": "gemini",
4309
- "family": "other",
4310
- "created_at": null,
4311
- "context_window": 480,
4312
- "max_output_tokens": 8192,
4313
- "knowledge_cutoff": null,
4314
- "modalities": {
4315
- "input": [
4316
- "text"
4317
- ],
4318
- "output": [
4319
- "text"
4320
- ]
4321
- },
4322
- "capabilities": [
4323
- "streaming"
4324
- ],
4325
- "pricing": {
4326
- "text_tokens": {
4327
- "standard": {
4328
- "input_per_million": 0.075,
4329
- "output_per_million": 0.3
4330
- },
4331
- "batch": {
4332
- "input_per_million": 0.0375,
4333
- "output_per_million": 0.15
4334
- }
4335
- }
4336
- },
4337
- "metadata": {
4338
- "version": "2.0",
4339
- "description": "Vertex served Veo 2 model. Access to this model requires billing to be enabled on the associated Google Cloud Platform account. Please visit https://console.cloud.google.com/billing to enable it.",
4340
- "supported_generation_methods": [
4341
- "predictLongRunning"
4342
- ]
4343
- }
4344
- },
4345
4364
  {
4346
4365
  "id": "codestral-2411-rc5",
4347
4366
  "name": "Codestral",
@@ -6700,29 +6719,31 @@
6700
6719
  "id": "gpt-4",
6701
6720
  "name": "GPT-4",
6702
6721
  "provider": "openai",
6703
- "family": "gpt4",
6704
- "created_at": "2023-06-27 18:13:31 +0200",
6722
+ "family": "gpt-4",
6723
+ "created_at": null,
6705
6724
  "context_window": 8192,
6706
6725
  "max_output_tokens": 8192,
6707
6726
  "knowledge_cutoff": null,
6708
6727
  "modalities": {
6709
6728
  "input": [
6710
- "text",
6711
- "image",
6712
- "pdf"
6729
+ "text"
6713
6730
  ],
6714
6731
  "output": [
6732
+ "embeddings",
6715
6733
  "text"
6716
6734
  ]
6717
6735
  },
6718
6736
  "capabilities": [
6719
- "streaming",
6720
- "function_calling"
6737
+ "batch"
6721
6738
  ],
6722
6739
  "pricing": {
6723
6740
  "text_tokens": {
6724
6741
  "standard": {
6725
- "input_per_million": 10.0,
6742
+ "input_per_million": 30.0,
6743
+ "output_per_million": 60.0
6744
+ },
6745
+ "batch": {
6746
+ "input_per_million": 15.0,
6726
6747
  "output_per_million": 30.0
6727
6748
  }
6728
6749
  }
@@ -6767,29 +6788,34 @@
6767
6788
  },
6768
6789
  {
6769
6790
  "id": "gpt-4-0613",
6770
- "name": "GPT-4 0613",
6791
+ "name": "GPT-4",
6771
6792
  "provider": "openai",
6772
- "family": "other",
6773
- "created_at": "2023-06-12 18:54:56 +0200",
6774
- "context_window": 4096,
6775
- "max_output_tokens": 16384,
6793
+ "family": "gpt-4",
6794
+ "created_at": null,
6795
+ "context_window": 8192,
6796
+ "max_output_tokens": 8192,
6776
6797
  "knowledge_cutoff": null,
6777
6798
  "modalities": {
6778
6799
  "input": [
6779
6800
  "text"
6780
6801
  ],
6781
6802
  "output": [
6803
+ "embeddings",
6782
6804
  "text"
6783
6805
  ]
6784
6806
  },
6785
6807
  "capabilities": [
6786
- "streaming"
6808
+ "batch"
6787
6809
  ],
6788
6810
  "pricing": {
6789
6811
  "text_tokens": {
6790
6812
  "standard": {
6791
- "input_per_million": 0.5,
6792
- "output_per_million": 1.5
6813
+ "input_per_million": 30.0,
6814
+ "output_per_million": 60.0
6815
+ },
6816
+ "batch": {
6817
+ "input_per_million": 15.0,
6818
+ "output_per_million": 30.0
6793
6819
  }
6794
6820
  }
6795
6821
  },
@@ -6915,25 +6941,21 @@
6915
6941
  "id": "gpt-4-turbo-preview",
6916
6942
  "name": "GPT-4 Turbo Preview",
6917
6943
  "provider": "openai",
6918
- "family": "gpt4_turbo",
6919
- "created_at": "2024-01-23 20:22:57 +0100",
6944
+ "family": "gpt-4-turbo-preview",
6945
+ "created_at": null,
6920
6946
  "context_window": 128000,
6921
6947
  "max_output_tokens": 4096,
6922
6948
  "knowledge_cutoff": null,
6923
6949
  "modalities": {
6924
6950
  "input": [
6925
- "text",
6926
- "image",
6927
- "pdf"
6951
+ "text"
6928
6952
  ],
6929
6953
  "output": [
6954
+ "embeddings",
6930
6955
  "text"
6931
6956
  ]
6932
6957
  },
6933
- "capabilities": [
6934
- "streaming",
6935
- "function_calling"
6936
- ],
6958
+ "capabilities": [],
6937
6959
  "pricing": {
6938
6960
  "text_tokens": {
6939
6961
  "standard": {
@@ -6951,23 +6973,23 @@
6951
6973
  "id": "gpt-4.1",
6952
6974
  "name": "GPT-4.1",
6953
6975
  "provider": "openai",
6954
- "family": "gpt41",
6955
- "created_at": "2025-04-10 22:22:22 +0200",
6976
+ "family": "gpt-4.1",
6977
+ "created_at": null,
6956
6978
  "context_window": 1047576,
6957
6979
  "max_output_tokens": 32768,
6958
6980
  "knowledge_cutoff": null,
6959
6981
  "modalities": {
6960
6982
  "input": [
6961
- "text",
6962
6983
  "image",
6963
- "pdf"
6984
+ "text"
6964
6985
  ],
6965
6986
  "output": [
6987
+ "embeddings",
6966
6988
  "text"
6967
6989
  ]
6968
6990
  },
6969
6991
  "capabilities": [
6970
- "streaming",
6992
+ "batch",
6971
6993
  "function_calling",
6972
6994
  "structured_output"
6973
6995
  ],
@@ -6975,8 +6997,12 @@
6975
6997
  "text_tokens": {
6976
6998
  "standard": {
6977
6999
  "input_per_million": 2.0,
6978
- "output_per_million": 8.0,
6979
- "cached_input_per_million": 0.5
7000
+ "cached_input_per_million": 0.5,
7001
+ "output_per_million": 8.0
7002
+ },
7003
+ "batch": {
7004
+ "input_per_million": 1.0,
7005
+ "output_per_million": 4.0
6980
7006
  }
6981
7007
  }
6982
7008
  },
@@ -6987,25 +7013,25 @@
6987
7013
  },
6988
7014
  {
6989
7015
  "id": "gpt-4.1-2025-04-14",
6990
- "name": "GPT-4.1 20250414",
7016
+ "name": "GPT-4.1",
6991
7017
  "provider": "openai",
6992
- "family": "gpt41",
6993
- "created_at": "2025-04-10 22:09:06 +0200",
7018
+ "family": "gpt-4.1",
7019
+ "created_at": null,
6994
7020
  "context_window": 1047576,
6995
7021
  "max_output_tokens": 32768,
6996
7022
  "knowledge_cutoff": null,
6997
7023
  "modalities": {
6998
7024
  "input": [
6999
- "text",
7000
7025
  "image",
7001
- "pdf"
7026
+ "text"
7002
7027
  ],
7003
7028
  "output": [
7029
+ "embeddings",
7004
7030
  "text"
7005
7031
  ]
7006
7032
  },
7007
7033
  "capabilities": [
7008
- "streaming",
7034
+ "batch",
7009
7035
  "function_calling",
7010
7036
  "structured_output"
7011
7037
  ],
@@ -7013,8 +7039,12 @@
7013
7039
  "text_tokens": {
7014
7040
  "standard": {
7015
7041
  "input_per_million": 2.0,
7016
- "output_per_million": 8.0,
7017
- "cached_input_per_million": 0.5
7042
+ "cached_input_per_million": 0.5,
7043
+ "output_per_million": 8.0
7044
+ },
7045
+ "batch": {
7046
+ "input_per_million": 1.0,
7047
+ "output_per_million": 4.0
7018
7048
  }
7019
7049
  }
7020
7050
  },
@@ -8277,9 +8307,7 @@
8277
8307
  "text"
8278
8308
  ]
8279
8309
  },
8280
- "capabilities": [
8281
- "structured_output"
8282
- ],
8310
+ "capabilities": [],
8283
8311
  "pricing": {
8284
8312
  "text_tokens": {
8285
8313
  "standard": {
@@ -8296,30 +8324,38 @@
8296
8324
  },
8297
8325
  {
8298
8326
  "id": "gpt-5-mini",
8299
- "name": "GPT-5 Mini",
8327
+ "name": "GPT-5 mini",
8300
8328
  "provider": "openai",
8301
- "family": "other",
8302
- "created_at": "2025-08-05 22:32:08 +0200",
8303
- "context_window": 4096,
8304
- "max_output_tokens": 16384,
8329
+ "family": "gpt-5-mini",
8330
+ "created_at": null,
8331
+ "context_window": 400000,
8332
+ "max_output_tokens": 128000,
8305
8333
  "knowledge_cutoff": null,
8306
8334
  "modalities": {
8307
8335
  "input": [
8336
+ "image",
8308
8337
  "text"
8309
8338
  ],
8310
8339
  "output": [
8340
+ "embeddings",
8311
8341
  "text"
8312
8342
  ]
8313
8343
  },
8314
8344
  "capabilities": [
8315
- "streaming",
8316
- "reasoning"
8345
+ "batch",
8346
+ "function_calling",
8347
+ "structured_output"
8317
8348
  ],
8318
8349
  "pricing": {
8319
8350
  "text_tokens": {
8320
8351
  "standard": {
8321
- "input_per_million": 0.5,
8322
- "output_per_million": 1.5
8352
+ "input_per_million": 0.25,
8353
+ "cached_input_per_million": 0.025,
8354
+ "output_per_million": 2.0
8355
+ },
8356
+ "batch": {
8357
+ "input_per_million": 0.125,
8358
+ "output_per_million": 1.0
8323
8359
  }
8324
8360
  }
8325
8361
  },
@@ -8330,30 +8366,38 @@
8330
8366
  },
8331
8367
  {
8332
8368
  "id": "gpt-5-mini-2025-08-07",
8333
- "name": "GPT-5 Mini 20250807",
8369
+ "name": "GPT-5 mini",
8334
8370
  "provider": "openai",
8335
- "family": "other",
8336
- "created_at": "2025-08-05 22:31:07 +0200",
8337
- "context_window": 4096,
8338
- "max_output_tokens": 16384,
8371
+ "family": "gpt-5-mini",
8372
+ "created_at": null,
8373
+ "context_window": 400000,
8374
+ "max_output_tokens": 128000,
8339
8375
  "knowledge_cutoff": null,
8340
8376
  "modalities": {
8341
8377
  "input": [
8378
+ "image",
8342
8379
  "text"
8343
8380
  ],
8344
8381
  "output": [
8382
+ "embeddings",
8345
8383
  "text"
8346
8384
  ]
8347
8385
  },
8348
8386
  "capabilities": [
8349
- "streaming",
8350
- "reasoning"
8387
+ "batch",
8388
+ "function_calling",
8389
+ "structured_output"
8351
8390
  ],
8352
8391
  "pricing": {
8353
8392
  "text_tokens": {
8354
8393
  "standard": {
8355
- "input_per_million": 0.5,
8356
- "output_per_million": 1.5
8394
+ "input_per_million": 0.25,
8395
+ "cached_input_per_million": 0.025,
8396
+ "output_per_million": 2.0
8397
+ },
8398
+ "batch": {
8399
+ "input_per_million": 0.125,
8400
+ "output_per_million": 1.0
8357
8401
  }
8358
8402
  }
8359
8403
  },
@@ -8364,30 +8408,38 @@
8364
8408
  },
8365
8409
  {
8366
8410
  "id": "gpt-5-nano",
8367
- "name": "GPT-5 Nano",
8411
+ "name": "GPT-5 nano",
8368
8412
  "provider": "openai",
8369
- "family": "other",
8370
- "created_at": "2025-08-05 22:39:44 +0200",
8371
- "context_window": 4096,
8372
- "max_output_tokens": 16384,
8413
+ "family": "gpt-5-nano",
8414
+ "created_at": null,
8415
+ "context_window": 400000,
8416
+ "max_output_tokens": 128000,
8373
8417
  "knowledge_cutoff": null,
8374
8418
  "modalities": {
8375
8419
  "input": [
8420
+ "image",
8376
8421
  "text"
8377
8422
  ],
8378
8423
  "output": [
8424
+ "embeddings",
8379
8425
  "text"
8380
8426
  ]
8381
8427
  },
8382
8428
  "capabilities": [
8383
- "streaming",
8384
- "reasoning"
8429
+ "batch",
8430
+ "function_calling",
8431
+ "structured_output"
8385
8432
  ],
8386
8433
  "pricing": {
8387
8434
  "text_tokens": {
8388
8435
  "standard": {
8389
- "input_per_million": 0.5,
8390
- "output_per_million": 1.5
8436
+ "input_per_million": 0.05,
8437
+ "cached_input_per_million": 0.005,
8438
+ "output_per_million": 0.4
8439
+ },
8440
+ "batch": {
8441
+ "input_per_million": 0.025,
8442
+ "output_per_million": 0.2
8391
8443
  }
8392
8444
  }
8393
8445
  },
@@ -8398,30 +8450,38 @@
8398
8450
  },
8399
8451
  {
8400
8452
  "id": "gpt-5-nano-2025-08-07",
8401
- "name": "GPT-5 Nano 20250807",
8453
+ "name": "GPT-5 nano",
8402
8454
  "provider": "openai",
8403
- "family": "other",
8404
- "created_at": "2025-08-05 22:38:23 +0200",
8405
- "context_window": 4096,
8406
- "max_output_tokens": 16384,
8455
+ "family": "gpt-5-nano",
8456
+ "created_at": null,
8457
+ "context_window": 400000,
8458
+ "max_output_tokens": 128000,
8407
8459
  "knowledge_cutoff": null,
8408
8460
  "modalities": {
8409
8461
  "input": [
8462
+ "image",
8410
8463
  "text"
8411
8464
  ],
8412
8465
  "output": [
8466
+ "embeddings",
8413
8467
  "text"
8414
8468
  ]
8415
8469
  },
8416
8470
  "capabilities": [
8417
- "streaming",
8418
- "reasoning"
8471
+ "batch",
8472
+ "function_calling",
8473
+ "structured_output"
8419
8474
  ],
8420
8475
  "pricing": {
8421
8476
  "text_tokens": {
8422
8477
  "standard": {
8423
- "input_per_million": 0.5,
8424
- "output_per_million": 1.5
8478
+ "input_per_million": 0.05,
8479
+ "cached_input_per_million": 0.005,
8480
+ "output_per_million": 0.4
8481
+ },
8482
+ "batch": {
8483
+ "input_per_million": 0.025,
8484
+ "output_per_million": 0.2
8425
8485
  }
8426
8486
  }
8427
8487
  },
@@ -8490,36 +8550,66 @@
8490
8550
  "pricing": {},
8491
8551
  "metadata": {}
8492
8552
  },
8553
+ {
8554
+ "id": "gpt-oss-20b",
8555
+ "name": "gpt-oss-20b",
8556
+ "provider": "openai",
8557
+ "family": "gpt-oss-20b",
8558
+ "created_at": null,
8559
+ "context_window": 131072,
8560
+ "max_output_tokens": 131072,
8561
+ "knowledge_cutoff": null,
8562
+ "modalities": {
8563
+ "input": [
8564
+ "text"
8565
+ ],
8566
+ "output": [
8567
+ "embeddings",
8568
+ "text"
8569
+ ]
8570
+ },
8571
+ "capabilities": [
8572
+ "batch",
8573
+ "function_calling",
8574
+ "structured_output"
8575
+ ],
8576
+ "pricing": {},
8577
+ "metadata": {}
8578
+ },
8493
8579
  {
8494
8580
  "id": "o1",
8495
- "name": "O1",
8581
+ "name": "o1",
8496
8582
  "provider": "openai",
8497
8583
  "family": "o1",
8498
- "created_at": "2024-12-16 20:03:36 +0100",
8584
+ "created_at": null,
8499
8585
  "context_window": 200000,
8500
8586
  "max_output_tokens": 100000,
8501
8587
  "knowledge_cutoff": null,
8502
8588
  "modalities": {
8503
8589
  "input": [
8504
- "text",
8505
8590
  "image",
8506
- "pdf"
8591
+ "text"
8507
8592
  ],
8508
8593
  "output": [
8594
+ "embeddings",
8509
8595
  "text"
8510
8596
  ]
8511
8597
  },
8512
8598
  "capabilities": [
8513
- "streaming",
8599
+ "batch",
8514
8600
  "function_calling",
8515
- "structured_output",
8516
- "reasoning"
8601
+ "structured_output"
8517
8602
  ],
8518
8603
  "pricing": {
8519
8604
  "text_tokens": {
8520
8605
  "standard": {
8521
8606
  "input_per_million": 15.0,
8607
+ "cached_input_per_million": 7.5,
8522
8608
  "output_per_million": 60.0
8609
+ },
8610
+ "batch": {
8611
+ "input_per_million": 7.5,
8612
+ "output_per_million": 30.0
8523
8613
  }
8524
8614
  }
8525
8615
  },
@@ -8530,34 +8620,38 @@
8530
8620
  },
8531
8621
  {
8532
8622
  "id": "o1-2024-12-17",
8533
- "name": "O1-20241217",
8623
+ "name": "o1",
8534
8624
  "provider": "openai",
8535
8625
  "family": "o1",
8536
- "created_at": "2024-12-16 06:29:36 +0100",
8626
+ "created_at": null,
8537
8627
  "context_window": 200000,
8538
8628
  "max_output_tokens": 100000,
8539
8629
  "knowledge_cutoff": null,
8540
8630
  "modalities": {
8541
8631
  "input": [
8542
- "text",
8543
8632
  "image",
8544
- "pdf"
8633
+ "text"
8545
8634
  ],
8546
8635
  "output": [
8636
+ "embeddings",
8547
8637
  "text"
8548
8638
  ]
8549
8639
  },
8550
8640
  "capabilities": [
8551
- "streaming",
8641
+ "batch",
8552
8642
  "function_calling",
8553
- "structured_output",
8554
- "reasoning"
8643
+ "structured_output"
8555
8644
  ],
8556
8645
  "pricing": {
8557
8646
  "text_tokens": {
8558
8647
  "standard": {
8559
8648
  "input_per_million": 15.0,
8649
+ "cached_input_per_million": 7.5,
8560
8650
  "output_per_million": 60.0
8651
+ },
8652
+ "batch": {
8653
+ "input_per_million": 7.5,
8654
+ "output_per_million": 30.0
8561
8655
  }
8562
8656
  }
8563
8657
  },
@@ -8632,6 +8726,72 @@
8632
8726
  "owned_by": "system"
8633
8727
  }
8634
8728
  },
8729
+ {
8730
+ "id": "o1-preview",
8731
+ "name": "o1 Preview",
8732
+ "provider": "openai",
8733
+ "family": "o1-preview",
8734
+ "created_at": null,
8735
+ "context_window": 128000,
8736
+ "max_output_tokens": 32768,
8737
+ "knowledge_cutoff": null,
8738
+ "modalities": {
8739
+ "input": [
8740
+ "text"
8741
+ ],
8742
+ "output": [
8743
+ "embeddings",
8744
+ "text"
8745
+ ]
8746
+ },
8747
+ "capabilities": [
8748
+ "function_calling",
8749
+ "structured_output"
8750
+ ],
8751
+ "pricing": {
8752
+ "text_tokens": {
8753
+ "standard": {
8754
+ "input_per_million": 15.0,
8755
+ "cached_input_per_million": 7.5,
8756
+ "output_per_million": 60.0
8757
+ }
8758
+ }
8759
+ },
8760
+ "metadata": {}
8761
+ },
8762
+ {
8763
+ "id": "o1-preview-2024-09-12",
8764
+ "name": "o1 Preview",
8765
+ "provider": "openai",
8766
+ "family": "o1-preview",
8767
+ "created_at": null,
8768
+ "context_window": 128000,
8769
+ "max_output_tokens": 32768,
8770
+ "knowledge_cutoff": null,
8771
+ "modalities": {
8772
+ "input": [
8773
+ "text"
8774
+ ],
8775
+ "output": [
8776
+ "embeddings",
8777
+ "text"
8778
+ ]
8779
+ },
8780
+ "capabilities": [
8781
+ "function_calling",
8782
+ "structured_output"
8783
+ ],
8784
+ "pricing": {
8785
+ "text_tokens": {
8786
+ "standard": {
8787
+ "input_per_million": 15.0,
8788
+ "cached_input_per_million": 7.5,
8789
+ "output_per_million": 60.0
8790
+ }
8791
+ }
8792
+ },
8793
+ "metadata": {}
8794
+ },
8635
8795
  {
8636
8796
  "id": "o1-pro",
8637
8797
  "name": "o1-pro",
@@ -9235,22 +9395,21 @@
9235
9395
  },
9236
9396
  {
9237
9397
  "id": "omni-moderation-latest",
9238
- "name": "Omni Moderation Latest",
9398
+ "name": "omni-moderation",
9239
9399
  "provider": "openai",
9240
- "family": "moderation",
9241
- "created_at": "2024-11-15 17:47:45 +0100",
9400
+ "family": "omni-moderation-latest",
9401
+ "created_at": null,
9242
9402
  "context_window": null,
9243
9403
  "max_output_tokens": null,
9244
9404
  "knowledge_cutoff": null,
9245
9405
  "modalities": {
9246
9406
  "input": [
9247
- "text",
9248
9407
  "image",
9249
- "pdf"
9408
+ "text"
9250
9409
  ],
9251
9410
  "output": [
9252
- "text",
9253
- "moderation"
9411
+ "embeddings",
9412
+ "text"
9254
9413
  ]
9255
9414
  },
9256
9415
  "capabilities": [],
@@ -9262,10 +9421,10 @@
9262
9421
  },
9263
9422
  {
9264
9423
  "id": "text-embedding-3-large",
9265
- "name": "text-embedding- 3 Large",
9424
+ "name": "text-embedding-3-large",
9266
9425
  "provider": "openai",
9267
- "family": "embedding3_large",
9268
- "created_at": "2024-01-22 20:53:00 +0100",
9426
+ "family": "text-embedding-3-large",
9427
+ "created_at": null,
9269
9428
  "context_window": null,
9270
9429
  "max_output_tokens": null,
9271
9430
  "knowledge_cutoff": null,
@@ -9274,8 +9433,8 @@
9274
9433
  "text"
9275
9434
  ],
9276
9435
  "output": [
9277
- "text",
9278
- "embeddings"
9436
+ "embeddings",
9437
+ "text"
9279
9438
  ]
9280
9439
  },
9281
9440
  "capabilities": [
@@ -9284,12 +9443,18 @@
9284
9443
  "pricing": {
9285
9444
  "text_tokens": {
9286
9445
  "standard": {
9287
- "input_per_million": 0.13,
9288
- "output_per_million": 0.13
9446
+ "input_per_million": 0.13
9447
+ },
9448
+ "batch": {
9449
+ "input_per_million": 0.065
9450
+ }
9451
+ },
9452
+ "embeddings": {
9453
+ "standard": {
9454
+ "input_per_million": 0.13
9289
9455
  },
9290
9456
  "batch": {
9291
- "input_per_million": 0.065,
9292
- "output_per_million": 0.065
9457
+ "input_per_million": 0.065
9293
9458
  }
9294
9459
  }
9295
9460
  },
@@ -9344,10 +9509,10 @@
9344
9509
  },
9345
9510
  {
9346
9511
  "id": "text-embedding-ada-002",
9347
- "name": "text-embedding- Ada 002",
9512
+ "name": "text-embedding-ada-002",
9348
9513
  "provider": "openai",
9349
- "family": "embedding_ada",
9350
- "created_at": "2022-12-16 20:01:39 +0100",
9514
+ "family": "text-embedding-ada-002",
9515
+ "created_at": null,
9351
9516
  "context_window": null,
9352
9517
  "max_output_tokens": null,
9353
9518
  "knowledge_cutoff": null,
@@ -9356,8 +9521,8 @@
9356
9521
  "text"
9357
9522
  ],
9358
9523
  "output": [
9359
- "text",
9360
- "embeddings"
9524
+ "embeddings",
9525
+ "text"
9361
9526
  ]
9362
9527
  },
9363
9528
  "capabilities": [
@@ -9366,12 +9531,18 @@
9366
9531
  "pricing": {
9367
9532
  "text_tokens": {
9368
9533
  "standard": {
9369
- "input_per_million": 0.1,
9370
- "output_per_million": 0.1
9534
+ "input_per_million": 0.1
9371
9535
  },
9372
9536
  "batch": {
9373
- "input_per_million": 0.05,
9374
- "output_per_million": 0.05
9537
+ "input_per_million": 0.05
9538
+ }
9539
+ },
9540
+ "embeddings": {
9541
+ "standard": {
9542
+ "input_per_million": 0.1
9543
+ },
9544
+ "batch": {
9545
+ "input_per_million": 0.05
9375
9546
  }
9376
9547
  }
9377
9548
  },
@@ -9428,29 +9599,25 @@
9428
9599
  "id": "tts-1",
9429
9600
  "name": "TTS-1",
9430
9601
  "provider": "openai",
9431
- "family": "tts1",
9432
- "created_at": "2023-04-19 23:49:11 +0200",
9602
+ "family": "tts-1",
9603
+ "created_at": null,
9433
9604
  "context_window": null,
9434
9605
  "max_output_tokens": null,
9435
9606
  "knowledge_cutoff": null,
9436
9607
  "modalities": {
9437
9608
  "input": [
9438
- "text",
9439
- "audio"
9609
+ "text"
9440
9610
  ],
9441
9611
  "output": [
9442
- "text",
9443
- "audio"
9612
+ "audio",
9613
+ "embeddings"
9444
9614
  ]
9445
9615
  },
9446
- "capabilities": [
9447
- "streaming"
9448
- ],
9616
+ "capabilities": [],
9449
9617
  "pricing": {
9450
9618
  "text_tokens": {
9451
9619
  "standard": {
9452
- "input_per_million": 15.0,
9453
- "output_per_million": 15.0
9620
+ "input_per_million": 15.0
9454
9621
  }
9455
9622
  }
9456
9623
  },
@@ -10091,8 +10258,8 @@
10091
10258
  "pricing": {
10092
10259
  "text_tokens": {
10093
10260
  "standard": {
10094
- "input_per_million": 9.0,
10095
- "output_per_million": 11.0
10261
+ "input_per_million": 4.0,
10262
+ "output_per_million": 5.5
10096
10263
  }
10097
10264
  }
10098
10265
  },
@@ -10381,7 +10548,7 @@
10381
10548
  "family": "anthracite-org",
10382
10549
  "created_at": "2024-10-22 02:00:00 +0200",
10383
10550
  "context_window": 16384,
10384
- "max_output_tokens": 1024,
10551
+ "max_output_tokens": 2048,
10385
10552
  "knowledge_cutoff": null,
10386
10553
  "modalities": {
10387
10554
  "input": [
@@ -10398,8 +10565,8 @@
10398
10565
  "pricing": {
10399
10566
  "text_tokens": {
10400
10567
  "standard": {
10401
- "input_per_million": 2.5,
10402
- "output_per_million": 3.0
10568
+ "input_per_million": 2.0,
10569
+ "output_per_million": 5.0
10403
10570
  }
10404
10571
  }
10405
10572
  },
@@ -10418,7 +10585,7 @@
10418
10585
  },
10419
10586
  "top_provider": {
10420
10587
  "context_length": 16384,
10421
- "max_completion_tokens": 1024,
10588
+ "max_completion_tokens": 2048,
10422
10589
  "is_moderated": false
10423
10590
  },
10424
10591
  "per_request_limits": null,
@@ -10882,70 +11049,6 @@
10882
11049
  ]
10883
11050
  }
10884
11051
  },
10885
- {
10886
- "id": "anthropic/claude-3.7-sonnet:beta",
10887
- "name": "Anthropic: Claude 3.7 Sonnet (self-moderated)",
10888
- "provider": "openrouter",
10889
- "family": "anthropic",
10890
- "created_at": "2025-02-24 19:35:10 +0100",
10891
- "context_window": 200000,
10892
- "max_output_tokens": 128000,
10893
- "knowledge_cutoff": null,
10894
- "modalities": {
10895
- "input": [
10896
- "text",
10897
- "image",
10898
- "file"
10899
- ],
10900
- "output": [
10901
- "text"
10902
- ]
10903
- },
10904
- "capabilities": [
10905
- "streaming",
10906
- "function_calling"
10907
- ],
10908
- "pricing": {
10909
- "text_tokens": {
10910
- "standard": {
10911
- "input_per_million": 3.0,
10912
- "output_per_million": 15.0,
10913
- "cached_input_per_million": 0.3
10914
- }
10915
- }
10916
- },
10917
- "metadata": {
10918
- "description": "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. \n\nClaude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks.\n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)",
10919
- "architecture": {
10920
- "modality": "text+image->text",
10921
- "input_modalities": [
10922
- "text",
10923
- "image",
10924
- "file"
10925
- ],
10926
- "output_modalities": [
10927
- "text"
10928
- ],
10929
- "tokenizer": "Claude",
10930
- "instruct_type": null
10931
- },
10932
- "top_provider": {
10933
- "context_length": 200000,
10934
- "max_completion_tokens": 128000,
10935
- "is_moderated": false
10936
- },
10937
- "per_request_limits": null,
10938
- "supported_parameters": [
10939
- "include_reasoning",
10940
- "max_tokens",
10941
- "reasoning",
10942
- "stop",
10943
- "temperature",
10944
- "tool_choice",
10945
- "tools"
10946
- ]
10947
- }
10948
- },
10949
11052
  {
10950
11053
  "id": "anthropic/claude-3.7-sonnet:thinking",
10951
11054
  "name": "Anthropic: Claude 3.7 Sonnet (thinking)",
@@ -11587,13 +11690,13 @@
11587
11690
  }
11588
11691
  },
11589
11692
  {
11590
- "id": "baidu/ernie-4.5-300b-a47b",
11591
- "name": "Baidu: ERNIE 4.5 300B A47B ",
11693
+ "id": "baidu/ernie-4.5-21b-a3b",
11694
+ "name": "Baidu: ERNIE 4.5 21B A3B",
11592
11695
  "provider": "openrouter",
11593
11696
  "family": "baidu",
11594
- "created_at": "2025-06-30 18:15:39 +0200",
11595
- "context_window": 123000,
11596
- "max_output_tokens": 12000,
11697
+ "created_at": "2025-08-12 23:29:27 +0200",
11698
+ "context_window": 120000,
11699
+ "max_output_tokens": 8000,
11597
11700
  "knowledge_cutoff": null,
11598
11701
  "modalities": {
11599
11702
  "input": [
@@ -11610,13 +11713,13 @@
11610
11713
  "pricing": {
11611
11714
  "text_tokens": {
11612
11715
  "standard": {
11613
- "input_per_million": 0.28,
11614
- "output_per_million": 1.1
11716
+ "input_per_million": 0.07,
11717
+ "output_per_million": 0.28
11615
11718
  }
11616
11719
  }
11617
11720
  },
11618
11721
  "metadata": {
11619
- "description": "ERNIE-4.5-300B-A47B is a 300B parameter Mixture-of-Experts (MoE) language model developed by Baidu as part of the ERNIE 4.5 series. It activates 47B parameters per token and supports text generation in both English and Chinese. Optimized for high-throughput inference and efficient scaling, it uses a heterogeneous MoE structure with advanced routing and quantization strategies, including FP8 and 2-bit formats. This version is fine-tuned for language-only tasks and supports reasoning, tool parameters, and extended context lengths up to 131k tokens. Suitable for general-purpose LLM applications with high reasoning and throughput demands.",
11722
+ "description": "A sophisticated text-based Mixture-of-Experts (MoE) model featuring 21B total parameters with 3B activated per token, delivering exceptional multimodal understanding and generation through heterogeneous MoE structures and modality-isolated routing. Supporting an extensive 131K token context length, the model achieves efficient inference via multi-expert parallel collaboration and quantization, while advanced post-training techniques including SFT, DPO, and UPO ensure optimized performance across diverse applications with specialized routing and balancing losses for superior task handling.",
11620
11723
  "architecture": {
11621
11724
  "modality": "text->text",
11622
11725
  "input_modalities": [
@@ -11629,8 +11732,8 @@
11629
11732
  "instruct_type": null
11630
11733
  },
11631
11734
  "top_provider": {
11632
- "context_length": 123000,
11633
- "max_completion_tokens": 12000,
11735
+ "context_length": 120000,
11736
+ "max_completion_tokens": 8000,
11634
11737
  "is_moderated": false
11635
11738
  },
11636
11739
  "per_request_limits": null,
@@ -11650,7 +11753,204 @@
11650
11753
  }
11651
11754
  },
11652
11755
  {
11653
- "id": "bytedance/ui-tars-1.5-7b",
11756
+ "id": "baidu/ernie-4.5-300b-a47b",
11757
+ "name": "Baidu: ERNIE 4.5 300B A47B ",
11758
+ "provider": "openrouter",
11759
+ "family": "baidu",
11760
+ "created_at": "2025-06-30 18:15:39 +0200",
11761
+ "context_window": 123000,
11762
+ "max_output_tokens": 12000,
11763
+ "knowledge_cutoff": null,
11764
+ "modalities": {
11765
+ "input": [
11766
+ "text"
11767
+ ],
11768
+ "output": [
11769
+ "text"
11770
+ ]
11771
+ },
11772
+ "capabilities": [
11773
+ "streaming",
11774
+ "predicted_outputs"
11775
+ ],
11776
+ "pricing": {
11777
+ "text_tokens": {
11778
+ "standard": {
11779
+ "input_per_million": 0.28,
11780
+ "output_per_million": 1.1
11781
+ }
11782
+ }
11783
+ },
11784
+ "metadata": {
11785
+ "description": "ERNIE-4.5-300B-A47B is a 300B parameter Mixture-of-Experts (MoE) language model developed by Baidu as part of the ERNIE 4.5 series. It activates 47B parameters per token and supports text generation in both English and Chinese. Optimized for high-throughput inference and efficient scaling, it uses a heterogeneous MoE structure with advanced routing and quantization strategies, including FP8 and 2-bit formats. This version is fine-tuned for language-only tasks and supports reasoning, tool parameters, and extended context lengths up to 131k tokens. Suitable for general-purpose LLM applications with high reasoning and throughput demands.",
11786
+ "architecture": {
11787
+ "modality": "text->text",
11788
+ "input_modalities": [
11789
+ "text"
11790
+ ],
11791
+ "output_modalities": [
11792
+ "text"
11793
+ ],
11794
+ "tokenizer": "Other",
11795
+ "instruct_type": null
11796
+ },
11797
+ "top_provider": {
11798
+ "context_length": 123000,
11799
+ "max_completion_tokens": 12000,
11800
+ "is_moderated": false
11801
+ },
11802
+ "per_request_limits": null,
11803
+ "supported_parameters": [
11804
+ "frequency_penalty",
11805
+ "logit_bias",
11806
+ "max_tokens",
11807
+ "min_p",
11808
+ "presence_penalty",
11809
+ "repetition_penalty",
11810
+ "seed",
11811
+ "stop",
11812
+ "temperature",
11813
+ "top_k",
11814
+ "top_p"
11815
+ ]
11816
+ }
11817
+ },
11818
+ {
11819
+ "id": "baidu/ernie-4.5-vl-28b-a3b",
11820
+ "name": "Baidu: ERNIE 4.5 VL 28B A3B",
11821
+ "provider": "openrouter",
11822
+ "family": "baidu",
11823
+ "created_at": "2025-08-12 23:07:16 +0200",
11824
+ "context_window": 30000,
11825
+ "max_output_tokens": 8000,
11826
+ "knowledge_cutoff": null,
11827
+ "modalities": {
11828
+ "input": [
11829
+ "text",
11830
+ "image"
11831
+ ],
11832
+ "output": [
11833
+ "text"
11834
+ ]
11835
+ },
11836
+ "capabilities": [
11837
+ "streaming",
11838
+ "predicted_outputs"
11839
+ ],
11840
+ "pricing": {
11841
+ "text_tokens": {
11842
+ "standard": {
11843
+ "input_per_million": 0.14,
11844
+ "output_per_million": 0.56
11845
+ }
11846
+ }
11847
+ },
11848
+ "metadata": {
11849
+ "description": "A powerful multimodal Mixture-of-Experts chat model featuring 28B total parameters with 3B activated per token, delivering exceptional text and vision understanding through its innovative heterogeneous MoE structure with modality-isolated routing. Built with scaling-efficient infrastructure for high-throughput training and inference, the model leverages advanced post-training techniques including SFT, DPO, and UPO for optimized performance, while supporting an impressive 131K context length and RLVR alignment for superior cross-modal reasoning and generation capabilities.",
11850
+ "architecture": {
11851
+ "modality": "text+image->text",
11852
+ "input_modalities": [
11853
+ "text",
11854
+ "image"
11855
+ ],
11856
+ "output_modalities": [
11857
+ "text"
11858
+ ],
11859
+ "tokenizer": "Other",
11860
+ "instruct_type": null
11861
+ },
11862
+ "top_provider": {
11863
+ "context_length": 30000,
11864
+ "max_completion_tokens": 8000,
11865
+ "is_moderated": false
11866
+ },
11867
+ "per_request_limits": null,
11868
+ "supported_parameters": [
11869
+ "frequency_penalty",
11870
+ "include_reasoning",
11871
+ "logit_bias",
11872
+ "max_tokens",
11873
+ "min_p",
11874
+ "presence_penalty",
11875
+ "reasoning",
11876
+ "repetition_penalty",
11877
+ "seed",
11878
+ "stop",
11879
+ "temperature",
11880
+ "top_k",
11881
+ "top_p"
11882
+ ]
11883
+ }
11884
+ },
11885
+ {
11886
+ "id": "baidu/ernie-4.5-vl-424b-a47b",
11887
+ "name": "Baidu: ERNIE 4.5 VL 424B A47B ",
11888
+ "provider": "openrouter",
11889
+ "family": "baidu",
11890
+ "created_at": "2025-06-30 18:28:23 +0200",
11891
+ "context_window": 123000,
11892
+ "max_output_tokens": 16000,
11893
+ "knowledge_cutoff": null,
11894
+ "modalities": {
11895
+ "input": [
11896
+ "image",
11897
+ "text"
11898
+ ],
11899
+ "output": [
11900
+ "text"
11901
+ ]
11902
+ },
11903
+ "capabilities": [
11904
+ "streaming",
11905
+ "predicted_outputs"
11906
+ ],
11907
+ "pricing": {
11908
+ "text_tokens": {
11909
+ "standard": {
11910
+ "input_per_million": 0.42,
11911
+ "output_per_million": 1.25
11912
+ }
11913
+ }
11914
+ },
11915
+ "metadata": {
11916
+ "description": "ERNIE-4.5-VL-424B-A47B is a multimodal Mixture-of-Experts (MoE) model from Baidu’s ERNIE 4.5 series, featuring 424B total parameters with 47B active per token. It is trained jointly on text and image data using a heterogeneous MoE architecture and modality-isolated routing to enable high-fidelity cross-modal reasoning, image understanding, and long-context generation (up to 131k tokens). Fine-tuned with techniques like SFT, DPO, UPO, and RLVR, this model supports both “thinking” and non-thinking inference modes. Designed for vision-language tasks in English and Chinese, it is optimized for efficient scaling and can operate under 4-bit/8-bit quantization.",
11917
+ "architecture": {
11918
+ "modality": "text+image->text",
11919
+ "input_modalities": [
11920
+ "image",
11921
+ "text"
11922
+ ],
11923
+ "output_modalities": [
11924
+ "text"
11925
+ ],
11926
+ "tokenizer": "Other",
11927
+ "instruct_type": null
11928
+ },
11929
+ "top_provider": {
11930
+ "context_length": 123000,
11931
+ "max_completion_tokens": 16000,
11932
+ "is_moderated": false
11933
+ },
11934
+ "per_request_limits": null,
11935
+ "supported_parameters": [
11936
+ "frequency_penalty",
11937
+ "include_reasoning",
11938
+ "logit_bias",
11939
+ "max_tokens",
11940
+ "min_p",
11941
+ "presence_penalty",
11942
+ "reasoning",
11943
+ "repetition_penalty",
11944
+ "seed",
11945
+ "stop",
11946
+ "temperature",
11947
+ "top_k",
11948
+ "top_p"
11949
+ ]
11950
+ }
11951
+ },
11952
+ {
11953
+ "id": "bytedance/ui-tars-1.5-7b",
11654
11954
  "name": "Bytedance: UI-TARS 7B ",
11655
11955
  "provider": "openrouter",
11656
11956
  "family": "bytedance",
@@ -12675,8 +12975,8 @@
12675
12975
  "pricing": {
12676
12976
  "text_tokens": {
12677
12977
  "standard": {
12678
- "input_per_million": 0.17992692,
12679
- "output_per_million": 0.7200576000000001
12978
+ "input_per_million": 0.1999188,
12979
+ "output_per_million": 0.800064
12680
12980
  }
12681
12981
  }
12682
12982
  },
@@ -12746,8 +13046,8 @@
12746
13046
  "pricing": {
12747
13047
  "text_tokens": {
12748
13048
  "standard": {
12749
- "input_per_million": 0.17992692,
12750
- "output_per_million": 0.7200576000000001
13049
+ "input_per_million": 0.1999188,
13050
+ "output_per_million": 0.800064
12751
13051
  }
12752
13052
  }
12753
13053
  },
@@ -12798,7 +13098,7 @@
12798
13098
  "family": "deepseek",
12799
13099
  "created_at": "2025-03-24 14:59:15 +0100",
12800
13100
  "context_window": 163840,
12801
- "max_output_tokens": 163840,
13101
+ "max_output_tokens": null,
12802
13102
  "knowledge_cutoff": null,
12803
13103
  "modalities": {
12804
13104
  "input": [
@@ -12829,7 +13129,7 @@
12829
13129
  },
12830
13130
  "top_provider": {
12831
13131
  "context_length": 163840,
12832
- "max_completion_tokens": 163840,
13132
+ "max_completion_tokens": null,
12833
13133
  "is_moderated": false
12834
13134
  },
12835
13135
  "per_request_limits": null,
@@ -12853,11 +13153,11 @@
12853
13153
  }
12854
13154
  },
12855
13155
  {
12856
- "id": "deepseek/deepseek-prover-v2",
12857
- "name": "DeepSeek: DeepSeek Prover V2",
13156
+ "id": "deepseek/deepseek-chat-v3.1",
13157
+ "name": "DeepSeek: DeepSeek V3.1",
12858
13158
  "provider": "openrouter",
12859
13159
  "family": "deepseek",
12860
- "created_at": "2025-04-30 13:38:14 +0200",
13160
+ "created_at": "2025-08-21 14:33:48 +0200",
12861
13161
  "context_window": 163840,
12862
13162
  "max_output_tokens": null,
12863
13163
  "knowledge_cutoff": null,
@@ -12871,18 +13171,20 @@
12871
13171
  },
12872
13172
  "capabilities": [
12873
13173
  "streaming",
12874
- "structured_output"
13174
+ "function_calling",
13175
+ "structured_output",
13176
+ "predicted_outputs"
12875
13177
  ],
12876
13178
  "pricing": {
12877
13179
  "text_tokens": {
12878
13180
  "standard": {
12879
- "input_per_million": 0.5,
12880
- "output_per_million": 2.1799999999999997
13181
+ "input_per_million": 0.19999999999999998,
13182
+ "output_per_million": 0.7999999999999999
12881
13183
  }
12882
13184
  }
12883
13185
  },
12884
13186
  "metadata": {
12885
- "description": "DeepSeek Prover V2 is a 671B parameter model, speculated to be geared towards logic and mathematics. Likely an upgrade from [DeepSeek-Prover-V1.5](https://huggingface.co/deepseek-ai/DeepSeek-Prover-V1.5-RL) Not much is known about the model yet, as DeepSeek released it on Hugging Face without an announcement or description.",
13187
+ "description": "DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. \n\nIt succeeds the [DeepSeek V3-0324](/deepseek/deepseek-chat-v3-0324) model and performs well on a variety of tasks.",
12886
13188
  "architecture": {
12887
13189
  "modality": "text->text",
12888
13190
  "input_modalities": [
@@ -12892,7 +13194,7 @@
12892
13194
  "text"
12893
13195
  ],
12894
13196
  "tokenizer": "DeepSeek",
12895
- "instruct_type": null
13197
+ "instruct_type": "deepseek-v3.1"
12896
13198
  },
12897
13199
  "top_provider": {
12898
13200
  "context_length": 163840,
@@ -12902,27 +13204,35 @@
12902
13204
  "per_request_limits": null,
12903
13205
  "supported_parameters": [
12904
13206
  "frequency_penalty",
13207
+ "include_reasoning",
13208
+ "logit_bias",
13209
+ "logprobs",
12905
13210
  "max_tokens",
12906
13211
  "min_p",
12907
13212
  "presence_penalty",
13213
+ "reasoning",
12908
13214
  "repetition_penalty",
12909
13215
  "response_format",
12910
13216
  "seed",
12911
13217
  "stop",
13218
+ "structured_outputs",
12912
13219
  "temperature",
13220
+ "tool_choice",
13221
+ "tools",
12913
13222
  "top_k",
13223
+ "top_logprobs",
12914
13224
  "top_p"
12915
13225
  ]
12916
13226
  }
12917
13227
  },
12918
13228
  {
12919
- "id": "deepseek/deepseek-r1",
12920
- "name": "DeepSeek: R1",
13229
+ "id": "deepseek/deepseek-prover-v2",
13230
+ "name": "DeepSeek: DeepSeek Prover V2",
12921
13231
  "provider": "openrouter",
12922
13232
  "family": "deepseek",
12923
- "created_at": "2025-01-20 14:51:35 +0100",
13233
+ "created_at": "2025-04-30 13:38:14 +0200",
12924
13234
  "context_window": 163840,
12925
- "max_output_tokens": 163840,
13235
+ "max_output_tokens": null,
12926
13236
  "knowledge_cutoff": null,
12927
13237
  "modalities": {
12928
13238
  "input": [
@@ -12934,20 +13244,18 @@
12934
13244
  },
12935
13245
  "capabilities": [
12936
13246
  "streaming",
12937
- "function_calling",
12938
- "structured_output",
12939
- "predicted_outputs"
13247
+ "structured_output"
12940
13248
  ],
12941
13249
  "pricing": {
12942
13250
  "text_tokens": {
12943
13251
  "standard": {
12944
- "input_per_million": 0.39999999999999997,
12945
- "output_per_million": 2.0
13252
+ "input_per_million": 0.5,
13253
+ "output_per_million": 2.1799999999999997
12946
13254
  }
12947
13255
  }
12948
13256
  },
12949
13257
  "metadata": {
12950
- "description": "DeepSeek R1 is here: Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model & [technical report](https://api-docs.deepseek.com/news/news250120).\n\nMIT licensed: Distill & commercialize freely!",
13258
+ "description": "DeepSeek Prover V2 is a 671B parameter model, speculated to be geared towards logic and mathematics. Likely an upgrade from [DeepSeek-Prover-V1.5](https://huggingface.co/deepseek-ai/DeepSeek-Prover-V1.5-RL) Not much is known about the model yet, as DeepSeek released it on Hugging Face without an announcement or description.",
12951
13259
  "architecture": {
12952
13260
  "modality": "text->text",
12953
13261
  "input_modalities": [
@@ -12957,45 +13265,37 @@
12957
13265
  "text"
12958
13266
  ],
12959
13267
  "tokenizer": "DeepSeek",
12960
- "instruct_type": "deepseek-r1"
13268
+ "instruct_type": null
12961
13269
  },
12962
13270
  "top_provider": {
12963
13271
  "context_length": 163840,
12964
- "max_completion_tokens": 163840,
13272
+ "max_completion_tokens": null,
12965
13273
  "is_moderated": false
12966
13274
  },
12967
13275
  "per_request_limits": null,
12968
13276
  "supported_parameters": [
12969
13277
  "frequency_penalty",
12970
- "include_reasoning",
12971
- "logit_bias",
12972
- "logprobs",
12973
13278
  "max_tokens",
12974
13279
  "min_p",
12975
13280
  "presence_penalty",
12976
- "reasoning",
12977
13281
  "repetition_penalty",
12978
13282
  "response_format",
12979
13283
  "seed",
12980
13284
  "stop",
12981
- "structured_outputs",
12982
13285
  "temperature",
12983
- "tool_choice",
12984
- "tools",
12985
13286
  "top_k",
12986
- "top_logprobs",
12987
13287
  "top_p"
12988
13288
  ]
12989
13289
  }
12990
13290
  },
12991
13291
  {
12992
- "id": "deepseek/deepseek-r1-0528",
12993
- "name": "DeepSeek: R1 0528",
13292
+ "id": "deepseek/deepseek-r1",
13293
+ "name": "DeepSeek: R1",
12994
13294
  "provider": "openrouter",
12995
13295
  "family": "deepseek",
12996
- "created_at": "2025-05-28 19:59:30 +0200",
13296
+ "created_at": "2025-01-20 14:51:35 +0100",
12997
13297
  "context_window": 163840,
12998
- "max_output_tokens": null,
13298
+ "max_output_tokens": 163840,
12999
13299
  "knowledge_cutoff": null,
13000
13300
  "modalities": {
13001
13301
  "input": [
@@ -13014,13 +13314,86 @@
13014
13314
  "pricing": {
13015
13315
  "text_tokens": {
13016
13316
  "standard": {
13017
- "input_per_million": 0.17992692,
13018
- "output_per_million": 0.7200576000000001
13317
+ "input_per_million": 0.39999999999999997,
13318
+ "output_per_million": 2.0
13019
13319
  }
13020
13320
  }
13021
13321
  },
13022
13322
  "metadata": {
13023
- "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model.",
13323
+ "description": "DeepSeek R1 is here: Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model & [technical report](https://api-docs.deepseek.com/news/news250120).\n\nMIT licensed: Distill & commercialize freely!",
13324
+ "architecture": {
13325
+ "modality": "text->text",
13326
+ "input_modalities": [
13327
+ "text"
13328
+ ],
13329
+ "output_modalities": [
13330
+ "text"
13331
+ ],
13332
+ "tokenizer": "DeepSeek",
13333
+ "instruct_type": "deepseek-r1"
13334
+ },
13335
+ "top_provider": {
13336
+ "context_length": 163840,
13337
+ "max_completion_tokens": 163840,
13338
+ "is_moderated": false
13339
+ },
13340
+ "per_request_limits": null,
13341
+ "supported_parameters": [
13342
+ "frequency_penalty",
13343
+ "include_reasoning",
13344
+ "logit_bias",
13345
+ "logprobs",
13346
+ "max_tokens",
13347
+ "min_p",
13348
+ "presence_penalty",
13349
+ "reasoning",
13350
+ "repetition_penalty",
13351
+ "response_format",
13352
+ "seed",
13353
+ "stop",
13354
+ "structured_outputs",
13355
+ "temperature",
13356
+ "tool_choice",
13357
+ "tools",
13358
+ "top_k",
13359
+ "top_logprobs",
13360
+ "top_p"
13361
+ ]
13362
+ }
13363
+ },
13364
+ {
13365
+ "id": "deepseek/deepseek-r1-0528",
13366
+ "name": "DeepSeek: R1 0528",
13367
+ "provider": "openrouter",
13368
+ "family": "deepseek",
13369
+ "created_at": "2025-05-28 19:59:30 +0200",
13370
+ "context_window": 163840,
13371
+ "max_output_tokens": null,
13372
+ "knowledge_cutoff": null,
13373
+ "modalities": {
13374
+ "input": [
13375
+ "text"
13376
+ ],
13377
+ "output": [
13378
+ "text"
13379
+ ]
13380
+ },
13381
+ "capabilities": [
13382
+ "streaming",
13383
+ "function_calling",
13384
+ "structured_output",
13385
+ "predicted_outputs"
13386
+ ],
13387
+ "pricing": {
13388
+ "text_tokens": {
13389
+ "standard": {
13390
+ "input_per_million": 0.1999188,
13391
+ "output_per_million": 0.800064
13392
+ }
13393
+ }
13394
+ },
13395
+ "metadata": {
13396
+ "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model.",
13024
13397
  "architecture": {
13025
13398
  "modality": "text->text",
13026
13399
  "input_modalities": [
@@ -13274,8 +13647,8 @@
13274
13647
  "pricing": {
13275
13648
  "text_tokens": {
13276
13649
  "standard": {
13277
- "input_per_million": 0.0333198,
13278
- "output_per_million": 0.13334400000000002
13650
+ "input_per_million": 0.025915399999999998,
13651
+ "output_per_million": 0.103712
13279
13652
  }
13280
13653
  }
13281
13654
  },
@@ -13311,7 +13684,6 @@
13311
13684
  "response_format",
13312
13685
  "seed",
13313
13686
  "stop",
13314
- "structured_outputs",
13315
13687
  "temperature",
13316
13688
  "tool_choice",
13317
13689
  "tools",
@@ -13751,11 +14123,11 @@
13751
14123
  }
13752
14124
  },
13753
14125
  {
13754
- "id": "deepseek/deepseek-v3-base",
13755
- "name": "DeepSeek: DeepSeek V3 Base",
14126
+ "id": "deepseek/deepseek-v3.1-base",
14127
+ "name": "DeepSeek: DeepSeek V3.1 Base",
13756
14128
  "provider": "openrouter",
13757
14129
  "family": "deepseek",
13758
- "created_at": "2025-03-29 19:13:43 +0100",
14130
+ "created_at": "2025-08-20 23:56:57 +0200",
13759
14131
  "context_window": 163840,
13760
14132
  "max_output_tokens": null,
13761
14133
  "knowledge_cutoff": null,
@@ -13774,13 +14146,13 @@
13774
14146
  "pricing": {
13775
14147
  "text_tokens": {
13776
14148
  "standard": {
13777
- "input_per_million": 0.1999188,
13778
- "output_per_million": 0.800064
14149
+ "input_per_million": 0.19999999999999998,
14150
+ "output_per_million": 0.7999999999999999
13779
14151
  }
13780
14152
  }
13781
14153
  },
13782
14154
  "metadata": {
13783
- "description": "Note that this is a base model mostly meant for testing, you need to provide detailed prompts for the model to return useful responses. \n\nDeepSeek-V3 Base is a 671B parameter open Mixture-of-Experts (MoE) language model with 37B active parameters per forward pass and a context length of 128K tokens. Trained on 14.8T tokens using FP8 mixed precision, it achieves high training efficiency and stability, with strong performance across language, reasoning, math, and coding tasks. \n\nDeepSeek-V3 Base is the pre-trained model behind [DeepSeek V3](/deepseek/deepseek-chat-v3)",
14155
+ "description": "This is a base model, trained only for raw next-token prediction. Unlike instruct/chat models, it has not been fine-tuned to follow user instructions. Prompts need to be written more like training text or examples rather than simple requests (e.g., “Translate the following sentence…” instead of just “Translate this”).\n\nDeepSeek-V3.1 Base is a 671B parameter open Mixture-of-Experts (MoE) language model with 37B active parameters per forward pass and a context length of 128K tokens. Trained on 14.8T tokens using FP8 mixed precision, it achieves high training efficiency and stability, with strong performance across language, reasoning, math, and coding tasks. \n",
13784
14156
  "architecture": {
13785
14157
  "modality": "text->text",
13786
14158
  "input_modalities": [
@@ -13790,7 +14162,7 @@
13790
14162
  "text"
13791
14163
  ],
13792
14164
  "tokenizer": "DeepSeek",
13793
- "instruct_type": null
14165
+ "instruct_type": "none"
13794
14166
  },
13795
14167
  "top_provider": {
13796
14168
  "context_length": 163840,
@@ -13876,60 +14248,6 @@
13876
14248
  ]
13877
14249
  }
13878
14250
  },
13879
- {
13880
- "id": "featherless/qwerky-72b:free",
13881
- "name": "Qrwkv 72B (free)",
13882
- "provider": "openrouter",
13883
- "family": "featherless",
13884
- "created_at": "2025-03-20 15:39:57 +0100",
13885
- "context_window": 32768,
13886
- "max_output_tokens": 4096,
13887
- "knowledge_cutoff": null,
13888
- "modalities": {
13889
- "input": [
13890
- "text"
13891
- ],
13892
- "output": [
13893
- "text"
13894
- ]
13895
- },
13896
- "capabilities": [
13897
- "streaming"
13898
- ],
13899
- "pricing": {},
13900
- "metadata": {
13901
- "description": "Qrwkv-72B is a linear-attention RWKV variant of the Qwen 2.5 72B model, optimized to significantly reduce computational cost at scale. Leveraging linear attention, it achieves substantial inference speedups (>1000x) while retaining competitive accuracy on common benchmarks like ARC, HellaSwag, Lambada, and MMLU. It inherits knowledge and language support from Qwen 2.5, supporting approximately 30 languages, making it suitable for efficient inference in large-context applications.",
13902
- "architecture": {
13903
- "modality": "text->text",
13904
- "input_modalities": [
13905
- "text"
13906
- ],
13907
- "output_modalities": [
13908
- "text"
13909
- ],
13910
- "tokenizer": "Other",
13911
- "instruct_type": null
13912
- },
13913
- "top_provider": {
13914
- "context_length": 32768,
13915
- "max_completion_tokens": 4096,
13916
- "is_moderated": false
13917
- },
13918
- "per_request_limits": null,
13919
- "supported_parameters": [
13920
- "frequency_penalty",
13921
- "max_tokens",
13922
- "min_p",
13923
- "presence_penalty",
13924
- "repetition_penalty",
13925
- "seed",
13926
- "stop",
13927
- "temperature",
13928
- "top_k",
13929
- "top_p"
13930
- ]
13931
- }
13932
- },
13933
14251
  {
13934
14252
  "id": "google/gemini-2.0-flash-001",
13935
14253
  "name": "Google: Gemini 2.0 Flash",
@@ -14068,7 +14386,8 @@
14068
14386
  "input": [
14069
14387
  "text",
14070
14388
  "image",
14071
- "file"
14389
+ "file",
14390
+ "audio"
14072
14391
  ],
14073
14392
  "output": [
14074
14393
  "text"
@@ -14094,7 +14413,8 @@
14094
14413
  "input_modalities": [
14095
14414
  "text",
14096
14415
  "image",
14097
- "file"
14416
+ "file",
14417
+ "audio"
14098
14418
  ],
14099
14419
  "output_modalities": [
14100
14420
  "text"
@@ -14192,6 +14512,123 @@
14192
14512
  ]
14193
14513
  }
14194
14514
  },
14515
+ {
14516
+ "id": "google/gemini-2.5-flash-image-preview",
14517
+ "name": "Google: Gemini 2.5 Flash Image Preview",
14518
+ "provider": "openrouter",
14519
+ "family": "google",
14520
+ "created_at": "2025-08-26 16:36:17 +0200",
14521
+ "context_window": 32768,
14522
+ "max_output_tokens": 8192,
14523
+ "knowledge_cutoff": null,
14524
+ "modalities": {
14525
+ "input": [
14526
+ "image",
14527
+ "text"
14528
+ ],
14529
+ "output": [
14530
+ "image",
14531
+ "text"
14532
+ ]
14533
+ },
14534
+ "capabilities": [
14535
+ "streaming",
14536
+ "structured_output"
14537
+ ],
14538
+ "pricing": {
14539
+ "text_tokens": {
14540
+ "standard": {
14541
+ "input_per_million": 0.3,
14542
+ "output_per_million": 2.5
14543
+ }
14544
+ }
14545
+ },
14546
+ "metadata": {
14547
+ "description": "Gemini 2.5 Flash Image Preview is a state of the art image generation model with contextual understanding. It is capable of image generation, edits, and multi-turn conversations.",
14548
+ "architecture": {
14549
+ "modality": "text+image->text+image",
14550
+ "input_modalities": [
14551
+ "image",
14552
+ "text"
14553
+ ],
14554
+ "output_modalities": [
14555
+ "image",
14556
+ "text"
14557
+ ],
14558
+ "tokenizer": "Gemini",
14559
+ "instruct_type": null
14560
+ },
14561
+ "top_provider": {
14562
+ "context_length": 32768,
14563
+ "max_completion_tokens": 8192,
14564
+ "is_moderated": false
14565
+ },
14566
+ "per_request_limits": null,
14567
+ "supported_parameters": [
14568
+ "max_tokens",
14569
+ "response_format",
14570
+ "seed",
14571
+ "structured_outputs",
14572
+ "temperature",
14573
+ "top_p"
14574
+ ]
14575
+ }
14576
+ },
14577
+ {
14578
+ "id": "google/gemini-2.5-flash-image-preview:free",
14579
+ "name": "Google: Gemini 2.5 Flash Image Preview (free)",
14580
+ "provider": "openrouter",
14581
+ "family": "google",
14582
+ "created_at": "2025-08-26 16:36:17 +0200",
14583
+ "context_window": 32768,
14584
+ "max_output_tokens": 8192,
14585
+ "knowledge_cutoff": null,
14586
+ "modalities": {
14587
+ "input": [
14588
+ "image",
14589
+ "text"
14590
+ ],
14591
+ "output": [
14592
+ "image",
14593
+ "text"
14594
+ ]
14595
+ },
14596
+ "capabilities": [
14597
+ "streaming",
14598
+ "structured_output"
14599
+ ],
14600
+ "pricing": {},
14601
+ "metadata": {
14602
+ "description": "Gemini 2.5 Flash Image Preview is a state of the art image generation model with contextual understanding. It is capable of image generation, edits, and multi-turn conversations.",
14603
+ "architecture": {
14604
+ "modality": "text+image->text+image",
14605
+ "input_modalities": [
14606
+ "image",
14607
+ "text"
14608
+ ],
14609
+ "output_modalities": [
14610
+ "image",
14611
+ "text"
14612
+ ],
14613
+ "tokenizer": "Gemini",
14614
+ "instruct_type": null
14615
+ },
14616
+ "top_provider": {
14617
+ "context_length": 32768,
14618
+ "max_completion_tokens": 8192,
14619
+ "is_moderated": false
14620
+ },
14621
+ "per_request_limits": null,
14622
+ "supported_parameters": [
14623
+ "max_tokens",
14624
+ "response_format",
14625
+ "seed",
14626
+ "structured_outputs",
14627
+ "temperature",
14628
+ "top_p"
14629
+ ]
14630
+ }
14631
+ },
14195
14632
  {
14196
14633
  "id": "google/gemini-2.5-flash-lite",
14197
14634
  "name": "Google: Gemini 2.5 Flash Lite",
@@ -14254,6 +14691,7 @@
14254
14691
  "reasoning",
14255
14692
  "response_format",
14256
14693
  "seed",
14694
+ "stop",
14257
14695
  "structured_outputs",
14258
14696
  "temperature",
14259
14697
  "tool_choice",
@@ -14275,7 +14713,8 @@
14275
14713
  "input": [
14276
14714
  "file",
14277
14715
  "image",
14278
- "text"
14716
+ "text",
14717
+ "audio"
14279
14718
  ],
14280
14719
  "output": [
14281
14720
  "text"
@@ -14302,7 +14741,8 @@
14302
14741
  "input_modalities": [
14303
14742
  "file",
14304
14743
  "image",
14305
- "text"
14744
+ "text",
14745
+ "audio"
14306
14746
  ],
14307
14747
  "output_modalities": [
14308
14748
  "text"
@@ -14344,7 +14784,8 @@
14344
14784
  "input": [
14345
14785
  "file",
14346
14786
  "image",
14347
- "text"
14787
+ "text",
14788
+ "audio"
14348
14789
  ],
14349
14790
  "output": [
14350
14791
  "text"
@@ -14371,7 +14812,8 @@
14371
14812
  "input_modalities": [
14372
14813
  "file",
14373
14814
  "image",
14374
- "text"
14815
+ "text",
14816
+ "audio"
14375
14817
  ],
14376
14818
  "output_modalities": [
14377
14819
  "text"
@@ -14472,7 +14914,8 @@
14472
14914
  "input": [
14473
14915
  "file",
14474
14916
  "image",
14475
- "text"
14917
+ "text",
14918
+ "audio"
14476
14919
  ],
14477
14920
  "output": [
14478
14921
  "text"
@@ -14499,7 +14942,8 @@
14499
14942
  "input_modalities": [
14500
14943
  "file",
14501
14944
  "image",
14502
- "text"
14945
+ "text",
14946
+ "audio"
14503
14947
  ],
14504
14948
  "output_modalities": [
14505
14949
  "text"
@@ -14541,7 +14985,8 @@
14541
14985
  "input": [
14542
14986
  "text",
14543
14987
  "image",
14544
- "file"
14988
+ "file",
14989
+ "audio"
14545
14990
  ],
14546
14991
  "output": [
14547
14992
  "text"
@@ -14568,7 +15013,8 @@
14568
15013
  "input_modalities": [
14569
15014
  "text",
14570
15015
  "image",
14571
- "file"
15016
+ "file",
15017
+ "audio"
14572
15018
  ],
14573
15019
  "output_modalities": [
14574
15020
  "text"
@@ -15062,7 +15508,7 @@
15062
15508
  "provider": "openrouter",
15063
15509
  "family": "google",
15064
15510
  "created_at": "2025-03-13 22:50:25 +0100",
15065
- "context_window": 96000,
15511
+ "context_window": 32768,
15066
15512
  "max_output_tokens": 8192,
15067
15513
  "knowledge_cutoff": null,
15068
15514
  "modalities": {
@@ -15076,7 +15522,6 @@
15076
15522
  },
15077
15523
  "capabilities": [
15078
15524
  "streaming",
15079
- "structured_output",
15080
15525
  "predicted_outputs"
15081
15526
  ],
15082
15527
  "pricing": {},
@@ -15095,7 +15540,7 @@
15095
15540
  "instruct_type": "gemma"
15096
15541
  },
15097
15542
  "top_provider": {
15098
- "context_length": 96000,
15543
+ "context_length": 32768,
15099
15544
  "max_completion_tokens": 8192,
15100
15545
  "is_moderated": false
15101
15546
  },
@@ -15108,10 +15553,8 @@
15108
15553
  "min_p",
15109
15554
  "presence_penalty",
15110
15555
  "repetition_penalty",
15111
- "response_format",
15112
15556
  "seed",
15113
15557
  "stop",
15114
- "structured_outputs",
15115
15558
  "temperature",
15116
15559
  "top_k",
15117
15560
  "top_logprobs",
@@ -15181,6 +15624,7 @@
15181
15624
  "response_format",
15182
15625
  "seed",
15183
15626
  "stop",
15627
+ "structured_outputs",
15184
15628
  "temperature",
15185
15629
  "top_k",
15186
15630
  "top_logprobs",
@@ -15964,73 +16408,6 @@
15964
16408
  ]
15965
16409
  }
15966
16410
  },
15967
- {
15968
- "id": "liquid/lfm-40b",
15969
- "name": "Liquid: LFM 40B MoE",
15970
- "provider": "openrouter",
15971
- "family": "liquid",
15972
- "created_at": "2024-09-30 02:00:00 +0200",
15973
- "context_window": 65536,
15974
- "max_output_tokens": 65536,
15975
- "knowledge_cutoff": null,
15976
- "modalities": {
15977
- "input": [
15978
- "text"
15979
- ],
15980
- "output": [
15981
- "text"
15982
- ]
15983
- },
15984
- "capabilities": [
15985
- "streaming",
15986
- "structured_output",
15987
- "predicted_outputs"
15988
- ],
15989
- "pricing": {
15990
- "text_tokens": {
15991
- "standard": {
15992
- "input_per_million": 0.15,
15993
- "output_per_million": 0.15
15994
- }
15995
- }
15996
- },
15997
- "metadata": {
15998
- "description": "Liquid's 40.3B Mixture of Experts (MoE) model. Liquid Foundation Models (LFMs) are large neural networks built with computational units rooted in dynamic systems.\n\nLFMs are general-purpose AI models that can be used to model any kind of sequential data, including video, audio, text, time series, and signals.\n\nSee the [launch announcement](https://www.liquid.ai/liquid-foundation-models) for benchmarks and more info.",
15999
- "architecture": {
16000
- "modality": "text->text",
16001
- "input_modalities": [
16002
- "text"
16003
- ],
16004
- "output_modalities": [
16005
- "text"
16006
- ],
16007
- "tokenizer": "Other",
16008
- "instruct_type": "chatml"
16009
- },
16010
- "top_provider": {
16011
- "context_length": 65536,
16012
- "max_completion_tokens": 65536,
16013
- "is_moderated": false
16014
- },
16015
- "per_request_limits": null,
16016
- "supported_parameters": [
16017
- "frequency_penalty",
16018
- "logit_bias",
16019
- "logprobs",
16020
- "max_tokens",
16021
- "min_p",
16022
- "presence_penalty",
16023
- "repetition_penalty",
16024
- "response_format",
16025
- "seed",
16026
- "stop",
16027
- "temperature",
16028
- "top_k",
16029
- "top_logprobs",
16030
- "top_p"
16031
- ]
16032
- }
16033
- },
16034
16411
  {
16035
16412
  "id": "liquid/lfm-7b",
16036
16413
  "name": "Liquid: LFM 7B",
@@ -16105,7 +16482,7 @@
16105
16482
  "family": "mancer",
16106
16483
  "created_at": "2023-08-02 02:00:00 +0200",
16107
16484
  "context_window": 8000,
16108
- "max_output_tokens": 1000,
16485
+ "max_output_tokens": 2000,
16109
16486
  "knowledge_cutoff": null,
16110
16487
  "modalities": {
16111
16488
  "input": [
@@ -16122,8 +16499,8 @@
16122
16499
  "pricing": {
16123
16500
  "text_tokens": {
16124
16501
  "standard": {
16125
- "input_per_million": 1.5,
16126
- "output_per_million": 1.5
16502
+ "input_per_million": 1.125,
16503
+ "output_per_million": 1.125
16127
16504
  }
16128
16505
  }
16129
16506
  },
@@ -16142,7 +16519,7 @@
16142
16519
  },
16143
16520
  "top_provider": {
16144
16521
  "context_length": 8000,
16145
- "max_completion_tokens": 1000,
16522
+ "max_completion_tokens": 2000,
16146
16523
  "is_moderated": false
16147
16524
  },
16148
16525
  "per_request_limits": null,
@@ -17143,6 +17520,61 @@
17143
17520
  ]
17144
17521
  }
17145
17522
  },
17523
+ {
17524
+ "id": "meta-llama/llama-3.3-8b-instruct:free",
17525
+ "name": "Meta: Llama 3.3 8B Instruct (free)",
17526
+ "provider": "openrouter",
17527
+ "family": "meta-llama",
17528
+ "created_at": "2025-05-14 15:42:34 +0200",
17529
+ "context_window": 128000,
17530
+ "max_output_tokens": 4028,
17531
+ "knowledge_cutoff": null,
17532
+ "modalities": {
17533
+ "input": [
17534
+ "text"
17535
+ ],
17536
+ "output": [
17537
+ "text"
17538
+ ]
17539
+ },
17540
+ "capabilities": [
17541
+ "streaming",
17542
+ "function_calling",
17543
+ "structured_output"
17544
+ ],
17545
+ "pricing": {},
17546
+ "metadata": {
17547
+ "description": "A lightweight and ultra-fast variant of Llama 3.3 70B, for use when quick response times are needed most.",
17548
+ "architecture": {
17549
+ "modality": "text->text",
17550
+ "input_modalities": [
17551
+ "text"
17552
+ ],
17553
+ "output_modalities": [
17554
+ "text"
17555
+ ],
17556
+ "tokenizer": "Llama3",
17557
+ "instruct_type": null
17558
+ },
17559
+ "top_provider": {
17560
+ "context_length": 128000,
17561
+ "max_completion_tokens": 4028,
17562
+ "is_moderated": true
17563
+ },
17564
+ "per_request_limits": null,
17565
+ "supported_parameters": [
17566
+ "max_tokens",
17567
+ "repetition_penalty",
17568
+ "response_format",
17569
+ "structured_outputs",
17570
+ "temperature",
17571
+ "tool_choice",
17572
+ "tools",
17573
+ "top_k",
17574
+ "top_p"
17575
+ ]
17576
+ }
17577
+ },
17146
17578
  {
17147
17579
  "id": "meta-llama/llama-4-maverick",
17148
17580
  "name": "Meta: Llama 4 Maverick",
@@ -17216,6 +17648,63 @@
17216
17648
  ]
17217
17649
  }
17218
17650
  },
17651
+ {
17652
+ "id": "meta-llama/llama-4-maverick:free",
17653
+ "name": "Meta: Llama 4 Maverick (free)",
17654
+ "provider": "openrouter",
17655
+ "family": "meta-llama",
17656
+ "created_at": "2025-04-05 21:37:02 +0200",
17657
+ "context_window": 128000,
17658
+ "max_output_tokens": 4028,
17659
+ "knowledge_cutoff": null,
17660
+ "modalities": {
17661
+ "input": [
17662
+ "text",
17663
+ "image"
17664
+ ],
17665
+ "output": [
17666
+ "text"
17667
+ ]
17668
+ },
17669
+ "capabilities": [
17670
+ "streaming",
17671
+ "function_calling",
17672
+ "structured_output"
17673
+ ],
17674
+ "pricing": {},
17675
+ "metadata": {
17676
+ "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.\n\nMaverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.",
17677
+ "architecture": {
17678
+ "modality": "text+image->text",
17679
+ "input_modalities": [
17680
+ "text",
17681
+ "image"
17682
+ ],
17683
+ "output_modalities": [
17684
+ "text"
17685
+ ],
17686
+ "tokenizer": "Llama4",
17687
+ "instruct_type": null
17688
+ },
17689
+ "top_provider": {
17690
+ "context_length": 128000,
17691
+ "max_completion_tokens": 4028,
17692
+ "is_moderated": true
17693
+ },
17694
+ "per_request_limits": null,
17695
+ "supported_parameters": [
17696
+ "max_tokens",
17697
+ "repetition_penalty",
17698
+ "response_format",
17699
+ "structured_outputs",
17700
+ "temperature",
17701
+ "tool_choice",
17702
+ "tools",
17703
+ "top_k",
17704
+ "top_p"
17705
+ ]
17706
+ }
17707
+ },
17219
17708
  {
17220
17709
  "id": "meta-llama/llama-4-scout",
17221
17710
  "name": "Meta: Llama 4 Scout",
@@ -17289,6 +17778,63 @@
17289
17778
  ]
17290
17779
  }
17291
17780
  },
17781
+ {
17782
+ "id": "meta-llama/llama-4-scout:free",
17783
+ "name": "Meta: Llama 4 Scout (free)",
17784
+ "provider": "openrouter",
17785
+ "family": "meta-llama",
17786
+ "created_at": "2025-04-05 21:31:59 +0200",
17787
+ "context_window": 128000,
17788
+ "max_output_tokens": 4028,
17789
+ "knowledge_cutoff": null,
17790
+ "modalities": {
17791
+ "input": [
17792
+ "text",
17793
+ "image"
17794
+ ],
17795
+ "output": [
17796
+ "text"
17797
+ ]
17798
+ },
17799
+ "capabilities": [
17800
+ "streaming",
17801
+ "function_calling",
17802
+ "structured_output"
17803
+ ],
17804
+ "pricing": {},
17805
+ "metadata": {
17806
+ "description": "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages. Designed for assistant-style interaction and visual reasoning, Scout uses 16 experts per forward pass and features a context length of 10 million tokens, with a training corpus of ~40 trillion tokens.\n\nBuilt for high efficiency and local or commercial deployment, Llama 4 Scout incorporates early fusion for seamless modality integration. It is instruction-tuned for use in multilingual chat, captioning, and image understanding tasks. Released under the Llama 4 Community License, it was last trained on data up to August 2024 and launched publicly on April 5, 2025.",
17807
+ "architecture": {
17808
+ "modality": "text+image->text",
17809
+ "input_modalities": [
17810
+ "text",
17811
+ "image"
17812
+ ],
17813
+ "output_modalities": [
17814
+ "text"
17815
+ ],
17816
+ "tokenizer": "Llama4",
17817
+ "instruct_type": null
17818
+ },
17819
+ "top_provider": {
17820
+ "context_length": 128000,
17821
+ "max_completion_tokens": 4028,
17822
+ "is_moderated": true
17823
+ },
17824
+ "per_request_limits": null,
17825
+ "supported_parameters": [
17826
+ "max_tokens",
17827
+ "repetition_penalty",
17828
+ "response_format",
17829
+ "structured_outputs",
17830
+ "temperature",
17831
+ "tool_choice",
17832
+ "tools",
17833
+ "top_k",
17834
+ "top_p"
17835
+ ]
17836
+ }
17837
+ },
17292
17838
  {
17293
17839
  "id": "meta-llama/llama-guard-2-8b",
17294
17840
  "name": "Meta: LlamaGuard 2 8B",
@@ -19021,13 +19567,13 @@
19021
19567
  }
19022
19568
  },
19023
19569
  {
19024
- "id": "mistralai/mistral-7b-instruct-v0.2",
19025
- "name": "Mistral: Mistral 7B Instruct v0.2",
19570
+ "id": "mistralai/mistral-7b-instruct-v0.3",
19571
+ "name": "Mistral: Mistral 7B Instruct v0.3",
19026
19572
  "provider": "openrouter",
19027
19573
  "family": "mistralai",
19028
- "created_at": "2023-12-28 01:00:00 +0100",
19574
+ "created_at": "2024-05-27 02:00:00 +0200",
19029
19575
  "context_window": 32768,
19030
- "max_output_tokens": null,
19576
+ "max_output_tokens": 16384,
19031
19577
  "knowledge_cutoff": null,
19032
19578
  "modalities": {
19033
19579
  "input": [
@@ -19039,18 +19585,20 @@
19039
19585
  },
19040
19586
  "capabilities": [
19041
19587
  "streaming",
19588
+ "function_calling",
19589
+ "structured_output",
19042
19590
  "predicted_outputs"
19043
19591
  ],
19044
19592
  "pricing": {
19045
19593
  "text_tokens": {
19046
19594
  "standard": {
19047
- "input_per_million": 0.19999999999999998,
19048
- "output_per_million": 0.19999999999999998
19595
+ "input_per_million": 0.028,
19596
+ "output_per_million": 0.054
19049
19597
  }
19050
19598
  }
19051
19599
  },
19052
19600
  "metadata": {
19053
- "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\nAn improved version of [Mistral 7B Instruct](/modelsmistralai/mistral-7b-instruct-v0.1), with the following changes:\n\n- 32k context window (vs 8k context in v0.1)\n- Rope-theta = 1e6\n- No Sliding-Window Attention",
19601
+ "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\nAn improved version of [Mistral 7B Instruct v0.2](/models/mistralai/mistral-7b-instruct-v0.2), with the following changes:\n\n- Extended vocabulary to 32768\n- Supports v3 Tokenizer\n- Supports function calling\n\nNOTE: Support for function calling depends on the provider.",
19054
19602
  "architecture": {
19055
19603
  "modality": "text->text",
19056
19604
  "input_modalities": [
@@ -19064,78 +19612,14 @@
19064
19612
  },
19065
19613
  "top_provider": {
19066
19614
  "context_length": 32768,
19067
- "max_completion_tokens": null,
19615
+ "max_completion_tokens": 16384,
19068
19616
  "is_moderated": false
19069
19617
  },
19070
19618
  "per_request_limits": null,
19071
19619
  "supported_parameters": [
19072
19620
  "frequency_penalty",
19073
19621
  "logit_bias",
19074
- "max_tokens",
19075
- "min_p",
19076
- "presence_penalty",
19077
- "repetition_penalty",
19078
- "stop",
19079
- "temperature",
19080
- "top_k",
19081
- "top_p"
19082
- ]
19083
- }
19084
- },
19085
- {
19086
- "id": "mistralai/mistral-7b-instruct-v0.3",
19087
- "name": "Mistral: Mistral 7B Instruct v0.3",
19088
- "provider": "openrouter",
19089
- "family": "mistralai",
19090
- "created_at": "2024-05-27 02:00:00 +0200",
19091
- "context_window": 32768,
19092
- "max_output_tokens": 16384,
19093
- "knowledge_cutoff": null,
19094
- "modalities": {
19095
- "input": [
19096
- "text"
19097
- ],
19098
- "output": [
19099
- "text"
19100
- ]
19101
- },
19102
- "capabilities": [
19103
- "streaming",
19104
- "function_calling",
19105
- "structured_output",
19106
- "predicted_outputs"
19107
- ],
19108
- "pricing": {
19109
- "text_tokens": {
19110
- "standard": {
19111
- "input_per_million": 0.028,
19112
- "output_per_million": 0.054
19113
- }
19114
- }
19115
- },
19116
- "metadata": {
19117
- "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\nAn improved version of [Mistral 7B Instruct v0.2](/models/mistralai/mistral-7b-instruct-v0.2), with the following changes:\n\n- Extended vocabulary to 32768\n- Supports v3 Tokenizer\n- Supports function calling\n\nNOTE: Support for function calling depends on the provider.",
19118
- "architecture": {
19119
- "modality": "text->text",
19120
- "input_modalities": [
19121
- "text"
19122
- ],
19123
- "output_modalities": [
19124
- "text"
19125
- ],
19126
- "tokenizer": "Mistral",
19127
- "instruct_type": "mistral"
19128
- },
19129
- "top_provider": {
19130
- "context_length": 32768,
19131
- "max_completion_tokens": 16384,
19132
- "is_moderated": false
19133
- },
19134
- "per_request_limits": null,
19135
- "supported_parameters": [
19136
- "frequency_penalty",
19137
- "logit_bias",
19138
- "logprobs",
19622
+ "logprobs",
19139
19623
  "max_tokens",
19140
19624
  "min_p",
19141
19625
  "presence_penalty",
@@ -19947,8 +20431,8 @@
19947
20431
  "pricing": {
19948
20432
  "text_tokens": {
19949
20433
  "standard": {
19950
- "input_per_million": 0.017992691999999998,
19951
- "output_per_million": 0.07200576
20434
+ "input_per_million": 0.01999188,
20435
+ "output_per_million": 0.0800064
19952
20436
  }
19953
20437
  }
19954
20438
  },
@@ -20065,7 +20549,7 @@
20065
20549
  "provider": "openrouter",
20066
20550
  "family": "mistralai",
20067
20551
  "created_at": "2025-06-20 20:10:16 +0200",
20068
- "context_window": 131072,
20552
+ "context_window": 128000,
20069
20553
  "max_output_tokens": null,
20070
20554
  "knowledge_cutoff": null,
20071
20555
  "modalities": {
@@ -20086,8 +20570,8 @@
20086
20570
  "pricing": {
20087
20571
  "text_tokens": {
20088
20572
  "standard": {
20089
- "input_per_million": 0.01999188,
20090
- "output_per_million": 0.0800064
20573
+ "input_per_million": 0.049999999999999996,
20574
+ "output_per_million": 0.09999999999999999
20091
20575
  }
20092
20576
  }
20093
20577
  },
@@ -20106,7 +20590,7 @@
20106
20590
  "instruct_type": null
20107
20591
  },
20108
20592
  "top_provider": {
20109
- "context_length": 131072,
20593
+ "context_length": 128000,
20110
20594
  "max_completion_tokens": null,
20111
20595
  "is_moderated": false
20112
20596
  },
@@ -21035,8 +21519,8 @@
21035
21519
  "provider": "openrouter",
21036
21520
  "family": "neversleep",
21037
21521
  "created_at": "2024-09-15 02:00:00 +0200",
21038
- "context_window": 40000,
21039
- "max_output_tokens": 40000,
21522
+ "context_window": 32768,
21523
+ "max_output_tokens": null,
21040
21524
  "knowledge_cutoff": null,
21041
21525
  "modalities": {
21042
21526
  "input": [
@@ -21054,8 +21538,8 @@
21054
21538
  "pricing": {
21055
21539
  "text_tokens": {
21056
21540
  "standard": {
21057
- "input_per_million": 0.16999999999999998,
21058
- "output_per_million": 0.9900000000000001
21541
+ "input_per_million": 0.09,
21542
+ "output_per_million": 0.6
21059
21543
  }
21060
21544
  }
21061
21545
  },
@@ -21073,8 +21557,8 @@
21073
21557
  "instruct_type": "llama3"
21074
21558
  },
21075
21559
  "top_provider": {
21076
- "context_length": 40000,
21077
- "max_completion_tokens": 40000,
21560
+ "context_length": 32768,
21561
+ "max_completion_tokens": null,
21078
21562
  "is_moderated": false
21079
21563
  },
21080
21564
  "per_request_limits": null,
@@ -21494,6 +21978,146 @@
21494
21978
  ]
21495
21979
  }
21496
21980
  },
21981
+ {
21982
+ "id": "nousresearch/hermes-4-405b",
21983
+ "name": "Nous: Hermes 4 405B",
21984
+ "provider": "openrouter",
21985
+ "family": "nousresearch",
21986
+ "created_at": "2025-08-26 21:11:03 +0200",
21987
+ "context_window": 131072,
21988
+ "max_output_tokens": null,
21989
+ "knowledge_cutoff": null,
21990
+ "modalities": {
21991
+ "input": [
21992
+ "text"
21993
+ ],
21994
+ "output": [
21995
+ "text"
21996
+ ]
21997
+ },
21998
+ "capabilities": [
21999
+ "streaming",
22000
+ "function_calling",
22001
+ "predicted_outputs"
22002
+ ],
22003
+ "pricing": {
22004
+ "text_tokens": {
22005
+ "standard": {
22006
+ "input_per_million": 0.1999188,
22007
+ "output_per_million": 0.800064
22008
+ }
22009
+ }
22010
+ },
22011
+ "metadata": {
22012
+ "description": "Hermes 4 is a large-scale reasoning model built on Meta-Llama-3.1-405B and released by Nous Research. It introduces a hybrid reasoning mode, where the model can choose to deliberate internally with <think>...</think> traces or respond directly, offering flexibility between speed and depth. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model is instruction-tuned with an expanded post-training corpus (~60B tokens) emphasizing reasoning traces, improving performance in math, code, STEM, and logical reasoning, while retaining broad assistant utility. It also supports structured outputs, including JSON mode, schema adherence, function calling, and tool use. Hermes 4 is trained for steerability, lower refusal rates, and alignment toward neutral, user-directed behavior.",
22013
+ "architecture": {
22014
+ "modality": "text->text",
22015
+ "input_modalities": [
22016
+ "text"
22017
+ ],
22018
+ "output_modalities": [
22019
+ "text"
22020
+ ],
22021
+ "tokenizer": "Other",
22022
+ "instruct_type": null
22023
+ },
22024
+ "top_provider": {
22025
+ "context_length": 131072,
22026
+ "max_completion_tokens": null,
22027
+ "is_moderated": false
22028
+ },
22029
+ "per_request_limits": null,
22030
+ "supported_parameters": [
22031
+ "frequency_penalty",
22032
+ "include_reasoning",
22033
+ "logit_bias",
22034
+ "logprobs",
22035
+ "max_tokens",
22036
+ "min_p",
22037
+ "presence_penalty",
22038
+ "reasoning",
22039
+ "repetition_penalty",
22040
+ "seed",
22041
+ "stop",
22042
+ "temperature",
22043
+ "tool_choice",
22044
+ "tools",
22045
+ "top_k",
22046
+ "top_logprobs",
22047
+ "top_p"
22048
+ ]
22049
+ }
22050
+ },
22051
+ {
22052
+ "id": "nousresearch/hermes-4-70b",
22053
+ "name": "Nous: Hermes 4 70B",
22054
+ "provider": "openrouter",
22055
+ "family": "nousresearch",
22056
+ "created_at": "2025-08-26 21:23:02 +0200",
22057
+ "context_window": 131072,
22058
+ "max_output_tokens": null,
22059
+ "knowledge_cutoff": null,
22060
+ "modalities": {
22061
+ "input": [
22062
+ "text"
22063
+ ],
22064
+ "output": [
22065
+ "text"
22066
+ ]
22067
+ },
22068
+ "capabilities": [
22069
+ "streaming",
22070
+ "function_calling",
22071
+ "predicted_outputs"
22072
+ ],
22073
+ "pricing": {
22074
+ "text_tokens": {
22075
+ "standard": {
22076
+ "input_per_million": 0.09329544,
22077
+ "output_per_million": 0.3733632
22078
+ }
22079
+ }
22080
+ },
22081
+ "metadata": {
22082
+ "description": "Hermes 4 70B is a hybrid reasoning model from Nous Research, built on Meta-Llama-3.1-70B. It introduces the same hybrid mode as the larger 405B release, allowing the model to either respond directly or generate explicit <think>...</think> reasoning traces before answering. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThis 70B variant is trained with the expanded post-training corpus (~60B tokens) emphasizing verified reasoning data, leading to improvements in mathematics, coding, STEM, logic, and structured outputs while maintaining general assistant performance. It supports JSON mode, schema adherence, function calling, and tool use, and is designed for greater steerability with reduced refusal rates.",
22083
+ "architecture": {
22084
+ "modality": "text->text",
22085
+ "input_modalities": [
22086
+ "text"
22087
+ ],
22088
+ "output_modalities": [
22089
+ "text"
22090
+ ],
22091
+ "tokenizer": "Llama3",
22092
+ "instruct_type": null
22093
+ },
22094
+ "top_provider": {
22095
+ "context_length": 131072,
22096
+ "max_completion_tokens": null,
22097
+ "is_moderated": false
22098
+ },
22099
+ "per_request_limits": null,
22100
+ "supported_parameters": [
22101
+ "frequency_penalty",
22102
+ "include_reasoning",
22103
+ "logit_bias",
22104
+ "logprobs",
22105
+ "max_tokens",
22106
+ "min_p",
22107
+ "presence_penalty",
22108
+ "reasoning",
22109
+ "repetition_penalty",
22110
+ "seed",
22111
+ "stop",
22112
+ "temperature",
22113
+ "tool_choice",
22114
+ "tools",
22115
+ "top_k",
22116
+ "top_logprobs",
22117
+ "top_p"
22118
+ ]
22119
+ }
22120
+ },
21497
22121
  {
21498
22122
  "id": "nousresearch/nous-hermes-2-mixtral-8x7b-dpo",
21499
22123
  "name": "Nous: Hermes 2 Mixtral 8x7B DPO",
@@ -21563,7 +22187,7 @@
21563
22187
  "family": "nvidia",
21564
22188
  "created_at": "2024-10-15 02:00:00 +0200",
21565
22189
  "context_window": 131072,
21566
- "max_output_tokens": 131072,
22190
+ "max_output_tokens": 16384,
21567
22191
  "knowledge_cutoff": null,
21568
22192
  "modalities": {
21569
22193
  "input": [
@@ -21602,14 +22226,13 @@
21602
22226
  },
21603
22227
  "top_provider": {
21604
22228
  "context_length": 131072,
21605
- "max_completion_tokens": 131072,
22229
+ "max_completion_tokens": 16384,
21606
22230
  "is_moderated": false
21607
22231
  },
21608
22232
  "per_request_limits": null,
21609
22233
  "supported_parameters": [
21610
22234
  "frequency_penalty",
21611
22235
  "logit_bias",
21612
- "logprobs",
21613
22236
  "max_tokens",
21614
22237
  "min_p",
21615
22238
  "presence_penalty",
@@ -21621,7 +22244,6 @@
21621
22244
  "tool_choice",
21622
22245
  "tools",
21623
22246
  "top_k",
21624
- "top_logprobs",
21625
22247
  "top_p"
21626
22248
  ]
21627
22249
  }
@@ -23054,19 +23676,18 @@
23054
23676
  }
23055
23677
  },
23056
23678
  {
23057
- "id": "openai/gpt-4o-mini",
23058
- "name": "OpenAI: GPT-4o-mini",
23679
+ "id": "openai/gpt-4o-audio-preview",
23680
+ "name": "OpenAI: GPT-4o Audio",
23059
23681
  "provider": "openrouter",
23060
23682
  "family": "openai",
23061
- "created_at": "2024-07-18 02:00:00 +0200",
23683
+ "created_at": "2025-08-15 06:44:21 +0200",
23062
23684
  "context_window": 128000,
23063
23685
  "max_output_tokens": 16384,
23064
23686
  "knowledge_cutoff": null,
23065
23687
  "modalities": {
23066
23688
  "input": [
23067
- "text",
23068
- "image",
23069
- "file"
23689
+ "audio",
23690
+ "text"
23070
23691
  ],
23071
23692
  "output": [
23072
23693
  "text"
@@ -23080,20 +23701,18 @@
23080
23701
  "pricing": {
23081
23702
  "text_tokens": {
23082
23703
  "standard": {
23083
- "input_per_million": 0.15,
23084
- "output_per_million": 0.6,
23085
- "cached_input_per_million": 0.075
23704
+ "input_per_million": 2.5,
23705
+ "output_per_million": 10.0
23086
23706
  }
23087
23707
  }
23088
23708
  },
23089
23709
  "metadata": {
23090
- "description": "GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs.\n\nAs their most advanced small model, it is many multiples more affordable than other recent frontier models, and more than 60% cheaper than [GPT-3.5 Turbo](/models/openai/gpt-3.5-turbo). It maintains SOTA intelligence, while being significantly more cost-effective.\n\nGPT-4o mini achieves an 82% score on MMLU and presently ranks higher than GPT-4 on chat preferences [common leaderboards](https://arena.lmsys.org/).\n\nCheck out the [launch announcement](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/) to learn more.\n\n#multimodal",
23710
+ "description": "The gpt-4o-audio-preview model adds support for audio inputs as prompts. This enhancement allows the model to detect nuances within audio recordings and add depth to generated user experiences. Audio outputs are currently not supported. Audio tokens are priced at $40 per million input audio tokens.",
23091
23711
  "architecture": {
23092
- "modality": "text+image->text",
23712
+ "modality": "text->text",
23093
23713
  "input_modalities": [
23094
- "text",
23095
- "image",
23096
- "file"
23714
+ "audio",
23715
+ "text"
23097
23716
  ],
23098
23717
  "output_modalities": [
23099
23718
  "text"
@@ -23121,14 +23740,13 @@
23121
23740
  "tool_choice",
23122
23741
  "tools",
23123
23742
  "top_logprobs",
23124
- "top_p",
23125
- "web_search_options"
23743
+ "top_p"
23126
23744
  ]
23127
23745
  }
23128
23746
  },
23129
23747
  {
23130
- "id": "openai/gpt-4o-mini-2024-07-18",
23131
- "name": "OpenAI: GPT-4o-mini (2024-07-18)",
23748
+ "id": "openai/gpt-4o-mini",
23749
+ "name": "OpenAI: GPT-4o-mini",
23132
23750
  "provider": "openrouter",
23133
23751
  "family": "openai",
23134
23752
  "created_at": "2024-07-18 02:00:00 +0200",
@@ -23200,8 +23818,81 @@
23200
23818
  }
23201
23819
  },
23202
23820
  {
23203
- "id": "openai/gpt-4o-mini-search-preview",
23204
- "name": "OpenAI: GPT-4o-mini Search Preview",
23821
+ "id": "openai/gpt-4o-mini-2024-07-18",
23822
+ "name": "OpenAI: GPT-4o-mini (2024-07-18)",
23823
+ "provider": "openrouter",
23824
+ "family": "openai",
23825
+ "created_at": "2024-07-18 02:00:00 +0200",
23826
+ "context_window": 128000,
23827
+ "max_output_tokens": 16384,
23828
+ "knowledge_cutoff": null,
23829
+ "modalities": {
23830
+ "input": [
23831
+ "text",
23832
+ "image",
23833
+ "file"
23834
+ ],
23835
+ "output": [
23836
+ "text"
23837
+ ]
23838
+ },
23839
+ "capabilities": [
23840
+ "streaming",
23841
+ "function_calling",
23842
+ "structured_output"
23843
+ ],
23844
+ "pricing": {
23845
+ "text_tokens": {
23846
+ "standard": {
23847
+ "input_per_million": 0.15,
23848
+ "output_per_million": 0.6,
23849
+ "cached_input_per_million": 0.075
23850
+ }
23851
+ }
23852
+ },
23853
+ "metadata": {
23854
+ "description": "GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs.\n\nAs their most advanced small model, it is many multiples more affordable than other recent frontier models, and more than 60% cheaper than [GPT-3.5 Turbo](/models/openai/gpt-3.5-turbo). It maintains SOTA intelligence, while being significantly more cost-effective.\n\nGPT-4o mini achieves an 82% score on MMLU and presently ranks higher than GPT-4 on chat preferences [common leaderboards](https://arena.lmsys.org/).\n\nCheck out the [launch announcement](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/) to learn more.\n\n#multimodal",
23855
+ "architecture": {
23856
+ "modality": "text+image->text",
23857
+ "input_modalities": [
23858
+ "text",
23859
+ "image",
23860
+ "file"
23861
+ ],
23862
+ "output_modalities": [
23863
+ "text"
23864
+ ],
23865
+ "tokenizer": "GPT",
23866
+ "instruct_type": null
23867
+ },
23868
+ "top_provider": {
23869
+ "context_length": 128000,
23870
+ "max_completion_tokens": 16384,
23871
+ "is_moderated": true
23872
+ },
23873
+ "per_request_limits": null,
23874
+ "supported_parameters": [
23875
+ "frequency_penalty",
23876
+ "logit_bias",
23877
+ "logprobs",
23878
+ "max_tokens",
23879
+ "presence_penalty",
23880
+ "response_format",
23881
+ "seed",
23882
+ "stop",
23883
+ "structured_outputs",
23884
+ "temperature",
23885
+ "tool_choice",
23886
+ "tools",
23887
+ "top_logprobs",
23888
+ "top_p",
23889
+ "web_search_options"
23890
+ ]
23891
+ }
23892
+ },
23893
+ {
23894
+ "id": "openai/gpt-4o-mini-search-preview",
23895
+ "name": "OpenAI: GPT-4o-mini Search Preview",
23205
23896
  "provider": "openrouter",
23206
23897
  "family": "openai",
23207
23898
  "created_at": "2025-03-12 23:22:02 +0100",
@@ -23417,7 +24108,7 @@
23417
24108
  }
23418
24109
  },
23419
24110
  "metadata": {
23420
- "description": "GPT-5 is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. It supports test-time routing features and advanced prompt understanding, including user-specified intent like \"think hard about this.\" Improvements include reductions in hallucination, sycophancy, and better performance in coding, writing, and health-related tasks.\n\nNote that BYOK is required for this model. Set up here: https://openrouter.ai/settings/integrations",
24111
+ "description": "GPT-5 is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. It supports test-time routing features and advanced prompt understanding, including user-specified intent like \"think hard about this.\" Improvements include reductions in hallucination, sycophancy, and better performance in coding, writing, and health-related tasks.",
23421
24112
  "architecture": {
23422
24113
  "modality": "text+image->text",
23423
24114
  "input_modalities": [
@@ -23650,8 +24341,8 @@
23650
24341
  "provider": "openrouter",
23651
24342
  "family": "openai",
23652
24343
  "created_at": "2025-08-05 19:17:11 +0200",
23653
- "context_window": 131072,
23654
- "max_output_tokens": null,
24344
+ "context_window": 131000,
24345
+ "max_output_tokens": 131000,
23655
24346
  "knowledge_cutoff": null,
23656
24347
  "modalities": {
23657
24348
  "input": [
@@ -23670,8 +24361,8 @@
23670
24361
  "pricing": {
23671
24362
  "text_tokens": {
23672
24363
  "standard": {
23673
- "input_per_million": 0.07256312,
23674
- "output_per_million": 0.2903936
24364
+ "input_per_million": 0.072,
24365
+ "output_per_million": 0.28
23675
24366
  }
23676
24367
  }
23677
24368
  },
@@ -23689,8 +24380,8 @@
23689
24380
  "instruct_type": null
23690
24381
  },
23691
24382
  "top_provider": {
23692
- "context_length": 131072,
23693
- "max_completion_tokens": null,
24383
+ "context_length": 131000,
24384
+ "max_completion_tokens": 131000,
23694
24385
  "is_moderated": false
23695
24386
  },
23696
24387
  "per_request_limits": null,
@@ -23723,8 +24414,8 @@
23723
24414
  "provider": "openrouter",
23724
24415
  "family": "openai",
23725
24416
  "created_at": "2025-08-05 19:17:09 +0200",
23726
- "context_window": 131072,
23727
- "max_output_tokens": null,
24417
+ "context_window": 131000,
24418
+ "max_output_tokens": 131000,
23728
24419
  "knowledge_cutoff": null,
23729
24420
  "modalities": {
23730
24421
  "input": [
@@ -23744,7 +24435,7 @@
23744
24435
  "text_tokens": {
23745
24436
  "standard": {
23746
24437
  "input_per_million": 0.04,
23747
- "output_per_million": 0.16
24438
+ "output_per_million": 0.15
23748
24439
  }
23749
24440
  }
23750
24441
  },
@@ -23762,8 +24453,8 @@
23762
24453
  "instruct_type": null
23763
24454
  },
23764
24455
  "top_provider": {
23765
- "context_length": 131072,
23766
- "max_completion_tokens": null,
24456
+ "context_length": 131000,
24457
+ "max_completion_tokens": 131000,
23767
24458
  "is_moderated": false
23768
24459
  },
23769
24460
  "per_request_limits": null,
@@ -24106,7 +24797,7 @@
24106
24797
  }
24107
24798
  },
24108
24799
  "metadata": {
24109
- "description": "o3 is a well-rounded and powerful model across domains. It sets a new standard for math, science, coding, and visual reasoning tasks. It also excels at technical writing and instruction-following. Use it to think through multi-step problems that involve analysis across text, code, and images. Note that BYOK is required for this model. Set up here: https://openrouter.ai/settings/integrations",
24800
+ "description": "o3 is a well-rounded and powerful model across domains. It sets a new standard for math, science, coding, and visual reasoning tasks. It also excels at technical writing and instruction-following. Use it to think through multi-step problems that involve analysis across text, code, and images. ",
24110
24801
  "architecture": {
24111
24802
  "modality": "text+image->text",
24112
24803
  "input_modalities": [
@@ -25068,8 +25759,8 @@
25068
25759
  "pricing": {
25069
25760
  "text_tokens": {
25070
25761
  "standard": {
25071
- "input_per_million": 0.0666396,
25072
- "output_per_million": 0.26668800000000004
25762
+ "input_per_million": 0.051830799999999996,
25763
+ "output_per_million": 0.207424
25073
25764
  }
25074
25765
  }
25075
25766
  },
@@ -26487,8 +27178,8 @@
26487
27178
  "provider": "openrouter",
26488
27179
  "family": "qwen",
26489
27180
  "created_at": "2025-07-29 18:36:05 +0200",
26490
- "context_window": 131072,
26491
- "max_output_tokens": 32768,
27181
+ "context_window": 262144,
27182
+ "max_output_tokens": null,
26492
27183
  "knowledge_cutoff": null,
26493
27184
  "modalities": {
26494
27185
  "input": [
@@ -26500,13 +27191,15 @@
26500
27191
  },
26501
27192
  "capabilities": [
26502
27193
  "streaming",
26503
- "structured_output"
27194
+ "function_calling",
27195
+ "structured_output",
27196
+ "predicted_outputs"
26504
27197
  ],
26505
27198
  "pricing": {
26506
27199
  "text_tokens": {
26507
27200
  "standard": {
26508
- "input_per_million": 0.19999999999999998,
26509
- "output_per_million": 0.7999999999999999
27201
+ "input_per_million": 0.09999999999999999,
27202
+ "output_per_million": 0.3
26510
27203
  }
26511
27204
  }
26512
27205
  },
@@ -26524,17 +27217,25 @@
26524
27217
  "instruct_type": null
26525
27218
  },
26526
27219
  "top_provider": {
26527
- "context_length": 131072,
26528
- "max_completion_tokens": 32768,
27220
+ "context_length": 262144,
27221
+ "max_completion_tokens": null,
26529
27222
  "is_moderated": false
26530
27223
  },
26531
27224
  "per_request_limits": null,
26532
27225
  "supported_parameters": [
27226
+ "frequency_penalty",
27227
+ "logit_bias",
27228
+ "logprobs",
26533
27229
  "max_tokens",
26534
27230
  "presence_penalty",
26535
27231
  "response_format",
26536
27232
  "seed",
27233
+ "stop",
26537
27234
  "temperature",
27235
+ "tool_choice",
27236
+ "tools",
27237
+ "top_k",
27238
+ "top_logprobs",
26538
27239
  "top_p"
26539
27240
  ]
26540
27241
  }
@@ -28183,8 +28884,8 @@
28183
28884
  "provider": "openrouter",
28184
28885
  "family": "thedrummer",
28185
28886
  "created_at": "2024-09-30 02:00:00 +0200",
28186
- "context_window": 8192,
28187
- "max_output_tokens": 8192,
28887
+ "context_window": 32768,
28888
+ "max_output_tokens": null,
28188
28889
  "knowledge_cutoff": null,
28189
28890
  "modalities": {
28190
28891
  "input": [
@@ -28203,8 +28904,8 @@
28203
28904
  "pricing": {
28204
28905
  "text_tokens": {
28205
28906
  "standard": {
28206
- "input_per_million": 0.19,
28207
- "output_per_million": 0.44999999999999996
28907
+ "input_per_million": 0.16999999999999998,
28908
+ "output_per_million": 0.43
28208
28909
  }
28209
28910
  }
28210
28911
  },
@@ -28222,8 +28923,8 @@
28222
28923
  "instruct_type": "chatml"
28223
28924
  },
28224
28925
  "top_provider": {
28225
- "context_length": 8192,
28226
- "max_completion_tokens": 8192,
28926
+ "context_length": 32768,
28927
+ "max_completion_tokens": null,
28227
28928
  "is_moderated": false
28228
28929
  },
28229
28930
  "per_request_limits": null,
@@ -28385,69 +29086,6 @@
28385
29086
  ]
28386
29087
  }
28387
29088
  },
28388
- {
28389
- "id": "thedrummer/valkyrie-49b-v1",
28390
- "name": "TheDrummer: Valkyrie 49B V1",
28391
- "provider": "openrouter",
28392
- "family": "thedrummer",
28393
- "created_at": "2025-05-23 19:51:10 +0200",
28394
- "context_window": 131072,
28395
- "max_output_tokens": 131072,
28396
- "knowledge_cutoff": null,
28397
- "modalities": {
28398
- "input": [
28399
- "text"
28400
- ],
28401
- "output": [
28402
- "text"
28403
- ]
28404
- },
28405
- "capabilities": [
28406
- "streaming"
28407
- ],
28408
- "pricing": {
28409
- "text_tokens": {
28410
- "standard": {
28411
- "input_per_million": 0.65,
28412
- "output_per_million": 1.0
28413
- }
28414
- }
28415
- },
28416
- "metadata": {
28417
- "description": "Built on top of NVIDIA's Llama 3.3 Nemotron Super 49B, Valkyrie is TheDrummer's newest model drop for creative writing.",
28418
- "architecture": {
28419
- "modality": "text->text",
28420
- "input_modalities": [
28421
- "text"
28422
- ],
28423
- "output_modalities": [
28424
- "text"
28425
- ],
28426
- "tokenizer": "Other",
28427
- "instruct_type": null
28428
- },
28429
- "top_provider": {
28430
- "context_length": 131072,
28431
- "max_completion_tokens": 131072,
28432
- "is_moderated": false
28433
- },
28434
- "per_request_limits": null,
28435
- "supported_parameters": [
28436
- "frequency_penalty",
28437
- "include_reasoning",
28438
- "max_tokens",
28439
- "min_p",
28440
- "presence_penalty",
28441
- "reasoning",
28442
- "repetition_penalty",
28443
- "seed",
28444
- "stop",
28445
- "temperature",
28446
- "top_k",
28447
- "top_p"
28448
- ]
28449
- }
28450
- },
28451
29089
  {
28452
29090
  "id": "thudm/glm-4-32b",
28453
29091
  "name": "THUDM: GLM 4 32B",
@@ -28472,8 +29110,8 @@
28472
29110
  "pricing": {
28473
29111
  "text_tokens": {
28474
29112
  "standard": {
28475
- "input_per_million": 0.24,
28476
- "output_per_million": 0.24
29113
+ "input_per_million": 0.55,
29114
+ "output_per_million": 1.66
28477
29115
  }
28478
29116
  }
28479
29117
  },
@@ -28669,8 +29307,8 @@
28669
29307
  "pricing": {
28670
29308
  "text_tokens": {
28671
29309
  "standard": {
28672
- "input_per_million": 0.17992692,
28673
- "output_per_million": 0.7200576000000001
29310
+ "input_per_million": 0.1999188,
29311
+ "output_per_million": 0.800064
28674
29312
  }
28675
29313
  }
28676
29314
  },
@@ -28857,8 +29495,8 @@
28857
29495
  "pricing": {
28858
29496
  "text_tokens": {
28859
29497
  "standard": {
28860
- "input_per_million": 0.7,
28861
- "output_per_million": 1.0
29498
+ "input_per_million": 0.44999999999999996,
29499
+ "output_per_million": 0.65
28862
29500
  }
28863
29501
  }
28864
29502
  },
@@ -29362,6 +30000,73 @@
29362
30000
  ]
29363
30001
  }
29364
30002
  },
30003
+ {
30004
+ "id": "x-ai/grok-code-fast-1",
30005
+ "name": "xAI: Grok Code Fast 1",
30006
+ "provider": "openrouter",
30007
+ "family": "x-ai",
30008
+ "created_at": "2025-08-26 22:08:47 +0200",
30009
+ "context_window": 256000,
30010
+ "max_output_tokens": 10000,
30011
+ "knowledge_cutoff": null,
30012
+ "modalities": {
30013
+ "input": [
30014
+ "text"
30015
+ ],
30016
+ "output": [
30017
+ "text"
30018
+ ]
30019
+ },
30020
+ "capabilities": [
30021
+ "streaming",
30022
+ "function_calling",
30023
+ "structured_output"
30024
+ ],
30025
+ "pricing": {
30026
+ "text_tokens": {
30027
+ "standard": {
30028
+ "input_per_million": 0.19999999999999998,
30029
+ "output_per_million": 1.5,
30030
+ "cached_input_per_million": 0.02
30031
+ }
30032
+ }
30033
+ },
30034
+ "metadata": {
30035
+ "description": "Grok Code Fast 1 is a speedy and economical reasoning model that excels at agentic coding. With reasoning traces visible in the response, developers can steer Grok Code for high-quality work flows.",
30036
+ "architecture": {
30037
+ "modality": "text->text",
30038
+ "input_modalities": [
30039
+ "text"
30040
+ ],
30041
+ "output_modalities": [
30042
+ "text"
30043
+ ],
30044
+ "tokenizer": "Grok",
30045
+ "instruct_type": null
30046
+ },
30047
+ "top_provider": {
30048
+ "context_length": 256000,
30049
+ "max_completion_tokens": 10000,
30050
+ "is_moderated": false
30051
+ },
30052
+ "per_request_limits": null,
30053
+ "supported_parameters": [
30054
+ "include_reasoning",
30055
+ "logprobs",
30056
+ "max_tokens",
30057
+ "reasoning",
30058
+ "response_format",
30059
+ "seed",
30060
+ "stop",
30061
+ "structured_outputs",
30062
+ "temperature",
30063
+ "tool_choice",
30064
+ "tools",
30065
+ "top_logprobs",
30066
+ "top_p"
30067
+ ]
30068
+ }
30069
+ },
29365
30070
  {
29366
30071
  "id": "x-ai/grok-vision-beta",
29367
30072
  "name": "xAI: Grok Vision Beta",
@@ -29489,7 +30194,7 @@
29489
30194
  "provider": "openrouter",
29490
30195
  "family": "z-ai",
29491
30196
  "created_at": "2025-07-25 21:22:27 +0200",
29492
- "context_window": 98304,
30197
+ "context_window": 131072,
29493
30198
  "max_output_tokens": null,
29494
30199
  "knowledge_cutoff": null,
29495
30200
  "modalities": {
@@ -29528,7 +30233,7 @@
29528
30233
  "instruct_type": null
29529
30234
  },
29530
30235
  "top_provider": {
29531
- "context_length": 98304,
30236
+ "context_length": 131072,
29532
30237
  "max_completion_tokens": null,
29533
30238
  "is_moderated": false
29534
30239
  },
@@ -29549,6 +30254,7 @@
29549
30254
  "temperature",
29550
30255
  "tool_choice",
29551
30256
  "tools",
30257
+ "top_a",
29552
30258
  "top_k",
29553
30259
  "top_logprobs",
29554
30260
  "top_p"
@@ -29606,21 +30312,15 @@
29606
30312
  },
29607
30313
  "per_request_limits": null,
29608
30314
  "supported_parameters": [
29609
- "frequency_penalty",
29610
30315
  "include_reasoning",
29611
30316
  "max_tokens",
29612
- "min_p",
29613
- "presence_penalty",
29614
30317
  "reasoning",
29615
- "repetition_penalty",
29616
30318
  "response_format",
29617
30319
  "seed",
29618
- "stop",
29619
30320
  "structured_outputs",
29620
30321
  "temperature",
29621
30322
  "tool_choice",
29622
30323
  "tools",
29623
- "top_k",
29624
30324
  "top_p"
29625
30325
  ]
29626
30326
  }
@@ -29763,7 +30463,7 @@
29763
30463
  "name": "Sonar",
29764
30464
  "provider": "perplexity",
29765
30465
  "family": "sonar",
29766
- "created_at": "2025-08-14 00:27:27 +0200",
30466
+ "created_at": "2025-08-27 18:49:59 +0200",
29767
30467
  "context_window": 128000,
29768
30468
  "max_output_tokens": 4096,
29769
30469
  "knowledge_cutoff": null,
@@ -29795,7 +30495,7 @@
29795
30495
  "name": "Sonar Deep Research",
29796
30496
  "provider": "perplexity",
29797
30497
  "family": "sonar_deep_research",
29798
- "created_at": "2025-08-14 00:27:27 +0200",
30498
+ "created_at": "2025-08-27 18:49:59 +0200",
29799
30499
  "context_window": 128000,
29800
30500
  "max_output_tokens": 4096,
29801
30501
  "knowledge_cutoff": null,
@@ -29830,7 +30530,7 @@
29830
30530
  "name": "Sonar Pro",
29831
30531
  "provider": "perplexity",
29832
30532
  "family": "sonar_pro",
29833
- "created_at": "2025-08-14 00:27:27 +0200",
30533
+ "created_at": "2025-08-27 18:49:59 +0200",
29834
30534
  "context_window": 200000,
29835
30535
  "max_output_tokens": 8192,
29836
30536
  "knowledge_cutoff": null,
@@ -29862,7 +30562,7 @@
29862
30562
  "name": "Sonar Reasoning",
29863
30563
  "provider": "perplexity",
29864
30564
  "family": "sonar_reasoning",
29865
- "created_at": "2025-08-14 00:27:27 +0200",
30565
+ "created_at": "2025-08-27 18:49:59 +0200",
29866
30566
  "context_window": 128000,
29867
30567
  "max_output_tokens": 4096,
29868
30568
  "knowledge_cutoff": null,
@@ -29894,7 +30594,7 @@
29894
30594
  "name": "Sonar Reasoning Pro",
29895
30595
  "provider": "perplexity",
29896
30596
  "family": "sonar_reasoning_pro",
29897
- "created_at": "2025-08-14 00:27:27 +0200",
30597
+ "created_at": "2025-08-27 18:49:59 +0200",
29898
30598
  "context_window": 128000,
29899
30599
  "max_output_tokens": 8192,
29900
30600
  "knowledge_cutoff": null,
@@ -29920,5 +30620,1114 @@
29920
30620
  }
29921
30621
  },
29922
30622
  "metadata": {}
30623
+ },
30624
+ {
30625
+ "id": "chat-bison",
30626
+ "name": "chat-bison",
30627
+ "provider": "vertexai",
30628
+ "family": "palm",
30629
+ "created_at": null,
30630
+ "context_window": null,
30631
+ "max_output_tokens": null,
30632
+ "knowledge_cutoff": null,
30633
+ "modalities": {
30634
+ "input": [],
30635
+ "output": []
30636
+ },
30637
+ "capabilities": [
30638
+ "streaming"
30639
+ ],
30640
+ "pricing": {},
30641
+ "metadata": {
30642
+ "version_id": "002",
30643
+ "open_source_category": "PROPRIETARY",
30644
+ "launch_stage": "GA",
30645
+ "supported_actions": {
30646
+ "openGenie": {
30647
+ "references": {
30648
+ "us-central1": {
30649
+ "uri": "https://console.cloud.google.com/vertex-ai/generative/language/create/chat"
30650
+ }
30651
+ },
30652
+ "title": "Open Prompt Design"
30653
+ }
30654
+ },
30655
+ "publisher_model_template": "projects/{project}/locations/{location}/publishers/google/models/chat-bison@002"
30656
+ }
30657
+ },
30658
+ {
30659
+ "id": "code-bison",
30660
+ "name": "code-bison",
30661
+ "provider": "vertexai",
30662
+ "family": "palm",
30663
+ "created_at": null,
30664
+ "context_window": null,
30665
+ "max_output_tokens": null,
30666
+ "knowledge_cutoff": null,
30667
+ "modalities": {
30668
+ "input": [],
30669
+ "output": []
30670
+ },
30671
+ "capabilities": [
30672
+ "streaming"
30673
+ ],
30674
+ "pricing": {},
30675
+ "metadata": {
30676
+ "version_id": "002",
30677
+ "open_source_category": null,
30678
+ "launch_stage": "GA",
30679
+ "supported_actions": null,
30680
+ "publisher_model_template": "projects/{project}/locations/{location}/publishers/google/models/code-bison@002"
30681
+ }
30682
+ },
30683
+ {
30684
+ "id": "code-gecko",
30685
+ "name": "code-gecko",
30686
+ "provider": "vertexai",
30687
+ "family": "gemini",
30688
+ "created_at": null,
30689
+ "context_window": null,
30690
+ "max_output_tokens": null,
30691
+ "knowledge_cutoff": null,
30692
+ "modalities": {
30693
+ "input": [],
30694
+ "output": []
30695
+ },
30696
+ "capabilities": [
30697
+ "streaming"
30698
+ ],
30699
+ "pricing": {},
30700
+ "metadata": {
30701
+ "version_id": "002",
30702
+ "open_source_category": null,
30703
+ "launch_stage": "GA",
30704
+ "supported_actions": null,
30705
+ "publisher_model_template": "projects/{project}/locations/{location}/publishers/google/models/code-gecko@002"
30706
+ }
30707
+ },
30708
+ {
30709
+ "id": "codechat-bison",
30710
+ "name": "codechat-bison",
30711
+ "provider": "vertexai",
30712
+ "family": "palm",
30713
+ "created_at": null,
30714
+ "context_window": null,
30715
+ "max_output_tokens": null,
30716
+ "knowledge_cutoff": null,
30717
+ "modalities": {
30718
+ "input": [],
30719
+ "output": []
30720
+ },
30721
+ "capabilities": [
30722
+ "streaming"
30723
+ ],
30724
+ "pricing": {},
30725
+ "metadata": {
30726
+ "version_id": "002",
30727
+ "open_source_category": null,
30728
+ "launch_stage": "GA",
30729
+ "supported_actions": null,
30730
+ "publisher_model_template": "projects/{project}/locations/{location}/publishers/google/models/codechat-bison@002"
30731
+ }
30732
+ },
30733
+ {
30734
+ "id": "gemini-1.5-flash",
30735
+ "name": "Gemini 1.5 Flash",
30736
+ "provider": "vertexai",
30737
+ "family": "gemini-1.5-flash",
30738
+ "created_at": null,
30739
+ "context_window": 1048576,
30740
+ "max_output_tokens": 8192,
30741
+ "knowledge_cutoff": null,
30742
+ "modalities": {
30743
+ "input": [
30744
+ "audio",
30745
+ "image",
30746
+ "text"
30747
+ ],
30748
+ "output": [
30749
+ "text"
30750
+ ]
30751
+ },
30752
+ "capabilities": [
30753
+ "function_calling",
30754
+ "structured_output"
30755
+ ],
30756
+ "pricing": {
30757
+ "text_tokens": {
30758
+ "standard": {
30759
+ "input_per_million": 0.075,
30760
+ "cached_input_per_million": 0.01875,
30761
+ "output_per_million": 0.3
30762
+ }
30763
+ }
30764
+ },
30765
+ "metadata": {
30766
+ "source": "known_models"
30767
+ }
30768
+ },
30769
+ {
30770
+ "id": "gemini-1.5-flash-002",
30771
+ "name": "Gemini 1.5 Flash",
30772
+ "provider": "vertexai",
30773
+ "family": "gemini-1.5-flash",
30774
+ "created_at": null,
30775
+ "context_window": 1048576,
30776
+ "max_output_tokens": 8192,
30777
+ "knowledge_cutoff": null,
30778
+ "modalities": {
30779
+ "input": [
30780
+ "audio",
30781
+ "image",
30782
+ "text"
30783
+ ],
30784
+ "output": [
30785
+ "text"
30786
+ ]
30787
+ },
30788
+ "capabilities": [
30789
+ "function_calling",
30790
+ "structured_output"
30791
+ ],
30792
+ "pricing": {
30793
+ "text_tokens": {
30794
+ "standard": {
30795
+ "input_per_million": 0.075,
30796
+ "cached_input_per_million": 0.01875,
30797
+ "output_per_million": 0.3
30798
+ }
30799
+ }
30800
+ },
30801
+ "metadata": {
30802
+ "source": "known_models"
30803
+ }
30804
+ },
30805
+ {
30806
+ "id": "gemini-1.5-flash-8b",
30807
+ "name": "Gemini 1.5 Flash-8B",
30808
+ "provider": "vertexai",
30809
+ "family": "gemini-1.5-flash-8b",
30810
+ "created_at": null,
30811
+ "context_window": 1048576,
30812
+ "max_output_tokens": 8192,
30813
+ "knowledge_cutoff": null,
30814
+ "modalities": {
30815
+ "input": [
30816
+ "audio",
30817
+ "image",
30818
+ "text"
30819
+ ],
30820
+ "output": [
30821
+ "text"
30822
+ ]
30823
+ },
30824
+ "capabilities": [
30825
+ "function_calling",
30826
+ "structured_output"
30827
+ ],
30828
+ "pricing": {
30829
+ "text_tokens": {
30830
+ "standard": {
30831
+ "input_per_million": 0.075,
30832
+ "cached_input_per_million": 0.01875,
30833
+ "output_per_million": 0.3
30834
+ }
30835
+ }
30836
+ },
30837
+ "metadata": {
30838
+ "source": "known_models"
30839
+ }
30840
+ },
30841
+ {
30842
+ "id": "gemini-1.5-pro",
30843
+ "name": "Gemini 1.5 Pro",
30844
+ "provider": "vertexai",
30845
+ "family": "gemini-1.5-pro",
30846
+ "created_at": null,
30847
+ "context_window": 2097152,
30848
+ "max_output_tokens": 8192,
30849
+ "knowledge_cutoff": null,
30850
+ "modalities": {
30851
+ "input": [
30852
+ "audio",
30853
+ "image",
30854
+ "text"
30855
+ ],
30856
+ "output": [
30857
+ "text"
30858
+ ]
30859
+ },
30860
+ "capabilities": [
30861
+ "function_calling",
30862
+ "structured_output"
30863
+ ],
30864
+ "pricing": {
30865
+ "text_tokens": {
30866
+ "standard": {
30867
+ "input_per_million": 1.25,
30868
+ "cached_input_per_million": 0.3125,
30869
+ "output_per_million": 5.0
30870
+ }
30871
+ }
30872
+ },
30873
+ "metadata": {
30874
+ "source": "known_models"
30875
+ }
30876
+ },
30877
+ {
30878
+ "id": "gemini-1.5-pro-002",
30879
+ "name": "Gemini 1.5 Pro",
30880
+ "provider": "vertexai",
30881
+ "family": "gemini-1.5-pro",
30882
+ "created_at": null,
30883
+ "context_window": 2097152,
30884
+ "max_output_tokens": 8192,
30885
+ "knowledge_cutoff": null,
30886
+ "modalities": {
30887
+ "input": [
30888
+ "audio",
30889
+ "image",
30890
+ "text"
30891
+ ],
30892
+ "output": [
30893
+ "text"
30894
+ ]
30895
+ },
30896
+ "capabilities": [
30897
+ "function_calling",
30898
+ "structured_output"
30899
+ ],
30900
+ "pricing": {
30901
+ "text_tokens": {
30902
+ "standard": {
30903
+ "input_per_million": 1.25,
30904
+ "cached_input_per_million": 0.3125,
30905
+ "output_per_million": 5.0
30906
+ }
30907
+ }
30908
+ },
30909
+ "metadata": {
30910
+ "source": "known_models"
30911
+ }
30912
+ },
30913
+ {
30914
+ "id": "gemini-2.0-flash",
30915
+ "name": "Gemini 2.0 Flash",
30916
+ "provider": "vertexai",
30917
+ "family": "gemini-2.0-flash",
30918
+ "created_at": null,
30919
+ "context_window": 1048576,
30920
+ "max_output_tokens": 8192,
30921
+ "knowledge_cutoff": null,
30922
+ "modalities": {
30923
+ "input": [
30924
+ "audio",
30925
+ "image",
30926
+ "text"
30927
+ ],
30928
+ "output": [
30929
+ "text"
30930
+ ]
30931
+ },
30932
+ "capabilities": [
30933
+ "batch",
30934
+ "function_calling",
30935
+ "structured_output"
30936
+ ],
30937
+ "pricing": {
30938
+ "text_tokens": {
30939
+ "standard": {
30940
+ "input_per_million": 0.1,
30941
+ "cached_input_per_million": 0.025,
30942
+ "output_per_million": 0.4
30943
+ },
30944
+ "batch": {
30945
+ "input_per_million": 0.05,
30946
+ "output_per_million": 0.2
30947
+ }
30948
+ }
30949
+ },
30950
+ "metadata": {
30951
+ "source": "known_models"
30952
+ }
30953
+ },
30954
+ {
30955
+ "id": "gemini-2.0-flash-001",
30956
+ "name": "Gemini 2.0 Flash",
30957
+ "provider": "vertexai",
30958
+ "family": "gemini-2.0-flash",
30959
+ "created_at": null,
30960
+ "context_window": 1048576,
30961
+ "max_output_tokens": 8192,
30962
+ "knowledge_cutoff": null,
30963
+ "modalities": {
30964
+ "input": [
30965
+ "audio",
30966
+ "image",
30967
+ "text"
30968
+ ],
30969
+ "output": [
30970
+ "text"
30971
+ ]
30972
+ },
30973
+ "capabilities": [
30974
+ "batch",
30975
+ "function_calling",
30976
+ "structured_output"
30977
+ ],
30978
+ "pricing": {
30979
+ "text_tokens": {
30980
+ "standard": {
30981
+ "input_per_million": 0.1,
30982
+ "cached_input_per_million": 0.025,
30983
+ "output_per_million": 0.4
30984
+ },
30985
+ "batch": {
30986
+ "input_per_million": 0.05,
30987
+ "output_per_million": 0.2
30988
+ }
30989
+ }
30990
+ },
30991
+ "metadata": {
30992
+ "source": "known_models"
30993
+ }
30994
+ },
30995
+ {
30996
+ "id": "gemini-2.0-flash-exp",
30997
+ "name": "Gemini 2.0 Flash",
30998
+ "provider": "vertexai",
30999
+ "family": "gemini-2.0-flash",
31000
+ "created_at": null,
31001
+ "context_window": 1048576,
31002
+ "max_output_tokens": 8192,
31003
+ "knowledge_cutoff": null,
31004
+ "modalities": {
31005
+ "input": [
31006
+ "audio",
31007
+ "image",
31008
+ "text"
31009
+ ],
31010
+ "output": [
31011
+ "text"
31012
+ ]
31013
+ },
31014
+ "capabilities": [
31015
+ "batch",
31016
+ "function_calling",
31017
+ "structured_output"
31018
+ ],
31019
+ "pricing": {
31020
+ "text_tokens": {
31021
+ "standard": {
31022
+ "input_per_million": 0.1,
31023
+ "cached_input_per_million": 0.025,
31024
+ "output_per_million": 0.4
31025
+ },
31026
+ "batch": {
31027
+ "input_per_million": 0.05,
31028
+ "output_per_million": 0.2
31029
+ }
31030
+ }
31031
+ },
31032
+ "metadata": {
31033
+ "source": "known_models"
31034
+ }
31035
+ },
31036
+ {
31037
+ "id": "gemini-2.0-flash-lite-001",
31038
+ "name": "Gemini 2.0 Flash-Lite",
31039
+ "provider": "vertexai",
31040
+ "family": "gemini-2.0-flash-lite",
31041
+ "created_at": null,
31042
+ "context_window": 1048576,
31043
+ "max_output_tokens": 8192,
31044
+ "knowledge_cutoff": null,
31045
+ "modalities": {
31046
+ "input": [
31047
+ "audio",
31048
+ "image",
31049
+ "text"
31050
+ ],
31051
+ "output": [
31052
+ "text"
31053
+ ]
31054
+ },
31055
+ "capabilities": [
31056
+ "batch",
31057
+ "function_calling",
31058
+ "structured_output"
31059
+ ],
31060
+ "pricing": {
31061
+ "text_tokens": {
31062
+ "standard": {
31063
+ "input_per_million": 0.1,
31064
+ "cached_input_per_million": 0.025,
31065
+ "output_per_million": 0.4
31066
+ },
31067
+ "batch": {
31068
+ "input_per_million": 0.05,
31069
+ "output_per_million": 0.2
31070
+ }
31071
+ }
31072
+ },
31073
+ "metadata": {
31074
+ "source": "known_models"
31075
+ }
31076
+ },
31077
+ {
31078
+ "id": "gemini-2.5-flash",
31079
+ "name": "Gemini 2.5 Flash",
31080
+ "provider": "vertexai",
31081
+ "family": "gemini-2.5-flash",
31082
+ "created_at": null,
31083
+ "context_window": 1048576,
31084
+ "max_output_tokens": 65536,
31085
+ "knowledge_cutoff": null,
31086
+ "modalities": {
31087
+ "input": [
31088
+ "audio",
31089
+ "image",
31090
+ "text"
31091
+ ],
31092
+ "output": [
31093
+ "text"
31094
+ ]
31095
+ },
31096
+ "capabilities": [
31097
+ "batch",
31098
+ "function_calling",
31099
+ "structured_output"
31100
+ ],
31101
+ "pricing": {
31102
+ "text_tokens": {
31103
+ "standard": {
31104
+ "input_per_million": 0.3,
31105
+ "cached_input_per_million": 0.075,
31106
+ "output_per_million": 2.5
31107
+ },
31108
+ "batch": {
31109
+ "input_per_million": 0.15,
31110
+ "output_per_million": 1.25
31111
+ }
31112
+ }
31113
+ },
31114
+ "metadata": {
31115
+ "source": "known_models"
31116
+ }
31117
+ },
31118
+ {
31119
+ "id": "gemini-2.5-flash-lite",
31120
+ "name": "Gemini 2.5 Flash-Lite",
31121
+ "provider": "vertexai",
31122
+ "family": "gemini-2.5-flash-lite",
31123
+ "created_at": null,
31124
+ "context_window": 1048576,
31125
+ "max_output_tokens": 65536,
31126
+ "knowledge_cutoff": null,
31127
+ "modalities": {
31128
+ "input": [
31129
+ "audio",
31130
+ "image",
31131
+ "text"
31132
+ ],
31133
+ "output": [
31134
+ "text"
31135
+ ]
31136
+ },
31137
+ "capabilities": [
31138
+ "batch",
31139
+ "function_calling",
31140
+ "structured_output"
31141
+ ],
31142
+ "pricing": {
31143
+ "text_tokens": {
31144
+ "standard": {
31145
+ "input_per_million": 0.3,
31146
+ "cached_input_per_million": 0.075,
31147
+ "output_per_million": 2.5
31148
+ },
31149
+ "batch": {
31150
+ "input_per_million": 0.15,
31151
+ "output_per_million": 1.25
31152
+ }
31153
+ }
31154
+ },
31155
+ "metadata": {
31156
+ "source": "known_models"
31157
+ }
31158
+ },
31159
+ {
31160
+ "id": "gemini-2.5-pro",
31161
+ "name": "Gemini 2.5 Pro",
31162
+ "provider": "vertexai",
31163
+ "family": "gemini-2.5-pro",
31164
+ "created_at": null,
31165
+ "context_window": 1048576,
31166
+ "max_output_tokens": 65536,
31167
+ "knowledge_cutoff": null,
31168
+ "modalities": {
31169
+ "input": [
31170
+ "audio",
31171
+ "image",
31172
+ "text"
31173
+ ],
31174
+ "output": [
31175
+ "text"
31176
+ ]
31177
+ },
31178
+ "capabilities": [
31179
+ "batch",
31180
+ "function_calling",
31181
+ "structured_output"
31182
+ ],
31183
+ "pricing": {
31184
+ "text_tokens": {
31185
+ "standard": {
31186
+ "input_per_million": 1.25,
31187
+ "cached_input_per_million": 0.31,
31188
+ "output_per_million": 10.0
31189
+ },
31190
+ "batch": {
31191
+ "input_per_million": 0.625,
31192
+ "output_per_million": 5.0
31193
+ }
31194
+ }
31195
+ },
31196
+ "metadata": {
31197
+ "source": "known_models"
31198
+ }
31199
+ },
31200
+ {
31201
+ "id": "gemini-embedding-001",
31202
+ "name": "gemini-embedding-001",
31203
+ "provider": "vertexai",
31204
+ "family": "gemini",
31205
+ "created_at": null,
31206
+ "context_window": null,
31207
+ "max_output_tokens": null,
31208
+ "knowledge_cutoff": null,
31209
+ "modalities": {
31210
+ "input": [],
31211
+ "output": []
31212
+ },
31213
+ "capabilities": [
31214
+ "streaming",
31215
+ "function_calling"
31216
+ ],
31217
+ "pricing": {},
31218
+ "metadata": {
31219
+ "source": "known_models"
31220
+ }
31221
+ },
31222
+ {
31223
+ "id": "gemini-exp-1121",
31224
+ "name": "gemini-exp-1121",
31225
+ "provider": "vertexai",
31226
+ "family": "gemini",
31227
+ "created_at": null,
31228
+ "context_window": null,
31229
+ "max_output_tokens": null,
31230
+ "knowledge_cutoff": null,
31231
+ "modalities": {
31232
+ "input": [],
31233
+ "output": []
31234
+ },
31235
+ "capabilities": [
31236
+ "streaming",
31237
+ "function_calling"
31238
+ ],
31239
+ "pricing": {},
31240
+ "metadata": {
31241
+ "source": "known_models"
31242
+ }
31243
+ },
31244
+ {
31245
+ "id": "gemini-exp-1206",
31246
+ "name": "gemini-exp-1206",
31247
+ "provider": "vertexai",
31248
+ "family": "gemini",
31249
+ "created_at": null,
31250
+ "context_window": null,
31251
+ "max_output_tokens": null,
31252
+ "knowledge_cutoff": null,
31253
+ "modalities": {
31254
+ "input": [],
31255
+ "output": []
31256
+ },
31257
+ "capabilities": [
31258
+ "streaming",
31259
+ "function_calling"
31260
+ ],
31261
+ "pricing": {},
31262
+ "metadata": {
31263
+ "source": "known_models"
31264
+ }
31265
+ },
31266
+ {
31267
+ "id": "gemini-pro",
31268
+ "name": "gemini-pro",
31269
+ "provider": "vertexai",
31270
+ "family": "gemini",
31271
+ "created_at": null,
31272
+ "context_window": null,
31273
+ "max_output_tokens": null,
31274
+ "knowledge_cutoff": null,
31275
+ "modalities": {
31276
+ "input": [],
31277
+ "output": []
31278
+ },
31279
+ "capabilities": [
31280
+ "streaming",
31281
+ "function_calling"
31282
+ ],
31283
+ "pricing": {},
31284
+ "metadata": {
31285
+ "source": "known_models"
31286
+ }
31287
+ },
31288
+ {
31289
+ "id": "gemini-pro-vision",
31290
+ "name": "gemini-pro-vision",
31291
+ "provider": "vertexai",
31292
+ "family": "gemini",
31293
+ "created_at": null,
31294
+ "context_window": null,
31295
+ "max_output_tokens": null,
31296
+ "knowledge_cutoff": null,
31297
+ "modalities": {
31298
+ "input": [],
31299
+ "output": []
31300
+ },
31301
+ "capabilities": [
31302
+ "streaming",
31303
+ "function_calling"
31304
+ ],
31305
+ "pricing": {},
31306
+ "metadata": {
31307
+ "source": "known_models"
31308
+ }
31309
+ },
31310
+ {
31311
+ "id": "image-segmentation-001",
31312
+ "name": "image-segmentation-001",
31313
+ "provider": "vertexai",
31314
+ "family": "gemini",
31315
+ "created_at": null,
31316
+ "context_window": null,
31317
+ "max_output_tokens": null,
31318
+ "knowledge_cutoff": null,
31319
+ "modalities": {
31320
+ "input": [],
31321
+ "output": []
31322
+ },
31323
+ "capabilities": [
31324
+ "streaming"
31325
+ ],
31326
+ "pricing": {},
31327
+ "metadata": {
31328
+ "version_id": "default",
31329
+ "open_source_category": null,
31330
+ "launch_stage": "PUBLIC_PREVIEW",
31331
+ "supported_actions": {
31332
+ "openNotebook": {
31333
+ "references": {
31334
+ "europe-west1": {
31335
+ "uri": "https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/vision/getting-started/image_segmentation.ipynb"
31336
+ }
31337
+ },
31338
+ "title": "Open Notebook"
31339
+ },
31340
+ "requestAccess": {
31341
+ "references": {
31342
+ "europe-west1": {
31343
+ "uri": "https://docs.google.com/forms/d/e/1FAIpQLSdzIR1EeQGFcMsqd9nPip5e9ovDKSjfWRd58QVjo1zLpfdvEg/viewform?resourcekey=0-Pvqc66u-0Z1QmuzHq4wLKg"
31344
+ }
31345
+ }
31346
+ },
31347
+ "openNotebooks": {
31348
+ "notebooks": [
31349
+ {
31350
+ "references": {
31351
+ "europe-west1": {
31352
+ "uri": "https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/vision/getting-started/image_segmentation.ipynb"
31353
+ }
31354
+ },
31355
+ "title": "Open Notebook"
31356
+ }
31357
+ ]
31358
+ }
31359
+ },
31360
+ "publisher_model_template": "projects/{project}/locations/{location}/publishers/google/models/image-segmentation-001@default"
31361
+ }
31362
+ },
31363
+ {
31364
+ "id": "imagegeneration",
31365
+ "name": "imagegeneration",
31366
+ "provider": "vertexai",
31367
+ "family": "gemini",
31368
+ "created_at": null,
31369
+ "context_window": null,
31370
+ "max_output_tokens": null,
31371
+ "knowledge_cutoff": null,
31372
+ "modalities": {
31373
+ "input": [],
31374
+ "output": []
31375
+ },
31376
+ "capabilities": [
31377
+ "streaming"
31378
+ ],
31379
+ "pricing": {},
31380
+ "metadata": {
31381
+ "version_id": "006",
31382
+ "open_source_category": "PROPRIETARY",
31383
+ "launch_stage": "PUBLIC_PREVIEW",
31384
+ "supported_actions": {
31385
+ "openGenerationAiStudio": {
31386
+ "references": {
31387
+ "europe-west1": {
31388
+ "uri": "https://cloud.google.com/console/vertex-ai/generative/vision"
31389
+ }
31390
+ },
31391
+ "title": "Open Vertex AI Studio"
31392
+ }
31393
+ },
31394
+ "publisher_model_template": "projects/{project}/locations/{location}/publishers/google/models/imagegeneration@006"
31395
+ }
31396
+ },
31397
+ {
31398
+ "id": "imagen-4.0-fast-generate-001",
31399
+ "name": "imagen-4.0-fast-generate-001",
31400
+ "provider": "vertexai",
31401
+ "family": "gemini",
31402
+ "created_at": null,
31403
+ "context_window": null,
31404
+ "max_output_tokens": null,
31405
+ "knowledge_cutoff": null,
31406
+ "modalities": {
31407
+ "input": [],
31408
+ "output": []
31409
+ },
31410
+ "capabilities": [
31411
+ "streaming"
31412
+ ],
31413
+ "pricing": {},
31414
+ "metadata": {
31415
+ "version_id": "default",
31416
+ "open_source_category": null,
31417
+ "launch_stage": "GA",
31418
+ "supported_actions": {
31419
+ "openGenerationAiStudio": {
31420
+ "references": {
31421
+ "europe-west1": {
31422
+ "uri": "https://console.cloud.google.com/vertex-ai/studio/media/generate"
31423
+ }
31424
+ },
31425
+ "title": "Open Vertex AI Studio"
31426
+ }
31427
+ },
31428
+ "publisher_model_template": "projects/{project}/locations/{location}/publishers/google/models/imagen-4.0-fast-generate-001@default"
31429
+ }
31430
+ },
31431
+ {
31432
+ "id": "imagen-4.0-generate-001",
31433
+ "name": "imagen-4.0-generate-001",
31434
+ "provider": "vertexai",
31435
+ "family": "gemini",
31436
+ "created_at": null,
31437
+ "context_window": null,
31438
+ "max_output_tokens": null,
31439
+ "knowledge_cutoff": null,
31440
+ "modalities": {
31441
+ "input": [],
31442
+ "output": []
31443
+ },
31444
+ "capabilities": [
31445
+ "streaming"
31446
+ ],
31447
+ "pricing": {},
31448
+ "metadata": {
31449
+ "version_id": "default",
31450
+ "open_source_category": null,
31451
+ "launch_stage": "GA",
31452
+ "supported_actions": {
31453
+ "openGenerationAiStudio": {
31454
+ "references": {
31455
+ "europe-west1": {
31456
+ "uri": "https://console.cloud.google.com/vertex-ai/studio/media/generate"
31457
+ }
31458
+ },
31459
+ "title": "Open Vertex AI Studio"
31460
+ }
31461
+ },
31462
+ "publisher_model_template": "projects/{project}/locations/{location}/publishers/google/models/imagen-4.0-generate-001@default"
31463
+ }
31464
+ },
31465
+ {
31466
+ "id": "imagen-4.0-ultra-generate-001",
31467
+ "name": "imagen-4.0-ultra-generate-001",
31468
+ "provider": "vertexai",
31469
+ "family": "gemini",
31470
+ "created_at": null,
31471
+ "context_window": null,
31472
+ "max_output_tokens": null,
31473
+ "knowledge_cutoff": null,
31474
+ "modalities": {
31475
+ "input": [],
31476
+ "output": []
31477
+ },
31478
+ "capabilities": [
31479
+ "streaming"
31480
+ ],
31481
+ "pricing": {},
31482
+ "metadata": {
31483
+ "version_id": "default",
31484
+ "open_source_category": null,
31485
+ "launch_stage": "GA",
31486
+ "supported_actions": {
31487
+ "openGenerationAiStudio": {
31488
+ "references": {
31489
+ "europe-west1": {
31490
+ "uri": "https://console.cloud.google.com/vertex-ai/studio/media/generate"
31491
+ }
31492
+ },
31493
+ "title": "Open Vertex AI Studio"
31494
+ }
31495
+ },
31496
+ "publisher_model_template": "projects/{project}/locations/{location}/publishers/google/models/imagen-4.0-ultra-generate-001@default"
31497
+ }
31498
+ },
31499
+ {
31500
+ "id": "imagetext",
31501
+ "name": "imagetext",
31502
+ "provider": "vertexai",
31503
+ "family": "gemini",
31504
+ "created_at": null,
31505
+ "context_window": null,
31506
+ "max_output_tokens": null,
31507
+ "knowledge_cutoff": null,
31508
+ "modalities": {
31509
+ "input": [],
31510
+ "output": []
31511
+ },
31512
+ "capabilities": [
31513
+ "streaming"
31514
+ ],
31515
+ "pricing": {},
31516
+ "metadata": {
31517
+ "version_id": "001",
31518
+ "open_source_category": "PROPRIETARY",
31519
+ "launch_stage": "GA",
31520
+ "supported_actions": {
31521
+ "openGenerationAiStudio": {
31522
+ "references": {
31523
+ "us-central1": {
31524
+ "uri": "https://cloud.google.com/console/vertex-ai/generative/vision"
31525
+ }
31526
+ },
31527
+ "title": "Open Vertex AI Studio"
31528
+ }
31529
+ },
31530
+ "publisher_model_template": "projects/{project}/locations/{location}/publishers/google/models/imagetext@001"
31531
+ }
31532
+ },
31533
+ {
31534
+ "id": "multimodalembedding",
31535
+ "name": "multimodalembedding",
31536
+ "provider": "vertexai",
31537
+ "family": "gemini",
31538
+ "created_at": null,
31539
+ "context_window": null,
31540
+ "max_output_tokens": null,
31541
+ "knowledge_cutoff": null,
31542
+ "modalities": {
31543
+ "input": [],
31544
+ "output": []
31545
+ },
31546
+ "capabilities": [
31547
+ "streaming"
31548
+ ],
31549
+ "pricing": {},
31550
+ "metadata": {
31551
+ "version_id": "001",
31552
+ "open_source_category": "PROPRIETARY",
31553
+ "launch_stage": "GA",
31554
+ "supported_actions": null,
31555
+ "publisher_model_template": "projects/{project}/locations/{location}/publishers/google/models/multimodalembedding@001"
31556
+ }
31557
+ },
31558
+ {
31559
+ "id": "text-bison",
31560
+ "name": "text-bison",
31561
+ "provider": "vertexai",
31562
+ "family": "palm",
31563
+ "created_at": null,
31564
+ "context_window": null,
31565
+ "max_output_tokens": null,
31566
+ "knowledge_cutoff": null,
31567
+ "modalities": {
31568
+ "input": [],
31569
+ "output": []
31570
+ },
31571
+ "capabilities": [
31572
+ "streaming"
31573
+ ],
31574
+ "pricing": {},
31575
+ "metadata": {
31576
+ "version_id": "002",
31577
+ "open_source_category": "PROPRIETARY",
31578
+ "launch_stage": "GA",
31579
+ "supported_actions": {
31580
+ "openGenie": {
31581
+ "references": {
31582
+ "us-central1": {
31583
+ "uri": "https://console.cloud.google.com/vertex-ai/generative/language/create/text"
31584
+ }
31585
+ },
31586
+ "title": "Open Prompt Design"
31587
+ },
31588
+ "openEvaluationPipeline": {
31589
+ "references": {
31590
+ "us-central1": {
31591
+ "uri": "https://console.cloud.google.com/vertex-ai/pipelines/vertex-ai-templates/evaluation-llm-text-generation-pipeline"
31592
+ }
31593
+ },
31594
+ "title": "Evaluate"
31595
+ }
31596
+ },
31597
+ "publisher_model_template": "projects/{project}/locations/{location}/publishers/google/models/text-bison@002"
31598
+ }
31599
+ },
31600
+ {
31601
+ "id": "text-embedding-004",
31602
+ "name": "text-embedding-004",
31603
+ "provider": "vertexai",
31604
+ "family": "text-embedding",
31605
+ "created_at": null,
31606
+ "context_window": null,
31607
+ "max_output_tokens": null,
31608
+ "knowledge_cutoff": null,
31609
+ "modalities": {
31610
+ "input": [],
31611
+ "output": []
31612
+ },
31613
+ "capabilities": [
31614
+ "streaming",
31615
+ "function_calling"
31616
+ ],
31617
+ "pricing": {},
31618
+ "metadata": {
31619
+ "source": "known_models"
31620
+ }
31621
+ },
31622
+ {
31623
+ "id": "text-embedding-005",
31624
+ "name": "text-embedding-005",
31625
+ "provider": "vertexai",
31626
+ "family": "text-embedding",
31627
+ "created_at": null,
31628
+ "context_window": null,
31629
+ "max_output_tokens": null,
31630
+ "knowledge_cutoff": null,
31631
+ "modalities": {
31632
+ "input": [],
31633
+ "output": []
31634
+ },
31635
+ "capabilities": [
31636
+ "streaming",
31637
+ "function_calling"
31638
+ ],
31639
+ "pricing": {},
31640
+ "metadata": {
31641
+ "source": "known_models"
31642
+ }
31643
+ },
31644
+ {
31645
+ "id": "text-multilingual-embedding-002",
31646
+ "name": "text-multilingual-embedding-002",
31647
+ "provider": "vertexai",
31648
+ "family": "gemini",
31649
+ "created_at": null,
31650
+ "context_window": null,
31651
+ "max_output_tokens": null,
31652
+ "knowledge_cutoff": null,
31653
+ "modalities": {
31654
+ "input": [],
31655
+ "output": []
31656
+ },
31657
+ "capabilities": [
31658
+ "streaming",
31659
+ "function_calling"
31660
+ ],
31661
+ "pricing": {},
31662
+ "metadata": {
31663
+ "source": "known_models"
31664
+ }
31665
+ },
31666
+ {
31667
+ "id": "text-unicorn",
31668
+ "name": "text-unicorn",
31669
+ "provider": "vertexai",
31670
+ "family": "gemini",
31671
+ "created_at": null,
31672
+ "context_window": null,
31673
+ "max_output_tokens": null,
31674
+ "knowledge_cutoff": null,
31675
+ "modalities": {
31676
+ "input": [],
31677
+ "output": []
31678
+ },
31679
+ "capabilities": [
31680
+ "streaming"
31681
+ ],
31682
+ "pricing": {},
31683
+ "metadata": {
31684
+ "version_id": "001",
31685
+ "open_source_category": "PROPRIETARY",
31686
+ "launch_stage": "GA",
31687
+ "supported_actions": {
31688
+ "openGenie": {
31689
+ "references": {
31690
+ "europe-west1": {
31691
+ "uri": "https://console.cloud.google.com/vertex-ai/generative/language/create/text"
31692
+ }
31693
+ },
31694
+ "title": "Open in Vertex AI Studio"
31695
+ },
31696
+ "openEvaluationPipeline": {
31697
+ "references": {
31698
+ "europe-west1": {
31699
+ "uri": "https://console.cloud.google.com/vertex-ai/pipelines/vertex-ai-templates/evaluation-llm-text-generation-pipeline"
31700
+ }
31701
+ },
31702
+ "title": "Evaluate"
31703
+ }
31704
+ },
31705
+ "publisher_model_template": "projects/{project}/locations/{location}/publishers/google/models/text-unicorn@001"
31706
+ }
31707
+ },
31708
+ {
31709
+ "id": "textembedding-gecko",
31710
+ "name": "textembedding-gecko",
31711
+ "provider": "vertexai",
31712
+ "family": "gemini",
31713
+ "created_at": null,
31714
+ "context_window": null,
31715
+ "max_output_tokens": null,
31716
+ "knowledge_cutoff": null,
31717
+ "modalities": {
31718
+ "input": [],
31719
+ "output": []
31720
+ },
31721
+ "capabilities": [
31722
+ "streaming"
31723
+ ],
31724
+ "pricing": {},
31725
+ "metadata": {
31726
+ "version_id": "003",
31727
+ "open_source_category": "PROPRIETARY",
31728
+ "launch_stage": "GA",
31729
+ "supported_actions": null,
31730
+ "publisher_model_template": "projects/{project}/locations/{location}/publishers/google/models/textembedding-gecko@003"
31731
+ }
29923
31732
  }
29924
31733
  ]