ruby_llm 1.6.1 → 1.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +73 -91
  3. data/lib/ruby_llm/active_record/acts_as.rb +3 -11
  4. data/lib/ruby_llm/aliases.json +4 -0
  5. data/lib/ruby_llm/aliases.rb +7 -25
  6. data/lib/ruby_llm/chat.rb +3 -19
  7. data/lib/ruby_llm/configuration.rb +1 -14
  8. data/lib/ruby_llm/content.rb +1 -3
  9. data/lib/ruby_llm/embedding.rb +1 -2
  10. data/lib/ruby_llm/error.rb +0 -10
  11. data/lib/ruby_llm/image.rb +0 -4
  12. data/lib/ruby_llm/message.rb +2 -4
  13. data/lib/ruby_llm/model/info.rb +0 -10
  14. data/lib/ruby_llm/model/pricing.rb +0 -3
  15. data/lib/ruby_llm/model/pricing_category.rb +0 -2
  16. data/lib/ruby_llm/model/pricing_tier.rb +0 -1
  17. data/lib/ruby_llm/models.json +668 -434
  18. data/lib/ruby_llm/models.rb +6 -18
  19. data/lib/ruby_llm/provider.rb +1 -5
  20. data/lib/ruby_llm/providers/anthropic/capabilities.rb +1 -46
  21. data/lib/ruby_llm/providers/anthropic/media.rb +0 -1
  22. data/lib/ruby_llm/providers/anthropic/tools.rb +0 -1
  23. data/lib/ruby_llm/providers/anthropic.rb +1 -2
  24. data/lib/ruby_llm/providers/bedrock/chat.rb +0 -2
  25. data/lib/ruby_llm/providers/bedrock/media.rb +0 -1
  26. data/lib/ruby_llm/providers/bedrock/models.rb +0 -2
  27. data/lib/ruby_llm/providers/bedrock/streaming/base.rb +1 -13
  28. data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +0 -7
  29. data/lib/ruby_llm/providers/bedrock/streaming/message_processing.rb +0 -12
  30. data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +0 -12
  31. data/lib/ruby_llm/providers/bedrock/streaming/prelude_handling.rb +0 -13
  32. data/lib/ruby_llm/providers/bedrock/streaming.rb +0 -18
  33. data/lib/ruby_llm/providers/bedrock.rb +1 -2
  34. data/lib/ruby_llm/providers/deepseek/capabilities.rb +1 -2
  35. data/lib/ruby_llm/providers/deepseek/chat.rb +0 -1
  36. data/lib/ruby_llm/providers/gemini/capabilities.rb +26 -101
  37. data/lib/ruby_llm/providers/gemini/chat.rb +12 -8
  38. data/lib/ruby_llm/providers/gemini/embeddings.rb +0 -2
  39. data/lib/ruby_llm/providers/gemini/images.rb +0 -1
  40. data/lib/ruby_llm/providers/gemini/media.rb +0 -1
  41. data/lib/ruby_llm/providers/gemini/models.rb +1 -2
  42. data/lib/ruby_llm/providers/gemini/streaming.rb +4 -1
  43. data/lib/ruby_llm/providers/gemini/tools.rb +0 -5
  44. data/lib/ruby_llm/providers/gpustack/chat.rb +0 -1
  45. data/lib/ruby_llm/providers/gpustack/models.rb +3 -4
  46. data/lib/ruby_llm/providers/mistral/capabilities.rb +2 -10
  47. data/lib/ruby_llm/providers/mistral/chat.rb +0 -2
  48. data/lib/ruby_llm/providers/mistral/embeddings.rb +0 -3
  49. data/lib/ruby_llm/providers/mistral/models.rb +0 -1
  50. data/lib/ruby_llm/providers/ollama/chat.rb +0 -1
  51. data/lib/ruby_llm/providers/ollama/media.rb +0 -1
  52. data/lib/ruby_llm/providers/openai/capabilities.rb +2 -17
  53. data/lib/ruby_llm/providers/openai/chat.rb +0 -3
  54. data/lib/ruby_llm/providers/openai/embeddings.rb +0 -3
  55. data/lib/ruby_llm/providers/openai/media.rb +0 -1
  56. data/lib/ruby_llm/providers/openai.rb +1 -3
  57. data/lib/ruby_llm/providers/openrouter/models.rb +1 -16
  58. data/lib/ruby_llm/providers/perplexity/capabilities.rb +0 -1
  59. data/lib/ruby_llm/providers/perplexity/chat.rb +0 -1
  60. data/lib/ruby_llm/providers/perplexity.rb +1 -5
  61. data/lib/ruby_llm/railtie.rb +0 -1
  62. data/lib/ruby_llm/stream_accumulator.rb +3 -5
  63. data/lib/ruby_llm/streaming.rb +16 -25
  64. data/lib/ruby_llm/tool.rb +2 -19
  65. data/lib/ruby_llm/tool_call.rb +0 -9
  66. data/lib/ruby_llm/version.rb +1 -1
  67. data/lib/ruby_llm.rb +0 -2
  68. data/lib/tasks/aliases.rake +7 -37
  69. data/lib/tasks/models_docs.rake +5 -15
  70. data/lib/tasks/models_update.rake +1 -1
  71. data/lib/tasks/vcr.rake +0 -7
  72. metadata +1 -1
@@ -33,31 +33,37 @@
33
33
  },
34
34
  {
35
35
  "id": "claude-3-5-sonnet-20240620",
36
- "name": "Claude Sonnet 3.5",
36
+ "name": "Claude Sonnet 3.5 (Old)",
37
37
  "provider": "anthropic",
38
38
  "family": "claude-3-5-sonnet",
39
- "created_at": null,
39
+ "created_at": "2024-06-20 00:00:00 UTC",
40
40
  "context_window": 200000,
41
41
  "max_output_tokens": 8192,
42
42
  "knowledge_cutoff": null,
43
43
  "modalities": {
44
44
  "input": [
45
+ "text",
45
46
  "image",
46
- "text"
47
+ "pdf"
47
48
  ],
48
49
  "output": [
49
50
  "text"
50
51
  ]
51
52
  },
52
53
  "capabilities": [
53
- "function_calling"
54
+ "streaming",
55
+ "function_calling",
56
+ "batch"
54
57
  ],
55
58
  "pricing": {
56
59
  "text_tokens": {
57
60
  "standard": {
58
61
  "input_per_million": 3.0,
59
- "cached_input_per_million": 3.75,
60
62
  "output_per_million": 15.0
63
+ },
64
+ "batch": {
65
+ "input_per_million": 1.5,
66
+ "output_per_million": 7.5
61
67
  }
62
68
  }
63
69
  },
@@ -65,31 +71,37 @@
65
71
  },
66
72
  {
67
73
  "id": "claude-3-5-sonnet-20241022",
68
- "name": "Claude Sonnet 3.5",
74
+ "name": "Claude Sonnet 3.5 (New)",
69
75
  "provider": "anthropic",
70
76
  "family": "claude-3-5-sonnet",
71
- "created_at": null,
77
+ "created_at": "2024-10-22 00:00:00 UTC",
72
78
  "context_window": 200000,
73
79
  "max_output_tokens": 8192,
74
80
  "knowledge_cutoff": null,
75
81
  "modalities": {
76
82
  "input": [
83
+ "text",
77
84
  "image",
78
- "text"
85
+ "pdf"
79
86
  ],
80
87
  "output": [
81
88
  "text"
82
89
  ]
83
90
  },
84
91
  "capabilities": [
85
- "function_calling"
92
+ "streaming",
93
+ "function_calling",
94
+ "batch"
86
95
  ],
87
96
  "pricing": {
88
97
  "text_tokens": {
89
98
  "standard": {
90
99
  "input_per_million": 3.0,
91
- "cached_input_per_million": 3.75,
92
100
  "output_per_million": 15.0
101
+ },
102
+ "batch": {
103
+ "input_per_million": 1.5,
104
+ "output_per_million": 7.5
93
105
  }
94
106
  }
95
107
  },
@@ -4127,7 +4139,7 @@
4127
4139
  },
4128
4140
  {
4129
4141
  "id": "imagen-3.0-generate-002",
4130
- "name": "Imagen 3.0 002 model",
4142
+ "name": "Imagen 3.0",
4131
4143
  "provider": "gemini",
4132
4144
  "family": "imagen3",
4133
4145
  "created_at": null,
@@ -4156,6 +4168,37 @@
4156
4168
  ]
4157
4169
  }
4158
4170
  },
4171
+ {
4172
+ "id": "imagen-4.0-generate-001",
4173
+ "name": "Imagen 4",
4174
+ "provider": "gemini",
4175
+ "family": "other",
4176
+ "created_at": null,
4177
+ "context_window": 480,
4178
+ "max_output_tokens": 8192,
4179
+ "knowledge_cutoff": null,
4180
+ "modalities": {
4181
+ "input": [
4182
+ "text",
4183
+ "image",
4184
+ "pdf"
4185
+ ],
4186
+ "output": [
4187
+ "image"
4188
+ ]
4189
+ },
4190
+ "capabilities": [
4191
+ "streaming"
4192
+ ],
4193
+ "pricing": {},
4194
+ "metadata": {
4195
+ "version": "001",
4196
+ "description": "Vertex served Imagen 4.0 model",
4197
+ "supported_generation_methods": [
4198
+ "predict"
4199
+ ]
4200
+ }
4201
+ },
4159
4202
  {
4160
4203
  "id": "imagen-4.0-generate-preview-06-06",
4161
4204
  "name": "Imagen 4 (Preview)",
@@ -4302,46 +4345,6 @@
4302
4345
  ]
4303
4346
  }
4304
4347
  },
4305
- {
4306
- "id": "veo-2.0-generate-001",
4307
- "name": "Veo 2",
4308
- "provider": "gemini",
4309
- "family": "other",
4310
- "created_at": null,
4311
- "context_window": 480,
4312
- "max_output_tokens": 8192,
4313
- "knowledge_cutoff": null,
4314
- "modalities": {
4315
- "input": [
4316
- "text"
4317
- ],
4318
- "output": [
4319
- "text"
4320
- ]
4321
- },
4322
- "capabilities": [
4323
- "streaming"
4324
- ],
4325
- "pricing": {
4326
- "text_tokens": {
4327
- "standard": {
4328
- "input_per_million": 0.075,
4329
- "output_per_million": 0.3
4330
- },
4331
- "batch": {
4332
- "input_per_million": 0.0375,
4333
- "output_per_million": 0.15
4334
- }
4335
- }
4336
- },
4337
- "metadata": {
4338
- "version": "2.0",
4339
- "description": "Vertex served Veo 2 model. Access to this model requires billing to be enabled on the associated Google Cloud Platform account. Please visit https://console.cloud.google.com/billing to enable it.",
4340
- "supported_generation_methods": [
4341
- "predictLongRunning"
4342
- ]
4343
- }
4344
- },
4345
4348
  {
4346
4349
  "id": "codestral-2411-rc5",
4347
4350
  "name": "Codestral",
@@ -6700,29 +6703,31 @@
6700
6703
  "id": "gpt-4",
6701
6704
  "name": "GPT-4",
6702
6705
  "provider": "openai",
6703
- "family": "gpt4",
6704
- "created_at": "2023-06-27 18:13:31 +0200",
6706
+ "family": "gpt-4",
6707
+ "created_at": null,
6705
6708
  "context_window": 8192,
6706
6709
  "max_output_tokens": 8192,
6707
6710
  "knowledge_cutoff": null,
6708
6711
  "modalities": {
6709
6712
  "input": [
6710
- "text",
6711
- "image",
6712
- "pdf"
6713
+ "text"
6713
6714
  ],
6714
6715
  "output": [
6716
+ "embeddings",
6715
6717
  "text"
6716
6718
  ]
6717
6719
  },
6718
6720
  "capabilities": [
6719
- "streaming",
6720
- "function_calling"
6721
+ "batch"
6721
6722
  ],
6722
6723
  "pricing": {
6723
6724
  "text_tokens": {
6724
6725
  "standard": {
6725
- "input_per_million": 10.0,
6726
+ "input_per_million": 30.0,
6727
+ "output_per_million": 60.0
6728
+ },
6729
+ "batch": {
6730
+ "input_per_million": 15.0,
6726
6731
  "output_per_million": 30.0
6727
6732
  }
6728
6733
  }
@@ -6767,29 +6772,34 @@
6767
6772
  },
6768
6773
  {
6769
6774
  "id": "gpt-4-0613",
6770
- "name": "GPT-4 0613",
6775
+ "name": "GPT-4",
6771
6776
  "provider": "openai",
6772
- "family": "other",
6773
- "created_at": "2023-06-12 18:54:56 +0200",
6774
- "context_window": 4096,
6775
- "max_output_tokens": 16384,
6777
+ "family": "gpt-4",
6778
+ "created_at": null,
6779
+ "context_window": 8192,
6780
+ "max_output_tokens": 8192,
6776
6781
  "knowledge_cutoff": null,
6777
6782
  "modalities": {
6778
6783
  "input": [
6779
6784
  "text"
6780
6785
  ],
6781
6786
  "output": [
6787
+ "embeddings",
6782
6788
  "text"
6783
6789
  ]
6784
6790
  },
6785
6791
  "capabilities": [
6786
- "streaming"
6792
+ "batch"
6787
6793
  ],
6788
6794
  "pricing": {
6789
6795
  "text_tokens": {
6790
6796
  "standard": {
6791
- "input_per_million": 0.5,
6792
- "output_per_million": 1.5
6797
+ "input_per_million": 30.0,
6798
+ "output_per_million": 60.0
6799
+ },
6800
+ "batch": {
6801
+ "input_per_million": 15.0,
6802
+ "output_per_million": 30.0
6793
6803
  }
6794
6804
  }
6795
6805
  },
@@ -6915,25 +6925,21 @@
6915
6925
  "id": "gpt-4-turbo-preview",
6916
6926
  "name": "GPT-4 Turbo Preview",
6917
6927
  "provider": "openai",
6918
- "family": "gpt4_turbo",
6919
- "created_at": "2024-01-23 20:22:57 +0100",
6928
+ "family": "gpt-4-turbo-preview",
6929
+ "created_at": null,
6920
6930
  "context_window": 128000,
6921
6931
  "max_output_tokens": 4096,
6922
6932
  "knowledge_cutoff": null,
6923
6933
  "modalities": {
6924
6934
  "input": [
6925
- "text",
6926
- "image",
6927
- "pdf"
6935
+ "text"
6928
6936
  ],
6929
6937
  "output": [
6938
+ "embeddings",
6930
6939
  "text"
6931
6940
  ]
6932
6941
  },
6933
- "capabilities": [
6934
- "streaming",
6935
- "function_calling"
6936
- ],
6942
+ "capabilities": [],
6937
6943
  "pricing": {
6938
6944
  "text_tokens": {
6939
6945
  "standard": {
@@ -6951,23 +6957,23 @@
6951
6957
  "id": "gpt-4.1",
6952
6958
  "name": "GPT-4.1",
6953
6959
  "provider": "openai",
6954
- "family": "gpt41",
6955
- "created_at": "2025-04-10 22:22:22 +0200",
6960
+ "family": "gpt-4.1",
6961
+ "created_at": null,
6956
6962
  "context_window": 1047576,
6957
6963
  "max_output_tokens": 32768,
6958
6964
  "knowledge_cutoff": null,
6959
6965
  "modalities": {
6960
6966
  "input": [
6961
- "text",
6962
6967
  "image",
6963
- "pdf"
6968
+ "text"
6964
6969
  ],
6965
6970
  "output": [
6971
+ "embeddings",
6966
6972
  "text"
6967
6973
  ]
6968
6974
  },
6969
6975
  "capabilities": [
6970
- "streaming",
6976
+ "batch",
6971
6977
  "function_calling",
6972
6978
  "structured_output"
6973
6979
  ],
@@ -6975,8 +6981,12 @@
6975
6981
  "text_tokens": {
6976
6982
  "standard": {
6977
6983
  "input_per_million": 2.0,
6978
- "output_per_million": 8.0,
6979
- "cached_input_per_million": 0.5
6984
+ "cached_input_per_million": 0.5,
6985
+ "output_per_million": 8.0
6986
+ },
6987
+ "batch": {
6988
+ "input_per_million": 1.0,
6989
+ "output_per_million": 4.0
6980
6990
  }
6981
6991
  }
6982
6992
  },
@@ -6987,25 +6997,25 @@
6987
6997
  },
6988
6998
  {
6989
6999
  "id": "gpt-4.1-2025-04-14",
6990
- "name": "GPT-4.1 20250414",
7000
+ "name": "GPT-4.1",
6991
7001
  "provider": "openai",
6992
- "family": "gpt41",
6993
- "created_at": "2025-04-10 22:09:06 +0200",
7002
+ "family": "gpt-4.1",
7003
+ "created_at": null,
6994
7004
  "context_window": 1047576,
6995
7005
  "max_output_tokens": 32768,
6996
7006
  "knowledge_cutoff": null,
6997
7007
  "modalities": {
6998
7008
  "input": [
6999
- "text",
7000
7009
  "image",
7001
- "pdf"
7010
+ "text"
7002
7011
  ],
7003
7012
  "output": [
7013
+ "embeddings",
7004
7014
  "text"
7005
7015
  ]
7006
7016
  },
7007
7017
  "capabilities": [
7008
- "streaming",
7018
+ "batch",
7009
7019
  "function_calling",
7010
7020
  "structured_output"
7011
7021
  ],
@@ -7013,8 +7023,12 @@
7013
7023
  "text_tokens": {
7014
7024
  "standard": {
7015
7025
  "input_per_million": 2.0,
7016
- "output_per_million": 8.0,
7017
- "cached_input_per_million": 0.5
7026
+ "cached_input_per_million": 0.5,
7027
+ "output_per_million": 8.0
7028
+ },
7029
+ "batch": {
7030
+ "input_per_million": 1.0,
7031
+ "output_per_million": 4.0
7018
7032
  }
7019
7033
  }
7020
7034
  },
@@ -8296,30 +8310,38 @@
8296
8310
  },
8297
8311
  {
8298
8312
  "id": "gpt-5-mini",
8299
- "name": "GPT-5 Mini",
8313
+ "name": "GPT-5 mini",
8300
8314
  "provider": "openai",
8301
- "family": "other",
8302
- "created_at": "2025-08-05 22:32:08 +0200",
8303
- "context_window": 4096,
8304
- "max_output_tokens": 16384,
8315
+ "family": "gpt-5-mini",
8316
+ "created_at": null,
8317
+ "context_window": 400000,
8318
+ "max_output_tokens": 128000,
8305
8319
  "knowledge_cutoff": null,
8306
8320
  "modalities": {
8307
8321
  "input": [
8322
+ "image",
8308
8323
  "text"
8309
8324
  ],
8310
8325
  "output": [
8326
+ "embeddings",
8311
8327
  "text"
8312
8328
  ]
8313
8329
  },
8314
8330
  "capabilities": [
8315
- "streaming",
8316
- "reasoning"
8331
+ "batch",
8332
+ "function_calling",
8333
+ "structured_output"
8317
8334
  ],
8318
8335
  "pricing": {
8319
8336
  "text_tokens": {
8320
8337
  "standard": {
8321
- "input_per_million": 0.5,
8322
- "output_per_million": 1.5
8338
+ "input_per_million": 0.25,
8339
+ "cached_input_per_million": 0.025,
8340
+ "output_per_million": 2.0
8341
+ },
8342
+ "batch": {
8343
+ "input_per_million": 0.125,
8344
+ "output_per_million": 1.0
8323
8345
  }
8324
8346
  }
8325
8347
  },
@@ -8330,30 +8352,38 @@
8330
8352
  },
8331
8353
  {
8332
8354
  "id": "gpt-5-mini-2025-08-07",
8333
- "name": "GPT-5 Mini 20250807",
8355
+ "name": "GPT-5 mini",
8334
8356
  "provider": "openai",
8335
- "family": "other",
8336
- "created_at": "2025-08-05 22:31:07 +0200",
8337
- "context_window": 4096,
8338
- "max_output_tokens": 16384,
8357
+ "family": "gpt-5-mini",
8358
+ "created_at": null,
8359
+ "context_window": 400000,
8360
+ "max_output_tokens": 128000,
8339
8361
  "knowledge_cutoff": null,
8340
8362
  "modalities": {
8341
8363
  "input": [
8364
+ "image",
8342
8365
  "text"
8343
8366
  ],
8344
8367
  "output": [
8368
+ "embeddings",
8345
8369
  "text"
8346
8370
  ]
8347
8371
  },
8348
8372
  "capabilities": [
8349
- "streaming",
8350
- "reasoning"
8373
+ "batch",
8374
+ "function_calling",
8375
+ "structured_output"
8351
8376
  ],
8352
8377
  "pricing": {
8353
8378
  "text_tokens": {
8354
8379
  "standard": {
8355
- "input_per_million": 0.5,
8356
- "output_per_million": 1.5
8380
+ "input_per_million": 0.25,
8381
+ "cached_input_per_million": 0.025,
8382
+ "output_per_million": 2.0
8383
+ },
8384
+ "batch": {
8385
+ "input_per_million": 0.125,
8386
+ "output_per_million": 1.0
8357
8387
  }
8358
8388
  }
8359
8389
  },
@@ -8364,30 +8394,38 @@
8364
8394
  },
8365
8395
  {
8366
8396
  "id": "gpt-5-nano",
8367
- "name": "GPT-5 Nano",
8397
+ "name": "GPT-5 nano",
8368
8398
  "provider": "openai",
8369
- "family": "other",
8370
- "created_at": "2025-08-05 22:39:44 +0200",
8371
- "context_window": 4096,
8372
- "max_output_tokens": 16384,
8399
+ "family": "gpt-5-nano",
8400
+ "created_at": null,
8401
+ "context_window": 400000,
8402
+ "max_output_tokens": 128000,
8373
8403
  "knowledge_cutoff": null,
8374
8404
  "modalities": {
8375
8405
  "input": [
8406
+ "image",
8376
8407
  "text"
8377
8408
  ],
8378
8409
  "output": [
8410
+ "embeddings",
8379
8411
  "text"
8380
8412
  ]
8381
8413
  },
8382
8414
  "capabilities": [
8383
- "streaming",
8384
- "reasoning"
8415
+ "batch",
8416
+ "function_calling",
8417
+ "structured_output"
8385
8418
  ],
8386
8419
  "pricing": {
8387
8420
  "text_tokens": {
8388
8421
  "standard": {
8389
- "input_per_million": 0.5,
8390
- "output_per_million": 1.5
8422
+ "input_per_million": 0.05,
8423
+ "cached_input_per_million": 0.005,
8424
+ "output_per_million": 0.4
8425
+ },
8426
+ "batch": {
8427
+ "input_per_million": 0.025,
8428
+ "output_per_million": 0.2
8391
8429
  }
8392
8430
  }
8393
8431
  },
@@ -8398,30 +8436,38 @@
8398
8436
  },
8399
8437
  {
8400
8438
  "id": "gpt-5-nano-2025-08-07",
8401
- "name": "GPT-5 Nano 20250807",
8439
+ "name": "GPT-5 nano",
8402
8440
  "provider": "openai",
8403
- "family": "other",
8404
- "created_at": "2025-08-05 22:38:23 +0200",
8405
- "context_window": 4096,
8406
- "max_output_tokens": 16384,
8441
+ "family": "gpt-5-nano",
8442
+ "created_at": null,
8443
+ "context_window": 400000,
8444
+ "max_output_tokens": 128000,
8407
8445
  "knowledge_cutoff": null,
8408
8446
  "modalities": {
8409
8447
  "input": [
8448
+ "image",
8410
8449
  "text"
8411
8450
  ],
8412
8451
  "output": [
8452
+ "embeddings",
8413
8453
  "text"
8414
8454
  ]
8415
8455
  },
8416
8456
  "capabilities": [
8417
- "streaming",
8418
- "reasoning"
8457
+ "batch",
8458
+ "function_calling",
8459
+ "structured_output"
8419
8460
  ],
8420
8461
  "pricing": {
8421
8462
  "text_tokens": {
8422
8463
  "standard": {
8423
- "input_per_million": 0.5,
8424
- "output_per_million": 1.5
8464
+ "input_per_million": 0.05,
8465
+ "cached_input_per_million": 0.005,
8466
+ "output_per_million": 0.4
8467
+ },
8468
+ "batch": {
8469
+ "input_per_million": 0.025,
8470
+ "output_per_million": 0.2
8425
8471
  }
8426
8472
  }
8427
8473
  },
@@ -8491,35 +8537,65 @@
8491
8537
  "metadata": {}
8492
8538
  },
8493
8539
  {
8494
- "id": "o1",
8495
- "name": "O1",
8540
+ "id": "gpt-oss-20b",
8541
+ "name": "gpt-oss-20b",
8496
8542
  "provider": "openai",
8497
- "family": "o1",
8498
- "created_at": "2024-12-16 20:03:36 +0100",
8499
- "context_window": 200000,
8500
- "max_output_tokens": 100000,
8543
+ "family": "gpt-oss-20b",
8544
+ "created_at": null,
8545
+ "context_window": 131072,
8546
+ "max_output_tokens": 131072,
8501
8547
  "knowledge_cutoff": null,
8502
8548
  "modalities": {
8503
8549
  "input": [
8504
- "text",
8505
- "image",
8506
- "pdf"
8550
+ "text"
8507
8551
  ],
8508
8552
  "output": [
8553
+ "embeddings",
8509
8554
  "text"
8510
8555
  ]
8511
8556
  },
8512
8557
  "capabilities": [
8513
- "streaming",
8558
+ "batch",
8514
8559
  "function_calling",
8515
- "structured_output",
8516
- "reasoning"
8560
+ "structured_output"
8561
+ ],
8562
+ "pricing": {},
8563
+ "metadata": {}
8564
+ },
8565
+ {
8566
+ "id": "o1",
8567
+ "name": "o1",
8568
+ "provider": "openai",
8569
+ "family": "o1",
8570
+ "created_at": null,
8571
+ "context_window": 200000,
8572
+ "max_output_tokens": 100000,
8573
+ "knowledge_cutoff": null,
8574
+ "modalities": {
8575
+ "input": [
8576
+ "image",
8577
+ "text"
8578
+ ],
8579
+ "output": [
8580
+ "embeddings",
8581
+ "text"
8582
+ ]
8583
+ },
8584
+ "capabilities": [
8585
+ "batch",
8586
+ "function_calling",
8587
+ "structured_output"
8517
8588
  ],
8518
8589
  "pricing": {
8519
8590
  "text_tokens": {
8520
8591
  "standard": {
8521
8592
  "input_per_million": 15.0,
8593
+ "cached_input_per_million": 7.5,
8522
8594
  "output_per_million": 60.0
8595
+ },
8596
+ "batch": {
8597
+ "input_per_million": 7.5,
8598
+ "output_per_million": 30.0
8523
8599
  }
8524
8600
  }
8525
8601
  },
@@ -8530,34 +8606,38 @@
8530
8606
  },
8531
8607
  {
8532
8608
  "id": "o1-2024-12-17",
8533
- "name": "O1-20241217",
8609
+ "name": "o1",
8534
8610
  "provider": "openai",
8535
8611
  "family": "o1",
8536
- "created_at": "2024-12-16 06:29:36 +0100",
8612
+ "created_at": null,
8537
8613
  "context_window": 200000,
8538
8614
  "max_output_tokens": 100000,
8539
8615
  "knowledge_cutoff": null,
8540
8616
  "modalities": {
8541
8617
  "input": [
8542
- "text",
8543
8618
  "image",
8544
- "pdf"
8619
+ "text"
8545
8620
  ],
8546
8621
  "output": [
8622
+ "embeddings",
8547
8623
  "text"
8548
8624
  ]
8549
8625
  },
8550
8626
  "capabilities": [
8551
- "streaming",
8627
+ "batch",
8552
8628
  "function_calling",
8553
- "structured_output",
8554
- "reasoning"
8629
+ "structured_output"
8555
8630
  ],
8556
8631
  "pricing": {
8557
8632
  "text_tokens": {
8558
8633
  "standard": {
8559
8634
  "input_per_million": 15.0,
8635
+ "cached_input_per_million": 7.5,
8560
8636
  "output_per_million": 60.0
8637
+ },
8638
+ "batch": {
8639
+ "input_per_million": 7.5,
8640
+ "output_per_million": 30.0
8561
8641
  }
8562
8642
  }
8563
8643
  },
@@ -8632,6 +8712,72 @@
8632
8712
  "owned_by": "system"
8633
8713
  }
8634
8714
  },
8715
+ {
8716
+ "id": "o1-preview",
8717
+ "name": "o1 Preview",
8718
+ "provider": "openai",
8719
+ "family": "o1-preview",
8720
+ "created_at": null,
8721
+ "context_window": 128000,
8722
+ "max_output_tokens": 32768,
8723
+ "knowledge_cutoff": null,
8724
+ "modalities": {
8725
+ "input": [
8726
+ "text"
8727
+ ],
8728
+ "output": [
8729
+ "embeddings",
8730
+ "text"
8731
+ ]
8732
+ },
8733
+ "capabilities": [
8734
+ "function_calling",
8735
+ "structured_output"
8736
+ ],
8737
+ "pricing": {
8738
+ "text_tokens": {
8739
+ "standard": {
8740
+ "input_per_million": 15.0,
8741
+ "cached_input_per_million": 7.5,
8742
+ "output_per_million": 60.0
8743
+ }
8744
+ }
8745
+ },
8746
+ "metadata": {}
8747
+ },
8748
+ {
8749
+ "id": "o1-preview-2024-09-12",
8750
+ "name": "o1 Preview",
8751
+ "provider": "openai",
8752
+ "family": "o1-preview",
8753
+ "created_at": null,
8754
+ "context_window": 128000,
8755
+ "max_output_tokens": 32768,
8756
+ "knowledge_cutoff": null,
8757
+ "modalities": {
8758
+ "input": [
8759
+ "text"
8760
+ ],
8761
+ "output": [
8762
+ "embeddings",
8763
+ "text"
8764
+ ]
8765
+ },
8766
+ "capabilities": [
8767
+ "function_calling",
8768
+ "structured_output"
8769
+ ],
8770
+ "pricing": {
8771
+ "text_tokens": {
8772
+ "standard": {
8773
+ "input_per_million": 15.0,
8774
+ "cached_input_per_million": 7.5,
8775
+ "output_per_million": 60.0
8776
+ }
8777
+ }
8778
+ },
8779
+ "metadata": {}
8780
+ },
8635
8781
  {
8636
8782
  "id": "o1-pro",
8637
8783
  "name": "o1-pro",
@@ -9235,22 +9381,21 @@
9235
9381
  },
9236
9382
  {
9237
9383
  "id": "omni-moderation-latest",
9238
- "name": "Omni Moderation Latest",
9384
+ "name": "omni-moderation",
9239
9385
  "provider": "openai",
9240
- "family": "moderation",
9241
- "created_at": "2024-11-15 17:47:45 +0100",
9386
+ "family": "omni-moderation-latest",
9387
+ "created_at": null,
9242
9388
  "context_window": null,
9243
9389
  "max_output_tokens": null,
9244
9390
  "knowledge_cutoff": null,
9245
9391
  "modalities": {
9246
9392
  "input": [
9247
- "text",
9248
9393
  "image",
9249
- "pdf"
9394
+ "text"
9250
9395
  ],
9251
9396
  "output": [
9252
- "text",
9253
- "moderation"
9397
+ "embeddings",
9398
+ "text"
9254
9399
  ]
9255
9400
  },
9256
9401
  "capabilities": [],
@@ -9262,10 +9407,10 @@
9262
9407
  },
9263
9408
  {
9264
9409
  "id": "text-embedding-3-large",
9265
- "name": "text-embedding- 3 Large",
9410
+ "name": "text-embedding-3-large",
9266
9411
  "provider": "openai",
9267
- "family": "embedding3_large",
9268
- "created_at": "2024-01-22 20:53:00 +0100",
9412
+ "family": "text-embedding-3-large",
9413
+ "created_at": null,
9269
9414
  "context_window": null,
9270
9415
  "max_output_tokens": null,
9271
9416
  "knowledge_cutoff": null,
@@ -9274,8 +9419,8 @@
9274
9419
  "text"
9275
9420
  ],
9276
9421
  "output": [
9277
- "text",
9278
- "embeddings"
9422
+ "embeddings",
9423
+ "text"
9279
9424
  ]
9280
9425
  },
9281
9426
  "capabilities": [
@@ -9284,12 +9429,18 @@
9284
9429
  "pricing": {
9285
9430
  "text_tokens": {
9286
9431
  "standard": {
9287
- "input_per_million": 0.13,
9288
- "output_per_million": 0.13
9432
+ "input_per_million": 0.13
9289
9433
  },
9290
9434
  "batch": {
9291
- "input_per_million": 0.065,
9292
- "output_per_million": 0.065
9435
+ "input_per_million": 0.065
9436
+ }
9437
+ },
9438
+ "embeddings": {
9439
+ "standard": {
9440
+ "input_per_million": 0.13
9441
+ },
9442
+ "batch": {
9443
+ "input_per_million": 0.065
9293
9444
  }
9294
9445
  }
9295
9446
  },
@@ -9344,10 +9495,10 @@
9344
9495
  },
9345
9496
  {
9346
9497
  "id": "text-embedding-ada-002",
9347
- "name": "text-embedding- Ada 002",
9498
+ "name": "text-embedding-ada-002",
9348
9499
  "provider": "openai",
9349
- "family": "embedding_ada",
9350
- "created_at": "2022-12-16 20:01:39 +0100",
9500
+ "family": "text-embedding-ada-002",
9501
+ "created_at": null,
9351
9502
  "context_window": null,
9352
9503
  "max_output_tokens": null,
9353
9504
  "knowledge_cutoff": null,
@@ -9356,8 +9507,8 @@
9356
9507
  "text"
9357
9508
  ],
9358
9509
  "output": [
9359
- "text",
9360
- "embeddings"
9510
+ "embeddings",
9511
+ "text"
9361
9512
  ]
9362
9513
  },
9363
9514
  "capabilities": [
@@ -9366,12 +9517,18 @@
9366
9517
  "pricing": {
9367
9518
  "text_tokens": {
9368
9519
  "standard": {
9369
- "input_per_million": 0.1,
9370
- "output_per_million": 0.1
9520
+ "input_per_million": 0.1
9371
9521
  },
9372
9522
  "batch": {
9373
- "input_per_million": 0.05,
9374
- "output_per_million": 0.05
9523
+ "input_per_million": 0.05
9524
+ }
9525
+ },
9526
+ "embeddings": {
9527
+ "standard": {
9528
+ "input_per_million": 0.1
9529
+ },
9530
+ "batch": {
9531
+ "input_per_million": 0.05
9375
9532
  }
9376
9533
  }
9377
9534
  },
@@ -9428,29 +9585,25 @@
9428
9585
  "id": "tts-1",
9429
9586
  "name": "TTS-1",
9430
9587
  "provider": "openai",
9431
- "family": "tts1",
9432
- "created_at": "2023-04-19 23:49:11 +0200",
9588
+ "family": "tts-1",
9589
+ "created_at": null,
9433
9590
  "context_window": null,
9434
9591
  "max_output_tokens": null,
9435
9592
  "knowledge_cutoff": null,
9436
9593
  "modalities": {
9437
9594
  "input": [
9438
- "text",
9439
- "audio"
9595
+ "text"
9440
9596
  ],
9441
9597
  "output": [
9442
- "text",
9443
- "audio"
9598
+ "audio",
9599
+ "embeddings"
9444
9600
  ]
9445
9601
  },
9446
- "capabilities": [
9447
- "streaming"
9448
- ],
9602
+ "capabilities": [],
9449
9603
  "pricing": {
9450
9604
  "text_tokens": {
9451
9605
  "standard": {
9452
- "input_per_million": 15.0,
9453
- "output_per_million": 15.0
9606
+ "input_per_million": 15.0
9454
9607
  }
9455
9608
  }
9456
9609
  },
@@ -10091,8 +10244,8 @@
10091
10244
  "pricing": {
10092
10245
  "text_tokens": {
10093
10246
  "standard": {
10094
- "input_per_million": 9.0,
10095
- "output_per_million": 11.0
10247
+ "input_per_million": 5.0,
10248
+ "output_per_million": 6.25
10096
10249
  }
10097
10250
  }
10098
10251
  },
@@ -10398,8 +10551,8 @@
10398
10551
  "pricing": {
10399
10552
  "text_tokens": {
10400
10553
  "standard": {
10401
- "input_per_million": 2.5,
10402
- "output_per_million": 3.0
10554
+ "input_per_million": 1.25,
10555
+ "output_per_million": 1.5
10403
10556
  }
10404
10557
  }
10405
10558
  },
@@ -10882,70 +11035,6 @@
10882
11035
  ]
10883
11036
  }
10884
11037
  },
10885
- {
10886
- "id": "anthropic/claude-3.7-sonnet:beta",
10887
- "name": "Anthropic: Claude 3.7 Sonnet (self-moderated)",
10888
- "provider": "openrouter",
10889
- "family": "anthropic",
10890
- "created_at": "2025-02-24 19:35:10 +0100",
10891
- "context_window": 200000,
10892
- "max_output_tokens": 128000,
10893
- "knowledge_cutoff": null,
10894
- "modalities": {
10895
- "input": [
10896
- "text",
10897
- "image",
10898
- "file"
10899
- ],
10900
- "output": [
10901
- "text"
10902
- ]
10903
- },
10904
- "capabilities": [
10905
- "streaming",
10906
- "function_calling"
10907
- ],
10908
- "pricing": {
10909
- "text_tokens": {
10910
- "standard": {
10911
- "input_per_million": 3.0,
10912
- "output_per_million": 15.0,
10913
- "cached_input_per_million": 0.3
10914
- }
10915
- }
10916
- },
10917
- "metadata": {
10918
- "description": "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. \n\nClaude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks.\n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)",
10919
- "architecture": {
10920
- "modality": "text+image->text",
10921
- "input_modalities": [
10922
- "text",
10923
- "image",
10924
- "file"
10925
- ],
10926
- "output_modalities": [
10927
- "text"
10928
- ],
10929
- "tokenizer": "Claude",
10930
- "instruct_type": null
10931
- },
10932
- "top_provider": {
10933
- "context_length": 200000,
10934
- "max_completion_tokens": 128000,
10935
- "is_moderated": false
10936
- },
10937
- "per_request_limits": null,
10938
- "supported_parameters": [
10939
- "include_reasoning",
10940
- "max_tokens",
10941
- "reasoning",
10942
- "stop",
10943
- "temperature",
10944
- "tool_choice",
10945
- "tools"
10946
- ]
10947
- }
10948
- },
10949
11038
  {
10950
11039
  "id": "anthropic/claude-3.7-sonnet:thinking",
10951
11040
  "name": "Anthropic: Claude 3.7 Sonnet (thinking)",
@@ -11586,6 +11675,69 @@
11586
11675
  ]
11587
11676
  }
11588
11677
  },
11678
+ {
11679
+ "id": "baidu/ernie-4.5-21b-a3b",
11680
+ "name": "Baidu: ERNIE 4.5 21B A3B",
11681
+ "provider": "openrouter",
11682
+ "family": "baidu",
11683
+ "created_at": "2025-08-12 23:29:27 +0200",
11684
+ "context_window": 120000,
11685
+ "max_output_tokens": 8000,
11686
+ "knowledge_cutoff": null,
11687
+ "modalities": {
11688
+ "input": [
11689
+ "text"
11690
+ ],
11691
+ "output": [
11692
+ "text"
11693
+ ]
11694
+ },
11695
+ "capabilities": [
11696
+ "streaming",
11697
+ "predicted_outputs"
11698
+ ],
11699
+ "pricing": {
11700
+ "text_tokens": {
11701
+ "standard": {
11702
+ "input_per_million": 0.07,
11703
+ "output_per_million": 0.28
11704
+ }
11705
+ }
11706
+ },
11707
+ "metadata": {
11708
+ "description": "A sophisticated text-based Mixture-of-Experts (MoE) model featuring 21B total parameters with 3B activated per token, delivering exceptional multimodal understanding and generation through heterogeneous MoE structures and modality-isolated routing. Supporting an extensive 131K token context length, the model achieves efficient inference via multi-expert parallel collaboration and quantization, while advanced post-training techniques including SFT, DPO, and UPO ensure optimized performance across diverse applications with specialized routing and balancing losses for superior task handling.",
11709
+ "architecture": {
11710
+ "modality": "text->text",
11711
+ "input_modalities": [
11712
+ "text"
11713
+ ],
11714
+ "output_modalities": [
11715
+ "text"
11716
+ ],
11717
+ "tokenizer": "Other",
11718
+ "instruct_type": null
11719
+ },
11720
+ "top_provider": {
11721
+ "context_length": 120000,
11722
+ "max_completion_tokens": 8000,
11723
+ "is_moderated": false
11724
+ },
11725
+ "per_request_limits": null,
11726
+ "supported_parameters": [
11727
+ "frequency_penalty",
11728
+ "logit_bias",
11729
+ "max_tokens",
11730
+ "min_p",
11731
+ "presence_penalty",
11732
+ "repetition_penalty",
11733
+ "seed",
11734
+ "stop",
11735
+ "temperature",
11736
+ "top_k",
11737
+ "top_p"
11738
+ ]
11739
+ }
11740
+ },
11589
11741
  {
11590
11742
  "id": "baidu/ernie-4.5-300b-a47b",
11591
11743
  "name": "Baidu: ERNIE 4.5 300B A47B ",
@@ -11649,6 +11801,140 @@
11649
11801
  ]
11650
11802
  }
11651
11803
  },
11804
+ {
11805
+ "id": "baidu/ernie-4.5-vl-28b-a3b",
11806
+ "name": "Baidu: ERNIE 4.5 VL 28B A3B",
11807
+ "provider": "openrouter",
11808
+ "family": "baidu",
11809
+ "created_at": "2025-08-12 23:07:16 +0200",
11810
+ "context_window": 30000,
11811
+ "max_output_tokens": 8000,
11812
+ "knowledge_cutoff": null,
11813
+ "modalities": {
11814
+ "input": [
11815
+ "text",
11816
+ "image"
11817
+ ],
11818
+ "output": [
11819
+ "text"
11820
+ ]
11821
+ },
11822
+ "capabilities": [
11823
+ "streaming",
11824
+ "predicted_outputs"
11825
+ ],
11826
+ "pricing": {
11827
+ "text_tokens": {
11828
+ "standard": {
11829
+ "input_per_million": 0.14,
11830
+ "output_per_million": 0.56
11831
+ }
11832
+ }
11833
+ },
11834
+ "metadata": {
11835
+ "description": "A powerful multimodal Mixture-of-Experts chat model featuring 28B total parameters with 3B activated per token, delivering exceptional text and vision understanding through its innovative heterogeneous MoE structure with modality-isolated routing. Built with scaling-efficient infrastructure for high-throughput training and inference, the model leverages advanced post-training techniques including SFT, DPO, and UPO for optimized performance, while supporting an impressive 131K context length and RLVR alignment for superior cross-modal reasoning and generation capabilities.",
11836
+ "architecture": {
11837
+ "modality": "text+image->text",
11838
+ "input_modalities": [
11839
+ "text",
11840
+ "image"
11841
+ ],
11842
+ "output_modalities": [
11843
+ "text"
11844
+ ],
11845
+ "tokenizer": "Other",
11846
+ "instruct_type": null
11847
+ },
11848
+ "top_provider": {
11849
+ "context_length": 30000,
11850
+ "max_completion_tokens": 8000,
11851
+ "is_moderated": false
11852
+ },
11853
+ "per_request_limits": null,
11854
+ "supported_parameters": [
11855
+ "frequency_penalty",
11856
+ "include_reasoning",
11857
+ "logit_bias",
11858
+ "max_tokens",
11859
+ "min_p",
11860
+ "presence_penalty",
11861
+ "reasoning",
11862
+ "repetition_penalty",
11863
+ "seed",
11864
+ "stop",
11865
+ "temperature",
11866
+ "top_k",
11867
+ "top_p"
11868
+ ]
11869
+ }
11870
+ },
11871
+ {
11872
+ "id": "baidu/ernie-4.5-vl-424b-a47b",
11873
+ "name": "Baidu: ERNIE 4.5 VL 424B A47B ",
11874
+ "provider": "openrouter",
11875
+ "family": "baidu",
11876
+ "created_at": "2025-06-30 18:28:23 +0200",
11877
+ "context_window": 123000,
11878
+ "max_output_tokens": 16000,
11879
+ "knowledge_cutoff": null,
11880
+ "modalities": {
11881
+ "input": [
11882
+ "image",
11883
+ "text"
11884
+ ],
11885
+ "output": [
11886
+ "text"
11887
+ ]
11888
+ },
11889
+ "capabilities": [
11890
+ "streaming",
11891
+ "predicted_outputs"
11892
+ ],
11893
+ "pricing": {
11894
+ "text_tokens": {
11895
+ "standard": {
11896
+ "input_per_million": 0.42,
11897
+ "output_per_million": 1.25
11898
+ }
11899
+ }
11900
+ },
11901
+ "metadata": {
11902
+ "description": "ERNIE-4.5-VL-424B-A47B is a multimodal Mixture-of-Experts (MoE) model from Baidu’s ERNIE 4.5 series, featuring 424B total parameters with 47B active per token. It is trained jointly on text and image data using a heterogeneous MoE architecture and modality-isolated routing to enable high-fidelity cross-modal reasoning, image understanding, and long-context generation (up to 131k tokens). Fine-tuned with techniques like SFT, DPO, UPO, and RLVR, this model supports both “thinking” and non-thinking inference modes. Designed for vision-language tasks in English and Chinese, it is optimized for efficient scaling and can operate under 4-bit/8-bit quantization.",
11903
+ "architecture": {
11904
+ "modality": "text+image->text",
11905
+ "input_modalities": [
11906
+ "image",
11907
+ "text"
11908
+ ],
11909
+ "output_modalities": [
11910
+ "text"
11911
+ ],
11912
+ "tokenizer": "Other",
11913
+ "instruct_type": null
11914
+ },
11915
+ "top_provider": {
11916
+ "context_length": 123000,
11917
+ "max_completion_tokens": 16000,
11918
+ "is_moderated": false
11919
+ },
11920
+ "per_request_limits": null,
11921
+ "supported_parameters": [
11922
+ "frequency_penalty",
11923
+ "include_reasoning",
11924
+ "logit_bias",
11925
+ "max_tokens",
11926
+ "min_p",
11927
+ "presence_penalty",
11928
+ "reasoning",
11929
+ "repetition_penalty",
11930
+ "seed",
11931
+ "stop",
11932
+ "temperature",
11933
+ "top_k",
11934
+ "top_p"
11935
+ ]
11936
+ }
11937
+ },
11652
11938
  {
11653
11939
  "id": "bytedance/ui-tars-1.5-7b",
11654
11940
  "name": "Bytedance: UI-TARS 7B ",
@@ -14068,7 +14354,8 @@
14068
14354
  "input": [
14069
14355
  "text",
14070
14356
  "image",
14071
- "file"
14357
+ "file",
14358
+ "audio"
14072
14359
  ],
14073
14360
  "output": [
14074
14361
  "text"
@@ -14094,7 +14381,8 @@
14094
14381
  "input_modalities": [
14095
14382
  "text",
14096
14383
  "image",
14097
- "file"
14384
+ "file",
14385
+ "audio"
14098
14386
  ],
14099
14387
  "output_modalities": [
14100
14388
  "text"
@@ -14254,6 +14542,7 @@
14254
14542
  "reasoning",
14255
14543
  "response_format",
14256
14544
  "seed",
14545
+ "stop",
14257
14546
  "structured_outputs",
14258
14547
  "temperature",
14259
14548
  "tool_choice",
@@ -14275,7 +14564,8 @@
14275
14564
  "input": [
14276
14565
  "file",
14277
14566
  "image",
14278
- "text"
14567
+ "text",
14568
+ "audio"
14279
14569
  ],
14280
14570
  "output": [
14281
14571
  "text"
@@ -14302,7 +14592,8 @@
14302
14592
  "input_modalities": [
14303
14593
  "file",
14304
14594
  "image",
14305
- "text"
14595
+ "text",
14596
+ "audio"
14306
14597
  ],
14307
14598
  "output_modalities": [
14308
14599
  "text"
@@ -14344,7 +14635,8 @@
14344
14635
  "input": [
14345
14636
  "file",
14346
14637
  "image",
14347
- "text"
14638
+ "text",
14639
+ "audio"
14348
14640
  ],
14349
14641
  "output": [
14350
14642
  "text"
@@ -14371,7 +14663,8 @@
14371
14663
  "input_modalities": [
14372
14664
  "file",
14373
14665
  "image",
14374
- "text"
14666
+ "text",
14667
+ "audio"
14375
14668
  ],
14376
14669
  "output_modalities": [
14377
14670
  "text"
@@ -14472,7 +14765,8 @@
14472
14765
  "input": [
14473
14766
  "file",
14474
14767
  "image",
14475
- "text"
14768
+ "text",
14769
+ "audio"
14476
14770
  ],
14477
14771
  "output": [
14478
14772
  "text"
@@ -14499,7 +14793,8 @@
14499
14793
  "input_modalities": [
14500
14794
  "file",
14501
14795
  "image",
14502
- "text"
14796
+ "text",
14797
+ "audio"
14503
14798
  ],
14504
14799
  "output_modalities": [
14505
14800
  "text"
@@ -14541,7 +14836,8 @@
14541
14836
  "input": [
14542
14837
  "text",
14543
14838
  "image",
14544
- "file"
14839
+ "file",
14840
+ "audio"
14545
14841
  ],
14546
14842
  "output": [
14547
14843
  "text"
@@ -14568,7 +14864,8 @@
14568
14864
  "input_modalities": [
14569
14865
  "text",
14570
14866
  "image",
14571
- "file"
14867
+ "file",
14868
+ "audio"
14572
14869
  ],
14573
14870
  "output_modalities": [
14574
14871
  "text"
@@ -15062,7 +15359,7 @@
15062
15359
  "provider": "openrouter",
15063
15360
  "family": "google",
15064
15361
  "created_at": "2025-03-13 22:50:25 +0100",
15065
- "context_window": 96000,
15362
+ "context_window": 32768,
15066
15363
  "max_output_tokens": 8192,
15067
15364
  "knowledge_cutoff": null,
15068
15365
  "modalities": {
@@ -15076,7 +15373,6 @@
15076
15373
  },
15077
15374
  "capabilities": [
15078
15375
  "streaming",
15079
- "structured_output",
15080
15376
  "predicted_outputs"
15081
15377
  ],
15082
15378
  "pricing": {},
@@ -15095,7 +15391,7 @@
15095
15391
  "instruct_type": "gemma"
15096
15392
  },
15097
15393
  "top_provider": {
15098
- "context_length": 96000,
15394
+ "context_length": 32768,
15099
15395
  "max_completion_tokens": 8192,
15100
15396
  "is_moderated": false
15101
15397
  },
@@ -15108,10 +15404,8 @@
15108
15404
  "min_p",
15109
15405
  "presence_penalty",
15110
15406
  "repetition_penalty",
15111
- "response_format",
15112
15407
  "seed",
15113
15408
  "stop",
15114
- "structured_outputs",
15115
15409
  "temperature",
15116
15410
  "top_k",
15117
15411
  "top_logprobs",
@@ -16122,8 +16416,8 @@
16122
16416
  "pricing": {
16123
16417
  "text_tokens": {
16124
16418
  "standard": {
16125
- "input_per_million": 1.5,
16126
- "output_per_million": 1.5
16419
+ "input_per_million": 0.75,
16420
+ "output_per_million": 0.75
16127
16421
  }
16128
16422
  }
16129
16423
  },
@@ -16285,7 +16579,6 @@
16285
16579
  "supported_parameters": [
16286
16580
  "frequency_penalty",
16287
16581
  "logit_bias",
16288
- "logprobs",
16289
16582
  "max_tokens",
16290
16583
  "min_p",
16291
16584
  "presence_penalty",
@@ -16297,7 +16590,6 @@
16297
16590
  "tool_choice",
16298
16591
  "tools",
16299
16592
  "top_k",
16300
- "top_logprobs",
16301
16593
  "top_p"
16302
16594
  ]
16303
16595
  }
@@ -17474,7 +17766,6 @@
17474
17766
  "supported_parameters": [
17475
17767
  "frequency_penalty",
17476
17768
  "logit_bias",
17477
- "logprobs",
17478
17769
  "max_tokens",
17479
17770
  "min_p",
17480
17771
  "presence_penalty",
@@ -17484,7 +17775,6 @@
17484
17775
  "stop",
17485
17776
  "temperature",
17486
17777
  "top_k",
17487
- "top_logprobs",
17488
17778
  "top_p"
17489
17779
  ]
17490
17780
  }
@@ -19025,13 +19315,13 @@
19025
19315
  }
19026
19316
  },
19027
19317
  {
19028
- "id": "mistralai/mistral-7b-instruct-v0.2",
19029
- "name": "Mistral: Mistral 7B Instruct v0.2",
19318
+ "id": "mistralai/mistral-7b-instruct-v0.3",
19319
+ "name": "Mistral: Mistral 7B Instruct v0.3",
19030
19320
  "provider": "openrouter",
19031
19321
  "family": "mistralai",
19032
- "created_at": "2023-12-28 01:00:00 +0100",
19322
+ "created_at": "2024-05-27 02:00:00 +0200",
19033
19323
  "context_window": 32768,
19034
- "max_output_tokens": null,
19324
+ "max_output_tokens": 16384,
19035
19325
  "knowledge_cutoff": null,
19036
19326
  "modalities": {
19037
19327
  "input": [
@@ -19043,18 +19333,20 @@
19043
19333
  },
19044
19334
  "capabilities": [
19045
19335
  "streaming",
19336
+ "function_calling",
19337
+ "structured_output",
19046
19338
  "predicted_outputs"
19047
19339
  ],
19048
19340
  "pricing": {
19049
19341
  "text_tokens": {
19050
19342
  "standard": {
19051
- "input_per_million": 0.19999999999999998,
19052
- "output_per_million": 0.19999999999999998
19343
+ "input_per_million": 0.028,
19344
+ "output_per_million": 0.054
19053
19345
  }
19054
19346
  }
19055
19347
  },
19056
19348
  "metadata": {
19057
- "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\nAn improved version of [Mistral 7B Instruct](/modelsmistralai/mistral-7b-instruct-v0.1), with the following changes:\n\n- 32k context window (vs 8k context in v0.1)\n- Rope-theta = 1e6\n- No Sliding-Window Attention",
19349
+ "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\nAn improved version of [Mistral 7B Instruct v0.2](/models/mistralai/mistral-7b-instruct-v0.2), with the following changes:\n\n- Extended vocabulary to 32768\n- Supports v3 Tokenizer\n- Supports function calling\n\nNOTE: Support for function calling depends on the provider.",
19058
19350
  "architecture": {
19059
19351
  "modality": "text->text",
19060
19352
  "input_modalities": [
@@ -19068,27 +19360,32 @@
19068
19360
  },
19069
19361
  "top_provider": {
19070
19362
  "context_length": 32768,
19071
- "max_completion_tokens": null,
19363
+ "max_completion_tokens": 16384,
19072
19364
  "is_moderated": false
19073
19365
  },
19074
19366
  "per_request_limits": null,
19075
19367
  "supported_parameters": [
19076
19368
  "frequency_penalty",
19077
19369
  "logit_bias",
19370
+ "logprobs",
19078
19371
  "max_tokens",
19079
19372
  "min_p",
19080
19373
  "presence_penalty",
19081
19374
  "repetition_penalty",
19375
+ "response_format",
19376
+ "seed",
19082
19377
  "stop",
19083
19378
  "temperature",
19379
+ "tool_choice",
19380
+ "tools",
19084
19381
  "top_k",
19085
19382
  "top_p"
19086
19383
  ]
19087
19384
  }
19088
19385
  },
19089
19386
  {
19090
- "id": "mistralai/mistral-7b-instruct-v0.3",
19091
- "name": "Mistral: Mistral 7B Instruct v0.3",
19387
+ "id": "mistralai/mistral-7b-instruct:free",
19388
+ "name": "Mistral: Mistral 7B Instruct (free)",
19092
19389
  "provider": "openrouter",
19093
19390
  "family": "mistralai",
19094
19391
  "created_at": "2024-05-27 02:00:00 +0200",
@@ -19106,19 +19403,11 @@
19106
19403
  "capabilities": [
19107
19404
  "streaming",
19108
19405
  "function_calling",
19109
- "structured_output",
19110
- "predicted_outputs"
19406
+ "structured_output"
19111
19407
  ],
19112
- "pricing": {
19113
- "text_tokens": {
19114
- "standard": {
19115
- "input_per_million": 0.028,
19116
- "output_per_million": 0.054
19117
- }
19118
- }
19119
- },
19408
+ "pricing": {},
19120
19409
  "metadata": {
19121
- "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\nAn improved version of [Mistral 7B Instruct v0.2](/models/mistralai/mistral-7b-instruct-v0.2), with the following changes:\n\n- Extended vocabulary to 32768\n- Supports v3 Tokenizer\n- Supports function calling\n\nNOTE: Support for function calling depends on the provider.",
19410
+ "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\n*Mistral 7B Instruct has multiple version variants, and this is intended to be the latest version.*",
19122
19411
  "architecture": {
19123
19412
  "modality": "text->text",
19124
19413
  "input_modalities": [
@@ -19138,8 +19427,6 @@
19138
19427
  "per_request_limits": null,
19139
19428
  "supported_parameters": [
19140
19429
  "frequency_penalty",
19141
- "logit_bias",
19142
- "logprobs",
19143
19430
  "max_tokens",
19144
19431
  "min_p",
19145
19432
  "presence_penalty",
@@ -19156,13 +19443,13 @@
19156
19443
  }
19157
19444
  },
19158
19445
  {
19159
- "id": "mistralai/mistral-7b-instruct:free",
19160
- "name": "Mistral: Mistral 7B Instruct (free)",
19446
+ "id": "mistralai/mistral-large",
19447
+ "name": "Mistral Large",
19161
19448
  "provider": "openrouter",
19162
19449
  "family": "mistralai",
19163
- "created_at": "2024-05-27 02:00:00 +0200",
19164
- "context_window": 32768,
19165
- "max_output_tokens": 16384,
19450
+ "created_at": "2024-02-26 01:00:00 +0100",
19451
+ "context_window": 128000,
19452
+ "max_output_tokens": null,
19166
19453
  "knowledge_cutoff": null,
19167
19454
  "modalities": {
19168
19455
  "input": [
@@ -19177,9 +19464,16 @@
19177
19464
  "function_calling",
19178
19465
  "structured_output"
19179
19466
  ],
19180
- "pricing": {},
19467
+ "pricing": {
19468
+ "text_tokens": {
19469
+ "standard": {
19470
+ "input_per_million": 2.0,
19471
+ "output_per_million": 6.0
19472
+ }
19473
+ }
19474
+ },
19181
19475
  "metadata": {
19182
- "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\n*Mistral 7B Instruct has multiple version variants, and this is intended to be the latest version.*",
19476
+ "description": "This is Mistral AI's flagship model, Mistral Large 2 (version `mistral-large-2407`). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/).\n\nIt supports dozens of languages including French, German, Spanish, Italian, Portuguese, Arabic, Hindi, Russian, Chinese, Japanese, and Korean, along with 80+ coding languages including Python, Java, C, C++, JavaScript, and Bash. Its long context window allows precise information recall from large documents.",
19183
19477
  "architecture": {
19184
19478
  "modality": "text->text",
19185
19479
  "input_modalities": [
@@ -19189,38 +19483,36 @@
19189
19483
  "text"
19190
19484
  ],
19191
19485
  "tokenizer": "Mistral",
19192
- "instruct_type": "mistral"
19486
+ "instruct_type": null
19193
19487
  },
19194
19488
  "top_provider": {
19195
- "context_length": 32768,
19196
- "max_completion_tokens": 16384,
19489
+ "context_length": 128000,
19490
+ "max_completion_tokens": null,
19197
19491
  "is_moderated": false
19198
19492
  },
19199
19493
  "per_request_limits": null,
19200
19494
  "supported_parameters": [
19201
19495
  "frequency_penalty",
19202
19496
  "max_tokens",
19203
- "min_p",
19204
19497
  "presence_penalty",
19205
- "repetition_penalty",
19206
19498
  "response_format",
19207
19499
  "seed",
19208
19500
  "stop",
19501
+ "structured_outputs",
19209
19502
  "temperature",
19210
19503
  "tool_choice",
19211
19504
  "tools",
19212
- "top_k",
19213
19505
  "top_p"
19214
19506
  ]
19215
19507
  }
19216
19508
  },
19217
19509
  {
19218
- "id": "mistralai/mistral-large",
19219
- "name": "Mistral Large",
19510
+ "id": "mistralai/mistral-large-2407",
19511
+ "name": "Mistral Large 2407",
19220
19512
  "provider": "openrouter",
19221
19513
  "family": "mistralai",
19222
- "created_at": "2024-02-26 01:00:00 +0100",
19223
- "context_window": 128000,
19514
+ "created_at": "2024-11-19 02:06:55 +0100",
19515
+ "context_window": 131072,
19224
19516
  "max_output_tokens": null,
19225
19517
  "knowledge_cutoff": null,
19226
19518
  "modalities": {
@@ -19245,7 +19537,7 @@
19245
19537
  }
19246
19538
  },
19247
19539
  "metadata": {
19248
- "description": "This is Mistral AI's flagship model, Mistral Large 2 (version `mistral-large-2407`). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/).\n\nIt supports dozens of languages including French, German, Spanish, Italian, Portuguese, Arabic, Hindi, Russian, Chinese, Japanese, and Korean, along with 80+ coding languages including Python, Java, C, C++, JavaScript, and Bash. Its long context window allows precise information recall from large documents.",
19540
+ "description": "This is Mistral AI's flagship model, Mistral Large 2 (version mistral-large-2407). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/).\n\nIt supports dozens of languages including French, German, Spanish, Italian, Portuguese, Arabic, Hindi, Russian, Chinese, Japanese, and Korean, along with 80+ coding languages including Python, Java, C, C++, JavaScript, and Bash. Its long context window allows precise information recall from large documents.\n",
19249
19541
  "architecture": {
19250
19542
  "modality": "text->text",
19251
19543
  "input_modalities": [
@@ -19258,7 +19550,7 @@
19258
19550
  "instruct_type": null
19259
19551
  },
19260
19552
  "top_provider": {
19261
- "context_length": 128000,
19553
+ "context_length": 131072,
19262
19554
  "max_completion_tokens": null,
19263
19555
  "is_moderated": false
19264
19556
  },
@@ -19279,11 +19571,11 @@
19279
19571
  }
19280
19572
  },
19281
19573
  {
19282
- "id": "mistralai/mistral-large-2407",
19283
- "name": "Mistral Large 2407",
19574
+ "id": "mistralai/mistral-large-2411",
19575
+ "name": "Mistral Large 2411",
19284
19576
  "provider": "openrouter",
19285
19577
  "family": "mistralai",
19286
- "created_at": "2024-11-19 02:06:55 +0100",
19578
+ "created_at": "2024-11-19 02:11:25 +0100",
19287
19579
  "context_window": 131072,
19288
19580
  "max_output_tokens": null,
19289
19581
  "knowledge_cutoff": null,
@@ -19309,7 +19601,7 @@
19309
19601
  }
19310
19602
  },
19311
19603
  "metadata": {
19312
- "description": "This is Mistral AI's flagship model, Mistral Large 2 (version mistral-large-2407). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/).\n\nIt supports dozens of languages including French, German, Spanish, Italian, Portuguese, Arabic, Hindi, Russian, Chinese, Japanese, and Korean, along with 80+ coding languages including Python, Java, C, C++, JavaScript, and Bash. Its long context window allows precise information recall from large documents.\n",
19604
+ "description": "Mistral Large 2 2411 is an update of [Mistral Large 2](/mistralai/mistral-large) released together with [Pixtral Large 2411](/mistralai/pixtral-large-2411)\n\nIt provides a significant upgrade on the previous [Mistral Large 24.07](/mistralai/mistral-large-2407), with notable improvements in long context understanding, a new system prompt, and more accurate function calling.",
19313
19605
  "architecture": {
19314
19606
  "modality": "text->text",
19315
19607
  "input_modalities": [
@@ -19343,17 +19635,18 @@
19343
19635
  }
19344
19636
  },
19345
19637
  {
19346
- "id": "mistralai/mistral-large-2411",
19347
- "name": "Mistral Large 2411",
19638
+ "id": "mistralai/mistral-medium-3",
19639
+ "name": "Mistral: Mistral Medium 3",
19348
19640
  "provider": "openrouter",
19349
19641
  "family": "mistralai",
19350
- "created_at": "2024-11-19 02:11:25 +0100",
19642
+ "created_at": "2025-05-07 16:15:41 +0200",
19351
19643
  "context_window": 131072,
19352
19644
  "max_output_tokens": null,
19353
19645
  "knowledge_cutoff": null,
19354
19646
  "modalities": {
19355
19647
  "input": [
19356
- "text"
19648
+ "text",
19649
+ "image"
19357
19650
  ],
19358
19651
  "output": [
19359
19652
  "text"
@@ -19367,17 +19660,18 @@
19367
19660
  "pricing": {
19368
19661
  "text_tokens": {
19369
19662
  "standard": {
19370
- "input_per_million": 2.0,
19371
- "output_per_million": 6.0
19663
+ "input_per_million": 0.39999999999999997,
19664
+ "output_per_million": 2.0
19372
19665
  }
19373
19666
  }
19374
19667
  },
19375
19668
  "metadata": {
19376
- "description": "Mistral Large 2 2411 is an update of [Mistral Large 2](/mistralai/mistral-large) released together with [Pixtral Large 2411](/mistralai/pixtral-large-2411)\n\nIt provides a significant upgrade on the previous [Mistral Large 24.07](/mistralai/mistral-large-2407), with notable improvements in long context understanding, a new system prompt, and more accurate function calling.",
19669
+ "description": "Mistral Medium 3 is a high-performance enterprise-grade language model designed to deliver frontier-level capabilities at significantly reduced operational cost. It balances state-of-the-art reasoning and multimodal performance with lower cost compared to traditional large models, making it suitable for scalable deployments across professional and industrial use cases.\n\nThe model excels in domains such as coding, STEM reasoning, and enterprise adaptation. It supports hybrid, on-prem, and in-VPC deployments and is optimized for integration into custom workflows. Mistral Medium 3 offers competitive accuracy relative to larger models like Claude Sonnet 3.5/3.7, Llama 4 Maverick, and Command R+, while maintaining broad compatibility across cloud environments.",
19377
19670
  "architecture": {
19378
- "modality": "text->text",
19671
+ "modality": "text+image->text",
19379
19672
  "input_modalities": [
19380
- "text"
19673
+ "text",
19674
+ "image"
19381
19675
  ],
19382
19676
  "output_modalities": [
19383
19677
  "text"
@@ -19407,12 +19701,12 @@
19407
19701
  }
19408
19702
  },
19409
19703
  {
19410
- "id": "mistralai/mistral-medium-3",
19411
- "name": "Mistral: Mistral Medium 3",
19704
+ "id": "mistralai/mistral-medium-3.1",
19705
+ "name": "Mistral: Mistral Medium 3.1",
19412
19706
  "provider": "openrouter",
19413
19707
  "family": "mistralai",
19414
- "created_at": "2025-05-07 16:15:41 +0200",
19415
- "context_window": 131072,
19708
+ "created_at": "2025-08-13 16:33:59 +0200",
19709
+ "context_window": 262144,
19416
19710
  "max_output_tokens": null,
19417
19711
  "knowledge_cutoff": null,
19418
19712
  "modalities": {
@@ -19438,7 +19732,7 @@
19438
19732
  }
19439
19733
  },
19440
19734
  "metadata": {
19441
- "description": "Mistral Medium 3 is a high-performance enterprise-grade language model designed to deliver frontier-level capabilities at significantly reduced operational cost. It balances state-of-the-art reasoning and multimodal performance with 8× lower cost compared to traditional large models, making it suitable for scalable deployments across professional and industrial use cases.\n\nThe model excels in domains such as coding, STEM reasoning, and enterprise adaptation. It supports hybrid, on-prem, and in-VPC deployments and is optimized for integration into custom workflows. Mistral Medium 3 offers competitive accuracy relative to larger models like Claude Sonnet 3.5/3.7, Llama 4 Maverick, and Command R+, while maintaining broad compatibility across cloud environments.",
19735
+ "description": "Mistral Medium 3.1 is an updated version of Mistral Medium 3, which is a high-performance enterprise-grade language model designed to deliver frontier-level capabilities at significantly reduced operational cost. It balances state-of-the-art reasoning and multimodal performance with 8× lower cost compared to traditional large models, making it suitable for scalable deployments across professional and industrial use cases.\n\nThe model excels in domains such as coding, STEM reasoning, and enterprise adaptation. It supports hybrid, on-prem, and in-VPC deployments and is optimized for integration into custom workflows. Mistral Medium 3.1 offers competitive accuracy relative to larger models like Claude Sonnet 3.5/3.7, Llama 4 Maverick, and Command R+, while maintaining broad compatibility across cloud environments.",
19442
19736
  "architecture": {
19443
19737
  "modality": "text+image->text",
19444
19738
  "input_modalities": [
@@ -19452,7 +19746,7 @@
19452
19746
  "instruct_type": null
19453
19747
  },
19454
19748
  "top_provider": {
19455
- "context_length": 131072,
19749
+ "context_length": 262144,
19456
19750
  "max_completion_tokens": null,
19457
19751
  "is_moderated": false
19458
19752
  },
@@ -20003,7 +20297,7 @@
20003
20297
  "provider": "openrouter",
20004
20298
  "family": "mistralai",
20005
20299
  "created_at": "2025-06-20 20:10:16 +0200",
20006
- "context_window": 131072,
20300
+ "context_window": 128000,
20007
20301
  "max_output_tokens": null,
20008
20302
  "knowledge_cutoff": null,
20009
20303
  "modalities": {
@@ -20024,8 +20318,8 @@
20024
20318
  "pricing": {
20025
20319
  "text_tokens": {
20026
20320
  "standard": {
20027
- "input_per_million": 0.01999188,
20028
- "output_per_million": 0.0800064
20321
+ "input_per_million": 0.049999999999999996,
20322
+ "output_per_million": 0.09999999999999999
20029
20323
  }
20030
20324
  }
20031
20325
  },
@@ -20044,7 +20338,7 @@
20044
20338
  "instruct_type": null
20045
20339
  },
20046
20340
  "top_provider": {
20047
- "context_length": 131072,
20341
+ "context_length": 128000,
20048
20342
  "max_completion_tokens": null,
20049
20343
  "is_moderated": false
20050
20344
  },
@@ -20973,8 +21267,8 @@
20973
21267
  "provider": "openrouter",
20974
21268
  "family": "neversleep",
20975
21269
  "created_at": "2024-09-15 02:00:00 +0200",
20976
- "context_window": 40000,
20977
- "max_output_tokens": 40000,
21270
+ "context_window": 32768,
21271
+ "max_output_tokens": 2048,
20978
21272
  "knowledge_cutoff": null,
20979
21273
  "modalities": {
20980
21274
  "input": [
@@ -20992,8 +21286,8 @@
20992
21286
  "pricing": {
20993
21287
  "text_tokens": {
20994
21288
  "standard": {
20995
- "input_per_million": 0.16999999999999998,
20996
- "output_per_million": 0.9900000000000001
21289
+ "input_per_million": 0.09999999999999999,
21290
+ "output_per_million": 0.625
20997
21291
  }
20998
21292
  }
20999
21293
  },
@@ -21011,8 +21305,8 @@
21011
21305
  "instruct_type": "llama3"
21012
21306
  },
21013
21307
  "top_provider": {
21014
- "context_length": 40000,
21015
- "max_completion_tokens": 40000,
21308
+ "context_length": 32768,
21309
+ "max_completion_tokens": 2048,
21016
21310
  "is_moderated": false
21017
21311
  },
21018
21312
  "per_request_limits": null,
@@ -21040,8 +21334,8 @@
21040
21334
  "provider": "openrouter",
21041
21335
  "family": "neversleep",
21042
21336
  "created_at": "2023-11-26 01:00:00 +0100",
21043
- "context_window": 4096,
21044
- "max_output_tokens": null,
21337
+ "context_window": 8192,
21338
+ "max_output_tokens": 2048,
21045
21339
  "knowledge_cutoff": null,
21046
21340
  "modalities": {
21047
21341
  "input": [
@@ -21059,8 +21353,8 @@
21059
21353
  "pricing": {
21060
21354
  "text_tokens": {
21061
21355
  "standard": {
21062
- "input_per_million": 1.0,
21063
- "output_per_million": 1.75
21356
+ "input_per_million": 0.625,
21357
+ "output_per_million": 1.0
21064
21358
  }
21065
21359
  }
21066
21360
  },
@@ -21078,8 +21372,8 @@
21078
21372
  "instruct_type": "alpaca"
21079
21373
  },
21080
21374
  "top_provider": {
21081
- "context_length": 4096,
21082
- "max_completion_tokens": null,
21375
+ "context_length": 8192,
21376
+ "max_completion_tokens": 2048,
21083
21377
  "is_moderated": false
21084
21378
  },
21085
21379
  "per_request_limits": null,
@@ -23588,8 +23882,8 @@
23588
23882
  "provider": "openrouter",
23589
23883
  "family": "openai",
23590
23884
  "created_at": "2025-08-05 19:17:11 +0200",
23591
- "context_window": 131072,
23592
- "max_output_tokens": null,
23885
+ "context_window": 131000,
23886
+ "max_output_tokens": 131000,
23593
23887
  "knowledge_cutoff": null,
23594
23888
  "modalities": {
23595
23889
  "input": [
@@ -23608,8 +23902,8 @@
23608
23902
  "pricing": {
23609
23903
  "text_tokens": {
23610
23904
  "standard": {
23611
- "input_per_million": 0.07256312,
23612
- "output_per_million": 0.2903936
23905
+ "input_per_million": 0.072,
23906
+ "output_per_million": 0.28
23613
23907
  }
23614
23908
  }
23615
23909
  },
@@ -23627,8 +23921,8 @@
23627
23921
  "instruct_type": null
23628
23922
  },
23629
23923
  "top_provider": {
23630
- "context_length": 131072,
23631
- "max_completion_tokens": null,
23924
+ "context_length": 131000,
23925
+ "max_completion_tokens": 131000,
23632
23926
  "is_moderated": false
23633
23927
  },
23634
23928
  "per_request_limits": null,
@@ -23661,8 +23955,8 @@
23661
23955
  "provider": "openrouter",
23662
23956
  "family": "openai",
23663
23957
  "created_at": "2025-08-05 19:17:09 +0200",
23664
- "context_window": 131072,
23665
- "max_output_tokens": null,
23958
+ "context_window": 131000,
23959
+ "max_output_tokens": 131000,
23666
23960
  "knowledge_cutoff": null,
23667
23961
  "modalities": {
23668
23962
  "input": [
@@ -23682,7 +23976,7 @@
23682
23976
  "text_tokens": {
23683
23977
  "standard": {
23684
23978
  "input_per_million": 0.04,
23685
- "output_per_million": 0.16
23979
+ "output_per_million": 0.15
23686
23980
  }
23687
23981
  }
23688
23982
  },
@@ -23700,8 +23994,8 @@
23700
23994
  "instruct_type": null
23701
23995
  },
23702
23996
  "top_provider": {
23703
- "context_length": 131072,
23704
- "max_completion_tokens": null,
23997
+ "context_length": 131000,
23998
+ "max_completion_tokens": 131000,
23705
23999
  "is_moderated": false
23706
24000
  },
23707
24001
  "per_request_limits": null,
@@ -28323,69 +28617,6 @@
28323
28617
  ]
28324
28618
  }
28325
28619
  },
28326
- {
28327
- "id": "thedrummer/valkyrie-49b-v1",
28328
- "name": "TheDrummer: Valkyrie 49B V1",
28329
- "provider": "openrouter",
28330
- "family": "thedrummer",
28331
- "created_at": "2025-05-23 19:51:10 +0200",
28332
- "context_window": 131072,
28333
- "max_output_tokens": 131072,
28334
- "knowledge_cutoff": null,
28335
- "modalities": {
28336
- "input": [
28337
- "text"
28338
- ],
28339
- "output": [
28340
- "text"
28341
- ]
28342
- },
28343
- "capabilities": [
28344
- "streaming"
28345
- ],
28346
- "pricing": {
28347
- "text_tokens": {
28348
- "standard": {
28349
- "input_per_million": 0.65,
28350
- "output_per_million": 1.0
28351
- }
28352
- }
28353
- },
28354
- "metadata": {
28355
- "description": "Built on top of NVIDIA's Llama 3.3 Nemotron Super 49B, Valkyrie is TheDrummer's newest model drop for creative writing.",
28356
- "architecture": {
28357
- "modality": "text->text",
28358
- "input_modalities": [
28359
- "text"
28360
- ],
28361
- "output_modalities": [
28362
- "text"
28363
- ],
28364
- "tokenizer": "Other",
28365
- "instruct_type": null
28366
- },
28367
- "top_provider": {
28368
- "context_length": 131072,
28369
- "max_completion_tokens": 131072,
28370
- "is_moderated": false
28371
- },
28372
- "per_request_limits": null,
28373
- "supported_parameters": [
28374
- "frequency_penalty",
28375
- "include_reasoning",
28376
- "max_tokens",
28377
- "min_p",
28378
- "presence_penalty",
28379
- "reasoning",
28380
- "repetition_penalty",
28381
- "seed",
28382
- "stop",
28383
- "temperature",
28384
- "top_k",
28385
- "top_p"
28386
- ]
28387
- }
28388
- },
28389
28620
  {
28390
28621
  "id": "thudm/glm-4-32b",
28391
28622
  "name": "THUDM: GLM 4 32B",
@@ -28777,7 +29008,7 @@
28777
29008
  "family": "undi95",
28778
29009
  "created_at": "2023-07-22 02:00:00 +0200",
28779
29010
  "context_window": 6144,
28780
- "max_output_tokens": null,
29011
+ "max_output_tokens": 1024,
28781
29012
  "knowledge_cutoff": null,
28782
29013
  "modalities": {
28783
29014
  "input": [
@@ -28795,8 +29026,8 @@
28795
29026
  "pricing": {
28796
29027
  "text_tokens": {
28797
29028
  "standard": {
28798
- "input_per_million": 0.7,
28799
- "output_per_million": 1.0
29029
+ "input_per_million": 0.5,
29030
+ "output_per_million": 0.75
28800
29031
  }
28801
29032
  }
28802
29033
  },
@@ -28815,7 +29046,7 @@
28815
29046
  },
28816
29047
  "top_provider": {
28817
29048
  "context_length": 6144,
28818
- "max_completion_tokens": null,
29049
+ "max_completion_tokens": 1024,
28819
29050
  "is_moderated": false
28820
29051
  },
28821
29052
  "per_request_limits": null,
@@ -29427,7 +29658,7 @@
29427
29658
  "provider": "openrouter",
29428
29659
  "family": "z-ai",
29429
29660
  "created_at": "2025-07-25 21:22:27 +0200",
29430
- "context_window": 98304,
29661
+ "context_window": 131072,
29431
29662
  "max_output_tokens": null,
29432
29663
  "knowledge_cutoff": null,
29433
29664
  "modalities": {
@@ -29466,7 +29697,7 @@
29466
29697
  "instruct_type": null
29467
29698
  },
29468
29699
  "top_provider": {
29469
- "context_length": 98304,
29700
+ "context_length": 131072,
29470
29701
  "max_completion_tokens": null,
29471
29702
  "is_moderated": false
29472
29703
  },
@@ -29554,6 +29785,7 @@
29554
29785
  "response_format",
29555
29786
  "seed",
29556
29787
  "stop",
29788
+ "structured_outputs",
29557
29789
  "temperature",
29558
29790
  "tool_choice",
29559
29791
  "tools",
@@ -29646,13 +29878,14 @@
29646
29878
  "capabilities": [
29647
29879
  "streaming",
29648
29880
  "function_calling",
29881
+ "structured_output",
29649
29882
  "predicted_outputs"
29650
29883
  ],
29651
29884
  "pricing": {
29652
29885
  "text_tokens": {
29653
29886
  "standard": {
29654
29887
  "input_per_million": 0.5,
29655
- "output_per_million": 1.7999999999999998
29888
+ "output_per_million": 1.7
29656
29889
  }
29657
29890
  }
29658
29891
  },
@@ -29685,6 +29918,7 @@
29685
29918
  "presence_penalty",
29686
29919
  "reasoning",
29687
29920
  "repetition_penalty",
29921
+ "response_format",
29688
29922
  "seed",
29689
29923
  "stop",
29690
29924
  "temperature",
@@ -29700,7 +29934,7 @@
29700
29934
  "name": "Sonar",
29701
29935
  "provider": "perplexity",
29702
29936
  "family": "sonar",
29703
- "created_at": "2025-08-13 11:35:41 +0200",
29937
+ "created_at": "2025-08-19 12:48:52 +0200",
29704
29938
  "context_window": 128000,
29705
29939
  "max_output_tokens": 4096,
29706
29940
  "knowledge_cutoff": null,
@@ -29732,7 +29966,7 @@
29732
29966
  "name": "Sonar Deep Research",
29733
29967
  "provider": "perplexity",
29734
29968
  "family": "sonar_deep_research",
29735
- "created_at": "2025-08-13 11:35:41 +0200",
29969
+ "created_at": "2025-08-19 12:48:52 +0200",
29736
29970
  "context_window": 128000,
29737
29971
  "max_output_tokens": 4096,
29738
29972
  "knowledge_cutoff": null,
@@ -29767,7 +30001,7 @@
29767
30001
  "name": "Sonar Pro",
29768
30002
  "provider": "perplexity",
29769
30003
  "family": "sonar_pro",
29770
- "created_at": "2025-08-13 11:35:41 +0200",
30004
+ "created_at": "2025-08-19 12:48:52 +0200",
29771
30005
  "context_window": 200000,
29772
30006
  "max_output_tokens": 8192,
29773
30007
  "knowledge_cutoff": null,
@@ -29799,7 +30033,7 @@
29799
30033
  "name": "Sonar Reasoning",
29800
30034
  "provider": "perplexity",
29801
30035
  "family": "sonar_reasoning",
29802
- "created_at": "2025-08-13 11:35:41 +0200",
30036
+ "created_at": "2025-08-19 12:48:52 +0200",
29803
30037
  "context_window": 128000,
29804
30038
  "max_output_tokens": 4096,
29805
30039
  "knowledge_cutoff": null,
@@ -29831,7 +30065,7 @@
29831
30065
  "name": "Sonar Reasoning Pro",
29832
30066
  "provider": "perplexity",
29833
30067
  "family": "sonar_reasoning_pro",
29834
- "created_at": "2025-08-13 11:35:41 +0200",
30068
+ "created_at": "2025-08-19 12:48:52 +0200",
29835
30069
  "context_window": 128000,
29836
30070
  "max_output_tokens": 8192,
29837
30071
  "knowledge_cutoff": null,