RubyGems - ruby_llm - Versions diffs - 1.6.1 → 1.6.3 - Mend

ruby_llm 1.6.1 → 1.6.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (72)

checksums.yaml +4 -4
data/README.md +73 -91
data/lib/ruby_llm/active_record/acts_as.rb +3 -11
data/lib/ruby_llm/aliases.json +4 -0
data/lib/ruby_llm/aliases.rb +7 -25
data/lib/ruby_llm/chat.rb +3 -19
data/lib/ruby_llm/configuration.rb +1 -14
data/lib/ruby_llm/content.rb +1 -3
data/lib/ruby_llm/embedding.rb +1 -2
data/lib/ruby_llm/error.rb +0 -10
data/lib/ruby_llm/image.rb +0 -4
data/lib/ruby_llm/message.rb +2 -4
data/lib/ruby_llm/model/info.rb +0 -10
data/lib/ruby_llm/model/pricing.rb +0 -3
data/lib/ruby_llm/model/pricing_category.rb +0 -2
data/lib/ruby_llm/model/pricing_tier.rb +0 -1
data/lib/ruby_llm/models.json +668 -434
data/lib/ruby_llm/models.rb +6 -18
data/lib/ruby_llm/provider.rb +1 -5
data/lib/ruby_llm/providers/anthropic/capabilities.rb +1 -46
data/lib/ruby_llm/providers/anthropic/media.rb +0 -1
data/lib/ruby_llm/providers/anthropic/tools.rb +0 -1
data/lib/ruby_llm/providers/anthropic.rb +1 -2
data/lib/ruby_llm/providers/bedrock/chat.rb +0 -2
data/lib/ruby_llm/providers/bedrock/media.rb +0 -1
data/lib/ruby_llm/providers/bedrock/models.rb +0 -2
data/lib/ruby_llm/providers/bedrock/streaming/base.rb +1 -13
data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +0 -7
data/lib/ruby_llm/providers/bedrock/streaming/message_processing.rb +0 -12
data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +0 -12
data/lib/ruby_llm/providers/bedrock/streaming/prelude_handling.rb +0 -13
data/lib/ruby_llm/providers/bedrock/streaming.rb +0 -18
data/lib/ruby_llm/providers/bedrock.rb +1 -2
data/lib/ruby_llm/providers/deepseek/capabilities.rb +1 -2
data/lib/ruby_llm/providers/deepseek/chat.rb +0 -1
data/lib/ruby_llm/providers/gemini/capabilities.rb +26 -101
data/lib/ruby_llm/providers/gemini/chat.rb +12 -8
data/lib/ruby_llm/providers/gemini/embeddings.rb +0 -2
data/lib/ruby_llm/providers/gemini/images.rb +0 -1
data/lib/ruby_llm/providers/gemini/media.rb +0 -1
data/lib/ruby_llm/providers/gemini/models.rb +1 -2
data/lib/ruby_llm/providers/gemini/streaming.rb +4 -1
data/lib/ruby_llm/providers/gemini/tools.rb +0 -5
data/lib/ruby_llm/providers/gpustack/chat.rb +0 -1
data/lib/ruby_llm/providers/gpustack/models.rb +3 -4
data/lib/ruby_llm/providers/mistral/capabilities.rb +2 -10
data/lib/ruby_llm/providers/mistral/chat.rb +0 -2
data/lib/ruby_llm/providers/mistral/embeddings.rb +0 -3
data/lib/ruby_llm/providers/mistral/models.rb +0 -1
data/lib/ruby_llm/providers/ollama/chat.rb +0 -1
data/lib/ruby_llm/providers/ollama/media.rb +0 -1
data/lib/ruby_llm/providers/openai/capabilities.rb +2 -17
data/lib/ruby_llm/providers/openai/chat.rb +0 -3
data/lib/ruby_llm/providers/openai/embeddings.rb +0 -3
data/lib/ruby_llm/providers/openai/media.rb +0 -1
data/lib/ruby_llm/providers/openai.rb +1 -3
data/lib/ruby_llm/providers/openrouter/models.rb +1 -16
data/lib/ruby_llm/providers/perplexity/capabilities.rb +0 -1
data/lib/ruby_llm/providers/perplexity/chat.rb +0 -1
data/lib/ruby_llm/providers/perplexity.rb +1 -5
data/lib/ruby_llm/railtie.rb +0 -1
data/lib/ruby_llm/stream_accumulator.rb +3 -5
data/lib/ruby_llm/streaming.rb +16 -25
data/lib/ruby_llm/tool.rb +2 -19
data/lib/ruby_llm/tool_call.rb +0 -9
data/lib/ruby_llm/version.rb +1 -1
data/lib/ruby_llm.rb +0 -2
data/lib/tasks/aliases.rake +7 -37
data/lib/tasks/models_docs.rake +5 -15
data/lib/tasks/models_update.rake +1 -1
data/lib/tasks/vcr.rake +0 -7
metadata +1 -1

data/lib/ruby_llm/models.json CHANGED Viewed

@@ -33,31 +33,37 @@
   },
   {
     "id": "claude-3-5-sonnet-20240620",
-    "name": "Claude Sonnet 3.5",
+    "name": "Claude Sonnet 3.5 (Old)",
     "provider": "anthropic",
     "family": "claude-3-5-sonnet",
-    "created_at": null,
+    "created_at": "2024-06-20 00:00:00 UTC",
     "context_window": 200000,
     "max_output_tokens": 8192,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
+        "text",
         "image",
-        "text"
+        "pdf"
       ],
       "output": [
         "text"
       ]
     },
     "capabilities": [
-      "function_calling"
+      "streaming",
+      "function_calling",
+      "batch"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
           "input_per_million": 3.0,
-          "cached_input_per_million": 3.75,
           "output_per_million": 15.0
+        },
+        "batch": {
+          "input_per_million": 1.5,
+          "output_per_million": 7.5
         }
       }
     },
@@ -65,31 +71,37 @@
   },
   {
     "id": "claude-3-5-sonnet-20241022",
-    "name": "Claude Sonnet 3.5",
+    "name": "Claude Sonnet 3.5 (New)",
     "provider": "anthropic",
     "family": "claude-3-5-sonnet",
-    "created_at": null,
+    "created_at": "2024-10-22 00:00:00 UTC",
     "context_window": 200000,
     "max_output_tokens": 8192,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
+        "text",
         "image",
-        "text"
+        "pdf"
       ],
       "output": [
         "text"
       ]
     },
     "capabilities": [
-      "function_calling"
+      "streaming",
+      "function_calling",
+      "batch"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
           "input_per_million": 3.0,
-          "cached_input_per_million": 3.75,
           "output_per_million": 15.0
+        },
+        "batch": {
+          "input_per_million": 1.5,
+          "output_per_million": 7.5
         }
       }
     },
@@ -4127,7 +4139,7 @@
   },
   {
     "id": "imagen-3.0-generate-002",
-    "name": "Imagen 3.0 002 model",
+    "name": "Imagen 3.0",
     "provider": "gemini",
     "family": "imagen3",
     "created_at": null,
@@ -4156,6 +4168,37 @@
       ]
     }
   },
+  {
+    "id": "imagen-4.0-generate-001",
+    "name": "Imagen 4",
+    "provider": "gemini",
+    "family": "other",
+    "created_at": null,
+    "context_window": 480,
+    "max_output_tokens": 8192,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "text",
+        "image",
+        "pdf"
+      ],
+      "output": [
+        "image"
+      ]
+    },
+    "capabilities": [
+      "streaming"
+    ],
+    "pricing": {},
+    "metadata": {
+      "version": "001",
+      "description": "Vertex served Imagen 4.0 model",
+      "supported_generation_methods": [
+        "predict"
+      ]
+    }
+  },
   {
     "id": "imagen-4.0-generate-preview-06-06",
     "name": "Imagen 4 (Preview)",
@@ -4302,46 +4345,6 @@
       ]
     }
   },
-  {
-    "id": "veo-2.0-generate-001",
-    "name": "Veo 2",
-    "provider": "gemini",
-    "family": "other",
-    "created_at": null,
-    "context_window": 480,
-    "max_output_tokens": 8192,
-    "knowledge_cutoff": null,
-    "modalities": {
-      "input": [
-        "text"
-      ],
-      "output": [
-        "text"
-      ]
-    },
-    "capabilities": [
-      "streaming"
-    ],
-    "pricing": {
-      "text_tokens": {
-        "standard": {
-          "input_per_million": 0.075,
-          "output_per_million": 0.3
-        },
-        "batch": {
-          "input_per_million": 0.0375,
-          "output_per_million": 0.15
-        }
-      }
-    },
-    "metadata": {
-      "version": "2.0",
-      "description": "Vertex served Veo 2 model. Access to this model requires billing to be enabled on the associated Google Cloud Platform account. Please visit https://console.cloud.google.com/billing to enable it.",
-      "supported_generation_methods": [
-        "predictLongRunning"
-      ]
-    }
-  },
   {
     "id": "codestral-2411-rc5",
     "name": "Codestral",
@@ -6700,29 +6703,31 @@
     "id": "gpt-4",
     "name": "GPT-4",
     "provider": "openai",
-    "family": "gpt4",
-    "created_at": "2023-06-27 18:13:31 +0200",
+    "family": "gpt-4",
+    "created_at": null,
     "context_window": 8192,
     "max_output_tokens": 8192,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text",
-        "image",
-        "pdf"
+        "text"
       ],
       "output": [
+        "embeddings",
         "text"
       ]
     },
     "capabilities": [
-      "streaming",
-      "function_calling"
+      "batch"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 10.0,
+          "input_per_million": 30.0,
+          "output_per_million": 60.0
+        },
+        "batch": {
+          "input_per_million": 15.0,
           "output_per_million": 30.0
         }
       }
@@ -6767,29 +6772,34 @@
   },
   {
     "id": "gpt-4-0613",
-    "name": "GPT-4 0613",
+    "name": "GPT-4",
     "provider": "openai",
-    "family": "other",
-    "created_at": "2023-06-12 18:54:56 +0200",
-    "context_window": 4096,
-    "max_output_tokens": 16384,
+    "family": "gpt-4",
+    "created_at": null,
+    "context_window": 8192,
+    "max_output_tokens": 8192,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
         "text"
       ],
       "output": [
+        "embeddings",
         "text"
       ]
     },
     "capabilities": [
-      "streaming"
+      "batch"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.5,
-          "output_per_million": 1.5
+          "input_per_million": 30.0,
+          "output_per_million": 60.0
+        },
+        "batch": {
+          "input_per_million": 15.0,
+          "output_per_million": 30.0
         }
       }
     },
@@ -6915,25 +6925,21 @@
     "id": "gpt-4-turbo-preview",
     "name": "GPT-4 Turbo Preview",
     "provider": "openai",
-    "family": "gpt4_turbo",
-    "created_at": "2024-01-23 20:22:57 +0100",
+    "family": "gpt-4-turbo-preview",
+    "created_at": null,
     "context_window": 128000,
     "max_output_tokens": 4096,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text",
-        "image",
-        "pdf"
+        "text"
       ],
       "output": [
+        "embeddings",
         "text"
       ]
     },
-    "capabilities": [
-      "streaming",
-      "function_calling"
-    ],
+    "capabilities": [],
     "pricing": {
       "text_tokens": {
         "standard": {
@@ -6951,23 +6957,23 @@
     "id": "gpt-4.1",
     "name": "GPT-4.1",
     "provider": "openai",
-    "family": "gpt41",
-    "created_at": "2025-04-10 22:22:22 +0200",
+    "family": "gpt-4.1",
+    "created_at": null,
     "context_window": 1047576,
     "max_output_tokens": 32768,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text",
         "image",
-        "pdf"
+        "text"
       ],
       "output": [
+        "embeddings",
         "text"
       ]
     },
     "capabilities": [
-      "streaming",
+      "batch",
       "function_calling",
       "structured_output"
     ],
@@ -6975,8 +6981,12 @@
       "text_tokens": {
         "standard": {
           "input_per_million": 2.0,
-          "output_per_million": 8.0,
-          "cached_input_per_million": 0.5
+          "cached_input_per_million": 0.5,
+          "output_per_million": 8.0
+        },
+        "batch": {
+          "input_per_million": 1.0,
+          "output_per_million": 4.0
         }
       }
     },
@@ -6987,25 +6997,25 @@
   },
   {
     "id": "gpt-4.1-2025-04-14",
-    "name": "GPT-4.1 20250414",
+    "name": "GPT-4.1",
     "provider": "openai",
-    "family": "gpt41",
-    "created_at": "2025-04-10 22:09:06 +0200",
+    "family": "gpt-4.1",
+    "created_at": null,
     "context_window": 1047576,
     "max_output_tokens": 32768,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text",
         "image",
-        "pdf"
+        "text"
       ],
       "output": [
+        "embeddings",
         "text"
       ]
     },
     "capabilities": [
-      "streaming",
+      "batch",
       "function_calling",
       "structured_output"
     ],
@@ -7013,8 +7023,12 @@
       "text_tokens": {
         "standard": {
           "input_per_million": 2.0,
-          "output_per_million": 8.0,
-          "cached_input_per_million": 0.5
+          "cached_input_per_million": 0.5,
+          "output_per_million": 8.0
+        },
+        "batch": {
+          "input_per_million": 1.0,
+          "output_per_million": 4.0
         }
       }
     },
@@ -8296,30 +8310,38 @@
   },
   {
     "id": "gpt-5-mini",
-    "name": "GPT-5 Mini",
+    "name": "GPT-5 mini",
     "provider": "openai",
-    "family": "other",
-    "created_at": "2025-08-05 22:32:08 +0200",
-    "context_window": 4096,
-    "max_output_tokens": 16384,
+    "family": "gpt-5-mini",
+    "created_at": null,
+    "context_window": 400000,
+    "max_output_tokens": 128000,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
+        "image",
         "text"
       ],
       "output": [
+        "embeddings",
         "text"
       ]
     },
     "capabilities": [
-      "streaming",
-      "reasoning"
+      "batch",
+      "function_calling",
+      "structured_output"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.5,
-          "output_per_million": 1.5
+          "input_per_million": 0.25,
+          "cached_input_per_million": 0.025,
+          "output_per_million": 2.0
+        },
+        "batch": {
+          "input_per_million": 0.125,
+          "output_per_million": 1.0
         }
       }
     },
@@ -8330,30 +8352,38 @@
   },
   {
     "id": "gpt-5-mini-2025-08-07",
-    "name": "GPT-5 Mini 20250807",
+    "name": "GPT-5 mini",
     "provider": "openai",
-    "family": "other",
-    "created_at": "2025-08-05 22:31:07 +0200",
-    "context_window": 4096,
-    "max_output_tokens": 16384,
+    "family": "gpt-5-mini",
+    "created_at": null,
+    "context_window": 400000,
+    "max_output_tokens": 128000,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
+        "image",
         "text"
       ],
       "output": [
+        "embeddings",
         "text"
       ]
     },
     "capabilities": [
-      "streaming",
-      "reasoning"
+      "batch",
+      "function_calling",
+      "structured_output"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.5,
-          "output_per_million": 1.5
+          "input_per_million": 0.25,
+          "cached_input_per_million": 0.025,
+          "output_per_million": 2.0
+        },
+        "batch": {
+          "input_per_million": 0.125,
+          "output_per_million": 1.0
         }
       }
     },
@@ -8364,30 +8394,38 @@
   },
   {
     "id": "gpt-5-nano",
-    "name": "GPT-5 Nano",
+    "name": "GPT-5 nano",
     "provider": "openai",
-    "family": "other",
-    "created_at": "2025-08-05 22:39:44 +0200",
-    "context_window": 4096,
-    "max_output_tokens": 16384,
+    "family": "gpt-5-nano",
+    "created_at": null,
+    "context_window": 400000,
+    "max_output_tokens": 128000,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
+        "image",
         "text"
       ],
       "output": [
+        "embeddings",
         "text"
       ]
     },
     "capabilities": [
-      "streaming",
-      "reasoning"
+      "batch",
+      "function_calling",
+      "structured_output"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.5,
-          "output_per_million": 1.5
+          "input_per_million": 0.05,
+          "cached_input_per_million": 0.005,
+          "output_per_million": 0.4
+        },
+        "batch": {
+          "input_per_million": 0.025,
+          "output_per_million": 0.2
         }
       }
     },
@@ -8398,30 +8436,38 @@
   },
   {
     "id": "gpt-5-nano-2025-08-07",
-    "name": "GPT-5 Nano 20250807",
+    "name": "GPT-5 nano",
     "provider": "openai",
-    "family": "other",
-    "created_at": "2025-08-05 22:38:23 +0200",
-    "context_window": 4096,
-    "max_output_tokens": 16384,
+    "family": "gpt-5-nano",
+    "created_at": null,
+    "context_window": 400000,
+    "max_output_tokens": 128000,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
+        "image",
         "text"
       ],
       "output": [
+        "embeddings",
         "text"
       ]
     },
     "capabilities": [
-      "streaming",
-      "reasoning"
+      "batch",
+      "function_calling",
+      "structured_output"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.5,
-          "output_per_million": 1.5
+          "input_per_million": 0.05,
+          "cached_input_per_million": 0.005,
+          "output_per_million": 0.4
+        },
+        "batch": {
+          "input_per_million": 0.025,
+          "output_per_million": 0.2
         }
       }
     },
@@ -8491,35 +8537,65 @@
     "metadata": {}
   },
   {
-    "id": "o1",
-    "name": "O1",
+    "id": "gpt-oss-20b",
+    "name": "gpt-oss-20b",
     "provider": "openai",
-    "family": "o1",
-    "created_at": "2024-12-16 20:03:36 +0100",
-    "context_window": 200000,
-    "max_output_tokens": 100000,
+    "family": "gpt-oss-20b",
+    "created_at": null,
+    "context_window": 131072,
+    "max_output_tokens": 131072,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text",
-        "image",
-        "pdf"
+        "text"
       ],
       "output": [
+        "embeddings",
         "text"
       ]
     },
     "capabilities": [
-      "streaming",
+      "batch",
       "function_calling",
-      "structured_output",
-      "reasoning"
+      "structured_output"
+    ],
+    "pricing": {},
+    "metadata": {}
+  },
+  {
+    "id": "o1",
+    "name": "o1",
+    "provider": "openai",
+    "family": "o1",
+    "created_at": null,
+    "context_window": 200000,
+    "max_output_tokens": 100000,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "image",
+        "text"
+      ],
+      "output": [
+        "embeddings",
+        "text"
+      ]
+    },
+    "capabilities": [
+      "batch",
+      "function_calling",
+      "structured_output"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
           "input_per_million": 15.0,
+          "cached_input_per_million": 7.5,
           "output_per_million": 60.0
+        },
+        "batch": {
+          "input_per_million": 7.5,
+          "output_per_million": 30.0
         }
       }
     },
@@ -8530,34 +8606,38 @@
   },
   {
     "id": "o1-2024-12-17",
-    "name": "O1-20241217",
+    "name": "o1",
     "provider": "openai",
     "family": "o1",
-    "created_at": "2024-12-16 06:29:36 +0100",
+    "created_at": null,
     "context_window": 200000,
     "max_output_tokens": 100000,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text",
         "image",
-        "pdf"
+        "text"
       ],
       "output": [
+        "embeddings",
         "text"
       ]
     },
     "capabilities": [
-      "streaming",
+      "batch",
       "function_calling",
-      "structured_output",
-      "reasoning"
+      "structured_output"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
           "input_per_million": 15.0,
+          "cached_input_per_million": 7.5,
           "output_per_million": 60.0
+        },
+        "batch": {
+          "input_per_million": 7.5,
+          "output_per_million": 30.0
         }
       }
     },
@@ -8632,6 +8712,72 @@
       "owned_by": "system"
     }
   },
+  {
+    "id": "o1-preview",
+    "name": "o1 Preview",
+    "provider": "openai",
+    "family": "o1-preview",
+    "created_at": null,
+    "context_window": 128000,
+    "max_output_tokens": 32768,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "text"
+      ],
+      "output": [
+        "embeddings",
+        "text"
+      ]
+    },
+    "capabilities": [
+      "function_calling",
+      "structured_output"
+    ],
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 15.0,
+          "cached_input_per_million": 7.5,
+          "output_per_million": 60.0
+        }
+      }
+    },
+    "metadata": {}
+  },
+  {
+    "id": "o1-preview-2024-09-12",
+    "name": "o1 Preview",
+    "provider": "openai",
+    "family": "o1-preview",
+    "created_at": null,
+    "context_window": 128000,
+    "max_output_tokens": 32768,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "text"
+      ],
+      "output": [
+        "embeddings",
+        "text"
+      ]
+    },
+    "capabilities": [
+      "function_calling",
+      "structured_output"
+    ],
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 15.0,
+          "cached_input_per_million": 7.5,
+          "output_per_million": 60.0
+        }
+      }
+    },
+    "metadata": {}
+  },
   {
     "id": "o1-pro",
     "name": "o1-pro",
@@ -9235,22 +9381,21 @@
   },
   {
     "id": "omni-moderation-latest",
-    "name": "Omni Moderation Latest",
+    "name": "omni-moderation",
     "provider": "openai",
-    "family": "moderation",
-    "created_at": "2024-11-15 17:47:45 +0100",
+    "family": "omni-moderation-latest",
+    "created_at": null,
     "context_window": null,
     "max_output_tokens": null,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text",
         "image",
-        "pdf"
+        "text"
       ],
       "output": [
-        "text",
-        "moderation"
+        "embeddings",
+        "text"
       ]
     },
     "capabilities": [],
@@ -9262,10 +9407,10 @@
   },
   {
     "id": "text-embedding-3-large",
-    "name": "text-embedding- 3 Large",
+    "name": "text-embedding-3-large",
     "provider": "openai",
-    "family": "embedding3_large",
-    "created_at": "2024-01-22 20:53:00 +0100",
+    "family": "text-embedding-3-large",
+    "created_at": null,
     "context_window": null,
     "max_output_tokens": null,
     "knowledge_cutoff": null,
@@ -9274,8 +9419,8 @@
         "text"
       ],
       "output": [
-        "text",
-        "embeddings"
+        "embeddings",
+        "text"
       ]
     },
     "capabilities": [
@@ -9284,12 +9429,18 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.13,
-          "output_per_million": 0.13
+          "input_per_million": 0.13
         },
         "batch": {
-          "input_per_million": 0.065,
-          "output_per_million": 0.065
+          "input_per_million": 0.065
+        }
+      },
+      "embeddings": {
+        "standard": {
+          "input_per_million": 0.13
+        },
+        "batch": {
+          "input_per_million": 0.065
         }
       }
     },
@@ -9344,10 +9495,10 @@
   },
   {
     "id": "text-embedding-ada-002",
-    "name": "text-embedding- Ada 002",
+    "name": "text-embedding-ada-002",
     "provider": "openai",
-    "family": "embedding_ada",
-    "created_at": "2022-12-16 20:01:39 +0100",
+    "family": "text-embedding-ada-002",
+    "created_at": null,
     "context_window": null,
     "max_output_tokens": null,
     "knowledge_cutoff": null,
@@ -9356,8 +9507,8 @@
         "text"
       ],
       "output": [
-        "text",
-        "embeddings"
+        "embeddings",
+        "text"
       ]
     },
     "capabilities": [
@@ -9366,12 +9517,18 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.1,
-          "output_per_million": 0.1
+          "input_per_million": 0.1
         },
         "batch": {
-          "input_per_million": 0.05,
-          "output_per_million": 0.05
+          "input_per_million": 0.05
+        }
+      },
+      "embeddings": {
+        "standard": {
+          "input_per_million": 0.1
+        },
+        "batch": {
+          "input_per_million": 0.05
         }
       }
     },
@@ -9428,29 +9585,25 @@
     "id": "tts-1",
     "name": "TTS-1",
     "provider": "openai",
-    "family": "tts1",
-    "created_at": "2023-04-19 23:49:11 +0200",
+    "family": "tts-1",
+    "created_at": null,
     "context_window": null,
     "max_output_tokens": null,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text",
-        "audio"
+        "text"
       ],
       "output": [
-        "text",
-        "audio"
+        "audio",
+        "embeddings"
       ]
     },
-    "capabilities": [
-      "streaming"
-    ],
+    "capabilities": [],
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 15.0,
-          "output_per_million": 15.0
+          "input_per_million": 15.0
         }
       }
     },
@@ -10091,8 +10244,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 9.0,
-          "output_per_million": 11.0
+          "input_per_million": 5.0,
+          "output_per_million": 6.25
         }
       }
     },
@@ -10398,8 +10551,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 2.5,
-          "output_per_million": 3.0
+          "input_per_million": 1.25,
+          "output_per_million": 1.5
         }
       }
     },
@@ -10882,70 +11035,6 @@
       ]
     }
   },
-  {
-    "id": "anthropic/claude-3.7-sonnet:beta",
-    "name": "Anthropic: Claude 3.7 Sonnet (self-moderated)",
-    "provider": "openrouter",
-    "family": "anthropic",
-    "created_at": "2025-02-24 19:35:10 +0100",
-    "context_window": 200000,
-    "max_output_tokens": 128000,
-    "knowledge_cutoff": null,
-    "modalities": {
-      "input": [
-        "text",
-        "image",
-        "file"
-      ],
-      "output": [
-        "text"
-      ]
-    },
-    "capabilities": [
-      "streaming",
-      "function_calling"
-    ],
-    "pricing": {
-      "text_tokens": {
-        "standard": {
-          "input_per_million": 3.0,
-          "output_per_million": 15.0,
-          "cached_input_per_million": 0.3
-        }
-      }
-    },
-    "metadata": {
-      "description": "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. \n\nClaude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks.\n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)",
-      "architecture": {
-        "modality": "text+image->text",
-        "input_modalities": [
-          "text",
-          "image",
-          "file"
-        ],
-        "output_modalities": [
-          "text"
-        ],
-        "tokenizer": "Claude",
-        "instruct_type": null
-      },
-      "top_provider": {
-        "context_length": 200000,
-        "max_completion_tokens": 128000,
-        "is_moderated": false
-      },
-      "per_request_limits": null,
-      "supported_parameters": [
-        "include_reasoning",
-        "max_tokens",
-        "reasoning",
-        "stop",
-        "temperature",
-        "tool_choice",
-        "tools"
-      ]
-    }
-  },
   {
     "id": "anthropic/claude-3.7-sonnet:thinking",
     "name": "Anthropic: Claude 3.7 Sonnet (thinking)",
@@ -11586,6 +11675,69 @@
       ]
     }
   },
+  {
+    "id": "baidu/ernie-4.5-21b-a3b",
+    "name": "Baidu: ERNIE 4.5 21B A3B",
+    "provider": "openrouter",
+    "family": "baidu",
+    "created_at": "2025-08-12 23:29:27 +0200",
+    "context_window": 120000,
+    "max_output_tokens": 8000,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "text"
+      ],
+      "output": [
+        "text"
+      ]
+    },
+    "capabilities": [
+      "streaming",
+      "predicted_outputs"
+    ],
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 0.07,
+          "output_per_million": 0.28
+        }
+      }
+    },
+    "metadata": {
+      "description": "A sophisticated text-based Mixture-of-Experts (MoE) model featuring 21B total parameters with 3B activated per token, delivering exceptional multimodal understanding and generation through heterogeneous MoE structures and modality-isolated routing. Supporting an extensive 131K token context length, the model achieves efficient inference via multi-expert parallel collaboration and quantization, while advanced post-training techniques including SFT, DPO, and UPO ensure optimized performance across diverse applications with specialized routing and balancing losses for superior task handling.",
+      "architecture": {
+        "modality": "text->text",
+        "input_modalities": [
+          "text"
+        ],
+        "output_modalities": [
+          "text"
+        ],
+        "tokenizer": "Other",
+        "instruct_type": null
+      },
+      "top_provider": {
+        "context_length": 120000,
+        "max_completion_tokens": 8000,
+        "is_moderated": false
+      },
+      "per_request_limits": null,
+      "supported_parameters": [
+        "frequency_penalty",
+        "logit_bias",
+        "max_tokens",
+        "min_p",
+        "presence_penalty",
+        "repetition_penalty",
+        "seed",
+        "stop",
+        "temperature",
+        "top_k",
+        "top_p"
+      ]
+    }
+  },
   {
     "id": "baidu/ernie-4.5-300b-a47b",
     "name": "Baidu: ERNIE 4.5 300B A47B ",
@@ -11649,6 +11801,140 @@
       ]
     }
   },
+  {
+    "id": "baidu/ernie-4.5-vl-28b-a3b",
+    "name": "Baidu: ERNIE 4.5 VL 28B A3B",
+    "provider": "openrouter",
+    "family": "baidu",
+    "created_at": "2025-08-12 23:07:16 +0200",
+    "context_window": 30000,
+    "max_output_tokens": 8000,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "text",
+        "image"
+      ],
+      "output": [
+        "text"
+      ]
+    },
+    "capabilities": [
+      "streaming",
+      "predicted_outputs"
+    ],
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 0.14,
+          "output_per_million": 0.56
+        }
+      }
+    },
+    "metadata": {
+      "description": "A powerful multimodal Mixture-of-Experts chat model featuring 28B total parameters with 3B activated per token, delivering exceptional text and vision understanding through its innovative heterogeneous MoE structure with modality-isolated routing. Built with scaling-efficient infrastructure for high-throughput training and inference, the model leverages advanced post-training techniques including SFT, DPO, and UPO for optimized performance, while supporting an impressive 131K context length and RLVR alignment for superior cross-modal reasoning and generation capabilities.",
+      "architecture": {
+        "modality": "text+image->text",
+        "input_modalities": [
+          "text",
+          "image"
+        ],
+        "output_modalities": [
+          "text"
+        ],
+        "tokenizer": "Other",
+        "instruct_type": null
+      },
+      "top_provider": {
+        "context_length": 30000,
+        "max_completion_tokens": 8000,
+        "is_moderated": false
+      },
+      "per_request_limits": null,
+      "supported_parameters": [
+        "frequency_penalty",
+        "include_reasoning",
+        "logit_bias",
+        "max_tokens",
+        "min_p",
+        "presence_penalty",
+        "reasoning",
+        "repetition_penalty",
+        "seed",
+        "stop",
+        "temperature",
+        "top_k",
+        "top_p"
+      ]
+    }
+  },
+  {
+    "id": "baidu/ernie-4.5-vl-424b-a47b",
+    "name": "Baidu: ERNIE 4.5 VL 424B A47B ",
+    "provider": "openrouter",
+    "family": "baidu",
+    "created_at": "2025-06-30 18:28:23 +0200",
+    "context_window": 123000,
+    "max_output_tokens": 16000,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "image",
+        "text"
+      ],
+      "output": [
+        "text"
+      ]
+    },
+    "capabilities": [
+      "streaming",
+      "predicted_outputs"
+    ],
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 0.42,
+          "output_per_million": 1.25
+        }
+      }
+    },
+    "metadata": {
+      "description": "ERNIE-4.5-VL-424B-A47B is a multimodal Mixture-of-Experts (MoE) model from Baidu’s ERNIE 4.5 series, featuring 424B total parameters with 47B active per token. It is trained jointly on text and image data using a heterogeneous MoE architecture and modality-isolated routing to enable high-fidelity cross-modal reasoning, image understanding, and long-context generation (up to 131k tokens). Fine-tuned with techniques like SFT, DPO, UPO, and RLVR, this model supports both “thinking” and non-thinking inference modes. Designed for vision-language tasks in English and Chinese, it is optimized for efficient scaling and can operate under 4-bit/8-bit quantization.",
+      "architecture": {
+        "modality": "text+image->text",
+        "input_modalities": [
+          "image",
+          "text"
+        ],
+        "output_modalities": [
+          "text"
+        ],
+        "tokenizer": "Other",
+        "instruct_type": null
+      },
+      "top_provider": {
+        "context_length": 123000,
+        "max_completion_tokens": 16000,
+        "is_moderated": false
+      },
+      "per_request_limits": null,
+      "supported_parameters": [
+        "frequency_penalty",
+        "include_reasoning",
+        "logit_bias",
+        "max_tokens",
+        "min_p",
+        "presence_penalty",
+        "reasoning",
+        "repetition_penalty",
+        "seed",
+        "stop",
+        "temperature",
+        "top_k",
+        "top_p"
+      ]
+    }
+  },
   {
     "id": "bytedance/ui-tars-1.5-7b",
     "name": "Bytedance: UI-TARS 7B ",
@@ -14068,7 +14354,8 @@
       "input": [
         "text",
         "image",
-        "file"
+        "file",
+        "audio"
       ],
       "output": [
         "text"
@@ -14094,7 +14381,8 @@
         "input_modalities": [
           "text",
           "image",
-          "file"
+          "file",
+          "audio"
         ],
         "output_modalities": [
           "text"
@@ -14254,6 +14542,7 @@
         "reasoning",
         "response_format",
         "seed",
+        "stop",
         "structured_outputs",
         "temperature",
         "tool_choice",
@@ -14275,7 +14564,8 @@
       "input": [
         "file",
         "image",
-        "text"
+        "text",
+        "audio"
       ],
       "output": [
         "text"
@@ -14302,7 +14592,8 @@
         "input_modalities": [
           "file",
           "image",
-          "text"
+          "text",
+          "audio"
         ],
         "output_modalities": [
           "text"
@@ -14344,7 +14635,8 @@
       "input": [
         "file",
         "image",
-        "text"
+        "text",
+        "audio"
       ],
       "output": [
         "text"
@@ -14371,7 +14663,8 @@
         "input_modalities": [
           "file",
           "image",
-          "text"
+          "text",
+          "audio"
         ],
         "output_modalities": [
           "text"
@@ -14472,7 +14765,8 @@
       "input": [
         "file",
         "image",
-        "text"
+        "text",
+        "audio"
       ],
       "output": [
         "text"
@@ -14499,7 +14793,8 @@
         "input_modalities": [
           "file",
           "image",
-          "text"
+          "text",
+          "audio"
         ],
         "output_modalities": [
           "text"
@@ -14541,7 +14836,8 @@
       "input": [
         "text",
         "image",
-        "file"
+        "file",
+        "audio"
       ],
       "output": [
         "text"
@@ -14568,7 +14864,8 @@
         "input_modalities": [
           "text",
           "image",
-          "file"
+          "file",
+          "audio"
         ],
         "output_modalities": [
           "text"
@@ -15062,7 +15359,7 @@
     "provider": "openrouter",
     "family": "google",
     "created_at": "2025-03-13 22:50:25 +0100",
-    "context_window": 96000,
+    "context_window": 32768,
     "max_output_tokens": 8192,
     "knowledge_cutoff": null,
     "modalities": {
@@ -15076,7 +15373,6 @@
     },
     "capabilities": [
       "streaming",
-      "structured_output",
       "predicted_outputs"
     ],
     "pricing": {},
@@ -15095,7 +15391,7 @@
         "instruct_type": "gemma"
       },
       "top_provider": {
-        "context_length": 96000,
+        "context_length": 32768,
         "max_completion_tokens": 8192,
         "is_moderated": false
       },
@@ -15108,10 +15404,8 @@
         "min_p",
         "presence_penalty",
         "repetition_penalty",
-        "response_format",
         "seed",
         "stop",
-        "structured_outputs",
         "temperature",
         "top_k",
         "top_logprobs",
@@ -16122,8 +16416,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 1.5,
-          "output_per_million": 1.5
+          "input_per_million": 0.75,
+          "output_per_million": 0.75
         }
       }
     },
@@ -16285,7 +16579,6 @@
       "supported_parameters": [
         "frequency_penalty",
         "logit_bias",
-        "logprobs",
         "max_tokens",
         "min_p",
         "presence_penalty",
@@ -16297,7 +16590,6 @@
         "tool_choice",
         "tools",
         "top_k",
-        "top_logprobs",
         "top_p"
       ]
     }
@@ -17474,7 +17766,6 @@
       "supported_parameters": [
         "frequency_penalty",
         "logit_bias",
-        "logprobs",
         "max_tokens",
         "min_p",
         "presence_penalty",
@@ -17484,7 +17775,6 @@
         "stop",
         "temperature",
         "top_k",
-        "top_logprobs",
         "top_p"
       ]
     }
@@ -19025,13 +19315,13 @@
     }
   },
   {
-    "id": "mistralai/mistral-7b-instruct-v0.2",
-    "name": "Mistral: Mistral 7B Instruct v0.2",
+    "id": "mistralai/mistral-7b-instruct-v0.3",
+    "name": "Mistral: Mistral 7B Instruct v0.3",
     "provider": "openrouter",
     "family": "mistralai",
-    "created_at": "2023-12-28 01:00:00 +0100",
+    "created_at": "2024-05-27 02:00:00 +0200",
     "context_window": 32768,
-    "max_output_tokens": null,
+    "max_output_tokens": 16384,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -19043,18 +19333,20 @@
     },
     "capabilities": [
       "streaming",
+      "function_calling",
+      "structured_output",
       "predicted_outputs"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.19999999999999998,
-          "output_per_million": 0.19999999999999998
+          "input_per_million": 0.028,
+          "output_per_million": 0.054
         }
       }
     },
     "metadata": {
-      "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\nAn improved version of [Mistral 7B Instruct](/modelsmistralai/mistral-7b-instruct-v0.1), with the following changes:\n\n- 32k context window (vs 8k context in v0.1)\n- Rope-theta = 1e6\n- No Sliding-Window Attention",
+      "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\nAn improved version of [Mistral 7B Instruct v0.2](/models/mistralai/mistral-7b-instruct-v0.2), with the following changes:\n\n- Extended vocabulary to 32768\n- Supports v3 Tokenizer\n- Supports function calling\n\nNOTE: Support for function calling depends on the provider.",
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
@@ -19068,27 +19360,32 @@
       },
       "top_provider": {
         "context_length": 32768,
-        "max_completion_tokens": null,
+        "max_completion_tokens": 16384,
         "is_moderated": false
       },
       "per_request_limits": null,
       "supported_parameters": [
         "frequency_penalty",
         "logit_bias",
+        "logprobs",
         "max_tokens",
         "min_p",
         "presence_penalty",
         "repetition_penalty",
+        "response_format",
+        "seed",
         "stop",
         "temperature",
+        "tool_choice",
+        "tools",
         "top_k",
         "top_p"
       ]
     }
   },
   {
-    "id": "mistralai/mistral-7b-instruct-v0.3",
-    "name": "Mistral: Mistral 7B Instruct v0.3",
+    "id": "mistralai/mistral-7b-instruct:free",
+    "name": "Mistral: Mistral 7B Instruct (free)",
     "provider": "openrouter",
     "family": "mistralai",
     "created_at": "2024-05-27 02:00:00 +0200",
@@ -19106,19 +19403,11 @@
     "capabilities": [
       "streaming",
       "function_calling",
-      "structured_output",
-      "predicted_outputs"
+      "structured_output"
     ],
-    "pricing": {
-      "text_tokens": {
-        "standard": {
-          "input_per_million": 0.028,
-          "output_per_million": 0.054
-        }
-      }
-    },
+    "pricing": {},
     "metadata": {
-      "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\nAn improved version of [Mistral 7B Instruct v0.2](/models/mistralai/mistral-7b-instruct-v0.2), with the following changes:\n\n- Extended vocabulary to 32768\n- Supports v3 Tokenizer\n- Supports function calling\n\nNOTE: Support for function calling depends on the provider.",
+      "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\n*Mistral 7B Instruct has multiple version variants, and this is intended to be the latest version.*",
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
@@ -19138,8 +19427,6 @@
       "per_request_limits": null,
       "supported_parameters": [
         "frequency_penalty",
-        "logit_bias",
-        "logprobs",
         "max_tokens",
         "min_p",
         "presence_penalty",
@@ -19156,13 +19443,13 @@
     }
   },
   {
-    "id": "mistralai/mistral-7b-instruct:free",
-    "name": "Mistral: Mistral 7B Instruct (free)",
+    "id": "mistralai/mistral-large",
+    "name": "Mistral Large",
     "provider": "openrouter",
     "family": "mistralai",
-    "created_at": "2024-05-27 02:00:00 +0200",
-    "context_window": 32768,
-    "max_output_tokens": 16384,
+    "created_at": "2024-02-26 01:00:00 +0100",
+    "context_window": 128000,
+    "max_output_tokens": null,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -19177,9 +19464,16 @@
       "function_calling",
       "structured_output"
     ],
-    "pricing": {},
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 2.0,
+          "output_per_million": 6.0
+        }
+      }
+    },
     "metadata": {
-      "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\n*Mistral 7B Instruct has multiple version variants, and this is intended to be the latest version.*",
+      "description": "This is Mistral AI's flagship model, Mistral Large 2 (version `mistral-large-2407`). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/).\n\nIt supports dozens of languages including French, German, Spanish, Italian, Portuguese, Arabic, Hindi, Russian, Chinese, Japanese, and Korean, along with 80+ coding languages including Python, Java, C, C++, JavaScript, and Bash. Its long context window allows precise information recall from large documents.",
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
@@ -19189,38 +19483,36 @@
           "text"
         ],
         "tokenizer": "Mistral",
-        "instruct_type": "mistral"
+        "instruct_type": null
       },
       "top_provider": {
-        "context_length": 32768,
-        "max_completion_tokens": 16384,
+        "context_length": 128000,
+        "max_completion_tokens": null,
         "is_moderated": false
       },
       "per_request_limits": null,
       "supported_parameters": [
         "frequency_penalty",
         "max_tokens",
-        "min_p",
         "presence_penalty",
-        "repetition_penalty",
         "response_format",
         "seed",
         "stop",
+        "structured_outputs",
         "temperature",
         "tool_choice",
         "tools",
-        "top_k",
         "top_p"
       ]
     }
   },
   {
-    "id": "mistralai/mistral-large",
-    "name": "Mistral Large",
+    "id": "mistralai/mistral-large-2407",
+    "name": "Mistral Large 2407",
     "provider": "openrouter",
     "family": "mistralai",
-    "created_at": "2024-02-26 01:00:00 +0100",
-    "context_window": 128000,
+    "created_at": "2024-11-19 02:06:55 +0100",
+    "context_window": 131072,
     "max_output_tokens": null,
     "knowledge_cutoff": null,
     "modalities": {
@@ -19245,7 +19537,7 @@
       }
     },
     "metadata": {
-      "description": "This is Mistral AI's flagship model, Mistral Large 2 (version `mistral-large-2407`). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/).\n\nIt supports dozens of languages including French, German, Spanish, Italian, Portuguese, Arabic, Hindi, Russian, Chinese, Japanese, and Korean, along with 80+ coding languages including Python, Java, C, C++, JavaScript, and Bash. Its long context window allows precise information recall from large documents.",
+      "description": "This is Mistral AI's flagship model, Mistral Large 2 (version mistral-large-2407). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/).\n\nIt supports dozens of languages including French, German, Spanish, Italian, Portuguese, Arabic, Hindi, Russian, Chinese, Japanese, and Korean, along with 80+ coding languages including Python, Java, C, C++, JavaScript, and Bash. Its long context window allows precise information recall from large documents.\n",
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
@@ -19258,7 +19550,7 @@
         "instruct_type": null
       },
       "top_provider": {
-        "context_length": 128000,
+        "context_length": 131072,
         "max_completion_tokens": null,
         "is_moderated": false
       },
@@ -19279,11 +19571,11 @@
     }
   },
   {
-    "id": "mistralai/mistral-large-2407",
-    "name": "Mistral Large 2407",
+    "id": "mistralai/mistral-large-2411",
+    "name": "Mistral Large 2411",
     "provider": "openrouter",
     "family": "mistralai",
-    "created_at": "2024-11-19 02:06:55 +0100",
+    "created_at": "2024-11-19 02:11:25 +0100",
     "context_window": 131072,
     "max_output_tokens": null,
     "knowledge_cutoff": null,
@@ -19309,7 +19601,7 @@
       }
     },
     "metadata": {
-      "description": "This is Mistral AI's flagship model, Mistral Large 2 (version mistral-large-2407). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/).\n\nIt supports dozens of languages including French, German, Spanish, Italian, Portuguese, Arabic, Hindi, Russian, Chinese, Japanese, and Korean, along with 80+ coding languages including Python, Java, C, C++, JavaScript, and Bash. Its long context window allows precise information recall from large documents.\n",
+      "description": "Mistral Large 2 2411 is an update of [Mistral Large 2](/mistralai/mistral-large) released together with [Pixtral Large 2411](/mistralai/pixtral-large-2411)\n\nIt provides a significant upgrade on the previous [Mistral Large 24.07](/mistralai/mistral-large-2407), with notable improvements in long context understanding, a new system prompt, and more accurate function calling.",
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
@@ -19343,17 +19635,18 @@
     }
   },
   {
-    "id": "mistralai/mistral-large-2411",
-    "name": "Mistral Large 2411",
+    "id": "mistralai/mistral-medium-3",
+    "name": "Mistral: Mistral Medium 3",
     "provider": "openrouter",
     "family": "mistralai",
-    "created_at": "2024-11-19 02:11:25 +0100",
+    "created_at": "2025-05-07 16:15:41 +0200",
     "context_window": 131072,
     "max_output_tokens": null,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text"
+        "text",
+        "image"
       ],
       "output": [
         "text"
@@ -19367,17 +19660,18 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 2.0,
-          "output_per_million": 6.0
+          "input_per_million": 0.39999999999999997,
+          "output_per_million": 2.0
         }
       }
     },
     "metadata": {
-      "description": "Mistral Large 2 2411 is an update of [Mistral Large 2](/mistralai/mistral-large) released together with [Pixtral Large 2411](/mistralai/pixtral-large-2411)\n\nIt provides a significant upgrade on the previous [Mistral Large 24.07](/mistralai/mistral-large-2407), with notable improvements in long context understanding, a new system prompt, and more accurate function calling.",
+      "description": "Mistral Medium 3 is a high-performance enterprise-grade language model designed to deliver frontier-level capabilities at significantly reduced operational cost. It balances state-of-the-art reasoning and multimodal performance with 8× lower cost compared to traditional large models, making it suitable for scalable deployments across professional and industrial use cases.\n\nThe model excels in domains such as coding, STEM reasoning, and enterprise adaptation. It supports hybrid, on-prem, and in-VPC deployments and is optimized for integration into custom workflows. Mistral Medium 3 offers competitive accuracy relative to larger models like Claude Sonnet 3.5/3.7, Llama 4 Maverick, and Command R+, while maintaining broad compatibility across cloud environments.",
       "architecture": {
-        "modality": "text->text",
+        "modality": "text+image->text",
         "input_modalities": [
-          "text"
+          "text",
+          "image"
         ],
         "output_modalities": [
           "text"
@@ -19407,12 +19701,12 @@
     }
   },
   {
-    "id": "mistralai/mistral-medium-3",
-    "name": "Mistral: Mistral Medium 3",
+    "id": "mistralai/mistral-medium-3.1",
+    "name": "Mistral: Mistral Medium 3.1",
     "provider": "openrouter",
     "family": "mistralai",
-    "created_at": "2025-05-07 16:15:41 +0200",
-    "context_window": 131072,
+    "created_at": "2025-08-13 16:33:59 +0200",
+    "context_window": 262144,
     "max_output_tokens": null,
     "knowledge_cutoff": null,
     "modalities": {
@@ -19438,7 +19732,7 @@
       }
     },
     "metadata": {
-      "description": "Mistral Medium 3 is a high-performance enterprise-grade language model designed to deliver frontier-level capabilities at significantly reduced operational cost. It balances state-of-the-art reasoning and multimodal performance with 8× lower cost compared to traditional large models, making it suitable for scalable deployments across professional and industrial use cases.\n\nThe model excels in domains such as coding, STEM reasoning, and enterprise adaptation. It supports hybrid, on-prem, and in-VPC deployments and is optimized for integration into custom workflows. Mistral Medium 3 offers competitive accuracy relative to larger models like Claude Sonnet 3.5/3.7, Llama 4 Maverick, and Command R+, while maintaining broad compatibility across cloud environments.",
+      "description": "Mistral Medium 3.1 is an updated version of Mistral Medium 3, which is a high-performance enterprise-grade language model designed to deliver frontier-level capabilities at significantly reduced operational cost. It balances state-of-the-art reasoning and multimodal performance with 8× lower cost compared to traditional large models, making it suitable for scalable deployments across professional and industrial use cases.\n\nThe model excels in domains such as coding, STEM reasoning, and enterprise adaptation. It supports hybrid, on-prem, and in-VPC deployments and is optimized for integration into custom workflows. Mistral Medium 3.1 offers competitive accuracy relative to larger models like Claude Sonnet 3.5/3.7, Llama 4 Maverick, and Command R+, while maintaining broad compatibility across cloud environments.",
       "architecture": {
         "modality": "text+image->text",
         "input_modalities": [
@@ -19452,7 +19746,7 @@
         "instruct_type": null
       },
       "top_provider": {
-        "context_length": 131072,
+        "context_length": 262144,
         "max_completion_tokens": null,
         "is_moderated": false
       },
@@ -20003,7 +20297,7 @@
     "provider": "openrouter",
     "family": "mistralai",
     "created_at": "2025-06-20 20:10:16 +0200",
-    "context_window": 131072,
+    "context_window": 128000,
     "max_output_tokens": null,
     "knowledge_cutoff": null,
     "modalities": {
@@ -20024,8 +20318,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.01999188,
-          "output_per_million": 0.0800064
+          "input_per_million": 0.049999999999999996,
+          "output_per_million": 0.09999999999999999
         }
       }
     },
@@ -20044,7 +20338,7 @@
         "instruct_type": null
       },
       "top_provider": {
-        "context_length": 131072,
+        "context_length": 128000,
         "max_completion_tokens": null,
         "is_moderated": false
       },
@@ -20973,8 +21267,8 @@
     "provider": "openrouter",
     "family": "neversleep",
     "created_at": "2024-09-15 02:00:00 +0200",
-    "context_window": 40000,
-    "max_output_tokens": 40000,
+    "context_window": 32768,
+    "max_output_tokens": 2048,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -20992,8 +21286,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.16999999999999998,
-          "output_per_million": 0.9900000000000001
+          "input_per_million": 0.09999999999999999,
+          "output_per_million": 0.625
         }
       }
     },
@@ -21011,8 +21305,8 @@
         "instruct_type": "llama3"
       },
       "top_provider": {
-        "context_length": 40000,
-        "max_completion_tokens": 40000,
+        "context_length": 32768,
+        "max_completion_tokens": 2048,
         "is_moderated": false
       },
       "per_request_limits": null,
@@ -21040,8 +21334,8 @@
     "provider": "openrouter",
     "family": "neversleep",
     "created_at": "2023-11-26 01:00:00 +0100",
-    "context_window": 4096,
-    "max_output_tokens": null,
+    "context_window": 8192,
+    "max_output_tokens": 2048,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -21059,8 +21353,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 1.0,
-          "output_per_million": 1.75
+          "input_per_million": 0.625,
+          "output_per_million": 1.0
         }
       }
     },
@@ -21078,8 +21372,8 @@
         "instruct_type": "alpaca"
       },
       "top_provider": {
-        "context_length": 4096,
-        "max_completion_tokens": null,
+        "context_length": 8192,
+        "max_completion_tokens": 2048,
         "is_moderated": false
       },
       "per_request_limits": null,
@@ -23588,8 +23882,8 @@
     "provider": "openrouter",
     "family": "openai",
     "created_at": "2025-08-05 19:17:11 +0200",
-    "context_window": 131072,
-    "max_output_tokens": null,
+    "context_window": 131000,
+    "max_output_tokens": 131000,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -23608,8 +23902,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.07256312,
-          "output_per_million": 0.2903936
+          "input_per_million": 0.072,
+          "output_per_million": 0.28
         }
       }
     },
@@ -23627,8 +23921,8 @@
         "instruct_type": null
       },
       "top_provider": {
-        "context_length": 131072,
-        "max_completion_tokens": null,
+        "context_length": 131000,
+        "max_completion_tokens": 131000,
         "is_moderated": false
       },
       "per_request_limits": null,
@@ -23661,8 +23955,8 @@
     "provider": "openrouter",
     "family": "openai",
     "created_at": "2025-08-05 19:17:09 +0200",
-    "context_window": 131072,
-    "max_output_tokens": null,
+    "context_window": 131000,
+    "max_output_tokens": 131000,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -23682,7 +23976,7 @@
       "text_tokens": {
         "standard": {
           "input_per_million": 0.04,
-          "output_per_million": 0.16
+          "output_per_million": 0.15
         }
       }
     },
@@ -23700,8 +23994,8 @@
         "instruct_type": null
       },
       "top_provider": {
-        "context_length": 131072,
-        "max_completion_tokens": null,
+        "context_length": 131000,
+        "max_completion_tokens": 131000,
         "is_moderated": false
       },
       "per_request_limits": null,
@@ -28323,69 +28617,6 @@
       ]
     }
   },
-  {
-    "id": "thedrummer/valkyrie-49b-v1",
-    "name": "TheDrummer: Valkyrie 49B V1",
-    "provider": "openrouter",
-    "family": "thedrummer",
-    "created_at": "2025-05-23 19:51:10 +0200",
-    "context_window": 131072,
-    "max_output_tokens": 131072,
-    "knowledge_cutoff": null,
-    "modalities": {
-      "input": [
-        "text"
-      ],
-      "output": [
-        "text"
-      ]
-    },
-    "capabilities": [
-      "streaming"
-    ],
-    "pricing": {
-      "text_tokens": {
-        "standard": {
-          "input_per_million": 0.65,
-          "output_per_million": 1.0
-        }
-      }
-    },
-    "metadata": {
-      "description": "Built on top of NVIDIA's Llama 3.3 Nemotron Super 49B, Valkyrie is TheDrummer's newest model drop for creative writing.",
-      "architecture": {
-        "modality": "text->text",
-        "input_modalities": [
-          "text"
-        ],
-        "output_modalities": [
-          "text"
-        ],
-        "tokenizer": "Other",
-        "instruct_type": null
-      },
-      "top_provider": {
-        "context_length": 131072,
-        "max_completion_tokens": 131072,
-        "is_moderated": false
-      },
-      "per_request_limits": null,
-      "supported_parameters": [
-        "frequency_penalty",
-        "include_reasoning",
-        "max_tokens",
-        "min_p",
-        "presence_penalty",
-        "reasoning",
-        "repetition_penalty",
-        "seed",
-        "stop",
-        "temperature",
-        "top_k",
-        "top_p"
-      ]
-    }
-  },
   {
     "id": "thudm/glm-4-32b",
     "name": "THUDM: GLM 4 32B",
@@ -28777,7 +29008,7 @@
     "family": "undi95",
     "created_at": "2023-07-22 02:00:00 +0200",
     "context_window": 6144,
-    "max_output_tokens": null,
+    "max_output_tokens": 1024,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -28795,8 +29026,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.7,
-          "output_per_million": 1.0
+          "input_per_million": 0.5,
+          "output_per_million": 0.75
         }
       }
     },
@@ -28815,7 +29046,7 @@
       },
       "top_provider": {
         "context_length": 6144,
-        "max_completion_tokens": null,
+        "max_completion_tokens": 1024,
         "is_moderated": false
       },
       "per_request_limits": null,
@@ -29427,7 +29658,7 @@
     "provider": "openrouter",
     "family": "z-ai",
     "created_at": "2025-07-25 21:22:27 +0200",
-    "context_window": 98304,
+    "context_window": 131072,
     "max_output_tokens": null,
     "knowledge_cutoff": null,
     "modalities": {
@@ -29466,7 +29697,7 @@
         "instruct_type": null
       },
       "top_provider": {
-        "context_length": 98304,
+        "context_length": 131072,
         "max_completion_tokens": null,
         "is_moderated": false
       },
@@ -29554,6 +29785,7 @@
         "response_format",
         "seed",
         "stop",
+        "structured_outputs",
         "temperature",
         "tool_choice",
         "tools",
@@ -29646,13 +29878,14 @@
     "capabilities": [
       "streaming",
       "function_calling",
+      "structured_output",
       "predicted_outputs"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
           "input_per_million": 0.5,
-          "output_per_million": 1.7999999999999998
+          "output_per_million": 1.7
         }
       }
     },
@@ -29685,6 +29918,7 @@
         "presence_penalty",
         "reasoning",
         "repetition_penalty",
+        "response_format",
         "seed",
         "stop",
         "temperature",
@@ -29700,7 +29934,7 @@
     "name": "Sonar",
     "provider": "perplexity",
     "family": "sonar",
-    "created_at": "2025-08-13 11:35:41 +0200",
+    "created_at": "2025-08-19 12:48:52 +0200",
     "context_window": 128000,
     "max_output_tokens": 4096,
     "knowledge_cutoff": null,
@@ -29732,7 +29966,7 @@
     "name": "Sonar Deep Research",
     "provider": "perplexity",
     "family": "sonar_deep_research",
-    "created_at": "2025-08-13 11:35:41 +0200",
+    "created_at": "2025-08-19 12:48:52 +0200",
     "context_window": 128000,
     "max_output_tokens": 4096,
     "knowledge_cutoff": null,
@@ -29767,7 +30001,7 @@
     "name": "Sonar Pro",
     "provider": "perplexity",
     "family": "sonar_pro",
-    "created_at": "2025-08-13 11:35:41 +0200",
+    "created_at": "2025-08-19 12:48:52 +0200",
     "context_window": 200000,
     "max_output_tokens": 8192,
     "knowledge_cutoff": null,
@@ -29799,7 +30033,7 @@
     "name": "Sonar Reasoning",
     "provider": "perplexity",
     "family": "sonar_reasoning",
-    "created_at": "2025-08-13 11:35:41 +0200",
+    "created_at": "2025-08-19 12:48:52 +0200",
     "context_window": 128000,
     "max_output_tokens": 4096,
     "knowledge_cutoff": null,
@@ -29831,7 +30065,7 @@
     "name": "Sonar Reasoning Pro",
     "provider": "perplexity",
     "family": "sonar_reasoning_pro",
-    "created_at": "2025-08-13 11:35:41 +0200",
+    "created_at": "2025-08-19 12:48:52 +0200",
     "context_window": 128000,
     "max_output_tokens": 8192,
     "knowledge_cutoff": null,