RubyGems - ruby_llm - Versions diffs - 1.3.0 → 1.3.1 - Mend

ruby_llm 1.3.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

checksums.yaml +4 -4
data/README.md +13 -9
data/lib/ruby_llm/active_record/acts_as.rb +1 -0
data/lib/ruby_llm/aliases.json +8 -0
data/lib/ruby_llm/configuration.rb +1 -0
data/lib/ruby_llm/models.json +979 -839
data/lib/ruby_llm/providers/anthropic/tools.rb +5 -4
data/lib/ruby_llm/providers/bedrock/streaming/prelude_handling.rb +3 -3
data/lib/ruby_llm/providers/openai/capabilities.rb +4 -1
data/lib/ruby_llm/providers/openai/chat.rb +12 -8
data/lib/ruby_llm/tool.rb +8 -8
data/lib/ruby_llm/version.rb +1 -1
data/lib/ruby_llm.rb +1 -1
data/lib/tasks/models_docs.rake +13 -7
metadata +1 -1

data/lib/ruby_llm/models.json CHANGED Viewed

@@ -71,7 +71,7 @@
     "id": "claude-3-5-haiku-20241022",
     "name": "Claude Haiku 3.5",
     "provider": "anthropic",
-    "family": "claude-haiku-3-5",
+    "family": "claude-haiku-3.5",
     "created_at": null,
     "context_window": 200000,
     "max_output_tokens": 8192,
@@ -103,7 +103,7 @@
     "id": "claude-3-5-sonnet-20240620",
     "name": "Claude Sonnet 3.5",
     "provider": "anthropic",
-    "family": "claude-sonnet-3-5",
+    "family": "claude-sonnet-3.5",
     "created_at": null,
     "context_window": 200000,
     "max_output_tokens": 8192,
@@ -135,7 +135,7 @@
     "id": "claude-3-5-sonnet-20241022",
     "name": "Claude Sonnet 3.5",
     "provider": "anthropic",
-    "family": "claude-sonnet-3-5",
+    "family": "claude-sonnet-3.5",
     "created_at": null,
     "context_window": 200000,
     "max_output_tokens": 8192,
@@ -167,7 +167,7 @@
     "id": "claude-3-7-sonnet-20250219",
     "name": "Claude Sonnet 3.7",
     "provider": "anthropic",
-    "family": "claude-sonnet-3-7",
+    "family": "claude-sonnet-3.7",
     "created_at": null,
     "context_window": 200000,
     "max_output_tokens": 64000,
@@ -1959,7 +1959,7 @@
   },
   {
     "id": "aqa",
-    "name": "Model that performs Attributed Question Answering.",
+    "name": "AQA",
     "provider": "gemini",
     "family": "aqa",
     "created_at": null,
@@ -1974,9 +1974,7 @@
         "text"
       ]
     },
-    "capabilities": [
-      "streaming"
-    ],
+    "capabilities": [],
     "pricing": {},
     "metadata": {
       "version": "001",
@@ -1988,33 +1986,23 @@
   },
   {
     "id": "embedding-001",
-    "name": "Embedding 001",
+    "name": "Embedding",
     "provider": "gemini",
-    "family": "embedding1",
+    "family": "embedding-001",
     "created_at": null,
     "context_window": 2048,
-    "max_output_tokens": 1,
+    "max_output_tokens": null,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
         "text"
       ],
       "output": [
-        "text",
         "embeddings"
       ]
     },
-    "capabilities": [
-      "streaming",
-      "batch"
-    ],
-    "pricing": {
-      "embeddings": {
-        "standard": {
-          "input_per_million": 0.002
-        }
-      }
-    },
+    "capabilities": [],
+    "pricing": {},
     "metadata": {
       "version": "001",
       "description": "Obtain a distributed representation of a text.",
@@ -2150,40 +2138,33 @@
   },
   {
     "id": "gemini-1.5-flash-001",
-    "name": "Gemini 1.5 Flash 001",
+    "name": "Gemini 1.5 Flash",
     "provider": "gemini",
-    "family": "gemini15_flash",
+    "family": "gemini-1.5-flash",
     "created_at": null,
-    "context_window": 1000000,
+    "context_window": 1048576,
     "max_output_tokens": 8192,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text",
+        "audio",
         "image",
-        "pdf"
+        "text"
       ],
       "output": [
         "text"
       ]
     },
     "capabilities": [
-      "streaming",
       "function_calling",
-      "structured_output",
-      "batch",
-      "caching",
-      "fine_tuning"
+      "structured_output"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
           "input_per_million": 0.075,
+          "cached_input_per_million": 0.01875,
           "output_per_million": 0.3
-        },
-        "batch": {
-          "input_per_million": 0.0375,
-          "output_per_million": 0.15
         }
       }
     },
@@ -2248,40 +2229,33 @@
   },
   {
     "id": "gemini-1.5-flash-002",
-    "name": "Gemini 1.5 Flash 002",
+    "name": "Gemini 1.5 Flash",
     "provider": "gemini",
-    "family": "gemini15_flash",
+    "family": "gemini-1.5-flash",
     "created_at": null,
-    "context_window": 1000000,
+    "context_window": 1048576,
     "max_output_tokens": 8192,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text",
+        "audio",
         "image",
-        "pdf"
+        "text"
       ],
       "output": [
         "text"
       ]
     },
     "capabilities": [
-      "streaming",
       "function_calling",
-      "structured_output",
-      "batch",
-      "caching",
-      "fine_tuning"
+      "structured_output"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
           "input_per_million": 0.075,
+          "cached_input_per_million": 0.01875,
           "output_per_million": 0.3
-        },
-        "batch": {
-          "input_per_million": 0.0375,
-          "output_per_million": 0.15
         }
       }
     },
@@ -2339,40 +2313,33 @@
   },
   {
     "id": "gemini-1.5-flash-8b-001",
-    "name": "Gemini 1.5 Flash-8B 001",
+    "name": "Gemini 1.5 Flash-8B",
     "provider": "gemini",
-    "family": "gemini15_flash_8b",
+    "family": "gemini-1.5-flash-8b",
     "created_at": null,
-    "context_window": 1000000,
+    "context_window": 1048576,
     "max_output_tokens": 8192,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text",
+        "audio",
         "image",
-        "pdf"
+        "text"
       ],
       "output": [
         "text"
       ]
     },
     "capabilities": [
-      "streaming",
       "function_calling",
-      "structured_output",
-      "batch",
-      "caching",
-      "fine_tuning"
+      "structured_output"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.0375,
-          "output_per_million": 0.15
-        },
-        "batch": {
-          "input_per_million": 0.01875,
-          "output_per_million": 0.075
+          "input_per_million": 0.075,
+          "cached_input_per_million": 0.01875,
+          "output_per_million": 0.3
         }
       }
     },
@@ -2429,6 +2396,7 @@
       "version": "001",
       "description": "Experimental release (August 27th, 2024) of Gemini 1.5 Flash-8B, our smallest and most cost effective Flash model. Replaced by Gemini-1.5-flash-8b-001 (stable).",
       "supported_generation_methods": [
+        "createCachedContent",
         "generateContent",
         "countTokens"
       ]
@@ -2477,6 +2445,7 @@
       "version": "001",
       "description": "Experimental release (September 24th, 2024) of Gemini 1.5 Flash-8B, our smallest and most cost effective Flash model. Replaced by Gemini-1.5-flash-8b-001 (stable).",
       "supported_generation_methods": [
+        "createCachedContent",
         "generateContent",
         "countTokens"
       ]
@@ -2484,40 +2453,33 @@
   },
   {
     "id": "gemini-1.5-flash-8b-latest",
-    "name": "Gemini 1.5 Flash-8B Latest",
+    "name": "Gemini 1.5 Flash-8B",
     "provider": "gemini",
-    "family": "gemini15_flash_8b",
+    "family": "gemini-1.5-flash-8b",
     "created_at": null,
-    "context_window": 1000000,
+    "context_window": 1048576,
     "max_output_tokens": 8192,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text",
+        "audio",
         "image",
-        "pdf"
+        "text"
       ],
       "output": [
         "text"
       ]
     },
     "capabilities": [
-      "streaming",
       "function_calling",
-      "structured_output",
-      "batch",
-      "caching",
-      "fine_tuning"
+      "structured_output"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.0375,
-          "output_per_million": 0.15
-        },
-        "batch": {
-          "input_per_million": 0.01875,
-          "output_per_million": 0.075
+          "input_per_million": 0.075,
+          "cached_input_per_million": 0.01875,
+          "output_per_million": 0.3
         }
       }
     },
@@ -2533,40 +2495,33 @@
   },
   {
     "id": "gemini-1.5-flash-latest",
-    "name": "Gemini 1.5 Flash Latest",
+    "name": "Gemini 1.5 Flash",
     "provider": "gemini",
-    "family": "gemini15_flash",
+    "family": "gemini-1.5-flash",
     "created_at": null,
-    "context_window": 1000000,
+    "context_window": 1048576,
     "max_output_tokens": 8192,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text",
+        "audio",
         "image",
-        "pdf"
+        "text"
       ],
       "output": [
         "text"
       ]
     },
     "capabilities": [
-      "streaming",
       "function_calling",
-      "structured_output",
-      "batch",
-      "caching",
-      "fine_tuning"
+      "structured_output"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
           "input_per_million": 0.075,
+          "cached_input_per_million": 0.01875,
           "output_per_million": 0.3
-        },
-        "batch": {
-          "input_per_million": 0.0375,
-          "output_per_million": 0.15
         }
       }
     },
@@ -2622,38 +2577,33 @@
   },
   {
     "id": "gemini-1.5-pro-001",
-    "name": "Gemini 1.5 Pro 001",
+    "name": "Gemini 1.5 Pro",
     "provider": "gemini",
-    "family": "gemini15_pro",
+    "family": "gemini-1.5-pro",
     "created_at": null,
-    "context_window": 2000000,
+    "context_window": 2097152,
     "max_output_tokens": 8192,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text",
+        "audio",
         "image",
-        "pdf"
+        "text"
       ],
       "output": [
         "text"
       ]
     },
     "capabilities": [
-      "streaming",
       "function_calling",
-      "structured_output",
-      "caching"
+      "structured_output"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
           "input_per_million": 1.25,
+          "cached_input_per_million": 0.3125,
           "output_per_million": 5.0
-        },
-        "batch": {
-          "input_per_million": 0.625,
-          "output_per_million": 2.5
         }
       }
     },
@@ -2669,38 +2619,33 @@
   },
   {
     "id": "gemini-1.5-pro-002",
-    "name": "Gemini 1.5 Pro 002",
+    "name": "Gemini 1.5 Pro",
     "provider": "gemini",
-    "family": "gemini15_pro",
+    "family": "gemini-1.5-pro",
     "created_at": null,
-    "context_window": 2000000,
+    "context_window": 2097152,
     "max_output_tokens": 8192,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text",
+        "audio",
         "image",
-        "pdf"
+        "text"
       ],
       "output": [
         "text"
       ]
     },
     "capabilities": [
-      "streaming",
       "function_calling",
-      "structured_output",
-      "caching"
+      "structured_output"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
           "input_per_million": 1.25,
+          "cached_input_per_million": 0.3125,
           "output_per_million": 5.0
-        },
-        "batch": {
-          "input_per_million": 0.625,
-          "output_per_million": 2.5
         }
       }
     },
@@ -2716,38 +2661,33 @@
   },
   {
     "id": "gemini-1.5-pro-latest",
-    "name": "Gemini 1.5 Pro Latest",
+    "name": "Gemini 1.5 Pro",
     "provider": "gemini",
-    "family": "gemini15_pro",
+    "family": "gemini-1.5-pro",
     "created_at": null,
-    "context_window": 2000000,
+    "context_window": 2097152,
     "max_output_tokens": 8192,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text",
+        "audio",
         "image",
-        "pdf"
+        "text"
       ],
       "output": [
         "text"
       ]
     },
     "capabilities": [
-      "streaming",
       "function_calling",
-      "structured_output",
-      "caching"
+      "structured_output"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
           "input_per_million": 1.25,
+          "cached_input_per_million": 0.3125,
           "output_per_million": 5.0
-        },
-        "batch": {
-          "input_per_million": 0.625,
-          "output_per_million": 2.5
         }
       }
     },
@@ -2805,39 +2745,33 @@
   },
   {
     "id": "gemini-2.0-flash-001",
-    "name": "Gemini 2.0 Flash 001",
+    "name": "Gemini 2.0 Flash",
     "provider": "gemini",
-    "family": "gemini20_flash",
+    "family": "gemini-2.0-flash",
     "created_at": null,
     "context_window": 1048576,
     "max_output_tokens": 8192,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text",
+        "audio",
         "image",
-        "pdf"
+        "text"
       ],
       "output": [
         "text"
       ]
     },
     "capabilities": [
-      "streaming",
       "function_calling",
-      "structured_output",
-      "batch",
-      "caching"
+      "structured_output"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
           "input_per_million": 0.1,
+          "cached_input_per_million": 0.025,
           "output_per_million": 0.4
-        },
-        "batch": {
-          "input_per_million": 0.05,
-          "output_per_million": 0.2
         }
       }
     },
@@ -2854,39 +2788,33 @@
   },
   {
     "id": "gemini-2.0-flash-exp",
-    "name": "Gemini 2.0 Flash Experimental",
+    "name": "Gemini 2.0 Flash",
     "provider": "gemini",
-    "family": "gemini20_flash",
+    "family": "gemini-2.0-flash",
     "created_at": null,
     "context_window": 1048576,
     "max_output_tokens": 8192,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text",
+        "audio",
         "image",
-        "pdf"
+        "text"
       ],
       "output": [
         "text"
       ]
     },
     "capabilities": [
-      "streaming",
       "function_calling",
-      "structured_output",
-      "batch",
-      "caching"
+      "structured_output"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
           "input_per_million": 0.1,
+          "cached_input_per_million": 0.025,
           "output_per_million": 0.4
-        },
-        "batch": {
-          "input_per_million": 0.05,
-          "output_per_million": 0.2
         }
       }
     },
@@ -2945,36 +2873,33 @@
   },
   {
     "id": "gemini-2.0-flash-lite-001",
-    "name": "Gemini 2.0 Flash-Lite 001",
+    "name": "Gemini 2.0 Flash-Lite",
     "provider": "gemini",
-    "family": "gemini20_flash_lite",
+    "family": "gemini-2.0-flash-lite",
     "created_at": null,
     "context_window": 1048576,
     "max_output_tokens": 8192,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text",
+        "audio",
         "image",
-        "pdf"
+        "text"
       ],
       "output": [
         "text"
       ]
     },
     "capabilities": [
-      "streaming",
-      "batch"
+      "function_calling",
+      "structured_output"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.075,
-          "output_per_million": 0.3
-        },
-        "batch": {
-          "input_per_million": 0.0375,
-          "output_per_million": 0.15
+          "input_per_million": 0.1,
+          "cached_input_per_million": 0.025,
+          "output_per_million": 0.4
         }
       }
     },
@@ -3085,7 +3010,7 @@
     "id": "gemini-2.0-flash-live-001",
     "name": "Gemini 2.0 Flash Live",
     "provider": "gemini",
-    "family": "gemini-2.0-flash-live",
+    "family": "gemini-2.0-flash-live-001",
     "created_at": null,
     "context_window": 1048576,
     "max_output_tokens": 8192,
@@ -3396,7 +3321,7 @@
     "id": "gemini-2.5-flash-exp-native-audio-thinking-dialog",
     "name": "Gemini 2.5 Flash Native Audio",
     "provider": "gemini",
-    "family": "gemini-2.5-flash-native-audio",
+    "family": "gemini-2.5-flash-preview-native-audio-dialog",
     "created_at": null,
     "context_window": 128000,
     "max_output_tokens": 8000,
@@ -3526,7 +3451,7 @@
     "id": "gemini-2.5-flash-preview-05-20",
     "name": "Gemini 2.5 Flash Preview 05-20",
     "provider": "gemini",
-    "family": "gemini-2.5-flash-preview",
+    "family": "gemini-2.5-flash-preview-05-20",
     "created_at": null,
     "context_window": 1048576,
     "max_output_tokens": 65536,
@@ -3569,7 +3494,7 @@
     "id": "gemini-2.5-flash-preview-native-audio-dialog",
     "name": "Gemini 2.5 Flash Native Audio",
     "provider": "gemini",
-    "family": "gemini-2.5-flash-native-audio",
+    "family": "gemini-2.5-flash-preview-native-audio-dialog",
     "created_at": null,
     "context_window": 128000,
     "max_output_tokens": 8000,
@@ -3599,7 +3524,7 @@
   },
   {
     "id": "gemini-2.5-flash-preview-tts",
-    "name": "Gemini 2.5 Flash Preview Text-to-Speech",
+    "name": "Gemini 2.5 Flash Preview TTS",
     "provider": "gemini",
     "family": "gemini-2.5-flash-preview-tts",
     "created_at": null,
@@ -3729,9 +3654,57 @@
   },
   {
     "id": "gemini-2.5-pro-preview-05-06",
+    "name": "Gemini 2.5 Pro Preview 05-06",
+    "provider": "gemini",
+    "family": "other",
+    "created_at": null,
+    "context_window": 1048576,
+    "max_output_tokens": 65536,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "text",
+        "image",
+        "pdf"
+      ],
+      "output": [
+        "text"
+      ]
+    },
+    "capabilities": [
+      "streaming",
+      "function_calling",
+      "structured_output",
+      "caching"
+    ],
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 0.075,
+          "output_per_million": 0.3
+        },
+        "batch": {
+          "input_per_million": 0.0375,
+          "output_per_million": 0.15
+        }
+      }
+    },
+    "metadata": {
+      "version": "2.5-preview-05-06",
+      "description": "Preview release (May 6th, 2025) of Gemini 2.5 Pro",
+      "supported_generation_methods": [
+        "generateContent",
+        "countTokens",
+        "createCachedContent",
+        "batchGenerateContent"
+      ]
+    }
+  },
+  {
+    "id": "gemini-2.5-pro-preview-06-05",
     "name": "Gemini 2.5 Pro Preview",
     "provider": "gemini",
-    "family": "gemini-2.5-pro-preview",
+    "family": "gemini-2.5-pro-preview-06-05",
     "created_at": null,
     "context_window": 1048576,
     "max_output_tokens": 65536,
@@ -3760,8 +3733,8 @@
       }
     },
     "metadata": {
-      "version": "2.5-preview-05-06",
-      "description": "Preview release (May 6th, 2025) of Gemini 2.5 Pro",
+      "version": "2.5-preview-06-05",
+      "description": "Preview release (June 5th, 2025) of Gemini 2.5 Pro",
       "supported_generation_methods": [
         "generateContent",
         "countTokens",
@@ -3772,7 +3745,7 @@
   },
   {
     "id": "gemini-2.5-pro-preview-tts",
-    "name": "Gemini 2.5 Pro Preview Text-to-Speech",
+    "name": "Gemini 2.5 Pro Preview TTS",
     "provider": "gemini",
     "family": "gemini-2.5-pro-preview-tts",
     "created_at": null,
@@ -3855,7 +3828,8 @@
       "description": "Obtain a distributed representation of a text.",
       "supported_generation_methods": [
         "embedContent",
-        "countTextTokens"
+        "countTextTokens",
+        "countTokens"
       ]
     }
   },
@@ -3863,7 +3837,7 @@
     "id": "gemini-embedding-exp-03-07",
     "name": "Gemini Embedding Experimental",
     "provider": "gemini",
-    "family": "gemini-embedding",
+    "family": "gemini-embedding-exp-03-07",
     "created_at": null,
     "context_window": 8192,
     "max_output_tokens": null,
@@ -3883,7 +3857,8 @@
       "description": "Obtain a distributed representation of a text.",
       "supported_generation_methods": [
         "embedContent",
-        "countTextTokens"
+        "countTextTokens",
+        "countTokens"
       ]
     }
   },
@@ -4190,7 +4165,7 @@
     "id": "imagen-3.0-generate-002",
     "name": "Imagen 3",
     "provider": "gemini",
-    "family": "imagen-3",
+    "family": "imagen-3.0-generate-002",
     "created_at": null,
     "context_window": null,
     "max_output_tokens": null,
@@ -4204,20 +4179,8 @@
       ]
     },
     "capabilities": [],
-    "pricing": {
-      "text_tokens": {
-        "standard": {
-          "output_per_million": 0.03
-        }
-      }
-    },
-    "metadata": {
-      "version": "002",
-      "description": "Vertex served Imagen 3.0 002 model",
-      "supported_generation_methods": [
-        "predict"
-      ]
-    }
+    "pricing": {},
+    "metadata": {}
   },
   {
     "id": "learnlm-2.0-flash-experimental",
@@ -4231,133 +4194,60 @@
     "modalities": {
       "input": [
         "text",
-        "image",
-        "pdf"
-      ],
-      "output": [
-        "text"
-      ]
-    },
-    "capabilities": [
-      "streaming",
-      "function_calling",
-      "structured_output",
-      "batch",
-      "caching"
-    ],
-    "pricing": {
-      "text_tokens": {
-        "standard": {
-          "input_per_million": 0.075,
-          "output_per_million": 0.3
-        },
-        "batch": {
-          "input_per_million": 0.0375,
-          "output_per_million": 0.15
-        }
-      }
-    },
-    "metadata": {
-      "version": "2.0",
-      "description": "LearnLM 2.0 Flash Experimental",
-      "supported_generation_methods": [
-        "generateContent",
-        "countTokens"
-      ]
-    }
-  },
-  {
-    "id": "models/aqa",
-    "name": "AQA",
-    "provider": "gemini",
-    "family": "aqa",
-    "created_at": null,
-    "context_window": 7168,
-    "max_output_tokens": 1024,
-    "knowledge_cutoff": null,
-    "modalities": {
-      "input": [
-        "text"
-      ],
-      "output": [
-        "text"
-      ]
-    },
-    "capabilities": [],
-    "pricing": {},
-    "metadata": {}
-  },
-  {
-    "id": "models/embedding-001",
-    "name": "Embedding",
-    "provider": "gemini",
-    "family": "embedding",
-    "created_at": null,
-    "context_window": 2048,
-    "max_output_tokens": null,
-    "knowledge_cutoff": null,
-    "modalities": {
-      "input": [
-        "text"
-      ],
-      "output": [
-        "embeddings"
-      ]
-    },
-    "capabilities": [],
-    "pricing": {},
-    "metadata": {}
-  },
-  {
-    "id": "models/text-embedding-004",
-    "name": "Text Embedding",
-    "provider": "gemini",
-    "family": "text-embedding",
-    "created_at": null,
-    "context_window": 2048,
-    "max_output_tokens": null,
-    "knowledge_cutoff": null,
-    "modalities": {
-      "input": [
-        "text"
+        "image",
+        "pdf"
       ],
       "output": [
-        "embeddings"
+        "text"
       ]
     },
-    "capabilities": [],
-    "pricing": {},
-    "metadata": {}
+    "capabilities": [
+      "streaming",
+      "function_calling",
+      "structured_output",
+      "batch",
+      "caching"
+    ],
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 0.075,
+          "output_per_million": 0.3
+        },
+        "batch": {
+          "input_per_million": 0.0375,
+          "output_per_million": 0.15
+        }
+      }
+    },
+    "metadata": {
+      "version": "2.0",
+      "description": "LearnLM 2.0 Flash Experimental",
+      "supported_generation_methods": [
+        "generateContent",
+        "countTokens"
+      ]
+    }
   },
   {
     "id": "text-embedding-004",
-    "name": "Text Embedding 004",
+    "name": "Text Embedding",
     "provider": "gemini",
-    "family": "embedding4",
+    "family": "text-embedding-004",
     "created_at": null,
     "context_window": 2048,
-    "max_output_tokens": 1,
+    "max_output_tokens": null,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
         "text"
       ],
       "output": [
-        "text",
         "embeddings"
       ]
     },
-    "capabilities": [
-      "streaming",
-      "batch"
-    ],
-    "pricing": {
-      "embeddings": {
-        "standard": {
-          "input_per_million": 0.002
-        }
-      }
-    },
+    "capabilities": [],
+    "pricing": {},
     "metadata": {
       "version": "004",
       "description": "Obtain a distributed representation of a text.",
@@ -4370,7 +4260,7 @@
     "id": "veo-2.0-generate-001",
     "name": "Veo 2",
     "provider": "gemini",
-    "family": "veo-2",
+    "family": "veo-2.0-generate-001",
     "created_at": null,
     "context_window": null,
     "max_output_tokens": null,
@@ -4383,13 +4273,7 @@
       "output": []
     },
     "capabilities": [],
-    "pricing": {
-      "text_tokens": {
-        "standard": {
-          "output_per_million": 0.35
-        }
-      }
-    },
+    "pricing": {},
     "metadata": {}
   },
   {
@@ -4473,7 +4357,10 @@
         "text"
       ]
     },
-    "capabilities": [],
+    "capabilities": [
+      "function_calling",
+      "structured_output"
+    ],
     "pricing": {
       "text_tokens": {
         "standard": {
@@ -4507,7 +4394,8 @@
       ]
     },
     "capabilities": [
-      "batch"
+      "batch",
+      "function_calling"
     ],
     "pricing": {
       "text_tokens": {
@@ -4542,7 +4430,8 @@
       ]
     },
     "capabilities": [
-      "batch"
+      "batch",
+      "function_calling"
     ],
     "pricing": {
       "text_tokens": {
@@ -4572,17 +4461,12 @@
         "text"
       ],
       "output": [
-        "image"
+        "image",
+        "text"
       ]
     },
     "capabilities": [],
-    "pricing": {
-      "text_tokens": {
-        "standard": {
-          "output_per_million": 0.02
-        }
-      }
-    },
+    "pricing": {},
     "metadata": {
       "object": "model",
       "owned_by": "system"
@@ -5647,25 +5531,64 @@
   },
   {
     "id": "gpt-4o-audio-preview-2024-12-17",
-    "name": "GPT-4o Audio",
+    "name": "GPT-4o-Audio Preview 20241217",
     "provider": "openai",
-    "family": "gpt-4o-audio-preview",
-    "created_at": null,
+    "family": "gpt4o_audio",
+    "created_at": "2024-12-12 21:10:39 +0100",
     "context_window": 128000,
     "max_output_tokens": 16384,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "audio",
-        "text"
+        "text",
+        "audio"
       ],
       "output": [
-        "audio",
-        "text"
+        "text",
+        "audio"
       ]
     },
     "capabilities": [
-      "function_calling"
+      "streaming",
+      "speech_generation",
+      "transcription"
+    ],
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 2.5,
+          "output_per_million": 10.0
+        }
+      }
+    },
+    "metadata": {
+      "object": "model",
+      "owned_by": "system"
+    }
+  },
+  {
+    "id": "gpt-4o-audio-preview-2025-06-03",
+    "name": "GPT-4o-Audio Preview 20250603",
+    "provider": "openai",
+    "family": "gpt4o_audio",
+    "created_at": "2025-06-03 01:54:58 +0200",
+    "context_window": 128000,
+    "max_output_tokens": 16384,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "text",
+        "audio"
+      ],
+      "output": [
+        "text",
+        "audio"
+      ]
+    },
+    "capabilities": [
+      "streaming",
+      "speech_generation",
+      "transcription"
     ],
     "pricing": {
       "text_tokens": {
@@ -5921,7 +5844,9 @@
         "text"
       ]
     },
-    "capabilities": [],
+    "capabilities": [
+      "structured_output"
+    ],
     "pricing": {
       "text_tokens": {
         "standard": {
@@ -5952,7 +5877,9 @@
         "text"
       ]
     },
-    "capabilities": [],
+    "capabilities": [
+      "structured_output"
+    ],
     "pricing": {
       "text_tokens": {
         "standard": {
@@ -6131,6 +6058,39 @@
       "owned_by": "system"
     }
   },
+  {
+    "id": "gpt-4o-realtime-preview-2025-06-03",
+    "name": "GPT-4o-Realtime Preview 20250603",
+    "provider": "openai",
+    "family": "gpt4o_realtime",
+    "created_at": "2025-06-03 01:43:58 +0200",
+    "context_window": 128000,
+    "max_output_tokens": 4096,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "text"
+      ],
+      "output": [
+        "text"
+      ]
+    },
+    "capabilities": [
+      "streaming"
+    ],
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 5.0,
+          "output_per_million": 20.0
+        }
+      }
+    },
+    "metadata": {
+      "object": "model",
+      "owned_by": "system"
+    }
+  },
   {
     "id": "gpt-4o-search-preview",
     "name": "GPT-4o Search Preview",
@@ -6148,7 +6108,9 @@
         "text"
       ]
     },
-    "capabilities": [],
+    "capabilities": [
+      "structured_output"
+    ],
     "pricing": {
       "text_tokens": {
         "standard": {
@@ -6179,7 +6141,9 @@
         "text"
       ]
     },
-    "capabilities": [],
+    "capabilities": [
+      "structured_output"
+    ],
     "pricing": {
       "text_tokens": {
         "standard": {
@@ -6561,17 +6525,98 @@
     }
   },
   {
-    "id": "o3",
-    "name": "o3",
+    "id": "o3",
+    "name": "o3",
+    "provider": "openai",
+    "family": "o3",
+    "created_at": null,
+    "context_window": 200000,
+    "max_output_tokens": 100000,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "image",
+        "text"
+      ],
+      "output": [
+        "text"
+      ]
+    },
+    "capabilities": [
+      "batch",
+      "function_calling",
+      "structured_output"
+    ],
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 2.0,
+          "cached_input_per_million": 0.5,
+          "output_per_million": 8.0
+        },
+        "batch": {
+          "input_per_million": 1.0,
+          "output_per_million": 4.0
+        }
+      }
+    },
+    "metadata": {
+      "object": "model",
+      "owned_by": "system"
+    }
+  },
+  {
+    "id": "o3-2025-04-16",
+    "name": "o3",
+    "provider": "openai",
+    "family": "o3",
+    "created_at": null,
+    "context_window": 200000,
+    "max_output_tokens": 100000,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "image",
+        "text"
+      ],
+      "output": [
+        "text"
+      ]
+    },
+    "capabilities": [
+      "batch",
+      "function_calling",
+      "structured_output"
+    ],
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 2.0,
+          "cached_input_per_million": 0.5,
+          "output_per_million": 8.0
+        },
+        "batch": {
+          "input_per_million": 1.0,
+          "output_per_million": 4.0
+        }
+      }
+    },
+    "metadata": {
+      "object": "model",
+      "owned_by": "system"
+    }
+  },
+  {
+    "id": "o3-mini",
+    "name": "o3-mini",
     "provider": "openai",
-    "family": "o3",
+    "family": "o3-mini",
     "created_at": null,
     "context_window": 200000,
     "max_output_tokens": 100000,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "image",
         "text"
       ],
       "output": [
@@ -6586,30 +6631,32 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 10.0,
-          "cached_input_per_million": 2.5,
-          "output_per_million": 40.0
+          "input_per_million": 1.1,
+          "cached_input_per_million": 0.55,
+          "output_per_million": 4.4
         },
         "batch": {
-          "input_per_million": 5.0,
-          "output_per_million": 20.0
+          "input_per_million": 0.55,
+          "output_per_million": 2.2
         }
       }
     },
-    "metadata": {}
+    "metadata": {
+      "object": "model",
+      "owned_by": "system"
+    }
   },
   {
-    "id": "o3-2025-04-16",
-    "name": "o3",
+    "id": "o3-mini-2025-01-31",
+    "name": "o3-mini",
     "provider": "openai",
-    "family": "o3",
+    "family": "o3-mini",
     "created_at": null,
     "context_window": 200000,
     "max_output_tokens": 100000,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "image",
         "text"
       ],
       "output": [
@@ -6624,29 +6671,33 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 10.0,
-          "cached_input_per_million": 2.5,
-          "output_per_million": 40.0
+          "input_per_million": 1.1,
+          "cached_input_per_million": 0.55,
+          "output_per_million": 4.4
         },
         "batch": {
-          "input_per_million": 5.0,
-          "output_per_million": 20.0
+          "input_per_million": 0.55,
+          "output_per_million": 2.2
         }
       }
     },
-    "metadata": {}
+    "metadata": {
+      "object": "model",
+      "owned_by": "system"
+    }
   },
   {
-    "id": "o3-mini",
-    "name": "o3-mini",
+    "id": "o3-pro",
+    "name": "o3-pro",
     "provider": "openai",
-    "family": "o3-mini",
+    "family": "o3-pro",
     "created_at": null,
     "context_window": 200000,
     "max_output_tokens": 100000,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
+        "image",
         "text"
       ],
       "output": [
@@ -6661,13 +6712,12 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 1.1,
-          "cached_input_per_million": 0.55,
-          "output_per_million": 4.4
+          "input_per_million": 20.0,
+          "output_per_million": 80.0
         },
         "batch": {
-          "input_per_million": 0.55,
-          "output_per_million": 2.2
+          "input_per_million": 10.0,
+          "output_per_million": 40.0
         }
       }
     },
@@ -6677,16 +6727,17 @@
     }
   },
   {
-    "id": "o3-mini-2025-01-31",
-    "name": "o3-mini",
+    "id": "o3-pro-2025-06-10",
+    "name": "o3-pro",
     "provider": "openai",
-    "family": "o3-mini",
+    "family": "o3-pro",
     "created_at": null,
     "context_window": 200000,
     "max_output_tokens": 100000,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
+        "image",
         "text"
       ],
       "output": [
@@ -6701,13 +6752,12 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 1.1,
-          "cached_input_per_million": 0.55,
-          "output_per_million": 4.4
+          "input_per_million": 20.0,
+          "output_per_million": 80.0
         },
         "batch": {
-          "input_per_million": 0.55,
-          "output_per_million": 2.2
+          "input_per_million": 10.0,
+          "output_per_million": 40.0
         }
       }
     },
@@ -7147,6 +7197,7 @@
         "audio"
       ],
       "output": [
+        "audio",
         "text"
       ]
     },
@@ -11022,23 +11073,23 @@
       },
       "per_request_limits": null,
       "supported_parameters": [
+        "tools",
+        "tool_choice",
         "max_tokens",
         "temperature",
         "top_p",
+        "structured_outputs",
+        "response_format",
         "stop",
         "frequency_penalty",
         "presence_penalty",
-        "repetition_penalty",
-        "response_format",
         "top_k",
-        "seed",
-        "min_p",
+        "repetition_penalty",
         "logit_bias",
         "logprobs",
         "top_logprobs",
-        "tools",
-        "tool_choice",
-        "structured_outputs"
+        "seed",
+        "min_p"
       ]
     }
   },
@@ -11048,8 +11099,8 @@
     "provider": "openrouter",
     "family": "deepseek",
     "created_at": "2025-03-24 14:59:15 +0100",
-    "context_window": 32768,
-    "max_output_tokens": 163840,
+    "context_window": 163840,
+    "max_output_tokens": null,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -11068,8 +11119,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.25,
-          "output_per_million": 1.3
+          "input_per_million": 0.3,
+          "output_per_million": 0.88
         }
       }
     },
@@ -11087,24 +11138,24 @@
         "instruct_type": null
       },
       "top_provider": {
-        "context_length": 32768,
-        "max_completion_tokens": 163840,
+        "context_length": 163840,
+        "max_completion_tokens": null,
         "is_moderated": false
       },
       "per_request_limits": null,
       "supported_parameters": [
+        "tools",
+        "tool_choice",
         "max_tokens",
         "temperature",
         "top_p",
-        "presence_penalty",
+        "structured_outputs",
+        "response_format",
+        "stop",
         "frequency_penalty",
-        "repetition_penalty",
+        "presence_penalty",
         "top_k",
-        "stop",
-        "tools",
-        "tool_choice",
-        "response_format",
-        "structured_outputs",
+        "repetition_penalty",
         "logit_bias",
         "logprobs",
         "top_logprobs",
@@ -11449,6 +11500,7 @@
     },
     "capabilities": [
       "streaming",
+      "function_calling",
       "structured_output",
       "predicted_outputs"
     ],
@@ -11493,6 +11545,8 @@
         "logit_bias",
         "min_p",
         "response_format",
+        "tools",
+        "tool_choice",
         "logprobs",
         "top_logprobs",
         "seed",
@@ -11506,8 +11560,8 @@
     "provider": "openrouter",
     "family": "deepseek",
     "created_at": "2025-05-29 19:09:03 +0200",
-    "context_window": 128000,
-    "max_output_tokens": 32000,
+    "context_window": 131072,
+    "max_output_tokens": 131072,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -11524,8 +11578,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.06,
-          "output_per_million": 0.09
+          "input_per_million": 0.049999999999999996,
+          "output_per_million": 0.09999999999999999
         }
       }
     },
@@ -11543,8 +11597,8 @@
         "instruct_type": "deepseek-r1"
       },
       "top_provider": {
-        "context_length": 128000,
-        "max_completion_tokens": 32000,
+        "context_length": 131072,
+        "max_completion_tokens": 131072,
         "is_moderated": false
       },
       "per_request_limits": null,
@@ -11554,13 +11608,13 @@
         "top_p",
         "reasoning",
         "include_reasoning",
-        "stop",
-        "frequency_penalty",
         "presence_penalty",
-        "seed",
+        "frequency_penalty",
+        "repetition_penalty",
         "top_k",
+        "stop",
+        "seed",
         "min_p",
-        "repetition_penalty",
         "logit_bias"
       ]
     }
@@ -12257,7 +12311,7 @@
     "provider": "openrouter",
     "family": "deepseek",
     "created_at": "2025-03-06 22:43:54 +0100",
-    "context_window": 128000,
+    "context_window": 163840,
     "max_output_tokens": null,
     "knowledge_cutoff": null,
     "modalities": {
@@ -12287,7 +12341,7 @@
         "instruct_type": "deepseek-r1"
       },
       "top_provider": {
-        "context_length": 128000,
+        "context_length": 163840,
         "max_completion_tokens": null,
         "is_moderated": false
       },
@@ -13245,7 +13299,78 @@
   },
   {
     "id": "google/gemini-2.5-pro-preview",
-    "name": "Google: Gemini 2.5 Pro Preview",
+    "name": "Google: Gemini 2.5 Pro Preview 06-05",
+    "provider": "openrouter",
+    "family": "google",
+    "created_at": "2025-06-05 17:27:37 +0200",
+    "context_window": 1048576,
+    "max_output_tokens": 65536,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "file",
+        "image",
+        "text"
+      ],
+      "output": [
+        "text"
+      ]
+    },
+    "capabilities": [
+      "streaming",
+      "function_calling",
+      "structured_output"
+    ],
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 1.25,
+          "output_per_million": 10.0,
+          "cached_input_per_million": 0.31
+        }
+      }
+    },
+    "metadata": {
+      "description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.\n",
+      "architecture": {
+        "modality": "text+image->text",
+        "input_modalities": [
+          "file",
+          "image",
+          "text"
+        ],
+        "output_modalities": [
+          "text"
+        ],
+        "tokenizer": "Gemini",
+        "instruct_type": null
+      },
+      "top_provider": {
+        "context_length": 1048576,
+        "max_completion_tokens": 65536,
+        "is_moderated": false
+      },
+      "per_request_limits": null,
+      "supported_parameters": [
+        "tools",
+        "tool_choice",
+        "max_tokens",
+        "temperature",
+        "top_p",
+        "reasoning",
+        "include_reasoning",
+        "structured_outputs",
+        "response_format",
+        "stop",
+        "frequency_penalty",
+        "presence_penalty",
+        "seed"
+      ]
+    }
+  },
+  {
+    "id": "google/gemini-2.5-pro-preview-05-06",
+    "name": "Google: Gemini 2.5 Pro Preview 05-06",
     "provider": "openrouter",
     "family": "google",
     "created_at": "2025-05-07 02:41:53 +0200",
@@ -13644,65 +13769,7 @@
     "family": "google",
     "created_at": "2024-06-28 02:00:00 +0200",
     "context_window": 8192,
-    "max_output_tokens": 8192,
-    "knowledge_cutoff": null,
-    "modalities": {
-      "input": [
-        "text"
-      ],
-      "output": [
-        "text"
-      ]
-    },
-    "capabilities": [
-      "streaming",
-      "predicted_outputs"
-    ],
-    "pricing": {},
-    "metadata": {
-      "description": "Gemma 2 9B by Google is an advanced, open-source language model that sets a new standard for efficiency and performance in its size class.\n\nDesigned for a wide variety of tasks, it empowers developers and researchers to build innovative applications, while maintaining accessibility, safety, and cost-effectiveness.\n\nSee the [launch announcement](https://blog.google/technology/developers/google-gemma-2/) for more details. Usage of Gemma is subject to Google's [Gemma Terms of Use](https://ai.google.dev/gemma/terms).",
-      "architecture": {
-        "modality": "text->text",
-        "input_modalities": [
-          "text"
-        ],
-        "output_modalities": [
-          "text"
-        ],
-        "tokenizer": "Gemini",
-        "instruct_type": "gemma"
-      },
-      "top_provider": {
-        "context_length": 8192,
-        "max_completion_tokens": 8192,
-        "is_moderated": false
-      },
-      "per_request_limits": null,
-      "supported_parameters": [
-        "max_tokens",
-        "temperature",
-        "top_p",
-        "stop",
-        "frequency_penalty",
-        "presence_penalty",
-        "seed",
-        "top_k",
-        "min_p",
-        "repetition_penalty",
-        "logprobs",
-        "logit_bias",
-        "top_logprobs"
-      ]
-    }
-  },
-  {
-    "id": "google/gemma-2b-it",
-    "name": "Google: Gemma 2 2B",
-    "provider": "openrouter",
-    "family": "google",
-    "created_at": "2025-05-28 21:33:35 +0200",
-    "context_window": 8192,
-    "max_output_tokens": null,
+    "max_output_tokens": 8192,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -13714,19 +13781,11 @@
     },
     "capabilities": [
       "streaming",
-      "structured_output",
       "predicted_outputs"
     ],
-    "pricing": {
-      "text_tokens": {
-        "standard": {
-          "input_per_million": 0.09999999999999999,
-          "output_per_million": 0.09999999999999999
-        }
-      }
-    },
+    "pricing": {},
     "metadata": {
-      "description": "Gemma 2 2B by Google is an open model built from the same research and technology used to create the [Gemini models](/models?q=gemini).\n\nGemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning.\n\nSee the [launch announcement](https://blog.google/technology/developers/google-gemma-2/) for more details. Usage of Gemma is subject to Google's [Gemma Terms of Use](https://ai.google.dev/gemma/terms).",
+      "description": "Gemma 2 9B by Google is an advanced, open-source language model that sets a new standard for efficiency and performance in its size class.\n\nDesigned for a wide variety of tasks, it empowers developers and researchers to build innovative applications, while maintaining accessibility, safety, and cost-effectiveness.\n\nSee the [launch announcement](https://blog.google/technology/developers/google-gemma-2/) for more details. Usage of Gemma is subject to Google's [Gemma Terms of Use](https://ai.google.dev/gemma/terms).",
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
@@ -13740,7 +13799,7 @@
       },
       "top_provider": {
         "context_length": 8192,
-        "max_completion_tokens": null,
+        "max_completion_tokens": 8192,
         "is_moderated": false
       },
       "per_request_limits": null,
@@ -13751,11 +13810,13 @@
         "stop",
         "frequency_penalty",
         "presence_penalty",
+        "seed",
         "top_k",
+        "min_p",
         "repetition_penalty",
+        "logprobs",
         "logit_bias",
-        "min_p",
-        "response_format"
+        "top_logprobs"
       ]
     }
   },
@@ -14313,13 +14374,13 @@
         "max_tokens",
         "temperature",
         "top_p",
-        "stop",
-        "frequency_penalty",
         "presence_penalty",
-        "seed",
+        "frequency_penalty",
+        "repetition_penalty",
         "top_k",
+        "stop",
+        "seed",
         "min_p",
-        "repetition_penalty",
         "logit_bias",
         "response_format",
         "top_a"
@@ -15266,15 +15327,15 @@
         "max_tokens",
         "temperature",
         "top_p",
+        "response_format",
         "stop",
         "frequency_penalty",
         "presence_penalty",
-        "top_k",
         "repetition_penalty",
-        "logit_bias",
-        "min_p",
-        "response_format",
+        "top_k",
         "seed",
+        "min_p",
+        "logit_bias",
         "logprobs",
         "top_logprobs",
         "structured_outputs"
@@ -15287,8 +15348,8 @@
     "provider": "openrouter",
     "family": "meta-llama",
     "created_at": "2024-07-23 02:00:00 +0200",
-    "context_window": 16384,
-    "max_output_tokens": 16384,
+    "context_window": 131000,
+    "max_output_tokens": 131000,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -15307,7 +15368,7 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.02,
+          "input_per_million": 0.019000000000000003,
           "output_per_million": 0.03
         }
       }
@@ -15326,8 +15387,8 @@
         "instruct_type": "llama3"
       },
       "top_provider": {
-        "context_length": 16384,
-        "max_completion_tokens": 16384,
+        "context_length": 131000,
+        "max_completion_tokens": 131000,
         "is_moderated": false
       },
       "per_request_limits": null,
@@ -15894,14 +15955,14 @@
         "stop",
         "frequency_penalty",
         "presence_penalty",
-        "seed",
+        "repetition_penalty",
+        "response_format",
         "top_k",
+        "seed",
         "min_p",
-        "repetition_penalty",
         "logit_bias",
         "logprobs",
         "top_logprobs",
-        "response_format",
         "structured_outputs"
       ]
     }
@@ -15950,17 +16011,16 @@
       },
       "per_request_limits": null,
       "supported_parameters": [
+        "tools",
+        "tool_choice",
         "max_tokens",
         "temperature",
         "top_p",
+        "repetition_penalty",
+        "top_k",
         "stop",
         "frequency_penalty",
         "presence_penalty",
-        "seed",
-        "tools",
-        "tool_choice",
-        "repetition_penalty",
-        "top_k",
         "logit_bias",
         "min_p",
         "response_format"
@@ -16078,12 +16138,12 @@
         "max_tokens",
         "temperature",
         "top_p",
-        "presence_penalty",
+        "stop",
         "frequency_penalty",
+        "presence_penalty",
         "repetition_penalty",
-        "top_k",
-        "stop",
         "response_format",
+        "top_k",
         "seed",
         "min_p",
         "logit_bias",
@@ -16422,11 +16482,12 @@
         "presence_penalty",
         "top_k",
         "repetition_penalty",
-        "logit_bias",
-        "min_p",
         "response_format",
-        "top_logprobs",
+        "structured_outputs",
+        "logit_bias",
         "logprobs",
+        "top_logprobs",
+        "min_p",
         "seed"
       ]
     }
@@ -17170,13 +17231,205 @@
     }
   },
   {
-    "id": "mistralai/codestral-2501",
-    "name": "Mistral: Codestral 2501",
+    "id": "mistralai/codestral-2501",
+    "name": "Mistral: Codestral 2501",
+    "provider": "openrouter",
+    "family": "mistralai",
+    "created_at": "2025-01-14 23:58:42 +0100",
+    "context_window": 262144,
+    "max_output_tokens": null,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "text"
+      ],
+      "output": [
+        "text"
+      ]
+    },
+    "capabilities": [
+      "streaming",
+      "function_calling",
+      "structured_output"
+    ],
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 0.3,
+          "output_per_million": 0.8999999999999999
+        }
+      }
+    },
+    "metadata": {
+      "description": "[Mistral](/mistralai)'s cutting-edge language model for coding. Codestral specializes in low-latency, high-frequency tasks such as fill-in-the-middle (FIM), code correction and test generation. \n\nLearn more on their blog post: https://mistral.ai/news/codestral-2501/",
+      "architecture": {
+        "modality": "text->text",
+        "input_modalities": [
+          "text"
+        ],
+        "output_modalities": [
+          "text"
+        ],
+        "tokenizer": "Mistral",
+        "instruct_type": null
+      },
+      "top_provider": {
+        "context_length": 262144,
+        "max_completion_tokens": null,
+        "is_moderated": false
+      },
+      "per_request_limits": null,
+      "supported_parameters": [
+        "tools",
+        "tool_choice",
+        "max_tokens",
+        "temperature",
+        "top_p",
+        "stop",
+        "frequency_penalty",
+        "presence_penalty",
+        "response_format",
+        "structured_outputs",
+        "seed"
+      ]
+    }
+  },
+  {
+    "id": "mistralai/devstral-small",
+    "name": "Mistral: Devstral Small",
+    "provider": "openrouter",
+    "family": "mistralai",
+    "created_at": "2025-05-21 16:22:59 +0200",
+    "context_window": 128000,
+    "max_output_tokens": null,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "text"
+      ],
+      "output": [
+        "text"
+      ]
+    },
+    "capabilities": [
+      "streaming",
+      "function_calling",
+      "structured_output"
+    ],
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 0.06,
+          "output_per_million": 0.12
+        }
+      }
+    },
+    "metadata": {
+      "description": "Devstral-Small-2505 is a 24B parameter agentic LLM fine-tuned from Mistral-Small-3.1, jointly developed by Mistral AI and All Hands AI for advanced software engineering tasks. It is optimized for codebase exploration, multi-file editing, and integration into coding agents, achieving state-of-the-art results on SWE-Bench Verified (46.8%).\n\nDevstral supports a 128k context window and uses a custom Tekken tokenizer. It is text-only, with the vision encoder removed, and is suitable for local deployment on high-end consumer hardware (e.g., RTX 4090, 32GB RAM Macs). Devstral is best used in agentic workflows via the OpenHands scaffold and is compatible with inference frameworks like vLLM, Transformers, and Ollama. It is released under the Apache 2.0 license.",
+      "architecture": {
+        "modality": "text->text",
+        "input_modalities": [
+          "text"
+        ],
+        "output_modalities": [
+          "text"
+        ],
+        "tokenizer": "Other",
+        "instruct_type": null
+      },
+      "top_provider": {
+        "context_length": 128000,
+        "max_completion_tokens": null,
+        "is_moderated": false
+      },
+      "per_request_limits": null,
+      "supported_parameters": [
+        "max_tokens",
+        "temperature",
+        "top_p",
+        "stop",
+        "frequency_penalty",
+        "presence_penalty",
+        "repetition_penalty",
+        "response_format",
+        "top_k",
+        "seed",
+        "min_p",
+        "tools",
+        "tool_choice",
+        "structured_outputs"
+      ]
+    }
+  },
+  {
+    "id": "mistralai/devstral-small:free",
+    "name": "Mistral: Devstral Small (free)",
+    "provider": "openrouter",
+    "family": "mistralai",
+    "created_at": "2025-05-21 16:22:59 +0200",
+    "context_window": 131072,
+    "max_output_tokens": null,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "text"
+      ],
+      "output": [
+        "text"
+      ]
+    },
+    "capabilities": [
+      "streaming",
+      "function_calling",
+      "predicted_outputs"
+    ],
+    "pricing": {},
+    "metadata": {
+      "description": "Devstral-Small-2505 is a 24B parameter agentic LLM fine-tuned from Mistral-Small-3.1, jointly developed by Mistral AI and All Hands AI for advanced software engineering tasks. It is optimized for codebase exploration, multi-file editing, and integration into coding agents, achieving state-of-the-art results on SWE-Bench Verified (46.8%).\n\nDevstral supports a 128k context window and uses a custom Tekken tokenizer. It is text-only, with the vision encoder removed, and is suitable for local deployment on high-end consumer hardware (e.g., RTX 4090, 32GB RAM Macs). Devstral is best used in agentic workflows via the OpenHands scaffold and is compatible with inference frameworks like vLLM, Transformers, and Ollama. It is released under the Apache 2.0 license.",
+      "architecture": {
+        "modality": "text->text",
+        "input_modalities": [
+          "text"
+        ],
+        "output_modalities": [
+          "text"
+        ],
+        "tokenizer": "Other",
+        "instruct_type": null
+      },
+      "top_provider": {
+        "context_length": 131072,
+        "max_completion_tokens": null,
+        "is_moderated": false
+      },
+      "per_request_limits": null,
+      "supported_parameters": [
+        "tools",
+        "tool_choice",
+        "max_tokens",
+        "temperature",
+        "top_p",
+        "stop",
+        "frequency_penalty",
+        "presence_penalty",
+        "seed",
+        "top_k",
+        "min_p",
+        "repetition_penalty",
+        "logprobs",
+        "logit_bias",
+        "top_logprobs"
+      ]
+    }
+  },
+  {
+    "id": "mistralai/magistral-medium-2506",
+    "name": "Mistral: Magistral Medium 2506",
     "provider": "openrouter",
     "family": "mistralai",
-    "created_at": "2025-01-14 23:58:42 +0100",
-    "context_window": 262144,
-    "max_output_tokens": null,
+    "created_at": "2025-06-08 05:40:54 +0200",
+    "context_window": 40960,
+    "max_output_tokens": 40000,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -17194,13 +17447,13 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.3,
-          "output_per_million": 0.8999999999999999
+          "input_per_million": 2.0,
+          "output_per_million": 5.0
         }
       }
     },
     "metadata": {
-      "description": "[Mistral](/mistralai)'s cutting-edge language model for coding. Codestral specializes in low-latency, high-frequency tasks such as fill-in-the-middle (FIM), code correction and test generation. \n\nLearn more on their blog post: https://mistral.ai/news/codestral-2501/",
+      "description": "Magistral is Mistral's first reasoning model. It is ideal for general purpose use requiring longer thought processing and better accuracy than with non-reasoning LLMs. From legal research and financial forecasting to software development and creative storytelling — this model solves multi-step challenges where transparency and precision are critical.",
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
@@ -17213,8 +17466,8 @@
         "instruct_type": null
       },
       "top_provider": {
-        "context_length": 262144,
-        "max_completion_tokens": null,
+        "context_length": 40960,
+        "max_completion_tokens": 40000,
         "is_moderated": false
       },
       "per_request_limits": null,
@@ -17224,23 +17477,25 @@
         "max_tokens",
         "temperature",
         "top_p",
+        "reasoning",
+        "include_reasoning",
+        "structured_outputs",
+        "response_format",
         "stop",
         "frequency_penalty",
         "presence_penalty",
-        "response_format",
-        "structured_outputs",
         "seed"
       ]
     }
   },
   {
-    "id": "mistralai/devstral-small",
-    "name": "Mistral: Devstral Small",
+    "id": "mistralai/magistral-medium-2506:thinking",
+    "name": "Mistral: Magistral Medium 2506 (thinking)",
     "provider": "openrouter",
     "family": "mistralai",
-    "created_at": "2025-05-21 16:22:59 +0200",
-    "context_window": 128000,
-    "max_output_tokens": null,
+    "created_at": "2025-06-08 05:40:54 +0200",
+    "context_window": 40960,
+    "max_output_tokens": 40000,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -17258,13 +17513,13 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.06,
-          "output_per_million": 0.12
+          "input_per_million": 2.0,
+          "output_per_million": 5.0
         }
       }
     },
     "metadata": {
-      "description": "Devstral-Small-2505 is a 24B parameter agentic LLM fine-tuned from Mistral-Small-3.1, jointly developed by Mistral AI and All Hands AI for advanced software engineering tasks. It is optimized for codebase exploration, multi-file editing, and integration into coding agents, achieving state-of-the-art results on SWE-Bench Verified (46.8%).\n\nDevstral supports a 128k context window and uses a custom Tekken tokenizer. It is text-only, with the vision encoder removed, and is suitable for local deployment on high-end consumer hardware (e.g., RTX 4090, 32GB RAM Macs). Devstral is best used in agentic workflows via the OpenHands scaffold and is compatible with inference frameworks like vLLM, Transformers, and Ollama. It is released under the Apache 2.0 license.",
+      "description": "Magistral is Mistral's first reasoning model. It is ideal for general purpose use requiring longer thought processing and better accuracy than with non-reasoning LLMs. From legal research and financial forecasting to software development and creative storytelling — this model solves multi-step challenges where transparency and precision are critical.",
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
@@ -17273,41 +17528,40 @@
         "output_modalities": [
           "text"
         ],
-        "tokenizer": "Other",
+        "tokenizer": "Mistral",
         "instruct_type": null
       },
       "top_provider": {
-        "context_length": 128000,
-        "max_completion_tokens": null,
+        "context_length": 40960,
+        "max_completion_tokens": 40000,
         "is_moderated": false
       },
       "per_request_limits": null,
       "supported_parameters": [
+        "tools",
+        "tool_choice",
         "max_tokens",
         "temperature",
         "top_p",
+        "reasoning",
+        "include_reasoning",
+        "structured_outputs",
+        "response_format",
         "stop",
         "frequency_penalty",
         "presence_penalty",
-        "repetition_penalty",
-        "response_format",
-        "top_k",
-        "seed",
-        "min_p",
-        "tools",
-        "tool_choice",
-        "structured_outputs"
+        "seed"
       ]
     }
   },
   {
-    "id": "mistralai/devstral-small:free",
-    "name": "Mistral: Devstral Small (free)",
+    "id": "mistralai/magistral-small-2506",
+    "name": "Mistral: Magistral Small 2506",
     "provider": "openrouter",
     "family": "mistralai",
-    "created_at": "2025-05-21 16:22:59 +0200",
-    "context_window": 131072,
-    "max_output_tokens": null,
+    "created_at": "2025-06-10 17:32:41 +0200",
+    "context_window": 40000,
+    "max_output_tokens": 40000,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -17320,11 +17574,18 @@
     "capabilities": [
       "streaming",
       "function_calling",
-      "predicted_outputs"
+      "structured_output"
     ],
-    "pricing": {},
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 0.5,
+          "output_per_million": 1.5
+        }
+      }
+    },
     "metadata": {
-      "description": "Devstral-Small-2505 is a 24B parameter agentic LLM fine-tuned from Mistral-Small-3.1, jointly developed by Mistral AI and All Hands AI for advanced software engineering tasks. It is optimized for codebase exploration, multi-file editing, and integration into coding agents, achieving state-of-the-art results on SWE-Bench Verified (46.8%).\n\nDevstral supports a 128k context window and uses a custom Tekken tokenizer. It is text-only, with the vision encoder removed, and is suitable for local deployment on high-end consumer hardware (e.g., RTX 4090, 32GB RAM Macs). Devstral is best used in agentic workflows via the OpenHands scaffold and is compatible with inference frameworks like vLLM, Transformers, and Ollama. It is released under the Apache 2.0 license.",
+      "description": "Magistral Small is a 24B parameter instruction-tuned model based on Mistral-Small-3.1 (2503), enhanced through supervised fine-tuning on traces from Magistral Medium and further refined via reinforcement learning. It is optimized for reasoning and supports a wide multilingual range, including over 20 languages.",
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
@@ -17333,12 +17594,12 @@
         "output_modalities": [
           "text"
         ],
-        "tokenizer": "Other",
+        "tokenizer": "Mistral",
         "instruct_type": null
       },
       "top_provider": {
-        "context_length": 131072,
-        "max_completion_tokens": null,
+        "context_length": 40000,
+        "max_completion_tokens": 40000,
         "is_moderated": false
       },
       "per_request_limits": null,
@@ -17348,16 +17609,14 @@
         "max_tokens",
         "temperature",
         "top_p",
+        "reasoning",
+        "include_reasoning",
+        "structured_outputs",
+        "response_format",
         "stop",
         "frequency_penalty",
         "presence_penalty",
-        "seed",
-        "top_k",
-        "min_p",
-        "repetition_penalty",
-        "logprobs",
-        "logit_bias",
-        "top_logprobs"
+        "seed"
       ]
     }
   },
@@ -18080,7 +18339,7 @@
     "provider": "openrouter",
     "family": "mistralai",
     "created_at": "2025-05-07 16:15:41 +0200",
-    "context_window": 131072,
+    "context_window": 32768,
     "max_output_tokens": null,
     "knowledge_cutoff": null,
     "modalities": {
@@ -18120,7 +18379,7 @@
         "instruct_type": null
       },
       "top_provider": {
-        "context_length": 131072,
+        "context_length": 32768,
         "max_completion_tokens": null,
         "is_moderated": false
       },
@@ -18166,8 +18425,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.02,
-          "output_per_million": 0.07
+          "input_per_million": 0.01,
+          "output_per_million": 0.028
         }
       }
     },
@@ -18407,7 +18666,7 @@
     "family": "mistralai",
     "created_at": "2025-01-30 17:43:29 +0100",
     "context_window": 32768,
-    "max_output_tokens": 16384,
+    "max_output_tokens": 32768,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -18426,8 +18685,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.06,
-          "output_per_million": 0.12
+          "input_per_million": 0.049999999999999996,
+          "output_per_million": 0.09999999999999999
         }
       }
     },
@@ -18446,7 +18705,7 @@
       },
       "top_provider": {
         "context_length": 32768,
-        "max_completion_tokens": 16384,
+        "max_completion_tokens": 32768,
         "is_moderated": false
       },
       "per_request_limits": null,
@@ -18457,16 +18716,17 @@
         "stop",
         "frequency_penalty",
         "presence_penalty",
-        "repetition_penalty",
-        "response_format",
         "top_k",
-        "seed",
+        "repetition_penalty",
+        "logit_bias",
+        "logprobs",
+        "top_logprobs",
         "min_p",
+        "seed",
+        "response_format",
         "tools",
         "tool_choice",
-        "structured_outputs",
-        "logit_bias",
-        "logprobs"
+        "structured_outputs"
       ]
     }
   },
@@ -20722,142 +20982,13 @@
       ]
     }
   },
-  {
-    "id": "openai/gpt-3.5-turbo-instruct",
-    "name": "OpenAI: GPT-3.5 Turbo Instruct",
-    "provider": "openrouter",
-    "family": "openai",
-    "created_at": "2023-09-28 02:00:00 +0200",
-    "context_window": 4095,
-    "max_output_tokens": 4096,
-    "knowledge_cutoff": null,
-    "modalities": {
-      "input": [
-        "text"
-      ],
-      "output": [
-        "text"
-      ]
-    },
-    "capabilities": [
-      "streaming",
-      "structured_output"
-    ],
-    "pricing": {
-      "text_tokens": {
-        "standard": {
-          "input_per_million": 1.5,
-          "output_per_million": 2.0
-        }
-      }
-    },
-    "metadata": {
-      "description": "This model is a variant of GPT-3.5 Turbo tuned for instructional prompts and omitting chat-related optimizations. Training data: up to Sep 2021.",
-      "architecture": {
-        "modality": "text->text",
-        "input_modalities": [
-          "text"
-        ],
-        "output_modalities": [
-          "text"
-        ],
-        "tokenizer": "GPT",
-        "instruct_type": "chatml"
-      },
-      "top_provider": {
-        "context_length": 4095,
-        "max_completion_tokens": 4096,
-        "is_moderated": true
-      },
-      "per_request_limits": null,
-      "supported_parameters": [
-        "max_tokens",
-        "temperature",
-        "top_p",
-        "stop",
-        "frequency_penalty",
-        "presence_penalty",
-        "seed",
-        "logit_bias",
-        "logprobs",
-        "top_logprobs",
-        "response_format"
-      ]
-    }
-  },
-  {
-    "id": "openai/gpt-4",
-    "name": "OpenAI: GPT-4",
-    "provider": "openrouter",
-    "family": "openai",
-    "created_at": "2023-05-28 02:00:00 +0200",
-    "context_window": 8191,
-    "max_output_tokens": 4096,
-    "knowledge_cutoff": null,
-    "modalities": {
-      "input": [
-        "text"
-      ],
-      "output": [
-        "text"
-      ]
-    },
-    "capabilities": [
-      "streaming",
-      "function_calling",
-      "structured_output"
-    ],
-    "pricing": {
-      "text_tokens": {
-        "standard": {
-          "input_per_million": 30.0,
-          "output_per_million": 60.0
-        }
-      }
-    },
-    "metadata": {
-      "description": "OpenAI's flagship model, GPT-4 is a large-scale multimodal language model capable of solving difficult problems with greater accuracy than previous models due to its broader general knowledge and advanced reasoning capabilities. Training data: up to Sep 2021.",
-      "architecture": {
-        "modality": "text->text",
-        "input_modalities": [
-          "text"
-        ],
-        "output_modalities": [
-          "text"
-        ],
-        "tokenizer": "GPT",
-        "instruct_type": null
-      },
-      "top_provider": {
-        "context_length": 8191,
-        "max_completion_tokens": 4096,
-        "is_moderated": true
-      },
-      "per_request_limits": null,
-      "supported_parameters": [
-        "tools",
-        "tool_choice",
-        "max_tokens",
-        "temperature",
-        "top_p",
-        "stop",
-        "frequency_penalty",
-        "presence_penalty",
-        "seed",
-        "logit_bias",
-        "logprobs",
-        "top_logprobs",
-        "response_format"
-      ]
-    }
-  },
-  {
-    "id": "openai/gpt-4-0314",
-    "name": "OpenAI: GPT-4 (older v0314)",
+  {
+    "id": "openai/gpt-3.5-turbo-instruct",
+    "name": "OpenAI: GPT-3.5 Turbo Instruct",
     "provider": "openrouter",
     "family": "openai",
-    "created_at": "2023-05-28 02:00:00 +0200",
-    "context_window": 8191,
+    "created_at": "2023-09-28 02:00:00 +0200",
+    "context_window": 4095,
     "max_output_tokens": 4096,
     "knowledge_cutoff": null,
     "modalities": {
@@ -20870,19 +21001,18 @@
     },
     "capabilities": [
       "streaming",
-      "function_calling",
       "structured_output"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 30.0,
-          "output_per_million": 60.0
+          "input_per_million": 1.5,
+          "output_per_million": 2.0
         }
       }
     },
     "metadata": {
-      "description": "GPT-4-0314 is the first version of GPT-4 released, with a context length of 8,192 tokens, and was supported until June 14. Training data: up to Sep 2021.",
+      "description": "This model is a variant of GPT-3.5 Turbo tuned for instructional prompts and omitting chat-related optimizations. Training data: up to Sep 2021.",
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
@@ -20892,17 +21022,15 @@
           "text"
         ],
         "tokenizer": "GPT",
-        "instruct_type": null
+        "instruct_type": "chatml"
       },
       "top_provider": {
-        "context_length": 8191,
+        "context_length": 4095,
         "max_completion_tokens": 4096,
         "is_moderated": true
       },
       "per_request_limits": null,
       "supported_parameters": [
-        "tools",
-        "tool_choice",
         "max_tokens",
         "temperature",
         "top_p",
@@ -20913,18 +21041,17 @@
         "logit_bias",
         "logprobs",
         "top_logprobs",
-        "response_format",
-        "structured_outputs"
+        "response_format"
       ]
     }
   },
   {
-    "id": "openai/gpt-4-1106-preview",
-    "name": "OpenAI: GPT-4 Turbo (older v1106)",
+    "id": "openai/gpt-4",
+    "name": "OpenAI: GPT-4",
     "provider": "openrouter",
     "family": "openai",
-    "created_at": "2023-11-06 01:00:00 +0100",
-    "context_window": 128000,
+    "created_at": "2023-05-28 02:00:00 +0200",
+    "context_window": 8191,
     "max_output_tokens": 4096,
     "knowledge_cutoff": null,
     "modalities": {
@@ -20943,13 +21070,13 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 10.0,
-          "output_per_million": 30.0
+          "input_per_million": 30.0,
+          "output_per_million": 60.0
         }
       }
     },
     "metadata": {
-      "description": "The latest GPT-4 Turbo model with vision capabilities. Vision requests can now use JSON mode and function calling.\n\nTraining data: up to April 2023.",
+      "description": "OpenAI's flagship model, GPT-4 is a large-scale multimodal language model capable of solving difficult problems with greater accuracy than previous models due to its broader general knowledge and advanced reasoning capabilities. Training data: up to Sep 2021.",
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
@@ -20962,7 +21089,7 @@
         "instruct_type": null
       },
       "top_provider": {
-        "context_length": 128000,
+        "context_length": 8191,
         "max_completion_tokens": 4096,
         "is_moderated": true
       },
@@ -20980,18 +21107,17 @@
         "logit_bias",
         "logprobs",
         "top_logprobs",
-        "response_format",
-        "structured_outputs"
+        "response_format"
       ]
     }
   },
   {
-    "id": "openai/gpt-4-32k",
-    "name": "OpenAI: GPT-4 32k",
+    "id": "openai/gpt-4-0314",
+    "name": "OpenAI: GPT-4 (older v0314)",
     "provider": "openrouter",
     "family": "openai",
-    "created_at": "2023-08-28 02:00:00 +0200",
-    "context_window": 32767,
+    "created_at": "2023-05-28 02:00:00 +0200",
+    "context_window": 8191,
     "max_output_tokens": 4096,
     "knowledge_cutoff": null,
     "modalities": {
@@ -21010,13 +21136,13 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 60.0,
-          "output_per_million": 120.0
+          "input_per_million": 30.0,
+          "output_per_million": 60.0
         }
       }
     },
     "metadata": {
-      "description": "GPT-4-32k is an extended version of GPT-4, with the same capabilities but quadrupled context length, allowing for processing up to 40 pages of text in a single pass. This is particularly beneficial for handling longer content like interacting with PDFs without an external vector database. Training data: up to Sep 2021.",
+      "description": "GPT-4-0314 is the first version of GPT-4 released, with a context length of 8,192 tokens, and was supported until June 14. Training data: up to Sep 2021.",
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
@@ -21029,7 +21155,7 @@
         "instruct_type": null
       },
       "top_provider": {
-        "context_length": 32767,
+        "context_length": 8191,
         "max_completion_tokens": 4096,
         "is_moderated": true
       },
@@ -21047,17 +21173,18 @@
         "logit_bias",
         "logprobs",
         "top_logprobs",
-        "response_format"
+        "response_format",
+        "structured_outputs"
       ]
     }
   },
   {
-    "id": "openai/gpt-4-32k-0314",
-    "name": "OpenAI: GPT-4 32k (older v0314)",
+    "id": "openai/gpt-4-1106-preview",
+    "name": "OpenAI: GPT-4 Turbo (older v1106)",
     "provider": "openrouter",
     "family": "openai",
-    "created_at": "2023-08-28 02:00:00 +0200",
-    "context_window": 32767,
+    "created_at": "2023-11-06 01:00:00 +0100",
+    "context_window": 128000,
     "max_output_tokens": 4096,
     "knowledge_cutoff": null,
     "modalities": {
@@ -21076,13 +21203,13 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 60.0,
-          "output_per_million": 120.0
+          "input_per_million": 10.0,
+          "output_per_million": 30.0
         }
       }
     },
     "metadata": {
-      "description": "GPT-4-32k is an extended version of GPT-4, with the same capabilities but quadrupled context length, allowing for processing up to 40 pages of text in a single pass. This is particularly beneficial for handling longer content like interacting with PDFs without an external vector database. Training data: up to Sep 2021.",
+      "description": "The latest GPT-4 Turbo model with vision capabilities. Vision requests can now use JSON mode and function calling.\n\nTraining data: up to April 2023.",
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
@@ -21095,7 +21222,7 @@
         "instruct_type": null
       },
       "top_provider": {
-        "context_length": 32767,
+        "context_length": 128000,
         "max_completion_tokens": 4096,
         "is_moderated": true
       },
@@ -21714,7 +21841,8 @@
       "text_tokens": {
         "standard": {
           "input_per_million": 2.5,
-          "output_per_million": 10.0
+          "output_per_million": 10.0,
+          "cached_input_per_million": 1.25
         }
       }
     },
@@ -22524,9 +22652,9 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 10.0,
-          "output_per_million": 40.0,
-          "cached_input_per_million": 2.5
+          "input_per_million": 2.0,
+          "output_per_million": 8.0,
+          "cached_input_per_million": 0.5
         }
       }
     },
@@ -22681,6 +22809,69 @@
       ]
     }
   },
+  {
+    "id": "openai/o3-pro",
+    "name": "OpenAI: o3 Pro",
+    "provider": "openrouter",
+    "family": "openai",
+    "created_at": "2025-06-11 01:32:32 +0200",
+    "context_window": 200000,
+    "max_output_tokens": 100000,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "text",
+        "file",
+        "image"
+      ],
+      "output": [
+        "text"
+      ]
+    },
+    "capabilities": [
+      "streaming",
+      "function_calling",
+      "structured_output"
+    ],
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 20.0,
+          "output_per_million": 80.0
+        }
+      }
+    },
+    "metadata": {
+      "description": "The o-series of models are trained with reinforcement learning to think before they answer and perform complex reasoning. The o3-pro model uses more compute to think harder and provide consistently better answers.\n\nNote that BYOK is required for this model. Set up here: https://openrouter.ai/settings/integrations",
+      "architecture": {
+        "modality": "text+image->text",
+        "input_modalities": [
+          "text",
+          "file",
+          "image"
+        ],
+        "output_modalities": [
+          "text"
+        ],
+        "tokenizer": "Other",
+        "instruct_type": null
+      },
+      "top_provider": {
+        "context_length": 200000,
+        "max_completion_tokens": 100000,
+        "is_moderated": true
+      },
+      "per_request_limits": null,
+      "supported_parameters": [
+        "tools",
+        "tool_choice",
+        "seed",
+        "max_tokens",
+        "response_format",
+        "structured_outputs"
+      ]
+    }
+  },
   {
     "id": "openai/o4-mini",
     "name": "OpenAI: o4 Mini",
@@ -24702,7 +24893,7 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.07,
+          "input_per_million": 0.06,
           "output_per_million": 0.24
         }
       }
@@ -24727,19 +24918,19 @@
       },
       "per_request_limits": null,
       "supported_parameters": [
-        "tools",
-        "tool_choice",
         "max_tokens",
         "temperature",
         "top_p",
         "reasoning",
         "include_reasoning",
-        "stop",
-        "frequency_penalty",
         "presence_penalty",
+        "frequency_penalty",
         "repetition_penalty",
-        "response_format",
         "top_k",
+        "tools",
+        "tool_choice",
+        "stop",
+        "response_format",
         "seed",
         "min_p",
         "logit_bias",
@@ -24834,7 +25025,7 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.14,
+          "input_per_million": 0.13,
           "output_per_million": 0.6
         }
       }
@@ -24864,20 +25055,20 @@
         "top_p",
         "reasoning",
         "include_reasoning",
-        "stop",
-        "frequency_penalty",
+        "seed",
         "presence_penalty",
-        "top_k",
+        "frequency_penalty",
         "repetition_penalty",
-        "logit_bias",
-        "min_p",
-        "response_format",
-        "seed",
+        "top_k",
         "tools",
         "tool_choice",
+        "stop",
+        "response_format",
         "structured_outputs",
+        "logit_bias",
         "logprobs",
-        "top_logprobs"
+        "top_logprobs",
+        "min_p"
       ]
     }
   },
@@ -25130,20 +25321,20 @@
         "top_p",
         "reasoning",
         "include_reasoning",
-        "seed",
-        "tools",
-        "tool_choice",
         "stop",
         "frequency_penalty",
         "presence_penalty",
-        "repetition_penalty",
         "response_format",
+        "top_logprobs",
+        "logprobs",
+        "logit_bias",
+        "seed",
+        "tools",
+        "tool_choice",
+        "repetition_penalty",
         "top_k",
         "min_p",
-        "structured_outputs",
-        "logprobs",
-        "top_logprobs",
-        "logit_bias"
+        "structured_outputs"
       ]
     }
   },
@@ -25969,64 +26160,6 @@
       ]
     }
   },
-  {
-    "id": "sarvamai/sarvam-m",
-    "name": "Sarvam AI: Sarvam-M",
-    "provider": "openrouter",
-    "family": "sarvamai",
-    "created_at": "2025-05-25 17:53:33 +0200",
-    "context_window": 32768,
-    "max_output_tokens": 32768,
-    "knowledge_cutoff": null,
-    "modalities": {
-      "input": [
-        "text"
-      ],
-      "output": [
-        "text"
-      ]
-    },
-    "capabilities": [
-      "streaming"
-    ],
-    "pricing": {
-      "text_tokens": {
-        "standard": {
-          "input_per_million": 0.25,
-          "output_per_million": 0.75
-        }
-      }
-    },
-    "metadata": {
-      "description": "Sarvam-M is a 24 B-parameter, instruction-tuned derivative of Mistral-Small-3.1-24B-Base-2503, post-trained on English plus eleven major Indic languages (bn, hi, kn, gu, mr, ml, or, pa, ta, te). The model introduces a dual-mode interface: “non-think” for low-latency chat and a optional “think” phase that exposes chain-of-thought tokens for more demanding reasoning, math, and coding tasks. \n\nBenchmark reports show solid gains versus similarly sized open models on Indic-language QA, GSM-8K math, and SWE-Bench coding, making Sarvam-M a practical general-purpose choice for multilingual conversational agents as well as analytical workloads that mix English, native Indic scripts, or romanized text.",
-      "architecture": {
-        "modality": "text->text",
-        "input_modalities": [
-          "text"
-        ],
-        "output_modalities": [
-          "text"
-        ],
-        "tokenizer": "Other",
-        "instruct_type": null
-      },
-      "top_provider": {
-        "context_length": 32768,
-        "max_completion_tokens": 32768,
-        "is_moderated": false
-      },
-      "per_request_limits": null,
-      "supported_parameters": [
-        "max_tokens",
-        "temperature",
-        "top_p",
-        "presence_penalty",
-        "frequency_penalty",
-        "repetition_penalty",
-        "top_k"
-      ]
-    }
-  },
   {
     "id": "sarvamai/sarvam-m:free",
     "name": "Sarvam AI: Sarvam-M (free)",
@@ -26150,12 +26283,12 @@
     }
   },
   {
-    "id": "scb10x/llama3.1-typhoon2-8b-instruct",
-    "name": "Typhoon2 8B Instruct",
+    "id": "sentientagi/dobby-mini-unhinged-plus-llama-3.1-8b",
+    "name": "SentientAGI: Dobby Mini Plus Llama 3.1 8B",
     "provider": "openrouter",
-    "family": "scb10x",
-    "created_at": "2025-03-28 22:15:11 +0100",
-    "context_window": 8192,
+    "family": "sentientagi",
+    "created_at": "2025-06-02 19:33:39 +0200",
+    "context_window": 131072,
     "max_output_tokens": null,
     "knowledge_cutoff": null,
     "modalities": {
@@ -26174,13 +26307,13 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.18,
-          "output_per_million": 0.18
+          "input_per_million": 0.19999999999999998,
+          "output_per_million": 0.19999999999999998
         }
       }
     },
     "metadata": {
-      "description": "Llama3.1-Typhoon2-8B-Instruct is a Thai-English instruction-tuned model with 8 billion parameters, built on Llama 3.1. It significantly improves over its base model in Thai reasoning, instruction-following, and function-calling tasks, while maintaining competitive English performance. The model is optimized for bilingual interaction and performs well on Thai-English code-switching, MT-Bench, IFEval, and tool-use benchmarks.\n\nDespite its smaller size, it demonstrates strong generalization across math, coding, and multilingual benchmarks, outperforming comparable 8B models across most Thai-specific tasks. Full benchmark results and methodology are available in the [technical report.](https://arxiv.org/abs/2412.13702)",
+      "description": "Dobby-Mini-Leashed-Llama-3.1-8B and Dobby-Mini-Unhinged-Llama-3.1-8B are language models fine-tuned from Llama-3.1-8B-Instruct. Dobby models have a strong conviction towards personal freedom, decentralization, and all things crypto — even when coerced to speak otherwise. \n\nDobby-Mini-Leashed-Llama-3.1-8B and Dobby-Mini-Unhinged-Llama-3.1-8B have their own unique, uhh, personalities. The two versions are being released to be improved using the community’s feedback, which will steer the development of a 70B model.\n\n",
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
@@ -26189,11 +26322,11 @@
         "output_modalities": [
           "text"
         ],
-        "tokenizer": "Llama3",
-        "instruct_type": "llama3"
+        "tokenizer": "Other",
+        "instruct_type": null
       },
       "top_provider": {
-        "context_length": 8192,
+        "context_length": 131072,
         "max_completion_tokens": null,
         "is_moderated": false
       },
@@ -26207,9 +26340,11 @@
         "presence_penalty",
         "top_k",
         "repetition_penalty",
+        "response_format",
+        "structured_outputs",
         "logit_bias",
-        "min_p",
-        "response_format"
+        "logprobs",
+        "top_logprobs"
       ]
     }
   },
@@ -26520,7 +26655,7 @@
     "family": "thedrummer",
     "created_at": "2024-11-08 23:04:08 +0100",
     "context_window": 32000,
-    "max_output_tokens": 16000,
+    "max_output_tokens": 32000,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -26532,6 +26667,7 @@
     },
     "capabilities": [
       "streaming",
+      "function_calling",
       "predicted_outputs"
     ],
     "pricing": {
@@ -26557,7 +26693,7 @@
       },
       "top_provider": {
         "context_length": 32000,
-        "max_completion_tokens": 16000,
+        "max_completion_tokens": 32000,
         "is_moderated": false
       },
       "per_request_limits": null,
@@ -26573,6 +26709,8 @@
         "top_k",
         "min_p",
         "seed",
+        "tools",
+        "tool_choice",
         "logprobs"
       ]
     }
@@ -27288,7 +27426,8 @@
       "text_tokens": {
         "standard": {
           "input_per_million": 3.0,
-          "output_per_million": 15.0
+          "output_per_million": 15.0,
+          "cached_input_per_million": 0.75
         }
       }
     },
@@ -27353,7 +27492,8 @@
       "text_tokens": {
         "standard": {
           "input_per_million": 0.3,
-          "output_per_million": 0.5
+          "output_per_million": 0.5,
+          "cached_input_per_million": 0.075
         }
       }
     },