ruby_llm 1.8.0 → 1.8.1

This diff shows the changes between publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
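Most of what follows is a diff of the gem's bundled model registry, a JSON array of model entries. As a reading aid, here is a minimal sketch of pulling one entry's fields out of that registry using only the Ruby standard library; the `lib/ruby_llm/models.json` path and the top-level array layout are assumptions inferred from the entries below:

```ruby
require "json"

# Load the bundled registry (path assumed; adjust to your checkout).
models = JSON.parse(File.read("lib/ruby_llm/models.json"))

# Each entry is a Hash with "id", "provider", "context_window",
# "modalities", "capabilities", and a nested "pricing" structure.
model = models.find { |m| m["id"] == "gpt-5" }

puts model["context_window"]          # 128000 as of 1.8.1 (was 4096 in 1.8.0)
puts model["capabilities"].join(", ") # streaming, function_calling, structured_output, reasoning

# Prices in this registry are expressed in USD per million tokens.
pricing = model.dig("pricing", "text_tokens", "standard")
puts pricing["input_per_million"]     # 1.25 as of 1.8.1
puts pricing["output_per_million"]    # 10.0 as of 1.8.1
```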
@@ -14,12 +14,10 @@
  "text"
  ],
  "output": [
- "embeddings",
  "text"
  ]
  },
  "capabilities": [
- "batch",
  "function_calling"
  ],
  "pricing": {
@@ -28,10 +26,6 @@
  "input_per_million": 0.8,
  "cached_input_per_million": 1.0,
  "output_per_million": 4.0
- },
- "batch": {
- "input_per_million": 0.4,
- "output_per_million": 2.0
  }
  }
  },
@@ -128,12 +122,10 @@
  "text"
  ],
  "output": [
- "embeddings",
  "text"
  ]
  },
  "capabilities": [
- "batch",
  "function_calling"
  ],
  "pricing": {
@@ -142,10 +134,6 @@
  "input_per_million": 3.0,
  "cached_input_per_million": 3.75,
  "output_per_million": 15.0
- },
- "batch": {
- "input_per_million": 1.5,
- "output_per_million": 7.5
  }
  }
  },
@@ -166,12 +154,10 @@
  "text"
  ],
  "output": [
- "embeddings",
  "text"
  ]
  },
  "capabilities": [
- "batch",
  "function_calling"
  ],
  "pricing": {
@@ -180,10 +166,6 @@
  "input_per_million": 0.25,
  "cached_input_per_million": 0.3,
  "output_per_million": 1.25
- },
- "batch": {
- "input_per_million": 0.125,
- "output_per_million": 0.625
  }
  }
  },
@@ -242,12 +224,10 @@
  "text"
  ],
  "output": [
- "embeddings",
  "text"
  ]
  },
  "capabilities": [
- "batch",
  "function_calling"
  ],
  "pricing": {
@@ -256,10 +236,6 @@
  "input_per_million": 15.0,
  "cached_input_per_million": 18.75,
  "output_per_million": 75.0
- },
- "batch": {
- "input_per_million": 7.5,
- "output_per_million": 37.5
  }
  }
  },
@@ -280,12 +256,10 @@
  "text"
  ],
  "output": [
- "embeddings",
  "text"
  ]
  },
  "capabilities": [
- "batch",
  "function_calling"
  ],
  "pricing": {
@@ -294,10 +268,6 @@
  "input_per_million": 15.0,
  "cached_input_per_million": 18.75,
  "output_per_million": 75.0
- },
- "batch": {
- "input_per_million": 7.5,
- "output_per_million": 37.5
  }
  }
  },
@@ -318,12 +288,10 @@
  "text"
  ],
  "output": [
- "embeddings",
  "text"
  ]
  },
  "capabilities": [
- "batch",
  "function_calling"
  ],
  "pricing": {
@@ -332,10 +300,6 @@
  "input_per_million": 3.0,
  "cached_input_per_million": 3.75,
  "output_per_million": 15.0
- },
- "batch": {
- "input_per_million": 1.5,
- "output_per_million": 7.5
  }
  }
  },
@@ -1903,7 +1867,7 @@
  "id": "gemini-1.5-flash",
  "name": "Gemini 1.5 Flash",
  "provider": "gemini",
- "family": "models/gemini-1.5-flash",
+ "family": "gemini-1.5-flash",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -1944,7 +1908,7 @@
  "id": "gemini-1.5-flash-001",
  "name": "Gemini 1.5 Flash",
  "provider": "gemini",
- "family": "models/gemini-1.5-flash",
+ "family": "gemini-1.5-flash",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -1978,7 +1942,7 @@
  "id": "gemini-1.5-flash-002",
  "name": "Gemini 1.5 Flash",
  "provider": "gemini",
- "family": "models/gemini-1.5-flash",
+ "family": "gemini-1.5-flash",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -2020,7 +1984,7 @@
  "id": "gemini-1.5-flash-8b",
  "name": "Gemini 1.5 Flash-8B",
  "provider": "gemini",
- "family": "models/gemini-1.5-flash-8b",
+ "family": "gemini-1.5-flash-8b",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -2062,7 +2026,7 @@
  "id": "gemini-1.5-flash-8b-001",
  "name": "Gemini 1.5 Flash-8B",
  "provider": "gemini",
- "family": "models/gemini-1.5-flash-8b",
+ "family": "gemini-1.5-flash-8b",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -2104,7 +2068,7 @@
  "id": "gemini-1.5-flash-8b-latest",
  "name": "Gemini 1.5 Flash-8B",
  "provider": "gemini",
- "family": "models/gemini-1.5-flash-8b",
+ "family": "gemini-1.5-flash-8b",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -2146,7 +2110,7 @@
  "id": "gemini-1.5-flash-latest",
  "name": "Gemini 1.5 Flash",
  "provider": "gemini",
- "family": "models/gemini-1.5-flash",
+ "family": "gemini-1.5-flash",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -2187,7 +2151,7 @@
  "id": "gemini-1.5-pro",
  "name": "Gemini 1.5 Pro",
  "provider": "gemini",
- "family": "models/gemini-1.5-pro",
+ "family": "gemini-1.5-pro",
  "created_at": null,
  "context_window": 2097152,
  "max_output_tokens": 8192,
@@ -2228,7 +2192,7 @@
  "id": "gemini-1.5-pro-001",
  "name": "Gemini 1.5 Pro",
  "provider": "gemini",
- "family": "models/gemini-1.5-pro",
+ "family": "gemini-1.5-pro",
  "created_at": null,
  "context_window": 2097152,
  "max_output_tokens": 8192,
@@ -2262,7 +2226,7 @@
  "id": "gemini-1.5-pro-002",
  "name": "Gemini 1.5 Pro",
  "provider": "gemini",
- "family": "models/gemini-1.5-pro",
+ "family": "gemini-1.5-pro",
  "created_at": null,
  "context_window": 2097152,
  "max_output_tokens": 8192,
@@ -2304,7 +2268,7 @@
  "id": "gemini-1.5-pro-latest",
  "name": "Gemini 1.5 Pro",
  "provider": "gemini",
- "family": "models/gemini-1.5-pro",
+ "family": "gemini-1.5-pro",
  "created_at": null,
  "context_window": 2097152,
  "max_output_tokens": 8192,
@@ -2345,7 +2309,7 @@
  "id": "gemini-2.0-flash",
  "name": "Gemini 2.0 Flash",
  "provider": "gemini",
- "family": "models/gemini-2.0-flash",
+ "family": "gemini-2.0-flash",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -2393,7 +2357,7 @@
  "id": "gemini-2.0-flash-001",
  "name": "Gemini 2.0 Flash",
  "provider": "gemini",
- "family": "models/gemini-2.0-flash",
+ "family": "gemini-2.0-flash",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -2441,7 +2405,7 @@
  "id": "gemini-2.0-flash-exp",
  "name": "Gemini 2.0 Flash",
  "provider": "gemini",
- "family": "models/gemini-2.0-flash",
+ "family": "gemini-2.0-flash",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -2488,7 +2452,7 @@
  "id": "gemini-2.0-flash-lite",
  "name": "Gemini 2.0 Flash-Lite",
  "provider": "gemini",
- "family": "models/gemini-2.0-flash-lite",
+ "family": "gemini-2.0-flash-lite",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -2536,7 +2500,7 @@
  "id": "gemini-2.0-flash-lite-001",
  "name": "Gemini 2.0 Flash-Lite",
  "provider": "gemini",
- "family": "models/gemini-2.0-flash-lite",
+ "family": "gemini-2.0-flash-lite",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -2678,7 +2642,7 @@
  "id": "gemini-2.0-flash-live-001",
  "name": "Gemini 2.0 Flash Live",
  "provider": "gemini",
- "family": "models/gemini-2.0-flash-live-001",
+ "family": "gemini-2.0-flash-live-001",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -2712,7 +2676,7 @@
  "id": "gemini-2.0-flash-preview-image-generation",
  "name": "Gemini 2.0 Flash Preview Image Generation",
  "provider": "gemini",
- "family": "models/gemini-2.0-flash-preview-image-generation",
+ "family": "gemini-2.0-flash-preview-image-generation",
  "created_at": null,
  "context_window": 32000,
  "max_output_tokens": 8192,
@@ -2999,7 +2963,7 @@
  "id": "gemini-2.5-flash",
  "name": "Gemini 2.5 Flash",
  "provider": "gemini",
- "family": "models/gemini-2.5-flash",
+ "family": "gemini-2.5-flash",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 65536,
@@ -3047,7 +3011,7 @@
  "id": "gemini-2.5-flash-exp-native-audio-thinking-dialog",
  "name": "Gemini 2.5 Flash Native Audio",
  "provider": "gemini",
- "family": "models/gemini-2.5-flash-preview-native-audio-dialog",
+ "family": "gemini-2.5-flash-preview-native-audio-dialog",
  "created_at": null,
  "context_window": 128000,
  "max_output_tokens": 8000,
@@ -3080,7 +3044,7 @@
  "id": "gemini-2.5-flash-image-preview",
  "name": "Gemini 2.5 Flash Image Preview",
  "provider": "gemini",
- "family": "models/gemini-2.5-flash-image-preview",
+ "family": "gemini-2.5-flash-image-preview",
  "created_at": null,
  "context_window": 32768,
  "max_output_tokens": 32768,
@@ -3125,7 +3089,7 @@
  "id": "gemini-2.5-flash-lite",
  "name": "Gemini 2.5 Flash-Lite",
  "provider": "gemini",
- "family": "models/gemini-2.5-flash-lite",
+ "family": "gemini-2.5-flash-lite",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 65536,
@@ -3173,7 +3137,7 @@
  "id": "gemini-2.5-flash-lite-06-17",
  "name": "Gemini 2.5 Flash-Lite",
  "provider": "gemini",
- "family": "models/gemini-2.5-flash-lite",
+ "family": "gemini-2.5-flash-lite",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 65536,
@@ -3260,7 +3224,7 @@
  "id": "gemini-2.5-flash-preview-05-20",
  "name": "Gemini 2.5 Flash",
  "provider": "gemini",
- "family": "models/gemini-2.5-flash",
+ "family": "gemini-2.5-flash",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 65536,
@@ -3308,7 +3272,7 @@
  "id": "gemini-2.5-flash-preview-native-audio-dialog",
  "name": "Gemini 2.5 Flash Native Audio",
  "provider": "gemini",
- "family": "models/gemini-2.5-flash-preview-native-audio-dialog",
+ "family": "gemini-2.5-flash-preview-native-audio-dialog",
  "created_at": null,
  "context_window": 128000,
  "max_output_tokens": 8000,
@@ -3341,7 +3305,7 @@
  "id": "gemini-2.5-flash-preview-tts",
  "name": "Gemini 2.5 Flash Preview TTS",
  "provider": "gemini",
- "family": "models/gemini-2.5-flash-preview-tts",
+ "family": "gemini-2.5-flash-preview-tts",
  "created_at": null,
  "context_window": 8000,
  "max_output_tokens": 16000,
@@ -3383,7 +3347,7 @@
  "id": "gemini-2.5-pro",
  "name": "Gemini 2.5 Pro",
  "provider": "gemini",
- "family": "models/gemini-2.5-pro",
+ "family": "gemini-2.5-pro",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 65536,
@@ -3578,7 +3542,7 @@
  "id": "gemini-2.5-pro-preview-tts",
  "name": "Gemini 2.5 Pro Preview TTS",
  "provider": "gemini",
- "family": "models/gemini-2.5-pro-preview-tts",
+ "family": "gemini-2.5-pro-preview-tts",
  "created_at": null,
  "context_window": 8000,
  "max_output_tokens": 16000,
@@ -3820,7 +3784,7 @@
  "id": "gemini-live-2.5-flash-preview",
  "name": "Gemini 2.5 Flash Live",
  "provider": "gemini",
- "family": "models/gemini-live-2.5-flash-preview",
+ "family": "gemini-live-2.5-flash-preview",
  "created_at": null,
  "context_window": 1048576,
  "max_output_tokens": 8192,
@@ -4685,6 +4649,36 @@
  "owned_by": "mistralai"
  }
  },
+ {
+ "id": "magistral-medium-2509",
+ "name": "Magistral Medium 2509",
+ "provider": "mistral",
+ "family": "mistral",
+ "created_at": null,
+ "context_window": 32768,
+ "max_output_tokens": 8192,
+ "knowledge_cutoff": null,
+ "modalities": {
+ "input": [
+ "text"
+ ],
+ "output": [
+ "text"
+ ]
+ },
+ "capabilities": [
+ "streaming",
+ "function_calling",
+ "structured_output",
+ "reasoning",
+ "batch"
+ ],
+ "pricing": {},
+ "metadata": {
+ "object": "model",
+ "owned_by": "mistralai"
+ }
+ },
  {
  "id": "magistral-medium-latest",
  "name": "Magistral Medium Latest",
@@ -4775,6 +4769,36 @@
  "owned_by": "mistralai"
  }
  },
+ {
+ "id": "magistral-small-2509",
+ "name": "Magistral Small 2509",
+ "provider": "mistral",
+ "family": "mistral",
+ "created_at": null,
+ "context_window": 32768,
+ "max_output_tokens": 8192,
+ "knowledge_cutoff": null,
+ "modalities": {
+ "input": [
+ "text"
+ ],
+ "output": [
+ "text"
+ ]
+ },
+ "capabilities": [
+ "streaming",
+ "function_calling",
+ "structured_output",
+ "reasoning",
+ "batch"
+ ],
+ "pricing": {},
+ "metadata": {
+ "object": "model",
+ "owned_by": "mistralai"
+ }
+ },
  {
  "id": "magistral-small-latest",
  "name": "Magistral Small Latest",
@@ -7946,14 +7970,16 @@
  "id": "gpt-5",
  "name": "GPT-5",
  "provider": "openai",
- "family": "other",
+ "family": "gpt5",
  "created_at": "2025-08-05 22:29:37 +0200",
- "context_window": 4096,
- "max_output_tokens": 16384,
+ "context_window": 128000,
+ "max_output_tokens": 400000,
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
- "text"
+ "text",
+ "image",
+ "pdf"
  ],
  "output": [
  "text"
@@ -7961,13 +7987,16 @@
  },
  "capabilities": [
  "streaming",
+ "function_calling",
+ "structured_output",
  "reasoning"
  ],
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.5,
- "output_per_million": 1.5
+ "input_per_million": 1.25,
+ "output_per_million": 10.0,
+ "cached_input_per_million": 0.125
  }
  }
  },
@@ -7980,14 +8009,16 @@
  "id": "gpt-5-2025-08-07",
  "name": "GPT-5 20250807",
  "provider": "openai",
- "family": "other",
+ "family": "gpt5",
  "created_at": "2025-08-01 21:09:20 +0200",
- "context_window": 4096,
- "max_output_tokens": 16384,
+ "context_window": 128000,
+ "max_output_tokens": 400000,
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
- "text"
+ "text",
+ "image",
+ "pdf"
  ],
  "output": [
  "text"
@@ -7995,13 +8026,16 @@
  },
  "capabilities": [
  "streaming",
+ "function_calling",
+ "structured_output",
  "reasoning"
  ],
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.5,
- "output_per_million": 1.5
+ "input_per_million": 1.25,
+ "output_per_million": 10.0,
+ "cached_input_per_million": 0.125
  }
  }
  },
@@ -8014,14 +8048,16 @@
  "id": "gpt-5-chat-latest",
  "name": "GPT-5 Chat Latest",
  "provider": "openai",
- "family": "other",
+ "family": "gpt5",
  "created_at": "2025-08-01 20:35:06 +0200",
- "context_window": 4096,
- "max_output_tokens": 16384,
+ "context_window": 128000,
+ "max_output_tokens": 400000,
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
- "text"
+ "text",
+ "image",
+ "pdf"
  ],
  "output": [
  "text"
@@ -8029,13 +8065,16 @@
  },
  "capabilities": [
  "streaming",
+ "function_calling",
+ "structured_output",
  "reasoning"
  ],
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.5,
- "output_per_million": 1.5
+ "input_per_million": 1.25,
+ "output_per_million": 10.0,
+ "cached_input_per_million": 0.125
  }
  }
  },
@@ -8048,14 +8087,16 @@
  "id": "gpt-5-mini",
  "name": "GPT-5 Mini",
  "provider": "openai",
- "family": "other",
+ "family": "gpt5",
  "created_at": "2025-08-05 22:32:08 +0200",
- "context_window": 4096,
- "max_output_tokens": 16384,
+ "context_window": 128000,
+ "max_output_tokens": 400000,
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
- "text"
+ "text",
+ "image",
+ "pdf"
  ],
  "output": [
  "text"
@@ -8063,13 +8104,16 @@
  },
  "capabilities": [
  "streaming",
+ "function_calling",
+ "structured_output",
  "reasoning"
  ],
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.5,
- "output_per_million": 1.5
+ "input_per_million": 1.25,
+ "output_per_million": 10.0,
+ "cached_input_per_million": 0.125
  }
  }
  },
@@ -8082,14 +8126,16 @@
  "id": "gpt-5-mini-2025-08-07",
  "name": "GPT-5 Mini 20250807",
  "provider": "openai",
- "family": "other",
+ "family": "gpt5",
  "created_at": "2025-08-05 22:31:07 +0200",
- "context_window": 4096,
- "max_output_tokens": 16384,
+ "context_window": 128000,
+ "max_output_tokens": 400000,
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
- "text"
+ "text",
+ "image",
+ "pdf"
  ],
  "output": [
  "text"
@@ -8097,13 +8143,16 @@
  },
  "capabilities": [
  "streaming",
+ "function_calling",
+ "structured_output",
  "reasoning"
  ],
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.5,
- "output_per_million": 1.5
+ "input_per_million": 1.25,
+ "output_per_million": 10.0,
+ "cached_input_per_million": 0.125
  }
  }
  },
@@ -8116,14 +8165,16 @@
  "id": "gpt-5-nano",
  "name": "GPT-5 Nano",
  "provider": "openai",
- "family": "other",
+ "family": "gpt5",
  "created_at": "2025-08-05 22:39:44 +0200",
- "context_window": 4096,
- "max_output_tokens": 16384,
+ "context_window": 128000,
+ "max_output_tokens": 400000,
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
- "text"
+ "text",
+ "image",
+ "pdf"
  ],
  "output": [
  "text"
@@ -8131,13 +8182,16 @@
  },
  "capabilities": [
  "streaming",
+ "function_calling",
+ "structured_output",
  "reasoning"
  ],
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.5,
- "output_per_million": 1.5
+ "input_per_million": 1.25,
+ "output_per_million": 10.0,
+ "cached_input_per_million": 0.125
  }
  }
  },
@@ -8150,14 +8204,16 @@
  "id": "gpt-5-nano-2025-08-07",
  "name": "GPT-5 Nano 20250807",
  "provider": "openai",
- "family": "other",
+ "family": "gpt5",
  "created_at": "2025-08-05 22:38:23 +0200",
- "context_window": 4096,
- "max_output_tokens": 16384,
+ "context_window": 128000,
+ "max_output_tokens": 400000,
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
- "text"
+ "text",
+ "image",
+ "pdf"
  ],
  "output": [
  "text"
@@ -8165,13 +8221,16 @@
  },
  "capabilities": [
  "streaming",
+ "function_calling",
+ "structured_output",
  "reasoning"
  ],
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.5,
- "output_per_million": 1.5
+ "input_per_million": 1.25,
+ "output_per_million": 10.0,
+ "cached_input_per_million": 0.125
  }
  }
  },
@@ -9798,6 +9857,68 @@
  ]
  }
  },
+ {
+ "id": "alibaba/tongyi-deepresearch-30b-a3b",
+ "name": "Tongyi DeepResearch 30B A3B",
+ "provider": "openrouter",
+ "family": "alibaba",
+ "created_at": "2025-09-18 17:53:24 +0200",
+ "context_window": 131072,
+ "max_output_tokens": 131072,
+ "knowledge_cutoff": null,
+ "modalities": {
+ "input": [
+ "text"
+ ],
+ "output": [
+ "text"
+ ]
+ },
+ "capabilities": [
+ "streaming",
+ "function_calling",
+ "structured_output"
+ ],
+ "pricing": {
+ "text_tokens": {
+ "standard": {
+ "input_per_million": 0.09,
+ "output_per_million": 0.44999999999999996
+ }
+ }
+ },
+ "metadata": {
+ "description": "Tongyi DeepResearch is an agentic large language model developed by Tongyi Lab, with 30 billion total parameters activating only 3 billion per token. It's optimized for long-horizon, deep information-seeking tasks and delivers state-of-the-art performance on benchmarks like Humanity's Last Exam, BrowserComp, BrowserComp-ZH, WebWalkerQA, GAIA, xbench-DeepSearch, and FRAMES. This makes it superior for complex agentic search, reasoning, and multi-step problem-solving compared to prior models.\n\nThe model includes a fully automated synthetic data pipeline for scalable pre-training, fine-tuning, and reinforcement learning. It uses large-scale continual pre-training on diverse agentic data to boost reasoning and stay fresh. It also features end-to-end on-policy RL with a customized Group Relative Policy Optimization, including token-level gradients and negative sample filtering for stable training. The model supports ReAct for core ability checks and an IterResearch-based 'Heavy' mode for max performance through test-time scaling. It's ideal for advanced research agents, tool use, and heavy inference workflows.",
+ "architecture": {
+ "modality": "text->text",
+ "input_modalities": [
+ "text"
+ ],
+ "output_modalities": [
+ "text"
+ ],
+ "tokenizer": "Other",
+ "instruct_type": null
+ },
+ "top_provider": {
+ "context_length": 131072,
+ "max_completion_tokens": 131072,
+ "is_moderated": false
+ },
+ "per_request_limits": null,
+ "supported_parameters": [
+ "include_reasoning",
+ "max_tokens",
+ "reasoning",
+ "response_format",
+ "structured_outputs",
+ "temperature",
+ "tool_choice",
+ "tools",
+ "top_p"
+ ]
+ }
+ },
  {
  "id": "allenai/molmo-7b-d",
  "name": "AllenAI: Molmo 7B D",
@@ -10932,7 +11053,9 @@
  "stop",
  "temperature",
  "tool_choice",
- "tools"
+ "tools",
+ "top_k",
+ "top_p"
  ]
  }
  },
@@ -11003,12 +11126,77 @@
  }
  },
  {
- "id": "arcee-ai/coder-large",
- "name": "Arcee AI: Coder Large",
+ "id": "arcee-ai/afm-4.5b",
+ "name": "Arcee AI: AFM 4.5B",
  "provider": "openrouter",
  "family": "arcee-ai",
- "created_at": "2025-05-05 22:57:43 +0200",
- "context_window": 32768,
+ "created_at": "2025-09-16 18:34:44 +0200",
+ "context_window": 65536,
+ "max_output_tokens": null,
+ "knowledge_cutoff": null,
+ "modalities": {
+ "input": [
+ "text"
+ ],
+ "output": [
+ "text"
+ ]
+ },
+ "capabilities": [
+ "streaming",
+ "structured_output",
+ "predicted_outputs"
+ ],
+ "pricing": {
+ "text_tokens": {
+ "standard": {
+ "input_per_million": 0.09999999999999999,
+ "output_per_million": 0.39999999999999997
+ }
+ }
+ },
+ "metadata": {
+ "description": "AFM-4.5B is a 4.5 billion parameter instruction-tuned language model developed by Arcee AI. The model was pretrained on approximately 8 trillion tokens, including 6.5 trillion tokens of general data and 1.5 trillion tokens with an emphasis on mathematical reasoning and code generation. ",
+ "architecture": {
+ "modality": "text->text",
+ "input_modalities": [
+ "text"
+ ],
+ "output_modalities": [
+ "text"
+ ],
+ "tokenizer": "Other",
+ "instruct_type": null
+ },
+ "top_provider": {
+ "context_length": 65536,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null,
+ "supported_parameters": [
+ "frequency_penalty",
+ "logit_bias",
+ "max_tokens",
+ "min_p",
+ "presence_penalty",
+ "repetition_penalty",
+ "response_format",
+ "stop",
+ "structured_outputs",
+ "temperature",
+ "top_k",
+ "top_p"
+ ]
+ }
+ },
+ {
+ "id": "arcee-ai/coder-large",
+ "name": "Arcee AI: Coder Large",
+ "provider": "openrouter",
+ "family": "arcee-ai",
+ "created_at": "2025-05-05 22:57:43 +0200",
+ "context_window": 32768,
  "max_output_tokens": null,
  "knowledge_cutoff": null,
  "modalities": {
@@ -11279,8 +11467,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.017934774,
- "output_per_million": 0.07173912240000001
+ "input_per_million": 0.02,
+ "output_per_million": 0.07
  }
  }
  },
@@ -11504,6 +11692,7 @@
  "response_format",
  "seed",
  "stop",
+ "structured_outputs",
  "temperature",
  "top_k",
  "top_p"
@@ -11668,8 +11857,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.2006688,
- "output_per_million": 0.80267549538462
+ "input_per_million": 0.16,
+ "output_per_million": 0.65
  }
  }
  },
@@ -11830,69 +12019,6 @@
  ]
  }
  },
- {
- "id": "cognitivecomputations/dolphin-mixtral-8x22b",
- "name": "Dolphin 2.9.2 Mixtral 8x22B 🐬",
- "provider": "openrouter",
- "family": "cognitivecomputations",
- "created_at": "2024-06-08 02:00:00 +0200",
- "context_window": 16000,
- "max_output_tokens": 8192,
- "knowledge_cutoff": null,
- "modalities": {
- "input": [
- "text"
- ],
- "output": [
- "text"
- ]
- },
- "capabilities": [
- "streaming",
- "predicted_outputs"
- ],
- "pricing": {
- "text_tokens": {
- "standard": {
- "input_per_million": 0.8999999999999999,
- "output_per_million": 0.8999999999999999
- }
- }
- },
- "metadata": {
- "description": "Dolphin 2.9 is designed for instruction following, conversational, and coding. This model is a finetune of [Mixtral 8x22B Instruct](/models/mistralai/mixtral-8x22b-instruct). It features a 64k context length and was fine-tuned with a 16k sequence length using ChatML templates.\n\nThis model is a successor to [Dolphin Mixtral 8x7B](/models/cognitivecomputations/dolphin-mixtral-8x7b).\n\nThe model is uncensored and is stripped of alignment and bias. It requires an external alignment layer for ethical use. Users are cautioned to use this highly compliant model responsibly, as detailed in a blog post about uncensored models at [erichartford.com/uncensored-models](https://erichartford.com/uncensored-models).\n\n#moe #uncensored",
- "architecture": {
- "modality": "text->text",
- "input_modalities": [
- "text"
- ],
- "output_modalities": [
- "text"
- ],
- "tokenizer": "Mistral",
- "instruct_type": "chatml"
- },
- "top_provider": {
- "context_length": 16000,
- "max_completion_tokens": 8192,
- "is_moderated": false
- },
- "per_request_limits": null,
- "supported_parameters": [
- "frequency_penalty",
- "logit_bias",
- "max_tokens",
- "min_p",
- "presence_penalty",
- "repetition_penalty",
- "seed",
- "stop",
- "temperature",
- "top_k",
- "top_p"
- ]
- }
- },
  {
  "id": "cognitivecomputations/dolphin3.0-mistral-24b",
  "name": "Dolphin3.0 Mistral 24B",
@@ -11917,8 +12043,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.0271739,
- "output_per_million": 0.10869564
+ "input_per_million": 0.03,
+ "output_per_million": 0.11
  }
  }
  },
@@ -12040,8 +12166,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.01333333333333,
- "output_per_million": 0.0347826048
+ "input_per_million": 0.01,
+ "output_per_million": 0.03
  }
  }
  },
@@ -13128,7 +13254,7 @@
  "provider": "openrouter",
  "family": "deepseek",
  "created_at": "2025-08-21 14:33:48 +0200",
- "context_window": 64000,
+ "context_window": 163840,
  "max_output_tokens": null,
  "knowledge_cutoff": null,
  "modalities": {
@@ -13159,9 +13285,9 @@
  "instruct_type": "deepseek-v3.1"
  },
  "top_provider": {
- "context_length": 64000,
+ "context_length": 163840,
  "max_completion_tokens": null,
- "is_moderated": true
+ "is_moderated": false
  },
  "per_request_limits": null,
  "supported_parameters": [
@@ -13345,8 +13471,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.24999987999999998,
- "output_per_million": 0.999999888
+ "input_per_million": 0.39999999999999997,
+ "output_per_million": 1.75
  }
  }
  },
@@ -13416,8 +13542,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.013043472,
- "output_per_million": 0.0521739072
+ "input_per_million": 0.01,
+ "output_per_million": 0.049999999999999996
  }
  }
  },
@@ -13605,8 +13731,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.03260868,
- "output_per_million": 0.130434768
+ "input_per_million": 0.03,
+ "output_per_million": 0.13
  }
  }
  },
@@ -13782,8 +13908,8 @@
  "provider": "openrouter",
  "family": "deepseek",
  "created_at": "2025-01-30 00:39:00 +0100",
- "context_window": 64000,
- "max_output_tokens": 32000,
+ "context_window": 32768,
+ "max_output_tokens": 16384,
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
@@ -13819,66 +13945,8 @@
  "instruct_type": "deepseek-r1"
  },
  "top_provider": {
- "context_length": 64000,
- "max_completion_tokens": 32000,
- "is_moderated": false
- },
- "per_request_limits": null,
- "supported_parameters": [
- "frequency_penalty",
- "include_reasoning",
- "logit_bias",
- "max_tokens",
- "min_p",
- "presence_penalty",
- "reasoning",
- "repetition_penalty",
- "seed",
- "stop",
- "temperature",
- "top_k",
- "top_p"
- ]
- }
- },
- {
- "id": "deepseek/deepseek-r1-distill-qwen-14b:free",
- "name": "DeepSeek: R1 Distill Qwen 14B (free)",
- "provider": "openrouter",
- "family": "deepseek",
- "created_at": "2025-01-30 00:39:00 +0100",
- "context_window": 64000,
- "max_output_tokens": null,
- "knowledge_cutoff": null,
- "modalities": {
- "input": [
- "text"
- ],
- "output": [
- "text"
- ]
- },
- "capabilities": [
- "streaming",
- "predicted_outputs"
- ],
- "pricing": {},
- "metadata": {
- "description": "DeepSeek R1 Distill Qwen 14B is a distilled large language model based on [Qwen 2.5 14B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). It outperforms OpenAI's o1-mini across various benchmarks, achieving new state-of-the-art results for dense models.\n\nOther benchmark results include:\n\n- AIME 2024 pass@1: 69.7\n- MATH-500 pass@1: 93.9\n- CodeForces Rating: 1481\n\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.",
- "architecture": {
- "modality": "text->text",
- "input_modalities": [
- "text"
- ],
- "output_modalities": [
- "text"
- ],
- "tokenizer": "Qwen",
- "instruct_type": "deepseek-r1"
- },
- "top_provider": {
- "context_length": 64000,
- "max_completion_tokens": null,
+ "context_length": 32768,
+ "max_completion_tokens": 16384,
  "is_moderated": false
  },
  "per_request_limits": null,
@@ -13886,7 +13954,6 @@
  "frequency_penalty",
  "include_reasoning",
  "logit_bias",
- "logprobs",
  "max_tokens",
  "min_p",
  "presence_penalty",
@@ -13896,7 +13963,6 @@
  "stop",
  "temperature",
  "top_k",
- "top_logprobs",
  "top_p"
  ]
  }
@@ -15108,8 +15174,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.02,
- "output_per_million": 0.035869561200000004
+ "input_per_million": 0.01,
+ "output_per_million": 0.02
  }
  }
  },
@@ -15234,8 +15300,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.035326069999999994,
- "output_per_million": 0.141304332
+ "input_per_million": 0.04,
+ "output_per_million": 0.14
  }
  }
  },
@@ -15363,8 +15429,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.06521736,
- "output_per_million": 0.260869536
+ "input_per_million": 0.07,
+ "output_per_million": 0.26
  }
  }
  },
@@ -16260,7 +16326,7 @@
  "family": "meituan",
  "created_at": "2025-09-09 16:20:58 +0200",
  "context_window": 131072,
- "max_output_tokens": 131072,
+ "max_output_tokens": null,
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
@@ -16278,8 +16344,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.15,
- "output_per_million": 0.75
+ "input_per_million": 0.12,
+ "output_per_million": 0.6
  }
  }
  },
@@ -16298,7 +16364,7 @@
  },
  "top_provider": {
  "context_length": 131072,
- "max_completion_tokens": 131072,
+ "max_completion_tokens": null,
  "is_moderated": false
  },
  "per_request_limits": null,
@@ -16726,7 +16792,7 @@
  "provider": "openrouter",
  "family": "meta-llama",
  "created_at": "2024-07-23 02:00:00 +0200",
- "context_window": 131072,
+ "context_window": 16384,
  "max_output_tokens": 16384,
  "knowledge_cutoff": null,
  "modalities": {
@@ -16746,8 +16812,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.015,
- "output_per_million": 0.02
+ "input_per_million": 0.02,
+ "output_per_million": 0.03
  }
  }
  },
@@ -16765,7 +16831,7 @@
  "instruct_type": "llama3"
  },
  "top_provider": {
- "context_length": 131072,
+ "context_length": 16384,
  "max_completion_tokens": 16384,
  "is_moderated": false
  },
@@ -16933,7 +16999,7 @@
  "provider": "openrouter",
  "family": "meta-llama",
  "created_at": "2024-09-25 02:00:00 +0200",
- "context_window": 131072,
+ "context_window": 16384,
  "max_output_tokens": 16384,
  "knowledge_cutoff": null,
  "modalities": {
@@ -16953,8 +17019,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.012,
- "output_per_million": 0.024
+ "input_per_million": 0.02,
+ "output_per_million": 0.02
  }
  }
  },
@@ -16972,7 +17038,7 @@
  "instruct_type": "llama3"
  },
  "top_provider": {
- "context_length": 131072,
+ "context_length": 16384,
  "max_completion_tokens": 16384,
  "is_moderated": false
  },
@@ -17121,7 +17187,7 @@
  "family": "meta-llama",
  "created_at": "2024-12-06 18:28:57 +0100",
  "context_window": 131072,
- "max_output_tokens": 16384,
+ "max_output_tokens": 131072,
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
@@ -17140,8 +17206,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.038000000000000006,
- "output_per_million": 0.12
+ "input_per_million": 0.012,
+ "output_per_million": 0.036
  }
  }
  },
@@ -17160,7 +17226,7 @@
  },
  "top_provider": {
  "context_length": 131072,
- "max_completion_tokens": 16384,
+ "max_completion_tokens": 131072,
  "is_moderated": false
  },
  "per_request_limits": null,
@@ -18136,6 +18202,7 @@
  },
  "capabilities": [
  "streaming",
+ "function_calling",
  "structured_output"
  ],
  "pricing": {
@@ -18175,7 +18242,10 @@
  "response_format",
  "seed",
  "stop",
+ "structured_outputs",
  "temperature",
+ "tool_choice",
+ "tools",
  "top_k",
  "top_p"
  ]
@@ -18721,8 +18791,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.035869548,
- "output_per_million": 0.14347824480000002
+ "input_per_million": 0.04,
+ "output_per_million": 0.14
  }
  }
  },
@@ -19742,7 +19812,7 @@
  "family": "mistralai",
  "created_at": "2024-07-19 02:00:00 +0200",
  "context_window": 131072,
- "max_output_tokens": 128000,
+ "max_output_tokens": 16384,
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
@@ -19761,8 +19831,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.017934774,
- "output_per_million": 0.07173912240000001
+ "input_per_million": 0.02,
+ "output_per_million": 0.04
  }
  }
  },
@@ -19781,7 +19851,7 @@
  },
  "top_provider": {
  "context_length": 131072,
- "max_completion_tokens": 128000,
+ "max_completion_tokens": 16384,
  "is_moderated": false
  },
  "per_request_limits": null,
@@ -20018,8 +20088,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.03804346,
- "output_per_million": 0.152173896
+ "input_per_million": 0.04,
+ "output_per_million": 0.15
  }
  }
  },
@@ -20148,8 +20218,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.03804346,
- "output_per_million": 0.152173896
+ "input_per_million": 0.04,
+ "output_per_million": 0.15
  }
  }
  },
@@ -20287,9 +20357,9 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.049999999999999996,
- "output_per_million": 0.09999999999999999
- }
+ "input_per_million": 0.075,
+ "output_per_million": 0.19999999999999998
+ }
  }
  },
  "metadata": {
@@ -20557,8 +20627,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.08,
- "output_per_million": 0.24
+ "input_per_million": 0.39999999999999997,
+ "output_per_million": 0.39999999999999997
  }
  }
  },
@@ -20791,6 +20861,7 @@
  "include_reasoning",
  "reasoning",
  "response_format",
+ "structured_outputs",
  "temperature",
  "top_k",
  "top_p"
@@ -20954,8 +21025,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.38043459999999996,
- "output_per_million": 1.52173896
+ "input_per_million": 0.38,
+ "output_per_million": 1.52
  }
  }
  },
@@ -21085,8 +21156,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.062499969999999995,
- "output_per_million": 0.249999972
+ "input_per_million": 0.02,
+ "output_per_million": 0.07
  }
  }
  },
@@ -21576,8 +21647,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.127173852,
- "output_per_million": 0.5086955952000001
+ "input_per_million": 0.13,
+ "output_per_million": 0.51
  }
  }
  },
@@ -21761,7 +21832,7 @@
  "family": "nousresearch",
  "created_at": "2024-08-18 02:00:00 +0200",
  "context_window": 131072,
- "max_output_tokens": null,
+ "max_output_tokens": 131072,
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
@@ -21800,7 +21871,7 @@
  },
  "top_provider": {
  "context_length": 131072,
- "max_completion_tokens": null,
+ "max_completion_tokens": 131072,
  "is_moderated": false
  },
  "per_request_limits": null,
@@ -21920,8 +21991,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.127173852,
- "output_per_million": 0.5086955952000001
+ "input_per_million": 0.11,
+ "output_per_million": 0.38
  }
  }
  },
@@ -21991,8 +22062,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.12,
- "output_per_million": 0.3
+ "input_per_million": 0.6,
+ "output_per_million": 0.6
  }
  }
  },
@@ -22098,64 +22169,6 @@
  ]
  }
  },
- {
- "id": "nvidia/llama-3.1-nemotron-ultra-253b-v1:free",
- "name": "NVIDIA: Llama 3.1 Nemotron Ultra 253B v1 (free)",
- "provider": "openrouter",
- "family": "nvidia",
- "created_at": "2025-04-08 14:24:19 +0200",
- "context_window": 131072,
- "max_output_tokens": null,
- "knowledge_cutoff": null,
- "modalities": {
- "input": [
- "text"
- ],
- "output": [
- "text"
- ]
- },
- "capabilities": [
- "streaming",
- "predicted_outputs"
- ],
- "pricing": {},
- "metadata": {
- "description": "Llama-3.1-Nemotron-Ultra-253B-v1 is a large language model (LLM) optimized for advanced reasoning, human-interactive chat, retrieval-augmented generation (RAG), and tool-calling tasks. Derived from Meta’s Llama-3.1-405B-Instruct, it has been significantly customized using Neural Architecture Search (NAS), resulting in enhanced efficiency, reduced memory usage, and improved inference latency. The model supports a context length of up to 128K tokens and can operate efficiently on an 8x NVIDIA H100 node.\n\nNote: you must include `detailed thinking on` in the system prompt to enable reasoning. Please see [Usage Recommendations](https://huggingface.co/nvidia/Llama-3_1-Nemotron-Ultra-253B-v1#quick-start-and-usage-recommendations) for more.",
- "architecture": {
- "modality": "text->text",
- "input_modalities": [
- "text"
- ],
- "output_modalities": [
- "text"
- ],
- "tokenizer": "Llama3",
- "instruct_type": null
- },
- "top_provider": {
- "context_length": 131072,
- "max_completion_tokens": null,
- "is_moderated": false
- },
- "per_request_limits": null,
- "supported_parameters": [
- "frequency_penalty",
- "logit_bias",
- "logprobs",
- "max_tokens",
- "min_p",
- "presence_penalty",
- "repetition_penalty",
- "seed",
- "stop",
- "temperature",
- "top_k",
- "top_logprobs",
- "top_p"
- ]
- }
- },
  {
  "id": "nvidia/nemotron-nano-9b-v2",
  "name": "NVIDIA: Nemotron Nano 9B V2",
@@ -23943,9 +23956,9 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 1.25,
- "output_per_million": 10.0,
- "cached_input_per_million": 0.125
+ "input_per_million": 0.625,
+ "output_per_million": 5.0,
+ "cached_input_per_million": 0.0625
  }
  }
  },
@@ -24181,8 +24194,8 @@
  "provider": "openrouter",
  "family": "openai",
  "created_at": "2025-08-05 19:17:11 +0200",
- "context_window": 131000,
- "max_output_tokens": 131000,
+ "context_window": 131072,
+ "max_output_tokens": null,
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
@@ -24201,8 +24214,8 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.072,
- "output_per_million": 0.28
+ "input_per_million": 0.049999999999999996,
+ "output_per_million": 0.25
  }
  }
  },
@@ -24220,8 +24233,8 @@
  "instruct_type": null
  },
  "top_provider": {
- "context_length": 131000,
- "max_completion_tokens": 131000,
+ "context_length": 131072,
+ "max_completion_tokens": null,
  "is_moderated": false
  },
  "per_request_limits": null,
@@ -24304,8 +24317,8 @@
  "provider": "openrouter",
  "family": "openai",
  "created_at": "2025-08-05 19:17:09 +0200",
- "context_window": 131000,
- "max_output_tokens": 131000,
+ "context_window": 131072,
+ "max_output_tokens": 32768,
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
@@ -24324,7 +24337,7 @@
  "pricing": {
  "text_tokens": {
  "standard": {
- "input_per_million": 0.04,
+ "input_per_million": 0.03,
  "output_per_million": 0.15
  }
  }
@@ -24343,8 +24356,8 @@
  "instruct_type": null
  },
  "top_provider": {
- "context_length": 131000,
- "max_completion_tokens": 131000,
+ "context_length": 131072,
+ "max_completion_tokens": 32768,
  "is_moderated": false
  },
  "per_request_limits": null,
@@ -25045,79 +25058,43 @@
  }
  },
  {
- "id": "openrouter/auto",
- "name": "Auto Router",
+ "id": "opengvlab/internvl3-78b",
+ "name": "OpenGVLab: InternVL3 78B",
  "provider": "openrouter",
- "family": "openrouter",
- "created_at": "2023-11-08 01:00:00 +0100",
- "context_window": 2000000,
+ "family": "opengvlab",
+ "created_at": "2025-09-15 20:55:55 +0200",
+ "context_window": 32768,
  "max_output_tokens": null,
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
+ "image",
  "text"
  ],
  "output": [
  "text"
  ]
  },
- "capabilities": [
- "streaming"
- ],
- "pricing": {},
- "metadata": {
- "description": "Your prompt will be processed by a meta-model and routed to one of dozens of models (see below), optimizing for the best possible output.\n\nTo see which model was used, visit [Activity](/activity), or read the `model` attribute of the response. Your response will be priced at the same rate as the routed model.\n\nThe meta-model is powered by [Not Diamond](https://docs.notdiamond.ai/docs/how-not-diamond-works). Learn more in our [docs](/docs/model-routing).\n\nRequests will be routed to the following models:\n- [openai/gpt-4o-2024-08-06](/openai/gpt-4o-2024-08-06)\n- [openai/gpt-4o-2024-05-13](/openai/gpt-4o-2024-05-13)\n- [openai/gpt-4o-mini-2024-07-18](/openai/gpt-4o-mini-2024-07-18)\n- [openai/chatgpt-4o-latest](/openai/chatgpt-4o-latest)\n- [openai/o1-preview-2024-09-12](/openai/o1-preview-2024-09-12)\n- [openai/o1-mini-2024-09-12](/openai/o1-mini-2024-09-12)\n- [anthropic/claude-3.5-sonnet](/anthropic/claude-3.5-sonnet)\n- [anthropic/claude-3.5-haiku](/anthropic/claude-3.5-haiku)\n- [anthropic/claude-3-opus](/anthropic/claude-3-opus)\n- [anthropic/claude-2.1](/anthropic/claude-2.1)\n- [google/gemini-pro-1.5](/google/gemini-pro-1.5)\n- [google/gemini-flash-1.5](/google/gemini-flash-1.5)\n- [mistralai/mistral-large-2407](/mistralai/mistral-large-2407)\n- [mistralai/mistral-nemo](/mistralai/mistral-nemo)\n- [deepseek/deepseek-r1](/deepseek/deepseek-r1)\n- [meta-llama/llama-3.1-70b-instruct](/meta-llama/llama-3.1-70b-instruct)\n- [meta-llama/llama-3.1-405b-instruct](/meta-llama/llama-3.1-405b-instruct)\n- [mistralai/mixtral-8x22b-instruct](/mistralai/mixtral-8x22b-instruct)\n- [cohere/command-r-plus](/cohere/command-r-plus)\n- [cohere/command-r](/cohere/command-r)",
- "architecture": {
- "modality": "text->text",
- "input_modalities": [
- "text"
- ],
- "output_modalities": [
- "text"
- ],
- "tokenizer": "Router",
- "instruct_type": null
- },
- "top_provider": {
- "context_length": null,
- "max_completion_tokens": null,
- "is_moderated": false
- },
- "per_request_limits": null,
- "supported_parameters": []
- }
- },
- {
- "id": "openrouter/sonoma-dusk-alpha",
- "name": "Sonoma Dusk Alpha",
- "provider": "openrouter",
- "family": "openrouter",
- "created_at": "2025-09-05 19:27:27 +0200",
- "context_window": 2000000,
- "max_output_tokens": null,
- "knowledge_cutoff": null,
- "modalities": {
- "input": [
- "text",
- "image"
- ],
- "output": [
- "text"
- ]
- },
  "capabilities": [
  "streaming",
- "function_calling",
- "structured_output"
+ "structured_output",
+ "predicted_outputs"
  ],
- "pricing": {},
+ "pricing": {
+ "text_tokens": {
+ "standard": {
+ "input_per_million": 0.03,
+ "output_per_million": 0.13
+ }
+ }
+ },
  "metadata": {
- "description": "This is a cloaked model provided to the community to gather feedback. A fast and intelligent general-purpose frontier model with a 2 million token context window. Supports image inputs and parallel tool calling.\n\nNote: It’s free to use during this testing period, and prompts and completions are logged by the model creator for feedback and training.",
+ "description": "The InternVL3 series is an advanced multimodal large language model (MLLM). Compared to InternVL 2.5, InternVL3 demonstrates stronger multimodal perception and reasoning capabilities. \n\nIn addition, InternVL3 is benchmarked against the Qwen2.5 Chat models, whose pre-trained base models serve as the initialization for its language component. Benefiting from Native Multimodal Pre-Training, the InternVL3 series surpasses the Qwen2.5 series in overall text performance.",
  "architecture": {
  "modality": "text+image->text",
  "input_modalities": [
- "text",
- "image"
+ "image",
+ "text"
  ],
  "output_modalities": [
  "text"
@@ -25126,73 +25103,71 @@
  "instruct_type": null
  },
  "top_provider": {
- "context_length": 2000000,
+ "context_length": 32768,
  "max_completion_tokens": null,
  "is_moderated": false
  },
  "per_request_limits": null,
  "supported_parameters": [
+ "frequency_penalty",
+ "logit_bias",
+ "logprobs",
  "max_tokens",
+ "min_p",
+ "presence_penalty",
+ "repetition_penalty",
  "response_format",
+ "seed",
+ "stop",
  "structured_outputs",
- "tool_choice",
- "tools"
+ "temperature",
+ "top_k",
+ "top_logprobs",
+ "top_p"
  ]
  }
  },
  {
- "id": "openrouter/sonoma-sky-alpha",
- "name": "Sonoma Sky Alpha",
+ "id": "openrouter/auto",
+ "name": "Auto Router",
  "provider": "openrouter",
  "family": "openrouter",
- "created_at": "2025-09-05 19:23:21 +0200",
+ "created_at": "2023-11-08 01:00:00 +0100",
  "context_window": 2000000,
  "max_output_tokens": null,
  "knowledge_cutoff": null,
  "modalities": {
  "input": [
- "text",
- "image"
+ "text"
  ],
  "output": [
  "text"
  ]
  },
  "capabilities": [
- "streaming",
- "function_calling",
- "structured_output"
+ "streaming"
  ],
  "pricing": {},
  "metadata": {
- "description": "This is a cloaked model provided to the community to gather feedback. A maximally intelligent general-purpose frontier model with a 2 million token context window. Supports image inputs and parallel tool calling.\n\nNote: It’s free to use during this testing period, and prompts and completions are logged by the model creator for feedback and training.",
+ "description": "Your prompt will be processed by a meta-model and routed to one of dozens of models (see below), optimizing for the best possible output.\n\nTo see which model was used, visit [Activity](/activity), or read the `model` attribute of the response. Your response will be priced at the same rate as the routed model.\n\nThe meta-model is powered by [Not Diamond](https://docs.notdiamond.ai/docs/how-not-diamond-works). Learn more in our [docs](/docs/model-routing).\n\nRequests will be routed to the following models:\n- [openai/gpt-4o-2024-08-06](/openai/gpt-4o-2024-08-06)\n- [openai/gpt-4o-2024-05-13](/openai/gpt-4o-2024-05-13)\n- [openai/gpt-4o-mini-2024-07-18](/openai/gpt-4o-mini-2024-07-18)\n- [openai/chatgpt-4o-latest](/openai/chatgpt-4o-latest)\n- [openai/o1-preview-2024-09-12](/openai/o1-preview-2024-09-12)\n- [openai/o1-mini-2024-09-12](/openai/o1-mini-2024-09-12)\n- [anthropic/claude-3.5-sonnet](/anthropic/claude-3.5-sonnet)\n- [anthropic/claude-3.5-haiku](/anthropic/claude-3.5-haiku)\n- [anthropic/claude-3-opus](/anthropic/claude-3-opus)\n- [anthropic/claude-2.1](/anthropic/claude-2.1)\n- [google/gemini-pro-1.5](/google/gemini-pro-1.5)\n- [google/gemini-flash-1.5](/google/gemini-flash-1.5)\n- [mistralai/mistral-large-2407](/mistralai/mistral-large-2407)\n- [mistralai/mistral-nemo](/mistralai/mistral-nemo)\n- [deepseek/deepseek-r1](/deepseek/deepseek-r1)\n- [meta-llama/llama-3.1-70b-instruct](/meta-llama/llama-3.1-70b-instruct)\n- [meta-llama/llama-3.1-405b-instruct](/meta-llama/llama-3.1-405b-instruct)\n- [mistralai/mixtral-8x22b-instruct](/mistralai/mixtral-8x22b-instruct)\n- [cohere/command-r-plus](/cohere/command-r-plus)\n- [cohere/command-r](/cohere/command-r)",
  "architecture": {
25170
- "modality": "text+image->text",
25154
+ "modality": "text->text",
25171
25155
  "input_modalities": [
25172
- "text",
25173
- "image"
25156
+ "text"
25174
25157
  ],
25175
25158
  "output_modalities": [
25176
25159
  "text"
25177
25160
  ],
25178
- "tokenizer": "Other",
25161
+ "tokenizer": "Router",
25179
25162
  "instruct_type": null
25180
25163
  },
25181
25164
  "top_provider": {
25182
- "context_length": 2000000,
25165
+ "context_length": null,
25183
25166
  "max_completion_tokens": null,
25184
25167
  "is_moderated": false
25185
25168
  },
25186
25169
  "per_request_limits": null,
25187
- "supported_parameters": [
25188
- "include_reasoning",
25189
- "max_tokens",
25190
- "reasoning",
25191
- "response_format",
25192
- "structured_outputs",
25193
- "tool_choice",
25194
- "tools"
25195
- ]
25170
+ "supported_parameters": []
25196
25171
  }
25197
25172
  },
25198
25173
  {
@@ -25583,8 +25558,8 @@
25583
25558
  "pricing": {
25584
25559
  "text_tokens": {
25585
25560
  "standard": {
25586
- "input_per_million": 0.06521736,
25587
- "output_per_million": 0.260869536
25561
+ "input_per_million": 0.07,
25562
+ "output_per_million": 0.26
25588
25563
  }
25589
25564
  }
25590
25565
  },
@@ -26404,8 +26379,8 @@
26404
26379
  "pricing": {
26405
26380
  "text_tokens": {
26406
26381
  "standard": {
26407
- "input_per_million": 0.035869548,
26408
- "output_per_million": 0.14347824480000002
26382
+ "input_per_million": 0.04,
26383
+ "output_per_million": 0.14
26409
26384
  }
26410
26385
  }
26411
26386
  },
@@ -26535,8 +26510,8 @@
26535
26510
  "pricing": {
26536
26511
  "text_tokens": {
26537
26512
  "standard": {
26538
- "input_per_million": 0.24999987999999998,
26539
- "output_per_million": 0.999999888
26513
+ "input_per_million": 0.07,
26514
+ "output_per_million": 0.28
26540
26515
  }
26541
26516
  }
26542
26517
  },
@@ -26793,8 +26768,8 @@
26793
26768
  "pricing": {
26794
26769
  "text_tokens": {
26795
26770
  "standard": {
26796
- "input_per_million": 0.13,
26797
- "output_per_million": 0.6
26771
+ "input_per_million": 0.18,
26772
+ "output_per_million": 0.54
26798
26773
  }
26799
26774
  }
26800
26775
  },
@@ -26847,7 +26822,7 @@
26847
26822
  "family": "qwen",
26848
26823
  "created_at": "2025-07-21 19:39:15 +0200",
26849
26824
  "context_window": 262144,
26850
- "max_output_tokens": null,
26825
+ "max_output_tokens": 262144,
26851
26826
  "knowledge_cutoff": null,
26852
26827
  "modalities": {
26853
26828
  "input": [
@@ -26866,8 +26841,8 @@
26866
26841
  "pricing": {
26867
26842
  "text_tokens": {
26868
26843
  "standard": {
26869
- "input_per_million": 0.0974999532,
26870
- "output_per_million": 0.38999995632
26844
+ "input_per_million": 0.09999999999999999,
26845
+ "output_per_million": 0.09999999999999999
26871
26846
  }
26872
26847
  }
26873
26848
  },
@@ -26886,7 +26861,7 @@
26886
26861
  },
26887
26862
  "top_provider": {
26888
26863
  "context_length": 262144,
26889
- "max_completion_tokens": null,
26864
+ "max_completion_tokens": 262144,
26890
26865
  "is_moderated": false
26891
26866
  },
26892
26867
  "per_request_limits": null,
@@ -26937,8 +26912,8 @@
26937
26912
  "pricing": {
26938
26913
  "text_tokens": {
26939
26914
  "standard": {
26940
- "input_per_million": 0.0974999532,
26941
- "output_per_million": 0.38999995632
26915
+ "input_per_million": 0.09999999999999999,
26916
+ "output_per_million": 0.39
26942
26917
  }
26943
26918
  }
26944
26919
  },
@@ -26974,6 +26949,7 @@
26974
26949
  "response_format",
26975
26950
  "seed",
26976
26951
  "stop",
26952
+ "structured_outputs",
26977
26953
  "temperature",
26978
26954
  "tool_choice",
26979
26955
  "tools",
@@ -27075,8 +27051,8 @@
27075
27051
  "pricing": {
27076
27052
  "text_tokens": {
27077
27053
  "standard": {
27078
- "input_per_million": 0.035869548,
27079
- "output_per_million": 0.14347824480000002
27054
+ "input_per_million": 0.06,
27055
+ "output_per_million": 0.22
27080
27056
  }
27081
27057
  }
27082
27058
  },
@@ -27148,8 +27124,8 @@
27148
27124
  "pricing": {
27149
27125
  "text_tokens": {
27150
27126
  "standard": {
27151
- "input_per_million": 0.07065213999999999,
27152
- "output_per_million": 0.282608664
27127
+ "input_per_million": 0.07,
27128
+ "output_per_million": 0.28
27153
27129
  }
27154
27130
  }
27155
27131
  },
@@ -27183,6 +27159,7 @@
27183
27159
  "response_format",
27184
27160
  "seed",
27185
27161
  "stop",
27162
+ "structured_outputs",
27186
27163
  "temperature",
27187
27164
  "tool_choice",
27188
27165
  "tools",
@@ -27218,8 +27195,8 @@
27218
27195
  "pricing": {
27219
27196
  "text_tokens": {
27220
27197
  "standard": {
27221
- "input_per_million": 0.08967387,
27222
- "output_per_million": 0.358695612
27198
+ "input_per_million": 0.08,
27199
+ "output_per_million": 0.29
27223
27200
  }
27224
27201
  }
27225
27202
  },
@@ -27255,6 +27232,7 @@
27255
27232
  "response_format",
27256
27233
  "seed",
27257
27234
  "stop",
27235
+ "structured_outputs",
27258
27236
  "temperature",
27259
27237
  "tool_choice",
27260
27238
  "tools",
@@ -27350,8 +27328,8 @@
27350
27328
  "pricing": {
27351
27329
  "text_tokens": {
27352
27330
  "standard": {
27353
- "input_per_million": 0.0322825932,
27354
- "output_per_million": 0.12913042032
27331
+ "input_per_million": 0.03,
27332
+ "output_per_million": 0.13
27355
27333
  }
27356
27334
  }
27357
27335
  },
@@ -27607,8 +27585,8 @@
27607
27585
  "pricing": {
27608
27586
  "text_tokens": {
27609
27587
  "standard": {
27610
- "input_per_million": 0.24999987999999998,
27611
- "output_per_million": 0.999999888
27588
+ "input_per_million": 0.22,
27589
+ "output_per_million": 0.95
27612
27590
  }
27613
27591
  }
27614
27592
  },
@@ -27678,8 +27656,8 @@
27678
27656
  "pricing": {
27679
27657
  "text_tokens": {
27680
27658
  "standard": {
27681
- "input_per_million": 0.07065213999999999,
27682
- "output_per_million": 0.282608664
27659
+ "input_per_million": 0.07,
27660
+ "output_per_million": 0.28
27683
27661
  }
27684
27662
  }
27685
27663
  },
@@ -27713,6 +27691,7 @@
27713
27691
  "response_format",
27714
27692
  "seed",
27715
27693
  "stop",
27694
+ "structured_outputs",
27716
27695
  "temperature",
27717
27696
  "tool_choice",
27718
27697
  "tools",
@@ -27723,13 +27702,13 @@
27723
27702
  }
27724
27703
  },
27725
27704
  {
27726
- "id": "qwen/qwen3-coder:free",
27727
- "name": "Qwen: Qwen3 Coder 480B A35B (free)",
27705
+ "id": "qwen/qwen3-coder-flash",
27706
+ "name": "Qwen: Qwen3 Coder Flash",
27728
27707
  "provider": "openrouter",
27729
27708
  "family": "qwen",
27730
- "created_at": "2025-07-23 02:29:06 +0200",
27731
- "context_window": 262144,
27732
- "max_output_tokens": null,
27709
+ "created_at": "2025-09-17 15:25:36 +0200",
27710
+ "context_window": 128000,
27711
+ "max_output_tokens": 65536,
27733
27712
  "knowledge_cutoff": null,
27734
27713
  "modalities": {
27735
27714
  "input": [
@@ -27742,11 +27721,19 @@
27742
27721
  "capabilities": [
27743
27722
  "streaming",
27744
27723
  "function_calling",
27745
- "predicted_outputs"
27724
+ "structured_output"
27746
27725
  ],
27747
- "pricing": {},
27726
+ "pricing": {
27727
+ "text_tokens": {
27728
+ "standard": {
27729
+ "input_per_million": 0.3,
27730
+ "output_per_million": 1.5,
27731
+ "cached_input_per_million": 0.08
27732
+ }
27733
+ }
27734
+ },
27748
27735
  "metadata": {
27749
- "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.",
27736
+ "description": "Qwen3 Coder Flash is Alibaba's fast and cost efficient version of their proprietary Qwen3 Coder Plus. It is a powerful coding agent model specializing in autonomous programming via tool calling and environment interaction, combining coding proficiency with versatile general-purpose abilities.",
27750
27737
  "architecture": {
27751
27738
  "modality": "text->text",
27752
27739
  "input_modalities": [
@@ -27759,8 +27746,125 @@
27759
27746
  "instruct_type": null
27760
27747
  },
27761
27748
  "top_provider": {
27762
- "context_length": 262144,
27763
- "max_completion_tokens": null,
27749
+ "context_length": 128000,
27750
+ "max_completion_tokens": 65536,
27751
+ "is_moderated": false
27752
+ },
27753
+ "per_request_limits": null,
27754
+ "supported_parameters": [
27755
+ "max_tokens",
27756
+ "presence_penalty",
27757
+ "response_format",
27758
+ "seed",
27759
+ "temperature",
27760
+ "tool_choice",
27761
+ "tools",
27762
+ "top_p"
27763
+ ]
27764
+ }
27765
+ },
27766
+ {
27767
+ "id": "qwen/qwen3-coder-plus",
27768
+ "name": "Qwen: Qwen3 Coder Plus",
27769
+ "provider": "openrouter",
27770
+ "family": "qwen",
27771
+ "created_at": "2025-09-17 15:19:54 +0200",
27772
+ "context_window": 128000,
27773
+ "max_output_tokens": 65536,
27774
+ "knowledge_cutoff": null,
27775
+ "modalities": {
27776
+ "input": [
27777
+ "text"
27778
+ ],
27779
+ "output": [
27780
+ "text"
27781
+ ]
27782
+ },
27783
+ "capabilities": [
27784
+ "streaming",
27785
+ "function_calling",
27786
+ "structured_output"
27787
+ ],
27788
+ "pricing": {
27789
+ "text_tokens": {
27790
+ "standard": {
27791
+ "input_per_million": 1.0,
27792
+ "output_per_million": 5.0,
27793
+ "cached_input_per_million": 0.09999999999999999
27794
+ }
27795
+ }
27796
+ },
27797
+ "metadata": {
27798
+ "description": "Qwen3 Coder Plus is Alibaba's proprietary version of the Open Source Qwen3 Coder 480B A35B. It is a powerful coding agent model specializing in autonomous programming via tool calling and environment interaction, combining coding proficiency with versatile general-purpose abilities.",
27799
+ "architecture": {
27800
+ "modality": "text->text",
27801
+ "input_modalities": [
27802
+ "text"
27803
+ ],
27804
+ "output_modalities": [
27805
+ "text"
27806
+ ],
27807
+ "tokenizer": "Qwen3",
27808
+ "instruct_type": null
27809
+ },
27810
+ "top_provider": {
27811
+ "context_length": 128000,
27812
+ "max_completion_tokens": 65536,
27813
+ "is_moderated": false
27814
+ },
27815
+ "per_request_limits": null,
27816
+ "supported_parameters": [
27817
+ "max_tokens",
27818
+ "presence_penalty",
27819
+ "response_format",
27820
+ "seed",
27821
+ "structured_outputs",
27822
+ "temperature",
27823
+ "tool_choice",
27824
+ "tools",
27825
+ "top_p"
27826
+ ]
27827
+ }
27828
+ },
27829
+ {
27830
+ "id": "qwen/qwen3-coder:free",
27831
+ "name": "Qwen: Qwen3 Coder 480B A35B (free)",
27832
+ "provider": "openrouter",
27833
+ "family": "qwen",
27834
+ "created_at": "2025-07-23 02:29:06 +0200",
27835
+ "context_window": 262144,
27836
+ "max_output_tokens": null,
27837
+ "knowledge_cutoff": null,
27838
+ "modalities": {
27839
+ "input": [
27840
+ "text"
27841
+ ],
27842
+ "output": [
27843
+ "text"
27844
+ ]
27845
+ },
27846
+ "capabilities": [
27847
+ "streaming",
27848
+ "function_calling",
27849
+ "predicted_outputs"
27850
+ ],
27851
+ "pricing": {},
27852
+ "metadata": {
27853
+ "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.",
27854
+ "architecture": {
27855
+ "modality": "text->text",
27856
+ "input_modalities": [
27857
+ "text"
27858
+ ],
27859
+ "output_modalities": [
27860
+ "text"
27861
+ ],
27862
+ "tokenizer": "Qwen3",
27863
+ "instruct_type": null
27864
+ },
27865
+ "top_provider": {
27866
+ "context_length": 262144,
27867
+ "max_completion_tokens": null,
27764
27868
  "is_moderated": false
27765
27869
  },
27766
27870
  "per_request_limits": null,
@@ -27871,8 +27975,8 @@
27871
27975
  "pricing": {
27872
27976
  "text_tokens": {
27873
27977
  "standard": {
27874
- "input_per_million": 0.09782604,
27875
- "output_per_million": 0.391304304
27978
+ "input_per_million": 0.09999999999999999,
27979
+ "output_per_million": 0.7999999999999999
27876
27980
  }
27877
27981
  }
27878
27982
  },
@@ -27906,6 +28010,7 @@
27906
28010
  "response_format",
27907
28011
  "seed",
27908
28012
  "stop",
28013
+ "structured_outputs",
27909
28014
  "temperature",
27910
28015
  "tool_choice",
27911
28016
  "tools",
@@ -27941,8 +28046,8 @@
27941
28046
  "pricing": {
27942
28047
  "text_tokens": {
27943
28048
  "standard": {
27944
- "input_per_million": 0.09782604,
27945
- "output_per_million": 0.391304304
28049
+ "input_per_million": 0.09999999999999999,
28050
+ "output_per_million": 0.7999999999999999
27946
28051
  }
27947
28052
  }
27948
28053
  },
@@ -27978,6 +28083,7 @@
27978
28083
  "response_format",
27979
28084
  "seed",
27980
28085
  "stop",
28086
+ "structured_outputs",
27981
28087
  "temperature",
27982
28088
  "tool_choice",
27983
28089
  "tools",
@@ -28242,66 +28348,6 @@
28242
28348
  ]
28243
28349
  }
28244
28350
  },
28245
- {
28246
- "id": "rekaai/reka-flash-3:free",
28247
- "name": "Reka: Flash 3 (free)",
28248
- "provider": "openrouter",
28249
- "family": "rekaai",
28250
- "created_at": "2025-03-12 21:53:33 +0100",
28251
- "context_window": 32768,
28252
- "max_output_tokens": null,
28253
- "knowledge_cutoff": null,
28254
- "modalities": {
28255
- "input": [
28256
- "text"
28257
- ],
28258
- "output": [
28259
- "text"
28260
- ]
28261
- },
28262
- "capabilities": [
28263
- "streaming",
28264
- "predicted_outputs"
28265
- ],
28266
- "pricing": {},
28267
- "metadata": {
28268
- "description": "Reka Flash 3 is a general-purpose, instruction-tuned large language model with 21 billion parameters, developed by Reka. It excels at general chat, coding tasks, instruction-following, and function calling. Featuring a 32K context length and optimized through reinforcement learning (RLOO), it provides competitive performance comparable to proprietary models within a smaller parameter footprint. Ideal for low-latency, local, or on-device deployments, Reka Flash 3 is compact, supports efficient quantization (down to 11GB at 4-bit precision), and employs explicit reasoning tags (\"<reasoning>\") to indicate its internal thought process.\n\nReka Flash 3 is primarily an English model with limited multilingual understanding capabilities. The model weights are released under the Apache 2.0 license.",
28269
- "architecture": {
28270
- "modality": "text->text",
28271
- "input_modalities": [
28272
- "text"
28273
- ],
28274
- "output_modalities": [
28275
- "text"
28276
- ],
28277
- "tokenizer": "Other",
28278
- "instruct_type": null
28279
- },
28280
- "top_provider": {
28281
- "context_length": 32768,
28282
- "max_completion_tokens": null,
28283
- "is_moderated": false
28284
- },
28285
- "per_request_limits": null,
28286
- "supported_parameters": [
28287
- "frequency_penalty",
28288
- "include_reasoning",
28289
- "logit_bias",
28290
- "logprobs",
28291
- "max_tokens",
28292
- "min_p",
28293
- "presence_penalty",
28294
- "reasoning",
28295
- "repetition_penalty",
28296
- "seed",
28297
- "stop",
28298
- "temperature",
28299
- "top_k",
28300
- "top_logprobs",
28301
- "top_p"
28302
- ]
28303
- }
28304
- },
28305
28351
  {
28306
28352
  "id": "sao10k/l3-euryale-70b",
28307
28353
  "name": "Sao10k: Llama 3 Euryale 70B v2.1",
@@ -28390,7 +28436,7 @@
28390
28436
  "pricing": {
28391
28437
  "text_tokens": {
28392
28438
  "standard": {
28393
- "input_per_million": 0.02,
28439
+ "input_per_million": 0.04,
28394
28440
  "output_per_million": 0.049999999999999996
28395
28441
  }
28396
28442
  }
@@ -28584,8 +28630,8 @@
28584
28630
  "pricing": {
28585
28631
  "text_tokens": {
28586
28632
  "standard": {
28587
- "input_per_million": 0.035869548,
28588
- "output_per_million": 0.14347824480000002
28633
+ "input_per_million": 0.04,
28634
+ "output_per_million": 0.14
28589
28635
  }
28590
28636
  }
28591
28637
  },
@@ -28683,69 +28729,6 @@
28683
28729
  ]
28684
28730
  }
28685
28731
  },
28686
- {
28687
- "id": "sophosympatheia/midnight-rose-70b",
28688
- "name": "Midnight Rose 70B",
28689
- "provider": "openrouter",
28690
- "family": "sophosympatheia",
28691
- "created_at": "2024-03-22 01:00:00 +0100",
28692
- "context_window": 4096,
28693
- "max_output_tokens": 2048,
28694
- "knowledge_cutoff": null,
28695
- "modalities": {
28696
- "input": [
28697
- "text"
28698
- ],
28699
- "output": [
28700
- "text"
28701
- ]
28702
- },
28703
- "capabilities": [
28704
- "streaming",
28705
- "predicted_outputs"
28706
- ],
28707
- "pricing": {
28708
- "text_tokens": {
28709
- "standard": {
28710
- "input_per_million": 0.7999999999999999,
28711
- "output_per_million": 0.7999999999999999
28712
- }
28713
- }
28714
- },
28715
- "metadata": {
28716
- "description": "A merge with a complex family tree, this model was crafted for roleplaying and storytelling. Midnight Rose is a successor to Rogue Rose and Aurora Nights and improves upon them both. It wants to produce lengthy output by default and is the best creative writing merge produced so far by sophosympatheia.\n\nDescending from earlier versions of Midnight Rose and [Wizard Tulu Dolphin 70B](https://huggingface.co/sophosympatheia/Wizard-Tulu-Dolphin-70B-v1.0), it inherits the best qualities of each.",
28717
- "architecture": {
28718
- "modality": "text->text",
28719
- "input_modalities": [
28720
- "text"
28721
- ],
28722
- "output_modalities": [
28723
- "text"
28724
- ],
28725
- "tokenizer": "Llama2",
28726
- "instruct_type": "airoboros"
28727
- },
28728
- "top_provider": {
28729
- "context_length": 4096,
28730
- "max_completion_tokens": 2048,
28731
- "is_moderated": false
28732
- },
28733
- "per_request_limits": null,
28734
- "supported_parameters": [
28735
- "frequency_penalty",
28736
- "logit_bias",
28737
- "max_tokens",
28738
- "min_p",
28739
- "presence_penalty",
28740
- "repetition_penalty",
28741
- "seed",
28742
- "stop",
28743
- "temperature",
28744
- "top_k",
28745
- "top_p"
28746
- ]
28747
- }
28748
- },
28749
28732
  {
28750
28733
  "id": "stepfun-ai/step3",
28751
28734
  "name": "StepFun: Step3",
@@ -28802,6 +28785,7 @@
28802
28785
  "include_reasoning",
28803
28786
  "reasoning",
28804
28787
  "response_format",
28788
+ "structured_outputs",
28805
28789
  "temperature",
28806
28790
  "tool_choice",
28807
28791
  "tools",
@@ -28931,6 +28915,7 @@
28931
28915
  "response_format",
28932
28916
  "seed",
28933
28917
  "stop",
28918
+ "structured_outputs",
28934
28919
  "temperature",
28935
28920
  "top_k",
28936
28921
  "top_logprobs",
@@ -29180,7 +29165,6 @@
29180
29165
  "supported_parameters": [
29181
29166
  "frequency_penalty",
29182
29167
  "logit_bias",
29183
- "logprobs",
29184
29168
  "max_tokens",
29185
29169
  "min_p",
29186
29170
  "presence_penalty",
@@ -29221,8 +29205,8 @@
29221
29205
  "pricing": {
29222
29206
  "text_tokens": {
29223
29207
  "standard": {
29224
- "input_per_million": 0.039130416,
29225
- "output_per_million": 0.1565217216
29208
+ "input_per_million": 0.04,
29209
+ "output_per_million": 0.16
29226
29210
  }
29227
29211
  }
29228
29212
  },
@@ -29282,8 +29266,7 @@
29282
29266
  "capabilities": [
29283
29267
  "streaming",
29284
29268
  "function_calling",
29285
- "structured_output",
29286
- "predicted_outputs"
29269
+ "structured_output"
29287
29270
  ],
29288
29271
  "pricing": {
29289
29272
  "text_tokens": {
@@ -29314,83 +29297,14 @@
29314
29297
  "per_request_limits": null,
29315
29298
  "supported_parameters": [
29316
29299
  "frequency_penalty",
29317
- "logit_bias",
29318
- "logprobs",
29319
29300
  "max_tokens",
29320
- "min_p",
29321
29301
  "presence_penalty",
29322
- "repetition_penalty",
29323
29302
  "response_format",
29324
- "seed",
29325
29303
  "stop",
29326
29304
  "structured_outputs",
29327
29305
  "temperature",
29328
29306
  "tool_choice",
29329
29307
  "tools",
29330
- "top_k",
29331
- "top_p"
29332
- ]
29333
- }
29334
- },
29335
- {
29336
- "id": "thudm/glm-4-32b",
29337
- "name": "THUDM: GLM 4 32B",
29338
- "provider": "openrouter",
29339
- "family": "thudm",
29340
- "created_at": "2025-04-17 22:15:15 +0200",
29341
- "context_window": 32000,
29342
- "max_output_tokens": 32000,
29343
- "knowledge_cutoff": null,
29344
- "modalities": {
29345
- "input": [
29346
- "text"
29347
- ],
29348
- "output": [
29349
- "text"
29350
- ]
29351
- },
29352
- "capabilities": [
29353
- "streaming",
29354
- "predicted_outputs"
29355
- ],
29356
- "pricing": {
29357
- "text_tokens": {
29358
- "standard": {
29359
- "input_per_million": 0.55,
29360
- "output_per_million": 1.66
29361
- }
29362
- }
29363
- },
29364
- "metadata": {
29365
- "description": "GLM-4-32B-0414 is a 32B bilingual (Chinese-English) open-weight language model optimized for code generation, function calling, and agent-style tasks. Pretrained on 15T of high-quality and reasoning-heavy data, it was further refined using human preference alignment, rejection sampling, and reinforcement learning. The model excels in complex reasoning, artifact generation, and structured output tasks, achieving performance comparable to GPT-4o and DeepSeek-V3-0324 across several benchmarks.",
29366
- "architecture": {
29367
- "modality": "text->text",
29368
- "input_modalities": [
29369
- "text"
29370
- ],
29371
- "output_modalities": [
29372
- "text"
29373
- ],
29374
- "tokenizer": "Other",
29375
- "instruct_type": null
29376
- },
29377
- "top_provider": {
29378
- "context_length": 32000,
29379
- "max_completion_tokens": 32000,
29380
- "is_moderated": false
29381
- },
29382
- "per_request_limits": null,
29383
- "supported_parameters": [
29384
- "frequency_penalty",
29385
- "logit_bias",
29386
- "max_tokens",
29387
- "min_p",
29388
- "presence_penalty",
29389
- "repetition_penalty",
29390
- "seed",
29391
- "stop",
29392
- "temperature",
29393
- "top_k",
29394
29308
  "top_p"
29395
29309
  ]
29396
29310
  }
@@ -29486,8 +29400,8 @@
29486
29400
  "pricing": {
29487
29401
  "text_tokens": {
29488
29402
  "standard": {
29489
- "input_per_million": 0.035869548,
29490
- "output_per_million": 0.14347824480000002
29403
+ "input_per_million": 0.04,
29404
+ "output_per_million": 0.14
29491
29405
  }
29492
29406
  }
29493
29407
  },
@@ -29784,11 +29698,11 @@
29784
29698
  }
29785
29699
  },
29786
29700
  {
29787
- "id": "x-ai/grok-2-1212",
29788
- "name": "xAI: Grok 2 1212",
29701
+ "id": "x-ai/grok-3",
29702
+ "name": "xAI: Grok 3",
29789
29703
  "provider": "openrouter",
29790
29704
  "family": "x-ai",
29791
- "created_at": "2024-12-15 04:20:14 +0100",
29705
+ "created_at": "2025-06-10 21:15:08 +0200",
29792
29706
  "context_window": 131072,
29793
29707
  "max_output_tokens": null,
29794
29708
  "knowledge_cutoff": null,
@@ -29808,13 +29722,14 @@
29808
29722
  "pricing": {
29809
29723
  "text_tokens": {
29810
29724
  "standard": {
29811
- "input_per_million": 2.0,
29812
- "output_per_million": 10.0
29725
+ "input_per_million": 3.0,
29726
+ "output_per_million": 15.0,
29727
+ "cached_input_per_million": 0.75
29813
29728
  }
29814
29729
  }
29815
29730
  },
29816
29731
  "metadata": {
29817
- "description": "Grok 2 1212 introduces significant enhancements to accuracy, instruction adherence, and multilingual support, making it a powerful and flexible choice for developers seeking a highly steerable, intelligent model.",
29732
+ "description": "Grok 3 is the latest model from xAI. It's their flagship model that excels at enterprise use cases like data extraction, coding, and text summarization. Possesses deep domain knowledge in finance, healthcare, law, and science.\n\n",
29818
29733
  "architecture": {
29819
29734
  "modality": "text->text",
29820
29735
  "input_modalities": [
@@ -29840,6 +29755,7 @@
29840
29755
  "response_format",
29841
29756
  "seed",
29842
29757
  "stop",
29758
+ "structured_outputs",
29843
29759
  "temperature",
29844
29760
  "tool_choice",
29845
29761
  "tools",
@@ -29849,75 +29765,11 @@
29849
29765
  }
29850
29766
  },
29851
29767
  {
29852
- "id": "x-ai/grok-2-vision-1212",
29853
- "name": "xAI: Grok 2 Vision 1212",
29854
- "provider": "openrouter",
29855
- "family": "x-ai",
29856
- "created_at": "2024-12-15 05:35:38 +0100",
29857
- "context_window": 32768,
29858
- "max_output_tokens": null,
29859
- "knowledge_cutoff": null,
29860
- "modalities": {
29861
- "input": [
29862
- "text",
29863
- "image"
29864
- ],
29865
- "output": [
29866
- "text"
29867
- ]
29868
- },
29869
- "capabilities": [
29870
- "streaming",
29871
- "structured_output"
29872
- ],
29873
- "pricing": {
29874
- "text_tokens": {
29875
- "standard": {
29876
- "input_per_million": 2.0,
29877
- "output_per_million": 10.0
29878
- }
29879
- }
29880
- },
29881
- "metadata": {
29882
- "description": "Grok 2 Vision 1212 advances image-based AI with stronger visual comprehension, refined instruction-following, and multilingual support. From object recognition to style analysis, it empowers developers to build more intuitive, visually aware applications. Its enhanced steerability and reasoning establish a robust foundation for next-generation image solutions.\n\nTo read more about this model, check out [xAI's announcement](https://x.ai/blog/grok-1212).",
29883
- "architecture": {
29884
- "modality": "text+image->text",
29885
- "input_modalities": [
29886
- "text",
29887
- "image"
29888
- ],
29889
- "output_modalities": [
29890
- "text"
29891
- ],
29892
- "tokenizer": "Grok",
29893
- "instruct_type": null
29894
- },
29895
- "top_provider": {
29896
- "context_length": 32768,
29897
- "max_completion_tokens": null,
29898
- "is_moderated": false
29899
- },
29900
- "per_request_limits": null,
29901
- "supported_parameters": [
29902
- "frequency_penalty",
29903
- "logprobs",
29904
- "max_tokens",
29905
- "presence_penalty",
29906
- "response_format",
29907
- "seed",
29908
- "stop",
29909
- "temperature",
29910
- "top_logprobs",
29911
- "top_p"
29912
- ]
29913
- }
29914
- },
29915
- {
29916
- "id": "x-ai/grok-3",
29917
- "name": "xAI: Grok 3",
29768
+ "id": "x-ai/grok-3-beta",
29769
+ "name": "xAI: Grok 3 Beta",
29918
29770
  "provider": "openrouter",
29919
29771
  "family": "x-ai",
29920
- "created_at": "2025-06-10 21:15:08 +0200",
29772
+ "created_at": "2025-04-10 01:07:48 +0200",
29921
29773
  "context_window": 131072,
29922
29774
  "max_output_tokens": null,
29923
29775
  "knowledge_cutoff": null,
@@ -29944,7 +29796,7 @@
29944
29796
  }
29945
29797
  },
29946
29798
  "metadata": {
29947
- "description": "Grok 3 is the latest model from xAI. It's their flagship model that excels at enterprise use cases like data extraction, coding, and text summarization. Possesses deep domain knowledge in finance, healthcare, law, and science.\n\n",
29799
+ "description": "Grok 3 is the latest model from xAI. It's their flagship model that excels at enterprise use cases like data extraction, coding, and text summarization. Possesses deep domain knowledge in finance, healthcare, law, and science.\n\nExcels in structured tasks and benchmarks like GPQA, LCB, and MMLU-Pro where it outperforms Grok 3 Mini even on high thinking. \n\nNote: That there are two xAI endpoints for this model. By default when using this model we will always route you to the base endpoint. If you want the fast endpoint you can add `provider: { sort: throughput}`, to sort by throughput instead. \n",
29948
29800
  "architecture": {
29949
29801
  "modality": "text->text",
29950
29802
  "input_modalities": [
@@ -29970,7 +29822,6 @@
29970
29822
  "response_format",
29971
29823
  "seed",
29972
29824
  "stop",
29973
- "structured_outputs",
29974
29825
  "temperature",
29975
29826
  "tool_choice",
29976
29827
  "tools",
@@ -29980,11 +29831,11 @@
29980
29831
  }
29981
29832
  },
29982
29833
  {
29983
- "id": "x-ai/grok-3-beta",
29984
- "name": "xAI: Grok 3 Beta",
29834
+ "id": "x-ai/grok-3-mini",
29835
+ "name": "xAI: Grok 3 Mini",
29985
29836
  "provider": "openrouter",
29986
29837
  "family": "x-ai",
29987
- "created_at": "2025-04-10 01:07:48 +0200",
29838
+ "created_at": "2025-06-10 21:20:45 +0200",
29988
29839
  "context_window": 131072,
29989
29840
  "max_output_tokens": null,
29990
29841
  "knowledge_cutoff": null,
@@ -30004,14 +29855,14 @@
30004
29855
  "pricing": {
30005
29856
  "text_tokens": {
30006
29857
  "standard": {
30007
- "input_per_million": 3.0,
30008
- "output_per_million": 15.0,
30009
- "cached_input_per_million": 0.75
29858
+ "input_per_million": 0.3,
29859
+ "output_per_million": 0.5,
29860
+ "cached_input_per_million": 0.075
30010
29861
  }
30011
29862
  }
30012
29863
  },
30013
29864
  "metadata": {
30014
- "description": "Grok 3 is the latest model from xAI. It's their flagship model that excels at enterprise use cases like data extraction, coding, and text summarization. Possesses deep domain knowledge in finance, healthcare, law, and science.\n\nExcels in structured tasks and benchmarks like GPQA, LCB, and MMLU-Pro where it outperforms Grok 3 Mini even on high thinking. \n\nNote: That there are two xAI endpoints for this model. By default when using this model we will always route you to the base endpoint. If you want the fast endpoint you can add `provider: { sort: throughput}`, to sort by throughput instead. \n",
29865
+ "description": "A lightweight model that thinks before responding. Fast, smart, and great for logic-based tasks that do not require deep domain knowledge. The raw thinking traces are accessible.",
30015
29866
  "architecture": {
30016
29867
  "modality": "text->text",
30017
29868
  "input_modalities": [
@@ -30030,13 +29881,14 @@
30030
29881
  },
30031
29882
  "per_request_limits": null,
30032
29883
  "supported_parameters": [
30033
- "frequency_penalty",
29884
+ "include_reasoning",
30034
29885
  "logprobs",
30035
29886
  "max_tokens",
30036
- "presence_penalty",
29887
+ "reasoning",
30037
29888
  "response_format",
30038
29889
  "seed",
30039
29890
  "stop",
29891
+ "structured_outputs",
30040
29892
  "temperature",
30041
29893
  "tool_choice",
30042
29894
  "tools",
@@ -30046,11 +29898,11 @@
30046
29898
  }
30047
29899
  },
30048
29900
  {
30049
- "id": "x-ai/grok-3-mini",
30050
- "name": "xAI: Grok 3 Mini",
29901
+ "id": "x-ai/grok-3-mini-beta",
29902
+ "name": "xAI: Grok 3 Mini Beta",
30051
29903
  "provider": "openrouter",
30052
29904
  "family": "x-ai",
30053
- "created_at": "2025-06-10 21:20:45 +0200",
29905
+ "created_at": "2025-04-10 01:09:55 +0200",
30054
29906
  "context_window": 131072,
30055
29907
  "max_output_tokens": null,
30056
29908
  "knowledge_cutoff": null,
@@ -30077,7 +29929,7 @@
30077
29929
  }
30078
29930
  },
30079
29931
  "metadata": {
30080
- "description": "A lightweight model that thinks before responding. Fast, smart, and great for logic-based tasks that do not require deep domain knowledge. The raw thinking traces are accessible.",
29932
+ "description": "Grok 3 Mini is a lightweight, smaller thinking model. Unlike traditional models that generate answers immediately, Grok 3 Mini thinks before responding. It’s ideal for reasoning-heavy tasks that don’t demand extensive domain knowledge, and shines in math-specific and quantitative use cases, such as solving challenging puzzles or math problems.\n\nTransparent \"thinking\" traces accessible. Defaults to low reasoning, can boost with setting `reasoning: { effort: \"high\" }`\n\nNote: That there are two xAI endpoints for this model. By default when using this model we will always route you to the base endpoint. If you want the fast endpoint you can add `provider: { sort: throughput}`, to sort by throughput instead. \n",
30081
29933
  "architecture": {
30082
29934
  "modality": "text->text",
30083
29935
  "input_modalities": [
@@ -30103,7 +29955,6 @@
30103
29955
  "response_format",
30104
29956
  "seed",
30105
29957
  "stop",
30106
- "structured_outputs",
30107
29958
  "temperature",
30108
29959
  "tool_choice",
30109
29960
  "tools",
@@ -30113,16 +29964,17 @@
30113
29964
  }
30114
29965
  },
30115
29966
  {
30116
- "id": "x-ai/grok-3-mini-beta",
30117
- "name": "xAI: Grok 3 Mini Beta",
29967
+ "id": "x-ai/grok-4",
29968
+ "name": "xAI: Grok 4",
30118
29969
  "provider": "openrouter",
30119
29970
  "family": "x-ai",
30120
- "created_at": "2025-04-10 01:09:55 +0200",
30121
- "context_window": 131072,
29971
+ "created_at": "2025-07-09 21:01:29 +0200",
29972
+ "context_window": 256000,
30122
29973
  "max_output_tokens": null,
30123
29974
  "knowledge_cutoff": null,
30124
29975
  "modalities": {
30125
29976
  "input": [
29977
+ "image",
30126
29978
  "text"
30127
29979
  ],
30128
29980
  "output": [
@@ -30137,17 +29989,18 @@
30137
29989
  "pricing": {
30138
29990
  "text_tokens": {
30139
29991
  "standard": {
30140
- "input_per_million": 0.3,
30141
- "output_per_million": 0.5,
30142
- "cached_input_per_million": 0.075
29992
+ "input_per_million": 3.0,
29993
+ "output_per_million": 15.0,
29994
+ "cached_input_per_million": 0.75
30143
29995
  }
30144
29996
  }
30145
29997
  },
30146
29998
  "metadata": {
30147
- "description": "Grok 3 Mini is a lightweight, smaller thinking model. Unlike traditional models that generate answers immediately, Grok 3 Mini thinks before responding. It’s ideal for reasoning-heavy tasks that don’t demand extensive domain knowledge, and shines in math-specific and quantitative use cases, such as solving challenging puzzles or math problems.\n\nTransparent \"thinking\" traces accessible. Defaults to low reasoning, can boost with setting `reasoning: { effort: \"high\" }`\n\nNote: That there are two xAI endpoints for this model. By default when using this model we will always route you to the base endpoint. If you want the fast endpoint you can add `provider: { sort: throughput}`, to sort by throughput instead. \n",
29999
+ "description": "Grok 4 is xAI's latest reasoning model with a 256k context window. It supports parallel tool calling, structured outputs, and both image and text inputs. Note that reasoning is not exposed, reasoning cannot be disabled, and the reasoning effort cannot be specified. Pricing increases once the total tokens in a given request is greater than 128k tokens. See more details on the [xAI docs](https://docs.x.ai/docs/models/grok-4-0709)",
30148
30000
  "architecture": {
30149
- "modality": "text->text",
30001
+ "modality": "text+image->text",
30150
30002
  "input_modalities": [
30003
+ "image",
30151
30004
  "text"
30152
30005
  ],
30153
30006
  "output_modalities": [
@@ -30157,7 +30010,7 @@
30157
30010
  "instruct_type": null
30158
30011
  },
30159
30012
  "top_provider": {
30160
- "context_length": 131072,
30013
+ "context_length": 256000,
30161
30014
  "max_completion_tokens": null,
30162
30015
  "is_moderated": false
30163
30016
  },
@@ -30169,7 +30022,7 @@
30169
30022
  "reasoning",
30170
30023
  "response_format",
30171
30024
  "seed",
30172
- "stop",
30025
+ "structured_outputs",
30173
30026
  "temperature",
30174
30027
  "tool_choice",
30175
30028
  "tools",
@@ -30179,18 +30032,18 @@
30179
30032
  }
30180
30033
  },
30181
30034
  {
30182
- "id": "x-ai/grok-4",
30183
- "name": "xAI: Grok 4",
30035
+ "id": "x-ai/grok-4-fast:free",
30036
+ "name": "xAI: Grok 4 Fast (free)",
30184
30037
  "provider": "openrouter",
30185
30038
  "family": "x-ai",
30186
- "created_at": "2025-07-09 21:01:29 +0200",
30187
- "context_window": 256000,
30188
- "max_output_tokens": null,
30039
+ "created_at": "2025-09-19 02:01:30 +0200",
30040
+ "context_window": 2000000,
30041
+ "max_output_tokens": 30000,
30189
30042
  "knowledge_cutoff": null,
30190
30043
  "modalities": {
30191
30044
  "input": [
30192
- "image",
30193
- "text"
30045
+ "text",
30046
+ "image"
30194
30047
  ],
30195
30048
  "output": [
30196
30049
  "text"
@@ -30201,22 +30054,14 @@
30201
30054
  "function_calling",
30202
30055
  "structured_output"
30203
30056
  ],
30204
- "pricing": {
30205
- "text_tokens": {
30206
- "standard": {
30207
- "input_per_million": 3.0,
30208
- "output_per_million": 15.0,
30209
- "cached_input_per_million": 0.75
30210
- }
30211
- }
30212
- },
30057
+ "pricing": {},
30213
30058
  "metadata": {
30214
- "description": "Grok 4 is xAI's latest reasoning model with a 256k context window. It supports parallel tool calling, structured outputs, and both image and text inputs. Note that reasoning is not exposed, reasoning cannot be disabled, and the reasoning effort cannot be specified. Pricing increases once the total tokens in a given request is greater than 128k tokens. See more details on the [xAI docs](https://docs.x.ai/docs/models/grok-4-0709)",
30059
+ "description": "Grok 4 Fast is xAI's latest multimodal model with SOTA cost-efficiency and a 2M token context window. It comes in two flavors: non-reasoning and reasoning. Read more about the model on xAI's [news post](http://x.ai/news/grok-4-fast). Reasoning can be enabled using the `reasoning` `enabled` parameter in the API. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#controlling-reasoning-tokens)\n\nPrompts and completions may be used by xAI or OpenRouter to improve future models.",
30215
30060
  "architecture": {
30216
30061
  "modality": "text+image->text",
30217
30062
  "input_modalities": [
30218
- "image",
30219
- "text"
30063
+ "text",
30064
+ "image"
30220
30065
  ],
30221
30066
  "output_modalities": [
30222
30067
  "text"
@@ -30225,8 +30070,8 @@
30225
30070
  "instruct_type": null
30226
30071
  },
30227
30072
  "top_provider": {
30228
- "context_length": 256000,
30229
- "max_completion_tokens": null,
30073
+ "context_length": 2000000,
30074
+ "max_completion_tokens": 30000,
30230
30075
  "is_moderated": false
30231
30076
  },
30232
30077
  "per_request_limits": null,
@@ -30396,8 +30241,8 @@
30396
30241
  "pricing": {
30397
30242
  "text_tokens": {
30398
30243
  "standard": {
30399
- "input_per_million": 0.41249980199999997,
30400
- "output_per_million": 1.6499998152000002
30244
+ "input_per_million": 0.41,
30245
+ "output_per_million": 1.6500000000000001
30401
30246
  }
30402
30247
  }
30403
30248
  },
@@ -30433,6 +30278,7 @@
30433
30278
  "response_format",
30434
30279
  "seed",
30435
30280
  "stop",
30281
+ "structured_outputs",
30436
30282
  "temperature",
30437
30283
  "tool_choice",
30438
30284
  "tools",
@@ -30644,7 +30490,7 @@
30644
30490
  "name": "Sonar",
30645
30491
  "provider": "perplexity",
30646
30492
  "family": "sonar",
30647
- "created_at": "2025-09-14 11:15:24 +0200",
30493
+ "created_at": "2025-09-21 16:12:52 +0200",
30648
30494
  "context_window": 128000,
30649
30495
  "max_output_tokens": 4096,
30650
30496
  "knowledge_cutoff": null,
@@ -30676,7 +30522,7 @@
30676
30522
  "name": "Sonar Deep Research",
30677
30523
  "provider": "perplexity",
30678
30524
  "family": "sonar_deep_research",
30679
- "created_at": "2025-09-14 11:15:24 +0200",
30525
+ "created_at": "2025-09-21 16:12:52 +0200",
30680
30526
  "context_window": 128000,
30681
30527
  "max_output_tokens": 4096,
30682
30528
  "knowledge_cutoff": null,
@@ -30711,7 +30557,7 @@
30711
30557
  "name": "Sonar Pro",
30712
30558
  "provider": "perplexity",
30713
30559
  "family": "sonar_pro",
30714
- "created_at": "2025-09-14 11:15:24 +0200",
30560
+ "created_at": "2025-09-21 16:12:52 +0200",
30715
30561
  "context_window": 200000,
30716
30562
  "max_output_tokens": 8192,
30717
30563
  "knowledge_cutoff": null,
@@ -30743,7 +30589,7 @@
30743
30589
  "name": "Sonar Reasoning",
30744
30590
  "provider": "perplexity",
30745
30591
  "family": "sonar_reasoning",
30746
- "created_at": "2025-09-14 11:15:24 +0200",
30592
+ "created_at": "2025-09-21 16:12:52 +0200",
30747
30593
  "context_window": 128000,
30748
30594
  "max_output_tokens": 4096,
30749
30595
  "knowledge_cutoff": null,
@@ -30775,7 +30621,7 @@
30775
30621
  "name": "Sonar Reasoning Pro",
30776
30622
  "provider": "perplexity",
30777
30623
  "family": "sonar_reasoning_pro",
30778
- "created_at": "2025-09-14 11:15:24 +0200",
30624
+ "created_at": "2025-09-21 16:12:52 +0200",
30779
30625
  "context_window": 128000,
30780
30626
  "max_output_tokens": 8192,
30781
30627
  "knowledge_cutoff": null,
@@ -30915,7 +30761,7 @@
30915
30761
  "id": "gemini-1.5-flash",
30916
30762
  "name": "Gemini 1.5 Flash",
30917
30763
  "provider": "vertexai",
30918
- "family": "models/gemini-1.5-flash",
30764
+ "family": "gemini-1.5-flash",
30919
30765
  "created_at": null,
30920
30766
  "context_window": 1048576,
30921
30767
  "max_output_tokens": 8192,
@@ -30951,7 +30797,7 @@
30951
30797
  "id": "gemini-1.5-flash-002",
30952
30798
  "name": "Gemini 1.5 Flash",
30953
30799
  "provider": "vertexai",
30954
- "family": "models/gemini-1.5-flash",
30800
+ "family": "gemini-1.5-flash",
30955
30801
  "created_at": null,
30956
30802
  "context_window": 1048576,
30957
30803
  "max_output_tokens": 8192,
@@ -30987,7 +30833,7 @@
30987
30833
  "id": "gemini-1.5-flash-8b",
30988
30834
  "name": "Gemini 1.5 Flash-8B",
30989
30835
  "provider": "vertexai",
30990
- "family": "models/gemini-1.5-flash-8b",
30836
+ "family": "gemini-1.5-flash-8b",
30991
30837
  "created_at": null,
30992
30838
  "context_window": 1048576,
30993
30839
  "max_output_tokens": 8192,
@@ -31023,7 +30869,7 @@
31023
30869
  "id": "gemini-1.5-pro",
31024
30870
  "name": "Gemini 1.5 Pro",
31025
30871
  "provider": "vertexai",
31026
- "family": "models/gemini-1.5-pro",
30872
+ "family": "gemini-1.5-pro",
31027
30873
  "created_at": null,
31028
30874
  "context_window": 2097152,
31029
30875
  "max_output_tokens": 8192,
@@ -31059,7 +30905,7 @@
31059
30905
  "id": "gemini-1.5-pro-002",
31060
30906
  "name": "Gemini 1.5 Pro",
31061
30907
  "provider": "vertexai",
31062
- "family": "models/gemini-1.5-pro",
30908
+ "family": "gemini-1.5-pro",
31063
30909
  "created_at": null,
31064
30910
  "context_window": 2097152,
31065
30911
  "max_output_tokens": 8192,
@@ -31095,7 +30941,7 @@
31095
30941
  "id": "gemini-2.0-flash",
31096
30942
  "name": "Gemini 2.0 Flash",
31097
30943
  "provider": "vertexai",
31098
- "family": "models/gemini-2.0-flash",
30944
+ "family": "gemini-2.0-flash",
31099
30945
  "created_at": null,
31100
30946
  "context_window": 1048576,
31101
30947
  "max_output_tokens": 8192,
@@ -31136,7 +30982,7 @@
31136
30982
  "id": "gemini-2.0-flash-001",
31137
30983
  "name": "Gemini 2.0 Flash",
31138
30984
  "provider": "vertexai",
31139
- "family": "models/gemini-2.0-flash",
30985
+ "family": "gemini-2.0-flash",
31140
30986
  "created_at": null,
31141
30987
  "context_window": 1048576,
31142
30988
  "max_output_tokens": 8192,
@@ -31177,7 +31023,7 @@
31177
31023
  "id": "gemini-2.0-flash-exp",
31178
31024
  "name": "Gemini 2.0 Flash",
31179
31025
  "provider": "vertexai",
31180
- "family": "models/gemini-2.0-flash",
31026
+ "family": "gemini-2.0-flash",
31181
31027
  "created_at": null,
31182
31028
  "context_window": 1048576,
31183
31029
  "max_output_tokens": 8192,
@@ -31218,7 +31064,7 @@
31218
31064
  "id": "gemini-2.0-flash-lite-001",
31219
31065
  "name": "Gemini 2.0 Flash-Lite",
31220
31066
  "provider": "vertexai",
31221
- "family": "models/gemini-2.0-flash-lite",
31067
+ "family": "gemini-2.0-flash-lite",
31222
31068
  "created_at": null,
31223
31069
  "context_window": 1048576,
31224
31070
  "max_output_tokens": 8192,
@@ -31259,7 +31105,7 @@
31259
31105
  "id": "gemini-2.5-flash",
31260
31106
  "name": "Gemini 2.5 Flash",
31261
31107
  "provider": "vertexai",
31262
- "family": "models/gemini-2.5-flash",
31108
+ "family": "gemini-2.5-flash",
31263
31109
  "created_at": null,
31264
31110
  "context_window": 1048576,
31265
31111
  "max_output_tokens": 65536,
@@ -31300,7 +31146,7 @@
31300
31146
  "id": "gemini-2.5-flash-lite",
31301
31147
  "name": "Gemini 2.5 Flash-Lite",
31302
31148
  "provider": "vertexai",
31303
- "family": "models/gemini-2.5-flash-lite",
31149
+ "family": "gemini-2.5-flash-lite",
31304
31150
  "created_at": null,
31305
31151
  "context_window": 1048576,
31306
31152
  "max_output_tokens": 65536,
@@ -31341,7 +31187,7 @@
31341
31187
  "id": "gemini-2.5-pro",
31342
31188
  "name": "Gemini 2.5 Pro",
31343
31189
  "provider": "vertexai",
31344
- "family": "models/gemini-2.5-pro",
31190
+ "family": "gemini-2.5-pro",
31345
31191
  "created_at": null,
31346
31192
  "context_window": 1048576,
31347
31193
  "max_output_tokens": 65536,