ruby_llm_community 0.0.6 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -3
- data/lib/generators/ruby_llm/install/templates/create_models_migration.rb.tt +34 -0
- data/lib/generators/ruby_llm/install/templates/initializer.rb.tt +5 -0
- data/lib/generators/ruby_llm/install/templates/model_model.rb.tt +6 -0
- data/lib/generators/ruby_llm/install_generator.rb +27 -2
- data/lib/ruby_llm/active_record/acts_as.rb +163 -24
- data/lib/ruby_llm/aliases.json +58 -5
- data/lib/ruby_llm/aliases.rb +7 -25
- data/lib/ruby_llm/chat.rb +10 -17
- data/lib/ruby_llm/configuration.rb +5 -12
- data/lib/ruby_llm/connection.rb +4 -4
- data/lib/ruby_llm/connection_multipart.rb +19 -0
- data/lib/ruby_llm/content.rb +5 -2
- data/lib/ruby_llm/embedding.rb +1 -2
- data/lib/ruby_llm/error.rb +0 -8
- data/lib/ruby_llm/image.rb +23 -8
- data/lib/ruby_llm/image_attachment.rb +21 -0
- data/lib/ruby_llm/message.rb +6 -6
- data/lib/ruby_llm/model/info.rb +12 -10
- data/lib/ruby_llm/model/pricing.rb +0 -3
- data/lib/ruby_llm/model/pricing_category.rb +0 -2
- data/lib/ruby_llm/model/pricing_tier.rb +0 -1
- data/lib/ruby_llm/models.json +2147 -470
- data/lib/ruby_llm/models.rb +65 -34
- data/lib/ruby_llm/provider.rb +8 -8
- data/lib/ruby_llm/providers/anthropic/capabilities.rb +1 -46
- data/lib/ruby_llm/providers/anthropic/chat.rb +2 -2
- data/lib/ruby_llm/providers/anthropic/media.rb +0 -1
- data/lib/ruby_llm/providers/anthropic/tools.rb +1 -2
- data/lib/ruby_llm/providers/anthropic.rb +1 -2
- data/lib/ruby_llm/providers/bedrock/chat.rb +2 -4
- data/lib/ruby_llm/providers/bedrock/media.rb +0 -1
- data/lib/ruby_llm/providers/bedrock/models.rb +0 -2
- data/lib/ruby_llm/providers/bedrock/streaming/base.rb +0 -12
- data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +0 -7
- data/lib/ruby_llm/providers/bedrock/streaming/message_processing.rb +0 -12
- data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +0 -12
- data/lib/ruby_llm/providers/bedrock/streaming/prelude_handling.rb +0 -13
- data/lib/ruby_llm/providers/bedrock/streaming.rb +0 -18
- data/lib/ruby_llm/providers/bedrock.rb +1 -2
- data/lib/ruby_llm/providers/deepseek/capabilities.rb +1 -2
- data/lib/ruby_llm/providers/deepseek/chat.rb +0 -1
- data/lib/ruby_llm/providers/gemini/capabilities.rb +28 -100
- data/lib/ruby_llm/providers/gemini/chat.rb +57 -29
- data/lib/ruby_llm/providers/gemini/embeddings.rb +0 -2
- data/lib/ruby_llm/providers/gemini/images.rb +1 -2
- data/lib/ruby_llm/providers/gemini/media.rb +0 -1
- data/lib/ruby_llm/providers/gemini/models.rb +1 -2
- data/lib/ruby_llm/providers/gemini/streaming.rb +15 -1
- data/lib/ruby_llm/providers/gemini/tools.rb +0 -5
- data/lib/ruby_llm/providers/gpustack/chat.rb +11 -1
- data/lib/ruby_llm/providers/gpustack/media.rb +45 -0
- data/lib/ruby_llm/providers/gpustack/models.rb +44 -9
- data/lib/ruby_llm/providers/gpustack.rb +1 -0
- data/lib/ruby_llm/providers/mistral/capabilities.rb +2 -10
- data/lib/ruby_llm/providers/mistral/chat.rb +0 -2
- data/lib/ruby_llm/providers/mistral/embeddings.rb +0 -3
- data/lib/ruby_llm/providers/mistral/models.rb +0 -1
- data/lib/ruby_llm/providers/ollama/chat.rb +0 -1
- data/lib/ruby_llm/providers/ollama/media.rb +1 -6
- data/lib/ruby_llm/providers/ollama/models.rb +36 -0
- data/lib/ruby_llm/providers/ollama.rb +1 -0
- data/lib/ruby_llm/providers/openai/capabilities.rb +3 -16
- data/lib/ruby_llm/providers/openai/chat.rb +1 -3
- data/lib/ruby_llm/providers/openai/embeddings.rb +0 -3
- data/lib/ruby_llm/providers/openai/images.rb +73 -3
- data/lib/ruby_llm/providers/openai/media.rb +0 -1
- data/lib/ruby_llm/providers/openai/response.rb +120 -29
- data/lib/ruby_llm/providers/openai/response_media.rb +2 -2
- data/lib/ruby_llm/providers/openai/streaming.rb +107 -47
- data/lib/ruby_llm/providers/openai/tools.rb +1 -1
- data/lib/ruby_llm/providers/openai.rb +1 -3
- data/lib/ruby_llm/providers/openai_base.rb +2 -2
- data/lib/ruby_llm/providers/openrouter/models.rb +1 -16
- data/lib/ruby_llm/providers/perplexity/capabilities.rb +0 -1
- data/lib/ruby_llm/providers/perplexity/chat.rb +0 -1
- data/lib/ruby_llm/providers/perplexity.rb +1 -5
- data/lib/ruby_llm/providers/vertexai/chat.rb +14 -0
- data/lib/ruby_llm/providers/vertexai/embeddings.rb +32 -0
- data/lib/ruby_llm/providers/vertexai/models.rb +130 -0
- data/lib/ruby_llm/providers/vertexai/streaming.rb +14 -0
- data/lib/ruby_llm/providers/vertexai.rb +55 -0
- data/lib/ruby_llm/railtie.rb +0 -1
- data/lib/ruby_llm/stream_accumulator.rb +72 -10
- data/lib/ruby_llm/streaming.rb +16 -25
- data/lib/ruby_llm/tool.rb +2 -19
- data/lib/ruby_llm/tool_call.rb +0 -9
- data/lib/ruby_llm/version.rb +1 -1
- data/lib/ruby_llm_community.rb +5 -3
- data/lib/tasks/models.rake +525 -0
- data/lib/tasks/release.rake +37 -2
- data/lib/tasks/vcr.rake +0 -7
- metadata +13 -4
- data/lib/tasks/aliases.rake +0 -235
- data/lib/tasks/models_docs.rake +0 -224
- data/lib/tasks/models_update.rake +0 -108
data/lib/ruby_llm/models.json
CHANGED
@@ -1850,7 +1850,7 @@
     "provider": "deepseek",
     "family": "deepseek-chat",
     "created_at": null,
-    "context_window":
+    "context_window": 128000,
     "max_output_tokens": 8000,
     "knowledge_cutoff": null,
     "modalities": {
@@ -1885,7 +1885,7 @@
     "provider": "deepseek",
     "family": "deepseek-reasoner",
     "created_at": null,
-    "context_window":
+    "context_window": null,
     "max_output_tokens": 64000,
     "knowledge_cutoff": null,
     "modalities": {
@@ -3183,6 +3183,53 @@
     },
     "metadata": {}
   },
+  {
+    "id": "gemini-2.5-flash-image-preview",
+    "name": "Gemini 2.5 Flash Image Preview",
+    "provider": "gemini",
+    "family": "other",
+    "created_at": null,
+    "context_window": 32768,
+    "max_output_tokens": 8192,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "text",
+        "image",
+        "pdf"
+      ],
+      "output": [
+        "text"
+      ]
+    },
+    "capabilities": [
+      "streaming",
+      "function_calling",
+      "structured_output",
+      "batch",
+      "caching"
+    ],
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 0.075,
+          "output_per_million": 0.3
+        },
+        "batch": {
+          "input_per_million": 0.0375,
+          "output_per_million": 0.15
+        }
+      }
+    },
+    "metadata": {
+      "version": "2.0",
+      "description": "Gemini 2.5 Flash Preview Image",
+      "supported_generation_methods": [
+        "generateContent",
+        "countTokens"
+      ]
+    }
+  },
   {
     "id": "gemini-2.5-flash-lite",
     "name": "Gemini 2.5 Flash-Lite",
@@ -3222,7 +3269,7 @@
     },
     "metadata": {
       "version": "001",
-      "description": "Stable
+      "description": "Stable version of Gemini 2.5 Flash-Lite, released in July of 2025",
       "supported_generation_methods": [
         "generateContent",
         "countTokens",
@@ -3400,7 +3447,7 @@
   },
   {
     "id": "gemini-2.5-flash-preview-tts",
-    "name": "Gemini 2.5 Flash Preview
+    "name": "Gemini 2.5 Flash Preview Text-to-Speech",
     "provider": "gemini",
     "family": "gemini-2.5-flash-preview-tts",
     "created_at": null,
@@ -3628,7 +3675,7 @@
   },
   {
     "id": "gemini-2.5-pro-preview-tts",
-    "name": "Gemini 2.5 Pro Preview
+    "name": "Gemini 2.5 Pro Preview Text-to-Speech",
     "provider": "gemini",
     "family": "gemini-2.5-pro-preview-tts",
     "created_at": null,
@@ -4168,37 +4215,6 @@
       ]
     }
   },
-  {
-    "id": "imagen-4.0-generate-001",
-    "name": "Imagen 4",
-    "provider": "gemini",
-    "family": "other",
-    "created_at": null,
-    "context_window": 480,
-    "max_output_tokens": 8192,
-    "knowledge_cutoff": null,
-    "modalities": {
-      "input": [
-        "text",
-        "image",
-        "pdf"
-      ],
-      "output": [
-        "image"
-      ]
-    },
-    "capabilities": [
-      "streaming"
-    ],
-    "pricing": {},
-    "metadata": {
-      "version": "001",
-      "description": "Vertex served Imagen 4.0 model",
-      "supported_generation_methods": [
-        "predict"
-      ]
-    }
-  },
   {
     "id": "imagen-4.0-generate-preview-06-06",
     "name": "Imagen 4 (Preview)",
@@ -7325,45 +7341,6 @@
       "owned_by": "system"
     }
   },
-  {
-    "id": "gpt-4o-2023-01-01",
-    "name": "GPT-4o",
-    "provider": "openai",
-    "family": "gpt-4o",
-    "created_at": null,
-    "context_window": 128000,
-    "max_output_tokens": 16384,
-    "knowledge_cutoff": null,
-    "modalities": {
-      "input": [
-        "image",
-        "text"
-      ],
-      "output": [
-        "embeddings",
-        "text"
-      ]
-    },
-    "capabilities": [
-      "batch",
-      "function_calling",
-      "structured_output"
-    ],
-    "pricing": {
-      "text_tokens": {
-        "standard": {
-          "input_per_million": 2.5,
-          "cached_input_per_million": 1.25,
-          "output_per_million": 10.0
-        },
-        "batch": {
-          "input_per_million": 1.25,
-          "output_per_million": 5.0
-        }
-      }
-    },
-    "metadata": {}
-  },
   {
     "id": "gpt-4o-2024-05-13",
     "name": "GPT-4o 20240513",
@@ -7403,25 +7380,25 @@
   },
   {
     "id": "gpt-4o-2024-08-06",
-    "name": "GPT-4o
+    "name": "GPT-4o",
     "provider": "openai",
-    "family": "
-    "created_at":
+    "family": "gpt-4o",
+    "created_at": null,
     "context_window": 128000,
     "max_output_tokens": 16384,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text",
         "image",
-        "
+        "text"
       ],
       "output": [
+        "embeddings",
         "text"
       ]
     },
     "capabilities": [
-      "
+      "batch",
       "function_calling",
       "structured_output"
     ],
@@ -7429,7 +7406,12 @@
       "text_tokens": {
         "standard": {
           "input_per_million": 2.5,
+          "cached_input_per_million": 1.25,
           "output_per_million": 10.0
+        },
+        "batch": {
+          "input_per_million": 1.25,
+          "output_per_million": 5.0
         }
       }
     },
@@ -7817,28 +7799,32 @@
   },
   {
     "id": "gpt-4o-mini-realtime-preview-2024-12-17",
-    "name": "GPT-4o
+    "name": "GPT-4o mini Realtime",
     "provider": "openai",
-    "family": "
-    "created_at":
+    "family": "gpt-4o-mini-realtime-preview",
+    "created_at": null,
     "context_window": 128000,
     "max_output_tokens": 4096,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
+        "audio",
         "text"
       ],
       "output": [
+        "audio",
+        "embeddings",
         "text"
       ]
     },
     "capabilities": [
-      "
+      "function_calling"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
           "input_per_million": 0.6,
+          "cached_input_per_million": 0.3,
           "output_per_million": 2.4
         }
       }
@@ -10272,8 +10258,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million":
-          "output_per_million":
+          "input_per_million": 4.0,
+          "output_per_million": 5.5
         }
       }
     },
@@ -10562,7 +10548,7 @@
     "family": "anthracite-org",
     "created_at": "2024-10-22 02:00:00 +0200",
     "context_window": 16384,
-    "max_output_tokens":
+    "max_output_tokens": 2048,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -10579,8 +10565,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 2.
-          "output_per_million":
+          "input_per_million": 2.0,
+          "output_per_million": 5.0
         }
       }
     },
@@ -10599,7 +10585,7 @@
       },
       "top_provider": {
         "context_length": 16384,
-        "max_completion_tokens":
+        "max_completion_tokens": 2048,
        "is_moderated": false
       },
       "per_request_limits": null,
@@ -11063,70 +11049,6 @@
       ]
     }
   },
-  {
-    "id": "anthropic/claude-3.7-sonnet:beta",
-    "name": "Anthropic: Claude 3.7 Sonnet (self-moderated)",
-    "provider": "openrouter",
-    "family": "anthropic",
-    "created_at": "2025-02-24 19:35:10 +0100",
-    "context_window": 200000,
-    "max_output_tokens": 128000,
-    "knowledge_cutoff": null,
-    "modalities": {
-      "input": [
-        "text",
-        "image",
-        "file"
-      ],
-      "output": [
-        "text"
-      ]
-    },
-    "capabilities": [
-      "streaming",
-      "function_calling"
-    ],
-    "pricing": {
-      "text_tokens": {
-        "standard": {
-          "input_per_million": 3.0,
-          "output_per_million": 15.0,
-          "cached_input_per_million": 0.3
-        }
-      }
-    },
-    "metadata": {
-      "description": "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. \n\nClaude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks.\n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)",
-      "architecture": {
-        "modality": "text+image->text",
-        "input_modalities": [
-          "text",
-          "image",
-          "file"
-        ],
-        "output_modalities": [
-          "text"
-        ],
-        "tokenizer": "Claude",
-        "instruct_type": null
-      },
-      "top_provider": {
-        "context_length": 200000,
-        "max_completion_tokens": 128000,
-        "is_moderated": false
-      },
-      "per_request_limits": null,
-      "supported_parameters": [
-        "include_reasoning",
-        "max_tokens",
-        "reasoning",
-        "stop",
-        "temperature",
-        "tool_choice",
-        "tools"
-      ]
-    }
-  },
   {
     "id": "anthropic/claude-3.7-sonnet:thinking",
     "name": "Anthropic: Claude 3.7 Sonnet (thinking)",
@@ -11894,41 +11816,42 @@
     }
   },
   {
-    "id": "
-    "name": "
+    "id": "baidu/ernie-4.5-vl-28b-a3b",
+    "name": "Baidu: ERNIE 4.5 VL 28B A3B",
     "provider": "openrouter",
-    "family": "
-    "created_at": "2025-
-    "context_window":
-    "max_output_tokens":
+    "family": "baidu",
+    "created_at": "2025-08-12 23:07:16 +0200",
+    "context_window": 30000,
+    "max_output_tokens": 8000,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "
-        "
+        "text",
+        "image"
       ],
       "output": [
         "text"
       ]
     },
     "capabilities": [
-      "streaming"
+      "streaming",
+      "predicted_outputs"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.
-          "output_per_million": 0.
+          "input_per_million": 0.14,
+          "output_per_million": 0.56
         }
       }
     },
     "metadata": {
-      "description": "
+      "description": "A powerful multimodal Mixture-of-Experts chat model featuring 28B total parameters with 3B activated per token, delivering exceptional text and vision understanding through its innovative heterogeneous MoE structure with modality-isolated routing. Built with scaling-efficient infrastructure for high-throughput training and inference, the model leverages advanced post-training techniques including SFT, DPO, and UPO for optimized performance, while supporting an impressive 131K context length and RLVR alignment for superior cross-modal reasoning and generation capabilities.",
       "architecture": {
         "modality": "text+image->text",
         "input_modalities": [
-          "
-          "
+          "text",
+          "image"
         ],
         "output_modalities": [
           "text"
@@ -11937,16 +11860,19 @@
         "instruct_type": null
       },
       "top_provider": {
-        "context_length":
-        "max_completion_tokens":
+        "context_length": 30000,
+        "max_completion_tokens": 8000,
         "is_moderated": false
       },
       "per_request_limits": null,
       "supported_parameters": [
         "frequency_penalty",
+        "include_reasoning",
+        "logit_bias",
         "max_tokens",
         "min_p",
         "presence_penalty",
+        "reasoning",
         "repetition_penalty",
         "seed",
         "stop",
@@ -11957,16 +11883,17 @@
     }
   },
   {
-    "id": "
-    "name": "
+    "id": "baidu/ernie-4.5-vl-424b-a47b",
+    "name": "Baidu: ERNIE 4.5 VL 424B A47B ",
     "provider": "openrouter",
-    "family": "
-    "created_at": "2025-
-    "context_window":
-    "max_output_tokens":
+    "family": "baidu",
+    "created_at": "2025-06-30 18:28:23 +0200",
+    "context_window": 123000,
+    "max_output_tokens": 16000,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
+        "image",
         "text"
       ],
       "output": [
@@ -11975,12 +11902,141 @@
     },
     "capabilities": [
       "streaming",
-      "
+      "predicted_outputs"
     ],
-    "pricing": {
-
-
-
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 0.42,
+          "output_per_million": 1.25
+        }
+      }
+    },
+    "metadata": {
+      "description": "ERNIE-4.5-VL-424B-A47B is a multimodal Mixture-of-Experts (MoE) model from Baidu’s ERNIE 4.5 series, featuring 424B total parameters with 47B active per token. It is trained jointly on text and image data using a heterogeneous MoE architecture and modality-isolated routing to enable high-fidelity cross-modal reasoning, image understanding, and long-context generation (up to 131k tokens). Fine-tuned with techniques like SFT, DPO, UPO, and RLVR, this model supports both “thinking” and non-thinking inference modes. Designed for vision-language tasks in English and Chinese, it is optimized for efficient scaling and can operate under 4-bit/8-bit quantization.",
+      "architecture": {
+        "modality": "text+image->text",
+        "input_modalities": [
+          "image",
+          "text"
+        ],
+        "output_modalities": [
+          "text"
+        ],
+        "tokenizer": "Other",
+        "instruct_type": null
+      },
+      "top_provider": {
+        "context_length": 123000,
+        "max_completion_tokens": 16000,
+        "is_moderated": false
+      },
+      "per_request_limits": null,
+      "supported_parameters": [
+        "frequency_penalty",
+        "include_reasoning",
+        "logit_bias",
+        "max_tokens",
+        "min_p",
+        "presence_penalty",
+        "reasoning",
+        "repetition_penalty",
+        "seed",
+        "stop",
+        "temperature",
+        "top_k",
+        "top_p"
+      ]
+    }
+  },
+  {
+    "id": "bytedance/ui-tars-1.5-7b",
+    "name": "Bytedance: UI-TARS 7B ",
+    "provider": "openrouter",
+    "family": "bytedance",
+    "created_at": "2025-07-22 19:24:16 +0200",
+    "context_window": 128000,
+    "max_output_tokens": 2048,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "image",
+        "text"
+      ],
+      "output": [
+        "text"
+      ]
+    },
+    "capabilities": [
+      "streaming"
+    ],
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 0.09999999999999999,
+          "output_per_million": 0.19999999999999998
+        }
+      }
+    },
+    "metadata": {
+      "description": "UI-TARS-1.5 is a multimodal vision-language agent optimized for GUI-based environments, including desktop interfaces, web browsers, mobile systems, and games. Built by ByteDance, it builds upon the UI-TARS framework with reinforcement learning-based reasoning, enabling robust action planning and execution across virtual interfaces.\n\nThis model achieves state-of-the-art results on a range of interactive and grounding benchmarks, including OSworld, WebVoyager, AndroidWorld, and ScreenSpot. It also demonstrates perfect task completion across diverse Poki games and outperforms prior models in Minecraft agent tasks. UI-TARS-1.5 supports thought decomposition during inference and shows strong scaling across variants, with the 1.5 version notably exceeding the performance of earlier 72B and 7B checkpoints.",
+      "architecture": {
+        "modality": "text+image->text",
+        "input_modalities": [
+          "image",
+          "text"
+        ],
+        "output_modalities": [
+          "text"
+        ],
+        "tokenizer": "Other",
+        "instruct_type": null
+      },
+      "top_provider": {
+        "context_length": 128000,
+        "max_completion_tokens": 2048,
+        "is_moderated": false
+      },
+      "per_request_limits": null,
+      "supported_parameters": [
+        "frequency_penalty",
+        "max_tokens",
+        "min_p",
+        "presence_penalty",
+        "repetition_penalty",
+        "seed",
+        "stop",
+        "temperature",
+        "top_k",
+        "top_p"
+      ]
+    }
+  },
+  {
+    "id": "cognitivecomputations/dolphin-mistral-24b-venice-edition:free",
+    "name": "Venice: Uncensored (free)",
+    "provider": "openrouter",
+    "family": "cognitivecomputations",
+    "created_at": "2025-07-09 23:02:46 +0200",
+    "context_window": 32768,
+    "max_output_tokens": null,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "text"
+      ],
+      "output": [
+        "text"
+      ]
+    },
+    "capabilities": [
+      "streaming",
+      "structured_output"
+    ],
+    "pricing": {},
+    "metadata": {
+      "description": "Venice Uncensored Dolphin Mistral 24B Venice Edition is a fine-tuned variant of Mistral-Small-24B-Instruct-2501, developed by dphn.ai in collaboration with Venice.ai. This model is designed as an “uncensored” instruct-tuned LLM, preserving user control over alignment, system prompts, and behavior. Intended for advanced and unrestricted use cases, Venice Uncensored emphasizes steerability and transparent behavior, removing default safety and alignment layers typically found in mainstream assistant models.",
+      "architecture": {
         "modality": "text->text",
         "input_modalities": [
           "text"
@@ -12919,8 +12975,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.
-          "output_per_million": 0.
+          "input_per_million": 0.1999188,
+          "output_per_million": 0.800064
         }
       }
     },
@@ -12990,8 +13046,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.
-          "output_per_million": 0.
+          "input_per_million": 0.1999188,
+          "output_per_million": 0.800064
         }
       }
     },
@@ -13042,7 +13098,7 @@
     "family": "deepseek",
     "created_at": "2025-03-24 14:59:15 +0100",
     "context_window": 163840,
-    "max_output_tokens":
+    "max_output_tokens": null,
    "knowledge_cutoff": null,
    "modalities": {
      "input": [
@@ -13073,20 +13129,93 @@
       },
       "top_provider": {
         "context_length": 163840,
-        "max_completion_tokens":
+        "max_completion_tokens": null,
+        "is_moderated": false
+      },
+      "per_request_limits": null,
+      "supported_parameters": [
+        "frequency_penalty",
+        "logit_bias",
+        "logprobs",
+        "max_tokens",
+        "min_p",
+        "presence_penalty",
+        "repetition_penalty",
+        "seed",
+        "stop",
+        "temperature",
+        "tool_choice",
+        "tools",
+        "top_k",
+        "top_logprobs",
+        "top_p"
+      ]
+    }
+  },
+  {
+    "id": "deepseek/deepseek-chat-v3.1",
+    "name": "DeepSeek: DeepSeek V3.1",
+    "provider": "openrouter",
+    "family": "deepseek",
+    "created_at": "2025-08-21 14:33:48 +0200",
+    "context_window": 163840,
+    "max_output_tokens": null,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "text"
+      ],
+      "output": [
+        "text"
+      ]
+    },
+    "capabilities": [
+      "streaming",
+      "function_calling",
+      "structured_output",
+      "predicted_outputs"
+    ],
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 0.19999999999999998,
+          "output_per_million": 0.7999999999999999
+        }
+      }
+    },
+    "metadata": {
+      "description": "DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. \n\nIt succeeds the [DeepSeek V3-0324](/deepseek/deepseek-chat-v3-0324) model and performs well on a variety of tasks.",
+      "architecture": {
+        "modality": "text->text",
+        "input_modalities": [
+          "text"
+        ],
+        "output_modalities": [
+          "text"
+        ],
+        "tokenizer": "DeepSeek",
+        "instruct_type": "deepseek-v3.1"
+      },
+      "top_provider": {
+        "context_length": 163840,
+        "max_completion_tokens": null,
         "is_moderated": false
       },
       "per_request_limits": null,
       "supported_parameters": [
         "frequency_penalty",
+        "include_reasoning",
         "logit_bias",
         "logprobs",
         "max_tokens",
         "min_p",
         "presence_penalty",
+        "reasoning",
         "repetition_penalty",
+        "response_format",
         "seed",
         "stop",
+        "structured_outputs",
         "temperature",
         "tool_choice",
         "tools",
@@ -13258,8 +13387,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.
-          "output_per_million": 0.
+          "input_per_million": 0.1999188,
+          "output_per_million": 0.800064
         }
       }
     },
@@ -13518,8 +13647,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.
-          "output_per_million": 0.
+          "input_per_million": 0.025915399999999998,
+          "output_per_million": 0.103712
         }
       }
     },
@@ -13555,7 +13684,6 @@
         "response_format",
         "seed",
         "stop",
-        "structured_outputs",
         "temperature",
         "tool_choice",
         "tools",
@@ -13995,11 +14123,11 @@
     }
   },
   {
-    "id": "deepseek/deepseek-v3-base",
-    "name": "DeepSeek: DeepSeek V3 Base",
+    "id": "deepseek/deepseek-v3.1-base",
+    "name": "DeepSeek: DeepSeek V3.1 Base",
     "provider": "openrouter",
     "family": "deepseek",
-    "created_at": "2025-
+    "created_at": "2025-08-20 23:56:57 +0200",
     "context_window": 163840,
     "max_output_tokens": null,
     "knowledge_cutoff": null,
@@ -14018,13 +14146,13 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.
-          "output_per_million": 0.
+          "input_per_million": 0.19999999999999998,
+          "output_per_million": 0.7999999999999999
         }
       }
     },
     "metadata": {
-      "description": "
+      "description": "This is a base model, trained only for raw next-token prediction. Unlike instruct/chat models, it has not been fine-tuned to follow user instructions. Prompts need to be written more like training text or examples rather than simple requests (e.g., “Translate the following sentence…” instead of just “Translate this”).\n\nDeepSeek-V3.1 Base is a 671B parameter open Mixture-of-Experts (MoE) language model with 37B active parameters per forward pass and a context length of 128K tokens. Trained on 14.8T tokens using FP8 mixed precision, it achieves high training efficiency and stability, with strong performance across language, reasoning, math, and coding tasks. \n",
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
@@ -14034,7 +14162,7 @@
           "text"
         ],
         "tokenizer": "DeepSeek",
-        "instruct_type":
+        "instruct_type": "none"
       },
       "top_provider": {
         "context_length": 163840,
@@ -14121,74 +14249,87 @@
     }
   },
   {
-    "id": "
-    "name": "
+    "id": "google/gemini-2.0-flash-001",
+    "name": "Google: Gemini 2.0 Flash",
     "provider": "openrouter",
-    "family": "
-    "created_at": "2025-
-    "context_window":
-    "max_output_tokens":
+    "family": "google",
+    "created_at": "2025-02-05 16:30:13 +0100",
+    "context_window": 1048576,
+    "max_output_tokens": 8192,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text"
+        "text",
+        "image",
+        "file",
+        "audio"
       ],
       "output": [
         "text"
       ]
     },
     "capabilities": [
-      "streaming"
+      "streaming",
+      "function_calling",
+      "structured_output"
     ],
-    "pricing": {
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 0.09999999999999999,
+          "output_per_million": 0.39999999999999997,
+          "cached_input_per_million": 0.024999999999999998
+        }
+      }
+    },
     "metadata": {
-      "description": "
+      "description": "Gemini Flash 2.0 offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro 1.5](/google/gemini-pro-1.5). It introduces notable enhancements in multimodal understanding, coding capabilities, complex instruction following, and function calling. These advancements come together to deliver more seamless and robust agentic experiences.",
       "architecture": {
-        "modality": "text->text",
+        "modality": "text+image->text",
         "input_modalities": [
-          "text"
+          "text",
+          "image",
+          "file",
+          "audio"
         ],
         "output_modalities": [
           "text"
         ],
-        "tokenizer": "
+        "tokenizer": "Gemini",
         "instruct_type": null
       },
       "top_provider": {
-        "context_length":
-        "max_completion_tokens":
+        "context_length": 1048576,
+        "max_completion_tokens": 8192,
         "is_moderated": false
       },
       "per_request_limits": null,
       "supported_parameters": [
-        "frequency_penalty",
         "max_tokens",
-        "
-        "presence_penalty",
-        "repetition_penalty",
+        "response_format",
         "seed",
         "stop",
+        "structured_outputs",
         "temperature",
-        "
+        "tool_choice",
+        "tools",
         "top_p"
       ]
     }
   },
   {
-    "id": "google/gemini-2.0-flash-
-    "name": "Google: Gemini 2.0 Flash",
+    "id": "google/gemini-2.0-flash-exp:free",
+    "name": "Google: Gemini 2.0 Flash Experimental (free)",
     "provider": "openrouter",
     "family": "google",
-    "created_at": "
+    "created_at": "2024-12-11 18:18:43 +0100",
     "context_window": 1048576,
     "max_output_tokens": 8192,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
         "text",
-        "image"
-        "file",
-        "audio"
+        "image"
       ],
       "output": [
         "text"
@@ -14199,24 +14340,14 @@
       "function_calling",
       "structured_output"
     ],
-    "pricing": {
-      "text_tokens": {
-        "standard": {
-          "input_per_million": 0.09999999999999999,
-          "output_per_million": 0.39999999999999997,
-          "cached_input_per_million": 0.024999999999999998
-        }
-      }
-    },
+    "pricing": {},
     "metadata": {
       "description": "Gemini Flash 2.0 offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro 1.5](/google/gemini-pro-1.5). It introduces notable enhancements in multimodal understanding, coding capabilities, complex instruction following, and function calling. These advancements come together to deliver more seamless and robust agentic experiences.",
       "architecture": {
         "modality": "text+image->text",
         "input_modalities": [
           "text",
-          "image"
-          "file",
-          "audio"
+          "image"
         ],
         "output_modalities": [
           "text"
@@ -14235,7 +14366,6 @@
         "response_format",
         "seed",
         "stop",
-        "structured_outputs",
         "temperature",
         "tool_choice",
         "tools",
@@ -14244,18 +14374,20 @@
     }
   },
   {
-    "id": "google/gemini-2.0-flash-
-    "name": "Google: Gemini 2.0 Flash
+    "id": "google/gemini-2.0-flash-lite-001",
+    "name": "Google: Gemini 2.0 Flash Lite",
     "provider": "openrouter",
     "family": "google",
-    "created_at": "
+    "created_at": "2025-02-25 18:56:52 +0100",
     "context_window": 1048576,
     "max_output_tokens": 8192,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
         "text",
-        "image"
+        "image",
+        "file",
+        "audio"
       ],
       "output": [
         "text"
@@ -14266,14 +14398,23 @@
       "function_calling",
       "structured_output"
     ],
-    "pricing": {
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 0.075,
+          "output_per_million": 0.3
+        }
+      }
+    },
     "metadata": {
-      "description": "Gemini
+      "description": "Gemini 2.0 Flash Lite offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro 1.5](/google/gemini-pro-1.5), all at extremely economical token prices.",
       "architecture": {
         "modality": "text+image->text",
         "input_modalities": [
           "text",
-          "image"
+          "image",
+          "file",
+          "audio"
         ],
         "output_modalities": [
           "text"
@@ -14292,6 +14433,7 @@
         "response_format",
         "seed",
         "stop",
+        "structured_outputs",
         "temperature",
         "tool_choice",
         "tools",
@@ -14300,19 +14442,19 @@
     }
   },
   {
-    "id": "google/gemini-2.
-    "name": "Google: Gemini 2.
+    "id": "google/gemini-2.5-flash",
+    "name": "Google: Gemini 2.5 Flash",
     "provider": "openrouter",
     "family": "google",
-    "created_at": "2025-
+    "created_at": "2025-06-17 17:01:28 +0200",
     "context_window": 1048576,
-    "max_output_tokens":
+    "max_output_tokens": 65535,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text",
-        "image",
         "file",
+        "image",
+        "text",
         "audio"
       ],
       "output": [
@@ -14327,19 +14469,20 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.
-          "output_per_million":
+          "input_per_million": 0.3,
+          "output_per_million": 2.5,
+          "cached_input_per_million": 0.075
         }
       }
     },
     "metadata": {
-      "description": "Gemini 2.
+      "description": "Gemini 2.5 Flash is Google's state-of-the-art workhorse model, specifically designed for advanced reasoning, coding, mathematics, and scientific tasks. It includes built-in \"thinking\" capabilities, enabling it to provide responses with greater accuracy and nuanced context handling. \n\nAdditionally, Gemini 2.5 Flash is configurable through the \"max tokens for reasoning\" parameter, as described in the documentation (https://openrouter.ai/docs/use-cases/reasoning-tokens#max-tokens-for-reasoning).",
       "architecture": {
         "modality": "text+image->text",
         "input_modalities": [
-          "text",
-          "image",
           "file",
+          "image",
+          "text",
           "audio"
         ],
         "output_modalities": [
@@ -14350,12 +14493,14 @@
       },
       "top_provider": {
         "context_length": 1048576,
-        "max_completion_tokens":
+        "max_completion_tokens": 65535,
         "is_moderated": false
       },
       "per_request_limits": null,
       "supported_parameters": [
+        "include_reasoning",
         "max_tokens",
+        "reasoning",
         "response_format",
         "seed",
         "stop",
@@ -14368,72 +14513,118 @@
     }
   },
   {
-    "id": "google/gemini-2.5-flash",
-    "name": "Google: Gemini 2.5 Flash",
+    "id": "google/gemini-2.5-flash-image-preview",
+    "name": "Google: Gemini 2.5 Flash Image Preview",
     "provider": "openrouter",
     "family": "google",
-    "created_at": "2025-
-    "context_window":
-    "max_output_tokens":
+    "created_at": "2025-08-26 16:36:17 +0200",
+    "context_window": 32768,
+    "max_output_tokens": 8192,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "file",
         "image",
-        "text"
-        "audio"
+        "text"
       ],
       "output": [
+        "image",
         "text"
       ]
     },
     "capabilities": [
       "streaming",
-      "function_calling",
       "structured_output"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
           "input_per_million": 0.3,
-          "output_per_million": 2.5
-          "cached_input_per_million": 0.075
+          "output_per_million": 2.5
         }
       }
     },
     "metadata": {
-      "description": "Gemini 2.5 Flash is
+      "description": "Gemini 2.5 Flash Image Preview is a state of the art image generation model with contextual understanding. It is capable of image generation, edits, and multi-turn conversations.",
       "architecture": {
-        "modality": "text+image->text",
+        "modality": "text+image->text+image",
         "input_modalities": [
-          "file",
           "image",
-          "text"
-          "audio"
+          "text"
         ],
         "output_modalities": [
+          "image",
           "text"
         ],
         "tokenizer": "Gemini",
         "instruct_type": null
       },
       "top_provider": {
-        "context_length":
-        "max_completion_tokens":
+        "context_length": 32768,
+        "max_completion_tokens": 8192,
+        "is_moderated": false
+      },
+      "per_request_limits": null,
+      "supported_parameters": [
+        "max_tokens",
+        "response_format",
+        "seed",
+        "structured_outputs",
+        "temperature",
+        "top_p"
+      ]
+    }
+  },
+  {
+    "id": "google/gemini-2.5-flash-image-preview:free",
+    "name": "Google: Gemini 2.5 Flash Image Preview (free)",
+    "provider": "openrouter",
+    "family": "google",
+    "created_at": "2025-08-26 16:36:17 +0200",
+    "context_window": 32768,
+    "max_output_tokens": 8192,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "image",
+        "text"
+      ],
+      "output": [
+        "image",
+        "text"
+      ]
+    },
+    "capabilities": [
+      "streaming",
+      "structured_output"
+    ],
+    "pricing": {},
+    "metadata": {
+      "description": "Gemini 2.5 Flash Image Preview is a state of the art image generation model with contextual understanding. It is capable of image generation, edits, and multi-turn conversations.",
+      "architecture": {
+        "modality": "text+image->text+image",
+        "input_modalities": [
+          "image",
+          "text"
+        ],
+        "output_modalities": [
+          "image",
+          "text"
+        ],
+        "tokenizer": "Gemini",
+        "instruct_type": null
+      },
+      "top_provider": {
+        "context_length": 32768,
+        "max_completion_tokens": 8192,
         "is_moderated": false
       },
       "per_request_limits": null,
       "supported_parameters": [
-        "include_reasoning",
         "max_tokens",
-        "reasoning",
         "response_format",
         "seed",
-        "stop",
         "structured_outputs",
         "temperature",
-        "tool_choice",
-        "tools",
         "top_p"
       ]
     }
@@ -15317,7 +15508,7 @@
     "provider": "openrouter",
     "family": "google",
     "created_at": "2025-03-13 22:50:25 +0100",
-    "context_window":
+    "context_window": 32768,
     "max_output_tokens": 8192,
     "knowledge_cutoff": null,
     "modalities": {
@@ -15331,7 +15522,6 @@
     },
     "capabilities": [
       "streaming",
-      "structured_output",
       "predicted_outputs"
     ],
     "pricing": {},
@@ -15350,7 +15540,7 @@
         "instruct_type": "gemma"
       },
       "top_provider": {
-        "context_length":
+        "context_length": 32768,
         "max_completion_tokens": 8192,
         "is_moderated": false
       },
@@ -15363,10 +15553,8 @@
         "min_p",
         "presence_penalty",
         "repetition_penalty",
-        "response_format",
         "seed",
         "stop",
-        "structured_outputs",
         "temperature",
         "top_k",
         "top_logprobs",
@@ -15436,6 +15624,7 @@
         "response_format",
         "seed",
         "stop",
+        "structured_outputs",
         "temperature",
         "top_k",
         "top_logprobs",
@@ -16219,73 +16408,6 @@
       ]
     }
   },
-  {
-    "id": "liquid/lfm-40b",
-    "name": "Liquid: LFM 40B MoE",
-    "provider": "openrouter",
-    "family": "liquid",
-    "created_at": "2024-09-30 02:00:00 +0200",
-    "context_window": 65536,
-    "max_output_tokens": 65536,
-    "knowledge_cutoff": null,
-    "modalities": {
-      "input": [
-        "text"
-      ],
-      "output": [
-        "text"
-      ]
-    },
-    "capabilities": [
-      "streaming",
-      "structured_output",
-      "predicted_outputs"
-    ],
-    "pricing": {
-      "text_tokens": {
-        "standard": {
-          "input_per_million": 0.15,
-          "output_per_million": 0.15
-        }
-      }
-    },
-    "metadata": {
-      "description": "Liquid's 40.3B Mixture of Experts (MoE) model. Liquid Foundation Models (LFMs) are large neural networks built with computational units rooted in dynamic systems.\n\nLFMs are general-purpose AI models that can be used to model any kind of sequential data, including video, audio, text, time series, and signals.\n\nSee the [launch announcement](https://www.liquid.ai/liquid-foundation-models) for benchmarks and more info.",
-      "architecture": {
-        "modality": "text->text",
-        "input_modalities": [
-          "text"
-        ],
-        "output_modalities": [
-          "text"
-        ],
-        "tokenizer": "Other",
-        "instruct_type": "chatml"
-      },
-      "top_provider": {
-        "context_length": 65536,
-        "max_completion_tokens": 65536,
-        "is_moderated": false
-      },
-      "per_request_limits": null,
-      "supported_parameters": [
-        "frequency_penalty",
-        "logit_bias",
-        "logprobs",
-        "max_tokens",
-        "min_p",
-        "presence_penalty",
-        "repetition_penalty",
-        "response_format",
-        "seed",
-        "stop",
-        "temperature",
-        "top_k",
-        "top_logprobs",
-        "top_p"
-      ]
-    }
-  },
   {
     "id": "liquid/lfm-7b",
     "name": "Liquid: LFM 7B",
@@ -16360,7 +16482,7 @@
     "family": "mancer",
     "created_at": "2023-08-02 02:00:00 +0200",
     "context_window": 8000,
-    "max_output_tokens":
+    "max_output_tokens": 2000,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -16377,8 +16499,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 1.
-          "output_per_million": 1.
+          "input_per_million": 1.125,
+          "output_per_million": 1.125
         }
       }
     },
@@ -16397,7 +16519,7 @@
       },
       "top_provider": {
         "context_length": 8000,
-        "max_completion_tokens":
+        "max_completion_tokens": 2000,
         "is_moderated": false
       },
       "per_request_limits": null,
@@ -17399,18 +17521,17 @@
     }
   },
   {
-    "id": "meta-llama/llama-
-    "name": "Meta: Llama
+    "id": "meta-llama/llama-3.3-8b-instruct:free",
+    "name": "Meta: Llama 3.3 8B Instruct (free)",
     "provider": "openrouter",
     "family": "meta-llama",
-    "created_at": "2025-
-    "context_window":
-    "max_output_tokens":
+    "created_at": "2025-05-14 15:42:34 +0200",
+    "context_window": 128000,
+    "max_output_tokens": 4028,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text"
-        "image"
+        "text"
       ],
       "output": [
         "text"
@@ -17419,8 +17540,64 @@
     "capabilities": [
       "streaming",
       "function_calling",
-      "structured_output"
-
+      "structured_output"
+    ],
+    "pricing": {},
+    "metadata": {
+      "description": "A lightweight and ultra-fast variant of Llama 3.3 70B, for use when quick response times are needed most.",
+      "architecture": {
+        "modality": "text->text",
+        "input_modalities": [
+          "text"
+        ],
+        "output_modalities": [
+          "text"
+        ],
+        "tokenizer": "Llama3",
+        "instruct_type": null
+      },
+      "top_provider": {
+        "context_length": 128000,
+        "max_completion_tokens": 4028,
+        "is_moderated": true
+      },
+      "per_request_limits": null,
+      "supported_parameters": [
+        "max_tokens",
+        "repetition_penalty",
+        "response_format",
+        "structured_outputs",
+        "temperature",
+        "tool_choice",
+        "tools",
+        "top_k",
+        "top_p"
+      ]
+    }
+  },
+  {
+    "id": "meta-llama/llama-4-maverick",
+    "name": "Meta: Llama 4 Maverick",
+    "provider": "openrouter",
+    "family": "meta-llama",
+    "created_at": "2025-04-05 21:37:02 +0200",
+    "context_window": 1048576,
+    "max_output_tokens": 16384,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "text",
+        "image"
+      ],
+      "output": [
+        "text"
+      ]
+    },
+    "capabilities": [
+      "streaming",
+      "function_calling",
+      "structured_output",
+      "predicted_outputs"
     ],
     "pricing": {
       "text_tokens": {
@@ -17471,6 +17648,63 @@
       ]
     }
   },
+  {
+    "id": "meta-llama/llama-4-maverick:free",
+    "name": "Meta: Llama 4 Maverick (free)",
+    "provider": "openrouter",
+    "family": "meta-llama",
+    "created_at": "2025-04-05 21:37:02 +0200",
+    "context_window": 128000,
+    "max_output_tokens": 4028,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "text",
+        "image"
+      ],
+      "output": [
+        "text"
+      ]
+    },
+    "capabilities": [
+      "streaming",
+      "function_calling",
+      "structured_output"
+    ],
+    "pricing": {},
+    "metadata": {
+      "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.\n\nMaverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.",
+      "architecture": {
+        "modality": "text+image->text",
+        "input_modalities": [
+          "text",
+          "image"
+        ],
+        "output_modalities": [
+          "text"
+        ],
+        "tokenizer": "Llama4",
+        "instruct_type": null
+      },
+      "top_provider": {
+        "context_length": 128000,
+        "max_completion_tokens": 4028,
+        "is_moderated": true
+      },
+      "per_request_limits": null,
+      "supported_parameters": [
+        "max_tokens",
+        "repetition_penalty",
+        "response_format",
+        "structured_outputs",
+        "temperature",
+        "tool_choice",
+        "tools",
+        "top_k",
+        "top_p"
+      ]
+    }
+  },
   {
     "id": "meta-llama/llama-4-scout",
     "name": "Meta: Llama 4 Scout",
@@ -17544,6 +17778,63 @@
 ]
 }
 },
+{
+"id": "meta-llama/llama-4-scout:free",
+"name": "Meta: Llama 4 Scout (free)",
+"provider": "openrouter",
+"family": "meta-llama",
+"created_at": "2025-04-05 21:31:59 +0200",
+"context_window": 128000,
+"max_output_tokens": 4028,
+"knowledge_cutoff": null,
+"modalities": {
+"input": [
+"text",
+"image"
+],
+"output": [
+"text"
+]
+},
+"capabilities": [
+"streaming",
+"function_calling",
+"structured_output"
+],
+"pricing": {},
+"metadata": {
+"description": "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages. Designed for assistant-style interaction and visual reasoning, Scout uses 16 experts per forward pass and features a context length of 10 million tokens, with a training corpus of ~40 trillion tokens.\n\nBuilt for high efficiency and local or commercial deployment, Llama 4 Scout incorporates early fusion for seamless modality integration. It is instruction-tuned for use in multilingual chat, captioning, and image understanding tasks. Released under the Llama 4 Community License, it was last trained on data up to August 2024 and launched publicly on April 5, 2025.",
+"architecture": {
+"modality": "text+image->text",
+"input_modalities": [
+"text",
+"image"
+],
+"output_modalities": [
+"text"
+],
+"tokenizer": "Llama4",
+"instruct_type": null
+},
+"top_provider": {
+"context_length": 128000,
+"max_completion_tokens": 4028,
+"is_moderated": true
+},
+"per_request_limits": null,
+"supported_parameters": [
+"max_tokens",
+"repetition_penalty",
+"response_format",
+"structured_outputs",
+"temperature",
+"tool_choice",
+"tools",
+"top_k",
+"top_p"
+]
+}
+},
 {
 "id": "meta-llama/llama-guard-2-8b",
 "name": "Meta: LlamaGuard 2 8B",
@@ -19667,7 +19958,7 @@
 "provider": "openrouter",
 "family": "mistralai",
 "created_at": "2025-08-13 16:33:59 +0200",
-"context_window":
+"context_window": 131072,
 "max_output_tokens": null,
 "knowledge_cutoff": null,
 "modalities": {
@@ -19707,7 +19998,7 @@
 "instruct_type": null
 },
 "top_provider": {
-"context_length":
+"context_length": 131072,
 "max_completion_tokens": null,
 "is_moderated": false
 },
@@ -20140,8 +20431,8 @@
 "pricing": {
 "text_tokens": {
 "standard": {
-"input_per_million": 0.
-"output_per_million": 0.
+"input_per_million": 0.01999188,
+"output_per_million": 0.0800064
 }
 }
 },
@@ -20258,7 +20549,7 @@
 "provider": "openrouter",
 "family": "mistralai",
 "created_at": "2025-06-20 20:10:16 +0200",
-"context_window":
+"context_window": 128000,
 "max_output_tokens": null,
 "knowledge_cutoff": null,
 "modalities": {
@@ -20279,8 +20570,8 @@
 "pricing": {
 "text_tokens": {
 "standard": {
-"input_per_million": 0.
-"output_per_million": 0.
+"input_per_million": 0.049999999999999996,
+"output_per_million": 0.09999999999999999
 }
 }
 },
@@ -20299,7 +20590,7 @@
 "instruct_type": null
 },
 "top_provider": {
-"context_length":
+"context_length": 128000,
 "max_completion_tokens": null,
 "is_moderated": false
 },
@@ -21228,8 +21519,8 @@
 "provider": "openrouter",
 "family": "neversleep",
 "created_at": "2024-09-15 02:00:00 +0200",
-"context_window":
-"max_output_tokens":
+"context_window": 32768,
+"max_output_tokens": null,
 "knowledge_cutoff": null,
 "modalities": {
 "input": [
@@ -21247,8 +21538,8 @@
 "pricing": {
 "text_tokens": {
 "standard": {
-"input_per_million": 0.
-"output_per_million": 0.
+"input_per_million": 0.09,
+"output_per_million": 0.6
 }
 }
 },
@@ -21266,8 +21557,8 @@
 "instruct_type": "llama3"
 },
 "top_provider": {
-"context_length":
-"max_completion_tokens":
+"context_length": 32768,
+"max_completion_tokens": null,
 "is_moderated": false
 },
 "per_request_limits": null,
@@ -21688,13 +21979,13 @@
 }
 },
 {
-"id": "nousresearch/
-"name": "Nous: Hermes
+"id": "nousresearch/hermes-4-405b",
+"name": "Nous: Hermes 4 405B",
 "provider": "openrouter",
 "family": "nousresearch",
-"created_at": "
-"context_window":
-"max_output_tokens":
+"created_at": "2025-08-26 21:11:03 +0200",
+"context_window": 131072,
+"max_output_tokens": null,
 "knowledge_cutoff": null,
 "modalities": {
 "input": [
@@ -21706,18 +21997,19 @@
 },
 "capabilities": [
 "streaming",
+"function_calling",
 "predicted_outputs"
 ],
 "pricing": {
 "text_tokens": {
 "standard": {
-"input_per_million": 0.
-"output_per_million": 0.
+"input_per_million": 0.1999188,
+"output_per_million": 0.800064
 }
 }
 },
 "metadata": {
-"description": "
+"description": "Hermes 4 is a large-scale reasoning model built on Meta-Llama-3.1-405B and released by Nous Research. It introduces a hybrid reasoning mode, where the model can choose to deliberate internally with <think>...</think> traces or respond directly, offering flexibility between speed and depth. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model is instruction-tuned with an expanded post-training corpus (~60B tokens) emphasizing reasoning traces, improving performance in math, code, STEM, and logical reasoning, while retaining broad assistant utility. It also supports structured outputs, including JSON mode, schema adherence, function calling, and tool use. Hermes 4 is trained for steerability, lower refusal rates, and alignment toward neutral, user-directed behavior.",
 "architecture": {
 "modality": "text->text",
 "input_modalities": [
@@ -21726,37 +22018,44 @@
 "output_modalities": [
 "text"
 ],
-"tokenizer": "
-"instruct_type":
+"tokenizer": "Other",
+"instruct_type": null
 },
 "top_provider": {
-"context_length":
-"max_completion_tokens":
+"context_length": 131072,
+"max_completion_tokens": null,
 "is_moderated": false
 },
 "per_request_limits": null,
 "supported_parameters": [
 "frequency_penalty",
+"include_reasoning",
 "logit_bias",
+"logprobs",
 "max_tokens",
 "min_p",
 "presence_penalty",
+"reasoning",
 "repetition_penalty",
+"seed",
 "stop",
 "temperature",
+"tool_choice",
+"tools",
 "top_k",
+"top_logprobs",
 "top_p"
 ]
 }
 },
 {
-"id": "
-"name": "
+"id": "nousresearch/hermes-4-70b",
+"name": "Nous: Hermes 4 70B",
 "provider": "openrouter",
-"family": "
-"created_at": "
+"family": "nousresearch",
+"created_at": "2025-08-26 21:23:02 +0200",
 "context_window": 131072,
-"max_output_tokens":
+"max_output_tokens": null,
 "knowledge_cutoff": null,
 "modalities": {
 "input": [
@@ -21769,19 +22068,18 @@
 "capabilities": [
 "streaming",
 "function_calling",
-"structured_output",
 "predicted_outputs"
 ],
 "pricing": {
 "text_tokens": {
 "standard": {
-"input_per_million": 0.
-"output_per_million": 0.
+"input_per_million": 0.09329544,
+"output_per_million": 0.3733632
 }
 }
 },
 "metadata": {
-"description": "
+"description": "Hermes 4 70B is a hybrid reasoning model from Nous Research, built on Meta-Llama-3.1-70B. It introduces the same hybrid mode as the larger 405B release, allowing the model to either respond directly or generate explicit <think>...</think> reasoning traces before answering. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThis 70B variant is trained with the expanded post-training corpus (~60B tokens) emphasizing verified reasoning data, leading to improvements in mathematics, coding, STEM, logic, and structured outputs while maintaining general assistant performance. It supports JSON mode, schema adherence, function calling, and tool use, and is designed for greater steerability with reduced refusal rates.",
 "architecture": {
 "modality": "text->text",
 "input_modalities": [
@@ -21791,23 +22089,24 @@
 "text"
 ],
 "tokenizer": "Llama3",
-"instruct_type":
+"instruct_type": null
 },
 "top_provider": {
 "context_length": 131072,
-"max_completion_tokens":
+"max_completion_tokens": null,
 "is_moderated": false
 },
 "per_request_limits": null,
 "supported_parameters": [
 "frequency_penalty",
+"include_reasoning",
 "logit_bias",
 "logprobs",
 "max_tokens",
 "min_p",
 "presence_penalty",
+"reasoning",
 "repetition_penalty",
-"response_format",
 "seed",
 "stop",
 "temperature",
@@ -21820,13 +22119,13 @@
 }
 },
 {
-"id": "
-"name": "
+"id": "nousresearch/nous-hermes-2-mixtral-8x7b-dpo",
+"name": "Nous: Hermes 2 Mixtral 8x7B DPO",
 "provider": "openrouter",
-"family": "
-"created_at": "
-"context_window":
-"max_output_tokens":
+"family": "nousresearch",
+"created_at": "2024-01-16 01:00:00 +0100",
+"context_window": 32768,
+"max_output_tokens": 2048,
 "knowledge_cutoff": null,
 "modalities": {
 "input": [
@@ -21844,12 +22143,12 @@
 "text_tokens": {
 "standard": {
 "input_per_million": 0.6,
-"output_per_million":
+"output_per_million": 0.6
 }
 }
 },
 "metadata": {
-"description": "
+"description": "Nous Hermes 2 Mixtral 8x7B DPO is the new flagship Nous Research model trained over the [Mixtral 8x7B MoE LLM](/models/mistralai/mixtral-8x7b).\n\nThe model was trained on over 1,000,000 entries of primarily [GPT-4](/models/openai/gpt-4) generated data, as well as other high quality data from open datasets across the AI landscape, achieving state of the art performance on a variety of tasks.\n\n#moe",
 "architecture": {
 "modality": "text->text",
 "input_modalities": [
@@ -21858,11 +22157,141 @@
 "output_modalities": [
 "text"
 ],
-"tokenizer": "
-"instruct_type":
+"tokenizer": "Mistral",
+"instruct_type": "chatml"
 },
 "top_provider": {
-"context_length":
+"context_length": 32768,
+"max_completion_tokens": 2048,
+"is_moderated": false
+},
+"per_request_limits": null,
+"supported_parameters": [
+"frequency_penalty",
+"logit_bias",
+"max_tokens",
+"min_p",
+"presence_penalty",
+"repetition_penalty",
+"stop",
+"temperature",
+"top_k",
+"top_p"
+]
+}
+},
+{
+"id": "nvidia/llama-3.1-nemotron-70b-instruct",
+"name": "NVIDIA: Llama 3.1 Nemotron 70B Instruct",
+"provider": "openrouter",
+"family": "nvidia",
+"created_at": "2024-10-15 02:00:00 +0200",
+"context_window": 131072,
+"max_output_tokens": 16384,
+"knowledge_cutoff": null,
+"modalities": {
+"input": [
+"text"
+],
+"output": [
+"text"
+]
+},
+"capabilities": [
+"streaming",
+"function_calling",
+"structured_output",
+"predicted_outputs"
+],
+"pricing": {
+"text_tokens": {
+"standard": {
+"input_per_million": 0.12,
+"output_per_million": 0.3
+}
+}
+},
+"metadata": {
+"description": "NVIDIA's Llama 3.1 Nemotron 70B is a language model designed for generating precise and useful responses. Leveraging [Llama 3.1 70B](/models/meta-llama/llama-3.1-70b-instruct) architecture and Reinforcement Learning from Human Feedback (RLHF), it excels in automatic alignment benchmarks. This model is tailored for applications requiring high accuracy in helpfulness and response generation, suitable for diverse user queries across multiple domains.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).",
+"architecture": {
+"modality": "text->text",
+"input_modalities": [
+"text"
+],
+"output_modalities": [
+"text"
+],
+"tokenizer": "Llama3",
+"instruct_type": "llama3"
+},
+"top_provider": {
+"context_length": 131072,
+"max_completion_tokens": 16384,
+"is_moderated": false
+},
+"per_request_limits": null,
+"supported_parameters": [
+"frequency_penalty",
+"logit_bias",
+"max_tokens",
+"min_p",
+"presence_penalty",
+"repetition_penalty",
+"response_format",
+"seed",
+"stop",
+"temperature",
+"tool_choice",
+"tools",
+"top_k",
+"top_p"
+]
+}
+},
+{
+"id": "nvidia/llama-3.1-nemotron-ultra-253b-v1",
+"name": "NVIDIA: Llama 3.1 Nemotron Ultra 253B v1",
+"provider": "openrouter",
+"family": "nvidia",
+"created_at": "2025-04-08 14:24:19 +0200",
+"context_window": 131072,
+"max_output_tokens": null,
+"knowledge_cutoff": null,
+"modalities": {
+"input": [
+"text"
+],
+"output": [
+"text"
+]
+},
+"capabilities": [
+"streaming",
+"predicted_outputs"
+],
+"pricing": {
+"text_tokens": {
+"standard": {
+"input_per_million": 0.6,
+"output_per_million": 1.7999999999999998
+}
+}
+},
+"metadata": {
+"description": "Llama-3.1-Nemotron-Ultra-253B-v1 is a large language model (LLM) optimized for advanced reasoning, human-interactive chat, retrieval-augmented generation (RAG), and tool-calling tasks. Derived from Meta’s Llama-3.1-405B-Instruct, it has been significantly customized using Neural Architecture Search (NAS), resulting in enhanced efficiency, reduced memory usage, and improved inference latency. The model supports a context length of up to 128K tokens and can operate efficiently on an 8x NVIDIA H100 node.\n\nNote: you must include `detailed thinking on` in the system prompt to enable reasoning. Please see [Usage Recommendations](https://huggingface.co/nvidia/Llama-3_1-Nemotron-Ultra-253B-v1#quick-start-and-usage-recommendations) for more.",
+"architecture": {
+"modality": "text->text",
+"input_modalities": [
+"text"
+],
+"output_modalities": [
+"text"
+],
+"tokenizer": "Llama3",
+"instruct_type": null
+},
+"top_provider": {
+"context_length": 131072,
 "max_completion_tokens": null,
 "is_moderated": false
 },
@@ -23246,6 +23675,75 @@
 ]
 }
 },
+{
+"id": "openai/gpt-4o-audio-preview",
+"name": "OpenAI: GPT-4o Audio",
+"provider": "openrouter",
+"family": "openai",
+"created_at": "2025-08-15 06:44:21 +0200",
+"context_window": 128000,
+"max_output_tokens": 16384,
+"knowledge_cutoff": null,
+"modalities": {
+"input": [
+"audio",
+"text"
+],
+"output": [
+"text"
+]
+},
+"capabilities": [
+"streaming",
+"function_calling",
+"structured_output"
+],
+"pricing": {
+"text_tokens": {
+"standard": {
+"input_per_million": 2.5,
+"output_per_million": 10.0
+}
+}
+},
+"metadata": {
+"description": "The gpt-4o-audio-preview model adds support for audio inputs as prompts. This enhancement allows the model to detect nuances within audio recordings and add depth to generated user experiences. Audio outputs are currently not supported. Audio tokens are priced at $40 per million input audio tokens.",
+"architecture": {
+"modality": "text->text",
+"input_modalities": [
+"audio",
+"text"
+],
+"output_modalities": [
+"text"
+],
+"tokenizer": "GPT",
+"instruct_type": null
+},
+"top_provider": {
+"context_length": 128000,
+"max_completion_tokens": 16384,
+"is_moderated": true
+},
+"per_request_limits": null,
+"supported_parameters": [
+"frequency_penalty",
+"logit_bias",
+"logprobs",
+"max_tokens",
+"presence_penalty",
+"response_format",
+"seed",
+"stop",
+"structured_outputs",
+"temperature",
+"tool_choice",
+"tools",
+"top_logprobs",
+"top_p"
+]
+}
+},
 {
 "id": "openai/gpt-4o-mini",
 "name": "OpenAI: GPT-4o-mini",
@@ -23610,7 +24108,7 @@
 }
 },
 "metadata": {
-"description": "GPT-5 is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. It supports test-time routing features and advanced prompt understanding, including user-specified intent like \"think hard about this.\" Improvements include reductions in hallucination, sycophancy, and better performance in coding, writing, and health-related tasks
+"description": "GPT-5 is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. It supports test-time routing features and advanced prompt understanding, including user-specified intent like \"think hard about this.\" Improvements include reductions in hallucination, sycophancy, and better performance in coding, writing, and health-related tasks.",
 "architecture": {
 "modality": "text+image->text",
 "input_modalities": [
@@ -23843,8 +24341,8 @@
 "provider": "openrouter",
 "family": "openai",
 "created_at": "2025-08-05 19:17:11 +0200",
-"context_window":
-"max_output_tokens":
+"context_window": 131000,
+"max_output_tokens": 131000,
 "knowledge_cutoff": null,
 "modalities": {
 "input": [
@@ -23863,8 +24361,8 @@
 "pricing": {
 "text_tokens": {
 "standard": {
-"input_per_million": 0.
-"output_per_million": 0.
+"input_per_million": 0.072,
+"output_per_million": 0.28
 }
 }
 },
@@ -23882,8 +24380,8 @@
 "instruct_type": null
 },
 "top_provider": {
-"context_length":
-"max_completion_tokens":
+"context_length": 131000,
+"max_completion_tokens": 131000,
 "is_moderated": false
 },
 "per_request_limits": null,
@@ -23916,8 +24414,8 @@
 "provider": "openrouter",
 "family": "openai",
 "created_at": "2025-08-05 19:17:09 +0200",
-"context_window":
-"max_output_tokens":
+"context_window": 131000,
+"max_output_tokens": 131000,
 "knowledge_cutoff": null,
 "modalities": {
 "input": [
@@ -23937,7 +24435,7 @@
 "text_tokens": {
 "standard": {
 "input_per_million": 0.04,
-"output_per_million": 0.
+"output_per_million": 0.15
 }
 }
 },
@@ -23955,8 +24453,8 @@
 "instruct_type": null
 },
 "top_provider": {
-"context_length":
-"max_completion_tokens":
+"context_length": 131000,
+"max_completion_tokens": 131000,
 "is_moderated": false
 },
 "per_request_limits": null,
@@ -24299,7 +24797,7 @@
 }
 },
 "metadata": {
-"description": "o3 is a well-rounded and powerful model across domains. It sets a new standard for math, science, coding, and visual reasoning tasks. It also excels at technical writing and instruction-following. Use it to think through multi-step problems that involve analysis across text, code, and images.
+"description": "o3 is a well-rounded and powerful model across domains. It sets a new standard for math, science, coding, and visual reasoning tasks. It also excels at technical writing and instruction-following. Use it to think through multi-step problems that involve analysis across text, code, and images. ",
 "architecture": {
 "modality": "text+image->text",
 "input_modalities": [
@@ -25261,8 +25759,8 @@
 "pricing": {
 "text_tokens": {
 "standard": {
-"input_per_million": 0.
-"output_per_million": 0.
+"input_per_million": 0.051830799999999996,
+"output_per_million": 0.207424
 }
 }
 },
@@ -26680,8 +27178,8 @@
 "provider": "openrouter",
 "family": "qwen",
 "created_at": "2025-07-29 18:36:05 +0200",
-"context_window":
-"max_output_tokens":
+"context_window": 262144,
+"max_output_tokens": null,
 "knowledge_cutoff": null,
 "modalities": {
 "input": [
@@ -26693,13 +27191,15 @@
 },
 "capabilities": [
 "streaming",
-"
+"function_calling",
+"structured_output",
+"predicted_outputs"
 ],
 "pricing": {
 "text_tokens": {
 "standard": {
-"input_per_million": 0.
-"output_per_million": 0.
+"input_per_million": 0.09999999999999999,
+"output_per_million": 0.3
 }
 }
 },
@@ -26717,17 +27217,25 @@
 "instruct_type": null
 },
 "top_provider": {
-"context_length":
-"max_completion_tokens":
+"context_length": 262144,
+"max_completion_tokens": null,
 "is_moderated": false
 },
 "per_request_limits": null,
 "supported_parameters": [
+"frequency_penalty",
+"logit_bias",
+"logprobs",
 "max_tokens",
 "presence_penalty",
 "response_format",
 "seed",
+"stop",
 "temperature",
+"tool_choice",
+"tools",
+"top_k",
+"top_logprobs",
 "top_p"
 ]
 }
@@ -28376,8 +28884,8 @@
 "provider": "openrouter",
 "family": "thedrummer",
 "created_at": "2024-09-30 02:00:00 +0200",
-"context_window":
-"max_output_tokens":
+"context_window": 32768,
+"max_output_tokens": null,
 "knowledge_cutoff": null,
 "modalities": {
 "input": [
@@ -28396,8 +28904,8 @@
 "pricing": {
 "text_tokens": {
 "standard": {
-"input_per_million": 0.
-"output_per_million": 0.
+"input_per_million": 0.16999999999999998,
+"output_per_million": 0.43
 }
 }
 },
@@ -28415,8 +28923,8 @@
 "instruct_type": "chatml"
 },
 "top_provider": {
-"context_length":
-"max_completion_tokens":
+"context_length": 32768,
+"max_completion_tokens": null,
 "is_moderated": false
 },
 "per_request_limits": null,
@@ -28602,8 +29110,8 @@
 "pricing": {
 "text_tokens": {
 "standard": {
-"input_per_million": 0.
-"output_per_million":
+"input_per_million": 0.55,
+"output_per_million": 1.66
 }
 }
 },
@@ -28799,8 +29307,8 @@
 "pricing": {
 "text_tokens": {
 "standard": {
-"input_per_million": 0.
-"output_per_million": 0.
+"input_per_million": 0.1999188,
+"output_per_million": 0.800064
 }
 }
 },
@@ -28987,8 +29495,8 @@
 "pricing": {
 "text_tokens": {
 "standard": {
-"input_per_million": 0.
-"output_per_million":
+"input_per_million": 0.44999999999999996,
+"output_per_million": 0.65
 }
 }
 },
@@ -29493,18 +30001,85 @@
 }
 },
 {
-"id": "x-ai/grok-
-"name": "xAI: Grok
+"id": "x-ai/grok-code-fast-1",
+"name": "xAI: Grok Code Fast 1",
 "provider": "openrouter",
 "family": "x-ai",
-"created_at": "
-"context_window":
-"max_output_tokens":
+"created_at": "2025-08-26 22:08:47 +0200",
+"context_window": 256000,
+"max_output_tokens": 10000,
 "knowledge_cutoff": null,
 "modalities": {
 "input": [
-"text"
-
+"text"
+],
+"output": [
+"text"
+]
+},
+"capabilities": [
+"streaming",
+"function_calling",
+"structured_output"
+],
+"pricing": {
+"text_tokens": {
+"standard": {
+"input_per_million": 0.19999999999999998,
+"output_per_million": 1.5,
+"cached_input_per_million": 0.02
+}
+}
+},
+"metadata": {
+"description": "Grok Code Fast 1 is a speedy and economical reasoning model that excels at agentic coding. With reasoning traces visible in the response, developers can steer Grok Code for high-quality work flows.",
+"architecture": {
+"modality": "text->text",
+"input_modalities": [
+"text"
+],
+"output_modalities": [
+"text"
+],
+"tokenizer": "Grok",
+"instruct_type": null
+},
+"top_provider": {
+"context_length": 256000,
+"max_completion_tokens": 10000,
+"is_moderated": false
+},
+"per_request_limits": null,
+"supported_parameters": [
+"include_reasoning",
+"logprobs",
+"max_tokens",
+"reasoning",
+"response_format",
+"seed",
+"stop",
+"structured_outputs",
+"temperature",
+"tool_choice",
+"tools",
+"top_logprobs",
+"top_p"
+]
+}
+},
+{
+"id": "x-ai/grok-vision-beta",
+"name": "xAI: Grok Vision Beta",
+"provider": "openrouter",
+"family": "x-ai",
+"created_at": "2024-11-19 01:37:04 +0100",
+"context_window": 8192,
+"max_output_tokens": null,
+"knowledge_cutoff": null,
+"modalities": {
+"input": [
+"text",
+"image"
 ],
 "output": [
 "text"
@@ -29679,6 +30254,7 @@
 "temperature",
 "tool_choice",
 "tools",
+"top_a",
 "top_k",
 "top_logprobs",
 "top_p"
@@ -29736,21 +30312,15 @@
 },
 "per_request_limits": null,
 "supported_parameters": [
-"frequency_penalty",
 "include_reasoning",
 "max_tokens",
-"min_p",
-"presence_penalty",
 "reasoning",
-"repetition_penalty",
 "response_format",
 "seed",
-"stop",
 "structured_outputs",
 "temperature",
 "tool_choice",
 "tools",
-"top_k",
 "top_p"
 ]
 }
@@ -29839,14 +30409,13 @@
 "capabilities": [
 "streaming",
 "function_calling",
-"structured_output",
 "predicted_outputs"
 ],
 "pricing": {
 "text_tokens": {
 "standard": {
 "input_per_million": 0.5,
-"output_per_million": 1.
+"output_per_million": 1.7999999999999998
 }
 }
 },
@@ -29879,7 +30448,6 @@
 "presence_penalty",
 "reasoning",
 "repetition_penalty",
-"response_format",
 "seed",
 "stop",
 "temperature",
@@ -29895,7 +30463,7 @@
 "name": "Sonar",
 "provider": "perplexity",
 "family": "sonar",
-"created_at": "2025-08-
+"created_at": "2025-08-27 18:49:59 +0200",
 "context_window": 128000,
 "max_output_tokens": 4096,
 "knowledge_cutoff": null,
@@ -29927,7 +30495,7 @@
 "name": "Sonar Deep Research",
 "provider": "perplexity",
 "family": "sonar_deep_research",
-"created_at": "2025-08-
+"created_at": "2025-08-27 18:49:59 +0200",
 "context_window": 128000,
 "max_output_tokens": 4096,
 "knowledge_cutoff": null,
@@ -29962,7 +30530,7 @@
 "name": "Sonar Pro",
 "provider": "perplexity",
 "family": "sonar_pro",
-"created_at": "2025-08-
+"created_at": "2025-08-27 18:49:59 +0200",
 "context_window": 200000,
 "max_output_tokens": 8192,
 "knowledge_cutoff": null,
@@ -29994,7 +30562,7 @@
 "name": "Sonar Reasoning",
 "provider": "perplexity",
 "family": "sonar_reasoning",
-"created_at": "2025-08-
+"created_at": "2025-08-27 18:49:59 +0200",
 "context_window": 128000,
 "max_output_tokens": 4096,
 "knowledge_cutoff": null,
@@ -30026,7 +30594,7 @@
 "name": "Sonar Reasoning Pro",
 "provider": "perplexity",
 "family": "sonar_reasoning_pro",
-"created_at": "2025-08-
+"created_at": "2025-08-27 18:49:59 +0200",
 "context_window": 128000,
 "max_output_tokens": 8192,
 "knowledge_cutoff": null,
@@ -30052,5 +30620,1114 @@
 }
 },
 "metadata": {}
+},
+{
+"id": "chat-bison",
+"name": "chat-bison",
+"provider": "vertexai",
+"family": "palm",
+"created_at": null,
+"context_window": null,
+"max_output_tokens": null,
+"knowledge_cutoff": null,
+"modalities": {
+"input": [],
+"output": []
+},
+"capabilities": [
+"streaming"
+],
+"pricing": {},
+"metadata": {
+"version_id": "002",
+"open_source_category": "PROPRIETARY",
+"launch_stage": "GA",
+"supported_actions": {
+"openGenie": {
+"references": {
+"us-central1": {
+"uri": "https://console.cloud.google.com/vertex-ai/generative/language/create/chat"
+}
+},
+"title": "Open Prompt Design"
+}
+},
+"publisher_model_template": "projects/{project}/locations/{location}/publishers/google/models/chat-bison@002"
+}
+},
+{
+"id": "code-bison",
+"name": "code-bison",
+"provider": "vertexai",
+"family": "palm",
+"created_at": null,
+"context_window": null,
+"max_output_tokens": null,
+"knowledge_cutoff": null,
+"modalities": {
+"input": [],
+"output": []
+},
+"capabilities": [
+"streaming"
+],
+"pricing": {},
+"metadata": {
+"version_id": "002",
+"open_source_category": null,
+"launch_stage": "GA",
+"supported_actions": null,
+"publisher_model_template": "projects/{project}/locations/{location}/publishers/google/models/code-bison@002"
+}
+},
+{
+"id": "code-gecko",
+"name": "code-gecko",
+"provider": "vertexai",
+"family": "gemini",
+"created_at": null,
+"context_window": null,
+"max_output_tokens": null,
+"knowledge_cutoff": null,
+"modalities": {
+"input": [],
+"output": []
+},
+"capabilities": [
+"streaming"
+],
+"pricing": {},
+"metadata": {
+"version_id": "002",
+"open_source_category": null,
+"launch_stage": "GA",
+"supported_actions": null,
+"publisher_model_template": "projects/{project}/locations/{location}/publishers/google/models/code-gecko@002"
+}
+},
+{
+"id": "codechat-bison",
+"name": "codechat-bison",
+"provider": "vertexai",
+"family": "palm",
+"created_at": null,
+"context_window": null,
+"max_output_tokens": null,
+"knowledge_cutoff": null,
+"modalities": {
+"input": [],
+"output": []
+},
+"capabilities": [
+"streaming"
+],
+"pricing": {},
+"metadata": {
+"version_id": "002",
+"open_source_category": null,
+"launch_stage": "GA",
+"supported_actions": null,
+"publisher_model_template": "projects/{project}/locations/{location}/publishers/google/models/codechat-bison@002"
+}
+},
+{
+"id": "gemini-1.5-flash",
+"name": "Gemini 1.5 Flash",
+"provider": "vertexai",
+"family": "gemini-1.5-flash",
+"created_at": null,
+"context_window": 1048576,
+"max_output_tokens": 8192,
+"knowledge_cutoff": null,
+"modalities": {
+"input": [
+"audio",
+"image",
+"text"
+],
+"output": [
+"text"
+]
+},
+"capabilities": [
+"function_calling",
+"structured_output"
+],
+"pricing": {
+"text_tokens": {
+"standard": {
+"input_per_million": 0.075,
+"cached_input_per_million": 0.01875,
+"output_per_million": 0.3
+}
+}
+},
+"metadata": {
+"source": "known_models"
+}
+},
+{
+"id": "gemini-1.5-flash-002",
+"name": "Gemini 1.5 Flash",
+"provider": "vertexai",
+"family": "gemini-1.5-flash",
+"created_at": null,
+"context_window": 1048576,
+"max_output_tokens": 8192,
+"knowledge_cutoff": null,
+"modalities": {
+"input": [
+"audio",
+"image",
+"text"
+],
+"output": [
+"text"
+]
+},
+"capabilities": [
+"function_calling",
+"structured_output"
+],
+"pricing": {
+"text_tokens": {
+"standard": {
+"input_per_million": 0.075,
+"cached_input_per_million": 0.01875,
+"output_per_million": 0.3
+}
+}
+},
+"metadata": {
+"source": "known_models"
+}
+},
+{
+"id": "gemini-1.5-flash-8b",
+"name": "Gemini 1.5 Flash-8B",
+"provider": "vertexai",
+"family": "gemini-1.5-flash-8b",
+"created_at": null,
+"context_window": 1048576,
+"max_output_tokens": 8192,
+"knowledge_cutoff": null,
+"modalities": {
+"input": [
+"audio",
+"image",
+"text"
+],
+"output": [
+"text"
+]
+},
+"capabilities": [
+"function_calling",
+"structured_output"
+],
+"pricing": {
+"text_tokens": {
+"standard": {
+"input_per_million": 0.075,
+"cached_input_per_million": 0.01875,
+"output_per_million": 0.3
+}
+}
+},
+"metadata": {
+"source": "known_models"
+}
+},
+{
+"id": "gemini-1.5-pro",
+"name": "Gemini 1.5 Pro",
+"provider": "vertexai",
+"family": "gemini-1.5-pro",
+"created_at": null,
+"context_window": 2097152,
+"max_output_tokens": 8192,
+"knowledge_cutoff": null,
+"modalities": {
+"input": [
+"audio",
+"image",
+"text"
+],
+"output": [
+"text"
+]
+},
+"capabilities": [
+"function_calling",
+"structured_output"
+],
+"pricing": {
+"text_tokens": {
+"standard": {
+"input_per_million": 1.25,
+"cached_input_per_million": 0.3125,
+"output_per_million": 5.0
+}
+}
+},
+"metadata": {
+"source": "known_models"
+}
+},
+{
+"id": "gemini-1.5-pro-002",
+"name": "Gemini 1.5 Pro",
+"provider": "vertexai",
+"family": "gemini-1.5-pro",
+"created_at": null,
+"context_window": 2097152,
+"max_output_tokens": 8192,
+"knowledge_cutoff": null,
+"modalities": {
+"input": [
+"audio",
+"image",
+"text"
+],
+"output": [
+"text"
+]
+},
+"capabilities": [
+"function_calling",
+"structured_output"
+],
+"pricing": {
+"text_tokens": {
+"standard": {
+"input_per_million": 1.25,
+"cached_input_per_million": 0.3125,
+"output_per_million": 5.0
+}
+}
+},
+"metadata": {
+"source": "known_models"
+}
+},
+{
+"id": "gemini-2.0-flash",
+"name": "Gemini 2.0 Flash",
+"provider": "vertexai",
+"family": "gemini-2.0-flash",
+"created_at": null,
+"context_window": 1048576,
+"max_output_tokens": 8192,
+"knowledge_cutoff": null,
+"modalities": {
+"input": [
+"audio",
+"image",
+"text"
+],
+"output": [
+"text"
+]
+},
+"capabilities": [
+"batch",
+"function_calling",
+"structured_output"
+],
+"pricing": {
+"text_tokens": {
+"standard": {
+"input_per_million": 0.1,
+"cached_input_per_million": 0.025,
+"output_per_million": 0.4
+},
+"batch": {
+"input_per_million": 0.05,
+"output_per_million": 0.2
+}
+}
+},
+"metadata": {
+"source": "known_models"
+}
+},
+{
+"id": "gemini-2.0-flash-001",
+"name": "Gemini 2.0 Flash",
+"provider": "vertexai",
+"family": "gemini-2.0-flash",
+"created_at": null,
+"context_window": 1048576,
+"max_output_tokens": 8192,
+"knowledge_cutoff": null,
+"modalities": {
+"input": [
+"audio",
+"image",
+"text"
+],
+"output": [
+"text"
+]
+},
+"capabilities": [
+"batch",
+"function_calling",
+"structured_output"
+],
+"pricing": {
+"text_tokens": {
+"standard": {
+"input_per_million": 0.1,
+"cached_input_per_million": 0.025,
+"output_per_million": 0.4
+},
+"batch": {
+"input_per_million": 0.05,
+"output_per_million": 0.2
+}
+}
+},
+"metadata": {
+"source": "known_models"
+}
+},
+{
+"id": "gemini-2.0-flash-exp",
+"name": "Gemini 2.0 Flash",
+"provider": "vertexai",
+"family": "gemini-2.0-flash",
+"created_at": null,
+"context_window": 1048576,
+"max_output_tokens": 8192,
+"knowledge_cutoff": null,
+"modalities": {
+"input": [
+"audio",
+"image",
+"text"
+],
+"output": [
+"text"
+]
+},
+"capabilities": [
+"batch",
+"function_calling",
+"structured_output"
+],
+"pricing": {
+"text_tokens": {
+"standard": {
+"input_per_million": 0.1,
+"cached_input_per_million": 0.025,
+"output_per_million": 0.4
+},
+"batch": {
+"input_per_million": 0.05,
+"output_per_million": 0.2
+}
+}
+},
+"metadata": {
+"source": "known_models"
+}
+},
+{
+"id": "gemini-2.0-flash-lite-001",
+"name": "Gemini 2.0 Flash-Lite",
+"provider": "vertexai",
+"family": "gemini-2.0-flash-lite",
+"created_at": null,
+"context_window": 1048576,
+"max_output_tokens": 8192,
+"knowledge_cutoff": null,
+"modalities": {
+"input": [
+"audio",
+"image",
+"text"
+],
+"output": [
+"text"
+]
+},
+"capabilities": [
+"batch",
+"function_calling",
+"structured_output"
+],
+"pricing": {
+"text_tokens": {
+"standard": {
+"input_per_million": 0.1,
+"cached_input_per_million": 0.025,
+"output_per_million": 0.4
+},
+"batch": {
+"input_per_million": 0.05,
+"output_per_million": 0.2
+}
+}
+},
+"metadata": {
+"source": "known_models"
+}
+},
+{
+"id": "gemini-2.5-flash",
+"name": "Gemini 2.5 Flash",
+"provider": "vertexai",
+"family": "gemini-2.5-flash",
+"created_at": null,
+"context_window": 1048576,
+"max_output_tokens": 65536,
+"knowledge_cutoff": null,
+"modalities": {
+"input": [
+"audio",
+"image",
+"text"
+],
+"output": [
+"text"
+]
+},
+"capabilities": [
+"batch",
+"function_calling",
+"structured_output"
+],
+"pricing": {
+"text_tokens": {
+"standard": {
+"input_per_million": 0.3,
+"cached_input_per_million": 0.075,
+"output_per_million": 2.5
+},
+"batch": {
+"input_per_million": 0.15,
+"output_per_million": 1.25
+}
+}
+},
+"metadata": {
+"source": "known_models"
+}
+},
+{
+"id": "gemini-2.5-flash-lite",
+"name": "Gemini 2.5 Flash-Lite",
+"provider": "vertexai",
+"family": "gemini-2.5-flash-lite",
+"created_at": null,
+"context_window": 1048576,
+"max_output_tokens": 65536,
+"knowledge_cutoff": null,
+"modalities": {
+"input": [
+"audio",
+"image",
+"text"
+],
+"output": [
+"text"
+]
+},
+"capabilities": [
+"batch",
+"function_calling",
+"structured_output"
+],
+"pricing": {
+"text_tokens": {
+"standard": {
+"input_per_million": 0.3,
+"cached_input_per_million": 0.075,
+"output_per_million": 2.5
+},
+"batch": {
+"input_per_million": 0.15,
+"output_per_million": 1.25
+}
+}
+},
+"metadata": {
+"source": "known_models"
+}
+},
+{
+"id": "gemini-2.5-pro",
+"name": "Gemini 2.5 Pro",
+"provider": "vertexai",
+"family": "gemini-2.5-pro",
+"created_at": null,
+"context_window": 1048576,
+"max_output_tokens": 65536,
+"knowledge_cutoff": null,
+"modalities": {
+"input": [
+"audio",
+"image",
+"text"
+],
+"output": [
+"text"
+]
+},
+"capabilities": [
+"batch",
+"function_calling",
+"structured_output"
+],
+"pricing": {
+"text_tokens": {
+"standard": {
+"input_per_million": 1.25,
+"cached_input_per_million": 0.31,
+"output_per_million": 10.0
+},
+"batch": {
+"input_per_million": 0.625,
+"output_per_million": 5.0
+}
+}
+},
+"metadata": {
+"source": "known_models"
+}
+},
+{
+"id": "gemini-embedding-001",
+"name": "gemini-embedding-001",
+"provider": "vertexai",
+"family": "gemini",
+"created_at": null,
+"context_window": null,
+"max_output_tokens": null,
+"knowledge_cutoff": null,
+"modalities": {
+"input": [],
+"output": []
+},
+"capabilities": [
+"streaming",
+"function_calling"
+],
+"pricing": {},
+"metadata": {
+"source": "known_models"
+}
+},
+{
+"id": "gemini-exp-1121",
+"name": "gemini-exp-1121",
+"provider": "vertexai",
+"family": "gemini",
+"created_at": null,
+"context_window": null,
+"max_output_tokens": null,
+"knowledge_cutoff": null,
+"modalities": {
+"input": [],
+"output": []
+},
+"capabilities": [
+"streaming",
+"function_calling"
+],
+"pricing": {},
+"metadata": {
+"source": "known_models"
+}
+},
+{
+"id": "gemini-exp-1206",
+"name": "gemini-exp-1206",
+"provider": "vertexai",
+"family": "gemini",
+"created_at": null,
+"context_window": null,
+"max_output_tokens": null,
+"knowledge_cutoff": null,
+"modalities": {
+"input": [],
+"output": []
+},
+"capabilities": [
+"streaming",
+"function_calling"
+],
+"pricing": {},
+"metadata": {
+"source": "known_models"
+}
+},
+{
+"id": "gemini-pro",
+"name": "gemini-pro",
+"provider": "vertexai",
+"family": "gemini",
+"created_at": null,
+"context_window": null,
+"max_output_tokens": null,
+"knowledge_cutoff": null,
+"modalities": {
+"input": [],
+"output": []
+},
+"capabilities": [
+"streaming",
+"function_calling"
+],
+"pricing": {},
+"metadata": {
+"source": "known_models"
+}
+},
+{
+"id": "gemini-pro-vision",
+"name": "gemini-pro-vision",
+"provider": "vertexai",
+"family": "gemini",
+"created_at": null,
+"context_window": null,
+"max_output_tokens": null,
+"knowledge_cutoff": null,
+"modalities": {
+"input": [],
+"output": []
+},
+"capabilities": [
+"streaming",
+"function_calling"
+],
+"pricing": {},
+"metadata": {
+"source": "known_models"
+}
+},
+{
+"id": "image-segmentation-001",
+"name": "image-segmentation-001",
+"provider": "vertexai",
+"family": "gemini",
+"created_at": null,
+"context_window": null,
+"max_output_tokens": null,
+"knowledge_cutoff": null,
+"modalities": {
+"input": [],
+"output": []
+},
+"capabilities": [
+"streaming"
+],
+"pricing": {},
+"metadata": {
+"version_id": "default",
+"open_source_category": null,
+"launch_stage": "PUBLIC_PREVIEW",
+"supported_actions": {
+"openNotebook": {
+"references": {
+"europe-west1": {
+"uri": "https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/vision/getting-started/image_segmentation.ipynb"
+}
+},
+"title": "Open Notebook"
+},
+"requestAccess": {
+"references": {
+"europe-west1": {
+"uri": "https://docs.google.com/forms/d/e/1FAIpQLSdzIR1EeQGFcMsqd9nPip5e9ovDKSjfWRd58QVjo1zLpfdvEg/viewform?resourcekey=0-Pvqc66u-0Z1QmuzHq4wLKg"
+}
+}
+},
+"openNotebooks": {
+"notebooks": [
+{
+"references": {
+"europe-west1": {
+"uri": "https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/vision/getting-started/image_segmentation.ipynb"
+}
+},
+"title": "Open Notebook"
+}
+]
+}
+},
+"publisher_model_template": "projects/{project}/locations/{location}/publishers/google/models/image-segmentation-001@default"
+}
+},
+{
+"id": "imagegeneration",
+"name": "imagegeneration",
+"provider": "vertexai",
+"family": "gemini",
+"created_at": null,
+"context_window": null,
+"max_output_tokens": null,
+"knowledge_cutoff": null,
+"modalities": {
+"input": [],
+"output": []
+},
+"capabilities": [
+"streaming"
+],
+"pricing": {},
+"metadata": {
+"version_id": "006",
+"open_source_category": "PROPRIETARY",
|
+
"launch_stage": "PUBLIC_PREVIEW",
|
31384
|
+
"supported_actions": {
|
31385
|
+
"openGenerationAiStudio": {
|
31386
|
+
"references": {
|
31387
|
+
"europe-west1": {
|
31388
|
+
"uri": "https://cloud.google.com/console/vertex-ai/generative/vision"
|
31389
|
+
}
|
31390
|
+
},
|
31391
|
+
"title": "Open Vertex AI Studio"
|
31392
|
+
}
|
31393
|
+
},
|
31394
|
+
"publisher_model_template": "projects/{project}/locations/{location}/publishers/google/models/imagegeneration@006"
|
31395
|
+
}
|
31396
|
+
},
|
31397
|
+
{
|
31398
|
+
"id": "imagen-4.0-fast-generate-001",
|
31399
|
+
"name": "imagen-4.0-fast-generate-001",
|
31400
|
+
"provider": "vertexai",
|
31401
|
+
"family": "gemini",
|
31402
|
+
"created_at": null,
|
31403
|
+
"context_window": null,
|
31404
|
+
"max_output_tokens": null,
|
31405
|
+
"knowledge_cutoff": null,
|
31406
|
+
"modalities": {
|
31407
|
+
"input": [],
|
31408
|
+
"output": []
|
31409
|
+
},
|
31410
|
+
"capabilities": [
|
31411
|
+
"streaming"
|
31412
|
+
],
|
31413
|
+
"pricing": {},
|
31414
|
+
"metadata": {
|
31415
|
+
"version_id": "default",
|
31416
|
+
"open_source_category": null,
|
31417
|
+
"launch_stage": "GA",
|
31418
|
+
"supported_actions": {
|
31419
|
+
"openGenerationAiStudio": {
|
31420
|
+
"references": {
|
31421
|
+
"europe-west1": {
|
31422
|
+
"uri": "https://console.cloud.google.com/vertex-ai/studio/media/generate"
|
31423
|
+
}
|
31424
|
+
},
|
31425
|
+
"title": "Open Vertex AI Studio"
|
31426
|
+
}
|
31427
|
+
},
|
31428
|
+
"publisher_model_template": "projects/{project}/locations/{location}/publishers/google/models/imagen-4.0-fast-generate-001@default"
|
31429
|
+
}
|
31430
|
+
},
|
31431
|
+
{
|
31432
|
+
"id": "imagen-4.0-generate-001",
|
31433
|
+
"name": "imagen-4.0-generate-001",
|
31434
|
+
"provider": "vertexai",
|
31435
|
+
"family": "gemini",
|
31436
|
+
"created_at": null,
|
31437
|
+
"context_window": null,
|
31438
|
+
"max_output_tokens": null,
|
31439
|
+
"knowledge_cutoff": null,
|
31440
|
+
"modalities": {
|
31441
|
+
"input": [],
|
31442
|
+
"output": []
|
31443
|
+
},
|
31444
|
+
"capabilities": [
|
31445
|
+
"streaming"
|
31446
|
+
],
|
31447
|
+
"pricing": {},
|
31448
|
+
"metadata": {
|
31449
|
+
"version_id": "default",
|
31450
|
+
"open_source_category": null,
|
31451
|
+
"launch_stage": "GA",
|
31452
|
+
"supported_actions": {
|
31453
|
+
"openGenerationAiStudio": {
|
31454
|
+
"references": {
|
31455
|
+
"europe-west1": {
|
31456
|
+
"uri": "https://console.cloud.google.com/vertex-ai/studio/media/generate"
|
31457
|
+
}
|
31458
|
+
},
|
31459
|
+
"title": "Open Vertex AI Studio"
|
31460
|
+
}
|
31461
|
+
},
|
31462
|
+
"publisher_model_template": "projects/{project}/locations/{location}/publishers/google/models/imagen-4.0-generate-001@default"
|
31463
|
+
}
|
31464
|
+
},
|
31465
|
+
{
|
31466
|
+
"id": "imagen-4.0-ultra-generate-001",
|
31467
|
+
"name": "imagen-4.0-ultra-generate-001",
|
31468
|
+
"provider": "vertexai",
|
31469
|
+
"family": "gemini",
|
31470
|
+
"created_at": null,
|
31471
|
+
"context_window": null,
|
31472
|
+
"max_output_tokens": null,
|
31473
|
+
"knowledge_cutoff": null,
|
31474
|
+
"modalities": {
|
31475
|
+
"input": [],
|
31476
|
+
"output": []
|
31477
|
+
},
|
31478
|
+
"capabilities": [
|
31479
|
+
"streaming"
|
31480
|
+
],
|
31481
|
+
"pricing": {},
|
31482
|
+
"metadata": {
|
31483
|
+
"version_id": "default",
|
31484
|
+
"open_source_category": null,
|
31485
|
+
"launch_stage": "GA",
|
31486
|
+
"supported_actions": {
|
31487
|
+
"openGenerationAiStudio": {
|
31488
|
+
"references": {
|
31489
|
+
"europe-west1": {
|
31490
|
+
"uri": "https://console.cloud.google.com/vertex-ai/studio/media/generate"
|
31491
|
+
}
|
31492
|
+
},
|
31493
|
+
"title": "Open Vertex AI Studio"
|
31494
|
+
}
|
31495
|
+
},
|
31496
|
+
"publisher_model_template": "projects/{project}/locations/{location}/publishers/google/models/imagen-4.0-ultra-generate-001@default"
|
31497
|
+
}
|
31498
|
+
},
|
31499
|
+
{
|
31500
|
+
"id": "imagetext",
|
31501
|
+
"name": "imagetext",
|
31502
|
+
"provider": "vertexai",
|
31503
|
+
"family": "gemini",
|
31504
|
+
"created_at": null,
|
31505
|
+
"context_window": null,
|
31506
|
+
"max_output_tokens": null,
|
31507
|
+
"knowledge_cutoff": null,
|
31508
|
+
"modalities": {
|
31509
|
+
"input": [],
|
31510
|
+
"output": []
|
31511
|
+
},
|
31512
|
+
"capabilities": [
|
31513
|
+
"streaming"
|
31514
|
+
],
|
31515
|
+
"pricing": {},
|
31516
|
+
"metadata": {
|
31517
|
+
"version_id": "001",
|
31518
|
+
"open_source_category": "PROPRIETARY",
|
31519
|
+
"launch_stage": "GA",
|
31520
|
+
"supported_actions": {
|
31521
|
+
"openGenerationAiStudio": {
|
31522
|
+
"references": {
|
31523
|
+
"us-central1": {
|
31524
|
+
"uri": "https://cloud.google.com/console/vertex-ai/generative/vision"
|
31525
|
+
}
|
31526
|
+
},
|
31527
|
+
"title": "Open Vertex AI Studio"
|
31528
|
+
}
|
31529
|
+
},
|
31530
|
+
"publisher_model_template": "projects/{project}/locations/{location}/publishers/google/models/imagetext@001"
|
31531
|
+
}
|
31532
|
+
},
|
31533
|
+
{
|
31534
|
+
"id": "multimodalembedding",
|
31535
|
+
"name": "multimodalembedding",
|
31536
|
+
"provider": "vertexai",
|
31537
|
+
"family": "gemini",
|
31538
|
+
"created_at": null,
|
31539
|
+
"context_window": null,
|
31540
|
+
"max_output_tokens": null,
|
31541
|
+
"knowledge_cutoff": null,
|
31542
|
+
"modalities": {
|
31543
|
+
"input": [],
|
31544
|
+
"output": []
|
31545
|
+
},
|
31546
|
+
"capabilities": [
|
31547
|
+
"streaming"
|
31548
|
+
],
|
31549
|
+
"pricing": {},
|
31550
|
+
"metadata": {
|
31551
|
+
"version_id": "001",
|
31552
|
+
"open_source_category": "PROPRIETARY",
|
31553
|
+
"launch_stage": "GA",
|
31554
|
+
"supported_actions": null,
|
31555
|
+
"publisher_model_template": "projects/{project}/locations/{location}/publishers/google/models/multimodalembedding@001"
|
31556
|
+
}
|
31557
|
+
},
|
31558
|
+
{
|
31559
|
+
"id": "text-bison",
|
31560
|
+
"name": "text-bison",
|
31561
|
+
"provider": "vertexai",
|
31562
|
+
"family": "palm",
|
31563
|
+
"created_at": null,
|
31564
|
+
"context_window": null,
|
31565
|
+
"max_output_tokens": null,
|
31566
|
+
"knowledge_cutoff": null,
|
31567
|
+
"modalities": {
|
31568
|
+
"input": [],
|
31569
|
+
"output": []
|
31570
|
+
},
|
31571
|
+
"capabilities": [
|
31572
|
+
"streaming"
|
31573
|
+
],
|
31574
|
+
"pricing": {},
|
31575
|
+
"metadata": {
|
31576
|
+
"version_id": "002",
|
31577
|
+
"open_source_category": "PROPRIETARY",
|
31578
|
+
"launch_stage": "GA",
|
31579
|
+
"supported_actions": {
|
31580
|
+
"openGenie": {
|
31581
|
+
"references": {
|
31582
|
+
"us-central1": {
|
31583
|
+
"uri": "https://console.cloud.google.com/vertex-ai/generative/language/create/text"
|
31584
|
+
}
|
31585
|
+
},
|
31586
|
+
"title": "Open Prompt Design"
|
31587
|
+
},
|
31588
|
+
"openEvaluationPipeline": {
|
31589
|
+
"references": {
|
31590
|
+
"us-central1": {
|
31591
|
+
"uri": "https://console.cloud.google.com/vertex-ai/pipelines/vertex-ai-templates/evaluation-llm-text-generation-pipeline"
|
31592
|
+
}
|
31593
|
+
},
|
31594
|
+
"title": "Evaluate"
|
31595
|
+
}
|
31596
|
+
},
|
31597
|
+
"publisher_model_template": "projects/{project}/locations/{location}/publishers/google/models/text-bison@002"
|
31598
|
+
}
|
31599
|
+
},
|
31600
|
+
{
|
31601
|
+
"id": "text-embedding-004",
|
31602
|
+
"name": "text-embedding-004",
|
31603
|
+
"provider": "vertexai",
|
31604
|
+
"family": "text-embedding",
|
31605
|
+
"created_at": null,
|
31606
|
+
"context_window": null,
|
31607
|
+
"max_output_tokens": null,
|
31608
|
+
"knowledge_cutoff": null,
|
31609
|
+
"modalities": {
|
31610
|
+
"input": [],
|
31611
|
+
"output": []
|
31612
|
+
},
|
31613
|
+
"capabilities": [
|
31614
|
+
"streaming",
|
31615
|
+
"function_calling"
|
31616
|
+
],
|
31617
|
+
"pricing": {},
|
31618
|
+
"metadata": {
|
31619
|
+
"source": "known_models"
|
31620
|
+
}
|
31621
|
+
},
|
31622
|
+
{
|
31623
|
+
"id": "text-embedding-005",
|
31624
|
+
"name": "text-embedding-005",
|
31625
|
+
"provider": "vertexai",
|
31626
|
+
"family": "text-embedding",
|
31627
|
+
"created_at": null,
|
31628
|
+
"context_window": null,
|
31629
|
+
"max_output_tokens": null,
|
31630
|
+
"knowledge_cutoff": null,
|
31631
|
+
"modalities": {
|
31632
|
+
"input": [],
|
31633
|
+
"output": []
|
31634
|
+
},
|
31635
|
+
"capabilities": [
|
31636
|
+
"streaming",
|
31637
|
+
"function_calling"
|
31638
|
+
],
|
31639
|
+
"pricing": {},
|
31640
|
+
"metadata": {
|
31641
|
+
"source": "known_models"
|
31642
|
+
}
|
31643
|
+
},
|
31644
|
+
{
|
31645
|
+
"id": "text-multilingual-embedding-002",
|
31646
|
+
"name": "text-multilingual-embedding-002",
|
31647
|
+
"provider": "vertexai",
|
31648
|
+
"family": "gemini",
|
31649
|
+
"created_at": null,
|
31650
|
+
"context_window": null,
|
31651
|
+
"max_output_tokens": null,
|
31652
|
+
"knowledge_cutoff": null,
|
31653
|
+
"modalities": {
|
31654
|
+
"input": [],
|
31655
|
+
"output": []
|
31656
|
+
},
|
31657
|
+
"capabilities": [
|
31658
|
+
"streaming",
|
31659
|
+
"function_calling"
|
31660
|
+
],
|
31661
|
+
"pricing": {},
|
31662
|
+
"metadata": {
|
31663
|
+
"source": "known_models"
|
31664
|
+
}
|
31665
|
+
},
|
31666
|
+
{
|
31667
|
+
"id": "text-unicorn",
|
31668
|
+
"name": "text-unicorn",
|
31669
|
+
"provider": "vertexai",
|
31670
|
+
"family": "gemini",
|
31671
|
+
"created_at": null,
|
31672
|
+
"context_window": null,
|
31673
|
+
"max_output_tokens": null,
|
31674
|
+
"knowledge_cutoff": null,
|
31675
|
+
"modalities": {
|
31676
|
+
"input": [],
|
31677
|
+
"output": []
|
31678
|
+
},
|
31679
|
+
"capabilities": [
|
31680
|
+
"streaming"
|
31681
|
+
],
|
31682
|
+
"pricing": {},
|
31683
|
+
"metadata": {
|
31684
|
+
"version_id": "001",
|
31685
|
+
"open_source_category": "PROPRIETARY",
|
31686
|
+
"launch_stage": "GA",
|
31687
|
+
"supported_actions": {
|
31688
|
+
"openGenie": {
|
31689
|
+
"references": {
|
31690
|
+
"europe-west1": {
|
31691
|
+
"uri": "https://console.cloud.google.com/vertex-ai/generative/language/create/text"
|
31692
|
+
}
|
31693
|
+
},
|
31694
|
+
"title": "Open in Vertex AI Studio"
|
31695
|
+
},
|
31696
|
+
"openEvaluationPipeline": {
|
31697
|
+
"references": {
|
31698
|
+
"europe-west1": {
|
31699
|
+
"uri": "https://console.cloud.google.com/vertex-ai/pipelines/vertex-ai-templates/evaluation-llm-text-generation-pipeline"
|
31700
|
+
}
|
31701
|
+
},
|
31702
|
+
"title": "Evaluate"
|
31703
|
+
}
|
31704
|
+
},
|
31705
|
+
"publisher_model_template": "projects/{project}/locations/{location}/publishers/google/models/text-unicorn@001"
|
31706
|
+
}
|
31707
|
+
},
|
31708
|
+
{
|
31709
|
+
"id": "textembedding-gecko",
|
31710
|
+
"name": "textembedding-gecko",
|
31711
|
+
"provider": "vertexai",
|
31712
|
+
"family": "gemini",
|
31713
|
+
"created_at": null,
|
31714
|
+
"context_window": null,
|
31715
|
+
"max_output_tokens": null,
|
31716
|
+
"knowledge_cutoff": null,
|
31717
|
+
"modalities": {
|
31718
|
+
"input": [],
|
31719
|
+
"output": []
|
31720
|
+
},
|
31721
|
+
"capabilities": [
|
31722
|
+
"streaming"
|
31723
|
+
],
|
31724
|
+
"pricing": {},
|
31725
|
+
"metadata": {
|
31726
|
+
"version_id": "003",
|
31727
|
+
"open_source_category": "PROPRIETARY",
|
31728
|
+
"launch_stage": "GA",
|
31729
|
+
"supported_actions": null,
|
31730
|
+
"publisher_model_template": "projects/{project}/locations/{location}/publishers/google/models/textembedding-gecko@003"
|
31731
|
+
}
|
30055
31732
|
}
|
30056
31733
|
]
|