RubyGems - dify_llm - Versions diffs - 1.7.1 → 1.8.1 - Mend

dify_llm 1.7.1 → 1.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

checksums.yaml +4 -4
data/README.md +16 -3
data/lib/dify_llm.rb +3 -0
data/lib/generators/ruby_llm/chat_ui/chat_ui_generator.rb +14 -2
data/lib/generators/ruby_llm/chat_ui/templates/jobs/chat_response_job.rb.tt +1 -1
data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_content.html.erb.tt +1 -0
data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_message.html.erb.tt +1 -1
data/lib/generators/ruby_llm/install/install_generator.rb +8 -2
data/lib/generators/ruby_llm/install/templates/add_references_to_chats_tool_calls_and_messages_migration.rb.tt +9 -0
data/lib/generators/ruby_llm/install/templates/create_chats_migration.rb.tt +0 -1
data/lib/generators/ruby_llm/install/templates/create_messages_migration.rb.tt +0 -3
data/lib/generators/ruby_llm/install/templates/create_tool_calls_migration.rb.tt +0 -1
data/lib/ruby_llm/attachment.rb +5 -0
data/lib/ruby_llm/configuration.rb +2 -0
data/lib/ruby_llm/mime_type.rb +4 -0
data/lib/ruby_llm/model/info.rb +4 -0
data/lib/ruby_llm/models.json +1360 -1245
data/lib/ruby_llm/models.rb +7 -3
data/lib/ruby_llm/moderation.rb +56 -0
data/lib/ruby_llm/provider.rb +6 -0
data/lib/ruby_llm/providers/gemini/capabilities.rb +5 -0
data/lib/ruby_llm/providers/openai/capabilities.rb +15 -7
data/lib/ruby_llm/providers/openai/moderation.rb +34 -0
data/lib/ruby_llm/providers/openai.rb +1 -0
data/lib/ruby_llm/railtie.rb +1 -1
data/lib/ruby_llm/version.rb +1 -1
data/lib/ruby_llm.rb +5 -0
metadata +10 -4

data/lib/ruby_llm/models.json CHANGED Viewed

@@ -3,7 +3,7 @@
     "id": "claude-3-5-haiku-20241022",
     "name": "Claude Haiku 3.5",
     "provider": "anthropic",
-    "family": "claude-3-5-haiku-latest",
+    "family": "claude-3-5-haiku",
     "created_at": null,
     "context_window": 200000,
     "max_output_tokens": 8192,
@@ -111,7 +111,7 @@
     "id": "claude-3-7-sonnet-20250219",
     "name": "Claude Sonnet 3.7",
     "provider": "anthropic",
-    "family": "claude-3-7-sonnet-latest",
+    "family": "claude-3-7-sonnet",
     "created_at": null,
     "context_window": 200000,
     "max_output_tokens": 64000,
@@ -245,7 +245,7 @@
     "id": "claude-opus-4-20250514",
     "name": "Claude Opus 4",
     "provider": "anthropic",
-    "family": "claude-opus-4-0",
+    "family": "claude-opus-4",
     "created_at": null,
     "context_window": 200000,
     "max_output_tokens": 32000,
@@ -277,7 +277,7 @@
     "id": "claude-sonnet-4-20250514",
     "name": "Claude Sonnet 4",
     "provider": "anthropic",
-    "family": "claude-sonnet-4-0",
+    "family": "claude-sonnet-4",
     "created_at": null,
     "context_window": 200000,
     "max_output_tokens": 64000,
@@ -1249,55 +1249,6 @@
       ]
     }
   },
-  {
-    "id": "anthropic.claude-instant-v1",
-    "name": "Claude Instant",
-    "provider": "bedrock",
-    "family": "claude_instant",
-    "created_at": null,
-    "context_window": 200000,
-    "max_output_tokens": 4096,
-    "knowledge_cutoff": null,
-    "modalities": {
-      "input": [
-        "text",
-        "image",
-        "pdf"
-      ],
-      "output": [
-        "text"
-      ]
-    },
-    "capabilities": [
-      "streaming",
-      "function_calling"
-    ],
-    "pricing": {
-      "text_tokens": {
-        "standard": {
-          "input_per_million": 0.8,
-          "output_per_million": 2.4
-        },
-        "batch": {
-          "input_per_million": 0.4,
-          "output_per_million": 1.2
-        }
-      }
-    },
-    "metadata": {
-      "provider_name": "Anthropic",
-      "inference_types": [
-        "ON_DEMAND"
-      ],
-      "streaming_supported": true,
-      "input_modalities": [
-        "TEXT"
-      ],
-      "output_modalities": [
-        "TEXT"
-      ]
-    }
-  },
   {
     "id": "anthropic.claude-instant-v1:2:100k",
     "name": "Claude Instant",
@@ -1347,55 +1298,6 @@
       ]
     }
   },
-  {
-    "id": "anthropic.claude-v2",
-    "name": "Claude",
-    "provider": "bedrock",
-    "family": "claude2",
-    "created_at": null,
-    "context_window": 200000,
-    "max_output_tokens": 4096,
-    "knowledge_cutoff": null,
-    "modalities": {
-      "input": [
-        "text",
-        "image",
-        "pdf"
-      ],
-      "output": [
-        "text"
-      ]
-    },
-    "capabilities": [
-      "streaming",
-      "function_calling"
-    ],
-    "pricing": {
-      "text_tokens": {
-        "standard": {
-          "input_per_million": 8.0,
-          "output_per_million": 24.0
-        },
-        "batch": {
-          "input_per_million": 4.0,
-          "output_per_million": 12.0
-        }
-      }
-    },
-    "metadata": {
-      "provider_name": "Anthropic",
-      "inference_types": [
-        "ON_DEMAND"
-      ],
-      "streaming_supported": true,
-      "input_modalities": [
-        "TEXT"
-      ],
-      "output_modalities": [
-        "TEXT"
-      ]
-    }
-  },
   {
     "id": "anthropic.claude-v2:0:100k",
     "name": "Claude",
@@ -1494,55 +1396,6 @@
       ]
     }
   },
-  {
-    "id": "anthropic.claude-v2:1",
-    "name": "Claude",
-    "provider": "bedrock",
-    "family": "claude2",
-    "created_at": null,
-    "context_window": 200000,
-    "max_output_tokens": 4096,
-    "knowledge_cutoff": null,
-    "modalities": {
-      "input": [
-        "text",
-        "image",
-        "pdf"
-      ],
-      "output": [
-        "text"
-      ]
-    },
-    "capabilities": [
-      "streaming",
-      "function_calling"
-    ],
-    "pricing": {
-      "text_tokens": {
-        "standard": {
-          "input_per_million": 8.0,
-          "output_per_million": 24.0
-        },
-        "batch": {
-          "input_per_million": 4.0,
-          "output_per_million": 12.0
-        }
-      }
-    },
-    "metadata": {
-      "provider_name": "Anthropic",
-      "inference_types": [
-        "ON_DEMAND"
-      ],
-      "streaming_supported": true,
-      "input_modalities": [
-        "TEXT"
-      ],
-      "output_modalities": [
-        "TEXT"
-      ]
-    }
-  },
   {
     "id": "anthropic.claude-v2:1:18k",
     "name": "Claude",
@@ -2704,7 +2557,8 @@
       "input": [
         "text",
         "image",
-        "pdf"
+        "pdf",
+        "video"
       ],
       "output": [
         "text"
@@ -2750,7 +2604,8 @@
       "input": [
         "text",
         "image",
-        "pdf"
+        "pdf",
+        "video"
       ],
       "output": [
         "text"
@@ -2869,7 +2724,8 @@
       "input": [
         "text",
         "image",
-        "pdf"
+        "pdf",
+        "video"
       ],
       "output": [
         "text"
@@ -2918,7 +2774,8 @@
       "input": [
         "text",
         "image",
-        "pdf"
+        "pdf",
+        "video"
       ],
       "output": [
         "text"
@@ -2967,7 +2824,8 @@
       "input": [
         "text",
         "image",
-        "pdf"
+        "pdf",
+        "video"
       ],
       "output": [
         "text"
@@ -3016,7 +2874,8 @@
       "input": [
         "text",
         "image",
-        "pdf"
+        "pdf",
+        "video"
       ],
       "output": [
         "text"
@@ -3064,7 +2923,8 @@
       "input": [
         "text",
         "image",
-        "pdf"
+        "pdf",
+        "video"
       ],
       "output": [
         "text"
@@ -3325,7 +3185,8 @@
       "input": [
         "text",
         "image",
-        "pdf"
+        "pdf",
+        "video"
       ],
       "output": [
         "text"
@@ -3543,7 +3404,8 @@
       "input": [
         "text",
         "image",
-        "pdf"
+        "pdf",
+        "video"
       ],
       "output": [
         "text"
@@ -3591,7 +3453,8 @@
       "input": [
         "text",
         "image",
-        "pdf"
+        "pdf",
+        "video"
       ],
       "output": [
         "text"
@@ -3639,7 +3502,8 @@
       "input": [
         "text",
         "image",
-        "pdf"
+        "pdf",
+        "video"
       ],
       "output": [
         "text"
@@ -3727,7 +3591,8 @@
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text"
+        "text",
+        "video"
       ],
       "output": [
         "text",
@@ -3769,7 +3634,8 @@
       "input": [
         "text",
         "image",
-        "pdf"
+        "pdf",
+        "video"
       ],
       "output": [
         "text",
@@ -3823,7 +3689,8 @@
       "input": [
         "text",
         "image",
-        "pdf"
+        "pdf",
+        "video"
       ],
       "output": [
         "text",
@@ -3877,7 +3744,8 @@
       "input": [
         "text",
         "image",
-        "pdf"
+        "pdf",
+        "video"
       ],
       "output": [
         "text"
@@ -4781,6 +4649,36 @@
       "owned_by": "mistralai"
     }
   },
+  {
+    "id": "magistral-medium-2509",
+    "name": "Magistral Medium 2509",
+    "provider": "mistral",
+    "family": "mistral",
+    "created_at": null,
+    "context_window": 32768,
+    "max_output_tokens": 8192,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "text"
+      ],
+      "output": [
+        "text"
+      ]
+    },
+    "capabilities": [
+      "streaming",
+      "function_calling",
+      "structured_output",
+      "reasoning",
+      "batch"
+    ],
+    "pricing": {},
+    "metadata": {
+      "object": "model",
+      "owned_by": "mistralai"
+    }
+  },
   {
     "id": "magistral-medium-latest",
     "name": "Magistral Medium Latest",
@@ -4871,6 +4769,36 @@
       "owned_by": "mistralai"
     }
   },
+  {
+    "id": "magistral-small-2509",
+    "name": "Magistral Small 2509",
+    "provider": "mistral",
+    "family": "mistral",
+    "created_at": null,
+    "context_window": 32768,
+    "max_output_tokens": 8192,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "text"
+      ],
+      "output": [
+        "text"
+      ]
+    },
+    "capabilities": [
+      "streaming",
+      "function_calling",
+      "structured_output",
+      "reasoning",
+      "batch"
+    ],
+    "pricing": {},
+    "metadata": {
+      "object": "model",
+      "owned_by": "mistralai"
+    }
+  },
   {
     "id": "magistral-small-latest",
     "name": "Magistral Small Latest",
@@ -5045,6 +4973,30 @@
       "owned_by": "mistralai"
     }
   },
+  {
+    "id": "mistral-embed-2312",
+    "name": "Mistral Embed",
+    "provider": "mistral",
+    "family": "mistral-embed",
+    "created_at": null,
+    "context_window": 32768,
+    "max_output_tokens": 8192,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "text"
+      ],
+      "output": [
+        "embeddings"
+      ]
+    },
+    "capabilities": [],
+    "pricing": {},
+    "metadata": {
+      "object": "model",
+      "owned_by": "mistralai"
+    }
+  },
   {
     "id": "mistral-large-2407",
     "name": "Mistral Large",
@@ -8018,14 +7970,16 @@
     "id": "gpt-5",
     "name": "GPT-5",
     "provider": "openai",
-    "family": "other",
+    "family": "gpt5",
     "created_at": "2025-08-05 22:29:37 +0200",
-    "context_window": 4096,
-    "max_output_tokens": 16384,
+    "context_window": 128000,
+    "max_output_tokens": 400000,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text"
+        "text",
+        "image",
+        "pdf"
       ],
       "output": [
         "text"
@@ -8033,13 +7987,16 @@
     },
     "capabilities": [
       "streaming",
+      "function_calling",
+      "structured_output",
       "reasoning"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.5,
-          "output_per_million": 1.5
+          "input_per_million": 1.25,
+          "output_per_million": 10.0,
+          "cached_input_per_million": 0.125
         }
       }
     },
@@ -8052,14 +8009,16 @@
     "id": "gpt-5-2025-08-07",
     "name": "GPT-5 20250807",
     "provider": "openai",
-    "family": "other",
+    "family": "gpt5",
     "created_at": "2025-08-01 21:09:20 +0200",
-    "context_window": 4096,
-    "max_output_tokens": 16384,
+    "context_window": 128000,
+    "max_output_tokens": 400000,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text"
+        "text",
+        "image",
+        "pdf"
       ],
       "output": [
         "text"
@@ -8067,13 +8026,16 @@
     },
     "capabilities": [
       "streaming",
+      "function_calling",
+      "structured_output",
       "reasoning"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.5,
-          "output_per_million": 1.5
+          "input_per_million": 1.25,
+          "output_per_million": 10.0,
+          "cached_input_per_million": 0.125
         }
       }
     },
@@ -8086,14 +8048,16 @@
     "id": "gpt-5-chat-latest",
     "name": "GPT-5 Chat Latest",
     "provider": "openai",
-    "family": "other",
+    "family": "gpt5",
     "created_at": "2025-08-01 20:35:06 +0200",
-    "context_window": 4096,
-    "max_output_tokens": 16384,
+    "context_window": 128000,
+    "max_output_tokens": 400000,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text"
+        "text",
+        "image",
+        "pdf"
       ],
       "output": [
         "text"
@@ -8101,13 +8065,16 @@
     },
     "capabilities": [
       "streaming",
+      "function_calling",
+      "structured_output",
       "reasoning"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.5,
-          "output_per_million": 1.5
+          "input_per_million": 1.25,
+          "output_per_million": 10.0,
+          "cached_input_per_million": 0.125
         }
       }
     },
@@ -8120,14 +8087,16 @@
     "id": "gpt-5-mini",
     "name": "GPT-5 Mini",
     "provider": "openai",
-    "family": "other",
+    "family": "gpt5",
     "created_at": "2025-08-05 22:32:08 +0200",
-    "context_window": 4096,
-    "max_output_tokens": 16384,
+    "context_window": 128000,
+    "max_output_tokens": 400000,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text"
+        "text",
+        "image",
+        "pdf"
       ],
       "output": [
         "text"
@@ -8135,13 +8104,16 @@
     },
     "capabilities": [
       "streaming",
+      "function_calling",
+      "structured_output",
       "reasoning"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.5,
-          "output_per_million": 1.5
+          "input_per_million": 1.25,
+          "output_per_million": 10.0,
+          "cached_input_per_million": 0.125
         }
       }
     },
@@ -8154,14 +8126,16 @@
     "id": "gpt-5-mini-2025-08-07",
     "name": "GPT-5 Mini 20250807",
     "provider": "openai",
-    "family": "other",
+    "family": "gpt5",
     "created_at": "2025-08-05 22:31:07 +0200",
-    "context_window": 4096,
-    "max_output_tokens": 16384,
+    "context_window": 128000,
+    "max_output_tokens": 400000,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text"
+        "text",
+        "image",
+        "pdf"
       ],
       "output": [
         "text"
@@ -8169,13 +8143,16 @@
     },
     "capabilities": [
       "streaming",
+      "function_calling",
+      "structured_output",
       "reasoning"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.5,
-          "output_per_million": 1.5
+          "input_per_million": 1.25,
+          "output_per_million": 10.0,
+          "cached_input_per_million": 0.125
         }
       }
     },
@@ -8188,14 +8165,16 @@
     "id": "gpt-5-nano",
     "name": "GPT-5 Nano",
     "provider": "openai",
-    "family": "other",
+    "family": "gpt5",
     "created_at": "2025-08-05 22:39:44 +0200",
-    "context_window": 4096,
-    "max_output_tokens": 16384,
+    "context_window": 128000,
+    "max_output_tokens": 400000,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text"
+        "text",
+        "image",
+        "pdf"
       ],
       "output": [
         "text"
@@ -8203,13 +8182,16 @@
     },
     "capabilities": [
       "streaming",
+      "function_calling",
+      "structured_output",
       "reasoning"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.5,
-          "output_per_million": 1.5
+          "input_per_million": 1.25,
+          "output_per_million": 10.0,
+          "cached_input_per_million": 0.125
         }
       }
     },
@@ -8222,14 +8204,16 @@
     "id": "gpt-5-nano-2025-08-07",
     "name": "GPT-5 Nano 20250807",
     "provider": "openai",
-    "family": "other",
+    "family": "gpt5",
     "created_at": "2025-08-05 22:38:23 +0200",
-    "context_window": 4096,
-    "max_output_tokens": 16384,
+    "context_window": 128000,
+    "max_output_tokens": 400000,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text"
+        "text",
+        "image",
+        "pdf"
       ],
       "output": [
         "text"
@@ -8237,13 +8221,16 @@
     },
     "capabilities": [
       "streaming",
+      "function_calling",
+      "structured_output",
       "reasoning"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.5,
-          "output_per_million": 1.5
+          "input_per_million": 1.25,
+          "output_per_million": 10.0,
+          "cached_input_per_million": 0.125
         }
       }
     },
@@ -9815,8 +9802,8 @@
     "provider": "openrouter",
     "family": "alfredpros",
     "created_at": "2025-04-14 16:44:34 +0200",
-    "context_window": 8192,
-    "max_output_tokens": 8192,
+    "context_window": 4096,
+    "max_output_tokens": 4096,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -9827,14 +9814,13 @@
       ]
     },
     "capabilities": [
-      "streaming",
-      "predicted_outputs"
+      "streaming"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.7,
-          "output_per_million": 1.1
+          "input_per_million": 0.7999999999999999,
+          "output_per_million": 1.2
         }
       }
     },
@@ -9852,14 +9838,13 @@
         "instruct_type": "alpaca"
       },
       "top_provider": {
-        "context_length": 8192,
-        "max_completion_tokens": 8192,
+        "context_length": 4096,
+        "max_completion_tokens": 4096,
         "is_moderated": false
       },
       "per_request_limits": null,
       "supported_parameters": [
         "frequency_penalty",
-        "logit_bias",
         "max_tokens",
         "min_p",
         "presence_penalty",
@@ -9873,13 +9858,13 @@
     }
   },
   {
-    "id": "alpindale/goliath-120b",
-    "name": "Goliath 120B",
+    "id": "alibaba/tongyi-deepresearch-30b-a3b",
+    "name": "Tongyi DeepResearch 30B A3B",
     "provider": "openrouter",
-    "family": "alpindale",
-    "created_at": "2023-11-10 01:00:00 +0100",
-    "context_window": 6144,
-    "max_output_tokens": 512,
+    "family": "alibaba",
+    "created_at": "2025-09-18 17:53:24 +0200",
+    "context_window": 131072,
+    "max_output_tokens": 131072,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -9891,19 +9876,19 @@
     },
     "capabilities": [
       "streaming",
-      "structured_output",
-      "predicted_outputs"
+      "function_calling",
+      "structured_output"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 4.0,
-          "output_per_million": 5.5
+          "input_per_million": 0.09,
+          "output_per_million": 0.44999999999999996
         }
       }
     },
     "metadata": {
-      "description": "A large LLM created by combining two fine-tuned Llama 70B models into one 120B model. Combines Xwin and Euryale.\n\nCredits to\n- [@chargoddard](https://huggingface.co/chargoddard) for developing the framework used to merge the model - [mergekit](https://github.com/cg123/mergekit).\n- [@Undi95](https://huggingface.co/Undi95) for helping with the merge ratios.\n\n#merge",
+      "description": "Tongyi DeepResearch is an agentic large language model developed by Tongyi Lab, with 30 billion total parameters activating only 3 billion per token. It's optimized for long-horizon, deep information-seeking tasks and delivers state-of-the-art performance on benchmarks like Humanity's Last Exam, BrowserComp, BrowserComp-ZH, WebWalkerQA, GAIA, xbench-DeepSearch, and FRAMES. This makes it superior for complex agentic search, reasoning, and multi-step problem-solving compared to prior models.\n\nThe model includes a fully automated synthetic data pipeline for scalable pre-training, fine-tuning, and reinforcement learning. It uses large-scale continual pre-training on diverse agentic data to boost reasoning and stay fresh. It also features end-to-end on-policy RL with a customized Group Relative Policy Optimization, including token-level gradients and negative sample filtering for stable training. The model supports ReAct for core ability checks and an IterResearch-based 'Heavy' mode for max performance through test-time scaling. It's ideal for advanced research agents, tool use, and heavy inference workflows.",
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
@@ -9912,41 +9897,36 @@
         "output_modalities": [
           "text"
         ],
-        "tokenizer": "Llama2",
-        "instruct_type": "airoboros"
+        "tokenizer": "Other",
+        "instruct_type": null
       },
       "top_provider": {
-        "context_length": 6144,
-        "max_completion_tokens": 512,
+        "context_length": 131072,
+        "max_completion_tokens": 131072,
         "is_moderated": false
       },
       "per_request_limits": null,
       "supported_parameters": [
-        "frequency_penalty",
-        "logit_bias",
+        "include_reasoning",
         "max_tokens",
-        "min_p",
-        "presence_penalty",
-        "repetition_penalty",
+        "reasoning",
         "response_format",
-        "seed",
-        "stop",
         "structured_outputs",
         "temperature",
-        "top_a",
-        "top_k",
+        "tool_choice",
+        "tools",
         "top_p"
       ]
     }
   },
   {
-    "id": "amazon/nova-lite-v1",
-    "name": "Amazon: Nova Lite 1.0",
+    "id": "allenai/molmo-7b-d",
+    "name": "AllenAI: Molmo 7B D",
     "provider": "openrouter",
-    "family": "amazon",
-    "created_at": "2024-12-05 23:22:43 +0100",
-    "context_window": 300000,
-    "max_output_tokens": 5120,
+    "family": "allenai",
+    "created_at": "2025-03-26 22:07:27 +0100",
+    "context_window": 4096,
+    "max_output_tokens": 4096,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -9959,18 +9939,18 @@
     },
     "capabilities": [
       "streaming",
-      "function_calling"
+      "predicted_outputs"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.06,
-          "output_per_million": 0.24
+          "input_per_million": 0.09999999999999999,
+          "output_per_million": 0.19999999999999998
         }
       }
     },
     "metadata": {
-      "description": "Amazon Nova Lite 1.0 is a very low-cost multimodal model from Amazon that focused on fast processing of image, video, and text inputs to generate text output. Amazon Nova Lite can handle real-time customer interactions, document analysis, and visual question-answering tasks with high accuracy.\n\nWith an input context of 300K tokens, it can analyze multiple images or up to 30 minutes of video in a single input.",
+      "description": "Molmo is a family of open vision-language models developed by the Allen Institute for AI. Molmo models are trained on PixMo, a dataset of 1 million, highly-curated image-text pairs. It has state-of-the-art performance among multimodal models with a similar size while being fully open-source. You can find all models in the Molmo family [here](https://huggingface.co/collections/allenai/molmo-66f379e6fe3b8ef090a8ca19). Learn more about the Molmo family [in the announcement blog post](https://molmo.allenai.org/blog) or the [paper](https://huggingface.co/papers/2409.17146).\n\nMolmo 7B-D is based on [Qwen2-7B](https://huggingface.co/Qwen/Qwen2-7B) and uses [OpenAI CLIP](https://huggingface.co/openai/clip-vit-large-patch14-336) as vision backbone. It performs comfortably between GPT-4V and GPT-4o on both academic benchmarks and human evaluation.\n\nThis checkpoint is a preview of the Molmo release. All artifacts used in creating Molmo (PixMo dataset, training code, evaluations, intermediate checkpoints) will be made available at a later date, furthering our commitment to open-source AI development and reproducibility.",
       "architecture": {
         "modality": "text+image->text",
         "input_modalities": [
@@ -9980,33 +9960,38 @@
         "output_modalities": [
           "text"
         ],
-        "tokenizer": "Nova",
+        "tokenizer": "Other",
         "instruct_type": null
       },
       "top_provider": {
-        "context_length": 300000,
-        "max_completion_tokens": 5120,
-        "is_moderated": true
+        "context_length": 4096,
+        "max_completion_tokens": 4096,
+        "is_moderated": false
       },
       "per_request_limits": null,
       "supported_parameters": [
+        "frequency_penalty",
+        "logit_bias",
         "max_tokens",
+        "min_p",
+        "presence_penalty",
+        "repetition_penalty",
+        "seed",
         "stop",
         "temperature",
-        "tools",
         "top_k",
         "top_p"
       ]
     }
   },
   {
-    "id": "amazon/nova-micro-v1",
-    "name": "Amazon: Nova Micro 1.0",
+    "id": "allenai/olmo-2-0325-32b-instruct",
+    "name": "AllenAI: Olmo 2 32B Instruct",
     "provider": "openrouter",
-    "family": "amazon",
-    "created_at": "2024-12-05 23:20:37 +0100",
-    "context_window": 128000,
-    "max_output_tokens": 5120,
+    "family": "allenai",
+    "created_at": "2025-03-14 22:42:36 +0100",
+    "context_window": 4096,
+    "max_output_tokens": 4096,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -10018,18 +10003,18 @@
     },
     "capabilities": [
       "streaming",
-      "function_calling"
+      "predicted_outputs"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.035,
-          "output_per_million": 0.14
+          "input_per_million": 1.0,
+          "output_per_million": 1.5
         }
       }
     },
     "metadata": {
-      "description": "Amazon Nova Micro 1.0 is a text-only model that delivers the lowest latency responses in the Amazon Nova family of models at a very low cost. With a context length of 128K tokens and optimized for speed and cost, Amazon Nova Micro excels at tasks such as text summarization, translation, content classification, interactive chat, and brainstorming. It has  simple mathematical reasoning and coding abilities.",
+      "description": "OLMo-2 32B Instruct is a supervised instruction-finetuned variant of the OLMo-2 32B March 2025 base model. It excels in complex reasoning and instruction-following tasks across diverse benchmarks such as GSM8K, MATH, IFEval, and general NLP evaluation. Developed by AI2, OLMo-2 32B is part of an open, research-oriented initiative, trained primarily on English-language datasets to advance the understanding and development of open-source language models.",
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
@@ -10038,38 +10023,42 @@
         "output_modalities": [
           "text"
         ],
-        "tokenizer": "Nova",
+        "tokenizer": "Other",
         "instruct_type": null
       },
       "top_provider": {
-        "context_length": 128000,
-        "max_completion_tokens": 5120,
-        "is_moderated": true
+        "context_length": 4096,
+        "max_completion_tokens": 4096,
+        "is_moderated": false
       },
       "per_request_limits": null,
       "supported_parameters": [
+        "frequency_penalty",
+        "logit_bias",
         "max_tokens",
+        "min_p",
+        "presence_penalty",
+        "repetition_penalty",
+        "seed",
         "stop",
         "temperature",
-        "tools",
         "top_k",
         "top_p"
       ]
     }
   },
   {
-    "id": "amazon/nova-pro-v1",
-    "name": "Amazon: Nova Pro 1.0",
+    "id": "alpindale/goliath-120b",
+    "name": "Goliath 120B",
     "provider": "openrouter",
-    "family": "amazon",
-    "created_at": "2024-12-05 23:05:03 +0100",
-    "context_window": 300000,
-    "max_output_tokens": 5120,
+    "family": "alpindale",
+    "created_at": "2023-11-10 01:00:00 +0100",
+    "context_window": 6144,
+    "max_output_tokens": 512,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text",
-        "image"
+        "text"
       ],
       "output": [
         "text"
@@ -10077,52 +10066,238 @@
     },
     "capabilities": [
       "streaming",
-      "function_calling"
+      "structured_output",
+      "predicted_outputs"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.7999999999999999,
-          "output_per_million": 3.1999999999999997
+          "input_per_million": 4.0,
+          "output_per_million": 5.5
         }
       }
     },
     "metadata": {
-      "description": "Amazon Nova Pro 1.0 is a capable multimodal model from Amazon focused on providing a combination of accuracy, speed, and cost for a wide range of tasks. As of December 2024, it achieves state-of-the-art performance on key benchmarks including visual question answering (TextVQA) and video understanding (VATEX).\n\nAmazon Nova Pro demonstrates strong capabilities in processing both visual and textual information and at analyzing financial documents.\n\n**NOTE**: Video input is not supported at this time.",
+      "description": "A large LLM created by combining two fine-tuned Llama 70B models into one 120B model. Combines Xwin and Euryale.\n\nCredits to\n- [@chargoddard](https://huggingface.co/chargoddard) for developing the framework used to merge the model - [mergekit](https://github.com/cg123/mergekit).\n- [@Undi95](https://huggingface.co/Undi95) for helping with the merge ratios.\n\n#merge",
       "architecture": {
-        "modality": "text+image->text",
+        "modality": "text->text",
         "input_modalities": [
-          "text",
-          "image"
+          "text"
         ],
         "output_modalities": [
           "text"
         ],
-        "tokenizer": "Nova",
-        "instruct_type": null
+        "tokenizer": "Llama2",
+        "instruct_type": "airoboros"
       },
       "top_provider": {
-        "context_length": 300000,
-        "max_completion_tokens": 5120,
-        "is_moderated": true
+        "context_length": 6144,
+        "max_completion_tokens": 512,
+        "is_moderated": false
       },
       "per_request_limits": null,
       "supported_parameters": [
+        "frequency_penalty",
+        "logit_bias",
         "max_tokens",
+        "min_p",
+        "presence_penalty",
+        "repetition_penalty",
+        "response_format",
+        "seed",
         "stop",
+        "structured_outputs",
         "temperature",
-        "tools",
+        "top_a",
         "top_k",
         "top_p"
       ]
     }
   },
   {
-    "id": "anthracite-org/magnum-v2-72b",
-    "name": "Magnum v2 72B",
+    "id": "amazon/nova-lite-v1",
+    "name": "Amazon: Nova Lite 1.0",
     "provider": "openrouter",
-    "family": "anthracite-org",
-    "created_at": "2024-09-30 02:00:00 +0200",
+    "family": "amazon",
+    "created_at": "2024-12-05 23:22:43 +0100",
+    "context_window": 300000,
+    "max_output_tokens": 5120,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "text",
+        "image"
+      ],
+      "output": [
+        "text"
+      ]
+    },
+    "capabilities": [
+      "streaming",
+      "function_calling"
+    ],
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 0.06,
+          "output_per_million": 0.24
+        }
+      }
+    },
+    "metadata": {
+      "description": "Amazon Nova Lite 1.0 is a very low-cost multimodal model from Amazon that focused on fast processing of image, video, and text inputs to generate text output. Amazon Nova Lite can handle real-time customer interactions, document analysis, and visual question-answering tasks with high accuracy.\n\nWith an input context of 300K tokens, it can analyze multiple images or up to 30 minutes of video in a single input.",
+      "architecture": {
+        "modality": "text+image->text",
+        "input_modalities": [
+          "text",
+          "image"
+        ],
+        "output_modalities": [
+          "text"
+        ],
+        "tokenizer": "Nova",
+        "instruct_type": null
+      },
+      "top_provider": {
+        "context_length": 300000,
+        "max_completion_tokens": 5120,
+        "is_moderated": true
+      },
+      "per_request_limits": null,
+      "supported_parameters": [
+        "max_tokens",
+        "stop",
+        "temperature",
+        "tools",
+        "top_k",
+        "top_p"
+      ]
+    }
+  },
+  {
+    "id": "amazon/nova-micro-v1",
+    "name": "Amazon: Nova Micro 1.0",
+    "provider": "openrouter",
+    "family": "amazon",
+    "created_at": "2024-12-05 23:20:37 +0100",
+    "context_window": 128000,
+    "max_output_tokens": 5120,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "text"
+      ],
+      "output": [
+        "text"
+      ]
+    },
+    "capabilities": [
+      "streaming",
+      "function_calling"
+    ],
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 0.035,
+          "output_per_million": 0.14
+        }
+      }
+    },
+    "metadata": {
+      "description": "Amazon Nova Micro 1.0 is a text-only model that delivers the lowest latency responses in the Amazon Nova family of models at a very low cost. With a context length of 128K tokens and optimized for speed and cost, Amazon Nova Micro excels at tasks such as text summarization, translation, content classification, interactive chat, and brainstorming. It has  simple mathematical reasoning and coding abilities.",
+      "architecture": {
+        "modality": "text->text",
+        "input_modalities": [
+          "text"
+        ],
+        "output_modalities": [
+          "text"
+        ],
+        "tokenizer": "Nova",
+        "instruct_type": null
+      },
+      "top_provider": {
+        "context_length": 128000,
+        "max_completion_tokens": 5120,
+        "is_moderated": true
+      },
+      "per_request_limits": null,
+      "supported_parameters": [
+        "max_tokens",
+        "stop",
+        "temperature",
+        "tools",
+        "top_k",
+        "top_p"
+      ]
+    }
+  },
+  {
+    "id": "amazon/nova-pro-v1",
+    "name": "Amazon: Nova Pro 1.0",
+    "provider": "openrouter",
+    "family": "amazon",
+    "created_at": "2024-12-05 23:05:03 +0100",
+    "context_window": 300000,
+    "max_output_tokens": 5120,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "text",
+        "image"
+      ],
+      "output": [
+        "text"
+      ]
+    },
+    "capabilities": [
+      "streaming",
+      "function_calling"
+    ],
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 0.7999999999999999,
+          "output_per_million": 3.1999999999999997
+        }
+      }
+    },
+    "metadata": {
+      "description": "Amazon Nova Pro 1.0 is a capable multimodal model from Amazon focused on providing a combination of accuracy, speed, and cost for a wide range of tasks. As of December 2024, it achieves state-of-the-art performance on key benchmarks including visual question answering (TextVQA) and video understanding (VATEX).\n\nAmazon Nova Pro demonstrates strong capabilities in processing both visual and textual information and at analyzing financial documents.\n\n**NOTE**: Video input is not supported at this time.",
+      "architecture": {
+        "modality": "text+image->text",
+        "input_modalities": [
+          "text",
+          "image"
+        ],
+        "output_modalities": [
+          "text"
+        ],
+        "tokenizer": "Nova",
+        "instruct_type": null
+      },
+      "top_provider": {
+        "context_length": 300000,
+        "max_completion_tokens": 5120,
+        "is_moderated": true
+      },
+      "per_request_limits": null,
+      "supported_parameters": [
+        "max_tokens",
+        "stop",
+        "temperature",
+        "tools",
+        "top_k",
+        "top_p"
+      ]
+    }
+  },
+  {
+    "id": "anthracite-org/magnum-v2-72b",
+    "name": "Magnum v2 72B",
+    "provider": "openrouter",
+    "family": "anthracite-org",
+    "created_at": "2024-09-30 02:00:00 +0200",
     "context_window": 32768,
     "max_output_tokens": null,
     "knowledge_cutoff": null,
@@ -10802,7 +10977,7 @@
       "top_provider": {
         "context_length": 200000,
         "max_completion_tokens": 32000,
-        "is_moderated": false
+        "is_moderated": true
       },
       "per_request_limits": null,
       "supported_parameters": [
@@ -10868,7 +11043,7 @@
       "top_provider": {
         "context_length": 200000,
         "max_completion_tokens": 32000,
-        "is_moderated": false
+        "is_moderated": true
       },
       "per_request_limits": null,
       "supported_parameters": [
@@ -10878,7 +11053,9 @@
         "stop",
         "temperature",
         "tool_choice",
-        "tools"
+        "tools",
+        "top_k",
+        "top_p"
       ]
     }
   },
@@ -10948,6 +11125,71 @@
       ]
     }
   },
+  {
+    "id": "arcee-ai/afm-4.5b",
+    "name": "Arcee AI: AFM 4.5B",
+    "provider": "openrouter",
+    "family": "arcee-ai",
+    "created_at": "2025-09-16 18:34:44 +0200",
+    "context_window": 65536,
+    "max_output_tokens": null,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "text"
+      ],
+      "output": [
+        "text"
+      ]
+    },
+    "capabilities": [
+      "streaming",
+      "structured_output",
+      "predicted_outputs"
+    ],
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 0.09999999999999999,
+          "output_per_million": 0.39999999999999997
+        }
+      }
+    },
+    "metadata": {
+      "description": "AFM-4.5B is a 4.5 billion parameter instruction-tuned language model developed by Arcee AI. The model was pretrained on approximately 8 trillion tokens, including 6.5 trillion tokens of general data and 1.5 trillion tokens with an emphasis on mathematical reasoning and code generation. ",
+      "architecture": {
+        "modality": "text->text",
+        "input_modalities": [
+          "text"
+        ],
+        "output_modalities": [
+          "text"
+        ],
+        "tokenizer": "Other",
+        "instruct_type": null
+      },
+      "top_provider": {
+        "context_length": 65536,
+        "max_completion_tokens": null,
+        "is_moderated": false
+      },
+      "per_request_limits": null,
+      "supported_parameters": [
+        "frequency_penalty",
+        "logit_bias",
+        "max_tokens",
+        "min_p",
+        "presence_penalty",
+        "repetition_penalty",
+        "response_format",
+        "stop",
+        "structured_outputs",
+        "temperature",
+        "top_k",
+        "top_p"
+      ]
+    }
+  },
   {
     "id": "arcee-ai/coder-large",
     "name": "Arcee AI: Coder Large",
@@ -11225,8 +11467,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.01,
-          "output_per_million": 0.0400032
+          "input_per_million": 0.02,
+          "output_per_million": 0.07
         }
       }
     },
@@ -11450,6 +11692,7 @@
         "response_format",
         "seed",
         "stop",
+        "structured_outputs",
         "temperature",
         "top_k",
         "top_p"
@@ -11614,8 +11857,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.10366159999999999,
-          "output_per_million": 0.414848
+          "input_per_million": 0.16,
+          "output_per_million": 0.65
         }
       }
     },
@@ -11777,71 +12020,8 @@
     }
   },
   {
-    "id": "cognitivecomputations/dolphin-mixtral-8x22b",
-    "name": "Dolphin 2.9.2 Mixtral 8x22B 🐬",
-    "provider": "openrouter",
-    "family": "cognitivecomputations",
-    "created_at": "2024-06-08 02:00:00 +0200",
-    "context_window": 16000,
-    "max_output_tokens": 8192,
-    "knowledge_cutoff": null,
-    "modalities": {
-      "input": [
-        "text"
-      ],
-      "output": [
-        "text"
-      ]
-    },
-    "capabilities": [
-      "streaming",
-      "predicted_outputs"
-    ],
-    "pricing": {
-      "text_tokens": {
-        "standard": {
-          "input_per_million": 0.8999999999999999,
-          "output_per_million": 0.8999999999999999
-        }
-      }
-    },
-    "metadata": {
-      "description": "Dolphin 2.9 is designed for instruction following, conversational, and coding. This model is a finetune of [Mixtral 8x22B Instruct](/models/mistralai/mixtral-8x22b-instruct). It features a 64k context length and was fine-tuned with a 16k sequence length using ChatML templates.\n\nThis model is a successor to [Dolphin Mixtral 8x7B](/models/cognitivecomputations/dolphin-mixtral-8x7b).\n\nThe model is uncensored and is stripped of alignment and bias. It requires an external alignment layer for ethical use. Users are cautioned to use this highly compliant model responsibly, as detailed in a blog post about uncensored models at [erichartford.com/uncensored-models](https://erichartford.com/uncensored-models).\n\n#moe #uncensored",
-      "architecture": {
-        "modality": "text->text",
-        "input_modalities": [
-          "text"
-        ],
-        "output_modalities": [
-          "text"
-        ],
-        "tokenizer": "Mistral",
-        "instruct_type": "chatml"
-      },
-      "top_provider": {
-        "context_length": 16000,
-        "max_completion_tokens": 8192,
-        "is_moderated": false
-      },
-      "per_request_limits": null,
-      "supported_parameters": [
-        "frequency_penalty",
-        "logit_bias",
-        "max_tokens",
-        "min_p",
-        "presence_penalty",
-        "repetition_penalty",
-        "seed",
-        "stop",
-        "temperature",
-        "top_k",
-        "top_p"
-      ]
-    }
-  },
-  {
-    "id": "cognitivecomputations/dolphin3.0-mistral-24b",
-    "name": "Dolphin3.0 Mistral 24B",
+    "id": "cognitivecomputations/dolphin3.0-mistral-24b",
+    "name": "Dolphin3.0 Mistral 24B",
     "provider": "openrouter",
     "family": "cognitivecomputations",
     "created_at": "2025-02-13 16:53:39 +0100",
@@ -11863,8 +12043,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.037022,
-          "output_per_million": 0.14816
+          "input_per_million": 0.03,
+          "output_per_million": 0.11
         }
       }
     },
@@ -11987,7 +12167,7 @@
       "text_tokens": {
         "standard": {
           "input_per_million": 0.01,
-          "output_per_million": 0.0340768
+          "output_per_million": 0.03
         }
       }
     },
@@ -12818,8 +12998,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.1999188,
-          "output_per_million": 0.800064
+          "input_per_million": 0.24999987999999998,
+          "output_per_million": 0.999999888
         }
       }
     },
@@ -12889,8 +13069,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.1999188,
-          "output_per_million": 0.800064
+          "input_per_million": 0.24999987999999998,
+          "output_per_million": 0.999999888
         }
       }
     },
@@ -13021,8 +13201,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.19999999999999998,
-          "output_per_million": 0.7999999999999999
+          "input_per_million": 0.24999987999999998,
+          "output_per_million": 0.999999888
         }
       }
     },
@@ -13074,7 +13254,7 @@
     "provider": "openrouter",
     "family": "deepseek",
     "created_at": "2025-08-21 14:33:48 +0200",
-    "context_window": 64000,
+    "context_window": 163840,
     "max_output_tokens": null,
     "knowledge_cutoff": null,
     "modalities": {
@@ -13105,9 +13285,9 @@
         "instruct_type": "deepseek-v3.1"
       },
       "top_provider": {
-        "context_length": 64000,
+        "context_length": 163840,
         "max_completion_tokens": null,
-        "is_moderated": true
+        "is_moderated": false
       },
       "per_request_limits": null,
       "supported_parameters": [
@@ -13291,8 +13471,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.1999188,
-          "output_per_million": 0.800064
+          "input_per_million": 0.39999999999999997,
+          "output_per_million": 1.75
         }
       }
     },
@@ -13362,8 +13542,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.01703012,
-          "output_per_million": 0.0681536
+          "input_per_million": 0.01,
+          "output_per_million": 0.049999999999999996
         }
       }
     },
@@ -13551,8 +13731,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.025915399999999998,
-          "output_per_million": 0.103712
+          "input_per_million": 0.03,
+          "output_per_million": 0.13
         }
       }
     },
@@ -13728,8 +13908,8 @@
     "provider": "openrouter",
     "family": "deepseek",
     "created_at": "2025-01-30 00:39:00 +0100",
-    "context_window": 64000,
-    "max_output_tokens": 32000,
+    "context_window": 32768,
+    "max_output_tokens": 16384,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -13765,66 +13945,8 @@
         "instruct_type": "deepseek-r1"
       },
       "top_provider": {
-        "context_length": 64000,
-        "max_completion_tokens": 32000,
-        "is_moderated": false
-      },
-      "per_request_limits": null,
-      "supported_parameters": [
-        "frequency_penalty",
-        "include_reasoning",
-        "logit_bias",
-        "max_tokens",
-        "min_p",
-        "presence_penalty",
-        "reasoning",
-        "repetition_penalty",
-        "seed",
-        "stop",
-        "temperature",
-        "top_k",
-        "top_p"
-      ]
-    }
-  },
-  {
-    "id": "deepseek/deepseek-r1-distill-qwen-14b:free",
-    "name": "DeepSeek: R1 Distill Qwen 14B (free)",
-    "provider": "openrouter",
-    "family": "deepseek",
-    "created_at": "2025-01-30 00:39:00 +0100",
-    "context_window": 64000,
-    "max_output_tokens": null,
-    "knowledge_cutoff": null,
-    "modalities": {
-      "input": [
-        "text"
-      ],
-      "output": [
-        "text"
-      ]
-    },
-    "capabilities": [
-      "streaming",
-      "predicted_outputs"
-    ],
-    "pricing": {},
-    "metadata": {
-      "description": "DeepSeek R1 Distill Qwen 14B is a distilled large language model based on [Qwen 2.5 14B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). It outperforms OpenAI's o1-mini across various benchmarks, achieving new state-of-the-art results for dense models.\n\nOther benchmark results include:\n\n- AIME 2024 pass@1: 69.7\n- MATH-500 pass@1: 93.9\n- CodeForces Rating: 1481\n\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.",
-      "architecture": {
-        "modality": "text->text",
-        "input_modalities": [
-          "text"
-        ],
-        "output_modalities": [
-          "text"
-        ],
-        "tokenizer": "Qwen",
-        "instruct_type": "deepseek-r1"
-      },
-      "top_provider": {
-        "context_length": 64000,
-        "max_completion_tokens": null,
+        "context_length": 32768,
+        "max_completion_tokens": 16384,
         "is_moderated": false
       },
       "per_request_limits": null,
@@ -13832,7 +13954,6 @@
         "frequency_penalty",
         "include_reasoning",
         "logit_bias",
-        "logprobs",
         "max_tokens",
         "min_p",
         "presence_penalty",
@@ -13842,7 +13963,6 @@
         "stop",
         "temperature",
         "top_k",
-        "top_logprobs",
         "top_p"
       ]
     }
@@ -13872,8 +13992,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.075,
-          "output_per_million": 0.15
+          "input_per_million": 0.27,
+          "output_per_million": 0.27
         }
       }
     },
@@ -13986,8 +14106,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.19999999999999998,
-          "output_per_million": 0.7999999999999999
+          "input_per_million": 0.24999987999999998,
+          "output_per_million": 0.999999888
         }
       }
     },
@@ -14384,7 +14504,7 @@
       }
     },
     "metadata": {
-      "description": "Gemini 2.5 Flash Image Preview is a state of the art image generation model with contextual understanding. It is capable of image generation, edits, and multi-turn conversations.",
+      "description": "Gemini 2.5 Flash Image Preview, AKA Nano Banana is a state of the art image generation model with contextual understanding. It is capable of image generation, edits, and multi-turn conversations.",
       "architecture": {
         "modality": "text+image->text+image",
         "input_modalities": [
@@ -15055,7 +15175,7 @@
       "text_tokens": {
         "standard": {
           "input_per_million": 0.01,
-          "output_per_million": 0.0100008
+          "output_per_million": 0.02
         }
       }
     },
@@ -15180,8 +15300,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.0481286,
-          "output_per_million": 0.192608
+          "input_per_million": 0.04,
+          "output_per_million": 0.14
         }
       }
     },
@@ -15309,8 +15429,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.0666396,
-          "output_per_million": 0.26668800000000004
+          "input_per_million": 0.07,
+          "output_per_million": 0.26
         }
       }
     },
@@ -15897,69 +16017,6 @@
       ]
     }
   },
-  {
-    "id": "infermatic/mn-inferor-12b",
-    "name": "Infermatic: Mistral Nemo Inferor 12B",
-    "provider": "openrouter",
-    "family": "infermatic",
-    "created_at": "2024-11-13 03:20:28 +0100",
-    "context_window": 8192,
-    "max_output_tokens": 8192,
-    "knowledge_cutoff": null,
-    "modalities": {
-      "input": [
-        "text"
-      ],
-      "output": [
-        "text"
-      ]
-    },
-    "capabilities": [
-      "streaming",
-      "predicted_outputs"
-    ],
-    "pricing": {
-      "text_tokens": {
-        "standard": {
-          "input_per_million": 0.6,
-          "output_per_million": 1.0
-        }
-      }
-    },
-    "metadata": {
-      "description": "Inferor 12B is a merge of top roleplay models, expert on immersive narratives and storytelling.\n\nThis model was merged using the [Model Stock](https://arxiv.org/abs/2403.19522) merge method using [anthracite-org/magnum-v4-12b](https://openrouter.ai/anthracite-org/magnum-v4-72b) as a base.\n",
-      "architecture": {
-        "modality": "text->text",
-        "input_modalities": [
-          "text"
-        ],
-        "output_modalities": [
-          "text"
-        ],
-        "tokenizer": "Mistral",
-        "instruct_type": "mistral"
-      },
-      "top_provider": {
-        "context_length": 8192,
-        "max_completion_tokens": 8192,
-        "is_moderated": false
-      },
-      "per_request_limits": null,
-      "supported_parameters": [
-        "frequency_penalty",
-        "logit_bias",
-        "max_tokens",
-        "min_p",
-        "presence_penalty",
-        "repetition_penalty",
-        "seed",
-        "stop",
-        "temperature",
-        "top_k",
-        "top_p"
-      ]
-    }
-  },
   {
     "id": "inflection/inflection-3-pi",
     "name": "Inflection: Inflection 3 Pi",
@@ -16263,13 +16320,13 @@
     }
   },
   {
-    "id": "meta-llama/llama-3-70b-instruct",
-    "name": "Meta: Llama 3 70B Instruct",
+    "id": "meituan/longcat-flash-chat",
+    "name": "Meituan: LongCat Flash Chat",
     "provider": "openrouter",
-    "family": "meta-llama",
-    "created_at": "2024-04-18 02:00:00 +0200",
-    "context_window": 8192,
-    "max_output_tokens": 16384,
+    "family": "meituan",
+    "created_at": "2025-09-09 16:20:58 +0200",
+    "context_window": 131072,
+    "max_output_tokens": null,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -16282,19 +16339,18 @@
     "capabilities": [
       "streaming",
       "function_calling",
-      "structured_output",
       "predicted_outputs"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.3,
-          "output_per_million": 0.39999999999999997
+          "input_per_million": 0.12,
+          "output_per_million": 0.6
         }
       }
     },
     "metadata": {
-      "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 70B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
+      "description": "LongCat-Flash-Chat is a large-scale Mixture-of-Experts (MoE) model with 560B total parameters, of which 18.6B–31.3B (≈27B on average) are dynamically activated per input. It introduces a shortcut-connected MoE design to reduce communication overhead and achieve high throughput while maintaining training stability through advanced scaling strategies such as hyperparameter transfer, deterministic computation, and multi-stage optimization.\n\nThis release, LongCat-Flash-Chat, is a non-thinking foundation model optimized for conversational and agentic tasks. It supports long context windows up to 128K tokens and shows competitive performance across reasoning, coding, instruction following, and domain benchmarks, with particular strengths in tool use and complex multi-step interactions.",
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
@@ -16303,12 +16359,12 @@
         "output_modalities": [
           "text"
         ],
-        "tokenizer": "Llama3",
-        "instruct_type": "llama3"
+        "tokenizer": "Other",
+        "instruct_type": null
       },
       "top_provider": {
-        "context_length": 8192,
-        "max_completion_tokens": 16384,
+        "context_length": 131072,
+        "max_completion_tokens": null,
         "is_moderated": false
       },
       "per_request_limits": null,
@@ -16320,7 +16376,6 @@
         "min_p",
         "presence_penalty",
         "repetition_penalty",
-        "response_format",
         "seed",
         "stop",
         "temperature",
@@ -16333,8 +16388,8 @@
     }
   },
   {
-    "id": "meta-llama/llama-3-8b-instruct",
-    "name": "Meta: Llama 3 8B Instruct",
+    "id": "meta-llama/llama-3-70b-instruct",
+    "name": "Meta: Llama 3 70B Instruct",
     "provider": "openrouter",
     "family": "meta-llama",
     "created_at": "2024-04-18 02:00:00 +0200",
@@ -16358,13 +16413,13 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.03,
-          "output_per_million": 0.06
+          "input_per_million": 0.3,
+          "output_per_million": 0.39999999999999997
         }
       }
     },
     "metadata": {
-      "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
+      "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 70B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
@@ -16385,6 +16440,7 @@
       "supported_parameters": [
         "frequency_penalty",
         "logit_bias",
+        "logprobs",
         "max_tokens",
         "min_p",
         "presence_penalty",
@@ -16396,18 +16452,19 @@
         "tool_choice",
         "tools",
         "top_k",
+        "top_logprobs",
         "top_p"
       ]
     }
   },
   {
-    "id": "meta-llama/llama-3.1-405b",
-    "name": "Meta: Llama 3.1 405B (base)",
+    "id": "meta-llama/llama-3-8b-instruct",
+    "name": "Meta: Llama 3 8B Instruct",
     "provider": "openrouter",
     "family": "meta-llama",
-    "created_at": "2024-08-02 02:00:00 +0200",
-    "context_window": 32768,
-    "max_output_tokens": null,
+    "created_at": "2024-04-18 02:00:00 +0200",
+    "context_window": 8192,
+    "max_output_tokens": 16384,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -16419,18 +16476,20 @@
     },
     "capabilities": [
       "streaming",
+      "function_calling",
+      "structured_output",
       "predicted_outputs"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 2.0,
-          "output_per_million": 2.0
+          "input_per_million": 0.03,
+          "output_per_million": 0.06
         }
       }
     },
     "metadata": {
-      "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This is the base 405B pre-trained version.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
+      "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
@@ -16440,7 +16499,73 @@
           "text"
         ],
         "tokenizer": "Llama3",
-        "instruct_type": "none"
+        "instruct_type": "llama3"
+      },
+      "top_provider": {
+        "context_length": 8192,
+        "max_completion_tokens": 16384,
+        "is_moderated": false
+      },
+      "per_request_limits": null,
+      "supported_parameters": [
+        "frequency_penalty",
+        "logit_bias",
+        "max_tokens",
+        "min_p",
+        "presence_penalty",
+        "repetition_penalty",
+        "response_format",
+        "seed",
+        "stop",
+        "temperature",
+        "tool_choice",
+        "tools",
+        "top_k",
+        "top_p"
+      ]
+    }
+  },
+  {
+    "id": "meta-llama/llama-3.1-405b",
+    "name": "Meta: Llama 3.1 405B (base)",
+    "provider": "openrouter",
+    "family": "meta-llama",
+    "created_at": "2024-08-02 02:00:00 +0200",
+    "context_window": 32768,
+    "max_output_tokens": null,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "text"
+      ],
+      "output": [
+        "text"
+      ]
+    },
+    "capabilities": [
+      "streaming",
+      "predicted_outputs"
+    ],
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 2.0,
+          "output_per_million": 2.0
+        }
+      }
+    },
+    "metadata": {
+      "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This is the base 405B pre-trained version.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
+      "architecture": {
+        "modality": "text->text",
+        "input_modalities": [
+          "text"
+        ],
+        "output_modalities": [
+          "text"
+        ],
+        "tokenizer": "Llama3",
+        "instruct_type": "none"
       },
       "top_provider": {
         "context_length": 32768,
@@ -16667,7 +16792,7 @@
     "provider": "openrouter",
     "family": "meta-llama",
     "created_at": "2024-07-23 02:00:00 +0200",
-    "context_window": 131072,
+    "context_window": 16384,
     "max_output_tokens": 16384,
     "knowledge_cutoff": null,
     "modalities": {
@@ -16687,8 +16812,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.015,
-          "output_per_million": 0.02
+          "input_per_million": 0.02,
+          "output_per_million": 0.03
         }
       }
     },
@@ -16706,7 +16831,7 @@
         "instruct_type": "llama3"
       },
       "top_provider": {
-        "context_length": 131072,
+        "context_length": 16384,
         "max_completion_tokens": 16384,
         "is_moderated": false
       },
@@ -16874,7 +16999,7 @@
     "provider": "openrouter",
     "family": "meta-llama",
     "created_at": "2024-09-25 02:00:00 +0200",
-    "context_window": 131072,
+    "context_window": 16384,
     "max_output_tokens": 16384,
     "knowledge_cutoff": null,
     "modalities": {
@@ -16894,8 +17019,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.012,
-          "output_per_million": 0.024
+          "input_per_million": 0.02,
+          "output_per_million": 0.02
         }
       }
     },
@@ -16913,7 +17038,7 @@
         "instruct_type": "llama3"
       },
       "top_provider": {
-        "context_length": 131072,
+        "context_length": 16384,
         "max_completion_tokens": 16384,
         "is_moderated": false
       },
@@ -17062,7 +17187,7 @@
     "family": "meta-llama",
     "created_at": "2024-12-06 18:28:57 +0100",
     "context_window": 131072,
-    "max_output_tokens": 16384,
+    "max_output_tokens": 131072,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -17081,8 +17206,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.038000000000000006,
-          "output_per_million": 0.12
+          "input_per_million": 0.012,
+          "output_per_million": 0.036
         }
       }
     },
@@ -17101,7 +17226,7 @@
       },
       "top_provider": {
         "context_length": 131072,
-        "max_completion_tokens": 16384,
+        "max_completion_tokens": 131072,
         "is_moderated": false
       },
       "per_request_limits": null,
@@ -17719,8 +17844,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.1999188,
-          "output_per_million": 0.800064
+          "input_per_million": 0.24999987999999998,
+          "output_per_million": 0.999999888
         }
       }
     },
@@ -18012,8 +18137,7 @@
     },
     "capabilities": [
       "streaming",
-      "structured_output",
-      "predicted_outputs"
+      "structured_output"
     ],
     "pricing": {
       "text_tokens": {
@@ -18044,8 +18168,6 @@
       "per_request_limits": null,
       "supported_parameters": [
         "frequency_penalty",
-        "logit_bias",
-        "logprobs",
         "max_tokens",
         "min_p",
         "presence_penalty",
@@ -18056,7 +18178,6 @@
         "structured_outputs",
         "temperature",
         "top_k",
-        "top_logprobs",
         "top_p"
       ]
     }
@@ -18081,6 +18202,7 @@
     },
     "capabilities": [
       "streaming",
+      "function_calling",
       "structured_output"
     ],
     "pricing": {
@@ -18120,7 +18242,10 @@
         "response_format",
         "seed",
         "stop",
+        "structured_outputs",
         "temperature",
+        "tool_choice",
+        "tools",
         "top_k",
         "top_p"
       ]
@@ -18666,8 +18791,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.01999188,
-          "output_per_million": 0.0800064
+          "input_per_million": 0.04,
+          "output_per_million": 0.14
         }
       }
     },
@@ -19687,7 +19812,7 @@
     "family": "mistralai",
     "created_at": "2024-07-19 02:00:00 +0200",
     "context_window": 131072,
-    "max_output_tokens": 128000,
+    "max_output_tokens": 16384,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -19706,8 +19831,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.01,
-          "output_per_million": 0.0400032
+          "input_per_million": 0.02,
+          "output_per_million": 0.04
         }
       }
     },
@@ -19726,7 +19851,7 @@
       },
       "top_provider": {
         "context_length": 131072,
-        "max_completion_tokens": 128000,
+        "max_completion_tokens": 16384,
         "is_moderated": false
       },
       "per_request_limits": null,
@@ -19963,8 +20088,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.01999188,
-          "output_per_million": 0.0800064
+          "input_per_million": 0.04,
+          "output_per_million": 0.15
         }
       }
     },
@@ -20093,8 +20218,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.01999188,
-          "output_per_million": 0.0800064
+          "input_per_million": 0.04,
+          "output_per_million": 0.15
         }
       }
     },
@@ -20232,8 +20357,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.049999999999999996,
-          "output_per_million": 0.09999999999999999
+          "input_per_million": 0.075,
+          "output_per_million": 0.19999999999999998
         }
       }
     },
@@ -20502,8 +20627,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.08,
-          "output_per_million": 0.24
+          "input_per_million": 0.39999999999999997,
+          "output_per_million": 0.39999999999999997
         }
       }
     },
@@ -20736,6 +20861,7 @@
         "include_reasoning",
         "reasoning",
         "response_format",
+        "structured_outputs",
         "temperature",
         "top_k",
         "top_p"
@@ -20899,8 +21025,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.2962,
-          "output_per_million": 1.1852999999999998
+          "input_per_million": 0.38,
+          "output_per_million": 1.52
         }
       }
     },
@@ -21030,8 +21156,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.02498985,
-          "output_per_million": 0.100008
+          "input_per_million": 0.02,
+          "output_per_million": 0.07
         }
       }
     },
@@ -21521,8 +21647,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.09329544,
-          "output_per_million": 0.3733632
+          "input_per_million": 0.13,
+          "output_per_million": 0.51
         }
       }
     },
@@ -21639,7 +21765,7 @@
     "family": "nousresearch",
     "created_at": "2024-08-16 02:00:00 +0200",
     "context_window": 131072,
-    "max_output_tokens": 16384,
+    "max_output_tokens": 131072,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -21657,7 +21783,7 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.7,
+          "input_per_million": 0.7999999999999999,
           "output_per_million": 0.7999999999999999
         }
       }
@@ -21677,7 +21803,7 @@
       },
       "top_provider": {
         "context_length": 131072,
-        "max_completion_tokens": 16384,
+        "max_completion_tokens": 131072,
         "is_moderated": false
       },
       "per_request_limits": null,
@@ -21706,7 +21832,7 @@
     "family": "nousresearch",
     "created_at": "2024-08-18 02:00:00 +0200",
     "context_window": 131072,
-    "max_output_tokens": null,
+    "max_output_tokens": 131072,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -21725,8 +21851,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.09999999999999999,
-          "output_per_million": 0.28
+          "input_per_million": 0.12,
+          "output_per_million": 0.3
         }
       }
     },
@@ -21745,7 +21871,7 @@
       },
       "top_provider": {
         "context_length": 131072,
-        "max_completion_tokens": null,
+        "max_completion_tokens": 131072,
         "is_moderated": false
       },
       "per_request_limits": null,
@@ -21795,8 +21921,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.1999188,
-          "output_per_million": 0.800064
+          "input_per_million": 0.24999987999999998,
+          "output_per_million": 0.999999888
         }
       }
     },
@@ -21865,8 +21991,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.09329544,
-          "output_per_million": 0.3733632
+          "input_per_million": 0.11,
+          "output_per_million": 0.38
         }
       }
     },
@@ -21936,8 +22062,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.12,
-          "output_per_million": 0.3
+          "input_per_million": 0.6,
+          "output_per_million": 0.6
         }
       }
     },
@@ -22044,69 +22170,11 @@
     }
   },
   {
-    "id": "nvidia/llama-3.1-nemotron-ultra-253b-v1:free",
-    "name": "NVIDIA: Llama 3.1 Nemotron Ultra 253B v1 (free)",
-    "provider": "openrouter",
-    "family": "nvidia",
-    "created_at": "2025-04-08 14:24:19 +0200",
-    "context_window": 131072,
-    "max_output_tokens": null,
-    "knowledge_cutoff": null,
-    "modalities": {
-      "input": [
-        "text"
-      ],
-      "output": [
-        "text"
-      ]
-    },
-    "capabilities": [
-      "streaming",
-      "predicted_outputs"
-    ],
-    "pricing": {},
-    "metadata": {
-      "description": "Llama-3.1-Nemotron-Ultra-253B-v1 is a large language model (LLM) optimized for advanced reasoning, human-interactive chat, retrieval-augmented generation (RAG), and tool-calling tasks. Derived from Meta’s Llama-3.1-405B-Instruct, it has been significantly customized using Neural Architecture Search (NAS), resulting in enhanced efficiency, reduced memory usage, and improved inference latency. The model supports a context length of up to 128K tokens and can operate efficiently on an 8x NVIDIA H100 node.\n\nNote: you must include `detailed thinking on` in the system prompt to enable reasoning. Please see [Usage Recommendations](https://huggingface.co/nvidia/Llama-3_1-Nemotron-Ultra-253B-v1#quick-start-and-usage-recommendations) for more.",
-      "architecture": {
-        "modality": "text->text",
-        "input_modalities": [
-          "text"
-        ],
-        "output_modalities": [
-          "text"
-        ],
-        "tokenizer": "Llama3",
-        "instruct_type": null
-      },
-      "top_provider": {
-        "context_length": 131072,
-        "max_completion_tokens": null,
-        "is_moderated": false
-      },
-      "per_request_limits": null,
-      "supported_parameters": [
-        "frequency_penalty",
-        "logit_bias",
-        "logprobs",
-        "max_tokens",
-        "min_p",
-        "presence_penalty",
-        "repetition_penalty",
-        "seed",
-        "stop",
-        "temperature",
-        "top_k",
-        "top_logprobs",
-        "top_p"
-      ]
-    }
-  },
-  {
-    "id": "nvidia/llama-3.3-nemotron-super-49b-v1",
-    "name": "NVIDIA: Llama 3.3 Nemotron Super 49B v1",
+    "id": "nvidia/nemotron-nano-9b-v2",
+    "name": "NVIDIA: Nemotron Nano 9B V2",
     "provider": "openrouter",
     "family": "nvidia",
-    "created_at": "2025-04-08 15:38:14 +0200",
+    "created_at": "2025-09-05 23:13:27 +0200",
     "context_window": 131072,
     "max_output_tokens": null,
     "knowledge_cutoff": null,
@@ -22120,18 +22188,19 @@
     },
     "capabilities": [
       "streaming",
-      "predicted_outputs"
+      "function_calling",
+      "structured_output"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.13,
-          "output_per_million": 0.39999999999999997
+          "input_per_million": 0.04,
+          "output_per_million": 0.16
         }
       }
     },
     "metadata": {
-      "description": "Llama-3.3-Nemotron-Super-49B-v1 is a large language model (LLM) optimized for advanced reasoning, conversational interactions, retrieval-augmented generation (RAG), and tool-calling tasks. Derived from Meta's Llama-3.3-70B-Instruct, it employs a Neural Architecture Search (NAS) approach, significantly enhancing efficiency and reducing memory requirements. This allows the model to support a context length of up to 128K tokens and fit efficiently on single high-performance GPUs, such as NVIDIA H200.\n\nNote: you must include `detailed thinking on` in the system prompt to enable reasoning. Please see [Usage Recommendations](https://huggingface.co/nvidia/Llama-3_1-Nemotron-Ultra-253B-v1#quick-start-and-usage-recommendations) for more.",
+      "description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and tasks by first generating a reasoning trace and then concluding with a final response. \n\nThe model's reasoning capabilities can be controlled via a system prompt. If the user prefers the model to provide its final answer without intermediate reasoning traces, it can be configured to do so.",
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
@@ -22151,22 +22220,26 @@
       "per_request_limits": null,
       "supported_parameters": [
         "frequency_penalty",
-        "logit_bias",
-        "logprobs",
+        "include_reasoning",
         "max_tokens",
+        "min_p",
         "presence_penalty",
+        "reasoning",
+        "repetition_penalty",
+        "response_format",
         "seed",
         "stop",
         "temperature",
+        "tool_choice",
+        "tools",
         "top_k",
-        "top_logprobs",
         "top_p"
       ]
     }
   },
   {
-    "id": "nvidia/nemotron-nano-9b-v2",
-    "name": "NVIDIA: Nemotron Nano 9B V2",
+    "id": "nvidia/nemotron-nano-9b-v2:free",
+    "name": "NVIDIA: Nemotron Nano 9B V2 (free)",
     "provider": "openrouter",
     "family": "nvidia",
     "created_at": "2025-09-05 23:13:27 +0200",
@@ -23883,9 +23956,9 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 1.25,
-          "output_per_million": 10.0,
-          "cached_input_per_million": 0.125
+          "input_per_million": 0.625,
+          "output_per_million": 5.0,
+          "cached_input_per_million": 0.0625
         }
       }
     },
@@ -24121,8 +24194,8 @@
     "provider": "openrouter",
     "family": "openai",
     "created_at": "2025-08-05 19:17:11 +0200",
-    "context_window": 131000,
-    "max_output_tokens": 131000,
+    "context_window": 131072,
+    "max_output_tokens": null,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -24141,8 +24214,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.072,
-          "output_per_million": 0.28
+          "input_per_million": 0.049999999999999996,
+          "output_per_million": 0.25
         }
       }
     },
@@ -24160,8 +24233,8 @@
         "instruct_type": null
       },
       "top_provider": {
-        "context_length": 131000,
-        "max_completion_tokens": 131000,
+        "context_length": 131072,
+        "max_completion_tokens": null,
         "is_moderated": false
       },
       "per_request_limits": null,
@@ -24244,8 +24317,8 @@
     "provider": "openrouter",
     "family": "openai",
     "created_at": "2025-08-05 19:17:09 +0200",
-    "context_window": 131000,
-    "max_output_tokens": 131000,
+    "context_window": 131072,
+    "max_output_tokens": 32768,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -24264,7 +24337,7 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.04,
+          "input_per_million": 0.03,
           "output_per_million": 0.15
         }
       }
@@ -24283,8 +24356,8 @@
         "instruct_type": null
       },
       "top_provider": {
-        "context_length": 131000,
-        "max_completion_tokens": 131000,
+        "context_length": 131072,
+        "max_completion_tokens": 32768,
         "is_moderated": false
       },
       "per_request_limits": null,
@@ -24674,7 +24747,8 @@
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text"
+        "text",
+        "file"
       ],
       "output": [
         "text"
@@ -24699,7 +24773,8 @@
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
-          "text"
+          "text",
+          "file"
         ],
         "output_modalities": [
           "text"
@@ -24734,7 +24809,8 @@
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text"
+        "text",
+        "file"
       ],
       "output": [
         "text"
@@ -24759,12 +24835,13 @@
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
-          "text"
+          "text",
+          "file"
         ],
         "output_modalities": [
           "text"
         ],
-        "tokenizer": "Other",
+        "tokenizer": "GPT",
         "instruct_type": null
       },
       "top_provider": {
@@ -24981,79 +25058,43 @@
     }
   },
   {
-    "id": "openrouter/auto",
-    "name": "Auto Router",
+    "id": "opengvlab/internvl3-78b",
+    "name": "OpenGVLab: InternVL3 78B",
     "provider": "openrouter",
-    "family": "openrouter",
-    "created_at": "2023-11-08 01:00:00 +0100",
-    "context_window": 2000000,
+    "family": "opengvlab",
+    "created_at": "2025-09-15 20:55:55 +0200",
+    "context_window": 32768,
     "max_output_tokens": null,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
+        "image",
         "text"
       ],
       "output": [
         "text"
       ]
     },
-    "capabilities": [
-      "streaming"
-    ],
-    "pricing": {},
-    "metadata": {
-      "description": "Your prompt will be processed by a meta-model and routed to one of dozens of models (see below), optimizing for the best possible output.\n\nTo see which model was used, visit [Activity](/activity), or read the `model` attribute of the response. Your response will be priced at the same rate as the routed model.\n\nThe meta-model is powered by [Not Diamond](https://docs.notdiamond.ai/docs/how-not-diamond-works). Learn more in our [docs](/docs/model-routing).\n\nRequests will be routed to the following models:\n- [openai/gpt-4o-2024-08-06](/openai/gpt-4o-2024-08-06)\n- [openai/gpt-4o-2024-05-13](/openai/gpt-4o-2024-05-13)\n- [openai/gpt-4o-mini-2024-07-18](/openai/gpt-4o-mini-2024-07-18)\n- [openai/chatgpt-4o-latest](/openai/chatgpt-4o-latest)\n- [openai/o1-preview-2024-09-12](/openai/o1-preview-2024-09-12)\n- [openai/o1-mini-2024-09-12](/openai/o1-mini-2024-09-12)\n- [anthropic/claude-3.5-sonnet](/anthropic/claude-3.5-sonnet)\n- [anthropic/claude-3.5-haiku](/anthropic/claude-3.5-haiku)\n- [anthropic/claude-3-opus](/anthropic/claude-3-opus)\n- [anthropic/claude-2.1](/anthropic/claude-2.1)\n- [google/gemini-pro-1.5](/google/gemini-pro-1.5)\n- [google/gemini-flash-1.5](/google/gemini-flash-1.5)\n- [mistralai/mistral-large-2407](/mistralai/mistral-large-2407)\n- [mistralai/mistral-nemo](/mistralai/mistral-nemo)\n- [deepseek/deepseek-r1](/deepseek/deepseek-r1)\n- [meta-llama/llama-3.1-70b-instruct](/meta-llama/llama-3.1-70b-instruct)\n- [meta-llama/llama-3.1-405b-instruct](/meta-llama/llama-3.1-405b-instruct)\n- [mistralai/mixtral-8x22b-instruct](/mistralai/mixtral-8x22b-instruct)\n- [cohere/command-r-plus](/cohere/command-r-plus)\n- [cohere/command-r](/cohere/command-r)",
-      "architecture": {
-        "modality": "text->text",
-        "input_modalities": [
-          "text"
-        ],
-        "output_modalities": [
-          "text"
-        ],
-        "tokenizer": "Router",
-        "instruct_type": null
-      },
-      "top_provider": {
-        "context_length": null,
-        "max_completion_tokens": null,
-        "is_moderated": false
-      },
-      "per_request_limits": null,
-      "supported_parameters": []
-    }
-  },
-  {
-    "id": "openrouter/sonoma-dusk-alpha",
-    "name": "Sonoma Dusk Alpha",
-    "provider": "openrouter",
-    "family": "openrouter",
-    "created_at": "2025-09-05 19:27:27 +0200",
-    "context_window": 2000000,
-    "max_output_tokens": null,
-    "knowledge_cutoff": null,
-    "modalities": {
-      "input": [
-        "text",
-        "image"
-      ],
-      "output": [
-        "text"
-      ]
-    },
     "capabilities": [
       "streaming",
-      "function_calling",
-      "structured_output"
+      "structured_output",
+      "predicted_outputs"
     ],
-    "pricing": {},
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 0.03,
+          "output_per_million": 0.13
+        }
+      }
+    },
     "metadata": {
-      "description": "This is a cloaked model provided to the community to gather feedback. A fast and intelligent general-purpose frontier model with a 2 million token context window. Supports image inputs and parallel tool calling.\n\nNote: It’s free to use during this testing period, and prompts and completions are logged by the model creator for feedback and training.",
+      "description": "The InternVL3 series is an advanced multimodal large language model (MLLM). Compared to InternVL 2.5, InternVL3 demonstrates stronger multimodal perception and reasoning capabilities. \n\nIn addition, InternVL3 is benchmarked against the Qwen2.5 Chat models, whose pre-trained base models serve as the initialization for its language component. Benefiting from Native Multimodal Pre-Training, the InternVL3 series surpasses the Qwen2.5 series in overall text performance.",
       "architecture": {
         "modality": "text+image->text",
         "input_modalities": [
-          "text",
-          "image"
+          "image",
+          "text"
         ],
         "output_modalities": [
           "text"
@@ -25062,73 +25103,71 @@
         "instruct_type": null
       },
       "top_provider": {
-        "context_length": 2000000,
+        "context_length": 32768,
         "max_completion_tokens": null,
         "is_moderated": false
       },
       "per_request_limits": null,
       "supported_parameters": [
+        "frequency_penalty",
+        "logit_bias",
+        "logprobs",
         "max_tokens",
+        "min_p",
+        "presence_penalty",
+        "repetition_penalty",
         "response_format",
+        "seed",
+        "stop",
         "structured_outputs",
-        "tool_choice",
-        "tools"
+        "temperature",
+        "top_k",
+        "top_logprobs",
+        "top_p"
       ]
     }
   },
   {
-    "id": "openrouter/sonoma-sky-alpha",
-    "name": "Sonoma Sky Alpha",
+    "id": "openrouter/auto",
+    "name": "Auto Router",
     "provider": "openrouter",
     "family": "openrouter",
-    "created_at": "2025-09-05 19:23:21 +0200",
+    "created_at": "2023-11-08 01:00:00 +0100",
     "context_window": 2000000,
     "max_output_tokens": null,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text",
-        "image"
+        "text"
       ],
       "output": [
         "text"
       ]
     },
     "capabilities": [
-      "streaming",
-      "function_calling",
-      "structured_output"
+      "streaming"
     ],
     "pricing": {},
     "metadata": {
-      "description": "This is a cloaked model provided to the community to gather feedback. A maximally intelligent general-purpose frontier model with a 2 million token context window. Supports image inputs and parallel tool calling.\n\nNote: It’s free to use during this testing period, and prompts and completions are logged by the model creator for feedback and training.",
+      "description": "Your prompt will be processed by a meta-model and routed to one of dozens of models (see below), optimizing for the best possible output.\n\nTo see which model was used, visit [Activity](/activity), or read the `model` attribute of the response. Your response will be priced at the same rate as the routed model.\n\nThe meta-model is powered by [Not Diamond](https://docs.notdiamond.ai/docs/how-not-diamond-works). Learn more in our [docs](/docs/model-routing).\n\nRequests will be routed to the following models:\n- [openai/gpt-4o-2024-08-06](/openai/gpt-4o-2024-08-06)\n- [openai/gpt-4o-2024-05-13](/openai/gpt-4o-2024-05-13)\n- [openai/gpt-4o-mini-2024-07-18](/openai/gpt-4o-mini-2024-07-18)\n- [openai/chatgpt-4o-latest](/openai/chatgpt-4o-latest)\n- [openai/o1-preview-2024-09-12](/openai/o1-preview-2024-09-12)\n- [openai/o1-mini-2024-09-12](/openai/o1-mini-2024-09-12)\n- [anthropic/claude-3.5-sonnet](/anthropic/claude-3.5-sonnet)\n- [anthropic/claude-3.5-haiku](/anthropic/claude-3.5-haiku)\n- [anthropic/claude-3-opus](/anthropic/claude-3-opus)\n- [anthropic/claude-2.1](/anthropic/claude-2.1)\n- [google/gemini-pro-1.5](/google/gemini-pro-1.5)\n- [google/gemini-flash-1.5](/google/gemini-flash-1.5)\n- [mistralai/mistral-large-2407](/mistralai/mistral-large-2407)\n- [mistralai/mistral-nemo](/mistralai/mistral-nemo)\n- [deepseek/deepseek-r1](/deepseek/deepseek-r1)\n- [meta-llama/llama-3.1-70b-instruct](/meta-llama/llama-3.1-70b-instruct)\n- [meta-llama/llama-3.1-405b-instruct](/meta-llama/llama-3.1-405b-instruct)\n- [mistralai/mixtral-8x22b-instruct](/mistralai/mixtral-8x22b-instruct)\n- [cohere/command-r-plus](/cohere/command-r-plus)\n- [cohere/command-r](/cohere/command-r)",
       "architecture": {
-        "modality": "text+image->text",
+        "modality": "text->text",
         "input_modalities": [
-          "text",
-          "image"
+          "text"
         ],
         "output_modalities": [
           "text"
         ],
-        "tokenizer": "Other",
+        "tokenizer": "Router",
         "instruct_type": null
       },
       "top_provider": {
-        "context_length": 2000000,
+        "context_length": null,
         "max_completion_tokens": null,
         "is_moderated": false
       },
       "per_request_limits": null,
-      "supported_parameters": [
-        "include_reasoning",
-        "max_tokens",
-        "reasoning",
-        "response_format",
-        "structured_outputs",
-        "tool_choice",
-        "tools"
-      ]
+      "supported_parameters": []
     }
   },
   {
@@ -25493,69 +25532,6 @@
       ]
     }
   },
-  {
-    "id": "pygmalionai/mythalion-13b",
-    "name": "Pygmalion: Mythalion 13B",
-    "provider": "openrouter",
-    "family": "pygmalionai",
-    "created_at": "2023-09-02 02:00:00 +0200",
-    "context_window": 4096,
-    "max_output_tokens": 4096,
-    "knowledge_cutoff": null,
-    "modalities": {
-      "input": [
-        "text"
-      ],
-      "output": [
-        "text"
-      ]
-    },
-    "capabilities": [
-      "streaming",
-      "predicted_outputs"
-    ],
-    "pricing": {
-      "text_tokens": {
-        "standard": {
-          "input_per_million": 0.7,
-          "output_per_million": 1.1
-        }
-      }
-    },
-    "metadata": {
-      "description": "A blend of the new Pygmalion-13b and MythoMax. #merge",
-      "architecture": {
-        "modality": "text->text",
-        "input_modalities": [
-          "text"
-        ],
-        "output_modalities": [
-          "text"
-        ],
-        "tokenizer": "Llama2",
-        "instruct_type": "alpaca"
-      },
-      "top_provider": {
-        "context_length": 4096,
-        "max_completion_tokens": 4096,
-        "is_moderated": false
-      },
-      "per_request_limits": null,
-      "supported_parameters": [
-        "frequency_penalty",
-        "logit_bias",
-        "max_tokens",
-        "min_p",
-        "presence_penalty",
-        "repetition_penalty",
-        "seed",
-        "stop",
-        "temperature",
-        "top_k",
-        "top_p"
-      ]
-    }
-  },
   {
     "id": "qwen/qwen-2.5-72b-instruct",
     "name": "Qwen2.5 72B Instruct",
@@ -25582,8 +25558,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.051830799999999996,
-          "output_per_million": 0.207424
+          "input_per_million": 0.07,
+          "output_per_million": 0.26
         }
       }
     },
@@ -25757,7 +25733,7 @@
     "family": "qwen",
     "created_at": "2024-11-12 00:40:00 +0100",
     "context_window": 32768,
-    "max_output_tokens": null,
+    "max_output_tokens": 16384,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -25775,8 +25751,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.0499797,
-          "output_per_million": 0.200016
+          "input_per_million": 0.06,
+          "output_per_million": 0.15
         }
       }
     },
@@ -25795,7 +25771,7 @@
       },
       "top_provider": {
         "context_length": 32768,
-        "max_completion_tokens": null,
+        "max_completion_tokens": 16384,
         "is_moderated": false
       },
       "per_request_limits": null,
@@ -26070,13 +26046,13 @@
     }
   },
   {
-    "id": "qwen/qwen-turbo",
-    "name": "Qwen: Qwen-Turbo",
+    "id": "qwen/qwen-plus-2025-07-28",
+    "name": "Qwen: Qwen Plus 0728",
     "provider": "openrouter",
     "family": "qwen",
-    "created_at": "2025-02-01 12:56:14 +0100",
+    "created_at": "2025-09-08 18:06:39 +0200",
     "context_window": 1000000,
-    "max_output_tokens": 8192,
+    "max_output_tokens": 32768,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -26094,14 +26070,13 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.049999999999999996,
-          "output_per_million": 0.19999999999999998,
-          "cached_input_per_million": 0.02
+          "input_per_million": 0.39999999999999997,
+          "output_per_million": 1.2
         }
       }
     },
     "metadata": {
-      "description": "Qwen-Turbo, based on Qwen2.5, is a 1M context model that provides fast speed and low cost, suitable for simple tasks.",
+      "description": "Qwen Plus 0728, based on the Qwen3 foundation model, is a 1 million context hybrid reasoning model with a balanced performance, speed, and cost combination.",
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
@@ -26110,12 +26085,12 @@
         "output_modalities": [
           "text"
         ],
-        "tokenizer": "Qwen",
+        "tokenizer": "Qwen3",
         "instruct_type": null
       },
       "top_provider": {
         "context_length": 1000000,
-        "max_completion_tokens": 8192,
+        "max_completion_tokens": 32768,
         "is_moderated": false
       },
       "per_request_limits": null,
@@ -26124,6 +26099,7 @@
         "presence_penalty",
         "response_format",
         "seed",
+        "structured_outputs",
         "temperature",
         "tool_choice",
         "tools",
@@ -26132,18 +26108,17 @@
     }
   },
   {
-    "id": "qwen/qwen-vl-max",
-    "name": "Qwen: Qwen VL Max",
+    "id": "qwen/qwen-plus-2025-07-28:thinking",
+    "name": "Qwen: Qwen Plus 0728 (thinking)",
     "provider": "openrouter",
     "family": "qwen",
-    "created_at": "2025-02-01 19:25:04 +0100",
-    "context_window": 7500,
-    "max_output_tokens": 1500,
+    "created_at": "2025-09-08 18:06:39 +0200",
+    "context_window": 1000000,
+    "max_output_tokens": 32768,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text",
-        "image"
+        "text"
       ],
       "output": [
         "text"
@@ -26151,59 +26126,63 @@
     },
     "capabilities": [
       "streaming",
+      "function_calling",
       "structured_output"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.7999999999999999,
-          "output_per_million": 3.1999999999999997
+          "input_per_million": 0.39999999999999997,
+          "output_per_million": 4.0
         }
       }
     },
     "metadata": {
-      "description": "Qwen VL Max is a visual understanding model with 7500 tokens context length. It excels in delivering optimal performance for a broader spectrum of complex tasks.\n",
+      "description": "Qwen Plus 0728, based on the Qwen3 foundation model, is a 1 million context hybrid reasoning model with a balanced performance, speed, and cost combination.",
       "architecture": {
-        "modality": "text+image->text",
+        "modality": "text->text",
         "input_modalities": [
-          "text",
-          "image"
+          "text"
         ],
         "output_modalities": [
           "text"
         ],
-        "tokenizer": "Qwen",
+        "tokenizer": "Qwen3",
         "instruct_type": null
       },
       "top_provider": {
-        "context_length": 7500,
-        "max_completion_tokens": 1500,
+        "context_length": 1000000,
+        "max_completion_tokens": 32768,
         "is_moderated": false
       },
       "per_request_limits": null,
       "supported_parameters": [
+        "include_reasoning",
         "max_tokens",
         "presence_penalty",
+        "reasoning",
         "response_format",
         "seed",
+        "structured_outputs",
         "temperature",
+        "tool_choice",
+        "tools",
         "top_p"
       ]
     }
   },
   {
-    "id": "qwen/qwen-vl-plus",
-    "name": "Qwen: Qwen VL Plus",
+    "id": "qwen/qwen-turbo",
+    "name": "Qwen: Qwen-Turbo",
     "provider": "openrouter",
     "family": "qwen",
-    "created_at": "2025-02-05 05:54:15 +0100",
-    "context_window": 7500,
-    "max_output_tokens": 1500,
+    "created_at": "2025-02-01 12:56:14 +0100",
+    "context_window": 1000000,
+    "max_output_tokens": 8192,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "text",
-        "image"
+        "text"
       ],
       "output": [
         "text"
@@ -26211,23 +26190,24 @@
     },
     "capabilities": [
       "streaming",
+      "function_calling",
       "structured_output"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.21,
-          "output_per_million": 0.63
+          "input_per_million": 0.049999999999999996,
+          "output_per_million": 0.19999999999999998,
+          "cached_input_per_million": 0.02
         }
       }
     },
     "metadata": {
-      "description": "Qwen's Enhanced Large Visual Language Model. Significantly upgraded for detailed recognition capabilities and text recognition abilities, supporting ultra-high pixel resolutions up to millions of pixels and extreme aspect ratios for image input. It delivers significant performance across a broad range of visual tasks.\n",
+      "description": "Qwen-Turbo, based on Qwen2.5, is a 1M context model that provides fast speed and low cost, suitable for simple tasks.",
       "architecture": {
-        "modality": "text+image->text",
+        "modality": "text->text",
         "input_modalities": [
-          "text",
-          "image"
+          "text"
         ],
         "output_modalities": [
           "text"
@@ -26236,8 +26216,8 @@
         "instruct_type": null
       },
       "top_provider": {
-        "context_length": 7500,
-        "max_completion_tokens": 1500,
+        "context_length": 1000000,
+        "max_completion_tokens": 8192,
         "is_moderated": false
       },
       "per_request_limits": null,
@@ -26247,18 +26227,140 @@
         "response_format",
         "seed",
         "temperature",
+        "tool_choice",
+        "tools",
         "top_p"
       ]
     }
   },
   {
-    "id": "qwen/qwen2.5-vl-32b-instruct",
-    "name": "Qwen: Qwen2.5 VL 32B Instruct",
+    "id": "qwen/qwen-vl-max",
+    "name": "Qwen: Qwen VL Max",
     "provider": "openrouter",
     "family": "qwen",
-    "created_at": "2025-03-24 19:10:38 +0100",
-    "context_window": 16384,
-    "max_output_tokens": null,
+    "created_at": "2025-02-01 19:25:04 +0100",
+    "context_window": 7500,
+    "max_output_tokens": 1500,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "text",
+        "image"
+      ],
+      "output": [
+        "text"
+      ]
+    },
+    "capabilities": [
+      "streaming",
+      "structured_output"
+    ],
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 0.7999999999999999,
+          "output_per_million": 3.1999999999999997
+        }
+      }
+    },
+    "metadata": {
+      "description": "Qwen VL Max is a visual understanding model with 7500 tokens context length. It excels in delivering optimal performance for a broader spectrum of complex tasks.\n",
+      "architecture": {
+        "modality": "text+image->text",
+        "input_modalities": [
+          "text",
+          "image"
+        ],
+        "output_modalities": [
+          "text"
+        ],
+        "tokenizer": "Qwen",
+        "instruct_type": null
+      },
+      "top_provider": {
+        "context_length": 7500,
+        "max_completion_tokens": 1500,
+        "is_moderated": false
+      },
+      "per_request_limits": null,
+      "supported_parameters": [
+        "max_tokens",
+        "presence_penalty",
+        "response_format",
+        "seed",
+        "temperature",
+        "top_p"
+      ]
+    }
+  },
+  {
+    "id": "qwen/qwen-vl-plus",
+    "name": "Qwen: Qwen VL Plus",
+    "provider": "openrouter",
+    "family": "qwen",
+    "created_at": "2025-02-05 05:54:15 +0100",
+    "context_window": 7500,
+    "max_output_tokens": 1500,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "text",
+        "image"
+      ],
+      "output": [
+        "text"
+      ]
+    },
+    "capabilities": [
+      "streaming",
+      "structured_output"
+    ],
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 0.21,
+          "output_per_million": 0.63
+        }
+      }
+    },
+    "metadata": {
+      "description": "Qwen's Enhanced Large Visual Language Model. Significantly upgraded for detailed recognition capabilities and text recognition abilities, supporting ultra-high pixel resolutions up to millions of pixels and extreme aspect ratios for image input. It delivers significant performance across a broad range of visual tasks.\n",
+      "architecture": {
+        "modality": "text+image->text",
+        "input_modalities": [
+          "text",
+          "image"
+        ],
+        "output_modalities": [
+          "text"
+        ],
+        "tokenizer": "Qwen",
+        "instruct_type": null
+      },
+      "top_provider": {
+        "context_length": 7500,
+        "max_completion_tokens": 1500,
+        "is_moderated": false
+      },
+      "per_request_limits": null,
+      "supported_parameters": [
+        "max_tokens",
+        "presence_penalty",
+        "response_format",
+        "seed",
+        "temperature",
+        "top_p"
+      ]
+    }
+  },
+  {
+    "id": "qwen/qwen2.5-vl-32b-instruct",
+    "name": "Qwen: Qwen2.5 VL 32B Instruct",
+    "provider": "openrouter",
+    "family": "qwen",
+    "created_at": "2025-03-24 19:10:38 +0100",
+    "context_window": 16384,
+    "max_output_tokens": null,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -26277,8 +26379,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.01999188,
-          "output_per_million": 0.0800064
+          "input_per_million": 0.04,
+          "output_per_million": 0.14
         }
       }
     },
@@ -26408,8 +26510,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.0999594,
-          "output_per_million": 0.400032
+          "input_per_million": 0.07,
+          "output_per_million": 0.28
         }
       }
     },
@@ -26666,8 +26768,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.13,
-          "output_per_million": 0.6
+          "input_per_million": 0.18,
+          "output_per_million": 0.54
         }
       }
     },
@@ -26720,7 +26822,7 @@
     "family": "qwen",
     "created_at": "2025-07-21 19:39:15 +0200",
     "context_window": 262144,
-    "max_output_tokens": null,
+    "max_output_tokens": 262144,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -26739,8 +26841,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.077968332,
-          "output_per_million": 0.31202496
+          "input_per_million": 0.09999999999999999,
+          "output_per_million": 0.09999999999999999
         }
       }
     },
@@ -26759,7 +26861,7 @@
       },
       "top_provider": {
         "context_length": 262144,
-        "max_completion_tokens": null,
+        "max_completion_tokens": 262144,
         "is_moderated": false
       },
       "per_request_limits": null,
@@ -26810,8 +26912,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.077968332,
-          "output_per_million": 0.31202496
+          "input_per_million": 0.09999999999999999,
+          "output_per_million": 0.39
         }
       }
     },
@@ -26847,6 +26949,7 @@
         "response_format",
         "seed",
         "stop",
+        "structured_outputs",
         "temperature",
         "tool_choice",
         "tools",
@@ -26948,8 +27051,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.01999188,
-          "output_per_million": 0.0800064
+          "input_per_million": 0.06,
+          "output_per_million": 0.22
         }
       }
     },
@@ -27021,8 +27124,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.051830799999999996,
-          "output_per_million": 0.207424
+          "input_per_million": 0.07,
+          "output_per_million": 0.28
         }
       }
     },
@@ -27056,6 +27159,7 @@
         "response_format",
         "seed",
         "stop",
+        "structured_outputs",
         "temperature",
         "tool_choice",
         "tools",
@@ -27091,8 +27195,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.0713,
-          "output_per_million": 0.2852
+          "input_per_million": 0.08,
+          "output_per_million": 0.29
         }
       }
     },
@@ -27128,6 +27232,7 @@
         "response_format",
         "seed",
         "stop",
+        "structured_outputs",
         "temperature",
         "tool_choice",
         "tools",
@@ -27223,8 +27328,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.017992691999999998,
-          "output_per_million": 0.07200576
+          "input_per_million": 0.03,
+          "output_per_million": 0.13
         }
       }
     },
@@ -27480,8 +27585,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.19999999999999998,
-          "output_per_million": 0.7999999999999999
+          "input_per_million": 0.22,
+          "output_per_million": 0.95
         }
       }
     },
@@ -27551,8 +27656,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.051830799999999996,
-          "output_per_million": 0.207424
+          "input_per_million": 0.07,
+          "output_per_million": 0.28
         }
       }
     },
@@ -27586,6 +27691,7 @@
         "response_format",
         "seed",
         "stop",
+        "structured_outputs",
         "temperature",
         "tool_choice",
         "tools",
@@ -27595,6 +27701,131 @@
       ]
     }
   },
+  {
+    "id": "qwen/qwen3-coder-flash",
+    "name": "Qwen: Qwen3 Coder Flash",
+    "provider": "openrouter",
+    "family": "qwen",
+    "created_at": "2025-09-17 15:25:36 +0200",
+    "context_window": 128000,
+    "max_output_tokens": 65536,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "text"
+      ],
+      "output": [
+        "text"
+      ]
+    },
+    "capabilities": [
+      "streaming",
+      "function_calling",
+      "structured_output"
+    ],
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 0.3,
+          "output_per_million": 1.5,
+          "cached_input_per_million": 0.08
+        }
+      }
+    },
+    "metadata": {
+      "description": "Qwen3 Coder Flash is Alibaba's fast and cost efficient version of their proprietary Qwen3 Coder Plus. It is a powerful coding agent model specializing in autonomous programming via tool calling and environment interaction, combining coding proficiency with versatile general-purpose abilities.",
+      "architecture": {
+        "modality": "text->text",
+        "input_modalities": [
+          "text"
+        ],
+        "output_modalities": [
+          "text"
+        ],
+        "tokenizer": "Qwen3",
+        "instruct_type": null
+      },
+      "top_provider": {
+        "context_length": 128000,
+        "max_completion_tokens": 65536,
+        "is_moderated": false
+      },
+      "per_request_limits": null,
+      "supported_parameters": [
+        "max_tokens",
+        "presence_penalty",
+        "response_format",
+        "seed",
+        "temperature",
+        "tool_choice",
+        "tools",
+        "top_p"
+      ]
+    }
+  },
+  {
+    "id": "qwen/qwen3-coder-plus",
+    "name": "Qwen: Qwen3 Coder Plus",
+    "provider": "openrouter",
+    "family": "qwen",
+    "created_at": "2025-09-17 15:19:54 +0200",
+    "context_window": 128000,
+    "max_output_tokens": 65536,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "text"
+      ],
+      "output": [
+        "text"
+      ]
+    },
+    "capabilities": [
+      "streaming",
+      "function_calling",
+      "structured_output"
+    ],
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 1.0,
+          "output_per_million": 5.0,
+          "cached_input_per_million": 0.09999999999999999
+        }
+      }
+    },
+    "metadata": {
+      "description": "Qwen3 Coder Plus is Alibaba's proprietary version of the Open Source Qwen3 Coder 480B A35B. It is a powerful coding agent model specializing in autonomous programming via tool calling and environment interaction, combining coding proficiency with versatile general-purpose abilities.",
+      "architecture": {
+        "modality": "text->text",
+        "input_modalities": [
+          "text"
+        ],
+        "output_modalities": [
+          "text"
+        ],
+        "tokenizer": "Qwen3",
+        "instruct_type": null
+      },
+      "top_provider": {
+        "context_length": 128000,
+        "max_completion_tokens": 65536,
+        "is_moderated": false
+      },
+      "per_request_limits": null,
+      "supported_parameters": [
+        "max_tokens",
+        "presence_penalty",
+        "response_format",
+        "seed",
+        "structured_outputs",
+        "temperature",
+        "tool_choice",
+        "tools",
+        "top_p"
+      ]
+    }
+  },
   {
     "id": "qwen/qwen3-coder:free",
     "name": "Qwen: Qwen3 Coder 480B A35B (free)",
@@ -27719,12 +27950,12 @@
     }
   },
   {
-    "id": "qwen/qwq-32b",
-    "name": "Qwen: QwQ 32B",
+    "id": "qwen/qwen3-next-80b-a3b-instruct",
+    "name": "Qwen: Qwen3 Next 80B A3B Instruct",
     "provider": "openrouter",
     "family": "qwen",
-    "created_at": "2025-03-05 22:06:54 +0100",
-    "context_window": 32768,
+    "created_at": "2025-09-11 19:36:53 +0200",
+    "context_window": 262144,
     "max_output_tokens": null,
     "knowledge_cutoff": null,
     "modalities": {
@@ -27744,13 +27975,13 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.15,
-          "output_per_million": 0.39999999999999997
+          "input_per_million": 0.09999999999999999,
+          "output_per_million": 0.7999999999999999
         }
       }
     },
     "metadata": {
-      "description": "QwQ is the reasoning model of the Qwen series. Compared with conventional instruction-tuned models, QwQ, which is capable of thinking and reasoning, can achieve significantly enhanced performance in downstream tasks, especially hard problems. QwQ-32B is the medium-sized reasoning model, which is capable of achieving competitive performance against state-of-the-art reasoning models, e.g., DeepSeek-R1, o1-mini.",
+      "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\n\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.",
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
@@ -27759,24 +27990,22 @@
         "output_modalities": [
           "text"
         ],
-        "tokenizer": "Qwen",
-        "instruct_type": "qwq"
+        "tokenizer": "Qwen3",
+        "instruct_type": null
       },
       "top_provider": {
-        "context_length": 32768,
+        "context_length": 262144,
         "max_completion_tokens": null,
         "is_moderated": false
       },
       "per_request_limits": null,
       "supported_parameters": [
         "frequency_penalty",
-        "include_reasoning",
         "logit_bias",
         "logprobs",
         "max_tokens",
         "min_p",
         "presence_penalty",
-        "reasoning",
         "repetition_penalty",
         "response_format",
         "seed",
@@ -27792,12 +28021,12 @@
     }
   },
   {
-    "id": "qwen/qwq-32b-preview",
-    "name": "Qwen: QwQ 32B Preview",
+    "id": "qwen/qwen3-next-80b-a3b-thinking",
+    "name": "Qwen: Qwen3 Next 80B A3B Thinking",
     "provider": "openrouter",
     "family": "qwen",
-    "created_at": "2024-11-28 01:42:21 +0100",
-    "context_window": 32768,
+    "created_at": "2025-09-11 19:38:04 +0200",
+    "context_window": 262144,
     "max_output_tokens": null,
     "knowledge_cutoff": null,
     "modalities": {
@@ -27810,18 +28039,20 @@
     },
     "capabilities": [
       "streaming",
+      "function_calling",
+      "structured_output",
       "predicted_outputs"
     ],
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.19999999999999998,
-          "output_per_million": 0.19999999999999998
+          "input_per_million": 0.09999999999999999,
+          "output_per_million": 0.7999999999999999
         }
       }
     },
     "metadata": {
-      "description": "QwQ-32B-Preview is an experimental research model focused on AI reasoning capabilities developed by the Qwen Team. As a preview release, it demonstrates promising analytical abilities while having several important limitations:\n\n1. **Language Mixing and Code-Switching**: The model may mix languages or switch between them unexpectedly, affecting response clarity.\n2. **Recursive Reasoning Loops**: The model may enter circular reasoning patterns, leading to lengthy responses without a conclusive answer.\n3. **Safety and Ethical Considerations**: The model requires enhanced safety measures to ensure reliable and secure performance, and users should exercise caution when deploying it.\n4. **Performance and Benchmark Limitations**: The model excels in math and coding but has room for improvement in other areas, such as common sense reasoning and nuanced language understanding.\n\n",
+      "description": "Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that outputs structured “thinking” traces by default. It’s designed for hard multi-step problems; math proofs, code synthesis/debugging, logic, and agentic planning, and reports strong results across knowledge, reasoning, coding, alignment, and multilingual evaluations. Compared with prior Qwen3 variants, it emphasizes stability under long chains of thought and efficient scaling during inference, and it is tuned to follow complex instructions while reducing repetitive or off-task behavior.\n\nThe model is suitable for agent frameworks and tool use (function calling), retrieval-heavy workflows, and standardized benchmarking where step-by-step solutions are required. It supports long, detailed completions and leverages throughput-oriented techniques (e.g., multi-token prediction) for faster generation. Note that it operates in thinking-only mode.",
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
@@ -27830,26 +28061,32 @@
         "output_modalities": [
           "text"
         ],
-        "tokenizer": "Qwen",
-        "instruct_type": "deepseek-r1"
+        "tokenizer": "Qwen3",
+        "instruct_type": null
       },
       "top_provider": {
-        "context_length": 32768,
+        "context_length": 262144,
         "max_completion_tokens": null,
         "is_moderated": false
       },
       "per_request_limits": null,
       "supported_parameters": [
         "frequency_penalty",
+        "include_reasoning",
         "logit_bias",
         "logprobs",
         "max_tokens",
         "min_p",
         "presence_penalty",
+        "reasoning",
         "repetition_penalty",
+        "response_format",
         "seed",
         "stop",
+        "structured_outputs",
         "temperature",
+        "tool_choice",
+        "tools",
         "top_k",
         "top_logprobs",
         "top_p"
@@ -27857,8 +28094,8 @@
     }
   },
   {
-    "id": "qwen/qwq-32b:free",
-    "name": "Qwen: QwQ 32B (free)",
+    "id": "qwen/qwq-32b",
+    "name": "Qwen: QwQ 32B",
     "provider": "openrouter",
     "family": "qwen",
     "created_at": "2025-03-05 22:06:54 +0100",
@@ -27875,9 +28112,18 @@
     },
     "capabilities": [
       "streaming",
-      "structured_output"
+      "function_calling",
+      "structured_output",
+      "predicted_outputs"
     ],
-    "pricing": {},
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 0.15,
+          "output_per_million": 0.39999999999999997
+        }
+      }
+    },
     "metadata": {
       "description": "QwQ is the reasoning model of the Qwen series. Compared with conventional instruction-tuned models, QwQ, which is capable of thinking and reasoning, can achieve significantly enhanced performance in downstream tasks, especially hard problems. QwQ-32B is the medium-sized reasoning model, which is capable of achieving competitive performance against state-of-the-art reasoning models, e.g., DeepSeek-R1, o1-mini.",
       "architecture": {
@@ -27899,24 +28145,34 @@
       "per_request_limits": null,
       "supported_parameters": [
         "frequency_penalty",
+        "include_reasoning",
+        "logit_bias",
+        "logprobs",
         "max_tokens",
+        "min_p",
         "presence_penalty",
+        "reasoning",
+        "repetition_penalty",
         "response_format",
+        "seed",
         "stop",
         "structured_outputs",
         "temperature",
+        "tool_choice",
+        "tools",
         "top_k",
+        "top_logprobs",
         "top_p"
       ]
     }
   },
   {
-    "id": "raifle/sorcererlm-8x22b",
-    "name": "SorcererLM 8x22B",
+    "id": "qwen/qwq-32b-preview",
+    "name": "Qwen: QwQ 32B Preview",
     "provider": "openrouter",
-    "family": "raifle",
-    "created_at": "2024-11-08 23:31:23 +0100",
-    "context_window": 16000,
+    "family": "qwen",
+    "created_at": "2024-11-28 01:42:21 +0100",
+    "context_window": 32768,
     "max_output_tokens": null,
     "knowledge_cutoff": null,
     "modalities": {
@@ -27934,13 +28190,13 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 4.5,
-          "output_per_million": 4.5
+          "input_per_million": 0.19999999999999998,
+          "output_per_million": 0.19999999999999998
         }
       }
     },
     "metadata": {
-      "description": "SorcererLM is an advanced RP and storytelling model, built as a Low-rank 16-bit LoRA fine-tuned on [WizardLM-2 8x22B](/microsoft/wizardlm-2-8x22b).\n\n- Advanced reasoning and emotional intelligence for engaging and immersive interactions\n- Vivid writing capabilities enriched with spatial and contextual awareness\n- Enhanced narrative depth, promoting creative and dynamic storytelling",
+      "description": "QwQ-32B-Preview is an experimental research model focused on AI reasoning capabilities developed by the Qwen Team. As a preview release, it demonstrates promising analytical abilities while having several important limitations:\n\n1. **Language Mixing and Code-Switching**: The model may mix languages or switch between them unexpectedly, affecting response clarity.\n2. **Recursive Reasoning Loops**: The model may enter circular reasoning patterns, leading to lengthy responses without a conclusive answer.\n3. **Safety and Ethical Considerations**: The model requires enhanced safety measures to ensure reliable and secure performance, and users should exercise caution when deploying it.\n4. **Performance and Benchmark Limitations**: The model excels in math and coding but has room for improvement in other areas, such as common sense reasoning and nuanced language understanding.\n\n",
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
@@ -27949,11 +28205,11 @@
         "output_modalities": [
           "text"
         ],
-        "tokenizer": "Mistral",
-        "instruct_type": "vicuna"
+        "tokenizer": "Qwen",
+        "instruct_type": "deepseek-r1"
       },
       "top_provider": {
-        "context_length": 16000,
+        "context_length": 32768,
         "max_completion_tokens": null,
         "is_moderated": false
       },
@@ -27961,6 +28217,7 @@
       "supported_parameters": [
         "frequency_penalty",
         "logit_bias",
+        "logprobs",
         "max_tokens",
         "min_p",
         "presence_penalty",
@@ -27969,16 +28226,17 @@
         "stop",
         "temperature",
         "top_k",
+        "top_logprobs",
         "top_p"
       ]
     }
   },
   {
-    "id": "rekaai/reka-flash-3:free",
-    "name": "Reka: Flash 3 (free)",
+    "id": "qwen/qwq-32b:free",
+    "name": "Qwen: QwQ 32B (free)",
     "provider": "openrouter",
-    "family": "rekaai",
-    "created_at": "2025-03-12 21:53:33 +0100",
+    "family": "qwen",
+    "created_at": "2025-03-05 22:06:54 +0100",
     "context_window": 32768,
     "max_output_tokens": null,
     "knowledge_cutoff": null,
@@ -27992,11 +28250,11 @@
     },
     "capabilities": [
       "streaming",
-      "predicted_outputs"
+      "structured_output"
     ],
     "pricing": {},
     "metadata": {
-      "description": "Reka Flash 3 is a general-purpose, instruction-tuned large language model with 21 billion parameters, developed by Reka. It excels at general chat, coding tasks, instruction-following, and function calling. Featuring a 32K context length and optimized through reinforcement learning (RLOO), it provides competitive performance comparable to proprietary models within a smaller parameter footprint. Ideal for low-latency, local, or on-device deployments, Reka Flash 3 is compact, supports efficient quantization (down to 11GB at 4-bit precision), and employs explicit reasoning tags (\"<reasoning>\") to indicate its internal thought process.\n\nReka Flash 3 is primarily an English model with limited multilingual understanding capabilities. The model weights are released under the Apache 2.0 license.",
+      "description": "QwQ is the reasoning model of the Qwen series. Compared with conventional instruction-tuned models, QwQ, which is capable of thinking and reasoning, can achieve significantly enhanced performance in downstream tasks, especially hard problems. QwQ-32B is the medium-sized reasoning model, which is capable of achieving competitive performance against state-of-the-art reasoning models, e.g., DeepSeek-R1, o1-mini.",
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
@@ -28005,8 +28263,8 @@
         "output_modalities": [
           "text"
         ],
-        "tokenizer": "Other",
-        "instruct_type": null
+        "tokenizer": "Qwen",
+        "instruct_type": "qwq"
       },
       "top_provider": {
         "context_length": 32768,
@@ -28016,19 +28274,76 @@
       "per_request_limits": null,
       "supported_parameters": [
         "frequency_penalty",
-        "include_reasoning",
+        "max_tokens",
+        "presence_penalty",
+        "response_format",
+        "stop",
+        "structured_outputs",
+        "temperature",
+        "top_k",
+        "top_p"
+      ]
+    }
+  },
+  {
+    "id": "raifle/sorcererlm-8x22b",
+    "name": "SorcererLM 8x22B",
+    "provider": "openrouter",
+    "family": "raifle",
+    "created_at": "2024-11-08 23:31:23 +0100",
+    "context_window": 16000,
+    "max_output_tokens": null,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [
+        "text"
+      ],
+      "output": [
+        "text"
+      ]
+    },
+    "capabilities": [
+      "streaming",
+      "predicted_outputs"
+    ],
+    "pricing": {
+      "text_tokens": {
+        "standard": {
+          "input_per_million": 4.5,
+          "output_per_million": 4.5
+        }
+      }
+    },
+    "metadata": {
+      "description": "SorcererLM is an advanced RP and storytelling model, built as a Low-rank 16-bit LoRA fine-tuned on [WizardLM-2 8x22B](/microsoft/wizardlm-2-8x22b).\n\n- Advanced reasoning and emotional intelligence for engaging and immersive interactions\n- Vivid writing capabilities enriched with spatial and contextual awareness\n- Enhanced narrative depth, promoting creative and dynamic storytelling",
+      "architecture": {
+        "modality": "text->text",
+        "input_modalities": [
+          "text"
+        ],
+        "output_modalities": [
+          "text"
+        ],
+        "tokenizer": "Mistral",
+        "instruct_type": "vicuna"
+      },
+      "top_provider": {
+        "context_length": 16000,
+        "max_completion_tokens": null,
+        "is_moderated": false
+      },
+      "per_request_limits": null,
+      "supported_parameters": [
+        "frequency_penalty",
         "logit_bias",
-        "logprobs",
         "max_tokens",
         "min_p",
         "presence_penalty",
-        "reasoning",
         "repetition_penalty",
         "seed",
         "stop",
         "temperature",
         "top_k",
-        "top_logprobs",
         "top_p"
       ]
     }
@@ -28121,7 +28436,7 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.02,
+          "input_per_million": 0.04,
           "output_per_million": 0.049999999999999996
         }
       }
@@ -28315,8 +28630,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.01999188,
-          "output_per_million": 0.0800064
+          "input_per_million": 0.04,
+          "output_per_million": 0.14
         }
       }
     },
@@ -28361,67 +28676,9 @@
     "name": "Shisa AI: Shisa V2 Llama 3.3 70B  (free)",
     "provider": "openrouter",
     "family": "shisa-ai",
-    "created_at": "2025-04-16 00:07:38 +0200",
-    "context_window": 32768,
-    "max_output_tokens": null,
-    "knowledge_cutoff": null,
-    "modalities": {
-      "input": [
-        "text"
-      ],
-      "output": [
-        "text"
-      ]
-    },
-    "capabilities": [
-      "streaming",
-      "predicted_outputs"
-    ],
-    "pricing": {},
-    "metadata": {
-      "description": "Shisa V2 Llama 3.3 70B is a bilingual Japanese-English chat model fine-tuned by Shisa.AI on Meta’s Llama-3.3-70B-Instruct base. It prioritizes Japanese language performance while retaining strong English capabilities. The model was optimized entirely through post-training, using a refined mix of supervised fine-tuning (SFT) and DPO datasets including regenerated ShareGPT-style data, translation tasks, roleplaying conversations, and instruction-following prompts. Unlike earlier Shisa releases, this version avoids tokenizer modifications or extended pretraining.\n\nShisa V2 70B achieves leading Japanese task performance across a wide range of custom and public benchmarks, including JA MT Bench, ELYZA 100, and Rakuda. It supports a 128K token context length and integrates smoothly with inference frameworks like vLLM and SGLang. While it inherits safety characteristics from its base model, no additional alignment was applied. The model is intended for high-performance bilingual chat, instruction following, and translation tasks across JA/EN.",
-      "architecture": {
-        "modality": "text->text",
-        "input_modalities": [
-          "text"
-        ],
-        "output_modalities": [
-          "text"
-        ],
-        "tokenizer": "Llama3",
-        "instruct_type": null
-      },
-      "top_provider": {
-        "context_length": 32768,
-        "max_completion_tokens": null,
-        "is_moderated": false
-      },
-      "per_request_limits": null,
-      "supported_parameters": [
-        "frequency_penalty",
-        "logit_bias",
-        "logprobs",
-        "max_tokens",
-        "min_p",
-        "presence_penalty",
-        "repetition_penalty",
-        "seed",
-        "stop",
-        "temperature",
-        "top_k",
-        "top_logprobs",
-        "top_p"
-      ]
-    }
-  },
-  {
-    "id": "sophosympatheia/midnight-rose-70b",
-    "name": "Midnight Rose 70B",
-    "provider": "openrouter",
-    "family": "sophosympatheia",
-    "created_at": "2024-03-22 01:00:00 +0100",
-    "context_window": 4096,
-    "max_output_tokens": 2048,
+    "created_at": "2025-04-16 00:07:38 +0200",
+    "context_window": 32768,
+    "max_output_tokens": null,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
@@ -28435,16 +28692,9 @@
       "streaming",
       "predicted_outputs"
     ],
-    "pricing": {
-      "text_tokens": {
-        "standard": {
-          "input_per_million": 0.7999999999999999,
-          "output_per_million": 0.7999999999999999
-        }
-      }
-    },
+    "pricing": {},
     "metadata": {
-      "description": "A merge with a complex family tree, this model was crafted for roleplaying and storytelling. Midnight Rose is a successor to Rogue Rose and Aurora Nights and improves upon them both. It wants to produce lengthy output by default and is the best creative writing merge produced so far by sophosympatheia.\n\nDescending from earlier versions of Midnight Rose and [Wizard Tulu Dolphin 70B](https://huggingface.co/sophosympatheia/Wizard-Tulu-Dolphin-70B-v1.0), it inherits the best qualities of each.",
+      "description": "Shisa V2 Llama 3.3 70B is a bilingual Japanese-English chat model fine-tuned by Shisa.AI on Meta’s Llama-3.3-70B-Instruct base. It prioritizes Japanese language performance while retaining strong English capabilities. The model was optimized entirely through post-training, using a refined mix of supervised fine-tuning (SFT) and DPO datasets including regenerated ShareGPT-style data, translation tasks, roleplaying conversations, and instruction-following prompts. Unlike earlier Shisa releases, this version avoids tokenizer modifications or extended pretraining.\n\nShisa V2 70B achieves leading Japanese task performance across a wide range of custom and public benchmarks, including JA MT Bench, ELYZA 100, and Rakuda. It supports a 128K token context length and integrates smoothly with inference frameworks like vLLM and SGLang. While it inherits safety characteristics from its base model, no additional alignment was applied. The model is intended for high-performance bilingual chat, instruction following, and translation tasks across JA/EN.",
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
@@ -28453,18 +28703,19 @@
         "output_modalities": [
           "text"
         ],
-        "tokenizer": "Llama2",
-        "instruct_type": "airoboros"
+        "tokenizer": "Llama3",
+        "instruct_type": null
       },
       "top_provider": {
-        "context_length": 4096,
-        "max_completion_tokens": 2048,
+        "context_length": 32768,
+        "max_completion_tokens": null,
         "is_moderated": false
       },
       "per_request_limits": null,
       "supported_parameters": [
         "frequency_penalty",
         "logit_bias",
+        "logprobs",
         "max_tokens",
         "min_p",
         "presence_penalty",
@@ -28473,6 +28724,7 @@
         "stop",
         "temperature",
         "top_k",
+        "top_logprobs",
         "top_p"
       ]
     }
@@ -28533,6 +28785,7 @@
         "include_reasoning",
         "reasoning",
         "response_format",
+        "structured_outputs",
         "temperature",
         "tool_choice",
         "tools",
@@ -28662,6 +28915,7 @@
         "response_format",
         "seed",
         "stop",
+        "structured_outputs",
         "temperature",
         "top_k",
         "top_logprobs",
@@ -28911,7 +29165,6 @@
       "supported_parameters": [
         "frequency_penalty",
         "logit_bias",
-        "logprobs",
         "max_tokens",
         "min_p",
         "presence_penalty",
@@ -28952,8 +29205,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.0481286,
-          "output_per_million": 0.192608
+          "input_per_million": 0.04,
+          "output_per_million": 0.16
         }
       }
     },
@@ -29013,8 +29266,7 @@
     "capabilities": [
       "streaming",
       "function_calling",
-      "structured_output",
-      "predicted_outputs"
+      "structured_output"
     ],
     "pricing": {
       "text_tokens": {
@@ -29045,83 +29297,14 @@
       "per_request_limits": null,
       "supported_parameters": [
         "frequency_penalty",
-        "logit_bias",
-        "logprobs",
         "max_tokens",
-        "min_p",
         "presence_penalty",
-        "repetition_penalty",
         "response_format",
-        "seed",
         "stop",
         "structured_outputs",
         "temperature",
         "tool_choice",
         "tools",
-        "top_k",
-        "top_p"
-      ]
-    }
-  },
-  {
-    "id": "thudm/glm-4-32b",
-    "name": "THUDM: GLM 4 32B",
-    "provider": "openrouter",
-    "family": "thudm",
-    "created_at": "2025-04-17 22:15:15 +0200",
-    "context_window": 32000,
-    "max_output_tokens": 32000,
-    "knowledge_cutoff": null,
-    "modalities": {
-      "input": [
-        "text"
-      ],
-      "output": [
-        "text"
-      ]
-    },
-    "capabilities": [
-      "streaming",
-      "predicted_outputs"
-    ],
-    "pricing": {
-      "text_tokens": {
-        "standard": {
-          "input_per_million": 0.55,
-          "output_per_million": 1.66
-        }
-      }
-    },
-    "metadata": {
-      "description": "GLM-4-32B-0414 is a 32B bilingual (Chinese-English) open-weight language model optimized for code generation, function calling, and agent-style tasks. Pretrained on 15T of high-quality and reasoning-heavy data, it was further refined using human preference alignment, rejection sampling, and reinforcement learning. The model excels in complex reasoning, artifact generation, and structured output tasks, achieving performance comparable to GPT-4o and DeepSeek-V3-0324 across several benchmarks.",
-      "architecture": {
-        "modality": "text->text",
-        "input_modalities": [
-          "text"
-        ],
-        "output_modalities": [
-          "text"
-        ],
-        "tokenizer": "Other",
-        "instruct_type": null
-      },
-      "top_provider": {
-        "context_length": 32000,
-        "max_completion_tokens": 32000,
-        "is_moderated": false
-      },
-      "per_request_limits": null,
-      "supported_parameters": [
-        "frequency_penalty",
-        "logit_bias",
-        "max_tokens",
-        "min_p",
-        "presence_penalty",
-        "repetition_penalty",
-        "seed",
-        "stop",
-        "temperature",
-        "top_k",
         "top_p"
       ]
     }
@@ -29217,8 +29400,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.01999188,
-          "output_per_million": 0.0800064
+          "input_per_million": 0.04,
+          "output_per_million": 0.14
         }
       }
     },
@@ -29284,8 +29467,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.1999188,
-          "output_per_million": 0.800064
+          "input_per_million": 0.24999987999999998,
+          "output_per_million": 0.999999888
         }
       }
     },
@@ -29515,11 +29698,11 @@
     }
   },
   {
-    "id": "x-ai/grok-2-1212",
-    "name": "xAI: Grok 2 1212",
+    "id": "x-ai/grok-3",
+    "name": "xAI: Grok 3",
     "provider": "openrouter",
     "family": "x-ai",
-    "created_at": "2024-12-15 04:20:14 +0100",
+    "created_at": "2025-06-10 21:15:08 +0200",
     "context_window": 131072,
     "max_output_tokens": null,
     "knowledge_cutoff": null,
@@ -29539,13 +29722,14 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 2.0,
-          "output_per_million": 10.0
+          "input_per_million": 3.0,
+          "output_per_million": 15.0,
+          "cached_input_per_million": 0.75
         }
       }
     },
     "metadata": {
-      "description": "Grok 2 1212 introduces significant enhancements to accuracy, instruction adherence, and multilingual support, making it a powerful and flexible choice for developers seeking a highly steerable, intelligent model.",
+      "description": "Grok 3 is the latest model from xAI. It's their flagship model that excels at enterprise use cases like data extraction, coding, and text summarization. Possesses deep domain knowledge in finance, healthcare, law, and science.\n\n",
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
@@ -29571,6 +29755,7 @@
         "response_format",
         "seed",
         "stop",
+        "structured_outputs",
         "temperature",
         "tool_choice",
         "tools",
@@ -29580,75 +29765,11 @@
     }
   },
   {
-    "id": "x-ai/grok-2-vision-1212",
-    "name": "xAI: Grok 2 Vision 1212",
-    "provider": "openrouter",
-    "family": "x-ai",
-    "created_at": "2024-12-15 05:35:38 +0100",
-    "context_window": 32768,
-    "max_output_tokens": null,
-    "knowledge_cutoff": null,
-    "modalities": {
-      "input": [
-        "text",
-        "image"
-      ],
-      "output": [
-        "text"
-      ]
-    },
-    "capabilities": [
-      "streaming",
-      "structured_output"
-    ],
-    "pricing": {
-      "text_tokens": {
-        "standard": {
-          "input_per_million": 2.0,
-          "output_per_million": 10.0
-        }
-      }
-    },
-    "metadata": {
-      "description": "Grok 2 Vision 1212 advances image-based AI with stronger visual comprehension, refined instruction-following, and multilingual support. From object recognition to style analysis, it empowers developers to build more intuitive, visually aware applications. Its enhanced steerability and reasoning establish a robust foundation for next-generation image solutions.\n\nTo read more about this model, check out [xAI's announcement](https://x.ai/blog/grok-1212).",
-      "architecture": {
-        "modality": "text+image->text",
-        "input_modalities": [
-          "text",
-          "image"
-        ],
-        "output_modalities": [
-          "text"
-        ],
-        "tokenizer": "Grok",
-        "instruct_type": null
-      },
-      "top_provider": {
-        "context_length": 32768,
-        "max_completion_tokens": null,
-        "is_moderated": false
-      },
-      "per_request_limits": null,
-      "supported_parameters": [
-        "frequency_penalty",
-        "logprobs",
-        "max_tokens",
-        "presence_penalty",
-        "response_format",
-        "seed",
-        "stop",
-        "temperature",
-        "top_logprobs",
-        "top_p"
-      ]
-    }
-  },
-  {
-    "id": "x-ai/grok-3",
-    "name": "xAI: Grok 3",
+    "id": "x-ai/grok-3-beta",
+    "name": "xAI: Grok 3 Beta",
     "provider": "openrouter",
     "family": "x-ai",
-    "created_at": "2025-06-10 21:15:08 +0200",
+    "created_at": "2025-04-10 01:07:48 +0200",
     "context_window": 131072,
     "max_output_tokens": null,
     "knowledge_cutoff": null,
@@ -29675,7 +29796,7 @@
       }
     },
     "metadata": {
-      "description": "Grok 3 is the latest model from xAI. It's their flagship model that excels at enterprise use cases like data extraction, coding, and text summarization. Possesses deep domain knowledge in finance, healthcare, law, and science.\n\n",
+      "description": "Grok 3 is the latest model from xAI. It's their flagship model that excels at enterprise use cases like data extraction, coding, and text summarization. Possesses deep domain knowledge in finance, healthcare, law, and science.\n\nExcels in structured tasks and benchmarks like GPQA, LCB, and MMLU-Pro where it outperforms Grok 3 Mini even on high thinking. \n\nNote: That there are two xAI endpoints for this model. By default when using this model we will always route you to the base endpoint. If you want the fast endpoint you can add `provider: { sort: throughput}`, to sort by throughput instead. \n",
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
@@ -29701,7 +29822,6 @@
         "response_format",
         "seed",
         "stop",
-        "structured_outputs",
         "temperature",
         "tool_choice",
         "tools",
@@ -29711,11 +29831,11 @@
     }
   },
   {
-    "id": "x-ai/grok-3-beta",
-    "name": "xAI: Grok 3 Beta",
+    "id": "x-ai/grok-3-mini",
+    "name": "xAI: Grok 3 Mini",
     "provider": "openrouter",
     "family": "x-ai",
-    "created_at": "2025-04-10 01:07:48 +0200",
+    "created_at": "2025-06-10 21:20:45 +0200",
     "context_window": 131072,
     "max_output_tokens": null,
     "knowledge_cutoff": null,
@@ -29735,14 +29855,14 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 3.0,
-          "output_per_million": 15.0,
-          "cached_input_per_million": 0.75
+          "input_per_million": 0.3,
+          "output_per_million": 0.5,
+          "cached_input_per_million": 0.075
         }
       }
     },
     "metadata": {
-      "description": "Grok 3 is the latest model from xAI. It's their flagship model that excels at enterprise use cases like data extraction, coding, and text summarization. Possesses deep domain knowledge in finance, healthcare, law, and science.\n\nExcels in structured tasks and benchmarks like GPQA, LCB, and MMLU-Pro where it outperforms Grok 3 Mini even on high thinking. \n\nNote: That there are two xAI endpoints for this model. By default when using this model we will always route you to the base endpoint. If you want the fast endpoint you can add `provider: { sort: throughput}`, to sort by throughput instead. \n",
+      "description": "A lightweight model that thinks before responding. Fast, smart, and great for logic-based tasks that do not require deep domain knowledge. The raw thinking traces are accessible.",
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
@@ -29761,13 +29881,14 @@
       },
       "per_request_limits": null,
       "supported_parameters": [
-        "frequency_penalty",
+        "include_reasoning",
         "logprobs",
         "max_tokens",
-        "presence_penalty",
+        "reasoning",
         "response_format",
         "seed",
         "stop",
+        "structured_outputs",
         "temperature",
         "tool_choice",
         "tools",
@@ -29777,11 +29898,11 @@
     }
   },
   {
-    "id": "x-ai/grok-3-mini",
-    "name": "xAI: Grok 3 Mini",
+    "id": "x-ai/grok-3-mini-beta",
+    "name": "xAI: Grok 3 Mini Beta",
     "provider": "openrouter",
     "family": "x-ai",
-    "created_at": "2025-06-10 21:20:45 +0200",
+    "created_at": "2025-04-10 01:09:55 +0200",
     "context_window": 131072,
     "max_output_tokens": null,
     "knowledge_cutoff": null,
@@ -29808,7 +29929,7 @@
       }
     },
     "metadata": {
-      "description": "A lightweight model that thinks before responding. Fast, smart, and great for logic-based tasks that do not require deep domain knowledge. The raw thinking traces are accessible.",
+      "description": "Grok 3 Mini is a lightweight, smaller thinking model. Unlike traditional models that generate answers immediately, Grok 3 Mini thinks before responding. It’s ideal for reasoning-heavy tasks that don’t demand extensive domain knowledge, and shines in math-specific and quantitative use cases, such as solving challenging puzzles or math problems.\n\nTransparent \"thinking\" traces accessible. Defaults to low reasoning, can boost with setting `reasoning: { effort: \"high\" }`\n\nNote: That there are two xAI endpoints for this model. By default when using this model we will always route you to the base endpoint. If you want the fast endpoint you can add `provider: { sort: throughput}`, to sort by throughput instead. \n",
       "architecture": {
         "modality": "text->text",
         "input_modalities": [
@@ -29834,7 +29955,6 @@
         "response_format",
         "seed",
         "stop",
-        "structured_outputs",
         "temperature",
         "tool_choice",
         "tools",
@@ -29844,16 +29964,17 @@
     }
   },
   {
-    "id": "x-ai/grok-3-mini-beta",
-    "name": "xAI: Grok 3 Mini Beta",
+    "id": "x-ai/grok-4",
+    "name": "xAI: Grok 4",
     "provider": "openrouter",
     "family": "x-ai",
-    "created_at": "2025-04-10 01:09:55 +0200",
-    "context_window": 131072,
+    "created_at": "2025-07-09 21:01:29 +0200",
+    "context_window": 256000,
     "max_output_tokens": null,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
+        "image",
         "text"
       ],
       "output": [
@@ -29868,17 +29989,18 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.3,
-          "output_per_million": 0.5,
-          "cached_input_per_million": 0.075
+          "input_per_million": 3.0,
+          "output_per_million": 15.0,
+          "cached_input_per_million": 0.75
         }
       }
     },
     "metadata": {
-      "description": "Grok 3 Mini is a lightweight, smaller thinking model. Unlike traditional models that generate answers immediately, Grok 3 Mini thinks before responding. It’s ideal for reasoning-heavy tasks that don’t demand extensive domain knowledge, and shines in math-specific and quantitative use cases, such as solving challenging puzzles or math problems.\n\nTransparent \"thinking\" traces accessible. Defaults to low reasoning, can boost with setting `reasoning: { effort: \"high\" }`\n\nNote: That there are two xAI endpoints for this model. By default when using this model we will always route you to the base endpoint. If you want the fast endpoint you can add `provider: { sort: throughput}`, to sort by throughput instead. \n",
+      "description": "Grok 4 is xAI's latest reasoning model with a 256k context window. It supports parallel tool calling, structured outputs, and both image and text inputs. Note that reasoning is not exposed, reasoning cannot be disabled, and the reasoning effort cannot be specified. Pricing increases once the total tokens in a given request is greater than 128k tokens. See more details on the [xAI docs](https://docs.x.ai/docs/models/grok-4-0709)",
       "architecture": {
-        "modality": "text->text",
+        "modality": "text+image->text",
         "input_modalities": [
+          "image",
           "text"
         ],
         "output_modalities": [
@@ -29888,7 +30010,7 @@
         "instruct_type": null
       },
       "top_provider": {
-        "context_length": 131072,
+        "context_length": 256000,
         "max_completion_tokens": null,
         "is_moderated": false
       },
@@ -29900,7 +30022,7 @@
         "reasoning",
         "response_format",
         "seed",
-        "stop",
+        "structured_outputs",
         "temperature",
         "tool_choice",
         "tools",
@@ -29910,18 +30032,18 @@
     }
   },
   {
-    "id": "x-ai/grok-4",
-    "name": "xAI: Grok 4",
+    "id": "x-ai/grok-4-fast:free",
+    "name": "xAI: Grok 4 Fast (free)",
     "provider": "openrouter",
     "family": "x-ai",
-    "created_at": "2025-07-09 21:01:29 +0200",
-    "context_window": 256000,
-    "max_output_tokens": null,
+    "created_at": "2025-09-19 02:01:30 +0200",
+    "context_window": 2000000,
+    "max_output_tokens": 30000,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [
-        "image",
-        "text"
+        "text",
+        "image"
       ],
       "output": [
         "text"
@@ -29932,22 +30054,14 @@
       "function_calling",
       "structured_output"
     ],
-    "pricing": {
-      "text_tokens": {
-        "standard": {
-          "input_per_million": 3.0,
-          "output_per_million": 15.0,
-          "cached_input_per_million": 0.75
-        }
-      }
-    },
+    "pricing": {},
     "metadata": {
-      "description": "Grok 4 is xAI's latest reasoning model with a 256k context window. It supports parallel tool calling, structured outputs, and both image and text inputs. Note that reasoning is not exposed, reasoning cannot be disabled, and the reasoning effort cannot be specified. Pricing increases once the total tokens in a given request is greater than 128k tokens. See more details on the [xAI docs](https://docs.x.ai/docs/models/grok-4-0709)",
+      "description": "Grok 4 Fast is xAI's latest multimodal model with SOTA cost-efficiency and a 2M token context window. It comes in two flavors: non-reasoning and reasoning. Read more about the model on xAI's [news post](http://x.ai/news/grok-4-fast). Reasoning can be enabled using the `reasoning` `enabled` parameter in the API. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#controlling-reasoning-tokens)\n\nPrompts and completions may be used by xAI or OpenRouter to improve future models.",
       "architecture": {
         "modality": "text+image->text",
         "input_modalities": [
-          "image",
-          "text"
+          "text",
+          "image"
         ],
         "output_modalities": [
           "text"
@@ -29956,8 +30070,8 @@
         "instruct_type": null
       },
       "top_provider": {
-        "context_length": 256000,
-        "max_completion_tokens": null,
+        "context_length": 2000000,
+        "max_completion_tokens": 30000,
         "is_moderated": false
       },
       "per_request_limits": null,
@@ -30127,8 +30241,8 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 0.32986602,
-          "output_per_million": 1.3201056
+          "input_per_million": 0.41,
+          "output_per_million": 1.6500000000000001
         }
       }
     },
@@ -30164,6 +30278,7 @@
         "response_format",
         "seed",
         "stop",
+        "structured_outputs",
         "temperature",
         "tool_choice",
         "tools",
@@ -30375,7 +30490,7 @@
     "name": "Sonar",
     "provider": "perplexity",
     "family": "sonar",
-    "created_at": "2025-09-09 20:41:26 +0200",
+    "created_at": "2025-09-21 16:12:52 +0200",
     "context_window": 128000,
     "max_output_tokens": 4096,
     "knowledge_cutoff": null,
@@ -30407,7 +30522,7 @@
     "name": "Sonar Deep Research",
     "provider": "perplexity",
     "family": "sonar_deep_research",
-    "created_at": "2025-09-09 20:41:26 +0200",
+    "created_at": "2025-09-21 16:12:52 +0200",
     "context_window": 128000,
     "max_output_tokens": 4096,
     "knowledge_cutoff": null,
@@ -30442,7 +30557,7 @@
     "name": "Sonar Pro",
     "provider": "perplexity",
     "family": "sonar_pro",
-    "created_at": "2025-09-09 20:41:26 +0200",
+    "created_at": "2025-09-21 16:12:52 +0200",
     "context_window": 200000,
     "max_output_tokens": 8192,
     "knowledge_cutoff": null,
@@ -30474,7 +30589,7 @@
     "name": "Sonar Reasoning",
     "provider": "perplexity",
     "family": "sonar_reasoning",
-    "created_at": "2025-09-09 20:41:26 +0200",
+    "created_at": "2025-09-21 16:12:52 +0200",
     "context_window": 128000,
     "max_output_tokens": 4096,
     "knowledge_cutoff": null,
@@ -30506,7 +30621,7 @@
     "name": "Sonar Reasoning Pro",
     "provider": "perplexity",
     "family": "sonar_reasoning_pro",
-    "created_at": "2025-09-09 20:41:26 +0200",
+    "created_at": "2025-09-21 16:12:52 +0200",
     "context_window": 128000,
     "max_output_tokens": 8192,
     "knowledge_cutoff": null,