@lobehub/chat 1.129.0 → 1.129.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/CHANGELOG.md +66 -0
  2. package/changelog/v1.json +21 -0
  3. package/docs/development/database-schema.dbml +9 -0
  4. package/locales/ar/models.json +248 -23
  5. package/locales/ar/providers.json +3 -0
  6. package/locales/bg-BG/models.json +248 -23
  7. package/locales/bg-BG/providers.json +3 -0
  8. package/locales/de-DE/models.json +248 -23
  9. package/locales/de-DE/providers.json +3 -0
  10. package/locales/en-US/models.json +248 -23
  11. package/locales/en-US/providers.json +3 -0
  12. package/locales/es-ES/models.json +248 -23
  13. package/locales/es-ES/providers.json +3 -0
  14. package/locales/fa-IR/models.json +248 -23
  15. package/locales/fa-IR/providers.json +3 -0
  16. package/locales/fr-FR/models.json +248 -23
  17. package/locales/fr-FR/providers.json +3 -0
  18. package/locales/it-IT/models.json +248 -23
  19. package/locales/it-IT/providers.json +3 -0
  20. package/locales/ja-JP/models.json +248 -23
  21. package/locales/ja-JP/providers.json +3 -0
  22. package/locales/ko-KR/models.json +248 -23
  23. package/locales/ko-KR/providers.json +3 -0
  24. package/locales/nl-NL/models.json +248 -23
  25. package/locales/nl-NL/providers.json +3 -0
  26. package/locales/pl-PL/models.json +248 -23
  27. package/locales/pl-PL/providers.json +3 -0
  28. package/locales/pt-BR/models.json +248 -23
  29. package/locales/pt-BR/providers.json +3 -0
  30. package/locales/ru-RU/models.json +248 -23
  31. package/locales/ru-RU/providers.json +3 -0
  32. package/locales/tr-TR/models.json +248 -23
  33. package/locales/tr-TR/providers.json +3 -0
  34. package/locales/vi-VN/models.json +248 -23
  35. package/locales/vi-VN/providers.json +3 -0
  36. package/locales/zh-CN/models.json +248 -23
  37. package/locales/zh-CN/providers.json +3 -0
  38. package/locales/zh-TW/models.json +248 -23
  39. package/locales/zh-TW/providers.json +3 -0
  40. package/package.json +1 -1
  41. package/packages/database/migrations/0031_add_agent_index.sql +9 -3
  42. package/packages/database/migrations/0032_improve_agents_field.sql +0 -4
  43. package/packages/database/migrations/0033_modern_mercury.sql +18 -0
  44. package/packages/database/migrations/meta/0033_snapshot.json +6594 -0
  45. package/packages/database/migrations/meta/_journal.json +7 -0
  46. package/packages/database/src/core/migrations.json +23 -6
  47. package/packages/database/src/schemas/message.ts +12 -11
  48. package/packages/database/src/schemas/rag.ts +10 -6
  49. package/packages/database/src/schemas/session.ts +7 -5
  50. package/packages/database/src/schemas/topic.ts +7 -3
  51. package/packages/model-bank/src/aiModels/siliconcloud.ts +45 -0
  52. package/packages/model-runtime/src/providers/siliconcloud/index.ts +19 -11
@@ -602,6 +602,33 @@
602
602
  "ai21-labs/AI21-Jamba-1.5-Mini": {
603
603
  "description": "A 52B parameter (12B active) multilingual model offering a 256K long context window, function calling, structured output, and fact-based generation."
604
604
  },
605
+ "alibaba/qwen-3-14b": {
606
+ "description": "Qwen3 is the latest generation large language model in the Qwen series, offering a comprehensive set of dense and Mixture of Experts (MoE) models. Built on extensive training, Qwen3 delivers breakthrough advancements in reasoning, instruction following, agent capabilities, and multilingual support."
607
+ },
608
+ "alibaba/qwen-3-235b": {
609
+ "description": "Qwen3 is the latest generation large language model in the Qwen series, offering a comprehensive set of dense and Mixture of Experts (MoE) models. Built on extensive training, Qwen3 delivers breakthrough advancements in reasoning, instruction following, agent capabilities, and multilingual support."
610
+ },
611
+ "alibaba/qwen-3-30b": {
612
+ "description": "Qwen3 is the latest generation large language model in the Qwen series, offering a comprehensive set of dense and Mixture of Experts (MoE) models. Built on extensive training, Qwen3 delivers breakthrough advancements in reasoning, instruction following, agent capabilities, and multilingual support."
613
+ },
614
+ "alibaba/qwen-3-32b": {
615
+ "description": "Qwen3 is the latest generation large language model in the Qwen series, offering a comprehensive set of dense and Mixture of Experts (MoE) models. Built on extensive training, Qwen3 delivers breakthrough advancements in reasoning, instruction following, agent capabilities, and multilingual support."
616
+ },
617
+ "alibaba/qwen3-coder": {
618
+ "description": "Qwen3-Coder-480B-A35B-Instruct is Qwen's most agent-capable code model, demonstrating remarkable performance in agent coding, agent browser usage, and other fundamental coding tasks, achieving results comparable to Claude Sonnet."
619
+ },
620
+ "amazon/nova-lite": {
621
+ "description": "A very low-cost multimodal model that processes image, video, and text inputs at extremely high speed."
622
+ },
623
+ "amazon/nova-micro": {
624
+ "description": "A text-only model delivering the lowest latency responses at a very low cost."
625
+ },
626
+ "amazon/nova-pro": {
627
+ "description": "A highly capable multimodal model offering the best combination of accuracy, speed, and cost, suitable for a wide range of tasks."
628
+ },
629
+ "amazon/titan-embed-text-v2": {
630
+ "description": "Amazon Titan Text Embeddings V2 is a lightweight, efficient multilingual embedding model supporting 1024, 512, and 256 dimensions."
631
+ },
605
632
  "anthropic.claude-3-5-sonnet-20240620-v1:0": {
606
633
  "description": "Claude 3.5 Sonnet raises the industry standard, outperforming competitor models and Claude 3 Opus, excelling in a wide range of evaluations while maintaining the speed and cost of our mid-tier models."
607
634
  },
@@ -627,25 +654,28 @@
627
654
  "description": "An updated version of Claude 2, featuring double the context window and improvements in reliability, hallucination rates, and evidence-based accuracy in long documents and RAG contexts."
628
655
  },
629
656
  "anthropic/claude-3-haiku": {
630
- "description": "Claude 3 Haiku is Anthropic's fastest and most compact model, designed for near-instantaneous responses. It features quick and accurate directional performance."
657
+ "description": "Claude 3 Haiku is Anthropic's fastest model to date, designed for enterprise workloads that typically involve longer prompts. Haiku can quickly analyze large volumes of documents such as quarterly filings, contracts, or legal cases, at half the cost of other models in its performance tier."
631
658
  },
632
659
  "anthropic/claude-3-opus": {
633
- "description": "Claude 3 Opus is Anthropic's most powerful model for handling highly complex tasks. It excels in performance, intelligence, fluency, and comprehension."
660
+ "description": "Claude 3 Opus is Anthropic's smartest model, delivering market-leading performance on highly complex tasks. It navigates open-ended prompts and novel scenarios with exceptional fluency and human-like understanding."
634
661
  },
635
662
  "anthropic/claude-3.5-haiku": {
636
- "description": "Claude 3.5 Haiku is Anthropic's fastest next-generation model. Compared to Claude 3 Haiku, Claude 3.5 Haiku shows improvements across various skills and surpasses the previous generation's largest model, Claude 3 Opus, in many intelligence benchmarks."
663
+ "description": "Claude 3.5 Haiku is the next generation of our fastest model. Matching the speed of Claude 3 Haiku, it improves across every skill set and surpasses our previous largest model Claude 3 Opus on many intelligence benchmarks."
637
664
  },
638
665
  "anthropic/claude-3.5-sonnet": {
639
- "description": "Claude 3.5 Sonnet offers capabilities that surpass Opus and faster speeds than Sonnet, while maintaining the same pricing as Sonnet. Sonnet excels particularly in programming, data science, visual processing, and agent tasks."
666
+ "description": "Claude 3.5 Sonnet strikes an ideal balance between intelligence and speed—especially for enterprise workloads. It delivers powerful performance at lower cost compared to peers and is designed for high durability in large-scale AI deployments."
640
667
  },
641
668
  "anthropic/claude-3.7-sonnet": {
642
- "description": "Claude 3.7 Sonnet is Anthropic's most advanced model to date and the first hybrid reasoning model on the market. Claude 3.7 Sonnet can generate near-instant responses or extended step-by-step reasoning, allowing users to clearly observe these processes. Sonnet excels particularly in programming, data science, visual processing, and agent tasks."
669
+ "description": "Claude 3.7 Sonnet is the first hybrid reasoning model and Anthropic's smartest model to date. It offers state-of-the-art performance in coding, content generation, data analysis, and planning tasks, building on the software engineering and computer usage capabilities of its predecessor Claude 3.5 Sonnet."
643
670
  },
644
671
  "anthropic/claude-opus-4": {
645
- "description": "Claude Opus 4 is Anthropic's most powerful model designed for handling highly complex tasks. It excels in performance, intelligence, fluency, and comprehension."
672
+ "description": "Claude Opus 4 is Anthropic's most powerful model yet and the world's best coding model, leading on SWE-bench (72.5%) and Terminal-bench (43.2%). It provides sustained performance for long-term tasks requiring focused effort and thousands of steps, capable of continuous operation for hours—significantly extending AI agent capabilities."
673
+ },
674
+ "anthropic/claude-opus-4.1": {
675
+ "description": "Claude Opus 4.1 is a plug-and-play alternative to Opus 4, delivering excellent performance and accuracy for practical coding and agent tasks. Opus 4.1 advances state-of-the-art coding performance to 74.5% on SWE-bench Verified, handling complex multi-step problems with greater rigor and attention to detail."
646
676
  },
647
677
  "anthropic/claude-sonnet-4": {
648
- "description": "Claude Sonnet 4 can generate near-instant responses or extended step-by-step reasoning, allowing users to clearly observe these processes. API users also have fine-grained control over the model's thinking time."
678
+ "description": "Claude Sonnet 4 significantly improves upon the industry-leading capabilities of Sonnet 3.7, excelling in coding with state-of-the-art 72.7% on SWE-bench. The model balances performance and efficiency, suitable for both internal and external use cases, and offers enhanced controllability for greater command over outcomes."
649
679
  },
650
680
  "ascend-tribe/pangu-pro-moe": {
651
681
  "description": "Pangu-Pro-MoE 72B-A16B is a sparse large language model with 72 billion parameters and 16 billion activated parameters. It is based on the Group Mixture of Experts (MoGE) architecture, which groups experts during the expert selection phase and constrains tokens to activate an equal number of experts within each group, achieving expert load balancing and significantly improving deployment efficiency on the Ascend platform."
@@ -797,6 +827,18 @@
797
827
  "cohere/Cohere-command-r-plus": {
798
828
  "description": "Command R+ is a state-of-the-art RAG-optimized model designed to handle enterprise-level workloads."
799
829
  },
830
+ "cohere/command-a": {
831
+ "description": "Command A is Cohere's most powerful model to date, excelling in tool use, agents, retrieval-augmented generation (RAG), and multilingual use cases. With a context length of 256K, it runs on just two GPUs and achieves 150% higher throughput compared to Command R+ 08-2024."
832
+ },
833
+ "cohere/command-r": {
834
+ "description": "Command R is a large language model optimized for conversational interactions and long-context tasks. Positioned in the \"scalable\" category, it balances high performance and strong accuracy, enabling companies to move beyond proof of concept into production."
835
+ },
836
+ "cohere/command-r-plus": {
837
+ "description": "Command R+ is Cohere's latest large language model optimized for conversational interactions and long-context tasks. It aims for exceptional performance, enabling companies to transition from proof of concept to production."
838
+ },
839
+ "cohere/embed-v4.0": {
840
+ "description": "A model that enables classification or embedding transformation of text, images, or mixed content."
841
+ },
800
842
  "command": {
801
843
  "description": "An instruction-following dialogue model that delivers high quality and reliability in language tasks, with a longer context length compared to our base generation models."
802
844
  },
@@ -975,7 +1017,7 @@
975
1017
  "description": "DeepSeek-V3.1 is a large hybrid reasoning model supporting 128K long context and efficient mode switching, delivering outstanding performance and speed in tool invocation, code generation, and complex reasoning tasks."
976
1018
  },
977
1019
  "deepseek/deepseek-r1": {
978
- "description": "DeepSeek-R1 significantly enhances model reasoning capabilities with minimal labeled data. Before outputting the final answer, the model first provides a chain of thought to improve the accuracy of the final response."
1020
+ "description": "The DeepSeek R1 model has undergone minor version upgrades, currently at DeepSeek-R1-0528. The latest update significantly enhances inference depth and capability by leveraging increased compute resources and post-training algorithmic optimizations. The model performs excellently on benchmarks in mathematics, programming, and general logic, with overall performance approaching leading models like O3 and Gemini 2.5 Pro."
979
1021
  },
980
1022
  "deepseek/deepseek-r1-0528": {
981
1023
  "description": "DeepSeek-R1 greatly improves model reasoning capabilities with minimal labeled data. Before outputting the final answer, the model first generates a chain of thought to enhance answer accuracy."
@@ -984,7 +1026,7 @@
984
1026
  "description": "DeepSeek-R1 greatly improves model reasoning capabilities with minimal labeled data. Before outputting the final answer, the model first generates a chain of thought to enhance answer accuracy."
985
1027
  },
986
1028
  "deepseek/deepseek-r1-distill-llama-70b": {
987
- "description": "DeepSeek R1 Distill Llama 70B is a large language model based on Llama3.3 70B, which achieves competitive performance comparable to large cutting-edge models by utilizing fine-tuning from DeepSeek R1 outputs."
1029
+ "description": "DeepSeek-R1-Distill-Llama-70B is a distilled, more efficient variant of the 70B Llama model. It maintains strong performance on text generation tasks while reducing computational overhead for easier deployment and research. Served by Groq using its custom Language Processing Unit (LPU) hardware for fast, efficient inference."
988
1030
  },
989
1031
  "deepseek/deepseek-r1-distill-llama-8b": {
990
1032
  "description": "DeepSeek R1 Distill Llama 8B is a distilled large language model based on Llama-3.1-8B-Instruct, trained using outputs from DeepSeek R1."
@@ -1002,7 +1044,10 @@
1002
1044
  "description": "DeepSeek-R1 significantly enhances model reasoning capabilities with minimal labeled data. Before outputting the final answer, the model first provides a chain of thought to improve the accuracy of the final response."
1003
1045
  },
1004
1046
  "deepseek/deepseek-v3": {
1005
- "description": "DeepSeek-V3 has achieved a significant breakthrough in inference speed compared to previous models. It ranks first among open-source models and can compete with the world's most advanced closed-source models. DeepSeek-V3 employs Multi-Head Latent Attention (MLA) and DeepSeekMoE architectures, which have been thoroughly validated in DeepSeek-V2. Additionally, DeepSeek-V3 introduces an auxiliary lossless strategy for load balancing and sets multi-label prediction training objectives for enhanced performance."
1047
+ "description": "A fast, general-purpose large language model with enhanced reasoning capabilities."
1048
+ },
1049
+ "deepseek/deepseek-v3.1-base": {
1050
+ "description": "DeepSeek V3.1 Base is an improved version of the DeepSeek V3 model."
1006
1051
  },
1007
1052
  "deepseek/deepseek-v3/community": {
1008
1053
  "description": "DeepSeek-V3 has achieved a significant breakthrough in inference speed compared to previous models. It ranks first among open-source models and can compete with the world's most advanced closed-source models. DeepSeek-V3 employs Multi-Head Latent Attention (MLA) and DeepSeekMoE architectures, which have been thoroughly validated in DeepSeek-V2. Additionally, DeepSeek-V3 introduces an auxiliary lossless strategy for load balancing and sets multi-label prediction training objectives for enhanced performance."
@@ -1430,18 +1475,27 @@
1430
1475
  "glm-zero-preview": {
1431
1476
  "description": "GLM-Zero-Preview possesses strong complex reasoning abilities, excelling in logical reasoning, mathematics, programming, and other fields."
1432
1477
  },
1478
+ "google/gemini-2.0-flash": {
1479
+ "description": "Gemini 2.0 Flash offers next-generation features and improvements, including exceptional speed, built-in tool usage, multimodal generation, and a 1 million token context window."
1480
+ },
1433
1481
  "google/gemini-2.0-flash-001": {
1434
1482
  "description": "Gemini 2.0 Flash offers next-generation features and improvements, including exceptional speed, native tool usage, multimodal generation, and a 1M token context window."
1435
1483
  },
1436
1484
  "google/gemini-2.0-flash-exp:free": {
1437
1485
  "description": "Gemini 2.0 Flash Experimental is Google's latest experimental multimodal AI model, showing a quality improvement compared to historical versions, especially in world knowledge, code, and long context."
1438
1486
  },
1487
+ "google/gemini-2.0-flash-lite": {
1488
+ "description": "Gemini 2.0 Flash Lite provides next-generation features and improvements, including exceptional speed, built-in tool usage, multimodal generation, and a 1 million token context window."
1489
+ },
1439
1490
  "google/gemini-2.5-flash": {
1440
- "description": "Gemini 2.5 Flash is Google's most advanced flagship model, designed for advanced reasoning, coding, mathematics, and scientific tasks. It features built-in \"thinking\" capabilities, enabling it to provide responses with higher accuracy and more nuanced contextual understanding.\n\nNote: This model has two variants: thinking and non-thinking. Output pricing varies significantly depending on whether the thinking capability is activated. If you choose the standard variant (without the \":thinking\" suffix), the model will explicitly avoid generating thinking tokens.\n\nTo leverage the thinking capability and receive thinking tokens, you must select the \":thinking\" variant, which incurs higher pricing for thinking outputs.\n\nAdditionally, Gemini 2.5 Flash can be configured via the \"max tokens for reasoning\" parameter, as detailed in the documentation (https://openrouter.ai/docs/use-cases/reasoning-tokens#max-tokens-for-reasoning)."
1491
+ "description": "Gemini 2.5 Flash is a thoughtful model delivering excellent comprehensive capabilities. It is designed to balance price and performance, supporting multimodal inputs and a 1 million token context window."
1441
1492
  },
1442
1493
  "google/gemini-2.5-flash-image-preview": {
1443
1494
  "description": "Gemini 2.5 Flash experimental model, supporting image generation."
1444
1495
  },
1496
+ "google/gemini-2.5-flash-lite": {
1497
+ "description": "Gemini 2.5 Flash-Lite is a balanced, low-latency model with configurable reasoning budget and tool connectivity (e.g., Google Search grounding and code execution). It supports multimodal inputs and offers a 1 million token context window."
1498
+ },
1445
1499
  "google/gemini-2.5-flash-preview": {
1446
1500
  "description": "Gemini 2.5 Flash is Google's most advanced flagship model, designed for advanced reasoning, coding, mathematics, and scientific tasks. It includes built-in 'thinking' capabilities that allow it to provide responses with higher accuracy and detailed context handling.\n\nNote: This model has two variants: thinking and non-thinking. Output pricing varies significantly based on whether the thinking capability is activated. If you choose the standard variant (without the ':thinking' suffix), the model will explicitly avoid generating thinking tokens.\n\nTo leverage the thinking capability and receive thinking tokens, you must select the ':thinking' variant, which will incur higher thinking output pricing.\n\nAdditionally, Gemini 2.5 Flash can be configured via the 'maximum tokens for reasoning' parameter, as described in the documentation (https://openrouter.ai/docs/use-cases/reasoning-tokens#max-tokens-for-reasoning)."
1447
1501
  },
@@ -1449,11 +1503,14 @@
1449
1503
  "description": "Gemini 2.5 Flash is Google's most advanced flagship model, designed for advanced reasoning, coding, mathematics, and scientific tasks. It includes built-in 'thinking' capabilities that allow it to provide responses with higher accuracy and detailed context handling.\n\nNote: This model has two variants: thinking and non-thinking. Output pricing varies significantly based on whether the thinking capability is activated. If you choose the standard variant (without the ':thinking' suffix), the model will explicitly avoid generating thinking tokens.\n\nTo leverage the thinking capability and receive thinking tokens, you must select the ':thinking' variant, which will incur higher thinking output pricing.\n\nAdditionally, Gemini 2.5 Flash can be configured via the 'maximum tokens for reasoning' parameter, as described in the documentation (https://openrouter.ai/docs/use-cases/reasoning-tokens#max-tokens-for-reasoning)."
1450
1504
  },
1451
1505
  "google/gemini-2.5-pro": {
1452
- "description": "Gemini 2.5 Pro is Google's most advanced thinking model, capable of reasoning through complex problems in code, mathematics, and STEM fields, as well as analyzing large datasets, codebases, and documents using long-context processing."
1506
+ "description": "Gemini 2.5 Pro is our most advanced reasoning Gemini model, capable of solving complex problems. It features a 2 million token context window and supports multimodal inputs including text, images, audio, video, and PDF documents."
1453
1507
  },
1454
1508
  "google/gemini-2.5-pro-preview": {
1455
1509
  "description": "Gemini 2.5 Pro Preview is Google's most advanced thinking model, capable of reasoning through complex problems in code, mathematics, and STEM fields, as well as analyzing large datasets, codebases, and documents using extended context."
1456
1510
  },
1511
+ "google/gemini-embedding-001": {
1512
+ "description": "A state-of-the-art embedding model delivering excellent performance on English, multilingual, and code tasks."
1513
+ },
1457
1514
  "google/gemini-flash-1.5": {
1458
1515
  "description": "Gemini 1.5 Flash offers optimized multimodal processing capabilities, suitable for various complex task scenarios."
1459
1516
  },
@@ -1490,6 +1547,12 @@
1490
1547
  "google/gemma-3-27b-it": {
1491
1548
  "description": "Gemma 3 27B is an open-source language model from Google that sets new standards in efficiency and performance."
1492
1549
  },
1550
+ "google/text-embedding-005": {
1551
+ "description": "An English-focused text embedding model optimized for code and English language tasks."
1552
+ },
1553
+ "google/text-multilingual-embedding-002": {
1554
+ "description": "A multilingual text embedding model optimized for cross-lingual tasks, supporting multiple languages."
1555
+ },
1493
1556
  "gpt-3.5-turbo": {
1494
1557
  "description": "GPT 3.5 Turbo is suitable for various text generation and understanding tasks. Currently points to gpt-3.5-turbo-0125."
1495
1558
  },
@@ -1781,6 +1844,9 @@
1781
1844
  "imagen-4.0-ultra-generate-preview-06-06": {
1782
1845
  "description": "Imagen 4th generation text-to-image model series Ultra version"
1783
1846
  },
1847
+ "inception/mercury-coder-small": {
1848
+ "description": "Mercury Coder Small is ideal for code generation, debugging, and refactoring tasks, offering minimal latency."
1849
+ },
1784
1850
  "inclusionAI/Ling-mini-2.0": {
1785
1851
  "description": "Ling-mini-2.0 is a compact, high-performance large language model based on the MoE architecture. It has 16 billion total parameters, but only activates 1.4 billion per token (non-embedding 789 million), enabling extremely fast generation speeds. Thanks to its efficient MoE design and large-scale, high-quality training data, Ling-mini-2.0 delivers top-tier performance on downstream tasks comparable to dense LLMs under 10 billion parameters and even larger MoE models, despite having only 1.4 billion activated parameters."
1786
1852
  },
@@ -2057,30 +2123,63 @@
2057
2123
  "meta/Meta-Llama-3.1-8B-Instruct": {
2058
2124
  "description": "Llama 3.1 instruction-tuned text model optimized for multilingual dialogue use cases, performing excellently on common industry benchmarks among many available open-source and closed chat models."
2059
2125
  },
2126
+ "meta/llama-3-70b": {
2127
+ "description": "A 70 billion parameter open-source model finely tuned by Meta for instruction following. Served by Groq using its custom Language Processing Unit (LPU) hardware for fast, efficient inference."
2128
+ },
2129
+ "meta/llama-3-8b": {
2130
+ "description": "An 8 billion parameter open-source model finely tuned by Meta for instruction following. Served by Groq using its custom Language Processing Unit (LPU) hardware for fast, efficient inference."
2131
+ },
2060
2132
  "meta/llama-3.1-405b-instruct": {
2061
2133
  "description": "An advanced LLM supporting synthetic data generation, knowledge distillation, and reasoning, suitable for chatbots, programming, and domain-specific tasks."
2062
2134
  },
2135
+ "meta/llama-3.1-70b": {
2136
+ "description": "An updated version of Meta Llama 3 70B Instruct, featuring extended 128K context length, multilingual support, and improved reasoning capabilities."
2137
+ },
2063
2138
  "meta/llama-3.1-70b-instruct": {
2064
2139
  "description": "Empowering complex conversations with exceptional context understanding, reasoning capabilities, and text generation abilities."
2065
2140
  },
2141
+ "meta/llama-3.1-8b": {
2142
+ "description": "Llama 3.1 8B supports a 128K context window, making it ideal for real-time conversational interfaces and data analysis, while offering significant cost savings compared to larger models. Served by Groq using its custom Language Processing Unit (LPU) hardware for fast, efficient inference."
2143
+ },
2066
2144
  "meta/llama-3.1-8b-instruct": {
2067
2145
  "description": "An advanced cutting-edge model with language understanding, excellent reasoning capabilities, and text generation abilities."
2068
2146
  },
2147
+ "meta/llama-3.2-11b": {
2148
+ "description": "Instruction-tuned image reasoning generation model (text + image input / text output), optimized for visual recognition, image reasoning, captioning, and answering general questions about images."
2149
+ },
2069
2150
  "meta/llama-3.2-11b-vision-instruct": {
2070
2151
  "description": "A state-of-the-art vision-language model adept at high-quality reasoning from images."
2071
2152
  },
2153
+ "meta/llama-3.2-1b": {
2154
+ "description": "Text-only model supporting on-device use cases such as multilingual local knowledge retrieval, summarization, and rewriting."
2155
+ },
2072
2156
  "meta/llama-3.2-1b-instruct": {
2073
2157
  "description": "A cutting-edge small language model with language understanding, excellent reasoning capabilities, and text generation abilities."
2074
2158
  },
2159
+ "meta/llama-3.2-3b": {
2160
+ "description": "Text-only model carefully tuned to support on-device use cases such as multilingual local knowledge retrieval, summarization, and rewriting."
2161
+ },
2075
2162
  "meta/llama-3.2-3b-instruct": {
2076
2163
  "description": "A cutting-edge small language model with language understanding, excellent reasoning capabilities, and text generation abilities."
2077
2164
  },
2165
+ "meta/llama-3.2-90b": {
2166
+ "description": "Instruction-tuned image reasoning generation model (text + image input / text output), optimized for visual recognition, image reasoning, captioning, and answering general questions about images."
2167
+ },
2078
2168
  "meta/llama-3.2-90b-vision-instruct": {
2079
2169
  "description": "A state-of-the-art vision-language model adept at high-quality reasoning from images."
2080
2170
  },
2171
+ "meta/llama-3.3-70b": {
2172
+ "description": "The perfect blend of performance and efficiency. This model supports high-performance conversational AI, designed for content creation, enterprise applications, and research, offering advanced language understanding capabilities including text summarization, classification, sentiment analysis, and code generation."
2173
+ },
2081
2174
  "meta/llama-3.3-70b-instruct": {
2082
2175
  "description": "An advanced LLM skilled in reasoning, mathematics, common sense, and function calling."
2083
2176
  },
2177
+ "meta/llama-4-maverick": {
2178
+ "description": "The Llama 4 model family consists of native multimodal AI models supporting text and multimodal experiences. These models leverage a Mixture of Experts architecture to deliver industry-leading performance in text and image understanding. Llama 4 Maverick, a 17 billion parameter model with 128 experts, is served by DeepInfra."
2179
+ },
2180
+ "meta/llama-4-scout": {
2181
+ "description": "The Llama 4 model family consists of native multimodal AI models supporting text and multimodal experiences. These models leverage a Mixture of Experts architecture to deliver industry-leading performance in text and image understanding. Llama 4 Scout, a 17 billion parameter model with 16 experts, is served by DeepInfra."
2182
+ },
2084
2183
  "microsoft/Phi-3-medium-128k-instruct": {
2085
2184
  "description": "The same Phi-3-medium model but with a larger context size, suitable for RAG or few-shot prompting."
2086
2185
  },
@@ -2156,6 +2255,48 @@
2156
2255
  "mistral-small-latest": {
2157
2256
  "description": "Mistral Small is a cost-effective, fast, and reliable option suitable for use cases such as translation, summarization, and sentiment analysis."
2158
2257
  },
2258
+ "mistral/codestral": {
2259
+ "description": "Mistral Codestral 25.01 is a state-of-the-art coding model optimized for low-latency, high-frequency use cases. Proficient in over 80 programming languages, it excels at fill-in-the-middle (FIM), code correction, and test generation tasks."
2260
+ },
2261
+ "mistral/codestral-embed": {
2262
+ "description": "A code embedding model for embedding code databases and repositories to support coding assistants."
2263
+ },
2264
+ "mistral/devstral-small": {
2265
+ "description": "Devstral is an agent large language model for software engineering tasks, making it an excellent choice for software engineering agents."
2266
+ },
2267
+ "mistral/magistral-medium": {
2268
+ "description": "Complex thinking supported by deep understanding, featuring transparent reasoning you can follow and verify. This model maintains high-fidelity reasoning across many languages, even when switching languages mid-task."
2269
+ },
2270
+ "mistral/magistral-small": {
2271
+ "description": "Complex thinking supported by deep understanding, featuring transparent reasoning you can follow and verify. This model maintains high-fidelity reasoning across many languages, even when switching languages mid-task."
2272
+ },
2273
+ "mistral/ministral-3b": {
2274
+ "description": "A compact, efficient model for on-device tasks such as intelligent assistants and local analytics, providing low-latency performance."
2275
+ },
2276
+ "mistral/ministral-8b": {
2277
+ "description": "A more powerful model with faster, memory-efficient inference, ideal for complex workflows and demanding edge applications."
2278
+ },
2279
+ "mistral/mistral-embed": {
2280
+ "description": "A general-purpose text embedding model for semantic search, similarity, clustering, and RAG workflows."
2281
+ },
2282
+ "mistral/mistral-large": {
2283
+ "description": "Mistral Large is ideal for complex tasks requiring large-scale reasoning capabilities or high specialization—such as synthetic text generation, code generation, RAG, or agents."
2284
+ },
2285
+ "mistral/mistral-saba-24b": {
2286
+ "description": "Mistral Saba 24B is a 24 billion parameter open-source model developed by Mistral.ai. Saba is a specialized model trained to excel in Arabic, Persian, Urdu, Hebrew, and Indian languages. Served by Groq using its custom Language Processing Unit (LPU) hardware for fast, efficient inference."
2287
+ },
2288
+ "mistral/mistral-small": {
2289
+ "description": "Mistral Small is ideal for simple tasks that can be batched—such as classification, customer support, or text generation. It delivers excellent performance at an affordable price point."
2290
+ },
2291
+ "mistral/mixtral-8x22b-instruct": {
2292
+ "description": "8x22b Instruct model. 8x22b is a Mixture of Experts open-source model served by Mistral."
2293
+ },
2294
+ "mistral/pixtral-12b": {
2295
+ "description": "A 12B model with image understanding capabilities as well as text."
2296
+ },
2297
+ "mistral/pixtral-large": {
2298
+ "description": "Pixtral Large is the second model in our multimodal family, demonstrating cutting-edge image understanding. Specifically, it can comprehend documents, charts, and natural images while maintaining the leading text understanding capabilities of Mistral Large 2."
2299
+ },
2159
2300
  "mistralai/Mistral-7B-Instruct-v0.1": {
2160
2301
  "description": "Mistral (7B) Instruct is known for its high performance, suitable for various language tasks."
2161
2302
  },
@@ -2222,12 +2363,21 @@
2222
2363
  "moonshotai/Kimi-K2-Instruct-0905": {
2223
2364
  "description": "Kimi K2-Instruct-0905 is the latest and most powerful version of Kimi K2. It is a top-tier Mixture of Experts (MoE) language model with a total of 1 trillion parameters and 32 billion activated parameters. Key features of this model include enhanced agent coding intelligence, demonstrating significant performance improvements in public benchmark tests and real-world agent coding tasks; and an improved frontend coding experience, with advancements in both aesthetics and practicality for frontend programming."
2224
2365
  },
2366
+ "moonshotai/kimi-k2": {
2367
+ "description": "Kimi K2 is a large-scale Mixture of Experts (MoE) language model developed by Moonshot AI, with a total of 1 trillion parameters and 32 billion active parameters per forward pass. It is optimized for agent capabilities, including advanced tool use, reasoning, and code synthesis."
2368
+ },
2225
2369
  "moonshotai/kimi-k2-0905": {
2226
2370
  "description": "The kimi-k2-0905-preview model has a context length of 256k, featuring stronger Agentic Coding capabilities, more outstanding aesthetics and practicality of frontend code, and better context understanding."
2227
2371
  },
2228
2372
  "moonshotai/kimi-k2-instruct-0905": {
2229
2373
  "description": "The kimi-k2-0905-preview model has a context length of 256k, featuring stronger Agentic Coding capabilities, more outstanding aesthetics and practicality of frontend code, and better context understanding."
2230
2374
  },
2375
+ "morph/morph-v3-fast": {
2376
+ "description": "Morph offers a specialized AI model that applies code changes suggested by cutting-edge models like Claude or GPT-4o to your existing code files FAST - 4500+ tokens/second. It acts as the final step in the AI coding workflow. Supports 16k input tokens and 16k output tokens."
2377
+ },
2378
+ "morph/morph-v3-large": {
2379
+ "description": "Morph offers a specialized AI model that applies code changes suggested by cutting-edge models like Claude or GPT-4o to your existing code files FAST - 2500+ tokens/second. It acts as the final step in the AI coding workflow. Supports 16k input tokens and 16k output tokens."
2380
+ },
2231
2381
  "nousresearch/hermes-2-pro-llama-3-8b": {
2232
2382
  "description": "Hermes 2 Pro Llama 3 8B is an upgraded version of Nous Hermes 2, featuring the latest internally developed datasets."
2233
2383
  },
@@ -2294,29 +2444,47 @@
2294
2444
  "open-mixtral-8x7b": {
2295
2445
  "description": "Mixtral 8x7B is a sparse expert model that leverages multiple parameters to enhance reasoning speed, suitable for handling multilingual and code generation tasks."
2296
2446
  },
2447
+ "openai/gpt-3.5-turbo": {
2448
+ "description": "OpenAI's most capable and cost-effective model in the GPT-3.5 series, optimized for chat purposes but also performing well on traditional completion tasks."
2449
+ },
2450
+ "openai/gpt-3.5-turbo-instruct": {
2451
+ "description": "Capabilities similar to GPT-3 era models. Compatible with traditional completion endpoints rather than chat completion endpoints."
2452
+ },
2453
+ "openai/gpt-4-turbo": {
2454
+ "description": "OpenAI's gpt-4-turbo features broad general knowledge and domain expertise, enabling it to follow complex natural language instructions and accurately solve difficult problems. Its knowledge cutoff is April 2023, with a 128,000 token context window."
2455
+ },
2297
2456
  "openai/gpt-4.1": {
2298
- "description": "GPT-4.1 is our flagship model for complex tasks. It is particularly well-suited for cross-domain problem solving."
2457
+ "description": "GPT 4.1 is OpenAI's flagship model, suited for complex tasks. It excels at cross-domain problem solving."
2299
2458
  },
2300
2459
  "openai/gpt-4.1-mini": {
2301
- "description": "GPT-4.1 mini strikes a balance between intelligence, speed, and cost, making it an attractive model for many use cases."
2460
+ "description": "GPT 4.1 mini balances intelligence, speed, and cost, making it an attractive model for many use cases."
2302
2461
  },
2303
2462
  "openai/gpt-4.1-nano": {
2304
- "description": "GPT-4.1 nano is the fastest and most cost-effective version of the GPT-4.1 model."
2463
+ "description": "GPT-4.1 nano is the fastest and most cost-effective GPT 4.1 model."
2305
2464
  },
2306
2465
  "openai/gpt-4o": {
2307
- "description": "ChatGPT-4o is a dynamic model that updates in real-time to maintain the latest version. It combines powerful language understanding and generation capabilities, suitable for large-scale application scenarios, including customer service, education, and technical support."
2466
+ "description": "GPT-4o from OpenAI has broad general knowledge and domain expertise, capable of following complex natural language instructions and accurately solving challenging problems. It matches GPT-4 Turbo's performance with a faster, cheaper API."
2308
2467
  },
2309
2468
  "openai/gpt-4o-mini": {
2310
- "description": "GPT-4o mini is the latest model released by OpenAI following GPT-4 Omni, supporting both text and image input while outputting text. As their most advanced small model, it is significantly cheaper than other recent cutting-edge models and over 60% cheaper than GPT-3.5 Turbo. It maintains state-of-the-art intelligence while offering remarkable cost-effectiveness. GPT-4o mini scored 82% on the MMLU test and currently ranks higher than GPT-4 in chat preferences."
2469
+ "description": "GPT-4o mini from OpenAI is their most advanced and cost-effective small model. It is multimodal (accepting text or image inputs and outputting text) and more intelligent than gpt-3.5-turbo, while maintaining similar speed."
2470
+ },
2471
+ "openai/gpt-5": {
2472
+ "description": "GPT-5 is OpenAI's flagship language model, excelling in complex reasoning, extensive real-world knowledge, code-intensive, and multi-step agent tasks."
2473
+ },
2474
+ "openai/gpt-5-mini": {
2475
+ "description": "GPT-5 mini is a cost-optimized model performing well on reasoning/chat tasks. It offers the best balance of speed, cost, and capability."
2476
+ },
2477
+ "openai/gpt-5-nano": {
2478
+ "description": "GPT-5 nano is a high-throughput model excelling at simple instruction or classification tasks."
2311
2479
  },
2312
2480
  "openai/gpt-oss-120b": {
2313
- "description": "OpenAI GPT-OSS 120B is a top-tier language model with 120 billion parameters, featuring built-in browser search and code execution capabilities, along with strong reasoning skills."
2481
+ "description": "An extremely capable general-purpose large language model with powerful, controllable reasoning abilities."
2314
2482
  },
2315
2483
  "openai/gpt-oss-20b": {
2316
- "description": "OpenAI GPT-OSS 20B is a top-tier language model with 20 billion parameters, featuring built-in browser search and code execution capabilities, along with strong reasoning skills."
2484
+ "description": "A compact, open-weight language model optimized for low latency and resource-constrained environments, including local and edge deployments."
2317
2485
  },
2318
2486
  "openai/o1": {
2319
- "description": "o1 is OpenAI's new reasoning model that supports multimodal input and outputs text, suitable for complex tasks requiring broad general knowledge. This model features a 200K context window and a knowledge cutoff date of October 2023."
2487
+ "description": "OpenAI's o1 is a flagship reasoning model designed for complex problems requiring deep thought. It provides strong reasoning capabilities and higher accuracy for complex multi-step tasks."
2320
2488
  },
2321
2489
  "openai/o1-mini": {
2322
2490
  "description": "o1-mini is a fast and cost-effective reasoning model designed for programming, mathematics, and scientific applications. This model features a 128K context and has a knowledge cutoff date of October 2023."
@@ -2325,23 +2493,44 @@
2325
2493
  "description": "o1 is OpenAI's new reasoning model, suitable for complex tasks that require extensive general knowledge. This model features a 128K context and has a knowledge cutoff date of October 2023."
2326
2494
  },
2327
2495
  "openai/o3": {
2328
- "description": "O3 is a versatile and powerful model that excels in multiple domains. It sets a new benchmark for tasks in mathematics, science, programming, and visual reasoning. It is also proficient in technical writing and following instructions. Users can leverage it to analyze text, code, and images, solving complex problems that require multiple steps."
2496
+ "description": "OpenAI's o3 is the most powerful reasoning model, setting new state-of-the-art levels in coding, mathematics, science, and visual perception. It excels at complex queries requiring multifaceted analysis, with special strengths in analyzing images, charts, and graphs."
2329
2497
  },
2330
2498
  "openai/o3-mini": {
2331
- "description": "O3-mini delivers high intelligence at the same cost and latency targets as o1-mini."
2499
+ "description": "o3-mini is OpenAI's latest small reasoning model, delivering high intelligence at the same cost and latency targets as o1-mini."
2332
2500
  },
2333
2501
  "openai/o3-mini-high": {
2334
2502
  "description": "O3-mini high inference level version provides high intelligence at the same cost and latency targets as o1-mini."
2335
2503
  },
2336
2504
  "openai/o4-mini": {
2337
- "description": "o4-mini is optimized for fast and efficient inference, demonstrating high efficiency and performance in coding and visual tasks."
2505
+ "description": "OpenAI's o4-mini offers fast, cost-effective reasoning with excellent performance for its size, especially in mathematics (best in AIME benchmark), coding, and visual tasks."
2338
2506
  },
2339
2507
  "openai/o4-mini-high": {
2340
2508
  "description": "o4-mini high inference level version, optimized for fast and efficient inference, demonstrating high efficiency and performance in coding and visual tasks."
2341
2509
  },
2510
+ "openai/text-embedding-3-large": {
2511
+ "description": "OpenAI's most capable embedding model, suitable for English and non-English tasks."
2512
+ },
2513
+ "openai/text-embedding-3-small": {
2514
+ "description": "OpenAI's improved, higher-performance version of the ada embedding model."
2515
+ },
2516
+ "openai/text-embedding-ada-002": {
2517
+ "description": "OpenAI's legacy text embedding model."
2518
+ },
2342
2519
  "openrouter/auto": {
2343
2520
  "description": "Based on context length, topic, and complexity, your request will be sent to Llama 3 70B Instruct, Claude 3.5 Sonnet (self-regulating), or GPT-4o."
2344
2521
  },
2522
+ "perplexity/sonar": {
2523
+ "description": "Perplexity's lightweight product with search grounding capabilities, faster and cheaper than Sonar Pro."
2524
+ },
2525
+ "perplexity/sonar-pro": {
2526
+ "description": "Perplexity's flagship product with search grounding capabilities, supporting advanced queries and follow-up actions."
2527
+ },
2528
+ "perplexity/sonar-reasoning": {
2529
+ "description": "A reasoning-focused model that outputs chain-of-thought (CoT) in responses, providing detailed explanations with search grounding."
2530
+ },
2531
+ "perplexity/sonar-reasoning-pro": {
2532
+ "description": "An advanced reasoning-focused model that outputs chain-of-thought (CoT) in responses, offering comprehensive explanations with enhanced search capabilities and multiple search queries per request."
2533
+ },
2345
2534
  "phi3": {
2346
2535
  "description": "Phi-3 is a lightweight open model launched by Microsoft, suitable for efficient integration and large-scale knowledge reasoning."
2347
2536
  },
@@ -2804,6 +2993,12 @@
2804
2993
  "v0-1.5-md": {
2805
2994
  "description": "The v0-1.5-md model is suitable for everyday tasks and user interface (UI) generation."
2806
2995
  },
2996
+ "vercel/v0-1.0-md": {
2997
+ "description": "Access the model behind v0 to generate, fix, and optimize modern web applications, with framework-specific reasoning and up-to-date knowledge."
2998
+ },
2999
+ "vercel/v0-1.5-md": {
3000
+ "description": "Access the model behind v0 to generate, fix, and optimize modern web applications, with framework-specific reasoning and up-to-date knowledge."
3001
+ },
2807
3002
  "wan2.2-t2i-flash": {
2808
3003
  "description": "Wanxiang 2.2 Flash version, the latest model currently available. Fully upgraded in creativity, stability, and realism, with fast generation speed and high cost-effectiveness."
2809
3004
  },
@@ -2834,6 +3029,27 @@
2834
3029
  "x1": {
2835
3030
  "description": "The Spark X1 model will undergo further upgrades, achieving results in reasoning, text generation, and language understanding tasks that match OpenAI o1 and DeepSeek R1, building on its leading position in domestic mathematical tasks."
2836
3031
  },
3032
+ "xai/grok-2": {
3033
+ "description": "Grok 2 is a cutting-edge language model with state-of-the-art reasoning capabilities. It excels in chat, coding, and reasoning, outperforming Claude 3.5 Sonnet and GPT-4-Turbo on the LMSYS leaderboard."
3034
+ },
3035
+ "xai/grok-2-vision": {
3036
+ "description": "Grok 2 Vision model excels at vision-based tasks, delivering state-of-the-art performance in visual math reasoning (MathVista) and document-based question answering (DocVQA). It can process various visual information including documents, charts, graphs, screenshots, and photos."
3037
+ },
3038
+ "xai/grok-3": {
3039
+ "description": "xAI's flagship model, excelling in enterprise use cases such as data extraction, coding, and text summarization. It has deep domain knowledge in finance, healthcare, legal, and scientific fields."
3040
+ },
3041
+ "xai/grok-3-fast": {
3042
+ "description": "xAI's flagship model excelling in enterprise use cases like data extraction, coding, and text summarization. The fast variant is served on faster infrastructure, providing much quicker response times at the cost of higher per-token output expenses."
3043
+ },
3044
+ "xai/grok-3-mini": {
3045
+ "description": "xAI's lightweight model that thinks before responding. Ideal for simple or logic-based tasks that do not require deep domain knowledge. Raw thought traces are accessible."
3046
+ },
3047
+ "xai/grok-3-mini-fast": {
3048
+ "description": "xAI's lightweight model that thinks before responding. Ideal for simple or logic-based tasks that do not require deep domain knowledge. Raw thought traces are accessible. The fast variant is served on faster infrastructure, providing much quicker response times at the cost of higher per-token output expenses."
3049
+ },
3050
+ "xai/grok-4": {
3051
+ "description": "xAI's latest and greatest flagship model, delivering unparalleled performance in natural language, mathematics, and reasoning—an ideal all-rounder."
3052
+ },
2837
3053
  "yi-1.5-34b-chat": {
2838
3054
  "description": "Yi-1.5 is an upgraded version of Yi. It continues pre-training on Yi using a high-quality corpus of 500B tokens and is fine-tuned on 3M diverse samples."
2839
3055
  },
@@ -2881,5 +3097,14 @@
2881
3097
  },
2882
3098
  "zai-org/GLM-4.5V": {
2883
3099
  "description": "GLM-4.5V is the latest-generation vision-language model (VLM) released by Zhipu AI. It is built on the flagship text model GLM-4.5-Air, which has 106B total parameters and 12B active parameters, and adopts a Mixture-of-Experts (MoE) architecture to deliver outstanding performance at reduced inference cost. Technically, GLM-4.5V continues the trajectory of GLM-4.1V-Thinking and introduces innovations such as three-dimensional rotary position encoding (3D-RoPE), significantly improving perception and reasoning of three-dimensional spatial relationships. Through optimizations across pretraining, supervised fine-tuning, and reinforcement learning stages, the model can handle a wide range of visual content including images, video, and long documents, and has achieved top-tier performance among comparable open-source models across 41 public multimodal benchmarks. The model also adds a \"Thinking Mode\" toggle that lets users flexibly choose between fast responses and deep reasoning to balance efficiency and effectiveness."
3100
+ },
3101
+ "zai/glm-4.5": {
3102
+ "description": "The GLM-4.5 series models are foundational models specifically designed for agents. The flagship GLM-4.5 integrates 355 billion total parameters (32 billion active), unifying reasoning, coding, and agent capabilities to address complex application needs. As a hybrid reasoning system, it offers dual operating modes."
3103
+ },
3104
+ "zai/glm-4.5-air": {
3105
+ "description": "GLM-4.5 and GLM-4.5-Air are our latest flagship models, specifically designed as foundational models for agent applications. Both utilize a Mixture of Experts (MoE) architecture. GLM-4.5 has 355 billion total parameters with 32 billion active per forward pass, while GLM-4.5-Air features a streamlined design with 106 billion total parameters and 12 billion active."
3106
+ },
3107
+ "zai/glm-4.5v": {
3108
+ "description": "GLM-4.5V is built on the GLM-4.5-Air foundational model, inheriting the proven techniques of GLM-4.1V-Thinking while achieving efficient scaling through a powerful 106 billion parameter MoE architecture."
2884
3109
  }
2885
3110
  }
@@ -161,6 +161,9 @@
161
161
  "v0": {
162
162
  "description": "v0 is a pair programming assistant that generates code and user interfaces (UI) for your projects based on your natural language descriptions."
163
163
  },
164
+ "vercelaigateway": {
165
+ "description": "Vercel AI Gateway provides a unified API to access over 100 models, allowing you to use models from multiple providers such as OpenAI, Anthropic, and Google through a single endpoint. It supports budget settings, usage monitoring, request load balancing, and failover."
166
+ },
164
167
  "vertexai": {
165
168
  "description": "Google's Gemini series is its most advanced and versatile AI model, developed by Google DeepMind. It is designed for multimodal use, supporting seamless understanding and processing of text, code, images, audio, and video. Suitable for a variety of environments, from data centers to mobile devices, it significantly enhances the efficiency and applicability of AI models."
166
169
  },