@lobehub/chat 1.51.7 → 1.51.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +25 -0
- package/README.ja-JP.md +8 -8
- package/README.md +8 -8
- package/README.zh-CN.md +8 -8
- package/changelog/v1.json +9 -0
- package/package.json +1 -1
- package/src/app/(backend)/webapi/chat/models/[provider]/route.ts +1 -1
- package/src/libs/agent-runtime/ai360/index.ts +8 -1
- package/src/libs/agent-runtime/anthropic/index.ts +2 -1
- package/src/libs/agent-runtime/baichuan/index.ts +1 -1
- package/src/libs/agent-runtime/cloudflare/index.test.ts +0 -117
- package/src/libs/agent-runtime/cloudflare/index.ts +32 -11
- package/src/libs/agent-runtime/deepseek/index.ts +4 -1
- package/src/libs/agent-runtime/fireworksai/index.ts +8 -1
- package/src/libs/agent-runtime/giteeai/index.ts +9 -1
- package/src/libs/agent-runtime/github/index.test.ts +5 -16
- package/src/libs/agent-runtime/github/index.ts +31 -33
- package/src/libs/agent-runtime/google/index.ts +2 -1
- package/src/libs/agent-runtime/groq/index.ts +7 -1
- package/src/libs/agent-runtime/higress/index.ts +2 -1
- package/src/libs/agent-runtime/huggingface/index.ts +10 -1
- package/src/libs/agent-runtime/hunyuan/index.ts +3 -1
- package/src/libs/agent-runtime/internlm/index.ts +3 -1
- package/src/libs/agent-runtime/mistral/index.ts +2 -1
- package/src/libs/agent-runtime/moonshot/index.ts +3 -1
- package/src/libs/agent-runtime/novita/__snapshots__/index.test.ts.snap +48 -12
- package/src/libs/agent-runtime/novita/index.ts +9 -1
- package/src/libs/agent-runtime/openai/__snapshots__/index.test.ts.snap +70 -66
- package/src/libs/agent-runtime/openai/index.ts +37 -0
- package/src/libs/agent-runtime/openrouter/__snapshots__/index.test.ts.snap +172 -4
- package/src/libs/agent-runtime/openrouter/index.ts +17 -2
- package/src/libs/agent-runtime/qwen/index.ts +10 -1
- package/src/libs/agent-runtime/sensenova/index.ts +3 -1
- package/src/libs/agent-runtime/siliconcloud/index.ts +10 -1
- package/src/libs/agent-runtime/stepfun/index.ts +3 -1
- package/src/libs/agent-runtime/togetherai/__snapshots__/index.test.ts.snap +1309 -5
- package/src/libs/agent-runtime/togetherai/index.test.ts +0 -13
- package/src/libs/agent-runtime/togetherai/index.ts +25 -20
- package/src/libs/agent-runtime/utils/cloudflareHelpers.test.ts +0 -99
- package/src/libs/agent-runtime/utils/cloudflareHelpers.ts +0 -70
- package/src/libs/agent-runtime/xai/index.ts +3 -1
- package/src/libs/agent-runtime/zeroone/index.ts +3 -1
- package/src/libs/agent-runtime/zhipu/index.ts +3 -1
package/src/libs/agent-runtime/togetherai/__snapshots__/index.test.ts.snap
@@ -3,884 +3,2188 @@

The regenerated `LobeTogetherAI > models > should get models` snapshot adds two fields to every model entry — "contextWindowTokens" (a number where the context length is known, otherwise undefined) and "reasoning": false — alongside the existing "description", "displayName", "enabled", "functionCall", "id", "maxOutput", "tokens", and "vision" fields.

The model list itself also grows substantially. Newly listed entries include the base (non-chat) variants of Qwen 1.5, Gemma, LLaMA-2, Meta Llama 3, Mistral 7B, and Mixtral-8x22B; additional code models (Code Llama base and Python variants, Phind CodeLlama v2 34B, WizardCoder 15B and WizardCoder Python 34B); safety and moderation models (Llama Guard 7B, Meta Llama Guard 2 8B, GPT-JT-Moderation 6B); Microsoft Phi-2, NexusRaven V2 13B, OpenChat 3.5, StripedHyena Hessian 7B, OLMo 7B and OLMo Twin-2T, Evo-1 (8K and 131K), LLaMA-2-32K 7B, and the RedPajama-INCITE base/instruct variants; embedding models (BAAI bge-base and bge-large, UAE-Large-V1, Sentence-BERT, the M2-BERT retrieval checkpoints, bert-base-uncased); and image models (Stable Diffusion 1.5, 2.1, and XL 1.0, Openjourney v4, Realistic Vision 3.0). The "Mixtral-8x7B v0.1" entry is rewritten with its full id ("mistralai/Mixtral-8x7B-v0.1"), description, and a maxOutput/tokens value of 32768. The excerpt shown here ends mid-entry at the SOLAR-10.7B model.
|
557
1304
|
"displayName": "Upstage SOLAR Instruct v1 (11B)",
|
558
1305
|
"enabled": false,
|
559
1306
|
"functionCall": false,
|
560
1307
|
"id": "upstage/SOLAR-10.7B-Instruct-v1.0",
|
561
1308
|
"maxOutput": 4096,
|
1309
|
+
"reasoning": false,
|
562
1310
|
"tokens": 4096,
|
563
1311
|
"vision": false,
|
564
1312
|
},
|
565
1313
|
{
|
1314
|
+
"contextWindowTokens": undefined,
|
1315
|
+
"description": "Dreambooth model trained on a diverse set of analog photographs to provide an analog film effect. ",
|
1316
|
+
"displayName": "Analog Diffusion",
|
1317
|
+
"enabled": false,
|
1318
|
+
"functionCall": false,
|
1319
|
+
"id": "wavymulder/Analog-Diffusion",
|
1320
|
+
"maxOutput": undefined,
|
1321
|
+
"reasoning": false,
|
1322
|
+
"tokens": undefined,
|
1323
|
+
"vision": false,
|
1324
|
+
},
|
1325
|
+
{
|
1326
|
+
"contextWindowTokens": undefined,
|
566
1327
|
"description": "The Yi series models are large language models trained from scratch by developers at 01.AI",
|
567
1328
|
"displayName": "01-ai Yi Chat (34B)",
|
568
|
-
"enabled":
|
1329
|
+
"enabled": false,
|
569
1330
|
"functionCall": false,
|
570
1331
|
"id": "zero-one-ai/Yi-34B-Chat",
|
571
1332
|
"maxOutput": 4096,
|
1333
|
+
"reasoning": false,
|
1334
|
+
"tokens": 4096,
|
1335
|
+
"vision": false,
|
1336
|
+
},
|
1337
|
+
{
|
1338
|
+
"contextWindowTokens": undefined,
|
1339
|
+
"description": "The Yi series models are large language models trained from scratch by developers at 01.AI",
|
1340
|
+
"displayName": "01-ai Yi Base (34B)",
|
1341
|
+
"enabled": false,
|
1342
|
+
"functionCall": false,
|
1343
|
+
"id": "zero-one-ai/Yi-34B",
|
1344
|
+
"maxOutput": 4096,
|
1345
|
+
"reasoning": false,
|
572
1346
|
"tokens": 4096,
|
573
1347
|
"vision": false,
|
574
1348
|
},
|
575
1349
|
{
|
1350
|
+
"contextWindowTokens": undefined,
|
1351
|
+
"description": "The Yi series models are large language models trained from scratch by developers at 01.AI",
|
1352
|
+
"displayName": "01-ai Yi Base (6B)",
|
1353
|
+
"enabled": false,
|
1354
|
+
"functionCall": false,
|
1355
|
+
"id": "zero-one-ai/Yi-6B",
|
1356
|
+
"maxOutput": 4096,
|
1357
|
+
"reasoning": false,
|
1358
|
+
"tokens": 4096,
|
1359
|
+
"vision": false,
|
1360
|
+
},
|
1361
|
+
{
|
1362
|
+
"contextWindowTokens": undefined,
|
576
1363
|
"description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.",
|
577
1364
|
"displayName": "Llama3 8B Chat HF INT4",
|
578
1365
|
"enabled": false,
|
579
1366
|
"functionCall": false,
|
580
1367
|
"id": "togethercomputer/Llama-3-8b-chat-hf-int4",
|
581
1368
|
"maxOutput": 8192,
|
1369
|
+
"reasoning": false,
|
582
1370
|
"tokens": 8192,
|
583
1371
|
"vision": false,
|
584
1372
|
},
|
585
1373
|
{
|
1374
|
+
"contextWindowTokens": undefined,
|
586
1375
|
"description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.",
|
587
1376
|
"displayName": "Togethercomputer Llama3 8B Instruct Int8",
|
588
1377
|
"enabled": false,
|
589
1378
|
"functionCall": false,
|
590
1379
|
"id": "togethercomputer/Llama-3-8b-chat-hf-int8",
|
591
1380
|
"maxOutput": 8192,
|
1381
|
+
"reasoning": false,
|
592
1382
|
"tokens": 8192,
|
593
1383
|
"vision": false,
|
594
1384
|
},
|
595
1385
|
{
|
1386
|
+
"contextWindowTokens": undefined,
|
1387
|
+
"description": "The Pythia Scaling Suite is a collection of models developed to facilitate interpretability research.",
|
1388
|
+
"displayName": "Pythia (1B)",
|
1389
|
+
"enabled": false,
|
1390
|
+
"functionCall": false,
|
1391
|
+
"id": "EleutherAI/pythia-1b-v0",
|
1392
|
+
"maxOutput": 2048,
|
1393
|
+
"reasoning": false,
|
1394
|
+
"tokens": 2048,
|
1395
|
+
"vision": false,
|
1396
|
+
},
|
1397
|
+
{
|
1398
|
+
"contextWindowTokens": undefined,
|
1399
|
+
"description": "replit-code-v1-3b is a 2.7B Causal Language Model focused on Code Completion. The model has been trained on a subset of the Stack Dedup v1.2 dataset.",
|
1400
|
+
"displayName": "Replit-Code-v1 (3B)",
|
1401
|
+
"enabled": false,
|
1402
|
+
"functionCall": false,
|
1403
|
+
"id": "replit/replit-code-v1-3b",
|
1404
|
+
"maxOutput": 2048,
|
1405
|
+
"reasoning": false,
|
1406
|
+
"tokens": 2048,
|
1407
|
+
"vision": false,
|
1408
|
+
},
|
1409
|
+
{
|
1410
|
+
"contextWindowTokens": undefined,
|
596
1411
|
"description": "Chat model based on EleutherAI’s Pythia-7B model, and is fine-tuned with data focusing on dialog-style interactions.",
|
597
1412
|
"displayName": "Pythia-Chat-Base (7B)",
|
598
1413
|
"enabled": false,
|
599
1414
|
"functionCall": false,
|
600
1415
|
"id": "togethercomputer/Pythia-Chat-Base-7B-v0.16",
|
601
1416
|
"maxOutput": 2048,
|
1417
|
+
"reasoning": false,
|
602
1418
|
"tokens": 2048,
|
603
1419
|
"vision": false,
|
604
1420
|
},
|
605
1421
|
{
|
1422
|
+
"contextWindowTokens": undefined,
|
1423
|
+
"description": "Decoder-style transformer pretrained from scratch on 1T tokens of English text and code.",
|
1424
|
+
"displayName": "MPT (7B)",
|
1425
|
+
"enabled": false,
|
1426
|
+
"functionCall": false,
|
1427
|
+
"id": "mosaicml/mpt-7b",
|
1428
|
+
"maxOutput": 2048,
|
1429
|
+
"reasoning": false,
|
1430
|
+
"tokens": 2048,
|
1431
|
+
"vision": false,
|
1432
|
+
},
|
1433
|
+
{
|
1434
|
+
"contextWindowTokens": undefined,
|
606
1435
|
"description": "Chat model for dialogue generation finetuned on ShareGPT-Vicuna, Camel-AI, GPTeacher, Guanaco, Baize and some generated datasets.",
|
607
1436
|
"displayName": "MPT-Chat (30B)",
|
608
1437
|
"enabled": false,
|
609
1438
|
"functionCall": false,
|
610
1439
|
"id": "togethercomputer/mpt-30b-chat",
|
611
1440
|
"maxOutput": 2048,
|
1441
|
+
"reasoning": false,
|
1442
|
+
"tokens": 2048,
|
1443
|
+
"vision": false,
|
1444
|
+
},
|
1445
|
+
{
|
1446
|
+
"contextWindowTokens": undefined,
|
1447
|
+
"description": "T5 fine-tuned on more than 1000 additional tasks covering also more languages, making it better than T5 at majority of tasks. ",
|
1448
|
+
"displayName": "Flan T5 XL (3B)",
|
1449
|
+
"enabled": false,
|
1450
|
+
"functionCall": false,
|
1451
|
+
"id": "google/flan-t5-xl",
|
1452
|
+
"maxOutput": 512,
|
1453
|
+
"reasoning": false,
|
1454
|
+
"tokens": 512,
|
1455
|
+
"vision": false,
|
1456
|
+
},
|
1457
|
+
{
|
1458
|
+
"contextWindowTokens": undefined,
|
1459
|
+
"description": "Foundation model designed specifically for SQL generation tasks. Pre-trained for 3 epochs and fine-tuned for 10 epochs.",
|
1460
|
+
"displayName": "NSQL (6B)",
|
1461
|
+
"enabled": false,
|
1462
|
+
"functionCall": false,
|
1463
|
+
"id": "NumbersStation/nsql-6B",
|
1464
|
+
"maxOutput": 2048,
|
1465
|
+
"reasoning": false,
|
612
1466
|
"tokens": 2048,
|
613
1467
|
"vision": false,
|
614
1468
|
},
|
615
1469
|
{
|
1470
|
+
"contextWindowTokens": undefined,
|
616
1471
|
"description": "Chatbot trained by fine-tuning LLaMA on dialogue data gathered from the web.",
|
617
1472
|
"displayName": "Koala (7B)",
|
618
1473
|
"enabled": false,
|
619
1474
|
"functionCall": false,
|
620
1475
|
"id": "togethercomputer/Koala-7B",
|
621
1476
|
"maxOutput": 2048,
|
1477
|
+
"reasoning": false,
|
1478
|
+
"tokens": 2048,
|
1479
|
+
"vision": false,
|
1480
|
+
},
|
1481
|
+
{
|
1482
|
+
"contextWindowTokens": undefined,
|
1483
|
+
"description": "The Pythia Scaling Suite is a collection of models developed to facilitate interpretability research.",
|
1484
|
+
"displayName": "Pythia (6.9B)",
|
1485
|
+
"enabled": false,
|
1486
|
+
"functionCall": false,
|
1487
|
+
"id": "EleutherAI/pythia-6.9b",
|
1488
|
+
"maxOutput": 2048,
|
1489
|
+
"reasoning": false,
|
622
1490
|
"tokens": 2048,
|
623
1491
|
"vision": false,
|
624
1492
|
},
|
625
1493
|
{
|
1494
|
+
"contextWindowTokens": undefined,
|
626
1495
|
"description": "An instruction-following LLM based on pythia-12b, and trained on ~15k instruction/response fine tuning records generated by Databricks employees.",
|
627
1496
|
"displayName": "Dolly v2 (12B)",
|
628
1497
|
"enabled": false,
|
629
1498
|
"functionCall": false,
|
630
1499
|
"id": "databricks/dolly-v2-12b",
|
631
1500
|
"maxOutput": 2048,
|
1501
|
+
"reasoning": false,
|
632
1502
|
"tokens": 2048,
|
633
1503
|
"vision": false,
|
634
1504
|
},
|
635
1505
|
{
|
1506
|
+
"contextWindowTokens": undefined,
|
636
1507
|
"description": "An instruction-following LLM based on pythia-3b, and trained on ~15k instruction/response fine tuning records generated by Databricks employees.",
|
637
1508
|
"displayName": "Dolly v2 (3B)",
|
638
1509
|
"enabled": false,
|
639
1510
|
"functionCall": false,
|
640
1511
|
"id": "databricks/dolly-v2-3b",
|
641
1512
|
"maxOutput": 2048,
|
1513
|
+
"reasoning": false,
|
1514
|
+
"tokens": 2048,
|
1515
|
+
"vision": false,
|
1516
|
+
},
|
1517
|
+
{
|
1518
|
+
"contextWindowTokens": undefined,
|
1519
|
+
"description": "Autoregressive language model trained on the Pile. Its architecture intentionally resembles that of GPT-3, and is almost identical to that of GPT-J 6B.",
|
1520
|
+
"displayName": "GPT-NeoX (20B)",
|
1521
|
+
"enabled": false,
|
1522
|
+
"functionCall": false,
|
1523
|
+
"id": "EleutherAI/gpt-neox-20b",
|
1524
|
+
"maxOutput": 2048,
|
1525
|
+
"reasoning": false,
|
1526
|
+
"tokens": 2048,
|
1527
|
+
"vision": false,
|
1528
|
+
},
|
1529
|
+
{
|
1530
|
+
"contextWindowTokens": undefined,
|
1531
|
+
"description": "The Pythia Scaling Suite is a collection of models developed to facilitate interpretability research.",
|
1532
|
+
"displayName": "Pythia (2.8B)",
|
1533
|
+
"enabled": false,
|
1534
|
+
"functionCall": false,
|
1535
|
+
"id": "EleutherAI/pythia-2.8b-v0",
|
1536
|
+
"maxOutput": 2048,
|
1537
|
+
"reasoning": false,
|
642
1538
|
"tokens": 2048,
|
643
1539
|
"vision": false,
|
644
1540
|
},
|
645
1541
|
{
|
1542
|
+
"contextWindowTokens": undefined,
|
1543
|
+
"description": "LLaMA 13B fine-tuned on over 300,000 instructions. Designed for long responses, low hallucination rate, and absence of censorship mechanisms.",
|
1544
|
+
"displayName": "Nous Hermes (13B)",
|
1545
|
+
"enabled": false,
|
1546
|
+
"functionCall": false,
|
1547
|
+
"id": "NousResearch/Nous-Hermes-13b",
|
1548
|
+
"maxOutput": 2048,
|
1549
|
+
"reasoning": false,
|
1550
|
+
"tokens": 2048,
|
1551
|
+
"vision": false,
|
1552
|
+
},
|
1553
|
+
{
|
1554
|
+
"contextWindowTokens": undefined,
|
646
1555
|
"description": "Instruction-following language model built on LLaMA. Expanding upon the initial 52K dataset from the Alpaca model, an additional 534,530 focused on multi-lingual tasks.",
|
647
1556
|
"displayName": "Guanaco (65B) ",
|
648
1557
|
"enabled": false,
|
649
1558
|
"functionCall": false,
|
650
1559
|
"id": "togethercomputer/guanaco-65b",
|
651
1560
|
"maxOutput": 2048,
|
1561
|
+
"reasoning": false,
|
652
1562
|
"tokens": 2048,
|
653
1563
|
"vision": false,
|
654
1564
|
},
|
655
1565
|
{
|
1566
|
+
"contextWindowTokens": undefined,
|
1567
|
+
"description": "Language model trained on 2 trillion tokens with double the context length of Llama 1. Available in three sizes: 7B, 13B and 70B parameters",
|
1568
|
+
"displayName": "LLaMA-2 (7B)",
|
1569
|
+
"enabled": false,
|
1570
|
+
"functionCall": false,
|
1571
|
+
"id": "togethercomputer/llama-2-7b",
|
1572
|
+
"maxOutput": 4096,
|
1573
|
+
"reasoning": false,
|
1574
|
+
"tokens": 4096,
|
1575
|
+
"vision": false,
|
1576
|
+
},
|
1577
|
+
{
|
1578
|
+
"contextWindowTokens": undefined,
|
656
1579
|
"description": "Chatbot trained by fine-tuning Flan-t5-xl on user-shared conversations collected from ShareGPT.",
|
657
1580
|
"displayName": "Vicuna-FastChat-T5 (3B)",
|
658
1581
|
"enabled": false,
|
659
1582
|
"functionCall": false,
|
660
1583
|
"id": "lmsys/fastchat-t5-3b-v1.0",
|
661
1584
|
"maxOutput": 512,
|
1585
|
+
"reasoning": false,
|
662
1586
|
"tokens": 512,
|
663
1587
|
"vision": false,
|
664
1588
|
},
|
665
1589
|
{
|
1590
|
+
"contextWindowTokens": undefined,
|
1591
|
+
"description": "An auto-regressive language model, based on the transformer architecture. The model comes in different sizes: 7B, 13B, 33B and 65B parameters.",
|
1592
|
+
"displayName": "LLaMA (7B)",
|
1593
|
+
"enabled": false,
|
1594
|
+
"functionCall": false,
|
1595
|
+
"id": "huggyllama/llama-7b",
|
1596
|
+
"maxOutput": 2048,
|
1597
|
+
"reasoning": false,
|
1598
|
+
"tokens": 2048,
|
1599
|
+
"vision": false,
|
1600
|
+
},
|
1601
|
+
{
|
1602
|
+
"contextWindowTokens": undefined,
|
666
1603
|
"description": "Chat-based and open-source assistant. The vision of the project is to make a large language model that can run on a single high-end consumer GPU. ",
|
667
1604
|
"displayName": "Open-Assistant StableLM SFT-7 (7B)",
|
668
1605
|
"enabled": false,
|
669
1606
|
"functionCall": false,
|
670
1607
|
"id": "OpenAssistant/stablelm-7b-sft-v7-epoch-3",
|
671
1608
|
"maxOutput": 4096,
|
1609
|
+
"reasoning": false,
|
672
1610
|
"tokens": 4096,
|
673
1611
|
"vision": true,
|
674
1612
|
},
|
675
1613
|
{
|
1614
|
+
"contextWindowTokens": undefined,
|
1615
|
+
"description": "The Pythia Scaling Suite is a collection of models developed to facilitate interpretability research.",
|
1616
|
+
"displayName": "Pythia (12B)",
|
1617
|
+
"enabled": false,
|
1618
|
+
"functionCall": false,
|
1619
|
+
"id": "EleutherAI/pythia-12b-v0",
|
1620
|
+
"maxOutput": 2048,
|
1621
|
+
"reasoning": false,
|
1622
|
+
"tokens": 2048,
|
1623
|
+
"vision": false,
|
1624
|
+
},
|
1625
|
+
{
|
1626
|
+
"contextWindowTokens": undefined,
|
676
1627
|
"description": "Chat model for dialogue generation finetuned on ShareGPT-Vicuna, Camel-AI, GPTeacher, Guanaco, Baize and some generated datasets.",
|
677
1628
|
"displayName": "MPT-Chat (7B)",
|
678
1629
|
"enabled": false,
|
679
1630
|
"functionCall": false,
|
680
1631
|
"id": "togethercomputer/mpt-7b-chat",
|
681
1632
|
"maxOutput": 2048,
|
1633
|
+
"reasoning": false,
|
682
1634
|
"tokens": 2048,
|
683
1635
|
"vision": false,
|
684
1636
|
},
|
685
1637
|
{
|
1638
|
+
"contextWindowTokens": undefined,
|
1639
|
+
"description": "Transformer model trained using Ben Wang's Mesh Transformer JAX. ",
|
1640
|
+
"displayName": "GPT-J (6B)",
|
1641
|
+
"enabled": false,
|
1642
|
+
"functionCall": false,
|
1643
|
+
"id": "EleutherAI/gpt-j-6b",
|
1644
|
+
"maxOutput": 2048,
|
1645
|
+
"reasoning": false,
|
1646
|
+
"tokens": 2048,
|
1647
|
+
"vision": false,
|
1648
|
+
},
|
1649
|
+
{
|
1650
|
+
"contextWindowTokens": undefined,
|
686
1651
|
"description": "Chat-based and open-source assistant. The vision of the project is to make a large language model that can run on a single high-end consumer GPU. ",
|
687
1652
|
"displayName": "Open-Assistant Pythia SFT-4 (12B)",
|
688
1653
|
"enabled": false,
|
689
1654
|
"functionCall": false,
|
690
1655
|
"id": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
|
691
1656
|
"maxOutput": 2048,
|
1657
|
+
"reasoning": false,
|
692
1658
|
"tokens": 2048,
|
693
1659
|
"vision": true,
|
694
1660
|
},
|
695
1661
|
{
|
1662
|
+
"contextWindowTokens": undefined,
|
696
1663
|
"description": "Chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT. Auto-regressive model, based on the transformer architecture.",
|
697
1664
|
"displayName": "Vicuna v1.3 (7B)",
|
698
1665
|
"enabled": false,
|
699
1666
|
"functionCall": false,
|
700
1667
|
"id": "lmsys/vicuna-7b-v1.3",
|
701
1668
|
"maxOutput": 2048,
|
1669
|
+
"reasoning": false,
|
702
1670
|
"tokens": 2048,
|
703
1671
|
"vision": false,
|
704
1672
|
},
|
705
1673
|
{
|
1674
|
+
"contextWindowTokens": undefined,
|
1675
|
+
"description": "This model is fine-tuned from CodeLlama-34B-Python and achieves 69.5% pass@1 on HumanEval.",
|
1676
|
+
"displayName": "Phind Code LLaMA Python v1 (34B)",
|
1677
|
+
"enabled": false,
|
1678
|
+
"functionCall": false,
|
1679
|
+
"id": "Phind/Phind-CodeLlama-34B-Python-v1",
|
1680
|
+
"maxOutput": 16384,
|
1681
|
+
"reasoning": false,
|
1682
|
+
"tokens": 16384,
|
1683
|
+
"vision": false,
|
1684
|
+
},
|
1685
|
+
{
|
1686
|
+
"contextWindowTokens": undefined,
|
1687
|
+
"description": "NSQL is a family of autoregressive open-source large foundation models (FMs) designed specifically for SQL generation tasks",
|
1688
|
+
"displayName": "NSQL LLaMA-2 (7B)",
|
1689
|
+
"enabled": false,
|
1690
|
+
"functionCall": false,
|
1691
|
+
"id": "NumbersStation/nsql-llama-2-7B",
|
1692
|
+
"maxOutput": 4096,
|
1693
|
+
"reasoning": false,
|
1694
|
+
"tokens": 4096,
|
1695
|
+
"vision": false,
|
1696
|
+
},
|
1697
|
+
{
|
1698
|
+
"contextWindowTokens": undefined,
|
706
1699
|
"description": "Nous-Hermes-Llama2-70b is a state-of-the-art language model fine-tuned on over 300,000 instructions.",
|
707
1700
|
"displayName": "Nous Hermes LLaMA-2 (70B)",
|
708
1701
|
"enabled": false,
|
709
1702
|
"functionCall": false,
|
710
1703
|
"id": "NousResearch/Nous-Hermes-Llama2-70b",
|
711
1704
|
"maxOutput": 4096,
|
1705
|
+
"reasoning": false,
|
712
1706
|
"tokens": 4096,
|
713
1707
|
"vision": false,
|
714
1708
|
},
|
715
1709
|
{
|
1710
|
+
"contextWindowTokens": undefined,
|
1711
|
+
"description": "This model achieves a substantial and comprehensive improvement on coding, mathematical reasoning and open-domain conversation capacities.",
|
1712
|
+
"displayName": "WizardLM v1.0 (70B)",
|
1713
|
+
"enabled": false,
|
1714
|
+
"functionCall": false,
|
1715
|
+
"id": "WizardLM/WizardLM-70B-V1.0",
|
1716
|
+
"maxOutput": 4096,
|
1717
|
+
"reasoning": false,
|
1718
|
+
"tokens": 4096,
|
1719
|
+
"vision": false,
|
1720
|
+
},
|
1721
|
+
{
|
1722
|
+
"contextWindowTokens": undefined,
|
1723
|
+
"description": "An auto-regressive language model, based on the transformer architecture. The model comes in different sizes: 7B, 13B, 33B and 65B parameters.",
|
1724
|
+
"displayName": "LLaMA (65B)",
|
1725
|
+
"enabled": false,
|
1726
|
+
"functionCall": false,
|
1727
|
+
"id": "huggyllama/llama-65b",
|
1728
|
+
"maxOutput": 2048,
|
1729
|
+
"reasoning": false,
|
1730
|
+
"tokens": 2048,
|
1731
|
+
"vision": false,
|
1732
|
+
},
|
1733
|
+
{
|
1734
|
+
"contextWindowTokens": undefined,
|
716
1735
|
"description": "Vicuna is a chat assistant trained by fine-tuning Llama 2 on user-shared conversations collected from ShareGPT.",
|
717
1736
|
"displayName": "Vicuna v1.5 16K (13B)",
|
718
1737
|
"enabled": false,
|
719
1738
|
"functionCall": false,
|
720
1739
|
"id": "lmsys/vicuna-13b-v1.5-16k",
|
721
1740
|
"maxOutput": 16384,
|
1741
|
+
"reasoning": false,
|
722
1742
|
"tokens": 16384,
|
723
1743
|
"vision": false,
|
724
1744
|
},
|
725
1745
|
{
|
1746
|
+
"contextWindowTokens": undefined,
|
726
1747
|
"description": "Chat model fine-tuned from EleutherAI’s GPT-NeoX with over 40 million instructions on carbon reduced compute.",
|
727
1748
|
"displayName": "GPT-NeoXT-Chat-Base (20B)",
|
728
1749
|
"enabled": false,
|
729
1750
|
"functionCall": false,
|
730
1751
|
"id": "togethercomputer/GPT-NeoXT-Chat-Base-20B",
|
731
1752
|
"maxOutput": 2048,
|
1753
|
+
"reasoning": false,
|
732
1754
|
"tokens": 2048,
|
733
1755
|
"vision": false,
|
734
1756
|
},
|
735
1757
|
{
|
1758
|
+
"contextWindowTokens": undefined,
|
736
1759
|
"description": "A fine-tuned version of Mistral-7B to act as a helpful assistant.",
|
737
1760
|
"displayName": "Zephyr-7B-ß",
|
738
1761
|
"enabled": false,
|
739
1762
|
"functionCall": false,
|
740
1763
|
"id": "HuggingFaceH4/zephyr-7b-beta",
|
741
1764
|
"maxOutput": 32768,
|
1765
|
+
"reasoning": false,
|
742
1766
|
"tokens": 32768,
|
743
1767
|
"vision": false,
|
744
1768
|
},
|
745
1769
|
{
|
1770
|
+
"contextWindowTokens": undefined,
|
1771
|
+
"description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
|
1772
|
+
"displayName": "Code Llama Python (13B)",
|
1773
|
+
"enabled": false,
|
1774
|
+
"functionCall": false,
|
1775
|
+
"id": "togethercomputer/CodeLlama-13b-Python",
|
1776
|
+
"maxOutput": 16384,
|
1777
|
+
"reasoning": false,
|
1778
|
+
"tokens": 16384,
|
1779
|
+
"vision": false,
|
1780
|
+
},
|
1781
|
+
{
|
1782
|
+
"contextWindowTokens": undefined,
|
1783
|
+
"description": "Language model trained on 2 trillion tokens with double the context length of Llama 1. Available in three sizes: 7B, 13B and 70B parameters",
|
1784
|
+
"displayName": "LLaMA-2 (13B)",
|
1785
|
+
"enabled": false,
|
1786
|
+
"functionCall": false,
|
1787
|
+
"id": "togethercomputer/llama-2-13b",
|
1788
|
+
"maxOutput": 4096,
|
1789
|
+
"reasoning": false,
|
1790
|
+
"tokens": 4096,
|
1791
|
+
"vision": false,
|
1792
|
+
},
|
1793
|
+
{
|
1794
|
+
"contextWindowTokens": undefined,
|
746
1795
|
"description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
|
747
1796
|
"displayName": "Code Llama Instruct (7B)",
|
748
1797
|
"enabled": false,
|
749
1798
|
"functionCall": false,
|
750
1799
|
"id": "togethercomputer/CodeLlama-7b-Instruct",
|
751
1800
|
"maxOutput": 16384,
|
1801
|
+
"reasoning": false,
|
752
1802
|
"tokens": 16384,
|
753
1803
|
"vision": false,
|
754
1804
|
},
|
755
1805
|
{
|
1806
|
+
"contextWindowTokens": undefined,
|
756
1807
|
"description": "Instruction-following language model built on LLaMA. Expanding upon the initial 52K dataset from the Alpaca model, an additional 534,530 focused on multi-lingual tasks.",
|
757
1808
|
"displayName": "Guanaco (13B) ",
|
758
1809
|
"enabled": false,
|
759
1810
|
"functionCall": false,
|
760
1811
|
"id": "togethercomputer/guanaco-13b",
|
761
1812
|
"maxOutput": 2048,
|
1813
|
+
"reasoning": false,
|
1814
|
+
"tokens": 2048,
|
1815
|
+
"vision": false,
|
1816
|
+
},
|
1817
|
+
{
|
1818
|
+
"contextWindowTokens": undefined,
|
1819
|
+
"description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
|
1820
|
+
"displayName": "Code Llama Python (34B)",
|
1821
|
+
"enabled": false,
|
1822
|
+
"functionCall": false,
|
1823
|
+
"id": "togethercomputer/CodeLlama-34b-Python",
|
1824
|
+
"maxOutput": 16384,
|
1825
|
+
"reasoning": false,
|
1826
|
+
"tokens": 16384,
|
1827
|
+
"vision": false,
|
1828
|
+
},
|
1829
|
+
{
|
1830
|
+
"contextWindowTokens": undefined,
|
1831
|
+
"description": "Designed for short-form instruction following, finetuned on Dolly and Anthropic HH-RLHF and other datasets",
|
1832
|
+
"displayName": "MPT-Instruct (7B)",
|
1833
|
+
"enabled": false,
|
1834
|
+
"functionCall": false,
|
1835
|
+
"id": "mosaicml/mpt-7b-instruct",
|
1836
|
+
"maxOutput": 2048,
|
1837
|
+
"reasoning": false,
|
762
1838
|
"tokens": 2048,
|
763
1839
|
"vision": false,
|
764
1840
|
},
|
765
1841
|
{
|
1842
|
+
"contextWindowTokens": undefined,
|
766
1843
|
"description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters",
|
767
1844
|
"displayName": "LLaMA-2 Chat (70B)",
|
768
1845
|
"enabled": false,
|
769
1846
|
"functionCall": false,
|
770
1847
|
"id": "togethercomputer/llama-2-70b-chat",
|
771
1848
|
"maxOutput": 4096,
|
1849
|
+
"reasoning": false,
|
772
1850
|
"tokens": 4096,
|
773
1851
|
"vision": false,
|
774
1852
|
},
|
775
1853
|
{
|
1854
|
+
"contextWindowTokens": undefined,
|
776
1855
|
"description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
|
777
1856
|
"displayName": "Code Llama Instruct (34B)",
|
778
1857
|
"enabled": false,
|
779
1858
|
"functionCall": false,
|
780
1859
|
"id": "togethercomputer/CodeLlama-34b-Instruct",
|
781
1860
|
"maxOutput": 16384,
|
1861
|
+
"reasoning": false,
|
782
1862
|
"tokens": 16384,
|
783
1863
|
"vision": false,
|
784
1864
|
},
|
785
1865
|
{
|
1866
|
+
"contextWindowTokens": undefined,
|
1867
|
+
"description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
|
1868
|
+
"displayName": "Code Llama (34B)",
|
1869
|
+
"enabled": false,
|
1870
|
+
"functionCall": false,
|
1871
|
+
"id": "togethercomputer/CodeLlama-34b",
|
1872
|
+
"maxOutput": 16384,
|
1873
|
+
"reasoning": false,
|
1874
|
+
"tokens": 16384,
|
1875
|
+
"vision": false,
|
1876
|
+
},
|
1877
|
+
{
|
1878
|
+
"contextWindowTokens": undefined,
|
1879
|
+
"description": "An autoregressive language models for program synthesis.",
|
1880
|
+
"displayName": "CodeGen2 (16B)",
|
1881
|
+
"enabled": false,
|
1882
|
+
"functionCall": false,
|
1883
|
+
"id": "Salesforce/codegen2-16B",
|
1884
|
+
"maxOutput": 2048,
|
1885
|
+
"reasoning": false,
|
1886
|
+
"tokens": 2048,
|
1887
|
+
"vision": false,
|
1888
|
+
},
|
1889
|
+
{
|
1890
|
+
"contextWindowTokens": undefined,
|
1891
|
+
"description": "An autoregressive language models for program synthesis.",
|
1892
|
+
"displayName": "CodeGen2 (7B)",
|
1893
|
+
"enabled": false,
|
1894
|
+
"functionCall": false,
|
1895
|
+
"id": "Salesforce/codegen2-7B",
|
1896
|
+
"maxOutput": 2048,
|
1897
|
+
"reasoning": false,
|
1898
|
+
"tokens": 2048,
|
1899
|
+
"vision": false,
|
1900
|
+
},
|
1901
|
+
{
|
1902
|
+
"contextWindowTokens": undefined,
|
1903
|
+
"description": "Flan T5 XXL (11B parameters) is T5 fine-tuned on 1.8K tasks ([paper](https://arxiv.org/pdf/2210.11416.pdf)).",
|
1904
|
+
"displayName": "Flan T5 XXL (11B)",
|
1905
|
+
"enabled": false,
|
1906
|
+
"functionCall": false,
|
1907
|
+
"id": "google/flan-t5-xxl",
|
1908
|
+
"maxOutput": 512,
|
1909
|
+
"reasoning": false,
|
1910
|
+
"tokens": 512,
|
1911
|
+
"vision": false,
|
1912
|
+
},
|
1913
|
+
{
|
1914
|
+
"contextWindowTokens": undefined,
|
1915
|
+
"description": "Language model trained on 2 trillion tokens with double the context length of Llama 1. Available in three sizes: 7B, 13B and 70B parameters",
|
1916
|
+
"displayName": "LLaMA-2 (70B)",
|
1917
|
+
"enabled": false,
|
1918
|
+
"functionCall": false,
|
1919
|
+
"id": "togethercomputer/llama-2-70b",
|
1920
|
+
"maxOutput": 4096,
|
1921
|
+
"reasoning": false,
|
1922
|
+
"tokens": 4096,
|
1923
|
+
"vision": false,
|
1924
|
+
},
|
1925
|
+
{
|
1926
|
+
"contextWindowTokens": undefined,
|
1927
|
+
"description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
|
1928
|
+
"displayName": "Code Llama (7B)",
|
1929
|
+
"enabled": false,
|
1930
|
+
"functionCall": false,
|
1931
|
+
"id": "codellama/CodeLlama-7b-hf",
|
1932
|
+
"maxOutput": 16384,
|
1933
|
+
"reasoning": false,
|
1934
|
+
"tokens": 16384,
|
1935
|
+
"vision": false,
|
1936
|
+
},
|
1937
|
+
{
|
1938
|
+
"contextWindowTokens": undefined,
|
1939
|
+
"description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
|
1940
|
+
"displayName": "Code Llama (13B)",
|
1941
|
+
"enabled": false,
|
1942
|
+
"functionCall": false,
|
1943
|
+
"id": "codellama/CodeLlama-13b-hf",
|
1944
|
+
"maxOutput": 16384,
|
1945
|
+
"reasoning": false,
|
1946
|
+
"tokens": 16384,
|
1947
|
+
"vision": false,
|
1948
|
+
},
|
1949
|
+
{
|
1950
|
+
"contextWindowTokens": undefined,
|
786
1951
|
"description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
|
787
1952
|
"displayName": "Code Llama Instruct (13B)",
|
788
1953
|
"enabled": false,
|
789
1954
|
"functionCall": false,
|
790
1955
|
"id": "togethercomputer/CodeLlama-13b-Instruct",
|
791
1956
|
"maxOutput": 16384,
|
1957
|
+
"reasoning": false,
|
792
1958
|
"tokens": 16384,
|
793
1959
|
"vision": false,
|
794
1960
|
},
|
795
1961
|
{
|
1962
|
+
"contextWindowTokens": undefined,
|
796
1963
|
"description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters",
|
797
1964
|
"displayName": "LLaMA-2 Chat (13B)",
|
798
1965
|
"enabled": false,
|
799
1966
|
"functionCall": false,
|
800
1967
|
"id": "togethercomputer/llama-2-13b-chat",
|
801
1968
|
"maxOutput": 4096,
|
1969
|
+
"reasoning": false,
|
802
1970
|
"tokens": 4096,
|
803
1971
|
"vision": false,
|
804
1972
|
},
|
805
1973
|
{
|
1974
|
+
"contextWindowTokens": undefined,
|
806
1975
|
"description": "Chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT. Auto-regressive model, based on the transformer architecture.",
|
807
1976
|
"displayName": "Vicuna v1.3 (13B)",
|
808
1977
|
"enabled": false,
|
809
1978
|
"functionCall": false,
|
810
1979
|
"id": "lmsys/vicuna-13b-v1.3",
|
811
1980
|
"maxOutput": 2048,
|
1981
|
+
"reasoning": false,
|
812
1982
|
"tokens": 2048,
|
813
1983
|
"vision": false,
|
814
1984
|
},
|
815
1985
|
{
|
1986
|
+
"contextWindowTokens": undefined,
|
1987
|
+
"description": "An auto-regressive language model, based on the transformer architecture. The model comes in different sizes: 7B, 13B, 33B and 65B parameters.",
|
1988
|
+
"displayName": "LLaMA (13B)",
|
1989
|
+
"enabled": false,
|
1990
|
+
"functionCall": false,
|
1991
|
+
"id": "huggyllama/llama-13b",
|
1992
|
+
"maxOutput": 2048,
|
1993
|
+
"reasoning": false,
|
1994
|
+
"tokens": 2048,
|
1995
|
+
"vision": false,
|
1996
|
+
},
|
1997
|
+
{
|
1998
|
+
"contextWindowTokens": undefined,
|
816
1999
|
"description": "Fine-tuned from StarCoder to act as a helpful coding assistant. As an alpha release is only intended for educational or research purpopses.",
|
817
2000
|
"displayName": "StarCoderChat Alpha (16B)",
|
818
2001
|
"enabled": false,
|
819
2002
|
"functionCall": false,
|
820
2003
|
"id": "HuggingFaceH4/starchat-alpha",
|
821
2004
|
"maxOutput": 8192,
|
2005
|
+
"reasoning": false,
|
822
2006
|
"tokens": 8192,
|
823
2007
|
"vision": false,
|
824
2008
|
},
|
825
2009
|
{
|
2010
|
+
"contextWindowTokens": undefined,
|
2011
|
+
"description": "An auto-regressive language model, based on the transformer architecture. The model comes in different sizes: 7B, 13B, 33B and 65B parameters.",
|
2012
|
+
"displayName": "LLaMA (30B)",
|
2013
|
+
"enabled": false,
|
2014
|
+
"functionCall": false,
|
2015
|
+
"id": "huggyllama/llama-30b",
|
2016
|
+
"maxOutput": 2048,
|
2017
|
+
"reasoning": false,
|
2018
|
+
"tokens": 2048,
|
2019
|
+
"vision": false,
|
2020
|
+
},
|
2021
|
+
{
|
2022
|
+
"contextWindowTokens": undefined,
|
2023
|
+
"description": "Decoder-only language model pre-trained on a diverse collection of English and Code datasets with a sequence length of 4096.",
|
2024
|
+
"displayName": "StableLM-Base-Alpha (3B)",
|
2025
|
+
"enabled": false,
|
2026
|
+
"functionCall": false,
|
2027
|
+
"id": "stabilityai/stablelm-base-alpha-3b",
|
2028
|
+
"maxOutput": 4096,
|
2029
|
+
"reasoning": false,
|
2030
|
+
"tokens": 4096,
|
2031
|
+
"vision": false,
|
2032
|
+
},
|
2033
|
+
{
|
2034
|
+
"contextWindowTokens": undefined,
|
2035
|
+
"description": "Decoder-only language model pre-trained on a diverse collection of English and Code datasets with a sequence length of 4096.",
|
2036
|
+
"displayName": "StableLM-Base-Alpha (7B)",
|
2037
|
+
"enabled": false,
|
2038
|
+
"functionCall": false,
|
2039
|
+
"id": "stabilityai/stablelm-base-alpha-7b",
|
2040
|
+
"maxOutput": 4096,
|
2041
|
+
"reasoning": false,
|
2042
|
+
"tokens": 4096,
|
2043
|
+
"vision": false,
|
2044
|
+
},
|
2045
|
+
{
|
2046
|
+
"contextWindowTokens": undefined,
|
2047
|
+
"description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
|
2048
|
+
"displayName": "Code Llama Python (7B)",
|
2049
|
+
"enabled": false,
|
2050
|
+
"functionCall": false,
|
2051
|
+
"id": "togethercomputer/CodeLlama-7b-Python",
|
2052
|
+
"maxOutput": 16384,
|
2053
|
+
"reasoning": false,
|
2054
|
+
"tokens": 16384,
|
2055
|
+
"vision": false,
|
2056
|
+
},
|
2057
|
+
{
|
2058
|
+
"contextWindowTokens": undefined,
|
2059
|
+
"description": "Defog's SQLCoder is a state-of-the-art LLM for converting natural language questions to SQL queries, fine-tuned from Bigcode's Starcoder 15B model.",
|
2060
|
+
"displayName": "Sqlcoder (15B)",
|
2061
|
+
"enabled": false,
|
2062
|
+
"functionCall": false,
|
2063
|
+
"id": "defog/sqlcoder",
|
2064
|
+
"maxOutput": 8192,
|
2065
|
+
"reasoning": false,
|
2066
|
+
"tokens": 8192,
|
2067
|
+
"vision": false,
|
2068
|
+
},
|
2069
|
+
{
|
2070
|
+
"contextWindowTokens": undefined,
|
2071
|
+
"description": "Trained on 80+ coding languages, uses Multi Query Attention, an 8K context window, and was trained using the Fill-in-the-Middle objective on 1T tokens.",
|
2072
|
+
"displayName": "StarCoder (16B)",
|
2073
|
+
"enabled": false,
|
2074
|
+
"functionCall": false,
|
2075
|
+
"id": "bigcode/starcoder",
|
2076
|
+
"maxOutput": 8192,
|
2077
|
+
"reasoning": false,
|
2078
|
+
"tokens": 8192,
|
2079
|
+
"vision": false,
|
2080
|
+
},
|
2081
|
+
{
|
2082
|
+
"contextWindowTokens": undefined,
|
826
2083
|
"description": "An instruction-following LLM based on pythia-7b, and trained on ~15k instruction/response fine tuning records generated by Databricks employees.",
|
827
2084
|
"displayName": "Dolly v2 (7B)",
|
828
2085
|
"enabled": false,
|
829
2086
|
"functionCall": false,
|
830
2087
|
"id": "databricks/dolly-v2-7b",
|
831
2088
|
"maxOutput": 2048,
|
2089
|
+
"reasoning": false,
|
832
2090
|
"tokens": 2048,
|
833
2091
|
"vision": false,
|
834
2092
|
},
|
835
2093
|
{
|
2094
|
+
"contextWindowTokens": undefined,
|
836
2095
|
"description": "Instruction-following language model built on LLaMA. Expanding upon the initial 52K dataset from the Alpaca model, an additional 534,530 focused on multi-lingual tasks.",
|
837
2096
|
"displayName": "Guanaco (33B) ",
|
838
2097
|
"enabled": false,
|
839
2098
|
"functionCall": false,
|
840
2099
|
"id": "togethercomputer/guanaco-33b",
|
841
2100
|
"maxOutput": 2048,
|
2101
|
+
"reasoning": false,
|
842
2102
|
"tokens": 2048,
|
843
2103
|
"vision": false,
|
844
2104
|
},
|
845
2105
|
{
|
2106
|
+
"contextWindowTokens": undefined,
|
846
2107
|
"description": "Chatbot trained by fine-tuning LLaMA on dialogue data gathered from the web.",
|
847
2108
|
"displayName": "Koala (13B)",
|
848
2109
|
"enabled": false,
|
849
2110
|
"functionCall": false,
|
850
2111
|
"id": "togethercomputer/Koala-13B",
|
851
2112
|
"maxOutput": 2048,
|
2113
|
+
"reasoning": false,
|
852
2114
|
"tokens": 2048,
|
853
2115
|
"vision": false,
|
854
2116
|
},
|
855
2117
|
{
|
2118
|
+
"contextWindowTokens": undefined,
|
2119
|
+
"description": "Fork of GPT-J instruction tuned to excel at few-shot prompts (blog post).",
|
2120
|
+
"displayName": "GPT-JT (6B)",
|
2121
|
+
"enabled": false,
|
2122
|
+
"functionCall": false,
|
2123
|
+
"id": "togethercomputer/GPT-JT-6B-v1",
|
2124
|
+
"maxOutput": 2048,
|
2125
|
+
"reasoning": false,
|
2126
|
+
"tokens": 2048,
|
2127
|
+
"vision": false,
|
2128
|
+
},
|
2129
|
+
{
|
2130
|
+
"contextWindowTokens": undefined,
|
856
2131
|
"description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters",
|
857
2132
|
"displayName": "LLaMA-2 Chat (7B)",
|
858
2133
|
"enabled": false,
|
859
2134
|
"functionCall": false,
|
860
2135
|
"id": "togethercomputer/llama-2-7b-chat",
|
861
2136
|
"maxOutput": 4096,
|
2137
|
+
"reasoning": false,
|
862
2138
|
"tokens": 4096,
|
863
2139
|
"vision": false,
|
864
2140
|
},
|
865
2141
|
{
|
2142
|
+
"contextWindowTokens": undefined,
|
866
2143
|
"description": "Built on the Llama2 architecture, SOLAR-10.7B incorporates the innovative Upstage Depth Up-Scaling",
|
867
2144
|
"displayName": "Upstage SOLAR Instruct v1 (11B)-Int4",
|
868
2145
|
"enabled": false,
|
869
2146
|
"functionCall": false,
|
870
2147
|
"id": "togethercomputer/SOLAR-10.7B-Instruct-v1.0-int4",
|
871
2148
|
"maxOutput": 4096,
|
2149
|
+
"reasoning": false,
|
872
2150
|
"tokens": 4096,
|
873
2151
|
"vision": false,
|
874
2152
|
},
|
875
2153
|
{
|
2154
|
+
"contextWindowTokens": undefined,
|
876
2155
|
"description": "Instruction-following language model built on LLaMA. Expanding upon the initial 52K dataset from the Alpaca model, an additional 534,530 focused on multi-lingual tasks. ",
|
877
2156
|
"displayName": "Guanaco (7B) ",
|
878
2157
|
"enabled": false,
|
879
2158
|
"functionCall": false,
|
880
2159
|
"id": "togethercomputer/guanaco-7b",
|
881
2160
|
"maxOutput": 2048,
|
2161
|
+
"reasoning": false,
|
882
2162
|
"tokens": 2048,
|
883
2163
|
"vision": false,
|
884
2164
|
},
|
2165
|
+
{
|
2166
|
+
"contextWindowTokens": undefined,
|
2167
|
+
"description": "Llemma 7B is a language model for mathematics. It was initialized with Code Llama 7B weights, and trained on the Proof-Pile-2 for 200B tokens.",
|
2168
|
+
"displayName": "Llemma (7B)",
|
2169
|
+
"enabled": false,
|
2170
|
+
"functionCall": false,
|
2171
|
+
"id": "EleutherAI/llemma_7b",
|
2172
|
+
"maxOutput": 4096,
|
2173
|
+
"reasoning": false,
|
2174
|
+
"tokens": 4096,
|
2175
|
+
"vision": false,
|
2176
|
+
},
|
2177
|
+
{
|
2178
|
+
"contextWindowTokens": undefined,
|
2179
|
+
"description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
|
2180
|
+
"displayName": "Code Llama (34B)",
|
2181
|
+
"enabled": false,
|
2182
|
+
"functionCall": false,
|
2183
|
+
"id": "codellama/CodeLlama-34b-hf",
|
2184
|
+
"maxOutput": 16384,
|
2185
|
+
"reasoning": false,
|
2186
|
+
"tokens": 16384,
|
2187
|
+
"vision": false,
|
2188
|
+
},
|
885
2189
|
]
|
886
2190
|
`;
|