@lobehub/chat 1.40.1 → 1.40.3
This diff shows the published contents of two package versions as they appear in their respective public registries. It is provided for informational purposes only and reflects the changes between those versions.
- package/CHANGELOG.md +50 -0
- package/changelog/v1.json +18 -0
- package/package.json +1 -1
- package/src/app/(main)/changelog/page.tsx +3 -1
- package/src/app/(main)/chat/(workspace)/features/ChangelogModal.tsx +11 -0
- package/src/app/(main)/chat/(workspace)/page.tsx +8 -5
- package/src/app/(main)/discover/(detail)/model/[...slugs]/features/Actions.tsx +1 -1
- package/src/app/(main)/discover/(detail)/model/[...slugs]/features/Header.tsx +1 -1
- package/src/app/(main)/discover/(detail)/model/[...slugs]/features/InfoSidebar/SuggestionItem.tsx +2 -2
- package/src/app/(main)/discover/(detail)/model/[...slugs]/features/ProviderList/ProviderItem.tsx +1 -1
- package/src/app/(main)/discover/(detail)/provider/[slug]/features/ModelList/ModelItem.tsx +3 -3
- package/src/app/(main)/discover/(list)/models/features/Card.tsx +6 -2
- package/src/app/(main)/settings/llm/ProviderList/Cloudflare/index.tsx +1 -5
- package/src/app/(main)/settings/llm/components/ProviderModelList/ModelFetcher.tsx +2 -2
- package/src/app/@modal/(.)changelog/modal/page.tsx +3 -1
- package/src/components/Loading/BrandTextLoading/index.tsx +5 -0
- package/src/components/ModelSelect/index.tsx +7 -4
- package/src/config/__tests__/app.test.ts +6 -2
- package/src/config/app.ts +1 -2
- package/src/config/featureFlags/schema.ts +3 -0
- package/src/config/modelProviders/ai21.ts +2 -2
- package/src/config/modelProviders/ai360.ts +4 -4
- package/src/config/modelProviders/anthropic.ts +8 -8
- package/src/config/modelProviders/azure.ts +5 -5
- package/src/config/modelProviders/baichuan.ts +6 -6
- package/src/config/modelProviders/bedrock.ts +14 -14
- package/src/config/modelProviders/cloudflare.ts +12 -11
- package/src/config/modelProviders/deepseek.ts +1 -1
- package/src/config/modelProviders/fireworksai.ts +29 -27
- package/src/config/modelProviders/giteeai.ts +7 -7
- package/src/config/modelProviders/github.ts +29 -28
- package/src/config/modelProviders/google.ts +18 -19
- package/src/config/modelProviders/groq.ts +13 -13
- package/src/config/modelProviders/higress.ts +195 -194
- package/src/config/modelProviders/huggingface.ts +7 -7
- package/src/config/modelProviders/hunyuan.ts +25 -17
- package/src/config/modelProviders/internlm.ts +6 -4
- package/src/config/modelProviders/minimax.ts +5 -5
- package/src/config/modelProviders/mistral.ts +14 -16
- package/src/config/modelProviders/moonshot.ts +3 -3
- package/src/config/modelProviders/novita.ts +15 -15
- package/src/config/modelProviders/ollama.ts +46 -46
- package/src/config/modelProviders/openai.ts +23 -22
- package/src/config/modelProviders/openrouter.ts +20 -18
- package/src/config/modelProviders/perplexity.ts +7 -7
- package/src/config/modelProviders/qwen.ts +23 -25
- package/src/config/modelProviders/sensenova.ts +8 -8
- package/src/config/modelProviders/siliconcloud.ts +138 -92
- package/src/config/modelProviders/spark.ts +6 -6
- package/src/config/modelProviders/stepfun.ts +9 -9
- package/src/config/modelProviders/taichu.ts +2 -3
- package/src/config/modelProviders/togetherai.ts +57 -48
- package/src/config/modelProviders/upstage.ts +3 -3
- package/src/config/modelProviders/wenxin.ts +12 -12
- package/src/config/modelProviders/xai.ts +4 -4
- package/src/config/modelProviders/zeroone.ts +11 -11
- package/src/config/modelProviders/zhipu.ts +17 -16
- package/src/database/_deprecated/core/model.ts +1 -1
- package/src/database/_deprecated/models/sessionGroup.ts +4 -1
- package/src/database/client/migrations.json +2 -5
- package/src/database/migrations/meta/0012_snapshot.json +176 -518
- package/src/database/schemas/agent.ts +1 -1
- package/src/database/schemas/message.ts +1 -0
- package/src/database/schemas/session.ts +1 -0
- package/src/database/server/models/topic.ts +19 -17
- package/src/features/ChangelogModal/index.tsx +8 -2
- package/src/features/DebugUI/Content.tsx +0 -1
- package/src/features/PluginStore/index.tsx +2 -2
- package/src/features/User/UserPanel/useMenu.tsx +1 -1
- package/src/layout/GlobalProvider/AntdV5MonkeyPatch.tsx +4 -4
- package/src/libs/agent-runtime/google/index.ts +4 -3
- package/src/libs/agent-runtime/higress/index.ts +1 -1
- package/src/libs/agent-runtime/huggingface/index.ts +2 -4
- package/src/libs/agent-runtime/minimax/index.ts +5 -10
- package/src/libs/agent-runtime/mistral/index.ts +3 -6
- package/src/libs/agent-runtime/moonshot/index.ts +3 -6
- package/src/libs/agent-runtime/novita/__snapshots__/index.test.ts.snap +18 -18
- package/src/libs/agent-runtime/novita/index.ts +1 -1
- package/src/libs/agent-runtime/openai/__snapshots__/index.test.ts.snap +10 -10
- package/src/libs/agent-runtime/openrouter/__snapshots__/index.test.ts.snap +168 -168
- package/src/libs/agent-runtime/openrouter/index.ts +1 -1
- package/src/libs/agent-runtime/perplexity/index.ts +4 -4
- package/src/libs/agent-runtime/sensenova/index.ts +9 -3
- package/src/libs/agent-runtime/taichu/index.ts +4 -10
- package/src/libs/agent-runtime/utils/streams/minimax.test.ts +5 -2
- package/src/libs/agent-runtime/utils/streams/minimax.ts +4 -1
- package/src/libs/agent-runtime/zhipu/index.ts +12 -13
- package/src/libs/langchain/loaders/index.ts +2 -2
- package/src/libs/langchain/types.ts +9 -1
- package/src/locales/default/modelProvider.ts +1 -1
- package/src/migrations/FromV3ToV4/fixtures/ollama-output-v4.json +1 -1
- package/src/migrations/FromV6ToV7/types/v7.ts +0 -2
- package/src/server/globalConfig/genServerLLMConfig.test.ts +4 -4
- package/src/server/globalConfig/genServerLLMConfig.ts +29 -24
- package/src/server/globalConfig/index.ts +1 -2
- package/src/server/routers/edge/config/__snapshots__/index.test.ts.snap +9 -9
- package/src/server/routers/lambda/_template.ts +1 -1
- package/src/server/routers/lambda/knowledgeBase.ts +1 -1
- package/src/server/routers/lambda/session.ts +1 -1
- package/src/server/routers/lambda/sessionGroup.ts +1 -1
- package/src/server/routers/lambda/thread.ts +1 -1
- package/src/server/services/changelog/index.test.ts +4 -2
- package/src/server/services/changelog/index.ts +10 -2
- package/src/server/services/nextAuthUser/index.ts +1 -1
- package/src/store/serverConfig/selectors.test.ts +1 -0
- package/src/store/user/slices/modelList/__snapshots__/action.test.ts.snap +1 -1
- package/src/store/user/slices/modelList/action.test.ts +4 -4
- package/src/store/user/slices/modelList/reducers/customModelCard.test.ts +6 -6
- package/src/store/user/slices/modelList/selectors/modelProvider.ts +3 -2
- package/src/tools/dalle/Render/Item/index.tsx +1 -1
- package/src/types/files/index.ts +0 -1
- package/src/types/llm.ts +4 -5
- package/src/utils/__snapshots__/parseModels.test.ts.snap +2 -2
- package/src/utils/genUserLLMConfig.test.ts +4 -4
- package/src/utils/genUserLLMConfig.ts +6 -4
- package/src/utils/parseModels.test.ts +16 -16
- package/src/utils/parseModels.ts +1 -1
- package/src/utils/server/jwt.ts +2 -6
@@ -3,6 +3,7 @@
|
|
3
3
|
exports[`LobeOpenRouterAI > models > should get models 1`] = `
|
4
4
|
[
|
5
5
|
{
|
6
|
+
"contextWindowTokens": 128000,
|
6
7
|
"description": "Command-R is a 35B parameter model that performs conversational language tasks at a higher quality, more reliably, and with a longer context than previous models. It can be used for complex workflows like code generation, retrieval augmented generation (RAG), tool use, and agents.
|
7
8
|
|
8
9
|
Read the launch post [here](https://txt.cohere.com/command-r/).
|
@@ -13,10 +14,10 @@ Use of this model is subject to Cohere's [Acceptable Use Policy](https://docs.co
|
|
13
14
|
"functionCall": false,
|
14
15
|
"id": "cohere/command-r-03-2024",
|
15
16
|
"maxTokens": 4000,
|
16
|
-
"tokens": 128000,
|
17
17
|
"vision": false,
|
18
18
|
},
|
19
19
|
{
|
20
|
+
"contextWindowTokens": 128000,
|
20
21
|
"description": "Command R+ is a new, 104B-parameter LLM from Cohere. It's useful for roleplay, general consumer usecases, and Retrieval Augmented Generation (RAG).
|
21
22
|
|
22
23
|
It offers multilingual support for ten key languages to facilitate global business operations. See benchmarks and the launch post [here](https://txt.cohere.com/command-r-plus-microsoft-azure/).
|
@@ -27,10 +28,10 @@ Use of this model is subject to Cohere's [Acceptable Use Policy](https://docs.co
|
|
27
28
|
"functionCall": false,
|
28
29
|
"id": "cohere/command-r-plus-04-2024",
|
29
30
|
"maxTokens": 4000,
|
30
|
-
"tokens": 128000,
|
31
31
|
"vision": false,
|
32
32
|
},
|
33
33
|
{
|
34
|
+
"contextWindowTokens": 128000,
|
34
35
|
"description": "command-r-plus-08-2024 is an update of the [Command R+](/models/cohere/command-r-plus) with roughly 50% higher throughput and 25% lower latencies as compared to the previous Command R+ version, while keeping the hardware footprint the same.
|
35
36
|
|
36
37
|
Read the launch post [here](https://docs.cohere.com/changelog/command-gets-refreshed).
|
@@ -41,10 +42,10 @@ Use of this model is subject to Cohere's [Acceptable Use Policy](https://docs.co
|
|
41
42
|
"functionCall": false,
|
42
43
|
"id": "cohere/command-r-plus-08-2024",
|
43
44
|
"maxTokens": 4000,
|
44
|
-
"tokens": 128000,
|
45
45
|
"vision": false,
|
46
46
|
},
|
47
47
|
{
|
48
|
+
"contextWindowTokens": 128000,
|
48
49
|
"description": "command-r-08-2024 is an update of the [Command R](/models/cohere/command-r) with improved performance for multilingual retrieval-augmented generation (RAG) and tool use. More broadly, it is better at math, code and reasoning and is competitive with the previous version of the larger Command R+ model.
|
49
50
|
|
50
51
|
Read the launch post [here](https://docs.cohere.com/changelog/command-gets-refreshed).
|
@@ -55,10 +56,10 @@ Use of this model is subject to Cohere's [Acceptable Use Policy](https://docs.co
|
|
55
56
|
"functionCall": false,
|
56
57
|
"id": "cohere/command-r-08-2024",
|
57
58
|
"maxTokens": 4000,
|
58
|
-
"tokens": 128000,
|
59
59
|
"vision": false,
|
60
60
|
},
|
61
61
|
{
|
62
|
+
"contextWindowTokens": 4000000,
|
62
63
|
"description": "Gemini 1.5 Flash 8B Experimental is an experimental, 8B parameter version of the [Gemini 1.5 Flash](/models/google/gemini-flash-1.5) model.
|
63
64
|
|
64
65
|
Usage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.dev/terms).
|
@@ -71,10 +72,10 @@ Note: This model is experimental and not suited for production use-cases. It may
|
|
71
72
|
"functionCall": false,
|
72
73
|
"id": "google/gemini-flash-8b-1.5-exp",
|
73
74
|
"maxTokens": 32768,
|
74
|
-
"tokens": 4000000,
|
75
75
|
"vision": true,
|
76
76
|
},
|
77
77
|
{
|
78
|
+
"contextWindowTokens": 4000000,
|
78
79
|
"description": "Gemini 1.5 Flash Experimental is an experimental version of the [Gemini 1.5 Flash](/models/google/gemini-flash-1.5) model.
|
79
80
|
|
80
81
|
Usage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.dev/terms).
|
@@ -87,20 +88,20 @@ Note: This model is experimental and not suited for production use-cases. It may
|
|
87
88
|
"functionCall": false,
|
88
89
|
"id": "google/gemini-flash-1.5-exp",
|
89
90
|
"maxTokens": 32768,
|
90
|
-
"tokens": 4000000,
|
91
91
|
"vision": true,
|
92
92
|
},
|
93
93
|
{
|
94
|
+
"contextWindowTokens": 8192,
|
94
95
|
"description": "Euryale L3.1 70B v2.2 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.1](/models/sao10k/l3-euryale-70b).",
|
95
96
|
"displayName": "Llama 3.1 Euryale 70B v2.2",
|
96
97
|
"enabled": false,
|
97
98
|
"functionCall": false,
|
98
99
|
"id": "sao10k/l3.1-euryale-70b",
|
99
100
|
"maxTokens": undefined,
|
100
|
-
"tokens": 8192,
|
101
101
|
"vision": false,
|
102
102
|
},
|
103
103
|
{
|
104
|
+
"contextWindowTokens": 256000,
|
104
105
|
"description": "Jamba 1.5 Large is part of AI21's new family of open models, offering superior speed, efficiency, and quality.
|
105
106
|
|
106
107
|
It features a 256K effective context window, the longest among open models, enabling improved performance on tasks like document summarization and analysis.
|
@@ -113,10 +114,10 @@ Read their [announcement](https://www.ai21.com/blog/announcing-jamba-model-famil
|
|
113
114
|
"functionCall": false,
|
114
115
|
"id": "ai21/jamba-1-5-large",
|
115
116
|
"maxTokens": 4096,
|
116
|
-
"tokens": 256000,
|
117
117
|
"vision": false,
|
118
118
|
},
|
119
119
|
{
|
120
|
+
"contextWindowTokens": 256000,
|
120
121
|
"description": "Jamba 1.5 Mini is the world's first production-grade Mamba-based model, combining SSM and Transformer architectures for a 256K context window and high efficiency.
|
121
122
|
|
122
123
|
It works with 9 languages and can handle various writing and analysis tasks as well as or better than similar small models.
|
@@ -129,10 +130,10 @@ Read their [announcement](https://www.ai21.com/blog/announcing-jamba-model-famil
|
|
129
130
|
"functionCall": false,
|
130
131
|
"id": "ai21/jamba-1-5-mini",
|
131
132
|
"maxTokens": 4096,
|
132
|
-
"tokens": 256000,
|
133
133
|
"vision": false,
|
134
134
|
},
|
135
135
|
{
|
136
|
+
"contextWindowTokens": 128000,
|
136
137
|
"description": "Phi-3.5 models are lightweight, state-of-the-art open models. These models were trained with Phi-3 datasets that include both synthetic data and the filtered, publicly available websites data, with a focus on high quality and reasoning-dense properties. Phi-3.5 Mini uses 3.8B parameters, and is a dense decoder-only transformer model using the same tokenizer as [Phi-3 Mini](/models/microsoft/phi-3-mini-128k-instruct).
|
137
138
|
|
138
139
|
The models underwent a rigorous enhancement process, incorporating both supervised fine-tuning, proximal policy optimization, and direct preference optimization to ensure precise instruction adherence and robust safety measures. When assessed against benchmarks that test common sense, language understanding, math, code, long context and logical reasoning, Phi-3.5 models showcased robust and state-of-the-art performance among models with less than 13 billion parameters.",
|
@@ -141,10 +142,10 @@ The models underwent a rigorous enhancement process, incorporating both supervis
|
|
141
142
|
"functionCall": false,
|
142
143
|
"id": "microsoft/phi-3.5-mini-128k-instruct",
|
143
144
|
"maxTokens": undefined,
|
144
|
-
"tokens": 128000,
|
145
145
|
"vision": false,
|
146
146
|
},
|
147
147
|
{
|
148
|
+
"contextWindowTokens": 131072,
|
148
149
|
"description": "Hermes 3 is a generalist language model with many improvements over [Hermes 2](/models/nousresearch/nous-hermes-2-mistral-7b-dpo), including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.
|
149
150
|
|
150
151
|
Hermes 3 70B is a competitive, if not superior finetune of the [Llama-3.1 70B foundation model](/models/meta-llama/llama-3.1-70b-instruct), focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.
|
@@ -155,10 +156,10 @@ The Hermes 3 series builds and expands on the Hermes 2 set of capabilities, incl
|
|
155
156
|
"functionCall": true,
|
156
157
|
"id": "nousresearch/hermes-3-llama-3.1-70b",
|
157
158
|
"maxTokens": undefined,
|
158
|
-
"tokens": 131072,
|
159
159
|
"vision": false,
|
160
160
|
},
|
161
161
|
{
|
162
|
+
"contextWindowTokens": 131072,
|
162
163
|
"description": "Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.
|
163
164
|
|
164
165
|
Hermes 3 405B is a frontier-level, full-parameter finetune of the Llama-3.1 405B foundation model, focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.
|
@@ -171,10 +172,10 @@ Hermes 3 is competitive, if not superior, to Llama-3.1 Instruct models at genera
|
|
171
172
|
"functionCall": true,
|
172
173
|
"id": "nousresearch/hermes-3-llama-3.1-405b",
|
173
174
|
"maxTokens": undefined,
|
174
|
-
"tokens": 131072,
|
175
175
|
"vision": false,
|
176
176
|
},
|
177
177
|
{
|
178
|
+
"contextWindowTokens": 128000,
|
178
179
|
"description": "Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.
|
179
180
|
|
180
181
|
Hermes 3 405B is a frontier-level, full-parameter finetune of the Llama-3.1 405B foundation model, focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.
|
@@ -189,20 +190,20 @@ _These are extended-context endpoints for [Hermes 3 405B Instruct](/models/nousr
|
|
189
190
|
"functionCall": true,
|
190
191
|
"id": "nousresearch/hermes-3-llama-3.1-405b:extended",
|
191
192
|
"maxTokens": undefined,
|
192
|
-
"tokens": 128000,
|
193
193
|
"vision": false,
|
194
194
|
},
|
195
195
|
{
|
196
|
+
"contextWindowTokens": 127072,
|
196
197
|
"description": "Llama 3.1 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance. The model is built upon the Llama 3.1 405B and has internet access.",
|
197
198
|
"displayName": "Perplexity: Llama 3.1 Sonar 405B Online",
|
198
199
|
"enabled": true,
|
199
200
|
"functionCall": false,
|
200
201
|
"id": "perplexity/llama-3.1-sonar-huge-128k-online",
|
201
202
|
"maxTokens": undefined,
|
202
|
-
"tokens": 127072,
|
203
203
|
"vision": false,
|
204
204
|
},
|
205
205
|
{
|
206
|
+
"contextWindowTokens": 128000,
|
206
207
|
"description": "Dynamic model continuously updated to the current version of [GPT-4o](/models/openai/gpt-4o) in ChatGPT. Intended for research and evaluation.
|
207
208
|
|
208
209
|
Note: This model is experimental and not suited for production use-cases. It may be removed or redirected to another model in the future.",
|
@@ -211,10 +212,10 @@ Note: This model is experimental and not suited for production use-cases. It may
|
|
211
212
|
"functionCall": false,
|
212
213
|
"id": "openai/chatgpt-4o-latest",
|
213
214
|
"maxTokens": 16384,
|
214
|
-
"tokens": 128000,
|
215
215
|
"vision": false,
|
216
216
|
},
|
217
217
|
{
|
218
|
+
"contextWindowTokens": 8192,
|
218
219
|
"description": "Lunaris 8B is a versatile generalist and roleplaying model based on Llama 3. It's a strategic merge of multiple models, designed to balance creativity with improved logic and general knowledge.
|
219
220
|
|
220
221
|
Created by [Sao10k](https://huggingface.co/Sao10k), this model aims to offer an improved experience over Stheno v3.2, with enhanced creativity and logical reasoning.
|
@@ -225,10 +226,10 @@ For best results, use with Llama 3 Instruct context template, temperature 1.4, a
|
|
225
226
|
"functionCall": false,
|
226
227
|
"id": "sao10k/l3-lunaris-8b",
|
227
228
|
"maxTokens": undefined,
|
228
|
-
"tokens": 8192,
|
229
229
|
"vision": false,
|
230
230
|
},
|
231
231
|
{
|
232
|
+
"contextWindowTokens": 12000,
|
232
233
|
"description": "Starcannon 12B is a creative roleplay and story writing model, using [nothingiisreal/mn-celeste-12b](https://openrouter.ai/models/nothingiisreal/mn-celeste-12b) as a base and [intervitens/mini-magnum-12b-v1.1](https://huggingface.co/intervitens/mini-magnum-12b-v1.1) merged in using the [TIES](https://arxiv.org/abs/2306.01708) method.
|
233
234
|
|
234
235
|
Although more similar to Magnum overall, the model remains very creative, with a pleasant writing style. It is recommended for people wanting more variety than Magnum, and yet more verbose prose than Celeste.",
|
@@ -237,10 +238,10 @@ Although more similar to Magnum overall, the model remains very creative, with a
|
|
237
238
|
"functionCall": false,
|
238
239
|
"id": "aetherwiing/mn-starcannon-12b",
|
239
240
|
"maxTokens": undefined,
|
240
|
-
"tokens": 12000,
|
241
241
|
"vision": false,
|
242
242
|
},
|
243
243
|
{
|
244
|
+
"contextWindowTokens": 128000,
|
244
245
|
"description": "The 2024-08-06 version of GPT-4o offers improved performance in structured outputs, with the ability to supply a JSON schema in the respone_format. Read more [here](https://openai.com/index/introducing-structured-outputs-in-the-api/).
|
245
246
|
|
246
247
|
GPT-4o ("o" for "omni") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.
|
@@ -251,10 +252,10 @@ For benchmarking against other models, it was briefly called ["im-also-a-good-gp
|
|
251
252
|
"functionCall": false,
|
252
253
|
"id": "openai/gpt-4o-2024-08-06",
|
253
254
|
"maxTokens": 16384,
|
254
|
-
"tokens": 128000,
|
255
255
|
"vision": false,
|
256
256
|
},
|
257
257
|
{
|
258
|
+
"contextWindowTokens": 131072,
|
258
259
|
"description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This is the base 405B pre-trained version.
|
259
260
|
|
260
261
|
It has demonstrated strong performance compared to leading closed-source models in human evaluations.
|
@@ -265,10 +266,10 @@ To read more about the model release, [click here](https://ai.meta.com/blog/meta
|
|
265
266
|
"functionCall": false,
|
266
267
|
"id": "meta-llama/llama-3.1-405b",
|
267
268
|
"maxTokens": undefined,
|
268
|
-
"tokens": 131072,
|
269
269
|
"vision": false,
|
270
270
|
},
|
271
271
|
{
|
272
|
+
"contextWindowTokens": 32000,
|
272
273
|
"description": "A specialized story writing and roleplaying model based on Mistral's NeMo 12B Instruct. Fine-tuned on curated datasets including Reddit Writing Prompts and Opus Instruct 25K.
|
273
274
|
|
274
275
|
This model excels at creative writing, offering improved NSFW capabilities, with smarter and more active narration. It demonstrates remarkable versatility in both SFW and NSFW scenarios, with strong Out of Character (OOC) steering capabilities, allowing fine-tuned control over narrative direction and character behavior.
|
@@ -279,10 +280,10 @@ Check out the model's [HuggingFace page](https://huggingface.co/nothingiisreal/M
|
|
279
280
|
"functionCall": false,
|
280
281
|
"id": "nothingiisreal/mn-celeste-12b",
|
281
282
|
"maxTokens": undefined,
|
282
|
-
"tokens": 32000,
|
283
283
|
"vision": false,
|
284
284
|
},
|
285
285
|
{
|
286
|
+
"contextWindowTokens": 4000000,
|
286
287
|
"description": "Gemini 1.5 Pro (0827) is an experimental version of the [Gemini 1.5 Pro](/models/google/gemini-pro-1.5) model.
|
287
288
|
|
288
289
|
Usage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.dev/terms).
|
@@ -295,10 +296,10 @@ Note: This model is experimental and not suited for production use-cases. It may
|
|
295
296
|
"functionCall": false,
|
296
297
|
"id": "google/gemini-pro-1.5-exp",
|
297
298
|
"maxTokens": 32768,
|
298
|
-
"tokens": 4000000,
|
299
299
|
"vision": true,
|
300
300
|
},
|
301
301
|
{
|
302
|
+
"contextWindowTokens": 127072,
|
302
303
|
"description": "Llama 3.1 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.
|
303
304
|
|
304
305
|
This is the online version of the [offline chat model](/models/perplexity/llama-3.1-sonar-large-128k-chat). It is focused on delivering helpful, up-to-date, and factual responses. #online",
|
@@ -307,10 +308,10 @@ This is the online version of the [offline chat model](/models/perplexity/llama-
|
|
307
308
|
"functionCall": false,
|
308
309
|
"id": "perplexity/llama-3.1-sonar-large-128k-online",
|
309
310
|
"maxTokens": undefined,
|
310
|
-
"tokens": 127072,
|
311
311
|
"vision": false,
|
312
312
|
},
|
313
313
|
{
|
314
|
+
"contextWindowTokens": 131072,
|
314
315
|
"description": "Llama 3.1 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.
|
315
316
|
|
316
317
|
This is a normal offline LLM, but the [online version](/models/perplexity/llama-3.1-sonar-large-128k-online) of this model has Internet access.",
|
@@ -319,10 +320,10 @@ This is a normal offline LLM, but the [online version](/models/perplexity/llama-
|
|
319
320
|
"functionCall": false,
|
320
321
|
"id": "perplexity/llama-3.1-sonar-large-128k-chat",
|
321
322
|
"maxTokens": undefined,
|
322
|
-
"tokens": 131072,
|
323
323
|
"vision": false,
|
324
324
|
},
|
325
325
|
{
|
326
|
+
"contextWindowTokens": 127072,
|
326
327
|
"description": "Llama 3.1 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.
|
327
328
|
|
328
329
|
This is the online version of the [offline chat model](/models/perplexity/llama-3.1-sonar-small-128k-chat). It is focused on delivering helpful, up-to-date, and factual responses. #online",
|
@@ -331,10 +332,10 @@ This is the online version of the [offline chat model](/models/perplexity/llama-
|
|
331
332
|
"functionCall": false,
|
332
333
|
"id": "perplexity/llama-3.1-sonar-small-128k-online",
|
333
334
|
"maxTokens": undefined,
|
334
|
-
"tokens": 127072,
|
335
335
|
"vision": false,
|
336
336
|
},
|
337
337
|
{
|
338
|
+
"contextWindowTokens": 131072,
|
338
339
|
"description": "Llama 3.1 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.
|
339
340
|
|
340
341
|
This is a normal offline LLM, but the [online version](/models/perplexity/llama-3.1-sonar-small-128k-online) of this model has Internet access.",
|
@@ -343,10 +344,10 @@ This is a normal offline LLM, but the [online version](/models/perplexity/llama-
|
|
343
344
|
"functionCall": false,
|
344
345
|
"id": "perplexity/llama-3.1-sonar-small-128k-chat",
|
345
346
|
"maxTokens": undefined,
|
346
|
-
"tokens": 131072,
|
347
347
|
"vision": false,
|
348
348
|
},
|
349
349
|
{
|
350
|
+
"contextWindowTokens": 131072,
|
350
351
|
"description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 70B instruct-tuned version is optimized for high quality dialogue usecases.
|
351
352
|
|
352
353
|
It has demonstrated strong performance compared to leading closed-source models in human evaluations.
|
@@ -357,10 +358,10 @@ To read more about the model release, [click here](https://ai.meta.com/blog/meta
|
|
357
358
|
"functionCall": false,
|
358
359
|
"id": "meta-llama/llama-3.1-70b-instruct",
|
359
360
|
"maxTokens": undefined,
|
360
|
-
"tokens": 131072,
|
361
361
|
"vision": false,
|
362
362
|
},
|
363
363
|
{
|
364
|
+
"contextWindowTokens": 131072,
|
364
365
|
"description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.
|
365
366
|
|
366
367
|
It has demonstrated strong performance compared to leading closed-source models in human evaluations.
|
@@ -373,10 +374,10 @@ _These are free, rate-limited endpoints for [Llama 3.1 8B Instruct](/models/meta
|
|
373
374
|
"functionCall": false,
|
374
375
|
"id": "meta-llama/llama-3.1-8b-instruct:free",
|
375
376
|
"maxTokens": 4096,
|
376
|
-
"tokens": 131072,
|
377
377
|
"vision": false,
|
378
378
|
},
|
379
379
|
{
|
380
|
+
"contextWindowTokens": 131072,
|
380
381
|
"description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.
|
381
382
|
|
382
383
|
It has demonstrated strong performance compared to leading closed-source models in human evaluations.
|
@@ -387,10 +388,10 @@ To read more about the model release, [click here](https://ai.meta.com/blog/meta
|
|
387
388
|
"functionCall": false,
|
388
389
|
"id": "meta-llama/llama-3.1-8b-instruct",
|
389
390
|
"maxTokens": undefined,
|
390
|
-
"tokens": 131072,
|
391
391
|
"vision": false,
|
392
392
|
},
|
393
393
|
{
|
394
|
+
"contextWindowTokens": 131072,
|
394
395
|
"description": "The highly anticipated 400B class of Llama3 is here! Clocking in at 128k context with impressive eval scores, the Meta AI team continues to push the frontier of open-source LLMs.
|
395
396
|
|
396
397
|
Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 405B instruct-tuned version is optimized for high quality dialogue usecases.
|
@@ -403,10 +404,10 @@ To read more about the model release, [click here](https://ai.meta.com/blog/meta
|
|
403
404
|
"functionCall": false,
|
404
405
|
"id": "meta-llama/llama-3.1-405b-instruct",
|
405
406
|
"maxTokens": undefined,
|
406
|
-
"tokens": 131072,
|
407
407
|
"vision": false,
|
408
408
|
},
|
409
409
|
{
|
410
|
+
"contextWindowTokens": 8192,
|
410
411
|
"description": "Dolphin 2.9 is designed for instruction following, conversational, and coding. This model is a fine-tune of [Llama 3 70B](/models/meta-llama/llama-3-70b-instruct). It demonstrates improvements in instruction, conversation, coding, and function calling abilities, when compared to the original.
|
411
412
|
|
412
413
|
Uncensored and is stripped of alignment and bias, it requires an external alignment layer for ethical use. Users are cautioned to use this highly compliant model responsibly, as detailed in a blog post about uncensored models at [erichartford.com/uncensored-models](https://erichartford.com/uncensored-models).
|
@@ -417,10 +418,10 @@ Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.m
|
|
417
418
|
"functionCall": true,
|
418
419
|
"id": "cognitivecomputations/dolphin-llama-3-70b",
|
419
420
|
"maxTokens": undefined,
|
420
|
-
"tokens": 8192,
|
421
421
|
"vision": false,
|
422
422
|
},
|
423
423
|
{
|
424
|
+
"contextWindowTokens": 256000,
|
424
425
|
"description": "A 7.3B parameter Mamba-based model designed for code and reasoning tasks.
|
425
426
|
|
426
427
|
- Linear time inference, allowing for theoretically infinite sequence lengths
|
@@ -433,10 +434,10 @@ Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.m
|
|
433
434
|
"functionCall": false,
|
434
435
|
"id": "mistralai/codestral-mamba",
|
435
436
|
"maxTokens": undefined,
|
436
|
-
"tokens": 256000,
|
437
437
|
"vision": false,
|
438
438
|
},
|
439
439
|
{
|
440
|
+
"contextWindowTokens": 128000,
|
440
441
|
"description": "A 12B parameter model with a 128k token context length built by Mistral in collaboration with NVIDIA.
|
441
442
|
|
442
443
|
The model is multilingual, supporting English, French, German, Spanish, Italian, Portuguese, Chinese, Japanese, Korean, Arabic, and Hindi.
|
@@ -447,10 +448,10 @@ It supports function calling and is released under the Apache 2.0 license.",
|
|
447
448
|
"functionCall": true,
|
448
449
|
"id": "mistralai/mistral-nemo",
|
449
450
|
"maxTokens": undefined,
|
450
|
-
"tokens": 128000,
|
451
451
|
"vision": false,
|
452
452
|
},
|
453
453
|
{
|
454
|
+
"contextWindowTokens": 128000,
|
454
455
|
"description": "GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs.
|
455
456
|
|
456
457
|
As their most advanced small model, it is many multiples more affordable than other recent frontier models, and more than 60% cheaper than [GPT-3.5 Turbo](/models/openai/gpt-3.5-turbo). It maintains SOTA intelligence, while being significantly more cost-effective.
|
@@ -463,10 +464,10 @@ Check out the [launch announcement](https://openai.com/index/gpt-4o-mini-advanci
|
|
463
464
|
"functionCall": false,
|
464
465
|
"id": "openai/gpt-4o-mini-2024-07-18",
|
465
466
|
"maxTokens": 16384,
|
466
|
-
"tokens": 128000,
|
467
467
|
"vision": false,
|
468
468
|
},
|
469
469
|
{
|
470
|
+
"contextWindowTokens": 128000,
|
470
471
|
"description": "GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs.
|
471
472
|
|
472
473
|
As their most advanced small model, it is many multiples more affordable than other recent frontier models, and more than 60% cheaper than [GPT-3.5 Turbo](/models/openai/gpt-3.5-turbo). It maintains SOTA intelligence, while being significantly more cost-effective.
|
@@ -479,10 +480,10 @@ Check out the [launch announcement](https://openai.com/index/gpt-4o-mini-advanci
|
|
479
480
|
"functionCall": false,
|
480
481
|
"id": "openai/gpt-4o-mini",
|
481
482
|
"maxTokens": 16384,
|
482
|
-
"tokens": 128000,
|
483
483
|
"vision": false,
|
484
484
|
},
|
485
485
|
{
|
486
|
+
"contextWindowTokens": 32768,
|
486
487
|
"description": "Qwen2 7B is a transformer-based model that excels in language understanding, multilingual capabilities, coding, mathematics, and reasoning.
|
487
488
|
|
488
489
|
It features SwiGLU activation, attention QKV bias, and group query attention. It is pretrained on extensive data with supervised finetuning and direct preference optimization.
|
@@ -497,10 +498,10 @@ _These are free, rate-limited endpoints for [Qwen 2 7B Instruct](/models/qwen/qw
|
|
497
498
|
"functionCall": false,
|
498
499
|
"id": "qwen/qwen-2-7b-instruct:free",
|
499
500
|
"maxTokens": 4096,
|
500
|
-
"tokens": 32768,
|
501
501
|
"vision": false,
|
502
502
|
},
|
503
503
|
{
|
504
|
+
"contextWindowTokens": 32768,
|
504
505
|
"description": "Qwen2 7B is a transformer-based model that excels in language understanding, multilingual capabilities, coding, mathematics, and reasoning.
|
505
506
|
|
506
507
|
It features SwiGLU activation, attention QKV bias, and group query attention. It is pretrained on extensive data with supervised finetuning and direct preference optimization.
|
@@ -513,10 +514,10 @@ Usage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://hug
|
|
513
514
|
"functionCall": false,
|
514
515
|
"id": "qwen/qwen-2-7b-instruct",
|
515
516
|
"maxTokens": undefined,
|
516
|
-
"tokens": 32768,
|
517
517
|
"vision": false,
|
518
518
|
},
|
519
519
|
{
|
520
|
+
"contextWindowTokens": 8192,
|
520
521
|
"description": "Gemma 2 27B by Google is an open model built from the same research and technology used to create the [Gemini models](/models?q=gemini).
|
521
522
|
|
522
523
|
Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning.
|
@@ -527,10 +528,10 @@ See the [launch announcement](https://blog.google/technology/developers/google-g
|
|
527
528
|
"functionCall": false,
|
528
529
|
"id": "google/gemma-2-27b-it",
|
529
530
|
"maxTokens": undefined,
|
530
|
-
"tokens": 8192,
|
531
531
|
"vision": false,
|
532
532
|
},
|
533
533
|
{
|
534
|
+
"contextWindowTokens": 16384,
|
534
535
|
"description": "From the maker of [Goliath](https://openrouter.ai/models/alpindale/goliath-120b), Magnum 72B is the first in a new family of models designed to achieve the prose quality of the Claude 3 models, notably Opus & Sonnet.
|
535
536
|
|
536
537
|
The model is based on [Qwen2 72B](https://openrouter.ai/models/qwen/qwen-2-72b-instruct) and trained with 55 million tokens of highly curated roleplay (RP) data.",
|
@@ -539,10 +540,10 @@ The model is based on [Qwen2 72B](https://openrouter.ai/models/qwen/qwen-2-72b-i
|
|
539
540
|
"functionCall": false,
|
540
541
|
"id": "alpindale/magnum-72b",
|
541
542
|
"maxTokens": 1024,
|
542
|
-
"tokens": 16384,
|
543
543
|
"vision": false,
|
544
544
|
},
|
545
545
|
{
|
546
|
+
"contextWindowTokens": 16384,
|
546
547
|
"description": "An experimental merge model based on Llama 3, exhibiting a very distinctive style of writing. It combines the the best of [Meta's Llama 3 8B](https://openrouter.ai/models/meta-llama/llama-3-8b-instruct) and Nous Research's [Hermes 2 Pro](https://openrouter.ai/models/nousresearch/hermes-2-pro-llama-3-8b).
|
547
548
|
|
548
549
|
Hermes-2 Θ (theta) was specifically designed with a few capabilities in mind: executing function calls, generating JSON output, and most remarkably, demonstrating metacognitive abilities (contemplating the nature of thought and recognizing the diversity of cognitive processes among individuals).",
|
@@ -551,10 +552,10 @@ Hermes-2 Θ (theta) was specifically designed with a few capabilities in mind: e
|
|
551
552
|
"functionCall": false,
|
552
553
|
"id": "nousresearch/hermes-2-theta-llama-3-8b",
|
553
554
|
"maxTokens": 2048,
|
554
|
-
"tokens": 16384,
|
555
555
|
"vision": false,
|
556
556
|
},
|
557
557
|
{
|
558
|
+
"contextWindowTokens": 8192,
|
558
559
|
"description": "Gemma 2 9B by Google is an advanced, open-source language model that sets a new standard for efficiency and performance in its size class.
|
559
560
|
|
560
561
|
Designed for a wide variety of tasks, it empowers developers and researchers to build innovative applications, while maintaining accessibility, safety, and cost-effectiveness.
|
@@ -567,10 +568,10 @@ _These are free, rate-limited endpoints for [Gemma 2 9B](/models/google/gemma-2-
|
|
567
568
|
"functionCall": false,
|
568
569
|
"id": "google/gemma-2-9b-it:free",
|
569
570
|
"maxTokens": 2048,
|
570
|
-
"tokens": 8192,
|
571
571
|
"vision": false,
|
572
572
|
},
|
573
573
|
{
|
574
|
+
"contextWindowTokens": 8192,
|
574
575
|
"description": "Gemma 2 9B by Google is an advanced, open-source language model that sets a new standard for efficiency and performance in its size class.
|
575
576
|
|
576
577
|
Designed for a wide variety of tasks, it empowers developers and researchers to build innovative applications, while maintaining accessibility, safety, and cost-effectiveness.
|
@@ -581,10 +582,10 @@ See the [launch announcement](https://blog.google/technology/developers/google-g
|
|
581
582
|
"functionCall": false,
|
582
583
|
"id": "google/gemma-2-9b-it",
|
583
584
|
"maxTokens": undefined,
|
584
|
-
"tokens": 8192,
|
585
585
|
"vision": false,
|
586
586
|
},
|
587
587
|
{
|
588
|
+
"contextWindowTokens": 32000,
|
588
589
|
"description": "Stheno 8B 32K is a creative writing/roleplay model from [Sao10k](https://ko-fi.com/sao10k). It was trained at 8K context, then expanded to 32K context.
|
589
590
|
|
590
591
|
Compared to older Stheno version, this model is trained on:
|
@@ -596,10 +597,10 @@ Compared to older Stheno version, this model is trained on:
|
|
596
597
|
"functionCall": false,
|
597
598
|
"id": "sao10k/l3-stheno-8b",
|
598
599
|
"maxTokens": undefined,
|
599
|
-
"tokens": 32000,
|
600
600
|
"vision": false,
|
601
601
|
},
|
602
602
|
{
|
603
|
+
"contextWindowTokens": 256000,
|
603
604
|
"description": "The Jamba-Instruct model, introduced by AI21 Labs, is an instruction-tuned variant of their hybrid SSM-Transformer Jamba model, specifically optimized for enterprise applications.
|
604
605
|
|
605
606
|
- 256K Context Window: It can process extensive information, equivalent to a 400-page novel, which is beneficial for tasks involving large documents such as financial reports or legal documents
|
@@ -613,10 +614,10 @@ Jamba has a knowledge cutoff of February 2024.",
|
|
613
614
|
"functionCall": false,
|
614
615
|
"id": "ai21/jamba-instruct",
|
615
616
|
"maxTokens": 4096,
|
616
|
-
"tokens": 256000,
|
617
617
|
"vision": false,
|
618
618
|
},
|
619
619
|
{
|
620
|
+
"contextWindowTokens": 200000,
|
620
621
|
"description": "Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:
|
621
622
|
|
622
623
|
- Coding: Autonomously writes, edits, and runs code with reasoning and troubleshooting
|
@@ -630,10 +631,10 @@ Jamba has a knowledge cutoff of February 2024.",
|
|
630
631
|
"functionCall": true,
|
631
632
|
"id": "anthropic/claude-3.5-sonnet",
|
632
633
|
"maxTokens": 8192,
|
633
|
-
"tokens": 200000,
|
634
634
|
"vision": true,
|
635
635
|
},
|
636
636
|
{
|
637
|
+
"contextWindowTokens": 200000,
|
637
638
|
"description": "Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:
|
638
639
|
|
639
640
|
- Coding: Autonomously writes, edits, and runs code with reasoning and troubleshooting
|
@@ -649,10 +650,10 @@ _This is a faster endpoint, made available in collaboration with Anthropic, that
|
|
649
650
|
"functionCall": true,
|
650
651
|
"id": "anthropic/claude-3.5-sonnet:beta",
|
651
652
|
"maxTokens": 8192,
|
652
|
-
"tokens": 200000,
|
653
653
|
"vision": true,
|
654
654
|
},
|
655
655
|
{
|
656
|
+
"contextWindowTokens": 8192,
|
656
657
|
"description": "Euryale 70B v2.1 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k).
|
657
658
|
|
658
659
|
- Better prompt adherence.
|
@@ -665,10 +666,10 @@ _This is a faster endpoint, made available in collaboration with Anthropic, that
|
|
665
666
|
"functionCall": false,
|
666
667
|
"id": "sao10k/l3-euryale-70b",
|
667
668
|
"maxTokens": undefined,
|
668
|
-
"tokens": 8192,
|
669
669
|
"vision": false,
|
670
670
|
},
|
671
671
|
{
|
672
|
+
"contextWindowTokens": 4000,
|
672
673
|
"description": "Phi-3 4K Medium is a powerful 14-billion parameter model designed for advanced language understanding, reasoning, and instruction following. Optimized through supervised fine-tuning and preference adjustments, it excels in tasks involving common sense, mathematics, logical reasoning, and code processing.
|
673
674
|
|
674
675
|
At time of release, Phi-3 Medium demonstrated state-of-the-art performance among lightweight models. In the MMLU-Pro eval, the model even comes close to a Llama3 70B level of performance.
|
@@ -679,10 +680,10 @@ For 128k context length, try [Phi-3 Medium 128K](/models/microsoft/phi-3-medium-
|
|
679
680
|
"functionCall": false,
|
680
681
|
"id": "microsoft/phi-3-medium-4k-instruct",
|
681
682
|
"maxTokens": undefined,
|
682
|
-
"tokens": 4000,
|
683
683
|
"vision": false,
|
684
684
|
},
|
685
685
|
{
|
686
|
+
"contextWindowTokens": 65536,
|
686
687
|
"description": "Dolphin 2.9 is designed for instruction following, conversational, and coding. This model is a finetune of [Mixtral 8x22B Instruct](/models/mistralai/mixtral-8x22b-instruct). It features a 64k context length and was fine-tuned with a 16k sequence length using ChatML templates.
|
687
688
|
|
688
689
|
This model is a successor to [Dolphin Mixtral 8x7B](/models/cognitivecomputations/dolphin-mixtral-8x7b).
|
@@ -695,10 +696,10 @@ The model is uncensored and is stripped of alignment and bias. It requires an ex
|
|
695
696
|
"functionCall": false,
|
696
697
|
"id": "cognitivecomputations/dolphin-mixtral-8x22b",
|
697
698
|
"maxTokens": undefined,
|
698
|
-
"tokens": 65536,
|
699
699
|
"vision": false,
|
700
700
|
},
|
701
701
|
{
|
702
|
+
"contextWindowTokens": 32768,
|
702
703
|
"description": "Qwen2 72B is a transformer-based model that excels in language understanding, multilingual capabilities, coding, mathematics, and reasoning.
|
703
704
|
|
704
705
|
It features SwiGLU activation, attention QKV bias, and group query attention. It is pretrained on extensive data with supervised finetuning and direct preference optimization.
|
@@ -711,10 +712,10 @@ Usage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://hug
|
|
711
712
|
"functionCall": false,
|
712
713
|
"id": "qwen/qwen-2-72b-instruct",
|
713
714
|
"maxTokens": undefined,
|
714
|
-
"tokens": 32768,
|
715
715
|
"vision": false,
|
716
716
|
},
|
717
717
|
{
|
718
|
+
"contextWindowTokens": 8192,
|
718
719
|
"description": "OpenChat 8B is a library of open-source language models, fine-tuned with "C-RLFT (Conditioned Reinforcement Learning Fine-Tuning)" - a strategy inspired by offline reinforcement learning. It has been trained on mixed-quality data without preference labels.
|
719
720
|
|
720
721
|
It outperforms many similarly sized models including [Llama 3 8B Instruct](/models/meta-llama/llama-3-8b-instruct) and various fine-tuned models. It excels in general conversation, coding assistance, and mathematical reasoning.
|
@@ -728,20 +729,20 @@ It outperforms many similarly sized models including [Llama 3 8B Instruct](/mode
|
|
728
729
|
"functionCall": false,
|
729
730
|
"id": "openchat/openchat-8b",
|
730
731
|
"maxTokens": undefined,
|
731
|
-
"tokens": 8192,
|
732
732
|
"vision": false,
|
733
733
|
},
|
734
734
|
{
|
735
|
+
"contextWindowTokens": 8192,
|
735
736
|
"description": "Hermes 2 Pro is an upgraded, retrained version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced Function Calling and JSON Mode dataset developed in-house.",
|
736
737
|
"displayName": "NousResearch: Hermes 2 Pro - Llama-3 8B",
|
737
738
|
"enabled": false,
|
738
739
|
"functionCall": false,
|
739
740
|
"id": "nousresearch/hermes-2-pro-llama-3-8b",
|
740
741
|
"maxTokens": undefined,
|
741
|
-
"tokens": 8192,
|
742
742
|
"vision": false,
|
743
743
|
},
|
744
744
|
{
|
745
|
+
"contextWindowTokens": 32768,
|
745
746
|
"description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.
|
746
747
|
|
747
748
|
An improved version of [Mistral 7B Instruct v0.2](/models/mistralai/mistral-7b-instruct-v0.2), with the following changes:
|
@@ -756,10 +757,10 @@ NOTE: Support for function calling depends on the provider.",
|
|
756
757
|
"functionCall": true,
|
757
758
|
"id": "mistralai/mistral-7b-instruct-v0.3",
|
758
759
|
"maxTokens": undefined,
|
759
|
-
"tokens": 32768,
|
760
760
|
"vision": false,
|
761
761
|
},
|
762
762
|
{
|
763
|
+
"contextWindowTokens": 32768,
|
763
764
|
"description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.
|
764
765
|
|
765
766
|
*Mistral 7B Instruct has multiple version variants, and this is intended to be the latest version.*
|
@@ -770,10 +771,10 @@ _These are free, rate-limited endpoints for [Mistral 7B Instruct](/models/mistra
|
|
770
771
|
"functionCall": false,
|
771
772
|
"id": "mistralai/mistral-7b-instruct:free",
|
772
773
|
"maxTokens": 4096,
|
773
|
-
"tokens": 32768,
|
774
774
|
"vision": false,
|
775
775
|
},
|
776
776
|
{
|
777
|
+
"contextWindowTokens": 32768,
|
777
778
|
"description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.
|
778
779
|
|
779
780
|
*Mistral 7B Instruct has multiple version variants, and this is intended to be the latest version.*",
|
@@ -782,10 +783,10 @@ _These are free, rate-limited endpoints for [Mistral 7B Instruct](/models/mistra
|
|
782
783
|
"functionCall": false,
|
783
784
|
"id": "mistralai/mistral-7b-instruct",
|
784
785
|
"maxTokens": undefined,
|
785
|
-
"tokens": 32768,
|
786
786
|
"vision": false,
|
787
787
|
},
|
788
788
|
{
|
789
|
+
"contextWindowTokens": 32768,
|
789
790
|
"description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.
|
790
791
|
|
791
792
|
*Mistral 7B Instruct has multiple version variants, and this is intended to be the latest version.*
|
@@ -796,10 +797,10 @@ _These are higher-throughput endpoints for [Mistral 7B Instruct](/models/mistral
|
|
796
797
|
"functionCall": false,
|
797
798
|
"id": "mistralai/mistral-7b-instruct:nitro",
|
798
799
|
"maxTokens": undefined,
|
799
|
-
"tokens": 32768,
|
800
800
|
"vision": false,
|
801
801
|
},
|
802
802
|
{
|
803
|
+
"contextWindowTokens": 128000,
|
803
804
|
"description": "Phi-3 Mini is a powerful 3.8B parameter model designed for advanced language understanding, reasoning, and instruction following. Optimized through supervised fine-tuning and preference adjustments, it excels in tasks involving common sense, mathematics, logical reasoning, and code processing.
|
804
805
|
|
805
806
|
At time of release, Phi-3 Medium demonstrated state-of-the-art performance among lightweight models. This model is static, trained on an offline dataset with an October 2023 cutoff date.
|
@@ -810,10 +811,10 @@ _These are free, rate-limited endpoints for [Phi-3 Mini 128K Instruct](/models/m
|
|
810
811
|
"functionCall": false,
|
811
812
|
"id": "microsoft/phi-3-mini-128k-instruct:free",
|
812
813
|
"maxTokens": 4096,
|
813
|
-
"tokens": 128000,
|
814
814
|
"vision": false,
|
815
815
|
},
|
816
816
|
{
|
817
|
+
"contextWindowTokens": 128000,
|
817
818
|
"description": "Phi-3 Mini is a powerful 3.8B parameter model designed for advanced language understanding, reasoning, and instruction following. Optimized through supervised fine-tuning and preference adjustments, it excels in tasks involving common sense, mathematics, logical reasoning, and code processing.
|
818
819
|
|
819
820
|
At time of release, Phi-3 Medium demonstrated state-of-the-art performance among lightweight models. This model is static, trained on an offline dataset with an October 2023 cutoff date.",
|
@@ -822,10 +823,10 @@ At time of release, Phi-3 Medium demonstrated state-of-the-art performance among
|
|
822
823
|
"functionCall": false,
|
823
824
|
"id": "microsoft/phi-3-mini-128k-instruct",
|
824
825
|
"maxTokens": undefined,
|
825
|
-
"tokens": 128000,
|
826
826
|
"vision": false,
|
827
827
|
},
|
828
828
|
{
|
829
|
+
"contextWindowTokens": 128000,
|
829
830
|
"description": "Phi-3 128K Medium is a powerful 14-billion parameter model designed for advanced language understanding, reasoning, and instruction following. Optimized through supervised fine-tuning and preference adjustments, it excels in tasks involving common sense, mathematics, logical reasoning, and code processing.
|
830
831
|
|
831
832
|
At time of release, Phi-3 Medium demonstrated state-of-the-art performance among lightweight models. In the MMLU-Pro eval, the model even comes close to a Llama3 70B level of performance.
|
@@ -838,10 +839,10 @@ _These are free, rate-limited endpoints for [Phi-3 Medium 128K Instruct](/models
|
|
838
839
|
"functionCall": false,
|
839
840
|
"id": "microsoft/phi-3-medium-128k-instruct:free",
|
840
841
|
"maxTokens": 4096,
|
841
|
-
"tokens": 128000,
|
842
842
|
"vision": false,
|
843
843
|
},
|
844
844
|
{
|
845
|
+
"contextWindowTokens": 128000,
|
845
846
|
"description": "Phi-3 128K Medium is a powerful 14-billion parameter model designed for advanced language understanding, reasoning, and instruction following. Optimized through supervised fine-tuning and preference adjustments, it excels in tasks involving common sense, mathematics, logical reasoning, and code processing.
|
846
847
|
|
847
848
|
At time of release, Phi-3 Medium demonstrated state-of-the-art performance among lightweight models. In the MMLU-Pro eval, the model even comes close to a Llama3 70B level of performance.
|
@@ -852,10 +853,10 @@ For 4k context length, try [Phi-3 Medium 4K](/models/microsoft/phi-3-medium-4k-i
|
|
852
853
|
"functionCall": false,
|
853
854
|
"id": "microsoft/phi-3-medium-128k-instruct",
|
854
855
|
"maxTokens": undefined,
|
855
|
-
"tokens": 128000,
|
856
856
|
"vision": false,
|
857
857
|
},
|
858
858
|
{
|
859
|
+
"contextWindowTokens": 8192,
|
859
860
|
"description": "The NeverSleep team is back, with a Llama 3 70B finetune trained on their curated roleplay data. Striking a balance between eRP and RP, Lumimaid was designed to be serious, yet uncensored when necessary.
|
860
861
|
|
861
862
|
To enhance it's overall intelligence and chat capability, roughly 40% of the training data was not roleplay. This provides a breadth of knowledge to access, while still keeping roleplay as the primary strength.
|
@@ -866,10 +867,10 @@ Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.m
|
|
866
867
|
"functionCall": false,
|
867
868
|
"id": "neversleep/llama-3-lumimaid-70b",
|
868
869
|
"maxTokens": 2048,
|
869
|
-
"tokens": 8192,
|
870
870
|
"vision": false,
|
871
871
|
},
|
872
872
|
{
|
873
|
+
"contextWindowTokens": 4000000,
|
873
874
|
"description": "Gemini 1.5 Flash is a foundation model that performs well at a variety of multimodal tasks such as visual understanding, classification, summarization, and creating content from image, audio and video. It's adept at processing visual and text inputs such as photographs, documents, infographics, and screenshots.
|
874
875
|
|
875
876
|
Gemini 1.5 Flash is designed for high-volume, high-frequency tasks where cost and latency matter. On most common tasks, Flash achieves comparable quality to other Gemini Pro models at a significantly reduced cost. Flash is well-suited for applications like chat assistants and on-demand content generation where speed and scale matter.
|
@@ -882,10 +883,10 @@ Usage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.d
|
|
882
883
|
"functionCall": false,
|
883
884
|
"id": "google/gemini-flash-1.5",
|
884
885
|
"maxTokens": 32768,
|
885
|
-
"tokens": 4000000,
|
886
886
|
"vision": true,
|
887
887
|
},
|
888
888
|
{
|
889
|
+
"contextWindowTokens": 128000,
|
889
890
|
"description": "DeepSeek-Coder-V2, an open-source Mixture-of-Experts (MoE) code language model. It is further pre-trained from an intermediate checkpoint of DeepSeek-V2 with additional 6 trillion tokens.
|
890
891
|
|
891
892
|
The original V1 model was trained from scratch on 2T tokens, with a composition of 87% code and 13% natural language in both English and Chinese. It was pre-trained on project-level code corpus by employing a extra fill-in-the-blank task.",
|
@@ -894,10 +895,10 @@ The original V1 model was trained from scratch on 2T tokens, with a composition
  "functionCall": false,
  "id": "deepseek/deepseek-coder",
  "maxTokens": 4096,
- "tokens": 128000,
  "vision": false,
},
{
+ "contextWindowTokens": 128000,
  "description": "DeepSeek-V2 Chat is a conversational finetune of DeepSeek-V2, a Mixture-of-Experts (MoE) language model. It comprises 236B total parameters, of which 21B are activated for each token.

Compared with DeepSeek 67B, DeepSeek-V2 achieves stronger performance, and meanwhile saves 42.5% of training costs, reduces the KV cache by 93.3%, and boosts the maximum generation throughput to 5.76 times.

@@ -908,10 +909,10 @@ DeepSeek-V2 achieves remarkable performance on both standard benchmarks and open
  "functionCall": false,
  "id": "deepseek/deepseek-chat",
  "maxTokens": 4096,
- "tokens": 128000,
  "vision": false,
},
{
+ "contextWindowTokens": 28000,
  "description": "Llama3 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.

This is the online version of the [offline chat model](/models/perplexity/llama-3-sonar-large-32k-chat). It is focused on delivering helpful, up-to-date, and factual responses. #online",

@@ -920,10 +921,10 @@ This is the online version of the [offline chat model](/models/perplexity/llama-
  "functionCall": false,
  "id": "perplexity/llama-3-sonar-large-32k-online",
  "maxTokens": undefined,
- "tokens": 28000,
  "vision": false,
},
{
+ "contextWindowTokens": 32768,
  "description": "Llama3 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.

This is a normal offline LLM, but the [online version](/models/perplexity/llama-3-sonar-large-32k-online) of this model has Internet access.",

@@ -932,10 +933,10 @@ This is a normal offline LLM, but the [online version](/models/perplexity/llama-
  "functionCall": false,
  "id": "perplexity/llama-3-sonar-large-32k-chat",
  "maxTokens": undefined,
- "tokens": 32768,
  "vision": false,
},
{
+ "contextWindowTokens": 28000,
  "description": "Llama3 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.

This is the online version of the [offline chat model](/models/perplexity/llama-3-sonar-small-32k-chat). It is focused on delivering helpful, up-to-date, and factual responses. #online",

@@ -944,10 +945,10 @@ This is the online version of the [offline chat model](/models/perplexity/llama-
  "functionCall": false,
  "id": "perplexity/llama-3-sonar-small-32k-online",
  "maxTokens": undefined,
- "tokens": 28000,
  "vision": false,
},
{
+ "contextWindowTokens": 32768,
  "description": "Llama3 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.

This is a normal offline LLM, but the [online version](/models/perplexity/llama-3-sonar-small-32k-online) of this model has Internet access.",
@@ -956,10 +957,10 @@ This is a normal offline LLM, but the [online version](/models/perplexity/llama-
  "functionCall": false,
  "id": "perplexity/llama-3-sonar-small-32k-chat",
  "maxTokens": undefined,
- "tokens": 32768,
  "vision": false,
},
{
+ "contextWindowTokens": 8192,
  "description": "This safeguard model has 8B parameters and is based on the Llama 3 family. Just like its predecessor, [LlamaGuard 1](https://huggingface.co/meta-llama/LlamaGuard-7b), it can do both prompt and response classification.

LlamaGuard 2 acts as a normal LLM would, generating text that indicates whether the given input/output is safe/unsafe. If deemed unsafe, it will also share the content categories violated.
@@ -974,10 +975,10 @@ To read more about the model release, [click here](https://ai.meta.com/blog/meta
  "functionCall": false,
  "id": "meta-llama/llama-guard-2-8b",
  "maxTokens": undefined,
- "tokens": 8192,
  "vision": false,
},
{
+ "contextWindowTokens": 128000,
  "description": "GPT-4o ("o" for "omni") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.

For benchmarking against other models, it was briefly called ["im-also-a-good-gpt2-chatbot"](https://twitter.com/LiamFedus/status/1790064963966370209)",

@@ -986,10 +987,10 @@ For benchmarking against other models, it was briefly called ["im-also-a-good-gp
  "functionCall": false,
  "id": "openai/gpt-4o-2024-05-13",
  "maxTokens": 4096,
- "tokens": 128000,
  "vision": false,
},
{
+ "contextWindowTokens": 128000,
  "description": "GPT-4o ("o" for "omni") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.

For benchmarking against other models, it was briefly called ["im-also-a-good-gpt2-chatbot"](https://twitter.com/LiamFedus/status/1790064963966370209)",
@@ -998,10 +999,10 @@ For benchmarking against other models, it was briefly called ["im-also-a-good-gp
  "functionCall": false,
  "id": "openai/gpt-4o",
  "maxTokens": 4096,
- "tokens": 128000,
  "vision": false,
},
{
+ "contextWindowTokens": 128000,
  "description": "GPT-4o Extended is an experimental variant of GPT-4o with extended max output tokens. This model supports only text input to text output.

_These are extended-context endpoints for [GPT-4o](/models/openai/gpt-4o). They may have higher prices._",
@@ -1010,10 +1011,10 @@ _These are extended-context endpoints for [GPT-4o](/models/openai/gpt-4o). They
  "functionCall": false,
  "id": "openai/gpt-4o:extended",
  "maxTokens": 64000,
- "tokens": 128000,
  "vision": false,
},
{
+ "contextWindowTokens": 32768,
  "description": "Qwen1.5 72B is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previously released Qwen, the improvements include:

- Significant performance improvement in human preference for chat models

@@ -1028,10 +1029,10 @@ Usage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://hug
  "functionCall": false,
  "id": "qwen/qwen-72b-chat",
  "maxTokens": undefined,
- "tokens": 32768,
  "vision": false,
},
{
+ "contextWindowTokens": 32768,
  "description": "Qwen1.5 110B is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previously released Qwen, the improvements include:

- Significant performance improvement in human preference for chat models
@@ -1046,10 +1047,10 @@ Usage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://hug
  "functionCall": false,
  "id": "qwen/qwen-110b-chat",
  "maxTokens": undefined,
- "tokens": 32768,
  "vision": false,
},
{
+ "contextWindowTokens": 24576,
  "description": "The NeverSleep team is back, with a Llama 3 8B finetune trained on their curated roleplay data. Striking a balance between eRP and RP, Lumimaid was designed to be serious, yet uncensored when necessary.

To enhance its overall intelligence and chat capability, roughly 40% of the training data was not roleplay. This provides a breadth of knowledge to access, while still keeping roleplay as the primary strength.

@@ -1060,10 +1061,10 @@ Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.m
  "functionCall": false,
  "id": "neversleep/llama-3-lumimaid-8b",
  "maxTokens": undefined,
- "tokens": 24576,
  "vision": false,
},
{
+ "contextWindowTokens": 24576,
  "description": "The NeverSleep team is back, with a Llama 3 8B finetune trained on their curated roleplay data. Striking a balance between eRP and RP, Lumimaid was designed to be serious, yet uncensored when necessary.

To enhance its overall intelligence and chat capability, roughly 40% of the training data was not roleplay. This provides a breadth of knowledge to access, while still keeping roleplay as the primary strength.
@@ -1076,10 +1077,10 @@ _These are extended-context endpoints for [Llama 3 Lumimaid 8B](/models/neversle
  "functionCall": false,
  "id": "neversleep/llama-3-lumimaid-8b:extended",
  "maxTokens": 2048,
- "tokens": 24576,
  "vision": false,
},
{
+ "contextWindowTokens": 8192,
  "description": "Creative writing model, routed with permission. It's fast, it keeps the conversation going, and it stays in character.

If you submit a raw prompt, you can use Alpaca or Vicuna formats.",
@@ -1088,10 +1089,10 @@ If you submit a raw prompt, you can use Alpaca or Vicuna formats.",
  "functionCall": false,
  "id": "sao10k/fimbulvetr-11b-v2",
  "maxTokens": 2048,
- "tokens": 8192,
  "vision": false,
},
{
+ "contextWindowTokens": 8192,
  "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 70B instruct-tuned version was optimized for high-quality dialogue use cases.

It has demonstrated strong performance compared to leading closed-source models in human evaluations.

@@ -1102,10 +1103,10 @@ To read more about the model release, [click here](https://ai.meta.com/blog/meta
  "functionCall": false,
  "id": "meta-llama/llama-3-70b-instruct",
  "maxTokens": undefined,
- "tokens": 8192,
  "vision": false,
},
{
+ "contextWindowTokens": 8192,
  "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 70B instruct-tuned version was optimized for high-quality dialogue use cases.

It has demonstrated strong performance compared to leading closed-source models in human evaluations.

@@ -1118,10 +1119,10 @@ _These are higher-throughput endpoints for [Llama 3 70B Instruct](/models/meta-l
  "functionCall": false,
  "id": "meta-llama/llama-3-70b-instruct:nitro",
  "maxTokens": undefined,
- "tokens": 8192,
  "vision": false,
},
{
+ "contextWindowTokens": 8192,
  "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high-quality dialogue use cases.

It has demonstrated strong performance compared to leading closed-source models in human evaluations.

@@ -1134,10 +1135,10 @@ _These are free, rate-limited endpoints for [Llama 3 8B Instruct](/models/meta-l
  "functionCall": false,
  "id": "meta-llama/llama-3-8b-instruct:free",
  "maxTokens": 4096,
- "tokens": 8192,
  "vision": false,
},
{
+ "contextWindowTokens": 8192,
  "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high-quality dialogue use cases.

It has demonstrated strong performance compared to leading closed-source models in human evaluations.

@@ -1148,10 +1149,10 @@ To read more about the model release, [click here](https://ai.meta.com/blog/meta
  "functionCall": false,
  "id": "meta-llama/llama-3-8b-instruct",
  "maxTokens": undefined,
- "tokens": 8192,
  "vision": false,
},
{
+ "contextWindowTokens": 8192,
  "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high-quality dialogue use cases.

It has demonstrated strong performance compared to leading closed-source models in human evaluations.

@@ -1164,10 +1165,10 @@ _These are higher-throughput endpoints for [Llama 3 8B Instruct](/models/meta-ll
  "functionCall": false,
  "id": "meta-llama/llama-3-8b-instruct:nitro",
  "maxTokens": undefined,
- "tokens": 8192,
  "vision": false,
},
{
+ "contextWindowTokens": 16384,
  "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high-quality dialogue use cases.

It has demonstrated strong performance compared to leading closed-source models in human evaluations.

@@ -1180,10 +1181,10 @@ _These are extended-context endpoints for [Llama 3 8B Instruct](/models/meta-lla
  "functionCall": false,
  "id": "meta-llama/llama-3-8b-instruct:extended",
  "maxTokens": 2048,
- "tokens": 16384,
  "vision": false,
},
{
+ "contextWindowTokens": 65536,
  "description": "Mistral's official instruct fine-tuned version of [Mixtral 8x22B](/models/mistralai/mixtral-8x22b). It uses 39B active parameters out of 141B, offering unparalleled cost efficiency for its size. Its strengths include:
- strong math, coding, and reasoning
- large context length (64k)
@@ -1196,10 +1197,10 @@ See benchmarks on the launch announcement [here](https://mistral.ai/news/mixtral
  "functionCall": false,
  "id": "mistralai/mixtral-8x22b-instruct",
  "maxTokens": undefined,
- "tokens": 65536,
  "vision": false,
},
{
+ "contextWindowTokens": 32000,
  "description": "WizardLM-2 7B is the smaller variant of Microsoft AI's latest Wizard model. It is the fastest and achieves comparable performance with existing 10x larger open-source leading models.

It is a finetune of [Mistral 7B Instruct](/models/mistralai/mistral-7b-instruct), using the same technique as [WizardLM-2 8x22B](/models/microsoft/wizardlm-2-8x22b).

@@ -1212,10 +1213,10 @@ To read more about the model release, [click here](https://wizardlm.github.io/Wi
  "functionCall": false,
  "id": "microsoft/wizardlm-2-7b",
  "maxTokens": undefined,
- "tokens": 32000,
  "vision": false,
},
{
+ "contextWindowTokens": 65536,
  "description": "WizardLM-2 8x22B is Microsoft AI's most advanced Wizard model. It demonstrates highly competitive performance compared to leading proprietary models, and it consistently outperforms all existing state-of-the-art open-source models.

It is an instruct finetune of [Mixtral 8x22B](/models/mistralai/mixtral-8x22b).

@@ -1228,10 +1229,10 @@ To read more about the model release, [click here](https://wizardlm.github.io/Wi
  "functionCall": false,
  "id": "microsoft/wizardlm-2-8x22b",
  "maxTokens": undefined,
- "tokens": 65536,
  "vision": false,
},
{
+ "contextWindowTokens": 4000000,
  "description": "Google's latest multimodal model, supporting image and video in text or chat prompts.

Optimized for language tasks including:
@@ -1253,10 +1254,10 @@ Usage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.d
  "functionCall": false,
  "id": "google/gemini-pro-1.5",
  "maxTokens": 32768,
- "tokens": 4000000,
  "vision": true,
},
{
+ "contextWindowTokens": 128000,
  "description": "The latest GPT-4 Turbo model with vision capabilities. Vision requests can now use JSON mode and function calling.

Training data: up to December 2023.",

@@ -1265,10 +1266,10 @@ Training data: up to December 2023.",
  "functionCall": true,
  "id": "openai/gpt-4-turbo",
  "maxTokens": 4096,
- "tokens": 128000,
  "vision": true,
},
{
+ "contextWindowTokens": 128000,
  "description": "Command R+ is a new, 104B-parameter LLM from Cohere. It's useful for roleplay, general consumer use cases, and Retrieval Augmented Generation (RAG).

It offers multilingual support for ten key languages to facilitate global business operations. See benchmarks and the launch post [here](https://txt.cohere.com/command-r-plus-microsoft-azure/).
@@ -1279,10 +1280,10 @@ Use of this model is subject to Cohere's [Acceptable Use Policy](https://docs.co
  "functionCall": false,
  "id": "cohere/command-r-plus",
  "maxTokens": 4000,
- "tokens": 128000,
  "vision": false,
},
{
+ "contextWindowTokens": 32768,
  "description": "DBRX is a new open source large language model developed by Databricks. At 132B, it outperforms existing open source LLMs like Llama 2 70B and [Mixtral-8x7b](/models/mistralai/mixtral-8x7b) on standard industry benchmarks for language understanding, programming, math, and logic.

It uses a fine-grained mixture-of-experts (MoE) architecture. 36B parameters are active on any input. It was pre-trained on 12T tokens of text and code data. Compared to other open MoE models like Mixtral-8x7B and Grok-1, DBRX is fine-grained, meaning it uses a larger number of smaller experts.

@@ -1295,10 +1296,10 @@ See the launch announcement and benchmark results [here](https://www.databricks.
  "functionCall": false,
  "id": "databricks/dbrx-instruct",
  "maxTokens": undefined,
- "tokens": 32768,
  "vision": false,
},
{
+ "contextWindowTokens": 4096,
  "description": "A merge with a complex family tree, this model was crafted for roleplaying and storytelling. Midnight Rose is a successor to Rogue Rose and Aurora Nights and improves upon them both. It wants to produce lengthy output by default and is the best creative writing merge produced so far by sophosympatheia.

Descending from earlier versions of Midnight Rose and [Wizard Tulu Dolphin 70B](https://huggingface.co/sophosympatheia/Wizard-Tulu-Dolphin-70B-v1.0), it inherits the best qualities of each.",

@@ -1307,10 +1308,10 @@ Descending from earlier versions of Midnight Rose and [Wizard Tulu Dolphin 70B](
  "functionCall": false,
  "id": "sophosympatheia/midnight-rose-70b",
  "maxTokens": undefined,
- "tokens": 4096,
  "vision": false,
},
{
+ "contextWindowTokens": 128000,
  "description": "Command-R is a 35B parameter model that performs conversational language tasks at a higher quality, more reliably, and with a longer context than previous models. It can be used for complex workflows like code generation, retrieval augmented generation (RAG), tool use, and agents.

Read the launch post [here](https://txt.cohere.com/command-r/).

@@ -1321,10 +1322,10 @@ Use of this model is subject to Cohere's [Acceptable Use Policy](https://docs.co
  "functionCall": false,
  "id": "cohere/command-r",
  "maxTokens": 4000,
- "tokens": 128000,
  "vision": false,
},
{
+ "contextWindowTokens": 4096,
  "description": "Command is an instruction-following conversational model that performs language tasks with high quality, more reliably and with a longer context than our base generative models.

Use of this model is subject to Cohere's [Acceptable Use Policy](https://docs.cohere.com/docs/c4ai-acceptable-use-policy).",

@@ -1333,10 +1334,10 @@ Use of this model is subject to Cohere's [Acceptable Use Policy](https://docs.co
  "functionCall": false,
  "id": "cohere/command",
  "maxTokens": 4000,
- "tokens": 4096,
  "vision": false,
},
{
+ "contextWindowTokens": 200000,
  "description": "Claude 3 Haiku is Anthropic's fastest and most compact model for
near-instant responsiveness. Quick and accurate targeted performance.
@@ -1348,10 +1349,10 @@ See the launch announcement and benchmark results [here](https://www.anthropic.c
  "functionCall": false,
  "id": "anthropic/claude-3-haiku",
  "maxTokens": 4096,
- "tokens": 200000,
  "vision": true,
},
{
+ "contextWindowTokens": 200000,
  "description": "Claude 3 Haiku is Anthropic's fastest and most compact model for
near-instant responsiveness. Quick and accurate targeted performance.

@@ -1365,10 +1366,10 @@ _This is a faster endpoint, made available in collaboration with Anthropic, that
  "functionCall": false,
  "id": "anthropic/claude-3-haiku:beta",
  "maxTokens": 4096,
- "tokens": 200000,
  "vision": true,
},
{
+ "contextWindowTokens": 200000,
  "description": "Claude 3 Sonnet is an ideal balance of intelligence and speed for enterprise workloads. Maximum utility at a lower price, dependable, balanced for scaled deployments.

See the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-family)

@@ -1379,10 +1380,10 @@ See the launch announcement and benchmark results [here](https://www.anthropic.c
  "functionCall": false,
  "id": "anthropic/claude-3-sonnet",
  "maxTokens": 4096,
- "tokens": 200000,
  "vision": true,
},
{
+ "contextWindowTokens": 200000,
  "description": "Claude 3 Sonnet is an ideal balance of intelligence and speed for enterprise workloads. Maximum utility at a lower price, dependable, balanced for scaled deployments.

See the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-family)

@@ -1395,10 +1396,10 @@ _This is a faster endpoint, made available in collaboration with Anthropic, that
  "functionCall": false,
  "id": "anthropic/claude-3-sonnet:beta",
  "maxTokens": 4096,
- "tokens": 200000,
  "vision": true,
},
{
+ "contextWindowTokens": 200000,
  "description": "Claude 3 Opus is Anthropic's most powerful model for highly complex tasks. It boasts top-level performance, intelligence, fluency, and understanding.

See the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-family)

@@ -1409,10 +1410,10 @@ See the launch announcement and benchmark results [here](https://www.anthropic.c
  "functionCall": false,
  "id": "anthropic/claude-3-opus",
  "maxTokens": 4096,
- "tokens": 200000,
  "vision": true,
},
{
+ "contextWindowTokens": 200000,
  "description": "Claude 3 Opus is Anthropic's most powerful model for highly complex tasks. It boasts top-level performance, intelligence, fluency, and understanding.

See the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-family)

@@ -1425,10 +1426,10 @@ _This is a faster endpoint, made available in collaboration with Anthropic, that
  "functionCall": false,
  "id": "anthropic/claude-3-opus:beta",
  "maxTokens": 4096,
- "tokens": 200000,
  "vision": true,
},
{
+ "contextWindowTokens": 128000,
  "description": "This is Mistral AI's flagship model, Mistral Large 2 (version \`mistral-large-2407\`). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/).

It is fluent in English, French, Spanish, German, and Italian, with high grammatical accuracy, and its long context window allows precise information recall from large documents.",
@@ -1437,10 +1438,10 @@ It is fluent in English, French, Spanish, German, and Italian, with high grammat
  "functionCall": false,
  "id": "mistralai/mistral-large",
  "maxTokens": undefined,
- "tokens": 128000,
  "vision": false,
},
{
+ "contextWindowTokens": 128000,
  "description": "The preview GPT-4 model with improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more. Training data: up to Dec 2023.

**Note:** heavily rate limited by OpenAI while in preview.",

@@ -1449,10 +1450,10 @@ It is fluent in English, French, Spanish, German, and Italian, with high grammat
  "functionCall": true,
  "id": "openai/gpt-4-turbo-preview",
  "maxTokens": 4096,
- "tokens": 128000,
  "vision": false,
},
{
+ "contextWindowTokens": 4095,
  "description": "GPT-3.5 Turbo is OpenAI's fastest model. It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.

Training data up to Sep 2021.",

@@ -1461,10 +1462,10 @@ Training data up to Sep 2021.",
  "functionCall": false,
  "id": "openai/gpt-3.5-turbo-0613",
  "maxTokens": 4096,
- "tokens": 4095,
  "vision": false,
},
{
+ "contextWindowTokens": 32768,
  "description": "Nous Hermes 2 Mixtral 8x7B DPO is the new flagship Nous Research model trained over the [Mixtral 8x7B MoE LLM](/models/mistralai/mixtral-8x7b).

The model was trained on over 1,000,000 entries of primarily [GPT-4](/models/openai/gpt-4) generated data, as well as other high quality data from open datasets across the AI landscape, achieving state of the art performance on a variety of tasks.
@@ -1475,20 +1476,20 @@ The model was trained on over 1,000,000 entries of primarily [GPT-4](/models/ope
  "functionCall": false,
  "id": "nousresearch/nous-hermes-2-mixtral-8x7b-dpo",
  "maxTokens": undefined,
- "tokens": 32768,
  "vision": false,
},
{
+ "contextWindowTokens": 32000,
  "description": "This is Mistral AI's closed-source, medium-sized model. It's powered by a closed-source prototype and excels at reasoning, code, JSON, chat, and more. In benchmarks, it compares with many of the flagship models of other companies.",
  "displayName": "Mistral Medium",
  "enabled": false,
  "functionCall": false,
  "id": "mistralai/mistral-medium",
  "maxTokens": undefined,
- "tokens": 32000,
  "vision": false,
},
{
+ "contextWindowTokens": 32000,
  "description": "This model is currently powered by Mixtral-8X7B-v0.1, a sparse mixture of experts model with 12B active parameters. It has better reasoning, exhibits more capabilities, can produce and reason about code, and is multilingual, supporting English, French, German, Italian, and Spanish.
#moe",
  "displayName": "Mistral Small",
@@ -1496,30 +1497,30 @@ The model was trained on over 1,000,000 entries of primarily [GPT-4](/models/ope
  "functionCall": false,
  "id": "mistralai/mistral-small",
  "maxTokens": undefined,
- "tokens": 32000,
  "vision": false,
},
{
+ "contextWindowTokens": 32000,
  "description": "This model is currently powered by Mistral-7B-v0.2, and incorporates a "better" fine-tuning than [Mistral 7B](/models/mistralai/mistral-7b-instruct-v0.1), inspired by community work. It's best used for large batch processing tasks where cost is a significant factor but reasoning capabilities are not crucial.",
  "displayName": "Mistral Tiny",
  "enabled": false,
  "functionCall": false,
  "id": "mistralai/mistral-tiny",
  "maxTokens": undefined,
- "tokens": 32000,
  "vision": false,
},
{
+ "contextWindowTokens": 4096,
  "description": "A 75/25 merge of [Chronos 13b v2](https://huggingface.co/elinas/chronos-13b-v2) and [Nous Hermes Llama2 13b](/models/nousresearch/nous-hermes-llama2-13b). This offers the imaginative writing style of Chronos while retaining coherency. Outputs are long and use exceptional prose. #merge",
  "displayName": "Chronos Hermes 13B v2",
  "enabled": false,
  "functionCall": false,
  "id": "austism/chronos-hermes-13b",
  "maxTokens": undefined,
- "tokens": 4096,
  "vision": false,
},
{
+ "contextWindowTokens": 4096,
  "description": "Nous Hermes 2 Yi 34B was trained on 1,000,000 entries of primarily GPT-4 generated data, as well as other high quality data from open datasets across the AI landscape.

Nous-Hermes 2 on Yi 34B outperforms all Nous-Hermes & Open-Hermes models of the past, achieving new heights in all benchmarks for a Nous Research LLM as well as surpassing many popular finetunes.
@@ -1528,10 +1529,10 @@ Nous-Hermes 2 on Yi 34B outperforms all Nous-Hermes & Open-Hermes models of the
  "functionCall": false,
  "id": "nousresearch/nous-hermes-yi-34b",
  "maxTokens": undefined,
- "tokens": 4096,
  "vision": false,
},
{
+ "contextWindowTokens": 32768,
  "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.

An improved version of [Mistral 7B Instruct](/models/mistralai/mistral-7b-instruct-v0.1), with the following changes:
@@ -1544,10 +1545,10 @@ An improved version of [Mistral 7B Instruct](/models/mistralai/mistral-7b-instruc
  "functionCall": false,
  "id": "mistralai/mistral-7b-instruct-v0.2",
  "maxTokens": undefined,
- "tokens": 32768,
  "vision": false,
},
{
+ "contextWindowTokens": 32768,
  "description": "This is a 16k context fine-tune of [Mixtral-8x7b](/models/mistralai/mixtral-8x7b). It excels in coding tasks due to extensive training with coding data and is known for its obedience, although it lacks DPO tuning.

The model is uncensored and is stripped of alignment and bias. It requires an external alignment layer for ethical use. Users are cautioned to use this highly compliant model responsibly, as detailed in a blog post about uncensored models at [erichartford.com/uncensored-models](https://erichartford.com/uncensored-models).

@@ -1558,10 +1559,10 @@ The model is uncensored and is stripped of alignment and bias. It requires an ex
  "functionCall": false,
  "id": "cognitivecomputations/dolphin-mixtral-8x7b",
  "maxTokens": undefined,
- "tokens": 32768,
  "vision": false,
},
{
+ "contextWindowTokens": 65536,
  "description": "Google's flagship multimodal model, supporting image and video in text or chat prompts for a text or code response.

See the benchmarks and prompting guidelines from [Deepmind](https://deepmind.google/technologies/gemini/).

@@ -1574,10 +1575,10 @@ Usage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.d
  "functionCall": false,
  "id": "google/gemini-pro-vision",
  "maxTokens": 8192,
- "tokens": 65536,
  "vision": true,
},
{
+ "contextWindowTokens": 131040,
  "description": "Google's flagship text generation model. Designed to handle natural language tasks, multiturn text and code chat, and code generation.

See the benchmarks and prompting guidelines from [Deepmind](https://deepmind.google/technologies/gemini/).

@@ -1588,10 +1589,10 @@ Usage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.d
  "functionCall": false,
  "id": "google/gemini-pro",
  "maxTokens": 32768,
- "tokens": 131040,
  "vision": false,
},
{
+ "contextWindowTokens": 32768,
  "description": "A pretrained generative Sparse Mixture of Experts, by Mistral AI, for chat and instruction use. Incorporates 8 experts (feed-forward networks) for a total of 47 billion parameters.

Instruct model fine-tuned by Mistral. #moe",

@@ -1600,10 +1601,10 @@ Instruct model fine-tuned by Mistral. #moe",
  "functionCall": false,
  "id": "mistralai/mixtral-8x7b-instruct",
  "maxTokens": undefined,
- "tokens": 32768,
  "vision": false,
},
{
+ "contextWindowTokens": 32768,
  "description": "A pretrained generative Sparse Mixture of Experts, by Mistral AI, for chat and instruction use. Incorporates 8 experts (feed-forward networks) for a total of 47 billion parameters.

Instruct model fine-tuned by Mistral. #moe

@@ -1614,10 +1615,10 @@ _These are higher-throughput endpoints for [Mixtral 8x7B Instruct](/models/mistr
  "functionCall": false,
  "id": "mistralai/mixtral-8x7b-instruct:nitro",
  "maxTokens": undefined,
- "tokens": 32768,
  "vision": false,
},
{
+ "contextWindowTokens": 32768,
  "description": "A pretrained generative Sparse Mixture of Experts, by Mistral AI. Incorporates 8 experts (feed-forward networks) for a total of 47B parameters. Base model (not fine-tuned for instructions) - see [Mixtral 8x7B Instruct](/models/mistralai/mixtral-8x7b-instruct) for an instruct-tuned model.

#moe",

@@ -1626,10 +1627,10 @@ _These are higher-throughput endpoints for [Mixtral 8x7B Instruct](/models/mistr
  "functionCall": false,
  "id": "mistralai/mixtral-8x7b",
  "maxTokens": undefined,
- "tokens": 32768,
  "vision": false,
},
{
+ "contextWindowTokens": 32768,
  "description": "This is the chat model variant of the [StripedHyena series](/models?q=stripedhyena) developed by Together in collaboration with Nous Research.

StripedHyena uses a new architecture that competes with traditional Transformers, particularly in long-context data processing. It combines attention mechanisms with gated convolutions for improved speed, efficiency, and scaling. This model marks a significant advancement in AI architecture for sequence modeling tasks.",
@@ -1638,10 +1639,10 @@ StripedHyena uses a new architecture that competes with traditional Transformers
  "functionCall": false,
  "id": "togethercomputer/stripedhyena-nous-7b",
  "maxTokens": undefined,
- "tokens": 32768,
  "vision": false,
},
{
+ "contextWindowTokens": 32768,
  "description": "From the creator of [MythoMax](/models/gryphe/mythomax-l2-13b), merges a suite of models to reduce word anticipation, ministrations, and other undesirable words in ChatGPT roleplaying data.

It combines [Neural Chat 7B](/models/intel/neural-chat-7b), Airoboros 7b, [Toppy M 7B](/models/undi95/toppy-m-7b), [Zephyr 7B beta](/models/huggingfaceh4/zephyr-7b-beta), [Nous Capybara 34B](/models/nousresearch/nous-capybara-34b), [OpenHermes 2.5](/models/teknium/openhermes-2.5-mistral-7b), and many others.

@@ -1654,10 +1655,10 @@ _These are free, rate-limited endpoints for [MythoMist 7B](/models/gryphe/mythom
  "functionCall": false,
  "id": "gryphe/mythomist-7b:free",
  "maxTokens": 4096,
- "tokens": 32768,
  "vision": false,
},
{
+ "contextWindowTokens": 32768,
  "description": "From the creator of [MythoMax](/models/gryphe/mythomax-l2-13b), merges a suite of models to reduce word anticipation, ministrations, and other undesirable words in ChatGPT roleplaying data.

It combines [Neural Chat 7B](/models/intel/neural-chat-7b), Airoboros 7b, [Toppy M 7B](/models/undi95/toppy-m-7b), [Zephyr 7B beta](/models/huggingfaceh4/zephyr-7b-beta), [Nous Capybara 34B](/models/nousresearch/nous-capybara-34b), [OpenHermes 2.5](/models/teknium/openhermes-2.5-mistral-7b), and many others.
@@ -1668,10 +1669,10 @@ It combines [Neural Chat 7B](/models/intel/neural-chat-7b), Airoboros 7b, [Toppy
  "functionCall": false,
  "id": "gryphe/mythomist-7b",
  "maxTokens": 2048,
- "tokens": 32768,
  "vision": false,
},
{
+ "contextWindowTokens": 8192,
  "description": "OpenChat 7B is a library of open-source language models, fine-tuned with "C-RLFT (Conditioned Reinforcement Learning Fine-Tuning)" - a strategy inspired by offline reinforcement learning. It has been trained on mixed-quality data without preference labels.

- For OpenChat fine-tuned on Mistral 7B, check out [OpenChat 7B](/models/openchat/openchat-7b).

@@ -1685,10 +1686,10 @@ _These are free, rate-limited endpoints for [OpenChat 3.5 7B](/models/openchat/o
  "functionCall": false,
  "id": "openchat/openchat-7b:free",
  "maxTokens": 4096,
- "tokens": 8192,
  "vision": false,
},
{
+ "contextWindowTokens": 8192,
  "description": "OpenChat 7B is a library of open-source language models, fine-tuned with "C-RLFT (Conditioned Reinforcement Learning Fine-Tuning)" - a strategy inspired by offline reinforcement learning. It has been trained on mixed-quality data without preference labels.

- For OpenChat fine-tuned on Mistral 7B, check out [OpenChat 7B](/models/openchat/openchat-7b).

@@ -1700,10 +1701,10 @@ _These are free, rate-limited endpoints for [OpenChat 3.5 7B](/models/openchat/o
  "functionCall": false,
  "id": "openchat/openchat-7b",
  "maxTokens": undefined,
- "tokens": 8192,
  "vision": false,
},
{
+ "contextWindowTokens": 8192,
  "description": "A collab between IkariDev and Undi. This merge is suitable for RP, ERP, and general knowledge.

#merge #uncensored",
@@ -1712,30 +1713,30 @@ _These are free, rate-limited endpoints for [OpenChat 3.5 7B](/models/openchat/o
  "functionCall": false,
  "id": "neversleep/noromaid-20b",
  "maxTokens": 2048,
- "tokens": 8192,
  "vision": false,
},
{
+ "contextWindowTokens": 100000,
  "description": "Anthropic's model for low-latency, high throughput text generation. Supports hundreds of pages of text.",
  "displayName": "Anthropic: Claude Instant v1.1",
  "enabled": false,
  "functionCall": false,
  "id": "anthropic/claude-instant-1.1",
  "maxTokens": 2048,
- "tokens": 100000,
  "vision": false,
},
{
+ "contextWindowTokens": 200000,
  "description": "Claude 2 delivers advancements in key capabilities for enterprises—including an industry-leading 200K token context window, significant reductions in rates of model hallucination, system prompts and a new beta feature: tool use.",
  "displayName": "Anthropic: Claude v2.1",
  "enabled": false,
  "functionCall": false,
  "id": "anthropic/claude-2.1",
  "maxTokens": 4096,
- "tokens": 200000,
  "vision": false,
},
{
+ "contextWindowTokens": 200000,
  "description": "Claude 2 delivers advancements in key capabilities for enterprises—including an industry-leading 200K token context window, significant reductions in rates of model hallucination, system prompts and a new beta feature: tool use.

_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/models/anthropic/claude-2.1) variant._",
@@ -1744,20 +1745,20 @@ _This is a faster endpoint, made available in collaboration with Anthropic, that
  "functionCall": false,
  "id": "anthropic/claude-2.1:beta",
  "maxTokens": 4096,
- "tokens": 200000,
  "vision": false,
},
{
+ "contextWindowTokens": 200000,
  "description": "Claude 2 delivers advancements in key capabilities for enterprises—including an industry-leading 200K token context window, significant reductions in rates of model hallucination, system prompts and a new beta feature: tool use.",
  "displayName": "Anthropic: Claude v2",
  "enabled": false,
  "functionCall": false,
  "id": "anthropic/claude-2",
  "maxTokens": 4096,
- "tokens": 200000,
  "vision": false,
},
{
+ "contextWindowTokens": 200000,
  "description": "Claude 2 delivers advancements in key capabilities for enterprises—including an industry-leading 200K token context window, significant reductions in rates of model hallucination, system prompts and a new beta feature: tool use.

_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/models/anthropic/claude-2) variant._",

@@ -1766,10 +1767,10 @@ _This is a faster endpoint, made available in collaboration with Anthropic, that
  "functionCall": false,
  "id": "anthropic/claude-2:beta",
  "maxTokens": 4096,
- "tokens": 200000,
  "vision": false,
},
{
+ "contextWindowTokens": 4096,
  "description": "A continuation of [OpenHermes 2 model](/models/teknium/openhermes-2-mistral-7b), trained on additional code datasets.
Potentially the most interesting finding from training on a good ratio (est. of around 7-14% of the total dataset) of code instruction was that it has boosted several non-code benchmarks, including TruthfulQA, AGIEval, and GPT4All suite. It did however reduce BigBench benchmark score, but the net gain overall is significant.",
  "displayName": "OpenHermes 2.5 Mistral 7B",
@@ -1777,10 +1778,10 @@ Potentially the most interesting finding from training on a good ratio (est. of
  "functionCall": false,
  "id": "teknium/openhermes-2.5-mistral-7b",
  "maxTokens": undefined,
- "tokens": 4096,
  "vision": false,
},
{
+ "contextWindowTokens": 128000,
  "description": "Ability to understand images, in addition to all other [GPT-4 Turbo capabilities](/models/openai/gpt-4-turbo). Training data: up to Apr 2023.

**Note:** heavily rate limited by OpenAI while in preview.
@@ -1791,10 +1792,10 @@ Potentially the most interesting finding from training on a good ratio (est. of
  "functionCall": false,
  "id": "openai/gpt-4-vision-preview",
  "maxTokens": 4096,
- "tokens": 128000,
  "vision": true,
},
{
+ "contextWindowTokens": 4096,
  "description": "A Mythomax/MLewd_13B-style merge of selected 70B models.
A multi-model merge of several LLaMA2 70B finetunes for roleplaying and creative work. The goal was to create a model that combines creativity with intelligence for an enhanced experience.

@@ -1804,10 +1805,10 @@ A multi-model merge of several LLaMA2 70B finetunes for roleplaying and creative
  "functionCall": false,
  "id": "lizpreciatior/lzlv-70b-fp16-hf",
  "maxTokens": undefined,
- "tokens": 4096,
  "vision": false,
},
{
+ "contextWindowTokens": 6144,
  "description": "A large LLM created by combining two fine-tuned Llama 70B models into one 120B model. Combines Xwin and Euryale.

Credits to

@@ -1820,10 +1821,10 @@ Credits to
  "functionCall": false,
  "id": "alpindale/goliath-120b",
  "maxTokens": 400,
- "tokens": 6144,
  "vision": false,
},
{
+ "contextWindowTokens": 4096,
  "description": "A wild 7B parameter model that merges several models using the new task_arithmetic merge method from mergekit.
List of merged models:
- NousResearch/Nous-Capybara-7B-V1.9
@@ -1840,10 +1841,10 @@ _These are free, rate-limited endpoints for [Toppy M 7B](/models/undi95/toppy-m-
  "functionCall": false,
  "id": "undi95/toppy-m-7b:free",
  "maxTokens": 2048,
- "tokens": 4096,
  "vision": false,
},
{
+ "contextWindowTokens": 4096,
  "description": "A wild 7B parameter model that merges several models using the new task_arithmetic merge method from mergekit.
List of merged models:
- NousResearch/Nous-Capybara-7B-V1.9

@@ -1858,10 +1859,10 @@ List of merged models:
  "functionCall": false,
  "id": "undi95/toppy-m-7b",
  "maxTokens": undefined,
- "tokens": 4096,
  "vision": false,
},
{
+ "contextWindowTokens": 4096,
  "description": "A wild 7B parameter model that merges several models using the new task_arithmetic merge method from mergekit.
List of merged models:
- NousResearch/Nous-Capybara-7B-V1.9

@@ -1878,10 +1879,10 @@ _These are higher-throughput endpoints for [Toppy M 7B](/models/undi95/toppy-m-7
  "functionCall": false,
  "id": "undi95/toppy-m-7b:nitro",
  "maxTokens": undefined,
- "tokens": 4096,
  "vision": false,
},
{
+ "contextWindowTokens": 200000,
  "description": "Depending on their size, subject, and complexity, your prompts will be sent to [Llama 3 70B Instruct](/models/meta-llama/llama-3-70b-instruct), [Claude 3.5 Sonnet (self-moderated)](/models/anthropic/claude-3.5-sonnet:beta) or [GPT-4o](/models/openai/gpt-4o). To see which model was used, visit [Activity](/activity).

A major redesign of this router is coming soon. Stay tuned on [Discord](https://discord.gg/fVyRaUDgxW) for updates.",
@@ -1890,10 +1891,10 @@ A major redesign of this router is coming soon. Stay tuned on [Discord](https://
  "functionCall": false,
  "id": "openrouter/auto",
  "maxTokens": undefined,
- "tokens": 200000,
  "vision": false,
},
{
+ "contextWindowTokens": 128000,
  "description": "The latest GPT-4 Turbo model with vision capabilities. Vision requests can now use JSON mode and function calling.

Training data: up to April 2023.",
@@ -1902,40 +1903,40 @@ Training data: up to April 2023.",
  "functionCall": true,
  "id": "openai/gpt-4-1106-preview",
  "maxTokens": 4096,
- "tokens": 128000,
  "vision": true,
},
{
+ "contextWindowTokens": 16385,
  "description": "An older GPT-3.5 Turbo model with improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more. Training data: up to Sep 2021.",
  "displayName": "OpenAI: GPT-3.5 Turbo 16k (older v1106)",
  "enabled": false,
  "functionCall": true,
  "id": "openai/gpt-3.5-turbo-1106",
  "maxTokens": 4096,
- "tokens": 16385,
  "vision": false,
},
{
+ "contextWindowTokens": 131040,
  "description": "PaLM 2 fine-tuned for chatbot conversations that help with code-related questions.",
  "displayName": "Google: PaLM 2 Code Chat 32k",
  "enabled": false,
  "functionCall": false,
  "id": "google/palm-2-codechat-bison-32k",
  "maxTokens": 32768,
- "tokens": 131040,
  "vision": false,
},
{
+ "contextWindowTokens": 131040,
  "description": "PaLM 2 is a language model by Google with improved multilingual, reasoning and coding capabilities.",
  "displayName": "Google: PaLM 2 Chat 32k",
  "enabled": false,
  "functionCall": false,
  "id": "google/palm-2-chat-bison-32k",
  "maxTokens": 32768,
- "tokens": 131040,
  "vision": false,
},
{
+ "contextWindowTokens": 4096,
  "description": "A Llama 2 70B fine-tune using synthetic data (the Airoboros dataset).

Currently based on [jondurbin/airoboros-l2-70b](https://huggingface.co/jondurbin/airoboros-l2-70b-2.2.1), but might get updated in the future.",
@@ -1944,90 +1945,90 @@ Currently based on [jondurbin/airoboros-l2-70b](https://huggingface.co/jondurbin
  "functionCall": false,
  "id": "jondurbin/airoboros-l2-70b",
  "maxTokens": undefined,
- "tokens": 4096,
  "vision": false,
},
{
+ "contextWindowTokens": 8192,
  "description": "Xwin-LM aims to develop and open-source alignment tech for LLMs. Our first release, built upon the [Llama2](/models/\${Model.Llama_2_13B_Chat}) base models, ranked TOP-1 on AlpacaEval. Notably, it's the first to surpass [GPT-4](/models/\${Model.GPT_4}) on this benchmark. The project will be continuously updated.",
  "displayName": "Xwin 70B",
  "enabled": false,
  "functionCall": false,
  "id": "xwin-lm/xwin-lm-70b",
  "maxTokens": 400,
- "tokens": 8192,
  "vision": false,
},
{
+ "contextWindowTokens": 4096,
  "description": "A 7.3B parameter model that outperforms Llama 2 13B on all benchmarks, with optimizations for speed and context length.",
  "displayName": "Mistral: Mistral 7B Instruct v0.1",
  "enabled": false,
  "functionCall": false,
  "id": "mistralai/mistral-7b-instruct-v0.1",
  "maxTokens": undefined,
- "tokens": 4096,
  "vision": false,
},
{
+ "contextWindowTokens": 4095,
  "description": "This model is a variant of GPT-3.5 Turbo tuned for instructional prompts and omitting chat-related optimizations. Training data: up to Sep 2021.",
  "displayName": "OpenAI: GPT-3.5 Turbo Instruct",
  "enabled": false,
  "functionCall": false,
  "id": "openai/gpt-3.5-turbo-instruct",
  "maxTokens": 4096,
- "tokens": 4095,
  "vision": false,
},
{
+ "contextWindowTokens": 8192,
  "description": "A blend of the new Pygmalion-13b and MythoMax. #merge",
  "displayName": "Pygmalion: Mythalion 13B",
  "enabled": false,
  "functionCall": false,
  "id": "pygmalionai/mythalion-13b",
  "maxTokens": 400,
- "tokens": 8192,
  "vision": false,
},
{
+ "contextWindowTokens": 32767,
  "description": "GPT-4-32k is an extended version of GPT-4, with the same capabilities but quadrupled context length, allowing for processing up to 40 pages of text in a single pass. This is particularly beneficial for handling longer content like interacting with PDFs without an external vector database. Training data: up to Sep 2021.",
  "displayName": "OpenAI: GPT-4 32k (older v0314)",
  "enabled": false,
  "functionCall": false,
  "id": "openai/gpt-4-32k-0314",
  "maxTokens": 4096,
- "tokens": 32767,
  "vision": false,
},
{
+ "contextWindowTokens": 32767,
  "description": "GPT-4-32k is an extended version of GPT-4, with the same capabilities but quadrupled context length, allowing for processing up to 40 pages of text in a single pass. This is particularly beneficial for handling longer content like interacting with PDFs without an external vector database. Training data: up to Sep 2021.",
  "displayName": "OpenAI: GPT-4 32k",
  "enabled": false,
  "functionCall": false,
  "id": "openai/gpt-4-32k",
  "maxTokens": 4096,
- "tokens": 32767,
  "vision": false,
},
{
+ "contextWindowTokens": 16385,
  "description": "This model offers four times the context length of gpt-3.5-turbo, allowing it to support approximately 20 pages of text in a single request at a higher cost. Training data: up to Sep 2021.",
  "displayName": "OpenAI: GPT-3.5 Turbo 16k",
  "enabled": false,
  "functionCall": false,
  "id": "openai/gpt-3.5-turbo-16k",
  "maxTokens": 4096,
- "tokens": 16385,
  "vision": false,
},
{
+ "contextWindowTokens": 4096,
  "description": "A state-of-the-art language model fine-tuned on over 300k instructions by Nous Research, with Teknium and Emozilla leading the fine tuning process.",
  "displayName": "Nous: Hermes 13B",
  "enabled": false,
  "functionCall": false,
  "id": "nousresearch/nous-hermes-llama2-13b",
  "maxTokens": undefined,
- "tokens": 4096,
  "vision": false,
},
{
+ "contextWindowTokens": 4096,
  "description": "Zephyr is a series of language models that are trained to act as helpful assistants. Zephyr-7B-β is the second model in the series, and is a fine-tuned version of [mistralai/Mistral-7B-v0.1](/models/mistralai/mistral-7b-instruct-v0.1) that was trained on a mix of publicly available, synthetic datasets using Direct Preference Optimization (DPO).

_These are free, rate-limited endpoints for [Zephyr 7B](/models/huggingfaceh4/zephyr-7b-beta). Outputs may be cached. Read about rate limits [here](/docs/limits)._",
@@ -2036,60 +2037,60 @@ _These are free, rate-limited endpoints for [Zephyr 7B](/models/huggingfaceh4/ze
  "functionCall": false,
  "id": "huggingfaceh4/zephyr-7b-beta:free",
  "maxTokens": 2048,
- "tokens": 4096,
  "vision": false,
},
{
+ "contextWindowTokens": 8000,
  "description": "An attempt to recreate Claude-style verbosity, but don't expect the same level of coherence or memory. Meant for use in roleplay/narrative situations.",
  "displayName": "Mancer: Weaver (alpha)",
  "enabled": false,
  "functionCall": false,
  "id": "mancer/weaver",
  "maxTokens": 1000,
- "tokens": 8000,
  "vision": false,
},
{
+ "contextWindowTokens": 100000,
  "description": "Anthropic's model for low-latency, high throughput text generation. Supports hundreds of pages of text.",
  "displayName": "Anthropic: Claude Instant v1.0",
  "enabled": false,
  "functionCall": false,
  "id": "anthropic/claude-instant-1.0",
  "maxTokens": 4096,
- "tokens": 100000,
  "vision": false,
},
{
+ "contextWindowTokens": 100000,
  "description": "Anthropic's model for low-latency, high throughput text generation. Supports hundreds of pages of text.",
  "displayName": "Anthropic: Claude v1.2",
  "enabled": false,
  "functionCall": false,
  "id": "anthropic/claude-1.2",
  "maxTokens": 4096,
- "tokens": 100000,
  "vision": false,
},
{
+ "contextWindowTokens": 100000,
  "description": "Anthropic's model for low-latency, high throughput text generation. Supports hundreds of pages of text.",
  "displayName": "Anthropic: Claude v1",
  "enabled": false,
  "functionCall": false,
  "id": "anthropic/claude-1",
  "maxTokens": 4096,
- "tokens": 100000,
  "vision": false,
},
{
+ "contextWindowTokens": 100000,
  "description": "Anthropic's model for low-latency, high throughput text generation. Supports hundreds of pages of text.",
  "displayName": "Anthropic: Claude Instant v1",
  "enabled": false,
  "functionCall": false,
  "id": "anthropic/claude-instant-1",
  "maxTokens": 4096,
- "tokens": 100000,
  "vision": false,
},
{
+ "contextWindowTokens": 100000,
  "description": "Anthropic's model for low-latency, high throughput text generation. Supports hundreds of pages of text.

_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/models/anthropic/claude-instant-1) variant._",
@@ -2098,20 +2099,20 @@ _This is a faster endpoint, made available in collaboration with Anthropic, that
|
|
2098
2099
|
"functionCall": false,
|
2099
2100
|
"id": "anthropic/claude-instant-1:beta",
|
2100
2101
|
"maxTokens": 4096,
|
2101
|
-
"tokens": 100000,
|
2102
2102
|
"vision": false,
|
2103
2103
|
},
|
2104
2104
|
{
|
2105
|
+
"contextWindowTokens": 100000,
|
2105
2106
|
"description": "Anthropic's flagship model. Superior performance on tasks that require complex reasoning. Supports hundreds of pages of text.",
|
2106
2107
|
"displayName": "Anthropic: Claude v2.0",
|
2107
2108
|
"enabled": false,
|
2108
2109
|
"functionCall": false,
|
2109
2110
|
"id": "anthropic/claude-2.0",
|
2110
2111
|
"maxTokens": 4096,
|
2111
|
-
"tokens": 100000,
|
2112
2112
|
"vision": false,
|
2113
2113
|
},
|
2114
2114
|
{
|
2115
|
+
"contextWindowTokens": 100000,
|
2115
2116
|
"description": "Anthropic's flagship model. Superior performance on tasks that require complex reasoning. Supports hundreds of pages of text.
|
2116
2117
|
|
2117
2118
|
_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/models/anthropic/claude-2.0) variant._",
|
@@ -2120,20 +2121,20 @@ _This is a faster endpoint, made available in collaboration with Anthropic, that
|
|
2120
2121
|
"functionCall": false,
|
2121
2122
|
"id": "anthropic/claude-2.0:beta",
|
2122
2123
|
"maxTokens": 4096,
|
2123
|
-
"tokens": 100000,
|
2124
2124
|
"vision": false,
|
2125
2125
|
},
|
2126
2126
|
{
|
2127
|
+
"contextWindowTokens": 4096,
|
2127
2128
|
"description": "A recreation trial of the original MythoMax-L2-B13 but with updated models. #merge",
|
2128
2129
|
"displayName": "ReMM SLERP 13B",
|
2129
2130
|
"enabled": false,
|
2130
2131
|
"functionCall": false,
|
2131
2132
|
"id": "undi95/remm-slerp-l2-13b",
|
2132
2133
|
"maxTokens": 400,
|
2133
|
-
"tokens": 4096,
|
2134
2134
|
"vision": false,
|
2135
2135
|
},
|
2136
2136
|
{
|
2137
|
+
"contextWindowTokens": 6144,
|
2137
2138
|
"description": "A recreation trial of the original MythoMax-L2-B13 but with updated models. #merge
|
2138
2139
|
|
2139
2140
|
_These are extended-context endpoints for [ReMM SLERP 13B](/models/undi95/remm-slerp-l2-13b). They may have higher prices._",
|
@@ -2142,40 +2143,40 @@ _These are extended-context endpoints for [ReMM SLERP 13B](/models/undi95/remm-s
|
|
2142
2143
|
"functionCall": false,
|
2143
2144
|
"id": "undi95/remm-slerp-l2-13b:extended",
|
2144
2145
|
"maxTokens": 400,
|
2145
|
-
"tokens": 6144,
|
2146
2146
|
"vision": false,
|
2147
2147
|
},
|
2148
2148
|
{
|
2149
|
+
"contextWindowTokens": 28672,
|
2149
2150
|
"description": "PaLM 2 fine-tuned for chatbot conversations that help with code-related questions.",
|
2150
2151
|
"displayName": "Google: PaLM 2 Code Chat",
|
2151
2152
|
"enabled": false,
|
2152
2153
|
"functionCall": false,
|
2153
2154
|
"id": "google/palm-2-codechat-bison",
|
2154
2155
|
"maxTokens": 4096,
|
2155
|
-
"tokens": 28672,
|
2156
2156
|
"vision": false,
|
2157
2157
|
},
|
2158
2158
|
{
|
2159
|
+
"contextWindowTokens": 36864,
|
2159
2160
|
"description": "PaLM 2 is a language model by Google with improved multilingual, reasoning and coding capabilities.",
|
2160
2161
|
"displayName": "Google: PaLM 2 Chat",
|
2161
2162
|
"enabled": false,
|
2162
2163
|
"functionCall": false,
|
2163
2164
|
"id": "google/palm-2-chat-bison",
|
2164
2165
|
"maxTokens": 4096,
|
2165
|
-
"tokens": 36864,
|
2166
2166
|
"vision": false,
|
2167
2167
|
},
|
2168
2168
|
{
|
2169
|
+
"contextWindowTokens": 4096,
|
2169
2170
|
"description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge",
|
2170
2171
|
"displayName": "MythoMax 13B",
|
2171
2172
|
"enabled": false,
|
2172
2173
|
"functionCall": false,
|
2173
2174
|
"id": "gryphe/mythomax-l2-13b",
|
2174
2175
|
"maxTokens": undefined,
|
2175
|
-
"tokens": 4096,
|
2176
2176
|
"vision": false,
|
2177
2177
|
},
|
2178
2178
|
{
|
2179
|
+
"contextWindowTokens": 4096,
|
2179
2180
|
"description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge
|
2180
2181
|
|
2181
2182
|
_These are higher-throughput endpoints for [MythoMax 13B](/models/gryphe/mythomax-l2-13b). They may have higher prices._",
|
@@ -2184,10 +2185,10 @@ _These are higher-throughput endpoints for [MythoMax 13B](/models/gryphe/mythoma
|
|
2184
2185
|
"functionCall": false,
|
2185
2186
|
"id": "gryphe/mythomax-l2-13b:nitro",
|
2186
2187
|
"maxTokens": undefined,
|
2187
|
-
"tokens": 4096,
|
2188
2188
|
"vision": false,
|
2189
2189
|
},
|
2190
2190
|
{
|
2191
|
+
"contextWindowTokens": 8192,
|
2191
2192
|
"description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge
|
2192
2193
|
|
2193
2194
|
_These are extended-context endpoints for [MythoMax 13B](/models/gryphe/mythomax-l2-13b). They may have higher prices._",
|
@@ -2196,40 +2197,40 @@ _These are extended-context endpoints for [MythoMax 13B](/models/gryphe/mythomax
|
|
2196
2197
|
"functionCall": false,
|
2197
2198
|
"id": "gryphe/mythomax-l2-13b:extended",
|
2198
2199
|
"maxTokens": 400,
|
2199
|
-
"tokens": 8192,
|
2200
2200
|
"vision": false,
|
2201
2201
|
},
|
2202
2202
|
{
|
2203
|
+
"contextWindowTokens": 4096,
|
2203
2204
|
"description": "A 13 billion parameter language model from Meta, fine tuned for chat completions",
|
2204
2205
|
"displayName": "Meta: Llama v2 13B Chat",
|
2205
2206
|
"enabled": false,
|
2206
2207
|
"functionCall": false,
|
2207
2208
|
"id": "meta-llama/llama-2-13b-chat",
|
2208
2209
|
"maxTokens": undefined,
|
2209
|
-
"tokens": 4096,
|
2210
2210
|
"vision": false,
|
2211
2211
|
},
|
2212
2212
|
{
|
2213
|
+
"contextWindowTokens": 8191,
|
2213
2214
|
"description": "GPT-4-0314 is the first version of GPT-4 released, with a context length of 8,192 tokens, and was supported until June 14. Training data: up to Sep 2021.",
|
2214
2215
|
"displayName": "OpenAI: GPT-4 (older v0314)",
|
2215
2216
|
"enabled": false,
|
2216
2217
|
"functionCall": false,
|
2217
2218
|
"id": "openai/gpt-4-0314",
|
2218
2219
|
"maxTokens": 4096,
|
2219
|
-
"tokens": 8191,
|
2220
2220
|
"vision": false,
|
2221
2221
|
},
|
2222
2222
|
{
|
2223
|
+
"contextWindowTokens": 8191,
|
2223
2224
|
"description": "OpenAI's flagship model, GPT-4 is a large-scale multimodal language model capable of solving difficult problems with greater accuracy than previous models due to its broader general knowledge and advanced reasoning capabilities. Training data: up to Sep 2021.",
|
2224
2225
|
"displayName": "OpenAI: GPT-4",
|
2225
2226
|
"enabled": false,
|
2226
2227
|
"functionCall": false,
|
2227
2228
|
"id": "openai/gpt-4",
|
2228
2229
|
"maxTokens": 4096,
|
2229
|
-
"tokens": 8191,
|
2230
2230
|
"vision": true,
|
2231
2231
|
},
|
2232
2232
|
{
|
2233
|
+
"contextWindowTokens": 4095,
|
2233
2234
|
"description": "GPT-3.5 Turbo is OpenAI's fastest model. It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.
|
2234
2235
|
|
2235
2236
|
Training data up to Sep 2021.",
|
@@ -2238,10 +2239,10 @@ Training data up to Sep 2021.",
|
|
2238
2239
|
"functionCall": false,
|
2239
2240
|
"id": "openai/gpt-3.5-turbo-0301",
|
2240
2241
|
"maxTokens": 4096,
|
2241
|
-
"tokens": 4095,
|
2242
2242
|
"vision": false,
|
2243
2243
|
},
|
2244
2244
|
{
|
2245
|
+
"contextWindowTokens": 16385,
|
2245
2246
|
"description": "The latest GPT-3.5 Turbo model with improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more. Training data: up to Sep 2021.
|
2246
2247
|
|
2247
2248
|
This version has a higher accuracy at responding in requested formats and a fix for a bug which caused a text encoding issue for non-English language function calls.",
|
@@ -2250,10 +2251,10 @@ This version has a higher accuracy at responding in requested formats and a fix
|
|
2250
2251
|
"functionCall": true,
|
2251
2252
|
"id": "openai/gpt-3.5-turbo-0125",
|
2252
2253
|
"maxTokens": 4096,
|
2253
|
-
"tokens": 16385,
|
2254
2254
|
"vision": false,
|
2255
2255
|
},
|
2256
2256
|
{
|
2257
|
+
"contextWindowTokens": 16385,
|
2257
2258
|
"description": "GPT-3.5 Turbo is OpenAI's fastest model. It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.
|
2258
2259
|
|
2259
2260
|
Training data up to Sep 2021.",
|
@@ -2262,7 +2263,6 @@ Training data up to Sep 2021.",
|
|
2262
2263
|
"functionCall": false,
|
2263
2264
|
"id": "openai/gpt-3.5-turbo",
|
2264
2265
|
"maxTokens": 4096,
|
2265
|
-
"tokens": 16385,
|
2266
2266
|
"vision": false,
|
2267
2267
|
},
|
2268
2268
|
]
|
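Every hunk above makes the same mechanical change: each model card's `tokens` field is renamed to `contextWindowTokens`, with the value left unchanged and the key re-sorted to the top of the alphabetically ordered object. A minimal TypeScript sketch of that rename, assuming a simplified card shape (`ModelCardV1`, `ModelCardV2`, and `migrateCard` are illustrative names, not the package's real types):

```ts
// Sketch only: field names mirror the snapshot above; the package's real
// type (and which fields are optional) may differ.
interface ModelCardV1 {
  description?: string;
  displayName?: string;
  enabled?: boolean;
  functionCall?: boolean;
  id: string;
  maxTokens?: number; // max tokens the model may generate in one reply
  tokens?: number; // old field: total context window size
  vision?: boolean;
}

// After the rename, the same value lives under `contextWindowTokens`.
type ModelCardV2 = Omit<ModelCardV1, 'tokens'> & {
  contextWindowTokens?: number;
};

// Hypothetical one-shot migration doing what each hunk does by hand.
const migrateCard = ({ tokens, ...rest }: ModelCardV1): ModelCardV2 => ({
  contextWindowTokens: tokens,
  ...rest,
});
```

Splitting the window size (`contextWindowTokens`) from the per-reply output cap (`maxTokens`) removes the ambiguity the old `tokens` name carried, which is consistent with how the two values already differ in the entries above (e.g. a 100000-token window with a 4096-token reply cap).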