@lobehub/chat 1.51.7 → 1.51.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +25 -0
- package/README.ja-JP.md +8 -8
- package/README.md +8 -8
- package/README.zh-CN.md +8 -8
- package/changelog/v1.json +9 -0
- package/package.json +1 -1
- package/src/app/(backend)/webapi/chat/models/[provider]/route.ts +1 -1
- package/src/libs/agent-runtime/ai360/index.ts +8 -1
- package/src/libs/agent-runtime/anthropic/index.ts +2 -1
- package/src/libs/agent-runtime/baichuan/index.ts +1 -1
- package/src/libs/agent-runtime/cloudflare/index.test.ts +0 -117
- package/src/libs/agent-runtime/cloudflare/index.ts +32 -11
- package/src/libs/agent-runtime/deepseek/index.ts +4 -1
- package/src/libs/agent-runtime/fireworksai/index.ts +8 -1
- package/src/libs/agent-runtime/giteeai/index.ts +9 -1
- package/src/libs/agent-runtime/github/index.test.ts +5 -16
- package/src/libs/agent-runtime/github/index.ts +31 -33
- package/src/libs/agent-runtime/google/index.ts +2 -1
- package/src/libs/agent-runtime/groq/index.ts +7 -1
- package/src/libs/agent-runtime/higress/index.ts +2 -1
- package/src/libs/agent-runtime/huggingface/index.ts +10 -1
- package/src/libs/agent-runtime/hunyuan/index.ts +3 -1
- package/src/libs/agent-runtime/internlm/index.ts +3 -1
- package/src/libs/agent-runtime/mistral/index.ts +2 -1
- package/src/libs/agent-runtime/moonshot/index.ts +3 -1
- package/src/libs/agent-runtime/novita/__snapshots__/index.test.ts.snap +48 -12
- package/src/libs/agent-runtime/novita/index.ts +9 -1
- package/src/libs/agent-runtime/openai/__snapshots__/index.test.ts.snap +70 -66
- package/src/libs/agent-runtime/openai/index.ts +37 -0
- package/src/libs/agent-runtime/openrouter/__snapshots__/index.test.ts.snap +172 -4
- package/src/libs/agent-runtime/openrouter/index.ts +17 -2
- package/src/libs/agent-runtime/qwen/index.ts +10 -1
- package/src/libs/agent-runtime/sensenova/index.ts +3 -1
- package/src/libs/agent-runtime/siliconcloud/index.ts +10 -1
- package/src/libs/agent-runtime/stepfun/index.ts +3 -1
- package/src/libs/agent-runtime/togetherai/__snapshots__/index.test.ts.snap +1309 -5
- package/src/libs/agent-runtime/togetherai/index.test.ts +0 -13
- package/src/libs/agent-runtime/togetherai/index.ts +25 -20
- package/src/libs/agent-runtime/utils/cloudflareHelpers.test.ts +0 -99
- package/src/libs/agent-runtime/utils/cloudflareHelpers.ts +0 -70
- package/src/libs/agent-runtime/xai/index.ts +3 -1
- package/src/libs/agent-runtime/zeroone/index.ts +3 -1
- package/src/libs/agent-runtime/zhipu/index.ts +3 -1
package/src/libs/agent-runtime/togetherai/__snapshots__/index.test.ts.snap
@@ -3,884 +3,2188 @@

The regenerated `LobeTogetherAI > models > should get models` snapshot adds two fields to every model entry — "contextWindowTokens" (a number where the context length is known, otherwise undefined) and "reasoning": false — alongside the existing "description", "displayName", "enabled", "functionCall", "id", "maxOutput", "tokens", and "vision" fields.

The model list itself also grows substantially. Newly listed entries include the base (non-chat) variants of Qwen 1.5, Gemma, LLaMA-2, Meta Llama 3, Mistral 7B, and Mixtral-8x22B; additional code models (Code Llama base and Python variants, Phind CodeLlama v2 34B, WizardCoder 15B and WizardCoder Python 34B); safety and moderation models (Llama Guard 7B, Meta Llama Guard 2 8B, GPT-JT-Moderation 6B); Microsoft Phi-2, NexusRaven V2 13B, OpenChat 3.5, StripedHyena Hessian 7B, OLMo 7B and OLMo Twin-2T, Evo-1 (8K and 131K), LLaMA-2-32K 7B, and the RedPajama-INCITE base/instruct variants; embedding models (BAAI bge-base and bge-large, UAE-Large-V1, Sentence-BERT, the M2-BERT retrieval checkpoints, bert-base-uncased); and image models (Stable Diffusion 1.5, 2.1, and XL 1.0, Openjourney v4, Realistic Vision 3.0). The "Mixtral-8x7B v0.1" entry is rewritten with its full id ("mistralai/Mixtral-8x7B-v0.1"), description, and a maxOutput/tokens value of 32768. The excerpt shown here ends mid-entry at the SOLAR-10.7B model.
|
557
1304
|
"displayName": "Upstage SOLAR Instruct v1 (11B)",
|
558
1305
|
"enabled": false,
|
559
1306
|
"functionCall": false,
|
560
1307
|
"id": "upstage/SOLAR-10.7B-Instruct-v1.0",
|
561
1308
|
"maxOutput": 4096,
|
1309
|
+
"reasoning": false,
|
562
1310
|
"tokens": 4096,
|
563
1311
|
"vision": false,
|
564
1312
|
},
|
565
1313
|
{
|
1314
|
+
"contextWindowTokens": undefined,
|
1315
|
+
"description": "Dreambooth model trained on a diverse set of analog photographs to provide an analog film effect. ",
|
1316
|
+
"displayName": "Analog Diffusion",
|
1317
|
+
"enabled": false,
|
1318
|
+
"functionCall": false,
|
1319
|
+
"id": "wavymulder/Analog-Diffusion",
|
1320
|
+
"maxOutput": undefined,
|
1321
|
+
"reasoning": false,
|
1322
|
+
"tokens": undefined,
|
1323
|
+
"vision": false,
|
1324
|
+
},
|
1325
|
+
{
|
1326
|
+
"contextWindowTokens": undefined,
|
566
1327
|
"description": "The Yi series models are large language models trained from scratch by developers at 01.AI",
|
567
1328
|
"displayName": "01-ai Yi Chat (34B)",
|
568
|
-
"enabled":
|
1329
|
+
"enabled": false,
|
569
1330
|
"functionCall": false,
|
570
1331
|
"id": "zero-one-ai/Yi-34B-Chat",
|
571
1332
|
"maxOutput": 4096,
|
1333
|
+
"reasoning": false,
|
1334
|
+
"tokens": 4096,
|
1335
|
+
"vision": false,
|
1336
|
+
},
|
1337
|
+
{
|
1338
|
+
"contextWindowTokens": undefined,
|
1339
|
+
"description": "The Yi series models are large language models trained from scratch by developers at 01.AI",
|
1340
|
+
"displayName": "01-ai Yi Base (34B)",
|
1341
|
+
"enabled": false,
|
1342
|
+
"functionCall": false,
|
1343
|
+
"id": "zero-one-ai/Yi-34B",
|
1344
|
+
"maxOutput": 4096,
|
1345
|
+
"reasoning": false,
|
572
1346
|
"tokens": 4096,
|
573
1347
|
"vision": false,
|
574
1348
|
},
|
575
1349
|
{
|
1350
|
+
"contextWindowTokens": undefined,
|
1351
|
+
"description": "The Yi series models are large language models trained from scratch by developers at 01.AI",
|
1352
|
+
"displayName": "01-ai Yi Base (6B)",
|
1353
|
+
"enabled": false,
|
1354
|
+
"functionCall": false,
|
1355
|
+
"id": "zero-one-ai/Yi-6B",
|
1356
|
+
"maxOutput": 4096,
|
1357
|
+
"reasoning": false,
|
1358
|
+
"tokens": 4096,
|
1359
|
+
"vision": false,
|
1360
|
+
},
|
1361
|
+
{
|
1362
|
+
"contextWindowTokens": undefined,
|
576
1363
|
"description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.",
|
577
1364
|
"displayName": "Llama3 8B Chat HF INT4",
|
578
1365
|
"enabled": false,
|
579
1366
|
"functionCall": false,
|
580
1367
|
"id": "togethercomputer/Llama-3-8b-chat-hf-int4",
|
581
1368
|
"maxOutput": 8192,
|
1369
|
+
"reasoning": false,
|
582
1370
|
"tokens": 8192,
|
583
1371
|
"vision": false,
|
584
1372
|
},
|
585
1373
|
{
|
1374
|
+
"contextWindowTokens": undefined,
|
586
1375
|
"description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.",
|
587
1376
|
"displayName": "Togethercomputer Llama3 8B Instruct Int8",
|
588
1377
|
"enabled": false,
|
589
1378
|
"functionCall": false,
|
590
1379
|
"id": "togethercomputer/Llama-3-8b-chat-hf-int8",
|
591
1380
|
"maxOutput": 8192,
|
1381
|
+
"reasoning": false,
|
592
1382
|
"tokens": 8192,
|
593
1383
|
"vision": false,
|
594
1384
|
},
|
595
1385
|
{
|
1386
|
+
"contextWindowTokens": undefined,
|
1387
|
+
"description": "The Pythia Scaling Suite is a collection of models developed to facilitate interpretability research.",
|
1388
|
+
"displayName": "Pythia (1B)",
|
1389
|
+
"enabled": false,
|
1390
|
+
"functionCall": false,
|
1391
|
+
"id": "EleutherAI/pythia-1b-v0",
|
1392
|
+
"maxOutput": 2048,
|
1393
|
+
"reasoning": false,
|
1394
|
+
"tokens": 2048,
|
1395
|
+
"vision": false,
|
1396
|
+
},
|
1397
|
+
{
|
1398
|
+
"contextWindowTokens": undefined,
|
1399
|
+
"description": "replit-code-v1-3b is a 2.7B Causal Language Model focused on Code Completion. The model has been trained on a subset of the Stack Dedup v1.2 dataset.",
|
1400
|
+
"displayName": "Replit-Code-v1 (3B)",
|
1401
|
+
"enabled": false,
|
1402
|
+
"functionCall": false,
|
1403
|
+
"id": "replit/replit-code-v1-3b",
|
1404
|
+
"maxOutput": 2048,
|
1405
|
+
"reasoning": false,
|
1406
|
+
"tokens": 2048,
|
1407
|
+
"vision": false,
|
1408
|
+
},
|
1409
|
+
{
|
1410
|
+
"contextWindowTokens": undefined,
|
596
1411
|
"description": "Chat model based on EleutherAI’s Pythia-7B model, and is fine-tuned with data focusing on dialog-style interactions.",
|
597
1412
|
"displayName": "Pythia-Chat-Base (7B)",
|
598
1413
|
"enabled": false,
|
599
1414
|
"functionCall": false,
|
600
1415
|
"id": "togethercomputer/Pythia-Chat-Base-7B-v0.16",
|
601
1416
|
"maxOutput": 2048,
|
1417
|
+
"reasoning": false,
|
602
1418
|
"tokens": 2048,
|
603
1419
|
"vision": false,
|
604
1420
|
},
|
605
1421
|
{
|
1422
|
+
"contextWindowTokens": undefined,
|
1423
|
+
"description": "Decoder-style transformer pretrained from scratch on 1T tokens of English text and code.",
|
1424
|
+
"displayName": "MPT (7B)",
|
1425
|
+
"enabled": false,
|
1426
|
+
"functionCall": false,
|
1427
|
+
"id": "mosaicml/mpt-7b",
|
1428
|
+
"maxOutput": 2048,
|
1429
|
+
"reasoning": false,
|
1430
|
+
"tokens": 2048,
|
1431
|
+
"vision": false,
|
1432
|
+
},
|
1433
|
+
{
|
1434
|
+
"contextWindowTokens": undefined,
|
606
1435
|
"description": "Chat model for dialogue generation finetuned on ShareGPT-Vicuna, Camel-AI, GPTeacher, Guanaco, Baize and some generated datasets.",
|
607
1436
|
"displayName": "MPT-Chat (30B)",
|
608
1437
|
"enabled": false,
|
609
1438
|
"functionCall": false,
|
610
1439
|
"id": "togethercomputer/mpt-30b-chat",
|
611
1440
|
"maxOutput": 2048,
|
1441
|
+
"reasoning": false,
|
1442
|
+
"tokens": 2048,
|
1443
|
+
"vision": false,
|
1444
|
+
},
|
1445
|
+
{
|
1446
|
+
"contextWindowTokens": undefined,
|
1447
|
+
"description": "T5 fine-tuned on more than 1000 additional tasks covering also more languages, making it better than T5 at majority of tasks. ",
|
1448
|
+
"displayName": "Flan T5 XL (3B)",
|
1449
|
+
"enabled": false,
|
1450
|
+
"functionCall": false,
|
1451
|
+
"id": "google/flan-t5-xl",
|
1452
|
+
"maxOutput": 512,
|
1453
|
+
"reasoning": false,
|
1454
|
+
"tokens": 512,
|
1455
|
+
"vision": false,
|
1456
|
+
},
|
1457
|
+
{
|
1458
|
+
"contextWindowTokens": undefined,
|
1459
|
+
"description": "Foundation model designed specifically for SQL generation tasks. Pre-trained for 3 epochs and fine-tuned for 10 epochs.",
|
1460
|
+
"displayName": "NSQL (6B)",
|
1461
|
+
"enabled": false,
|
1462
|
+
"functionCall": false,
|
1463
|
+
"id": "NumbersStation/nsql-6B",
|
1464
|
+
"maxOutput": 2048,
|
1465
|
+
"reasoning": false,
|
612
1466
|
"tokens": 2048,
|
613
1467
|
"vision": false,
|
614
1468
|
},
|
615
1469
|
{
|
1470
|
+
"contextWindowTokens": undefined,
|
616
1471
|
"description": "Chatbot trained by fine-tuning LLaMA on dialogue data gathered from the web.",
|
617
1472
|
"displayName": "Koala (7B)",
|
618
1473
|
"enabled": false,
|
619
1474
|
"functionCall": false,
|
620
1475
|
"id": "togethercomputer/Koala-7B",
|
621
1476
|
"maxOutput": 2048,
|
1477
|
+
"reasoning": false,
|
1478
|
+
"tokens": 2048,
|
1479
|
+
"vision": false,
|
1480
|
+
},
|
1481
|
+
{
|
1482
|
+
"contextWindowTokens": undefined,
|
1483
|
+
"description": "The Pythia Scaling Suite is a collection of models developed to facilitate interpretability research.",
|
1484
|
+
"displayName": "Pythia (6.9B)",
|
1485
|
+
"enabled": false,
|
1486
|
+
"functionCall": false,
|
1487
|
+
"id": "EleutherAI/pythia-6.9b",
|
1488
|
+
"maxOutput": 2048,
|
1489
|
+
"reasoning": false,
|
622
1490
|
"tokens": 2048,
|
623
1491
|
"vision": false,
|
624
1492
|
},
|
625
1493
|
{
|
1494
|
+
"contextWindowTokens": undefined,
|
626
1495
|
"description": "An instruction-following LLM based on pythia-12b, and trained on ~15k instruction/response fine tuning records generated by Databricks employees.",
|
627
1496
|
"displayName": "Dolly v2 (12B)",
|
628
1497
|
"enabled": false,
|
629
1498
|
"functionCall": false,
|
630
1499
|
"id": "databricks/dolly-v2-12b",
|
631
1500
|
"maxOutput": 2048,
|
1501
|
+
"reasoning": false,
|
632
1502
|
"tokens": 2048,
|
633
1503
|
"vision": false,
|
634
1504
|
},
|
635
1505
|
{
|
1506
|
+
"contextWindowTokens": undefined,
|
636
1507
|
"description": "An instruction-following LLM based on pythia-3b, and trained on ~15k instruction/response fine tuning records generated by Databricks employees.",
|
637
1508
|
"displayName": "Dolly v2 (3B)",
|
638
1509
|
"enabled": false,
|
639
1510
|
"functionCall": false,
|
640
1511
|
"id": "databricks/dolly-v2-3b",
|
641
1512
|
"maxOutput": 2048,
|
1513
|
+
"reasoning": false,
|
1514
|
+
"tokens": 2048,
|
1515
|
+
"vision": false,
|
1516
|
+
},
|
1517
|
+
{
|
1518
|
+
"contextWindowTokens": undefined,
|
1519
|
+
"description": "Autoregressive language model trained on the Pile. Its architecture intentionally resembles that of GPT-3, and is almost identical to that of GPT-J 6B.",
|
1520
|
+
"displayName": "GPT-NeoX (20B)",
|
1521
|
+
"enabled": false,
|
1522
|
+
"functionCall": false,
|
1523
|
+
"id": "EleutherAI/gpt-neox-20b",
|
1524
|
+
"maxOutput": 2048,
|
1525
|
+
"reasoning": false,
|
1526
|
+
"tokens": 2048,
|
1527
|
+
"vision": false,
|
1528
|
+
},
|
1529
|
+
{
|
1530
|
+
"contextWindowTokens": undefined,
|
1531
|
+
"description": "The Pythia Scaling Suite is a collection of models developed to facilitate interpretability research.",
|
1532
|
+
"displayName": "Pythia (2.8B)",
|
1533
|
+
"enabled": false,
|
1534
|
+
"functionCall": false,
|
1535
|
+
"id": "EleutherAI/pythia-2.8b-v0",
|
1536
|
+
"maxOutput": 2048,
|
1537
|
+
"reasoning": false,
|
642
1538
|
"tokens": 2048,
|
643
1539
|
"vision": false,
|
644
1540
|
},
|
645
1541
|
{
|
1542
|
+
"contextWindowTokens": undefined,
|
1543
|
+
"description": "LLaMA 13B fine-tuned on over 300,000 instructions. Designed for long responses, low hallucination rate, and absence of censorship mechanisms.",
|
1544
|
+
"displayName": "Nous Hermes (13B)",
|
1545
|
+
"enabled": false,
|
1546
|
+
"functionCall": false,
|
1547
|
+
"id": "NousResearch/Nous-Hermes-13b",
|
1548
|
+
"maxOutput": 2048,
|
1549
|
+
"reasoning": false,
|
1550
|
+
"tokens": 2048,
|
1551
|
+
"vision": false,
|
1552
|
+
},
|
1553
|
+
{
|
1554
|
+
"contextWindowTokens": undefined,
|
646
1555
|
"description": "Instruction-following language model built on LLaMA. Expanding upon the initial 52K dataset from the Alpaca model, an additional 534,530 focused on multi-lingual tasks.",
|
647
1556
|
"displayName": "Guanaco (65B) ",
|
648
1557
|
"enabled": false,
|
649
1558
|
"functionCall": false,
|
650
1559
|
"id": "togethercomputer/guanaco-65b",
|
651
1560
|
"maxOutput": 2048,
|
1561
|
+
"reasoning": false,
|
652
1562
|
"tokens": 2048,
|
653
1563
|
"vision": false,
|
654
1564
|
},
|
655
1565
|
{
|
1566
|
+
"contextWindowTokens": undefined,
|
1567
|
+
"description": "Language model trained on 2 trillion tokens with double the context length of Llama 1. Available in three sizes: 7B, 13B and 70B parameters",
|
1568
|
+
"displayName": "LLaMA-2 (7B)",
|
1569
|
+
"enabled": false,
|
1570
|
+
"functionCall": false,
|
1571
|
+
"id": "togethercomputer/llama-2-7b",
|
1572
|
+
"maxOutput": 4096,
|
1573
|
+
"reasoning": false,
|
1574
|
+
"tokens": 4096,
|
1575
|
+
"vision": false,
|
1576
|
+
},
|
1577
|
+
{
|
1578
|
+
"contextWindowTokens": undefined,
|
656
1579
|
"description": "Chatbot trained by fine-tuning Flan-t5-xl on user-shared conversations collected from ShareGPT.",
|
657
1580
|
"displayName": "Vicuna-FastChat-T5 (3B)",
|
658
1581
|
"enabled": false,
|
659
1582
|
"functionCall": false,
|
660
1583
|
"id": "lmsys/fastchat-t5-3b-v1.0",
|
661
1584
|
"maxOutput": 512,
|
1585
|
+
"reasoning": false,
|
662
1586
|
"tokens": 512,
|
663
1587
|
"vision": false,
|
664
1588
|
},
|
665
1589
|
{
|
1590
|
+
"contextWindowTokens": undefined,
|
1591
|
+
"description": "An auto-regressive language model, based on the transformer architecture. The model comes in different sizes: 7B, 13B, 33B and 65B parameters.",
|
1592
|
+
"displayName": "LLaMA (7B)",
|
1593
|
+
"enabled": false,
|
1594
|
+
"functionCall": false,
|
1595
|
+
"id": "huggyllama/llama-7b",
|
1596
|
+
"maxOutput": 2048,
|
1597
|
+
"reasoning": false,
|
1598
|
+
"tokens": 2048,
|
1599
|
+
"vision": false,
|
1600
|
+
},
|
1601
|
+
{
|
1602
|
+
"contextWindowTokens": undefined,
|
666
1603
|
"description": "Chat-based and open-source assistant. The vision of the project is to make a large language model that can run on a single high-end consumer GPU. ",
|
667
1604
|
"displayName": "Open-Assistant StableLM SFT-7 (7B)",
|
668
1605
|
"enabled": false,
|
669
1606
|
"functionCall": false,
|
670
1607
|
"id": "OpenAssistant/stablelm-7b-sft-v7-epoch-3",
|
671
1608
|
"maxOutput": 4096,
|
1609
|
+
"reasoning": false,
|
672
1610
|
"tokens": 4096,
|
673
1611
|
"vision": true,
|
674
1612
|
},
|
675
1613
|
{
|
1614
|
+
"contextWindowTokens": undefined,
|
1615
|
+
"description": "The Pythia Scaling Suite is a collection of models developed to facilitate interpretability research.",
|
1616
|
+
"displayName": "Pythia (12B)",
|
1617
|
+
"enabled": false,
|
1618
|
+
"functionCall": false,
|
1619
|
+
"id": "EleutherAI/pythia-12b-v0",
|
1620
|
+
"maxOutput": 2048,
|
1621
|
+
"reasoning": false,
|
1622
|
+
"tokens": 2048,
|
1623
|
+
"vision": false,
|
1624
|
+
},
|
1625
|
+
{
|
1626
|
+
"contextWindowTokens": undefined,
|
676
1627
|
"description": "Chat model for dialogue generation finetuned on ShareGPT-Vicuna, Camel-AI, GPTeacher, Guanaco, Baize and some generated datasets.",
|
677
1628
|
"displayName": "MPT-Chat (7B)",
|
678
1629
|
"enabled": false,
|
679
1630
|
"functionCall": false,
|
680
1631
|
"id": "togethercomputer/mpt-7b-chat",
|
681
1632
|
"maxOutput": 2048,
|
1633
|
+
"reasoning": false,
|
682
1634
|
"tokens": 2048,
|
683
1635
|
"vision": false,
|
684
1636
|
},
|
685
1637
|
{
|
1638
|
+
"contextWindowTokens": undefined,
|
1639
|
+
"description": "Transformer model trained using Ben Wang's Mesh Transformer JAX. ",
|
1640
|
+
"displayName": "GPT-J (6B)",
|
1641
|
+
"enabled": false,
|
1642
|
+
"functionCall": false,
|
1643
|
+
"id": "EleutherAI/gpt-j-6b",
|
1644
|
+
"maxOutput": 2048,
|
1645
|
+
"reasoning": false,
|
1646
|
+
"tokens": 2048,
|
1647
|
+
"vision": false,
|
1648
|
+
},
|
1649
|
+
{
|
1650
|
+
"contextWindowTokens": undefined,
|
686
1651
|
"description": "Chat-based and open-source assistant. The vision of the project is to make a large language model that can run on a single high-end consumer GPU. ",
|
687
1652
|
"displayName": "Open-Assistant Pythia SFT-4 (12B)",
|
688
1653
|
"enabled": false,
|
689
1654
|
"functionCall": false,
|
690
1655
|
"id": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
|
691
1656
|
"maxOutput": 2048,
|
1657
|
+
"reasoning": false,
|
692
1658
|
"tokens": 2048,
|
693
1659
|
"vision": true,
|
694
1660
|
},
|
695
1661
|
{
|
1662
|
+
"contextWindowTokens": undefined,
|
696
1663
|
"description": "Chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT. Auto-regressive model, based on the transformer architecture.",
|
697
1664
|
"displayName": "Vicuna v1.3 (7B)",
|
698
1665
|
"enabled": false,
|
699
1666
|
"functionCall": false,
|
700
1667
|
"id": "lmsys/vicuna-7b-v1.3",
|
701
1668
|
"maxOutput": 2048,
|
1669
|
+
"reasoning": false,
|
702
1670
|
"tokens": 2048,
|
703
1671
|
"vision": false,
|
704
1672
|
},
|
705
1673
|
{
|
1674
|
+
"contextWindowTokens": undefined,
|
1675
|
+
"description": "This model is fine-tuned from CodeLlama-34B-Python and achieves 69.5% pass@1 on HumanEval.",
|
1676
|
+
"displayName": "Phind Code LLaMA Python v1 (34B)",
|
1677
|
+
"enabled": false,
|
1678
|
+
"functionCall": false,
|
1679
|
+
"id": "Phind/Phind-CodeLlama-34B-Python-v1",
|
1680
|
+
"maxOutput": 16384,
|
1681
|
+
"reasoning": false,
|
1682
|
+
"tokens": 16384,
|
1683
|
+
"vision": false,
|
1684
|
+
},
|
1685
|
+
{
|
1686
|
+
"contextWindowTokens": undefined,
|
1687
|
+
"description": "NSQL is a family of autoregressive open-source large foundation models (FMs) designed specifically for SQL generation tasks",
|
1688
|
+
"displayName": "NSQL LLaMA-2 (7B)",
|
1689
|
+
"enabled": false,
|
1690
|
+
"functionCall": false,
|
1691
|
+
"id": "NumbersStation/nsql-llama-2-7B",
|
1692
|
+
"maxOutput": 4096,
|
1693
|
+
"reasoning": false,
|
1694
|
+
"tokens": 4096,
|
1695
|
+
"vision": false,
|
1696
|
+
},
|
1697
|
+
{
|
1698
|
+
"contextWindowTokens": undefined,
|
706
1699
|
"description": "Nous-Hermes-Llama2-70b is a state-of-the-art language model fine-tuned on over 300,000 instructions.",
|
707
1700
|
"displayName": "Nous Hermes LLaMA-2 (70B)",
|
708
1701
|
"enabled": false,
|
709
1702
|
"functionCall": false,
|
710
1703
|
"id": "NousResearch/Nous-Hermes-Llama2-70b",
|
711
1704
|
"maxOutput": 4096,
|
1705
|
+
"reasoning": false,
|
712
1706
|
"tokens": 4096,
|
713
1707
|
"vision": false,
|
714
1708
|
},
|
715
1709
|
{
|
1710
|
+
"contextWindowTokens": undefined,
|
1711
|
+
"description": "This model achieves a substantial and comprehensive improvement on coding, mathematical reasoning and open-domain conversation capacities.",
|
1712
|
+
"displayName": "WizardLM v1.0 (70B)",
|
1713
|
+
"enabled": false,
|
1714
|
+
"functionCall": false,
|
1715
|
+
"id": "WizardLM/WizardLM-70B-V1.0",
|
1716
|
+
"maxOutput": 4096,
|
1717
|
+
"reasoning": false,
|
1718
|
+
"tokens": 4096,
|
1719
|
+
"vision": false,
|
1720
|
+
},
|
1721
|
+
{
|
1722
|
+
"contextWindowTokens": undefined,
|
1723
|
+
"description": "An auto-regressive language model, based on the transformer architecture. The model comes in different sizes: 7B, 13B, 33B and 65B parameters.",
|
1724
|
+
"displayName": "LLaMA (65B)",
|
1725
|
+
"enabled": false,
|
1726
|
+
"functionCall": false,
|
1727
|
+
"id": "huggyllama/llama-65b",
|
1728
|
+
"maxOutput": 2048,
|
1729
|
+
"reasoning": false,
|
1730
|
+
"tokens": 2048,
|
1731
|
+
"vision": false,
|
1732
|
+
},
|
1733
|
+
{
|
1734
|
+
"contextWindowTokens": undefined,
|
716
1735
|
"description": "Vicuna is a chat assistant trained by fine-tuning Llama 2 on user-shared conversations collected from ShareGPT.",
|
717
1736
|
"displayName": "Vicuna v1.5 16K (13B)",
|
718
1737
|
"enabled": false,
|
719
1738
|
"functionCall": false,
|
720
1739
|
"id": "lmsys/vicuna-13b-v1.5-16k",
|
721
1740
|
"maxOutput": 16384,
|
1741
|
+
"reasoning": false,
|
722
1742
|
"tokens": 16384,
|
723
1743
|
"vision": false,
|
724
1744
|
},
|
725
1745
|
{
|
1746
|
+
"contextWindowTokens": undefined,
|
726
1747
|
"description": "Chat model fine-tuned from EleutherAI’s GPT-NeoX with over 40 million instructions on carbon reduced compute.",
|
727
1748
|
"displayName": "GPT-NeoXT-Chat-Base (20B)",
|
728
1749
|
"enabled": false,
|
729
1750
|
"functionCall": false,
|
730
1751
|
"id": "togethercomputer/GPT-NeoXT-Chat-Base-20B",
|
731
1752
|
"maxOutput": 2048,
|
1753
|
+
"reasoning": false,
|
732
1754
|
"tokens": 2048,
|
733
1755
|
"vision": false,
|
734
1756
|
},
|
735
1757
|
{
|
1758
|
+
"contextWindowTokens": undefined,
|
736
1759
|
"description": "A fine-tuned version of Mistral-7B to act as a helpful assistant.",
|
737
1760
|
"displayName": "Zephyr-7B-ß",
|
738
1761
|
"enabled": false,
|
739
1762
|
"functionCall": false,
|
740
1763
|
"id": "HuggingFaceH4/zephyr-7b-beta",
|
741
1764
|
"maxOutput": 32768,
|
1765
|
+
"reasoning": false,
|
742
1766
|
"tokens": 32768,
|
743
1767
|
"vision": false,
|
744
1768
|
},
|
745
1769
|
{
|
1770
|
+
"contextWindowTokens": undefined,
|
1771
|
+
"description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
|
1772
|
+
"displayName": "Code Llama Python (13B)",
|
1773
|
+
"enabled": false,
|
1774
|
+
"functionCall": false,
|
1775
|
+
"id": "togethercomputer/CodeLlama-13b-Python",
|
1776
|
+
"maxOutput": 16384,
|
1777
|
+
"reasoning": false,
|
1778
|
+
"tokens": 16384,
|
1779
|
+
"vision": false,
|
1780
|
+
},
|
1781
|
+
{
|
1782
|
+
"contextWindowTokens": undefined,
|
1783
|
+
"description": "Language model trained on 2 trillion tokens with double the context length of Llama 1. Available in three sizes: 7B, 13B and 70B parameters",
|
1784
|
+
"displayName": "LLaMA-2 (13B)",
|
1785
|
+
"enabled": false,
|
1786
|
+
"functionCall": false,
|
1787
|
+
"id": "togethercomputer/llama-2-13b",
|
1788
|
+
"maxOutput": 4096,
|
1789
|
+
"reasoning": false,
|
1790
|
+
"tokens": 4096,
|
1791
|
+
"vision": false,
|
1792
|
+
},
|
1793
|
+
{
|
1794
|
+
"contextWindowTokens": undefined,
|
746
1795
|
"description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
|
747
1796
|
"displayName": "Code Llama Instruct (7B)",
|
748
1797
|
"enabled": false,
|
749
1798
|
"functionCall": false,
|
750
1799
|
"id": "togethercomputer/CodeLlama-7b-Instruct",
|
751
1800
|
"maxOutput": 16384,
|
1801
|
+
"reasoning": false,
|
752
1802
|
"tokens": 16384,
|
753
1803
|
"vision": false,
|
754
1804
|
},
|
755
1805
|
{
|
1806
|
+
"contextWindowTokens": undefined,
|
756
1807
|
"description": "Instruction-following language model built on LLaMA. Expanding upon the initial 52K dataset from the Alpaca model, an additional 534,530 focused on multi-lingual tasks.",
|
757
1808
|
"displayName": "Guanaco (13B) ",
|
758
1809
|
"enabled": false,
|
759
1810
|
"functionCall": false,
|
760
1811
|
"id": "togethercomputer/guanaco-13b",
|
761
1812
|
"maxOutput": 2048,
|
1813
|
+
"reasoning": false,
|
1814
|
+
"tokens": 2048,
|
1815
|
+
"vision": false,
|
1816
|
+
},
|
1817
|
+
{
|
1818
|
+
"contextWindowTokens": undefined,
|
1819
|
+
"description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
|
1820
|
+
"displayName": "Code Llama Python (34B)",
|
1821
|
+
"enabled": false,
|
1822
|
+
"functionCall": false,
|
1823
|
+
"id": "togethercomputer/CodeLlama-34b-Python",
|
1824
|
+
"maxOutput": 16384,
|
1825
|
+
"reasoning": false,
|
1826
|
+
"tokens": 16384,
|
1827
|
+
"vision": false,
|
1828
|
+
},
|
1829
|
+
{
|
1830
|
+
"contextWindowTokens": undefined,
|
1831
|
+
"description": "Designed for short-form instruction following, finetuned on Dolly and Anthropic HH-RLHF and other datasets",
|
1832
|
+
"displayName": "MPT-Instruct (7B)",
|
1833
|
+
"enabled": false,
|
1834
|
+
"functionCall": false,
|
1835
|
+
"id": "mosaicml/mpt-7b-instruct",
|
1836
|
+
"maxOutput": 2048,
|
1837
|
+
"reasoning": false,
|
762
1838
|
"tokens": 2048,
|
763
1839
|
"vision": false,
|
764
1840
|
},
|
765
1841
|
{
|
1842
|
+
"contextWindowTokens": undefined,
|
766
1843
|
"description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters",
|
767
1844
|
"displayName": "LLaMA-2 Chat (70B)",
|
768
1845
|
"enabled": false,
|
769
1846
|
"functionCall": false,
|
770
1847
|
"id": "togethercomputer/llama-2-70b-chat",
|
771
1848
|
"maxOutput": 4096,
|
1849
|
+
"reasoning": false,
|
772
1850
|
"tokens": 4096,
|
773
1851
|
"vision": false,
|
774
1852
|
},
|
775
1853
|
{
|
1854
|
+
"contextWindowTokens": undefined,
|
776
1855
|
"description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
|
777
1856
|
"displayName": "Code Llama Instruct (34B)",
|
778
1857
|
"enabled": false,
|
779
1858
|
"functionCall": false,
|
780
1859
|
"id": "togethercomputer/CodeLlama-34b-Instruct",
|
781
1860
|
"maxOutput": 16384,
|
1861
|
+
"reasoning": false,
|
782
1862
|
"tokens": 16384,
|
783
1863
|
"vision": false,
|
784
1864
|
},
|
785
1865
|
{
|
1866
|
+
"contextWindowTokens": undefined,
|
1867
|
+
"description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
|
1868
|
+
"displayName": "Code Llama (34B)",
|
1869
|
+
"enabled": false,
|
1870
|
+
"functionCall": false,
|
1871
|
+
"id": "togethercomputer/CodeLlama-34b",
|
1872
|
+
"maxOutput": 16384,
|
1873
|
+
"reasoning": false,
|
1874
|
+
"tokens": 16384,
|
1875
|
+
"vision": false,
|
1876
|
+
},
|
1877
|
+
{
|
1878
|
+
"contextWindowTokens": undefined,
|
1879
|
+
"description": "An autoregressive language models for program synthesis.",
|
1880
|
+
"displayName": "CodeGen2 (16B)",
|
1881
|
+
"enabled": false,
|
1882
|
+
"functionCall": false,
|
1883
|
+
"id": "Salesforce/codegen2-16B",
|
1884
|
+
"maxOutput": 2048,
|
1885
|
+
"reasoning": false,
|
1886
|
+
"tokens": 2048,
|
1887
|
+
"vision": false,
|
1888
|
+
},
|
1889
|
+
{
|
1890
|
+
"contextWindowTokens": undefined,
|
1891
|
+
"description": "An autoregressive language models for program synthesis.",
|
1892
|
+
"displayName": "CodeGen2 (7B)",
|
1893
|
+
"enabled": false,
|
1894
|
+
"functionCall": false,
|
1895
|
+
"id": "Salesforce/codegen2-7B",
|
1896
|
+
"maxOutput": 2048,
|
1897
|
+
"reasoning": false,
|
1898
|
+
"tokens": 2048,
|
1899
|
+
"vision": false,
|
1900
|
+
},
|
1901
|
+
{
|
1902
|
+
"contextWindowTokens": undefined,
|
1903
|
+
"description": "Flan T5 XXL (11B parameters) is T5 fine-tuned on 1.8K tasks ([paper](https://arxiv.org/pdf/2210.11416.pdf)).",
|
1904
|
+
"displayName": "Flan T5 XXL (11B)",
|
1905
|
+
"enabled": false,
|
1906
|
+
"functionCall": false,
|
1907
|
+
"id": "google/flan-t5-xxl",
|
1908
|
+
"maxOutput": 512,
|
1909
|
+
"reasoning": false,
|
1910
|
+
"tokens": 512,
|
1911
|
+
"vision": false,
|
1912
|
+
},
|
1913
|
+
{
|
1914
|
+
"contextWindowTokens": undefined,
|
1915
|
+
"description": "Language model trained on 2 trillion tokens with double the context length of Llama 1. Available in three sizes: 7B, 13B and 70B parameters",
|
1916
|
+
"displayName": "LLaMA-2 (70B)",
|
1917
|
+
"enabled": false,
|
1918
|
+
"functionCall": false,
|
1919
|
+
"id": "togethercomputer/llama-2-70b",
|
1920
|
+
"maxOutput": 4096,
|
1921
|
+
"reasoning": false,
|
1922
|
+
"tokens": 4096,
|
1923
|
+
"vision": false,
|
1924
|
+
},
|
1925
|
+
{
|
1926
|
+
"contextWindowTokens": undefined,
|
1927
|
+
"description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
|
1928
|
+
"displayName": "Code Llama (7B)",
|
1929
|
+
"enabled": false,
|
1930
|
+
"functionCall": false,
|
1931
|
+
"id": "codellama/CodeLlama-7b-hf",
|
1932
|
+
"maxOutput": 16384,
|
1933
|
+
"reasoning": false,
|
1934
|
+
"tokens": 16384,
|
1935
|
+
"vision": false,
|
1936
|
+
},
|
1937
|
+
{
|
1938
|
+
"contextWindowTokens": undefined,
|
1939
|
+
"description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
|
1940
|
+
"displayName": "Code Llama (13B)",
|
1941
|
+
"enabled": false,
|
1942
|
+
"functionCall": false,
|
1943
|
+
"id": "codellama/CodeLlama-13b-hf",
|
1944
|
+
"maxOutput": 16384,
|
1945
|
+
"reasoning": false,
|
1946
|
+
"tokens": 16384,
|
1947
|
+
"vision": false,
|
1948
|
+
},
|
1949
|
+
{
|
1950
|
+
"contextWindowTokens": undefined,
|
786
1951
|
"description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
|
787
1952
|
"displayName": "Code Llama Instruct (13B)",
|
788
1953
|
"enabled": false,
|
789
1954
|
"functionCall": false,
|
790
1955
|
"id": "togethercomputer/CodeLlama-13b-Instruct",
|
791
1956
|
"maxOutput": 16384,
|
1957
|
+
"reasoning": false,
|
792
1958
|
"tokens": 16384,
|
793
1959
|
"vision": false,
|
794
1960
|
},
|
795
1961
|
{
|
1962
|
+
"contextWindowTokens": undefined,
|
796
1963
|
"description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters",
|
797
1964
|
"displayName": "LLaMA-2 Chat (13B)",
|
798
1965
|
"enabled": false,
|
799
1966
|
"functionCall": false,
|
800
1967
|
"id": "togethercomputer/llama-2-13b-chat",
|
801
1968
|
"maxOutput": 4096,
|
1969
|
+
"reasoning": false,
|
802
1970
|
"tokens": 4096,
|
803
1971
|
"vision": false,
|
804
1972
|
},
|
805
1973
|
{
|
1974
|
+
"contextWindowTokens": undefined,
|
806
1975
|
"description": "Chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT. Auto-regressive model, based on the transformer architecture.",
|
807
1976
|
"displayName": "Vicuna v1.3 (13B)",
|
808
1977
|
"enabled": false,
|
809
1978
|
"functionCall": false,
|
810
1979
|
"id": "lmsys/vicuna-13b-v1.3",
|
811
1980
|
"maxOutput": 2048,
|
1981
|
+
"reasoning": false,
|
812
1982
|
"tokens": 2048,
|
813
1983
|
"vision": false,
|
814
1984
|
},
|
815
1985
|
{
|
1986
|
+
"contextWindowTokens": undefined,
|
1987
|
+
"description": "An auto-regressive language model, based on the transformer architecture. The model comes in different sizes: 7B, 13B, 33B and 65B parameters.",
|
1988
|
+
"displayName": "LLaMA (13B)",
|
1989
|
+
"enabled": false,
|
1990
|
+
"functionCall": false,
|
1991
|
+
"id": "huggyllama/llama-13b",
|
1992
|
+
"maxOutput": 2048,
|
1993
|
+
"reasoning": false,
|
1994
|
+
"tokens": 2048,
|
1995
|
+
"vision": false,
|
1996
|
+
},
|
1997
|
+
{
|
1998
|
+
"contextWindowTokens": undefined,
|
816
1999
|
"description": "Fine-tuned from StarCoder to act as a helpful coding assistant. As an alpha release is only intended for educational or research purpopses.",
|
817
2000
|
"displayName": "StarCoderChat Alpha (16B)",
|
818
2001
|
"enabled": false,
|
819
2002
|
"functionCall": false,
|
820
2003
|
"id": "HuggingFaceH4/starchat-alpha",
|
821
2004
|
"maxOutput": 8192,
|
2005
|
+
"reasoning": false,
|
822
2006
|
"tokens": 8192,
|
823
2007
|
"vision": false,
|
824
2008
|
},
|
825
2009
|
{
|
2010
|
+
"contextWindowTokens": undefined,
|
2011
|
+
"description": "An auto-regressive language model, based on the transformer architecture. The model comes in different sizes: 7B, 13B, 33B and 65B parameters.",
|
2012
|
+
"displayName": "LLaMA (30B)",
|
2013
|
+
"enabled": false,
|
2014
|
+
"functionCall": false,
|
2015
|
+
"id": "huggyllama/llama-30b",
|
2016
|
+
"maxOutput": 2048,
|
2017
|
+
"reasoning": false,
|
2018
|
+
"tokens": 2048,
|
2019
|
+
"vision": false,
|
2020
|
+
},
|
2021
|
+
{
|
2022
|
+
"contextWindowTokens": undefined,
|
2023
|
+
"description": "Decoder-only language model pre-trained on a diverse collection of English and Code datasets with a sequence length of 4096.",
|
2024
|
+
"displayName": "StableLM-Base-Alpha (3B)",
|
2025
|
+
"enabled": false,
|
2026
|
+
"functionCall": false,
|
2027
|
+
"id": "stabilityai/stablelm-base-alpha-3b",
|
2028
|
+
"maxOutput": 4096,
|
2029
|
+
"reasoning": false,
|
2030
|
+
"tokens": 4096,
|
2031
|
+
"vision": false,
|
2032
|
+
},
|
2033
|
+
{
|
2034
|
+
"contextWindowTokens": undefined,
|
2035
|
+
"description": "Decoder-only language model pre-trained on a diverse collection of English and Code datasets with a sequence length of 4096.",
|
2036
|
+
"displayName": "StableLM-Base-Alpha (7B)",
|
2037
|
+
"enabled": false,
|
2038
|
+
"functionCall": false,
|
2039
|
+
"id": "stabilityai/stablelm-base-alpha-7b",
|
2040
|
+
"maxOutput": 4096,
|
2041
|
+
"reasoning": false,
|
2042
|
+
"tokens": 4096,
|
2043
|
+
"vision": false,
|
2044
|
+
},
|
2045
|
+
{
|
2046
|
+
"contextWindowTokens": undefined,
|
2047
|
+
"description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
|
2048
|
+
"displayName": "Code Llama Python (7B)",
|
2049
|
+
"enabled": false,
|
2050
|
+
"functionCall": false,
|
2051
|
+
"id": "togethercomputer/CodeLlama-7b-Python",
|
2052
|
+
"maxOutput": 16384,
|
2053
|
+
"reasoning": false,
|
2054
|
+
"tokens": 16384,
|
2055
|
+
"vision": false,
|
2056
|
+
},
|
2057
|
+
{
|
2058
|
+
"contextWindowTokens": undefined,
|
2059
|
+
"description": "Defog's SQLCoder is a state-of-the-art LLM for converting natural language questions to SQL queries, fine-tuned from Bigcode's Starcoder 15B model.",
|
2060
|
+
"displayName": "Sqlcoder (15B)",
|
2061
|
+
"enabled": false,
|
2062
|
+
"functionCall": false,
|
2063
|
+
"id": "defog/sqlcoder",
|
2064
|
+
"maxOutput": 8192,
|
2065
|
+
"reasoning": false,
|
2066
|
+
"tokens": 8192,
|
2067
|
+
"vision": false,
|
2068
|
+
},
|
2069
|
+
{
|
2070
|
+
"contextWindowTokens": undefined,
|
2071
|
+
"description": "Trained on 80+ coding languages, uses Multi Query Attention, an 8K context window, and was trained using the Fill-in-the-Middle objective on 1T tokens.",
|
2072
|
+
"displayName": "StarCoder (16B)",
|
2073
|
+
"enabled": false,
|
2074
|
+
"functionCall": false,
|
2075
|
+
"id": "bigcode/starcoder",
|
2076
|
+
"maxOutput": 8192,
|
2077
|
+
"reasoning": false,
|
2078
|
+
"tokens": 8192,
|
2079
|
+
"vision": false,
|
2080
|
+
},
|
2081
|
+
{
|
2082
|
+
"contextWindowTokens": undefined,
|
826
2083
|
"description": "An instruction-following LLM based on pythia-7b, and trained on ~15k instruction/response fine tuning records generated by Databricks employees.",
|
827
2084
|
"displayName": "Dolly v2 (7B)",
|
828
2085
|
"enabled": false,
|
829
2086
|
"functionCall": false,
|
830
2087
|
"id": "databricks/dolly-v2-7b",
|
831
2088
|
"maxOutput": 2048,
|
2089
|
+
"reasoning": false,
|
832
2090
|
"tokens": 2048,
|
833
2091
|
"vision": false,
|
834
2092
|
},
|
835
2093
|
{
|
2094
|
+
"contextWindowTokens": undefined,
|
836
2095
|
"description": "Instruction-following language model built on LLaMA. Expanding upon the initial 52K dataset from the Alpaca model, an additional 534,530 focused on multi-lingual tasks.",
|
837
2096
|
"displayName": "Guanaco (33B) ",
|
838
2097
|
"enabled": false,
|
839
2098
|
"functionCall": false,
|
840
2099
|
"id": "togethercomputer/guanaco-33b",
|
841
2100
|
"maxOutput": 2048,
|
2101
|
+
"reasoning": false,
|
842
2102
|
"tokens": 2048,
|
843
2103
|
"vision": false,
|
844
2104
|
},
|
845
2105
|
{
|
2106
|
+
"contextWindowTokens": undefined,
|
846
2107
|
"description": "Chatbot trained by fine-tuning LLaMA on dialogue data gathered from the web.",
|
847
2108
|
"displayName": "Koala (13B)",
|
848
2109
|
"enabled": false,
|
849
2110
|
"functionCall": false,
|
850
2111
|
"id": "togethercomputer/Koala-13B",
|
851
2112
|
"maxOutput": 2048,
|
2113
|
+
"reasoning": false,
|
852
2114
|
"tokens": 2048,
|
853
2115
|
"vision": false,
|
854
2116
|
},
|
855
2117
|
{
|
2118
|
+
"contextWindowTokens": undefined,
|
2119
|
+
"description": "Fork of GPT-J instruction tuned to excel at few-shot prompts (blog post).",
|
2120
|
+
"displayName": "GPT-JT (6B)",
|
2121
|
+
"enabled": false,
|
2122
|
+
"functionCall": false,
|
2123
|
+
"id": "togethercomputer/GPT-JT-6B-v1",
|
2124
|
+
"maxOutput": 2048,
|
2125
|
+
"reasoning": false,
|
2126
|
+
"tokens": 2048,
|
2127
|
+
"vision": false,
|
2128
|
+
},
|
2129
|
+
{
|
2130
|
+
"contextWindowTokens": undefined,
|
856
2131
|
"description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters",
|
857
2132
|
"displayName": "LLaMA-2 Chat (7B)",
|
858
2133
|
"enabled": false,
|
859
2134
|
"functionCall": false,
|
860
2135
|
"id": "togethercomputer/llama-2-7b-chat",
|
861
2136
|
"maxOutput": 4096,
|
2137
|
+
"reasoning": false,
|
862
2138
|
"tokens": 4096,
|
863
2139
|
"vision": false,
|
864
2140
|
},
|
865
2141
|
{
|
2142
|
+
"contextWindowTokens": undefined,
|
866
2143
|
"description": "Built on the Llama2 architecture, SOLAR-10.7B incorporates the innovative Upstage Depth Up-Scaling",
|
867
2144
|
"displayName": "Upstage SOLAR Instruct v1 (11B)-Int4",
|
868
2145
|
"enabled": false,
|
869
2146
|
"functionCall": false,
|
870
2147
|
"id": "togethercomputer/SOLAR-10.7B-Instruct-v1.0-int4",
|
871
2148
|
"maxOutput": 4096,
|
2149
|
+
"reasoning": false,
|
872
2150
|
"tokens": 4096,
|
873
2151
|
"vision": false,
|
874
2152
|
},
|
875
2153
|
{
|
2154
|
+
"contextWindowTokens": undefined,
|
876
2155
|
"description": "Instruction-following language model built on LLaMA. Expanding upon the initial 52K dataset from the Alpaca model, an additional 534,530 focused on multi-lingual tasks. ",
|
877
2156
|
"displayName": "Guanaco (7B) ",
|
878
2157
|
"enabled": false,
|
879
2158
|
"functionCall": false,
|
880
2159
|
"id": "togethercomputer/guanaco-7b",
|
881
2160
|
"maxOutput": 2048,
|
2161
|
+
"reasoning": false,
|
882
2162
|
"tokens": 2048,
|
883
2163
|
"vision": false,
|
884
2164
|
},
|
2165
|
+
{
|
2166
|
+
"contextWindowTokens": undefined,
|
2167
|
+
"description": "Llemma 7B is a language model for mathematics. It was initialized with Code Llama 7B weights, and trained on the Proof-Pile-2 for 200B tokens.",
|
2168
|
+
"displayName": "Llemma (7B)",
|
2169
|
+
"enabled": false,
|
2170
|
+
"functionCall": false,
|
2171
|
+
"id": "EleutherAI/llemma_7b",
|
2172
|
+
"maxOutput": 4096,
|
2173
|
+
"reasoning": false,
|
2174
|
+
"tokens": 4096,
|
2175
|
+
"vision": false,
|
2176
|
+
},
|
2177
|
+
{
|
2178
|
+
"contextWindowTokens": undefined,
|
2179
|
+
"description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.",
|
2180
|
+
"displayName": "Code Llama (34B)",
|
2181
|
+
"enabled": false,
|
2182
|
+
"functionCall": false,
|
2183
|
+
"id": "codellama/CodeLlama-34b-hf",
|
2184
|
+
"maxOutput": 16384,
|
2185
|
+
"reasoning": false,
|
2186
|
+
"tokens": 16384,
|
2187
|
+
"vision": false,
|
2188
|
+
},
|
885
2189
|
]
|
886
2190
|
`;
|