@botpress/cognitive 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +9 -9
- package/.turbo/turbo-generate.log +1 -1
- package/dist/index.cjs +7 -4
- package/dist/index.cjs.map +2 -2
- package/dist/index.d.ts +14 -13
- package/dist/index.mjs +7 -4
- package/dist/index.mjs.map +2 -2
- package/e2e/client.test.ts +126 -0
- package/e2e/client.ts +13 -0
- package/e2e/models.json +562 -0
- package/e2e/models.test.ts +131 -0
- package/package.json +4 -4
- package/tsconfig.build.json +9 -0
package/e2e/models.json
ADDED
|
@@ -0,0 +1,562 @@
|
|
|
1
|
+
[
|
|
2
|
+
{
|
|
3
|
+
"ref": "openai:o1-2024-12-17",
|
|
4
|
+
"integration": "openai",
|
|
5
|
+
"id": "o1-2024-12-17",
|
|
6
|
+
"name": "GPT o1",
|
|
7
|
+
"description": "The o1 model is designed to solve hard problems across domains. The o1 series of models are trained with reinforcement learning to perform complex reasoning. o1 models think before they answer, producing a long internal chain of thought before responding to the user.",
|
|
8
|
+
"input": {
|
|
9
|
+
"costPer1MTokens": 15,
|
|
10
|
+
"maxTokens": 200000
|
|
11
|
+
},
|
|
12
|
+
"output": {
|
|
13
|
+
"costPer1MTokens": 60,
|
|
14
|
+
"maxTokens": 100000
|
|
15
|
+
},
|
|
16
|
+
"tags": ["reasoning", "vision", "general-purpose"]
|
|
17
|
+
},
|
|
18
|
+
{
|
|
19
|
+
"ref": "openai:o1-mini-2024-09-12",
|
|
20
|
+
"integration": "openai",
|
|
21
|
+
"id": "o1-mini-2024-09-12",
|
|
22
|
+
"name": "GPT o1-mini",
|
|
23
|
+
"description": "The o1-mini model is a fast and affordable reasoning model for specialized tasks. The o1 series of models are trained with reinforcement learning to perform complex reasoning. o1 models think before they answer, producing a long internal chain of thought before responding to the user.",
|
|
24
|
+
"input": {
|
|
25
|
+
"costPer1MTokens": 3,
|
|
26
|
+
"maxTokens": 128000
|
|
27
|
+
},
|
|
28
|
+
"output": {
|
|
29
|
+
"costPer1MTokens": 12,
|
|
30
|
+
"maxTokens": 65536
|
|
31
|
+
},
|
|
32
|
+
"tags": ["reasoning", "vision", "general-purpose"]
|
|
33
|
+
},
|
|
34
|
+
{
|
|
35
|
+
"ref": "openai:gpt-4o-mini-2024-07-18",
|
|
36
|
+
"integration": "openai",
|
|
37
|
+
"id": "gpt-4o-mini-2024-07-18",
|
|
38
|
+
"name": "GPT-4o Mini",
|
|
39
|
+
"description": "GPT-4o mini (“o” for “omni”) is OpenAI's most advanced model in the small models category, and their cheapest model yet. It is multimodal (accepting text or image inputs and outputting text), has higher intelligence than gpt-3.5-turbo but is just as fast. It is meant to be used for smaller tasks, including vision tasks. It's recommended to choose gpt-4o-mini where you would have previously used gpt-3.5-turbo as this model is more capable and cheaper.",
|
|
40
|
+
"input": {
|
|
41
|
+
"costPer1MTokens": 0.15,
|
|
42
|
+
"maxTokens": 128000
|
|
43
|
+
},
|
|
44
|
+
"output": {
|
|
45
|
+
"costPer1MTokens": 0.6,
|
|
46
|
+
"maxTokens": 16384
|
|
47
|
+
},
|
|
48
|
+
"tags": ["recommended", "vision", "low-cost", "general-purpose", "function-calling"]
|
|
49
|
+
},
|
|
50
|
+
{
|
|
51
|
+
"ref": "openai:gpt-4o-2024-11-20",
|
|
52
|
+
"integration": "openai",
|
|
53
|
+
"id": "gpt-4o-2024-11-20",
|
|
54
|
+
"name": "GPT-4o (November 2024)",
|
|
55
|
+
"description": "GPT-4o (“o” for “omni”) is OpenAI's most advanced model. It is multimodal (accepting text or image inputs and outputting text), and it has the same high intelligence as GPT-4 Turbo but is cheaper and more efficient.",
|
|
56
|
+
"input": {
|
|
57
|
+
"costPer1MTokens": 2.5,
|
|
58
|
+
"maxTokens": 128000
|
|
59
|
+
},
|
|
60
|
+
"output": {
|
|
61
|
+
"costPer1MTokens": 10,
|
|
62
|
+
"maxTokens": 16384
|
|
63
|
+
},
|
|
64
|
+
"tags": ["recommended", "vision", "general-purpose", "coding", "agents", "function-calling"]
|
|
65
|
+
},
|
|
66
|
+
{
|
|
67
|
+
"ref": "openai:gpt-4o-2024-08-06",
|
|
68
|
+
"integration": "openai",
|
|
69
|
+
"id": "gpt-4o-2024-08-06",
|
|
70
|
+
"name": "GPT-4o (August 2024)",
|
|
71
|
+
"description": "GPT-4o (“o” for “omni”) is OpenAI's most advanced model. It is multimodal (accepting text or image inputs and outputting text), and it has the same high intelligence as GPT-4 Turbo but is cheaper and more efficient.",
|
|
72
|
+
"input": {
|
|
73
|
+
"costPer1MTokens": 2.5,
|
|
74
|
+
"maxTokens": 128000
|
|
75
|
+
},
|
|
76
|
+
"output": {
|
|
77
|
+
"costPer1MTokens": 10,
|
|
78
|
+
"maxTokens": 16384
|
|
79
|
+
},
|
|
80
|
+
"tags": ["recommended", "vision", "general-purpose", "coding", "agents", "function-calling"]
|
|
81
|
+
},
|
|
82
|
+
{
|
|
83
|
+
"ref": "openai:gpt-4o-2024-05-13",
|
|
84
|
+
"integration": "openai",
|
|
85
|
+
"id": "gpt-4o-2024-05-13",
|
|
86
|
+
"name": "GPT-4o (May 2024)",
|
|
87
|
+
"description": "GPT-4o (“o” for “omni”) is OpenAI's most advanced model. It is multimodal (accepting text or image inputs and outputting text), and it has the same high intelligence as GPT-4 Turbo but is cheaper and more efficient.",
|
|
88
|
+
"input": {
|
|
89
|
+
"costPer1MTokens": 5,
|
|
90
|
+
"maxTokens": 128000
|
|
91
|
+
},
|
|
92
|
+
"output": {
|
|
93
|
+
"costPer1MTokens": 15,
|
|
94
|
+
"maxTokens": 4096
|
|
95
|
+
},
|
|
96
|
+
"tags": ["vision", "general-purpose", "coding", "agents", "function-calling"]
|
|
97
|
+
},
|
|
98
|
+
{
|
|
99
|
+
"ref": "openai:gpt-4-turbo-2024-04-09",
|
|
100
|
+
"integration": "openai",
|
|
101
|
+
"id": "gpt-4-turbo-2024-04-09",
|
|
102
|
+
"name": "GPT-4 Turbo",
|
|
103
|
+
"description": "GPT-4 is a large multimodal model (accepting text or image inputs and outputting text) that can solve difficult problems with greater accuracy than any of OpenAI's previous models, thanks to its broader general knowledge and advanced reasoning capabilities.",
|
|
104
|
+
"input": {
|
|
105
|
+
"costPer1MTokens": 10,
|
|
106
|
+
"maxTokens": 128000
|
|
107
|
+
},
|
|
108
|
+
"output": {
|
|
109
|
+
"costPer1MTokens": 30,
|
|
110
|
+
"maxTokens": 4096
|
|
111
|
+
},
|
|
112
|
+
"tags": ["deprecated", "general-purpose", "coding", "agents", "function-calling"]
|
|
113
|
+
},
|
|
114
|
+
{
|
|
115
|
+
"ref": "openai:gpt-3.5-turbo-0125",
|
|
116
|
+
"integration": "openai",
|
|
117
|
+
"id": "gpt-3.5-turbo-0125",
|
|
118
|
+
"name": "GPT-3.5 Turbo",
|
|
119
|
+
"description": "GPT-3.5 Turbo can understand and generate natural language or code and has been optimized for chat but works well for non-chat tasks as well.",
|
|
120
|
+
"input": {
|
|
121
|
+
"costPer1MTokens": 0.5,
|
|
122
|
+
"maxTokens": 128000
|
|
123
|
+
},
|
|
124
|
+
"output": {
|
|
125
|
+
"costPer1MTokens": 1.5,
|
|
126
|
+
"maxTokens": 4096
|
|
127
|
+
},
|
|
128
|
+
"tags": ["deprecated", "general-purpose", "low-cost"]
|
|
129
|
+
},
|
|
130
|
+
{
|
|
131
|
+
"ref": "groq:llama-3.3-70b-versatile",
|
|
132
|
+
"integration": "groq",
|
|
133
|
+
"id": "llama-3.3-70b-versatile",
|
|
134
|
+
"name": "LLaMA 3.3 70B",
|
|
135
|
+
"description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.",
|
|
136
|
+
"input": {
|
|
137
|
+
"costPer1MTokens": 0.59,
|
|
138
|
+
"maxTokens": 128000
|
|
139
|
+
},
|
|
140
|
+
"output": {
|
|
141
|
+
"costPer1MTokens": 0.79,
|
|
142
|
+
"maxTokens": 32768
|
|
143
|
+
},
|
|
144
|
+
"tags": ["recommended", "general-purpose", "coding"]
|
|
145
|
+
},
|
|
146
|
+
{
|
|
147
|
+
"ref": "groq:llama-3.2-1b-preview",
|
|
148
|
+
"integration": "groq",
|
|
149
|
+
"id": "llama-3.2-1b-preview",
|
|
150
|
+
"name": "LLaMA 3.2 1B",
|
|
151
|
+
"description": "The Llama 3.2 instruction-tuned, text-only models are optimized for multilingual dialogue use cases, including agentic retrieval and summarization tasks.",
|
|
152
|
+
"input": {
|
|
153
|
+
"costPer1MTokens": 0.04,
|
|
154
|
+
"maxTokens": 128000
|
|
155
|
+
},
|
|
156
|
+
"output": {
|
|
157
|
+
"costPer1MTokens": 0.04,
|
|
158
|
+
"maxTokens": 8192
|
|
159
|
+
},
|
|
160
|
+
"tags": ["low-cost"]
|
|
161
|
+
},
|
|
162
|
+
{
|
|
163
|
+
"ref": "groq:llama-3.2-3b-preview",
|
|
164
|
+
"integration": "groq",
|
|
165
|
+
"id": "llama-3.2-3b-preview",
|
|
166
|
+
"name": "LLaMA 3.2 3B",
|
|
167
|
+
"description": "The Llama 3.2 instruction-tuned, text-only models are optimized for multilingual dialogue use cases, including agentic retrieval and summarization tasks.",
|
|
168
|
+
"input": {
|
|
169
|
+
"costPer1MTokens": 0.06,
|
|
170
|
+
"maxTokens": 128000
|
|
171
|
+
},
|
|
172
|
+
"output": {
|
|
173
|
+
"costPer1MTokens": 0.06,
|
|
174
|
+
"maxTokens": 8192
|
|
175
|
+
},
|
|
176
|
+
"tags": ["low-cost", "general-purpose"]
|
|
177
|
+
},
|
|
178
|
+
{
|
|
179
|
+
"ref": "groq:llama-3.2-11b-vision-preview",
|
|
180
|
+
"integration": "groq",
|
|
181
|
+
"id": "llama-3.2-11b-vision-preview",
|
|
182
|
+
"name": "LLaMA 3.2 11B Vision",
|
|
183
|
+
"description": "The Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image.",
|
|
184
|
+
"input": {
|
|
185
|
+
"costPer1MTokens": 0.18,
|
|
186
|
+
"maxTokens": 128000
|
|
187
|
+
},
|
|
188
|
+
"output": {
|
|
189
|
+
"costPer1MTokens": 0.18,
|
|
190
|
+
"maxTokens": 8192
|
|
191
|
+
},
|
|
192
|
+
"tags": ["low-cost", "vision", "general-purpose"]
|
|
193
|
+
},
|
|
194
|
+
{
|
|
195
|
+
"ref": "groq:llama-3.2-90b-vision-preview",
|
|
196
|
+
"integration": "groq",
|
|
197
|
+
"id": "llama-3.2-90b-vision-preview",
|
|
198
|
+
"name": "LLaMA 3.2 90B Vision",
|
|
199
|
+
"description": "The Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image.",
|
|
200
|
+
"input": {
|
|
201
|
+
"costPer1MTokens": 0.9,
|
|
202
|
+
"maxTokens": 128000
|
|
203
|
+
},
|
|
204
|
+
"output": {
|
|
205
|
+
"costPer1MTokens": 0.9,
|
|
206
|
+
"maxTokens": 8192
|
|
207
|
+
},
|
|
208
|
+
"tags": ["recommended", "vision", "general-purpose"]
|
|
209
|
+
},
|
|
210
|
+
{
|
|
211
|
+
"ref": "groq:llama-3.1-8b-instant",
|
|
212
|
+
"integration": "groq",
|
|
213
|
+
"id": "llama-3.1-8b-instant",
|
|
214
|
+
"name": "LLaMA 3.1 8B",
|
|
215
|
+
"description": "The Llama 3.1 instruction-tuned, text-only models are optimized for multilingual dialogue use cases.",
|
|
216
|
+
"input": {
|
|
217
|
+
"costPer1MTokens": 0.05,
|
|
218
|
+
"maxTokens": 128000
|
|
219
|
+
},
|
|
220
|
+
"output": {
|
|
221
|
+
"costPer1MTokens": 0.08,
|
|
222
|
+
"maxTokens": 8192
|
|
223
|
+
},
|
|
224
|
+
"tags": ["low-cost", "general-purpose"]
|
|
225
|
+
},
|
|
226
|
+
{
|
|
227
|
+
"ref": "groq:llama3-8b-8192",
|
|
228
|
+
"integration": "groq",
|
|
229
|
+
"id": "llama3-8b-8192",
|
|
230
|
+
"name": "LLaMA 3 8B",
|
|
231
|
+
"description": "Meta developed and released the Meta Llama 3 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8 and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks.",
|
|
232
|
+
"input": {
|
|
233
|
+
"costPer1MTokens": 0.05,
|
|
234
|
+
"maxTokens": 8192
|
|
235
|
+
},
|
|
236
|
+
"output": {
|
|
237
|
+
"costPer1MTokens": 0.08,
|
|
238
|
+
"maxTokens": 8192
|
|
239
|
+
},
|
|
240
|
+
"tags": ["low-cost", "general-purpose", "deprecated"]
|
|
241
|
+
},
|
|
242
|
+
{
|
|
243
|
+
"ref": "groq:llama3-70b-8192",
|
|
244
|
+
"integration": "groq",
|
|
245
|
+
"id": "llama3-70b-8192",
|
|
246
|
+
"name": "LLaMA 3 70B",
|
|
247
|
+
"description": "Meta developed and released the Meta Llama 3 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8 and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks.",
|
|
248
|
+
"input": {
|
|
249
|
+
"costPer1MTokens": 0.59,
|
|
250
|
+
"maxTokens": 8192
|
|
251
|
+
},
|
|
252
|
+
"output": {
|
|
253
|
+
"costPer1MTokens": 0.79,
|
|
254
|
+
"maxTokens": 8192
|
|
255
|
+
},
|
|
256
|
+
"tags": ["general-purpose"]
|
|
257
|
+
},
|
|
258
|
+
{
|
|
259
|
+
"ref": "groq:mixtral-8x7b-32768",
|
|
260
|
+
"integration": "groq",
|
|
261
|
+
"id": "mixtral-8x7b-32768",
|
|
262
|
+
"name": "Mixtral 8x7B",
|
|
263
|
+
"description": "Mistral MoE 8x7B Instruct v0.1 model with Sparse Mixture of Experts. Fine tuned for instruction following.",
|
|
264
|
+
"input": {
|
|
265
|
+
"costPer1MTokens": 0.24,
|
|
266
|
+
"maxTokens": 32768
|
|
267
|
+
},
|
|
268
|
+
"output": {
|
|
269
|
+
"costPer1MTokens": 0.24,
|
|
270
|
+
"maxTokens": 32768
|
|
271
|
+
},
|
|
272
|
+
"tags": ["low-cost", "general-purpose", "deprecated"]
|
|
273
|
+
},
|
|
274
|
+
{
|
|
275
|
+
"ref": "groq:gemma2-9b-it",
|
|
276
|
+
"integration": "groq",
|
|
277
|
+
"id": "gemma2-9b-it",
|
|
278
|
+
"name": "Gemma2 9B",
|
|
279
|
+
"description": "Redesigned for outsized performance and unmatched efficiency, Gemma 2 optimizes for blazing-fast inference on diverse hardware. Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. They are text-to-text, decoder-only large language models, available in English, with open weights, pre-trained variants, and instruction-tuned variants. Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning.",
|
|
280
|
+
"input": {
|
|
281
|
+
"costPer1MTokens": 0.2,
|
|
282
|
+
"maxTokens": 8192
|
|
283
|
+
},
|
|
284
|
+
"output": {
|
|
285
|
+
"costPer1MTokens": 0.2,
|
|
286
|
+
"maxTokens": 8192
|
|
287
|
+
},
|
|
288
|
+
"tags": ["low-cost", "general-purpose"]
|
|
289
|
+
},
|
|
290
|
+
{
|
|
291
|
+
"ref": "anthropic:claude-3-5-sonnet-20240620",
|
|
292
|
+
"integration": "anthropic",
|
|
293
|
+
"id": "claude-3-5-sonnet-20240620",
|
|
294
|
+
"name": "Claude 3.5 Sonnet",
|
|
295
|
+
"description": "Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at coding, data science, visual processing, and agentic tasks.",
|
|
296
|
+
"input": {
|
|
297
|
+
"costPer1MTokens": 3,
|
|
298
|
+
"maxTokens": 200000
|
|
299
|
+
},
|
|
300
|
+
"output": {
|
|
301
|
+
"costPer1MTokens": 15,
|
|
302
|
+
"maxTokens": 4096
|
|
303
|
+
},
|
|
304
|
+
"tags": ["recommended", "vision", "general-purpose", "agents", "coding", "function-calling", "storytelling"]
|
|
305
|
+
},
|
|
306
|
+
{
|
|
307
|
+
"ref": "anthropic:claude-3-haiku-20240307",
|
|
308
|
+
"integration": "anthropic",
|
|
309
|
+
"id": "claude-3-haiku-20240307",
|
|
310
|
+
"name": "Claude 3 Haiku",
|
|
311
|
+
"description": "Claude 3 Haiku is Anthropic's fastest and most compact model for near-instant responsiveness. Quick and accurate targeted performance.",
|
|
312
|
+
"input": {
|
|
313
|
+
"costPer1MTokens": 0.25,
|
|
314
|
+
"maxTokens": 200000
|
|
315
|
+
},
|
|
316
|
+
"output": {
|
|
317
|
+
"costPer1MTokens": 1.25,
|
|
318
|
+
"maxTokens": 4096
|
|
319
|
+
},
|
|
320
|
+
"tags": ["low-cost", "general-purpose"]
|
|
321
|
+
},
|
|
322
|
+
{
|
|
323
|
+
"ref": "fireworks-ai:accounts/fireworks/models/llama-v3p1-405b-instruct",
|
|
324
|
+
"integration": "fireworks-ai",
|
|
325
|
+
"id": "accounts/fireworks/models/llama-v3p1-405b-instruct",
|
|
326
|
+
"name": "Llama 3.1 405B Instruct",
|
|
327
|
+
"description": "The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models in 8B, 70B and 405B sizes. The Llama 3.1 instruction tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.",
|
|
328
|
+
"input": {
|
|
329
|
+
"costPer1MTokens": 3,
|
|
330
|
+
"maxTokens": 131072
|
|
331
|
+
},
|
|
332
|
+
"output": {
|
|
333
|
+
"costPer1MTokens": 3,
|
|
334
|
+
"maxTokens": 131072
|
|
335
|
+
},
|
|
336
|
+
"tags": ["recommended", "general-purpose"]
|
|
337
|
+
},
|
|
338
|
+
{
|
|
339
|
+
"ref": "fireworks-ai:accounts/fireworks/models/llama-v3p1-70b-instruct",
|
|
340
|
+
"integration": "fireworks-ai",
|
|
341
|
+
"id": "accounts/fireworks/models/llama-v3p1-70b-instruct",
|
|
342
|
+
"name": "Llama 3.1 70B Instruct",
|
|
343
|
+
"description": "The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models in 8B, 70B and 405B sizes. The Llama 3.1 instruction tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.",
|
|
344
|
+
"input": {
|
|
345
|
+
"costPer1MTokens": 0.9,
|
|
346
|
+
"maxTokens": 131072
|
|
347
|
+
},
|
|
348
|
+
"output": {
|
|
349
|
+
"costPer1MTokens": 0.9,
|
|
350
|
+
"maxTokens": 131072
|
|
351
|
+
},
|
|
352
|
+
"tags": ["general-purpose"]
|
|
353
|
+
},
|
|
354
|
+
{
|
|
355
|
+
"ref": "fireworks-ai:accounts/fireworks/models/llama-v3p1-8b-instruct",
|
|
356
|
+
"integration": "fireworks-ai",
|
|
357
|
+
"id": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
|
358
|
+
"name": "Llama 3.1 8B Instruct",
|
|
359
|
+
"description": "The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models in 8B, 70B and 405B sizes. The Llama 3.1 instruction tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.",
|
|
360
|
+
"input": {
|
|
361
|
+
"costPer1MTokens": 0.2,
|
|
362
|
+
"maxTokens": 131072
|
|
363
|
+
},
|
|
364
|
+
"output": {
|
|
365
|
+
"costPer1MTokens": 0.2,
|
|
366
|
+
"maxTokens": 131072
|
|
367
|
+
},
|
|
368
|
+
"tags": ["low-cost", "general-purpose"]
|
|
369
|
+
},
|
|
370
|
+
{
|
|
371
|
+
"ref": "fireworks-ai:accounts/fireworks/models/mixtral-8x22b-instruct",
|
|
372
|
+
"integration": "fireworks-ai",
|
|
373
|
+
"id": "accounts/fireworks/models/mixtral-8x22b-instruct",
|
|
374
|
+
"name": "Mixtral MoE 8x22B Instruct",
|
|
375
|
+
"description": "Mistral MoE 8x22B Instruct v0.1 model with Sparse Mixture of Experts. Fine tuned for instruction following.",
|
|
376
|
+
"input": {
|
|
377
|
+
"costPer1MTokens": 1.2,
|
|
378
|
+
"maxTokens": 65536
|
|
379
|
+
},
|
|
380
|
+
"output": {
|
|
381
|
+
"costPer1MTokens": 1.2,
|
|
382
|
+
"maxTokens": 65536
|
|
383
|
+
},
|
|
384
|
+
"tags": ["general-purpose"]
|
|
385
|
+
},
|
|
386
|
+
{
|
|
387
|
+
"ref": "fireworks-ai:accounts/fireworks/models/mixtral-8x7b-instruct",
|
|
388
|
+
"integration": "fireworks-ai",
|
|
389
|
+
"id": "accounts/fireworks/models/mixtral-8x7b-instruct",
|
|
390
|
+
"name": "Mixtral MoE 8x7B Instruct",
|
|
391
|
+
"description": "Mistral MoE 8x7B Instruct v0.1 model with Sparse Mixture of Experts. Fine tuned for instruction following.",
|
|
392
|
+
"input": {
|
|
393
|
+
"costPer1MTokens": 0.5,
|
|
394
|
+
"maxTokens": 32768
|
|
395
|
+
},
|
|
396
|
+
"output": {
|
|
397
|
+
"costPer1MTokens": 0.5,
|
|
398
|
+
"maxTokens": 32768
|
|
399
|
+
},
|
|
400
|
+
"tags": ["low-cost", "general-purpose"]
|
|
401
|
+
},
|
|
402
|
+
{
|
|
403
|
+
"ref": "fireworks-ai:accounts/fireworks/models/firefunction-v2",
|
|
404
|
+
"integration": "fireworks-ai",
|
|
405
|
+
"id": "accounts/fireworks/models/firefunction-v2",
|
|
406
|
+
"name": "Firefunction V2",
|
|
407
|
+
"description": "Fireworks' latest and most performant function-calling model. Firefunction-v2 is based on Llama-3 and trained to excel at function-calling as well as chat and instruction-following.",
|
|
408
|
+
"input": {
|
|
409
|
+
"costPer1MTokens": 0.9,
|
|
410
|
+
"maxTokens": 8192
|
|
411
|
+
},
|
|
412
|
+
"output": {
|
|
413
|
+
"costPer1MTokens": 0.9,
|
|
414
|
+
"maxTokens": 8192
|
|
415
|
+
},
|
|
416
|
+
"tags": ["function-calling"]
|
|
417
|
+
},
|
|
418
|
+
{
|
|
419
|
+
"ref": "fireworks-ai:accounts/fireworks/models/firellava-13b",
|
|
420
|
+
"integration": "fireworks-ai",
|
|
421
|
+
"id": "accounts/fireworks/models/firellava-13b",
|
|
422
|
+
"name": "FireLLaVA-13B",
|
|
423
|
+
"description": "Vision-language model allowing both image and text as inputs (single image is recommended), trained on OSS model generated training data.",
|
|
424
|
+
"input": {
|
|
425
|
+
"costPer1MTokens": 0.2,
|
|
426
|
+
"maxTokens": 4096
|
|
427
|
+
},
|
|
428
|
+
"output": {
|
|
429
|
+
"costPer1MTokens": 0.2,
|
|
430
|
+
"maxTokens": 4096
|
|
431
|
+
},
|
|
432
|
+
"tags": ["low-cost", "vision"]
|
|
433
|
+
},
|
|
434
|
+
{
|
|
435
|
+
"ref": "fireworks-ai:accounts/fireworks/models/deepseek-coder-v2-instruct",
|
|
436
|
+
"integration": "fireworks-ai",
|
|
437
|
+
"id": "accounts/fireworks/models/deepseek-coder-v2-instruct",
|
|
438
|
+
"name": "DeepSeek Coder V2 Instruct",
|
|
439
|
+
"description": "An open-source Mixture-of-Experts (MoE) code language model from DeepSeek that achieves performance comparable to GPT4-Turbo in code-specific tasks.",
|
|
440
|
+
"input": {
|
|
441
|
+
"costPer1MTokens": 2.7,
|
|
442
|
+
"maxTokens": 131072
|
|
443
|
+
},
|
|
444
|
+
"output": {
|
|
445
|
+
"costPer1MTokens": 2.7,
|
|
446
|
+
"maxTokens": 131072
|
|
447
|
+
},
|
|
448
|
+
"tags": ["coding"]
|
|
449
|
+
},
|
|
450
|
+
{
|
|
451
|
+
"ref": "fireworks-ai:accounts/fireworks/models/deepseek-coder-v2-lite-instruct",
|
|
452
|
+
"integration": "fireworks-ai",
|
|
453
|
+
"id": "accounts/fireworks/models/deepseek-coder-v2-lite-instruct",
|
|
454
|
+
"name": "DeepSeek Coder V2 Lite",
|
|
455
|
+
"description": "DeepSeek-Coder-V2, an open-source Mixture-of-Experts (MoE) code language model that achieves performance comparable to GPT4-Turbo in code-specific tasks.",
|
|
456
|
+
"input": {
|
|
457
|
+
"costPer1MTokens": 0.2,
|
|
458
|
+
"maxTokens": 163840
|
|
459
|
+
},
|
|
460
|
+
"output": {
|
|
461
|
+
"costPer1MTokens": 0.2,
|
|
462
|
+
"maxTokens": 163840
|
|
463
|
+
},
|
|
464
|
+
"tags": ["low-cost", "coding"]
|
|
465
|
+
},
|
|
466
|
+
{
|
|
467
|
+
"ref": "fireworks-ai:accounts/fireworks/models/mythomax-l2-13b",
|
|
468
|
+
"integration": "fireworks-ai",
|
|
469
|
+
"id": "accounts/fireworks/models/mythomax-l2-13b",
|
|
470
|
+
"name": "MythoMax L2 13b",
|
|
471
|
+
"description": "MythoMax L2 is designed to excel at both roleplaying and storytelling, and is an improved variant of the previous MythoMix model, combining the MythoLogic-L2 and Huginn models.",
|
|
472
|
+
"input": {
|
|
473
|
+
"costPer1MTokens": 0.2,
|
|
474
|
+
"maxTokens": 4096
|
|
475
|
+
},
|
|
476
|
+
"output": {
|
|
477
|
+
"costPer1MTokens": 0.2,
|
|
478
|
+
"maxTokens": 4096
|
|
479
|
+
},
|
|
480
|
+
"tags": ["roleplay", "storytelling", "low-cost"]
|
|
481
|
+
},
|
|
482
|
+
{
|
|
483
|
+
"ref": "fireworks-ai:accounts/fireworks/models/qwen2-72b-instruct",
|
|
484
|
+
"integration": "fireworks-ai",
|
|
485
|
+
"id": "accounts/fireworks/models/qwen2-72b-instruct",
|
|
486
|
+
"name": "Qwen2 72b Instruct",
|
|
487
|
+
"description": "Qwen 2 is the latest large language model series developed by the Qwen team at Alibaba Cloud. Key features and capabilities of Qwen 2 include multilingual proficiency with a particular strength in Asian languages, and enhanced performance in coding, mathematics, and long context understanding.",
|
|
488
|
+
"input": {
|
|
489
|
+
"costPer1MTokens": 0.9,
|
|
490
|
+
"maxTokens": 32768
|
|
491
|
+
},
|
|
492
|
+
"output": {
|
|
493
|
+
"costPer1MTokens": 0.9,
|
|
494
|
+
"maxTokens": 32768
|
|
495
|
+
},
|
|
496
|
+
"tags": ["general-purpose", "function-calling"]
|
|
497
|
+
},
|
|
498
|
+
{
|
|
499
|
+
"ref": "fireworks-ai:accounts/fireworks/models/gemma2-9b-it",
|
|
500
|
+
"integration": "fireworks-ai",
|
|
501
|
+
"id": "accounts/fireworks/models/gemma2-9b-it",
|
|
502
|
+
"name": "Gemma 2 9B Instruct",
|
|
503
|
+
"description": "Redesigned for outsized performance and unmatched efficiency, Gemma 2 optimizes for blazing-fast inference on diverse hardware. Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. They are text-to-text, decoder-only large language models, available in English, with open weights, pre-trained variants, and instruction-tuned variants. Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning.",
|
|
504
|
+
"input": {
|
|
505
|
+
"costPer1MTokens": 0.2,
|
|
506
|
+
"maxTokens": 8192
|
|
507
|
+
},
|
|
508
|
+
"output": {
|
|
509
|
+
"costPer1MTokens": 0.2,
|
|
510
|
+
"maxTokens": 8192
|
|
511
|
+
},
|
|
512
|
+
"tags": ["low-cost", "general-purpose"]
|
|
513
|
+
},
|
|
514
|
+
{
|
|
515
|
+
"ref": "google-ai:models/gemini-1.5-flash-8b-001",
|
|
516
|
+
"integration": "google-ai",
|
|
517
|
+
"id": "models/gemini-1.5-flash-8b-001",
|
|
518
|
+
"name": "Gemini 1.5 Flash-8B",
|
|
519
|
+
"description": "A small model designed for lower intelligence tasks. Google AI's fastest and most cost-efficient model with great performance for high-frequency tasks.",
|
|
520
|
+
"input": {
|
|
521
|
+
"costPer1MTokens": 0.0375,
|
|
522
|
+
"maxTokens": 128000
|
|
523
|
+
},
|
|
524
|
+
"output": {
|
|
525
|
+
"costPer1MTokens": 0.15,
|
|
526
|
+
"maxTokens": 128000
|
|
527
|
+
},
|
|
528
|
+
"tags": ["low-cost", "general-purpose", "vision"]
|
|
529
|
+
},
|
|
530
|
+
{
|
|
531
|
+
"ref": "google-ai:models/gemini-1.5-flash-002",
|
|
532
|
+
"integration": "google-ai",
|
|
533
|
+
"id": "models/gemini-1.5-flash-002",
|
|
534
|
+
"name": "Gemini 1.5 Flash",
|
|
535
|
+
"description": "A fast and versatile model for scaling across diverse tasks. Google AI's most balanced multimodal model with great performance for most tasks.",
|
|
536
|
+
"input": {
|
|
537
|
+
"costPer1MTokens": 0.075,
|
|
538
|
+
"maxTokens": 128000
|
|
539
|
+
},
|
|
540
|
+
"output": {
|
|
541
|
+
"costPer1MTokens": 0.3,
|
|
542
|
+
"maxTokens": 128000
|
|
543
|
+
},
|
|
544
|
+
"tags": ["recommended", "general-purpose", "vision"]
|
|
545
|
+
},
|
|
546
|
+
{
|
|
547
|
+
"ref": "google-ai:models/gemini-1.5-pro-002",
|
|
548
|
+
"integration": "google-ai",
|
|
549
|
+
"id": "models/gemini-1.5-pro-002",
|
|
550
|
+
"name": "Gemini 1.5 Pro",
|
|
551
|
+
"description": "A mid-size multimodal model that is optimized for a wide range of reasoning tasks. Google AI's best-performing model with features for a wide variety of reasoning tasks.",
|
|
552
|
+
"input": {
|
|
553
|
+
"costPer1MTokens": 1.25,
|
|
554
|
+
"maxTokens": 128000
|
|
555
|
+
},
|
|
556
|
+
"output": {
|
|
557
|
+
"costPer1MTokens": 5,
|
|
558
|
+
"maxTokens": 128000
|
|
559
|
+
},
|
|
560
|
+
"tags": ["recommended", "general-purpose", "vision"]
|
|
561
|
+
}
|
|
562
|
+
]
|