@botpress/cognitive 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,562 @@
1
+ [
2
+ {
3
+ "ref": "openai:o1-2024-12-17",
4
+ "integration": "openai",
5
+ "id": "o1-2024-12-17",
6
+ "name": "GPT o1",
7
+ "description": "The o1 model is designed to solve hard problems across domains. The o1 series of models are trained with reinforcement learning to perform complex reasoning. o1 models think before they answer, producing a long internal chain of thought before responding to the user.",
8
+ "input": {
9
+ "costPer1MTokens": 15,
10
+ "maxTokens": 200000
11
+ },
12
+ "output": {
13
+ "costPer1MTokens": 60,
14
+ "maxTokens": 100000
15
+ },
16
+ "tags": ["reasoning", "vision", "general-purpose"]
17
+ },
18
+ {
19
+ "ref": "openai:o1-mini-2024-09-12",
20
+ "integration": "openai",
21
+ "id": "o1-mini-2024-09-12",
22
+ "name": "GPT o1-mini",
23
+ "description": "The o1-mini model is a fast and affordable reasoning model for specialized tasks. The o1 series of models are trained with reinforcement learning to perform complex reasoning. o1 models think before they answer, producing a long internal chain of thought before responding to the user.",
24
+ "input": {
25
+ "costPer1MTokens": 3,
26
+ "maxTokens": 128000
27
+ },
28
+ "output": {
29
+ "costPer1MTokens": 12,
30
+ "maxTokens": 65536
31
+ },
32
+ "tags": ["reasoning", "vision", "general-purpose"]
33
+ },
34
+ {
35
+ "ref": "openai:gpt-4o-mini-2024-07-18",
36
+ "integration": "openai",
37
+ "id": "gpt-4o-mini-2024-07-18",
38
+ "name": "GPT-4o Mini",
39
+ "description": "GPT-4o mini (“o” for “omni”) is OpenAI's most advanced model in the small models category, and their cheapest model yet. It is multimodal (accepting text or image inputs and outputting text), has higher intelligence than gpt-3.5-turbo but is just as fast. It is meant to be used for smaller tasks, including vision tasks. It's recommended to choose gpt-4o-mini where you would have previously used gpt-3.5-turbo as this model is more capable and cheaper.",
40
+ "input": {
41
+ "costPer1MTokens": 0.15,
42
+ "maxTokens": 128000
43
+ },
44
+ "output": {
45
+ "costPer1MTokens": 0.6,
46
+ "maxTokens": 16384
47
+ },
48
+ "tags": ["recommended", "vision", "low-cost", "general-purpose", "function-calling"]
49
+ },
50
+ {
51
+ "ref": "openai:gpt-4o-2024-11-20",
52
+ "integration": "openai",
53
+ "id": "gpt-4o-2024-11-20",
54
+ "name": "GPT-4o (November 2024)",
55
+ "description": "GPT-4o (“o” for “omni”) is OpenAI's most advanced model. It is multimodal (accepting text or image inputs and outputting text), and it has the same high intelligence as GPT-4 Turbo but is cheaper and more efficient.",
56
+ "input": {
57
+ "costPer1MTokens": 2.5,
58
+ "maxTokens": 128000
59
+ },
60
+ "output": {
61
+ "costPer1MTokens": 10,
62
+ "maxTokens": 16384
63
+ },
64
+ "tags": ["recommended", "vision", "general-purpose", "coding", "agents", "function-calling"]
65
+ },
66
+ {
67
+ "ref": "openai:gpt-4o-2024-08-06",
68
+ "integration": "openai",
69
+ "id": "gpt-4o-2024-08-06",
70
+ "name": "GPT-4o (August 2024)",
71
+ "description": "GPT-4o (“o” for “omni”) is OpenAI's most advanced model. It is multimodal (accepting text or image inputs and outputting text), and it has the same high intelligence as GPT-4 Turbo but is cheaper and more efficient.",
72
+ "input": {
73
+ "costPer1MTokens": 2.5,
74
+ "maxTokens": 128000
75
+ },
76
+ "output": {
77
+ "costPer1MTokens": 10,
78
+ "maxTokens": 16384
79
+ },
80
+ "tags": ["recommended", "vision", "general-purpose", "coding", "agents", "function-calling"]
81
+ },
82
+ {
83
+ "ref": "openai:gpt-4o-2024-05-13",
84
+ "integration": "openai",
85
+ "id": "gpt-4o-2024-05-13",
86
+ "name": "GPT-4o (May 2024)",
87
+ "description": "GPT-4o (“o” for “omni”) is OpenAI's most advanced model. It is multimodal (accepting text or image inputs and outputting text), and it has the same high intelligence as GPT-4 Turbo but is cheaper and more efficient.",
88
+ "input": {
89
+ "costPer1MTokens": 5,
90
+ "maxTokens": 128000
91
+ },
92
+ "output": {
93
+ "costPer1MTokens": 15,
94
+ "maxTokens": 4096
95
+ },
96
+ "tags": ["vision", "general-purpose", "coding", "agents", "function-calling"]
97
+ },
98
+ {
99
+ "ref": "openai:gpt-4-turbo-2024-04-09",
100
+ "integration": "openai",
101
+ "id": "gpt-4-turbo-2024-04-09",
102
+ "name": "GPT-4 Turbo",
103
+ "description": "GPT-4 is a large multimodal model (accepting text or image inputs and outputting text) that can solve difficult problems with greater accuracy than any of OpenAI's previous models, thanks to its broader general knowledge and advanced reasoning capabilities.",
104
+ "input": {
105
+ "costPer1MTokens": 10,
106
+ "maxTokens": 128000
107
+ },
108
+ "output": {
109
+ "costPer1MTokens": 30,
110
+ "maxTokens": 4096
111
+ },
112
+ "tags": ["deprecated", "general-purpose", "coding", "agents", "function-calling"]
113
+ },
114
+ {
115
+ "ref": "openai:gpt-3.5-turbo-0125",
116
+ "integration": "openai",
117
+ "id": "gpt-3.5-turbo-0125",
118
+ "name": "GPT-3.5 Turbo",
119
+ "description": "GPT-3.5 Turbo can understand and generate natural language or code and has been optimized for chat but works well for non-chat tasks as well.",
120
+ "input": {
121
+ "costPer1MTokens": 0.5,
122
+ "maxTokens": 128000
123
+ },
124
+ "output": {
125
+ "costPer1MTokens": 1.5,
126
+ "maxTokens": 4096
127
+ },
128
+ "tags": ["deprecated", "general-purpose", "low-cost"]
129
+ },
130
+ {
131
+ "ref": "groq:llama-3.3-70b-versatile",
132
+ "integration": "groq",
133
+ "id": "llama-3.3-70b-versatile",
134
+ "name": "LLaMA 3.3 70B",
135
+ "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.",
136
+ "input": {
137
+ "costPer1MTokens": 0.59,
138
+ "maxTokens": 128000
139
+ },
140
+ "output": {
141
+ "costPer1MTokens": 0.79,
142
+ "maxTokens": 32768
143
+ },
144
+ "tags": ["recommended", "general-purpose", "coding"]
145
+ },
146
+ {
147
+ "ref": "groq:llama-3.2-1b-preview",
148
+ "integration": "groq",
149
+ "id": "llama-3.2-1b-preview",
150
+ "name": "LLaMA 3.2 1B",
151
+ "description": "The Llama 3.2 instruction-tuned, text-only models are optimized for multilingual dialogue use cases, including agentic retrieval and summarization tasks.",
152
+ "input": {
153
+ "costPer1MTokens": 0.04,
154
+ "maxTokens": 128000
155
+ },
156
+ "output": {
157
+ "costPer1MTokens": 0.04,
158
+ "maxTokens": 8192
159
+ },
160
+ "tags": ["low-cost"]
161
+ },
162
+ {
163
+ "ref": "groq:llama-3.2-3b-preview",
164
+ "integration": "groq",
165
+ "id": "llama-3.2-3b-preview",
166
+ "name": "LLaMA 3.2 3B",
167
+ "description": "The Llama 3.2 instruction-tuned, text-only models are optimized for multilingual dialogue use cases, including agentic retrieval and summarization tasks.",
168
+ "input": {
169
+ "costPer1MTokens": 0.06,
170
+ "maxTokens": 128000
171
+ },
172
+ "output": {
173
+ "costPer1MTokens": 0.06,
174
+ "maxTokens": 8192
175
+ },
176
+ "tags": ["low-cost", "general-purpose"]
177
+ },
178
+ {
179
+ "ref": "groq:llama-3.2-11b-vision-preview",
180
+ "integration": "groq",
181
+ "id": "llama-3.2-11b-vision-preview",
182
+ "name": "LLaMA 3.2 11B Vision",
183
+ "description": "The Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image.",
184
+ "input": {
185
+ "costPer1MTokens": 0.18,
186
+ "maxTokens": 128000
187
+ },
188
+ "output": {
189
+ "costPer1MTokens": 0.18,
190
+ "maxTokens": 8192
191
+ },
192
+ "tags": ["low-cost", "vision", "general-purpose"]
193
+ },
194
+ {
195
+ "ref": "groq:llama-3.2-90b-vision-preview",
196
+ "integration": "groq",
197
+ "id": "llama-3.2-90b-vision-preview",
198
+ "name": "LLaMA 3.2 90B Vision",
199
+ "description": "The Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image.",
200
+ "input": {
201
+ "costPer1MTokens": 0.9,
202
+ "maxTokens": 128000
203
+ },
204
+ "output": {
205
+ "costPer1MTokens": 0.9,
206
+ "maxTokens": 8192
207
+ },
208
+ "tags": ["recommended", "vision", "general-purpose"]
209
+ },
210
+ {
211
+ "ref": "groq:llama-3.1-8b-instant",
212
+ "integration": "groq",
213
+ "id": "llama-3.1-8b-instant",
214
+ "name": "LLaMA 3.1 8B",
215
+ "description": "The Llama 3.1 instruction-tuned, text-only models are optimized for multilingual dialogue use cases.",
216
+ "input": {
217
+ "costPer1MTokens": 0.05,
218
+ "maxTokens": 128000
219
+ },
220
+ "output": {
221
+ "costPer1MTokens": 0.08,
222
+ "maxTokens": 8192
223
+ },
224
+ "tags": ["low-cost", "general-purpose"]
225
+ },
226
+ {
227
+ "ref": "groq:llama3-8b-8192",
228
+ "integration": "groq",
229
+ "id": "llama3-8b-8192",
230
+ "name": "LLaMA 3 8B",
231
+ "description": "Meta developed and released the Meta Llama 3 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8 and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks.",
232
+ "input": {
233
+ "costPer1MTokens": 0.05,
234
+ "maxTokens": 8192
235
+ },
236
+ "output": {
237
+ "costPer1MTokens": 0.08,
238
+ "maxTokens": 8192
239
+ },
240
+ "tags": ["low-cost", "general-purpose", "deprecated"]
241
+ },
242
+ {
243
+ "ref": "groq:llama3-70b-8192",
244
+ "integration": "groq",
245
+ "id": "llama3-70b-8192",
246
+ "name": "LLaMA 3 70B",
247
+ "description": "Meta developed and released the Meta Llama 3 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8 and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks.",
248
+ "input": {
249
+ "costPer1MTokens": 0.59,
250
+ "maxTokens": 8192
251
+ },
252
+ "output": {
253
+ "costPer1MTokens": 0.79,
254
+ "maxTokens": 8192
255
+ },
256
+ "tags": ["general-purpose"]
257
+ },
258
+ {
259
+ "ref": "groq:mixtral-8x7b-32768",
260
+ "integration": "groq",
261
+ "id": "mixtral-8x7b-32768",
262
+ "name": "Mixtral 8x7B",
263
+ "description": "Mistral MoE 8x7B Instruct v0.1 model with Sparse Mixture of Experts. Fine tuned for instruction following.",
264
+ "input": {
265
+ "costPer1MTokens": 0.24,
266
+ "maxTokens": 32768
267
+ },
268
+ "output": {
269
+ "costPer1MTokens": 0.24,
270
+ "maxTokens": 32768
271
+ },
272
+ "tags": ["low-cost", "general-purpose", "deprecated"]
273
+ },
274
+ {
275
+ "ref": "groq:gemma2-9b-it",
276
+ "integration": "groq",
277
+ "id": "gemma2-9b-it",
278
+ "name": "Gemma2 9B",
279
+ "description": "Redesigned for outsized performance and unmatched efficiency, Gemma 2 optimizes for blazing-fast inference on diverse hardware. Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. They are text-to-text, decoder-only large language models, available in English, with open weights, pre-trained variants, and instruction-tuned variants. Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning.",
280
+ "input": {
281
+ "costPer1MTokens": 0.2,
282
+ "maxTokens": 8192
283
+ },
284
+ "output": {
285
+ "costPer1MTokens": 0.2,
286
+ "maxTokens": 8192
287
+ },
288
+ "tags": ["low-cost", "general-purpose"]
289
+ },
290
+ {
291
+ "ref": "anthropic:claude-3-5-sonnet-20240620",
292
+ "integration": "anthropic",
293
+ "id": "claude-3-5-sonnet-20240620",
294
+ "name": "Claude 3.5 Sonnet",
295
+ "description": "Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at coding, data science, visual processing, and agentic tasks.",
296
+ "input": {
297
+ "costPer1MTokens": 3,
298
+ "maxTokens": 200000
299
+ },
300
+ "output": {
301
+ "costPer1MTokens": 15,
302
+ "maxTokens": 4096
303
+ },
304
+ "tags": ["recommended", "vision", "general-purpose", "agents", "coding", "function-calling", "storytelling"]
305
+ },
306
+ {
307
+ "ref": "anthropic:claude-3-haiku-20240307",
308
+ "integration": "anthropic",
309
+ "id": "claude-3-haiku-20240307",
310
+ "name": "Claude 3 Haiku",
311
+ "description": "Claude 3 Haiku is Anthropic's fastest and most compact model for near-instant responsiveness. Quick and accurate targeted performance.",
312
+ "input": {
313
+ "costPer1MTokens": 0.25,
314
+ "maxTokens": 200000
315
+ },
316
+ "output": {
317
+ "costPer1MTokens": 1.25,
318
+ "maxTokens": 4096
319
+ },
320
+ "tags": ["low-cost", "general-purpose"]
321
+ },
322
+ {
323
+ "ref": "fireworks-ai:accounts/fireworks/models/llama-v3p1-405b-instruct",
324
+ "integration": "fireworks-ai",
325
+ "id": "accounts/fireworks/models/llama-v3p1-405b-instruct",
326
+ "name": "Llama 3.1 405B Instruct",
327
+ "description": "The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models in 8B, 70B and 405B sizes. The Llama 3.1 instruction tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.",
328
+ "input": {
329
+ "costPer1MTokens": 3,
330
+ "maxTokens": 131072
331
+ },
332
+ "output": {
333
+ "costPer1MTokens": 3,
334
+ "maxTokens": 131072
335
+ },
336
+ "tags": ["recommended", "general-purpose"]
337
+ },
338
+ {
339
+ "ref": "fireworks-ai:accounts/fireworks/models/llama-v3p1-70b-instruct",
340
+ "integration": "fireworks-ai",
341
+ "id": "accounts/fireworks/models/llama-v3p1-70b-instruct",
342
+ "name": "Llama 3.1 70B Instruct",
343
+ "description": "The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models in 8B, 70B and 405B sizes. The Llama 3.1 instruction tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.",
344
+ "input": {
345
+ "costPer1MTokens": 0.9,
346
+ "maxTokens": 131072
347
+ },
348
+ "output": {
349
+ "costPer1MTokens": 0.9,
350
+ "maxTokens": 131072
351
+ },
352
+ "tags": ["general-purpose"]
353
+ },
354
+ {
355
+ "ref": "fireworks-ai:accounts/fireworks/models/llama-v3p1-8b-instruct",
356
+ "integration": "fireworks-ai",
357
+ "id": "accounts/fireworks/models/llama-v3p1-8b-instruct",
358
+ "name": "Llama 3.1 8B Instruct",
359
+ "description": "The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models in 8B, 70B and 405B sizes. The Llama 3.1 instruction tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.",
360
+ "input": {
361
+ "costPer1MTokens": 0.2,
362
+ "maxTokens": 131072
363
+ },
364
+ "output": {
365
+ "costPer1MTokens": 0.2,
366
+ "maxTokens": 131072
367
+ },
368
+ "tags": ["low-cost", "general-purpose"]
369
+ },
370
+ {
371
+ "ref": "fireworks-ai:accounts/fireworks/models/mixtral-8x22b-instruct",
372
+ "integration": "fireworks-ai",
373
+ "id": "accounts/fireworks/models/mixtral-8x22b-instruct",
374
+ "name": "Mixtral MoE 8x22B Instruct",
375
+ "description": "Mistral MoE 8x22B Instruct v0.1 model with Sparse Mixture of Experts. Fine tuned for instruction following.",
376
+ "input": {
377
+ "costPer1MTokens": 1.2,
378
+ "maxTokens": 65536
379
+ },
380
+ "output": {
381
+ "costPer1MTokens": 1.2,
382
+ "maxTokens": 65536
383
+ },
384
+ "tags": ["general-purpose"]
385
+ },
386
+ {
387
+ "ref": "fireworks-ai:accounts/fireworks/models/mixtral-8x7b-instruct",
388
+ "integration": "fireworks-ai",
389
+ "id": "accounts/fireworks/models/mixtral-8x7b-instruct",
390
+ "name": "Mixtral MoE 8x7B Instruct",
391
+ "description": "Mistral MoE 8x7B Instruct v0.1 model with Sparse Mixture of Experts. Fine tuned for instruction following.",
392
+ "input": {
393
+ "costPer1MTokens": 0.5,
394
+ "maxTokens": 32768
395
+ },
396
+ "output": {
397
+ "costPer1MTokens": 0.5,
398
+ "maxTokens": 32768
399
+ },
400
+ "tags": ["low-cost", "general-purpose"]
401
+ },
402
+ {
403
+ "ref": "fireworks-ai:accounts/fireworks/models/firefunction-v2",
404
+ "integration": "fireworks-ai",
405
+ "id": "accounts/fireworks/models/firefunction-v2",
406
+ "name": "Firefunction V2",
407
+ "description": "Fireworks' latest and most performant function-calling model. Firefunction-v2 is based on Llama-3 and trained to excel at function-calling as well as chat and instruction-following.",
408
+ "input": {
409
+ "costPer1MTokens": 0.9,
410
+ "maxTokens": 8192
411
+ },
412
+ "output": {
413
+ "costPer1MTokens": 0.9,
414
+ "maxTokens": 8192
415
+ },
416
+ "tags": ["function-calling"]
417
+ },
418
+ {
419
+ "ref": "fireworks-ai:accounts/fireworks/models/firellava-13b",
420
+ "integration": "fireworks-ai",
421
+ "id": "accounts/fireworks/models/firellava-13b",
422
+ "name": "FireLLaVA-13B",
423
+ "description": "Vision-language model allowing both image and text as inputs (single image is recommended), trained on OSS model generated training data.",
424
+ "input": {
425
+ "costPer1MTokens": 0.2,
426
+ "maxTokens": 4096
427
+ },
428
+ "output": {
429
+ "costPer1MTokens": 0.2,
430
+ "maxTokens": 4096
431
+ },
432
+ "tags": ["low-cost", "vision"]
433
+ },
434
+ {
435
+ "ref": "fireworks-ai:accounts/fireworks/models/deepseek-coder-v2-instruct",
436
+ "integration": "fireworks-ai",
437
+ "id": "accounts/fireworks/models/deepseek-coder-v2-instruct",
438
+ "name": "DeepSeek Coder V2 Instruct",
439
+ "description": "An open-source Mixture-of-Experts (MoE) code language model from DeepSeek that achieves performance comparable to GPT4-Turbo in code-specific tasks.",
440
+ "input": {
441
+ "costPer1MTokens": 2.7,
442
+ "maxTokens": 131072
443
+ },
444
+ "output": {
445
+ "costPer1MTokens": 2.7,
446
+ "maxTokens": 131072
447
+ },
448
+ "tags": ["coding"]
449
+ },
450
+ {
451
+ "ref": "fireworks-ai:accounts/fireworks/models/deepseek-coder-v2-lite-instruct",
452
+ "integration": "fireworks-ai",
453
+ "id": "accounts/fireworks/models/deepseek-coder-v2-lite-instruct",
454
+ "name": "DeepSeek Coder V2 Lite",
455
+ "description": "DeepSeek-Coder-V2 is an open-source Mixture-of-Experts (MoE) code language model that achieves performance comparable to GPT4-Turbo in code-specific tasks.",
456
+ "input": {
457
+ "costPer1MTokens": 0.2,
458
+ "maxTokens": 163840
459
+ },
460
+ "output": {
461
+ "costPer1MTokens": 0.2,
462
+ "maxTokens": 163840
463
+ },
464
+ "tags": ["low-cost", "coding"]
465
+ },
466
+ {
467
+ "ref": "fireworks-ai:accounts/fireworks/models/mythomax-l2-13b",
468
+ "integration": "fireworks-ai",
469
+ "id": "accounts/fireworks/models/mythomax-l2-13b",
470
+ "name": "MythoMax L2 13b",
471
+ "description": "MythoMax L2 is designed to excel at both roleplaying and storytelling, and is an improved variant of the previous MythoMix model, combining the MythoLogic-L2 and Huginn models.",
472
+ "input": {
473
+ "costPer1MTokens": 0.2,
474
+ "maxTokens": 4096
475
+ },
476
+ "output": {
477
+ "costPer1MTokens": 0.2,
478
+ "maxTokens": 4096
479
+ },
480
+ "tags": ["roleplay", "storytelling", "low-cost"]
481
+ },
482
+ {
483
+ "ref": "fireworks-ai:accounts/fireworks/models/qwen2-72b-instruct",
484
+ "integration": "fireworks-ai",
485
+ "id": "accounts/fireworks/models/qwen2-72b-instruct",
486
+ "name": "Qwen2 72b Instruct",
487
+ "description": "Qwen 2 is the latest large language model series developed by the Qwen team at Alibaba Cloud. Key features and capabilities of Qwen 2 include multilingual proficiency with a particular strength in Asian languages, and enhanced performance in coding, mathematics, and long context understanding.",
488
+ "input": {
489
+ "costPer1MTokens": 0.9,
490
+ "maxTokens": 32768
491
+ },
492
+ "output": {
493
+ "costPer1MTokens": 0.9,
494
+ "maxTokens": 32768
495
+ },
496
+ "tags": ["general-purpose", "function-calling"]
497
+ },
498
+ {
499
+ "ref": "fireworks-ai:accounts/fireworks/models/gemma2-9b-it",
500
+ "integration": "fireworks-ai",
501
+ "id": "accounts/fireworks/models/gemma2-9b-it",
502
+ "name": "Gemma 2 9B Instruct",
503
+ "description": "Redesigned for outsized performance and unmatched efficiency, Gemma 2 optimizes for blazing-fast inference on diverse hardware. Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. They are text-to-text, decoder-only large language models, available in English, with open weights, pre-trained variants, and instruction-tuned variants. Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning.",
504
+ "input": {
505
+ "costPer1MTokens": 0.2,
506
+ "maxTokens": 8192
507
+ },
508
+ "output": {
509
+ "costPer1MTokens": 0.2,
510
+ "maxTokens": 8192
511
+ },
512
+ "tags": ["low-cost", "general-purpose"]
513
+ },
514
+ {
515
+ "ref": "google-ai:models/gemini-1.5-flash-8b-001",
516
+ "integration": "google-ai",
517
+ "id": "models/gemini-1.5-flash-8b-001",
518
+ "name": "Gemini 1.5 Flash-8B",
519
+ "description": "A small model designed for lower intelligence tasks. Google AI's fastest and most cost-efficient model with great performance for high-frequency tasks.",
520
+ "input": {
521
+ "costPer1MTokens": 0.0375,
522
+ "maxTokens": 128000
523
+ },
524
+ "output": {
525
+ "costPer1MTokens": 0.15,
526
+ "maxTokens": 128000
527
+ },
528
+ "tags": ["low-cost", "general-purpose", "vision"]
529
+ },
530
+ {
531
+ "ref": "google-ai:models/gemini-1.5-flash-002",
532
+ "integration": "google-ai",
533
+ "id": "models/gemini-1.5-flash-002",
534
+ "name": "Gemini 1.5 Flash",
535
+ "description": "A fast and versatile model for scaling across diverse tasks. Google AI's most balanced multimodal model with great performance for most tasks.",
536
+ "input": {
537
+ "costPer1MTokens": 0.075,
538
+ "maxTokens": 128000
539
+ },
540
+ "output": {
541
+ "costPer1MTokens": 0.3,
542
+ "maxTokens": 128000
543
+ },
544
+ "tags": ["recommended", "general-purpose", "vision"]
545
+ },
546
+ {
547
+ "ref": "google-ai:models/gemini-1.5-pro-002",
548
+ "integration": "google-ai",
549
+ "id": "models/gemini-1.5-pro-002",
550
+ "name": "Gemini 1.5 Pro",
551
+ "description": "A mid-size multimodal model that is optimized for a wide range of reasoning tasks. Google AI's best-performing model with features for a wide variety of reasoning tasks.",
552
+ "input": {
553
+ "costPer1MTokens": 1.25,
554
+ "maxTokens": 128000
555
+ },
556
+ "output": {
557
+ "costPer1MTokens": 5,
558
+ "maxTokens": 128000
559
+ },
560
+ "tags": ["recommended", "general-purpose", "vision"]
561
+ }
562
+ ]