@lobehub/chat 1.16.7 → 1.16.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of @lobehub/chat might be problematic.

src/config/modelProviders/bedrock.ts

@@ -1,12 +1,11 @@
 import { ModelProviderCard } from '@/types/llm';

-// ref https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html
 // ref https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html
 // ref https://us-east-1.console.aws.amazon.com/bedrock/home?region=us-east-1#/models
 // ref https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/models
 const Bedrock: ModelProviderCard = {
   chatModels: [
-  /*
+    /*
     // TODO: Not support for now
     {
       description: 'Amazon Titan Text Lite is a light weight efficient model ideal for fine-tuning for English-language tasks, including like summarization and copywriting, where customers want a smaller, more cost-effective model that is also highly customizable.',
@@ -28,96 +27,156 @@ const Bedrock: ModelProviderCard = {
     },
     */
     {
-      description: 'Claude 3.5 Sonnet raises the industry bar for intelligence, outperforming competitor models and Claude 3 Opus on a wide range of evaluations, with the speed and cost of our mid-tier model, Claude 3 Sonnet.',
+      description:
+        'Claude 3.5 Sonnet raises the industry bar for intelligence, outperforming competitor models and Claude 3 Opus on a wide range of evaluations, with the speed and cost of our mid-tier model, Claude 3 Sonnet.',
       displayName: 'Claude 3.5 Sonnet',
       enabled: true,
       functionCall: true,
       id: 'anthropic.claude-3-5-sonnet-20240620-v1:0',
+      pricing: {
+        input: 3,
+        output: 15,
+      },
       tokens: 200_000,
       vision: true,
     },
     {
-      description: 'Claude 3 Sonnet by Anthropic strikes the ideal balance between intelligence and speed—particularly for enterprise workloads. It offers maximum utility at a lower price than competitors, and is engineered to be the dependable, high-endurance workhorse for scaled AI deployments. Claude 3 Sonnet can process images and return text outputs, and features a 200K context window.',
+      description:
+        'Claude 3 Sonnet by Anthropic strikes the ideal balance between intelligence and speed—particularly for enterprise workloads. It offers maximum utility at a lower price than competitors, and is engineered to be the dependable, high-endurance workhorse for scaled AI deployments. Claude 3 Sonnet can process images and return text outputs, and features a 200K context window.',
       displayName: 'Claude 3 Sonnet',
       enabled: true,
       functionCall: true,
       id: 'anthropic.claude-3-sonnet-20240229-v1:0',
+      pricing: {
+        input: 3,
+        output: 15,
+      },
       tokens: 200_000,
       vision: true,
     },
     {
-      description: 'Claude 3 Opus is Anthropic most powerful AI model, with state-of-the-art performance on highly complex tasks. It can navigate open-ended prompts and sight-unseen scenarios with remarkable fluency and human-like understanding. Claude 3 Opus shows us the frontier of what’s possible with generative AI. Claude 3 Opus can process images and return text outputs, and features a 200K context window.',
+      description:
+        'Claude 3 Opus is Anthropic most powerful AI model, with state-of-the-art performance on highly complex tasks. It can navigate open-ended prompts and sight-unseen scenarios with remarkable fluency and human-like understanding. Claude 3 Opus shows us the frontier of what’s possible with generative AI. Claude 3 Opus can process images and return text outputs, and features a 200K context window.',
       displayName: 'Claude 3 Opus',
       enabled: true,
       functionCall: true,
       id: 'anthropic.claude-3-opus-20240229-v1:0',
+      pricing: {
+        input: 15,
+        output: 75,
+      },
       tokens: 200_000,
       vision: true,
     },
     {
-      description: 'Claude 3 Haiku is Anthropic fastest, most compact model for near-instant responsiveness. It answers simple queries and requests with speed. Customers will be able to build seamless AI experiences that mimic human interactions. Claude 3 Haiku can process images and return text outputs, and features a 200K context window.',
+      description:
+        'Claude 3 Haiku is Anthropic fastest, most compact model for near-instant responsiveness. It answers simple queries and requests with speed. Customers will be able to build seamless AI experiences that mimic human interactions. Claude 3 Haiku can process images and return text outputs, and features a 200K context window.',
       displayName: 'Claude 3 Haiku',
       enabled: true,
       functionCall: true,
       id: 'anthropic.claude-3-haiku-20240307-v1:0',
+      pricing: {
+        input: 0.25,
+        output: 1.25,
+      },
       tokens: 200_000,
       vision: true,
     },
     {
-      description: 'An update to Claude 2 that features double the context window, plus improvements across reliability, hallucination rates, and evidence-based accuracy in long document and RAG contexts.',
+      description:
+        'An update to Claude 2 that features double the context window, plus improvements across reliability, hallucination rates, and evidence-based accuracy in long document and RAG contexts.',
       displayName: 'Claude 2.1',
       id: 'anthropic.claude-v2:1',
+      pricing: {
+        input: 8,
+        output: 24,
+      },
       tokens: 200_000,
     },
     {
-      description: 'Anthropic highly capable model across a wide range of tasks from sophisticated dialogue and creative content generation to detailed instruction following.',
+      description:
+        'Anthropic highly capable model across a wide range of tasks from sophisticated dialogue and creative content generation to detailed instruction following.',
       displayName: 'Claude 2.0',
       id: 'anthropic.claude-v2',
+      pricing: {
+        input: 8,
+        output: 24,
+      },
       tokens: 100_000,
     },
     {
-      description: 'A fast, affordable yet still very capable model, which can handle a range of tasks including casual dialogue, text analysis, summarization, and document question-answering.',
+      description:
+        'A fast, affordable yet still very capable model, which can handle a range of tasks including casual dialogue, text analysis, summarization, and document question-answering.',
       displayName: 'Claude Instant',
       id: 'anthropic.claude-instant-v1',
+      pricing: {
+        input: 0.8,
+        output: 2.4,
+      },
       tokens: 100_000,
     },
     {
-      description: 'An update to Meta Llama 3 8B Instruct that includes an expanded 128K context length, multilinguality and improved reasoning capabilities. The Llama 3.1 offering of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned generative models in 8B, 70B and 405B sizes (text in/text out). The Llama 3.1 instruction-tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source chat models on common industry benchmarks. Llama 3.1 is intended for commercial and research use in multiple languages. Instruction tuned text only models are intended for assistant-like chat, whereas pretrained models can be adapted for a variety of natural language generation tasks. The Llama 3.1 models also support the ability to leverage the outputs of its models to improve other models including synthetic data generation and distillation. Llama 3.1 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.',
+      description:
+        'An update to Meta Llama 3 8B Instruct that includes an expanded 128K context length, multilinguality and improved reasoning capabilities. The Llama 3.1 offering of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned generative models in 8B, 70B and 405B sizes (text in/text out). The Llama 3.1 instruction-tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source chat models on common industry benchmarks. Llama 3.1 is intended for commercial and research use in multiple languages. Instruction tuned text only models are intended for assistant-like chat, whereas pretrained models can be adapted for a variety of natural language generation tasks. The Llama 3.1 models also support the ability to leverage the outputs of its models to improve other models including synthetic data generation and distillation. Llama 3.1 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.',
       displayName: 'Llama 3.1 8B Instruct',
       enabled: true,
       functionCall: true,
       id: 'meta.llama3-1-8b-instruct-v1:0',
+      pricing: {
+        input: 0.22,
+        output: 0.22,
+      },
       tokens: 128_000,
     },
     {
-      description: 'An update to Meta Llama 3 70B Instruct that includes an expanded 128K context length, multilinguality and improved reasoning capabilities. The Llama 3.1 offering of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned generative models in 8B, 70B and 405B sizes (text in/text out). The Llama 3.1 instruction-tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source chat models on common industry benchmarks. Llama 3.1 is intended for commercial and research use in multiple languages. Instruction tuned text only models are intended for assistant-like chat, whereas pretrained models can be adapted for a variety of natural language generation tasks. The Llama 3.1 models also support the ability to leverage the outputs of its models to improve other models including synthetic data generation and distillation. Llama 3.1 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.',
+      description:
+        'An update to Meta Llama 3 70B Instruct that includes an expanded 128K context length, multilinguality and improved reasoning capabilities. The Llama 3.1 offering of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned generative models in 8B, 70B and 405B sizes (text in/text out). The Llama 3.1 instruction-tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source chat models on common industry benchmarks. Llama 3.1 is intended for commercial and research use in multiple languages. Instruction tuned text only models are intended for assistant-like chat, whereas pretrained models can be adapted for a variety of natural language generation tasks. The Llama 3.1 models also support the ability to leverage the outputs of its models to improve other models including synthetic data generation and distillation. Llama 3.1 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.',
       displayName: 'Llama 3.1 70B Instruct',
       enabled: true,
       functionCall: true,
       id: 'meta.llama3-1-70b-instruct-v1:0',
+      pricing: {
+        input: 0.99,
+        output: 0.99,
+      },
       tokens: 128_000,
     },
     {
-      description: 'Meta Llama 3.1 405B Instruct is the largest and most powerful of the Llama 3.1 Instruct models that is a highly advanced model for conversational inference and reasoning, synthetic data generation, and a base to do specialized continual pre-training or fine-tuning on a specific domain. The Llama 3.1 offering of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned generative models in 8B, 70B and 405B sizes (text in/text out). The Llama 3.1 instruction-tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source chat models on common industry benchmarks. Llama 3.1 is intended for commercial and research use in multiple languages. Instruction tuned text only models are intended for assistant-like chat, whereas pretrained models can be adapted for a variety of natural language generation tasks. The Llama 3.1 models also support the ability to leverage the outputs of its models to improve other models including synthetic data generation and distillation. Llama 3.1 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.',
+      description:
+        'Meta Llama 3.1 405B Instruct is the largest and most powerful of the Llama 3.1 Instruct models that is a highly advanced model for conversational inference and reasoning, synthetic data generation, and a base to do specialized continual pre-training or fine-tuning on a specific domain. The Llama 3.1 offering of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned generative models in 8B, 70B and 405B sizes (text in/text out). The Llama 3.1 instruction-tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source chat models on common industry benchmarks. Llama 3.1 is intended for commercial and research use in multiple languages. Instruction tuned text only models are intended for assistant-like chat, whereas pretrained models can be adapted for a variety of natural language generation tasks. The Llama 3.1 models also support the ability to leverage the outputs of its models to improve other models including synthetic data generation and distillation. Llama 3.1 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.',
       displayName: 'Llama 3.1 405B Instruct',
       enabled: true,
       functionCall: true,
       id: 'meta.llama3-1-405b-instruct-v1:0',
+      pricing: {
+        input: 5.32,
+        output: 16,
+      },
       tokens: 128_000,
     },
     {
-      description: 'Meta Llama 3 is an accessible, open large language model (LLM) designed for developers, researchers, and businesses to build, experiment, and responsibly scale their generative AI ideas. Part of a foundational system, it serves as a bedrock for innovation in the global community. Ideal for limited computational power and resources, edge devices, and faster training times.',
+      description:
+        'Meta Llama 3 is an accessible, open large language model (LLM) designed for developers, researchers, and businesses to build, experiment, and responsibly scale their generative AI ideas. Part of a foundational system, it serves as a bedrock for innovation in the global community. Ideal for limited computational power and resources, edge devices, and faster training times.',
       displayName: 'Llama 3 8B Instruct',
       id: 'meta.llama3-8b-instruct-v1:0',
+      pricing: {
+        input: 0.3,
+        output: 0.6,
+      },
       tokens: 8000,
     },
     {
-      description: 'Meta Llama 3 is an accessible, open large language model (LLM) designed for developers, researchers, and businesses to build, experiment, and responsibly scale their generative AI ideas. Part of a foundational system, it serves as a bedrock for innovation in the global community. Ideal for content creation, conversational AI, language understanding, R&D, and Enterprise applications.',
+      description:
+        'Meta Llama 3 is an accessible, open large language model (LLM) designed for developers, researchers, and businesses to build, experiment, and responsibly scale their generative AI ideas. Part of a foundational system, it serves as a bedrock for innovation in the global community. Ideal for content creation, conversational AI, language understanding, R&D, and Enterprise applications.',
       displayName: 'Llama 3 70B Instruct',
       id: 'meta.llama3-70b-instruct-v1:0',
+      pricing: {
+        input: 2.65,
+        output: 3.5,
+      },
       tokens: 8000,
     },
-  /*
+    /*
     // TODO: Not support for now
     {
       description: 'A 7B dense Transformer, fast-deployed and easily customisable. Small, yet powerful for a variety of use cases. Supports English and code, and a 32k context window.',
@@ -157,7 +216,7 @@ const Bedrock: ModelProviderCard = {
       tokens: 32_000,
     },
     */
-  /*
+    /*
     // TODO: Not support for now
     {
       description: 'Command R+ is a highly performant generative language model optimized for large scale production workloads.',
@@ -176,7 +235,7 @@ const Bedrock: ModelProviderCard = {
       tokens: 128_000,
     },
     */
-  /*
+    /*
     // Cohere Command (Text) and AI21 Labs Jurassic-2 (Text) don't support chat with the Converse API
     {
       description: 'Command is Cohere flagship text generation model. It is trained to follow user commands and to be instantly useful in practical business applications.',
@@ -191,7 +250,7 @@ const Bedrock: ModelProviderCard = {
       tokens: 4000,
     },
     */
-  /*
+    /*
     // TODO: Not support for now
     {
       description: 'The latest Foundation Model from AI21 Labs, Jamba-Instruct offers an impressive 256K context window and delivers the best value per price on core text generation, summarization, and question answering tasks for the enterprise.',
@@ -200,7 +259,7 @@ const Bedrock: ModelProviderCard = {
       tokens: 256_000,
     },
     */
-  /*
+    /*
     // Cohere Command (Text) and AI21 Labs Jurassic-2 (Text) don't support chat with the Converse API
     {
       description: 'Jurassic-2 Mid is less powerful than Ultra, yet carefully designed to strike the right balance between exceptional quality and affordability. Jurassic-2 Mid can be applied to any language comprehension or generation task including question answering, summarization, long-form copy generation, advanced information extraction and many others.',
@@ -218,7 +277,9 @@ const Bedrock: ModelProviderCard = {
   ],
   checkModel: 'anthropic.claude-instant-v1',
   id: 'bedrock',
+  modelsUrl: 'https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html',
   name: 'Bedrock',
+  url: 'https://docs.aws.amazon.com/bedrock/latest/userguide/what-is-bedrock.html',
 };

 export default Bedrock;
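The recurring change across every provider card in this release: long description strings are re-wrapped by the formatter, and new metadata fields appear, namely pricing (with input, output, and sometimes cachedInput rates), releasedAt, modelsUrl, and url. The pricing numbers line up with the providers' published USD-per-1M-token rates. A minimal sketch of how such a shape could be consumed, assuming only the field names visible in this diff; the estimateCost helper is hypothetical and not part of @lobehub/chat:

// Mirrors the pricing fields added in this diff (USD per 1M tokens).
interface Pricing {
  cachedInput?: number; // rate for cache-hit input tokens, where offered
  input: number; // rate for input tokens
  output: number; // rate for output tokens
}

// Rates for anthropic.claude-3-5-sonnet-20240620-v1:0 from the Bedrock card above.
const claude35Sonnet: Pricing = { input: 3, output: 15 };

// Hypothetical helper: cost of a single request in USD.
function estimateCost(p: Pricing, inputTokens: number, outputTokens: number): number {
  return (inputTokens * p.input + outputTokens * p.output) / 1_000_000;
}

// 10K prompt tokens + 2K completion tokens:
// (10_000 * 3 + 2_000 * 15) / 1_000_000 = 0.06 USD
console.log(estimateCost(claude35Sonnet, 10_000, 2_000)); // 0.06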
src/config/modelProviders/deepseek.ts

@@ -4,18 +4,27 @@ import { ModelProviderCard } from '@/types/llm';
 const DeepSeek: ModelProviderCard = {
   chatModels: [
     {
-      description: '擅长通用对话任务',
-      displayName: 'DeepSeek V2.5',
+      description:
+        '融合通用与代码能力的全新开源模型, 不仅保留了原有 Chat 模型的通用对话能力和 Coder 模型的强大代码处理能力，还更好地对齐了人类偏好。此外，DeepSeek-V2.5 在写作任务、指令跟随等多个方面也实现了大幅提升。',
+      displayName: 'DeepSeek-V2.5',
       enabled: true,
       functionCall: true,
       id: 'deepseek-chat',
+      pricing: {
+        cachedInput: 0.014,
+        input: 0.14,
+        output: 0.28,
+      },
+      releasedAt: '2024-09-05',
       tokens: 128_000,
     },
   ],
   checkModel: 'deepseek-chat',
   id: 'deepseek',
   modelList: { showModelFetcher: true },
+  modelsUrl: 'https://platform.deepseek.com/api-docs/zh-cn/quick_start/pricing',
   name: 'DeepSeek',
+  url: 'https://deepseek.com',
 };

 export default DeepSeek;
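The DeepSeek card is the first in this diff to carry a cachedInput rate (0.014 vs 0.14 USD per 1M input tokens, a 10x discount for context-cache hits; the Gemini cards below do the same). A hypothetical helper applying the split, using only the deepseek-chat rates from the card above:

// Hypothetical: bill cache-hit input tokens at the discounted cachedInput rate.
// Rates are the deepseek-chat numbers from this diff, in USD per 1M tokens.
const DEEPSEEK_CACHED_INPUT = 0.014;
const DEEPSEEK_INPUT = 0.14;

function deepseekInputCost(cacheHitTokens: number, cacheMissTokens: number): number {
  return (cacheHitTokens * DEEPSEEK_CACHED_INPUT + cacheMissTokens * DEEPSEEK_INPUT) / 1_000_000;
}

// 50K tokens served from cache + 10K fresh tokens:
// (50_000 * 0.014 + 10_000 * 0.14) / 1_000_000 ≈ 0.0021 USD
console.log(deepseekInputCost(50_000, 10_000)); // ≈ 0.0021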
src/config/modelProviders/fireworksai.ts

@@ -5,7 +5,8 @@ import { ModelProviderCard } from '@/types/llm';
 const FireworksAI: ModelProviderCard = {
   chatModels: [
     {
-      description: 'Fireworks latest and most performant function-calling model. Firefunction-v2 is based on Llama-3 and trained to excel at function-calling as well as chat and instruction-following. See blog post for more details https://fireworks.ai/blog/firefunction-v2-launch-post',
+      description:
+        'Fireworks latest and most performant function-calling model. Firefunction-v2 is based on Llama-3 and trained to excel at function-calling as well as chat and instruction-following. See blog post for more details https://fireworks.ai/blog/firefunction-v2-launch-post',
       displayName: 'Firefunction V2',
       enabled: true,
       functionCall: true,
@@ -20,7 +21,8 @@ const FireworksAI: ModelProviderCard = {
       tokens: 32_768,
     },
     {
-      description: 'Vision-language model allowing both image and text as inputs (single image is recommended), trained on OSS model generated training data and open sourced on huggingface at fireworks-ai/FireLLaVA-13b',
+      description:
+        'Vision-language model allowing both image and text as inputs (single image is recommended), trained on OSS model generated training data and open sourced on huggingface at fireworks-ai/FireLLaVA-13b',
       displayName: 'FireLLaVA-13B',
       enabled: true,
       functionCall: false,
@@ -137,7 +139,9 @@ const FireworksAI: ModelProviderCard = {
   checkModel: 'accounts/fireworks/models/firefunction-v2',
   id: 'fireworksai',
   modelList: { showModelFetcher: true },
+  modelsUrl: 'https://fireworks.ai/models?show=Serverless',
   name: 'Fireworks AI',
+  url: 'https://fireworks.ai',
 };

 export default FireworksAI;
src/config/modelProviders/google.ts

@@ -1,7 +1,5 @@
 import { ModelProviderCard } from '@/types/llm';

-// ref https://ai.google.dev/models/gemini
-// api https://ai.google.dev/api/rest/v1beta/models/list
 const Google: ModelProviderCard = {
   chatModels: [
     {
@@ -11,6 +9,11 @@ const Google: ModelProviderCard = {
       functionCall: true,
       id: 'gemini-1.5-flash-latest',
       maxOutput: 8192,
+      pricing: {
+        cachedInput: 0.018_75,
+        input: 0.075,
+        output: 0.3,
+      },
       tokens: 1_048_576 + 8192,
       vision: true,
     },
@@ -21,6 +24,12 @@ const Google: ModelProviderCard = {
       functionCall: true,
       id: 'gemini-1.5-flash-exp-0827',
       maxOutput: 8192,
+      pricing: {
+        cachedInput: 0.018_75,
+        input: 0.075,
+        output: 0.3,
+      },
+      releasedAt: '2024-08-27',
       tokens: 1_048_576 + 8192,
       vision: true,
     },
@@ -30,6 +39,7 @@ const Google: ModelProviderCard = {
       functionCall: true,
       id: 'gemini-1.5-flash-8b-exp-0827',
       maxOutput: 8192,
+      releasedAt: '2024-08-27',
       tokens: 1_048_576 + 8192,
       vision: true,
     },
@@ -49,6 +59,12 @@ const Google: ModelProviderCard = {
       functionCall: true,
       id: 'gemini-1.5-pro-latest',
       maxOutput: 8192,
+      pricing: {
+        cachedInput: 0.875,
+        input: 3.5,
+        output: 10.5,
+      },
+      releasedAt: '2024-02-15',
       tokens: 2_097_152 + 8192,
       vision: true,
     },
@@ -59,6 +75,12 @@ const Google: ModelProviderCard = {
       functionCall: true,
       id: 'gemini-1.5-pro-exp-0827',
       maxOutput: 8192,
+      pricing: {
+        cachedInput: 0.875,
+        input: 3.5,
+        output: 10.5,
+      },
+      releasedAt: '2024-08-27',
       tokens: 2_097_152 + 8192,
       vision: true,
     },
@@ -68,6 +90,12 @@ const Google: ModelProviderCard = {
       functionCall: true,
       id: 'gemini-1.5-pro-exp-0801',
       maxOutput: 8192,
+      pricing: {
+        cachedInput: 0.875,
+        input: 3.5,
+        output: 10.5,
+      },
+      releasedAt: '2024-08-01',
       tokens: 2_097_152 + 8192,
       vision: true,
     },
@@ -77,6 +105,12 @@ const Google: ModelProviderCard = {
       functionCall: true,
       id: 'gemini-1.5-pro-001',
       maxOutput: 8192,
+      pricing: {
+        cachedInput: 0.875,
+        input: 3.5,
+        output: 10.5,
+      },
+      releasedAt: '2024-02-15',
       tokens: 2_097_152 + 8192,
       vision: true,
     },
@@ -86,6 +120,11 @@ const Google: ModelProviderCard = {
       displayName: 'Gemini 1.0 Pro',
       id: 'gemini-1.0-pro-latest',
       maxOutput: 2048,
+      pricing: {
+        input: 0.5,
+        output: 1.5,
+      },
+      releasedAt: '2023-12-06',
       tokens: 30_720 + 2048,
     },
     {
@@ -95,6 +134,11 @@ const Google: ModelProviderCard = {
       functionCall: true,
       id: 'gemini-1.0-pro-001',
       maxOutput: 2048,
+      pricing: {
+        input: 0.5,
+        output: 1.5,
+      },
+      releasedAt: '2023-12-06',
       tokens: 30_720 + 2048,
     },
     {
@@ -103,11 +147,17 @@ const Google: ModelProviderCard = {
       displayName: 'Gemini 1.0 Pro 002 (Tuning)',
       id: 'gemini-1.0-pro-002',
       maxOutput: 2048,
+      pricing: {
+        input: 0.5,
+        output: 1.5,
+      },
+      releasedAt: '2023-12-06',
       tokens: 30_720 + 2048,
     },
   ],
   checkModel: 'gemini-1.5-flash-latest',
   id: 'google',
+  modelsUrl: 'https://ai.google.dev/gemini-api/docs/models/gemini',
   name: 'Google',
   proxyUrl: {
     placeholder: 'https://generativelanguage.googleapis.com',
@@ -116,6 +166,7 @@ const Google: ModelProviderCard = {
     speed: 2,
     text: true,
   },
+  url: 'https://ai.google.dev',
 };

 export default Google;
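One detail in the Google card: tokens is written as an explicit sum, e.g. 1_048_576 + 8192, which we read as input context window plus maxOutput; that is our inference from the matching maxOutput values, not something the code states. The expression is ordinary TypeScript arithmetic, evaluated once at module load:

// Gemini 1.5 Flash numbers from this diff; the sum form apparently documents
// "context window + maxOutput" (our reading, not stated in the source).
const maxOutput = 8192;
const tokens = 1_048_576 + maxOutput; // 1_056_768; numeric separators are cosmetic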
src/config/modelProviders/groq.ts

@@ -1,6 +1,5 @@
 import { ModelProviderCard } from '@/types/llm';

-// ref https://console.groq.com/docs/models
 // ref https://console.groq.com/docs/tool-use
 const Groq: ModelProviderCard = {
   chatModels: [
@@ -10,6 +9,11 @@ const Groq: ModelProviderCard = {
       enabled: true,
       functionCall: true,
       id: 'llama-3.1-8b-instant',
+      maxOutput: 8192,
+      pricing: {
+        input: 0.05,
+        output: 0.08,
+      },
       tokens: 131_072,
     },
     {
@@ -17,9 +21,14 @@ const Groq: ModelProviderCard = {
       enabled: true,
       functionCall: true,
       id: 'llama-3.1-70b-versatile',
+      maxOutput: 8192,
+      pricing: {
+        input: 0.59,
+        output: 0.79,
+      },
       tokens: 131_072,
     },
-  /*
+    /*
     // Offline due to overwhelming demand! Stay tuned for updates.
     {
       displayName: 'Llama 3.1 405B (Preview)',
@@ -29,17 +38,25 @@ const Groq: ModelProviderCard = {
     },
     */
     {
-      displayName: 'Llama 3 Groq 8B Tool Use (Preview)',
+      displayName: 'Llama 3 Groq 8B Tool Use Preview 8K',
       enabled: true,
       functionCall: true,
       id: 'llama3-groq-8b-8192-tool-use-preview',
+      pricing: {
+        input: 0.19,
+        output: 0.19,
+      },
       tokens: 8192,
     },
     {
-      displayName: 'Llama 3 Groq 70B Tool Use (Preview)',
+      displayName: 'Llama 3 Groq 70B Tool Use Preview 8K',
       enabled: true,
       functionCall: true,
       id: 'llama3-groq-70b-8192-tool-use-preview',
+      pricing: {
+        input: 0.89,
+        output: 0.89,
+      },
       tokens: 8192,
     },
     {
@@ -47,6 +64,10 @@ const Groq: ModelProviderCard = {
       enabled: true,
       functionCall: true,
       id: 'llama3-8b-8192',
+      pricing: {
+        input: 0.05,
+        output: 0.08,
+      },
       tokens: 8192,
     },
     {
@@ -54,26 +75,42 @@ const Groq: ModelProviderCard = {
       enabled: true,
       functionCall: true,
       id: 'llama3-70b-8192',
+      pricing: {
+        input: 0.59,
+        output: 0.79,
+      },
       tokens: 8192,
     },
     {
-      displayName: 'Gemma 2 9B',
+      displayName: 'Gemma 2 9B 8k',
       enabled: true,
       functionCall: true,
       id: 'gemma2-9b-it',
+      pricing: {
+        input: 0.2,
+        output: 0.2,
+      },
       tokens: 8192,
     },
     {
-      displayName: 'Gemma 7B',
+      displayName: 'Gemma 7B 8k Instruct',
       functionCall: true,
       id: 'gemma-7b-it',
+      pricing: {
+        input: 0.07,
+        output: 0.07,
+      },
       tokens: 8192,
     },
     {
-      displayName: 'Mixtral 8x7B',
+      displayName: 'Mixtral 8x7B Instruct 32k',
       enabled: true,
       functionCall: true,
       id: 'mixtral-8x7b-32768',
+      pricing: {
+        input: 0.24,
+        output: 0.24,
+      },
       tokens: 32_768,
     },
     {
@@ -86,10 +123,12 @@ const Groq: ModelProviderCard = {
   ],
   checkModel: 'gemma2-9b-it',
   id: 'groq',
+  modelsUrl: 'https://console.groq.com/docs/models',
   name: 'Groq',
   proxyUrl: {
     placeholder: 'https://api.groq.com/openai/v1',
   },
+  url: 'https://groq.com',
 };

 export default Groq;
src/config/modelProviders/minimax.ts

@@ -42,11 +42,13 @@ const Minimax: ModelProviderCard = {
   ],
   checkModel: 'abab6.5s-chat',
   id: 'minimax',
+  modelsUrl: 'https://platform.minimaxi.com/document/Models',
   name: 'Minimax',
   smoothing: {
     speed: 2,
     text: true,
   },
+  url: 'https://www.minimaxi.com',
 };

 export default Minimax;