@lobehub/chat 1.16.6 → 1.16.8
This diff shows the contents of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Potentially problematic release: this version of @lobehub/chat might be problematic.
- package/CHANGELOG.md +50 -0
- package/README.md +8 -8
- package/README.zh-CN.md +8 -8
- package/package.json +1 -1
- package/src/components/ModelSelect/index.tsx +1 -9
- package/src/config/modelProviders/ai360.ts +23 -68
- package/src/config/modelProviders/anthropic.ts +39 -2
- package/src/config/modelProviders/baichuan.ts +30 -11
- package/src/config/modelProviders/bedrock.ts +80 -19
- package/src/config/modelProviders/deepseek.ts +11 -2
- package/src/config/modelProviders/fireworksai.ts +6 -2
- package/src/config/modelProviders/google.ts +53 -2
- package/src/config/modelProviders/groq.ts +46 -7
- package/src/config/modelProviders/minimax.ts +2 -0
- package/src/config/modelProviders/openai.ts +102 -1
- package/src/const/discover.ts +1 -0
- package/src/libs/agent-runtime/openai/__snapshots__/index.test.ts.snap +56 -0
- package/src/server/routers/edge/config/__snapshots__/index.test.ts.snap +32 -0
- package/src/server/routers/edge/config/index.test.ts +4 -0
- package/src/types/llm.ts +29 -1
- package/src/utils/format.test.ts +42 -1
- package/src/utils/format.ts +11 -0
- package/src/utils/parseModels.test.ts +8 -0
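
The recurring change across the provider files below: each chat model gains a `pricing` block, several gain `releasedAt` and `maxOutput` fields, and each provider card gains `modelsUrl` and `url` links. A minimal sketch of the shapes these hunks imply, in TypeScript. The authoritative definitions live in `package/src/types/llm.ts` (also touched in this release, +29 −1), so the field names here are read off the diff rather than the real type file:

```ts
// Sketch inferred from the hunks below -- an approximation,
// not the actual contents of src/types/llm.ts.
interface ChatModelPricing {
  cachedInput?: number; // cache-hit input rate (seen on DeepSeek and Gemini entries)
  input?: number; // prompt rate; values match providers' USD-per-1M-token prices
  output?: number; // completion rate, same unit
}

interface ChatModelCard {
  description?: string;
  displayName?: string;
  enabled?: boolean;
  functionCall?: boolean;
  id: string;
  maxOutput?: number;
  pricing?: ChatModelPricing; // new in this release
  releasedAt?: string; // new in this release: ISO date such as '2024-09-05'
  tokens?: number; // context size, sometimes written as context + maxOutput
  vision?: boolean;
}

interface ModelProviderCard {
  chatModels: ChatModelCard[];
  checkModel?: string;
  id: string;
  modelList?: { showModelFetcher?: boolean };
  modelsUrl?: string; // new in this release: provider's model documentation page
  name: string;
  proxyUrl?: { placeholder: string };
  url?: string; // new in this release: provider homepage
}
```

The pricing values line up with the providers' published USD-per-million-token rates (e.g. 3/15 for Claude 3.5 Sonnet, 0.075/0.3 for Gemini 1.5 Flash), which is the assumption the comments above make.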
package/src/config/modelProviders/bedrock.ts

@@ -1,12 +1,11 @@
 import { ModelProviderCard } from '@/types/llm';
 
-// ref https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html
 // ref https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html
 // ref https://us-east-1.console.aws.amazon.com/bedrock/home?region=us-east-1#/models
 // ref https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/models
 const Bedrock: ModelProviderCard = {
   chatModels: [
-    /*
+    /*
     // TODO: Not support for now
     {
       description: 'Amazon Titan Text Lite is a light weight efficient model ideal for fine-tuning for English-language tasks, including like summarization and copywriting, where customers want a smaller, more cost-effective model that is also highly customizable.',
@@ -28,96 +27,156 @@ const Bedrock: ModelProviderCard = {
     },
     */
     {
-      description:
+      description:
+        'Claude 3.5 Sonnet raises the industry bar for intelligence, outperforming competitor models and Claude 3 Opus on a wide range of evaluations, with the speed and cost of our mid-tier model, Claude 3 Sonnet.',
       displayName: 'Claude 3.5 Sonnet',
       enabled: true,
       functionCall: true,
       id: 'anthropic.claude-3-5-sonnet-20240620-v1:0',
+      pricing: {
+        input: 3,
+        output: 15,
+      },
       tokens: 200_000,
       vision: true,
     },
     {
-      description:
+      description:
+        'Claude 3 Sonnet by Anthropic strikes the ideal balance between intelligence and speed—particularly for enterprise workloads. It offers maximum utility at a lower price than competitors, and is engineered to be the dependable, high-endurance workhorse for scaled AI deployments. Claude 3 Sonnet can process images and return text outputs, and features a 200K context window.',
       displayName: 'Claude 3 Sonnet',
       enabled: true,
       functionCall: true,
       id: 'anthropic.claude-3-sonnet-20240229-v1:0',
+      pricing: {
+        input: 3,
+        output: 15,
+      },
       tokens: 200_000,
       vision: true,
     },
     {
-      description:
+      description:
+        'Claude 3 Opus is Anthropic most powerful AI model, with state-of-the-art performance on highly complex tasks. It can navigate open-ended prompts and sight-unseen scenarios with remarkable fluency and human-like understanding. Claude 3 Opus shows us the frontier of what’s possible with generative AI. Claude 3 Opus can process images and return text outputs, and features a 200K context window.',
       displayName: 'Claude 3 Opus',
       enabled: true,
       functionCall: true,
       id: 'anthropic.claude-3-opus-20240229-v1:0',
+      pricing: {
+        input: 15,
+        output: 75,
+      },
       tokens: 200_000,
       vision: true,
     },
     {
-      description:
+      description:
+        'Claude 3 Haiku is Anthropic fastest, most compact model for near-instant responsiveness. It answers simple queries and requests with speed. Customers will be able to build seamless AI experiences that mimic human interactions. Claude 3 Haiku can process images and return text outputs, and features a 200K context window.',
       displayName: 'Claude 3 Haiku',
       enabled: true,
       functionCall: true,
       id: 'anthropic.claude-3-haiku-20240307-v1:0',
+      pricing: {
+        input: 0.25,
+        output: 1.25,
+      },
       tokens: 200_000,
       vision: true,
     },
     {
-      description:
+      description:
+        'An update to Claude 2 that features double the context window, plus improvements across reliability, hallucination rates, and evidence-based accuracy in long document and RAG contexts.',
       displayName: 'Claude 2.1',
       id: 'anthropic.claude-v2:1',
+      pricing: {
+        input: 8,
+        output: 24,
+      },
       tokens: 200_000,
     },
     {
-      description:
+      description:
+        'Anthropic highly capable model across a wide range of tasks from sophisticated dialogue and creative content generation to detailed instruction following.',
       displayName: 'Claude 2.0',
       id: 'anthropic.claude-v2',
+      pricing: {
+        input: 8,
+        output: 24,
+      },
       tokens: 100_000,
     },
     {
-      description:
+      description:
+        'A fast, affordable yet still very capable model, which can handle a range of tasks including casual dialogue, text analysis, summarization, and document question-answering.',
       displayName: 'Claude Instant',
       id: 'anthropic.claude-instant-v1',
+      pricing: {
+        input: 0.8,
+        output: 2.4,
+      },
       tokens: 100_000,
     },
     {
-      description:
+      description:
+        'An update to Meta Llama 3 8B Instruct that includes an expanded 128K context length, multilinguality and improved reasoning capabilities. The Llama 3.1 offering of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned generative models in 8B, 70B and 405B sizes (text in/text out). The Llama 3.1 instruction-tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source chat models on common industry benchmarks. Llama 3.1 is intended for commercial and research use in multiple languages. Instruction tuned text only models are intended for assistant-like chat, whereas pretrained models can be adapted for a variety of natural language generation tasks. The Llama 3.1 models also support the ability to leverage the outputs of its models to improve other models including synthetic data generation and distillation. Llama 3.1 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.',
       displayName: 'Llama 3.1 8B Instruct',
       enabled: true,
       functionCall: true,
       id: 'meta.llama3-1-8b-instruct-v1:0',
+      pricing: {
+        input: 0.22,
+        output: 0.22,
+      },
       tokens: 128_000,
     },
     {
-      description:
+      description:
+        'An update to Meta Llama 3 70B Instruct that includes an expanded 128K context length, multilinguality and improved reasoning capabilities. The Llama 3.1 offering of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned generative models in 8B, 70B and 405B sizes (text in/text out). The Llama 3.1 instruction-tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source chat models on common industry benchmarks. Llama 3.1 is intended for commercial and research use in multiple languages. Instruction tuned text only models are intended for assistant-like chat, whereas pretrained models can be adapted for a variety of natural language generation tasks. The Llama 3.1 models also support the ability to leverage the outputs of its models to improve other models including synthetic data generation and distillation. Llama 3.1 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.',
       displayName: 'Llama 3.1 70B Instruct',
       enabled: true,
       functionCall: true,
       id: 'meta.llama3-1-70b-instruct-v1:0',
+      pricing: {
+        input: 0.99,
+        output: 0.99,
+      },
       tokens: 128_000,
     },
     {
-      description:
+      description:
+        'Meta Llama 3.1 405B Instruct is the largest and most powerful of the Llama 3.1 Instruct models that is a highly advanced model for conversational inference and reasoning, synthetic data generation, and a base to do specialized continual pre-training or fine-tuning on a specific domain. The Llama 3.1 offering of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned generative models in 8B, 70B and 405B sizes (text in/text out). The Llama 3.1 instruction-tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source chat models on common industry benchmarks. Llama 3.1 is intended for commercial and research use in multiple languages. Instruction tuned text only models are intended for assistant-like chat, whereas pretrained models can be adapted for a variety of natural language generation tasks. The Llama 3.1 models also support the ability to leverage the outputs of its models to improve other models including synthetic data generation and distillation. Llama 3.1 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.',
       displayName: 'Llama 3.1 405B Instruct',
       enabled: true,
       functionCall: true,
       id: 'meta.llama3-1-405b-instruct-v1:0',
+      pricing: {
+        input: 5.32,
+        output: 16,
+      },
       tokens: 128_000,
     },
     {
-      description:
+      description:
+        'Meta Llama 3 is an accessible, open large language model (LLM) designed for developers, researchers, and businesses to build, experiment, and responsibly scale their generative AI ideas. Part of a foundational system, it serves as a bedrock for innovation in the global community. Ideal for limited computational power and resources, edge devices, and faster training times.',
       displayName: 'Llama 3 8B Instruct',
       id: 'meta.llama3-8b-instruct-v1:0',
+      pricing: {
+        input: 0.3,
+        output: 0.6,
+      },
       tokens: 8000,
     },
     {
-      description:
+      description:
+        'Meta Llama 3 is an accessible, open large language model (LLM) designed for developers, researchers, and businesses to build, experiment, and responsibly scale their generative AI ideas. Part of a foundational system, it serves as a bedrock for innovation in the global community. Ideal for content creation, conversational AI, language understanding, R&D, and Enterprise applications.',
       displayName: 'Llama 3 70B Instruct',
       id: 'meta.llama3-70b-instruct-v1:0',
+      pricing: {
+        input: 2.65,
+        output: 3.5,
+      },
       tokens: 8000,
     },
-    /*
+    /*
     // TODO: Not support for now
     {
       description: 'A 7B dense Transformer, fast-deployed and easily customisable. Small, yet powerful for a variety of use cases. Supports English and code, and a 32k context window.',
@@ -157,7 +216,7 @@ const Bedrock: ModelProviderCard = {
       tokens: 32_000,
     },
     */
-    /*
+    /*
     // TODO: Not support for now
     {
       description: 'Command R+ is a highly performant generative language model optimized for large scale production workloads.',
@@ -176,7 +235,7 @@ const Bedrock: ModelProviderCard = {
       tokens: 128_000,
     },
     */
-    /*
+    /*
     // Cohere Command (Text) and AI21 Labs Jurassic-2 (Text) don't support chat with the Converse API
     {
       description: 'Command is Cohere flagship text generation model. It is trained to follow user commands and to be instantly useful in practical business applications.',
@@ -191,7 +250,7 @@ const Bedrock: ModelProviderCard = {
       tokens: 4000,
     },
     */
-    /*
+    /*
     // TODO: Not support for now
     {
       description: 'The latest Foundation Model from AI21 Labs, Jamba-Instruct offers an impressive 256K context window and delivers the best value per price on core text generation, summarization, and question answering tasks for the enterprise.',
@@ -200,7 +259,7 @@ const Bedrock: ModelProviderCard = {
       tokens: 256_000,
     },
     */
-    /*
+    /*
     // Cohere Command (Text) and AI21 Labs Jurassic-2 (Text) don't support chat with the Converse API
     {
       description: 'Jurassic-2 Mid is less powerful than Ultra, yet carefully designed to strike the right balance between exceptional quality and affordability. Jurassic-2 Mid can be applied to any language comprehension or generation task including question answering, summarization, long-form copy generation, advanced information extraction and many others.',
@@ -218,7 +277,9 @@ const Bedrock: ModelProviderCard = {
   ],
   checkModel: 'anthropic.claude-instant-v1',
   id: 'bedrock',
+  modelsUrl: 'https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html',
   name: 'Bedrock',
+  url: 'https://docs.aws.amazon.com/bedrock/latest/userguide/what-is-bedrock.html',
 };
 
 export default Bedrock;
package/src/config/modelProviders/deepseek.ts

@@ -4,18 +4,27 @@ import { ModelProviderCard } from '@/types/llm';
 const DeepSeek: ModelProviderCard = {
   chatModels: [
     {
-      description:
-
+      description:
+        '融合通用与代码能力的全新开源模型, 不仅保留了原有 Chat 模型的通用对话能力和 Coder 模型的强大代码处理能力,还更好地对齐了人类偏好。此外,DeepSeek-V2.5 在写作任务、指令跟随等多个方面也实现了大幅提升。',
+      displayName: 'DeepSeek-V2.5',
       enabled: true,
       functionCall: true,
       id: 'deepseek-chat',
+      pricing: {
+        cachedInput: 0.014,
+        input: 0.14,
+        output: 0.28,
+      },
+      releasedAt: '2024-09-05',
       tokens: 128_000,
     },
   ],
   checkModel: 'deepseek-chat',
   id: 'deepseek',
   modelList: { showModelFetcher: true },
+  modelsUrl: 'https://platform.deepseek.com/api-docs/zh-cn/quick_start/pricing',
   name: 'DeepSeek',
+  url: 'https://deepseek.com',
 };
 
 export default DeepSeek;
package/src/config/modelProviders/fireworksai.ts

@@ -5,7 +5,8 @@ import { ModelProviderCard } from '@/types/llm';
 const FireworksAI: ModelProviderCard = {
   chatModels: [
     {
-      description:
+      description:
+        'Fireworks latest and most performant function-calling model. Firefunction-v2 is based on Llama-3 and trained to excel at function-calling as well as chat and instruction-following. See blog post for more details https://fireworks.ai/blog/firefunction-v2-launch-post',
       displayName: 'Firefunction V2',
       enabled: true,
       functionCall: true,
@@ -20,7 +21,8 @@ const FireworksAI: ModelProviderCard = {
       tokens: 32_768,
     },
     {
-      description:
+      description:
+        'Vision-language model allowing both image and text as inputs (single image is recommended), trained on OSS model generated training data and open sourced on huggingface at fireworks-ai/FireLLaVA-13b',
       displayName: 'FireLLaVA-13B',
       enabled: true,
       functionCall: false,
@@ -137,7 +139,9 @@ const FireworksAI: ModelProviderCard = {
   checkModel: 'accounts/fireworks/models/firefunction-v2',
   id: 'fireworksai',
   modelList: { showModelFetcher: true },
+  modelsUrl: 'https://fireworks.ai/models?show=Serverless',
   name: 'Fireworks AI',
+  url: 'https://fireworks.ai',
 };
 
 export default FireworksAI;
package/src/config/modelProviders/google.ts

@@ -1,7 +1,5 @@
 import { ModelProviderCard } from '@/types/llm';
 
-// ref https://ai.google.dev/models/gemini
-// api https://ai.google.dev/api/rest/v1beta/models/list
 const Google: ModelProviderCard = {
   chatModels: [
     {
@@ -11,6 +9,11 @@ const Google: ModelProviderCard = {
       functionCall: true,
       id: 'gemini-1.5-flash-latest',
       maxOutput: 8192,
+      pricing: {
+        cachedInput: 0.018_75,
+        input: 0.075,
+        output: 0.3,
+      },
       tokens: 1_048_576 + 8192,
       vision: true,
     },
@@ -21,6 +24,12 @@ const Google: ModelProviderCard = {
       functionCall: true,
       id: 'gemini-1.5-flash-exp-0827',
       maxOutput: 8192,
+      pricing: {
+        cachedInput: 0.018_75,
+        input: 0.075,
+        output: 0.3,
+      },
+      releasedAt: '2024-08-27',
       tokens: 1_048_576 + 8192,
       vision: true,
     },
@@ -30,6 +39,7 @@ const Google: ModelProviderCard = {
       functionCall: true,
       id: 'gemini-1.5-flash-8b-exp-0827',
       maxOutput: 8192,
+      releasedAt: '2024-08-27',
       tokens: 1_048_576 + 8192,
       vision: true,
     },
@@ -49,6 +59,12 @@ const Google: ModelProviderCard = {
       functionCall: true,
       id: 'gemini-1.5-pro-latest',
       maxOutput: 8192,
+      pricing: {
+        cachedInput: 0.875,
+        input: 3.5,
+        output: 10.5,
+      },
+      releasedAt: '2024-02-15',
       tokens: 2_097_152 + 8192,
       vision: true,
     },
@@ -59,6 +75,12 @@ const Google: ModelProviderCard = {
       functionCall: true,
       id: 'gemini-1.5-pro-exp-0827',
       maxOutput: 8192,
+      pricing: {
+        cachedInput: 0.875,
+        input: 3.5,
+        output: 10.5,
+      },
+      releasedAt: '2024-08-27',
       tokens: 2_097_152 + 8192,
       vision: true,
     },
@@ -68,6 +90,12 @@ const Google: ModelProviderCard = {
       functionCall: true,
       id: 'gemini-1.5-pro-exp-0801',
       maxOutput: 8192,
+      pricing: {
+        cachedInput: 0.875,
+        input: 3.5,
+        output: 10.5,
+      },
+      releasedAt: '2024-08-01',
       tokens: 2_097_152 + 8192,
       vision: true,
     },
@@ -77,6 +105,12 @@ const Google: ModelProviderCard = {
       functionCall: true,
       id: 'gemini-1.5-pro-001',
       maxOutput: 8192,
+      pricing: {
+        cachedInput: 0.875,
+        input: 3.5,
+        output: 10.5,
+      },
+      releasedAt: '2024-02-15',
       tokens: 2_097_152 + 8192,
       vision: true,
     },
@@ -86,6 +120,11 @@ const Google: ModelProviderCard = {
       displayName: 'Gemini 1.0 Pro',
       id: 'gemini-1.0-pro-latest',
       maxOutput: 2048,
+      pricing: {
+        input: 0.5,
+        output: 1.5,
+      },
+      releasedAt: '2023-12-06',
       tokens: 30_720 + 2048,
     },
     {
@@ -95,6 +134,11 @@ const Google: ModelProviderCard = {
       functionCall: true,
       id: 'gemini-1.0-pro-001',
       maxOutput: 2048,
+      pricing: {
+        input: 0.5,
+        output: 1.5,
+      },
+      releasedAt: '2023-12-06',
       tokens: 30_720 + 2048,
     },
     {
@@ -103,11 +147,17 @@ const Google: ModelProviderCard = {
       displayName: 'Gemini 1.0 Pro 002 (Tuning)',
       id: 'gemini-1.0-pro-002',
       maxOutput: 2048,
+      pricing: {
+        input: 0.5,
+        output: 1.5,
+      },
+      releasedAt: '2023-12-06',
       tokens: 30_720 + 2048,
     },
   ],
   checkModel: 'gemini-1.5-flash-latest',
   id: 'google',
+  modelsUrl: 'https://ai.google.dev/gemini-api/docs/models/gemini',
   name: 'Google',
   proxyUrl: {
     placeholder: 'https://generativelanguage.googleapis.com',
@@ -116,6 +166,7 @@ const Google: ModelProviderCard = {
     speed: 2,
     text: true,
   },
+  url: 'https://ai.google.dev',
 };
 
 export default Google;
package/src/config/modelProviders/groq.ts

@@ -1,6 +1,5 @@
 import { ModelProviderCard } from '@/types/llm';
 
-// ref https://console.groq.com/docs/models
 // ref https://console.groq.com/docs/tool-use
 const Groq: ModelProviderCard = {
   chatModels: [
@@ -10,6 +9,11 @@ const Groq: ModelProviderCard = {
       enabled: true,
       functionCall: true,
       id: 'llama-3.1-8b-instant',
+      maxOutput: 8192,
+      pricing: {
+        input: 0.05,
+        output: 0.08,
+      },
       tokens: 131_072,
     },
     {
@@ -17,9 +21,14 @@ const Groq: ModelProviderCard = {
       enabled: true,
       functionCall: true,
       id: 'llama-3.1-70b-versatile',
+      maxOutput: 8192,
+      pricing: {
+        input: 0.59,
+        output: 0.79,
+      },
       tokens: 131_072,
     },
-    /*
+    /*
     // Offline due to overwhelming demand! Stay tuned for updates.
     {
       displayName: 'Llama 3.1 405B (Preview)',
@@ -29,17 +38,25 @@ const Groq: ModelProviderCard = {
     },
     */
     {
-      displayName: 'Llama 3 Groq 8B Tool Use
+      displayName: 'Llama 3 Groq 8B Tool Use Preview 8K',
       enabled: true,
       functionCall: true,
       id: 'llama3-groq-8b-8192-tool-use-preview',
+      pricing: {
+        input: 0.19,
+        output: 0.19,
+      },
       tokens: 8192,
     },
     {
-      displayName: 'Llama 3 Groq 70B Tool Use
+      displayName: 'Llama 3 Groq 70B Tool Use Preview 8K',
       enabled: true,
       functionCall: true,
       id: 'llama3-groq-70b-8192-tool-use-preview',
+      pricing: {
+        input: 0.89,
+        output: 0.89,
+      },
       tokens: 8192,
     },
     {
@@ -47,6 +64,10 @@ const Groq: ModelProviderCard = {
       enabled: true,
       functionCall: true,
       id: 'llama3-8b-8192',
+      pricing: {
+        input: 0.05,
+        output: 0.08,
+      },
       tokens: 8192,
     },
     {
@@ -54,26 +75,42 @@ const Groq: ModelProviderCard = {
       enabled: true,
       functionCall: true,
       id: 'llama3-70b-8192',
+      pricing: {
+        input: 0.59,
+        output: 0.79,
+      },
       tokens: 8192,
     },
     {
-      displayName: 'Gemma 2 9B',
+      displayName: 'Gemma 2 9B 8k',
       enabled: true,
       functionCall: true,
       id: 'gemma2-9b-it',
+      pricing: {
+        input: 0.2,
+        output: 0.2,
+      },
       tokens: 8192,
     },
     {
-      displayName: 'Gemma 7B',
+      displayName: 'Gemma 7B 8k Instruct',
       functionCall: true,
       id: 'gemma-7b-it',
+      pricing: {
+        input: 0.07,
+        output: 0.07,
+      },
       tokens: 8192,
     },
     {
-      displayName: 'Mixtral 8x7B',
+      displayName: 'Mixtral 8x7B Instruct 32k',
       enabled: true,
       functionCall: true,
       id: 'mixtral-8x7b-32768',
+      pricing: {
+        input: 0.24,
+        output: 0.24,
+      },
       tokens: 32_768,
     },
     {
@@ -86,10 +123,12 @@ const Groq: ModelProviderCard = {
   ],
   checkModel: 'gemma2-9b-it',
   id: 'groq',
+  modelsUrl: 'https://console.groq.com/docs/models',
   name: 'Groq',
   proxyUrl: {
     placeholder: 'https://api.groq.com/openai/v1',
   },
+  url: 'https://groq.com',
 };
 
 export default Groq;
package/src/config/modelProviders/minimax.ts

@@ -42,11 +42,13 @@ const Minimax: ModelProviderCard = {
   ],
   checkModel: 'abab6.5s-chat',
   id: 'minimax',
+  modelsUrl: 'https://platform.minimaxi.com/document/Models',
   name: 'Minimax',
   smoothing: {
     speed: 2,
     text: true,
   },
+  url: 'https://www.minimaxi.com',
 };
 
 export default Minimax;
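
Since `package/src/utils/format.ts` also gains pricing-related lines in this release (with matching tests in `format.test.ts`), the natural consumer of these fields is cost display. A hedged sketch of the arithmetic the `pricing` blocks enable; the function name and signature here are illustrative, not the package's actual API:

```ts
// Illustrative cost estimate from a model's pricing block. Assumes rates are
// USD per million tokens, which matches the values in the hunks above.
interface Usage {
  cachedInputTokens?: number;
  inputTokens: number;
  outputTokens: number;
}

const estimateCost = (
  pricing: { cachedInput?: number; input?: number; output?: number },
  usage: Usage,
): number => {
  // Cache-hit tokens bill at the cheaper cachedInput rate when one exists.
  const cached = (usage.cachedInputTokens ?? 0) * (pricing.cachedInput ?? pricing.input ?? 0);
  const fresh = usage.inputTokens * (pricing.input ?? 0);
  const out = usage.outputTokens * (pricing.output ?? 0);
  return (cached + fresh + out) / 1_000_000;
};

// Claude 3.5 Sonnet on Bedrock (input: 3, output: 15):
// estimateCost({ input: 3, output: 15 }, { inputTokens: 10_000, outputTokens: 2_000 })
// => (10_000 * 3 + 2_000 * 15) / 1e6 = 0.06 (USD)
```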