@lobehub/chat 1.15.14 → 1.15.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +50 -0
- package/Dockerfile +1 -1
- package/Dockerfile.database +1 -1
- package/README.md +5 -3
- package/docs/self-hosting/server-database/repocloud.mdx +20 -4
- package/package.json +1 -1
- package/src/config/llm.ts +2 -0
- package/src/config/modelProviders/bedrock.ts +144 -45
- package/src/config/modelProviders/groq.ts +8 -1
- package/src/server/globalConfig/index.ts +11 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,56 @@
|
|
|
2
2
|
|
|
3
3
|
# Changelog
|
|
4
4
|
|
|
5
|
+
### [Version 1.15.16](https://github.com/lobehub/lobe-chat/compare/v1.15.15...v1.15.16)
|
|
6
|
+
|
|
7
|
+
<sup>Released on **2024-09-06**</sup>
|
|
8
|
+
|
|
9
|
+
#### 💄 Styles
|
|
10
|
+
|
|
11
|
+
- **misc**: Update Bedrock model list & add `AWS_BEDROCK_MODEL_LIST` support.
|
|
12
|
+
|
|
13
|
+
<br/>
|
|
14
|
+
|
|
15
|
+
<details>
|
|
16
|
+
<summary><kbd>Improvements and Fixes</kbd></summary>
|
|
17
|
+
|
|
18
|
+
#### Styles
|
|
19
|
+
|
|
20
|
+
- **misc**: Update Bedrock model list & add `AWS_BEDROCK_MODEL_LIST` support, closes [#3723](https://github.com/lobehub/lobe-chat/issues/3723) ([0aad972](https://github.com/lobehub/lobe-chat/commit/0aad972))
|
|
21
|
+
|
|
22
|
+
</details>
|
|
23
|
+
|
|
24
|
+
<div align="right">
|
|
25
|
+
|
|
26
|
+
[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
|
|
27
|
+
|
|
28
|
+
</div>
|
|
29
|
+
|
|
30
|
+
### [Version 1.15.15](https://github.com/lobehub/lobe-chat/compare/v1.15.14...v1.15.15)
|
|
31
|
+
|
|
32
|
+
<sup>Released on **2024-09-06**</sup>
|
|
33
|
+
|
|
34
|
+
#### 💄 Styles
|
|
35
|
+
|
|
36
|
+
- **misc**: Add `LLaVA 1.5 7B` model in Groq.
|
|
37
|
+
|
|
38
|
+
<br/>
|
|
39
|
+
|
|
40
|
+
<details>
|
|
41
|
+
<summary><kbd>Improvements and Fixes</kbd></summary>
|
|
42
|
+
|
|
43
|
+
#### Styles
|
|
44
|
+
|
|
45
|
+
- **misc**: Add `LLaVA 1.5 7B` model in Groq, closes [#3769](https://github.com/lobehub/lobe-chat/issues/3769) ([f78a0b1](https://github.com/lobehub/lobe-chat/commit/f78a0b1))
|
|
46
|
+
|
|
47
|
+
</details>
|
|
48
|
+
|
|
49
|
+
<div align="right">
|
|
50
|
+
|
|
51
|
+
[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
|
|
52
|
+
|
|
53
|
+
</div>
|
|
54
|
+
|
|
5
55
|
### [Version 1.15.14](https://github.com/lobehub/lobe-chat/compare/v1.15.13...v1.15.14)
|
|
6
56
|
|
|
7
57
|
<sup>Released on **2024-09-06**</sup>
|
package/Dockerfile
CHANGED
|
@@ -109,7 +109,7 @@ ENV \
|
|
|
109
109
|
# Anthropic
|
|
110
110
|
ANTHROPIC_API_KEY="" ANTHROPIC_PROXY_URL="" \
|
|
111
111
|
# Amazon Bedrock
|
|
112
|
-
AWS_ACCESS_KEY_ID="" AWS_SECRET_ACCESS_KEY="" AWS_REGION="" \
|
|
112
|
+
AWS_ACCESS_KEY_ID="" AWS_SECRET_ACCESS_KEY="" AWS_REGION="" AWS_BEDROCK_MODEL_LIST="" \
|
|
113
113
|
# Azure OpenAI
|
|
114
114
|
AZURE_API_KEY="" AZURE_API_VERSION="" AZURE_ENDPOINT="" AZURE_MODEL_LIST="" \
|
|
115
115
|
# Baichuan
|
package/Dockerfile.database
CHANGED
|
@@ -141,7 +141,7 @@ ENV \
|
|
|
141
141
|
# Anthropic
|
|
142
142
|
ANTHROPIC_API_KEY="" ANTHROPIC_PROXY_URL="" \
|
|
143
143
|
# Amazon Bedrock
|
|
144
|
-
AWS_ACCESS_KEY_ID="" AWS_SECRET_ACCESS_KEY="" AWS_REGION="" \
|
|
144
|
+
AWS_ACCESS_KEY_ID="" AWS_SECRET_ACCESS_KEY="" AWS_REGION="" AWS_BEDROCK_MODEL_LIST="" \
|
|
145
145
|
# Azure OpenAI
|
|
146
146
|
AZURE_API_KEY="" AZURE_API_VERSION="" AZURE_ENDPOINT="" AZURE_MODEL_LIST="" \
|
|
147
147
|
# Baichuan
|
package/README.md
CHANGED
|
@@ -462,9 +462,9 @@ If you want to deploy this service yourself on either Vercel or Zeabur, you can
|
|
|
462
462
|
|
|
463
463
|
<div align="center">
|
|
464
464
|
|
|
465
|
-
| Deploy with Vercel | Deploy with Zeabur | Deploy with Sealos |
|
|
466
|
-
| :-------------------------------------: | :---------------------------------------------------------: | :---------------------------------------------------------: |
|
|
467
|
-
| [![][deploy-button-image]][deploy-link] | [![][deploy-on-zeabur-button-image]][deploy-on-zeabur-link] | [![][deploy-on-sealos-button-image]][deploy-on-sealos-link] |
|
|
465
|
+
| Deploy with Vercel | Deploy with Zeabur | Deploy with Sealos | Deploy with RepoCloud |
|
|
466
|
+
| :-------------------------------------: | :---------------------------------------------------------: | :---------------------------------------------------------: | :------------------------------------------------------------: |
|
|
467
|
+
| [![][deploy-button-image]][deploy-link] | [![][deploy-on-zeabur-button-image]][deploy-on-zeabur-link] | [![][deploy-on-sealos-button-image]][deploy-on-sealos-link] | [![][deploy-on-repocloud-button-image]][deploy-on-repocloud-link] |
|
|
468
468
|
|
|
469
469
|
</div>
|
|
470
470
|
|
|
@@ -715,6 +715,8 @@ This project is [Apache 2.0](./LICENSE) licensed.
|
|
|
715
715
|
[deploy-on-sealos-link]: https://cloud.sealos.io/?openapp=system-template%3FtemplateName%3Dlobe-chat
|
|
716
716
|
[deploy-on-zeabur-button-image]: https://zeabur.com/button.svg
|
|
717
717
|
[deploy-on-zeabur-link]: https://zeabur.com/templates/VZGGTI
|
|
718
|
+
[deploy-on-repocloud-button-image]: https://d16t0pc4846x52.cloudfront.net/deploylobe.svg
|
|
719
|
+
[deploy-on-repocloud-link]: https://repocloud.io/details/?app_id=248
|
|
718
720
|
[discord-link]: https://discord.gg/AYFPHvv2jT
|
|
719
721
|
[discord-shield]: https://img.shields.io/discord/1127171173982154893?color=5865F2&label=discord&labelColor=black&logo=discord&logoColor=white&style=flat-square
|
|
720
722
|
[discord-shield-badge]: https://img.shields.io/discord/1127171173982154893?color=5865F2&label=discord&labelColor=black&logo=discord&logoColor=white&style=for-the-badge
|
|
@@ -1,8 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
title: Deploy LobeChat with Database on RepoCloud
|
|
3
|
-
description:
|
|
4
|
-
Learn how to deploy LobeChat on RepoCloud with ease, including: database,
|
|
5
|
-
authentication and S3 storage service.
|
|
3
|
+
description: Learn how to deploy LobeChat on RepoCloud with ease, including the database, authentication, and S3 storage service.
|
|
6
4
|
tags:
|
|
7
5
|
- Deploy LobeChat
|
|
8
6
|
- RepoCloud Deployment
|
|
@@ -12,5 +10,23 @@ tags:
|
|
|
12
10
|
|
|
13
11
|
# Deploying LobeChat Database Edition with RepoCloud
|
|
14
12
|
|
|
15
|
-
|
|
13
|
+
If you want to deploy LobeChat Database Edition on RepoCloud, you can follow the steps below:
|
|
16
14
|
|
|
15
|
+
## RepoCloud Deployment Process
|
|
16
|
+
|
|
17
|
+
<Steps>
|
|
18
|
+
### Prepare your OpenAI API Key
|
|
19
|
+
|
|
20
|
+
Go to [OpenAI API Key](https://platform.openai.com/account/api-keys) to get your OpenAI API Key.
|
|
21
|
+
|
|
22
|
+
### One-click to deploy
|
|
23
|
+
|
|
24
|
+
[![Deploy on RepoCloud](https://d16t0pc4846x52.cloudfront.net/deploylobe.svg)](https://repocloud.io/details/?app_id=248)
|
|
25
|
+
|
|
26
|
+
### Once deployed, you can start using it
|
|
27
|
+
|
|
28
|
+
### Bind a custom domain (optional)
|
|
29
|
+
|
|
30
|
+
You can use the subdomain provided by RepoCloud, or choose to bind a custom domain. Currently, the domains provided by RepoCloud are not affected by DNS pollution, so they can be accessed directly from most regions.
|
|
31
|
+
|
|
32
|
+
</Steps>
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lobehub/chat",
|
|
3
|
-
"version": "1.15.14",
|
|
3
|
+
"version": "1.15.16",
|
|
4
4
|
"description": "Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"framework",
|
package/src/config/llm.ts
CHANGED
|
@@ -65,6 +65,7 @@ export const getLLMConfig = () => {
|
|
|
65
65
|
TOGETHERAI_MODEL_LIST: z.string().optional(),
|
|
66
66
|
|
|
67
67
|
ENABLED_AWS_BEDROCK: z.boolean(),
|
|
68
|
+
AWS_BEDROCK_MODEL_LIST: z.string().optional(),
|
|
68
69
|
AWS_REGION: z.string().optional(),
|
|
69
70
|
AWS_ACCESS_KEY_ID: z.string().optional(),
|
|
70
71
|
AWS_SECRET_ACCESS_KEY: z.string().optional(),
|
|
@@ -162,6 +163,7 @@ export const getLLMConfig = () => {
|
|
|
162
163
|
ZEROONE_MODEL_LIST: process.env.ZEROONE_MODEL_LIST,
|
|
163
164
|
|
|
164
165
|
ENABLED_AWS_BEDROCK: process.env.ENABLED_AWS_BEDROCK === '1',
|
|
166
|
+
AWS_BEDROCK_MODEL_LIST: process.env.AWS_BEDROCK_MODEL_LIST,
|
|
165
167
|
AWS_REGION: process.env.AWS_REGION,
|
|
166
168
|
AWS_ACCESS_KEY_ID: process.env.AWS_ACCESS_KEY_ID,
|
|
167
169
|
AWS_SECRET_ACCESS_KEY: process.env.AWS_SECRET_ACCESS_KEY,
|
|
@@ -2,50 +2,42 @@ import { ModelProviderCard } from '@/types/llm';
|
|
|
2
2
|
|
|
3
3
|
// ref https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html
|
|
4
4
|
// ref https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html
|
|
5
|
+
// ref https://us-east-1.console.aws.amazon.com/bedrock/home?region=us-east-1#/models
|
|
6
|
+
// ref https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/models
|
|
5
7
|
const Bedrock: ModelProviderCard = {
|
|
6
8
|
chatModels: [
|
|
9
|
+
/*
|
|
10
|
+
// TODO: Not support for now
|
|
7
11
|
{
|
|
12
|
+
description: 'Amazon Titan Text Lite is a light weight efficient model ideal for fine-tuning for English-language tasks, including like summarization and copywriting, where customers want a smaller, more cost-effective model that is also highly customizable.',
|
|
8
13
|
displayName: 'Titan Text G1 - Lite',
|
|
9
|
-
id: 'amazon.titan-text-lite-v1
|
|
14
|
+
id: 'amazon.titan-text-lite-v1',
|
|
10
15
|
tokens: 4000,
|
|
11
16
|
},
|
|
12
17
|
{
|
|
13
|
-
description:
|
|
14
|
-
'Amazon Titan Text G1 - Express v1,上下文长度可达 8000 个 token,适合广泛的用途。',
|
|
18
|
+
description: 'Amazon Titan Text Express has a context length of up to 8,000 tokens, making it well-suited for a wide range of advanced, general language tasks such as open-ended text generation and conversational chat, as well as support within Retrieval Augmented Generation (RAG). At launch, the model is optimized for English, with multilingual support for more than 100 additional languages available in preview.',
|
|
15
19
|
displayName: 'Titan Text G1 - Express',
|
|
16
|
-
id: 'amazon.titan-text-express-v1
|
|
20
|
+
id: 'amazon.titan-text-express-v1',
|
|
17
21
|
tokens: 8000,
|
|
18
22
|
},
|
|
19
23
|
{
|
|
20
|
-
|
|
21
|
-
|
|
24
|
+
description: 'Titan Text Premier is a powerful and advanced model within the Titan Text family, designed to deliver superior performance across a wide range of enterprise applications. With its cutting-edge capabilities, it offers enhanced accuracy and exceptional results, making it an excellent choice for organizations seeking top-notch text processing solutions.',
|
|
25
|
+
displayName: 'Titan Text G1 - Premier',
|
|
26
|
+
id: 'amazon.titan-text-premier-v1:0',
|
|
22
27
|
tokens: 32_000,
|
|
23
28
|
},
|
|
29
|
+
*/
|
|
24
30
|
{
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
id: 'ai21.j2-mid-v1',
|
|
28
|
-
tokens: 8192,
|
|
29
|
-
},
|
|
30
|
-
{
|
|
31
|
-
displayName: 'Jurassic-2 Ultra',
|
|
32
|
-
enabled: true,
|
|
33
|
-
id: 'ai21.j2-ultra-v1',
|
|
34
|
-
tokens: 8192,
|
|
35
|
-
},
|
|
36
|
-
{
|
|
37
|
-
description:
|
|
38
|
-
'Claude 3 Opus 是 Anthropic 最强大的人工智能模型,在处理高度复杂的任务方面具备顶尖性能。该模型能够以非凡的流畅性和类似人类的理解能力引导开放式的提示和未可见的场景。Claude 3 Opus 向我们展示生成式人工智能的美好前景。 Claude 3 Opus 可以处理图像和返回文本输出,并且提供 200K 上下文窗口。',
|
|
39
|
-
displayName: 'Claude 3 Opus',
|
|
31
|
+
description: 'Claude 3.5 Sonnet raises the industry bar for intelligence, outperforming competitor models and Claude 3 Opus on a wide range of evaluations, with the speed and cost of our mid-tier model, Claude 3 Sonnet.',
|
|
32
|
+
displayName: 'Claude 3.5 Sonnet',
|
|
40
33
|
enabled: true,
|
|
41
34
|
functionCall: true,
|
|
42
|
-
id: 'anthropic.claude-3-opus-20240229-v1:0',
|
|
35
|
+
id: 'anthropic.claude-3-5-sonnet-20240620-v1:0',
|
|
43
36
|
tokens: 200_000,
|
|
44
37
|
vision: true,
|
|
45
38
|
},
|
|
46
39
|
{
|
|
47
|
-
description:
|
|
48
|
-
'Anthropic 推出的 Claude 3 Sonnet 模型在智能和速度之间取得理想的平衡,尤其是在处理企业工作负载方面。该模型提供最大的效用,同时价格低于竞争产品,并且其经过精心设计,是大规模部署人工智能的可信赖、高耐久性骨干模型。 Claude 3 Sonnet 可以处理图像和返回文本输出,并且提供 200K 上下文窗口。',
|
|
40
|
+
description: 'Claude 3 Sonnet by Anthropic strikes the ideal balance between intelligence and speed—particularly for enterprise workloads. It offers maximum utility at a lower price than competitors, and is engineered to be the dependable, high-endurance workhorse for scaled AI deployments. Claude 3 Sonnet can process images and return text outputs, and features a 200K context window.',
|
|
49
41
|
displayName: 'Claude 3 Sonnet',
|
|
50
42
|
enabled: true,
|
|
51
43
|
functionCall: true,
|
|
@@ -54,18 +46,16 @@ const Bedrock: ModelProviderCard = {
|
|
|
54
46
|
vision: true,
|
|
55
47
|
},
|
|
56
48
|
{
|
|
57
|
-
description:
|
|
58
|
-
|
|
59
|
-
displayName: 'Claude 3.5 Sonnet',
|
|
49
|
+
description: 'Claude 3 Opus is Anthropic most powerful AI model, with state-of-the-art performance on highly complex tasks. It can navigate open-ended prompts and sight-unseen scenarios with remarkable fluency and human-like understanding. Claude 3 Opus shows us the frontier of what’s possible with generative AI. Claude 3 Opus can process images and return text outputs, and features a 200K context window.',
|
|
50
|
+
displayName: 'Claude 3 Opus',
|
|
60
51
|
enabled: true,
|
|
61
52
|
functionCall: true,
|
|
62
|
-
id: 'anthropic.claude-3-5-sonnet-20240620-v1:0',
|
|
53
|
+
id: 'anthropic.claude-3-opus-20240229-v1:0',
|
|
63
54
|
tokens: 200_000,
|
|
64
55
|
vision: true,
|
|
65
56
|
},
|
|
66
57
|
{
|
|
67
|
-
description:
|
|
68
|
-
'Claude 3 Haiku 是 Anthropic 最快速、最紧凑的模型,具有近乎即时的响应能力。该模型可以快速回答简单的查询和请求。客户将能够构建模仿人类交互的无缝人工智能体验。 Claude 3 Haiku 可以处理图像和返回文本输出,并且提供 200K 上下文窗口。',
|
|
58
|
+
description: 'Claude 3 Haiku is Anthropic fastest, most compact model for near-instant responsiveness. It answers simple queries and requests with speed. Customers will be able to build seamless AI experiences that mimic human interactions. Claude 3 Haiku can process images and return text outputs, and features a 200K context window.',
|
|
69
59
|
displayName: 'Claude 3 Haiku',
|
|
70
60
|
enabled: true,
|
|
71
61
|
functionCall: true,
|
|
@@ -74,48 +64,157 @@ const Bedrock: ModelProviderCard = {
|
|
|
74
64
|
vision: true,
|
|
75
65
|
},
|
|
76
66
|
{
|
|
77
|
-
description:
|
|
78
|
-
'Claude 2.1 v2.1,上下文大小等于 200k。Claude 2 的更新版本,采用双倍的上下文窗口,并在长文档和 RAG 上下文中提高可靠性、幻觉率和循证准确性。',
|
|
67
|
+
description: 'An update to Claude 2 that features double the context window, plus improvements across reliability, hallucination rates, and evidence-based accuracy in long document and RAG contexts.',
|
|
79
68
|
displayName: 'Claude 2.1',
|
|
80
69
|
id: 'anthropic.claude-v2:1',
|
|
81
70
|
tokens: 200_000,
|
|
82
71
|
},
|
|
83
72
|
{
|
|
84
|
-
description:
|
|
85
|
-
|
|
86
|
-
|
|
73
|
+
description: 'Anthropic highly capable model across a wide range of tasks from sophisticated dialogue and creative content generation to detailed instruction following.',
|
|
74
|
+
displayName: 'Claude 2.0',
|
|
75
|
+
id: 'anthropic.claude-v2',
|
|
76
|
+
tokens: 100_000,
|
|
77
|
+
},
|
|
78
|
+
{
|
|
79
|
+
description: 'A fast, affordable yet still very capable model, which can handle a range of tasks including casual dialogue, text analysis, summarization, and document question-answering.',
|
|
80
|
+
displayName: 'Claude Instant',
|
|
87
81
|
id: 'anthropic.claude-instant-v1',
|
|
88
82
|
tokens: 100_000,
|
|
89
83
|
},
|
|
90
84
|
{
|
|
91
|
-
description: '
|
|
92
|
-
displayName: '
|
|
85
|
+
description: 'An update to Meta Llama 3 8B Instruct that includes an expanded 128K context length, multilinguality and improved reasoning capabilities. The Llama 3.1 offering of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned generative models in 8B, 70B and 405B sizes (text in/text out). The Llama 3.1 instruction-tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source chat models on common industry benchmarks. Llama 3.1 is intended for commercial and research use in multiple languages. Instruction tuned text only models are intended for assistant-like chat, whereas pretrained models can be adapted for a variety of natural language generation tasks. The Llama 3.1 models also support the ability to leverage the outputs of its models to improve other models including synthetic data generation and distillation. Llama 3.1 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.',
|
|
86
|
+
displayName: 'Llama 3.1 8B Instruct',
|
|
93
87
|
enabled: true,
|
|
94
88
|
functionCall: true,
|
|
95
|
-
id: '
|
|
89
|
+
id: 'meta.llama3-1-8b-instruct-v1:0',
|
|
96
90
|
tokens: 128_000,
|
|
97
91
|
},
|
|
98
92
|
{
|
|
99
|
-
description: 'Llama 3.1 405B
|
|
93
|
+
description: 'An update to Meta Llama 3 70B Instruct that includes an expanded 128K context length, multilinguality and improved reasoning capabilities. The Llama 3.1 offering of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned generative models in 8B, 70B and 405B sizes (text in/text out). The Llama 3.1 instruction-tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source chat models on common industry benchmarks. Llama 3.1 is intended for commercial and research use in multiple languages. Instruction tuned text only models are intended for assistant-like chat, whereas pretrained models can be adapted for a variety of natural language generation tasks. The Llama 3.1 models also support the ability to leverage the outputs of its models to improve other models including synthetic data generation and distillation. Llama 3.1 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.',
|
|
94
|
+
displayName: 'Llama 3.1 70B Instruct',
|
|
95
|
+
enabled: true,
|
|
96
|
+
functionCall: true,
|
|
97
|
+
id: 'meta.llama3-1-70b-instruct-v1:0',
|
|
98
|
+
tokens: 128_000,
|
|
99
|
+
},
|
|
100
|
+
{
|
|
101
|
+
description: 'Meta Llama 3.1 405B Instruct is the largest and most powerful of the Llama 3.1 Instruct models that is a highly advanced model for conversational inference and reasoning, synthetic data generation, and a base to do specialized continual pre-training or fine-tuning on a specific domain. The Llama 3.1 offering of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned generative models in 8B, 70B and 405B sizes (text in/text out). The Llama 3.1 instruction-tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source chat models on common industry benchmarks. Llama 3.1 is intended for commercial and research use in multiple languages. Instruction tuned text only models are intended for assistant-like chat, whereas pretrained models can be adapted for a variety of natural language generation tasks. The Llama 3.1 models also support the ability to leverage the outputs of its models to improve other models including synthetic data generation and distillation. Llama 3.1 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.',
|
|
100
102
|
displayName: 'Llama 3.1 405B Instruct',
|
|
101
103
|
enabled: true,
|
|
104
|
+
functionCall: true,
|
|
102
105
|
id: 'meta.llama3-1-405b-instruct-v1:0',
|
|
103
106
|
tokens: 128_000,
|
|
104
107
|
},
|
|
105
108
|
{
|
|
106
|
-
description: 'Llama 3.
|
|
107
|
-
displayName: 'Llama 3
|
|
109
|
+
description: 'Meta Llama 3 is an accessible, open large language model (LLM) designed for developers, researchers, and businesses to build, experiment, and responsibly scale their generative AI ideas. Part of a foundational system, it serves as a bedrock for innovation in the global community. Ideal for limited computational power and resources, edge devices, and faster training times.',
|
|
110
|
+
displayName: 'Llama 3 8B Instruct',
|
|
111
|
+
id: 'meta.llama3-8b-instruct-v1:0',
|
|
112
|
+
tokens: 8000,
|
|
113
|
+
},
|
|
114
|
+
{
|
|
115
|
+
description: 'Meta Llama 3 is an accessible, open large language model (LLM) designed for developers, researchers, and businesses to build, experiment, and responsibly scale their generative AI ideas. Part of a foundational system, it serves as a bedrock for innovation in the global community. Ideal for content creation, conversational AI, language understanding, R&D, and Enterprise applications.',
|
|
116
|
+
displayName: 'Llama 3 70B Instruct',
|
|
117
|
+
id: 'meta.llama3-70b-instruct-v1:0',
|
|
118
|
+
tokens: 8000,
|
|
119
|
+
},
|
|
120
|
+
/*
|
|
121
|
+
// TODO: Not support for now
|
|
122
|
+
{
|
|
123
|
+
description: 'A 7B dense Transformer, fast-deployed and easily customisable. Small, yet powerful for a variety of use cases. Supports English and code, and a 32k context window.',
|
|
124
|
+
displayName: 'Mistral 7B Instruct',
|
|
108
125
|
enabled: true,
|
|
109
|
-
id: '
|
|
126
|
+
id: 'mistral.mistral-7b-instruct-v0:2',
|
|
127
|
+
tokens: 32_000,
|
|
128
|
+
},
|
|
129
|
+
{
|
|
130
|
+
description: 'A 7B sparse Mixture-of-Experts model with stronger capabilities than Mistral 7B. Uses 12B active parameters out of 45B total. Supports multiple languages, code and 32k context window.',
|
|
131
|
+
displayName: 'Mixtral 8X7B Instruct',
|
|
132
|
+
enabled: true,
|
|
133
|
+
id: 'mistral.mixtral-8x7b-instruct-v0:1',
|
|
134
|
+
tokens: 32_000,
|
|
135
|
+
},
|
|
136
|
+
{
|
|
137
|
+
description: 'Mistral Small is perfectly suited for straightforward tasks that can be performed in bulk, such as classification, customer support, or text generation. It provides outstanding performance at a cost-effective price point.',
|
|
138
|
+
displayName: 'Mistral Small',
|
|
139
|
+
functionCall: true,
|
|
140
|
+
id: 'mistral.mistral-small-2402-v1:0',
|
|
141
|
+
tokens: 32_000,
|
|
142
|
+
},
|
|
143
|
+
{
|
|
144
|
+
description: 'Mistral Large 2407 is an advanced Large Language Model (LLM) that supports dozens of languages and is trained on 80+ coding languages. It has best-in-class agentic capabilities with native function calling JSON outputting and reasoning capabilities.',
|
|
145
|
+
displayName: 'Mistral Large 2 (24.07)',
|
|
146
|
+
enabled: true,
|
|
147
|
+
functionCall: true,
|
|
148
|
+
id: 'mistral.mistral-large-2407-v1:0',
|
|
110
149
|
tokens: 128_000,
|
|
111
150
|
},
|
|
112
151
|
{
|
|
113
|
-
description: '
|
|
114
|
-
displayName: '
|
|
152
|
+
description: 'The most advanced Mistral AI Large Language model capable of handling any language task including complex multilingual reasoning, text understanding, transformation, and code generation.',
|
|
153
|
+
displayName: 'Mistral Large',
|
|
115
154
|
enabled: true,
|
|
116
|
-
|
|
155
|
+
functionCall: true,
|
|
156
|
+
id: 'mistral.mistral-large-2402-v1:0',
|
|
157
|
+
tokens: 32_000,
|
|
158
|
+
},
|
|
159
|
+
*/
|
|
160
|
+
/*
|
|
161
|
+
// TODO: Not support for now
|
|
162
|
+
{
|
|
163
|
+
description: 'Command R+ is a highly performant generative language model optimized for large scale production workloads.',
|
|
164
|
+
displayName: 'Command R+',
|
|
165
|
+
enabled: true,
|
|
166
|
+
functionCall: true,
|
|
167
|
+
id: 'cohere.command-r-plus-v1:0',
|
|
117
168
|
tokens: 128_000,
|
|
118
169
|
},
|
|
170
|
+
{
|
|
171
|
+
description: 'Command R is a generative language model optimized for long-context tasks and large scale production workloads.',
|
|
172
|
+
displayName: 'Command R',
|
|
173
|
+
enabled: true,
|
|
174
|
+
functionCall: true,
|
|
175
|
+
id: 'cohere.command-r-v1:0',
|
|
176
|
+
tokens: 128_000,
|
|
177
|
+
},
|
|
178
|
+
*/
|
|
179
|
+
/*
|
|
180
|
+
// Cohere Command (Text) and AI21 Labs Jurassic-2 (Text) don't support chat with the Converse API
|
|
181
|
+
{
|
|
182
|
+
description: 'Command is Cohere flagship text generation model. It is trained to follow user commands and to be instantly useful in practical business applications.',
|
|
183
|
+
displayName: 'Command',
|
|
184
|
+
id: 'cohere.command-text-v14',
|
|
185
|
+
tokens: 4000,
|
|
186
|
+
},
|
|
187
|
+
{
|
|
188
|
+
description: 'Cohere Command-Light is a generative model that responds well with instruction-like prompts. This model provides customers with an unbeatable balance of quality, cost-effectiveness, and low-latency inference.',
|
|
189
|
+
displayName: 'Command Light',
|
|
190
|
+
id: 'cohere.command-light-text-v14',
|
|
191
|
+
tokens: 4000,
|
|
192
|
+
},
|
|
193
|
+
*/
|
|
194
|
+
/*
|
|
195
|
+
// TODO: Not support for now
|
|
196
|
+
{
|
|
197
|
+
description: 'The latest Foundation Model from AI21 Labs, Jamba-Instruct offers an impressive 256K context window and delivers the best value per price on core text generation, summarization, and question answering tasks for the enterprise.',
|
|
198
|
+
displayName: 'Jamba-Instruct',
|
|
199
|
+
id: 'ai21.jamba-instruct-v1:0',
|
|
200
|
+
tokens: 256_000,
|
|
201
|
+
},
|
|
202
|
+
*/
|
|
203
|
+
/*
|
|
204
|
+
// Cohere Command (Text) and AI21 Labs Jurassic-2 (Text) don't support chat with the Converse API
|
|
205
|
+
{
|
|
206
|
+
description: 'Jurassic-2 Mid is less powerful than Ultra, yet carefully designed to strike the right balance between exceptional quality and affordability. Jurassic-2 Mid can be applied to any language comprehension or generation task including question answering, summarization, long-form copy generation, advanced information extraction and many others.',
|
|
207
|
+
displayName: 'Jurassic-2 Mid',
|
|
208
|
+
id: 'ai21.j2-mid-v1',
|
|
209
|
+
tokens: 8191,
|
|
210
|
+
},
|
|
211
|
+
{
|
|
212
|
+
description: 'Jurassic-2 Ultra is AI21’s most powerful model for complex tasks that require advanced text generation and comprehension. Popular use cases include question answering, summarization, long-form copy generation, advanced information extraction, and more.',
|
|
213
|
+
displayName: 'Jurassic-2 Ultra',
|
|
214
|
+
id: 'ai21.j2-ultra-v1',
|
|
215
|
+
tokens: 8191,
|
|
216
|
+
},
|
|
217
|
+
*/
|
|
119
218
|
],
|
|
120
219
|
checkModel: 'anthropic.claude-instant-v1',
|
|
121
220
|
id: 'bedrock',
|
|
@@ -57,7 +57,7 @@ const Groq: ModelProviderCard = {
|
|
|
57
57
|
tokens: 8192,
|
|
58
58
|
},
|
|
59
59
|
{
|
|
60
|
-
displayName: '
|
|
60
|
+
displayName: 'Gemma 2 9B',
|
|
61
61
|
enabled: true,
|
|
62
62
|
functionCall: true,
|
|
63
63
|
id: 'gemma2-9b-it',
|
|
@@ -76,6 +76,13 @@ const Groq: ModelProviderCard = {
|
|
|
76
76
|
id: 'mixtral-8x7b-32768',
|
|
77
77
|
tokens: 32_768,
|
|
78
78
|
},
|
|
79
|
+
{
|
|
80
|
+
displayName: 'LLaVA 1.5 7B',
|
|
81
|
+
enabled: true,
|
|
82
|
+
id: 'llava-v1.5-7b-4096-preview',
|
|
83
|
+
tokens: 4096,
|
|
84
|
+
vision: true,
|
|
85
|
+
},
|
|
79
86
|
],
|
|
80
87
|
checkModel: 'gemma2-9b-it',
|
|
81
88
|
id: 'groq',
|
|
@@ -4,6 +4,7 @@ import { fileEnv } from '@/config/file';
|
|
|
4
4
|
import { langfuseEnv } from '@/config/langfuse';
|
|
5
5
|
import { getLLMConfig } from '@/config/llm';
|
|
6
6
|
import {
|
|
7
|
+
BedrockProviderCard,
|
|
7
8
|
GroqProviderCard,
|
|
8
9
|
NovitaProviderCard,
|
|
9
10
|
OllamaProviderCard,
|
|
@@ -34,6 +35,8 @@ export const getServerGlobalConfig = () => {
|
|
|
34
35
|
ZHIPU_MODEL_LIST,
|
|
35
36
|
|
|
36
37
|
ENABLED_AWS_BEDROCK,
|
|
38
|
+
AWS_BEDROCK_MODEL_LIST,
|
|
39
|
+
|
|
37
40
|
ENABLED_GOOGLE,
|
|
38
41
|
|
|
39
42
|
ENABLED_GROQ,
|
|
@@ -100,7 +103,14 @@ export const getServerGlobalConfig = () => {
|
|
|
100
103
|
}),
|
|
101
104
|
},
|
|
102
105
|
baichuan: { enabled: ENABLED_BAICHUAN },
|
|
103
|
-
bedrock: { enabled: ENABLED_AWS_BEDROCK },
|
|
106
|
+
bedrock: {
|
|
107
|
+
enabled: ENABLED_AWS_BEDROCK,
|
|
108
|
+
enabledModels: extractEnabledModels(AWS_BEDROCK_MODEL_LIST),
|
|
109
|
+
serverModelCards: transformToChatModelCards({
|
|
110
|
+
defaultChatModels: BedrockProviderCard.chatModels,
|
|
111
|
+
modelString: AWS_BEDROCK_MODEL_LIST,
|
|
112
|
+
}),
|
|
113
|
+
},
|
|
104
114
|
deepseek: { enabled: ENABLED_DEEPSEEK },
|
|
105
115
|
google: { enabled: ENABLED_GOOGLE },
|
|
106
116
|
groq: {
|