npm - @lobehub/chat - Versions diffs - 1.55.2 → 1.55.3 - Mend

@lobehub/chat 1.55.2 → 1.55.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/CHANGELOG.md +25 -0
package/changelog/v1.json +9 -0
package/docs/self-hosting/server-database/docker-compose.mdx +1 -3
package/package.json +1 -1
package/src/config/aiModels/qwen.ts +18 -18
package/src/config/modelProviders/qwen.ts +84 -6

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,31 @@
 # Changelog
+### [Version 1.55.3](https://github.com/lobehub/lobe-chat/compare/v1.55.2...v1.55.3)
+<sup>Released on **2025-02-15**</sup>
+#### 💄 Styles
+- **misc**: Add deepseek r1 distill models for qwen series.
+<br/>
+<details>
+<summary><kbd>Improvements and Fixes</kbd></summary>
+#### Styles
+- **misc**: Add deepseek r1 distill models for qwen series, closes [#5850](https://github.com/lobehub/lobe-chat/issues/5850) ([4a96a05](https://github.com/lobehub/lobe-chat/commit/4a96a05))
+</details>
+<div align="right">
+[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+</div>
 ### [Version 1.55.2](https://github.com/lobehub/lobe-chat/compare/v1.55.1...v1.55.2)
 <sup>Released on **2025-02-15**</sup>

package/changelog/v1.json CHANGED Viewed

@@ -1,4 +1,13 @@
 [
+  {
+    "children": {
+      "improvements": [
+        "Add deepseek r1 distill models for qwen series."
+      ]
+    },
+    "date": "2025-02-15",
+    "version": "1.55.3"
+  },
   {
     "children": {
       "fixes": [

package/docs/self-hosting/server-database/docker-compose.mdx CHANGED Viewed

@@ -315,8 +315,7 @@ The script supports the following deployment modes; please choose the appropriat
 ## Custom Deployment
-This section mainly introduces the configurations that need to be modified to customize the deployment of the LobeChat service in different network environments.
-Before starting, you can download the [Docker Compose configuration file](https://raw.githubusercontent.com/lobehub/lobe-chat/HEAD/docker-compose/local/docker-compose.yml) and the [environment variable configuration file](https://raw.githubusercontent.com/lobehub/lobe-chat/HEAD/docker-compose/local/.env.en_US.example).
+This section mainly introduces the configurations that need to be modified to customize the deployment of the LobeChat service in different network environments. Before starting, you can download the [Docker Compose configuration file](https://raw.githubusercontent.com/lobehub/lobe-chat/HEAD/docker-compose/local/docker-compose.yml) and the [environment variable configuration file](https://raw.githubusercontent.com/lobehub/lobe-chat/refs/heads/main/docker-compose/local/.env.example).
 ```sh
 curl -O https://raw.githubusercontent.com/lobehub/lobe-chat/HEAD/docker-compose/local/docker-compose.yml
@@ -696,7 +695,6 @@ MINIO_ROOT_PASSWORD=Crj1570768
 MINIO_LOBE_BUCKET=lobe
 S3_ACCESS_KEY_ID=dB6Uq9CYZPdWSZouPyEd
 S3_SECRET_ACCESS_KEY=aPBW8CVULkh8bw1GatlT0GjLihcXHLNwRml4pieS
 ```
 - `docker-compose.yml`

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@lobehub/chat",
-  "version": "1.55.2",
+  "version": "1.55.3",
   "description": "Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
   "keywords": [
     "framework",

package/src/config/aiModels/qwen.ts CHANGED Viewed

@@ -376,7 +376,7 @@ const qwenChatModels: AIChatModelCard[] = [
       vision: true,
     },
     contextWindowTokens: 131_072,
-    description:
+    description:
       '指令跟随、数学、解题、代码整体提升，万物识别能力提升，支持多样格式直接精准定位视觉元素，支持对长视频文件（最长10分钟）进行理解和秒级别的事件时刻定位，能理解时间先后和快慢，基于解析和定位能力支持操控OS或Mobile的Agent，关键信息抽取能力和Json格式输出能力强，此版本为72B版本，本系列能力最强的版本。',
     displayName: 'Qwen2.5 VL 72B',
     id: 'qwen2.5-vl-72b-instruct',
@@ -394,7 +394,7 @@ const qwenChatModels: AIChatModelCard[] = [
       vision: true,
     },
     contextWindowTokens: 131_072,
-    description:
+    description:
       '指令跟随、数学、解题、代码整体提升，万物识别能力提升，支持多样格式直接精准定位视觉元素，支持对长视频文件（最长10分钟）进行理解和秒级别的事件时刻定位，能理解时间先后和快慢，基于解析和定位能力支持操控OS或Mobile的Agent，关键信息抽取能力和Json格式输出能力强，此版本为72B版本，本系列能力最强的版本。',
     displayName: 'Qwen2.5 VL 7B',
     id: 'qwen2.5-vl-7b-instruct',
@@ -412,8 +412,8 @@ const qwenChatModels: AIChatModelCard[] = [
       reasoning: true,
     },
     contextWindowTokens: 131_072,
-    description:
-      'DeepSeek-R1 在后训练阶段大规模使用了强化学习技术，在仅有极少标注数据的情况下，极大提升了模型推理能力，尤其在数学、代码、自然语言推理等任务上。',
+    description:
+      'DeepSeek-R1 在后训练阶段大规模使用了强化学习技术，在仅有极少标注数据的情况下，极大提升了模型推理能力。在数学、代码、自然语言推理等任务上，性能较高，能力较强。',
     displayName: 'DeepSeek R1',
     enabled: true,
     id: 'deepseek-r1',
@@ -431,7 +431,7 @@ const qwenChatModels: AIChatModelCard[] = [
       functionCall: true,
     },
     contextWindowTokens: 131_072,
-    description:
+    description:
       'DeepSeek-V3 为自研 MoE 模型，671B 参数，激活 37B，在 14.8T token 上进行了预训练，在长文本、代码、数学、百科、中文能力上表现优秀。',
     displayName: 'DeepSeek V3',
     enabled: true,
@@ -450,8 +450,8 @@ const qwenChatModels: AIChatModelCard[] = [
       reasoning: true,
     },
     contextWindowTokens: 131_072,
-    description:
-      'DeepSeek-R1-Distill 系列模型通过知识蒸馏技术，将 DeepSeek-R1 生成的样本对 Qwen、Llama 等开源模型进行微调后得到。',
+    description:
+      'DeepSeek-R1-Distill-Qwen-1.5B 是一个基于 Qwen2.5-Math-1.5B 的蒸馏大型语言模型，使用了 DeepSeek R1 的输出。',
     displayName: 'DeepSeek R1 Distill Qwen 1.5B',
     id: 'deepseek-r1-distill-qwen-1.5b',
     maxOutput: 8192,
@@ -467,7 +467,7 @@ const qwenChatModels: AIChatModelCard[] = [
       reasoning: true
     },
     contextWindowTokens: 131_072,
-    description: "DeepSeek-R1-Distill 系列模型通过知识蒸馏技术，将 DeepSeek-R1 生成的样本对 Qwen、Llama 等开源模型进行微调后得到。",
+    description: "DeepSeek-R1-Distill-Qwen-7B 是一个基于 Qwen2.5-Math-7B 的蒸馏大型语言模型，使用了 DeepSeek R1 的输出。",
     displayName: "DeepSeek R1 Distill Qwen 7B",
     id: "deepseek-r1-distill-qwen-7b",
     maxOutput: 8192,
@@ -483,9 +483,9 @@ const qwenChatModels: AIChatModelCard[] = [
       reasoning: true
     },
     contextWindowTokens: 131_072,
-    description: "DeepSeek-R1-Distill 系列模型通过知识蒸馏技术，将 DeepSeek-R1 生成的样本对 Qwen、Llama 等开源模型进行微调后得到。",
-    displayName: "DeepSeek R1 Distill Llama 8B",
-    id: "deepseek-r1-distill-llama-8b",
+    description: "DeepSeek-R1-Distill-Qwen-14B 是一个基于 Qwen2.5-14B 的蒸馏大型语言模型，使用了 DeepSeek R1 的输出。",
+    displayName: "DeepSeek R1 Distill Qwen 14B",
+    id: "deepseek-r1-distill-qwen-14b",
     maxOutput: 8192,
     pricing: {
       currency: "CNY",
@@ -499,9 +499,9 @@ const qwenChatModels: AIChatModelCard[] = [
       reasoning: true
     },
     contextWindowTokens: 131_072,
-    description: "DeepSeek-R1-Distill 系列模型通过知识蒸馏技术，将 DeepSeek-R1 生成的样本对 Qwen、Llama 等开源模型进行微调后得到。",
-    displayName: "DeepSeek R1 Distill Qwen 14B",
-    id: "deepseek-r1-distill-qwen-14b",
+    description: "DeepSeek-R1-Distill-Qwen-32B 是一个基于 Qwen2.5-32B 的蒸馏大型语言模型，使用了 DeepSeek R1 的输出。",
+    displayName: "DeepSeek R1 Distill Qwen 32B",
+    id: "deepseek-r1-distill-qwen-32b",
     maxOutput: 8192,
     pricing: {
       currency: "CNY",
@@ -515,9 +515,9 @@ const qwenChatModels: AIChatModelCard[] = [
       reasoning: true
     },
     contextWindowTokens: 131_072,
-    description: "DeepSeek-R1-Distill 系列模型通过知识蒸馏技术，将 DeepSeek-R1 生成的样本对 Qwen、Llama 等开源模型进行微调后得到。",
-    displayName: "DeepSeek R1 Distill Qwen 32B",
-    id: "deepseek-r1-distill-qwen-32b",
+    description: "DeepSeek-R1-Distill-Llama-8B 是一个基于 Llama-3.1-8B 的蒸馏大型语言模型，使用了 DeepSeek R1 的输出。",
+    displayName: "DeepSeek R1 Distill Llama 8B",
+    id: "deepseek-r1-distill-llama-8b",
     maxOutput: 8192,
     pricing: {
       currency: "CNY",
@@ -531,7 +531,7 @@ const qwenChatModels: AIChatModelCard[] = [
       reasoning: true
     },
     contextWindowTokens: 131_072,
-    description: "DeepSeek-R1-Distill 系列模型通过知识蒸馏技术，将 DeepSeek-R1 生成的样本对 Qwen、Llama 等开源模型进行微调后得到。",
+    description: "DeepSeek-R1-Distill-Llama-70B 是一个基于 Llama-3.3-70B-Instruct 的蒸馏大型语言模型，使用了 DeepSeek R1 的输出。",
     displayName: "DeepSeek R1 Distill Llama 70B",
     id: "deepseek-r1-distill-llama-70b",
     maxOutput: 8192,

package/src/config/modelProviders/qwen.ts CHANGED Viewed

@@ -294,7 +294,7 @@ const Qwen: ModelProviderCard = {
     },
     {
       contextWindowTokens: 128_000,
-      description:
+      description:
         '指令跟随、数学、解题、代码整体提升，万物识别能力提升，支持多样格式直接精准定位视觉元素，支持对长视频文件（最长10分钟）进行理解和秒级别的事件时刻定位，能理解时间先后和快慢，基于解析和定位能力支持操控OS或Mobile的Agent，关键信息抽取能力和Json格式输出能力强，此版本为72B版本，本系列能力最强的版本。',
       displayName: 'Qwen2.5 VL 72B',
       id: 'qwen2.5-vl-72b-instruct',
@@ -307,9 +307,9 @@ const Qwen: ModelProviderCard = {
       vision: true,
     },
     {
-      contextWindowTokens: 65_536,
-      description:
-        'DeepSeek-R1 在后训练阶段大规模使用了强化学习技术，在仅有极少标注数据的情况下，极大提升了模型推理能力，尤其在数学、代码、自然语言推理等任务上。',
+      contextWindowTokens: 131_072,
+      description:
+        'DeepSeek-R1 在后训练阶段大规模使用了强化学习技术，在仅有极少标注数据的情况下，极大提升了模型推理能力。在数学、代码、自然语言推理等任务上，性能较高，能力较强。',
       displayName: 'DeepSeek R1',
       id: 'deepseek-r1',
       pricing: {
@@ -320,8 +320,8 @@ const Qwen: ModelProviderCard = {
       releasedAt: '2025-01-27',
     },
     {
-      contextWindowTokens: 65_536,
-      description:
+      contextWindowTokens: 131_072,
+      description:
         'DeepSeek-V3 为自研 MoE 模型，671B 参数，激活 37B，在 14.8T token 上进行了预训练，在长文本、代码、数学、百科、中文能力上表现优秀。',
       displayName: 'DeepSeek V3',
       id: 'deepseek-v3',
@@ -332,6 +332,84 @@ const Qwen: ModelProviderCard = {
       },
       releasedAt: '2025-01-27',
     },
+    {
+      contextWindowTokens: 131_072,
+      description:
+        'DeepSeek-R1-Distill-Qwen-1.5B 是一个基于 Qwen2.5-Math-1.5B 的蒸馏大型语言模型，使用了 DeepSeek R1 的输出。',
+      displayName: 'DeepSeek R1 Distill Qwen 1.5B',
+      id: 'deepseek-r1-distill-qwen-1.5b',
+      pricing: {
+        currency: 'CNY',
+        input: 0,
+        output: 0,
+      },
+      releasedAt: '2025-02-05',
+    },
+    {
+      contextWindowTokens: 131_072,
+      description:
+        'DeepSeek-R1-Distill-Qwen-7B 是一个基于 Qwen2.5-Math-7B 的蒸馏大型语言模型，使用了 DeepSeek R1 的输出。',
+      displayName: 'DeepSeek R1 Distill Qwen 7B',
+      id: 'deepseek-r1-distill-qwen-7b',
+      pricing: {
+        currency: 'CNY',
+        input: 0,
+        output: 0,
+      },
+      releasedAt: '2025-02-05',
+    },
+    {
+      contextWindowTokens: 131_072,
+      description:
+        'DeepSeek-R1-Distill-Qwen-14B 是一个基于 Qwen2.5-14B 的蒸馏大型语言模型，使用了 DeepSeek R1 的输出。',
+      displayName: 'DeepSeek R1 Distill Qwen 14B',
+      id: 'deepseek-r1-distill-qwen-14b',
+      pricing: {
+        currency: 'CNY',
+        input: 0,
+        output: 0,
+      },
+      releasedAt: '2025-02-05',
+    },
+    {
+      contextWindowTokens: 131_072,
+      description:
+        'DeepSeek-R1-Distill-Qwen-32B 是一个基于 Qwen2.5-32B 的蒸馏大型语言模型，使用了 DeepSeek R1 的输出。',
+      displayName: 'DeepSeek R1 Distill Qwen 32B',
+      id: 'deepseek-r1-distill-qwen-32b',
+      pricing: {
+        currency: 'CNY',
+        input: 0,
+        output: 0,
+      },
+      releasedAt: '2025-02-05',
+    },
+    {
+      contextWindowTokens: 131_072,
+      description:
+        'DeepSeek-R1-Distill-Llama-8B 是一个基于 Llama-3.1-8B 的蒸馏大型语言模型，使用了 DeepSeek R1 的输出。',
+      displayName: 'DeepSeek R1 Distill Llama 8B',
+      id: 'deepseek-r1-distill-llama-8b',
+      pricing: {
+        currency: 'CNY',
+        input: 0,
+        output: 0,
+      },
+      releasedAt: '2025-02-05',
+    },
+    {
+      contextWindowTokens: 131_072,
+      description:
+        'DeepSeek-R1-Distill-Llama-70B 是一个基于 Llama-3.3-70B-Instruct 的蒸馏大型语言模型，使用了 DeepSeek R1 的输出。',
+      displayName: 'DeepSeek R1 Distill Llama 70B',
+      id: 'deepseek-r1-distill-llama-70b',
+      pricing: {
+        currency: 'CNY',
+        input: 0,
+        output: 0,
+      },
+      releasedAt: '2025-02-05',
+    },
   ],
   checkModel: 'qwen-turbo-latest',
   description: