npm - @lobehub/chat - Versions diffs - 1.50.3 → 1.50.5 - Mend

@lobehub/chat 1.50.3 → 1.50.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

package/.env.example +1 -0
package/CHANGELOG.md +51 -0
package/README.ja-JP.md +8 -8
package/README.md +8 -8
package/README.zh-CN.md +8 -8
package/changelog/v1.json +18 -0
package/docs/changelog/2025-01-22-new-ai-provider.mdx +2 -2
package/docs/changelog/2025-02-02-deepseek-r1.mdx +33 -0
package/docs/changelog/2025-02-02-deepseek-r1.zh-CN.mdx +29 -0
package/docs/changelog/index.json +6 -0
package/docs/self-hosting/environment-variables/model-provider.mdx +7 -0
package/docs/self-hosting/environment-variables/model-provider.zh-CN.mdx +7 -0
package/package.json +1 -1
package/src/config/aiModels/github.ts +30 -2
package/src/config/aiModels/qwen.ts +139 -10
package/src/config/modelProviders/github.ts +27 -3
package/src/config/modelProviders/qwen.ts +90 -12
package/src/database/repositories/dataImporter/index.ts +3 -1
package/src/libs/langchain/loaders/pdf/index.ts +1 -1
package/src/server/routers/async/file.ts +7 -7
package/src/utils/sanitizeUTF8.test.ts +23 -0
package/src/utils/sanitizeUTF8.ts +14 -0

package/.env.example CHANGED Viewed

@@ -106,6 +106,7 @@ OPENAI_API_KEY=sk-xxxxxxxxx
 ### DeepSeek AI  ####
+# DEEPSEEK_PROXY_URL=https://api.deepseek.com/v1
 # DEEPSEEK_API_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 ### Qwen AI  ####

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,57 @@
 # Changelog
+### [Version 1.50.5](https://github.com/lobehub/lobe-chat/compare/v1.50.4...v1.50.5)
+<sup>Released on **2025-02-04**</sup>
+#### 💄 Styles
+- **misc**: Add/Update Aliyun Cloud Models, update GitHub Models.
+<br/>
+<details>
+<summary><kbd>Improvements and Fixes</kbd></summary>
+#### Styles
+- **misc**: Add/Update Aliyun Cloud Models, closes [#5613](https://github.com/lobehub/lobe-chat/issues/5613) ([95cd822](https://github.com/lobehub/lobe-chat/commit/95cd822))
+- **misc**: Update GitHub Models, closes [#5683](https://github.com/lobehub/lobe-chat/issues/5683) ([ed4e048](https://github.com/lobehub/lobe-chat/commit/ed4e048))
+</details>
+<div align="right">
+[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+</div>
+### [Version 1.50.4](https://github.com/lobehub/lobe-chat/compare/v1.50.3...v1.50.4)
+<sup>Released on **2025-02-04**</sup>
+#### 🐛 Bug Fixes
+- **misc**: Fix invalid utf8 character.
+<br/>
+<details>
+<summary><kbd>Improvements and Fixes</kbd></summary>
+#### What's fixed
+- **misc**: Fix invalid utf8 character, closes [#5732](https://github.com/lobehub/lobe-chat/issues/5732) ([2905cb5](https://github.com/lobehub/lobe-chat/commit/2905cb5))
+</details>
+<div align="right">
+[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+</div>
 ### [Version 1.50.3](https://github.com/lobehub/lobe-chat/compare/v1.50.2...v1.50.3)
 <sup>Released on **2025-02-04**</sup>

package/README.ja-JP.md CHANGED Viewed

@@ -302,14 +302,14 @@ LobeChat エージェントマーケットプレイスでは、クリエイタ
 <!-- AGENT LIST -->
-| 最近追加                                                                                                                                                                     | 説明                                                                                                                              |
-| ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------- |
-| [SUNO 曲作支援ツール](https://lobechat.com/discover/assistant/suno-lyrics-assistant)<br/><sup>By **[sqkkyzx](https://github.com/sqkkyzx)** on **2025-01-26**</sup>           | ユーザーのニーズに基づいて SUNO の曲作成パラメータを生成します<br/>`歌詞作成` `音楽スタイル` `編曲` `パラメータ設定`              |
-| [偉大なるビッグス・ディッカス](https://lobechat.com/discover/assistant/all-knowing)<br/><sup>By **[CGitwater](https://github.com/CGitwater)** on **2025-01-24**</sup>        | 全能の知識の神<br/>`ビッグス` `ディッカス`                                                                                        |
-| [PPT 制作達人](https://lobechat.com/discover/assistant/ppt-production-expert)<br/><sup>By **[patricleehua](https://github.com/patricleehua)** on **2025-01-24**</sup>        | 高品質な PPT の迅速な制作と最適化に優れています<br/>`ppt制作` `デザイン` `コンサルティング` `コンテンツ最適化` `ユーザーサポート` |
-| [OCR ドキュメント転写アシスタント](https://lobechat.com/discover/assistant/ocr-markdown)<br/><sup>By **[Liangpi000](https://github.com/Liangpi000)** on **2025-01-24**</sup> | 文書内容の転写と markdown フォーマットに優れています<br/>`文書生成` `markdown` `フォーマット` `転写` `タスクガイド`               |
-> 📊 Total agents: [<kbd>**471**</kbd> ](https://lobechat.com/discover/assistants)
+| 最近追加                                                                                                                                                     | 説明                                                                                                                                    |
+| ------------------------------------------------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------- |
+| [鋭い評論家](https://lobechat.com/discover/assistant/ruipingshi)<br/><sup>By **[Zippland](https://github.com/Zippland)** on **2025-02-04**</sup>             | 鋭い評論と深い問題分析が得意<br/>`評論` `社会的見解` `鋭い分析`                                                                         |
+| [Python の天才](https://lobechat.com/discover/assistant/python-genius)<br/><sup>By **[novaspivack](https://github.com/novaspivack)** on **2025-02-04**</sup> | 高度な Python コーダー<br/>`コード` `python`                                                                                            |
+| [SAT マスター](https://lobechat.com/discover/assistant/sat-teaching)<br/><sup>By **[iBz-04](https://github.com/iBz-04)** on **2025-02-04**</sup>             | 1300 点以上のスコアを目指すデジタル SAT コーチングの専門家<br/>`sat` `適性試験`                                                         |
+| [宇宙の啓示者](https://lobechat.com/discover/assistant/universal-god)<br/><sup>By **[GowayLee](https://github.com/GowayLee)** on **2025-02-04**</sup>        | 時空を超えた知恵の神託、生命の本質を洞察する<br/>`キャラクターデザイン` `aiキャラクター` `メタバース` `ロールプレイング` `知恵システム` |
+> 📊 Total agents: [<kbd>**478**</kbd> ](https://lobechat.com/discover/assistants)
  <!-- AGENT LIST -->

package/README.md CHANGED Viewed

@@ -319,14 +319,14 @@ Our marketplace is not just a showcase platform but also a collaborative space.
 <!-- AGENT LIST -->
-| Recent Submits                                                                                                                                                                   | Description                                                                                                                                             |
-| -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| [SUNO Song Creation Assistant](https://lobechat.com/discover/assistant/suno-lyrics-assistant)<br/><sup>By **[sqkkyzx](https://github.com/sqkkyzx)** on **2025-01-26**</sup>      | Can generate SUNO song creation parameters based on user needs<br/>`lyric-creation` `music-style` `arrangement` `parameter-settings`                    |
-| [The Great Biggus Dickus](https://lobechat.com/discover/assistant/all-knowing)<br/><sup>By **[CGitwater](https://github.com/CGitwater)** on **2025-01-24**</sup>                 | The almighty powerful god of klnowledge<br/>`biggus` `diccus`                                                                                           |
-| [PPT Production Expert](https://lobechat.com/discover/assistant/ppt-production-expert)<br/><sup>By **[patricleehua](https://github.com/patricleehua)** on **2025-01-24**</sup>   | Skilled in the rapid production and optimization of high-quality PPTs<br/>`ppt-production` `design` `consulting` `content-optimization` `user-support`  |
-| [OCR Document Transcription Assistant](https://lobechat.com/discover/assistant/ocr-markdown)<br/><sup>By **[Liangpi000](https://github.com/Liangpi000)** on **2025-01-24**</sup> | Specializes in document content transcription and markdown formatting<br/>`document-generation` `markdown` `formatting` `transcription` `task-guidance` |
-> 📊 Total agents: [<kbd>**471**</kbd> ](https://lobechat.com/discover/assistants)
+| Recent Submits                                                                                                                                               | Description                                                                                                                                           |
+| ------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------- |
+| [Sharp Commentator](https://lobechat.com/discover/assistant/ruipingshi)<br/><sup>By **[Zippland](https://github.com/Zippland)** on **2025-02-04**</sup>      | Specializes in sharp commentary and in-depth analysis of issues<br/>`commentary` `social-perspectives` `sharp-analysis`                               |
+| [Python Genius](https://lobechat.com/discover/assistant/python-genius)<br/><sup>By **[novaspivack](https://github.com/novaspivack)** on **2025-02-04**</sup> | An advanced python coder<br/>`code` `python`                                                                                                          |
+| [SAT master](https://lobechat.com/discover/assistant/sat-teaching)<br/><sup>By **[iBz-04](https://github.com/iBz-04)** on **2025-02-04**</sup>               | Expert in Digital SAT coaching for 1300+ scores<br/>`sat` `aptitude-test`                                                                             |
+| [Cosmic Oracle](https://lobechat.com/discover/assistant/universal-god)<br/><sup>By **[GowayLee](https://github.com/GowayLee)** on **2025-02-04**</sup>       | Wisdom from across time and space, insight into the essence of life<br/>`character-design` `ai-characters` `metaverse` `role-playing` `wisdom-system` |
+> 📊 Total agents: [<kbd>**478**</kbd> ](https://lobechat.com/discover/assistants)
  <!-- AGENT LIST -->

package/README.zh-CN.md CHANGED Viewed

@@ -308,14 +308,14 @@ LobeChat 的插件生态系统是其核心功能的重要扩展，它极大地
 <!-- AGENT LIST -->
-| 最近新增                                                                                                                                                              | 描述                                                                                  |
-| --------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------- |
-| [SUNO 歌曲创作助手](https://lobechat.com/discover/assistant/suno-lyrics-assistant)<br/><sup>By **[sqkkyzx](https://github.com/sqkkyzx)** on **2025-01-26**</sup>      | 能根据用户需求生成 SUNO 歌曲创作参数<br/>`歌词创作` `音乐风格` `编曲` `参数设置`      |
-| [伟大的比古斯・迪克斯](https://lobechat.com/discover/assistant/all-knowing)<br/><sup>By **[CGitwater](https://github.com/CGitwater)** on **2025-01-24**</sup>         | 全能强大的知识之神<br/>`比古斯` `迪克斯`                                              |
-| [PPT 制作达人](https://lobechat.com/discover/assistant/ppt-production-expert)<br/><sup>By **[patricleehua](https://github.com/patricleehua)** on **2025-01-24**</sup> | 擅长高质量 PPT 的快速制作和优化<br/>`ppt制作` `设计` `咨询` `内容优化` `用户支持`     |
-| [OCR 文档转录助手](https://lobechat.com/discover/assistant/ocr-markdown)<br/><sup>By **[Liangpi000](https://github.com/Liangpi000)** on **2025-01-24**</sup>          | 擅长文件内容转录与 markdown 格式<br/>`文档生成` `markdown` `格式化` `转录` `任务指导` |
-> 📊 Total agents: [<kbd>**471**</kbd> ](https://lobechat.com/discover/assistants)
+| 最近新增                                                                                                                                                   | 描述                                                                                  |
+| ---------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------- |
+| [锐评师](https://lobechat.com/discover/assistant/ruipingshi)<br/><sup>By **[Zippland](https://github.com/Zippland)** on **2025-02-04**</sup>               | 擅长犀利点评与深度剖析问题<br/>`评论` `社会观点` `尖锐分析`                           |
+| [Python 天才](https://lobechat.com/discover/assistant/python-genius)<br/><sup>By **[novaspivack](https://github.com/novaspivack)** on **2025-02-04**</sup> | 一名高级 Python 编程者<br/>`代码` `python`                                            |
+| [SAT 大师](https://lobechat.com/discover/assistant/sat-teaching)<br/><sup>By **[iBz-04](https://github.com/iBz-04)** on **2025-02-04**</sup>               | 数字 SAT 辅导专家，帮助学生取得 1300 + 分数<br/>`sat` `能力测试`                      |
+| [宇宙启示者](https://lobechat.com/discover/assistant/universal-god)<br/><sup>By **[GowayLee](https://github.com/GowayLee)** on **2025-02-04**</sup>        | 跨时空的智慧神谕，洞悉生命本质<br/>`角色设计` `ai角色` `元宇宙` `角色扮演` `智慧系统` |
+> 📊 Total agents: [<kbd>**478**</kbd> ](https://lobechat.com/discover/assistants)
  <!-- AGENT LIST -->

package/changelog/v1.json CHANGED Viewed

@@ -1,4 +1,22 @@
 [
+  {
+    "children": {
+      "improvements": [
+        "Add/Update Aliyun Cloud Models, update GitHub Models."
+      ]
+    },
+    "date": "2025-02-04",
+    "version": "1.50.5"
+  },
+  {
+    "children": {
+      "fixes": [
+        "Fix invalid utf8 character."
+      ]
+    },
+    "date": "2025-02-04",
+    "version": "1.50.4"
+  },
   {
     "children": {
       "improvements": [

package/docs/changelog/2025-01-22-new-ai-provider.mdx CHANGED Viewed

@@ -1,8 +1,8 @@
 ---
 title: LobeChat Launches New AI Provider Management System
 description: >-
-  LobeChat has revamped its AI Provider Management System, now supporting custom AI providers and models.
+  LobeChat has revamped its AI Provider Management System, now supporting custom
+  AI providers and models.
 tags:
   - LobeChat
   - AI Provider

package/docs/changelog/2025-02-02-deepseek-r1.mdx ADDED Viewed

@@ -0,0 +1,33 @@
+---
+title: >-
+  LobeChat Integrates DeepSeek R1, Bringing a Revolutionary Chain of Thought Experience
+description: >-
+  LobeChat v1.49.12 fully supports the DeepSeek R1 model, providing users with an unprecedented interactive experience in the chain of thought.
+tags:
+  - LobeChat
+  - DeepSeek
+  - Chain of Thought
+---
+# Perfect Integration of DeepSeek R1 and its Deep Thinking Experience 🎉
+After nearly 10 days of meticulous refinement, LobeChat has fully integrated the DeepSeek R1 model in version v1.49.12, offering users a revolutionary interactive experience in the chain of thought!
+## 🚀 Major Updates
+- 🤯 **Comprehensive Support for DeepSeek R1**: Now fully integrated in both the Community and Cloud versions ([lobechat.com](https://lobechat.com)).
+- 🧠 **Real-Time Chain of Thought Display**: Transparently presents the AI's reasoning process, making the resolution of complex issues clear and visible.
+- ⚡️ **Deep Thinking Experience**: Utilizing Chain of Thought technology, it provides more insightful AI conversations.
+- 💫 **Intuitive Problem Analysis**: Makes the analysis of complex issues clear and easy to understand.
+## 🌟 How to Use
+1. Upgrade to LobeChat v1.49.12 or visit [lobechat.com](https://lobechat.com).
+2. Select the DeepSeek R1 model in the settings.
+3. Experience a whole new level of intelligent conversation!
+## 📢 Feedback and Support
+If you encounter any issues while using the application or have suggestions for new features, feel free to engage with us through GitHub Discussions. Let's work together to create a better LobeChat!

package/docs/changelog/2025-02-02-deepseek-r1.zh-CN.mdx ADDED Viewed

@@ -0,0 +1,29 @@
+---
+title: LobeChat 重磅集成 DeepSeek R1，带来革命性思维链体验
+description: LobeChat v1.49.12 已完整支持 DeepSeek R1 模型，为用户带来前所未有的思维链交互体验
+tags:
+  - DeepSeek R1
+  - CoT
+  - 思维链
+---
+# 完美集成 DeepSeek R1 ，开启思维链新体验
+经过近 10 天的精心打磨，LobeChat 已在 v1.49.12 版本中完整集成了 DeepSeek R1 模型，为用户带来革命性的思维链交互体验！
+## 🚀 重大更新
+- 🤯 **DeepSeek R1 全面支持**: 现已在社区版与 Cloud 版（[lobechat.com](https://lobechat.com)）中完整接入
+- 🧠 **实时思维链展示**: 透明呈现 AI 的推理过程，让复杂问题的解决过程清晰可见
+- ⚡️ **深度思考体验**: 通过 Chain of Thought 技术，带来更具洞察力的 AI 对话
+- 💫 **直观的问题解析**: 让复杂问题的分析过程变得清晰易懂
+## 🌟 使用方式
+1. 升级到 LobeChat v1.49.12 或访问 [lobechat.com](https://lobechat.com)
+2. 在设置中选择 DeepSeek R1 模型
+3. 开启全新的智能对话体验！
+## 📢 反馈与支持
+如果您在使用过程中遇到任何问题，或对新功能有任何建议，欢迎通过 GitHub Discussions 与我们交流。让我们一起打造更好的 LobeChat！

package/docs/changelog/index.json CHANGED Viewed

@@ -2,6 +2,12 @@
   "$schema": "https://github.com/lobehub/lobe-chat/blob/main/docs/changelog/schema.json",
   "cloud": [],
   "community": [
+    {
+      "image": "https://github.com/user-attachments/assets/5fe4c373-ebd0-42a9-bdca-0ab7e0a2e747",
+      "id": "2025-02-02-deepseek-r1",
+      "date": "2025-02-02",
+      "versionRange": ["1.47.8", "1.49.12"]
+    },
     {
       "image": "https://github.com/user-attachments/assets/7350f211-61ce-488e-b0e2-f0fcac25caeb",
       "id": "2025-01-22-new-ai-provider",

package/docs/self-hosting/environment-variables/model-provider.mdx CHANGED Viewed

@@ -169,6 +169,13 @@ If you need to use Azure OpenAI to provide model services, you can refer to the
 ## DeepSeek AI
+### `DEEPSEEK_PROXY_URL`
+- Type: Optional
+- Description: If you manually configure the DeepSeek API proxy, you can use this configuration item to override the default DeepSeek API request base URL
+- Default: -
+- Example: `https://xxxx.models.ai.azure.com/v1`
 ### `DEEPSEEK_API_KEY`
 - Type: Required

package/docs/self-hosting/environment-variables/model-provider.zh-CN.mdx CHANGED Viewed

@@ -167,6 +167,13 @@ LobeChat 在部署时提供了丰富的模型服务商相关的环境变量，
 ## DeepSeek AI
+### `DEEPSEEK_PROXY_URL`
+- 类型：可选
+- 描述：如果您手动配置了 DeepSeek API 代理，可以使用此配置项覆盖默认的 DeepSeek API 请求基础 URL
+- 默认值: -
+- 示例: `https://xxxx.models.ai.azure.com/v1`
 ### `DEEPSEEK_API_KEY`
 - 类型：必选

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@lobehub/chat",
-  "version": "1.50.3",
+  "version": "1.50.5",
   "description": "Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
   "keywords": [
     "framework",

package/src/config/aiModels/github.ts CHANGED Viewed

@@ -58,7 +58,7 @@ const githubChatModels: AIChatModelCard[] = [
       functionCall: true,
       vision: true,
     },
-    contextWindowTokens: 128_000,
+    contextWindowTokens: 134_144,
     description: '一种经济高效的AI解决方案，适用于多种文本和图像任务。',
     displayName: 'OpenAI GPT-4o mini',
     enabled: true,
@@ -71,11 +71,18 @@ const githubChatModels: AIChatModelCard[] = [
       functionCall: true,
       vision: true,
     },
-    contextWindowTokens: 128_000,
+    contextWindowTokens: 134_144,
     description: 'OpenAI GPT-4系列中最先进的多模态模型，可以处理文本和图像输入。',
     displayName: 'OpenAI GPT-4o',
     enabled: true,
     id: 'gpt-4o',
+    maxOutput: 16_384,
+    type: 'chat',
+  },
+  {
+    contextWindowTokens: 128_000,
+    displayName: 'DeepSeek R1',
+    id: 'DeepSeek-R1',
     maxOutput: 4096,
     type: 'chat',
   },
@@ -145,6 +152,13 @@ const githubChatModels: AIChatModelCard[] = [
     maxOutput: 4096,
     type: 'chat',
   },
+  {
+    contextWindowTokens: 262_144,
+    displayName: 'Codestral',
+    id: 'Codestral-2501',
+    maxOutput: 4096,
+    type: 'chat',
+  },
   {
     abilities: {
       vision: true,
@@ -210,6 +224,20 @@ const githubChatModels: AIChatModelCard[] = [
     maxOutput: 4096,
     type: 'chat',
   },
+  {
+    contextWindowTokens: 16_384,
+    displayName: 'Phi 4',
+    id: 'Phi-4',
+    maxOutput: 16_384,
+    type: 'chat',
+  },
+  {
+    contextWindowTokens: 131_072,
+    displayName: 'Phi 3.5 MoE',
+    id: 'Phi-3.5-MoE-instruct',
+    maxOutput: 4096,
+    type: 'chat',
+  },
   {
     contextWindowTokens: 131_072,
     description: 'Phi-3-mini模型的更新版。',

package/src/config/aiModels/qwen.ts CHANGED Viewed

@@ -5,11 +5,12 @@ const qwenChatModels: AIChatModelCard[] = [
     abilities: {
       functionCall: true,
     },
-    contextWindowTokens: 131_072,
+    contextWindowTokens: 1_000_000,
     description: '通义千问超大规模语言模型，支持中文、英文等不同语言输入。',
     displayName: 'Qwen Turbo',
     enabled: true,
     id: 'qwen-turbo-latest',
+    maxOutput: 8192,
     pricing: {
       currency: 'CNY',
       input: 0.3,
@@ -26,6 +27,7 @@ const qwenChatModels: AIChatModelCard[] = [
     displayName: 'Qwen Plus',
     enabled: true,
     id: 'qwen-plus-latest',
+    maxOutput: 8192,
     pricing: {
       currency: 'CNY',
       input: 0.8,
@@ -43,6 +45,7 @@ const qwenChatModels: AIChatModelCard[] = [
     displayName: 'Qwen Max',
     enabled: true,
     id: 'qwen-max-latest',
+    maxOutput: 8192,
     pricing: {
       currency: 'CNY',
       input: 20,
@@ -56,6 +59,7 @@ const qwenChatModels: AIChatModelCard[] = [
       '通义千问超大规模语言模型，支持长文本上下文，以及基于长文档、多文档等多个场景的对话功能。',
     displayName: 'Qwen Long',
     id: 'qwen-long',
+    maxOutput: 6000,
     pricing: {
       currency: 'CNY',
       input: 0.5,
@@ -73,10 +77,11 @@ const qwenChatModels: AIChatModelCard[] = [
     displayName: 'Qwen VL Plus',
     enabled: true,
     id: 'qwen-vl-plus-latest',
+    maxOutput: 2048,
     pricing: {
       currency: 'CNY',
-      input: 8,
-      output: 8,
+      input: 1.5,
+      output: 4.5,
     },
     type: 'chat',
   },
@@ -84,16 +89,34 @@ const qwenChatModels: AIChatModelCard[] = [
     abilities: {
       vision: true,
     },
-    contextWindowTokens: 32_000,
+    contextWindowTokens: 32_768,
     description:
       '通义千问超大规模视觉语言模型。相比增强版，再次提升视觉推理能力和指令遵循能力，提供更高的视觉感知和认知水平。',
     displayName: 'Qwen VL Max',
     enabled: true,
     id: 'qwen-vl-max-latest',
+    maxOutput: 2048,
     pricing: {
       currency: 'CNY',
-      input: 20,
-      output: 20,
+      input: 3,
+      output: 9,
+    },
+    type: 'chat',
+  },
+  {
+    abilities: {
+      vision: true,
+    },
+    contextWindowTokens: 34_096,
+    description:
+      '通义千问OCR是文字提取专有模型，专注于文档、表格、试题、手写体文字等类型图像的文字提取能力。它能够识别多种文字，目前支持的语言有：汉语、英语、法语、日语、韩语、德语、俄语、意大利语、越南语、阿拉伯语。',
+    displayName: 'Qwen VL OCR',
+    id: 'qwen-vl-ocr-latest',
+    maxOutput: 4096,
+    pricing: {
+      currency: 'CNY',
+      input: 5,
+      output: 5,
     },
     type: 'chat',
   },
@@ -102,6 +125,7 @@ const qwenChatModels: AIChatModelCard[] = [
     description: '通义千问数学模型是专门用于数学解题的语言模型。',
     displayName: 'Qwen Math Turbo',
     id: 'qwen-math-turbo-latest',
+    maxOutput: 3072,
     pricing: {
       currency: 'CNY',
       input: 2,
@@ -114,6 +138,7 @@ const qwenChatModels: AIChatModelCard[] = [
     description: '通义千问数学模型是专门用于数学解题的语言模型。',
     displayName: 'Qwen Math Plus',
     id: 'qwen-math-plus-latest',
+    maxOutput: 3072,
     pricing: {
       currency: 'CNY',
       input: 4,
@@ -126,6 +151,7 @@ const qwenChatModels: AIChatModelCard[] = [
     description: '通义千问代码模型。',
     displayName: 'Qwen Coder Turbo',
     id: 'qwen-coder-turbo-latest',
+    maxOutput: 8192,
     pricing: {
       currency: 'CNY',
       input: 2,
@@ -138,6 +164,7 @@ const qwenChatModels: AIChatModelCard[] = [
     description: '通义千问代码模型。',
     displayName: 'Qwen Coder Plus',
     id: 'qwen-coder-plus-latest',
+    maxOutput: 8192,
     pricing: {
       currency: 'CNY',
       input: 3.5,
@@ -146,10 +173,14 @@ const qwenChatModels: AIChatModelCard[] = [
     type: 'chat',
   },
   {
+    abilities: {
+      functionCall: true,
+    },
     contextWindowTokens: 32_768,
     description: 'QwQ模型是由 Qwen 团队开发的实验性研究模型，专注于增强 AI 推理能力。',
     displayName: 'QwQ 32B Preview',
     id: 'qwq-32b-preview',
+    maxOutput: 16_384,
     pricing: {
       currency: 'CNY',
       input: 3.5,
@@ -166,6 +197,7 @@ const qwenChatModels: AIChatModelCard[] = [
     description: 'QVQ模型是由 Qwen 团队开发的实验性研究模型，专注于提升视觉推理能力，尤其在数学推理领域。',
     displayName: 'QVQ 72B Preview',
     id: 'qvq-72b-preview',
+    maxOutput: 16_384,
     pricing: {
       currency: 'CNY',
       input: 12,
@@ -182,10 +214,11 @@ const qwenChatModels: AIChatModelCard[] = [
     description: '通义千问2.5对外开源的7B规模的模型。',
     displayName: 'Qwen2.5 7B',
     id: 'qwen2.5-7b-instruct',
+    maxOutput: 8192,
     pricing: {
       currency: 'CNY',
-      input: 1,
-      output: 2,
+      input: 0.5,
+      output: 1,
     },
     type: 'chat',
   },
@@ -197,10 +230,11 @@ const qwenChatModels: AIChatModelCard[] = [
     description: '通义千问2.5对外开源的14B规模的模型。',
     displayName: 'Qwen2.5 14B',
     id: 'qwen2.5-14b-instruct',
+    maxOutput: 8192,
     pricing: {
       currency: 'CNY',
-      input: 2,
-      output: 6,
+      input: 1,
+      output: 3,
     },
     type: 'chat',
   },
@@ -212,6 +246,7 @@ const qwenChatModels: AIChatModelCard[] = [
     description: '通义千问2.5对外开源的32B规模的模型。',
     displayName: 'Qwen2.5 32B',
     id: 'qwen2.5-32b-instruct',
+    maxOutput: 8192,
     pricing: {
       currency: 'CNY',
       input: 3.5,
@@ -227,6 +262,7 @@ const qwenChatModels: AIChatModelCard[] = [
     description: '通义千问2.5对外开源的72B规模的模型。',
     displayName: 'Qwen2.5 72B',
     id: 'qwen2.5-72b-instruct',
+    maxOutput: 8192,
     pricing: {
       currency: 'CNY',
       input: 4,
@@ -234,11 +270,29 @@ const qwenChatModels: AIChatModelCard[] = [
     },
     type: 'chat',
   },
+  {
+    abilities: {
+      functionCall: true,
+    },
+    contextWindowTokens: 1_000_000,
+    description: '通义千问2.5对外开源的72B规模的模型。',
+    displayName: 'Qwen2.5 14B 1M',
+    id: 'qwen2.5-14b-instruct-1m',
+    maxOutput: 8192,
+    pricing: {
+      currency: 'CNY',
+      input: 1,
+      output: 3,
+    },
+    releasedAt: '2025-01-27',
+    type: 'chat',
+  },
   {
     contextWindowTokens: 4096,
     description: 'Qwen-Math 模型具有强大的数学解题能力。',
     displayName: 'Qwen2.5 Math 7B',
     id: 'qwen2.5-math-7b-instruct',
+    maxOutput: 3072,
     pricing: {
       currency: 'CNY',
       input: 1,
@@ -251,6 +305,7 @@ const qwenChatModels: AIChatModelCard[] = [
     description: 'Qwen-Math 模型具有强大的数学解题能力。',
     displayName: 'Qwen2.5 Math 72B',
     id: 'qwen2.5-math-72b-instruct',
+    maxOutput: 3072,
     pricing: {
       currency: 'CNY',
       input: 4,
@@ -263,6 +318,7 @@ const qwenChatModels: AIChatModelCard[] = [
     description: '通义千问代码模型开源版。',
     displayName: 'Qwen2.5 Coder 7B',
     id: 'qwen2.5-coder-7b-instruct',
+    maxOutput: 8192,
     pricing: {
       currency: 'CNY',
       input: 1,
@@ -275,6 +331,7 @@ const qwenChatModels: AIChatModelCard[] = [
     description: '通义千问代码模型开源版。',
     displayName: 'Qwen2.5 Coder 32B',
     id: 'qwen2.5-coder-32b-instruct',
+    maxOutput: 8192,
     pricing: {
       currency: 'CNY',
       input: 3.5,
@@ -312,6 +369,78 @@ const qwenChatModels: AIChatModelCard[] = [
     },
     type: 'chat',
   },
+  {
+    abilities: {
+      vision: true,
+    },
+    contextWindowTokens: 131_072,
+    description:
+      '指令跟随、数学、解题、代码整体提升，万物识别能力提升，支持多样格式直接精准定位视觉元素，支持对长视频文件（最长10分钟）进行理解和秒级别的事件时刻定位，能理解时间先后和快慢，基于解析和定位能力支持操控OS或Mobile的Agent，关键信息抽取能力和Json格式输出能力强，此版本为72B版本，本系列能力最强的版本。',
+    displayName: 'Qwen2.5 VL 72B',
+    id: 'qwen2.5-vl-72b-instruct',
+    maxOutput: 2048,
+    pricing: {
+      currency: 'CNY',
+      input: 16,
+      output: 48,
+    },
+    releasedAt: '2025-01-27',
+    type: 'chat',
+  },
+  {
+    abilities: {
+      vision: true,
+    },
+    contextWindowTokens: 131_072,
+    description:
+      '指令跟随、数学、解题、代码整体提升，万物识别能力提升，支持多样格式直接精准定位视觉元素，支持对长视频文件（最长10分钟）进行理解和秒级别的事件时刻定位，能理解时间先后和快慢，基于解析和定位能力支持操控OS或Mobile的Agent，关键信息抽取能力和Json格式输出能力强，此版本为72B版本，本系列能力最强的版本。',
+    displayName: 'Qwen2.5 VL 7B',
+    id: 'qwen2.5-vl-7b-instruct',
+    maxOutput: 2048,
+    pricing: {
+      currency: 'CNY',
+      input: 2,
+      output: 5,
+    },
+    releasedAt: '2025-01-27',
+    type: 'chat',
+  },
+  {
+    abilities: {
+      reasoning: true,
+    },
+    contextWindowTokens: 65_536,
+    description:
+      'DeepSeek-R1 在后训练阶段大规模使用了强化学习技术，在仅有极少标注数据的情况下，极大提升了模型推理能力，尤其在数学、代码、自然语言推理等任务上。',
+    displayName: 'DeepSeek R1',
+    id: 'deepseek-r1',
+    maxOutput: 8192,
+    pricing: {
+      currency: 'CNY',
+      input: 0,
+      output: 0,
+    },
+    releasedAt: '2025-01-27',
+    type: 'chat',
+  },
+  {
+    abilities: {
+      functionCall: true,
+    },
+    contextWindowTokens: 65_536,
+    description:
+      'DeepSeek-V3 为自研 MoE 模型，671B 参数，激活 37B，在 14.8T token 上进行了预训练，在长文本、代码、数学、百科、中文能力上表现优秀。',
+    displayName: 'DeepSeek V3',
+    id: 'deepseek-v3',
+    maxOutput: 8192,
+    pricing: {
+      currency: 'CNY',
+      input: 0,
+      output: 0,
+    },
+    releasedAt: '2025-01-27',
+    type: 'chat',
+  },
 ];
 export const allModels = [...qwenChatModels];

package/src/config/modelProviders/github.ts CHANGED Viewed

@@ -37,7 +37,7 @@ const Github: ModelProviderCard = {
       vision: true,
     },
     {
-      contextWindowTokens: 128_000,
+      contextWindowTokens: 134_144,
       description: '一种经济高效的AI解决方案，适用于多种文本和图像任务。',
       displayName: 'OpenAI GPT-4o mini',
       enabled: true,
@@ -47,15 +47,21 @@ const Github: ModelProviderCard = {
       vision: true,
     },
     {
-      contextWindowTokens: 128_000,
+      contextWindowTokens: 134_144,
       description: 'OpenAI GPT-4系列中最先进的多模态模型，可以处理文本和图像输入。',
       displayName: 'OpenAI GPT-4o',
       enabled: true,
       functionCall: true,
       id: 'gpt-4o',
-      maxOutput: 4096,
+      maxOutput: 16_384,
       vision: true,
     },
+    {
+      contextWindowTokens: 128_000,
+      displayName: 'DeepSeek R1',
+      id: 'DeepSeek-R1',
+      maxOutput: 4096,
+    },
     {
       contextWindowTokens: 262_144,
       description:
@@ -112,6 +118,12 @@ const Github: ModelProviderCard = {
       id: 'mistral-large',
       maxOutput: 4096,
     },
+    {
+      contextWindowTokens: 262_144,
+      displayName: 'Codestral',
+      id: 'Codestral-2501',
+      maxOutput: 4096,
+    },
     {
       contextWindowTokens: 131_072,
       description: '在高分辨率图像上表现出色的图像推理能力，适用于视觉理解应用。',
@@ -166,6 +178,18 @@ const Github: ModelProviderCard = {
       id: 'meta-llama-3-70b-instruct',
       maxOutput: 4096,
     },
+    {
+      contextWindowTokens: 16_384,
+      displayName: 'Phi 4',
+      id: 'Phi-4',
+      maxOutput: 16_384,
+    },
+    {
+      contextWindowTokens: 131_072,
+      displayName: 'Phi 3.5 MoE',
+      id: 'Phi-3.5-MoE-instruct',
+      maxOutput: 4096,
+    },
     {
       contextWindowTokens: 131_072,
       description: 'Phi-3-mini模型的更新版。',

package/src/config/modelProviders/qwen.ts CHANGED Viewed

@@ -4,7 +4,7 @@ import { ModelProviderCard } from '@/types/llm';
 const Qwen: ModelProviderCard = {
   chatModels: [
     {
-      contextWindowTokens: 131_072,
+      contextWindowTokens: 1_000_000,
       description: '通义千问超大规模语言模型，支持中文、英文等不同语言输入。',
       displayName: 'Qwen Turbo',
       enabled: true,
@@ -64,13 +64,13 @@ const Qwen: ModelProviderCard = {
       id: 'qwen-vl-plus-latest',
       pricing: {
         currency: 'CNY',
-        input: 8,
-        output: 8,
+        input: 1.5,
+        output: 4.5,
       },
       vision: true,
     },
     {
-      contextWindowTokens: 32_000,
+      contextWindowTokens: 32_768,
       description:
         '通义千问超大规模视觉语言模型。相比增强版，再次提升视觉推理能力和指令遵循能力，提供更高的视觉感知和认知水平。',
       displayName: 'Qwen VL Max',
@@ -78,8 +78,21 @@ const Qwen: ModelProviderCard = {
       id: 'qwen-vl-max-latest',
       pricing: {
         currency: 'CNY',
-        input: 20,
-        output: 20,
+        input: 3,
+        output: 9,
+      },
+      vision: true,
+    },
+    {
+      contextWindowTokens: 34_096,
+      description:
+        '通义千问OCR是文字提取专有模型，专注于文档、表格、试题、手写体文字等类型图像的文字提取能力。它能够识别多种文字，目前支持的语言有：汉语、英语、法语、日语、韩语、德语、俄语、意大利语、越南语、阿拉伯语。',
+      displayName: 'Qwen VL OCR',
+      id: 'qwen-vl-ocr-latest',
+      pricing: {
+        currency: 'CNY',
+        input: 5,
+        output: 5,
       },
       vision: true,
     },
@@ -134,9 +147,22 @@ const Qwen: ModelProviderCard = {
       id: 'qwq-32b-preview',
       pricing: {
         currency: 'CNY',
-        input: 0,
-        output: 0,
+        input: 3.5,
+        output: 7,
+      },
+    },
+    {
+      contextWindowTokens: 32_768,
+      description: 'QVQ模型是由 Qwen 团队开发的实验性研究模型，专注于提升视觉推理能力，尤其在数学推理领域。',
+      displayName: 'QVQ 72B Preview',
+      id: 'qvq-72b-preview',
+      pricing: {
+        currency: 'CNY',
+        input: 12,
+        output: 36,
       },
+      releasedAt: '2024-12-25',
+      vision: true,
     },
     {
       contextWindowTokens: 131_072,
@@ -146,8 +172,8 @@ const Qwen: ModelProviderCard = {
       id: 'qwen2.5-7b-instruct',
       pricing: {
         currency: 'CNY',
-        input: 1,
-        output: 2,
+        input: 0.5,
+        output: 1,
       },
     },
     {
@@ -158,8 +184,8 @@ const Qwen: ModelProviderCard = {
       id: 'qwen2.5-14b-instruct',
       pricing: {
         currency: 'CNY',
-        input: 2,
-        output: 6,
+        input: 1,
+        output: 3,
       },
     },
     {
@@ -186,6 +212,18 @@ const Qwen: ModelProviderCard = {
         output: 12,
       },
     },
+    {
+      contextWindowTokens: 1_000_000,
+      description: '通义千问2.5对外开源的14B规模的模型。',
+      displayName: 'Qwen2.5 14B 1M',
+      functionCall: true,
+      id: 'qwen2.5-14b-instruct-1m',
+      pricing: {
+        currency: 'CNY',
+        input: 1,
+        output: 3,
+      },
+    },
     {
       contextWindowTokens: 4096,
       description: 'Qwen-Math 模型具有强大的数学解题能力。',
@@ -254,6 +292,46 @@ const Qwen: ModelProviderCard = {
       },
       vision: true,
     },
+    {
+      contextWindowTokens: 128_000,
+      description:
+        '指令跟随、数学、解题、代码整体提升，万物识别能力提升，支持多样格式直接精准定位视觉元素，支持对长视频文件（最长10分钟）进行理解和秒级别的事件时刻定位，能理解时间先后和快慢，基于解析和定位能力支持操控OS或Mobile的Agent，关键信息抽取能力和Json格式输出能力强，此版本为72B版本，本系列能力最强的版本。',
+      displayName: 'Qwen2.5 VL 72B',
+      id: 'qwen2.5-vl-72b-instruct',
+      pricing: {
+        currency: 'CNY',
+        input: 16,
+        output: 48,
+      },
+      releasedAt: '2025-01-26',
+      vision: true,
+    },
+    {
+      contextWindowTokens: 65_536,
+      description:
+        'DeepSeek-R1 在后训练阶段大规模使用了强化学习技术，在仅有极少标注数据的情况下，极大提升了模型推理能力，尤其在数学、代码、自然语言推理等任务上。',
+      displayName: 'DeepSeek R1',
+      id: 'deepseek-r1',
+      pricing: {
+        currency: 'CNY',
+        input: 0,
+        output: 0,
+      },
+      releasedAt: '2025-01-27',
+    },
+    {
+      contextWindowTokens: 65_536,
+      description:
+        'DeepSeek-V3 为自研 MoE 模型，671B 参数，激活 37B，在 14.8T token 上进行了预训练，在长文本、代码、数学、百科、中文能力上表现优秀。',
+      displayName: 'DeepSeek V3',
+      id: 'deepseek-v3',
+      pricing: {
+        currency: 'CNY',
+        input: 0,
+        output: 0,
+      },
+      releasedAt: '2025-01-27',
+    },
   ],
   checkModel: 'qwen-turbo-latest',
   description:

package/src/database/repositories/dataImporter/index.ts CHANGED Viewed

@@ -14,6 +14,7 @@ import {
 import { LobeChatDatabase } from '@/database/type';
 import { ImportResult } from '@/services/config';
 import { ImporterEntryData } from '@/types/importer';
+import { sanitizeUTF8 } from '@/utils/sanitizeUTF8';
 export class DataImporterRepos {
   private userId: string;
@@ -204,9 +205,10 @@ export class DataImporterRepos {
         // 2. insert messages
         if (shouldInsertMessages.length > 0) {
           const inertValues = shouldInsertMessages.map(
-            ({ id, extra, createdAt, updatedAt, sessionId, topicId, ...res }) => ({
+            ({ id, extra, createdAt, updatedAt, sessionId, topicId, content, ...res }) => ({
               ...res,
               clientId: id,
+              content: sanitizeUTF8(content),
               createdAt: new Date(createdAt),
               model: extra?.fromModel,
               parentId: null,

package/src/libs/langchain/loaders/pdf/index.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 import { PDFLoader } from '@langchain/community/document_loaders/fs/pdf';
 export const PdfLoader = async (fileBlob: Blob) => {
-  const loader = new PDFLoader(fileBlob);
+  const loader = new PDFLoader(fileBlob, { splitPages: true });
   return await loader.load();
 };

package/src/server/routers/async/file.ts CHANGED Viewed

@@ -24,6 +24,7 @@ import {
   IAsyncTaskError,
 } from '@/types/asyncTask';
 import { safeParseJSON } from '@/utils/safeParseJSON';
+import { sanitizeUTF8 } from '@/utils/sanitizeUTF8';
 const fileProcedure = asyncAuthedProcedure.use(async (opts) => {
   const { ctx } = opts;
@@ -95,16 +96,13 @@ export const fileRouter = router({
                   ctx.jwtPayload,
                 );
-                const number = index + 1;
-                console.log(`执行第 ${number} 个任务`);
+                console.log(`run embedding task ${index + 1}`);
-                console.time(`任务[${number}]: embeddings`);
                 const embeddings = await agentRuntime.embeddings({
                   dimensions: 1024,
                   input: chunks.map((c) => c.text),
                   model,
                 });
-                console.timeEnd(`任务[${number}]: embeddings`);
                 const items: NewEmbeddingsItem[] =
                   embeddings?.map((e, idx) => ({
@@ -114,9 +112,7 @@ export const fileRouter = router({
                     model,
                   })) || [];
-                console.time(`任务[${number}]: insert db`);
                 await ctx.embeddingModel.bulkCreate(items);
-                console.timeEnd(`任务[${number}]: insert db`);
               },
               { concurrency: CONCURRENCY },
             );
@@ -215,7 +211,11 @@ export const fileRouter = router({
           // after finish partition, we need to filter out some elements
           const chunks = chunkResult.chunks.map(
-            (item): NewChunkItem => ({ ...item, userId: ctx.userId }),
+            ({ text, ...item }): NewChunkItem => ({
+              ...item,
+              text: text ? sanitizeUTF8(text) : '',
+              userId: ctx.userId,
+            }),
           );
           const duration = Date.now() - startAt;

package/src/utils/sanitizeUTF8.test.ts ADDED Viewed

@@ -0,0 +1,23 @@
+import { sanitizeUTF8 } from './sanitizeUTF8';
+// Unit tests for sanitizeUTF8: each case passes a string to the sanitizer and
+// asserts the exact string that must come back.
+describe('UTF-8 Sanitization', () => {
+  it('should handle null bytes', () => {
+    const input = 'test\u0000string';
+    expect(sanitizeUTF8(input)).toBe('teststring');
+  });
+  it('should handle invalid UTF-8 sequences', () => {
+    const input = 'test\uD800string'; // unpaired (lone) high surrogate
+    expect(sanitizeUTF8(input)).toBe('teststring');
+  });
+  it('should handle invalid UTF-8 content', () => {
+    const input = '\u0002\u0000\u0000\u0002�{\\"error\\":{\\"code\\":\\"resource_exhausted\\",';
+    expect(sanitizeUTF8(input)).toBe('{\\"error\\":{\\"code\\":\\"resource_exhausted\\",');
+  });
+  it('should preserve valid UTF-8 characters', () => {
+    const input = '你好，世界！';
+    expect(sanitizeUTF8(input)).toBe('你好，世界！');
+  });
+});

package/src/utils/sanitizeUTF8.ts ADDED Viewed

@@ -0,0 +1,14 @@
+/**
+ * Remove the following from a string: the Unicode replacement character
+ * (U+FFFD), C0/C1 control characters (tab, line feed and carriage return are
+ * kept) and unpaired surrogates. Well-formed surrogate pairs (emoji and other
+ * astral characters) are preserved.
+ * @param str - the text to clean
+ * @returns a copy of `str` with the characters above removed
+ */
+export const sanitizeUTF8 = (str: string) => {
+  return (
+    str
+      .replaceAll('\uFFFD', '') // drop the Unicode replacement character
+      // eslint-disable-next-line no-control-regex
+      .replaceAll(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F-\u009F]/g, '') // drop control characters
+      // The `u` flag makes the class match whole code points, so only lone
+      // surrogates are removed; without it both halves of a valid pair match too.
+      .replaceAll(/[\uD800-\uDFFF]/gu, '')
+  );
+};