@lobehub/chat 1.50.3 → 1.50.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.env.example CHANGED
@@ -106,6 +106,7 @@ OPENAI_API_KEY=sk-xxxxxxxxx
106
106
 
107
107
  ### DeepSeek AI ####
108
108
 
109
+ # DEEPSEEK_PROXY_URL=https://api.deepseek.com/v1
109
110
  # DEEPSEEK_API_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxx
110
111
 
111
112
  ### Qwen AI ####
package/CHANGELOG.md CHANGED
@@ -2,6 +2,57 @@
2
2
 
3
3
  # Changelog
4
4
 
5
+ ### [Version 1.50.5](https://github.com/lobehub/lobe-chat/compare/v1.50.4...v1.50.5)
6
+
7
+ <sup>Released on **2025-02-04**</sup>
8
+
9
+ #### 💄 Styles
10
+
11
+ - **misc**: Add/Update Aliyun Cloud Models, update GitHub Models.
12
+
13
+ <br/>
14
+
15
+ <details>
16
+ <summary><kbd>Improvements and Fixes</kbd></summary>
17
+
18
+ #### Styles
19
+
20
+ - **misc**: Add/Update Aliyun Cloud Models, closes [#5613](https://github.com/lobehub/lobe-chat/issues/5613) ([95cd822](https://github.com/lobehub/lobe-chat/commit/95cd822))
21
+ - **misc**: Update GitHub Models, closes [#5683](https://github.com/lobehub/lobe-chat/issues/5683) ([ed4e048](https://github.com/lobehub/lobe-chat/commit/ed4e048))
22
+
23
+ </details>
24
+
25
+ <div align="right">
26
+
27
+ [![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
28
+
29
+ </div>
30
+
31
+ ### [Version 1.50.4](https://github.com/lobehub/lobe-chat/compare/v1.50.3...v1.50.4)
32
+
33
+ <sup>Released on **2025-02-04**</sup>
34
+
35
+ #### 🐛 Bug Fixes
36
+
37
+ - **misc**: Fix invalid utf8 character.
38
+
39
+ <br/>
40
+
41
+ <details>
42
+ <summary><kbd>Improvements and Fixes</kbd></summary>
43
+
44
+ #### What's fixed
45
+
46
+ - **misc**: Fix invalid utf8 character, closes [#5732](https://github.com/lobehub/lobe-chat/issues/5732) ([2905cb5](https://github.com/lobehub/lobe-chat/commit/2905cb5))
47
+
48
+ </details>
49
+
50
+ <div align="right">
51
+
52
+ [![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
53
+
54
+ </div>
55
+
5
56
  ### [Version 1.50.3](https://github.com/lobehub/lobe-chat/compare/v1.50.2...v1.50.3)
6
57
 
7
58
  <sup>Released on **2025-02-04**</sup>
package/README.ja-JP.md CHANGED
@@ -302,14 +302,14 @@ LobeChat エージェントマーケットプレイスでは、クリエイタ
302
302
 
303
303
  <!-- AGENT LIST -->
304
304
 
305
- | 最近追加 | 説明 |
306
- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------- |
307
- | [SUNO 曲作支援ツール](https://lobechat.com/discover/assistant/suno-lyrics-assistant)<br/><sup>By **[sqkkyzx](https://github.com/sqkkyzx)** on **2025-01-26**</sup> | ユーザーのニーズに基づいて SUNO の曲作成パラメータを生成します<br/>`歌詞作成` `音楽スタイル` `編曲` `パラメータ設定` |
308
- | [偉大なるビッグス・ディッカス](https://lobechat.com/discover/assistant/all-knowing)<br/><sup>By **[CGitwater](https://github.com/CGitwater)** on **2025-01-24**</sup> | 全能の知識の神<br/>`ビッグス` `ディッカス` |
309
- | [PPT 制作達人](https://lobechat.com/discover/assistant/ppt-production-expert)<br/><sup>By **[patricleehua](https://github.com/patricleehua)** on **2025-01-24**</sup> | 高品質な PPT の迅速な制作と最適化に優れています<br/>`ppt制作` `デザイン` `コンサルティング` `コンテンツ最適化` `ユーザーサポート` |
310
- | [OCR ドキュメント転写アシスタント](https://lobechat.com/discover/assistant/ocr-markdown)<br/><sup>By **[Liangpi000](https://github.com/Liangpi000)** on **2025-01-24**</sup> | 文書内容の転写と markdown フォーマットに優れています<br/>`文書生成` `markdown` `フォーマット` `転写` `タスクガイド` |
311
-
312
- > 📊 Total agents: [<kbd>**471**</kbd> ](https://lobechat.com/discover/assistants)
305
+ | 最近追加 | 説明 |
306
+ | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------- |
307
+ | [鋭い評論家](https://lobechat.com/discover/assistant/ruipingshi)<br/><sup>By **[Zippland](https://github.com/Zippland)** on **2025-02-04**</sup> | 鋭い評論と深い問題分析が得意<br/>`評論` `社会的見解` `鋭い分析` |
308
+ | [Python の天才](https://lobechat.com/discover/assistant/python-genius)<br/><sup>By **[novaspivack](https://github.com/novaspivack)** on **2025-02-04**</sup> | 高度な Python コーダー<br/>`コード` `python` |
309
+ | [SAT マスター](https://lobechat.com/discover/assistant/sat-teaching)<br/><sup>By **[iBz-04](https://github.com/iBz-04)** on **2025-02-04**</sup> | 1300 点以上のスコアを目指すデジタル SAT コーチングの専門家<br/>`sat` `適性試験` |
310
+ | [宇宙の啓示者](https://lobechat.com/discover/assistant/universal-god)<br/><sup>By **[GowayLee](https://github.com/GowayLee)** on **2025-02-04**</sup> | 時空を超えた知恵の神託、生命の本質を洞察する<br/>`キャラクターデザイン` `aiキャラクター` `メタバース` `ロールプレイング` `知恵システム` |
311
+
312
+ > 📊 Total agents: [<kbd>**478**</kbd> ](https://lobechat.com/discover/assistants)
313
313
 
314
314
  <!-- AGENT LIST -->
315
315
 
package/README.md CHANGED
@@ -319,14 +319,14 @@ Our marketplace is not just a showcase platform but also a collaborative space.
319
319
 
320
320
  <!-- AGENT LIST -->
321
321
 
322
- | Recent Submits | Description |
323
- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- |
324
- | [SUNO Song Creation Assistant](https://lobechat.com/discover/assistant/suno-lyrics-assistant)<br/><sup>By **[sqkkyzx](https://github.com/sqkkyzx)** on **2025-01-26**</sup> | Can generate SUNO song creation parameters based on user needs<br/>`lyric-creation` `music-style` `arrangement` `parameter-settings` |
325
- | [The Great Biggus Dickus](https://lobechat.com/discover/assistant/all-knowing)<br/><sup>By **[CGitwater](https://github.com/CGitwater)** on **2025-01-24**</sup> | The almighty powerful god of klnowledge<br/>`biggus` `diccus` |
326
- | [PPT Production Expert](https://lobechat.com/discover/assistant/ppt-production-expert)<br/><sup>By **[patricleehua](https://github.com/patricleehua)** on **2025-01-24**</sup> | Skilled in the rapid production and optimization of high-quality PPTs<br/>`ppt-production` `design` `consulting` `content-optimization` `user-support` |
327
- | [OCR Document Transcription Assistant](https://lobechat.com/discover/assistant/ocr-markdown)<br/><sup>By **[Liangpi000](https://github.com/Liangpi000)** on **2025-01-24**</sup> | Specializes in document content transcription and markdown formatting<br/>`document-generation` `markdown` `formatting` `transcription` `task-guidance` |
328
-
329
- > 📊 Total agents: [<kbd>**471**</kbd> ](https://lobechat.com/discover/assistants)
322
+ | Recent Submits | Description |
323
+ | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------- |
324
+ | [Sharp Commentator](https://lobechat.com/discover/assistant/ruipingshi)<br/><sup>By **[Zippland](https://github.com/Zippland)** on **2025-02-04**</sup> | Specializes in sharp commentary and in-depth analysis of issues<br/>`commentary` `social-perspectives` `sharp-analysis` |
325
+ | [Python Genius](https://lobechat.com/discover/assistant/python-genius)<br/><sup>By **[novaspivack](https://github.com/novaspivack)** on **2025-02-04**</sup> | An advanced python coder<br/>`code` `python` |
326
+ | [SAT master](https://lobechat.com/discover/assistant/sat-teaching)<br/><sup>By **[iBz-04](https://github.com/iBz-04)** on **2025-02-04**</sup> | Expert in Digital SAT coaching for 1300+ scores<br/>`sat` `aptitude-test` |
327
+ | [Cosmic Oracle](https://lobechat.com/discover/assistant/universal-god)<br/><sup>By **[GowayLee](https://github.com/GowayLee)** on **2025-02-04**</sup> | Wisdom from across time and space, insight into the essence of life<br/>`character-design` `ai-characters` `metaverse` `role-playing` `wisdom-system` |
328
+
329
+ > 📊 Total agents: [<kbd>**478**</kbd> ](https://lobechat.com/discover/assistants)
330
330
 
331
331
  <!-- AGENT LIST -->
332
332
 
package/README.zh-CN.md CHANGED
@@ -308,14 +308,14 @@ LobeChat 的插件生态系统是其核心功能的重要扩展,它极大地
308
308
 
309
309
  <!-- AGENT LIST -->
310
310
 
311
- | 最近新增 | 描述 |
312
- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------- |
313
- | [SUNO 歌曲创作助手](https://lobechat.com/discover/assistant/suno-lyrics-assistant)<br/><sup>By **[sqkkyzx](https://github.com/sqkkyzx)** on **2025-01-26**</sup> | 能根据用户需求生成 SUNO 歌曲创作参数<br/>`歌词创作` `音乐风格` `编曲` `参数设置` |
314
- | [伟大的比古斯・迪克斯](https://lobechat.com/discover/assistant/all-knowing)<br/><sup>By **[CGitwater](https://github.com/CGitwater)** on **2025-01-24**</sup> | 全能强大的知识之神<br/>`比古斯` `迪克斯` |
315
- | [PPT 制作达人](https://lobechat.com/discover/assistant/ppt-production-expert)<br/><sup>By **[patricleehua](https://github.com/patricleehua)** on **2025-01-24**</sup> | 擅长高质量 PPT 的快速制作和优化<br/>`ppt制作` `设计` `咨询` `内容优化` `用户支持` |
316
- | [OCR 文档转录助手](https://lobechat.com/discover/assistant/ocr-markdown)<br/><sup>By **[Liangpi000](https://github.com/Liangpi000)** on **2025-01-24**</sup> | 擅长文件内容转录与 markdown 格式<br/>`文档生成` `markdown` `格式化` `转录` `任务指导` |
317
-
318
- > 📊 Total agents: [<kbd>**471**</kbd> ](https://lobechat.com/discover/assistants)
311
+ | 最近新增 | 描述 |
312
+ | ---------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------- |
313
+ | [锐评师](https://lobechat.com/discover/assistant/ruipingshi)<br/><sup>By **[Zippland](https://github.com/Zippland)** on **2025-02-04**</sup> | 擅长犀利点评与深度剖析问题<br/>`评论` `社会观点` `尖锐分析` |
314
+ | [Python 天才](https://lobechat.com/discover/assistant/python-genius)<br/><sup>By **[novaspivack](https://github.com/novaspivack)** on **2025-02-04**</sup> | 一名高级 Python 编程者<br/>`代码` `python` |
315
+ | [SAT 大师](https://lobechat.com/discover/assistant/sat-teaching)<br/><sup>By **[iBz-04](https://github.com/iBz-04)** on **2025-02-04**</sup> | 数字 SAT 辅导专家,帮助学生取得 1300 + 分数<br/>`sat` `能力测试` |
316
+ | [宇宙启示者](https://lobechat.com/discover/assistant/universal-god)<br/><sup>By **[GowayLee](https://github.com/GowayLee)** on **2025-02-04**</sup> | 跨时空的智慧神谕,洞悉生命本质<br/>`角色设计` `ai角色` `元宇宙` `角色扮演` `智慧系统` |
317
+
318
+ > 📊 Total agents: [<kbd>**478**</kbd> ](https://lobechat.com/discover/assistants)
319
319
 
320
320
  <!-- AGENT LIST -->
321
321
 
package/changelog/v1.json CHANGED
@@ -1,4 +1,22 @@
1
1
  [
2
+ {
3
+ "children": {
4
+ "improvements": [
5
+ "Add/Update Aliyun Cloud Models, update GitHub Models."
6
+ ]
7
+ },
8
+ "date": "2025-02-04",
9
+ "version": "1.50.5"
10
+ },
11
+ {
12
+ "children": {
13
+ "fixes": [
14
+ "Fix invalid utf8 character."
15
+ ]
16
+ },
17
+ "date": "2025-02-04",
18
+ "version": "1.50.4"
19
+ },
2
20
  {
3
21
  "children": {
4
22
  "improvements": [
@@ -1,8 +1,8 @@
1
1
  ---
2
2
  title: LobeChat Launches New AI Provider Management System
3
3
  description: >-
4
- LobeChat has revamped its AI Provider Management System, now supporting custom AI providers and models.
5
-
4
+ LobeChat has revamped its AI Provider Management System, now supporting custom
5
+ AI providers and models.
6
6
  tags:
7
7
  - LobeChat
8
8
  - AI Provider
@@ -0,0 +1,33 @@
1
+ ---
2
+ title: >-
3
+ LobeChat Integrates DeepSeek R1, Bringing a Revolutionary Chain of Thought Experience
4
+
5
+ description: >-
6
+ LobeChat v1.49.12 fully supports the DeepSeek R1 model, providing users with an unprecedented interactive experience in the chain of thought.
7
+
8
+ tags:
9
+ - LobeChat
10
+ - DeepSeek
11
+ - Chain of Thought
12
+ ---
13
+
14
+ # Perfect Integration of DeepSeek R1 and its Deep Thinking Experience 🎉
15
+
16
+ After nearly 10 days of meticulous refinement, LobeChat has fully integrated the DeepSeek R1 model in version v1.49.12, offering users a revolutionary interactive experience in the chain of thought!
17
+
18
+ ## 🚀 Major Updates
19
+
20
+ - 🤯 **Comprehensive Support for DeepSeek R1**: Now fully integrated in both the Community and Cloud versions ([lobechat.com](https://lobechat.com)).
21
+ - 🧠 **Real-Time Chain of Thought Display**: Transparently presents the AI's reasoning process, making the resolution of complex issues clear and visible.
22
+ - ⚡️ **Deep Thinking Experience**: Utilizing Chain of Thought technology, it provides more insightful AI conversations.
23
+ - 💫 **Intuitive Problem Analysis**: Makes the analysis of complex issues clear and easy to understand.
24
+
25
+ ## 🌟 How to Use
26
+
27
+ 1. Upgrade to LobeChat v1.49.12 or visit [lobechat.com](https://lobechat.com).
28
+ 2. Select the DeepSeek R1 model in the settings.
29
+ 3. Experience a whole new level of intelligent conversation!
30
+
31
+ ## 📢 Feedback and Support
32
+
33
+ If you encounter any issues while using the application or have suggestions for new features, feel free to engage with us through GitHub Discussions. Let's work together to create a better LobeChat!
@@ -0,0 +1,29 @@
1
+ ---
2
+ title: LobeChat 重磅集成 DeepSeek R1,带来革命性思维链体验
3
+ description: LobeChat v1.49.12 已完整支持 DeepSeek R1 模型,为用户带来前所未有的思维链交互体验
4
+ tags:
5
+ - DeepSeek R1
6
+ - CoT
7
+ - 思维链
8
+ ---
9
+
10
+ # 完美集成 DeepSeek R1 ,开启思维链新体验
11
+
12
+ 经过近 10 天的精心打磨,LobeChat 已在 v1.49.12 版本中完整集成了 DeepSeek R1 模型,为用户带来革命性的思维链交互体验!
13
+
14
+ ## 🚀 重大更新
15
+
16
+ - 🤯 **DeepSeek R1 全面支持**: 现已在社区版与 Cloud 版([lobechat.com](https://lobechat.com))中完整接入
17
+ - 🧠 **实时思维链展示**: 透明呈现 AI 的推理过程,让复杂问题的解决过程清晰可见
18
+ - ⚡️ **深度思考体验**: 通过 Chain of Thought 技术,带来更具洞察力的 AI 对话
19
+ - 💫 **直观的问题解析**: 让复杂问题的分析过程变得清晰易懂
20
+
21
+ ## 🌟 使用方式
22
+
23
+ 1. 升级到 LobeChat v1.49.12 或访问 [lobechat.com](https://lobechat.com)
24
+ 2. 在设置中选择 DeepSeek R1 模型
25
+ 3. 开启全新的智能对话体验!
26
+
27
+ ## 📢 反馈与支持
28
+
29
+ 如果您在使用过程中遇到任何问题,或对新功能有任何建议,欢迎通过 GitHub Discussions 与我们交流。让我们一起打造更好的 LobeChat!
@@ -2,6 +2,12 @@
2
2
  "$schema": "https://github.com/lobehub/lobe-chat/blob/main/docs/changelog/schema.json",
3
3
  "cloud": [],
4
4
  "community": [
5
+ {
6
+ "image": "https://github.com/user-attachments/assets/5fe4c373-ebd0-42a9-bdca-0ab7e0a2e747",
7
+ "id": "2025-02-02-deepseek-r1",
8
+ "date": "2025-02-02",
9
+ "versionRange": ["1.47.8", "1.49.12"]
10
+ },
5
11
  {
6
12
  "image": "https://github.com/user-attachments/assets/7350f211-61ce-488e-b0e2-f0fcac25caeb",
7
13
  "id": "2025-01-22-new-ai-provider",
@@ -169,6 +169,13 @@ If you need to use Azure OpenAI to provide model services, you can refer to the
169
169
 
170
170
  ## DeepSeek AI
171
171
 
172
+ ### `DEEPSEEK_PROXY_URL`
173
+
174
+ - Type: Optional
175
+ - Description: If you manually configure the DeepSeek API proxy, you can use this configuration item to override the default DeepSeek API request base URL
176
+ - Default: -
177
+ - Example: `https://api.deepseek.com/v1`
178
+
172
179
  ### `DEEPSEEK_API_KEY`
173
180
 
174
181
  - Type: Required
@@ -167,6 +167,13 @@ LobeChat 在部署时提供了丰富的模型服务商相关的环境变量,
167
167
 
168
168
  ## DeepSeek AI
169
169
 
170
+ ### `DEEPSEEK_PROXY_URL`
171
+
172
+ - 类型:可选
173
+ - 描述:如果您手动配置了 DeepSeek API 代理,可以使用此配置项覆盖默认的 DeepSeek API 请求基础 URL
174
+ - 默认值: -
175
+ - 示例: `https://api.deepseek.com/v1`
176
+
170
177
  ### `DEEPSEEK_API_KEY`
171
178
 
172
179
  - 类型:必选
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lobehub/chat",
3
- "version": "1.50.3",
3
+ "version": "1.50.5",
4
4
  "description": "Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
5
5
  "keywords": [
6
6
  "framework",
@@ -58,7 +58,7 @@ const githubChatModels: AIChatModelCard[] = [
58
58
  functionCall: true,
59
59
  vision: true,
60
60
  },
61
- contextWindowTokens: 128_000,
61
+ contextWindowTokens: 134_144,
62
62
  description: '一种经济高效的AI解决方案,适用于多种文本和图像任务。',
63
63
  displayName: 'OpenAI GPT-4o mini',
64
64
  enabled: true,
@@ -71,11 +71,18 @@ const githubChatModels: AIChatModelCard[] = [
71
71
  functionCall: true,
72
72
  vision: true,
73
73
  },
74
- contextWindowTokens: 128_000,
74
+ contextWindowTokens: 134_144,
75
75
  description: 'OpenAI GPT-4系列中最先进的多模态模型,可以处理文本和图像输入。',
76
76
  displayName: 'OpenAI GPT-4o',
77
77
  enabled: true,
78
78
  id: 'gpt-4o',
79
+ maxOutput: 16_384,
80
+ type: 'chat',
81
+ },
82
+ {
83
+ contextWindowTokens: 128_000,
84
+ displayName: 'DeepSeek R1',
85
+ id: 'DeepSeek-R1',
79
86
  maxOutput: 4096,
80
87
  type: 'chat',
81
88
  },
@@ -145,6 +152,13 @@ const githubChatModels: AIChatModelCard[] = [
145
152
  maxOutput: 4096,
146
153
  type: 'chat',
147
154
  },
155
+ {
156
+ contextWindowTokens: 262_144,
157
+ displayName: 'Codestral',
158
+ id: 'Codestral-2501',
159
+ maxOutput: 4096,
160
+ type: 'chat',
161
+ },
148
162
  {
149
163
  abilities: {
150
164
  vision: true,
@@ -210,6 +224,20 @@ const githubChatModels: AIChatModelCard[] = [
210
224
  maxOutput: 4096,
211
225
  type: 'chat',
212
226
  },
227
+ {
228
+ contextWindowTokens: 16_384,
229
+ displayName: 'Phi 4',
230
+ id: 'Phi-4',
231
+ maxOutput: 16_384,
232
+ type: 'chat',
233
+ },
234
+ {
235
+ contextWindowTokens: 131_072,
236
+ displayName: 'Phi 3.5 MoE',
237
+ id: 'Phi-3.5-MoE-instruct',
238
+ maxOutput: 4096,
239
+ type: 'chat',
240
+ },
213
241
  {
214
242
  contextWindowTokens: 131_072,
215
243
  description: 'Phi-3-mini模型的更新版。',
@@ -5,11 +5,12 @@ const qwenChatModels: AIChatModelCard[] = [
5
5
  abilities: {
6
6
  functionCall: true,
7
7
  },
8
- contextWindowTokens: 131_072,
8
+ contextWindowTokens: 1_000_000,
9
9
  description: '通义千问超大规模语言模型,支持中文、英文等不同语言输入。',
10
10
  displayName: 'Qwen Turbo',
11
11
  enabled: true,
12
12
  id: 'qwen-turbo-latest',
13
+ maxOutput: 8192,
13
14
  pricing: {
14
15
  currency: 'CNY',
15
16
  input: 0.3,
@@ -26,6 +27,7 @@ const qwenChatModels: AIChatModelCard[] = [
26
27
  displayName: 'Qwen Plus',
27
28
  enabled: true,
28
29
  id: 'qwen-plus-latest',
30
+ maxOutput: 8192,
29
31
  pricing: {
30
32
  currency: 'CNY',
31
33
  input: 0.8,
@@ -43,6 +45,7 @@ const qwenChatModels: AIChatModelCard[] = [
43
45
  displayName: 'Qwen Max',
44
46
  enabled: true,
45
47
  id: 'qwen-max-latest',
48
+ maxOutput: 8192,
46
49
  pricing: {
47
50
  currency: 'CNY',
48
51
  input: 20,
@@ -56,6 +59,7 @@ const qwenChatModels: AIChatModelCard[] = [
56
59
  '通义千问超大规模语言模型,支持长文本上下文,以及基于长文档、多文档等多个场景的对话功能。',
57
60
  displayName: 'Qwen Long',
58
61
  id: 'qwen-long',
62
+ maxOutput: 6000,
59
63
  pricing: {
60
64
  currency: 'CNY',
61
65
  input: 0.5,
@@ -73,10 +77,11 @@ const qwenChatModels: AIChatModelCard[] = [
73
77
  displayName: 'Qwen VL Plus',
74
78
  enabled: true,
75
79
  id: 'qwen-vl-plus-latest',
80
+ maxOutput: 2048,
76
81
  pricing: {
77
82
  currency: 'CNY',
78
- input: 8,
79
- output: 8,
83
+ input: 1.5,
84
+ output: 4.5,
80
85
  },
81
86
  type: 'chat',
82
87
  },
@@ -84,16 +89,34 @@ const qwenChatModels: AIChatModelCard[] = [
84
89
  abilities: {
85
90
  vision: true,
86
91
  },
87
- contextWindowTokens: 32_000,
92
+ contextWindowTokens: 32_768,
88
93
  description:
89
94
  '通义千问超大规模视觉语言模型。相比增强版,再次提升视觉推理能力和指令遵循能力,提供更高的视觉感知和认知水平。',
90
95
  displayName: 'Qwen VL Max',
91
96
  enabled: true,
92
97
  id: 'qwen-vl-max-latest',
98
+ maxOutput: 2048,
93
99
  pricing: {
94
100
  currency: 'CNY',
95
- input: 20,
96
- output: 20,
101
+ input: 3,
102
+ output: 9,
103
+ },
104
+ type: 'chat',
105
+ },
106
+ {
107
+ abilities: {
108
+ vision: true,
109
+ },
110
+ contextWindowTokens: 34_096,
111
+ description:
112
+ '通义千问OCR是文字提取专有模型,专注于文档、表格、试题、手写体文字等类型图像的文字提取能力。它能够识别多种文字,目前支持的语言有:汉语、英语、法语、日语、韩语、德语、俄语、意大利语、越南语、阿拉伯语。',
113
+ displayName: 'Qwen VL OCR',
114
+ id: 'qwen-vl-ocr-latest',
115
+ maxOutput: 4096,
116
+ pricing: {
117
+ currency: 'CNY',
118
+ input: 5,
119
+ output: 5,
97
120
  },
98
121
  type: 'chat',
99
122
  },
@@ -102,6 +125,7 @@ const qwenChatModels: AIChatModelCard[] = [
102
125
  description: '通义千问数学模型是专门用于数学解题的语言模型。',
103
126
  displayName: 'Qwen Math Turbo',
104
127
  id: 'qwen-math-turbo-latest',
128
+ maxOutput: 3072,
105
129
  pricing: {
106
130
  currency: 'CNY',
107
131
  input: 2,
@@ -114,6 +138,7 @@ const qwenChatModels: AIChatModelCard[] = [
114
138
  description: '通义千问数学模型是专门用于数学解题的语言模型。',
115
139
  displayName: 'Qwen Math Plus',
116
140
  id: 'qwen-math-plus-latest',
141
+ maxOutput: 3072,
117
142
  pricing: {
118
143
  currency: 'CNY',
119
144
  input: 4,
@@ -126,6 +151,7 @@ const qwenChatModels: AIChatModelCard[] = [
126
151
  description: '通义千问代码模型。',
127
152
  displayName: 'Qwen Coder Turbo',
128
153
  id: 'qwen-coder-turbo-latest',
154
+ maxOutput: 8192,
129
155
  pricing: {
130
156
  currency: 'CNY',
131
157
  input: 2,
@@ -138,6 +164,7 @@ const qwenChatModels: AIChatModelCard[] = [
138
164
  description: '通义千问代码模型。',
139
165
  displayName: 'Qwen Coder Plus',
140
166
  id: 'qwen-coder-plus-latest',
167
+ maxOutput: 8192,
141
168
  pricing: {
142
169
  currency: 'CNY',
143
170
  input: 3.5,
@@ -146,10 +173,14 @@ const qwenChatModels: AIChatModelCard[] = [
146
173
  type: 'chat',
147
174
  },
148
175
  {
176
+ abilities: {
177
+ functionCall: true,
178
+ },
149
179
  contextWindowTokens: 32_768,
150
180
  description: 'QwQ模型是由 Qwen 团队开发的实验性研究模型,专注于增强 AI 推理能力。',
151
181
  displayName: 'QwQ 32B Preview',
152
182
  id: 'qwq-32b-preview',
183
+ maxOutput: 16_384,
153
184
  pricing: {
154
185
  currency: 'CNY',
155
186
  input: 3.5,
@@ -166,6 +197,7 @@ const qwenChatModels: AIChatModelCard[] = [
166
197
  description: 'QVQ模型是由 Qwen 团队开发的实验性研究模型,专注于提升视觉推理能力,尤其在数学推理领域。',
167
198
  displayName: 'QVQ 72B Preview',
168
199
  id: 'qvq-72b-preview',
200
+ maxOutput: 16_384,
169
201
  pricing: {
170
202
  currency: 'CNY',
171
203
  input: 12,
@@ -182,10 +214,11 @@ const qwenChatModels: AIChatModelCard[] = [
182
214
  description: '通义千问2.5对外开源的7B规模的模型。',
183
215
  displayName: 'Qwen2.5 7B',
184
216
  id: 'qwen2.5-7b-instruct',
217
+ maxOutput: 8192,
185
218
  pricing: {
186
219
  currency: 'CNY',
187
- input: 1,
188
- output: 2,
220
+ input: 0.5,
221
+ output: 1,
189
222
  },
190
223
  type: 'chat',
191
224
  },
@@ -197,10 +230,11 @@ const qwenChatModels: AIChatModelCard[] = [
197
230
  description: '通义千问2.5对外开源的14B规模的模型。',
198
231
  displayName: 'Qwen2.5 14B',
199
232
  id: 'qwen2.5-14b-instruct',
233
+ maxOutput: 8192,
200
234
  pricing: {
201
235
  currency: 'CNY',
202
- input: 2,
203
- output: 6,
236
+ input: 1,
237
+ output: 3,
204
238
  },
205
239
  type: 'chat',
206
240
  },
@@ -212,6 +246,7 @@ const qwenChatModels: AIChatModelCard[] = [
212
246
  description: '通义千问2.5对外开源的32B规模的模型。',
213
247
  displayName: 'Qwen2.5 32B',
214
248
  id: 'qwen2.5-32b-instruct',
249
+ maxOutput: 8192,
215
250
  pricing: {
216
251
  currency: 'CNY',
217
252
  input: 3.5,
@@ -227,6 +262,7 @@ const qwenChatModels: AIChatModelCard[] = [
227
262
  description: '通义千问2.5对外开源的72B规模的模型。',
228
263
  displayName: 'Qwen2.5 72B',
229
264
  id: 'qwen2.5-72b-instruct',
265
+ maxOutput: 8192,
230
266
  pricing: {
231
267
  currency: 'CNY',
232
268
  input: 4,
@@ -234,11 +270,29 @@ const qwenChatModels: AIChatModelCard[] = [
234
270
  },
235
271
  type: 'chat',
236
272
  },
273
+ {
274
+ abilities: {
275
+ functionCall: true,
276
+ },
277
+ contextWindowTokens: 1_000_000,
278
+ description: '通义千问2.5对外开源的72B规模的模型。',
279
+ displayName: 'Qwen2.5 14B 1M',
280
+ id: 'qwen2.5-14b-instruct-1m',
281
+ maxOutput: 8192,
282
+ pricing: {
283
+ currency: 'CNY',
284
+ input: 1,
285
+ output: 3,
286
+ },
287
+ releasedAt: '2025-01-27',
288
+ type: 'chat',
289
+ },
237
290
  {
238
291
  contextWindowTokens: 4096,
239
292
  description: 'Qwen-Math 模型具有强大的数学解题能力。',
240
293
  displayName: 'Qwen2.5 Math 7B',
241
294
  id: 'qwen2.5-math-7b-instruct',
295
+ maxOutput: 3072,
242
296
  pricing: {
243
297
  currency: 'CNY',
244
298
  input: 1,
@@ -251,6 +305,7 @@ const qwenChatModels: AIChatModelCard[] = [
251
305
  description: 'Qwen-Math 模型具有强大的数学解题能力。',
252
306
  displayName: 'Qwen2.5 Math 72B',
253
307
  id: 'qwen2.5-math-72b-instruct',
308
+ maxOutput: 3072,
254
309
  pricing: {
255
310
  currency: 'CNY',
256
311
  input: 4,
@@ -263,6 +318,7 @@ const qwenChatModels: AIChatModelCard[] = [
263
318
  description: '通义千问代码模型开源版。',
264
319
  displayName: 'Qwen2.5 Coder 7B',
265
320
  id: 'qwen2.5-coder-7b-instruct',
321
+ maxOutput: 8192,
266
322
  pricing: {
267
323
  currency: 'CNY',
268
324
  input: 1,
@@ -275,6 +331,7 @@ const qwenChatModels: AIChatModelCard[] = [
275
331
  description: '通义千问代码模型开源版。',
276
332
  displayName: 'Qwen2.5 Coder 32B',
277
333
  id: 'qwen2.5-coder-32b-instruct',
334
+ maxOutput: 8192,
278
335
  pricing: {
279
336
  currency: 'CNY',
280
337
  input: 3.5,
@@ -312,6 +369,78 @@ const qwenChatModels: AIChatModelCard[] = [
312
369
  },
313
370
  type: 'chat',
314
371
  },
372
+ {
373
+ abilities: {
374
+ vision: true,
375
+ },
376
+ contextWindowTokens: 131_072,
377
+ description:
378
+ '指令跟随、数学、解题、代码整体提升,万物识别能力提升,支持多样格式直接精准定位视觉元素,支持对长视频文件(最长10分钟)进行理解和秒级别的事件时刻定位,能理解时间先后和快慢,基于解析和定位能力支持操控OS或Mobile的Agent,关键信息抽取能力和Json格式输出能力强,此版本为72B版本,本系列能力最强的版本。',
379
+ displayName: 'Qwen2.5 VL 72B',
380
+ id: 'qwen2.5-vl-72b-instruct',
381
+ maxOutput: 2048,
382
+ pricing: {
383
+ currency: 'CNY',
384
+ input: 16,
385
+ output: 48,
386
+ },
387
+ releasedAt: '2025-01-27',
388
+ type: 'chat',
389
+ },
390
+ {
391
+ abilities: {
392
+ vision: true,
393
+ },
394
+ contextWindowTokens: 131_072,
395
+ description:
396
+ '指令跟随、数学、解题、代码整体提升,万物识别能力提升,支持多样格式直接精准定位视觉元素,支持对长视频文件(最长10分钟)进行理解和秒级别的事件时刻定位,能理解时间先后和快慢,基于解析和定位能力支持操控OS或Mobile的Agent,关键信息抽取能力和Json格式输出能力强,此版本为72B版本,本系列能力最强的版本。',
397
+ displayName: 'Qwen2.5 VL 7B',
398
+ id: 'qwen2.5-vl-7b-instruct',
399
+ maxOutput: 2048,
400
+ pricing: {
401
+ currency: 'CNY',
402
+ input: 2,
403
+ output: 5,
404
+ },
405
+ releasedAt: '2025-01-27',
406
+ type: 'chat',
407
+ },
408
+ {
409
+ abilities: {
410
+ reasoning: true,
411
+ },
412
+ contextWindowTokens: 65_536,
413
+ description:
414
+ 'DeepSeek-R1 在后训练阶段大规模使用了强化学习技术,在仅有极少标注数据的情况下,极大提升了模型推理能力,尤其在数学、代码、自然语言推理等任务上。',
415
+ displayName: 'DeepSeek R1',
416
+ id: 'deepseek-r1',
417
+ maxOutput: 8192,
418
+ pricing: {
419
+ currency: 'CNY',
420
+ input: 0,
421
+ output: 0,
422
+ },
423
+ releasedAt: '2025-01-27',
424
+ type: 'chat',
425
+ },
426
+ {
427
+ abilities: {
428
+ functionCall: true,
429
+ },
430
+ contextWindowTokens: 65_536,
431
+ description:
432
+ 'DeepSeek-V3 为自研 MoE 模型,671B 参数,激活 37B,在 14.8T token 上进行了预训练,在长文本、代码、数学、百科、中文能力上表现优秀。',
433
+ displayName: 'DeepSeek V3',
434
+ id: 'deepseek-v3',
435
+ maxOutput: 8192,
436
+ pricing: {
437
+ currency: 'CNY',
438
+ input: 0,
439
+ output: 0,
440
+ },
441
+ releasedAt: '2025-01-27',
442
+ type: 'chat',
443
+ },
315
444
  ];
316
445
 
317
446
  export const allModels = [...qwenChatModels];
@@ -37,7 +37,7 @@ const Github: ModelProviderCard = {
37
37
  vision: true,
38
38
  },
39
39
  {
40
- contextWindowTokens: 128_000,
40
+ contextWindowTokens: 134_144,
41
41
  description: '一种经济高效的AI解决方案,适用于多种文本和图像任务。',
42
42
  displayName: 'OpenAI GPT-4o mini',
43
43
  enabled: true,
@@ -47,15 +47,21 @@ const Github: ModelProviderCard = {
47
47
  vision: true,
48
48
  },
49
49
  {
50
- contextWindowTokens: 128_000,
50
+ contextWindowTokens: 134_144,
51
51
  description: 'OpenAI GPT-4系列中最先进的多模态模型,可以处理文本和图像输入。',
52
52
  displayName: 'OpenAI GPT-4o',
53
53
  enabled: true,
54
54
  functionCall: true,
55
55
  id: 'gpt-4o',
56
- maxOutput: 4096,
56
+ maxOutput: 16_384,
57
57
  vision: true,
58
58
  },
59
+ {
60
+ contextWindowTokens: 128_000,
61
+ displayName: 'DeepSeek R1',
62
+ id: 'DeepSeek-R1',
63
+ maxOutput: 4096,
64
+ },
59
65
  {
60
66
  contextWindowTokens: 262_144,
61
67
  description:
@@ -112,6 +118,12 @@ const Github: ModelProviderCard = {
112
118
  id: 'mistral-large',
113
119
  maxOutput: 4096,
114
120
  },
121
+ {
122
+ contextWindowTokens: 262_144,
123
+ displayName: 'Codestral',
124
+ id: 'Codestral-2501',
125
+ maxOutput: 4096,
126
+ },
115
127
  {
116
128
  contextWindowTokens: 131_072,
117
129
  description: '在高分辨率图像上表现出色的图像推理能力,适用于视觉理解应用。',
@@ -166,6 +178,18 @@ const Github: ModelProviderCard = {
166
178
  id: 'meta-llama-3-70b-instruct',
167
179
  maxOutput: 4096,
168
180
  },
181
+ {
182
+ contextWindowTokens: 16_384,
183
+ displayName: 'Phi 4',
184
+ id: 'Phi-4',
185
+ maxOutput: 16_384,
186
+ },
187
+ {
188
+ contextWindowTokens: 131_072,
189
+ displayName: 'Phi 3.5 MoE',
190
+ id: 'Phi-3.5-MoE-instruct',
191
+ maxOutput: 4096,
192
+ },
169
193
  {
170
194
  contextWindowTokens: 131_072,
171
195
  description: 'Phi-3-mini模型的更新版。',
@@ -4,7 +4,7 @@ import { ModelProviderCard } from '@/types/llm';
4
4
  const Qwen: ModelProviderCard = {
5
5
  chatModels: [
6
6
  {
7
- contextWindowTokens: 131_072,
7
+ contextWindowTokens: 1_000_000,
8
8
  description: '通义千问超大规模语言模型,支持中文、英文等不同语言输入。',
9
9
  displayName: 'Qwen Turbo',
10
10
  enabled: true,
@@ -64,13 +64,13 @@ const Qwen: ModelProviderCard = {
64
64
  id: 'qwen-vl-plus-latest',
65
65
  pricing: {
66
66
  currency: 'CNY',
67
- input: 8,
68
- output: 8,
67
+ input: 1.5,
68
+ output: 4.5,
69
69
  },
70
70
  vision: true,
71
71
  },
72
72
  {
73
- contextWindowTokens: 32_000,
73
+ contextWindowTokens: 32_768,
74
74
  description:
75
75
  '通义千问超大规模视觉语言模型。相比增强版,再次提升视觉推理能力和指令遵循能力,提供更高的视觉感知和认知水平。',
76
76
  displayName: 'Qwen VL Max',
@@ -78,8 +78,21 @@ const Qwen: ModelProviderCard = {
78
78
  id: 'qwen-vl-max-latest',
79
79
  pricing: {
80
80
  currency: 'CNY',
81
- input: 20,
82
- output: 20,
81
+ input: 3,
82
+ output: 9,
83
+ },
84
+ vision: true,
85
+ },
86
+ {
87
+ contextWindowTokens: 34_096,
88
+ description:
89
+ '通义千问OCR是文字提取专有模型,专注于文档、表格、试题、手写体文字等类型图像的文字提取能力。它能够识别多种文字,目前支持的语言有:汉语、英语、法语、日语、韩语、德语、俄语、意大利语、越南语、阿拉伯语。',
90
+ displayName: 'Qwen VL OCR',
91
+ id: 'qwen-vl-ocr-latest',
92
+ pricing: {
93
+ currency: 'CNY',
94
+ input: 5,
95
+ output: 5,
83
96
  },
84
97
  vision: true,
85
98
  },
@@ -134,9 +147,22 @@ const Qwen: ModelProviderCard = {
134
147
  id: 'qwq-32b-preview',
135
148
  pricing: {
136
149
  currency: 'CNY',
137
- input: 0,
138
- output: 0,
150
+ input: 3.5,
151
+ output: 7,
152
+ },
153
+ },
154
+ {
155
+ contextWindowTokens: 32_768,
156
+ description: 'QVQ模型是由 Qwen 团队开发的实验性研究模型,专注于提升视觉推理能力,尤其在数学推理领域。',
157
+ displayName: 'QVQ 72B Preview',
158
+ id: 'qvq-72b-preview',
159
+ pricing: {
160
+ currency: 'CNY',
161
+ input: 12,
162
+ output: 36,
139
163
  },
164
+ releasedAt: '2024-12-25',
165
+ vision: true,
140
166
  },
141
167
  {
142
168
  contextWindowTokens: 131_072,
@@ -146,8 +172,8 @@ const Qwen: ModelProviderCard = {
146
172
  id: 'qwen2.5-7b-instruct',
147
173
  pricing: {
148
174
  currency: 'CNY',
149
- input: 1,
150
- output: 2,
175
+ input: 0.5,
176
+ output: 1,
151
177
  },
152
178
  },
153
179
  {
@@ -158,8 +184,8 @@ const Qwen: ModelProviderCard = {
158
184
  id: 'qwen2.5-14b-instruct',
159
185
  pricing: {
160
186
  currency: 'CNY',
161
- input: 2,
162
- output: 6,
187
+ input: 1,
188
+ output: 3,
163
189
  },
164
190
  },
165
191
  {
@@ -186,6 +212,18 @@ const Qwen: ModelProviderCard = {
186
212
  output: 12,
187
213
  },
188
214
  },
215
+ {
216
+ contextWindowTokens: 1_000_000,
217
+ description: '通义千问2.5对外开源的14B规模的模型。',
218
+ displayName: 'Qwen2.5 14B 1M',
219
+ functionCall: true,
220
+ id: 'qwen2.5-14b-instruct-1m',
221
+ pricing: {
222
+ currency: 'CNY',
223
+ input: 1,
224
+ output: 3,
225
+ },
226
+ },
189
227
  {
190
228
  contextWindowTokens: 4096,
191
229
  description: 'Qwen-Math 模型具有强大的数学解题能力。',
@@ -254,6 +292,46 @@ const Qwen: ModelProviderCard = {
254
292
  },
255
293
  vision: true,
256
294
  },
295
+ {
296
+ contextWindowTokens: 128_000,
297
+ description:
298
+ '指令跟随、数学、解题、代码整体提升,万物识别能力提升,支持多样格式直接精准定位视觉元素,支持对长视频文件(最长10分钟)进行理解和秒级别的事件时刻定位,能理解时间先后和快慢,基于解析和定位能力支持操控OS或Mobile的Agent,关键信息抽取能力和Json格式输出能力强,此版本为72B版本,本系列能力最强的版本。',
299
+ displayName: 'Qwen2.5 VL 72B',
300
+ id: 'qwen2.5-vl-72b-instruct',
301
+ pricing: {
302
+ currency: 'CNY',
303
+ input: 16,
304
+ output: 48,
305
+ },
306
+ releasedAt: '2025-01-26',
307
+ vision: true,
308
+ },
309
+ {
310
+ contextWindowTokens: 65_536,
311
+ description:
312
+ 'DeepSeek-R1 在后训练阶段大规模使用了强化学习技术,在仅有极少标注数据的情况下,极大提升了模型推理能力,尤其在数学、代码、自然语言推理等任务上。',
313
+ displayName: 'DeepSeek R1',
314
+ id: 'deepseek-r1',
315
+ pricing: {
316
+ currency: 'CNY',
317
+ input: 0,
318
+ output: 0,
319
+ },
320
+ releasedAt: '2025-01-27',
321
+ },
322
+ {
323
+ contextWindowTokens: 65_536,
324
+ description:
325
+ 'DeepSeek-V3 为自研 MoE 模型,671B 参数,激活 37B,在 14.8T token 上进行了预训练,在长文本、代码、数学、百科、中文能力上表现优秀。',
326
+ displayName: 'DeepSeek V3',
327
+ id: 'deepseek-v3',
328
+ pricing: {
329
+ currency: 'CNY',
330
+ input: 0,
331
+ output: 0,
332
+ },
333
+ releasedAt: '2025-01-27',
334
+ },
257
335
  ],
258
336
  checkModel: 'qwen-turbo-latest',
259
337
  description:
@@ -14,6 +14,7 @@ import {
14
14
  import { LobeChatDatabase } from '@/database/type';
15
15
  import { ImportResult } from '@/services/config';
16
16
  import { ImporterEntryData } from '@/types/importer';
17
+ import { sanitizeUTF8 } from '@/utils/sanitizeUTF8';
17
18
 
18
19
  export class DataImporterRepos {
19
20
  private userId: string;
@@ -204,9 +205,10 @@ export class DataImporterRepos {
204
205
  // 2. insert messages
205
206
  if (shouldInsertMessages.length > 0) {
206
207
  const inertValues = shouldInsertMessages.map(
207
- ({ id, extra, createdAt, updatedAt, sessionId, topicId, ...res }) => ({
208
+ ({ id, extra, createdAt, updatedAt, sessionId, topicId, content, ...res }) => ({
208
209
  ...res,
209
210
  clientId: id,
211
+ content: sanitizeUTF8(content),
210
212
  createdAt: new Date(createdAt),
211
213
  model: extra?.fromModel,
212
214
  parentId: null,
@@ -1,7 +1,7 @@
1
1
  import { PDFLoader } from '@langchain/community/document_loaders/fs/pdf';
2
2
 
3
3
  export const PdfLoader = async (fileBlob: Blob) => {
4
- const loader = new PDFLoader(fileBlob);
4
+ const loader = new PDFLoader(fileBlob, { splitPages: true });
5
5
 
6
6
  return await loader.load();
7
7
  };
@@ -24,6 +24,7 @@ import {
24
24
  IAsyncTaskError,
25
25
  } from '@/types/asyncTask';
26
26
  import { safeParseJSON } from '@/utils/safeParseJSON';
27
+ import { sanitizeUTF8 } from '@/utils/sanitizeUTF8';
27
28
 
28
29
  const fileProcedure = asyncAuthedProcedure.use(async (opts) => {
29
30
  const { ctx } = opts;
@@ -95,16 +96,13 @@ export const fileRouter = router({
95
96
  ctx.jwtPayload,
96
97
  );
97
98
 
98
- const number = index + 1;
99
- console.log(`执行第 ${number} 个任务`);
99
+ console.log(`run embedding task ${index + 1}`);
100
100
 
101
- console.time(`任务[${number}]: embeddings`);
102
101
  const embeddings = await agentRuntime.embeddings({
103
102
  dimensions: 1024,
104
103
  input: chunks.map((c) => c.text),
105
104
  model,
106
105
  });
107
- console.timeEnd(`任务[${number}]: embeddings`);
108
106
 
109
107
  const items: NewEmbeddingsItem[] =
110
108
  embeddings?.map((e, idx) => ({
@@ -114,9 +112,7 @@ export const fileRouter = router({
114
112
  model,
115
113
  })) || [];
116
114
 
117
- console.time(`任务[${number}]: insert db`);
118
115
  await ctx.embeddingModel.bulkCreate(items);
119
- console.timeEnd(`任务[${number}]: insert db`);
120
116
  },
121
117
  { concurrency: CONCURRENCY },
122
118
  );
@@ -215,7 +211,11 @@ export const fileRouter = router({
215
211
 
216
212
  // after finish partition, we need to filter out some elements
217
213
  const chunks = chunkResult.chunks.map(
218
- (item): NewChunkItem => ({ ...item, userId: ctx.userId }),
214
+ ({ text, ...item }): NewChunkItem => ({
215
+ ...item,
216
+ text: text ? sanitizeUTF8(text) : '',
217
+ userId: ctx.userId,
218
+ }),
219
219
  );
220
220
 
221
221
  const duration = Date.now() - startAt;
@@ -0,0 +1,23 @@
1
+ import { sanitizeUTF8 } from './sanitizeUTF8';
2
+
3
+ describe('UTF-8 Sanitization', () => {
4
+ it('should handle null bytes', () => {
5
+ const input = 'test\u0000string';
6
+ expect(sanitizeUTF8(input)).toBe('teststring');
7
+ });
8
+
9
+ it('should handle invalid UTF-8 sequences', () => {
10
+ const input = 'test\uD800string'; // 未配对的代理项
11
+ expect(sanitizeUTF8(input)).toBe('teststring');
12
+ });
13
+
14
+ it('should handle invalid UTF-8 content', () => {
15
+ const input = '\u0002\u0000\u0000\u0002�{\\"error\\":{\\"code\\":\\"resource_exhausted\\",';
16
+ expect(sanitizeUTF8(input)).toBe('{\\"error\\":{\\"code\\":\\"resource_exhausted\\",');
17
+ });
18
+
19
+ it('should preserve valid UTF-8 characters', () => {
20
+ const input = '你好,世界!';
21
+ expect(sanitizeUTF8(input)).toBe('你好,世界!');
22
+ });
23
+ });
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Sanitize UTF-8 string to remove all control characters and invalid code points.
3
+ * @param str
4
+ */
5
+ export const sanitizeUTF8 = (str: string) => {
6
+ // 移除替换字符 (0xFFFD) 和其他非法字符
7
+ return (
8
+ str
9
+ .replaceAll('�', '') // 移除 Unicode 替换字符
10
+ // eslint-disable-next-line no-control-regex
11
+ .replaceAll(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F-\u009F]/g, '') // 移除控制字符
12
+ .replaceAll(/[\uD800-\uDFFF]/g, '')
13
+ ); // 移除未配对的代理项码点
14
+ };