@lobehub/chat 1.50.3 → 1.50.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,31 @@
2
2
 
3
3
  # Changelog
4
4
 
5
+ ### [Version 1.50.4](https://github.com/lobehub/lobe-chat/compare/v1.50.3...v1.50.4)
6
+
7
+ <sup>Released on **2025-02-04**</sup>
8
+
9
+ #### 🐛 Bug Fixes
10
+
11
+ - **misc**: Fix invalid utf8 character.
12
+
13
+ <br/>
14
+
15
+ <details>
16
+ <summary><kbd>Improvements and Fixes</kbd></summary>
17
+
18
+ #### What's fixed
19
+
20
+ - **misc**: Fix invalid utf8 character, closes [#5732](https://github.com/lobehub/lobe-chat/issues/5732) ([2905cb5](https://github.com/lobehub/lobe-chat/commit/2905cb5))
21
+
22
+ </details>
23
+
24
+ <div align="right">
25
+
26
+ [![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
27
+
28
+ </div>
29
+
5
30
  ### [Version 1.50.3](https://github.com/lobehub/lobe-chat/compare/v1.50.2...v1.50.3)
6
31
 
7
32
  <sup>Released on **2025-02-04**</sup>
package/README.ja-JP.md CHANGED
@@ -302,14 +302,14 @@ LobeChat エージェントマーケットプレイスでは、クリエイタ
302
302
 
303
303
  <!-- AGENT LIST -->
304
304
 
305
- | 最近追加 | 説明 |
306
- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------- |
307
- | [SUNO 曲作支援ツール](https://lobechat.com/discover/assistant/suno-lyrics-assistant)<br/><sup>By **[sqkkyzx](https://github.com/sqkkyzx)** on **2025-01-26**</sup> | ユーザーのニーズに基づいて SUNO の曲作成パラメータを生成します<br/>`歌詞作成` `音楽スタイル` `編曲` `パラメータ設定` |
308
- | [偉大なるビッグス・ディッカス](https://lobechat.com/discover/assistant/all-knowing)<br/><sup>By **[CGitwater](https://github.com/CGitwater)** on **2025-01-24**</sup> | 全能の知識の神<br/>`ビッグス` `ディッカス` |
309
- | [PPT 制作達人](https://lobechat.com/discover/assistant/ppt-production-expert)<br/><sup>By **[patricleehua](https://github.com/patricleehua)** on **2025-01-24**</sup> | 高品質な PPT の迅速な制作と最適化に優れています<br/>`ppt制作` `デザイン` `コンサルティング` `コンテンツ最適化` `ユーザーサポート` |
310
- | [OCR ドキュメント転写アシスタント](https://lobechat.com/discover/assistant/ocr-markdown)<br/><sup>By **[Liangpi000](https://github.com/Liangpi000)** on **2025-01-24**</sup> | 文書内容の転写と markdown フォーマットに優れています<br/>`文書生成` `markdown` `フォーマット` `転写` `タスクガイド` |
311
-
312
- > 📊 Total agents: [<kbd>**471**</kbd> ](https://lobechat.com/discover/assistants)
305
+ | 最近追加 | 説明 |
306
+ | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------- |
307
+ | [鋭い評論家](https://lobechat.com/discover/assistant/ruipingshi)<br/><sup>By **[Zippland](https://github.com/Zippland)** on **2025-02-04**</sup> | 鋭い評論と深い問題分析が得意<br/>`評論` `社会的見解` `鋭い分析` |
308
+ | [Python の天才](https://lobechat.com/discover/assistant/python-genius)<br/><sup>By **[novaspivack](https://github.com/novaspivack)** on **2025-02-04**</sup> | 高度な Python コーダー<br/>`コード` `python` |
309
+ | [SAT マスター](https://lobechat.com/discover/assistant/sat-teaching)<br/><sup>By **[iBz-04](https://github.com/iBz-04)** on **2025-02-04**</sup> | 1300 点以上のスコアを目指すデジタル SAT コーチングの専門家<br/>`sat` `適性試験` |
310
+ | [宇宙の啓示者](https://lobechat.com/discover/assistant/universal-god)<br/><sup>By **[GowayLee](https://github.com/GowayLee)** on **2025-02-04**</sup> | 時空を超えた知恵の神託、生命の本質を洞察する<br/>`キャラクターデザイン` `aiキャラクター` `メタバース` `ロールプレイング` `知恵システム` |
311
+
312
+ > 📊 Total agents: [<kbd>**478**</kbd> ](https://lobechat.com/discover/assistants)
313
313
 
314
314
  <!-- AGENT LIST -->
315
315
 
package/README.md CHANGED
@@ -319,14 +319,14 @@ Our marketplace is not just a showcase platform but also a collaborative space.
319
319
 
320
320
  <!-- AGENT LIST -->
321
321
 
322
- | Recent Submits | Description |
323
- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- |
324
- | [SUNO Song Creation Assistant](https://lobechat.com/discover/assistant/suno-lyrics-assistant)<br/><sup>By **[sqkkyzx](https://github.com/sqkkyzx)** on **2025-01-26**</sup> | Can generate SUNO song creation parameters based on user needs<br/>`lyric-creation` `music-style` `arrangement` `parameter-settings` |
325
- | [The Great Biggus Dickus](https://lobechat.com/discover/assistant/all-knowing)<br/><sup>By **[CGitwater](https://github.com/CGitwater)** on **2025-01-24**</sup> | The almighty powerful god of klnowledge<br/>`biggus` `diccus` |
326
- | [PPT Production Expert](https://lobechat.com/discover/assistant/ppt-production-expert)<br/><sup>By **[patricleehua](https://github.com/patricleehua)** on **2025-01-24**</sup> | Skilled in the rapid production and optimization of high-quality PPTs<br/>`ppt-production` `design` `consulting` `content-optimization` `user-support` |
327
- | [OCR Document Transcription Assistant](https://lobechat.com/discover/assistant/ocr-markdown)<br/><sup>By **[Liangpi000](https://github.com/Liangpi000)** on **2025-01-24**</sup> | Specializes in document content transcription and markdown formatting<br/>`document-generation` `markdown` `formatting` `transcription` `task-guidance` |
328
-
329
- > 📊 Total agents: [<kbd>**471**</kbd> ](https://lobechat.com/discover/assistants)
322
+ | Recent Submits | Description |
323
+ | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------- |
324
+ | [Sharp Commentator](https://lobechat.com/discover/assistant/ruipingshi)<br/><sup>By **[Zippland](https://github.com/Zippland)** on **2025-02-04**</sup> | Specializes in sharp commentary and in-depth analysis of issues<br/>`commentary` `social-perspectives` `sharp-analysis` |
325
+ | [Python Genius](https://lobechat.com/discover/assistant/python-genius)<br/><sup>By **[novaspivack](https://github.com/novaspivack)** on **2025-02-04**</sup> | An advanced python coder<br/>`code` `python` |
326
+ | [SAT master](https://lobechat.com/discover/assistant/sat-teaching)<br/><sup>By **[iBz-04](https://github.com/iBz-04)** on **2025-02-04**</sup> | Expert in Digital SAT coaching for 1300+ scores<br/>`sat` `aptitude-test` |
327
+ | [Cosmic Oracle](https://lobechat.com/discover/assistant/universal-god)<br/><sup>By **[GowayLee](https://github.com/GowayLee)** on **2025-02-04**</sup> | Wisdom from across time and space, insight into the essence of life<br/>`character-design` `ai-characters` `metaverse` `role-playing` `wisdom-system` |
328
+
329
+ > 📊 Total agents: [<kbd>**478**</kbd> ](https://lobechat.com/discover/assistants)
330
330
 
331
331
  <!-- AGENT LIST -->
332
332
 
package/README.zh-CN.md CHANGED
@@ -308,14 +308,14 @@ LobeChat 的插件生态系统是其核心功能的重要扩展,它极大地
308
308
 
309
309
  <!-- AGENT LIST -->
310
310
 
311
- | 最近新增 | 描述 |
312
- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------- |
313
- | [SUNO 歌曲创作助手](https://lobechat.com/discover/assistant/suno-lyrics-assistant)<br/><sup>By **[sqkkyzx](https://github.com/sqkkyzx)** on **2025-01-26**</sup> | 能根据用户需求生成 SUNO 歌曲创作参数<br/>`歌词创作` `音乐风格` `编曲` `参数设置` |
314
- | [伟大的比古斯・迪克斯](https://lobechat.com/discover/assistant/all-knowing)<br/><sup>By **[CGitwater](https://github.com/CGitwater)** on **2025-01-24**</sup> | 全能强大的知识之神<br/>`比古斯` `迪克斯` |
315
- | [PPT 制作达人](https://lobechat.com/discover/assistant/ppt-production-expert)<br/><sup>By **[patricleehua](https://github.com/patricleehua)** on **2025-01-24**</sup> | 擅长高质量 PPT 的快速制作和优化<br/>`ppt制作` `设计` `咨询` `内容优化` `用户支持` |
316
- | [OCR 文档转录助手](https://lobechat.com/discover/assistant/ocr-markdown)<br/><sup>By **[Liangpi000](https://github.com/Liangpi000)** on **2025-01-24**</sup> | 擅长文件内容转录与 markdown 格式<br/>`文档生成` `markdown` `格式化` `转录` `任务指导` |
317
-
318
- > 📊 Total agents: [<kbd>**471**</kbd> ](https://lobechat.com/discover/assistants)
311
+ | 最近新增 | 描述 |
312
+ | ---------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------- |
313
+ | [锐评师](https://lobechat.com/discover/assistant/ruipingshi)<br/><sup>By **[Zippland](https://github.com/Zippland)** on **2025-02-04**</sup> | 擅长犀利点评与深度剖析问题<br/>`评论` `社会观点` `尖锐分析` |
314
+ | [Python 天才](https://lobechat.com/discover/assistant/python-genius)<br/><sup>By **[novaspivack](https://github.com/novaspivack)** on **2025-02-04**</sup> | 一名高级 Python 编程者<br/>`代码` `python` |
315
+ | [SAT 大师](https://lobechat.com/discover/assistant/sat-teaching)<br/><sup>By **[iBz-04](https://github.com/iBz-04)** on **2025-02-04**</sup> | 数字 SAT 辅导专家,帮助学生取得 1300 + 分数<br/>`sat` `能力测试` |
316
+ | [宇宙启示者](https://lobechat.com/discover/assistant/universal-god)<br/><sup>By **[GowayLee](https://github.com/GowayLee)** on **2025-02-04**</sup> | 跨时空的智慧神谕,洞悉生命本质<br/>`角色设计` `ai角色` `元宇宙` `角色扮演` `智慧系统` |
317
+
318
+ > 📊 Total agents: [<kbd>**478**</kbd> ](https://lobechat.com/discover/assistants)
319
319
 
320
320
  <!-- AGENT LIST -->
321
321
 
package/changelog/v1.json CHANGED
@@ -1,4 +1,13 @@
1
1
  [
2
+ {
3
+ "children": {
4
+ "fixes": [
5
+ "Fix invalid utf8 character."
6
+ ]
7
+ },
8
+ "date": "2025-02-04",
9
+ "version": "1.50.4"
10
+ },
2
11
  {
3
12
  "children": {
4
13
  "improvements": [
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lobehub/chat",
3
- "version": "1.50.3",
3
+ "version": "1.50.4",
4
4
  "description": "Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
5
5
  "keywords": [
6
6
  "framework",
@@ -14,6 +14,7 @@ import {
14
14
  import { LobeChatDatabase } from '@/database/type';
15
15
  import { ImportResult } from '@/services/config';
16
16
  import { ImporterEntryData } from '@/types/importer';
17
+ import { sanitizeUTF8 } from '@/utils/sanitizeUTF8';
17
18
 
18
19
  export class DataImporterRepos {
19
20
  private userId: string;
@@ -204,9 +205,10 @@ export class DataImporterRepos {
204
205
  // 2. insert messages
205
206
  if (shouldInsertMessages.length > 0) {
206
207
  const inertValues = shouldInsertMessages.map(
207
- ({ id, extra, createdAt, updatedAt, sessionId, topicId, ...res }) => ({
208
+ ({ id, extra, createdAt, updatedAt, sessionId, topicId, content, ...res }) => ({
208
209
  ...res,
209
210
  clientId: id,
211
+ content: sanitizeUTF8(content),
210
212
  createdAt: new Date(createdAt),
211
213
  model: extra?.fromModel,
212
214
  parentId: null,
@@ -1,7 +1,7 @@
1
1
  import { PDFLoader } from '@langchain/community/document_loaders/fs/pdf';
2
2
 
3
3
  export const PdfLoader = async (fileBlob: Blob) => {
4
- const loader = new PDFLoader(fileBlob);
4
+ const loader = new PDFLoader(fileBlob, { splitPages: true });
5
5
 
6
6
  return await loader.load();
7
7
  };
@@ -24,6 +24,7 @@ import {
24
24
  IAsyncTaskError,
25
25
  } from '@/types/asyncTask';
26
26
  import { safeParseJSON } from '@/utils/safeParseJSON';
27
+ import { sanitizeUTF8 } from '@/utils/sanitizeUTF8';
27
28
 
28
29
  const fileProcedure = asyncAuthedProcedure.use(async (opts) => {
29
30
  const { ctx } = opts;
@@ -95,16 +96,13 @@ export const fileRouter = router({
95
96
  ctx.jwtPayload,
96
97
  );
97
98
 
98
- const number = index + 1;
99
- console.log(`执行第 ${number} 个任务`);
99
+ console.log(`run embedding task ${index + 1}`);
100
100
 
101
- console.time(`任务[${number}]: embeddings`);
102
101
  const embeddings = await agentRuntime.embeddings({
103
102
  dimensions: 1024,
104
103
  input: chunks.map((c) => c.text),
105
104
  model,
106
105
  });
107
- console.timeEnd(`任务[${number}]: embeddings`);
108
106
 
109
107
  const items: NewEmbeddingsItem[] =
110
108
  embeddings?.map((e, idx) => ({
@@ -114,9 +112,7 @@ export const fileRouter = router({
114
112
  model,
115
113
  })) || [];
116
114
 
117
- console.time(`任务[${number}]: insert db`);
118
115
  await ctx.embeddingModel.bulkCreate(items);
119
- console.timeEnd(`任务[${number}]: insert db`);
120
116
  },
121
117
  { concurrency: CONCURRENCY },
122
118
  );
@@ -215,7 +211,11 @@ export const fileRouter = router({
215
211
 
216
212
  // after finish partition, we need to filter out some elements
217
213
  const chunks = chunkResult.chunks.map(
218
- (item): NewChunkItem => ({ ...item, userId: ctx.userId }),
214
+ ({ text, ...item }): NewChunkItem => ({
215
+ ...item,
216
+ text: text ? sanitizeUTF8(text) : '',
217
+ userId: ctx.userId,
218
+ }),
219
219
  );
220
220
 
221
221
  const duration = Date.now() - startAt;
@@ -0,0 +1,23 @@
1
+ import { sanitizeUTF8 } from './sanitizeUTF8';
2
+
3
describe('UTF-8 Sanitization', () => {
  // Table of cases: [test title, raw input, expected sanitized output].
  const cases: [string, string, string][] = [
    ['should handle null bytes', 'test\u0000string', 'teststring'],
    // \uD800 is an unpaired (lone) high surrogate.
    ['should handle invalid UTF-8 sequences', 'test\uD800string', 'teststring'],
    [
      'should handle invalid UTF-8 content',
      '\u0002\u0000\u0000\u0002�{\\"error\\":{\\"code\\":\\"resource_exhausted\\",',
      '{\\"error\\":{\\"code\\":\\"resource_exhausted\\",',
    ],
    ['should preserve valid UTF-8 characters', '你好,世界!', '你好,世界!'],
  ];

  // Register the cases in the same order as they are listed above.
  for (const [title, input, expected] of cases) {
    it(title, () => {
      expect(sanitizeUTF8(input)).toBe(expected);
    });
  }
});
@@ -0,0 +1,14 @@
1
/**
 * Strip characters that commonly break UTF-8 text storage from a string.
 *
 * Removed, in order:
 * - the Unicode replacement character (U+FFFD);
 * - control characters U+0000–U+0008, U+000B, U+000C, U+000E–U+001F and
 *   U+007F–U+009F (tab, line feed and carriage return are kept);
 * - unpaired surrogate code units (U+D800–U+DFFF).
 *
 * Valid surrogate pairs (emoji and other astral characters) are preserved.
 *
 * @param str - the text to sanitize
 * @returns the text with the characters listed above removed
 */
export const sanitizeUTF8 = (str: string): string => {
  return (
    str
      // Remove the Unicode replacement character.
      .replace(/\uFFFD/g, '')
      // Remove control characters, keeping \t, \n and \r.
      // eslint-disable-next-line no-control-regex
      .replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F-\u009F]/g, '')
      // Remove unpaired surrogates only. The `u` flag makes the regex match by
      // code point, so a valid high+low pair is seen as one astral code point
      // and is left untouched; without it every surrogate half would be
      // stripped and emoji would be destroyed.
      .replace(/[\uD800-\uDFFF]/gu, '')
  );
};