@lobehub/chat 1.50.3 → 1.50.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +25 -0
- package/README.ja-JP.md +8 -8
- package/README.md +8 -8
- package/README.zh-CN.md +8 -8
- package/changelog/v1.json +9 -0
- package/package.json +1 -1
- package/src/database/repositories/dataImporter/index.ts +3 -1
- package/src/libs/langchain/loaders/pdf/index.ts +1 -1
- package/src/server/routers/async/file.ts +7 -7
- package/src/utils/sanitizeUTF8.test.ts +23 -0
- package/src/utils/sanitizeUTF8.ts +14 -0
package/CHANGELOG.md
CHANGED
@@ -2,6 +2,31 @@
|
|
2
2
|
|
3
3
|
# Changelog
|
4
4
|
|
5
|
+
### [Version 1.50.4](https://github.com/lobehub/lobe-chat/compare/v1.50.3...v1.50.4)
|
6
|
+
|
7
|
+
<sup>Released on **2025-02-04**</sup>
|
8
|
+
|
9
|
+
#### 🐛 Bug Fixes
|
10
|
+
|
11
|
+
- **misc**: Fix invalid utf8 character.
|
12
|
+
|
13
|
+
<br/>
|
14
|
+
|
15
|
+
<details>
|
16
|
+
<summary><kbd>Improvements and Fixes</kbd></summary>
|
17
|
+
|
18
|
+
#### What's fixed
|
19
|
+
|
20
|
+
- **misc**: Fix invalid utf8 character, closes [#5732](https://github.com/lobehub/lobe-chat/issues/5732) ([2905cb5](https://github.com/lobehub/lobe-chat/commit/2905cb5))
|
21
|
+
|
22
|
+
</details>
|
23
|
+
|
24
|
+
<div align="right">
|
25
|
+
|
26
|
+
[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
|
27
|
+
|
28
|
+
</div>
|
29
|
+
|
5
30
|
### [Version 1.50.3](https://github.com/lobehub/lobe-chat/compare/v1.50.2...v1.50.3)
|
6
31
|
|
7
32
|
<sup>Released on **2025-02-04**</sup>
|
package/README.ja-JP.md
CHANGED
@@ -302,14 +302,14 @@ LobeChat エージェントマーケットプレイスでは、クリエイタ
|
|
302
302
|
|
303
303
|
<!-- AGENT LIST -->
|
304
304
|
|
305
|
-
| 最近追加
|
306
|
-
|
|
307
|
-
| [
|
308
|
-
| [
|
309
|
-
| [
|
310
|
-
| [
|
311
|
-
|
312
|
-
> 📊 Total agents: [<kbd>**
|
305
|
+
| 最近追加 | 説明 |
|
306
|
+
| ------------------------------------------------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------- |
|
307
|
+
| [鋭い評論家](https://lobechat.com/discover/assistant/ruipingshi)<br/><sup>By **[Zippland](https://github.com/Zippland)** on **2025-02-04**</sup> | 鋭い評論と深い問題分析が得意<br/>`評論` `社会的見解` `鋭い分析` |
|
308
|
+
| [Python の天才](https://lobechat.com/discover/assistant/python-genius)<br/><sup>By **[novaspivack](https://github.com/novaspivack)** on **2025-02-04**</sup> | 高度な Python コーダー<br/>`コード` `python` |
|
309
|
+
| [SAT マスター](https://lobechat.com/discover/assistant/sat-teaching)<br/><sup>By **[iBz-04](https://github.com/iBz-04)** on **2025-02-04**</sup> | 1300 点以上のスコアを目指すデジタル SAT コーチングの専門家<br/>`sat` `適性試験` |
|
310
|
+
| [宇宙の啓示者](https://lobechat.com/discover/assistant/universal-god)<br/><sup>By **[GowayLee](https://github.com/GowayLee)** on **2025-02-04**</sup> | 時空を超えた知恵の神託、生命の本質を洞察する<br/>`キャラクターデザイン` `aiキャラクター` `メタバース` `ロールプレイング` `知恵システム` |
|
311
|
+
|
312
|
+
> 📊 Total agents: [<kbd>**478**</kbd> ](https://lobechat.com/discover/assistants)
|
313
313
|
|
314
314
|
<!-- AGENT LIST -->
|
315
315
|
|
package/README.md
CHANGED
@@ -319,14 +319,14 @@ Our marketplace is not just a showcase platform but also a collaborative space.
|
|
319
319
|
|
320
320
|
<!-- AGENT LIST -->
|
321
321
|
|
322
|
-
| Recent Submits
|
323
|
-
|
|
324
|
-
| [
|
325
|
-
| [
|
326
|
-
| [
|
327
|
-
| [
|
328
|
-
|
329
|
-
> 📊 Total agents: [<kbd>**
|
322
|
+
| Recent Submits | Description |
|
323
|
+
| ------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------- |
|
324
|
+
| [Sharp Commentator](https://lobechat.com/discover/assistant/ruipingshi)<br/><sup>By **[Zippland](https://github.com/Zippland)** on **2025-02-04**</sup> | Specializes in sharp commentary and in-depth analysis of issues<br/>`commentary` `social-perspectives` `sharp-analysis` |
|
325
|
+
| [Python Genius](https://lobechat.com/discover/assistant/python-genius)<br/><sup>By **[novaspivack](https://github.com/novaspivack)** on **2025-02-04**</sup> | An advanced python coder<br/>`code` `python` |
|
326
|
+
| [SAT master](https://lobechat.com/discover/assistant/sat-teaching)<br/><sup>By **[iBz-04](https://github.com/iBz-04)** on **2025-02-04**</sup> | Expert in Digital SAT coaching for 1300+ scores<br/>`sat` `aptitude-test` |
|
327
|
+
| [Cosmic Oracle](https://lobechat.com/discover/assistant/universal-god)<br/><sup>By **[GowayLee](https://github.com/GowayLee)** on **2025-02-04**</sup> | Wisdom from across time and space, insight into the essence of life<br/>`character-design` `ai-characters` `metaverse` `role-playing` `wisdom-system` |
|
328
|
+
|
329
|
+
> 📊 Total agents: [<kbd>**478**</kbd> ](https://lobechat.com/discover/assistants)
|
330
330
|
|
331
331
|
<!-- AGENT LIST -->
|
332
332
|
|
package/README.zh-CN.md
CHANGED
@@ -308,14 +308,14 @@ LobeChat 的插件生态系统是其核心功能的重要扩展,它极大地
|
|
308
308
|
|
309
309
|
<!-- AGENT LIST -->
|
310
310
|
|
311
|
-
| 最近新增
|
312
|
-
|
|
313
|
-
| [
|
314
|
-
| [
|
315
|
-
| [
|
316
|
-
| [
|
317
|
-
|
318
|
-
> 📊 Total agents: [<kbd>**
|
311
|
+
| 最近新增 | 描述 |
|
312
|
+
| ---------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------- |
|
313
|
+
| [锐评师](https://lobechat.com/discover/assistant/ruipingshi)<br/><sup>By **[Zippland](https://github.com/Zippland)** on **2025-02-04**</sup> | 擅长犀利点评与深度剖析问题<br/>`评论` `社会观点` `尖锐分析` |
|
314
|
+
| [Python 天才](https://lobechat.com/discover/assistant/python-genius)<br/><sup>By **[novaspivack](https://github.com/novaspivack)** on **2025-02-04**</sup> | 一名高级 Python 编程者<br/>`代码` `python` |
|
315
|
+
| [SAT 大师](https://lobechat.com/discover/assistant/sat-teaching)<br/><sup>By **[iBz-04](https://github.com/iBz-04)** on **2025-02-04**</sup> | 数字 SAT 辅导专家,帮助学生取得 1300 + 分数<br/>`sat` `能力测试` |
|
316
|
+
| [宇宙启示者](https://lobechat.com/discover/assistant/universal-god)<br/><sup>By **[GowayLee](https://github.com/GowayLee)** on **2025-02-04**</sup> | 跨时空的智慧神谕,洞悉生命本质<br/>`角色设计` `ai角色` `元宇宙` `角色扮演` `智慧系统` |
|
317
|
+
|
318
|
+
> 📊 Total agents: [<kbd>**478**</kbd> ](https://lobechat.com/discover/assistants)
|
319
319
|
|
320
320
|
<!-- AGENT LIST -->
|
321
321
|
|
package/changelog/v1.json
CHANGED
package/package.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
{
|
2
2
|
"name": "@lobehub/chat",
|
3
|
-
"version": "1.50.3",
|
3
|
+
"version": "1.50.4",
|
4
4
|
"description": "Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
|
5
5
|
"keywords": [
|
6
6
|
"framework",
|
@@ -14,6 +14,7 @@ import {
|
|
14
14
|
import { LobeChatDatabase } from '@/database/type';
|
15
15
|
import { ImportResult } from '@/services/config';
|
16
16
|
import { ImporterEntryData } from '@/types/importer';
|
17
|
+
import { sanitizeUTF8 } from '@/utils/sanitizeUTF8';
|
17
18
|
|
18
19
|
export class DataImporterRepos {
|
19
20
|
private userId: string;
|
@@ -204,9 +205,10 @@ export class DataImporterRepos {
|
|
204
205
|
// 2. insert messages
|
205
206
|
if (shouldInsertMessages.length > 0) {
|
206
207
|
const inertValues = shouldInsertMessages.map(
|
207
|
-
({ id, extra, createdAt, updatedAt, sessionId, topicId, ...res }) => ({
|
208
|
+
({ id, extra, createdAt, updatedAt, sessionId, topicId, content, ...res }) => ({
|
208
209
|
...res,
|
209
210
|
clientId: id,
|
211
|
+
content: sanitizeUTF8(content),
|
210
212
|
createdAt: new Date(createdAt),
|
211
213
|
model: extra?.fromModel,
|
212
214
|
parentId: null,
|
@@ -1,7 +1,7 @@
|
|
1
1
|
import { PDFLoader } from '@langchain/community/document_loaders/fs/pdf';
|
2
2
|
|
3
3
|
export const PdfLoader = async (fileBlob: Blob) => {
|
4
|
-
const loader = new PDFLoader(fileBlob);
|
4
|
+
const loader = new PDFLoader(fileBlob, { splitPages: true });
|
5
5
|
|
6
6
|
return await loader.load();
|
7
7
|
};
|
@@ -24,6 +24,7 @@ import {
|
|
24
24
|
IAsyncTaskError,
|
25
25
|
} from '@/types/asyncTask';
|
26
26
|
import { safeParseJSON } from '@/utils/safeParseJSON';
|
27
|
+
import { sanitizeUTF8 } from '@/utils/sanitizeUTF8';
|
27
28
|
|
28
29
|
const fileProcedure = asyncAuthedProcedure.use(async (opts) => {
|
29
30
|
const { ctx } = opts;
|
@@ -95,16 +96,13 @@ export const fileRouter = router({
|
|
95
96
|
ctx.jwtPayload,
|
96
97
|
);
|
97
98
|
|
98
|
-
|
99
|
-
console.log(`执行第 ${number} 个任务`);
|
99
|
+
console.log(`run embedding task ${index + 1}`);
|
100
100
|
|
101
|
-
console.time(`任务[${number}]: embeddings`);
|
102
101
|
const embeddings = await agentRuntime.embeddings({
|
103
102
|
dimensions: 1024,
|
104
103
|
input: chunks.map((c) => c.text),
|
105
104
|
model,
|
106
105
|
});
|
107
|
-
console.timeEnd(`任务[${number}]: embeddings`);
|
108
106
|
|
109
107
|
const items: NewEmbeddingsItem[] =
|
110
108
|
embeddings?.map((e, idx) => ({
|
@@ -114,9 +112,7 @@ export const fileRouter = router({
|
|
114
112
|
model,
|
115
113
|
})) || [];
|
116
114
|
|
117
|
-
console.time(`任务[${number}]: insert db`);
|
118
115
|
await ctx.embeddingModel.bulkCreate(items);
|
119
|
-
console.timeEnd(`任务[${number}]: insert db`);
|
120
116
|
},
|
121
117
|
{ concurrency: CONCURRENCY },
|
122
118
|
);
|
@@ -215,7 +211,11 @@ export const fileRouter = router({
|
|
215
211
|
|
216
212
|
// after finish partition, we need to filter out some elements
|
217
213
|
const chunks = chunkResult.chunks.map(
|
218
|
-
(item): NewChunkItem => ({
|
214
|
+
({ text, ...item }): NewChunkItem => ({
|
215
|
+
...item,
|
216
|
+
text: text ? sanitizeUTF8(text) : '',
|
217
|
+
userId: ctx.userId,
|
218
|
+
}),
|
219
219
|
);
|
220
220
|
|
221
221
|
const duration = Date.now() - startAt;
|
@@ -0,0 +1,23 @@
|
|
1
|
+
import { sanitizeUTF8 } from './sanitizeUTF8';
|
2
|
+
|
3
|
+
describe('UTF-8 Sanitization', () => {
|
4
|
+
it('should handle null bytes', () => {
|
5
|
+
const input = 'test\u0000string';
|
6
|
+
expect(sanitizeUTF8(input)).toBe('teststring');
|
7
|
+
});
|
8
|
+
|
9
|
+
it('should handle invalid UTF-8 sequences', () => {
|
10
|
+
const input = 'test\uD800string'; // unpaired surrogate
|
11
|
+
expect(sanitizeUTF8(input)).toBe('teststring');
|
12
|
+
});
|
13
|
+
|
14
|
+
it('should handle invalid UTF-8 content', () => {
|
15
|
+
const input = '\u0002\u0000\u0000\u0002�{\\"error\\":{\\"code\\":\\"resource_exhausted\\",';
|
16
|
+
expect(sanitizeUTF8(input)).toBe('{\\"error\\":{\\"code\\":\\"resource_exhausted\\",');
|
17
|
+
});
|
18
|
+
|
19
|
+
it('should preserve valid UTF-8 characters', () => {
|
20
|
+
const input = '你好,世界!';
|
21
|
+
expect(sanitizeUTF8(input)).toBe('你好,世界!');
|
22
|
+
});
|
23
|
+
});
|
@@ -0,0 +1,14 @@
|
|
1
|
+
/**
 * Sanitize a string by stripping characters that are invalid or unwanted in
 * stored UTF-8 text.
 *
 * Removes:
 * - the Unicode replacement character (U+FFFD);
 * - C0/C1 control characters and DEL (U+0000–U+0008, U+000B, U+000C,
 *   U+000E–U+001F, U+007F–U+009F) — tab, line feed and carriage return are
 *   intentionally kept;
 * - unpaired (lone) surrogate code units.
 *
 * Well-formed surrogate pairs (emoji and other astral-plane characters) are
 * preserved.
 *
 * @param str - The text to sanitize.
 * @returns The sanitized text.
 */
export const sanitizeUTF8 = (str: string): string => {
  return (
    str
      // Remove the Unicode replacement character (U+FFFD)
      .replace(/\uFFFD/g, '')
      // Remove control characters, keeping \t, \n and \r
      // eslint-disable-next-line no-control-regex
      .replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F-\u009F]/g, '')
      // Remove unpaired surrogates only. The `u` flag makes the regex operate on
      // code points, so a valid high+low pair is read as one astral code point
      // and is NOT matched; without it every emoji would be stripped.
      .replace(/[\uD800-\uDFFF]/gu, '')
  );
};
|