@lobehub/chat 1.68.8 → 1.68.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +25 -0
- package/changelog/v1.json +9 -0
- package/package.json +3 -1
- package/src/database/client/migrations.json +8 -3
- package/src/libs/langchain/loaders/epub/__tests__/__snapshots__/index.test.ts.snap +238 -0
- package/src/libs/langchain/loaders/epub/__tests__/demo.epub +0 -0
- package/src/libs/langchain/loaders/epub/__tests__/index.test.ts +24 -0
- package/src/libs/langchain/loaders/epub/index.ts +21 -0
- package/src/libs/langchain/loaders/index.ts +9 -0
- package/src/libs/langchain/types.ts +2 -1
- package/src/server/utils/tempFileManager.ts +70 -0
package/CHANGELOG.md
CHANGED
@@ -2,6 +2,31 @@
|
|
2
2
|
|
3
3
|
# Changelog
|
4
4
|
|
5
|
+
### [Version 1.68.9](https://github.com/lobehub/lobe-chat/compare/v1.68.8...v1.68.9)
|
6
|
+
|
7
|
+
<sup>Released on **2025-03-05**</sup>
|
8
|
+
|
9
|
+
#### 💄 Styles
|
10
|
+
|
11
|
+
- **misc**: Add epub file chunk split support.
|
12
|
+
|
13
|
+
<br/>
|
14
|
+
|
15
|
+
<details>
|
16
|
+
<summary><kbd>Improvements and Fixes</kbd></summary>
|
17
|
+
|
18
|
+
#### Styles
|
19
|
+
|
20
|
+
- **misc**: Add epub file chunk split support, closes [#6317](https://github.com/lobehub/lobe-chat/issues/6317) ([a79ab7a](https://github.com/lobehub/lobe-chat/commit/a79ab7a))
|
21
|
+
|
22
|
+
</details>
|
23
|
+
|
24
|
+
<div align="right">
|
25
|
+
|
26
|
+
[](#readme-top)
|
27
|
+
|
28
|
+
</div>
|
29
|
+
|
5
30
|
### [Version 1.68.8](https://github.com/lobehub/lobe-chat/compare/v1.68.7...v1.68.8)
|
6
31
|
|
7
32
|
<sup>Released on **2025-03-05**</sup>
|
package/changelog/v1.json
CHANGED
package/package.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
{
|
2
2
|
"name": "@lobehub/chat",
|
3
|
-
"version": "1.68.8",
|
3
|
+
"version": "1.68.9",
|
4
4
|
"description": "Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
|
5
5
|
"keywords": [
|
6
6
|
"framework",
|
@@ -162,10 +162,12 @@
|
|
162
162
|
"diff": "^7.0.0",
|
163
163
|
"drizzle-orm": "^0.40.0",
|
164
164
|
"drizzle-zod": "^0.5.1",
|
165
|
+
"epub2": "^3.0.2",
|
165
166
|
"fast-deep-equal": "^3.1.3",
|
166
167
|
"file-type": "^20.0.0",
|
167
168
|
"framer-motion": "^11.16.0",
|
168
169
|
"gpt-tokenizer": "^2.8.1",
|
170
|
+
"html-to-text": "^9.0.5",
|
169
171
|
"i18next": "^24.2.1",
|
170
172
|
"i18next-browser-languagedetector": "^8.0.2",
|
171
173
|
"i18next-resources-to-backend": "^1.2.1",
|
@@ -223,7 +223,10 @@
|
|
223
223
|
"hash": "9646161fa041354714f823d726af27247bcd6e60fa3be5698c0d69f337a5700b"
|
224
224
|
},
|
225
225
|
{
|
226
|
-
"sql": [
|
226
|
+
"sql": [
|
227
|
+
"DROP TABLE \"user_budgets\";",
|
228
|
+
"\nDROP TABLE \"user_subscriptions\";"
|
229
|
+
],
|
227
230
|
"bps": true,
|
228
231
|
"folderMillis": 1729699958471,
|
229
232
|
"hash": "7dad43a2a25d1aec82124a4e53f8d82f8505c3073f23606c1dc5d2a4598eacf9"
|
@@ -295,7 +298,9 @@
|
|
295
298
|
"hash": "845a692ceabbfc3caf252a97d3e19a213bc0c433df2689900135f9cfded2cf49"
|
296
299
|
},
|
297
300
|
{
|
298
|
-
"sql": [
|
301
|
+
"sql": [
|
302
|
+
"ALTER TABLE \"messages\" ADD COLUMN \"reasoning\" jsonb;"
|
303
|
+
],
|
299
304
|
"bps": true,
|
300
305
|
"folderMillis": 1737609172353,
|
301
306
|
"hash": "2cb36ae4fcdd7b7064767e04bfbb36ae34518ff4bb1b39006f2dd394d1893868"
|
@@ -309,4 +314,4 @@
|
|
309
314
|
"folderMillis": 1739901891891,
|
310
315
|
"hash": "78d8fefd8c58938d7bc3da2295a73b35ce2e8d7cb2820f8e817acdb8dd5bebb2"
|
311
316
|
}
|
312
|
-
]
|
317
|
+
]
|
@@ -0,0 +1,238 @@
|
|
1
|
+
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
|
2
|
+
|
3
|
+
exports[`EPubLoader > should run 1`] = `
|
4
|
+
[
|
5
|
+
Document {
|
6
|
+
"id": undefined,
|
7
|
+
"metadata": {
|
8
|
+
"loc": {
|
9
|
+
"lines": {
|
10
|
+
"from": 1,
|
11
|
+
"to": 13,
|
12
|
+
},
|
13
|
+
},
|
14
|
+
"source": "",
|
15
|
+
},
|
16
|
+
"pageContent": "HEFTY WATER
|
17
|
+
|
18
|
+
This document serves to test Reading System support for the epub:switch
|
19
|
+
[http://idpf.org/epub/30/spec/epub30-contentdocs.html#sec-xhtml-content-switch]
|
20
|
+
element. There is also a little bit of ruby markup
|
21
|
+
[http://www.w3.org/TR/html5/the-ruby-element.html#the-ruby-element] available.
|
22
|
+
|
23
|
+
|
24
|
+
THE SWITCH
|
25
|
+
|
26
|
+
Below is an instance of the epub:switch element, containing Chemical Markup
|
27
|
+
Language [http://en.wikipedia.org/wiki/Chemical_Markup_Language] (CML). The
|
28
|
+
fallback content is a chunk of plain XHTML5.",
|
29
|
+
},
|
30
|
+
Document {
|
31
|
+
"id": undefined,
|
32
|
+
"metadata": {
|
33
|
+
"loc": {
|
34
|
+
"lines": {
|
35
|
+
"from": 9,
|
36
|
+
"to": 22,
|
37
|
+
},
|
38
|
+
},
|
39
|
+
"source": "",
|
40
|
+
},
|
41
|
+
"pageContent": "THE SWITCH
|
42
|
+
|
43
|
+
Below is an instance of the epub:switch element, containing Chemical Markup
|
44
|
+
Language [http://en.wikipedia.org/wiki/Chemical_Markup_Language] (CML). The
|
45
|
+
fallback content is a chunk of plain XHTML5.
|
46
|
+
|
47
|
+
* If your Reading System supports epub:switch and CML, it will render the CML
|
48
|
+
formula natively, and ignore (a.k.a not display) the XHTML fallback.
|
49
|
+
* If your Reading System supports epub:switch but not CML, it will ignore (not
|
50
|
+
display) the CML formula, and render the the XHTML fallback instead.
|
51
|
+
* If your Reading System does not support epub:switch at all, then the
|
52
|
+
rendering results are somewhat unpredictable, but the most likely result is
|
53
|
+
that it will display both a failed attempt to render the CML and the XHTML
|
54
|
+
fallback.",
|
55
|
+
},
|
56
|
+
Document {
|
57
|
+
"id": undefined,
|
58
|
+
"metadata": {
|
59
|
+
"loc": {
|
60
|
+
"lines": {
|
61
|
+
"from": 24,
|
62
|
+
"to": 43,
|
63
|
+
},
|
64
|
+
},
|
65
|
+
"source": "",
|
66
|
+
},
|
67
|
+
"pageContent": "Note: the XHTML fallback is bold and enclosed in a gray dotted box with a
|
68
|
+
slightly gray background. A failed CML rendering will most likely appear above
|
69
|
+
the gray fallback box and read:
|
70
|
+
"H hydrogen O oxygen hefty H O water".
|
71
|
+
|
72
|
+
Here the switch begins...
|
73
|
+
|
74
|
+
|
75
|
+
H hydrogen O oxygen hefty H O water
|
76
|
+
|
77
|
+
2H2 + O2 ⟶ 2H2O
|
78
|
+
|
79
|
+
... and here the switch ends.
|
80
|
+
|
81
|
+
|
82
|
+
THE SOURCE
|
83
|
+
|
84
|
+
Below is a rendition of the source code of the switch element. Your Reading
|
85
|
+
System should display this correctly regardless of whether it supports the
|
86
|
+
switch element.",
|
87
|
+
},
|
88
|
+
Document {
|
89
|
+
"id": undefined,
|
90
|
+
"metadata": {
|
91
|
+
"loc": {
|
92
|
+
"lines": {
|
93
|
+
"from": 46,
|
94
|
+
"to": 66,
|
95
|
+
},
|
96
|
+
},
|
97
|
+
"source": "",
|
98
|
+
},
|
99
|
+
"pageContent": "<switch xmlns="http://www.idpf.org/2007/ops">
|
100
|
+
<case required-namespace="http://www.xml-cml.org/schema">
|
101
|
+
<chem xmlns="http://www.xml-cml.org/schema">
|
102
|
+
<reaction>
|
103
|
+
<molecule n="2">
|
104
|
+
<atom n="2"> H </atom>
|
105
|
+
<caption> hydrogen </caption>
|
106
|
+
</molecule>
|
107
|
+
<plus></plus>
|
108
|
+
<molecule>
|
109
|
+
<atom n="2"> O </atom>
|
110
|
+
<caption> oxygen </caption>
|
111
|
+
</molecule>
|
112
|
+
<gives>
|
113
|
+
<caption> hefty </caption>
|
114
|
+
</gives>
|
115
|
+
<molecule n="2">
|
116
|
+
<atom n="2"> H </atom>
|
117
|
+
<atom> O </atom>
|
118
|
+
<caption> water </caption>
|
119
|
+
</molecule>",
|
120
|
+
},
|
121
|
+
Document {
|
122
|
+
"id": undefined,
|
123
|
+
"metadata": {
|
124
|
+
"loc": {
|
125
|
+
"lines": {
|
126
|
+
"from": 57,
|
127
|
+
"to": 79,
|
128
|
+
},
|
129
|
+
},
|
130
|
+
"source": "",
|
131
|
+
},
|
132
|
+
"pageContent": "<caption> oxygen </caption>
|
133
|
+
</molecule>
|
134
|
+
<gives>
|
135
|
+
<caption> hefty </caption>
|
136
|
+
</gives>
|
137
|
+
<molecule n="2">
|
138
|
+
<atom n="2"> H </atom>
|
139
|
+
<atom> O </atom>
|
140
|
+
<caption> water </caption>
|
141
|
+
</molecule>
|
142
|
+
</reaction>
|
143
|
+
</chem>
|
144
|
+
</case>
|
145
|
+
<default>
|
146
|
+
<p xmlns="http://www.w3.org/1999/xhtml" id="fallback">
|
147
|
+
<span>2H<sub>2</sub></span>
|
148
|
+
<span>+</span>
|
149
|
+
<span>O<sub>2</sub></span>
|
150
|
+
<span>⟶</span>
|
151
|
+
<span>2H<sub>2</sub>O</span>
|
152
|
+
</p>
|
153
|
+
</default>
|
154
|
+
</switch>",
|
155
|
+
},
|
156
|
+
Document {
|
157
|
+
"id": undefined,
|
158
|
+
"metadata": {
|
159
|
+
"loc": {
|
160
|
+
"lines": {
|
161
|
+
"from": 84,
|
162
|
+
"to": 94,
|
163
|
+
},
|
164
|
+
},
|
165
|
+
"source": "",
|
166
|
+
},
|
167
|
+
"pageContent": "HEFTY RUBY WATER
|
168
|
+
|
169
|
+
While the ruby element is mostly used in east-asian languages, it can also be
|
170
|
+
useful in other contexts. As an example, and as you can see in the source of the
|
171
|
+
CML element above, the code includes a caption element which is intended to be
|
172
|
+
displayed below the formula segments. Following this paragraph is a reworked
|
173
|
+
version of the XHTML fallback used above, using the ruby element. If your
|
174
|
+
Reading System does not support ruby markup, then the captions will appear in
|
175
|
+
parentheses on the same line as the formula segments.
|
176
|
+
|
177
|
+
2H2(hydrogen) + O2(oxygen) ⟶(hefty) 2H2O(water)",
|
178
|
+
},
|
179
|
+
Document {
|
180
|
+
"id": undefined,
|
181
|
+
"metadata": {
|
182
|
+
"loc": {
|
183
|
+
"lines": {
|
184
|
+
"from": 94,
|
185
|
+
"to": 111,
|
186
|
+
},
|
187
|
+
},
|
188
|
+
"source": "",
|
189
|
+
},
|
190
|
+
"pageContent": "2H2(hydrogen) + O2(oxygen) ⟶(hefty) 2H2O(water)
|
191
|
+
|
192
|
+
If your Reading System in addition to supporting ruby markup also supports the
|
193
|
+
-epub-ruby-position
|
194
|
+
[http://idpf.org/epub/30/spec/epub30-contentdocs.html#sec-css-ruby-position]
|
195
|
+
property, then the captions will appear under the formula segments instead of
|
196
|
+
over them.
|
197
|
+
|
198
|
+
The source code for the ruby version of the XHTML fallback looks as follows:
|
199
|
+
|
200
|
+
|
201
|
+
<p id="rubyp">
|
202
|
+
<ruby>2H<sub>2</sub><rp>(</rp><rt>hydrogen</rt><rp>)</rp></ruby>
|
203
|
+
<span>+</span>
|
204
|
+
<ruby>O<sub>2</sub><rp>(</rp><rt>oxygen</rt><rp>)</rp></ruby>
|
205
|
+
<ruby>⟶<rp>(</rp><rt>hefty</rt><rp>)</rp></ruby>
|
206
|
+
<ruby>2H<sub>2</sub>O<rp>(</rp><rt>water</rt><rp>)</rp></ruby>
|
207
|
+
</p>",
|
208
|
+
},
|
209
|
+
Document {
|
210
|
+
"id": undefined,
|
211
|
+
"metadata": {
|
212
|
+
"loc": {
|
213
|
+
"lines": {
|
214
|
+
"from": 105,
|
215
|
+
"to": 120,
|
216
|
+
},
|
217
|
+
},
|
218
|
+
"source": "",
|
219
|
+
},
|
220
|
+
"pageContent": "<p id="rubyp">
|
221
|
+
<ruby>2H<sub>2</sub><rp>(</rp><rt>hydrogen</rt><rp>)</rp></ruby>
|
222
|
+
<span>+</span>
|
223
|
+
<ruby>O<sub>2</sub><rp>(</rp><rt>oxygen</rt><rp>)</rp></ruby>
|
224
|
+
<ruby>⟶<rp>(</rp><rt>hefty</rt><rp>)</rp></ruby>
|
225
|
+
<ruby>2H<sub>2</sub>O<rp>(</rp><rt>water</rt><rp>)</rp></ruby>
|
226
|
+
</p>
|
227
|
+
|
228
|
+
|
229
|
+
... and the css declaration using the -epub-ruby-position property looks like
|
230
|
+
this:
|
231
|
+
|
232
|
+
|
233
|
+
p#rubyp {
|
234
|
+
-epub-ruby-position : under;
|
235
|
+
}",
|
236
|
+
},
|
237
|
+
]
|
238
|
+
`;
|
Binary file
|
@@ -0,0 +1,24 @@
|
|
1
|
+
// @vitest-environment node
|
2
|
+
import * as fs from 'node:fs';
|
3
|
+
import { join } from 'node:path';
|
4
|
+
import { expect } from 'vitest';
|
5
|
+
|
6
|
+
import { EPubLoader } from '../index';
|
7
|
+
|
8
|
+
/**
 * Blanks out the machine-specific `metadata.source` field of each document so
 * that snapshot output stays stable across environments.
 *
 * Documents whose `metadata.source` is already falsy are left untouched.
 * The input array is mutated in place and returned.
 */
function sanitizeDynamicFields(document: any[]) {
  document.forEach((entry) => {
    if (entry.metadata.source) {
      entry.metadata.source = '';
    }
  });

  return document;
}
|
14
|
+
|
15
|
+
// Snapshot test: loads the bundled demo EPUB, chunks it, and compares the
// sanitized chunks with the stored snapshot.
describe('EPubLoader', () => {
  it('should run', async () => {
    const epubPath = join(__dirname, `./demo.epub`);
    const fileContent: Uint8Array = new Uint8Array(fs.readFileSync(epubPath));

    const chunks = await EPubLoader(fileContent);

    // `metadata.source` is blanked before the snapshot comparison.
    expect(sanitizeDynamicFields(chunks)).toMatchSnapshot();
  });
});
|
@@ -0,0 +1,21 @@
|
|
1
|
+
import { EPubLoader as Loader } from '@langchain/community/document_loaders/fs/epub';
|
2
|
+
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
|
3
|
+
import { loaderConfig } from '../config';
|
4
|
+
import { TempFileManager } from '@/server/utils/tempFileManager';
|
5
|
+
|
6
|
+
/**
 * Loads an EPUB from raw bytes and splits it into chunks.
 *
 * The bytes are written to a temporary file, whose path is handed to the
 * LangChain EPUB loader. The loaded documents are then split with
 * `RecursiveCharacterTextSplitter` configured by `loaderConfig`.
 *
 * @param content Raw bytes of the EPUB file.
 * @returns The documents produced by the splitter.
 * @throws Error prefixed with `EPubLoader error:` if writing, loading or
 *   splitting fails.
 */
export const EPubLoader = async (content: Uint8Array) => {
  const tempManager = new TempFileManager();
  try {
    const tempPath = await tempManager.writeTempFile(content);
    const loader = new Loader(tempPath);
    const documents = await loader.load();

    const splitter = new RecursiveCharacterTextSplitter(loaderConfig);
    return await splitter.splitDocuments(documents);
  } catch (e) {
    // A thrown value is not guaranteed to be an Error; narrow it so the
    // message never degrades to "undefined".
    const reason = e instanceof Error ? e.message : String(e);
    throw new Error(`EPubLoader error: ${reason}`);
  } finally {
    // Always remove the temporary file, on success and on failure.
    tempManager.cleanup();
  }
};
|
@@ -14,6 +14,7 @@ import { MarkdownLoader } from './markdown';
|
|
14
14
|
import { PdfLoader } from './pdf';
|
15
15
|
import { PPTXLoader } from './pptx';
|
16
16
|
import { TextLoader } from './txt';
|
17
|
+
import { EPubLoader } from './epub';
|
17
18
|
|
18
19
|
class LangChainError extends Error {
|
19
20
|
constructor(message: string) {
|
@@ -64,6 +65,10 @@ export class ChunkingLoader {
|
|
64
65
|
return await CsVLoader(fileBlob);
|
65
66
|
}
|
66
67
|
|
68
|
+
case 'epub': {
|
69
|
+
return await EPubLoader(content);
|
70
|
+
}
|
71
|
+
|
67
72
|
default: {
|
68
73
|
throw new Error(
|
69
74
|
`Unsupported file type [${type}], please check your file is supported, or create report issue here: https://github.com/lobehub/lobe-chat/discussions/3550`,
|
@@ -100,6 +105,10 @@ export class ChunkingLoader {
|
|
100
105
|
return 'csv';
|
101
106
|
}
|
102
107
|
|
108
|
+
if (filename.endsWith('epub')) {
|
109
|
+
return 'epub';
|
110
|
+
}
|
111
|
+
|
103
112
|
const ext = filename.split('.').pop();
|
104
113
|
|
105
114
|
if (ext && SupportedTextSplitterLanguages.includes(ext as SupportedTextSplitterLanguage)) {
|
@@ -0,0 +1,70 @@
|
|
1
|
+
import { mkdtempSync, rmSync , writeFileSync, existsSync } from 'node:fs';
|
2
|
+
import { tmpdir } from 'node:os';
|
3
|
+
import { join } from 'node:path';
|
4
|
+
import { v4 as uuidv4 } from 'uuid';
|
5
|
+
|
6
|
+
/**
 * Utility class for safely storing temporary files.
 *
 * Each instance owns a unique directory under the OS temp dir and installs
 * process-level hooks (`exit`, `uncaughtException`, `SIGINT`, `SIGTERM`) that
 * remove the directory if the process ends before `cleanup()` is called.
 * `cleanup()` detaches those hooks again, so a long-running process does not
 * accumulate listeners for every instance it creates.
 */
export class TempFileManager {
  private readonly tempDir: string;
  private filePaths: Set<string> = new Set();
  /** Process listeners installed by this instance, kept so `cleanup()` can detach them. */
  private readonly cleanupHooks: Array<[string, (...args: unknown[]) => void]> = [];

  constructor() {
    // Create a unique temporary directory (safe across platforms).
    this.tempDir = mkdtempSync(join(tmpdir(), 'epub-'));
    // Register the cleanup hooks for process termination.
    this.registerCleanupHook();
  }

  /**
   * Writes a Uint8Array to a new file inside the temporary directory.
   *
   * @param data File contents.
   * @param ext File extension, including the dot (defaults to `.epub`).
   * @returns Path of the file that was written.
   * @throws Error prefixed with `Failed to write temp file:` if the write
   *   fails; the temporary directory is cleaned up before throwing.
   */
  async writeTempFile(data: Uint8Array, ext = '.epub'): Promise<string> {
    const filePath = join(this.tempDir, `${uuidv4()}${ext}`);

    try {
      writeFileSync(filePath, data);
      this.filePaths.add(filePath);
      return filePath;
    } catch (error) {
      this.cleanup(); // clean up immediately when the write fails
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`Failed to write temp file: ${reason}`);
    }
  }

  /**
   * Removes the temporary directory with everything in it and detaches the
   * process hooks installed by this instance. Safe to call more than once.
   */
  cleanup(): void {
    try {
      if (existsSync(this.tempDir)) {
        // Recursively delete the directory and its contents.
        rmSync(this.tempDir, { force: true, recursive: true });
      }
      this.filePaths.clear();
    } finally {
      // Detach even if removal failed, so the instance is never pinned in
      // memory by listeners that can no longer do anything useful.
      for (const [event, listener] of this.cleanupHooks) {
        process.removeListener(event, listener);
      }
      this.cleanupHooks.length = 0;
    }
  }

  /**
   * Registers automatic cleanup for process exit, uncaught exceptions and
   * termination signals. Every listener is recorded in `cleanupHooks`.
   */
  private registerCleanupHook(): void {
    // Normal exit.
    this.addHook('exit', () => this.cleanup());

    // Abnormal exit.
    this.addHook('uncaughtException', (err: unknown) => {
      console.error('Uncaught exception, cleaning temp files:', err);
      this.cleanup();
      process.exit(1);
    });

    // Termination by signal.
    for (const signal of ['SIGINT', 'SIGTERM']) {
      this.addHook(signal, () => {
        this.cleanup();
        process.exit(0);
      });
    }
  }

  /** Attaches a process listener and remembers it for later removal. */
  private addHook(event: string, listener: (...args: unknown[]) => void): void {
    process.on(event, listener);
    this.cleanupHooks.push([event, listener]);
  }
}
|