@lobehub/chat 1.81.4 → 1.81.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.js +1 -0
- package/.github/workflows/release.yml +5 -0
- package/.github/workflows/test.yml +5 -0
- package/CHANGELOG.md +50 -0
- package/changelog/v1.json +18 -0
- package/locales/ar/auth.json +1 -1
- package/locales/ar/hotkey.json +4 -0
- package/locales/ar/models.json +3 -0
- package/locales/bg-BG/auth.json +1 -1
- package/locales/bg-BG/hotkey.json +4 -0
- package/locales/bg-BG/models.json +3 -0
- package/locales/de-DE/auth.json +1 -1
- package/locales/de-DE/hotkey.json +4 -0
- package/locales/de-DE/models.json +3 -0
- package/locales/en-US/auth.json +1 -1
- package/locales/en-US/hotkey.json +4 -0
- package/locales/en-US/models.json +3 -0
- package/locales/es-ES/auth.json +1 -1
- package/locales/es-ES/hotkey.json +4 -0
- package/locales/es-ES/models.json +3 -0
- package/locales/fa-IR/auth.json +1 -1
- package/locales/fa-IR/hotkey.json +4 -0
- package/locales/fa-IR/models.json +3 -0
- package/locales/fr-FR/auth.json +1 -1
- package/locales/fr-FR/hotkey.json +4 -0
- package/locales/fr-FR/models.json +3 -0
- package/locales/it-IT/auth.json +1 -1
- package/locales/it-IT/hotkey.json +4 -0
- package/locales/it-IT/models.json +3 -0
- package/locales/ja-JP/auth.json +1 -1
- package/locales/ja-JP/hotkey.json +4 -0
- package/locales/ja-JP/models.json +3 -0
- package/locales/ko-KR/auth.json +1 -1
- package/locales/ko-KR/hotkey.json +4 -0
- package/locales/ko-KR/models.json +3 -0
- package/locales/nl-NL/auth.json +1 -1
- package/locales/nl-NL/hotkey.json +4 -0
- package/locales/nl-NL/models.json +3 -0
- package/locales/pl-PL/auth.json +1 -1
- package/locales/pl-PL/hotkey.json +4 -0
- package/locales/pl-PL/models.json +3 -0
- package/locales/pt-BR/auth.json +1 -1
- package/locales/pt-BR/hotkey.json +4 -0
- package/locales/pt-BR/models.json +3 -0
- package/locales/ru-RU/auth.json +1 -1
- package/locales/ru-RU/hotkey.json +4 -0
- package/locales/ru-RU/models.json +3 -0
- package/locales/tr-TR/auth.json +1 -1
- package/locales/tr-TR/hotkey.json +4 -0
- package/locales/tr-TR/models.json +3 -0
- package/locales/vi-VN/auth.json +1 -1
- package/locales/vi-VN/hotkey.json +4 -0
- package/locales/vi-VN/models.json +3 -0
- package/locales/zh-CN/auth.json +1 -1
- package/locales/zh-CN/changelog.json +1 -1
- package/locales/zh-CN/clerk.json +1 -1
- package/locales/zh-CN/discover.json +1 -1
- package/locales/zh-CN/file.json +1 -1
- package/locales/zh-CN/hotkey.json +4 -0
- package/locales/zh-CN/knowledgeBase.json +1 -1
- package/locales/zh-CN/metadata.json +1 -1
- package/locales/zh-CN/migration.json +1 -1
- package/locales/zh-CN/models.json +3 -0
- package/locales/zh-CN/ragEval.json +1 -1
- package/locales/zh-CN/thread.json +1 -1
- package/locales/zh-CN/welcome.json +1 -1
- package/locales/zh-TW/auth.json +1 -1
- package/locales/zh-TW/hotkey.json +4 -0
- package/locales/zh-TW/models.json +3 -0
- package/package.json +6 -4
- package/packages/file-loaders/README.md +63 -0
- package/packages/file-loaders/package.json +42 -0
- package/packages/file-loaders/src/index.ts +2 -0
- package/packages/file-loaders/src/loadFile.ts +206 -0
- package/packages/file-loaders/src/loaders/docx/__snapshots__/index.test.ts.snap +74 -0
- package/packages/file-loaders/src/loaders/docx/fixtures/test.docx +0 -0
- package/packages/file-loaders/src/loaders/docx/index.test.ts +41 -0
- package/packages/file-loaders/src/loaders/docx/index.ts +73 -0
- package/packages/file-loaders/src/loaders/excel/__snapshots__/index.test.ts.snap +58 -0
- package/packages/file-loaders/src/loaders/excel/fixtures/test.xlsx +0 -0
- package/packages/file-loaders/src/loaders/excel/index.test.ts +47 -0
- package/packages/file-loaders/src/loaders/excel/index.ts +121 -0
- package/packages/file-loaders/src/loaders/index.ts +19 -0
- package/packages/file-loaders/src/loaders/pdf/__snapshots__/index.test.ts.snap +98 -0
- package/packages/file-loaders/src/loaders/pdf/index.test.ts +49 -0
- package/packages/file-loaders/src/loaders/pdf/index.ts +133 -0
- package/packages/file-loaders/src/loaders/pptx/__snapshots__/index.test.ts.snap +40 -0
- package/packages/file-loaders/src/loaders/pptx/fixtures/test.pptx +0 -0
- package/packages/file-loaders/src/loaders/pptx/index.test.ts +47 -0
- package/packages/file-loaders/src/loaders/pptx/index.ts +186 -0
- package/packages/file-loaders/src/loaders/text/__snapshots__/index.test.ts.snap +15 -0
- package/packages/file-loaders/src/loaders/text/fixtures/test.txt +2 -0
- package/packages/file-loaders/src/loaders/text/index.test.ts +38 -0
- package/packages/file-loaders/src/loaders/text/index.ts +53 -0
- package/packages/file-loaders/src/types.ts +200 -0
- package/packages/file-loaders/src/utils/isTextReadableFile.ts +68 -0
- package/packages/file-loaders/src/utils/parser-utils.ts +112 -0
- package/packages/file-loaders/test/__snapshots__/loaders.test.ts.snap +93 -0
- package/packages/file-loaders/test/fixtures/test.csv +4 -0
- package/packages/file-loaders/test/fixtures/test.docx +0 -0
- package/packages/file-loaders/test/fixtures/test.epub +0 -0
- package/packages/file-loaders/test/fixtures/test.md +3 -0
- package/packages/file-loaders/test/fixtures/test.pptx +0 -0
- package/packages/file-loaders/test/fixtures/test.txt +3 -0
- package/packages/file-loaders/test/loaders.test.ts +39 -0
- package/src/config/aiModels/github.ts +2 -4
- package/src/config/aiModels/google.ts +3 -4
- package/src/config/aiModels/sensenova.ts +4 -5
- package/src/const/hotkeys.ts +6 -0
- package/src/features/ChatInput/ActionBar/Clear.tsx +18 -8
- package/src/hooks/useHotkeys/chatScope.ts +7 -0
- package/src/libs/agent-runtime/google/index.ts +1 -1
- package/src/libs/agent-runtime/sensenova/index.ts +20 -27
- package/src/libs/agent-runtime/utils/sensenovaHelpers.test.ts +24 -33
- package/src/libs/agent-runtime/utils/sensenovaHelpers.ts +2 -3
- package/src/locales/default/hotkey.ts +4 -0
- package/src/server/modules/MCPClient/__tests__/__snapshots__/index.test.ts.snap +113 -0
- package/src/server/modules/MCPClient/__tests__/index.test.ts +81 -0
- package/src/server/modules/MCPClient/index.ts +80 -0
- package/src/types/hotkey.ts +1 -0
package/locales/zh-CN/clerk.json
CHANGED
package/locales/zh-CN/file.json
CHANGED
@@ -983,6 +983,9 @@
|
|
983
983
|
"gemini-2.0-pro-exp-02-05": {
|
984
984
|
"description": "Gemini 2.0 Pro Experimental 是 Google 最新的实验性多模态AI模型,与历史版本相比有一定的质量提升,特别是对于世界知识、代码和长上下文。"
|
985
985
|
},
|
986
|
+
"gemini-2.5-flash-preview-04-17": {
|
987
|
+
"description": "Gemini 2.5 Flash Preview 是 Google 性价比最高的模型,提供全面的功能。"
|
988
|
+
},
|
986
989
|
"gemini-2.5-pro-exp-03-25": {
|
987
990
|
"description": "Gemini 2.5 Pro Experimental 是 Google 最先进的思维模型,能够对代码、数学和STEM领域的复杂问题进行推理,以及使用长上下文分析大型数据集、代码库和文档。"
|
988
991
|
},
|
package/locales/zh-TW/auth.json
CHANGED
@@ -983,6 +983,9 @@
|
|
983
983
|
"gemini-2.0-pro-exp-02-05": {
|
984
984
|
"description": "Gemini 2.0 Pro Experimental 是 Google 最新的實驗性多模態AI模型,與歷史版本相比有一定的質量提升,特別是對於世界知識、代碼和長上下文。"
|
985
985
|
},
|
986
|
+
"gemini-2.5-flash-preview-04-17": {
|
987
|
+
"description": "Gemini 2.5 Flash Preview 是 Google 性價比最高的模型,提供全面的功能。"
|
988
|
+
},
|
986
989
|
"gemini-2.5-pro-exp-03-25": {
|
987
990
|
"description": "Gemini 2.5 Pro 實驗版是 Google 最先進的思維模型,能夠對代碼、數學和 STEM 領域的複雜問題進行推理,還能利用長上下文來分析大型數據集、代碼庫和文檔。"
|
988
991
|
},
|
package/package.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
{
|
2
2
|
"name": "@lobehub/chat",
|
3
|
-
"version": "1.81.4",
|
3
|
+
"version": "1.81.6",
|
4
4
|
"description": "Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
|
5
5
|
"keywords": [
|
6
6
|
"framework",
|
@@ -52,7 +52,7 @@
|
|
52
52
|
"desktop:prepare-dist": "tsx scripts/electronWorkflow/moveNextStandalone.ts",
|
53
53
|
"dev": "next dev --turbopack -p 3010",
|
54
54
|
"dev:desktop": "next dev --turbopack -p 3015",
|
55
|
-
"docs:i18n": "lobe-i18n md && npm run lint:md && npm run lint:mdx",
|
55
|
+
"docs:i18n": "lobe-i18n md && npm run lint:md && npm run lint:mdx && prettier -c --write locales/**/*",
|
56
56
|
"docs:seo": "lobe-seo && npm run lint:mdx",
|
57
57
|
"i18n": "npm run workflow:i18n && lobe-i18n",
|
58
58
|
"lint": "npm run lint:ts && npm run lint:style && npm run type-check && npm run lint:circular",
|
@@ -146,7 +146,8 @@
|
|
146
146
|
"@lobehub/chat-plugins-gateway": "^1.9.0",
|
147
147
|
"@lobehub/icons": "^1.95.0",
|
148
148
|
"@lobehub/tts": "^1.28.3",
|
149
|
-
"@lobehub/ui": "^1.
|
149
|
+
"@lobehub/ui": "^1.171.0",
|
150
|
+
"@modelcontextprotocol/sdk": "^1.10.1",
|
150
151
|
"@neondatabase/serverless": "^1.0.0",
|
151
152
|
"@next/third-parties": "^15.3.0",
|
152
153
|
"@react-spring/web": "^9.7.5",
|
@@ -200,7 +201,7 @@
|
|
200
201
|
"modern-screenshot": "^4.6.0",
|
201
202
|
"nanoid": "^5.1.5",
|
202
203
|
"next": "^15.3.0",
|
203
|
-
"next-auth": "beta",
|
204
|
+
"next-auth": "5.0.0-beta.25",
|
204
205
|
"next-mdx-remote": "^5.0.0",
|
205
206
|
"nextjs-toploader": "^3.8.16",
|
206
207
|
"numeral": "^2.0.6",
|
@@ -324,6 +325,7 @@
|
|
324
325
|
"lodash": "^4.17.21",
|
325
326
|
"markdown-table": "^3.0.4",
|
326
327
|
"markdown-to-txt": "^2.0.1",
|
328
|
+
"mcp-hello-world": "^1.1.2",
|
327
329
|
"mime": "^4.0.6",
|
328
330
|
"node-fetch": "^3.3.2",
|
329
331
|
"node-gyp": "^11.2.0",
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# @lobehub/file-loaders
|
2
|
+
|
3
|
+
`@lobehub/file-loaders` 是 LobeChat 项目中的一个工具包,专门用于从本地文件路径加载各种类型的文件,并将其内容转换为标准化的 `FileDocument` 对象。
|
4
|
+
|
5
|
+
它的主要目的是提供一个统一的接口来读取不同的文件格式,提取其核心文本内容,并为后续处理(例如在 LobeChat 中进行文件预览、内容提取或将其作为知识库数据源)做好准备。
|
6
|
+
|
7
|
+
## ✨ 功能特性
|
8
|
+
|
9
|
+
- **统一接口**: 提供 `loadFile(filePath: string)` 函数作为核心入口点。
|
10
|
+
- **自动类型检测**: 根据文件扩展名自动选择合适的加载方式。
|
11
|
+
- **广泛的格式支持**:
|
12
|
+
- **纯文本类**: `.txt`, `.csv`, `.md`, `.json`, `.xml`, `.yaml`, `.html` 以及多种代码和配置文件格式。
|
13
|
+
- **PDF**: `.pdf` 文件。
|
14
|
+
- **Word**: `.docx` 文件。
|
15
|
+
- **Excel**: `.xlsx`, `.xls` 文件,每个工作表作为一个 `DocumentPage`。
|
16
|
+
- **PowerPoint**: `.pptx` 文件,每个幻灯片作为一个 `DocumentPage`。
|
17
|
+
- **标准化输出**: 始终返回 `Promise<FileDocument>`。 `FileDocument` 对象代表一个加载的文件,其内部包含一个 `DocumentPage` 数组,代表文件的各个逻辑单元(页、幻灯片、工作表、文本块等)。
|
18
|
+
- **层级结构**: 采用 `FileDocument` 包含 `DocumentPage[]` 的结构,更好地反映文件原始组织方式。
|
19
|
+
- **丰富的元数据**: 在 `Document` 和 `Page` 层面提供详细的元数据,包括文件信息、内容统计和结构信息。
|
20
|
+
|
21
|
+
## 核心数据结构
|
22
|
+
|
23
|
+
`loadFile` 函数返回一个 `FileDocument` 对象,包含文件级信息和其所有逻辑页面 / 块 (`DocumentPage`)。
|
24
|
+
|
25
|
+
### `FileDocument` Interface
|
26
|
+
|
27
|
+
| 字段 | 类型 | 描述 |
|
28
|
+
| :---------------- | :---------------- | :------------------------------------------------------------- |
|
29
|
+
| `content` | `string` | 文件内容 (聚合后的内容) |
|
30
|
+
| `createdTime` | `Date` | 文件创建时间戳。 |
|
31
|
+
| `fileType` | `string` | 文件类型或扩展名。 |
|
32
|
+
| `filename` | `string` | 原始文件名。 |
|
33
|
+
| `metadata` | `object` | 文件级别的元数据。 |
|
34
|
+
| `metadata.author` | `string?` | 文档作者 (如果可用)。 |
|
35
|
+
| `metadata.error` | `string?` | 如果整个文件加载失败,记录错误信息。 |
|
36
|
+
| `metadata.title` | `string?` | 文档标题 (如果可用)。 |
|
37
|
+
| `...` | `any` | 其他文件级别的元数据。 |
|
38
|
+
| `modifiedTime` | `Date` | 文件最后修改时间戳。 |
|
39
|
+
| `pages` | `DocumentPage[]?` | 包含文档中所有逻辑页面 / 块的数组 (可选)。 |
|
40
|
+
| `source` | `string` | 原始文件的完整路径。 |
|
41
|
+
| `totalCharCount` | `number` | 整个文档的总字符数 (所有 `DocumentPage` 的 `charCount` 之和)。 |
|
42
|
+
| `totalLineCount` | `number` | 整个文档的总行数 (所有 `DocumentPage` 的 `lineCount` 之和)。 |
|
43
|
+
|
44
|
+
### `DocumentPage` Interface
|
45
|
+
|
46
|
+
| 字段 | 类型 | 描述 |
|
47
|
+
| :------------------------- | :-------- | :--------------------------- |
|
48
|
+
| `charCount` | `number` | 此页 / 块内容的字符数。 |
|
49
|
+
| `lineCount` | `number` | 此页 / 块内容的行数。 |
|
50
|
+
| `metadata` | `object` | 与此页 / 块相关的元数据。 |
|
51
|
+
| `metadata.chunkIndex` | `number?` | 如果分割成块,当前块的索引。 |
|
52
|
+
| `metadata.error` | `string?` | 处理此页 / 块时发生的错误。 |
|
53
|
+
| `metadata.lineNumberEnd` | `number?` | 在原始文件中的结束行号。 |
|
54
|
+
| `metadata.lineNumberStart` | `number?` | 在原始文件中的起始行号。 |
|
55
|
+
| `metadata.pageNumber` | `number?` | 页码 (适用于 PDF, DOCX)。 |
|
56
|
+
| `metadata.sectionTitle` | `string?` | 相关的章节标题。 |
|
57
|
+
| `metadata.sheetName` | `string?` | 工作表名称 (适用于 XLSX)。 |
|
58
|
+
| `metadata.slideNumber` | `number?` | 幻灯片编号 (适用于 PPTX)。 |
|
59
|
+
| `metadata.totalChunks` | `number?` | 如果分割成块,总块数。 |
|
60
|
+
| `...` | `any` | 其他特定于页 / 块的元数据。 |
|
61
|
+
| `pageContent` | `string` | 此页 / 块的核心文本内容。 |
|
62
|
+
|
63
|
+
如果你对我们的项目感兴趣,欢迎在 [GitHub](https://github.com/lobehub/lobe-chat) 上查看、点赞或贡献代码!
|
@@ -0,0 +1,42 @@
|
|
1
|
+
{
|
2
|
+
"name": "@lobechat/file-loaders",
|
3
|
+
"version": "1.0.0",
|
4
|
+
"private": true,
|
5
|
+
"description": "Utilities for reading and processing various file types",
|
6
|
+
"keywords": [
|
7
|
+
"file",
|
8
|
+
"loader",
|
9
|
+
"parser",
|
10
|
+
"lobehub"
|
11
|
+
],
|
12
|
+
"homepage": "https://github.com/lobehub/lobe-chat/tree/master/packages/file-loaders",
|
13
|
+
"bugs": {
|
14
|
+
"url": "https://github.com/lobehub/lobe-chat/issues/new"
|
15
|
+
},
|
16
|
+
"repository": {
|
17
|
+
"type": "git",
|
18
|
+
"url": "https://github.com/lobehub/lobe-chat.git"
|
19
|
+
},
|
20
|
+
"author": "LobeHub <i@lobehub.com>",
|
21
|
+
"sideEffects": false,
|
22
|
+
"main": "./src/index.ts",
|
23
|
+
"dependencies": {
|
24
|
+
"@langchain/community": "^0.3.41",
|
25
|
+
"@langchain/core": "^0.3.45",
|
26
|
+
"@xmldom/xmldom": "^0.9.8",
|
27
|
+
"concat-stream": "^2.0.0",
|
28
|
+
"mammoth": "^1.8.0",
|
29
|
+
"officeparser": "^5.1.1",
|
30
|
+
"pdfjs-dist": "4.8.69",
|
31
|
+
"xlsx": "^0.18.5",
|
32
|
+
"yauzl": "^3.2.0"
|
33
|
+
},
|
34
|
+
"devDependencies": {
|
35
|
+
"@types/concat-stream": "^2.0.3",
|
36
|
+
"@types/yauzl": "^2.10.3",
|
37
|
+
"typescript": "^5"
|
38
|
+
},
|
39
|
+
"peerDependencies": {
|
40
|
+
"typescript": ">=5"
|
41
|
+
}
|
42
|
+
}
|
@@ -0,0 +1,206 @@
|
|
1
|
+
import { stat } from 'node:fs/promises';
|
2
|
+
import * as path from 'node:path';
|
3
|
+
|
4
|
+
import { fileLoaders } from './loaders';
|
5
|
+
import { TextLoader } from './loaders/text';
|
6
|
+
import { FileDocument, FileMetadata, SupportedFileType } from './types';
|
7
|
+
import type { DocumentPage, FileLoaderInterface } from './types';
|
8
|
+
import { isTextReadableFile } from './utils/isTextReadableFile';
|
9
|
+
|
10
|
+
/**
 * Maps a file path to the loader category used to parse it, looking only at
 * the lower-cased filename extension.
 *
 * @param filePath The path to the file.
 * @returns `'txt'` when the path has no extension or when `isTextReadableFile`
 * accepts the extension; `'pdf'`, `'docx'`, `'excel'` (for `xlsx` / `xls`) or
 * `'pptx'` for those extensions; `undefined` for every other extension.
 */
const getFileType = (filePath: string): SupportedFileType | undefined => {
  const ext = path.extname(filePath).toLowerCase().replace('.', '');

  // Extension-less files and generally text-readable extensions are both handled as plain text.
  // The text-readable check deliberately runs before the dedicated formats below.
  if (!ext || isTextReadableFile(ext)) return 'txt';

  // Non-text / complex formats that map to a dedicated loader type.
  const dedicatedTypes = new Map<string, SupportedFileType>([
    ['pdf', 'pdf'],
    ['docx', 'docx'],
    ['xlsx', 'excel'],
    ['xls', 'excel'],
    ['pptx', 'pptx'],
  ]);

  // Not text-readable and not a known dedicated format: unsupported (`undefined`).
  return dedicatedTypes.get(ext);
};
|
46
|
+
|
47
|
+
// Default fallback loader class
const DefaultLoader = TextLoader;

/**
 * Extracts a printable message from any thrown value.
 *
 * Objects carrying a `message` property (including `Error` instances) yield that
 * message; everything else (strings, `null`, `undefined`, ...) is stringified, so
 * reading the message can never throw inside a `catch` handler.
 */
const getErrorMessage = (thrown: unknown): string => {
  if (typeof thrown === 'object' && thrown !== null && 'message' in thrown) {
    return String((thrown as { message: unknown }).message);
  }
  return String(thrown);
};

/** Builds the single empty placeholder page used to report a load failure. */
const createErrorPage = (error: string): DocumentPage => ({
  charCount: 0,
  lineCount: 0,
  metadata: { error },
  pageContent: '',
});

/**
 * Loads a file from the specified path, automatically detecting the file type
 * and using the appropriate loader class.
 *
 * The function does not reject on filesystem or loader failures: each failure is
 * logged with `console.error`, recorded as a message, and the messages are joined
 * with `'; '` into `metadata.error` of the returned document. When `stat` or
 * `loadPages` fails, `pages` holds one empty page carrying the error and
 * `content` stays an empty string.
 *
 * @param filePath The path to the file to load.
 * @param fileMetadata Optional metadata to override information read from the filesystem.
 * Its fields are also spread into the returned document's `metadata`.
 * @returns A Promise resolving to a FileDocument object.
 */
export const loadFile = async (
  filePath: string,
  fileMetadata?: FileMetadata,
): Promise<FileDocument> => {
  let stats;
  let fsError: string | undefined;

  try {
    stats = await stat(filePath);
  } catch (e) {
    const message = getErrorMessage(e);
    console.error(`Error getting file stats for ${filePath}: ${message}`);
    fsError = `Failed to access file stats: ${message}`;
  }

  // Determine base file info from path and stats (if available)
  const fileExtension = path.extname(filePath).slice(1).toLowerCase();
  const baseFilename = path.basename(filePath);

  // Apply overrides from fileMetadata or use defaults
  const source = fileMetadata?.source ?? filePath;
  const filename = fileMetadata?.filename ?? baseFilename;
  const fileType = fileMetadata?.fileType ?? fileExtension;
  const createdTime = fileMetadata?.createdTime ?? stats?.ctime ?? new Date();
  const modifiedTime = fileMetadata?.modifiedTime ?? stats?.mtime ?? new Date();

  // The parser type is always derived from the path, independent of any `fileType` override.
  const parserType = getFileType(filePath);

  // Select the loader CLASS based on the determined parser type, fallback to DefaultLoader
  const LoaderClass: new () => FileLoaderInterface = parserType
    ? fileLoaders[parserType]
    : DefaultLoader;

  if (!parserType) {
    console.warn(
      `No specific loader found for file type '${fileType}'. Using default loader (${DefaultLoader.name}) as fallback.`,
    );
  }

  let pages: DocumentPage[] = [];
  let aggregatedContent = '';
  let loaderError: string | undefined;
  let aggregationError: string | undefined;
  let metadataError: string | undefined;
  let loaderSpecificMetadata: any | undefined;

  // Instantiate the loader (done before the stats check, as the loader is needed in the happy path)
  const loaderInstance = new LoaderClass();

  if (fsError) {
    // If we couldn't even get stats, skip loader execution and report a minimal error page.
    // Aggregated content remains empty.
    pages = [createErrorPage(fsError)];
  } else {
    try {
      // 1. Load pages using the instance
      pages = await loaderInstance.loadPages(filePath);

      try {
        // 2. Aggregate content using the instance
        aggregatedContent = await loaderInstance.aggregateContent(pages);
      } catch (aggError) {
        const message = getErrorMessage(aggError);
        console.error(
          `Error aggregating content for ${filePath} using ${LoaderClass.name}: ${message}`,
        );
        // Keep the pages loaded, but content might be empty/incomplete
        aggregationError = `Content aggregation failed: ${message}`;
      }

      // 3. Attach document-specific metadata if loader supports it
      if (typeof loaderInstance.attachDocumentMetadata === 'function') {
        try {
          loaderSpecificMetadata = await loaderInstance.attachDocumentMetadata(filePath);
        } catch (metaErr) {
          const message = getErrorMessage(metaErr);
          console.error(
            `Error attaching metadata for ${filePath} using ${LoaderClass.name}: ${message}`,
          );
          metadataError = `Metadata attachment failed: ${message}`;
        }
      }
    } catch (loadErr) {
      const message = getErrorMessage(loadErr);
      console.error(`Error loading pages for ${filePath} using ${LoaderClass.name}: ${message}`);
      loaderError = `Loader execution failed: ${message}`;
      // Provide a minimal error page if loader failed critically; aggregated content remains empty
      pages = [createErrorPage(loaderError)];
    }
  }

  // Calculate totals from the loaded pages
  let totalCharCount = 0;
  let totalLineCount = 0;
  for (const page of pages) {
    totalCharCount += page.charCount;
    totalLineCount += page.lineCount;
  }

  // Combine all potential errors
  const combinedError =
    [fsError, loaderError, aggregationError, metadataError].filter(Boolean).join('; ') || undefined;

  // Construct the final FileDocument
  const fileDocument: FileDocument = {
    content: aggregatedContent, // Use content from aggregateContent
    createdTime,
    fileType,
    filename,
    metadata: {
      // Include combined errors
      error: combinedError,
      // Add loader specific metadata under a namespace
      loaderSpecific: loaderSpecificMetadata ?? undefined,
      // Add other file-level metadata (spread last, so caller-supplied keys win)
      ...fileMetadata,
    },
    modifiedTime,
    pages, // Use pages from loadPages
    source,
    totalCharCount,
    totalLineCount,
  };

  // Clean up undefined error field if no error occurred
  if (!fileDocument.metadata.error) {
    delete fileDocument.metadata.error;
  }

  return fileDocument;
};
|
@@ -0,0 +1,74 @@
|
|
1
|
+
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
|
2
|
+
|
3
|
+
exports[`DocxLoader > should aggregate content correctly > aggregated_content 1`] = `
|
4
|
+
"简单报告
|
5
|
+
|
6
|
+
副标题
|
7
|
+
|
8
|
+
轻点或点按此占位符文本并开始键入即可开始。你可以在 Mac、iPad、iPhone 或 iCloud.com 上查看和编辑此文稿。
|
9
|
+
|
10
|
+
轻松编辑文本、更改字体以及添加精美的图形。使用段落样式来使整篇文稿保持一致的风格。例如,此段落使用“正文”样式。你可以在“格式”控制的“文本”标签页中更改样式。
|
11
|
+
|
12
|
+
若要添加照片、图像画廊、音频片段、视频、图表或任意 700 多种可自定义形状,请在工具栏中轻点或点按其中一个插入按钮,或者将对象拖放到页面中。你可以分层放置对象、调整其大小以及将其放在页面中的任意位置。若要更改对象随文本移动的方式,请选择对象并随后轻点或点按“格式”控制中的“排列”标签页。
|
13
|
+
|
14
|
+
小标题
|
15
|
+
|
16
|
+
Pages 文稿可用于文字处理和页面布局。此“简单报告”模板为文字处理而设置,如此一来,文本便会随着你的键入而从某一页流向下一页,到达页面末尾时会自动创建新的页面。
|
17
|
+
|
18
|
+
在页面布局文稿中,你可以手动重新排列页面并随意调整页面中的文本框、图像和其他对象的位置。若要创建页面布局文稿,请在模板选取器中选取一种页面布局模板。你也可以在 Mac、iPad 或 iPhone 上将此文稿改为页面布局,方法是在“文稿”控制中关闭“文稿正文”。
|
19
|
+
|
20
|
+
“这是一个引用(报告中的关键短语)的例子。轻点或点按此文本添加你自己的内容。”
|
21
|
+
|
22
|
+
|
23
|
+
|
24
|
+
|
25
|
+
|
26
|
+
|
27
|
+
|
28
|
+
|
29
|
+
|
30
|
+
这是第二页的内容
|
31
|
+
|
32
|
+
"
|
33
|
+
`;
|
34
|
+
|
35
|
+
exports[`DocxLoader > should load pages correctly from a DOCX file 1`] = `
|
36
|
+
[
|
37
|
+
{
|
38
|
+
"charCount": 587,
|
39
|
+
"lineCount": 29,
|
40
|
+
"metadata": {
|
41
|
+
"pageNumber": 1,
|
42
|
+
},
|
43
|
+
"pageContent": "简单报告
|
44
|
+
|
45
|
+
副标题
|
46
|
+
|
47
|
+
轻点或点按此占位符文本并开始键入即可开始。你可以在 Mac、iPad、iPhone 或 iCloud.com 上查看和编辑此文稿。
|
48
|
+
|
49
|
+
轻松编辑文本、更改字体以及添加精美的图形。使用段落样式来使整篇文稿保持一致的风格。例如,此段落使用“正文”样式。你可以在“格式”控制的“文本”标签页中更改样式。
|
50
|
+
|
51
|
+
若要添加照片、图像画廊、音频片段、视频、图表或任意 700 多种可自定义形状,请在工具栏中轻点或点按其中一个插入按钮,或者将对象拖放到页面中。你可以分层放置对象、调整其大小以及将其放在页面中的任意位置。若要更改对象随文本移动的方式,请选择对象并随后轻点或点按“格式”控制中的“排列”标签页。
|
52
|
+
|
53
|
+
小标题
|
54
|
+
|
55
|
+
Pages 文稿可用于文字处理和页面布局。此“简单报告”模板为文字处理而设置,如此一来,文本便会随着你的键入而从某一页流向下一页,到达页面末尾时会自动创建新的页面。
|
56
|
+
|
57
|
+
在页面布局文稿中,你可以手动重新排列页面并随意调整页面中的文本框、图像和其他对象的位置。若要创建页面布局文稿,请在模板选取器中选取一种页面布局模板。你也可以在 Mac、iPad 或 iPhone 上将此文稿改为页面布局,方法是在“文稿”控制中关闭“文稿正文”。
|
58
|
+
|
59
|
+
“这是一个引用(报告中的关键短语)的例子。轻点或点按此文本添加你自己的内容。”
|
60
|
+
|
61
|
+
|
62
|
+
|
63
|
+
|
64
|
+
|
65
|
+
|
66
|
+
|
67
|
+
|
68
|
+
|
69
|
+
这是第二页的内容
|
70
|
+
|
71
|
+
",
|
72
|
+
},
|
73
|
+
]
|
74
|
+
`;
|
Binary file
|
@@ -0,0 +1,41 @@
|
|
1
|
+
import path from 'node:path';
|
2
|
+
import { beforeEach, describe, expect, it } from 'vitest';
|
3
|
+
|
4
|
+
import type { FileLoaderInterface } from '../../types';
|
5
|
+
import { DocxLoader } from './index';
|
6
|
+
|
7
|
+
// Make sure a test.docx file has been placed in the fixtures directory.
const resolveFixture = (filename: string) => path.join(__dirname, `./fixtures/${filename}`);

const docxFixture = resolveFixture('test.docx');
const missingFixture = resolveFixture('nonexistent.docx');

let loader: FileLoaderInterface;

// Every test gets a fresh loader instance.
beforeEach(() => {
  loader = new DocxLoader();
});

describe('DocxLoader', () => {
  it('should load pages correctly from a DOCX file', async () => {
    const loadedPages = await loader.loadPages(docxFixture);

    // A DOCX file is normally loaded as a single page.
    expect(loadedPages).toHaveLength(1);
    expect(loadedPages).toMatchSnapshot();
  });

  it('should aggregate content correctly', async () => {
    const loadedPages = await loader.loadPages(docxFixture);
    const aggregated = await loader.aggregateContent(loadedPages);

    // For a single-page document the aggregated content should equal the page content.
    expect(aggregated).toEqual(loadedPages[0].pageContent);
    expect(aggregated).toMatchSnapshot('aggregated_content');
  });

  it('should handle file read errors in loadPages', async () => {
    const failedPages = await loader.loadPages(missingFixture);

    // Even on failure, one page carrying the error message is returned.
    expect(failedPages).toHaveLength(1);
    expect(failedPages[0].pageContent).toBe('');
    expect(failedPages[0].metadata.error).toContain('Failed to load DOCX file');
  });
});
|