@lobehub/chat 1.81.3 → 1.81.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.js +1 -0
- package/.github/workflows/release.yml +5 -0
- package/.github/workflows/test.yml +5 -0
- package/CHANGELOG.md +58 -0
- package/changelog/v1.json +21 -0
- package/locales/ar/common.json +2 -0
- package/locales/ar/electron.json +32 -0
- package/locales/ar/models.json +129 -3
- package/locales/ar/plugin.json +1 -0
- package/locales/ar/tool.json +25 -0
- package/locales/bg-BG/common.json +2 -0
- package/locales/bg-BG/electron.json +32 -0
- package/locales/bg-BG/models.json +129 -3
- package/locales/bg-BG/plugin.json +1 -0
- package/locales/bg-BG/tool.json +25 -0
- package/locales/de-DE/common.json +2 -0
- package/locales/de-DE/electron.json +32 -0
- package/locales/de-DE/models.json +129 -3
- package/locales/de-DE/plugin.json +1 -0
- package/locales/de-DE/tool.json +25 -0
- package/locales/en-US/common.json +2 -0
- package/locales/en-US/electron.json +32 -0
- package/locales/en-US/models.json +129 -3
- package/locales/en-US/plugin.json +1 -0
- package/locales/en-US/tool.json +25 -0
- package/locales/es-ES/common.json +2 -0
- package/locales/es-ES/electron.json +32 -0
- package/locales/es-ES/models.json +129 -3
- package/locales/es-ES/plugin.json +1 -0
- package/locales/es-ES/tool.json +25 -0
- package/locales/fa-IR/common.json +2 -0
- package/locales/fa-IR/electron.json +32 -0
- package/locales/fa-IR/models.json +129 -3
- package/locales/fa-IR/plugin.json +1 -0
- package/locales/fa-IR/tool.json +25 -0
- package/locales/fr-FR/common.json +2 -0
- package/locales/fr-FR/electron.json +32 -0
- package/locales/fr-FR/models.json +129 -3
- package/locales/fr-FR/plugin.json +1 -0
- package/locales/fr-FR/tool.json +25 -0
- package/locales/it-IT/common.json +2 -0
- package/locales/it-IT/electron.json +32 -0
- package/locales/it-IT/models.json +129 -3
- package/locales/it-IT/plugin.json +1 -0
- package/locales/it-IT/tool.json +25 -0
- package/locales/ja-JP/common.json +2 -0
- package/locales/ja-JP/electron.json +32 -0
- package/locales/ja-JP/models.json +129 -3
- package/locales/ja-JP/plugin.json +1 -0
- package/locales/ja-JP/tool.json +25 -0
- package/locales/ko-KR/common.json +2 -0
- package/locales/ko-KR/electron.json +32 -0
- package/locales/ko-KR/models.json +129 -3
- package/locales/ko-KR/plugin.json +1 -0
- package/locales/ko-KR/tool.json +25 -0
- package/locales/nl-NL/common.json +2 -0
- package/locales/nl-NL/electron.json +32 -0
- package/locales/nl-NL/models.json +129 -3
- package/locales/nl-NL/plugin.json +1 -0
- package/locales/nl-NL/tool.json +25 -0
- package/locales/pl-PL/common.json +2 -0
- package/locales/pl-PL/electron.json +32 -0
- package/locales/pl-PL/models.json +129 -3
- package/locales/pl-PL/plugin.json +1 -0
- package/locales/pl-PL/tool.json +25 -0
- package/locales/pt-BR/common.json +2 -0
- package/locales/pt-BR/electron.json +32 -0
- package/locales/pt-BR/models.json +129 -3
- package/locales/pt-BR/plugin.json +1 -0
- package/locales/pt-BR/tool.json +25 -0
- package/locales/ru-RU/common.json +2 -0
- package/locales/ru-RU/electron.json +32 -0
- package/locales/ru-RU/models.json +129 -3
- package/locales/ru-RU/plugin.json +1 -0
- package/locales/ru-RU/tool.json +25 -0
- package/locales/tr-TR/common.json +2 -0
- package/locales/tr-TR/electron.json +32 -0
- package/locales/tr-TR/models.json +129 -3
- package/locales/tr-TR/plugin.json +1 -0
- package/locales/tr-TR/tool.json +25 -0
- package/locales/vi-VN/common.json +2 -0
- package/locales/vi-VN/electron.json +32 -0
- package/locales/vi-VN/models.json +129 -3
- package/locales/vi-VN/plugin.json +1 -0
- package/locales/vi-VN/tool.json +25 -0
- package/locales/zh-CN/common.json +2 -0
- package/locales/zh-CN/electron.json +32 -0
- package/locales/zh-CN/models.json +134 -8
- package/locales/zh-CN/plugin.json +1 -0
- package/locales/zh-CN/tool.json +25 -0
- package/locales/zh-TW/common.json +2 -0
- package/locales/zh-TW/electron.json +32 -0
- package/locales/zh-TW/models.json +129 -3
- package/locales/zh-TW/plugin.json +1 -0
- package/locales/zh-TW/tool.json +25 -0
- package/package.json +4 -3
- package/packages/electron-client-ipc/src/events/index.ts +5 -5
- package/packages/electron-client-ipc/src/events/localFile.ts +22 -0
- package/packages/electron-client-ipc/src/events/{file.ts → upload.ts} +1 -1
- package/packages/electron-client-ipc/src/types/index.ts +2 -1
- package/packages/electron-client-ipc/src/types/localFile.ts +52 -0
- package/packages/file-loaders/README.md +63 -0
- package/packages/file-loaders/package.json +42 -0
- package/packages/file-loaders/src/index.ts +2 -0
- package/packages/file-loaders/src/loadFile.ts +206 -0
- package/packages/file-loaders/src/loaders/docx/__snapshots__/index.test.ts.snap +74 -0
- package/packages/file-loaders/src/loaders/docx/fixtures/test.docx +0 -0
- package/packages/file-loaders/src/loaders/docx/index.test.ts +41 -0
- package/packages/file-loaders/src/loaders/docx/index.ts +73 -0
- package/packages/file-loaders/src/loaders/excel/__snapshots__/index.test.ts.snap +58 -0
- package/packages/file-loaders/src/loaders/excel/fixtures/test.xlsx +0 -0
- package/packages/file-loaders/src/loaders/excel/index.test.ts +47 -0
- package/packages/file-loaders/src/loaders/excel/index.ts +121 -0
- package/packages/file-loaders/src/loaders/index.ts +19 -0
- package/packages/file-loaders/src/loaders/pdf/__snapshots__/index.test.ts.snap +98 -0
- package/packages/file-loaders/src/loaders/pdf/index.test.ts +49 -0
- package/packages/file-loaders/src/loaders/pdf/index.ts +133 -0
- package/packages/file-loaders/src/loaders/pptx/__snapshots__/index.test.ts.snap +40 -0
- package/packages/file-loaders/src/loaders/pptx/fixtures/test.pptx +0 -0
- package/packages/file-loaders/src/loaders/pptx/index.test.ts +47 -0
- package/packages/file-loaders/src/loaders/pptx/index.ts +186 -0
- package/packages/file-loaders/src/loaders/text/__snapshots__/index.test.ts.snap +15 -0
- package/packages/file-loaders/src/loaders/text/fixtures/test.txt +2 -0
- package/packages/file-loaders/src/loaders/text/index.test.ts +38 -0
- package/packages/file-loaders/src/loaders/text/index.ts +53 -0
- package/packages/file-loaders/src/types.ts +200 -0
- package/packages/file-loaders/src/utils/isTextReadableFile.ts +68 -0
- package/packages/file-loaders/src/utils/parser-utils.ts +112 -0
- package/packages/file-loaders/test/__snapshots__/loaders.test.ts.snap +93 -0
- package/packages/file-loaders/test/fixtures/test.csv +4 -0
- package/packages/file-loaders/test/fixtures/test.docx +0 -0
- package/packages/file-loaders/test/fixtures/test.epub +0 -0
- package/packages/file-loaders/test/fixtures/test.md +3 -0
- package/packages/file-loaders/test/fixtures/test.pptx +0 -0
- package/packages/file-loaders/test/fixtures/test.txt +3 -0
- package/packages/file-loaders/test/loaders.test.ts +39 -0
- package/scripts/prebuild.mts +5 -1
- package/src/app/(backend)/trpc/desktop/[trpc]/route.ts +26 -0
- package/src/features/Conversation/Messages/Assistant/Tool/Render/Arguments/ObjectEntity.tsx +81 -0
- package/src/features/Conversation/Messages/Assistant/Tool/Render/Arguments/ValueCell.tsx +43 -0
- package/src/features/Conversation/Messages/Assistant/Tool/Render/Arguments/index.tsx +120 -0
- package/src/features/Conversation/Messages/Assistant/Tool/Render/CustomRender.tsx +75 -2
- package/src/features/Conversation/Messages/Assistant/Tool/Render/KeyValueEditor.tsx +214 -0
- package/src/features/User/UserPanel/useMenu.tsx +8 -1
- package/src/libs/agent-runtime/google/index.ts +3 -0
- package/src/libs/trpc/client/desktop.ts +14 -0
- package/src/locales/default/common.ts +2 -0
- package/src/locales/default/electron.ts +34 -0
- package/src/locales/default/index.ts +2 -0
- package/src/locales/default/tool.ts +25 -0
- package/src/server/routers/desktop/index.ts +9 -0
- package/src/server/routers/desktop/pgTable.ts +43 -0
- package/src/services/electron/autoUpdate.ts +17 -0
- package/src/services/electron/file.ts +31 -0
- package/src/services/electron/localFileService.ts +39 -0
- package/src/services/electron/remoteServer.ts +40 -0
- package/src/store/chat/index.ts +1 -1
- package/src/store/chat/slices/builtinTool/actions/index.ts +3 -1
- package/src/store/chat/slices/builtinTool/actions/localFile.ts +129 -0
- package/src/store/chat/slices/builtinTool/initialState.ts +2 -0
- package/src/store/chat/slices/builtinTool/selectors.ts +2 -0
- package/src/store/chat/slices/plugin/action.ts +3 -3
- package/src/store/chat/store.ts +2 -0
- package/src/store/electron/actions/sync.ts +117 -0
- package/src/store/electron/index.ts +1 -0
- package/src/store/electron/initialState.ts +18 -0
- package/src/store/electron/selectors/index.ts +1 -0
- package/src/store/electron/selectors/sync.ts +9 -0
- package/src/store/electron/store.ts +29 -0
- package/src/tools/index.ts +8 -0
- package/src/tools/local-files/Render/ListFiles/Result.tsx +42 -0
- package/src/tools/local-files/Render/ListFiles/index.tsx +68 -0
- package/src/tools/local-files/Render/ReadLocalFile/ReadFileSkeleton.tsx +50 -0
- package/src/tools/local-files/Render/ReadLocalFile/ReadFileView.tsx +197 -0
- package/src/tools/local-files/Render/ReadLocalFile/index.tsx +31 -0
- package/src/tools/local-files/Render/ReadLocalFile/style.ts +37 -0
- package/src/tools/local-files/Render/SearchFiles/Result.tsx +42 -0
- package/src/tools/local-files/Render/SearchFiles/SearchQuery/SearchView.tsx +77 -0
- package/src/tools/local-files/Render/SearchFiles/SearchQuery/index.tsx +72 -0
- package/src/tools/local-files/Render/SearchFiles/index.tsx +32 -0
- package/src/tools/local-files/Render/index.tsx +36 -0
- package/src/tools/local-files/components/FileItem.tsx +117 -0
- package/src/tools/local-files/index.ts +149 -0
- package/src/tools/local-files/systemRole.ts +46 -0
- package/src/tools/local-files/type.ts +33 -0
- package/src/tools/renders.ts +3 -0
- package/packages/electron-client-ipc/src/events/search.ts +0 -4
- package/src/features/Conversation/Messages/Assistant/Tool/Render/Arguments.tsx +0 -165
- /package/packages/electron-client-ipc/src/types/{file.ts → upload.ts} +0 -0
@@ -0,0 +1,206 @@
|
|
1
|
+
import { stat } from 'node:fs/promises';
|
2
|
+
import * as path from 'node:path';
|
3
|
+
|
4
|
+
import { fileLoaders } from './loaders';
|
5
|
+
import { TextLoader } from './loaders/text';
|
6
|
+
import { FileDocument, FileMetadata, SupportedFileType } from './types';
|
7
|
+
import type { DocumentPage, FileLoaderInterface } from './types';
|
8
|
+
import { isTextReadableFile } from './utils/isTextReadableFile';
|
9
|
+
|
10
|
+
/**
 * Maps a file path to the loader type that should parse it, based solely on
 * the (case-insensitive) filename extension.
 *
 * @param filePath Path of the file; only its extension is inspected.
 * @returns `'txt'` when the path has no extension or `isTextReadableFile`
 *   accepts the extension; `'pdf'`, `'docx'`, `'excel'` (for xlsx/xls) or
 *   `'pptx'` for those extensions; `undefined` for anything else.
 */
const getFileType = (filePath: string): SupportedFileType | undefined => {
  // `path.extname` yields either '' or a string starting with its only dot,
  // so dropping the first character removes exactly that dot.
  const ext = path.extname(filePath).slice(1).toLowerCase();

  // Extension-less files are treated as plain text, as is anything the
  // text-readable check accepts; this takes priority over the checks below.
  if (ext === '' || isTextReadableFile(ext)) return 'txt';

  // Formats that need a dedicated parser.
  if (ext === 'pdf') return 'pdf';
  if (ext === 'docx') return 'docx';
  if (ext === 'xlsx' || ext === 'xls') return 'excel';
  if (ext === 'pptx') return 'pptx';

  // Neither text-readable nor a known format: unsupported.
  return undefined;
};
|
46
|
+
|
47
|
+
// Loader class used as the fallback when no dedicated loader matches the file.
const DefaultLoader = TextLoader;

/**
 * Produces a printable message from a caught value, which is not guaranteed
 * to be an `Error` instance.
 */
const describeError = (thrown: unknown): string =>
  thrown instanceof Error ? thrown.message : String(thrown);

/**
 * Loads a file from the specified path, picking the loader class from the
 * path's extension (falling back to the default text loader).
 *
 * Failures while reading file stats, loading pages, aggregating content or
 * attaching loader metadata are caught and logged; their messages are joined
 * into `metadata.error` on the returned document instead of being thrown.
 *
 * @param filePath The path to the file to load.
 * @param fileMetadata Optional values that override what is derived from the
 *   path and the filesystem (source, filename, fileType, created/modified time).
 * @returns A Promise resolving to a FileDocument object.
 */
export const loadFile = async (
  filePath: string,
  fileMetadata?: FileMetadata,
): Promise<FileDocument> => {
  let stats;
  let fsError: string | undefined;

  try {
    stats = await stat(filePath);
  } catch (e) {
    const message = describeError(e);
    console.error(`Error getting file stats for ${filePath}: ${message}`);
    fsError = `Failed to access file stats: ${message}`;
  }

  // Base file info derived from the path itself.
  const fileExtension = path.extname(filePath).slice(1).toLowerCase();
  const baseFilename = path.basename(filePath);

  // `ctime` is the time the file's status (inode) last changed, not when the
  // file was created; `birthtime` is the creation time. Filesystems that do not
  // record a birth time may report the Unix epoch, so fall back to `ctime` then.
  const birthTime = stats?.birthtime;
  const statCreatedTime = birthTime && birthTime.getTime() > 0 ? birthTime : stats?.ctime;

  // Explicit metadata wins, then filesystem values, then "now" as a last resort.
  const source = fileMetadata?.source ?? filePath;
  const filename = fileMetadata?.filename ?? baseFilename;
  const fileType = fileMetadata?.fileType ?? fileExtension;
  const createdTime = fileMetadata?.createdTime ?? statCreatedTime ?? new Date();
  const modifiedTime = fileMetadata?.modifiedTime ?? stats?.mtime ?? new Date();

  // The parser is always chosen from the path, independent of any fileType override.
  const parserType = getFileType(filePath);

  // Select the loader CLASS for the parser type, falling back to DefaultLoader.
  const LoaderClass: new () => FileLoaderInterface = parserType
    ? fileLoaders[parserType]
    : DefaultLoader;

  if (!parserType) {
    console.warn(
      `No specific loader found for file type '${fileType}'. Using default loader (${DefaultLoader.name}) as fallback.`,
    );
  }

  let pages: DocumentPage[] = [];
  let aggregatedContent = '';
  let loaderError: string | undefined;
  let aggregationError: string | undefined;
  let metadataError: string | undefined;
  let loaderSpecificMetadata: any | undefined;

  const loaderInstance = new LoaderClass();

  if (!fsError) {
    try {
      // 1. Load pages.
      pages = await loaderInstance.loadPages(filePath);

      try {
        // 2. Aggregate the pages into a single content string.
        aggregatedContent = await loaderInstance.aggregateContent(pages);
      } catch (aggError) {
        const message = describeError(aggError);
        console.error(
          `Error aggregating content for ${filePath} using ${LoaderClass.name}: ${message}`,
        );
        aggregationError = `Content aggregation failed: ${message}`;
        // The loaded pages are kept; only the aggregated content stays empty.
      }

      // 3. Attach document-specific metadata if the loader supports it.
      if (typeof loaderInstance.attachDocumentMetadata === 'function') {
        try {
          loaderSpecificMetadata = await loaderInstance.attachDocumentMetadata(filePath);
        } catch (metaErr) {
          const message = describeError(metaErr);
          console.error(
            `Error attaching metadata for ${filePath} using ${LoaderClass.name}: ${message}`,
          );
          metadataError = `Metadata attachment failed: ${message}`;
        }
      }
    } catch (loadErr) {
      const message = describeError(loadErr);
      console.error(`Error loading pages for ${filePath} using ${LoaderClass.name}: ${message}`);
      loaderError = `Loader execution failed: ${message}`;
      // Represent the failure as a single empty page carrying the error.
      pages = [
        {
          charCount: 0,
          lineCount: 0,
          metadata: { error: loaderError },
          pageContent: '',
        },
      ];
    }
  } else {
    // Stats were unavailable, so the loader is not run at all; report a
    // single empty page carrying the filesystem error.
    pages = [
      {
        charCount: 0,
        lineCount: 0,
        metadata: { error: fsError },
        pageContent: '',
      },
    ];
  }

  // Totals are summed from the per-page counts.
  let totalCharCount = 0;
  let totalLineCount = 0;
  for (const page of pages) {
    totalCharCount += page.charCount;
    totalLineCount += page.lineCount;
  }

  // Join whichever errors occurred; `undefined` when there were none.
  const combinedError =
    [fsError, loaderError, aggregationError, metadataError].filter(Boolean).join('; ') || undefined;

  const fileDocument: FileDocument = {
    content: aggregatedContent,
    createdTime,
    fileType,
    filename,
    metadata: {
      error: combinedError,
      loaderSpecific: loaderSpecificMetadata ?? undefined,
      // Caller-supplied metadata is spread last, so its keys take precedence.
      ...fileMetadata,
    },
    modifiedTime,
    pages,
    source,
    totalCharCount,
    totalLineCount,
  };

  // Drop the error key entirely when nothing went wrong.
  if (!fileDocument.metadata.error) {
    delete fileDocument.metadata.error;
  }

  return fileDocument;
};
|
@@ -0,0 +1,74 @@
|
|
1
|
+
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
|
2
|
+
|
3
|
+
exports[`DocxLoader > should aggregate content correctly > aggregated_content 1`] = `
|
4
|
+
"简单报告
|
5
|
+
|
6
|
+
副标题
|
7
|
+
|
8
|
+
轻点或点按此占位符文本并开始键入即可开始。你可以在 Mac、iPad、iPhone 或 iCloud.com 上查看和编辑此文稿。
|
9
|
+
|
10
|
+
轻松编辑文本、更改字体以及添加精美的图形。使用段落样式来使整篇文稿保持一致的风格。例如,此段落使用“正文”样式。你可以在“格式”控制的“文本”标签页中更改样式。
|
11
|
+
|
12
|
+
若要添加照片、图像画廊、音频片段、视频、图表或任意 700 多种可自定义形状,请在工具栏中轻点或点按其中一个插入按钮,或者将对象拖放到页面中。你可以分层放置对象、调整其大小以及将其放在页面中的任意位置。若要更改对象随文本移动的方式,请选择对象并随后轻点或点按“格式”控制中的“排列”标签页。
|
13
|
+
|
14
|
+
小标题
|
15
|
+
|
16
|
+
Pages 文稿可用于文字处理和页面布局。此“简单报告”模板为文字处理而设置,如此一来,文本便会随着你的键入而从某一页流向下一页,到达页面末尾时会自动创建新的页面。
|
17
|
+
|
18
|
+
在页面布局文稿中,你可以手动重新排列页面并随意调整页面中的文本框、图像和其他对象的位置。若要创建页面布局文稿,请在模板选取器中选取一种页面布局模板。你也可以在 Mac、iPad 或 iPhone 上将此文稿改为页面布局,方法是在“文稿”控制中关闭“文稿正文”。
|
19
|
+
|
20
|
+
“这是一个引用(报告中的关键短语)的例子。轻点或点按此文本添加你自己的内容。”
|
21
|
+
|
22
|
+
|
23
|
+
|
24
|
+
|
25
|
+
|
26
|
+
|
27
|
+
|
28
|
+
|
29
|
+
|
30
|
+
这是第二页的内容
|
31
|
+
|
32
|
+
"
|
33
|
+
`;
|
34
|
+
|
35
|
+
exports[`DocxLoader > should load pages correctly from a DOCX file 1`] = `
|
36
|
+
[
|
37
|
+
{
|
38
|
+
"charCount": 587,
|
39
|
+
"lineCount": 29,
|
40
|
+
"metadata": {
|
41
|
+
"pageNumber": 1,
|
42
|
+
},
|
43
|
+
"pageContent": "简单报告
|
44
|
+
|
45
|
+
副标题
|
46
|
+
|
47
|
+
轻点或点按此占位符文本并开始键入即可开始。你可以在 Mac、iPad、iPhone 或 iCloud.com 上查看和编辑此文稿。
|
48
|
+
|
49
|
+
轻松编辑文本、更改字体以及添加精美的图形。使用段落样式来使整篇文稿保持一致的风格。例如,此段落使用“正文”样式。你可以在“格式”控制的“文本”标签页中更改样式。
|
50
|
+
|
51
|
+
若要添加照片、图像画廊、音频片段、视频、图表或任意 700 多种可自定义形状,请在工具栏中轻点或点按其中一个插入按钮,或者将对象拖放到页面中。你可以分层放置对象、调整其大小以及将其放在页面中的任意位置。若要更改对象随文本移动的方式,请选择对象并随后轻点或点按“格式”控制中的“排列”标签页。
|
52
|
+
|
53
|
+
小标题
|
54
|
+
|
55
|
+
Pages 文稿可用于文字处理和页面布局。此“简单报告”模板为文字处理而设置,如此一来,文本便会随着你的键入而从某一页流向下一页,到达页面末尾时会自动创建新的页面。
|
56
|
+
|
57
|
+
在页面布局文稿中,你可以手动重新排列页面并随意调整页面中的文本框、图像和其他对象的位置。若要创建页面布局文稿,请在模板选取器中选取一种页面布局模板。你也可以在 Mac、iPad 或 iPhone 上将此文稿改为页面布局,方法是在“文稿”控制中关闭“文稿正文”。
|
58
|
+
|
59
|
+
“这是一个引用(报告中的关键短语)的例子。轻点或点按此文本添加你自己的内容。”
|
60
|
+
|
61
|
+
|
62
|
+
|
63
|
+
|
64
|
+
|
65
|
+
|
66
|
+
|
67
|
+
|
68
|
+
|
69
|
+
这是第二页的内容
|
70
|
+
|
71
|
+
",
|
72
|
+
},
|
73
|
+
]
|
74
|
+
`;
|
Binary file
|
@@ -0,0 +1,41 @@
|
|
1
|
+
import path from 'node:path';
|
2
|
+
import { beforeEach, describe, expect, it } from 'vitest';
|
3
|
+
|
4
|
+
import type { FileLoaderInterface } from '../../types';
|
5
|
+
import { DocxLoader } from './index';
|
6
|
+
|
7
|
+
// A test.docx file is expected to exist in the fixtures directory.
const fixturePath = (filename: string) => path.join(__dirname, `./fixtures/${filename}`);

const testFile = fixturePath('test.docx');
const nonExistentFile = fixturePath('nonexistent.docx');

describe('DocxLoader', () => {
  let loader: FileLoaderInterface;

  // Each test gets a fresh loader instance.
  beforeEach(() => {
    loader = new DocxLoader();
  });

  it('should load pages correctly from a DOCX file', async () => {
    const loadedPages = await loader.loadPages(testFile);

    // A DOCX file is normally loaded as a single page.
    expect(loadedPages).toHaveLength(1);
    expect(loadedPages).toMatchSnapshot();
  });

  it('should aggregate content correctly', async () => {
    const loadedPages = await loader.loadPages(testFile);
    const aggregated = await loader.aggregateContent(loadedPages);

    // For a single-page document the aggregate equals the page content.
    const [onlyPage] = loadedPages;
    expect(aggregated).toEqual(onlyPage.pageContent);
    expect(aggregated).toMatchSnapshot('aggregated_content');
  });

  it('should handle file read errors in loadPages', async () => {
    const loadedPages = await loader.loadPages(nonExistentFile);

    // Even on failure, one page carrying the error message is returned.
    expect(loadedPages).toHaveLength(1);
    const [errorPage] = loadedPages;
    expect(errorPage.pageContent).toBe('');
    expect(errorPage.metadata.error).toContain('Failed to load DOCX file');
  });
});
|
@@ -0,0 +1,73 @@
|
|
1
|
+
import { DocxLoader as LangchainDocxLoader } from '@langchain/community/document_loaders/fs/docx';
|
2
|
+
|
3
|
+
import type { DocumentPage, FileLoaderInterface } from '../../types';
|
4
|
+
|
5
|
+
/**
 * File loader for Word documents (.docx), backed by the LangChain Community
 * DocxLoader.
 */
export class DocxLoader implements FileLoaderInterface {
  /**
   * Loads the DOCX file and converts every LangChain document into a page.
   *
   * Any failure inside the load or conversion is caught and logged; in that
   * case a single empty page whose `metadata.error` describes the failure is
   * returned.
   *
   * @param filePath Path of the .docx file.
   * @returns The converted pages; a single empty page (pageNumber 1) when
   *   LangChain produced no documents.
   */
  async loadPages(filePath: string): Promise<DocumentPage[]> {
    try {
      // Per the original author's note, LangChain typically returns the whole
      // document as one entry.
      const langchainDocs = await new LangchainDocxLoader(filePath).load();

      const result: DocumentPage[] = [];
      for (const doc of langchainDocs) {
        const text = doc.pageContent || '';
        const lineTotal = text.split('\n').length;
        const charTotal = text.length;

        // No page numbers are expected from LangChain here, so every entry is
        // labelled as page 1; other LangChain metadata is passed through.
        const metadata = {
          ...doc.metadata,
          pageNumber: 1,
        };

        // @ts-expect-error Remove source if present, as it's handled at the FileDocument level
        delete metadata.source;

        result.push({
          charCount: charTotal,
          lineCount: lineTotal,
          metadata,
          pageContent: text,
        });
      }

      if (result.length > 0) return result;

      // Nothing was produced (e.g. an empty file): return one empty page.
      return [
        {
          charCount: 0,
          lineCount: 0,
          metadata: { pageNumber: 1 },
          pageContent: '',
        },
      ];
    } catch (e) {
      const error = e as Error;
      console.error(`Error loading DOCX file ${filePath} using LangChain loader: ${error.message}`);

      const failedPage: DocumentPage = {
        charCount: 0,
        lineCount: 0,
        metadata: {
          error: `Failed to load DOCX file: ${error.message}`,
        },
        pageContent: '',
      };
      return [failedPage];
    }
  }

  /**
   * Concatenates the content of all pages, separated by a blank line.
   *
   * @param pages Array of DocumentPage objects.
   * @returns The page contents joined with a double newline.
   */
  async aggregateContent(pages: DocumentPage[]): Promise<string> {
    const texts = pages.map(({ pageContent }) => pageContent);
    return texts.join('\n\n');
  }
}
|
@@ -0,0 +1,58 @@
|
|
1
|
+
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
|
2
|
+
|
3
|
+
exports[`ExcelLoader > should aggregate content correctly (joining sheets) > aggregated_content 1`] = `
|
4
|
+
"## Sheet: 表1
|
5
|
+
|
6
|
+
| __EMPTY | 类别 A | 类别 B | __EMPTY_1 | __EMPTY_2 |
|
7
|
+
| --- | --- | --- | --- | --- |
|
8
|
+
| 项目 1 | 5 | 7 | | |
|
9
|
+
| 项目 2 | 10 | 8 | | |
|
10
|
+
| 项目 3 | 9 | 15 | | |
|
11
|
+
| 项目 4 | 7 | 12 | | |
|
12
|
+
| 项目 5 | 16 | 21 | | |
|
13
|
+
|
14
|
+
---
|
15
|
+
|
16
|
+
## Sheet: 表2 - 表格 2
|
17
|
+
|
18
|
+
| __EMPTY | 类别 A | 类别 B | __EMPTY_1 | __EMPTY_2 |
|
19
|
+
| --- | --- | --- | --- | --- |
|
20
|
+
| 项目 1 | 5 | 7 | | |
|
21
|
+
| 项目 2 | 10 | 8 | | |
|
22
|
+
| 项目 3 | 9 | 15 | | |
|
23
|
+
| 项目 4 | 7 | 12 | | |
|
24
|
+
| 项目 5 | 16 | 21 | | |"
|
25
|
+
`;
|
26
|
+
|
27
|
+
exports[`ExcelLoader > should load pages correctly from an Excel file (one page per sheet) 1`] = `
|
28
|
+
[
|
29
|
+
{
|
30
|
+
"charCount": 201,
|
31
|
+
"lineCount": 7,
|
32
|
+
"metadata": {
|
33
|
+
"sheetName": "表1",
|
34
|
+
},
|
35
|
+
"pageContent": "| __EMPTY | 类别 A | 类别 B | __EMPTY_1 | __EMPTY_2 |
|
36
|
+
| --- | --- | --- | --- | --- |
|
37
|
+
| 项目 1 | 5 | 7 | | |
|
38
|
+
| 项目 2 | 10 | 8 | | |
|
39
|
+
| 项目 3 | 9 | 15 | | |
|
40
|
+
| 项目 4 | 7 | 12 | | |
|
41
|
+
| 项目 5 | 16 | 21 | | |",
|
42
|
+
},
|
43
|
+
{
|
44
|
+
"charCount": 201,
|
45
|
+
"lineCount": 7,
|
46
|
+
"metadata": {
|
47
|
+
"sheetName": "表2 - 表格 2",
|
48
|
+
},
|
49
|
+
"pageContent": "| __EMPTY | 类别 A | 类别 B | __EMPTY_1 | __EMPTY_2 |
|
50
|
+
| --- | --- | --- | --- | --- |
|
51
|
+
| 项目 1 | 5 | 7 | | |
|
52
|
+
| 项目 2 | 10 | 8 | | |
|
53
|
+
| 项目 3 | 9 | 15 | | |
|
54
|
+
| 项目 4 | 7 | 12 | | |
|
55
|
+
| 项目 5 | 16 | 21 | | |",
|
56
|
+
},
|
57
|
+
]
|
58
|
+
`;
|
Binary file
|
@@ -0,0 +1,47 @@
|
|
1
|
+
import path from 'node:path';
|
2
|
+
import { beforeEach, describe, expect, it } from 'vitest';
|
3
|
+
|
4
|
+
import type { FileLoaderInterface } from '../../types';
|
5
|
+
import { ExcelLoader } from './index';
|
6
|
+
|
7
|
+
// A test.xlsx file is expected to exist in the fixtures directory.
// Ideally that workbook contains several sheets so multi-sheet handling is exercised.
const fixturePath = (filename: string) => path.join(__dirname, `./fixtures/${filename}`);

const testFile = fixturePath('test.xlsx');
const nonExistentFile = fixturePath('nonexistent.xlsx');

describe('ExcelLoader', () => {
  let loader: FileLoaderInterface;

  // Each test gets a fresh loader instance.
  beforeEach(() => {
    loader = new ExcelLoader();
  });

  it('should load pages correctly from an Excel file (one page per sheet)', async () => {
    const loadedPages = await loader.loadPages(testFile);

    // There should be one page per sheet in the workbook.
    expect(loadedPages.length).toBeGreaterThan(0);

    // Snapshot the whole pages array.
    expect(loadedPages).toMatchSnapshot();

    // If test.xlsx has several sheets, more assertions can be added here,
    // e.g. checking the sheetName stored in a specific sheet's metadata:
    // expect(pages[1].metadata.sheetName).toBe('Sheet2');
  });

  it('should aggregate content correctly (joining sheets)', async () => {
    const loadedPages = await loader.loadPages(testFile);
    const aggregated = await loader.aggregateContent(loadedPages);

    // The aggregate joins the content of every sheet; verified via snapshot.
    expect(aggregated).toMatchSnapshot('aggregated_content');
  });

  it('should handle file read errors in loadPages', async () => {
    const loadedPages = await loader.loadPages(nonExistentFile);

    // Even on failure, one page carrying the error message is returned.
    expect(loadedPages).toHaveLength(1);
    const [errorPage] = loadedPages;
    expect(errorPage.pageContent).toBe('');
    expect(errorPage.metadata.error).toContain('Failed to load Excel file');
  });
});
|
@@ -0,0 +1,121 @@
|
|
1
|
+
import { readFile } from 'node:fs/promises';
|
2
|
+
import * as xlsx from 'xlsx';
|
3
|
+
|
4
|
+
import type { DocumentPage, FileLoaderInterface } from '../../types';
|
5
|
+
|
6
|
+
/**
 * Converts sheet data (an array of row objects keyed by column header) into a
 * Markdown table string.
 *
 * Columns are taken from the keys of the first row. Pipe characters are
 * escaped and line breaks are rendered as `<br>` in both headers and cells,
 * because a raw `|` or newline would otherwise break the table structure.
 *
 * @param jsonData Rows of the sheet.
 * @returns The Markdown table, or an italic placeholder message when there
 *   are no rows or the first row has no keys.
 */
function sheetToMarkdownTable(jsonData: Record<string, any>[]): string {
  if (!jsonData || jsonData.length === 0) {
    return '*Sheet is empty or contains no data.*';
  }

  // The first row defines the column set used for every row.
  const headers = Object.keys(jsonData[0] || {});
  if (headers.length === 0) {
    return '*Sheet has headers but no data.*';
  }

  // Makes text safe to place inside a single Markdown table cell.
  const escapeCell = (text: string): string =>
    text.replace(/\|/g, '\\|').replace(/\r\n|\r|\n/g, '<br>');

  const headerRow = `| ${headers.map((header) => escapeCell(header)).join(' | ')} |`;
  const separatorRow = `| ${headers.map(() => '---').join(' | ')} |`;

  const dataRows = jsonData
    .map((row) => {
      const cells = headers.map((header) => {
        const value = row[header];
        // null/undefined become empty cells; surrounding whitespace is trimmed
        // before escaping so trailing line breaks do not turn into `<br>`.
        return value === null || value === undefined ? '' : escapeCell(String(value).trim());
      });
      return `| ${cells.join(' | ')} |`;
    })
    .join('\n');

  return `${headerRow}\n${separatorRow}\n${dataRows}`;
}
|
39
|
+
|
40
|
+
/**
 * Loads Excel files (.xlsx, .xls) using the 'xlsx' library.
 * Each sheet becomes a DocumentPage containing a Markdown table generated by
 * sheetToMarkdownTable.
 */
export class ExcelLoader implements FileLoaderInterface {
  /**
   * Reads the workbook and produces one page per sheet.
   *
   * Failures are caught and logged; in that case a single empty page whose
   * `metadata.error` describes the failure is returned.
   *
   * @param filePath Path of the Excel file.
   * @returns One page per sheet (with `metadata.sheetName`), or a single
   *   error page when the workbook has no sheets or cannot be loaded.
   */
  async loadPages(filePath: string): Promise<DocumentPage[]> {
    const pages: DocumentPage[] = [];
    try {
      // Read asynchronously, then let xlsx parse the in-memory buffer.
      const dataBuffer = await readFile(filePath);
      const workbook = xlsx.read(dataBuffer, { type: 'buffer' });

      for (const sheetName of workbook.SheetNames) {
        const worksheet = workbook.Sheets[sheetName];
        // Rows as objects, which is the shape the Markdown helper expects.
        const jsonData = xlsx.utils.sheet_to_json<Record<string, any>>(worksheet, {
          // Use an empty string for blank cells.
          defval: '',
          // Get formatted strings, not raw values.
          raw: false,
        });

        // Counts are derived from the exact string stored on the page, so
        // they always describe `pageContent` itself.
        const pageContent = sheetToMarkdownTable(jsonData).trim();

        pages.push({
          charCount: pageContent.length,
          lineCount: pageContent.split('\n').length,
          metadata: {
            sheetName: sheetName,
          },
          pageContent,
        });
      }

      if (pages.length === 0) {
        pages.push({
          charCount: 0,
          lineCount: 0,
          metadata: {
            error: 'Excel file contains no sheets.',
          },
          pageContent: '',
        });
      }

      return pages;
    } catch (e) {
      // A caught value is not guaranteed to be an Error instance.
      const message = e instanceof Error ? e.message : String(e);
      console.error(`Error loading Excel file ${filePath}: ${message}`);
      const errorPage: DocumentPage = {
        charCount: 0,
        lineCount: 0,
        metadata: {
          error: `Failed to load Excel file: ${message}`,
        },
        pageContent: '',
      };
      return [errorPage];
    }
  }

  /**
   * Aggregates content from Excel sheets (Markdown tables).
   * A `## Sheet: <name>` heading is placed before each page that has a
   * `sheetName`, and pages are separated by a horizontal rule.
   *
   * @param pages Array of DocumentPage objects from loadPages.
   * @returns Aggregated content as a string.
   */
  async aggregateContent(pages: DocumentPage[]): Promise<string> {
    return pages
      .map((page) => {
        const sheetName = page.metadata.sheetName;
        const header = sheetName ? `## Sheet: ${sheetName}\n\n` : '';
        return header + page.pageContent;
      })
      .join('\n\n---\n\n'); // Separator between sheets
  }
}
|
@@ -0,0 +1,19 @@
|
|
1
|
+
import { FileLoaderInterface, SupportedFileType } from '../types';
|
2
|
+
import { DocxLoader } from './docx';
|
3
|
+
// import { EpubLoader } from './epub';
|
4
|
+
import { ExcelLoader } from './excel';
|
5
|
+
import { PdfLoader } from './pdf';
|
6
|
+
import { PptxLoader } from './pptx';
|
7
|
+
import { TextLoader } from './text';
|
8
|
+
|
9
|
+
// Constructor signature shared by every loader class: no arguments, yields a FileLoaderInterface.
type FileLoaderConstructor = new () => FileLoaderInterface;

// Registry of loader classes.
// Key: a SupportedFileType name (a parser type such as 'excel', not necessarily a raw file extension)
// Value: the loader class to instantiate for that type
export const fileLoaders: Record<SupportedFileType, FileLoaderConstructor> = {
  docx: DocxLoader,
  // epub: EpubLoader,
  excel: ExcelLoader,
  pdf: PdfLoader,
  pptx: PptxLoader,
  txt: TextLoader,
};
|