@lobehub/chat 1.84.27 → 1.85.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +50 -0
- package/changelog/v1.json +18 -0
- package/docs/development/database-schema.dbml +59 -1
- package/package.json +3 -2
- package/packages/file-loaders/package.json +5 -1
- package/packages/file-loaders/src/loadFile.ts +51 -1
- package/packages/file-loaders/src/loaders/docx/index.ts +16 -1
- package/packages/file-loaders/src/loaders/excel/index.ts +30 -2
- package/packages/file-loaders/src/loaders/pdf/__snapshots__/index.test.ts.snap +1 -1
- package/packages/file-loaders/src/loaders/pdf/index.ts +52 -12
- package/packages/file-loaders/src/loaders/pptx/index.ts +32 -1
- package/packages/file-loaders/src/loaders/text/index.test.ts +1 -1
- package/packages/file-loaders/src/loaders/text/index.ts +13 -1
- package/packages/file-loaders/test/__snapshots__/loaders.test.ts.snap +41 -0
- package/packages/file-loaders/test/loaders.test.ts +20 -0
- package/packages/file-loaders/test/setup.ts +17 -0
- package/packages/file-loaders/vitest.config.ts +14 -0
- package/src/config/aiModels/infiniai.ts +113 -9
- package/src/const/file.ts +8 -1
- package/src/database/client/migrations.json +23 -1
- package/src/database/migrations/0022_add_documents.sql +49 -0
- package/src/database/migrations/meta/0022_snapshot.json +5340 -0
- package/src/database/migrations/meta/_journal.json +7 -0
- package/src/database/models/_template.ts +1 -1
- package/src/database/models/document.ts +54 -0
- package/src/database/models/message.ts +25 -0
- package/src/database/repositories/tableViewer/index.test.ts +1 -1
- package/src/database/schemas/document.ts +104 -0
- package/src/database/schemas/index.ts +1 -0
- package/src/database/schemas/relations.ts +34 -2
- package/src/database/schemas/topic.ts +31 -8
- package/src/database/utils/idGenerator.ts +1 -0
- package/src/features/ChatInput/Desktop/FilePreview/FileItem/Content.tsx +1 -1
- package/src/features/ChatInput/Desktop/FilePreview/FileItem/index.tsx +10 -10
- package/src/features/ChatInput/components/UploadDetail/UploadStatus.tsx +2 -2
- package/src/features/Conversation/Actions/Error.tsx +2 -2
- package/src/libs/agent-runtime/infiniai/index.ts +1 -1
- package/src/libs/trpc/lambda/context.ts +7 -0
- package/src/prompts/files/file.ts +6 -4
- package/src/server/routers/lambda/__tests__/message.test.ts +213 -0
- package/src/server/routers/lambda/document.ts +36 -0
- package/src/server/routers/lambda/index.ts +2 -0
- package/src/server/services/document/index.ts +66 -0
- package/src/server/services/file/__tests__/index.test.ts +115 -0
- package/src/server/services/mcp/index.ts +0 -4
- package/src/server/utils/__tests__/tempFileManager.test.ts +94 -0
- package/src/services/rag.ts +4 -0
- package/src/store/chat/slices/aiChat/actions/__tests__/rag.test.ts +2 -2
- package/src/store/chat/slices/aiChat/actions/rag.ts +2 -3
- package/src/store/file/slices/chat/action.ts +3 -51
- package/src/types/document/index.ts +172 -0
- package/src/types/message/chat.ts +1 -0
- package/src/features/ChatInput/Desktop/FilePreview/FileItem/style.ts +0 -4
@@ -1,37 +1,54 @@
|
|
1
|
+
import debug from 'debug';
|
1
2
|
import { readFile } from 'node:fs/promises';
|
2
|
-
import
|
3
|
-
import
|
3
|
+
import type { PDFDocumentProxy, PDFPageProxy } from 'pdfjs-dist';
|
4
|
+
import { getDocument, version } from 'pdfjs-dist/legacy/build/pdf.mjs';
|
5
|
+
// @ts-ignore
|
6
|
+
import * as _pdfjsWorker from 'pdfjs-dist/legacy/build/pdf.worker.mjs';
|
7
|
+
import type { TextContent } from 'pdfjs-dist/types/src/display/api';
|
4
8
|
|
5
9
|
import type { DocumentPage, FileLoaderInterface } from '../../types';
|
6
10
|
|
11
|
+
const log = debug('file-loaders:pdf');
|
12
|
+
|
7
13
|
/**
|
8
14
|
* Loads PDF files page by page using the official pdfjs-dist library.
|
9
15
|
*/
|
10
16
|
export class PdfLoader implements FileLoaderInterface {
|
11
17
|
private pdfInstance: PDFDocumentProxy | null = null;
|
18
|
+
private pdfjsWorker = _pdfjsWorker;
|
12
19
|
|
13
20
|
private async getPDFFile(filePath: string) {
|
21
|
+
// GlobalWorkerOptions.workerSrc should have been set at the module level.
|
22
|
+
// We are now relying on pdfjs-dist to use this path when it creates a worker.
|
23
|
+
|
24
|
+
log('Reading PDF file:', filePath);
|
14
25
|
const dataBuffer = await readFile(filePath);
|
26
|
+
log('PDF file read successfully, size:', dataBuffer.length, 'bytes');
|
15
27
|
|
16
|
-
const loadingTask =
|
28
|
+
const loadingTask = getDocument({
|
17
29
|
data: new Uint8Array(dataBuffer.buffer, dataBuffer.byteOffset, dataBuffer.length),
|
18
30
|
useSystemFonts: true,
|
19
|
-
// Explicitly disable worker thread
|
20
|
-
worker: undefined, // Attempt to use system fonts
|
21
31
|
});
|
22
32
|
|
23
|
-
|
33
|
+
log('PDF document loading task created');
|
34
|
+
const pdf = await loadingTask.promise;
|
35
|
+
log('PDF document loaded successfully, pages:', pdf.numPages);
|
36
|
+
return pdf;
|
24
37
|
}
|
25
38
|
|
26
39
|
async loadPages(filePath: string): Promise<DocumentPage[]> {
|
40
|
+
log('Starting to load PDF pages from:', filePath);
|
27
41
|
try {
|
28
42
|
const pdf: PDFDocumentProxy = await this.getPDFFile(filePath);
|
29
43
|
|
30
44
|
const pages: DocumentPage[] = [];
|
45
|
+
log(`Processing ${pdf.numPages} PDF pages`);
|
31
46
|
|
32
47
|
for (let i = 1; i <= pdf.numPages; i += 1) {
|
48
|
+
log(`Loading page ${i}/${pdf.numPages}`);
|
33
49
|
const page: PDFPageProxy = await pdf.getPage(i);
|
34
50
|
const content: TextContent = await page.getTextContent();
|
51
|
+
log(`Page ${i} text content retrieved, items:`, content.items.length);
|
35
52
|
|
36
53
|
// --- Revert to EXACT Simple Langchain PDFLoader Logic ---
|
37
54
|
let lastY;
|
@@ -61,6 +78,7 @@ export class PdfLoader implements FileLoaderInterface {
|
|
61
78
|
const pageLines = cleanedPageContent.split('\n');
|
62
79
|
const lineCount = pageLines.length;
|
63
80
|
const charCount = cleanedPageContent.length;
|
81
|
+
log(`Page ${i} processed, lines: ${lineCount}, chars: ${charCount}`);
|
64
82
|
|
65
83
|
pages.push({
|
66
84
|
charCount,
|
@@ -70,15 +88,19 @@ export class PdfLoader implements FileLoaderInterface {
|
|
70
88
|
});
|
71
89
|
|
72
90
|
// Clean up page resources
|
91
|
+
log(`Cleaning up page ${i} resources`);
|
73
92
|
page.cleanup();
|
74
93
|
}
|
75
94
|
|
76
95
|
// Clean up document resources
|
96
|
+
log('Cleaning up PDF document resources');
|
77
97
|
await pdf.destroy();
|
78
98
|
|
99
|
+
log(`PDF loading completed for ${filePath}, total pages:`, pages.length);
|
79
100
|
return pages;
|
80
101
|
} catch (e) {
|
81
102
|
const error = e as Error;
|
103
|
+
log('Error encountered while loading PDF file');
|
82
104
|
console.error(
|
83
105
|
`Error loading PDF file ${filePath} using pdfjs-dist: ${error.message}`,
|
84
106
|
error.stack,
|
@@ -92,6 +114,7 @@ export class PdfLoader implements FileLoaderInterface {
|
|
92
114
|
},
|
93
115
|
pageContent: '',
|
94
116
|
};
|
117
|
+
log('Created error page for failed PDF loading');
|
95
118
|
return [errorPage];
|
96
119
|
}
|
97
120
|
}
|
@@ -103,25 +126,42 @@ export class PdfLoader implements FileLoaderInterface {
|
|
103
126
|
* @returns Aggregated content as a string.
|
104
127
|
*/
|
105
128
|
async aggregateContent(pages: DocumentPage[]): Promise<string> {
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
.
|
129
|
+
log('Aggregating content from', pages.length, 'PDF pages');
|
130
|
+
const validPages = pages.filter((page) => !page.metadata.error);
|
131
|
+
log(
|
132
|
+
`Found ${validPages.length} valid pages for aggregation (${pages.length - validPages.length} pages with errors filtered out)`,
|
133
|
+
);
|
134
|
+
|
135
|
+
const result = validPages.map((page) => page.pageContent).join('\n\n');
|
136
|
+
log('PDF content aggregated successfully, length:', result.length);
|
137
|
+
return result;
|
110
138
|
}
|
111
139
|
|
112
140
|
async attachDocumentMetadata(filePath: string): Promise<any> {
|
141
|
+
log('Attaching document metadata for PDF:', filePath);
|
113
142
|
const pdf: PDFDocumentProxy = await this.getPDFFile(filePath);
|
114
143
|
|
115
|
-
|
144
|
+
log('Getting PDF metadata');
|
145
|
+
const pdfMetadata =
|
146
|
+
(await pdf.getMetadata().catch((err) => {
|
147
|
+
log('Error retrieving PDF metadata');
|
148
|
+
console.error(`Error getting PDF metadata: ${err.message}`);
|
149
|
+
return null;
|
150
|
+
})) ?? null;
|
151
|
+
|
116
152
|
const pdfInfo = pdfMetadata?.info ?? {};
|
117
153
|
const metadata = pdfMetadata?.metadata ?? null;
|
154
|
+
log('PDF metadata retrieved:', {
|
155
|
+
hasInfo: !!Object.keys(pdfInfo).length,
|
156
|
+
hasMetadata: !!metadata,
|
157
|
+
});
|
118
158
|
|
119
159
|
return {
|
120
160
|
pdfInfo: pdfInfo,
|
121
161
|
// PDF info (Author, Title, etc.)
|
122
162
|
pdfMetadata: metadata,
|
123
163
|
// PDF metadata
|
124
|
-
pdfVersion:
|
164
|
+
pdfVersion: version,
|
125
165
|
};
|
126
166
|
}
|
127
167
|
}
|
@@ -1,8 +1,11 @@
|
|
1
|
+
import debug from 'debug';
|
1
2
|
import path from 'node:path';
|
2
3
|
|
3
4
|
import type { DocumentPage, FileLoaderInterface } from '../../types';
|
4
5
|
import { type ExtractedFile, extractFiles, parseString } from '../../utils/parser-utils';
|
5
6
|
|
7
|
+
const log = debug('file-loaders:pptx');
|
8
|
+
|
6
9
|
/**
|
7
10
|
* Represents a loader for PPTX files using extracted utility functions.
|
8
11
|
*
|
@@ -19,20 +22,25 @@ export class PptxLoader implements FileLoaderInterface {
|
|
19
22
|
* `DocumentPage` object with error information in its metadata.
|
20
23
|
*/
|
21
24
|
async loadPages(filePath: string): Promise<DocumentPage[]> {
|
25
|
+
log('Loading PPTX file:', filePath);
|
22
26
|
const sourceFileName = path.basename(filePath);
|
27
|
+
log('Source file name:', sourceFileName);
|
23
28
|
|
24
29
|
try {
|
25
30
|
// --- File Extraction Step ---
|
26
31
|
const slidesRegex = /ppt\/slides\/slide\d+\.xml/g;
|
27
32
|
const slideNumberRegex = /slide(\d+)\.xml/;
|
28
33
|
|
34
|
+
log('Extracting slide XML files from PPTX');
|
29
35
|
// Extract only slide XML files
|
30
36
|
const slideFiles: ExtractedFile[] = await extractFiles(filePath, (fileName) =>
|
31
37
|
slidesRegex.test(fileName),
|
32
38
|
);
|
39
|
+
log('Extracted slide files:', slideFiles.length);
|
33
40
|
|
34
41
|
// --- Validation Step ---
|
35
42
|
if (slideFiles.length === 0) {
|
43
|
+
log('No slide XML files found in the PPTX file');
|
36
44
|
console.warn(`No slide XML files found in ${sourceFileName}. May be corrupted or empty.`);
|
37
45
|
return [
|
38
46
|
this.createErrorPage(
|
@@ -43,6 +51,7 @@ export class PptxLoader implements FileLoaderInterface {
|
|
43
51
|
}
|
44
52
|
|
45
53
|
// --- Sorting Step ---
|
54
|
+
log('Sorting slide files by slide number');
|
46
55
|
// Sort files based on the slide number extracted from the path
|
47
56
|
slideFiles.sort((a, b) => {
|
48
57
|
const matchA = a.path.match(slideNumberRegex);
|
@@ -51,13 +60,17 @@ export class PptxLoader implements FileLoaderInterface {
|
|
51
60
|
const numB = matchB ? parseInt(matchB[1], 10) : Infinity;
|
52
61
|
return numA - numB;
|
53
62
|
});
|
63
|
+
log('Slide files sorted');
|
54
64
|
|
55
65
|
// --- Page Creation Step ---
|
66
|
+
log('Creating document pages from slide files');
|
56
67
|
const pages: DocumentPage[] = slideFiles
|
57
68
|
.map((slideFile, index) => {
|
58
69
|
try {
|
70
|
+
log(`Processing slide ${index + 1}/${slideFiles.length}, path: ${slideFile.path}`);
|
59
71
|
const xmlDoc = parseString(slideFile.content);
|
60
72
|
const paragraphNodes = xmlDoc.getElementsByTagName('a:p');
|
73
|
+
log(`Found ${paragraphNodes.length} paragraph nodes in slide ${index + 1}`);
|
61
74
|
|
62
75
|
const slideText = Array.from(paragraphNodes)
|
63
76
|
.map((pNode) => {
|
@@ -72,6 +85,9 @@ export class PptxLoader implements FileLoaderInterface {
|
|
72
85
|
const lines = slideText.split('\n');
|
73
86
|
const slideNumberMatch = slideFile.path.match(slideNumberRegex);
|
74
87
|
const slideNumber = slideNumberMatch ? parseInt(slideNumberMatch[1], 10) : index + 1; // Fallback to index if regex fails
|
88
|
+
log(
|
89
|
+
`Slide ${index + 1} text extracted, lines: ${lines.length}, characters: ${slideText.length}`,
|
90
|
+
);
|
75
91
|
|
76
92
|
const metadata = {
|
77
93
|
pageCount: slideFiles.length, // Total number of slides found
|
@@ -86,6 +102,7 @@ export class PptxLoader implements FileLoaderInterface {
|
|
86
102
|
pageContent: slideText.trim(), // Trim final content
|
87
103
|
};
|
88
104
|
} catch (parseError) {
|
105
|
+
log(`Error parsing slide ${slideFile.path}`);
|
89
106
|
console.error(
|
90
107
|
`Failed to parse XML for slide ${slideFile.path} in ${sourceFileName}: ${parseError instanceof Error ? parseError.message : String(parseError)}`,
|
91
108
|
);
|
@@ -101,9 +118,11 @@ export class PptxLoader implements FileLoaderInterface {
|
|
101
118
|
})
|
102
119
|
// Filter out any potential nulls if we change the error handling above
|
103
120
|
.filter((page): page is DocumentPage => page !== null);
|
121
|
+
log(`Created ${pages.length} document pages from slides`);
|
104
122
|
|
105
123
|
if (pages.length === 0) {
|
106
124
|
// This case might happen if all slides failed to parse
|
125
|
+
log('Parsing resulted in zero valid pages');
|
107
126
|
console.warn(`Parsing resulted in zero valid pages for ${sourceFileName}`);
|
108
127
|
return [this.createErrorPage('Parsing resulted in zero valid pages.', sourceFileName)];
|
109
128
|
}
|
@@ -112,15 +131,18 @@ export class PptxLoader implements FileLoaderInterface {
|
|
112
131
|
const allErrored = pages.every((page) => page.metadata?.error);
|
113
132
|
if (allErrored) {
|
114
133
|
// If all pages resulted in errors, perhaps return a single summary error
|
134
|
+
log('All slides failed to parse');
|
115
135
|
console.warn(`All slides failed to parse for ${sourceFileName}`);
|
116
136
|
return [this.createErrorPage('All slides failed to parse correctly.', sourceFileName)];
|
117
137
|
// Or return all the individual error pages: return pages;
|
118
138
|
}
|
119
139
|
|
140
|
+
log('PPTX loading completed successfully');
|
120
141
|
return pages;
|
121
142
|
} catch (error) {
|
122
143
|
// --- Error Handling Step ---
|
123
144
|
// This catches errors from extractFiles or other unexpected issues
|
145
|
+
log('Error loading or processing PPTX file');
|
124
146
|
const errorMessage = `Failed to load or process PPTX file: ${error instanceof Error ? error.message : String(error)}`;
|
125
147
|
console.error(errorMessage, { filePath });
|
126
148
|
return [this.createErrorPage(errorMessage, sourceFileName)];
|
@@ -137,16 +159,21 @@ export class PptxLoader implements FileLoaderInterface {
|
|
137
159
|
* @returns A Promise resolving to the aggregated content string.
|
138
160
|
*/
|
139
161
|
async aggregateContent(pages: DocumentPage[]): Promise<string> {
|
162
|
+
log('Aggregating content from', pages.length, 'PPTX pages');
|
140
163
|
// Ensure pages array is valid and non-empty before proceeding
|
141
164
|
// Filter out error pages before aggregation unless we want to include error messages
|
142
165
|
const validPages = pages.filter((page) => !page.metadata?.error);
|
166
|
+
log(
|
167
|
+
`Found ${validPages.length} valid pages for aggregation (${pages.length - validPages.length} error pages filtered out)`,
|
168
|
+
);
|
143
169
|
|
144
170
|
if (validPages.length === 0) {
|
145
171
|
// If only error pages existed, return empty or a summary error message
|
172
|
+
log('No valid pages found, returning content of first page (may be error page)');
|
146
173
|
return pages[0]?.pageContent || ''; // Return content of the first page (might be an error page)
|
147
174
|
}
|
148
175
|
|
149
|
-
|
176
|
+
const result = validPages
|
150
177
|
.map((page) => {
|
151
178
|
const slideNumber = page.metadata?.slideNumber;
|
152
179
|
// Use Markdown H2 for slide headers
|
@@ -156,6 +183,9 @@ ${page.pageContent}
|
|
156
183
|
</slide_page>`;
|
157
184
|
})
|
158
185
|
.join('\n\n'); // Use Markdown horizontal rule as separator
|
186
|
+
|
187
|
+
log('PPTX content aggregated successfully, length:', result.length);
|
188
|
+
return result;
|
159
189
|
}
|
160
190
|
|
161
191
|
/**
|
@@ -171,6 +201,7 @@ ${page.pageContent}
|
|
171
201
|
sourceFileName: string,
|
172
202
|
sourceFilePath?: string,
|
173
203
|
): DocumentPage {
|
204
|
+
log('Creating error page:', errorInfo);
|
174
205
|
return {
|
175
206
|
charCount: 0,
|
176
207
|
lineCount: 0,
|
@@ -1,17 +1,23 @@
|
|
1
|
+
import debug from 'debug';
|
1
2
|
import { readFile } from 'node:fs/promises';
|
2
3
|
|
3
4
|
import type { DocumentPage, FileLoaderInterface } from '../../types';
|
4
5
|
|
6
|
+
const log = debug('file-loaders:text');
|
7
|
+
|
5
8
|
/**
|
6
9
|
* 用于加载纯文本文件的加载器。
|
7
10
|
*/
|
8
11
|
export class TextLoader implements FileLoaderInterface {
|
9
12
|
async loadPages(filePath: string): Promise<DocumentPage[]> {
|
13
|
+
log('Loading text file:', filePath);
|
10
14
|
try {
|
11
15
|
const fileContent = await readFile(filePath, 'utf8');
|
16
|
+
log('Text file loaded successfully, size:', fileContent.length, 'bytes');
|
12
17
|
const lines = fileContent.split('\n');
|
13
18
|
const lineCount = lines.length;
|
14
19
|
const charCount = fileContent.length;
|
20
|
+
log('Text file stats:', { charCount, lineCount });
|
15
21
|
|
16
22
|
const page: DocumentPage = {
|
17
23
|
charCount,
|
@@ -23,9 +29,11 @@ export class TextLoader implements FileLoaderInterface {
|
|
23
29
|
pageContent: fileContent,
|
24
30
|
};
|
25
31
|
|
32
|
+
log('Text page created successfully');
|
26
33
|
return [page];
|
27
34
|
} catch (e) {
|
28
35
|
const error = e as Error;
|
36
|
+
log('Error encountered while loading text file');
|
29
37
|
console.error(`Error loading text file ${filePath}: ${error.message}`);
|
30
38
|
// 如果读取失败,返回一个包含错误信息的 Page
|
31
39
|
const errorPage: DocumentPage = {
|
@@ -36,6 +44,7 @@ export class TextLoader implements FileLoaderInterface {
|
|
36
44
|
},
|
37
45
|
pageContent: '',
|
38
46
|
};
|
47
|
+
log('Created error page for failed text file loading');
|
39
48
|
return [errorPage];
|
40
49
|
}
|
41
50
|
}
|
@@ -47,7 +56,10 @@ export class TextLoader implements FileLoaderInterface {
|
|
47
56
|
* @returns 聚合后的内容
|
48
57
|
*/
|
49
58
|
async aggregateContent(pages: DocumentPage[]): Promise<string> {
|
59
|
+
log('Aggregating content from', pages.length, 'text pages');
|
50
60
|
// 默认使用换行符连接,可以根据需要调整或使其可配置
|
51
|
-
|
61
|
+
const result = pages.map((page) => page.pageContent).join('\n');
|
62
|
+
log('Content aggregated successfully, length:', result.length);
|
63
|
+
return result;
|
52
64
|
}
|
53
65
|
}
|
@@ -1,5 +1,46 @@
|
|
1
1
|
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
|
2
2
|
|
3
|
+
exports[`loadFile Integration Tests > PDF Handling > should load content from a pdf file using filePath 1`] = `
|
4
|
+
{
|
5
|
+
"content": "123",
|
6
|
+
"fileType": "pdf",
|
7
|
+
"filename": "test.pdf",
|
8
|
+
"metadata": {
|
9
|
+
"loaderSpecific": {
|
10
|
+
"pdfInfo": {
|
11
|
+
"CreationDate": "D:20250419143655Z00'00'",
|
12
|
+
"Creator": "Pages文稿",
|
13
|
+
"EncryptFilterName": null,
|
14
|
+
"IsAcroFormPresent": false,
|
15
|
+
"IsCollectionPresent": false,
|
16
|
+
"IsLinearized": false,
|
17
|
+
"IsSignaturesPresent": false,
|
18
|
+
"IsXFAPresent": false,
|
19
|
+
"Language": null,
|
20
|
+
"ModDate": "D:20250419143655Z00'00'",
|
21
|
+
"PDFFormatVersion": "1.3",
|
22
|
+
"Producer": "macOS 版本15.3.2(版号24D81) Quartz PDFContext",
|
23
|
+
"Title": "test",
|
24
|
+
},
|
25
|
+
"pdfMetadata": null,
|
26
|
+
"pdfVersion": "4.10.38",
|
27
|
+
},
|
28
|
+
},
|
29
|
+
"pages": [
|
30
|
+
{
|
31
|
+
"charCount": 3,
|
32
|
+
"lineCount": 1,
|
33
|
+
"metadata": {
|
34
|
+
"pageNumber": 1,
|
35
|
+
},
|
36
|
+
"pageContent": "123",
|
37
|
+
},
|
38
|
+
],
|
39
|
+
"totalCharCount": 3,
|
40
|
+
"totalLineCount": 1,
|
41
|
+
}
|
42
|
+
`;
|
43
|
+
|
3
44
|
exports[`loadFile Integration Tests > Text Handling (.txt, .csv, .md, etc.) > should load content from a test.csv file using filePath 1`] = `
|
4
45
|
{
|
5
46
|
"content": "ID,Name,Value
|
@@ -36,4 +36,24 @@ describe('loadFile Integration Tests', () => {
|
|
36
36
|
testPureTextFile(file);
|
37
37
|
});
|
38
38
|
});
|
39
|
+
|
40
|
+
describe('PDF Handling', () => {
|
41
|
+
it(`should load content from a pdf file using filePath`, async () => {
|
42
|
+
const filePath = getFixturePath('test.pdf');
|
43
|
+
|
44
|
+
// Pass filePath directly to loadFile
|
45
|
+
const docs = await loadFile(filePath);
|
46
|
+
|
47
|
+
expect(docs.content).toEqual('123');
|
48
|
+
expect(docs.source).toEqual(filePath);
|
49
|
+
|
50
|
+
// @ts-expect-error
|
51
|
+
delete docs.source;
|
52
|
+
// @ts-expect-error
|
53
|
+
delete docs.createdTime;
|
54
|
+
// @ts-expect-error
|
55
|
+
delete docs.modifiedTime;
|
56
|
+
expect(docs).toMatchSnapshot();
|
57
|
+
});
|
58
|
+
});
|
39
59
|
});
|
@@ -0,0 +1,17 @@
|
|
1
|
+
// Polyfill DOMMatrix for pdfjs-dist in Node.js environment
|
2
|
+
import { DOMMatrix } from 'canvas';
|
3
|
+
|
4
|
+
if (typeof global.DOMMatrix === 'undefined') {
|
5
|
+
// @ts-ignore
|
6
|
+
global.DOMMatrix = DOMMatrix;
|
7
|
+
}
|
8
|
+
|
9
|
+
// Polyfill URL.createObjectURL and URL.revokeObjectURL for pdfjs-dist
|
10
|
+
if (typeof global.URL.createObjectURL === 'undefined') {
|
11
|
+
global.URL.createObjectURL = () => 'blob:http://localhost/fake-blob-url';
|
12
|
+
}
|
13
|
+
if (typeof global.URL.revokeObjectURL === 'undefined') {
|
14
|
+
global.URL.revokeObjectURL = () => {
|
15
|
+
/* no-op */
|
16
|
+
};
|
17
|
+
}
|
@@ -0,0 +1,14 @@
|
|
1
|
+
import { defineConfig } from 'vitest/config';
|
2
|
+
|
3
|
+
export default defineConfig({
|
4
|
+
test: {
|
5
|
+
// coverage: {
|
6
|
+
// all: false,
|
7
|
+
// provider: 'v8',
|
8
|
+
// reporter: ['text', 'json', 'lcov', 'text-summary'],
|
9
|
+
// reportsDirectory: './coverage/app',
|
10
|
+
// },
|
11
|
+
environment: 'happy-dom',
|
12
|
+
// setupFiles: join(__dirname, './test/setup.ts'),
|
13
|
+
},
|
14
|
+
});
|
@@ -37,12 +37,16 @@ const infiniaiChatModels: AIChatModelCard[] = [
|
|
37
37
|
type: 'chat',
|
38
38
|
},
|
39
39
|
{
|
40
|
-
|
40
|
+
abilities: {
|
41
|
+
functionCall: true,
|
42
|
+
reasoning: true,
|
43
|
+
},
|
44
|
+
contextWindowTokens: 128_000,
|
41
45
|
description:
|
42
|
-
'
|
43
|
-
displayName: '
|
46
|
+
'Qwen3-235B-A22B 是 Qwen 系列第三代的大型语言模型,采用混合专家(MoE)架构,总计 2350 亿参数,每 token 激活 220 亿参数。支持无缝切换思考模式(复杂推理)和非思考模式(通用对话),在数学、编码、常识推理及多语言指令执行中表现出色。',
|
47
|
+
displayName: 'Qwen3 235B A22B',
|
44
48
|
enabled: true,
|
45
|
-
id: '
|
49
|
+
id: 'qwen3-235b-a22b',
|
46
50
|
pricing: {
|
47
51
|
currency: 'CNY',
|
48
52
|
input: 0,
|
@@ -51,11 +55,70 @@ const infiniaiChatModels: AIChatModelCard[] = [
|
|
51
55
|
type: 'chat',
|
52
56
|
},
|
53
57
|
{
|
54
|
-
|
58
|
+
abilities: {
|
59
|
+
functionCall: true,
|
60
|
+
reasoning: true,
|
61
|
+
},
|
62
|
+
contextWindowTokens: 128_000,
|
55
63
|
description:
|
56
|
-
'
|
57
|
-
displayName: '
|
58
|
-
|
64
|
+
'Qwen3-30B-A3B 是 Qwen 系列第三代的大型语言模型,采用混合专家(MoE)架构,总计 305 亿参数,每 token 激活 33 亿参数。支持无缝切换思维模式(复杂推理)和非思维模式(通用对话),在数学、编码、常识推理及多语言指令执行中表现出色。',
|
65
|
+
displayName: 'Qwen3 30B A3B',
|
66
|
+
enabled: true,
|
67
|
+
id: 'qwen3-30b-a3b',
|
68
|
+
pricing: {
|
69
|
+
currency: 'CNY',
|
70
|
+
input: 0,
|
71
|
+
output: 0,
|
72
|
+
},
|
73
|
+
type: 'chat',
|
74
|
+
},
|
75
|
+
{
|
76
|
+
abilities: {
|
77
|
+
functionCall: true,
|
78
|
+
reasoning: true,
|
79
|
+
},
|
80
|
+
contextWindowTokens: 128_000,
|
81
|
+
description:
|
82
|
+
'Qwen3-32B 是 Qwen 系列第三代的大型语言模型,拥有 328 亿参数,专为高效推理和多语言任务设计。支持无缝切换思考模式(复杂推理)和非思考模式(通用对话),在数学、编码、常识推理及多语言指令执行中表现出色。',
|
83
|
+
displayName: 'Qwen3 32B',
|
84
|
+
enabled: true,
|
85
|
+
id: 'qwen3-32b',
|
86
|
+
pricing: {
|
87
|
+
currency: 'CNY',
|
88
|
+
input: 0,
|
89
|
+
output: 0,
|
90
|
+
},
|
91
|
+
type: 'chat',
|
92
|
+
},
|
93
|
+
{
|
94
|
+
abilities: {
|
95
|
+
functionCall: true,
|
96
|
+
reasoning: true,
|
97
|
+
},
|
98
|
+
contextWindowTokens: 128_000,
|
99
|
+
description:
|
100
|
+
'Qwen3-14B 是 Qwen 系列第三代的大型语言模型,拥有 148 亿参数,专为高效推理和多语言任务设计。支持无缝切换思维模式(复杂推理)和非思维模式(通用对话),在数学、编码、常识推理及多语言指令执行中表现出色。',
|
101
|
+
displayName: 'Qwen3 14B',
|
102
|
+
enabled: false,
|
103
|
+
id: 'qwen3-14b',
|
104
|
+
pricing: {
|
105
|
+
currency: 'CNY',
|
106
|
+
input: 0,
|
107
|
+
output: 0,
|
108
|
+
},
|
109
|
+
type: 'chat',
|
110
|
+
},
|
111
|
+
{
|
112
|
+
abilities: {
|
113
|
+
functionCall: true,
|
114
|
+
reasoning: true,
|
115
|
+
},
|
116
|
+
contextWindowTokens: 128_000,
|
117
|
+
description:
|
118
|
+
'Qwen3-8B 是 Qwen 系列第三代的大型语言模型,拥有 82 亿参数,专为高效推理和多语言任务设计。支持无缝切换思考模式(复杂推理)和非思考模式(通用对话),在数学、编码、常识推理及多语言指令执行中表现出色。',
|
119
|
+
displayName: 'Qwen3 8B',
|
120
|
+
enabled: false,
|
121
|
+
id: 'qwen3-8b',
|
59
122
|
pricing: {
|
60
123
|
currency: 'CNY',
|
61
124
|
input: 0,
|
@@ -114,6 +177,9 @@ const infiniaiChatModels: AIChatModelCard[] = [
|
|
114
177
|
type: 'chat',
|
115
178
|
},
|
116
179
|
{
|
180
|
+
abilities: {
|
181
|
+
functionCall: true,
|
182
|
+
},
|
117
183
|
contextWindowTokens: 32_768,
|
118
184
|
description:
|
119
185
|
'Qwen2.5 是 Qwen 大型语言模型系列的最新成果。Qwen2.5 发布了从 0.5 到 720 亿参数不等的基础语言模型及指令调优语言模型。Qwen2.5 相比 Qwen2 带来了以下改进:\n显著增加知识量,在编程与数学领域的能力得到极大提升。\n在遵循指令、生成长文本、理解结构化数据 (例如,表格) 以及生成结构化输出特别是 JSON 方面有显著提升。对系统提示的多样性更具韧性,增强了聊天机器人中的角色扮演实现和条件设定。\n支持长上下文处理。\n支持超过 29 种语言的多语言功能,包括中文、英语、法语、西班牙语、葡萄牙语、德语、意大利语、俄语、日语、韩语、越南语、泰语、阿拉伯语等。',
|
@@ -128,6 +194,9 @@ const infiniaiChatModels: AIChatModelCard[] = [
|
|
128
194
|
type: 'chat',
|
129
195
|
},
|
130
196
|
{
|
197
|
+
abilities: {
|
198
|
+
functionCall: true,
|
199
|
+
},
|
131
200
|
contextWindowTokens: 32_768,
|
132
201
|
description:
|
133
202
|
'Qwen2.5 是 Qwen 大型语言模型系列的最新成果。Qwen2.5 发布了从 0.5 到 720 亿参数不等的基础语言模型及指令调优语言模型。Qwen2.5 相比 Qwen2 带来了以下改进:\n显著增加知识量,在编程与数学领域的能力得到极大提升。\n在遵循指令、生成长文本、理解结构化数据 (例如,表格) 以及生成结构化输出特别是 JSON 方面有显著提升。对系统提示的多样性更具韧性,增强了聊天机器人中的角色扮演实现和条件设定。\n支持长上下文处理。\n支持超过 29 种语言的多语言功能,包括中文、英语、法语、西班牙语、葡萄牙语、德语、意大利语、俄语、日语、韩语、越南语、泰语、阿拉伯语等。',
|
@@ -168,6 +237,9 @@ const infiniaiChatModels: AIChatModelCard[] = [
|
|
168
237
|
type: 'chat',
|
169
238
|
},
|
170
239
|
{
|
240
|
+
abilities: {
|
241
|
+
functionCall: true,
|
242
|
+
},
|
171
243
|
contextWindowTokens: 32_768,
|
172
244
|
description:
|
173
245
|
'Qwen2.5 是 Qwen 大型语言模型系列的最新成果。Qwen2.5 发布了从 0.5 到 720 亿参数不等的基础语言模型及指令调优语言模型。Qwen2.5 相比 Qwen2 带来了以下改进:\n显著增加知识量,在编程与数学领域的能力得到极大提升。\n在遵循指令、生成长文本、理解结构化数据 (例如,表格) 以及生成结构化输出特别是 JSON 方面有显著提升。对系统提示的多样性更具韧性,增强了聊天机器人中的角色扮演实现和条件设定。\n支持长上下文处理。\n支持超过 29 种语言的多语言功能,包括中文、英语、法语、西班牙语、葡萄牙语、德语、意大利语、俄语、日语、韩语、越南语、泰语、阿拉伯语等。',
|
@@ -180,12 +252,44 @@ const infiniaiChatModels: AIChatModelCard[] = [
|
|
180
252
|
},
|
181
253
|
type: 'chat',
|
182
254
|
},
|
255
|
+
{
|
256
|
+
abilities: {
|
257
|
+
functionCall: true,
|
258
|
+
reasoning: true,
|
259
|
+
},
|
260
|
+
contextWindowTokens: 65_536,
|
261
|
+
description:
|
262
|
+
'QwQ 是 Qwen 系列的推理模型,相比传统指令调优模型,QwQ 具备思考和推理能力,在下游任务尤其是难题上能取得显著性能提升。QwQ-32B 是一款中等规模的推理模型,其性能可与最先进的推理模型相媲美,例如 DeepSeek-R1 和 o1-mini。',
|
263
|
+
displayName: 'QwQ 32B',
|
264
|
+
enabled: false,
|
265
|
+
id: 'qwq-32b',
|
266
|
+
pricing: {
|
267
|
+
currency: 'CNY',
|
268
|
+
input: 0,
|
269
|
+
output: 0,
|
270
|
+
},
|
271
|
+
type: 'chat',
|
272
|
+
},
|
273
|
+
{
|
274
|
+
contextWindowTokens: 32_768,
|
275
|
+
description:
|
276
|
+
'DeepSeek-R1-Distill-Qwen-32B 是基于 DeepSeek-R1 蒸馏而来的模型,在 Qwen2.5-32B 的基础上使用 DeepSeek-R1 生成的样本进行微调。该模型在各种基准测试中表现出色,保持了强大的推理能力。',
|
277
|
+
displayName: 'DeepSeek R1 Distill Qwen 32B',
|
278
|
+
enabled: false,
|
279
|
+
id: 'deepseek-r1-distill-qwen-32b',
|
280
|
+
pricing: {
|
281
|
+
currency: 'CNY',
|
282
|
+
input: 0,
|
283
|
+
output: 0,
|
284
|
+
},
|
285
|
+
type: 'chat',
|
286
|
+
},
|
183
287
|
{
|
184
288
|
contextWindowTokens: 8192,
|
185
289
|
description:
|
186
290
|
'Meta 发布的 LLaMA 3.3 多语言大规模语言模型(LLMs)是一个经过预训练和指令微调的生成模型,提供 70B 规模(文本输入/文本输出)。该模型使用超过 15T 的数据进行训练,支持英语、德语、法语、意大利语、葡萄牙语、印地语、西班牙语和泰语,知识更新截止于 2023 年 12 月。',
|
187
291
|
displayName: 'LLaMA 3.3 70B',
|
188
|
-
enabled:
|
292
|
+
enabled: false,
|
189
293
|
id: 'llama-3.3-70b-instruct',
|
190
294
|
pricing: {
|
191
295
|
currency: 'CNY',
|