@louloulinx/metagpt 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. package/.eslintrc.json +23 -0
  2. package/.prettierrc +7 -0
  3. package/LICENSE +21 -0
  4. package/README-CN.md +754 -0
  5. package/README.md +238 -0
  6. package/bun.lock +1023 -0
  7. package/doc/TutorialAssistant.md +114 -0
  8. package/doc/VercelLLMProvider.md +164 -0
  9. package/eslint.config.js +55 -0
  10. package/examples/data-interpreter-example.ts +173 -0
  11. package/examples/qwen-direct-example.ts +60 -0
  12. package/examples/qwen-example.ts +62 -0
  13. package/examples/tutorial-assistant-example.ts +97 -0
  14. package/jest.config.ts +22 -0
  15. package/output/tutorials/Go语言编程教程_2025-02-25T09-35-15-436Z.md +2208 -0
  16. package/output/tutorials/Rust教程_2025-02-25T08-27-27-632Z.md +1967 -0
  17. package/output/tutorials/如何使用TypeScript开发Node.js应用_2025-02-25T08-14-39-605Z.md +1721 -0
  18. package/output/tutorials/数字经济学教程_2025-02-25T10-45-03-605Z.md +902 -0
  19. package/output/tutorials/暨南大学数字经济学复试资料_2025-02-25T11-16-59-133Z.md +719 -0
  20. package/package.json +58 -0
  21. package/plan-cn.md +321 -0
  22. package/plan.md +154 -0
  23. package/src/actions/analyze-task.ts +65 -0
  24. package/src/actions/base-action.ts +103 -0
  25. package/src/actions/di/execute-nb-code.ts +247 -0
  26. package/src/actions/di/write-analysis-code.ts +234 -0
  27. package/src/actions/write-tutorial.ts +232 -0
  28. package/src/config/browser.ts +33 -0
  29. package/src/config/config.ts +345 -0
  30. package/src/config/embedding.ts +26 -0
  31. package/src/config/llm.ts +36 -0
  32. package/src/config/mermaid.ts +37 -0
  33. package/src/config/omniparse.ts +25 -0
  34. package/src/config/redis.ts +34 -0
  35. package/src/config/s3.ts +33 -0
  36. package/src/config/search.ts +30 -0
  37. package/src/config/workspace.ts +20 -0
  38. package/src/index.ts +40 -0
  39. package/src/management/team.ts +168 -0
  40. package/src/memory/longterm.ts +218 -0
  41. package/src/memory/manager.ts +160 -0
  42. package/src/memory/types.ts +100 -0
  43. package/src/memory/working.ts +154 -0
  44. package/src/monitoring/system.ts +413 -0
  45. package/src/monitoring/types.ts +230 -0
  46. package/src/plugin/manager.ts +79 -0
  47. package/src/plugin/types.ts +114 -0
  48. package/src/provider/vercel-llm.ts +314 -0
  49. package/src/rag/base-rag.ts +194 -0
  50. package/src/rag/document-qa.ts +102 -0
  51. package/src/roles/base-role.ts +155 -0
  52. package/src/roles/data-interpreter.ts +360 -0
  53. package/src/roles/engineer.ts +1 -0
  54. package/src/roles/tutorial-assistant.ts +217 -0
  55. package/src/skills/base-skill.ts +144 -0
  56. package/src/skills/code-review.ts +120 -0
  57. package/src/tools/base-tool.ts +155 -0
  58. package/src/tools/file-system.ts +204 -0
  59. package/src/tools/tool-recommend.d.ts +14 -0
  60. package/src/tools/tool-recommend.ts +31 -0
  61. package/src/types/action.ts +38 -0
  62. package/src/types/config.ts +129 -0
  63. package/src/types/document.ts +354 -0
  64. package/src/types/llm.ts +64 -0
  65. package/src/types/memory.ts +36 -0
  66. package/src/types/message.ts +193 -0
  67. package/src/types/rag.ts +86 -0
  68. package/src/types/role.ts +67 -0
  69. package/src/types/skill.ts +71 -0
  70. package/src/types/task.ts +32 -0
  71. package/src/types/team.ts +55 -0
  72. package/src/types/tool.ts +77 -0
  73. package/src/types/workflow.ts +133 -0
  74. package/src/utils/common.ts +73 -0
  75. package/src/utils/yaml.ts +67 -0
  76. package/src/websocket/browser-client.ts +187 -0
  77. package/src/websocket/client.ts +186 -0
  78. package/src/websocket/server.ts +169 -0
  79. package/src/websocket/types.ts +125 -0
  80. package/src/workflow/executor.ts +193 -0
  81. package/src/workflow/executors/action-executor.ts +72 -0
  82. package/src/workflow/executors/condition-executor.ts +118 -0
  83. package/src/workflow/executors/parallel-executor.ts +201 -0
  84. package/src/workflow/executors/role-executor.ts +76 -0
  85. package/src/workflow/executors/sequence-executor.ts +196 -0
  86. package/tests/actions.test.ts +105 -0
  87. package/tests/benchmark/performance.test.ts +147 -0
  88. package/tests/config/config.test.ts +115 -0
  89. package/tests/config.test.ts +106 -0
  90. package/tests/e2e/setup.ts +74 -0
  91. package/tests/e2e/workflow.test.ts +88 -0
  92. package/tests/llm.test.ts +84 -0
  93. package/tests/memory/memory.test.ts +164 -0
  94. package/tests/memory.test.ts +63 -0
  95. package/tests/monitoring/monitoring.test.ts +225 -0
  96. package/tests/plugin/plugin.test.ts +183 -0
  97. package/tests/provider/bailian-llm.test.ts +98 -0
  98. package/tests/rag.test.ts +162 -0
  99. package/tests/roles.test.ts +88 -0
  100. package/tests/skills.test.ts +166 -0
  101. package/tests/team.test.ts +143 -0
  102. package/tests/tools.test.ts +170 -0
  103. package/tests/types/document.test.ts +181 -0
  104. package/tests/types/message.test.ts +122 -0
  105. package/tests/utils/yaml.test.ts +110 -0
  106. package/tests/utils.test.ts +74 -0
  107. package/tests/websocket/browser-client.test.ts +1 -0
  108. package/tests/websocket/websocket.test.ts +42 -0
  109. package/tests/workflow/parallel-executor.test.ts +224 -0
  110. package/tests/workflow/sequence-executor.test.ts +207 -0
  111. package/tests/workflow.test.ts +290 -0
  112. package/tsconfig.json +27 -0
  113. package/typedoc.json +25 -0
@@ -0,0 +1,314 @@
1
+ /**
2
+ * 使用示例:
3
+ *
4
+ * ```typescript
5
+ * // 使用 qwen-plus 模型示例
6
+ * import { VercelLLMProvider } from './vercel-llm';
7
+ * import { generateText } from 'ai';
8
+ *
9
+ * async function main() {
10
+ * // 初始化 Qwen 提供商
11
+ * const qwenProvider = new VercelLLMProvider({
12
+ * providerType: 'qwen',
13
+ * apiKey: process.env.DASHSCOPE_API_KEY || 'your-qwen-api-key',
14
+ * model: 'qwen-plus',
15
+ * baseURL: 'https://dashscope.aliyuncs.com/compatible-mode/v1', // 可选,自定义URL
16
+ * extraConfig: {
17
+ * qwenOptions: {
18
+ * // 其他Qwen配置选项
19
+ * }
20
+ * }
21
+ * });
22
+ *
23
+ * // 使用 qwen-plus 生成文本
24
+ * const result = await qwenProvider.generate(
25
+ * '写一个4人份的素食千层面食谱。',
26
+ * { temperature: 0.7 }
27
+ * );
28
+ *
29
+ * console.log(result);
30
+ *
31
+ * // 或者直接使用 Vercel AI SDK
32
+ * // 需要先安装: bun add qwen-ai-provider
33
+ * // import { createQwen } from 'qwen-ai-provider';
34
+ * // const qwen = createQwen({ baseURL: 'https://dashscope.aliyuncs.com/compatible-mode/v1' });
35
+ * // const { text } = await generateText({
36
+ * // model: qwen('qwen-plus'),
37
+ * // prompt: '写一个4人份的素食千层面食谱。',
38
+ * // });
39
+ * // console.log(text);
40
+ * }
41
+ * ```
42
+ */
43
+
44
+ import { generateText } from 'ai';
45
+ import { openai } from '@ai-sdk/openai';
46
+ import { z } from 'zod';
47
+ import type { LLMConfig, LLMProvider } from '../types/llm';
48
+
49
+ /**
50
+ * 支持的模型提供商类型
51
+ */
52
+ export type ModelProviderType = 'openai' | 'qwen' | 'anthropic' | 'mistral' | 'google' | 'custom';
53
+
54
+ /**
55
+ * 模型提供商配置
56
+ */
57
+ export const ModelProviderConfigSchema = z.object({
58
+ providerType: z.enum(['openai', 'qwen', 'anthropic', 'mistral', 'google', 'custom']).default('openai'),
59
+ apiKey: z.string(),
60
+ baseURL: z.string().optional(),
61
+ model: z.string().optional(),
62
+ extraConfig: z.record(z.any()).optional(),
63
+ });
64
+
65
+ export type ModelProviderConfig = z.infer<typeof ModelProviderConfigSchema>;
66
+
67
+ /**
68
+ * 基于 Vercel AI SDK 的通用 LLM 提供商实现
69
+ */
70
+ export class VercelLLMProvider implements LLMProvider {
71
+ private config: ModelProviderConfig;
72
+ private providerFunctions: Record<ModelProviderType, any> = {
73
+ openai: openai,
74
+ qwen: null,
75
+ anthropic: null,
76
+ mistral: null,
77
+ google: null,
78
+ custom: null,
79
+ };
80
+
81
+ constructor(config: ModelProviderConfig) {
82
+ this.config = ModelProviderConfigSchema.parse(config);
83
+ this.setupEnvironment();
84
+ this.loadProviderModules();
85
+ }
86
+
87
+ /**
88
+ * 设置环境变量
89
+ */
90
+ private setupEnvironment(): void {
91
+ // 根据不同提供商设置对应的环境变量
92
+ switch (this.config.providerType) {
93
+ case 'openai':
94
+ if (process.env.OPENAI_API_KEY !== this.config.apiKey) {
95
+ process.env.OPENAI_API_KEY = this.config.apiKey;
96
+ }
97
+ if (this.config.baseURL) {
98
+ process.env.OPENAI_API_HOST = this.config.baseURL;
99
+ }
100
+ break;
101
+ case 'qwen':
102
+ if (process.env.DASHSCOPE_API_KEY !== this.config.apiKey) {
103
+ process.env.DASHSCOPE_API_KEY = this.config.apiKey;
104
+ }
105
+ // 注意: baseURL会在createQwen时设置,不需要设置环境变量
106
+ break;
107
+ // 其他提供商的环境变量设置...
108
+ default:
109
+ // 自定义提供商可能需要在extraConfig中指定环境变量
110
+ if (this.config.extraConfig?.environmentVars) {
111
+ const envVars = this.config.extraConfig.environmentVars as Record<string, string>;
112
+ for (const [key, value] of Object.entries(envVars)) {
113
+ process.env[key] = value;
114
+ }
115
+ }
116
+ }
117
+ }
118
+
119
+ /**
120
+ * 动态加载提供商模块
121
+ */
122
+ private loadProviderModules(): void {
123
+ try {
124
+ // 根据配置动态加载需要的提供商模块
125
+ switch (this.config.providerType) {
126
+ case 'qwen':
127
+ // 动态导入提供商,避免在不需要时加载所有依赖
128
+ import('qwen-ai-provider').then(module => {
129
+ // 使用createQwen创建自定义配置的qwen提供商
130
+ const createQwen = module.createQwen;
131
+ if (createQwen) {
132
+ // 准备Qwen配置选项
133
+ const qwenOptions: Record<string, any> = {
134
+ ...(this.config.extraConfig?.qwenOptions || {})
135
+ };
136
+
137
+ // 如果提供了baseURL,添加到配置中
138
+ if (this.config.baseURL) {
139
+ qwenOptions.baseURL = this.config.baseURL;
140
+ }
141
+
142
+ // 创建并存储qwen提供商函数
143
+ this.providerFunctions.qwen = createQwen(qwenOptions);
144
+ } else {
145
+ // 回退到传统方式
146
+ this.providerFunctions.qwen = module.qwen;
147
+ console.warn('Using legacy qwen provider. For more customization options, upgrade to newer qwen-ai-provider with createQwen support.');
148
+ }
149
+ }).catch(err => {
150
+ console.warn(`Failed to load qwen provider: ${err.message}. Make sure 'qwen-ai-provider' is installed.`);
151
+ });
152
+ break;
153
+ case 'anthropic':
154
+ import('@ai-sdk/anthropic').then(module => {
155
+ this.providerFunctions.anthropic = module.anthropic;
156
+ }).catch(err => {
157
+ console.warn(`Failed to load anthropic provider: ${err.message}. Make sure '@ai-sdk/anthropic' is installed.`);
158
+ });
159
+ break;
160
+ case 'mistral':
161
+ import('@ai-sdk/mistral').then(module => {
162
+ this.providerFunctions.mistral = module.mistral;
163
+ }).catch(err => {
164
+ console.warn(`Failed to load mistral provider: ${err.message}. Make sure '@ai-sdk/mistral' is installed.`);
165
+ });
166
+ break;
167
+ case 'google':
168
+ import('@ai-sdk/google').then(module => {
169
+ this.providerFunctions.google = module.google;
170
+ }).catch(err => {
171
+ console.warn(`Failed to load google provider: ${err.message}. Make sure '@ai-sdk/google' is installed.`);
172
+ });
173
+ break;
174
+ case 'custom':
175
+ // 自定义提供商需要在extraConfig中提供modelFunction
176
+ if (this.config.extraConfig?.modelFunction) {
177
+ this.providerFunctions.custom = this.config.extraConfig.modelFunction;
178
+ } else {
179
+ console.warn('Custom provider specified but no modelFunction provided in extraConfig');
180
+ }
181
+ break;
182
+ }
183
+ } catch (error) {
184
+ console.error('Error loading provider modules:', error);
185
+ }
186
+ }
187
+
188
+ /**
189
+ * 获取当前提供商的模型函数
190
+ * @param modelName 模型名称
191
+ * @returns 模型函数调用结果
192
+ */
193
+ private getModelFunction(modelName?: string): any {
194
+ const provider = this.providerFunctions[this.config.providerType];
195
+ if (!provider) {
196
+ throw new Error(`Provider ${this.config.providerType} not loaded or not available`);
197
+ }
198
+
199
+ return provider(modelName || this.config.model || this.getDefaultModel());
200
+ }
201
+
202
+ /**
203
+ * 获取提供商的默认模型
204
+ */
205
+ private getDefaultModel(): string {
206
+ switch (this.config.providerType) {
207
+ case 'openai':
208
+ return 'gpt-3.5-turbo';
209
+ case 'qwen':
210
+ return 'qwen-plus';
211
+ case 'anthropic':
212
+ return 'claude-3-sonnet-20240229';
213
+ case 'mistral':
214
+ return 'mistral-large-latest';
215
+ case 'google':
216
+ return 'gemini-pro';
217
+ case 'custom':
218
+ return this.config.extraConfig?.defaultModel || 'default-model';
219
+ default:
220
+ return 'gpt-3.5-turbo';
221
+ }
222
+ }
223
+
224
+ /**
225
+ * 生成文本
226
+ * @param prompt 提示词
227
+ * @param config 配置选项
228
+ * @returns 生成的文本
229
+ */
230
+ async generate(prompt: string, config?: Partial<LLMConfig>): Promise<string> {
231
+ try {
232
+ // 确保提供商模块已加载完成
233
+ if (this.config.providerType !== 'openai' && !this.providerFunctions[this.config.providerType]) {
234
+ await new Promise(resolve => setTimeout(resolve, 1000)); // 等待动态导入完成
235
+ }
236
+
237
+ // @ts-ignore - Type compatibility issue between different versions of AI SDK
238
+ const model = this.getModelFunction(config?.model);
239
+
240
+ const result = await generateText({
241
+ model,
242
+ prompt,
243
+ temperature: config?.temperature,
244
+ maxTokens: config?.maxTokens,
245
+ topP: config?.topP,
246
+ frequencyPenalty: config?.frequencyPenalty,
247
+ presencePenalty: config?.presencePenalty,
248
+ ...this.config.extraConfig?.generateOptions,
249
+ });
250
+
251
+ return result.text;
252
+ } catch (error) {
253
+ throw this.handleError(error);
254
+ }
255
+ }
256
+
257
+ /**
258
+ * 生成文本流
259
+ * @param prompt 提示词
260
+ * @param config 配置选项
261
+ * @returns 生成的文本流
262
+ */
263
+ async *generateStream(prompt: string, config?: Partial<LLMConfig>): AsyncGenerator<string> {
264
+ try {
265
+ // 使用generate方法获取完整响应,然后模拟流式响应
266
+ // 由于当前streamText在TypeScript类型中存在兼容性问题
267
+ const result = await this.generate(prompt, config);
268
+
269
+ // 将完整的响应分成字符并逐个返回,模拟流式响应
270
+ const characters = result.split('');
271
+ for (const char of characters) {
272
+ yield char;
273
+ // 添加小延迟以模拟真实的流式响应
274
+ await new Promise(resolve => setTimeout(resolve, 5));
275
+ }
276
+ } catch (error) {
277
+ throw this.handleError(error);
278
+ }
279
+ }
280
+
281
+ /**
282
+ * 嵌入文本
283
+ * @param text 要嵌入的文本
284
+ * @returns 嵌入向量
285
+ */
286
+ async embed(text: string): Promise<number[]> {
287
+ try {
288
+ // Note: For embeddings in the newer Vercel AI SDK, we'd typically use:
289
+ // import { generateEmbedding } from 'ai/embedding';
290
+
291
+ // This is a temporary implementation - in a real application,
292
+ // you would implement this based on the AI SDK's embedding functionality
293
+ console.warn(`Embedding functionality not yet implemented for ${this.config.providerType} provider`);
294
+
295
+ // Placeholder implementation
296
+ return new Array(1536).fill(0).map(() => Math.random());
297
+ } catch (error) {
298
+ throw this.handleError(error);
299
+ }
300
+ }
301
+
302
+ /**
303
+ * 错误处理
304
+ * @param error 原始错误
305
+ * @returns 标准化的错误
306
+ */
307
+ private handleError(error: unknown): Error {
308
+ console.error(`${this.config.providerType} provider error:`, error);
309
+ if (error instanceof Error) {
310
+ return error;
311
+ }
312
+ return new Error(`Unknown ${this.config.providerType} provider error`);
313
+ }
314
+ }
@@ -0,0 +1,194 @@
1
+ import { QdrantClient } from '@qdrant/js-client-rest';
2
+ import type { RAGConfig, RAGSystem, Chunk, SearchResult } from '../types/rag';
3
+ import { v4 as uuidv4 } from 'uuid';
4
+
5
+ /**
6
+ * 基础RAG系统实现
7
+ */
8
+ export class BaseRAG implements RAGSystem {
9
+ protected config: RAGConfig;
10
+ protected llm: any; // LLMProvider
11
+ protected vectorStore: QdrantClient;
12
+
13
+ constructor(config: RAGConfig) {
14
+ this.config = config;
15
+ this.llm = config.llm;
16
+ this.vectorStore = new QdrantClient({
17
+ url: config.vectorStore.url,
18
+ apiKey: config.vectorStore.apiKey,
19
+ });
20
+ }
21
+
22
+ /**
23
+ * 添加文档
24
+ * @param content 文档内容
25
+ * @param metadata 文档元数据
26
+ */
27
+ public async addDocument(
28
+ content: string,
29
+ metadata: Record<string, any> = {}
30
+ ): Promise<Chunk[]> {
31
+ // 分块
32
+ const chunks = await this.chunkText(content);
33
+
34
+ // 生成嵌入向量
35
+ const embeddings = await Promise.all(
36
+ chunks.map(chunk => this.llm.generateEmbedding(chunk))
37
+ );
38
+
39
+ // 创建文档块
40
+ const chunkObjects = chunks.map((chunk, i) => ({
41
+ id: uuidv4(),
42
+ content: chunk,
43
+ embedding: embeddings[i],
44
+ metadata: {
45
+ ...metadata,
46
+ timestamp: Date.now(),
47
+ },
48
+ }));
49
+
50
+ // 存储向量
51
+ await this.vectorStore.upsert(this.config.vectorStore.collectionName, {
52
+ points: chunkObjects.map(chunk => ({
53
+ id: chunk.id,
54
+ vector: chunk.embedding,
55
+ payload: {
56
+ content: chunk.content,
57
+ metadata: chunk.metadata,
58
+ },
59
+ })),
60
+ });
61
+
62
+ return chunkObjects;
63
+ }
64
+
65
+ /**
66
+ * 删除文档块
67
+ * @param ids 块ID列表
68
+ */
69
+ public async deleteChunks(ids: string[]): Promise<void> {
70
+ await this.vectorStore.delete(this.config.vectorStore.collectionName, {
71
+ points: ids,
72
+ });
73
+ }
74
+
75
+ /**
76
+ * 更新文档块
77
+ * @param chunk 文档块
78
+ */
79
+ public async updateChunk(chunk: Chunk): Promise<void> {
80
+ await this.vectorStore.upsert(this.config.vectorStore.collectionName, {
81
+ points: [
82
+ {
83
+ id: chunk.id,
84
+ vector: chunk.embedding,
85
+ payload: {
86
+ content: chunk.content,
87
+ metadata: chunk.metadata,
88
+ },
89
+ },
90
+ ],
91
+ });
92
+ }
93
+
94
+ /**
95
+ * 搜索相关文档
96
+ * @param query 查询文本
97
+ * @param topK 返回结果数量
98
+ */
99
+ public async search(
100
+ query: string,
101
+ topK: number = this.config.topK
102
+ ): Promise<SearchResult[]> {
103
+ // 生成查询向量
104
+ const queryEmbedding = await this.llm.generateEmbedding(query);
105
+
106
+ // 搜索相似向量
107
+ const results = await this.vectorStore.search(
108
+ this.config.vectorStore.collectionName,
109
+ {
110
+ vector: queryEmbedding,
111
+ limit: topK,
112
+ with_payload: true,
113
+ }
114
+ );
115
+
116
+ // 转换结果格式
117
+ return results
118
+ .filter(result => result.payload && typeof result.payload === 'object')
119
+ .map(result => ({
120
+ chunk: {
121
+ id: result.id as string,
122
+ content: result.payload!.content as string,
123
+ embedding: queryEmbedding as number[],
124
+ metadata: result.payload!.metadata as Record<string, any>,
125
+ },
126
+ score: result.score,
127
+ metadata: result.payload!.metadata as Record<string, any>,
128
+ }));
129
+ }
130
+
131
+ /**
132
+ * 生成回答
133
+ * @param query 查询文本
134
+ */
135
+ public async generate(query: string): Promise<string> {
136
+ // 搜索相关文档
137
+ const results = await this.search(query);
138
+ if (results.length === 0) {
139
+ return 'No relevant information found.';
140
+ }
141
+
142
+ // 构建提示词
143
+ const prompt = this.buildPrompt(query, results);
144
+
145
+ // 生成回答
146
+ return await this.llm.generate(prompt);
147
+ }
148
+
149
+ /**
150
+ * 分块文本
151
+ * @param text 文本内容
152
+ */
153
+ protected async chunkText(text: string): Promise<string[]> {
154
+ // 简单按长度分块,子类可以重写此方法实现更智能的分块
155
+ const chunks: string[] = [];
156
+ let currentChunk = '';
157
+
158
+ const words = text.split(/\s+/);
159
+ for (const word of words) {
160
+ if (
161
+ currentChunk.length + word.length + 1 <= this.config.chunkSize ||
162
+ currentChunk.length === 0
163
+ ) {
164
+ currentChunk = currentChunk ? `${currentChunk} ${word}` : word;
165
+ } else {
166
+ chunks.push(currentChunk);
167
+ currentChunk = word;
168
+ }
169
+ }
170
+
171
+ if (currentChunk) {
172
+ chunks.push(currentChunk);
173
+ }
174
+
175
+ return chunks;
176
+ }
177
+
178
+ /**
179
+ * 构建提示词
180
+ * @param query 查询文本
181
+ * @param results 搜索结果
182
+ */
183
+ protected buildPrompt(query: string, results: SearchResult[]): string {
184
+ return `
185
+ Based on the following passages, please answer the question.
186
+
187
+ Question: ${query}
188
+
189
+ Relevant passages:
190
+ ${results.map(r => r.chunk.content).join('\n\n')}
191
+
192
+ Answer:`.trim();
193
+ }
194
+ }
@@ -0,0 +1,102 @@
1
+ import { BaseRAG } from './base-rag';
2
+ import type { RAGConfig, SearchResult } from '../types/rag';
3
+
4
+ /**
5
+ * 文档问答系统
6
+ * 基于RAG的文档智能问答
7
+ */
8
+ export class DocumentQA extends BaseRAG {
9
+ constructor(config: RAGConfig) {
10
+ super(config);
11
+ }
12
+
13
+ /**
14
+ * 生成带引用的回答
15
+ * @param query 查询文本
16
+ */
17
+ public async generateWithCitations(
18
+ query: string
19
+ ): Promise<{ answer: string; citations: SearchResult[] }> {
20
+ // 搜索相关文档
21
+ const results = await this.search(query);
22
+ if (results.length === 0) {
23
+ return {
24
+ answer: 'No relevant information found.',
25
+ citations: [],
26
+ };
27
+ }
28
+
29
+ // 构建提示词
30
+ const prompt = this.buildPrompt(query, results);
31
+
32
+ // 生成回答
33
+ const answer = await this.llm.generate(prompt);
34
+
35
+ return {
36
+ answer,
37
+ citations: results,
38
+ };
39
+ }
40
+
41
+ /**
42
+ * 智能分块
43
+ * @param text 文本内容
44
+ */
45
+ protected async chunkText(text: string): Promise<string[]> {
46
+ // 按段落分块
47
+ const paragraphs = text
48
+ .split(/\n\s*\n/)
49
+ .map(p => p.trim())
50
+ .filter(Boolean);
51
+
52
+ const chunks: string[] = [];
53
+ let currentChunk = '';
54
+
55
+ for (const paragraph of paragraphs) {
56
+ // 如果当前块加上新段落不超过最大大小,则合并
57
+ if (
58
+ currentChunk.length + paragraph.length + 1 <= this.config.chunkSize ||
59
+ currentChunk.length === 0
60
+ ) {
61
+ currentChunk = currentChunk
62
+ ? `${currentChunk}\n\n${paragraph}`
63
+ : paragraph;
64
+ } else {
65
+ // 否则保存当前块并开始新块
66
+ chunks.push(currentChunk);
67
+ currentChunk = paragraph;
68
+ }
69
+ }
70
+
71
+ // 添加最后一个块
72
+ if (currentChunk) {
73
+ chunks.push(currentChunk);
74
+ }
75
+
76
+ return chunks;
77
+ }
78
+
79
+ /**
80
+ * 构建提示词
81
+ * @param query 查询文本
82
+ * @param results 搜索结果
83
+ */
84
+ protected buildPrompt(query: string, results: SearchResult[]): string {
85
+ return `
86
+ You are a helpful assistant that provides accurate answers based on the given reference passages.
87
+ Please answer the question and cite the relevant passages using [1], [2], etc.
88
+
89
+ Question: ${query}
90
+
91
+ Reference passages:
92
+ ${results.map((r, i) => `[${i + 1}] ${r.chunk.content}`).join('\n\n')}
93
+
94
+ Instructions:
95
+ 1. Use information from the reference passages to answer the question
96
+ 2. Cite sources using [1], [2], etc.
97
+ 3. If the passages don't contain enough information, say so
98
+ 4. Keep the answer concise and relevant
99
+
100
+ Answer:`.trim();
101
+ }
102
+ }