foliko 1.1.63 → 1.1.65
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent/data/plugins-state.json +8 -0
- package/.agent/sessions/cli_default.json +258 -260
- package/cli/bin/foliko.js +2 -2
- package/cli/src/commands/chat.js +15 -26
- package/cli/src/ui/chat-ui.js +102 -165
- package/cli/src/ui/footer-bar.js +7 -32
- package/cli/src/ui/message-bubble.js +24 -2
- package/cli/src/ui/status-bar.js +177 -0
- package/package.json +1 -2
- package/plugins/audit-plugin.js +11 -7
- package/plugins/coordinator-plugin.js +14 -12
- package/plugins/data-splitter-plugin.js +323 -0
- package/plugins/default-plugins.js +12 -1
- package/plugins/extension-executor-plugin.js +2 -2
- package/plugins/file-system-plugin.js +68 -50
- package/plugins/gate-trading.js +10 -10
- package/plugins/install-plugin.js +3 -3
- package/plugins/memory-plugin.js +8 -11
- package/plugins/plugin-manager-plugin.js +9 -11
- package/plugins/qq-plugin.js +9 -9
- package/plugins/rules-plugin.js +7 -7
- package/plugins/scheduler-plugin.js +22 -18
- package/plugins/session-plugin.js +14 -14
- package/plugins/storage-plugin.js +11 -10
- package/plugins/subagent-plugin.js +13 -9
- package/plugins/think-plugin.js +63 -59
- package/plugins/tools-plugin.js +8 -8
- package/plugins/weixin-plugin.js +5 -5
- package/src/capabilities/skill-manager.js +23 -15
- package/src/capabilities/workflow-engine.js +2 -2
- package/src/core/agent-chat.js +70 -26
- package/src/core/agent.js +17 -27
- package/src/core/chat-session.js +7 -161
- package/src/core/constants.js +198 -0
- package/src/core/context-compressor.js +6 -181
- package/src/core/framework.js +125 -6
- package/src/core/plugin-base.js +7 -5
- package/src/core/provider.js +6 -0
- package/src/core/subagent.js +16 -135
- package/src/core/tool-executor.js +2 -70
- package/src/executors/mcp-executor.js +12 -10
- package/src/utils/chat-queue.js +11 -22
- package/src/utils/data-splitter.js +345 -0
- package/src/utils/download.js +5 -4
- package/src/utils/message-validator.js +283 -0
- package/src/utils/retry.js +168 -22
- package/src/utils/sandbox.js +60 -207
- package/cli/src/utils/debounce.js +0 -106
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
|
|
10
10
|
const { EventEmitter } = require('../utils/event-emitter');
|
|
11
11
|
const { logger } = require('../utils/logger');
|
|
12
|
+
const { validateToolCalls } = require('../utils/message-validator');
|
|
12
13
|
|
|
13
14
|
class ToolExecutor extends EventEmitter {
|
|
14
15
|
/**
|
|
@@ -183,76 +184,7 @@ class ToolExecutor extends EventEmitter {
|
|
|
183
184
|
* @returns {Array} 验证后的消息
|
|
184
185
|
*/
|
|
185
186
|
validateToolCalls(messages) {
|
|
186
|
-
|
|
187
|
-
// 收集被跳过的 toolCallId,用于清理对应的 tool-result
|
|
188
|
-
const invalidatedToolCallIds = new Set();
|
|
189
|
-
|
|
190
|
-
for (const msg of messages) {
|
|
191
|
-
// 清理 assistant 消息中的不完整 tool-call
|
|
192
|
-
if (msg.role === 'assistant' && Array.isArray(msg.content)) {
|
|
193
|
-
for (const item of msg.content) {
|
|
194
|
-
// 兼容 tool-call 和 tool-use 两种类型
|
|
195
|
-
if (item.type !== 'tool-call' && item.type !== 'tool-use') {
|
|
196
|
-
continue;
|
|
197
|
-
}
|
|
198
|
-
|
|
199
|
-
const input = item.input;
|
|
200
|
-
if (typeof input !== 'string') {
|
|
201
|
-
continue;
|
|
202
|
-
}
|
|
203
|
-
|
|
204
|
-
// 检查 input 是否是有效的 JSON(不是不完整的)
|
|
205
|
-
const trimmed = input.trim();
|
|
206
|
-
if (trimmed === '{' || trimmed === '' || !trimmed.startsWith('{')) {
|
|
207
|
-
// 不完整的 JSON,移除这个 tool-call
|
|
208
|
-
// 记录 toolCallId,以便后续清理对应的 tool-result
|
|
209
|
-
if (item.toolCallId) {
|
|
210
|
-
invalidatedToolCallIds.add(item.toolCallId);
|
|
211
|
-
}
|
|
212
|
-
logger.warn(
|
|
213
|
-
`_validateToolCalls: invalid tool-call input="${input}", toolCallId=${item.toolCallId}, converting to text`
|
|
214
|
-
);
|
|
215
|
-
item.type = 'text';
|
|
216
|
-
item.text = `(工具调用 ${item.toolName} 参数不完整,已跳过)`;
|
|
217
|
-
delete item.toolCallId;
|
|
218
|
-
delete item.toolName;
|
|
219
|
-
delete item.input;
|
|
220
|
-
fixedCount++;
|
|
221
|
-
}
|
|
222
|
-
}
|
|
223
|
-
}
|
|
224
|
-
}
|
|
225
|
-
|
|
226
|
-
// 如果有无效的 tool-call,清理对应的 tool-result
|
|
227
|
-
if (invalidatedToolCallIds.size > 0) {
|
|
228
|
-
logger.warn(
|
|
229
|
-
`_validateToolCalls: removing ${invalidatedToolCallIds.size} tool-results with invalidated toolCallIds`
|
|
230
|
-
);
|
|
231
|
-
for (const msg of messages) {
|
|
232
|
-
if (msg.role === 'tool' && Array.isArray(msg.content)) {
|
|
233
|
-
// 过滤掉引用了无效 toolCallId 的 tool-result
|
|
234
|
-
const oldLen = msg.content.length;
|
|
235
|
-
msg.content = msg.content.filter((item) => {
|
|
236
|
-
if (item.type !== 'tool-result' && item.type !== 'tool_result') {
|
|
237
|
-
return true;
|
|
238
|
-
}
|
|
239
|
-
// 如果 tool-result 引用的 toolCallId 已被标记为无效,则移除
|
|
240
|
-
if (item.toolCallId && invalidatedToolCallIds.has(item.toolCallId)) {
|
|
241
|
-
logger.warn(
|
|
242
|
-
`_validateToolCalls: removing orphaned tool-result with toolCallId=${item.toolCallId}`
|
|
243
|
-
);
|
|
244
|
-
fixedCount++;
|
|
245
|
-
return false;
|
|
246
|
-
}
|
|
247
|
-
return true;
|
|
248
|
-
});
|
|
249
|
-
}
|
|
250
|
-
}
|
|
251
|
-
}
|
|
252
|
-
|
|
253
|
-
if (fixedCount > 0) {
|
|
254
|
-
logger.info(`_validateToolCalls: Fixed ${fixedCount} incomplete tool calls/results`);
|
|
255
|
-
}
|
|
187
|
+
return validateToolCalls(messages);
|
|
256
188
|
}
|
|
257
189
|
|
|
258
190
|
/**
|
|
@@ -307,7 +307,7 @@ class MCPExecutorPlugin extends Plugin {
|
|
|
307
307
|
|
|
308
308
|
return {
|
|
309
309
|
success: true,
|
|
310
|
-
|
|
310
|
+
data: {
|
|
311
311
|
name: toolInfo.name,
|
|
312
312
|
description: toolInfo.description,
|
|
313
313
|
required,
|
|
@@ -405,7 +405,7 @@ class MCPExecutorPlugin extends Plugin {
|
|
|
405
405
|
source: 'mcp',
|
|
406
406
|
});
|
|
407
407
|
|
|
408
|
-
return { success: true,
|
|
408
|
+
return { success: true, data: execResult };
|
|
409
409
|
} catch (err) {
|
|
410
410
|
log.error(` Tool '${tool}' failed:`, err.message);
|
|
411
411
|
|
|
@@ -450,7 +450,7 @@ class MCPExecutorPlugin extends Plugin {
|
|
|
450
450
|
});
|
|
451
451
|
}
|
|
452
452
|
}
|
|
453
|
-
return { success: true, servers };
|
|
453
|
+
return { success: true, data: servers };
|
|
454
454
|
},
|
|
455
455
|
});
|
|
456
456
|
|
|
@@ -481,8 +481,10 @@ class MCPExecutorPlugin extends Plugin {
|
|
|
481
481
|
|
|
482
482
|
return {
|
|
483
483
|
success: true,
|
|
484
|
-
|
|
485
|
-
|
|
484
|
+
data: 'MCP 配置已重载',
|
|
485
|
+
metadata: {
|
|
486
|
+
servers: Object.keys(config.mcpServers || {}),
|
|
487
|
+
},
|
|
486
488
|
};
|
|
487
489
|
} catch (err) {
|
|
488
490
|
log.error(' Reload error:', err);
|
|
@@ -507,7 +509,7 @@ class MCPExecutorPlugin extends Plugin {
|
|
|
507
509
|
if (enabled) {
|
|
508
510
|
// 启用服务器
|
|
509
511
|
if (clientInfo && clientInfo.enabled) {
|
|
510
|
-
return { success: true,
|
|
512
|
+
return { success: true, data: `MCP 服务器 '${server}' 已经是开启状态` };
|
|
511
513
|
}
|
|
512
514
|
// 如果服务器从未连接过,也检查配置中的 enabled 状态
|
|
513
515
|
if (!clientInfo && serverConfig && serverConfig.enabled === false) {
|
|
@@ -527,7 +529,7 @@ class MCPExecutorPlugin extends Plugin {
|
|
|
527
529
|
// 保存 enabled 状态到配置文件
|
|
528
530
|
await this._saveMCPServerEnabled(server, true);
|
|
529
531
|
this._refreshAllAgentsMCPPrompt(this._framework);
|
|
530
|
-
return { success: true,
|
|
532
|
+
return { success: true, data: `MCP 服务器 '${server}' 已开启` };
|
|
531
533
|
} else {
|
|
532
534
|
return { success: false, error: '服务器配置不存在,需要重载配置' };
|
|
533
535
|
}
|
|
@@ -555,10 +557,10 @@ class MCPExecutorPlugin extends Plugin {
|
|
|
555
557
|
// 保存 enabled 状态到配置文件
|
|
556
558
|
await this._saveMCPServerEnabled(server, false);
|
|
557
559
|
this._refreshAllAgentsMCPPrompt(this._framework);
|
|
558
|
-
return { success: true,
|
|
560
|
+
return { success: true, data: `MCP 服务器 '${server}' 已关闭` };
|
|
559
561
|
}
|
|
560
562
|
|
|
561
|
-
return { success: true,
|
|
563
|
+
return { success: true, data: `MCP 服务器 '${server}' 状态未变化` };
|
|
562
564
|
},
|
|
563
565
|
});
|
|
564
566
|
|
|
@@ -610,7 +612,7 @@ class MCPExecutorPlugin extends Plugin {
|
|
|
610
612
|
*/
|
|
611
613
|
_refreshAgentMCPPrompt(agent) {
|
|
612
614
|
// 检查是否已刷新过(通过检查系统提示词是否已包含 MCP 描述)
|
|
613
|
-
const existingPrompt = agent._originalPrompt || '';
|
|
615
|
+
const existingPrompt = (typeof agent.getOriginalPrompt === 'function' ? agent.getOriginalPrompt() : agent._originalPrompt) || '';
|
|
614
616
|
if (existingPrompt.includes('【MCP Servers】')) {
|
|
615
617
|
return;
|
|
616
618
|
}
|
package/src/utils/chat-queue.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
const { EventEmitter } = require('./event-emitter');
|
|
2
2
|
const { cleanResponse } = require('./index');
|
|
3
3
|
const { logger } = require('./logger');
|
|
4
|
+
const { isNetworkError, calculateDelay, PRESETS } = require('./retry');
|
|
4
5
|
const log = logger.child('ChatQueue');
|
|
5
6
|
// ChatQueueManager.js
|
|
6
7
|
class ChatQueueManager extends EventEmitter {
|
|
@@ -118,10 +119,9 @@ class ChatQueueManager extends EventEmitter {
|
|
|
118
119
|
if (result.error) {
|
|
119
120
|
lastError = result.error;
|
|
120
121
|
if (attempt < this.retryAttempts && this.isRetryableError(lastError)) {
|
|
121
|
-
await this.sleep(
|
|
122
|
+
await this.sleep(calculateDelay(attempt, { baseDelay: this.retryDelay }));
|
|
122
123
|
continue;
|
|
123
124
|
}
|
|
124
|
-
// 重试耗尽,直接抛出 result.error
|
|
125
125
|
throw lastError;
|
|
126
126
|
}
|
|
127
127
|
|
|
@@ -130,7 +130,7 @@ class ChatQueueManager extends EventEmitter {
|
|
|
130
130
|
log.warn('[ChatQueue] executeWithRetry: ', attempt, 'error:', error.message);
|
|
131
131
|
lastError = error;
|
|
132
132
|
if (attempt < this.retryAttempts && this.isRetryableError(error)) {
|
|
133
|
-
await this.sleep(
|
|
133
|
+
await this.sleep(calculateDelay(attempt, { baseDelay: this.retryDelay }));
|
|
134
134
|
continue;
|
|
135
135
|
}
|
|
136
136
|
break;
|
|
@@ -138,9 +138,7 @@ class ChatQueueManager extends EventEmitter {
|
|
|
138
138
|
}
|
|
139
139
|
|
|
140
140
|
// 将最后的错误转换为友好消息
|
|
141
|
-
const
|
|
142
|
-
const isRetryError = errName === 'AI_RetryError' || errName === 'RetryError';
|
|
143
|
-
const friendlyMessage = isRetryError
|
|
141
|
+
const friendlyMessage = isNetworkError(lastError)
|
|
144
142
|
? 'AI 服务暂时不可用,请稍后重试'
|
|
145
143
|
: (lastError?.message || String(lastError)).split('\n')[0];
|
|
146
144
|
|
|
@@ -175,9 +173,7 @@ class ChatQueueManager extends EventEmitter {
|
|
|
175
173
|
} catch (err) {
|
|
176
174
|
// SDK 直接抛出错误(没有通过 chunk 传递)
|
|
177
175
|
// 转换为友好错误消息
|
|
178
|
-
const
|
|
179
|
-
const isRetryError = errName === 'AI_RetryError' || errName === 'RetryError';
|
|
180
|
-
const friendlyMessage = isRetryError
|
|
176
|
+
const friendlyMessage = isNetworkError(err)
|
|
181
177
|
? 'AI 服务暂时不可用,请稍后重试'
|
|
182
178
|
: (err.message || err.toString()).split('\n')[0];
|
|
183
179
|
|
|
@@ -225,20 +221,13 @@ class ChatQueueManager extends EventEmitter {
|
|
|
225
221
|
/**
|
|
226
222
|
* 判断错误是否可重试
|
|
227
223
|
*/
|
|
224
|
+
/**
|
|
225
|
+
* 判断错误是否可重试(委托给统一入口)
|
|
226
|
+
* @param {Error} error
|
|
227
|
+
* @returns {boolean}
|
|
228
|
+
*/
|
|
228
229
|
isRetryableError(error) {
|
|
229
|
-
|
|
230
|
-
return (
|
|
231
|
-
message.includes('负载较高') ||
|
|
232
|
-
message.includes('timeout') ||
|
|
233
|
-
message.includes('network') ||
|
|
234
|
-
message.includes('429') ||
|
|
235
|
-
message.includes('500') ||
|
|
236
|
-
message.includes('502') ||
|
|
237
|
-
message.includes('503') ||
|
|
238
|
-
message.includes('rate limit') ||
|
|
239
|
-
error.name === 'AI_RetryError' ||
|
|
240
|
-
error.name === 'AI_APICallError'
|
|
241
|
-
);
|
|
230
|
+
return isNetworkError(error);
|
|
242
231
|
}
|
|
243
232
|
|
|
244
233
|
/**
|
|
@@ -0,0 +1,345 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DataSplitter — 大数据分拆引擎
|
|
3
|
+
*
|
|
4
|
+
* 职责:
|
|
5
|
+
* 1. 将大文本内容按 token 估算分块
|
|
6
|
+
* 2. 为每个分块创建子 Agent 并行处理
|
|
7
|
+
* 3. 汇总各子 Agent 的结果
|
|
8
|
+
*
|
|
9
|
+
* 使用场景:
|
|
10
|
+
* - 读取超过 200KB 的大文件
|
|
11
|
+
* - 抓取内容过多的网页
|
|
12
|
+
* - 任何超过上下文限制的工具结果
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
const { logger } = require('./logger');
|
|
16
|
+
|
|
17
|
+
// Default chunk size in characters (60,000 chars ≈ 30K tokens at the 2-chars-per-token estimate used below)
|
|
18
|
+
const DEFAULT_CHUNK_SIZE = 60000;
|
|
19
|
+
|
|
20
|
+
// 默认 token 字节换算(中英混合粗略估算:1 token ≈ 2 chars)
|
|
21
|
+
const CHARS_PER_TOKEN = 2;
|
|
22
|
+
|
|
23
|
+
// Safe threshold: content longer than this many characters triggers automatic splitting (default 50,000 chars ≈ 25K tokens)
|
|
24
|
+
const DEFAULT_SAFE_THRESHOLD = 50000;
|
|
25
|
+
|
|
26
|
+
class DataSplitter {
|
|
27
|
+
/**
|
|
28
|
+
* @param {Object} framework - Framework 实例
|
|
29
|
+
* @param {Object} [options]
|
|
30
|
+
* @param {number} [options.chunkSize=60000] - 每块字符数
|
|
31
|
+
* @param {number} [options.safeThreshold=50000] - 安全阈值字符数
|
|
32
|
+
* @param {number} [options.maxConcurrent=3] - 最大并行子Agent数
|
|
33
|
+
* @param {number} [options.maxRetries=2] - 子Agent最大重试次数
|
|
34
|
+
*/
|
|
35
|
+
constructor(framework, options = {}) {
|
|
36
|
+
this.framework = framework;
|
|
37
|
+
this.chunkSize = options.chunkSize || DEFAULT_CHUNK_SIZE;
|
|
38
|
+
this.safeThreshold = options.safeThreshold || DEFAULT_SAFE_THRESHOLD;
|
|
39
|
+
this.maxConcurrent = options.maxConcurrent || 3;
|
|
40
|
+
this.maxRetries = options.maxRetries || 2;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
/**
|
|
44
|
+
* 判断内容是否需要分拆
|
|
45
|
+
* @param {string} content - 大文本内容
|
|
46
|
+
* @returns {boolean}
|
|
47
|
+
*/
|
|
48
|
+
needsSplit(content) {
|
|
49
|
+
if (!content || typeof content !== 'string') return false;
|
|
50
|
+
return content.length > this.safeThreshold;
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
/**
|
|
54
|
+
* 获取内容统计信息
|
|
55
|
+
* @param {string} content
|
|
56
|
+
* @returns {{ chars: number, estimatedTokens: number, chunks: number }}
|
|
57
|
+
*/
|
|
58
|
+
getContentStats(content) {
|
|
59
|
+
const chars = content.length;
|
|
60
|
+
return {
|
|
61
|
+
chars,
|
|
62
|
+
estimatedTokens: Math.ceil(chars / CHARS_PER_TOKEN),
|
|
63
|
+
chunks: Math.ceil(chars / this.chunkSize),
|
|
64
|
+
};
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
/**
|
|
68
|
+
* 将大文本按分块大小拆分成多个片段
|
|
69
|
+
* 尝试在段落边界(\n\n)切分,保持语义完整性
|
|
70
|
+
* @param {string} content - 大文本内容
|
|
71
|
+
* @param {number} [chunkSize] - 每块最大字符数,默认使用实例配置
|
|
72
|
+
* @returns {Array<{ index: number, content: string, startOffset: number, endOffset: number }>}
|
|
73
|
+
*/
|
|
74
|
+
splitContent(content, chunkSize) {
|
|
75
|
+
chunkSize = chunkSize || this.chunkSize;
|
|
76
|
+
if (!content) return [];
|
|
77
|
+
|
|
78
|
+
const chunks = [];
|
|
79
|
+
let start = 0;
|
|
80
|
+
|
|
81
|
+
while (start < content.length) {
|
|
82
|
+
// 计算当前块的理论结束位置
|
|
83
|
+
let end = Math.min(start + chunkSize, content.length);
|
|
84
|
+
|
|
85
|
+
// 如果不是最后一块,尝试在段落边界切分
|
|
86
|
+
if (end < content.length) {
|
|
87
|
+
// 从 end 向前查找段落边界(双重换行)
|
|
88
|
+
const searchStart = Math.max(start, end - Math.floor(chunkSize * 0.3));
|
|
89
|
+
const segmentToSearch = content.slice(searchStart, end);
|
|
90
|
+
|
|
91
|
+
// 优先找 \n\n(段落边界)
|
|
92
|
+
let boundary = segmentToSearch.lastIndexOf('\n\n');
|
|
93
|
+
if (boundary > 0) {
|
|
94
|
+
end = searchStart + boundary + 1; // 把 \n\n 留给下一块开头会空行,更好
|
|
95
|
+
// 实际上保留 \n\n 在末尾更好
|
|
96
|
+
end = searchStart + boundary + 2;
|
|
97
|
+
} else {
|
|
98
|
+
// 其次找 \n(行边界)
|
|
99
|
+
boundary = segmentToSearch.lastIndexOf('\n');
|
|
100
|
+
if (boundary > 0) {
|
|
101
|
+
end = searchStart + boundary + 1;
|
|
102
|
+
} else {
|
|
103
|
+
// 最后找空格
|
|
104
|
+
boundary = segmentToSearch.lastIndexOf(' ');
|
|
105
|
+
if (boundary > 0) {
|
|
106
|
+
end = searchStart + boundary + 1;
|
|
107
|
+
}
|
|
108
|
+
// 找不到合适边界就在 chunkSize 处硬切
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
const chunkContent = content.slice(start, end);
|
|
114
|
+
if (chunkContent.trim()) {
|
|
115
|
+
chunks.push({
|
|
116
|
+
index: chunks.length,
|
|
117
|
+
content: chunkContent,
|
|
118
|
+
startOffset: start,
|
|
119
|
+
endOffset: end,
|
|
120
|
+
});
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
start = end;
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
return chunks;
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
/**
|
|
130
|
+
* 为每个分块创建子 Agent 并分配处理任务
|
|
131
|
+
* 子 Agent 并行处理,结果自动汇总
|
|
132
|
+
*
|
|
133
|
+
* @param {Object} options
|
|
134
|
+
* @param {Array<{ index: number, content: string }>} options.chunks - 分块数据
|
|
135
|
+
* @param {string} options.taskDescription - 每个子 Agent 要执行的任务描述
|
|
136
|
+
* @param {string} [options.agentName] - 子 Agent 名称前缀
|
|
137
|
+
* @param {string} [options.agentRole] - 子 Agent 角色
|
|
138
|
+
* @param {number} [options.maxConcurrent] - 并行度
|
|
139
|
+
* @param {AbortSignal} [options.signal] - 取消信号
|
|
140
|
+
* @returns {Promise<{ success: boolean, results: Array, summary: string, errors: Array }>}
|
|
141
|
+
*/
|
|
142
|
+
async dispatchToSubAgents(options) {
|
|
143
|
+
const {
|
|
144
|
+
chunks,
|
|
145
|
+
taskDescription,
|
|
146
|
+
agentName = 'data-processor',
|
|
147
|
+
agentRole = '数据处理专家',
|
|
148
|
+
maxConcurrent = this.maxConcurrent,
|
|
149
|
+
signal,
|
|
150
|
+
} = options;
|
|
151
|
+
|
|
152
|
+
if (!chunks || chunks.length === 0) {
|
|
153
|
+
return { success: true, results: [], summary: '', errors: [] };
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
const totalChunks = chunks.length;
|
|
157
|
+
const results = [];
|
|
158
|
+
const errors = [];
|
|
159
|
+
|
|
160
|
+
logger.info(
|
|
161
|
+
`[DataSplitter] 开始分拆处理: ${totalChunks} 块, 任务="${taskDescription.slice(0, 50)}..."`
|
|
162
|
+
);
|
|
163
|
+
|
|
164
|
+
// 并行执行(受 maxConcurrent 限制)
|
|
165
|
+
const queue = [...chunks];
|
|
166
|
+
const inFlight = new Set();
|
|
167
|
+
|
|
168
|
+
const processChunk = async (chunk) => {
|
|
169
|
+
const chunkIndex = chunk.index;
|
|
170
|
+
|
|
171
|
+
// 为当前块创建子 Agent
|
|
172
|
+
const subagent = this.framework.createSubAgent({
|
|
173
|
+
name: `${agentName}-chunk-${chunkIndex}`,
|
|
174
|
+
role: agentRole,
|
|
175
|
+
systemPrompt: `你是${agentRole},负责处理大数据中的第 ${chunkIndex + 1}/${totalChunks} 块。`,
|
|
176
|
+
maxRetries: this.maxRetries,
|
|
177
|
+
disableTools: true, // 分拆处理只做文本分析,不需要额外工具
|
|
178
|
+
});
|
|
179
|
+
|
|
180
|
+
try {
|
|
181
|
+
const taskMsg = `## 任务
|
|
182
|
+
${taskDescription}
|
|
183
|
+
|
|
184
|
+
## 待处理内容(第 ${chunkIndex + 1}/${totalChunks} 块)
|
|
185
|
+
\`\`\`
|
|
186
|
+
${chunk.content}
|
|
187
|
+
\`\`\`
|
|
188
|
+
|
|
189
|
+
## 要求
|
|
190
|
+
- 只处理这块内容
|
|
191
|
+
- 返回简洁的结果
|
|
192
|
+
- 如果这块内容没有有用信息,返回"无有效信息"`;
|
|
193
|
+
|
|
194
|
+
const result = await subagent.chat(taskMsg, { signal });
|
|
195
|
+
results.push({
|
|
196
|
+
chunkIndex,
|
|
197
|
+
success: result.success,
|
|
198
|
+
message: result.message || '',
|
|
199
|
+
});
|
|
200
|
+
|
|
201
|
+
logger.debug(
|
|
202
|
+
`[DataSplitter] 块 ${chunkIndex + 1}/${totalChunks} 处理完成: ` +
|
|
203
|
+
`${result.success ? '成功' : '失败'}`
|
|
204
|
+
);
|
|
205
|
+
} catch (err) {
|
|
206
|
+
errors.push({ chunkIndex, error: err.message });
|
|
207
|
+
results.push({ chunkIndex, success: false, error: err.message });
|
|
208
|
+
logger.warn(
|
|
209
|
+
`[DataSplitter] 块 ${chunkIndex + 1}/${totalChunks} 处理失败: ${err.message}`
|
|
210
|
+
);
|
|
211
|
+
} finally {
|
|
212
|
+
subagent.destroy();
|
|
213
|
+
inFlight.delete(chunk);
|
|
214
|
+
}
|
|
215
|
+
};
|
|
216
|
+
|
|
217
|
+
// 并发控制:最多 maxConcurrent 个同时运行
|
|
218
|
+
while (queue.length > 0 || inFlight.size > 0) {
|
|
219
|
+
// 填充 inFlight 到 maxConcurrent
|
|
220
|
+
while (queue.length > 0 && inFlight.size < maxConcurrent) {
|
|
221
|
+
const chunk = queue.shift();
|
|
222
|
+
inFlight.add(chunk);
|
|
223
|
+
processChunk(chunk); // 不 await,并发执行
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
// 等待任意一个完成
|
|
227
|
+
if (inFlight.size > 0) {
|
|
228
|
+
await Promise.race(
|
|
229
|
+
[...inFlight].map(
|
|
230
|
+
(chunk) =>
|
|
231
|
+
new Promise((resolve) => {
|
|
232
|
+
const check = () => {
|
|
233
|
+
if (!inFlight.has(chunk)) resolve();
|
|
234
|
+
else setTimeout(check, 50);
|
|
235
|
+
};
|
|
236
|
+
check();
|
|
237
|
+
})
|
|
238
|
+
)
|
|
239
|
+
);
|
|
240
|
+
}
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
// 按块索引排序结果
|
|
244
|
+
results.sort((a, b) => a.chunkIndex - b.chunkIndex);
|
|
245
|
+
|
|
246
|
+
// 生成汇总摘要
|
|
247
|
+
const summary = this._buildSummary(results, errors, taskDescription);
|
|
248
|
+
|
|
249
|
+
logger.info(
|
|
250
|
+
`[DataSplitter] 分拆处理完成: ` +
|
|
251
|
+
`${results.filter((r) => r.success).length}/${totalChunks} 块成功, ` +
|
|
252
|
+
`${errors.length} 个错误`
|
|
253
|
+
);
|
|
254
|
+
|
|
255
|
+
return {
|
|
256
|
+
success: errors.length === 0,
|
|
257
|
+
results,
|
|
258
|
+
summary,
|
|
259
|
+
errors,
|
|
260
|
+
};
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
/**
|
|
264
|
+
* 一步完成:检测 → 分拆 → 派发 → 汇总
|
|
265
|
+
* 如果内容不超过阈值,直接返回原文
|
|
266
|
+
*
|
|
267
|
+
* @param {string} content - 原始内容
|
|
268
|
+
* @param {string} taskDescription - 要执行的任务
|
|
269
|
+
* @param {Object} [options]
|
|
270
|
+
* @returns {Promise<string>} 汇总结果
|
|
271
|
+
*/
|
|
272
|
+
async processLargeContent(content, taskDescription, options = {}) {
|
|
273
|
+
if (!this.needsSplit(content)) {
|
|
274
|
+
return content; // 不需要分拆
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
const stats = this.getContentStats(content);
|
|
278
|
+
const chunkSize = options.chunkSize || this.chunkSize;
|
|
279
|
+
|
|
280
|
+
logger.info(
|
|
281
|
+
`[DataSplitter] 自动分拆: ${stats.chars} 字符, ` +
|
|
282
|
+
`约 ${stats.estimatedTokens} tokens, ` +
|
|
283
|
+
`分 ${stats.chunks} 块处理`
|
|
284
|
+
);
|
|
285
|
+
|
|
286
|
+
const chunks = this.splitContent(content, chunkSize);
|
|
287
|
+
const result = await this.dispatchToSubAgents({
|
|
288
|
+
chunks,
|
|
289
|
+
taskDescription,
|
|
290
|
+
maxConcurrent: options.maxConcurrent || this.maxConcurrent,
|
|
291
|
+
signal: options.signal,
|
|
292
|
+
});
|
|
293
|
+
|
|
294
|
+
return result.summary;
|
|
295
|
+
}
|
|
296
|
+
|
|
297
|
+
/**
|
|
298
|
+
* 构建汇总
|
|
299
|
+
* @private
|
|
300
|
+
*/
|
|
301
|
+
_buildSummary(results, errors, taskDescription) {
|
|
302
|
+
const successfulResults = results.filter((r) => r.success && r.message && r.message !== '无有效信息');
|
|
303
|
+
const lineCounts = results.map((r) => r.message ? r.message.split('\n').length : 0);
|
|
304
|
+
|
|
305
|
+
const totalLines = lineCounts.reduce((a, b) => a + b, 0);
|
|
306
|
+
|
|
307
|
+
const lines = [
|
|
308
|
+
`## 大数据处理汇总`,
|
|
309
|
+
``,
|
|
310
|
+
`**任务**: ${taskDescription}`,
|
|
311
|
+
`**总块数**: ${results.length}`,
|
|
312
|
+
`**成功**: ${successfulResults.length} 块`,
|
|
313
|
+
`**失败**: ${errors.length} 块`,
|
|
314
|
+
``,
|
|
315
|
+
`### 各块处理结果`,
|
|
316
|
+
``,
|
|
317
|
+
];
|
|
318
|
+
|
|
319
|
+
for (const r of results) {
|
|
320
|
+
const status = r.success ? '✅' : '❌';
|
|
321
|
+
const msg = r.message || r.error || '无结果';
|
|
322
|
+
// 每块只取关键信息,避免重复
|
|
323
|
+
const trimmed = msg.split('\n').slice(0, 3).join('\n');
|
|
324
|
+
lines.push(`**块 ${r.chunkIndex + 1}** ${status}`);
|
|
325
|
+
lines.push(`> ${trimmed}`);
|
|
326
|
+
lines.push('');
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
if (errors.length > 0) {
|
|
330
|
+
lines.push(`### 错误信息`);
|
|
331
|
+
lines.push('');
|
|
332
|
+
for (const e of errors) {
|
|
333
|
+
lines.push(`- 块 ${e.chunkIndex + 1}: ${e.error}`);
|
|
334
|
+
}
|
|
335
|
+
lines.push('');
|
|
336
|
+
}
|
|
337
|
+
|
|
338
|
+
lines.push(`---`);
|
|
339
|
+
lines.push(`*共 ${results.length} 块, ${successfulResults.length} 块成功, 汇总 ${totalLines} 行*`);
|
|
340
|
+
|
|
341
|
+
return lines.join('\n');
|
|
342
|
+
}
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
module.exports = { DataSplitter };
|
package/src/utils/download.js
CHANGED
|
@@ -6,6 +6,7 @@ const path = require('path');
|
|
|
6
6
|
const crypto = require('crypto');
|
|
7
7
|
const { fileTypeFromFile, fileTypeFromBuffer } = require('file-type');
|
|
8
8
|
const { downloadAndDecryptMedia } = require('@chnak/weixin-bot');
|
|
9
|
+
const { logger } = require('./logger');
|
|
9
10
|
class FileDownloader {
|
|
10
11
|
constructor(options = {}) {
|
|
11
12
|
this.timeout = options.timeout || 30000;
|
|
@@ -318,7 +319,7 @@ class FileDownloader {
|
|
|
318
319
|
});
|
|
319
320
|
} catch (error) {
|
|
320
321
|
lastError = error;
|
|
321
|
-
|
|
322
|
+
logger.warn(`[Download] 失败,第 ${i + 1}/${retries} 次重试...`);
|
|
322
323
|
await this._sleep(1000 * (i + 1));
|
|
323
324
|
}
|
|
324
325
|
}
|
|
@@ -432,8 +433,8 @@ class FileDownloader {
|
|
|
432
433
|
|
|
433
434
|
// 获取 Content-Type
|
|
434
435
|
const contentType = response.headers['content-type'] || '';
|
|
435
|
-
|
|
436
|
-
|
|
436
|
+
logger.debug(`[Download] URL: ${url}`);
|
|
437
|
+
logger.debug(`[Download] Content-Type: ${contentType}`);
|
|
437
438
|
|
|
438
439
|
// 提取文件信息
|
|
439
440
|
const { filename: autoFilename, extension } = this._extractFileInfo(
|
|
@@ -484,7 +485,7 @@ class FileDownloader {
|
|
|
484
485
|
});
|
|
485
486
|
|
|
486
487
|
req.on('error', (err) => {
|
|
487
|
-
|
|
488
|
+
logger.error(`[Download] 请求错误: ${err.message}`);
|
|
488
489
|
reject(err);
|
|
489
490
|
});
|
|
490
491
|
|