deepspider 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +3 -0
- package/README.md +21 -15
- package/package.json +9 -7
- package/src/agent/core/PanelBridge.js +56 -78
- package/src/agent/core/StreamHandler.js +244 -20
- package/src/agent/index.js +120 -23
- package/src/agent/logger.js +183 -8
- package/src/agent/middleware/memoryFlush.js +48 -0
- package/src/agent/middleware/report.js +95 -37
- package/src/agent/middleware/subagent.js +236 -0
- package/src/agent/middleware/toolAvailability.js +37 -0
- package/src/agent/middleware/toolGuard.js +187 -0
- package/src/agent/middleware/validationWorkflow.js +171 -0
- package/src/agent/prompts/system.js +310 -59
- package/src/agent/run.js +168 -20
- package/src/agent/sessions.js +88 -0
- package/src/agent/skills/anti-detect/SKILL.md +89 -14
- package/src/agent/skills/captcha/SKILL.md +93 -19
- package/src/agent/skills/crawler/SKILL.md +64 -3
- package/src/agent/skills/crawler/evolved.md +9 -1
- package/src/agent/skills/dynamic-analysis/SKILL.md +74 -7
- package/src/agent/skills/env/SKILL.md +75 -0
- package/src/agent/skills/js2python/evolved.md +5 -1
- package/src/agent/skills/sandbox/SKILL.md +35 -0
- package/src/agent/skills/static-analysis/SKILL.md +98 -2
- package/src/agent/skills/static-analysis/evolved.md +5 -1
- package/src/agent/subagents/anti-detect.js +36 -24
- package/src/agent/subagents/captcha.js +35 -28
- package/src/agent/subagents/crawler.js +40 -105
- package/src/agent/subagents/factory.js +129 -9
- package/src/agent/subagents/index.js +4 -13
- package/src/agent/subagents/js2python.js +25 -35
- package/src/agent/subagents/reverse.js +180 -0
- package/src/agent/tools/analysis.js +101 -8
- package/src/agent/tools/anti-detect.js +5 -2
- package/src/agent/tools/browser.js +186 -13
- package/src/agent/tools/capture.js +24 -3
- package/src/agent/tools/correlate.js +129 -15
- package/src/agent/tools/crawler.js +3 -2
- package/src/agent/tools/crawlerGenerator.js +90 -0
- package/src/agent/tools/debug.js +43 -6
- package/src/agent/tools/evolve.js +5 -2
- package/src/agent/tools/extractor.js +5 -1
- package/src/agent/tools/file.js +14 -5
- package/src/agent/tools/generateHook.js +66 -0
- package/src/agent/tools/hookManager.js +19 -9
- package/src/agent/tools/index.js +36 -21
- package/src/agent/tools/nodejs.js +41 -6
- package/src/agent/tools/patch.js +1 -1
- package/src/agent/tools/sandbox.js +21 -1
- package/src/agent/tools/scratchpad.js +70 -0
- package/src/agent/tools/store.js +1 -1
- package/src/agent/tools/tracing.js +26 -0
- package/src/agent/tools/verifyAlgorithm.js +117 -0
- package/src/browser/EnvBridge.js +27 -13
- package/src/browser/client.js +128 -18
- package/src/browser/collector.js +101 -22
- package/src/browser/defaultHooks.js +3 -1
- package/src/browser/hooks/index.js +5 -0
- package/src/browser/interceptors/AntiDebugInterceptor.js +132 -0
- package/src/browser/interceptors/NetworkInterceptor.js +76 -12
- package/src/browser/interceptors/ScriptInterceptor.js +32 -7
- package/src/browser/interceptors/index.js +1 -0
- package/src/browser/ui/analysisPanel.js +541 -464
- package/src/cli/commands/config.js +11 -3
- package/src/config/paths.js +9 -1
- package/src/config/settings.js +7 -1
- package/src/core/PatchGenerator.js +24 -4
- package/src/core/Sandbox.js +140 -3
- package/src/env/EnvCodeGenerator.js +60 -88
- package/src/env/modules/bom/history.js +6 -0
- package/src/env/modules/bom/location.js +6 -0
- package/src/env/modules/bom/navigator.js +13 -0
- package/src/env/modules/bom/screen.js +6 -0
- package/src/env/modules/bom/storage.js +7 -0
- package/src/env/modules/dom/document.js +14 -0
- package/src/env/modules/dom/event.js +4 -0
- package/src/env/modules/index.js +27 -10
- package/src/env/modules/webapi/fetch.js +4 -0
- package/src/env/modules/webapi/url.js +4 -0
- package/src/env/modules/webapi/xhr.js +8 -0
- package/src/store/DataStore.js +125 -42
- package/src/store/Store.js +2 -1
- package/src/agent/subagents/dynamic.js +0 -64
- package/src/agent/subagents/env-agent.js +0 -82
- package/src/agent/subagents/sandbox.js +0 -55
- package/src/agent/subagents/static.js +0 -66
package/src/agent/logger.js
CHANGED
|
@@ -4,12 +4,14 @@
|
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
6
|
import { BaseCallbackHandler } from '@langchain/core/callbacks/base';
|
|
7
|
-
import { appendFileSync, mkdirSync, existsSync } from 'fs';
|
|
7
|
+
import { appendFileSync, mkdirSync, existsSync, statSync, renameSync, unlinkSync } from 'fs';
|
|
8
8
|
import { join } from 'path';
|
|
9
9
|
import { DEEPSPIDER_HOME } from '../config/paths.js';
|
|
10
10
|
|
|
11
11
|
const LOG_DIR = join(DEEPSPIDER_HOME, 'logs');
|
|
12
12
|
const LOG_FILE = join(LOG_DIR, 'agent.log');
|
|
13
|
+
const MAX_LOG_SIZE = 5 * 1024 * 1024; // 5MB
|
|
14
|
+
const MAX_LOG_FILES = 3; // agent.log, agent.log.1, agent.log.2
|
|
13
15
|
|
|
14
16
|
function ensureLogDir() {
|
|
15
17
|
if (!existsSync(LOG_DIR)) {
|
|
@@ -17,6 +19,32 @@ function ensureLogDir() {
|
|
|
17
19
|
}
|
|
18
20
|
}
|
|
19
21
|
|
|
22
|
+
/**
 * Size-based log rotation.
 *
 * When `logFile` exists and has reached MAX_LOG_SIZE bytes, the archives are
 * shifted one slot (`logFile.1` -> `logFile.2` -> ...), the archive in the last
 * slot (`logFile.<MAX_LOG_FILES - 1>`) is deleted, and the live file becomes
 * `logFile.1`.
 *
 * Rotation is best-effort: any file-system error is swallowed so that a failed
 * rotation never interrupts the caller.
 *
 * @param {string} logFile - Path of the live log file.
 * @returns {void}
 */
function rotateIfNeeded(logFile) {
  const archiveName = (index) => `${logFile}.${index}`;

  try {
    if (!existsSync(logFile)) return;
    if (statSync(logFile).size < MAX_LOG_SIZE) return;

    // Drop the archive occupying the last slot to make room.
    const lastIndex = MAX_LOG_FILES - 1;
    const lastArchive = archiveName(lastIndex);
    if (existsSync(lastArchive)) unlinkSync(lastArchive);

    // Shift the remaining archives up by one, highest index first so that
    // no rename overwrites a file that still has to be moved.
    let index = lastIndex - 1;
    while (index >= 1) {
      const source = archiveName(index);
      if (existsSync(source)) renameSync(source, archiveName(index + 1));
      index -= 1;
    }

    // The live file becomes the newest archive.
    renameSync(logFile, archiveName(1));
  } catch {
    /* A failed rotation must not affect the main flow. */
  }
}
|
|
47
|
+
|
|
20
48
|
function formatTime() {
|
|
21
49
|
return new Date().toISOString();
|
|
22
50
|
}
|
|
@@ -27,6 +55,147 @@ function truncate(str, maxLen = 500) {
|
|
|
27
55
|
return s.length > maxLen ? s.slice(0, maxLen) + '...' : s;
|
|
28
56
|
}
|
|
29
57
|
|
|
58
|
+
/**
 * Bounded in-memory log buffer.
 *
 * Keeps at most `maxSize` entries; once full, adding an entry evicts the
 * oldest one. The store itself never consults any debug switch.
 */
export class InMemoryLogStore {
  /**
   * @param {number} [maxSize=500] - Maximum number of entries retained.
   */
  constructor(maxSize = 500) {
    this.logs = [];
    this.maxSize = maxSize;
    this.startTime = Date.now();
  }

  /**
   * Append an entry, evicting the oldest one when the buffer overflows.
   *
   * @param {object} entry - Log record (the query helpers read `level`, `category` and `data.toolName`).
   */
  add(entry) {
    this.logs.push(entry);
    if (this.logs.length > this.maxSize) this.logs.shift();
  }

  /**
   * Return the most recent entries matching every supplied filter.
   *
   * @param {object} [filters]
   * @param {string} [filters.category] - Keep entries whose `category` equals this value.
   * @param {string} [filters.level] - Keep entries whose `level` equals this value.
   * @param {number} [filters.limit=50] - Maximum number of entries returned; zero or a negative number yields an empty array.
   * @param {string} [filters.toolName] - Keep entries whose `data.toolName` equals this value.
   * @returns {object[]} A new array, oldest entry first.
   */
  query({ category, level, limit = 50, toolName } = {}) {
    // `slice(-0)` is `slice(0)` and would return everything, and a negative
    // limit would turn into a positive start index, so handle both up front.
    if (typeof limit === 'number' && limit <= 0) return [];

    const matches = this.logs.filter((entry) => {
      if (category && entry.category !== category) return false;
      if (level && entry.level !== level) return false;
      if (toolName && entry.data?.toolName !== toolName) return false;
      return true;
    });
    return matches.slice(-limit);
  }

  /**
   * Summarise the current buffer contents.
   *
   * @returns {{ total: number, categories: Object<string, number>, errors: number, uptimeMs: number }}
   *   Entry count, per-category counts, number of `ERROR`-level entries, and
   *   milliseconds elapsed since the store was constructed.
   */
  getStats() {
    const categories = {};
    let errors = 0;
    for (const entry of this.logs) {
      categories[entry.category] = (categories[entry.category] || 0) + 1;
      if (entry.level === 'ERROR') errors += 1;
    }
    return {
      total: this.logs.length,
      categories,
      errors,
      uptimeMs: Date.now() - this.startTime,
    };
  }

  /** Discard every buffered entry. */
  clear() {
    this.logs = [];
  }
}

/** Module-wide store instance. */
export const logStore = new InMemoryLogStore();
|
|
99
|
+
|
|
100
|
+
/**
 * Callback handler that mirrors agent activity into the in-memory log store.
 *
 * Each handler builds a small structured payload and forwards it to the
 * module-level `logStore`; none of them checks a debug flag or touches disk.
 */
export class InMemoryLoggerCallback extends BaseCallbackHandler {
  name = 'InMemoryLoggerCallback';

  /**
   * Append one timestamped entry to `logStore`.
   *
   * @param {string} level - Severity label, e.g. 'INFO', 'DEBUG', 'ERROR'.
   * @param {string} category - Source label, e.g. 'LLM', 'TOOL', 'CHAIN', 'AGENT'.
   * @param {string} message - Human-readable summary.
   * @param {object|null} [data=null] - Structured details.
   */
  log(level, category, message, data = null) {
    const entry = { time: formatTime(), level, category, message, data };
    logStore.add(entry);
  }

  /** Record the start of an LLM call with a short preview of the first prompt. */
  handleLLMStart(llm, prompts, runId) {
    const model = llm?.id?.[2] || llm?.name;
    const payload = {
      runId,
      model,
      promptCount: prompts?.length,
      promptPreview: truncate(prompts?.[0], 200),
    };
    this.log('INFO', 'LLM', 'LLM 调用开始', payload);
  }

  /** Record the end of an LLM call with an output preview and token usage. */
  handleLLMEnd(output, runId) {
    const firstGeneration = output?.generations?.[0]?.[0];
    const content = firstGeneration?.text || firstGeneration?.message?.content;
    const payload = {
      runId,
      outputPreview: truncate(content, 300),
      tokenUsage: output?.llmOutput?.tokenUsage,
    };
    this.log('INFO', 'LLM', 'LLM 调用结束', payload);
  }

  /** Record an LLM failure. */
  handleLLMError(error, runId) {
    const reason = error?.message || String(error);
    this.log('ERROR', 'LLM', 'LLM 调用错误', { runId, error: reason });
  }

  /** Record a tool invocation together with its truncated input. */
  handleToolStart(tool, input, runId) {
    const toolName = tool?.name;
    const payload = { runId, toolName, input: truncate(input, 500) };
    this.log('INFO', 'TOOL', `工具调用: ${toolName || 'unknown'}`, payload);
  }

  /** Record a tool result (truncated). */
  handleToolEnd(output, runId) {
    const payload = { runId, output: truncate(output, 500) };
    this.log('INFO', 'TOOL', '工具返回', payload);
  }

  /** Record a tool failure, keeping the first five stack lines. */
  handleToolError(error, runId) {
    const payload = {
      runId,
      error: error?.message || String(error),
      stack: error?.stack?.split('\n').slice(0, 5),
    };
    this.log('ERROR', 'TOOL', '工具错误', payload);
  }

  /** Record the start of a chain and the names of its input keys. */
  handleChainStart(chain, inputs, runId) {
    const chainName = chain?.name;
    const payload = { runId, chainName, inputKeys: Object.keys(inputs || {}) };
    this.log('DEBUG', 'CHAIN', `Chain 开始: ${chainName || 'unknown'}`, payload);
  }

  /** Record the end of a chain and the names of its output keys. */
  handleChainEnd(outputs, runId) {
    const payload = { runId, outputKeys: Object.keys(outputs || {}) };
    this.log('DEBUG', 'CHAIN', 'Chain 结束', payload);
  }

  /** Record a chain failure. */
  handleChainError(error, runId) {
    const reason = error?.message || String(error);
    this.log('ERROR', 'CHAIN', 'Chain 错误', { runId, error: reason });
  }

  /** Record the tool an agent step selected, with truncated input and log text. */
  handleAgentAction(action, runId) {
    const selectedTool = action?.tool;
    const payload = {
      runId,
      tool: selectedTool,
      toolInput: truncate(action?.toolInput, 300),
      log: truncate(action?.log, 200),
    };
    this.log('INFO', 'AGENT', `Agent 动作: ${selectedTool}`, payload);
  }

  /** Record the agent's final return values (truncated). */
  handleAgentEnd(action, runId) {
    const payload = { runId, returnValues: truncate(action?.returnValues, 300) };
    this.log('INFO', 'AGENT', 'Agent 结束', payload);
  }
}
|
|
198
|
+
|
|
30
199
|
/**
|
|
31
200
|
* 文件日志回调处理器
|
|
32
201
|
*/
|
|
@@ -41,6 +210,8 @@ export class FileLoggerCallback extends BaseCallbackHandler {
|
|
|
41
210
|
}
|
|
42
211
|
|
|
43
212
|
log(level, category, message, data = null) {
|
|
213
|
+
rotateIfNeeded(this.logFile);
|
|
214
|
+
|
|
44
215
|
const line = JSON.stringify({
|
|
45
216
|
time: formatTime(),
|
|
46
217
|
level,
|
|
@@ -149,16 +320,20 @@ export class FileLoggerCallback extends BaseCallbackHandler {
|
|
|
149
320
|
}
|
|
150
321
|
|
|
151
322
|
/**
 * Build the array of logging callback instances.
 *
 * The result always holds an InMemoryLoggerCallback followed by a
 * FileLoggerCallback. The file callback is created with `verbose` turned on
 * when the DEBUG environment variable equals 'true' or `options.verbose` is truthy.
 *
 * @param {object} [options]
 * @param {boolean} [options.verbose] - Requests verbose output regardless of DEBUG.
 * @param {string} [options.logFile] - Log file path; falls back to LOG_FILE.
 * @returns {Array} `[InMemoryLoggerCallback, FileLoggerCallback]`
 */
export function createLogger(options = {}) {
  const debugEnabled = process.env.DEBUG === 'true';
  const verbose = debugEnabled || options.verbose || false;

  const memoryCallback = new InMemoryLoggerCallback();
  const fileCallback = new FileLoggerCallback({
    verbose,
    logFile: options.logFile || LOG_FILE,
  });

  return [memoryCallback, fileCallback];
}
|
|
163
338
|
|
|
164
339
|
export default FileLoggerCallback;
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DeepSpider - Memory Flush 中间件
|
|
3
|
+
* 在 summarization 触发前(85k token),注入 SystemMessage 提醒 Agent 保存关键进度
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { createMiddleware, countTokensApproximately } from 'langchain';
|
|
7
|
+
import { SystemMessage } from '@langchain/core/messages';
|
|
8
|
+
|
|
9
|
+
const FLUSH_THRESHOLD = 85000;
|
|
10
|
+
|
|
11
|
+
const FLUSH_REMINDER = `⚠️ 上下文即将被压缩(当前接近 token 上限)。
|
|
12
|
+
请立即使用 save_memo 工具保存以下关键信息,否则压缩后将丢失:
|
|
13
|
+
1. 当前分析目标和已完成的步骤
|
|
14
|
+
2. 已发现的关键参数、加密逻辑、请求链路
|
|
15
|
+
3. 下一步计划
|
|
16
|
+
|
|
17
|
+
保存后继续正常工作。`;
|
|
18
|
+
|
|
19
|
+
/**
 * Create the memory-flush middleware.
 *
 * Before each model call the approximate token count of `state.messages` is
 * compared with FLUSH_THRESHOLD. The first time the threshold is reached a
 * SystemMessage carrying FLUSH_REMINDER is appended to the messages. The
 * reminder is re-armed once the count falls below half of the threshold.
 *
 * @returns The middleware produced by `createMiddleware`.
 */
export function createMemoryFlushMiddleware() {
  // True while a reminder has been issued for the current context window.
  let reminded = false;

  const beforeModel = async (state) => {
    const approxTokens = countTokensApproximately(state.messages);

    // A sharp drop in tokens means the context was compacted: re-arm the reminder.
    if (reminded && approxTokens < FLUSH_THRESHOLD * 0.5) {
      reminded = false;
    }

    const shouldRemind = !reminded && approxTokens >= FLUSH_THRESHOLD;
    if (!shouldRemind) return state;

    reminded = true;
    const reminder = new SystemMessage(FLUSH_REMINDER);
    return {
      ...state,
      messages: [...state.messages, reminder],
    };
  };

  return createMiddleware({
    name: 'memoryFlushMiddleware',
    beforeModel,
  });
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* DeepSpider - 报告中间件
|
|
3
|
-
*
|
|
3
|
+
* 检测文件保存事件,触发报告显示和面板通知
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
6
|
import { createMiddleware } from 'langchain';
|
|
@@ -13,64 +13,122 @@ const reportStateSchema = z.object({
|
|
|
13
13
|
reportReady: z.boolean().default(false),
|
|
14
14
|
});
|
|
15
15
|
|
|
16
|
+
/**
 * Extract a Markdown file path from a parsed tool result.
 *
 * Two result shapes are recognised:
 *   - artifact_save:        { success, path: "/xxx/analysis.md" }
 *   - save_analysis_report: { success, paths: { markdown: "/xxx/analysis.md" }, dir }
 *
 * @param {object|null|undefined} content - Parsed tool result.
 * @returns {string|null} The Markdown path, or null when the result is not
 *   successful or carries no Markdown file.
 */
function extractMdPath(content) {
  if (!content?.success) return null;

  const { path, paths } = content;

  // artifact_save shape. The type guard keeps a non-string `path` from
  // throwing on `.endsWith` and aborting the surrounding tool call.
  if (typeof path === 'string' && path.endsWith('.md')) return path;

  // save_analysis_report shape.
  if (paths?.markdown) return paths.markdown;

  return null;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Describe the file(s) a tool just saved, for panel notifications.
 *
 * @param {object|null|undefined} content - Parsed tool result.
 * @returns {{ path: string, type: string }|null}
 *   - single-file result (`content.path`): the path plus its extension without
 *     the dot (empty string when the file name has no extension);
 *   - multi-file result (`content.paths`): `content.dir` (or the Markdown path
 *     when `dir` is missing) with type 'report';
 *   - null when the result is not successful or matches neither shape.
 */
function extractSavedFile(content) {
  if (!content?.success) return null;

  const { path, paths, dir } = content;

  // artifact_save: single file.
  if (typeof path === 'string' && path) {
    // Look for the extension in the last path segment only, so that a dot in
    // a directory name (or a file without any extension) is not misread.
    const lastSeparator = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\'));
    const fileName = path.slice(lastSeparator + 1);
    const dotIndex = fileName.lastIndexOf('.');
    const type = dotIndex === -1 ? '' : fileName.slice(dotIndex + 1);
    return { path, type };
  }

  // save_analysis_report: multiple files.
  if (paths) {
    return { path: dir || paths.markdown, type: 'report' };
  }

  return null;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Read the `content` of a tool result or tool message.
 *
 * String content is parsed as JSON; any other content is returned untouched.
 *
 * @param {object|null|undefined} result - Object carrying a `content` property.
 * @returns {*} The parsed or original content, or null when the string is not valid JSON.
 */
function parseContent(result) {
  const raw = result?.content;
  if (typeof raw !== 'string') return raw;

  try {
    return JSON.parse(raw);
  } catch {
    return null;
  }
}
|
|
64
|
+
|
|
16
65
|
/**
|
|
17
66
|
* 创建报告中间件
|
|
18
|
-
*
|
|
19
|
-
* 在 afterAgent 中触发报告显示回调
|
|
67
|
+
* 监听 artifact_save 和 save_analysis_report,触发报告显示 + 面板通知
|
|
20
68
|
*/
|
|
21
69
|
export function createReportMiddleware(options = {}) {
|
|
22
|
-
const { onReportReady } = options;
|
|
70
|
+
const { onReportReady, onFileSaved } = options;
|
|
71
|
+
|
|
72
|
+
const WATCHED_TOOLS = new Set(['artifact_save', 'save_analysis_report']);
|
|
23
73
|
|
|
24
74
|
return createMiddleware({
|
|
25
75
|
name: 'reportMiddleware',
|
|
26
76
|
stateSchema: reportStateSchema,
|
|
27
77
|
|
|
28
|
-
|
|
78
|
+
wrapToolCall: async (request, handler) => {
|
|
79
|
+
const toolName = request.tool?.name ?? request.toolCall?.name;
|
|
80
|
+
const result = await handler(request);
|
|
81
|
+
|
|
82
|
+
if (!WATCHED_TOOLS.has(toolName)) return result;
|
|
83
|
+
|
|
84
|
+
const content = parseContent(result);
|
|
85
|
+
if (!content) return result;
|
|
86
|
+
|
|
87
|
+
// 检测 .md 文件 → 触发报告显示
|
|
88
|
+
const mdPath = extractMdPath(content);
|
|
89
|
+
if (mdPath) {
|
|
90
|
+
console.log('[reportMiddleware] 检测到报告文件:', mdPath);
|
|
91
|
+
if (onReportReady) {
|
|
92
|
+
await onReportReady(mdPath);
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
// 通知文件已保存(面板可显示提示)
|
|
97
|
+
const saved = extractSavedFile(content);
|
|
98
|
+
if (saved && onFileSaved) {
|
|
99
|
+
await onFileSaved(saved);
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
return result;
|
|
103
|
+
},
|
|
104
|
+
|
|
105
|
+
// 备选:afterModel 检测 ToolMessage 中的报告文件
|
|
29
106
|
afterModel: (state) => {
|
|
30
107
|
const messages = state.messages;
|
|
31
|
-
if (!messages
|
|
108
|
+
if (!messages?.length) return undefined;
|
|
32
109
|
|
|
33
|
-
// 查找最近的 ToolMessage
|
|
34
110
|
for (let i = messages.length - 1; i >= 0; i--) {
|
|
35
111
|
const msg = messages[i];
|
|
36
|
-
if (ToolMessage.isInstance(msg))
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
return { lastWrittenMdFile: content.path };
|
|
46
|
-
}
|
|
47
|
-
} catch {
|
|
48
|
-
// 解析失败,忽略
|
|
49
|
-
}
|
|
112
|
+
if (!ToolMessage.isInstance(msg)) continue;
|
|
113
|
+
|
|
114
|
+
const content = parseContent(msg);
|
|
115
|
+
if (!content) continue;
|
|
116
|
+
|
|
117
|
+
const mdPath = extractMdPath(content);
|
|
118
|
+
if (mdPath) {
|
|
119
|
+
console.log('[reportMiddleware] afterModel 检测到报告:', mdPath);
|
|
120
|
+
return { lastWrittenMdFile: mdPath };
|
|
50
121
|
}
|
|
51
122
|
}
|
|
52
123
|
return undefined;
|
|
53
124
|
},
|
|
54
125
|
|
|
55
|
-
//
|
|
126
|
+
// streamEvents 模式下可能不被调用
|
|
56
127
|
afterAgent: async (state) => {
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
if (mdFile) {
|
|
60
|
-
console.log('[reportMiddleware] afterAgent: 准备显示报告:', mdFile);
|
|
61
|
-
|
|
62
|
-
// 调用回调通知外部
|
|
63
|
-
if (onReportReady) {
|
|
64
|
-
try {
|
|
65
|
-
await onReportReady(mdFile);
|
|
66
|
-
} catch (e) {
|
|
67
|
-
console.error('[reportMiddleware] onReportReady 失败:', e.message);
|
|
68
|
-
}
|
|
69
|
-
}
|
|
70
|
-
|
|
128
|
+
if (state.lastWrittenMdFile) {
|
|
129
|
+
console.log('[reportMiddleware] afterAgent: 报告就绪:', state.lastWrittenMdFile);
|
|
71
130
|
return { reportReady: true };
|
|
72
131
|
}
|
|
73
|
-
|
|
74
132
|
return undefined;
|
|
75
133
|
},
|
|
76
134
|
});
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DeepSpider - 自定义子代理中间件
|
|
3
|
+
* 复刻 deepagents 内置的 createSubAgentMiddleware,增加 context 结构化传递
|
|
4
|
+
*
|
|
5
|
+
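* Main difference from the built-in version: the task tool schema gains an optional `context` field, declared as z.object({}).passthrough().optional() rather than z.record() (see the note next to the schema);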
|
|
6
|
+
* LLM 按需填写 key-value 对,子代理收到的 HumanMessage 中 context 以 <context> 块拼接在 description 之后。
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { createMiddleware, createAgent, tool, humanInTheLoopMiddleware } from 'langchain';
|
|
10
|
+
import { HumanMessage, SystemMessage, ToolMessage } from '@langchain/core/messages';
|
|
11
|
+
import { getCurrentTaskInput, Command } from '@langchain/langgraph';
|
|
12
|
+
import { TASK_SYSTEM_PROMPT } from 'deepagents';
|
|
13
|
+
import { z } from 'zod';
|
|
14
|
+
|
|
15
|
+
// State keys that are never handed to a subagent nor merged back from one
// (per the original note, this mirrors the list used inside deepagents).
const EXCLUDED_STATE_KEYS = [
  'messages',
  'todos',
  'structuredResponse',
  'skillsMetadata',
  'memoryContents',
];

/**
 * Copy `state` without the keys listed in EXCLUDED_STATE_KEYS.
 *
 * @param {object} state - Agent state object.
 * @returns {object} A new shallow copy holding only the keys that may be shared.
 */
function filterStateForSubagent(state) {
  const shared = {};
  for (const key of Object.keys(state)) {
    if (EXCLUDED_STATE_KEYS.includes(key)) continue;
    shared[key] = state[key];
  }
  return shared;
}
|
|
34
|
+
|
|
35
|
+
// Appended to every subagent result returned to the calling agent.
const TRUST_SIGNAL = `\n\n---\n⚠️ 子代理已完成任务。请直接使用子代理输出的文件和结论,不要重复执行 artifact_load / artifact_glob / ls 等文件读取操作来检查子代理已保存的文件。如果需要对生成的代码做端到端验证,那是你的职责,请正常执行。`;

/**
 * Flatten message content to plain text.
 *
 * Message content may be a string or an array of content blocks; only string
 * items and `{ type: 'text', text }` blocks contribute to the result.
 *
 * @param {*} content - `content` of a message.
 * @returns {string} Concatenated text, or an empty string when there is none.
 */
function messageContentToText(content) {
  if (typeof content === 'string') return content;
  if (!Array.isArray(content)) return '';

  return content
    .map((block) => {
      if (typeof block === 'string') return block;
      return block?.type === 'text' && typeof block.text === 'string' ? block.text : '';
    })
    .join('');
}

/**
 * Wrap a subagent result in a Command.
 *
 * The Command's update carries the subagent's state (minus the keys removed by
 * `filterStateForSubagent`) plus a single ToolMessage named 'task' whose text is
 * the subagent's last message followed by TRUST_SIGNAL. When the last message
 * has no text, 'Task completed' is used instead.
 *
 * @param {object} result - Value returned by the subagent's `invoke`.
 * @param {string} toolCallId - Id of the `task` tool call being answered.
 * @returns {Command}
 */
function returnCommandWithStateUpdate(result, toolCallId) {
  const stateUpdate = filterStateForSubagent(result);
  const messages = result.messages;
  const lastMessage = messages?.[messages.length - 1];

  // Content can be an array of blocks; concatenating such an array with a
  // string would stringify it as "[object Object]", so flatten it first.
  const text = messageContentToText(lastMessage?.content) || 'Task completed';
  const content = text + TRUST_SIGNAL;

  return new Command({
    update: {
      ...stateUpdate,
      messages: [new ToolMessage({
        content,
        tool_call_id: toolCallId,
        name: 'task',
      })],
    },
  });
}
|
|
56
|
+
|
|
57
|
+
/**
 * Build the description text of the `task` tool.
 *
 * The text lists the available subagents, then the usage notes, then a short
 * section describing the `context` parameter.
 *
 * @param {string[]} subagentDescriptions - One "- name: description" line per subagent.
 * @returns {string} The description, without leading or trailing whitespace.
 */
function getTaskToolDescription(subagentDescriptions) {
  const agentList = subagentDescriptions.join('\n');

  const lines = [
    'Launch an ephemeral subagent to handle complex, multi-step independent tasks with isolated context windows.',
    '',
    'Available agent types and the tools they have access to:',
    agentList,
    '',
    'When using the Task tool, you must specify a subagent_type parameter to select which agent type to use.',
    '',
    '## Usage notes:',
    '1. Launch multiple agents concurrently whenever possible, to maximize performance; to do that, use a single message with multiple tool uses',
    '2. When the agent is done, it will return a single message back to you. The result returned by the agent is not visible to the user. To show the user the result, you should send a text message back to the user with a concise summary of the result.',
    '3. Each agent invocation is stateless. You will not be able to send additional messages to the agent, nor will the agent be able to communicate with you outside of its final report. Therefore, your prompt should contain a highly detailed task description for the agent to perform autonomously and you should specify exactly what information the agent should return back to you in its final and only message to you.',
    "4. The agent's outputs should generally be trusted",
    "5. Clearly tell the agent whether you expect it to create content, perform analysis, or just do research (search, file reads, web fetches, etc.), since it is not aware of the user's intent",
    '6. If the agent description mentions that it should be used proactively, then you should try your best to use it without the user having to ask for it first. Use your judgement.',
    '',
    '## context 参数',
    '委托子代理时,使用 context 参数传递结构化上下文(key-value 对),如站点标识、请求 ID、目标参数名等。context 会注入到子代理的初始消息中,确保关键信息不丢失。',
  ];

  return lines.join('\n').trim();
}
|
|
81
|
+
|
|
82
|
+
/**
 * Compile the configured subagents into runnable instances.
 *
 * Entries that already carry a `runnable` are used as-is; every other entry is
 * compiled with `createAgent`. A 'general-purpose' agent is added in front when
 * `options.generalPurposeAgent` is truthy.
 *
 * @param {object} options
 * @param options.defaultModel - Model used when a subagent does not set its own.
 * @param options.defaultTools - Tools used when a subagent does not set its own.
 * @param {Array} [options.defaultMiddleware] - Middleware prepended to every compiled subagent.
 * @param {Array} [options.generalPurposeMiddleware] - Middleware for the general-purpose agent; falls back to the default middleware.
 * @param {object} [options.defaultInterruptOn] - Human-in-the-loop config used when a subagent has none.
 * @param {Array} options.subagents - Subagent definitions.
 * @param {boolean} [options.generalPurposeAgent] - Whether to create the general-purpose agent.
 * @returns {{ agents: Object, descriptions: string[] }} Runnables keyed by name, plus one description line per agent.
 */
function getSubagents(options) {
  const {
    defaultModel,
    defaultTools,
    defaultMiddleware,
    generalPurposeMiddleware: gpMiddleware,
    defaultInterruptOn,
    subagents,
    generalPurposeAgent,
  } = options;

  const sharedMiddleware = defaultMiddleware || [];
  const agents = {};
  const descriptions = [];

  // Returns a fresh middleware list, with a human-in-the-loop step appended
  // when an interrupt configuration is present.
  const withInterrupts = (base, interruptOn) => {
    const stack = [...base];
    if (interruptOn) stack.push(humanInTheLoopMiddleware({ interruptOn }));
    return stack;
  };

  // General-purpose subagent (the original note says DeepSpider leaves it
  // disabled by default but keeps the capability).
  if (generalPurposeAgent) {
    agents['general-purpose'] = createAgent({
      model: defaultModel,
      systemPrompt: 'In order to complete the objective that the user asks of you, you have access to a number of standard tools.',
      tools: defaultTools,
      middleware: withInterrupts(gpMiddleware || sharedMiddleware, defaultInterruptOn),
      name: 'general-purpose',
    });
    descriptions.push('- general-purpose: General-purpose agent for researching complex questions, searching for files and content, and executing multi-step tasks.');
  }

  // Custom subagents.
  for (const spec of subagents) {
    descriptions.push(`- ${spec.name}: ${spec.description}`);

    // Already compiled: use the runnable directly.
    if ('runnable' in spec) {
      agents[spec.name] = spec.runnable;
      continue;
    }

    const ownMiddleware = spec.middleware ? spec.middleware : [];
    const middleware = withInterrupts(
      [...sharedMiddleware, ...ownMiddleware],
      spec.interruptOn || defaultInterruptOn,
    );

    agents[spec.name] = createAgent({
      model: spec.model ?? defaultModel,
      systemPrompt: spec.systemPrompt,
      tools: spec.tools ?? defaultTools,
      middleware,
      name: spec.name,
    });
  }

  return { agents, descriptions };
}
|
|
141
|
+
|
|
142
|
+
/**
 * Create the enhanced `task` tool, whose schema has an extra `context` field.
 *
 * When invoked, the tool selects the subagent named by `subagent_type`, starts
 * it with a single HumanMessage made of `description` (followed by a
 * `<context>` block holding the JSON-serialised `context`, when one is given),
 * and returns the subagent's result wrapped by `returnCommandWithStateUpdate`.
 *
 * @param {object} options - Forwarded unchanged to `getSubagents`.
 * @returns The tool produced by `tool(...)`.
 * @throws {Error} At call time, when `subagent_type` is not a configured
 *   subagent or when the invocation carries no tool call id.
 */
function createEnhancedTaskTool(options) {
  const { agents: subagentGraphs, descriptions: subagentDescriptions } = getSubagents(options);
  const availableTypes = Object.keys(subagentGraphs).join(', ');

  return tool(
    async (input, config) => {
      const { description, subagent_type, context } = input;

      // Own-property check: the `in` operator would also accept inherited
      // keys such as "toString" or "constructor" coming from the model.
      if (!Object.prototype.hasOwnProperty.call(subagentGraphs, subagent_type)) {
        const allowedTypes = Object.keys(subagentGraphs).map((k) => `\`${k}\``).join(', ');
        throw new Error(`Error: invoked agent of type ${subagent_type}, the only allowed types are ${allowedTypes}`);
      }

      // Check the id before running the subagent, so a missing id fails
      // immediately instead of after the whole subagent run.
      const toolCallId = config.toolCall?.id;
      if (!toolCallId) throw new Error('Tool call ID is required for subagent invocation');

      // Initial message for the subagent: description + optional context block.
      let content = description;
      if (context && Object.keys(context).length > 0) {
        content += `\n\n<context>\n${JSON.stringify(context)}\n</context>`;
      }

      const subagent = subagentGraphs[subagent_type];
      const subagentState = filterStateForSubagent(getCurrentTaskInput());
      subagentState.messages = [new HumanMessage({ content })];

      const result = await subagent.invoke(subagentState, config);
      return returnCommandWithStateUpdate(result, toolCallId);
    },
    {
      name: 'task',
      description: getTaskToolDescription(subagentDescriptions),
      schema: z.object({
        description: z.string().describe('The task to execute with the selected agent'),
        subagent_type: z.string().describe(`Name of the agent to use. Available: ${availableTypes}`),
        // NOTE (from the original author): z.record() is avoided because Zod v4's
        // toJSONSchema emits `propertyNames`, a keyword the Anthropic API does
        // not support; z.object({}) with passthrough is used instead.
        context: z.object({}).passthrough().optional().describe('Structured key-value context to pass to the subagent (e.g. site, requestId, targetParam)'),
      }),
    },
  );
}
|
|
186
|
+
|
|
187
|
+
/**
 * Create the custom subagent middleware.
 *
 * The middleware registers the enhanced `task` tool and, on every model call,
 * appends a SystemMessage made of TASK_SYSTEM_PROMPT plus a short guide on the
 * `context` parameter to the request's system message.
 *
 * @param {Object} options
 * @param {LanguageModelLike} options.defaultModel - Model instance.
 * @param {StructuredTool[]} [options.defaultTools=[]] - Default tool set.
 * @param {SubAgent[]} [options.subagents=[]] - Subagent definitions.
 * @param {AgentMiddleware[]} [options.defaultMiddleware=null] - Default middleware for subagents.
 * @param {AgentMiddleware[]} [options.generalPurposeMiddleware=null] - Middleware for the general-purpose subagent.
 * @param {boolean} [options.generalPurposeAgent=false] - Whether to create the general-purpose subagent.
 * @param {Object} [options.defaultInterruptOn=null] - Human-in-the-loop configuration.
 * @returns The middleware produced by `createMiddleware`.
 */
export function createCustomSubAgentMiddleware(options) {
  const {
    defaultModel,
    defaultTools = [],
    subagents = [],
    defaultMiddleware = null,
    generalPurposeMiddleware = null,
    generalPurposeAgent = false,
    defaultInterruptOn = null,
  } = options;

  const enhancedTaskTool = createEnhancedTaskTool({
    defaultModel,
    defaultTools,
    subagents,
    defaultMiddleware,
    generalPurposeMiddleware,
    generalPurposeAgent,
    defaultInterruptOn,
  });

  // TASK_SYSTEM_PROMPT followed by the usage guide for the context parameter.
  const fullSystemPrompt = `${TASK_SYSTEM_PROMPT}\n\n委托子代理时,使用 context 参数传递结构化上下文(key-value 对),如站点标识、请求 ID、目标参数名等。context 会注入到子代理的初始消息中,确保关键信息不丢失。`;

  const wrapModelCall = async (request, handler) => {
    const taskPrompt = new SystemMessage({ content: fullSystemPrompt });
    return handler({
      ...request,
      systemMessage: request.systemMessage.concat(taskPrompt),
    });
  };

  return createMiddleware({
    name: 'subAgentMiddleware',
    tools: [enhancedTaskTool],
    wrapModelCall,
  });
}
|