deepspider 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. package/.env.example +3 -0
  2. package/README.md +21 -15
  3. package/package.json +9 -7
  4. package/src/agent/core/PanelBridge.js +56 -78
  5. package/src/agent/core/StreamHandler.js +244 -20
  6. package/src/agent/index.js +120 -23
  7. package/src/agent/logger.js +183 -8
  8. package/src/agent/middleware/memoryFlush.js +48 -0
  9. package/src/agent/middleware/report.js +95 -37
  10. package/src/agent/middleware/subagent.js +236 -0
  11. package/src/agent/middleware/toolAvailability.js +37 -0
  12. package/src/agent/middleware/toolGuard.js +187 -0
  13. package/src/agent/middleware/validationWorkflow.js +171 -0
  14. package/src/agent/prompts/system.js +310 -59
  15. package/src/agent/run.js +168 -20
  16. package/src/agent/sessions.js +88 -0
  17. package/src/agent/skills/anti-detect/SKILL.md +89 -14
  18. package/src/agent/skills/captcha/SKILL.md +93 -19
  19. package/src/agent/skills/crawler/SKILL.md +64 -3
  20. package/src/agent/skills/crawler/evolved.md +9 -1
  21. package/src/agent/skills/dynamic-analysis/SKILL.md +74 -7
  22. package/src/agent/skills/env/SKILL.md +75 -0
  23. package/src/agent/skills/js2python/evolved.md +5 -1
  24. package/src/agent/skills/sandbox/SKILL.md +35 -0
  25. package/src/agent/skills/static-analysis/SKILL.md +98 -2
  26. package/src/agent/skills/static-analysis/evolved.md +5 -1
  27. package/src/agent/subagents/anti-detect.js +36 -24
  28. package/src/agent/subagents/captcha.js +35 -28
  29. package/src/agent/subagents/crawler.js +40 -105
  30. package/src/agent/subagents/factory.js +129 -9
  31. package/src/agent/subagents/index.js +4 -13
  32. package/src/agent/subagents/js2python.js +25 -35
  33. package/src/agent/subagents/reverse.js +180 -0
  34. package/src/agent/tools/analysis.js +101 -8
  35. package/src/agent/tools/anti-detect.js +5 -2
  36. package/src/agent/tools/browser.js +186 -13
  37. package/src/agent/tools/capture.js +24 -3
  38. package/src/agent/tools/correlate.js +129 -15
  39. package/src/agent/tools/crawler.js +3 -2
  40. package/src/agent/tools/crawlerGenerator.js +90 -0
  41. package/src/agent/tools/debug.js +43 -6
  42. package/src/agent/tools/evolve.js +5 -2
  43. package/src/agent/tools/extractor.js +5 -1
  44. package/src/agent/tools/file.js +14 -5
  45. package/src/agent/tools/generateHook.js +66 -0
  46. package/src/agent/tools/hookManager.js +19 -9
  47. package/src/agent/tools/index.js +36 -21
  48. package/src/agent/tools/nodejs.js +41 -6
  49. package/src/agent/tools/patch.js +1 -1
  50. package/src/agent/tools/sandbox.js +21 -1
  51. package/src/agent/tools/scratchpad.js +70 -0
  52. package/src/agent/tools/store.js +1 -1
  53. package/src/agent/tools/tracing.js +26 -0
  54. package/src/agent/tools/verifyAlgorithm.js +117 -0
  55. package/src/browser/EnvBridge.js +27 -13
  56. package/src/browser/client.js +128 -18
  57. package/src/browser/collector.js +101 -22
  58. package/src/browser/defaultHooks.js +3 -1
  59. package/src/browser/hooks/index.js +5 -0
  60. package/src/browser/interceptors/AntiDebugInterceptor.js +132 -0
  61. package/src/browser/interceptors/NetworkInterceptor.js +76 -12
  62. package/src/browser/interceptors/ScriptInterceptor.js +32 -7
  63. package/src/browser/interceptors/index.js +1 -0
  64. package/src/browser/ui/analysisPanel.js +541 -464
  65. package/src/cli/commands/config.js +11 -3
  66. package/src/config/paths.js +9 -1
  67. package/src/config/settings.js +7 -1
  68. package/src/core/PatchGenerator.js +24 -4
  69. package/src/core/Sandbox.js +140 -3
  70. package/src/env/EnvCodeGenerator.js +60 -88
  71. package/src/env/modules/bom/history.js +6 -0
  72. package/src/env/modules/bom/location.js +6 -0
  73. package/src/env/modules/bom/navigator.js +13 -0
  74. package/src/env/modules/bom/screen.js +6 -0
  75. package/src/env/modules/bom/storage.js +7 -0
  76. package/src/env/modules/dom/document.js +14 -0
  77. package/src/env/modules/dom/event.js +4 -0
  78. package/src/env/modules/index.js +27 -10
  79. package/src/env/modules/webapi/fetch.js +4 -0
  80. package/src/env/modules/webapi/url.js +4 -0
  81. package/src/env/modules/webapi/xhr.js +8 -0
  82. package/src/store/DataStore.js +125 -42
  83. package/src/store/Store.js +2 -1
  84. package/src/agent/subagents/dynamic.js +0 -64
  85. package/src/agent/subagents/env-agent.js +0 -82
  86. package/src/agent/subagents/sandbox.js +0 -55
  87. package/src/agent/subagents/static.js +0 -66
@@ -4,12 +4,14 @@
4
4
  */
5
5
 
6
6
  import { BaseCallbackHandler } from '@langchain/core/callbacks/base';
7
- import { appendFileSync, mkdirSync, existsSync } from 'fs';
7
+ import { appendFileSync, mkdirSync, existsSync, statSync, renameSync, unlinkSync } from 'fs';
8
8
  import { join } from 'path';
9
9
  import { DEEPSPIDER_HOME } from '../config/paths.js';
10
10
 
11
11
  const LOG_DIR = join(DEEPSPIDER_HOME, 'logs');
12
12
  const LOG_FILE = join(LOG_DIR, 'agent.log');
13
+ const MAX_LOG_SIZE = 5 * 1024 * 1024; // 5MB
14
+ const MAX_LOG_FILES = 3; // agent.log, agent.log.1, agent.log.2
13
15
 
14
16
  function ensureLogDir() {
15
17
  if (!existsSync(LOG_DIR)) {
@@ -17,6 +19,32 @@ function ensureLogDir() {
17
19
  }
18
20
  }
19
21
 
22
/**
 * Rotate a log file once it has reached `maxSize` bytes.
 *
 * With the default limits the chain is:
 *   agent.log → agent.log.1 → agent.log.2 → deleted
 *
 * Rotation is best-effort: any filesystem error is swallowed on purpose so
 * that a failed rotation never interrupts the caller's logging path.
 *
 * @param {string} logFile - Path of the active log file.
 * @param {number} [maxSize=MAX_LOG_SIZE] - Size in bytes at which the file is rotated.
 * @param {number} [maxFiles=MAX_LOG_FILES] - Total number of files to keep,
 *   counting the active one (3 → logFile, logFile.1, logFile.2).
 * @returns {void}
 */
function rotateIfNeeded(logFile, maxSize = MAX_LOG_SIZE, maxFiles = MAX_LOG_FILES) {
  try {
    if (!existsSync(logFile)) return;
    const { size } = statSync(logFile);
    if (size < maxSize) return;

    // With fewer than two files there is no backup slot: drop the oversized
    // file instead of silently keeping an extra ".1" copy around.
    if (maxFiles < 2) {
      unlinkSync(logFile);
      return;
    }

    // Remove the oldest backup so the chain never grows past maxFiles.
    const oldest = `${logFile}.${maxFiles - 1}`;
    if (existsSync(oldest)) unlinkSync(oldest);

    // Shift the remaining backups up by one, highest index first so nothing
    // is overwritten before it has been moved.
    for (let i = maxFiles - 2; i >= 1; i--) {
      const from = `${logFile}.${i}`;
      if (existsSync(from)) renameSync(from, `${logFile}.${i + 1}`);
    }

    // The active file becomes the newest backup.
    renameSync(logFile, `${logFile}.1`);
  } catch {
    /* a failed rotation must not affect the main flow */
  }
}
47
+
20
48
  function formatTime() {
21
49
  return new Date().toISOString();
22
50
  }
@@ -27,6 +55,147 @@ function truncate(str, maxLen = 500) {
27
55
  return s.length > maxLen ? s.slice(0, maxLen) + '...' : s;
28
56
  }
29
57
 
58
/**
 * Bounded in-memory log buffer.
 *
 * Keeps at most `maxSize` entries; once full, the oldest entry is dropped for
 * every new one. The store itself does not consult any debug switch.
 */
export class InMemoryLogStore {
  /**
   * @param {number} [maxSize=500] - Maximum number of entries retained.
   */
  constructor(maxSize = 500) {
    this.logs = [];
    this.maxSize = maxSize;
    this.startTime = Date.now();
  }

  /**
   * Append an entry, evicting the oldest one when the buffer overflows.
   *
   * @param {object} entry - Log entry (expected fields: level, category, message, data).
   * @returns {void}
   */
  add(entry) {
    this.logs.push(entry);
    if (this.logs.length > this.maxSize) this.logs.shift();
  }

  /**
   * Return the most recent entries matching every given filter.
   *
   * @param {object} [filters]
   * @param {string} [filters.category] - Keep entries whose `category` equals this.
   * @param {string} [filters.level] - Keep entries whose `level` equals this.
   * @param {number} [filters.limit=50] - Maximum number of entries returned;
   *   zero, negative or NaN yields an empty array.
   * @param {string} [filters.toolName] - Keep entries whose `data.toolName` equals this.
   * @returns {object[]} A new array, oldest first, of at most `limit` entries.
   */
  query({ category, level, limit = 50, toolName } = {}) {
    // slice(-0) is slice(0) and would return everything, and a negative limit
    // would cut from the wrong end, so treat any non-positive limit as "none".
    if (!(limit > 0)) return [];

    let result = this.logs;
    if (category) result = result.filter((l) => l.category === category);
    if (level) result = result.filter((l) => l.level === level);
    if (toolName) result = result.filter((l) => l.data?.toolName === toolName);
    return result.slice(-limit);
  }

  /**
   * Summarize the buffer contents.
   *
   * @returns {{ total: number, categories: Object<string, number>, errors: number, uptimeMs: number }}
   *   Entry count, per-category counts, number of `ERROR` entries and the
   *   milliseconds elapsed since the store was constructed.
   */
  getStats() {
    const cats = {};
    for (const l of this.logs) {
      cats[l.category] = (cats[l.category] || 0) + 1;
    }
    return {
      total: this.logs.length,
      categories: cats,
      errors: this.logs.filter((l) => l.level === 'ERROR').length,
      uptimeMs: Date.now() - this.startTime,
    };
  }

  /** Drop every buffered entry. */
  clear() {
    this.logs = [];
  }
}
97
+
98
+ export const logStore = new InMemoryLogStore();
99
+
100
/**
 * Callback handler that mirrors LLM / tool / chain / agent events into the
 * shared `logStore` buffer. The class does not check any debug flag itself.
 */
export class InMemoryLoggerCallback extends BaseCallbackHandler {
  name = 'InMemoryLoggerCallback';

  /**
   * Record one entry in `logStore`, stamped with the current time.
   *
   * @param {string} level - Severity label, e.g. 'INFO', 'DEBUG', 'ERROR'.
   * @param {string} category - Event family, e.g. 'LLM', 'TOOL', 'CHAIN', 'AGENT'.
   * @param {string} message - Human-readable message.
   * @param {object|null} [data=null] - Structured payload.
   */
  log(level, category, message, data = null) {
    const entry = { time: formatTime(), level, category, message, data };
    logStore.add(entry);
  }

  /** LLM call started: records the model, prompt count and a prompt preview. */
  handleLLMStart(llm, prompts, runId) {
    const model = llm?.id?.[2] || llm?.name;
    const promptCount = prompts?.length;
    const promptPreview = truncate(prompts?.[0], 200);
    this.log('INFO', 'LLM', 'LLM 调用开始', { runId, model, promptCount, promptPreview });
  }

  /** LLM call finished: records an output preview and the token usage. */
  handleLLMEnd(output, runId) {
    const firstGeneration = output?.generations?.[0]?.[0];
    const content = firstGeneration?.text || firstGeneration?.message?.content;
    const outputPreview = truncate(content, 300);
    const tokenUsage = output?.llmOutput?.tokenUsage;
    this.log('INFO', 'LLM', 'LLM 调用结束', { runId, outputPreview, tokenUsage });
  }

  /** LLM call failed. */
  handleLLMError(error, runId) {
    const reason = error?.message || String(error);
    this.log('ERROR', 'LLM', 'LLM 调用错误', { runId, error: reason });
  }

  /** Tool invocation started: records the tool name and a truncated input. */
  handleToolStart(tool, input, runId) {
    const toolName = tool?.name;
    const message = `工具调用: ${toolName || 'unknown'}`;
    this.log('INFO', 'TOOL', message, { runId, toolName, input: truncate(input, 500) });
  }

  /** Tool invocation returned: records a truncated output. */
  handleToolEnd(output, runId) {
    const preview = truncate(output, 500);
    this.log('INFO', 'TOOL', '工具返回', { runId, output: preview });
  }

  /** Tool invocation failed: records the message and the first five stack lines. */
  handleToolError(error, runId) {
    const reason = error?.message || String(error);
    const stack = error?.stack?.split('\n').slice(0, 5);
    this.log('ERROR', 'TOOL', '工具错误', { runId, error: reason, stack });
  }

  /** Chain started: records the chain name and the keys of its inputs. */
  handleChainStart(chain, inputs, runId) {
    const chainName = chain?.name;
    const message = `Chain 开始: ${chainName || 'unknown'}`;
    const inputKeys = Object.keys(inputs || {});
    this.log('DEBUG', 'CHAIN', message, { runId, chainName, inputKeys });
  }

  /** Chain finished: records the keys of its outputs. */
  handleChainEnd(outputs, runId) {
    const outputKeys = Object.keys(outputs || {});
    this.log('DEBUG', 'CHAIN', 'Chain 结束', { runId, outputKeys });
  }

  /** Chain failed. */
  handleChainError(error, runId) {
    const reason = error?.message || String(error);
    this.log('ERROR', 'CHAIN', 'Chain 错误', { runId, error: reason });
  }

  /** Agent chose an action: records the tool plus truncated input and log text. */
  handleAgentAction(action, runId) {
    const chosenTool = action?.tool;
    const payload = {
      runId,
      tool: chosenTool,
      toolInput: truncate(action?.toolInput, 300),
      log: truncate(action?.log, 200),
    };
    this.log('INFO', 'AGENT', `Agent 动作: ${chosenTool}`, payload);
  }

  /** Agent finished: records a truncated view of the return values. */
  handleAgentEnd(action, runId) {
    const returnValues = truncate(action?.returnValues, 300);
    this.log('INFO', 'AGENT', 'Agent 结束', { runId, returnValues });
  }
}
198
+
30
199
  /**
31
200
  * 文件日志回调处理器
32
201
  */
@@ -41,6 +210,8 @@ export class FileLoggerCallback extends BaseCallbackHandler {
41
210
  }
42
211
 
43
212
  log(level, category, message, data = null) {
213
+ rotateIfNeeded(this.logFile);
214
+
44
215
  const line = JSON.stringify({
45
216
  time: formatTime(),
46
217
  level,
@@ -149,16 +320,20 @@ export class FileLoggerCallback extends BaseCallbackHandler {
149
320
  }
150
321
 
151
322
  /**
152
- * 创建日志回调实例
323
+ * 创建日志回调实例数组
324
+ * 始终包含 InMemoryLoggerCallback + FileLoggerCallback
325
+ * DEBUG=true 时额外开启 verbose(控制台输出)
153
326
  */
154
327
  export function createLogger(options = {}) {
155
- const enabled = process.env.DEBUG === 'true' || options.enabled;
156
- if (!enabled) return null;
328
+ const verbose = process.env.DEBUG === 'true' || options.verbose || false;
157
329
 
158
- return new FileLoggerCallback({
159
- verbose: options.verbose || false,
160
- logFile: options.logFile || LOG_FILE,
161
- });
330
+ return [
331
+ new InMemoryLoggerCallback(),
332
+ new FileLoggerCallback({
333
+ verbose,
334
+ logFile: options.logFile || LOG_FILE,
335
+ }),
336
+ ];
162
337
  }
163
338
 
164
339
  export default FileLoggerCallback;
@@ -0,0 +1,48 @@
1
+ /**
2
+ * DeepSpider - Memory Flush 中间件
3
+ * 在 summarization 触发前(85k token),注入 SystemMessage 提醒 Agent 保存关键进度
4
+ */
5
+
6
+ import { createMiddleware, countTokensApproximately } from 'langchain';
7
+ import { SystemMessage } from '@langchain/core/messages';
8
+
9
+ const FLUSH_THRESHOLD = 85000;
10
+
11
+ const FLUSH_REMINDER = `⚠️ 上下文即将被压缩(当前接近 token 上限)。
12
+ 请立即使用 save_memo 工具保存以下关键信息,否则压缩后将丢失:
13
+ 1. 当前分析目标和已完成的步骤
14
+ 2. 已发现的关键参数、加密逻辑、请求链路
15
+ 3. 下一步计划
16
+
17
+ 保存后继续正常工作。`;
18
+
19
/**
 * Build the memory-flush middleware.
 *
 * Before each model call the approximate token count of `state.messages` is
 * compared with FLUSH_THRESHOLD. The first time the threshold is reached a
 * SystemMessage carrying FLUSH_REMINDER is appended to the messages. The
 * reminder is re-armed once the count falls below half of the threshold.
 *
 * @returns {object} Middleware created through `createMiddleware`.
 */
export function createMemoryFlushMiddleware() {
  // Closure state: true after the reminder has been injected and until the
  // token count has dropped again.
  let reminderSent = false;

  async function beforeModel(state) {
    const approxTokens = countTokensApproximately(state.messages);

    // A sharp drop means the history has been compacted: allow a new reminder.
    const droppedWellBelowThreshold = approxTokens < FLUSH_THRESHOLD * 0.5;
    if (reminderSent && droppedWellBelowThreshold) {
      reminderSent = false;
    }

    const shouldRemind = !reminderSent && approxTokens >= FLUSH_THRESHOLD;
    if (!shouldRemind) {
      return state;
    }

    reminderSent = true;
    const reminder = new SystemMessage(FLUSH_REMINDER);
    return {
      ...state,
      messages: [...state.messages, reminder],
    };
  }

  return createMiddleware({
    name: 'memoryFlushMiddleware',
    beforeModel,
  });
}
@@ -1,6 +1,6 @@
1
1
  /**
2
2
  * DeepSpider - 报告中间件
3
- * 在 Agent 执行完成后自动检测并准备报告
3
+ * 检测文件保存事件,触发报告显示和面板通知
4
4
  */
5
5
 
6
6
  import { createMiddleware } from 'langchain';
@@ -13,64 +13,122 @@ const reportStateSchema = z.object({
13
13
  reportReady: z.boolean().default(false),
14
14
  });
15
15
 
16
/**
 * Extract the path of a saved Markdown file from a parsed tool result.
 *
 * Two result shapes are recognized:
 *   artifact_save:        { success, path: "/xxx/analysis.md" }
 *   save_analysis_report: { success, paths: { markdown: "/xxx/analysis.md" }, dir }
 *
 * @param {object|null|undefined} content - Parsed tool result.
 * @returns {string|null} The Markdown path, or null when the result is not
 *   successful or carries no usable string path.
 */
function extractMdPath(content) {
  if (!content?.success) return null;

  // artifact_save shape. The typeof guard keeps a malformed (non-string)
  // `path` from throwing on `.endsWith`.
  const { path } = content;
  if (typeof path === 'string' && path.endsWith('.md')) return path;

  // save_analysis_report shape. Only a non-empty string is a usable path.
  const markdown = content.paths?.markdown;
  if (typeof markdown === 'string' && markdown.length > 0) return markdown;

  return null;
}
33
+
34
/**
 * Describe the file saved by a tool call, for the panel notification.
 *
 * @param {object|null|undefined} content - Parsed tool result.
 * @returns {{ path: string, type: string }|null}
 *   - single file (`content.path`): `type` is the extension of the file name
 *     without the dot, or '' when the file name has none;
 *   - report (`content.paths`): `path` is `content.dir`, falling back to
 *     `content.paths.markdown`, and `type` is 'report';
 *   - null when the result is not successful or carries no usable path.
 */
function extractSavedFile(content) {
  if (!content?.success) return null;

  // artifact_save: single file.
  const { path } = content;
  if (typeof path === 'string' && path.length > 0) {
    // Look at the file name only, so a dot in a directory name
    // ("/out/v1.2/README") is not mistaken for an extension separator.
    const fileName = path.split(/[\\/]/).pop();
    const dotIndex = fileName.lastIndexOf('.');
    const type = dotIndex === -1 ? '' : fileName.slice(dotIndex + 1);
    return { path, type };
  }

  // save_analysis_report: several files, reported as one entry.
  if (content.paths) {
    const reportPath = content.dir || content.paths.markdown;
    return reportPath ? { path: reportPath, type: 'report' } : null;
  }

  return null;
}
54
+
55
/**
 * Read the `content` of a tool result or message.
 *
 * String content is parsed as JSON; any other content is returned as is.
 *
 * @param {object|null|undefined} result - Object carrying a `content` field.
 * @returns {*} Parsed JSON value, the raw non-string content, or null when
 *   reading or parsing throws.
 */
function parseContent(result) {
  let parsed;
  try {
    const raw = result?.content;
    parsed = typeof raw === 'string' ? JSON.parse(raw) : raw;
  } catch {
    parsed = null;
  }
  return parsed;
}
64
+
16
65
  /**
17
66
  * 创建报告中间件
18
- * afterModel 中检测 artifact_save 工具调用结果
19
- * 在 afterAgent 中触发报告显示回调
67
+ * 监听 artifact_save save_analysis_report,触发报告显示 + 面板通知
20
68
  */
21
69
  export function createReportMiddleware(options = {}) {
22
- const { onReportReady } = options;
70
+ const { onReportReady, onFileSaved } = options;
71
+
72
+ const WATCHED_TOOLS = new Set(['artifact_save', 'save_analysis_report']);
23
73
 
24
74
  return createMiddleware({
25
75
  name: 'reportMiddleware',
26
76
  stateSchema: reportStateSchema,
27
77
 
28
- // 模型调用后,检测工具调用结果
78
+ wrapToolCall: async (request, handler) => {
79
+ const toolName = request.tool?.name ?? request.toolCall?.name;
80
+ const result = await handler(request);
81
+
82
+ if (!WATCHED_TOOLS.has(toolName)) return result;
83
+
84
+ const content = parseContent(result);
85
+ if (!content) return result;
86
+
87
+ // 检测 .md 文件 → 触发报告显示
88
+ const mdPath = extractMdPath(content);
89
+ if (mdPath) {
90
+ console.log('[reportMiddleware] 检测到报告文件:', mdPath);
91
+ if (onReportReady) {
92
+ await onReportReady(mdPath);
93
+ }
94
+ }
95
+
96
+ // 通知文件已保存(面板可显示提示)
97
+ const saved = extractSavedFile(content);
98
+ if (saved && onFileSaved) {
99
+ await onFileSaved(saved);
100
+ }
101
+
102
+ return result;
103
+ },
104
+
105
+ // 备选:afterModel 检测 ToolMessage 中的报告文件
29
106
  afterModel: (state) => {
30
107
  const messages = state.messages;
31
- if (!messages || messages.length === 0) return undefined;
108
+ if (!messages?.length) return undefined;
32
109
 
33
- // 查找最近的 ToolMessage
34
110
  for (let i = messages.length - 1; i >= 0; i--) {
35
111
  const msg = messages[i];
36
- if (ToolMessage.isInstance(msg)) {
37
- try {
38
- const content = typeof msg.content === 'string'
39
- ? JSON.parse(msg.content)
40
- : msg.content;
41
-
42
- // 检测是否是 artifact_save 写入的 .md 文件
43
- if (content.success && content.path?.endsWith('.md')) {
44
- console.log('[reportMiddleware] 检测到 .md 文件:', content.path);
45
- return { lastWrittenMdFile: content.path };
46
- }
47
- } catch {
48
- // 解析失败,忽略
49
- }
112
+ if (!ToolMessage.isInstance(msg)) continue;
113
+
114
+ const content = parseContent(msg);
115
+ if (!content) continue;
116
+
117
+ const mdPath = extractMdPath(content);
118
+ if (mdPath) {
119
+ console.log('[reportMiddleware] afterModel 检测到报告:', mdPath);
120
+ return { lastWrittenMdFile: mdPath };
50
121
  }
51
122
  }
52
123
  return undefined;
53
124
  },
54
125
 
55
- // Agent 执行完成后
126
+ // streamEvents 模式下可能不被调用
56
127
  afterAgent: async (state) => {
57
- const mdFile = state.lastWrittenMdFile;
58
-
59
- if (mdFile) {
60
- console.log('[reportMiddleware] afterAgent: 准备显示报告:', mdFile);
61
-
62
- // 调用回调通知外部
63
- if (onReportReady) {
64
- try {
65
- await onReportReady(mdFile);
66
- } catch (e) {
67
- console.error('[reportMiddleware] onReportReady 失败:', e.message);
68
- }
69
- }
70
-
128
+ if (state.lastWrittenMdFile) {
129
+ console.log('[reportMiddleware] afterAgent: 报告就绪:', state.lastWrittenMdFile);
71
130
  return { reportReady: true };
72
131
  }
73
-
74
132
  return undefined;
75
133
  },
76
134
  });
@@ -0,0 +1,236 @@
1
+ /**
2
+ * DeepSpider - 自定义子代理中间件
3
+ * 复刻 deepagents 内置的 createSubAgentMiddleware,增加 context 结构化传递
4
+ *
5
+ * 与内置版本的唯一区别:task tool schema 新增 context 字段(z.record(z.string(), z.string()).optional()),
6
+ * LLM 按需填写 key-value 对,子代理收到的 HumanMessage 中 context 以 <context> 块拼接在 description 之后。
7
+ */
8
+
9
+ import { createMiddleware, createAgent, tool, humanInTheLoopMiddleware } from 'langchain';
10
+ import { HumanMessage, SystemMessage, ToolMessage } from '@langchain/core/messages';
11
+ import { getCurrentTaskInput, Command } from '@langchain/langgraph';
12
+ import { TASK_SYSTEM_PROMPT } from 'deepagents';
13
+ import { z } from 'zod';
14
+
15
+ // 子代理 state 中需要排除的 key(与 deepagents 内部一致)
16
+ const EXCLUDED_STATE_KEYS = [
17
+ 'messages',
18
+ 'todos',
19
+ 'structuredResponse',
20
+ 'skillsMetadata',
21
+ 'memoryContents',
22
+ ];
23
+
24
/**
 * Copy a state object, leaving out every key listed in EXCLUDED_STATE_KEYS.
 *
 * @param {object} state - State whose own enumerable properties are copied.
 * @returns {object} New object holding only the keys that are not excluded.
 */
function filterStateForSubagent(state) {
  const kept = {};
  Object.keys(state).forEach((key) => {
    if (EXCLUDED_STATE_KEYS.includes(key)) return;
    kept[key] = state[key];
  });
  return kept;
}
34
+
35
/**
 * Note appended to every subagent result, telling the parent agent to rely on
 * the subagent's saved files instead of re-reading them.
 */
const TRUST_SIGNAL = `\n\n---\n⚠️ 子代理已完成任务。请直接使用子代理输出的文件和结论,不要重复执行 artifact_load / artifact_glob / ls 等文件读取操作来检查子代理已保存的文件。如果需要对生成的代码做端到端验证,那是你的职责,请正常执行。`;

/**
 * Flatten message content to plain text.
 *
 * @param {*} content - A string, or an array whose items are strings or
 *   objects of the form `{ type: 'text', text }`; other items are skipped.
 * @returns {string} The concatenated text, '' when there is none.
 */
function messageContentToText(content) {
  if (typeof content === 'string') return content;
  if (!Array.isArray(content)) return '';
  return content
    .map((block) => {
      if (typeof block === 'string') return block;
      if (block?.type === 'text' && typeof block.text === 'string') return block.text;
      return '';
    })
    .join('');
}

/**
 * Build the Command returned by the task tool: the subagent's state (minus
 * the excluded keys) plus its last message, wrapped in a ToolMessage.
 *
 * @param {object} result - Subagent invocation result; `result.messages` is
 *   read for the final message.
 * @param {string} toolCallId - ID of the tool call being answered.
 * @returns {Command} Command whose `update` carries the state and the ToolMessage.
 */
function returnCommandWithStateUpdate(result, toolCallId) {
  const stateUpdate = filterStateForSubagent(result);
  const messages = result.messages;
  const lastMessage = messages?.[messages.length - 1];

  // Content may be an array of blocks; concatenating an array with a string
  // would yield "[object Object]", so reduce it to text first.
  const text = messageContentToText(lastMessage?.content) || 'Task completed';

  return new Command({
    update: {
      ...stateUpdate,
      messages: [new ToolMessage({
        content: text + TRUST_SIGNAL,
        tool_call_id: toolCallId,
        name: 'task',
      })],
    },
  });
}
56
+
57
/**
 * Build the description text of the `task` tool.
 *
 * @param {string[]} subagentDescriptions - One "- name: description" line per
 *   available subagent; the lines are listed under the "Available agent
 *   types" heading.
 * @returns {string} The full tool description.
 */
function getTaskToolDescription(subagentDescriptions) {
  const agentList = subagentDescriptions.join('\n');

  const lines = [
    'Launch an ephemeral subagent to handle complex, multi-step independent tasks with isolated context windows.',
    '',
    'Available agent types and the tools they have access to:',
    agentList,
    '',
    'When using the Task tool, you must specify a subagent_type parameter to select which agent type to use.',
    '',
    '## Usage notes:',
    '1. Launch multiple agents concurrently whenever possible, to maximize performance; to do that, use a single message with multiple tool uses',
    '2. When the agent is done, it will return a single message back to you. The result returned by the agent is not visible to the user. To show the user the result, you should send a text message back to the user with a concise summary of the result.',
    '3. Each agent invocation is stateless. You will not be able to send additional messages to the agent, nor will the agent be able to communicate with you outside of its final report. Therefore, your prompt should contain a highly detailed task description for the agent to perform autonomously and you should specify exactly what information the agent should return back to you in its final and only message to you.',
    "4. The agent's outputs should generally be trusted",
    "5. Clearly tell the agent whether you expect it to create content, perform analysis, or just do research (search, file reads, web fetches, etc.), since it is not aware of the user's intent",
    '6. If the agent description mentions that it should be used proactively, then you should try your best to use it without the user having to ask for it first. Use your judgement.',
    '',
    '## context 参数',
    '委托子代理时,使用 context 参数传递结构化上下文(key-value 对),如站点标识、请求 ID、目标参数名等。context 会注入到子代理的初始消息中,确保关键信息不丢失。',
  ];

  return lines.join('\n');
}
81
+
82
/**
 * Compile the configured subagents into runnable instances.
 *
 * @param {object} options
 * @param {*} options.defaultModel - Model used when a subagent sets none.
 * @param {Array} options.defaultTools - Tools used when a subagent sets none.
 * @param {Array|null} options.defaultMiddleware - Middleware prepended to every compiled subagent.
 * @param {Array|null} options.generalPurposeMiddleware - Middleware for the
 *   general-purpose agent; falls back to the default middleware.
 * @param {object|null} options.defaultInterruptOn - Fallback human-in-the-loop configuration.
 * @param {Array} options.subagents - Subagent specs; a spec with a `runnable`
 *   property is used as is, any other is compiled with `createAgent`.
 * @param {boolean} options.generalPurposeAgent - Whether to add the 'general-purpose' agent.
 * @returns {{ agents: Object<string, object>, descriptions: string[] }}
 *   Runnables keyed by agent name, and one description line per agent.
 */
function getSubagents(options) {
  const {
    defaultModel,
    defaultTools,
    defaultMiddleware,
    generalPurposeMiddleware: gpMiddleware,
    defaultInterruptOn,
    subagents,
    generalPurposeAgent,
  } = options;

  const baseMiddleware = defaultMiddleware || [];
  const gpBaseMiddleware = gpMiddleware || baseMiddleware;
  const agents = {};
  const descriptions = [];

  // General-purpose subagent (kept available, only built when requested).
  if (generalPurposeAgent) {
    const gpStack = [...gpBaseMiddleware];
    if (defaultInterruptOn) {
      gpStack.push(humanInTheLoopMiddleware({ interruptOn: defaultInterruptOn }));
    }
    agents['general-purpose'] = createAgent({
      model: defaultModel,
      systemPrompt: 'In order to complete the objective that the user asks of you, you have access to a number of standard tools.',
      tools: defaultTools,
      middleware: gpStack,
      name: 'general-purpose',
    });
    descriptions.push('- general-purpose: General-purpose agent for researching complex questions, searching for files and content, and executing multi-step tasks.');
  }

  // Custom subagents.
  for (const spec of subagents) {
    descriptions.push(`- ${spec.name}: ${spec.description}`);

    // Already compiled: use the provided runnable directly.
    if ('runnable' in spec) {
      agents[spec.name] = spec.runnable;
      continue;
    }

    const stack = [...baseMiddleware];
    if (spec.middleware) stack.push(...spec.middleware);

    const interruptOn = spec.interruptOn || defaultInterruptOn;
    if (interruptOn) stack.push(humanInTheLoopMiddleware({ interruptOn }));

    agents[spec.name] = createAgent({
      model: spec.model ?? defaultModel,
      systemPrompt: spec.systemPrompt,
      tools: spec.tools ?? defaultTools,
      middleware: stack,
      name: spec.name,
    });
  }

  return { agents, descriptions };
}
141
+
142
/**
 * Create the `task` tool, extended with an optional `context` field.
 *
 * The tool runs the subagent named by `subagent_type` on a single
 * HumanMessage made of `description` and, when `context` has at least one
 * key, a `<context>` block holding the JSON-serialized context.
 *
 * @param {object} options - Passed unchanged to `getSubagents`.
 * @returns {object} The tool built with `tool(...)`. Its handler throws when
 *   `subagent_type` is not a configured agent or when the call has no tool call ID.
 */
function createEnhancedTaskTool(options) {
  const { agents: subagentGraphs, descriptions: subagentDescriptions } = getSubagents(options);
  const availableTypes = Object.keys(subagentGraphs).join(', ');

  return tool(
    async (input, config) => {
      const { description, subagent_type, context } = input;

      // Own-property check: `in` would also accept inherited keys such as
      // "constructor" or "toString" and fail later with an obscure TypeError.
      if (!Object.hasOwn(subagentGraphs, subagent_type)) {
        const allowedTypes = Object.keys(subagentGraphs).map((k) => `\`${k}\``).join(', ');
        throw new Error(`Error: invoked agent of type ${subagent_type}, the only allowed types are ${allowedTypes}`);
      }

      // Validate before invoking: a missing ID makes the result unusable, so
      // fail fast instead of paying for a whole subagent run first.
      const toolCallId = config?.toolCall?.id;
      if (!toolCallId) throw new Error('Tool call ID is required for subagent invocation');

      // Initial message for the subagent: description + context block.
      let content = description;
      if (context && Object.keys(context).length > 0) {
        content += `\n\n<context>\n${JSON.stringify(context)}\n</context>`;
      }

      const subagent = subagentGraphs[subagent_type];
      const subagentState = filterStateForSubagent(getCurrentTaskInput());
      subagentState.messages = [new HumanMessage({ content })];

      const result = await subagent.invoke(subagentState, config);
      return returnCommandWithStateUpdate(result, toolCallId);
    },
    {
      name: 'task',
      description: getTaskToolDescription(subagentDescriptions),
      schema: z.object({
        description: z.string().describe('The task to execute with the selected agent'),
        subagent_type: z.string().describe(`Name of the agent to use. Available: ${availableTypes}`),
        // NOTE: z.record() is avoided because Zod v4 toJSONSchema emits
        // `propertyNames`, a keyword the Anthropic API does not support;
        // z.object({}) with passthrough (additionalProperties) is used instead.
        context: z.object({}).passthrough().optional().describe('Structured key-value context to pass to the subagent (e.g. site, requestId, targetParam)'),
      }),
    },
  );
}
186
+
187
/**
 * Create the custom subagent middleware.
 *
 * It registers the enhanced `task` tool (with the `context` field) and, on
 * every model call, appends TASK_SYSTEM_PROMPT plus a short note on the
 * `context` parameter to the request's system message.
 *
 * @param {Object} options
 * @param {LanguageModelLike} options.defaultModel - Model instance.
 * @param {StructuredTool[]} [options.defaultTools=[]] - Default tool set.
 * @param {SubAgent[]} [options.subagents=[]] - Subagent configurations.
 * @param {AgentMiddleware[]|null} [options.defaultMiddleware=null] - Default middleware for subagents.
 * @param {AgentMiddleware[]|null} [options.generalPurposeMiddleware=null] - Middleware for the general-purpose agent.
 * @param {boolean} [options.generalPurposeAgent=false] - Whether to create the general-purpose subagent.
 * @param {Object|null} [options.defaultInterruptOn=null] - Human-in-the-loop configuration.
 * @returns {object} Middleware created through `createMiddleware`.
 */
export function createCustomSubAgentMiddleware(options) {
  const {
    defaultModel,
    defaultTools = [],
    subagents = [],
    defaultMiddleware = null,
    generalPurposeMiddleware = null,
    generalPurposeAgent = false,
    defaultInterruptOn = null,
  } = options;

  const taskTool = createEnhancedTaskTool({
    defaultModel,
    defaultTools,
    subagents,
    defaultMiddleware,
    generalPurposeMiddleware,
    generalPurposeAgent,
    defaultInterruptOn,
  });

  // Usage note for the context parameter, appended after TASK_SYSTEM_PROMPT.
  const contextGuide = `\n\n委托子代理时,使用 context 参数传递结构化上下文(key-value 对),如站点标识、请求 ID、目标参数名等。context 会注入到子代理的初始消息中,确保关键信息不丢失。`;
  const taskPrompt = TASK_SYSTEM_PROMPT + contextGuide;

  async function wrapModelCall(request, handler) {
    const addendum = new SystemMessage({ content: taskPrompt });
    const systemMessage = request.systemMessage.concat(addendum);
    return handler({ ...request, systemMessage });
  }

  return createMiddleware({
    name: 'subAgentMiddleware',
    tools: [taskTool],
    wrapModelCall,
  });
}