deepspider 0.3.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -21
- package/package.json +4 -2
- package/src/agent/core/PanelBridge.js +34 -8
- package/src/agent/core/StreamHandler.js +142 -26
- package/src/agent/index.js +72 -14
- package/src/agent/middleware/memoryFlush.js +48 -0
- package/src/agent/middleware/report.js +77 -45
- package/src/agent/middleware/subagent.js +4 -1
- package/src/agent/middleware/toolAvailability.js +37 -0
- package/src/agent/middleware/toolGuard.js +141 -31
- package/src/agent/prompts/system.js +144 -1
- package/src/agent/run.js +127 -14
- package/src/agent/sessions.js +88 -0
- package/src/agent/skills/anti-detect/SKILL.md +89 -14
- package/src/agent/skills/captcha/SKILL.md +93 -19
- package/src/agent/skills/crawler/SKILL.md +86 -0
- package/src/agent/skills/crawler/evolved.md +14 -13
- package/src/agent/skills/general/evolved.md +12 -1
- package/src/agent/skills/js2python/SKILL.md +40 -0
- package/src/agent/skills/js2python/evolved.md +13 -1
- package/src/agent/skills/sandbox/SKILL.md +33 -0
- package/src/agent/skills/sandbox/evolved.md +12 -5
- package/src/agent/skills/static-analysis/SKILL.md +39 -0
- package/src/agent/skills/static-analysis/evolved.md +88 -2
- package/src/agent/subagents/anti-detect.js +27 -5
- package/src/agent/subagents/captcha.js +28 -9
- package/src/agent/subagents/crawler.js +26 -79
- package/src/agent/subagents/factory.js +24 -4
- package/src/agent/subagents/js2python.js +18 -16
- package/src/agent/tools/analysis.js +17 -7
- package/src/agent/tools/browser.js +26 -13
- package/src/agent/tools/crawler.js +1 -1
- package/src/agent/tools/crawlerGenerator.js +2 -2
- package/src/agent/tools/evolve.js +47 -8
- package/src/agent/tools/index.js +7 -3
- package/src/agent/tools/patch.js +1 -1
- package/src/agent/tools/store.js +1 -1
- package/src/browser/client.js +5 -1
- package/src/browser/ui/analysisPanel.js +72 -0
package/README.md
CHANGED
|
@@ -5,6 +5,8 @@
|
|
|
5
5
|
|
|
6
6
|
> 智能爬虫工程平台 - 基于 DeepAgents + Patchright 的 AI 爬虫 Agent
|
|
7
7
|
|
|
8
|
+
[English](README_EN.md)
|
|
9
|
+
|
|
8
10
|
从 JS 逆向到完整爬虫脚本的一站式 AI Agent 解决方案。
|
|
9
11
|
|
|
10
12
|
## 特性
|
|
@@ -46,7 +48,7 @@ pnpm run setup:crypto # 安装 Python 加密库(可选)
|
|
|
46
48
|
|
|
47
49
|
### 配置
|
|
48
50
|
|
|
49
|
-
DeepSpider 需要配置 LLM API 才能运行。支持任何兼容 OpenAI 格式…
|
|
51
|
+
DeepSpider 支持兼容 Anthropic 格式的 API 供应商。推荐使用 Claude API 以获得最佳效果。
|
|
50
52
|
|
|
51
53
|
| 配置键 | 环境变量 | 说明 |
|
|
52
54
|
|--------|----------|------|
|
|
@@ -57,33 +59,23 @@ DeepSpider 需要配置 LLM API 才能运行。支持任何兼容 OpenAI 格式
|
|
|
57
59
|
|
|
58
60
|
优先级:环境变量 > 配置文件 (`~/.deepspider/config/settings.json`) > 默认值
|
|
59
61
|
|
|
60
|
-
|
|
62
|
+
**方式一:CLI 命令(推荐)**
|
|
61
63
|
|
|
62
64
|
```bash
|
|
63
|
-
deepspider config set apiKey sk-xxx
|
|
64
|
-
deepspider config set baseUrl https://api.
|
|
65
|
-
deepspider config set model
|
|
65
|
+
deepspider config set apiKey sk-ant-api03-xxx
|
|
66
|
+
deepspider config set baseUrl https://api.anthropic.com
|
|
67
|
+
deepspider config set model claude-opus-4-6
|
|
66
68
|
```
|
|
67
69
|
|
|
68
70
|
**方式二:环境变量**
|
|
69
71
|
|
|
70
72
|
```bash
|
|
71
|
-
export DEEPSPIDER_API_KEY=sk-xxx
|
|
72
|
-
export DEEPSPIDER_BASE_URL=https://api.
|
|
73
|
-
export DEEPSPIDER_MODEL=
|
|
73
|
+
export DEEPSPIDER_API_KEY=sk-ant-api03-xxx
|
|
74
|
+
export DEEPSPIDER_BASE_URL=https://api.anthropic.com
|
|
75
|
+
export DEEPSPIDER_MODEL=claude-opus-4-6
|
|
74
76
|
```
|
|
75
77
|
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
```bash
|
|
79
|
-
# OpenAI
|
|
80
|
-
deepspider config set baseUrl https://api.openai.com/v1
|
|
81
|
-
deepspider config set model gpt-4o
|
|
82
|
-
|
|
83
|
-
# DeepSeek
|
|
84
|
-
deepspider config set baseUrl https://api.deepseek.com/v1
|
|
85
|
-
deepspider config set model deepseek-chat
|
|
86
|
-
```
|
|
78
|
+
> **提示**:也支持其他兼容 Anthropic 格式的 API 供应商。
|
|
87
79
|
|
|
88
80
|
### 使用
|
|
89
81
|
|
|
@@ -139,11 +131,15 @@ pnpm test
|
|
|
139
131
|
|
|
140
132
|
### 使用流程
|
|
141
133
|
|
|
142
|
-
1. **启动**: `
|
|
134
|
+
1. **启动**: `deepspider https://target-site.com`
|
|
143
135
|
2. **等待**: 浏览器打开,系统自动记录数据(不消耗 API)
|
|
144
136
|
3. **操作**: 在网站上登录、翻页、触发目标请求
|
|
145
137
|
4. **选择**: 点击面板的选择按钮 ⦿,进入选择模式
|
|
146
|
-
5. **分析**:
|
|
138
|
+
5. **分析**: 点击目标数据元素,选择快捷操作:
|
|
139
|
+
- **追踪数据来源** — 定位选中数据的 API 接口
|
|
140
|
+
- **分析加密参数** — 识别并逆向加密参数
|
|
141
|
+
- **完整分析并生成爬虫** — 端到端:逆向、验证、生成代码
|
|
142
|
+
- **提取页面结构** — 分析 DOM 结构,生成选择器和字段配置
|
|
147
143
|
6. **对话**: 在面板或 CLI 继续提问,深入分析
|
|
148
144
|
|
|
149
145
|
## 架构
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "deepspider",
|
|
3
|
-
"version": "0.3.2",
|
|
3
|
+
"version": "0.5.0",
|
|
4
4
|
"description": "智能爬虫工程平台 - 基于 DeepAgents + Patchright 的 AI 爬虫 Agent",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/index.js",
|
|
@@ -23,7 +23,7 @@
|
|
|
23
23
|
"lint": "eslint src/",
|
|
24
24
|
"lint:fix": "eslint src/ --fix",
|
|
25
25
|
"setup:crypto": "uv venv .venv --python 3.11 2>/dev/null || true && uv pip install -r requirements-crypto.txt",
|
|
26
|
-
"postinstall": "patchright install chromium && npm rebuild isolated-vm 2>/dev/null || true",
|
|
26
|
+
"postinstall": "patchright install chromium && npm rebuild isolated-vm better-sqlite3 2>/dev/null || true",
|
|
27
27
|
"prepare": "husky"
|
|
28
28
|
},
|
|
29
29
|
"keywords": [
|
|
@@ -57,8 +57,10 @@
|
|
|
57
57
|
"@langchain/anthropic": "^1.3.17",
|
|
58
58
|
"@langchain/core": "^1.1.24",
|
|
59
59
|
"@langchain/langgraph": "^1.1.2",
|
|
60
|
+
"@langchain/langgraph-checkpoint-sqlite": "^1.0.1",
|
|
60
61
|
"@langchain/openai": "^1.2.3",
|
|
61
62
|
"@modelcontextprotocol/sdk": "^1.26.0",
|
|
63
|
+
"better-sqlite3": "^12.6.2",
|
|
62
64
|
"crypto-js": "^4.2.0",
|
|
63
65
|
"deepagents": "^1.7.6",
|
|
64
66
|
"dotenv": "^17.2.3",
|
|
package/src/agent/core/PanelBridge.js
CHANGED
|
@@ -18,10 +18,16 @@ export class PanelBridge {
|
|
|
18
18
|
if (!cdp) return null;
|
|
19
19
|
|
|
20
20
|
try {
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
21
|
+
// 3s 超时:断点暂停时 Runtime.evaluate 会永远挂住,必须限时
|
|
22
|
+
const result = await Promise.race([
|
|
23
|
+
cdp.send('Runtime.evaluate', {
|
|
24
|
+
expression: code,
|
|
25
|
+
returnByValue: true,
|
|
26
|
+
}),
|
|
27
|
+
new Promise((_, reject) =>
|
|
28
|
+
setTimeout(() => reject(new Error('evaluateInPage timeout (debugger paused?)')), 3000)
|
|
29
|
+
),
|
|
30
|
+
]);
|
|
25
31
|
return result.result?.value;
|
|
26
32
|
} catch (e) {
|
|
27
33
|
this.debug('evaluateInPage 失败:', e.message);
|
|
@@ -29,14 +35,34 @@ export class PanelBridge {
|
|
|
29
35
|
}
|
|
30
36
|
}
|
|
31
37
|
|
|
38
|
+
/**
|
|
39
|
+
* 等待面板 JS 初始化完成
|
|
40
|
+
*/
|
|
41
|
+
async waitForPanel(timeoutMs = 5000) {
|
|
42
|
+
const start = Date.now();
|
|
43
|
+
while (Date.now() - start < timeoutMs) {
|
|
44
|
+
const ready = await this.evaluateInPage('!!window.__deepspider__?.addStructuredMessage');
|
|
45
|
+
if (ready) return true;
|
|
46
|
+
await new Promise(r => setTimeout(r, 200));
|
|
47
|
+
}
|
|
48
|
+
return false;
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
/**
|
|
52
|
+
* 批量发送消息到面板(单次 CDP 调用)
|
|
53
|
+
*/
|
|
54
|
+
async sendBatch(messages) {
|
|
55
|
+
if (!messages?.length) return;
|
|
56
|
+
const escaped = JSON.stringify(messages);
|
|
57
|
+
await this.evaluateInPage(
|
|
58
|
+
`(function(msgs){var ds=window.__deepspider__;if(!ds)return;msgs.forEach(function(m){ds.addStructuredMessage?.(m.type,m.data);})})(${escaped})`
|
|
59
|
+
);
|
|
60
|
+
}
|
|
61
|
+
|
|
32
62
|
/**
|
|
33
63
|
* 发送结构化消息到前端面板
|
|
34
64
|
*/
|
|
35
65
|
async sendMessage(type, data) {
|
|
36
|
-
const browser = this.getBrowser();
|
|
37
|
-
const page = browser?.getPage?.();
|
|
38
|
-
if (!page) return;
|
|
39
|
-
|
|
40
66
|
try {
|
|
41
67
|
const escapedType = JSON.stringify(type);
|
|
42
68
|
const escapedData = JSON.stringify(data);
|
|
package/src/agent/core/StreamHandler.js
CHANGED
|
@@ -13,6 +13,31 @@ function cleanDSML(text) {
|
|
|
13
13
|
return text ? text.replace(DSML_PATTERN, '') : text;
|
|
14
14
|
}
|
|
15
15
|
|
|
16
|
+
// 流式事件停滞超时(单个事件间隔上限)
|
|
17
|
+
const STALL_TIMEOUT_MS = 150000; // 150s — 超过此时间无新事件则中断流
|
|
18
|
+
|
|
19
|
+
/**
|
|
20
|
+
* 包装异步迭代器,每个 next() 加独立超时
|
|
21
|
+
* 防止 LLM API 或 middleware 无响应时 for-await 永久挂起
|
|
22
|
+
*/
|
|
23
|
+
async function* withStallTimeout(asyncIterator, timeoutMs = STALL_TIMEOUT_MS) {
|
|
24
|
+
while (true) {
|
|
25
|
+
let timer;
|
|
26
|
+
const result = await Promise.race([
|
|
27
|
+
asyncIterator.next(),
|
|
28
|
+
new Promise((_, reject) => {
|
|
29
|
+
timer = setTimeout(
|
|
30
|
+
() => reject(new Error(`Stream timeout: no events for ${Math.round(timeoutMs / 1000)}s`)),
|
|
31
|
+
timeoutMs,
|
|
32
|
+
);
|
|
33
|
+
}),
|
|
34
|
+
]);
|
|
35
|
+
clearTimeout(timer);
|
|
36
|
+
if (result.done) break;
|
|
37
|
+
yield result.value;
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
|
|
16
41
|
// 人工介入配置
|
|
17
42
|
const INTERVENTION_CONFIG = {
|
|
18
43
|
idleTimeoutMs: 120000, // 2分钟无响应触发提示
|
|
@@ -30,6 +55,20 @@ export class StreamHandler {
|
|
|
30
55
|
this.fullResponse = '';
|
|
31
56
|
}
|
|
32
57
|
|
|
58
|
+
/**
|
|
59
|
+
* 从 on_chat_model_end 提取最终响应
|
|
60
|
+
* 优先使用流式累积的 fullResponse,兜底使用 end 事件内容
|
|
61
|
+
*/
|
|
62
|
+
_extractFinalResponse(output) {
|
|
63
|
+
if (!output?.content) return null;
|
|
64
|
+
const streamContent = this.fullResponse;
|
|
65
|
+
const endContent = typeof output.content === 'string'
|
|
66
|
+
? output.content
|
|
67
|
+
: output.content.filter(c => c.type === 'text').map(c => c.text).join('');
|
|
68
|
+
// 使用较长的一方(通常流式累积的内容更完整)
|
|
69
|
+
return streamContent.length >= endContent.length ? streamContent : endContent;
|
|
70
|
+
}
|
|
71
|
+
|
|
33
72
|
/**
|
|
34
73
|
* 流式对话 - 显示思考过程(带重试)
|
|
35
74
|
*/
|
|
@@ -63,7 +102,7 @@ export class StreamHandler {
|
|
|
63
102
|
);
|
|
64
103
|
|
|
65
104
|
this.debug('chatStream: 开始遍历事件');
|
|
66
|
-
for await (const event of eventStream) {
|
|
105
|
+
for await (const event of withStallTimeout(eventStream)) {
|
|
67
106
|
lastEventTime = Date.now();
|
|
68
107
|
eventCount++;
|
|
69
108
|
|
|
@@ -73,11 +112,11 @@ export class StreamHandler {
|
|
|
73
112
|
|
|
74
113
|
await this._handleStreamEvent(event);
|
|
75
114
|
|
|
76
|
-
if (event.event === 'on_chat_model_end'
|
|
77
|
-
const
|
|
78
|
-
if (
|
|
79
|
-
finalResponse =
|
|
80
|
-
this.debug(`chatStream:
|
|
115
|
+
if (event.event === 'on_chat_model_end') {
|
|
116
|
+
const extracted = this._extractFinalResponse(event.data?.output);
|
|
117
|
+
if (extracted) {
|
|
118
|
+
finalResponse = extracted;
|
|
119
|
+
this.debug(`chatStream: 最终响应, 长度=${finalResponse.length}`);
|
|
81
120
|
}
|
|
82
121
|
}
|
|
83
122
|
}
|
|
@@ -86,10 +125,15 @@ export class StreamHandler {
|
|
|
86
125
|
console.log(`\n[完成] 共处理 ${eventCount} 个事件`);
|
|
87
126
|
|
|
88
127
|
// 发送剩余累积文本
|
|
89
|
-
await this._flushFullResponse();
|
|
128
|
+
const flushed = await this._flushFullResponse();
|
|
90
129
|
|
|
91
130
|
// 检测 interrupt 并渲染到面板
|
|
92
|
-
await this._checkAndRenderInterrupt();
|
|
131
|
+
const hasInterrupt = await this._checkAndRenderInterrupt();
|
|
132
|
+
|
|
133
|
+
// 兜底:如果没有文本输出也没有 interrupt,发送完成通知
|
|
134
|
+
if (!flushed && !hasInterrupt && eventCount > 0 && lastToolCall) {
|
|
135
|
+
await this.panelBridge.sendToPanel('system', '✅ 任务完成');
|
|
136
|
+
}
|
|
93
137
|
|
|
94
138
|
await this.panelBridge.setBusy(false);
|
|
95
139
|
|
|
@@ -126,23 +170,28 @@ export class StreamHandler {
|
|
|
126
170
|
{ ...this.config, version: 'v2' }
|
|
127
171
|
);
|
|
128
172
|
|
|
129
|
-
for await (const event of eventStream) {
|
|
173
|
+
for await (const event of withStallTimeout(eventStream)) {
|
|
130
174
|
lastEventTime = Date.now();
|
|
131
175
|
eventCount++;
|
|
132
176
|
await this._handleStreamEvent(event);
|
|
133
177
|
|
|
134
|
-
if (event.event === 'on_chat_model_end'
|
|
178
|
+
if (event.event === 'on_chat_model_end') {
|
|
135
179
|
const output = event.data?.output;
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
}
|
|
180
|
+
const extracted = this._extractFinalResponse(output);
|
|
181
|
+
if (extracted) finalResponse = extracted;
|
|
139
182
|
}
|
|
140
183
|
}
|
|
141
184
|
|
|
142
185
|
clearInterval(heartbeat);
|
|
143
186
|
|
|
144
|
-
await this._flushFullResponse();
|
|
145
|
-
await this._checkAndRenderInterrupt();
|
|
187
|
+
const flushed = await this._flushFullResponse();
|
|
188
|
+
const hasInterrupt = await this._checkAndRenderInterrupt();
|
|
189
|
+
|
|
190
|
+
// 兜底:如果没有文本输出也没有 interrupt,发送完成通知
|
|
191
|
+
if (!flushed && !hasInterrupt && eventCount > 0) {
|
|
192
|
+
await this.panelBridge.sendToPanel('system', '✅ 任务完成');
|
|
193
|
+
}
|
|
194
|
+
|
|
146
195
|
await this.panelBridge.setBusy(false);
|
|
147
196
|
|
|
148
197
|
console.log(`\n[恢复完成] 共处理 ${eventCount} 个事件`);
|
|
@@ -168,6 +217,23 @@ export class StreamHandler {
|
|
|
168
217
|
await this.panelBridge.setBusy(true);
|
|
169
218
|
this.debug(`chatStreamResume: 从检查点恢复, retryCount=${retryCount}`);
|
|
170
219
|
|
|
220
|
+
// 恢复前:检查 checkpoint 是否有实际消息
|
|
221
|
+
if (retryCount === 0) {
|
|
222
|
+
try {
|
|
223
|
+
const state = await this.agent.getState(this.config);
|
|
224
|
+
const messages = state?.values?.messages;
|
|
225
|
+
if (!messages?.length) {
|
|
226
|
+
console.log('[恢复] checkpoint 无历史消息,跳过恢复');
|
|
227
|
+
await this.panelBridge.sendToPanel('system', '该会话无历史记录,请重新开始分析');
|
|
228
|
+
await this.panelBridge.setBusy(false);
|
|
229
|
+
return '[无历史消息]';
|
|
230
|
+
}
|
|
231
|
+
await this._restoreHistoryToPanel(messages);
|
|
232
|
+
} catch (e) {
|
|
233
|
+
this.debug('chatStreamResume: getState 失败:', e.message);
|
|
234
|
+
}
|
|
235
|
+
}
|
|
236
|
+
|
|
171
237
|
const heartbeat = setInterval(() => {
|
|
172
238
|
const elapsed = Math.round((Date.now() - lastEventTime) / 1000);
|
|
173
239
|
if (elapsed > 30) {
|
|
@@ -181,23 +247,27 @@ export class StreamHandler {
|
|
|
181
247
|
{ ...this.config, version: 'v2' }
|
|
182
248
|
);
|
|
183
249
|
|
|
184
|
-
for await (const event of eventStream) {
|
|
250
|
+
for await (const event of withStallTimeout(eventStream)) {
|
|
185
251
|
lastEventTime = Date.now();
|
|
186
252
|
eventCount++;
|
|
187
253
|
await this._handleStreamEvent(event);
|
|
188
254
|
|
|
189
|
-
if (event.event === 'on_chat_model_end'
|
|
255
|
+
if (event.event === 'on_chat_model_end') {
|
|
190
256
|
const output = event.data?.output;
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
}
|
|
257
|
+
const extracted = this._extractFinalResponse(output);
|
|
258
|
+
if (extracted) finalResponse = extracted;
|
|
194
259
|
}
|
|
195
260
|
}
|
|
196
261
|
|
|
197
262
|
clearInterval(heartbeat);
|
|
198
263
|
|
|
199
|
-
await this._flushFullResponse();
|
|
200
|
-
await this._checkAndRenderInterrupt();
|
|
264
|
+
const flushed2 = await this._flushFullResponse();
|
|
265
|
+
const hasInterrupt2 = await this._checkAndRenderInterrupt();
|
|
266
|
+
|
|
267
|
+
if (!flushed2 && !hasInterrupt2 && eventCount > 0) {
|
|
268
|
+
await this.panelBridge.sendToPanel('system', '✅ 任务完成');
|
|
269
|
+
}
|
|
270
|
+
|
|
201
271
|
await this.panelBridge.setBusy(false);
|
|
202
272
|
|
|
203
273
|
console.log(`\n[恢复完成] 共处理 ${eventCount} 个事件`);
|
|
@@ -219,14 +289,49 @@ export class StreamHandler {
|
|
|
219
289
|
}
|
|
220
290
|
}
|
|
221
291
|
|
|
292
|
+
/**
|
|
293
|
+
* 从 checkpoint 恢复历史消息到前端面板
|
|
294
|
+
*/
|
|
295
|
+
async _restoreHistoryToPanel(messages) {
|
|
296
|
+
try {
|
|
297
|
+
if (!messages?.length) return;
|
|
298
|
+
this.debug(`_restoreHistoryToPanel: ${messages.length} 条历史消息`);
|
|
299
|
+
|
|
300
|
+
const batch = [];
|
|
301
|
+
for (const msg of messages) {
|
|
302
|
+
const type = msg._getType?.() || msg.constructor?.name;
|
|
303
|
+
const content = Array.isArray(msg.content)
|
|
304
|
+
? msg.content.filter(c => c.type === 'text').map(c => c.text).join('')
|
|
305
|
+
: (typeof msg.content === 'string' ? msg.content : '');
|
|
306
|
+
if (!content.trim()) continue;
|
|
307
|
+
|
|
308
|
+
if (type === 'human') {
|
|
309
|
+
batch.push({ type: 'user', data: { content } });
|
|
310
|
+
} else if (type === 'ai') {
|
|
311
|
+
batch.push({ type: 'text', data: { content } });
|
|
312
|
+
} else if (type === 'tool') {
|
|
313
|
+
const summary = content.length > 200 ? content.slice(0, 200) + '...' : content;
|
|
314
|
+
batch.push({ type: 'system', data: { content: `[工具结果] ${summary}` } });
|
|
315
|
+
}
|
|
316
|
+
}
|
|
317
|
+
await this.panelBridge.sendBatch(batch);
|
|
318
|
+
} catch (e) {
|
|
319
|
+
this.debug('_restoreHistoryToPanel 失败:', e.message);
|
|
320
|
+
}
|
|
321
|
+
}
|
|
322
|
+
|
|
222
323
|
/**
|
|
223
324
|
* 发送剩余累积文本到面板
|
|
325
|
+
* 返回 true 如果有文本被发送
|
|
224
326
|
*/
|
|
225
327
|
async _flushFullResponse() {
|
|
226
328
|
if (this.fullResponse?.trim()) {
|
|
227
329
|
await this.panelBridge.sendToPanel('assistant', this.fullResponse);
|
|
330
|
+
this.fullResponse = '';
|
|
331
|
+
return true;
|
|
228
332
|
}
|
|
229
333
|
this.fullResponse = '';
|
|
334
|
+
return false;
|
|
230
335
|
}
|
|
231
336
|
|
|
232
337
|
/**
|
|
@@ -316,12 +421,23 @@ export class StreamHandler {
|
|
|
316
421
|
switch (eventType) {
|
|
317
422
|
case 'on_chat_model_stream':
|
|
318
423
|
let chunk = data?.chunk?.content;
|
|
319
|
-
|
|
320
|
-
|
|
424
|
+
// 处理多种内容格式:字符串或数组
|
|
425
|
+
let textChunk = '';
|
|
426
|
+
if (typeof chunk === 'string') {
|
|
427
|
+
textChunk = chunk;
|
|
428
|
+
} else if (Array.isArray(chunk)) {
|
|
429
|
+
// 数组格式:提取所有 text 类型的内容
|
|
430
|
+
textChunk = chunk.filter(c => c.type === 'text').map(c => c.text).join('');
|
|
431
|
+
} else if (chunk?.text) {
|
|
432
|
+
// 对象格式:提取 text 字段
|
|
433
|
+
textChunk = chunk.text;
|
|
434
|
+
}
|
|
435
|
+
if (textChunk) {
|
|
436
|
+
textChunk = cleanDSML(textChunk);
|
|
321
437
|
// CLI 侧仍流式输出
|
|
322
|
-
process.stdout.write(
|
|
438
|
+
process.stdout.write(textChunk);
|
|
323
439
|
// 面板侧只累积,不推送
|
|
324
|
-
this.fullResponse = (this.fullResponse || '') +
|
|
440
|
+
this.fullResponse = (this.fullResponse || '') + textChunk;
|
|
325
441
|
}
|
|
326
442
|
break;
|
|
327
443
|
|
package/src/agent/index.js
CHANGED
|
@@ -7,8 +7,8 @@
|
|
|
7
7
|
import 'dotenv/config';
|
|
8
8
|
import { StateBackend, FilesystemBackend, createFilesystemMiddleware, createPatchToolCallsMiddleware } from 'deepagents';
|
|
9
9
|
import { createAgent, toolRetryMiddleware, summarizationMiddleware, anthropicPromptCachingMiddleware, todoListMiddleware, humanInTheLoopMiddleware } from 'langchain';
|
|
10
|
-
import {
|
|
11
|
-
import {
|
|
10
|
+
import { ChatAnthropic } from '@langchain/anthropic';
|
|
11
|
+
import { SqliteSaver } from '@langchain/langgraph-checkpoint-sqlite';
|
|
12
12
|
|
|
13
13
|
import { coreTools } from './tools/index.js';
|
|
14
14
|
import { allSubagents } from './subagents/index.js';
|
|
@@ -17,7 +17,10 @@ import { createReportMiddleware } from './middleware/report.js';
|
|
|
17
17
|
import { createFilterToolsMiddleware } from './middleware/filterTools.js';
|
|
18
18
|
import { createCustomSubAgentMiddleware } from './middleware/subagent.js';
|
|
19
19
|
import { createToolGuardMiddleware } from './middleware/toolGuard.js';
|
|
20
|
+
import { createToolCallLimitMiddleware } from './subagents/factory.js';
|
|
20
21
|
import { createValidationWorkflowMiddleware } from './middleware/validationWorkflow.js';
|
|
22
|
+
import { createMemoryFlushMiddleware } from './middleware/memoryFlush.js';
|
|
23
|
+
import { createToolAvailabilityMiddleware } from './middleware/toolAvailability.js';
|
|
21
24
|
|
|
22
25
|
// createDeepAgent 内部拼接的 BASE_PROMPT
|
|
23
26
|
const BASE_PROMPT = 'In order to complete the objective that the user asks of you, you have access to a number of standard tools.';
|
|
@@ -29,9 +32,48 @@ const config = {
|
|
|
29
32
|
model: process.env.DEEPSPIDER_MODEL || 'gpt-4o',
|
|
30
33
|
};
|
|
31
34
|
|
|
35
|
+
/**
|
|
36
|
+
* 递归移除 JSON Schema 中 Anthropic API 不支持的关键字
|
|
37
|
+
* Zod v4 的 toJSONSchema 会生成 $schema 和 propertyNames,Anthropic 拒绝
|
|
38
|
+
* additionalProperties: {} 空对象也不被接受,改成 true
|
|
39
|
+
*/
|
|
40
|
+
function stripUnsupportedSchemaKeys(obj) {
|
|
41
|
+
if (!obj || typeof obj !== 'object') return obj;
|
|
42
|
+
if (Array.isArray(obj)) return obj.map(stripUnsupportedSchemaKeys);
|
|
43
|
+
const res = {};
|
|
44
|
+
for (const k in obj) {
|
|
45
|
+
if (k === '$schema' || k === 'propertyNames') continue;
|
|
46
|
+
// additionalProperties: {} → true (空对象等于"任意类型",但Anthropic不接受空对象)
|
|
47
|
+
if (k === 'additionalProperties' && obj[k] !== null && typeof obj[k] === 'object' && Object.keys(obj[k]).length === 0) {
|
|
48
|
+
res[k] = true;
|
|
49
|
+
continue;
|
|
50
|
+
}
|
|
51
|
+
res[k] = stripUnsupportedSchemaKeys(obj[k]);
|
|
52
|
+
}
|
|
53
|
+
return res;
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
/**
|
|
57
|
+
* 自定义 fetch:拦截 LLM API 请求,strip 工具 schema 中 Zod v4 生成的不兼容字段
|
|
58
|
+
* 保留作为安全网,防止 $schema / propertyNames / additionalProperties:{} 泄漏到 API
|
|
59
|
+
*/
|
|
60
|
+
const _origFetch = globalThis.fetch;
|
|
61
|
+
globalThis.fetch = async function(url, opts) {
|
|
62
|
+
if (opts?.body && typeof opts.body === 'string' && opts.body.includes('"tools"')) {
|
|
63
|
+
try {
|
|
64
|
+
const body = JSON.parse(opts.body);
|
|
65
|
+
if (body.tools) {
|
|
66
|
+
body.tools = stripUnsupportedSchemaKeys(body.tools);
|
|
67
|
+
opts = { ...opts, body: JSON.stringify(body) };
|
|
68
|
+
}
|
|
69
|
+
} catch { /* ignore parse errors on non-LLM requests */ }
|
|
70
|
+
}
|
|
71
|
+
return _origFetch(url, opts);
|
|
72
|
+
};
|
|
73
|
+
|
|
32
74
|
/**
|
|
33
75
|
* 创建 LLM 模型实例
|
|
34
|
-
* 使用
|
|
76
|
+
* 使用 ChatAnthropic 发送原生 Anthropic 格式,避免代理的 OpenAI→Anthropic 转换引入 schema 错误
|
|
35
77
|
*/
|
|
36
78
|
function createModel(options = {}) {
|
|
37
79
|
const {
|
|
@@ -40,10 +82,13 @@ function createModel(options = {}) {
|
|
|
40
82
|
baseUrl = config.baseUrl,
|
|
41
83
|
} = options;
|
|
42
84
|
|
|
43
|
-
|
|
85
|
+
// ChatAnthropic 的 baseURL 不含 /v1(SDK 自动拼接)
|
|
86
|
+
const anthropicBaseUrl = baseUrl?.replace(/\/v1\/?$/, '') || undefined;
|
|
87
|
+
|
|
88
|
+
return new ChatAnthropic({
|
|
44
89
|
model,
|
|
45
|
-
apiKey,
|
|
46
|
-
|
|
90
|
+
anthropicApiKey: apiKey,
|
|
91
|
+
anthropicApiUrl: anthropicBaseUrl,
|
|
47
92
|
temperature: 0,
|
|
48
93
|
});
|
|
49
94
|
}
|
|
@@ -59,18 +104,27 @@ export function createDeepSpiderAgent(options = {}) {
|
|
|
59
104
|
enableMemory = true,
|
|
60
105
|
enableInterrupt = false,
|
|
61
106
|
onReportReady = null, // 报告就绪回调
|
|
107
|
+
onFileSaved = null, // 文件保存通知回调
|
|
108
|
+
checkpointer,
|
|
62
109
|
} = options;
|
|
63
110
|
|
|
64
|
-
// 创建 LLM
|
|
111
|
+
// 创建 LLM 模型实例(加 timeout 防止 API 无响应时 streamEvents 永久挂起)
|
|
65
112
|
const llm = createModel({ model, apiKey, baseUrl });
|
|
113
|
+
llm.timeout = 120000; // 120s — 主 LLM 超时
|
|
114
|
+
|
|
115
|
+
// 摘要专用 LLM:故意不设 timeout
|
|
116
|
+
// 原因:summarizationMiddleware 的 createSummary 有 try-catch,超时会返回错误字符串,
|
|
117
|
+
// 但 beforeModel 仍会用这个错误字符串替换所有原始消息(REMOVE_ALL_MESSAGES),导致数据丢失。
|
|
118
|
+
// 安全网由 StreamHandler.withStallTimeout (150s) 提供 — 它在 BeforeModelNode 完成前触发,
|
|
119
|
+
// 不会写入 checkpoint,原始数据得以保留。
|
|
120
|
+
const summaryLlm = createModel({ model, apiKey, baseUrl });
|
|
66
121
|
|
|
67
122
|
// 后端配置:使用文件系统持久化
|
|
68
123
|
const backend = enableMemory
|
|
69
124
|
? new FilesystemBackend({ rootDir: './.deepspider-agent' })
|
|
70
125
|
: new StateBackend();
|
|
71
126
|
|
|
72
|
-
|
|
73
|
-
const checkpointer = new MemorySaver();
|
|
127
|
+
const resolvedCheckpointer = checkpointer ?? SqliteSaver.fromConnString(':memory:');
|
|
74
128
|
|
|
75
129
|
// 人机交互配置
|
|
76
130
|
const interruptOn = enableInterrupt
|
|
@@ -84,7 +138,7 @@ export function createDeepSpiderAgent(options = {}) {
|
|
|
84
138
|
const subagentDefaultMiddleware = [
|
|
85
139
|
todoListMiddleware(),
|
|
86
140
|
createFilesystemMiddleware({ backend }),
|
|
87
|
-
summarizationMiddleware({ model:
|
|
141
|
+
summarizationMiddleware({ model: summaryLlm, trigger: { tokens: 100000 }, keep: { messages: 6 } }),
|
|
88
142
|
anthropicPromptCachingMiddleware({ unsupportedModelBehavior: 'ignore' }),
|
|
89
143
|
createPatchToolCallsMiddleware(),
|
|
90
144
|
];
|
|
@@ -107,7 +161,10 @@ export function createDeepSpiderAgent(options = {}) {
|
|
|
107
161
|
generalPurposeAgent: false,
|
|
108
162
|
defaultInterruptOn: interruptOn,
|
|
109
163
|
}),
|
|
110
|
-
|
|
164
|
+
// === 预警 + 拦截(在 summarization 之前)===
|
|
165
|
+
createMemoryFlushMiddleware(),
|
|
166
|
+
createToolAvailabilityMiddleware(),
|
|
167
|
+
summarizationMiddleware({ model: summaryLlm, trigger: { tokens: 100000 }, keep: { messages: 6 } }),
|
|
111
168
|
anthropicPromptCachingMiddleware({ unsupportedModelBehavior: 'ignore' }),
|
|
112
169
|
createPatchToolCallsMiddleware(),
|
|
113
170
|
// === HITL(如果启用)===
|
|
@@ -122,15 +179,16 @@ export function createDeepSpiderAgent(options = {}) {
|
|
|
122
179
|
},
|
|
123
180
|
}),
|
|
124
181
|
createToolGuardMiddleware(),
|
|
182
|
+
createToolCallLimitMiddleware(200),
|
|
125
183
|
createFilterToolsMiddleware(),
|
|
126
184
|
createValidationWorkflowMiddleware(),
|
|
127
|
-
createReportMiddleware({ onReportReady }),
|
|
185
|
+
createReportMiddleware({ onReportReady, onFileSaved }),
|
|
128
186
|
],
|
|
129
|
-
checkpointer,
|
|
187
|
+
checkpointer: resolvedCheckpointer,
|
|
130
188
|
});
|
|
131
189
|
}
|
|
132
190
|
|
|
133
|
-
//
|
|
191
|
+
// 默认导出(内存模式,兼容 MCP 等非 CLI 场景)
|
|
134
192
|
export const agent = createDeepSpiderAgent();
|
|
135
193
|
|
|
136
194
|
export default agent;
|
|
package/src/agent/middleware/memoryFlush.js
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DeepSpider - Memory Flush 中间件
|
|
3
|
+
* 在 summarization 触发前(85k token),注入 SystemMessage 提醒 Agent 保存关键进度
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { createMiddleware, countTokensApproximately } from 'langchain';
|
|
7
|
+
import { SystemMessage } from '@langchain/core/messages';
|
|
8
|
+
|
|
9
|
+
const FLUSH_THRESHOLD = 85000;
|
|
10
|
+
|
|
11
|
+
const FLUSH_REMINDER = `⚠️ 上下文即将被压缩(当前接近 token 上限)。
|
|
12
|
+
请立即使用 save_memo 工具保存以下关键信息,否则压缩后将丢失:
|
|
13
|
+
1. 当前分析目标和已完成的步骤
|
|
14
|
+
2. 已发现的关键参数、加密逻辑、请求链路
|
|
15
|
+
3. 下一步计划
|
|
16
|
+
|
|
17
|
+
保存后继续正常工作。`;
|
|
18
|
+
|
|
19
|
+
export function createMemoryFlushMiddleware() {
|
|
20
|
+
let flushed = false;
|
|
21
|
+
|
|
22
|
+
return createMiddleware({
|
|
23
|
+
name: 'memoryFlushMiddleware',
|
|
24
|
+
|
|
25
|
+
beforeModel: async (state) => {
|
|
26
|
+
const tokens = countTokensApproximately(state.messages);
|
|
27
|
+
|
|
28
|
+
// token 骤降(summarization 已执行),重置标记
|
|
29
|
+
if (flushed && tokens < FLUSH_THRESHOLD * 0.5) {
|
|
30
|
+
flushed = false;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
// 达到阈值且未提醒过,注入提醒
|
|
34
|
+
if (!flushed && tokens >= FLUSH_THRESHOLD) {
|
|
35
|
+
flushed = true;
|
|
36
|
+
return {
|
|
37
|
+
...state,
|
|
38
|
+
messages: [
|
|
39
|
+
...state.messages,
|
|
40
|
+
new SystemMessage(FLUSH_REMINDER),
|
|
41
|
+
],
|
|
42
|
+
};
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
return state;
|
|
46
|
+
},
|
|
47
|
+
});
|
|
48
|
+
}
|