deepspider 0.2.11 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +71 -24
- package/bin/cli.js +45 -0
- package/package.json +10 -4
- package/src/agent/core/PanelBridge.js +133 -0
- package/src/agent/core/RetryManager.js +51 -0
- package/src/agent/core/StreamHandler.js +263 -0
- package/src/agent/core/index.js +7 -0
- package/src/agent/errors/ErrorClassifier.js +43 -0
- package/src/agent/errors/SpiderError.js +68 -0
- package/src/agent/errors/index.js +19 -0
- package/src/agent/run.js +67 -460
- package/src/agent/setup.js +14 -14
- package/src/agent/subagents/factory.js +60 -0
- package/src/agent/subagents/index.js +3 -0
- package/src/agent/tools/report.js +36 -4
- package/src/browser/client.js +47 -10
- package/src/cli/commands/config.js +94 -0
- package/src/cli/commands/help.js +34 -0
- package/src/cli/commands/update.js +78 -0
- package/src/cli/commands/version.js +9 -0
- package/src/cli/config.js +15 -0
- package/src/config/settings.js +102 -0
- package/.claude/agents/check.md +0 -122
- package/.claude/agents/debug.md +0 -106
- package/.claude/agents/dispatch.md +0 -214
- package/.claude/agents/implement.md +0 -96
- package/.claude/agents/plan.md +0 -396
- package/.claude/agents/research.md +0 -120
- package/.claude/commands/evolve/merge.md +0 -80
- package/.claude/commands/trellis/before-backend-dev.md +0 -13
- package/.claude/commands/trellis/before-frontend-dev.md +0 -13
- package/.claude/commands/trellis/break-loop.md +0 -107
- package/.claude/commands/trellis/check-backend.md +0 -13
- package/.claude/commands/trellis/check-cross-layer.md +0 -153
- package/.claude/commands/trellis/check-frontend.md +0 -13
- package/.claude/commands/trellis/create-command.md +0 -154
- package/.claude/commands/trellis/finish-work.md +0 -129
- package/.claude/commands/trellis/integrate-skill.md +0 -219
- package/.claude/commands/trellis/onboard.md +0 -358
- package/.claude/commands/trellis/parallel.md +0 -193
- package/.claude/commands/trellis/record-session.md +0 -62
- package/.claude/commands/trellis/start.md +0 -280
- package/.claude/commands/trellis/update-spec.md +0 -213
- package/.claude/hooks/inject-subagent-context.py +0 -758
- package/.claude/hooks/ralph-loop.py +0 -374
- package/.claude/hooks/session-start.py +0 -126
- package/.claude/settings.json +0 -41
- package/.claude/skills/deepagents-guide/SKILL.md +0 -428
- package/.cursor/commands/trellis-before-backend-dev.md +0 -13
- package/.cursor/commands/trellis-before-frontend-dev.md +0 -13
- package/.cursor/commands/trellis-break-loop.md +0 -107
- package/.cursor/commands/trellis-check-backend.md +0 -13
- package/.cursor/commands/trellis-check-cross-layer.md +0 -153
- package/.cursor/commands/trellis-check-frontend.md +0 -13
- package/.cursor/commands/trellis-create-command.md +0 -154
- package/.cursor/commands/trellis-finish-work.md +0 -129
- package/.cursor/commands/trellis-integrate-skill.md +0 -219
- package/.cursor/commands/trellis-onboard.md +0 -358
- package/.cursor/commands/trellis-record-session.md +0 -62
- package/.cursor/commands/trellis-start.md +0 -156
- package/.cursor/commands/trellis-update-spec.md +0 -213
- package/.github/workflows/publish.yml +0 -63
- package/.husky/pre-commit +0 -1
- package/.mcp.json +0 -8
- package/.trellis/.template-hashes.json +0 -65
- package/.trellis/.version +0 -1
- package/.trellis/scripts/add-session.sh +0 -384
- package/.trellis/scripts/common/developer.sh +0 -129
- package/.trellis/scripts/common/git-context.sh +0 -263
- package/.trellis/scripts/common/paths.sh +0 -208
- package/.trellis/scripts/common/phase.sh +0 -150
- package/.trellis/scripts/common/registry.sh +0 -247
- package/.trellis/scripts/common/task-queue.sh +0 -142
- package/.trellis/scripts/common/task-utils.sh +0 -151
- package/.trellis/scripts/common/worktree.sh +0 -128
- package/.trellis/scripts/create-bootstrap.sh +0 -299
- package/.trellis/scripts/get-context.sh +0 -7
- package/.trellis/scripts/get-developer.sh +0 -15
- package/.trellis/scripts/init-developer.sh +0 -34
- package/.trellis/scripts/multi-agent/cleanup.sh +0 -396
- package/.trellis/scripts/multi-agent/create-pr.sh +0 -241
- package/.trellis/scripts/multi-agent/plan.sh +0 -207
- package/.trellis/scripts/multi-agent/start.sh +0 -310
- package/.trellis/scripts/multi-agent/status.sh +0 -828
- package/.trellis/scripts/task.sh +0 -1118
- package/.trellis/spec/backend/ci-cd-guidelines.md +0 -73
- package/.trellis/spec/backend/deepagents-guide.md +0 -380
- package/.trellis/spec/backend/directory-structure.md +0 -126
- package/.trellis/spec/backend/examples/skills/deepagents-guide/README.md +0 -11
- package/.trellis/spec/backend/examples/skills/deepagents-guide/agent.js.template +0 -20
- package/.trellis/spec/backend/examples/skills/deepagents-guide/skills-config.js.template +0 -13
- package/.trellis/spec/backend/examples/skills/deepagents-guide/subagent.js.template +0 -19
- package/.trellis/spec/backend/hook-guidelines.md +0 -218
- package/.trellis/spec/backend/index.md +0 -37
- package/.trellis/spec/backend/quality-guidelines.md +0 -302
- package/.trellis/spec/backend/state-management.md +0 -76
- package/.trellis/spec/backend/tool-guidelines.md +0 -144
- package/.trellis/spec/backend/type-safety.md +0 -71
- package/.trellis/spec/guides/code-reuse-thinking-guide.md +0 -92
- package/.trellis/spec/guides/cross-layer-thinking-guide.md +0 -94
- package/.trellis/spec/guides/index.md +0 -79
- package/.trellis/tasks/archive/02-02-evolving-skills/prd.md +0 -61
- package/.trellis/tasks/archive/02-02-evolving-skills/task.json +0 -29
- package/.trellis/tasks/archive/2026-02/00-bootstrap-guidelines/prd.md +0 -86
- package/.trellis/tasks/archive/2026-02/00-bootstrap-guidelines/task.json +0 -27
- package/.trellis/tasks/archive/2026-02/02-02-skills-system/check.jsonl +0 -3
- package/.trellis/tasks/archive/2026-02/02-02-skills-system/debug.jsonl +0 -2
- package/.trellis/tasks/archive/2026-02/02-02-skills-system/implement.jsonl +0 -5
- package/.trellis/tasks/archive/2026-02/02-02-skills-system/prd.md +0 -33
- package/.trellis/tasks/archive/2026-02/02-02-skills-system/task.json +0 -41
- package/.trellis/workflow.md +0 -407
- package/.trellis/workspace/index.md +0 -123
- package/.trellis/workspace/pony/index.md +0 -42
- package/.trellis/workspace/pony/journal-1.md +0 -125
- package/.trellis/worktree.yaml +0 -47
- package/AGENTS.md +0 -18
- package/CLAUDE.md +0 -315
- package/agents/deepspider.md +0 -142
- package/docs/DEBUG.md +0 -42
- package/docs/GUIDE.md +0 -334
- package/docs/PROMPT.md +0 -60
- package/docs/USAGE.md +0 -226
- package/eslint.config.js +0 -51
- package/test/analyze.test.js +0 -90
- package/test/envdump.test.js +0 -74
- package/test/flow.test.js +0 -90
- package/test/hooks.test.js +0 -138
- package/test/plugin.test.js +0 -35
- package/test/refactor-full.test.js +0 -30
- package/test/refactor.test.js +0 -21
- package/test/samples/obfuscated.js +0 -61
- package/test/samples/original.js +0 -66
- package/test/samples/v10_eval_chain.js +0 -52
- package/test/samples/v11_bytecode_vm.js +0 -81
- package/test/samples/v12_polymorphic.js +0 -69
- package/test/samples/v1_ob_basic.js +0 -98
- package/test/samples/v2_ob_advanced.js +0 -99
- package/test/samples/v3_jjencode.js +0 -77
- package/test/samples/v4_aaencode.js +0 -73
- package/test/samples/v5_control_flow.js +0 -86
- package/test/samples/v6_string_encryption.js +0 -71
- package/test/samples/v7_jsvmp.js +0 -83
- package/test/samples/v8_anti_debug.js +0 -79
- package/test/samples/v9_proxy_trap.js +0 -49
- package/test/samples.test.js +0 -96
- package/test/webcrack.test.js +0 -55
package/src/agent/run.js
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
1
|
/**
|
|
3
|
-
* DeepSpider Agent
|
|
2
|
+
* DeepSpider Agent 运行模块
|
|
4
3
|
* 使用 CDP binding 接收浏览器消息
|
|
5
4
|
* 支持流式输出显示思考过程
|
|
5
|
+
*
|
|
6
|
+
* 所有状态初始化延迟到 init() 中执行,避免 import 时产生副作用
|
|
6
7
|
*/
|
|
7
8
|
|
|
8
|
-
import 'dotenv/config';
|
|
9
9
|
import readline from 'readline';
|
|
10
10
|
import { readFileSync } from 'fs';
|
|
11
11
|
import { marked } from 'marked';
|
|
@@ -16,445 +16,16 @@ import { markHookInjected } from './tools/runtime.js';
|
|
|
16
16
|
import { createLogger } from './logger.js';
|
|
17
17
|
import { browserTools } from './tools/browser.js';
|
|
18
18
|
import { ensureConfig } from './setup.js';
|
|
19
|
+
import { StreamHandler, PanelBridge } from './core/index.js';
|
|
19
20
|
|
|
20
|
-
|
|
21
|
-
const targetUrl = args.find(arg => arg.startsWith('http://') || arg.startsWith('https://'));
|
|
22
|
-
|
|
23
|
-
const rl = readline.createInterface({
|
|
24
|
-
input: process.stdin,
|
|
25
|
-
output: process.stdout,
|
|
26
|
-
});
|
|
27
|
-
|
|
21
|
+
let rl = null;
|
|
28
22
|
let browser = null;
|
|
29
|
-
let
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
const DEBUG = process.env.DEBUG === 'true' || process.argv.includes('--debug');
|
|
36
|
-
|
|
37
|
-
// 重试配置
|
|
38
|
-
const RETRY_CONFIG = {
|
|
39
|
-
maxRetries: 3,
|
|
40
|
-
baseDelayMs: 2000,
|
|
41
|
-
maxDelayMs: 30000,
|
|
42
|
-
};
|
|
43
|
-
|
|
44
|
-
// 人工介入配置
|
|
45
|
-
const INTERVENTION_CONFIG = {
|
|
46
|
-
idleTimeoutMs: 120000, // 2分钟无响应触发提示
|
|
47
|
-
checkIntervalMs: 30000, // 30秒检测一次
|
|
48
|
-
// 从 browserTools 获取可能触发风控的工具名称
|
|
49
|
-
riskTools: browserTools.map(t => t.name),
|
|
50
|
-
};
|
|
51
|
-
|
|
52
|
-
/**
|
|
53
|
-
* 判断是否为工具参数错误(需要 LLM 修正)
|
|
54
|
-
*/
|
|
55
|
-
function isToolSchemaError(errMsg) {
|
|
56
|
-
return /did not match expected schema|Invalid input|tool input/i.test(errMsg);
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
/**
|
|
60
|
-
* 判断是否为 API 服务错误(可直接重试)
|
|
61
|
-
*/
|
|
62
|
-
function isApiServiceError(errMsg) {
|
|
63
|
-
return /503|502|429|rate limit|无可用渠道|timeout|ECONNRESET|ETIMEDOUT/i.test(errMsg);
|
|
64
|
-
}
|
|
65
|
-
|
|
66
|
-
/**
|
|
67
|
-
* 计算重试延迟(指数退避 + 抖动)
|
|
68
|
-
*/
|
|
69
|
-
function getRetryDelay(retryCount) {
|
|
70
|
-
const delay = Math.min(
|
|
71
|
-
RETRY_CONFIG.baseDelayMs * Math.pow(2, retryCount),
|
|
72
|
-
RETRY_CONFIG.maxDelayMs
|
|
73
|
-
);
|
|
74
|
-
// 添加 0-25% 的随机抖动
|
|
75
|
-
const jitter = delay * Math.random() * 0.25;
|
|
76
|
-
return Math.round(delay + jitter);
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
/**
|
|
80
|
-
* 延迟函数
|
|
81
|
-
*/
|
|
82
|
-
function sleep(ms) {
|
|
83
|
-
return new Promise(resolve => setTimeout(resolve, ms));
|
|
84
|
-
}
|
|
85
|
-
|
|
86
|
-
// DeepSeek 特殊标记清理
|
|
87
|
-
const DSML_PATTERN = /|DSML|/g;
|
|
88
|
-
function cleanDSML(text) {
|
|
89
|
-
return text ? text.replace(DSML_PATTERN, '') : text;
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
// 创建日志回调
|
|
93
|
-
const logger = createLogger({ enabled: DEBUG, verbose: false });
|
|
94
|
-
|
|
95
|
-
/**
|
|
96
|
-
* 报告就绪回调 - 由中间件在 afterAgent 时调用
|
|
97
|
-
*/
|
|
98
|
-
async function onReportReady(mdFilePath) {
|
|
99
|
-
console.log('[report] 中间件触发报告显示:', mdFilePath);
|
|
100
|
-
await showReportFromFile(mdFilePath);
|
|
101
|
-
}
|
|
102
|
-
|
|
103
|
-
// 创建 Agent,传入报告回调
|
|
104
|
-
const agent = createDeepSpiderAgent({ onReportReady });
|
|
105
|
-
|
|
106
|
-
const config = {
|
|
107
|
-
configurable: { thread_id: `deepspider-${Date.now()}` },
|
|
108
|
-
recursionLimit: 5000,
|
|
109
|
-
callbacks: logger ? [logger] : [],
|
|
110
|
-
};
|
|
111
|
-
|
|
112
|
-
// 文本累积缓冲区 - 用于累积 LLM 流式输出
|
|
113
|
-
let panelTextBuffer = '';
|
|
114
|
-
let hasStartedAssistantMsg = false;
|
|
115
|
-
|
|
116
|
-
function debug(...args) {
|
|
117
|
-
if (DEBUG) {
|
|
118
|
-
console.log('[DEBUG]', ...args);
|
|
119
|
-
}
|
|
120
|
-
}
|
|
121
|
-
|
|
122
|
-
/**
|
|
123
|
-
* 发送消息到前端面板
|
|
124
|
-
*/
|
|
125
|
-
async function sendToPanel(role, content) {
|
|
126
|
-
if (!content?.trim()) return;
|
|
127
|
-
|
|
128
|
-
const page = browser?.getPage?.();
|
|
129
|
-
if (!page) return;
|
|
130
|
-
|
|
131
|
-
try {
|
|
132
|
-
const escaped = JSON.stringify(content.trim());
|
|
133
|
-
const code = `window.__deepspider__?.addMessage?.('${role}', ${escaped})`;
|
|
134
|
-
await evaluateInPage(code);
|
|
135
|
-
} catch (e) {
|
|
136
|
-
// ignore
|
|
137
|
-
}
|
|
138
|
-
}
|
|
139
|
-
|
|
140
|
-
/**
|
|
141
|
-
* 累积文本到缓冲区(用于 LLM 流式输出)
|
|
142
|
-
*/
|
|
143
|
-
async function appendToPanel(text) {
|
|
144
|
-
if (!text) return;
|
|
145
|
-
panelTextBuffer += text;
|
|
146
|
-
|
|
147
|
-
// 每累积一定量或遇到换行时刷新
|
|
148
|
-
if (panelTextBuffer.length > 200 || text.includes('\n')) {
|
|
149
|
-
await flushPanelText();
|
|
150
|
-
}
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
/**
|
|
154
|
-
* 通过 CDP 在页面主世界执行 JavaScript(复用 session)
|
|
155
|
-
*/
|
|
156
|
-
async function evaluateInPage(code) {
|
|
157
|
-
const cdp = await browser?.getCDPSession?.();
|
|
158
|
-
if (!cdp) return null;
|
|
159
|
-
|
|
160
|
-
try {
|
|
161
|
-
const result = await cdp.send('Runtime.evaluate', {
|
|
162
|
-
expression: code,
|
|
163
|
-
returnByValue: true,
|
|
164
|
-
});
|
|
165
|
-
return result.result?.value;
|
|
166
|
-
} catch (e) {
|
|
167
|
-
debug('evaluateInPage 失败:', e.message);
|
|
168
|
-
return null;
|
|
169
|
-
}
|
|
170
|
-
}
|
|
171
|
-
|
|
172
|
-
/**
|
|
173
|
-
* 刷新累积的文本到面板
|
|
174
|
-
*/
|
|
175
|
-
async function flushPanelText() {
|
|
176
|
-
if (!panelTextBuffer.trim()) return;
|
|
177
|
-
|
|
178
|
-
const page = browser?.getPage?.();
|
|
179
|
-
if (!page) {
|
|
180
|
-
panelTextBuffer = '';
|
|
181
|
-
return;
|
|
182
|
-
}
|
|
183
|
-
|
|
184
|
-
try {
|
|
185
|
-
const content = panelTextBuffer.trim();
|
|
186
|
-
const escaped = JSON.stringify(content);
|
|
187
|
-
|
|
188
|
-
if (!hasStartedAssistantMsg) {
|
|
189
|
-
const code = `(function() {
|
|
190
|
-
const fn = window.__deepspider__?.addMessage;
|
|
191
|
-
if (typeof fn === 'function') {
|
|
192
|
-
fn('assistant', ${escaped});
|
|
193
|
-
return { ok: true };
|
|
194
|
-
}
|
|
195
|
-
return { ok: false };
|
|
196
|
-
})()`;
|
|
197
|
-
await evaluateInPage(code);
|
|
198
|
-
hasStartedAssistantMsg = true;
|
|
199
|
-
} else {
|
|
200
|
-
const code = `(function() {
|
|
201
|
-
const fn = window.__deepspider__?.appendToLastMessage;
|
|
202
|
-
if (typeof fn === 'function') {
|
|
203
|
-
fn('assistant', ${escaped});
|
|
204
|
-
return { ok: true };
|
|
205
|
-
}
|
|
206
|
-
return { ok: false };
|
|
207
|
-
})()`;
|
|
208
|
-
await evaluateInPage(code);
|
|
209
|
-
}
|
|
210
|
-
} catch (e) {
|
|
211
|
-
// ignore
|
|
212
|
-
}
|
|
213
|
-
|
|
214
|
-
panelTextBuffer = '';
|
|
215
|
-
}
|
|
216
|
-
|
|
217
|
-
/**
|
|
218
|
-
* 流式对话 - 显示思考过程(带重试)
|
|
219
|
-
*/
|
|
220
|
-
async function chatStream(input, page = null, retryCount = 0) {
|
|
221
|
-
currentPage = page;
|
|
222
|
-
let finalResponse = '';
|
|
223
|
-
let lastEventTime = Date.now();
|
|
224
|
-
let eventCount = 0;
|
|
225
|
-
let lastToolCall = null;
|
|
226
|
-
|
|
227
|
-
// 重置面板状态
|
|
228
|
-
panelTextBuffer = '';
|
|
229
|
-
hasStartedAssistantMsg = false;
|
|
230
|
-
|
|
231
|
-
// 设置忙碌状态
|
|
232
|
-
await evaluateInPage('window.__deepspider__?.setBusy?.(true)');
|
|
233
|
-
|
|
234
|
-
debug(`chatStream: 开始处理, 输入长度=${input.length}, page=${!!page}`);
|
|
235
|
-
|
|
236
|
-
// 心跳检测 - 每30秒输出状态
|
|
237
|
-
let interventionNotified = false;
|
|
238
|
-
const heartbeat = setInterval(() => {
|
|
239
|
-
const elapsed = Math.round((Date.now() - lastEventTime) / 1000);
|
|
240
|
-
if (elapsed > 30) {
|
|
241
|
-
console.log(`\n[心跳] 已等待 ${elapsed}s, 事件数=${eventCount}, 最后工具=${lastToolCall || '无'}`);
|
|
242
|
-
}
|
|
243
|
-
|
|
244
|
-
// 超时提示 - 只在风险工具调用后提示
|
|
245
|
-
const isRiskTool = lastToolCall && INTERVENTION_CONFIG.riskTools.includes(lastToolCall);
|
|
246
|
-
if (elapsed * 1000 > INTERVENTION_CONFIG.idleTimeoutMs && !interventionNotified && isRiskTool) {
|
|
247
|
-
interventionNotified = true;
|
|
248
|
-
const msg = '⚠️ 页面操作后长时间无响应,可能遇到验证码或风控,请检查浏览器';
|
|
249
|
-
console.log('\n[提示] ' + msg);
|
|
250
|
-
sendToPanel('system', msg).catch(() => {});
|
|
251
|
-
}
|
|
252
|
-
}, INTERVENTION_CONFIG.checkIntervalMs);
|
|
253
|
-
|
|
254
|
-
try {
|
|
255
|
-
debug('chatStream: 创建事件流');
|
|
256
|
-
const eventStream = await agent.streamEvents(
|
|
257
|
-
{ messages: [{ role: 'user', content: input }] },
|
|
258
|
-
{ ...config, version: 'v2' }
|
|
259
|
-
);
|
|
260
|
-
|
|
261
|
-
debug('chatStream: 开始遍历事件');
|
|
262
|
-
for await (const event of eventStream) {
|
|
263
|
-
lastEventTime = Date.now();
|
|
264
|
-
eventCount++;
|
|
265
|
-
|
|
266
|
-
// 记录工具调用
|
|
267
|
-
if (event.event === 'on_tool_start') {
|
|
268
|
-
lastToolCall = event.name;
|
|
269
|
-
}
|
|
270
|
-
|
|
271
|
-
await handleStreamEvent(event);
|
|
272
|
-
|
|
273
|
-
// 收集最终响应
|
|
274
|
-
if (event.event === 'on_chat_model_end' && event.name === 'ChatOpenAI') {
|
|
275
|
-
const output = event.data?.output;
|
|
276
|
-
if (output?.content) {
|
|
277
|
-
finalResponse = output.content;
|
|
278
|
-
debug(`chatStream: 收到最终响应, 长度=${finalResponse.length}`);
|
|
279
|
-
}
|
|
280
|
-
}
|
|
281
|
-
}
|
|
282
|
-
|
|
283
|
-
// 流正常结束
|
|
284
|
-
clearInterval(heartbeat);
|
|
285
|
-
console.log(`\n[完成] 共处理 ${eventCount} 个事件`);
|
|
286
|
-
|
|
287
|
-
// 刷新剩余的累积内容到面板
|
|
288
|
-
debug('chatStream: 刷新剩余内容');
|
|
289
|
-
await flushPanelText();
|
|
290
|
-
|
|
291
|
-
// 流式输出完成,触发 Markdown 渲染
|
|
292
|
-
await evaluateInPage('window.__deepspider__?.finalizeMessage?.("assistant")');
|
|
293
|
-
|
|
294
|
-
// 清除忙碌状态
|
|
295
|
-
await evaluateInPage('window.__deepspider__?.setBusy?.(false)');
|
|
296
|
-
|
|
297
|
-
debug(`chatStream: 完成, 响应长度=${finalResponse.length}`);
|
|
298
|
-
return finalResponse || '[无响应]';
|
|
299
|
-
} catch (error) {
|
|
300
|
-
clearInterval(heartbeat);
|
|
301
|
-
const errMsg = error.message || String(error);
|
|
302
|
-
|
|
303
|
-
// 清除忙碌状态
|
|
304
|
-
await evaluateInPage('window.__deepspider__?.setBusy?.(false)');
|
|
305
|
-
|
|
306
|
-
console.error(`\n[异常] 事件数=${eventCount}, 最后工具=${lastToolCall || '无'}, 错误: ${errMsg}`);
|
|
307
|
-
|
|
308
|
-
// 检查是否可重试
|
|
309
|
-
if (retryCount < RETRY_CONFIG.maxRetries) {
|
|
310
|
-
// API 服务错误 - 从检查点恢复
|
|
311
|
-
if (isApiServiceError(errMsg)) {
|
|
312
|
-
const delay = getRetryDelay(retryCount);
|
|
313
|
-
console.log(`\n[重试 ${retryCount + 1}/${RETRY_CONFIG.maxRetries}] API错误,${delay}ms 后从检查点恢复...`);
|
|
314
|
-
await sendToPanel('system', `服务暂时不可用,${Math.round(delay/1000)}s 后重试 (${retryCount + 1}/${RETRY_CONFIG.maxRetries})`);
|
|
315
|
-
await sleep(delay);
|
|
316
|
-
// 从检查点恢复:不传入新消息,使用相同 thread_id
|
|
317
|
-
return chatStreamResume(page, retryCount + 1);
|
|
318
|
-
}
|
|
319
|
-
|
|
320
|
-
// 工具参数错误 - 发送错误信息让 LLM 修正
|
|
321
|
-
if (isToolSchemaError(errMsg)) {
|
|
322
|
-
console.log(`\n[重试 ${retryCount + 1}/${RETRY_CONFIG.maxRetries}] 工具参数错误,发送修正请求...`);
|
|
323
|
-
await sendToPanel('system', `工具调用失败,正在修正 (${retryCount + 1}/${RETRY_CONFIG.maxRetries})`);
|
|
324
|
-
const resumeInput = `工具调用失败: ${errMsg}\n请检查参数格式并重试。`;
|
|
325
|
-
return chatStream(resumeInput, page, retryCount + 1);
|
|
326
|
-
}
|
|
327
|
-
}
|
|
328
|
-
|
|
329
|
-
return `错误: ${errMsg}`;
|
|
330
|
-
}
|
|
331
|
-
}
|
|
332
|
-
|
|
333
|
-
/**
|
|
334
|
-
* 从检查点恢复流式对话
|
|
335
|
-
* 不传入新消息,使用相同 thread_id 从上次中断处继续
|
|
336
|
-
*/
|
|
337
|
-
async function chatStreamResume(page = null, retryCount = 0) {
|
|
338
|
-
currentPage = page;
|
|
339
|
-
let finalResponse = '';
|
|
340
|
-
let lastEventTime = Date.now();
|
|
341
|
-
let eventCount = 0;
|
|
342
|
-
|
|
343
|
-
await evaluateInPage('window.__deepspider__?.setBusy?.(true)');
|
|
344
|
-
debug(`chatStreamResume: 从检查点恢复, retryCount=${retryCount}`);
|
|
345
|
-
|
|
346
|
-
const heartbeat = setInterval(() => {
|
|
347
|
-
const elapsed = Math.round((Date.now() - lastEventTime) / 1000);
|
|
348
|
-
if (elapsed > 30) {
|
|
349
|
-
console.log(`\n[心跳] 恢复中,已等待 ${elapsed}s`);
|
|
350
|
-
}
|
|
351
|
-
}, 30000);
|
|
352
|
-
|
|
353
|
-
try {
|
|
354
|
-
// 从检查点恢复:传入 null 或空消息
|
|
355
|
-
const eventStream = await agent.streamEvents(
|
|
356
|
-
{ messages: [] },
|
|
357
|
-
{ ...config, version: 'v2' }
|
|
358
|
-
);
|
|
359
|
-
|
|
360
|
-
for await (const event of eventStream) {
|
|
361
|
-
lastEventTime = Date.now();
|
|
362
|
-
eventCount++;
|
|
363
|
-
await handleStreamEvent(event);
|
|
364
|
-
|
|
365
|
-
if (event.event === 'on_chat_model_end' && event.name === 'ChatOpenAI') {
|
|
366
|
-
const output = event.data?.output;
|
|
367
|
-
if (output?.content) {
|
|
368
|
-
finalResponse = output.content;
|
|
369
|
-
}
|
|
370
|
-
}
|
|
371
|
-
}
|
|
372
|
-
|
|
373
|
-
clearInterval(heartbeat);
|
|
374
|
-
await flushPanelText();
|
|
375
|
-
await evaluateInPage('window.__deepspider__?.setBusy?.(false)');
|
|
376
|
-
console.log(`\n[恢复完成] 共处理 ${eventCount} 个事件`);
|
|
377
|
-
return finalResponse || '[无响应]';
|
|
378
|
-
} catch (error) {
|
|
379
|
-
clearInterval(heartbeat);
|
|
380
|
-
await evaluateInPage('window.__deepspider__?.setBusy?.(false)');
|
|
381
|
-
const errMsg = error.message || String(error);
|
|
382
|
-
console.error(`\n[恢复失败] ${errMsg}`);
|
|
383
|
-
|
|
384
|
-
// 恢复失败也可以重试
|
|
385
|
-
if (isApiServiceError(errMsg) && retryCount < RETRY_CONFIG.maxRetries) {
|
|
386
|
-
const delay = getRetryDelay(retryCount);
|
|
387
|
-
console.log(`\n[重试 ${retryCount + 1}/${RETRY_CONFIG.maxRetries}] ${delay}ms 后再次恢复...`);
|
|
388
|
-
await sleep(delay);
|
|
389
|
-
return chatStreamResume(page, retryCount + 1);
|
|
390
|
-
}
|
|
391
|
-
|
|
392
|
-
return `恢复失败: ${errMsg}`;
|
|
393
|
-
}
|
|
394
|
-
}
|
|
395
|
-
|
|
396
|
-
/**
|
|
397
|
-
* 处理流式事件
|
|
398
|
-
*/
|
|
399
|
-
async function handleStreamEvent(event) {
|
|
400
|
-
const { event: eventType, name, data } = event;
|
|
401
|
-
|
|
402
|
-
// 过滤内部事件
|
|
403
|
-
if (name?.startsWith('ChannelWrite') ||
|
|
404
|
-
name?.startsWith('Branch') ||
|
|
405
|
-
name?.includes('Middleware') ||
|
|
406
|
-
name === 'RunnableSequence' ||
|
|
407
|
-
name === 'model_request' ||
|
|
408
|
-
name === 'tools') {
|
|
409
|
-
return;
|
|
410
|
-
}
|
|
411
|
-
|
|
412
|
-
debug(`handleStreamEvent: ${eventType}, name=${name}`);
|
|
413
|
-
|
|
414
|
-
switch (eventType) {
|
|
415
|
-
case 'on_chat_model_stream':
|
|
416
|
-
// LLM 输出流 - 清理 DeepSeek 特殊标记
|
|
417
|
-
let chunk = data?.chunk?.content;
|
|
418
|
-
if (chunk && typeof chunk === 'string') {
|
|
419
|
-
chunk = cleanDSML(chunk);
|
|
420
|
-
process.stdout.write(chunk);
|
|
421
|
-
await appendToPanel(chunk); // 累积发送到面板
|
|
422
|
-
}
|
|
423
|
-
break;
|
|
424
|
-
|
|
425
|
-
case 'on_tool_start':
|
|
426
|
-
// 工具调用开始
|
|
427
|
-
debug('handleStreamEvent: 工具开始,先刷新缓冲区');
|
|
428
|
-
await flushPanelText();
|
|
429
|
-
// 重置标志,让工具调用后的 AI 输出创建新消息
|
|
430
|
-
hasStartedAssistantMsg = false;
|
|
431
|
-
const input = data?.input || {};
|
|
432
|
-
const inputStr = typeof input === 'string' ? input : JSON.stringify(input);
|
|
433
|
-
const preview = inputStr.length > 100 ? inputStr.slice(0, 100) + '...' : inputStr;
|
|
434
|
-
console.log(`\n[调用] ${name}(${preview})`);
|
|
435
|
-
await sendToPanel('system', `[调用] ${name}`);
|
|
436
|
-
break;
|
|
437
|
-
|
|
438
|
-
case 'on_tool_end':
|
|
439
|
-
// 工具调用结束
|
|
440
|
-
const output = data?.output;
|
|
441
|
-
let result = '';
|
|
442
|
-
|
|
443
|
-
// 调试:打印完整的事件结构
|
|
444
|
-
debug(`on_tool_end: name=${name}, output type=${typeof output}, keys=${output ? Object.keys(output) : 'null'}`);
|
|
445
|
-
|
|
446
|
-
if (typeof output === 'string') {
|
|
447
|
-
result = output.slice(0, 80);
|
|
448
|
-
} else if (output?.content) {
|
|
449
|
-
result = String(output.content).slice(0, 80);
|
|
450
|
-
}
|
|
451
|
-
if (result) {
|
|
452
|
-
console.log(`[结果] ${result}${result.length >= 80 ? '...' : ''}`);
|
|
453
|
-
await sendToPanel('system', `[结果] ${result.slice(0, 50)}${result.length > 50 ? '...' : ''}`);
|
|
454
|
-
}
|
|
455
|
-
break;
|
|
456
|
-
}
|
|
457
|
-
}
|
|
23
|
+
let streamHandler = null;
|
|
24
|
+
let targetUrl = null;
|
|
25
|
+
let DEBUG = false;
|
|
26
|
+
let debugFn = () => {};
|
|
27
|
+
let agent = null;
|
|
28
|
+
let agentConfig = null;
|
|
458
29
|
|
|
459
30
|
/**
|
|
460
31
|
* 从文件显示报告(由中间件回调触发)
|
|
@@ -470,11 +41,15 @@ async function showReportFromFile(mdFilePath) {
|
|
|
470
41
|
const content = readFileSync(mdFilePath, 'utf-8');
|
|
471
42
|
console.log('[report] 读取 MD 文件成功, 长度:', content.length);
|
|
472
43
|
|
|
473
|
-
// 使用 marked 转换为 HTML
|
|
474
44
|
const htmlContent = marked.parse(content);
|
|
475
45
|
const escaped = JSON.stringify(htmlContent);
|
|
476
|
-
const
|
|
477
|
-
|
|
46
|
+
const cdp = await browser?.getCDPSession?.();
|
|
47
|
+
if (cdp) {
|
|
48
|
+
await cdp.send('Runtime.evaluate', {
|
|
49
|
+
expression: `window.__deepspider__?.showReport?.(${escaped}, true)`,
|
|
50
|
+
returnByValue: true,
|
|
51
|
+
});
|
|
52
|
+
}
|
|
478
53
|
console.log('[report] 已显示分析报告');
|
|
479
54
|
} catch (e) {
|
|
480
55
|
console.log('[report] showReportFromFile 失败:', e.message);
|
|
@@ -485,14 +60,12 @@ async function showReportFromFile(mdFilePath) {
|
|
|
485
60
|
* 处理浏览器消息(通过 CDP binding 接收)
|
|
486
61
|
*/
|
|
487
62
|
async function handleBrowserMessage(data, page) {
|
|
488
|
-
|
|
63
|
+
debugFn(`handleBrowserMessage: 收到消息, type=${data.type}, page=${!!page}`);
|
|
489
64
|
|
|
490
|
-
// 添加浏览器已就绪前缀,告诉 Agent 不需要再启动浏览器
|
|
491
65
|
const browserReadyPrefix = '[浏览器已就绪] ';
|
|
492
66
|
|
|
493
67
|
let userPrompt;
|
|
494
68
|
if (data.type === 'analysis') {
|
|
495
|
-
// 处理多元素选择
|
|
496
69
|
const elements = data.elements || [{ text: data.text, xpath: data.xpath, iframeSrc: data.iframeSrc }];
|
|
497
70
|
const elementsDesc = elements.map((el, i) =>
|
|
498
71
|
`${i + 1}. "${el.text?.slice(0, 100) || ''}"\n XPath: ${el.xpath}${el.iframeSrc ? `\n iframe: ${el.iframeSrc}` : ''}`
|
|
@@ -506,7 +79,6 @@ ${elementsDesc}${supplementText}
|
|
|
506
79
|
|
|
507
80
|
${fullAnalysisPrompt}`;
|
|
508
81
|
} else if (data.type === 'generate-config') {
|
|
509
|
-
// 生成爬虫配置 - 使用 crawler 子代理
|
|
510
82
|
const config = data.config;
|
|
511
83
|
userPrompt = `${browserReadyPrefix}请使用 crawler 子代理生成爬虫。
|
|
512
84
|
|
|
@@ -517,7 +89,6 @@ ${JSON.stringify(config.fields, null, 2)}
|
|
|
517
89
|
|
|
518
90
|
请先用 query_store 查询已有的加密代码,然后整合生成配置和脚本。`;
|
|
519
91
|
} else if (data.type === 'chat') {
|
|
520
|
-
// 普通对话,可能带有已选元素作为上下文
|
|
521
92
|
if (data.elements && data.elements.length > 0) {
|
|
522
93
|
const elementsDesc = data.elements.map((el, i) =>
|
|
523
94
|
`${i + 1}. "${el.text?.slice(0, 100) || ''}"\n XPath: ${el.xpath}`
|
|
@@ -530,10 +101,8 @@ ${elementsDesc}`;
|
|
|
530
101
|
userPrompt = `${browserReadyPrefix}${data.text}`;
|
|
531
102
|
}
|
|
532
103
|
} else if (data.type === 'open-file') {
|
|
533
|
-
// 打开文件 - 使用系统默认程序
|
|
534
104
|
let filePath = data.path;
|
|
535
105
|
if (filePath && typeof filePath === 'string') {
|
|
536
|
-
// 展开 ~ 为 home 目录
|
|
537
106
|
if (filePath.startsWith('~/')) {
|
|
538
107
|
filePath = filePath.replace('~', process.env.HOME || process.env.USERPROFILE);
|
|
539
108
|
}
|
|
@@ -553,9 +122,8 @@ ${elementsDesc}`;
|
|
|
553
122
|
}
|
|
554
123
|
|
|
555
124
|
console.log('\n[浏览器] ' + (data.type === 'analysis' ? '分析请求' : data.type === 'generate-config' ? '生成配置' : '对话'));
|
|
556
|
-
await chatStream(userPrompt
|
|
125
|
+
await streamHandler.chatStream(userPrompt);
|
|
557
126
|
console.log('\n');
|
|
558
|
-
// 流式输出已经同步到面板,无需再次发送
|
|
559
127
|
process.stdout.write('> ');
|
|
560
128
|
}
|
|
561
129
|
|
|
@@ -572,16 +140,21 @@ function prompt() {
|
|
|
572
140
|
return;
|
|
573
141
|
}
|
|
574
142
|
|
|
575
|
-
await chatStream(input
|
|
143
|
+
await streamHandler.chatStream(input);
|
|
576
144
|
console.log('\n');
|
|
577
145
|
prompt();
|
|
578
146
|
});
|
|
579
147
|
}
|
|
580
148
|
|
|
581
149
|
async function init() {
|
|
582
|
-
|
|
150
|
+
// 解析参数(在 init 时才读取,避免与 CLI 路由层的 argv 冲突)
|
|
151
|
+
const args = process.argv.slice(2);
|
|
152
|
+
targetUrl = args.find(arg => arg.startsWith('http://') || arg.startsWith('https://'));
|
|
153
|
+
DEBUG = process.env.DEBUG === 'true' || args.includes('--debug');
|
|
154
|
+
debugFn = (...a) => { if (DEBUG) console.log('[DEBUG]', ...a); };
|
|
155
|
+
|
|
156
|
+
debugFn('init: 启动');
|
|
583
157
|
|
|
584
|
-
// 首次运行检测:确保环境变量已配置
|
|
585
158
|
if (!ensureConfig()) {
|
|
586
159
|
process.exit(1);
|
|
587
160
|
}
|
|
@@ -590,24 +163,58 @@ async function init() {
|
|
|
590
163
|
console.log('[DEBUG] 调试模式已启用');
|
|
591
164
|
}
|
|
592
165
|
|
|
166
|
+
// 创建 readline、logger、agent(全部延迟到 init)
|
|
167
|
+
rl = readline.createInterface({
|
|
168
|
+
input: process.stdin,
|
|
169
|
+
output: process.stdout,
|
|
170
|
+
});
|
|
171
|
+
|
|
172
|
+
const logger = createLogger({ enabled: DEBUG, verbose: false });
|
|
173
|
+
|
|
174
|
+
async function onReportReady(mdFilePath) {
|
|
175
|
+
console.log('[report] 中间件触发报告显示:', mdFilePath);
|
|
176
|
+
await showReportFromFile(mdFilePath);
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
agent = createDeepSpiderAgent({ onReportReady });
|
|
180
|
+
|
|
181
|
+
agentConfig = {
|
|
182
|
+
configurable: { thread_id: `deepspider-${Date.now()}` },
|
|
183
|
+
recursionLimit: 5000,
|
|
184
|
+
callbacks: logger ? [logger] : [],
|
|
185
|
+
};
|
|
186
|
+
|
|
187
|
+
// 初始化流处理器
|
|
188
|
+
const panelBridge = new PanelBridge(() => browser, debugFn);
|
|
189
|
+
streamHandler = new StreamHandler({
|
|
190
|
+
agent,
|
|
191
|
+
config: agentConfig,
|
|
192
|
+
panelBridge,
|
|
193
|
+
riskTools: browserTools.map(t => t.name),
|
|
194
|
+
debug: debugFn,
|
|
195
|
+
});
|
|
196
|
+
|
|
197
|
+
console.log('=== DeepSpider Agent ===');
|
|
198
|
+
console.log('智能爬虫 Agent,输入 exit 退出\n');
|
|
199
|
+
|
|
593
200
|
if (targetUrl) {
|
|
594
201
|
console.log(`正在打开: ${targetUrl}\n`);
|
|
595
202
|
try {
|
|
596
|
-
|
|
203
|
+
debugFn('init: 获取浏览器实例');
|
|
597
204
|
browser = await getBrowser();
|
|
598
205
|
browser.onMessage = handleBrowserMessage;
|
|
599
|
-
|
|
206
|
+
debugFn('init: 导航到目标URL');
|
|
600
207
|
await browser.navigate(targetUrl);
|
|
601
208
|
markHookInjected();
|
|
602
|
-
|
|
209
|
+
debugFn('init: 浏览器就绪');
|
|
603
210
|
console.log('浏览器已就绪,数据自动记录中');
|
|
604
211
|
console.log('点击面板选择按钮(⦿)选择数据进行分析\n');
|
|
605
212
|
} catch (error) {
|
|
606
213
|
console.error('启动浏览器失败:', error.message);
|
|
607
|
-
|
|
214
|
+
debugFn('init: 浏览器启动失败 -', error.stack);
|
|
608
215
|
}
|
|
609
216
|
}
|
|
610
217
|
prompt();
|
|
611
218
|
}
|
|
612
219
|
|
|
613
|
-
init
|
|
220
|
+
export { init };
|
package/src/agent/setup.js
CHANGED
|
@@ -1,17 +1,15 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* DeepSpider 配置检测
|
|
3
|
-
*
|
|
3
|
+
* 环境变量 > 配置文件 > 默认值
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
|
+
import { getConfigValues } from '../config/settings.js';
|
|
7
|
+
|
|
6
8
|
/**
|
|
7
|
-
*
|
|
9
|
+
* 检查配置(合并环境变量和配置文件)
|
|
8
10
|
*/
|
|
9
11
|
export function checkEnvConfig() {
|
|
10
|
-
return
|
|
11
|
-
apiKey: process.env.DEEPSPIDER_API_KEY,
|
|
12
|
-
baseUrl: process.env.DEEPSPIDER_BASE_URL,
|
|
13
|
-
model: process.env.DEEPSPIDER_MODEL,
|
|
14
|
-
};
|
|
12
|
+
return getConfigValues();
|
|
15
13
|
}
|
|
16
14
|
|
|
17
15
|
/**
|
|
@@ -22,9 +20,9 @@ export function ensureConfig() {
|
|
|
22
20
|
const { apiKey, baseUrl, model } = checkEnvConfig();
|
|
23
21
|
const missing = [];
|
|
24
22
|
|
|
25
|
-
if (!apiKey) missing.push('DEEPSPIDER_API_KEY');
|
|
26
|
-
if (!baseUrl) missing.push('DEEPSPIDER_BASE_URL');
|
|
27
|
-
if (!model) missing.push('DEEPSPIDER_MODEL');
|
|
23
|
+
if (!apiKey) missing.push('apiKey (DEEPSPIDER_API_KEY)');
|
|
24
|
+
if (!baseUrl) missing.push('baseUrl (DEEPSPIDER_BASE_URL)');
|
|
25
|
+
if (!model) missing.push('model (DEEPSPIDER_MODEL)');
|
|
28
26
|
|
|
29
27
|
if (missing.length === 0) {
|
|
30
28
|
return true;
|
|
@@ -35,14 +33,16 @@ export function ensureConfig() {
|
|
|
35
33
|
|
|
36
34
|
配置方式(任选其一):
|
|
37
35
|
|
|
38
|
-
1.
|
|
36
|
+
1. 使用 deepspider config 命令:
|
|
37
|
+
deepspider config set apiKey sk-xxx
|
|
38
|
+
deepspider config set baseUrl https://api.openai.com/v1
|
|
39
|
+
deepspider config set model gpt-4o
|
|
40
|
+
|
|
41
|
+
2. 配置环境变量:
|
|
39
42
|
export DEEPSPIDER_API_KEY=sk-xxx
|
|
40
43
|
export DEEPSPIDER_BASE_URL=https://api.openai.com/v1
|
|
41
44
|
export DEEPSPIDER_MODEL=gpt-4o
|
|
42
45
|
|
|
43
|
-
2. 一行命令:
|
|
44
|
-
DEEPSPIDER_API_KEY=sk-xxx DEEPSPIDER_BASE_URL=https://api.openai.com/v1 DEEPSPIDER_MODEL=gpt-4o deepspider <url>
|
|
45
|
-
|
|
46
46
|
请根据提示补全配置后重试。
|
|
47
47
|
`);
|
|
48
48
|
|