deepspider 0.2.11 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146):
  1. package/README.md +71 -24
  2. package/bin/cli.js +45 -0
  3. package/package.json +10 -4
  4. package/src/agent/core/PanelBridge.js +133 -0
  5. package/src/agent/core/RetryManager.js +51 -0
  6. package/src/agent/core/StreamHandler.js +263 -0
  7. package/src/agent/core/index.js +7 -0
  8. package/src/agent/errors/ErrorClassifier.js +43 -0
  9. package/src/agent/errors/SpiderError.js +68 -0
  10. package/src/agent/errors/index.js +19 -0
  11. package/src/agent/run.js +67 -460
  12. package/src/agent/setup.js +14 -14
  13. package/src/agent/subagents/factory.js +60 -0
  14. package/src/agent/subagents/index.js +3 -0
  15. package/src/agent/tools/report.js +36 -4
  16. package/src/browser/client.js +47 -10
  17. package/src/cli/commands/config.js +94 -0
  18. package/src/cli/commands/help.js +34 -0
  19. package/src/cli/commands/update.js +78 -0
  20. package/src/cli/commands/version.js +9 -0
  21. package/src/cli/config.js +15 -0
  22. package/src/config/settings.js +102 -0
  23. package/.claude/agents/check.md +0 -122
  24. package/.claude/agents/debug.md +0 -106
  25. package/.claude/agents/dispatch.md +0 -214
  26. package/.claude/agents/implement.md +0 -96
  27. package/.claude/agents/plan.md +0 -396
  28. package/.claude/agents/research.md +0 -120
  29. package/.claude/commands/evolve/merge.md +0 -80
  30. package/.claude/commands/trellis/before-backend-dev.md +0 -13
  31. package/.claude/commands/trellis/before-frontend-dev.md +0 -13
  32. package/.claude/commands/trellis/break-loop.md +0 -107
  33. package/.claude/commands/trellis/check-backend.md +0 -13
  34. package/.claude/commands/trellis/check-cross-layer.md +0 -153
  35. package/.claude/commands/trellis/check-frontend.md +0 -13
  36. package/.claude/commands/trellis/create-command.md +0 -154
  37. package/.claude/commands/trellis/finish-work.md +0 -129
  38. package/.claude/commands/trellis/integrate-skill.md +0 -219
  39. package/.claude/commands/trellis/onboard.md +0 -358
  40. package/.claude/commands/trellis/parallel.md +0 -193
  41. package/.claude/commands/trellis/record-session.md +0 -62
  42. package/.claude/commands/trellis/start.md +0 -280
  43. package/.claude/commands/trellis/update-spec.md +0 -213
  44. package/.claude/hooks/inject-subagent-context.py +0 -758
  45. package/.claude/hooks/ralph-loop.py +0 -374
  46. package/.claude/hooks/session-start.py +0 -126
  47. package/.claude/settings.json +0 -41
  48. package/.claude/skills/deepagents-guide/SKILL.md +0 -428
  49. package/.cursor/commands/trellis-before-backend-dev.md +0 -13
  50. package/.cursor/commands/trellis-before-frontend-dev.md +0 -13
  51. package/.cursor/commands/trellis-break-loop.md +0 -107
  52. package/.cursor/commands/trellis-check-backend.md +0 -13
  53. package/.cursor/commands/trellis-check-cross-layer.md +0 -153
  54. package/.cursor/commands/trellis-check-frontend.md +0 -13
  55. package/.cursor/commands/trellis-create-command.md +0 -154
  56. package/.cursor/commands/trellis-finish-work.md +0 -129
  57. package/.cursor/commands/trellis-integrate-skill.md +0 -219
  58. package/.cursor/commands/trellis-onboard.md +0 -358
  59. package/.cursor/commands/trellis-record-session.md +0 -62
  60. package/.cursor/commands/trellis-start.md +0 -156
  61. package/.cursor/commands/trellis-update-spec.md +0 -213
  62. package/.github/workflows/publish.yml +0 -63
  63. package/.husky/pre-commit +0 -1
  64. package/.mcp.json +0 -8
  65. package/.trellis/.template-hashes.json +0 -65
  66. package/.trellis/.version +0 -1
  67. package/.trellis/scripts/add-session.sh +0 -384
  68. package/.trellis/scripts/common/developer.sh +0 -129
  69. package/.trellis/scripts/common/git-context.sh +0 -263
  70. package/.trellis/scripts/common/paths.sh +0 -208
  71. package/.trellis/scripts/common/phase.sh +0 -150
  72. package/.trellis/scripts/common/registry.sh +0 -247
  73. package/.trellis/scripts/common/task-queue.sh +0 -142
  74. package/.trellis/scripts/common/task-utils.sh +0 -151
  75. package/.trellis/scripts/common/worktree.sh +0 -128
  76. package/.trellis/scripts/create-bootstrap.sh +0 -299
  77. package/.trellis/scripts/get-context.sh +0 -7
  78. package/.trellis/scripts/get-developer.sh +0 -15
  79. package/.trellis/scripts/init-developer.sh +0 -34
  80. package/.trellis/scripts/multi-agent/cleanup.sh +0 -396
  81. package/.trellis/scripts/multi-agent/create-pr.sh +0 -241
  82. package/.trellis/scripts/multi-agent/plan.sh +0 -207
  83. package/.trellis/scripts/multi-agent/start.sh +0 -310
  84. package/.trellis/scripts/multi-agent/status.sh +0 -828
  85. package/.trellis/scripts/task.sh +0 -1118
  86. package/.trellis/spec/backend/ci-cd-guidelines.md +0 -73
  87. package/.trellis/spec/backend/deepagents-guide.md +0 -380
  88. package/.trellis/spec/backend/directory-structure.md +0 -126
  89. package/.trellis/spec/backend/examples/skills/deepagents-guide/README.md +0 -11
  90. package/.trellis/spec/backend/examples/skills/deepagents-guide/agent.js.template +0 -20
  91. package/.trellis/spec/backend/examples/skills/deepagents-guide/skills-config.js.template +0 -13
  92. package/.trellis/spec/backend/examples/skills/deepagents-guide/subagent.js.template +0 -19
  93. package/.trellis/spec/backend/hook-guidelines.md +0 -218
  94. package/.trellis/spec/backend/index.md +0 -37
  95. package/.trellis/spec/backend/quality-guidelines.md +0 -302
  96. package/.trellis/spec/backend/state-management.md +0 -76
  97. package/.trellis/spec/backend/tool-guidelines.md +0 -144
  98. package/.trellis/spec/backend/type-safety.md +0 -71
  99. package/.trellis/spec/guides/code-reuse-thinking-guide.md +0 -92
  100. package/.trellis/spec/guides/cross-layer-thinking-guide.md +0 -94
  101. package/.trellis/spec/guides/index.md +0 -79
  102. package/.trellis/tasks/archive/02-02-evolving-skills/prd.md +0 -61
  103. package/.trellis/tasks/archive/02-02-evolving-skills/task.json +0 -29
  104. package/.trellis/tasks/archive/2026-02/00-bootstrap-guidelines/prd.md +0 -86
  105. package/.trellis/tasks/archive/2026-02/00-bootstrap-guidelines/task.json +0 -27
  106. package/.trellis/tasks/archive/2026-02/02-02-skills-system/check.jsonl +0 -3
  107. package/.trellis/tasks/archive/2026-02/02-02-skills-system/debug.jsonl +0 -2
  108. package/.trellis/tasks/archive/2026-02/02-02-skills-system/implement.jsonl +0 -5
  109. package/.trellis/tasks/archive/2026-02/02-02-skills-system/prd.md +0 -33
  110. package/.trellis/tasks/archive/2026-02/02-02-skills-system/task.json +0 -41
  111. package/.trellis/workflow.md +0 -407
  112. package/.trellis/workspace/index.md +0 -123
  113. package/.trellis/workspace/pony/index.md +0 -42
  114. package/.trellis/workspace/pony/journal-1.md +0 -125
  115. package/.trellis/worktree.yaml +0 -47
  116. package/AGENTS.md +0 -18
  117. package/CLAUDE.md +0 -315
  118. package/agents/deepspider.md +0 -142
  119. package/docs/DEBUG.md +0 -42
  120. package/docs/GUIDE.md +0 -334
  121. package/docs/PROMPT.md +0 -60
  122. package/docs/USAGE.md +0 -226
  123. package/eslint.config.js +0 -51
  124. package/test/analyze.test.js +0 -90
  125. package/test/envdump.test.js +0 -74
  126. package/test/flow.test.js +0 -90
  127. package/test/hooks.test.js +0 -138
  128. package/test/plugin.test.js +0 -35
  129. package/test/refactor-full.test.js +0 -30
  130. package/test/refactor.test.js +0 -21
  131. package/test/samples/obfuscated.js +0 -61
  132. package/test/samples/original.js +0 -66
  133. package/test/samples/v10_eval_chain.js +0 -52
  134. package/test/samples/v11_bytecode_vm.js +0 -81
  135. package/test/samples/v12_polymorphic.js +0 -69
  136. package/test/samples/v1_ob_basic.js +0 -98
  137. package/test/samples/v2_ob_advanced.js +0 -99
  138. package/test/samples/v3_jjencode.js +0 -77
  139. package/test/samples/v4_aaencode.js +0 -73
  140. package/test/samples/v5_control_flow.js +0 -86
  141. package/test/samples/v6_string_encryption.js +0 -71
  142. package/test/samples/v7_jsvmp.js +0 -83
  143. package/test/samples/v8_anti_debug.js +0 -79
  144. package/test/samples/v9_proxy_trap.js +0 -49
  145. package/test/samples.test.js +0 -96
  146. package/test/webcrack.test.js +0 -55
package/src/agent/run.js CHANGED
@@ -1,11 +1,11 @@
1
- #!/usr/bin/env node
2
1
  /**
3
- * DeepSpider Agent 独立运行入口
2
+ * DeepSpider Agent 运行模块
4
3
  * 使用 CDP binding 接收浏览器消息
5
4
  * 支持流式输出显示思考过程
5
+ *
6
+ * 所有状态初始化延迟到 init() 中执行,避免 import 时产生副作用
6
7
  */
7
8
 
8
- import 'dotenv/config';
9
9
  import readline from 'readline';
10
10
  import { readFileSync } from 'fs';
11
11
  import { marked } from 'marked';
@@ -16,445 +16,16 @@ import { markHookInjected } from './tools/runtime.js';
16
16
  import { createLogger } from './logger.js';
17
17
  import { browserTools } from './tools/browser.js';
18
18
  import { ensureConfig } from './setup.js';
19
+ import { StreamHandler, PanelBridge } from './core/index.js';
19
20
 
20
- const args = process.argv.slice(2);
21
- const targetUrl = args.find(arg => arg.startsWith('http://') || arg.startsWith('https://'));
22
-
23
- const rl = readline.createInterface({
24
- input: process.stdin,
25
- output: process.stdout,
26
- });
27
-
21
+ let rl = null;
28
22
  let browser = null;
29
- let currentPage = null;
30
-
31
- console.log('=== DeepSpider Agent ===');
32
- console.log('智能爬虫 Agent,输入 exit 退出\n');
33
-
34
- // 调试模式
35
- const DEBUG = process.env.DEBUG === 'true' || process.argv.includes('--debug');
36
-
37
- // 重试配置
38
- const RETRY_CONFIG = {
39
- maxRetries: 3,
40
- baseDelayMs: 2000,
41
- maxDelayMs: 30000,
42
- };
43
-
44
- // 人工介入配置
45
- const INTERVENTION_CONFIG = {
46
- idleTimeoutMs: 120000, // 2分钟无响应触发提示
47
- checkIntervalMs: 30000, // 30秒检测一次
48
- // 从 browserTools 获取可能触发风控的工具名称
49
- riskTools: browserTools.map(t => t.name),
50
- };
51
-
52
- /**
53
- * 判断是否为工具参数错误(需要 LLM 修正)
54
- */
55
- function isToolSchemaError(errMsg) {
56
- return /did not match expected schema|Invalid input|tool input/i.test(errMsg);
57
- }
58
-
59
- /**
60
- * 判断是否为 API 服务错误(可直接重试)
61
- */
62
- function isApiServiceError(errMsg) {
63
- return /503|502|429|rate limit|无可用渠道|timeout|ECONNRESET|ETIMEDOUT/i.test(errMsg);
64
- }
65
-
66
- /**
67
- * 计算重试延迟(指数退避 + 抖动)
68
- */
69
- function getRetryDelay(retryCount) {
70
- const delay = Math.min(
71
- RETRY_CONFIG.baseDelayMs * Math.pow(2, retryCount),
72
- RETRY_CONFIG.maxDelayMs
73
- );
74
- // 添加 0-25% 的随机抖动
75
- const jitter = delay * Math.random() * 0.25;
76
- return Math.round(delay + jitter);
77
- }
78
-
79
- /**
80
- * 延迟函数
81
- */
82
- function sleep(ms) {
83
- return new Promise(resolve => setTimeout(resolve, ms));
84
- }
85
-
86
- // DeepSeek 特殊标记清理
87
- const DSML_PATTERN = /|DSML|/g;
88
- function cleanDSML(text) {
89
- return text ? text.replace(DSML_PATTERN, '') : text;
90
- }
91
-
92
- // 创建日志回调
93
- const logger = createLogger({ enabled: DEBUG, verbose: false });
94
-
95
- /**
96
- * 报告就绪回调 - 由中间件在 afterAgent 时调用
97
- */
98
- async function onReportReady(mdFilePath) {
99
- console.log('[report] 中间件触发报告显示:', mdFilePath);
100
- await showReportFromFile(mdFilePath);
101
- }
102
-
103
- // 创建 Agent,传入报告回调
104
- const agent = createDeepSpiderAgent({ onReportReady });
105
-
106
- const config = {
107
- configurable: { thread_id: `deepspider-${Date.now()}` },
108
- recursionLimit: 5000,
109
- callbacks: logger ? [logger] : [],
110
- };
111
-
112
- // 文本累积缓冲区 - 用于累积 LLM 流式输出
113
- let panelTextBuffer = '';
114
- let hasStartedAssistantMsg = false;
115
-
116
- function debug(...args) {
117
- if (DEBUG) {
118
- console.log('[DEBUG]', ...args);
119
- }
120
- }
121
-
122
- /**
123
- * 发送消息到前端面板
124
- */
125
- async function sendToPanel(role, content) {
126
- if (!content?.trim()) return;
127
-
128
- const page = browser?.getPage?.();
129
- if (!page) return;
130
-
131
- try {
132
- const escaped = JSON.stringify(content.trim());
133
- const code = `window.__deepspider__?.addMessage?.('${role}', ${escaped})`;
134
- await evaluateInPage(code);
135
- } catch (e) {
136
- // ignore
137
- }
138
- }
139
-
140
- /**
141
- * 累积文本到缓冲区(用于 LLM 流式输出)
142
- */
143
- async function appendToPanel(text) {
144
- if (!text) return;
145
- panelTextBuffer += text;
146
-
147
- // 每累积一定量或遇到换行时刷新
148
- if (panelTextBuffer.length > 200 || text.includes('\n')) {
149
- await flushPanelText();
150
- }
151
- }
152
-
153
- /**
154
- * 通过 CDP 在页面主世界执行 JavaScript(复用 session)
155
- */
156
- async function evaluateInPage(code) {
157
- const cdp = await browser?.getCDPSession?.();
158
- if (!cdp) return null;
159
-
160
- try {
161
- const result = await cdp.send('Runtime.evaluate', {
162
- expression: code,
163
- returnByValue: true,
164
- });
165
- return result.result?.value;
166
- } catch (e) {
167
- debug('evaluateInPage 失败:', e.message);
168
- return null;
169
- }
170
- }
171
-
172
- /**
173
- * 刷新累积的文本到面板
174
- */
175
- async function flushPanelText() {
176
- if (!panelTextBuffer.trim()) return;
177
-
178
- const page = browser?.getPage?.();
179
- if (!page) {
180
- panelTextBuffer = '';
181
- return;
182
- }
183
-
184
- try {
185
- const content = panelTextBuffer.trim();
186
- const escaped = JSON.stringify(content);
187
-
188
- if (!hasStartedAssistantMsg) {
189
- const code = `(function() {
190
- const fn = window.__deepspider__?.addMessage;
191
- if (typeof fn === 'function') {
192
- fn('assistant', ${escaped});
193
- return { ok: true };
194
- }
195
- return { ok: false };
196
- })()`;
197
- await evaluateInPage(code);
198
- hasStartedAssistantMsg = true;
199
- } else {
200
- const code = `(function() {
201
- const fn = window.__deepspider__?.appendToLastMessage;
202
- if (typeof fn === 'function') {
203
- fn('assistant', ${escaped});
204
- return { ok: true };
205
- }
206
- return { ok: false };
207
- })()`;
208
- await evaluateInPage(code);
209
- }
210
- } catch (e) {
211
- // ignore
212
- }
213
-
214
- panelTextBuffer = '';
215
- }
216
-
217
- /**
218
- * 流式对话 - 显示思考过程(带重试)
219
- */
220
- async function chatStream(input, page = null, retryCount = 0) {
221
- currentPage = page;
222
- let finalResponse = '';
223
- let lastEventTime = Date.now();
224
- let eventCount = 0;
225
- let lastToolCall = null;
226
-
227
- // 重置面板状态
228
- panelTextBuffer = '';
229
- hasStartedAssistantMsg = false;
230
-
231
- // 设置忙碌状态
232
- await evaluateInPage('window.__deepspider__?.setBusy?.(true)');
233
-
234
- debug(`chatStream: 开始处理, 输入长度=${input.length}, page=${!!page}`);
235
-
236
- // 心跳检测 - 每30秒输出状态
237
- let interventionNotified = false;
238
- const heartbeat = setInterval(() => {
239
- const elapsed = Math.round((Date.now() - lastEventTime) / 1000);
240
- if (elapsed > 30) {
241
- console.log(`\n[心跳] 已等待 ${elapsed}s, 事件数=${eventCount}, 最后工具=${lastToolCall || '无'}`);
242
- }
243
-
244
- // 超时提示 - 只在风险工具调用后提示
245
- const isRiskTool = lastToolCall && INTERVENTION_CONFIG.riskTools.includes(lastToolCall);
246
- if (elapsed * 1000 > INTERVENTION_CONFIG.idleTimeoutMs && !interventionNotified && isRiskTool) {
247
- interventionNotified = true;
248
- const msg = '⚠️ 页面操作后长时间无响应,可能遇到验证码或风控,请检查浏览器';
249
- console.log('\n[提示] ' + msg);
250
- sendToPanel('system', msg).catch(() => {});
251
- }
252
- }, INTERVENTION_CONFIG.checkIntervalMs);
253
-
254
- try {
255
- debug('chatStream: 创建事件流');
256
- const eventStream = await agent.streamEvents(
257
- { messages: [{ role: 'user', content: input }] },
258
- { ...config, version: 'v2' }
259
- );
260
-
261
- debug('chatStream: 开始遍历事件');
262
- for await (const event of eventStream) {
263
- lastEventTime = Date.now();
264
- eventCount++;
265
-
266
- // 记录工具调用
267
- if (event.event === 'on_tool_start') {
268
- lastToolCall = event.name;
269
- }
270
-
271
- await handleStreamEvent(event);
272
-
273
- // 收集最终响应
274
- if (event.event === 'on_chat_model_end' && event.name === 'ChatOpenAI') {
275
- const output = event.data?.output;
276
- if (output?.content) {
277
- finalResponse = output.content;
278
- debug(`chatStream: 收到最终响应, 长度=${finalResponse.length}`);
279
- }
280
- }
281
- }
282
-
283
- // 流正常结束
284
- clearInterval(heartbeat);
285
- console.log(`\n[完成] 共处理 ${eventCount} 个事件`);
286
-
287
- // 刷新剩余的累积内容到面板
288
- debug('chatStream: 刷新剩余内容');
289
- await flushPanelText();
290
-
291
- // 流式输出完成,触发 Markdown 渲染
292
- await evaluateInPage('window.__deepspider__?.finalizeMessage?.("assistant")');
293
-
294
- // 清除忙碌状态
295
- await evaluateInPage('window.__deepspider__?.setBusy?.(false)');
296
-
297
- debug(`chatStream: 完成, 响应长度=${finalResponse.length}`);
298
- return finalResponse || '[无响应]';
299
- } catch (error) {
300
- clearInterval(heartbeat);
301
- const errMsg = error.message || String(error);
302
-
303
- // 清除忙碌状态
304
- await evaluateInPage('window.__deepspider__?.setBusy?.(false)');
305
-
306
- console.error(`\n[异常] 事件数=${eventCount}, 最后工具=${lastToolCall || '无'}, 错误: ${errMsg}`);
307
-
308
- // 检查是否可重试
309
- if (retryCount < RETRY_CONFIG.maxRetries) {
310
- // API 服务错误 - 从检查点恢复
311
- if (isApiServiceError(errMsg)) {
312
- const delay = getRetryDelay(retryCount);
313
- console.log(`\n[重试 ${retryCount + 1}/${RETRY_CONFIG.maxRetries}] API错误,${delay}ms 后从检查点恢复...`);
314
- await sendToPanel('system', `服务暂时不可用,${Math.round(delay/1000)}s 后重试 (${retryCount + 1}/${RETRY_CONFIG.maxRetries})`);
315
- await sleep(delay);
316
- // 从检查点恢复:不传入新消息,使用相同 thread_id
317
- return chatStreamResume(page, retryCount + 1);
318
- }
319
-
320
- // 工具参数错误 - 发送错误信息让 LLM 修正
321
- if (isToolSchemaError(errMsg)) {
322
- console.log(`\n[重试 ${retryCount + 1}/${RETRY_CONFIG.maxRetries}] 工具参数错误,发送修正请求...`);
323
- await sendToPanel('system', `工具调用失败,正在修正 (${retryCount + 1}/${RETRY_CONFIG.maxRetries})`);
324
- const resumeInput = `工具调用失败: ${errMsg}\n请检查参数格式并重试。`;
325
- return chatStream(resumeInput, page, retryCount + 1);
326
- }
327
- }
328
-
329
- return `错误: ${errMsg}`;
330
- }
331
- }
332
-
333
- /**
334
- * 从检查点恢复流式对话
335
- * 不传入新消息,使用相同 thread_id 从上次中断处继续
336
- */
337
- async function chatStreamResume(page = null, retryCount = 0) {
338
- currentPage = page;
339
- let finalResponse = '';
340
- let lastEventTime = Date.now();
341
- let eventCount = 0;
342
-
343
- await evaluateInPage('window.__deepspider__?.setBusy?.(true)');
344
- debug(`chatStreamResume: 从检查点恢复, retryCount=${retryCount}`);
345
-
346
- const heartbeat = setInterval(() => {
347
- const elapsed = Math.round((Date.now() - lastEventTime) / 1000);
348
- if (elapsed > 30) {
349
- console.log(`\n[心跳] 恢复中,已等待 ${elapsed}s`);
350
- }
351
- }, 30000);
352
-
353
- try {
354
- // 从检查点恢复:传入 null 或空消息
355
- const eventStream = await agent.streamEvents(
356
- { messages: [] },
357
- { ...config, version: 'v2' }
358
- );
359
-
360
- for await (const event of eventStream) {
361
- lastEventTime = Date.now();
362
- eventCount++;
363
- await handleStreamEvent(event);
364
-
365
- if (event.event === 'on_chat_model_end' && event.name === 'ChatOpenAI') {
366
- const output = event.data?.output;
367
- if (output?.content) {
368
- finalResponse = output.content;
369
- }
370
- }
371
- }
372
-
373
- clearInterval(heartbeat);
374
- await flushPanelText();
375
- await evaluateInPage('window.__deepspider__?.setBusy?.(false)');
376
- console.log(`\n[恢复完成] 共处理 ${eventCount} 个事件`);
377
- return finalResponse || '[无响应]';
378
- } catch (error) {
379
- clearInterval(heartbeat);
380
- await evaluateInPage('window.__deepspider__?.setBusy?.(false)');
381
- const errMsg = error.message || String(error);
382
- console.error(`\n[恢复失败] ${errMsg}`);
383
-
384
- // 恢复失败也可以重试
385
- if (isApiServiceError(errMsg) && retryCount < RETRY_CONFIG.maxRetries) {
386
- const delay = getRetryDelay(retryCount);
387
- console.log(`\n[重试 ${retryCount + 1}/${RETRY_CONFIG.maxRetries}] ${delay}ms 后再次恢复...`);
388
- await sleep(delay);
389
- return chatStreamResume(page, retryCount + 1);
390
- }
391
-
392
- return `恢复失败: ${errMsg}`;
393
- }
394
- }
395
-
396
- /**
397
- * 处理流式事件
398
- */
399
- async function handleStreamEvent(event) {
400
- const { event: eventType, name, data } = event;
401
-
402
- // 过滤内部事件
403
- if (name?.startsWith('ChannelWrite') ||
404
- name?.startsWith('Branch') ||
405
- name?.includes('Middleware') ||
406
- name === 'RunnableSequence' ||
407
- name === 'model_request' ||
408
- name === 'tools') {
409
- return;
410
- }
411
-
412
- debug(`handleStreamEvent: ${eventType}, name=${name}`);
413
-
414
- switch (eventType) {
415
- case 'on_chat_model_stream':
416
- // LLM 输出流 - 清理 DeepSeek 特殊标记
417
- let chunk = data?.chunk?.content;
418
- if (chunk && typeof chunk === 'string') {
419
- chunk = cleanDSML(chunk);
420
- process.stdout.write(chunk);
421
- await appendToPanel(chunk); // 累积发送到面板
422
- }
423
- break;
424
-
425
- case 'on_tool_start':
426
- // 工具调用开始
427
- debug('handleStreamEvent: 工具开始,先刷新缓冲区');
428
- await flushPanelText();
429
- // 重置标志,让工具调用后的 AI 输出创建新消息
430
- hasStartedAssistantMsg = false;
431
- const input = data?.input || {};
432
- const inputStr = typeof input === 'string' ? input : JSON.stringify(input);
433
- const preview = inputStr.length > 100 ? inputStr.slice(0, 100) + '...' : inputStr;
434
- console.log(`\n[调用] ${name}(${preview})`);
435
- await sendToPanel('system', `[调用] ${name}`);
436
- break;
437
-
438
- case 'on_tool_end':
439
- // 工具调用结束
440
- const output = data?.output;
441
- let result = '';
442
-
443
- // 调试:打印完整的事件结构
444
- debug(`on_tool_end: name=${name}, output type=${typeof output}, keys=${output ? Object.keys(output) : 'null'}`);
445
-
446
- if (typeof output === 'string') {
447
- result = output.slice(0, 80);
448
- } else if (output?.content) {
449
- result = String(output.content).slice(0, 80);
450
- }
451
- if (result) {
452
- console.log(`[结果] ${result}${result.length >= 80 ? '...' : ''}`);
453
- await sendToPanel('system', `[结果] ${result.slice(0, 50)}${result.length > 50 ? '...' : ''}`);
454
- }
455
- break;
456
- }
457
- }
23
+ let streamHandler = null;
24
+ let targetUrl = null;
25
+ let DEBUG = false;
26
+ let debugFn = () => {};
27
+ let agent = null;
28
+ let agentConfig = null;
458
29
 
459
30
  /**
460
31
  * 从文件显示报告(由中间件回调触发)
@@ -470,11 +41,15 @@ async function showReportFromFile(mdFilePath) {
470
41
  const content = readFileSync(mdFilePath, 'utf-8');
471
42
  console.log('[report] 读取 MD 文件成功, 长度:', content.length);
472
43
 
473
- // 使用 marked 转换为 HTML
474
44
  const htmlContent = marked.parse(content);
475
45
  const escaped = JSON.stringify(htmlContent);
476
- const code = `window.__deepspider__?.showReport?.(${escaped}, true)`;
477
- await evaluateInPage(code);
46
+ const cdp = await browser?.getCDPSession?.();
47
+ if (cdp) {
48
+ await cdp.send('Runtime.evaluate', {
49
+ expression: `window.__deepspider__?.showReport?.(${escaped}, true)`,
50
+ returnByValue: true,
51
+ });
52
+ }
478
53
  console.log('[report] 已显示分析报告');
479
54
  } catch (e) {
480
55
  console.log('[report] showReportFromFile 失败:', e.message);
@@ -485,14 +60,12 @@ async function showReportFromFile(mdFilePath) {
485
60
  * 处理浏览器消息(通过 CDP binding 接收)
486
61
  */
487
62
  async function handleBrowserMessage(data, page) {
488
- debug(`handleBrowserMessage: 收到消息, type=${data.type}, page=${!!page}`);
63
+ debugFn(`handleBrowserMessage: 收到消息, type=${data.type}, page=${!!page}`);
489
64
 
490
- // 添加浏览器已就绪前缀,告诉 Agent 不需要再启动浏览器
491
65
  const browserReadyPrefix = '[浏览器已就绪] ';
492
66
 
493
67
  let userPrompt;
494
68
  if (data.type === 'analysis') {
495
- // 处理多元素选择
496
69
  const elements = data.elements || [{ text: data.text, xpath: data.xpath, iframeSrc: data.iframeSrc }];
497
70
  const elementsDesc = elements.map((el, i) =>
498
71
  `${i + 1}. "${el.text?.slice(0, 100) || ''}"\n XPath: ${el.xpath}${el.iframeSrc ? `\n iframe: ${el.iframeSrc}` : ''}`
@@ -506,7 +79,6 @@ ${elementsDesc}${supplementText}
506
79
 
507
80
  ${fullAnalysisPrompt}`;
508
81
  } else if (data.type === 'generate-config') {
509
- // 生成爬虫配置 - 使用 crawler 子代理
510
82
  const config = data.config;
511
83
  userPrompt = `${browserReadyPrefix}请使用 crawler 子代理生成爬虫。
512
84
 
@@ -517,7 +89,6 @@ ${JSON.stringify(config.fields, null, 2)}
517
89
 
518
90
  请先用 query_store 查询已有的加密代码,然后整合生成配置和脚本。`;
519
91
  } else if (data.type === 'chat') {
520
- // 普通对话,可能带有已选元素作为上下文
521
92
  if (data.elements && data.elements.length > 0) {
522
93
  const elementsDesc = data.elements.map((el, i) =>
523
94
  `${i + 1}. "${el.text?.slice(0, 100) || ''}"\n XPath: ${el.xpath}`
@@ -530,10 +101,8 @@ ${elementsDesc}`;
530
101
  userPrompt = `${browserReadyPrefix}${data.text}`;
531
102
  }
532
103
  } else if (data.type === 'open-file') {
533
- // 打开文件 - 使用系统默认程序
534
104
  let filePath = data.path;
535
105
  if (filePath && typeof filePath === 'string') {
536
- // 展开 ~ 为 home 目录
537
106
  if (filePath.startsWith('~/')) {
538
107
  filePath = filePath.replace('~', process.env.HOME || process.env.USERPROFILE);
539
108
  }
@@ -553,9 +122,8 @@ ${elementsDesc}`;
553
122
  }
554
123
 
555
124
  console.log('\n[浏览器] ' + (data.type === 'analysis' ? '分析请求' : data.type === 'generate-config' ? '生成配置' : '对话'));
556
- await chatStream(userPrompt, page);
125
+ await streamHandler.chatStream(userPrompt);
557
126
  console.log('\n');
558
- // 流式输出已经同步到面板,无需再次发送
559
127
  process.stdout.write('> ');
560
128
  }
561
129
 
@@ -572,16 +140,21 @@ function prompt() {
572
140
  return;
573
141
  }
574
142
 
575
- await chatStream(input, browser?.getPage?.());
143
+ await streamHandler.chatStream(input);
576
144
  console.log('\n');
577
145
  prompt();
578
146
  });
579
147
  }
580
148
 
581
149
  async function init() {
582
- debug('init: 启动');
150
+ // 解析参数(在 init 时才读取,避免与 CLI 路由层的 argv 冲突)
151
+ const args = process.argv.slice(2);
152
+ targetUrl = args.find(arg => arg.startsWith('http://') || arg.startsWith('https://'));
153
+ DEBUG = process.env.DEBUG === 'true' || args.includes('--debug');
154
+ debugFn = (...a) => { if (DEBUG) console.log('[DEBUG]', ...a); };
155
+
156
+ debugFn('init: 启动');
583
157
 
584
- // 首次运行检测:确保环境变量已配置
585
158
  if (!ensureConfig()) {
586
159
  process.exit(1);
587
160
  }
@@ -590,24 +163,58 @@ async function init() {
590
163
  console.log('[DEBUG] 调试模式已启用');
591
164
  }
592
165
 
166
+ // 创建 readline、logger、agent(全部延迟到 init)
167
+ rl = readline.createInterface({
168
+ input: process.stdin,
169
+ output: process.stdout,
170
+ });
171
+
172
+ const logger = createLogger({ enabled: DEBUG, verbose: false });
173
+
174
+ async function onReportReady(mdFilePath) {
175
+ console.log('[report] 中间件触发报告显示:', mdFilePath);
176
+ await showReportFromFile(mdFilePath);
177
+ }
178
+
179
+ agent = createDeepSpiderAgent({ onReportReady });
180
+
181
+ agentConfig = {
182
+ configurable: { thread_id: `deepspider-${Date.now()}` },
183
+ recursionLimit: 5000,
184
+ callbacks: logger ? [logger] : [],
185
+ };
186
+
187
+ // 初始化流处理器
188
+ const panelBridge = new PanelBridge(() => browser, debugFn);
189
+ streamHandler = new StreamHandler({
190
+ agent,
191
+ config: agentConfig,
192
+ panelBridge,
193
+ riskTools: browserTools.map(t => t.name),
194
+ debug: debugFn,
195
+ });
196
+
197
+ console.log('=== DeepSpider Agent ===');
198
+ console.log('智能爬虫 Agent,输入 exit 退出\n');
199
+
593
200
  if (targetUrl) {
594
201
  console.log(`正在打开: ${targetUrl}\n`);
595
202
  try {
596
- debug('init: 获取浏览器实例');
203
+ debugFn('init: 获取浏览器实例');
597
204
  browser = await getBrowser();
598
205
  browser.onMessage = handleBrowserMessage;
599
- debug('init: 导航到目标URL');
206
+ debugFn('init: 导航到目标URL');
600
207
  await browser.navigate(targetUrl);
601
208
  markHookInjected();
602
- debug('init: 浏览器就绪');
209
+ debugFn('init: 浏览器就绪');
603
210
  console.log('浏览器已就绪,数据自动记录中');
604
211
  console.log('点击面板选择按钮(⦿)选择数据进行分析\n');
605
212
  } catch (error) {
606
213
  console.error('启动浏览器失败:', error.message);
607
- debug('init: 浏览器启动失败 -', error.stack);
214
+ debugFn('init: 浏览器启动失败 -', error.stack);
608
215
  }
609
216
  }
610
217
  prompt();
611
218
  }
612
219
 
613
- init();
220
+ export { init };
@@ -1,17 +1,15 @@
1
1
  /**
2
2
  * DeepSpider 配置检测
3
- * 简单检测 + 清晰提示,不做交互式向导
3
+ * 环境变量 > 配置文件 > 默认值
4
4
  */
5
5
 
6
+ import { getConfigValues } from '../config/settings.js';
7
+
6
8
  /**
7
- * 检查环境变量是否已配置
9
+ * 检查配置(合并环境变量和配置文件)
8
10
  */
9
11
  export function checkEnvConfig() {
10
- return {
11
- apiKey: process.env.DEEPSPIDER_API_KEY,
12
- baseUrl: process.env.DEEPSPIDER_BASE_URL,
13
- model: process.env.DEEPSPIDER_MODEL,
14
- };
12
+ return getConfigValues();
15
13
  }
16
14
 
17
15
  /**
@@ -22,9 +20,9 @@ export function ensureConfig() {
22
20
  const { apiKey, baseUrl, model } = checkEnvConfig();
23
21
  const missing = [];
24
22
 
25
- if (!apiKey) missing.push('DEEPSPIDER_API_KEY');
26
- if (!baseUrl) missing.push('DEEPSPIDER_BASE_URL');
27
- if (!model) missing.push('DEEPSPIDER_MODEL');
23
+ if (!apiKey) missing.push('apiKey (DEEPSPIDER_API_KEY)');
24
+ if (!baseUrl) missing.push('baseUrl (DEEPSPIDER_BASE_URL)');
25
+ if (!model) missing.push('model (DEEPSPIDER_MODEL)');
28
26
 
29
27
  if (missing.length === 0) {
30
28
  return true;
@@ -35,14 +33,16 @@ export function ensureConfig() {
35
33
 
36
34
  配置方式(任选其一):
37
35
 
38
- 1. 配置环境变量(推荐):
36
+ 1. 使用 deepspider config 命令:
37
+ deepspider config set apiKey sk-xxx
38
+ deepspider config set baseUrl https://api.openai.com/v1
39
+ deepspider config set model gpt-4o
40
+
41
+ 2. 配置环境变量:
39
42
  export DEEPSPIDER_API_KEY=sk-xxx
40
43
  export DEEPSPIDER_BASE_URL=https://api.openai.com/v1
41
44
  export DEEPSPIDER_MODEL=gpt-4o
42
45
 
43
- 2. 一行命令:
44
- DEEPSPIDER_API_KEY=sk-xxx DEEPSPIDER_BASE_URL=https://api.openai.com/v1 DEEPSPIDER_MODEL=gpt-4o deepspider <url>
45
-
46
46
  请根据提示补全配置后重试。
47
47
  `);
48
48