deepspider 0.3.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/README.md +17 -21
  2. package/package.json +4 -2
  3. package/src/agent/core/PanelBridge.js +34 -8
  4. package/src/agent/core/StreamHandler.js +142 -26
  5. package/src/agent/index.js +72 -14
  6. package/src/agent/middleware/memoryFlush.js +48 -0
  7. package/src/agent/middleware/report.js +77 -45
  8. package/src/agent/middleware/subagent.js +4 -1
  9. package/src/agent/middleware/toolAvailability.js +37 -0
  10. package/src/agent/middleware/toolGuard.js +141 -31
  11. package/src/agent/prompts/system.js +144 -1
  12. package/src/agent/run.js +127 -14
  13. package/src/agent/sessions.js +88 -0
  14. package/src/agent/skills/anti-detect/SKILL.md +89 -14
  15. package/src/agent/skills/captcha/SKILL.md +93 -19
  16. package/src/agent/skills/crawler/SKILL.md +86 -0
  17. package/src/agent/skills/crawler/evolved.md +14 -13
  18. package/src/agent/skills/general/evolved.md +12 -1
  19. package/src/agent/skills/js2python/SKILL.md +40 -0
  20. package/src/agent/skills/js2python/evolved.md +13 -1
  21. package/src/agent/skills/sandbox/SKILL.md +33 -0
  22. package/src/agent/skills/sandbox/evolved.md +12 -5
  23. package/src/agent/skills/static-analysis/SKILL.md +39 -0
  24. package/src/agent/skills/static-analysis/evolved.md +88 -2
  25. package/src/agent/subagents/anti-detect.js +27 -5
  26. package/src/agent/subagents/captcha.js +28 -9
  27. package/src/agent/subagents/crawler.js +26 -79
  28. package/src/agent/subagents/factory.js +24 -4
  29. package/src/agent/subagents/js2python.js +18 -16
  30. package/src/agent/tools/analysis.js +17 -7
  31. package/src/agent/tools/browser.js +26 -13
  32. package/src/agent/tools/crawler.js +1 -1
  33. package/src/agent/tools/crawlerGenerator.js +2 -2
  34. package/src/agent/tools/evolve.js +47 -8
  35. package/src/agent/tools/index.js +7 -3
  36. package/src/agent/tools/patch.js +1 -1
  37. package/src/agent/tools/store.js +1 -1
  38. package/src/browser/client.js +5 -1
  39. package/src/browser/ui/analysisPanel.js +72 -0
@@ -7,16 +7,26 @@ import { tool } from '@langchain/core/tools';
7
7
  import { getBrowserClient } from '../../browser/index.js';
8
8
 
9
9
  /**
10
- * 通过 CDP 执行 JS(复用 session
10
+ * 通过 CDP 执行 JS(复用 session,带超时保护)
11
11
  */
12
- async function evaluateViaCDP(client, expression) {
12
+ async function evaluateViaCDP(client, expression, timeout = 5000) {
13
13
  const cdp = await client.getCDPSession();
14
14
  if (!cdp) return null;
15
- const result = await cdp.send('Runtime.evaluate', {
16
- expression,
17
- returnByValue: true,
18
- });
19
- return result.result?.value;
15
+ try {
16
+ const result = await Promise.race([
17
+ cdp.send('Runtime.evaluate', {
18
+ expression,
19
+ returnByValue: true,
20
+ }),
21
+ new Promise((_, reject) =>
22
+ setTimeout(() => reject(new Error('CDP evaluate timeout')), timeout)
23
+ ),
24
+ ]);
25
+ return result.result?.value;
26
+ } catch (e) {
27
+ console.error('[analysis:evaluateViaCDP] 超时或错误:', e.message);
28
+ return null;
29
+ }
20
30
  }
21
31
 
22
32
  /**
@@ -9,16 +9,29 @@ import { getBrowser } from '../../browser/index.js';
9
9
  import { getScreenshotPath } from './utils.js';
10
10
 
11
11
  /**
12
- * 通过 CDP 执行 JS
12
+ * 安全获取 CDP session,断开时返回友好错误而非 TypeError
13
13
  */
14
- async function cdpEvaluate(browser, expression, returnByValue = true) {
14
+ async function safeCDP(browser) {
15
15
  const cdp = await browser.getCDPSession();
16
- if (!cdp) throw new Error('CDP session not available');
17
- const result = await cdp.send('Runtime.evaluate', {
18
- expression,
19
- returnByValue,
20
- awaitPromise: true,
21
- });
16
+ if (!cdp) throw new Error('CDP session 不可用,浏览器可能已关闭或断开连接');
17
+ return cdp;
18
+ }
19
+
20
+ /**
21
+ * 通过 CDP 执行 JS(带超时保护)
22
+ */
23
+ async function cdpEvaluate(browser, expression, returnByValue = true, timeout = 5000) {
24
+ const cdp = await safeCDP(browser);
25
+ const result = await Promise.race([
26
+ cdp.send('Runtime.evaluate', {
27
+ expression,
28
+ returnByValue,
29
+ awaitPromise: true,
30
+ }),
31
+ new Promise((_, reject) =>
32
+ setTimeout(() => reject(new Error('cdpEvaluate timeout (page loading/paused?)')), timeout)
33
+ ),
34
+ ]);
22
35
  if (result.exceptionDetails) {
23
36
  throw new Error(result.exceptionDetails.text || 'CDP evaluate error');
24
37
  }
@@ -112,7 +125,7 @@ export const waitForSelector = tool(
112
125
  export const reloadPage = tool(
113
126
  async () => {
114
127
  const browser = await getBrowser();
115
- const cdp = await browser.getCDPSession();
128
+ const cdp = await safeCDP(browser);
116
129
  await cdp.send('Page.reload');
117
130
  const url = await cdpEvaluate(browser, 'location.href');
118
131
  return JSON.stringify({ success: true, url });
@@ -130,7 +143,7 @@ export const reloadPage = tool(
130
143
  export const goBack = tool(
131
144
  async () => {
132
145
  const browser = await getBrowser();
133
- const cdp = await browser.getCDPSession();
146
+ const cdp = await safeCDP(browser);
134
147
  const history = await cdp.send('Page.getNavigationHistory');
135
148
  if (history.currentIndex > 0) {
136
149
  const entry = history.entries[history.currentIndex - 1];
@@ -152,7 +165,7 @@ export const goBack = tool(
152
165
  export const goForward = tool(
153
166
  async () => {
154
167
  const browser = await getBrowser();
155
- const cdp = await browser.getCDPSession();
168
+ const cdp = await safeCDP(browser);
156
169
  const history = await cdp.send('Page.getNavigationHistory');
157
170
  if (history.currentIndex < history.entries.length - 1) {
158
171
  const entry = history.entries[history.currentIndex + 1];
@@ -174,7 +187,7 @@ export const goForward = tool(
174
187
  export const scrollPage = tool(
175
188
  async ({ direction, distance }) => {
176
189
  const browser = await getBrowser();
177
- const cdp = await browser.getCDPSession();
190
+ const cdp = await safeCDP(browser);
178
191
  const deltaY = direction === 'up' ? -distance : distance;
179
192
  await cdp.send('Input.dispatchMouseEvent', {
180
193
  type: 'mouseWheel', x: 100, y: 100, deltaX: 0, deltaY
@@ -350,7 +363,7 @@ export const getElementHtml = tool(
350
363
  export const getCookies = tool(
351
364
  async ({ domain, format }) => {
352
365
  const browser = await getBrowser();
353
- const cdp = await browser.getCDPSession();
366
+ const cdp = await safeCDP(browser);
354
367
 
355
368
  // 获取当前页面 URL 用于过滤
356
369
  const currentUrl = await cdpEvaluate(browser, 'location.href');
@@ -108,7 +108,7 @@ export const e2eTest = tool(
108
108
  description: '端到端测试爬虫脚本',
109
109
  schema: z.object({
110
110
  script_path: z.string().describe('脚本路径'),
111
- test_params: z.record(z.string(), z.unknown()).optional().describe('测试参数'),
111
+ test_params: z.object({}).passthrough().optional().describe('测试参数'),
112
112
  }),
113
113
  }
114
114
  );
@@ -73,8 +73,8 @@ export const delegateCrawlerGeneration = tool(
73
73
  xpath: z.string(),
74
74
  type: z.string(),
75
75
  })),
76
- entry: z.any().nullable(),
77
- pagination: z.any().nullable(),
76
+ entry: z.string().optional().describe('入口 URL 或选择器'),
77
+ pagination: z.string().optional().describe('分页选择器或 URL 模式'),
78
78
  })),
79
79
  }).describe('爬虫配置'),
80
80
  domain: z.string().describe('目标网站域名'),
@@ -79,9 +79,10 @@ last_merged: null
79
79
 
80
80
  /**
81
81
  * evolve_skill 工具
82
+ * 使用结构化格式记录经验,符合 evolved.md 模板规范
82
83
  */
83
84
  export const evolveSkill = tool(
84
- async ({ skill, title, scenario, insight, isCore }) => {
85
+ async ({ skill, title, scenario, conclusion, technicalDetails, correctExample, incorrectExample, why, extensions, isCore }) => {
85
86
  const skillInfo = getSkillPath(skill);
86
87
  if (!skillInfo) {
87
88
  return JSON.stringify({
@@ -109,11 +110,44 @@ export const evolveSkill = tool(
109
110
 
110
111
  const data = parseEvolvedMd(content);
111
112
 
112
- // 生成新条目
113
+ // 生成新条目(结构化格式)
113
114
  const date = new Date().toISOString().split('T')[0];
114
- const entry = `### [${date}] ${title}
115
- **场景**: ${scenario}
116
- **经验**: ${insight}`;
115
+ let entry = `### [${date}] ${title}
116
+
117
+ **一句话结论**: ${conclusion}
118
+
119
+ **场景**: ${scenario}`;
120
+
121
+ // 技术细节(表格形式)
122
+ if (technicalDetails && Object.keys(technicalDetails).length > 0) {
123
+ entry += '\n\n**技术细节**:\n| 项目 | 值/说明 |\n|------|---------|';
124
+ for (const [key, value] of Object.entries(technicalDetails)) {
125
+ entry += `\n| ${key} | ${value} |`;
126
+ }
127
+ }
128
+
129
+ // 正确做法
130
+ if (correctExample) {
131
+ entry += `\n\n**正确做法**:\n\`\`\`python\n${correctExample}\n\`\`\``;
132
+ }
133
+
134
+ // 错误陷阱
135
+ if (incorrectExample) {
136
+ entry += `\n\n**错误陷阱** ⚠️:\n\`\`\`python\n${incorrectExample}\n\`\`\``;
137
+ }
138
+
139
+ // 原因解释
140
+ if (why) {
141
+ entry += `\n\n**为什么**: ${why}`;
142
+ }
143
+
144
+ // 举一反三
145
+ if (extensions && extensions.length > 0) {
146
+ entry += '\n\n**举一反三**:';
147
+ for (const item of extensions) {
148
+ entry += `\n- ${item}`;
149
+ }
150
+ }
117
151
 
118
152
  if (isCore) {
119
153
  // 追加到核心经验
@@ -153,13 +187,18 @@ export const evolveSkill = tool(
153
187
  },
154
188
  {
155
189
  name: 'evolve_skill',
156
- description: '记录分析过程中学到的经验。支持现有 skill 或 new:<name> 创建新 skill',
190
+ description: '记录分析过程中学到的经验。使用结构化格式(一句话结论、技术细节、正确/错误示例、陷阱标记)',
157
191
  schema: z.object({
158
192
  skill: z.string().describe('目标 skill: static-analysis, dynamic-analysis, sandbox, env, js2python, crawler, captcha, anti-detect, report, general,或 new:<name> 创建新 skill'),
159
193
  title: z.string().describe('经验标题,简短描述'),
160
194
  scenario: z.string().describe('具体场景/案例'),
161
- insight: z.string().describe('一句话总结经验'),
162
- isCore: z.boolean().default(false).describe('是否为核心经验'),
195
+ conclusion: z.string().describe('一句话核心结论(merge时必须提取到SKILL.md最前面)'),
196
+ technicalDetails: z.record(z.string(), z.string()).optional().describe('技术细节表格,如 {"参数类型": "int", "默认值": "0", "取值范围": "0或1"}'),
197
+ correctExample: z.string().optional().describe('正确代码示例'),
198
+ incorrectExample: z.string().optional().describe('错误代码示例(带陷阱标记)'),
199
+ why: z.string().optional().describe('解释根本原因'),
200
+ extensions: z.array(z.string()).optional().describe('类似场景列表(举一反三)'),
201
+ isCore: z.boolean().default(false).describe('是否为核心经验(已验证的高价值经验)'),
163
202
  }),
164
203
  }
165
204
  );
@@ -38,10 +38,9 @@ export { antiDetectTools } from './anti-detect.js';
38
38
  export { crawlerTools } from './crawler.js';
39
39
  export { crawlerGeneratorTools, generateCrawlerWithConfirm, delegateCrawlerGeneration } from './crawlerGenerator.js';
40
40
  export { nodejsTools, runNodeCode } from './nodejs.js';
41
+ export { pythonTools, executePythonCode } from './python.js';
41
42
  export { hookManagerTools, listHooks, enableHook, disableHook, injectHook, setHookConfig } from './hookManager.js';
42
43
  export { scratchpadTools, saveMemo, loadMemo, listMemo } from './scratchpad.js';
43
- // pythonTools 只在 js2python 子代理中使用,不导出到主工具集
44
-
45
44
  // 所有工具
46
45
  import { sandboxTools } from './sandbox.js';
47
46
  import { analyzerTools } from './analyzer.js';
@@ -54,7 +53,7 @@ import { profileTools } from './profile.js';
54
53
  import { runtimeTools } from './runtime.js';
55
54
  import { debugTools } from './debug.js';
56
55
  import { captureTools } from './capture.js';
57
- import { browserTools } from './browser.js';
56
+ import { browserTools, clickElement, scrollPage, fillInput, getInteractiveElements, getPageInfo, hoverElement, pressKey } from './browser.js';
58
57
  import { reportTools } from './report.js';
59
58
  import { webcrackTools } from './webcrack.js';
60
59
  import { preprocessTools } from './preprocess.js';
@@ -76,6 +75,7 @@ import { antiDetectTools } from './anti-detect.js';
76
75
  import { crawlerTools } from './crawler.js';
77
76
  import { crawlerGeneratorTools } from './crawlerGenerator.js';
78
77
  import { nodejsTools } from './nodejs.js';
78
+ import { executePythonCode } from './python.js';
79
79
  import { hookManagerTools } from './hookManager.js';
80
80
  import { scratchpadTools } from './scratchpad.js';
81
81
 
@@ -143,8 +143,12 @@ export const coreTools = [
143
143
  ...evolveTools,
144
144
  // Node.js 执行(委托前快速验证假设)- 已添加网络请求防护
145
145
  ...nodejsTools,
146
+ // Python 执行(用于加密验证、数据处理等任务)
147
+ executePythonCode,
146
148
  // 工作记忆
147
149
  ...scratchpadTools,
148
150
  // 爬虫代码生成(带 HITL 确认)
149
151
  ...crawlerGeneratorTools,
152
+ // 页面交互(自主数据搜寻:滚动加载、点击触发请求)
153
+ clickElement, scrollPage, fillInput, getInteractiveElements, getPageInfo, hoverElement, pressKey,
150
154
  ];
@@ -21,7 +21,7 @@ export const generatePatch = tool(
21
21
  description: '为缺失的环境属性生成补丁代码。',
22
22
  schema: z.object({
23
23
  property: z.string().describe('缺失的属性路径,如 navigator.userAgent'),
24
- context: z.record(z.string(), z.unknown()).optional().describe('上下文信息'),
24
+ context: z.object({}).passthrough().optional().describe('上下文信息'),
25
25
  }),
26
26
  }
27
27
  );
@@ -30,7 +30,7 @@ export const saveToStore = tool(
30
30
  type: z.enum(['env-module', 'crypto-pattern', 'obfuscation']).describe('类型'),
31
31
  name: z.string().describe('名称'),
32
32
  code: z.string().describe('代码'),
33
- metadata: z.record(z.string(), z.unknown()).optional().describe('元数据'),
33
+ metadata: z.object({}).passthrough().optional().describe('元数据'),
34
34
  }),
35
35
  }
36
36
  );
@@ -206,7 +206,11 @@ export class BrowserClient extends EventEmitter {
206
206
 
207
207
  try {
208
208
  // 通过简单的 Runtime.evaluate 验证 session 是否还活着
209
- await this.cdpSession.send('Runtime.evaluate', { expression: '1' });
209
+ // 必须加超时:页面 loading/断点暂停时 Runtime.evaluate 会永远挂住
210
+ await Promise.race([
211
+ this.cdpSession.send('Runtime.evaluate', { expression: '1' }),
212
+ new Promise((_, reject) => setTimeout(() => reject(new Error('CDP health check timeout')), 3000)),
213
+ ]);
210
214
  this._cdpLastCheck = now;
211
215
  return this.cdpSession;
212
216
  } catch {
@@ -657,6 +657,22 @@ export function getAnalysisPanelScript() {
657
657
  .deepspider-confirm-no {
658
658
  background: rgba(255,255,255,0.05); border: 1px solid rgba(255,255,255,0.15); color: #8b949e;
659
659
  }
660
+ /* 恢复 session 横幅 */
661
+ .deepspider-resume-banner {
662
+ background: rgba(99, 179, 237, 0.08); border: 1px solid rgba(99, 179, 237, 0.2);
663
+ border-radius: 10px; padding: 12px 14px; margin: 4px 0;
664
+ }
665
+ .deepspider-resume-btn {
666
+ width: 100%; padding: 8px; border-radius: 8px; border: none;
667
+ background: linear-gradient(135deg, #63b3ed, #4299e1); color: #fff;
668
+ font-size: 12px; font-weight: 500; cursor: pointer; transition: all 0.2s;
669
+ }
670
+ .deepspider-resume-btn:hover { opacity: 0.85; }
671
+ .deepspider-resume-dismiss {
672
+ width: 100%; padding: 6px; border: none; background: none;
673
+ color: #8b949e; font-size: 11px; cursor: pointer; margin-top: 4px;
674
+ }
675
+ .deepspider-resume-dismiss:hover { color: #c9d1d9; }
660
676
  .deepspider-msg-system {
661
677
  background: transparent;
662
678
  text-align: center;
@@ -1270,6 +1286,7 @@ export function getAnalysisPanelScript() {
1270
1286
  bindFilePathClicks(messagesEl);
1271
1287
  bindChoiceClicks(messagesEl);
1272
1288
  bindConfirmClicks(messagesEl);
1289
+ bindResumeClicks(messagesEl);
1273
1290
  }
1274
1291
  messagesEl.scrollTop = messagesEl.scrollHeight;
1275
1292
  }
@@ -1286,6 +1303,10 @@ export function getAnalysisPanelScript() {
1286
1303
  return renderChoicesMessage(m);
1287
1304
  case 'confirm':
1288
1305
  return renderConfirmMessage(m);
1306
+ case 'resume-available':
1307
+ return renderResumeMessage(m);
1308
+ case 'file-saved':
1309
+ return renderFileSavedMessage(m);
1289
1310
  default:
1290
1311
  return '<div class="deepspider-msg deepspider-msg-system">' + escapeHtml(JSON.stringify(m.data)) + '</div>';
1291
1312
  }
@@ -1326,6 +1347,32 @@ export function getAnalysisPanelScript() {
1326
1347
  return html;
1327
1348
  }
1328
1349
 
1350
+ function renderResumeMessage(m) {
1351
+ if (m.answered) return '';
1352
+ const d = m.data;
1353
+ return '<div class="deepspider-resume-banner">' +
1354
+ '<div style="margin-bottom:6px;">检测到上次未完成的分析</div>' +
1355
+ '<div style="font-size:11px;color:#8b949e;margin-bottom:8px;">' +
1356
+ escapeHtml(d.domain) + ' · ' + escapeHtml(d.timeAgo) + ' · ' + escapeHtml(String(d.messageCount)) + '条消息</div>' +
1357
+ '<button class="deepspider-resume-btn" data-resume-thread="' + escapeHtml(d.threadId) + '">恢复上次分析</button>' +
1358
+ '<button class="deepspider-resume-dismiss" data-resume-dismiss="true">忽略</button>' +
1359
+ '</div>';
1360
+ }
1361
+
1362
+ function renderFileSavedMessage(m) {
1363
+ var d = m.data;
1364
+ var icon = d.type === 'py' ? '🐍' : d.type === 'report' ? '📊' : '📄';
1365
+ var label = d.type === 'py' ? 'Python 脚本' : d.type === 'report' ? '分析报告' : '文件';
1366
+ return '<div class="deepspider-msg deepspider-msg-system" style="background:#1a2332;border-left:3px solid #388bfd;padding:8px 12px;">' +
1367
+ '<div style="display:flex;align-items:center;gap:6px;">' +
1368
+ '<span>' + icon + '</span>' +
1369
+ '<span style="color:#58a6ff;">' + escapeHtml(label) + '已保存</span>' +
1370
+ '</div>' +
1371
+ '<div class="deepspider-file-path" style="font-size:11px;color:#8b949e;margin-top:4px;cursor:pointer;" data-file-path="' + escapeHtml(d.path) + '">' +
1372
+ escapeHtml(d.path) +
1373
+ '</div></div>';
1374
+ }
1375
+
1329
1376
  function bindChoiceClicks(container) {
1330
1377
  container.querySelectorAll('.deepspider-choice-btn:not([style*="pointer-events"])').forEach(btn => {
1331
1378
  btn.onclick = () => {
@@ -1367,6 +1414,31 @@ export function getAnalysisPanelScript() {
1367
1414
  });
1368
1415
  }
1369
1416
 
1417
+ function bindResumeClicks(container) {
1418
+ container.querySelectorAll('.deepspider-resume-btn').forEach(btn => {
1419
+ btn.onclick = () => {
1420
+ const threadId = btn.dataset.resumeThread;
1421
+ const msgs = deepspider.chatMessages;
1422
+ for (let i = msgs.length - 1; i >= 0; i--) {
1423
+ if (msgs[i].type === 'resume-available') { msgs[i].answered = true; break; }
1424
+ }
1425
+ addMessage('system', '正在恢复上次分析...');
1426
+ if (typeof __deepspider_send__ === 'function') {
1427
+ __deepspider_send__(JSON.stringify({ __ds__: true, type: 'resume', threadId }));
1428
+ }
1429
+ };
1430
+ });
1431
+ container.querySelectorAll('.deepspider-resume-dismiss').forEach(btn => {
1432
+ btn.onclick = () => {
1433
+ const msgs = deepspider.chatMessages;
1434
+ for (let i = msgs.length - 1; i >= 0; i--) {
1435
+ if (msgs[i].type === 'resume-available') { msgs[i].answered = true; break; }
1436
+ }
1437
+ renderMessages();
1438
+ };
1439
+ });
1440
+ }
1441
+
1370
1442
  function escapeHtml(str) {
1371
1443
  return String(str).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;');
1372
1444
  }