deepspider 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +3 -0
- package/README.md +13 -13
- package/package.json +6 -6
- package/src/agent/core/PanelBridge.js +29 -77
- package/src/agent/core/StreamHandler.js +139 -14
- package/src/agent/index.js +51 -12
- package/src/agent/logger.js +184 -9
- package/src/agent/middleware/report.js +42 -16
- package/src/agent/middleware/subagent.js +233 -0
- package/src/agent/middleware/toolGuard.js +77 -0
- package/src/agent/middleware/validationWorkflow.js +171 -0
- package/src/agent/prompts/system.js +181 -59
- package/src/agent/run.js +41 -6
- package/src/agent/skills/crawler/SKILL.md +64 -3
- package/src/agent/skills/crawler/evolved.md +9 -1
- package/src/agent/skills/dynamic-analysis/SKILL.md +74 -7
- package/src/agent/skills/env/SKILL.md +75 -0
- package/src/agent/skills/evolve.js +0 -3
- package/src/agent/skills/sandbox/SKILL.md +35 -0
- package/src/agent/skills/static-analysis/SKILL.md +98 -2
- package/src/agent/subagents/anti-detect.js +10 -20
- package/src/agent/subagents/captcha.js +7 -19
- package/src/agent/subagents/crawler.js +25 -37
- package/src/agent/subagents/factory.js +109 -9
- package/src/agent/subagents/index.js +4 -13
- package/src/agent/subagents/js2python.js +7 -19
- package/src/agent/subagents/reverse.js +180 -0
- package/src/agent/tools/analysis.js +84 -1
- package/src/agent/tools/anti-detect.js +5 -2
- package/src/agent/tools/browser.js +160 -0
- package/src/agent/tools/captcha.js +1 -1
- package/src/agent/tools/capture.js +24 -3
- package/src/agent/tools/correlate.js +129 -15
- package/src/agent/tools/crawler.js +2 -1
- package/src/agent/tools/crawlerGenerator.js +90 -0
- package/src/agent/tools/debug.js +43 -6
- package/src/agent/tools/evolve.js +6 -3
- package/src/agent/tools/extractor.js +5 -1
- package/src/agent/tools/file.js +16 -7
- package/src/agent/tools/generateHook.js +66 -0
- package/src/agent/tools/hookManager.js +19 -9
- package/src/agent/tools/index.js +33 -20
- package/src/agent/tools/nodejs.js +41 -6
- package/src/agent/tools/python.js +4 -4
- package/src/agent/tools/report.js +2 -2
- package/src/agent/tools/runtime.js +1 -1
- package/src/agent/tools/sandbox.js +21 -1
- package/src/agent/tools/scratchpad.js +70 -0
- package/src/agent/tools/tracing.js +26 -0
- package/src/agent/tools/verifyAlgorithm.js +117 -0
- package/src/analyzer/EncryptionAnalyzer.js +2 -2
- package/src/browser/EnvBridge.js +27 -13
- package/src/browser/client.js +124 -18
- package/src/browser/collector.js +101 -22
- package/src/browser/defaultHooks.js +3 -1
- package/src/browser/hooks/index.js +5 -0
- package/src/browser/interceptors/AntiDebugInterceptor.js +132 -0
- package/src/browser/interceptors/NetworkInterceptor.js +77 -13
- package/src/browser/interceptors/ScriptInterceptor.js +34 -9
- package/src/browser/interceptors/index.js +1 -0
- package/src/browser/ui/analysisPanel.js +469 -464
- package/src/cli/commands/config.js +11 -3
- package/src/config/paths.js +9 -1
- package/src/config/settings.js +7 -1
- package/src/core/PatchGenerator.js +26 -6
- package/src/core/Sandbox.js +140 -3
- package/src/env/EnvCodeGenerator.js +60 -88
- package/src/env/modules/bom/history.js +6 -0
- package/src/env/modules/bom/location.js +6 -0
- package/src/env/modules/bom/navigator.js +13 -0
- package/src/env/modules/bom/screen.js +6 -0
- package/src/env/modules/bom/storage.js +7 -0
- package/src/env/modules/dom/document.js +14 -0
- package/src/env/modules/dom/event.js +4 -0
- package/src/env/modules/index.js +27 -10
- package/src/env/modules/webapi/fetch.js +4 -0
- package/src/env/modules/webapi/url.js +4 -0
- package/src/env/modules/webapi/xhr.js +8 -0
- package/src/store/DataStore.js +130 -47
- package/src/store/Store.js +2 -1
- package/src/agent/subagents/dynamic.js +0 -64
- package/src/agent/subagents/env-agent.js +0 -82
- package/src/agent/subagents/sandbox.js +0 -55
- package/src/agent/subagents/static.js +0 -66
|
@@ -406,10 +406,169 @@ export const getCookies = tool(
|
|
|
406
406
|
}
|
|
407
407
|
);
|
|
408
408
|
|
|
409
|
+
/**
 * Escape a string for use as a CSS identifier (Node.js has no CSS.escape).
 *
 * Follows the CSSOM "serialize an identifier" rules:
 * - NUL becomes U+FFFD;
 * - control characters, and a digit in first position (or in second position
 *   after a leading "-"), are written as a hex code-point escape followed by
 *   a space, because `#1abc` or a raw control character is not a valid
 *   selector;
 * - a lone "-" is backslash-escaped;
 * - ASCII letters, digits, "_" and "-" and every code unit >= U+0080 are kept
 *   verbatim (leaving non-ASCII untouched also avoids splitting surrogate
 *   pairs);
 * - any other ASCII character is backslash-escaped.
 *
 * @param {string} str - Raw identifier (an id or a class name).
 * @returns {string} The escaped identifier.
 */
function cssEscape(str) {
  const text = String(str);
  let escaped = '';

  for (let i = 0; i < text.length; i += 1) {
    const ch = text[i];
    const code = text.charCodeAt(i);

    if (code === 0) {
      escaped += '\uFFFD';
      continue;
    }

    const isControl = (code >= 0x01 && code <= 0x1f) || code === 0x7f;
    const isDigit = code >= 0x30 && code <= 0x39;
    // An identifier cannot start with a digit, nor with "-" followed by a digit.
    const isLeadingDigit = isDigit && (i === 0 || (i === 1 && text[0] === '-'));

    if (isControl || isLeadingDigit) {
      escaped += `\\${code.toString(16)} `;
    } else if (ch === '-' && text.length === 1) {
      escaped += '\\-';
    } else if (code >= 0x80 || /[\w-]/.test(ch)) {
      escaped += ch;
    } else {
      escaped += `\\${ch}`;
    }
  }

  return escaped;
}
|
|
415
|
+
|
|
416
|
+
/**
 * Escape a value so it can be placed inside a double-quoted CSS attribute
 * selector, e.g. `[name="..."]`.
 *
 * Backslashes and double quotes are backslash-escaped. Control characters
 * (including newlines, which may not appear raw inside a CSS string) are
 * written as a hex code-point escape followed by a space; NUL becomes U+FFFD.
 *
 * @param {string} str - Raw attribute value.
 * @returns {string} The value, safe to embed between double quotes.
 */
function escapeAttrValue(str) {
  return String(str).replace(/[\u0000-\u001f\u007f"\\]/g, (ch) => {
    if (ch === '"' || ch === '\\') return `\\${ch}`;
    if (ch === '\u0000') return '\uFFFD';
    return `\\${ch.charCodeAt(0).toString(16)} `;
  });
}
|
|
422
|
+
|
|
423
|
+
/**
 * Default allow-list of accessibility roles treated as interactive.
 * Used as the role filter when the caller does not pass its own list.
 */
const INTERACTIVE_ROLES = new Set([
  'button',
  'link',
  'menuitem',
  'tab',
  'checkbox',
  'radio',
  'combobox',
  'textbox',
  'switch',
  'option',
  'menuitemcheckbox',
  'menuitemradio',
  'searchbox',
  'spinbutton',
  'slider',
]);
|
|
432
|
+
|
|
433
|
+
/**
 * Build a CSS selector from the node object returned by CDP DOM.describeNode.
 *
 * Candidates are tried in this order and the first match wins:
 *   1. `#id`
 *   2. `tag[data-*="value"]` (first data-* attribute with a non-empty value)
 *   3. `tag[aria-label="value"]`
 *   4. `tag[name="value"]`
 *   5. `tag.class1.class2...` (all classes joined)
 *   6. bare tag name
 *
 * @param {{ localName?: string, attributes?: string[] }} nodeInfo - Node
 *   description; `attributes` is a flat [name, value, name, value, ...] array.
 * @returns {string} A CSS selector, or '' when the node has no localName.
 */
function buildSelector(nodeInfo) {
  const { localName, attributes } = nodeInfo;
  if (!localName) return '';

  // Attribute names come from the page, so collect them in a Map instead of
  // using them as keys on a plain object (a name like "__proto__" is harmless
  // here). A trailing name without a value is ignored.
  const attrs = new Map();
  const flat = attributes || [];
  for (let i = 0; i + 1 < flat.length; i += 2) {
    attrs.set(flat[i], flat[i + 1]);
  }

  // Prefer the id.
  const id = attrs.get('id');
  if (id) return `#${cssEscape(id)}`;

  // Then any data-* attribute. The attribute name is escaped as well, because
  // it may contain characters such as ":" or "." that are not valid raw in a
  // selector.
  for (const [attrName, attrValue] of attrs) {
    if (attrName.startsWith('data-') && attrValue) {
      return `${localName}[${cssEscape(attrName)}="${escapeAttrValue(attrValue)}"]`;
    }
  }

  // Then aria-label.
  const ariaLabel = attrs.get('aria-label');
  if (ariaLabel) {
    return `${localName}[aria-label="${escapeAttrValue(ariaLabel)}"]`;
  }

  // Then the name attribute (input/select and similar).
  const nameAttr = attrs.get('name');
  if (nameAttr) {
    return `${localName}[name="${escapeAttrValue(nameAttr)}"]`;
  }

  // Then the class list; every class is joined to make the selector more specific.
  const classAttr = attrs.get('class');
  if (classAttr) {
    const classes = classAttr.split(/\s+/).filter(Boolean);
    if (classes.length) {
      return `${localName}.${classes.map((c) => cssEscape(c)).join('.')}`;
    }
  }

  // Fallback: the tag name alone.
  return localName;
}
|
|
473
|
+
|
|
474
|
+
/**
 * Tool: list the interactive elements of the current page from the CDP
 * Accessibility tree.
 *
 * Flow: enable the Accessibility and DOM domains, fetch the full AX tree, keep
 * the non-ignored nodes whose role is in `roles` (or in INTERACTIVE_ROLES when
 * `roles` is omitted or empty), cap the list at `limit`, then resolve a CSS
 * selector for every kept node through DOM.describeNode + buildSelector.
 *
 * @param {object} input
 * @param {string[]} [input.roles] - Roles to keep; defaults to INTERACTIVE_ROLES.
 * @param {number} [input.limit] - Maximum number of elements to return
 *   (the schema default is 100). Negative or fractional values are clamped to
 *   a non-negative integer; a missing/non-finite value means "no cap".
 * @returns {Promise<string>} JSON string `{ elements, total, truncated }`,
 *   where `total` is the number of matching nodes before truncation.
 * @throws {Error} When the browser exposes no CDP session.
 */
export const getInteractiveElements = tool(
  async ({ roles, limit }) => {
    const browser = await getBrowser();
    const cdp = await browser.getCDPSession();
    if (!cdp) throw new Error('CDP session not available');

    // Enable the CDP domains used below.
    await cdp.send('Accessibility.enable');
    await cdp.send('DOM.enable');

    // Fetch the complete accessibility tree.
    const { nodes } = await cdp.send('Accessibility.getFullAXTree');

    // Decide which roles to keep.
    const filterRoles = roles?.length ? new Set(roles) : INTERACTIVE_ROLES;

    // Keep only non-ignored nodes with a wanted role.
    const candidates = nodes.filter((node) => {
      if (node.ignored) return false;
      const role = node.role?.value;
      return Boolean(role) && filterRoles.has(role);
    });

    // Normalise the cap: slice(0, negative) would silently drop trailing
    // elements and disagree with the `truncated` flag.
    const maxCount = Number.isFinite(limit)
      ? Math.max(0, Math.floor(limit))
      : candidates.length;

    const totalBeforeTruncate = candidates.length;
    const truncated = candidates.length > maxCount;
    const selected = candidates.slice(0, maxCount);

    // Resolve selectors concurrently, in batches of 20.
    const BATCH = 20;
    const elements = [];
    for (let i = 0; i < selected.length; i += BATCH) {
      const batch = selected.slice(i, i + BATCH);
      const results = await Promise.all(batch.map(async (node) => {
        // Flatten the AX properties into a name -> value map.
        const props = {};
        for (const p of (node.properties || [])) {
          props[p.name] = p.value?.value;
        }

        // Resolve a selector through the backing DOM node, when there is one.
        let selector = '';
        if (node.backendDOMNodeId) {
          try {
            const desc = await cdp.send('DOM.describeNode', {
              backendNodeId: node.backendDOMNodeId,
            });
            selector = buildSelector(desc.node);
          } catch {
            // Best effort: the node may already have been removed from the
            // DOM, in which case the element is reported with an empty selector.
          }
        }

        return {
          role: node.role?.value,
          name: node.name?.value || '',
          selector,
          clickable: !props.disabled,
          disabled: !!props.disabled,
          focused: !!props.focused,
          description: node.description?.value || '',
        };
      }));
      elements.push(...results);
    }

    return JSON.stringify({
      elements,
      total: totalBeforeTruncate,
      truncated,
    });
  },
  {
    name: 'get_interactive_elements',
    description: `获取页面上所有可交互元素(按钮、链接、输入框等),基于 Accessibility Tree。

用途:在点击/操作元素前,先调用此工具了解页面上有哪些可交互元素,避免盲目猜测选择器。
返回的 selector 可直接传给 click_element / fill_input 使用。

支持的角色:button, link, menuitem, tab, checkbox, radio, combobox, textbox, switch, option, searchbox, spinbutton, slider 等。`,
    schema: z.object({
      roles: z.array(z.string()).optional().describe('过滤角色列表,如 ["button", "link"],不传则返回所有可交互角色'),
      limit: z.number().default(100).describe('最大返回数量,默认 100,避免结果过大'),
    }),
  }
);
|
|
566
|
+
|
|
409
567
|
export const browserTools = [
|
|
410
568
|
clickElement,
|
|
411
569
|
fillInput,
|
|
412
570
|
waitForSelector,
|
|
571
|
+
takeScreenshot,
|
|
413
572
|
reloadPage,
|
|
414
573
|
goBack,
|
|
415
574
|
goForward,
|
|
@@ -420,4 +579,5 @@ export const browserTools = [
|
|
|
420
579
|
getPageSource,
|
|
421
580
|
getElementHtml,
|
|
422
581
|
getCookies,
|
|
582
|
+
getInteractiveElements,
|
|
423
583
|
];
|
|
@@ -106,7 +106,7 @@ export const captchaSlideDetect = tool(
|
|
|
106
106
|
|
|
107
107
|
// 获取背景图和滑块图
|
|
108
108
|
const bgElement = await page.$(bg_selector);
|
|
109
|
-
const
|
|
109
|
+
const _slideElement = slide_selector ? await page.$(slide_selector) : null;
|
|
110
110
|
|
|
111
111
|
if (!bgElement) {
|
|
112
112
|
return JSON.stringify({ success: false, error: '未找到背景图' });
|
|
@@ -33,6 +33,15 @@ export const collectProperty = tool(
|
|
|
33
33
|
const browser = await getBrowser();
|
|
34
34
|
const collector = new EnvCollector(browser.getPage());
|
|
35
35
|
const data = await collector.collect(path, { depth });
|
|
36
|
+
|
|
37
|
+
// 变量未定义时追加 fallback 引导,引导 LLM 走断点路径
|
|
38
|
+
if (data?.success === false && /undefined|null/.test(data?.error || '')) {
|
|
39
|
+
return JSON.stringify({
|
|
40
|
+
...data,
|
|
41
|
+
hint: `⚠️ 变量 ${path} 未定义。该变量可能仅在特定函数执行时存在(如请求发起时动态赋值,执行后被清理)。\n请使用 set_breakpoint 在目标函数处设断点,断点命中后用 evaluate_at_breakpoint 采集变量值。不要猜测变量值。`,
|
|
42
|
+
}, null, 2);
|
|
43
|
+
}
|
|
44
|
+
|
|
36
45
|
return JSON.stringify(data, null, 2);
|
|
37
46
|
},
|
|
38
47
|
{
|
|
@@ -67,14 +76,26 @@ export const autoFixEnv = tool(
|
|
|
67
76
|
/**
 * Evaluate a JavaScript expression in the page through CDP, reusing the
 * browser's CDP session.
 *
 * The call races `Runtime.evaluate` against a timer. The timer is always
 * cleared once the race settles, so a finished call does not leave a pending
 * timeout behind. Any failure (timeout, CDP error, malformed reply) is logged
 * and reported as `null` rather than thrown.
 *
 * @param {object} browser - Object exposing `getCDPSession()`.
 * @param {string} expression - JavaScript source to evaluate in the page.
 * @param {number} [timeout=5000] - Maximum wait in milliseconds.
 * @returns {Promise<*>} The evaluated value (returned by value), or `null`
 *   when no session is available or the evaluation failed/timed out.
 */
async function evaluateViaCDP(browser, expression, timeout = 5000) {
  const cdp = await browser.getCDPSession();
  if (!cdp) return null;

  let timer;
  const timeoutPromise = new Promise((_, reject) => {
    timer = setTimeout(() => reject(new Error('CDP evaluate timeout')), timeout);
  });

  try {
    // `send` is invoked inside the try so a synchronous throw is handled like
    // a rejected promise.
    const result = await Promise.race([
      cdp.send('Runtime.evaluate', {
        expression,
        returnByValue: true,
      }),
      timeoutPromise,
    ]);
    return result.result?.value;
  } catch (e) {
    console.error('[evaluateViaCDP] 超时或错误:', e.message);
    return null;
  } finally {
    clearTimeout(timer);
  }
}
|
|
79
100
|
|
|
80
101
|
/**
|
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
|
|
6
6
|
import { z } from 'zod';
|
|
7
7
|
import { tool } from '@langchain/core/tools';
|
|
8
|
+
import { getDataStore } from '../../store/DataStore.js';
|
|
8
9
|
|
|
9
10
|
/**
|
|
10
11
|
* 分析请求-加密关联
|
|
@@ -74,22 +75,40 @@ export const analyzeCorrelation = tool(
|
|
|
74
75
|
|
|
75
76
|
/**
 * Parse the top frames of a call stack.
 *
 * Two input formats are supported:
 *   1. A string stack (from Error.stack). Lines 3-5 of the text are parsed;
 *      the first two lines are skipped. Each line yields
 *      `{ func, file, line }`, or `{ raw }` when it matches no known frame
 *      format.
 *   2. A callFrames array (from a CDP initiator). The first three frames are
 *      mapped to `{ func, file, line }`.
 *
 * @param {string|object[]|null|undefined} stack - Stack to parse.
 * @returns {object[]|null} Up to three parsed frames, or `null` when `stack`
 *   is empty or of an unsupported type.
 */
function parseStackTop(stack) {
  if (!stack) return null;

  // callFrames array (from a CDP initiator).
  if (Array.isArray(stack)) {
    return stack.slice(0, 3).map((frame) => ({
      func: frame.functionName || frame.func || '(anonymous)',
      file: frame.url || frame.file || '',
      // `??` keeps a legitimate line number of 0.
      line: frame.lineNumber ?? frame.line ?? 0,
    }));
  }

  // String stack (from Error.stack).
  if (typeof stack === 'string') {
    return stack.split('\n').slice(2, 5).map((line) => {
      // "at func (file:line:col)"
      const named = line.match(/at\s+(.+?)\s+\((.+?):(\d+):(\d+)\)/);
      if (named) {
        return {
          func: named[1],
          file: named[2],
          line: Number.parseInt(named[3], 10),
        };
      }

      // "at file:line:col" — an anonymous frame. Its capture groups are
      // (file, line, col), so they must not be read with the indices of the
      // named form above.
      const bare = line.match(/at\s+(.+?):(\d+):(\d+)/);
      if (bare) {
        return {
          func: 'anonymous',
          file: bare[1],
          line: Number.parseInt(bare[2], 10),
        };
      }

      return { raw: line.trim() };
    });
  }

  return null;
}
|
|
94
113
|
|
|
95
114
|
/**
|
|
@@ -183,11 +202,12 @@ export const analyzeCookieEncryption = tool(
|
|
|
183
202
|
async ({ logs, cookieName }) => {
|
|
184
203
|
const parsed = typeof logs === 'string' ? JSON.parse(logs) : logs;
|
|
185
204
|
|
|
186
|
-
// 找到设置该 cookie
|
|
205
|
+
// 找到设置该 cookie 的日志(匹配 cookie 键名)
|
|
187
206
|
const cookieLogs = parsed.filter(entry => {
|
|
188
207
|
if (entry._type !== 'cookie') return false;
|
|
189
208
|
if (entry.action !== 'write') return false;
|
|
190
|
-
|
|
209
|
+
// cookie hook 日志的 value 格式为 "name=value",匹配键名部分
|
|
210
|
+
return entry.value?.startsWith(cookieName + '=') || entry.name === cookieName;
|
|
191
211
|
});
|
|
192
212
|
|
|
193
213
|
if (cookieLogs.length === 0) {
|
|
@@ -294,10 +314,104 @@ export const analyzeResponseDecryption = tool(
|
|
|
294
314
|
}
|
|
295
315
|
);
|
|
296
316
|
|
|
317
|
+
/**
 * Classify a value by the textual shape of its encoding.
 *
 * Pure-hex values are labelled by length (32 -> MD5, 40 -> SHA-1,
 * 64 -> SHA-256, anything else -> plain hex). Otherwise the value is checked
 * against a base64 shape and then a JWT-like prefix.
 *
 * @param {string} value - Value to classify.
 * @returns {string} One of 'hash-md5', 'hash-sha1', 'hash-sha256', 'hex',
 *   'base64', 'jwt' or 'unknown'.
 */
function identifyPattern(value) {
  const isHex = /^[0-9a-fA-F]+$/.test(value);
  if (isHex) {
    switch (value.length) {
      case 32:
        return 'hash-md5';
      case 40:
        return 'hash-sha1';
      case 64:
        return 'hash-sha256';
      default:
        return 'hex';
    }
  }

  const isBase64 = /^[A-Za-z0-9+/]{20,}={0,2}$/.test(value);
  if (isBase64) return 'base64';

  const isJwtLike = /^ey[A-Za-z0-9_-]+\./.test(value);
  return isJwtLike ? 'jwt' : 'unknown';
}
|
|
331
|
+
|
|
332
|
+
/**
 * Tell whether a value has the shape of an encrypted/encoded result:
 * a hex run of at least 32 characters, a base64 run of at least 20
 * characters (with up to two "=" of padding), or a JWT-like "ey....".
 *
 * @param {string} value - Value to inspect.
 * @returns {boolean} True when any of the shapes matches.
 */
function looksEncrypted(value) {
  const shapes = [
    /^[0-9a-fA-F]{32,}$/,
    /^[A-Za-z0-9+/]{20,}={0,2}$/,
    /^ey[A-Za-z0-9_-]+\./,
  ];
  return shapes.some((shape) => shape.test(value));
}
|
|
341
|
+
|
|
342
|
+
/**
 * Parse a request body into a key/value object.
 *
 * Resolution order:
 *   1. empty body -> `{}`
 *   2. JSON object or array -> the parsed value
 *   3. text that starts with a `key=` pair -> form-urlencoded entries
 *   4. anything else (XML, multipart, JSON primitives or `null`, opaque
 *      text) -> `{ _raw }` holding the first 200 characters
 *
 * URLSearchParams accepts any string without throwing, so form parsing is
 * gated on the text actually looking like form data; otherwise the `_raw`
 * fallback could never be reached and arbitrary text would be turned into
 * meaningless keys.
 *
 * @param {string|null|undefined} body - Raw request body (expected to be a
 *   string; other values are stringified).
 * @returns {object} Always a non-null object.
 */
function parseBody(body) {
  if (!body) return {};
  const text = typeof body === 'string' ? body : String(body);
  const rawFallback = () => ({ _raw: text.slice(0, 200) });

  try {
    const parsed = JSON.parse(text);
    // JSON null/number/string/boolean carry no parameters; callers expect an object.
    return parsed !== null && typeof parsed === 'object' ? parsed : rawFallback();
  } catch {
    // Not JSON: fall through to the form-urlencoded check.
  }

  // Form data starts with a key (no whitespace, not opening markup/JSON)
  // immediately followed by "=".
  const looksLikeForm = /^(?![<{[])[^\s=&]+=/.test(text);
  return looksLikeForm
    ? Object.fromEntries(new URLSearchParams(text))
    : rawFallback();
}
|
|
358
|
+
|
|
359
|
+
/**
 * Tool: analyse the parameter structure of a stored request without relying
 * on hook logs.
 *
 * Loads the request via the data store, extracts URL query parameters and
 * body parameters, and flags every parameter whose value is longer than 20
 * characters and has an encrypted/encoded shape.
 *
 * URL and body parameters are scanned separately so that a body parameter
 * cannot hide a URL parameter of the same name; each finding carries a
 * `source` of 'url' or 'body'.
 *
 * @param {object} input
 * @param {string} input.site - Site hostname.
 * @param {string} input.id - Request id.
 * @returns {Promise<string>} JSON string with `url`, `method`, `urlParams`,
 *   `bodyParams`, `suspiciousParams` and `initiator`, or `{ error }` when
 *   the request is not found.
 */
export const analyzeRequestParams = tool(
  async ({ site, id }) => {
    const store = getDataStore();
    const detail = await store.getResponse(site, id);
    if (!detail) return JSON.stringify({ error: '未找到该请求' });

    let urlParams = {};
    try {
      urlParams = Object.fromEntries(new URL(detail.url).searchParams);
    } catch { /* invalid URL: leave urlParams empty */ }

    const bodyParams = parseBody(detail.requestBody);
    // parseBody may hand back JSON null or a primitive; `typeof null` is
    // 'object', so null has to be excluded explicitly before reading `_raw`.
    const bodyIsObject = bodyParams !== null && typeof bodyParams === 'object';

    // Collect suspicious parameters.
    const suspiciousParams = [];
    const scan = (params, source) => {
      for (const [key, value] of Object.entries(params)) {
        const str = String(value);
        if (str.length > 20 && looksEncrypted(str)) {
          suspiciousParams.push({
            name: key,
            value: str.slice(0, 50) + (str.length > 50 ? '...' : ''),
            length: str.length,
            pattern: identifyPattern(str),
            source,
          });
        }
      }
    };
    scan(urlParams, 'url');
    if (bodyIsObject) scan(bodyParams, 'body');

    return JSON.stringify({
      url: detail.url,
      method: detail.method,
      urlParams,
      bodyParams: bodyIsObject && !bodyParams._raw
        ? bodyParams : { _raw: detail.requestBody?.slice(0, 200) },
      suspiciousParams,
      initiator: detail.initiator || null,
    }, null, 2);
  },
  {
    name: 'analyze_request_params',
    description: '分析请求的参数结构,自动识别可疑的加密参数(hex/base64/hash)。不依赖 Hook 日志,可直接使用。',
    schema: z.object({
      site: z.string().describe('站点 hostname'),
      id: z.string().describe('请求 ID'),
    }),
  }
);
|
|
409
|
+
|
|
297
410
|
/** Every correlation-analysis tool defined in this module, in registration order. */
export const correlateTools = [
  analyzeCorrelation,
  locateCryptoSource,
  analyzeHeaderEncryption,
  analyzeCookieEncryption,
  analyzeResponseDecryption,
  analyzeRequestParams,
];
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DeepSpider - 爬虫代码生成工具
|
|
3
|
+
* 通过 LangGraph interrupt 机制实现面板交互式选择
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { z } from 'zod';
|
|
7
|
+
import { tool } from '@langchain/core/tools';
|
|
8
|
+
import { interrupt } from '@langchain/langgraph';
|
|
9
|
+
|
|
10
|
+
/**
 * Tool: ask the user which crawler framework to generate code for.
 *
 * Execution is suspended with a LangGraph `interrupt` carrying a "choices"
 * payload (question + options); the value supplied on resume is reported
 * back as `framework`.
 *
 * NOTE(review): the options carry both an id ('skip') and a label ('不需要').
 * Which of the two the panel sends back on resume is not visible here, so
 * both are treated as "do not generate" — confirm against the StreamHandler
 * resume logic.
 *
 * @param {object} input
 * @param {string} input.analysisSummary - Summary of the analysis (required by
 *   the schema; not used by this function).
 * @param {string} input.domain - Target site domain, echoed in the result.
 * @returns {Promise<string>} JSON string `{ success, framework, domain, message }`.
 */
export const generateCrawlerWithConfirm = tool(
  async ({ domain }) => {
    const userChoice = interrupt({
      type: 'choices',
      question: '分析完成!选择爬虫框架生成完整脚本:',
      options: [
        { id: 'requests', label: 'requests', description: '简单易用,适合快速原型' },
        { id: 'httpx', label: 'httpx', description: '异步高性能,适合大规模并发' },
        { id: 'scrapy', label: 'Scrapy', description: '企业级框架,适合复杂项目' },
        { id: 'skip', label: '不需要', description: '仅保存当前分析结果' },
      ],
    });

    // Accept the skip option by id as well as by label; otherwise an id reply
    // would be reported as a framework literally named "skip".
    const skipped = userChoice === 'skip' || userChoice === '不需要';

    return JSON.stringify({
      success: true,
      framework: userChoice,
      domain,
      message: skipped
        ? '用户选择不生成爬虫脚本'
        : `用户选择使用 ${userChoice} 框架生成爬虫`,
    });
  },
  {
    name: 'generate_crawler_code',
    description: `分析完成后,向用户展示可点击的框架选项(requests/httpx/Scrapy/不需要)。

用户点击后,工具返回用户选择的框架名称。根据返回值委托 crawler 子代理生成代码。`,
    schema: z.object({
      analysisSummary: z.string().describe('分析结果摘要'),
      domain: z.string().describe('目标网站域名'),
    }),
  }
);
|
|
47
|
+
|
|
48
|
+
/**
 * Tool: package the chosen framework and crawler configuration for hand-off
 * to the crawler sub-agent.
 *
 * Nothing is generated here; the tool echoes its validated input together
 * with a message telling the caller to invoke the task tool next.
 *
 * @param {object} input
 * @param {'requests'|'httpx'|'scrapy'} input.framework - Framework chosen by the user.
 * @param {object} input.config - Crawler configuration (`url` plus `stages`).
 * @param {string} input.domain - Target site domain.
 * @returns {Promise<string>} JSON string
 *   `{ success, ready, framework, config, domain, message }`.
 */
export const delegateCrawlerGeneration = tool(
  async (input) => {
    const { framework, config, domain } = input;
    const handoff = {
      success: true,
      ready: true,
      framework,
      config,
      domain,
      message: `准备使用 ${framework} 框架生成爬虫,请调用 task 工具委托 crawler 子代理`,
    };
    return JSON.stringify(handoff);
  },
  {
    name: 'delegate_crawler_generation',
    description: '准备参数,委托 crawler 子代理生成特定框架的爬虫代码',
    schema: z.object({
      framework: z
        .enum(['requests', 'httpx', 'scrapy'])
        .describe('用户选择的爬虫框架'),
      config: z
        .object({
          url: z.string(),
          stages: z.array(
            z.object({
              name: z.string(),
              fields: z.array(
                z.object({
                  name: z.string(),
                  xpath: z.string(),
                  type: z.string(),
                }),
              ),
              entry: z.any().nullable(),
              pagination: z.any().nullable(),
            }),
          ),
        })
        .describe('爬虫配置'),
      domain: z.string().describe('目标网站域名'),
    }),
  }
);
|
|
84
|
+
|
|
85
|
+
/** Crawler-generation tools exported by this module (also the default export). */
export const crawlerGeneratorTools = [generateCrawlerWithConfirm, delegateCrawlerGeneration];

export default crawlerGeneratorTools;
|
package/src/agent/tools/debug.js
CHANGED
|
@@ -6,6 +6,7 @@ import { z } from 'zod';
|
|
|
6
6
|
import { tool } from '@langchain/core/tools';
|
|
7
7
|
import { getBrowser } from '../../browser/index.js';
|
|
8
8
|
import { CDPSession } from '../../browser/cdp.js';
|
|
9
|
+
import { logStore } from '../logger.js';
|
|
9
10
|
|
|
10
11
|
let cdpSession = null;
|
|
11
12
|
let isPaused = false;
|
|
@@ -19,18 +20,26 @@ async function getSession() {
|
|
|
19
20
|
const browser = await getBrowser();
|
|
20
21
|
cdpSession = await CDPSession.fromBrowser(browser);
|
|
21
22
|
|
|
22
|
-
//
|
|
23
|
+
// 过滤反调试 debugger 语句的噪音:只在命中我们设的断点时打日志
|
|
24
|
+
let lastPauseIsBreakpoint = false;
|
|
25
|
+
|
|
23
26
|
cdpSession.on('Debugger.paused', (params) => {
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
+
lastPauseIsBreakpoint = params.reason === 'breakpoint' || params.hitBreakpoints?.length > 0;
|
|
28
|
+
if (lastPauseIsBreakpoint) {
|
|
29
|
+
isPaused = true;
|
|
30
|
+
currentCallFrames = params.callFrames || [];
|
|
31
|
+
const top = currentCallFrames[0];
|
|
32
|
+
const func = top?.functionName || '(anonymous)';
|
|
33
|
+
const url = top?.url?.split('/').pop() || top?.url || '?';
|
|
34
|
+
const line = top?.location?.lineNumber ?? '?';
|
|
35
|
+
console.log(`[debug] Breakpoint hit: ${func} @ ${url}:${line}`);
|
|
36
|
+
}
|
|
27
37
|
});
|
|
28
38
|
|
|
29
|
-
// 监听恢复事件
|
|
30
39
|
cdpSession.on('Debugger.resumed', () => {
|
|
31
40
|
isPaused = false;
|
|
32
41
|
currentCallFrames = [];
|
|
33
|
-
|
|
42
|
+
lastPauseIsBreakpoint = false;
|
|
34
43
|
});
|
|
35
44
|
}
|
|
36
45
|
return cdpSession;
|
|
@@ -235,6 +244,33 @@ export const stepOver = tool(
|
|
|
235
244
|
}
|
|
236
245
|
);
|
|
237
246
|
|
|
247
|
+
/**
 * Tool: query the agent's execution logs.
 *
 * With `category === 'stats'` the log store's statistics are returned;
 * otherwise the remaining filters are passed to `logStore.query`.
 *
 * @param {object} input
 * @param {'LLM'|'TOOL'|'CHAIN'|'AGENT'|'stats'} [input.category] - Log category, or 'stats'.
 * @param {'INFO'|'DEBUG'|'ERROR'} [input.level] - Log level filter.
 * @param {number} [input.limit] - Number of entries to return (schema default: 50).
 * @param {string} [input.toolName] - Tool-name filter.
 * @returns {Promise<string>} Pretty-printed JSON of the stats or the matching entries.
 */
export const getAgentLogs = tool(
  async ({ category, level, limit, toolName }) => {
    const payload = category === 'stats'
      ? logStore.getStats()
      : logStore.query({ category, level, limit, toolName });
    return JSON.stringify(payload, null, 2);
  },
  {
    name: 'get_agent_logs',
    description: '获取当前 Agent 会话的执行日志,包括 LLM 调用、工具调用、错误等。用于调试和分析 Agent 执行过程。category=stats 可获取统计概览。',
    schema: z.object({
      category: z
        .enum(['LLM', 'TOOL', 'CHAIN', 'AGENT', 'stats'])
        .optional()
        .describe('日志类别:LLM/TOOL/CHAIN/AGENT,或 stats 获取统计'),
      level: z
        .enum(['INFO', 'DEBUG', 'ERROR'])
        .optional()
        .describe('日志级别'),
      limit: z
        .number()
        .optional()
        .default(50)
        .describe('返回条数(默认50,最近的N条)'),
      toolName: z
        .string()
        .optional()
        .describe('按工具名过滤(仅 TOOL 类别有效)'),
    }),
  }
);
|
|
273
|
+
|
|
238
274
|
export const debugTools = [
|
|
239
275
|
setBreakpoint,
|
|
240
276
|
setXHRBreakpoint,
|
|
@@ -243,4 +279,5 @@ export const debugTools = [
|
|
|
243
279
|
evaluateAtBreakpoint,
|
|
244
280
|
resumeExecution,
|
|
245
281
|
stepOver,
|
|
282
|
+
getAgentLogs,
|
|
246
283
|
];
|
|
@@ -23,6 +23,9 @@ function getSkillPath(skillName) {
|
|
|
23
23
|
'sandbox': SKILLS.sandbox,
|
|
24
24
|
'env': SKILLS.env,
|
|
25
25
|
'js2python': SKILLS.js2python,
|
|
26
|
+
'crawler': SKILLS.crawler,
|
|
27
|
+
'captcha': SKILLS.captcha,
|
|
28
|
+
'anti-detect': SKILLS.antiDetect,
|
|
26
29
|
'report': SKILLS.report,
|
|
27
30
|
'general': SKILLS.general,
|
|
28
31
|
};
|
|
@@ -83,7 +86,7 @@ export const evolveSkill = tool(
|
|
|
83
86
|
if (!skillInfo) {
|
|
84
87
|
return JSON.stringify({
|
|
85
88
|
success: false,
|
|
86
|
-
error: `未知的 skill: ${skill}。可用: static-analysis, dynamic-analysis, sandbox, env, js2python, report, general。或使用 new:<name> 创建新 skill。`
|
|
89
|
+
error: `未知的 skill: ${skill}。可用: static-analysis, dynamic-analysis, sandbox, env, js2python, crawler, captcha, anti-detect, report, general。或使用 new:<name> 创建新 skill。`
|
|
87
90
|
});
|
|
88
91
|
}
|
|
89
92
|
|
|
@@ -100,7 +103,7 @@ export const evolveSkill = tool(
|
|
|
100
103
|
let content = '';
|
|
101
104
|
try {
|
|
102
105
|
content = fs.readFileSync(evolvedPath, 'utf-8');
|
|
103
|
-
} catch
|
|
106
|
+
} catch {
|
|
104
107
|
// 文件不存在,使用空内容
|
|
105
108
|
}
|
|
106
109
|
|
|
@@ -152,7 +155,7 @@ export const evolveSkill = tool(
|
|
|
152
155
|
name: 'evolve_skill',
|
|
153
156
|
description: '记录分析过程中学到的经验。支持现有 skill 或 new:<name> 创建新 skill',
|
|
154
157
|
schema: z.object({
|
|
155
|
-
skill: z.string().describe('目标 skill: static-analysis, dynamic-analysis, sandbox, env, js2python, report, general,或 new:<name> 创建新 skill'),
|
|
158
|
+
skill: z.string().describe('目标 skill: static-analysis, dynamic-analysis, sandbox, env, js2python, crawler, captcha, anti-detect, report, general,或 new:<name> 创建新 skill'),
|
|
156
159
|
title: z.string().describe('经验标题,简短描述'),
|
|
157
160
|
scenario: z.string().describe('具体场景/案例'),
|
|
158
161
|
insight: z.string().describe('一句话总结经验'),
|
|
@@ -34,16 +34,20 @@ export const listFunctions = tool(
|
|
|
34
34
|
*/
|
|
35
35
|
export const getFunctionCode = tool(
|
|
36
36
|
async ({ code, funcName }) => {
|
|
37
|
+
// buildDependencyGraph 先调用,extractSlice 内部会复用 this.ast 缓存
|
|
38
|
+
const graph = astAnalyzer.buildDependencyGraph(code);
|
|
39
|
+
const deps = graph.get(funcName) || [];
|
|
37
40
|
const slice = astAnalyzer.extractSlice(code, funcName);
|
|
38
41
|
return JSON.stringify({
|
|
39
42
|
funcName,
|
|
40
43
|
found: !!slice,
|
|
41
44
|
code: slice || '未找到该函数',
|
|
45
|
+
dependencies: deps,
|
|
42
46
|
}, null, 2);
|
|
43
47
|
},
|
|
44
48
|
{
|
|
45
49
|
name: 'get_function_code',
|
|
46
|
-
description: '
|
|
50
|
+
description: '提取指定函数的完整代码(含递归依赖函数和全局变量)。返回可独立运行的代码片段 + 依赖函数列表',
|
|
47
51
|
schema: z.object({
|
|
48
52
|
code: z.string().describe('源代码'),
|
|
49
53
|
funcName: z.string().describe('函数名'),
|