deepspider 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +3 -0
- package/README.md +21 -15
- package/package.json +9 -7
- package/src/agent/core/PanelBridge.js +56 -78
- package/src/agent/core/StreamHandler.js +244 -20
- package/src/agent/index.js +120 -23
- package/src/agent/logger.js +183 -8
- package/src/agent/middleware/memoryFlush.js +48 -0
- package/src/agent/middleware/report.js +95 -37
- package/src/agent/middleware/subagent.js +236 -0
- package/src/agent/middleware/toolAvailability.js +37 -0
- package/src/agent/middleware/toolGuard.js +187 -0
- package/src/agent/middleware/validationWorkflow.js +171 -0
- package/src/agent/prompts/system.js +310 -59
- package/src/agent/run.js +168 -20
- package/src/agent/sessions.js +88 -0
- package/src/agent/skills/anti-detect/SKILL.md +89 -14
- package/src/agent/skills/captcha/SKILL.md +93 -19
- package/src/agent/skills/crawler/SKILL.md +64 -3
- package/src/agent/skills/crawler/evolved.md +9 -1
- package/src/agent/skills/dynamic-analysis/SKILL.md +74 -7
- package/src/agent/skills/env/SKILL.md +75 -0
- package/src/agent/skills/js2python/evolved.md +5 -1
- package/src/agent/skills/sandbox/SKILL.md +35 -0
- package/src/agent/skills/static-analysis/SKILL.md +98 -2
- package/src/agent/skills/static-analysis/evolved.md +5 -1
- package/src/agent/subagents/anti-detect.js +36 -24
- package/src/agent/subagents/captcha.js +35 -28
- package/src/agent/subagents/crawler.js +40 -105
- package/src/agent/subagents/factory.js +129 -9
- package/src/agent/subagents/index.js +4 -13
- package/src/agent/subagents/js2python.js +25 -35
- package/src/agent/subagents/reverse.js +180 -0
- package/src/agent/tools/analysis.js +101 -8
- package/src/agent/tools/anti-detect.js +5 -2
- package/src/agent/tools/browser.js +186 -13
- package/src/agent/tools/capture.js +24 -3
- package/src/agent/tools/correlate.js +129 -15
- package/src/agent/tools/crawler.js +3 -2
- package/src/agent/tools/crawlerGenerator.js +90 -0
- package/src/agent/tools/debug.js +43 -6
- package/src/agent/tools/evolve.js +5 -2
- package/src/agent/tools/extractor.js +5 -1
- package/src/agent/tools/file.js +14 -5
- package/src/agent/tools/generateHook.js +66 -0
- package/src/agent/tools/hookManager.js +19 -9
- package/src/agent/tools/index.js +36 -21
- package/src/agent/tools/nodejs.js +41 -6
- package/src/agent/tools/patch.js +1 -1
- package/src/agent/tools/sandbox.js +21 -1
- package/src/agent/tools/scratchpad.js +70 -0
- package/src/agent/tools/store.js +1 -1
- package/src/agent/tools/tracing.js +26 -0
- package/src/agent/tools/verifyAlgorithm.js +117 -0
- package/src/browser/EnvBridge.js +27 -13
- package/src/browser/client.js +128 -18
- package/src/browser/collector.js +101 -22
- package/src/browser/defaultHooks.js +3 -1
- package/src/browser/hooks/index.js +5 -0
- package/src/browser/interceptors/AntiDebugInterceptor.js +132 -0
- package/src/browser/interceptors/NetworkInterceptor.js +76 -12
- package/src/browser/interceptors/ScriptInterceptor.js +32 -7
- package/src/browser/interceptors/index.js +1 -0
- package/src/browser/ui/analysisPanel.js +541 -464
- package/src/cli/commands/config.js +11 -3
- package/src/config/paths.js +9 -1
- package/src/config/settings.js +7 -1
- package/src/core/PatchGenerator.js +24 -4
- package/src/core/Sandbox.js +140 -3
- package/src/env/EnvCodeGenerator.js +60 -88
- package/src/env/modules/bom/history.js +6 -0
- package/src/env/modules/bom/location.js +6 -0
- package/src/env/modules/bom/navigator.js +13 -0
- package/src/env/modules/bom/screen.js +6 -0
- package/src/env/modules/bom/storage.js +7 -0
- package/src/env/modules/dom/document.js +14 -0
- package/src/env/modules/dom/event.js +4 -0
- package/src/env/modules/index.js +27 -10
- package/src/env/modules/webapi/fetch.js +4 -0
- package/src/env/modules/webapi/url.js +4 -0
- package/src/env/modules/webapi/xhr.js +8 -0
- package/src/store/DataStore.js +125 -42
- package/src/store/Store.js +2 -1
- package/src/agent/subagents/dynamic.js +0 -64
- package/src/agent/subagents/env-agent.js +0 -82
- package/src/agent/subagents/sandbox.js +0 -55
- package/src/agent/subagents/static.js +0 -66
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DeepSpider - 爬虫代码生成工具
|
|
3
|
+
* 通过 LangGraph interrupt 机制实现面板交互式选择
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { z } from 'zod';
|
|
7
|
+
import { tool } from '@langchain/core/tools';
|
|
8
|
+
import { interrupt } from '@langchain/langgraph';
|
|
9
|
+
|
|
10
|
+
/**
 * Ask the user which crawler framework to generate code for.
 *
 * The handler pauses the graph with a LangGraph `interrupt` carrying a
 * `choices` payload (question + four options). According to the original
 * author's note, this payload follows a shared protocol that StreamHandler
 * renders into the panel; that rendering is not visible in this file.
 *
 * Resolves to a JSON string `{ success, framework, domain, message }`, where
 * `framework` is whatever value the interrupt was resumed with.
 *
 * NOTE(review): `analysisSummary` is required by the schema but is not used
 * by the handler — confirm whether it should be forwarded in the payload.
 */
export const generateCrawlerWithConfirm = tool(
  async ({ domain }) => {
    const userChoice = interrupt({
      type: 'choices',
      question: '分析完成!选择爬虫框架生成完整脚本:',
      options: [
        { id: 'requests', label: 'requests', description: '简单易用,适合快速原型' },
        { id: 'httpx', label: 'httpx', description: '异步高性能,适合大规模并发' },
        { id: 'scrapy', label: 'Scrapy', description: '企业级框架,适合复杂项目' },
        { id: 'skip', label: '不需要', description: '仅保存当前分析结果' },
      ],
    });

    // The decline option has id 'skip' and label '不需要'. Accept both so the
    // decline message is produced whether the interrupt is resumed with the
    // option id or with its label.
    const declined = userChoice === 'skip' || userChoice === '不需要';

    return JSON.stringify({
      success: true,
      framework: userChoice,
      domain,
      message: declined
        ? '用户选择不生成爬虫脚本'
        : `用户选择使用 ${userChoice} 框架生成爬虫`,
    });
  },
  {
    name: 'generate_crawler_code',
    description: `分析完成后,向用户展示可点击的框架选项(requests/httpx/Scrapy/不需要)。

用户点击后,工具返回用户选择的框架名称。根据返回值委托 crawler 子代理生成代码。`,
    schema: z.object({
      analysisSummary: z.string().describe('分析结果摘要'),
      domain: z.string().describe('目标网站域名'),
    }),
  }
);
|
|
47
|
+
|
|
48
|
+
// One extracted field: its name, the XPath that locates it, and a type tag.
const crawlerFieldSchema = z.object({
  name: z.string(),
  xpath: z.string(),
  type: z.string(),
});

// One crawl stage: a named group of fields with optional entry/pagination hints.
const crawlerStageSchema = z.object({
  name: z.string(),
  fields: z.array(crawlerFieldSchema),
  entry: z.string().optional().describe('入口 URL 或选择器'),
  pagination: z.string().optional().describe('分页选择器或 URL 模式'),
});

/**
 * Package the chosen framework and crawler config for hand-off.
 *
 * The handler performs no generation itself: it echoes its validated inputs
 * back as a JSON string together with a message telling the model to call
 * the `task` tool and delegate to the crawler sub-agent.
 */
export const delegateCrawlerGeneration = tool(
  async (args) => {
    const { framework, config, domain } = args;
    const handoff = {
      success: true,
      ready: true,
      framework,
      config,
      domain,
      message: `准备使用 ${framework} 框架生成爬虫,请调用 task 工具委托 crawler 子代理`,
    };
    return JSON.stringify(handoff);
  },
  {
    name: 'delegate_crawler_generation',
    description: '准备参数,委托 crawler 子代理生成特定框架的爬虫代码',
    schema: z.object({
      framework: z.enum(['requests', 'httpx', 'scrapy']).describe('用户选择的爬虫框架'),
      config: z.object({
        url: z.string(),
        stages: z.array(crawlerStageSchema),
      }).describe('爬虫配置'),
      domain: z.string().describe('目标网站域名'),
    }),
  }
);
|
|
84
|
+
|
|
85
|
+
// Both crawler-generation tools, in registration order: the user prompt
// first, then the hand-off to the crawler sub-agent.
export const crawlerGeneratorTools = [generateCrawlerWithConfirm, delegateCrawlerGeneration];
|
|
89
|
+
|
|
90
|
+
export default crawlerGeneratorTools;
|
package/src/agent/tools/debug.js
CHANGED
|
@@ -6,6 +6,7 @@ import { z } from 'zod';
|
|
|
6
6
|
import { tool } from '@langchain/core/tools';
|
|
7
7
|
import { getBrowser } from '../../browser/index.js';
|
|
8
8
|
import { CDPSession } from '../../browser/cdp.js';
|
|
9
|
+
import { logStore } from '../logger.js';
|
|
9
10
|
|
|
10
11
|
let cdpSession = null;
|
|
11
12
|
let isPaused = false;
|
|
@@ -19,18 +20,26 @@ async function getSession() {
|
|
|
19
20
|
const browser = await getBrowser();
|
|
20
21
|
cdpSession = await CDPSession.fromBrowser(browser);
|
|
21
22
|
|
|
22
|
-
//
|
|
23
|
+
// 过滤反调试 debugger 语句的噪音:只在命中我们设的断点时打日志
|
|
24
|
+
let lastPauseIsBreakpoint = false;
|
|
25
|
+
|
|
23
26
|
cdpSession.on('Debugger.paused', (params) => {
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
+
lastPauseIsBreakpoint = params.reason === 'breakpoint' || params.hitBreakpoints?.length > 0;
|
|
28
|
+
if (lastPauseIsBreakpoint) {
|
|
29
|
+
isPaused = true;
|
|
30
|
+
currentCallFrames = params.callFrames || [];
|
|
31
|
+
const top = currentCallFrames[0];
|
|
32
|
+
const func = top?.functionName || '(anonymous)';
|
|
33
|
+
const url = top?.url?.split('/').pop() || top?.url || '?';
|
|
34
|
+
const line = top?.location?.lineNumber ?? '?';
|
|
35
|
+
console.log(`[debug] Breakpoint hit: ${func} @ ${url}:${line}`);
|
|
36
|
+
}
|
|
27
37
|
});
|
|
28
38
|
|
|
29
|
-
// 监听恢复事件
|
|
30
39
|
cdpSession.on('Debugger.resumed', () => {
|
|
31
40
|
isPaused = false;
|
|
32
41
|
currentCallFrames = [];
|
|
33
|
-
|
|
42
|
+
lastPauseIsBreakpoint = false;
|
|
34
43
|
});
|
|
35
44
|
}
|
|
36
45
|
return cdpSession;
|
|
@@ -235,6 +244,33 @@ export const stepOver = tool(
|
|
|
235
244
|
}
|
|
236
245
|
);
|
|
237
246
|
|
|
247
|
+
/**
 * Tool: read the agent's execution log store.
 *
 * When `category` is 'stats' the handler returns `logStore.getStats()`;
 * otherwise it returns `logStore.query(...)` with the four filter fields.
 * Either result is serialized as pretty-printed JSON (2-space indent).
 */
export const getAgentLogs = tool(
  async (filters) => {
    const { category, level, limit, toolName } = filters;
    const payload = category === 'stats'
      ? logStore.getStats()
      : logStore.query({ category, level, limit, toolName });
    return JSON.stringify(payload, null, 2);
  },
  {
    name: 'get_agent_logs',
    description: '获取当前 Agent 会话的执行日志,包括 LLM 调用、工具调用、错误等。用于调试和分析 Agent 执行过程。category=stats 可获取统计概览。',
    schema: z.object({
      category: z
        .enum(['LLM', 'TOOL', 'CHAIN', 'AGENT', 'stats'])
        .optional()
        .describe('日志类别:LLM/TOOL/CHAIN/AGENT,或 stats 获取统计'),
      level: z
        .enum(['INFO', 'DEBUG', 'ERROR'])
        .optional()
        .describe('日志级别'),
      limit: z
        .number()
        .optional()
        .default(50)
        .describe('返回条数(默认50,最近的N条)'),
      toolName: z
        .string()
        .optional()
        .describe('按工具名过滤(仅 TOOL 类别有效)'),
    }),
  }
);
|
|
273
|
+
|
|
238
274
|
export const debugTools = [
|
|
239
275
|
setBreakpoint,
|
|
240
276
|
setXHRBreakpoint,
|
|
@@ -243,4 +279,5 @@ export const debugTools = [
|
|
|
243
279
|
evaluateAtBreakpoint,
|
|
244
280
|
resumeExecution,
|
|
245
281
|
stepOver,
|
|
282
|
+
getAgentLogs,
|
|
246
283
|
];
|
|
@@ -23,6 +23,9 @@ function getSkillPath(skillName) {
|
|
|
23
23
|
'sandbox': SKILLS.sandbox,
|
|
24
24
|
'env': SKILLS.env,
|
|
25
25
|
'js2python': SKILLS.js2python,
|
|
26
|
+
'crawler': SKILLS.crawler,
|
|
27
|
+
'captcha': SKILLS.captcha,
|
|
28
|
+
'anti-detect': SKILLS.antiDetect,
|
|
26
29
|
'report': SKILLS.report,
|
|
27
30
|
'general': SKILLS.general,
|
|
28
31
|
};
|
|
@@ -83,7 +86,7 @@ export const evolveSkill = tool(
|
|
|
83
86
|
if (!skillInfo) {
|
|
84
87
|
return JSON.stringify({
|
|
85
88
|
success: false,
|
|
86
|
-
error: `未知的 skill: ${skill}。可用: static-analysis, dynamic-analysis, sandbox, env, js2python, report, general。或使用 new:<name> 创建新 skill。`
|
|
89
|
+
error: `未知的 skill: ${skill}。可用: static-analysis, dynamic-analysis, sandbox, env, js2python, crawler, captcha, anti-detect, report, general。或使用 new:<name> 创建新 skill。`
|
|
87
90
|
});
|
|
88
91
|
}
|
|
89
92
|
|
|
@@ -152,7 +155,7 @@ export const evolveSkill = tool(
|
|
|
152
155
|
name: 'evolve_skill',
|
|
153
156
|
description: '记录分析过程中学到的经验。支持现有 skill 或 new:<name> 创建新 skill',
|
|
154
157
|
schema: z.object({
|
|
155
|
-
skill: z.string().describe('目标 skill: static-analysis, dynamic-analysis, sandbox, env, js2python, report, general,或 new:<name> 创建新 skill'),
|
|
158
|
+
skill: z.string().describe('目标 skill: static-analysis, dynamic-analysis, sandbox, env, js2python, crawler, captcha, anti-detect, report, general,或 new:<name> 创建新 skill'),
|
|
156
159
|
title: z.string().describe('经验标题,简短描述'),
|
|
157
160
|
scenario: z.string().describe('具体场景/案例'),
|
|
158
161
|
insight: z.string().describe('一句话总结经验'),
|
|
@@ -34,16 +34,20 @@ export const listFunctions = tool(
|
|
|
34
34
|
*/
|
|
35
35
|
export const getFunctionCode = tool(
|
|
36
36
|
async ({ code, funcName }) => {
|
|
37
|
+
// buildDependencyGraph 先调用,extractSlice 内部会复用 this.ast 缓存
|
|
38
|
+
const graph = astAnalyzer.buildDependencyGraph(code);
|
|
39
|
+
const deps = graph.get(funcName) || [];
|
|
37
40
|
const slice = astAnalyzer.extractSlice(code, funcName);
|
|
38
41
|
return JSON.stringify({
|
|
39
42
|
funcName,
|
|
40
43
|
found: !!slice,
|
|
41
44
|
code: slice || '未找到该函数',
|
|
45
|
+
dependencies: deps,
|
|
42
46
|
}, null, 2);
|
|
43
47
|
},
|
|
44
48
|
{
|
|
45
49
|
name: 'get_function_code',
|
|
46
|
-
description: '
|
|
50
|
+
description: '提取指定函数的完整代码(含递归依赖函数和全局变量)。返回可独立运行的代码片段 + 依赖函数列表',
|
|
47
51
|
schema: z.object({
|
|
48
52
|
code: z.string().describe('源代码'),
|
|
49
53
|
funcName: z.string().describe('函数名'),
|
package/src/agent/tools/file.js
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
import { z } from 'zod';
|
|
7
7
|
import { tool } from '@langchain/core/tools';
|
|
8
8
|
import { writeFileSync, readFileSync, existsSync, readdirSync } from 'fs';
|
|
9
|
-
import { dirname, join, isAbsolute, relative } from 'path';
|
|
9
|
+
import { dirname, join, isAbsolute, relative, resolve } from 'path';
|
|
10
10
|
import { PATHS, ensureDir, DEEPSPIDER_HOME } from '../../config/paths.js';
|
|
11
11
|
|
|
12
12
|
const OUTPUT_DIR = PATHS.OUTPUT_DIR;
|
|
@@ -17,15 +17,24 @@ function ensureFileDir(filePath) {
|
|
|
17
17
|
}
|
|
18
18
|
|
|
19
19
|
/**
 * Report whether `candidate` is `parentDir` itself or lies underneath it.
 *
 * Uses path.relative (which resolves both arguments) instead of a string
 * prefix test, so a sibling such as "<home>-evil" is not mistaken for a
 * child of "<home>", and "<home>/../x" is not accepted either.
 */
function isWithinDir(parentDir, candidate) {
  const rel = relative(parentDir, candidate);
  // First segment '..' means we had to climb out of parentDir; an absolute
  // result means there is no relative route at all (e.g. another drive).
  const [head] = rel.split(/[\\/]/);
  return head !== '..' && !isAbsolute(rel);
}

/**
 * Map a caller-supplied path to a location that is guaranteed to live under
 * DEEPSPIDER_HOME.
 *
 * - Absolute paths already inside DEEPSPIDER_HOME are used as-is.
 * - Any other absolute path is re-rooted under OUTPUT_DIR (leading slashes
 *   are stripped first).
 * - Relative paths are joined onto OUTPUT_DIR.
 *
 * @param {string} filePath - path requested by the caller
 * @returns {string} the resolved, normalized absolute path
 * @throws {Error} when the resolved path escapes DEEPSPIDER_HOME (for
 *   example through `../` segments)
 */
function getSafePath(filePath) {
  let resolved;
  if (!isAbsolute(filePath)) {
    resolved = join(OUTPUT_DIR, filePath);
  } else if (isWithinDir(DEEPSPIDER_HOME, filePath)) {
    // Already under ~/.deepspider/: keep the caller's path.
    resolved = filePath;
  } else {
    // Any other absolute path is placed under OUTPUT_DIR.
    resolved = join(OUTPUT_DIR, filePath.replace(/^\/+/, ''));
  }

  // Final guard: reject anything that still ends up outside DEEPSPIDER_HOME.
  const normalized = resolve(resolved);
  if (!isWithinDir(DEEPSPIDER_HOME, normalized)) {
    throw new Error(`路径不允许超出 ${DEEPSPIDER_HOME}: ${filePath}`);
  }
  return normalized;
}
|
|
30
39
|
|
|
31
40
|
export const artifactSave = tool(
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DeepSpider - 统一 Hook 代码生成工具
|
|
3
|
+
* 合并 hookTools + cryptoHookTools + asyncTools + antiDebugTools
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { z } from 'zod';
|
|
7
|
+
import { tool } from '@langchain/core/tools';
|
|
8
|
+
import { NetworkHook } from '../../env/NetworkHook.js';
|
|
9
|
+
import { CookieHook } from '../../env/CookieHook.js';
|
|
10
|
+
import { CryptoHook } from '../../env/CryptoHook.js';
|
|
11
|
+
import { AsyncHook } from '../../env/AsyncHook.js';
|
|
12
|
+
import { AntiAntiDebug } from '../../env/AntiAntiDebug.js';
|
|
13
|
+
|
|
14
|
+
// One shared generator instance per hook family.
const networkHook = new NetworkHook();
const cookieHook = new CookieHook();
const cryptoHook = new CryptoHook();
const asyncHook = new AsyncHook();
const antiDebug = new AntiAntiDebug();

// Pair a code generator with the usage hint returned alongside the code.
const defineHook = (gen, usage) => ({ gen, usage });

// Registry of supported hook types. Key order matters: it becomes the order
// of the enum values in `typeEnum` below.
const HOOK_TYPES = {
  // Network
  xhr: defineHook(
    () => networkHook.generateXHRHookCode({ captureBody: true, captureResponse: true }),
    "getLogs('xhr')",
  ),
  fetch: defineHook(
    () => networkHook.generateFetchHookCode({ captureBody: true, captureResponse: true }),
    "getLogs('fetch')",
  ),
  cookie: defineHook(
    () => cookieHook.generateCookieHookCode({ trackRead: true, trackWrite: true }),
    "getLogs('cookie')",
  ),
  // Crypto
  cryptojs: defineHook(() => cryptoHook.generateCryptoJSHookCode(), "getLogs('crypto')"),
  sm_crypto: defineHook(() => cryptoHook.generateSMCryptoHookCode(), "getLogs('crypto')"),
  rsa: defineHook(() => cryptoHook.generateRSAHookCode(), "getLogs('crypto')"),
  generic_crypto: defineHook(() => cryptoHook.generateGenericCryptoHookCode(), "getLogs('crypto')"),
  // Async
  promise: defineHook(() => asyncHook.generatePromiseHookCode(), "getLogs('async')"),
  timer: defineHook(() => asyncHook.generateTimerHookCode(), "getLogs('timer')"),
  // Anti-anti-debug
  anti_debugger: defineHook(() => antiDebug.generateAntiDebuggerCode(), '绕过无限 debugger'),
  anti_console: defineHook(() => antiDebug.generateAntiConsoleDetectCode(), '绕过控制台检测'),
  anti_cdp: defineHook(() => antiDebug.generateAntiCDPDetectCode(), '绕过 CDP 检测'),
  anti_debug_full: defineHook(() => antiDebug.generateFullAntiDebugCode(), '完整反反调试(包含以上所有)'),
};

// Registry keys as a non-empty tuple, the shape z.enum() expects.
const typeEnum = /** @type {[string, ...string[]]} */ (Object.keys(HOOK_TYPES));
|
|
41
|
+
|
|
42
|
+
/**
 * Tool: generate hook source code for one of the registered hook types.
 *
 * Looks the requested type up in HOOK_TYPES and runs its generator. On a hit
 * the result is pretty-printed JSON `{ success, type, code, usage }`; when
 * the type has no registry entry it is compact JSON `{ success: false, error }`
 * listing the valid type names.
 */
export const generateHook = tool(
  async ({ type }) => {
    const entry = HOOK_TYPES[type];
    if (entry) {
      const code = entry.gen();
      return JSON.stringify({ success: true, type, code, usage: entry.usage }, null, 2);
    }
    return JSON.stringify({ success: false, error: `未知类型: ${type},可选: ${typeEnum.join(', ')}` });
  },
  {
    name: 'generate_hook',
    description: `生成 Hook 代码。生成后需通过 inject_hook 注入浏览器。

类型:
- 网络: xhr, fetch, cookie
- 加密: cryptojs(CryptoJS), sm_crypto(国密), rsa(JSEncrypt/node-forge), generic_crypto(通用)
- 异步: promise, timer
- 反反调试: anti_debugger, anti_console, anti_cdp, anti_debug_full(完整)`,
    schema: z.object({
      type: z.enum(typeEnum).describe('Hook 类型'),
    }),
  }
);
|
|
65
|
+
|
|
66
|
+
// Tool list exported by this module; it contains the single unified tool.
export const generateHookTools = [
  generateHook,
];
|
|
@@ -8,16 +8,28 @@ import { tool } from '@langchain/core/tools';
|
|
|
8
8
|
import { getBrowser } from '../../browser/index.js';
|
|
9
9
|
|
|
10
10
|
/**
 * Evaluate a JS expression in the page via CDP `Runtime.evaluate`, bounded
 * by a timeout.
 *
 * @param {object} browser - object exposing an async `getCDPSession()`
 * @param {string} expression - JavaScript source to evaluate
 * @param {number} [timeout=5000] - maximum wait in milliseconds
 * @returns {Promise<*>} the evaluated value (requested with
 *   `returnByValue: true`), or `null` when there is no CDP session or the
 *   evaluation fails or times out (the failure is logged, not thrown)
 */
async function evaluateViaCDP(browser, expression, timeout = 5000) {
  const cdp = await browser.getCDPSession();
  if (!cdp) return null;

  let timer;
  const timeoutPromise = new Promise((_, reject) => {
    timer = setTimeout(() => reject(new Error('CDP evaluate timeout')), timeout);
  });

  try {
    const result = await Promise.race([
      cdp.send('Runtime.evaluate', {
        expression,
        returnByValue: true,
      }),
      timeoutPromise,
    ]);
    return result.result?.value;
  } catch (e) {
    console.error('[evaluateViaCDP] 超时或错误:', e.message);
    return null;
  } finally {
    // Always release the timer; otherwise every successful evaluation leaves
    // a live timer behind for up to `timeout` ms.
    clearTimeout(timer);
  }
}
|
|
22
34
|
|
|
23
35
|
/**
|
|
@@ -115,14 +127,12 @@ export const injectHook = tool(
|
|
|
115
127
|
if (!browser.getPage()) {
|
|
116
128
|
return JSON.stringify({ success: false, error: '浏览器未就绪' });
|
|
117
129
|
}
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
.replace(/'/g, "\\'")
|
|
121
|
-
.replace(/\n/g, '\\n');
|
|
130
|
+
// 用 JSON.stringify 安全转义,避免手动转义遗漏特殊字符
|
|
131
|
+
const safeCode = JSON.stringify(code);
|
|
122
132
|
|
|
123
133
|
const result = await evaluateViaCDP(
|
|
124
134
|
browser,
|
|
125
|
-
`JSON.stringify(window.__deepspider__?.injectHook?.(
|
|
135
|
+
`JSON.stringify(window.__deepspider__?.injectHook?.(${safeCode}))`
|
|
126
136
|
);
|
|
127
137
|
return result || JSON.stringify({ success: false, error: '注入失败' });
|
|
128
138
|
} catch (e) {
|
package/src/agent/tools/index.js
CHANGED
|
@@ -11,9 +11,9 @@ export { patchTools, generatePatch, matchModule } from './patch.js';
|
|
|
11
11
|
export { envTools, listEnvModules, loadEnvModule, loadAllEnvModules } from './env.js';
|
|
12
12
|
export { profileTools, listProfiles, loadProfile, generateProfileCode } from './profile.js';
|
|
13
13
|
export { runtimeTools, launchBrowser, navigateTo, browserClose, addInitScript, clearCookies } from './runtime.js';
|
|
14
|
-
export { debugTools, setBreakpoint, setXHRBreakpoint, getCallStack, getFrameVariables, evaluateAtBreakpoint, resumeExecution, stepOver } from './debug.js';
|
|
14
|
+
export { debugTools, setBreakpoint, setXHRBreakpoint, getCallStack, getFrameVariables, evaluateAtBreakpoint, resumeExecution, stepOver, getAgentLogs } from './debug.js';
|
|
15
15
|
export { captureTools, collectEnv, collectProperty, autoFixEnv, getHookLogs } from './capture.js';
|
|
16
|
-
export { browserTools, clickElement, fillInput, waitForSelector } from './browser.js';
|
|
16
|
+
export { browserTools, clickElement, fillInput, waitForSelector, takeScreenshot, reloadPage, goBack, goForward, scrollPage, pressKey, hoverElement, getPageInfo, getPageSource, getElementHtml, getCookies, getInteractiveElements } from './browser.js';
|
|
17
17
|
export { reportTools, saveAnalysisReport } from './report.js';
|
|
18
18
|
export { webcrackTools, unpackBundle, analyzeBundle } from './webcrack.js';
|
|
19
19
|
export { preprocessTools, preprocessCode } from './preprocess.js';
|
|
@@ -24,17 +24,22 @@ export { asyncTools, generatePromiseHook, generateTimerHook } from './async.js';
|
|
|
24
24
|
export { antiDebugTools, generateAntiDebugger, generateAntiConsoleDetect, generateAntiCDP, generateFullAntiDebug } from './antidebug.js';
|
|
25
25
|
export { verifyTools, verifyMD5, verifySHA256, verifyHMAC, verifyAES, identifyEncryption } from './verify.js';
|
|
26
26
|
export { cryptoHookTools, generateCryptoJSHook, generateRSAHook } from './cryptohook.js';
|
|
27
|
-
|
|
27
|
+
// 合并工具(reverse-agent 使用)
|
|
28
|
+
export { generateHookTools, generateHook } from './generateHook.js';
|
|
29
|
+
export { verifyAlgorithmTools, verifyAlgorithm } from './verifyAlgorithm.js';
|
|
30
|
+
export { correlateTools, analyzeCorrelation, locateCryptoSource, analyzeHeaderEncryption, analyzeCookieEncryption, analyzeResponseDecryption, analyzeRequestParams } from './correlate.js';
|
|
28
31
|
export { extractorTools, listFunctions, getFunctionCode } from './extractor.js';
|
|
29
|
-
export { tracingTools, getSiteList, searchInResponses, getRequestDetail, getRequestList, getScriptList, getScriptSource, searchInScripts, clearSiteData, clearAllData } from './tracing.js';
|
|
32
|
+
export { tracingTools, getSiteList, searchInResponses, getRequestDetail, getRequestList, getRequestInitiator, getScriptList, getScriptSource, searchInScripts, clearSiteData, clearAllData } from './tracing.js';
|
|
30
33
|
export { analysisTools, getPendingAnalysis, getPendingChat, sendPanelMessage, startSelector } from './analysis.js';
|
|
31
34
|
export { fileTools, artifactSave, artifactLoad, artifactEdit, artifactGlob, artifactGrep } from './file.js';
|
|
32
35
|
export { evolveTools, evolveSkill } from './evolve.js';
|
|
33
36
|
export { captchaTools } from './captcha.js';
|
|
34
37
|
export { antiDetectTools } from './anti-detect.js';
|
|
35
38
|
export { crawlerTools } from './crawler.js';
|
|
39
|
+
export { crawlerGeneratorTools, generateCrawlerWithConfirm, delegateCrawlerGeneration } from './crawlerGenerator.js';
|
|
36
40
|
export { nodejsTools, runNodeCode } from './nodejs.js';
|
|
37
41
|
export { hookManagerTools, listHooks, enableHook, disableHook, injectHook, setHookConfig } from './hookManager.js';
|
|
42
|
+
export { scratchpadTools, saveMemo, loadMemo, listMemo } from './scratchpad.js';
|
|
38
43
|
// pythonTools 只在 js2python 子代理中使用,不导出到主工具集
|
|
39
44
|
|
|
40
45
|
// 所有工具
|
|
@@ -49,7 +54,7 @@ import { profileTools } from './profile.js';
|
|
|
49
54
|
import { runtimeTools } from './runtime.js';
|
|
50
55
|
import { debugTools } from './debug.js';
|
|
51
56
|
import { captureTools } from './capture.js';
|
|
52
|
-
import { browserTools } from './browser.js';
|
|
57
|
+
import { browserTools, clickElement, scrollPage, fillInput, getInteractiveElements, getPageInfo, hoverElement, pressKey } from './browser.js';
|
|
53
58
|
import { reportTools } from './report.js';
|
|
54
59
|
import { webcrackTools } from './webcrack.js';
|
|
55
60
|
import { preprocessTools } from './preprocess.js';
|
|
@@ -62,15 +67,17 @@ import { verifyTools } from './verify.js';
|
|
|
62
67
|
import { cryptoHookTools } from './cryptohook.js';
|
|
63
68
|
import { correlateTools } from './correlate.js';
|
|
64
69
|
import { extractorTools } from './extractor.js';
|
|
65
|
-
import { tracingTools } from './tracing.js';
|
|
70
|
+
import { tracingTools, getSiteList, getRequestList, searchInResponses, getRequestDetail, getRequestInitiator } from './tracing.js';
|
|
66
71
|
import { analysisTools } from './analysis.js';
|
|
67
72
|
import { fileTools } from './file.js';
|
|
68
73
|
import { evolveTools } from './evolve.js';
|
|
69
74
|
import { captchaTools } from './captcha.js';
|
|
70
75
|
import { antiDetectTools } from './anti-detect.js';
|
|
71
76
|
import { crawlerTools } from './crawler.js';
|
|
77
|
+
import { crawlerGeneratorTools } from './crawlerGenerator.js';
|
|
72
78
|
import { nodejsTools } from './nodejs.js';
|
|
73
79
|
import { hookManagerTools } from './hookManager.js';
|
|
80
|
+
import { scratchpadTools } from './scratchpad.js';
|
|
74
81
|
|
|
75
82
|
export const allTools = [
|
|
76
83
|
...sandboxTools,
|
|
@@ -104,34 +111,42 @@ export const allTools = [
|
|
|
104
111
|
...captchaTools,
|
|
105
112
|
...antiDetectTools,
|
|
106
113
|
...crawlerTools,
|
|
114
|
+
...crawlerGeneratorTools,
|
|
107
115
|
...nodejsTools,
|
|
108
116
|
...hookManagerTools,
|
|
117
|
+
...scratchpadTools,
|
|
109
118
|
];
|
|
110
119
|
|
|
111
120
|
/**
 * Core tools for the main agent.
 * Responsibilities: browser lifecycle, simple page interaction, data
 * queries, and delegation/scheduling.
 *
 * The following tool sets are intentionally NOT included here; per the
 * original author's note they are held by the dedicated reverse-agent
 * sub-agent instead:
 * - hookManagerTools (inject_hook, etc.)
 * - captureTools (collect_env, get_hook_logs, etc.)
 * - sandboxTools
 * - debugTools
 * - static-analysis tools
 */
export const coreTools = [
  // Browser runtime (lifecycle management)
  ...runtimeTools,

  // Browser analysis-panel interaction
  ...analysisTools,

  // Data queries — only the minimal set needed for scheduling:
  // list, search, detail, initiator
  getSiteList,
  getRequestList,
  searchInResponses,
  getRequestDetail,
  getRequestInitiator,

  // Report generation
  ...reportTools,

  // File operations
  ...fileTools,

  // Experience evolution
  ...evolveTools,

  // Node.js execution (quickly verify a hypothesis before delegating).
  // The original note says network-request protection has been added;
  // that guard is not visible from this file.
  ...nodejsTools,

  // Working memory
  ...scratchpadTools,

  // Crawler code generation (with human-in-the-loop confirmation)
  ...crawlerGeneratorTools,

  // Page interaction (autonomous data hunting: scroll to load, click to
  // trigger requests)
  clickElement,
  scrollPage,
  fillInput,
  getInteractiveElements,
  getPageInfo,
  hoverElement,
  pressKey,
];
|
|
@@ -17,10 +17,19 @@ const PROJECT_ROOT = join(__dirname, '../../..');
|
|
|
17
17
|
// 输出大小限制
|
|
18
18
|
const MAX_OUTPUT_SIZE = 100000;
|
|
19
19
|
|
|
20
|
+
// 超时上限(防止 LLM 传入过大值)
|
|
21
|
+
const MAX_TIMEOUT = 30000;
|
|
22
|
+
|
|
23
|
+
// 连续超时计数器
|
|
24
|
+
let consecutiveTimeouts = 0;
|
|
25
|
+
const MAX_CONSECUTIVE_TIMEOUTS = 3;
|
|
26
|
+
|
|
20
27
|
/**
|
|
21
28
|
* 执行 Node.js 代码
|
|
22
29
|
*/
|
|
23
30
|
async function executeNode(code, timeout = 10000) {
|
|
31
|
+
const effectiveTimeout = Math.min(timeout, MAX_TIMEOUT);
|
|
32
|
+
|
|
24
33
|
return new Promise((resolve) => {
|
|
25
34
|
const proc = spawn('node', ['-e', code], {
|
|
26
35
|
env: { ...process.env },
|
|
@@ -30,12 +39,17 @@ async function executeNode(code, timeout = 10000) {
|
|
|
30
39
|
let stdout = '';
|
|
31
40
|
let stderr = '';
|
|
32
41
|
let killed = false;
|
|
42
|
+
let killTimer = null;
|
|
33
43
|
|
|
34
|
-
//
|
|
44
|
+
// SIGTERM 超时
|
|
35
45
|
const timer = setTimeout(() => {
|
|
36
46
|
killed = true;
|
|
37
47
|
proc.kill('SIGTERM');
|
|
38
|
-
|
|
48
|
+
// SIGKILL 兜底:环境检测死循环可能忽略 SIGTERM
|
|
49
|
+
killTimer = setTimeout(() => {
|
|
50
|
+
try { proc.kill('SIGKILL'); } catch { /* already dead */ }
|
|
51
|
+
}, 2000);
|
|
52
|
+
}, effectiveTimeout);
|
|
39
53
|
|
|
40
54
|
proc.stdout.on('data', (data) => {
|
|
41
55
|
if (stdout.length < MAX_OUTPUT_SIZE) {
|
|
@@ -51,21 +65,25 @@ async function executeNode(code, timeout = 10000) {
|
|
|
51
65
|
|
|
52
66
|
proc.on('close', (exitCode) => {
|
|
53
67
|
clearTimeout(timer);
|
|
68
|
+
if (killTimer) clearTimeout(killTimer);
|
|
54
69
|
resolve({
|
|
55
70
|
success: !killed && exitCode === 0,
|
|
56
71
|
stdout: stdout.trim(),
|
|
57
|
-
stderr: killed ?
|
|
72
|
+
stderr: killed ? `Timeout after ${effectiveTimeout}ms: process killed` : stderr.trim(),
|
|
58
73
|
exitCode: killed ? -1 : exitCode,
|
|
74
|
+
timedOut: killed,
|
|
59
75
|
});
|
|
60
76
|
});
|
|
61
77
|
|
|
62
78
|
proc.on('error', (err) => {
|
|
63
79
|
clearTimeout(timer);
|
|
80
|
+
if (killTimer) clearTimeout(killTimer);
|
|
64
81
|
resolve({
|
|
65
82
|
success: false,
|
|
66
83
|
stdout: '',
|
|
67
84
|
stderr: err.message,
|
|
68
85
|
exitCode: -1,
|
|
86
|
+
timedOut: false,
|
|
69
87
|
});
|
|
70
88
|
});
|
|
71
89
|
});
|
|
@@ -76,8 +94,25 @@ async function executeNode(code, timeout = 10000) {
|
|
|
76
94
|
*/
|
|
77
95
|
export const runNodeCode = tool(
|
|
78
96
|
async ({ code, timeout }) => {
|
|
79
|
-
|
|
80
|
-
|
|
97
|
+
// 连续超时保护:降级为短超时探测,而非完全拒绝
|
|
98
|
+
const isBlocked = consecutiveTimeouts >= MAX_CONSECUTIVE_TIMEOUTS;
|
|
99
|
+
const effectiveTimeout = isBlocked ? 5000 : (timeout || 10000);
|
|
100
|
+
const result = await executeNode(code, effectiveTimeout);
|
|
101
|
+
|
|
102
|
+
// 更新连续超时计数
|
|
103
|
+
if (result.timedOut) {
|
|
104
|
+
consecutiveTimeouts++;
|
|
105
|
+
if (consecutiveTimeouts >= MAX_CONSECUTIVE_TIMEOUTS) {
|
|
106
|
+
result.stderr += `\n\n⚠️ 已连续超时 ${consecutiveTimeouts} 次,后续调用将降级为 5s 短超时。代码很可能存在死循环或触发了环境检测。请停止重试相同逻辑,改用 sandbox_execute(沙箱执行)、断点调试或静态分析。`;
|
|
107
|
+
} else {
|
|
108
|
+
result.stderr += `\n\n⚠️ 连续超时 ${consecutiveTimeouts}/${MAX_CONSECUTIVE_TIMEOUTS} 次。如果代码包含从网站提取的混淆代码,可能存在环境检测导致死循环。建议:1) 检查代码是否有 while(true)/setInterval 等循环 2) 改用 sandbox_execute 在受控环境中执行`;
|
|
109
|
+
}
|
|
110
|
+
} else {
|
|
111
|
+
consecutiveTimeouts = 0; // 成功执行或非超时失败,重置计数
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
const { timedOut: _, ...output } = result;
|
|
115
|
+
return JSON.stringify(output);
|
|
81
116
|
},
|
|
82
117
|
{
|
|
83
118
|
name: 'run_node_code',
|
|
@@ -93,7 +128,7 @@ export const runNodeCode = tool(
|
|
|
93
128
|
示例:const CryptoJS = require('crypto-js'); console.log(CryptoJS.MD5('test').toString());`,
|
|
94
129
|
schema: z.object({
|
|
95
130
|
code: z.string().describe('要执行的 JS 代码,可使用 require 引入加密库'),
|
|
96
|
-
timeout: z.number().optional().default(10000).describe('
|
|
131
|
+
timeout: z.number().optional().default(10000).describe('超时时间(毫秒),上限 30000'),
|
|
97
132
|
}),
|
|
98
133
|
}
|
|
99
134
|
);
|
package/src/agent/tools/patch.js
CHANGED
|
@@ -21,7 +21,7 @@ export const generatePatch = tool(
|
|
|
21
21
|
description: '为缺失的环境属性生成补丁代码。',
|
|
22
22
|
schema: z.object({
|
|
23
23
|
property: z.string().describe('缺失的属性路径,如 navigator.userAgent'),
|
|
24
|
-
context: z.
|
|
24
|
+
context: z.object({}).passthrough().optional().describe('上下文信息'),
|
|
25
25
|
}),
|
|
26
26
|
}
|
|
27
27
|
);
|
|
@@ -76,4 +76,24 @@ export const sandboxReset = tool(
|
|
|
76
76
|
}
|
|
77
77
|
);
|
|
78
78
|
|
|
79
|
-
|
|
79
|
+
/**
 * Tool: run code in the sandbox with automatic environment patching.
 *
 * Obtains the sandbox through `getSandbox()`, calls its
 * `executeWithAutoFix(code, { timeout, maxIterations })`, and returns the
 * result as pretty-printed JSON. The retry loop itself lives in the sandbox
 * and is not visible from this file.
 */
export const sandboxAutoFix = tool(
  async (input) => {
    const options = {
      timeout: input.timeout,
      maxIterations: input.maxIterations,
    };
    const sandbox = await getSandbox();
    const outcome = await sandbox.executeWithAutoFix(input.code, options);
    return JSON.stringify(outcome, null, 2);
  },
  {
    name: 'sandbox_auto_fix',
    description: '自动补环境闭环执行:加载预置模块 → 执行代码 → 发现缺失环境 → 自动生成补丁 → 重试,直到成功或无法继续。适合快速验证混淆代码能否在沙箱中运行。',
    schema: z.object({
      code: z.string().describe('要执行的目标JS代码'),
      timeout: z
        .number()
        .optional()
        .default(5000)
        .describe('单次执行超时时间(ms)'),
      maxIterations: z
        .number()
        .optional()
        .default(10)
        .describe('最大迭代次数'),
    }),
  }
);
|
|
98
|
+
|
|
99
|
+
// All sandbox tools exported by this module, in registration order.
export const sandboxTools = [
  sandboxExecute,
  sandboxInject,
  sandboxReset,
  sandboxAutoFix,
];
|