deepspider 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +3 -0
- package/README.md +13 -13
- package/package.json +6 -6
- package/src/agent/core/PanelBridge.js +29 -77
- package/src/agent/core/StreamHandler.js +139 -14
- package/src/agent/index.js +51 -12
- package/src/agent/logger.js +184 -9
- package/src/agent/middleware/report.js +42 -16
- package/src/agent/middleware/subagent.js +233 -0
- package/src/agent/middleware/toolGuard.js +77 -0
- package/src/agent/middleware/validationWorkflow.js +171 -0
- package/src/agent/prompts/system.js +181 -59
- package/src/agent/run.js +41 -6
- package/src/agent/skills/crawler/SKILL.md +64 -3
- package/src/agent/skills/crawler/evolved.md +9 -1
- package/src/agent/skills/dynamic-analysis/SKILL.md +74 -7
- package/src/agent/skills/env/SKILL.md +75 -0
- package/src/agent/skills/evolve.js +0 -3
- package/src/agent/skills/sandbox/SKILL.md +35 -0
- package/src/agent/skills/static-analysis/SKILL.md +98 -2
- package/src/agent/subagents/anti-detect.js +10 -20
- package/src/agent/subagents/captcha.js +7 -19
- package/src/agent/subagents/crawler.js +25 -37
- package/src/agent/subagents/factory.js +109 -9
- package/src/agent/subagents/index.js +4 -13
- package/src/agent/subagents/js2python.js +7 -19
- package/src/agent/subagents/reverse.js +180 -0
- package/src/agent/tools/analysis.js +84 -1
- package/src/agent/tools/anti-detect.js +5 -2
- package/src/agent/tools/browser.js +160 -0
- package/src/agent/tools/captcha.js +1 -1
- package/src/agent/tools/capture.js +24 -3
- package/src/agent/tools/correlate.js +129 -15
- package/src/agent/tools/crawler.js +2 -1
- package/src/agent/tools/crawlerGenerator.js +90 -0
- package/src/agent/tools/debug.js +43 -6
- package/src/agent/tools/evolve.js +6 -3
- package/src/agent/tools/extractor.js +5 -1
- package/src/agent/tools/file.js +16 -7
- package/src/agent/tools/generateHook.js +66 -0
- package/src/agent/tools/hookManager.js +19 -9
- package/src/agent/tools/index.js +33 -20
- package/src/agent/tools/nodejs.js +41 -6
- package/src/agent/tools/python.js +4 -4
- package/src/agent/tools/report.js +2 -2
- package/src/agent/tools/runtime.js +1 -1
- package/src/agent/tools/sandbox.js +21 -1
- package/src/agent/tools/scratchpad.js +70 -0
- package/src/agent/tools/tracing.js +26 -0
- package/src/agent/tools/verifyAlgorithm.js +117 -0
- package/src/analyzer/EncryptionAnalyzer.js +2 -2
- package/src/browser/EnvBridge.js +27 -13
- package/src/browser/client.js +124 -18
- package/src/browser/collector.js +101 -22
- package/src/browser/defaultHooks.js +3 -1
- package/src/browser/hooks/index.js +5 -0
- package/src/browser/interceptors/AntiDebugInterceptor.js +132 -0
- package/src/browser/interceptors/NetworkInterceptor.js +77 -13
- package/src/browser/interceptors/ScriptInterceptor.js +34 -9
- package/src/browser/interceptors/index.js +1 -0
- package/src/browser/ui/analysisPanel.js +469 -464
- package/src/cli/commands/config.js +11 -3
- package/src/config/paths.js +9 -1
- package/src/config/settings.js +7 -1
- package/src/core/PatchGenerator.js +26 -6
- package/src/core/Sandbox.js +140 -3
- package/src/env/EnvCodeGenerator.js +60 -88
- package/src/env/modules/bom/history.js +6 -0
- package/src/env/modules/bom/location.js +6 -0
- package/src/env/modules/bom/navigator.js +13 -0
- package/src/env/modules/bom/screen.js +6 -0
- package/src/env/modules/bom/storage.js +7 -0
- package/src/env/modules/dom/document.js +14 -0
- package/src/env/modules/dom/event.js +4 -0
- package/src/env/modules/index.js +27 -10
- package/src/env/modules/webapi/fetch.js +4 -0
- package/src/env/modules/webapi/url.js +4 -0
- package/src/env/modules/webapi/xhr.js +8 -0
- package/src/store/DataStore.js +130 -47
- package/src/store/Store.js +2 -1
- package/src/agent/subagents/dynamic.js +0 -64
- package/src/agent/subagents/env-agent.js +0 -82
- package/src/agent/subagents/sandbox.js +0 -55
- package/src/agent/subagents/static.js +0 -66
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
/**
 * DeepSpider - anti "infinite debugger" interceptor.
 *
 * Uses CDP `Debugger.setBlackboxedRanges` to blackbox every script whose source
 * contains a `debugger` statement. No runtime overhead is added to the page, the
 * script source is not modified, and integrity checks are not triggered.
 *
 * Known limitation: /\bdebugger\b/ also matches the word inside strings and
 * comments. That is acceptable for anti-crawling work: a script blackboxed by
 * mistake still executes normally, it just cannot be stepped through.
 */

export class AntiDebugInterceptor {
  /**
   * @param {object} cdpClient - CDP session exposing `on(event, handler)` and
   *   `send(method, params)`; `send` is expected to return a promise.
   */
  constructor(cdpClient) {
    this.client = cdpClient;
    // Script ids whose blackbox request has been acknowledged.
    this.blackboxedScripts = new Set();
    // scriptId -> in-flight blackbox request, so unblackbox() can wait for it.
    this.pendingBlackbox = new Map();
    // Guards against registering the paused listener more than once.
    this.started = false;

    // High-frequency `debugger` detection (sliding window of pause events).
    this.pausedCount = 0;
    this.pausedWindowStart = 0;
    this.PAUSED_WINDOW_MS = 1000; // length of the counting window
    this.PAUSED_THRESHOLD = 5; // more pauses than this inside one window = storm
    this.STORM_AUTO_EXIT_MS = 3000; // storm mode lifetime when auto-detected
    this.STORM_MANUAL_EXIT_MS = 5000; // storm mode lifetime when enabled manually
    this.stormMode = false; // storm mode: resume on every pause, breakpoints included
    this.stormTimer = null; // timer that leaves storm mode automatically
  }

  /**
   * Subscribe to `Debugger.paused` and auto-resume pauses that were not caused
   * by a breakpoint. This is the fallback for synchronous `debugger` statements
   * that fire before blackboxing has taken effect.
   *
   * Idempotent: calling it again does not add a second listener (which would
   * send duplicate resumes and double-count pauses).
   */
  async start() {
    if (this.started) return;
    this.client.on('Debugger.paused', (params) => this._onPaused(params));
    this.started = true;
    console.log('[AntiDebugInterceptor] 已启动');
  }

  /**
   * Handle one `Debugger.paused` event.
   *
   * Outside storm mode, pauses with reason `breakpoint` or a non-empty
   * `hitBreakpoints` list are left alone; everything else is counted and
   * resumed. The reason for a `debugger` statement may be 'other' or
   * 'debugCommand' depending on the Chrome version, so the filter is negative.
   *
   * @param {object} [params] - payload of the paused event.
   */
  _onPaused(params = {}) {
    // In storm mode every pause is resumed and nothing is counted.
    if (this.stormMode) {
      this._resume();
      return;
    }

    if (params.reason === 'breakpoint' || params.hitBreakpoints?.length > 0) return;

    const now = Date.now();
    if (now - this.pausedWindowStart > this.PAUSED_WINDOW_MS) {
      // Previous window expired: open a new one.
      this.pausedWindowStart = now;
      this.pausedCount = 1;
    } else {
      this.pausedCount++;
    }

    if (this.pausedCount > this.PAUSED_THRESHOLD) {
      console.log('[AntiDebugInterceptor] 检测到 debugger 风暴,启用风暴模式');
      this.stormMode = true;
      this._scheduleStormExit(this.STORM_AUTO_EXIT_MS, '[AntiDebugInterceptor] 退出风暴模式');
    }

    this._resume();
  }

  /** Send `Debugger.resume`; failures are ignored because the resume is best-effort. */
  _resume() {
    this.client.send('Debugger.resume').catch(() => {});
  }

  /**
   * (Re)arm the timer that leaves storm mode.
   *
   * @param {number} delayMs - time until storm mode is switched off.
   * @param {string} logMessage - message logged when the timer fires.
   */
  _scheduleStormExit(delayMs, logMessage) {
    if (this.stormTimer) clearTimeout(this.stormTimer);
    this.stormTimer = setTimeout(() => {
      this.stormMode = false;
      this.pausedCount = 0;
      this.stormTimer = null;
      console.log(logMessage);
    }, delayMs);
    // Do not let a pending auto-exit keep the Node process alive.
    this.stormTimer.unref?.();
  }

  /**
   * Inspect a script's source and blackbox the whole script if it contains
   * `debugger`. Driven by the ScriptInterceptor `onSource` callback so the
   * source does not have to be fetched a second time.
   *
   * @param {string} scriptId - CDP script id.
   * @param {string} scriptSource - full script source text.
   */
  checkScript(scriptId, scriptSource) {
    if (typeof scriptSource !== 'string' || !/\bdebugger\b/.test(scriptSource)) return;

    const request = this.client
      .send('Debugger.setBlackboxedRanges', {
        scriptId,
        positions: [{ lineNumber: 0, columnNumber: 0 }],
      })
      .then(() => {
        this.blackboxedScripts.add(scriptId);
      })
      // Blackboxing is best-effort; the paused handler remains as the fallback.
      .catch(() => {})
      .finally(() => {
        if (this.pendingBlackbox.get(scriptId) === request) {
          this.pendingBlackbox.delete(scriptId);
        }
      });
    this.pendingBlackbox.set(scriptId, request);
  }

  /**
   * Remove the blackbox from a script (used by the breakpoint tooling).
   *
   * Waits for a blackbox request that is still in flight for the same script,
   * so a call made right after checkScript() is not silently lost.
   *
   * @param {string} scriptId - CDP script id.
   * @returns {Promise<void>} rejects if the CDP call fails.
   */
  async unblackbox(scriptId) {
    await this.pendingBlackbox.get(scriptId);
    if (!this.blackboxedScripts.has(scriptId)) return;

    await this.client.send('Debugger.setBlackboxedRanges', {
      scriptId,
      positions: [],
    });
    this.blackboxedScripts.delete(scriptId);
  }

  /**
   * Manually enable or disable storm mode, for sites with aggressive
   * anti-debugging. When enabled, storm mode switches itself off again after
   * STORM_MANUAL_EXIT_MS.
   *
   * @param {boolean} enabled - truthy to enable, falsy to disable.
   */
  setStormMode(enabled) {
    // Drop any previously scheduled auto-exit.
    if (this.stormTimer) {
      clearTimeout(this.stormTimer);
      this.stormTimer = null;
    }

    this.stormMode = Boolean(enabled);
    if (this.stormMode) {
      console.log('[AntiDebugInterceptor] 手动启用风暴模式');
      this._scheduleStormExit(this.STORM_MANUAL_EXIT_MS, '[AntiDebugInterceptor] 自动退出风暴模式');
    } else {
      console.log('[AntiDebugInterceptor] 手动禁用风暴模式');
      this.pausedCount = 0;
    }
  }

  /**
   * @returns {boolean} whether storm mode is currently active.
   */
  isStormMode() {
    return this.stormMode;
  }
}
|
|
@@ -46,11 +46,16 @@ export class NetworkInterceptor {
|
|
|
46
46
|
this.onLoadingFinished(params);
|
|
47
47
|
});
|
|
48
48
|
|
|
49
|
+
// 监听加载失败(清理 pendingRequests,防止内存泄漏)
|
|
50
|
+
this.client.on('Network.loadingFailed', (params) => {
|
|
51
|
+
this.pendingRequests.delete(params.requestId);
|
|
52
|
+
});
|
|
53
|
+
|
|
49
54
|
console.log('[NetworkInterceptor] 已启动');
|
|
50
55
|
}
|
|
51
56
|
|
|
52
57
|
onRequest(params) {
|
|
53
|
-
const { requestId, request, timestamp } = params;
|
|
58
|
+
const { requestId, request, timestamp, initiator } = params;
|
|
54
59
|
|
|
55
60
|
// 只记录 XHR/Fetch 请求
|
|
56
61
|
const type = params.type;
|
|
@@ -62,10 +67,37 @@ export class NetworkInterceptor {
|
|
|
62
67
|
headers: request.headers,
|
|
63
68
|
postData: request.postData,
|
|
64
69
|
timestamp: timestamp * 1000,
|
|
65
|
-
pageUrl: this.getPageUrl()
|
|
70
|
+
pageUrl: this.getPageUrl(),
|
|
71
|
+
initiator: this.formatInitiator(initiator),
|
|
66
72
|
});
|
|
67
73
|
}
|
|
68
74
|
|
|
75
|
+
/**
|
|
76
|
+
* 精简 initiator 调用栈(只保留前 5 帧,过滤内部帧)
|
|
77
|
+
*/
|
|
78
|
+
formatInitiator(initiator) {
|
|
79
|
+
if (!initiator) return null;
|
|
80
|
+
const result = { type: initiator.type };
|
|
81
|
+
if (initiator.url) {
|
|
82
|
+
result.url = initiator.url;
|
|
83
|
+
result.lineNumber = initiator.lineNumber;
|
|
84
|
+
}
|
|
85
|
+
if (initiator.stack?.callFrames) {
|
|
86
|
+
result.callFrames = initiator.stack.callFrames
|
|
87
|
+
.filter(f => f.url && !f.url.includes('patchright') && !f.url.includes('__playwright'))
|
|
88
|
+
.slice(0, 5)
|
|
89
|
+
.map(f => ({
|
|
90
|
+
functionName: f.functionName || '(anonymous)',
|
|
91
|
+
url: f.url,
|
|
92
|
+
lineNumber: f.lineNumber,
|
|
93
|
+
columnNumber: f.columnNumber,
|
|
94
|
+
}));
|
|
95
|
+
}
|
|
96
|
+
// 只有 type 没有实际定位信息时返回 null
|
|
97
|
+
if (!result.url && !result.callFrames?.length) return null;
|
|
98
|
+
return result;
|
|
99
|
+
}
|
|
100
|
+
|
|
69
101
|
onResponse(params) {
|
|
70
102
|
const { requestId, response } = params;
|
|
71
103
|
const pending = this.pendingRequests.get(requestId);
|
|
@@ -81,15 +113,31 @@ export class NetworkInterceptor {
|
|
|
81
113
|
if (!pending) return;
|
|
82
114
|
|
|
83
115
|
try {
|
|
84
|
-
//
|
|
85
|
-
const
|
|
86
|
-
|
|
87
|
-
|
|
116
|
+
// 获取响应体,添加超时保护防止 CDP 命令挂起
|
|
117
|
+
const bodyPromise = this.client.send('Network.getResponseBody', { requestId });
|
|
118
|
+
const timeoutPromise = new Promise((_, reject) =>
|
|
119
|
+
setTimeout(() => reject(new Error('getResponseBody timeout')), 5000)
|
|
88
120
|
);
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
121
|
+
const { body, base64Encoded } = await Promise.race([bodyPromise, timeoutPromise]);
|
|
122
|
+
|
|
123
|
+
// 处理响应体:检测二进制内容,避免损坏
|
|
124
|
+
let responseBody;
|
|
125
|
+
const contentType = pending.responseHeaders?.['content-type'] || '';
|
|
126
|
+
|
|
127
|
+
if (this.isBinaryContent(contentType)) {
|
|
128
|
+
// 二进制内容:存储元数据而非原始内容
|
|
129
|
+
// base64 长度计算:每 4 个字符 = 3 字节,考虑 padding
|
|
130
|
+
const binarySize = base64Encoded
|
|
131
|
+
? Math.floor(body.length * 0.75) - (body.match(/=*$/)?.[0].length || 0)
|
|
132
|
+
: body.length;
|
|
133
|
+
responseBody = `[Binary: ${contentType}, ${binarySize} bytes]`;
|
|
134
|
+
} else {
|
|
135
|
+
// 文本内容:安全地转换为字符串
|
|
136
|
+
const rawBody = base64Encoded
|
|
137
|
+
? Buffer.from(body, 'base64').toString('utf-8')
|
|
138
|
+
: body;
|
|
139
|
+
responseBody = rawBody.slice(0, 50000);
|
|
140
|
+
}
|
|
93
141
|
|
|
94
142
|
// 异步存储到文件
|
|
95
143
|
this.store.saveResponse({
|
|
@@ -98,19 +146,35 @@ export class NetworkInterceptor {
|
|
|
98
146
|
status: pending.status,
|
|
99
147
|
requestHeaders: pending.headers,
|
|
100
148
|
requestBody: pending.postData,
|
|
101
|
-
responseBody
|
|
149
|
+
responseBody,
|
|
102
150
|
timestamp: pending.timestamp,
|
|
103
|
-
pageUrl: pending.pageUrl
|
|
151
|
+
pageUrl: pending.pageUrl,
|
|
152
|
+
initiator: pending.initiator,
|
|
104
153
|
}).catch(e => {
|
|
105
154
|
console.error('[NetworkInterceptor] 保存失败:', e.message);
|
|
106
155
|
});
|
|
107
156
|
|
|
108
|
-
} catch
|
|
157
|
+
} catch {
|
|
109
158
|
// 某些响应无法获取 body
|
|
110
159
|
}
|
|
111
160
|
|
|
112
161
|
this.pendingRequests.delete(requestId);
|
|
113
162
|
}
|
|
163
|
+
|
|
164
|
+
/**
|
|
165
|
+
* 检测是否为二进制内容类型
|
|
166
|
+
*/
|
|
167
|
+
isBinaryContent(contentType) {
|
|
168
|
+
if (!contentType) return false;
|
|
169
|
+
const binaryTypes = [
|
|
170
|
+
'image/', 'audio/', 'video/', 'application/pdf',
|
|
171
|
+
'application/octet-stream', 'application/zip',
|
|
172
|
+
'application/gzip', 'application/x-protobuf',
|
|
173
|
+
'font/', 'application/vnd.'
|
|
174
|
+
];
|
|
175
|
+
const lowerType = contentType.toLowerCase();
|
|
176
|
+
return binaryTypes.some(type => lowerType.includes(type));
|
|
177
|
+
}
|
|
114
178
|
}
|
|
115
179
|
|
|
116
180
|
export default NetworkInterceptor;
|
|
@@ -11,6 +11,7 @@ export class ScriptInterceptor {
|
|
|
11
11
|
this.page = page; // Playwright page 对象
|
|
12
12
|
this.store = getDataStore();
|
|
13
13
|
this.scriptIds = new Set();
|
|
14
|
+
this.onSource = null; // 回调: (scriptId, scriptSource) => void
|
|
14
15
|
}
|
|
15
16
|
|
|
16
17
|
/**
|
|
@@ -38,24 +39,48 @@ export class ScriptInterceptor {
|
|
|
38
39
|
}
|
|
39
40
|
|
|
40
41
|
async onScriptParsed(params) {
|
|
41
|
-
const { scriptId, url, length } = params;
|
|
42
|
+
const { scriptId, url, length: _length } = params;
|
|
42
43
|
|
|
43
|
-
//
|
|
44
|
-
if (
|
|
44
|
+
// 跳过扩展脚本
|
|
45
|
+
if (url?.startsWith('chrome-extension://')) return;
|
|
45
46
|
if (this.scriptIds.has(scriptId)) return;
|
|
46
47
|
|
|
47
48
|
this.scriptIds.add(scriptId);
|
|
48
49
|
|
|
49
|
-
|
|
50
|
-
|
|
50
|
+
if (url) {
|
|
51
|
+
// 有 URL 的脚本:获取源码、通知订阅者、存储
|
|
52
|
+
this.fetchAndSave(scriptId, url).catch(() => {});
|
|
53
|
+
} else if (this.onSource) {
|
|
54
|
+
// 无 URL 脚本(eval/new Function 生成):仅通知订阅者用于 debugger 检测,不存储
|
|
55
|
+
this.fetchAndNotify(scriptId).catch(() => {});
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
async fetchAndNotify(scriptId) {
|
|
60
|
+
try {
|
|
61
|
+
// 添加超时保护防止 CDP 命令挂起
|
|
62
|
+
const sourcePromise = this.client.send('Debugger.getScriptSource', { scriptId });
|
|
63
|
+
const timeoutPromise = new Promise((_, reject) =>
|
|
64
|
+
setTimeout(() => reject(new Error('getScriptSource timeout')), 5000)
|
|
65
|
+
);
|
|
66
|
+
const { scriptSource } = await Promise.race([sourcePromise, timeoutPromise]);
|
|
67
|
+
try { this.onSource(scriptId, scriptSource); } catch { /* 订阅者异常不影响主流程 */ }
|
|
68
|
+
} catch {
|
|
69
|
+
// 获取失败(脚本已卸载等),忽略
|
|
70
|
+
}
|
|
51
71
|
}
|
|
52
72
|
|
|
53
73
|
async fetchAndSave(scriptId, url) {
|
|
54
74
|
try {
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
75
|
+
// 添加超时保护防止 CDP 命令挂起
|
|
76
|
+
const sourcePromise = this.client.send('Debugger.getScriptSource', { scriptId });
|
|
77
|
+
const timeoutPromise = new Promise((_, reject) =>
|
|
78
|
+
setTimeout(() => reject(new Error('getScriptSource timeout')), 5000)
|
|
58
79
|
);
|
|
80
|
+
const { scriptSource } = await Promise.race([sourcePromise, timeoutPromise]);
|
|
81
|
+
|
|
82
|
+
// 通知订阅者(AntiDebugInterceptor 等)
|
|
83
|
+
try { this.onSource?.(scriptId, scriptSource); } catch { /* 订阅者异常不影响主流程 */ }
|
|
59
84
|
|
|
60
85
|
// 限制大小,超大脚本只保存部分
|
|
61
86
|
const source = scriptSource.slice(0, 500000);
|
|
@@ -67,7 +92,7 @@ export class ScriptInterceptor {
|
|
|
67
92
|
timestamp: Date.now(),
|
|
68
93
|
pageUrl: this.getPageUrl() // 传递页面 URL
|
|
69
94
|
});
|
|
70
|
-
} catch
|
|
95
|
+
} catch {
|
|
71
96
|
// 获取失败,跳过
|
|
72
97
|
}
|
|
73
98
|
}
|